removed BEGIN_OPENCV_DEVICE_NAMESPACE macros

parent d926541311
commit 0f53f2993e
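The same transformation repeats across every file below; as a minimal before/after sketch (mathfunc is one of the affected namespaces, and the macro definitions themselves are not part of this diff):

    // Before: device code sat between macros that expanded to the
    // cv::gpu::device namespace scopes, and call sites pulled the
    // symbols in through a macro as well:
    //
    //     BEGIN_OPENCV_DEVICE_NAMESPACE
    //     namespace mathfunc { /* kernel declarations */ }
    //     END_OPENCV_DEVICE_NAMESPACE
    //
    //     using namespace OPENCV_DEVICE_NAMESPACE_ mathfunc;
    //
    // After: the nested namespaces are spelled out explicitly...
    namespace cv { namespace gpu { namespace device
    {
        namespace mathfunc
        {
            void cartToPolar_gpu(DevMem2Df x, DevMem2Df y, DevMem2Df mag, bool magSqr,
                                 DevMem2Df angle, bool angleInDegrees, cudaStream_t stream);
        }
    }}}

    // ...and call sites name them with a fully qualified using-directive:
    using namespace ::cv::gpu::device::mathfunc;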
@@ -425,21 +425,20 @@ void cv::gpu::magnitudeSqr(const GpuMat& src, GpuMat& dst, Stream& stream)
 ////////////////////////////////////////////////////////////////////////
 // Polar <-> Cart

-BEGIN_OPENCV_DEVICE_NAMESPACE
-
-namespace mathfunc
+namespace cv { namespace gpu { namespace device
 {
+namespace mathfunc
+{
 void cartToPolar_gpu(DevMem2Df x, DevMem2Df y, DevMem2Df mag, bool magSqr, DevMem2Df angle, bool angleInDegrees, cudaStream_t stream);
 void polarToCart_gpu(DevMem2Df mag, DevMem2Df angle, DevMem2Df x, DevMem2Df y, bool angleInDegrees, cudaStream_t stream);
 }
-
-END_OPENCV_DEVICE_NAMESPACE
+}}}

 namespace
 {
 inline void cartToPolar_caller(const GpuMat& x, const GpuMat& y, GpuMat* mag, bool magSqr, GpuMat* angle, bool angleInDegrees, cudaStream_t stream)
 {
-using namespace OPENCV_DEVICE_NAMESPACE_ mathfunc;
+using namespace ::cv::gpu::device::mathfunc;

 CV_DbgAssert(x.size() == y.size() && x.type() == y.type());
 CV_Assert(x.depth() == CV_32F);

@@ -459,7 +458,7 @@ namespace

 inline void polarToCart_caller(const GpuMat& mag, const GpuMat& angle, GpuMat& x, GpuMat& y, bool angleInDegrees, cudaStream_t stream)
 {
-using namespace OPENCV_DEVICE_NAMESPACE_ mathfunc;
+using namespace ::cv::gpu::device::mathfunc;

 CV_DbgAssert((mag.empty() || mag.size() == angle.size()) && mag.type() == angle.type());
 CV_Assert(mag.depth() == CV_32F);
@@ -55,19 +55,18 @@ void cv::gpu::DisparityBilateralFilter::operator()(const GpuMat&, const GpuMat&,

 #else /* !defined (HAVE_CUDA) */

-BEGIN_OPENCV_DEVICE_NAMESPACE
-
-namespace bilateral_filter
+namespace cv { namespace gpu { namespace device
 {
+namespace bilateral_filter
+{
 void load_constants(float* table_color, DevMem2Df table_space, int ndisp, int radius, short edge_disc, short max_disc);

 void bilateral_filter_gpu(DevMem2Db disp, DevMem2Db img, int channels, int iters, cudaStream_t stream);
 void bilateral_filter_gpu(DevMem2D_<short> disp, DevMem2Db img, int channels, int iters, cudaStream_t stream);
 }
+}}}

-END_OPENCV_DEVICE_NAMESPACE
-
-using namespace OPENCV_DEVICE_NAMESPACE_ bilateral_filter;
+using namespace ::cv::gpu::device::bilateral_filter;

 namespace
 {
@@ -52,19 +52,18 @@ void cv::gpu::blendLinear(const GpuMat&, const GpuMat&, const GpuMat&, const Gpu

 #else

-BEGIN_OPENCV_DEVICE_NAMESPACE
-
-namespace blend
+namespace cv { namespace gpu { namespace device
 {
+namespace blend
+{
 template <typename T>
 void blendLinearCaller(int rows, int cols, int cn, PtrStep<T> img1, PtrStep<T> img2, PtrStepf weights1, PtrStepf weights2, PtrStep<T> result, cudaStream_t stream);

 void blendLinearCaller8UC4(int rows, int cols, PtrStepb img1, PtrStepb img2, PtrStepf weights1, PtrStepf weights2, PtrStepb result, cudaStream_t stream);
 }
+}}}

-END_OPENCV_DEVICE_NAMESPACE
-
-using namespace OPENCV_DEVICE_NAMESPACE_ blend;
+using namespace ::cv::gpu::device::blend;

 void cv::gpu::blendLinear(const GpuMat& img1, const GpuMat& img2, const GpuMat& weights1, const GpuMat& weights2,
 GpuMat& result, Stream& stream)
@@ -82,10 +82,10 @@ void cv::gpu::BruteForceMatcher_GPU_base::radiusMatch(const GpuMat&, vector< vec

 #else /* !defined (HAVE_CUDA) */

-BEGIN_OPENCV_DEVICE_NAMESPACE
-
-namespace bf_match
+namespace cv { namespace gpu { namespace device
 {
+namespace bf_match
+{
 template <typename T> void matchL1_gpu(const DevMem2Db& query, const DevMem2Db& train, const DevMem2Db& mask,
 const DevMem2Di& trainIdx, const DevMem2Df& distance,
 int cc, cudaStream_t stream);

@@ -105,10 +105,10 @@ namespace bf_match
 template <typename T> void matchHamming_gpu(const DevMem2Db& query, const DevMem2Db& trains, const DevMem2D_<PtrStepb>& masks,
 const DevMem2Di& trainIdx, const DevMem2Di& imgIdx, const DevMem2Df& distance,
 int cc, cudaStream_t stream);
 }

 namespace bf_knnmatch
 {
 template <typename T> void matchL1_gpu(const DevMem2Db& query, const DevMem2Db& train, int k, const DevMem2Db& mask,
 const DevMem2Db& trainIdx, const DevMem2Db& distance, const DevMem2Df& allDist,
 int cc, cudaStream_t stream);

@@ -128,10 +128,10 @@ namespace bf_knnmatch
 template <typename T> void match2Hamming_gpu(const DevMem2Db& query, const DevMem2Db& trains, const DevMem2D_<PtrStepb>& masks,
 const DevMem2Db& trainIdx, const DevMem2Db& imgIdx, const DevMem2Db& distance,
 int cc, cudaStream_t stream);
 }

 namespace bf_radius_match
 {
 template <typename T> void matchL1_gpu(const DevMem2Db& query, const DevMem2Db& train, float maxDistance, const DevMem2Db& mask,
 const DevMem2Di& trainIdx, const DevMem2Df& distance, const DevMem2D_<unsigned int>& nMatches,
 int cc, cudaStream_t stream);

@@ -153,9 +153,8 @@ namespace bf_radius_match
 template <typename T> void matchHamming_gpu(const DevMem2Db& query, const DevMem2Db* trains, int n, float maxDistance, const DevMem2Db* masks,
 const DevMem2Di& trainIdx, const DevMem2Di& imgIdx, const DevMem2Df& distance, const DevMem2D_<unsigned int>& nMatches,
 int cc, cudaStream_t stream);
 }
-
-END_OPENCV_DEVICE_NAMESPACE
+}}}

 ////////////////////////////////////////////////////////////////////
 // Train collection

@@ -199,7 +198,7 @@ void cv::gpu::BruteForceMatcher_GPU_base::matchSingle(const GpuMat& query, const
 if (query.empty() || train.empty())
 return;

-using namespace OPENCV_DEVICE_NAMESPACE_ bf_match;
+using namespace ::cv::gpu::device::bf_match;

 typedef void (*caller_t)(const DevMem2Db& query, const DevMem2Db& train, const DevMem2Db& mask,
 const DevMem2Di& trainIdx, const DevMem2Df& distance,

@@ -341,7 +340,7 @@ void cv::gpu::BruteForceMatcher_GPU_base::matchCollection(const GpuMat& query, c
 if (query.empty() || trainCollection.empty())
 return;

-using namespace OPENCV_DEVICE_NAMESPACE_ bf_match;
+using namespace ::cv::gpu::device::bf_match;

 typedef void (*caller_t)(const DevMem2Db& query, const DevMem2Db& trains, const DevMem2D_<PtrStepb>& masks,
 const DevMem2Di& trainIdx, const DevMem2Di& imgIdx, const DevMem2Df& distance,

@@ -452,7 +451,7 @@ void cv::gpu::BruteForceMatcher_GPU_base::knnMatchSingle(const GpuMat& query, co
 if (query.empty() || train.empty())
 return;

-using namespace OPENCV_DEVICE_NAMESPACE_ bf_knnmatch;
+using namespace ::cv::gpu::device::bf_knnmatch;

 typedef void (*caller_t)(const DevMem2Db& query, const DevMem2Db& train, int k, const DevMem2Db& mask,
 const DevMem2Db& trainIdx, const DevMem2Db& distance, const DevMem2Df& allDist,

@@ -581,7 +580,7 @@ void cv::gpu::BruteForceMatcher_GPU_base::knnMatch2Collection(const GpuMat& quer
 if (query.empty() || trainCollection.empty())
 return;

-using namespace OPENCV_DEVICE_NAMESPACE_ bf_knnmatch;
+using namespace ::cv::gpu::device::bf_knnmatch;

 typedef void (*caller_t)(const DevMem2Db& query, const DevMem2Db& trains, const DevMem2D_<PtrStepb>& masks,
 const DevMem2Db& trainIdx, const DevMem2Db& imgIdx, const DevMem2Db& distance,

@@ -762,7 +761,7 @@ void cv::gpu::BruteForceMatcher_GPU_base::radiusMatchSingle(const GpuMat& query,
 if (query.empty() || train.empty())
 return;

-using namespace OPENCV_DEVICE_NAMESPACE_ bf_radius_match;
+using namespace ::cv::gpu::device::bf_radius_match;

 typedef void (*caller_t)(const DevMem2Db& query, const DevMem2Db& train, float maxDistance, const DevMem2Db& mask,
 const DevMem2Di& trainIdx, const DevMem2Df& distance, const DevMem2D_<unsigned int>& nMatches,

@@ -893,7 +892,7 @@ void cv::gpu::BruteForceMatcher_GPU_base::radiusMatchCollection(const GpuMat& qu
 if (query.empty() || empty())
 return;

-using namespace OPENCV_DEVICE_NAMESPACE_ bf_radius_match;
+using namespace ::cv::gpu::device::bf_radius_match;

 typedef void (*caller_t)(const DevMem2Db& query, const DevMem2Db* trains, int n, float maxDistance, const DevMem2Db* masks,
 const DevMem2Di& trainIdx, const DevMem2Di& imgIdx, const DevMem2Df& distance, const DevMem2D_<unsigned int>& nMatches,
@@ -56,31 +56,30 @@ void cv::gpu::solvePnPRansac(const Mat&, const Mat&, const Mat&, const Mat&, Mat

 #else

-BEGIN_OPENCV_DEVICE_NAMESPACE
-
-namespace transform_points
+namespace cv { namespace gpu { namespace device
 {
+namespace transform_points
+{
 void call(const DevMem2D_<float3> src, const float* rot, const float* transl, DevMem2D_<float3> dst, cudaStream_t stream);
 }

 namespace project_points
 {
 void call(const DevMem2D_<float3> src, const float* rot, const float* transl, const float* proj, DevMem2D_<float2> dst, cudaStream_t stream);
 }

 namespace solve_pnp_ransac
 {
 int maxNumIters();

 void computeHypothesisScores(
 const int num_hypotheses, const int num_points, const float* rot_matrices,
 const float3* transl_vectors, const float3* object, const float2* image,
 const float dist_threshold, int* hypothesis_scores);
 }
+}}}

-END_OPENCV_DEVICE_NAMESPACE
-
-using namespace OPENCV_DEVICE_NAMESPACE;
+using namespace ::cv::gpu::device;

 namespace
 {
@@ -51,8 +51,8 @@ void cv::gpu::cvtColor(const GpuMat&, GpuMat&, int, int, Stream&) { throw_nogpu(

 #else /* !defined (HAVE_CUDA) */

-BEGIN_OPENCV_DEVICE_NAMESPACE
-
+namespace cv { namespace gpu { namespace device
+{
 #define OPENCV_GPU_DECLARE_CVTCOLOR_ONE(name) \
 void name(const DevMem2Db& src, const DevMem2Db& dst, cudaStream_t stream);

@@ -67,142 +67,141 @@ BEGIN_OPENCV_DEVICE_NAMESPACE
 OPENCV_GPU_DECLARE_CVTCOLOR_ONE(name ## _full_8u) \
 OPENCV_GPU_DECLARE_CVTCOLOR_ONE(name ## _full_32f)

 OPENCV_GPU_DECLARE_CVTCOLOR_ALL(bgr_to_rgb)
 OPENCV_GPU_DECLARE_CVTCOLOR_ALL(bgr_to_bgra)
 OPENCV_GPU_DECLARE_CVTCOLOR_ALL(bgr_to_rgba)
 OPENCV_GPU_DECLARE_CVTCOLOR_ALL(bgra_to_bgr)
 OPENCV_GPU_DECLARE_CVTCOLOR_ALL(bgra_to_rgb)
 OPENCV_GPU_DECLARE_CVTCOLOR_ALL(bgra_to_rgba)

 OPENCV_GPU_DECLARE_CVTCOLOR_ONE(bgr_to_bgr555)
 OPENCV_GPU_DECLARE_CVTCOLOR_ONE(bgr_to_bgr565)
 OPENCV_GPU_DECLARE_CVTCOLOR_ONE(rgb_to_bgr555)
 OPENCV_GPU_DECLARE_CVTCOLOR_ONE(rgb_to_bgr565)
 OPENCV_GPU_DECLARE_CVTCOLOR_ONE(bgra_to_bgr555)
 OPENCV_GPU_DECLARE_CVTCOLOR_ONE(bgra_to_bgr565)
 OPENCV_GPU_DECLARE_CVTCOLOR_ONE(rgba_to_bgr555)
 OPENCV_GPU_DECLARE_CVTCOLOR_ONE(rgba_to_bgr565)

 OPENCV_GPU_DECLARE_CVTCOLOR_ONE(bgr555_to_rgb)
 OPENCV_GPU_DECLARE_CVTCOLOR_ONE(bgr565_to_rgb)
 OPENCV_GPU_DECLARE_CVTCOLOR_ONE(bgr555_to_bgr)
 OPENCV_GPU_DECLARE_CVTCOLOR_ONE(bgr565_to_bgr)
 OPENCV_GPU_DECLARE_CVTCOLOR_ONE(bgr555_to_rgba)
 OPENCV_GPU_DECLARE_CVTCOLOR_ONE(bgr565_to_rgba)
 OPENCV_GPU_DECLARE_CVTCOLOR_ONE(bgr555_to_bgra)
 OPENCV_GPU_DECLARE_CVTCOLOR_ONE(bgr565_to_bgra)

 OPENCV_GPU_DECLARE_CVTCOLOR_ALL(gray_to_bgr)
 OPENCV_GPU_DECLARE_CVTCOLOR_ALL(gray_to_bgra)

 OPENCV_GPU_DECLARE_CVTCOLOR_ONE(gray_to_bgr555)
 OPENCV_GPU_DECLARE_CVTCOLOR_ONE(gray_to_bgr565)

 OPENCV_GPU_DECLARE_CVTCOLOR_ONE(bgr555_to_gray)
 OPENCV_GPU_DECLARE_CVTCOLOR_ONE(bgr565_to_gray)

 OPENCV_GPU_DECLARE_CVTCOLOR_ALL(rgb_to_gray)
 OPENCV_GPU_DECLARE_CVTCOLOR_ALL(bgr_to_gray)
 OPENCV_GPU_DECLARE_CVTCOLOR_ALL(rgba_to_gray)
 OPENCV_GPU_DECLARE_CVTCOLOR_ALL(bgra_to_gray)

 OPENCV_GPU_DECLARE_CVTCOLOR_ALL(rgb_to_yuv)
 OPENCV_GPU_DECLARE_CVTCOLOR_ALL(rgba_to_yuv)
 OPENCV_GPU_DECLARE_CVTCOLOR_ALL(rgb_to_yuv4)
 OPENCV_GPU_DECLARE_CVTCOLOR_ALL(rgba_to_yuv4)
 OPENCV_GPU_DECLARE_CVTCOLOR_ALL(bgr_to_yuv)
 OPENCV_GPU_DECLARE_CVTCOLOR_ALL(bgra_to_yuv)
 OPENCV_GPU_DECLARE_CVTCOLOR_ALL(bgr_to_yuv4)
 OPENCV_GPU_DECLARE_CVTCOLOR_ALL(bgra_to_yuv4)

 OPENCV_GPU_DECLARE_CVTCOLOR_ALL(yuv_to_rgb)
 OPENCV_GPU_DECLARE_CVTCOLOR_ALL(yuv_to_rgba)
 OPENCV_GPU_DECLARE_CVTCOLOR_ALL(yuv4_to_rgb)
 OPENCV_GPU_DECLARE_CVTCOLOR_ALL(yuv4_to_rgba)
 OPENCV_GPU_DECLARE_CVTCOLOR_ALL(yuv_to_bgr)
 OPENCV_GPU_DECLARE_CVTCOLOR_ALL(yuv_to_bgra)
 OPENCV_GPU_DECLARE_CVTCOLOR_ALL(yuv4_to_bgr)
 OPENCV_GPU_DECLARE_CVTCOLOR_ALL(yuv4_to_bgra)

 OPENCV_GPU_DECLARE_CVTCOLOR_ALL(rgb_to_YCrCb)
 OPENCV_GPU_DECLARE_CVTCOLOR_ALL(rgba_to_YCrCb)
 OPENCV_GPU_DECLARE_CVTCOLOR_ALL(rgb_to_YCrCb4)
 OPENCV_GPU_DECLARE_CVTCOLOR_ALL(rgba_to_YCrCb4)
 OPENCV_GPU_DECLARE_CVTCOLOR_ALL(bgr_to_YCrCb)
 OPENCV_GPU_DECLARE_CVTCOLOR_ALL(bgra_to_YCrCb)
 OPENCV_GPU_DECLARE_CVTCOLOR_ALL(bgr_to_YCrCb4)
 OPENCV_GPU_DECLARE_CVTCOLOR_ALL(bgra_to_YCrCb4)

 OPENCV_GPU_DECLARE_CVTCOLOR_ALL(YCrCb_to_rgb)
 OPENCV_GPU_DECLARE_CVTCOLOR_ALL(YCrCb_to_rgba)
 OPENCV_GPU_DECLARE_CVTCOLOR_ALL(YCrCb4_to_rgb)
 OPENCV_GPU_DECLARE_CVTCOLOR_ALL(YCrCb4_to_rgba)
 OPENCV_GPU_DECLARE_CVTCOLOR_ALL(YCrCb_to_bgr)
 OPENCV_GPU_DECLARE_CVTCOLOR_ALL(YCrCb_to_bgra)
 OPENCV_GPU_DECLARE_CVTCOLOR_ALL(YCrCb4_to_bgr)
 OPENCV_GPU_DECLARE_CVTCOLOR_ALL(YCrCb4_to_bgra)

 OPENCV_GPU_DECLARE_CVTCOLOR_ALL(rgb_to_xyz)
 OPENCV_GPU_DECLARE_CVTCOLOR_ALL(rgba_to_xyz)
 OPENCV_GPU_DECLARE_CVTCOLOR_ALL(rgb_to_xyz4)
 OPENCV_GPU_DECLARE_CVTCOLOR_ALL(rgba_to_xyz4)
 OPENCV_GPU_DECLARE_CVTCOLOR_ALL(bgr_to_xyz)
 OPENCV_GPU_DECLARE_CVTCOLOR_ALL(bgra_to_xyz)
 OPENCV_GPU_DECLARE_CVTCOLOR_ALL(bgr_to_xyz4)
 OPENCV_GPU_DECLARE_CVTCOLOR_ALL(bgra_to_xyz4)

 OPENCV_GPU_DECLARE_CVTCOLOR_ALL(xyz_to_rgb)
 OPENCV_GPU_DECLARE_CVTCOLOR_ALL(xyz4_to_rgb)
 OPENCV_GPU_DECLARE_CVTCOLOR_ALL(xyz_to_rgba)
 OPENCV_GPU_DECLARE_CVTCOLOR_ALL(xyz4_to_rgba)
 OPENCV_GPU_DECLARE_CVTCOLOR_ALL(xyz_to_bgr)
 OPENCV_GPU_DECLARE_CVTCOLOR_ALL(xyz4_to_bgr)
 OPENCV_GPU_DECLARE_CVTCOLOR_ALL(xyz_to_bgra)
 OPENCV_GPU_DECLARE_CVTCOLOR_ALL(xyz4_to_bgra)

 OPENCV_GPU_DECLARE_CVTCOLOR_8U32F(rgb_to_hsv)
 OPENCV_GPU_DECLARE_CVTCOLOR_8U32F(rgba_to_hsv)
 OPENCV_GPU_DECLARE_CVTCOLOR_8U32F(rgb_to_hsv4)
 OPENCV_GPU_DECLARE_CVTCOLOR_8U32F(rgba_to_hsv4)
 OPENCV_GPU_DECLARE_CVTCOLOR_8U32F(bgr_to_hsv)
 OPENCV_GPU_DECLARE_CVTCOLOR_8U32F(bgra_to_hsv)
 OPENCV_GPU_DECLARE_CVTCOLOR_8U32F(bgr_to_hsv4)
 OPENCV_GPU_DECLARE_CVTCOLOR_8U32F(bgra_to_hsv4)

 OPENCV_GPU_DECLARE_CVTCOLOR_8U32F(hsv_to_rgb)
 OPENCV_GPU_DECLARE_CVTCOLOR_8U32F(hsv_to_rgba)
 OPENCV_GPU_DECLARE_CVTCOLOR_8U32F(hsv4_to_rgb)
 OPENCV_GPU_DECLARE_CVTCOLOR_8U32F(hsv4_to_rgba)
 OPENCV_GPU_DECLARE_CVTCOLOR_8U32F(hsv_to_bgr)
 OPENCV_GPU_DECLARE_CVTCOLOR_8U32F(hsv_to_bgra)
 OPENCV_GPU_DECLARE_CVTCOLOR_8U32F(hsv4_to_bgr)
 OPENCV_GPU_DECLARE_CVTCOLOR_8U32F(hsv4_to_bgra)

 OPENCV_GPU_DECLARE_CVTCOLOR_8U32F(rgb_to_hls)
 OPENCV_GPU_DECLARE_CVTCOLOR_8U32F(rgba_to_hls)
 OPENCV_GPU_DECLARE_CVTCOLOR_8U32F(rgb_to_hls4)
 OPENCV_GPU_DECLARE_CVTCOLOR_8U32F(rgba_to_hls4)
 OPENCV_GPU_DECLARE_CVTCOLOR_8U32F(bgr_to_hls)
 OPENCV_GPU_DECLARE_CVTCOLOR_8U32F(bgra_to_hls)
 OPENCV_GPU_DECLARE_CVTCOLOR_8U32F(bgr_to_hls4)
 OPENCV_GPU_DECLARE_CVTCOLOR_8U32F(bgra_to_hls4)

 OPENCV_GPU_DECLARE_CVTCOLOR_8U32F(hls_to_rgb)
 OPENCV_GPU_DECLARE_CVTCOLOR_8U32F(hls_to_rgba)
 OPENCV_GPU_DECLARE_CVTCOLOR_8U32F(hls4_to_rgb)
 OPENCV_GPU_DECLARE_CVTCOLOR_8U32F(hls4_to_rgba)
 OPENCV_GPU_DECLARE_CVTCOLOR_8U32F(hls_to_bgr)
 OPENCV_GPU_DECLARE_CVTCOLOR_8U32F(hls_to_bgra)
 OPENCV_GPU_DECLARE_CVTCOLOR_8U32F(hls4_to_bgr)
 OPENCV_GPU_DECLARE_CVTCOLOR_8U32F(hls4_to_bgra)

 #undef OPENCV_GPU_DECLARE_CVTCOLOR_ONE
 #undef OPENCV_GPU_DECLARE_CVTCOLOR_ALL
 #undef OPENCV_GPU_DECLARE_CVTCOLOR_8U32F
+}}}

-END_OPENCV_DEVICE_NAMESPACE
-
-using namespace OPENCV_DEVICE_NAMESPACE;
+using namespace ::cv::gpu::device;

 namespace
 {
@ -45,18 +45,18 @@
|
|||||||
#include "opencv2/gpu/device/vec_distance.hpp"
|
#include "opencv2/gpu/device/vec_distance.hpp"
|
||||||
#include "opencv2/gpu/device/datamov_utils.hpp"
|
#include "opencv2/gpu/device/datamov_utils.hpp"
|
||||||
|
|
||||||
BEGIN_OPENCV_DEVICE_NAMESPACE
|
namespace cv { namespace gpu { namespace device
|
||||||
|
{
|
||||||
|
namespace bf_knnmatch
|
||||||
|
{
|
||||||
|
///////////////////////////////////////////////////////////////////////////////
|
||||||
|
// Reduction
|
||||||
|
|
||||||
namespace bf_knnmatch {
|
template <int BLOCK_SIZE>
|
||||||
|
__device__ void findBestMatch(float& bestDistance1, float& bestDistance2,
|
||||||
///////////////////////////////////////////////////////////////////////////////
|
|
||||||
// Reduction
|
|
||||||
|
|
||||||
template <int BLOCK_SIZE>
|
|
||||||
__device__ void findBestMatch(float& bestDistance1, float& bestDistance2,
|
|
||||||
int& bestTrainIdx1, int& bestTrainIdx2,
|
int& bestTrainIdx1, int& bestTrainIdx2,
|
||||||
float* s_distance, int* s_trainIdx)
|
float* s_distance, int* s_trainIdx)
|
||||||
{
|
{
|
||||||
float myBestDistance1 = numeric_limits<float>::max();
|
float myBestDistance1 = numeric_limits<float>::max();
|
||||||
float myBestDistance2 = numeric_limits<float>::max();
|
float myBestDistance2 = numeric_limits<float>::max();
|
||||||
int myBestTrainIdx1 = -1;
|
int myBestTrainIdx1 = -1;
|
||||||
@ -120,14 +120,14 @@ __device__ void findBestMatch(float& bestDistance1, float& bestDistance2,
|
|||||||
|
|
||||||
bestTrainIdx1 = myBestTrainIdx1;
|
bestTrainIdx1 = myBestTrainIdx1;
|
||||||
bestTrainIdx2 = myBestTrainIdx2;
|
bestTrainIdx2 = myBestTrainIdx2;
|
||||||
}
|
}
|
||||||
|
|
||||||
template <int BLOCK_SIZE>
|
template <int BLOCK_SIZE>
|
||||||
__device__ void findBestMatch(float& bestDistance1, float& bestDistance2,
|
__device__ void findBestMatch(float& bestDistance1, float& bestDistance2,
|
||||||
int& bestTrainIdx1, int& bestTrainIdx2,
|
int& bestTrainIdx1, int& bestTrainIdx2,
|
||||||
int& bestImgIdx1, int& bestImgIdx2,
|
int& bestImgIdx1, int& bestImgIdx2,
|
||||||
float* s_distance, int* s_trainIdx, int* s_imgIdx)
|
float* s_distance, int* s_trainIdx, int* s_imgIdx)
|
||||||
{
|
{
|
||||||
float myBestDistance1 = numeric_limits<float>::max();
|
float myBestDistance1 = numeric_limits<float>::max();
|
||||||
float myBestDistance2 = numeric_limits<float>::max();
|
float myBestDistance2 = numeric_limits<float>::max();
|
||||||
int myBestTrainIdx1 = -1;
|
int myBestTrainIdx1 = -1;
|
||||||
@ -203,29 +203,29 @@ __device__ void findBestMatch(float& bestDistance1, float& bestDistance2,
|
|||||||
|
|
||||||
bestImgIdx1 = myBestImgIdx1;
|
bestImgIdx1 = myBestImgIdx1;
|
||||||
bestImgIdx2 = myBestImgIdx2;
|
bestImgIdx2 = myBestImgIdx2;
|
||||||
}
|
}
|
||||||
|
|
||||||
///////////////////////////////////////////////////////////////////////////////
|
///////////////////////////////////////////////////////////////////////////////
|
||||||
// Match Unrolled Cached
|
// Match Unrolled Cached
|
||||||
|
|
||||||
template <int BLOCK_SIZE, int MAX_DESC_LEN, typename T, typename U>
|
template <int BLOCK_SIZE, int MAX_DESC_LEN, typename T, typename U>
|
||||||
__device__ void loadQueryToSmem(int queryIdx, const DevMem2D_<T>& query, U* s_query)
|
__device__ void loadQueryToSmem(int queryIdx, const DevMem2D_<T>& query, U* s_query)
|
||||||
{
|
{
|
||||||
#pragma unroll
|
#pragma unroll
|
||||||
for (int i = 0; i < MAX_DESC_LEN / BLOCK_SIZE; ++i)
|
for (int i = 0; i < MAX_DESC_LEN / BLOCK_SIZE; ++i)
|
||||||
{
|
{
|
||||||
const int loadX = threadIdx.x + i * BLOCK_SIZE;
|
const int loadX = threadIdx.x + i * BLOCK_SIZE;
|
||||||
s_query[threadIdx.y * MAX_DESC_LEN + loadX] = loadX < query.cols ? query.ptr(::min(queryIdx, query.rows - 1))[loadX] : 0;
|
s_query[threadIdx.y * MAX_DESC_LEN + loadX] = loadX < query.cols ? query.ptr(::min(queryIdx, query.rows - 1))[loadX] : 0;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
template <int BLOCK_SIZE, int MAX_DESC_LEN, typename Dist, typename T, typename Mask>
|
template <int BLOCK_SIZE, int MAX_DESC_LEN, typename Dist, typename T, typename Mask>
|
||||||
__device__ void loopUnrolledCached(int queryIdx, const DevMem2D_<T>& query, int imgIdx, const DevMem2D_<T>& train, const Mask& mask,
|
__device__ void loopUnrolledCached(int queryIdx, const DevMem2D_<T>& query, int imgIdx, const DevMem2D_<T>& train, const Mask& mask,
|
||||||
typename Dist::value_type* s_query, typename Dist::value_type* s_train,
|
typename Dist::value_type* s_query, typename Dist::value_type* s_train,
|
||||||
float& bestDistance1, float& bestDistance2,
|
float& bestDistance1, float& bestDistance2,
|
||||||
int& bestTrainIdx1, int& bestTrainIdx2,
|
int& bestTrainIdx1, int& bestTrainIdx2,
|
||||||
int& bestImgIdx1, int& bestImgIdx2)
|
int& bestImgIdx1, int& bestImgIdx2)
|
||||||
{
|
{
|
||||||
for (int t = 0, endt = (train.rows + BLOCK_SIZE - 1) / BLOCK_SIZE; t < endt; ++t)
|
for (int t = 0, endt = (train.rows + BLOCK_SIZE - 1) / BLOCK_SIZE; t < endt; ++t)
|
||||||
{
|
{
|
||||||
Dist dist;
|
Dist dist;
|
||||||
@ -278,11 +278,11 @@ __device__ void loopUnrolledCached(int queryIdx, const DevMem2D_<T>& query, int
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
template <int BLOCK_SIZE, int MAX_DESC_LEN, typename Dist, typename T, typename Mask>
|
template <int BLOCK_SIZE, int MAX_DESC_LEN, typename Dist, typename T, typename Mask>
|
||||||
__global__ void matchUnrolledCached(const DevMem2D_<T> query, const DevMem2D_<T> train, const Mask mask, int2* bestTrainIdx, float2* bestDistance)
|
__global__ void matchUnrolledCached(const DevMem2D_<T> query, const DevMem2D_<T> train, const Mask mask, int2* bestTrainIdx, float2* bestDistance)
|
||||||
{
|
{
|
||||||
extern __shared__ int smem[];
|
extern __shared__ int smem[];
|
||||||
|
|
||||||
const int queryIdx = blockIdx.x * BLOCK_SIZE + threadIdx.y;
|
const int queryIdx = blockIdx.x * BLOCK_SIZE + threadIdx.y;
|
||||||
@ -311,13 +311,13 @@ __global__ void matchUnrolledCached(const DevMem2D_<T> query, const DevMem2D_<T>
|
|||||||
bestTrainIdx[queryIdx] = make_int2(myBestTrainIdx1, myBestTrainIdx2);
|
bestTrainIdx[queryIdx] = make_int2(myBestTrainIdx1, myBestTrainIdx2);
|
||||||
bestDistance[queryIdx] = make_float2(myBestDistance1, myBestDistance2);
|
bestDistance[queryIdx] = make_float2(myBestDistance1, myBestDistance2);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
template <int BLOCK_SIZE, int MAX_DESC_LEN, typename Dist, typename T, typename Mask>
|
template <int BLOCK_SIZE, int MAX_DESC_LEN, typename Dist, typename T, typename Mask>
|
||||||
void matchUnrolledCached(const DevMem2D_<T>& query, const DevMem2D_<T>& train, const Mask& mask,
|
void matchUnrolledCached(const DevMem2D_<T>& query, const DevMem2D_<T>& train, const Mask& mask,
|
||||||
const DevMem2D_<int2>& trainIdx, const DevMem2D_<float2>& distance,
|
const DevMem2D_<int2>& trainIdx, const DevMem2D_<float2>& distance,
|
||||||
cudaStream_t stream)
|
cudaStream_t stream)
|
||||||
{
|
{
|
||||||
const dim3 block(BLOCK_SIZE, BLOCK_SIZE);
|
const dim3 block(BLOCK_SIZE, BLOCK_SIZE);
|
||||||
const dim3 grid(divUp(query.rows, BLOCK_SIZE));
|
const dim3 grid(divUp(query.rows, BLOCK_SIZE));
|
||||||
|
|
||||||
@ -328,11 +328,11 @@ void matchUnrolledCached(const DevMem2D_<T>& query, const DevMem2D_<T>& train, c
|
|||||||
|
|
||||||
if (stream == 0)
|
if (stream == 0)
|
||||||
cudaSafeCall( cudaDeviceSynchronize() );
|
cudaSafeCall( cudaDeviceSynchronize() );
|
||||||
}
|
}
|
||||||
|
|
||||||
template <int BLOCK_SIZE, int MAX_DESC_LEN, typename Dist, typename T, typename Mask>
|
template <int BLOCK_SIZE, int MAX_DESC_LEN, typename Dist, typename T, typename Mask>
|
||||||
__global__ void matchUnrolledCached(const DevMem2D_<T> query, const DevMem2D_<T>* trains, int n, const Mask mask, int2* bestTrainIdx, int2* bestImgIdx, float2* bestDistance)
|
__global__ void matchUnrolledCached(const DevMem2D_<T> query, const DevMem2D_<T>* trains, int n, const Mask mask, int2* bestTrainIdx, int2* bestImgIdx, float2* bestDistance)
|
||||||
{
|
{
|
||||||
extern __shared__ int smem[];
|
extern __shared__ int smem[];
|
||||||
|
|
||||||
const int queryIdx = blockIdx.x * BLOCK_SIZE + threadIdx.y;
|
const int queryIdx = blockIdx.x * BLOCK_SIZE + threadIdx.y;
|
||||||
@ -372,13 +372,13 @@ __global__ void matchUnrolledCached(const DevMem2D_<T> query, const DevMem2D_<T>
|
|||||||
bestImgIdx[queryIdx] = make_int2(myBestImgIdx1, myBestImgIdx2);
|
bestImgIdx[queryIdx] = make_int2(myBestImgIdx1, myBestImgIdx2);
|
||||||
bestDistance[queryIdx] = make_float2(myBestDistance1, myBestDistance2);
|
bestDistance[queryIdx] = make_float2(myBestDistance1, myBestDistance2);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
template <int BLOCK_SIZE, int MAX_DESC_LEN, typename Dist, typename T, typename Mask>
|
template <int BLOCK_SIZE, int MAX_DESC_LEN, typename Dist, typename T, typename Mask>
|
||||||
void matchUnrolledCached(const DevMem2D_<T>& query, const DevMem2D_<T>* trains, int n, const Mask& mask,
|
void matchUnrolledCached(const DevMem2D_<T>& query, const DevMem2D_<T>* trains, int n, const Mask& mask,
|
||||||
const DevMem2D_<int2>& trainIdx, const DevMem2D_<int2>& imgIdx, const DevMem2D_<float2>& distance,
|
const DevMem2D_<int2>& trainIdx, const DevMem2D_<int2>& imgIdx, const DevMem2D_<float2>& distance,
|
||||||
cudaStream_t stream)
|
cudaStream_t stream)
|
||||||
{
|
{
|
||||||
const dim3 block(BLOCK_SIZE, BLOCK_SIZE);
|
const dim3 block(BLOCK_SIZE, BLOCK_SIZE);
|
||||||
const dim3 grid(divUp(query.rows, BLOCK_SIZE));
|
const dim3 grid(divUp(query.rows, BLOCK_SIZE));
|
||||||
|
|
||||||
@ -389,18 +389,18 @@ void matchUnrolledCached(const DevMem2D_<T>& query, const DevMem2D_<T>* trains,
|
|||||||
|
|
||||||
if (stream == 0)
|
if (stream == 0)
|
||||||
cudaSafeCall( cudaDeviceSynchronize() );
|
cudaSafeCall( cudaDeviceSynchronize() );
|
||||||
}
|
}
|
||||||
|
|
||||||
///////////////////////////////////////////////////////////////////////////////
|
///////////////////////////////////////////////////////////////////////////////
|
||||||
// Match Unrolled
|
// Match Unrolled
|
||||||
|
|
||||||
template <int BLOCK_SIZE, int MAX_DESC_LEN, typename Dist, typename T, typename Mask>
|
template <int BLOCK_SIZE, int MAX_DESC_LEN, typename Dist, typename T, typename Mask>
|
||||||
__device__ void loopUnrolled(int queryIdx, const DevMem2D_<T>& query, int imgIdx, const DevMem2D_<T>& train, const Mask& mask,
|
__device__ void loopUnrolled(int queryIdx, const DevMem2D_<T>& query, int imgIdx, const DevMem2D_<T>& train, const Mask& mask,
|
||||||
typename Dist::value_type* s_query, typename Dist::value_type* s_train,
|
typename Dist::value_type* s_query, typename Dist::value_type* s_train,
|
||||||
float& bestDistance1, float& bestDistance2,
|
float& bestDistance1, float& bestDistance2,
|
||||||
int& bestTrainIdx1, int& bestTrainIdx2,
|
int& bestTrainIdx1, int& bestTrainIdx2,
|
||||||
int& bestImgIdx1, int& bestImgIdx2)
|
int& bestImgIdx1, int& bestImgIdx2)
|
||||||
{
|
{
|
||||||
for (int t = 0, endt = (train.rows + BLOCK_SIZE - 1) / BLOCK_SIZE; t < endt; ++t)
|
for (int t = 0, endt = (train.rows + BLOCK_SIZE - 1) / BLOCK_SIZE; t < endt; ++t)
|
||||||
{
|
{
|
||||||
Dist dist;
|
Dist dist;
|
||||||
@ -457,11 +457,11 @@ __device__ void loopUnrolled(int queryIdx, const DevMem2D_<T>& query, int imgIdx
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
template <int BLOCK_SIZE, int MAX_DESC_LEN, typename Dist, typename T, typename Mask>
|
template <int BLOCK_SIZE, int MAX_DESC_LEN, typename Dist, typename T, typename Mask>
|
||||||
__global__ void matchUnrolled(const DevMem2D_<T> query, const DevMem2D_<T> train, const Mask mask, int2* bestTrainIdx, float2* bestDistance)
|
__global__ void matchUnrolled(const DevMem2D_<T> query, const DevMem2D_<T> train, const Mask mask, int2* bestTrainIdx, float2* bestDistance)
|
||||||
{
|
{
|
||||||
extern __shared__ int smem[];
|
extern __shared__ int smem[];
|
||||||
|
|
||||||
const int queryIdx = blockIdx.x * BLOCK_SIZE + threadIdx.y;
|
const int queryIdx = blockIdx.x * BLOCK_SIZE + threadIdx.y;
|
||||||
@ -488,13 +488,13 @@ __global__ void matchUnrolled(const DevMem2D_<T> query, const DevMem2D_<T> train
|
|||||||
bestTrainIdx[queryIdx] = make_int2(myBestTrainIdx1, myBestTrainIdx2);
|
bestTrainIdx[queryIdx] = make_int2(myBestTrainIdx1, myBestTrainIdx2);
|
||||||
bestDistance[queryIdx] = make_float2(myBestDistance1, myBestDistance2);
|
bestDistance[queryIdx] = make_float2(myBestDistance1, myBestDistance2);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
template <int BLOCK_SIZE, int MAX_DESC_LEN, typename Dist, typename T, typename Mask>
|
template <int BLOCK_SIZE, int MAX_DESC_LEN, typename Dist, typename T, typename Mask>
|
||||||
void matchUnrolled(const DevMem2D_<T>& query, const DevMem2D_<T>& train, const Mask& mask,
|
void matchUnrolled(const DevMem2D_<T>& query, const DevMem2D_<T>& train, const Mask& mask,
|
||||||
const DevMem2D_<int2>& trainIdx, const DevMem2D_<float2>& distance,
|
const DevMem2D_<int2>& trainIdx, const DevMem2D_<float2>& distance,
|
||||||
cudaStream_t stream)
|
cudaStream_t stream)
|
||||||
{
|
{
|
||||||
const dim3 block(BLOCK_SIZE, BLOCK_SIZE);
|
const dim3 block(BLOCK_SIZE, BLOCK_SIZE);
|
||||||
const dim3 grid(divUp(query.rows, BLOCK_SIZE));
|
const dim3 grid(divUp(query.rows, BLOCK_SIZE));
|
||||||
|
|
||||||
@ -505,11 +505,11 @@ void matchUnrolled(const DevMem2D_<T>& query, const DevMem2D_<T>& train, const M
|
|||||||
|
|
||||||
if (stream == 0)
|
if (stream == 0)
|
||||||
cudaSafeCall( cudaDeviceSynchronize() );
|
cudaSafeCall( cudaDeviceSynchronize() );
|
||||||
}
|
}
|
||||||
|
|
||||||
template <int BLOCK_SIZE, int MAX_DESC_LEN, typename Dist, typename T, typename Mask>
|
template <int BLOCK_SIZE, int MAX_DESC_LEN, typename Dist, typename T, typename Mask>
|
||||||
__global__ void matchUnrolled(const DevMem2D_<T> query, const DevMem2D_<T>* trains, int n, const Mask mask, int2* bestTrainIdx, int2* bestImgIdx, float2* bestDistance)
|
__global__ void matchUnrolled(const DevMem2D_<T> query, const DevMem2D_<T>* trains, int n, const Mask mask, int2* bestTrainIdx, int2* bestImgIdx, float2* bestDistance)
|
||||||
{
|
{
|
||||||
extern __shared__ int smem[];
|
extern __shared__ int smem[];
|
||||||
|
|
||||||
const int queryIdx = blockIdx.x * BLOCK_SIZE + threadIdx.y;
|
const int queryIdx = blockIdx.x * BLOCK_SIZE + threadIdx.y;
|
||||||
@ -547,13 +547,13 @@ __global__ void matchUnrolled(const DevMem2D_<T> query, const DevMem2D_<T>* trai
|
|||||||
bestImgIdx[queryIdx] = make_int2(myBestImgIdx1, myBestImgIdx2);
|
bestImgIdx[queryIdx] = make_int2(myBestImgIdx1, myBestImgIdx2);
|
||||||
bestDistance[queryIdx] = make_float2(myBestDistance1, myBestDistance2);
|
bestDistance[queryIdx] = make_float2(myBestDistance1, myBestDistance2);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
template <int BLOCK_SIZE, int MAX_DESC_LEN, typename Dist, typename T, typename Mask>
|
template <int BLOCK_SIZE, int MAX_DESC_LEN, typename Dist, typename T, typename Mask>
|
||||||
void matchUnrolled(const DevMem2D_<T>& query, const DevMem2D_<T>* trains, int n, const Mask& mask,
|
void matchUnrolled(const DevMem2D_<T>& query, const DevMem2D_<T>* trains, int n, const Mask& mask,
|
||||||
const DevMem2D_<int2>& trainIdx, const DevMem2D_<int2>& imgIdx, const DevMem2D_<float2>& distance,
|
const DevMem2D_<int2>& trainIdx, const DevMem2D_<int2>& imgIdx, const DevMem2D_<float2>& distance,
|
||||||
cudaStream_t stream)
|
cudaStream_t stream)
|
||||||
{
|
{
|
||||||
const dim3 block(BLOCK_SIZE, BLOCK_SIZE);
|
const dim3 block(BLOCK_SIZE, BLOCK_SIZE);
|
||||||
const dim3 grid(divUp(query.rows, BLOCK_SIZE));
|
const dim3 grid(divUp(query.rows, BLOCK_SIZE));
|
||||||
|
|
||||||
@ -564,18 +564,18 @@ void matchUnrolled(const DevMem2D_<T>& query, const DevMem2D_<T>* trains, int n,
|
|||||||
|
|
||||||
if (stream == 0)
|
if (stream == 0)
|
||||||
cudaSafeCall( cudaDeviceSynchronize() );
|
cudaSafeCall( cudaDeviceSynchronize() );
|
||||||
}
|
}
|
||||||
|
|
||||||
///////////////////////////////////////////////////////////////////////////////
|
///////////////////////////////////////////////////////////////////////////////
|
||||||
// Match
|
// Match
|
||||||
|
|
||||||
template <int BLOCK_SIZE, typename Dist, typename T, typename Mask>
|
template <int BLOCK_SIZE, typename Dist, typename T, typename Mask>
|
||||||
__device__ void loop(int queryIdx, const DevMem2D_<T>& query, int imgIdx, const DevMem2D_<T>& train, const Mask& mask,
|
__device__ void loop(int queryIdx, const DevMem2D_<T>& query, int imgIdx, const DevMem2D_<T>& train, const Mask& mask,
|
||||||
typename Dist::value_type* s_query, typename Dist::value_type* s_train,
|
typename Dist::value_type* s_query, typename Dist::value_type* s_train,
|
||||||
float& bestDistance1, float& bestDistance2,
|
float& bestDistance1, float& bestDistance2,
|
||||||
int& bestTrainIdx1, int& bestTrainIdx2,
|
int& bestTrainIdx1, int& bestTrainIdx2,
|
||||||
int& bestImgIdx1, int& bestImgIdx2)
|
int& bestImgIdx1, int& bestImgIdx2)
|
||||||
{
|
{
|
||||||
for (int t = 0, endt = (train.rows + BLOCK_SIZE - 1) / BLOCK_SIZE; t < endt; ++t)
|
for (int t = 0, endt = (train.rows + BLOCK_SIZE - 1) / BLOCK_SIZE; t < endt; ++t)
|
||||||
{
|
{
|
||||||
Dist dist;
|
Dist dist;
|
||||||
@ -631,11 +631,11 @@ __device__ void loop(int queryIdx, const DevMem2D_<T>& query, int imgIdx, const
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
template <int BLOCK_SIZE, typename Dist, typename T, typename Mask>
|
template <int BLOCK_SIZE, typename Dist, typename T, typename Mask>
|
||||||
__global__ void match(const DevMem2D_<T> query, const DevMem2D_<T> train, const Mask mask, int2* bestTrainIdx, float2* bestDistance)
|
__global__ void match(const DevMem2D_<T> query, const DevMem2D_<T> train, const Mask mask, int2* bestTrainIdx, float2* bestDistance)
|
||||||
{
|
{
|
||||||
extern __shared__ int smem[];
|
extern __shared__ int smem[];
|
||||||
|
|
||||||
const int queryIdx = blockIdx.x * BLOCK_SIZE + threadIdx.y;
|
const int queryIdx = blockIdx.x * BLOCK_SIZE + threadIdx.y;
|
||||||
@ -662,13 +662,13 @@ __global__ void match(const DevMem2D_<T> query, const DevMem2D_<T> train, const
|
|||||||
bestTrainIdx[queryIdx] = make_int2(myBestTrainIdx1, myBestTrainIdx2);
|
bestTrainIdx[queryIdx] = make_int2(myBestTrainIdx1, myBestTrainIdx2);
|
||||||
bestDistance[queryIdx] = make_float2(myBestDistance1, myBestDistance2);
|
bestDistance[queryIdx] = make_float2(myBestDistance1, myBestDistance2);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
template <int BLOCK_SIZE, typename Dist, typename T, typename Mask>
|
template <int BLOCK_SIZE, typename Dist, typename T, typename Mask>
|
||||||
void match(const DevMem2D_<T>& query, const DevMem2D_<T>& train, const Mask& mask,
|
void match(const DevMem2D_<T>& query, const DevMem2D_<T>& train, const Mask& mask,
|
||||||
const DevMem2D_<int2>& trainIdx, const DevMem2D_<float2>& distance,
|
const DevMem2D_<int2>& trainIdx, const DevMem2D_<float2>& distance,
|
||||||
cudaStream_t stream)
|
cudaStream_t stream)
|
||||||
{
|
{
|
||||||
const dim3 block(BLOCK_SIZE, BLOCK_SIZE);
|
const dim3 block(BLOCK_SIZE, BLOCK_SIZE);
|
||||||
const dim3 grid(divUp(query.rows, BLOCK_SIZE));
|
const dim3 grid(divUp(query.rows, BLOCK_SIZE));
|
||||||
|
|
||||||
@ -679,11 +679,11 @@ void match(const DevMem2D_<T>& query, const DevMem2D_<T>& train, const Mask& mas
|
|||||||
|
|
||||||
if (stream == 0)
|
if (stream == 0)
|
||||||
cudaSafeCall( cudaDeviceSynchronize() );
|
cudaSafeCall( cudaDeviceSynchronize() );
|
||||||
}
|
}
|
||||||
|
|
||||||
template <int BLOCK_SIZE, typename Dist, typename T, typename Mask>
|
template <int BLOCK_SIZE, typename Dist, typename T, typename Mask>
|
||||||
__global__ void match(const DevMem2D_<T> query, const DevMem2D_<T>* trains, int n, const Mask mask, int2* bestTrainIdx, int2* bestImgIdx, float2* bestDistance)
|
__global__ void match(const DevMem2D_<T> query, const DevMem2D_<T>* trains, int n, const Mask mask, int2* bestTrainIdx, int2* bestImgIdx, float2* bestDistance)
|
||||||
{
|
{
|
||||||
extern __shared__ int smem[];
|
extern __shared__ int smem[];
|
||||||
|
|
||||||
const int queryIdx = blockIdx.x * BLOCK_SIZE + threadIdx.y;
|
const int queryIdx = blockIdx.x * BLOCK_SIZE + threadIdx.y;
|
||||||
@ -721,13 +721,13 @@ __global__ void match(const DevMem2D_<T> query, const DevMem2D_<T>* trains, int
|
|||||||
bestImgIdx[queryIdx] = make_int2(myBestImgIdx1, myBestImgIdx2);
|
bestImgIdx[queryIdx] = make_int2(myBestImgIdx1, myBestImgIdx2);
|
||||||
bestDistance[queryIdx] = make_float2(myBestDistance1, myBestDistance2);
|
bestDistance[queryIdx] = make_float2(myBestDistance1, myBestDistance2);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
template <int BLOCK_SIZE, typename Dist, typename T, typename Mask>
|
template <int BLOCK_SIZE, typename Dist, typename T, typename Mask>
|
||||||
void match(const DevMem2D_<T>& query, const DevMem2D_<T>* trains, int n, const Mask& mask,
|
void match(const DevMem2D_<T>& query, const DevMem2D_<T>* trains, int n, const Mask& mask,
|
||||||
const DevMem2D_<int2>& trainIdx, const DevMem2D_<int2>& imgIdx, const DevMem2D_<float2>& distance,
|
const DevMem2D_<int2>& trainIdx, const DevMem2D_<int2>& imgIdx, const DevMem2D_<float2>& distance,
|
||||||
cudaStream_t stream)
|
cudaStream_t stream)
|
||||||
{
|
{
|
||||||
const dim3 block(BLOCK_SIZE, BLOCK_SIZE);
|
const dim3 block(BLOCK_SIZE, BLOCK_SIZE);
|
||||||
const dim3 grid(divUp(query.rows, BLOCK_SIZE));
|
const dim3 grid(divUp(query.rows, BLOCK_SIZE));
|
||||||
|
|
||||||
@ -738,16 +738,16 @@ void match(const DevMem2D_<T>& query, const DevMem2D_<T>* trains, int n, const M
|
|||||||
|
|
||||||
if (stream == 0)
|
if (stream == 0)
|
||||||
cudaSafeCall( cudaDeviceSynchronize() );
|
cudaSafeCall( cudaDeviceSynchronize() );
|
||||||
}
|
}
|
||||||
|
|
||||||
///////////////////////////////////////////////////////////////////////////////
|
///////////////////////////////////////////////////////////////////////////////
|
||||||
// knnMatch 2 dispatcher
|
// knnMatch 2 dispatcher
|
||||||
|
|
||||||
template <typename Dist, typename T, typename Mask>
|
template <typename Dist, typename T, typename Mask>
|
||||||
void match2Dispatcher(const DevMem2D_<T>& query, const DevMem2D_<T>& train, const Mask& mask,
|
void match2Dispatcher(const DevMem2D_<T>& query, const DevMem2D_<T>& train, const Mask& mask,
|
||||||
const DevMem2Db& trainIdx, const DevMem2Db& distance,
|
const DevMem2Db& trainIdx, const DevMem2Db& distance,
|
||||||
int cc, cudaStream_t stream)
|
int cc, cudaStream_t stream)
|
||||||
{
|
{
|
||||||
if (query.cols <= 64)
|
if (query.cols <= 64)
|
||||||
{
|
{
|
||||||
matchUnrolledCached<16, 64, Dist>(query, train, mask, static_cast< DevMem2D_<int2> >(trainIdx), static_cast< DevMem2D_<float2> > (distance), stream);
|
matchUnrolledCached<16, 64, Dist>(query, train, mask, static_cast< DevMem2D_<int2> >(trainIdx), static_cast< DevMem2D_<float2> > (distance), stream);
|
||||||
@ -772,13 +772,13 @@ void match2Dispatcher(const DevMem2D_<T>& query, const DevMem2D_<T>& train, cons
|
|||||||
{
|
{
|
||||||
match<16, Dist>(query, train, mask, static_cast< DevMem2D_<int2> >(trainIdx), static_cast< DevMem2D_<float2> > (distance), stream);
|
match<16, Dist>(query, train, mask, static_cast< DevMem2D_<int2> >(trainIdx), static_cast< DevMem2D_<float2> > (distance), stream);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
template <typename Dist, typename T, typename Mask>
|
template <typename Dist, typename T, typename Mask>
|
||||||
void match2Dispatcher(const DevMem2D_<T>& query, const DevMem2D_<T>* trains, int n, const Mask& mask,
|
void match2Dispatcher(const DevMem2D_<T>& query, const DevMem2D_<T>* trains, int n, const Mask& mask,
|
||||||
const DevMem2Db& trainIdx, const DevMem2Db& imgIdx, const DevMem2Db& distance,
|
const DevMem2Db& trainIdx, const DevMem2Db& imgIdx, const DevMem2Db& distance,
|
||||||
int cc, cudaStream_t stream)
|
int cc, cudaStream_t stream)
|
||||||
{
|
{
|
||||||
if (query.cols <= 64)
|
if (query.cols <= 64)
|
||||||
{
|
{
|
||||||
matchUnrolledCached<16, 64, Dist>(query, trains, n, mask, static_cast< DevMem2D_<int2> >(trainIdx), static_cast< DevMem2D_<int2> >(imgIdx), static_cast< DevMem2D_<float2> > (distance), stream);
|
matchUnrolledCached<16, 64, Dist>(query, trains, n, mask, static_cast< DevMem2D_<int2> >(trainIdx), static_cast< DevMem2D_<int2> >(imgIdx), static_cast< DevMem2D_<float2> > (distance), stream);
|
||||||
@ -803,14 +803,14 @@ void match2Dispatcher(const DevMem2D_<T>& query, const DevMem2D_<T>* trains, int
|
|||||||
{
|
{
|
||||||
match<16, Dist>(query, trains, n, mask, static_cast< DevMem2D_<int2> >(trainIdx), static_cast< DevMem2D_<int2> >(imgIdx), static_cast< DevMem2D_<float2> > (distance), stream);
|
match<16, Dist>(query, trains, n, mask, static_cast< DevMem2D_<int2> >(trainIdx), static_cast< DevMem2D_<int2> >(imgIdx), static_cast< DevMem2D_<float2> > (distance), stream);
|
||||||
}
|
}
|
||||||
}
|
}
|

///////////////////////////////////////////////////////////////////////////////
// Calc distance kernel

template <int BLOCK_SIZE, int MAX_DESC_LEN, typename Dist, typename T, typename Mask>
__global__ void calcDistanceUnrolled(const DevMem2D_<T> query, const DevMem2D_<T> train, const Mask mask, PtrStepf allDist)
{
    extern __shared__ int smem[];

    const int queryIdx = blockIdx.y * BLOCK_SIZE + threadIdx.y;
@ -855,11 +855,11 @@ __global__ void calcDistanceUnrolled(const DevMem2D_<T query, const DevMem2D_<T

            allDist.ptr(queryIdx)[trainIdx] = distVal;
        }
    }

template <int BLOCK_SIZE, int MAX_DESC_LEN, typename Dist, typename T, typename Mask>
void calcDistanceUnrolled(const DevMem2D_<T>& query, const DevMem2D_<T>& train, const Mask& mask, const DevMem2Df& allDist, cudaStream_t stream)
{
    const dim3 block(BLOCK_SIZE, BLOCK_SIZE);
    const dim3 grid(divUp(train.rows, BLOCK_SIZE), divUp(query.rows, BLOCK_SIZE));

@ -870,11 +870,11 @@ void calcDistanceUnrolled(const DevMem2D_<T>& query, const DevMem2D_<T>& train,

    if (stream == 0)
        cudaSafeCall( cudaDeviceSynchronize() );
}

template <int BLOCK_SIZE, typename Dist, typename T, typename Mask>
__global__ void calcDistance(const DevMem2D_<T> query, const DevMem2D_<T> train, const Mask mask, PtrStepf allDist)
{
    extern __shared__ int smem[];

    const int queryIdx = blockIdx.y * BLOCK_SIZE + threadIdx.y;
@ -918,11 +918,11 @@ __global__ void calcDistance(const DevMem2D_<T> query, const DevMem2D_<T> train,

            allDist.ptr(queryIdx)[trainIdx] = distVal;
        }
    }

template <int BLOCK_SIZE, typename Dist, typename T, typename Mask>
void calcDistance(const DevMem2D_<T>& query, const DevMem2D_<T>& train, const Mask& mask, const DevMem2Df& allDist, cudaStream_t stream)
{
    const dim3 block(BLOCK_SIZE, BLOCK_SIZE);
    const dim3 grid(divUp(train.rows, BLOCK_SIZE), divUp(query.rows, BLOCK_SIZE));

@ -933,16 +933,16 @@ void calcDistance(const DevMem2D_<T>& query, const DevMem2D_<T>& train, const Ma

    if (stream == 0)
        cudaSafeCall( cudaDeviceSynchronize() );
}

///////////////////////////////////////////////////////////////////////////////
// Calc Distance dispatcher

template <typename Dist, typename T, typename Mask>
void calcDistanceDispatcher(const DevMem2D_<T>& query, const DevMem2D_<T>& train, const Mask& mask,
    const DevMem2Df& allDist,
    int cc, cudaStream_t stream)
{
    if (query.cols <= 64)
    {
        calcDistanceUnrolled<16, 64, Dist>(query, train, mask, allDist, stream);
@ -967,14 +967,14 @@ void calcDistanceDispatcher(const DevMem2D_<T>& query, const DevMem2D_<T>& train
    {
        calcDistance<16, Dist>(query, train, mask, allDist, stream);
    }
}
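Note: the host wrappers size their launch grids with divUp so the last, partial tile of rows is still covered, and the kernels bound-check the overhang. A self-contained sketch of that arithmetic with a trivial kernel and illustrative sizes:

#include <cstdio>

__global__ void fill(float* out, int n)
{
    const int i = blockIdx.x * blockDim.x + threadIdx.x;
    if (i < n)                 // guard: the last block overhangs the data
        out[i] = 1.0f;
}

static int divUp(int total, int grain) { return (total + grain - 1) / grain; }

int main()
{
    const int n = 1000;
    float* d = 0;
    cudaMalloc(&d, n * sizeof(float));
    fill<<<divUp(n, 256), 256>>>(d, n);   // 4 blocks cover 1000 elements
    cudaDeviceSynchronize();
    cudaFree(d);
    std::printf("launched %d blocks\n", divUp(n, 256));
    return 0;
}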

///////////////////////////////////////////////////////////////////////////////
// find knn match kernel

template <int BLOCK_SIZE>
__global__ void findBestMatch(DevMem2Df allDist, int i, PtrStepi trainIdx, PtrStepf distance)
{
    const int SMEM_SIZE = BLOCK_SIZE > 64 ? BLOCK_SIZE : 64;
    __shared__ float s_dist[SMEM_SIZE];
    __shared__ int s_trainIdx[SMEM_SIZE];
@ -1011,11 +1011,11 @@ __global__ void findBestMatch(DevMem2Df allDist, int i, PtrStepi trainIdx, PtrSt
            distance.ptr(queryIdx)[i] = dist;
        }
    }
}

template <int BLOCK_SIZE>
void findKnnMatch(int k, const DevMem2Di& trainIdx, const DevMem2Df& distance, const DevMem2Df& allDist, cudaStream_t stream)
{
    const dim3 block(BLOCK_SIZE, 1, 1);
    const dim3 grid(trainIdx.rows, 1, 1);

@ -1027,21 +1027,21 @@ void findKnnMatch(int k, const DevMem2Di& trainIdx, const DevMem2Df& distance, c

    if (stream == 0)
        cudaSafeCall( cudaDeviceSynchronize() );
}

void findKnnMatchDispatcher(int k, const DevMem2Db& trainIdx, const DevMem2Db& distance, const DevMem2Df& allDist, int cc, cudaStream_t stream)
{
    findKnnMatch<256>(k, static_cast<DevMem2Di>(trainIdx), static_cast<DevMem2Df>(distance), allDist, stream);
}
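Note: findBestMatch reduces one row of allDist to its minimum distance together with the train index that produced it, keeping the (value, index) pair intact through the reduction. A standalone sketch of such an argmin reduction; BLOCK_SIZE must be a power of two here, and the shapes are illustrative rather than the OpenCV ones:

template <int BLOCK_SIZE>
__global__ void argminRow(const float* row, int n, int* outIdx, float* outVal)
{
    __shared__ float s_dist[BLOCK_SIZE];
    __shared__ int   s_idx[BLOCK_SIZE];

    float best = 3.402823466e+38f;                        // FLT_MAX
    int bestIdx = -1;
    for (int i = threadIdx.x; i < n; i += BLOCK_SIZE)     // strided scan
        if (row[i] < best) { best = row[i]; bestIdx = i; }

    s_dist[threadIdx.x] = best;
    s_idx[threadIdx.x] = bestIdx;
    __syncthreads();

    for (int s = BLOCK_SIZE / 2; s > 0; s >>= 1)          // tree reduction
    {
        if (threadIdx.x < s && s_dist[threadIdx.x + s] < s_dist[threadIdx.x])
        {
            s_dist[threadIdx.x] = s_dist[threadIdx.x + s];
            s_idx[threadIdx.x] = s_idx[threadIdx.x + s];  // index travels with value
        }
        __syncthreads();
    }

    if (threadIdx.x == 0) { *outIdx = s_idx[0]; *outVal = s_dist[0]; }
}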

///////////////////////////////////////////////////////////////////////////////
// knn match Dispatcher

template <typename Dist, typename T, typename Mask>
void matchDispatcher(const DevMem2D_<T>& query, const DevMem2D_<T>& train, int k, const Mask& mask,
    const DevMem2Db& trainIdx, const DevMem2Db& distance, const DevMem2Df& allDist,
    int cc, cudaStream_t stream)
{
    if (k == 2)
    {
        match2Dispatcher<Dist>(query, train, mask, trainIdx, distance, cc, stream);
@ -1051,111 +1051,109 @@ void matchDispatcher(const DevMem2D_<T>& query, const DevMem2D_<T>& train, int k
        calcDistanceDispatcher<Dist>(query, train, mask, allDist, cc, stream);
        findKnnMatchDispatcher(k, trainIdx, distance, allDist, cc, stream);
    }
}
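Note: k == 2 gets a fused kernel that tracks the two best candidates in a single pass, while general k materializes the full query-by-train distance matrix (calcDistanceDispatcher) and then extracts the best column k times (findKnnMatchDispatcher), masking each winner before the next pass. A CPU model of that peel-off loop, with a hypothetical knnOnRow helper:

#include <cfloat>
#include <cstdio>
#include <vector>

void knnOnRow(std::vector<float>& row, int k)   // row plays the role of one allDist row
{
    for (int pass = 0; pass < k; ++pass)
    {
        int best = 0;
        for (int j = 1; j < (int)row.size(); ++j)
            if (row[j] < row[best]) best = j;
        std::printf("neighbor %d: train %d (dist %.2f)\n", pass, best, row[best]);
        row[best] = FLT_MAX;    // mask the winner out, as the GPU pass does
    }
}

int main()
{
    std::vector<float> row = { 4.0f, 1.0f, 3.0f, 0.5f, 2.0f };
    knnOnRow(row, 2);           // prints train 3, then train 1
    return 0;
}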

///////////////////////////////////////////////////////////////////////////////
// knn match caller

template <typename T> void matchL1_gpu(const DevMem2Db& query, const DevMem2Db& train, int k, const DevMem2Db& mask,
    const DevMem2Db& trainIdx, const DevMem2Db& distance, const DevMem2Df& allDist,
    int cc, cudaStream_t stream)
{
    if (mask.data)
        matchDispatcher< L1Dist<T> >(static_cast< DevMem2D_<T> >(query), static_cast< DevMem2D_<T> >(train), k, SingleMask(mask), trainIdx, distance, allDist, cc, stream);
    else
        matchDispatcher< L1Dist<T> >(static_cast< DevMem2D_<T> >(query), static_cast< DevMem2D_<T> >(train), k, WithOutMask(), trainIdx, distance, allDist, cc, stream);
}

template void matchL1_gpu<uchar >(const DevMem2Db& queryDescs, const DevMem2Db& trainDescs, int k, const DevMem2Db& mask, const DevMem2Db& trainIdx, const DevMem2Db& distance, const DevMem2Df& allDist, int cc, cudaStream_t stream);
//template void matchL1_gpu<schar >(const DevMem2Db& queryDescs, const DevMem2Db& trainDescs, int k, const DevMem2Db& mask, const DevMem2Db& trainIdx, const DevMem2Db& distance, const DevMem2Df& allDist, int cc, cudaStream_t stream);
template void matchL1_gpu<ushort>(const DevMem2Db& queryDescs, const DevMem2Db& trainDescs, int k, const DevMem2Db& mask, const DevMem2Db& trainIdx, const DevMem2Db& distance, const DevMem2Df& allDist, int cc, cudaStream_t stream);
template void matchL1_gpu<short >(const DevMem2Db& queryDescs, const DevMem2Db& trainDescs, int k, const DevMem2Db& mask, const DevMem2Db& trainIdx, const DevMem2Db& distance, const DevMem2Df& allDist, int cc, cudaStream_t stream);
template void matchL1_gpu<int >(const DevMem2Db& queryDescs, const DevMem2Db& trainDescs, int k, const DevMem2Db& mask, const DevMem2Db& trainIdx, const DevMem2Db& distance, const DevMem2Df& allDist, int cc, cudaStream_t stream);
template void matchL1_gpu<float >(const DevMem2Db& queryDescs, const DevMem2Db& trainDescs, int k, const DevMem2Db& mask, const DevMem2Db& trainIdx, const DevMem2Db& distance, const DevMem2Df& allDist, int cc, cudaStream_t stream);

template <typename T> void matchL2_gpu(const DevMem2Db& query, const DevMem2Db& train, int k, const DevMem2Db& mask,
    const DevMem2Db& trainIdx, const DevMem2Db& distance, const DevMem2Df& allDist,
    int cc, cudaStream_t stream)
{
    if (mask.data)
        matchDispatcher<L2Dist>(static_cast< DevMem2D_<T> >(query), static_cast< DevMem2D_<T> >(train), k, SingleMask(mask), trainIdx, distance, allDist, cc, stream);
    else
        matchDispatcher<L2Dist>(static_cast< DevMem2D_<T> >(query), static_cast< DevMem2D_<T> >(train), k, WithOutMask(), trainIdx, distance, allDist, cc, stream);
}

//template void matchL2_gpu<uchar >(const DevMem2Db& queryDescs, const DevMem2Db& trainDescs, int k, const DevMem2Db& mask, const DevMem2Db& trainIdx, const DevMem2Db& distance, const DevMem2Df& allDist, int cc, cudaStream_t stream);
//template void matchL2_gpu<schar >(const DevMem2Db& queryDescs, const DevMem2Db& trainDescs, int k, const DevMem2Db& mask, const DevMem2Db& trainIdx, const DevMem2Db& distance, const DevMem2Df& allDist, int cc, cudaStream_t stream);
//template void matchL2_gpu<ushort>(const DevMem2Db& queryDescs, const DevMem2Db& trainDescs, int k, const DevMem2Db& mask, const DevMem2Db& trainIdx, const DevMem2Db& distance, const DevMem2Df& allDist, int cc, cudaStream_t stream);
//template void matchL2_gpu<short >(const DevMem2Db& queryDescs, const DevMem2Db& trainDescs, int k, const DevMem2Db& mask, const DevMem2Db& trainIdx, const DevMem2Db& distance, const DevMem2Df& allDist, int cc, cudaStream_t stream);
//template void matchL2_gpu<int >(const DevMem2Db& queryDescs, const DevMem2Db& trainDescs, int k, const DevMem2Db& mask, const DevMem2Db& trainIdx, const DevMem2Db& distance, const DevMem2Df& allDist, int cc, cudaStream_t stream);
template void matchL2_gpu<float >(const DevMem2Db& queryDescs, const DevMem2Db& trainDescs, int k, const DevMem2Db& mask, const DevMem2Db& trainIdx, const DevMem2Db& distance, const DevMem2Df& allDist, int cc, cudaStream_t stream);

template <typename T> void matchHamming_gpu(const DevMem2Db& query, const DevMem2Db& train, int k, const DevMem2Db& mask,
    const DevMem2Db& trainIdx, const DevMem2Db& distance, const DevMem2Df& allDist,
    int cc, cudaStream_t stream)
{
    if (mask.data)
        matchDispatcher<HammingDist>(static_cast< DevMem2D_<T> >(query), static_cast< DevMem2D_<T> >(train), k, SingleMask(mask), trainIdx, distance, allDist, cc, stream);
    else
        matchDispatcher<HammingDist>(static_cast< DevMem2D_<T> >(query), static_cast< DevMem2D_<T> >(train), k, WithOutMask(), trainIdx, distance, allDist, cc, stream);
}

template void matchHamming_gpu<uchar >(const DevMem2Db& queryDescs, const DevMem2Db& trainDescs, int k, const DevMem2Db& mask, const DevMem2Db& trainIdx, const DevMem2Db& distance, const DevMem2Df& allDist, int cc, cudaStream_t stream);
//template void matchHamming_gpu<schar >(const DevMem2Db& queryDescs, const DevMem2Db& trainDescs, int k, const DevMem2Db& mask, const DevMem2Db& trainIdx, const DevMem2Db& distance, const DevMem2Df& allDist, int cc, cudaStream_t stream);
template void matchHamming_gpu<ushort>(const DevMem2Db& queryDescs, const DevMem2Db& trainDescs, int k, const DevMem2Db& mask, const DevMem2Db& trainIdx, const DevMem2Db& distance, const DevMem2Df& allDist, int cc, cudaStream_t stream);
//template void matchHamming_gpu<short >(const DevMem2Db& queryDescs, const DevMem2Db& trainDescs, int k, const DevMem2Db& mask, const DevMem2Db& trainIdx, const DevMem2Db& distance, const DevMem2Df& allDist, int cc, cudaStream_t stream);
template void matchHamming_gpu<int >(const DevMem2Db& queryDescs, const DevMem2Db& trainDescs, int k, const DevMem2Db& mask, const DevMem2Db& trainIdx, const DevMem2Db& distance, const DevMem2Df& allDist, int cc, cudaStream_t stream);

template <typename T> void match2L1_gpu(const DevMem2Db& query, const DevMem2Db& trains, const DevMem2D_<PtrStepb>& masks,
    const DevMem2Db& trainIdx, const DevMem2Db& imgIdx, const DevMem2Db& distance,
    int cc, cudaStream_t stream)
{
    if (masks.data)
        match2Dispatcher< L1Dist<T> >(static_cast< DevMem2D_<T> >(query), (const DevMem2D_<T>*)trains.ptr(), trains.cols, MaskCollection(masks.data), trainIdx, imgIdx, distance, cc, stream);
    else
        match2Dispatcher< L1Dist<T> >(static_cast< DevMem2D_<T> >(query), (const DevMem2D_<T>*)trains.ptr(), trains.cols, WithOutMask(), trainIdx, imgIdx, distance, cc, stream);
}

template void match2L1_gpu<uchar >(const DevMem2Db& query, const DevMem2Db& trains, const DevMem2D_<PtrStepb>& masks, const DevMem2Db& trainIdx, const DevMem2Db& imgIdx, const DevMem2Db& distance, int cc, cudaStream_t stream);
//template void match2L1_gpu<schar >(const DevMem2Db& query, const DevMem2Db& trains, const DevMem2D_<PtrStepb>& masks, const DevMem2Db& trainIdx, const DevMem2Db& imgIdx, const DevMem2Db& distance, int cc, cudaStream_t stream);
template void match2L1_gpu<ushort>(const DevMem2Db& query, const DevMem2Db& trains, const DevMem2D_<PtrStepb>& masks, const DevMem2Db& trainIdx, const DevMem2Db& imgIdx, const DevMem2Db& distance, int cc, cudaStream_t stream);
template void match2L1_gpu<short >(const DevMem2Db& query, const DevMem2Db& trains, const DevMem2D_<PtrStepb>& masks, const DevMem2Db& trainIdx, const DevMem2Db& imgIdx, const DevMem2Db& distance, int cc, cudaStream_t stream);
template void match2L1_gpu<int >(const DevMem2Db& query, const DevMem2Db& trains, const DevMem2D_<PtrStepb>& masks, const DevMem2Db& trainIdx, const DevMem2Db& imgIdx, const DevMem2Db& distance, int cc, cudaStream_t stream);
template void match2L1_gpu<float >(const DevMem2Db& query, const DevMem2Db& trains, const DevMem2D_<PtrStepb>& masks, const DevMem2Db& trainIdx, const DevMem2Db& imgIdx, const DevMem2Db& distance, int cc, cudaStream_t stream);

template <typename T> void match2L2_gpu(const DevMem2Db& query, const DevMem2Db& trains, const DevMem2D_<PtrStepb>& masks,
    const DevMem2Db& trainIdx, const DevMem2Db& imgIdx, const DevMem2Db& distance,
    int cc, cudaStream_t stream)
{
    if (masks.data)
        match2Dispatcher<L2Dist>(static_cast< DevMem2D_<T> >(query), (const DevMem2D_<T>*)trains.ptr(), trains.cols, MaskCollection(masks.data), trainIdx, imgIdx, distance, cc, stream);
    else
        match2Dispatcher<L2Dist>(static_cast< DevMem2D_<T> >(query), (const DevMem2D_<T>*)trains.ptr(), trains.cols, WithOutMask(), trainIdx, imgIdx, distance, cc, stream);
}

//template void match2L2_gpu<uchar >(const DevMem2Db& query, const DevMem2Db& trains, const DevMem2D_<PtrStepb>& masks, const DevMem2Db& trainIdx, const DevMem2Db& imgIdx, const DevMem2Db& distance, int cc, cudaStream_t stream);
//template void match2L2_gpu<schar >(const DevMem2Db& query, const DevMem2Db& trains, const DevMem2D_<PtrStepb>& masks, const DevMem2Db& trainIdx, const DevMem2Db& imgIdx, const DevMem2Db& distance, int cc, cudaStream_t stream);
//template void match2L2_gpu<ushort>(const DevMem2Db& query, const DevMem2Db& trains, const DevMem2D_<PtrStepb>& masks, const DevMem2Db& trainIdx, const DevMem2Db& imgIdx, const DevMem2Db& distance, int cc, cudaStream_t stream);
//template void match2L2_gpu<short >(const DevMem2Db& query, const DevMem2Db& trains, const DevMem2D_<PtrStepb>& masks, const DevMem2Db& trainIdx, const DevMem2Db& imgIdx, const DevMem2Db& distance, int cc, cudaStream_t stream);
//template void match2L2_gpu<int >(const DevMem2Db& query, const DevMem2Db& trains, const DevMem2D_<PtrStepb>& masks, const DevMem2Db& trainIdx, const DevMem2Di& imgIdx, const DevMem2Db& distance, int cc, cudaStream_t stream);
template void match2L2_gpu<float >(const DevMem2Db& query, const DevMem2Db& trains, const DevMem2D_<PtrStepb>& masks, const DevMem2Db& trainIdx, const DevMem2Db& imgIdx, const DevMem2Db& distance, int cc, cudaStream_t stream);

template <typename T> void match2Hamming_gpu(const DevMem2Db& query, const DevMem2Db& trains, const DevMem2D_<PtrStepb>& masks,
    const DevMem2Db& trainIdx, const DevMem2Db& imgIdx, const DevMem2Db& distance,
    int cc, cudaStream_t stream)
{
    if (masks.data)
        match2Dispatcher<HammingDist>(static_cast< DevMem2D_<T> >(query), (const DevMem2D_<T>*)trains.ptr(), trains.cols, MaskCollection(masks.data), trainIdx, imgIdx, distance, cc, stream);
    else
        match2Dispatcher<HammingDist>(static_cast< DevMem2D_<T> >(query), (const DevMem2D_<T>*)trains.ptr(), trains.cols, WithOutMask(), trainIdx, imgIdx, distance, cc, stream);
}

template void match2Hamming_gpu<uchar >(const DevMem2Db& query, const DevMem2Db& trains, const DevMem2D_<PtrStepb>& masks, const DevMem2Db& trainIdx, const DevMem2Db& imgIdx, const DevMem2Db& distance, int cc, cudaStream_t stream);
//template void match2Hamming_gpu<schar >(const DevMem2Db& query, const DevMem2Db& trains, const DevMem2D_<PtrStepb>& masks, const DevMem2Db& trainIdx, const DevMem2Db& imgIdx, const DevMem2Db& distance, int cc, cudaStream_t stream);
template void match2Hamming_gpu<ushort>(const DevMem2Db& query, const DevMem2Db& trains, const DevMem2D_<PtrStepb>& masks, const DevMem2Db& trainIdx, const DevMem2Db& imgIdx, const DevMem2Db& distance, int cc, cudaStream_t stream);
//template void match2Hamming_gpu<short >(const DevMem2Db& query, const DevMem2Db& trains, const DevMem2D_<PtrStepb>& masks, const DevMem2Db& trainIdx, const DevMem2Db& imgIdx, const DevMem2Db& distance, int cc, cudaStream_t stream);
template void match2Hamming_gpu<int >(const DevMem2Db& query, const DevMem2Db& trains, const DevMem2D_<PtrStepb>& masks, const DevMem2Db& trainIdx, const DevMem2Db& imgIdx, const DevMem2Db& distance, int cc, cudaStream_t stream);
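Note: the long tail of explicit instantiations above pins down one compiled symbol per supported element type, so callers in other translation units can link against this .cu file; the commented-out lines mark types deliberately left unsupported rather than forgotten. A toy model of the pattern with a made-up matchLike_gpu:

// In a header, callers see only the declaration:
template <typename T> void matchLike_gpu(const T* query, const T* train, int n);

// In the .cu file: the definition plus one explicit instantiation per
// supported type. A type that is not listed fails at link time, which is
// why unsupported types are commented out rather than deleted.
template <typename T> void matchLike_gpu(const T* query, const T* train, int n)
{
    (void)query; (void)train; (void)n;   // a real body would launch a kernel
}

template void matchLike_gpu<unsigned char>(const unsigned char*, const unsigned char*, int);
template void matchLike_gpu<float>(const float*, const float*, int);
//template void matchLike_gpu<double>(const double*, const double*, int);  // unsupported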
-} // namespace bf_knnmatch
-
-END_OPENCV_DEVICE_NAMESPACE
+} // namespace bf_knnmatch
+}}} // namespace cv { namespace gpu { namespace device {
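Note: this closing hunk is the commit's entire purpose. The BEGIN/END macros expanded to the nested device namespace; their definitions are not part of this diff, but the substitutions visible above imply roughly the following, and spelling the scopes out makes them visible to readers, editors, and tooling:

// Assumed shape of the removed macros (not shown in this diff):
//   #define BEGIN_OPENCV_DEVICE_NAMESPACE namespace cv { namespace gpu { namespace device {
//   #define END_OPENCV_DEVICE_NAMESPACE   }}}
//   #define OPENCV_DEVICE_NAMESPACE_      ::cv::gpu::device::

// After the change, the same structure is written out explicitly:
namespace cv { namespace gpu { namespace device
{
    namespace bf_knnmatch
    {
        inline void match_caller() {}   // placeholder for the callers above
    }
}}} // namespace cv { namespace gpu { namespace device

// Call sites name the scope directly instead of pasting a macro:
void f() { ::cv::gpu::device::bf_knnmatch::match_caller(); }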
@ -45,16 +45,16 @@
#include "opencv2/gpu/device/vec_distance.hpp"
#include "opencv2/gpu/device/datamov_utils.hpp"

-BEGIN_OPENCV_DEVICE_NAMESPACE
-
-namespace bf_match {
-
-///////////////////////////////////////////////////////////////////////////////
-// Reduction
-
-template <int BLOCK_SIZE>
-__device__ void findBestMatch(float& bestDistance, int& bestTrainIdx, float* s_distance, int* s_trainIdx)
-{
+namespace cv { namespace gpu { namespace device
+{
+namespace bf_match
+{
+///////////////////////////////////////////////////////////////////////////////
+// Reduction
+
+template <int BLOCK_SIZE>
+__device__ void findBestMatch(float& bestDistance, int& bestTrainIdx, float* s_distance, int* s_trainIdx)
+{
    s_distance += threadIdx.y * BLOCK_SIZE;
    s_trainIdx += threadIdx.y * BLOCK_SIZE;

@ -64,11 +64,11 @@ __device__ void findBestMatch(float& bestDistance, int& bestTrainIdx, float* s_d
    __syncthreads();

    reducePredVal<BLOCK_SIZE>(s_distance, bestDistance, s_trainIdx, bestTrainIdx, threadIdx.x, less<volatile float>());
}

template <int BLOCK_SIZE>
__device__ void findBestMatch(float& bestDistance, int& bestTrainIdx, int& bestImgIdx, float* s_distance, int* s_trainIdx, int* s_imgIdx)
{
    s_distance += threadIdx.y * BLOCK_SIZE;
    s_trainIdx += threadIdx.y * BLOCK_SIZE;
    s_imgIdx += threadIdx.y * BLOCK_SIZE;
@ -80,27 +80,27 @@ __device__ void findBestMatch(float& bestDistance, int& bestTrainIdx, int& bestI
    __syncthreads();

    reducePredVal2<BLOCK_SIZE>(s_distance, bestDistance, s_trainIdx, bestTrainIdx, s_imgIdx, bestImgIdx, threadIdx.x, less<volatile float>());
}

///////////////////////////////////////////////////////////////////////////////
// Match Unrolled Cached

template <int BLOCK_SIZE, int MAX_DESC_LEN, typename T, typename U>
__device__ void loadQueryToSmem(int queryIdx, const DevMem2D_<T>& query, U* s_query)
{
    #pragma unroll
    for (int i = 0; i < MAX_DESC_LEN / BLOCK_SIZE; ++i)
    {
        const int loadX = threadIdx.x + i * BLOCK_SIZE;
        s_query[threadIdx.y * MAX_DESC_LEN + loadX] = loadX < query.cols ? query.ptr(::min(queryIdx, query.rows - 1))[loadX] : 0;
    }
}

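Note: loadQueryToSmem has each BLOCK_SIZE-wide thread row stage one query descriptor into shared memory, clamping the row index with ::min so threads past the last query still issue safe in-bounds loads instead of diverging. A simplified float-only model of the same staging (raw pointer plus pitch in elements; stageRow and its parameters are illustrative):

template <int BLOCK_SIZE, int MAX_DESC_LEN>
__device__ void stageRow(int row, const float* src, int srcCols, int srcRows, int pitchElems, float* s_row)
{
    #pragma unroll
    for (int i = 0; i < MAX_DESC_LEN / BLOCK_SIZE; ++i)
    {
        const int x = threadIdx.x + i * BLOCK_SIZE;
        const int r = ::min(row, srcRows - 1);   // clamp keeps the load in-bounds
        s_row[threadIdx.y * MAX_DESC_LEN + x] = x < srcCols ? src[r * pitchElems + x] : 0.0f;
    }
}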
template <int BLOCK_SIZE, int MAX_DESC_LEN, typename Dist, typename T, typename Mask>
__device__ void loopUnrolledCached(int queryIdx, const DevMem2D_<T>& query, int imgIdx, const DevMem2D_<T>& train, const Mask& mask,
    typename Dist::value_type* s_query, typename Dist::value_type* s_train,
    float& bestDistance, int& bestTrainIdx, int& bestImgIdx)
{
    for (int t = 0, endt = (train.rows + BLOCK_SIZE - 1) / BLOCK_SIZE; t < endt; ++t)
    {
        Dist dist;
@ -140,11 +140,11 @@ __device__ void loopUnrolledCached(int queryIdx, const DevMem2D_<T>& query, int
            bestTrainIdx = trainIdx;
        }
    }
}

template <int BLOCK_SIZE, int MAX_DESC_LEN, typename Dist, typename T, typename Mask>
__global__ void matchUnrolledCached(const DevMem2D_<T> query, const DevMem2D_<T> train, const Mask mask, int* bestTrainIdx, float* bestDistance)
{
    extern __shared__ int smem[];

    const int queryIdx = blockIdx.x * BLOCK_SIZE + threadIdx.y;
@ -171,13 +171,13 @@ __global__ void matchUnrolledCached(const DevMem2D_<T> query, const DevMem2D_<T>
        bestTrainIdx[queryIdx] = myBestTrainIdx;
        bestDistance[queryIdx] = myBestDistance;
    }
}

template <int BLOCK_SIZE, int MAX_DESC_LEN, typename Dist, typename T, typename Mask>
void matchUnrolledCached(const DevMem2D_<T>& query, const DevMem2D_<T>& train, const Mask& mask,
    const DevMem2Di& trainIdx, const DevMem2Df& distance,
    cudaStream_t stream)
{
    const dim3 block(BLOCK_SIZE, BLOCK_SIZE);
    const dim3 grid(divUp(query.rows, BLOCK_SIZE));

@ -188,12 +188,12 @@ void matchUnrolledCached(const DevMem2D_<T>& query, const DevMem2D_<T>& train, c

    if (stream == 0)
        cudaSafeCall( cudaDeviceSynchronize() );
}
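Note: every host wrapper in this file follows the same stream convention: a null stream means the caller wants synchronous semantics, so the wrapper blocks on cudaDeviceSynchronize; a real stream returns immediately and the caller owns synchronization. A minimal sketch, with checkCuda standing in for the cudaSafeCall macro:

#include <cstdio>
#include <cstdlib>

static void checkCuda(cudaError_t e)
{
    if (e != cudaSuccess) { std::fprintf(stderr, "%s\n", cudaGetErrorString(e)); std::exit(1); }
}

__global__ void work() {}

void launchWork(cudaStream_t stream)
{
    work<<<1, 1, 0, stream>>>();
    checkCuda(cudaGetLastError());

    if (stream == 0)                        // null stream: block until done
        checkCuda(cudaDeviceSynchronize());
    // non-null stream: return at once; the caller synchronizes later
}

int main() { launchWork(0); return 0; }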

template <int BLOCK_SIZE, int MAX_DESC_LEN, typename Dist, typename T, typename Mask>
__global__ void matchUnrolledCached(const DevMem2D_<T> query, const DevMem2D_<T>* trains, int n, const Mask mask,
    int* bestTrainIdx, int* bestImgIdx, float* bestDistance)
{
    extern __shared__ int smem[];

    const int queryIdx = blockIdx.x * BLOCK_SIZE + threadIdx.y;
@ -230,13 +230,13 @@ __global__ void matchUnrolledCached(const DevMem2D_<T> query, const DevMem2D_<T>
        bestImgIdx[queryIdx] = myBestImgIdx;
        bestDistance[queryIdx] = myBestDistance;
    }
}

template <int BLOCK_SIZE, int MAX_DESC_LEN, typename Dist, typename T, typename Mask>
void matchUnrolledCached(const DevMem2D_<T>& query, const DevMem2D_<T>* trains, int n, const Mask& mask,
    const DevMem2Di& trainIdx, const DevMem2Di& imgIdx, const DevMem2Df& distance,
    cudaStream_t stream)
{
    const dim3 block(BLOCK_SIZE, BLOCK_SIZE);
    const dim3 grid(divUp(query.rows, BLOCK_SIZE));

@ -247,16 +247,16 @@ void matchUnrolledCached(const DevMem2D_<T>& query, const DevMem2D_<T>* trains,

    if (stream == 0)
        cudaSafeCall( cudaDeviceSynchronize() );
}

///////////////////////////////////////////////////////////////////////////////
// Match Unrolled

template <int BLOCK_SIZE, int MAX_DESC_LEN, typename Dist, typename T, typename Mask>
__device__ void loopUnrolled(int queryIdx, const DevMem2D_<T>& query, int imgIdx, const DevMem2D_<T>& train, const Mask& mask,
    typename Dist::value_type* s_query, typename Dist::value_type* s_train,
    float& bestDistance, int& bestTrainIdx, int& bestImgIdx)
{
    for (int t = 0, endt = (train.rows + BLOCK_SIZE - 1) / BLOCK_SIZE; t < endt; ++t)
    {
        Dist dist;
@ -300,11 +300,11 @@ __device__ void loopUnrolled(int queryIdx, const DevMem2D_<T>& query, int imgIdx
            bestTrainIdx = trainIdx;
        }
    }
}

template <int BLOCK_SIZE, int MAX_DESC_LEN, typename Dist, typename T, typename Mask>
__global__ void matchUnrolled(const DevMem2D_<T> query, const DevMem2D_<T> train, const Mask mask, int* bestTrainIdx, float* bestDistance)
{
    extern __shared__ int smem[];

    const int queryIdx = blockIdx.x * BLOCK_SIZE + threadIdx.y;
@ -329,13 +329,13 @@ __global__ void matchUnrolled(const DevMem2D_<T> query, const DevMem2D_<T> train
        bestTrainIdx[queryIdx] = myBestTrainIdx;
        bestDistance[queryIdx] = myBestDistance;
    }
}

template <int BLOCK_SIZE, int MAX_DESC_LEN, typename Dist, typename T, typename Mask>
void matchUnrolled(const DevMem2D_<T>& query, const DevMem2D_<T>& train, const Mask& mask,
    const DevMem2Di& trainIdx, const DevMem2Df& distance,
    cudaStream_t stream)
{
    const dim3 block(BLOCK_SIZE, BLOCK_SIZE);
    const dim3 grid(divUp(query.rows, BLOCK_SIZE));

@ -346,12 +346,12 @@ void matchUnrolled(const DevMem2D_<T>& query, const DevMem2D_<T>& train, const M

    if (stream == 0)
        cudaSafeCall( cudaDeviceSynchronize() );
}

template <int BLOCK_SIZE, int MAX_DESC_LEN, typename Dist, typename T, typename Mask>
__global__ void matchUnrolled(const DevMem2D_<T> query, const DevMem2D_<T>* trains, int n, const Mask mask,
    int* bestTrainIdx, int* bestImgIdx, float* bestDistance)
{
    extern __shared__ int smem[];

    const int queryIdx = blockIdx.x * BLOCK_SIZE + threadIdx.y;
@ -386,13 +386,13 @@ __global__ void matchUnrolled(const DevMem2D_<T> query, const DevMem2D_<T>* trai
        bestImgIdx[queryIdx] = myBestImgIdx;
        bestDistance[queryIdx] = myBestDistance;
    }
}

template <int BLOCK_SIZE, int MAX_DESC_LEN, typename Dist, typename T, typename Mask>
void matchUnrolled(const DevMem2D_<T>& query, const DevMem2D_<T>* trains, int n, const Mask& mask,
    const DevMem2Di& trainIdx, const DevMem2Di& imgIdx, const DevMem2Df& distance,
    cudaStream_t stream)
{
    const dim3 block(BLOCK_SIZE, BLOCK_SIZE);
    const dim3 grid(divUp(query.rows, BLOCK_SIZE));

@ -403,16 +403,16 @@ void matchUnrolled(const DevMem2D_<T>& query, const DevMem2D_<T>* trains, int n,

    if (stream == 0)
        cudaSafeCall( cudaDeviceSynchronize() );
}

///////////////////////////////////////////////////////////////////////////////
// Match

template <int BLOCK_SIZE, typename Dist, typename T, typename Mask>
__device__ void loop(int queryIdx, const DevMem2D_<T>& query, int imgIdx, const DevMem2D_<T>& train, const Mask& mask,
    typename Dist::value_type* s_query, typename Dist::value_type* s_train,
    float& bestDistance, int& bestTrainIdx, int& bestImgIdx)
{
    for (int t = 0, endt = (train.rows + BLOCK_SIZE - 1) / BLOCK_SIZE; t < endt; ++t)
    {
        Dist dist;
@ -455,11 +455,11 @@ __device__ void loop(int queryIdx, const DevMem2D_<T>& query, int imgIdx, const
            bestTrainIdx = trainIdx;
        }
    }
}

template <int BLOCK_SIZE, typename Dist, typename T, typename Mask>
__global__ void match(const DevMem2D_<T> query, const DevMem2D_<T> train, const Mask mask, int* bestTrainIdx, float* bestDistance)
{
    extern __shared__ int smem[];

    const int queryIdx = blockIdx.x * BLOCK_SIZE + threadIdx.y;
@ -484,13 +484,13 @@ __global__ void match(const DevMem2D_<T> query, const DevMem2D_<T> train, const
        bestTrainIdx[queryIdx] = myBestTrainIdx;
        bestDistance[queryIdx] = myBestDistance;
    }
}

template <int BLOCK_SIZE, typename Dist, typename T, typename Mask>
void match(const DevMem2D_<T>& query, const DevMem2D_<T>& train, const Mask& mask,
    const DevMem2Di& trainIdx, const DevMem2Df& distance,
    cudaStream_t stream)
{
    const dim3 block(BLOCK_SIZE, BLOCK_SIZE);
    const dim3 grid(divUp(query.rows, BLOCK_SIZE));

@ -501,12 +501,12 @@ void match(const DevMem2D_<T>& query, const DevMem2D_<T>& train, const Mask& mas

    if (stream == 0)
        cudaSafeCall( cudaDeviceSynchronize() );
}

template <int BLOCK_SIZE, typename Dist, typename T, typename Mask>
__global__ void match(const DevMem2D_<T> query, const DevMem2D_<T>* trains, int n, const Mask mask,
    int* bestTrainIdx, int* bestImgIdx, float* bestDistance)
{
    extern __shared__ int smem[];

    const int queryIdx = blockIdx.x * BLOCK_SIZE + threadIdx.y;
@ -540,13 +540,13 @@ __global__ void match(const DevMem2D_<T> query, const DevMem2D_<T>* trains, int
        bestImgIdx[queryIdx] = myBestImgIdx;
        bestDistance[queryIdx] = myBestDistance;
    }
}

template <int BLOCK_SIZE, typename Dist, typename T, typename Mask>
void match(const DevMem2D_<T>& query, const DevMem2D_<T>* trains, int n, const Mask& mask,
    const DevMem2Di& trainIdx, const DevMem2Di& imgIdx, const DevMem2Df& distance,
    cudaStream_t stream)
{
    const dim3 block(BLOCK_SIZE, BLOCK_SIZE);
    const dim3 grid(divUp(query.rows, BLOCK_SIZE));

@ -557,16 +557,16 @@ void match(const DevMem2D_<T>& query, const DevMem2D_<T>* trains, int n, const M

    if (stream == 0)
        cudaSafeCall( cudaDeviceSynchronize() );
}

///////////////////////////////////////////////////////////////////////////////
// Match dispatcher

template <typename Dist, typename T, typename Mask>
void matchDispatcher(const DevMem2D_<T>& query, const DevMem2D_<T>& train, const Mask& mask,
    const DevMem2Di& trainIdx, const DevMem2Df& distance,
    int cc, cudaStream_t stream)
{
    if (query.cols <= 64)
    {
        matchUnrolledCached<16, 64, Dist>(query, train, mask, trainIdx, distance, stream);
@ -591,13 +591,13 @@ void matchDispatcher(const DevMem2D_<T>& query, const DevMem2D_<T>& train, const
    {
        match<16, Dist>(query, train, mask, trainIdx, distance, stream);
    }
}

template <typename Dist, typename T, typename Mask>
void matchDispatcher(const DevMem2D_<T>& query, const DevMem2D_<T>* trains, int n, const Mask& mask,
    const DevMem2Di& trainIdx, const DevMem2Di& imgIdx, const DevMem2Df& distance,
    int cc, cudaStream_t stream)
{
    if (query.cols <= 64)
    {
        matchUnrolledCached<16, 64, Dist>(query, trains, n, mask, trainIdx, imgIdx, distance, stream);
@ -622,15 +622,15 @@ void matchDispatcher(const DevMem2D_<T>& query, const DevMem2D_<T>* trains, int
    {
        match<16, Dist>(query, trains, n, mask, trainIdx, imgIdx, distance, stream);
    }
}

///////////////////////////////////////////////////////////////////////////////
// Match caller

template <typename T> void matchL1_gpu(const DevMem2Db& query, const DevMem2Db& train, const DevMem2Db& mask,
    const DevMem2Di& trainIdx, const DevMem2Df& distance,
    int cc, cudaStream_t stream)
{
    if (mask.data)
    {
        matchDispatcher< L1Dist<T> >(static_cast< DevMem2D_<T> >(query), static_cast< DevMem2D_<T> >(train), SingleMask(mask),
@ -643,19 +643,19 @@ template <typename T> void matchL1_gpu(const DevMem2Db& query, const DevMem2Db&
            trainIdx, distance,
            cc, stream);
    }
}
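Note: the callers choose between SingleMask and WithOutMask at compile time, so the unmasked instantiation pays no per-pair memory read or branch; the predicate inlines to a constant and folds away. A reduced model of that policy pattern, with simplified stand-in types rather than the OpenCV ones:

struct WithoutMaskLike
{
    __device__ bool operator()(int, int) const { return true; }   // folds away
};

struct SingleMaskLike
{
    const unsigned char* data;
    int step;                                  // row stride in bytes
    __device__ bool operator()(int q, int t) const { return data[q * step + t] != 0; }
};

template <typename Mask>
__global__ void countAllowedPairs(Mask mask, int rows, int cols, int* out)
{
    const int q = blockIdx.y;
    const int t = blockIdx.x * blockDim.x + threadIdx.x;
    if (q < rows && t < cols && mask(q, t))
        atomicAdd(out, 1);                     // stand-in for the distance work
}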

template void matchL1_gpu<uchar >(const DevMem2Db& queryDescs, const DevMem2Db& trainDescs, const DevMem2Db& mask, const DevMem2Di& trainIdx, const DevMem2Df& distance, int cc, cudaStream_t stream);
//template void matchL1_gpu<schar >(const DevMem2Db& queryDescs, const DevMem2Db& trainDescs, const DevMem2Db& mask, const DevMem2Di& trainIdx, const DevMem2Df& distance, int cc, cudaStream_t stream);
template void matchL1_gpu<ushort>(const DevMem2Db& queryDescs, const DevMem2Db& trainDescs, const DevMem2Db& mask, const DevMem2Di& trainIdx, const DevMem2Df& distance, int cc, cudaStream_t stream);
template void matchL1_gpu<short >(const DevMem2Db& queryDescs, const DevMem2Db& trainDescs, const DevMem2Db& mask, const DevMem2Di& trainIdx, const DevMem2Df& distance, int cc, cudaStream_t stream);
template void matchL1_gpu<int >(const DevMem2Db& queryDescs, const DevMem2Db& trainDescs, const DevMem2Db& mask, const DevMem2Di& trainIdx, const DevMem2Df& distance, int cc, cudaStream_t stream);
template void matchL1_gpu<float >(const DevMem2Db& queryDescs, const DevMem2Db& trainDescs, const DevMem2Db& mask, const DevMem2Di& trainIdx, const DevMem2Df& distance, int cc, cudaStream_t stream);

template <typename T> void matchL2_gpu(const DevMem2Db& query, const DevMem2Db& train, const DevMem2Db& mask,
    const DevMem2Di& trainIdx, const DevMem2Df& distance,
    int cc, cudaStream_t stream)
{
    if (mask.data)
    {
        matchDispatcher<L2Dist>(static_cast< DevMem2D_<T> >(query), static_cast< DevMem2D_<T> >(train), SingleMask(mask),
@ -668,19 +668,19 @@ template <typename T> void matchL2_gpu(const DevMem2Db& query, const DevMem2Db&
            trainIdx, distance,
            cc, stream);
    }
}

//template void matchL2_gpu<uchar >(const DevMem2Db& queryDescs, const DevMem2Db& trainDescs, const DevMem2Db& mask, const DevMem2Di& trainIdx, const DevMem2Df& distance, int cc, cudaStream_t stream);
//template void matchL2_gpu<schar >(const DevMem2Db& queryDescs, const DevMem2Db& trainDescs, const DevMem2Db& mask, const DevMem2Di& trainIdx, const DevMem2Df& distance, int cc, cudaStream_t stream);
//template void matchL2_gpu<ushort>(const DevMem2Db& queryDescs, const DevMem2Db& trainDescs, const DevMem2Db& mask, const DevMem2Di& trainIdx, const DevMem2Df& distance, int cc, cudaStream_t stream);
//template void matchL2_gpu<short >(const DevMem2Db& queryDescs, const DevMem2Db& trainDescs, const DevMem2Db& mask, const DevMem2Di& trainIdx, const DevMem2Df& distance,
|
//template void matchL2_gpu<short >(const DevMem2Db& queryDescs, const DevMem2Db& trainDescs, const DevMem2Db& mask, const DevMem2Di& trainIdx, const DevMem2Df& distance, int cc, cudaStream_t stream);
|
||||||
//template void matchL2_gpu<int >(const DevMem2Db& queryDescs, const DevMem2Db& trainDescs, const DevMem2Db& mask, const DevMem2Di& trainIdx, const DevMem2Df& distance, int cc, cudaStream_t stream);
|
//template void matchL2_gpu<int >(const DevMem2Db& queryDescs, const DevMem2Db& trainDescs, const DevMem2Db& mask, const DevMem2Di& trainIdx, const DevMem2Df& distance, int cc, cudaStream_t stream);
|
||||||
template void matchL2_gpu<float >(const DevMem2Db& queryDescs, const DevMem2Db& trainDescs, const DevMem2Db& mask, const DevMem2Di& trainIdx, const DevMem2Df& distance, int cc, cudaStream_t stream);
|
template void matchL2_gpu<float >(const DevMem2Db& queryDescs, const DevMem2Db& trainDescs, const DevMem2Db& mask, const DevMem2Di& trainIdx, const DevMem2Df& distance, int cc, cudaStream_t stream);
|
||||||
|
|
||||||
template <typename T> void matchHamming_gpu(const DevMem2Db& query, const DevMem2Db& train, const DevMem2Db& mask,
|
template <typename T> void matchHamming_gpu(const DevMem2Db& query, const DevMem2Db& train, const DevMem2Db& mask,
|
||||||
const DevMem2Di& trainIdx, const DevMem2Df& distance,
|
const DevMem2Di& trainIdx, const DevMem2Df& distance,
|
||||||
int cc, cudaStream_t stream)
|
int cc, cudaStream_t stream)
|
||||||
{
|
{
|
||||||
if (mask.data)
|
if (mask.data)
|
||||||
{
|
{
|
||||||
matchDispatcher<HammingDist>(static_cast< DevMem2D_<T> >(query), static_cast< DevMem2D_<T> >(train), SingleMask(mask),
|
matchDispatcher<HammingDist>(static_cast< DevMem2D_<T> >(query), static_cast< DevMem2D_<T> >(train), SingleMask(mask),
|
||||||
@ -693,18 +693,18 @@ template <typename T> void matchHamming_gpu(const DevMem2Db& query, const DevMem
|
|||||||
trainIdx, distance,
|
trainIdx, distance,
|
||||||
cc, stream);
|
cc, stream);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
template void matchHamming_gpu<uchar >(const DevMem2Db& queryDescs, const DevMem2Db& trainDescs, const DevMem2Db& mask, const DevMem2Di& trainIdx, const DevMem2Df& distance, int cc, cudaStream_t stream);
|
template void matchHamming_gpu<uchar >(const DevMem2Db& queryDescs, const DevMem2Db& trainDescs, const DevMem2Db& mask, const DevMem2Di& trainIdx, const DevMem2Df& distance, int cc, cudaStream_t stream);
|
||||||
//template void matchHamming_gpu<schar >(const DevMem2Db& queryDescs, const DevMem2Db& trainDescs, const DevMem2Db& mask, const DevMem2Di& trainIdx, const DevMem2Df& distance, int cc, cudaStream_t stream);
|
//template void matchHamming_gpu<schar >(const DevMem2Db& queryDescs, const DevMem2Db& trainDescs, const DevMem2Db& mask, const DevMem2Di& trainIdx, const DevMem2Df& distance, int cc, cudaStream_t stream);
|
||||||
template void matchHamming_gpu<ushort>(const DevMem2Db& queryDescs, const DevMem2Db& trainDescs, const DevMem2Db& mask, const DevMem2Di& trainIdx, const DevMem2Df& distance, int cc, cudaStream_t stream);
|
template void matchHamming_gpu<ushort>(const DevMem2Db& queryDescs, const DevMem2Db& trainDescs, const DevMem2Db& mask, const DevMem2Di& trainIdx, const DevMem2Df& distance, int cc, cudaStream_t stream);
|
||||||
//template void matchHamming_gpu<short >(const DevMem2Db& queryDescs, const DevMem2Db& trainDescs, const DevMem2Db& mask, const DevMem2Di& trainIdx, const DevMem2Df& distance, int cc, cudaStream_t stream);
|
//template void matchHamming_gpu<short >(const DevMem2Db& queryDescs, const DevMem2Db& trainDescs, const DevMem2Db& mask, const DevMem2Di& trainIdx, const DevMem2Df& distance, int cc, cudaStream_t stream);
|
||||||
template void matchHamming_gpu<int >(const DevMem2Db& queryDescs, const DevMem2Db& trainDescs, const DevMem2Db& mask, const DevMem2Di& trainIdx, const DevMem2Df& distance, int cc, cudaStream_t stream);
|
template void matchHamming_gpu<int >(const DevMem2Db& queryDescs, const DevMem2Db& trainDescs, const DevMem2Db& mask, const DevMem2Di& trainIdx, const DevMem2Df& distance, int cc, cudaStream_t stream);
|
||||||
|
|
||||||
template <typename T> void matchL1_gpu(const DevMem2Db& query, const DevMem2Db& trains, const DevMem2D_<PtrStepb>& masks,
|
template <typename T> void matchL1_gpu(const DevMem2Db& query, const DevMem2Db& trains, const DevMem2D_<PtrStepb>& masks,
|
||||||
const DevMem2Di& trainIdx, const DevMem2Di& imgIdx, const DevMem2Df& distance,
|
const DevMem2Di& trainIdx, const DevMem2Di& imgIdx, const DevMem2Df& distance,
|
||||||
int cc, cudaStream_t stream)
|
int cc, cudaStream_t stream)
|
||||||
{
|
{
|
||||||
if (masks.data)
|
if (masks.data)
|
||||||
{
|
{
|
||||||
matchDispatcher< L1Dist<T> >(static_cast< DevMem2D_<T> >(query), (const DevMem2D_<T>*)trains.ptr(), trains.cols, MaskCollection(masks.data),
|
matchDispatcher< L1Dist<T> >(static_cast< DevMem2D_<T> >(query), (const DevMem2D_<T>*)trains.ptr(), trains.cols, MaskCollection(masks.data),
|
||||||
@ -717,19 +717,19 @@ template <typename T> void matchL1_gpu(const DevMem2Db& query, const DevMem2Db&
|
|||||||
trainIdx, imgIdx, distance,
|
trainIdx, imgIdx, distance,
|
||||||
cc, stream);
|
cc, stream);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
template void matchL1_gpu<uchar >(const DevMem2Db& query, const DevMem2Db& trains, const DevMem2D_<PtrStepb>& masks, const DevMem2Di& trainIdx, const DevMem2Di& imgIdx, const DevMem2Df& distance, int cc, cudaStream_t stream);
|
template void matchL1_gpu<uchar >(const DevMem2Db& query, const DevMem2Db& trains, const DevMem2D_<PtrStepb>& masks, const DevMem2Di& trainIdx, const DevMem2Di& imgIdx, const DevMem2Df& distance, int cc, cudaStream_t stream);
|
||||||
//template void matchL1_gpu<schar >(const DevMem2Db& query, const DevMem2Db& trains, const DevMem2D_<PtrStepb>& masks, const DevMem2Di& trainIdx, const DevMem2Di& imgIdx, const DevMem2Df& distance, int cc, cudaStream_t stream);
|
//template void matchL1_gpu<schar >(const DevMem2Db& query, const DevMem2Db& trains, const DevMem2D_<PtrStepb>& masks, const DevMem2Di& trainIdx, const DevMem2Di& imgIdx, const DevMem2Df& distance, int cc, cudaStream_t stream);
|
||||||
template void matchL1_gpu<ushort>(const DevMem2Db& query, const DevMem2Db& trains, const DevMem2D_<PtrStepb>& masks, const DevMem2Di& trainIdx, const DevMem2Di& imgIdx, const DevMem2Df& distance, int cc, cudaStream_t stream);
|
template void matchL1_gpu<ushort>(const DevMem2Db& query, const DevMem2Db& trains, const DevMem2D_<PtrStepb>& masks, const DevMem2Di& trainIdx, const DevMem2Di& imgIdx, const DevMem2Df& distance, int cc, cudaStream_t stream);
|
||||||
template void matchL1_gpu<short >(const DevMem2Db& query, const DevMem2Db& trains, const DevMem2D_<PtrStepb>& masks, const DevMem2Di& trainIdx, const DevMem2Di& imgIdx, const DevMem2Df& distance, int cc, cudaStream_t stream);
|
template void matchL1_gpu<short >(const DevMem2Db& query, const DevMem2Db& trains, const DevMem2D_<PtrStepb>& masks, const DevMem2Di& trainIdx, const DevMem2Di& imgIdx, const DevMem2Df& distance, int cc, cudaStream_t stream);
|
||||||
template void matchL1_gpu<int >(const DevMem2Db& query, const DevMem2Db& trains, const DevMem2D_<PtrStepb>& masks, const DevMem2Di& trainIdx, const DevMem2Di& imgIdx, const DevMem2Df& distance, int cc, cudaStream_t stream);
|
template void matchL1_gpu<int >(const DevMem2Db& query, const DevMem2Db& trains, const DevMem2D_<PtrStepb>& masks, const DevMem2Di& trainIdx, const DevMem2Di& imgIdx, const DevMem2Df& distance, int cc, cudaStream_t stream);
|
||||||
template void matchL1_gpu<float >(const DevMem2Db& query, const DevMem2Db& trains, const DevMem2D_<PtrStepb>& masks, const DevMem2Di& trainIdx, const DevMem2Di& imgIdx, const DevMem2Df& distance, int cc, cudaStream_t stream);
|
template void matchL1_gpu<float >(const DevMem2Db& query, const DevMem2Db& trains, const DevMem2D_<PtrStepb>& masks, const DevMem2Di& trainIdx, const DevMem2Di& imgIdx, const DevMem2Df& distance, int cc, cudaStream_t stream);
|
||||||
|
|
||||||
template <typename T> void matchL2_gpu(const DevMem2Db& query, const DevMem2Db& trains, const DevMem2D_<PtrStepb>& masks,
|
template <typename T> void matchL2_gpu(const DevMem2Db& query, const DevMem2Db& trains, const DevMem2D_<PtrStepb>& masks,
|
||||||
const DevMem2Di& trainIdx, const DevMem2Di& imgIdx, const DevMem2Df& distance,
|
const DevMem2Di& trainIdx, const DevMem2Di& imgIdx, const DevMem2Df& distance,
|
||||||
int cc, cudaStream_t stream)
|
int cc, cudaStream_t stream)
|
||||||
{
|
{
|
||||||
if (masks.data)
|
if (masks.data)
|
||||||
{
|
{
|
||||||
matchDispatcher<L2Dist>(static_cast< DevMem2D_<T> >(query), (const DevMem2D_<T>*)trains.ptr(), trains.cols, MaskCollection(masks.data),
|
matchDispatcher<L2Dist>(static_cast< DevMem2D_<T> >(query), (const DevMem2D_<T>*)trains.ptr(), trains.cols, MaskCollection(masks.data),
|
||||||
@ -742,19 +742,19 @@ template <typename T> void matchL2_gpu(const DevMem2Db& query, const DevMem2Db&
|
|||||||
trainIdx, imgIdx, distance,
|
trainIdx, imgIdx, distance,
|
||||||
cc, stream);
|
cc, stream);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
//template void matchL2_gpu<uchar >(const DevMem2Db& query, const DevMem2Db& trains, const DevMem2D_<PtrStepb>& masks, const DevMem2Di& trainIdx, const DevMem2Di& imgIdx, const DevMem2Df& distance, int cc, cudaStream_t stream);
|
//template void matchL2_gpu<uchar >(const DevMem2Db& query, const DevMem2Db& trains, const DevMem2D_<PtrStepb>& masks, const DevMem2Di& trainIdx, const DevMem2Di& imgIdx, const DevMem2Df& distance, int cc, cudaStream_t stream);
|
||||||
//template void matchL2_gpu<schar >(const DevMem2Db& query, const DevMem2Db& trains, const DevMem2D_<PtrStepb>& masks, const DevMem2Di& trainIdx, const DevMem2Di& imgIdx, const DevMem2Df& distance, int cc, cudaStream_t stream);
|
//template void matchL2_gpu<schar >(const DevMem2Db& query, const DevMem2Db& trains, const DevMem2D_<PtrStepb>& masks, const DevMem2Di& trainIdx, const DevMem2Di& imgIdx, const DevMem2Df& distance, int cc, cudaStream_t stream);
|
||||||
//template void matchL2_gpu<ushort>(const DevMem2Db& query, const DevMem2Db& trains, const DevMem2D_<PtrStepb>& masks, const DevMem2Di& trainIdx, const DevMem2Di& imgIdx, const DevMem2Df& distance, int cc, cudaStream_t stream);
|
//template void matchL2_gpu<ushort>(const DevMem2Db& query, const DevMem2Db& trains, const DevMem2D_<PtrStepb>& masks, const DevMem2Di& trainIdx, const DevMem2Di& imgIdx, const DevMem2Df& distance, int cc, cudaStream_t stream);
|
||||||
//template void matchL2_gpu<short >(const DevMem2Db& query, const DevMem2Db& trains, const DevMem2D_<PtrStepb>& masks, const DevMem2Di& trainIdx, const DevMem2Di& imgIdx, const DevMem2Df& distance, int cc, cudaStream_t stream);
|
//template void matchL2_gpu<short >(const DevMem2Db& query, const DevMem2Db& trains, const DevMem2D_<PtrStepb>& masks, const DevMem2Di& trainIdx, const DevMem2Di& imgIdx, const DevMem2Df& distance, int cc, cudaStream_t stream);
|
||||||
//template void matchL2_gpu<int >(const DevMem2Db& query, const DevMem2Db& trains, const DevMem2D_<PtrStepb>& masks, const DevMem2Di& trainIdx, const DevMem2Di& imgIdx, const DevMem2Df& distance, int cc, cudaStream_t stream);
|
//template void matchL2_gpu<int >(const DevMem2Db& query, const DevMem2Db& trains, const DevMem2D_<PtrStepb>& masks, const DevMem2Di& trainIdx, const DevMem2Di& imgIdx, const DevMem2Df& distance, int cc, cudaStream_t stream);
|
||||||
template void matchL2_gpu<float >(const DevMem2Db& query, const DevMem2Db& trains, const DevMem2D_<PtrStepb>& maskCollection, const DevMem2Di& trainIdx, const DevMem2Di& imgIdx, const DevMem2Df& distance, int cc, cudaStream_t stream);
|
template void matchL2_gpu<float >(const DevMem2Db& query, const DevMem2Db& trains, const DevMem2D_<PtrStepb>& maskCollection, const DevMem2Di& trainIdx, const DevMem2Di& imgIdx, const DevMem2Df& distance, int cc, cudaStream_t stream);
|
||||||
|
|
||||||
template <typename T> void matchHamming_gpu(const DevMem2Db& query, const DevMem2Db& trains, const DevMem2D_<PtrStepb>& masks,
|
template <typename T> void matchHamming_gpu(const DevMem2Db& query, const DevMem2Db& trains, const DevMem2D_<PtrStepb>& masks,
|
||||||
const DevMem2Di& trainIdx, const DevMem2Di& imgIdx, const DevMem2Df& distance,
|
const DevMem2Di& trainIdx, const DevMem2Di& imgIdx, const DevMem2Df& distance,
|
||||||
int cc, cudaStream_t stream)
|
int cc, cudaStream_t stream)
|
||||||
{
|
{
|
||||||
if (masks.data)
|
if (masks.data)
|
||||||
{
|
{
|
||||||
matchDispatcher<HammingDist>(static_cast< DevMem2D_<T> >(query), (const DevMem2D_<T>*)trains.ptr(), trains.cols, MaskCollection(masks.data),
|
matchDispatcher<HammingDist>(static_cast< DevMem2D_<T> >(query), (const DevMem2D_<T>*)trains.ptr(), trains.cols, MaskCollection(masks.data),
|
||||||
@ -767,14 +767,12 @@ template <typename T> void matchHamming_gpu(const DevMem2Db& query, const DevMem
|
|||||||
trainIdx, imgIdx, distance,
|
trainIdx, imgIdx, distance,
|
||||||
cc, stream);
|
cc, stream);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
template void matchHamming_gpu<uchar >(const DevMem2Db& query, const DevMem2Db& trains, const DevMem2D_<PtrStepb>& masks, const DevMem2Di& trainIdx, const DevMem2Di& imgIdx, const DevMem2Df& distance, int cc, cudaStream_t stream);
|
template void matchHamming_gpu<uchar >(const DevMem2Db& query, const DevMem2Db& trains, const DevMem2D_<PtrStepb>& masks, const DevMem2Di& trainIdx, const DevMem2Di& imgIdx, const DevMem2Df& distance, int cc, cudaStream_t stream);
|
||||||
//template void matchHamming_gpu<schar >(const DevMem2Db& query, const DevMem2Db& trains, const DevMem2D_<PtrStepb>& masks, const DevMem2Di& trainIdx, const DevMem2Di& imgIdx, const DevMem2Df& distance, int cc, cudaStream_t stream);
|
//template void matchHamming_gpu<schar >(const DevMem2Db& query, const DevMem2Db& trains, const DevMem2D_<PtrStepb>& masks, const DevMem2Di& trainIdx, const DevMem2Di& imgIdx, const DevMem2Df& distance, int cc, cudaStream_t stream);
|
||||||
template void matchHamming_gpu<ushort>(const DevMem2Db& query, const DevMem2Db& trains, const DevMem2D_<PtrStepb>& masks, const DevMem2Di& trainIdx, const DevMem2Di& imgIdx, const DevMem2Df& distance, int cc, cudaStream_t stream);
|
template void matchHamming_gpu<ushort>(const DevMem2Db& query, const DevMem2Db& trains, const DevMem2D_<PtrStepb>& masks, const DevMem2Di& trainIdx, const DevMem2Di& imgIdx, const DevMem2Df& distance, int cc, cudaStream_t stream);
|
||||||
//template void matchHamming_gpu<short >(const DevMem2Db& query, const DevMem2Db& trains, const DevMem2D_<PtrStepb>& masks, const DevMem2Di& trainIdx, const DevMem2Di& imgIdx, const DevMem2Df& distance, int cc, cudaStream_t stream);
|
//template void matchHamming_gpu<short >(const DevMem2Db& query, const DevMem2Db& trains, const DevMem2D_<PtrStepb>& masks, const DevMem2Di& trainIdx, const DevMem2Di& imgIdx, const DevMem2Df& distance, int cc, cudaStream_t stream);
|
||||||
template void matchHamming_gpu<int >(const DevMem2Db& query, const DevMem2Db& trains, const DevMem2D_<PtrStepb>& masks, const DevMem2Di& trainIdx, const DevMem2Di& imgIdx, const DevMem2Df& distance, int cc, cudaStream_t stream);
|
template void matchHamming_gpu<int >(const DevMem2Db& query, const DevMem2Db& trains, const DevMem2D_<PtrStepb>& masks, const DevMem2Di& trainIdx, const DevMem2Di& imgIdx, const DevMem2Df& distance, int cc, cudaStream_t stream);
|
||||||
|
} // namespace bf_match
|
||||||
} // namespace bf_match
|
}}} // namespace cv { namespace gpu { namespace device {
|
||||||
|
|
||||||
END_OPENCV_DEVICE_NAMESPACE
|
|
||||||
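(Editor's note, for context only and not part of this commit: the explicitly instantiated callers above are normally reached through a small host-side table of function pointers indexed by descriptor depth, with empty slots for the commented-out instantiations. A minimal, hypothetical sketch of that pattern; caller_t and matchL1_callers are illustrative names, not OpenCV API.)

// Hypothetical host-side dispatch over the instantiations above (CUDA C++).
typedef void (*caller_t)(const DevMem2Db& query, const DevMem2Db& train, const DevMem2Db& mask,
                         const DevMem2Di& trainIdx, const DevMem2Df& distance,
                         int cc, cudaStream_t stream);

// One slot per depth CV_8U..CV_64F; zero marks an unsupported depth
// (matching the commented-out schar instantiation above).
static const caller_t matchL1_callers[] =
{
    matchL1_gpu<uchar>, 0 /*schar*/, matchL1_gpu<ushort>,
    matchL1_gpu<short>, matchL1_gpu<int>, matchL1_gpu<float>, 0 /*double*/
};

// Usage sketch:
//     caller_t func = matchL1_callers[query.depth()];
//     CV_Assert(func != 0);
//     func(query, train, mask, trainIdx, distance, cc, stream);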
@ -45,17 +45,17 @@
#include "opencv2/gpu/device/vec_distance.hpp"
#include "opencv2/gpu/device/datamov_utils.hpp"

BEGIN_OPENCV_DEVICE_NAMESPACE
namespace cv { namespace gpu { namespace device
{

namespace bf_radius_match {

///////////////////////////////////////////////////////////////////////////////
// Match Unrolled

template <int BLOCK_SIZE, int MAX_DESC_LEN, bool SAVE_IMG_IDX, typename Dist, typename T, typename Mask>
__global__ void matchUnrolled(const DevMem2D_<T> query, int imgIdx, const DevMem2D_<T> train, float maxDistance, const Mask mask,
    PtrStepi bestTrainIdx, PtrStepi bestImgIdx, PtrStepf bestDistance, unsigned int* nMatches, int maxCount)
{
    namespace bf_radius_match
    {
        ///////////////////////////////////////////////////////////////////////////////
        // Match Unrolled

        template <int BLOCK_SIZE, int MAX_DESC_LEN, bool SAVE_IMG_IDX, typename Dist, typename T, typename Mask>
        __global__ void matchUnrolled(const DevMem2D_<T> query, int imgIdx, const DevMem2D_<T> train, float maxDistance, const Mask mask,
            PtrStepi bestTrainIdx, PtrStepi bestImgIdx, PtrStepf bestDistance, unsigned int* nMatches, int maxCount)
        {
#if __CUDA_ARCH__ >= 110

            extern __shared__ int smem[];
@ -110,12 +110,12 @@ __global__ void matchUnrolled(const DevMem2D_<T> query, int imgIdx, const DevMem
            }

#endif
        }

        template <int BLOCK_SIZE, int MAX_DESC_LEN, typename Dist, typename T, typename Mask>
        void matchUnrolled(const DevMem2D_<T>& query, const DevMem2D_<T>& train, float maxDistance, const Mask& mask,
            const DevMem2Di& trainIdx, const DevMem2Df& distance, const DevMem2D_<unsigned int>& nMatches, cudaStream_t stream)
        {
            const dim3 block(BLOCK_SIZE, BLOCK_SIZE);
            const dim3 grid(divUp(train.rows, BLOCK_SIZE), divUp(query.rows, BLOCK_SIZE));

@ -127,13 +127,13 @@ void matchUnrolled(const DevMem2D_<T>& query, const DevMem2D_<T>& train, float m

            if (stream == 0)
                cudaSafeCall( cudaDeviceSynchronize() );
        }

        template <int BLOCK_SIZE, int MAX_DESC_LEN, typename Dist, typename T>
        void matchUnrolled(const DevMem2D_<T>& query, const DevMem2D_<T>* trains, int n, float maxDistance, const DevMem2Db* masks,
            const DevMem2Di& trainIdx, const DevMem2Di& imgIdx, const DevMem2Df& distance, const DevMem2D_<unsigned int>& nMatches,
            cudaStream_t stream)
        {
            const dim3 block(BLOCK_SIZE, BLOCK_SIZE);

            const size_t smemSize = (2 * BLOCK_SIZE * BLOCK_SIZE) * sizeof(int);
@ -159,15 +159,15 @@ void matchUnrolled(const DevMem2D_<T>& query, const DevMem2D_<T>* trains, int n,

            if (stream == 0)
                cudaSafeCall( cudaDeviceSynchronize() );
        }
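(Editor's note, not part of the diff: the launch configuration in the wrappers above is easiest to see with concrete numbers. With BLOCK_SIZE = 16 a block is 16 x 16 = 256 threads, the dynamic shared-memory request is 2 * 16 * 16 ints = 2048 bytes per block, and divUp is ceiling division, so tiling 1000 train rows by 16 takes 63 blocks. The trailing "if (stream == 0) cudaDeviceSynchronize()" synchronizes only when the caller passed the default stream, leaving asynchronous callers free to overlap. A standalone sketch of the arithmetic, with assumed row counts:)

#include <cstdio>

// Ceiling division, as used for the grid dimensions above.
inline int divUp(int total, int grain) { return (total + grain - 1) / grain; }

int main()
{
    const int BLOCK_SIZE = 16;
    const int queryRows = 500, trainRows = 1000;   // assumed descriptor counts
    const int gridX = divUp(trainRows, BLOCK_SIZE);                        // 63
    const int gridY = divUp(queryRows, BLOCK_SIZE);                        // 32
    const size_t smemSize = (2 * BLOCK_SIZE * BLOCK_SIZE) * sizeof(int);   // 2048
    std::printf("grid = %d x %d, smem = %zu bytes\n", gridX, gridY, smemSize);
    return 0;
}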

        ///////////////////////////////////////////////////////////////////////////////
        // Match

        template <int BLOCK_SIZE, bool SAVE_IMG_IDX, typename Dist, typename T, typename Mask>
        __global__ void match(const DevMem2D_<T> query, int imgIdx, const DevMem2D_<T> train, float maxDistance, const Mask mask,
            PtrStepi bestTrainIdx, PtrStepi bestImgIdx, PtrStepf bestDistance, unsigned int* nMatches, int maxCount)
        {
#if __CUDA_ARCH__ >= 110

            extern __shared__ int smem[];
@ -221,13 +221,13 @@ __global__ void match(const DevMem2D_<T> query, int imgIdx, const DevMem2D_<T> t
            }

#endif
        }

        template <int BLOCK_SIZE, typename Dist, typename T, typename Mask>
        void match(const DevMem2D_<T>& query, const DevMem2D_<T>& train, float maxDistance, const Mask& mask,
            const DevMem2Di& trainIdx, const DevMem2Df& distance, const DevMem2D_<unsigned int>& nMatches,
            cudaStream_t stream)
        {
            const dim3 block(BLOCK_SIZE, BLOCK_SIZE);
            const dim3 grid(divUp(train.rows, BLOCK_SIZE), divUp(query.rows, BLOCK_SIZE));

@ -239,13 +239,13 @@ void match(const DevMem2D_<T>& query, const DevMem2D_<T>& train, float maxDistan

            if (stream == 0)
                cudaSafeCall( cudaDeviceSynchronize() );
        }

        template <int BLOCK_SIZE, typename Dist, typename T>
        void match(const DevMem2D_<T>& query, const DevMem2D_<T>* trains, int n, float maxDistance, const DevMem2Db* masks,
            const DevMem2Di& trainIdx, const DevMem2Di& imgIdx, const DevMem2Df& distance, const DevMem2D_<unsigned int>& nMatches,
            cudaStream_t stream)
        {
            const dim3 block(BLOCK_SIZE, BLOCK_SIZE);

            const size_t smemSize = (2 * BLOCK_SIZE * BLOCK_SIZE) * sizeof(int);
@ -271,16 +271,16 @@ void match(const DevMem2D_<T>& query, const DevMem2D_<T>* trains, int n, float m

            if (stream == 0)
                cudaSafeCall( cudaDeviceSynchronize() );
        }

        ///////////////////////////////////////////////////////////////////////////////
        // Match dispatcher

        template <typename Dist, typename T, typename Mask>
        void matchDispatcher(const DevMem2D_<T>& query, const DevMem2D_<T>& train, float maxDistance, const Mask& mask,
            const DevMem2Di& trainIdx, const DevMem2Df& distance, const DevMem2D_<unsigned int>& nMatches,
            int cc, cudaStream_t stream)
        {
            if (query.cols <= 64)
            {
                matchUnrolled<16, 64, Dist>(query, train, maxDistance, mask, trainIdx, distance, nMatches, stream);
@ -305,13 +305,13 @@ void matchDispatcher(const DevMem2D_<T>& query, const DevMem2D_<T>& train, float
            {
                match<16, Dist>(query, train, maxDistance, mask, trainIdx, distance, nMatches, stream);
            }
        }

        template <typename Dist, typename T>
        void matchDispatcher(const DevMem2D_<T>& query, const DevMem2D_<T>* trains, int n, float maxDistance, const DevMem2Db* masks,
            const DevMem2Di& trainIdx, const DevMem2Di& imgIdx, const DevMem2Df& distance, const DevMem2D_<unsigned int>& nMatches,
            int cc, cudaStream_t stream)
        {
            if (query.cols <= 64)
            {
                matchUnrolled<16, 64, Dist>(query, trains, n, maxDistance, masks, trainIdx, imgIdx, distance, nMatches, stream);
@ -336,15 +336,15 @@ void matchDispatcher(const DevMem2D_<T>& query, const DevMem2D_<T>* trains, int
            {
                match<16, Dist>(query, trains, n, maxDistance, masks, trainIdx, imgIdx, distance, nMatches, stream);
            }
        }
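(Editor's note: the "#if __CUDA_ARCH__ >= 110" guards in the kernels above reflect a hardware requirement: results are appended to the per-query lists through a global atomic counter (the nMatches argument), and global-memory atomics only exist from compute capability 1.1 onward, so on 1.0 devices the kernel body compiles to a no-op. A minimal standalone sketch of the same guarded-append idiom; all names here are assumed, not taken from this file:)

// Sketch of a guarded atomic append (assumed names, CUDA C++).
__global__ void appendIfClose(const float* dist, int n, float maxDistance,
                              int* outIdx, unsigned int* nMatches, int capacity)
{
    const int i = blockIdx.x * blockDim.x + threadIdx.x;
#if __CUDA_ARCH__ >= 110
    if (i < n && dist[i] < maxDistance)
    {
        // Reserve an output slot; atomicInc requires sm_11, hence the guard.
        const unsigned int slot = atomicInc(nMatches, (unsigned int)-1);
        if (slot < (unsigned int)capacity)
            outIdx[slot] = i;
    }
#endif
}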

        ///////////////////////////////////////////////////////////////////////////////
        // Radius Match caller

        template <typename T> void matchL1_gpu(const DevMem2Db& query, const DevMem2Db& train, float maxDistance, const DevMem2Db& mask,
            const DevMem2Di& trainIdx, const DevMem2Df& distance, const DevMem2D_<unsigned int>& nMatches,
            int cc, cudaStream_t stream)
        {
            if (mask.data)
            {
                matchDispatcher< L1Dist<T> >(static_cast< DevMem2D_<T> >(query), static_cast< DevMem2D_<T> >(train), maxDistance, SingleMask(mask),
@ -357,19 +357,19 @@ template <typename T> void matchL1_gpu(const DevMem2Db& query, const DevMem2Db&
                    trainIdx, distance, nMatches,
                    cc, stream);
            }
        }

        template void matchL1_gpu<uchar >(const DevMem2Db& queryDescs, const DevMem2Db& trainDescs, float maxDistance, const DevMem2Db& mask, const DevMem2Di& trainIdx, const DevMem2Df& distance, const DevMem2D_<unsigned int>& nMatches, int cc, cudaStream_t stream);
        //template void matchL1_gpu<schar >(const DevMem2Db& queryDescs, const DevMem2Db& trainDescs, float maxDistance, const DevMem2Db& mask, const DevMem2Di& trainIdx, const DevMem2Df& distance, const DevMem2D_<unsigned int>& nMatches, int cc, cudaStream_t stream);
        template void matchL1_gpu<ushort>(const DevMem2Db& queryDescs, const DevMem2Db& trainDescs, float maxDistance, const DevMem2Db& mask, const DevMem2Di& trainIdx, const DevMem2Df& distance, const DevMem2D_<unsigned int>& nMatches, int cc, cudaStream_t stream);
        template void matchL1_gpu<short >(const DevMem2Db& queryDescs, const DevMem2Db& trainDescs, float maxDistance, const DevMem2Db& mask, const DevMem2Di& trainIdx, const DevMem2Df& distance, const DevMem2D_<unsigned int>& nMatches, int cc, cudaStream_t stream);
        template void matchL1_gpu<int   >(const DevMem2Db& queryDescs, const DevMem2Db& trainDescs, float maxDistance, const DevMem2Db& mask, const DevMem2Di& trainIdx, const DevMem2Df& distance, const DevMem2D_<unsigned int>& nMatches, int cc, cudaStream_t stream);
        template void matchL1_gpu<float >(const DevMem2Db& queryDescs, const DevMem2Db& trainDescs, float maxDistance, const DevMem2Db& mask, const DevMem2Di& trainIdx, const DevMem2Df& distance, const DevMem2D_<unsigned int>& nMatches, int cc, cudaStream_t stream);

        template <typename T> void matchL2_gpu(const DevMem2Db& query, const DevMem2Db& train, float maxDistance, const DevMem2Db& mask,
            const DevMem2Di& trainIdx, const DevMem2Df& distance, const DevMem2D_<unsigned int>& nMatches,
            int cc, cudaStream_t stream)
        {
            if (mask.data)
            {
                matchDispatcher<L2Dist>(static_cast< DevMem2D_<T> >(query), static_cast< DevMem2D_<T> >(train), maxDistance, SingleMask(mask),
@ -382,19 +382,19 @@ template <typename T> void matchL2_gpu(const DevMem2Db& query, const DevMem2Db&
                    trainIdx, distance, nMatches,
                    cc, stream);
            }
        }

        //template void matchL2_gpu<uchar >(const DevMem2Db& queryDescs, const DevMem2Db& trainDescs, float maxDistance, const DevMem2Db& mask, const DevMem2Di& trainIdx, const DevMem2Df& distance, const DevMem2D_<unsigned int>& nMatches, int cc, cudaStream_t stream);
        //template void matchL2_gpu<schar >(const DevMem2Db& queryDescs, const DevMem2Db& trainDescs, float maxDistance, const DevMem2Db& mask, const DevMem2Di& trainIdx, const DevMem2Df& distance, const DevMem2D_<unsigned int>& nMatches, int cc, cudaStream_t stream);
        //template void matchL2_gpu<ushort>(const DevMem2Db& queryDescs, const DevMem2Db& trainDescs, float maxDistance, const DevMem2Db& mask, const DevMem2Di& trainIdx, const DevMem2Df& distance, const DevMem2D_<unsigned int>& nMatches, int cc, cudaStream_t stream);
        //template void matchL2_gpu<short >(const DevMem2Db& queryDescs, const DevMem2Db& trainDescs, float maxDistance, const DevMem2Db& mask, const DevMem2Di& trainIdx, const DevMem2Df& distance, const DevMem2D_<unsigned int>& nMatches, int cc, cudaStream_t stream);
        //template void matchL2_gpu<int   >(const DevMem2Db& queryDescs, const DevMem2Db& trainDescs, float maxDistance, const DevMem2Db& mask, const DevMem2Di& trainIdx, const DevMem2Df& distance, const DevMem2D_<unsigned int>& nMatches, int cc, cudaStream_t stream);
        template void matchL2_gpu<float >(const DevMem2Db& queryDescs, const DevMem2Db& trainDescs, float maxDistance, const DevMem2Db& mask, const DevMem2Di& trainIdx, const DevMem2Df& distance, const DevMem2D_<unsigned int>& nMatches, int cc, cudaStream_t stream);

        template <typename T> void matchHamming_gpu(const DevMem2Db& query, const DevMem2Db& train, float maxDistance, const DevMem2Db& mask,
            const DevMem2Di& trainIdx, const DevMem2Df& distance, const DevMem2D_<unsigned int>& nMatches,
            int cc, cudaStream_t stream)
        {
            if (mask.data)
            {
                matchDispatcher<HammingDist>(static_cast< DevMem2D_<T> >(query), static_cast< DevMem2D_<T> >(train), maxDistance, SingleMask(mask),
@ -407,61 +407,59 @@ template <typename T> void matchHamming_gpu(const DevMem2Db& query, const DevMem
                    trainIdx, distance, nMatches,
                    cc, stream);
            }
        }

        template void matchHamming_gpu<uchar >(const DevMem2Db& queryDescs, const DevMem2Db& trainDescs, float maxDistance, const DevMem2Db& mask, const DevMem2Di& trainIdx, const DevMem2Df& distance, const DevMem2D_<unsigned int>& nMatches, int cc, cudaStream_t stream);
        //template void matchHamming_gpu<schar >(const DevMem2Db& queryDescs, const DevMem2Db& trainDescs, float maxDistance, const DevMem2Db& mask, const DevMem2Di& trainIdx, const DevMem2Df& distance, const DevMem2D_<unsigned int>& nMatches, int cc, cudaStream_t stream);
        template void matchHamming_gpu<ushort>(const DevMem2Db& queryDescs, const DevMem2Db& trainDescs, float maxDistance, const DevMem2Db& mask, const DevMem2Di& trainIdx, const DevMem2Df& distance, const DevMem2D_<unsigned int>& nMatches, int cc, cudaStream_t stream);
        //template void matchHamming_gpu<short >(const DevMem2Db& queryDescs, const DevMem2Db& trainDescs, float maxDistance, const DevMem2Db& mask, const DevMem2Di& trainIdx, const DevMem2Df& distance, const DevMem2D_<unsigned int>& nMatches, int cc, cudaStream_t stream);
        template void matchHamming_gpu<int   >(const DevMem2Db& queryDescs, const DevMem2Db& trainDescs, float maxDistance, const DevMem2Db& mask, const DevMem2Di& trainIdx, const DevMem2Df& distance, const DevMem2D_<unsigned int>& nMatches, int cc, cudaStream_t stream);

        template <typename T> void matchL1_gpu(const DevMem2Db& query, const DevMem2Db* trains, int n, float maxDistance, const DevMem2Db* masks,
            const DevMem2Di& trainIdx, const DevMem2Di& imgIdx, const DevMem2Df& distance, const DevMem2D_<unsigned int>& nMatches,
            int cc, cudaStream_t stream)
        {
            matchDispatcher< L1Dist<T> >(static_cast< DevMem2D_<T> >(query), (const DevMem2D_<T>*)trains, n, maxDistance, masks,
                trainIdx, imgIdx, distance, nMatches,
                cc, stream);
        }

        template void matchL1_gpu<uchar >(const DevMem2Db& query, const DevMem2Db* trains, int n, float maxDistance, const DevMem2Db* masks, const DevMem2Di& trainIdx, const DevMem2Di& imgIdx, const DevMem2Df& distance, const DevMem2D_<unsigned int>& nMatches, int cc, cudaStream_t stream);
        //template void matchL1_gpu<schar >(const DevMem2Db& query, const DevMem2Db* trains, int n, float maxDistance, const DevMem2Db* masks, const DevMem2Di& trainIdx, const DevMem2Di& imgIdx, const DevMem2Df& distance, const DevMem2D_<unsigned int>& nMatches, int cc, cudaStream_t stream);
        template void matchL1_gpu<ushort>(const DevMem2Db& query, const DevMem2Db* trains, int n, float maxDistance, const DevMem2Db* masks, const DevMem2Di& trainIdx, const DevMem2Di& imgIdx, const DevMem2Df& distance, const DevMem2D_<unsigned int>& nMatches, int cc, cudaStream_t stream);
        template void matchL1_gpu<short >(const DevMem2Db& query, const DevMem2Db* trains, int n, float maxDistance, const DevMem2Db* masks, const DevMem2Di& trainIdx, const DevMem2Di& imgIdx, const DevMem2Df& distance, const DevMem2D_<unsigned int>& nMatches, int cc, cudaStream_t stream);
        template void matchL1_gpu<int   >(const DevMem2Db& query, const DevMem2Db* trains, int n, float maxDistance, const DevMem2Db* masks, const DevMem2Di& trainIdx, const DevMem2Di& imgIdx, const DevMem2Df& distance, const DevMem2D_<unsigned int>& nMatches, int cc, cudaStream_t stream);
        template void matchL1_gpu<float >(const DevMem2Db& query, const DevMem2Db* trains, int n, float maxDistance, const DevMem2Db* masks, const DevMem2Di& trainIdx, const DevMem2Di& imgIdx, const DevMem2Df& distance, const DevMem2D_<unsigned int>& nMatches, int cc, cudaStream_t stream);

        template <typename T> void matchL2_gpu(const DevMem2Db& query, const DevMem2Db* trains, int n, float maxDistance, const DevMem2Db* masks,
            const DevMem2Di& trainIdx, const DevMem2Di& imgIdx, const DevMem2Df& distance, const DevMem2D_<unsigned int>& nMatches,
            int cc, cudaStream_t stream)
        {
            matchDispatcher<L2Dist>(static_cast< DevMem2D_<T> >(query), (const DevMem2D_<T>*)trains, n, maxDistance, masks,
                trainIdx, imgIdx, distance, nMatches,
                cc, stream);
        }

        //template void matchL2_gpu<uchar >(const DevMem2Db& query, const DevMem2Db* trains, int n, float maxDistance, const DevMem2Db* masks, const DevMem2Di& trainIdx, const DevMem2Di& imgIdx, const DevMem2Df& distance, const DevMem2D_<unsigned int>& nMatches, int cc, cudaStream_t stream);
        //template void matchL2_gpu<schar >(const DevMem2Db& query, const DevMem2Db* trains, int n, float maxDistance, const DevMem2Db* masks, const DevMem2Di& trainIdx, const DevMem2Di& imgIdx, const DevMem2Df& distance, const DevMem2D_<unsigned int>& nMatches, int cc, cudaStream_t stream);
        //template void matchL2_gpu<ushort>(const DevMem2Db& query, const DevMem2Db* trains, int n, float maxDistance, const DevMem2Db* masks, const DevMem2Di& trainIdx, const DevMem2Di& imgIdx, const DevMem2Df& distance, const DevMem2D_<unsigned int>& nMatches, int cc, cudaStream_t stream);
        //template void matchL2_gpu<short >(const DevMem2Db& query, const DevMem2Db* trains, int n, float maxDistance, const DevMem2Db* masks, const DevMem2Di& trainIdx, const DevMem2Di& imgIdx, const DevMem2Df& distance, const DevMem2D_<unsigned int>& nMatches, int cc, cudaStream_t stream);
        //template void matchL2_gpu<int   >(const DevMem2Db& query, const DevMem2Db* trains, int n, float maxDistance, const DevMem2Db* masks, const DevMem2Di& trainIdx, const DevMem2Di& imgIdx, const DevMem2Df& distance, const DevMem2D_<unsigned int>& nMatches, int cc, cudaStream_t stream);
        template void matchL2_gpu<float >(const DevMem2Db& query, const DevMem2Db* trains, int n, float maxDistance, const DevMem2Db* masks, const DevMem2Di& trainIdx, const DevMem2Di& imgIdx, const DevMem2Df& distance, const DevMem2D_<unsigned int>& nMatches, int cc, cudaStream_t stream);

        template <typename T> void matchHamming_gpu(const DevMem2Db& query, const DevMem2Db* trains, int n, float maxDistance, const DevMem2Db* masks,
            const DevMem2Di& trainIdx, const DevMem2Di& imgIdx, const DevMem2Df& distance, const DevMem2D_<unsigned int>& nMatches,
            int cc, cudaStream_t stream)
        {
            matchDispatcher<HammingDist>(static_cast< DevMem2D_<T> >(query), (const DevMem2D_<T>*)trains, n, maxDistance, masks,
                trainIdx, imgIdx, distance, nMatches,
                cc, stream);
        }

        template void matchHamming_gpu<uchar >(const DevMem2Db& query, const DevMem2Db* trains, int n, float maxDistance, const DevMem2Db* masks, const DevMem2Di& trainIdx, const DevMem2Di& imgIdx, const DevMem2Df& distance, const DevMem2D_<unsigned int>& nMatches, int cc, cudaStream_t stream);
        //template void matchHamming_gpu<schar >(const DevMem2Db& query, const DevMem2Db* trains, int n, float maxDistance, const DevMem2Db* masks, const DevMem2Di& trainIdx, const DevMem2Di& imgIdx, const DevMem2Df& distance, const DevMem2D_<unsigned int>& nMatches, int cc, cudaStream_t stream);
        template void matchHamming_gpu<ushort>(const DevMem2Db& query, const DevMem2Db* trains, int n, float maxDistance, const DevMem2Db* masks, const DevMem2Di& trainIdx, const DevMem2Di& imgIdx, const DevMem2Df& distance, const DevMem2D_<unsigned int>& nMatches, int cc, cudaStream_t stream);
        //template void matchHamming_gpu<short >(const DevMem2Db& query, const DevMem2Db* trains, int n, float maxDistance, const DevMem2Db* masks, const DevMem2Di& trainIdx, const DevMem2Di& imgIdx, const DevMem2Df& distance, const DevMem2D_<unsigned int>& nMatches, int cc, cudaStream_t stream);
        template void matchHamming_gpu<int   >(const DevMem2Db& query, const DevMem2Db* trains, int n, float maxDistance, const DevMem2Db* masks, const DevMem2Di& trainIdx, const DevMem2Di& imgIdx, const DevMem2Df& distance, const DevMem2D_<unsigned int>& nMatches, int cc, cudaStream_t stream);

    } // namespace bf_radius_match
}}} // namespace cv { namespace gpu { namespace device

END_OPENCV_DEVICE_NAMESPACE
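(Editor's note: one consumer-side detail of the radius-match interface above: the trainIdx/distance rows have a fixed capacity, while nMatches counts every candidate found for a query, so the count can exceed the row width and must be clamped on readback. A hypothetical host-side sketch; the flat row-major layout and all names are assumptions, not this file's API:)

#include <algorithm>
#include <vector>

struct Match { int query, train; float distance; };

// Clamp nMatches[q] to the allocated row width before reading row q.
std::vector<Match> collectMatches(const unsigned int* nMatches,
                                  const int* trainIdx, const float* distance,
                                  int rows, int cols)
{
    std::vector<Match> out;
    for (int q = 0; q < rows; ++q)
    {
        const int usable = std::min(static_cast<int>(nMatches[q]), cols);
        for (int j = 0; j < usable; ++j)
        {
            const Match m = { q, trainIdx[q * cols + j], distance[q * cols + j] };
            out.push_back(m);
        }
    }
    return out;
}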
@ -43,22 +43,22 @@
#include "internal_shared.hpp"
#include "opencv2/gpu/device/limits.hpp"

BEGIN_OPENCV_DEVICE_NAMESPACE
namespace cv { namespace gpu { namespace device
{

namespace bilateral_filter {

__constant__ float* ctable_color;
__constant__ float* ctable_space;
__constant__ size_t ctable_space_step;

__constant__ int cndisp;
__constant__ int cradius;

__constant__ short cedge_disc;
__constant__ short cmax_disc;

void load_constants(float* table_color, DevMem2Df table_space, int ndisp, int radius, short edge_disc, short max_disc)
{
    namespace bilateral_filter
    {
        __constant__ float* ctable_color;
        __constant__ float* ctable_space;
        __constant__ size_t ctable_space_step;

        __constant__ int cndisp;
        __constant__ int cradius;

        __constant__ short cedge_disc;
        __constant__ short cmax_disc;

        void load_constants(float* table_color, DevMem2Df table_space, int ndisp, int radius, short edge_disc, short max_disc)
        {
            cudaSafeCall( cudaMemcpyToSymbol(ctable_color, &table_color, sizeof(table_color)) );
            cudaSafeCall( cudaMemcpyToSymbol(ctable_space, &table_space.data, sizeof(table_space.data)) );
            size_t table_space_step = table_space.step / sizeof(float);
@ -69,11 +69,11 @@ void load_constants(float* table_color, DevMem2Df table_space, int ndisp, int ra

            cudaSafeCall( cudaMemcpyToSymbol(cedge_disc, &edge_disc, sizeof(short)) );
            cudaSafeCall( cudaMemcpyToSymbol(cmax_disc, &max_disc, sizeof(short)) );
        }
|
|
||||||
template <int channels>
|
template <int channels>
|
||||||
struct DistRgbMax
|
struct DistRgbMax
|
||||||
{
|
{
|
||||||
static __device__ __forceinline__ uchar calc(const uchar* a, const uchar* b)
|
static __device__ __forceinline__ uchar calc(const uchar* a, const uchar* b)
|
||||||
{
|
{
|
||||||
uchar x = ::abs(a[0] - b[0]);
|
uchar x = ::abs(a[0] - b[0]);
|
||||||
@ -81,20 +81,20 @@ struct DistRgbMax
|
|||||||
uchar z = ::abs(a[2] - b[2]);
|
uchar z = ::abs(a[2] - b[2]);
|
||||||
return (::max(::max(x, y), z));
|
return (::max(::max(x, y), z));
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
template <>
|
template <>
|
||||||
struct DistRgbMax<1>
|
struct DistRgbMax<1>
|
||||||
{
|
{
|
||||||
static __device__ __forceinline__ uchar calc(const uchar* a, const uchar* b)
|
static __device__ __forceinline__ uchar calc(const uchar* a, const uchar* b)
|
||||||
{
|
{
|
||||||
return ::abs(a[0] - b[0]);
|
return ::abs(a[0] - b[0]);
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
template <int channels, typename T>
|
template <int channels, typename T>
|
||||||
__global__ void bilateral_filter(int t, T* disp, size_t disp_step, const uchar* img, size_t img_step, int h, int w)
|
__global__ void bilateral_filter(int t, T* disp, size_t disp_step, const uchar* img, size_t img_step, int h, int w)
|
||||||
{
|
{
|
||||||
const int y = blockIdx.y * blockDim.y + threadIdx.y;
|
const int y = blockIdx.y * blockDim.y + threadIdx.y;
|
||||||
const int x = ((blockIdx.x * blockDim.x + threadIdx.x) << 1) + ((y + t) & 1);
|
const int x = ((blockIdx.x * blockDim.x + threadIdx.x) << 1) + ((y + t) & 1);
|
||||||
|
|
||||||
@ -173,11 +173,11 @@ __global__ void bilateral_filter(int t, T* disp, size_t disp_step, const uchar*
|
|||||||
*(disp + y * disp_step + x) = dp[id];
|
*(disp + y * disp_step + x) = dp[id];
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
template <typename T>
|
template <typename T>
|
||||||
void bilateral_filter_caller(DevMem2D_<T> disp, DevMem2Db img, int channels, int iters, cudaStream_t stream)
|
void bilateral_filter_caller(DevMem2D_<T> disp, DevMem2Db img, int channels, int iters, cudaStream_t stream)
|
||||||
{
|
{
|
||||||
dim3 threads(32, 8, 1);
|
dim3 threads(32, 8, 1);
|
||||||
dim3 grid(1, 1, 1);
|
dim3 grid(1, 1, 1);
|
||||||
grid.x = divUp(disp.cols, threads.x << 1);
|
grid.x = divUp(disp.cols, threads.x << 1);
|
||||||
@ -211,18 +211,16 @@ void bilateral_filter_caller(DevMem2D_<T> disp, DevMem2Db img, int channels, int
|
|||||||
|
|
||||||
if (stream != 0)
|
if (stream != 0)
|
||||||
cudaSafeCall( cudaDeviceSynchronize() );
|
cudaSafeCall( cudaDeviceSynchronize() );
|
||||||
}
|
}
|
||||||
|
|
||||||
void bilateral_filter_gpu(DevMem2Db disp, DevMem2Db img, int channels, int iters, cudaStream_t stream)
|
void bilateral_filter_gpu(DevMem2Db disp, DevMem2Db img, int channels, int iters, cudaStream_t stream)
|
||||||
{
|
{
|
||||||
bilateral_filter_caller(disp, img, channels, iters, stream);
|
bilateral_filter_caller(disp, img, channels, iters, stream);
|
||||||
}
|
}
|
||||||
|
|
||||||
void bilateral_filter_gpu(DevMem2D_<short> disp, DevMem2Db img, int channels, int iters, cudaStream_t stream)
|
void bilateral_filter_gpu(DevMem2D_<short> disp, DevMem2Db img, int channels, int iters, cudaStream_t stream)
|
||||||
{
|
{
|
||||||
bilateral_filter_caller(disp, img, channels, iters, stream);
|
bilateral_filter_caller(disp, img, channels, iters, stream);
|
||||||
}
|
}
|
||||||
|
} // namespace bilateral_filter
|
||||||
} // namespace bilateral_filter
|
}}} // namespace cv { namespace gpu { namespace device
|
||||||
|
|
||||||
END_OPENCV_DEVICE_NAMESPACE
|
|
||||||
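Note: the index math `x = ((blockIdx.x * blockDim.x + threadIdx.x) << 1) + ((y + t) & 1)` in bilateral_filter implements a checkerboard sweep: pass t = 0 updates cells where (x + y) is even, pass t = 1 the odd ones, so a cell and its 4-neighbours are never written in the same pass. A host-side sketch that checks the coverage (toy 8x8 sizes assumed for illustration):

#include <cassert>

int main()
{
    const int h = 8, w = 8;
    int visits[8][8] = {};
    for (int t = 0; t < 2; ++t)
        for (int y = 0; y < h; ++y)
            for (int i = 0; i < w / 2; ++i)
                ++visits[y][(i << 1) + ((y + t) & 1)]; // same formula as the kernel
    for (int y = 0; y < h; ++y)
        for (int x = 0; x < w; ++x)
            assert(visits[y][x] == 1); // every pixel visited exactly once over both passes
    return 0;
}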
@ -42,14 +42,14 @@

#include "internal_shared.hpp"

BEGIN_OPENCV_DEVICE_NAMESPACE
namespace cv { namespace gpu { namespace device
{

namespace blend {
namespace blend
{

template <typename T>
__global__ void blendLinearKernel(int rows, int cols, int cn, const PtrStep<T> img1, const PtrStep<T> img2,
const PtrStepf weights1, const PtrStepf weights2, PtrStep<T> result)
{
int x = blockIdx.x * blockDim.x + threadIdx.x;
int y = blockIdx.y * blockDim.y + threadIdx.y;

@ -62,11 +62,11 @@ __global__ void blendLinearKernel(int rows, int cols, int cn, const PtrStep<T> i

T p2 = img2.ptr(y)[x];
result.ptr(y)[x] = (p1 * w1 + p2 * w2) / (w1 + w2 + 1e-5f);
}
}

template <typename T>
void blendLinearCaller(int rows, int cols, int cn, PtrStep<T> img1, PtrStep<T> img2, PtrStepf weights1, PtrStepf weights2, PtrStep<T> result, cudaStream_t stream)
{
dim3 threads(16, 16);
dim3 grid(divUp(cols * cn, threads.x), divUp(rows, threads.y));

@ -75,15 +75,15 @@ void blendLinearCaller(int rows, int cols, int cn, PtrStep<T> img1, PtrStep<T> i

if (stream == 0)
cudaSafeCall(cudaDeviceSynchronize());
}

template void blendLinearCaller<uchar>(int, int, int, PtrStep<uchar>, PtrStep<uchar>, PtrStepf, PtrStepf, PtrStep<uchar>, cudaStream_t stream);
template void blendLinearCaller<float>(int, int, int, PtrStep<float>, PtrStep<float>, PtrStepf, PtrStepf, PtrStep<float>, cudaStream_t stream);

__global__ void blendLinearKernel8UC4(int rows, int cols, const PtrStepb img1, const PtrStepb img2,
const PtrStepf weights1, const PtrStepf weights2, PtrStepb result)
{
int x = blockIdx.x * blockDim.x + threadIdx.x;
int y = blockIdx.y * blockDim.y + threadIdx.y;

@ -99,10 +99,10 @@ __global__ void blendLinearKernel8UC4(int rows, int cols, const PtrStepb img1, c

((uchar4*)result.ptr(y))[x] = make_uchar4(p1.x * w1 + p2.x * w2, p1.y * w1 + p2.y * w2,
p1.z * w1 + p2.z * w2, p1.w * w1 + p2.w * w2);
}
}

void blendLinearCaller8UC4(int rows, int cols, PtrStepb img1, PtrStepb img2, PtrStepf weights1, PtrStepf weights2, PtrStepb result, cudaStream_t stream)
{
dim3 threads(16, 16);
dim3 grid(divUp(cols, threads.x), divUp(rows, threads.y));

@ -111,8 +111,6 @@ void blendLinearCaller8UC4(int rows, int cols, PtrStepb img1, PtrStepb img2, Ptr

if (stream == 0)
cudaSafeCall(cudaDeviceSynchronize());
}

} // namespace blend
}}} // namespace cv { namespace gpu { namespace device

END_OPENCV_DEVICE_NAMESPACE
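Note: blendLinearKernel computes result = (p1 * w1 + p2 * w2) / (w1 + w2 + 1e-5f); the 1e-5f term only guards against a zero weight sum and adds a negligible bias elsewhere. A one-line host equivalent for a single pixel:

float blendPixel(float p1, float w1, float p2, float w2)
{
    return (p1 * w1 + p2 * w2) / (w1 + w2 + 1e-5f); // epsilon avoids 0/0 where both weights vanish
}
// e.g. blendPixel(10.f, 1.f, 20.f, 3.f) is approximately 17.5f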
@ -44,12 +44,12 @@

#include "opencv2/gpu/device/transform.hpp"
#include "opencv2/gpu/device/functional.hpp"

BEGIN_OPENCV_DEVICE_NAMESPACE
namespace cv { namespace gpu { namespace device
{

#define SOLVE_PNP_RANSAC_MAX_NUM_ITERS 200

namespace transform_points
{
__constant__ float3 crot0;
__constant__ float3 crot1;
__constant__ float3 crot2;

@ -74,12 +74,12 @@ namespace transform_points

cudaSafeCall(cudaMemcpyToSymbol(crot1, rot + 3, sizeof(float) * 3));
cudaSafeCall(cudaMemcpyToSymbol(crot2, rot + 6, sizeof(float) * 3));
cudaSafeCall(cudaMemcpyToSymbol(ctransl, transl, sizeof(float) * 3));
OPENCV_DEVICE_NAMESPACE_ transform(src, dst, TransformOp(), stream);
::cv::gpu::device::transform(src, dst, TransformOp(), stream);
}
} // namespace transform_points

namespace project_points
{
__constant__ float3 crot0;
__constant__ float3 crot1;
__constant__ float3 crot2;

@ -113,12 +113,12 @@ namespace project_points

cudaSafeCall(cudaMemcpyToSymbol(ctransl, transl, sizeof(float) * 3));
cudaSafeCall(cudaMemcpyToSymbol(cproj0, proj, sizeof(float) * 3));
cudaSafeCall(cudaMemcpyToSymbol(cproj1, proj + 3, sizeof(float) * 3));
OPENCV_DEVICE_NAMESPACE_ transform(src, dst, ProjectOp(), stream);
::cv::gpu::device::transform(src, dst, ProjectOp(), stream);
}
} // namespace project_points

namespace solve_pnp_ransac
{
__constant__ float3 crot_matrices[SOLVE_PNP_RANSAC_MAX_NUM_ITERS * 3];
__constant__ float3 ctransl_vectors[SOLVE_PNP_RANSAC_MAX_NUM_ITERS];

@ -187,6 +187,5 @@ namespace solve_pnp_ransac

cudaSafeCall( cudaDeviceSynchronize() );
}
} // namespace solvepnp_ransac

}}} // namespace cv { namespace gpu { namespace device

END_OPENCV_DEVICE_NAMESPACE
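Note: transform_points keeps the rotation rows (crot0..crot2) and the translation in constant memory and lets device::transform map every input point through R * p + t. A hedged host-side sketch of that per-point operation (float3_ and dot3 are local stand-ins for illustration, not the device types):

struct float3_ { float x, y, z; };

static float dot3(float3_ a, float3_ b) { return a.x * b.x + a.y * b.y + a.z * b.z; }

float3_ transformPoint(float3_ p, float3_ rot0, float3_ rot1, float3_ rot2, float3_ transl)
{
    float3_ r;
    r.x = dot3(rot0, p) + transl.x; // row 0 of R, then translate
    r.y = dot3(rot1, p) + transl.y;
    r.z = dot3(rot2, p) + transl.z;
    return r;
}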
@ -44,12 +44,12 @@

#include <algorithm>
#include "internal_shared.hpp"

BEGIN_OPENCV_DEVICE_NAMESPACE
namespace cv { namespace gpu { namespace device
{

namespace canny {
namespace canny
{

__global__ void calcSobelRowPass(const PtrStepb src, PtrStepi dx_buf, PtrStepi dy_buf, int rows, int cols)
{
__shared__ int smem[16][18];

const int j = blockIdx.x * blockDim.x + threadIdx.x;

@ -71,10 +71,10 @@ __global__ void calcSobelRowPass(const PtrStepb src, PtrStepi dx_buf, PtrStepi d

dy_buf.ptr(i)[j] = smem[threadIdx.y][threadIdx.x] + 2 * smem[threadIdx.y][threadIdx.x + 1] + smem[threadIdx.y][threadIdx.x + 2];
}
}

void calcSobelRowPass_gpu(PtrStepb src, PtrStepi dx_buf, PtrStepi dy_buf, int rows, int cols)
{
dim3 block(16, 16, 1);
dim3 grid(divUp(cols, block.x), divUp(rows, block.y), 1);

@ -82,26 +82,26 @@ void calcSobelRowPass_gpu(PtrStepb src, PtrStepi dx_buf, PtrStepi dy_buf, int ro

cudaSafeCall( cudaGetLastError() );

cudaSafeCall(cudaThreadSynchronize());
}

struct L1
{
static __device__ __forceinline__ float calc(int x, int y)
{
return ::abs(x) + ::abs(y);
}
};
struct L2
{
static __device__ __forceinline__ float calc(int x, int y)
{
return ::sqrtf(x * x + y * y);
}
};

template <typename Norm> __global__ void calcMagnitude(const PtrStepi dx_buf, const PtrStepi dy_buf,
PtrStepi dx, PtrStepi dy, PtrStepf mag, int rows, int cols)
{
__shared__ int sdx[18][16];
__shared__ int sdy[18][16];

@ -133,10 +133,10 @@ template <typename Norm> __global__ void calcMagnitude(const PtrStepi dx_buf, co

mag.ptr(i + 1)[j + 1] = Norm::calc(x, y);
}
}
}

void calcMagnitude_gpu(PtrStepi dx_buf, PtrStepi dy_buf, PtrStepi dx, PtrStepi dy, PtrStepf mag, int rows, int cols, bool L2Grad)
{
dim3 block(16, 16, 1);
dim3 grid(divUp(cols, block.x), divUp(rows, block.y), 1);

@ -148,19 +148,19 @@ void calcMagnitude_gpu(PtrStepi dx_buf, PtrStepi dy_buf, PtrStepi dx, PtrStepi d

cudaSafeCall( cudaGetLastError() );

cudaSafeCall(cudaThreadSynchronize());
}

template <typename Norm> __global__ void calcMagnitude(PtrStepi dx, PtrStepi dy, PtrStepf mag, int rows, int cols)
{
const int j = blockIdx.x * blockDim.x + threadIdx.x;
const int i = blockIdx.y * blockDim.y + threadIdx.y;

if (i < rows && j < cols)
mag.ptr(i + 1)[j + 1] = Norm::calc(dx.ptr(i)[j], dy.ptr(i)[j]);
}

void calcMagnitude_gpu(PtrStepi dx, PtrStepi dy, PtrStepf mag, int rows, int cols, bool L2Grad)
{
dim3 block(16, 16, 1);
dim3 grid(divUp(cols, block.x), divUp(rows, block.y), 1);

@ -172,15 +172,15 @@ void calcMagnitude_gpu(PtrStepi dx, PtrStepi dy, PtrStepf mag, int rows, int col

cudaSafeCall( cudaGetLastError() );

cudaSafeCall(cudaThreadSynchronize());
}

//////////////////////////////////////////////////////////////////////////////////////////

#define CANNY_SHIFT 15
#define TG22 (int)(0.4142135623730950488016887242097*(1<<CANNY_SHIFT) + 0.5)

__global__ void calcMap(const PtrStepi dx, const PtrStepi dy, const PtrStepf mag, PtrStepi map, int rows, int cols, float low_thresh, float high_thresh)
{
__shared__ float smem[18][18];

const int j = blockIdx.x * 16 + threadIdx.x;

@ -239,13 +239,13 @@ __global__ void calcMap(const PtrStepi dx, const PtrStepi dy, const PtrStepf mag

map.ptr(i + 1)[j + 1] = edge_type;
}
}

#undef CANNY_SHIFT
#undef TG22

void calcMap_gpu(PtrStepi dx, PtrStepi dy, PtrStepf mag, PtrStepi map, int rows, int cols, float low_thresh, float high_thresh)
{
dim3 block(16, 16, 1);
dim3 grid(divUp(cols, block.x), divUp(rows, block.y), 1);

@ -253,14 +253,14 @@ void calcMap_gpu(PtrStepi dx, PtrStepi dy, PtrStepf mag, PtrStepi map, int rows,

cudaSafeCall( cudaGetLastError() );

cudaSafeCall(cudaThreadSynchronize());
}

//////////////////////////////////////////////////////////////////////////////////////////
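Note: TG22 is tan(22.5 deg) in CANNY_SHIFT-bit fixed point, which lets calcMap bin gradient directions with integer multiplies instead of a float atan2. A hedged sketch of the horizontal-sector test (a standalone restatement, not the kernel code itself):

#include <cstdlib>

const int CANNY_SHIFT = 15;
const int TG22 = (int)(0.4142135623730950488016887242097 * (1 << CANNY_SHIFT) + 0.5);

bool isRoughlyHorizontal(int dx, int dy)
{
    long long x = std::abs(dx);
    long long y = (long long)std::abs(dy) << CANNY_SHIFT; // scale dy into fixed point
    return y < x * TG22;                                  // |dy| / |dx| < tan(22.5 deg)
}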
__device__ unsigned int counter = 0;

__global__ void edgesHysteresisLocal(PtrStepi map, ushort2* st, int rows, int cols)
{
#if __CUDA_ARCH__ >= 120

__shared__ int smem[18][18];

@ -335,10 +335,10 @@ __global__ void edgesHysteresisLocal(PtrStepi map, ushort2* st, int rows, int co

}

#endif
}

void edgesHysteresisLocal_gpu(PtrStepi map, ushort2* st1, int rows, int cols)
{
dim3 block(16, 16, 1);
dim3 grid(divUp(cols, block.x), divUp(rows, block.y), 1);

@ -346,13 +346,13 @@ void edgesHysteresisLocal_gpu(PtrStepi map, ushort2* st1, int rows, int cols)

cudaSafeCall( cudaGetLastError() );

cudaSafeCall(cudaThreadSynchronize());
}

__constant__ int c_dx[8] = {-1, 0, 1, -1, 1, -1, 0, 1};
__constant__ int c_dy[8] = {-1, -1, -1, 0, 0, 1, 1, 1};

__global__ void edgesHysteresisGlobal(PtrStepi map, ushort2* st1, ushort2* st2, int rows, int cols, int count)
{
#if __CUDA_ARCH__ >= 120

const int stack_size = 512;

@ -441,10 +441,10 @@ __global__ void edgesHysteresisGlobal(PtrStepi map, ushort2* st1, ushort2* st2,

}

#endif
}

void edgesHysteresisGlobal_gpu(PtrStepi map, ushort2* st1, ushort2* st2, int rows, int cols)
{
void* counter_ptr;
cudaSafeCall( cudaGetSymbolAddress(&counter_ptr, counter) );

@ -466,19 +466,19 @@ void edgesHysteresisGlobal_gpu(PtrStepi map, ushort2* st1, ushort2* st2, int row

std::swap(st1, st2);
}
}

__global__ void getEdges(PtrStepi map, PtrStepb dst, int rows, int cols)
{
const int j = blockIdx.x * 16 + threadIdx.x;
const int i = blockIdx.y * 16 + threadIdx.y;

if (i < rows && j < cols)
dst.ptr(i)[j] = (uchar)(-(map.ptr(i + 1)[j + 1] >> 1));
}

void getEdges_gpu(PtrStepi map, PtrStepb dst, int rows, int cols)
{
dim3 block(16, 16, 1);
dim3 grid(divUp(cols, block.x), divUp(rows, block.y), 1);

@ -486,8 +486,6 @@ void getEdges_gpu(PtrStepi map, PtrStepb dst, int rows, int cols)

cudaSafeCall( cudaGetLastError() );

cudaSafeCall(cudaThreadSynchronize());
}

} // namespace canny
}}} // namespace cv { namespace gpu { namespace device

END_OPENCV_DEVICE_NAMESPACE
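Note: getEdges appears to rely on the edge map holding 0 (non-edge), 1 (weak candidate) or 2 (confirmed edge) after the hysteresis passes above; `(uchar)(-(v >> 1))` then maps 2 to 255 and everything else to 0. A one-line restatement (the 0/1/2 encoding is inferred from the shift trick, not spelled out in this hunk):

unsigned char edgeValue(int v)
{
    return (unsigned char)(-(v >> 1)); // 2 -> 255, 0 or 1 -> 0 in modular uchar arithmetic
}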
@ -44,181 +44,181 @@

#include "opencv2/gpu/device/transform.hpp"
#include "opencv2/gpu/device/color.hpp"

BEGIN_OPENCV_DEVICE_NAMESPACE
namespace cv { namespace gpu { namespace device
{

DEFINE_TRANSFORM_FUNCTOR_TRAITS(bgra_to_rgba_traits<uchar>::functor_type)
OPENCV_GPU_TRANSFORM_FUNCTOR_TRAITS(bgra_to_rgba_traits<uchar>::functor_type)
{
enum { smart_block_dim_x = 8 };
enum { smart_block_dim_y = 8 };
enum { smart_shift = 4 };
};

DEFINE_TRANSFORM_FUNCTOR_TRAITS(bgra_to_bgr555_traits::functor_type)
OPENCV_GPU_TRANSFORM_FUNCTOR_TRAITS(bgra_to_bgr555_traits::functor_type)
{
enum { smart_block_dim_y = 8 };
enum { smart_shift = 4 };
};
DEFINE_TRANSFORM_FUNCTOR_TRAITS(rgba_to_bgr555_traits::functor_type)
OPENCV_GPU_TRANSFORM_FUNCTOR_TRAITS(rgba_to_bgr555_traits::functor_type)
{
enum { smart_block_dim_y = 8 };
enum { smart_shift = 4 };
};
DEFINE_TRANSFORM_FUNCTOR_TRAITS(bgra_to_bgr565_traits::functor_type)
OPENCV_GPU_TRANSFORM_FUNCTOR_TRAITS(bgra_to_bgr565_traits::functor_type)
{
enum { smart_block_dim_y = 8 };
enum { smart_shift = 4 };
};
DEFINE_TRANSFORM_FUNCTOR_TRAITS(rgba_to_bgr565_traits::functor_type)
OPENCV_GPU_TRANSFORM_FUNCTOR_TRAITS(rgba_to_bgr565_traits::functor_type)
{
enum { smart_block_dim_y = 8 };
enum { smart_shift = 4 };
};

DEFINE_TRANSFORM_FUNCTOR_TRAITS(bgr555_to_bgra_traits::functor_type)
OPENCV_GPU_TRANSFORM_FUNCTOR_TRAITS(bgr555_to_bgra_traits::functor_type)
{
enum { smart_block_dim_y = 8 };
enum { smart_shift = 4 };
};
DEFINE_TRANSFORM_FUNCTOR_TRAITS(bgr555_to_rgba_traits::functor_type)
OPENCV_GPU_TRANSFORM_FUNCTOR_TRAITS(bgr555_to_rgba_traits::functor_type)
{
enum { smart_block_dim_y = 8 };
enum { smart_shift = 4 };
};
DEFINE_TRANSFORM_FUNCTOR_TRAITS(bgr565_to_bgra_traits::functor_type)
OPENCV_GPU_TRANSFORM_FUNCTOR_TRAITS(bgr565_to_bgra_traits::functor_type)
{
enum { smart_block_dim_y = 8 };
enum { smart_shift = 4 };
};
DEFINE_TRANSFORM_FUNCTOR_TRAITS(bgr565_to_rgba_traits::functor_type)
OPENCV_GPU_TRANSFORM_FUNCTOR_TRAITS(bgr565_to_rgba_traits::functor_type)
{
enum { smart_block_dim_y = 8 };
enum { smart_shift = 4 };
};

DEFINE_TRANSFORM_FUNCTOR_TRAITS(gray_to_bgra_traits<uchar>::functor_type)
OPENCV_GPU_TRANSFORM_FUNCTOR_TRAITS(gray_to_bgra_traits<uchar>::functor_type)
{
enum { smart_block_dim_y = 8 };
enum { smart_shift = 4 };
};

DEFINE_TRANSFORM_FUNCTOR_TRAITS(gray_to_bgr555_traits::functor_type)
OPENCV_GPU_TRANSFORM_FUNCTOR_TRAITS(gray_to_bgr555_traits::functor_type)
{
enum { smart_shift = 4 };
};
DEFINE_TRANSFORM_FUNCTOR_TRAITS(gray_to_bgr565_traits::functor_type)
OPENCV_GPU_TRANSFORM_FUNCTOR_TRAITS(gray_to_bgr565_traits::functor_type)
{
enum { smart_shift = 4 };
};

DEFINE_TRANSFORM_FUNCTOR_TRAITS(bgra_to_yuv4_traits<uchar>::functor_type)
OPENCV_GPU_TRANSFORM_FUNCTOR_TRAITS(bgra_to_yuv4_traits<uchar>::functor_type)
{
enum { smart_block_dim_y = 8 };
enum { smart_shift = 4 };
};
DEFINE_TRANSFORM_FUNCTOR_TRAITS(rgba_to_yuv4_traits<uchar>::functor_type)
OPENCV_GPU_TRANSFORM_FUNCTOR_TRAITS(rgba_to_yuv4_traits<uchar>::functor_type)
{
enum { smart_block_dim_y = 8 };
enum { smart_shift = 4 };
};

DEFINE_TRANSFORM_FUNCTOR_TRAITS(yuv4_to_bgra_traits<uchar>::functor_type)
OPENCV_GPU_TRANSFORM_FUNCTOR_TRAITS(yuv4_to_bgra_traits<uchar>::functor_type)
{
enum { smart_block_dim_y = 8 };
enum { smart_shift = 4 };
};
DEFINE_TRANSFORM_FUNCTOR_TRAITS(yuv4_to_rgba_traits<uchar>::functor_type)
OPENCV_GPU_TRANSFORM_FUNCTOR_TRAITS(yuv4_to_rgba_traits<uchar>::functor_type)
{
enum { smart_block_dim_y = 8 };
enum { smart_shift = 4 };
};

DEFINE_TRANSFORM_FUNCTOR_TRAITS(bgra_to_YCrCb4_traits<uchar>::functor_type)
OPENCV_GPU_TRANSFORM_FUNCTOR_TRAITS(bgra_to_YCrCb4_traits<uchar>::functor_type)
{
enum { smart_block_dim_y = 8 };
enum { smart_shift = 4 };
};
DEFINE_TRANSFORM_FUNCTOR_TRAITS(rgba_to_YCrCb4_traits<uchar>::functor_type)
OPENCV_GPU_TRANSFORM_FUNCTOR_TRAITS(rgba_to_YCrCb4_traits<uchar>::functor_type)
{
enum { smart_block_dim_y = 8 };
enum { smart_shift = 4 };
};

DEFINE_TRANSFORM_FUNCTOR_TRAITS(YCrCb4_to_bgra_traits<uchar>::functor_type)
OPENCV_GPU_TRANSFORM_FUNCTOR_TRAITS(YCrCb4_to_bgra_traits<uchar>::functor_type)
{
enum { smart_block_dim_y = 8 };
enum { smart_shift = 4 };
};
DEFINE_TRANSFORM_FUNCTOR_TRAITS(YCrCb4_to_rgba_traits<uchar>::functor_type)
OPENCV_GPU_TRANSFORM_FUNCTOR_TRAITS(YCrCb4_to_rgba_traits<uchar>::functor_type)
{
enum { smart_block_dim_y = 8 };
enum { smart_shift = 4 };
};

DEFINE_TRANSFORM_FUNCTOR_TRAITS(bgra_to_xyz4_traits<uchar>::functor_type)
OPENCV_GPU_TRANSFORM_FUNCTOR_TRAITS(bgra_to_xyz4_traits<uchar>::functor_type)
{
enum { smart_block_dim_y = 8 };
enum { smart_shift = 4 };
};
DEFINE_TRANSFORM_FUNCTOR_TRAITS(rgba_to_xyz4_traits<uchar>::functor_type)
OPENCV_GPU_TRANSFORM_FUNCTOR_TRAITS(rgba_to_xyz4_traits<uchar>::functor_type)
{
enum { smart_block_dim_y = 8 };
enum { smart_shift = 4 };
};

DEFINE_TRANSFORM_FUNCTOR_TRAITS(xyz4_to_bgra_traits<uchar>::functor_type)
OPENCV_GPU_TRANSFORM_FUNCTOR_TRAITS(xyz4_to_bgra_traits<uchar>::functor_type)
{
enum { smart_block_dim_y = 8 };
enum { smart_shift = 4 };
};
DEFINE_TRANSFORM_FUNCTOR_TRAITS(xyz4_to_rgba_traits<uchar>::functor_type)
OPENCV_GPU_TRANSFORM_FUNCTOR_TRAITS(xyz4_to_rgba_traits<uchar>::functor_type)
{
enum { smart_block_dim_y = 8 };
enum { smart_shift = 4 };
};

DEFINE_TRANSFORM_FUNCTOR_TRAITS(bgra_to_hsv4_traits<uchar>::functor_type)
OPENCV_GPU_TRANSFORM_FUNCTOR_TRAITS(bgra_to_hsv4_traits<uchar>::functor_type)
{
enum { smart_block_dim_y = 8 };
enum { smart_shift = 4 };
};
DEFINE_TRANSFORM_FUNCTOR_TRAITS(rgba_to_hsv4_traits<uchar>::functor_type)
OPENCV_GPU_TRANSFORM_FUNCTOR_TRAITS(rgba_to_hsv4_traits<uchar>::functor_type)
{
enum { smart_block_dim_y = 8 };
enum { smart_shift = 4 };
};

DEFINE_TRANSFORM_FUNCTOR_TRAITS(hsv4_to_bgra_traits<uchar>::functor_type)
OPENCV_GPU_TRANSFORM_FUNCTOR_TRAITS(hsv4_to_bgra_traits<uchar>::functor_type)
{
enum { smart_block_dim_y = 8 };
enum { smart_shift = 4 };
};
DEFINE_TRANSFORM_FUNCTOR_TRAITS(hsv4_to_rgba_traits<uchar>::functor_type)
OPENCV_GPU_TRANSFORM_FUNCTOR_TRAITS(hsv4_to_rgba_traits<uchar>::functor_type)
{
enum { smart_block_dim_y = 8 };
enum { smart_shift = 4 };
};

DEFINE_TRANSFORM_FUNCTOR_TRAITS(bgra_to_hls4_traits<uchar>::functor_type)
OPENCV_GPU_TRANSFORM_FUNCTOR_TRAITS(bgra_to_hls4_traits<uchar>::functor_type)
{
enum { smart_block_dim_y = 8 };
enum { smart_shift = 4 };
};
DEFINE_TRANSFORM_FUNCTOR_TRAITS(rgba_to_hls4_traits<uchar>::functor_type)
OPENCV_GPU_TRANSFORM_FUNCTOR_TRAITS(rgba_to_hls4_traits<uchar>::functor_type)
{
enum { smart_block_dim_y = 8 };
enum { smart_shift = 4 };
};

DEFINE_TRANSFORM_FUNCTOR_TRAITS(hls4_to_bgra_traits<uchar>::functor_type)
OPENCV_GPU_TRANSFORM_FUNCTOR_TRAITS(hls4_to_bgra_traits<uchar>::functor_type)
{
enum { smart_block_dim_y = 8 };
enum { smart_shift = 4 };
};
DEFINE_TRANSFORM_FUNCTOR_TRAITS(hls4_to_rgba_traits<uchar>::functor_type)
OPENCV_GPU_TRANSFORM_FUNCTOR_TRAITS(hls4_to_rgba_traits<uchar>::functor_type)
{
enum { smart_block_dim_y = 8 };
enum { smart_shift = 4 };
};
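Note: each specialization above overrides the default launch heuristics of device::transform for one colour-conversion functor (block shape and how many elements each thread handles). A plausible shape for what the renamed macro generates; this expansion is an assumption for illustration, not copied from the gpu device headers:

template <typename F> struct TransformFunctorTraits; // assumed primary template name

struct ExampleFunctor { }; // hypothetical functor

template <> struct TransformFunctorTraits<ExampleFunctor>
{
    enum { smart_block_dim_x = 8 }; // launch block width hint
    enum { smart_block_dim_y = 8 }; // launch block height hint
    enum { smart_shift = 4 };       // elements processed per thread
};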
#define OPENCV_GPU_IMPLEMENT_CVTCOLOR(name, traits) \
void name(const DevMem2Db& src, const DevMem2Db& dst, cudaStream_t stream) \

@ -226,7 +226,7 @@ DEFINE_TRANSFORM_FUNCTOR_TRAITS(hls4_to_rgba_traits<uchar>::functor_type)

traits::functor_type functor = traits::create_functor(); \
typedef typename traits::functor_type::argument_type src_t; \
typedef typename traits::functor_type::result_type dst_t; \
OPENCV_DEVICE_NAMESPACE_ transform((DevMem2D_<src_t>)src, (DevMem2D_<dst_t>)dst, functor, stream); \
::cv::gpu::device::transform((DevMem2D_<src_t>)src, (DevMem2D_<dst_t>)dst, functor, stream); \
}

#define OPENCV_GPU_IMPLEMENT_CVTCOLOR_ONE(name) \

@ -243,138 +243,137 @@ DEFINE_TRANSFORM_FUNCTOR_TRAITS(hls4_to_rgba_traits<uchar>::functor_type)

OPENCV_GPU_IMPLEMENT_CVTCOLOR(name ## _full_8u, name ## _full_traits<uchar>) \
OPENCV_GPU_IMPLEMENT_CVTCOLOR(name ## _full_32f, name ## _full_traits<float>)

OPENCV_GPU_IMPLEMENT_CVTCOLOR_ALL(bgr_to_rgb)
OPENCV_GPU_IMPLEMENT_CVTCOLOR_ALL(bgr_to_bgra)
OPENCV_GPU_IMPLEMENT_CVTCOLOR_ALL(bgr_to_rgba)
OPENCV_GPU_IMPLEMENT_CVTCOLOR_ALL(bgra_to_bgr)
OPENCV_GPU_IMPLEMENT_CVTCOLOR_ALL(bgra_to_rgb)
OPENCV_GPU_IMPLEMENT_CVTCOLOR_ALL(bgra_to_rgba)

OPENCV_GPU_IMPLEMENT_CVTCOLOR_ONE(bgr_to_bgr555)
OPENCV_GPU_IMPLEMENT_CVTCOLOR_ONE(bgr_to_bgr565)
OPENCV_GPU_IMPLEMENT_CVTCOLOR_ONE(rgb_to_bgr555)
OPENCV_GPU_IMPLEMENT_CVTCOLOR_ONE(rgb_to_bgr565)
OPENCV_GPU_IMPLEMENT_CVTCOLOR_ONE(bgra_to_bgr555)
OPENCV_GPU_IMPLEMENT_CVTCOLOR_ONE(bgra_to_bgr565)
OPENCV_GPU_IMPLEMENT_CVTCOLOR_ONE(rgba_to_bgr555)
OPENCV_GPU_IMPLEMENT_CVTCOLOR_ONE(rgba_to_bgr565)

OPENCV_GPU_IMPLEMENT_CVTCOLOR_ONE(bgr555_to_rgb)
OPENCV_GPU_IMPLEMENT_CVTCOLOR_ONE(bgr565_to_rgb)
OPENCV_GPU_IMPLEMENT_CVTCOLOR_ONE(bgr555_to_bgr)
OPENCV_GPU_IMPLEMENT_CVTCOLOR_ONE(bgr565_to_bgr)
OPENCV_GPU_IMPLEMENT_CVTCOLOR_ONE(bgr555_to_rgba)
OPENCV_GPU_IMPLEMENT_CVTCOLOR_ONE(bgr565_to_rgba)
OPENCV_GPU_IMPLEMENT_CVTCOLOR_ONE(bgr555_to_bgra)
OPENCV_GPU_IMPLEMENT_CVTCOLOR_ONE(bgr565_to_bgra)

OPENCV_GPU_IMPLEMENT_CVTCOLOR_ALL(gray_to_bgr)
OPENCV_GPU_IMPLEMENT_CVTCOLOR_ALL(gray_to_bgra)

OPENCV_GPU_IMPLEMENT_CVTCOLOR_ONE(gray_to_bgr555)
OPENCV_GPU_IMPLEMENT_CVTCOLOR_ONE(gray_to_bgr565)

OPENCV_GPU_IMPLEMENT_CVTCOLOR_ONE(bgr555_to_gray)
OPENCV_GPU_IMPLEMENT_CVTCOLOR_ONE(bgr565_to_gray)

OPENCV_GPU_IMPLEMENT_CVTCOLOR_ALL(rgb_to_gray)
OPENCV_GPU_IMPLEMENT_CVTCOLOR_ALL(bgr_to_gray)
OPENCV_GPU_IMPLEMENT_CVTCOLOR_ALL(rgba_to_gray)
OPENCV_GPU_IMPLEMENT_CVTCOLOR_ALL(bgra_to_gray)

OPENCV_GPU_IMPLEMENT_CVTCOLOR_ALL(rgb_to_yuv)
OPENCV_GPU_IMPLEMENT_CVTCOLOR_ALL(rgba_to_yuv)
OPENCV_GPU_IMPLEMENT_CVTCOLOR_ALL(rgb_to_yuv4)
OPENCV_GPU_IMPLEMENT_CVTCOLOR_ALL(rgba_to_yuv4)
OPENCV_GPU_IMPLEMENT_CVTCOLOR_ALL(bgr_to_yuv)
OPENCV_GPU_IMPLEMENT_CVTCOLOR_ALL(bgra_to_yuv)
OPENCV_GPU_IMPLEMENT_CVTCOLOR_ALL(bgr_to_yuv4)
OPENCV_GPU_IMPLEMENT_CVTCOLOR_ALL(bgra_to_yuv4)

OPENCV_GPU_IMPLEMENT_CVTCOLOR_ALL(yuv_to_rgb)
OPENCV_GPU_IMPLEMENT_CVTCOLOR_ALL(yuv_to_rgba)
OPENCV_GPU_IMPLEMENT_CVTCOLOR_ALL(yuv4_to_rgb)
OPENCV_GPU_IMPLEMENT_CVTCOLOR_ALL(yuv4_to_rgba)
OPENCV_GPU_IMPLEMENT_CVTCOLOR_ALL(yuv_to_bgr)
OPENCV_GPU_IMPLEMENT_CVTCOLOR_ALL(yuv_to_bgra)
OPENCV_GPU_IMPLEMENT_CVTCOLOR_ALL(yuv4_to_bgr)
OPENCV_GPU_IMPLEMENT_CVTCOLOR_ALL(yuv4_to_bgra)

OPENCV_GPU_IMPLEMENT_CVTCOLOR_ALL(rgb_to_YCrCb)
OPENCV_GPU_IMPLEMENT_CVTCOLOR_ALL(rgba_to_YCrCb)
OPENCV_GPU_IMPLEMENT_CVTCOLOR_ALL(rgb_to_YCrCb4)
OPENCV_GPU_IMPLEMENT_CVTCOLOR_ALL(rgba_to_YCrCb4)
OPENCV_GPU_IMPLEMENT_CVTCOLOR_ALL(bgr_to_YCrCb)
OPENCV_GPU_IMPLEMENT_CVTCOLOR_ALL(bgra_to_YCrCb)
OPENCV_GPU_IMPLEMENT_CVTCOLOR_ALL(bgr_to_YCrCb4)
OPENCV_GPU_IMPLEMENT_CVTCOLOR_ALL(bgra_to_YCrCb4)

OPENCV_GPU_IMPLEMENT_CVTCOLOR_ALL(YCrCb_to_rgb)
OPENCV_GPU_IMPLEMENT_CVTCOLOR_ALL(YCrCb_to_rgba)
OPENCV_GPU_IMPLEMENT_CVTCOLOR_ALL(YCrCb4_to_rgb)
OPENCV_GPU_IMPLEMENT_CVTCOLOR_ALL(YCrCb4_to_rgba)
OPENCV_GPU_IMPLEMENT_CVTCOLOR_ALL(YCrCb_to_bgr)
OPENCV_GPU_IMPLEMENT_CVTCOLOR_ALL(YCrCb_to_bgra)
OPENCV_GPU_IMPLEMENT_CVTCOLOR_ALL(YCrCb4_to_bgr)
OPENCV_GPU_IMPLEMENT_CVTCOLOR_ALL(YCrCb4_to_bgra)

OPENCV_GPU_IMPLEMENT_CVTCOLOR_ALL(rgb_to_xyz)
OPENCV_GPU_IMPLEMENT_CVTCOLOR_ALL(rgba_to_xyz)
OPENCV_GPU_IMPLEMENT_CVTCOLOR_ALL(rgb_to_xyz4)
OPENCV_GPU_IMPLEMENT_CVTCOLOR_ALL(rgba_to_xyz4)
OPENCV_GPU_IMPLEMENT_CVTCOLOR_ALL(bgr_to_xyz)
OPENCV_GPU_IMPLEMENT_CVTCOLOR_ALL(bgra_to_xyz)
OPENCV_GPU_IMPLEMENT_CVTCOLOR_ALL(bgr_to_xyz4)
OPENCV_GPU_IMPLEMENT_CVTCOLOR_ALL(bgra_to_xyz4)

OPENCV_GPU_IMPLEMENT_CVTCOLOR_ALL(xyz_to_rgb)
OPENCV_GPU_IMPLEMENT_CVTCOLOR_ALL(xyz4_to_rgb)
OPENCV_GPU_IMPLEMENT_CVTCOLOR_ALL(xyz_to_rgba)
OPENCV_GPU_IMPLEMENT_CVTCOLOR_ALL(xyz4_to_rgba)
OPENCV_GPU_IMPLEMENT_CVTCOLOR_ALL(xyz_to_bgr)
OPENCV_GPU_IMPLEMENT_CVTCOLOR_ALL(xyz4_to_bgr)
OPENCV_GPU_IMPLEMENT_CVTCOLOR_ALL(xyz_to_bgra)
OPENCV_GPU_IMPLEMENT_CVTCOLOR_ALL(xyz4_to_bgra)

OPENCV_GPU_IMPLEMENT_CVTCOLOR_8U32F(rgb_to_hsv)
OPENCV_GPU_IMPLEMENT_CVTCOLOR_8U32F(rgba_to_hsv)
OPENCV_GPU_IMPLEMENT_CVTCOLOR_8U32F(rgb_to_hsv4)
OPENCV_GPU_IMPLEMENT_CVTCOLOR_8U32F(rgba_to_hsv4)
OPENCV_GPU_IMPLEMENT_CVTCOLOR_8U32F(bgr_to_hsv)
OPENCV_GPU_IMPLEMENT_CVTCOLOR_8U32F(bgra_to_hsv)
OPENCV_GPU_IMPLEMENT_CVTCOLOR_8U32F(bgr_to_hsv4)
OPENCV_GPU_IMPLEMENT_CVTCOLOR_8U32F(bgra_to_hsv4)

OPENCV_GPU_IMPLEMENT_CVTCOLOR_8U32F(hsv_to_rgb)
OPENCV_GPU_IMPLEMENT_CVTCOLOR_8U32F(hsv_to_rgba)
OPENCV_GPU_IMPLEMENT_CVTCOLOR_8U32F(hsv4_to_rgb)
OPENCV_GPU_IMPLEMENT_CVTCOLOR_8U32F(hsv4_to_rgba)
OPENCV_GPU_IMPLEMENT_CVTCOLOR_8U32F(hsv_to_bgr)
OPENCV_GPU_IMPLEMENT_CVTCOLOR_8U32F(hsv_to_bgra)
OPENCV_GPU_IMPLEMENT_CVTCOLOR_8U32F(hsv4_to_bgr)
OPENCV_GPU_IMPLEMENT_CVTCOLOR_8U32F(hsv4_to_bgra)

OPENCV_GPU_IMPLEMENT_CVTCOLOR_8U32F(rgb_to_hls)
OPENCV_GPU_IMPLEMENT_CVTCOLOR_8U32F(rgba_to_hls)
OPENCV_GPU_IMPLEMENT_CVTCOLOR_8U32F(rgb_to_hls4)
OPENCV_GPU_IMPLEMENT_CVTCOLOR_8U32F(rgba_to_hls4)
OPENCV_GPU_IMPLEMENT_CVTCOLOR_8U32F(bgr_to_hls)
OPENCV_GPU_IMPLEMENT_CVTCOLOR_8U32F(bgra_to_hls)
OPENCV_GPU_IMPLEMENT_CVTCOLOR_8U32F(bgr_to_hls4)
OPENCV_GPU_IMPLEMENT_CVTCOLOR_8U32F(bgra_to_hls4)

OPENCV_GPU_IMPLEMENT_CVTCOLOR_8U32F(hls_to_rgb)
OPENCV_GPU_IMPLEMENT_CVTCOLOR_8U32F(hls_to_rgba)
OPENCV_GPU_IMPLEMENT_CVTCOLOR_8U32F(hls4_to_rgb)
OPENCV_GPU_IMPLEMENT_CVTCOLOR_8U32F(hls4_to_rgba)
OPENCV_GPU_IMPLEMENT_CVTCOLOR_8U32F(hls_to_bgr)
OPENCV_GPU_IMPLEMENT_CVTCOLOR_8U32F(hls_to_bgra)
OPENCV_GPU_IMPLEMENT_CVTCOLOR_8U32F(hls4_to_bgr)
OPENCV_GPU_IMPLEMENT_CVTCOLOR_8U32F(hls4_to_bgra)

#undef OPENCV_GPU_IMPLEMENT_CVTCOLOR
#undef OPENCV_GPU_IMPLEMENT_CVTCOLOR_ONE
#undef OPENCV_GPU_IMPLEMENT_CVTCOLOR_ALL
#undef OPENCV_GPU_IMPLEMENT_CVTCOLOR_8U32F

}}} // namespace cv { namespace gpu { namespace device

END_OPENCV_DEVICE_NAMESPACE
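Note: every OPENCV_GPU_IMPLEMENT_CVTCOLOR(name, traits) line stamps out one host wrapper around device::transform. Hand-expanding the macro body shown above for one assumed argument pair gives roughly the following (the argument names are illustrative, not a line from the commit):

void bgr_to_rgb_8u(const DevMem2Db& src, const DevMem2Db& dst, cudaStream_t stream)
{
    typedef bgr_to_rgb_traits<uchar> traits; // assumed traits argument
    traits::functor_type functor = traits::create_functor();
    typedef traits::functor_type::argument_type src_t;
    typedef traits::functor_type::result_type dst_t;
    ::cv::gpu::device::transform((DevMem2D_<src_t>)src, (DevMem2D_<dst_t>)dst, functor, stream);
}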
@ -47,26 +47,26 @@

#include "opencv2/gpu/device/limits.hpp"
#include "opencv2/gpu/device/border_interpolate.hpp"

BEGIN_OPENCV_DEVICE_NAMESPACE
namespace cv { namespace gpu { namespace device
{

#define MAX_KERNEL_SIZE 16
#define BLOCK_DIM_X 16
#define BLOCK_DIM_Y 4
#define RESULT_STEPS 8
#define HALO_STEPS 1

namespace column_filter {
namespace column_filter
{

__constant__ float c_kernel[MAX_KERNEL_SIZE];

void loadKernel(const float kernel[], int ksize)
{
cudaSafeCall( cudaMemcpyToSymbol(c_kernel, kernel, ksize * sizeof(float)) );
}

template <int KERNEL_SIZE, typename T, typename D, typename B>
__global__ void linearColumnFilter(const DevMem2D_<T> src, PtrStep<D> dst, int anchor, const B b)
{
typedef typename TypeVec<float, VecTraits<T>::cn>::vec_type sum_t;

__shared__ T smem[BLOCK_DIM_X][(RESULT_STEPS + 2 * HALO_STEPS) * BLOCK_DIM_Y + 1];

@ -111,11 +111,11 @@ __global__ void linearColumnFilter(const DevMem2D_<T> src, PtrStep<D> dst, int a

dst.ptr(dstY)[x] = saturate_cast<D>(sum);
}
}
}

template <int ksize, typename T, typename D, template<typename> class B>
void linearColumnFilter_caller(const DevMem2D_<T>& src, const DevMem2D_<D>& dst, int anchor, cudaStream_t stream)
{
const dim3 block(BLOCK_DIM_X, BLOCK_DIM_Y);
const dim3 grid(divUp(src.cols, BLOCK_DIM_X), divUp(src.rows, RESULT_STEPS * BLOCK_DIM_Y));

@ -126,11 +126,11 @@ void linearColumnFilter_caller(const DevMem2D_<T>& src, const DevMem2D_<D>& dst,

if (stream == 0)
cudaSafeCall( cudaDeviceSynchronize() );
}

template <typename T, typename D>
void linearColumnFilter_gpu(const DevMem2Db& src, const DevMem2Db& dst, const float kernel[], int ksize, int anchor, int brd_type, cudaStream_t stream)
{
typedef void (*caller_t)(const DevMem2D_<T>& src, const DevMem2D_<D>& dst, int anchor, cudaStream_t stream);
static const caller_t callers[5][17] =
{

@ -234,16 +234,14 @@ void linearColumnFilter_gpu(const DevMem2Db& src, const DevMem2Db& dst, const fl

loadKernel(kernel, ksize);

callers[brd_type][ksize]((DevMem2D_<T>)src, (DevMem2D_<D>)dst, anchor, stream);
}

template void linearColumnFilter_gpu<float , uchar >(const DevMem2Db& src, const DevMem2Db& dst, const float kernel[], int ksize, int anchor, int brd_type, cudaStream_t stream);
template void linearColumnFilter_gpu<float4, uchar4>(const DevMem2Db& src, const DevMem2Db& dst, const float kernel[], int ksize, int anchor, int brd_type, cudaStream_t stream);
//template void linearColumnFilter_gpu<float , short >(const DevMem2Db& src, const DevMem2Db& dst, const float kernel[], int ksize, int anchor, int brd_type, cudaStream_t stream);
//template void linearColumnFilter_gpu<float2, short2>(const DevMem2Db& src, const DevMem2Db& dst, const float kernel[], int ksize, int anchor, int brd_type, cudaStream_t stream);
template void linearColumnFilter_gpu<float3, short3>(const DevMem2Db& src, const DevMem2Db& dst, const float kernel[], int ksize, int anchor, int brd_type, cudaStream_t stream);
template void linearColumnFilter_gpu<float , int >(const DevMem2Db& src, const DevMem2Db& dst, const float kernel[], int ksize, int anchor, int brd_type, cudaStream_t stream);
template void linearColumnFilter_gpu<float , float >(const DevMem2Db& src, const DevMem2Db& dst, const float kernel[], int ksize, int anchor, int brd_type, cudaStream_t stream);

} // namespace column_filter
}}} // namespace cv { namespace gpu { namespace device

END_OPENCV_DEVICE_NAMESPACE
|
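The caller table above — a static 2-D array of function pointers indexed by border type and kernel size — is how these files select a fully templated kernel instantiation at run time. A minimal host-only C++ sketch of the same dispatch pattern; the names, the 2x4 table shape, and the trivial "filter" body are illustrative stand-ins, not the library's:

#include <cstdio>

// Stand-in for the launcher signature; the real tables hold pointers to
// templated CUDA launchers instead.
typedef void (*caller_t)(const float* src, float* dst, int n, int anchor);

template <int KSIZE>
static void filter_caller(const float* src, float* dst, int n, int anchor)
{
    // Each instantiation knows its kernel size at compile time; the body
    // here is a trivial placeholder, not a real convolution.
    for (int i = 0; i < n; ++i)
        dst[i] = src[i] * KSIZE + anchor;
}

int main()
{
    // One row per border mode, one column per kernel size; unsupported
    // combinations would hold 0, like the sparse callers[5][17] above.
    static const caller_t callers[2][4] =
    {
        { filter_caller<1>, filter_caller<2>, filter_caller<3>, filter_caller<4> },
        { filter_caller<1>, filter_caller<2>, filter_caller<3>, filter_caller<4> }
    };

    const float src[3] = { 1.f, 2.f, 3.f };
    float dst[3];
    const int brd_type = 1, ksize = 3, anchor = 0;

    callers[brd_type][ksize - 1](src, dst, 3, anchor);  // run-time dispatch
    printf("%.0f %.0f %.0f\n", dst[0], dst[1], dst[2]); // prints: 3 6 9
    return 0;
}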
@ -43,21 +43,21 @@
#include "internal_shared.hpp"
#include "opencv2/gpu/device/border_interpolate.hpp"

-BEGIN_OPENCV_DEVICE_NAMESPACE
+namespace cv { namespace gpu { namespace device
+{

-namespace imgproc {
+namespace imgproc
+{

template <typename Ptr2D, typename T> __global__ void copyMakeBorder(const Ptr2D src, DevMem2D_<T> dst, int top, int left)
{
    const int x = blockDim.x * blockIdx.x + threadIdx.x;
    const int y = blockDim.y * blockIdx.y + threadIdx.y;

    if (x < dst.cols && y < dst.rows)
        dst.ptr(y)[x] = src(y - top, x - left);
}

template <template <typename> class B, typename T> struct CopyMakeBorderDispatcher
{
    static void call(const DevMem2D_<T>& src, const DevMem2D_<T>& dst, int top, int left,
        const typename VecTraits<T>::elem_type* borderValue, cudaStream_t stream)
    {
@ -73,11 +73,11 @@ template <template <typename> class B, typename T> struct CopyMakeBorderDispatch
        if (stream == 0)
            cudaSafeCall( cudaDeviceSynchronize() );
    }
};

template <typename T, int cn> void copyMakeBorder_gpu(const DevMem2Db& src, const DevMem2Db& dst, int top, int left, int borderMode,
    const T* borderValue, cudaStream_t stream)
{
    typedef typename TypeVec<T, cn>::vec_type vec_type;

    typedef void (*caller_t)(const DevMem2D_<vec_type>& src, const DevMem2D_<vec_type>& dst, int top, int left, const T* borderValue, cudaStream_t stream);
@ -92,38 +92,36 @@ template <typename T, int cn> void copyMakeBorder_gpu(const DevMem2Db& src, cons
    };

    callers[borderMode](DevMem2D_<vec_type>(src), DevMem2D_<vec_type>(dst), top, left, borderValue, stream);
}

template void copyMakeBorder_gpu<uchar, 1>(const DevMem2Db& src, const DevMem2Db& dst, int top, int left, int borderMode, const uchar* borderValue, cudaStream_t stream);
//template void copyMakeBorder_gpu<uchar, 2>(const DevMem2Db& src, const DevMem2Db& dst, int top, int left, int borderMode, const uchar* borderValue, cudaStream_t stream);
template void copyMakeBorder_gpu<uchar, 3>(const DevMem2Db& src, const DevMem2Db& dst, int top, int left, int borderMode, const uchar* borderValue, cudaStream_t stream);
template void copyMakeBorder_gpu<uchar, 4>(const DevMem2Db& src, const DevMem2Db& dst, int top, int left, int borderMode, const uchar* borderValue, cudaStream_t stream);

//template void copyMakeBorder_gpu<schar, 1>(const DevMem2Db& src, const DevMem2Db& dst, int top, int left, int borderMode, const schar* borderValue, cudaStream_t stream);
//template void copyMakeBorder_gpu<schar, 2>(const DevMem2Db& src, const DevMem2Db& dst, int top, int left, int borderMode, const schar* borderValue, cudaStream_t stream);
//template void copyMakeBorder_gpu<schar, 3>(const DevMem2Db& src, const DevMem2Db& dst, int top, int left, int borderMode, const schar* borderValue, cudaStream_t stream);
//template void copyMakeBorder_gpu<schar, 4>(const DevMem2Db& src, const DevMem2Db& dst, int top, int left, int borderMode, const schar* borderValue, cudaStream_t stream);

template void copyMakeBorder_gpu<ushort, 1>(const DevMem2Db& src, const DevMem2Db& dst, int top, int left, int borderMode, const ushort* borderValue, cudaStream_t stream);
//template void copyMakeBorder_gpu<ushort, 2>(const DevMem2Db& src, const DevMem2Db& dst, int top, int left, int borderMode, const ushort* borderValue, cudaStream_t stream);
template void copyMakeBorder_gpu<ushort, 3>(const DevMem2Db& src, const DevMem2Db& dst, int top, int left, int borderMode, const ushort* borderValue, cudaStream_t stream);
template void copyMakeBorder_gpu<ushort, 4>(const DevMem2Db& src, const DevMem2Db& dst, int top, int left, int borderMode, const ushort* borderValue, cudaStream_t stream);

template void copyMakeBorder_gpu<short, 1>(const DevMem2Db& src, const DevMem2Db& dst, int top, int left, int borderMode, const short* borderValue, cudaStream_t stream);
//template void copyMakeBorder_gpu<short, 2>(const DevMem2Db& src, const DevMem2Db& dst, int top, int left, int borderMode, const short* borderValue, cudaStream_t stream);
template void copyMakeBorder_gpu<short, 3>(const DevMem2Db& src, const DevMem2Db& dst, int top, int left, int borderMode, const short* borderValue, cudaStream_t stream);
template void copyMakeBorder_gpu<short, 4>(const DevMem2Db& src, const DevMem2Db& dst, int top, int left, int borderMode, const short* borderValue, cudaStream_t stream);

//template void copyMakeBorder_gpu<int, 1>(const DevMem2Db& src, const DevMem2Db& dst, int top, int left, int borderMode, const int* borderValue, cudaStream_t stream);
//template void copyMakeBorder_gpu<int, 2>(const DevMem2Db& src, const DevMem2Db& dst, int top, int left, int borderMode, const int* borderValue, cudaStream_t stream);
//template void copyMakeBorder_gpu<int, 3>(const DevMem2Db& src, const DevMem2Db& dst, int top, int left, int borderMode, const int* borderValue, cudaStream_t stream);
//template void copyMakeBorder_gpu<int, 4>(const DevMem2Db& src, const DevMem2Db& dst, int top, int left, int borderMode, const int* borderValue, cudaStream_t stream);

template void copyMakeBorder_gpu<float, 1>(const DevMem2Db& src, const DevMem2Db& dst, int top, int left, int borderMode, const float* borderValue, cudaStream_t stream);
//template void copyMakeBorder_gpu<float, 2>(const DevMem2Db& src, const DevMem2Db& dst, int top, int left, int borderMode, const float* borderValue, cudaStream_t stream);
template void copyMakeBorder_gpu<float, 3>(const DevMem2Db& src, const DevMem2Db& dst, int top, int left, int borderMode, const float* borderValue, cudaStream_t stream);
template void copyMakeBorder_gpu<float, 4>(const DevMem2Db& src, const DevMem2Db& dst, int top, int left, int borderMode, const float* borderValue, cudaStream_t stream);

} // namespace imgproc
-
-END_OPENCV_DEVICE_NAMESPACE
+}}} // namespace cv { namespace gpu { namespace device
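In the copyMakeBorder kernel above, every destination pixel reads src(y - top, x - left), and the border-aware Ptr2D functor from border_interpolate.hpp repairs out-of-range coordinates. A CPU-side sketch of one such policy, clamp-to-edge (BORDER_REPLICATE-style); the helper name idx_replicate and the 2x3 test image are made up for illustration, and the real functors also implement reflect, wrap, and constant borders:

#include <algorithm>
#include <cstdio>

// Clamp an index into [0, len) -- the replicate-border mapping.
static inline int idx_replicate(int p, int len)
{
    return std::max(0, std::min(p, len - 1));
}

int main()
{
    const int rows = 2, cols = 3, top = 1, left = 1;
    const int src[2][3] = { { 1, 2, 3 }, { 4, 5, 6 } };
    int dst[4][5];  // destination with a one-pixel border on every side

    // Same addressing as the kernel: dst(y, x) reads src(y - top, x - left),
    // with the border policy fixing up out-of-range coordinates.
    for (int y = 0; y < rows + 2; ++y)
        for (int x = 0; x < cols + 2; ++x)
            dst[y][x] = src[idx_replicate(y - top, rows)]
                           [idx_replicate(x - left, cols)];

    for (int y = 0; y < rows + 2; ++y)
    {
        for (int x = 0; x < cols + 2; ++x)
            printf("%d ", dst[y][x]);
        printf("\n");
    }
    return 0;
}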
File diff suppressed because it is too large
@ -45,29 +45,29 @@
#include "opencv2/gpu/device/utility.hpp"
#include "opencv2/gpu/device/saturate_cast.hpp"

-BEGIN_OPENCV_DEVICE_NAMESPACE
+namespace cv { namespace gpu { namespace device
+{

#define UINT_BITS 32U

//Warps == subhistograms per threadblock
#define WARP_COUNT 6

//Threadblock size
#define HISTOGRAM256_THREADBLOCK_SIZE (WARP_COUNT * OPENCV_GPU_WARP_SIZE)
#define HISTOGRAM256_BIN_COUNT 256

//Shared memory per threadblock
#define HISTOGRAM256_THREADBLOCK_MEMORY (WARP_COUNT * HISTOGRAM256_BIN_COUNT)

#define PARTIAL_HISTOGRAM256_COUNT 240

#define MERGE_THREADBLOCK_SIZE 256

#define USE_SMEM_ATOMICS (__CUDA_ARCH__ >= 120)

-namespace hist {
+namespace hist
+{

#if (!USE_SMEM_ATOMICS)

#define TAG_MASK ( (1U << (UINT_BITS - OPENCV_GPU_LOG_WARP_SIZE)) - 1U )

@ -82,7 +82,7 @@ namespace hist {
    } while (s_WarpHist[data] != count);
}

#else

#define TAG_MASK 0xFFFFFFFFU

@ -91,20 +91,20 @@ namespace hist {
    atomicAdd(s_WarpHist + data, 1);
}

#endif

__forceinline__ __device__ void addWord(uint* s_WarpHist, uint data, uint tag, uint pos_x, uint cols)
{
    uint x = pos_x << 2;

    if (x + 0 < cols) addByte(s_WarpHist, (data >> 0) & 0xFFU, tag);
    if (x + 1 < cols) addByte(s_WarpHist, (data >> 8) & 0xFFU, tag);
    if (x + 2 < cols) addByte(s_WarpHist, (data >> 16) & 0xFFU, tag);
    if (x + 3 < cols) addByte(s_WarpHist, (data >> 24) & 0xFFU, tag);
}

__global__ void histogram256(const PtrStep<uint> d_Data, uint* d_PartialHistograms, uint dataCount, uint cols)
{
    //Per-warp subhistogram storage
    __shared__ uint s_Hist[HISTOGRAM256_THREADBLOCK_MEMORY];
    uint* s_WarpHist= s_Hist + (threadIdx.x >> OPENCV_GPU_LOG_WARP_SIZE) * HISTOGRAM256_BIN_COUNT;
@ -138,17 +138,17 @@ __global__ void histogram256(const PtrStep<uint> d_Data, uint* d_PartialHistogra

        d_PartialHistograms[blockIdx.x * HISTOGRAM256_BIN_COUNT + bin] = sum;
    }
}

////////////////////////////////////////////////////////////////////////////////
// Merge histogram256() output
// Run one threadblock per bin; each threadblock adds up the same bin counter
// from every partial histogram. Reads are uncoalesced, but mergeHistogram256
// takes only a fraction of total processing time
////////////////////////////////////////////////////////////////////////////////

__global__ void mergeHistogram256(const uint* d_PartialHistograms, int* d_Histogram)
{
    uint sum = 0;

    #pragma unroll
@ -167,10 +167,10 @@ __global__ void mergeHistogram256(const uint* d_PartialHistograms, int* d_Histog

    if(threadIdx.x == 0)
        d_Histogram[blockIdx.x] = saturate_cast<int>(data[0]);
}

void histogram256_gpu(DevMem2Db src, int* hist, uint* buf, cudaStream_t stream)
{
    histogram256<<<PARTIAL_HISTOGRAM256_COUNT, HISTOGRAM256_THREADBLOCK_SIZE, 0, stream>>>(
        DevMem2D_<uint>(src),
        buf,
@ -185,12 +185,12 @@ void histogram256_gpu(DevMem2Db src, int* hist, uint* buf, cudaStream_t stream)

    if (stream == 0)
        cudaSafeCall( cudaDeviceSynchronize() );
}

__constant__ int c_lut[256];

__global__ void equalizeHist(const DevMem2Db src, PtrStepb dst)
{
    const int x = blockIdx.x * blockDim.x + threadIdx.x;
    const int y = blockIdx.y * blockDim.y + threadIdx.y;

@ -200,10 +200,10 @@ __global__ void equalizeHist(const DevMem2Db src, PtrStepb dst)
        const int lut = c_lut[val];
        dst.ptr(y)[x] = __float2int_rn(255.0f / (src.cols * src.rows) * lut);
    }
}

void equalizeHist_gpu(DevMem2Db src, DevMem2Db dst, const int* lut, cudaStream_t stream)
{
    dim3 block(16, 16);
    dim3 grid(divUp(src.cols, block.x), divUp(src.rows, block.y));

@ -214,8 +214,6 @@ void equalizeHist_gpu(DevMem2Db src, DevMem2Db dst, const int* lut, cudaStream_t

    if (stream == 0)
        cudaSafeCall( cudaDeviceSynchronize() );
}

} // namespace hist
-
-END_OPENCV_DEVICE_NAMESPACE
+}}} // namespace cv { namespace gpu { namespace device
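Two details of the histogram file above are worth unpacking: addWord() splits each 32-bit load into four 8-bit pixels, and mergeHistogram256() reduces the PARTIAL_HISTOGRAM256_COUNT per-block histograms bin by bin. (On the !USE_SMEM_ATOMICS path, addByte additionally appears to emulate shared-memory atomics by tagging the upper bits of each counter with TAG_MASK and retrying until the tagged write survives — an inference from the do/while visible above.) A serial C++ sketch of the unpack-then-merge flow, with small illustrative sizes in place of the real constants:

#include <cstdio>

enum { BIN_COUNT = 256, PARTIAL_COUNT = 4 };  // illustrative, not the real 240

// Mirror of addWord above: one 32-bit load carries four 8-bit pixels.
static void addWord(unsigned int* hist, unsigned int data)
{
    ++hist[(data >>  0) & 0xFFU];
    ++hist[(data >>  8) & 0xFFU];
    ++hist[(data >> 16) & 0xFFU];
    ++hist[(data >> 24) & 0xFFU];
}

int main()
{
    // Several partial histograms stand in for the per-block results that
    // histogram256() writes to d_PartialHistograms.
    static unsigned int partial[PARTIAL_COUNT][BIN_COUNT] = {};
    const unsigned int pixels = 0x01020201U;  // bytes 0x01, 0x02, 0x02, 0x01

    for (int p = 0; p < PARTIAL_COUNT; ++p)
        addWord(partial[p], pixels);

    // Mirror of mergeHistogram256(): one reduction per bin.
    unsigned int hist[BIN_COUNT] = {};
    for (int bin = 0; bin < BIN_COUNT; ++bin)
        for (int p = 0; p < PARTIAL_COUNT; ++p)
            hist[bin] += partial[p][bin];

    printf("bin 1 = %u, bin 2 = %u\n", hist[1], hist[2]);  // prints 8 and 8
    return 0;
}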
@ -42,31 +42,31 @@

#include "internal_shared.hpp"

-BEGIN_OPENCV_DEVICE_NAMESPACE
+namespace cv { namespace gpu { namespace device
+{

// Other values are not supported
#define CELL_WIDTH 8
#define CELL_HEIGHT 8
#define CELLS_PER_BLOCK_X 2
#define CELLS_PER_BLOCK_Y 2

-namespace hog {
+namespace hog
+{

__constant__ int cnbins;
__constant__ int cblock_stride_x;
__constant__ int cblock_stride_y;
__constant__ int cnblocks_win_x;
__constant__ int cnblocks_win_y;
__constant__ int cblock_hist_size;
__constant__ int cblock_hist_size_2up;
__constant__ int cdescr_size;
__constant__ int cdescr_width;

/* Returns the nearest upper power of two, works only for
   the typical GPU thread count (pert block) values */
int power_2up(unsigned int n)
{
    if (n < 1) return 1;
    else if (n < 2) return 2;
    else if (n < 4) return 4;
@ -79,12 +79,12 @@ int power_2up(unsigned int n)
    else if (n < 512) return 512;
    else if (n < 1024) return 1024;
    return -1; // Input is too big
}

void set_up_constants(int nbins, int block_stride_x, int block_stride_y,
                      int nblocks_win_x, int nblocks_win_y)
{
    cudaSafeCall( cudaMemcpyToSymbol(cnbins, &nbins, sizeof(nbins)) );
    cudaSafeCall( cudaMemcpyToSymbol(cblock_stride_x, &block_stride_x, sizeof(block_stride_x)) );
    cudaSafeCall( cudaMemcpyToSymbol(cblock_stride_y, &block_stride_y, sizeof(block_stride_y)) );
@ -102,17 +102,17 @@ void set_up_constants(int nbins, int block_stride_x, int block_stride_y,

    int descr_size = descr_width * nblocks_win_y;
    cudaSafeCall( cudaMemcpyToSymbol(cdescr_size, &descr_size, sizeof(descr_size)) );
}

//----------------------------------------------------------------------------
// Histogram computation

template <int nblocks> // Number of histogram blocks processed by single GPU thread block
__global__ void compute_hists_kernel_many_blocks(const int img_block_width, const PtrElemStepf grad,
                                                 const PtrElemStep qangle, float scale, float* block_hists)
{
    const int block_x = threadIdx.z;
    const int cell_x = threadIdx.x / 16;
    const int cell_y = threadIdx.y;
@ -184,13 +184,13 @@ __global__ void compute_hists_kernel_many_blocks(const int img_block_width, cons
    int tid = (cell_y * CELLS_PER_BLOCK_Y + cell_x) * 16 + cell_thread_x;
    if (tid < cblock_hist_size)
        block_hist[tid] = final_hist[block_x * cblock_hist_size + tid];
}

void compute_hists(int nbins, int block_stride_x, int block_stride_y,
                   int height, int width, const DevMem2Df& grad,
                   const DevMem2Db& qangle, float sigma, float* block_hists)
{
    const int nblocks = 1;

    int img_block_width = (width - CELLS_PER_BLOCK_X * CELL_WIDTH + block_stride_x) /
@ -215,17 +215,17 @@ void compute_hists(int nbins, int block_stride_x, int block_stride_y,
    cudaSafeCall( cudaGetLastError() );

    cudaSafeCall( cudaDeviceSynchronize() );
}

//-------------------------------------------------------------
// Normalization of histograms via L2Hys_norm
//

template<int size>
__device__ float reduce_smem(volatile float* smem)
{
    unsigned int tid = threadIdx.x;
    float sum = smem[tid];

@ -247,15 +247,15 @@ __device__ float reduce_smem(volatile float* smem)
    sum = smem[0];

    return sum;
}

template <int nthreads, // Number of threads which process one block historgam
          int nblocks>  // Number of block hisograms processed by one GPU thread block
__global__ void normalize_hists_kernel_many_blocks(const int block_hist_size,
                                                   const int img_block_width,
                                                   float* block_hists, float threshold)
{
    if (blockIdx.x * blockDim.z + threadIdx.z >= img_block_width)
        return;

@ -287,12 +287,12 @@ __global__ void normalize_hists_kernel_many_blocks(const int block_hist_size,

    if (threadIdx.x < block_hist_size)
        hist[0] = elem * scale;
}

void normalize_hists(int nbins, int block_stride_x, int block_stride_y,
                     int height, int width, float* block_hists, float threshold)
{
    const int nblocks = 1;

    int block_hist_size = nbins * CELLS_PER_BLOCK_X * CELLS_PER_BLOCK_Y;
@ -319,21 +319,21 @@ void normalize_hists(int nbins, int block_stride_x, int block_stride_y,
    cudaSafeCall( cudaGetLastError() );

    cudaSafeCall( cudaDeviceSynchronize() );
}

//---------------------------------------------------------------------
// Linear SVM based classification
//

template <int nthreads, // Number of threads per one histogram block
          int nblocks>  // Number of histogram block processed by single GPU thread block
__global__ void classify_hists_kernel_many_blocks(const int img_win_width, const int img_block_width,
                                                  const int win_block_stride_x, const int win_block_stride_y,
                                                  const float* block_hists, const float* coefs,
                                                  float free_coef, float threshold, unsigned char* labels)
{
    const int win_x = threadIdx.z;
    if (blockIdx.x * blockDim.z + win_x >= img_win_width)
        return;
@ -386,13 +386,13 @@ __global__ void classify_hists_kernel_many_blocks(const int img_win_width, const

    if (threadIdx.x == 0)
        labels[blockIdx.y * img_win_width + blockIdx.x * blockDim.z + win_x] = (product + free_coef >= threshold);
}

void classify_hists(int win_height, int win_width, int block_stride_y, int block_stride_x,
                    int win_stride_y, int win_stride_x, int height, int width, float* block_hists,
                    float* coefs, float free_coef, float threshold, unsigned char* labels)
{
    const int nthreads = 256;
    const int nblocks = 1;

@ -413,16 +413,16 @@ void classify_hists(int win_height, int win_width, int block_stride_y, int block
    cudaSafeCall( cudaGetLastError() );

    cudaSafeCall( cudaDeviceSynchronize() );
}

//----------------------------------------------------------------------------
// Extract descriptors

template <int nthreads>
__global__ void extract_descrs_by_rows_kernel(const int img_block_width, const int win_block_stride_x, const int win_block_stride_y,
                                              const float* block_hists, PtrElemStepf descriptors)
{
    // Get left top corner of the window in src
    const float* hist = block_hists + (blockIdx.y * win_block_stride_y * img_block_width +
                                       blockIdx.x * win_block_stride_x) * cblock_hist_size;
@ -437,12 +437,12 @@ __global__ void extract_descrs_by_rows_kernel(const int img_block_width, const i
        int offset_x = i - offset_y * cdescr_width;
        descriptor[i] = hist[offset_y * img_block_width * cblock_hist_size + offset_x];
    }
}

void extract_descrs_by_rows(int win_height, int win_width, int block_stride_y, int block_stride_x, int win_stride_y, int win_stride_x,
                            int height, int width, float* block_hists, DevMem2Df descriptors)
{
    const int nthreads = 256;

    int win_block_stride_x = win_stride_x / block_stride_x;
@ -458,14 +458,14 @@ void extract_descrs_by_rows(int win_height, int win_width, int block_stride_y, i
    cudaSafeCall( cudaGetLastError() );

    cudaSafeCall( cudaDeviceSynchronize() );
}

template <int nthreads>
__global__ void extract_descrs_by_cols_kernel(const int img_block_width, const int win_block_stride_x,
                                              const int win_block_stride_y, const float* block_hists,
                                              PtrElemStepf descriptors)
{
    // Get left top corner of the window in src
    const float* hist = block_hists + (blockIdx.y * win_block_stride_y * img_block_width +
                                       blockIdx.x * win_block_stride_x) * cblock_hist_size;
@ -485,13 +485,13 @@ __global__ void extract_descrs_by_cols_kernel(const int img_block_width, const i
        descriptor[(x * cnblocks_win_y + y) * cblock_hist_size + idx_in_block]
            = hist[(y * img_block_width + x) * cblock_hist_size + idx_in_block];
    }
}

void extract_descrs_by_cols(int win_height, int win_width, int block_stride_y, int block_stride_x,
                            int win_stride_y, int win_stride_x, int height, int width, float* block_hists,
                            DevMem2Df descriptors)
{
    const int nthreads = 256;

    int win_block_stride_x = win_stride_x / block_stride_x;
@ -507,16 +507,16 @@ void extract_descrs_by_cols(int win_height, int win_width, int block_stride_y, i
    cudaSafeCall( cudaGetLastError() );

    cudaSafeCall( cudaDeviceSynchronize() );
}

//----------------------------------------------------------------------------
// Gradients computation

template <int nthreads, int correct_gamma>
__global__ void compute_gradients_8UC4_kernel(int height, int width, const PtrElemStep img,
                                              float angle_scale, PtrElemStepf grad, PtrElemStep qangle)
{
    const int x = blockIdx.x * blockDim.x + threadIdx.x;

    const uchar4* row = (const uchar4*)img.ptr(blockIdx.y);
@ -613,12 +613,12 @@ __global__ void compute_gradients_8UC4_kernel(int height, int width, const PtrEl
        ((uchar2*)qangle.ptr(blockIdx.y))[x] = make_uchar2(hidx, (hidx + 1) % cnbins);
        ((float2*)grad.ptr(blockIdx.y))[x] = make_float2(mag0 * (1.f - ang), mag0 * ang);
    }
}

void compute_gradients_8UC4(int nbins, int height, int width, const DevMem2Db& img,
                            float angle_scale, DevMem2Df grad, DevMem2Db qangle, bool correct_gamma)
{
    const int nthreads = 256;

    dim3 bdim(nthreads, 1);
@ -632,12 +632,12 @@ void compute_gradients_8UC4(int nbins, int height, int width, const DevMem2Db& i
    cudaSafeCall( cudaGetLastError() );

    cudaSafeCall( cudaDeviceSynchronize() );
}

template <int nthreads, int correct_gamma>
__global__ void compute_gradients_8UC1_kernel(int height, int width, const PtrElemStep img,
                                              float angle_scale, PtrElemStepf grad, PtrElemStep qangle)
{
    const int x = blockIdx.x * blockDim.x + threadIdx.x;

    const unsigned char* row = (const unsigned char*)img.ptr(blockIdx.y);
@ -685,12 +685,12 @@ __global__ void compute_gradients_8UC1_kernel(int height, int width, const PtrEl
        ((uchar2*)qangle.ptr(blockIdx.y))[x] = make_uchar2(hidx, (hidx + 1) % cnbins);
        ((float2*) grad.ptr(blockIdx.y))[x] = make_float2(mag * (1.f - ang), mag * ang);
    }
}

void compute_gradients_8UC1(int nbins, int height, int width, const DevMem2Db& img,
                            float angle_scale, DevMem2Df grad, DevMem2Db qangle, bool correct_gamma)
{
    const int nthreads = 256;

    dim3 bdim(nthreads, 1);
@ -704,27 +704,27 @@ void compute_gradients_8UC1(int nbins, int height, int width, const DevMem2Db& i
    cudaSafeCall( cudaGetLastError() );

    cudaSafeCall( cudaDeviceSynchronize() );
}


//-------------------------------------------------------------------
// Resize

texture<uchar4, 2, cudaReadModeNormalizedFloat> resize8UC4_tex;
texture<uchar, 2, cudaReadModeNormalizedFloat> resize8UC1_tex;

__global__ void resize_for_hog_kernel(float sx, float sy, DevMem2D_<uchar> dst, int colOfs)
{
    unsigned int x = blockIdx.x * blockDim.x + threadIdx.x;
    unsigned int y = blockIdx.y * blockDim.y + threadIdx.y;

    if (x < dst.cols && y < dst.rows)
        dst.ptr(y)[x] = tex2D(resize8UC1_tex, x * sx + colOfs, y * sy) * 255;
}

__global__ void resize_for_hog_kernel(float sx, float sy, DevMem2D_<uchar4> dst, int colOfs)
{
    unsigned int x = blockIdx.x * blockDim.x + threadIdx.x;
    unsigned int y = blockIdx.y * blockDim.y + threadIdx.y;

@ -733,11 +733,11 @@ __global__ void resize_for_hog_kernel(float sx, float sy, DevMem2D_<uchar4> dst,
        float4 val = tex2D(resize8UC4_tex, x * sx + colOfs, y * sy);
        dst.ptr(y)[x] = make_uchar4(val.x * 255, val.y * 255, val.z * 255, val.w * 255);
    }
}

template<class T, class TEX>
static void resize_for_hog(const DevMem2Db& src, DevMem2Db dst, TEX& tex)
{
    tex.filterMode = cudaFilterModeLinear;

    size_t texOfs = 0;
@ -765,11 +765,9 @@ static void resize_for_hog(const DevMem2Db& src, DevMem2Db dst, TEX& tex)
    cudaSafeCall( cudaDeviceSynchronize() );

    cudaSafeCall( cudaUnbindTexture(tex) );
}

void resize_8UC1(const DevMem2Db& src, DevMem2Db dst) { resize_for_hog<uchar> (src, dst, resize8UC1_tex); }
void resize_8UC4(const DevMem2Db& src, DevMem2Db dst) { resize_for_hog<uchar4>(src, dst, resize8UC4_tex); }

} // namespace hog
-
-END_OPENCV_DEVICE_NAMESPACE
+}}} // namespace cv { namespace gpu { namespace device
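power_2up() above rounds a thread count up with an if/else ladder; note that for n >= 1 it returns the smallest power of two strictly greater than n (e.g. 2 for n = 1, 4 for n = 2). Assuming a 32-bit unsigned int, the same mapping falls out of the classic bit-smearing trick; the helper below is a hypothetical equivalent for illustration, not part of the library:

#include <cstdio>

// Smallest power of two strictly greater than n, or -1 past 1024, matching
// the ladder above. Assumes unsigned int is 32 bits.
static int power_2up_bits(unsigned int n)
{
    if (n >= 1024) return -1;  // "Input is too big"
    n |= n >> 1;  n |= n >> 2;  n |= n >> 4;   // smear the highest set bit
    n |= n >> 8;  n |= n >> 16;                // down through all lower bits
    return (int)(n + 1);                       // 0 -> 1, 3 -> 4, 1023 -> 1024
}

int main()
{
    printf("%d %d %d %d\n",
           power_2up_bits(0),     // 1
           power_2up_bits(3),     // 4
           power_2up_bits(256),   // 512
           power_2up_bits(1023)); // 1024
    return 0;
}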
@ -46,18 +46,18 @@
|
|||||||
#include "opencv2/gpu/device/saturate_cast.hpp"
|
#include "opencv2/gpu/device/saturate_cast.hpp"
|
||||||
#include "opencv2/gpu/device/border_interpolate.hpp"
|
#include "opencv2/gpu/device/border_interpolate.hpp"
|
||||||
|
|
||||||
BEGIN_OPENCV_DEVICE_NAMESPACE
|
namespace cv { namespace gpu { namespace device
|
||||||
|
{
|
||||||
|
namespace imgproc
|
||||||
|
{
|
||||||
|
/////////////////////////////////// MeanShiftfiltering ///////////////////////////////////////////////
|
||||||
|
|
||||||
namespace imgproc {
|
texture<uchar4, 2> tex_meanshift;
|
||||||
|
|
||||||
/////////////////////////////////// MeanShiftfiltering ///////////////////////////////////////////////
|
__device__ short2 do_mean_shift(int x0, int y0, unsigned char* out,
|
||||||
|
|
||||||
texture<uchar4, 2> tex_meanshift;
|
|
||||||
|
|
||||||
__device__ short2 do_mean_shift(int x0, int y0, unsigned char* out,
|
|
||||||
size_t out_step, int cols, int rows,
|
size_t out_step, int cols, int rows,
|
||||||
int sp, int sr, int maxIter, float eps)
|
int sp, int sr, int maxIter, float eps)
|
||||||
{
|
{
|
||||||
int isr2 = sr*sr;
|
int isr2 = sr*sr;
|
||||||
uchar4 c = tex2D(tex_meanshift, x0, y0 );
|
uchar4 c = tex2D(tex_meanshift, x0, y0 );
|
||||||
|
|
||||||
@ -117,22 +117,22 @@ __device__ short2 do_mean_shift(int x0, int y0, unsigned char* out,
|
|||||||
*(uchar4*)(out + base) = c;
|
*(uchar4*)(out + base) = c;
|
||||||
|
|
||||||
return make_short2((short)x0, (short)y0);
|
return make_short2((short)x0, (short)y0);
|
||||||
}
|
}
|
||||||
|
|
||||||
__global__ void meanshift_kernel(unsigned char* out, size_t out_step, int cols, int rows, int sp, int sr, int maxIter, float eps )
|
__global__ void meanshift_kernel(unsigned char* out, size_t out_step, int cols, int rows, int sp, int sr, int maxIter, float eps )
|
||||||
{
|
{
|
||||||
int x0 = blockIdx.x * blockDim.x + threadIdx.x;
|
int x0 = blockIdx.x * blockDim.x + threadIdx.x;
|
||||||
int y0 = blockIdx.y * blockDim.y + threadIdx.y;
|
int y0 = blockIdx.y * blockDim.y + threadIdx.y;
|
||||||
|
|
||||||
if( x0 < cols && y0 < rows )
|
if( x0 < cols && y0 < rows )
|
||||||
do_mean_shift(x0, y0, out, out_step, cols, rows, sp, sr, maxIter, eps);
|
do_mean_shift(x0, y0, out, out_step, cols, rows, sp, sr, maxIter, eps);
|
||||||
}
|
}
|
||||||
|
|
||||||
__global__ void meanshiftproc_kernel(unsigned char* outr, size_t outrstep,
|
__global__ void meanshiftproc_kernel(unsigned char* outr, size_t outrstep,
|
||||||
unsigned char* outsp, size_t outspstep,
|
unsigned char* outsp, size_t outspstep,
|
||||||
int cols, int rows,
|
int cols, int rows,
|
||||||
int sp, int sr, int maxIter, float eps)
|
int sp, int sr, int maxIter, float eps)
|
||||||
{
|
{
|
||||||
int x0 = blockIdx.x * blockDim.x + threadIdx.x;
|
int x0 = blockIdx.x * blockDim.x + threadIdx.x;
|
||||||
int y0 = blockIdx.y * blockDim.y + threadIdx.y;
|
int y0 = blockIdx.y * blockDim.y + threadIdx.y;
|
||||||
|
|
||||||
@ -141,10 +141,10 @@ __global__ void meanshiftproc_kernel(unsigned char* outr, size_t outrstep,
|
|||||||
int basesp = (blockIdx.y * blockDim.y + threadIdx.y) * outspstep + (blockIdx.x * blockDim.x + threadIdx.x) * 2 * sizeof(short);
|
int basesp = (blockIdx.y * blockDim.y + threadIdx.y) * outspstep + (blockIdx.x * blockDim.x + threadIdx.x) * 2 * sizeof(short);
|
||||||
*(short2*)(outsp + basesp) = do_mean_shift(x0, y0, outr, outrstep, cols, rows, sp, sr, maxIter, eps);
|
*(short2*)(outsp + basesp) = do_mean_shift(x0, y0, outr, outrstep, cols, rows, sp, sr, maxIter, eps);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void meanShiftFiltering_gpu(const DevMem2Db& src, DevMem2Db dst, int sp, int sr, int maxIter, float eps, cudaStream_t stream)
|
void meanShiftFiltering_gpu(const DevMem2Db& src, DevMem2Db dst, int sp, int sr, int maxIter, float eps, cudaStream_t stream)
|
||||||
{
|
{
|
||||||
dim3 grid(1, 1, 1);
|
dim3 grid(1, 1, 1);
|
||||||
dim3 threads(32, 8, 1);
|
dim3 threads(32, 8, 1);
|
||||||
grid.x = divUp(src.cols, threads.x);
|
grid.x = divUp(src.cols, threads.x);
|
||||||
@ -160,10 +160,10 @@ void meanShiftFiltering_gpu(const DevMem2Db& src, DevMem2Db dst, int sp, int sr,
|
|||||||
cudaSafeCall( cudaDeviceSynchronize() );
|
cudaSafeCall( cudaDeviceSynchronize() );
|
||||||
|
|
||||||
//cudaSafeCall( cudaUnbindTexture( tex_meanshift ) );
|
//cudaSafeCall( cudaUnbindTexture( tex_meanshift ) );
|
||||||
}
|
}
|
||||||
|
|
||||||
void meanShiftProc_gpu(const DevMem2Db& src, DevMem2Db dstr, DevMem2Db dstsp, int sp, int sr, int maxIter, float eps, cudaStream_t stream)
|
void meanShiftProc_gpu(const DevMem2Db& src, DevMem2Db dstr, DevMem2Db dstsp, int sp, int sr, int maxIter, float eps, cudaStream_t stream)
|
||||||
{
|
{
|
||||||
dim3 grid(1, 1, 1);
|
dim3 grid(1, 1, 1);
|
||||||
dim3 threads(32, 8, 1);
|
dim3 threads(32, 8, 1);
|
||||||
grid.x = divUp(src.cols, threads.x);
|
grid.x = divUp(src.cols, threads.x);
|
||||||
@ -179,13 +179,13 @@ void meanShiftProc_gpu(const DevMem2Db& src, DevMem2Db dstr, DevMem2Db dstsp, in
|
|||||||
cudaSafeCall( cudaDeviceSynchronize() );
|
cudaSafeCall( cudaDeviceSynchronize() );
|
||||||
|
|
||||||
//cudaSafeCall( cudaUnbindTexture( tex_meanshift ) );
|
//cudaSafeCall( cudaUnbindTexture( tex_meanshift ) );
|
||||||
}
|
}
|
||||||
|
|
||||||
/////////////////////////////////// drawColorDisp ///////////////////////////////////////////////
|
/////////////////////////////////// drawColorDisp ///////////////////////////////////////////////
|
||||||
|
|
||||||
template <typename T>
|
template <typename T>
|
||||||
__device__ unsigned int cvtPixel(T d, int ndisp, float S = 1, float V = 1)
|
__device__ unsigned int cvtPixel(T d, int ndisp, float S = 1, float V = 1)
|
||||||
{
|
{
|
||||||
unsigned int H = ((ndisp-d) * 240)/ndisp;
|
unsigned int H = ((ndisp-d) * 240)/ndisp;
|
||||||
|
|
||||||
unsigned int hi = (H/60) % 6;
|
unsigned int hi = (H/60) % 6;
|
||||||
@ -243,10 +243,10 @@ __device__ unsigned int cvtPixel(T d, int ndisp, float S = 1, float V = 1)
|
|||||||
const unsigned int a = 255U;
|
const unsigned int a = 255U;
|
||||||
|
|
||||||
return (a << 24) + (r << 16) + (g << 8) + b;
|
return (a << 24) + (r << 16) + (g << 8) + b;
|
||||||
}
|
}
|
||||||
|
|
||||||
__global__ void drawColorDisp(uchar* disp, size_t disp_step, uchar* out_image, size_t out_step, int width, int height, int ndisp)
|
__global__ void drawColorDisp(uchar* disp, size_t disp_step, uchar* out_image, size_t out_step, int width, int height, int ndisp)
|
||||||
{
|
{
|
||||||
const int x = (blockIdx.x * blockDim.x + threadIdx.x) << 2;
|
const int x = (blockIdx.x * blockDim.x + threadIdx.x) << 2;
|
||||||
const int y = blockIdx.y * blockDim.y + threadIdx.y;
|
const int y = blockIdx.y * blockDim.y + threadIdx.y;
|
||||||
|
|
||||||
@ -263,10 +263,10 @@ __global__ void drawColorDisp(uchar* disp, size_t disp_step, uchar* out_image, s
|
|||||||
uint4* line = (uint4*)(out_image + y * out_step);
|
uint4* line = (uint4*)(out_image + y * out_step);
|
||||||
line[x >> 2] = res;
|
line[x >> 2] = res;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
__global__ void drawColorDisp(short* disp, size_t disp_step, uchar* out_image, size_t out_step, int width, int height, int ndisp)
|
__global__ void drawColorDisp(short* disp, size_t disp_step, uchar* out_image, size_t out_step, int width, int height, int ndisp)
|
||||||
{
|
{
|
||||||
const int x = (blockIdx.x * blockDim.x + threadIdx.x) << 1;
|
const int x = (blockIdx.x * blockDim.x + threadIdx.x) << 1;
|
||||||
const int y = blockIdx.y * blockDim.y + threadIdx.y;
|
const int y = blockIdx.y * blockDim.y + threadIdx.y;
|
||||||
|
|
||||||
@ -281,11 +281,11 @@ __global__ void drawColorDisp(short* disp, size_t disp_step, uchar* out_image, s
|
|||||||
uint2* line = (uint2*)(out_image + y * out_step);
|
uint2* line = (uint2*)(out_image + y * out_step);
|
||||||
line[x >> 1] = res;
|
line[x >> 1] = res;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
void drawColorDisp_gpu(const DevMem2Db& src, const DevMem2Db& dst, int ndisp, const cudaStream_t& stream)
|
void drawColorDisp_gpu(const DevMem2Db& src, const DevMem2Db& dst, int ndisp, const cudaStream_t& stream)
|
||||||
{
|
{
|
||||||
dim3 threads(16, 16, 1);
|
dim3 threads(16, 16, 1);
|
||||||
dim3 grid(1, 1, 1);
|
dim3 grid(1, 1, 1);
|
||||||
grid.x = divUp(src.cols, threads.x << 2);
|
grid.x = divUp(src.cols, threads.x << 2);
|
||||||
@ -296,10 +296,10 @@ void drawColorDisp_gpu(const DevMem2Db& src, const DevMem2Db& dst, int ndisp, co
|
|||||||
|
|
||||||
if (stream == 0)
|
if (stream == 0)
|
||||||
cudaSafeCall( cudaDeviceSynchronize() );
|
cudaSafeCall( cudaDeviceSynchronize() );
|
||||||
}
|
}
|
||||||
|
|
||||||
void drawColorDisp_gpu(const DevMem2D_<short>& src, const DevMem2Db& dst, int ndisp, const cudaStream_t& stream)
|
void drawColorDisp_gpu(const DevMem2D_<short>& src, const DevMem2Db& dst, int ndisp, const cudaStream_t& stream)
|
||||||
{
|
{
|
||||||
dim3 threads(32, 8, 1);
|
dim3 threads(32, 8, 1);
|
||||||
dim3 grid(1, 1, 1);
|
dim3 grid(1, 1, 1);
|
||||||
grid.x = divUp(src.cols, threads.x << 1);
|
grid.x = divUp(src.cols, threads.x << 1);
|
||||||
@ -310,15 +310,15 @@ void drawColorDisp_gpu(const DevMem2D_<short>& src, const DevMem2Db& dst, int nd
|
|||||||
|
|
||||||
if (stream == 0)
|
if (stream == 0)
|
||||||
cudaSafeCall( cudaDeviceSynchronize() );
|
cudaSafeCall( cudaDeviceSynchronize() );
|
||||||
}
|
}
|
||||||
|
|
||||||
/////////////////////////////////// reprojectImageTo3D ///////////////////////////////////////////////
|
/////////////////////////////////// reprojectImageTo3D ///////////////////////////////////////////////
|
||||||
|
|
||||||
__constant__ float cq[16];
|
__constant__ float cq[16];
|
||||||
|
|
||||||
template <typename T>
|
template <typename T>
|
||||||
__global__ void reprojectImageTo3D(const T* disp, size_t disp_step, float* xyzw, size_t xyzw_step, int rows, int cols)
|
__global__ void reprojectImageTo3D(const T* disp, size_t disp_step, float* xyzw, size_t xyzw_step, int rows, int cols)
|
||||||
{
|
{
|
||||||
const int x = blockIdx.x * blockDim.x + threadIdx.x;
|
const int x = blockIdx.x * blockDim.x + threadIdx.x;
|
||||||
const int y = blockIdx.y * blockDim.y + threadIdx.y;
|
const int y = blockIdx.y * blockDim.y + threadIdx.y;
|
||||||
|
|
||||||
@ -344,11 +344,11 @@ __global__ void reprojectImageTo3D(const T* disp, size_t disp_step, float* xyzw,
|
|||||||
|
|
||||||
*(float4*)(xyzw + xyzw_step * y + (x * 4)) = v;
|
*(float4*)(xyzw + xyzw_step * y + (x * 4)) = v;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
template <typename T>
|
template <typename T>
|
||||||
inline void reprojectImageTo3D_caller(const DevMem2D_<T>& disp, const DevMem2Df& xyzw, const float* q, const cudaStream_t& stream)
|
inline void reprojectImageTo3D_caller(const DevMem2D_<T>& disp, const DevMem2Df& xyzw, const float* q, const cudaStream_t& stream)
|
||||||
{
|
{
|
||||||
dim3 threads(32, 8, 1);
|
dim3 threads(32, 8, 1);
|
||||||
dim3 grid(1, 1, 1);
|
dim3 grid(1, 1, 1);
|
||||||
grid.x = divUp(disp.cols, threads.x);
|
grid.x = divUp(disp.cols, threads.x);
|
||||||
@@ -361,23 +361,23 @@ inline void reprojectImageTo3D_caller(const DevMem2D_<T>& disp, const DevMem2Df&

    if (stream == 0)
        cudaSafeCall( cudaDeviceSynchronize() );
}

void reprojectImageTo3D_gpu(const DevMem2Db& disp, const DevMem2Df& xyzw, const float* q, const cudaStream_t& stream)
{
    reprojectImageTo3D_caller(disp, xyzw, q, stream);
}

void reprojectImageTo3D_gpu(const DevMem2D_<short>& disp, const DevMem2Df& xyzw, const float* q, const cudaStream_t& stream)
{
    reprojectImageTo3D_caller(disp, xyzw, q, stream);
}

//////////////////////////////////////// Extract Cov Data ////////////////////////////////////////////////

__global__ void extractCovData_kernel(const int cols, const int rows, const PtrStepf Dx,
                                      const PtrStepf Dy, PtrStepf dst)
{
    const int x = blockIdx.x * blockDim.x + threadIdx.x;
    const int y = blockIdx.y * blockDim.y + threadIdx.y;

@@ -390,10 +390,10 @@ __global__ void extractCovData_kernel(const int cols, const int rows, const PtrS
        dst.ptr(y + rows)[x] = dx * dy;
        dst.ptr(y + (rows << 1))[x] = dy * dy;
    }
}

void extractCovData_caller(const DevMem2Df Dx, const DevMem2Df Dy, PtrStepf dst, cudaStream_t stream)
{
    dim3 threads(32, 8);
    dim3 grid(divUp(Dx.cols, threads.x), divUp(Dx.rows, threads.y));

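The two visible stores reveal the output layout: one buffer three image-heights tall, with dx*dx in rows [0, rows) (its store falls in the elided lines), dx*dy in [rows, 2*rows) and dy*dy in [2*rows, 3*rows). A hypothetical host-side allocation for it:

    // band-stacked covariance buffer (allocation is illustrative, not from this commit):
    cv::gpu::GpuMat cov(3 * Dx.rows, Dx.cols, CV_32F);
    extractCovData_caller(Dx, Dy, cov, stream);   // band 0: dx*dx, 1: dx*dy, 2: dy*dy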
@@ -402,16 +402,16 @@ void extractCovData_caller(const DevMem2Df Dx, const DevMem2Df Dy, PtrStepf dst,

    if (stream == 0)
        cudaSafeCall( cudaDeviceSynchronize() );
}

/////////////////////////////////////////// Corner Harris /////////////////////////////////////////////////

texture<float, 2> harrisDxTex;
texture<float, 2> harrisDyTex;

__global__ void cornerHarris_kernel(const int cols, const int rows, const int block_size, const float k,
                                    PtrStepb dst)
{
    const unsigned int x = blockIdx.x * blockDim.x + threadIdx.x;
    const unsigned int y = blockIdx.y * blockDim.y + threadIdx.y;

@@ -440,12 +440,12 @@ __global__ void cornerHarris_kernel(const int cols, const int rows, const int bl

        ((float*)dst.ptr(y))[x] = a * c - b * b - k * (a + c) * (a + c);
    }
}

template <typename BR, typename BC>
__global__ void cornerHarris_kernel(const int cols, const int rows, const int block_size, const float k,
                                    PtrStepb dst, BR border_row, BC border_col)
{
    const unsigned int x = blockIdx.x * blockDim.x + threadIdx.x;
    const unsigned int y = blockIdx.y * blockDim.y + threadIdx.y;

@@ -476,11 +476,11 @@ __global__ void cornerHarris_kernel(const int cols, const int rows, const int bl

        ((float*)dst.ptr(y))[x] = a * c - b * b - k * (a + c) * (a + c);
    }
}

void cornerHarris_caller(const int block_size, const float k, const DevMem2Db Dx, const DevMem2Db Dy, DevMem2Db dst,
                         int border_type, cudaStream_t stream)
{
    const int rows = Dx.rows;
    const int cols = Dx.cols;

@@ -516,16 +516,16 @@ void cornerHarris_caller(const int block_size, const float k, const DevMem2Db Dx

    //cudaSafeCall(cudaUnbindTexture(harrisDxTex));
    //cudaSafeCall(cudaUnbindTexture(harrisDyTex));
}

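The store above is the Harris response in closed form. With the windowed structure tensor M = [[a, b], [b, c]] (a = sum of dx*dx, b = sum of dx*dy, c = sum of dy*dy, accumulated over a block_size x block_size window in the elided loop):

    // a * c - b * b         == det(M)
    // (a + c) * (a + c)     == trace(M)^2
    // R = det(M) - k * trace(M)^2   -- large positive R marks a corner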
/////////////////////////////////////////// Corner Min Eigen Val /////////////////////////////////////////////////

texture<float, 2> minEigenValDxTex;
texture<float, 2> minEigenValDyTex;

__global__ void cornerMinEigenVal_kernel(const int cols, const int rows, const int block_size,
                                         PtrStepb dst)
{
    const unsigned int x = blockIdx.x * blockDim.x + threadIdx.x;
    const unsigned int y = blockIdx.y * blockDim.y + threadIdx.y;

@@ -556,13 +556,13 @@ __global__ void cornerMinEigenVal_kernel(const int cols, const int rows, const i
        c *= 0.5f;
        ((float*)dst.ptr(y))[x] = (a + c) - sqrtf((a - c) * (a - c) + b * b);
    }
}


template <typename BR, typename BC>
__global__ void cornerMinEigenVal_kernel(const int cols, const int rows, const int block_size,
                                         PtrStepb dst, BR border_row, BC border_col)
{
    const unsigned int x = blockIdx.x * blockDim.x + threadIdx.x;
    const unsigned int y = blockIdx.y * blockDim.y + threadIdx.y;

@@ -595,11 +595,11 @@ __global__ void cornerMinEigenVal_kernel(const int cols, const int rows, const i
        c *= 0.5f;
        ((float*)dst.ptr(y))[x] = (a + c) - sqrtf((a - c) * (a - c) + b * b);
    }
}

void cornerMinEigenVal_caller(const int block_size, const DevMem2Db Dx, const DevMem2Db Dy, DevMem2Db dst,
                              int border_type, cudaStream_t stream)
{
    const int rows = Dx.rows;
    const int cols = Dx.cols;

@@ -635,12 +635,12 @@ void cornerMinEigenVal_caller(const int block_size, const DevMem2Db Dx, const De

    //cudaSafeCall(cudaUnbindTexture(minEigenValDxTex));
    //cudaSafeCall(cudaUnbindTexture(minEigenValDyTex));
}

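Here a and c arrive halved (the c *= 0.5f shown, with a matching a *= 0.5f presumably in the elided lines) while b stays the unhalved sum of dx*dy, so the store is the exact smaller eigenvalue of the structure tensor:

    // For M = [[A, B], [B, C]], with a = A/2, c = C/2, b = B:
    // lambda_min = (A + C)/2 - sqrt( ((A - C)/2)^2 + B^2 )
    //            = (a + c) - sqrtf((a - c) * (a - c) + b * b)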
////////////////////////////// Column Sum //////////////////////////////////////

__global__ void column_sumKernel_32F(int cols, int rows, const PtrStepb src, const PtrStepb dst)
{
    int x = blockIdx.x * blockDim.x + threadIdx.x;

    if (x < cols)
@@ -657,11 +657,11 @@ __global__ void column_sumKernel_32F(int cols, int rows, const PtrStepb src, con
            dst_data += dst.step;
        }
    }
}


void columnSum_32F(const DevMem2Db src, const DevMem2Db dst)
{
    dim3 threads(256);
    dim3 grid(divUp(src.cols, threads.x));

@@ -669,14 +669,14 @@ void columnSum_32F(const DevMem2Db src, const DevMem2Db dst)
    cudaSafeCall( cudaGetLastError() );

    cudaSafeCall( cudaDeviceSynchronize() );
}

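One thread owns one column and keeps a running total down the image; only the tail of that loop survives in this hunk. A hedged reconstruction of the full pattern:

    // hypothetical reconstruction of the elided per-column loop:
    const unsigned char* src_data = src.data + x * sizeof(float);
    unsigned char* dst_data = dst.data + x * sizeof(float);
    float sum = 0.f;
    for (int y = 0; y < rows; ++y)
    {
        sum += *(const float*)src_data;   // running prefix sum of the column
        *(float*)dst_data = sum;
        src_data += src.step;
        dst_data += dst.step;             // the surviving line above
    }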
//////////////////////////////////////////////////////////////////////////
// mulSpectrums

__global__ void mulSpectrumsKernel(const PtrStep<cufftComplex> a, const PtrStep<cufftComplex> b, DevMem2D_<cufftComplex> c)
{
    const int x = blockIdx.x * blockDim.x + threadIdx.x;
    const int y = blockIdx.y * blockDim.y + threadIdx.y;

@@ -684,11 +684,11 @@ __global__ void mulSpectrumsKernel(const PtrStep<cufftComplex> a, const PtrStep<
    {
        c.ptr(y)[x] = cuCmulf(a.ptr(y)[x], b.ptr(y)[x]);
    }
}


void mulSpectrums(const PtrStep<cufftComplex> a, const PtrStep<cufftComplex> b, DevMem2D_<cufftComplex> c, cudaStream_t stream)
{
    dim3 threads(256);
    dim3 grid(divUp(c.cols, threads.x), divUp(c.rows, threads.y));

@@ -697,14 +697,14 @@ void mulSpectrums(const PtrStep<cufftComplex> a, const PtrStep<cufftComplex> b,

    if (stream == 0)
        cudaSafeCall( cudaDeviceSynchronize() );
}

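cuCmulf comes from CUDA's cuComplex.h and is the plain complex product, so this kernel performs per-bin spectrum multiplication, i.e. convolution in the frequency domain. For reference:

    // cuCmulf(a, b) = (a.x*b.x - a.y*b.y,  a.x*b.y + a.y*b.x)
    // (ar + i*ai)(br + i*bi) = (ar*br - ai*bi) + i*(ar*bi + ai*br)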
//////////////////////////////////////////////////////////////////////////
// mulSpectrums_CONJ

__global__ void mulSpectrumsKernel_CONJ(const PtrStep<cufftComplex> a, const PtrStep<cufftComplex> b, DevMem2D_<cufftComplex> c)
{
    const int x = blockIdx.x * blockDim.x + threadIdx.x;
    const int y = blockIdx.y * blockDim.y + threadIdx.y;

@@ -712,11 +712,11 @@ __global__ void mulSpectrumsKernel_CONJ(const PtrStep<cufftComplex> a, const Ptr
    {
        c.ptr(y)[x] = cuCmulf(a.ptr(y)[x], cuConjf(b.ptr(y)[x]));
    }
}


void mulSpectrums_CONJ(const PtrStep<cufftComplex> a, const PtrStep<cufftComplex> b, DevMem2D_<cufftComplex> c, cudaStream_t stream)
{
    dim3 threads(256);
    dim3 grid(divUp(c.cols, threads.x), divUp(c.rows, threads.y));

@@ -725,14 +725,14 @@ void mulSpectrums_CONJ(const PtrStep<cufftComplex> a, const PtrStep<cufftComplex

    if (stream == 0)
        cudaSafeCall( cudaDeviceSynchronize() );
}

//////////////////////////////////////////////////////////////////////////
// mulAndScaleSpectrums

__global__ void mulAndScaleSpectrumsKernel(const PtrStep<cufftComplex> a, const PtrStep<cufftComplex> b, float scale, DevMem2D_<cufftComplex> c)
{
    const int x = blockIdx.x * blockDim.x + threadIdx.x;
    const int y = blockIdx.y * blockDim.y + threadIdx.y;

@@ -741,11 +741,11 @@ __global__ void mulAndScaleSpectrumsKernel(const PtrStep<cufftComplex> a, const
        cufftComplex v = cuCmulf(a.ptr(y)[x], b.ptr(y)[x]);
        c.ptr(y)[x] = make_cuFloatComplex(cuCrealf(v) * scale, cuCimagf(v) * scale);
    }
}


void mulAndScaleSpectrums(const PtrStep<cufftComplex> a, const PtrStep<cufftComplex> b, float scale, DevMem2D_<cufftComplex> c, cudaStream_t stream)
{
    dim3 threads(256);
    dim3 grid(divUp(c.cols, threads.x), divUp(c.rows, threads.y));

@@ -754,14 +754,14 @@ void mulAndScaleSpectrums(const PtrStep<cufftComplex> a, const PtrStep<cufftComp

    if (stream)
        cudaSafeCall( cudaDeviceSynchronize() );
}


//////////////////////////////////////////////////////////////////////////
// mulAndScaleSpectrums_CONJ

__global__ void mulAndScaleSpectrumsKernel_CONJ(const PtrStep<cufftComplex> a, const PtrStep<cufftComplex> b, float scale, DevMem2D_<cufftComplex> c)
{
    const int x = blockIdx.x * blockDim.x + threadIdx.x;
    const int y = blockIdx.y * blockDim.y + threadIdx.y;

@@ -770,11 +770,11 @@ __global__ void mulAndScaleSpectrumsKernel_CONJ(const PtrStep<cufftComplex> a, c
        cufftComplex v = cuCmulf(a.ptr(y)[x], cuConjf(b.ptr(y)[x]));
        c.ptr(y)[x] = make_cuFloatComplex(cuCrealf(v) * scale, cuCimagf(v) * scale);
    }
}


void mulAndScaleSpectrums_CONJ(const PtrStep<cufftComplex> a, const PtrStep<cufftComplex> b, float scale, DevMem2D_<cufftComplex> c, cudaStream_t stream)
{
    dim3 threads(256);
    dim3 grid(divUp(c.cols, threads.x), divUp(c.rows, threads.y));

@@ -783,26 +783,26 @@ void mulAndScaleSpectrums_CONJ(const PtrStep<cufftComplex> a, const PtrStep<cuff

    if (stream == 0)
        cudaSafeCall( cudaDeviceSynchronize() );
}

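The fused scale is typically the 1/(width*height) factor that cuFFT's unnormalized inverse transform leaves behind, folded into the multiply to save a pass. A hedged usage sketch (plans, buffers and names are assumed, not from this commit):

    // after forward FFTs of two images into a_spect / b_spect:
    float scale = 1.f / (float)(dft_rows * dft_cols);
    mulAndScaleSpectrums_CONJ(a_spect, b_spect, scale, c_spect, stream);
    // an inverse cufftExec* on c_spect then yields the cross-correlation surface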
//////////////////////////////////////////////////////////////////////////
// buildWarpMaps

// TODO use intrinsics like __sinf and so on

namespace build_warp_maps
{

    __constant__ float ck_rinv[9];
    __constant__ float cr_kinv[9];
    __constant__ float ct[3];
    __constant__ float cscale;
}


class PlaneMapper
{
public:
    static __device__ __forceinline__ void mapBackward(float u, float v, float &x, float &y)
    {
        using namespace build_warp_maps;

@@ -818,12 +818,12 @@ public:
        x /= z;
        y /= z;
    }
};


class CylindricalMapper
{
public:
    static __device__ __forceinline__ void mapBackward(float u, float v, float &x, float &y)
    {
        using namespace build_warp_maps;

@@ -841,12 +841,12 @@ public:
        if (z > 0) { x /= z; y /= z; }
        else x = y = -1;
    }
};

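Each mapper answers the backward question: given a pixel (u, v) of the warped output, where does it come from in the source image? The constants uploaded by the host functions below feed that math: cr_kinv (R*K^-1) lifts the pixel onto the warp surface, ck_rinv (K*R^-1) projects it back, ct is the plane translation and cscale the warp scale; a non-positive depth marks points behind the camera, flagged as (-1, -1). The 3x3 products in the elided lines have this shape:

    // generic row-major 3x3 * vec3 helper of the kind used in the elided lines
    // (a sketch; the actual kernels inline the arithmetic):
    __device__ __forceinline__ void mul3x3(const float m[9], float3 p,
                                           float& x, float& y, float& z)
    {
        x = m[0]*p.x + m[1]*p.y + m[2]*p.z;
        y = m[3]*p.x + m[4]*p.y + m[5]*p.z;
        z = m[6]*p.x + m[7]*p.y + m[8]*p.z;
    }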
class SphericalMapper
{
public:
    static __device__ __forceinline__ void mapBackward(float u, float v, float &x, float &y)
    {
        using namespace build_warp_maps;

@@ -867,13 +867,13 @@ public:
        if (z > 0) { x /= z; y /= z; }
        else x = y = -1;
    }
};


template <typename Mapper>
__global__ void buildWarpMapsKernel(int tl_u, int tl_v, int cols, int rows,
                                    PtrStepf map_x, PtrStepf map_y)
{
    int du = blockIdx.x * blockDim.x + threadIdx.x;
    int dv = blockIdx.y * blockDim.y + threadIdx.y;
    if (du < cols && dv < rows)
@@ -885,13 +885,13 @@ __global__ void buildWarpMapsKernel(int tl_u, int tl_v, int cols, int rows,
        map_x.ptr(dv)[du] = x;
        map_y.ptr(dv)[du] = y;
    }
}


void buildWarpPlaneMaps(int tl_u, int tl_v, DevMem2Df map_x, DevMem2Df map_y,
                        const float k_rinv[9], const float r_kinv[9], const float t[3],
                        float scale, cudaStream_t stream)
{
    cudaSafeCall(cudaMemcpyToSymbol(build_warp_maps::ck_rinv, k_rinv, 9*sizeof(float)));
    cudaSafeCall(cudaMemcpyToSymbol(build_warp_maps::cr_kinv, r_kinv, 9*sizeof(float)));
    cudaSafeCall(cudaMemcpyToSymbol(build_warp_maps::ct, t, 3*sizeof(float)));
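buildWarpMapsKernel is a small policy template: the Mapper type contributes mapBackward, and the kernel only adds the tile origin and writes the two map planes (later fed to a remap). A hedged reconstruction of its elided middle:

    // inside the bounds check (reconstruction, names assumed):
    const float u = tl_u + du;         // tile-local -> global warped coordinate
    const float v = tl_v + dv;
    float x, y;
    Mapper::mapBackward(u, v, x, y);   // then stored via map_x / map_y above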
@@ -907,13 +907,13 @@ void buildWarpPlaneMaps(int tl_u, int tl_v, DevMem2Df map_x, DevMem2Df map_y,
    cudaSafeCall(cudaGetLastError());
    if (stream == 0)
        cudaSafeCall(cudaDeviceSynchronize());
}


void buildWarpCylindricalMaps(int tl_u, int tl_v, DevMem2Df map_x, DevMem2Df map_y,
                              const float k_rinv[9], const float r_kinv[9], float scale,
                              cudaStream_t stream)
{
    cudaSafeCall(cudaMemcpyToSymbol(build_warp_maps::ck_rinv, k_rinv, 9*sizeof(float)));
    cudaSafeCall(cudaMemcpyToSymbol(build_warp_maps::cr_kinv, r_kinv, 9*sizeof(float)));
    cudaSafeCall(cudaMemcpyToSymbol(build_warp_maps::cscale, &scale, sizeof(float)));

@@ -928,13 +928,13 @@ void buildWarpCylindricalMaps(int tl_u, int tl_v, DevMem2Df map_x, DevMem2Df map
    cudaSafeCall(cudaGetLastError());
    if (stream == 0)
        cudaSafeCall(cudaDeviceSynchronize());
}


void buildWarpSphericalMaps(int tl_u, int tl_v, DevMem2Df map_x, DevMem2Df map_y,
                            const float k_rinv[9], const float r_kinv[9], float scale,
                            cudaStream_t stream)
{
    cudaSafeCall(cudaMemcpyToSymbol(build_warp_maps::ck_rinv, k_rinv, 9*sizeof(float)));
    cudaSafeCall(cudaMemcpyToSymbol(build_warp_maps::cr_kinv, r_kinv, 9*sizeof(float)));
    cudaSafeCall(cudaMemcpyToSymbol(build_warp_maps::cscale, &scale, sizeof(float)));
@@ -949,18 +949,18 @@ void buildWarpSphericalMaps(int tl_u, int tl_v, DevMem2Df map_x, DevMem2Df map_y
    cudaSafeCall(cudaGetLastError());
    if (stream == 0)
        cudaSafeCall(cudaDeviceSynchronize());
}


//////////////////////////////////////////////////////////////////////////
// convolve

#define CONVOLVE_MAX_KERNEL_SIZE 17

__constant__ float c_convolveKernel[CONVOLVE_MAX_KERNEL_SIZE * CONVOLVE_MAX_KERNEL_SIZE];

__global__ void convolve(const DevMem2Df src, PtrStepf dst, int kWidth, int kHeight)
{
    __shared__ float smem[16 + 2 * 8][16 + 2 * 8];

    const int x = blockIdx.x * blockDim.x + threadIdx.x;
@@ -1014,10 +1014,10 @@ __global__ void convolve(const DevMem2Df src, PtrStepf dst, int kWidth, int kHei

        dst.ptr(y)[x] = res;
    }
}

void convolve_gpu(const DevMem2Df& src, const PtrStepf& dst, int kWidth, int kHeight, float* kernel, cudaStream_t stream)
{
    cudaSafeCall(cudaMemcpyToSymbol(c_convolveKernel, kernel, kWidth * kHeight * sizeof(float), 0, cudaMemcpyDeviceToDevice) );

    const dim3 block(16, 16);
@@ -1028,8 +1028,6 @@ void convolve_gpu(const DevMem2Df& src, const PtrStepf& dst, int kWidth, int kHe

    if (stream == 0)
        cudaSafeCall(cudaDeviceSynchronize());
}
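The shared tile is sized for the worst case the constant buffer allows: CONVOLVE_MAX_KERNEL_SIZE of 17 means a radius of (17 - 1) / 2 = 8, so a 16x16 block needs a 16 + 2*8 = 32-wide apron-padded tile:

    // smem[32][32]: 16x16 output pixels plus an 8-pixel border on every side,
    // loaded once per block and reused for all kWidth * kHeight kernel taps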
+    } // namespace imgproc
-} // namespace imgproc
+}}} // namespace cv { namespace gpu { namespace device {

-END_OPENCV_DEVICE_NAMESPACE
@@ -50,7 +50,7 @@
#include "safe_call.hpp"

#ifndef CV_PI
-#define CV_PI 3.1415926535897932384626433832795f
+#define CV_PI 3.1415926535897932384626433832795
#endif
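Dropping the f suffix turns the fallback CV_PI into a double literal, matching the CV_PI of opencv2/core; the explicitly single-precision CV_PI_F, guarded just below, keeps the float role. The difference shows up in double expressions:

    // old float literal, widened:  (double)3.1415926535897932384626433832795f
    //                              == 3.14159274101257324...
    // new double literal:          == 3.14159265358979311...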

#ifndef CV_PI_F
@@ -61,27 +61,21 @@
#endif
#endif

-#define BEGIN_OPENCV_DEVICE_NAMESPACE namespace cv { namespace gpu { namespace device {
-#define END_OPENCV_DEVICE_NAMESPACE }}}
-#define OPENCV_DEVICE_NAMESPACE ::cv::gpu::device
-#define OPENCV_DEVICE_NAMESPACE_ ::cv::gpu::device::

#ifdef __CUDACC__

-BEGIN_OPENCV_DEVICE_NAMESPACE
+namespace cv { namespace gpu { namespace device

-typedef unsigned char uchar;
-typedef unsigned short ushort;
-typedef signed char schar;
-typedef unsigned int uint;

-template<class T> static inline void bindTexture(const textureReference* tex, const DevMem2D_<T>& img)
{
+    typedef unsigned char uchar;
+    typedef unsigned short ushort;
+    typedef signed char schar;
+    typedef unsigned int uint;

+    template<class T> static inline void bindTexture(const textureReference* tex, const DevMem2D_<T>& img)
+    {
        cudaChannelFormatDesc desc = cudaCreateChannelDesc<T>();
        cudaSafeCall( cudaBindTexture2D(0, tex, img.ptr(), &desc, img.cols, img.rows, img.step) );
    }
+}}}
-END_OPENCV_DEVICE_NAMESPACE

#endif

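The replacement is mechanical; the macros expanded to exactly the tokens now written inline, but the explicit form survives tools that do not run the preprocessor and keeps brace matching visible in the editor:

    // before (macro pair hides the braces):
    // BEGIN_OPENCV_DEVICE_NAMESPACE   // -> namespace cv { namespace gpu { namespace device {
    //     void foo();                 // hypothetical declaration
    // END_OPENCV_DEVICE_NAMESPACE     // -> }}}

    // after (same tokens, spelled out):
    namespace cv { namespace gpu { namespace device
    {
        void foo();                    // hypothetical declaration
    }}}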
@@ -102,87 +96,6 @@ namespace cv { namespace gpu

    static inline int divUp(int total, int grain) { return (total + grain - 1) / grain; }

-    /*template<class T> static inline void uploadConstant(const char* name, const T& value)
-    {
-        cudaSafeCall( cudaMemcpyToSymbol(name, &value, sizeof(T)) );
-    }
-
-    template<class T> static inline void uploadConstant(const char* name, const T& value, cudaStream_t stream)
-    {
-        cudaSafeCall( cudaMemcpyToSymbolAsync(name, &value, sizeof(T), 0, cudaMemcpyHostToDevice, stream) );
-    } */
-
-    //template<class T> static inline void bindTexture(const char* name, const DevMem2D_<T>& img)
-    //{
-    //    //!!!! const_cast is disabled!
-    //    //!!!! Please use constructor of 'class texture' instead.
-    //
-    //    //textureReference* tex;
-    //    //cudaSafeCall( cudaGetTextureReference((const textureReference**)&tex, name) );
-    //    //tex->normalized = normalized;
-    //    //tex->filterMode = filterMode;
-    //    //tex->addressMode[0] = addrMode;
-    //    //tex->addressMode[1] = addrMode;
-    //
-    //    const textureReference* tex;
-    //    cudaSafeCall( cudaGetTextureReference(&tex, name) );
-    //
-    //    cudaChannelFormatDesc desc = cudaCreateChannelDesc<T>();
-    //    cudaSafeCall( cudaBindTexture2D(0, tex, img.ptr(), &desc, img.cols, img.rows, img.step) );
-    //}
-
-    //static inline void unbindTexture(const char *name)
-    //{
-    //    const textureReference* tex;
-    //    cudaSafeCall( cudaGetTextureReference(&tex, name) );
-    //    cudaSafeCall( cudaUnbindTexture(tex) );
-    //}
-
-
-
-    //class TextureBinder
-    //{
-    //public:
-    //    TextureBinder() : tex_(0) {}
-    //    template <typename T> TextureBinder(const textureReference* tex, const DevMem2D_<T>& img) : tex_(0)
-    //    {
-    //        bind(tex, img);
-    //    }
-    //    template <typename T> TextureBinder(const char* tex_name, const DevMem2D_<T>& img) : tex_(0)
-    //    {
-    //        bind(tex_name, img);
-    //    }
-    //    ~TextureBinder() { unbind(); }
-    //
-    //    template <typename T> void bind(const textureReference* tex, const DevMem2D_<T>& img)
-    //    {
-    //        unbind();
-    //
-    //        cudaChannelFormatDesc desc = cudaCreateChannelDesc<T>();
-    //        cudaSafeCall( cudaBindTexture2D(0, tex, img.ptr(), &desc, img.cols, img.rows, img.step) );
-    //
-    //        tex_ = tex;
-    //    }
-    //    template <typename T> void bind(const char* tex_name, const DevMem2D_<T>& img)
-    //    {
-    //        const textureReference* tex;
-    //        cudaSafeCall( cudaGetTextureReference(&tex, tex_name) );
-    //        bind(tex, img);
-    //    }
-    //
-    //    void unbind()
-    //    {
-    //        if (tex_)
-    //        {
-    //            cudaUnbindTexture(tex_);
-    //            tex_ = 0;
-    //        }
-    //    }
-    //
-    //private:
-    //    const textureReference* tex_;
-    //};

class NppStreamHandler
{
public:
@@ -43,46 +43,46 @@
#include "internal_shared.hpp"
#include "opencv2/gpu/device/vec_math.hpp"

-BEGIN_OPENCV_DEVICE_NAMESPACE
+namespace cv { namespace gpu { namespace device

-namespace match_template {
-
-__device__ __forceinline__ float sum(float v) { return v; }
-__device__ __forceinline__ float sum(float2 v) { return v.x + v.y; }
-__device__ __forceinline__ float sum(float3 v) { return v.x + v.y + v.z; }
-__device__ __forceinline__ float sum(float4 v) { return v.x + v.y + v.z + v.w; }
-
-__device__ __forceinline__ float first(float v) { return v; }
-__device__ __forceinline__ float first(float2 v) { return v.x; }
-__device__ __forceinline__ float first(float3 v) { return v.x; }
-__device__ __forceinline__ float first(float4 v) { return v.x; }
-
-__device__ __forceinline__ float mul(float a, float b) { return a * b; }
-__device__ __forceinline__ float2 mul(float2 a, float2 b) { return make_float2(a.x * b.x, a.y * b.y); }
-__device__ __forceinline__ float3 mul(float3 a, float3 b) { return make_float3(a.x * b.x, a.y * b.y, a.z * b.z); }
-__device__ __forceinline__ float4 mul(float4 a, float4 b) { return make_float4(a.x * b.x, a.y * b.y, a.z * b.z, a.w * b.w); }
-
-__device__ __forceinline__ float mul(uchar a, uchar b) { return a * b; }
-__device__ __forceinline__ float2 mul(uchar2 a, uchar2 b) { return make_float2(a.x * b.x, a.y * b.y); }
-__device__ __forceinline__ float3 mul(uchar3 a, uchar3 b) { return make_float3(a.x * b.x, a.y * b.y, a.z * b.z); }
-__device__ __forceinline__ float4 mul(uchar4 a, uchar4 b) { return make_float4(a.x * b.x, a.y * b.y, a.z * b.z, a.w * b.w); }
-
-__device__ __forceinline__ float sub(float a, float b) { return a - b; }
-__device__ __forceinline__ float2 sub(float2 a, float2 b) { return make_float2(a.x - b.x, a.y - b.y); }
-__device__ __forceinline__ float3 sub(float3 a, float3 b) { return make_float3(a.x - b.x, a.y - b.y, a.z - b.z); }
-__device__ __forceinline__ float4 sub(float4 a, float4 b) { return make_float4(a.x - b.x, a.y - b.y, a.z - b.z, a.w - b.w); }
-
-__device__ __forceinline__ float sub(uchar a, uchar b) { return a - b; }
-__device__ __forceinline__ float2 sub(uchar2 a, uchar2 b) { return make_float2(a.x - b.x, a.y - b.y); }
-__device__ __forceinline__ float3 sub(uchar3 a, uchar3 b) { return make_float3(a.x - b.x, a.y - b.y, a.z - b.z); }
-__device__ __forceinline__ float4 sub(uchar4 a, uchar4 b) { return make_float4(a.x - b.x, a.y - b.y, a.z - b.z, a.w - b.w); }
-
-//////////////////////////////////////////////////////////////////////
-// Naive_CCORR
-
-template <typename T, int cn>
-__global__ void matchTemplateNaiveKernel_CCORR(int w, int h, const PtrStepb image, const PtrStepb templ, DevMem2Df result)
{
+    namespace match_template
+    {
+        __device__ __forceinline__ float sum(float v) { return v; }
+        __device__ __forceinline__ float sum(float2 v) { return v.x + v.y; }
+        __device__ __forceinline__ float sum(float3 v) { return v.x + v.y + v.z; }
+        __device__ __forceinline__ float sum(float4 v) { return v.x + v.y + v.z + v.w; }
+
+        __device__ __forceinline__ float first(float v) { return v; }
+        __device__ __forceinline__ float first(float2 v) { return v.x; }
+        __device__ __forceinline__ float first(float3 v) { return v.x; }
+        __device__ __forceinline__ float first(float4 v) { return v.x; }
+
+        __device__ __forceinline__ float mul(float a, float b) { return a * b; }
+        __device__ __forceinline__ float2 mul(float2 a, float2 b) { return make_float2(a.x * b.x, a.y * b.y); }
+        __device__ __forceinline__ float3 mul(float3 a, float3 b) { return make_float3(a.x * b.x, a.y * b.y, a.z * b.z); }
+        __device__ __forceinline__ float4 mul(float4 a, float4 b) { return make_float4(a.x * b.x, a.y * b.y, a.z * b.z, a.w * b.w); }
+
+        __device__ __forceinline__ float mul(uchar a, uchar b) { return a * b; }
+        __device__ __forceinline__ float2 mul(uchar2 a, uchar2 b) { return make_float2(a.x * b.x, a.y * b.y); }
+        __device__ __forceinline__ float3 mul(uchar3 a, uchar3 b) { return make_float3(a.x * b.x, a.y * b.y, a.z * b.z); }
+        __device__ __forceinline__ float4 mul(uchar4 a, uchar4 b) { return make_float4(a.x * b.x, a.y * b.y, a.z * b.z, a.w * b.w); }
+
+        __device__ __forceinline__ float sub(float a, float b) { return a - b; }
+        __device__ __forceinline__ float2 sub(float2 a, float2 b) { return make_float2(a.x - b.x, a.y - b.y); }
+        __device__ __forceinline__ float3 sub(float3 a, float3 b) { return make_float3(a.x - b.x, a.y - b.y, a.z - b.z); }
+        __device__ __forceinline__ float4 sub(float4 a, float4 b) { return make_float4(a.x - b.x, a.y - b.y, a.z - b.z, a.w - b.w); }
+
+        __device__ __forceinline__ float sub(uchar a, uchar b) { return a - b; }
+        __device__ __forceinline__ float2 sub(uchar2 a, uchar2 b) { return make_float2(a.x - b.x, a.y - b.y); }
+        __device__ __forceinline__ float3 sub(uchar3 a, uchar3 b) { return make_float3(a.x - b.x, a.y - b.y, a.z - b.z); }
+        __device__ __forceinline__ float4 sub(uchar4 a, uchar4 b) { return make_float4(a.x - b.x, a.y - b.y, a.z - b.z, a.w - b.w); }
+
+        //////////////////////////////////////////////////////////////////////
+        // Naive_CCORR
+
+        template <typename T, int cn>
+        __global__ void matchTemplateNaiveKernel_CCORR(int w, int h, const PtrStepb image, const PtrStepb templ, DevMem2Df result)
+        {
    typedef typename TypeVec<T, cn>::vec_type Type;
    typedef typename TypeVec<float, cn>::vec_type Typef;

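The overload families above let a single kernel template cover one to four channels: mul and sub lift pixels into per-channel floats, and sum collapses the accumulator to the scalar stored in result. The elided accumulation in matchTemplateNaiveKernel_CCORR therefore has roughly this shape (a reconstruction, not the verbatim body):

    // hypothetical sketch of the elided CCORR accumulation:
    Typef res = VecTraits<Typef>::all(0);
    for (int i = 0; i < h; ++i)
        for (int j = 0; j < w; ++j)
            res = res + mul(((const Type*)image.ptr(y + i))[x + j],
                            ((const Type*)templ.ptr(i))[j]);
    result.ptr(y)[x] = sum(res);      // the surviving context line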
@@ -103,11 +103,11 @@ __global__ void matchTemplateNaiveKernel_CCORR(int w, int h, const PtrStepb imag

        result.ptr(y)[x] = sum(res);
    }
}

template <typename T, int cn>
void matchTemplateNaive_CCORR(const DevMem2Db image, const DevMem2Db templ, DevMem2Df result, cudaStream_t stream)
{
    const dim3 threads(32, 8);
    const dim3 grid(divUp(result.cols, threads.x), divUp(result.rows, threads.y));

@@ -116,10 +116,10 @@ void matchTemplateNaive_CCORR(const DevMem2Db image, const DevMem2Db templ, DevM

    if (stream == 0)
        cudaSafeCall( cudaDeviceSynchronize() );
}

void matchTemplateNaive_CCORR_32F(const DevMem2Db image, const DevMem2Db templ, DevMem2Df result, int cn, cudaStream_t stream)
{
    typedef void (*caller_t)(const DevMem2Db image, const DevMem2Db templ, DevMem2Df result, cudaStream_t stream);

    static const caller_t callers[] =
@@ -128,11 +128,11 @@ void matchTemplateNaive_CCORR_32F(const DevMem2Db image, const DevMem2Db templ,
    };

    callers[cn](image, templ, result, stream);
}


void matchTemplateNaive_CCORR_8U(const DevMem2Db image, const DevMem2Db templ, DevMem2Df result, int cn, cudaStream_t stream)
{
    typedef void (*caller_t)(const DevMem2Db image, const DevMem2Db templ, DevMem2Df result, cudaStream_t stream);

    static const caller_t callers[] =
@@ -141,14 +141,14 @@ void matchTemplateNaive_CCORR_8U(const DevMem2Db image, const DevMem2Db templ, D
    };

    callers[cn](image, templ, result, stream);
}

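The channel count picked at run time indexes a table of template instantiations; the initializers fall in the elided lines, but presumably read:

    // hypothetical shape of the elided dispatch table (index == cn):
    static const caller_t callers[] =
    {
        0, matchTemplateNaive_CCORR<float, 1>, matchTemplateNaive_CCORR<float, 2>,
           matchTemplateNaive_CCORR<float, 3>, matchTemplateNaive_CCORR<float, 4>
    };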
//////////////////////////////////////////////////////////////////////
// Naive_SQDIFF

template <typename T, int cn>
__global__ void matchTemplateNaiveKernel_SQDIFF(int w, int h, const PtrStepb image, const PtrStepb templ, DevMem2Df result)
{
    typedef typename TypeVec<T, cn>::vec_type Type;
    typedef typename TypeVec<float, cn>::vec_type Typef;

@@ -173,11 +173,11 @@ __global__ void matchTemplateNaiveKernel_SQDIFF(int w, int h, const PtrStepb ima

        result.ptr(y)[x] = sum(res);
    }
}

template <typename T, int cn>
void matchTemplateNaive_SQDIFF(const DevMem2Db image, const DevMem2Db templ, DevMem2Df result, cudaStream_t stream)
{
    const dim3 threads(32, 8);
    const dim3 grid(divUp(result.cols, threads.x), divUp(result.rows, threads.y));

@@ -186,10 +186,10 @@ void matchTemplateNaive_SQDIFF(const DevMem2Db image, const DevMem2Db templ, Dev

    if (stream == 0)
        cudaSafeCall( cudaDeviceSynchronize() );
}

void matchTemplateNaive_SQDIFF_32F(const DevMem2Db image, const DevMem2Db templ, DevMem2Df result, int cn, cudaStream_t stream)
{
    typedef void (*caller_t)(const DevMem2Db image, const DevMem2Db templ, DevMem2Df result, cudaStream_t stream);

    static const caller_t callers[] =
@@ -198,10 +198,10 @@ void matchTemplateNaive_SQDIFF_32F(const DevMem2Db image, const DevMem2Db templ,
    };

    callers[cn](image, templ, result, stream);
}

void matchTemplateNaive_SQDIFF_8U(const DevMem2Db image, const DevMem2Db templ, DevMem2Df result, int cn, cudaStream_t stream)
{
    typedef void (*caller_t)(const DevMem2Db image, const DevMem2Db templ, DevMem2Df result, cudaStream_t stream);

    static const caller_t callers[] =
@@ -210,14 +210,14 @@ void matchTemplateNaive_SQDIFF_8U(const DevMem2Db image, const DevMem2Db templ,
    };

    callers[cn](image, templ, result, stream);
}

//////////////////////////////////////////////////////////////////////
// Prepared_SQDIFF

template <int cn>
__global__ void matchTemplatePreparedKernel_SQDIFF_8U(int w, int h, const PtrStep<unsigned long long> image_sqsum, unsigned int templ_sqsum, DevMem2Df result)
{
    const int x = blockIdx.x * blockDim.x + threadIdx.x;
    const int y = blockIdx.y * blockDim.y + threadIdx.y;

@@ -229,11 +229,11 @@ __global__ void matchTemplatePreparedKernel_SQDIFF_8U(int w, int h, const PtrSte
        float ccorr = result.ptr(y)[x];
        result.ptr(y)[x] = image_sqsum_ - 2.f * ccorr + templ_sqsum;
    }
}

template <int cn>
void matchTemplatePrepared_SQDIFF_8U(int w, int h, const DevMem2D_<unsigned long long> image_sqsum, unsigned int templ_sqsum, DevMem2Df result, cudaStream_t stream)
{
    const dim3 threads(32, 8);
    const dim3 grid(divUp(result.cols, threads.x), divUp(result.rows, threads.y));

@@ -242,11 +242,11 @@ void matchTemplatePrepared_SQDIFF_8U(int w, int h, const DevMem2D_<unsigned long

    if (stream == 0)
        cudaSafeCall( cudaDeviceSynchronize() );
}

void matchTemplatePrepared_SQDIFF_8U(int w, int h, const DevMem2D_<unsigned long long> image_sqsum, unsigned int templ_sqsum, DevMem2Df result, int cn,
                                     cudaStream_t stream)
{
    typedef void (*caller_t)(int w, int h, const DevMem2D_<unsigned long long> image_sqsum, unsigned int templ_sqsum, DevMem2Df result, cudaStream_t stream);

    static const caller_t callers[] =
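The prepared kernel rewrites an existing cross-correlation surface into SQDIFF in place, using the expansion of the squared difference: sum((I - T)^2) = sum(I^2) - 2*sum(I*T) + sum(T^2). Mapping that onto the names above:

    // image_sqsum_ : sum of I^2 over the window (from an integral of squares)
    // ccorr        : sum of I*T, already sitting in `result`
    // templ_sqsum  : sum of T^2, a per-template constant
    // => result = image_sqsum_ - 2.f * ccorr + templ_sqsum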
@@ -255,37 +255,37 @@ void matchTemplatePrepared_SQDIFF_8U(int w, int h, const DevMem2D_<unsigned long
    };

    callers[cn](w, h, image_sqsum, templ_sqsum, result, stream);
}

//////////////////////////////////////////////////////////////////////
// Prepared_SQDIFF_NORMED

// normAcc* are accurate normalization routines which make GPU matchTemplate
// consistent with CPU one

__device__ float normAcc(float num, float denum)
{
    if (::fabs(num) < denum)
        return num / denum;
    if (::fabs(num) < denum * 1.125f)
        return num > 0 ? 1 : -1;
    return 0;
}


__device__ float normAcc_SQDIFF(float num, float denum)
{
    if (::fabs(num) < denum)
        return num / denum;
    if (::fabs(num) < denum * 1.125f)
        return num > 0 ? 1 : -1;
    return 1;
}


template <int cn>
__global__ void matchTemplatePreparedKernel_SQDIFF_NORMED_8U(int w, int h, const PtrStep<unsigned long long> image_sqsum, unsigned int templ_sqsum, DevMem2Df result)
{
    const int x = blockIdx.x * blockDim.x + threadIdx.x;
    const int y = blockIdx.y * blockDim.y + threadIdx.y;

@@ -298,12 +298,12 @@ __global__ void matchTemplatePreparedKernel_SQDIFF_NORMED_8U(int w, int h, const
        result.ptr(y)[x] = normAcc_SQDIFF(image_sqsum_ - 2.f * ccorr + templ_sqsum,
                                          sqrtf(image_sqsum_ * templ_sqsum));
    }
}

template <int cn>
void matchTemplatePrepared_SQDIFF_NORMED_8U(int w, int h, const DevMem2D_<unsigned long long> image_sqsum, unsigned int templ_sqsum,
                                            DevMem2Df result, cudaStream_t stream)
{
    const dim3 threads(32, 8);
    const dim3 grid(divUp(result.cols, threads.x), divUp(result.rows, threads.y));

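normAcc and normAcc_SQDIFF treat a ratio that overshoots 1 in magnitude by up to 12.5% (the 1.125f factor) as accumulated rounding error and clamp it; anything worse is taken as a degenerate window. A quick numeric check:

    // normAcc(0.80f, 1.f) -> 0.80   normal case
    // normAcc(1.05f, 1.f) -> 1.0    within the 12.5% tolerance: clamped
    // normAcc(2.00f, 1.f) -> 0.0    degenerate; normAcc_SQDIFF returns 1,
    //                               the worst score for a SQDIFF_NORMED match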
@@ -312,12 +312,12 @@ void matchTemplatePrepared_SQDIFF_NORMED_8U(int w, int h, const DevMem2D_<unsign

    if (stream == 0)
        cudaSafeCall( cudaDeviceSynchronize() );
}


void matchTemplatePrepared_SQDIFF_NORMED_8U(int w, int h, const DevMem2D_<unsigned long long> image_sqsum, unsigned int templ_sqsum,
                                            DevMem2Df result, int cn, cudaStream_t stream)
{
    typedef void (*caller_t)(int w, int h, const DevMem2D_<unsigned long long> image_sqsum, unsigned int templ_sqsum, DevMem2Df result, cudaStream_t stream);
    static const caller_t callers[] =
    {
@@ -325,13 +325,13 @@ void matchTemplatePrepared_SQDIFF_NORMED_8U(int w, int h, const DevMem2D_<unsign
    };

    callers[cn](w, h, image_sqsum, templ_sqsum, result, stream);
}

//////////////////////////////////////////////////////////////////////
// Prepared_CCOFF

__global__ void matchTemplatePreparedKernel_CCOFF_8U(int w, int h, float templ_sum_scale, const PtrStep<unsigned int> image_sum, DevMem2Df result)
{
    const int x = blockIdx.x * blockDim.x + threadIdx.x;
    const int y = blockIdx.y * blockDim.y + threadIdx.y;

@@ -343,10 +343,10 @@ __global__ void matchTemplatePreparedKernel_CCOFF_8U(int w, int h, float templ_s
        float ccorr = result.ptr(y)[x];
        result.ptr(y)[x] = ccorr - image_sum_ * templ_sum_scale;
    }
}

void matchTemplatePrepared_CCOFF_8U(int w, int h, const DevMem2D_<unsigned int> image_sum, unsigned int templ_sum, DevMem2Df result, cudaStream_t stream)
{
    dim3 threads(32, 8);
    dim3 grid(divUp(result.cols, threads.x), divUp(result.rows, threads.y));

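CCOEFF subtracts the mean contributions from the correlation: sum((I - mean(I)) * (T - mean(T))) reduces to CCORR minus sum(I)*sum(T)/(w*h). templ_sum_scale is presumably templ_sum / (w*h), computed once on the host:

    // ccorr      : sum(I * T) over the window (already in `result`)
    // image_sum_ : sum(I) from the integral image
    // templ_sum_scale (assumed) = templ_sum / (float)(w * h)
    // => ccoeff = ccorr - image_sum_ * templ_sum_scale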
@@ -355,16 +355,16 @@ void matchTemplatePrepared_CCOFF_8U(int w, int h, const DevMem2D_<unsigned int>

    if (stream == 0)
        cudaSafeCall( cudaDeviceSynchronize() );
}


__global__ void matchTemplatePreparedKernel_CCOFF_8UC2(
        int w, int h, float templ_sum_scale_r, float templ_sum_scale_g,
        const PtrStep<unsigned int> image_sum_r,
        const PtrStep<unsigned int> image_sum_g,
        DevMem2Df result)
{
    const int x = blockIdx.x * blockDim.x + threadIdx.x;
    const int y = blockIdx.y * blockDim.y + threadIdx.y;

@@ -380,15 +380,15 @@ __global__ void matchTemplatePreparedKernel_CCOFF_8UC2(
        result.ptr(y)[x] = ccorr - image_sum_r_ * templ_sum_scale_r
                                 - image_sum_g_ * templ_sum_scale_g;
    }
}

void matchTemplatePrepared_CCOFF_8UC2(
        int w, int h,
        const DevMem2D_<unsigned int> image_sum_r,
        const DevMem2D_<unsigned int> image_sum_g,
        unsigned int templ_sum_r, unsigned int templ_sum_g,
        DevMem2Df result, cudaStream_t stream)
{
    dim3 threads(32, 8);
    dim3 grid(divUp(result.cols, threads.x), divUp(result.rows, threads.y));

@@ -399,11 +399,11 @@ void matchTemplatePrepared_CCOFF_8UC2(

    if (stream == 0)
        cudaSafeCall( cudaDeviceSynchronize() );
}


__global__ void matchTemplatePreparedKernel_CCOFF_8UC3(
        int w, int h,
        float templ_sum_scale_r,
        float templ_sum_scale_g,
@@ -412,7 +412,7 @@ __global__ void matchTemplatePreparedKernel_CCOFF_8UC3(
        const PtrStep<unsigned int> image_sum_g,
        const PtrStep<unsigned int> image_sum_b,
        DevMem2Df result)
{
    const int x = blockIdx.x * blockDim.x + threadIdx.x;
    const int y = blockIdx.y * blockDim.y + threadIdx.y;

@@ -432,9 +432,9 @@ __global__ void matchTemplatePreparedKernel_CCOFF_8UC3(
                                 - image_sum_g_ * templ_sum_scale_g
                                 - image_sum_b_ * templ_sum_scale_b;
    }
}

void matchTemplatePrepared_CCOFF_8UC3(
        int w, int h,
        const DevMem2D_<unsigned int> image_sum_r,
        const DevMem2D_<unsigned int> image_sum_g,
@@ -443,7 +443,7 @@ void matchTemplatePrepared_CCOFF_8UC3(
        unsigned int templ_sum_g,
        unsigned int templ_sum_b,
        DevMem2Df result, cudaStream_t stream)
{
    dim3 threads(32, 8);
    dim3 grid(divUp(result.cols, threads.x), divUp(result.rows, threads.y));

@@ -457,11 +457,11 @@ void matchTemplatePrepared_CCOFF_8UC3(

    if (stream == 0)
        cudaSafeCall( cudaDeviceSynchronize() );
}


__global__ void matchTemplatePreparedKernel_CCOFF_8UC4(
        int w, int h,
        float templ_sum_scale_r,
        float templ_sum_scale_g,
@@ -472,7 +472,7 @@ __global__ void matchTemplatePreparedKernel_CCOFF_8UC4(
        const PtrStep<unsigned int> image_sum_b,
        const PtrStep<unsigned int> image_sum_a,
        DevMem2Df result)
{
    const int x = blockIdx.x * blockDim.x + threadIdx.x;
    const int y = blockIdx.y * blockDim.y + threadIdx.y;

@@ -496,9 +496,9 @@ __global__ void matchTemplatePreparedKernel_CCOFF_8UC4(
                                 - image_sum_b_ * templ_sum_scale_b
- image_sum_a_ * templ_sum_scale_a;
|
- image_sum_a_ * templ_sum_scale_a;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void matchTemplatePrepared_CCOFF_8UC4(
|
void matchTemplatePrepared_CCOFF_8UC4(
|
||||||
int w, int h,
|
int w, int h,
|
||||||
const DevMem2D_<unsigned int> image_sum_r,
|
const DevMem2D_<unsigned int> image_sum_r,
|
||||||
const DevMem2D_<unsigned int> image_sum_g,
|
const DevMem2D_<unsigned int> image_sum_g,
|
||||||
@ -509,7 +509,7 @@ void matchTemplatePrepared_CCOFF_8UC4(
|
|||||||
unsigned int templ_sum_b,
|
unsigned int templ_sum_b,
|
||||||
unsigned int templ_sum_a,
|
unsigned int templ_sum_a,
|
||||||
DevMem2Df result, cudaStream_t stream)
|
DevMem2Df result, cudaStream_t stream)
|
||||||
{
|
{
|
||||||
dim3 threads(32, 8);
|
dim3 threads(32, 8);
|
||||||
dim3 grid(divUp(result.cols, threads.x), divUp(result.rows, threads.y));
|
dim3 grid(divUp(result.cols, threads.x), divUp(result.rows, threads.y));
|
||||||
|
|
||||||
@ -525,18 +525,18 @@ void matchTemplatePrepared_CCOFF_8UC4(
|
|||||||
|
|
||||||
if (stream == 0)
|
if (stream == 0)
|
||||||
cudaSafeCall( cudaDeviceSynchronize() );
|
cudaSafeCall( cudaDeviceSynchronize() );
|
||||||
}
|
}
|
||||||
|
|
||||||
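The hunks elided above all follow one recipe: take the raw cross-correlation already stored in result, then subtract image_sum_ * templ_sum_scale, where image_sum_ is a four-tap rectangle sum over the integral image and templ_sum_scale is presumably the template sum divided by the w*h window area. A self-contained sketch of that rectangle sum (helper name and layout assumed, not taken from the diff):

// Hypothetical helper, not part of the commit: the four-tap rectangle
// sum the elided hunks compute from an inclusive integral image laid
// out as rows of `step` unsigned ints.
__device__ float rectSum(const unsigned int* sum, size_t step,
                         int x, int y, int w, int h)
{
    const unsigned int* top = sum + y * step;        // row above the window
    const unsigned int* bot = sum + (y + h) * step;  // row below the window
    return (float)((bot[x + w] - bot[x]) - (top[x + w] - top[x]));
}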
//////////////////////////////////////////////////////////////////////
// Prepared_CCOFF_NORMED

__global__ void matchTemplatePreparedKernel_CCOFF_NORMED_8U(
int w, int h, float weight,
float templ_sum_scale, float templ_sqsum_scale,
const PtrStep<unsigned int> image_sum,
const PtrStep<unsigned long long> image_sqsum,
DevMem2Df result)
{
const int x = blockIdx.x * blockDim.x + threadIdx.x;
const int y = blockIdx.y * blockDim.y + threadIdx.y;

@ -552,14 +552,14 @@ __global__ void matchTemplatePreparedKernel_CCOFF_NORMED_8U(
result.ptr(y)[x] = normAcc(ccorr - image_sum_ * templ_sum_scale,
sqrtf(templ_sqsum_scale * (image_sqsum_ - weight * image_sum_ * image_sum_)));
}
}

void matchTemplatePrepared_CCOFF_NORMED_8U(
int w, int h, const DevMem2D_<unsigned int> image_sum,
const DevMem2D_<unsigned long long> image_sqsum,
unsigned int templ_sum, unsigned int templ_sqsum,
DevMem2Df result, cudaStream_t stream)
{
dim3 threads(32, 8);
dim3 grid(divUp(result.cols, threads.x), divUp(result.rows, threads.y));

@ -574,18 +574,18 @@ void matchTemplatePrepared_CCOFF_NORMED_8U(

if (stream == 0)
cudaSafeCall( cudaDeviceSynchronize() );
}

__global__ void matchTemplatePreparedKernel_CCOFF_NORMED_8UC2(
int w, int h, float weight,
float templ_sum_scale_r, float templ_sum_scale_g,
float templ_sqsum_scale,
const PtrStep<unsigned int> image_sum_r, const PtrStep<unsigned long long> image_sqsum_r,
const PtrStep<unsigned int> image_sum_g, const PtrStep<unsigned long long> image_sqsum_g,
DevMem2Df result)
{
const int x = blockIdx.x * blockDim.x + threadIdx.x;
const int y = blockIdx.y * blockDim.y + threadIdx.y;

@ -610,16 +610,16 @@ __global__ void matchTemplatePreparedKernel_CCOFF_NORMED_8UC2(
+ image_sqsum_g_ - weight * image_sum_g_ * image_sum_g_));
result.ptr(y)[x] = normAcc(num, denum);
}
}

void matchTemplatePrepared_CCOFF_NORMED_8UC2(
int w, int h,
const DevMem2D_<unsigned int> image_sum_r, const DevMem2D_<unsigned long long> image_sqsum_r,
const DevMem2D_<unsigned int> image_sum_g, const DevMem2D_<unsigned long long> image_sqsum_g,
unsigned int templ_sum_r, unsigned int templ_sqsum_r,
unsigned int templ_sum_g, unsigned int templ_sqsum_g,
DevMem2Df result, cudaStream_t stream)
{
dim3 threads(32, 8);
dim3 grid(divUp(result.cols, threads.x), divUp(result.rows, threads.y));

@ -640,11 +640,11 @@ void matchTemplatePrepared_CCOFF_NORMED_8UC2(

if (stream == 0)
cudaSafeCall( cudaDeviceSynchronize() );
}

__global__ void matchTemplatePreparedKernel_CCOFF_NORMED_8UC3(
int w, int h, float weight,
float templ_sum_scale_r, float templ_sum_scale_g, float templ_sum_scale_b,
float templ_sqsum_scale,
@ -652,7 +652,7 @@ __global__ void matchTemplatePreparedKernel_CCOFF_NORMED_8UC3(
const PtrStep<unsigned int> image_sum_g, const PtrStep<unsigned long long> image_sqsum_g,
const PtrStep<unsigned int> image_sum_b, const PtrStep<unsigned long long> image_sqsum_b,
DevMem2Df result)
{
const int x = blockIdx.x * blockDim.x + threadIdx.x;
const int y = blockIdx.y * blockDim.y + threadIdx.y;

@ -685,9 +685,9 @@ __global__ void matchTemplatePreparedKernel_CCOFF_NORMED_8UC3(
+ image_sqsum_b_ - weight * image_sum_b_ * image_sum_b_));
result.ptr(y)[x] = normAcc(num, denum);
}
}

void matchTemplatePrepared_CCOFF_NORMED_8UC3(
int w, int h,
const DevMem2D_<unsigned int> image_sum_r, const DevMem2D_<unsigned long long> image_sqsum_r,
const DevMem2D_<unsigned int> image_sum_g, const DevMem2D_<unsigned long long> image_sqsum_g,
@ -696,7 +696,7 @@ void matchTemplatePrepared_CCOFF_NORMED_8UC3(
unsigned int templ_sum_g, unsigned int templ_sqsum_g,
unsigned int templ_sum_b, unsigned int templ_sqsum_b,
DevMem2Df result, cudaStream_t stream)
{
dim3 threads(32, 8);
dim3 grid(divUp(result.cols, threads.x), divUp(result.rows, threads.y));

@ -720,11 +720,11 @@ void matchTemplatePrepared_CCOFF_NORMED_8UC3(

if (stream == 0)
cudaSafeCall( cudaDeviceSynchronize() );
}

__global__ void matchTemplatePreparedKernel_CCOFF_NORMED_8UC4(
int w, int h, float weight,
float templ_sum_scale_r, float templ_sum_scale_g, float templ_sum_scale_b,
float templ_sum_scale_a, float templ_sqsum_scale,
@ -733,7 +733,7 @@ __global__ void matchTemplatePreparedKernel_CCOFF_NORMED_8UC4(
const PtrStep<unsigned int> image_sum_b, const PtrStep<unsigned long long> image_sqsum_b,
const PtrStep<unsigned int> image_sum_a, const PtrStep<unsigned long long> image_sqsum_a,
DevMem2Df result)
{
const int x = blockIdx.x * blockDim.x + threadIdx.x;
const int y = blockIdx.y * blockDim.y + threadIdx.y;

@ -772,9 +772,9 @@ __global__ void matchTemplatePreparedKernel_CCOFF_NORMED_8UC4(
+ image_sqsum_a_ - weight * image_sum_a_ * image_sum_a_));
result.ptr(y)[x] = normAcc(num, denum);
}
}

void matchTemplatePrepared_CCOFF_NORMED_8UC4(
int w, int h,
const DevMem2D_<unsigned int> image_sum_r, const DevMem2D_<unsigned long long> image_sqsum_r,
const DevMem2D_<unsigned int> image_sum_g, const DevMem2D_<unsigned long long> image_sqsum_g,
@ -785,7 +785,7 @@ void matchTemplatePrepared_CCOFF_NORMED_8UC4(
unsigned int templ_sum_b, unsigned int templ_sqsum_b,
unsigned int templ_sum_a, unsigned int templ_sqsum_a,
DevMem2Df result, cudaStream_t stream)
{
dim3 threads(32, 8);
dim3 grid(divUp(result.cols, threads.x), divUp(result.rows, threads.y));

@ -812,16 +812,16 @@ void matchTemplatePrepared_CCOFF_NORMED_8UC4(

if (stream == 0)
cudaSafeCall( cudaDeviceSynchronize() );
}
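The NORMED variants divide the same centered correlation by the window/template energy product; weight is presumably 1/(w*h), so that image_sqsum_ - weight * image_sum_ * image_sum_ is the window's sum of squared deviations. normAcc itself is defined earlier in this file, outside the excerpt; a minimal stand-in consistent with its call sites (the guard threshold is an assumption):

// Sketch only -- the real normAcc lives above this excerpt. At these
// call sites it divides the centered correlation by the energy term
// while guarding a near-zero denominator.
__device__ __forceinline__ float normAccSketch(float num, float denum)
{
    return (denum > 1e-5f) ? num / denum : 0.f;  // degenerate window -> 0
}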
//////////////////////////////////////////////////////////////////////
// normalize

template <int cn>
__global__ void normalizeKernel_8U(
int w, int h, const PtrStep<unsigned long long> image_sqsum,
unsigned int templ_sqsum, DevMem2Df result)
{
const int x = blockIdx.x * blockDim.x + threadIdx.x;
const int y = blockIdx.y * blockDim.y + threadIdx.y;

@ -832,11 +832,11 @@ __global__ void normalizeKernel_8U(
(image_sqsum.ptr(y + h)[x * cn] - image_sqsum.ptr(y)[x * cn]));
result.ptr(y)[x] = normAcc(result.ptr(y)[x], sqrtf(image_sqsum_ * templ_sqsum));
}
}

void normalize_8U(int w, int h, const DevMem2D_<unsigned long long> image_sqsum,
unsigned int templ_sqsum, DevMem2Df result, int cn, cudaStream_t stream)
{
dim3 threads(32, 8);
dim3 grid(divUp(result.cols, threads.x), divUp(result.rows, threads.y));

@ -860,14 +860,14 @@ void normalize_8U(int w, int h, const DevMem2D_<unsigned long long> image_sqsum,

if (stream == 0)
cudaSafeCall( cudaDeviceSynchronize() );
}
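Condensed, normalizeKernel_8U turns a CCORR response into CCORR_NORMED: the response is divided by sqrt(window squared-pixel sum * templ_sqsum), with the window sum again read as four taps into an integral image of squares. The per-pixel step, sketched with assumed names and without the normAcc guard:

// Per-pixel normalization step, sketched (not from the diff).
__device__ float normalizeOne(float ccorr, float window_sqsum, float templ_sqsum)
{
    return ccorr * rsqrtf(window_sqsum * templ_sqsum);  // == ccorr / sqrt(...)
}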
//////////////////////////////////////////////////////////////////////
// extractFirstChannel

template <int cn>
__global__ void extractFirstChannel_32F(const PtrStepb image, DevMem2Df result)
{
typedef typename TypeVec<float, cn>::vec_type Typef;

int x = blockDim.x * blockIdx.x + threadIdx.x;
@ -878,10 +878,10 @@ __global__ void extractFirstChannel_32F(const PtrStepb image, DevMem2Df result)
Typef val = ((const Typef*)image.ptr(y))[x];
result.ptr(y)[x] = first(val);
}
}

void extractFirstChannel_32F(const DevMem2Db image, DevMem2Df result, int cn, cudaStream_t stream)
{
dim3 threads(32, 8);
dim3 grid(divUp(result.cols, threads.x), divUp(result.rows, threads.y));

@ -904,8 +904,6 @@ void extractFirstChannel_32F(const DevMem2Db image, DevMem2Df result, int cn, cu

if (stream == 0)
cudaSafeCall( cudaDeviceSynchronize() );
}

} //namespace match_template
}}} // namespace cv { namespace gpu { namespace device

END_OPENCV_DEVICE_NAMESPACE
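For readers without the headers at hand: judging by their names and by every substitution in this commit, the removed macros most likely expanded as follows (an assumption -- their definitions are not part of this diff):

/* Presumed pre-commit definitions, reconstructed from the substitutions.
   The commit inlines these expansions at every use site and drops the
   macros, so device code reads as ordinary nested namespaces. */
#define BEGIN_OPENCV_DEVICE_NAMESPACE namespace cv { namespace gpu { namespace device {
#define END_OPENCV_DEVICE_NAMESPACE   }}}
#define OPENCV_DEVICE_NAMESPACE_      ::cv::gpu::device::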
@ -42,46 +42,46 @@

#include "internal_shared.hpp"

BEGIN_OPENCV_DEVICE_NAMESPACE
namespace cv { namespace gpu { namespace device

namespace mathfunc {

//////////////////////////////////////////////////////////////////////////////////////
// Cart <-> Polar

struct Nothing
{
    namespace mathfunc
    {
        //////////////////////////////////////////////////////////////////////////////////////
        // Cart <-> Polar

        struct Nothing
        {
static __device__ __forceinline__ void calc(int, int, float, float, float*, size_t, float)
{
}
};
struct Magnitude
{
static __device__ __forceinline__ void calc(int x, int y, float x_data, float y_data, float* dst, size_t dst_step, float)
{
dst[y * dst_step + x] = ::sqrtf(x_data * x_data + y_data * y_data);
}
};
struct MagnitudeSqr
{
static __device__ __forceinline__ void calc(int x, int y, float x_data, float y_data, float* dst, size_t dst_step, float)
{
dst[y * dst_step + x] = x_data * x_data + y_data * y_data;
}
};
struct Atan2
{
static __device__ __forceinline__ void calc(int x, int y, float x_data, float y_data, float* dst, size_t dst_step, float scale)
{
float angle = ::atan2f(y_data, x_data);
angle += (angle < 0) * 2.0f * CV_PI;
dst[y * dst_step + x] = scale * angle;
}
};
template <typename Mag, typename Angle>
__global__ void cartToPolar(const float* xptr, size_t x_step, const float* yptr, size_t y_step,
float* mag, size_t mag_step, float* angle, size_t angle_step, float scale, int width, int height)
{
const int x = blockDim.x * blockIdx.x + threadIdx.x;
const int y = blockDim.y * blockIdx.y + threadIdx.y;

@ -93,26 +93,26 @@ __global__ void cartToPolar(const float* xptr, size_t x_step, const float* yptr,
Mag::calc(x, y, x_data, y_data, mag, mag_step, scale);
Angle::calc(x, y, x_data, y_data, angle, angle_step, scale);
}
}

struct NonEmptyMag
{
static __device__ __forceinline__ float get(const float* mag, size_t mag_step, int x, int y)
{
return mag[y * mag_step + x];
}
};
struct EmptyMag
{
static __device__ __forceinline__ float get(const float*, size_t, int, int)
{
return 1.0f;
}
};
template <typename Mag>
__global__ void polarToCart(const float* mag, size_t mag_step, const float* angle, size_t angle_step, float scale,
float* xptr, size_t x_step, float* yptr, size_t y_step, int width, int height)
{
const int x = blockDim.x * blockIdx.x + threadIdx.x;
const int y = blockDim.y * blockIdx.y + threadIdx.y;

@ -127,11 +127,11 @@ __global__ void polarToCart(const float* mag, size_t mag_step, const float* angl
xptr[y * x_step + x] = mag_data * cos_a;
yptr[y * y_step + x] = mag_data * sin_a;
}
}

template <typename Mag, typename Angle>
void cartToPolar_caller(DevMem2Df x, DevMem2Df y, DevMem2Df mag, DevMem2Df angle, bool angleInDegrees, cudaStream_t stream)
{
dim3 threads(32, 8, 1);
dim3 grid(1, 1, 1);

@ -147,10 +147,10 @@ void cartToPolar_caller(DevMem2Df x, DevMem2Df y, DevMem2Df mag, DevMem2Df angle

if (stream == 0)
cudaSafeCall( cudaDeviceSynchronize() );
}

void cartToPolar_gpu(DevMem2Df x, DevMem2Df y, DevMem2Df mag, bool magSqr, DevMem2Df angle, bool angleInDegrees, cudaStream_t stream)
{
typedef void (*caller_t)(DevMem2Df x, DevMem2Df y, DevMem2Df mag, DevMem2Df angle, bool angleInDegrees, cudaStream_t stream);
static const caller_t callers[2][2][2] =
{
@ -177,11 +177,11 @@ void cartToPolar_gpu(DevMem2Df x, DevMem2Df y, DevMem2Df mag, bool magSqr, DevMe
};

callers[mag.data == 0][magSqr][angle.data == 0](x, y, mag, angle, angleInDegrees, stream);
}

template <typename Mag>
void polarToCart_caller(DevMem2Df mag, DevMem2Df angle, DevMem2Df x, DevMem2Df y, bool angleInDegrees, cudaStream_t stream)
{
dim3 threads(32, 8, 1);
dim3 grid(1, 1, 1);

@ -196,10 +196,10 @@ void polarToCart_caller(DevMem2Df mag, DevMem2Df angle, DevMem2Df x, DevMem2Df y

if (stream == 0)
cudaSafeCall( cudaDeviceSynchronize() );
}

void polarToCart_gpu(DevMem2Df mag, DevMem2Df angle, DevMem2Df x, DevMem2Df y, bool angleInDegrees, cudaStream_t stream)
{
typedef void (*caller_t)(DevMem2Df mag, DevMem2Df angle, DevMem2Df x, DevMem2Df y, bool angleInDegrees, cudaStream_t stream);
static const caller_t callers[2] =
{
@ -208,8 +208,6 @@ void polarToCart_gpu(DevMem2Df mag, DevMem2Df angle, DevMem2Df x, DevMem2Df y, b
};

callers[mag.data == 0](mag, angle, x, y, angleInDegrees, stream);
}

} // namespace mathfunc
}}} // namespace cv { namespace gpu { namespace device

END_OPENCV_DEVICE_NAMESPACE
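The callers[2][2][2] table elided at @ -177 indexes the cartToPolar_caller instantiations by three runtime flags, callers[mag output missing][squared magnitude][angle output missing], so a null mag or angle pointer selects the Nothing functor for that output. A sketch of the layout (initializer order is an assumption, not copied from the diff):

// Assumed shape of the dispatch table; caller_t is the typedef above.
static const caller_t callersSketch[2][2][2] =
{
    { { cartToPolar_caller<Magnitude,    Atan2>, cartToPolar_caller<Magnitude,    Nothing> },
      { cartToPolar_caller<MagnitudeSqr, Atan2>, cartToPolar_caller<MagnitudeSqr, Nothing> } },
    { { cartToPolar_caller<Nothing,      Atan2>, cartToPolar_caller<Nothing,      Nothing> },
      { cartToPolar_caller<Nothing,      Atan2>, cartToPolar_caller<Nothing,      Nothing> } }
};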
@ -45,24 +45,24 @@
#include "opencv2/gpu/device/transform.hpp"
#include "opencv2/gpu/device/functional.hpp"

BEGIN_OPENCV_DEVICE_NAMESPACE
namespace cv { namespace gpu { namespace device

template <typename T> struct shift_and_sizeof;
template <> struct shift_and_sizeof<signed char> { enum { shift = 0 }; };
template <> struct shift_and_sizeof<unsigned char> { enum { shift = 0 }; };
template <> struct shift_and_sizeof<short> { enum { shift = 1 }; };
template <> struct shift_and_sizeof<unsigned short> { enum { shift = 1 }; };
template <> struct shift_and_sizeof<int> { enum { shift = 2 }; };
template <> struct shift_and_sizeof<float> { enum { shift = 2 }; };
template <> struct shift_and_sizeof<double> { enum { shift = 3 }; };

///////////////////////////////////////////////////////////////////////////
////////////////////////////////// CopyTo /////////////////////////////////
///////////////////////////////////////////////////////////////////////////

template<typename T>
__global__ void copy_to_with_mask(const T* mat_src, T* mat_dst, const uchar* mask, int cols, int rows, size_t step_mat, size_t step_mask, int channels)
{
    template <typename T> struct shift_and_sizeof;
    template <> struct shift_and_sizeof<signed char> { enum { shift = 0 }; };
    template <> struct shift_and_sizeof<unsigned char> { enum { shift = 0 }; };
    template <> struct shift_and_sizeof<short> { enum { shift = 1 }; };
    template <> struct shift_and_sizeof<unsigned short> { enum { shift = 1 }; };
    template <> struct shift_and_sizeof<int> { enum { shift = 2 }; };
    template <> struct shift_and_sizeof<float> { enum { shift = 2 }; };
    template <> struct shift_and_sizeof<double> { enum { shift = 3 }; };

    ///////////////////////////////////////////////////////////////////////////
    ////////////////////////////////// CopyTo /////////////////////////////////
    ///////////////////////////////////////////////////////////////////////////

    template<typename T>
    __global__ void copy_to_with_mask(const T* mat_src, T* mat_dst, const uchar* mask, int cols, int rows, size_t step_mat, size_t step_mask, int channels)
    {
size_t x = blockIdx.x * blockDim.x + threadIdx.x;
size_t y = blockIdx.y * blockDim.y + threadIdx.y;

@ -72,11 +72,11 @@ __global__ void copy_to_with_mask(const T* mat_src, T* mat_dst, const uchar* mas
size_t idx = y * ( step_mat >> shift_and_sizeof<T>::shift ) + x;
mat_dst[idx] = mat_src[idx];
}
}

template<typename T>
void copy_to_with_mask_run(const DevMem2Db& mat_src, const DevMem2Db& mat_dst, const DevMem2Db& mask, int channels, const cudaStream_t & stream)
{
dim3 threadsPerBlock(16,16, 1);
dim3 numBlocks ( divUp(mat_src.cols * channels , threadsPerBlock.x) , divUp(mat_src.rows , threadsPerBlock.y), 1);

@ -86,10 +86,10 @@ void copy_to_with_mask_run(const DevMem2Db& mat_src, const DevMem2Db& mat_dst, c

if (stream == 0)
cudaSafeCall ( cudaDeviceSynchronize() );
}

void copy_to_with_mask(const DevMem2Db& mat_src, DevMem2Db mat_dst, int depth, const DevMem2Db& mask, int channels, const cudaStream_t & stream)
{
typedef void (*CopyToFunc)(const DevMem2Db& mat_src, const DevMem2Db& mat_dst, const DevMem2Db& mask, int channels, const cudaStream_t & stream);

static CopyToFunc tab[8] =
@ -109,61 +109,61 @@ void copy_to_with_mask(const DevMem2Db& mat_src, DevMem2Db mat_dst, int depth, c
if (func == 0) cv::gpu::error("Unsupported copyTo operation", __FILE__, __LINE__);

func(mat_src, mat_dst, mask, channels, stream);
}
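The tab initializer elided at @ -109 is, by all appearances, indexed by the CV depth code carried in depth; a plausible reconstruction (assumed -- the initializer itself is not shown in this diff):

// Assumed layout: one masked-copy instantiation per CV depth code,
// 0 where unsupported so the func == 0 check above can reject it.
static CopyToFunc tabSketch[8] =
{
    copy_to_with_mask_run<unsigned char>,   // CV_8U
    copy_to_with_mask_run<signed char>,     // CV_8S
    copy_to_with_mask_run<unsigned short>,  // CV_16U
    copy_to_with_mask_run<short>,           // CV_16S
    copy_to_with_mask_run<int>,             // CV_32S
    copy_to_with_mask_run<float>,           // CV_32F
    copy_to_with_mask_run<double>,          // CV_64F
    0                                       // no eighth depth code
};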
///////////////////////////////////////////////////////////////////////////
////////////////////////////////// SetTo //////////////////////////////////
///////////////////////////////////////////////////////////////////////////

__constant__ uchar scalar_8u[4];
__constant__ schar scalar_8s[4];
__constant__ ushort scalar_16u[4];
__constant__ short scalar_16s[4];
__constant__ int scalar_32s[4];
__constant__ float scalar_32f[4];
__constant__ double scalar_64f[4];

template <typename T> __device__ __forceinline__ T readScalar(int i);
template <> __device__ __forceinline__ uchar readScalar<uchar>(int i) {return scalar_8u[i];}
template <> __device__ __forceinline__ schar readScalar<schar>(int i) {return scalar_8s[i];}
template <> __device__ __forceinline__ ushort readScalar<ushort>(int i) {return scalar_16u[i];}
template <> __device__ __forceinline__ short readScalar<short>(int i) {return scalar_16s[i];}
template <> __device__ __forceinline__ int readScalar<int>(int i) {return scalar_32s[i];}
template <> __device__ __forceinline__ float readScalar<float>(int i) {return scalar_32f[i];}
template <> __device__ __forceinline__ double readScalar<double>(int i) {return scalar_64f[i];}

void writeScalar(const uchar* vals)
{
cudaSafeCall( cudaMemcpyToSymbol(scalar_8u, vals, sizeof(uchar) * 4) );
}
void writeScalar(const schar* vals)
{
cudaSafeCall( cudaMemcpyToSymbol(scalar_8s, vals, sizeof(schar) * 4) );
}
void writeScalar(const ushort* vals)
{
cudaSafeCall( cudaMemcpyToSymbol(scalar_16u, vals, sizeof(ushort) * 4) );
}
void writeScalar(const short* vals)
{
cudaSafeCall( cudaMemcpyToSymbol(scalar_16s, vals, sizeof(short) * 4) );
}
void writeScalar(const int* vals)
{
cudaSafeCall( cudaMemcpyToSymbol(scalar_32s, vals, sizeof(int) * 4) );
}
void writeScalar(const float* vals)
{
cudaSafeCall( cudaMemcpyToSymbol(scalar_32f, vals, sizeof(float) * 4) );
}
void writeScalar(const double* vals)
{
cudaSafeCall( cudaMemcpyToSymbol(scalar_64f, vals, sizeof(double) * 4) );
}

template<typename T>
__global__ void set_to_without_mask(T* mat, int cols, int rows, size_t step, int channels)
{
size_t x = blockIdx.x * blockDim.x + threadIdx.x;
size_t y = blockIdx.y * blockDim.y + threadIdx.y;

@ -172,11 +172,11 @@ __global__ void set_to_without_mask(T* mat, int cols, int rows, size_t step, int
size_t idx = y * ( step >> shift_and_sizeof<T>::shift ) + x;
mat[idx] = readScalar<T>(x % channels);
}
}

template<typename T>
__global__ void set_to_with_mask(T* mat, const uchar* mask, int cols, int rows, size_t step, int channels, size_t step_mask)
{
size_t x = blockIdx.x * blockDim.x + threadIdx.x;
size_t y = blockIdx.y * blockDim.y + threadIdx.y;

@ -186,10 +186,10 @@ __global__ void set_to_with_mask(T* mat, const uchar* mask, int cols, int rows,
size_t idx = y * ( step >> shift_and_sizeof<T>::shift ) + x;
mat[idx] = readScalar<T>(x % channels);
}
}
template <typename T>
void set_to_gpu(const DevMem2Db& mat, const T* scalar, const DevMem2Db& mask, int channels, cudaStream_t stream)
{
writeScalar(scalar);

dim3 threadsPerBlock(32, 8, 1);

@ -200,19 +200,19 @@ void set_to_gpu(const DevMem2Db& mat, const T* scalar, const DevMem2Db& mask, in

if (stream == 0)
cudaSafeCall ( cudaDeviceSynchronize() );
}

template void set_to_gpu<uchar >(const DevMem2Db& mat, const uchar* scalar, const DevMem2Db& mask, int channels, cudaStream_t stream);
template void set_to_gpu<schar >(const DevMem2Db& mat, const schar* scalar, const DevMem2Db& mask, int channels, cudaStream_t stream);
template void set_to_gpu<ushort>(const DevMem2Db& mat, const ushort* scalar, const DevMem2Db& mask, int channels, cudaStream_t stream);
template void set_to_gpu<short >(const DevMem2Db& mat, const short* scalar, const DevMem2Db& mask, int channels, cudaStream_t stream);
template void set_to_gpu<int >(const DevMem2Db& mat, const int* scalar, const DevMem2Db& mask, int channels, cudaStream_t stream);
template void set_to_gpu<float >(const DevMem2Db& mat, const float* scalar, const DevMem2Db& mask, int channels, cudaStream_t stream);
template void set_to_gpu<double>(const DevMem2Db& mat, const double* scalar, const DevMem2Db& mask, int channels, cudaStream_t stream);

template <typename T>
void set_to_gpu(const DevMem2Db& mat, const T* scalar, int channels, cudaStream_t stream)
{
writeScalar(scalar);

dim3 threadsPerBlock(32, 8, 1);

@ -223,22 +223,22 @@ void set_to_gpu(const DevMem2Db& mat, const T* scalar, int channels, cudaStream_

if (stream == 0)
cudaSafeCall ( cudaDeviceSynchronize() );
}

template void set_to_gpu<uchar >(const DevMem2Db& mat, const uchar* scalar, int channels, cudaStream_t stream);
template void set_to_gpu<schar >(const DevMem2Db& mat, const schar* scalar, int channels, cudaStream_t stream);
template void set_to_gpu<ushort>(const DevMem2Db& mat, const ushort* scalar, int channels, cudaStream_t stream);
template void set_to_gpu<short >(const DevMem2Db& mat, const short* scalar, int channels, cudaStream_t stream);
template void set_to_gpu<int >(const DevMem2Db& mat, const int* scalar, int channels, cudaStream_t stream);
template void set_to_gpu<float >(const DevMem2Db& mat, const float* scalar, int channels, cudaStream_t stream);
template void set_to_gpu<double>(const DevMem2Db& mat, const double* scalar, int channels, cudaStream_t stream);
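Usage-wise, the SetTo path is a two-step broadcast: writeScalar copies up to four channel values into the matching __constant__ bank once per call, then every thread picks its channel's value with readScalar<T>(x % channels). A host-side sketch (the wrapper, values, and include path are illustrative, not from the commit):

#include "opencv2/gpu/devmem2d.hpp"  // assumed location of DevMem2Db in this era

// Fill a 3-channel 8-bit device image with BGR (0, 128, 255); the
// unused fourth slot still travels to constant memory.
void setToExample(const cv::gpu::DevMem2Db& mat)
{
    uchar vals[4] = { 0, 128, 255, 0 };
    cv::gpu::device::set_to_gpu<uchar>(mat, vals, /*channels=*/3, /*stream=*/0);
}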
///////////////////////////////////////////////////////////////////////////
//////////////////////////////// ConvertTo ////////////////////////////////
///////////////////////////////////////////////////////////////////////////

template <typename T, typename D> struct Convertor : unary_function<T, D>
{
Convertor(double alpha_, double beta_) : alpha(alpha_), beta(beta_) {}

__device__ __forceinline__ D operator()(const T& src) const
@ -247,10 +247,10 @@ template <typename T, typename D> struct Convertor : unary_function<T, D>
}

const double alpha, beta;
};

namespace detail
{
template <size_t src_size, size_t dst_size, typename F> struct ConvertTraitsDispatcher : DefaultTransformFunctorTraits<F>
{
};
@ -291,24 +291,24 @@ namespace detail
template <typename F> struct ConvertTraits : ConvertTraitsDispatcher<sizeof(typename F::argument_type), sizeof(typename F::result_type), F>
{
};
}

template <typename T, typename D> struct TransformFunctorTraits< Convertor<T, D> > : detail::ConvertTraits< Convertor<T, D> >
{
};

template<typename T, typename D>
void cvt_(const DevMem2Db& src, const DevMem2Db& dst, double alpha, double beta, cudaStream_t stream)
{
cudaSafeCall( cudaSetDoubleForDevice(&alpha) );
cudaSafeCall( cudaSetDoubleForDevice(&beta) );
Convertor<T, D> op(alpha, beta);
OPENCV_DEVICE_NAMESPACE_ transform((DevMem2D_<T>)src, (DevMem2D_<D>)dst, op, stream);
::cv::gpu::device::transform((DevMem2D_<T>)src, (DevMem2D_<D>)dst, op, stream);
}

void convert_gpu(const DevMem2Db& src, int sdepth, const DevMem2Db& dst, int ddepth, double alpha, double beta,
cudaStream_t stream = 0)
{
typedef void (*caller_t)(const DevMem2Db& src, const DevMem2Db& dst, double alpha, double beta,
cudaStream_t stream);

@ -343,6 +343,5 @@ void convert_gpu(const DevMem2Db& src, int sdepth, const DevMem2Db& dst, int dde
cv::gpu::error("Unsupported convert operation", __FILE__, __LINE__);

func(src, dst, alpha, beta, stream);
}

}}} // namespace cv { namespace gpu { namespace device

END_OPENCV_DEVICE_NAMESPACE
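The ConvertTo path is the one place in this file where the macro removal touched an expression rather than a namespace brace: the qualified call to transform in cvt_. The functor body elided at @ -247 presumably performs a saturating linear map, with cudaSetDoubleForDevice demoting alpha/beta to float on hardware without native double support. Sketch of the assumed shape (saturate_cast comes from opencv2/gpu/device/saturate_cast.hpp):

// Assumed body of the elided Convertor<T, D>::operator(), consistent
// with convertTo semantics: dst = saturate_cast<D>(alpha * src + beta).
template <typename T, typename D> struct ConvertorSketch
{
    ConvertorSketch(double a, double b) : alpha(a), beta(b) {}

    __device__ __forceinline__ D operator()(const T& src) const
    {
        return saturate_cast<D>(alpha * src + beta);  // scale, shift, clamp
    }

    const double alpha, beta;
};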
@ -46,14 +46,14 @@
#include "opencv2/gpu/device/vec_math.hpp"
#include "opencv2/gpu/device/transform.hpp"

BEGIN_OPENCV_DEVICE_NAMESPACE
namespace cv { namespace gpu { namespace device

namespace matrix_reductions {

// Performs reduction in shared memory
template <int size, typename T>
__device__ void sumInSmem(volatile T* data, const uint tid)
{
    namespace matrix_reductions
    {
        // Performs reduction in shared memory
        template <int size, typename T>
        __device__ void sumInSmem(volatile T* data, const uint tid)
        {
T sum = data[tid];

if (size >= 512) { if (tid < 256) { data[tid] = sum = sum + data[tid + 256]; } __syncthreads(); }
@ -69,10 +69,10 @@ __device__ void sumInSmem(volatile T* data, const uint tid)
if (size >= 4) data[tid] = sum = sum + data[tid + 2];
if (size >= 2) data[tid] = sum = sum + data[tid + 1];
}
}

struct Mask8U
{
explicit Mask8U(PtrStepb mask): mask(mask) {}

__device__ __forceinline__ bool operator()(int y, int x) const
@ -81,32 +81,32 @@ struct Mask8U
}

PtrStepb mask;
};

struct MaskTrue
{
__device__ __forceinline__ bool operator()(int y, int x) const
{
return true;
}
};
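sumInSmem is a textbook power-of-two shared-memory tree reduction: each step folds the top half of the buffer into the bottom half, with __syncthreads() between steps while more than one warp participates and the volatile qualifier standing in for synchronization inside the last 32 threads (adequate on the pre-Fermi and Fermi hardware this code targets). A sketch of the intended call pattern (kernel and names are illustrative, not in the commit):

// Illustrative kernel: produce one partial sum per block using the
// sumInSmem helper defined above. nthreads must be a power of two <= 512.
template <int nthreads>
__global__ void blockSumSketch(const float* in, float* block_sums, int n)
{
    __shared__ float smem[nthreads];
    const int tid = threadIdx.x;
    const int i   = blockIdx.x * nthreads + tid;

    smem[tid] = (i < n) ? in[i] : 0.f;   // one element per thread
    __syncthreads();

    sumInSmem<nthreads, float>(smem, tid);

    if (tid == 0)
        block_sums[blockIdx.x] = smem[0];  // block total lives in smem[0]
}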
//////////////////////////////////////////////////////////////////////////////
// Min max

// To avoid shared bank conflicts we convert each value into value of
// appropriate type (32 bits minimum)
template <typename T> struct MinMaxTypeTraits {};
template <> struct MinMaxTypeTraits<uchar> { typedef int best_type; };
template <> struct MinMaxTypeTraits<char> { typedef int best_type; };
template <> struct MinMaxTypeTraits<ushort> { typedef int best_type; };
template <> struct MinMaxTypeTraits<short> { typedef int best_type; };
template <> struct MinMaxTypeTraits<int> { typedef int best_type; };
template <> struct MinMaxTypeTraits<float> { typedef float best_type; };
template <> struct MinMaxTypeTraits<double> { typedef double best_type; };

namespace minmax
{
__constant__ int ctwidth;
__constant__ int ctheight;

@ -214,7 +214,7 @@ namespace minmax
maxval[blockIdx.y * gridDim.x + blockIdx.x] = (T)smaxval[0];
}

#if __CUDA_ARCH__ >= 110
__shared__ bool is_last;

if (tid == 0)
@ -246,13 +246,13 @@ namespace minmax
blocks_finished = 0;
}
}
#else
if (tid == 0)
{
minval[blockIdx.y * gridDim.x + blockIdx.x] = (T)sminval[0];
maxval[blockIdx.y * gridDim.x + blockIdx.x] = (T)smaxval[0];
}
#endif
}

@ -404,13 +404,13 @@ namespace minmax
template void minMaxMultipassCaller<short>(const DevMem2Db, double*, double*, PtrStepb);
template void minMaxMultipassCaller<int>(const DevMem2Db, double*, double*, PtrStepb);
template void minMaxMultipassCaller<float>(const DevMem2Db, double*, double*, PtrStepb);
} // namespace minmax
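The recurring #if __CUDA_ARCH__ >= 110 split, here and in minmaxloc/countnonzero below, exists because sm_11 introduced global atomics: the single-pass kernels let the last block to finish fold the per-block partials itself, while sm_10 builds fall back to the *MultipassCaller second launch. The handshake the elided hunks use is the classic fenced-ticket idiom; a sketch (names assumed, call from tid == 0 only):

// Sketch of the last-block handshake behind the is_last flag above.
__device__ unsigned int blocks_finished = 0;

__device__ bool isLastBlockDone()
{
    __threadfence();  // publish this block's partial results first
    // atomicInc wraps to 0 after gridDim.x * gridDim.y tickets, which
    // also re-arms the counter for the next launch.
    unsigned int ticket = atomicInc(&blocks_finished, gridDim.x * gridDim.y - 1);
    return ticket == gridDim.x * gridDim.y - 1;
}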
///////////////////////////////////////////////////////////////////////////////
|
///////////////////////////////////////////////////////////////////////////////
|
||||||
// minMaxLoc
|
// minMaxLoc
|
||||||
|
|
||||||
namespace minmaxloc
|
namespace minmaxloc
|
||||||
{
|
{
|
||||||
__constant__ int ctwidth;
|
__constant__ int ctwidth;
|
||||||
__constant__ int ctheight;
|
__constant__ int ctheight;
|
||||||
|
|
||||||
@ -534,7 +534,7 @@ namespace minmaxloc
|
|||||||
|
|
||||||
findMinMaxLocInSmem<nthreads, best_type>(sminval, smaxval, sminloc, smaxloc, tid);
|
findMinMaxLocInSmem<nthreads, best_type>(sminval, smaxval, sminloc, smaxloc, tid);
|
||||||
|
|
||||||
#if __CUDA_ARCH__ >= 110
|
#if __CUDA_ARCH__ >= 110
|
||||||
__shared__ bool is_last;
|
__shared__ bool is_last;
|
||||||
|
|
||||||
if (tid == 0)
|
if (tid == 0)
|
||||||
@ -572,7 +572,7 @@ namespace minmaxloc
|
|||||||
blocks_finished = 0;
|
blocks_finished = 0;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
#else
|
#else
|
||||||
if (tid == 0)
|
if (tid == 0)
|
||||||
{
|
{
|
||||||
minval[blockIdx.y * gridDim.x + blockIdx.x] = (T)sminval[0];
|
minval[blockIdx.y * gridDim.x + blockIdx.x] = (T)sminval[0];
|
||||||
@ -580,7 +580,7 @@ namespace minmaxloc
|
|||||||
minloc[blockIdx.y * gridDim.x + blockIdx.x] = sminloc[0];
|
minloc[blockIdx.y * gridDim.x + blockIdx.x] = sminloc[0];
|
||||||
maxloc[blockIdx.y * gridDim.x + blockIdx.x] = smaxloc[0];
|
maxloc[blockIdx.y * gridDim.x + blockIdx.x] = smaxloc[0];
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@ -779,13 +779,13 @@ namespace minmaxloc
|
|||||||
template void minMaxLocMultipassCaller<short>(const DevMem2Db, double*, double*, int[2], int[2], PtrStepb, PtrStepb);
|
template void minMaxLocMultipassCaller<short>(const DevMem2Db, double*, double*, int[2], int[2], PtrStepb, PtrStepb);
|
||||||
template void minMaxLocMultipassCaller<int>(const DevMem2Db, double*, double*, int[2], int[2], PtrStepb, PtrStepb);
|
template void minMaxLocMultipassCaller<int>(const DevMem2Db, double*, double*, int[2], int[2], PtrStepb, PtrStepb);
|
||||||
template void minMaxLocMultipassCaller<float>(const DevMem2Db, double*, double*, int[2], int[2], PtrStepb, PtrStepb);
|
template void minMaxLocMultipassCaller<float>(const DevMem2Db, double*, double*, int[2], int[2], PtrStepb, PtrStepb);
|
||||||
} // namespace minmaxloc
|
} // namespace minmaxloc
|
||||||
|
|
||||||
//////////////////////////////////////////////////////////////////////////////////////////////////////////
|
//////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||||
// countNonZero
|
// countNonZero
|
||||||
|
|
||||||
namespace countnonzero
|
namespace countnonzero
|
||||||
{
|
{
|
||||||
__constant__ int ctwidth;
|
__constant__ int ctwidth;
|
||||||
__constant__ int ctheight;
|
__constant__ int ctheight;
|
||||||
|
|
||||||
@ -840,7 +840,7 @@ namespace countnonzero
|
|||||||
|
|
||||||
sumInSmem<nthreads, uint>(scount, tid);
|
sumInSmem<nthreads, uint>(scount, tid);
|
||||||
|
|
||||||
#if __CUDA_ARCH__ >= 110
|
#if __CUDA_ARCH__ >= 110
|
||||||
__shared__ bool is_last;
|
__shared__ bool is_last;
|
||||||
|
|
||||||
if (tid == 0)
|
if (tid == 0)
|
||||||
@ -867,9 +867,9 @@ namespace countnonzero
|
|||||||
blocks_finished = 0;
|
blocks_finished = 0;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
#else
|
#else
|
||||||
if (tid == 0) count[blockIdx.y * gridDim.x + blockIdx.x] = scount[0];
|
if (tid == 0) count[blockIdx.y * gridDim.x + blockIdx.x] = scount[0];
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@@ -947,14 +947,14 @@ namespace countnonzero
template int countNonZeroMultipassCaller<int>(const DevMem2Db, PtrStepb);
template int countNonZeroMultipassCaller<float>(const DevMem2Db, PtrStepb);

} // namespace countnonzero


//////////////////////////////////////////////////////////////////////////
// Sum

namespace sum
{
template <typename T> struct SumType {};
template <> struct SumType<uchar> { typedef uint R; };
template <> struct SumType<char> { typedef int R; };
@@ -1033,7 +1033,7 @@ namespace sum

sumInSmem<nthreads, R>(smem, tid);

#if __CUDA_ARCH__ >= 110
__shared__ bool is_last;

if (tid == 0)
@@ -1060,9 +1060,9 @@ namespace sum
blocks_finished = 0;
}
}
#else
if (tid == 0) result[bid] = smem[0];
#endif
}


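The recurring #if __CUDA_ARCH__ >= 110 / is_last / blocks_finished fragments above are the tail of a single-pass reduction: on devices with global atomics the last block to finish folds the per-block partials into the final value, while older devices only write partials and leave the final fold to a second pass (the *MultipassCaller instantiations). A minimal self-contained sketch of that last-block idiom, under the assumption that this is the pattern these kernels follow (everything except the is_last/blocks_finished names is illustrative):

__device__ unsigned int blocks_finished = 0;

__global__ void sumSinglePass(float* partials, float* result)
{
    // ...assume each block has already reduced its slice into partials[blockIdx.x]...
    __shared__ bool is_last;

    __threadfence(); // make this block's partial visible device-wide before signalling

    if (threadIdx.x == 0)
    {
        // atomicInc returns the previous count, so exactly one block sees gridDim.x - 1
        unsigned int ticket = atomicInc(&blocks_finished, gridDim.x);
        is_last = (ticket == gridDim.x - 1);
    }
    __syncthreads();

    if (is_last && threadIdx.x == 0)
    {
        float sum = 0.f;
        for (unsigned int i = 0; i < gridDim.x; ++i)
            sum += partials[i];

        *result = sum;
        blocks_finished = 0; // reset so the kernel can be launched again
    }
}
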
@@ -1114,7 +1114,7 @@ namespace sum
sumInSmem<nthreads, R>(smem, tid);
sumInSmem<nthreads, R>(smem + nthreads, tid);

#if __CUDA_ARCH__ >= 110
__shared__ bool is_last;

if (tid == 0)
@@ -1149,7 +1149,7 @@ namespace sum
blocks_finished = 0;
}
}
#else
if (tid == 0)
{
DstType res;
@@ -1157,7 +1157,7 @@ namespace sum
res.y = smem[nthreads];
result[bid] = res;
}
#endif
}


@@ -1221,7 +1221,7 @@ namespace sum
sumInSmem<nthreads, R>(smem + nthreads, tid);
sumInSmem<nthreads, R>(smem + 2 * nthreads, tid);

#if __CUDA_ARCH__ >= 110
__shared__ bool is_last;

if (tid == 0)
@@ -1260,7 +1260,7 @@ namespace sum
blocks_finished = 0;
}
}
#else
if (tid == 0)
{
DstType res;
@@ -1269,7 +1269,7 @@ namespace sum
res.z = smem[2 * nthreads];
result[bid] = res;
}
#endif
}


@@ -1338,7 +1338,7 @@ namespace sum
sumInSmem<nthreads, R>(smem + 2 * nthreads, tid);
sumInSmem<nthreads, R>(smem + 3 * nthreads, tid);

#if __CUDA_ARCH__ >= 110
__shared__ bool is_last;

if (tid == 0)
@@ -1381,7 +1381,7 @@ namespace sum
blocks_finished = 0;
}
}
#else
if (tid == 0)
{
DstType res;
@@ -1391,7 +1391,7 @@ namespace sum
res.w = smem[3 * nthreads];
result[bid] = res;
}
#endif
}


@@ -1784,13 +1784,13 @@ namespace sum
template void sqrSumCaller<short>(const DevMem2Db, PtrStepb, double*, int);
template void sqrSumCaller<int>(const DevMem2Db, PtrStepb, double*, int);
template void sqrSumCaller<float>(const DevMem2Db, PtrStepb, double*, int);
} // namespace sum

//////////////////////////////////////////////////////////////////////////////
// reduce

template <typename S> struct SumReductor
{
__device__ __forceinline__ S startValue() const
{
return 0;
@@ -1805,10 +1805,10 @@ template <typename S> struct SumReductor
{
return r;
}
};

template <typename S> struct AvgReductor
{
__device__ __forceinline__ S startValue() const
{
return 0;
@@ -1823,10 +1823,10 @@ template <typename S> struct AvgReductor
{
return r / sz;
}
};

template <typename S> struct MinReductor
{
__device__ __forceinline__ S startValue() const
{
return numeric_limits<S>::max();
@@ -1845,10 +1845,10 @@ template <typename S> struct MinReductor
{
return r;
}
};

template <typename S> struct MaxReductor
{
__device__ __forceinline__ S startValue() const
{
return numeric_limits<S>::min();
@@ -1867,10 +1867,10 @@ template <typename S> struct MaxReductor
{
return r;
}
};

template <class Op, typename T, typename S, typename D> __global__ void reduceRows(const DevMem2D_<T> src, D* dst, const Op op)
{
__shared__ S smem[16 * 16];

const int x = blockIdx.x * 16 + threadIdx.x;
@@ -1898,10 +1898,10 @@ template <class Op, typename T, typename S, typename D> __global__ void reduceRo

if (threadIdx.y == 0 && x < src.cols)
dst[x] = saturate_cast<D>(op.result(smem[threadIdx.x * 16], src.rows));
}

template <template <typename> class Op, typename T, typename S, typename D> void reduceRows_caller(const DevMem2D_<T>& src, DevMem2D_<D> dst, cudaStream_t stream)
{
const dim3 block(16, 16);
const dim3 grid(divUp(src.cols, block.x));

@@ -1912,10 +1912,10 @@ template <template <typename> class Op, typename T, typename S, typename D> void
if (stream == 0)
cudaSafeCall( cudaDeviceSynchronize() );

}

template <typename T, typename S, typename D> void reduceRows_gpu(const DevMem2Db& src, const DevMem2Db& dst, int reduceOp, cudaStream_t stream)
{
typedef void (*caller_t)(const DevMem2D_<T>& src, DevMem2D_<D> dst, cudaStream_t stream);

static const caller_t callers[] =
@@ -1927,29 +1927,29 @@ template <typename T, typename S, typename D> void reduceRows_gpu(const DevMem2D
};

callers[reduceOp](static_cast< DevMem2D_<T> >(src), static_cast< DevMem2D_<D> >(dst), stream);
}

template void reduceRows_gpu<uchar, int, uchar>(const DevMem2Db& src, const DevMem2Db& dst, int reduceOp, cudaStream_t stream);
template void reduceRows_gpu<uchar, int, int>(const DevMem2Db& src, const DevMem2Db& dst, int reduceOp, cudaStream_t stream);
template void reduceRows_gpu<uchar, int, float>(const DevMem2Db& src, const DevMem2Db& dst, int reduceOp, cudaStream_t stream);

template void reduceRows_gpu<ushort, int, ushort>(const DevMem2Db& src, const DevMem2Db& dst, int reduceOp, cudaStream_t stream);
template void reduceRows_gpu<ushort, int, int>(const DevMem2Db& src, const DevMem2Db& dst, int reduceOp, cudaStream_t stream);
template void reduceRows_gpu<ushort, int, float>(const DevMem2Db& src, const DevMem2Db& dst, int reduceOp, cudaStream_t stream);

template void reduceRows_gpu<short, int, short>(const DevMem2Db& src, const DevMem2Db& dst, int reduceOp, cudaStream_t stream);
template void reduceRows_gpu<short, int, int>(const DevMem2Db& src, const DevMem2Db& dst, int reduceOp, cudaStream_t stream);
template void reduceRows_gpu<short, int, float>(const DevMem2Db& src, const DevMem2Db& dst, int reduceOp, cudaStream_t stream);

template void reduceRows_gpu<int, int, int>(const DevMem2Db& src, const DevMem2Db& dst, int reduceOp, cudaStream_t stream);
template void reduceRows_gpu<int, int, float>(const DevMem2Db& src, const DevMem2Db& dst, int reduceOp, cudaStream_t stream);

template void reduceRows_gpu<float, float, float>(const DevMem2Db& src, const DevMem2Db& dst, int reduceOp, cudaStream_t stream);



template <int cn, class Op, typename T, typename S, typename D> __global__ void reduceCols(const DevMem2D_<T> src, D* dst, const Op op)
{
__shared__ S smem[256 * cn];

const int y = blockIdx.x;
@@ -1962,7 +1962,7 @@ template <int cn, class Op, typename T, typename S, typename D> __global__ void
for (int c = 0; c < cn; ++c)
myVal[c] = op.startValue();

#if __CUDA_ARCH__ >= 200

// For cc >= 2.0 prefer L1 cache
for (int x = threadIdx.x; x < src.cols; x += 256)
@@ -1972,7 +1972,7 @@
myVal[c] = op(myVal[c], src_row[x * cn + c]);
}

#else // __CUDA_ARCH__ >= 200

// For older arch use shared memory for cache
for (int x = 0; x < src.cols; x += 256)
@@ -1993,7 +1993,7 @@
__syncthreads();
}

#endif // __CUDA_ARCH__ >= 200

#pragma unroll
for (int c = 0; c < cn; ++c)
@@ -2035,10 +2035,10 @@

if (threadIdx.x < cn)
dst[y * cn + threadIdx.x] = saturate_cast<D>(op.result(smem[threadIdx.x * 256], src.cols));
}

template <int cn, template <typename> class Op, typename T, typename S, typename D> void reduceCols_caller(const DevMem2D_<T>& src, DevMem2D_<D> dst, cudaStream_t stream)
{
const dim3 block(256);
const dim3 grid(src.rows);

@@ -2049,10 +2049,10 @@ template <int cn, template <typename> class Op, typename T, typename S, typename
if (stream == 0)
cudaSafeCall( cudaDeviceSynchronize() );

}

template <typename T, typename S, typename D> void reduceCols_gpu(const DevMem2Db& src, int cn, const DevMem2Db& dst, int reduceOp, cudaStream_t stream)
{
typedef void (*caller_t)(const DevMem2D_<T>& src, DevMem2D_<D> dst, cudaStream_t stream);

static const caller_t callers[4][4] =
@@ -2064,25 +2064,23 @@ template <typename T, typename S, typename D> void reduceCols_gpu(const DevMem2D
};

callers[cn - 1][reduceOp](static_cast< DevMem2D_<T> >(src), static_cast< DevMem2D_<D> >(dst), stream);
}

template void reduceCols_gpu<uchar, int, uchar>(const DevMem2Db& src, int cn, const DevMem2Db& dst, int reduceOp, cudaStream_t stream);
template void reduceCols_gpu<uchar, int, int>(const DevMem2Db& src, int cn, const DevMem2Db& dst, int reduceOp, cudaStream_t stream);
template void reduceCols_gpu<uchar, int, float>(const DevMem2Db& src, int cn, const DevMem2Db& dst, int reduceOp, cudaStream_t stream);

template void reduceCols_gpu<ushort, int, ushort>(const DevMem2Db& src, int cn, const DevMem2Db& dst, int reduceOp, cudaStream_t stream);
template void reduceCols_gpu<ushort, int, int>(const DevMem2Db& src, int cn, const DevMem2Db& dst, int reduceOp, cudaStream_t stream);
template void reduceCols_gpu<ushort, int, float>(const DevMem2Db& src, int cn, const DevMem2Db& dst, int reduceOp, cudaStream_t stream);

template void reduceCols_gpu<short, int, short>(const DevMem2Db& src, int cn, const DevMem2Db& dst, int reduceOp, cudaStream_t stream);
template void reduceCols_gpu<short, int, int>(const DevMem2Db& src, int cn, const DevMem2Db& dst, int reduceOp, cudaStream_t stream);
template void reduceCols_gpu<short, int, float>(const DevMem2Db& src, int cn, const DevMem2Db& dst, int reduceOp, cudaStream_t stream);

template void reduceCols_gpu<int, int, int>(const DevMem2Db& src, int cn, const DevMem2Db& dst, int reduceOp, cudaStream_t stream);
template void reduceCols_gpu<int, int, float>(const DevMem2Db& src, int cn, const DevMem2Db& dst, int reduceOp, cudaStream_t stream);

template void reduceCols_gpu<float, float, float>(const DevMem2Db& src, int cn, const DevMem2Db& dst, int reduceOp, cudaStream_t stream);

-} // namespace mattrix_reductions
-
-END_OPENCV_DEVICE_NAMESPACE
+} // namespace mattrix_reductions
+}}} // namespace cv { namespace gpu { namespace device
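The same begin/end replacement repeats verbatim in every file below. Judging from the replacement text alone, the removed macros presumably expanded to the nested device namespaces that are now written out by hand; a minimal sketch of that reading (inferred from this diff, not quoted from the OpenCV headers):

// Presumed expansion of the removed macros:
#define BEGIN_OPENCV_DEVICE_NAMESPACE namespace cv { namespace gpu { namespace device {
#define END_OPENCV_DEVICE_NAMESPACE   }}}

// After the commit the same scopes are spelled out explicitly:
namespace cv { namespace gpu { namespace device
{
    namespace imgproc
    {
        // kernels and their host-side callers
    }
}}} // namespace cv { namespace gpu { namespace device

Spelling the namespaces out keeps the scoping visible to editors and tools that do not expand macros, at the cost of the terser closing }}}.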
@@ -46,12 +46,12 @@
#include "opencv2/gpu/device/vec_math.hpp"
#include "opencv2/gpu/device/saturate_cast.hpp"

-BEGIN_OPENCV_DEVICE_NAMESPACE
-
-namespace imgproc {
-
-template <typename T, typename B> __global__ void pyrDown(const PtrStep<T> src, PtrStep<T> dst, const B b, int dst_cols)
-{
+namespace cv { namespace gpu { namespace device
+{
+namespace imgproc
+{
+template <typename T, typename B> __global__ void pyrDown(const PtrStep<T> src, PtrStep<T> dst, const B b, int dst_cols)
+{
typedef typename TypeVec<float, VecTraits<T>::cn>::vec_type value_type;

const int x = blockIdx.x * blockDim.x + threadIdx.x;
@@ -122,10 +122,10 @@ template <typename T, typename B> __global__ void pyrDown(const PtrStep<T> src,
if (dst_x < dst_cols)
dst.ptr(y)[dst_x] = saturate_cast<T>(sum);
}
}

template <typename T, template <typename> class B> void pyrDown_caller(const DevMem2D_<T>& src, const DevMem2D_<T>& dst, cudaStream_t stream)
{
const dim3 block(256);
const dim3 grid(divUp(src.cols, block.x), dst.rows);

@@ -136,10 +136,10 @@ template <typename T, template <typename> class B> void pyrDown_caller(const Dev

if (stream == 0)
cudaSafeCall( cudaDeviceSynchronize() );
}

template <typename T, int cn> void pyrDown_gpu(const DevMem2Db& src, const DevMem2Db& dst, int borderType, cudaStream_t stream)
{
typedef typename TypeVec<T, cn>::vec_type type;

typedef void (*caller_t)(const DevMem2D_<type>& src, const DevMem2D_<type>& dst, cudaStream_t stream);
@@ -150,38 +150,36 @@ template <typename T, int cn> void pyrDown_gpu(const DevMem2Db& src, const DevMe
};

callers[borderType](static_cast< DevMem2D_<type> >(src), static_cast< DevMem2D_<type> >(dst), stream);
}

template void pyrDown_gpu<uchar, 1>(const DevMem2Db& src, const DevMem2Db& dst, int borderType, cudaStream_t stream);
template void pyrDown_gpu<uchar, 2>(const DevMem2Db& src, const DevMem2Db& dst, int borderType, cudaStream_t stream);
template void pyrDown_gpu<uchar, 3>(const DevMem2Db& src, const DevMem2Db& dst, int borderType, cudaStream_t stream);
template void pyrDown_gpu<uchar, 4>(const DevMem2Db& src, const DevMem2Db& dst, int borderType, cudaStream_t stream);

template void pyrDown_gpu<schar, 1>(const DevMem2Db& src, const DevMem2Db& dst, int borderType, cudaStream_t stream);
template void pyrDown_gpu<schar, 2>(const DevMem2Db& src, const DevMem2Db& dst, int borderType, cudaStream_t stream);
template void pyrDown_gpu<schar, 3>(const DevMem2Db& src, const DevMem2Db& dst, int borderType, cudaStream_t stream);
template void pyrDown_gpu<schar, 4>(const DevMem2Db& src, const DevMem2Db& dst, int borderType, cudaStream_t stream);

template void pyrDown_gpu<ushort, 1>(const DevMem2Db& src, const DevMem2Db& dst, int borderType, cudaStream_t stream);
template void pyrDown_gpu<ushort, 2>(const DevMem2Db& src, const DevMem2Db& dst, int borderType, cudaStream_t stream);
template void pyrDown_gpu<ushort, 3>(const DevMem2Db& src, const DevMem2Db& dst, int borderType, cudaStream_t stream);
template void pyrDown_gpu<ushort, 4>(const DevMem2Db& src, const DevMem2Db& dst, int borderType, cudaStream_t stream);

template void pyrDown_gpu<short, 1>(const DevMem2Db& src, const DevMem2Db& dst, int borderType, cudaStream_t stream);
template void pyrDown_gpu<short, 2>(const DevMem2Db& src, const DevMem2Db& dst, int borderType, cudaStream_t stream);
template void pyrDown_gpu<short, 3>(const DevMem2Db& src, const DevMem2Db& dst, int borderType, cudaStream_t stream);
template void pyrDown_gpu<short, 4>(const DevMem2Db& src, const DevMem2Db& dst, int borderType, cudaStream_t stream);

template void pyrDown_gpu<int, 1>(const DevMem2Db& src, const DevMem2Db& dst, int borderType, cudaStream_t stream);
template void pyrDown_gpu<int, 2>(const DevMem2Db& src, const DevMem2Db& dst, int borderType, cudaStream_t stream);
template void pyrDown_gpu<int, 3>(const DevMem2Db& src, const DevMem2Db& dst, int borderType, cudaStream_t stream);
template void pyrDown_gpu<int, 4>(const DevMem2Db& src, const DevMem2Db& dst, int borderType, cudaStream_t stream);

template void pyrDown_gpu<float, 1>(const DevMem2Db& src, const DevMem2Db& dst, int borderType, cudaStream_t stream);
template void pyrDown_gpu<float, 2>(const DevMem2Db& src, const DevMem2Db& dst, int borderType, cudaStream_t stream);
template void pyrDown_gpu<float, 3>(const DevMem2Db& src, const DevMem2Db& dst, int borderType, cudaStream_t stream);
template void pyrDown_gpu<float, 4>(const DevMem2Db& src, const DevMem2Db& dst, int borderType, cudaStream_t stream);

-} // namespace imgproc
-
-END_OPENCV_DEVICE_NAMESPACE
+} // namespace imgproc
+}}} // namespace cv { namespace gpu { namespace device
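pyrDown_gpu above (and pyrUp_gpu and remap_gpu below) all use the same dispatch idiom: a static table of function pointers, indexed by a runtime enum such as the border mode, selects a fully instantiated template, and the explicit `template void ...` instantiations keep every table entry in the compiled object file. A standalone sketch of the idiom (the names here are illustrative, not OpenCV's):

#include <cstdio>

template <typename T> void callerReplicate(const T*, int) { std::printf("replicate border\n"); }
template <typename T> void callerReflect(const T*, int)   { std::printf("reflect border\n");   }

template <typename T> void dispatch(const T* src, int n, int borderType)
{
    typedef void (*caller_t)(const T*, int);
    // Runtime index -> compile-time specialization, as in callers[borderType](...) above.
    static const caller_t callers[] = { callerReplicate<T>, callerReflect<T> };
    callers[borderType](src, n);
}

int main()
{
    float buf[4] = { 0.f, 1.f, 2.f, 3.f };
    dispatch(buf, 4, 0); // prints "replicate border"
    dispatch(buf, 4, 1); // prints "reflect border"
    return 0;
}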
@@ -46,12 +46,12 @@
#include "opencv2/gpu/device/vec_math.hpp"
#include "opencv2/gpu/device/saturate_cast.hpp"

-BEGIN_OPENCV_DEVICE_NAMESPACE
-
-namespace imgproc {
-
-template <typename T, typename B> __global__ void pyrUp(const PtrStep<T> src, DevMem2D_<T> dst, const B b)
-{
+namespace cv { namespace gpu { namespace device
+{
+namespace imgproc
+{
+template <typename T, typename B> __global__ void pyrUp(const PtrStep<T> src, DevMem2D_<T> dst, const B b)
+{
typedef typename TypeVec<float, VecTraits<T>::cn>::vec_type value_type;

const int x = blockIdx.x * blockDim.x + threadIdx.x;
@@ -117,10 +117,10 @@ template <typename T, typename B> __global__ void pyrUp(const PtrStep<T> src, De

if (x < dst.cols && y < dst.rows)
dst.ptr(y)[x] = saturate_cast<T>(4.0f * sum);
}

template <typename T, template <typename> class B> void pyrUp_caller(const DevMem2D_<T>& src, const DevMem2D_<T>& dst, cudaStream_t stream)
{
const dim3 block(16, 16);
const dim3 grid(divUp(dst.cols, block.x), divUp(dst.rows, block.y));

@@ -131,10 +131,10 @@ template <typename T, template <typename> class B> void pyrUp_caller(const DevMe

if (stream == 0)
cudaSafeCall( cudaDeviceSynchronize() );
}

template <typename T, int cn> void pyrUp_gpu(const DevMem2Db& src, const DevMem2Db& dst, int borderType, cudaStream_t stream)
{
typedef typename TypeVec<T, cn>::vec_type type;

typedef void (*caller_t)(const DevMem2D_<type>& src, const DevMem2D_<type>& dst, cudaStream_t stream);
@@ -145,38 +145,36 @@ template <typename T, int cn> void pyrUp_gpu(const DevMem2Db& src, const DevMem2
};

callers[borderType](static_cast< DevMem2D_<type> >(src), static_cast< DevMem2D_<type> >(dst), stream);
}

template void pyrUp_gpu<uchar, 1>(const DevMem2Db& src, const DevMem2Db& dst, int borderType, cudaStream_t stream);
template void pyrUp_gpu<uchar, 2>(const DevMem2Db& src, const DevMem2Db& dst, int borderType, cudaStream_t stream);
template void pyrUp_gpu<uchar, 3>(const DevMem2Db& src, const DevMem2Db& dst, int borderType, cudaStream_t stream);
template void pyrUp_gpu<uchar, 4>(const DevMem2Db& src, const DevMem2Db& dst, int borderType, cudaStream_t stream);

template void pyrUp_gpu<schar, 1>(const DevMem2Db& src, const DevMem2Db& dst, int borderType, cudaStream_t stream);
template void pyrUp_gpu<schar, 2>(const DevMem2Db& src, const DevMem2Db& dst, int borderType, cudaStream_t stream);
template void pyrUp_gpu<schar, 3>(const DevMem2Db& src, const DevMem2Db& dst, int borderType, cudaStream_t stream);
template void pyrUp_gpu<schar, 4>(const DevMem2Db& src, const DevMem2Db& dst, int borderType, cudaStream_t stream);

template void pyrUp_gpu<ushort, 1>(const DevMem2Db& src, const DevMem2Db& dst, int borderType, cudaStream_t stream);
template void pyrUp_gpu<ushort, 2>(const DevMem2Db& src, const DevMem2Db& dst, int borderType, cudaStream_t stream);
template void pyrUp_gpu<ushort, 3>(const DevMem2Db& src, const DevMem2Db& dst, int borderType, cudaStream_t stream);
template void pyrUp_gpu<ushort, 4>(const DevMem2Db& src, const DevMem2Db& dst, int borderType, cudaStream_t stream);

template void pyrUp_gpu<short, 1>(const DevMem2Db& src, const DevMem2Db& dst, int borderType, cudaStream_t stream);
template void pyrUp_gpu<short, 2>(const DevMem2Db& src, const DevMem2Db& dst, int borderType, cudaStream_t stream);
template void pyrUp_gpu<short, 3>(const DevMem2Db& src, const DevMem2Db& dst, int borderType, cudaStream_t stream);
template void pyrUp_gpu<short, 4>(const DevMem2Db& src, const DevMem2Db& dst, int borderType, cudaStream_t stream);

template void pyrUp_gpu<int, 1>(const DevMem2Db& src, const DevMem2Db& dst, int borderType, cudaStream_t stream);
template void pyrUp_gpu<int, 2>(const DevMem2Db& src, const DevMem2Db& dst, int borderType, cudaStream_t stream);
template void pyrUp_gpu<int, 3>(const DevMem2Db& src, const DevMem2Db& dst, int borderType, cudaStream_t stream);
template void pyrUp_gpu<int, 4>(const DevMem2Db& src, const DevMem2Db& dst, int borderType, cudaStream_t stream);

template void pyrUp_gpu<float, 1>(const DevMem2Db& src, const DevMem2Db& dst, int borderType, cudaStream_t stream);
template void pyrUp_gpu<float, 2>(const DevMem2Db& src, const DevMem2Db& dst, int borderType, cudaStream_t stream);
template void pyrUp_gpu<float, 3>(const DevMem2Db& src, const DevMem2Db& dst, int borderType, cudaStream_t stream);
template void pyrUp_gpu<float, 4>(const DevMem2Db& src, const DevMem2Db& dst, int borderType, cudaStream_t stream);

-} // namespace imgproc
-
-END_OPENCV_DEVICE_NAMESPACE
+} // namespace imgproc
+}}} // namespace cv { namespace gpu { namespace device
@@ -47,12 +47,12 @@
#include "opencv2/gpu/device/saturate_cast.hpp"
#include "opencv2/gpu/device/filters.hpp"

-BEGIN_OPENCV_DEVICE_NAMESPACE
-
-namespace imgproc {
-
-template <typename Ptr2D, typename T> __global__ void remap(const Ptr2D src, const PtrStepf mapx, const PtrStepf mapy, DevMem2D_<T> dst)
-{
+namespace cv { namespace gpu { namespace device
+{
+namespace imgproc
+{
+template <typename Ptr2D, typename T> __global__ void remap(const Ptr2D src, const PtrStepf mapx, const PtrStepf mapy, DevMem2D_<T> dst)
+{
const int x = blockDim.x * blockIdx.x + threadIdx.x;
const int y = blockDim.y * blockIdx.y + threadIdx.y;

@@ -63,10 +63,10 @@ template <typename Ptr2D, typename T> __global__ void remap(const Ptr2D src, con

dst.ptr(y)[x] = saturate_cast<T>(src(ycoo, xcoo));
}
}

template <template <typename> class Filter, template <typename> class B, typename T> struct RemapDispatcherStream
{
static void call(const DevMem2D_<T>& src, const DevMem2Df& mapx, const DevMem2Df& mapy, const DevMem2D_<T>& dst,
const float* borderValue, cudaStream_t stream, int)
{
@@ -82,10 +82,10 @@ template <template <typename> class Filter, template <typename> class B, typenam
remap<<<grid, block, 0, stream>>>(filter_src, mapx, mapy, dst);
cudaSafeCall( cudaGetLastError() );
}
};

template <template <typename> class Filter, template <typename> class B, typename T> struct RemapDispatcherNonStream
{
static void call(const DevMem2D_<T>& src, const DevMem2Df& mapx, const DevMem2Df& mapy, const DevMem2D_<T>& dst, const float* borderValue, int)
{
typedef typename TypeVec<float, VecTraits<T>::cn>::vec_type work_type;
@@ -102,9 +102,9 @@ template <template <typename> class Filter, template <typename> class B, typenam

cudaSafeCall( cudaDeviceSynchronize() );
}
};

#define OPENCV_GPU_IMPLEMENT_REMAP_TEX(type) \
texture< type , cudaTextureType2D> tex_remap_ ## type (0, cudaFilterModePoint, cudaAddressModeClamp); \
struct tex_remap_ ## type ## _reader \
{ \
@@ -147,34 +147,34 @@ template <template <typename> class Filter, template <typename> class B, typenam
} \
};

OPENCV_GPU_IMPLEMENT_REMAP_TEX(uchar)
//OPENCV_GPU_IMPLEMENT_REMAP_TEX(uchar2)
OPENCV_GPU_IMPLEMENT_REMAP_TEX(uchar4)

//OPENCV_GPU_IMPLEMENT_REMAP_TEX(schar)
//OPENCV_GPU_IMPLEMENT_REMAP_TEX(char2)
//OPENCV_GPU_IMPLEMENT_REMAP_TEX(char4)

OPENCV_GPU_IMPLEMENT_REMAP_TEX(ushort)
//OPENCV_GPU_IMPLEMENT_REMAP_TEX(ushort2)
OPENCV_GPU_IMPLEMENT_REMAP_TEX(ushort4)

OPENCV_GPU_IMPLEMENT_REMAP_TEX(short)
//OPENCV_GPU_IMPLEMENT_REMAP_TEX(short2)
OPENCV_GPU_IMPLEMENT_REMAP_TEX(short4)

//OPENCV_GPU_IMPLEMENT_REMAP_TEX(int)
//OPENCV_GPU_IMPLEMENT_REMAP_TEX(int2)
//OPENCV_GPU_IMPLEMENT_REMAP_TEX(int4)

OPENCV_GPU_IMPLEMENT_REMAP_TEX(float)
//OPENCV_GPU_IMPLEMENT_REMAP_TEX(float2)
OPENCV_GPU_IMPLEMENT_REMAP_TEX(float4)

#undef OPENCV_GPU_IMPLEMENT_REMAP_TEX

template <template <typename> class Filter, template <typename> class B, typename T> struct RemapDispatcher
{
static void call(const DevMem2D_<T>& src, const DevMem2Df& mapx, const DevMem2Df& mapy, const DevMem2D_<T>& dst,
const float* borderValue, cudaStream_t stream, int cc)
{
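The OPENCV_GPU_IMPLEMENT_REMAP_TEX macro above exists because legacy CUDA texture references must be file-scope objects of a concrete element type, so they cannot be produced by a class template; the macro stamps out one texture plus one reader functor per supported pixel type. Roughly what the float instantiation expands to (the functor body is an assumption based on the visible macro head, not the full macro):

texture<float, cudaTextureType2D> tex_remap_float(0, cudaFilterModePoint, cudaAddressModeClamp);

struct tex_remap_float_reader
{
    // Presumed reader body: fetch through the bound texture reference.
    __device__ __forceinline__ float operator ()(int y, int x) const
    {
        return tex2D(tex_remap_float, x, y);
    }
};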
@ -183,11 +183,11 @@ template <template <typename> class Filter, template <typename> class B, typenam
|
|||||||
else
|
else
|
||||||
RemapDispatcherStream<Filter, B, T>::call(src, mapx, mapy, dst, borderValue, stream, cc);
|
RemapDispatcherStream<Filter, B, T>::call(src, mapx, mapy, dst, borderValue, stream, cc);
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
template <typename T> void remap_gpu(const DevMem2Db& src, const DevMem2Df& xmap, const DevMem2Df& ymap, const DevMem2Db& dst, int interpolation,
|
template <typename T> void remap_gpu(const DevMem2Db& src, const DevMem2Df& xmap, const DevMem2Df& ymap, const DevMem2Db& dst, int interpolation,
|
||||||
int borderMode, const float* borderValue, cudaStream_t stream, int cc)
|
int borderMode, const float* borderValue, cudaStream_t stream, int cc)
|
||||||
{
|
{
|
||||||
typedef void (*caller_t)(const DevMem2D_<T>& src, const DevMem2Df& xmap, const DevMem2Df& ymap, const DevMem2D_<T>& dst,
|
typedef void (*caller_t)(const DevMem2D_<T>& src, const DevMem2Df& xmap, const DevMem2Df& ymap, const DevMem2D_<T>& dst,
|
||||||
const float* borderValue, cudaStream_t stream, int cc);
|
const float* borderValue, cudaStream_t stream, int cc);
|
||||||
|
|
||||||
@ -217,38 +217,36 @@ template <typename T> void remap_gpu(const DevMem2Db& src, const DevMem2Df& xmap
|
|||||||
};
|
};
|
||||||
|
|
||||||
callers[interpolation][borderMode](static_cast< DevMem2D_<T> >(src), xmap, ymap, static_cast< DevMem2D_<T> >(dst), borderValue, stream, cc);
|
callers[interpolation][borderMode](static_cast< DevMem2D_<T> >(src), xmap, ymap, static_cast< DevMem2D_<T> >(dst), borderValue, stream, cc);
|
||||||
}
|
}
|
||||||
|
|
||||||
template void remap_gpu<uchar >(const DevMem2Db& src, const DevMem2Df& xmap, const DevMem2Df& ymap, const DevMem2Db& dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);
|
template void remap_gpu<uchar >(const DevMem2Db& src, const DevMem2Df& xmap, const DevMem2Df& ymap, const DevMem2Db& dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);
|
||||||
//template void remap_gpu<uchar2>(const DevMem2Db& src, const DevMem2Df& xmap, const DevMem2Df& ymap, const DevMem2Db& dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);
|
//template void remap_gpu<uchar2>(const DevMem2Db& src, const DevMem2Df& xmap, const DevMem2Df& ymap, const DevMem2Db& dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);
|
||||||
template void remap_gpu<uchar3>(const DevMem2Db& src, const DevMem2Df& xmap, const DevMem2Df& ymap, const DevMem2Db& dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);
|
template void remap_gpu<uchar3>(const DevMem2Db& src, const DevMem2Df& xmap, const DevMem2Df& ymap, const DevMem2Db& dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);
|
||||||
template void remap_gpu<uchar4>(const DevMem2Db& src, const DevMem2Df& xmap, const DevMem2Df& ymap, const DevMem2Db& dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);
|
template void remap_gpu<uchar4>(const DevMem2Db& src, const DevMem2Df& xmap, const DevMem2Df& ymap, const DevMem2Db& dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);
|
||||||
|
|
||||||
//template void remap_gpu<schar>(const DevMem2Db& src, const DevMem2Df& xmap, const DevMem2Df& ymap, const DevMem2Db& dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);
|
//template void remap_gpu<schar>(const DevMem2Db& src, const DevMem2Df& xmap, const DevMem2Df& ymap, const DevMem2Db& dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);
|
||||||
//template void remap_gpu<char2>(const DevMem2Db& src, const DevMem2Df& xmap, const DevMem2Df& ymap, const DevMem2Db& dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);
|
//template void remap_gpu<char2>(const DevMem2Db& src, const DevMem2Df& xmap, const DevMem2Df& ymap, const DevMem2Db& dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);
|
||||||
//template void remap_gpu<char3>(const DevMem2Db& src, const DevMem2Df& xmap, const DevMem2Df& ymap, const DevMem2Db& dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);
|
//template void remap_gpu<char3>(const DevMem2Db& src, const DevMem2Df& xmap, const DevMem2Df& ymap, const DevMem2Db& dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);
|
||||||
//template void remap_gpu<char4>(const DevMem2Db& src, const DevMem2Df& xmap, const DevMem2Df& ymap, const DevMem2Db& dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);
|
//template void remap_gpu<char4>(const DevMem2Db& src, const DevMem2Df& xmap, const DevMem2Df& ymap, const DevMem2Db& dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);
|
||||||
|
|
||||||
template void remap_gpu<ushort >(const DevMem2Db& src, const DevMem2Df& xmap, const DevMem2Df& ymap, const DevMem2Db& dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);
|
template void remap_gpu<ushort >(const DevMem2Db& src, const DevMem2Df& xmap, const DevMem2Df& ymap, const DevMem2Db& dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);
|
||||||
 //template void remap_gpu<ushort2>(const DevMem2Db& src, const DevMem2Df& xmap, const DevMem2Df& ymap, const DevMem2Db& dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);
 template void remap_gpu<ushort3>(const DevMem2Db& src, const DevMem2Df& xmap, const DevMem2Df& ymap, const DevMem2Db& dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);
 template void remap_gpu<ushort4>(const DevMem2Db& src, const DevMem2Df& xmap, const DevMem2Df& ymap, const DevMem2Db& dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);
 
 template void remap_gpu<short >(const DevMem2Db& src, const DevMem2Df& xmap, const DevMem2Df& ymap, const DevMem2Db& dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);
 //template void remap_gpu<short2>(const DevMem2Db& src, const DevMem2Df& xmap, const DevMem2Df& ymap, const DevMem2Db& dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);
 template void remap_gpu<short3>(const DevMem2Db& src, const DevMem2Df& xmap, const DevMem2Df& ymap, const DevMem2Db& dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);
 template void remap_gpu<short4>(const DevMem2Db& src, const DevMem2Df& xmap, const DevMem2Df& ymap, const DevMem2Db& dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);
 
 //template void remap_gpu<int >(const DevMem2Db& src, const DevMem2Df& xmap, const DevMem2Df& ymap, const DevMem2Db& dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);
 //template void remap_gpu<int2>(const DevMem2Db& src, const DevMem2Df& xmap, const DevMem2Df& ymap, const DevMem2Db& dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);
 //template void remap_gpu<int3>(const DevMem2Db& src, const DevMem2Df& xmap, const DevMem2Df& ymap, const DevMem2Db& dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);
 //template void remap_gpu<int4>(const DevMem2Db& src, const DevMem2Df& xmap, const DevMem2Df& ymap, const DevMem2Db& dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);
 
 template void remap_gpu<float >(const DevMem2Db& src, const DevMem2Df& xmap, const DevMem2Df& ymap, const DevMem2Db& dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);
 //template void remap_gpu<float2>(const DevMem2Db& src, const DevMem2Df& xmap, const DevMem2Df& ymap, const DevMem2Db& dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);
 template void remap_gpu<float3>(const DevMem2Db& src, const DevMem2Df& xmap, const DevMem2Df& ymap, const DevMem2Db& dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);
 template void remap_gpu<float4>(const DevMem2Db& src, const DevMem2Df& xmap, const DevMem2Df& ymap, const DevMem2Db& dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);
-} // namespace imgproc
-
-END_OPENCV_DEVICE_NAMESPACE
+    } // namespace imgproc
+}}} // namespace cv { namespace gpu { namespace device
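Note on the conversion pattern (editorial sketch, not part of the diff): the removed BEGIN/END macros expanded to the opening and closing of the cv::gpu::device namespace, so each file now spells the nesting out and host code qualifies the device symbols directly. Assuming a trimmed, hypothetical remap_gpu declaration for illustration:

namespace cv { namespace gpu { namespace device
{
    namespace imgproc
    {
        void remap_gpu();   // hypothetical no-arg stand-in for the instantiations above
    }
}}}

void hostCaller()
{
    // Call sites drop the macro spelling and name the namespace explicitly.
    using namespace ::cv::gpu::device::imgproc;
    remap_gpu();
}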
@@ -47,12 +47,12 @@
 #include "opencv2/gpu/device/saturate_cast.hpp"
 #include "opencv2/gpu/device/filters.hpp"
 
-BEGIN_OPENCV_DEVICE_NAMESPACE
-
-namespace imgproc {
-
-template <typename Ptr2D, typename T> __global__ void resize(const Ptr2D src, float fx, float fy, DevMem2D_<T> dst)
+namespace cv { namespace gpu { namespace device
 {
+    namespace imgproc
+    {
+        template <typename Ptr2D, typename T> __global__ void resize(const Ptr2D src, float fx, float fy, DevMem2D_<T> dst)
+        {
 const int x = blockDim.x * blockIdx.x + threadIdx.x;
 const int y = blockDim.y * blockIdx.y + threadIdx.y;
 
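The resize kernels above follow the GPU module's one-thread-per-output-pixel scheme: global x/y come from the block/thread decomposition and each in-range thread writes exactly one destination element. A self-contained sketch of that addressing pattern (simplified float-only types and plain pointers in place of DevMem2D_, so the names here are illustrative):

#include <cuda_runtime.h>

__global__ void scaleSketch(const float* src, int src_step,
                            float fx, float fy,
                            float* dst, int dst_step, int rows, int cols)
{
    // Global output coordinates for this thread.
    const int x = blockDim.x * blockIdx.x + threadIdx.x;
    const int y = blockDim.y * blockIdx.y + threadIdx.y;

    if (x < cols && y < rows)
    {
        // Map the destination pixel back into the source grid and copy.
        const float xcoo = x * fx;
        const float ycoo = y * fy;
        dst[y * dst_step + x] = src[(int)ycoo * src_step + (int)xcoo];
    }
}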
@@ -63,9 +63,9 @@ template <typename Ptr2D, typename T> __global__ void resize(const Ptr2D src, fl
 
 dst.ptr(y)[x] = saturate_cast<T>(src(ycoo, xcoo));
 }
 }
 template <typename Ptr2D, typename T> __global__ void resizeNN(const Ptr2D src, float fx, float fy, DevMem2D_<T> dst)
 {
 const int x = blockDim.x * blockIdx.x + threadIdx.x;
 const int y = blockDim.y * blockIdx.y + threadIdx.y;
 
@@ -76,10 +76,10 @@ template <typename Ptr2D, typename T> __global__ void resizeNN(const Ptr2D src,
 
 dst.ptr(y)[x] = src(__float2int_rd(ycoo), __float2int_rd(xcoo));
 }
 }
 
 template <template <typename> class Filter, typename T> struct ResizeDispatcherStream
 {
 static void call(const DevMem2D_<T>& src, float fx, float fy, const DevMem2D_<T>& dst, cudaStream_t stream)
 {
 dim3 block(32, 8);
@@ -92,9 +92,9 @@ template <template <typename> class Filter, typename T> struct ResizeDispatcherS
 resize<<<grid, block, 0, stream>>>(filter_src, fx, fy, dst);
 cudaSafeCall( cudaGetLastError() );
 }
 };
 template <typename T> struct ResizeDispatcherStream<PointFilter, T>
 {
 static void call(const DevMem2D_<T>& src, float fx, float fy, const DevMem2D_<T>& dst, cudaStream_t stream)
 {
 dim3 block(32, 8);
@@ -106,10 +106,10 @@ template <typename T> struct ResizeDispatcherStream<PointFilter, T>
 resizeNN<<<grid, block, 0, stream>>>(brdSrc, fx, fy, dst);
 cudaSafeCall( cudaGetLastError() );
 }
 };
 
 template <template <typename> class Filter, typename T> struct ResizeDispatcherNonStream
 {
 static void call(const DevMem2D_<T>& src, float fx, float fy, const DevMem2D_<T>& dst)
 {
 dim3 block(32, 8);
@@ -124,9 +124,9 @@ template <template <typename> class Filter, typename T> struct ResizeDispatcherN
 
 cudaSafeCall( cudaDeviceSynchronize() );
 }
 };
 template <typename T> struct ResizeDispatcherNonStream<PointFilter, T>
 {
 static void call(const DevMem2D_<T>& src, float fx, float fy, const DevMem2D_<T>& dst)
 {
 dim3 block(32, 8);
@@ -140,9 +140,9 @@ template <typename T> struct ResizeDispatcherNonStream<PointFilter, T>
 
 cudaSafeCall( cudaDeviceSynchronize() );
 }
 };
 
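The two dispatcher families above differ only in synchronization policy: the Stream variants launch on the caller's stream and check for a launch error, while the NonStream variants also block on cudaDeviceSynchronize() before returning. A reduced illustration (hypothetical kernel, and a local stand-in for the library's cudaSafeCall macro):

#include <cstdio>
#include <cuda_runtime.h>

// Minimal stand-in for cudaSafeCall from safe_call.hpp (shown later in
// this commit); here it just prints instead of raising cv::gpu::error.
#define CUDA_CHECK(expr) do { cudaError_t e = (expr); \
    if (e != cudaSuccess) printf("CUDA error: %s\n", cudaGetErrorString(e)); } while (0)

__global__ void work(float* p) { p[threadIdx.x] *= 2.0f; }

void callAsync(float* p, cudaStream_t stream)   // stream != 0 path
{
    work<<<1, 32, 0, stream>>>(p);
    CUDA_CHECK(cudaGetLastError());             // report launch failure, do not wait
}

void callSync(float* p)                         // stream == 0 path
{
    work<<<1, 32>>>(p);
    CUDA_CHECK(cudaGetLastError());
    CUDA_CHECK(cudaDeviceSynchronize());        // block until the kernel finishes
}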
 #define OPENCV_GPU_IMPLEMENT_RESIZE_TEX(type) \
 texture< type , cudaTextureType2D> tex_resize_ ## type (0, cudaFilterModePoint, cudaAddressModeClamp); \
 struct tex_resize_ ## type ## _reader \
 { \
@@ -181,34 +181,34 @@ template <typename T> struct ResizeDispatcherNonStream<PointFilter, T>
 } \
 };
 
 OPENCV_GPU_IMPLEMENT_RESIZE_TEX(uchar)
 //OPENCV_GPU_IMPLEMENT_RESIZE_TEX(uchar2)
 OPENCV_GPU_IMPLEMENT_RESIZE_TEX(uchar4)
 
 //OPENCV_GPU_IMPLEMENT_RESIZE_TEX(schar)
 //OPENCV_GPU_IMPLEMENT_RESIZE_TEX(char2)
 //OPENCV_GPU_IMPLEMENT_RESIZE_TEX(char4)
 
 OPENCV_GPU_IMPLEMENT_RESIZE_TEX(ushort)
 //OPENCV_GPU_IMPLEMENT_RESIZE_TEX(ushort2)
 OPENCV_GPU_IMPLEMENT_RESIZE_TEX(ushort4)
 
 OPENCV_GPU_IMPLEMENT_RESIZE_TEX(short)
 //OPENCV_GPU_IMPLEMENT_RESIZE_TEX(short2)
 OPENCV_GPU_IMPLEMENT_RESIZE_TEX(short4)
 
 //OPENCV_GPU_IMPLEMENT_RESIZE_TEX(int)
 //OPENCV_GPU_IMPLEMENT_RESIZE_TEX(int2)
 //OPENCV_GPU_IMPLEMENT_RESIZE_TEX(int4)
 
 OPENCV_GPU_IMPLEMENT_RESIZE_TEX(float)
 //OPENCV_GPU_IMPLEMENT_RESIZE_TEX(float2)
 OPENCV_GPU_IMPLEMENT_RESIZE_TEX(float4)
 
 #undef OPENCV_GPU_IMPLEMENT_RESIZE_TEX
 
 template <template <typename> class Filter, typename T> struct ResizeDispatcher
 {
 static void call(const DevMem2D_<T>& src, float fx, float fy, const DevMem2D_<T>& dst, cudaStream_t stream)
 {
 if (stream == 0)
@@ -216,10 +216,10 @@ template <template <typename> class Filter, typename T> struct ResizeDispatcher
 else
 ResizeDispatcherStream<Filter, T>::call(src, fx, fy, dst, stream);
 }
 };
 
 template <typename T> void resize_gpu(const DevMem2Db& src, float fx, float fy, const DevMem2Db& dst, int interpolation, cudaStream_t stream)
 {
 typedef void (*caller_t)(const DevMem2D_<T>& src, float fx, float fy, const DevMem2D_<T>& dst, cudaStream_t stream);
 
 static const caller_t callers[3] =
@@ -228,38 +228,36 @@ template <typename T> void resize_gpu(const DevMem2Db& src, float fx, float fy,
 };
 
 callers[interpolation](static_cast< DevMem2D_<T> >(src), fx, fy, static_cast< DevMem2D_<T> >(dst), stream);
 }
 
 template void resize_gpu<uchar >(const DevMem2Db& src, float fx, float fy, const DevMem2Db& dst, int interpolation, cudaStream_t stream);
 //template void resize_gpu<uchar2>(const DevMem2Db& src, float fx, float fy, const DevMem2Db& dst, int interpolation, cudaStream_t stream);
 template void resize_gpu<uchar3>(const DevMem2Db& src, float fx, float fy, const DevMem2Db& dst, int interpolation, cudaStream_t stream);
 template void resize_gpu<uchar4>(const DevMem2Db& src, float fx, float fy, const DevMem2Db& dst, int interpolation, cudaStream_t stream);
 
 //template void resize_gpu<schar>(const DevMem2Db& src, float fx, float fy, const DevMem2Db& dst, int interpolation, cudaStream_t stream);
 //template void resize_gpu<char2>(const DevMem2Db& src, float fx, float fy, const DevMem2Db& dst, int interpolation, cudaStream_t stream);
 //template void resize_gpu<char3>(const DevMem2Db& src, float fx, float fy, const DevMem2Db& dst, int interpolation, cudaStream_t stream);
 //template void resize_gpu<char4>(const DevMem2Db& src, float fx, float fy, const DevMem2Db& dst, int interpolation, cudaStream_t stream);
 
 template void resize_gpu<ushort >(const DevMem2Db& src,float fx, float fy, const DevMem2Db& dst, int interpolation, cudaStream_t stream);
 //template void resize_gpu<ushort2>(const DevMem2Db& src,float fx, float fy, const DevMem2Db& dst, int interpolation, cudaStream_t stream);
 template void resize_gpu<ushort3>(const DevMem2Db& src,float fx, float fy, const DevMem2Db& dst, int interpolation, cudaStream_t stream);
 template void resize_gpu<ushort4>(const DevMem2Db& src,float fx, float fy, const DevMem2Db& dst, int interpolation, cudaStream_t stream);
 
 template void resize_gpu<short >(const DevMem2Db& src, float fx, float fy, const DevMem2Db& dst, int interpolation, cudaStream_t stream);
 //template void resize_gpu<short2>(const DevMem2Db& src, float fx, float fy, const DevMem2Db& dst, int interpolation, cudaStream_t stream);
 template void resize_gpu<short3>(const DevMem2Db& src, float fx, float fy, const DevMem2Db& dst, int interpolation, cudaStream_t stream);
 template void resize_gpu<short4>(const DevMem2Db& src, float fx, float fy, const DevMem2Db& dst, int interpolation, cudaStream_t stream);
 
 //template void resize_gpu<int >(const DevMem2Db& src, float fx, float fy, const DevMem2Db& dst, int interpolation, cudaStream_t stream);
 //template void resize_gpu<int2>(const DevMem2Db& src, float fx, float fy, const DevMem2Db& dst, int interpolation, cudaStream_t stream);
 //template void resize_gpu<int3>(const DevMem2Db& src, float fx, float fy, const DevMem2Db& dst, int interpolation, cudaStream_t stream);
 //template void resize_gpu<int4>(const DevMem2Db& src, float fx, float fy, const DevMem2Db& dst, int interpolation, cudaStream_t stream);
 
 template void resize_gpu<float >(const DevMem2Db& src, float fx, float fy, const DevMem2Db& dst, int interpolation, cudaStream_t stream);
 //template void resize_gpu<float2>(const DevMem2Db& src, float fx, float fy, const DevMem2Db& dst, int interpolation, cudaStream_t stream);
 template void resize_gpu<float3>(const DevMem2Db& src, float fx, float fy, const DevMem2Db& dst, int interpolation, cudaStream_t stream);
 template void resize_gpu<float4>(const DevMem2Db& src, float fx, float fy, const DevMem2Db& dst, int interpolation, cudaStream_t stream);
-} // namespace imgproc
-
-END_OPENCV_DEVICE_NAMESPACE
+    } // namespace imgproc
+}}} // namespace cv { namespace gpu { namespace device
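resize_gpu resolves its launcher through a three-slot function-pointer table indexed directly by the interpolation flag, so the table order must mirror the interpolation constants the caller passes (nearest, linear, cubic in OpenCV's numbering is an assumption here, since the table body sits outside this hunk). A sketch of the idiom with hypothetical handlers:

typedef void (*caller_t)(int width, int height);

static void nearestCaller(int, int) { /* e.g. ResizeDispatcher<PointFilter , T>::call */ }
static void linearCaller (int, int) { /* e.g. ResizeDispatcher<LinearFilter, T>::call */ }
static void cubicCaller  (int, int) { /* e.g. ResizeDispatcher<CubicFilter , T>::call */ }

void resizeDispatch(int interpolation, int width, int height)
{
    // The flag is the index: the caller must guarantee 0 <= interpolation < 3.
    static const caller_t callers[3] = { nearestCaller, linearCaller, cubicCaller };
    callers[interpolation](width, height);
}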
@@ -47,25 +47,25 @@
 #include "opencv2/gpu/device/limits.hpp"
 #include "opencv2/gpu/device/border_interpolate.hpp"
 
-BEGIN_OPENCV_DEVICE_NAMESPACE
-
-#define MAX_KERNEL_SIZE 16
-#define BLOCK_DIM_X 16
-#define BLOCK_DIM_Y 4
-#define RESULT_STEPS 8
-#define HALO_STEPS 1
-
-namespace row_filter {
-
-__constant__ float c_kernel[MAX_KERNEL_SIZE];
-
-void loadKernel(const float kernel[], int ksize)
+namespace cv { namespace gpu { namespace device
 {
+    #define MAX_KERNEL_SIZE 16
+    #define BLOCK_DIM_X 16
+    #define BLOCK_DIM_Y 4
+    #define RESULT_STEPS 8
+    #define HALO_STEPS 1
+
+    namespace row_filter
+    {
+        __constant__ float c_kernel[MAX_KERNEL_SIZE];
+
+        void loadKernel(const float kernel[], int ksize)
+        {
 cudaSafeCall( cudaMemcpyToSymbol(c_kernel, kernel, ksize * sizeof(float)) );
 }
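loadKernel above is the host-side half of a constant-memory handshake: the filter taps are copied into the __constant__ array c_kernel with cudaMemcpyToSymbol, and every kernel in the translation unit then reads them through the constant cache. A self-contained restatement with hypothetical symbol and size names:

#include <cuda_runtime.h>

#define MAX_TAPS 16

// Read-only filter weights, broadcast through the constant cache to all
// threads of any kernel compiled in this translation unit.
__constant__ float c_taps[MAX_TAPS];

void uploadTaps(const float taps[], int count)
{
    // Copies count floats from host memory into the device symbol; the
    // real code wraps this call in cudaSafeCall.
    cudaMemcpyToSymbol(c_taps, taps, count * sizeof(float));
}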
 
 namespace detail
 {
 template <typename T, size_t size> struct SmemType
 {
 typedef typename TypeVec<float, VecTraits<T>::cn>::vec_type smem_t;
@@ -75,16 +75,16 @@ namespace detail
 {
 typedef T smem_t;
 };
 }
 
 template <typename T> struct SmemType
 {
 typedef typename detail::SmemType<T, sizeof(T)>::smem_t smem_t;
 };
 
 template <int KERNEL_SIZE, typename T, typename D, typename B>
 __global__ void linearRowFilter(const DevMem2D_<T> src, PtrStep<D> dst, int anchor, const B b)
 {
 typedef typename SmemType<T>::smem_t smem_t;
 typedef typename TypeVec<float, VecTraits<T>::cn>::vec_type sum_t;
 
@@ -132,11 +132,11 @@ __global__ void linearRowFilter(const DevMem2D_<T> src, PtrStep<D> dst, int anch
 dst_row[dstX] = saturate_cast<D>(sum);
 }
 }
 }
 
 template <int ksize, typename T, typename D, template<typename> class B>
 void linearRowFilter_caller(const DevMem2D_<T>& src, const DevMem2D_<D>& dst, int anchor, cudaStream_t stream)
 {
 typedef typename SmemType<T>::smem_t smem_t;
 
 const dim3 block(BLOCK_DIM_X, BLOCK_DIM_Y);
@@ -149,11 +149,11 @@ void linearRowFilter_caller(const DevMem2D_<T>& src, const DevMem2D_<D>& dst, in
 
 if (stream == 0)
 cudaSafeCall( cudaDeviceSynchronize() );
 }
 
 template <typename T, typename D>
 void linearRowFilter_gpu(const DevMem2Db& src, const DevMem2Db& dst, const float kernel[], int ksize, int anchor, int brd_type, cudaStream_t stream)
 {
 typedef void (*caller_t)(const DevMem2D_<T>& src, const DevMem2D_<D>& dst, int anchor, cudaStream_t stream);
 static const caller_t callers[5][17] =
 {
@@ -257,16 +257,14 @@ void linearRowFilter_gpu(const DevMem2Db& src, const DevMem2Db& dst, const float
 loadKernel(kernel, ksize);
 
 callers[brd_type][ksize]((DevMem2D_<T>)src, (DevMem2D_<D>)dst, anchor, stream);
 }
 
 template void linearRowFilter_gpu<uchar , float >(const DevMem2Db& src, const DevMem2Db& dst, const float kernel[], int ksize, int anchor, int brd_type, cudaStream_t stream);
 template void linearRowFilter_gpu<uchar4, float4>(const DevMem2Db& src, const DevMem2Db& dst, const float kernel[], int ksize, int anchor, int brd_type, cudaStream_t stream);
 //template void linearRowFilter_gpu<short , float >(const DevMem2Db& src, const DevMem2Db& dst, const float kernel[], int ksize, int anchor, int brd_type, cudaStream_t stream);
 //template void linearRowFilter_gpu<short2, float2>(const DevMem2Db& src, const DevMem2Db& dst, const float kernel[], int ksize, int anchor, int brd_type, cudaStream_t stream);
 template void linearRowFilter_gpu<short3, float3>(const DevMem2Db& src, const DevMem2Db& dst, const float kernel[], int ksize, int anchor, int brd_type, cudaStream_t stream);
 template void linearRowFilter_gpu<int , float >(const DevMem2Db& src, const DevMem2Db& dst, const float kernel[], int ksize, int anchor, int brd_type, cudaStream_t stream);
 template void linearRowFilter_gpu<float , float >(const DevMem2Db& src, const DevMem2Db& dst, const float kernel[], int ksize, int anchor, int brd_type, cudaStream_t stream);
-} // namespace row_filter
-
-END_OPENCV_DEVICE_NAMESPACE
+    } // namespace row_filter
+}}} // namespace cv { namespace gpu { namespace device
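linearRowFilter_gpu picks its launcher from a two-dimensional table, callers[5][17]: the first index selects one of five border-handling modes, the second the compile-time kernel size, with unsupported sizes left as null entries. A reduced sketch of the idiom (two border modes and two sizes, all names hypothetical):

typedef void (*row_caller_t)(int anchor);

static void replicate3(int) { /* e.g. linearRowFilter_caller<3, ..., BrdRowReplicate> */ }
static void replicate5(int) { /* e.g. linearRowFilter_caller<5, ..., BrdRowReplicate> */ }
static void reflect3  (int) { /* e.g. linearRowFilter_caller<3, ..., BrdRowReflect>   */ }
static void reflect5  (int) { /* e.g. linearRowFilter_caller<5, ..., BrdRowReflect>   */ }

// callers[brd_type][ksize]: rows are border modes, columns are kernel
// sizes; unsupported sizes stay null in this toy version.
static const row_caller_t callers[2][6] =
{
    { 0, 0, 0, replicate3, 0, replicate5 },
    { 0, 0, 0, reflect3,   0, reflect5   }
};

void rowFilterDispatch(int brd_type, int ksize, int anchor)
{
    callers[brd_type][ksize](anchor);   // assumes a validated (brd_type, ksize) pair
}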
@@ -62,44 +62,43 @@
 #define cublasSafeCall(expr) ___cublasSafeCall(expr, __FILE__, __LINE__)
 #endif
 
-namespace cv { namespace gpu {
-
-void error(const char *error_string, const char *file, const int line, const char *func = "");
-void nppError(int err, const char *file, const int line, const char *func = "");
-void ncvError(int err, const char *file, const int line, const char *func = "");
-void cufftError(int err, const char *file, const int line, const char *func = "");
-void cublasError(int err, const char *file, const int line, const char *func = "");
-
-static inline void ___cudaSafeCall(cudaError_t err, const char *file, const int line, const char *func = "")
+namespace cv { namespace gpu
 {
+    void error(const char *error_string, const char *file, const int line, const char *func = "");
+    void nppError(int err, const char *file, const int line, const char *func = "");
+    void ncvError(int err, const char *file, const int line, const char *func = "");
+    void cufftError(int err, const char *file, const int line, const char *func = "");
+    void cublasError(int err, const char *file, const int line, const char *func = "");
+
+    static inline void ___cudaSafeCall(cudaError_t err, const char *file, const int line, const char *func = "")
+    {
 if (cudaSuccess != err)
 cv::gpu::error(cudaGetErrorString(err), file, line, func);
 }
 
 static inline void ___nppSafeCall(int err, const char *file, const int line, const char *func = "")
 {
 if (err < 0)
 cv::gpu::nppError(err, file, line, func);
 }
 
 static inline void ___ncvSafeCall(int err, const char *file, const int line, const char *func = "")
 {
 if (NCV_SUCCESS != err)
 cv::gpu::ncvError(err, file, line, func);
 }
 
 static inline void ___cufftSafeCall(cufftResult_t err, const char *file, const int line, const char *func = "")
 {
 if (CUFFT_SUCCESS != err)
 cv::gpu::cufftError(err, file, line, func);
 }
 
 static inline void ___cublasSafeCall(cublasStatus_t err, const char *file, const int line, const char *func = "")
 {
 if (CUBLAS_STATUS_SUCCESS != err)
 cv::gpu::cublasError(err, file, line, func);
 }
 
 }}
 
 #endif /* __OPENCV_CUDA_SAFE_CALL_HPP__ */
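These ___xxxSafeCall helpers give every CUDA, NPP, NCV, CUFFT and CUBLAS status code a single exit path through the cv::gpu error reporters, with __FILE__ and __LINE__ captured by the wrapping macros. Typical call-site usage, assuming this header is included:

#include <cuda_runtime.h>
// #include "safe_call.hpp"   // defines cudaSafeCall(expr) -> ___cudaSafeCall(expr, __FILE__, __LINE__)

void allocateScratch(void** ptr, size_t bytes)
{
    // On failure, ___cudaSafeCall forwards cudaGetErrorString(err) plus the
    // capturing file and line to cv::gpu::error.
    cudaSafeCall( cudaMalloc(ptr, bytes) );
    cudaSafeCall( cudaMemset(*ptr, 0, bytes) );
}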
@@ -42,66 +42,66 @@
 
 #include "internal_shared.hpp"
 
-BEGIN_OPENCV_DEVICE_NAMESPACE
-
-namespace split_merge {
-
-template <typename T, size_t elem_size = sizeof(T)>
-struct TypeTraits
+namespace cv { namespace gpu { namespace device
 {
+    namespace split_merge
+    {
+        template <typename T, size_t elem_size = sizeof(T)>
+        struct TypeTraits
+        {
 typedef T type;
 typedef T type2;
 typedef T type3;
 typedef T type4;
 };
 
 template <typename T>
 struct TypeTraits<T, 1>
 {
 typedef char type;
 typedef char2 type2;
 typedef char3 type3;
 typedef char4 type4;
 };
 
 template <typename T>
 struct TypeTraits<T, 2>
 {
 typedef short type;
 typedef short2 type2;
 typedef short3 type3;
 typedef short4 type4;
 };
 
 template <typename T>
 struct TypeTraits<T, 4>
 {
 typedef int type;
 typedef int2 type2;
 typedef int3 type3;
 typedef int4 type4;
 };
 
 template <typename T>
 struct TypeTraits<T, 8>
 {
 typedef double type;
 typedef double2 type2;
 //typedef double3 type3;
 //typedef double4 type3;
 };
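TypeTraits above erases the element type down to its width: every 1-, 2-, 4- or 8-byte channel type is shuttled through the kernels as the matching char/short/int/double scalar and its CUDA vector forms, so one kernel instantiation per width covers all types of that size (the 8-byte specialization leaves type3/type4 commented out, and the double cases are handled by explicit kernel specializations below). A compilable restatement of the 4-byte case, with a renamed trait to keep it self-contained:

#include <cstddef>
#include <type_traits>
#include <vector_types.h>   // CUDA's int2 and friends

// Minimal re-declaration for illustration; mirrors the trait above.
template <typename T, size_t elem_size = sizeof(T)> struct SizeTraits;
template <typename T> struct SizeTraits<T, 4>
{
    typedef int  type;
    typedef int2 type2;
};

// A pair of float channels is therefore moved as a single int2 load/store:
static_assert(std::is_same<SizeTraits<float>::type2, int2>::value,
              "4-byte elements travel as int vectors");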
 
 typedef void (*MergeFunction)(const DevMem2Db* src, DevMem2Db& dst, const cudaStream_t& stream);
 typedef void (*SplitFunction)(const DevMem2Db& src, DevMem2Db* dst, const cudaStream_t& stream);
 
 //------------------------------------------------------------
 // Merge
 
 template <typename T>
 __global__ void mergeC2_(const uchar* src0, size_t src0_step,
 const uchar* src1, size_t src1_step,
 int rows, int cols, uchar* dst, size_t dst_step)
 {
 typedef typename TypeTraits<T>::type2 dst_type;
 
 const int x = blockIdx.x * blockDim.x + threadIdx.x;
@@ -118,15 +118,15 @@ __global__ void mergeC2_(const uchar* src0, size_t src0_step,
 dst_elem.y = src1_y[x];
 dst_y[x] = dst_elem;
 }
 }
 
 template <typename T>
 __global__ void mergeC3_(const uchar* src0, size_t src0_step,
 const uchar* src1, size_t src1_step,
 const uchar* src2, size_t src2_step,
 int rows, int cols, uchar* dst, size_t dst_step)
 {
 typedef typename TypeTraits<T>::type3 dst_type;
 
 const int x = blockIdx.x * blockDim.x + threadIdx.x;
@@ -145,15 +145,15 @@ __global__ void mergeC3_(const uchar* src0, size_t src0_step,
 dst_elem.z = src2_y[x];
 dst_y[x] = dst_elem;
 }
 }
 
 template <>
 __global__ void mergeC3_<double>(const uchar* src0, size_t src0_step,
 const uchar* src1, size_t src1_step,
 const uchar* src2, size_t src2_step,
 int rows, int cols, uchar* dst, size_t dst_step)
 {
 const int x = blockIdx.x * blockDim.x + threadIdx.x;
 const int y = blockIdx.y * blockDim.y + threadIdx.y;
 
@@ -168,16 +168,16 @@ __global__ void mergeC3_<double>(const uchar* src0, size_t src0_step,
 dst_y[3 * x + 1] = src1_y[x];
 dst_y[3 * x + 2] = src2_y[x];
 }
 }
 
 template <typename T>
 __global__ void mergeC4_(const uchar* src0, size_t src0_step,
 const uchar* src1, size_t src1_step,
 const uchar* src2, size_t src2_step,
 const uchar* src3, size_t src3_step,
 int rows, int cols, uchar* dst, size_t dst_step)
 {
 typedef typename TypeTraits<T>::type4 dst_type;
 
 const int x = blockIdx.x * blockDim.x + threadIdx.x;
@@ -198,16 +198,16 @@ __global__ void mergeC4_(const uchar* src0, size_t src0_step,
 dst_elem.w = src3_y[x];
 dst_y[x] = dst_elem;
 }
 }
 
 template <>
 __global__ void mergeC4_<double>(const uchar* src0, size_t src0_step,
 const uchar* src1, size_t src1_step,
 const uchar* src2, size_t src2_step,
 const uchar* src3, size_t src3_step,
 int rows, int cols, uchar* dst, size_t dst_step)
 {
 const int x = blockIdx.x * blockDim.x + threadIdx.x;
 const int y = blockIdx.y * blockDim.y + threadIdx.y;
 
@@ -222,12 +222,12 @@ __global__ void mergeC4_<double>(const uchar* src0, size_t src0_step,
 dst_y[2 * x] = make_double2(src0_y[x], src1_y[x]);
 dst_y[2 * x + 1] = make_double2(src2_y[x], src3_y[x]);
 }
 }
 
 template <typename T>
 static void mergeC2_(const DevMem2Db* src, DevMem2Db& dst, const cudaStream_t& stream)
 {
 dim3 blockDim(32, 8);
 dim3 gridDim(divUp(dst.cols, blockDim.x), divUp(dst.rows, blockDim.y));
 mergeC2_<T><<<gridDim, blockDim, 0, stream>>>(
@@ -238,12 +238,12 @@ static void mergeC2_(const DevMem2Db* src, DevMem2Db& dst, const cudaStream_t& s
 
 if (stream == 0)
 cudaSafeCall(cudaDeviceSynchronize());
 }
 
 template <typename T>
 static void mergeC3_(const DevMem2Db* src, DevMem2Db& dst, const cudaStream_t& stream)
 {
 dim3 blockDim(32, 8);
 dim3 gridDim(divUp(dst.cols, blockDim.x), divUp(dst.rows, blockDim.y));
 mergeC3_<T><<<gridDim, blockDim, 0, stream>>>(
@@ -255,12 +255,12 @@ static void mergeC3_(const DevMem2Db* src, DevMem2Db& dst, const cudaStream_t& s
 
 if (stream == 0)
 cudaSafeCall(cudaDeviceSynchronize());
 }
 
 template <typename T>
 static void mergeC4_(const DevMem2Db* src, DevMem2Db& dst, const cudaStream_t& stream)
 {
 dim3 blockDim(32, 8);
 dim3 gridDim(divUp(dst.cols, blockDim.x), divUp(dst.rows, blockDim.y));
 mergeC4_<T><<<gridDim, blockDim, 0, stream>>>(
@@ -273,13 +273,13 @@ static void mergeC4_(const DevMem2Db* src, DevMem2Db& dst, const cudaStream_t& s
 
 if (stream == 0)
 cudaSafeCall(cudaDeviceSynchronize());
 }
 
 void merge_caller(const DevMem2Db* src, DevMem2Db& dst,
 int total_channels, size_t elem_size,
 const cudaStream_t& stream)
 {
 static MergeFunction merge_func_tbl[] =
 {
 mergeC2_<char>, mergeC2_<short>, mergeC2_<int>, 0, mergeC2_<double>,
@@ -294,20 +294,20 @@ void merge_caller(const DevMem2Db* src, DevMem2Db& dst,
 cv::gpu::error("Unsupported channel count or data type", __FILE__, __LINE__);
 
 merge_func(src, dst, stream);
 }
 
 //------------------------------------------------------------
 // Split
 
 template <typename T>
 __global__ void splitC2_(const uchar* src, size_t src_step,
 int rows, int cols,
 uchar* dst0, size_t dst0_step,
 uchar* dst1, size_t dst1_step)
 {
 typedef typename TypeTraits<T>::type2 src_type;
 
 const int x = blockIdx.x * blockDim.x + threadIdx.x;
@@ -323,16 +323,16 @@ __global__ void splitC2_(const uchar* src, size_t src_step,
 dst0_y[x] = src_elem.x;
 dst1_y[x] = src_elem.y;
 }
 }
 
 template <typename T>
 __global__ void splitC3_(const uchar* src, size_t src_step,
 int rows, int cols,
 uchar* dst0, size_t dst0_step,
 uchar* dst1, size_t dst1_step,
 uchar* dst2, size_t dst2_step)
 {
 typedef typename TypeTraits<T>::type3 src_type;
 
 const int x = blockIdx.x * blockDim.x + threadIdx.x;
@@ -350,16 +350,16 @@ __global__ void splitC3_(const uchar* src, size_t src_step,
 dst1_y[x] = src_elem.y;
 dst2_y[x] = src_elem.z;
 }
 }
 
 template <>
 __global__ void splitC3_<double>(
 const uchar* src, size_t src_step, int rows, int cols,
 uchar* dst0, size_t dst0_step,
 uchar* dst1, size_t dst1_step,
 uchar* dst2, size_t dst2_step)
 {
 const int x = blockIdx.x * blockDim.x + threadIdx.x;
 const int y = blockIdx.y * blockDim.y + threadIdx.y;
 
@@ -374,16 +374,16 @@ __global__ void splitC3_<double>(
 dst1_y[x] = src_y[3 * x + 1];
 dst2_y[x] = src_y[3 * x + 2];
 }
 }
 
 template <typename T>
 __global__ void splitC4_(const uchar* src, size_t src_step, int rows, int cols,
 uchar* dst0, size_t dst0_step,
 uchar* dst1, size_t dst1_step,
 uchar* dst2, size_t dst2_step,
 uchar* dst3, size_t dst3_step)
 {
 typedef typename TypeTraits<T>::type4 src_type;
 
 const int x = blockIdx.x * blockDim.x + threadIdx.x;
@@ -403,17 +403,17 @@ __global__ void splitC4_(const uchar* src, size_t src_step, int rows, int cols,
 dst2_y[x] = src_elem.z;
 dst3_y[x] = src_elem.w;
 }
 }
 
 template <>
 __global__ void splitC4_<double>(
 const uchar* src, size_t src_step, int rows, int cols,
 uchar* dst0, size_t dst0_step,
 uchar* dst1, size_t dst1_step,
 uchar* dst2, size_t dst2_step,
 uchar* dst3, size_t dst3_step)
 {
 const int x = blockIdx.x * blockDim.x + threadIdx.x;
 const int y = blockIdx.y * blockDim.y + threadIdx.y;
 
@@ -432,11 +432,11 @@ __global__ void splitC4_<double>(
 dst2_y[x] = src_elem2.x;
 dst3_y[x] = src_elem2.y;
 }
 }
 
 template <typename T>
 static void splitC2_(const DevMem2Db& src, DevMem2Db* dst, const cudaStream_t& stream)
 {
 dim3 blockDim(32, 8);
 dim3 gridDim(divUp(src.cols, blockDim.x), divUp(src.rows, blockDim.y));
 splitC2_<T><<<gridDim, blockDim, 0, stream>>>(
@@ -447,12 +447,12 @@ static void splitC2_(const DevMem2Db& src, DevMem2Db* dst, const cudaStream_t& s
 
 if (stream == 0)
 cudaSafeCall(cudaDeviceSynchronize());
 }
 
 template <typename T>
 static void splitC3_(const DevMem2Db& src, DevMem2Db* dst, const cudaStream_t& stream)
 {
 dim3 blockDim(32, 8);
 dim3 gridDim(divUp(src.cols, blockDim.x), divUp(src.rows, blockDim.y));
 splitC3_<T><<<gridDim, blockDim, 0, stream>>>(
@@ -464,12 +464,12 @@ static void splitC3_(const DevMem2Db& src, DevMem2Db* dst, const cudaStream_t& s
 
 if (stream == 0)
 cudaSafeCall(cudaDeviceSynchronize());
 }
 
 template <typename T>
 static void splitC4_(const DevMem2Db& src, DevMem2Db* dst, const cudaStream_t& stream)
 {
 dim3 blockDim(32, 8);
 dim3 gridDim(divUp(src.cols, blockDim.x), divUp(src.rows, blockDim.y));
 splitC4_<T><<<gridDim, blockDim, 0, stream>>>(
@@ -482,11 +482,11 @@ static void splitC4_(const DevMem2Db& src, DevMem2Db* dst, const cudaStream_t& s
 
 if (stream == 0)
 cudaSafeCall(cudaDeviceSynchronize());
 }
 
 void split_caller(const DevMem2Db& src, DevMem2Db* dst, int num_channels, size_t elem_size1, const cudaStream_t& stream)
 {
 static SplitFunction split_func_tbl[] =
 {
 splitC2_<char>, splitC2_<short>, splitC2_<int>, 0, splitC2_<double>,
@@ -501,8 +501,6 @@ void split_caller(const DevMem2Db& src, DevMem2Db* dst, int num_channels, size_t
 cv::gpu::error("Unsupported channel count or data type", __FILE__, __LINE__);
 
 split_func(src, dst, stream);
 }
-} // namespace split_merge
-
-END_OPENCV_DEVICE_NAMESPACE
+    } // namespace split_merge
+}}} // namespace cv { namespace gpu { namespace device
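Every launcher in this file sizes its launch identically: a fixed 32x8 thread block and a grid of ceil(cols/32) x ceil(rows/8) blocks computed with divUp. A compilable restatement of that arithmetic (divUp re-declared locally for the sketch):

#include <cuda_runtime.h>

// Integer ceiling division, as used by the mergeCn_/splitCn_ launchers above.
static inline int divUp(int total, int grain) { return (total + grain - 1) / grain; }

void launchShape(int rows, int cols, dim3& blockDim, dim3& gridDim)
{
    blockDim = dim3(32, 8);
    gridDim  = dim3(divUp(cols, blockDim.x), divUp(rows, blockDim.y));
    // e.g. a 1920x1080 image yields a 60 x 135 grid of 32x8 blocks, so
    // every pixel gets exactly one thread and edge blocks are bounds-checked.
}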
|
@ -42,35 +42,35 @@
|
|||||||
|
|
||||||
#include "internal_shared.hpp"
|
#include "internal_shared.hpp"
|
||||||
|
|
||||||
BEGIN_OPENCV_DEVICE_NAMESPACE
|
namespace cv { namespace gpu { namespace device
|
||||||
|
|
||||||
namespace stereobm {
|
|
||||||
|
|
||||||
//////////////////////////////////////////////////////////////////////////////////////////////////
|
|
||||||
/////////////////////////////////////// Stereo BM ////////////////////////////////////////////////
|
|
||||||
//////////////////////////////////////////////////////////////////////////////////////////////////
|
|
||||||
|
|
||||||
#define ROWSperTHREAD 21 // the number of rows a thread will process
|
|
||||||
|
|
||||||
#define BLOCK_W 128 // the thread block width (464)
|
|
||||||
#define N_DISPARITIES 8
|
|
||||||
|
|
||||||
#define STEREO_MIND 0 // The minimum d range to check
|
|
||||||
#define STEREO_DISP_STEP N_DISPARITIES // the d step, must be <= 1 to avoid aliasing
|
|
||||||
|
|
||||||
__constant__ unsigned int* cminSSDImage;
|
|
||||||
__constant__ size_t cminSSD_step;
|
|
||||||
__constant__ int cwidth;
|
|
||||||
__constant__ int cheight;
|
|
||||||
|
|
||||||
__device__ __forceinline__ int SQ(int a)
|
|
||||||
{
|
{
|
||||||
|
namespace stereobm
|
||||||
|
{
|
||||||
|
//////////////////////////////////////////////////////////////////////////////////////////////////
|
||||||
|
/////////////////////////////////////// Stereo BM ////////////////////////////////////////////////
|
||||||
|
//////////////////////////////////////////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
|
#define ROWSperTHREAD 21 // the number of rows a thread will process
|
||||||
|
|
||||||
|
#define BLOCK_W 128 // the thread block width (464)
|
||||||
|
#define N_DISPARITIES 8
|
||||||
|
|
||||||
|
#define STEREO_MIND 0 // The minimum d range to check
|
||||||
|
#define STEREO_DISP_STEP N_DISPARITIES // the d step, must be <= 1 to avoid aliasing
|
||||||
|
|
||||||
|
__constant__ unsigned int* cminSSDImage;
|
||||||
|
__constant__ size_t cminSSD_step;
|
||||||
|
__constant__ int cwidth;
|
||||||
|
__constant__ int cheight;
|
||||||
|
|
||||||
|
__device__ __forceinline__ int SQ(int a)
|
||||||
|
{
|
||||||
return a * a;
|
return a * a;
|
||||||
}
|
}
|
||||||
|
|
||||||
template<int RADIUS>
|
template<int RADIUS>
|
||||||
__device__ unsigned int CalcSSD(volatile unsigned int *col_ssd_cache, volatile unsigned int *col_ssd)
|
__device__ unsigned int CalcSSD(volatile unsigned int *col_ssd_cache, volatile unsigned int *col_ssd)
{
unsigned int cache = 0;
unsigned int cache2 = 0;

@@ -88,11 +88,11 @@ __device__ unsigned int CalcSSD(volatile unsigned int *col_ssd_cache, volatile unsigned int *col_ssd)
cache2 += col_ssd[i];

return col_ssd[0] + cache + cache2;
}

template<int RADIUS>
__device__ uint2 MinSSD(volatile unsigned int *col_ssd_cache, volatile unsigned int *col_ssd)
{
unsigned int ssd[N_DISPARITIES];

//See above: #define COL_SSD_SIZE (BLOCK_W + 2 * RADIUS)
@@ -122,11 +122,11 @@ __device__ uint2 MinSSD(volatile unsigned int *col_ssd_cache, volatile unsigned int *col_ssd)
}

return make_uint2(mssd, bestIdx);
}

template<int RADIUS>
__device__ void StepDown(int idx1, int idx2, unsigned char* imageL, unsigned char* imageR, int d, volatile unsigned int *col_ssd)
{
unsigned char leftPixel1;
unsigned char leftPixel2;
unsigned char rightPixel1[8];
@@ -189,11 +189,11 @@ __device__ void StepDown(int idx1, int idx2, unsigned char* imageL, unsigned char* imageR, int d, volatile unsigned int *col_ssd)
diff1 = leftPixel1 - rightPixel1[7];
diff2 = leftPixel2 - rightPixel2[7];
col_ssd[7 * (BLOCK_W + 2 * RADIUS)] += SQ(diff2) - SQ(diff1);
}

template<int RADIUS>
__device__ void InitColSSD(int x_tex, int y_tex, int im_pitch, unsigned char* imageL, unsigned char* imageR, int d, volatile unsigned int *col_ssd)
{
unsigned char leftPixel1;
int idx;
unsigned int diffa[] = {0, 0, 0, 0, 0, 0, 0, 0};
@@ -224,11 +224,11 @@ __device__ void InitColSSD(int x_tex, int y_tex, int im_pitch, unsigned char* imageL, unsigned char* imageR, int d, volatile unsigned int *col_ssd)
col_ssd[5 * (BLOCK_W + 2 * RADIUS)] = diffa[5];
col_ssd[6 * (BLOCK_W + 2 * RADIUS)] = diffa[6];
col_ssd[7 * (BLOCK_W + 2 * RADIUS)] = diffa[7];
}

template<int RADIUS>
__global__ void stereoKernel(unsigned char *left, unsigned char *right, size_t img_step, PtrStepb disp, int maxdisp)
{
extern __shared__ unsigned int col_ssd_cache[];
volatile unsigned int *col_ssd = col_ssd_cache + BLOCK_W + threadIdx.x;
volatile unsigned int *col_ssd_extra = threadIdx.x < (2 * RADIUS) ? col_ssd + BLOCK_W : 0; //#define N_DIRTY_PIXELS (2 * RADIUS)
@@ -305,11 +305,11 @@ __global__ void stereoKernel(unsigned char *left, unsigned char *right, size_t img_step, PtrStepb disp, int maxdisp)
}
} // for row loop
} // for d loop
}


template<int RADIUS> void kernel_caller(const DevMem2Db& left, const DevMem2Db& right, const DevMem2Db& disp, int maxdisp, cudaStream_t & stream)
{
dim3 grid(1,1,1);
dim3 threads(BLOCK_W, 1, 1);

@@ -324,12 +324,12 @@ template<int RADIUS> void kernel_caller(const DevMem2Db& left, const DevMem2Db& right, const DevMem2Db& disp, int maxdisp, cudaStream_t & stream)

if (stream == 0)
cudaSafeCall( cudaDeviceSynchronize() );
};

typedef void (*kernel_caller_t)(const DevMem2Db& left, const DevMem2Db& right, const DevMem2Db& disp, int maxdisp, cudaStream_t & stream);

const static kernel_caller_t callers[] =
{
0,
kernel_caller< 1>, kernel_caller< 2>, kernel_caller< 3>, kernel_caller< 4>, kernel_caller< 5>,
kernel_caller< 6>, kernel_caller< 7>, kernel_caller< 8>, kernel_caller< 9>, kernel_caller<10>,
@@ -338,11 +338,11 @@ const static kernel_caller_t callers[] =
kernel_caller<21>, kernel_caller<22>, kernel_caller<23>, kernel_caller<24>, kernel_caller<25>

//0,0,0, 0,0,0, 0,0,kernel_caller<9>
};
const int calles_num = sizeof(callers)/sizeof(callers[0]);

void stereoBM_GPU(const DevMem2Db& left, const DevMem2Db& right, const DevMem2Db& disp, int maxdisp, int winsz, const DevMem2D_<unsigned int>& minSSD_buf, cudaStream_t& stream)
{
int winsz2 = winsz >> 1;

if (winsz2 == 0 || winsz2 >= calles_num)
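The `callers[]` table above dispatches a runtime window size to a compile-time `RADIUS`: `stereoBM_GPU` computes `winsz2 = winsz >> 1` and indexes the table, so each supported half-window gets its own fully specialized kernel instantiation. A minimal sketch of the same pattern, with illustrative names that are not part of this commit:

// Dispatch a runtime parameter to a fixed set of template instantiations.
template <int RADIUS>
void run() { /* launch a kernel specialized for this RADIUS */ }

typedef void (*caller_t)();

static const caller_t callers[] = { 0, run<1>, run<2>, run<3> };
static const int callers_num = sizeof(callers) / sizeof(callers[0]);

void dispatch(int winsz)
{
    const int winsz2 = winsz >> 1;            // half window indexes the table
    if (winsz2 == 0 || winsz2 >= callers_num)
        return;                               // unsupported window size
    callers[winsz2]();                        // runtime choice, compile-time RADIUS
}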
@@ -362,16 +362,16 @@ void stereoBM_GPU(const DevMem2Db& left, const DevMem2Db& right, const DevMem2Db& disp, int maxdisp, int winsz, const DevMem2D_<unsigned int>& minSSD_buf, cudaStream_t& stream)
cudaSafeCall( cudaMemcpyToSymbol( cminSSD_step, &minssd_step, sizeof(minssd_step) ) );

callers[winsz2](left, right, disp, maxdisp, stream);
}

//////////////////////////////////////////////////////////////////////////////////////////////////
/////////////////////////////////////// Sobel Prefiler ///////////////////////////////////////////
//////////////////////////////////////////////////////////////////////////////////////////////////

texture<unsigned char, 2, cudaReadModeElementType> texForSobel;

__global__ void prefilter_kernel(DevMem2Db output, int prefilterCap)
{
int x = blockDim.x * blockIdx.x + threadIdx.x;
int y = blockDim.y * blockIdx.y + threadIdx.y;

@@ -385,10 +385,10 @@ __global__ void prefilter_kernel(DevMem2Db output, int prefilterCap)
conv = ::min(::min(::max(-prefilterCap, conv), prefilterCap) + prefilterCap, 255);
output.ptr(y)[x] = conv & 0xFF;
}
}

void prefilter_xsobel(const DevMem2Db& input, const DevMem2Db& output, int prefilterCap, cudaStream_t & stream)
{
cudaChannelFormatDesc desc = cudaCreateChannelDesc<unsigned char>();
cudaSafeCall( cudaBindTexture2D( 0, texForSobel, input.data, desc, input.cols, input.rows, input.step ) );

@@ -405,25 +405,25 @@ void prefilter_xsobel(const DevMem2Db& input, const DevMem2Db& output, int prefilterCap, cudaStream_t & stream)
cudaSafeCall( cudaDeviceSynchronize() );

cudaSafeCall( cudaUnbindTexture (texForSobel ) );
}


//////////////////////////////////////////////////////////////////////////////////////////////////
/////////////////////////////////// Textureness filtering ////////////////////////////////////////
//////////////////////////////////////////////////////////////////////////////////////////////////

texture<unsigned char, 2, cudaReadModeNormalizedFloat> texForTF;

__device__ __forceinline__ float sobel(int x, int y)
{
float conv = tex2D(texForTF, x - 1, y - 1) * (-1) + tex2D(texForTF, x + 1, y - 1) * (1) +
tex2D(texForTF, x - 1, y    ) * (-2) + tex2D(texForTF, x + 1, y    ) * (2) +
tex2D(texForTF, x - 1, y + 1) * (-1) + tex2D(texForTF, x + 1, y + 1) * (1);
return fabs(conv);
}

__device__ float CalcSums(float *cols, float *cols_cache, int winsz)
{
float cache = 0;
float cache2 = 0;
int winsz2 = winsz/2;
@@ -442,12 +442,12 @@ __device__ float CalcSums(float *cols, float *cols_cache, int winsz)
cache2 += cols[i];

return cols[0] + cache + cache2;
}

#define RpT (2 * ROWSperTHREAD) // got experimentally

__global__ void textureness_kernel(DevMem2Db disp, int winsz, float threshold)
{
int winsz2 = winsz/2;
int n_dirty_pixels = (winsz2) * 2;

@@ -503,10 +503,10 @@ __global__ void textureness_kernel(DevMem2Db disp, int winsz, float threshold)
__syncthreads();
}
}
}

void postfilter_textureness(const DevMem2Db& input, int winsz, float avgTexturenessThreshold, const DevMem2Db& disp, cudaStream_t & stream)
{
avgTexturenessThreshold *= winsz * winsz;

texForTF.filterMode = cudaFilterModeLinear;
@@ -530,8 +530,6 @@ void postfilter_textureness(const DevMem2Db& input, int winsz, float avgTexturenessThreshold, const DevMem2Db& disp, cudaStream_t & stream)
cudaSafeCall( cudaDeviceSynchronize() );

cudaSafeCall( cudaUnbindTexture (texForTF) );
}
-
-} // namespace stereobm
-
-END_OPENCV_DEVICE_NAMESPACE
+} // namespace stereobm
+}}} // namespace cv { namespace gpu { namespace device
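Across the files in this commit the change is the same: the opening macro becomes an explicitly spelled-out nested namespace, and the closing macro becomes the matching run of closing braces. A minimal before/after sketch, assuming the macros expanded to exactly these three namespaces (the macro definitions themselves are outside this diff):

// Before: namespace opened and closed through macros.
BEGIN_OPENCV_DEVICE_NAMESPACE
namespace stereobm {
    // ... device kernels ...
} // namespace stereobm
END_OPENCV_DEVICE_NAMESPACE

// After: the same nesting written out explicitly.
namespace cv { namespace gpu { namespace device
{
    namespace stereobm
    {
        // ... device kernels ...
    } // namespace stereobm
}}} // namespace cv { namespace gpu { namespace device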
@@ -44,36 +44,36 @@
#include "opencv2/gpu/device/saturate_cast.hpp"
#include "opencv2/gpu/device/limits.hpp"

-BEGIN_OPENCV_DEVICE_NAMESPACE
+namespace cv { namespace gpu { namespace device

-namespace stereobp {
-
-///////////////////////////////////////////////////////////////
-/////////////////////// load constants ////////////////////////
-///////////////////////////////////////////////////////////////
-
-__constant__ int cndisp;
-__constant__ float cmax_data_term;
-__constant__ float cdata_weight;
-__constant__ float cmax_disc_term;
-__constant__ float cdisc_single_jump;
-
-void load_constants(int ndisp, float max_data_term, float data_weight, float max_disc_term, float disc_single_jump)
{
+namespace stereobp
+{
+///////////////////////////////////////////////////////////////
+/////////////////////// load constants ////////////////////////
+///////////////////////////////////////////////////////////////
+
+__constant__ int cndisp;
+__constant__ float cmax_data_term;
+__constant__ float cdata_weight;
+__constant__ float cmax_disc_term;
+__constant__ float cdisc_single_jump;
+
+void load_constants(int ndisp, float max_data_term, float data_weight, float max_disc_term, float disc_single_jump)
+{
cudaSafeCall( cudaMemcpyToSymbol(cndisp, &ndisp, sizeof(int )) );
cudaSafeCall( cudaMemcpyToSymbol(cmax_data_term, &max_data_term, sizeof(float)) );
cudaSafeCall( cudaMemcpyToSymbol(cdata_weight, &data_weight, sizeof(float)) );
cudaSafeCall( cudaMemcpyToSymbol(cmax_disc_term, &max_disc_term, sizeof(float)) );
cudaSafeCall( cudaMemcpyToSymbol(cdisc_single_jump, &disc_single_jump, sizeof(float)) );
}

///////////////////////////////////////////////////////////////
////////////////////////// comp data //////////////////////////
///////////////////////////////////////////////////////////////

template <int cn> struct PixDiff;
template <> struct PixDiff<1>
{
__device__ __forceinline__ PixDiff(const uchar* ls)
{
l = *ls;
@@ -83,9 +83,9 @@ template <> struct PixDiff<1>
return ::abs((int)l - *rs);
}
uchar l;
};
template <> struct PixDiff<3>
{
__device__ __forceinline__ PixDiff(const uchar* ls)
{
l = *((uchar3*)ls);
@@ -103,9 +103,9 @@ template <> struct PixDiff<3>
return val;
}
uchar3 l;
};
template <> struct PixDiff<4>
{
__device__ __forceinline__ PixDiff(const uchar* ls)
{
l = *((uchar4*)ls);
@@ -125,11 +125,11 @@ template <> struct PixDiff<4>
return val;
}
uchar4 l;
};

template <int cn, typename D>
__global__ void comp_data(const DevMem2Db left, const PtrStepb right, PtrElemStep_<D> data)
{
const int x = blockIdx.x * blockDim.x + threadIdx.x;
const int y = blockIdx.y * blockDim.y + threadIdx.y;

@@ -156,13 +156,13 @@ __global__ void comp_data(const DevMem2Db left, const PtrStepb right, PtrElemStep_<D> data)
}
}
}
}

template<typename T, typename D>
void comp_data_gpu(const DevMem2Db& left, const DevMem2Db& right, const DevMem2Db& data, cudaStream_t stream);

template <> void comp_data_gpu<uchar, short>(const DevMem2Db& left, const DevMem2Db& right, const DevMem2Db& data, cudaStream_t stream)
{
dim3 threads(32, 8, 1);
dim3 grid(1, 1, 1);

@@ -174,9 +174,9 @@ template <> void comp_data_gpu<uchar, short>(const DevMem2Db& left, const DevMem2Db& right, const DevMem2Db& data, cudaStream_t stream)

if (stream == 0)
cudaSafeCall( cudaDeviceSynchronize() );
}
template <> void comp_data_gpu<uchar, float>(const DevMem2Db& left, const DevMem2Db& right, const DevMem2Db& data, cudaStream_t stream)
{
dim3 threads(32, 8, 1);
dim3 grid(1, 1, 1);

@@ -188,10 +188,10 @@ template <> void comp_data_gpu<uchar, float>(const DevMem2Db& left, const DevMem2Db& right, const DevMem2Db& data, cudaStream_t stream)

if (stream == 0)
cudaSafeCall( cudaDeviceSynchronize() );
}

template <> void comp_data_gpu<uchar3, short>(const DevMem2Db& left, const DevMem2Db& right, const DevMem2Db& data, cudaStream_t stream)
{
dim3 threads(32, 8, 1);
dim3 grid(1, 1, 1);

@@ -203,9 +203,9 @@ template <> void comp_data_gpu<uchar3, short>(const DevMem2Db& left, const DevMem2Db& right, const DevMem2Db& data, cudaStream_t stream)

if (stream == 0)
cudaSafeCall( cudaDeviceSynchronize() );
}
template <> void comp_data_gpu<uchar3, float>(const DevMem2Db& left, const DevMem2Db& right, const DevMem2Db& data, cudaStream_t stream)
{
dim3 threads(32, 8, 1);
dim3 grid(1, 1, 1);

@@ -217,10 +217,10 @@ template <> void comp_data_gpu<uchar3, float>(const DevMem2Db& left, const DevMem2Db& right, const DevMem2Db& data, cudaStream_t stream)

if (stream == 0)
cudaSafeCall( cudaDeviceSynchronize() );
}

template <> void comp_data_gpu<uchar4, short>(const DevMem2Db& left, const DevMem2Db& right, const DevMem2Db& data, cudaStream_t stream)
{
dim3 threads(32, 8, 1);
dim3 grid(1, 1, 1);

@@ -232,9 +232,9 @@ template <> void comp_data_gpu<uchar4, short>(const DevMem2Db& left, const DevMem2Db& right, const DevMem2Db& data, cudaStream_t stream)

if (stream == 0)
cudaSafeCall( cudaDeviceSynchronize() );
}
template <> void comp_data_gpu<uchar4, float>(const DevMem2Db& left, const DevMem2Db& right, const DevMem2Db& data, cudaStream_t stream)
{
dim3 threads(32, 8, 1);
dim3 grid(1, 1, 1);

@@ -246,15 +246,15 @@ template <> void comp_data_gpu<uchar4, float>(const DevMem2Db& left, const DevMem2Db& right, const DevMem2Db& data, cudaStream_t stream)

if (stream == 0)
cudaSafeCall( cudaDeviceSynchronize() );
}

///////////////////////////////////////////////////////////////
//////////////////////// data step down ///////////////////////
///////////////////////////////////////////////////////////////

template <typename T>
__global__ void data_step_down(int dst_cols, int dst_rows, int src_rows, const PtrStep<T> src, PtrStep<T> dst)
{
const int x = blockIdx.x * blockDim.x + threadIdx.x;
const int y = blockIdx.y * blockDim.y + threadIdx.y;

@@ -270,11 +270,11 @@ __global__ void data_step_down(int dst_cols, int dst_rows, int src_rows, const PtrStep<T> src, PtrStep<T> dst)
dst.ptr(d * dst_rows + y)[x] = saturate_cast<T>(dst_reg);
}
}
}

template<typename T>
void data_step_down_gpu(int dst_cols, int dst_rows, int src_rows, const DevMem2Db& src, const DevMem2Db& dst, cudaStream_t stream)
{
dim3 threads(32, 8, 1);
dim3 grid(1, 1, 1);

@@ -286,18 +286,18 @@ void data_step_down_gpu(int dst_cols, int dst_rows, int src_rows, const DevMem2Db& src, const DevMem2Db& dst, cudaStream_t stream)

if (stream == 0)
cudaSafeCall( cudaDeviceSynchronize() );
}

template void data_step_down_gpu<short>(int dst_cols, int dst_rows, int src_rows, const DevMem2Db& src, const DevMem2Db& dst, cudaStream_t stream);
template void data_step_down_gpu<float>(int dst_cols, int dst_rows, int src_rows, const DevMem2Db& src, const DevMem2Db& dst, cudaStream_t stream);

///////////////////////////////////////////////////////////////
/////////////////// level up messages ////////////////////////
///////////////////////////////////////////////////////////////

template <typename T>
__global__ void level_up_message(int dst_cols, int dst_rows, int src_rows, const PtrElemStep_<T> src, PtrElemStep_<T> dst)
{
const int x = blockIdx.x * blockDim.x + threadIdx.x;
const int y = blockIdx.y * blockDim.y + threadIdx.y;

@@ -312,11 +312,11 @@ __global__ void level_up_message(int dst_cols, int dst_rows, int src_rows, const PtrElemStep_<T> src, PtrElemStep_<T> dst)
for (int d = 0; d < cndisp; ++d)
dstr[d * dst_disp_step] = srcr[d * src_disp_step];
}
}

template <typename T>
void level_up_messages_gpu(int dst_idx, int dst_cols, int dst_rows, int src_rows, DevMem2Db* mus, DevMem2Db* mds, DevMem2Db* mls, DevMem2Db* mrs, cudaStream_t stream)
{
dim3 threads(32, 8, 1);
dim3 grid(1, 1, 1);

@@ -339,18 +339,18 @@ void level_up_messages_gpu(int dst_idx, int dst_cols, int dst_rows, int src_rows, DevMem2Db* mus, DevMem2Db* mds, DevMem2Db* mls, DevMem2Db* mrs, cudaStream_t stream)

if (stream == 0)
cudaSafeCall( cudaDeviceSynchronize() );
}

template void level_up_messages_gpu<short>(int dst_idx, int dst_cols, int dst_rows, int src_rows, DevMem2Db* mus, DevMem2Db* mds, DevMem2Db* mls, DevMem2Db* mrs, cudaStream_t stream);
template void level_up_messages_gpu<float>(int dst_idx, int dst_cols, int dst_rows, int src_rows, DevMem2Db* mus, DevMem2Db* mds, DevMem2Db* mls, DevMem2Db* mrs, cudaStream_t stream);

///////////////////////////////////////////////////////////////
//////////////////// calc all iterations /////////////////////
///////////////////////////////////////////////////////////////

template <typename T>
__device__ void calc_min_linear_penalty(T* dst, size_t step)
{
float prev = dst[0];
float cur;
for (int disp = 1; disp < cndisp; ++disp)
@@ -377,11 +377,11 @@ __device__ void calc_min_linear_penalty(T* dst, size_t step)
}
prev = cur;
}
}

template <typename T>
__device__ void message(const T* msg1, const T* msg2, const T* msg3, const T* data, T* dst, size_t msg_disp_step, size_t data_disp_step)
{
float minimum = device::numeric_limits<float>::max();

for(int i = 0; i < cndisp; ++i)
@@ -416,11 +416,11 @@ __device__ void message(const T* msg1, const T* msg2, const T* msg3, const T* data, T* dst, size_t msg_disp_step, size_t data_disp_step)

for(int i = 0; i < cndisp; ++i)
dst[msg_disp_step * i] -= sum;
}

template <typename T>
__global__ void one_iteration(int t, PtrElemStep_<T> u, T* d, T* l, T* r, const PtrElemStep_<T> data, int cols, int rows)
{
const int y = blockIdx.y * blockDim.y + threadIdx.y;
const int x = ((blockIdx.x * blockDim.x + threadIdx.x) << 1) + ((y + t) & 1);

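The index computation in `one_iteration` implements a red-black (checkerboard) update schedule: on iteration `t`, each thread only updates pixels whose parity matches `(y + t) & 1`, so the four neighbour messages a pixel reads were all written in the previous pass. Annotated, the two index lines read:

// Each thread covers a pair of columns; the parity term picks one per pass.
const int y = blockIdx.y * blockDim.y + threadIdx.y;
const int x = ((blockIdx.x * blockDim.x + threadIdx.x) << 1)  // even column
            + ((y + t) & 1);                                  // +1 on odd passes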
@@ -440,12 +440,12 @@ __global__ void one_iteration(int t, PtrElemStep_<T> u, T* d, T* l, T* r, const PtrElemStep_<T> data, int cols, int rows)
message(us + u.step, ds - u.step, rs - 1, dt, rs, msg_disp_step, data_disp_step);
message(us + u.step, ds - u.step, ls + 1, dt, ls, msg_disp_step, data_disp_step);
}
}

template <typename T>
void calc_all_iterations_gpu(int cols, int rows, int iters, const DevMem2Db& u, const DevMem2Db& d,
const DevMem2Db& l, const DevMem2Db& r, const DevMem2Db& data, cudaStream_t stream)
{
dim3 threads(32, 8, 1);
dim3 grid(1, 1, 1);

@@ -460,19 +460,19 @@ void calc_all_iterations_gpu(int cols, int rows, int iters, const DevMem2Db& u, const DevMem2Db& d,
if (stream == 0)
cudaSafeCall( cudaDeviceSynchronize() );
}
}

template void calc_all_iterations_gpu<short>(int cols, int rows, int iters, const DevMem2Db& u, const DevMem2Db& d, const DevMem2Db& l, const DevMem2Db& r, const DevMem2Db& data, cudaStream_t stream);
template void calc_all_iterations_gpu<float>(int cols, int rows, int iters, const DevMem2Db& u, const DevMem2Db& d, const DevMem2Db& l, const DevMem2Db& r, const DevMem2Db& data, cudaStream_t stream);

///////////////////////////////////////////////////////////////
/////////////////////////// output ////////////////////////////
///////////////////////////////////////////////////////////////

template <typename T>
__global__ void output(const PtrElemStep_<T> u, const T* d, const T* l, const T* r, const T* data,
DevMem2D_<short> disp)
{
const int x = blockIdx.x * blockDim.x + threadIdx.x;
const int y = blockIdx.y * blockDim.y + threadIdx.y;

@@ -505,12 +505,12 @@ __global__ void output(const PtrElemStep_<T> u, const T* d, const T* l, const T* r, const T* data,

disp.ptr(y)[x] = saturate_cast<short>(best);
}
}

template <typename T>
void output_gpu(const DevMem2Db& u, const DevMem2Db& d, const DevMem2Db& l, const DevMem2Db& r, const DevMem2Db& data,
const DevMem2D_<short>& disp, cudaStream_t stream)
{
dim3 threads(32, 8, 1);
dim3 grid(1, 1, 1);

@@ -522,11 +522,9 @@ void output_gpu(const DevMem2Db& u, const DevMem2Db& d, const DevMem2Db& l, const DevMem2Db& r, const DevMem2Db& data,

if (stream == 0)
cudaSafeCall( cudaDeviceSynchronize() );
}

template void output_gpu<short>(const DevMem2Db& u, const DevMem2Db& d, const DevMem2Db& l, const DevMem2Db& r, const DevMem2Db& data, const DevMem2D_<short>& disp, cudaStream_t stream);
template void output_gpu<float>(const DevMem2Db& u, const DevMem2Db& d, const DevMem2Db& l, const DevMem2Db& r, const DevMem2Db& data, const DevMem2D_<short>& disp, cudaStream_t stream);
-
-} // namespace stereobp
-
-END_OPENCV_DEVICE_NAMESPACE
+} // namespace stereobp
+}}} // namespace cv { namespace gpu { namespace device
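The `load_constants` functions in these files all follow one pattern: algorithm parameters live in device `__constant__` variables and are filled from the host with `cudaMemcpyToSymbol`, so kernels read them without taking them as arguments. A minimal self-contained sketch of that pattern (illustrative names, not the commit's; the real code wraps each call in `cudaSafeCall`):

#include <cuda_runtime.h>

__constant__ int   c_ndisp;        // disparity count, visible to every kernel
__constant__ float c_data_weight;  // data-term weight

void load_constants_sketch(int ndisp, float data_weight)
{
    // Copy host values into __constant__ memory (host-to-device by default).
    cudaMemcpyToSymbol(c_ndisp, &ndisp, sizeof(int));
    cudaMemcpyToSymbol(c_data_weight, &data_weight, sizeof(float));
}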
@@ -44,37 +44,37 @@
#include "opencv2/gpu/device/saturate_cast.hpp"
#include "opencv2/gpu/device/limits.hpp"

-BEGIN_OPENCV_DEVICE_NAMESPACE
+namespace cv { namespace gpu { namespace device

-namespace stereocsbp {
-
-///////////////////////////////////////////////////////////////
-/////////////////////// load constants ////////////////////////
-///////////////////////////////////////////////////////////////
-
-__constant__ int cndisp;
-
-__constant__ float cmax_data_term;
-__constant__ float cdata_weight;
-__constant__ float cmax_disc_term;
-__constant__ float cdisc_single_jump;
-
-__constant__ int cth;
-
-__constant__ size_t cimg_step;
-__constant__ size_t cmsg_step1;
-__constant__ size_t cmsg_step2;
-__constant__ size_t cdisp_step1;
-__constant__ size_t cdisp_step2;
-
-__constant__ uchar* cleft;
-__constant__ uchar* cright;
-__constant__ uchar* ctemp;
-
-
-void load_constants(int ndisp, float max_data_term, float data_weight, float max_disc_term, float disc_single_jump, int min_disp_th,
-const DevMem2Db& left, const DevMem2Db& right, const DevMem2Db& temp)
{
+namespace stereocsbp
+{
+///////////////////////////////////////////////////////////////
+/////////////////////// load constants ////////////////////////
+///////////////////////////////////////////////////////////////
+
+__constant__ int cndisp;
+
+__constant__ float cmax_data_term;
+__constant__ float cdata_weight;
+__constant__ float cmax_disc_term;
+__constant__ float cdisc_single_jump;
+
+__constant__ int cth;
+
+__constant__ size_t cimg_step;
+__constant__ size_t cmsg_step1;
+__constant__ size_t cmsg_step2;
+__constant__ size_t cdisp_step1;
+__constant__ size_t cdisp_step2;
+
+__constant__ uchar* cleft;
+__constant__ uchar* cright;
+__constant__ uchar* ctemp;
+
+
+void load_constants(int ndisp, float max_data_term, float data_weight, float max_disc_term, float disc_single_jump, int min_disp_th,
+const DevMem2Db& left, const DevMem2Db& right, const DevMem2Db& temp)
+{
cudaSafeCall( cudaMemcpyToSymbol(cndisp, &ndisp, sizeof(int)) );

cudaSafeCall( cudaMemcpyToSymbol(cmax_data_term, &max_data_term, sizeof(float)) );
@@ -89,22 +89,22 @@ void load_constants(int ndisp, float max_data_term, float data_weight, float max_disc_term, float disc_single_jump, int min_disp_th,
cudaSafeCall( cudaMemcpyToSymbol(cleft, &left.data, sizeof(left.data)) );
cudaSafeCall( cudaMemcpyToSymbol(cright, &right.data, sizeof(right.data)) );
cudaSafeCall( cudaMemcpyToSymbol(ctemp, &temp.data, sizeof(temp.data)) );
}

///////////////////////////////////////////////////////////////
/////////////////////// init data cost ////////////////////////
///////////////////////////////////////////////////////////////

template <int channels> struct DataCostPerPixel;
template <> struct DataCostPerPixel<1>
{
static __device__ __forceinline__ float compute(const uchar* left, const uchar* right)
{
return fmin(cdata_weight * ::abs((int)*left - *right), cdata_weight * cmax_data_term);
}
};
template <> struct DataCostPerPixel<3>
{
static __device__ __forceinline__ float compute(const uchar* left, const uchar* right)
{
float tb = 0.114f * ::abs((int)left[0] - right[0]);
@@ -113,9 +113,9 @@ template <> struct DataCostPerPixel<3>

return fmin(cdata_weight * (tr + tg + tb), cdata_weight * cmax_data_term);
}
};
template <> struct DataCostPerPixel<4>
{
static __device__ __forceinline__ float compute(const uchar* left, const uchar* right)
{
uchar4 l = *((const uchar4*)left);
@@ -127,11 +127,11 @@ template <> struct DataCostPerPixel<4>

return fmin(cdata_weight * (tr + tg + tb), cdata_weight * cmax_data_term);
}
};

template <typename T>
__global__ void get_first_k_initial_global(T* data_cost_selected_, T *selected_disp_pyr, int h, int w, int nr_plane)
{
int x = blockIdx.x * blockDim.x + threadIdx.x;
int y = blockIdx.y * blockDim.y + threadIdx.y;

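The `DataCostPerPixel` specializations above all compute the same truncated linear matching cost: for a single channel it is cost = min(cdata_weight * |L - R|, cdata_weight * cmax_data_term), so the per-pixel penalty grows with the intensity difference but saturates at cmax_data_term. For three and four channels the absolute difference is replaced by a weighted sum of per-channel differences; the `0.114f` factor visible above is the blue coefficient, and the `tr`/`tg` terms that fall outside these hunks are presumably the matching red/green luma weights (0.299 and 0.587 by the usual convention; not confirmed by this diff).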
@@ -160,12 +160,12 @@ __global__ void get_first_k_initial_global(T* data_cost_selected_, T *selected_disp_pyr, int h, int w, int nr_plane)
data_cost [id * cdisp_step1] = numeric_limits<T>::max();
}
}
}


template <typename T>
__global__ void get_first_k_initial_local(T* data_cost_selected_, T* selected_disp_pyr, int h, int w, int nr_plane)
{
int x = blockIdx.x * blockDim.x + threadIdx.x;
int y = blockIdx.y * blockDim.y + threadIdx.y;

@@ -217,11 +217,11 @@ __global__ void get_first_k_initial_local(T* data_cost_selected_, T* selected_disp_pyr, int h, int w, int nr_plane)
data_cost[id * cdisp_step1] = numeric_limits<T>::max();
}
}
}

template <typename T, int channels>
__global__ void init_data_cost(int h, int w, int level)
{
int x = blockIdx.x * blockDim.x + threadIdx.x;
int y = blockIdx.y * blockDim.y + threadIdx.y;

@@ -257,11 +257,11 @@ __global__ void init_data_cost(int h, int w, int level)
data_cost[cdisp_step1 * d] = saturate_cast<T>(val);
}
}
}

template <typename T, int winsz, int channels>
__global__ void init_data_cost_reduce(int level, int rows, int cols, int h)
{
int x_out = blockIdx.x;
int y_out = blockIdx.y % h;
int d = (blockIdx.y / h) * blockDim.z + threadIdx.z;
@@ -319,12 +319,12 @@ __global__ void init_data_cost_reduce(int level, int rows, int cols, int h)
if (tid == 0)
data_cost[cdisp_step1 * d] = saturate_cast<T>(dline[0]);
}
}


template <typename T>
void init_data_cost_caller_(int /*rows*/, int /*cols*/, int h, int w, int level, int /*ndisp*/, int channels, cudaStream_t stream)
{
dim3 threads(32, 8, 1);
dim3 grid(1, 1, 1);

@@ -338,11 +338,11 @@ void init_data_cost_caller_(int /*rows*/, int /*cols*/, int h, int w, int level, int /*ndisp*/, int channels, cudaStream_t stream)
case 4: init_data_cost<T, 4><<<grid, threads, 0, stream>>>(h, w, level); break;
default: cv::gpu::error("Unsupported channels count", __FILE__, __LINE__);
}
}

template <typename T, int winsz>
void init_data_cost_reduce_caller_(int rows, int cols, int h, int w, int level, int ndisp, int channels, cudaStream_t stream)
{
const int threadsNum = 256;
const size_t smem_size = threadsNum * sizeof(float);

@@ -357,12 +357,12 @@ void init_data_cost_reduce_caller_(int rows, int cols, int h, int w, int level, int ndisp, int channels, cudaStream_t stream)
case 4: init_data_cost_reduce<T, winsz, 4><<<grid, threads, smem_size, stream>>>(level, rows, cols, h); break;
default: cv::gpu::error("Unsupported channels count", __FILE__, __LINE__);
}
}

template<class T>
void init_data_cost(int rows, int cols, T* disp_selected_pyr, T* data_cost_selected, size_t msg_step,
int h, int w, int level, int nr_plane, int ndisp, int channels, bool use_local_init_data_cost, cudaStream_t stream)
{

typedef void (*InitDataCostCaller)(int cols, int rows, int w, int h, int level, int ndisp, int channels, cudaStream_t stream);

@@ -398,21 +398,21 @@ void init_data_cost(int rows, int cols, T* disp_selected_pyr, T* data_cost_selected, size_t msg_step,

if (stream == 0)
cudaSafeCall( cudaDeviceSynchronize() );
}

template void init_data_cost(int rows, int cols, short* disp_selected_pyr, short* data_cost_selected, size_t msg_step,
int h, int w, int level, int nr_plane, int ndisp, int channels, bool use_local_init_data_cost, cudaStream_t stream);

template void init_data_cost(int rows, int cols, float* disp_selected_pyr, float* data_cost_selected, size_t msg_step,
int h, int w, int level, int nr_plane, int ndisp, int channels, bool use_local_init_data_cost, cudaStream_t stream);

///////////////////////////////////////////////////////////////
////////////////////// compute data cost //////////////////////
///////////////////////////////////////////////////////////////

template <typename T, int channels>
__global__ void compute_data_cost(const T* selected_disp_pyr, T* data_cost_, int h, int w, int level, int nr_plane)
{
int x = blockIdx.x * blockDim.x + threadIdx.x;
int y = blockIdx.y * blockDim.y + threadIdx.y;

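The `*_reduce` kernels (`init_data_cost_reduce` above, `compute_data_cost_reduce` below) aggregate window costs with a standard shared-memory block reduction: each thread accumulates a strided partial sum, the block folds the partials in a tree, and thread 0 writes the result, which is why the callers reserve `smem_size = threadsNum * sizeof(float)`. A minimal self-contained sketch of that reduction (illustrative names, not the commit's kernels):

__global__ void block_sum_sketch(const float* in, float* out, int n)
{
    extern __shared__ float sdata[];
    const int tid = threadIdx.x;

    // Strided partial sum: each thread covers every blockDim.x-th element.
    float sum = 0.f;
    for (int i = tid; i < n; i += blockDim.x)
        sum += in[i];
    sdata[tid] = sum;
    __syncthreads();

    // Tree reduction in shared memory (blockDim.x assumed a power of two).
    for (int s = blockDim.x / 2; s > 0; s >>= 1)
    {
        if (tid < s)
            sdata[tid] += sdata[tid + s];
        __syncthreads();
    }

    if (tid == 0)
        out[blockIdx.x] = sdata[0];  // one partial result per block
}

// Launched with the same shared-memory sizing as the callers above:
// block_sum_sketch<<<blocks, 256, 256 * sizeof(float)>>>(in, out, n);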
@@ -451,11 +451,11 @@ __global__ void compute_data_cost(const T* selected_disp_pyr, T* data_cost_, int h, int w, int level, int nr_plane)
data_cost[cdisp_step1 * d] = saturate_cast<T>(val);
}
}
}

template <typename T, int winsz, int channels>
__global__ void compute_data_cost_reduce(const T* selected_disp_pyr, T* data_cost_, int level, int rows, int cols, int h, int nr_plane)
{
int x_out = blockIdx.x;
int y_out = blockIdx.y % h;
int d = (blockIdx.y / h) * blockDim.z + threadIdx.z;
@@ -516,12 +516,12 @@ __global__ void compute_data_cost_reduce(const T* selected_disp_pyr, T* data_cost_, int level, int rows, int cols, int h, int nr_plane)
if (tid == 0)
data_cost[cdisp_step1 * d] = saturate_cast<T>(dline[0]);
}
}

template <typename T>
void compute_data_cost_caller_(const T* disp_selected_pyr, T* data_cost, int /*rows*/, int /*cols*/,
int h, int w, int level, int nr_plane, int channels, cudaStream_t stream)
{
dim3 threads(32, 8, 1);
dim3 grid(1, 1, 1);

@@ -535,12 +535,12 @@ void compute_data_cost_caller_(const T* disp_selected_pyr, T* data_cost, int /*rows*/, int /*cols*/,
case 4: compute_data_cost<T, 4><<<grid, threads, 0, stream>>>(disp_selected_pyr, data_cost, h, w, level, nr_plane); break;
default: cv::gpu::error("Unsupported channels count", __FILE__, __LINE__);
}
}

template <typename T, int winsz>
void compute_data_cost_reduce_caller_(const T* disp_selected_pyr, T* data_cost, int rows, int cols,
int h, int w, int level, int nr_plane, int channels, cudaStream_t stream)
{
const int threadsNum = 256;
const size_t smem_size = threadsNum * sizeof(float);

@@ -555,12 +555,12 @@ void compute_data_cost_reduce_caller_(const T* disp_selected_pyr, T* data_cost, int rows, int cols,
case 4: compute_data_cost_reduce<T, winsz, 4><<<grid, threads, smem_size, stream>>>(disp_selected_pyr, data_cost, level, rows, cols, h, nr_plane); break;
default: cv::gpu::error("Unsupported channels count", __FILE__, __LINE__);
}
}

template<class T>
void compute_data_cost(const T* disp_selected_pyr, T* data_cost, size_t msg_step1, size_t msg_step2,
int rows, int cols, int h, int w, int h2, int level, int nr_plane, int channels, cudaStream_t stream)
{
typedef void (*ComputeDataCostCaller)(const T* disp_selected_pyr, T* data_cost, int rows, int cols,
int h, int w, int level, int nr_plane, int channels, cudaStream_t stream);

@@ -583,27 +583,27 @@ void compute_data_cost(const T* disp_selected_pyr, T* data_cost, size_t msg_step1, size_t msg_step2,

if (stream == 0)
cudaSafeCall( cudaDeviceSynchronize() );
}

template void compute_data_cost(const short* disp_selected_pyr, short* data_cost, size_t msg_step1, size_t msg_step2,
int rows, int cols, int h, int w, int h2, int level, int nr_plane, int channels, cudaStream_t stream);

template void compute_data_cost(const float* disp_selected_pyr, float* data_cost, size_t msg_step1, size_t msg_step2,
int rows, int cols, int h, int w, int h2, int level, int nr_plane, int channels, cudaStream_t stream);


///////////////////////////////////////////////////////////////
//////////////////////// init message /////////////////////////
///////////////////////////////////////////////////////////////


template <typename T>
__device__ void get_first_k_element_increase(T* u_new, T* d_new, T* l_new, T* r_new,
const T* u_cur, const T* d_cur, const T* l_cur, const T* r_cur,
T* data_cost_selected, T* disparity_selected_new, T* data_cost_new,
const T* data_cost_cur, const T* disparity_selected_cur,
int nr_plane, int nr_plane2)
{
for(int i = 0; i < nr_plane; i++)
{
T minimum = numeric_limits<T>::max();
@@ -628,15 +628,15 @@ __device__ void get_first_k_element_increase(T* u_new, T* d_new, T* l_new, T* r_new,

data_cost_new[id * cdisp_step1] = numeric_limits<T>::max();
}
}

template <typename T>
__global__ void init_message(T* u_new_, T* d_new_, T* l_new_, T* r_new_,
const T* u_cur_, const T* d_cur_, const T* l_cur_, const T* r_cur_,
T* selected_disp_pyr_new, const T* selected_disp_pyr_cur,
T* data_cost_selected_, const T* data_cost_,
int h, int w, int nr_plane, int h2, int w2, int nr_plane2)
{
int x = blockIdx.x * blockDim.x + threadIdx.x;
int y = blockIdx.y * blockDim.y + threadIdx.y;

@@ -677,16 +677,16 @@ __global__ void init_message(T* u_new_, T* d_new_, T* l_new_, T* r_new_,
data_cost_selected, disparity_selected_new, data_cost_new,
data_cost, disparity_selected_cur, nr_plane, nr_plane2);
}
}


template<class T>
void init_message(T* u_new, T* d_new, T* l_new, T* r_new,
const T* u_cur, const T* d_cur, const T* l_cur, const T* r_cur,
T* selected_disp_pyr_new, const T* selected_disp_pyr_cur,
T* data_cost_selected, const T* data_cost, size_t msg_step1, size_t msg_step2,
int h, int w, int nr_plane, int h2, int w2, int nr_plane2, cudaStream_t stream)
{

size_t disp_step1 = msg_step1 * h;
size_t disp_step2 = msg_step2 * h2;
@@ -710,29 +710,29 @@ void init_message(T* u_new, T* d_new, T* l_new, T* r_new,

if (stream == 0)
cudaSafeCall( cudaDeviceSynchronize() );
}


template void init_message(short* u_new, short* d_new, short* l_new, short* r_new,
const short* u_cur, const short* d_cur, const short* l_cur, const short* r_cur,
short* selected_disp_pyr_new, const short* selected_disp_pyr_cur,
short* data_cost_selected, const short* data_cost, size_t msg_step1, size_t msg_step2,
int h, int w, int nr_plane, int h2, int w2, int nr_plane2, cudaStream_t stream);

template void init_message(float* u_new, float* d_new, float* l_new, float* r_new,
const float* u_cur, const float* d_cur, const float* l_cur, const float* r_cur,
float* selected_disp_pyr_new, const float* selected_disp_pyr_cur,
float* data_cost_selected, const float* data_cost, size_t msg_step1, size_t msg_step2,
int h, int w, int nr_plane, int h2, int w2, int nr_plane2, cudaStream_t stream);

///////////////////////////////////////////////////////////////
//////////////////// calc all iterations /////////////////////
///////////////////////////////////////////////////////////////

template <typename T>
__device__ void message_per_pixel(const T* data, T* msg_dst, const T* msg1, const T* msg2, const T* msg3,
const T* dst_disp, const T* src_disp, int nr_plane, T* temp)
{
T minimum = numeric_limits<T>::max();

for(int d = 0; d < nr_plane; d++)
@@ -762,11 +762,11 @@ __device__ void message_per_pixel(const T* data, T* msg_dst, const T* msg1, const T* msg2, const T* msg3,

for(int d = 0; d < nr_plane; d++)
|
||||||
msg_dst[d * cdisp_step1] = saturate_cast<T>(temp[d * cdisp_step1] - sum);
|
msg_dst[d * cdisp_step1] = saturate_cast<T>(temp[d * cdisp_step1] - sum);
|
||||||
}
|
}
|
||||||
|
|
||||||
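Note (not part of the diff): message_per_pixel is the min-sum belief-propagation update over the selected disparity planes, followed by normalization — the mean of the freshly computed messages (the sum above) is subtracted from every entry so message magnitudes stay bounded across iterations. Schematically, with K = nr_plane and the smoothness term V assumed from the elided hunk:

    m'(d) = min over d' of ( D(d') + m1(d') + m2(d') + m3(d') + V(d, d') )
    m(d)  = m'(d) - (1/K) * sum over d'' of m'(d'')
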
template <typename T>
__global__ void compute_message(T* u_, T* d_, T* l_, T* r_, const T* data_cost_selected, const T* selected_disp_pyr_cur, int h, int w, int nr_plane, int i)
{
    int y = blockIdx.y * blockDim.y + threadIdx.y;
    int x = ((blockIdx.x * blockDim.x + threadIdx.x) << 1) + ((y + i) & 1);

@ -788,13 +788,13 @@ __global__ void compute_message(T* u_, T* d_, T* l_, T* r_, const T* data_cost_s
        message_per_pixel(data, l, u + cmsg_step1, d - cmsg_step1, l + 1, disp, disp - 1, nr_plane, temp);
        message_per_pixel(data, r, u + cmsg_step1, d - cmsg_step1, r - 1, disp, disp + 1, nr_plane, temp);
    }
}


template<class T>
void calc_all_iterations(T* u, T* d, T* l, T* r, const T* data_cost_selected,
                         const T* selected_disp_pyr_cur, size_t msg_step, int h, int w, int nr_plane, int iters, cudaStream_t stream)
{
    size_t disp_step = msg_step * h;
    cudaSafeCall( cudaMemcpyToSymbol(cdisp_step1, &disp_step, sizeof(size_t)) );
    cudaSafeCall( cudaMemcpyToSymbol(cmsg_step1, &msg_step, sizeof(size_t)) );
@ -813,25 +813,25 @@ void calc_all_iterations(T* u, T* d, T* l, T* r, const T* data_cost_selected,
    if (stream == 0)
        cudaSafeCall( cudaDeviceSynchronize() );
}
};

template void calc_all_iterations(short* u, short* d, short* l, short* r, const short* data_cost_selected, const short* selected_disp_pyr_cur, size_t msg_step,
                                  int h, int w, int nr_plane, int iters, cudaStream_t stream);

template void calc_all_iterations(float* u, float* d, float* l, float* r, const float* data_cost_selected, const float* selected_disp_pyr_cur, size_t msg_step,
                                  int h, int w, int nr_plane, int iters, cudaStream_t stream);


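Note: the cdisp_step1/cmsg_step1 symbols written with cudaMemcpyToSymbol above are per-launch strides kept in constant memory, so every kernel in this file can index plane d of a message array as ptr[d * cdisp_step1]. A minimal sketch of the pattern — the __constant__ declarations sit outside these hunks and are assumptions:

    // Assumed declarations near the top of stereocsbp.cu:
    __constant__ size_t cdisp_step1;  // stride between disparity planes
    __constant__ size_t cmsg_step1;   // stride between message rows

    // Host side, before each launch (as in calc_all_iterations above):
    size_t disp_step = msg_step * h;
    cudaSafeCall( cudaMemcpyToSymbol(cdisp_step1, &disp_step, sizeof(size_t)) );
    cudaSafeCall( cudaMemcpyToSymbol(cmsg_step1,  &msg_step,  sizeof(size_t)) );
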
///////////////////////////////////////////////////////////////
/////////////////////////// output ////////////////////////////
///////////////////////////////////////////////////////////////


template <typename T>
__global__ void compute_disp(const T* u_, const T* d_, const T* l_, const T* r_,
                             const T* data_cost_selected, const T* disp_selected_pyr,
                             short* disp, size_t res_step, int cols, int rows, int nr_plane)
{
    int x = blockIdx.x * blockDim.x + threadIdx.x;
    int y = blockIdx.y * blockDim.y + threadIdx.y;

@ -860,12 +860,12 @@ __global__ void compute_disp(const T* u_, const T* d_, const T* l_, const T* r_,
        }
        disp[res_step * y + x] = best;
    }
}

template<class T>
void compute_disp(const T* u, const T* d, const T* l, const T* r, const T* data_cost_selected, const T* disp_selected, size_t msg_step,
                  const DevMem2D_<short>& disp, int nr_plane, cudaStream_t stream)
{
    size_t disp_step = disp.rows * msg_step;
    cudaSafeCall( cudaMemcpyToSymbol(cdisp_step1, &disp_step, sizeof(size_t)) );
    cudaSafeCall( cudaMemcpyToSymbol(cmsg_step1, &msg_step, sizeof(size_t)) );
@ -882,14 +882,12 @@ void compute_disp(const T* u, const T* d, const T* l, const T* r, const T* data_

    if (stream == 0)
        cudaSafeCall( cudaDeviceSynchronize() );
}

template void compute_disp(const short* u, const short* d, const short* l, const short* r, const short* data_cost_selected, const short* disp_selected, size_t msg_step,
                           const DevMem2D_<short>& disp, int nr_plane, cudaStream_t stream);

template void compute_disp(const float* u, const float* d, const float* l, const float* r, const float* data_cost_selected, const float* disp_selected, size_t msg_step,
                           const DevMem2D_<short>& disp, int nr_plane, cudaStream_t stream);

} // namespace stereocsbp
} // namespace stereocsbp
}}} // namespace cv { namespace gpu { namespace device {

END_OPENCV_DEVICE_NAMESPACE

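Note: for context, the macros this commit expands in place were presumably defined along these lines in the gpu module's internal headers (a sketch, not quoted from the tree):

    #define BEGIN_OPENCV_DEVICE_NAMESPACE namespace cv { namespace gpu { namespace device {
    #define END_OPENCV_DEVICE_NAMESPACE   }}}
    #define OPENCV_DEVICE_NAMESPACE       ::cv::gpu::device
    #define OPENCV_DEVICE_NAMESPACE_      ::cv::gpu::device::

Spelling the namespaces out, as the new code does, costs a little repetition but keeps the files greppable and friendly to tooling that does not run the preprocessor.
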

@ -52,75 +52,75 @@

#include "opencv2/gpu/device/functional.hpp"
#include "opencv2/gpu/device/filters.hpp"

BEGIN_OPENCV_DEVICE_NAMESPACE

namespace surf {

////////////////////////////////////////////////////////////////////////
// Global parameters

// The maximum number of features (before subpixel interpolation) that memory is reserved for.
__constant__ int c_max_candidates;
// The maximum number of features that memory is reserved for.
__constant__ int c_max_features;
// The image size.
__constant__ int c_img_rows;
__constant__ int c_img_cols;
// The number of layers.
__constant__ int c_nOctaveLayers;
// The hessian threshold.
__constant__ float c_hessianThreshold;

// The current octave.
__constant__ int c_octave;
// The current layer size.
__constant__ int c_layer_rows;
__constant__ int c_layer_cols;

void loadGlobalConstants(int maxCandidates, int maxFeatures, int img_rows, int img_cols, int nOctaveLayers, float hessianThreshold)
{
namespace cv { namespace gpu { namespace device
{
    namespace surf
    {
        ////////////////////////////////////////////////////////////////////////
        // Global parameters

        // The maximum number of features (before subpixel interpolation) that memory is reserved for.
        __constant__ int c_max_candidates;
        // The maximum number of features that memory is reserved for.
        __constant__ int c_max_features;
        // The image size.
        __constant__ int c_img_rows;
        __constant__ int c_img_cols;
        // The number of layers.
        __constant__ int c_nOctaveLayers;
        // The hessian threshold.
        __constant__ float c_hessianThreshold;

        // The current octave.
        __constant__ int c_octave;
        // The current layer size.
        __constant__ int c_layer_rows;
        __constant__ int c_layer_cols;

        void loadGlobalConstants(int maxCandidates, int maxFeatures, int img_rows, int img_cols, int nOctaveLayers, float hessianThreshold)
        {
    cudaSafeCall( cudaMemcpyToSymbol(c_max_candidates, &maxCandidates, sizeof(maxCandidates)) );
    cudaSafeCall( cudaMemcpyToSymbol(c_max_features, &maxFeatures, sizeof(maxFeatures)) );
    cudaSafeCall( cudaMemcpyToSymbol(c_img_rows, &img_rows, sizeof(img_rows)) );
    cudaSafeCall( cudaMemcpyToSymbol(c_img_cols, &img_cols, sizeof(img_cols)) );
    cudaSafeCall( cudaMemcpyToSymbol(c_nOctaveLayers, &nOctaveLayers, sizeof(nOctaveLayers)) );
    cudaSafeCall( cudaMemcpyToSymbol(c_hessianThreshold, &hessianThreshold, sizeof(hessianThreshold)) );
}

void loadOctaveConstants(int octave, int layer_rows, int layer_cols)
{
    cudaSafeCall( cudaMemcpyToSymbol(c_octave, &octave, sizeof(octave)) );
    cudaSafeCall( cudaMemcpyToSymbol(c_layer_rows, &layer_rows, sizeof(layer_rows)) );
    cudaSafeCall( cudaMemcpyToSymbol(c_layer_cols, &layer_cols, sizeof(layer_cols)) );
}

////////////////////////////////////////////////////////////////////////
// Integral image texture

texture<unsigned char, 2, cudaReadModeElementType> imgTex(0, cudaFilterModePoint, cudaAddressModeClamp);
texture<unsigned int, 2, cudaReadModeElementType> sumTex(0, cudaFilterModePoint, cudaAddressModeClamp);
texture<unsigned int, 2, cudaReadModeElementType> maskSumTex(0, cudaFilterModePoint, cudaAddressModeClamp);

void bindImgTex(DevMem2Db img)
{
    bindTexture(&imgTex, img);
}
void bindSumTex(DevMem2D_<uint> sum)
{
    bindTexture(&sumTex, sum);
}
void bindMaskSumTex(DevMem2D_<uint> maskSum)
{
    bindTexture(&maskSumTex, maskSum);
}

template <int N> __device__ float icvCalcHaarPatternSum(const float src[][5], int oldSize, int newSize, int y, int x)
{
#if __CUDA_ARCH__ >= 200
    typedef double real_t;
#else
    typedef float real_t;
#endif

    float ratio = (float)newSize / oldSize;

@ -144,17 +144,17 @@ template <int N> __device__ float icvCalcHaarPatternSum(const float src[][5], in
    }

    return (float)d;
}

////////////////////////////////////////////////////////////////////////
// Hessian

__constant__ float c_DX [3][5] = { {0, 2, 3, 7, 1}, {3, 2, 6, 7, -2}, {6, 2, 9, 7, 1} };
__constant__ float c_DY [3][5] = { {2, 0, 7, 3, 1}, {2, 3, 7, 6, -2}, {2, 6, 7, 9, 1} };
__constant__ float c_DXY[4][5] = { {1, 1, 4, 4, 1}, {5, 1, 8, 4, -1}, {1, 5, 4, 8, -1}, {5, 5, 8, 8, 1} };

__host__ __device__ __forceinline__ int calcSize(int octave, int layer)
{
    /* Wavelet size at first layer of first octave. */
    const int HAAR_SIZE0 = 9;

@ -165,10 +165,10 @@ __host__ __device__ __forceinline__ int calcSize(int octave, int layer)
    const int HAAR_SIZE_INC = 6;

    return (HAAR_SIZE0 + HAAR_SIZE_INC * layer) << octave;
}

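Note: calcSize encodes the SURF filter-size schedule — a 9x9 Haar filter at the first layer of the first octave, growing by 6 per layer and doubling per octave. Worked examples from the formula above:

    calcSize(0, 0); // (9 + 6*0) << 0 == 9
    calcSize(0, 2); // (9 + 6*2) << 0 == 21
    calcSize(1, 1); // (9 + 6*1) << 1 == 30
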
__global__ void icvCalcLayerDetAndTrace(PtrStepf det, PtrStepf trace)
{
    // Determine the indices
    const int gridDim_y = gridDim.y / (c_nOctaveLayers + 2);
    const int blockIdx_y = blockIdx.y % gridDim_y;
@ -195,10 +195,10 @@ __global__ void icvCalcLayerDetAndTrace(PtrStepf det, PtrStepf trace)
        det.ptr(layer * c_layer_rows + i + margin)[j + margin] = dx * dy - 0.81f * dxy * dxy;
        trace.ptr(layer * c_layer_rows + i + margin)[j + margin] = dx + dy;
    }
}

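Note: the 0.81f above is the usual SURF correction factor — the off-diagonal response is weighted by 0.9, so the approximated Hessian determinant is det(H) ≈ Dxx*Dyy - (0.9*Dxy)^2, and 0.9^2 = 0.81.
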
void icvCalcLayerDetAndTrace_gpu(const PtrStepf& det, const PtrStepf& trace, int img_rows, int img_cols, int octave, int nOctaveLayers)
{
    const int min_size = calcSize(octave, 0);
    const int max_samples_i = 1 + ((img_rows - min_size) >> octave);
    const int max_samples_j = 1 + ((img_cols - min_size) >> octave);
@ -213,15 +213,15 @@ void icvCalcLayerDetAndTrace_gpu(const PtrStepf& det, const PtrStepf& trace, int
    cudaSafeCall( cudaGetLastError() );

    cudaSafeCall( cudaDeviceSynchronize() );
}

////////////////////////////////////////////////////////////////////////
// NONMAX

__constant__ float c_DM[5] = {0, 0, 9, 9, 1};

struct WithMask
{
    static __device__ bool check(int sum_i, int sum_j, int size)
    {
        float ratio = (float)size / 9.0f;
@ -243,11 +243,11 @@ struct WithMask

        return (d >= 0.5f);
    }
};

template <typename Mask>
__global__ void icvFindMaximaInLayer(const PtrStepf det, const PtrStepf trace, int4* maxPosBuffer, unsigned int* maxCounter)
{
#if defined (__CUDA_ARCH__) && __CUDA_ARCH__ >= 110

    extern __shared__ float N9[];
@ -336,11 +336,11 @@ __global__ void icvFindMaximaInLayer(const PtrStepf det, const PtrStepf trace, i
    }

#endif
}

void icvFindMaximaInLayer_gpu(const PtrStepf& det, const PtrStepf& trace, int4* maxPosBuffer, unsigned int* maxCounter,
                              int img_rows, int img_cols, int octave, bool use_mask, int nOctaveLayers)
{
    const int layer_rows = img_rows >> octave;
    const int layer_cols = img_cols >> octave;

@ -362,15 +362,15 @@ void icvFindMaximaInLayer_gpu(const PtrStepf& det, const PtrStepf& trace, int4*
    cudaSafeCall( cudaGetLastError() );

    cudaSafeCall( cudaDeviceSynchronize() );
}

////////////////////////////////////////////////////////////////////////
// INTERPOLATION

__global__ void icvInterpolateKeypoint(const PtrStepf det, const int4* maxPosBuffer,
                                       float* featureX, float* featureY, int* featureLaplacian, float* featureSize, float* featureHessian,
                                       unsigned int* featureCounter)
{
#if defined (__CUDA_ARCH__) && __CUDA_ARCH__ >= 110

    const int4 maxPos = maxPosBuffer[blockIdx.x];
@ -468,12 +468,12 @@ __global__ void icvInterpolateKeypoint(const PtrStepf det, const int4* maxPosBuf
    } // If this is thread 0.

#endif
}

void icvInterpolateKeypoint_gpu(const PtrStepf& det, const int4* maxPosBuffer, unsigned int maxCounter,
                                float* featureX, float* featureY, int* featureLaplacian, float* featureSize, float* featureHessian,
                                unsigned int* featureCounter)
{
    dim3 threads;
    threads.x = 3;
    threads.y = 3;
@ -486,24 +486,24 @@ void icvInterpolateKeypoint_gpu(const PtrStepf& det, const int4* maxPosBuffer, u
    cudaSafeCall( cudaGetLastError() );

    cudaSafeCall( cudaDeviceSynchronize() );
}

////////////////////////////////////////////////////////////////////////
// Orientation

#define ORI_SEARCH_INC 5
#define ORI_WIN 60
#define ORI_SAMPLES 113

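Note: these three constants drive the dominant-orientation search: ORI_SAMPLES = 113 weighted sample points inside a radius-6 disc (the c_aptX/c_aptY tables below), scanned by a sliding ORI_WIN = 60 degree window advanced in ORI_SEARCH_INC = 5 degree steps — 360/5 = 72 candidate windows per feature, with c_aptW supplying what appears to be a Gaussian weight per sample.
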
__constant__ float c_aptX[ORI_SAMPLES] = {-6, -5, -5, -5, -5, -5, -5, -5, -4, -4, -4, -4, -4, -4, -4, -4, -4, -3, -3, -3, -3, -3, -3, -3, -3, -3, -3, -3, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 5, 6};
__constant__ float c_aptY[ORI_SAMPLES] = {0, -3, -2, -1, 0, 1, 2, 3, -4, -3, -2, -1, 0, 1, 2, 3, 4, -5, -4, -3, -2, -1, 0, 1, 2, 3, 4, 5, -5, -4, -3, -2, -1, 0, 1, 2, 3, 4, 5, -5, -4, -3, -2, -1, 0, 1, 2, 3, 4, 5, -6, -5, -4, -3, -2, -1, 0, 1, 2, 3, 4, 5, 6, -5, -4, -3, -2, -1, 0, 1, 2, 3, 4, 5, -5, -4, -3, -2, -1, 0, 1, 2, 3, 4, 5, -5, -4, -3, -2, -1, 0, 1, 2, 3, 4, 5, -4, -3, -2, -1, 0, 1, 2, 3, 4, -3, -2, -1, 0, 1, 2, 3, 0};
__constant__ float c_aptW[ORI_SAMPLES] = {0.001455130288377404f, 0.001707611023448408f, 0.002547456417232752f, 0.003238451667129993f, 0.0035081731621176f, 0.003238451667129993f, 0.002547456417232752f, 0.001707611023448408f, 0.002003900473937392f, 0.0035081731621176f, 0.005233579315245152f, 0.00665318313986063f, 0.00720730796456337f, 0.00665318313986063f, 0.005233579315245152f, 0.0035081731621176f, 0.002003900473937392f, 0.001707611023448408f, 0.0035081731621176f, 0.006141661666333675f, 0.009162282571196556f, 0.01164754293859005f, 0.01261763460934162f, 0.01164754293859005f, 0.009162282571196556f, 0.006141661666333675f, 0.0035081731621176f, 0.001707611023448408f, 0.002547456417232752f, 0.005233579315245152f, 0.009162282571196556f, 0.01366852037608624f, 0.01737609319388866f, 0.0188232995569706f, 0.01737609319388866f, 0.01366852037608624f, 0.009162282571196556f, 0.005233579315245152f, 0.002547456417232752f, 0.003238451667129993f, 0.00665318313986063f, 0.01164754293859005f, 0.01737609319388866f, 0.02208934165537357f, 0.02392910048365593f, 0.02208934165537357f, 0.01737609319388866f, 0.01164754293859005f, 0.00665318313986063f, 0.003238451667129993f, 0.001455130288377404f, 0.0035081731621176f, 0.00720730796456337f, 0.01261763460934162f, 0.0188232995569706f, 0.02392910048365593f, 0.02592208795249462f, 0.02392910048365593f, 0.0188232995569706f, 0.01261763460934162f, 0.00720730796456337f, 0.0035081731621176f, 0.001455130288377404f, 0.003238451667129993f, 0.00665318313986063f, 0.01164754293859005f, 0.01737609319388866f, 0.02208934165537357f, 0.02392910048365593f, 0.02208934165537357f, 0.01737609319388866f, 0.01164754293859005f, 0.00665318313986063f, 0.003238451667129993f, 0.002547456417232752f, 0.005233579315245152f, 0.009162282571196556f, 0.01366852037608624f, 0.01737609319388866f, 0.0188232995569706f, 0.01737609319388866f, 0.01366852037608624f, 0.009162282571196556f, 0.005233579315245152f, 0.002547456417232752f, 0.001707611023448408f, 0.0035081731621176f, 0.006141661666333675f, 0.009162282571196556f, 0.01164754293859005f, 0.01261763460934162f, 0.01164754293859005f, 0.009162282571196556f, 0.006141661666333675f, 0.0035081731621176f, 0.001707611023448408f, 0.002003900473937392f, 0.0035081731621176f, 0.005233579315245152f, 0.00665318313986063f, 0.00720730796456337f, 0.00665318313986063f, 0.005233579315245152f, 0.0035081731621176f, 0.002003900473937392f, 0.001707611023448408f, 0.002547456417232752f, 0.003238451667129993f, 0.0035081731621176f, 0.003238451667129993f, 0.002547456417232752f, 0.001707611023448408f, 0.001455130288377404f};

__constant__ float c_NX[2][5] = {{0, 0, 2, 4, -1}, {2, 0, 4, 4, 1}};
__constant__ float c_NY[2][5] = {{0, 0, 4, 2, 1}, {0, 2, 4, 4, -1}};

__global__ void icvCalcOrientation(const float* featureX, const float* featureY, const float* featureSize, float* featureDir)
{
#if defined (__CUDA_ARCH__) && __CUDA_ARCH__ >= 110

    __shared__ float s_X[128];
@ -644,14 +644,14 @@ __global__ void icvCalcOrientation(const float* featureX, const float* featureY,
    }

#endif
}

#undef ORI_SEARCH_INC
#undef ORI_WIN
#undef ORI_SAMPLES

void icvCalcOrientation_gpu(const float* featureX, const float* featureY, const float* featureSize, float* featureDir, int nFeatures)
{
    dim3 threads;
    threads.x = 32;
    threads.y = 4;
@ -663,15 +663,15 @@ void icvCalcOrientation_gpu(const float* featureX, const float* featureY, const
    cudaSafeCall( cudaGetLastError() );

    cudaSafeCall( cudaDeviceSynchronize() );
}

////////////////////////////////////////////////////////////////////////
// Descriptors

#define PATCH_SZ 20

__constant__ float c_DW[PATCH_SZ * PATCH_SZ] =
{
    3.695352233989979e-006f, 8.444558261544444e-006f, 1.760426494001877e-005f, 3.34794785885606e-005f, 5.808438800158911e-005f, 9.193058212986216e-005f, 0.0001327334757661447f, 0.0001748319627949968f, 0.0002100782439811155f, 0.0002302826324012131f, 0.0002302826324012131f, 0.0002100782439811155f, 0.0001748319627949968f, 0.0001327334757661447f, 9.193058212986216e-005f, 5.808438800158911e-005f, 3.34794785885606e-005f, 1.760426494001877e-005f, 8.444558261544444e-006f, 3.695352233989979e-006f,
    8.444558261544444e-006f, 1.929736572492402e-005f, 4.022897701361217e-005f, 7.650675252079964e-005f, 0.0001327334903180599f, 0.0002100782585330308f, 0.0003033203829545528f, 0.0003995231236331165f, 0.0004800673632416874f, 0.0005262381164357066f, 0.0005262381164357066f, 0.0004800673632416874f, 0.0003995231236331165f, 0.0003033203829545528f, 0.0002100782585330308f, 0.0001327334903180599f, 7.650675252079964e-005f, 4.022897701361217e-005f, 1.929736572492402e-005f, 8.444558261544444e-006f,
    1.760426494001877e-005f, 4.022897701361217e-005f, 8.386484114453197e-005f, 0.0001594926579855382f, 0.0002767078403849155f, 0.0004379475140012801f, 0.0006323281559161842f, 0.0008328808471560478f, 0.001000790391117334f, 0.001097041997127235f, 0.001097041997127235f, 0.001000790391117334f, 0.0008328808471560478f, 0.0006323281559161842f, 0.0004379475140012801f, 0.0002767078403849155f, 0.0001594926579855382f, 8.386484114453197e-005f, 4.022897701361217e-005f, 1.760426494001877e-005f,
@ -692,10 +692,10 @@ __constant__ float c_DW[PATCH_SZ * PATCH_SZ] =
    1.760426494001877e-005f, 4.022897701361217e-005f, 8.386484114453197e-005f, 0.0001594926579855382f, 0.0002767078403849155f, 0.0004379475140012801f, 0.0006323281559161842f, 0.0008328808471560478f, 0.001000790391117334f, 0.001097041997127235f, 0.001097041997127235f, 0.001000790391117334f, 0.0008328808471560478f, 0.0006323281559161842f, 0.0004379475140012801f, 0.0002767078403849155f, 0.0001594926579855382f, 8.386484114453197e-005f, 4.022897701361217e-005f, 1.760426494001877e-005f,
    8.444558261544444e-006f, 1.929736572492402e-005f, 4.022897701361217e-005f, 7.650675252079964e-005f, 0.0001327334903180599f, 0.0002100782585330308f, 0.0003033203829545528f, 0.0003995231236331165f, 0.0004800673632416874f, 0.0005262381164357066f, 0.0005262381164357066f, 0.0004800673632416874f, 0.0003995231236331165f, 0.0003033203829545528f, 0.0002100782585330308f, 0.0001327334903180599f, 7.650675252079964e-005f, 4.022897701361217e-005f, 1.929736572492402e-005f, 8.444558261544444e-006f,
    3.695352233989979e-006f, 8.444558261544444e-006f, 1.760426494001877e-005f, 3.34794785885606e-005f, 5.808438800158911e-005f, 9.193058212986216e-005f, 0.0001327334757661447f, 0.0001748319627949968f, 0.0002100782439811155f, 0.0002302826324012131f, 0.0002302826324012131f, 0.0002100782439811155f, 0.0001748319627949968f, 0.0001327334757661447f, 9.193058212986216e-005f, 5.808438800158911e-005f, 3.34794785885606e-005f, 1.760426494001877e-005f, 8.444558261544444e-006f, 3.695352233989979e-006f
};

struct WinReader
{
    typedef uchar elem_type;

    __device__ __forceinline__ WinReader(float centerX_, float centerY_, float win_offset_, float cos_dir_, float sin_dir_) :
@ -716,11 +716,11 @@ struct WinReader
    float win_offset;
    float cos_dir;
    float sin_dir;
};

__device__ void calc_dx_dy(float s_dx_bin[25], float s_dy_bin[25],
                           const float* featureX, const float* featureY, const float* featureSize, const float* featureDir)
{
    __shared__ float s_PATCH[6][6];

    const float centerX = featureX[blockIdx.x];
@ -770,10 +770,10 @@ __device__ void calc_dx_dy(float s_dx_bin[25], float s_dy_bin[25],
        s_dx_bin[tid] = vx;
        s_dy_bin[tid] = vy;
    }
}

__device__ void reduce_sum25(volatile float* sdata1, volatile float* sdata2, volatile float* sdata3, volatile float* sdata4, int tid)
{
    // first step is to reduce from 25 to 16
    if (tid < 9) // use 9 threads
    {
@ -806,10 +806,10 @@ __device__ void reduce_sum25(volatile float* sdata1, volatile float* sdata2, vol
        sdata4[tid] += sdata4[tid + 2];
        sdata4[tid] += sdata4[tid + 1];
    }
}

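Note: reduce_sum25 sums 25 per-sub-region partials in two phases — fold the 9 elements beyond a power of two onto the low indices, then run a warp-synchronous tree reduction over the remaining 16 (hence the volatile pointers). A single-array sketch of the same idea, assuming the values sit in shared memory and the threads execute in lock-step as on hardware of that era:

    __device__ void reduce25(volatile float* s, int tid)
    {
        if (tid < 9)              // fold 25 -> 16
            s[tid] += s[tid + 16];
        if (tid < 8)              // tree-reduce 16 -> 1, no __syncthreads needed intra-warp
        {
            s[tid] += s[tid + 8];
            s[tid] += s[tid + 4];
            s[tid] += s[tid + 2];
            s[tid] += s[tid + 1];
        }
    }

Newer architectures (Volta and later) would need __syncwarp() between the steps; the original relies on pre-Volta lock-step warps.
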
__global__ void compute_descriptors64(PtrStepf descriptors, const float* featureX, const float* featureY, const float* featureSize, const float* featureDir)
{
    // 2 floats (dx,dy) for each thread (5x5 sample points in each sub-region)
    __shared__ float sdx[25];
    __shared__ float sdy[25];
@ -841,10 +841,10 @@ __global__ void compute_descriptors64(PtrStepf descriptors, const float* feature
            descriptors_block[3] = sdyabs[0];
        }
    }
}

__global__ void compute_descriptors128(PtrStepf descriptors, const float* featureX, const float* featureY, const float* featureSize, const float* featureDir)
{
    // 2 floats (dx,dy) for each thread (5x5 sample points in each sub-region)
    __shared__ float sdx[25];
    __shared__ float sdy[25];
@ -921,10 +921,10 @@ __global__ void compute_descriptors128(PtrStepf descriptors, const float* featur
            descriptors_block[7] = sdabs2[0];
        }
    }
}

template <int BLOCK_DIM_X> __global__ void normalize_descriptors(PtrStepf descriptors)
{
    // no need for thread ID
    float* descriptor_base = descriptors.ptr(blockIdx.x);

@ -964,11 +964,11 @@ template <int BLOCK_DIM_X> __global__ void normalize_descriptors(PtrStepf descri

    // normalize and store in output
    descriptor_base[threadIdx.x] = lookup / len;
}

void compute_descriptors_gpu(const DevMem2Df& descriptors,
                             const float* featureX, const float* featureY, const float* featureSize, const float* featureDir, int nFeatures)
{
    // compute unnormalized descriptors, then normalize them - odd indexing since grid must be 2D

    if (descriptors.cols == 64)
@ -995,8 +995,6 @@ void compute_descriptors_gpu(const DevMem2Df& descriptors,

        cudaSafeCall( cudaDeviceSynchronize() );
    }
}

} // namespace surf
} // namespace surf
}}} // namespace cv { namespace gpu { namespace device

END_OPENCV_DEVICE_NAMESPACE


@ -71,20 +71,19 @@ cv::gpu::Stream::operator bool() const { throw_nogpu(); return false; }

#include "opencv2/gpu/stream_accessor.hpp"

BEGIN_OPENCV_DEVICE_NAMESPACE

void copy_to_with_mask(const DevMem2Db& src, DevMem2Db dst, int depth, const DevMem2Db& mask, int channels, const cudaStream_t & stream = 0);

template <typename T>
void set_to_gpu(const DevMem2Db& mat, const T* scalar, int channels, cudaStream_t stream);
template <typename T>
void set_to_gpu(const DevMem2Db& mat, const T* scalar, const DevMem2Db& mask, int channels, cudaStream_t stream);

void convert_gpu(const DevMem2Db& src, int sdepth, const DevMem2Db& dst, int ddepth, double alpha, double beta, cudaStream_t stream = 0);

END_OPENCV_DEVICE_NAMESPACE

using namespace OPENCV_DEVICE_NAMESPACE;
namespace cv { namespace gpu { namespace device
{
    void copy_to_with_mask(const DevMem2Db& src, DevMem2Db dst, int depth, const DevMem2Db& mask, int channels, const cudaStream_t & stream = 0);

    template <typename T>
    void set_to_gpu(const DevMem2Db& mat, const T* scalar, int channels, cudaStream_t stream);
    template <typename T>
    void set_to_gpu(const DevMem2Db& mat, const T* scalar, const DevMem2Db& mask, int channels, cudaStream_t stream);

    void convert_gpu(const DevMem2Db& src, int sdepth, const DevMem2Db& dst, int ddepth, double alpha, double beta, cudaStream_t stream = 0);
}}}

using namespace ::cv::gpu::device;

struct Stream::Impl
{
@ -123,19 +123,18 @@ namespace
////////////////////////////////////////////////////////////////////////
// add

BEGIN_OPENCV_DEVICE_NAMESPACE

template <typename T, typename D>
void add_gpu(const DevMem2Db& src1, const DevMem2Db& src2, const DevMem2Db& dst, const PtrStepb& mask, cudaStream_t stream);

template <typename T, typename D>
void add_gpu(const DevMem2Db& src1, double val, const DevMem2Db& dst, const PtrStepb& mask, cudaStream_t stream);

END_OPENCV_DEVICE_NAMESPACE
namespace cv { namespace gpu { namespace device
{
    template <typename T, typename D>
    void add_gpu(const DevMem2Db& src1, const DevMem2Db& src2, const DevMem2Db& dst, const PtrStepb& mask, cudaStream_t stream);

    template <typename T, typename D>
    void add_gpu(const DevMem2Db& src1, double val, const DevMem2Db& dst, const PtrStepb& mask, cudaStream_t stream);
}}}

void cv::gpu::add(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, const GpuMat& mask, int dtype, Stream& s)
{
    using namespace OPENCV_DEVICE_NAMESPACE;
    using namespace ::cv::gpu::device;

    typedef void (*func_t)(const DevMem2Db& src1, const DevMem2Db& src2, const DevMem2Db& dst, const PtrStepb& mask, cudaStream_t stream);

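Note: each of these wrappers pairs the func_t typedef with a static table of kernel-caller instantiations indexed by depth, then dispatches on the operand types. A condensed sketch of the dispatch (table contents abbreviated and assumed; the real table covers all seven CV_* depths):

    static const func_t funcs[7][7] =
    {
        { add_gpu<unsigned char, unsigned char>, add_gpu<unsigned char, schar>, /* ... */ },
        /* ... one row per source depth, one column per destination depth ... */
    };

    funcs[src1.depth()][dst.depth()](src1.reshape(1), src2.reshape(1), dst.reshape(1),
                                     mask, StreamAccessor::getStream(s));
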
@ -174,7 +173,7 @@ void cv::gpu::add(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, const Gpu
|
|||||||
|
|
||||||
void cv::gpu::add(const GpuMat& src, const Scalar& sc, GpuMat& dst, const GpuMat& mask, int dtype, Stream& s)
|
void cv::gpu::add(const GpuMat& src, const Scalar& sc, GpuMat& dst, const GpuMat& mask, int dtype, Stream& s)
|
||||||
{
|
{
|
||||||
using namespace OPENCV_DEVICE_NAMESPACE;
|
using namespace ::cv::gpu::device;
|
||||||
|
|
||||||
typedef void (*func_t)(const DevMem2Db& src1, double val, const DevMem2Db& dst, const PtrStepb& mask, cudaStream_t stream);
|
typedef void (*func_t)(const DevMem2Db& src1, double val, const DevMem2Db& dst, const PtrStepb& mask, cudaStream_t stream);
|
||||||
|
|
||||||
@ -236,19 +235,18 @@ void cv::gpu::add(const GpuMat& src, const Scalar& sc, GpuMat& dst, const GpuMat
|
|||||||
////////////////////////////////////////////////////////////////////////
|
////////////////////////////////////////////////////////////////////////
|
||||||
// subtract
|
// subtract
|
||||||
|
|
||||||
BEGIN_OPENCV_DEVICE_NAMESPACE
|
namespace cv { namespace gpu { namespace device
|
||||||
|
{
|
||||||
|
template <typename T, typename D>
|
||||||
|
void subtract_gpu(const DevMem2Db& src1, const DevMem2Db& src2, const DevMem2Db& dst, const PtrStepb& mask, cudaStream_t stream);
|
||||||
|
|
||||||
template <typename T, typename D>
|
template <typename T, typename D>
|
||||||
void subtract_gpu(const DevMem2Db& src1, const DevMem2Db& src2, const DevMem2Db& dst, const PtrStepb& mask, cudaStream_t stream);
|
void subtract_gpu(const DevMem2Db& src1, double val, const DevMem2Db& dst, const PtrStepb& mask, cudaStream_t stream);
|
||||||
|
}}}
|
||||||
template <typename T, typename D>
|
|
||||||
void subtract_gpu(const DevMem2Db& src1, double val, const DevMem2Db& dst, const PtrStepb& mask, cudaStream_t stream);
|
|
||||||
|
|
||||||
END_OPENCV_DEVICE_NAMESPACE
|
|
||||||
|
|
||||||
void cv::gpu::subtract(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, const GpuMat& mask, int dtype, Stream& s)
|
void cv::gpu::subtract(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, const GpuMat& mask, int dtype, Stream& s)
|
||||||
{
|
{
|
||||||
using namespace OPENCV_DEVICE_NAMESPACE;
|
using namespace ::cv::gpu::device;
|
||||||
|
|
||||||
typedef void (*func_t)(const DevMem2Db& src1, const DevMem2Db& src2, const DevMem2Db& dst, const PtrStepb& mask, cudaStream_t stream);
|
typedef void (*func_t)(const DevMem2Db& src1, const DevMem2Db& src2, const DevMem2Db& dst, const PtrStepb& mask, cudaStream_t stream);
|
||||||
|
|
||||||
@ -287,7 +285,7 @@ void cv::gpu::subtract(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, cons
|
|||||||
|
|
||||||
void cv::gpu::subtract(const GpuMat& src, const Scalar& sc, GpuMat& dst, const GpuMat& mask, int dtype, Stream& s)
|
void cv::gpu::subtract(const GpuMat& src, const Scalar& sc, GpuMat& dst, const GpuMat& mask, int dtype, Stream& s)
|
||||||
{
|
{
|
||||||
using namespace OPENCV_DEVICE_NAMESPACE;
|
using namespace ::cv::gpu::device;
|
||||||
|
|
||||||
typedef void (*func_t)(const DevMem2Db& src1, double val, const DevMem2Db& dst, const PtrStepb& mask, cudaStream_t stream);
|
typedef void (*func_t)(const DevMem2Db& src1, double val, const DevMem2Db& dst, const PtrStepb& mask, cudaStream_t stream);
|
||||||
|
|
||||||
@ -349,22 +347,21 @@ void cv::gpu::subtract(const GpuMat& src, const Scalar& sc, GpuMat& dst, const G
|
|||||||
////////////////////////////////////////////////////////////////////////
|
////////////////////////////////////////////////////////////////////////
|
||||||
// multiply
|
// multiply
|
||||||
|
|
||||||
BEGIN_OPENCV_DEVICE_NAMESPACE
|
namespace cv { namespace gpu { namespace device
|
||||||
|
{
|
||||||
|
void multiply_gpu(const DevMem2D_<uchar4>& src1, const DevMem2Df& src2, const DevMem2D_<uchar4>& dst, cudaStream_t stream);
|
||||||
|
void multiply_gpu(const DevMem2D_<short4>& src1, const DevMem2Df& src2, const DevMem2D_<short4>& dst, cudaStream_t stream);
|
||||||
|
|
||||||
void multiply_gpu(const DevMem2D_<uchar4>& src1, const DevMem2Df& src2, const DevMem2D_<uchar4>& dst, cudaStream_t stream);
|
template <typename T, typename D>
|
||||||
void multiply_gpu(const DevMem2D_<short4>& src1, const DevMem2Df& src2, const DevMem2D_<short4>& dst, cudaStream_t stream);
|
void multiply_gpu(const DevMem2Db& src1, const DevMem2Db& src2, const DevMem2Db& dst, double scale, cudaStream_t stream);
|
||||||
|
|
||||||
template <typename T, typename D>
|
template <typename T, typename D>
|
||||||
void multiply_gpu(const DevMem2Db& src1, const DevMem2Db& src2, const DevMem2Db& dst, double scale, cudaStream_t stream);
|
void multiply_gpu(const DevMem2Db& src1, double val, const DevMem2Db& dst, double scale, cudaStream_t stream);
|
||||||
|
}}}
|
||||||
template <typename T, typename D>
|
|
||||||
void multiply_gpu(const DevMem2Db& src1, double val, const DevMem2Db& dst, double scale, cudaStream_t stream);
|
|
||||||
|
|
||||||
END_OPENCV_DEVICE_NAMESPACE
|
|
||||||
|
|
||||||
void cv::gpu::multiply(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, double scale, int dtype, Stream& s)
|
void cv::gpu::multiply(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, double scale, int dtype, Stream& s)
|
||||||
{
|
{
|
||||||
using namespace OPENCV_DEVICE_NAMESPACE;
|
using namespace ::cv::gpu::device;
|
||||||
|
|
||||||
typedef void (*func_t)(const DevMem2Db& src1, const DevMem2Db& src2, const DevMem2Db& dst, double scale, cudaStream_t stream);
|
typedef void (*func_t)(const DevMem2Db& src1, const DevMem2Db& src2, const DevMem2Db& dst, double scale, cudaStream_t stream);
|
||||||
|
|
||||||
@ -422,7 +419,7 @@ void cv::gpu::multiply(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, doub
|
|||||||
|
|
||||||
void cv::gpu::multiply(const GpuMat& src, const Scalar& sc, GpuMat& dst, double scale, int dtype, Stream& s)
|
void cv::gpu::multiply(const GpuMat& src, const Scalar& sc, GpuMat& dst, double scale, int dtype, Stream& s)
|
||||||
{
|
{
|
||||||
using namespace OPENCV_DEVICE_NAMESPACE;
|
using namespace ::cv::gpu::device;
|
||||||
|
|
||||||
typedef void (*func_t)(const DevMem2Db& src1, double val, const DevMem2Db& dst, double scale, cudaStream_t stream);
|
typedef void (*func_t)(const DevMem2Db& src1, double val, const DevMem2Db& dst, double scale, cudaStream_t stream);
|
||||||
|
|
||||||
@ -472,25 +469,24 @@ void cv::gpu::multiply(const GpuMat& src, const Scalar& sc, GpuMat& dst, double

////////////////////////////////////////////////////////////////////////
// divide

BEGIN_OPENCV_DEVICE_NAMESPACE
namespace cv { namespace gpu { namespace device
{
    void divide_gpu(const DevMem2D_<uchar4>& src1, const DevMem2Df& src2, const DevMem2D_<uchar4>& dst, cudaStream_t stream);
    void divide_gpu(const DevMem2D_<short4>& src1, const DevMem2Df& src2, const DevMem2D_<short4>& dst, cudaStream_t stream);

    template <typename T, typename D>
    void divide_gpu(const DevMem2Db& src1, const DevMem2Db& src2, const DevMem2Db& dst, double scale, cudaStream_t stream);

    template <typename T, typename D>
    void divide_gpu(const DevMem2Db& src1, double val, const DevMem2Db& dst, double scale, cudaStream_t stream);

    template <typename T, typename D>
    void divide_gpu(double scalar, const DevMem2Db& src2, const DevMem2Db& dst, cudaStream_t stream);
}}}
END_OPENCV_DEVICE_NAMESPACE

void cv::gpu::divide(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, double scale, int dtype, Stream& s)
{
    using namespace OPENCV_DEVICE_NAMESPACE;
    using namespace ::cv::gpu::device;

    typedef void (*func_t)(const DevMem2Db& src1, const DevMem2Db& src2, const DevMem2Db& dst, double scale, cudaStream_t stream);

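All of these host wrappers share one idiom: a typedef'd function pointer plus a static table of kernel instantiations indexed by matrix depth, so the dtype dispatch happens in one line. A stripped-down sketch of the idiom; every name here is hypothetical and the table is shrunk to 2x2.

#include <cassert>

// Hypothetical stand-ins for the kernel launchers the diff declares.
typedef void (*func_t)(const void* src, void* dst, double scale);

static void kernel_8u_8u (const void*, void*, double) { /* uchar -> uchar */ }
static void kernel_8u_32f(const void*, void*, double) { /* uchar -> float */ }

// One row per source depth, one column per destination depth (2x2 sketch).
static const func_t funcs[2][2] =
{
    { kernel_8u_8u, kernel_8u_32f },
    { 0,            0             }    // unsupported combinations stay null
};

static void dispatch(int sdepth, int ddepth, const void* src, void* dst, double scale)
{
    const func_t func = funcs[sdepth][ddepth];
    assert(func != 0);                 // the real wrappers CV_Assert instead
    func(src, dst, scale);
}
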
@ -548,7 +544,7 @@ void cv::gpu::divide(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, double

void cv::gpu::divide(const GpuMat& src, const Scalar& sc, GpuMat& dst, double scale, int dtype, Stream& s)
{
    using namespace OPENCV_DEVICE_NAMESPACE;
    using namespace ::cv::gpu::device;

    typedef void (*func_t)(const DevMem2Db& src1, double val, const DevMem2Db& dst, double scale, cudaStream_t stream);

@ -597,7 +593,7 @@ void cv::gpu::divide(const GpuMat& src, const Scalar& sc, GpuMat& dst, double sc

void cv::gpu::divide(double scale, const GpuMat& src, GpuMat& dst, int dtype, Stream& s)
{
    using namespace OPENCV_DEVICE_NAMESPACE;
    using namespace ::cv::gpu::device;

    typedef void (*func_t)(double scalar, const DevMem2Db& src2, const DevMem2Db& dst, cudaStream_t stream);

@ -630,19 +626,18 @@ void cv::gpu::divide(double scale, const GpuMat& src, GpuMat& dst, int dtype, St

//////////////////////////////////////////////////////////////////////////////
// absdiff

BEGIN_OPENCV_DEVICE_NAMESPACE
namespace cv { namespace gpu { namespace device
{
    template <typename T>
    void absdiff_gpu(const DevMem2Db& src1, const DevMem2Db& src2, const DevMem2Db& dst, cudaStream_t stream);

    template <typename T>
    void absdiff_gpu(const DevMem2Db& src1, double val, const DevMem2Db& dst, cudaStream_t stream);
}}}
END_OPENCV_DEVICE_NAMESPACE

void cv::gpu::absdiff(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, Stream& s)
{
    using namespace OPENCV_DEVICE_NAMESPACE;
    using namespace ::cv::gpu::device;

    typedef void (*func_t)(const DevMem2Db& src1, const DevMem2Db& src2, const DevMem2Db& dst, cudaStream_t stream);

@ -714,7 +709,7 @@ void cv::gpu::absdiff(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, Strea

void cv::gpu::absdiff(const GpuMat& src1, const Scalar& src2, GpuMat& dst, Stream& s)
{
    using namespace OPENCV_DEVICE_NAMESPACE;
    using namespace ::cv::gpu::device;

    typedef void (*func_t)(const DevMem2Db& src1, double val, const DevMem2Db& dst, cudaStream_t stream);

@ -758,18 +753,17 @@ void cv::gpu::absdiff(const GpuMat& src1, const Scalar& src2, GpuMat& dst, Strea

//////////////////////////////////////////////////////////////////////////////
// Comparison of two matrixes

BEGIN_OPENCV_DEVICE_NAMESPACE
namespace cv { namespace gpu { namespace device
{
    template <typename T> void compare_eq(const DevMem2Db& src1, const DevMem2Db& src2, const DevMem2Db& dst, cudaStream_t stream);
    template <typename T> void compare_ne(const DevMem2Db& src1, const DevMem2Db& src2, const DevMem2Db& dst, cudaStream_t stream);
    template <typename T> void compare_lt(const DevMem2Db& src1, const DevMem2Db& src2, const DevMem2Db& dst, cudaStream_t stream);
    template <typename T> void compare_le(const DevMem2Db& src1, const DevMem2Db& src2, const DevMem2Db& dst, cudaStream_t stream);
}}}
END_OPENCV_DEVICE_NAMESPACE

void cv::gpu::compare(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, int cmpop, Stream& stream)
{
    using namespace OPENCV_DEVICE_NAMESPACE;
    using namespace ::cv::gpu::device;

    typedef void (*func_t)(const DevMem2Db& src1, const DevMem2Db& src2, const DevMem2Db& dst, cudaStream_t stream);

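Only four kernels (eq, ne, lt, le) are declared above, yet cv::gpu::compare supports six comparison codes; the usual trick, and presumably the one used here, is to serve CMP_GT and CMP_GE by calling the lt/le kernels with the operands swapped. Host-side sketch; the enum values mirror OpenCV's ordering but the helper names are illustrative.

enum CmpOp { CMP_EQ, CMP_GT, CMP_GE, CMP_LT, CMP_LE, CMP_NE };

static void eq(int a, int b, int* r) { *r = (a == b) ? 255 : 0; }
static void ne(int a, int b, int* r) { *r = (a != b) ? 255 : 0; }
static void lt(int a, int b, int* r) { *r = (a <  b) ? 255 : 0; }
static void le(int a, int b, int* r) { *r = (a <= b) ? 255 : 0; }

static void compare_demo(int a, int b, int op, int* r)
{
    switch (op)
    {
    case CMP_EQ: eq(a, b, r); break;
    case CMP_NE: ne(a, b, r); break;
    case CMP_LT: lt(a, b, r); break;
    case CMP_LE: le(a, b, r); break;
    case CMP_GT: lt(b, a, r); break;  // a > b  is  b < a
    case CMP_GE: le(b, a, r); break;  // a >= b is  b <= a
    }
}
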
@ -835,14 +829,13 @@ void cv::gpu::compare(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, int c

//////////////////////////////////////////////////////////////////////////////
// Unary bitwise logical operations

BEGIN_OPENCV_DEVICE_NAMESPACE
namespace cv { namespace gpu { namespace device
{
    void bitwiseNotCaller(int rows, int cols, size_t elem_size1, int cn, const PtrStepb src, PtrStepb dst, cudaStream_t stream);

    template <typename T>
    void bitwiseMaskNotCaller(int rows, int cols, int cn, const PtrStepb src, const PtrStepb mask, PtrStepb dst, cudaStream_t stream);
}}}
END_OPENCV_DEVICE_NAMESPACE

namespace
{

@ -850,13 +843,13 @@ namespace
    {
        dst.create(src.size(), src.type());

        OPENCV_DEVICE_NAMESPACE_ bitwiseNotCaller(src.rows, src.cols, src.elemSize1(), dst.channels(), src, dst, stream);
        ::cv::gpu::device::bitwiseNotCaller(src.rows, src.cols, src.elemSize1(), dst.channels(), src, dst, stream);
    }

    void bitwiseNotCaller(const GpuMat& src, GpuMat& dst, const GpuMat& mask, cudaStream_t stream)
    {
        using namespace OPENCV_DEVICE_NAMESPACE;
        using namespace ::cv::gpu::device;

        typedef void (*Caller)(int, int, int, const PtrStepb, const PtrStepb, PtrStepb, cudaStream_t);

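Note why the call is spelled with a leading "::": the host helper in the anonymous namespace deliberately reuses the launcher's name bitwiseNotCaller, so an unqualified call from inside it would resolve back to itself. A self-contained sketch of the same trap:

namespace cv { namespace gpu { namespace device
{
    void bitwiseNotCaller(int x) { (void)x; /* device-side launcher */ }
}}}

namespace
{
    void bitwiseNotCaller(double y)   // host-side helper with the same name
    {
        // Fully qualified: unambiguously the launcher above. An unqualified
        // call here would pick this very function again and recurse forever.
        ::cv::gpu::device::bitwiseNotCaller(static_cast<int>(y));
    }
}

int main() { bitwiseNotCaller(1.0); return 0; }
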
@ -893,24 +886,23 @@ void cv::gpu::bitwise_not(const GpuMat& src, GpuMat& dst, const GpuMat& mask, St

//////////////////////////////////////////////////////////////////////////////
// Binary bitwise logical operations

BEGIN_OPENCV_DEVICE_NAMESPACE
namespace cv { namespace gpu { namespace device
{
    void bitwiseOrCaller(int rows, int cols, size_t elem_size1, int cn, const PtrStepb src1, const PtrStepb src2, PtrStepb dst, cudaStream_t stream);

    template <typename T>
    void bitwiseMaskOrCaller(int rows, int cols, int cn, const PtrStepb src1, const PtrStepb src2, const PtrStepb mask, PtrStepb dst, cudaStream_t stream);

    void bitwiseAndCaller(int rows, int cols, size_t elem_size1, int cn, const PtrStepb src1, const PtrStepb src2, PtrStepb dst, cudaStream_t stream);

    template <typename T>
    void bitwiseMaskAndCaller(int rows, int cols, int cn, const PtrStepb src1, const PtrStepb src2, const PtrStepb mask, PtrStepb dst, cudaStream_t stream);

    void bitwiseXorCaller(int rows, int cols, size_t elem_size1, int cn, const PtrStepb src1, const PtrStepb src2, PtrStepb dst, cudaStream_t stream);

    template <typename T>
    void bitwiseMaskXorCaller(int rows, int cols, int cn, const PtrStepb src1, const PtrStepb src2, const PtrStepb mask, PtrStepb dst, cudaStream_t stream);
}}}
END_OPENCV_DEVICE_NAMESPACE

namespace
{

@ -919,12 +911,12 @@ namespace
        CV_Assert(src1.size() == src2.size() && src1.type() == src2.type());
        dst.create(src1.size(), src1.type());

        OPENCV_DEVICE_NAMESPACE_ bitwiseOrCaller(dst.rows, dst.cols, dst.elemSize1(), dst.channels(), src1, src2, dst, stream);
        ::cv::gpu::device::bitwiseOrCaller(dst.rows, dst.cols, dst.elemSize1(), dst.channels(), src1, src2, dst, stream);
    }

    void bitwiseOrCaller(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, const GpuMat& mask, cudaStream_t stream)
    {
        using namespace OPENCV_DEVICE_NAMESPACE;
        using namespace ::cv::gpu::device;

        typedef void (*Caller)(int, int, int, const PtrStepb, const PtrStepb, const PtrStepb, PtrStepb, cudaStream_t);

@ -952,13 +944,13 @@ namespace
        CV_Assert(src1.size() == src2.size() && src1.type() == src2.type());
        dst.create(src1.size(), src1.type());

        OPENCV_DEVICE_NAMESPACE_ bitwiseAndCaller(dst.rows, dst.cols, dst.elemSize1(), dst.channels(), src1, src2, dst, stream);
        ::cv::gpu::device::bitwiseAndCaller(dst.rows, dst.cols, dst.elemSize1(), dst.channels(), src1, src2, dst, stream);
    }

    void bitwiseAndCaller(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, const GpuMat& mask, cudaStream_t stream)
    {
        using namespace OPENCV_DEVICE_NAMESPACE;
        using namespace ::cv::gpu::device;

        typedef void (*Caller)(int, int, int, const PtrStepb, const PtrStepb, const PtrStepb, PtrStepb, cudaStream_t);

@ -986,13 +978,13 @@ namespace
        CV_Assert(src1.size() == src2.size() && src1.type() == src2.type());
        dst.create(src1.size(), src1.type());

        OPENCV_DEVICE_NAMESPACE_ bitwiseXorCaller(dst.rows, dst.cols, dst.elemSize1(), dst.channels(), src1, src2, dst, stream);
        ::cv::gpu::device::bitwiseXorCaller(dst.rows, dst.cols, dst.elemSize1(), dst.channels(), src1, src2, dst, stream);
    }

    void bitwiseXorCaller(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, const GpuMat& mask, cudaStream_t stream)
    {
        using namespace OPENCV_DEVICE_NAMESPACE;
        using namespace ::cv::gpu::device;

        typedef void (*Caller)(int, int, int, const PtrStepb, const PtrStepb, const PtrStepb, PtrStepb, cudaStream_t);

@ -1046,21 +1038,20 @@ void cv::gpu::bitwise_xor(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, c

//////////////////////////////////////////////////////////////////////////////
// Minimum and maximum operations

BEGIN_OPENCV_DEVICE_NAMESPACE
namespace cv { namespace gpu { namespace device
{
    template <typename T>
    void min_gpu(const DevMem2D_<T>& src1, const DevMem2D_<T>& src2, const DevMem2D_<T>& dst, cudaStream_t stream);

    template <typename T>
    void max_gpu(const DevMem2D_<T>& src1, const DevMem2D_<T>& src2, const DevMem2D_<T>& dst, cudaStream_t stream);

    template <typename T>
    void min_gpu(const DevMem2D_<T>& src1, T src2, const DevMem2D_<T>& dst, cudaStream_t stream);

    template <typename T>
    void max_gpu(const DevMem2D_<T>& src1, T src2, const DevMem2D_<T>& dst, cudaStream_t stream);
}}}
END_OPENCV_DEVICE_NAMESPACE

namespace
{

@ -1069,14 +1060,14 @@ namespace
    {
        CV_Assert(src1.size() == src2.size() && src1.type() == src2.type());
        dst.create(src1.size(), src1.type());
        OPENCV_DEVICE_NAMESPACE_ min_gpu<T>(src1.reshape(1), src2.reshape(1), dst.reshape(1), stream);
        ::cv::gpu::device::min_gpu<T>(src1.reshape(1), src2.reshape(1), dst.reshape(1), stream);
    }

    template <typename T>
    void min_caller(const GpuMat& src1, double src2, GpuMat& dst, cudaStream_t stream)
    {
        dst.create(src1.size(), src1.type());
        OPENCV_DEVICE_NAMESPACE_ min_gpu<T>(src1.reshape(1), saturate_cast<T>(src2), dst.reshape(1), stream);
        ::cv::gpu::device::min_gpu<T>(src1.reshape(1), saturate_cast<T>(src2), dst.reshape(1), stream);
    }

    template <typename T>

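The scalar overloads above funnel the user's double through saturate_cast<T> before it reaches the kernel, so an out-of-range scalar is clamped to the element type rather than wrapped. A host-only sketch of that clamping; saturate_cast_demo is a stand-in for cv::saturate_cast, not the real implementation.

#include <algorithm>
#include <cstdio>

template <typename T> T saturate_cast_demo(double v);   // hypothetical stand-in

template <> unsigned char saturate_cast_demo<unsigned char>(double v)
{
    // Clamp to [0, 255], then round; the real cast rounds to nearest as well.
    return static_cast<unsigned char>(std::max(0.0, std::min(255.0, v)) + 0.5);
}

int main()
{
    // 300.0 saturates to 255 for an 8-bit image, so min(src, 300.0) is a no-op.
    std::printf("%d\n", saturate_cast_demo<unsigned char>(300.0));  // prints 255
    return 0;
}
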
@ -1084,14 +1075,14 @@ namespace
    {
        CV_Assert(src1.size() == src2.size() && src1.type() == src2.type());
        dst.create(src1.size(), src1.type());
        OPENCV_DEVICE_NAMESPACE_ max_gpu<T>(src1.reshape(1), src2.reshape(1), dst.reshape(1), stream);
        ::cv::gpu::device::max_gpu<T>(src1.reshape(1), src2.reshape(1), dst.reshape(1), stream);
    }

    template <typename T>
    void max_caller(const GpuMat& src1, double src2, GpuMat& dst, cudaStream_t stream)
    {
        dst.create(src1.size(), src1.type());
        OPENCV_DEVICE_NAMESPACE_ max_gpu<T>(src1.reshape(1), saturate_cast<T>(src2), dst.reshape(1), stream);
        ::cv::gpu::device::max_gpu<T>(src1.reshape(1), saturate_cast<T>(src2), dst.reshape(1), stream);
    }
}

@ -1155,18 +1146,17 @@ void cv::gpu::max(const GpuMat& src1, double src2, GpuMat& dst, Stream& stream)

////////////////////////////////////////////////////////////////////////
// threshold

BEGIN_OPENCV_DEVICE_NAMESPACE
namespace cv { namespace gpu { namespace device
{
    template <typename T>
    void threshold_gpu(const DevMem2Db& src, const DevMem2Db& dst, T thresh, T maxVal, int type, cudaStream_t stream);
}}}
END_OPENCV_DEVICE_NAMESPACE

namespace
{
    template <typename T> void threshold_caller(const GpuMat& src, GpuMat& dst, double thresh, double maxVal, int type, cudaStream_t stream)
    {
        OPENCV_DEVICE_NAMESPACE_ threshold_gpu<T>(src, dst, saturate_cast<T>(thresh), saturate_cast<T>(maxVal), type, stream);
        ::cv::gpu::device::threshold_gpu<T>(src, dst, saturate_cast<T>(thresh), saturate_cast<T>(maxVal), type, stream);
    }
}

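threshold_caller narrows thresh and maxVal the same way and forwards the mode flag; per pixel the five standard modes reduce to the switch below. This is a sketch: the case values mirror OpenCV's THRESH_* enum (an assumption here), and the real work happens element-wise in the CUDA kernel.

template <typename T>
T threshold_pixel(T v, T thresh, T maxVal, int type)
{
    switch (type)
    {
    case 0:  return v > thresh ? maxVal : T(0);   // THRESH_BINARY
    case 1:  return v > thresh ? T(0) : maxVal;   // THRESH_BINARY_INV
    case 2:  return v > thresh ? thresh : v;      // THRESH_TRUNC
    case 3:  return v > thresh ? v : T(0);        // THRESH_TOZERO
    case 4:  return v > thresh ? T(0) : v;        // THRESH_TOZERO_INV
    default: return v;
    }
}
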
@ -1223,16 +1213,15 @@ double cv::gpu::threshold(const GpuMat& src, GpuMat& dst, double thresh, double

////////////////////////////////////////////////////////////////////////
// pow

BEGIN_OPENCV_DEVICE_NAMESPACE
namespace cv { namespace gpu { namespace device
{
    template<typename T>
    void pow_caller(const DevMem2Db& src, float power, DevMem2Db dst, cudaStream_t stream);
}}}
END_OPENCV_DEVICE_NAMESPACE

void cv::gpu::pow(const GpuMat& src, double power, GpuMat& dst, Stream& stream)
{
    using namespace OPENCV_DEVICE_NAMESPACE;
    using namespace ::cv::gpu::device;

    CV_Assert(src.depth() != CV_64F);
    dst.create(src.size(), src.type());

@ -1252,16 +1241,15 @@ void cv::gpu::pow(const GpuMat& src, double power, GpuMat& dst, Stream& stream)

////////////////////////////////////////////////////////////////////////
// addWeighted

BEGIN_OPENCV_DEVICE_NAMESPACE
namespace cv { namespace gpu { namespace device
{
    template <typename T1, typename T2, typename D>
    void addWeighted_gpu(const DevMem2Db& src1, double alpha, const DevMem2Db& src2, double beta, double gamma, const DevMem2Db& dst, cudaStream_t stream);
}}}
END_OPENCV_DEVICE_NAMESPACE

void cv::gpu::addWeighted(const GpuMat& src1, double alpha, const GpuMat& src2, double beta, double gamma, GpuMat& dst, int dtype, Stream& stream)
{
    using namespace OPENCV_DEVICE_NAMESPACE;
    using namespace ::cv::gpu::device;

    CV_Assert(src1.size() == src2.size());
    CV_Assert(src1.type() == src2.type() || (dtype >= 0 && src1.channels() == src2.channels()));

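For reference, the per-pixel formula behind addWeighted_gpu is dst = saturate(src1*alpha + src2*beta + gamma); the template parameters T1, T2, D are the two source depths and the destination depth. A one-function sketch, with a plain cast standing in for the saturating conversion:

// Per-pixel operation, sketched on the host:
// dst(x, y) = saturate(src1(x, y) * alpha + src2(x, y) * beta + gamma).
template <typename T1, typename T2, typename D>
D add_weighted_pixel(T1 a, T2 b, double alpha, double beta, double gamma)
{
    // The real code saturate_casts to D; a plain cast is used here for brevity.
    return static_cast<D>(a * alpha + b * beta + gamma);
}
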
@ -735,21 +735,20 @@ void cv::gpu::filter2D(const GpuMat& src, GpuMat& dst, int ddepth, const Mat& ke

////////////////////////////////////////////////////////////////////////////////////////////////////
// Separable Linear Filter

BEGIN_OPENCV_DEVICE_NAMESPACE
namespace cv { namespace gpu { namespace device
{
    namespace row_filter
    {
        template <typename T, typename D>
        void linearRowFilter_gpu(const DevMem2Db& src, const DevMem2Db& dst, const float kernel[], int ksize, int anchor, int brd_type, cudaStream_t stream);
    }

    namespace column_filter
    {
        template <typename T, typename D>
        void linearColumnFilter_gpu(const DevMem2Db& src, const DevMem2Db& dst, const float kernel[], int ksize, int anchor, int brd_type, cudaStream_t stream);
    }
}}}
END_OPENCV_DEVICE_NAMESPACE

namespace
{

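row_filter and column_filter come as a pair because a separable 2-D kernel K = c * r^T can be applied as a 1-D row pass followed by a 1-D column pass, turning O(kw*kh) work per pixel into O(kw + kh). A host-side sketch of the row pass with replicate borders; the flat row-major layout and all names are illustrative, not OpenCV's API.

#include <algorithm>
#include <vector>

void row_pass(const std::vector<float>& src, std::vector<float>& dst,
              int w, int h, const std::vector<float>& k, int anchor)
{
    for (int y = 0; y < h; ++y)
        for (int x = 0; x < w; ++x)
        {
            float sum = 0.f;
            for (int i = 0; i < (int)k.size(); ++i)
            {
                // Replicate-border indexing, one of the brd_type options above.
                int xx = std::min(std::max(x + i - anchor, 0), w - 1);
                sum += k[i] * src[y * w + xx];
            }
            dst[y * w + x] = sum;
        }
}
// column_pass is identical with the roles of x and y swapped.
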
@ -803,7 +802,7 @@ namespace

Ptr<BaseRowFilter_GPU> cv::gpu::getLinearRowFilter_GPU(int srcType, int bufType, const Mat& rowKernel, int anchor, int borderType)
{
    using namespace OPENCV_DEVICE_NAMESPACE_ row_filter;
    using namespace ::cv::gpu::device::row_filter;

    static const nppFilter1D_t nppFilter1D_callers[] = {0, nppiFilterRow_8u_C1R, 0, 0, nppiFilterRow_8u_C4R};

@ -918,7 +917,7 @@ namespace

Ptr<BaseColumnFilter_GPU> cv::gpu::getLinearColumnFilter_GPU(int bufType, int dstType, const Mat& columnKernel, int anchor, int borderType)
{
    using namespace OPENCV_DEVICE_NAMESPACE_ column_filter;
    using namespace ::cv::gpu::device::column_filter;

    static const nppFilter1D_t nppFilter1D_callers[] = {0, nppiFilterColumn_8u_C1R, 0, 0, nppiFilterColumn_8u_C4R};

@ -60,10 +60,10 @@ std::vector<float> cv::gpu::HOGDescriptor::getPeopleDetector64x128() { throw_nog

#else

BEGIN_OPENCV_DEVICE_NAMESPACE
namespace cv { namespace gpu { namespace device
{
    namespace hog
    {
        void set_up_constants(int nbins, int block_stride_x, int block_stride_y,
                              int nblocks_win_x, int nblocks_win_y);

@ -93,11 +93,10 @@ namespace hog

        void resize_8UC1(const cv::gpu::DevMem2Db& src, cv::gpu::DevMem2Db dst);
        void resize_8UC4(const cv::gpu::DevMem2Db& src, cv::gpu::DevMem2Db dst);
    }
}}}
END_OPENCV_DEVICE_NAMESPACE

using namespace OPENCV_DEVICE_NAMESPACE;
using namespace ::cv::gpu::device;

cv::gpu::HOGDescriptor::HOGDescriptor(Size win_size, Size block_size, Size block_stride, Size cell_size,
                                      int nbins, double win_sigma, double threshold_L2hys, bool gamma_correction, int nlevels)

@ -107,20 +107,19 @@ void cv::gpu::CannyBuf::release() { throw_nogpu(); }

////////////////////////////////////////////////////////////////////////
// remap

BEGIN_OPENCV_DEVICE_NAMESPACE
namespace cv { namespace gpu { namespace device
{
    namespace imgproc
    {
        template <typename T>
        void remap_gpu(const DevMem2Db& src, const DevMem2Df& xmap, const DevMem2Df& ymap, const DevMem2Db& dst,
                       int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);
    }
}}}
END_OPENCV_DEVICE_NAMESPACE

void cv::gpu::remap(const GpuMat& src, GpuMat& dst, const GpuMat& xmap, const GpuMat& ymap, int interpolation, int borderMode, const Scalar& borderValue, Stream& stream)
{
    using namespace OPENCV_DEVICE_NAMESPACE_ imgproc;
    using namespace ::cv::gpu::device::imgproc;

    typedef void (*caller_t)(const DevMem2Db& src, const DevMem2Df& xmap, const DevMem2Df& ymap, const DevMem2Db& dst, int interpolation,
                             int borderMode, const float* borderValue, cudaStream_t stream, int cc);

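What remap_gpu computes, stripped of interpolation detail: every output pixel pulls from a source coordinate supplied by the two float maps, dst(x, y) = src(xmap(x, y), ymap(x, y)). A nearest-neighbour, BORDER_CONSTANT host sketch with an illustrative flat layout:

#include <cmath>
#include <vector>

void remap_nn(const std::vector<float>& src, int sw, int sh,
              const std::vector<float>& xmap, const std::vector<float>& ymap,
              std::vector<float>& dst, int dw, int dh, float borderValue)
{
    for (int y = 0; y < dh; ++y)
        for (int x = 0; x < dw; ++x)
        {
            int sx = (int)std::lround(xmap[y * dw + x]);
            int sy = (int)std::lround(ymap[y * dw + x]);
            bool inside = sx >= 0 && sx < sw && sy >= 0 && sy < sh;
            // Out-of-range coordinates take the constant border value.
            dst[y * dw + x] = inside ? src[sy * sw + sx] : borderValue;
        }
}
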
@ -160,18 +159,17 @@ void cv::gpu::remap(const GpuMat& src, GpuMat& dst, const GpuMat& xmap, const Gp

////////////////////////////////////////////////////////////////////////
// meanShiftFiltering_GPU

BEGIN_OPENCV_DEVICE_NAMESPACE
namespace cv { namespace gpu { namespace device
{
    namespace imgproc
    {
        void meanShiftFiltering_gpu(const DevMem2Db& src, DevMem2Db dst, int sp, int sr, int maxIter, float eps, cudaStream_t stream);
    }
}}}
END_OPENCV_DEVICE_NAMESPACE

void cv::gpu::meanShiftFiltering(const GpuMat& src, GpuMat& dst, int sp, int sr, TermCriteria criteria, Stream& stream)
{
    using namespace OPENCV_DEVICE_NAMESPACE_ imgproc;
    using namespace ::cv::gpu::device::imgproc;

    if( src.empty() )
        CV_Error( CV_StsBadArg, "The input image is empty" );

@ -197,18 +195,17 @@ void cv::gpu::meanShiftFiltering(const GpuMat& src, GpuMat& dst, int sp, int sr,

////////////////////////////////////////////////////////////////////////
// meanShiftProc_GPU

BEGIN_OPENCV_DEVICE_NAMESPACE
namespace cv { namespace gpu { namespace device
{
    namespace imgproc
    {
        void meanShiftProc_gpu(const DevMem2Db& src, DevMem2Db dstr, DevMem2Db dstsp, int sp, int sr, int maxIter, float eps, cudaStream_t stream);
    }
}}}
END_OPENCV_DEVICE_NAMESPACE

void cv::gpu::meanShiftProc(const GpuMat& src, GpuMat& dstr, GpuMat& dstsp, int sp, int sr, TermCriteria criteria, Stream& stream)
{
    using namespace OPENCV_DEVICE_NAMESPACE_ imgproc;
    using namespace ::cv::gpu::device::imgproc;

    if( src.empty() )
        CV_Error( CV_StsBadArg, "The input image is empty" );

@ -235,22 +232,21 @@ void cv::gpu::meanShiftProc(const GpuMat& src, GpuMat& dstr, GpuMat& dstsp, int

////////////////////////////////////////////////////////////////////////
// drawColorDisp

BEGIN_OPENCV_DEVICE_NAMESPACE
namespace cv { namespace gpu { namespace device
{
    namespace imgproc
    {
        void drawColorDisp_gpu(const DevMem2Db& src, const DevMem2Db& dst, int ndisp, const cudaStream_t& stream);
        void drawColorDisp_gpu(const DevMem2D_<short>& src, const DevMem2Db& dst, int ndisp, const cudaStream_t& stream);
    }
}}}
END_OPENCV_DEVICE_NAMESPACE

namespace
{
    template <typename T>
    void drawColorDisp_caller(const GpuMat& src, GpuMat& dst, int ndisp, const cudaStream_t& stream)
    {
        using namespace OPENCV_DEVICE_NAMESPACE_ imgproc;
        using namespace ::cv::gpu::device::imgproc;

        dst.create(src.size(), CV_8UC4);

@ -272,22 +268,21 @@ void cv::gpu::drawColorDisp(const GpuMat& src, GpuMat& dst, int ndisp, Stream& s

////////////////////////////////////////////////////////////////////////
// reprojectImageTo3D

BEGIN_OPENCV_DEVICE_NAMESPACE
namespace cv { namespace gpu { namespace device
{
    namespace imgproc
    {
        void reprojectImageTo3D_gpu(const DevMem2Db& disp, const DevMem2Df& xyzw, const float* q, const cudaStream_t& stream);
        void reprojectImageTo3D_gpu(const DevMem2D_<short>& disp, const DevMem2Df& xyzw, const float* q, const cudaStream_t& stream);
    }
}}}
END_OPENCV_DEVICE_NAMESPACE

namespace
{
    template <typename T>
    void reprojectImageTo3D_caller(const GpuMat& disp, GpuMat& xyzw, const Mat& Q, const cudaStream_t& stream)
    {
        using namespace OPENCV_DEVICE_NAMESPACE_ imgproc;
        using namespace ::cv::gpu::device::imgproc;

        xyzw.create(disp.rows, disp.cols, CV_32FC4);

@ -309,14 +304,13 @@ void cv::gpu::reprojectImageTo3D(const GpuMat& disp, GpuMat& xyzw, const Mat& Q,

////////////////////////////////////////////////////////////////////////
// resize

BEGIN_OPENCV_DEVICE_NAMESPACE
namespace cv { namespace gpu { namespace device
{
    namespace imgproc
    {
        template <typename T> void resize_gpu(const DevMem2Db& src, float fx, float fy, const DevMem2Db& dst, int interpolation, cudaStream_t stream);
    }
}}}
END_OPENCV_DEVICE_NAMESPACE

void cv::gpu::resize(const GpuMat& src, GpuMat& dst, Size dsize, double fx, double fy, int interpolation, Stream& s)
{

@ -380,7 +374,7 @@ void cv::gpu::resize(const GpuMat& src, GpuMat& dst, Size dsize, double fx, doub
    }
    else
    {
        using namespace OPENCV_DEVICE_NAMESPACE_ imgproc;
        using namespace ::cv::gpu::device::imgproc;

        typedef void (*caller_t)(const DevMem2Db& src, float fx, float fy, const DevMem2Db& dst, int interpolation, cudaStream_t stream);
        static const caller_t callers[6][4] =

@ -400,20 +394,19 @@ void cv::gpu::resize(const GpuMat& src, GpuMat& dst, Size dsize, double fx, doub

////////////////////////////////////////////////////////////////////////
// copyMakeBorder

BEGIN_OPENCV_DEVICE_NAMESPACE
namespace cv { namespace gpu { namespace device
{
    namespace imgproc
    {
        template <typename T, int cn> void copyMakeBorder_gpu(const DevMem2Db& src, const DevMem2Db& dst, int top, int left, int borderMode, const T* borderValue, cudaStream_t stream);
    }
}}}
END_OPENCV_DEVICE_NAMESPACE

namespace
{
    template <typename T, int cn> void copyMakeBorder_caller(const DevMem2Db& src, const DevMem2Db& dst, int top, int left, int borderType, const Scalar& value, cudaStream_t stream)
    {
        using namespace OPENCV_DEVICE_NAMESPACE_ imgproc;
        using namespace ::cv::gpu::device::imgproc;

        Scalar_<T> val(saturate_cast<T>(value[0]), saturate_cast<T>(value[1]), saturate_cast<T>(value[2]), saturate_cast<T>(value[3]));

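The Scalar_<T> line above is the type-narrowing step: the public API hands over a 4-lane double Scalar, and one saturate_cast per lane produces the typed border values whose address is then passed on as the const T* borderValue parameter. A stand-alone sketch; Scalar4 and launch_demo are stand-ins, not OpenCV types.

template <typename T>
struct Scalar4              // stand-in for cv::Scalar_<T>
{
    T val[4];
    Scalar4(T a, T b, T c, T d) { val[0] = a; val[1] = b; val[2] = c; val[3] = d; }
};

template <typename T>
void launch_demo(const T* borderValue, int cn)   // hypothetical kernel launcher
{
    (void)borderValue; (void)cn;
}

template <typename T, int cn>
void copy_make_border_demo(const double value[4])
{
    // Plain casts stand in for the per-lane saturate_cast calls.
    Scalar4<T> val(T(value[0]), T(value[1]), T(value[2]), T(value[3]));
    launch_demo<T>(val.val, cn);     // the kernel reads only the first cn entries
}
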
@ -666,21 +659,20 @@ void cv::gpu::warpPerspective(const GpuMat& src, GpuMat& dst, const Mat& M, Size

//////////////////////////////////////////////////////////////////////////////
// buildWarpPlaneMaps

BEGIN_OPENCV_DEVICE_NAMESPACE
namespace cv { namespace gpu { namespace device
{
    namespace imgproc
    {
        void buildWarpPlaneMaps(int tl_u, int tl_v, DevMem2Df map_x, DevMem2Df map_y,
                                const float k_rinv[9], const float r_kinv[9], const float t[3], float scale,
                                cudaStream_t stream);
    }
}}}
END_OPENCV_DEVICE_NAMESPACE

void cv::gpu::buildWarpPlaneMaps(Size src_size, Rect dst_roi, const Mat &K, const Mat& R, const Mat &T,
                                 float scale, GpuMat& map_x, GpuMat& map_y, Stream& stream)
{
    using namespace OPENCV_DEVICE_NAMESPACE_ imgproc;
    using namespace ::cv::gpu::device::imgproc;

    CV_Assert(K.size() == Size(3,3) && K.type() == CV_32F);
    CV_Assert(R.size() == Size(3,3) && R.type() == CV_32F);

|
|||||||
//////////////////////////////////////////////////////////////////////////////
|
//////////////////////////////////////////////////////////////////////////////
|
||||||
// buildWarpCylyndricalMaps
|
// buildWarpCylyndricalMaps
|
||||||
|
|
||||||
BEGIN_OPENCV_DEVICE_NAMESPACE
|
namespace cv { namespace gpu { namespace device
|
||||||
|
|
||||||
namespace imgproc
|
|
||||||
{
|
{
|
||||||
|
namespace imgproc
|
||||||
|
{
|
||||||
void buildWarpCylindricalMaps(int tl_u, int tl_v, DevMem2Df map_x, DevMem2Df map_y,
|
void buildWarpCylindricalMaps(int tl_u, int tl_v, DevMem2Df map_x, DevMem2Df map_y,
|
||||||
const float k_rinv[9], const float r_kinv[9], float scale,
|
const float k_rinv[9], const float r_kinv[9], float scale,
|
||||||
cudaStream_t stream);
|
cudaStream_t stream);
|
||||||
}
|
}
|
||||||
|
}}}
|
||||||
END_OPENCV_DEVICE_NAMESPACE
|
|
||||||
|
|
||||||
void cv::gpu::buildWarpCylindricalMaps(Size src_size, Rect dst_roi, const Mat &K, const Mat& R, float scale,
|
void cv::gpu::buildWarpCylindricalMaps(Size src_size, Rect dst_roi, const Mat &K, const Mat& R, float scale,
|
||||||
GpuMat& map_x, GpuMat& map_y, Stream& stream)
|
GpuMat& map_x, GpuMat& map_y, Stream& stream)
|
||||||
{
|
{
|
||||||
using namespace OPENCV_DEVICE_NAMESPACE_ imgproc;
|
using namespace ::cv::gpu::device::imgproc;
|
||||||
|
|
||||||
CV_Assert(K.size() == Size(3,3) && K.type() == CV_32F);
|
CV_Assert(K.size() == Size(3,3) && K.type() == CV_32F);
|
||||||
CV_Assert(R.size() == Size(3,3) && R.type() == CV_32F);
|
CV_Assert(R.size() == Size(3,3) && R.type() == CV_32F);
|
||||||
@ -733,21 +724,20 @@ void cv::gpu::buildWarpCylindricalMaps(Size src_size, Rect dst_roi, const Mat &K
|
|||||||
//////////////////////////////////////////////////////////////////////////////
|
//////////////////////////////////////////////////////////////////////////////
|
||||||
// buildWarpSphericalMaps
|
// buildWarpSphericalMaps
|
||||||
|
|
||||||
BEGIN_OPENCV_DEVICE_NAMESPACE
|
namespace cv { namespace gpu { namespace device
|
||||||
|
|
||||||
namespace imgproc
|
|
||||||
{
|
{
|
||||||
|
namespace imgproc
|
||||||
|
{
|
||||||
void buildWarpSphericalMaps(int tl_u, int tl_v, DevMem2Df map_x, DevMem2Df map_y,
|
void buildWarpSphericalMaps(int tl_u, int tl_v, DevMem2Df map_x, DevMem2Df map_y,
|
||||||
const float k_rinv[9], const float r_kinv[9], float scale,
|
const float k_rinv[9], const float r_kinv[9], float scale,
|
||||||
cudaStream_t stream);
|
cudaStream_t stream);
|
||||||
}
|
}
|
||||||
|
}}}
|
||||||
END_OPENCV_DEVICE_NAMESPACE
|
|
||||||
|
|
||||||
void cv::gpu::buildWarpSphericalMaps(Size src_size, Rect dst_roi, const Mat &K, const Mat& R, float scale,
|
void cv::gpu::buildWarpSphericalMaps(Size src_size, Rect dst_roi, const Mat &K, const Mat& R, float scale,
|
||||||
GpuMat& map_x, GpuMat& map_y, Stream& stream)
|
GpuMat& map_x, GpuMat& map_y, Stream& stream)
|
||||||
{
|
{
|
||||||
using namespace OPENCV_DEVICE_NAMESPACE_ imgproc;
|
using namespace ::cv::gpu::device::imgproc;
|
||||||
|
|
||||||
CV_Assert(K.size() == Size(3,3) && K.type() == CV_32F);
|
CV_Assert(K.size() == Size(3,3) && K.type() == CV_32F);
|
||||||
CV_Assert(R.size() == Size(3,3) && R.type() == CV_32F);
|
CV_Assert(R.size() == Size(3,3) && R.type() == CV_32F);
|
||||||
@ -899,18 +889,17 @@ void cv::gpu::sqrIntegral(const GpuMat& src, GpuMat& sqsum, Stream& s)

//////////////////////////////////////////////////////////////////////////////
// columnSum

BEGIN_OPENCV_DEVICE_NAMESPACE
namespace cv { namespace gpu { namespace device
{
    namespace imgproc
    {
        void columnSum_32F(const DevMem2Db src, const DevMem2Db dst);
    }
}}}
END_OPENCV_DEVICE_NAMESPACE

void cv::gpu::columnSum(const GpuMat& src, GpuMat& dst)
{
    using namespace OPENCV_DEVICE_NAMESPACE_ imgproc;
    using namespace ::cv::gpu::device::imgproc;

    CV_Assert(src.type() == CV_32F);

@ -1245,19 +1234,18 @@ void cv::gpu::histRange(const GpuMat& src, GpuMat hist[4], const GpuMat levels[4
    hist_callers[src.depth()](src, hist, levels, buf, StreamAccessor::getStream(stream));
}

BEGIN_OPENCV_DEVICE_NAMESPACE
namespace cv { namespace gpu { namespace device
{
    namespace hist
    {
        void histogram256_gpu(DevMem2Db src, int* hist, unsigned int* buf, cudaStream_t stream);

        const int PARTIAL_HISTOGRAM256_COUNT = 240;
        const int HISTOGRAM256_BIN_COUNT = 256;

        void equalizeHist_gpu(DevMem2Db src, DevMem2Db dst, const int* lut, cudaStream_t stream);
    }
}}}
END_OPENCV_DEVICE_NAMESPACE

void cv::gpu::calcHist(const GpuMat& src, GpuMat& hist, Stream& stream)
{

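The two constants explain the buf parameter's size: histogram256_gpu follows the classic two-phase histogram scheme, where each of the 240 partial histograms owns 256 bins and a second pass reduces them into the final hist. A CPU sketch of that reduction, assuming (this is an assumption) the partials are stored contiguously, one 256-bin row per partial:

void merge_partial_histograms(const unsigned int* buf, int* hist)
{
    const int PARTIAL_HISTOGRAM256_COUNT = 240;
    const int HISTOGRAM256_BIN_COUNT = 256;

    for (int bin = 0; bin < HISTOGRAM256_BIN_COUNT; ++bin)
    {
        unsigned int sum = 0;
        for (int part = 0; part < PARTIAL_HISTOGRAM256_COUNT; ++part)
            sum += buf[part * HISTOGRAM256_BIN_COUNT + bin];
        hist[bin] = (int)sum;   // buf therefore needs 240 * 256 counters
    }
}
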
@ -1267,7 +1255,7 @@ void cv::gpu::calcHist(const GpuMat& src, GpuMat& hist, Stream& stream)

void cv::gpu::calcHist(const GpuMat& src, GpuMat& hist, GpuMat& buf, Stream& stream)
{
    using namespace OPENCV_DEVICE_NAMESPACE_ hist;
    using namespace ::cv::gpu::device::hist;

    CV_Assert(src.type() == CV_8UC1);

@ -1293,7 +1281,7 @@ void cv::gpu::equalizeHist(const GpuMat& src, GpuMat& dst, GpuMat& hist, Stream&

void cv::gpu::equalizeHist(const GpuMat& src, GpuMat& dst, GpuMat& hist, GpuMat& buf, Stream& s)
{
    using namespace OPENCV_DEVICE_NAMESPACE_ hist;
    using namespace ::cv::gpu::device::hist;

    CV_Assert(src.type() == CV_8UC1);

@ -1327,16 +1315,15 @@ void cv::gpu::equalizeHist(const GpuMat& src, GpuMat& dst, GpuMat& hist, GpuMat&

////////////////////////////////////////////////////////////////////////
// cornerHarris & minEgenVal

BEGIN_OPENCV_DEVICE_NAMESPACE
namespace cv { namespace gpu { namespace device
{
    namespace imgproc
    {
        void extractCovData_caller(const DevMem2Df Dx, const DevMem2Df Dy, PtrStepf dst, cudaStream_t stream);
        void cornerHarris_caller(const int block_size, const float k, const DevMem2Db Dx, const DevMem2Db Dy, DevMem2Db dst, int border_type, cudaStream_t stream);
        void cornerMinEigenVal_caller(const int block_size, const DevMem2Db Dx, const DevMem2Db Dy, DevMem2Db dst, int border_type, cudaStream_t stream);
    }
}}}
END_OPENCV_DEVICE_NAMESPACE

namespace
{

@ -1421,7 +1408,7 @@ void cv::gpu::cornerHarris(const GpuMat& src, GpuMat& dst, GpuMat& Dx, GpuMat& D

void cv::gpu::cornerHarris(const GpuMat& src, GpuMat& dst, GpuMat& Dx, GpuMat& Dy, GpuMat& buf, int blockSize, int ksize, double k, int borderType, Stream& stream)
{
    using namespace OPENCV_DEVICE_NAMESPACE_ imgproc;
    using namespace ::cv::gpu::device::imgproc;

    CV_Assert(borderType == cv::BORDER_REFLECT101 ||
              borderType == cv::BORDER_REPLICATE);

@ -1448,7 +1435,7 @@ void cv::gpu::cornerMinEigenVal(const GpuMat& src, GpuMat& dst, GpuMat& Dx, GpuM

void cv::gpu::cornerMinEigenVal(const GpuMat& src, GpuMat& dst, GpuMat& Dx, GpuMat& Dy, GpuMat& buf, int blockSize, int ksize, int borderType, Stream& stream)
{
    using namespace OPENCV_DEVICE_NAMESPACE_ imgproc;
    using namespace ::cv::gpu::device::imgproc;

    CV_Assert(borderType == cv::BORDER_REFLECT101 ||
              borderType == cv::BORDER_REPLICATE);

@ -1464,20 +1451,19 @@ void cv::gpu::cornerMinEigenVal(const GpuMat& src, GpuMat& dst, GpuMat& Dx, GpuM

//////////////////////////////////////////////////////////////////////////////
// mulSpectrums

BEGIN_OPENCV_DEVICE_NAMESPACE
namespace cv { namespace gpu { namespace device
{
    namespace imgproc
    {
        void mulSpectrums(const PtrStep<cufftComplex> a, const PtrStep<cufftComplex> b, DevMem2D_<cufftComplex> c, cudaStream_t stream);

        void mulSpectrums_CONJ(const PtrStep<cufftComplex> a, const PtrStep<cufftComplex> b, DevMem2D_<cufftComplex> c, cudaStream_t stream);
    }
}}}
END_OPENCV_DEVICE_NAMESPACE

void cv::gpu::mulSpectrums(const GpuMat& a, const GpuMat& b, GpuMat& c, int flags, bool conjB, Stream& stream)
{
    using namespace OPENCV_DEVICE_NAMESPACE_ imgproc;
    using namespace ::cv::gpu::device::imgproc;

    typedef void (*Caller)(const PtrStep<cufftComplex>, const PtrStep<cufftComplex>, DevMem2D_<cufftComplex>, cudaStream_t stream);

@ -1495,20 +1481,19 @@ void cv::gpu::mulSpectrums(const GpuMat& a, const GpuMat& b, GpuMat& c, int flag

//////////////////////////////////////////////////////////////////////////////
// mulAndScaleSpectrums

BEGIN_OPENCV_DEVICE_NAMESPACE
namespace cv { namespace gpu { namespace device
{
    namespace imgproc
    {
        void mulAndScaleSpectrums(const PtrStep<cufftComplex> a, const PtrStep<cufftComplex> b, float scale, DevMem2D_<cufftComplex> c, cudaStream_t stream);

        void mulAndScaleSpectrums_CONJ(const PtrStep<cufftComplex> a, const PtrStep<cufftComplex> b, float scale, DevMem2D_<cufftComplex> c, cudaStream_t stream);
    }
}}}
END_OPENCV_DEVICE_NAMESPACE

void cv::gpu::mulAndScaleSpectrums(const GpuMat& a, const GpuMat& b, GpuMat& c, int flags, float scale, bool conjB, Stream& stream)
{
    using namespace OPENCV_DEVICE_NAMESPACE_ imgproc;
    using namespace ::cv::gpu::device::imgproc;

    typedef void (*Caller)(const PtrStep<cufftComplex>, const PtrStep<cufftComplex>, float scale, DevMem2D_<cufftComplex>, cudaStream_t stream);
    static Caller callers[] = { mulAndScaleSpectrums, mulAndScaleSpectrums_CONJ };

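The two-entry callers table is indexed straight off the conjB flag: index 0 multiplies the spectra element-wise, index 1 conjugates b first (correlation rather than convolution). The per-element math, sketched with a plain struct standing in for cufftComplex:

struct Complex { float x, y; };   // x = real part, y = imaginary part

Complex mul_scaled(Complex a, Complex b, float scale, bool conjB)
{
    if (conjB)
        b.y = -b.y;                              // conjugate: b -> conj(b)
    Complex c;
    c.x = scale * (a.x * b.x - a.y * b.y);       // real part of a*b
    c.y = scale * (a.x * b.y + a.y * b.x);       // imaginary part of a*b
    return c;
}
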
@ -1673,18 +1658,17 @@ void cv::gpu::convolve(const GpuMat& image, const GpuMat& templ, GpuMat& result,
    convolve(image, templ, result, ccorr, buf);
}

BEGIN_OPENCV_DEVICE_NAMESPACE
namespace cv { namespace gpu { namespace device
{
    namespace imgproc
    {
        void convolve_gpu(const DevMem2Df& src, const PtrStepf& dst, int kWidth, int kHeight, float* kernel, cudaStream_t stream);
    }
}}}
END_OPENCV_DEVICE_NAMESPACE

void cv::gpu::convolve(const GpuMat& image, const GpuMat& templ, GpuMat& result, bool ccorr, ConvolveBuf& buf, Stream& stream)
{
    using namespace OPENCV_DEVICE_NAMESPACE_ imgproc;
    using namespace ::cv::gpu::device::imgproc;

#ifndef HAVE_CUFFT

@@ -1811,18 +1795,17 @@ void cv::gpu::convolve(const GpuMat& image, const GpuMat& templ, GpuMat& result,
 //////////////////////////////////////////////////////////////////////////////
 // pyrDown
 
-BEGIN_OPENCV_DEVICE_NAMESPACE
-
-namespace imgproc
-{
+namespace cv { namespace gpu { namespace device
+{
+    namespace imgproc
+    {
     template <typename T, int cn> void pyrDown_gpu(const DevMem2Db& src, const DevMem2Db& dst, int borderType, cudaStream_t stream);
 }
+}}}
-END_OPENCV_DEVICE_NAMESPACE
 
 void cv::gpu::pyrDown(const GpuMat& src, GpuMat& dst, int borderType, Stream& stream)
 {
-    using namespace OPENCV_DEVICE_NAMESPACE_ imgproc;
+    using namespace ::cv::gpu::device::imgproc;
 
     typedef void (*func_t)(const DevMem2Db& src, const DevMem2Db& dst, int borderType, cudaStream_t stream);
 
@@ -1851,18 +1834,17 @@ void cv::gpu::pyrDown(const GpuMat& src, GpuMat& dst, int borderType, Stream& st
 //////////////////////////////////////////////////////////////////////////////
 // pyrUp
 
-BEGIN_OPENCV_DEVICE_NAMESPACE
-
-namespace imgproc
-{
+namespace cv { namespace gpu { namespace device
+{
+    namespace imgproc
+    {
     template <typename T, int cn> void pyrUp_gpu(const DevMem2Db& src, const DevMem2Db& dst, int borderType, cudaStream_t stream);
 }
+}}}
-END_OPENCV_DEVICE_NAMESPACE
 
 void cv::gpu::pyrUp(const GpuMat& src, GpuMat& dst, int borderType, Stream& stream)
 {
-    using namespace OPENCV_DEVICE_NAMESPACE_ imgproc;
+    using namespace ::cv::gpu::device::imgproc;
 
     typedef void (*func_t)(const DevMem2Db& src, const DevMem2Db& dst, int borderType, cudaStream_t stream);
 
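pyrDown and pyrUp declare the same func_t signature, and the <T, cn> template parameters point at the usual depth-by-channels dispatch table. A hedged sketch of that pattern (the table shape and its depth indexing are illustrative, not the module's actual table; only func_t and pyrDown_gpu come from the diff):

using namespace ::cv::gpu::device::imgproc;

typedef void (*func_t)(const DevMem2Db& src, const DevMem2Db& dst, int borderType, cudaStream_t stream);

// Illustrative two-depth subset; the real table spans all supported depths,
// indexed by src.depth() rather than 0/1 as here.
static const func_t funcs[2][4] =
{
    { pyrDown_gpu<uchar, 1>, pyrDown_gpu<uchar, 2>, pyrDown_gpu<uchar, 3>, pyrDown_gpu<uchar, 4> },
    { pyrDown_gpu<float, 1>, pyrDown_gpu<float, 2>, pyrDown_gpu<float, 3>, pyrDown_gpu<float, 4> },
};

// funcs[depthIndex][src.channels() - 1](src, dst, borderType, stream);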
@@ -1933,10 +1915,10 @@ void cv::gpu::CannyBuf::release()
     trackBuf2.release();
 }
 
-BEGIN_OPENCV_DEVICE_NAMESPACE
-
-namespace canny
-{
+namespace cv { namespace gpu { namespace device
+{
+    namespace canny
+    {
     void calcSobelRowPass_gpu(PtrStepb src, PtrStepi dx_buf, PtrStepi dy_buf, int rows, int cols);
 
     void calcMagnitude_gpu(PtrStepi dx_buf, PtrStepi dy_buf, PtrStepi dx, PtrStepi dy, PtrStepf mag, int rows, int cols, bool L2Grad);
 
@@ -1949,15 +1931,14 @@ namespace canny
     void edgesHysteresisGlobal_gpu(PtrStepi map, ushort2* st1, ushort2* st2, int rows, int cols);
 
     void getEdges_gpu(PtrStepi map, PtrStepb dst, int rows, int cols);
 }
+}}}
-END_OPENCV_DEVICE_NAMESPACE
 
 namespace
 {
     void CannyCaller(CannyBuf& buf, GpuMat& dst, float low_thresh, float high_thresh)
     {
-        using namespace OPENCV_DEVICE_NAMESPACE_ canny;
+        using namespace ::cv::gpu::device::canny;
 
         calcMap_gpu(buf.dx, buf.dy, buf.edgeBuf, buf.edgeBuf, dst.rows, dst.cols, low_thresh, high_thresh);
 
@@ -1977,7 +1958,7 @@ void cv::gpu::Canny(const GpuMat& src, GpuMat& dst, double low_thresh, double hi
 
 void cv::gpu::Canny(const GpuMat& src, CannyBuf& buf, GpuMat& dst, double low_thresh, double high_thresh, int apperture_size, bool L2gradient)
 {
-    using namespace OPENCV_DEVICE_NAMESPACE_ canny;
+    using namespace ::cv::gpu::device::canny;
 
     CV_Assert(TargetArchs::builtWith(SHARED_ATOMICS) && DeviceInfo().supports(SHARED_ATOMICS));
     CV_Assert(src.type() == CV_8UC1);
 
@@ -2016,7 +1997,7 @@ void cv::gpu::Canny(const GpuMat& dx, const GpuMat& dy, GpuMat& dst, double low_
 
 void cv::gpu::Canny(const GpuMat& dx, const GpuMat& dy, CannyBuf& buf, GpuMat& dst, double low_thresh, double high_thresh, bool L2gradient)
 {
-    using namespace OPENCV_DEVICE_NAMESPACE_ canny;
+    using namespace ::cv::gpu::device::canny;
 
     CV_Assert(TargetArchs::builtWith(SHARED_ATOMICS) && DeviceInfo().supports(SHARED_ATOMICS));
     CV_Assert(dx.type() == CV_32SC1 && dy.type() == CV_32SC1 && dx.size() == dy.size());
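Both Canny overloads assert shared-atomics support and differ only in who owns the scratch memory; passing a CannyBuf keeps the Sobel, magnitude, and hysteresis buffers alive between calls. A hedged usage sketch:

cv::gpu::GpuMat src(480, 640, CV_8UC1);   // input must be CV_8UC1
cv::gpu::GpuMat edges;
cv::gpu::CannyBuf buf;

// Reusing buf across frames avoids reallocation; 3 is the Sobel aperture
// and false selects the L1 gradient norm (parameter defaults assumed).
cv::gpu::Canny(src, buf, edges, 50.0, 150.0, 3, false);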
@@ -274,18 +274,17 @@ void cv::gpu::DeviceInfo::queryMemory(size_t& free_memory, size_t& total_memory)
 ////////////////////////////////////////////////////////////////////
 // GpuFuncTable
 
-BEGIN_OPENCV_DEVICE_NAMESPACE
-
-void copy_to_with_mask(const DevMem2Db& src, DevMem2Db dst, int depth, const DevMem2Db& mask, int channels, const cudaStream_t& stream = 0);
-
-template <typename T>
-void set_to_gpu(const DevMem2Db& mat, const T* scalar, int channels, cudaStream_t stream);
-template <typename T>
-void set_to_gpu(const DevMem2Db& mat, const T* scalar, const DevMem2Db& mask, int channels, cudaStream_t stream);
-
-void convert_gpu(const DevMem2Db& src, int sdepth, const DevMem2Db& dst, int ddepth, double alpha, double beta, cudaStream_t stream = 0);
-
-END_OPENCV_DEVICE_NAMESPACE
+namespace cv { namespace gpu { namespace device
+{
+    void copy_to_with_mask(const DevMem2Db& src, DevMem2Db dst, int depth, const DevMem2Db& mask, int channels, const cudaStream_t& stream = 0);
+
+    template <typename T>
+    void set_to_gpu(const DevMem2Db& mat, const T* scalar, int channels, cudaStream_t stream);
+    template <typename T>
+    void set_to_gpu(const DevMem2Db& mat, const T* scalar, const DevMem2Db& mask, int channels, cudaStream_t stream);
+
+    void convert_gpu(const DevMem2Db& src, int sdepth, const DevMem2Db& dst, int ddepth, double alpha, double beta, cudaStream_t stream = 0);
+}}}
 
 namespace
 {
@@ -345,7 +344,7 @@ namespace
 
     void convertToKernelCaller(const GpuMat& src, GpuMat& dst)
     {
-        OPENCV_DEVICE_NAMESPACE_ convert_gpu(src.reshape(1), src.depth(), dst.reshape(1), dst.depth(), 1.0, 0.0);
+        ::cv::gpu::device::convert_gpu(src.reshape(1), src.depth(), dst.reshape(1), dst.depth(), 1.0, 0.0);
     }
 
     //////////////////////////////////////////////////////////////////////////
 
@@ -403,7 +402,7 @@ namespace
     void kernelSet(GpuMat& src, Scalar s)
     {
         Scalar_<T> sf = s;
-        OPENCV_DEVICE_NAMESPACE_ set_to_gpu(src, sf.val, src.channels(), 0);
+        ::cv::gpu::device::set_to_gpu(src, sf.val, src.channels(), 0);
     }
 
     template<int SDEPTH, int SCN> struct NppSetMaskFunc
 
@@ -458,7 +457,7 @@ namespace
     void kernelSetMask(GpuMat& src, Scalar s, const GpuMat& mask)
     {
         Scalar_<T> sf = s;
-        OPENCV_DEVICE_NAMESPACE_ set_to_gpu(src, sf.val, mask, src.channels(), 0);
+        ::cv::gpu::device::set_to_gpu(src, sf.val, mask, src.channels(), 0);
     }
 
     class CudaFuncTable : public GpuFuncTable
 
@@ -479,7 +478,7 @@ namespace
 
     void copyWithMask(const GpuMat& src, GpuMat& dst, const GpuMat& mask) const
     {
-        OPENCV_DEVICE_NAMESPACE_ copy_to_with_mask(src, dst, src.depth(), mask, src.channels());
+        ::cv::gpu::device::copy_to_with_mask(src, dst, src.depth(), mask, src.channels());
     }
 
     void convert(const GpuMat& src, GpuMat& dst) const
 
@@ -560,7 +559,7 @@ namespace
 
     void convert(const GpuMat& src, GpuMat& dst, double alpha, double beta) const
     {
-        device::convert_gpu(src.reshape(1), src.depth(), dst.reshape(1), dst.depth(), alpha, beta);
+        ::cv::gpu::device::convert_gpu(src.reshape(1), src.depth(), dst.reshape(1), dst.depth(), alpha, beta);
     }
 
     void setTo(GpuMat& m, Scalar s, const GpuMat& mask) const
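kernelSet and kernelSetMask above are instantiated once per element type: a Scalar always carries four doubles, so Scalar_<T> narrows it to the matrix's element type before the typed set_to_gpu<T> kernel runs. A hedged sketch of that idiom (the template head is assumed; the body mirrors the diff):

template <typename T>
void kernelSet(GpuMat& src, Scalar s)
{
    Scalar_<T> sf = s;   // per-channel double -> T conversion
    ::cv::gpu::device::set_to_gpu(src, sf.val, src.channels(), 0);   // sf.val is a T[4]
}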
@@ -52,10 +52,10 @@ void cv::gpu::matchTemplate(const GpuMat&, const GpuMat&, GpuMat&, int, Stream&)
 
 #else
 
-BEGIN_OPENCV_DEVICE_NAMESPACE
-
-namespace match_template
-{
+namespace cv { namespace gpu { namespace device
+{
+    namespace match_template
+    {
     void matchTemplateNaive_CCORR_8U(const DevMem2Db image, const DevMem2Db templ, DevMem2Df result, int cn, cudaStream_t stream);
     void matchTemplateNaive_CCORR_32F(const DevMem2Db image, const DevMem2Db templ, DevMem2Df result, int cn, cudaStream_t stream);
 
@@ -135,11 +135,10 @@ namespace match_template
     unsigned int templ_sqsum, DevMem2Df result, int cn, cudaStream_t stream);
 
     void extractFirstChannel_32F(const DevMem2Db image, DevMem2Df result, int cn, cudaStream_t stream);
 }
+}}}
 
-END_OPENCV_DEVICE_NAMESPACE
-
-using namespace OPENCV_DEVICE_NAMESPACE_ match_template;
+using namespace ::cv::gpu::device::match_template;
 
 namespace
 {
@@ -190,10 +190,10 @@ double cv::gpu::norm(const GpuMat& src1, const GpuMat& src2, int normType)
 ////////////////////////////////////////////////////////////////////////
 // Sum
 
-BEGIN_OPENCV_DEVICE_NAMESPACE
-
-namespace matrix_reductions
-{
+namespace cv { namespace gpu { namespace device
+{
+    namespace matrix_reductions
+    {
     namespace sum
     {
         template <typename T>
 
@@ -216,9 +216,8 @@ namespace matrix_reductions
         void getBufSizeRequired(int cols, int rows, int cn, int& bufcols, int& bufrows);
     }
 }
+}}}
-END_OPENCV_DEVICE_NAMESPACE
 
 Scalar cv::gpu::sum(const GpuMat& src)
 {
 
@@ -229,7 +228,7 @@ Scalar cv::gpu::sum(const GpuMat& src)
 
 Scalar cv::gpu::sum(const GpuMat& src, GpuMat& buf)
 {
-    using namespace OPENCV_DEVICE_NAMESPACE_ matrix_reductions::sum;
+    using namespace ::cv::gpu::device::matrix_reductions::sum;
 
     typedef void (*Caller)(const DevMem2Db, PtrStepb, double*, int);
 
@@ -272,7 +271,7 @@ Scalar cv::gpu::absSum(const GpuMat& src)
 
 Scalar cv::gpu::absSum(const GpuMat& src, GpuMat& buf)
 {
-    using namespace OPENCV_DEVICE_NAMESPACE_ matrix_reductions::sum;
+    using namespace ::cv::gpu::device::matrix_reductions::sum;
 
     typedef void (*Caller)(const DevMem2Db, PtrStepb, double*, int);
 
@@ -316,7 +315,7 @@ Scalar cv::gpu::sqrSum(const GpuMat& src)
 
 Scalar cv::gpu::sqrSum(const GpuMat& src, GpuMat& buf)
 {
-    using namespace OPENCV_DEVICE_NAMESPACE_ matrix_reductions::sum;
+    using namespace ::cv::gpu::device::matrix_reductions::sum;
 
     typedef void (*Caller)(const DevMem2Db, PtrStepb, double*, int);
 
@@ -353,10 +352,10 @@ Scalar cv::gpu::sqrSum(const GpuMat& src, GpuMat& buf)
 ////////////////////////////////////////////////////////////////////////
 // Find min or max
 
-BEGIN_OPENCV_DEVICE_NAMESPACE
-
-namespace matrix_reductions
-{
+namespace cv { namespace gpu { namespace device
+{
+    namespace matrix_reductions
+    {
     namespace minmax
     {
         void getBufSizeRequired(int cols, int rows, int elem_size, int& bufcols, int& bufrows);
 
@@ -373,9 +372,8 @@ namespace matrix_reductions
         template <typename T>
         void minMaxMaskMultipassCaller(const DevMem2Db src, const PtrStepb mask, double* minval, double* maxval, PtrStepb buf);
     }
 }
+}}}
-END_OPENCV_DEVICE_NAMESPACE
 
 
 void cv::gpu::minMax(const GpuMat& src, double* minVal, double* maxVal, const GpuMat& mask)
 
@@ -387,7 +385,7 @@ void cv::gpu::minMax(const GpuMat& src, double* minVal, double* maxVal, const Gp
 
 void cv::gpu::minMax(const GpuMat& src, double* minVal, double* maxVal, const GpuMat& mask, GpuMat& buf)
 {
-    using namespace OPENCV_DEVICE_NAMESPACE_ matrix_reductions::minmax;
+    using namespace ::cv::gpu::device::matrix_reductions::minmax;
 
     typedef void (*Caller)(const DevMem2Db, double*, double*, PtrStepb);
     typedef void (*MaskedCaller)(const DevMem2Db, const PtrStepb, double*, double*, PtrStepb);
 
@@ -457,10 +455,10 @@ void cv::gpu::minMax(const GpuMat& src, double* minVal, double* maxVal, const Gp
 ////////////////////////////////////////////////////////////////////////
 // Locate min and max
 
-BEGIN_OPENCV_DEVICE_NAMESPACE
-
-namespace matrix_reductions
-{
+namespace cv { namespace gpu { namespace device
+{
+    namespace matrix_reductions
+    {
     namespace minmaxloc
     {
         void getBufSizeRequired(int cols, int rows, int elem_size, int& b1cols,
 
@@ -482,9 +480,8 @@ namespace matrix_reductions
         void minMaxLocMaskMultipassCaller(const DevMem2Db src, const PtrStepb mask, double* minval, double* maxval,
                                           int minloc[2], int maxloc[2], PtrStepb valBuf, PtrStepb locBuf);
     }
 }
+}}}
-END_OPENCV_DEVICE_NAMESPACE
 
 void cv::gpu::minMaxLoc(const GpuMat& src, double* minVal, double* maxVal, Point* minLoc, Point* maxLoc, const GpuMat& mask)
 {
 
@@ -495,7 +492,7 @@ void cv::gpu::minMaxLoc(const GpuMat& src, double* minVal, double* maxVal, Point
 void cv::gpu::minMaxLoc(const GpuMat& src, double* minVal, double* maxVal, Point* minLoc, Point* maxLoc,
                         const GpuMat& mask, GpuMat& valBuf, GpuMat& locBuf)
 {
-    using namespace OPENCV_DEVICE_NAMESPACE_ matrix_reductions::minmaxloc;
+    using namespace ::cv::gpu::device::matrix_reductions::minmaxloc;
 
     typedef void (*Caller)(const DevMem2Db, double*, double*, int[2], int[2], PtrStepb, PtrStepb);
     typedef void (*MaskedCaller)(const DevMem2Db, const PtrStepb, double*, double*, int[2], int[2], PtrStepb, PtrStepb);
 
@@ -571,10 +568,10 @@ void cv::gpu::minMaxLoc(const GpuMat& src, double* minVal, double* maxVal, Point
 //////////////////////////////////////////////////////////////////////////////
 // Count non-zero elements
 
-BEGIN_OPENCV_DEVICE_NAMESPACE
-
-namespace matrix_reductions
-{
+namespace cv { namespace gpu { namespace device
+{
+    namespace matrix_reductions
+    {
     namespace countnonzero
     {
         void getBufSizeRequired(int cols, int rows, int& bufcols, int& bufrows);
 
@@ -585,9 +582,8 @@ namespace matrix_reductions
         template <typename T>
         int countNonZeroMultipassCaller(const DevMem2Db src, PtrStepb buf);
     }
 }
+}}}
-END_OPENCV_DEVICE_NAMESPACE
 
 int cv::gpu::countNonZero(const GpuMat& src)
 {
 
@@ -598,7 +594,7 @@ int cv::gpu::countNonZero(const GpuMat& src)
 
 int cv::gpu::countNonZero(const GpuMat& src, GpuMat& buf)
 {
-    using namespace OPENCV_DEVICE_NAMESPACE_ matrix_reductions::countnonzero;
+    using namespace ::cv::gpu::device::matrix_reductions::countnonzero;
 
     typedef int (*Caller)(const DevMem2Db src, PtrStepb buf);
 
@@ -632,19 +628,19 @@ int cv::gpu::countNonZero(const GpuMat& src, GpuMat& buf)
 
 //////////////////////////////////////////////////////////////////////////////
 // reduce
-BEGIN_OPENCV_DEVICE_NAMESPACE
-
-namespace matrix_reductions
-{
+namespace cv { namespace gpu { namespace device
+{
+    namespace matrix_reductions
+    {
     template <typename T, typename S, typename D> void reduceRows_gpu(const DevMem2Db& src, const DevMem2Db& dst, int reduceOp, cudaStream_t stream);
     template <typename T, typename S, typename D> void reduceCols_gpu(const DevMem2Db& src, int cn, const DevMem2Db& dst, int reduceOp, cudaStream_t stream);
     }
+}}}
-END_OPENCV_DEVICE_NAMESPACE
 
 void cv::gpu::reduce(const GpuMat& src, GpuMat& dst, int dim, int reduceOp, int dtype, Stream& stream)
 {
-    using namespace OPENCV_DEVICE_NAMESPACE_ matrix_reductions;
+    using namespace ::cv::gpu::device::matrix_reductions;
 
     CV_Assert(src.depth() <= CV_32F && src.channels() <= 4 && dtype <= CV_32F);
     CV_Assert(dim == 0 || dim == 1);
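Every reduction above comes in a convenience form and a buffered form; the buffered overloads exist so the intermediate device buffer survives across calls, with sizing handled internally via the getBufSizeRequired helpers. A hedged usage sketch:

cv::gpu::GpuMat src(480, 640, CV_8UC1);
cv::gpu::GpuMat sumBuf, minMaxBuf, nzBuf;   // separate scratch buffers, reused per call site

cv::Scalar s = cv::gpu::sum(src, sumBuf);
double mn = 0, mx = 0;
cv::gpu::minMax(src, &mn, &mx, cv::gpu::GpuMat(), minMaxBuf);   // empty mask
int nz = cv::gpu::countNonZero(src, nzBuf);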
@@ -48,13 +48,13 @@
 #include "vec_traits.hpp"
 #include "vec_math.hpp"
 
-BEGIN_OPENCV_DEVICE_NAMESPACE
-
+namespace cv { namespace gpu { namespace device
+{
     //////////////////////////////////////////////////////////////
     // BrdConstant
 
     template <typename D> struct BrdRowConstant
     {
         typedef D result_type;
 
         explicit __host__ __device__ __forceinline__ BrdRowConstant(int width_, const D& val_ = VecTraits<D>::all(0)) : width(width_), val(val_) {}
 
@@ -76,10 +76,10 @@ template <typename D> struct BrdRowConstant
 
         const int width;
         const D val;
     };
 
     template <typename D> struct BrdColConstant
     {
         typedef D result_type;
 
         explicit __host__ __device__ __forceinline__ BrdColConstant(int height_, const D& val_ = VecTraits<D>::all(0)) : height(height_), val(val_) {}
 
@@ -101,10 +101,10 @@ template <typename D> struct BrdColConstant
 
         const int height;
         const D val;
     };
 
     template <typename D> struct BrdConstant
     {
         typedef D result_type;
 
         __host__ __device__ __forceinline__ BrdConstant(int height_, int width_, const D& val_ = VecTraits<D>::all(0)) : height(height_), width(width_), val(val_)
 
@@ -124,13 +124,13 @@ template <typename D> struct BrdConstant
         const int height;
         const int width;
         const D val;
     };
 
     //////////////////////////////////////////////////////////////
     // BrdReplicate
 
     template <typename D> struct BrdRowReplicate
     {
         typedef D result_type;
 
         explicit __host__ __device__ __forceinline__ BrdRowReplicate(int width) : last_col(width - 1) {}
 
@@ -167,10 +167,10 @@ template <typename D> struct BrdRowReplicate
         }
 
         const int last_col;
     };
 
     template <typename D> struct BrdColReplicate
     {
         typedef D result_type;
 
         explicit __host__ __device__ __forceinline__ BrdColReplicate(int height) : last_row(height - 1) {}
 
@@ -207,10 +207,10 @@ template <typename D> struct BrdColReplicate
         }
 
         const int last_row;
     };
 
     template <typename D> struct BrdReplicate
     {
         typedef D result_type;
 
         __host__ __device__ __forceinline__ BrdReplicate(int height, int width) : last_row(height - 1), last_col(width - 1) {}
 
@@ -258,13 +258,13 @@ template <typename D> struct BrdReplicate
 
         const int last_row;
         const int last_col;
     };
 
     //////////////////////////////////////////////////////////////
     // BrdReflect101
 
     template <typename D> struct BrdRowReflect101
     {
         typedef D result_type;
 
         explicit __host__ __device__ __forceinline__ BrdRowReflect101(int width) : last_col(width - 1) {}
 
@@ -301,10 +301,10 @@ template <typename D> struct BrdRowReflect101
         }
 
         const int last_col;
     };
 
     template <typename D> struct BrdColReflect101
     {
         typedef D result_type;
 
         explicit __host__ __device__ __forceinline__ BrdColReflect101(int height) : last_row(height - 1) {}
 
@@ -341,10 +341,10 @@ template <typename D> struct BrdColReflect101
         }
 
         const int last_row;
     };
 
     template <typename D> struct BrdReflect101
     {
         typedef D result_type;
 
         __host__ __device__ __forceinline__ BrdReflect101(int height, int width) : last_row(height - 1), last_col(width - 1) {}
 
@@ -392,13 +392,13 @@ template <typename D> struct BrdReflect101
 
         const int last_row;
         const int last_col;
     };
 
     //////////////////////////////////////////////////////////////
     // BrdReflect
 
     template <typename D> struct BrdRowReflect
     {
         typedef D result_type;
 
         explicit __host__ __device__ __forceinline__ BrdRowReflect(int width) : last_col(width - 1) {}
 
@@ -435,10 +435,10 @@ template <typename D> struct BrdRowReflect
         }
 
         const int last_col;
     };
 
     template <typename D> struct BrdColReflect
     {
         typedef D result_type;
 
         explicit __host__ __device__ __forceinline__ BrdColReflect(int height) : last_row(height - 1) {}
 
@@ -475,10 +475,10 @@ template <typename D> struct BrdColReflect
         }
 
         const int last_row;
     };
 
     template <typename D> struct BrdReflect
     {
         typedef D result_type;
 
         __host__ __device__ __forceinline__ BrdReflect(int height, int width) : last_row(height - 1), last_col(width - 1) {}
 
@@ -506,7 +506,7 @@ template <typename D> struct BrdReflect
 
         __device__ __forceinline__ int idx_col_high(int x) const
         {
-            return /*::abs*/(last_col - ::abs(last_col - x) + (x > last_col)) /*% (last_col + 1)*/;
+            return (last_col - ::abs(last_col - x) + (x > last_col));
         }
 
         __device__ __forceinline__ int idx_col(int x) const
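The one substantive edit in this header is the idx_col_high cleanup above: the commented-out ::abs and % (last_col + 1) remnants are dropped, leaving the plain BORDER_REFLECT arithmetic. A small host-side check of what the surviving expression computes (a standalone sketch, not OpenCV code):

#include <cassert>
#include <cstdlib>

// BORDER_REFLECT (fedcba|abcdefgh|hgfedcb): past the right edge the index
// walks back from the border with the edge pixel repeated once.
int idx_col_high(int x, int last_col)
{
    return last_col - std::abs(last_col - x) + (x > last_col);
}

int main()
{
    assert(idx_col_high(4, 4) == 4);   // in range: unchanged
    assert(idx_col_high(5, 4) == 4);   // one past the edge reflects onto the edge
    assert(idx_col_high(6, 4) == 3);   // then 3, 2, 1, ...
    return 0;
}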
@@ -526,13 +526,13 @@ template <typename D> struct BrdReflect
 
         const int last_row;
         const int last_col;
     };
 
     //////////////////////////////////////////////////////////////
     // BrdWrap
 
     template <typename D> struct BrdRowWrap
     {
         typedef D result_type;
 
         explicit __host__ __device__ __forceinline__ BrdRowWrap(int width_) : width(width_) {}
 
@@ -569,10 +569,10 @@ template <typename D> struct BrdRowWrap
         }
 
         const int width;
     };
 
     template <typename D> struct BrdColWrap
     {
         typedef D result_type;
 
         explicit __host__ __device__ __forceinline__ BrdColWrap(int height_) : height(height_) {}
 
@@ -609,10 +609,10 @@ template <typename D> struct BrdColWrap
         }
 
         const int height;
     };
 
     template <typename D> struct BrdWrap
     {
         typedef D result_type;
 
         __host__ __device__ __forceinline__ BrdWrap(int height_, int width_) :
 
@@ -667,13 +667,13 @@ template <typename D> struct BrdWrap
 
         const int height;
         const int width;
     };
 
     //////////////////////////////////////////////////////////////
     // BorderReader
 
     template <typename Ptr2D, typename B> struct BorderReader
     {
         typedef typename B::result_type elem_type;
         typedef typename Ptr2D::index_type index_type;
 
@@ -686,12 +686,12 @@ template <typename Ptr2D, typename B> struct BorderReader
 
         const Ptr2D ptr;
         const B b;
     };
 
     // under win32 there is some bug with templated types that passed as kernel parameters
     // with this specialization all works fine
     template <typename Ptr2D, typename D> struct BorderReader< Ptr2D, BrdConstant<D> >
     {
         typedef typename BrdConstant<D>::result_type elem_type;
         typedef typename Ptr2D::index_type index_type;
 
@@ -709,8 +709,7 @@ template <typename Ptr2D, typename D> struct BorderReader< Ptr2D, BrdConstant<D>
         const int height;
         const int width;
         const D val;
     };
+}}} // namespace cv { namespace gpu { namespace device
 
-END_OPENCV_DEVICE_NAMESPACE
-
 #endif // __OPENCV_GPU_BORDER_INTERPOLATE_HPP__
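BorderReader composes a 2-D data pointer with one of the Brd* policies so a kernel can index out of bounds safely; the win32 comment above explains why BrdConstant gets its own flattened specialization. A hedged sketch of the composition (the constructor and operator() signatures are assumptions; the members and typedefs come from the diff):

// Host side: wrap a device pointer with a reflect-101 policy.
BrdReflect101<float> border(rows, cols);
BorderReader< PtrStep<float>, BrdReflect101<float> > reader(src, border);

// Device side (inside a kernel): out-of-range coordinates are remapped by
// the policy before the underlying pointer is dereferenced, e.g.
//     float v = reader(y - 1, x + 2);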
@@ -46,178 +46,177 @@
 #include "internal_shared.hpp"
 #include "detail/color_detail.hpp"
 
-BEGIN_OPENCV_DEVICE_NAMESPACE
-
+namespace cv { namespace gpu { namespace device
+{
     // All OPENCV_GPU_IMPLEMENT_*_TRAITS(ColorSpace1_to_ColorSpace2, ...) macros implements
     // template <typename T> class ColorSpace1_to_ColorSpace2_traits
     // {
     //     typedef ... functor_type;
     //     static __host__ __device__ functor_type create_functor();
     // };
 
     OPENCV_GPU_IMPLEMENT_RGB2RGB_TRAITS(bgr_to_rgb, 3, 3, 2)
     OPENCV_GPU_IMPLEMENT_RGB2RGB_TRAITS(bgr_to_bgra, 3, 4, 0)
     OPENCV_GPU_IMPLEMENT_RGB2RGB_TRAITS(bgr_to_rgba, 3, 4, 2)
     OPENCV_GPU_IMPLEMENT_RGB2RGB_TRAITS(bgra_to_bgr, 4, 3, 0)
     OPENCV_GPU_IMPLEMENT_RGB2RGB_TRAITS(bgra_to_rgb, 4, 3, 2)
     OPENCV_GPU_IMPLEMENT_RGB2RGB_TRAITS(bgra_to_rgba, 4, 4, 2)
 
     #undef OPENCV_GPU_IMPLEMENT_RGB2RGB_TRAITS
 
     OPENCV_GPU_IMPLEMENT_RGB2RGB5x5_TRAITS(bgr_to_bgr555, 3, 0, 5)
     OPENCV_GPU_IMPLEMENT_RGB2RGB5x5_TRAITS(bgr_to_bgr565, 3, 0, 6)
     OPENCV_GPU_IMPLEMENT_RGB2RGB5x5_TRAITS(rgb_to_bgr555, 3, 2, 5)
     OPENCV_GPU_IMPLEMENT_RGB2RGB5x5_TRAITS(rgb_to_bgr565, 3, 2, 6)
     OPENCV_GPU_IMPLEMENT_RGB2RGB5x5_TRAITS(bgra_to_bgr555, 4, 0, 5)
     OPENCV_GPU_IMPLEMENT_RGB2RGB5x5_TRAITS(bgra_to_bgr565, 4, 0, 6)
     OPENCV_GPU_IMPLEMENT_RGB2RGB5x5_TRAITS(rgba_to_bgr555, 4, 2, 5)
     OPENCV_GPU_IMPLEMENT_RGB2RGB5x5_TRAITS(rgba_to_bgr565, 4, 2, 6)
 
     #undef OPENCV_GPU_IMPLEMENT_RGB2RGB5x5_TRAITS
 
     OPENCV_GPU_IMPLEMENT_RGB5x52RGB_TRAITS(bgr555_to_rgb, 3, 2, 5)
     OPENCV_GPU_IMPLEMENT_RGB5x52RGB_TRAITS(bgr565_to_rgb, 3, 2, 6)
     OPENCV_GPU_IMPLEMENT_RGB5x52RGB_TRAITS(bgr555_to_bgr, 3, 0, 5)
     OPENCV_GPU_IMPLEMENT_RGB5x52RGB_TRAITS(bgr565_to_bgr, 3, 0, 6)
     OPENCV_GPU_IMPLEMENT_RGB5x52RGB_TRAITS(bgr555_to_rgba, 4, 2, 5)
     OPENCV_GPU_IMPLEMENT_RGB5x52RGB_TRAITS(bgr565_to_rgba, 4, 2, 6)
     OPENCV_GPU_IMPLEMENT_RGB5x52RGB_TRAITS(bgr555_to_bgra, 4, 0, 5)
     OPENCV_GPU_IMPLEMENT_RGB5x52RGB_TRAITS(bgr565_to_bgra, 4, 0, 6)
 
     #undef OPENCV_GPU_IMPLEMENT_RGB5x52RGB_TRAITS
 
     OPENCV_GPU_IMPLEMENT_GRAY2RGB_TRAITS(gray_to_bgr, 3)
     OPENCV_GPU_IMPLEMENT_GRAY2RGB_TRAITS(gray_to_bgra, 4)
 
     #undef OPENCV_GPU_IMPLEMENT_GRAY2RGB_TRAITS
 
     OPENCV_GPU_IMPLEMENT_GRAY2RGB5x5_TRAITS(gray_to_bgr555, 5)
     OPENCV_GPU_IMPLEMENT_GRAY2RGB5x5_TRAITS(gray_to_bgr565, 6)
 
     #undef OPENCV_GPU_IMPLEMENT_GRAY2RGB5x5_TRAITS
 
     OPENCV_GPU_IMPLEMENT_RGB5x52GRAY_TRAITS(bgr555_to_gray, 5)
     OPENCV_GPU_IMPLEMENT_RGB5x52GRAY_TRAITS(bgr565_to_gray, 6)
 
     #undef OPENCV_GPU_IMPLEMENT_RGB5x52GRAY_TRAITS
 
     OPENCV_GPU_IMPLEMENT_RGB2GRAY_TRAITS(rgb_to_gray, 3, 2)
     OPENCV_GPU_IMPLEMENT_RGB2GRAY_TRAITS(bgr_to_gray, 3, 0)
     OPENCV_GPU_IMPLEMENT_RGB2GRAY_TRAITS(rgba_to_gray, 4, 2)
     OPENCV_GPU_IMPLEMENT_RGB2GRAY_TRAITS(bgra_to_gray, 4, 0)
 
     #undef OPENCV_GPU_IMPLEMENT_RGB2GRAY_TRAITS
 
     OPENCV_GPU_IMPLEMENT_RGB2YUV_TRAITS(rgb_to_yuv, 3, 3, 0)
     OPENCV_GPU_IMPLEMENT_RGB2YUV_TRAITS(rgba_to_yuv, 4, 3, 0)
     OPENCV_GPU_IMPLEMENT_RGB2YUV_TRAITS(rgb_to_yuv4, 3, 4, 0)
     OPENCV_GPU_IMPLEMENT_RGB2YUV_TRAITS(rgba_to_yuv4, 4, 4, 0)
     OPENCV_GPU_IMPLEMENT_RGB2YUV_TRAITS(bgr_to_yuv, 3, 3, 2)
     OPENCV_GPU_IMPLEMENT_RGB2YUV_TRAITS(bgra_to_yuv, 4, 3, 2)
     OPENCV_GPU_IMPLEMENT_RGB2YUV_TRAITS(bgr_to_yuv4, 3, 4, 2)
     OPENCV_GPU_IMPLEMENT_RGB2YUV_TRAITS(bgra_to_yuv4, 4, 4, 2)
 
     #undef OPENCV_GPU_IMPLEMENT_RGB2YUV_TRAITS
 
     OPENCV_GPU_IMPLEMENT_YUV2RGB_TRAITS(yuv_to_rgb, 3, 3, 0)
     OPENCV_GPU_IMPLEMENT_YUV2RGB_TRAITS(yuv_to_rgba, 3, 4, 0)
     OPENCV_GPU_IMPLEMENT_YUV2RGB_TRAITS(yuv4_to_rgb, 4, 3, 0)
     OPENCV_GPU_IMPLEMENT_YUV2RGB_TRAITS(yuv4_to_rgba, 4, 4, 0)
     OPENCV_GPU_IMPLEMENT_YUV2RGB_TRAITS(yuv_to_bgr, 3, 3, 2)
     OPENCV_GPU_IMPLEMENT_YUV2RGB_TRAITS(yuv_to_bgra, 3, 4, 2)
     OPENCV_GPU_IMPLEMENT_YUV2RGB_TRAITS(yuv4_to_bgr, 4, 3, 2)
     OPENCV_GPU_IMPLEMENT_YUV2RGB_TRAITS(yuv4_to_bgra, 4, 4, 2)
 
     #undef OPENCV_GPU_IMPLEMENT_YUV2RGB_TRAITS
 
     OPENCV_GPU_IMPLEMENT_RGB2YCrCb_TRAITS(rgb_to_YCrCb, 3, 3, 2)
     OPENCV_GPU_IMPLEMENT_RGB2YCrCb_TRAITS(rgba_to_YCrCb, 4, 3, 2)
     OPENCV_GPU_IMPLEMENT_RGB2YCrCb_TRAITS(rgb_to_YCrCb4, 3, 4, 2)
     OPENCV_GPU_IMPLEMENT_RGB2YCrCb_TRAITS(rgba_to_YCrCb4, 4, 4, 2)
     OPENCV_GPU_IMPLEMENT_RGB2YCrCb_TRAITS(bgr_to_YCrCb, 3, 3, 0)
     OPENCV_GPU_IMPLEMENT_RGB2YCrCb_TRAITS(bgra_to_YCrCb, 4, 3, 0)
     OPENCV_GPU_IMPLEMENT_RGB2YCrCb_TRAITS(bgr_to_YCrCb4, 3, 4, 0)
     OPENCV_GPU_IMPLEMENT_RGB2YCrCb_TRAITS(bgra_to_YCrCb4, 4, 4, 0)
 
     #undef OPENCV_GPU_IMPLEMENT_RGB2YCrCb_TRAITS
 
     OPENCV_GPU_IMPLEMENT_YCrCb2RGB_TRAITS(YCrCb_to_rgb, 3, 3, 2)
     OPENCV_GPU_IMPLEMENT_YCrCb2RGB_TRAITS(YCrCb_to_rgba, 3, 4, 2)
     OPENCV_GPU_IMPLEMENT_YCrCb2RGB_TRAITS(YCrCb4_to_rgb, 4, 3, 2)
     OPENCV_GPU_IMPLEMENT_YCrCb2RGB_TRAITS(YCrCb4_to_rgba, 4, 4, 2)
     OPENCV_GPU_IMPLEMENT_YCrCb2RGB_TRAITS(YCrCb_to_bgr, 3, 3, 0)
     OPENCV_GPU_IMPLEMENT_YCrCb2RGB_TRAITS(YCrCb_to_bgra, 3, 4, 0)
     OPENCV_GPU_IMPLEMENT_YCrCb2RGB_TRAITS(YCrCb4_to_bgr, 4, 3, 0)
     OPENCV_GPU_IMPLEMENT_YCrCb2RGB_TRAITS(YCrCb4_to_bgra, 4, 4, 0)
 
     #undef OPENCV_GPU_IMPLEMENT_YCrCb2RGB_TRAITS
 
     OPENCV_GPU_IMPLEMENT_RGB2XYZ_TRAITS(rgb_to_xyz, 3, 3, 2)
     OPENCV_GPU_IMPLEMENT_RGB2XYZ_TRAITS(rgba_to_xyz, 4, 3, 2)
     OPENCV_GPU_IMPLEMENT_RGB2XYZ_TRAITS(rgb_to_xyz4, 3, 4, 2)
     OPENCV_GPU_IMPLEMENT_RGB2XYZ_TRAITS(rgba_to_xyz4, 4, 4, 2)
     OPENCV_GPU_IMPLEMENT_RGB2XYZ_TRAITS(bgr_to_xyz, 3, 3, 0)
     OPENCV_GPU_IMPLEMENT_RGB2XYZ_TRAITS(bgra_to_xyz, 4, 3, 0)
     OPENCV_GPU_IMPLEMENT_RGB2XYZ_TRAITS(bgr_to_xyz4, 3, 4, 0)
     OPENCV_GPU_IMPLEMENT_RGB2XYZ_TRAITS(bgra_to_xyz4, 4, 4, 0)
 
     #undef OPENCV_GPU_IMPLEMENT_RGB2XYZ_TRAITS
 
     OPENCV_GPU_IMPLEMENT_XYZ2RGB_TRAITS(xyz_to_rgb, 3, 3, 2)
     OPENCV_GPU_IMPLEMENT_XYZ2RGB_TRAITS(xyz4_to_rgb, 4, 3, 2)
     OPENCV_GPU_IMPLEMENT_XYZ2RGB_TRAITS(xyz_to_rgba, 3, 4, 2)
     OPENCV_GPU_IMPLEMENT_XYZ2RGB_TRAITS(xyz4_to_rgba, 4, 4, 2)
     OPENCV_GPU_IMPLEMENT_XYZ2RGB_TRAITS(xyz_to_bgr, 3, 3, 0)
     OPENCV_GPU_IMPLEMENT_XYZ2RGB_TRAITS(xyz4_to_bgr, 4, 3, 0)
     OPENCV_GPU_IMPLEMENT_XYZ2RGB_TRAITS(xyz_to_bgra, 3, 4, 0)
     OPENCV_GPU_IMPLEMENT_XYZ2RGB_TRAITS(xyz4_to_bgra, 4, 4, 0)
 
     #undef OPENCV_GPU_IMPLEMENT_XYZ2RGB_TRAITS
 
     OPENCV_GPU_IMPLEMENT_RGB2HSV_TRAITS(rgb_to_hsv, 3, 3, 2)
     OPENCV_GPU_IMPLEMENT_RGB2HSV_TRAITS(rgba_to_hsv, 4, 3, 2)
     OPENCV_GPU_IMPLEMENT_RGB2HSV_TRAITS(rgb_to_hsv4, 3, 4, 2)
     OPENCV_GPU_IMPLEMENT_RGB2HSV_TRAITS(rgba_to_hsv4, 4, 4, 2)
     OPENCV_GPU_IMPLEMENT_RGB2HSV_TRAITS(bgr_to_hsv, 3, 3, 0)
     OPENCV_GPU_IMPLEMENT_RGB2HSV_TRAITS(bgra_to_hsv, 4, 3, 0)
     OPENCV_GPU_IMPLEMENT_RGB2HSV_TRAITS(bgr_to_hsv4, 3, 4, 0)
     OPENCV_GPU_IMPLEMENT_RGB2HSV_TRAITS(bgra_to_hsv4, 4, 4, 0)
 
     #undef OPENCV_GPU_IMPLEMENT_RGB2HSV_TRAITS
 
     OPENCV_GPU_IMPLEMENT_HSV2RGB_TRAITS(hsv_to_rgb, 3, 3, 2)
     OPENCV_GPU_IMPLEMENT_HSV2RGB_TRAITS(hsv_to_rgba, 3, 4, 2)
     OPENCV_GPU_IMPLEMENT_HSV2RGB_TRAITS(hsv4_to_rgb, 4, 3, 2)
     OPENCV_GPU_IMPLEMENT_HSV2RGB_TRAITS(hsv4_to_rgba, 4, 4, 2)
     OPENCV_GPU_IMPLEMENT_HSV2RGB_TRAITS(hsv_to_bgr, 3, 3, 0)
     OPENCV_GPU_IMPLEMENT_HSV2RGB_TRAITS(hsv_to_bgra, 3, 4, 0)
     OPENCV_GPU_IMPLEMENT_HSV2RGB_TRAITS(hsv4_to_bgr, 4, 3, 0)
     OPENCV_GPU_IMPLEMENT_HSV2RGB_TRAITS(hsv4_to_bgra, 4, 4, 0)
 
     #undef OPENCV_GPU_IMPLEMENT_HSV2RGB_TRAITS
 
     OPENCV_GPU_IMPLEMENT_RGB2HLS_TRAITS(rgb_to_hls, 3, 3, 2)
     OPENCV_GPU_IMPLEMENT_RGB2HLS_TRAITS(rgba_to_hls, 4, 3, 2)
     OPENCV_GPU_IMPLEMENT_RGB2HLS_TRAITS(rgb_to_hls4, 3, 4, 2)
     OPENCV_GPU_IMPLEMENT_RGB2HLS_TRAITS(rgba_to_hls4, 4, 4, 2)
     OPENCV_GPU_IMPLEMENT_RGB2HLS_TRAITS(bgr_to_hls, 3, 3, 0)
     OPENCV_GPU_IMPLEMENT_RGB2HLS_TRAITS(bgra_to_hls, 4, 3, 0)
     OPENCV_GPU_IMPLEMENT_RGB2HLS_TRAITS(bgr_to_hls4, 3, 4, 0)
     OPENCV_GPU_IMPLEMENT_RGB2HLS_TRAITS(bgra_to_hls4, 4, 4, 0)
 
     #undef OPENCV_GPU_IMPLEMENT_RGB2HLS_TRAITS
 
     OPENCV_GPU_IMPLEMENT_HLS2RGB_TRAITS(hls_to_rgb, 3, 3, 2)
     OPENCV_GPU_IMPLEMENT_HLS2RGB_TRAITS(hls_to_rgba, 3, 4, 2)
     OPENCV_GPU_IMPLEMENT_HLS2RGB_TRAITS(hls4_to_rgb, 4, 3, 2)
     OPENCV_GPU_IMPLEMENT_HLS2RGB_TRAITS(hls4_to_rgba, 4, 4, 2)
     OPENCV_GPU_IMPLEMENT_HLS2RGB_TRAITS(hls_to_bgr, 3, 3, 0)
     OPENCV_GPU_IMPLEMENT_HLS2RGB_TRAITS(hls_to_bgra, 3, 4, 0)
     OPENCV_GPU_IMPLEMENT_HLS2RGB_TRAITS(hls4_to_bgr, 4, 3, 0)
     OPENCV_GPU_IMPLEMENT_HLS2RGB_TRAITS(hls4_to_bgra, 4, 4, 0)
 
     #undef OPENCV_GPU_IMPLEMENT_HLS2RGB_TRAITS
-
-END_OPENCV_DEVICE_NAMESPACE
+}}} // namespace cv { namespace gpu { namespace device
 
 #endif // __OPENCV_GPU_BORDER_INTERPOLATE_HPP__
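Each invocation above stamps out a name_traits class with exactly the interface documented in the comment block at the top of the file. A hedged sketch of consuming one generated trait (uchar3/uchar4 are the usual CUDA vector types for 3- and 4-channel 8-bit pixels; convert_one is a hypothetical helper):

typedef bgr_to_rgba_traits<uchar>::functor_type func_t;

__device__ uchar4 convert_one(const uchar3& bgr)
{
    func_t cvt = bgr_to_rgba_traits<uchar>::create_functor();
    return cvt(bgr);   // maps one BGR pixel to RGBA
}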
@@ -45,16 +45,8 @@

 #include "internal_shared.hpp"

-BEGIN_OPENCV_DEVICE_NAMESPACE
-
-#if defined(_WIN64) || defined(__LP64__)
-// 64-bit register modifier for inlined asm
-#define OPENCV_GPU_ASM_PTR "l"
-#else
-// 32-bit register modifier for inlined asm
-#define OPENCV_GPU_ASM_PTR "r"
-#endif
+namespace cv { namespace gpu { namespace device
+{

 #if __CUDA_ARCH__ >= 200

 // for Fermi memory space is detected automatically

@@ -65,6 +57,14 @@ BEGIN_OPENCV_DEVICE_NAMESPACE

 #else // __CUDA_ARCH__ >= 200

+#if defined(_WIN64) || defined(__LP64__)
+// 64-bit register modifier for inlined asm
+#define OPENCV_GPU_ASM_PTR "l"
+#else
+// 32-bit register modifier for inlined asm
+#define OPENCV_GPU_ASM_PTR "r"
+#endif
+
 template<class T> struct ForceGlob;

 #define OPENCV_GPU_DEFINE_FORCE_GLOB(base_type, ptx_type, reg_mod) \

@@ -97,9 +97,9 @@ BEGIN_OPENCV_DEVICE_NAMESPACE

 #undef OPENCV_GPU_DEFINE_FORCE_GLOB
 #undef OPENCV_GPU_DEFINE_FORCE_GLOB_B
+#undef OPENCV_GPU_ASM_PTR

 #endif // __CUDA_ARCH__ >= 200

-END_OPENCV_DEVICE_NAMESPACE
+}}} // namespace cv { namespace gpu { namespace device

 #endif // __OPENCV_GPU_DATAMOV_UTILS_HPP__
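Note: the relocated OPENCV_GPU_ASM_PTR macro feeds the OPENCV_GPU_DEFINE_FORCE_GLOB expansion. Roughly — a sketch, not the verbatim macro body — each instantiation emits an inline-PTX global load such as:

// Approximate expansion of OPENCV_GPU_DEFINE_FORCE_GLOB(float, f32, f)
// on a 64-bit target (OPENCV_GPU_ASM_PTR == "l"); treat as illustrative.
template <> struct ForceGlob<float>
{
    __device__ __forceinline__ static void Load(const float* ptr, int offset, float& val)
    {
        asm("ld.global.f32 %0, [%1];" : "=f"(val) : "l"(ptr + offset));
    }
};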
@@ -49,14 +49,14 @@
 #include "../limits.hpp"
 #include "../functional.hpp"

-BEGIN_OPENCV_DEVICE_NAMESPACE
-
-#ifndef CV_DESCALE
-#define CV_DESCALE(x, n) (((x) + (1 << ((n)-1))) >> (n))
-#endif
-
-namespace detail
+namespace cv { namespace gpu { namespace device
 {
+#ifndef CV_DESCALE
+#define CV_DESCALE(x, n) (((x) + (1 << ((n)-1))) >> (n))
+#endif
+
+namespace color_detail
+{
 template<typename T> struct ColorChannel
 {
 typedef float worktype_f;

@@ -95,12 +95,12 @@ namespace detail
 B2Y = 1868,
 BLOCK_SIZE = 256
 };
 }

 ////////////////// Various 3/4-channel to 3/4-channel RGB transformations /////////////////

-namespace detail
+namespace color_detail
 {
 template <typename T, int scn, int dcn, int bidx> struct RGB2RGB : unary_function<typename TypeVec<T, scn>::vec_type, typename TypeVec<T, dcn>::vec_type>
 {
 __device__ typename TypeVec<T, dcn>::vec_type operator()(const typename TypeVec<T, scn>::vec_type& src) const

@@ -130,12 +130,12 @@ namespace detail
 return dst;
 }
 };
 }

 #define OPENCV_GPU_IMPLEMENT_RGB2RGB_TRAITS(name, scn, dcn, bidx) \
 template <typename T> struct name ## _traits \
 { \
-typedef detail::RGB2RGB<T, scn, dcn, bidx> functor_type; \
+typedef ::cv::gpu::device::color_detail::RGB2RGB<T, scn, dcn, bidx> functor_type; \
 static __host__ __device__ __forceinline__ functor_type create_functor() \
 { \
 return functor_type(); \

@@ -144,8 +144,8 @@ namespace detail

 /////////// Transforming 16-bit (565 or 555) RGB to/from 24/32-bit (888[8]) RGB //////////

-namespace detail
+namespace color_detail
 {
 template <int green_bits, int bidx> struct RGB2RGB5x5Converter;
 template<int bidx> struct RGB2RGB5x5Converter<6, bidx>
 {
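Note: CV_DESCALE is the fixed-point rounding helper used throughout this header; a quick host-side check of its round-half-up behaviour:

// CV_DESCALE(x, n) == (x + 2^(n-1)) >> n, i.e. divide by 2^n with rounding.
#define CV_DESCALE(x, n) (((x) + (1 << ((n)-1))) >> (n))

int main()
{
    // 300/16 = 18.75 -> 19; 296/16 = 18.5 -> rounds up to 19
    return (CV_DESCALE(300, 4) == 19 && CV_DESCALE(296, 4) == 19) ? 0 : 1;
}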
@@ -192,20 +192,20 @@ namespace detail
 return RGB2RGB5x5Converter<green_bits, bidx>::cvt(src);
 }
 };
 }

 #define OPENCV_GPU_IMPLEMENT_RGB2RGB5x5_TRAITS(name, scn, bidx, green_bits) \
 struct name ## _traits \
 { \
-typedef detail::RGB2RGB5x5<scn, bidx, green_bits> functor_type; \
+typedef ::cv::gpu::device::color_detail::RGB2RGB5x5<scn, bidx, green_bits> functor_type; \
 static __host__ __device__ __forceinline__ functor_type create_functor() \
 { \
 return functor_type(); \
 } \
 };

-namespace detail
+namespace color_detail
 {
 template <int green_bits, int bidx> struct RGB5x52RGBConverter;
 template <int bidx> struct RGB5x52RGBConverter<5, bidx>
 {

@@ -262,12 +262,12 @@ namespace detail
 return dst;
 }
 };
 }

 #define OPENCV_GPU_IMPLEMENT_RGB5x52RGB_TRAITS(name, dcn, bidx, green_bits) \
 struct name ## _traits \
 { \
-typedef detail::RGB5x52RGB<dcn, bidx, green_bits> functor_type; \
+typedef ::cv::gpu::device::color_detail::RGB5x52RGB<dcn, bidx, green_bits> functor_type; \
 static __host__ __device__ __forceinline__ functor_type create_functor() \
 { \
 return functor_type(); \

@@ -276,8 +276,8 @@ namespace detail

 ///////////////////////////////// Grayscale to Color ////////////////////////////////

-namespace detail
+namespace color_detail
 {
 template <typename T, int dcn> struct Gray2RGB : unary_function<T, typename TypeVec<T, dcn>::vec_type>
 {
 __device__ __forceinline__ typename TypeVec<T, dcn>::vec_type operator()(T src) const
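Note: the 565/555 converters above reduce to bit packing; the layout behind the green_bits == 6 case, as a standalone sketch (bidx only decides which of R/B lands in the low field):

// 16-bit 565 layout: bits 0-4 blue, 5-10 green, 11-15 red.
// Illustrative only; the kernels work through the traits above.
unsigned short pack565(unsigned char r, unsigned char g, unsigned char b)
{
    return (unsigned short)((b >> 3) | ((g >> 2) << 5) | ((r >> 3) << 11));
}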
@@ -303,20 +303,20 @@ namespace detail
 return dst;
 }
 };
 }

 #define OPENCV_GPU_IMPLEMENT_GRAY2RGB_TRAITS(name, dcn) \
 template <typename T> struct name ## _traits \
 { \
-typedef detail::Gray2RGB<T, dcn> functor_type; \
+typedef ::cv::gpu::device::color_detail::Gray2RGB<T, dcn> functor_type; \
 static __host__ __device__ __forceinline__ functor_type create_functor() \
 { \
 return functor_type(); \
 } \
 };

-namespace detail
+namespace color_detail
 {
 template <int green_bits> struct Gray2RGB5x5Converter;
 template<> struct Gray2RGB5x5Converter<6>
 {

@@ -341,12 +341,12 @@ namespace detail
 return Gray2RGB5x5Converter<green_bits>::cvt(src);
 }
 };
 }

 #define OPENCV_GPU_IMPLEMENT_GRAY2RGB5x5_TRAITS(name, green_bits) \
 struct name ## _traits \
 { \
-typedef detail::Gray2RGB5x5<green_bits> functor_type; \
+typedef ::cv::gpu::device::color_detail::Gray2RGB5x5<green_bits> functor_type; \
 static __host__ __device__ __forceinline__ functor_type create_functor() \
 { \
 return functor_type(); \

@@ -355,8 +355,8 @@ namespace detail

 ///////////////////////////////// Color to Grayscale ////////////////////////////////

-namespace detail
+namespace color_detail
 {
 template <int green_bits> struct RGB5x52GrayConverter;
 template <> struct RGB5x52GrayConverter<6>
 {
@@ -380,20 +380,20 @@ namespace detail
 return RGB5x52GrayConverter<green_bits>::cvt(src);
 }
 };
 }

 #define OPENCV_GPU_IMPLEMENT_RGB5x52GRAY_TRAITS(name, green_bits) \
 struct name ## _traits \
 { \
-typedef detail::RGB5x52Gray<green_bits> functor_type; \
+typedef ::cv::gpu::device::color_detail::RGB5x52Gray<green_bits> functor_type; \
 static __host__ __device__ __forceinline__ functor_type create_functor() \
 { \
 return functor_type(); \
 } \
 };

-namespace detail
+namespace color_detail
 {
 template <int bidx, typename T> static __device__ __forceinline__ T RGB2GrayConvert(const T* src)
 {
 return (T)CV_DESCALE((unsigned)(src[bidx] * B2Y + src[1] * G2Y + src[bidx^2] * R2Y), yuv_shift);

@@ -424,12 +424,12 @@ namespace detail
 return RGB2GrayConvert<bidx>(src);
 }
 };
 }

 #define OPENCV_GPU_IMPLEMENT_RGB2GRAY_TRAITS(name, scn, bidx) \
 template <typename T> struct name ## _traits \
 { \
-typedef detail::RGB2Gray<T, scn, bidx> functor_type; \
+typedef ::cv::gpu::device::color_detail::RGB2Gray<T, scn, bidx> functor_type; \
 static __host__ __device__ __forceinline__ functor_type create_functor() \
 { \
 return functor_type(); \

@@ -438,8 +438,8 @@ namespace detail

 ///////////////////////////////////// RGB <-> YUV //////////////////////////////////////

-namespace detail
+namespace color_detail
 {
 __constant__ float c_RGB2YUVCoeffs_f[5] = { 0.114f, 0.587f, 0.299f, 0.492f, 0.877f };
 __constant__ int c_RGB2YUVCoeffs_i[5] = { B2Y, G2Y, R2Y, 8061, 14369 };
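Note: the src[bidx] / src[bidx ^ 2] indexing in RGB2GrayConvert covers both channel orders with one template: bidx == 0 reads blue from slot 0 and red from slot 2, bidx == 2 the reverse, green is always src[1]. A host-side sketch of the same arithmetic, assuming the companion constants R2Y = 4899, G2Y = 9617 and yuv_shift = 14 that OpenCV pairs with the B2Y = 1868 visible earlier:

// Equivalent scalar form of RGB2GrayConvert for one 3-channel uchar pixel.
// Constants other than B2Y are assumptions stated above.
unsigned char rgb2gray(const unsigned char src[3], int bidx /* 0 = BGR, 2 = RGB */)
{
    unsigned sum = src[bidx] * 1868u + src[1] * 9617u + src[bidx ^ 2] * 4899u;
    return (unsigned char)((sum + (1u << 13)) >> 14); // CV_DESCALE(sum, yuv_shift)
}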
@@ -494,20 +494,20 @@ namespace detail
 return RGB2YUVConvert<bidx>(src);
 }
 };
 }

 #define OPENCV_GPU_IMPLEMENT_RGB2YUV_TRAITS(name, scn, dcn, bidx) \
 template <typename T> struct name ## _traits \
 { \
-typedef detail::RGB2YUV<T, scn, dcn, bidx> functor_type; \
+typedef ::cv::gpu::device::color_detail::RGB2YUV<T, scn, dcn, bidx> functor_type; \
 static __host__ __device__ __forceinline__ functor_type create_functor() \
 { \
 return functor_type(); \
 } \
 };

-namespace detail
+namespace color_detail
 {
 __constant__ float c_YUV2RGBCoeffs_f[5] = { 2.032f, -0.395f, -0.581f, 1.140f };
 __constant__ int c_YUV2RGBCoeffs_i[5] = { 33292, -6472, -9519, 18678 };

@@ -565,12 +565,12 @@ namespace detail
 return YUV2RGBConvert<bidx>(src);
 }
 };
 }

 #define OPENCV_GPU_IMPLEMENT_YUV2RGB_TRAITS(name, scn, dcn, bidx) \
 template <typename T> struct name ## _traits \
 { \
-typedef detail::YUV2RGB<T, scn, dcn, bidx> functor_type; \
+typedef ::cv::gpu::device::color_detail::YUV2RGB<T, scn, dcn, bidx> functor_type; \
 static __host__ __device__ __forceinline__ functor_type create_functor() \
 { \
 return functor_type(); \

@@ -579,8 +579,8 @@ namespace detail

 ///////////////////////////////////// RGB <-> YCrCb //////////////////////////////////////

-namespace detail
+namespace color_detail
 {
 __constant__ float c_RGB2YCrCbCoeffs_f[5] = {0.299f, 0.587f, 0.114f, 0.713f, 0.564f};
 __constant__ int c_RGB2YCrCbCoeffs_i[5] = {R2Y, G2Y, B2Y, 11682, 9241};
@@ -635,20 +635,20 @@ namespace detail
 return RGB2YCrCbConvert<bidx>(src);
 }
 };
 }

 #define OPENCV_GPU_IMPLEMENT_RGB2YCrCb_TRAITS(name, scn, dcn, bidx) \
 template <typename T> struct name ## _traits \
 { \
-typedef detail::RGB2YCrCb<T, scn, dcn, bidx> functor_type; \
+typedef ::cv::gpu::device::color_detail::RGB2YCrCb<T, scn, dcn, bidx> functor_type; \
 static __host__ __device__ __forceinline__ functor_type create_functor() \
 { \
 return functor_type(); \
 } \
 };

-namespace detail
+namespace color_detail
 {
 __constant__ float c_YCrCb2RGBCoeffs_f[5] = {1.403f, -0.714f, -0.344f, 1.773f};
 __constant__ int c_YCrCb2RGBCoeffs_i[5] = {22987, -11698, -5636, 29049};

@@ -706,12 +706,12 @@ namespace detail
 return YCrCb2RGBConvert<bidx>(src);
 }
 };
 }

 #define OPENCV_GPU_IMPLEMENT_YCrCb2RGB_TRAITS(name, scn, dcn, bidx) \
 template <typename T> struct name ## _traits \
 { \
-typedef detail::YCrCb2RGB<T, scn, dcn, bidx> functor_type; \
+typedef ::cv::gpu::device::color_detail::YCrCb2RGB<T, scn, dcn, bidx> functor_type; \
 static __host__ __device__ __forceinline__ functor_type create_functor() \
 { \
 return functor_type(); \

@@ -720,8 +720,8 @@ namespace detail

 ////////////////////////////////////// RGB <-> XYZ ///////////////////////////////////////

-namespace detail
+namespace color_detail
 {
 __constant__ float c_RGB2XYZ_D65f[9] = { 0.412453f, 0.357580f, 0.180423f, 0.212671f, 0.715160f, 0.072169f, 0.019334f, 0.119193f, 0.950227f };
 __constant__ int c_RGB2XYZ_D65i[9] = { 1689, 1465, 739, 871, 2929, 296, 79, 488, 3892 };
@@ -774,20 +774,20 @@ namespace detail
 return RGB2XYZConvert<bidx>(src);
 }
 };
 }

 #define OPENCV_GPU_IMPLEMENT_RGB2XYZ_TRAITS(name, scn, dcn, bidx) \
 template <typename T> struct name ## _traits \
 { \
-typedef detail::RGB2XYZ<T, scn, dcn, bidx> functor_type; \
+typedef ::cv::gpu::device::color_detail::RGB2XYZ<T, scn, dcn, bidx> functor_type; \
 static __host__ __device__ __forceinline__ functor_type create_functor() \
 { \
 return functor_type(); \
 } \
 };

-namespace detail
+namespace color_detail
 {
 __constant__ float c_XYZ2sRGB_D65f[9] = { 3.240479f, -1.53715f, -0.498535f, -0.969256f, 1.875991f, 0.041556f, 0.055648f, -0.204043f, 1.057311f };
 __constant__ int c_XYZ2sRGB_D65i[9] = { 13273, -6296, -2042, -3970, 7684, 170, 228, -836, 4331 };

@@ -841,12 +841,12 @@ namespace detail
 return XYZ2RGBConvert<bidx>(src);
 }
 };
 }

 #define OPENCV_GPU_IMPLEMENT_XYZ2RGB_TRAITS(name, scn, dcn, bidx) \
 template <typename T> struct name ## _traits \
 { \
-typedef detail::XYZ2RGB<T, scn, dcn, bidx> functor_type; \
+typedef ::cv::gpu::device::color_detail::XYZ2RGB<T, scn, dcn, bidx> functor_type; \
 static __host__ __device__ __forceinline__ functor_type create_functor() \
 { \
 return functor_type(); \

@@ -855,8 +855,8 @@ namespace detail

 ////////////////////////////////////// RGB <-> HSV ///////////////////////////////////////

-namespace detail
+namespace color_detail
 {
 __constant__ int c_HsvDivTable [256] = {0, 1044480, 522240, 348160, 261120, 208896, 174080, 149211, 130560, 116053, 104448, 94953, 87040, 80345, 74606, 69632, 65280, 61440, 58027, 54973, 52224, 49737, 47476, 45412, 43520, 41779, 40172, 38684, 37303, 36017, 34816, 33693, 32640, 31651, 30720, 29842, 29013, 28229, 27486, 26782, 26112, 25475, 24869, 24290, 23738, 23211, 22706, 22223, 21760, 21316, 20890, 20480, 20086, 19707, 19342, 18991, 18651, 18324, 18008, 17703, 17408, 17123, 16846, 16579, 16320, 16069, 15825, 15589, 15360, 15137, 14921, 14711, 14507, 14308, 14115, 13926, 13743, 13565, 13391, 13221, 13056, 12895, 12738, 12584, 12434, 12288, 12145, 12006, 11869, 11736, 11605, 11478, 11353, 11231, 11111, 10995, 10880, 10768, 10658, 10550, 10445, 10341, 10240, 10141, 10043, 9947, 9854, 9761, 9671, 9582, 9495, 9410, 9326, 9243, 9162, 9082, 9004, 8927, 8852, 8777, 8704, 8632, 8561, 8492, 8423, 8356, 8290, 8224, 8160, 8097, 8034, 7973, 7913, 7853, 7795, 7737, 7680, 7624, 7569, 7514, 7461, 7408, 7355, 7304, 7253, 7203, 7154, 7105, 7057, 7010, 6963, 6917, 6872, 6827, 6782, 6739, 6695, 6653, 6611, 6569, 6528, 6487, 6447, 6408, 6369, 6330, 6292, 6254, 6217, 6180, 6144, 6108, 6073, 6037, 6003, 5968, 5935, 5901, 5868, 5835, 5803, 5771, 5739, 5708, 5677, 5646, 5615, 5585, 5556, 5526, 5497, 5468, 5440, 5412, 5384, 5356, 5329, 5302, 5275, 5249, 5222, 5196, 5171, 5145, 5120, 5095, 5070, 5046, 5022, 4998, 4974, 4950, 4927, 4904, 4881, 4858, 4836, 4813, 4791, 4769, 4748, 4726, 4705, 4684, 4663, 4642, 4622, 4601, 4581, 4561, 4541, 4522, 4502, 4483, 4464, 4445, 4426, 4407, 4389, 4370, 4352, 4334, 4316, 4298, 4281, 4263, 4246, 4229, 4212, 4195, 4178, 4161, 4145, 4128, 4112, 4096};
 __constant__ int c_HsvDivTable180[256] = {0, 122880, 61440, 40960, 30720, 24576, 20480, 17554, 15360, 13653, 12288, 11171, 10240, 9452, 8777, 8192, 7680, 7228, 6827, 6467, 6144, 5851, 5585, 5343, 5120, 4915, 4726, 4551, 4389, 4237, 4096, 3964, 3840, 3724, 3614, 3511, 3413, 3321, 3234, 3151, 3072, 2997, 2926, 2858, 2793, 2731, 2671, 2614, 2560, 2508, 2458, 2409, 2363, 2318, 2276, 2234, 2194, 2156, 2119, 2083, 2048, 2014, 1982, 1950, 1920, 1890, 1862, 1834, 1807, 1781, 1755, 1731, 1707, 1683, 1661, 1638, 1617, 1596, 1575, 1555, 1536, 1517, 1499, 1480, 1463, 1446, 1429, 1412, 1396, 1381, 1365, 1350, 1336, 1321, 1307, 1293, 1280, 1267, 1254, 1241, 1229, 1217, 1205, 1193, 1182, 1170, 1159, 1148, 1138, 1127, 1117, 1107, 1097, 1087, 1078, 1069, 1059, 1050, 1041, 1033, 1024, 1016, 1007, 999, 991, 983, 975, 968, 960, 953, 945, 938, 931, 924, 917, 910, 904, 897, 890, 884, 878, 871, 865, 859, 853, 847, 842, 836, 830, 825, 819, 814, 808, 803, 798, 793, 788, 783, 778, 773, 768, 763, 759, 754, 749, 745, 740, 736, 731, 727, 723, 719, 714, 710, 706, 702, 698, 694, 690, 686, 683, 679, 675, 671, 668, 664, 661, 657, 654, 650, 647, 643, 640, 637, 633, 630, 627, 624, 621, 617, 614, 611, 608, 605, 602, 599, 597, 594, 591, 588, 585, 582, 580, 577, 574, 572, 569, 566, 564, 561, 559, 556, 554, 551, 549, 546, 544, 541, 539, 537, 534, 532, 530, 527, 525, 523, 521, 518, 516, 514, 512, 510, 508, 506, 504, 502, 500, 497, 495, 493, 492, 490, 488, 486, 484, 482};
 __constant__ int c_HsvDivTable256[256] = {0, 174763, 87381, 58254, 43691, 34953, 29127, 24966, 21845, 19418, 17476, 15888, 14564, 13443, 12483, 11651, 10923, 10280, 9709, 9198, 8738, 8322, 7944, 7598, 7282, 6991, 6722, 6473, 6242, 6026, 5825, 5638, 5461, 5296, 5140, 4993, 4855, 4723, 4599, 4481, 4369, 4263, 4161, 4064, 3972, 3884, 3799, 3718, 3641, 3567, 3495, 3427, 3361, 3297, 3236, 3178, 3121, 3066, 3013, 2962, 2913, 2865, 2819, 2774, 2731, 2689, 2648, 2608, 2570, 2533, 2497, 2461, 2427, 2394, 2362, 2330, 2300, 2270, 2241, 2212, 2185, 2158, 2131, 2106, 2081, 2056, 2032, 2009, 1986, 1964, 1942, 1920, 1900, 1879, 1859, 1840, 1820, 1802, 1783, 1765, 1748, 1730, 1713, 1697, 1680, 1664, 1649, 1633, 1618, 1603, 1589, 1574, 1560, 1547, 1533, 1520, 1507, 1494, 1481, 1469, 1456, 1444, 1432, 1421, 1409, 1398, 1387, 1376, 1365, 1355, 1344, 1334, 1324, 1314, 1304, 1295, 1285, 1276, 1266, 1257, 1248, 1239, 1231, 1222, 1214, 1205, 1197, 1189, 1181, 1173, 1165, 1157, 1150, 1142, 1135, 1128, 1120, 1113, 1106, 1099, 1092, 1085, 1079, 1072, 1066, 1059, 1053, 1046, 1040, 1034, 1028, 1022, 1016, 1010, 1004, 999, 993, 987, 982, 976, 971, 966, 960, 955, 950, 945, 940, 935, 930, 925, 920, 915, 910, 906, 901, 896, 892, 887, 883, 878, 874, 869, 865, 861, 857, 853, 848, 844, 840, 836, 832, 828, 824, 820, 817, 813, 809, 805, 802, 798, 794, 791, 787, 784, 780, 777, 773, 770, 767, 763, 760, 757, 753, 750, 747, 744, 741, 737, 734, 731, 728, 725, 722, 719, 716, 713, 710, 708, 705, 702, 699, 696, 694, 691, 688, 685};
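Note: the three c_HsvDivTable* arrays above are precomputed fixed-point reciprocals, scaled by 2^12, so the HSV kernels can swap a per-pixel division for a multiply and shift. A generation sketch whose output appears to match the literals (the rounding form is an inference from the values, not quoted from the commit):

// v is the value/chroma denominator; entry 0 is defined as 0.
int hsv_div(int v)    { return v ? (255 * 4096 + v / 2) / v : 0; }       // saturation
int hsv_div180(int v) { return v ? (180 * 4096 + 3 * v) / (6 * v) : 0; } // hue in [0, 180)
int hsv_div256(int v) { return v ? (256 * 4096 + 3 * v) / (6 * v) : 0; } // hue in [0, 256)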
@@ -971,12 +971,12 @@ namespace detail
 return RGB2HSVConvert<bidx, hr>(src);
 }
 };
 }

 #define OPENCV_GPU_IMPLEMENT_RGB2HSV_TRAITS(name, scn, dcn, bidx) \
 template <typename T> struct name ## _traits \
 { \
-typedef detail::RGB2HSV<T, scn, dcn, bidx, 180> functor_type; \
+typedef ::cv::gpu::device::color_detail::RGB2HSV<T, scn, dcn, bidx, 180> functor_type; \
 static __host__ __device__ __forceinline__ functor_type create_functor() \
 { \
 return functor_type(); \

@@ -984,7 +984,7 @@ namespace detail
 }; \
 template <typename T> struct name ## _full_traits \
 { \
-typedef detail::RGB2HSV<T, scn, dcn, bidx, 256> functor_type; \
+typedef ::cv::gpu::device::color_detail::RGB2HSV<T, scn, dcn, bidx, 256> functor_type; \
 static __host__ __device__ __forceinline__ functor_type create_functor() \
 { \
 return functor_type(); \

@@ -992,7 +992,7 @@ namespace detail
 }; \
 template <> struct name ## _traits<float> \
 { \
-typedef detail::RGB2HSV<float, scn, dcn, bidx, 360> functor_type; \
+typedef ::cv::gpu::device::color_detail::RGB2HSV<float, scn, dcn, bidx, 360> functor_type; \
 static __host__ __device__ __forceinline__ functor_type create_functor() \
 { \
 return functor_type(); \

@@ -1000,15 +1000,15 @@ namespace detail
 }; \
 template <> struct name ## _full_traits<float> \
 { \
-typedef detail::RGB2HSV<float, scn, dcn, bidx, 360> functor_type; \
+typedef ::cv::gpu::device::color_detail::RGB2HSV<float, scn, dcn, bidx, 360> functor_type; \
 static __host__ __device__ __forceinline__ functor_type create_functor() \
 { \
 return functor_type(); \
 } \
 };

-namespace detail
+namespace color_detail
 {
 __constant__ int c_HsvSectorData[6][3] = { {1,3,0}, {1,0,2}, {3,0,1}, {0,2,1}, {0,1,3}, {2,1,0} };

 template <int bidx, int hr, typename T> static __device__ void HSV2RGBConvert(const T& src, float* dst)
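Note: the reason every conversion ships both _traits and _full_traits (plus float specialisations) is the hue range baked into the last template argument:

// Hue ranges encoded by the trait families (summary of the macros above):
//   name##_traits<uchar>      -> hr = 180 (OpenCV's default 8-bit hue range)
//   name##_full_traits<uchar> -> hr = 256 (resp. 255 for the inverse maps)
//   both <float> specialisations -> hr = 360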
@@ -1097,12 +1097,12 @@ namespace detail
 return HSV2RGBConvert<bidx, hr>(src);
 }
 };
 }

 #define OPENCV_GPU_IMPLEMENT_HSV2RGB_TRAITS(name, scn, dcn, bidx) \
 template <typename T> struct name ## _traits \
 { \
-typedef detail::HSV2RGB<T, scn, dcn, bidx, 180> functor_type; \
+typedef ::cv::gpu::device::color_detail::HSV2RGB<T, scn, dcn, bidx, 180> functor_type; \
 static __host__ __device__ __forceinline__ functor_type create_functor() \
 { \
 return functor_type(); \

@@ -1110,7 +1110,7 @@ namespace detail
 }; \
 template <typename T> struct name ## _full_traits \
 { \
-typedef detail::HSV2RGB<T, scn, dcn, bidx, 255> functor_type; \
+typedef ::cv::gpu::device::color_detail::HSV2RGB<T, scn, dcn, bidx, 255> functor_type; \
 static __host__ __device__ __forceinline__ functor_type create_functor() \
 { \
 return functor_type(); \

@@ -1118,7 +1118,7 @@ namespace detail
 }; \
 template <> struct name ## _traits<float> \
 { \
-typedef detail::HSV2RGB<float, scn, dcn, bidx, 360> functor_type; \
+typedef ::cv::gpu::device::color_detail::HSV2RGB<float, scn, dcn, bidx, 360> functor_type; \
 static __host__ __device__ __forceinline__ functor_type create_functor() \
 { \
 return functor_type(); \

@@ -1126,7 +1126,7 @@ namespace detail
 }; \
 template <> struct name ## _full_traits<float> \
 { \
-typedef detail::HSV2RGB<float, scn, dcn, bidx, 360> functor_type; \
+typedef ::cv::gpu::device::color_detail::HSV2RGB<float, scn, dcn, bidx, 360> functor_type; \
 static __host__ __device__ __forceinline__ functor_type create_functor() \
 { \
 return functor_type(); \

@@ -1135,8 +1135,8 @@ namespace detail

 /////////////////////////////////////// RGB <-> HLS ////////////////////////////////////////

-namespace detail
+namespace color_detail
 {
 template <int bidx, int hr, typename D> static __device__ void RGB2HLSConvert(const float* src, D& dst)
 {
 const float hscale = hr * (1.f / 360.f);
@@ -1222,12 +1222,12 @@ namespace detail
 return RGB2HLSConvert<bidx, hr>(src);
 }
 };
 }

 #define OPENCV_GPU_IMPLEMENT_RGB2HLS_TRAITS(name, scn, dcn, bidx) \
 template <typename T> struct name ## _traits \
 { \
-typedef detail::RGB2HLS<T, scn, dcn, bidx, 180> functor_type; \
+typedef ::cv::gpu::device::color_detail::RGB2HLS<T, scn, dcn, bidx, 180> functor_type; \
 static __host__ __device__ __forceinline__ functor_type create_functor() \
 { \
 return functor_type(); \

@@ -1235,7 +1235,7 @@ namespace detail
 }; \
 template <typename T> struct name ## _full_traits \
 { \
-typedef detail::RGB2HLS<T, scn, dcn, bidx, 256> functor_type; \
+typedef ::cv::gpu::device::color_detail::RGB2HLS<T, scn, dcn, bidx, 256> functor_type; \
 static __host__ __device__ __forceinline__ functor_type create_functor() \
 { \
 return functor_type(); \

@@ -1243,7 +1243,7 @@ namespace detail
 }; \
 template <> struct name ## _traits<float> \
 { \
-typedef detail::RGB2HLS<float, scn, dcn, bidx, 360> functor_type; \
+typedef ::cv::gpu::device::color_detail::RGB2HLS<float, scn, dcn, bidx, 360> functor_type; \
 static __host__ __device__ __forceinline__ functor_type create_functor() \
 { \
 return functor_type(); \

@@ -1251,15 +1251,15 @@ namespace detail
 }; \
 template <> struct name ## _full_traits<float> \
 { \
-typedef detail::RGB2HLS<float, scn, dcn, bidx, 360> functor_type; \
+typedef ::cv::gpu::device::color_detail::RGB2HLS<float, scn, dcn, bidx, 360> functor_type; \
 static __host__ __device__ __forceinline__ functor_type create_functor() \
 { \
 return functor_type(); \
 } \
 };

-namespace detail
+namespace color_detail
 {
 __constant__ int c_HlsSectorData[6][3] = { {1,3,0}, {1,0,2}, {3,0,1}, {0,2,1}, {0,1,3}, {2,1,0} };

 template <int bidx, int hr, typename T> static __device__ void HLS2RGBConvert(const T& src, float* dst)
@@ -1354,12 +1354,12 @@ namespace detail
 return HLS2RGBConvert<bidx, hr>(src);
 }
 };
 }

 #define OPENCV_GPU_IMPLEMENT_HLS2RGB_TRAITS(name, scn, dcn, bidx) \
 template <typename T> struct name ## _traits \
 { \
-typedef detail::HLS2RGB<T, scn, dcn, bidx, 180> functor_type; \
+typedef ::cv::gpu::device::color_detail::HLS2RGB<T, scn, dcn, bidx, 180> functor_type; \
 static __host__ __device__ __forceinline__ functor_type create_functor() \
 { \
 return functor_type(); \

@@ -1367,7 +1367,7 @@ namespace detail
 }; \
 template <typename T> struct name ## _full_traits \
 { \
-typedef detail::HLS2RGB<T, scn, dcn, bidx, 255> functor_type; \
+typedef ::cv::gpu::device::color_detail::HLS2RGB<T, scn, dcn, bidx, 255> functor_type; \
 static __host__ __device__ __forceinline__ functor_type create_functor() \
 { \
 return functor_type(); \

@@ -1375,7 +1375,7 @@ namespace detail
 }; \
 template <> struct name ## _traits<float> \
 { \
-typedef detail::HLS2RGB<float, scn, dcn, bidx, 360> functor_type; \
+typedef ::cv::gpu::device::color_detail::HLS2RGB<float, scn, dcn, bidx, 360> functor_type; \
 static __host__ __device__ __forceinline__ functor_type create_functor() \
 { \
 return functor_type(); \

@@ -1383,13 +1383,12 @@ namespace detail
 }; \
 template <> struct name ## _full_traits<float> \
 { \
-typedef detail::HLS2RGB<float, scn, dcn, bidx, 360> functor_type; \
+typedef ::cv::gpu::device::color_detail::HLS2RGB<float, scn, dcn, bidx, 360> functor_type; \
 static __host__ __device__ __forceinline__ functor_type create_functor() \
 { \
 return functor_type(); \
 } \
 };

-END_OPENCV_DEVICE_NAMESPACE
+}}} // namespace cv { namespace gpu { namespace device

 #endif // __OPENCV_GPU_COLOR_DETAIL_HPP__
@@ -47,26 +47,10 @@
 #include "../vec_traits.hpp"
 #include "../functional.hpp"

-BEGIN_OPENCV_DEVICE_NAMESPACE
-
-namespace detail
+namespace cv { namespace gpu { namespace device
 {
-//! Mask accessor
-struct MaskReader
-{
-explicit MaskReader(const PtrStepb& mask_): mask(mask_) {}
-
-__device__ __forceinline__ bool operator()(int y, int x) const { return mask.ptr(y)[x]; }
-
-const PtrStepb mask;
-};
-
-struct NoMask
-{
-__device__ __forceinline__ bool operator()(int y, int x) const { return true; }
-};
+namespace transform_detail
+{

 //! Read Write Traits

 template <typename T, typename D, int shift> struct UnaryReadWriteTraits

@@ -404,8 +388,7 @@ namespace detail
 typedef TransformFunctorTraits<BinOp> ft;
 TransformDispatcher<VecTraits<T1>::cn == 1 && VecTraits<T2>::cn == 1 && VecTraits<D>::cn == 1 && ft::smart_shift != 1>::call(src1, src2, dst, op, mask, stream);
 }
-}
-
-END_OPENCV_DEVICE_NAMESPACE
+} // namespace transform_detail
+}}} // namespace cv { namespace gpu { namespace device

 #endif // __OPENCV_GPU_TRANSFORM_DETAIL_HPP__
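Note: the deleted MaskReader/NoMask pair implemented a single tiny contract, a (y, x) -> bool predicate that the transform kernels test before writing. A sketch of the call pattern that contract serves; all names in this fragment are hypothetical:

// Any type with a __device__ bool operator()(int y, int x) can gate writes.
template <typename Mask, typename SrcPtr, typename DstPtr, typename Op>
__device__ __forceinline__ void writeMasked(const Mask& mask, SrcPtr src, DstPtr dst,
                                            const Op& op, int y, int x)
{
    if (mask(y, x))
        dst(y, x) = op(src(y, x));
}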
@@ -46,10 +46,10 @@
 #include "internal_shared.hpp"
 #include "../vec_traits.hpp"

-BEGIN_OPENCV_DEVICE_NAMESPACE
-
-namespace detail
+namespace cv { namespace gpu { namespace device
 {
+namespace type_traits_detail
+{
 template <bool, typename T1, typename T2> struct Select { typedef T1 type; };
 template <typename T1, typename T2> struct Select<false, T1, T2> { typedef T2 type; };

@@ -181,8 +181,7 @@ namespace detail
 typedef U& type;
 enum { value = 1 };
 };
-}
-
-END_OPENCV_DEVICE_NAMESPACE
+} // namespace type_traits_detail
+}}} // namespace cv { namespace gpu { namespace device

 #endif // __OPENCV_GPU_TYPE_TRAITS_DETAIL_HPP__
@@ -45,10 +45,10 @@

 #include "internal_shared.hpp"

-BEGIN_OPENCV_DEVICE_NAMESPACE
-
-namespace detail
+namespace cv { namespace gpu { namespace device
 {
+namespace utility_detail
+{
 ///////////////////////////////////////////////////////////////////////////////
 // Reduction

@@ -837,8 +837,7 @@ namespace detail
 }
 }
 };
-}
-
-END_OPENCV_DEVICE_NAMESPACE
+} // namespace utility_detail
+}}} // namespace cv { namespace gpu { namespace device

 #endif // __OPENCV_GPU_UTILITY_DETAIL_HPP__
@@ -46,10 +46,10 @@
 #include "internal_shared.hpp"
 #include "../datamov_utils.hpp"

-BEGIN_OPENCV_DEVICE_NAMESPACE
-
-namespace detail
+namespace cv { namespace gpu { namespace device
 {
+namespace vec_distance_detail
+{
 template <int THREAD_DIM, int N> struct UnrollVecDiffCached
 {
 template <typename Dist, typename T1, typename T2>

@@ -112,8 +112,7 @@ namespace detail
 UnrollVecDiffCached<THREAD_DIM, MAX_LEN / THREAD_DIM>::calcWithoutCheck(vecCached, vecGlob + tid, dist);
 }
 };
-}
-
-END_OPENCV_DEVICE_NAMESPACE
+} // namespace vec_distance_detail
+}}} // namespace cv { namespace gpu { namespace device

 #endif // __OPENCV_GPU_VEC_DISTANCE_DETAIL_HPP__
@@ -46,23 +46,22 @@
 #include "internal_shared.hpp"
 #include "warp_reduce.hpp"

-BEGIN_OPENCV_DEVICE_NAMESPACE
-
-struct Emulation
+namespace cv { namespace gpu { namespace device
 {
+struct Emulation
+{
 static __forceinline__ __device__ int Ballot(int predicate, volatile int* cta_buffer)
 {
 #if __CUDA_ARCH__ >= 200
 (void)cta_buffer;
 return __ballot(predicate);
 #else
 int tid = threadIdx.x;
 cta_buffer[tid] = predicate ? (1 << (tid & 31)) : 0;
 return warp_reduce(cta_buffer);
 #endif
 }
 };
-
-END_OPENCV_DEVICE_NAMESPACE
+}}} // namespace cv { namespace gpu { namespace device

 #endif /* OPENCV_GPU_EMULATION_HPP_ */
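Note: Emulation::Ballot gives one warp-ballot entry point across architectures — sm_2x+ compiles to the native __ballot, older targets fall back through the caller's shared-memory buffer via warp_reduce. A hypothetical kernel using it; BLOCK (kept at one warp so the emulated path stays meaningful) and the predicate are illustrative:

#define BLOCK 32
__global__ void countPositive(const float* data, int* perBlockCount)
{
    __shared__ volatile int cta_buffer[BLOCK];
    int ballot = cv::gpu::device::Emulation::Ballot(data[threadIdx.x] > 0.f, cta_buffer);
    if (threadIdx.x == 0)
        perBlockCount[blockIdx.x] = __popc(ballot); // count of lanes with a true predicate
}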
@@ -48,10 +48,10 @@
 #include "vec_traits.hpp"
 #include "vec_math.hpp"

-BEGIN_OPENCV_DEVICE_NAMESPACE
-
-template <typename Ptr2D> struct PointFilter
+namespace cv { namespace gpu { namespace device
 {
+template <typename Ptr2D> struct PointFilter
+{
 typedef typename Ptr2D::elem_type elem_type;
 typedef float index_type;

@@ -63,10 +63,10 @@ template <typename Ptr2D> struct PointFilter
 }

 const Ptr2D src;
 };

 template <typename Ptr2D> struct LinearFilter
 {
 typedef typename Ptr2D::elem_type elem_type;
 typedef float index_type;

@@ -99,10 +99,10 @@ template <typename Ptr2D> struct LinearFilter
 }

 const Ptr2D src;
 };

 template <typename Ptr2D> struct CubicFilter
 {
 typedef typename Ptr2D::elem_type elem_type;
 typedef float index_type;
 typedef typename TypeVec<float, VecTraits<elem_type>::cn>::vec_type work_type;

@@ -130,8 +130,7 @@ template <typename Ptr2D> struct CubicFilter
 }

 const Ptr2D src;
 };
-
-END_OPENCV_DEVICE_NAMESPACE
+}}} // namespace cv { namespace gpu { namespace device

 #endif // __OPENCV_GPU_FILTERS_HPP__
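Note: PointFilter, LinearFilter and CubicFilter all expose the same shape — each wraps a 2D source and answers operator()(float y, float x) — which is what lets remap/resize kernels be written once and parameterised by filter. A hypothetical kernel against that interface; the raw dst pointer/step signature is an illustration, not this header's API:

template <typename Filter, typename T>
__global__ void resizeKernel(Filter src, T* dst, size_t dstep,
                             int rows, int cols, float fy, float fx)
{
    int x = blockIdx.x * blockDim.x + threadIdx.x;
    int y = blockIdx.y * blockDim.y + threadIdx.y;
    if (x < cols && y < rows)
        dst[y * dstep + x] = src(y * fy, x * fx); // interpolated fetch
}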
@@ -47,11 +47,11 @@
 #include <cstdio>
 #include "internal_shared.hpp"

-BEGIN_OPENCV_DEVICE_NAMESPACE
-
-template<class Func>
-void printFuncAttrib(Func& func)
+namespace cv { namespace gpu { namespace device
 {
+template<class Func>
+void printFuncAttrib(Func& func)
+{

 cudaFuncAttributes attrs;
 cudaFuncGetAttributes(&attrs, func);

@@ -67,8 +67,7 @@ void printFuncAttrib(Func& func)
 printf("binaryVersion = %d\n", attrs.binaryVersion);
 printf("\n");
 fflush(stdout);
 }
-
-END_OPENCV_DEVICE_NAMESPACE
+}}} // namespace cv { namespace gpu { namespace device

 #endif /* __OPENCV_GPU_DEVICE_FUNCATTRIB_HPP_ */
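Note: printFuncAttrib is a debugging helper; typical (assumed) usage is a one-liner while tuning occupancy:

__global__ void myKernel(); // any kernel symbol; hypothetical here

void dumpKernelInfo()
{
    // Prints register count, shared/local/const memory sizes, PTX/binary versions.
    cv::gpu::device::printFuncAttrib(myKernel);
}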
@@ -49,182 +49,182 @@
 #include "vec_traits.hpp"
 #include "type_traits.hpp"

-BEGIN_OPENCV_DEVICE_NAMESPACE
-
-// Function Objects
-
-using thrust::unary_function;
-using thrust::binary_function;
-
-// Arithmetic Operations
-
-template <typename T> struct plus : binary_function<T, T, T>
+namespace cv { namespace gpu { namespace device
 {
+// Function Objects
+
+using thrust::unary_function;
+using thrust::binary_function;
+
+// Arithmetic Operations
+
+template <typename T> struct plus : binary_function<T, T, T>
+{
 __device__ __forceinline__ T operator ()(typename TypeTraits<T>::ParameterType a, typename TypeTraits<T>::ParameterType b) const
 {
 return a + b;
 }
 };
 template <typename T> struct minus : binary_function<T, T, T>
 {
 __device__ __forceinline__ T operator ()(typename TypeTraits<T>::ParameterType a, typename TypeTraits<T>::ParameterType b) const
 {
 return a - b;
 }
 };
 template <typename T> struct multiplies : binary_function<T, T, T>
 {
 __device__ __forceinline__ T operator ()(typename TypeTraits<T>::ParameterType a, typename TypeTraits<T>::ParameterType b) const
 {
 return a * b;
 }
 };
 template <typename T> struct divides : binary_function<T, T, T>
 {
 __device__ __forceinline__ T operator ()(typename TypeTraits<T>::ParameterType a, typename TypeTraits<T>::ParameterType b) const
 {
 return a / b;
 }
 };
 template <typename T> struct modulus : binary_function<T, T, T>
 {
 __device__ __forceinline__ T operator ()(typename TypeTraits<T>::ParameterType a, typename TypeTraits<T>::ParameterType b) const
 {
 return a % b;
 }
 };
 template <typename T> struct negate : unary_function<T, T>
 {
 __device__ __forceinline__ T operator ()(typename TypeTraits<T>::ParameterType a) const
 {
 return -a;
 }
 };

 // Comparison Operations

 template <typename T> struct equal_to : binary_function<T, T, bool>
 {
 __device__ __forceinline__ bool operator ()(typename TypeTraits<T>::ParameterType a, typename TypeTraits<T>::ParameterType b) const
 {
 return a == b;
 }
 };
 template <typename T> struct not_equal_to : binary_function<T, T, bool>
 {
 __device__ __forceinline__ bool operator ()(typename TypeTraits<T>::ParameterType a, typename TypeTraits<T>::ParameterType b) const
 {
 return a != b;
 }
 };
 template <typename T> struct greater : binary_function<T, T, bool>
 {
 __device__ __forceinline__ bool operator ()(typename TypeTraits<T>::ParameterType a, typename TypeTraits<T>::ParameterType b) const
 {
 return a > b;
 }
 };
 template <typename T> struct less : binary_function<T, T, bool>
 {
 __device__ __forceinline__ bool operator ()(typename TypeTraits<T>::ParameterType a, typename TypeTraits<T>::ParameterType b) const
 {
 return a < b;
 }
 };
 template <typename T> struct greater_equal : binary_function<T, T, bool>
 {
 __device__ __forceinline__ bool operator ()(typename TypeTraits<T>::ParameterType a, typename TypeTraits<T>::ParameterType b) const
 {
 return a >= b;
 }
 };
 template <typename T> struct less_equal : binary_function<T, T, bool>
 {
 __device__ __forceinline__ bool operator ()(typename TypeTraits<T>::ParameterType a, typename TypeTraits<T>::ParameterType b) const
 {
 return a <= b;
 }
 };

 // Logical Operations

 template <typename T> struct logical_and : binary_function<T, T, bool>
 {
 __device__ __forceinline__ bool operator ()(typename TypeTraits<T>::ParameterType a, typename TypeTraits<T>::ParameterType b) const
 {
 return a && b;
 }
 };
 template <typename T> struct logical_or : binary_function<T, T, bool>
 {
 __device__ __forceinline__ bool operator ()(typename TypeTraits<T>::ParameterType a, typename TypeTraits<T>::ParameterType b) const
 {
 return a || b;
 }
 };
 template <typename T> struct logical_not : unary_function<T, bool>
 {
 __device__ __forceinline__ bool operator ()(typename TypeTraits<T>::ParameterType a) const
|
__device__ __forceinline__ bool operator ()(typename TypeTraits<T>::ParameterType a) const
|
||||||
{
|
{
|
||||||
return !a;
|
return !a;
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
// Bitwise Operations
|
// Bitwise Operations
|
||||||
|
|
||||||
template <typename T> struct bit_and : binary_function<T, T, T>
|
template <typename T> struct bit_and : binary_function<T, T, T>
|
||||||
{
|
{
|
||||||
__device__ __forceinline__ T operator ()(typename TypeTraits<T>::ParameterType a, typename TypeTraits<T>::ParameterType b) const
|
__device__ __forceinline__ T operator ()(typename TypeTraits<T>::ParameterType a, typename TypeTraits<T>::ParameterType b) const
|
||||||
{
|
{
|
||||||
return a & b;
|
return a & b;
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
template <typename T> struct bit_or : binary_function<T, T, T>
|
template <typename T> struct bit_or : binary_function<T, T, T>
|
||||||
{
|
{
|
||||||
__device__ __forceinline__ T operator ()(typename TypeTraits<T>::ParameterType a, typename TypeTraits<T>::ParameterType b) const
|
__device__ __forceinline__ T operator ()(typename TypeTraits<T>::ParameterType a, typename TypeTraits<T>::ParameterType b) const
|
||||||
{
|
{
|
||||||
return a | b;
|
return a | b;
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
template <typename T> struct bit_xor : binary_function<T, T, T>
|
template <typename T> struct bit_xor : binary_function<T, T, T>
|
||||||
{
|
{
|
||||||
__device__ __forceinline__ T operator ()(typename TypeTraits<T>::ParameterType a, typename TypeTraits<T>::ParameterType b) const
|
__device__ __forceinline__ T operator ()(typename TypeTraits<T>::ParameterType a, typename TypeTraits<T>::ParameterType b) const
|
||||||
{
|
{
|
||||||
return a ^ b;
|
return a ^ b;
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
template <typename T> struct bit_not : unary_function<T, T>
|
template <typename T> struct bit_not : unary_function<T, T>
|
||||||
{
|
{
|
||||||
__device__ __forceinline__ T operator ()(typename TypeTraits<T>::ParameterType v) const
|
__device__ __forceinline__ T operator ()(typename TypeTraits<T>::ParameterType v) const
|
||||||
{
|
{
|
||||||
return ~v;
|
return ~v;
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
// Generalized Identity Operations
|
// Generalized Identity Operations
|
||||||
|
|
||||||
template <typename T> struct identity : unary_function<T, T>
|
template <typename T> struct identity : unary_function<T, T>
|
||||||
{
|
{
|
||||||
__device__ __forceinline__ typename TypeTraits<T>::ParameterType operator()(typename TypeTraits<T>::ParameterType x) const
|
__device__ __forceinline__ typename TypeTraits<T>::ParameterType operator()(typename TypeTraits<T>::ParameterType x) const
|
||||||
{
|
{
|
||||||
return x;
|
return x;
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
template <typename T1, typename T2> struct project1st : binary_function<T1, T2, T1>
|
template <typename T1, typename T2> struct project1st : binary_function<T1, T2, T1>
|
||||||
{
|
{
|
||||||
__device__ __forceinline__ typename TypeTraits<T1>::ParameterType operator()(typename TypeTraits<T1>::ParameterType lhs, typename TypeTraits<T2>::ParameterType rhs) const
|
__device__ __forceinline__ typename TypeTraits<T1>::ParameterType operator()(typename TypeTraits<T1>::ParameterType lhs, typename TypeTraits<T2>::ParameterType rhs) const
|
||||||
{
|
{
|
||||||
return lhs;
|
return lhs;
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
template <typename T1, typename T2> struct project2nd : binary_function<T1, T2, T2>
|
template <typename T1, typename T2> struct project2nd : binary_function<T1, T2, T2>
|
||||||
{
|
{
|
||||||
__device__ __forceinline__ typename TypeTraits<T2>::ParameterType operator()(typename TypeTraits<T1>::ParameterType lhs, typename TypeTraits<T2>::ParameterType rhs) const
|
__device__ __forceinline__ typename TypeTraits<T2>::ParameterType operator()(typename TypeTraits<T1>::ParameterType lhs, typename TypeTraits<T2>::ParameterType rhs) const
|
||||||
{
|
{
|
||||||
return rhs;
|
return rhs;
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
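These functors mirror their `<functional>` counterparts but are `__device__`-qualified, and they take `TypeTraits<T>::ParameterType` so small types are passed by value while larger ones go by const reference. A minimal sketch of applying one of them elementwise; the kernel name and launch shape are hypothetical, not part of the header:

    __global__ void compare_kernel(const float* a, const float* b, unsigned char* out, int n)
    {
        // device-side analogue of std::greater<float>
        cv::gpu::device::greater<float> pred;
        int i = blockIdx.x * blockDim.x + threadIdx.x;
        if (i < n)
            out[i] = pred(a[i], b[i]) ? 255 : 0;
    }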
// Min/Max Operations

@ -234,39 +234,41 @@ template <typename T1, typename T2> struct project2nd : binary_function<T1, T2,
    __device__ __forceinline__ type operator()(type lhs, type rhs) const {return op(lhs, rhs);} \
};

template <typename T> struct maximum : binary_function<T, T, T>
{
    __device__ __forceinline__ T operator()(typename TypeTraits<T>::ParameterType lhs, typename TypeTraits<T>::ParameterType rhs) const
    {
        return lhs < rhs ? rhs : lhs;
    }
};

OPENCV_GPU_IMPLEMENT_MINMAX(maximum, uchar, ::max)
OPENCV_GPU_IMPLEMENT_MINMAX(maximum, schar, ::max)
OPENCV_GPU_IMPLEMENT_MINMAX(maximum, char, ::max)
OPENCV_GPU_IMPLEMENT_MINMAX(maximum, ushort, ::max)
OPENCV_GPU_IMPLEMENT_MINMAX(maximum, short, ::max)
OPENCV_GPU_IMPLEMENT_MINMAX(maximum, int, ::max)
OPENCV_GPU_IMPLEMENT_MINMAX(maximum, uint, ::max)
OPENCV_GPU_IMPLEMENT_MINMAX(maximum, float, ::fmax)
OPENCV_GPU_IMPLEMENT_MINMAX(maximum, double, ::fmax)

template <typename T> struct minimum : binary_function<T, T, T>
{
    __device__ __forceinline__ T operator()(typename TypeTraits<T>::ParameterType lhs, typename TypeTraits<T>::ParameterType rhs) const
    {
        return lhs < rhs ? lhs : rhs;
    }
};

OPENCV_GPU_IMPLEMENT_MINMAX(minimum, uchar, ::min)
OPENCV_GPU_IMPLEMENT_MINMAX(minimum, schar, ::min)
OPENCV_GPU_IMPLEMENT_MINMAX(minimum, char, ::min)
OPENCV_GPU_IMPLEMENT_MINMAX(minimum, ushort, ::min)
OPENCV_GPU_IMPLEMENT_MINMAX(minimum, short, ::min)
OPENCV_GPU_IMPLEMENT_MINMAX(minimum, int, ::min)
OPENCV_GPU_IMPLEMENT_MINMAX(minimum, uint, ::min)
OPENCV_GPU_IMPLEMENT_MINMAX(minimum, float, ::fmin)
OPENCV_GPU_IMPLEMENT_MINMAX(minimum, double, ::fmin)

#undef OPENCV_GPU_IMPLEMENT_MINMAX
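The macro specializes maximum/minimum for the primitive types so the generic ternary is replaced by the corresponding CUDA intrinsic (::max, ::fmax, and so on). Only the macro's tail is visible in this hunk, so the following expansion for float is a reconstruction, not the literal header text:

    template <> struct maximum<float> : binary_function<float, float, float>
    {
        // the generated operator presumably forwards straight to the ::fmax intrinsic
        __device__ __forceinline__ float operator()(float lhs, float rhs) const { return ::fmax(lhs, rhs); }
    };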
@ -287,6 +289,7 @@ OPENCV_GPU_IMPLEMENT_MINMAX(minimum, double, ::fmin)
        return func(v); \
    } \
};

#define OPENCV_GPU_IMPLEMENT_BIN_FUNCTOR(name, func) \
template <typename T> struct name ## _func : binary_function<T, T, float> \
{ \
@ -303,56 +306,56 @@ OPENCV_GPU_IMPLEMENT_MINMAX(minimum, double, ::fmin)
    } \
};

OPENCV_GPU_IMPLEMENT_UN_FUNCTOR(fabs, ::fabs)
OPENCV_GPU_IMPLEMENT_UN_FUNCTOR(sqrt, ::sqrt)
OPENCV_GPU_IMPLEMENT_UN_FUNCTOR(exp, ::exp)
OPENCV_GPU_IMPLEMENT_UN_FUNCTOR(exp2, ::exp2)
OPENCV_GPU_IMPLEMENT_UN_FUNCTOR(exp10, ::exp10)
OPENCV_GPU_IMPLEMENT_UN_FUNCTOR(log, ::log)
OPENCV_GPU_IMPLEMENT_UN_FUNCTOR(log2, ::log2)
OPENCV_GPU_IMPLEMENT_UN_FUNCTOR(log10, ::log10)
OPENCV_GPU_IMPLEMENT_UN_FUNCTOR(sin, ::sin)
OPENCV_GPU_IMPLEMENT_UN_FUNCTOR(cos, ::cos)
OPENCV_GPU_IMPLEMENT_UN_FUNCTOR(tan, ::tan)
OPENCV_GPU_IMPLEMENT_UN_FUNCTOR(asin, ::asin)
OPENCV_GPU_IMPLEMENT_UN_FUNCTOR(acos, ::acos)
OPENCV_GPU_IMPLEMENT_UN_FUNCTOR(atan, ::atan)
OPENCV_GPU_IMPLEMENT_UN_FUNCTOR(sinh, ::sinh)
OPENCV_GPU_IMPLEMENT_UN_FUNCTOR(cosh, ::cosh)
OPENCV_GPU_IMPLEMENT_UN_FUNCTOR(tanh, ::tanh)
OPENCV_GPU_IMPLEMENT_UN_FUNCTOR(asinh, ::asinh)
OPENCV_GPU_IMPLEMENT_UN_FUNCTOR(acosh, ::acosh)
OPENCV_GPU_IMPLEMENT_UN_FUNCTOR(atanh, ::atanh)

OPENCV_GPU_IMPLEMENT_BIN_FUNCTOR(hypot, ::hypot)
OPENCV_GPU_IMPLEMENT_BIN_FUNCTOR(atan2, ::atan2)
OPENCV_GPU_IMPLEMENT_BIN_FUNCTOR(pow, ::pow)

#undef OPENCV_GPU_IMPLEMENT_UN_FUNCTOR
#undef OPENCV_GPU_IMPLEMENT_BIN_FUNCTOR
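The `name ## _func` token pasting turns each instantiation into a functor named sqrt_func, atan2_func, pow_func, and so on, all returning float and forwarding to the CUDA math intrinsic. The middle of OPENCV_GPU_IMPLEMENT_BIN_FUNCTOR falls outside these hunks, so this expansion for atan2 is a rough reconstruction, not the literal header text:

    template <typename T> struct atan2_func : binary_function<T, T, float>
    {
        __device__ __forceinline__ float operator()(typename TypeTraits<T>::ParameterType v1,
                                                    typename TypeTraits<T>::ParameterType v2) const
        {
            return ::atan2(v1, v2); // CUDA math intrinsic
        }
    };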
template<typename T> struct hypot_sqr_func : binary_function<T, T, float>
{
    __device__ __forceinline__ T operator ()(typename TypeTraits<T>::ParameterType src1, typename TypeTraits<T>::ParameterType src2) const
    {
        return src1 * src1 + src2 * src2;
    }
};

// Saturate Cast Functor

template <typename T, typename D> struct saturate_cast_func : unary_function<T, D>
{
    __device__ __forceinline__ D operator ()(typename TypeTraits<T>::ParameterType v) const
    {
        return saturate_cast<D>(v);
    }
};
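saturate_cast_func adapts the free saturate_cast function (defined later in saturate_cast.hpp) into a unary functor, which is how saturating type conversions are expressed for the transform machinery. A hedged host-side sketch; the src32f/dst8u buffers and stream are assumed to exist:

    // Convert a float surface to uchar with saturation, elementwise.
    cv::gpu::device::saturate_cast_func<float, uchar> cvt;
    cv::gpu::device::transform(src32f, dst8u, cvt, stream); // DevMem2D_<float> -> DevMem2D_<uchar>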
// Threshold Functors

template <typename T> struct thresh_binary_func : unary_function<T, T>
{
    __host__ __device__ __forceinline__ thresh_binary_func(T thresh_, T maxVal_) : thresh(thresh_), maxVal(maxVal_) {}

    __device__ __forceinline__ T operator()(typename TypeTraits<T>::ParameterType src) const
@ -362,9 +365,9 @@ template <typename T> struct thresh_binary_func : unary_function<T, T>

    const T thresh;
    const T maxVal;
};
template <typename T> struct thresh_binary_inv_func : unary_function<T, T>
{
    __host__ __device__ __forceinline__ thresh_binary_inv_func(T thresh_, T maxVal_) : thresh(thresh_), maxVal(maxVal_) {}

    __device__ __forceinline__ T operator()(typename TypeTraits<T>::ParameterType src) const
@ -374,9 +377,9 @@ template <typename T> struct thresh_binary_inv_func : unary_function<T, T>

    const T thresh;
    const T maxVal;
};
template <typename T> struct thresh_trunc_func : unary_function<T, T>
{
    explicit __host__ __device__ __forceinline__ thresh_trunc_func(T thresh_, T maxVal_ = 0) : thresh(thresh_) {}

    __device__ __forceinline__ T operator()(typename TypeTraits<T>::ParameterType src) const
@ -385,9 +388,9 @@ template <typename T> struct thresh_trunc_func : unary_function<T, T>
    }

    const T thresh;
};
template <typename T> struct thresh_to_zero_func : unary_function<T, T>
{
    explicit __host__ __device__ __forceinline__ thresh_to_zero_func(T thresh_, T maxVal_ = 0) : thresh(thresh_) {}

    __device__ __forceinline__ T operator()(typename TypeTraits<T>::ParameterType src) const
@ -396,9 +399,9 @@ template <typename T> struct thresh_to_zero_func : unary_function<T, T>
    }

    const T thresh;
};
template <typename T> struct thresh_to_zero_inv_func : unary_function<T, T>
{
    explicit __host__ __device__ __forceinline__ thresh_to_zero_inv_func(T thresh_, T maxVal_ = 0) : thresh(thresh_) {}

    __device__ __forceinline__ T operator()(typename TypeTraits<T>::ParameterType src) const
@ -407,12 +410,12 @@ template <typename T> struct thresh_to_zero_inv_func : unary_function<T, T>
    }

    const T thresh;
};
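The five functors correspond to the cv::threshold modes; each captures the threshold (and, for the binary modes, the replacement value) at construction time so it can be passed to the elementwise transform machinery. The operator bodies fall outside the hunks shown above, but the usage pattern is plain. A hedged sketch, with the float buffers and stream assumed to exist:

    // Truncate every value above 128 (THRESH_TRUNC semantics), elementwise.
    cv::gpu::device::thresh_trunc_func<float> op(128.0f);
    cv::gpu::device::transform(src, dst, op, stream);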
// Function Object Adaptors

template <typename Predicate> struct unary_negate : unary_function<typename Predicate::argument_type, bool>
{
    explicit __host__ __device__ __forceinline__ unary_negate(const Predicate& p) : pred(p) {}

    __device__ __forceinline__ bool operator()(typename TypeTraits<typename Predicate::argument_type>::ParameterType x) const
@ -421,14 +424,14 @@ template <typename Predicate> struct unary_negate : unary_function<typename Pred
    }

    const Predicate pred;
};
template <typename Predicate> __host__ __device__ __forceinline__ unary_negate<Predicate> not1(const Predicate& pred)
{
    return unary_negate<Predicate>(pred);
}

template <typename Predicate> struct binary_negate : binary_function<typename Predicate::first_argument_type, typename Predicate::second_argument_type, bool>
{
    explicit __host__ __device__ __forceinline__ binary_negate(const Predicate& p) : pred(p) {}

    __device__ __forceinline__ bool operator()(typename TypeTraits<typename Predicate::first_argument_type>::ParameterType x, typename TypeTraits<typename Predicate::second_argument_type>::ParameterType y) const
@ -437,14 +440,14 @@ template <typename Predicate> struct binary_negate : binary_function<typename Pr
    }

    const Predicate pred;
};
template <typename BinaryPredicate> __host__ __device__ __forceinline__ binary_negate<BinaryPredicate> not2(const BinaryPredicate& pred)
{
    return binary_negate<BinaryPredicate>(pred);
}

template <typename Op> struct binder1st : unary_function<typename Op::second_argument_type, typename Op::result_type>
{
    __host__ __device__ __forceinline__ binder1st(const Op& op_, const typename Op::first_argument_type& arg1_) : op(op_), arg1(arg1_) {}

    __device__ __forceinline__ typename Op::result_type operator ()(typename TypeTraits<typename Op::second_argument_type>::ParameterType a) const
@ -454,14 +457,14 @@ template <typename Op> struct binder1st : unary_function<typename Op::second_arg

    const Op op;
    const typename Op::first_argument_type arg1;
};
template <typename Op, typename T> __host__ __device__ __forceinline__ binder1st<Op> bind1st(const Op& op, const T& x)
{
    return binder1st<Op>(op, typename Op::first_argument_type(x));
}

template <typename Op> struct binder2nd : unary_function<typename Op::first_argument_type, typename Op::result_type>
{
    __host__ __device__ __forceinline__ binder2nd(const Op& op_, const typename Op::second_argument_type& arg2_) : op(op_), arg2(arg2_) {}

    __forceinline__ __device__ typename Op::result_type operator ()(typename TypeTraits<typename Op::first_argument_type>::ParameterType a) const
@ -471,16 +474,16 @@ template <typename Op> struct binder2nd : unary_function<typename Op::first_argu

    const Op op;
    const typename Op::second_argument_type arg2;
};
template <typename Op, typename T> __host__ __device__ __forceinline__ binder2nd<Op> bind2nd(const Op& op, const T& x)
{
    return binder2nd<Op>(op, typename Op::second_argument_type(x));
}
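These adaptors mirror std::bind1st/std::bind2nd: they fix one operand of a binary functor, yielding a unary functor that the single-source transform overloads can consume. A minimal sketch, assuming the functor is later handed to transform():

    using namespace cv::gpu::device;
    // Fix the second operand of plus<int> at 5, producing a unary "add 5" functor.
    binder2nd< plus<int> > add5 = bind2nd(plus<int>(), 5);
    // add5(x) computes x + 5 on the device.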
// Functor Traits

template <typename F> struct IsUnaryFunction
{
    typedef char Yes;
    struct No {Yes a[2];};

@ -490,10 +493,10 @@ template <typename F> struct IsUnaryFunction
    static F makeF();

    enum { value = (sizeof(check(makeF())) == sizeof(Yes)) };
};

template <typename F> struct IsBinaryFunction
{
    typedef char Yes;
    struct No {Yes a[2];};

@ -503,17 +506,17 @@ template <typename F> struct IsBinaryFunction
    static F makeF();

    enum { value = (sizeof(check(makeF())) == sizeof(Yes)) };
};

-namespace detail
+namespace functional_detail
{
    template <size_t src_elem_size, size_t dst_elem_size> struct UnOpShift { enum { shift = 1 }; };
    template <size_t src_elem_size> struct UnOpShift<src_elem_size, 1> { enum { shift = 4 }; };
    template <size_t src_elem_size> struct UnOpShift<src_elem_size, 2> { enum { shift = 2 }; };

    template <typename T, typename D> struct DefaultUnaryShift
    {
-       enum { shift = detail::UnOpShift<sizeof(T), sizeof(D)>::shift };
+       enum { shift = UnOpShift<sizeof(T), sizeof(D)>::shift };
    };

    template <size_t src_elem_size1, size_t src_elem_size2, size_t dst_elem_size> struct BinOpShift { enum { shift = 1 }; };
@ -522,7 +525,7 @@ namespace detail

    template <typename T1, typename T2, typename D> struct DefaultBinaryShift
    {
-       enum { shift = detail::BinOpShift<sizeof(T1), sizeof(T2), sizeof(D)>::shift };
+       enum { shift = BinOpShift<sizeof(T1), sizeof(T2), sizeof(D)>::shift };
    };

    template <typename Func, bool unary = IsUnaryFunction<Func>::value> struct ShiftDispatcher;
@ -534,28 +537,27 @@ namespace detail
    {
        enum { shift = DefaultBinaryShift<typename Func::first_argument_type, typename Func::second_argument_type, typename Func::result_type>::shift };
    };
}

template <typename Func> struct DefaultTransformShift
{
-   enum { shift = detail::ShiftDispatcher<Func>::shift };
+   enum { shift = functional_detail::ShiftDispatcher<Func>::shift };
};

template <typename Func> struct DefaultTransformFunctorTraits
{
    enum { simple_block_dim_x = 16 };
    enum { simple_block_dim_y = 16 };

    enum { smart_block_dim_x = 16 };
    enum { smart_block_dim_y = 16 };
    enum { smart_shift = DefaultTransformShift<Func>::shift };
};

template <typename Func> struct TransformFunctorTraits : DefaultTransformFunctorTraits<Func> {};

-#define DEFINE_TRANSFORM_FUNCTOR_TRAITS(type) \
+#define OPENCV_GPU_TRANSFORM_FUNCTOR_TRAITS(type) \
    template <> struct TransformFunctorTraits< type > : DefaultTransformFunctorTraits< type >

+}}} // namespace cv { namespace gpu { namespace device
-END_OPENCV_DEVICE_NAMESPACE

#endif // __OPENCV_GPU_FUNCTIONAL_HPP__
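These traits parameterize the transform kernels: smart_shift is the per-thread element count, derived from operand sizes by the UnOpShift/BinOpShift specializations above (4 for byte-sized results, 2 for short-sized, 1 otherwise). Individual functors can override the defaults through the renamed macro; a hedged sketch, where the tuned values are purely illustrative:

    OPENCV_GPU_TRANSFORM_FUNCTOR_TRAITS(thresh_binary_func<uchar>)
    {
        enum { smart_block_dim_y = 8 }; // illustrative override
        enum { smart_shift = 8 };       // process 8 elements per thread
    };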
@ -43,12 +43,13 @@
#ifndef __OPENCV_GPU_LIMITS_GPU_HPP__
#define __OPENCV_GPU_LIMITS_GPU_HPP__

+#include <limits>
#include "internal_shared.hpp"

-BEGIN_OPENCV_DEVICE_NAMESPACE
+namespace cv { namespace gpu { namespace device
+{

template<class T> struct numeric_limits
{
    typedef T type;
    __device__ __forceinline__ static type min() { return type(); };
    __device__ __forceinline__ static type max() { return type(); };
@ -59,10 +60,10 @@ template<class T> struct numeric_limits
    __device__ __forceinline__ static type quiet_NaN() { return type(); }
    __device__ __forceinline__ static type signaling_NaN() { return T(); }
    static const bool is_signed;
};

template<> struct numeric_limits<bool>
{
    typedef bool type;
    __device__ __forceinline__ static type min() { return false; };
    __device__ __forceinline__ static type max() { return true; };
@ -73,10 +74,10 @@ template<> struct numeric_limits<bool>
    __device__ __forceinline__ static type quiet_NaN();
    __device__ __forceinline__ static type signaling_NaN();
    static const bool is_signed = false;
};

template<> struct numeric_limits<char>
{
    typedef char type;
    __device__ __forceinline__ static type min() { return CHAR_MIN; };
    __device__ __forceinline__ static type max() { return CHAR_MAX; };
@ -87,13 +88,13 @@ template<> struct numeric_limits<char>
    __device__ __forceinline__ static type quiet_NaN();
    __device__ __forceinline__ static type signaling_NaN();
    static const bool is_signed = (char)-1 == -1;
};

template<> struct numeric_limits<signed char>
{
    typedef char type;
-   __device__ __forceinline__ static type min() { return CHAR_MIN; };
-   __device__ __forceinline__ static type max() { return CHAR_MAX; };
+   __device__ __forceinline__ static type min() { return SCHAR_MIN; };
+   __device__ __forceinline__ static type max() { return SCHAR_MAX; };
    __device__ __forceinline__ static type epsilon();
    __device__ __forceinline__ static type round_error();
    __device__ __forceinline__ static type denorm_min();
@ -101,10 +102,10 @@ template<> struct numeric_limits<char>
    __device__ __forceinline__ static type quiet_NaN();
    __device__ __forceinline__ static type signaling_NaN();
    static const bool is_signed = (signed char)-1 == -1;
};

template<> struct numeric_limits<unsigned char>
{
    typedef unsigned char type;
    __device__ __forceinline__ static type min() { return 0; };
    __device__ __forceinline__ static type max() { return UCHAR_MAX; };
@ -115,10 +116,10 @@ template<> struct numeric_limits<unsigned char>
    __device__ __forceinline__ static type quiet_NaN();
    __device__ __forceinline__ static type signaling_NaN();
    static const bool is_signed = false;
};

template<> struct numeric_limits<short>
{
    typedef short type;
    __device__ __forceinline__ static type min() { return SHRT_MIN; };
    __device__ __forceinline__ static type max() { return SHRT_MAX; };
@ -129,10 +130,10 @@ template<> struct numeric_limits<short>
    __device__ __forceinline__ static type quiet_NaN();
    __device__ __forceinline__ static type signaling_NaN();
    static const bool is_signed = true;
};

template<> struct numeric_limits<unsigned short>
{
    typedef unsigned short type;
    __device__ __forceinline__ static type min() { return 0; };
    __device__ __forceinline__ static type max() { return USHRT_MAX; };
@ -143,10 +144,10 @@ template<> struct numeric_limits<unsigned short>
    __device__ __forceinline__ static type quiet_NaN();
    __device__ __forceinline__ static type signaling_NaN();
    static const bool is_signed = false;
};

template<> struct numeric_limits<int>
{
    typedef int type;
    __device__ __forceinline__ static type min() { return INT_MIN; };
    __device__ __forceinline__ static type max() { return INT_MAX; };
@ -157,11 +158,11 @@ template<> struct numeric_limits<int>
    __device__ __forceinline__ static type quiet_NaN();
    __device__ __forceinline__ static type signaling_NaN();
    static const bool is_signed = true;
};

template<> struct numeric_limits<unsigned int>
{
    typedef unsigned int type;
    __device__ __forceinline__ static type min() { return 0; };
    __device__ __forceinline__ static type max() { return UINT_MAX; };
@ -172,10 +173,10 @@ template<> struct numeric_limits<unsigned int>
    __device__ __forceinline__ static type quiet_NaN();
    __device__ __forceinline__ static type signaling_NaN();
    static const bool is_signed = false;
};

template<> struct numeric_limits<long>
{
    typedef long type;
    __device__ __forceinline__ static type min() { return LONG_MIN; };
    __device__ __forceinline__ static type max() { return LONG_MAX; };
@ -186,10 +187,10 @@ template<> struct numeric_limits<long>
    __device__ __forceinline__ static type quiet_NaN();
    __device__ __forceinline__ static type signaling_NaN();
    static const bool is_signed = true;
};

template<> struct numeric_limits<unsigned long>
{
    typedef unsigned long type;
    __device__ __forceinline__ static type min() { return 0; };
    __device__ __forceinline__ static type max() { return ULONG_MAX; };
@ -200,10 +201,10 @@ template<> struct numeric_limits<unsigned long>
    __device__ __forceinline__ static type quiet_NaN();
    __device__ __forceinline__ static type signaling_NaN();
    static const bool is_signed = false;
};

template<> struct numeric_limits<float>
{
    typedef float type;
    __device__ __forceinline__ static type min() { return 1.175494351e-38f/*FLT_MIN*/; };
    __device__ __forceinline__ static type max() { return 3.402823466e+38f/*FLT_MAX*/; };
@ -214,10 +215,10 @@ template<> struct numeric_limits<float>
    __device__ __forceinline__ static type quiet_NaN();
    __device__ __forceinline__ static type signaling_NaN();
    static const bool is_signed = true;
};

template<> struct numeric_limits<double>
{
    typedef double type;
    __device__ __forceinline__ static type min() { return 2.2250738585072014e-308/*DBL_MIN*/; };
    __device__ __forceinline__ static type max() { return 1.7976931348623158e+308/*DBL_MAX*/; };
@ -228,8 +229,7 @@ template<> struct numeric_limits<double>
    __device__ __forceinline__ static type quiet_NaN();
    __device__ __forceinline__ static type signaling_NaN();
    static const bool is_signed = true;
};

-END_OPENCV_DEVICE_NAMESPACE
+}}} // namespace cv { namespace gpu { namespace device {

#endif // __OPENCV_GPU_LIMITS_GPU_HPP__
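This device-side numeric_limits stands in for std::numeric_limits, whose members were not callable from device code in the CUDA toolkits of this era; note that, as in the host template, min() for floating types is the smallest positive normal value, not the most negative one. A minimal sketch of kernel-side use (the kernel itself is hypothetical):

    __global__ void fill_lowest(float* data, int n)
    {
        int i = blockIdx.x * blockDim.x + threadIdx.x;
        if (i < n)
            // most negative finite float: negate max(), since min() is the smallest positive normal
            data[i] = -cv::gpu::device::numeric_limits<float>::max();
    }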
@ -45,173 +45,172 @@

#include "internal_shared.hpp"

-BEGIN_OPENCV_DEVICE_NAMESPACE
+namespace cv { namespace gpu { namespace device
+{

template<typename _Tp> __device__ __forceinline__ _Tp saturate_cast(uchar v) { return _Tp(v); }
template<typename _Tp> __device__ __forceinline__ _Tp saturate_cast(schar v) { return _Tp(v); }
template<typename _Tp> __device__ __forceinline__ _Tp saturate_cast(ushort v) { return _Tp(v); }
template<typename _Tp> __device__ __forceinline__ _Tp saturate_cast(short v) { return _Tp(v); }
template<typename _Tp> __device__ __forceinline__ _Tp saturate_cast(uint v) { return _Tp(v); }
template<typename _Tp> __device__ __forceinline__ _Tp saturate_cast(int v) { return _Tp(v); }
template<typename _Tp> __device__ __forceinline__ _Tp saturate_cast(float v) { return _Tp(v); }
template<typename _Tp> __device__ __forceinline__ _Tp saturate_cast(double v) { return _Tp(v); }

template<> __device__ __forceinline__ uchar saturate_cast<uchar>(schar v)
{
    return (uchar) ::max((int)v, 0);
}
template<> __device__ __forceinline__ uchar saturate_cast<uchar>(ushort v)
{
    return (uchar) ::min((uint)v, (uint)UCHAR_MAX);
}
template<> __device__ __forceinline__ uchar saturate_cast<uchar>(int v)
{
    return (uchar)((uint)v <= UCHAR_MAX ? v : v > 0 ? UCHAR_MAX : 0);
}
template<> __device__ __forceinline__ uchar saturate_cast<uchar>(uint v)
{
    return (uchar) ::min(v, (uint)UCHAR_MAX);
}
template<> __device__ __forceinline__ uchar saturate_cast<uchar>(short v)
{
    return saturate_cast<uchar>((uint)v);
}

template<> __device__ __forceinline__ uchar saturate_cast<uchar>(float v)
{
    int iv = __float2int_rn(v);
    return saturate_cast<uchar>(iv);
}
template<> __device__ __forceinline__ uchar saturate_cast<uchar>(double v)
{
#if __CUDA_ARCH__ >= 130
    int iv = __double2int_rn(v);
    return saturate_cast<uchar>(iv);
#else
    return saturate_cast<uchar>((float)v);
#endif
}

template<> __device__ __forceinline__ schar saturate_cast<schar>(uchar v)
{
    return (schar) ::min((int)v, SCHAR_MAX);
}
template<> __device__ __forceinline__ schar saturate_cast<schar>(ushort v)
{
    return (schar) ::min((uint)v, (uint)SCHAR_MAX);
}
template<> __device__ __forceinline__ schar saturate_cast<schar>(int v)
{
    return (schar)((uint)(v-SCHAR_MIN) <= (uint)UCHAR_MAX ? v : v > 0 ? SCHAR_MAX : SCHAR_MIN);
}
template<> __device__ __forceinline__ schar saturate_cast<schar>(short v)
{
    return saturate_cast<schar>((int)v);
}
template<> __device__ __forceinline__ schar saturate_cast<schar>(uint v)
{
    return (schar) ::min(v, (uint)SCHAR_MAX);
}

template<> __device__ __forceinline__ schar saturate_cast<schar>(float v)
{
    int iv = __float2int_rn(v);
    return saturate_cast<schar>(iv);
}
template<> __device__ __forceinline__ schar saturate_cast<schar>(double v)
{
#if __CUDA_ARCH__ >= 130
    int iv = __double2int_rn(v);
    return saturate_cast<schar>(iv);
#else
    return saturate_cast<schar>((float)v);
#endif
}

template<> __device__ __forceinline__ ushort saturate_cast<ushort>(schar v)
{
    return (ushort) ::max((int)v, 0);
}
template<> __device__ __forceinline__ ushort saturate_cast<ushort>(short v)
{
    return (ushort) ::max((int)v, 0);
}
template<> __device__ __forceinline__ ushort saturate_cast<ushort>(int v)
{
    return (ushort)((uint)v <= (uint)USHRT_MAX ? v : v > 0 ? USHRT_MAX : 0);
}
template<> __device__ __forceinline__ ushort saturate_cast<ushort>(uint v)
{
    return (ushort) ::min(v, (uint)USHRT_MAX);
}
template<> __device__ __forceinline__ ushort saturate_cast<ushort>(float v)
{
    int iv = __float2int_rn(v);
    return saturate_cast<ushort>(iv);
}
template<> __device__ __forceinline__ ushort saturate_cast<ushort>(double v)
{
#if __CUDA_ARCH__ >= 130
    int iv = __double2int_rn(v);
    return saturate_cast<ushort>(iv);
#else
    return saturate_cast<ushort>((float)v);
#endif
}

template<> __device__ __forceinline__ short saturate_cast<short>(ushort v)
{
    return (short) ::min((int)v, SHRT_MAX);
}
template<> __device__ __forceinline__ short saturate_cast<short>(int v)
{
    return (short)((uint)(v - SHRT_MIN) <= (uint)USHRT_MAX ? v : v > 0 ? SHRT_MAX : SHRT_MIN);
}
template<> __device__ __forceinline__ short saturate_cast<short>(uint v)
{
    return (short) ::min(v, (uint)SHRT_MAX);
}
template<> __device__ __forceinline__ short saturate_cast<short>(float v)
{
    int iv = __float2int_rn(v);
    return saturate_cast<short>(iv);
}
template<> __device__ __forceinline__ short saturate_cast<short>(double v)
{
#if __CUDA_ARCH__ >= 130
    int iv = __double2int_rn(v);
    return saturate_cast<short>(iv);
#else
    return saturate_cast<short>((float)v);
#endif
}

template<> __device__ __forceinline__ int saturate_cast<int>(float v)
{
    return __float2int_rn(v);
}
template<> __device__ __forceinline__ int saturate_cast<int>(double v)
{
#if __CUDA_ARCH__ >= 130
    return __double2int_rn(v);
#else
    return saturate_cast<int>((float)v);
#endif
}

template<> __device__ __forceinline__ uint saturate_cast<uint>(float v)
{
    return __float2uint_rn(v);
}
template<> __device__ __forceinline__ uint saturate_cast<uint>(double v)
{
#if __CUDA_ARCH__ >= 130
    return __double2uint_rn(v);
#else
    return saturate_cast<uint>((float)v);
#endif
}

-END_OPENCV_DEVICE_NAMESPACE
+}}}

#endif /* __OPENCV_GPU_SATURATE_CAST_HPP__ */
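These specializations clamp rather than wrap on narrowing conversions, matching the host-side cv::saturate_cast, and rounding from float goes through the round-to-nearest intrinsics (__float2int_rn and friends). A quick device-side sketch:

    __device__ uchar blend(float a, float b)
    {
        // a plain (uchar) cast of, say, 300.7f would truncate and wrap to 44;
        // saturate_cast rounds to nearest and clamps to [0, 255]
        return cv::gpu::device::saturate_cast<uchar>(0.5f * (a + b));
    }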
@ -49,10 +49,8 @@
#define __OPENCV_GPU_HOST_DEVICE__
#endif

-namespace cv
+namespace cv { namespace gpu
{
-   namespace gpu
-   {
    namespace device
    {
        template<bool expr> struct Static {};
@ -63,10 +61,9 @@ namespace cv
    };
}

-using cv::gpu::device::Static;
+using ::cv::gpu::device::Static;
-}
-}
+}}

-#undef __PCL_GPU_HOST_DEVICE__
+#undef __OPENCV_GPU_HOST_DEVICE__

#endif /* __OPENCV_GPU_GPU_DEVICE_STATIC_CHECK_HPP__ */
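Static<expr> is a pre-C++11 stand-in for static_assert: the primary template shown above is empty, and (to judge from the `};` visible in the second hunk) a Static<true> specialization elided between the hunks supplies a check() member, so instantiating the check on a false condition fails to compile. A hedged usage sketch under that assumption:

    template <typename T> __OPENCV_GPU_HOST_DEVICE__ void require_word_sized()
    {
        // compiles only when the condition holds; otherwise a missing-member error fires
        Static<(sizeof(T) <= sizeof(int))>::check();
    }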
@ -47,30 +47,31 @@
 #include "utility.hpp"
 #include "detail/transform_detail.hpp"
 
-BEGIN_OPENCV_DEVICE_NAMESPACE
+namespace cv { namespace gpu { namespace device
+{
+    template <typename T, typename D, typename UnOp>
+    void transform(const DevMem2D_<T>& src, const DevMem2D_<D>& dst, const UnOp& op, cudaStream_t stream = 0)
+    {
+        transform_detail::transform_caller(src, dst, op, WithOutMask(), stream);
+    }
 
 template <typename T, typename D, typename UnOp>
-void transform(const DevMem2D_<T>& src, const DevMem2D_<D>& dst, const UnOp& op, cudaStream_t stream = 0)
+    void transform(const DevMem2D_<T>& src, const DevMem2D_<D>& dst, const PtrStepb& mask, const UnOp& op, cudaStream_t stream = 0)
 {
-    detail::transform_caller(src, dst, op, WithOutMask(), stream);
+        transform_detail::transform_caller(src, dst, op, SingleMask(mask), stream);
 }
-template <typename T, typename D, typename UnOp>
-void transform(const DevMem2D_<T>& src, const DevMem2D_<D>& dst, const PtrStepb& mask, const UnOp& op, cudaStream_t stream = 0)
-{
-    detail::transform_caller(src, dst, op, SingleMask(mask), stream);
-}
 
 template <typename T1, typename T2, typename D, typename BinOp>
 void transform(const DevMem2D_<T1>& src1, const DevMem2D_<T2>& src2, const DevMem2D_<D>& dst, const BinOp& op, cudaStream_t stream = 0)
 {
-    detail::transform_caller(src1, src2, dst, op, WithOutMask(), stream);
+    transform_detail::transform_caller(src1, src2, dst, op, WithOutMask(), stream);
 }
-template <typename T1, typename T2, typename D, typename BinOp>
-void transform(const DevMem2D_<T1>& src1, const DevMem2D_<T2>& src2, const DevMem2D_<D>& dst, const PtrStepb& mask, const BinOp& op, cudaStream_t stream = 0)
-{
-    detail::transform_caller(src1, src2, dst, op, SingleMask(mask), stream);
-}
 
-END_OPENCV_DEVICE_NAMESPACE
+    template <typename T1, typename T2, typename D, typename BinOp>
+    void transform(const DevMem2D_<T1>& src1, const DevMem2D_<T2>& src2, const DevMem2D_<D>& dst, const PtrStepb& mask, const BinOp& op, cudaStream_t stream = 0)
+    {
+        transform_detail::transform_caller(src1, src2, dst, op, SingleMask(mask), stream);
+    }
+}}}
 
 #endif // __OPENCV_GPU_TRANSFORM_HPP__
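transform.hpp is a pure reshuffle: the four overloads keep their signatures, `detail::` becomes `transform_detail::`, and the masked variants still route through `SingleMask`. A sketch of the unmasked unary path (the functor and wrapper function are hypothetical):

    #include "opencv2/gpu/device/transform.hpp"

    // Element-wise negation of a single-channel float image.
    struct Negate
    {
        __device__ __forceinline__ float operator()(float v) const { return -v; }
    };

    void negateImage(const DevMem2D_<float>& src, const DevMem2D_<float>& dst, cudaStream_t stream)
    {
        ::cv::gpu::device::transform(src, dst, Negate(), stream);
    }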
@ -46,37 +46,38 @@
 #include "internal_shared.hpp"
 #include "detail/type_traits_detail.hpp"
 
-BEGIN_OPENCV_DEVICE_NAMESPACE
-
-template <typename T> struct IsSimpleParameter
-{
-    enum {value = detail::IsIntegral<T>::value || detail::IsFloat<T>::value || detail::PointerTraits<typename detail::ReferenceTraits<T>::type>::value};
-};
+namespace cv { namespace gpu { namespace device
+{
+    template <typename T> struct IsSimpleParameter
+    {
+        enum {value = type_traits_detail::IsIntegral<T>::value || type_traits_detail::IsFloat<T>::value ||
+            type_traits_detail::PointerTraits<typename type_traits_detail::ReferenceTraits<T>::type>::value};
+    };
 
 template <typename T> struct TypeTraits
 {
-    typedef typename detail::UnConst<T>::type NonConstType;
+    typedef typename type_traits_detail::UnConst<T>::type NonConstType;
-    typedef typename detail::UnVolatile<T>::type NonVolatileType;
+    typedef typename type_traits_detail::UnVolatile<T>::type NonVolatileType;
-    typedef typename detail::UnVolatile<typename detail::UnConst<T>::type>::type UnqualifiedType;
+    typedef typename type_traits_detail::UnVolatile<typename type_traits_detail::UnConst<T>::type>::type UnqualifiedType;
-    typedef typename detail::PointerTraits<UnqualifiedType>::type PointeeType;
+    typedef typename type_traits_detail::PointerTraits<UnqualifiedType>::type PointeeType;
-    typedef typename detail::ReferenceTraits<T>::type ReferredType;
+    typedef typename type_traits_detail::ReferenceTraits<T>::type ReferredType;
 
-    enum { isConst = detail::UnConst<T>::value };
+    enum { isConst = type_traits_detail::UnConst<T>::value };
-    enum { isVolatile = detail::UnVolatile<T>::value };
+    enum { isVolatile = type_traits_detail::UnVolatile<T>::value };
 
-    enum { isReference = detail::ReferenceTraits<UnqualifiedType>::value };
+    enum { isReference = type_traits_detail::ReferenceTraits<UnqualifiedType>::value };
-    enum { isPointer = detail::PointerTraits<typename detail::ReferenceTraits<UnqualifiedType>::type>::value };
+    enum { isPointer = type_traits_detail::PointerTraits<typename type_traits_detail::ReferenceTraits<UnqualifiedType>::type>::value };
 
-    enum { isUnsignedInt = detail::IsUnsignedIntegral<UnqualifiedType>::value };
+    enum { isUnsignedInt = type_traits_detail::IsUnsignedIntegral<UnqualifiedType>::value };
-    enum { isSignedInt = detail::IsSignedIntergral<UnqualifiedType>::value };
+    enum { isSignedInt = type_traits_detail::IsSignedIntergral<UnqualifiedType>::value };
-    enum { isIntegral = detail::IsIntegral<UnqualifiedType>::value };
+    enum { isIntegral = type_traits_detail::IsIntegral<UnqualifiedType>::value };
-    enum { isFloat = detail::IsFloat<UnqualifiedType>::value };
+    enum { isFloat = type_traits_detail::IsFloat<UnqualifiedType>::value };
     enum { isArith = isIntegral || isFloat };
-    enum { isVec = detail::IsVec<UnqualifiedType>::value };
+    enum { isVec = type_traits_detail::IsVec<UnqualifiedType>::value };
 
-    typedef typename detail::Select<IsSimpleParameter<UnqualifiedType>::value, T, typename detail::AddParameterType<T>::type>::type ParameterType;
-};
-
-END_OPENCV_DEVICE_NAMESPACE
+    typedef typename type_traits_detail::Select<IsSimpleParameter<UnqualifiedType>::value,
+        T, typename type_traits_detail::AddParameterType<T>::type>::type ParameterType;
+    };
+}}}
 
 #endif // __OPENCV_GPU_TYPE_TRAITS_HPP__
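All renames above are mechanical (`detail::` to `type_traits_detail::`), so client code sees the same interface. A host-side sketch of what `TypeTraits` answers at compile time (the negative-size-array assertions are just an illustration):

    #include "opencv2/gpu/device/type_traits.hpp"

    using ::cv::gpu::device::TypeTraits;

    // Each of these is evaluated entirely at compile time.
    typedef TypeTraits<volatile unsigned int>::UnqualifiedType U;        // -> unsigned int
    char assertPointer[TypeTraits<const float*>::isPointer ? 1 : -1];    // pointer detected
    char assertFloat[TypeTraits<double>::isFloat ? 1 : -1];              // floating point detected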
@ -48,28 +48,28 @@
 #include "datamov_utils.hpp"
 #include "detail/utility_detail.hpp"
 
-BEGIN_OPENCV_DEVICE_NAMESPACE
-
-#define OPENCV_GPU_LOG_WARP_SIZE (5)
-#define OPENCV_GPU_WARP_SIZE (1 << OPENCV_GPU_LOG_WARP_SIZE)
-#define OPENCV_GPU_LOG_MEM_BANKS ((__CUDA_ARCH__ >= 200) ? 5 : 4) // 32 banks on fermi, 16 on tesla
-#define OPENCV_GPU_MEM_BANKS (1 << OPENCV_GPU_LOG_MEM_BANKS)
-
-///////////////////////////////////////////////////////////////////////////////
-// swap
-
-template <typename T> void __device__ __host__ __forceinline__ swap(T& a, T& b)
-{
+namespace cv { namespace gpu { namespace device
+{
+    #define OPENCV_GPU_LOG_WARP_SIZE (5)
+    #define OPENCV_GPU_WARP_SIZE (1 << OPENCV_GPU_LOG_WARP_SIZE)
+    #define OPENCV_GPU_LOG_MEM_BANKS ((__CUDA_ARCH__ >= 200) ? 5 : 4) // 32 banks on fermi, 16 on tesla
+    #define OPENCV_GPU_MEM_BANKS (1 << OPENCV_GPU_LOG_MEM_BANKS)
+
+    ///////////////////////////////////////////////////////////////////////////////
+    // swap
+
+    template <typename T> void __device__ __host__ __forceinline__ swap(T& a, T& b)
+    {
     const T temp = a;
     a = b;
     b = temp;
 }
 
 ///////////////////////////////////////////////////////////////////////////////
 // Mask Reader
 
 struct SingleMask
 {
     explicit __host__ __device__ __forceinline__ SingleMask(const PtrStepb& mask_) : mask(mask_) {}
 
     __device__ __forceinline__ bool operator()(int y, int x) const
@ -78,10 +78,10 @@ struct SingleMask
     }
 
     const PtrStepb mask;
 };
 
 struct MaskCollection
 {
     explicit __host__ __device__ __forceinline__ MaskCollection(PtrStepb* maskCollection_) : maskCollection(maskCollection_) {}
 
     __device__ __forceinline__ void next()
@ -101,10 +101,10 @@ struct MaskCollection
 
     const PtrStepb* maskCollection;
     PtrStepb curMask;
 };
 
 struct WithOutMask
 {
     __device__ __forceinline__ void next() const
     {
     }
@ -131,37 +131,37 @@ struct WithOutMask
     {
         return true;
     }
 };
 
 ///////////////////////////////////////////////////////////////////////////////
 // Reduction
 
 template <int n, typename T, typename Op> __device__ __forceinline__ void reduce(volatile T* data, T& partial_reduction, int tid, const Op& op)
 {
     StaticAssert<n >= 8 && n <= 512>::check();
-    detail::ReductionDispatcher<n <= 64>::reduce<n>(data, partial_reduction, tid, op);
+    utility_detail::ReductionDispatcher<n <= 64>::reduce<n>(data, partial_reduction, tid, op);
 }
 
 template <int n, typename T, typename V, typename Pred>
 __device__ __forceinline__ void reducePredVal(volatile T* sdata, T& myData, V* sval, V& myVal, int tid, const Pred& pred)
 {
     StaticAssert<n >= 8 && n <= 512>::check();
-    detail::PredValReductionDispatcher<n <= 64>::reduce<n>(myData, myVal, sdata, sval, tid, pred);
+    utility_detail::PredValReductionDispatcher<n <= 64>::reduce<n>(myData, myVal, sdata, sval, tid, pred);
 }
 
 template <int n, typename T, typename V1, typename V2, typename Pred>
 __device__ __forceinline__ void reducePredVal2(volatile T* sdata, T& myData, V1* sval1, V1& myVal1, V2* sval2, V2& myVal2, int tid, const Pred& pred)
 {
     StaticAssert<n >= 8 && n <= 512>::check();
-    detail::PredVal2ReductionDispatcher<n <= 64>::reduce<n>(myData, myVal1, myVal2, sdata, sval1, sval2, tid, pred);
+    utility_detail::PredVal2ReductionDispatcher<n <= 64>::reduce<n>(myData, myVal1, myVal2, sdata, sval1, sval2, tid, pred);
 }
 
 ///////////////////////////////////////////////////////////////////////////////
 // Solve linear system
 
 // solve 2x2 linear system Ax=b
 template <typename T> __device__ __forceinline__ bool solve2x2(const T A[2][2], const T b[2], T x[2])
 {
     T det = A[0][0] * A[1][1] - A[1][0] * A[0][1];
 
     if (det != 0)
@ -176,11 +176,11 @@ template <typename T> __device__ __forceinline__ bool solve2x2(const T A[2][2],
     }
 
     return false;
 }
 
 // solve 3x3 linear system Ax=b
 template <typename T> __device__ __forceinline__ bool solve3x3(const T A[3][3], const T b[3], T x[3])
 {
     T det = A[0][0] * (A[1][1] * A[2][2] - A[1][2] * A[2][1])
           - A[0][1] * (A[1][0] * A[2][2] - A[1][2] * A[2][0])
           + A[0][2] * (A[1][0] * A[2][1] - A[1][1] * A[2][0]);
@ -208,8 +208,7 @@ template <typename T> __device__ __forceinline__ bool solve3x3(const T A[3][3],
     }
 
     return false;
 }
 
-END_OPENCV_DEVICE_NAMESPACE
+}}} // namespace cv { namespace gpu { namespace device
 
 #endif // __OPENCV_GPU_UTILITY_HPP__
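The reduction helpers now dispatch into `utility_detail::`; semantics are unchanged. A block-sum sketch follows; it assumes `functional.hpp` provides a `plus<T>` functor and that the reduced total lands in `smem[0]`, which matches the surrounding code base but is worth double-checking:

    #include "opencv2/gpu/device/utility.hpp"
    #include "opencv2/gpu/device/functional.hpp"

    // One block of n threads sums n floats; thread 0 writes the block total.
    template <int n> __global__ void blockSum(const float* in, float* out)
    {
        __shared__ volatile float smem[n];
        float val = in[blockIdx.x * n + threadIdx.x];
        ::cv::gpu::device::reduce<n>(smem, val, threadIdx.x, ::cv::gpu::device::plus<float>());
        if (threadIdx.x == 0)
            out[blockIdx.x] = smem[0];
    }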
@ -48,10 +48,10 @@
 #include "functional.hpp"
 #include "detail/vec_distance_detail.hpp"
 
-BEGIN_OPENCV_DEVICE_NAMESPACE
-
-template <typename T> struct L1Dist
-{
+namespace cv { namespace gpu { namespace device
+{
+    template <typename T> struct L1Dist
+    {
     typedef int value_type;
     typedef int result_type;
 
@ -73,9 +73,9 @@ template <typename T> struct L1Dist
     }
 
     int mySum;
 };
 template <> struct L1Dist<float>
 {
     typedef float value_type;
     typedef float result_type;
 
@ -97,10 +97,10 @@ template <> struct L1Dist<float>
     }
 
     float mySum;
 };
 
 struct L2Dist
 {
     typedef float value_type;
     typedef float result_type;
 
@ -123,10 +123,10 @@ struct L2Dist
     }
 
     float mySum;
 };
 
 struct HammingDist
 {
     typedef int value_type;
     typedef int result_type;
 
@ -148,12 +148,12 @@ struct HammingDist
     }
 
     int mySum;
 };
 
 // calc distance between two vectors in global memory
 template <int THREAD_DIM, typename Dist, typename T1, typename T2>
 __device__ void calcVecDiffGlobal(const T1* vec1, const T2* vec2, int len, Dist& dist, typename Dist::result_type* smem, int tid)
 {
     for (int i = tid; i < len; i += THREAD_DIM)
     {
         T1 val1;
@ -166,20 +166,20 @@ __device__ void calcVecDiffGlobal(const T1* vec1, const T2* vec2, int len, Dist&
     }
 
     dist.reduceAll<THREAD_DIM>(smem, tid);
 }
 
 // calc distance between two vectors, first vector is cached in register or shared memory, second vector is in global memory
 template <int THREAD_DIM, int MAX_LEN, bool LEN_EQ_MAX_LEN, typename Dist, typename T1, typename T2>
 __device__ __forceinline__ void calcVecDiffCached(const T1* vecCached, const T2* vecGlob, int len, Dist& dist, typename Dist::result_type* smem, int tid)
 {
-    detail::VecDiffCachedCalculator<THREAD_DIM, MAX_LEN, LEN_EQ_MAX_LEN>::calc(vecCached, vecGlob, len, dist, tid);
+    vec_distance_detail::VecDiffCachedCalculator<THREAD_DIM, MAX_LEN, LEN_EQ_MAX_LEN>::calc(vecCached, vecGlob, len, dist, tid);
 
     dist.reduceAll<THREAD_DIM>(smem, tid);
 }
 
 // calc distance between two vectors in global memory
 template <int THREAD_DIM, typename T1> struct VecDiffGlobal
 {
     explicit __device__ __forceinline__ VecDiffGlobal(const T1* vec1_, int = 0, void* = 0, int = 0, int = 0)
     {
         vec1 = vec1_;
@ -192,11 +192,11 @@ template <int THREAD_DIM, typename T1> struct VecDiffGlobal
     }
 
     const T1* vec1;
 };
 
 // calc distance between two vectors, first vector is cached in register memory, second vector is in global memory
 template <int THREAD_DIM, int MAX_LEN, bool LEN_EQ_MAX_LEN, typename U> struct VecDiffCachedRegister
 {
     template <typename T1> __device__ __forceinline__ VecDiffCachedRegister(const T1* vec1, int len, U* smem, int glob_tid, int tid)
     {
         if (glob_tid < len)
@ -219,8 +219,7 @@ template <int THREAD_DIM, int MAX_LEN, bool LEN_EQ_MAX_LEN, typename U> struct V
     }
 
     U vec1Vals[MAX_LEN / THREAD_DIM];
 };
 
-END_OPENCV_DEVICE_NAMESPACE
+}}} // namespace cv { namespace gpu { namespace device
 
 #endif // __OPENCV_GPU_VEC_DISTANCE_HPP__
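The distance functors only change namespace prefixes. To make the flow concrete, here is a one-block L2 distance sketch; it assumes `L2Dist` is default-constructible and convertible to `float` once reduced (the elided hunks define exactly that), so treat it as illustrative:

    #include "opencv2/gpu/device/vec_distance.hpp"

    // One block of THREAD_DIM threads computes the L2 distance over len elements.
    template <int THREAD_DIM> __global__ void l2Kernel(const float* a, const float* b, int len, float* out)
    {
        __shared__ float smem[THREAD_DIM];
        ::cv::gpu::device::L2Dist dist;
        ::cv::gpu::device::calcVecDiffGlobal<THREAD_DIM>(a, b, len, dist, smem, threadIdx.x);
        if (threadIdx.x == 0)
            *out = dist;   // conversion operator yields the final distance
    }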
@ -48,10 +48,10 @@
 #include "vec_traits.hpp"
 #include "functional.hpp"
 
-BEGIN_OPENCV_DEVICE_NAMESPACE
-
-namespace detail
-{
+namespace cv { namespace gpu { namespace device
+{
+    namespace vec_math_detail
+    {
     template <int cn, typename VecD> struct SatCastHelper;
     template <typename VecD> struct SatCastHelper<1, VecD>
     {
@ -90,43 +90,43 @@ namespace detail
     {
         return SatCastHelper<VecTraits<VecD>::cn, VecD>::cast(v);
     }
 }
 
-template<typename _Tp> static __device__ __forceinline__ _Tp saturate_cast(const uchar1& v) {return detail::saturate_cast_caller<_Tp>(v);}
-template<typename _Tp> static __device__ __forceinline__ _Tp saturate_cast(const char1& v) {return detail::saturate_cast_caller<_Tp>(v);}
-template<typename _Tp> static __device__ __forceinline__ _Tp saturate_cast(const ushort1& v) {return detail::saturate_cast_caller<_Tp>(v);}
-template<typename _Tp> static __device__ __forceinline__ _Tp saturate_cast(const short1& v) {return detail::saturate_cast_caller<_Tp>(v);}
-template<typename _Tp> static __device__ __forceinline__ _Tp saturate_cast(const uint1& v) {return detail::saturate_cast_caller<_Tp>(v);}
-template<typename _Tp> static __device__ __forceinline__ _Tp saturate_cast(const int1& v) {return detail::saturate_cast_caller<_Tp>(v);}
-template<typename _Tp> static __device__ __forceinline__ _Tp saturate_cast(const float1& v) {return detail::saturate_cast_caller<_Tp>(v);}
-template<typename _Tp> static __device__ __forceinline__ _Tp saturate_cast(const double1& v) {return detail::saturate_cast_caller<_Tp>(v);}
+template<typename _Tp> static __device__ __forceinline__ _Tp saturate_cast(const uchar1& v) {return vec_math_detail::saturate_cast_caller<_Tp>(v);}
+template<typename _Tp> static __device__ __forceinline__ _Tp saturate_cast(const char1& v) {return vec_math_detail::saturate_cast_caller<_Tp>(v);}
+template<typename _Tp> static __device__ __forceinline__ _Tp saturate_cast(const ushort1& v) {return vec_math_detail::saturate_cast_caller<_Tp>(v);}
+template<typename _Tp> static __device__ __forceinline__ _Tp saturate_cast(const short1& v) {return vec_math_detail::saturate_cast_caller<_Tp>(v);}
+template<typename _Tp> static __device__ __forceinline__ _Tp saturate_cast(const uint1& v) {return vec_math_detail::saturate_cast_caller<_Tp>(v);}
+template<typename _Tp> static __device__ __forceinline__ _Tp saturate_cast(const int1& v) {return vec_math_detail::saturate_cast_caller<_Tp>(v);}
+template<typename _Tp> static __device__ __forceinline__ _Tp saturate_cast(const float1& v) {return vec_math_detail::saturate_cast_caller<_Tp>(v);}
+template<typename _Tp> static __device__ __forceinline__ _Tp saturate_cast(const double1& v) {return vec_math_detail::saturate_cast_caller<_Tp>(v);}
 
-template<typename _Tp> static __device__ __forceinline__ _Tp saturate_cast(const uchar2& v) {return detail::saturate_cast_caller<_Tp>(v);}
-template<typename _Tp> static __device__ __forceinline__ _Tp saturate_cast(const char2& v) {return detail::saturate_cast_caller<_Tp>(v);}
-template<typename _Tp> static __device__ __forceinline__ _Tp saturate_cast(const ushort2& v) {return detail::saturate_cast_caller<_Tp>(v);}
-template<typename _Tp> static __device__ __forceinline__ _Tp saturate_cast(const short2& v) {return detail::saturate_cast_caller<_Tp>(v);}
-template<typename _Tp> static __device__ __forceinline__ _Tp saturate_cast(const uint2& v) {return detail::saturate_cast_caller<_Tp>(v);}
-template<typename _Tp> static __device__ __forceinline__ _Tp saturate_cast(const int2& v) {return detail::saturate_cast_caller<_Tp>(v);}
-template<typename _Tp> static __device__ __forceinline__ _Tp saturate_cast(const float2& v) {return detail::saturate_cast_caller<_Tp>(v);}
-template<typename _Tp> static __device__ __forceinline__ _Tp saturate_cast(const double2& v) {return detail::saturate_cast_caller<_Tp>(v);}
+template<typename _Tp> static __device__ __forceinline__ _Tp saturate_cast(const uchar2& v) {return vec_math_detail::saturate_cast_caller<_Tp>(v);}
+template<typename _Tp> static __device__ __forceinline__ _Tp saturate_cast(const char2& v) {return vec_math_detail::saturate_cast_caller<_Tp>(v);}
+template<typename _Tp> static __device__ __forceinline__ _Tp saturate_cast(const ushort2& v) {return vec_math_detail::saturate_cast_caller<_Tp>(v);}
+template<typename _Tp> static __device__ __forceinline__ _Tp saturate_cast(const short2& v) {return vec_math_detail::saturate_cast_caller<_Tp>(v);}
+template<typename _Tp> static __device__ __forceinline__ _Tp saturate_cast(const uint2& v) {return vec_math_detail::saturate_cast_caller<_Tp>(v);}
+template<typename _Tp> static __device__ __forceinline__ _Tp saturate_cast(const int2& v) {return vec_math_detail::saturate_cast_caller<_Tp>(v);}
+template<typename _Tp> static __device__ __forceinline__ _Tp saturate_cast(const float2& v) {return vec_math_detail::saturate_cast_caller<_Tp>(v);}
+template<typename _Tp> static __device__ __forceinline__ _Tp saturate_cast(const double2& v) {return vec_math_detail::saturate_cast_caller<_Tp>(v);}
 
-template<typename _Tp> static __device__ __forceinline__ _Tp saturate_cast(const uchar3& v) {return detail::saturate_cast_caller<_Tp>(v);}
-template<typename _Tp> static __device__ __forceinline__ _Tp saturate_cast(const char3& v) {return detail::saturate_cast_caller<_Tp>(v);}
-template<typename _Tp> static __device__ __forceinline__ _Tp saturate_cast(const ushort3& v) {return detail::saturate_cast_caller<_Tp>(v);}
-template<typename _Tp> static __device__ __forceinline__ _Tp saturate_cast(const short3& v) {return detail::saturate_cast_caller<_Tp>(v);}
-template<typename _Tp> static __device__ __forceinline__ _Tp saturate_cast(const uint3& v) {return detail::saturate_cast_caller<_Tp>(v);}
-template<typename _Tp> static __device__ __forceinline__ _Tp saturate_cast(const int3& v) {return detail::saturate_cast_caller<_Tp>(v);}
-template<typename _Tp> static __device__ __forceinline__ _Tp saturate_cast(const float3& v) {return detail::saturate_cast_caller<_Tp>(v);}
-template<typename _Tp> static __device__ __forceinline__ _Tp saturate_cast(const double3& v) {return detail::saturate_cast_caller<_Tp>(v);}
+template<typename _Tp> static __device__ __forceinline__ _Tp saturate_cast(const uchar3& v) {return vec_math_detail::saturate_cast_caller<_Tp>(v);}
+template<typename _Tp> static __device__ __forceinline__ _Tp saturate_cast(const char3& v) {return vec_math_detail::saturate_cast_caller<_Tp>(v);}
+template<typename _Tp> static __device__ __forceinline__ _Tp saturate_cast(const ushort3& v) {return vec_math_detail::saturate_cast_caller<_Tp>(v);}
+template<typename _Tp> static __device__ __forceinline__ _Tp saturate_cast(const short3& v) {return vec_math_detail::saturate_cast_caller<_Tp>(v);}
+template<typename _Tp> static __device__ __forceinline__ _Tp saturate_cast(const uint3& v) {return vec_math_detail::saturate_cast_caller<_Tp>(v);}
+template<typename _Tp> static __device__ __forceinline__ _Tp saturate_cast(const int3& v) {return vec_math_detail::saturate_cast_caller<_Tp>(v);}
+template<typename _Tp> static __device__ __forceinline__ _Tp saturate_cast(const float3& v) {return vec_math_detail::saturate_cast_caller<_Tp>(v);}
+template<typename _Tp> static __device__ __forceinline__ _Tp saturate_cast(const double3& v) {return vec_math_detail::saturate_cast_caller<_Tp>(v);}
 
-template<typename _Tp> static __device__ __forceinline__ _Tp saturate_cast(const uchar4& v) {return detail::saturate_cast_caller<_Tp>(v);}
-template<typename _Tp> static __device__ __forceinline__ _Tp saturate_cast(const char4& v) {return detail::saturate_cast_caller<_Tp>(v);}
-template<typename _Tp> static __device__ __forceinline__ _Tp saturate_cast(const ushort4& v) {return detail::saturate_cast_caller<_Tp>(v);}
-template<typename _Tp> static __device__ __forceinline__ _Tp saturate_cast(const short4& v) {return detail::saturate_cast_caller<_Tp>(v);}
-template<typename _Tp> static __device__ __forceinline__ _Tp saturate_cast(const uint4& v) {return detail::saturate_cast_caller<_Tp>(v);}
-template<typename _Tp> static __device__ __forceinline__ _Tp saturate_cast(const int4& v) {return detail::saturate_cast_caller<_Tp>(v);}
-template<typename _Tp> static __device__ __forceinline__ _Tp saturate_cast(const float4& v) {return detail::saturate_cast_caller<_Tp>(v);}
-template<typename _Tp> static __device__ __forceinline__ _Tp saturate_cast(const double4& v) {return detail::saturate_cast_caller<_Tp>(v);}
+template<typename _Tp> static __device__ __forceinline__ _Tp saturate_cast(const uchar4& v) {return vec_math_detail::saturate_cast_caller<_Tp>(v);}
+template<typename _Tp> static __device__ __forceinline__ _Tp saturate_cast(const char4& v) {return vec_math_detail::saturate_cast_caller<_Tp>(v);}
+template<typename _Tp> static __device__ __forceinline__ _Tp saturate_cast(const ushort4& v) {return vec_math_detail::saturate_cast_caller<_Tp>(v);}
+template<typename _Tp> static __device__ __forceinline__ _Tp saturate_cast(const short4& v) {return vec_math_detail::saturate_cast_caller<_Tp>(v);}
+template<typename _Tp> static __device__ __forceinline__ _Tp saturate_cast(const uint4& v) {return vec_math_detail::saturate_cast_caller<_Tp>(v);}
+template<typename _Tp> static __device__ __forceinline__ _Tp saturate_cast(const int4& v) {return vec_math_detail::saturate_cast_caller<_Tp>(v);}
+template<typename _Tp> static __device__ __forceinline__ _Tp saturate_cast(const float4& v) {return vec_math_detail::saturate_cast_caller<_Tp>(v);}
+template<typename _Tp> static __device__ __forceinline__ _Tp saturate_cast(const double4& v) {return vec_math_detail::saturate_cast_caller<_Tp>(v);}
 
 #define OPENCV_GPU_IMPLEMENT_VEC_UNOP(type, op, func) \
 __device__ __forceinline__ TypeVec<func<type>::result_type, 1>::vec_type op(const type ## 1 & a) \
@ -150,8 +150,8 @@ template<typename _Tp> static __device__ __forceinline__ _Tp saturate_cast(const
     return VecTraits<TypeVec<func<type>::result_type, 4>::vec_type>::make(f(a.x), f(a.y), f(a.z), f(a.w)); \
 }
 
-namespace detail
+namespace vec_math_detail
 {
     template <typename T1, typename T2> struct BinOpTraits
     {
         typedef int argument_type;
@ -192,7 +192,7 @@ namespace detail
     {
         typedef double argument_type;
     };
 }
 
 #define OPENCV_GPU_IMPLEMENT_VEC_BINOP(type, op, func) \
 __device__ __forceinline__ TypeVec<func<type>::result_type, 1>::vec_type op(const type ## 1 & a, const type ## 1 & b) \
@ -201,16 +201,16 @@ namespace detail
     return VecTraits<TypeVec<func<type>::result_type, 1>::vec_type>::make(f(a.x, b.x)); \
 } \
 template <typename T> \
-__device__ __forceinline__ typename TypeVec<typename func<typename detail::BinOpTraits<type, T>::argument_type>::result_type, 1>::vec_type op(const type ## 1 & v, T s) \
+__device__ __forceinline__ typename TypeVec<typename func<typename vec_math_detail::BinOpTraits<type, T>::argument_type>::result_type, 1>::vec_type op(const type ## 1 & v, T s) \
 { \
-    func<typename detail::BinOpTraits<type, T>::argument_type> f; \
-    return VecTraits<typename TypeVec<typename func<typename detail::BinOpTraits<type, T>::argument_type>::result_type, 1>::vec_type>::make(f(v.x, s)); \
+    func<typename vec_math_detail::BinOpTraits<type, T>::argument_type> f; \
+    return VecTraits<typename TypeVec<typename func<typename vec_math_detail::BinOpTraits<type, T>::argument_type>::result_type, 1>::vec_type>::make(f(v.x, s)); \
 } \
 template <typename T> \
-__device__ __forceinline__ typename TypeVec<typename func<typename detail::BinOpTraits<type, T>::argument_type>::result_type, 1>::vec_type op(T s, const type ## 1 & v) \
+__device__ __forceinline__ typename TypeVec<typename func<typename vec_math_detail::BinOpTraits<type, T>::argument_type>::result_type, 1>::vec_type op(T s, const type ## 1 & v) \
 { \
-    func<typename detail::BinOpTraits<type, T>::argument_type> f; \
-    return VecTraits<typename TypeVec<typename func<typename detail::BinOpTraits<type, T>::argument_type>::result_type, 1>::vec_type>::make(f(s, v.x)); \
+    func<typename vec_math_detail::BinOpTraits<type, T>::argument_type> f; \
+    return VecTraits<typename TypeVec<typename func<typename vec_math_detail::BinOpTraits<type, T>::argument_type>::result_type, 1>::vec_type>::make(f(s, v.x)); \
 } \
 __device__ __forceinline__ TypeVec<func<type>::result_type, 2>::vec_type op(const type ## 2 & a, const type ## 2 & b) \
 { \
@ -218,16 +218,16 @@ namespace detail
     return VecTraits<TypeVec<func<type>::result_type, 2>::vec_type>::make(f(a.x, b.x), f(a.y, b.y)); \
 } \
 template <typename T> \
-__device__ __forceinline__ typename TypeVec<typename func<typename detail::BinOpTraits<type, T>::argument_type>::result_type, 2>::vec_type op(const type ## 2 & v, T s) \
+__device__ __forceinline__ typename TypeVec<typename func<typename vec_math_detail::BinOpTraits<type, T>::argument_type>::result_type, 2>::vec_type op(const type ## 2 & v, T s) \
 { \
-    func<typename detail::BinOpTraits<type, T>::argument_type> f; \
-    return VecTraits<typename TypeVec<typename func<typename detail::BinOpTraits<type, T>::argument_type>::result_type, 2>::vec_type>::make(f(v.x, s), f(v.y, s)); \
+    func<typename vec_math_detail::BinOpTraits<type, T>::argument_type> f; \
+    return VecTraits<typename TypeVec<typename func<typename vec_math_detail::BinOpTraits<type, T>::argument_type>::result_type, 2>::vec_type>::make(f(v.x, s), f(v.y, s)); \
 } \
 template <typename T> \
-__device__ __forceinline__ typename TypeVec<typename func<typename detail::BinOpTraits<type, T>::argument_type>::result_type, 2>::vec_type op(T s, const type ## 2 & v) \
+__device__ __forceinline__ typename TypeVec<typename func<typename vec_math_detail::BinOpTraits<type, T>::argument_type>::result_type, 2>::vec_type op(T s, const type ## 2 & v) \
 { \
-    func<typename detail::BinOpTraits<type, T>::argument_type> f; \
-    return VecTraits<typename TypeVec<typename func<typename detail::BinOpTraits<type, T>::argument_type>::result_type, 2>::vec_type>::make(f(s, v.x), f(s, v.y)); \
+    func<typename vec_math_detail::BinOpTraits<type, T>::argument_type> f; \
+    return VecTraits<typename TypeVec<typename func<typename vec_math_detail::BinOpTraits<type, T>::argument_type>::result_type, 2>::vec_type>::make(f(s, v.x), f(s, v.y)); \
 } \
 __device__ __forceinline__ TypeVec<func<type>::result_type, 3>::vec_type op(const type ## 3 & a, const type ## 3 & b) \
 { \
@ -235,16 +235,16 @@ namespace detail
     return VecTraits<TypeVec<func<type>::result_type, 3>::vec_type>::make(f(a.x, b.x), f(a.y, b.y), f(a.z, b.z)); \
 } \
 template <typename T> \
-__device__ __forceinline__ typename TypeVec<typename func<typename detail::BinOpTraits<type, T>::argument_type>::result_type, 3>::vec_type op(const type ## 3 & v, T s) \
+__device__ __forceinline__ typename TypeVec<typename func<typename vec_math_detail::BinOpTraits<type, T>::argument_type>::result_type, 3>::vec_type op(const type ## 3 & v, T s) \
 { \
-    func<typename detail::BinOpTraits<type, T>::argument_type> f; \
-    return VecTraits<typename TypeVec<typename func<typename detail::BinOpTraits<type, T>::argument_type>::result_type, 3>::vec_type>::make(f(v.x, s), f(v.y, s), f(v.z, s)); \
+    func<typename vec_math_detail::BinOpTraits<type, T>::argument_type> f; \
+    return VecTraits<typename TypeVec<typename func<typename vec_math_detail::BinOpTraits<type, T>::argument_type>::result_type, 3>::vec_type>::make(f(v.x, s), f(v.y, s), f(v.z, s)); \
 } \
 template <typename T> \
-__device__ __forceinline__ typename TypeVec<typename func<typename detail::BinOpTraits<type, T>::argument_type>::result_type, 3>::vec_type op(T s, const type ## 3 & v) \
+__device__ __forceinline__ typename TypeVec<typename func<typename vec_math_detail::BinOpTraits<type, T>::argument_type>::result_type, 3>::vec_type op(T s, const type ## 3 & v) \
 { \
-    func<typename detail::BinOpTraits<type, T>::argument_type> f; \
-    return VecTraits<typename TypeVec<typename func<typename detail::BinOpTraits<type, T>::argument_type>::result_type, 3>::vec_type>::make(f(s, v.x), f(s, v.y), f(s, v.z)); \
+    func<typename vec_math_detail::BinOpTraits<type, T>::argument_type> f; \
+    return VecTraits<typename TypeVec<typename func<typename vec_math_detail::BinOpTraits<type, T>::argument_type>::result_type, 3>::vec_type>::make(f(s, v.x), f(s, v.y), f(s, v.z)); \
 } \
 __device__ __forceinline__ TypeVec<func<type>::result_type, 4>::vec_type op(const type ## 4 & a, const type ## 4 & b) \
 { \
@ -252,16 +252,16 @@ namespace detail
     return VecTraits<TypeVec<func<type>::result_type, 4>::vec_type>::make(f(a.x, b.x), f(a.y, b.y), f(a.z, b.z), f(a.w, b.w)); \
 } \
 template <typename T> \
-__device__ __forceinline__ typename TypeVec<typename func<typename detail::BinOpTraits<type, T>::argument_type>::result_type, 4>::vec_type op(const type ## 4 & v, T s) \
+__device__ __forceinline__ typename TypeVec<typename func<typename vec_math_detail::BinOpTraits<type, T>::argument_type>::result_type, 4>::vec_type op(const type ## 4 & v, T s) \
 { \
-    func<typename detail::BinOpTraits<type, T>::argument_type> f; \
-    return VecTraits<typename TypeVec<typename func<typename detail::BinOpTraits<type, T>::argument_type>::result_type, 4>::vec_type>::make(f(v.x, s), f(v.y, s), f(v.z, s), f(v.w, s)); \
+    func<typename vec_math_detail::BinOpTraits<type, T>::argument_type> f; \
+    return VecTraits<typename TypeVec<typename func<typename vec_math_detail::BinOpTraits<type, T>::argument_type>::result_type, 4>::vec_type>::make(f(v.x, s), f(v.y, s), f(v.z, s), f(v.w, s)); \
 } \
 template <typename T> \
-__device__ __forceinline__ typename TypeVec<typename func<typename detail::BinOpTraits<type, T>::argument_type>::result_type, 4>::vec_type op(T s, const type ## 4 & v) \
+__device__ __forceinline__ typename TypeVec<typename func<typename vec_math_detail::BinOpTraits<type, T>::argument_type>::result_type, 4>::vec_type op(T s, const type ## 4 & v) \
 { \
-    func<typename detail::BinOpTraits<T, type>::argument_type> f; \
-    return VecTraits<typename TypeVec<typename func<typename detail::BinOpTraits<type, T>::argument_type>::result_type, 4>::vec_type>::make(f(s, v.x), f(s, v.y), f(s, v.z), f(s, v.w)); \
+    func<typename vec_math_detail::BinOpTraits<T, type>::argument_type> f; \
+    return VecTraits<typename TypeVec<typename func<typename vec_math_detail::BinOpTraits<type, T>::argument_type>::result_type, 4>::vec_type>::make(f(s, v.x), f(s, v.y), f(s, v.z), f(s, v.w)); \
 }
 
 #define OPENCV_GPU_IMPLEMENT_VEC_OP(type) \
@ -313,20 +313,19 @@ namespace detail
     OPENCV_GPU_IMPLEMENT_VEC_BINOP(type, operator ^, bit_xor) \
     OPENCV_GPU_IMPLEMENT_VEC_UNOP (type, operator ~, bit_not)
 
 OPENCV_GPU_IMPLEMENT_VEC_INT_OP(uchar)
 OPENCV_GPU_IMPLEMENT_VEC_INT_OP(char)
 OPENCV_GPU_IMPLEMENT_VEC_INT_OP(ushort)
 OPENCV_GPU_IMPLEMENT_VEC_INT_OP(short)
 OPENCV_GPU_IMPLEMENT_VEC_INT_OP(int)
 OPENCV_GPU_IMPLEMENT_VEC_INT_OP(uint)
 OPENCV_GPU_IMPLEMENT_VEC_OP(float)
 OPENCV_GPU_IMPLEMENT_VEC_OP(double)
 
 #undef OPENCV_GPU_IMPLEMENT_VEC_UNOP
 #undef OPENCV_GPU_IMPLEMENT_VEC_BINOP
 #undef OPENCV_GPU_IMPLEMENT_VEC_OP
 #undef OPENCV_GPU_IMPLEMENT_VEC_INT_OP
 
-END_OPENCV_DEVICE_NAMESPACE
+}}} // namespace cv { namespace gpu { namespace device
 
 #endif // __OPENCV_GPU_VECMATH_HPP__
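After the macro expansions above, every CUDA vector type picks up element-wise arithmetic, and the renamed `saturate_cast` overloads give per-channel narrowing. A device-side sketch (the blend function is hypothetical):

    #include "opencv2/gpu/device/vec_math.hpp"

    using namespace ::cv::gpu::device;

    // Element-wise blend of two float3 pixels, then a saturating narrow to uchar3.
    __device__ uchar3 blend(const float3& a, const float3& b)
    {
        float3 m = a * 0.5f + b * 0.5f;   // operators come from OPENCV_GPU_IMPLEMENT_VEC_BINOP
        return saturate_cast<uchar3>(m);  // per-channel round + clamp via vec_math_detail
    }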
@ -45,82 +45,82 @@
|
|||||||
|
|
||||||
#include "internal_shared.hpp"
|
#include "internal_shared.hpp"
|
||||||
|
|
||||||
BEGIN_OPENCV_DEVICE_NAMESPACE
|
namespace cv { namespace gpu { namespace device
|
||||||
|
|
||||||
template<typename T, int N> struct TypeVec;
|
|
||||||
|
|
||||||
struct __align__(8) uchar8
|
|
||||||
{
|
{
|
||||||
|
template<typename T, int N> struct TypeVec;
|
||||||
|
|
||||||
|
struct __align__(8) uchar8
|
||||||
|
{
|
||||||
uchar a0, a1, a2, a3, a4, a5, a6, a7;
|
uchar a0, a1, a2, a3, a4, a5, a6, a7;
|
||||||
};
|
};
|
||||||
static __host__ __device__ __forceinline__ uchar8 make_uchar8(uchar a0, uchar a1, uchar a2, uchar a3, uchar a4, uchar a5, uchar a6, uchar a7)
|
static __host__ __device__ __forceinline__ uchar8 make_uchar8(uchar a0, uchar a1, uchar a2, uchar a3, uchar a4, uchar a5, uchar a6, uchar a7)
|
||||||
{
|
{
|
||||||
uchar8 val = {a0, a1, a2, a3, a4, a5, a6, a7};
|
uchar8 val = {a0, a1, a2, a3, a4, a5, a6, a7};
|
||||||
return val;
|
return val;
|
||||||
}
|
}
|
||||||
struct __align__(8) char8
|
struct __align__(8) char8
|
||||||
{
|
{
|
||||||
schar a0, a1, a2, a3, a4, a5, a6, a7;
|
schar a0, a1, a2, a3, a4, a5, a6, a7;
|
||||||
};
|
};
|
||||||
static __host__ __device__ __forceinline__ char8 make_char8(schar a0, schar a1, schar a2, schar a3, schar a4, schar a5, schar a6, schar a7)
|
static __host__ __device__ __forceinline__ char8 make_char8(schar a0, schar a1, schar a2, schar a3, schar a4, schar a5, schar a6, schar a7)
|
||||||
{
|
{
|
||||||
char8 val = {a0, a1, a2, a3, a4, a5, a6, a7};
|
char8 val = {a0, a1, a2, a3, a4, a5, a6, a7};
|
||||||
return val;
|
return val;
|
||||||
}
|
}
|
||||||
struct __align__(16) ushort8
|
struct __align__(16) ushort8
|
||||||
{
|
{
|
||||||
ushort a0, a1, a2, a3, a4, a5, a6, a7;
|
ushort a0, a1, a2, a3, a4, a5, a6, a7;
|
||||||
};
|
};
|
||||||
static __host__ __device__ __forceinline__ ushort8 make_ushort8(ushort a0, ushort a1, ushort a2, ushort a3, ushort a4, ushort a5, ushort a6, ushort a7)
|
static __host__ __device__ __forceinline__ ushort8 make_ushort8(ushort a0, ushort a1, ushort a2, ushort a3, ushort a4, ushort a5, ushort a6, ushort a7)
|
||||||
{
|
{
|
||||||
ushort8 val = {a0, a1, a2, a3, a4, a5, a6, a7};
|
ushort8 val = {a0, a1, a2, a3, a4, a5, a6, a7};
|
||||||
return val;
|
return val;
|
||||||
}
|
}
|
||||||
struct __align__(16) short8
|
struct __align__(16) short8
|
||||||
{
|
{
|
||||||
short a0, a1, a2, a3, a4, a5, a6, a7;
|
short a0, a1, a2, a3, a4, a5, a6, a7;
|
||||||
};
|
};
|
||||||
static __host__ __device__ __forceinline__ short8 make_short8(short a0, short a1, short a2, short a3, short a4, short a5, short a6, short a7)
|
static __host__ __device__ __forceinline__ short8 make_short8(short a0, short a1, short a2, short a3, short a4, short a5, short a6, short a7)
|
||||||
{
|
{
|
||||||
short8 val = {a0, a1, a2, a3, a4, a5, a6, a7};
|
short8 val = {a0, a1, a2, a3, a4, a5, a6, a7};
|
||||||
return val;
|
return val;
|
||||||
}
|
}
|
||||||
struct __align__(32) uint8
|
struct __align__(32) uint8
|
||||||
{
|
{
|
||||||
uint a0, a1, a2, a3, a4, a5, a6, a7;
|
uint a0, a1, a2, a3, a4, a5, a6, a7;
|
||||||
};
|
};
|
||||||
static __host__ __device__ __forceinline__ uint8 make_uint8(uint a0, uint a1, uint a2, uint a3, uint a4, uint a5, uint a6, uint a7)
|
static __host__ __device__ __forceinline__ uint8 make_uint8(uint a0, uint a1, uint a2, uint a3, uint a4, uint a5, uint a6, uint a7)
|
||||||
{
|
{
|
||||||
uint8 val = {a0, a1, a2, a3, a4, a5, a6, a7};
|
uint8 val = {a0, a1, a2, a3, a4, a5, a6, a7};
|
||||||
return val;
|
return val;
|
||||||
}
|
}
|
||||||
struct __align__(32) int8
|
struct __align__(32) int8
|
||||||
{
|
{
|
||||||
int a0, a1, a2, a3, a4, a5, a6, a7;
|
int a0, a1, a2, a3, a4, a5, a6, a7;
|
||||||
};
|
};
|
||||||
static __host__ __device__ __forceinline__ int8 make_int8(int a0, int a1, int a2, int a3, int a4, int a5, int a6, int a7)
|
static __host__ __device__ __forceinline__ int8 make_int8(int a0, int a1, int a2, int a3, int a4, int a5, int a6, int a7)
|
||||||
{
|
{
|
||||||
int8 val = {a0, a1, a2, a3, a4, a5, a6, a7};
|
int8 val = {a0, a1, a2, a3, a4, a5, a6, a7};
|
||||||
return val;
|
return val;
|
||||||
}
|
}
|
||||||
struct __align__(32) float8
|
struct __align__(32) float8
|
||||||
{
|
{
|
||||||
float a0, a1, a2, a3, a4, a5, a6, a7;
|
float a0, a1, a2, a3, a4, a5, a6, a7;
|
||||||
};
|
};
|
||||||
static __host__ __device__ __forceinline__ float8 make_float8(float a0, float a1, float a2, float a3, float a4, float a5, float a6, float a7)
|
static __host__ __device__ __forceinline__ float8 make_float8(float a0, float a1, float a2, float a3, float a4, float a5, float a6, float a7)
|
||||||
{
|
{
|
||||||
float8 val = {a0, a1, a2, a3, a4, a5, a6, a7};
|
float8 val = {a0, a1, a2, a3, a4, a5, a6, a7};
|
||||||
return val;
|
return val;
|
||||||
}
|
}
|
||||||
struct double8
|
struct double8
|
||||||
{
|
{
|
||||||
double a0, a1, a2, a3, a4, a5, a6, a7;
|
double a0, a1, a2, a3, a4, a5, a6, a7;
|
||||||
};
|
};
|
||||||
static __host__ __device__ __forceinline__ double8 make_double8(double a0, double a1, double a2, double a3, double a4, double a5, double a6, double a7)
|
static __host__ __device__ __forceinline__ double8 make_double8(double a0, double a1, double a2, double a3, double a4, double a5, double a6, double a7)
|
||||||
{
|
{
|
||||||
double8 val = {a0, a1, a2, a3, a4, a5, a6, a7};
|
double8 val = {a0, a1, a2, a3, a4, a5, a6, a7};
|
||||||
return val;
|
return val;
|
||||||
}
|
}
|
||||||
|
|
||||||
#define OPENCV_GPU_IMPLEMENT_TYPE_VEC(type) \
|
#define OPENCV_GPU_IMPLEMENT_TYPE_VEC(type) \
|
||||||
template<> struct TypeVec<type, 1> { typedef type vec_type; }; \
|
template<> struct TypeVec<type, 1> { typedef type vec_type; }; \
|
||||||
@ -134,28 +134,28 @@ static __host__ __device__ __forceinline__ double8 make_double8(double a0, doubl
|
|||||||
template<> struct TypeVec<type, 8> { typedef type ## 8 vec_type; }; \
|
template<> struct TypeVec<type, 8> { typedef type ## 8 vec_type; }; \
|
||||||
template<> struct TypeVec<type ## 8, 8> { typedef type ## 8 vec_type; };
|
template<> struct TypeVec<type ## 8, 8> { typedef type ## 8 vec_type; };
|
||||||
|
|
||||||
OPENCV_GPU_IMPLEMENT_TYPE_VEC(uchar)
|
OPENCV_GPU_IMPLEMENT_TYPE_VEC(uchar)
|
||||||
OPENCV_GPU_IMPLEMENT_TYPE_VEC(char)
|
OPENCV_GPU_IMPLEMENT_TYPE_VEC(char)
|
||||||
OPENCV_GPU_IMPLEMENT_TYPE_VEC(ushort)
|
OPENCV_GPU_IMPLEMENT_TYPE_VEC(ushort)
|
||||||
OPENCV_GPU_IMPLEMENT_TYPE_VEC(short)
|
OPENCV_GPU_IMPLEMENT_TYPE_VEC(short)
|
||||||
OPENCV_GPU_IMPLEMENT_TYPE_VEC(int)
|
OPENCV_GPU_IMPLEMENT_TYPE_VEC(int)
|
||||||
OPENCV_GPU_IMPLEMENT_TYPE_VEC(uint)
|
OPENCV_GPU_IMPLEMENT_TYPE_VEC(uint)
|
||||||
OPENCV_GPU_IMPLEMENT_TYPE_VEC(float)
|
OPENCV_GPU_IMPLEMENT_TYPE_VEC(float)
|
||||||
OPENCV_GPU_IMPLEMENT_TYPE_VEC(double)
|
OPENCV_GPU_IMPLEMENT_TYPE_VEC(double)
|
||||||
|
|
||||||
#undef OPENCV_GPU_IMPLEMENT_TYPE_VEC
|
#undef OPENCV_GPU_IMPLEMENT_TYPE_VEC
|
||||||
|
|
||||||
template<> struct TypeVec<schar, 1> { typedef schar vec_type; };
|
template<> struct TypeVec<schar, 1> { typedef schar vec_type; };
|
||||||
template<> struct TypeVec<schar, 2> { typedef char2 vec_type; };
|
template<> struct TypeVec<schar, 2> { typedef char2 vec_type; };
|
||||||
template<> struct TypeVec<schar, 3> { typedef char3 vec_type; };
|
template<> struct TypeVec<schar, 3> { typedef char3 vec_type; };
|
||||||
template<> struct TypeVec<schar, 4> { typedef char4 vec_type; };
|
template<> struct TypeVec<schar, 4> { typedef char4 vec_type; };
|
||||||
template<> struct TypeVec<schar, 8> { typedef char8 vec_type; };
|
template<> struct TypeVec<schar, 8> { typedef char8 vec_type; };
|
||||||
|
|
||||||
template<> struct TypeVec<bool, 1> { typedef uchar vec_type; };
|
template<> struct TypeVec<bool, 1> { typedef uchar vec_type; };
|
||||||
template<> struct TypeVec<bool, 2> { typedef uchar2 vec_type; };
|
template<> struct TypeVec<bool, 2> { typedef uchar2 vec_type; };
|
||||||
template<> struct TypeVec<bool, 3> { typedef uchar3 vec_type; };
|
template<> struct TypeVec<bool, 3> { typedef uchar3 vec_type; };
|
||||||
template<> struct TypeVec<bool, 4> { typedef uchar4 vec_type; };
|
template<> struct TypeVec<bool, 4> { typedef uchar4 vec_type; };
|
||||||
template<> struct TypeVec<bool, 8> { typedef uchar8 vec_type; };
|
template<> struct TypeVec<bool, 8> { typedef uchar8 vec_type; };
|
||||||
|
|
||||||
template<typename T> struct VecTraits;
|
template<typename T> struct VecTraits;
|
||||||
|
|
||||||
@ -209,73 +209,72 @@ template<> struct TypeVec<bool, 8> { typedef uchar8 vec_type; };
|
|||||||
static __device__ __host__ __forceinline__ type ## 8 make(const type* v) {return make_ ## type ## 8(v[0], v[1], v[2], v[3], v[4], v[5], v[6], v[7]);} \
|
static __device__ __host__ __forceinline__ type ## 8 make(const type* v) {return make_ ## type ## 8(v[0], v[1], v[2], v[3], v[4], v[5], v[6], v[7]);} \
|
||||||
};
|
};
|
||||||
|
|
||||||
OPENCV_GPU_IMPLEMENT_VEC_TRAITS(uchar)
|
OPENCV_GPU_IMPLEMENT_VEC_TRAITS(uchar)
|
||||||
OPENCV_GPU_IMPLEMENT_VEC_TRAITS(ushort)
|
OPENCV_GPU_IMPLEMENT_VEC_TRAITS(ushort)
|
||||||
OPENCV_GPU_IMPLEMENT_VEC_TRAITS(short)
|
OPENCV_GPU_IMPLEMENT_VEC_TRAITS(short)
|
||||||
OPENCV_GPU_IMPLEMENT_VEC_TRAITS(int)
|
OPENCV_GPU_IMPLEMENT_VEC_TRAITS(int)
|
||||||
OPENCV_GPU_IMPLEMENT_VEC_TRAITS(uint)
|
OPENCV_GPU_IMPLEMENT_VEC_TRAITS(uint)
|
||||||
OPENCV_GPU_IMPLEMENT_VEC_TRAITS(float)
|
OPENCV_GPU_IMPLEMENT_VEC_TRAITS(float)
|
||||||
OPENCV_GPU_IMPLEMENT_VEC_TRAITS(double)
|
OPENCV_GPU_IMPLEMENT_VEC_TRAITS(double)
|
||||||
|
|
||||||
#undef OPENCV_GPU_IMPLEMENT_VEC_TRAITS
|
#undef OPENCV_GPU_IMPLEMENT_VEC_TRAITS
|
||||||
|
|
||||||
template<> struct VecTraits<char>
|
template<> struct VecTraits<char>
|
||||||
{
|
{
|
||||||
typedef char elem_type;
|
typedef char elem_type;
|
||||||
enum {cn=1};
|
enum {cn=1};
|
||||||
static __device__ __host__ __forceinline__ char all(char v) {return v;}
|
static __device__ __host__ __forceinline__ char all(char v) {return v;}
|
||||||
static __device__ __host__ __forceinline__ char make(char x) {return x;}
|
        static __device__ __host__ __forceinline__ char make(char x) {return x;}
        static __device__ __host__ __forceinline__ char make(const char* x) {return *x;}
    };
    template<> struct VecTraits<schar>
    {
        typedef schar elem_type;
        enum {cn=1};
        static __device__ __host__ __forceinline__ schar all(schar v) {return v;}
        static __device__ __host__ __forceinline__ schar make(schar x) {return x;}
        static __device__ __host__ __forceinline__ schar make(const schar* x) {return *x;}
    };
    template<> struct VecTraits<char1>
    {
        typedef schar elem_type;
        enum {cn=1};
        static __device__ __host__ __forceinline__ char1 all(schar v) {return make_char1(v);}
        static __device__ __host__ __forceinline__ char1 make(schar x) {return make_char1(x);}
        static __device__ __host__ __forceinline__ char1 make(const schar* v) {return make_char1(v[0]);}
    };
    template<> struct VecTraits<char2>
    {
        typedef schar elem_type;
        enum {cn=2};
        static __device__ __host__ __forceinline__ char2 all(schar v) {return make_char2(v, v);}
        static __device__ __host__ __forceinline__ char2 make(schar x, schar y) {return make_char2(x, y);}
        static __device__ __host__ __forceinline__ char2 make(const schar* v) {return make_char2(v[0], v[1]);}
    };
    template<> struct VecTraits<char3>
    {
        typedef schar elem_type;
        enum {cn=3};
        static __device__ __host__ __forceinline__ char3 all(schar v) {return make_char3(v, v, v);}
        static __device__ __host__ __forceinline__ char3 make(schar x, schar y, schar z) {return make_char3(x, y, z);}
        static __device__ __host__ __forceinline__ char3 make(const schar* v) {return make_char3(v[0], v[1], v[2]);}
    };
    template<> struct VecTraits<char4>
    {
        typedef schar elem_type;
        enum {cn=4};
        static __device__ __host__ __forceinline__ char4 all(schar v) {return make_char4(v, v, v, v);}
        static __device__ __host__ __forceinline__ char4 make(schar x, schar y, schar z, schar w) {return make_char4(x, y, z, w);}
        static __device__ __host__ __forceinline__ char4 make(const schar* v) {return make_char4(v[0], v[1], v[2], v[3]);}
    };
    template<> struct VecTraits<char8>
    {
        typedef schar elem_type;
        enum {cn=8};
        static __device__ __host__ __forceinline__ char8 all(schar v) {return make_char8(v, v, v, v, v, v, v, v);}
        static __device__ __host__ __forceinline__ char8 make(schar a0, schar a1, schar a2, schar a3, schar a4, schar a5, schar a6, schar a7) {return make_char8(a0, a1, a2, a3, a4, a5, a6, a7);}
        static __device__ __host__ __forceinline__ char8 make(const schar* v) {return make_char8(v[0], v[1], v[2], v[3], v[4], v[5], v[6], v[7]);}
    };

-END_OPENCV_DEVICE_NAMESPACE
+}}} // namespace cv { namespace gpu { namespace device

#endif // __OPENCV_GPU_VEC_TRAITS_HPP__
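For orientation, VecTraits is the trait that lets device code stay generic over scalar and CUDA vector pixel types: elem_type names the channel type, cn the channel count, and all/make construct values uniformly. A minimal sketch of a kernel written against it; the kernel name and include path are illustrative, not part of this commit:

    #include "opencv2/gpu/device/vec_traits.hpp"  // path depends on build layout

    using namespace cv::gpu::device;

    // Fill a buffer of any supported type (uchar, char4, float2, ...) with a
    // single value broadcast across all channels by VecTraits<T>::all.
    template <typename T>
    __global__ void fillKernel(T* data, int n, typename VecTraits<T>::elem_type v)
    {
        const int i = blockIdx.x * blockDim.x + threadIdx.x;
        if (i < n)
            data[i] = VecTraits<T>::all(v);
    }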
@ -45,10 +45,10 @@

#include "internal_shared.hpp"

-BEGIN_OPENCV_DEVICE_NAMESPACE
-
-struct Warp
-{
+namespace cv { namespace gpu { namespace device
+{
+    struct Warp
+    {
        enum
        {
            LOG_WARP_SIZE = 5,

@ -108,8 +108,7 @@ struct Warp
            for(OutIt t = beg + lane; t < end; t += STRIDE, value += STRIDE)
                *t = value;
        }
    };
-
-END_OPENCV_DEVICE_NAMESPACE
+}}} // namespace cv { namespace gpu { namespace device

#endif /* __OPENCV_GPU_DEVICE_WARP_HPP__ */
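The loop in the second hunk above is a warp-strided write: lane k touches elements k, k+32, k+64, ... while value advances by the same stride, so the warp emits a contiguous ascending sequence with coalesced stores. Restated as a standalone sketch; the name warpIota and the hard-coded stride of 32 (standing in for the STRIDE parameter) are assumptions for illustration:

    // Warp-strided iota over [beg, end): lane k starts at offset k and steps
    // by the warp size, so consecutive lanes write consecutive addresses and
    // beg[i] ends up holding value + i.
    __device__ void warpIota(int* beg, int* end, int value)
    {
        const unsigned int lane = threadIdx.x & 31; // lane index within the warp
        value += lane;
        for (int* t = beg + lane; t < end; t += 32, value += 32)
            *t = value;
    }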
@ -46,11 +46,11 @@

#include "internal_shared.hpp"

-BEGIN_OPENCV_DEVICE_NAMESPACE
-
-template <class T>
-__device__ __forceinline__ T warp_reduce ( volatile T *ptr , const unsigned int tid = threadIdx.x )
-{
+namespace cv { namespace gpu { namespace device
+{
+    template <class T>
+    __device__ __forceinline__ T warp_reduce(volatile T *ptr , const unsigned int tid = threadIdx.x)
+    {
        const unsigned int lane = tid & 31; // index of thread in warp (0..31)

        if (lane < 16)

@ -65,8 +65,7 @@ __device__ __forceinline__ T warp_reduce ( volatile T *ptr , const unsigned int
        }

        return ptr[tid - lane];
    }
-
-END_OPENCV_DEVICE_NAMESPACE
+}}} // namespace cv { namespace gpu { namespace device {

#endif /* OPENCV_GPU_WARP_REDUCE_HPP__ */
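warp_reduce performs a warp-synchronous tree reduction in shared memory: the lane < 16 guard halves the number of active lanes at each step, and every lane finally reads its warp's base slot via ptr[tid - lane]. A hedged usage sketch, assuming a single-warp block (n <= 32) and the implicit intra-warp lockstep this header relies on; the kernel and buffer names are illustrative:

    using namespace cv::gpu::device;

    // Sum up to 32 floats with one warp: each lane deposits its element into
    // shared memory, then warp_reduce folds the 32 slots down to slot 0.
    __global__ void sumKernel(const float* in, float* out, int n)
    {
        __shared__ volatile float smem[32];

        const unsigned int tid = threadIdx.x;
        smem[tid] = (tid < n) ? in[tid] : 0.0f;

        const float total = warp_reduce(smem, tid);
        if (tid == 0)
            *out = total; // every lane read back the same warp total
    }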
@ -55,21 +55,20 @@ void cv::gpu::split(const GpuMat& /*src*/, vector<GpuMat>& /*dst*/, Stream& /*st

#else /* !defined (HAVE_CUDA) */

-BEGIN_OPENCV_DEVICE_NAMESPACE
-
-namespace split_merge
-{
+namespace cv { namespace gpu { namespace device
+{
+    namespace split_merge
+    {
        void merge_caller(const DevMem2Db* src, DevMem2Db& dst, int total_channels, size_t elem_size, const cudaStream_t& stream);
        void split_caller(const DevMem2Db& src, DevMem2Db* dst, int num_channels, size_t elem_size1, const cudaStream_t& stream);
    }
-
-END_OPENCV_DEVICE_NAMESPACE
+}}}

namespace
{
    void merge(const GpuMat* src, size_t n, GpuMat& dst, const cudaStream_t& stream)
    {
-        using namespace OPENCV_DEVICE_NAMESPACE_ split_merge;
+        using namespace ::cv::gpu::device::split_merge;

        CV_Assert(src);
        CV_Assert(n > 0);

@ -108,7 +107,7 @@ namespace

    void split(const GpuMat& src, GpuMat* dst, const cudaStream_t& stream)
    {
-        using namespace OPENCV_DEVICE_NAMESPACE_ split_merge;
+        using namespace ::cv::gpu::device::split_merge;

        CV_Assert(dst);
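In isolation, the shape this commit converges on is: forward-declare the CUDA entry points inside the explicit cv::gpu::device hierarchy, then resolve them in each host wrapper with a fully qualified using-directive instead of the old macro pair. The example namespace and function below are illustrative, not from the commit:

    #include <cuda_runtime.h>

    // Forward declaration of a .cu-implemented entry point, in the same
    // explicit namespace hierarchy the commit introduces.
    namespace cv { namespace gpu { namespace device
    {
        namespace example
        {
            void process_caller(int width, int height, cudaStream_t stream);
        }
    }}}

    // Host-side wrapper, resolving the symbol the same way the new
    // merge/split callers above do.
    void processOnGpu(int width, int height, cudaStream_t stream)
    {
        using namespace ::cv::gpu::device::example;
        process_caller(width, height, stream); // defined in a .cu file
    }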
@ -55,16 +55,15 @@ void cv::gpu::StereoBM_GPU::operator() ( const GpuMat&, const GpuMat&, GpuMat&,

#else /* !defined (HAVE_CUDA) */

-BEGIN_OPENCV_DEVICE_NAMESPACE
-
-namespace stereobm
-{
+namespace cv { namespace gpu { namespace device
+{
+    namespace stereobm
+    {
        void stereoBM_GPU(const DevMem2Db& left, const DevMem2Db& right, const DevMem2Db& disp, int ndisp, int winsz, const DevMem2D_<unsigned int>& minSSD_buf, cudaStream_t & stream);
        void prefilter_xsobel(const DevMem2Db& input, const DevMem2Db& output, int prefilterCap /*= 31*/, cudaStream_t & stream);
        void postfilter_textureness(const DevMem2Db& input, int winsz, float avgTexturenessThreshold, const DevMem2Db& disp, cudaStream_t & stream);
    }
-
-END_OPENCV_DEVICE_NAMESPACE
+}}}

const float defaultAvgTexThreshold = 3;

@ -99,7 +98,7 @@ namespace
{
    void stereo_bm_gpu_operator( GpuMat& minSSD, GpuMat& leBuf, GpuMat& riBuf, int preset, int ndisp, int winSize, float avergeTexThreshold, const GpuMat& left, const GpuMat& right, GpuMat& disparity, cudaStream_t stream)
    {
-        using namespace OPENCV_DEVICE_NAMESPACE_ stereobm;
+        using namespace ::cv::gpu::device::stereobm;

        CV_DbgAssert(left.rows == right.rows && left.cols == right.cols);
        CV_DbgAssert(left.type() == CV_8UC1);
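These declarations are the backend of the public cv::gpu::StereoBM_GPU class. A hedged host-side usage sketch against the 2.x API; the preset, disparity count, and window size are example values:

    #include <opencv2/gpu/gpu.hpp>

    // Illustrative block-matching call; both inputs must be CV_8UC1.
    void computeDisparity(const cv::gpu::GpuMat& left, const cv::gpu::GpuMat& right,
                          cv::gpu::GpuMat& disparity)
    {
        cv::gpu::StereoBM_GPU bm(cv::gpu::StereoBM_GPU::BASIC_PRESET,
                                 /*ndisparities=*/64, /*winSize=*/19);
        bm(left, right, disparity);
    }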
@ -59,10 +59,10 @@ void cv::gpu::StereoBeliefPropagation::operator()(const GpuMat&, GpuMat&, Stream

#else /* !defined (HAVE_CUDA) */

-BEGIN_OPENCV_DEVICE_NAMESPACE
-
-namespace stereobp
-{
+namespace cv { namespace gpu { namespace device
+{
+    namespace stereobp
+    {
        void load_constants(int ndisp, float max_data_term, float data_weight, float max_disc_term, float disc_single_jump);
        template<typename T, typename D>
        void comp_data_gpu(const DevMem2Db& left, const DevMem2Db& right, const DevMem2Db& data, cudaStream_t stream);

@ -76,11 +76,10 @@ namespace stereobp
        template <typename T>
        void output_gpu(const DevMem2Db& u, const DevMem2Db& d, const DevMem2Db& l, const DevMem2Db& r, const DevMem2Db& data,
            const DevMem2D_<short>& disp, cudaStream_t stream);
    }
+}}}

-END_OPENCV_DEVICE_NAMESPACE
-
-using namespace OPENCV_DEVICE_NAMESPACE_ stereobp;
+using namespace ::cv::gpu::device::stereobp;

namespace
{
@ -57,10 +57,10 @@ void cv::gpu::StereoConstantSpaceBP::operator()(const GpuMat&, const GpuMat&, Gp

#else /* !defined (HAVE_CUDA) */

-BEGIN_OPENCV_DEVICE_NAMESPACE
-
-namespace stereocsbp
-{
+namespace cv { namespace gpu { namespace device
+{
+    namespace stereocsbp
+    {
        void load_constants(int ndisp, float max_data_term, float data_weight, float max_disc_term, float disc_single_jump, int min_disp_th,
            const DevMem2Db& left, const DevMem2Db& right, const DevMem2Db& temp);

@ -86,11 +86,10 @@ namespace stereocsbp
        template<class T>
        void compute_disp(const T* u, const T* d, const T* l, const T* r, const T* data_cost_selected, const T* disp_selected, size_t msg_step,
            const DevMem2D_<short>& disp, int nr_plane, cudaStream_t stream);
    }
+}}}

-END_OPENCV_DEVICE_NAMESPACE
-
-using namespace OPENCV_DEVICE_NAMESPACE_ stereocsbp;
+using namespace ::cv::gpu::device::stereocsbp;

namespace
{
@ -63,10 +63,10 @@ void cv::gpu::SURF_GPU::releaseMemory() { throw_nogpu(); }

#else /* !defined (HAVE_CUDA) */

-BEGIN_OPENCV_DEVICE_NAMESPACE
-
-namespace surf
-{
+namespace cv { namespace gpu { namespace device
+{
+    namespace surf
+    {
        void loadGlobalConstants(int maxCandidates, int maxFeatures, int img_rows, int img_cols, int nOctaveLayers, float hessianThreshold);
        void loadOctaveConstants(int octave, int layer_rows, int layer_cols);

@ -87,11 +87,10 @@ namespace surf

        void compute_descriptors_gpu(const DevMem2Df& descriptors,
            const float* featureX, const float* featureY, const float* featureSize, const float* featureDir, int nFeatures);
    }
+}}}

-END_OPENCV_DEVICE_NAMESPACE
-
-using namespace OPENCV_DEVICE_NAMESPACE_ surf;
+using namespace ::cv::gpu::device::surf;

namespace
{
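Likewise, these surf entry points back the public cv::gpu::SURF_GPU class. A hedged usage sketch; the default-constructed detector and the threshold are example choices:

    #include <opencv2/gpu/gpu.hpp>

    // Illustrative GPU SURF run on a grayscale (CV_8UC1) image.
    void detectAndDescribe(const cv::gpu::GpuMat& gray)
    {
        cv::gpu::SURF_GPU surf;
        surf.hessianThreshold = 400.0; // example value

        cv::gpu::GpuMat keypoints, descriptors;
        surf(gray, cv::gpu::GpuMat(), keypoints, descriptors); // empty mask
    }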
@ -225,7 +225,7 @@ TEST_P(InterpolateFrames, Regression)

#ifndef DUMP

-    EXPECT_MAT_NEAR(newFrame_gold, newFrame, 1e-4);
+    EXPECT_MAT_NEAR(newFrame_gold, newFrame, 1e-3);

#else