[+] Pixel types via templates
[+] Color conversions stub via pixel types [+] Pyramid calculation (required for mipmaps in CUDA 4.1) [~] Changed C strings to C++ throughout NCV [~] Fixed a couple of bugs in NCV
This commit is contained in:
parent
0b192cb4ea
commit
5afb445283
@ -129,7 +129,7 @@ struct cv::gpu::CascadeClassifier_GPU::CascadeClassifierImpl
|
||||
private:
|
||||
|
||||
|
||||
static void NCVDebugOutputHandler(const char* msg) { CV_Error(CV_GpuApiCallError, msg); }
|
||||
static void NCVDebugOutputHandler(const std::string &msg) { CV_Error(CV_GpuApiCallError, msg.c_str()); }
|
||||
|
||||
|
||||
NCVStatus load(const string& classifierFile)
|
||||
|
@ -40,10 +40,9 @@
|
||||
//M*/
|
||||
|
||||
|
||||
#include <ios>
|
||||
#include <stdarg.h>
|
||||
#include <iostream>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
#include <cstdio>
|
||||
#include "NCV.hpp"
|
||||
|
||||
using namespace std;
|
||||
@ -56,24 +55,18 @@ using namespace std;
|
||||
//==============================================================================
|
||||
|
||||
|
||||
static void stdioDebugOutput(const char *msg)
|
||||
static void stdDebugOutput(const string &msg)
|
||||
{
|
||||
printf("%s", msg);
|
||||
cout << msg;
|
||||
}
|
||||
|
||||
|
||||
static NCVDebugOutputHandler *debugOutputHandler = stdioDebugOutput;
|
||||
static NCVDebugOutputHandler *debugOutputHandler = stdDebugOutput;
|
||||
|
||||
|
||||
void ncvDebugOutput(const char *msg, ...)
|
||||
void ncvDebugOutput(const string &msg)
|
||||
{
|
||||
const int K_DEBUG_STRING_MAXLEN = 1024;
|
||||
char buffer[K_DEBUG_STRING_MAXLEN];
|
||||
va_list args;
|
||||
va_start(args, msg);
|
||||
vsnprintf(buffer, K_DEBUG_STRING_MAXLEN, msg, args);
|
||||
va_end (args);
|
||||
debugOutputHandler(buffer);
|
||||
debugOutputHandler(msg);
|
||||
}
|
||||
|
||||
|
||||
@ -288,7 +281,7 @@ NCVMemStackAllocator::NCVMemStackAllocator(NCVMemoryType memT, size_t capacity,
|
||||
|
||||
allocBegin = NULL;
|
||||
|
||||
if (reusePtr == NULL)
|
||||
if (reusePtr == NULL && capacity != 0)
|
||||
{
|
||||
bReusesMemory = false;
|
||||
switch (memT)
|
||||
@ -329,7 +322,7 @@ NCVMemStackAllocator::~NCVMemStackAllocator()
|
||||
{
|
||||
ncvAssertPrintCheck(currentSize == 0, "NCVMemStackAllocator dtor:: not all objects were deallocated properly, forcing destruction");
|
||||
|
||||
if (!bReusesMemory)
|
||||
if (!bReusesMemory && (allocBegin != (Ncv8u *)(0x1)))
|
||||
{
|
||||
switch (_memType)
|
||||
{
|
||||
@ -355,7 +348,7 @@ NCVStatus NCVMemStackAllocator::alloc(NCVMemSegment &seg, size_t size)
|
||||
seg.clear();
|
||||
ncvAssertReturn(isInitialized(), NCV_ALLOCATOR_BAD_ALLOC);
|
||||
|
||||
size = alignUp(static_cast<Ncv32u>(size), this->_alignment);
|
||||
size = alignUp(size, this->_alignment);
|
||||
this->currentSize += size;
|
||||
this->_maxSize = std::max(this->_maxSize, this->currentSize);
|
||||
|
||||
@ -464,7 +457,7 @@ NCVStatus NCVMemNativeAllocator::alloc(NCVMemSegment &seg, size_t size)
|
||||
break;
|
||||
}
|
||||
|
||||
this->currentSize += alignUp(static_cast<Ncv32u>(size), this->_alignment);
|
||||
this->currentSize += alignUp(size, this->_alignment);
|
||||
this->_maxSize = std::max(this->_maxSize, this->currentSize);
|
||||
|
||||
seg.begin.memtype = this->_memType;
|
||||
@ -480,8 +473,8 @@ NCVStatus NCVMemNativeAllocator::dealloc(NCVMemSegment &seg)
|
||||
ncvAssertReturn(seg.begin.memtype == this->_memType, NCV_ALLOCATOR_BAD_DEALLOC);
|
||||
ncvAssertReturn(seg.begin.ptr != NULL, NCV_ALLOCATOR_BAD_DEALLOC);
|
||||
|
||||
ncvAssertReturn(currentSize >= alignUp(static_cast<Ncv32u>(seg.size), this->_alignment), NCV_ALLOCATOR_BAD_DEALLOC);
|
||||
currentSize -= alignUp(static_cast<Ncv32u>(seg.size), this->_alignment);
|
||||
ncvAssertReturn(currentSize >= alignUp(seg.size, this->_alignment), NCV_ALLOCATOR_BAD_DEALLOC);
|
||||
currentSize -= alignUp(seg.size, this->_alignment);
|
||||
|
||||
switch (this->_memType)
|
||||
{
|
||||
|
@ -42,7 +42,7 @@
|
||||
#ifndef _ncv_hpp_
|
||||
#define _ncv_hpp_
|
||||
|
||||
#if (defined WIN32 || defined _WIN32 || defined WINCE) && defined CVAPI_EXPORTS //&& !defined(__CUDACC__)
|
||||
#if (defined WIN32 || defined _WIN32 || defined WINCE) && defined CVAPI_EXPORTS
|
||||
#define NCV_EXPORTS __declspec(dllexport)
|
||||
#else
|
||||
#define NCV_EXPORTS
|
||||
@ -53,6 +53,8 @@
|
||||
#endif
|
||||
|
||||
#include <cuda_runtime.h>
|
||||
#include <sstream>
|
||||
#include <iostream>
|
||||
|
||||
|
||||
//==============================================================================
|
||||
@ -78,7 +80,7 @@ namespace NcvCTprep
|
||||
}
|
||||
|
||||
|
||||
#define NCV_CT_PREP_PASTE_AUX(a,b) a##b ///< Concatenation indirection macro
|
||||
#define NCV_CT_PREP_PASTE_AUX(a,b) a##b ///< Concatenation indirection macro
|
||||
#define NCV_CT_PREP_PASTE(a,b) NCV_CT_PREP_PASTE_AUX(a, b) ///< Concatenation macro
|
||||
|
||||
|
||||
@ -181,6 +183,25 @@ struct NcvSize32u
|
||||
Ncv32u height; ///< Rectangle height.
|
||||
__host__ __device__ NcvSize32u() : width(0), height(0) {};
|
||||
__host__ __device__ NcvSize32u(Ncv32u width, Ncv32u height) : width(width), height(height) {}
|
||||
__host__ __device__ bool operator == (const NcvSize32u &another) const {return this->width == another.width && this->height == another.height;}
|
||||
};
|
||||
|
||||
|
||||
struct NcvPoint2D32s
|
||||
{
|
||||
Ncv32s x; ///< Point X.
|
||||
Ncv32s y; ///< Point Y.
|
||||
__host__ __device__ NcvPoint2D32s() : x(0), y(0) {};
|
||||
__host__ __device__ NcvPoint2D32s(Ncv32s x, Ncv32s y) : x(x), y(y) {}
|
||||
};
|
||||
|
||||
|
||||
struct NcvPoint2D32u
|
||||
{
|
||||
Ncv32u x; ///< Point X.
|
||||
Ncv32u y; ///< Point Y.
|
||||
__host__ __device__ NcvPoint2D32u() : x(0), y(0) {};
|
||||
__host__ __device__ NcvPoint2D32u(Ncv32u x, Ncv32u y) : x(x), y(y) {}
|
||||
};
|
||||
|
||||
|
||||
@ -199,6 +220,7 @@ NCV_CT_ASSERT(sizeof(NcvRect8u) == sizeof(Ncv32u));
|
||||
NCV_CT_ASSERT(sizeof(NcvRect32s) == 4 * sizeof(Ncv32s));
|
||||
NCV_CT_ASSERT(sizeof(NcvRect32u) == 4 * sizeof(Ncv32u));
|
||||
NCV_CT_ASSERT(sizeof(NcvSize32u) == 2 * sizeof(Ncv32u));
|
||||
NCV_CT_ASSERT(sizeof(NcvPoint2D32u) == 2 * sizeof(Ncv32u));
|
||||
|
||||
|
||||
//==============================================================================
|
||||
@ -219,49 +241,44 @@ const Ncv32u K_LOG2_WARP_SIZE = 5;
|
||||
//==============================================================================
|
||||
|
||||
|
||||
#define NCV_CT_PREP_STRINGIZE_AUX(x) #x
|
||||
#define NCV_CT_PREP_STRINGIZE(x) NCV_CT_PREP_STRINGIZE_AUX(x)
|
||||
NCV_EXPORTS void ncvDebugOutput(const std::string &msg);
|
||||
|
||||
|
||||
NCV_EXPORTS void ncvDebugOutput(const char *msg, ...);
|
||||
|
||||
|
||||
typedef void NCVDebugOutputHandler(const char* msg);
|
||||
typedef void NCVDebugOutputHandler(const std::string &msg);
|
||||
|
||||
|
||||
NCV_EXPORTS void ncvSetDebugOutputHandler(NCVDebugOutputHandler* func);
|
||||
|
||||
|
||||
#define ncvAssertPrintCheck(pred, msg) \
|
||||
((pred) ? true : (ncvDebugOutput("\n%s\n", \
|
||||
"NCV Assertion Failed: " msg ", file=" __FILE__ ", line=" NCV_CT_PREP_STRINGIZE(__LINE__) \
|
||||
), false))
|
||||
|
||||
|
||||
#define ncvAssertPrintReturn(pred, msg, err) \
|
||||
if (ncvAssertPrintCheck(pred, msg)) ; else return err
|
||||
|
||||
|
||||
#define ncvAssertReturn(pred, err) \
|
||||
do \
|
||||
{ \
|
||||
if (!(pred)) \
|
||||
{ \
|
||||
ncvDebugOutput("\n%s%d%s\n", "NCV Assertion Failed: retcode=", (int)err, ", file=" __FILE__ ", line=" NCV_CT_PREP_STRINGIZE(__LINE__)); \
|
||||
return err; \
|
||||
std::ostringstream oss; \
|
||||
oss << "NCV Assertion Failed: " << msg << ", file=" << __FILE__ << ", line=" << __LINE__ << std::endl; \
|
||||
ncvDebugOutput(oss.str()); \
|
||||
} \
|
||||
} while (0)
|
||||
|
||||
|
||||
#define ncvAssertPrintReturn(pred, msg, err) \
|
||||
do \
|
||||
{ \
|
||||
ncvAssertPrintCheck(pred, msg); \
|
||||
if (!(pred)) return err; \
|
||||
} while (0)
|
||||
|
||||
|
||||
#define ncvAssertReturn(pred, err) \
|
||||
ncvAssertPrintReturn(pred, "retcode=" << (int)err, err)
|
||||
|
||||
|
||||
#define ncvAssertReturnNcvStat(ncvOp) \
|
||||
do \
|
||||
{ \
|
||||
NCVStatus _ncvStat = ncvOp; \
|
||||
if (NCV_SUCCESS != _ncvStat) \
|
||||
{ \
|
||||
ncvDebugOutput("\n%s%d%s\n", "NCV Assertion Failed: NcvStat=", (int)_ncvStat, ", file=" __FILE__ ", line=" NCV_CT_PREP_STRINGIZE(__LINE__)); \
|
||||
return _ncvStat; \
|
||||
} \
|
||||
ncvAssertPrintReturn(NCV_SUCCESS==_ncvStat, "NcvStat=" << (int)_ncvStat, _ncvStat); \
|
||||
} while (0)
|
||||
|
||||
|
||||
@ -270,18 +287,14 @@ NCV_EXPORTS void ncvSetDebugOutputHandler(NCVDebugOutputHandler* func);
|
||||
{ \
|
||||
cudaError_t resCall = cudacall; \
|
||||
cudaError_t resGLE = cudaGetLastError(); \
|
||||
if (cudaSuccess != resCall || cudaSuccess != resGLE) \
|
||||
{ \
|
||||
ncvDebugOutput("\n%s%d%s\n", "NCV CUDA Assertion Failed: cudaError_t=", (int)(resCall | resGLE), ", file=" __FILE__ ", line=" NCV_CT_PREP_STRINGIZE(__LINE__)); \
|
||||
return errCode; \
|
||||
} \
|
||||
ncvAssertPrintReturn(cudaSuccess==resCall && cudaSuccess==resGLE, "cudaError_t=" << (int)(resCall | resGLE), errCode); \
|
||||
} while (0)
|
||||
|
||||
|
||||
/**
|
||||
* Return-codes for status notification, errors and warnings
|
||||
*/
|
||||
enum NCVStatus
|
||||
enum
|
||||
{
|
||||
//NCV statuses
|
||||
NCV_SUCCESS,
|
||||
@ -338,9 +351,14 @@ enum NCVStatus
|
||||
NPPST_MEM_INSUFFICIENT_BUFFER, ///< Insufficient user-allocated buffer
|
||||
NPPST_MEM_RESIDENCE_ERROR, ///< Memory residence error detected (check if pointers should be device or pinned)
|
||||
NPPST_MEM_INTERNAL_ERROR, ///< Internal memory management error
|
||||
|
||||
NCV_LAST_STATUS ///< Marker to continue error numeration in other files
|
||||
};
|
||||
|
||||
|
||||
typedef Ncv32u NCVStatus;
|
||||
|
||||
|
||||
#define NCV_SET_SKIP_COND(x) \
|
||||
bool __ncv_skip_cond = x
|
||||
|
||||
@ -774,9 +792,20 @@ public:
|
||||
return ncvStat;
|
||||
}
|
||||
|
||||
T &at(Ncv32u x, Ncv32u y) const
|
||||
{
|
||||
if (x >= this->_width || y >= this->_height)
|
||||
{
|
||||
printf("Error addressing matrix at [%d, %d]\n", x, y);
|
||||
return *this->_ptr;
|
||||
}
|
||||
return ((T *)((Ncv8u *)this->_ptr + y * this->_pitch))[x];
|
||||
}
|
||||
|
||||
T *ptr() const {return this->_ptr;}
|
||||
Ncv32u width() const {return this->_width;}
|
||||
Ncv32u height() const {return this->_height;}
|
||||
NcvSize32u size() const {return NcvSize32u(this->_width, this->_height);}
|
||||
Ncv32u pitch() const {return this->_pitch;}
|
||||
NCVMemoryType memType() const {return this->_memtype;}
|
||||
|
||||
@ -923,7 +952,7 @@ public:
|
||||
this->_width = roi.width;
|
||||
this->_height = roi.height;
|
||||
this->_pitch = mat.pitch();
|
||||
this->_ptr = mat.ptr() + roi.y * mat.stride() + roi.x;
|
||||
this->_ptr = &mat.at(roi.x, roi.y);
|
||||
this->_memtype = mat.memType();
|
||||
|
||||
this->bReused = true;
|
||||
@ -962,4 +991,24 @@ NCV_EXPORTS NCVStatus ncvDrawRects_8u_device(Ncv8u *d_dst, Ncv32u dstStride, Ncv
|
||||
NCV_EXPORTS NCVStatus ncvDrawRects_32u_device(Ncv32u *d_dst, Ncv32u dstStride, Ncv32u dstWidth, Ncv32u dstHeight,
|
||||
NcvRect32u *d_rects, Ncv32u numRects, Ncv32u color, cudaStream_t cuStream);
|
||||
|
||||
|
||||
#define CLAMP(x,a,b) ( (x) > (b) ? (b) : ( (x) < (a) ? (a) : (x) ) )
|
||||
#define CLAMP_TOP(x, a) (((x) > (a)) ? (a) : (x))
|
||||
#define CLAMP_BOTTOM(x, a) (((x) < (a)) ? (a) : (x))
|
||||
#define CLAMP_0_255(x) CLAMP(x,0,255)
|
||||
|
||||
|
||||
#define SUB_BEGIN(type, name) struct { __inline type name
|
||||
#define SUB_END(name) } name;
|
||||
#define SUB_CALL(name) name.name
|
||||
|
||||
#define SQR(x) ((x)*(x))
|
||||
|
||||
|
||||
#define ncvSafeMatAlloc(name, type, alloc, width, height, err) \
|
||||
NCVMatrixAlloc<type> name(alloc, width, height); \
|
||||
ncvAssertReturn(name.isMemAllocated(), err);
|
||||
|
||||
|
||||
|
||||
#endif // _ncv_hpp_
|
||||
|
96
modules/gpu/src/nvidia/core/NCVColorConversion.hpp
Normal file
96
modules/gpu/src/nvidia/core/NCVColorConversion.hpp
Normal file
@ -0,0 +1,96 @@
|
||||
/*M///////////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
|
||||
//
|
||||
// By downloading, copying, installing or using the software you agree to this license.
|
||||
// If you do not agree to this license, do not download, install,
|
||||
// copy or use the software.
|
||||
//
|
||||
//
|
||||
// License Agreement
|
||||
// For Open Source Computer Vision Library
|
||||
//
|
||||
// Copyright (C) 2009-2010, NVIDIA Corporation, all rights reserved.
|
||||
// Third party copyrights are property of their respective owners.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without modification,
|
||||
// are permitted provided that the following conditions are met:
|
||||
//
|
||||
// * Redistribution's of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimer.
|
||||
//
|
||||
// * Redistribution's in binary form must reproduce the above copyright notice,
|
||||
// this list of conditions and the following disclaimer in the documentation
|
||||
// and/or other materials provided with the distribution.
|
||||
//
|
||||
// * The name of the copyright holders may not be used to endorse or promote products
|
||||
// derived from this software without specific prior written permission.
|
||||
//
|
||||
// This software is provided by the copyright holders and contributors "as is" and
|
||||
// any express or implied warranties, including, but not limited to, the implied
|
||||
// warranties of merchantability and fitness for a particular purpose are disclaimed.
|
||||
// In no event shall the Intel Corporation or contributors be liable for any direct,
|
||||
// indirect, incidental, special, exemplary, or consequential damages
|
||||
// (including, but not limited to, procurement of substitute goods or services;
|
||||
// loss of use, data, or profits; or business interruption) however caused
|
||||
// and on any theory of liability, whether in contract, strict liability,
|
||||
// or tort (including negligence or otherwise) arising in any way out of
|
||||
// the use of this software, even if advised of the possibility of such damage.
|
||||
//
|
||||
//M*/
|
||||
|
||||
#ifndef _ncv_color_conversion_hpp_
#define _ncv_color_conversion_hpp_

#include "NCVPixelOperations.hpp"

/// Color spaces supported by the (stub) NCV color-conversion routines.
enum NCVColorSpace
{
    NCVColorSpaceGray,
    NCVColorSpaceRGBA   // no trailing comma: ill-formed before C++11
};

/// Primary template for a per-pixel conversion from color space CSin to CSout.
/// Only the explicit specializations below provide a definition, so an
/// unsupported (CSin, CSout) pair fails at link time rather than silently
/// doing nothing.
template<NCVColorSpace CSin, NCVColorSpace CSout, typename Tin, typename Tout> struct __pixColorConv
{
    static void _pixColorConv(const Tin &pixIn, Tout &pixOut);
};
|
||||
|
||||
template<typename Tin, typename Tout> struct __pixColorConv<NCVColorSpaceRGBA, NCVColorSpaceGray, Tin, Tout> {
|
||||
static void _pixColorConv(const Tin &pixIn, Tout &pixOut)
|
||||
{
|
||||
Ncv32f luma = 0.299f * pixIn.x + 0.587f * pixIn.y + 0.114f * pixIn.z;
|
||||
_TDemoteClampNN(luma, pixOut.x);
|
||||
}};
|
||||
|
||||
template<typename Tin, typename Tout> struct __pixColorConv<NCVColorSpaceGray, NCVColorSpaceRGBA, Tin, Tout> {
|
||||
static void _pixColorConv(const Tin &pixIn, Tout &pixOut)
|
||||
{
|
||||
_TDemoteClampNN(pixIn.x, pixOut.x);
|
||||
_TDemoteClampNN(pixIn.x, pixOut.y);
|
||||
_TDemoteClampNN(pixIn.x, pixOut.z);
|
||||
pixOut.w = 0;
|
||||
}};
|
||||
|
||||
template<NCVColorSpace CSin, NCVColorSpace CSout, typename Tin, typename Tout>
|
||||
static
|
||||
NCVStatus _ncvColorConv_host(const NCVMatrix<Tin> &h_imgIn,
|
||||
const NCVMatrix<Tout> &h_imgOut)
|
||||
{
|
||||
ncvAssertReturn(h_imgIn.size() == h_imgOut.size(), NCV_DIMENSIONS_INVALID);
|
||||
ncvAssertReturn(h_imgIn.memType() == h_imgOut.memType() &&
|
||||
(h_imgIn.memType() == NCVMemoryTypeHostPinned || h_imgIn.memType() == NCVMemoryTypeNone), NCV_MEM_RESIDENCE_ERROR);
|
||||
NCV_SET_SKIP_COND(h_imgIn.memType() == NCVMemoryTypeNone);
|
||||
NCV_SKIP_COND_BEGIN
|
||||
|
||||
for (Ncv32u i=0; i<h_imgIn.height(); i++)
|
||||
{
|
||||
for (Ncv32u j=0; j<h_imgIn.width(); j++)
|
||||
{
|
||||
__pixColorConv<CSin, CSout, Tin, Tout>::_pixColorConv(h_imgIn.at(j,i), h_imgOut.at(j,i));
|
||||
}
|
||||
}
|
||||
|
||||
NCV_SKIP_COND_END
|
||||
return NCV_SUCCESS;
|
||||
}
|
||||
|
||||
#endif //_ncv_color_conversion_hpp_
|
350
modules/gpu/src/nvidia/core/NCVPixelOperations.hpp
Normal file
350
modules/gpu/src/nvidia/core/NCVPixelOperations.hpp
Normal file
@ -0,0 +1,350 @@
|
||||
/*M///////////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
|
||||
//
|
||||
// By downloading, copying, installing or using the software you agree to this license.
|
||||
// If you do not agree to this license, do not download, install,
|
||||
// copy or use the software.
|
||||
//
|
||||
//
|
||||
// License Agreement
|
||||
// For Open Source Computer Vision Library
|
||||
//
|
||||
// Copyright (C) 2009-2010, NVIDIA Corporation, all rights reserved.
|
||||
// Third party copyrights are property of their respective owners.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without modification,
|
||||
// are permitted provided that the following conditions are met:
|
||||
//
|
||||
// * Redistribution's of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimer.
|
||||
//
|
||||
// * Redistribution's in binary form must reproduce the above copyright notice,
|
||||
// this list of conditions and the following disclaimer in the documentation
|
||||
// and/or other materials provided with the distribution.
|
||||
//
|
||||
// * The name of the copyright holders may not be used to endorse or promote products
|
||||
// derived from this software without specific prior written permission.
|
||||
//
|
||||
// This software is provided by the copyright holders and contributors "as is" and
|
||||
// any express or implied warranties, including, but not limited to, the implied
|
||||
// warranties of merchantability and fitness for a particular purpose are disclaimed.
|
||||
// In no event shall the Intel Corporation or contributors be liable for any direct,
|
||||
// indirect, incidental, special, exemplary, or consequential damages
|
||||
// (including, but not limited to, procurement of substitute goods or services;
|
||||
// loss of use, data, or profits; or business interruption) however caused
|
||||
// and on any theory of liability, whether in contract, strict liability,
|
||||
// or tort (including negligence or otherwise) arising in any way out of
|
||||
// the use of this software, even if advised of the possibility of such damage.
|
||||
//
|
||||
//M*/
|
||||
|
||||
#ifndef _ncv_pixel_operations_hpp_
#define _ncv_pixel_operations_hpp_

#include <limits.h>
#include <float.h>
#include "NCV.hpp"

/// Largest representable value of a base (per-channel) pixel type.
/// NOTE: explicit specializations must not carry a storage-class specifier —
/// the original `template<> static inline ...` is ill-formed C++ (rejected by
/// GCC/Clang, accepted only by MSVC), so `static` is dropped here.
template<typename TBase> inline TBase _pixMaxVal();
template<> inline Ncv8u  _pixMaxVal<Ncv8u>()  {return UCHAR_MAX;}
template<> inline Ncv16u _pixMaxVal<Ncv16u>() {return USHRT_MAX;}
template<> inline Ncv32u _pixMaxVal<Ncv32u>() {return UINT_MAX;}
// SCHAR_MAX instead of CHAR_MAX: correct even on platforms where plain char
// is unsigned (assumes Ncv8s is a signed 8-bit type — see NCV.hpp typedefs).
template<> inline Ncv8s  _pixMaxVal<Ncv8s>()  {return SCHAR_MAX;}
template<> inline Ncv16s _pixMaxVal<Ncv16s>() {return SHRT_MAX;}
template<> inline Ncv32s _pixMaxVal<Ncv32s>() {return INT_MAX;}
template<> inline Ncv32f _pixMaxVal<Ncv32f>() {return FLT_MAX;}
template<> inline Ncv64f _pixMaxVal<Ncv64f>() {return DBL_MAX;}
|
||||
|
||||
template<typename TBase> inline TBase _pixMinVal();
|
||||
template<> static inline Ncv8u _pixMinVal<Ncv8u>() {return 0;}
|
||||
template<> static inline Ncv16u _pixMinVal<Ncv16u>() {return 0;}
|
||||
template<> static inline Ncv32u _pixMinVal<Ncv32u>() {return 0;}
|
||||
template<> static inline Ncv8s _pixMinVal<Ncv8s>() {return CHAR_MIN;}
|
||||
template<> static inline Ncv16s _pixMinVal<Ncv16s>() {return SHRT_MIN;}
|
||||
template<> static inline Ncv32s _pixMinVal<Ncv32s>() {return INT_MIN;}
|
||||
template<> static inline Ncv32f _pixMinVal<Ncv32f>() {return FLT_MIN;}
|
||||
template<> static inline Ncv64f _pixMinVal<Ncv64f>() {return DBL_MIN;}
|
||||
|
||||
template<typename Tvec> struct TConvVec2Base;
|
||||
template<> struct TConvVec2Base<uchar1> {typedef Ncv8u TBase;};
|
||||
template<> struct TConvVec2Base<uchar3> {typedef Ncv8u TBase;};
|
||||
template<> struct TConvVec2Base<uchar4> {typedef Ncv8u TBase;};
|
||||
template<> struct TConvVec2Base<ushort1> {typedef Ncv16u TBase;};
|
||||
template<> struct TConvVec2Base<ushort3> {typedef Ncv16u TBase;};
|
||||
template<> struct TConvVec2Base<ushort4> {typedef Ncv16u TBase;};
|
||||
template<> struct TConvVec2Base<uint1> {typedef Ncv32u TBase;};
|
||||
template<> struct TConvVec2Base<uint3> {typedef Ncv32u TBase;};
|
||||
template<> struct TConvVec2Base<uint4> {typedef Ncv32u TBase;};
|
||||
template<> struct TConvVec2Base<float1> {typedef Ncv32f TBase;};
|
||||
template<> struct TConvVec2Base<float3> {typedef Ncv32f TBase;};
|
||||
template<> struct TConvVec2Base<float4> {typedef Ncv32f TBase;};
|
||||
template<> struct TConvVec2Base<double1> {typedef Ncv64f TBase;};
|
||||
template<> struct TConvVec2Base<double3> {typedef Ncv64f TBase;};
|
||||
template<> struct TConvVec2Base<double4> {typedef Ncv64f TBase;};
|
||||
|
||||
#define NC(T) (sizeof(T) / sizeof(TConvVec2Base<T>::TBase))
|
||||
|
||||
template<typename TBase, Ncv32u NC> struct TConvBase2Vec;
|
||||
template<> struct TConvBase2Vec<Ncv8u, 1> {typedef uchar1 TVec;};
|
||||
template<> struct TConvBase2Vec<Ncv8u, 3> {typedef uchar3 TVec;};
|
||||
template<> struct TConvBase2Vec<Ncv8u, 4> {typedef uchar4 TVec;};
|
||||
template<> struct TConvBase2Vec<Ncv16u, 1> {typedef ushort1 TVec;};
|
||||
template<> struct TConvBase2Vec<Ncv16u, 3> {typedef ushort3 TVec;};
|
||||
template<> struct TConvBase2Vec<Ncv16u, 4> {typedef ushort4 TVec;};
|
||||
template<> struct TConvBase2Vec<Ncv32u, 1> {typedef uint1 TVec;};
|
||||
template<> struct TConvBase2Vec<Ncv32u, 3> {typedef uint3 TVec;};
|
||||
template<> struct TConvBase2Vec<Ncv32u, 4> {typedef uint4 TVec;};
|
||||
template<> struct TConvBase2Vec<Ncv32f, 1> {typedef float1 TVec;};
|
||||
template<> struct TConvBase2Vec<Ncv32f, 3> {typedef float3 TVec;};
|
||||
template<> struct TConvBase2Vec<Ncv32f, 4> {typedef float4 TVec;};
|
||||
template<> struct TConvBase2Vec<Ncv64f, 1> {typedef double1 TVec;};
|
||||
template<> struct TConvBase2Vec<Ncv64f, 3> {typedef double3 TVec;};
|
||||
template<> struct TConvBase2Vec<Ncv64f, 4> {typedef double4 TVec;};
|
||||
|
||||
//TODO: consider using CUDA intrinsics to avoid branching
|
||||
template<typename Tin> static inline void _TDemoteClampZ(Tin &a, Ncv8u &out) {out = (Ncv8u)CLAMP_0_255(a);};
|
||||
template<typename Tin> static inline void _TDemoteClampZ(Tin &a, Ncv16u &out) {out = (Ncv16u)CLAMP(a, 0, USHRT_MAX);}
|
||||
template<typename Tin> static inline void _TDemoteClampZ(Tin &a, Ncv32u &out) {out = (Ncv32u)CLAMP(a, 0, UINT_MAX);}
|
||||
template<typename Tin> static inline void _TDemoteClampZ(Tin &a, Ncv32f &out) {out = (Ncv32f)a;}
|
||||
|
||||
//TODO: consider using CUDA intrinsics to avoid branching
|
||||
template<typename Tin> static inline void _TDemoteClampNN(Tin &a, Ncv8u &out) {out = (Ncv8u)CLAMP_0_255(a+0.5f);}
|
||||
template<typename Tin> static inline void _TDemoteClampNN(Tin &a, Ncv16u &out) {out = (Ncv16u)CLAMP(a+0.5f, 0, USHRT_MAX);}
|
||||
template<typename Tin> static inline void _TDemoteClampNN(Tin &a, Ncv32u &out) {out = (Ncv32u)CLAMP(a+0.5f, 0, UINT_MAX);}
|
||||
template<typename Tin> static inline void _TDemoteClampNN(Tin &a, Ncv32f &out) {out = (Ncv32f)a;}
|
||||
|
||||
template<typename Tout> inline Tout _pixMakeZero();
|
||||
template<> static inline uchar1 _pixMakeZero<uchar1>() {return make_uchar1(0);}
|
||||
template<> static inline uchar3 _pixMakeZero<uchar3>() {return make_uchar3(0,0,0);}
|
||||
template<> static inline uchar4 _pixMakeZero<uchar4>() {return make_uchar4(0,0,0,0);}
|
||||
template<> static inline ushort1 _pixMakeZero<ushort1>() {return make_ushort1(0);}
|
||||
template<> static inline ushort3 _pixMakeZero<ushort3>() {return make_ushort3(0,0,0);}
|
||||
template<> static inline ushort4 _pixMakeZero<ushort4>() {return make_ushort4(0,0,0,0);}
|
||||
template<> static inline uint1 _pixMakeZero<uint1>() {return make_uint1(0);}
|
||||
template<> static inline uint3 _pixMakeZero<uint3>() {return make_uint3(0,0,0);}
|
||||
template<> static inline uint4 _pixMakeZero<uint4>() {return make_uint4(0,0,0,0);}
|
||||
template<> static inline float1 _pixMakeZero<float1>() {return make_float1(0.f);}
|
||||
template<> static inline float3 _pixMakeZero<float3>() {return make_float3(0.f,0.f,0.f);}
|
||||
template<> static inline float4 _pixMakeZero<float4>() {return make_float4(0.f,0.f,0.f,0.f);}
|
||||
template<> static inline double1 _pixMakeZero<double1>() {return make_double1(0.);}
|
||||
template<> static inline double3 _pixMakeZero<double3>() {return make_double3(0.,0.,0.);}
|
||||
template<> static inline double4 _pixMakeZero<double4>() {return make_double4(0.,0.,0.,0.);}
|
||||
|
||||
static inline uchar1 _pixMake(Ncv8u x) {return make_uchar1(x);}
|
||||
static inline uchar3 _pixMake(Ncv8u x, Ncv8u y, Ncv8u z) {return make_uchar3(x,y,z);}
|
||||
static inline uchar4 _pixMake(Ncv8u x, Ncv8u y, Ncv8u z, Ncv8u w) {return make_uchar4(x,y,z,w);}
|
||||
static inline ushort1 _pixMake(Ncv16u x) {return make_ushort1(x);}
|
||||
static inline ushort3 _pixMake(Ncv16u x, Ncv16u y, Ncv16u z) {return make_ushort3(x,y,z);}
|
||||
static inline ushort4 _pixMake(Ncv16u x, Ncv16u y, Ncv16u z, Ncv16u w) {return make_ushort4(x,y,z,w);}
|
||||
static inline uint1 _pixMake(Ncv32u x) {return make_uint1(x);}
|
||||
static inline uint3 _pixMake(Ncv32u x, Ncv32u y, Ncv32u z) {return make_uint3(x,y,z);}
|
||||
static inline uint4 _pixMake(Ncv32u x, Ncv32u y, Ncv32u z, Ncv32u w) {return make_uint4(x,y,z,w);}
|
||||
static inline float1 _pixMake(Ncv32f x) {return make_float1(x);}
|
||||
static inline float3 _pixMake(Ncv32f x, Ncv32f y, Ncv32f z) {return make_float3(x,y,z);}
|
||||
static inline float4 _pixMake(Ncv32f x, Ncv32f y, Ncv32f z, Ncv32f w) {return make_float4(x,y,z,w);}
|
||||
static inline double1 _pixMake(Ncv64f x) {return make_double1(x);}
|
||||
static inline double3 _pixMake(Ncv64f x, Ncv64f y, Ncv64f z) {return make_double3(x,y,z);}
|
||||
static inline double4 _pixMake(Ncv64f x, Ncv64f y, Ncv64f z, Ncv64f w) {return make_double4(x,y,z,w);}
|
||||
|
||||
|
||||
template<typename Tin, typename Tout, Ncv32u CN> struct __pixDemoteClampZ_CN {static Tout _pixDemoteClampZ_CN(Tin &pix);};
|
||||
|
||||
template<typename Tin, typename Tout> struct __pixDemoteClampZ_CN<Tin, Tout, 1> {
|
||||
static Tout _pixDemoteClampZ_CN(Tin &pix)
|
||||
{
|
||||
Tout out;
|
||||
_TDemoteClampZ(pix.x, out.x);
|
||||
return out;
|
||||
}};
|
||||
|
||||
template<typename Tin, typename Tout> struct __pixDemoteClampZ_CN<Tin, Tout, 3> {
|
||||
static Tout _pixDemoteClampZ_CN(Tin &pix)
|
||||
{
|
||||
Tout out;
|
||||
_TDemoteClampZ(pix.x, out.x);
|
||||
_TDemoteClampZ(pix.y, out.y);
|
||||
_TDemoteClampZ(pix.z, out.z);
|
||||
return out;
|
||||
}};
|
||||
|
||||
template<typename Tin, typename Tout> struct __pixDemoteClampZ_CN<Tin, Tout, 4> {
|
||||
static Tout _pixDemoteClampZ_CN(Tin &pix)
|
||||
{
|
||||
Tout out;
|
||||
_TDemoteClampZ(pix.x, out.x);
|
||||
_TDemoteClampZ(pix.y, out.y);
|
||||
_TDemoteClampZ(pix.z, out.z);
|
||||
_TDemoteClampZ(pix.w, out.w);
|
||||
return out;
|
||||
}};
|
||||
|
||||
template<typename Tin, typename Tout> static inline Tout _pixDemoteClampZ(Tin &pix)
|
||||
{
|
||||
return __pixDemoteClampZ_CN<Tin, Tout, NC(Tin)>::_pixDemoteClampZ_CN(pix);
|
||||
}
|
||||
|
||||
|
||||
template<typename Tin, typename Tout, Ncv32u CN> struct __pixDemoteClampNN_CN {static Tout _pixDemoteClampNN_CN(Tin &pix);};
|
||||
|
||||
template<typename Tin, typename Tout> struct __pixDemoteClampNN_CN<Tin, Tout, 1> {
|
||||
static Tout _pixDemoteClampNN_CN(Tin &pix)
|
||||
{
|
||||
Tout out;
|
||||
_TDemoteClampNN(pix.x, out.x);
|
||||
return out;
|
||||
}};
|
||||
|
||||
template<typename Tin, typename Tout> struct __pixDemoteClampNN_CN<Tin, Tout, 3> {
|
||||
static Tout _pixDemoteClampNN_CN(Tin &pix)
|
||||
{
|
||||
Tout out;
|
||||
_TDemoteClampNN(pix.x, out.x);
|
||||
_TDemoteClampNN(pix.y, out.y);
|
||||
_TDemoteClampNN(pix.z, out.z);
|
||||
return out;
|
||||
}};
|
||||
|
||||
template<typename Tin, typename Tout> struct __pixDemoteClampNN_CN<Tin, Tout, 4> {
|
||||
static Tout _pixDemoteClampNN_CN(Tin &pix)
|
||||
{
|
||||
Tout out;
|
||||
_TDemoteClampNN(pix.x, out.x);
|
||||
_TDemoteClampNN(pix.y, out.y);
|
||||
_TDemoteClampNN(pix.z, out.z);
|
||||
_TDemoteClampNN(pix.w, out.w);
|
||||
return out;
|
||||
}};
|
||||
|
||||
template<typename Tin, typename Tout> static inline Tout _pixDemoteClampNN(Tin &pix)
|
||||
{
|
||||
return __pixDemoteClampNN_CN<Tin, Tout, NC(Tin)>::_pixDemoteClampNN_CN(pix);
|
||||
}
|
||||
|
||||
|
||||
template<typename Tin, typename Tout, typename Tw, Ncv32u CN> struct __pixScale_CN {static Tout _pixScale_CN(Tin &pix, Tw w);};
|
||||
|
||||
template<typename Tin, typename Tout, typename Tw> struct __pixScale_CN<Tin, Tout, Tw, 1> {
|
||||
static Tout _pixScale_CN(Tin &pix, Tw w)
|
||||
{
|
||||
Tout out;
|
||||
typedef typename TConvVec2Base<Tout>::TBase TBout;
|
||||
out.x = (TBout)(pix.x * w);
|
||||
return out;
|
||||
}};
|
||||
|
||||
template<typename Tin, typename Tout, typename Tw> struct __pixScale_CN<Tin, Tout, Tw, 3> {
|
||||
static Tout _pixScale_CN(Tin &pix, Tw w)
|
||||
{
|
||||
Tout out;
|
||||
typedef typename TConvVec2Base<Tout>::TBase TBout;
|
||||
out.x = (TBout)(pix.x * w);
|
||||
out.y = (TBout)(pix.y * w);
|
||||
out.z = (TBout)(pix.z * w);
|
||||
return out;
|
||||
}};
|
||||
|
||||
template<typename Tin, typename Tout, typename Tw> struct __pixScale_CN<Tin, Tout, Tw, 4> {
|
||||
static Tout _pixScale_CN(Tin &pix, Tw w)
|
||||
{
|
||||
Tout out;
|
||||
typedef typename TConvVec2Base<Tout>::TBase TBout;
|
||||
out.x = (TBout)(pix.x * w);
|
||||
out.y = (TBout)(pix.y * w);
|
||||
out.z = (TBout)(pix.z * w);
|
||||
out.w = (TBout)(pix.w * w);
|
||||
return out;
|
||||
}};
|
||||
|
||||
template<typename Tin, typename Tout, typename Tw> static Tout _pixScale(Tin &pix, Tw w)
|
||||
{
|
||||
return __pixScale_CN<Tin, Tout, Tw, NC(Tin)>::_pixScale_CN(pix, w);
|
||||
}
|
||||
|
||||
|
||||
template<typename Tin, typename Tout, Ncv32u CN> struct __pixAdd_CN {static Tout _pixAdd_CN(Tout &pix1, Tin &pix2);};
|
||||
|
||||
template<typename Tin, typename Tout> struct __pixAdd_CN<Tin, Tout, 1> {
|
||||
static Tout _pixAdd_CN(Tout &pix1, Tin &pix2)
|
||||
{
|
||||
Tout out;
|
||||
out.x = pix1.x + pix2.x;
|
||||
return out;
|
||||
}};
|
||||
|
||||
template<typename Tin, typename Tout> struct __pixAdd_CN<Tin, Tout, 3> {
|
||||
static Tout _pixAdd_CN(Tout &pix1, Tin &pix2)
|
||||
{
|
||||
Tout out;
|
||||
out.x = pix1.x + pix2.x;
|
||||
out.y = pix1.y + pix2.y;
|
||||
out.z = pix1.z + pix2.z;
|
||||
return out;
|
||||
}};
|
||||
|
||||
template<typename Tin, typename Tout> struct __pixAdd_CN<Tin, Tout, 4> {
|
||||
static Tout _pixAdd_CN(Tout &pix1, Tin &pix2)
|
||||
{
|
||||
Tout out;
|
||||
out.x = pix1.x + pix2.x;
|
||||
out.y = pix1.y + pix2.y;
|
||||
out.z = pix1.z + pix2.z;
|
||||
out.w = pix1.w + pix2.w;
|
||||
return out;
|
||||
}};
|
||||
|
||||
template<typename Tin, typename Tout> static Tout _pixAdd(Tout &pix1, Tin &pix2)
|
||||
{
|
||||
return __pixAdd_CN<Tin, Tout, NC(Tin)>::_pixAdd_CN(pix1, pix2);
|
||||
}
|
||||
|
||||
|
||||
template<typename Tin, typename Tout, Ncv32u CN> struct __pixDist_CN {static Tout _pixDist_CN(Tin &pix1, Tin &pix2);};
|
||||
|
||||
template<typename Tin, typename Tout> struct __pixDist_CN<Tin, Tout, 1> {
|
||||
static Tout _pixDist_CN(Tin &pix1, Tin &pix2)
|
||||
{
|
||||
return Tout(SQR(pix1.x - pix2.x));
|
||||
}};
|
||||
|
||||
template<typename Tin, typename Tout> struct __pixDist_CN<Tin, Tout, 3> {
|
||||
static Tout _pixDist_CN(Tin &pix1, Tin &pix2)
|
||||
{
|
||||
return Tout(SQR(pix1.x - pix2.x) + SQR(pix1.y - pix2.y) + SQR(pix1.z - pix2.z));
|
||||
}};
|
||||
|
||||
template<typename Tin, typename Tout> struct __pixDist_CN<Tin, Tout, 4> {
|
||||
static Tout _pixDist_CN(Tin &pix1, Tin &pix2)
|
||||
{
|
||||
return Tout(SQR(pix1.x - pix2.x) + SQR(pix1.y - pix2.y) + SQR(pix1.z - pix2.z) + SQR(pix1.w - pix2.w));
|
||||
}};
|
||||
|
||||
template<typename Tin, typename Tout> static Tout _pixDist(Tin &pix1, Tin &pix2)
|
||||
{
|
||||
return __pixDist_CN<Tin, Tout, NC(Tin)>::_pixDist_CN(pix1, pix2);
|
||||
}
|
||||
|
||||
|
||||
template <typename T> struct TAccPixWeighted;
|
||||
template<> struct TAccPixWeighted<uchar1> {typedef double1 type;};
|
||||
template<> struct TAccPixWeighted<uchar3> {typedef double3 type;};
|
||||
template<> struct TAccPixWeighted<uchar4> {typedef double4 type;};
|
||||
template<> struct TAccPixWeighted<ushort1> {typedef double1 type;};
|
||||
template<> struct TAccPixWeighted<ushort3> {typedef double3 type;};
|
||||
template<> struct TAccPixWeighted<ushort4> {typedef double4 type;};
|
||||
template<> struct TAccPixWeighted<float1> {typedef double1 type;};
|
||||
template<> struct TAccPixWeighted<float3> {typedef double3 type;};
|
||||
template<> struct TAccPixWeighted<float4> {typedef double4 type;};
|
||||
|
||||
template<typename Tfrom> struct TAccPixDist {};
|
||||
template<> struct TAccPixDist<uchar1> {typedef Ncv32u type;};
|
||||
template<> struct TAccPixDist<uchar3> {typedef Ncv32u type;};
|
||||
template<> struct TAccPixDist<uchar4> {typedef Ncv32u type;};
|
||||
template<> struct TAccPixDist<ushort1> {typedef Ncv32u type;};
|
||||
template<> struct TAccPixDist<ushort3> {typedef Ncv32u type;};
|
||||
template<> struct TAccPixDist<ushort4> {typedef Ncv32u type;};
|
||||
template<> struct TAccPixDist<float1> {typedef Ncv32f type;};
|
||||
template<> struct TAccPixDist<float3> {typedef Ncv32f type;};
|
||||
template<> struct TAccPixDist<float4> {typedef Ncv32f type;};
|
||||
|
||||
#endif //_ncv_pixel_operations_hpp_
|
397
modules/gpu/src/nvidia/core/NCVPyramid.cu
Normal file
397
modules/gpu/src/nvidia/core/NCVPyramid.cu
Normal file
@ -0,0 +1,397 @@
|
||||
/*M///////////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
|
||||
//
|
||||
// By downloading, copying, installing or using the software you agree to this license.
|
||||
// If you do not agree to this license, do not download, install,
|
||||
// copy or use the software.
|
||||
//
|
||||
//
|
||||
// License Agreement
|
||||
// For Open Source Computer Vision Library
|
||||
//
|
||||
// Copyright (C) 2009-2010, NVIDIA Corporation, all rights reserved.
|
||||
// Third party copyrights are property of their respective owners.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without modification,
|
||||
// are permitted provided that the following conditions are met:
|
||||
//
|
||||
// * Redistribution's of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimer.
|
||||
//
|
||||
// * Redistribution's in binary form must reproduce the above copyright notice,
|
||||
// this list of conditions and the following disclaimer in the documentation
|
||||
// and/or other materials provided with the distribution.
|
||||
//
|
||||
// * The name of the copyright holders may not be used to endorse or promote products
|
||||
// derived from this software without specific prior written permission.
|
||||
//
|
||||
// This software is provided by the copyright holders and contributors "as is" and
|
||||
// any express or implied warranties, including, but not limited to, the implied
|
||||
// warranties of merchantability and fitness for a particular purpose are disclaimed.
|
||||
// In no event shall the Intel Corporation or contributors be liable for any direct,
|
||||
// indirect, incidental, special, exemplary, or consequential damages
|
||||
// (including, but not limited to, procurement of substitute goods or services;
|
||||
// loss of use, data, or profits; or business interruption) however caused
|
||||
// and on any theory of liability, whether in contract, strict liability,
|
||||
// or tort (including negligence or otherwise) arising in any way out of
|
||||
// the use of this software, even if advised of the possibility of such damage.
|
||||
//
|
||||
//M*/
|
||||
|
||||
|
||||
#include <cuda_runtime.h>
|
||||
#include <stdio.h>
|
||||
#include "NCV.hpp"
|
||||
#include "NCVPyramid.hpp"
|
||||
#include "NCVPixelOperations.hpp"
|
||||
|
||||
|
||||
template<typename T, Ncv32u CN> struct __average4_CN {static T _average4_CN(const T &p00, const T &p01, const T &p10, const T &p11);};
|
||||
|
||||
template<typename T> struct __average4_CN<T, 1> {
|
||||
static T _average4_CN(const T &p00, const T &p01, const T &p10, const T &p11)
|
||||
{
|
||||
T out;
|
||||
out.x = ((Ncv32s)p00.x + p01.x + p10.x + p11.x + 2) / 4;
|
||||
return out;
|
||||
}};
|
||||
|
||||
template<> struct __average4_CN<float1, 1> {
|
||||
static float1 _average4_CN(const float1 &p00, const float1 &p01, const float1 &p10, const float1 &p11)
|
||||
{
|
||||
float1 out;
|
||||
out.x = (p00.x + p01.x + p10.x + p11.x) / 4;
|
||||
return out;
|
||||
}};
|
||||
|
||||
template<> struct __average4_CN<double1, 1> {
|
||||
static double1 _average4_CN(const double1 &p00, const double1 &p01, const double1 &p10, const double1 &p11)
|
||||
{
|
||||
double1 out;
|
||||
out.x = (p00.x + p01.x + p10.x + p11.x) / 4;
|
||||
return out;
|
||||
}};
|
||||
|
||||
template<typename T> struct __average4_CN<T, 3> {
|
||||
static T _average4_CN(const T &p00, const T &p01, const T &p10, const T &p11)
|
||||
{
|
||||
T out;
|
||||
out.x = ((Ncv32s)p00.x + p01.x + p10.x + p11.x + 2) / 4;
|
||||
out.y = ((Ncv32s)p00.y + p01.y + p10.y + p11.y + 2) / 4;
|
||||
out.z = ((Ncv32s)p00.z + p01.z + p10.z + p11.z + 2) / 4;
|
||||
return out;
|
||||
}};
|
||||
|
||||
template<> struct __average4_CN<float3, 3> {
|
||||
static float3 _average4_CN(const float3 &p00, const float3 &p01, const float3 &p10, const float3 &p11)
|
||||
{
|
||||
float3 out;
|
||||
out.x = (p00.x + p01.x + p10.x + p11.x) / 4;
|
||||
out.y = (p00.y + p01.y + p10.y + p11.y) / 4;
|
||||
out.z = (p00.z + p01.z + p10.z + p11.z) / 4;
|
||||
return out;
|
||||
}};
|
||||
|
||||
template<> struct __average4_CN<double3, 3> {
|
||||
static double3 _average4_CN(const double3 &p00, const double3 &p01, const double3 &p10, const double3 &p11)
|
||||
{
|
||||
double3 out;
|
||||
out.x = (p00.x + p01.x + p10.x + p11.x) / 4;
|
||||
out.y = (p00.y + p01.y + p10.y + p11.y) / 4;
|
||||
out.z = (p00.z + p01.z + p10.z + p11.z) / 4;
|
||||
return out;
|
||||
}};
|
||||
|
||||
template<typename T> struct __average4_CN<T, 4> {
|
||||
static T _average4_CN(const T &p00, const T &p01, const T &p10, const T &p11)
|
||||
{
|
||||
T out;
|
||||
out.x = ((Ncv32s)p00.x + p01.x + p10.x + p11.x + 2) / 4;
|
||||
out.y = ((Ncv32s)p00.y + p01.y + p10.y + p11.y + 2) / 4;
|
||||
out.z = ((Ncv32s)p00.z + p01.z + p10.z + p11.z + 2) / 4;
|
||||
out.w = ((Ncv32s)p00.w + p01.w + p10.w + p11.w + 2) / 4;
|
||||
return out;
|
||||
}};
|
||||
|
||||
template<> struct __average4_CN<float4, 4> {
|
||||
static float4 _average4_CN(const float4 &p00, const float4 &p01, const float4 &p10, const float4 &p11)
|
||||
{
|
||||
float4 out;
|
||||
out.x = (p00.x + p01.x + p10.x + p11.x) / 4;
|
||||
out.y = (p00.y + p01.y + p10.y + p11.y) / 4;
|
||||
out.z = (p00.z + p01.z + p10.z + p11.z) / 4;
|
||||
out.w = (p00.w + p01.w + p10.w + p11.w) / 4;
|
||||
return out;
|
||||
}};
|
||||
|
||||
template<> struct __average4_CN<double4, 4> {
|
||||
static double4 _average4_CN(const double4 &p00, const double4 &p01, const double4 &p10, const double4 &p11)
|
||||
{
|
||||
double4 out;
|
||||
out.x = (p00.x + p01.x + p10.x + p11.x) / 4;
|
||||
out.y = (p00.y + p01.y + p10.y + p11.y) / 4;
|
||||
out.z = (p00.z + p01.z + p10.z + p11.z) / 4;
|
||||
out.w = (p00.w + p01.w + p10.w + p11.w) / 4;
|
||||
return out;
|
||||
}};
|
||||
|
||||
template<typename T> static T _average4(const T &p00, const T &p01, const T &p10, const T &p11)
|
||||
{
|
||||
return __average4_CN<T, NC(T)>::_average4_CN(p00, p01, p10, p11);
|
||||
}
|
||||
|
||||
|
||||
template<typename Tin, typename Tout, Ncv32u CN> struct __lerp_CN {static Tout _lerp_CN(const Tin &a, const Tin &b, Ncv32f d);};
|
||||
|
||||
template<typename Tin, typename Tout> struct __lerp_CN<Tin, Tout, 1> {
|
||||
static Tout _lerp_CN(const Tin &a, const Tin &b, Ncv32f d)
|
||||
{
|
||||
typedef typename TConvVec2Base<Tout>::TBase TB;
|
||||
return _pixMake(TB(b.x * d + a.x * (1 - d)));
|
||||
}};
|
||||
|
||||
template<typename Tin, typename Tout> struct __lerp_CN<Tin, Tout, 3> {
|
||||
static Tout _lerp_CN(const Tin &a, const Tin &b, Ncv32f d)
|
||||
{
|
||||
typedef typename TConvVec2Base<Tout>::TBase TB;
|
||||
return _pixMake(TB(b.x * d + a.x * (1 - d)),
|
||||
TB(b.y * d + a.y * (1 - d)),
|
||||
TB(b.z * d + a.z * (1 - d)));
|
||||
}};
|
||||
|
||||
template<typename Tin, typename Tout> struct __lerp_CN<Tin, Tout, 4> {
|
||||
static Tout _lerp_CN(const Tin &a, const Tin &b, Ncv32f d)
|
||||
{
|
||||
typedef typename TConvVec2Base<Tout>::TBase TB;
|
||||
return _pixMake(TB(b.x * d + a.x * (1 - d)),
|
||||
TB(b.y * d + a.y * (1 - d)),
|
||||
TB(b.z * d + a.z * (1 - d)),
|
||||
TB(b.w * d + a.w * (1 - d)));
|
||||
}};
|
||||
|
||||
template<typename Tin, typename Tout> static Tout _lerp(const Tin &a, const Tin &b, Ncv32f d)
|
||||
{
|
||||
return __lerp_CN<Tin, Tout, NC(Tin)>::_lerp_CN(a, b, d);
|
||||
}
|
||||
|
||||
|
||||
template<typename T>
|
||||
static T _interpLinear(const T &a, const T &b, Ncv32f d)
|
||||
{
|
||||
typedef typename TConvBase2Vec<Ncv32f, NC(T)>::TVec TVFlt;
|
||||
TVFlt tmp = _lerp<T, TVFlt>(a, b, d);
|
||||
return _pixDemoteClampZ<TVFlt, T>(tmp);
|
||||
}
|
||||
|
||||
|
||||
template<typename T>
|
||||
static T _interpBilinear(const NCVMatrix<T> &refLayer, Ncv32f x, Ncv32f y)
|
||||
{
|
||||
Ncv32u xl = (Ncv32u)x;
|
||||
Ncv32u xh = xl+1;
|
||||
Ncv32f dx = x - xl;
|
||||
Ncv32u yl = (Ncv32u)y;
|
||||
Ncv32u yh = yl+1;
|
||||
Ncv32f dy = y - yl;
|
||||
T p00, p01, p10, p11;
|
||||
p00 = refLayer.at(xl, yl);
|
||||
p01 = xh < refLayer.width() ? refLayer.at(xh, yl) : p00;
|
||||
p10 = yh < refLayer.height() ? refLayer.at(xl, yh) : p00;
|
||||
p11 = (xh < refLayer.width() && yh < refLayer.height()) ? refLayer.at(xh, yh) : p00;
|
||||
typedef typename TConvBase2Vec<Ncv32f, NC(T)>::TVec TVFlt;
|
||||
TVFlt m_00_01 = _lerp<T, TVFlt>(p00, p01, dx);
|
||||
TVFlt m_10_11 = _lerp<T, TVFlt>(p10, p11, dx);
|
||||
TVFlt mixture = _lerp<TVFlt, TVFlt>(m_00_01, m_10_11, dy);
|
||||
return _pixDemoteClampZ<TVFlt, T>(mixture);
|
||||
}
|
||||
|
||||
|
||||
template <class T>
|
||||
NCVImagePyramid<T>::NCVImagePyramid(const NCVMatrix<T> &img,
|
||||
Ncv8u numLayers,
|
||||
INCVMemAllocator &alloc,
|
||||
cudaStream_t cuStream)
|
||||
{
|
||||
this->_isInitialized = false;
|
||||
ncvAssertPrintReturn(img.memType() == alloc.memType(), "NCVImagePyramid_host::ctor error", );
|
||||
|
||||
this->layer0 = &img;
|
||||
NcvSize32u szLastLayer(img.width(), img.height());
|
||||
this->nLayers = 1;
|
||||
|
||||
NCV_SET_SKIP_COND(alloc.isCounting());
|
||||
NcvBool bDeviceCode = alloc.memType() == NCVMemoryTypeDevice;
|
||||
|
||||
if (numLayers == 0)
|
||||
{
|
||||
numLayers = 255; //it will cut-off when any of the dimensions goes 1
|
||||
}
|
||||
|
||||
for (Ncv32u i=0; i<(Ncv32u)numLayers-1; i++)
|
||||
{
|
||||
NcvSize32u szCurLayer(szLastLayer.width / 2, szLastLayer.height / 2);
|
||||
if (szCurLayer.width == 0 || szCurLayer.height == 0)
|
||||
{
|
||||
break;
|
||||
}
|
||||
|
||||
this->pyramid.push_back(new NCVMatrixAlloc<T>(alloc, szCurLayer.width, szCurLayer.height));
|
||||
ncvAssertPrintReturn(((NCVMatrixAlloc<T> *)(this->pyramid[i]))->isMemAllocated(), "NCVImagePyramid_host::ctor error", );
|
||||
this->nLayers++;
|
||||
|
||||
//fill in the layer
|
||||
NCV_SKIP_COND_BEGIN
|
||||
|
||||
const NCVMatrix<T> *prevLayer = i == 0 ? this->layer0 : this->pyramid[i-1];
|
||||
NCVMatrix<T> *curLayer = this->pyramid[i];
|
||||
|
||||
if (bDeviceCode)
|
||||
{
|
||||
//TODO: in cuStream
|
||||
}
|
||||
else
|
||||
{
|
||||
for (Ncv32u i=0; i<szCurLayer.height; i++)
|
||||
{
|
||||
for (Ncv32u j=0; j<szCurLayer.width; j++)
|
||||
{
|
||||
T p00 = prevLayer->at(2*j+0, 2*i+0);
|
||||
T p01 = prevLayer->at(2*j+1, 2*i+0);
|
||||
T p10 = prevLayer->at(2*j+0, 2*i+1);
|
||||
T p11 = prevLayer->at(2*j+1, 2*i+1);
|
||||
curLayer->at(j, i) = _average4(p00, p01, p10, p11);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
NCV_SKIP_COND_END
|
||||
|
||||
szLastLayer = szCurLayer;
|
||||
}
|
||||
|
||||
this->_isInitialized = true;
|
||||
}
|
||||
|
||||
|
||||
template <class T>
|
||||
NCVImagePyramid<T>::~NCVImagePyramid()
|
||||
{
|
||||
}
|
||||
|
||||
|
||||
template <class T>
|
||||
NcvBool NCVImagePyramid<T>::isInitialized() const
|
||||
{
|
||||
return this->_isInitialized;
|
||||
}
|
||||
|
||||
|
||||
template <class T>
|
||||
NCVStatus NCVImagePyramid<T>::getLayer(NCVMatrix<T> &outImg,
|
||||
NcvSize32u outRoi,
|
||||
NcvBool bTrilinear,
|
||||
cudaStream_t cuStream) const
|
||||
{
|
||||
ncvAssertReturn(this->isInitialized(), NCV_UNKNOWN_ERROR);
|
||||
ncvAssertReturn(outImg.memType() == this->layer0->memType(), NCV_MEM_RESIDENCE_ERROR);
|
||||
ncvAssertReturn(outRoi.width <= this->layer0->width() && outRoi.height <= this->layer0->height() &&
|
||||
outRoi.width > 0 && outRoi.height > 0, NCV_DIMENSIONS_INVALID);
|
||||
|
||||
if (outRoi.width == this->layer0->width() && outRoi.height == this->layer0->height())
|
||||
{
|
||||
ncvAssertReturnNcvStat(this->layer0->copy2D(outImg, NcvSize32u(this->layer0->width(), this->layer0->height()), cuStream));
|
||||
return NCV_SUCCESS;
|
||||
}
|
||||
|
||||
Ncv32f lastScale = 1.0f;
|
||||
Ncv32f curScale;
|
||||
const NCVMatrix<T> *lastLayer = this->layer0;
|
||||
const NCVMatrix<T> *curLayer = NULL;
|
||||
NcvBool bUse2Refs = false;
|
||||
|
||||
for (Ncv32u i=0; i<this->nLayers-1; i++)
|
||||
{
|
||||
curScale = lastScale * 0.5f;
|
||||
curLayer = this->pyramid[i];
|
||||
|
||||
if (outRoi.width == curLayer->width() && outRoi.height == curLayer->height())
|
||||
{
|
||||
ncvAssertReturnNcvStat(this->pyramid[i]->copy2D(outImg, NcvSize32u(this->pyramid[i]->width(), this->pyramid[i]->height()), cuStream));
|
||||
return NCV_SUCCESS;
|
||||
}
|
||||
|
||||
if (outRoi.width >= curLayer->width() && outRoi.height >= curLayer->height())
|
||||
{
|
||||
if (outRoi.width < lastLayer->width() && outRoi.height < lastLayer->height())
|
||||
{
|
||||
bUse2Refs = true;
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
||||
lastScale = curScale;
|
||||
lastLayer = curLayer;
|
||||
}
|
||||
|
||||
bUse2Refs = bUse2Refs && bTrilinear;
|
||||
|
||||
NCV_SET_SKIP_COND(outImg.memType() == NCVMemoryTypeNone);
|
||||
NcvBool bDeviceCode = this->layer0->memType() == NCVMemoryTypeDevice;
|
||||
|
||||
NCV_SKIP_COND_BEGIN
|
||||
|
||||
if (bDeviceCode)
|
||||
{
|
||||
//TODO: in cuStream
|
||||
}
|
||||
else
|
||||
{
|
||||
for (Ncv32u i=0; i<outRoi.height; i++)
|
||||
{
|
||||
for (Ncv32u j=0; j<outRoi.width; j++)
|
||||
{
|
||||
//top layer pixel (always exists)
|
||||
NcvSize32u szTopLayer(lastLayer->width(), lastLayer->height());
|
||||
Ncv32f ptTopX = 1.0f * (szTopLayer.width - 1) * j / (outRoi.width - 1);
|
||||
Ncv32f ptTopY = 1.0f * (szTopLayer.height - 1) * i / (outRoi.height - 1);
|
||||
T topPix = _interpBilinear(*lastLayer, ptTopX, ptTopY);
|
||||
T trilinearPix = topPix;
|
||||
|
||||
if (bUse2Refs)
|
||||
{
|
||||
//bottom layer pixel (exists only if the requested scale is greater than the smallest layer scale)
|
||||
NcvSize32u szBottomLayer(curLayer->width(), curLayer->height());
|
||||
Ncv32f ptBottomX = 1.0f * (szBottomLayer.width - 1) * j / (outRoi.width - 1);
|
||||
Ncv32f ptBottomY = 1.0f * (szBottomLayer.height - 1) * i / (outRoi.height - 1);
|
||||
T bottomPix = _interpBilinear(*curLayer, ptBottomX, ptBottomY);
|
||||
|
||||
Ncv32f scale = (1.0f * outRoi.width / layer0->width() + 1.0f * outRoi.height / layer0->height()) / 2;
|
||||
Ncv32f dl = (scale - curScale) / (lastScale - curScale);
|
||||
dl = CLAMP(dl, 0.0f, 1.0f);
|
||||
trilinearPix = _interpLinear(bottomPix, topPix, dl);
|
||||
}
|
||||
|
||||
outImg.at(j, i) = trilinearPix;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
NCV_SKIP_COND_END
|
||||
|
||||
return NCV_SUCCESS;
|
||||
}
|
||||
|
||||
|
||||
template class NCVImagePyramid<uchar1>;
|
||||
template class NCVImagePyramid<uchar3>;
|
||||
template class NCVImagePyramid<uchar4>;
|
||||
template class NCVImagePyramid<ushort1>;
|
||||
template class NCVImagePyramid<ushort3>;
|
||||
template class NCVImagePyramid<ushort4>;
|
||||
template class NCVImagePyramid<uint1>;
|
||||
template class NCVImagePyramid<uint3>;
|
||||
template class NCVImagePyramid<uint4>;
|
||||
template class NCVImagePyramid<float1>;
|
||||
template class NCVImagePyramid<float3>;
|
||||
template class NCVImagePyramid<float4>;
|
97
modules/gpu/src/nvidia/core/NCVPyramid.hpp
Normal file
97
modules/gpu/src/nvidia/core/NCVPyramid.hpp
Normal file
@ -0,0 +1,97 @@
|
||||
/*M///////////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
|
||||
//
|
||||
// By downloading, copying, installing or using the software you agree to this license.
|
||||
// If you do not agree to this license, do not download, install,
|
||||
// copy or use the software.
|
||||
//
|
||||
//
|
||||
// License Agreement
|
||||
// For Open Source Computer Vision Library
|
||||
//
|
||||
// Copyright (C) 2009-2010, NVIDIA Corporation, all rights reserved.
|
||||
// Third party copyrights are property of their respective owners.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without modification,
|
||||
// are permitted provided that the following conditions are met:
|
||||
//
|
||||
// * Redistribution's of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimer.
|
||||
//
|
||||
// * Redistribution's in binary form must reproduce the above copyright notice,
|
||||
// this list of conditions and the following disclaimer in the documentation
|
||||
// and/or other materials provided with the distribution.
|
||||
//
|
||||
// * The name of the copyright holders may not be used to endorse or promote products
|
||||
// derived from this software without specific prior written permission.
|
||||
//
|
||||
// This software is provided by the copyright holders and contributors "as is" and
|
||||
// any express or implied warranties, including, but not limited to, the implied
|
||||
// warranties of merchantability and fitness for a particular purpose are disclaimed.
|
||||
// In no event shall the Intel Corporation or contributors be liable for any direct,
|
||||
// indirect, incidental, special, exemplary, or consequential damages
|
||||
// (including, but not limited to, procurement of substitute goods or services;
|
||||
// loss of use, data, or profits; or business interruption) however caused
|
||||
// and on any theory of liability, whether in contract, strict liability,
|
||||
// or tort (including negligence or otherwise) arising in any way out of
|
||||
// the use of this software, even if advised of the possibility of such damage.
|
||||
//
|
||||
//M*/
|
||||
|
||||
|
||||
#ifndef _ncvpyramid_hpp_
|
||||
#define _ncvpyramid_hpp_
|
||||
|
||||
#include <memory>
|
||||
#include <vector>
|
||||
#include "NCV.hpp"
|
||||
|
||||
|
||||
template <class T>
|
||||
class NCV_EXPORTS NCVMatrixStack
|
||||
{
|
||||
public:
|
||||
NCVMatrixStack() {this->_arr.clear();}
|
||||
~NCVMatrixStack()
|
||||
{
|
||||
const Ncv32u nElem = this->_arr.size();
|
||||
for (Ncv32u i=0; i<nElem; i++)
|
||||
{
|
||||
pop_back();
|
||||
}
|
||||
}
|
||||
void push_back(NCVMatrix<T> *elem) {this->_arr.push_back(std::tr1::shared_ptr< NCVMatrix<T> >(elem));}
|
||||
void pop_back() {this->_arr.pop_back();}
|
||||
NCVMatrix<T> * operator [] (int i) const {return this->_arr[i].get();}
|
||||
private:
|
||||
std::vector< std::tr1::shared_ptr< NCVMatrix<T> > > _arr;
|
||||
};
|
||||
|
||||
|
||||
template <class T>
|
||||
class NCV_EXPORTS NCVImagePyramid
|
||||
{
|
||||
public:
|
||||
|
||||
NCVImagePyramid(const NCVMatrix<T> &img,
|
||||
Ncv8u nLayers,
|
||||
INCVMemAllocator &alloc,
|
||||
cudaStream_t cuStream);
|
||||
~NCVImagePyramid();
|
||||
NcvBool isInitialized() const;
|
||||
NCVStatus getLayer(NCVMatrix<T> &outImg,
|
||||
NcvSize32u outRoi,
|
||||
NcvBool bTrilinear,
|
||||
cudaStream_t cuStream) const;
|
||||
|
||||
private:
|
||||
|
||||
NcvBool _isInitialized;
|
||||
const NCVMatrix<T> *layer0;
|
||||
NCVMatrixStack<T> pyramid;
|
||||
Ncv32u nLayers;
|
||||
};
|
||||
|
||||
|
||||
#endif //_ncvpyramid_hpp_
|
@ -68,10 +68,7 @@ namespace
|
||||
|
||||
namespace
|
||||
{
|
||||
void outputHandler(const char* msg)
|
||||
{
|
||||
CV_Error(CV_GpuApiCallError, msg);
|
||||
}
|
||||
static void outputHandler(const std::string &msg) { CV_Error(CV_GpuApiCallError, msg.c_str()); }
|
||||
}
|
||||
|
||||
void cv::gpu::BroxOpticalFlow::operator ()(const GpuMat& frame0, const GpuMat& frame1, GpuMat& u, GpuMat& v, Stream& s)
|
||||
|
Loading…
x
Reference in New Issue
Block a user