gpunvidia module for NCV & NPP API
@@ -722,240 +722,3 @@ bool cv::gpu::CascadeClassifier_GPU::load(const String& filename)
}

#endif

//////////////////////////////////////////////////////////////////////////////////////////////////////

#if defined (HAVE_CUDA)

struct RectConvert
{
    Rect operator()(const NcvRect32u& nr) const { return Rect(nr.x, nr.y, nr.width, nr.height); }
    NcvRect32u operator()(const Rect& nr) const
    {
        NcvRect32u rect;
        rect.x = nr.x;
        rect.y = nr.y;
        rect.width = nr.width;
        rect.height = nr.height;
        return rect;
    }
};

void groupRectangles(std::vector<NcvRect32u> &hypotheses, int groupThreshold, double eps, std::vector<Ncv32u> *weights)
{
    std::vector<Rect> rects(hypotheses.size());
    std::transform(hypotheses.begin(), hypotheses.end(), rects.begin(), RectConvert());

    if (weights)
    {
        std::vector<int> weights_int;
        weights_int.assign(weights->begin(), weights->end());
        cv::groupRectangles(rects, weights_int, groupThreshold, eps);
    }
    else
    {
        cv::groupRectangles(rects, groupThreshold, eps);
    }
    std::transform(rects.begin(), rects.end(), hypotheses.begin(), RectConvert());
    hypotheses.resize(rects.size());
}
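
// Illustrative usage sketch (not part of the original commit): merging raw
// per-window NCV detections with per-rectangle weights. The threshold and
// eps values below are example choices, not defaults mandated by this code.
static void exampleGroupHypotheses(std::vector<NcvRect32u> &hypotheses)
{
    std::vector<Ncv32u> weights(hypotheses.size(), 1);  // uniform weights (assumption)
    groupRectangles(hypotheses, 4, 0.2, &weights);      // groupThreshold = 4, eps = 0.2
    // hypotheses now holds the merged rectangles
}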

NCVStatus loadFromXML(const String &filename,
                      HaarClassifierCascadeDescriptor &haar,
                      std::vector<HaarStage64> &haarStages,
                      std::vector<HaarClassifierNode128> &haarClassifierNodes,
                      std::vector<HaarFeature64> &haarFeatures)
{
    NCVStatus ncvStat;

    haar.NumStages = 0;
    haar.NumClassifierRootNodes = 0;
    haar.NumClassifierTotalNodes = 0;
    haar.NumFeatures = 0;
    haar.ClassifierSize.width = 0;
    haar.ClassifierSize.height = 0;
    haar.bHasStumpsOnly = true;
    haar.bNeedsTiltedII = false;
    Ncv32u curMaxTreeDepth;

    std::vector<char> xmlFileCont;

    std::vector<HaarClassifierNode128> h_TmpClassifierNotRootNodes;
    haarStages.resize(0);
    haarClassifierNodes.resize(0);
    haarFeatures.resize(0);

    Ptr<CvHaarClassifierCascade> oldCascade = (CvHaarClassifierCascade*)cvLoad(filename.c_str(), 0, 0, 0);
    if (oldCascade.empty())
    {
        return NCV_HAAR_XML_LOADING_EXCEPTION;
    }

    haar.ClassifierSize.width = oldCascade->orig_window_size.width;
    haar.ClassifierSize.height = oldCascade->orig_window_size.height;

    int stagesCount = oldCascade->count;
    for(int s = 0; s < stagesCount; ++s) // by stages
    {
        HaarStage64 curStage;
        curStage.setStartClassifierRootNodeOffset(static_cast<Ncv32u>(haarClassifierNodes.size()));

        curStage.setStageThreshold(oldCascade->stage_classifier[s].threshold);

        int treesCount = oldCascade->stage_classifier[s].count;
        for(int t = 0; t < treesCount; ++t) // by trees
        {
            Ncv32u nodeId = 0;
            CvHaarClassifier* tree = &oldCascade->stage_classifier[s].classifier[t];

            int nodesCount = tree->count;
            for(int n = 0; n < nodesCount; ++n) //by features
            {
                CvHaarFeature* feature = &tree->haar_feature[n];

                HaarClassifierNode128 curNode;
                curNode.setThreshold(tree->threshold[n]);

                NcvBool bIsLeftNodeLeaf = false;
                NcvBool bIsRightNodeLeaf = false;

                HaarClassifierNodeDescriptor32 nodeLeft;
                if ( tree->left[n] <= 0 )
                {
                    Ncv32f leftVal = tree->alpha[-tree->left[n]];
                    ncvStat = nodeLeft.create(leftVal);
                    ncvAssertReturn(ncvStat == NCV_SUCCESS, ncvStat);
                    bIsLeftNodeLeaf = true;
                }
                else
                {
                    Ncv32u leftNodeOffset = tree->left[n];
                    nodeLeft.create((Ncv32u)(h_TmpClassifierNotRootNodes.size() + leftNodeOffset - 1));
                    haar.bHasStumpsOnly = false;
                }
                curNode.setLeftNodeDesc(nodeLeft);

                HaarClassifierNodeDescriptor32 nodeRight;
                if ( tree->right[n] <= 0 )
                {
                    Ncv32f rightVal = tree->alpha[-tree->right[n]];
                    ncvStat = nodeRight.create(rightVal);
                    ncvAssertReturn(ncvStat == NCV_SUCCESS, ncvStat);
                    bIsRightNodeLeaf = true;
                }
                else
                {
                    Ncv32u rightNodeOffset = tree->right[n];
                    nodeRight.create((Ncv32u)(h_TmpClassifierNotRootNodes.size() + rightNodeOffset - 1));
                    haar.bHasStumpsOnly = false;
                }
                curNode.setRightNodeDesc(nodeRight);

                Ncv32u tiltedVal = feature->tilted;
                haar.bNeedsTiltedII = (tiltedVal != 0);

                Ncv32u featureId = 0;
                for(int l = 0; l < CV_HAAR_FEATURE_MAX; ++l) //by rects
                {
                    Ncv32u rectX = feature->rect[l].r.x;
                    Ncv32u rectY = feature->rect[l].r.y;
                    Ncv32u rectWidth = feature->rect[l].r.width;
                    Ncv32u rectHeight = feature->rect[l].r.height;

                    Ncv32f rectWeight = feature->rect[l].weight;

                    if (rectWeight == 0/* && rectX == 0 && rectY == 0 && rectWidth == 0 && rectHeight == 0*/)
                        break;

                    HaarFeature64 curFeature;
                    ncvStat = curFeature.setRect(rectX, rectY, rectWidth, rectHeight, haar.ClassifierSize.width, haar.ClassifierSize.height);
                    curFeature.setWeight(rectWeight);
                    ncvAssertReturn(NCV_SUCCESS == ncvStat, ncvStat);
                    haarFeatures.push_back(curFeature);

                    featureId++;
                }

                HaarFeatureDescriptor32 tmpFeatureDesc;
                ncvStat = tmpFeatureDesc.create(haar.bNeedsTiltedII, bIsLeftNodeLeaf, bIsRightNodeLeaf,
                                                featureId, static_cast<Ncv32u>(haarFeatures.size()) - featureId);
                ncvAssertReturn(NCV_SUCCESS == ncvStat, ncvStat);
                curNode.setFeatureDesc(tmpFeatureDesc);

                if (!nodeId)
                {
                    //root node
                    haarClassifierNodes.push_back(curNode);
                    curMaxTreeDepth = 1;
                }
                else
                {
                    //other node
                    h_TmpClassifierNotRootNodes.push_back(curNode);
                    curMaxTreeDepth++;
                }

                nodeId++;
            }
        }

        curStage.setNumClassifierRootNodes(treesCount);
        haarStages.push_back(curStage);
    }

    //fill in cascade stats
    haar.NumStages = static_cast<Ncv32u>(haarStages.size());
    haar.NumClassifierRootNodes = static_cast<Ncv32u>(haarClassifierNodes.size());
    haar.NumClassifierTotalNodes = static_cast<Ncv32u>(haar.NumClassifierRootNodes + h_TmpClassifierNotRootNodes.size());
    haar.NumFeatures = static_cast<Ncv32u>(haarFeatures.size());

    //merge root and leaf nodes in one classifiers array
    Ncv32u offsetRoot = static_cast<Ncv32u>(haarClassifierNodes.size());
    for (Ncv32u i=0; i<haarClassifierNodes.size(); i++)
    {
        HaarFeatureDescriptor32 featureDesc = haarClassifierNodes[i].getFeatureDesc();

        HaarClassifierNodeDescriptor32 nodeLeft = haarClassifierNodes[i].getLeftNodeDesc();
        if (!featureDesc.isLeftNodeLeaf())
        {
            Ncv32u newOffset = nodeLeft.getNextNodeOffset() + offsetRoot;
            nodeLeft.create(newOffset);
        }
        haarClassifierNodes[i].setLeftNodeDesc(nodeLeft);

        HaarClassifierNodeDescriptor32 nodeRight = haarClassifierNodes[i].getRightNodeDesc();
        if (!featureDesc.isRightNodeLeaf())
        {
            Ncv32u newOffset = nodeRight.getNextNodeOffset() + offsetRoot;
            nodeRight.create(newOffset);
        }
        haarClassifierNodes[i].setRightNodeDesc(nodeRight);
    }

    for (Ncv32u i=0; i<h_TmpClassifierNotRootNodes.size(); i++)
    {
        HaarFeatureDescriptor32 featureDesc = h_TmpClassifierNotRootNodes[i].getFeatureDesc();

        HaarClassifierNodeDescriptor32 nodeLeft = h_TmpClassifierNotRootNodes[i].getLeftNodeDesc();
        if (!featureDesc.isLeftNodeLeaf())
        {
            Ncv32u newOffset = nodeLeft.getNextNodeOffset() + offsetRoot;
            nodeLeft.create(newOffset);
        }
        h_TmpClassifierNotRootNodes[i].setLeftNodeDesc(nodeLeft);

        HaarClassifierNodeDescriptor32 nodeRight = h_TmpClassifierNotRootNodes[i].getRightNodeDesc();
        if (!featureDesc.isRightNodeLeaf())
        {
            Ncv32u newOffset = nodeRight.getNextNodeOffset() + offsetRoot;
            nodeRight.create(newOffset);
        }
        h_TmpClassifierNotRootNodes[i].setRightNodeDesc(nodeRight);

        haarClassifierNodes.push_back(h_TmpClassifierNotRootNodes[i]);
    }

    return NCV_SUCCESS;
}
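
// Illustrative usage sketch (not part of the original commit): converting an
// OpenCV XML cascade into the flat NCV representation. The cascade file name
// is an assumption for the example.
static NCVStatus exampleLoadCascade(HaarClassifierCascadeDescriptor &haar)
{
    std::vector<HaarStage64> stages;
    std::vector<HaarClassifierNode128> nodes;
    std::vector<HaarFeature64> features;
    return loadFromXML("haarcascade_frontalface_alt.xml", haar, stages, nodes, features);
}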

#endif /* HAVE_CUDA */
@@ -45,10 +45,12 @@
#include <cuda_runtime.h>
#include <npp.h>
#include "NPP_staging.hpp"
#include "opencv2/gpu/devmem2d.hpp"
#include "safe_call.hpp"

#include "opencv2/core/cuda_devptrs.hpp"
#include "opencv2/core/cuda/common.hpp"
#include "opencv2/gpunvidia.hpp"

#include "safe_call.hpp"

namespace cv { namespace gpu
{
@@ -45,7 +45,7 @@
#include <cuda_runtime_api.h>
#include <cufft.h>
#include "NCV.hpp"
#include "opencv2/gpunvidia.hpp"

#if defined(__GNUC__)
    #define ncvSafeCall(expr)  ___ncvSafeCall(expr, __FILE__, __LINE__, __func__)
File diff suppressed because it is too large
@@ -1,104 +0,0 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
//  By downloading, copying, installing or using the software you agree to this license.
//  If you do not agree to this license, do not download, install,
//  copy or use the software.
//
//
//                          License Agreement
//                For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
//   * Redistribution's of source code must retain the above copyright notice,
//     this list of conditions and the following disclaimer.
//
//   * Redistribution's in binary form must reproduce the above copyright notice,
//     this list of conditions and the following disclaimer in the documentation
//     and/or other materials provided with the distribution.
//
//   * The name of the copyright holders may not be used to endorse or promote products
//     derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/

////////////////////////////////////////////////////////////////////////////////
//
// NVIDIA CUDA implementation of Brox et al Optical Flow algorithm
//
// Algorithm is explained in the original paper:
//      T. Brox, A. Bruhn, N. Papenberg, J. Weickert:
//      High accuracy optical flow estimation based on a theory for warping.
//      ECCV 2004.
//
// Implementation by Mikhail Smirnov
// email: msmirnov@nvidia.com, devsupport@nvidia.com
//
// Credits for help with the code to:
// Alexey Mendelenko, Anton Obukhov, and Alexander Kharlamov.
//
////////////////////////////////////////////////////////////////////////////////

#ifndef _ncv_optical_flow_h_
#define _ncv_optical_flow_h_

#include "NCV.hpp"

/// \brief Model and solver parameters
struct NCVBroxOpticalFlowDescriptor
{
    /// flow smoothness
    Ncv32f alpha;
    /// gradient constancy importance
    Ncv32f gamma;
    /// pyramid scale factor
    Ncv32f scale_factor;
    /// number of lagged non-linearity iterations (inner loop)
    Ncv32u number_of_inner_iterations;
    /// number of warping iterations (number of pyramid levels)
    Ncv32u number_of_outer_iterations;
    /// number of linear system solver iterations
    Ncv32u number_of_solver_iterations;
};

/////////////////////////////////////////////////////////////////////////////////////////
/// \brief Compute optical flow
///
/// Based on method by Brox et al [2004]
/// \param [in] desc model and solver parameters
/// \param [in] gpu_mem_allocator GPU memory allocator
/// \param [in] frame0 source frame
/// \param [in] frame1 frame to track
/// \param [out] u flow horizontal component (along \b x axis)
/// \param [out] v flow vertical component (along \b y axis)
/// \return computation status
/////////////////////////////////////////////////////////////////////////////////////////

NCV_EXPORTS
NCVStatus NCVBroxOpticalFlow(const NCVBroxOpticalFlowDescriptor desc,
                             INCVMemAllocator &gpu_mem_allocator,
                             const NCVMatrix<Ncv32f> &frame0,
                             const NCVMatrix<Ncv32f> &frame1,
                             NCVMatrix<Ncv32f> &u,
                             NCVMatrix<Ncv32f> &v,
                             cudaStream_t stream);

#endif
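
// Illustrative usage sketch (not part of this header): computing Brox flow
// between two single-channel float frames already resident in GPU memory.
// The allocator and frames are assumed to be set up by the caller, and the
// descriptor values are example settings, not defaults defined here.
static NCVStatus exampleBroxFlow(INCVMemAllocator &gpuAllocator,
                                 const NCVMatrix<Ncv32f> &frame0,
                                 const NCVMatrix<Ncv32f> &frame1,
                                 NCVMatrix<Ncv32f> &u, NCVMatrix<Ncv32f> &v)
{
    NCVBroxOpticalFlowDescriptor desc;
    desc.alpha = 0.197f;                     // smoothness weight (example value)
    desc.gamma = 50.0f;                      // gradient constancy weight (example value)
    desc.scale_factor = 0.8f;                // pyramid downscale per level
    desc.number_of_inner_iterations = 10;
    desc.number_of_outer_iterations = 77;
    desc.number_of_solver_iterations = 10;
    return NCVBroxOpticalFlow(desc, gpuAllocator, frame0, frame1, u, v, 0);
}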
File diff suppressed because it is too large
@@ -1,461 +0,0 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
//  By downloading, copying, installing or using the software you agree to this license.
//  If you do not agree to this license, do not download, install,
//  copy or use the software.
//
//
//                          License Agreement
//                For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
//   * Redistribution's of source code must retain the above copyright notice,
//     this list of conditions and the following disclaimer.
//
//   * Redistribution's in binary form must reproduce the above copyright notice,
//     this list of conditions and the following disclaimer in the documentation
//     and/or other materials provided with the distribution.
//
//   * The name of the copyright holders may not be used to endorse or promote products
//     derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/

////////////////////////////////////////////////////////////////////////////////
//
// NVIDIA CUDA implementation of Viola-Jones Object Detection Framework
//
// The algorithm and code are explained in the upcoming GPU Computing Gems
// chapter in detail:
//
//   Anton Obukhov, "Haar Classifiers for Object Detection with CUDA"
//   PDF URL placeholder
//   email: aobukhov@nvidia.com, devsupport@nvidia.com
//
// Credits for help with the code to:
// Alexey Mendelenko, Cyril Crassin, and Mikhail Smirnov.
//
////////////////////////////////////////////////////////////////////////////////

#ifndef _ncvhaarobjectdetection_hpp_
#define _ncvhaarobjectdetection_hpp_

#include "NCV.hpp"


//==============================================================================
//
// Guaranteed size cross-platform classifier structures
//
//==============================================================================

#if defined __GNUC__ && __GNUC__ > 2 && __GNUC_MINOR__ > 4
typedef Ncv32f __attribute__((__may_alias__)) Ncv32f_a;
#else
typedef Ncv32f Ncv32f_a;
#endif

struct HaarFeature64
{
    uint2 _ui2;

#define HaarFeature64_CreateCheck_MaxRectField 0xFF

    __host__ NCVStatus setRect(Ncv32u rectX, Ncv32u rectY, Ncv32u rectWidth, Ncv32u rectHeight, Ncv32u /*clsWidth*/, Ncv32u /*clsHeight*/)
    {
        ncvAssertReturn(rectWidth <= HaarFeature64_CreateCheck_MaxRectField && rectHeight <= HaarFeature64_CreateCheck_MaxRectField, NCV_HAAR_TOO_LARGE_FEATURES);
        ((NcvRect8u*)&(this->_ui2.x))->x = (Ncv8u)rectX;
        ((NcvRect8u*)&(this->_ui2.x))->y = (Ncv8u)rectY;
        ((NcvRect8u*)&(this->_ui2.x))->width = (Ncv8u)rectWidth;
        ((NcvRect8u*)&(this->_ui2.x))->height = (Ncv8u)rectHeight;
        return NCV_SUCCESS;
    }

    __host__ NCVStatus setWeight(Ncv32f weight)
    {
        ((Ncv32f_a*)&(this->_ui2.y))[0] = weight;
        return NCV_SUCCESS;
    }

    __device__ __host__ void getRect(Ncv32u *rectX, Ncv32u *rectY, Ncv32u *rectWidth, Ncv32u *rectHeight)
    {
        NcvRect8u tmpRect = *(NcvRect8u*)(&this->_ui2.x);
        *rectX = tmpRect.x;
        *rectY = tmpRect.y;
        *rectWidth = tmpRect.width;
        *rectHeight = tmpRect.height;
    }

    __device__ __host__ Ncv32f getWeight(void)
    {
        return *(Ncv32f_a*)(&this->_ui2.y);
    }
};


struct HaarFeatureDescriptor32
{
private:

#define HaarFeatureDescriptor32_Interpret_MaskFlagTilted        0x80000000
#define HaarFeatureDescriptor32_Interpret_MaskFlagLeftNodeLeaf  0x40000000
#define HaarFeatureDescriptor32_Interpret_MaskFlagRightNodeLeaf 0x20000000
#define HaarFeatureDescriptor32_CreateCheck_MaxNumFeatures      0x1F
#define HaarFeatureDescriptor32_NumFeatures_Shift               24
#define HaarFeatureDescriptor32_CreateCheck_MaxFeatureOffset    0x00FFFFFF

    Ncv32u desc;

public:

    __host__ NCVStatus create(NcvBool bTilted, NcvBool bLeftLeaf, NcvBool bRightLeaf,
                              Ncv32u numFeatures, Ncv32u offsetFeatures)
    {
        if (numFeatures > HaarFeatureDescriptor32_CreateCheck_MaxNumFeatures)
        {
            return NCV_HAAR_TOO_MANY_FEATURES_IN_CLASSIFIER;
        }
        if (offsetFeatures > HaarFeatureDescriptor32_CreateCheck_MaxFeatureOffset)
        {
            return NCV_HAAR_TOO_MANY_FEATURES_IN_CASCADE;
        }
        this->desc = 0;
        this->desc |= (bTilted ? HaarFeatureDescriptor32_Interpret_MaskFlagTilted : 0);
        this->desc |= (bLeftLeaf ? HaarFeatureDescriptor32_Interpret_MaskFlagLeftNodeLeaf : 0);
        this->desc |= (bRightLeaf ? HaarFeatureDescriptor32_Interpret_MaskFlagRightNodeLeaf : 0);
        this->desc |= (numFeatures << HaarFeatureDescriptor32_NumFeatures_Shift);
        this->desc |= offsetFeatures;
        return NCV_SUCCESS;
    }

    __device__ __host__ NcvBool isTilted(void)
    {
        return (this->desc & HaarFeatureDescriptor32_Interpret_MaskFlagTilted) != 0;
    }

    __device__ __host__ NcvBool isLeftNodeLeaf(void)
    {
        return (this->desc & HaarFeatureDescriptor32_Interpret_MaskFlagLeftNodeLeaf) != 0;
    }

    __device__ __host__ NcvBool isRightNodeLeaf(void)
    {
        return (this->desc & HaarFeatureDescriptor32_Interpret_MaskFlagRightNodeLeaf) != 0;
    }

    __device__ __host__ Ncv32u getNumFeatures(void)
    {
        return (this->desc >> HaarFeatureDescriptor32_NumFeatures_Shift) & HaarFeatureDescriptor32_CreateCheck_MaxNumFeatures;
    }

    __device__ __host__ Ncv32u getFeaturesOffset(void)
    {
        return this->desc & HaarFeatureDescriptor32_CreateCheck_MaxFeatureOffset;
    }
};
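
// Bit layout of `desc`, restating the masks above for quick reference:
//   bit  31     : tilted-feature flag
//   bit  30     : left child is a leaf
//   bit  29     : right child is a leaf
//   bits 28..24 : number of rectangles in the feature (max 0x1F)
//   bits 23..0  : offset of the first rectangle in the feature array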

struct HaarClassifierNodeDescriptor32
{
    uint1 _ui1;

    __host__ NCVStatus create(Ncv32f leafValue)
    {
        *(Ncv32f_a *)&this->_ui1 = leafValue;
        return NCV_SUCCESS;
    }

    __host__ NCVStatus create(Ncv32u offsetHaarClassifierNode)
    {
        this->_ui1.x = offsetHaarClassifierNode;
        return NCV_SUCCESS;
    }

    __host__ Ncv32f getLeafValueHost(void)
    {
        return *(Ncv32f_a *)&this->_ui1.x;
    }

#ifdef __CUDACC__
    __device__ Ncv32f getLeafValue(void)
    {
        return __int_as_float(this->_ui1.x);
    }
#endif

    __device__ __host__ Ncv32u getNextNodeOffset(void)
    {
        return this->_ui1.x;
    }
};

#if defined __GNUC__ && __GNUC__ > 2 && __GNUC_MINOR__ > 4
typedef Ncv32u __attribute__((__may_alias__)) Ncv32u_a;
#else
typedef Ncv32u Ncv32u_a;
#endif

struct HaarClassifierNode128
{
    uint4 _ui4;

    __host__ NCVStatus setFeatureDesc(HaarFeatureDescriptor32 f)
    {
        this->_ui4.x = *(Ncv32u *)&f;
        return NCV_SUCCESS;
    }

    __host__ NCVStatus setThreshold(Ncv32f t)
    {
        this->_ui4.y = *(Ncv32u_a *)&t;
        return NCV_SUCCESS;
    }

    __host__ NCVStatus setLeftNodeDesc(HaarClassifierNodeDescriptor32 nl)
    {
        this->_ui4.z = *(Ncv32u_a *)&nl;
        return NCV_SUCCESS;
    }

    __host__ NCVStatus setRightNodeDesc(HaarClassifierNodeDescriptor32 nr)
    {
        this->_ui4.w = *(Ncv32u_a *)&nr;
        return NCV_SUCCESS;
    }

    __host__ __device__ HaarFeatureDescriptor32 getFeatureDesc(void)
    {
        return *(HaarFeatureDescriptor32 *)&this->_ui4.x;
    }

    __host__ __device__ Ncv32f getThreshold(void)
    {
        return *(Ncv32f_a*)&this->_ui4.y;
    }

    __host__ __device__ HaarClassifierNodeDescriptor32 getLeftNodeDesc(void)
    {
        return *(HaarClassifierNodeDescriptor32 *)&this->_ui4.z;
    }

    __host__ __device__ HaarClassifierNodeDescriptor32 getRightNodeDesc(void)
    {
        return *(HaarClassifierNodeDescriptor32 *)&this->_ui4.w;
    }
};


struct HaarStage64
{
#define HaarStage64_Interpret_MaskRootNodes       0x0000FFFF
#define HaarStage64_Interpret_MaskRootNodeOffset  0xFFFF0000
#define HaarStage64_Interpret_ShiftRootNodeOffset 16

    uint2 _ui2;

    __host__ NCVStatus setStageThreshold(Ncv32f t)
    {
        this->_ui2.x = *(Ncv32u_a *)&t;
        return NCV_SUCCESS;
    }

    __host__ NCVStatus setStartClassifierRootNodeOffset(Ncv32u val)
    {
        if (val > (HaarStage64_Interpret_MaskRootNodeOffset >> HaarStage64_Interpret_ShiftRootNodeOffset))
        {
            return NCV_HAAR_XML_LOADING_EXCEPTION;
        }
        this->_ui2.y = (val << HaarStage64_Interpret_ShiftRootNodeOffset) | (this->_ui2.y & HaarStage64_Interpret_MaskRootNodes);
        return NCV_SUCCESS;
    }

    __host__ NCVStatus setNumClassifierRootNodes(Ncv32u val)
    {
        if (val > HaarStage64_Interpret_MaskRootNodes)
        {
            return NCV_HAAR_XML_LOADING_EXCEPTION;
        }
        this->_ui2.y = val | (this->_ui2.y & HaarStage64_Interpret_MaskRootNodeOffset);
        return NCV_SUCCESS;
    }

    __host__ __device__ Ncv32f getStageThreshold(void)
    {
        return *(Ncv32f_a*)&this->_ui2.x;
    }

    __host__ __device__ Ncv32u getStartClassifierRootNodeOffset(void)
    {
        return (this->_ui2.y >> HaarStage64_Interpret_ShiftRootNodeOffset);
    }

    __host__ __device__ Ncv32u getNumClassifierRootNodes(void)
    {
        return (this->_ui2.y & HaarStage64_Interpret_MaskRootNodes);
    }
};
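
// Packing of `_ui2`, restating the masks above: _ui2.x holds the stage
// threshold's float bits; _ui2.y keeps the offset of the first root node in
// its upper 16 bits and the root-node count in its lower 16 bits.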


NCV_CT_ASSERT(sizeof(HaarFeature64) == 8);
NCV_CT_ASSERT(sizeof(HaarFeatureDescriptor32) == 4);
NCV_CT_ASSERT(sizeof(HaarClassifierNodeDescriptor32) == 4);
NCV_CT_ASSERT(sizeof(HaarClassifierNode128) == 16);
NCV_CT_ASSERT(sizeof(HaarStage64) == 8);


//==============================================================================
//
// Classifier cascade descriptor
//
//==============================================================================


struct HaarClassifierCascadeDescriptor
{
    Ncv32u NumStages;
    Ncv32u NumClassifierRootNodes;
    Ncv32u NumClassifierTotalNodes;
    Ncv32u NumFeatures;
    NcvSize32u ClassifierSize;
    NcvBool bNeedsTiltedII;
    NcvBool bHasStumpsOnly;
};


//==============================================================================
//
// Functional interface
//
//==============================================================================


enum
{
    NCVPipeObjDet_Default = 0x000,
    NCVPipeObjDet_UseFairImageScaling = 0x001,
    NCVPipeObjDet_FindLargestObject = 0x002,
    NCVPipeObjDet_VisualizeInPlace = 0x004,
};


NCV_EXPORTS NCVStatus ncvDetectObjectsMultiScale_device(NCVMatrix<Ncv8u> &d_srcImg,
                                                        NcvSize32u srcRoi,
                                                        NCVVector<NcvRect32u> &d_dstRects,
                                                        Ncv32u &dstNumRects,

                                                        HaarClassifierCascadeDescriptor &haar,
                                                        NCVVector<HaarStage64> &h_HaarStages,
                                                        NCVVector<HaarStage64> &d_HaarStages,
                                                        NCVVector<HaarClassifierNode128> &d_HaarNodes,
                                                        NCVVector<HaarFeature64> &d_HaarFeatures,

                                                        NcvSize32u minObjSize,
                                                        Ncv32u minNeighbors,   //default 4
                                                        Ncv32f scaleStep,      //default 1.2f
                                                        Ncv32u pixelStep,      //default 1
                                                        Ncv32u flags,          //default NCVPipeObjDet_Default

                                                        INCVMemAllocator &gpuAllocator,
                                                        INCVMemAllocator &cpuAllocator,
                                                        cudaDeviceProp &devProp,
                                                        cudaStream_t cuStream);
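
// Illustrative usage sketch (not part of this header): running the full
// multi-scale detection pipeline with the default tuning noted in the
// declaration above. Allocators, device properties, the cascade buffers,
// and the input image are assumed to be prepared by the caller.
static NCVStatus exampleDetect(NCVMatrix<Ncv8u> &d_img, NcvSize32u roi,
                               NCVVector<NcvRect32u> &d_rects, Ncv32u &numRects,
                               HaarClassifierCascadeDescriptor &haar,
                               NCVVector<HaarStage64> &h_stages, NCVVector<HaarStage64> &d_stages,
                               NCVVector<HaarClassifierNode128> &d_nodes,
                               NCVVector<HaarFeature64> &d_features,
                               INCVMemAllocator &gpuAlloc, INCVMemAllocator &cpuAlloc,
                               cudaDeviceProp &devProp)
{
    NcvSize32u minObjSize;
    minObjSize.width = haar.ClassifierSize.width;     // search from the training size upward
    minObjSize.height = haar.ClassifierSize.height;
    return ncvDetectObjectsMultiScale_device(d_img, roi, d_rects, numRects,
                                             haar, h_stages, d_stages, d_nodes, d_features,
                                             minObjSize, 4, 1.2f, 1, NCVPipeObjDet_Default,
                                             gpuAlloc, cpuAlloc, devProp, 0);
}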

#define OBJDET_MASK_ELEMENT_INVALID_32U 0xFFFFFFFF
#define HAAR_STDDEV_BORDER 1


NCV_EXPORTS NCVStatus ncvApplyHaarClassifierCascade_device(NCVMatrix<Ncv32u> &d_integralImage,
                                                           NCVMatrix<Ncv32f> &d_weights,
                                                           NCVMatrixAlloc<Ncv32u> &d_pixelMask,
                                                           Ncv32u &numDetections,
                                                           HaarClassifierCascadeDescriptor &haar,
                                                           NCVVector<HaarStage64> &h_HaarStages,
                                                           NCVVector<HaarStage64> &d_HaarStages,
                                                           NCVVector<HaarClassifierNode128> &d_HaarNodes,
                                                           NCVVector<HaarFeature64> &d_HaarFeatures,
                                                           NcvBool bMaskElements,
                                                           NcvSize32u anchorsRoi,
                                                           Ncv32u pixelStep,
                                                           Ncv32f scaleArea,
                                                           INCVMemAllocator &gpuAllocator,
                                                           INCVMemAllocator &cpuAllocator,
                                                           cudaDeviceProp &devProp,
                                                           cudaStream_t cuStream);


NCV_EXPORTS NCVStatus ncvApplyHaarClassifierCascade_host(NCVMatrix<Ncv32u> &h_integralImage,
                                                         NCVMatrix<Ncv32f> &h_weights,
                                                         NCVMatrixAlloc<Ncv32u> &h_pixelMask,
                                                         Ncv32u &numDetections,
                                                         HaarClassifierCascadeDescriptor &haar,
                                                         NCVVector<HaarStage64> &h_HaarStages,
                                                         NCVVector<HaarClassifierNode128> &h_HaarNodes,
                                                         NCVVector<HaarFeature64> &h_HaarFeatures,
                                                         NcvBool bMaskElements,
                                                         NcvSize32u anchorsRoi,
                                                         Ncv32u pixelStep,
                                                         Ncv32f scaleArea);


#define RECT_SIMILARITY_PROPORTION 0.2f


NCV_EXPORTS NCVStatus ncvGrowDetectionsVector_device(NCVVector<Ncv32u> &pixelMask,
                                                     Ncv32u numPixelMaskDetections,
                                                     NCVVector<NcvRect32u> &hypotheses,
                                                     Ncv32u &totalDetections,
                                                     Ncv32u totalMaxDetections,
                                                     Ncv32u rectWidth,
                                                     Ncv32u rectHeight,
                                                     Ncv32f curScale,
                                                     cudaStream_t cuStream);


NCV_EXPORTS NCVStatus ncvGrowDetectionsVector_host(NCVVector<Ncv32u> &pixelMask,
                                                   Ncv32u numPixelMaskDetections,
                                                   NCVVector<NcvRect32u> &hypotheses,
                                                   Ncv32u &totalDetections,
                                                   Ncv32u totalMaxDetections,
                                                   Ncv32u rectWidth,
                                                   Ncv32u rectHeight,
                                                   Ncv32f curScale);


NCV_EXPORTS NCVStatus ncvHaarGetClassifierSize(const cv::String &filename, Ncv32u &numStages,
                                               Ncv32u &numNodes, Ncv32u &numFeatures);


NCV_EXPORTS NCVStatus ncvHaarLoadFromFile_host(const cv::String &filename,
                                               HaarClassifierCascadeDescriptor &haar,
                                               NCVVector<HaarStage64> &h_HaarStages,
                                               NCVVector<HaarClassifierNode128> &h_HaarNodes,
                                               NCVVector<HaarFeature64> &h_HaarFeatures);


NCV_EXPORTS NCVStatus ncvHaarStoreNVBIN_host(const cv::String &filename,
                                             HaarClassifierCascadeDescriptor haar,
                                             NCVVector<HaarStage64> &h_HaarStages,
                                             NCVVector<HaarClassifierNode128> &h_HaarNodes,
                                             NCVVector<HaarFeature64> &h_HaarFeatures);



#endif // _ncvhaarobjectdetection_hpp_
File diff suppressed because it is too large
@@ -1,907 +0,0 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
//  By downloading, copying, installing or using the software you agree to this license.
//  If you do not agree to this license, do not download, install,
//  copy or use the software.
//
//
//                          License Agreement
//                For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
//   * Redistribution's of source code must retain the above copyright notice,
//     this list of conditions and the following disclaimer.
//
//   * Redistribution's in binary form must reproduce the above copyright notice,
//     this list of conditions and the following disclaimer in the documentation
//     and/or other materials provided with the distribution.
//
//   * The name of the copyright holders may not be used to endorse or promote products
//     derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/

#ifndef _npp_staging_hpp_
#define _npp_staging_hpp_

#include "NCV.hpp"


/**
 * \file NPP_staging.hpp
 * NPP Staging Library
 */


/** \defgroup core_npp NPPST Core
 * Basic functions for CUDA streams management.
 * @{
 */


/**
 * Gets an active CUDA stream used by NPPST
 * NOT THREAD SAFE
 * \return Current CUDA stream
 */
NCV_EXPORTS
cudaStream_t nppStGetActiveCUDAstream();


/**
 * Sets an active CUDA stream used by NPPST
 * NOT THREAD SAFE
 * \param cudaStream [IN] cudaStream CUDA stream to become current
 * \return CUDA stream used before
 */
NCV_EXPORTS
cudaStream_t nppStSetActiveCUDAstream(cudaStream_t cudaStream);
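
// Illustrative usage sketch (not part of this header): routing subsequent
// NPPST work to a caller-owned stream and restoring the previous one after.
static void exampleWithStream(cudaStream_t myStream)
{
    cudaStream_t prev = nppStSetActiveCUDAstream(myStream);
    // ... launch NPPST operations here; they now run on myStream ...
    nppStSetActiveCUDAstream(prev);
}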

/*@}*/


/** \defgroup nppi NPPST Image Processing
 * @{
 */


/** Border type
 *
 * Filtering operations assume that each pixel has a neighborhood of pixels.
 * The following structure describes possible ways to define non-existent pixels.
 */
enum NppStBorderType
{
    nppStBorderNone = 0,   ///< There is no need to define additional pixels, image is extended already
    nppStBorderClamp = 1,  ///< Clamp out of range position to borders
    nppStBorderWrap = 2,   ///< Wrap out of range position. Image becomes periodic.
    nppStBorderMirror = 3  ///< Reflect out of range position across borders
};


/**
 * Filter types for image resizing
 */
enum NppStInterpMode
{
    nppStSupersample,      ///< Supersampling. For downscaling only
    nppStBicubic           ///< Bicubic convolution filter, a = -0.5 (cubic Hermite spline)
};


/** Frame interpolation state
 *
 * This structure holds parameters required for frame interpolation.
 * Forward displacement field is a per-pixel mapping from frame 0 to frame 1.
 * Backward displacement field is a per-pixel mapping from frame 1 to frame 0.
 */
struct NppStInterpolationState
{
    NcvSize32u size;       ///< frame size
    Ncv32u nStep;          ///< pitch
    Ncv32f pos;            ///< new frame position
    Ncv32f *pSrcFrame0;    ///< frame 0
    Ncv32f *pSrcFrame1;    ///< frame 1
    Ncv32f *pFU;           ///< forward horizontal displacement
    Ncv32f *pFV;           ///< forward vertical displacement
    Ncv32f *pBU;           ///< backward horizontal displacement
    Ncv32f *pBV;           ///< backward vertical displacement
    Ncv32f *pNewFrame;     ///< new frame
    Ncv32f *ppBuffers[6];  ///< temporary buffers
};


/** Size of a buffer required for interpolation.
 *
 * Several such buffers are required; \see NppStInterpolationState.
 *
 * \param srcSize [IN] Frame size (both frames must be of the same size)
 * \param nStep [IN] Frame line step
 * \param hpSize [OUT] Where to store computed size (host memory)
 *
 * \return NCV status code
 */
NCV_EXPORTS
NCVStatus nppiStGetInterpolationBufferSize(NcvSize32u srcSize,
                                           Ncv32u nStep,
                                           Ncv32u *hpSize);


/** Interpolate frames (images) using provided optical flow (displacement field).
 * 32-bit floating point images, single channel
 *
 * \param pState [IN] structure containing all required parameters (host memory)
 *
 * \return NCV status code
 */
NCV_EXPORTS
NCVStatus nppiStInterpolateFrames(const NppStInterpolationState *pState);
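
// Illustrative usage sketch (not part of this header): synthesizing a frame
// halfway between frame 0 and frame 1. Device pointers and flow fields in
// `state` are assumed to be allocated and filled elsewhere; whether each of
// the six scratch buffers must hold exactly the queried size is an assumption
// based on the doc comment above.
static NCVStatus exampleInterpolateMidFrame(NppStInterpolationState &state)
{
    Ncv32u bufSize = 0;
    NCVStatus stat = nppiStGetInterpolationBufferSize(state.size, state.nStep, &bufSize);
    if (stat != NCV_SUCCESS) return stat;
    // state.ppBuffers[0..5] must each point to bufSize bytes of device memory
    state.pos = 0.5f;  // temporal position of the synthesized frame
    return nppiStInterpolateFrames(&state);
}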

/** Row linear filter. 32-bit floating point image, single channel
 *
 * Apply horizontal linear filter
 *
 * \param pSrc [IN] Source image pointer (CUDA device memory)
 * \param srcSize [IN] Source image size
 * \param nSrcStep [IN] Source image line step
 * \param pDst [OUT] Destination image pointer (CUDA device memory)
 * \param dstSize [IN] Destination image size
 * \param nDstStep [IN] Destination image line step
 * \param oROI [IN] Region of interest in the source image
 * \param borderType [IN] Type of border
 * \param pKernel [IN] Pointer to row kernel values (CUDA device memory)
 * \param nKernelSize [IN] Size of the kernel in pixels
 * \param nAnchor [IN] The kernel row alignment with respect to the position of the input pixel
 * \param multiplier [IN] Value by which the computed result is multiplied
 *
 * \return NCV status code
 */
NCV_EXPORTS
NCVStatus nppiStFilterRowBorder_32f_C1R(const Ncv32f *pSrc,
                                        NcvSize32u srcSize,
                                        Ncv32u nSrcStep,
                                        Ncv32f *pDst,
                                        NcvSize32u dstSize,
                                        Ncv32u nDstStep,
                                        NcvRect32u oROI,
                                        NppStBorderType borderType,
                                        const Ncv32f *pKernel,
                                        Ncv32s nKernelSize,
                                        Ncv32s nAnchor,
                                        Ncv32f multiplier);


/** Column linear filter. 32-bit floating point image, single channel
 *
 * Apply vertical linear filter
 *
 * \param pSrc [IN] Source image pointer (CUDA device memory)
 * \param srcSize [IN] Source image size
 * \param nSrcStep [IN] Source image line step
 * \param pDst [OUT] Destination image pointer (CUDA device memory)
 * \param dstSize [IN] Destination image size
 * \param nDstStep [IN] Destination image line step
 * \param oROI [IN] Region of interest in the source image
 * \param borderType [IN] Type of border
 * \param pKernel [IN] Pointer to column kernel values (CUDA device memory)
 * \param nKernelSize [IN] Size of the kernel in pixels
 * \param nAnchor [IN] The kernel column alignment with respect to the position of the input pixel
 * \param multiplier [IN] Value by which the computed result is multiplied
 *
 * \return NCV status code
 */
NCV_EXPORTS
NCVStatus nppiStFilterColumnBorder_32f_C1R(const Ncv32f *pSrc,
                                           NcvSize32u srcSize,
                                           Ncv32u nSrcStep,
                                           Ncv32f *pDst,
                                           NcvSize32u dstSize,
                                           Ncv32u nDstStep,
                                           NcvRect32u oROI,
                                           NppStBorderType borderType,
                                           const Ncv32f *pKernel,
                                           Ncv32s nKernelSize,
                                           Ncv32s nAnchor,
                                           Ncv32f multiplier);


/** Size of buffer required for vector image warping.
 *
 * \param srcSize [IN] Source image size
 * \param nSrcStep [IN] Source image line step
 * \param hpSize [OUT] Where to store computed size (host memory)
 *
 * \return NCV status code
 */
NCV_EXPORTS
NCVStatus nppiStVectorWarpGetBufferSize(NcvSize32u srcSize,
                                        Ncv32u nSrcStep,
                                        Ncv32u *hpSize);


/** Warp image using provided 2D vector field and 1x1 point spread function.
 * 32-bit floating point image, single channel
 *
 * During warping pixels from the source image may fall between pixels of the destination image.
 * PSF (point spread function) describes how the source image pixel affects pixels of the destination.
 * For a 1x1 PSF only the single pixel with the largest intersection is affected (similar to nearest interpolation).
 *
 * Destination image size and line step must be the same as the source image size and line step
 *
 * \param pSrc [IN] Source image pointer (CUDA device memory)
 * \param srcSize [IN] Source image size
 * \param nSrcStep [IN] Source image line step
 * \param pU [IN] Pointer to horizontal displacement field (CUDA device memory)
 * \param pV [IN] Pointer to vertical displacement field (CUDA device memory)
 * \param nVFStep [IN] Displacement field line step
 * \param timeScale [IN] Value by which displacement field will be scaled for warping
 * \param pDst [OUT] Destination image pointer (CUDA device memory)
 *
 * \return NCV status code
 */
NCV_EXPORTS
NCVStatus nppiStVectorWarp_PSF1x1_32f_C1(const Ncv32f *pSrc,
                                         NcvSize32u srcSize,
                                         Ncv32u nSrcStep,
                                         const Ncv32f *pU,
                                         const Ncv32f *pV,
                                         Ncv32u nVFStep,
                                         Ncv32f timeScale,
                                         Ncv32f *pDst);


/** Warp image using provided 2D vector field and 2x2 point spread function.
 * 32-bit floating point image, single channel
 *
 * During warping pixels from the source image may fall between pixels of the destination image.
 * PSF (point spread function) describes how the source image pixel affects pixels of the destination.
 * For a 2x2 PSF all four intersected pixels will be affected.
 *
 * Destination image size and line step must be the same as the source image size and line step
 *
 * \param pSrc [IN] Source image pointer (CUDA device memory)
 * \param srcSize [IN] Source image size
 * \param nSrcStep [IN] Source image line step
 * \param pU [IN] Pointer to horizontal displacement field (CUDA device memory)
 * \param pV [IN] Pointer to vertical displacement field (CUDA device memory)
 * \param nVFStep [IN] Displacement field line step
 * \param pBuffer [IN] Pointer to the pre-allocated temporary buffer (CUDA device memory)
 * \param timeScale [IN] Value by which displacement field will be scaled for warping
 * \param pDst [OUT] Destination image pointer (CUDA device memory)
 *
 * \return NCV status code
 */
NCV_EXPORTS
NCVStatus nppiStVectorWarp_PSF2x2_32f_C1(const Ncv32f *pSrc,
                                         NcvSize32u srcSize,
                                         Ncv32u nSrcStep,
                                         const Ncv32f *pU,
                                         const Ncv32f *pV,
                                         Ncv32u nVFStep,
                                         Ncv32f *pBuffer,
                                         Ncv32f timeScale,
                                         Ncv32f *pDst);


/** Resize. 32-bit floating point image, single channel
 *
 * Resizes image using specified filter (interpolation type)
 *
 * \param pSrc [IN] Source image pointer (CUDA device memory)
 * \param srcSize [IN] Source image size
 * \param nSrcStep [IN] Source image line step
 * \param srcROI [IN] Source image region of interest
 * \param pDst [OUT] Destination image pointer (CUDA device memory)
 * \param dstSize [IN] Destination image size
 * \param nDstStep [IN] Destination image line step
 * \param dstROI [IN] Destination image region of interest
 * \param xFactor [IN] Row scale factor
 * \param yFactor [IN] Column scale factor
 * \param interpolation [IN] Interpolation type
 *
 * \return NCV status code
 */
NCV_EXPORTS
NCVStatus nppiStResize_32f_C1R(const Ncv32f *pSrc,
                               NcvSize32u srcSize,
                               Ncv32u nSrcStep,
                               NcvRect32u srcROI,
                               Ncv32f *pDst,
                               NcvSize32u dstSize,
                               Ncv32u nDstStep,
                               NcvRect32u dstROI,
                               Ncv32f xFactor,
                               Ncv32f yFactor,
                               NppStInterpMode interpolation);
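
// Illustrative usage sketch (not part of this header): halving an image with
// the supersampling filter, using full-frame ROIs on both sides. Pointers,
// steps, and sizes are assumed to be prepared by the caller.
static NCVStatus exampleHalveImage(const Ncv32f *d_src, NcvSize32u srcSize, Ncv32u srcStep,
                                   Ncv32f *d_dst, NcvSize32u dstSize, Ncv32u dstStep)
{
    NcvRect32u srcROI; srcROI.x = 0; srcROI.y = 0; srcROI.width = srcSize.width; srcROI.height = srcSize.height;
    NcvRect32u dstROI; dstROI.x = 0; dstROI.y = 0; dstROI.width = dstSize.width; dstROI.height = dstSize.height;
    return nppiStResize_32f_C1R(d_src, srcSize, srcStep, srcROI,
                                d_dst, dstSize, dstStep, dstROI,
                                0.5f, 0.5f, nppStSupersample);
}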

/**
 * Downsamples (decimates) an image using the nearest neighbor algorithm. 32-bit unsigned pixels, single channel.
 *
 * \param d_src [IN] Source image pointer (CUDA device memory)
 * \param srcStep [IN] Source image line step
 * \param d_dst [OUT] Destination image pointer (CUDA device memory)
 * \param dstStep [IN] Destination image line step
 * \param srcRoi [IN] Region of interest in the source image
 * \param scale [IN] Downsampling scale factor (positive integer)
 * \param readThruTexture [IN] Performance hint to cache source in texture (true) or read directly (false)
 *
 * \return NCV status code
 */
NCV_EXPORTS
NCVStatus nppiStDecimate_32u_C1R(Ncv32u *d_src, Ncv32u srcStep,
                                 Ncv32u *d_dst, Ncv32u dstStep,
                                 NcvSize32u srcRoi, Ncv32u scale,
                                 NcvBool readThruTexture);
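
// Illustrative usage sketch (not part of this header): decimating a
// device-resident 32-bit image by a factor of 2 in each dimension. Buffers
// and steps are assumptions supplied by the caller.
static NCVStatus exampleDecimateBy2(Ncv32u *d_src, Ncv32u srcStep,
                                    Ncv32u *d_dst, Ncv32u dstStep,
                                    NcvSize32u srcRoi)
{
    // The output covers roughly srcRoi.width / 2 by srcRoi.height / 2 pixels.
    return nppiStDecimate_32u_C1R(d_src, srcStep, d_dst, dstStep,
                                  srcRoi, 2, true /* cache reads in texture */);
}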
|
||||
|
||||
|
||||
/**
|
||||
* Downsamples (decimates) an image using the nearest neighbor algorithm. 32-bit signed pixels, single channel.
|
||||
* \see nppiStDecimate_32u_C1R
|
||||
*/
|
||||
NCV_EXPORTS
|
||||
NCVStatus nppiStDecimate_32s_C1R(Ncv32s *d_src, Ncv32u srcStep,
|
||||
Ncv32s *d_dst, Ncv32u dstStep,
|
||||
NcvSize32u srcRoi, Ncv32u scale,
|
||||
NcvBool readThruTexture);
|
||||
|
||||
|
||||
/**
|
||||
* Downsamples (decimates) an image using the nearest neighbor algorithm. 32-bit float pixels, single channel.
|
||||
* \see nppiStDecimate_32u_C1R
|
||||
*/
|
||||
NCV_EXPORTS
|
||||
NCVStatus nppiStDecimate_32f_C1R(Ncv32f *d_src, Ncv32u srcStep,
|
||||
Ncv32f *d_dst, Ncv32u dstStep,
|
||||
NcvSize32u srcRoi, Ncv32u scale,
|
||||
NcvBool readThruTexture);
|
||||
|
||||
|
||||
/**
|
||||
* Downsamples (decimates) an image using the nearest neighbor algorithm. 64-bit unsigned pixels, single channel.
|
||||
* \see nppiStDecimate_32u_C1R
|
||||
*/
|
||||
NCV_EXPORTS
|
||||
NCVStatus nppiStDecimate_64u_C1R(Ncv64u *d_src, Ncv32u srcStep,
|
||||
Ncv64u *d_dst, Ncv32u dstStep,
|
||||
NcvSize32u srcRoi, Ncv32u scale,
|
||||
NcvBool readThruTexture);
|
||||
|
||||
|
||||
/**
|
||||
* Downsamples (decimates) an image using the nearest neighbor algorithm. 64-bit signed pixels, single channel.
|
||||
* \see nppiStDecimate_32u_C1R
|
||||
*/
|
||||
NCV_EXPORTS
|
||||
NCVStatus nppiStDecimate_64s_C1R(Ncv64s *d_src, Ncv32u srcStep,
|
||||
Ncv64s *d_dst, Ncv32u dstStep,
|
||||
NcvSize32u srcRoi, Ncv32u scale,
|
||||
NcvBool readThruTexture);
|
||||
|
||||
|
||||
/**
|
||||
* Downsamples (decimates) an image using the nearest neighbor algorithm. 64-bit float pixels, single channel.
|
||||
* \see nppiStDecimate_32u_C1R
|
||||
*/
|
||||
NCV_EXPORTS
|
||||
NCVStatus nppiStDecimate_64f_C1R(Ncv64f *d_src, Ncv32u srcStep,
|
||||
Ncv64f *d_dst, Ncv32u dstStep,
|
||||
NcvSize32u srcRoi, Ncv32u scale,
|
||||
NcvBool readThruTexture);
|
||||
|
||||
|
||||
/**
|
||||
* Downsamples (decimates) an image using the nearest neighbor algorithm. 32-bit unsigned pixels, single channel. Host implementation.
|
||||
*
|
||||
* \param h_src [IN] Source image pointer (Host or pinned memory)
|
||||
* \param srcStep [IN] Source image line step
|
||||
* \param h_dst [OUT] Destination image pointer (Host or pinned memory)
|
||||
* \param dstStep [IN] Destination image line step
|
||||
* \param srcRoi [IN] Region of interest in the source image
|
||||
* \param scale [IN] Downsampling scale factor (positive integer)
|
||||
*
|
||||
* \return NCV status code
|
||||
*/
|
||||
NCV_EXPORTS
|
||||
NCVStatus nppiStDecimate_32u_C1R_host(Ncv32u *h_src, Ncv32u srcStep,
|
||||
Ncv32u *h_dst, Ncv32u dstStep,
|
||||
NcvSize32u srcRoi, Ncv32u scale);
|
||||
|
||||
|
||||
/**
|
||||
* Downsamples (decimates) an image using the nearest neighbor algorithm. 32-bit signed pixels, single channel. Host implementation.
|
||||
* \see nppiStDecimate_32u_C1R_host
|
||||
*/
|
||||
NCV_EXPORTS
|
||||
NCVStatus nppiStDecimate_32s_C1R_host(Ncv32s *h_src, Ncv32u srcStep,
|
||||
Ncv32s *h_dst, Ncv32u dstStep,
|
||||
NcvSize32u srcRoi, Ncv32u scale);
|
||||
|
||||
|
||||
/**
|
||||
* Downsamples (decimates) an image using the nearest neighbor algorithm. 32-bit float pixels, single channel. Host implementation.
|
||||
* \see nppiStDecimate_32u_C1R_host
|
||||
*/
|
||||
NCV_EXPORTS
|
||||
NCVStatus nppiStDecimate_32f_C1R_host(Ncv32f *h_src, Ncv32u srcStep,
|
||||
Ncv32f *h_dst, Ncv32u dstStep,
|
||||
NcvSize32u srcRoi, Ncv32u scale);
|
||||
|
||||
|
||||
/**
|
||||
* Downsamples (decimates) an image using the nearest neighbor algorithm. 64-bit unsigned pixels, single channel. Host implementation.
|
||||
* \see nppiStDecimate_32u_C1R_host
|
||||
*/
|
||||
NCV_EXPORTS
|
||||
NCVStatus nppiStDecimate_64u_C1R_host(Ncv64u *h_src, Ncv32u srcStep,
|
||||
Ncv64u *h_dst, Ncv32u dstStep,
|
||||
NcvSize32u srcRoi, Ncv32u scale);
|
||||
|
||||
|
||||
/**
|
||||
* Downsamples (decimates) an image using the nearest neighbor algorithm. 64-bit signed pixels, single channel. Host implementation.
|
||||
* \see nppiStDecimate_32u_C1R_host
|
||||
*/
|
||||
NCV_EXPORTS
|
||||
NCVStatus nppiStDecimate_64s_C1R_host(Ncv64s *h_src, Ncv32u srcStep,
|
||||
Ncv64s *h_dst, Ncv32u dstStep,
|
||||
NcvSize32u srcRoi, Ncv32u scale);
|
||||
|
||||
|
||||
/**
|
||||
* Downsamples (decimates) an image using the nearest neighbor algorithm. 64-bit float pixels, single channel. Host implementation.
|
||||
* \see nppiStDecimate_32u_C1R_host
|
||||
*/
|
||||
NCV_EXPORTS
|
||||
NCVStatus nppiStDecimate_64f_C1R_host(Ncv64f *h_src, Ncv32u srcStep,
|
||||
Ncv64f *h_dst, Ncv32u dstStep,
|
||||
NcvSize32u srcRoi, Ncv32u scale);
|
||||
|
||||
|
||||
/**
|
||||
* Computes standard deviation for each rectangular region of the input image using integral images.
|
||||
*
|
||||
* \param d_sum [IN] Integral image pointer (CUDA device memory)
|
||||
* \param sumStep [IN] Integral image line step
|
||||
* \param d_sqsum [IN] Squared integral image pointer (CUDA device memory)
|
||||
* \param sqsumStep [IN] Squared integral image line step
|
||||
* \param d_norm [OUT] Stddev image pointer (CUDA device memory). Each pixel contains stddev of a rect with top-left corner at the original location in the image
|
||||
* \param normStep [IN] Stddev image line step
|
||||
* \param roi [IN] Region of interest in the source image
|
||||
* \param rect [IN] Rectangular region to calculate stddev over
|
||||
* \param scaleArea [IN] Multiplication factor to account decimated scale
|
||||
* \param readThruTexture [IN] Performance hint to cache source in texture (true) or read directly (false)
|
||||
*
|
||||
* \return NCV status code
|
||||
*/
|
||||
NCV_EXPORTS
|
||||
NCVStatus nppiStRectStdDev_32f_C1R(Ncv32u *d_sum, Ncv32u sumStep,
|
||||
Ncv64u *d_sqsum, Ncv32u sqsumStep,
|
||||
Ncv32f *d_norm, Ncv32u normStep,
|
||||
NcvSize32u roi, NcvRect32u rect,
|
||||
Ncv32f scaleArea, NcvBool readThruTexture);
|
||||
|
||||
|
||||
/**
|
||||
* Computes standard deviation for each rectangular region of the input image using integral images. Host implementation
|
||||
*
|
||||
* \param h_sum [IN] Integral image pointer (Host or pinned memory)
|
||||
* \param sumStep [IN] Integral image line step
|
||||
* \param h_sqsum [IN] Squared integral image pointer (Host or pinned memory)
|
||||
* \param sqsumStep [IN] Squared integral image line step
|
||||
* \param h_norm [OUT] Stddev image pointer (Host or pinned memory). Each pixel contains stddev of a rect with top-left corner at the original location in the image
|
||||
* \param normStep [IN] Stddev image line step
|
||||
* \param roi [IN] Region of interest in the source image
|
||||
* \param rect [IN] Rectangular region to calculate stddev over
|
||||
* \param scaleArea [IN] Multiplication factor to account decimated scale
|
||||
*
|
||||
* \return NCV status code
|
||||
*/
|
||||
NCV_EXPORTS
|
||||
NCVStatus nppiStRectStdDev_32f_C1R_host(Ncv32u *h_sum, Ncv32u sumStep,
|
||||
Ncv64u *h_sqsum, Ncv32u sqsumStep,
|
||||
Ncv32f *h_norm, Ncv32u normStep,
|
||||
NcvSize32u roi, NcvRect32u rect,
|
||||
Ncv32f scaleArea);
|
||||
|
||||
|
||||
/**
|
||||
* Transposes an image. 32-bit unsigned pixels, single channel
|
||||
*
|
||||
* \param d_src [IN] Source image pointer (CUDA device memory)
|
||||
* \param srcStride [IN] Source image line step
|
||||
* \param d_dst [OUT] Destination image pointer (CUDA device memory)
|
||||
* \param dstStride [IN] Destination image line step
|
||||
* \param srcRoi [IN] Region of interest of the source image
|
||||
*
|
||||
* \return NCV status code
|
||||
*/
|
||||
NCV_EXPORTS
|
||||
NCVStatus nppiStTranspose_32u_C1R(Ncv32u *d_src, Ncv32u srcStride,
|
||||
Ncv32u *d_dst, Ncv32u dstStride, NcvSize32u srcRoi);
|
||||
|
||||
|
||||
/**
|
||||
* Transposes an image. 32-bit signed pixels, single channel
|
||||
* \see nppiStTranspose_32u_C1R
|
||||
*/
|
||||
NCV_EXPORTS
|
||||
NCVStatus nppiStTranspose_32s_C1R(Ncv32s *d_src, Ncv32u srcStride,
|
||||
Ncv32s *d_dst, Ncv32u dstStride, NcvSize32u srcRoi);
|
||||
|
||||
|
||||
/**
|
||||
* Transposes an image. 32-bit float pixels, single channel
|
||||
* \see nppiStTranspose_32u_C1R
|
||||
*/
|
||||
NCV_EXPORTS
|
||||
NCVStatus nppiStTranspose_32f_C1R(Ncv32f *d_src, Ncv32u srcStride,
|
||||
Ncv32f *d_dst, Ncv32u dstStride, NcvSize32u srcRoi);
|
||||
|
||||
|
||||
/**
|
||||
 * Transposes an image. 64-bit unsigned pixels, single channel
 * \see nppiStTranspose_32u_C1R
 */
NCV_EXPORTS
NCVStatus nppiStTranspose_64u_C1R(Ncv64u *d_src, Ncv32u srcStride,
                                  Ncv64u *d_dst, Ncv32u dstStride, NcvSize32u srcRoi);


/**
 * Transposes an image. 64-bit signed pixels, single channel
 * \see nppiStTranspose_32u_C1R
 */
NCV_EXPORTS
NCVStatus nppiStTranspose_64s_C1R(Ncv64s *d_src, Ncv32u srcStride,
                                  Ncv64s *d_dst, Ncv32u dstStride, NcvSize32u srcRoi);


/**
 * Transposes an image. 64-bit float pixels, single channel
 * \see nppiStTranspose_32u_C1R
 */
NCV_EXPORTS
NCVStatus nppiStTranspose_64f_C1R(Ncv64f *d_src, Ncv32u srcStride,
                                  Ncv64f *d_dst, Ncv32u dstStride, NcvSize32u srcRoi);


/**
 * Transposes an image. 128-bit pixels of any type, single channel
 * \see nppiStTranspose_32u_C1R
 */
NCV_EXPORTS
NCVStatus nppiStTranspose_128_C1R(void *d_src, Ncv32u srcStep,
                                  void *d_dst, Ncv32u dstStep, NcvSize32u srcRoi);


/**
 * Transposes an image. 32-bit unsigned pixels, single channel. Host implementation
 *
 * \param h_src [IN] Source image pointer (Host or pinned memory)
 * \param srcStride [IN] Source image line step
 * \param h_dst [OUT] Destination image pointer (Host or pinned memory)
 * \param dstStride [IN] Destination image line step
 * \param srcRoi [IN] Region of interest of the source image
 *
 * \return NCV status code
 */
NCV_EXPORTS
NCVStatus nppiStTranspose_32u_C1R_host(Ncv32u *h_src, Ncv32u srcStride,
                                       Ncv32u *h_dst, Ncv32u dstStride, NcvSize32u srcRoi);


/**
 * Transposes an image. 32-bit signed pixels, single channel. Host implementation
 * \see nppiStTranspose_32u_C1R_host
 */
NCV_EXPORTS
NCVStatus nppiStTranspose_32s_C1R_host(Ncv32s *h_src, Ncv32u srcStride,
                                       Ncv32s *h_dst, Ncv32u dstStride, NcvSize32u srcRoi);


/**
 * Transposes an image. 32-bit float pixels, single channel. Host implementation
 * \see nppiStTranspose_32u_C1R_host
 */
NCV_EXPORTS
NCVStatus nppiStTranspose_32f_C1R_host(Ncv32f *h_src, Ncv32u srcStride,
                                       Ncv32f *h_dst, Ncv32u dstStride, NcvSize32u srcRoi);


/**
 * Transposes an image. 64-bit unsigned pixels, single channel. Host implementation
 * \see nppiStTranspose_32u_C1R_host
 */
NCV_EXPORTS
NCVStatus nppiStTranspose_64u_C1R_host(Ncv64u *h_src, Ncv32u srcStride,
                                       Ncv64u *h_dst, Ncv32u dstStride, NcvSize32u srcRoi);


/**
 * Transposes an image. 64-bit signed pixels, single channel. Host implementation
 * \see nppiStTranspose_32u_C1R_host
 */
NCV_EXPORTS
NCVStatus nppiStTranspose_64s_C1R_host(Ncv64s *h_src, Ncv32u srcStride,
                                       Ncv64s *h_dst, Ncv32u dstStride, NcvSize32u srcRoi);


/**
 * Transposes an image. 64-bit float pixels, single channel. Host implementation
 * \see nppiStTranspose_32u_C1R_host
 */
NCV_EXPORTS
NCVStatus nppiStTranspose_64f_C1R_host(Ncv64f *h_src, Ncv32u srcStride,
                                       Ncv64f *h_dst, Ncv32u dstStride, NcvSize32u srcRoi);


/**
 * Transposes an image. 128-bit pixels of any type, single channel. Host implementation
 * \see nppiStTranspose_32u_C1R_host
 */
NCV_EXPORTS
NCVStatus nppiStTranspose_128_C1R_host(void *h_src, Ncv32u srcStep,
                                       void *h_dst, Ncv32u dstStep, NcvSize32u srcRoi);


/**
 * Calculates the size of the temporary buffer for integral image creation
 *
 * \param roiSize [IN] Size of the input image
 * \param pBufsize [OUT] Pointer to host variable that returns the size of the temporary buffer (in bytes)
 * \param devProp [IN] CUDA device properties structure, containing texture alignment information
 *
 * \return NCV status code
 */
NCV_EXPORTS
NCVStatus nppiStIntegralGetSize_8u32u(NcvSize32u roiSize, Ncv32u *pBufsize, cudaDeviceProp &devProp);


/**
 * Calculates the size of the temporary buffer for integral image creation
 * \see nppiStIntegralGetSize_8u32u
 */
NCV_EXPORTS
NCVStatus nppiStIntegralGetSize_32f32f(NcvSize32u roiSize, Ncv32u *pBufsize, cudaDeviceProp &devProp);


/**
 * Creates an integral image representation for the input image
 *
 * \param d_src [IN] Source image pointer (CUDA device memory)
 * \param srcStep [IN] Source image line step
 * \param d_dst [OUT] Destination integral image pointer (CUDA device memory)
 * \param dstStep [IN] Destination image line step
 * \param roiSize [IN] Region of interest of the source image
 * \param pBuffer [IN] Pointer to the pre-allocated temporary buffer (CUDA device memory)
 * \param bufSize [IN] Size of the pBuffer in bytes
 * \param devProp [IN] CUDA device properties structure, containing texture alignment information
 *
 * \return NCV status code
 */
NCV_EXPORTS
NCVStatus nppiStIntegral_8u32u_C1R(Ncv8u *d_src, Ncv32u srcStep,
                                   Ncv32u *d_dst, Ncv32u dstStep, NcvSize32u roiSize,
                                   Ncv8u *pBuffer, Ncv32u bufSize, cudaDeviceProp &devProp);
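//
// Usage sketch (illustrative, not part of the original header): the
// GetSize/run pair above is the intended calling pattern. "d_img" and
// "d_sat" are hypothetical device pointers introduced for this example only.
//
//     cudaDeviceProp devProp;
//     cudaGetDeviceProperties(&devProp, 0);
//     Ncv32u bufSize = 0;
//     nppiStIntegralGetSize_8u32u(roi, &bufSize, devProp);       // 1) query scratch size
//     Ncv8u *pBuffer = NULL;
//     cudaMalloc((void **)&pBuffer, bufSize);                    // 2) allocate scratch
//     nppiStIntegral_8u32u_C1R(d_img, srcStep, d_sat, dstStep,   // 3) build the integral
//                              roi, pBuffer, bufSize, devProp);  //    image into d_sat
//     cudaFree(pBuffer);
//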


/**
 * Creates an integral image representation for the input image
 * \see nppiStIntegral_8u32u_C1R
 */
NCV_EXPORTS
NCVStatus nppiStIntegral_32f32f_C1R(Ncv32f *d_src, Ncv32u srcStep,
                                    Ncv32f *d_dst, Ncv32u dstStep, NcvSize32u roiSize,
                                    Ncv8u *pBuffer, Ncv32u bufSize, cudaDeviceProp &devProp);


/**
 * Creates an integral image representation for the input image. Host implementation
 *
 * \param h_src [IN] Source image pointer (Host or pinned memory)
 * \param srcStep [IN] Source image line step
 * \param h_dst [OUT] Destination integral image pointer (Host or pinned memory)
 * \param dstStep [IN] Destination image line step
 * \param roiSize [IN] Region of interest of the source image
 *
 * \return NCV status code
 */
NCV_EXPORTS
NCVStatus nppiStIntegral_8u32u_C1R_host(Ncv8u *h_src, Ncv32u srcStep,
                                        Ncv32u *h_dst, Ncv32u dstStep, NcvSize32u roiSize);


/**
 * Creates an integral image representation for the input image. Host implementation
 * \see nppiStIntegral_8u32u_C1R_host
 */
NCV_EXPORTS
NCVStatus nppiStIntegral_32f32f_C1R_host(Ncv32f *h_src, Ncv32u srcStep,
                                         Ncv32f *h_dst, Ncv32u dstStep, NcvSize32u roiSize);


/**
 * Calculates the size of the temporary buffer for squared integral image creation
 *
 * \param roiSize [IN] Size of the input image
 * \param pBufsize [OUT] Pointer to host variable that returns the size of the temporary buffer (in bytes)
 * \param devProp [IN] CUDA device properties structure, containing texture alignment information
 *
 * \return NCV status code
 */
NCV_EXPORTS
NCVStatus nppiStSqrIntegralGetSize_8u64u(NcvSize32u roiSize, Ncv32u *pBufsize, cudaDeviceProp &devProp);


/**
 * Creates a squared integral image representation for the input image
 *
 * \param d_src [IN] Source image pointer (CUDA device memory)
 * \param srcStep [IN] Source image line step
 * \param d_dst [OUT] Destination squared integral image pointer (CUDA device memory)
 * \param dstStep [IN] Destination image line step
 * \param roiSize [IN] Region of interest of the source image
 * \param pBuffer [IN] Pointer to the pre-allocated temporary buffer (CUDA device memory)
 * \param bufSize [IN] Size of the pBuffer in bytes
 * \param devProp [IN] CUDA device properties structure, containing texture alignment information
 *
 * \return NCV status code
 */
NCV_EXPORTS
NCVStatus nppiStSqrIntegral_8u64u_C1R(Ncv8u *d_src, Ncv32u srcStep,
                                      Ncv64u *d_dst, Ncv32u dstStep, NcvSize32u roiSize,
                                      Ncv8u *pBuffer, Ncv32u bufSize, cudaDeviceProp &devProp);


/**
 * Creates a squared integral image representation for the input image. Host implementation
 *
 * \param h_src [IN] Source image pointer (Host or pinned memory)
 * \param srcStep [IN] Source image line step
 * \param h_dst [OUT] Destination squared integral image pointer (Host or pinned memory)
 * \param dstStep [IN] Destination image line step
 * \param roiSize [IN] Region of interest of the source image
 *
 * \return NCV status code
 */
NCV_EXPORTS
NCVStatus nppiStSqrIntegral_8u64u_C1R_host(Ncv8u *h_src, Ncv32u srcStep,
                                           Ncv64u *h_dst, Ncv32u dstStep, NcvSize32u roiSize);


/*@}*/


/** \defgroup npps NPPST Signal Processing
 * @{
 */


/**
 * Calculates the size of the temporary buffer for vector compaction. 32-bit unsigned values
 *
 * \param srcLen [IN] Length of the input vector in elements
 * \param pBufsize [OUT] Pointer to host variable that returns the size of the temporary buffer (in bytes)
 * \param devProp [IN] CUDA device properties structure, containing texture alignment information
 *
 * \return NCV status code
 */
NCV_EXPORTS
NCVStatus nppsStCompactGetSize_32u(Ncv32u srcLen, Ncv32u *pBufsize, cudaDeviceProp &devProp);


/**
 * Calculates the size of the temporary buffer for vector compaction. 32-bit signed values
 * \see nppsStCompactGetSize_32u
 */
NCV_EXPORTS
NCVStatus nppsStCompactGetSize_32s(Ncv32u srcLen, Ncv32u *pBufsize, cudaDeviceProp &devProp);


/**
 * Calculates the size of the temporary buffer for vector compaction. 32-bit float values
 * \see nppsStCompactGetSize_32u
 */
NCV_EXPORTS
NCVStatus nppsStCompactGetSize_32f(Ncv32u srcLen, Ncv32u *pBufsize, cudaDeviceProp &devProp);


/**
 * Compacts the input vector by removing elements of specified value. 32-bit unsigned values
 *
 * \param d_src [IN] Source vector pointer (CUDA device memory)
 * \param srcLen [IN] Source vector length
 * \param d_dst [OUT] Destination vector pointer (CUDA device memory)
 * \param p_dstLen [OUT] Pointer to the destination vector length (Pinned memory or NULL)
 * \param elemRemove [IN] The value to be removed
 * \param pBuffer [IN] Pointer to the pre-allocated temporary buffer (CUDA device memory)
 * \param bufSize [IN] Size of the pBuffer in bytes
 * \param devProp [IN] CUDA device properties structure, containing texture alignment information
 *
 * \return NCV status code
 */
NCV_EXPORTS
NCVStatus nppsStCompact_32u(Ncv32u *d_src, Ncv32u srcLen,
                            Ncv32u *d_dst, Ncv32u *p_dstLen,
                            Ncv32u elemRemove, Ncv8u *pBuffer,
                            Ncv32u bufSize, cudaDeviceProp &devProp);
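//
// Usage sketch (illustrative): compact a device vector by dropping a sentinel
// value. "d_in"/"d_out" are hypothetical device pointers; the output length is
// written through p_dstLen, which must be pinned host memory (or NULL).
//
//     Ncv32u bufSize = 0;
//     nppsStCompactGetSize_32u(srcLen, &bufSize, devProp);
//     Ncv8u *pBuffer = NULL;
//     cudaMalloc((void **)&pBuffer, bufSize);
//     Ncv32u *p_dstLen = NULL;
//     cudaMallocHost((void **)&p_dstLen, sizeof(Ncv32u));
//     nppsStCompact_32u(d_in, srcLen, d_out, p_dstLen,
//                       0xFFFFFFFFu, pBuffer, bufSize, devProp);  // drop 0xFFFFFFFF
//     cudaFree(pBuffer);
//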


/**
 * Compacts the input vector by removing elements of specified value. 32-bit signed values
 * \see nppsStCompact_32u
 */
NCV_EXPORTS
NCVStatus nppsStCompact_32s(Ncv32s *d_src, Ncv32u srcLen,
                            Ncv32s *d_dst, Ncv32u *p_dstLen,
                            Ncv32s elemRemove, Ncv8u *pBuffer,
                            Ncv32u bufSize, cudaDeviceProp &devProp);


/**
 * Compacts the input vector by removing elements of specified value. 32-bit float values
 * \see nppsStCompact_32u
 */
NCV_EXPORTS
NCVStatus nppsStCompact_32f(Ncv32f *d_src, Ncv32u srcLen,
                            Ncv32f *d_dst, Ncv32u *p_dstLen,
                            Ncv32f elemRemove, Ncv8u *pBuffer,
                            Ncv32u bufSize, cudaDeviceProp &devProp);


/**
 * Compacts the input vector by removing elements of specified value. 32-bit unsigned values. Host implementation
 *
 * \param h_src [IN] Source vector pointer (Host or pinned memory)
 * \param srcLen [IN] Source vector length
 * \param h_dst [OUT] Destination vector pointer (Host or pinned memory)
 * \param dstLen [OUT] Pointer to the destination vector length (can be NULL)
 * \param elemRemove [IN] The value to be removed
 *
 * \return NCV status code
 */
NCV_EXPORTS
NCVStatus nppsStCompact_32u_host(Ncv32u *h_src, Ncv32u srcLen,
                                 Ncv32u *h_dst, Ncv32u *dstLen, Ncv32u elemRemove);


/**
 * Compacts the input vector by removing elements of specified value. 32-bit signed values. Host implementation
 * \see nppsStCompact_32u_host
 */
NCV_EXPORTS
NCVStatus nppsStCompact_32s_host(Ncv32s *h_src, Ncv32u srcLen,
                                 Ncv32s *h_dst, Ncv32u *dstLen, Ncv32s elemRemove);


/**
 * Compacts the input vector by removing elements of specified value. 32-bit float values. Host implementation
 * \see nppsStCompact_32u_host
 */
NCV_EXPORTS
NCVStatus nppsStCompact_32f_host(Ncv32f *h_src, Ncv32u srcLen,
                                 Ncv32f *h_dst, Ncv32u *dstLen, Ncv32f elemRemove);


/*@}*/


#endif // _npp_staging_hpp_
@@ -1,908 +0,0 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
//                          License Agreement
//               For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
//   * Redistribution's of source code must retain the above copyright notice,
//     this list of conditions and the following disclaimer.
//
//   * Redistribution's in binary form must reproduce the above copyright notice,
//     this list of conditions and the following disclaimer in the documentation
//     and/or other materials provided with the distribution.
//
//   * The name of the copyright holders may not be used to endorse or promote products
//     derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/

#include <iostream>
#include <vector>
#include "NCV.hpp"


//==============================================================================
//
// Error handling helpers
//
//==============================================================================


static void stdDebugOutput(const cv::String &msg)
{
    std::cout << msg.c_str() << std::endl;
}


static NCVDebugOutputHandler *debugOutputHandler = stdDebugOutput;


void ncvDebugOutput(const cv::String &msg)
{
    debugOutputHandler(msg);
}


void ncvSetDebugOutputHandler(NCVDebugOutputHandler *func)
{
    debugOutputHandler = func;
}

#if !defined CUDA_DISABLER


//==============================================================================
//
// Memory wrappers and helpers
//
//==============================================================================


Ncv32u alignUp(Ncv32u what, Ncv32u alignment)
{
    Ncv32u alignMask = alignment-1;
    Ncv32u inverseAlignMask = ~alignMask;
    Ncv32u res = (what + alignMask) & inverseAlignMask;
    return res;
}
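// Worked example (illustrative): for a power-of-two alignment the mask trick
// rounds up to the next multiple, e.g. alignUp(10, 8) == 16 and
// alignUp(16, 8) == 16; alignment must be a power of two for this to hold.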


void NCVMemPtr::clear()
{
    ptr = NULL;
    memtype = NCVMemoryTypeNone;
}


void NCVMemSegment::clear()
{
    begin.clear();
    size = 0;
}


NCVStatus memSegCopyHelper(void *dst, NCVMemoryType dstType, const void *src, NCVMemoryType srcType, size_t sz, cudaStream_t cuStream)
{
    NCVStatus ncvStat;
    switch (dstType)
    {
    case NCVMemoryTypeHostPageable:
    case NCVMemoryTypeHostPinned:
        switch (srcType)
        {
        case NCVMemoryTypeHostPageable:
        case NCVMemoryTypeHostPinned:
            memcpy(dst, src, sz);
            ncvStat = NCV_SUCCESS;
            break;
        case NCVMemoryTypeDevice:
            if (cuStream != 0)
            {
                ncvAssertCUDAReturn(cudaMemcpyAsync(dst, src, sz, cudaMemcpyDeviceToHost, cuStream), NCV_CUDA_ERROR);
            }
            else
            {
                ncvAssertCUDAReturn(cudaMemcpy(dst, src, sz, cudaMemcpyDeviceToHost), NCV_CUDA_ERROR);
            }
            ncvStat = NCV_SUCCESS;
            break;
        default:
            ncvStat = NCV_MEM_RESIDENCE_ERROR;
        }
        break;
    case NCVMemoryTypeDevice:
        switch (srcType)
        {
        case NCVMemoryTypeHostPageable:
        case NCVMemoryTypeHostPinned:
            if (cuStream != 0)
            {
                ncvAssertCUDAReturn(cudaMemcpyAsync(dst, src, sz, cudaMemcpyHostToDevice, cuStream), NCV_CUDA_ERROR);
            }
            else
            {
                ncvAssertCUDAReturn(cudaMemcpy(dst, src, sz, cudaMemcpyHostToDevice), NCV_CUDA_ERROR);
            }
            ncvStat = NCV_SUCCESS;
            break;
        case NCVMemoryTypeDevice:
            if (cuStream != 0)
            {
                ncvAssertCUDAReturn(cudaMemcpyAsync(dst, src, sz, cudaMemcpyDeviceToDevice, cuStream), NCV_CUDA_ERROR);
            }
            else
            {
                ncvAssertCUDAReturn(cudaMemcpy(dst, src, sz, cudaMemcpyDeviceToDevice), NCV_CUDA_ERROR);
            }
            ncvStat = NCV_SUCCESS;
            break;
        default:
            ncvStat = NCV_MEM_RESIDENCE_ERROR;
        }
        break;
    default:
        ncvStat = NCV_MEM_RESIDENCE_ERROR;
    }

    return ncvStat;
}
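// Usage sketch (illustrative): synchronous device-to-host copy through the
// helper; "d_buf" is a hypothetical device pointer. Passing a non-zero
// cuStream switches every branch above to the async cudaMemcpy* variants.
//
//     std::vector<Ncv8u> h_buf(sz);
//     NCVStatus stat = memSegCopyHelper(&h_buf[0], NCVMemoryTypeHostPageable,
//                                       d_buf, NCVMemoryTypeDevice, sz, 0);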


NCVStatus memSegCopyHelper2D(void *dst, Ncv32u dstPitch, NCVMemoryType dstType,
                             const void *src, Ncv32u srcPitch, NCVMemoryType srcType,
                             Ncv32u widthbytes, Ncv32u height, cudaStream_t cuStream)
{
    NCVStatus ncvStat;
    switch (dstType)
    {
    case NCVMemoryTypeHostPageable:
    case NCVMemoryTypeHostPinned:
        switch (srcType)
        {
        case NCVMemoryTypeHostPageable:
        case NCVMemoryTypeHostPinned:
            for (Ncv32u i=0; i<height; i++)
            {
                memcpy((char*)dst + i * dstPitch, (char*)src + i * srcPitch, widthbytes);
            }
            ncvStat = NCV_SUCCESS;
            break;
        case NCVMemoryTypeDevice:
            if (cuStream != 0)
            {
                ncvAssertCUDAReturn(cudaMemcpy2DAsync(dst, dstPitch, src, srcPitch, widthbytes, height, cudaMemcpyDeviceToHost, cuStream), NCV_CUDA_ERROR);
            }
            else
            {
                ncvAssertCUDAReturn(cudaMemcpy2D(dst, dstPitch, src, srcPitch, widthbytes, height, cudaMemcpyDeviceToHost), NCV_CUDA_ERROR);
            }
            ncvStat = NCV_SUCCESS;
            break;
        default:
            ncvStat = NCV_MEM_RESIDENCE_ERROR;
        }
        break;
    case NCVMemoryTypeDevice:
        switch (srcType)
        {
        case NCVMemoryTypeHostPageable:
        case NCVMemoryTypeHostPinned:
            if (cuStream != 0)
            {
                ncvAssertCUDAReturn(cudaMemcpy2DAsync(dst, dstPitch, src, srcPitch, widthbytes, height, cudaMemcpyHostToDevice, cuStream), NCV_CUDA_ERROR);
            }
            else
            {
                ncvAssertCUDAReturn(cudaMemcpy2D(dst, dstPitch, src, srcPitch, widthbytes, height, cudaMemcpyHostToDevice), NCV_CUDA_ERROR);
            }
            ncvStat = NCV_SUCCESS;
            break;
        case NCVMemoryTypeDevice:
            if (cuStream != 0)
            {
                ncvAssertCUDAReturn(cudaMemcpy2DAsync(dst, dstPitch, src, srcPitch, widthbytes, height, cudaMemcpyDeviceToDevice, cuStream), NCV_CUDA_ERROR);
            }
            else
            {
                ncvAssertCUDAReturn(cudaMemcpy2D(dst, dstPitch, src, srcPitch, widthbytes, height, cudaMemcpyDeviceToDevice), NCV_CUDA_ERROR);
            }
            ncvStat = NCV_SUCCESS;
            break;
        default:
            ncvStat = NCV_MEM_RESIDENCE_ERROR;
        }
        break;
    default:
        ncvStat = NCV_MEM_RESIDENCE_ERROR;
    }

    return ncvStat;
}


//===================================================================
//
// NCVMemStackAllocator class members implementation
//
//===================================================================


NCVMemStackAllocator::NCVMemStackAllocator(Ncv32u alignment_)
    :
    currentSize(0),
    _maxSize(0),
    allocBegin(NULL),
    begin(NULL),
    end(NULL),
    _memType(NCVMemoryTypeNone),
    _alignment(alignment_),
    bReusesMemory(false)
{
    NcvBool bProperAlignment = (alignment_ & (alignment_ - 1)) == 0;
    ncvAssertPrintCheck(bProperAlignment, "NCVMemStackAllocator ctor:: alignment not power of 2");
}


NCVMemStackAllocator::NCVMemStackAllocator(NCVMemoryType memT, size_t capacity, Ncv32u alignment_, void *reusePtr)
    :
    currentSize(0),
    _maxSize(0),
    allocBegin(NULL),
    _memType(memT),
    _alignment(alignment_)
{
    NcvBool bProperAlignment = (alignment_ & (alignment_ - 1)) == 0;
    ncvAssertPrintCheck(bProperAlignment, "NCVMemStackAllocator ctor:: _alignment not power of 2");
    ncvAssertPrintCheck(memT != NCVMemoryTypeNone, "NCVMemStackAllocator ctor:: Incorrect allocator type");

    allocBegin = NULL;

    if (reusePtr == NULL && capacity != 0)
    {
        bReusesMemory = false;
        switch (memT)
        {
        case NCVMemoryTypeDevice:
            ncvAssertCUDAReturn(cudaMalloc(&allocBegin, capacity), );
            break;
        case NCVMemoryTypeHostPinned:
            ncvAssertCUDAReturn(cudaMallocHost(&allocBegin, capacity), );
            break;
        case NCVMemoryTypeHostPageable:
            allocBegin = (Ncv8u *)malloc(capacity);
            break;
        default:;
        }
    }
    else
    {
        bReusesMemory = true;
        allocBegin = (Ncv8u *)reusePtr;
    }

    if (capacity == 0)
    {
        allocBegin = (Ncv8u *)(0x1);
    }

    if (!isCounting())
    {
        begin = allocBegin;
        end = begin + capacity;
    }
}


NCVMemStackAllocator::~NCVMemStackAllocator()
{
    if (allocBegin != NULL)
    {
        ncvAssertPrintCheck(currentSize == 0, "NCVMemStackAllocator dtor:: not all objects were deallocated properly, forcing destruction");

        if (!bReusesMemory && (allocBegin != (Ncv8u *)(0x1)))
        {
            switch (_memType)
            {
            case NCVMemoryTypeDevice:
                ncvAssertCUDAReturn(cudaFree(allocBegin), );
                break;
            case NCVMemoryTypeHostPinned:
                ncvAssertCUDAReturn(cudaFreeHost(allocBegin), );
                break;
            case NCVMemoryTypeHostPageable:
                free(allocBegin);
                break;
            default:;
            }
        }

        allocBegin = NULL;
    }
}


NCVStatus NCVMemStackAllocator::alloc(NCVMemSegment &seg, size_t size)
{
    seg.clear();
    ncvAssertReturn(isInitialized(), NCV_ALLOCATOR_BAD_ALLOC);

    size = alignUp(static_cast<Ncv32u>(size), this->_alignment);
    this->currentSize += size;
    this->_maxSize = std::max(this->_maxSize, this->currentSize);

    if (!isCounting())
    {
        size_t availSize = end - begin;
        ncvAssertReturn(size <= availSize, NCV_ALLOCATOR_INSUFFICIENT_CAPACITY);
    }

    seg.begin.ptr = begin;
    seg.begin.memtype = this->_memType;
    seg.size = size;
    begin += size;

    return NCV_SUCCESS;
}


NCVStatus NCVMemStackAllocator::dealloc(NCVMemSegment &seg)
{
    ncvAssertReturn(isInitialized(), NCV_ALLOCATOR_BAD_ALLOC);
    ncvAssertReturn(seg.begin.memtype == this->_memType, NCV_ALLOCATOR_BAD_DEALLOC);
    ncvAssertReturn(seg.begin.ptr != NULL || isCounting(), NCV_ALLOCATOR_BAD_DEALLOC);
    ncvAssertReturn(seg.begin.ptr == begin - seg.size, NCV_ALLOCATOR_DEALLOC_ORDER);

    currentSize -= seg.size;
    begin -= seg.size;

    seg.clear();

    ncvAssertReturn(allocBegin <= begin, NCV_ALLOCATOR_BAD_DEALLOC);

    return NCV_SUCCESS;
}


NcvBool NCVMemStackAllocator::isInitialized(void) const
{
    return (((this->_alignment & (this->_alignment-1)) == 0) && isCounting()) || this->allocBegin != NULL;
}


NcvBool NCVMemStackAllocator::isCounting(void) const
{
    return this->_memType == NCVMemoryTypeNone;
}


NCVMemoryType NCVMemStackAllocator::memType(void) const
{
    return this->_memType;
}


Ncv32u NCVMemStackAllocator::alignment(void) const
{
    return this->_alignment;
}


size_t NCVMemStackAllocator::maxSize(void) const
{
    return this->_maxSize;
}
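// Usage sketch (illustrative): the stack allocator supports a two-pass
// pattern. A "counting" allocator (memory type None) only records the peak
// footprint; a real allocator sized with maxSize() then backs the actual run.
//
//     NCVMemStackAllocator counting(static_cast<Ncv32u>(devProp.textureAlignment));
//     // ... first pass: run the pipeline against `counting` ...
//     NCVMemStackAllocator gpuAlloc(NCVMemoryTypeDevice, counting.maxSize(),
//                                   static_cast<Ncv32u>(devProp.textureAlignment));
//     // ... second pass: run for real; dealloc() must unwind in LIFO order ...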


//===================================================================
//
// NCVMemNativeAllocator class members implementation
//
//===================================================================


NCVMemNativeAllocator::NCVMemNativeAllocator(NCVMemoryType memT, Ncv32u alignment_)
    :
    currentSize(0),
    _maxSize(0),
    _memType(memT),
    _alignment(alignment_)
{
    ncvAssertPrintReturn(memT != NCVMemoryTypeNone, "NCVMemNativeAllocator ctor:: counting not permitted for this allocator type", );
}


NCVMemNativeAllocator::~NCVMemNativeAllocator()
{
    ncvAssertPrintCheck(currentSize == 0, "NCVMemNativeAllocator dtor:: detected memory leak");
}


NCVStatus NCVMemNativeAllocator::alloc(NCVMemSegment &seg, size_t size)
{
    seg.clear();
    ncvAssertReturn(isInitialized(), NCV_ALLOCATOR_BAD_ALLOC);

    switch (this->_memType)
    {
    case NCVMemoryTypeDevice:
        ncvAssertCUDAReturn(cudaMalloc(&seg.begin.ptr, size), NCV_CUDA_ERROR);
        break;
    case NCVMemoryTypeHostPinned:
        ncvAssertCUDAReturn(cudaMallocHost(&seg.begin.ptr, size), NCV_CUDA_ERROR);
        break;
    case NCVMemoryTypeHostPageable:
        seg.begin.ptr = (Ncv8u *)malloc(size);
        break;
    default:;
    }

    this->currentSize += alignUp(static_cast<Ncv32u>(size), this->_alignment);
    this->_maxSize = std::max(this->_maxSize, this->currentSize);

    seg.begin.memtype = this->_memType;
    seg.size = size;

    return NCV_SUCCESS;
}


NCVStatus NCVMemNativeAllocator::dealloc(NCVMemSegment &seg)
{
    ncvAssertReturn(isInitialized(), NCV_ALLOCATOR_BAD_ALLOC);
    ncvAssertReturn(seg.begin.memtype == this->_memType, NCV_ALLOCATOR_BAD_DEALLOC);
    ncvAssertReturn(seg.begin.ptr != NULL, NCV_ALLOCATOR_BAD_DEALLOC);

    ncvAssertReturn(currentSize >= alignUp(static_cast<Ncv32u>(seg.size), this->_alignment), NCV_ALLOCATOR_BAD_DEALLOC);
    currentSize -= alignUp(static_cast<Ncv32u>(seg.size), this->_alignment);

    switch (this->_memType)
    {
    case NCVMemoryTypeDevice:
        ncvAssertCUDAReturn(cudaFree(seg.begin.ptr), NCV_CUDA_ERROR);
        break;
    case NCVMemoryTypeHostPinned:
        ncvAssertCUDAReturn(cudaFreeHost(seg.begin.ptr), NCV_CUDA_ERROR);
        break;
    case NCVMemoryTypeHostPageable:
        free(seg.begin.ptr);
        break;
    default:;
    }

    seg.clear();

    return NCV_SUCCESS;
}


NcvBool NCVMemNativeAllocator::isInitialized(void) const
{
    return (this->_alignment != 0);
}


NcvBool NCVMemNativeAllocator::isCounting(void) const
{
    return false;
}


NCVMemoryType NCVMemNativeAllocator::memType(void) const
{
    return this->_memType;
}


Ncv32u NCVMemNativeAllocator::alignment(void) const
{
    return this->_alignment;
}


size_t NCVMemNativeAllocator::maxSize(void) const
{
    return this->_maxSize;
}


//===================================================================
//
// Time and timer routines
//
//===================================================================


typedef struct _NcvTimeMoment NcvTimeMoment;

#if defined(_WIN32) || defined(_WIN64)

#include <Windows.h>

typedef struct _NcvTimeMoment
{
    LONGLONG moment, freq;
} NcvTimeMoment;


static void _ncvQueryMoment(NcvTimeMoment *t)
{
    QueryPerformanceFrequency((LARGE_INTEGER *)&(t->freq));
    QueryPerformanceCounter((LARGE_INTEGER *)&(t->moment));
}


double _ncvMomentToMicroseconds(NcvTimeMoment *t)
{
    return 1000000.0 * t->moment / t->freq;
}


double _ncvMomentsDiffToMicroseconds(NcvTimeMoment *t1, NcvTimeMoment *t2)
{
    return 1000000.0 * 2 * ((t2->moment) - (t1->moment)) / (t1->freq + t2->freq);
}


double _ncvMomentsDiffToMilliseconds(NcvTimeMoment *t1, NcvTimeMoment *t2)
{
    return 1000.0 * 2 * ((t2->moment) - (t1->moment)) / (t1->freq + t2->freq);
}

#elif defined(__GNUC__)

#include <sys/time.h>

typedef struct _NcvTimeMoment
{
    struct timeval tv;
    struct timezone tz;
} NcvTimeMoment;


void _ncvQueryMoment(NcvTimeMoment *t)
{
    gettimeofday(& t->tv, & t->tz);
}


double _ncvMomentToMicroseconds(NcvTimeMoment *t)
{
    return 1000000.0 * t->tv.tv_sec + (double)t->tv.tv_usec;
}


double _ncvMomentsDiffToMicroseconds(NcvTimeMoment *t1, NcvTimeMoment *t2)
{
    return (((double)t2->tv.tv_sec - (double)t1->tv.tv_sec) * 1000000 + (double)t2->tv.tv_usec - (double)t1->tv.tv_usec);
}

double _ncvMomentsDiffToMilliseconds(NcvTimeMoment *t1, NcvTimeMoment *t2)
{
    return (((double)t2->tv.tv_sec - (double)t1->tv.tv_sec) * 1000 + ((double)t2->tv.tv_usec - (double)t1->tv.tv_usec) / 1000);
}

#endif //#if defined(_WIN32) || defined(_WIN64)


struct _NcvTimer
{
    NcvTimeMoment t1, t2;
};


NcvTimer ncvStartTimer(void)
{
    struct _NcvTimer *t;
    t = (struct _NcvTimer *)malloc(sizeof(struct _NcvTimer));
    _ncvQueryMoment(&t->t1);
    return t;
}


double ncvEndQueryTimerUs(NcvTimer t)
{
    double res;
    _ncvQueryMoment(&t->t2);
    res = _ncvMomentsDiffToMicroseconds(&t->t1, &t->t2);
    free(t);
    return res;
}


double ncvEndQueryTimerMs(NcvTimer t)
{
    double res;
    _ncvQueryMoment(&t->t2);
    res = _ncvMomentsDiffToMilliseconds(&t->t1, &t->t2);
    free(t);
    return res;
}
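// Usage sketch (illustrative): a timer is single-use -- the matching
// ncvEndQueryTimer{Us,Ms} call reads the elapsed time and frees the handle.
//
//     NcvTimer timer = ncvStartTimer();
//     // ... work being measured ...
//     double ms = ncvEndQueryTimerMs(timer);   // timer is freed here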


//===================================================================
//
// Operations with rectangles
//
//===================================================================


//from OpenCV
void groupRectangles(std::vector<NcvRect32u> &hypotheses, int groupThreshold, double eps, std::vector<Ncv32u> *weights);


NCVStatus ncvGroupRectangles_host(NCVVector<NcvRect32u> &hypotheses,
                                  Ncv32u &numHypotheses,
                                  Ncv32u minNeighbors,
                                  Ncv32f intersectEps,
                                  NCVVector<Ncv32u> *hypothesesWeights)
{
    ncvAssertReturn(hypotheses.memType() == NCVMemoryTypeHostPageable ||
                    hypotheses.memType() == NCVMemoryTypeHostPinned, NCV_MEM_RESIDENCE_ERROR);
    if (hypothesesWeights != NULL)
    {
        ncvAssertReturn(hypothesesWeights->memType() == NCVMemoryTypeHostPageable ||
                        hypothesesWeights->memType() == NCVMemoryTypeHostPinned, NCV_MEM_RESIDENCE_ERROR);
    }

    if (numHypotheses == 0)
    {
        return NCV_SUCCESS;
    }

    std::vector<NcvRect32u> rects(numHypotheses);
    memcpy(&rects[0], hypotheses.ptr(), numHypotheses * sizeof(NcvRect32u));

    std::vector<Ncv32u> weights;
    if (hypothesesWeights != NULL)
    {
        groupRectangles(rects, minNeighbors, intersectEps, &weights);
    }
    else
    {
        groupRectangles(rects, minNeighbors, intersectEps, NULL);
    }

    numHypotheses = (Ncv32u)rects.size();
    if (numHypotheses > 0)
    {
        memcpy(hypotheses.ptr(), &rects[0], numHypotheses * sizeof(NcvRect32u));
    }

    if (hypothesesWeights != NULL && numHypotheses > 0)
    {
        memcpy(hypothesesWeights->ptr(), &weights[0], numHypotheses * sizeof(Ncv32u));
    }

    return NCV_SUCCESS;
}
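// Usage sketch (illustrative): "h_rects" stands for a host-side
// NCVVector<NcvRect32u> of raw detections; on return numDetections holds the
// number of surviving, averaged rectangles.
//
//     Ncv32u numDetections = rawCount;
//     NCVStatus stat = ncvGroupRectangles_host(h_rects, numDetections,
//                                              4 /*minNeighbors*/,
//                                              0.2f /*intersectEps*/, NULL);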


template <class T>
static NCVStatus drawRectsWrapperHost(T *h_dst,
                                      Ncv32u dstStride,
                                      Ncv32u dstWidth,
                                      Ncv32u dstHeight,
                                      NcvRect32u *h_rects,
                                      Ncv32u numRects,
                                      T color)
{
    ncvAssertReturn(h_dst != NULL && h_rects != NULL, NCV_NULL_PTR);
    ncvAssertReturn(dstWidth > 0 && dstHeight > 0, NCV_DIMENSIONS_INVALID);
    ncvAssertReturn(dstStride >= dstWidth, NCV_INVALID_STEP);
    ncvAssertReturn(numRects != 0, NCV_SUCCESS);
    ncvAssertReturn(numRects <= dstWidth * dstHeight, NCV_DIMENSIONS_INVALID);

    for (Ncv32u i=0; i<numRects; i++)
    {
        NcvRect32u rect = h_rects[i];

        if (rect.x < dstWidth)
        {
            for (Ncv32u each=rect.y; each<rect.y+rect.height && each<dstHeight; each++)
            {
                h_dst[each*dstStride+rect.x] = color;
            }
        }
        if (rect.x+rect.width-1 < dstWidth)
        {
            for (Ncv32u each=rect.y; each<rect.y+rect.height && each<dstHeight; each++)
            {
                h_dst[each*dstStride+rect.x+rect.width-1] = color;
            }
        }
        if (rect.y < dstHeight)
        {
            for (Ncv32u j=rect.x; j<rect.x+rect.width && j<dstWidth; j++)
            {
                h_dst[rect.y*dstStride+j] = color;
            }
        }
        if (rect.y + rect.height - 1 < dstHeight)
        {
            for (Ncv32u j=rect.x; j<rect.x+rect.width && j<dstWidth; j++)
            {
                h_dst[(rect.y+rect.height-1)*dstStride+j] = color;
            }
        }
    }

    return NCV_SUCCESS;
}


NCVStatus ncvDrawRects_8u_host(Ncv8u *h_dst,
                               Ncv32u dstStride,
                               Ncv32u dstWidth,
                               Ncv32u dstHeight,
                               NcvRect32u *h_rects,
                               Ncv32u numRects,
                               Ncv8u color)
{
    return drawRectsWrapperHost(h_dst, dstStride, dstWidth, dstHeight, h_rects, numRects, color);
}


NCVStatus ncvDrawRects_32u_host(Ncv32u *h_dst,
                                Ncv32u dstStride,
                                Ncv32u dstWidth,
                                Ncv32u dstHeight,
                                NcvRect32u *h_rects,
                                Ncv32u numRects,
                                Ncv32u color)
{
    return drawRectsWrapperHost(h_dst, dstStride, dstWidth, dstHeight, h_rects, numRects, color);
}


const Ncv32u NUMTHREADS_DRAWRECTS = 32;
const Ncv32u NUMTHREADS_DRAWRECTS_LOG2 = 5;


template <class T>
__global__ void drawRects(T *d_dst,
                          Ncv32u dstStride,
                          Ncv32u dstWidth,
                          Ncv32u dstHeight,
                          NcvRect32u *d_rects,
                          Ncv32u numRects,
                          T color)
{
    Ncv32u blockId = blockIdx.y * 65535 + blockIdx.x;
    if (blockId >= numRects * 4)
    {
        return;
    }

    NcvRect32u curRect = d_rects[blockId >> 2];
    NcvBool bVertical = blockId & 0x1;
    NcvBool bTopLeft = blockId & 0x2;

    Ncv32u pt0x, pt0y;
    if (bVertical)
    {
        Ncv32u numChunks = (curRect.height + NUMTHREADS_DRAWRECTS - 1) >> NUMTHREADS_DRAWRECTS_LOG2;

        pt0x = bTopLeft ? curRect.x : curRect.x + curRect.width - 1;
        pt0y = curRect.y;

        if (pt0x < dstWidth)
        {
            for (Ncv32u chunkId = 0; chunkId < numChunks; chunkId++)
            {
                Ncv32u ptY = pt0y + chunkId * NUMTHREADS_DRAWRECTS + threadIdx.x;
                if (ptY < pt0y + curRect.height && ptY < dstHeight)
                {
                    d_dst[ptY * dstStride + pt0x] = color;
                }
            }
        }
    }
    else
    {
        Ncv32u numChunks = (curRect.width + NUMTHREADS_DRAWRECTS - 1) >> NUMTHREADS_DRAWRECTS_LOG2;

        pt0x = curRect.x;
        pt0y = bTopLeft ? curRect.y : curRect.y + curRect.height - 1;

        if (pt0y < dstHeight)
        {
            for (Ncv32u chunkId = 0; chunkId < numChunks; chunkId++)
            {
                Ncv32u ptX = pt0x + chunkId * NUMTHREADS_DRAWRECTS + threadIdx.x;
                if (ptX < pt0x + curRect.width && ptX < dstWidth)
                {
                    d_dst[pt0y * dstStride + ptX] = color;
                }
            }
        }
    }
}


template <class T>
static NCVStatus drawRectsWrapperDevice(T *d_dst,
                                        Ncv32u dstStride,
                                        Ncv32u dstWidth,
                                        Ncv32u dstHeight,
                                        NcvRect32u *d_rects,
                                        Ncv32u numRects,
                                        T color,
                                        cudaStream_t cuStream)
{
    (void)cuStream;
    ncvAssertReturn(d_dst != NULL && d_rects != NULL, NCV_NULL_PTR);
    ncvAssertReturn(dstWidth > 0 && dstHeight > 0, NCV_DIMENSIONS_INVALID);
    ncvAssertReturn(dstStride >= dstWidth, NCV_INVALID_STEP);
    ncvAssertReturn(numRects <= dstWidth * dstHeight, NCV_DIMENSIONS_INVALID);

    if (numRects == 0)
    {
        return NCV_SUCCESS;
    }

    dim3 grid(numRects * 4);
    dim3 block(NUMTHREADS_DRAWRECTS);
    if (grid.x > 65535)
    {
        grid.y = (grid.x + 65534) / 65535;
        grid.x = 65535;
    }

    drawRects<T><<<grid, block>>>(d_dst, dstStride, dstWidth, dstHeight, d_rects, numRects, color);

    ncvAssertCUDALastErrorReturn(NCV_CUDA_ERROR);

    return NCV_SUCCESS;
}
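// Note on the launch configuration above: each rectangle is drawn by four
// blocks (left, right, top, bottom edge), so the natural grid is numRects * 4
// blocks. gridDim.x is capped at 65535 here, so a larger launch is folded into
// two dimensions, e.g. numRects = 20000 -> 80000 blocks -> grid(65535, 2) =
// 131070 blocks, and the surplus blocks exit early via the blockId bounds
// check in the kernel.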


NCVStatus ncvDrawRects_8u_device(Ncv8u *d_dst,
                                 Ncv32u dstStride,
                                 Ncv32u dstWidth,
                                 Ncv32u dstHeight,
                                 NcvRect32u *d_rects,
                                 Ncv32u numRects,
                                 Ncv8u color,
                                 cudaStream_t cuStream)
{
    return drawRectsWrapperDevice(d_dst, dstStride, dstWidth, dstHeight, d_rects, numRects, color, cuStream);
}


NCVStatus ncvDrawRects_32u_device(Ncv32u *d_dst,
                                  Ncv32u dstStride,
                                  Ncv32u dstWidth,
                                  Ncv32u dstHeight,
                                  NcvRect32u *d_rects,
                                  Ncv32u numRects,
                                  Ncv32u color,
                                  cudaStream_t cuStream)
{
    return drawRectsWrapperDevice(d_dst, dstStride, dstWidth, dstHeight, d_rects, numRects, color, cuStream);
}

#endif /* CUDA_DISABLER */

File diff suppressed because it is too large
@@ -1,155 +0,0 @@
#ifndef _ncv_alg_hpp_
#define _ncv_alg_hpp_

#include "NCV.hpp"

template <class T>
static void swap(T &p1, T &p2)
{
    T tmp = p1;
    p1 = p2;
    p2 = tmp;
}


template<typename T>
static T divUp(T a, T b)
{
    return (a + b - 1) / b;
}
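// Worked example (illustrative): divUp(10, 4) == 3 -- the number of size-4
// chunks needed to cover 10 elements.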


template<typename T>
struct functorAddValues
{
    static __device__ __inline__ void assign(volatile T *dst, volatile T *src)
    {
        //Works only for integral types. If you see compiler error here, then you have to specify how to copy your object as a set of integral fields.
        *dst = *src;
    }
    static __device__ __inline__ void reduce(volatile T &in1out, const volatile T &in2)
    {
        in1out += in2;
    }
};


template<typename T>
struct functorMinValues
{
    static __device__ __inline__ void assign(volatile T *dst, volatile T *src)
    {
        //Works only for integral types. If you see compiler error here, then you have to specify how to copy your object as a set of integral fields.
        *dst = *src;
    }
    static __device__ __inline__ void reduce(volatile T &in1out, const volatile T &in2)
    {
        in1out = in1out > in2 ? in2 : in1out;
    }
};


template<typename T>
struct functorMaxValues
{
    static __device__ __inline__ void assign(volatile T *dst, volatile T *src)
    {
        //Works only for integral types. If you see compiler error here, then you have to specify how to copy your object as a set of integral fields.
        *dst = *src;
    }
    static __device__ __inline__ void reduce(volatile T &in1out, const volatile T &in2)
    {
        in1out = in1out > in2 ? in1out : in2;
    }
};


template<typename Tdata, class Tfunc, Ncv32u nThreads>
static __device__ Tdata subReduce(Tdata threadElem)
{
    Tfunc functor;

    __shared__ Tdata _reduceArr[nThreads];
    volatile Tdata *reduceArr = _reduceArr;
    functor.assign(reduceArr + threadIdx.x, &threadElem);
    __syncthreads();

    if (nThreads >= 256 && threadIdx.x < 128)
    {
        functor.reduce(reduceArr[threadIdx.x], reduceArr[threadIdx.x + 128]);
    }
    __syncthreads();

    if (nThreads >= 128 && threadIdx.x < 64)
    {
        functor.reduce(reduceArr[threadIdx.x], reduceArr[threadIdx.x + 64]);
    }
    __syncthreads();

    if (threadIdx.x < 32)
    {
        if (nThreads >= 64)
        {
            functor.reduce(reduceArr[threadIdx.x], reduceArr[threadIdx.x + 32]);
        }
        if (nThreads >= 32 && threadIdx.x < 16)
        {
            functor.reduce(reduceArr[threadIdx.x], reduceArr[threadIdx.x + 16]);
            functor.reduce(reduceArr[threadIdx.x], reduceArr[threadIdx.x + 8]);
            functor.reduce(reduceArr[threadIdx.x], reduceArr[threadIdx.x + 4]);
            functor.reduce(reduceArr[threadIdx.x], reduceArr[threadIdx.x + 2]);
            functor.reduce(reduceArr[threadIdx.x], reduceArr[threadIdx.x + 1]);
        }
    }

    __syncthreads();
    Tdata reduceRes;
    functor.assign(&reduceRes, reduceArr);
    return reduceRes;
}
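// Usage sketch (illustrative): block-wide sum from inside a kernel launched
// with 128 threads per block; every thread of the block must reach the call,
// since subReduce synchronizes internally.
//
//     Ncv32u blockSum = subReduce<Ncv32u, functorAddValues<Ncv32u>, 128>(myVal);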


#endif //_ncv_alg_hpp_
@@ -1,100 +0,0 @@
// this file does not contain any used code.

#ifndef _ncv_color_conversion_hpp_
#define _ncv_color_conversion_hpp_

#include "NCVPixelOperations.hpp"

#if 0
enum NCVColorSpace
{
    NCVColorSpaceGray,
    NCVColorSpaceRGBA,
};

template<NCVColorSpace CSin, NCVColorSpace CSout, typename Tin, typename Tout> struct __pixColorConv {
    static void _pixColorConv(const Tin &pixIn, Tout &pixOut);
};

template<typename Tin, typename Tout> struct __pixColorConv<NCVColorSpaceRGBA, NCVColorSpaceGray, Tin, Tout> {
    static void _pixColorConv(const Tin &pixIn, Tout &pixOut)
    {
        Ncv32f luma = 0.299f * pixIn.x + 0.587f * pixIn.y + 0.114f * pixIn.z;
        _TDemoteClampNN(luma, pixOut.x);
    }};

template<typename Tin, typename Tout> struct __pixColorConv<NCVColorSpaceGray, NCVColorSpaceRGBA, Tin, Tout> {
    static void _pixColorConv(const Tin &pixIn, Tout &pixOut)
    {
        _TDemoteClampNN(pixIn.x, pixOut.x);
        _TDemoteClampNN(pixIn.x, pixOut.y);
        _TDemoteClampNN(pixIn.x, pixOut.z);
        pixOut.w = 0;
    }};

template<NCVColorSpace CSin, NCVColorSpace CSout, typename Tin, typename Tout>
static NCVStatus _ncvColorConv_host(const NCVMatrix<Tin> &h_imgIn,
                                    const NCVMatrix<Tout> &h_imgOut)
{
    ncvAssertReturn(h_imgIn.size() == h_imgOut.size(), NCV_DIMENSIONS_INVALID);
    ncvAssertReturn(h_imgIn.memType() == h_imgOut.memType() &&
                    (h_imgIn.memType() == NCVMemoryTypeHostPinned || h_imgIn.memType() == NCVMemoryTypeNone), NCV_MEM_RESIDENCE_ERROR);
    NCV_SET_SKIP_COND(h_imgIn.memType() == NCVMemoryTypeNone);
    NCV_SKIP_COND_BEGIN

    for (Ncv32u i=0; i<h_imgIn.height(); i++)
    {
        for (Ncv32u j=0; j<h_imgIn.width(); j++)
        {
            __pixColorConv<CSin, CSout, Tin, Tout>::_pixColorConv(h_imgIn.at(j,i), h_imgOut.at(j,i));
        }
    }

    NCV_SKIP_COND_END
    return NCV_SUCCESS;
}
#endif

#endif //_ncv_color_conversion_hpp_
@@ -1,351 +0,0 @@
#ifndef _ncv_pixel_operations_hpp_
#define _ncv_pixel_operations_hpp_

#include <limits.h>
#include <float.h>
#include "NCV.hpp"

template<typename TBase> inline __host__ __device__ TBase _pixMaxVal();
|
||||
template<> static inline __host__ __device__ Ncv8u _pixMaxVal<Ncv8u>() {return UCHAR_MAX;}
|
||||
template<> static inline __host__ __device__ Ncv16u _pixMaxVal<Ncv16u>() {return USHRT_MAX;}
|
||||
template<> static inline __host__ __device__ Ncv32u _pixMaxVal<Ncv32u>() {return UINT_MAX;}
|
||||
template<> static inline __host__ __device__ Ncv8s _pixMaxVal<Ncv8s>() {return CHAR_MAX;}
|
||||
template<> static inline __host__ __device__ Ncv16s _pixMaxVal<Ncv16s>() {return SHRT_MAX;}
|
||||
template<> static inline __host__ __device__ Ncv32s _pixMaxVal<Ncv32s>() {return INT_MAX;}
|
||||
template<> static inline __host__ __device__ Ncv32f _pixMaxVal<Ncv32f>() {return FLT_MAX;}
|
||||
template<> static inline __host__ __device__ Ncv64f _pixMaxVal<Ncv64f>() {return DBL_MAX;}
|
||||
|
||||
template<typename TBase> inline __host__ __device__ TBase _pixMinVal();
|
||||
template<> static inline __host__ __device__ Ncv8u _pixMinVal<Ncv8u>() {return 0;}
|
||||
template<> static inline __host__ __device__ Ncv16u _pixMinVal<Ncv16u>() {return 0;}
|
||||
template<> static inline __host__ __device__ Ncv32u _pixMinVal<Ncv32u>() {return 0;}
|
||||
template<> static inline __host__ __device__ Ncv8s _pixMinVal<Ncv8s>() {return CHAR_MIN;}
|
||||
template<> static inline __host__ __device__ Ncv16s _pixMinVal<Ncv16s>() {return SHRT_MIN;}
|
||||
template<> static inline __host__ __device__ Ncv32s _pixMinVal<Ncv32s>() {return INT_MIN;}
|
||||
template<> static inline __host__ __device__ Ncv32f _pixMinVal<Ncv32f>() {return FLT_MIN;}
|
||||
template<> static inline __host__ __device__ Ncv64f _pixMinVal<Ncv64f>() {return DBL_MIN;}
|
||||
|
||||
template<typename Tvec> struct TConvVec2Base;
|
||||
template<> struct TConvVec2Base<uchar1> {typedef Ncv8u TBase;};
|
||||
template<> struct TConvVec2Base<uchar3> {typedef Ncv8u TBase;};
|
||||
template<> struct TConvVec2Base<uchar4> {typedef Ncv8u TBase;};
|
||||
template<> struct TConvVec2Base<ushort1> {typedef Ncv16u TBase;};
|
||||
template<> struct TConvVec2Base<ushort3> {typedef Ncv16u TBase;};
|
||||
template<> struct TConvVec2Base<ushort4> {typedef Ncv16u TBase;};
|
||||
template<> struct TConvVec2Base<uint1> {typedef Ncv32u TBase;};
|
||||
template<> struct TConvVec2Base<uint3> {typedef Ncv32u TBase;};
|
||||
template<> struct TConvVec2Base<uint4> {typedef Ncv32u TBase;};
|
||||
template<> struct TConvVec2Base<float1> {typedef Ncv32f TBase;};
|
||||
template<> struct TConvVec2Base<float3> {typedef Ncv32f TBase;};
|
||||
template<> struct TConvVec2Base<float4> {typedef Ncv32f TBase;};
|
||||
template<> struct TConvVec2Base<double1> {typedef Ncv64f TBase;};
|
||||
template<> struct TConvVec2Base<double3> {typedef Ncv64f TBase;};
|
||||
template<> struct TConvVec2Base<double4> {typedef Ncv64f TBase;};
|
||||
|
||||
#define NC(T) (sizeof(T) / sizeof(TConvVec2Base<T>::TBase))
|
||||
|
||||
template<typename TBase, Ncv32u NC> struct TConvBase2Vec;
|
||||
template<> struct TConvBase2Vec<Ncv8u, 1> {typedef uchar1 TVec;};
|
||||
template<> struct TConvBase2Vec<Ncv8u, 3> {typedef uchar3 TVec;};
|
||||
template<> struct TConvBase2Vec<Ncv8u, 4> {typedef uchar4 TVec;};
|
||||
template<> struct TConvBase2Vec<Ncv16u, 1> {typedef ushort1 TVec;};
|
||||
template<> struct TConvBase2Vec<Ncv16u, 3> {typedef ushort3 TVec;};
|
||||
template<> struct TConvBase2Vec<Ncv16u, 4> {typedef ushort4 TVec;};
|
||||
template<> struct TConvBase2Vec<Ncv32u, 1> {typedef uint1 TVec;};
|
||||
template<> struct TConvBase2Vec<Ncv32u, 3> {typedef uint3 TVec;};
|
||||
template<> struct TConvBase2Vec<Ncv32u, 4> {typedef uint4 TVec;};
|
||||
template<> struct TConvBase2Vec<Ncv32f, 1> {typedef float1 TVec;};
|
||||
template<> struct TConvBase2Vec<Ncv32f, 3> {typedef float3 TVec;};
|
||||
template<> struct TConvBase2Vec<Ncv32f, 4> {typedef float4 TVec;};
|
||||
template<> struct TConvBase2Vec<Ncv64f, 1> {typedef double1 TVec;};
|
||||
template<> struct TConvBase2Vec<Ncv64f, 3> {typedef double3 TVec;};
|
||||
template<> struct TConvBase2Vec<Ncv64f, 4> {typedef double4 TVec;};
|
||||
|
||||
//TODO: consider using CUDA intrinsics to avoid branching
|
||||
template<typename Tin> static inline __host__ __device__ void _TDemoteClampZ(Tin &a, Ncv8u &out) {out = (Ncv8u)CLAMP_0_255(a);};
|
||||
template<typename Tin> static inline __host__ __device__ void _TDemoteClampZ(Tin &a, Ncv16u &out) {out = (Ncv16u)CLAMP(a, 0, USHRT_MAX);}
|
||||
template<typename Tin> static inline __host__ __device__ void _TDemoteClampZ(Tin &a, Ncv32u &out) {out = (Ncv32u)CLAMP(a, 0, UINT_MAX);}
|
||||
template<typename Tin> static inline __host__ __device__ void _TDemoteClampZ(Tin &a, Ncv32f &out) {out = (Ncv32f)a;}
|
||||
|
||||
//TODO: consider using CUDA intrinsics to avoid branching
|
||||
template<typename Tin> static inline __host__ __device__ void _TDemoteClampNN(Tin &a, Ncv8u &out) {out = (Ncv8u)CLAMP_0_255(a+0.5f);}
|
||||
template<typename Tin> static inline __host__ __device__ void _TDemoteClampNN(Tin &a, Ncv16u &out) {out = (Ncv16u)CLAMP(a+0.5f, 0, USHRT_MAX);}
|
||||
template<typename Tin> static inline __host__ __device__ void _TDemoteClampNN(Tin &a, Ncv32u &out) {out = (Ncv32u)CLAMP(a+0.5f, 0, UINT_MAX);}
|
||||
template<typename Tin> static inline __host__ __device__ void _TDemoteClampNN(Tin &a, Ncv32f &out) {out = (Ncv32f)a;}
|
||||
|
||||
template<typename Tout> inline Tout _pixMakeZero();
template<> static inline __host__ __device__ uchar1 _pixMakeZero<uchar1>() {return make_uchar1(0);}
template<> static inline __host__ __device__ uchar3 _pixMakeZero<uchar3>() {return make_uchar3(0,0,0);}
template<> static inline __host__ __device__ uchar4 _pixMakeZero<uchar4>() {return make_uchar4(0,0,0,0);}
template<> static inline __host__ __device__ ushort1 _pixMakeZero<ushort1>() {return make_ushort1(0);}
template<> static inline __host__ __device__ ushort3 _pixMakeZero<ushort3>() {return make_ushort3(0,0,0);}
template<> static inline __host__ __device__ ushort4 _pixMakeZero<ushort4>() {return make_ushort4(0,0,0,0);}
template<> static inline __host__ __device__ uint1 _pixMakeZero<uint1>() {return make_uint1(0);}
template<> static inline __host__ __device__ uint3 _pixMakeZero<uint3>() {return make_uint3(0,0,0);}
template<> static inline __host__ __device__ uint4 _pixMakeZero<uint4>() {return make_uint4(0,0,0,0);}
template<> static inline __host__ __device__ float1 _pixMakeZero<float1>() {return make_float1(0.f);}
template<> static inline __host__ __device__ float3 _pixMakeZero<float3>() {return make_float3(0.f,0.f,0.f);}
template<> static inline __host__ __device__ float4 _pixMakeZero<float4>() {return make_float4(0.f,0.f,0.f,0.f);}
template<> static inline __host__ __device__ double1 _pixMakeZero<double1>() {return make_double1(0.);}
template<> static inline __host__ __device__ double3 _pixMakeZero<double3>() {return make_double3(0.,0.,0.);}
template<> static inline __host__ __device__ double4 _pixMakeZero<double4>() {return make_double4(0.,0.,0.,0.);}
static inline __host__ __device__ uchar1 _pixMake(Ncv8u x) {return make_uchar1(x);}
static inline __host__ __device__ uchar3 _pixMake(Ncv8u x, Ncv8u y, Ncv8u z) {return make_uchar3(x,y,z);}
static inline __host__ __device__ uchar4 _pixMake(Ncv8u x, Ncv8u y, Ncv8u z, Ncv8u w) {return make_uchar4(x,y,z,w);}
static inline __host__ __device__ ushort1 _pixMake(Ncv16u x) {return make_ushort1(x);}
static inline __host__ __device__ ushort3 _pixMake(Ncv16u x, Ncv16u y, Ncv16u z) {return make_ushort3(x,y,z);}
static inline __host__ __device__ ushort4 _pixMake(Ncv16u x, Ncv16u y, Ncv16u z, Ncv16u w) {return make_ushort4(x,y,z,w);}
static inline __host__ __device__ uint1 _pixMake(Ncv32u x) {return make_uint1(x);}
static inline __host__ __device__ uint3 _pixMake(Ncv32u x, Ncv32u y, Ncv32u z) {return make_uint3(x,y,z);}
static inline __host__ __device__ uint4 _pixMake(Ncv32u x, Ncv32u y, Ncv32u z, Ncv32u w) {return make_uint4(x,y,z,w);}
static inline __host__ __device__ float1 _pixMake(Ncv32f x) {return make_float1(x);}
static inline __host__ __device__ float3 _pixMake(Ncv32f x, Ncv32f y, Ncv32f z) {return make_float3(x,y,z);}
static inline __host__ __device__ float4 _pixMake(Ncv32f x, Ncv32f y, Ncv32f z, Ncv32f w) {return make_float4(x,y,z,w);}
static inline __host__ __device__ double1 _pixMake(Ncv64f x) {return make_double1(x);}
static inline __host__ __device__ double3 _pixMake(Ncv64f x, Ncv64f y, Ncv64f z) {return make_double3(x,y,z);}
static inline __host__ __device__ double4 _pixMake(Ncv64f x, Ncv64f y, Ncv64f z, Ncv64f w) {return make_double4(x,y,z,w);}
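//Illustrative sketch (editorial): _pixMakeZero and _pixMake give templated
//code a uniform way to construct pixels when the channel count is only known
//at instantiation time.
#if 0
uchar4 bg = _pixMakeZero<uchar4>(); //(0,0,0,0)
uchar4 px = _pixMake((Ncv8u)255, (Ncv8u)128, (Ncv8u)0, (Ncv8u)255);
#endif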
template<typename Tin, typename Tout, Ncv32u CN> struct __pixDemoteClampZ_CN {static __host__ __device__ Tout _pixDemoteClampZ_CN(Tin &pix);};

template<typename Tin, typename Tout> struct __pixDemoteClampZ_CN<Tin, Tout, 1> {
static __host__ __device__ Tout _pixDemoteClampZ_CN(Tin &pix)
{
    Tout out;
    _TDemoteClampZ(pix.x, out.x);
    return out;
}};

template<typename Tin, typename Tout> struct __pixDemoteClampZ_CN<Tin, Tout, 3> {
static __host__ __device__ Tout _pixDemoteClampZ_CN(Tin &pix)
{
    Tout out;
    _TDemoteClampZ(pix.x, out.x);
    _TDemoteClampZ(pix.y, out.y);
    _TDemoteClampZ(pix.z, out.z);
    return out;
}};

template<typename Tin, typename Tout> struct __pixDemoteClampZ_CN<Tin, Tout, 4> {
static __host__ __device__ Tout _pixDemoteClampZ_CN(Tin &pix)
{
    Tout out;
    _TDemoteClampZ(pix.x, out.x);
    _TDemoteClampZ(pix.y, out.y);
    _TDemoteClampZ(pix.z, out.z);
    _TDemoteClampZ(pix.w, out.w);
    return out;
}};

template<typename Tin, typename Tout> static inline __host__ __device__ Tout _pixDemoteClampZ(Tin &pix)
{
    return __pixDemoteClampZ_CN<Tin, Tout, NC(Tin)>::_pixDemoteClampZ_CN(pix);
}
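//Editorial note on the pattern above: the public _pixDemoteClampZ forwards to
//a partial specialization selected by the compile-time channel count NC(Tin),
//so the per-channel work is fully unrolled with no runtime branching.
#if 0
float3 acc = make_float3(300.5f, -4.0f, 17.2f);
uchar3 out = _pixDemoteClampZ<float3, uchar3>(acc); //(255, 0, 17)
#endif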
template<typename Tin, typename Tout, Ncv32u CN> struct __pixDemoteClampNN_CN {static __host__ __device__ Tout _pixDemoteClampNN_CN(Tin &pix);};

template<typename Tin, typename Tout> struct __pixDemoteClampNN_CN<Tin, Tout, 1> {
static __host__ __device__ Tout _pixDemoteClampNN_CN(Tin &pix)
{
    Tout out;
    _TDemoteClampNN(pix.x, out.x);
    return out;
}};

template<typename Tin, typename Tout> struct __pixDemoteClampNN_CN<Tin, Tout, 3> {
static __host__ __device__ Tout _pixDemoteClampNN_CN(Tin &pix)
{
    Tout out;
    _TDemoteClampNN(pix.x, out.x);
    _TDemoteClampNN(pix.y, out.y);
    _TDemoteClampNN(pix.z, out.z);
    return out;
}};

template<typename Tin, typename Tout> struct __pixDemoteClampNN_CN<Tin, Tout, 4> {
static __host__ __device__ Tout _pixDemoteClampNN_CN(Tin &pix)
{
    Tout out;
    _TDemoteClampNN(pix.x, out.x);
    _TDemoteClampNN(pix.y, out.y);
    _TDemoteClampNN(pix.z, out.z);
    _TDemoteClampNN(pix.w, out.w);
    return out;
}};

template<typename Tin, typename Tout> static inline __host__ __device__ Tout _pixDemoteClampNN(Tin &pix)
{
    return __pixDemoteClampNN_CN<Tin, Tout, NC(Tin)>::_pixDemoteClampNN_CN(pix);
}
template<typename Tin, typename Tout, typename Tw, Ncv32u CN> struct __pixScale_CN {static __host__ __device__ Tout _pixScale_CN(Tin &pix, Tw w);};

template<typename Tin, typename Tout, typename Tw> struct __pixScale_CN<Tin, Tout, Tw, 1> {
static __host__ __device__ Tout _pixScale_CN(Tin &pix, Tw w)
{
    Tout out;
    typedef typename TConvVec2Base<Tout>::TBase TBout;
    out.x = (TBout)(pix.x * w);
    return out;
}};

template<typename Tin, typename Tout, typename Tw> struct __pixScale_CN<Tin, Tout, Tw, 3> {
static __host__ __device__ Tout _pixScale_CN(Tin &pix, Tw w)
{
    Tout out;
    typedef typename TConvVec2Base<Tout>::TBase TBout;
    out.x = (TBout)(pix.x * w);
    out.y = (TBout)(pix.y * w);
    out.z = (TBout)(pix.z * w);
    return out;
}};

template<typename Tin, typename Tout, typename Tw> struct __pixScale_CN<Tin, Tout, Tw, 4> {
static __host__ __device__ Tout _pixScale_CN(Tin &pix, Tw w)
{
    Tout out;
    typedef typename TConvVec2Base<Tout>::TBase TBout;
    out.x = (TBout)(pix.x * w);
    out.y = (TBout)(pix.y * w);
    out.z = (TBout)(pix.z * w);
    out.w = (TBout)(pix.w * w);
    return out;
}};

template<typename Tin, typename Tout, typename Tw> static __host__ __device__ Tout _pixScale(Tin &pix, Tw w)
{
    return __pixScale_CN<Tin, Tout, Tw, NC(Tin)>::_pixScale_CN(pix, w);
}
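//Illustrative sketch (editorial): _pixScale widens each channel into the base
//type of Tout, which is how narrow integer pixels are promoted into
//floating-point accumulators before blending.
#if 0
uchar3 px = make_uchar3(10, 20, 30);
float3 half = _pixScale<uchar3, float3, Ncv32f>(px, 0.5f); //(5.f, 10.f, 15.f)
#endif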
template<typename Tin, typename Tout, Ncv32u CN> struct __pixAdd_CN {static __host__ __device__ Tout _pixAdd_CN(Tout &pix1, Tin &pix2);};

template<typename Tin, typename Tout> struct __pixAdd_CN<Tin, Tout, 1> {
static __host__ __device__ Tout _pixAdd_CN(Tout &pix1, Tin &pix2)
{
    Tout out;
    out.x = pix1.x + pix2.x;
    return out;
}};

template<typename Tin, typename Tout> struct __pixAdd_CN<Tin, Tout, 3> {
static __host__ __device__ Tout _pixAdd_CN(Tout &pix1, Tin &pix2)
{
    Tout out;
    out.x = pix1.x + pix2.x;
    out.y = pix1.y + pix2.y;
    out.z = pix1.z + pix2.z;
    return out;
}};

template<typename Tin, typename Tout> struct __pixAdd_CN<Tin, Tout, 4> {
static __host__ __device__ Tout _pixAdd_CN(Tout &pix1, Tin &pix2)
{
    Tout out;
    out.x = pix1.x + pix2.x;
    out.y = pix1.y + pix2.y;
    out.z = pix1.z + pix2.z;
    out.w = pix1.w + pix2.w;
    return out;
}};

template<typename Tin, typename Tout> static __host__ __device__ Tout _pixAdd(Tout &pix1, Tin &pix2)
{
    return __pixAdd_CN<Tin, Tout, NC(Tin)>::_pixAdd_CN(pix1, pix2);
}
template<typename Tin, typename Tout, Ncv32u CN> struct __pixDist_CN {static __host__ __device__ Tout _pixDist_CN(Tin &pix1, Tin &pix2);};

template<typename Tin, typename Tout> struct __pixDist_CN<Tin, Tout, 1> {
static __host__ __device__ Tout _pixDist_CN(Tin &pix1, Tin &pix2)
{
    return Tout(SQR(pix1.x - pix2.x));
}};

template<typename Tin, typename Tout> struct __pixDist_CN<Tin, Tout, 3> {
static __host__ __device__ Tout _pixDist_CN(Tin &pix1, Tin &pix2)
{
    return Tout(SQR(pix1.x - pix2.x) + SQR(pix1.y - pix2.y) + SQR(pix1.z - pix2.z));
}};

template<typename Tin, typename Tout> struct __pixDist_CN<Tin, Tout, 4> {
static __host__ __device__ Tout _pixDist_CN(Tin &pix1, Tin &pix2)
{
    return Tout(SQR(pix1.x - pix2.x) + SQR(pix1.y - pix2.y) + SQR(pix1.z - pix2.z) + SQR(pix1.w - pix2.w));
}};

template<typename Tin, typename Tout> static __host__ __device__ Tout _pixDist(Tin &pix1, Tin &pix2)
{
    return __pixDist_CN<Tin, Tout, NC(Tin)>::_pixDist_CN(pix1, pix2);
}
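//Illustrative sketch (editorial): together, _pixScale, _pixAdd and _pixDist
//cover weighted accumulation and SSD-style pixel distances in templated kernels.
#if 0
uchar3 a = make_uchar3(1, 2, 3), b = make_uchar3(4, 6, 8);
Ncv32u ssd = _pixDist<uchar3, Ncv32u>(a, b); //SQR(3) + SQR(4) + SQR(5) == 50
#endif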
template <typename T> struct TAccPixWeighted;
template<> struct TAccPixWeighted<uchar1> {typedef double1 type;};
template<> struct TAccPixWeighted<uchar3> {typedef double3 type;};
template<> struct TAccPixWeighted<uchar4> {typedef double4 type;};
template<> struct TAccPixWeighted<ushort1> {typedef double1 type;};
template<> struct TAccPixWeighted<ushort3> {typedef double3 type;};
template<> struct TAccPixWeighted<ushort4> {typedef double4 type;};
template<> struct TAccPixWeighted<float1> {typedef double1 type;};
template<> struct TAccPixWeighted<float3> {typedef double3 type;};
template<> struct TAccPixWeighted<float4> {typedef double4 type;};

template<typename Tfrom> struct TAccPixDist {};
template<> struct TAccPixDist<uchar1> {typedef Ncv32u type;};
template<> struct TAccPixDist<uchar3> {typedef Ncv32u type;};
template<> struct TAccPixDist<uchar4> {typedef Ncv32u type;};
template<> struct TAccPixDist<ushort1> {typedef Ncv32u type;};
template<> struct TAccPixDist<ushort3> {typedef Ncv32u type;};
template<> struct TAccPixDist<ushort4> {typedef Ncv32u type;};
template<> struct TAccPixDist<float1> {typedef Ncv32f type;};
template<> struct TAccPixDist<float3> {typedef Ncv32f type;};
template<> struct TAccPixDist<float4> {typedef Ncv32f type;};
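//Editorial note: the two trait families above choose accumulators wide enough
//for their role -- double-based vectors for weighted averaging, and a 32-bit
//scalar for sums of squared channel differences.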
#endif //_ncv_pixel_operations_hpp_
@@ -1,606 +0,0 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
//  By downloading, copying, installing or using the software you agree to this license.
//  If you do not agree to this license, do not download, install,
//  copy or use the software.
//
//
//                           License Agreement
//                For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
//   * Redistribution's of source code must retain the above copyright notice,
//     this list of conditions and the following disclaimer.
//
//   * Redistribution's in binary form must reproduce the above copyright notice,
//     this list of conditions and the following disclaimer in the documentation
//     and/or other materials provided with the distribution.
//
//   * The name of the copyright holders may not be used to endorse or promote products
//     derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/

#if !defined CUDA_DISABLER

#include <cuda_runtime.h>
#include <stdio.h>
#include "NCV.hpp"
#include "NCVAlg.hpp"
#include "NCVPyramid.hpp"
#include "NCVPixelOperations.hpp"
#include "opencv2/core/cuda/common.hpp"
template<typename T, Ncv32u CN> struct __average4_CN {static __host__ __device__ T _average4_CN(const T &p00, const T &p01, const T &p10, const T &p11);};

template<typename T> struct __average4_CN<T, 1> {
static __host__ __device__ T _average4_CN(const T &p00, const T &p01, const T &p10, const T &p11)
{
    T out;
    out.x = ((Ncv32s)p00.x + p01.x + p10.x + p11.x + 2) / 4;
    return out;
}};

template<> struct __average4_CN<float1, 1> {
static __host__ __device__ float1 _average4_CN(const float1 &p00, const float1 &p01, const float1 &p10, const float1 &p11)
{
    float1 out;
    out.x = (p00.x + p01.x + p10.x + p11.x) / 4;
    return out;
}};

template<> struct __average4_CN<double1, 1> {
static __host__ __device__ double1 _average4_CN(const double1 &p00, const double1 &p01, const double1 &p10, const double1 &p11)
{
    double1 out;
    out.x = (p00.x + p01.x + p10.x + p11.x) / 4;
    return out;
}};

template<typename T> struct __average4_CN<T, 3> {
static __host__ __device__ T _average4_CN(const T &p00, const T &p01, const T &p10, const T &p11)
{
    T out;
    out.x = ((Ncv32s)p00.x + p01.x + p10.x + p11.x + 2) / 4;
    out.y = ((Ncv32s)p00.y + p01.y + p10.y + p11.y + 2) / 4;
    out.z = ((Ncv32s)p00.z + p01.z + p10.z + p11.z + 2) / 4;
    return out;
}};

template<> struct __average4_CN<float3, 3> {
static __host__ __device__ float3 _average4_CN(const float3 &p00, const float3 &p01, const float3 &p10, const float3 &p11)
{
    float3 out;
    out.x = (p00.x + p01.x + p10.x + p11.x) / 4;
    out.y = (p00.y + p01.y + p10.y + p11.y) / 4;
    out.z = (p00.z + p01.z + p10.z + p11.z) / 4;
    return out;
}};

template<> struct __average4_CN<double3, 3> {
static __host__ __device__ double3 _average4_CN(const double3 &p00, const double3 &p01, const double3 &p10, const double3 &p11)
{
    double3 out;
    out.x = (p00.x + p01.x + p10.x + p11.x) / 4;
    out.y = (p00.y + p01.y + p10.y + p11.y) / 4;
    out.z = (p00.z + p01.z + p10.z + p11.z) / 4;
    return out;
}};

template<typename T> struct __average4_CN<T, 4> {
static __host__ __device__ T _average4_CN(const T &p00, const T &p01, const T &p10, const T &p11)
{
    T out;
    out.x = ((Ncv32s)p00.x + p01.x + p10.x + p11.x + 2) / 4;
    out.y = ((Ncv32s)p00.y + p01.y + p10.y + p11.y + 2) / 4;
    out.z = ((Ncv32s)p00.z + p01.z + p10.z + p11.z + 2) / 4;
    out.w = ((Ncv32s)p00.w + p01.w + p10.w + p11.w + 2) / 4;
    return out;
}};

template<> struct __average4_CN<float4, 4> {
static __host__ __device__ float4 _average4_CN(const float4 &p00, const float4 &p01, const float4 &p10, const float4 &p11)
{
    float4 out;
    out.x = (p00.x + p01.x + p10.x + p11.x) / 4;
    out.y = (p00.y + p01.y + p10.y + p11.y) / 4;
    out.z = (p00.z + p01.z + p10.z + p11.z) / 4;
    out.w = (p00.w + p01.w + p10.w + p11.w) / 4;
    return out;
}};

template<> struct __average4_CN<double4, 4> {
static __host__ __device__ double4 _average4_CN(const double4 &p00, const double4 &p01, const double4 &p10, const double4 &p11)
{
    double4 out;
    out.x = (p00.x + p01.x + p10.x + p11.x) / 4;
    out.y = (p00.y + p01.y + p10.y + p11.y) / 4;
    out.z = (p00.z + p01.z + p10.z + p11.z) / 4;
    out.w = (p00.w + p01.w + p10.w + p11.w) / 4;
    return out;
}};

template<typename T> static __host__ __device__ T _average4(const T &p00, const T &p01, const T &p10, const T &p11)
{
    return __average4_CN<T, NC(T)>::_average4_CN(p00, p01, p10, p11);
}
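//Editorial note: for integer pixel types the "+ 2" bias above turns the /4
//into round-to-nearest; the float and double specializations average exactly.
#if 0
uchar1 p = _pixMake((Ncv8u)1), q = _pixMake((Ncv8u)2);
uchar1 avg = _average4(p, p, q, q); //(1 + 1 + 2 + 2 + 2) / 4 == 2
#endif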
template<typename Tin, typename Tout, Ncv32u CN> struct __lerp_CN {static __host__ __device__ Tout _lerp_CN(const Tin &a, const Tin &b, Ncv32f d);};

template<typename Tin, typename Tout> struct __lerp_CN<Tin, Tout, 1> {
static __host__ __device__ Tout _lerp_CN(const Tin &a, const Tin &b, Ncv32f d)
{
    typedef typename TConvVec2Base<Tout>::TBase TB;
    return _pixMake(TB(b.x * d + a.x * (1 - d)));
}};

template<typename Tin, typename Tout> struct __lerp_CN<Tin, Tout, 3> {
static __host__ __device__ Tout _lerp_CN(const Tin &a, const Tin &b, Ncv32f d)
{
    typedef typename TConvVec2Base<Tout>::TBase TB;
    return _pixMake(TB(b.x * d + a.x * (1 - d)),
                    TB(b.y * d + a.y * (1 - d)),
                    TB(b.z * d + a.z * (1 - d)));
}};

template<typename Tin, typename Tout> struct __lerp_CN<Tin, Tout, 4> {
static __host__ __device__ Tout _lerp_CN(const Tin &a, const Tin &b, Ncv32f d)
{
    typedef typename TConvVec2Base<Tout>::TBase TB;
    return _pixMake(TB(b.x * d + a.x * (1 - d)),
                    TB(b.y * d + a.y * (1 - d)),
                    TB(b.z * d + a.z * (1 - d)),
                    TB(b.w * d + a.w * (1 - d)));
}};

template<typename Tin, typename Tout> static __host__ __device__ Tout _lerp(const Tin &a, const Tin &b, Ncv32f d)
{
    return __lerp_CN<Tin, Tout, NC(Tin)>::_lerp_CN(a, b, d);
}
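//Editorial note: _lerp computes b*d + a*(1-d) per channel, so d == 0 yields a
//and d == 1 yields b; Tout is typically a float vector to keep the precision.
#if 0
uchar1 a = _pixMake((Ncv8u)10), b = _pixMake((Ncv8u)20);
float1 mid = _lerp<uchar1, float1>(a, b, 0.5f); //mid.x == 15.f
#endif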
template<typename T>
__global__ void kernelDownsampleX2(T *d_src,
                                   Ncv32u srcPitch,
                                   T *d_dst,
                                   Ncv32u dstPitch,
                                   NcvSize32u dstRoi)
{
    Ncv32u i = blockIdx.y * blockDim.y + threadIdx.y;
    Ncv32u j = blockIdx.x * blockDim.x + threadIdx.x;

    if (i < dstRoi.height && j < dstRoi.width)
    {
        T *d_src_line1 = (T *)((Ncv8u *)d_src + (2 * i + 0) * srcPitch);
        T *d_src_line2 = (T *)((Ncv8u *)d_src + (2 * i + 1) * srcPitch);
        T *d_dst_line = (T *)((Ncv8u *)d_dst + i * dstPitch);

        T p00 = d_src_line1[2*j+0];
        T p01 = d_src_line1[2*j+1];
        T p10 = d_src_line2[2*j+0];
        T p11 = d_src_line2[2*j+1];

        d_dst_line[j] = _average4(p00, p01, p10, p11);
    }
}
namespace cv { namespace gpu { namespace cudev
{
    namespace pyramid
    {
        template <typename T> void kernelDownsampleX2_gpu(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream)
        {
            dim3 bDim(16, 8);
            dim3 gDim(divUp(src.cols, bDim.x), divUp(src.rows, bDim.y));

            kernelDownsampleX2<<<gDim, bDim, 0, stream>>>((T*)src.data, static_cast<Ncv32u>(src.step),
                (T*)dst.data, static_cast<Ncv32u>(dst.step), NcvSize32u(dst.cols, dst.rows));

            cudaSafeCall( cudaGetLastError() );

            if (stream == 0)
                cudaSafeCall( cudaDeviceSynchronize() );
        }

        template void kernelDownsampleX2_gpu<uchar1>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
        template void kernelDownsampleX2_gpu<uchar3>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
        template void kernelDownsampleX2_gpu<uchar4>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);

        template void kernelDownsampleX2_gpu<ushort1>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
        template void kernelDownsampleX2_gpu<ushort3>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
        template void kernelDownsampleX2_gpu<ushort4>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);

        template void kernelDownsampleX2_gpu<float1>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
        template void kernelDownsampleX2_gpu<float3>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
        template void kernelDownsampleX2_gpu<float4>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
    }
}}}

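//Illustrative host-side sketch (editorial; assumes a CV_8UC4 GpuMat "src" and
//a preallocated half-size "dst" -- both convert implicitly to PtrStepSzb):
#if 0
cv::gpu::cudev::pyramid::kernelDownsampleX2_gpu<uchar4>(src, dst, 0);
#endif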
template<typename T>
__global__ void kernelInterpolateFrom1(T *d_srcTop,
                                       Ncv32u srcTopPitch,
                                       NcvSize32u szTopRoi,
                                       T *d_dst,
                                       Ncv32u dstPitch,
                                       NcvSize32u dstRoi)
{
    Ncv32u i = blockIdx.y * blockDim.y + threadIdx.y;
    Ncv32u j = blockIdx.x * blockDim.x + threadIdx.x;

    if (i < dstRoi.height && j < dstRoi.width)
    {
        Ncv32f ptTopX = 1.0f * (szTopRoi.width - 1) * j / (dstRoi.width - 1);
        Ncv32f ptTopY = 1.0f * (szTopRoi.height - 1) * i / (dstRoi.height - 1);
        Ncv32u xl = (Ncv32u)ptTopX;
        Ncv32u xh = xl+1;
        Ncv32f dx = ptTopX - xl;
        Ncv32u yl = (Ncv32u)ptTopY;
        Ncv32u yh = yl+1;
        Ncv32f dy = ptTopY - yl;

        T *d_src_line1 = (T *)((Ncv8u *)d_srcTop + yl * srcTopPitch);
        T *d_src_line2 = (T *)((Ncv8u *)d_srcTop + yh * srcTopPitch);
        T *d_dst_line = (T *)((Ncv8u *)d_dst + i * dstPitch);

        T p00, p01, p10, p11;
        p00 = d_src_line1[xl];
        p01 = xh < szTopRoi.width ? d_src_line1[xh] : p00;
        p10 = yh < szTopRoi.height ? d_src_line2[xl] : p00;
        p11 = (xh < szTopRoi.width && yh < szTopRoi.height) ? d_src_line2[xh] : p00;
        typedef typename TConvBase2Vec<Ncv32f, NC(T)>::TVec TVFlt;
        TVFlt m_00_01 = _lerp<T, TVFlt>(p00, p01, dx);
        TVFlt m_10_11 = _lerp<T, TVFlt>(p10, p11, dx);
        TVFlt mixture = _lerp<TVFlt, TVFlt>(m_00_01, m_10_11, dy);
        T outPix = _pixDemoteClampZ<TVFlt, T>(mixture);

        d_dst_line[j] = outPix;
    }
}
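//Editorial note: the kernel above assembles the bilinear sample from three
//_lerp calls -- two along x, one along y -- and then demotes the float
//mixture back to T: out = lerp(lerp(p00, p01, dx), lerp(p10, p11, dx), dy).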
namespace cv { namespace gpu { namespace cudev
{
    namespace pyramid
    {
        template <typename T> void kernelInterpolateFrom1_gpu(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream)
        {
            dim3 bDim(16, 8);
            dim3 gDim(divUp(dst.cols, bDim.x), divUp(dst.rows, bDim.y));

            kernelInterpolateFrom1<<<gDim, bDim, 0, stream>>>((T*) src.data, static_cast<Ncv32u>(src.step), NcvSize32u(src.cols, src.rows),
                (T*) dst.data, static_cast<Ncv32u>(dst.step), NcvSize32u(dst.cols, dst.rows));

            cudaSafeCall( cudaGetLastError() );

            if (stream == 0)
                cudaSafeCall( cudaDeviceSynchronize() );
        }

        template void kernelInterpolateFrom1_gpu<uchar1>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
        template void kernelInterpolateFrom1_gpu<uchar3>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
        template void kernelInterpolateFrom1_gpu<uchar4>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);

        template void kernelInterpolateFrom1_gpu<ushort1>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
        template void kernelInterpolateFrom1_gpu<ushort3>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
        template void kernelInterpolateFrom1_gpu<ushort4>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);

        template void kernelInterpolateFrom1_gpu<float1>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
        template void kernelInterpolateFrom1_gpu<float3>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
        template void kernelInterpolateFrom1_gpu<float4>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
    }
}}}

#if 0 //def _WIN32

template<typename T>
static T _interpLinear(const T &a, const T &b, Ncv32f d)
{
    typedef typename TConvBase2Vec<Ncv32f, NC(T)>::TVec TVFlt;
    TVFlt tmp = _lerp<T, TVFlt>(a, b, d);
    return _pixDemoteClampZ<TVFlt, T>(tmp);
}

template<typename T>
static T _interpBilinear(const NCVMatrix<T> &refLayer, Ncv32f x, Ncv32f y)
{
    Ncv32u xl = (Ncv32u)x;
    Ncv32u xh = xl+1;
    Ncv32f dx = x - xl;
    Ncv32u yl = (Ncv32u)y;
    Ncv32u yh = yl+1;
    Ncv32f dy = y - yl;
    T p00, p01, p10, p11;
    p00 = refLayer.at(xl, yl);
    p01 = xh < refLayer.width() ? refLayer.at(xh, yl) : p00;
    p10 = yh < refLayer.height() ? refLayer.at(xl, yh) : p00;
    p11 = (xh < refLayer.width() && yh < refLayer.height()) ? refLayer.at(xh, yh) : p00;
    typedef typename TConvBase2Vec<Ncv32f, NC(T)>::TVec TVFlt;
    TVFlt m_00_01 = _lerp<T, TVFlt>(p00, p01, dx);
    TVFlt m_10_11 = _lerp<T, TVFlt>(p10, p11, dx);
    TVFlt mixture = _lerp<TVFlt, TVFlt>(m_00_01, m_10_11, dy);
    return _pixDemoteClampZ<TVFlt, T>(mixture);
}

template <class T>
NCVImagePyramid<T>::NCVImagePyramid(const NCVMatrix<T> &img,
                                    Ncv8u numLayers,
                                    INCVMemAllocator &alloc,
                                    cudaStream_t cuStream)
{
    this->_isInitialized = false;
    ncvAssertPrintReturn(img.memType() == alloc.memType(), "NCVImagePyramid::ctor error", );

    this->layer0 = &img;
    NcvSize32u szLastLayer(img.width(), img.height());
    this->nLayers = 1;

    NCV_SET_SKIP_COND(alloc.isCounting());
    NcvBool bDeviceCode = alloc.memType() == NCVMemoryTypeDevice;

    if (numLayers == 0)
    {
        numLayers = 255; //the loop below cuts off as soon as either dimension reaches 1
    }

#ifdef SELF_CHECK_GPU
    NCVMemNativeAllocator allocCPU(NCVMemoryTypeHostPinned, 512);
#endif

    for (Ncv32u i=0; i<(Ncv32u)numLayers-1; i++)
    {
        NcvSize32u szCurLayer(szLastLayer.width / 2, szLastLayer.height / 2);
        if (szCurLayer.width == 0 || szCurLayer.height == 0)
        {
            break;
        }

        this->pyramid.push_back(new NCVMatrixAlloc<T>(alloc, szCurLayer.width, szCurLayer.height));
        ncvAssertPrintReturn(((NCVMatrixAlloc<T> *)(this->pyramid[i]))->isMemAllocated(), "NCVImagePyramid::ctor error", );
        this->nLayers++;

        //fill in the layer
        NCV_SKIP_COND_BEGIN

        const NCVMatrix<T> *prevLayer = i == 0 ? this->layer0 : this->pyramid[i-1];
        NCVMatrix<T> *curLayer = this->pyramid[i];

        if (bDeviceCode)
        {
            dim3 bDim(16, 8);
            dim3 gDim(divUp(szCurLayer.width, bDim.x), divUp(szCurLayer.height, bDim.y));
            kernelDownsampleX2<<<gDim, bDim, 0, cuStream>>>(prevLayer->ptr(),
                                                            prevLayer->pitch(),
                                                            curLayer->ptr(),
                                                            curLayer->pitch(),
                                                            szCurLayer);
            ncvAssertPrintReturn(cudaSuccess == cudaGetLastError(), "NCVImagePyramid::ctor error", );

#ifdef SELF_CHECK_GPU
            NCVMatrixAlloc<T> h_prevLayer(allocCPU, prevLayer->width(), prevLayer->height());
            ncvAssertPrintReturn(h_prevLayer.isMemAllocated(), "Validation failure in NCVImagePyramid::ctor", );
            NCVMatrixAlloc<T> h_curLayer(allocCPU, curLayer->width(), curLayer->height());
            ncvAssertPrintReturn(h_curLayer.isMemAllocated(), "Validation failure in NCVImagePyramid::ctor", );
            ncvAssertPrintReturn(NCV_SUCCESS == prevLayer->copy2D(h_prevLayer, prevLayer->size(), cuStream), "Validation failure in NCVImagePyramid::ctor", );
            ncvAssertPrintReturn(NCV_SUCCESS == curLayer->copy2D(h_curLayer, curLayer->size(), cuStream), "Validation failure in NCVImagePyramid::ctor", );
            ncvAssertPrintReturn(cudaSuccess == cudaStreamSynchronize(cuStream), "Validation failure in NCVImagePyramid::ctor", );
            for (Ncv32u i=0; i<szCurLayer.height; i++)
            {
                for (Ncv32u j=0; j<szCurLayer.width; j++)
                {
                    T p00 = h_prevLayer.at(2*j+0, 2*i+0);
                    T p01 = h_prevLayer.at(2*j+1, 2*i+0);
                    T p10 = h_prevLayer.at(2*j+0, 2*i+1);
                    T p11 = h_prevLayer.at(2*j+1, 2*i+1);
                    T outGold = _average4(p00, p01, p10, p11);
                    T outGPU = h_curLayer.at(j, i);
                    ncvAssertPrintReturn(0 == memcmp(&outGold, &outGPU, sizeof(T)), "Validation failure in NCVImagePyramid::ctor with kernelDownsampleX2", );
                }
            }
#endif
        }
        else
        {
            for (Ncv32u i=0; i<szCurLayer.height; i++)
            {
                for (Ncv32u j=0; j<szCurLayer.width; j++)
                {
                    T p00 = prevLayer->at(2*j+0, 2*i+0);
                    T p01 = prevLayer->at(2*j+1, 2*i+0);
                    T p10 = prevLayer->at(2*j+0, 2*i+1);
                    T p11 = prevLayer->at(2*j+1, 2*i+1);
                    curLayer->at(j, i) = _average4(p00, p01, p10, p11);
                }
            }
        }

        NCV_SKIP_COND_END

        szLastLayer = szCurLayer;
    }

    this->_isInitialized = true;
}

template <class T>
NCVImagePyramid<T>::~NCVImagePyramid()
{
}


template <class T>
NcvBool NCVImagePyramid<T>::isInitialized() const
{
    return this->_isInitialized;
}

template <class T>
NCVStatus NCVImagePyramid<T>::getLayer(NCVMatrix<T> &outImg,
                                       NcvSize32u outRoi,
                                       NcvBool bTrilinear,
                                       cudaStream_t cuStream) const
{
    ncvAssertReturn(this->isInitialized(), NCV_UNKNOWN_ERROR);
    ncvAssertReturn(outImg.memType() == this->layer0->memType(), NCV_MEM_RESIDENCE_ERROR);
    ncvAssertReturn(outRoi.width <= this->layer0->width() && outRoi.height <= this->layer0->height() &&
                    outRoi.width > 0 && outRoi.height > 0, NCV_DIMENSIONS_INVALID);

    if (outRoi.width == this->layer0->width() && outRoi.height == this->layer0->height())
    {
        ncvAssertReturnNcvStat(this->layer0->copy2D(outImg, NcvSize32u(this->layer0->width(), this->layer0->height()), cuStream));
        return NCV_SUCCESS;
    }

    Ncv32f lastScale = 1.0f;
    Ncv32f curScale;
    const NCVMatrix<T> *lastLayer = this->layer0;
    const NCVMatrix<T> *curLayer = NULL;
    NcvBool bUse2Refs = false;

    for (Ncv32u i=0; i<this->nLayers-1; i++)
    {
        curScale = lastScale * 0.5f;
        curLayer = this->pyramid[i];

        if (outRoi.width == curLayer->width() && outRoi.height == curLayer->height())
        {
            ncvAssertReturnNcvStat(this->pyramid[i]->copy2D(outImg, NcvSize32u(this->pyramid[i]->width(), this->pyramid[i]->height()), cuStream));
            return NCV_SUCCESS;
        }

        if (outRoi.width >= curLayer->width() && outRoi.height >= curLayer->height())
        {
            if (outRoi.width < lastLayer->width() && outRoi.height < lastLayer->height())
            {
                bUse2Refs = true;
            }
            break;
        }

        lastScale = curScale;
        lastLayer = curLayer;
    }

    bUse2Refs = bUse2Refs && bTrilinear;

    NCV_SET_SKIP_COND(outImg.memType() == NCVMemoryTypeNone);
    NcvBool bDeviceCode = this->layer0->memType() == NCVMemoryTypeDevice;

#ifdef SELF_CHECK_GPU
    NCVMemNativeAllocator allocCPU(NCVMemoryTypeHostPinned, 512);
#endif

    NCV_SKIP_COND_BEGIN

    if (bDeviceCode)
    {
        ncvAssertReturn(bUse2Refs == false, NCV_NOT_IMPLEMENTED);

        dim3 bDim(16, 8);
        dim3 gDim(divUp(outRoi.width, bDim.x), divUp(outRoi.height, bDim.y));
        kernelInterpolateFrom1<<<gDim, bDim, 0, cuStream>>>(lastLayer->ptr(),
                                                            lastLayer->pitch(),
                                                            lastLayer->size(),
                                                            outImg.ptr(),
                                                            outImg.pitch(),
                                                            outRoi);
        ncvAssertCUDAReturn(cudaGetLastError(), NCV_CUDA_ERROR);

#ifdef SELF_CHECK_GPU
        ncvSafeMatAlloc(h_lastLayer, T, allocCPU, lastLayer->width(), lastLayer->height(), NCV_ALLOCATOR_BAD_ALLOC);
        ncvSafeMatAlloc(h_outImg, T, allocCPU, outImg.width(), outImg.height(), NCV_ALLOCATOR_BAD_ALLOC);
        ncvAssertReturnNcvStat(lastLayer->copy2D(h_lastLayer, lastLayer->size(), cuStream));
        ncvAssertReturnNcvStat(outImg.copy2D(h_outImg, outRoi, cuStream));
        ncvAssertCUDAReturn(cudaStreamSynchronize(cuStream), NCV_CUDA_ERROR);

        for (Ncv32u i=0; i<outRoi.height; i++)
        {
            for (Ncv32u j=0; j<outRoi.width; j++)
            {
                NcvSize32u szTopLayer(lastLayer->width(), lastLayer->height());
                Ncv32f ptTopX = 1.0f * (szTopLayer.width - 1) * j / (outRoi.width - 1);
                Ncv32f ptTopY = 1.0f * (szTopLayer.height - 1) * i / (outRoi.height - 1);
                T outGold = _interpBilinear(h_lastLayer, ptTopX, ptTopY);
                ncvAssertPrintReturn(0 == memcmp(&outGold, &h_outImg.at(j,i), sizeof(T)), "Validation failure in NCVImagePyramid::ctor with kernelInterpolateFrom1", NCV_UNKNOWN_ERROR);
            }
        }
#endif
    }
    else
    {
        for (Ncv32u i=0; i<outRoi.height; i++)
        {
            for (Ncv32u j=0; j<outRoi.width; j++)
            {
                //top layer pixel (always exists)
                NcvSize32u szTopLayer(lastLayer->width(), lastLayer->height());
                Ncv32f ptTopX = 1.0f * (szTopLayer.width - 1) * j / (outRoi.width - 1);
                Ncv32f ptTopY = 1.0f * (szTopLayer.height - 1) * i / (outRoi.height - 1);
                T topPix = _interpBilinear(*lastLayer, ptTopX, ptTopY);
                T trilinearPix = topPix;

                if (bUse2Refs)
                {
                    //bottom layer pixel (exists only if the requested scale is greater than the smallest layer scale)
                    NcvSize32u szBottomLayer(curLayer->width(), curLayer->height());
                    Ncv32f ptBottomX = 1.0f * (szBottomLayer.width - 1) * j / (outRoi.width - 1);
                    Ncv32f ptBottomY = 1.0f * (szBottomLayer.height - 1) * i / (outRoi.height - 1);
                    T bottomPix = _interpBilinear(*curLayer, ptBottomX, ptBottomY);

                    Ncv32f scale = (1.0f * outRoi.width / layer0->width() + 1.0f * outRoi.height / layer0->height()) / 2;
                    Ncv32f dl = (scale - curScale) / (lastScale - curScale);
                    dl = CLAMP(dl, 0.0f, 1.0f);
                    trilinearPix = _interpLinear(bottomPix, topPix, dl);
                }

                outImg.at(j, i) = trilinearPix;
            }
        }
    }

    NCV_SKIP_COND_END

    return NCV_SUCCESS;
}

template class NCVImagePyramid<uchar1>;
template class NCVImagePyramid<uchar3>;
template class NCVImagePyramid<uchar4>;
template class NCVImagePyramid<ushort1>;
template class NCVImagePyramid<ushort3>;
template class NCVImagePyramid<ushort4>;
template class NCVImagePyramid<uint1>;
template class NCVImagePyramid<uint3>;
template class NCVImagePyramid<uint4>;
template class NCVImagePyramid<float1>;
template class NCVImagePyramid<float3>;
template class NCVImagePyramid<float4>;

#endif //_WIN32

#endif /* CUDA_DISABLER */
@@ -1,99 +0,0 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
//  By downloading, copying, installing or using the software you agree to this license.
//  If you do not agree to this license, do not download, install,
//  copy or use the software.
//
//
//                           License Agreement
//                For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
//   * Redistribution's of source code must retain the above copyright notice,
//     this list of conditions and the following disclaimer.
//
//   * Redistribution's in binary form must reproduce the above copyright notice,
//     this list of conditions and the following disclaimer in the documentation
//     and/or other materials provided with the distribution.
//
//   * The name of the copyright holders may not be used to endorse or promote products
//     derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/

#ifndef _ncvpyramid_hpp_
#define _ncvpyramid_hpp_

#include <memory>
#include <vector>
#include "NCV.hpp"

#if 0 //def _WIN32

template <class T>
class NCV_EXPORTS NCVMatrixStack
{
public:
    NCVMatrixStack() {this->_arr.clear();}
    ~NCVMatrixStack()
    {
        const Ncv32u nElem = this->_arr.size();
        for (Ncv32u i=0; i<nElem; i++)
        {
            pop_back();
        }
    }
    void push_back(NCVMatrix<T> *elem) {this->_arr.push_back(std::tr1::shared_ptr< NCVMatrix<T> >(elem));}
    void pop_back() {this->_arr.pop_back();}
    NCVMatrix<T> * operator [] (int i) const {return this->_arr[i].get();}
private:
    std::vector< std::tr1::shared_ptr< NCVMatrix<T> > > _arr;
};


template <class T>
class NCV_EXPORTS NCVImagePyramid
{
public:

    NCVImagePyramid(const NCVMatrix<T> &img,
                    Ncv8u nLayers,
                    INCVMemAllocator &alloc,
                    cudaStream_t cuStream);
    ~NCVImagePyramid();
    NcvBool isInitialized() const;
    NCVStatus getLayer(NCVMatrix<T> &outImg,
                       NcvSize32u outRoi,
                       NcvBool bTrilinear,
                       cudaStream_t cuStream) const;

private:

    NcvBool _isInitialized;
    const NCVMatrix<T> *layer0;
    NCVMatrixStack<T> pyramid;
    Ncv32u nLayers;
};

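//Illustrative usage sketch (editorial; "d_img", "d_out", "alloc" and "stream"
//are assumed to be a device image, an output matrix, an allocator and a CUDA
//stream set up by the caller):
#if 0
NCVImagePyramid<uchar4> pyr(d_img, 0 /*auto depth*/, alloc, stream);
if (pyr.isInitialized())
    pyr.getLayer(d_out, NcvSize32u(640, 480), false /*bTrilinear*/, stream);
#endif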
#endif //_WIN32

#endif //_ncvpyramid_hpp_
@@ -1,221 +0,0 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
//  By downloading, copying, installing or using the software you agree to this license.
//  If you do not agree to this license, do not download, install,
//  copy or use the software.
//
//
//                           License Agreement
//                For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
//   * Redistribution's of source code must retain the above copyright notice,
//     this list of conditions and the following disclaimer.
//
//   * Redistribution's in binary form must reproduce the above copyright notice,
//     this list of conditions and the following disclaimer in the documentation
//     and/or other materials provided with the distribution.
//
//   * The name of the copyright holders may not be used to endorse or promote products
//     derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/

#ifndef _ncvruntimetemplates_hpp_
#define _ncvruntimetemplates_hpp_
#if defined _MSC_VER && _MSC_VER >= 1200
#pragma warning( disable: 4800 )
#endif


#include <stdarg.h>
#include <vector>

////////////////////////////////////////////////////////////////////////////////
// The Loki Library
// Copyright (c) 2001 by Andrei Alexandrescu
// This code accompanies the book:
// Alexandrescu, Andrei. "Modern C++ Design: Generic Programming and Design
//     Patterns Applied". Copyright (c) 2001. Addison-Wesley.
// Permission to use, copy, modify, distribute and sell this software for any
//     purpose is hereby granted without fee, provided that the above copyright
//     notice appear in all copies and that both that copyright notice and this
//     permission notice appear in supporting documentation.
// The author or Addison-Wesley Longman make no representations about the
//     suitability of this software for any purpose. It is provided "as is"
//     without express or implied warranty.
// http://loki-lib.sourceforge.net/index.php?n=Main.License
////////////////////////////////////////////////////////////////////////////////

namespace Loki
{
    //==============================================================================
    // class NullType
    // Used as a placeholder for "no type here"
    // Useful as an end marker in typelists
    //==============================================================================

    class NullType {};

    //==============================================================================
    // class template Typelist
    // The building block of typelists of any length
    // Use it through the LOKI_TYPELIST_NN macros
    // Defines nested types:
    //     Head (first element, a non-typelist type by convention)
    //     Tail (second element, can be another typelist)
    //==============================================================================

    template <class T, class U>
    struct Typelist
    {
        typedef T Head;
        typedef U Tail;
    };

    //==============================================================================
    // class template Int2Type
    // Converts each integral constant into a unique type
    // Invocation: Int2Type<v> where v is a compile-time constant integral
    // Defines 'value', an enum that evaluates to v
    //==============================================================================

    template <int v>
    struct Int2Type
    {
        enum { value = v };
    };

    namespace TL
    {
        //==============================================================================
        // class template TypeAt
        // Finds the type at a given index in a typelist
        // Invocation (TList is a typelist and index is a compile-time integral
        //     constant):
        // TypeAt<TList, index>::Result
        // returns the type in position 'index' in TList
        // If you pass an out-of-bounds index, the result is a compile-time error
        //==============================================================================

        template <class TList, unsigned int index> struct TypeAt;

        template <class Head, class Tail>
        struct TypeAt<Typelist<Head, Tail>, 0>
        {
            typedef Head Result;
        };

        template <class Head, class Tail, unsigned int i>
        struct TypeAt<Typelist<Head, Tail>, i>
        {
            typedef typename TypeAt<Tail, i - 1>::Result Result;
        };
    }
}

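//Editorial example: for Typelist<int, Typelist<float, NullType> >,
//TL::TypeAt<..., 0>::Result is int and TL::TypeAt<..., 1>::Result is float.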
////////////////////////////////////////////////////////////////////////////////
// Runtime boolean template instance dispatcher
// Cyril Crassin <cyril.crassin@icare3d.org>
// NVIDIA, 2010
////////////////////////////////////////////////////////////////////////////////

namespace NCVRuntimeTemplateBool
{
    //This struct is used to transform a list of parameters into template arguments
    //The idea is to build a typelist containing the arguments
    //and to pass this typelist to a user defined functor
    template<typename TList, int NumArguments, class Func>
    struct KernelCaller
    {
        //Convenience function used by the user
        //Takes a variable argument list, transforms it into a list
        static void call(Func *functor, ...)
        {
            //Vector used to collect arguments
            std::vector<int> templateParamList;

            //Variable argument list manipulation
            va_list listPointer;
            va_start(listPointer, functor);
            //Collect parameters into the list
            for(int i=0; i<NumArguments; i++)
            {
                int val = va_arg(listPointer, int);
                templateParamList.push_back(val);
            }
            va_end(listPointer);

            //Call the actual typelist building function
            call(*functor, templateParamList);
        }

        //Actual function called recursively to build a typelist based
        //on a list of values
        static void call( Func &functor, std::vector<int> &templateParamList)
        {
            //Get current parameter value in the list
            NcvBool val = templateParamList[templateParamList.size() - 1];
            templateParamList.pop_back();

            //Select the compile time value to add into the typelist
            //depending on the runtime variable and make recursive call.
            //Both versions are really instantiated
            if (val)
            {
                KernelCaller<
                    Loki::Typelist<typename Loki::Int2Type<1>, TList >,
                    NumArguments-1, Func >
                    ::call(functor, templateParamList);
            }
            else
            {
                KernelCaller<
                    Loki::Typelist<typename Loki::Int2Type<0>, TList >,
                    NumArguments-1, Func >
                    ::call(functor, templateParamList);
            }
        }
    };

    //Specialization for 0 value left in the list
    //-> actual kernel functor call
    template<class TList, class Func>
    struct KernelCaller<TList, 0, Func>
    {
        static void call(Func &functor)
        {
            //Call to the functor's kernel call method
            functor.call(TList()); //TList instantiated to get the method template parameter resolved
        }

        static void call(Func &functor, std::vector<int> &templateParams)
        {
            (void)templateParams;
            functor.call(TList());
        }
    };
}

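//Illustrative usage sketch (editorial; "MyFunctor" is hypothetical): two
//runtime booleans become compile-time Int2Type flags on the typelist that
//reaches MyFunctor::call.
#if 0
struct MyFunctor
{
    template <class TList>
    void call(TList)
    {
        //Loki::TL::TypeAt<TList, 0>::Result::value and <TList, 1>::Result::value
        //are compile-time constants here, usable as kernel template arguments
    }
};
MyFunctor f;
NCVRuntimeTemplateBool::KernelCaller<Loki::NullType, 2, MyFunctor>::call(&f, true, false);
#endif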
#endif //_ncvruntimetemplates_hpp_
@@ -79,10 +79,7 @@
#include "internal_shared.hpp"
#include "opencv2/core/stream_accessor.hpp"

#include "nvidia/core/NCV.hpp"
#include "nvidia/NPP_staging/NPP_staging.hpp"
#include "nvidia/NCVHaarObjectDetection.hpp"
#include "nvidia/NCVBroxOpticalFlow.hpp"
#include "opencv2/gpunvidia.hpp"
#endif /* defined(HAVE_CUDA) */

#endif /* __OPENCV_PRECOMP_H__ */