renamed gpunvidia -> gpulegacy

2013-04-18 10:14:43 +04:00
parent 7e91e1871d
commit 508fb6aa5b
62 changed files with 69 additions and 60 deletions
--- a/modules/gpulegacy/include/opencv2/gpulegacy.hpp
+++ b/modules/gpulegacy/include/opencv2/gpulegacy.hpp
@@ -0,0 +1,52 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                           License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#ifndef __OPENCV_GPULEGACY_HPP__
+#define __OPENCV_GPULEGACY_HPP__
+
+#include "opencv2/gpulegacy/NCV.hpp"
+#include "opencv2/gpulegacy/NPP_staging.hpp"
+#include "opencv2/gpulegacy/NCVPyramid.hpp"
+#include "opencv2/gpulegacy/NCVHaarObjectDetection.hpp"
+#include "opencv2/gpulegacy/NCVBroxOpticalFlow.hpp"
+
+#endif /* __OPENCV_GPULEGACY_HPP__ */
--- a/modules/gpulegacy/include/opencv2/gpulegacy/NCV.hpp
+++ b/modules/gpulegacy/include/opencv2/gpulegacy/NCV.hpp
--- a/modules/gpulegacy/include/opencv2/gpulegacy/NCVBroxOpticalFlow.hpp
+++ b/modules/gpulegacy/include/opencv2/gpulegacy/NCVBroxOpticalFlow.hpp
@@ -0,0 +1,104 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                           License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+////////////////////////////////////////////////////////////////////////////////
+//
+// NVIDIA CUDA implementation of Brox et al Optical Flow algorithm
+//
+// Algorithm is explained in the original paper:
+//      T. Brox, A. Bruhn, N. Papenberg, J. Weickert:
+//      High accuracy optical flow estimation based on a theory for warping.
+//      ECCV 2004.
+//
+// Implementation by Mikhail Smirnov
+// email: msmirnov@nvidia.com, devsupport@nvidia.com
+//
+// Credits for help with the code to:
+// Alexey Mendelenko, Anton Obukhov, and Alexander Kharlamov.
+//
+////////////////////////////////////////////////////////////////////////////////
+
+#ifndef _ncv_optical_flow_h_
+#define _ncv_optical_flow_h_
+
+#include "opencv2/gpulegacy/NCV.hpp"
+
+/// \brief Model and solver parameters, passed to NCVBroxOpticalFlow() as its \c desc argument
+struct NCVBroxOpticalFlowDescriptor
+{
+    /// flow smoothness
+    Ncv32f alpha;
+    /// gradient constancy importance
+    Ncv32f gamma;
+    /// pyramid scale factor
+    Ncv32f scale_factor;
+    /// number of lagged non-linearity iterations (inner loop)
+    Ncv32u number_of_inner_iterations;
+    /// number of warping iterations (number of pyramid levels)
+    Ncv32u number_of_outer_iterations;
+    /// number of linear system solver iterations
+    Ncv32u number_of_solver_iterations;
+};
+
+/////////////////////////////////////////////////////////////////////////////////////////
+/// \brief Compute optical flow
+///
+/// Based on method by Brox et al [2004]
+/// \param [in]  desc              model and solver parameters
+/// \param [in]  gpu_mem_allocator GPU memory allocator
+/// \param [in]  frame0            source frame
+/// \param [in]  frame1            frame to track
+/// \param [out] u                 flow horizontal component (along \b x axis)
+/// \param [out] v                 flow vertical component (along \b y axis)
+/// \param [in]  stream            CUDA stream handle supplied by the caller (how it is used is not visible in this header)
+/// \return                        computation status
+/////////////////////////////////////////////////////////////////////////////////////////
+CV_EXPORTS
+NCVStatus NCVBroxOpticalFlow(const NCVBroxOpticalFlowDescriptor desc,
+                             INCVMemAllocator &gpu_mem_allocator,
+                             const NCVMatrix<Ncv32f> &frame0,
+                             const NCVMatrix<Ncv32f> &frame1,
+                             NCVMatrix<Ncv32f> &u,
+                             NCVMatrix<Ncv32f> &v,
+                             cudaStream_t stream);
+
+#endif
--- a/modules/gpulegacy/include/opencv2/gpulegacy/NCVHaarObjectDetection.hpp
+++ b/modules/gpulegacy/include/opencv2/gpulegacy/NCVHaarObjectDetection.hpp
@@ -0,0 +1,461 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                           License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+////////////////////////////////////////////////////////////////////////////////
+//
+// NVIDIA CUDA implementation of Viola-Jones Object Detection Framework
+//
+// The algorithm and code are explained in the upcoming GPU Computing Gems
+// chapter in detail:
+//
+//   Anton Obukhov, "Haar Classifiers for Object Detection with CUDA"
+//   PDF URL placeholder
+//   email: aobukhov@nvidia.com, devsupport@nvidia.com
+//
+// Credits for help with the code to:
+// Alexey Mendelenko, Cyril Crassin, and Mikhail Smirnov.
+//
+////////////////////////////////////////////////////////////////////////////////
+
+#ifndef _ncvhaarobjectdetection_hpp_
+#define _ncvhaarobjectdetection_hpp_
+
+#include "opencv2/gpulegacy/NCV.hpp"
+
+
+//==============================================================================
+//
+// Guaranteed size cross-platform classifier structures
+//
+//==============================================================================
+#if defined __GNUC__ && (__GNUC__ > 3 || (__GNUC__ == 3 && __GNUC_MINOR__ >= 3)) // may_alias exists since GCC 3.3; the old test compared major and minor independently and so skipped every x.0-x.4 release (and clang, which reports 4.2)
+typedef Ncv32f __attribute__((__may_alias__)) Ncv32f_a; // float alias exempt from strict-aliasing assumptions; used below to reinterpret 32-bit words as floats
+#else
+typedef Ncv32f Ncv32f_a; // no may_alias support detected: plain alias
+#endif
+
+struct HaarFeature64
+{
+    uint2 _ui2; // packed storage: .x is reinterpreted as an NcvRect8u, .y as the bits of the float weight
+#define HaarFeature64_CreateCheck_MaxRectField                  0xFF
+    /// Stores the rectangle into _ui2.x with each field narrowed to Ncv8u. All four fields must be <= 0xFF, otherwise ncvAssertReturn is given NCV_HAAR_TOO_LARGE_FEATURES (macro defined outside this header; expected to return that status). The last two parameters are unused.
+    __host__ NCVStatus setRect(Ncv32u rectX, Ncv32u rectY, Ncv32u rectWidth, Ncv32u rectHeight, Ncv32u /*clsWidth*/, Ncv32u /*clsHeight*/)
+    {
+        ncvAssertReturn(rectX <= HaarFeature64_CreateCheck_MaxRectField && rectY <= HaarFeature64_CreateCheck_MaxRectField, NCV_HAAR_TOO_LARGE_FEATURES); // x/y are narrowed to 8 bits below too; reject instead of silently truncating
+        ncvAssertReturn(rectWidth <= HaarFeature64_CreateCheck_MaxRectField && rectHeight <= HaarFeature64_CreateCheck_MaxRectField, NCV_HAAR_TOO_LARGE_FEATURES);
+        ((NcvRect8u*)&(this->_ui2.x))->x = (Ncv8u)rectX;
+        ((NcvRect8u*)&(this->_ui2.x))->y = (Ncv8u)rectY;
+        ((NcvRect8u*)&(this->_ui2.x))->width = (Ncv8u)rectWidth;
+        ((NcvRect8u*)&(this->_ui2.x))->height = (Ncv8u)rectHeight;
+        return NCV_SUCCESS;
+    }
+    /// Writes the bit pattern of \a weight into _ui2.y through the may_alias float type; always returns NCV_SUCCESS.
+    __host__ NCVStatus setWeight(Ncv32f weight)
+    {
+        ((Ncv32f_a*)&(this->_ui2.y))[0] = weight;
+        return NCV_SUCCESS;
+    }
+    /// Reads the rectangle back from _ui2.x and widens each 8-bit field into the four output pointers (none may be null: they are dereferenced unconditionally).
+    __device__ __host__ void getRect(Ncv32u *rectX, Ncv32u *rectY, Ncv32u *rectWidth, Ncv32u *rectHeight)
+    {
+        NcvRect8u tmpRect = *(NcvRect8u*)(&this->_ui2.x);
+        *rectX = tmpRect.x;
+        *rectY = tmpRect.y;
+        *rectWidth = tmpRect.width;
+        *rectHeight = tmpRect.height;
+    }
+    /// Returns _ui2.y reinterpreted as a float (the value stored by setWeight).
+    __device__ __host__ Ncv32f getWeight(void)
+    {
+        return *(Ncv32f_a*)(&this->_ui2.y);
+    }
+};
+
+
+struct HaarFeatureDescriptor32
+{
+private:
+    // Layout of `desc`, as implied by the constants below: bit 31 tilted, bit 30 left-node-leaf, bit 29 right-node-leaf, bits 24-28 feature count, bits 0-23 feature offset.
+#define HaarFeatureDescriptor32_Interpret_MaskFlagTilted        0x80000000
+#define HaarFeatureDescriptor32_Interpret_MaskFlagLeftNodeLeaf  0x40000000
+#define HaarFeatureDescriptor32_Interpret_MaskFlagRightNodeLeaf 0x20000000
+#define HaarFeatureDescriptor32_CreateCheck_MaxNumFeatures      0x1F
+#define HaarFeatureDescriptor32_NumFeatures_Shift               24
+#define HaarFeatureDescriptor32_CreateCheck_MaxFeatureOffset    0x00FFFFFF
+
+    Ncv32u desc;
+
+public:
+    /// Packs the three flags, the feature count and the feature offset into `desc`. Returns an NCV_HAAR_TOO_MANY_FEATURES_* status, leaving `desc` untouched, when a value does not fit its field.
+    __host__ NCVStatus create(NcvBool bTilted, NcvBool bLeftLeaf, NcvBool bRightLeaf,
+                              Ncv32u numFeatures, Ncv32u offsetFeatures)
+    {
+        // Guard clauses: each value must fit the bit field reserved for it.
+        if (numFeatures > HaarFeatureDescriptor32_CreateCheck_MaxNumFeatures)
+            return NCV_HAAR_TOO_MANY_FEATURES_IN_CLASSIFIER;
+        if (offsetFeatures > HaarFeatureDescriptor32_CreateCheck_MaxFeatureOffset)
+            return NCV_HAAR_TOO_MANY_FEATURES_IN_CASCADE;
+        // Assemble the word in a local, then publish it in a single store.
+        Ncv32u packed = offsetFeatures | (numFeatures << HaarFeatureDescriptor32_NumFeatures_Shift);
+        if (bTilted)
+            packed |= HaarFeatureDescriptor32_Interpret_MaskFlagTilted;
+        if (bLeftLeaf)
+            packed |= HaarFeatureDescriptor32_Interpret_MaskFlagLeftNodeLeaf;
+        if (bRightLeaf)
+            packed |= HaarFeatureDescriptor32_Interpret_MaskFlagRightNodeLeaf;
+        desc = packed;
+        return NCV_SUCCESS;
+    }
+    /// True when the tilted flag (bit 31) is set.
+    __device__ __host__ NcvBool isTilted()
+    {
+        return (desc & HaarFeatureDescriptor32_Interpret_MaskFlagTilted) != 0;
+    }
+    /// True when the left-node-leaf flag (bit 30) is set.
+    __device__ __host__ NcvBool isLeftNodeLeaf()
+    {
+        return (desc & HaarFeatureDescriptor32_Interpret_MaskFlagLeftNodeLeaf) != 0;
+    }
+    /// True when the right-node-leaf flag (bit 29) is set.
+    __device__ __host__ NcvBool isRightNodeLeaf()
+    {
+        return (desc & HaarFeatureDescriptor32_Interpret_MaskFlagRightNodeLeaf) != 0;
+    }
+    /// Extracts the feature count stored in bits 24-28.
+    __device__ __host__ Ncv32u getNumFeatures()
+    {
+        return (desc >> HaarFeatureDescriptor32_NumFeatures_Shift) & HaarFeatureDescriptor32_CreateCheck_MaxNumFeatures;
+    }
+    /// Extracts the feature offset stored in the low 24 bits.
+    __device__ __host__ Ncv32u getFeaturesOffset()
+    {
+        return desc & HaarFeatureDescriptor32_CreateCheck_MaxFeatureOffset;
+    }
+};
+
+struct HaarClassifierNodeDescriptor32
+{
+    uint1 _ui1; // single 32-bit word holding either a float leaf value or an unsigned node offset; the struct itself does not record which
+    /// Stores \a leafValue by writing its float bits over _ui1; always returns NCV_SUCCESS.
+    __host__ NCVStatus create(Ncv32f leafValue)
+    {
+        *(Ncv32f_a *)&this->_ui1 = leafValue;
+        return NCV_SUCCESS;
+    }
+    /// Stores \a offsetHaarClassifierNode as a plain unsigned value in _ui1.x; always returns NCV_SUCCESS.
+    __host__ NCVStatus create(Ncv32u offsetHaarClassifierNode)
+    {
+        this->_ui1.x = offsetHaarClassifierNode;
+        return NCV_SUCCESS;
+    }
+    /// Host-side read of the word reinterpreted as a float (through the may_alias typedef).
+    __host__ Ncv32f getLeafValueHost(void)
+    {
+        return *(Ncv32f_a *)&this->_ui1.x;
+    }
+    // Device-side read of the same float; only compiled when __CUDACC__ is defined because it uses the __int_as_float intrinsic.
+#ifdef __CUDACC__
+    __device__ Ncv32f getLeafValue(void)
+    {
+        return __int_as_float(this->_ui1.x);
+    }
+#endif
+    /// Returns the word as an unsigned offset, without checking which create() overload wrote it.
+    __device__ __host__ Ncv32u getNextNodeOffset(void)
+    {
+        return this->_ui1.x;
+    }
+};
+
+#if defined __GNUC__ && (__GNUC__ > 3 || (__GNUC__ == 3 && __GNUC_MINOR__ >= 3)) // may_alias exists since GCC 3.3; the old test compared major and minor independently and so skipped every x.0-x.4 release (and clang, which reports 4.2)
+typedef Ncv32u __attribute__((__may_alias__)) Ncv32u_a; // unsigned alias exempt from strict-aliasing assumptions; used below to read floats and small structs as raw 32-bit words
+#else
+typedef Ncv32u Ncv32u_a; // no may_alias support detected: plain alias
+#endif
+
+struct HaarClassifierNode128
+{
+    uint4 _ui4; // packed node: .x feature descriptor bits, .y threshold float bits, .z left node descriptor, .w right node descriptor
+    /// Copies the 32 bits of \a f into _ui4.x; always returns NCV_SUCCESS.
+    __host__ NCVStatus setFeatureDesc(HaarFeatureDescriptor32 f)
+    {
+        this->_ui4.x = *(Ncv32u *)&f;
+        return NCV_SUCCESS;
+    }
+    /// Stores the bit pattern of the float \a t in _ui4.y; always returns NCV_SUCCESS.
+    __host__ NCVStatus setThreshold(Ncv32f t)
+    {
+        this->_ui4.y = *(Ncv32u_a *)&t;
+        return NCV_SUCCESS;
+    }
+    /// Copies the 32 bits of the left node descriptor into _ui4.z; always returns NCV_SUCCESS.
+    __host__ NCVStatus setLeftNodeDesc(HaarClassifierNodeDescriptor32 nl)
+    {
+        this->_ui4.z = *(Ncv32u_a *)&nl;
+        return NCV_SUCCESS;
+    }
+    /// Copies the 32 bits of the right node descriptor into _ui4.w; always returns NCV_SUCCESS.
+    __host__ NCVStatus setRightNodeDesc(HaarClassifierNodeDescriptor32 nr)
+    {
+        this->_ui4.w = *(Ncv32u_a *)&nr;
+        return NCV_SUCCESS;
+    }
+    /// Returns _ui4.x reinterpreted as a HaarFeatureDescriptor32 (by value).
+    __host__ __device__ HaarFeatureDescriptor32 getFeatureDesc(void)
+    {
+        return *(HaarFeatureDescriptor32 *)&this->_ui4.x;
+    }
+    /// Returns _ui4.y reinterpreted as a float.
+    __host__ __device__ Ncv32f getThreshold(void)
+    {
+        return *(Ncv32f_a*)&this->_ui4.y;
+    }
+    /// Returns _ui4.z reinterpreted as a HaarClassifierNodeDescriptor32 (by value).
+    __host__ __device__ HaarClassifierNodeDescriptor32 getLeftNodeDesc(void)
+    {
+        return *(HaarClassifierNodeDescriptor32 *)&this->_ui4.z;
+    }
+    /// Returns _ui4.w reinterpreted as a HaarClassifierNodeDescriptor32 (by value).
+    __host__ __device__ HaarClassifierNodeDescriptor32 getRightNodeDesc(void)
+    {
+        return *(HaarClassifierNodeDescriptor32 *)&this->_ui4.w;
+    }
+};
+
+
+struct HaarStage64
+{
+#define HaarStage64_Interpret_MaskRootNodes         0x0000FFFF
+#define HaarStage64_Interpret_MaskRootNodeOffset    0xFFFF0000
+#define HaarStage64_Interpret_ShiftRootNodeOffset   16
+    // Packed storage: .x holds the stage threshold's float bits; .y holds the root-node offset (upper 16 bits) and the root-node count (lower 16 bits).
+    uint2 _ui2;
+    /// Stores the bit pattern of the float \a t in _ui2.x; always returns NCV_SUCCESS.
+    __host__ NCVStatus setStageThreshold(Ncv32f t)
+    {
+        _ui2.x = *(Ncv32u_a *)&t;
+        return NCV_SUCCESS;
+    }
+    /// Writes \a val into the upper 16 bits of _ui2.y, keeping the lower 16; returns NCV_HAAR_XML_LOADING_EXCEPTION if it does not fit.
+    __host__ NCVStatus setStartClassifierRootNodeOffset(Ncv32u val)
+    {
+        const Ncv32u maxOffset = HaarStage64_Interpret_MaskRootNodeOffset >> HaarStage64_Interpret_ShiftRootNodeOffset;
+        if (val > maxOffset)
+            return NCV_HAAR_XML_LOADING_EXCEPTION;
+        const Ncv32u keptCount = _ui2.y & HaarStage64_Interpret_MaskRootNodes;
+        _ui2.y = (val << HaarStage64_Interpret_ShiftRootNodeOffset) | keptCount;
+        return NCV_SUCCESS;
+    }
+    /// Writes \a val into the lower 16 bits of _ui2.y, keeping the upper 16; returns NCV_HAAR_XML_LOADING_EXCEPTION if it does not fit.
+    __host__ NCVStatus setNumClassifierRootNodes(Ncv32u val)
+    {
+        if (val > HaarStage64_Interpret_MaskRootNodes)
+            return NCV_HAAR_XML_LOADING_EXCEPTION;
+        // Only the offset half of the word survives the update.
+        const Ncv32u keptOffset = _ui2.y & HaarStage64_Interpret_MaskRootNodeOffset;
+        _ui2.y = val | keptOffset;
+        return NCV_SUCCESS;
+    }
+    /// Returns _ui2.x reinterpreted as a float.
+    __host__ __device__ Ncv32f getStageThreshold()
+    {
+        return *(Ncv32f_a*)&_ui2.x;
+    }
+    /// Returns the upper 16 bits of _ui2.y.
+    __host__ __device__ Ncv32u getStartClassifierRootNodeOffset()
+    {
+        return (_ui2.y >> HaarStage64_Interpret_ShiftRootNodeOffset);
+    }
+    /// Returns the lower 16 bits of _ui2.y.
+    __host__ __device__ Ncv32u getNumClassifierRootNodes()
+    {
+        return (_ui2.y & HaarStage64_Interpret_MaskRootNodes);
+    }
+};
+
+
+NCV_CT_ASSERT(sizeof(HaarFeature64) == 8); // size guards for the packed structs above; NCV_CT_ASSERT is defined outside this header (presumably a compile-time assertion)
+NCV_CT_ASSERT(sizeof(HaarFeatureDescriptor32) == 4);
+NCV_CT_ASSERT(sizeof(HaarClassifierNodeDescriptor32) == 4);
+NCV_CT_ASSERT(sizeof(HaarClassifierNode128) == 16);
+NCV_CT_ASSERT(sizeof(HaarStage64) == 8);
+
+
+//==============================================================================
+//
+// Classifier cascade descriptor
+//
+//==============================================================================
+
+
+struct HaarClassifierCascadeDescriptor // plain summary record; passed together with the stage/node/feature vectors to the functions declared below
+{
+    Ncv32u NumStages;
+    Ncv32u NumClassifierRootNodes;
+    Ncv32u NumClassifierTotalNodes;
+    Ncv32u NumFeatures;
+    NcvSize32u ClassifierSize;
+    NcvBool bNeedsTiltedII; // NOTE(review): "II" presumably stands for integral image - confirm against the implementation
+    NcvBool bHasStumpsOnly; // NOTE(review): presumably set when every classifier is a single-node stump - confirm
+};
+
+
+//==============================================================================
+//
+// Functional interface
+//
+//==============================================================================
+
+
+enum // option values for the `flags` argument of ncvDetectObjectsMultiScale_device; the non-zero values are distinct single bits
+{
+    NCVPipeObjDet_Default               = 0x000, // no option bit set
+    NCVPipeObjDet_UseFairImageScaling   = 0x001,
+    NCVPipeObjDet_FindLargestObject     = 0x002,
+    NCVPipeObjDet_VisualizeInPlace      = 0x004,
+};
+
+// Multi-scale object detection entry point (device variant, per its name); returns an NCVStatus code. The "default" notes below are advisory only: the declaration has no default arguments.
+CV_EXPORTS NCVStatus ncvDetectObjectsMultiScale_device(NCVMatrix<Ncv8u> &d_srcImg,
+                                                        NcvSize32u srcRoi,
+                                                        NCVVector<NcvRect32u> &d_dstRects,
+                                                        Ncv32u &dstNumRects,
+                                                        // cascade descriptor and its stage / node / feature arrays
+                                                        HaarClassifierCascadeDescriptor &haar,
+                                                        NCVVector<HaarStage64> &h_HaarStages,
+                                                        NCVVector<HaarStage64> &d_HaarStages,
+                                                        NCVVector<HaarClassifierNode128> &d_HaarNodes,
+                                                        NCVVector<HaarFeature64> &d_HaarFeatures,
+                                                        // detection tuning parameters
+                                                        NcvSize32u minObjSize,
+                                                        Ncv32u minNeighbors,      //default 4
+                                                        Ncv32f scaleStep,         //default 1.2f
+                                                        Ncv32u pixelStep,         //default 1
+                                                        Ncv32u flags,             //default NCVPipeObjDet_Default
+                                                        // allocators, device properties and CUDA stream
+                                                        INCVMemAllocator &gpuAllocator,
+                                                        INCVMemAllocator &cpuAllocator,
+                                                        cudaDeviceProp &devProp,
+                                                        cudaStream_t cuStream);
+
+
+#define OBJDET_MASK_ELEMENT_INVALID_32U     0xFFFFFFFF // all-ones 32-bit value; by its name, marks an invalid pixel-mask element (usage is outside this header)
+#define HAAR_STDDEV_BORDER                  1 // NOTE(review): meaning not visible in this header - confirm against the implementation
+
+// Device variant (per its name) of the cascade application step: takes d_-prefixed data plus allocators, device properties and a CUDA stream; returns an NCVStatus code.
+CV_EXPORTS NCVStatus ncvApplyHaarClassifierCascade_device(NCVMatrix<Ncv32u> &d_integralImage,
+                                                           NCVMatrix<Ncv32f> &d_weights,
+                                                           NCVMatrixAlloc<Ncv32u> &d_pixelMask,
+                                                           Ncv32u &numDetections,
+                                                           HaarClassifierCascadeDescriptor &haar,
+                                                           NCVVector<HaarStage64> &h_HaarStages,
+                                                           NCVVector<HaarStage64> &d_HaarStages,
+                                                           NCVVector<HaarClassifierNode128> &d_HaarNodes,
+                                                           NCVVector<HaarFeature64> &d_HaarFeatures,
+                                                           NcvBool bMaskElements,
+                                                           NcvSize32u anchorsRoi,
+                                                           Ncv32u pixelStep,
+                                                           Ncv32f scaleArea,
+                                                           INCVMemAllocator &gpuAllocator,
+                                                           INCVMemAllocator &cpuAllocator,
+                                                           cudaDeviceProp &devProp,
+                                                           cudaStream_t cuStream);
+
+// Host counterpart of ncvApplyHaarClassifierCascade_device: h_-prefixed data, and no device stage vector, allocators, device properties or stream.
+CV_EXPORTS NCVStatus ncvApplyHaarClassifierCascade_host(NCVMatrix<Ncv32u> &h_integralImage,
+                                                         NCVMatrix<Ncv32f> &h_weights,
+                                                         NCVMatrixAlloc<Ncv32u> &h_pixelMask,
+                                                         Ncv32u &numDetections,
+                                                         HaarClassifierCascadeDescriptor &haar,
+                                                         NCVVector<HaarStage64> &h_HaarStages,
+                                                         NCVVector<HaarClassifierNode128> &h_HaarNodes,
+                                                         NCVVector<HaarFeature64> &h_HaarFeatures,
+                                                         NcvBool bMaskElements,
+                                                         NcvSize32u anchorsRoi,
+                                                         Ncv32u pixelStep,
+                                                         Ncv32f scaleArea);
+
+
+#define RECT_SIMILARITY_PROPORTION      0.2f // NOTE(review): by its name, a rectangle-similarity tolerance; where it is consumed is not visible in this header
+
+// Device variant (per its name); same parameters as ncvGrowDetectionsVector_host plus a CUDA stream. totalDetections is passed by non-const reference; returns an NCVStatus code.
+CV_EXPORTS NCVStatus ncvGrowDetectionsVector_device(NCVVector<Ncv32u> &pixelMask,
+                                                     Ncv32u numPixelMaskDetections,
+                                                     NCVVector<NcvRect32u> &hypotheses,
+                                                     Ncv32u &totalDetections,
+                                                     Ncv32u totalMaxDetections,
+                                                     Ncv32u rectWidth,
+                                                     Ncv32u rectHeight,
+                                                     Ncv32f curScale,
+                                                     cudaStream_t cuStream);
+
+// Host variant (per its name) of ncvGrowDetectionsVector_device: identical parameter list without the CUDA stream; returns an NCVStatus code.
+CV_EXPORTS NCVStatus ncvGrowDetectionsVector_host(NCVVector<Ncv32u> &pixelMask,
+                                                   Ncv32u numPixelMaskDetections,
+                                                   NCVVector<NcvRect32u> &hypotheses,
+                                                   Ncv32u &totalDetections,
+                                                   Ncv32u totalMaxDetections,
+                                                   Ncv32u rectWidth,
+                                                   Ncv32u rectHeight,
+                                                   Ncv32f curScale);
+
+// Takes a cascade file name and three non-const reference counters (stages, nodes, features); presumably fills them so callers can size buffers before loading - TODO confirm.
+CV_EXPORTS NCVStatus ncvHaarGetClassifierSize(const cv::String &filename, Ncv32u &numStages,
+                                               Ncv32u &numNodes, Ncv32u &numFeatures);
+
+// Loads a cascade from `filename` (per its name) into the host-side descriptor and vectors, all of which are passed by non-const reference; returns an NCVStatus code.
+CV_EXPORTS NCVStatus ncvHaarLoadFromFile_host(const cv::String &filename,
+                                               HaarClassifierCascadeDescriptor &haar,
+                                               NCVVector<HaarStage64> &h_HaarStages,
+                                               NCVVector<HaarClassifierNode128> &h_HaarNodes,
+                                               NCVVector<HaarFeature64> &h_HaarFeatures);
+
+// Stores a host-side cascade to `filename` (per its name); note that `haar` is taken by value here, unlike in ncvHaarLoadFromFile_host. Returns an NCVStatus code.
+CV_EXPORTS NCVStatus ncvHaarStoreNVBIN_host(const cv::String &filename,
+                                             HaarClassifierCascadeDescriptor haar,
+                                             NCVVector<HaarStage64> &h_HaarStages,
+                                             NCVVector<HaarClassifierNode128> &h_HaarNodes,
+                                             NCVVector<HaarFeature64> &h_HaarFeatures);
+
+
+
+#endif // _ncvhaarobjectdetection_hpp_
--- a/modules/gpulegacy/include/opencv2/gpulegacy/NCVPyramid.hpp
+++ b/modules/gpulegacy/include/opencv2/gpulegacy/NCVPyramid.hpp
@@ -0,0 +1,109 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                           License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#ifndef _ncvpyramid_hpp_
+#define _ncvpyramid_hpp_
+
+#include <memory>
+#include <vector>
+#include "opencv2/gpulegacy/NCV.hpp"
+#include "opencv2/core/cuda/common.hpp"
+
+namespace cv { namespace gpu { namespace cudev
+{
+    namespace pyramid // forward declarations only; the template definitions are not in this header
+    {
+        template <typename T> void kernelDownsampleX2_gpu(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
+        template <typename T> void kernelInterpolateFrom1_gpu(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
+    }
+}}}
+
+#if 0 //def _WIN32
+
+template <class T>
+class CV_EXPORTS NCVMatrixStack
+{
+public:
+    NCVMatrixStack() { _arr.clear(); } // starts out empty
+    ~NCVMatrixStack()
+    {
+        // Pop from the back once per entry that was present on entry to the destructor.
+        Ncv32u remaining = _arr.size();
+        while (remaining-- > 0)
+            pop_back();
+    }
+    /// Wraps the raw pointer \a elem in a shared_ptr and appends it, so the stack now shares ownership of it.
+    void push_back(NCVMatrix<T> *elem) { _arr.push_back(std::tr1::shared_ptr< NCVMatrix<T> >(elem)); }
+    void pop_back() { _arr.pop_back(); } // drops the stack's reference to the last entry
+    NCVMatrix<T> * operator [] (int i) const { return _arr[i].get(); } // unchecked index; returns a non-owning pointer
+private:
+    std::vector< std::tr1::shared_ptr< NCVMatrix<T> > > _arr;
+};
+
+
+template <class T>
+class CV_EXPORTS NCVImagePyramid // declaration only: member definitions are not in this header, and this section is compiled out by the enclosing `#if 0`
+{
+public:
+    // Constructed from a source matrix, a layer count, an allocator and a CUDA stream; construction success is presumably reported through isInitialized() - TODO confirm.
+    NCVImagePyramid(const NCVMatrix<T> &img,
+                    Ncv8u nLayers,
+                    INCVMemAllocator &alloc,
+                    cudaStream_t cuStream);
+    ~NCVImagePyramid();
+    NcvBool isInitialized() const;
+    NCVStatus getLayer(NCVMatrix<T> &outImg,
+                       NcvSize32u outRoi,
+                       NcvBool bTrilinear,
+                       cudaStream_t cuStream) const;
+
+private:
+    // Data members; their exact roles are set by the out-of-view definitions (names suggest: init flag, base layer, stored layers, layer count).
+    NcvBool _isInitialized;
+    const NCVMatrix<T> *layer0;
+    NCVMatrixStack<T> pyramid;
+    Ncv32u nLayers;
+};
+
+#endif //_WIN32
+
+#endif //_ncvpyramid_hpp_
--- a/modules/gpulegacy/include/opencv2/gpulegacy/NPP_staging.hpp
+++ b/modules/gpulegacy/include/opencv2/gpulegacy/NPP_staging.hpp
@@ -0,0 +1,907 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                           License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#ifndef _npp_staging_hpp_
+#define _npp_staging_hpp_
+
+#include "opencv2/gpulegacy/NCV.hpp"
+
+
+/**
+* \file NPP_staging.hpp
+* NPP Staging Library
+*/
+
+
+/** \defgroup core_npp NPPST Core
+ * Basic functions for CUDA streams management.
+ * @{
+ */
+
+
+/**
+ * Gets an active CUDA stream used by NPPST
+ * NOT THREAD SAFE
+ * \return Current CUDA stream
+ */
+CV_EXPORTS
+cudaStream_t nppStGetActiveCUDAstream();
+
+
+/**
+ * Sets an active CUDA stream used by NPPST
+ * NOT THREAD SAFE
+ * \param cudaStream        [IN] CUDA stream to become current
+ * \return CUDA stream used before
+ */
+CV_EXPORTS
+cudaStream_t nppStSetActiveCUDAstream(cudaStream_t cudaStream);
+
+
+/*@}*/
+
+
+/** \defgroup nppi NPPST Image Processing
+* @{
+*/
+
+
+/** Border type
+ *
+ * Filtering operations assume that each pixel has a neighborhood of pixels.
+ * This enumeration lists the possible ways to define the non-existent (out of range) pixels.
+ */
+enum NppStBorderType
+{
+    nppStBorderNone   = 0, ///< There is no need to define additional pixels, image is extended already
+    nppStBorderClamp  = 1, ///< Clamp out of range position to borders
+    nppStBorderWrap   = 2, ///< Wrap out of range position. Image becomes periodic.
+    nppStBorderMirror = 3  ///< Reflect out of range position across borders
+};
+
+
+/**
+ * Filter types for image resizing (type of the interpolation argument of nppiStResize_32f_C1R)
+ */
+enum NppStInterpMode
+{
+    nppStSupersample, ///< Supersampling. For downscaling only (implicit value 0)
+    nppStBicubic      ///< Bicubic convolution filter, a = -0.5 (cubic Hermite spline) (implicit value 1)
+};
+
+
+/** Frame interpolation state
+ *
+ * This structure holds the parameters required for frame interpolation (it is the argument of nppiStInterpolateFrames).
+ * The forward displacement field (pFU, pFV) is a per-pixel mapping from frame 0 to frame 1.
+ * The backward displacement field (pBU, pBV) is a per-pixel mapping from frame 1 to frame 0.
+ */
+
+ struct NppStInterpolationState
+{
+    NcvSize32u size;      ///< frame size
+    Ncv32u nStep;         ///< pitch (line step); units are not stated in this header -- TODO confirm
+    Ncv32f pos;           ///< new frame position (presumably relative to frame 0 and frame 1 -- TODO confirm range)
+    Ncv32f *pSrcFrame0;   ///< frame 0
+    Ncv32f *pSrcFrame1;   ///< frame 1
+    Ncv32f *pFU;          ///< forward horizontal displacement
+    Ncv32f *pFV;          ///< forward vertical displacement
+    Ncv32f *pBU;          ///< backward horizontal displacement
+    Ncv32f *pBV;          ///< backward vertical displacement
+    Ncv32f *pNewFrame;    ///< new (interpolated) frame
+    Ncv32f *ppBuffers[6]; ///< temporary buffers (buffer size is reported by nppiStGetInterpolationBufferSize)
+};
+
+
+/** Size of a buffer required for interpolation.
+ *
+ * Interpolation requires several such buffers. \see NppStInterpolationState
+ *
+ * \param srcSize           [IN]  Frame size (both frames must be of the same size)
+ * \param nStep             [IN]  Frame line step
+ * \param hpSize            [OUT] Where to store computed size (host memory)
+ *
+ * \return NCV status code
+ */
+CV_EXPORTS
+NCVStatus nppiStGetInterpolationBufferSize(NcvSize32u srcSize,
+                                           Ncv32u nStep,
+                                           Ncv32u *hpSize);
+
+
+/** Interpolate frames (images) using provided optical flow (displacement field).
+ * 32-bit floating point images, single channel
+ *
+ * \param pState            [IN] structure containing all required parameters (host memory)
+ *
+ * \return NCV status code
+ */
+CV_EXPORTS
+NCVStatus nppiStInterpolateFrames(const NppStInterpolationState *pState);
+
+
+/** Row linear filter. 32-bit floating point image, single channel
+ *
+ * Apply horizontal linear filter
+ *
+ * \param pSrc              [IN]  Source image pointer (CUDA device memory)
+ * \param srcSize           [IN]  Source image size
+ * \param nSrcStep          [IN]  Source image line step
+ * \param pDst              [OUT] Destination image pointer (CUDA device memory)
+ * \param dstSize           [IN]  Destination image size
+ * \param nDstStep          [IN]  Destination image line step
+ * \param oROI              [IN]  Region of interest in the source image
+ * \param borderType        [IN]  Type of border
+ * \param pKernel           [IN]  Pointer to row kernel values (CUDA device memory)
+ * \param nKernelSize       [IN]  Size of the kernel in pixels
+ * \param nAnchor           [IN]  The kernel row alignment with respect to the position of the input pixel
+ * \param multiplier        [IN]  Value by which the computed result is multiplied
+ * \return NCV status code
+ */
+CV_EXPORTS
+NCVStatus nppiStFilterRowBorder_32f_C1R(const Ncv32f *pSrc,
+                                        NcvSize32u srcSize,
+                                        Ncv32u nSrcStep,
+                                        Ncv32f *pDst,
+                                        NcvSize32u dstSize,
+                                        Ncv32u nDstStep,
+                                        NcvRect32u oROI,
+                                        NppStBorderType borderType,
+                                        const Ncv32f *pKernel,
+                                        Ncv32s nKernelSize,
+                                        Ncv32s nAnchor,
+                                        Ncv32f multiplier);
+
+
+/** Column linear filter. 32-bit floating point image, single channel
+ *
+ * Apply vertical linear filter
+ *
+ * \param pSrc              [IN]  Source image pointer (CUDA device memory)
+ * \param srcSize           [IN]  Source image size
+ * \param nSrcStep          [IN]  Source image line step
+ * \param pDst              [OUT] Destination image pointer (CUDA device memory)
+ * \param dstSize           [IN]  Destination image size
+ * \param nDstStep          [IN]  Destination image line step
+ * \param oROI              [IN]  Region of interest in the source image
+ * \param borderType        [IN]  Type of border
+ * \param pKernel           [IN]  Pointer to column kernel values (CUDA device memory)
+ * \param nKernelSize       [IN]  Size of the kernel in pixels
+ * \param nAnchor           [IN]  The kernel column alignment with respect to the position of the input pixel
+ * \param multiplier        [IN]  Value by which the computed result is multiplied
+ * \return NCV status code
+ */
+CV_EXPORTS
+NCVStatus nppiStFilterColumnBorder_32f_C1R(const Ncv32f *pSrc,
+                                           NcvSize32u srcSize,
+                                           Ncv32u nSrcStep,
+                                           Ncv32f *pDst,
+                                           NcvSize32u dstSize,
+                                           Ncv32u nDstStep,
+                                           NcvRect32u oROI,
+                                           NppStBorderType borderType,
+                                           const Ncv32f *pKernel,
+                                           Ncv32s nKernelSize,
+                                           Ncv32s nAnchor,
+                                           Ncv32f multiplier);
+
+
+/** Size of buffer required for vector image warping.
+ *
+ * \param srcSize           [IN]  Source image size
+ * \param nSrcStep          [IN]  Source image line step
+ * \param hpSize            [OUT] Where to store computed size (host memory)
+ *
+ * \return NCV status code
+ */
+CV_EXPORTS
+NCVStatus nppiStVectorWarpGetBufferSize(NcvSize32u srcSize,
+                                        Ncv32u nSrcStep,
+                                        Ncv32u *hpSize);
+
+
+/** Warp image using provided 2D vector field and 1x1 point spread function.
+ * 32-bit floating point image, single channel
+ *
+ * During warping pixels from the source image may fall between pixels of the destination image.
+ * PSF (point spread function) describes how the source image pixel affects pixels of the destination.
+ * For 1x1 PSF only single pixel with the largest intersection is affected (similar to nearest interpolation).
+ *
+ * Destination image size and line step must be the same as the source image size and line step
+ *
+ * \param pSrc              [IN]  Source image pointer (CUDA device memory)
+ * \param srcSize           [IN]  Source image size
+ * \param nSrcStep          [IN]  Source image line step
+ * \param pU                [IN]  Pointer to horizontal displacement field (CUDA device memory)
+ * \param pV                [IN]  Pointer to vertical displacement field (CUDA device memory)
+ * \param nVFStep           [IN]  Displacement field line step
+ * \param timeScale         [IN]  Value by which displacement field will be scaled for warping
+ * \param pDst              [OUT] Destination image pointer (CUDA device memory)
+ *
+ * \return NCV status code
+ */
+CV_EXPORTS
+NCVStatus nppiStVectorWarp_PSF1x1_32f_C1(const Ncv32f *pSrc,
+                                         NcvSize32u srcSize,
+                                         Ncv32u nSrcStep,
+                                         const Ncv32f *pU,
+                                         const Ncv32f *pV,
+                                         Ncv32u nVFStep,
+                                         Ncv32f timeScale,
+                                         Ncv32f *pDst);
+
+
+/** Warp image using provided 2D vector field and 2x2 point spread function.
+ * 32-bit floating point image, single channel
+ *
+ * During warping pixels from the source image may fall between pixels of the destination image.
+ * PSF (point spread function) describes how the source image pixel affects pixels of the destination.
+ * For 2x2 PSF all four intersected pixels will be affected.
+ *
+ * Destination image size and line step must be the same as the source image size and line step
+ *
+ * \param pSrc              [IN]  Source image pointer (CUDA device memory)
+ * \param srcSize           [IN]  Source image size
+ * \param nSrcStep          [IN]  Source image line step
+ * \param pU                [IN]  Pointer to horizontal displacement field (CUDA device memory)
+ * \param pV                [IN]  Pointer to vertical displacement field (CUDA device memory)
+ * \param nVFStep           [IN]  Displacement field line step
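+ * \param pBuffer           [IN]  Pointer to temporary buffer (presumably sized via nppiStVectorWarpGetBufferSize -- TODO confirm)
+ * \param timeScale         [IN]  Value by which displacement field will be scaled for warping
+ * \param pDst              [OUT] Destination image pointer (CUDA device memory)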
+ * \return NCV status code
+ */
+CV_EXPORTS
+NCVStatus nppiStVectorWarp_PSF2x2_32f_C1(const Ncv32f *pSrc,
+                                         NcvSize32u srcSize,
+                                         Ncv32u nSrcStep,
+                                         const Ncv32f *pU,
+                                         const Ncv32f *pV,
+                                         Ncv32u nVFStep,
+                                         Ncv32f *pBuffer,
+                                         Ncv32f timeScale,
+                                         Ncv32f *pDst);
+
+
+/** Resize. 32-bit floating point image, single channel
+ *
+ * Resizes image using specified filter (interpolation type)
+ *
+ * \param pSrc              [IN]  Source image pointer (CUDA device memory)
+ * \param srcSize           [IN]  Source image size
+ * \param nSrcStep          [IN]  Source image line step
+ * \param srcROI            [IN]  Source image region of interest
+ * \param pDst              [OUT] Destination image pointer (CUDA device memory)
+ * \param dstSize           [IN]  Destination image size
+ * \param nDstStep          [IN]  Destination image line step
+ * \param dstROI            [IN]  Destination image region of interest
+ * \param xFactor           [IN]  Row scale factor
+ * \param yFactor           [IN]  Column scale factor
+ * \param interpolation     [IN]  Interpolation type
+ *
+ * \return NCV status code
+ */
+CV_EXPORTS
+NCVStatus nppiStResize_32f_C1R(const Ncv32f *pSrc,
+                               NcvSize32u srcSize,
+                               Ncv32u nSrcStep,
+                               NcvRect32u srcROI,
+                               Ncv32f *pDst,
+                               NcvSize32u dstSize,
+                               Ncv32u nDstStep,
+                               NcvRect32u dstROI,
+                               Ncv32f xFactor,
+                               Ncv32f yFactor,
+                               NppStInterpMode interpolation);
+
+
+/**
+ * Downsamples (decimates) an image using the nearest neighbor algorithm. 32-bit unsigned pixels, single channel.
+ *
+ * \param d_src             [IN] Source image pointer (CUDA device memory)
+ * \param srcStep           [IN] Source image line step
+ * \param d_dst             [OUT] Destination image pointer (CUDA device memory)
+ * \param dstStep           [IN] Destination image line step
+ * \param srcRoi            [IN] Region of interest in the source image
+ * \param scale             [IN] Downsampling scale factor (positive integer)
+ * \param readThruTexture   [IN] Performance hint to cache source in texture (true) or read directly (false)
+ *
+ * \return NCV status code
+ */
+CV_EXPORTS
+NCVStatus nppiStDecimate_32u_C1R(Ncv32u *d_src, Ncv32u srcStep,
+                                 Ncv32u *d_dst, Ncv32u dstStep,
+                                 NcvSize32u srcRoi, Ncv32u scale,
+                                 NcvBool readThruTexture);
+
+
+/**
+ * Downsamples (decimates) an image using the nearest neighbor algorithm. 32-bit signed pixels, single channel.
+ * \see nppiStDecimate_32u_C1R
+ */
+CV_EXPORTS
+NCVStatus nppiStDecimate_32s_C1R(Ncv32s *d_src, Ncv32u srcStep,
+                                 Ncv32s *d_dst, Ncv32u dstStep,
+                                 NcvSize32u srcRoi, Ncv32u scale,
+                                 NcvBool readThruTexture);
+
+
+/**
+ * Downsamples (decimates) an image using the nearest neighbor algorithm. 32-bit float pixels, single channel.
+ * \see nppiStDecimate_32u_C1R
+ */
+CV_EXPORTS
+NCVStatus nppiStDecimate_32f_C1R(Ncv32f *d_src, Ncv32u srcStep,
+                                 Ncv32f *d_dst, Ncv32u dstStep,
+                                 NcvSize32u srcRoi, Ncv32u scale,
+                                 NcvBool readThruTexture);
+
+
+/**
+* Downsamples (decimates) an image using the nearest neighbor algorithm. 64-bit unsigned pixels, single channel.
+* \see nppiStDecimate_32u_C1R
+*/
+CV_EXPORTS
+NCVStatus nppiStDecimate_64u_C1R(Ncv64u *d_src, Ncv32u srcStep,
+                                 Ncv64u *d_dst, Ncv32u dstStep,
+                                 NcvSize32u srcRoi, Ncv32u scale,
+                                 NcvBool readThruTexture);
+
+
+/**
+ * Downsamples (decimates) an image using the nearest neighbor algorithm. 64-bit signed pixels, single channel.
+ * \see nppiStDecimate_32u_C1R
+ */
+CV_EXPORTS
+NCVStatus nppiStDecimate_64s_C1R(Ncv64s *d_src, Ncv32u srcStep,
+                                 Ncv64s *d_dst, Ncv32u dstStep,
+                                 NcvSize32u srcRoi, Ncv32u scale,
+                                 NcvBool readThruTexture);
+
+
+/**
+ * Downsamples (decimates) an image using the nearest neighbor algorithm. 64-bit float pixels, single channel.
+ * \see nppiStDecimate_32u_C1R
+ */
+CV_EXPORTS
+NCVStatus nppiStDecimate_64f_C1R(Ncv64f *d_src, Ncv32u srcStep,
+                                 Ncv64f *d_dst, Ncv32u dstStep,
+                                 NcvSize32u srcRoi, Ncv32u scale,
+                                 NcvBool readThruTexture);
+
+
+/**
+ * Downsamples (decimates) an image using the nearest neighbor algorithm. 32-bit unsigned pixels, single channel. Host implementation.
+ *
+ * \param h_src             [IN] Source image pointer (Host or pinned memory)
+ * \param srcStep           [IN] Source image line step
+ * \param h_dst             [OUT] Destination image pointer (Host or pinned memory)
+ * \param dstStep           [IN] Destination image line step
+ * \param srcRoi            [IN] Region of interest in the source image
+ * \param scale             [IN] Downsampling scale factor (positive integer)
+ *
+ * \return NCV status code
+ */
+CV_EXPORTS
+NCVStatus nppiStDecimate_32u_C1R_host(Ncv32u *h_src, Ncv32u srcStep,
+                                      Ncv32u *h_dst, Ncv32u dstStep,
+                                      NcvSize32u srcRoi, Ncv32u scale);
+
+
+/**
+ * Downsamples (decimates) an image using the nearest neighbor algorithm. 32-bit signed pixels, single channel. Host implementation.
+ * \see nppiStDecimate_32u_C1R_host
+ */
+CV_EXPORTS
+NCVStatus nppiStDecimate_32s_C1R_host(Ncv32s *h_src, Ncv32u srcStep,
+                                      Ncv32s *h_dst, Ncv32u dstStep,
+                                      NcvSize32u srcRoi, Ncv32u scale);
+
+
+/**
+ * Downsamples (decimates) an image using the nearest neighbor algorithm. 32-bit float pixels, single channel. Host implementation.
+ * \see nppiStDecimate_32u_C1R_host
+ */
+CV_EXPORTS
+NCVStatus nppiStDecimate_32f_C1R_host(Ncv32f *h_src, Ncv32u srcStep,
+                                      Ncv32f *h_dst, Ncv32u dstStep,
+                                      NcvSize32u srcRoi, Ncv32u scale);
+
+
+/**
+ * Downsamples (decimates) an image using the nearest neighbor algorithm. 64-bit unsigned pixels, single channel. Host implementation.
+ * \see nppiStDecimate_32u_C1R_host
+ */
+CV_EXPORTS
+NCVStatus nppiStDecimate_64u_C1R_host(Ncv64u *h_src, Ncv32u srcStep,
+                                      Ncv64u *h_dst, Ncv32u dstStep,
+                                      NcvSize32u srcRoi, Ncv32u scale);
+
+
+/**
+ * Downsamples (decimates) an image using the nearest neighbor algorithm. 64-bit signed pixels, single channel. Host implementation.
+ * \see nppiStDecimate_32u_C1R_host
+ */
+CV_EXPORTS
+NCVStatus nppiStDecimate_64s_C1R_host(Ncv64s *h_src, Ncv32u srcStep,
+                                      Ncv64s *h_dst, Ncv32u dstStep,
+                                      NcvSize32u srcRoi, Ncv32u scale);
+
+
+/**
+ * Downsamples (decimates) an image using the nearest neighbor algorithm. 64-bit float pixels, single channel. Host implementation.
+ * \see nppiStDecimate_32u_C1R_host
+ */
+CV_EXPORTS
+NCVStatus nppiStDecimate_64f_C1R_host(Ncv64f *h_src, Ncv32u srcStep,
+                                      Ncv64f *h_dst, Ncv32u dstStep,
+                                      NcvSize32u srcRoi, Ncv32u scale);
+
+
+/**
+ * Computes standard deviation for each rectangular region of the input image using integral images.
+ *
+ * \param d_sum             [IN] Integral image pointer (CUDA device memory)
+ * \param sumStep           [IN] Integral image line step
+ * \param d_sqsum           [IN] Squared integral image pointer (CUDA device memory)
+ * \param sqsumStep         [IN] Squared integral image line step
+ * \param d_norm            [OUT] Stddev image pointer (CUDA device memory). Each pixel contains stddev of a rect with top-left corner at the original location in the image
+ * \param normStep          [IN] Stddev image line step
+ * \param roi               [IN] Region of interest in the source image
+ * \param rect              [IN] Rectangular region to calculate stddev over
+ * \param scaleArea         [IN] Multiplication factor to account for the decimated scale
+ * \param readThruTexture   [IN] Performance hint to cache source in texture (true) or read directly (false)
+ *
+ * \return NCV status code
+ */
+CV_EXPORTS
+NCVStatus nppiStRectStdDev_32f_C1R(Ncv32u *d_sum, Ncv32u sumStep,
+                                   Ncv64u *d_sqsum, Ncv32u sqsumStep,
+                                   Ncv32f *d_norm, Ncv32u normStep,
+                                   NcvSize32u roi, NcvRect32u rect,
+                                   Ncv32f scaleArea, NcvBool readThruTexture);
+
+
+/**
+ * Computes standard deviation for each rectangular region of the input image using integral images. Host implementation
+ *
+ * \param h_sum             [IN] Integral image pointer (Host or pinned memory)
+ * \param sumStep           [IN] Integral image line step
+ * \param h_sqsum           [IN] Squared integral image pointer (Host or pinned memory)
+ * \param sqsumStep         [IN] Squared integral image line step
+ * \param h_norm            [OUT] Stddev image pointer (Host or pinned memory). Each pixel contains stddev of a rect with top-left corner at the original location in the image
+ * \param normStep          [IN] Stddev image line step
+ * \param roi               [IN] Region of interest in the source image
+ * \param rect              [IN] Rectangular region to calculate stddev over
+ * \param scaleArea         [IN] Multiplication factor to account for the decimated scale
+ *
+ * \return NCV status code
+ */
+CV_EXPORTS
+NCVStatus nppiStRectStdDev_32f_C1R_host(Ncv32u *h_sum, Ncv32u sumStep,
+                                        Ncv64u *h_sqsum, Ncv32u sqsumStep,
+                                        Ncv32f *h_norm, Ncv32u normStep,
+                                        NcvSize32u roi, NcvRect32u rect,
+                                        Ncv32f scaleArea);
+
+
+/**
+ * Transposes an image. 32-bit unsigned pixels, single channel
+ *
+ * \param d_src             [IN] Source image pointer (CUDA device memory)
+ * \param srcStride         [IN] Source image line step
+ * \param d_dst             [OUT] Destination image pointer (CUDA device memory)
+ * \param dstStride         [IN] Destination image line step
+ * \param srcRoi            [IN] Region of interest of the source image
+ *
+ * \return NCV status code
+ */
+CV_EXPORTS
+NCVStatus nppiStTranspose_32u_C1R(Ncv32u *d_src, Ncv32u srcStride,
+                                  Ncv32u *d_dst, Ncv32u dstStride, NcvSize32u srcRoi);
+
+
+/**
+ * Transposes an image. 32-bit signed pixels, single channel
+ * \see nppiStTranspose_32u_C1R
+ */
+CV_EXPORTS
+NCVStatus nppiStTranspose_32s_C1R(Ncv32s *d_src, Ncv32u srcStride,
+                                  Ncv32s *d_dst, Ncv32u dstStride, NcvSize32u srcRoi);
+
+
+/**
+ * Transposes an image. 32-bit float pixels, single channel
+ * \see nppiStTranspose_32u_C1R
+ */
+CV_EXPORTS
+NCVStatus nppiStTranspose_32f_C1R(Ncv32f *d_src, Ncv32u srcStride,
+                                  Ncv32f *d_dst, Ncv32u dstStride, NcvSize32u srcRoi);
+
+
+/**
+ * Transposes an image. 64-bit unsigned pixels, single channel
+ * \see nppiStTranspose_32u_C1R
+ */
+CV_EXPORTS
+NCVStatus nppiStTranspose_64u_C1R(Ncv64u *d_src, Ncv32u srcStride,
+                                  Ncv64u *d_dst, Ncv32u dstStride, NcvSize32u srcRoi);
+
+
+/**
+ * Transposes an image. 64-bit signed pixels, single channel
+ * \see nppiStTranspose_32u_C1R
+ */
+CV_EXPORTS
+NCVStatus nppiStTranspose_64s_C1R(Ncv64s *d_src, Ncv32u srcStride,
+                                  Ncv64s *d_dst, Ncv32u dstStride, NcvSize32u srcRoi);
+
+
+/**
+ * Transposes an image. 64-bit float pixels, single channel
+ * \see nppiStTranspose_32u_C1R
+ */
+CV_EXPORTS
+NCVStatus nppiStTranspose_64f_C1R(Ncv64f *d_src, Ncv32u srcStride,
+                                  Ncv64f *d_dst, Ncv32u dstStride, NcvSize32u srcRoi);
+
+
+/**
+ * Transposes an image. 128-bit pixels of any type, single channel
+ * \see nppiStTranspose_32u_C1R
+ */
+CV_EXPORTS
+NCVStatus nppiStTranspose_128_C1R(void *d_src, Ncv32u srcStep,
+                                  void *d_dst, Ncv32u dstStep, NcvSize32u srcRoi);
+
+
+/**
+ * Transposes an image. 32-bit unsigned pixels, single channel. Host implementation
+ *
+ * \param h_src             [IN] Source image pointer (Host or pinned memory)
+ * \param srcStride         [IN] Source image line step
+ * \param h_dst             [OUT] Destination image pointer (Host or pinned memory)
+ * \param dstStride         [IN] Destination image line step
+ * \param srcRoi            [IN] Region of interest of the source image
+ *
+ * \return NCV status code
+ */
+CV_EXPORTS
+NCVStatus nppiStTranspose_32u_C1R_host(Ncv32u *h_src, Ncv32u srcStride,
+                                       Ncv32u *h_dst, Ncv32u dstStride, NcvSize32u srcRoi);
+
+
+/**
+ * Transposes an image. 32-bit signed pixels, single channel. Host implementation
+ * \see nppiStTranspose_32u_C1R_host
+ */
+CV_EXPORTS
+NCVStatus nppiStTranspose_32s_C1R_host(Ncv32s *h_src, Ncv32u srcStride,
+                                       Ncv32s *h_dst, Ncv32u dstStride, NcvSize32u srcRoi);
+
+
+/**
+ * Transposes an image. 32-bit float pixels, single channel. Host implementation
+ * \see nppiStTranspose_32u_C1R_host
+ */
+CV_EXPORTS
+NCVStatus nppiStTranspose_32f_C1R_host(Ncv32f *h_src, Ncv32u srcStride,
+                                       Ncv32f *h_dst, Ncv32u dstStride, NcvSize32u srcRoi);
+
+
+/**
+ * Transposes an image. 64-bit unsigned pixels, single channel. Host implementation
+ * \see nppiStTranspose_32u_C1R_host
+ */
+CV_EXPORTS
+NCVStatus nppiStTranspose_64u_C1R_host(Ncv64u *h_src, Ncv32u srcStride,
+                                       Ncv64u *h_dst, Ncv32u dstStride, NcvSize32u srcRoi);
+
+
+/**
+ * Transposes an image. 64-bit signed pixels, single channel. Host implementation
+ * \see nppiStTranspose_32u_C1R_host
+ */
+CV_EXPORTS
+NCVStatus nppiStTranspose_64s_C1R_host(Ncv64s *h_src, Ncv32u srcStride,
+                                       Ncv64s *h_dst, Ncv32u dstStride, NcvSize32u srcRoi);
+
+
+/**
+ * Transposes an image. 64-bit float pixels, single channel. Host implementation
+ * \see nppiStTranspose_32u_C1R_host
+ */
+CV_EXPORTS
+NCVStatus nppiStTranspose_64f_C1R_host(Ncv64f *h_src, Ncv32u srcStride,
+                                       Ncv64f *h_dst, Ncv32u dstStride, NcvSize32u srcRoi);
+
+
+/**
+ * Transposes an image. 128-bit pixels of any type, single channel. Host implementation
+ * \see nppiStTranspose_32u_C1R_host (h_src/h_dst are the host pointers; srcStep/dstStep correspond to srcStride/dstStride there)
+ */
+CV_EXPORTS
+NCVStatus nppiStTranspose_128_C1R_host(void *h_src, Ncv32u srcStep,
+                                       void *h_dst, Ncv32u dstStep, NcvSize32u srcRoi);
+
+
+/**
+ * Calculates the size of the temporary buffer for integral image creation
+ *
+ * \param roiSize           [IN] Size of the input image
+ * \param pBufsize          [OUT] Pointer to host variable that returns the size of the temporary buffer (in bytes)
+ * \param devProp           [IN] CUDA device properties structure, containing texture alignment information
+ *
+ * \return NCV status code
+ */
+CV_EXPORTS
+NCVStatus nppiStIntegralGetSize_8u32u(NcvSize32u roiSize, Ncv32u *pBufsize, cudaDeviceProp &devProp);
+
+
+/**
+ * Calculates the size of the temporary buffer for integral image creation
+ * \see nppiStIntegralGetSize_8u32u
+ */
+CV_EXPORTS
+NCVStatus nppiStIntegralGetSize_32f32f(NcvSize32u roiSize, Ncv32u *pBufsize, cudaDeviceProp &devProp);
+
+
+/**
+ * Creates an integral image representation for the input image
+ *
+ * \param d_src             [IN] Source image pointer (CUDA device memory)
+ * \param srcStep           [IN] Source image line step
+ * \param d_dst             [OUT] Destination integral image pointer (CUDA device memory)
+ * \param dstStep           [IN] Destination image line step
+ * \param roiSize           [IN] Region of interest of the source image
+ * \param pBuffer           [IN] Pointer to the pre-allocated temporary buffer (CUDA device memory)
+ * \param bufSize           [IN] Size of the pBuffer in bytes
+ * \param devProp           [IN] CUDA device properties structure, containing texture alignment information
+ *
+ * \return NCV status code
+ */
+CV_EXPORTS
+NCVStatus nppiStIntegral_8u32u_C1R(Ncv8u *d_src, Ncv32u srcStep,
+                                   Ncv32u *d_dst, Ncv32u dstStep, NcvSize32u roiSize,
+                                   Ncv8u *pBuffer, Ncv32u bufSize, cudaDeviceProp &devProp);
+
+
+/**
+ * Creates an integral image representation for the input image
+ * \see nppiStIntegral_8u32u_C1R
+ */
+CV_EXPORTS
+NCVStatus nppiStIntegral_32f32f_C1R(Ncv32f *d_src, Ncv32u srcStep,
+                                    Ncv32f *d_dst, Ncv32u dstStep, NcvSize32u roiSize,
+                                    Ncv8u *pBuffer, Ncv32u bufSize, cudaDeviceProp &devProp);
+
+
+/**
+ * Creates an integral image representation for the input image. Host implementation
+ *
+ * \param h_src             [IN] Source image pointer (Host or pinned memory)
+ * \param srcStep           [IN] Source image line step
+ * \param h_dst             [OUT] Destination integral image pointer (Host or pinned memory)
+ * \param dstStep           [IN] Destination image line step
+ * \param roiSize           [IN] Region of interest of the source image
+ *
+ * \return NCV status code
+ */
+CV_EXPORTS
+NCVStatus nppiStIntegral_8u32u_C1R_host(Ncv8u *h_src, Ncv32u srcStep,
+                                        Ncv32u *h_dst, Ncv32u dstStep, NcvSize32u roiSize);
+
+
+/**
+ * Creates an integral image representation for the input image. Host implementation
+ * \see nppiStIntegral_8u32u_C1R_host
+ */
+CV_EXPORTS
+NCVStatus nppiStIntegral_32f32f_C1R_host(Ncv32f *h_src, Ncv32u srcStep,
+                                         Ncv32f *h_dst, Ncv32u dstStep, NcvSize32u roiSize);
+
+
+/**
+ * Calculates the size of the temporary buffer for squared integral image creation
+ *
+ * \param roiSize           [IN] Size of the input image
+ * \param pBufsize          [OUT] Pointer to host variable that returns the size of the temporary buffer (in bytes)
+ * \param devProp           [IN] CUDA device properties structure, containing texture alignment information
+ *
+ * \return NCV status code
+ */
+CV_EXPORTS
+NCVStatus nppiStSqrIntegralGetSize_8u64u(NcvSize32u roiSize, Ncv32u *pBufsize, cudaDeviceProp &devProp);
+
+
+/**
+ * Creates a squared integral image representation for the input image
+ * (Ncv8u source, Ncv64u destination)
+ *
+ * \param d_src             [IN] Source image pointer (CUDA device memory)
+ * \param srcStep           [IN] Source image line step
+ * \param d_dst             [OUT] Destination squared integral image pointer (CUDA device memory)
+ * \param dstStep           [IN] Destination image line step
+ * \param roiSize           [IN] Region of interest of the source image
+ * \param pBuffer           [IN] Pointer to the pre-allocated temporary buffer (CUDA device memory)
+ * \param bufSize           [IN] Size of the pBuffer in bytes
+ * \param devProp           [IN] CUDA device properties structure, containing texture alignment information
+ *
+ * \return NCV status code
+ * \see nppiStSqrIntegralGetSize_8u64u for querying the temporary buffer size
+ * \see nppiStSqrIntegral_8u64u_C1R_host for the host implementation
+ */
+CV_EXPORTS
+NCVStatus nppiStSqrIntegral_8u64u_C1R(Ncv8u *d_src, Ncv32u srcStep,
+                                      Ncv64u *d_dst, Ncv32u dstStep, NcvSize32u roiSize,
+                                      Ncv8u *pBuffer, Ncv32u bufSize, cudaDeviceProp &devProp);
+
+
+/**
+ * Creates a squared integral image representation for the input image. Host implementation
+ * (Ncv8u source, Ncv64u destination); unlike nppiStSqrIntegral_8u64u_C1R it takes no
+ * temporary buffer and no device properties.
+ *
+ * \param h_src             [IN] Source image pointer (Host or pinned memory)
+ * \param srcStep           [IN] Source image line step
+ * \param h_dst             [OUT] Destination squared integral image pointer (Host or pinned memory)
+ * \param dstStep           [IN] Destination image line step
+ * \param roiSize           [IN] Region of interest of the source image
+ *
+ * \return NCV status code
+ * \see nppiStSqrIntegral_8u64u_C1R
+ */
+CV_EXPORTS
+NCVStatus nppiStSqrIntegral_8u64u_C1R_host(Ncv8u *h_src, Ncv32u srcStep,
+                                           Ncv64u *h_dst, Ncv32u dstStep, NcvSize32u roiSize);
+
+
+/*@}*/
+
+
+/** \defgroup npps NPPST Signal Processing
+* @{
+*/
+
+
+/**
+ * Calculates the size of the temporary buffer for vector compaction. 32-bit unsigned values
+ *
+ * \param srcLen            [IN] Length of the input vector in elements
+ * \param pBufsize          [OUT] Pointer to host variable that returns the size of the temporary buffer (in bytes)
+ * \param devProp           [IN] CUDA device properties structure, containing texture alignment information
+ *                               (taken by non-const reference)
+ *
+ * \return NCV status code
+ * \see nppsStCompact_32u, which accepts a temporary buffer through its pBuffer / bufSize parameters
+ */
+CV_EXPORTS
+NCVStatus nppsStCompactGetSize_32u(Ncv32u srcLen, Ncv32u *pBufsize, cudaDeviceProp &devProp);
+
+
+/**
+ * Calculates the size of the temporary buffer for vector compaction. 32-bit signed values
+ *
+ * Parameters have the same meaning as in nppsStCompactGetSize_32u:
+ *
+ * \param srcLen            [IN] Length of the input vector in elements
+ * \param pBufsize          [OUT] Pointer to host variable that returns the size of the temporary buffer (in bytes)
+ * \param devProp           [IN] CUDA device properties structure, containing texture alignment information
+ *
+ * \return NCV status code
+ * \see nppsStCompactGetSize_32u
+ */
+CV_EXPORTS
+NCVStatus nppsStCompactGetSize_32s(Ncv32u srcLen, Ncv32u *pBufsize, cudaDeviceProp &devProp);
+
+
+/**
+ * Calculates the size of the temporary buffer for vector compaction. 32-bit float values
+ *
+ * Parameters have the same meaning as in nppsStCompactGetSize_32u:
+ *
+ * \param srcLen            [IN] Length of the input vector in elements
+ * \param pBufsize          [OUT] Pointer to host variable that returns the size of the temporary buffer (in bytes)
+ * \param devProp           [IN] CUDA device properties structure, containing texture alignment information
+ *
+ * \return NCV status code
+ * \see nppsStCompactGetSize_32u
+ */
+CV_EXPORTS
+NCVStatus nppsStCompactGetSize_32f(Ncv32u srcLen, Ncv32u *pBufsize, cudaDeviceProp &devProp);
+
+
+/**
+ * Compacts the input vector by removing elements of specified value. 32-bit unsigned values
+ *
+ * \param d_src             [IN] Source vector pointer (CUDA device memory)
+ * \param srcLen            [IN] Source vector length
+ * \param d_dst             [OUT] Destination vector pointer (CUDA device memory)
+ * \param p_dstLen          [OUT] Pointer to the destination vector length (Pinned memory or NULL)
+ * \param elemRemove        [IN] The value to be removed
+ * \param pBuffer           [IN] Pointer to the pre-allocated temporary buffer (CUDA device memory)
+ * \param bufSize           [IN] Size of the pBuffer in bytes
+ * \param devProp           [IN] CUDA device properties structure, containing texture alignment information
+ *
+ * \return NCV status code
+ * \see nppsStCompactGetSize_32u for querying the temporary buffer size
+ * \see nppsStCompact_32u_host for the host implementation
+ */
+CV_EXPORTS
+NCVStatus nppsStCompact_32u(Ncv32u *d_src, Ncv32u srcLen,
+                            Ncv32u *d_dst, Ncv32u *p_dstLen,
+                            Ncv32u elemRemove, Ncv8u *pBuffer,
+                            Ncv32u bufSize, cudaDeviceProp &devProp);
+
+
+/**
+ * Compacts the input vector by removing elements of specified value. 32-bit signed values
+ *
+ * Parameters have the same meaning as in nppsStCompact_32u; d_src, d_dst and
+ * elemRemove use Ncv32s instead of Ncv32u.
+ *
+ * \return NCV status code
+ * \see nppsStCompact_32u
+ * \see nppsStCompactGetSize_32s
+ */
+CV_EXPORTS
+NCVStatus nppsStCompact_32s(Ncv32s *d_src, Ncv32u srcLen,
+                            Ncv32s *d_dst, Ncv32u *p_dstLen,
+                            Ncv32s elemRemove, Ncv8u *pBuffer,
+                            Ncv32u bufSize, cudaDeviceProp &devProp);
+
+
+/**
+ * Compacts the input vector by removing elements of specified value. 32-bit float values
+ *
+ * Parameters have the same meaning as in nppsStCompact_32u; d_src, d_dst and
+ * elemRemove use Ncv32f instead of Ncv32u.
+ *
+ * \return NCV status code
+ * \see nppsStCompact_32u
+ * \see nppsStCompactGetSize_32f
+ */
+CV_EXPORTS
+NCVStatus nppsStCompact_32f(Ncv32f *d_src, Ncv32u srcLen,
+                            Ncv32f *d_dst, Ncv32u *p_dstLen,
+                            Ncv32f elemRemove, Ncv8u *pBuffer,
+                            Ncv32u bufSize, cudaDeviceProp &devProp);
+
+
+/**
+ * Compacts the input vector by removing elements of specified value. 32-bit unsigned values. Host implementation
+ *
+ * \param h_src             [IN] Source vector pointer (Host or pinned memory)
+ * \param srcLen            [IN] Source vector length
+ * \param h_dst             [OUT] Destination vector pointer (Host or pinned memory)
+ * \param dstLen            [OUT] Pointer to the destination vector length (can be NULL)
+ * \param elemRemove        [IN] The value to be removed
+ *
+ * \return NCV status code
+ * \see nppsStCompact_32u for the CUDA device implementation
+ */
+CV_EXPORTS
+NCVStatus nppsStCompact_32u_host(Ncv32u *h_src, Ncv32u srcLen,
+                                 Ncv32u *h_dst, Ncv32u *dstLen, Ncv32u elemRemove);
+
+
+/**
+ * Compacts the input vector by removing elements of specified value. 32-bit signed values. Host implementation
+ *
+ * Parameters have the same meaning as in nppsStCompact_32u_host; h_src, h_dst and
+ * elemRemove use Ncv32s instead of Ncv32u.
+ *
+ * \return NCV status code
+ * \see nppsStCompact_32u_host
+ */
+CV_EXPORTS
+NCVStatus nppsStCompact_32s_host(Ncv32s *h_src, Ncv32u srcLen,
+                                 Ncv32s *h_dst, Ncv32u *dstLen, Ncv32s elemRemove);
+
+
+/**
+ * Compacts the input vector by removing elements of specified value. 32-bit float values. Host implementation
+ *
+ * Parameters have the same meaning as in nppsStCompact_32u_host; h_src, h_dst and
+ * elemRemove use Ncv32f instead of Ncv32u.
+ *
+ * \return NCV status code
+ * \see nppsStCompact_32u_host
+ */
+CV_EXPORTS
+NCVStatus nppsStCompact_32f_host(Ncv32f *h_src, Ncv32u srcLen,
+                                 Ncv32f *h_dst, Ncv32u *dstLen, Ncv32f elemRemove);
+
+
+/*@}*/
+
+
+#endif // _npp_staging_hpp_
--- a/modules/gpulegacy/include/opencv2/gpulegacy/private.hpp
+++ b/modules/gpulegacy/include/opencv2/gpulegacy/private.hpp
@@ -0,0 +1,96 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                          License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#ifndef __OPENCV_CORE_GPULEGACY_PRIVATE_HPP__
+#define __OPENCV_CORE_GPULEGACY_PRIVATE_HPP__
+
+#ifndef __OPENCV_BUILD
+#  error this is a private header which should not be used from outside of the OpenCV library
+#endif
+
+#include "opencv2/core/gpu_private.hpp"
+
+#ifndef HAVE_CUDA
+#  error gpulegacy module requires CUDA
+#endif
+
+#include "opencv2/gpulegacy.hpp"
+
+namespace cv { namespace gpu
+{
+    // Scope guard around nppStSetActiveCUDAstream().
+    // Construction passes newStream to nppStSetActiveCUDAstream() and keeps the
+    // value that call returns (presumably the previously active stream - confirm
+    // against the NPPST header); destruction passes that saved value back.
+    // NOTE(review): the class is implicitly copyable, so a copy would hand the
+    // same saved stream back a second time when it is destroyed.
+    class NppStStreamHandler
+    {
+    public:
+        explicit NppStStreamHandler(cudaStream_t newStream = 0)
+            : previousStream(nppStSetActiveCUDAstream(newStream))
+        {
+        }
+
+        ~NppStStreamHandler()
+        {
+            nppStSetActiveCUDAstream(previousStream);
+        }
+
+    private:
+        // Value returned by nppStSetActiveCUDAstream() in the constructor.
+        cudaStream_t previousStream;
+    };
+
+    CV_EXPORTS cv::String getNcvErrorMessage(int code); // Supplies the message text that checkNcvError() forwards to cv::error() for a status code.
+
+    static inline void checkNcvError(int err, const char* file, const int line, const char* func)
+    {
+        if (NCV_SUCCESS != err)
+        {
+            cv::String msg = getNcvErrorMessage(err);
+            cv::error(cv::Error::GpuApiCallError, msg, func, file, line);
+        }
+    }
+}}
+
+// ncvSafeCall(expr) hands the value of expr to cv::gpu::checkNcvError() together
+// with the file and line of the call site. The enclosing function name is passed
+// via __func__ only when __GNUC__ is defined; otherwise an empty string is used.
+#ifdef __GNUC__
+#  define ncvSafeCall(expr)  cv::gpu::checkNcvError(expr, __FILE__, __LINE__, __func__)
+#else
+#  define ncvSafeCall(expr)  cv::gpu::checkNcvError(expr, __FILE__, __LINE__, "")
+#endif
+
+#endif // __OPENCV_CORE_GPULEGACY_PRIVATE_HPP__