Temporarily added NPP_staging; functionality from this library will be moved into NPP with the next release.

Anatoly Baksheev 2010-12-17 15:41:26 +00:00
parent e5c5a1cb3d
commit 9dd4a22a5e
12 changed files with 968 additions and 16 deletions

5 binary files not shown.

3rdparty/NPP_staging/npp_staging.h (vendored, new file, 760 lines)

@@ -0,0 +1,760 @@
/*
* Copyright 1993-2010 NVIDIA Corporation. All rights reserved.
*
* NOTICE TO USER:
*
* This source code is subject to NVIDIA ownership rights under U.S. and
* international Copyright laws.
*
* NVIDIA MAKES NO REPRESENTATION ABOUT THE SUITABILITY OF THIS SOURCE
* CODE FOR ANY PURPOSE. IT IS PROVIDED "AS IS" WITHOUT EXPRESS OR
* IMPLIED WARRANTY OF ANY KIND. NVIDIA DISCLAIMS ALL WARRANTIES WITH
* REGARD TO THIS SOURCE CODE, INCLUDING ALL IMPLIED WARRANTIES OF
* MERCHANTABILITY, NONINFRINGEMENT, AND FITNESS FOR A PARTICULAR PURPOSE.
* IN NO EVENT SHALL NVIDIA BE LIABLE FOR ANY SPECIAL, INDIRECT, INCIDENTAL,
* OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS
* OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE
* OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE
* OR PERFORMANCE OF THIS SOURCE CODE.
*
* U.S. Government End Users. This source code is a "commercial item" as
* that term is defined at 48 C.F.R. 2.101 (OCT 1995), consisting of
* "commercial computer software" and "commercial computer software
* documentation" as such terms are used in 48 C.F.R. 12.212 (SEPT 1995)
* and is provided to the U.S. Government only as a commercial end item.
* Consistent with 48 C.F.R.12.212 and 48 C.F.R. 227.7202-1 through
* 227.7202-4 (JUNE 1995), all U.S. Government End Users acquire the
* source code with only those rights set forth herein.
*/
#ifndef _npp_staging_h_
#define _npp_staging_h_
/**
* \file npp_staging.h
* NPP Staging Library (will become part of NPP in the next release)
*/
#ifdef __cplusplus
/** \defgroup ctassert Compile-time assert functionality
* @{
*/
/**
* Compile-time assert namespace
*/
namespace NppStCTprep
{
template <bool x>
struct CT_ASSERT_FAILURE;
template <>
struct CT_ASSERT_FAILURE<true> {};
template <int x>
struct assertTest{};
}
#define NPPST_CT_PREP_PASTE_AUX(a,b) a##b ///< Concatenation indirection macro
#define NPPST_CT_PREP_PASTE(a,b) NPPST_CT_PREP_PASTE_AUX(a, b) ///< Concatenation macro
/**
* Performs a compile-time assertion of a condition at file scope
*/
#define NPPST_CT_ASSERT(X) \
typedef NppStCTprep::assertTest<sizeof(NppStCTprep::CT_ASSERT_FAILURE< (bool)(X) >)> \
NPPST_CT_PREP_PASTE(__ct_assert_typedef_, __LINE__)
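/**
* Illustration (not part of the original header): a true condition compiles because
* CT_ASSERT_FAILURE<true> is a complete type, while a false condition does not compile
* because only the true specialization is defined, making the sizeof expression ill-formed.
* \code
* NPPST_CT_ASSERT(sizeof(int) >= 2);    // OK: creates an unused typedef
* // NPPST_CT_ASSERT(sizeof(int) == 1); // would fail to compile
* \endcode
*/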
/*@}*/
#endif
/** \defgroup typedefs NPP Integral and compound types of guaranteed size
* @{
*/
typedef bool NppStBool; ///< Boolean type no larger than an integer
typedef long long NppSt64s; ///< 64-bit signed integer
typedef unsigned long long NppSt64u; ///< 64-bit unsigned integer
typedef int NppSt32s; ///< 32-bit signed integer
typedef unsigned int NppSt32u; ///< 32-bit unsigned integer
typedef short NppSt16s; ///< 16-bit signed short
typedef unsigned short NppSt16u; ///< 16-bit unsigned short
typedef char NppSt8s; ///< 8-bit signed char
typedef unsigned char NppSt8u; ///< 8-bit unsigned char
typedef float NppSt32f; ///< 32-bit IEEE-754 (single precision) float
typedef double NppSt64f; ///< 64-bit IEEE-754 (double precision) float
/**
* 2D Rectangle, 8-bit unsigned fields
* This struct contains position and size information of a rectangle in 2D space
*/
struct NppStRect8u
{
NppSt8u x; ///< x-coordinate of upper left corner
NppSt8u y; ///< y-coordinate of upper left corner
NppSt8u width; ///< Rectangle width
NppSt8u height; ///< Rectangle height
#ifdef __cplusplus
NppStRect8u() : x(0), y(0), width(0), height(0) {};
NppStRect8u(NppSt8u x, NppSt8u y, NppSt8u width, NppSt8u height) : x(x), y(y), width(width), height(height) {}
#endif
};
/**
* 2D Rectangle, 32-bit signed fields
* This struct contains position and size information of a rectangle in 2D space
*/
struct NppStRect32s
{
NppSt32s x; ///< x-coordinate of upper left corner
NppSt32s y; ///< y-coordinate of upper left corner
NppSt32s width; ///< Rectangle width
NppSt32s height; ///< Rectangle height
#ifdef __cplusplus
NppStRect32s() : x(0), y(0), width(0), height(0) {};
NppStRect32s(NppSt32s x, NppSt32s y, NppSt32s width, NppSt32s height) : x(x), y(y), width(width), height(height) {}
#endif
};
/**
* 2D Rectangle, 32-bit unsigned fields
* This struct contains position and size information of a rectangle in 2D space
*/
struct NppStRect32u
{
NppSt32u x; ///< x-coordinate of upper left corner
NppSt32u y; ///< y-coordinate of upper left corner
NppSt32u width; ///< Rectangle width
NppSt32u height; ///< Rectangle height
#ifdef __cplusplus
NppStRect32u() : x(0), y(0), width(0), height(0) {};
NppStRect32u(NppSt32u x, NppSt32u y, NppSt32u width, NppSt32u height) : x(x), y(y), width(width), height(height) {}
#endif
};
/**
* 2D Size, 32-bit signed fields
* This struct typically represents the size of a rectangular region in 2D space
*/
struct NppStSize32s
{
NppSt32s width; ///< Rectangle width
NppSt32s height; ///< Rectangle height
#ifdef __cplusplus
NppStSize32s() : width(0), height(0) {};
NppStSize32s(NppSt32s width, NppSt32s height) : width(width), height(height) {}
#endif
};
/**
* 2D Size, 32-bit unsigned fields
* This struct typically represents the size of a rectangular region in 2D space
*/
struct NppStSize32u
{
NppSt32u width; ///< Rectangle width
NppSt32u height; ///< Rectangle height
#ifdef __cplusplus
NppStSize32u() : width(0), height(0) {};
NppStSize32u(NppSt32u width, NppSt32u height) : width(width), height(height) {}
#endif
};
/**
* Error Status Codes
*
* Almost all NPP functions return error-status information using
* these return codes.
* Negative return codes indicate errors, positive return codes indicate
* warnings, and a return code of 0 indicates success.
*/
enum NppStStatus
{
//already present in NPP
/* NPP_SUCCESS = 0, ///< Successful operation (same as NPP_NO_ERROR)
NPP_ERROR = -1, ///< Unknown error
NPP_CUDA_KERNEL_EXECUTION_ERROR = -3, ///< CUDA kernel execution error
NPP_NULL_POINTER_ERROR = -4, ///< NULL pointer argument error
NPP_TEXTURE_BIND_ERROR = -24, ///< CUDA texture binding error or non-zero offset returned
NPP_MEMCPY_ERROR = -13, ///< CUDA memory copy error
NPP_MEM_ALLOC_ERR = -12, ///< CUDA memory allocation error
NPP_MEMFREE_ERR = -15, ///< CUDA memory deallocation error*/
//to be added
NPP_INVALID_ROI, ///< Invalid region of interest argument
NPP_INVALID_STEP, ///< Invalid image line step argument (check sign, alignment, relation to image width)
NPP_INVALID_SCALE, ///< Invalid scale parameter passed
NPP_MEM_INSUFFICIENT_BUFFER, ///< Insufficient user-allocated buffer
NPP_MEM_RESIDENCE_ERROR, ///< Memory residence error detected (check if pointers should be device or pinned)
NPP_MEM_INTERNAL_ERROR, ///< Internal memory management error
};
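/**
* Illustrative only (not from the original header): a minimal client-side check,
* assuming the convention stated above (negative = error, 0 = success, positive = warning).
* \code
* NppStStatus status = nppiStIntegralGetSize_8u32u(roiSize, &bufSize); // roiSize, bufSize defined by the caller
* if ((int)status < 0)
* {
*     // handle the error, e.g. report it and fall back to a CPU code path
* }
* \endcode
*/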
/*@}*/
#ifdef __cplusplus
/** \defgroup ct_typesize_checks Compile-time checks of client-side type sizes
* @{
*/
NPPST_CT_ASSERT(sizeof(NppStBool) <= 4);
NPPST_CT_ASSERT(sizeof(NppSt64s) == 8);
NPPST_CT_ASSERT(sizeof(NppSt64u) == 8);
NPPST_CT_ASSERT(sizeof(NppSt32s) == 4);
NPPST_CT_ASSERT(sizeof(NppSt32u) == 4);
NPPST_CT_ASSERT(sizeof(NppSt16s) == 2);
NPPST_CT_ASSERT(sizeof(NppSt16u) == 2);
NPPST_CT_ASSERT(sizeof(NppSt8s) == 1);
NPPST_CT_ASSERT(sizeof(NppSt8u) == 1);
NPPST_CT_ASSERT(sizeof(NppSt32f) == 4);
NPPST_CT_ASSERT(sizeof(NppSt64f) == 8);
NPPST_CT_ASSERT(sizeof(NppStRect8u) == sizeof(NppSt32u));
NPPST_CT_ASSERT(sizeof(NppStRect32s) == 4 * sizeof(NppSt32s));
NPPST_CT_ASSERT(sizeof(NppStRect32u) == 4 * sizeof(NppSt32u));
NPPST_CT_ASSERT(sizeof(NppStSize32u) == 2 * sizeof(NppSt32u));
/*@}*/
#endif
#ifdef __cplusplus
extern "C" {
#endif
/** \defgroup core_npp NPP Core
* Basic functions for CUDA stream management.
* WARNING: These functions could not be exported from the NPP_staging library, so they cannot be used.
* @{
*/
/**
* Gets the active CUDA stream used by NPP (Not an API yet!)
* \return Current CUDA stream
*/
cudaStream_t nppStGetActiveCUDAstream();
/**
* Sets the active CUDA stream used by NPP (Not an API yet!)
* \param cudaStream [IN] CUDA stream to become current
* \return CUDA stream used before
*/
cudaStream_t nppStSetActiveCUDAstream(cudaStream_t cudaStream);
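/**
* Usage sketch (an assumption, not part of the original header): routing NPP_staging work
* onto a user-created CUDA stream and restoring the previous stream afterwards.
* \code
* cudaStream_t myStream;
* cudaStreamCreate(&myStream);
* cudaStream_t oldStream = nppStSetActiveCUDAstream(myStream);
* // ... calls to nppiSt and nppsSt functions are now expected to run on myStream ...
* nppStSetActiveCUDAstream(oldStream);
* cudaStreamDestroy(myStream);
* \endcode
*/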
/*@}*/
/** \defgroup nppi NPP Image Processing
* @{
*/
/**
* Downsamples (decimates) an image using the nearest neighbor algorithm. 32-bit unsigned pixels, single channel.
*
* \param d_src [IN] Source image pointer (CUDA device memory)
* \param srcStep [IN] Source image line step
* \param d_dst [OUT] Destination image pointer (CUDA device memory)
* \param dstStep [IN] Destination image line step
* \param srcRoi [IN] Region of interest in the source image
* \param scale [IN] Downsampling scale factor (positive integer)
* \param readThruTexture [IN] Performance hint to cache source in texture (true) or read directly (false)
*
* \return NPP status code
*/
NppStStatus nppiStDownsampleNearest_32u_C1R(NppSt32u *d_src, NppSt32u srcStep,
NppSt32u *d_dst, NppSt32u dstStep,
NppStSize32u srcRoi, NppSt32u scale,
NppStBool readThruTexture);
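/**
* Usage sketch (illustrative, not from the original header): decimating a 640x480
* 32-bit single-channel image by a factor of 2, assuming line steps are given in bytes.
* \code
* NppStSize32u srcRoi(640, 480);
* NppSt32u scale = 2;
* NppSt32u *d_src, *d_dst;
* size_t srcPitch, dstPitch;
* cudaMallocPitch((void**)&d_src, &srcPitch, srcRoi.width * sizeof(NppSt32u), srcRoi.height);
* cudaMallocPitch((void**)&d_dst, &dstPitch, (srcRoi.width / scale) * sizeof(NppSt32u), srcRoi.height / scale);
* // ... upload source data into d_src ...
* NppStStatus status = nppiStDownsampleNearest_32u_C1R(d_src, (NppSt32u)srcPitch,
*                                                      d_dst, (NppSt32u)dstPitch,
*                                                      srcRoi, scale, false);
* \endcode
*/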
/**
* Downsamples (decimates) an image using the nearest neighbor algorithm. 32-bit signed pixels, single channel.
* \see nppiStDownsampleNearest_32u_C1R
*/
NppStStatus nppiStDownsampleNearest_32s_C1R(NppSt32s *d_src, NppSt32u srcStep,
NppSt32s *d_dst, NppSt32u dstStep,
NppStSize32u srcRoi, NppSt32u scale,
NppStBool readThruTexture);
/**
* Downsamples (decimates) an image using the nearest neighbor algorithm. 32-bit float pixels, single channel.
* \see nppiStDownsampleNearest_32u_C1R
*/
NppStStatus nppiStDownsampleNearest_32f_C1R(NppSt32f *d_src, NppSt32u srcStep,
NppSt32f *d_dst, NppSt32u dstStep,
NppStSize32u srcRoi, NppSt32u scale,
NppStBool readThruTexture);
/**
* Downsamples (decimates) an image using the nearest neighbor algorithm. 64-bit unsigned pixels, single channel.
* \see nppiStDownsampleNearest_32u_C1R
*/
NppStStatus nppiStDownsampleNearest_64u_C1R(NppSt64u *d_src, NppSt32u srcStep,
NppSt64u *d_dst, NppSt32u dstStep,
NppStSize32u srcRoi, NppSt32u scale,
NppStBool readThruTexture);
/**
* Downsamples (decimates) an image using the nearest neighbor algorithm. 64-bit signed pixels, single channel.
* \see nppiStDownsampleNearest_32u_C1R
*/
NppStStatus nppiStDownsampleNearest_64s_C1R(NppSt64s *d_src, NppSt32u srcStep,
NppSt64s *d_dst, NppSt32u dstStep,
NppStSize32u srcRoi, NppSt32u scale,
NppStBool readThruTexture);
/**
* Downsamples (decimates) an image using the nearest neighbor algorithm. 64-bit float pixels, single channel.
* \see nppiStDownsampleNearest_32u_C1R
*/
NppStStatus nppiStDownsampleNearest_64f_C1R(NppSt64f *d_src, NppSt32u srcStep,
NppSt64f *d_dst, NppSt32u dstStep,
NppStSize32u srcRoi, NppSt32u scale,
NppStBool readThruTexture);
/**
* Downsamples (decimates) an image using the nearest neighbor algorithm. 32-bit unsigned pixels, single channel. Host implementation.
*
* \param h_src [IN] Source image pointer (Host or pinned memory)
* \param srcStep [IN] Source image line step
* \param h_dst [OUT] Destination image pointer (Host or pinned memory)
* \param dstStep [IN] Destination image line step
* \param srcRoi [IN] Region of interest in the source image
* \param scale [IN] Downsampling scale factor (positive integer)
*
* \return NPP status code
*/
NppStStatus nppiStDownsampleNearest_32u_C1R_host(NppSt32u *h_src, NppSt32u srcStep,
NppSt32u *h_dst, NppSt32u dstStep,
NppStSize32u srcRoi, NppSt32u scale);
/**
* Downsamples (decimates) an image using the nearest neighbor algorithm. 32-bit signed pixels, single channel. Host implementation.
* \see nppiStDownsampleNearest_32u_C1R_host
*/
NppStStatus nppiStDownsampleNearest_32s_C1R_host(NppSt32s *h_src, NppSt32u srcStep,
NppSt32s *h_dst, NppSt32u dstStep,
NppStSize32u srcRoi, NppSt32u scale);
/**
* Downsamples (decimates) an image using the nearest neighbor algorithm. 32-bit float pixels, single channel. Host implementation.
* \see nppiStDownsampleNearest_32u_C1R_host
*/
NppStStatus nppiStDownsampleNearest_32f_C1R_host(NppSt32f *h_src, NppSt32u srcStep,
NppSt32f *h_dst, NppSt32u dstStep,
NppStSize32u srcRoi, NppSt32u scale);
/**
* Downsamples (decimates) an image using the nearest neighbor algorithm. 64-bit unsigned pixels, single channel. Host implementation.
* \see nppiStDownsampleNearest_32u_C1R_host
*/
NppStStatus nppiStDownsampleNearest_64u_C1R_host(NppSt64u *h_src, NppSt32u srcStep,
NppSt64u *h_dst, NppSt32u dstStep,
NppStSize32u srcRoi, NppSt32u scale);
/**
* Downsamples (decimates) an image using the nearest neighbor algorithm. 64-bit signed pixels, single channel. Host implementation.
* \see nppiStDownsampleNearest_32u_C1R_host
*/
NppStStatus nppiStDownsampleNearest_64s_C1R_host(NppSt64s *h_src, NppSt32u srcStep,
NppSt64s *h_dst, NppSt32u dstStep,
NppStSize32u srcRoi, NppSt32u scale);
/**
* Downsamples (decimates) an image using the nearest neighbor algorithm. 64-bit float pixels, single channel. Host implementation.
* \see nppiStDownsampleNearest_32u_C1R_host
*/
NppStStatus nppiStDownsampleNearest_64f_C1R_host(NppSt64f *h_src, NppSt32u srcStep,
NppSt64f *h_dst, NppSt32u dstStep,
NppStSize32u srcRoi, NppSt32u scale);
/**
* Computes standard deviation for each rectangular region of the input image using integral images.
*
* \param d_sum [IN] Integral image pointer (CUDA device memory)
* \param sumStep [IN] Integral image line step
* \param d_sqsum [IN] Squared integral image pointer (CUDA device memory)
* \param sqsumStep [IN] Squared integral image line step
* \param d_norm [OUT] Stddev image pointer (CUDA device memory). Each pixel contains stddev of a rect with top-left corner at the original location in the image
* \param normStep [IN] Stddev image line step
* \param roi [IN] Region of interest in the source image
* \param rect [IN] Rectangular region to calculate stddev over
* \param scaleArea [IN] Multiplication factor to account for decimated scale
* \param readThruTexture [IN] Performance hint to cache source in texture (true) or read directly (false)
*
* \return NPP status code
*/
NppStStatus nppiStRectStdDev_32f_C1R(NppSt32u *d_sum, NppSt32u sumStep,
NppSt64u *d_sqsum, NppSt32u sqsumStep,
NppSt32f *d_norm, NppSt32u normStep,
NppStSize32u roi, NppStRect32u rect,
NppSt32f scaleArea, NppStBool readThruTexture);
/**
* Computes standard deviation for each rectangular region of the input image using integral images. Host implementation
*
* \param h_sum [IN] Integral image pointer (Host or pinned memory)
* \param sumStep [IN] Integral image line step
* \param h_sqsum [IN] Squared integral image pointer (Host or pinned memory)
* \param sqsumStep [IN] Squared integral image line step
* \param h_norm [OUT] Stddev image pointer (Host or pinned memory). Each pixel contains stddev of a rect with top-left corner at the original location in the image
* \param normStep [IN] Stddev image line step
* \param roi [IN] Region of interest in the source image
* \param rect [IN] Rectangular region to calculate stddev over
* \param scaleArea [IN] Multiplication factor to account for decimated scale
*
* \return NPP status code
*/
NppStStatus nppiStRectStdDev_32f_C1R_host(NppSt32u *h_sum, NppSt32u sumStep,
NppSt64u *h_sqsum, NppSt32u sqsumStep,
NppSt32f *h_norm, NppSt32u normStep,
NppStSize32u roi, NppStRect32u rect,
NppSt32f scaleArea);
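/**
* For reference, a sketch of the underlying math (an assumption based on the parameter
* descriptions, not taken from the library): with S the integral image and Q the squared
* integral image, the statistics over a W x H rect anchored at (x, y) come from four
* corner lookups each.
* \code
* // sum    = S(x+W, y+H) - S(x, y+H) - S(x+W, y) + S(x, y)
* // sqsum  = Q(x+W, y+H) - Q(x, y+H) - Q(x+W, y) + Q(x, y)
* // mean   = sum / (W * H)
* // var    = sqsum / (W * H) - mean * mean      // scaled by scaleArea where applicable
* // stddev = sqrt(max(var, 0))
* \endcode
*/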
/**
* Transposes an image. 32-bit unsigned pixels, single channel
*
* \param d_src [IN] Source image pointer (CUDA device memory)
* \param srcStride [IN] Source image line step
* \param d_dst [OUT] Destination image pointer (CUDA device memory)
* \param dstStride [IN] Destination image line step
* \param srcRoi [IN] Region of interest of the source image
*
* \return NPP status code
*/
NppStStatus nppiStTranspose_32u_C1R(NppSt32u *d_src, NppSt32u srcStride,
NppSt32u *d_dst, NppSt32u dstStride, NppStSize32u srcRoi);
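/**
* Note (an assumption, not stated in the original header): rows and columns are swapped,
* so the destination must accommodate srcRoi.height x srcRoi.width pixels.
* \code
* NppStSize32u srcRoi(w, h);   // w, h, d_src, d_dst and the strides defined by the caller
* nppiStTranspose_32u_C1R(d_src, srcStride, d_dst, dstStride, srcRoi);
* \endcode
*/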
/**
* Transposes an image. 32-bit signed pixels, single channel
* \see nppiStTranspose_32u_C1R
*/
NppStStatus nppiStTranspose_32s_C1R(NppSt32s *d_src, NppSt32u srcStride,
NppSt32s *d_dst, NppSt32u dstStride, NppStSize32u srcRoi);
/**
* Transposes an image. 32-bit float pixels, single channel
* \see nppiStTranspose_32u_C1R
*/
NppStStatus nppiStTranspose_32f_C1R(NppSt32f *d_src, NppSt32u srcStride,
NppSt32f *d_dst, NppSt32u dstStride, NppStSize32u srcRoi);
/**
* Transposes an image. 64-bit unsigned pixels, single channel
* \see nppiStTranspose_32u_C1R
*/
NppStStatus nppiStTranspose_64u_C1R(NppSt64u *d_src, NppSt32u srcStride,
NppSt64u *d_dst, NppSt32u dstStride, NppStSize32u srcRoi);
/**
* Transposes an image. 64-bit signed pixels, single channel
* \see nppiStTranspose_32u_C1R
*/
NppStStatus nppiStTranspose_64s_C1R(NppSt64s *d_src, NppSt32u srcStride,
NppSt64s *d_dst, NppSt32u dstStride, NppStSize32u srcRoi);
/**
* Transposes an image. 64-bit float pixels, single channel
* \see nppiStTranspose_32u_C1R
*/
NppStStatus nppiStTranspose_64f_C1R(NppSt64f *d_src, NppSt32u srcStride,
NppSt64f *d_dst, NppSt32u dstStride, NppStSize32u srcRoi);
/**
* Transposes an image. 32-bit unsigned pixels, single channel. Host implementation
*
* \param h_src [IN] Source image pointer (Host or pinned memory)
* \param srcStride [IN] Source image line step
* \param h_dst [OUT] Destination image pointer (Host or pinned memory)
* \param dstStride [IN] Destination image line step
* \param srcRoi [IN] Region of interest of the source image
*
* \return NPP status code
*/
NppStStatus nppiStTranspose_32u_C1R_host(NppSt32u *h_src, NppSt32u srcStride,
NppSt32u *h_dst, NppSt32u dstStride, NppStSize32u srcRoi);
/**
* Transposes an image. 32-bit signed pixels, single channel. Host implementation
* \see nppiStTranspose_32u_C1R_host
*/
NppStStatus nppiStTranspose_32s_C1R_host(NppSt32s *h_src, NppSt32u srcStride,
NppSt32s *h_dst, NppSt32u dstStride, NppStSize32u srcRoi);
/**
* Transposes an image. 32-bit float pixels, single channel. Host implementation
* \see nppiStTranspose_32u_C1R_host
*/
NppStStatus nppiStTranspose_32f_C1R_host(NppSt32f *h_src, NppSt32u srcStride,
NppSt32f *h_dst, NppSt32u dstStride, NppStSize32u srcRoi);
/**
* Transposes an image. 64-bit unsigned pixels, single channel. Host implementation
* \see nppiStTranspose_32u_C1R_host
*/
NppStStatus nppiStTranspose_64u_C1R_host(NppSt64u *h_src, NppSt32u srcStride,
NppSt64u *h_dst, NppSt32u dstStride, NppStSize32u srcRoi);
/**
* Transposes an image. 64-bit signed pixels, single channel. Host implementation
* \see nppiStTranspose_32u_C1R_host
*/
NppStStatus nppiStTranspose_64s_C1R_host(NppSt64s *h_src, NppSt32u srcStride,
NppSt64s *h_dst, NppSt32u dstStride, NppStSize32u srcRoi);
/**
* Transposes an image. 64-bit float pixels, single channel. Host implementation
* \see nppiStTranspose_32u_C1R_host
*/
NppStStatus nppiStTranspose_64f_C1R_host(NppSt64f *h_src, NppSt32u srcStride,
NppSt64f *h_dst, NppSt32u dstStride, NppStSize32u srcRoi);
/**
* Calculates the size of the temporary buffer for integral image creation
*
* \param roiSize [IN] Size of the input image
* \param pBufsize [OUT] Pointer to host variable that returns the size of the temporary buffer (in bytes)
*
* \return NPP status code
*/
NppStStatus nppiStIntegralGetSize_8u32u(NppStSize32u roiSize, NppSt32u *pBufsize);
/**
* Creates an integral image representation for the input image
*
* \param d_src [IN] Source image pointer (CUDA device memory)
* \param srcStep [IN] Source image line step
* \param d_dst [OUT] Destination integral image pointer (CUDA device memory)
* \param dstStep [IN] Destination image line step
* \param roiSize [IN] Region of interest of the source image
* \param pBuffer [IN] Pointer to the pre-allocated temporary buffer (CUDA device memory)
* \param bufSize [IN] Size of the pBuffer in bytes
*
* \return NPP status code
*/
NppStStatus nppiStIntegral_8u32u_C1R(NppSt8u *d_src, NppSt32u srcStep,
NppSt32u *d_dst, NppSt32u dstStep, NppStSize32u roiSize,
NppSt8u *pBuffer, NppSt32u bufSize);
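/**
* Usage sketch (illustrative, not from the original header): the GetSize / allocate /
* compute pattern for building an integral image on the device.
* \code
* NppStSize32u roi(width, height);   // width, height defined by the caller
* NppSt32u bufSize = 0;
* nppiStIntegralGetSize_8u32u(roi, &bufSize);
* NppSt8u *d_buf;
* cudaMalloc((void**)&d_buf, bufSize);
* // d_src, srcStep, d_int and intStep prepared by the caller; the integral image is
* // conventionally (width + 1) x (height + 1) in size.
* nppiStIntegral_8u32u_C1R(d_src, srcStep, d_int, intStep, roi, d_buf, bufSize);
* cudaFree(d_buf);
* \endcode
*/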
/**
* Creates an integral image representation for the input image. Host implementation
*
* \param h_src [IN] Source image pointer (Host or pinned memory)
* \param srcStep [IN] Source image line step
* \param h_dst [OUT] Destination integral image pointer (Host or pinned memory)
* \param dstStep [IN] Destination image line step
* \param roiSize [IN] Region of interest of the source image
*
* \return NPP status code
*/
NppStStatus nppiStIntegral_8u32u_C1R_host(NppSt8u *h_src, NppSt32u srcStep,
NppSt32u *h_dst, NppSt32u dstStep, NppStSize32u roiSize);
/**
* Calculates the size of the temporary buffer for squared integral image creation
*
* \param roiSize [IN] Size of the input image
* \param pBufsize [OUT] Pointer to host variable that returns the size of the temporary buffer (in bytes)
*
* \return NPP status code
*/
NppStStatus nppiStSqrIntegralGetSize_8u64u(NppStSize32u roiSize, NppSt32u *pBufsize);
/**
* Creates a squared integral image representation for the input image
*
* \param d_src [IN] Source image pointer (CUDA device memory)
* \param srcStep [IN] Source image line step
* \param d_dst [OUT] Destination squared integral image pointer (CUDA device memory)
* \param dstStep [IN] Destination image line step
* \param roiSize [IN] Region of interest of the source image
* \param pBuffer [IN] Pointer to the pre-allocated temporary buffer (CUDA device memory)
* \param bufSize [IN] Size of the pBuffer in bytes
*
* \return NPP status code
*/
NppStStatus nppiStSqrIntegral_8u64u_C1R(NppSt8u *d_src, NppSt32u srcStep,
NppSt64u *d_dst, NppSt32u dstStep, NppStSize32u roiSize,
NppSt8u *pBuffer, NppSt32u bufSize);
/**
* Creates a squared integral image representation for the input image. Host implementation
*
* \param h_src [IN] Source image pointer (Host or pinned memory)
* \param srcStep [IN] Source image line step
* \param h_dst [OUT] Destination squared integral image pointer (Host or pinned memory)
* \param dstStep [IN] Destination image line step
* \param roiSize [IN] Region of interest of the source image
*
* \return NPP status code
*/
NppStStatus nppiStSqrIntegral_8u64u_C1R_host(NppSt8u *h_src, NppSt32u srcStep,
NppSt64u *h_dst, NppSt32u dstStep, NppStSize32u roiSize);
/*@}*/
/** \defgroup npps NPP Signal Processing
* @{
*/
/**
* Calculates the size of the temporary buffer for vector compaction. 32-bit unsigned values
*
* \param srcLen [IN] Length of the input vector in elements
* \param pBufsize [OUT] Pointer to host variable that returns the size of the temporary buffer (in bytes)
*
* \return NPP status code
*/
NppStStatus nppsStCompactGetSize_32u(NppSt32u srcLen, NppSt32u *pBufsize);
/**
* Calculates the size of the temporary buffer for vector compaction. 32-bit signed values
* \see nppsStCompactGetSize_32u
*/
NppStStatus nppsStCompactGetSize_32s(NppSt32u srcLen, NppSt32u *pBufsize);
/**
* Calculates the size of the temporary buffer for vector compaction. 32-bit float values
* \see nppsStCompactGetSize_32u
*/
NppStStatus nppsStCompactGetSize_32f(NppSt32u srcLen, NppSt32u *pBufsize);
/**
* Compacts the input vector by removing elements of specified value. 32-bit unsigned values
*
* \param d_src [IN] Source vector pointer (CUDA device memory)
* \param srcLen [IN] Source vector length
* \param d_dst [OUT] Destination vector pointer (CUDA device memory)
* \param p_dstLen [OUT] Pointer to the destination vector length (Pinned memory or NULL)
* \param elemRemove [IN] The value to be removed
* \param pBuffer [IN] Pointer to the pre-allocated temporary buffer (CUDA device memory)
* \param bufSize [IN] Size of the pBuffer in bytes
*
* \return NPP status code
*/
NppStStatus nppsStCompact_32u(NppSt32u *d_src, NppSt32u srcLen,
NppSt32u *d_dst, NppSt32u *p_dstLen,
NppSt32u elemRemove,
NppSt8u *pBuffer, NppSt32u bufSize);
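/**
* Usage sketch (illustrative, not from the original header): removing all zero-valued
* elements from a device vector, with the surviving count written to pinned memory.
* \code
* NppSt32u bufSize = 0;
* nppsStCompactGetSize_32u(srcLen, &bufSize);           // srcLen defined by the caller
* NppSt8u *d_buf;
* cudaMalloc((void**)&d_buf, bufSize);
* NppSt32u *p_dstLen;
* cudaMallocHost((void**)&p_dstLen, sizeof(NppSt32u));  // pinned, as the documentation requires
* nppsStCompact_32u(d_src, srcLen, d_dst, p_dstLen, 0, d_buf, bufSize);
* // *p_dstLen now holds the number of elements kept in d_dst
* cudaFreeHost(p_dstLen);
* cudaFree(d_buf);
* \endcode
*/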
/**
* Compacts the input vector by removing elements of specified value. 32-bit signed values
* \see nppsStCompact_32u
*/
NppStStatus nppsStCompact_32s(NppSt32s *d_src, NppSt32u srcLen,
NppSt32s *d_dst, NppSt32u *p_dstLen,
NppSt32s elemRemove,
NppSt8u *pBuffer, NppSt32u bufSize);
/**
* Compacts the input vector by removing elements of specified value. 32-bit float values
* \see nppsStCompact_32u
*/
NppStStatus nppsStCompact_32f(NppSt32f *d_src, NppSt32u srcLen,
NppSt32f *d_dst, NppSt32u *p_dstLen,
NppSt32f elemRemove,
NppSt8u *pBuffer, NppSt32u bufSize);
/**
* Compacts the input vector by removing elements of specified value. 32-bit unsigned values. Host implementation
*
* \param h_src [IN] Source vector pointer (Host or pinned memory)
* \param srcLen [IN] Source vector length
* \param h_dst [OUT] Destination vector pointer (Host or pinned memory)
* \param dstLen [OUT] Pointer to the destination vector length (can be NULL)
* \param elemRemove [IN] The value to be removed
*
* \return NPP status code
*/
NppStStatus nppsStCompact_32u_host(NppSt32u *h_src, NppSt32u srcLen,
NppSt32u *h_dst, NppSt32u *dstLen, NppSt32u elemRemove);
/**
* Compacts the input vector by removing elements of specified value. 32-bit signed values. Host implementation
* \see nppsStCompact_32u_host
*/
NppStStatus nppsStCompact_32s_host(NppSt32s *h_src, NppSt32u srcLen,
NppSt32s *h_dst, NppSt32u *dstLen, NppSt32s elemRemove);
/**
* Compacts the input vector by removing elements of specified value. 32-bit float values. Host implementation
* \see nppsStCompact_32u_host
*/
NppStStatus nppsStCompact_32f_host(NppSt32f *h_src, NppSt32u srcLen,
NppSt32f *h_dst, NppSt32u *dstLen, NppSt32f elemRemove);
/*@}*/
#ifdef __cplusplus
}
#endif
#endif // _npp_staging_h_


@@ -1,15 +1,12 @@
set(name "gpu")
#"opencv_features2d" "opencv_flann" "opencv_objdetect" - only headers needed
set(DEPS "opencv_core" "opencv_imgproc" "opencv_objdetect" "opencv_features2d" "opencv_flann")
set(OPENCV_LINKER_LIBS ${OPENCV_LINKER_LIBS} opencv_gpu)
set(the_target "opencv_${name}")
project(${the_target})
set(DEPS "opencv_core" "opencv_imgproc" "opencv_objdetect" "opencv_features2d" "opencv_flann") #"opencv_features2d" "opencv_flann" "opencv_objdetect" - only headers needed
set(OPENCV_LINKER_LIBS ${OPENCV_LINKER_LIBS} opencv_gpu)
add_definitions(-DCVAPI_EXPORTS)
include_directories("${CMAKE_CURRENT_SOURCE_DIR}/include"
@@ -43,7 +40,7 @@ if (HAVE_CUDA)
set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} ${_path_to_findnpp})
find_package(NPP 3.2.16 REQUIRED)
message(STATUS "NPP detected: " ${NPP_VERSION})
include_directories(${CUDA_INCLUDE_DIRS} ${CUDA_NPP_INCLUDES})
if (UNIX OR APPLE)
@@ -79,6 +76,11 @@ endif()
add_library(${the_target} ${lib_srcs} ${lib_hdrs} ${lib_int_hdrs} ${lib_cuda} ${lib_cuda_hdrs} ${lib_device_hdrs} ${cuda_objs})
IF (HAVE_CUDA)
include(FindNPP_staging.cmake)
include_directories(${NPPST_INC})
target_link_libraries(${the_target} ${NPPST_LIB})
endif()
if(PCHSupport_FOUND)
set(pch_header ${CMAKE_CURRENT_SOURCE_DIR}/src/precomp.hpp)
@@ -108,7 +110,7 @@ set_target_properties(${the_target} PROPERTIES
)
# Add the required libraries for linking:
target_link_libraries(${the_target} ${OPENCV_LINKER_LIBS} ${IPP_LIBS} ${DEPS})
target_link_libraries(${the_target} ${OPENCV_LINKER_LIBS} ${IPP_LIBS} ${DEPS} )
if (HAVE_CUDA)
target_link_libraries(${the_target} ${CUDA_LIBRARIES} ${CUDA_NPP_LIBRARIES})


@@ -0,0 +1,24 @@
if(CMAKE_SIZEOF_VOID_P EQUAL 4)
set(BIT_SUFF 32)
else()
set(BIT_SUFF 64)
endif()
if (APPLE)
set(PLATFORM_SUFF Darwin)
elseif (UNIX)
set(PLATFORM_SUFF Linux)
else()
set(PLATFORM_SUFF Windows)
endif()
set(LIB_FILE NPP_staging_static_${PLATFORM_SUFF}_${BIT_SUFF}_v1)
find_library(NPPST_LIB
NAMES "${LIB_FILE}" "lib${LIB_FILE}"
PATHS "${CMAKE_SOURCE_DIR}/3rdparty/NPP_staging"
DOC "NPP staging library"
)
set(NPPST_INC "${CMAKE_SOURCE_DIR}/3rdparty/NPP_staging")


@@ -1305,8 +1305,67 @@ namespace cv
explicit BruteForceMatcher_GPU() : BruteForceMatcher_GPU_base(L2Dist) {}
explicit BruteForceMatcher_GPU(L2<T> /*d*/) : BruteForceMatcher_GPU_base(L2Dist) {}
};
}
////////////////////////////////// CascadeClassifier //////////////////////////////////////////
// The cascade classifier class for object detection.
class CV_EXPORTS CascadeClassifier
{
public:
struct CV_EXPORTS DTreeNode
{
int featureIdx;
float threshold; // for ordered features only
int left;
int right;
};
struct CV_EXPORTS DTree
{
int nodeCount;
};
struct CV_EXPORTS Stage
{
int first;
int ntrees;
float threshold;
};
enum { BOOST = 0 };
enum { DO_CANNY_PRUNING = 1, SCALE_IMAGE = 2, FIND_BIGGEST_OBJECT = 4, DO_ROUGH_SEARCH = 8 };
CascadeClassifier();
CascadeClassifier(const string& filename);
~CascadeClassifier();
bool empty() const;
bool load(const string& filename);
bool read(const FileNode& node);
void detectMultiScale( const Mat& image, vector<Rect>& objects, double scaleFactor=1.1,
int minNeighbors=3, int flags=0, Size minSize=Size(), Size maxSize=Size());
bool setImage( Ptr<FeatureEvaluator>&, const Mat& );
int runAt( Ptr<FeatureEvaluator>&, Point );
bool isStumpBased;
int stageType;
int featureType;
int ncategories;
Size origWinSize;
vector<Stage> stages;
vector<DTree> classifiers;
vector<DTreeNode> nodes;
vector<float> leaves;
vector<int> subsets;
Ptr<FeatureEvaluator> feval;
Ptr<CvHaarClassifierCascade> oldCascade;
};
}
//! Speckle filtering - filters small connected components on the disparity image.
//! It sets pixel (x,y) to newVal if it corresponds to a small CC with size < maxSpeckleSize.
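A sketch of how the new gpu::CascadeClassifier interface above is intended to be used (hypothetical usage, not from the commit: load() and detectMultiScale() are still stubs at this point, and the file names below are only illustrative):

    #include <vector>
    #include <opencv2/gpu/gpu.hpp>
    #include <opencv2/highgui/highgui.hpp>

    int main()
    {
        cv::Mat frame = cv::imread("frame.png", 0);              // grayscale input (file name illustrative)
        cv::gpu::CascadeClassifier cascade;
        if (cascade.load("haarcascade_frontalface_alt.xml"))     // cascade file name illustrative
        {
            std::vector<cv::Rect> objects;
            cascade.detectMultiScale(frame, objects, 1.2, 4, 0, cv::Size(24, 24));
            // objects now holds the detected rectangles
        }
        return 0;
    }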


@@ -0,0 +1,110 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#include "precomp.hpp"
using namespace cv;
using namespace cv::gpu;
using namespace std;
#if !defined (HAVE_CUDA)
cv::gpu::CascadeClassifier::CascadeClassifier() { throw_nogpu(); }
cv::gpu::CascadeClassifier::CascadeClassifier(const string&) { throw_nogpu(); }
cv::gpu::CascadeClassifier::~CascadeClassifier() { throw_nogpu(); }
bool cv::gpu::CascadeClassifier::empty() const { throw_nogpu(); return true; }
bool cv::gpu::CascadeClassifier::load(const string& filename) { throw_nogpu(); return true; }
bool cv::gpu::CascadeClassifier::read(const FileNode& node) { throw_nogpu(); return true; }
void cv::gpu::CascadeClassifier::detectMultiScale( const Mat&, vector<Rect>&, double, int, int, Size, Size) { throw_nogpu(); }
#else
cv::gpu::CascadeClassifier::CascadeClassifier()
{
}
cv::gpu::CascadeClassifier::CascadeClassifier(const string& filename)
{
}
cv::gpu::CascadeClassifier::~CascadeClassifier()
{
}
bool cv::gpu::CascadeClassifier::empty() const
{
// Dummy check that references an NPP_staging symbol (placeholder until the real implementation lands).
int *a = (int*)&nppiStTranspose_32u_C1R;
return *a == 0xFFFFF;
}
bool cv::gpu::CascadeClassifier::load(const string& filename)
{
return true;
}
bool cv::gpu::CascadeClassifier::read(const FileNode& node)
{
return true;
}
void cv::gpu::CascadeClassifier::detectMultiScale( const Mat& image, vector<Rect>& objects, double scaleFactor,
int minNeighbors, int flags, Size minSize, Size maxSize)
{
}
#endif


@@ -66,6 +66,7 @@
#include "cuda_runtime_api.h"
#include "opencv2/gpu/stream_accessor.hpp"
#include "npp.h"
#include "npp_staging.h"
#define CUDART_MINIMUM_REQUIRED_VERSION 3020
#define NPP_MINIMUM_REQUIRED_VERSION 3216
@@ -78,6 +79,7 @@
#error "Insufficient NPP version, please update it."
#endif
static inline void throw_nogpu() { CV_Error(CV_GpuNotSupported, "The called functionality is disabled for current build or platform"); }
#else /* defined(HAVE_CUDA) */


@@ -55,12 +55,7 @@ struct CV_GpuStereoBMTest : public CvTest
void run_stress()
{
//cv::setBreakOnError(true);
int winsz[] = { 13, 15, 17, 19 };
int disps[] = { 128, 160, 192, 256};
Size res[] = { Size(1027, 768), Size(1280, 1024), Size(1600, 1152), Size(1920, 1080) };
{
RNG rng;
for(int i = 0; i < 10; ++i)