1) NPP_staging added as sources; prebuilt binaries removed.
2) NVIDIA tests for the GPU module.
3) FD (face detection) sample that uses NVIDIA's interface.
parent 811f6fbe92
commit 0747f2d863
Binary files not shown (5 removed binary files).
3rdparty/NPP_staging/npp_staging.h (vendored, 784 lines deleted)
@@ -1,784 +0,0 @@
|
||||
/*
|
||||
* Copyright 1993-2010 NVIDIA Corporation. All rights reserved.
|
||||
*
|
||||
* NOTICE TO USER:
|
||||
*
|
||||
* This source code is subject to NVIDIA ownership rights under U.S. and
|
||||
* international Copyright laws.
|
||||
*
|
||||
* NVIDIA MAKES NO REPRESENTATION ABOUT THE SUITABILITY OF THIS SOURCE
|
||||
* CODE FOR ANY PURPOSE. IT IS PROVIDED "AS IS" WITHOUT EXPRESS OR
|
||||
* IMPLIED WARRANTY OF ANY KIND. NVIDIA DISCLAIMS ALL WARRANTIES WITH
|
||||
* REGARD TO THIS SOURCE CODE, INCLUDING ALL IMPLIED WARRANTIES OF
|
||||
* MERCHANTABILITY, NONINFRINGEMENT, AND FITNESS FOR A PARTICULAR PURPOSE.
|
||||
* IN NO EVENT SHALL NVIDIA BE LIABLE FOR ANY SPECIAL, INDIRECT, INCIDENTAL,
|
||||
* OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS
|
||||
* OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE
|
||||
* OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE
|
||||
* OR PERFORMANCE OF THIS SOURCE CODE.
|
||||
*
|
||||
* U.S. Government End Users. This source code is a "commercial item" as
|
||||
* that term is defined at 48 C.F.R. 2.101 (OCT 1995), consisting of
|
||||
* "commercial computer software" and "commercial computer software
|
||||
* documentation" as such terms are used in 48 C.F.R. 12.212 (SEPT 1995)
|
||||
* and is provided to the U.S. Government only as a commercial end item.
|
||||
* Consistent with 48 C.F.R.12.212 and 48 C.F.R. 227.7202-1 through
|
||||
* 227.7202-4 (JUNE 1995), all U.S. Government End Users acquire the
|
||||
* source code with only those rights set forth herein.
|
||||
*/
|
||||
#ifndef _npp_staging_h_
|
||||
#define _npp_staging_h_
|
||||
|
||||
|
||||
/**
|
||||
* \file npp_staging.h
|
||||
* NPP Staging Library (will become part of the next NPP release)
|
||||
*/
|
||||
|
||||
|
||||
#ifdef __cplusplus
|
||||
|
||||
|
||||
/** \defgroup ctassert Compile-time assert functionality
|
||||
* @{
|
||||
*/
|
||||
|
||||
|
||||
/**
|
||||
* Compile-time assert namespace
|
||||
*/
|
||||
namespace NppStCTprep
|
||||
{
|
||||
template <bool x>
|
||||
struct CT_ASSERT_FAILURE;
|
||||
|
||||
template <>
|
||||
struct CT_ASSERT_FAILURE<true> {};
|
||||
|
||||
template <int x>
|
||||
struct assertTest{};
|
||||
}
|
||||
|
||||
|
||||
#define NPPST_CT_PREP_PASTE_AUX(a,b) a##b ///< Concatenation indirection macro
|
||||
#define NPPST_CT_PREP_PASTE(a,b) NPPST_CT_PREP_PASTE_AUX(a, b) ///< Concatenation macro
|
||||
|
||||
|
||||
/**
|
||||
* Performs compile-time assertion of a condition on the file scope
|
||||
*/
|
||||
#define NPPST_CT_ASSERT(X) \
|
||||
typedef NppStCTprep::assertTest<sizeof(NppStCTprep::CT_ASSERT_FAILURE< (bool)(X) >)> \
|
||||
NPPST_CT_PREP_PASTE(__ct_assert_typedef_, __LINE__)
|
||||
|
||||
|
||||
/*@}*/
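For illustration (this example is not part of the original header), a false condition makes the typedef above instantiate the undefined specialization CT_ASSERT_FAILURE<false>, so the failure is reported at compile time; a hypothetical file-scope use:

// Hypothetical file-scope checks; compilation fails if a condition is false.
NPPST_CT_ASSERT(sizeof(int) == 4);
NPPST_CT_ASSERT(sizeof(void*) >= 4);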
|
||||
|
||||
|
||||
#endif
|
||||
|
||||
|
||||
/** \defgroup typedefs NPP Integral and compound types of guaranteed size
|
||||
* @{
|
||||
*/
|
||||
|
||||
|
||||
typedef bool NppStBool; ///< Bool of size less than integer
|
||||
typedef long long NppSt64s; ///< 64-bit signed integer
|
||||
typedef unsigned long long NppSt64u; ///< 64-bit unsigned integer
|
||||
typedef int NppSt32s; ///< 32-bit signed integer
|
||||
typedef unsigned int NppSt32u; ///< 32-bit unsigned integer
|
||||
typedef short NppSt16s; ///< 16-bit signed short
|
||||
typedef unsigned short NppSt16u; ///< 16-bit unsigned short
|
||||
typedef char NppSt8s; ///< 8-bit signed char
|
||||
typedef unsigned char NppSt8u; ///< 8-bit unsigned char
|
||||
typedef float NppSt32f; ///< 32-bit IEEE-754 (single precision) float
|
||||
typedef double NppSt64f; ///< 64-bit IEEE-754 (double precision) float
|
||||
|
||||
|
||||
/**
|
||||
* 2D Rectangle, 8-bit unsigned fields
|
||||
* This struct contains position and size information of a rectangle in 2D space
|
||||
*/
|
||||
struct NppStRect8u
|
||||
{
|
||||
NppSt8u x; ///< x-coordinate of upper left corner
|
||||
NppSt8u y; ///< y-coordinate of upper left corner
|
||||
NppSt8u width; ///< Rectangle width
|
||||
NppSt8u height; ///< Rectangle height
|
||||
#ifdef __cplusplus
|
||||
NppStRect8u() : x(0), y(0), width(0), height(0) {};
|
||||
NppStRect8u(NppSt8u x, NppSt8u y, NppSt8u width, NppSt8u height) : x(x), y(y), width(width), height(height) {}
|
||||
#endif
|
||||
};
|
||||
|
||||
|
||||
/**
|
||||
* 2D Rectangle, 32-bit signed fields
|
||||
* This struct contains position and size information of a rectangle in 2D space
|
||||
*/
|
||||
struct NppStRect32s
|
||||
{
|
||||
NppSt32s x; ///< x-coordinate of upper left corner
|
||||
NppSt32s y; ///< y-coordinate of upper left corner
|
||||
NppSt32s width; ///< Rectangle width
|
||||
NppSt32s height; ///< Rectangle height
|
||||
#ifdef __cplusplus
|
||||
NppStRect32s() : x(0), y(0), width(0), height(0) {};
|
||||
NppStRect32s(NppSt32s x, NppSt32s y, NppSt32s width, NppSt32s height) : x(x), y(y), width(width), height(height) {}
|
||||
#endif
|
||||
};
|
||||
|
||||
|
||||
/**
|
||||
* 2D Rectangle, 32-bit unsigned fields
|
||||
* This struct contains position and size information of a rectangle in 2D space
|
||||
*/
|
||||
struct NppStRect32u
|
||||
{
|
||||
NppSt32u x; ///< x-coordinate of upper left corner
|
||||
NppSt32u y; ///< y-coordinate of upper left corner
|
||||
NppSt32u width; ///< Rectangle width
|
||||
NppSt32u height; ///< Rectangle height
|
||||
#ifdef __cplusplus
|
||||
NppStRect32u() : x(0), y(0), width(0), height(0) {};
|
||||
NppStRect32u(NppSt32u x, NppSt32u y, NppSt32u width, NppSt32u height) : x(x), y(y), width(width), height(height) {}
|
||||
#endif
|
||||
};
|
||||
|
||||
|
||||
/**
|
||||
* 2D Size, 32-bit signed fields
|
||||
* This struct typically represents the size of a rectangular region in 2D space
|
||||
*/
|
||||
struct NppStSize32s
|
||||
{
|
||||
NppSt32s width; ///< Rectangle width
|
||||
NppSt32s height; ///< Rectangle height
|
||||
#ifdef __cplusplus
|
||||
NppStSize32s() : width(0), height(0) {};
|
||||
NppStSize32s(NppSt32s width, NppSt32s height) : width(width), height(height) {}
|
||||
#endif
|
||||
};
|
||||
|
||||
|
||||
/**
|
||||
* 2D Size, 32-bit unsigned fields
|
||||
* This struct typically represents the size of a rectangular region in 2D space
|
||||
*/
|
||||
struct NppStSize32u
|
||||
{
|
||||
NppSt32u width; ///< Rectangle width
|
||||
NppSt32u height; ///< Rectangle height
|
||||
#ifdef __cplusplus
|
||||
NppStSize32u() : width(0), height(0) {};
|
||||
NppStSize32u(NppSt32u width, NppSt32u height) : width(width), height(height) {}
|
||||
#endif
|
||||
};
|
||||
|
||||
|
||||
/**
|
||||
* Error Status Codes
|
||||
*
|
||||
* Almost all NPP functions return error-status information using
|
||||
* these return codes.
|
||||
* Negative return codes indicate errors, positive return codes indicate
|
||||
* warnings, a return code of 0 indicates success.
|
||||
*/
|
||||
enum NppStStatus
|
||||
{
|
||||
//already present in NPP
|
||||
__NPP_SUCCESS = 0, ///< Successful operation (same as NPP_NO_ERROR)
|
||||
__NPP_ERROR = -1, ///< Unknown error
|
||||
__NPP_CUDA_KERNEL_EXECUTION_ERROR = -3, ///< CUDA kernel execution error
|
||||
__NPP_NULL_POINTER_ERROR = -4, ///< NULL pointer argument error
|
||||
__NPP_TEXTURE_BIND_ERROR = -24, ///< CUDA texture binding error or non-zero offset returned
|
||||
__NPP_MEMCPY_ERROR = -13, ///< CUDA memory copy error
|
||||
__NPP_MEM_ALLOC_ERR = -12, ///< CUDA memory allocation error
|
||||
__NPP_MEMFREE_ERR = -15, ///< CUDA memory deallocation error
|
||||
|
||||
//to be added
|
||||
NPP_INVALID_ROI, ///< Invalid region of interest argument
|
||||
NPP_INVALID_STEP, ///< Invalid image lines step argument (check sign, alignment, relation to image width)
|
||||
NPP_INVALID_SCALE, ///< Invalid scale parameter passed
|
||||
NPP_MEM_INSUFFICIENT_BUFFER, ///< Insufficient user-allocated buffer
|
||||
NPP_MEM_RESIDENCE_ERROR, ///< Memory residence error detected (check if pointers should be device or pinned)
|
||||
NPP_MEM_INTERNAL_ERROR, ///< Internal memory management error
|
||||
};
|
||||
|
||||
|
||||
/*@}*/
|
||||
|
||||
|
||||
#ifdef __cplusplus
|
||||
|
||||
|
||||
/** \defgroup ct_typesize_checks Client-side sizeof types compile-time check
|
||||
* @{
|
||||
*/
|
||||
NPPST_CT_ASSERT(sizeof(NppStBool) <= 4);
|
||||
NPPST_CT_ASSERT(sizeof(NppSt64s) == 8);
|
||||
NPPST_CT_ASSERT(sizeof(NppSt64u) == 8);
|
||||
NPPST_CT_ASSERT(sizeof(NppSt32s) == 4);
|
||||
NPPST_CT_ASSERT(sizeof(NppSt32u) == 4);
|
||||
NPPST_CT_ASSERT(sizeof(NppSt16s) == 2);
|
||||
NPPST_CT_ASSERT(sizeof(NppSt16u) == 2);
|
||||
NPPST_CT_ASSERT(sizeof(NppSt8s) == 1);
|
||||
NPPST_CT_ASSERT(sizeof(NppSt8u) == 1);
|
||||
NPPST_CT_ASSERT(sizeof(NppSt32f) == 4);
|
||||
NPPST_CT_ASSERT(sizeof(NppSt64f) == 8);
|
||||
NPPST_CT_ASSERT(sizeof(NppStRect8u) == sizeof(NppSt32u));
|
||||
NPPST_CT_ASSERT(sizeof(NppStRect32s) == 4 * sizeof(NppSt32s));
|
||||
NPPST_CT_ASSERT(sizeof(NppStRect32u) == 4 * sizeof(NppSt32u));
|
||||
NPPST_CT_ASSERT(sizeof(NppStSize32u) == 2 * sizeof(NppSt32u));
|
||||
/*@}*/
|
||||
|
||||
|
||||
#endif
|
||||
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
|
||||
/** \defgroup core_npp NPP Core
|
||||
* Basic functions for CUDA streams management.
|
||||
* WARNING: These functions cannot be exported from a DLL, so they can be used only with the static version of NPP_staging
|
||||
* @{
|
||||
*/
|
||||
|
||||
|
||||
/**
|
||||
* Gets an active CUDA stream used by NPP (Not an API yet!)
|
||||
* \return Current CUDA stream
|
||||
*/
|
||||
cudaStream_t nppStGetActiveCUDAstream();
|
||||
|
||||
|
||||
/**
|
||||
* Sets an active CUDA stream used by NPP (Not an API yet!)
|
||||
* \param cudaStream [IN] CUDA stream to become current
|
||||
* \return CUDA stream used before
|
||||
*/
|
||||
cudaStream_t nppStSetActiveCUDAstream(cudaStream_t cudaStream);
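A minimal usage sketch (added here, not part of the header), assuming this header and cuda_runtime.h are included; it temporarily switches NPP_staging work to a user stream and then restores the previous one:

#include <cuda_runtime.h>

void runOnUserStream()
{
    cudaStream_t s;
    cudaStreamCreate(&s);
    // Returns the stream that was active before, so it can be restored later.
    cudaStream_t previous = nppStSetActiveCUDAstream(s);
    // ... enqueue NPP_staging calls here; they are expected to use 's' ...
    nppStSetActiveCUDAstream(previous);
    cudaStreamDestroy(s);
}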
|
||||
|
||||
|
||||
/*@}*/
|
||||
|
||||
|
||||
/** \defgroup nppi NPP Image Processing
|
||||
* @{
|
||||
*/
|
||||
|
||||
|
||||
/**
|
||||
* Downsamples (decimates) an image using the nearest neighbor algorithm. 32-bit unsigned pixels, single channel.
|
||||
*
|
||||
* \param d_src [IN] Source image pointer (CUDA device memory)
|
||||
* \param srcStep [IN] Source image line step
|
||||
* \param d_dst [OUT] Destination image pointer (CUDA device memory)
|
||||
* \param dstStep [IN] Destination image line step
|
||||
* \param srcRoi [IN] Region of interest in the source image
|
||||
* \param scale [IN] Downsampling scale factor (positive integer)
|
||||
* \param readThruTexture [IN] Performance hint to cache source in texture (true) or read directly (false)
|
||||
*
|
||||
* \return NPP status code
|
||||
*/
|
||||
NppStStatus nppiStDownsampleNearest_32u_C1R(NppSt32u *d_src, NppSt32u srcStep,
|
||||
NppSt32u *d_dst, NppSt32u dstStep,
|
||||
NppStSize32u srcRoi, NppSt32u scale,
|
||||
NppStBool readThruTexture);
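A minimal sketch (assumed, not from the original header) of a 2x decimation; the helper name is hypothetical and the line steps are assumed to be in bytes, which is the usual NPP convention:

#include <cuda_runtime.h>

NppStStatus downsampleByTwo(NppSt32u *d_src, NppSt32u srcStepBytes, NppStSize32u srcRoi)
{
    const NppSt32u scale = 2;
    NppSt32u *d_dst = 0;
    size_t dstStepBytes = 0;
    // Pitched allocation keeps every destination row properly aligned.
    cudaMallocPitch((void**)&d_dst, &dstStepBytes,
                    (srcRoi.width / scale) * sizeof(NppSt32u), srcRoi.height / scale);

    NppStStatus st = nppiStDownsampleNearest_32u_C1R(d_src, srcStepBytes,
                                                     d_dst, (NppSt32u)dstStepBytes,
                                                     srcRoi, scale,
                                                     false /* readThruTexture */);
    cudaFree(d_dst);
    return st;
}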
|
||||
|
||||
|
||||
/**
|
||||
* Downsamples (decimates) an image using the nearest neighbor algorithm. 32-bit signed pixels, single channel.
|
||||
* \see nppiStDownsampleNearest_32u_C1R
|
||||
*/
|
||||
NppStStatus nppiStDownsampleNearest_32s_C1R(NppSt32s *d_src, NppSt32u srcStep,
|
||||
NppSt32s *d_dst, NppSt32u dstStep,
|
||||
NppStSize32u srcRoi, NppSt32u scale,
|
||||
NppStBool readThruTexture);
|
||||
|
||||
|
||||
/**
|
||||
* Downsamples (decimates) an image using the nearest neighbor algorithm. 32-bit float pixels, single channel.
|
||||
* \see nppiStDownsampleNearest_32u_C1R
|
||||
*/
|
||||
NppStStatus nppiStDownsampleNearest_32f_C1R(NppSt32f *d_src, NppSt32u srcStep,
|
||||
NppSt32f *d_dst, NppSt32u dstStep,
|
||||
NppStSize32u srcRoi, NppSt32u scale,
|
||||
NppStBool readThruTexture);
|
||||
|
||||
|
||||
/**
|
||||
* Downsamples (decimates) an image using the nearest neighbor algorithm. 64-bit unsigned pixels, single channel.
|
||||
* \see nppiStDownsampleNearest_32u_C1R
|
||||
*/
|
||||
NppStStatus nppiStDownsampleNearest_64u_C1R(NppSt64u *d_src, NppSt32u srcStep,
|
||||
NppSt64u *d_dst, NppSt32u dstStep,
|
||||
NppStSize32u srcRoi, NppSt32u scale,
|
||||
NppStBool readThruTexture);
|
||||
|
||||
|
||||
/**
|
||||
* Downsamples (decimates) an image using the nearest neighbor algorithm. 64-bit signed pixels, single channel.
|
||||
* \see nppiStDownsampleNearest_32u_C1R
|
||||
*/
|
||||
NppStStatus nppiStDownsampleNearest_64s_C1R(NppSt64s *d_src, NppSt32u srcStep,
|
||||
NppSt64s *d_dst, NppSt32u dstStep,
|
||||
NppStSize32u srcRoi, NppSt32u scale,
|
||||
NppStBool readThruTexture);
|
||||
|
||||
|
||||
/**
|
||||
* Downsamples (decimates) an image using the nearest neighbor algorithm. 64-bit float pixels, single channel.
|
||||
* \see nppiStDownsampleNearest_32u_C1R
|
||||
*/
|
||||
NppStStatus nppiStDownsampleNearest_64f_C1R(NppSt64f *d_src, NppSt32u srcStep,
|
||||
NppSt64f *d_dst, NppSt32u dstStep,
|
||||
NppStSize32u srcRoi, NppSt32u scale,
|
||||
NppStBool readThruTexture);
|
||||
|
||||
|
||||
/**
|
||||
* Downsamples (decimates) an image using the nearest neighbor algorithm. 32-bit unsigned pixels, single channel. Host implementation.
|
||||
*
|
||||
* \param h_src [IN] Source image pointer (Host or pinned memory)
|
||||
* \param srcStep [IN] Source image line step
|
||||
* \param h_dst [OUT] Destination image pointer (Host or pinned memory)
|
||||
* \param dstStep [IN] Destination image line step
|
||||
* \param srcRoi [IN] Region of interest in the source image
|
||||
* \param scale [IN] Downsampling scale factor (positive integer)
|
||||
*
|
||||
* \return NPP status code
|
||||
*/
|
||||
NppStStatus nppiStDownsampleNearest_32u_C1R_host(NppSt32u *h_src, NppSt32u srcStep,
|
||||
NppSt32u *h_dst, NppSt32u dstStep,
|
||||
NppStSize32u srcRoi, NppSt32u scale);
|
||||
|
||||
|
||||
/**
|
||||
* Downsamples (decimates) an image using the nearest neighbor algorithm. 32-bit signed pixels, single channel. Host implementation.
|
||||
* \see nppiStDownsampleNearest_32u_C1R_host
|
||||
*/
|
||||
NppStStatus nppiStDownsampleNearest_32s_C1R_host(NppSt32s *h_src, NppSt32u srcStep,
|
||||
NppSt32s *h_dst, NppSt32u dstStep,
|
||||
NppStSize32u srcRoi, NppSt32u scale);
|
||||
|
||||
|
||||
/**
|
||||
* Downsamples (decimates) an image using the nearest neighbor algorithm. 32-bit float pixels, single channel. Host implementation.
|
||||
* \see nppiStDownsampleNearest_32u_C1R_host
|
||||
*/
|
||||
NppStStatus nppiStDownsampleNearest_32f_C1R_host(NppSt32f *h_src, NppSt32u srcStep,
|
||||
NppSt32f *h_dst, NppSt32u dstStep,
|
||||
NppStSize32u srcRoi, NppSt32u scale);
|
||||
|
||||
|
||||
/**
|
||||
* Downsamples (decimates) an image using the nearest neighbor algorithm. 64-bit unsigned pixels, single channel. Host implementation.
|
||||
* \see nppiStDownsampleNearest_32u_C1R_host
|
||||
*/
|
||||
NppStStatus nppiStDownsampleNearest_64u_C1R_host(NppSt64u *h_src, NppSt32u srcStep,
|
||||
NppSt64u *h_dst, NppSt32u dstStep,
|
||||
NppStSize32u srcRoi, NppSt32u scale);
|
||||
|
||||
|
||||
/**
|
||||
* Downsamples (decimates) an image using the nearest neighbor algorithm. 64-bit signed pixels, single channel. Host implementation.
|
||||
* \see nppiStDownsampleNearest_32u_C1R_host
|
||||
*/
|
||||
NppStStatus nppiStDownsampleNearest_64s_C1R_host(NppSt64s *h_src, NppSt32u srcStep,
|
||||
NppSt64s *h_dst, NppSt32u dstStep,
|
||||
NppStSize32u srcRoi, NppSt32u scale);
|
||||
|
||||
|
||||
/**
|
||||
* Downsamples (decimates) an image using the nearest neighbor algorithm. 64-bit float pixels, single channel. Host implementation.
|
||||
* \see nppiStDownsampleNearest_32u_C1R_host
|
||||
*/
|
||||
NppStStatus nppiStDownsampleNearest_64f_C1R_host(NppSt64f *h_src, NppSt32u srcStep,
|
||||
NppSt64f *h_dst, NppSt32u dstStep,
|
||||
NppStSize32u srcRoi, NppSt32u scale);
|
||||
|
||||
|
||||
/**
|
||||
* Computes standard deviation for each rectangular region of the input image using integral images.
|
||||
*
|
||||
* \param d_sum [IN] Integral image pointer (CUDA device memory)
|
||||
* \param sumStep [IN] Integral image line step
|
||||
* \param d_sqsum [IN] Squared integral image pointer (CUDA device memory)
|
||||
* \param sqsumStep [IN] Squared integral image line step
|
||||
* \param d_norm [OUT] Stddev image pointer (CUDA device memory). Each pixel contains stddev of a rect with top-left corner at the original location in the image
|
||||
* \param normStep [IN] Stddev image line step
|
||||
* \param roi [IN] Region of interest in the source image
|
||||
* \param rect [IN] Rectangular region to calculate stddev over
|
||||
* \param scaleArea [IN] Multiplication factor to account for the decimated scale
|
||||
* \param readThruTexture [IN] Performance hint to cache source in texture (true) or read directly (false)
|
||||
*
|
||||
* \return NPP status code
|
||||
*/
|
||||
NppStStatus nppiStRectStdDev_32f_C1R(NppSt32u *d_sum, NppSt32u sumStep,
|
||||
NppSt64u *d_sqsum, NppSt32u sqsumStep,
|
||||
NppSt32f *d_norm, NppSt32u normStep,
|
||||
NppStSize32u roi, NppStRect32u rect,
|
||||
NppSt32f scaleArea, NppStBool readThruTexture);
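For reference only (not part of the header), a hypothetical host-side routine showing the identity the stddev computation presumably relies on, stddev = sqrt(E[x^2] - E[x]^2), with box sums taken from the two integral images; strides are assumed to be in elements and the integral images are assumed to carry the usual extra zero row and column:

#include <cmath>

static NppSt32f rectStdDevRef(const NppSt32u *sum,   NppSt32u sumStride,
                              const NppSt64u *sqsum, NppSt32u sqsumStride,
                              NppStRect32u r)
{
    // Box sum over rectangle r via four corner lookups.
    double s  = (double)sum[(r.y + r.height) * sumStride + r.x + r.width]
              - (double)sum[ r.y             * sumStride + r.x + r.width]
              - (double)sum[(r.y + r.height) * sumStride + r.x]
              + (double)sum[ r.y             * sumStride + r.x];
    double sq = (double)sqsum[(r.y + r.height) * sqsumStride + r.x + r.width]
              - (double)sqsum[ r.y             * sqsumStride + r.x + r.width]
              - (double)sqsum[(r.y + r.height) * sqsumStride + r.x]
              + (double)sqsum[ r.y             * sqsumStride + r.x];
    double n    = (double)r.width * (double)r.height;
    double mean = s / n;
    double var  = sq / n - mean * mean;
    return (NppSt32f)std::sqrt(var > 0.0 ? var : 0.0);
}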
|
||||
|
||||
|
||||
/**
|
||||
* Computes standard deviation for each rectangular region of the input image using integral images. Host implementation
|
||||
*
|
||||
* \param h_sum [IN] Integral image pointer (Host or pinned memory)
|
||||
* \param sumStep [IN] Integral image line step
|
||||
* \param h_sqsum [IN] Squared integral image pointer (Host or pinned memory)
|
||||
* \param sqsumStep [IN] Squared integral image line step
|
||||
* \param h_norm [OUT] Stddev image pointer (Host or pinned memory). Each pixel contains stddev of a rect with top-left corner at the original location in the image
|
||||
* \param normStep [IN] Stddev image line step
|
||||
* \param roi [IN] Region of interest in the source image
|
||||
* \param rect [IN] Rectangular region to calculate stddev over
|
||||
* \param scaleArea [IN] Multiplication factor to account for the decimated scale
|
||||
*
|
||||
* \return NPP status code
|
||||
*/
|
||||
NppStStatus nppiStRectStdDev_32f_C1R_host(NppSt32u *h_sum, NppSt32u sumStep,
|
||||
NppSt64u *h_sqsum, NppSt32u sqsumStep,
|
||||
NppSt32f *h_norm, NppSt32u normStep,
|
||||
NppStSize32u roi, NppStRect32u rect,
|
||||
NppSt32f scaleArea);
|
||||
|
||||
|
||||
/**
|
||||
* Transposes an image. 32-bit unsigned pixels, single channel
|
||||
*
|
||||
* \param d_src [IN] Source image pointer (CUDA device memory)
|
||||
* \param srcStride [IN] Source image line step
|
||||
* \param d_dst [OUT] Destination image pointer (CUDA device memory)
|
||||
* \param dstStride [IN] Destination image line step
|
||||
* \param srcRoi [IN] Region of interest of the source image
|
||||
*
|
||||
* \return NPP status code
|
||||
*/
|
||||
NppStStatus nppiStTranspose_32u_C1R(NppSt32u *d_src, NppSt32u srcStride,
|
||||
NppSt32u *d_dst, NppSt32u dstStride, NppStSize32u srcRoi);
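A small sketch (hypothetical wrapper, not from the header); the destination holds the transposed ROI, so it should be allocated as srcRoi.height x srcRoi.width elements:

NppStStatus transposeExample(NppSt32u *d_src, NppSt32u srcStride,
                             NppSt32u *d_dst, NppSt32u dstStride, NppStSize32u srcRoi)
{
    // d_dst dimensions are the swap of srcRoi (height x width).
    return nppiStTranspose_32u_C1R(d_src, srcStride, d_dst, dstStride, srcRoi);
}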
|
||||
|
||||
|
||||
/**
|
||||
* Transposes an image. 32-bit signed pixels, single channel
|
||||
* \see nppiStTranspose_32u_C1R
|
||||
*/
|
||||
NppStStatus nppiStTranspose_32s_C1R(NppSt32s *d_src, NppSt32u srcStride,
|
||||
NppSt32s *d_dst, NppSt32u dstStride, NppStSize32u srcRoi);
|
||||
|
||||
|
||||
/**
|
||||
* Transposes an image. 32-bit float pixels, single channel
|
||||
* \see nppiStTranspose_32u_C1R
|
||||
*/
|
||||
NppStStatus nppiStTranspose_32f_C1R(NppSt32f *d_src, NppSt32u srcStride,
|
||||
NppSt32f *d_dst, NppSt32u dstStride, NppStSize32u srcRoi);
|
||||
|
||||
|
||||
/**
|
||||
* Transposes an image. 64-bit unsigned pixels, single channel
|
||||
* \see nppiStTranspose_32u_C1R
|
||||
*/
|
||||
NppStStatus nppiStTranspose_64u_C1R(NppSt64u *d_src, NppSt32u srcStride,
|
||||
NppSt64u *d_dst, NppSt32u dstStride, NppStSize32u srcRoi);
|
||||
|
||||
|
||||
/**
|
||||
* Transposes an image. 64-bit signed pixels, single channel
|
||||
* \see nppiStTranspose_32u_C1R
|
||||
*/
|
||||
NppStStatus nppiStTranspose_64s_C1R(NppSt64s *d_src, NppSt32u srcStride,
|
||||
NppSt64s *d_dst, NppSt32u dstStride, NppStSize32u srcRoi);
|
||||
|
||||
|
||||
/**
|
||||
* Transposes an image. 64-bit float pixels, single channel
|
||||
* \see nppiStTranspose_32u_C1R
|
||||
*/
|
||||
NppStStatus nppiStTranspose_64f_C1R(NppSt64f *d_src, NppSt32u srcStride,
|
||||
NppSt64f *d_dst, NppSt32u dstStride, NppStSize32u srcRoi);
|
||||
|
||||
|
||||
/**
|
||||
* Transposes an image. 32-bit unsigned pixels, single channel. Host implementation
|
||||
*
|
||||
* \param h_src [IN] Source image pointer (Host or pinned memory)
|
||||
* \param srcStride [IN] Source image line step
|
||||
* \param h_dst [OUT] Destination image pointer (Host or pinned memory)
|
||||
* \param dstStride [IN] Destination image line step
|
||||
* \param srcRoi [IN] Region of interest of the source image
|
||||
*
|
||||
* \return NPP status code
|
||||
*/
|
||||
NppStStatus nppiStTranspose_32u_C1R_host(NppSt32u *h_src, NppSt32u srcStride,
|
||||
NppSt32u *h_dst, NppSt32u dstStride, NppStSize32u srcRoi);
|
||||
|
||||
|
||||
/**
|
||||
* Transposes an image. 32-bit signed pixels, single channel. Host implementation
|
||||
* \see nppiStTranspose_32u_C1R_host
|
||||
*/
|
||||
NppStStatus nppiStTranspose_32s_C1R_host(NppSt32s *h_src, NppSt32u srcStride,
|
||||
NppSt32s *h_dst, NppSt32u dstStride, NppStSize32u srcRoi);
|
||||
|
||||
|
||||
/**
|
||||
* Transposes an image. 32-bit float pixels, single channel. Host implementation
|
||||
* \see nppiStTranspose_32u_C1R_host
|
||||
*/
|
||||
NppStStatus nppiStTranspose_32f_C1R_host(NppSt32f *h_src, NppSt32u srcStride,
|
||||
NppSt32f *h_dst, NppSt32u dstStride, NppStSize32u srcRoi);
|
||||
|
||||
|
||||
/**
|
||||
* Transposes an image. 64-bit unsigned pixels, single channel. Host implementation
|
||||
* \see nppiStTranspose_32u_C1R_host
|
||||
*/
|
||||
NppStStatus nppiStTranspose_64u_C1R_host(NppSt64u *h_src, NppSt32u srcStride,
|
||||
NppSt64u *h_dst, NppSt32u dstStride, NppStSize32u srcRoi);
|
||||
|
||||
|
||||
/**
|
||||
* Transposes an image. 64-bit signed pixels, single channel. Host implementation
|
||||
* \see nppiStTranspose_32u_C1R_host
|
||||
*/
|
||||
NppStStatus nppiStTranspose_64s_C1R_host(NppSt64s *h_src, NppSt32u srcStride,
|
||||
NppSt64s *h_dst, NppSt32u dstStride, NppStSize32u srcRoi);
|
||||
|
||||
|
||||
/**
|
||||
* Transposes an image. 64-bit float pixels, single channel. Host implementation
|
||||
* \see nppiStTranspose_32u_C1R_host
|
||||
*/
|
||||
NppStStatus nppiStTranspose_64f_C1R_host(NppSt64f *h_src, NppSt32u srcStride,
|
||||
NppSt64f *h_dst, NppSt32u dstStride, NppStSize32u srcRoi);
|
||||
|
||||
|
||||
/**
|
||||
* Calculates the size of the temporary buffer for integral image creation
|
||||
*
|
||||
* \param roiSize [IN] Size of the input image
|
||||
* \param pBufsize [OUT] Pointer to host variable that returns the size of the temporary buffer (in bytes)
|
||||
*
|
||||
* \return NPP status code
|
||||
*/
|
||||
NppStStatus nppiStIntegralGetSize_8u32u(NppStSize32u roiSize, NppSt32u *pBufsize);
|
||||
|
||||
|
||||
/**
|
||||
* Calculates the size of the temporary buffer for integral image creation
|
||||
* \see nppiStIntegralGetSize_8u32u
|
||||
*/
|
||||
NppStStatus nppiStIntegralGetSize_32f32f(NppStSize32u roiSize, NppSt32u *pBufsize);
|
||||
|
||||
|
||||
/**
|
||||
* Creates an integral image representation for the input image
|
||||
*
|
||||
* \param d_src [IN] Source image pointer (CUDA device memory)
|
||||
* \param srcStep [IN] Source image line step
|
||||
* \param d_dst [OUT] Destination integral image pointer (CUDA device memory)
|
||||
* \param dstStep [IN] Destination image line step
|
||||
* \param roiSize [IN] Region of interest of the source image
|
||||
* \param pBuffer [IN] Pointer to the pre-allocated temporary buffer (CUDA device memory)
|
||||
* \param bufSize [IN] Size of the pBuffer in bytes
|
||||
*
|
||||
* \return NPP status code
|
||||
*/
|
||||
NppStStatus nppiStIntegral_8u32u_C1R(NppSt8u *d_src, NppSt32u srcStep,
|
||||
NppSt32u *d_dst, NppSt32u dstStep, NppStSize32u roiSize,
|
||||
NppSt8u *pBuffer, NppSt32u bufSize);
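A sketch of the intended two-phase pattern (the helper name and error handling are assumptions, not from the header): query the scratch size, allocate it, then build the integral image; the destination is typically one row and one column larger than the source ROI:

#include <cuda_runtime.h>

NppStStatus buildIntegral(NppSt8u *d_src, NppSt32u srcStep,
                          NppSt32u *d_dst, NppSt32u dstStep, NppStSize32u roi)
{
    NppSt32u bufSize = 0;
    NppStStatus st = nppiStIntegralGetSize_8u32u(roi, &bufSize);
    if (st != __NPP_SUCCESS)
        return st;

    NppSt8u *d_buf = 0;
    cudaMalloc((void**)&d_buf, bufSize);   // temporary device scratch
    st = nppiStIntegral_8u32u_C1R(d_src, srcStep, d_dst, dstStep, roi, d_buf, bufSize);
    cudaFree(d_buf);
    return st;
}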
|
||||
|
||||
|
||||
/**
|
||||
* Creates an integral image representation for the input image
|
||||
* \see nppiStIntegral_8u32u_C1R
|
||||
*/
|
||||
NppStStatus nppiStIntegral_32f32f_C1R(NppSt32f *d_src, NppSt32u srcStep,
|
||||
NppSt32f *d_dst, NppSt32u dstStep, NppStSize32u roiSize,
|
||||
NppSt8u *pBuffer, NppSt32u bufSize);
|
||||
|
||||
|
||||
/**
|
||||
* Creates an integral image representation for the input image. Host implementation
|
||||
*
|
||||
* \param h_src [IN] Source image pointer (Host or pinned memory)
|
||||
* \param srcStep [IN] Source image line step
|
||||
* \param h_dst [OUT] Destination integral image pointer (Host or pinned memory)
|
||||
* \param dstStep [IN] Destination image line step
|
||||
* \param roiSize [IN] Region of interest of the source image
|
||||
*
|
||||
* \return NPP status code
|
||||
*/
|
||||
NppStStatus nppiStIntegral_8u32u_C1R_host(NppSt8u *h_src, NppSt32u srcStep,
|
||||
NppSt32u *h_dst, NppSt32u dstStep, NppStSize32u roiSize);
|
||||
|
||||
|
||||
/**
|
||||
* Creates an integral image representation for the input image. Host implementation
|
||||
* \see nppiStIntegral_8u32u_C1R_host
|
||||
*/
|
||||
NppStStatus nppiStIntegral_32f32f_C1R_host(NppSt32f *h_src, NppSt32u srcStep,
|
||||
NppSt32f *h_dst, NppSt32u dstStep, NppStSize32u roiSize);
|
||||
|
||||
|
||||
/**
|
||||
* Calculates the size of the temporary buffer for squared integral image creation
|
||||
*
|
||||
* \param roiSize [IN] Size of the input image
|
||||
* \param pBufsize [OUT] Pointer to host variable that returns the size of the temporary buffer (in bytes)
|
||||
*
|
||||
* \return NPP status code
|
||||
*/
|
||||
NppStStatus nppiStSqrIntegralGetSize_8u64u(NppStSize32u roiSize, NppSt32u *pBufsize);
|
||||
|
||||
|
||||
/**
|
||||
* Creates a squared integral image representation for the input image
|
||||
*
|
||||
* \param d_src [IN] Source image pointer (CUDA device memory)
|
||||
* \param srcStep [IN] Source image line step
|
||||
* \param d_dst [OUT] Destination squared integral image pointer (CUDA device memory)
|
||||
* \param dstStep [IN] Destination image line step
|
||||
* \param roiSize [IN] Region of interest of the source image
|
||||
* \param pBuffer [IN] Pointer to the pre-allocated temporary buffer (CUDA device memory)
|
||||
* \param bufSize [IN] Size of the pBuffer in bytes
|
||||
*
|
||||
* \return NPP status code
|
||||
*/
|
||||
NppStStatus nppiStSqrIntegral_8u64u_C1R(NppSt8u *d_src, NppSt32u srcStep,
|
||||
NppSt64u *d_dst, NppSt32u dstStep, NppStSize32u roiSize,
|
||||
NppSt8u *pBuffer, NppSt32u bufSize);
|
||||
|
||||
|
||||
/**
|
||||
* Creates a squared integral image representation for the input image. Host implementation
|
||||
*
|
||||
* \param h_src [IN] Source image pointer (Host or pinned memory)
|
||||
* \param srcStep [IN] Source image line step
|
||||
* \param h_dst [OUT] Destination squared integral image pointer (Host or pinned memory)
|
||||
* \param dstStep [IN] Destination image line step
|
||||
* \param roiSize [IN] Region of interest of the source image
|
||||
*
|
||||
* \return NPP status code
|
||||
*/
|
||||
NppStStatus nppiStSqrIntegral_8u64u_C1R_host(NppSt8u *h_src, NppSt32u srcStep,
|
||||
NppSt64u *h_dst, NppSt32u dstStep, NppStSize32u roiSize);
|
||||
|
||||
|
||||
/*@}*/
|
||||
|
||||
|
||||
/** \defgroup npps NPP Signal Processing
|
||||
* @{
|
||||
*/
|
||||
|
||||
|
||||
/**
|
||||
* Calculates the size of the temporary buffer for vector compaction. 32-bit unsigned values
|
||||
*
|
||||
* \param srcLen [IN] Length of the input vector in elements
|
||||
* \param pBufsize [OUT] Pointer to host variable that returns the size of the temporary buffer (in bytes)
|
||||
*
|
||||
* \return NPP status code
|
||||
*/
|
||||
NppStStatus nppsStCompactGetSize_32u(NppSt32u srcLen, NppSt32u *pBufsize);
|
||||
|
||||
|
||||
/**
|
||||
* Calculates the size of the temporary buffer for vector compaction. 32-bit signed values
|
||||
* \see nppsStCompactGetSize_32u
|
||||
*/
|
||||
NppStStatus nppsStCompactGetSize_32s(NppSt32u srcLen, NppSt32u *pBufsize);
|
||||
|
||||
|
||||
/**
|
||||
* Calculates the size of the temporary buffer for vector compaction. 32-bit float values
|
||||
* \see nppsStCompactGetSize_32u
|
||||
*/
|
||||
NppStStatus nppsStCompactGetSize_32f(NppSt32u srcLen, NppSt32u *pBufsize);
|
||||
|
||||
|
||||
/**
|
||||
* Compacts the input vector by removing elements of specified value. 32-bit unsigned values
|
||||
*
|
||||
* \param d_src [IN] Source vector pointer (CUDA device memory)
|
||||
* \param srcLen [IN] Source vector length
|
||||
* \param d_dst [OUT] Destination vector pointer (CUDA device memory)
|
||||
* \param p_dstLen [OUT] Pointer to the destination vector length (Pinned memory or NULL)
|
||||
* \param elemRemove [IN] The value to be removed
|
||||
* \param pBuffer [IN] Pointer to the pre-allocated temporary buffer (CUDA device memory)
|
||||
* \param bufSize [IN] Size of the pBuffer in bytes
|
||||
*
|
||||
* \return NPP status code
|
||||
*/
|
||||
NppStStatus nppsStCompact_32u(NppSt32u *d_src, NppSt32u srcLen,
|
||||
NppSt32u *d_dst, NppSt32u *p_dstLen,
|
||||
NppSt32u elemRemove,
|
||||
NppSt8u *pBuffer, NppSt32u bufSize);
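A sketch of the same get-size/allocate/run pattern for compaction (the helper name and error handling are assumptions); the output length is written to pinned host memory or may be NULL, as noted above:

#include <cuda_runtime.h>

NppStStatus compactRemove(NppSt32u *d_src, NppSt32u srcLen, NppSt32u *d_dst,
                          NppSt32u elemRemove, NppSt32u *hp_dstLen /* pinned or NULL */)
{
    NppSt32u bufSize = 0;
    NppStStatus st = nppsStCompactGetSize_32u(srcLen, &bufSize);
    if (st != __NPP_SUCCESS)
        return st;

    NppSt8u *d_buf = 0;
    cudaMalloc((void**)&d_buf, bufSize);
    st = nppsStCompact_32u(d_src, srcLen, d_dst, hp_dstLen, elemRemove, d_buf, bufSize);
    cudaFree(d_buf);
    return st;
}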
|
||||
|
||||
|
||||
/**
|
||||
* Compacts the input vector by removing elements of specified value. 32-bit signed values
|
||||
* \see nppsStCompact_32u
|
||||
*/
|
||||
NppStStatus nppsStCompact_32s(NppSt32s *d_src, NppSt32u srcLen,
|
||||
NppSt32s *d_dst, NppSt32u *p_dstLen,
|
||||
NppSt32s elemRemove,
|
||||
NppSt8u *pBuffer, NppSt32u bufSize);
|
||||
|
||||
|
||||
/**
|
||||
* Compacts the input vector by removing elements of specified value. 32-bit float values
|
||||
* \see nppsStCompact_32u
|
||||
*/
|
||||
NppStStatus nppsStCompact_32f(NppSt32f *d_src, NppSt32u srcLen,
|
||||
NppSt32f *d_dst, NppSt32u *p_dstLen,
|
||||
NppSt32f elemRemove,
|
||||
NppSt8u *pBuffer, NppSt32u bufSize);
|
||||
|
||||
|
||||
/**
|
||||
* Compacts the input vector by removing elements of specified value. 32-bit unsigned values. Host implementation
|
||||
*
|
||||
* \param h_src [IN] Source vector pointer (Host or pinned memory)
|
||||
* \param srcLen [IN] Source vector length
|
||||
* \param h_dst [OUT] Destination vector pointer (Host or pinned memory)
|
||||
* \param dstLen [OUT] Pointer to the destination vector length (can be NULL)
|
||||
* \param elemRemove [IN] The value to be removed
|
||||
*
|
||||
* \return NPP status code
|
||||
*/
|
||||
NppStStatus nppsStCompact_32u_host(NppSt32u *h_src, NppSt32u srcLen,
|
||||
NppSt32u *h_dst, NppSt32u *dstLen, NppSt32u elemRemove);
|
||||
|
||||
|
||||
/**
|
||||
* Compacts the input vector by removing elements of specified value. 32-bit signed values. Host implementation
|
||||
* \see nppsStCompact_32u_host
|
||||
*/
|
||||
NppStStatus nppsStCompact_32s_host(NppSt32s *h_src, NppSt32u srcLen,
|
||||
NppSt32s *h_dst, NppSt32u *dstLen, NppSt32s elemRemove);
|
||||
|
||||
|
||||
/**
|
||||
* Compacts the input vector by removing elements of specified value. 32-bit float values. Host implementation
|
||||
* \see nppsStCompact_32u_host
|
||||
*/
|
||||
NppStStatus nppsStCompact_32f_host(NppSt32f *h_src, NppSt32u srcLen,
|
||||
NppSt32f *h_dst, NppSt32u *dstLen, NppSt32f elemRemove);
|
||||
|
||||
|
||||
/*@}*/
|
||||
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
|
||||
#endif // _npp_staging_h_
|
@@ -36,10 +36,12 @@ file(GLOB lib_device_hdrs "src/opencv2/gpu/device/*.h*")
|
||||
source_group("Device" FILES ${lib_device_hdrs})
|
||||
|
||||
if (HAVE_CUDA AND MSVC)
|
||||
file(GLOB ncv_srcs "src/nvidia/*.cpp")
|
||||
file(GLOB ncv_hdrs "src/nvidia/*.h*")
|
||||
file(GLOB ncv_cuda "src/nvidia/*.cu")
|
||||
source_group("Src\\NVidia" FILES ${ncv_srcs} ${ncv_hdrs} ${ncv_cuda})
|
||||
file(GLOB_RECURSE ncv_srcs "src/nvidia/*.cpp")
|
||||
file(GLOB_RECURSE ncv_cuda "src/nvidia/*.cu")
|
||||
file(GLOB_RECURSE ncv_hdr1 "src/nvidia/*.hpp")
|
||||
file(GLOB_RECURSE ncv_hdr2 "src/nvidia/*.h")
|
||||
source_group("Src\\NVidia" FILES ${ncv_srcs} ${ncv_hdr1} ${ncv_hdr2} ${ncv_cuda})
|
||||
include_directories("src/nvidia/core" "src/nvidia/NPP_staging")
|
||||
endif()
|
||||
|
||||
if (HAVE_CUDA)
|
||||
@@ -74,17 +76,13 @@ if (HAVE_CUDA)
|
||||
string(REPLACE "/EHsc-" "/EHs" CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}")
|
||||
string(REPLACE "/EHsc-" "/EHs" CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE}")
|
||||
string(REPLACE "/EHsc-" "/EHs" CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG}")
|
||||
endif()
|
||||
|
||||
|
||||
include(FindNPP_staging.cmake)
|
||||
include_directories(${NPPST_INC})
|
||||
endif()
|
||||
|
||||
CUDA_COMPILE(cuda_objs ${lib_cuda} ${ncv_cuda})
|
||||
#CUDA_BUILD_CLEAN_TARGET()
|
||||
endif()
|
||||
|
||||
add_library(${the_target} ${lib_srcs} ${lib_hdrs} ${lib_int_hdrs} ${lib_cuda} ${lib_cuda_hdrs} ${lib_device_hdrs} ${ncv_srcs} ${ncv_hdrs} ${ncv_cuda} ${cuda_objs})
|
||||
add_library(${the_target} ${lib_srcs} ${lib_hdrs} ${lib_int_hdrs} ${lib_cuda} ${lib_cuda_hdrs} ${lib_device_hdrs} ${ncv_srcs} ${ncv_hdr1} ${ncv_hdr2} ${ncv_cuda} ${cuda_objs})
|
||||
|
||||
if(PCHSupport_FOUND)
|
||||
set(pch_header ${CMAKE_CURRENT_SOURCE_DIR}/src/precomp.hpp)
|
||||
@@ -117,8 +115,7 @@ set_target_properties(${the_target} PROPERTIES
|
||||
target_link_libraries(${the_target} ${OPENCV_LINKER_LIBS} ${IPP_LIBS} ${DEPS} )
|
||||
|
||||
if (HAVE_CUDA)
|
||||
target_link_libraries(${the_target} ${CUDA_LIBRARIES} ${CUDA_NPP_LIBRARIES})
|
||||
target_link_libraries(${the_target} ${NPPST_LIB})
|
||||
target_link_libraries(${the_target} ${CUDA_LIBRARIES} ${CUDA_NPP_LIBRARIES})
|
||||
CUDA_ADD_CUFFT_TO_TARGET(${the_target})
|
||||
endif()
|
||||
|
||||
|
@@ -1,24 +0,0 @@
|
||||
if(CMAKE_SIZEOF_VOID_P EQUAL 4)
|
||||
set(BIT_SUFF 32)
|
||||
else()
|
||||
set(BIT_SUFF 64)
|
||||
endif()
|
||||
|
||||
if (APPLE)
|
||||
set(PLATFORM_SUFF Darwin)
|
||||
elseif (UNIX)
|
||||
set(PLATFORM_SUFF Linux)
|
||||
else()
|
||||
set(PLATFORM_SUFF Windows)
|
||||
endif()
|
||||
|
||||
set(LIB_FILE NPP_staging_static_${PLATFORM_SUFF}_${BIT_SUFF}_v1)
|
||||
|
||||
find_library(NPPST_LIB
|
||||
NAMES "${LIB_FILE}" "lib${LIB_FILE}"
|
||||
PATHS "${CMAKE_SOURCE_DIR}/3rdparty/NPP_staging"
|
||||
DOC "NPP staging library"
|
||||
)
|
||||
|
||||
SET(NPPST_INC "${CMAKE_SOURCE_DIR}//3rdparty/NPP_staging")
|
||||
|
@@ -83,25 +83,25 @@ void cv::gpu::transpose(const GpuMat& src, GpuMat& dst)
|
||||
sz.width = src.cols;
|
||||
sz.height = src.rows;
|
||||
|
||||
nppSafeCall( nppiTranspose_8u_C1R(src.ptr<Npp8u>(), src.step, dst.ptr<Npp8u>(), dst.step, sz) );
|
||||
nppSafeCall( nppiTranspose_8u_C1R(src.ptr<Npp8u>(), src.step, dst.ptr<Npp8u>(), dst.step, sz) );
|
||||
}
|
||||
else if (src.elemSize() == 4)
|
||||
{
|
||||
NppStSize32u sz;
|
||||
NcvSize32u sz;
|
||||
sz.width = src.cols;
|
||||
sz.height = src.rows;
|
||||
|
||||
nppSafeCall( nppiStTranspose_32u_C1R(const_cast<NppSt32u*>(src.ptr<NppSt32u>()), src.step,
|
||||
dst.ptr<NppSt32u>(), dst.step, sz) );
|
||||
nppSafeCall( nppiStTranspose_32u_C1R(const_cast<Ncv32u*>(src.ptr<Ncv32u>()), src.step,
|
||||
dst.ptr<Ncv32u>(), dst.step, sz) );
|
||||
}
|
||||
else // if (src.elemSize() == 8)
|
||||
{
|
||||
NppStSize32u sz;
|
||||
NcvSize32u sz;
|
||||
sz.width = src.cols;
|
||||
sz.height = src.rows;
|
||||
|
||||
nppSafeCall( nppiStTranspose_64u_C1R(const_cast<NppSt64u*>(src.ptr<NppSt64u>()), src.step,
|
||||
dst.ptr<NppSt64u>(), dst.step, sz) );
|
||||
nppSafeCall( nppiStTranspose_64u_C1R(const_cast<Ncv64u*>(src.ptr<Ncv64u>()), src.step,
|
||||
dst.ptr<Ncv64u>(), dst.step, sz) );
|
||||
}
|
||||
|
||||
cudaSafeCall( cudaThreadSynchronize() );
|
||||
|
@@ -126,7 +126,7 @@ struct cv::gpu::CascadeClassifier_GPU::CascadeClassifierImpl
|
||||
minNeighbors,
|
||||
scaleStep, 1,
|
||||
flags,
|
||||
*gpuAllocator, *cpuAllocator, devProp.major, devProp.minor, 0);
|
||||
*gpuAllocator, *cpuAllocator, devProp, 0);
|
||||
ncvAssertReturnNcvStat(ncvStat);
|
||||
ncvAssertCUDAReturn(cudaStreamSynchronize(0), NCV_CUDA_ERROR);
|
||||
|
||||
@@ -146,8 +146,8 @@ private:
|
||||
ncvAssertCUDAReturn(cudaGetDeviceProperties(&devProp, devId), NCV_CUDA_ERROR);
|
||||
|
||||
// Load the classifier from file (assuming its size is about 1 mb) using a simple allocator
|
||||
gpuCascadeAllocator = new NCVMemNativeAllocator(NCVMemoryTypeDevice);
|
||||
cpuCascadeAllocator = new NCVMemNativeAllocator(NCVMemoryTypeHostPinned);
|
||||
gpuCascadeAllocator = new NCVMemNativeAllocator(NCVMemoryTypeDevice, devProp.textureAlignment);
|
||||
cpuCascadeAllocator = new NCVMemNativeAllocator(NCVMemoryTypeHostPinned, devProp.textureAlignment);
|
||||
|
||||
ncvAssertPrintReturn(gpuCascadeAllocator->isInitialized(), "Error creating cascade GPU allocator", NCV_CUDA_ERROR);
|
||||
ncvAssertPrintReturn(cpuCascadeAllocator->isInitialized(), "Error creating cascade CPU allocator", NCV_CUDA_ERROR);
|
||||
@@ -212,7 +212,7 @@ private:
|
||||
roi.height = d_src.height();
|
||||
Ncv32u numDetections;
|
||||
ncvStat = ncvDetectObjectsMultiScale_device(d_src, roi, d_rects, numDetections, haar, *h_haarStages,
|
||||
*d_haarStages, *d_haarNodes, *d_haarFeatures, haar.ClassifierSize, 4, 1.2f, 1, 0, gpuCounter, cpuCounter, devProp.major, devProp.minor, 0);
|
||||
*d_haarStages, *d_haarNodes, *d_haarFeatures, haar.ClassifierSize, 4, 1.2f, 1, 0, gpuCounter, cpuCounter, devProp, 0);
|
||||
|
||||
ncvAssertReturnNcvStat(ncvStat);
|
||||
ncvAssertCUDAReturn(cudaStreamSynchronize(0), NCV_CUDA_ERROR);
|
||||
|
@@ -560,16 +560,19 @@ void cv::gpu::integralBuffered(const GpuMat& src, GpuMat& sum, GpuMat& buffer)
|
||||
|
||||
sum.create(src.rows + 1, src.cols + 1, CV_32S);
|
||||
|
||||
NppStSize32u roiSize;
|
||||
NcvSize32u roiSize;
|
||||
roiSize.width = src.cols;
|
||||
roiSize.height = src.rows;
|
||||
|
||||
NppSt32u bufSize;
|
||||
nppSafeCall( nppiStIntegralGetSize_8u32u(roiSize, &bufSize) );
|
||||
cudaDeviceProp prop;
|
||||
cudaSafeCall( cudaGetDeviceProperties(&prop, cv::gpu::getDevice()) );
|
||||
|
||||
Ncv32u bufSize;
|
||||
nppSafeCall( nppiStIntegralGetSize_8u32u(roiSize, &bufSize, prop) );
|
||||
ensureSizeIsEnough(1, bufSize, CV_8UC1, buffer);
|
||||
|
||||
nppSafeCall( nppiStIntegral_8u32u_C1R(const_cast<NppSt8u*>(src.ptr<NppSt8u>()), src.step,
|
||||
sum.ptr<NppSt32u>(), sum.step, roiSize, buffer.ptr<NppSt8u>(), bufSize) );
|
||||
nppSafeCall( nppiStIntegral_8u32u_C1R(const_cast<Ncv8u*>(src.ptr<Ncv8u>()), src.step,
|
||||
sum.ptr<Ncv32u>(), sum.step, roiSize, buffer.ptr<Ncv8u>(), bufSize, prop) );
|
||||
|
||||
cudaSafeCall( cudaThreadSynchronize() );
|
||||
}
|
||||
@@ -600,19 +603,20 @@ void cv::gpu::sqrIntegral(const GpuMat& src, GpuMat& sqsum)
|
||||
{
|
||||
CV_Assert(src.type() == CV_8U);
|
||||
|
||||
NppStSize32u roiSize;
|
||||
NcvSize32u roiSize;
|
||||
roiSize.width = src.cols;
|
||||
roiSize.height = src.rows;
|
||||
|
||||
NppSt32u bufSize;
|
||||
nppSafeCall(nppiStSqrIntegralGetSize_8u64u(roiSize, &bufSize));
|
||||
cudaDeviceProp prop;
|
||||
cudaSafeCall( cudaGetDeviceProperties(&prop, cv::gpu::getDevice()) );
|
||||
|
||||
Ncv32u bufSize;
|
||||
nppSafeCall(nppiStSqrIntegralGetSize_8u64u(roiSize, &bufSize, prop));
|
||||
GpuMat buf(1, bufSize, CV_8U);
|
||||
|
||||
sqsum.create(src.rows + 1, src.cols + 1, CV_64F);
|
||||
nppSafeCall(nppiStSqrIntegral_8u64u_C1R(
|
||||
const_cast<NppSt8u*>(src.ptr<NppSt8u>(0)), src.step,
|
||||
sqsum.ptr<NppSt64u>(0), sqsum.step, roiSize,
|
||||
buf.ptr<NppSt8u>(0), bufSize));
|
||||
nppSafeCall(nppiStSqrIntegral_8u64u_C1R(const_cast<Ncv8u*>(src.ptr<Ncv8u>(0)), src.step,
|
||||
sqsum.ptr<Ncv64u>(0), sqsum.step, roiSize, buf.ptr<Ncv8u>(0), bufSize, prop));
|
||||
|
||||
cudaSafeCall( cudaThreadSynchronize() );
|
||||
}
|
||||
|
@@ -57,8 +57,8 @@
|
||||
|
||||
#include <algorithm>
|
||||
|
||||
#include "npp.h"
|
||||
#include "NCV.hpp"
|
||||
#include "NPP_staging/NPP_staging.hpp"
|
||||
#include "NCVRuntimeTemplates.hpp"
|
||||
#include "NCVHaarObjectDetection.hpp"
|
||||
|
||||
@@ -970,8 +970,7 @@ NCVStatus ncvApplyHaarClassifierCascade_device(NCVMatrix<Ncv32u> &d_integralImag
|
||||
Ncv32f scaleArea,
|
||||
INCVMemAllocator &gpuAllocator,
|
||||
INCVMemAllocator &cpuAllocator,
|
||||
Ncv32u devPropMajor,
|
||||
Ncv32u devPropMinor,
|
||||
cudaDeviceProp &devProp,
|
||||
cudaStream_t cuStream)
|
||||
{
|
||||
ncvAssertReturn(d_integralImage.memType() == d_weights.memType() &&
|
||||
@@ -1077,15 +1076,15 @@ NCVStatus ncvApplyHaarClassifierCascade_device(NCVMatrix<Ncv32u> &d_integralImag
|
||||
Ncv32f scaleAreaPixels = scaleArea * ((haar.ClassifierSize.width - 2*HAAR_STDDEV_BORDER) *
|
||||
(haar.ClassifierSize.height - 2*HAAR_STDDEV_BORDER));
|
||||
|
||||
NcvBool bTexCacheCascade = devPropMajor < 2;
|
||||
NcvBool bTexCacheCascade = devProp.major < 2;
|
||||
NcvBool bTexCacheIImg = true; //this works better even on Fermi so far
|
||||
NcvBool bDoAtomicCompaction = devPropMajor >= 2 || (devPropMajor == 1 && devPropMinor >= 3);
|
||||
NcvBool bDoAtomicCompaction = devProp.major >= 2 || (devProp.major == 1 && devProp.minor >= 3);
|
||||
|
||||
NCVVector<Ncv32u> *d_ptrNowData = &d_vecPixelMask;
|
||||
NCVVector<Ncv32u> *d_ptrNowTmp = &d_vecPixelMaskTmp;
|
||||
|
||||
Ncv32u szNppCompactTmpBuf;
|
||||
nppsStCompactGetSize_32u(d_vecPixelMask.length(), &szNppCompactTmpBuf);
|
||||
nppsStCompactGetSize_32u(d_vecPixelMask.length(), &szNppCompactTmpBuf, devProp);
|
||||
if (bDoAtomicCompaction)
|
||||
{
|
||||
szNppCompactTmpBuf = 0;
|
||||
@@ -1185,11 +1184,11 @@ NCVStatus ncvApplyHaarClassifierCascade_device(NCVMatrix<Ncv32u> &d_integralImag
|
||||
}
|
||||
else
|
||||
{
|
||||
NppStStatus nppSt;
|
||||
NCVStatus nppSt;
|
||||
nppSt = nppsStCompact_32u(d_ptrNowTmp->ptr(), d_vecPixelMask.length(),
|
||||
d_ptrNowData->ptr(), hp_numDet, OBJDET_MASK_ELEMENT_INVALID_32U,
|
||||
d_tmpBufCompact.ptr(), szNppCompactTmpBuf);
|
||||
ncvAssertReturn(nppSt == NPP_SUCCESS, NCV_NPP_ERROR);
|
||||
d_tmpBufCompact.ptr(), szNppCompactTmpBuf, devProp);
|
||||
ncvAssertReturn(nppSt == NPPST_SUCCESS, NCV_NPP_ERROR);
|
||||
}
|
||||
numDetections = *hp_numDet;
|
||||
}
|
||||
@@ -1240,11 +1239,11 @@ NCVStatus ncvApplyHaarClassifierCascade_device(NCVMatrix<Ncv32u> &d_integralImag
|
||||
}
|
||||
else
|
||||
{
|
||||
NppStStatus nppSt;
|
||||
NCVStatus nppSt;
|
||||
nppSt = nppsStCompact_32u(d_ptrNowData->ptr(), d_vecPixelMask.length(),
|
||||
d_ptrNowTmp->ptr(), hp_numDet, OBJDET_MASK_ELEMENT_INVALID_32U,
|
||||
d_tmpBufCompact.ptr(), szNppCompactTmpBuf);
|
||||
ncvAssertReturn(nppSt == NPP_SUCCESS, NCV_NPP_ERROR);
|
||||
d_tmpBufCompact.ptr(), szNppCompactTmpBuf, devProp);
|
||||
ncvAssertReturnNcvStat(nppSt);
|
||||
}
|
||||
|
||||
swap(d_ptrNowData, d_ptrNowTmp);
|
||||
@@ -1310,11 +1309,11 @@ NCVStatus ncvApplyHaarClassifierCascade_device(NCVMatrix<Ncv32u> &d_integralImag
|
||||
}
|
||||
else
|
||||
{
|
||||
NppStStatus nppSt;
|
||||
NCVStatus nppSt;
|
||||
nppSt = nppsStCompact_32u(d_ptrNowData->ptr(), numDetections,
|
||||
d_ptrNowTmp->ptr(), hp_numDet, OBJDET_MASK_ELEMENT_INVALID_32U,
|
||||
d_tmpBufCompact.ptr(), szNppCompactTmpBuf);
|
||||
ncvAssertReturn(nppSt == NPP_SUCCESS, NCV_NPP_ERROR);
|
||||
d_tmpBufCompact.ptr(), szNppCompactTmpBuf, devProp);
|
||||
ncvAssertReturnNcvStat(nppSt);
|
||||
}
|
||||
|
||||
swap(d_ptrNowData, d_ptrNowTmp);
|
||||
@@ -1371,11 +1370,11 @@ NCVStatus ncvApplyHaarClassifierCascade_device(NCVMatrix<Ncv32u> &d_integralImag
|
||||
}
|
||||
else
|
||||
{
|
||||
NppStStatus nppSt;
|
||||
NCVStatus nppSt;
|
||||
nppSt = nppsStCompact_32u(d_ptrNowData->ptr(), numDetections,
|
||||
d_ptrNowTmp->ptr(), hp_numDet, OBJDET_MASK_ELEMENT_INVALID_32U,
|
||||
d_tmpBufCompact.ptr(), szNppCompactTmpBuf);
|
||||
ncvAssertReturn(nppSt == NPP_SUCCESS, NCV_NPP_ERROR);
|
||||
d_tmpBufCompact.ptr(), szNppCompactTmpBuf, devProp);
|
||||
ncvAssertReturnNcvStat(nppSt);
|
||||
}
|
||||
|
||||
swap(d_ptrNowData, d_ptrNowTmp);
|
||||
@@ -1715,8 +1714,7 @@ NCVStatus ncvDetectObjectsMultiScale_device(NCVMatrix<Ncv8u> &d_srcImg,
|
||||
|
||||
INCVMemAllocator &gpuAllocator,
|
||||
INCVMemAllocator &cpuAllocator,
|
||||
Ncv32u devPropMajor,
|
||||
Ncv32u devPropMinor,
|
||||
cudaDeviceProp &devProp,
|
||||
cudaStream_t cuStream)
|
||||
{
|
||||
ncvAssertReturn(d_srcImg.memType() == d_dstRects.memType() &&
|
||||
@@ -1773,12 +1771,12 @@ NCVStatus ncvDetectObjectsMultiScale_device(NCVMatrix<Ncv8u> &d_srcImg,
|
||||
NCVVectorAlloc<NcvRect32u> h_hypothesesIntermediate(cpuAllocator, d_srcImg.width() * d_srcImg.height());
|
||||
ncvAssertReturn(h_hypothesesIntermediate.isMemAllocated(), NCV_ALLOCATOR_BAD_ALLOC);
|
||||
|
||||
NppStStatus nppStat;
|
||||
NCVStatus nppStat;
|
||||
Ncv32u szTmpBufIntegral, szTmpBufSqIntegral;
|
||||
nppStat = nppiStIntegralGetSize_8u32u(NppStSize32u(d_srcImg.width(), d_srcImg.height()), &szTmpBufIntegral);
|
||||
ncvAssertReturn(nppStat == NPP_SUCCESS, NCV_NPP_ERROR);
|
||||
nppStat = nppiStSqrIntegralGetSize_8u64u(NppStSize32u(d_srcImg.width(), d_srcImg.height()), &szTmpBufSqIntegral);
|
||||
ncvAssertReturn(nppStat == NPP_SUCCESS, NCV_NPP_ERROR);
|
||||
nppStat = nppiStIntegralGetSize_8u32u(NcvSize32u(d_srcImg.width(), d_srcImg.height()), &szTmpBufIntegral, devProp);
|
||||
ncvAssertReturnNcvStat(nppStat);
|
||||
nppStat = nppiStSqrIntegralGetSize_8u64u(NcvSize32u(d_srcImg.width(), d_srcImg.height()), &szTmpBufSqIntegral, devProp);
|
||||
ncvAssertReturnNcvStat(nppStat);
|
||||
NCVVectorAlloc<Ncv8u> d_tmpIIbuf(gpuAllocator, std::max(szTmpBufIntegral, szTmpBufSqIntegral));
|
||||
ncvAssertReturn(d_tmpIIbuf.isMemAllocated(), NCV_ALLOCATOR_BAD_ALLOC);
|
||||
|
||||
@@ -1786,15 +1784,15 @@ NCVStatus ncvDetectObjectsMultiScale_device(NCVMatrix<Ncv8u> &d_srcImg,
|
||||
|
||||
nppStat = nppiStIntegral_8u32u_C1R(d_srcImg.ptr(), d_srcImg.pitch(),
|
||||
d_integralImage.ptr(), d_integralImage.pitch(),
|
||||
NppStSize32u(d_srcImg.width(), d_srcImg.height()),
|
||||
d_tmpIIbuf.ptr(), szTmpBufIntegral);
|
||||
ncvAssertReturn(nppStat == NPP_SUCCESS, NCV_NPP_ERROR);
|
||||
NcvSize32u(d_srcImg.width(), d_srcImg.height()),
|
||||
d_tmpIIbuf.ptr(), szTmpBufIntegral, devProp);
|
||||
ncvAssertReturnNcvStat(nppStat);
|
||||
|
||||
nppStat = nppiStSqrIntegral_8u64u_C1R(d_srcImg.ptr(), d_srcImg.pitch(),
|
||||
d_sqIntegralImage.ptr(), d_sqIntegralImage.pitch(),
|
||||
NppStSize32u(d_srcImg.width(), d_srcImg.height()),
|
||||
d_tmpIIbuf.ptr(), szTmpBufSqIntegral);
|
||||
ncvAssertReturn(nppStat == NPP_SUCCESS, NCV_NPP_ERROR);
|
||||
NcvSize32u(d_srcImg.width(), d_srcImg.height()),
|
||||
d_tmpIIbuf.ptr(), szTmpBufSqIntegral, devProp);
|
||||
ncvAssertReturnNcvStat(nppStat);
|
||||
|
||||
NCV_SKIP_COND_END
|
||||
|
||||
@@ -1859,7 +1857,7 @@ NCVStatus ncvDetectObjectsMultiScale_device(NCVMatrix<Ncv8u> &d_srcImg,
|
||||
Ncv32u scale = scalesVector[i];
|
||||
|
||||
NcvSize32u srcRoi, scaledIIRoi, searchRoi;
|
||||
NppStSize32u srcIIRoi;
|
||||
NcvSize32u srcIIRoi;
|
||||
srcRoi.width = d_srcImg.width();
|
||||
srcRoi.height = d_srcImg.height();
|
||||
srcIIRoi.width = srcRoi.width + 1;
|
||||
@@ -1875,15 +1873,15 @@ NCVStatus ncvDetectObjectsMultiScale_device(NCVMatrix<Ncv8u> &d_srcImg,
|
||||
d_integralImage.ptr(), d_integralImage.pitch(),
|
||||
d_scaledIntegralImage.ptr(), d_scaledIntegralImage.pitch(),
|
||||
srcIIRoi, scale, true);
|
||||
ncvAssertReturn(nppStat == NPP_SUCCESS, NCV_NPP_ERROR);
|
||||
ncvAssertReturnNcvStat(nppStat);
|
||||
|
||||
nppStat = nppiStDownsampleNearest_64u_C1R(
|
||||
d_sqIntegralImage.ptr(), d_sqIntegralImage.pitch(),
|
||||
d_scaledSqIntegralImage.ptr(), d_scaledSqIntegralImage.pitch(),
|
||||
srcIIRoi, scale, true);
|
||||
ncvAssertReturn(nppStat == NPP_SUCCESS, NCV_NPP_ERROR);
|
||||
ncvAssertReturnNcvStat(nppStat);
|
||||
|
||||
const NppStRect32u rect(
|
||||
const NcvRect32u rect(
|
||||
HAAR_STDDEV_BORDER,
|
||||
HAAR_STDDEV_BORDER,
|
||||
haar.ClassifierSize.width - 2*HAAR_STDDEV_BORDER,
|
||||
@@ -1892,9 +1890,9 @@ NCVStatus ncvDetectObjectsMultiScale_device(NCVMatrix<Ncv8u> &d_srcImg,
|
||||
d_scaledIntegralImage.ptr(), d_scaledIntegralImage.pitch(),
|
||||
d_scaledSqIntegralImage.ptr(), d_scaledSqIntegralImage.pitch(),
|
||||
d_rectStdDev.ptr(), d_rectStdDev.pitch(),
|
||||
NppStSize32u(searchRoi.width, searchRoi.height), rect,
|
||||
NcvSize32u(searchRoi.width, searchRoi.height), rect,
|
||||
(Ncv32f)scale*scale, true);
|
||||
ncvAssertReturn(nppStat == NPP_SUCCESS, NCV_NPP_ERROR);
|
||||
ncvAssertReturnNcvStat(nppStat);
|
||||
|
||||
NCV_SKIP_COND_END
|
||||
|
||||
@@ -1904,8 +1902,8 @@ NCVStatus ncvDetectObjectsMultiScale_device(NCVMatrix<Ncv8u> &d_srcImg,
|
||||
detectionsOnThisScale,
|
||||
haar, h_HaarStages, d_HaarStages, d_HaarNodes, d_HaarFeatures, false,
|
||||
searchRoi, pixelStep, (Ncv32f)scale*scale,
|
||||
gpuAllocator, cpuAllocator, devPropMajor, devPropMinor, cuStream);
|
||||
ncvAssertReturn(ncvStat == NCV_SUCCESS, ncvStat);
|
||||
gpuAllocator, cpuAllocator, devProp, cuStream);
|
||||
ncvAssertReturnNcvStat(nppStat);
|
||||
|
||||
NCV_SKIP_COND_BEGIN
|
||||
|
||||
@@ -2250,6 +2248,10 @@ NCVStatus ncvGrowDetectionsVector_host(NCVVector<Ncv32u> &pixelMask,
|
||||
return ncvStat;
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
NCVStatus ncvFilterHypotheses_host(NCVVector<NcvRect32u> &hypotheses,
|
||||
Ncv32u &numHypotheses,
|
||||
Ncv32u minNeighbors,
|
||||
@@ -2539,7 +2541,7 @@ NCVStatus ncvHaarLoadFromFile_host(const std::string &filename,
|
||||
}
|
||||
|
||||
|
||||
NCVStatus ncvHaarStoreNVBIN_host(std::string &filename,
|
||||
NCVStatus ncvHaarStoreNVBIN_host(const std::string &filename,
|
||||
HaarClassifierCascadeDescriptor haar,
|
||||
NCVVector<HaarStage64> &h_HaarStages,
|
||||
NCVVector<HaarClassifierNode128> &h_HaarNodes,
|
||||
|
@@ -75,13 +75,13 @@ struct HaarFeature64
|
||||
|
||||
#define HaarFeature64_CreateCheck_MaxRectField 0xFF
|
||||
|
||||
__host__ NCVStatus setRect(Ncv32u rectX, Ncv32u rectY, Ncv32u rectWidth, Ncv32u rectHeight, Ncv32u clsWidth, Ncv32u clsHeight)
|
||||
__host__ NCVStatus setRect(Ncv32u rectX, Ncv32u rectY, Ncv32u rectWidth, Ncv32u rectHeight, Ncv32u /*clsWidth*/, Ncv32u /*clsHeight*/)
|
||||
{
|
||||
ncvAssertReturn(rectWidth <= HaarFeature64_CreateCheck_MaxRectField && rectHeight <= HaarFeature64_CreateCheck_MaxRectField, NCV_HAAR_TOO_LARGE_FEATURES);
|
||||
((NcvRect8u*)&(this->_ui2.x))->x = rectX;
|
||||
((NcvRect8u*)&(this->_ui2.x))->y = rectY;
|
||||
((NcvRect8u*)&(this->_ui2.x))->width = rectWidth;
|
||||
((NcvRect8u*)&(this->_ui2.x))->height = rectHeight;
|
||||
((NcvRect8u*)&(this->_ui2.x))->x = (Ncv8u)rectX;
|
||||
((NcvRect8u*)&(this->_ui2.x))->y = (Ncv8u)rectY;
|
||||
((NcvRect8u*)&(this->_ui2.x))->width = (Ncv8u)rectWidth;
|
||||
((NcvRect8u*)&(this->_ui2.x))->height = (Ncv8u)rectHeight;
|
||||
return NCV_SUCCESS;
|
||||
}
|
||||
|
||||
@@ -306,11 +306,11 @@ struct HaarStage64
|
||||
};
|
||||
|
||||
|
||||
NPPST_CT_ASSERT(sizeof(HaarFeature64) == 8);
|
||||
NPPST_CT_ASSERT(sizeof(HaarFeatureDescriptor32) == 4);
|
||||
NPPST_CT_ASSERT(sizeof(HaarClassifierNodeDescriptor32) == 4);
|
||||
NPPST_CT_ASSERT(sizeof(HaarClassifierNode128) == 16);
|
||||
NPPST_CT_ASSERT(sizeof(HaarStage64) == 8);
|
||||
NCV_CT_ASSERT(sizeof(HaarFeature64) == 8);
|
||||
NCV_CT_ASSERT(sizeof(HaarFeatureDescriptor32) == 4);
|
||||
NCV_CT_ASSERT(sizeof(HaarClassifierNodeDescriptor32) == 4);
|
||||
NCV_CT_ASSERT(sizeof(HaarClassifierNode128) == 16);
|
||||
NCV_CT_ASSERT(sizeof(HaarStage64) == 8);
|
||||
|
||||
|
||||
//==============================================================================
|
||||
@@ -347,7 +347,7 @@ enum
|
||||
NCVPipeObjDet_VisualizeInPlace = 0x004,
|
||||
};
|
||||
|
||||
|
||||
NCV_EXPORTS
|
||||
NCVStatus ncvDetectObjectsMultiScale_device(NCVMatrix<Ncv8u> &d_srcImg,
|
||||
NcvSize32u srcRoi,
|
||||
NCVVector<NcvRect32u> &d_dstRects,
|
||||
@@ -367,15 +367,14 @@ NCVStatus ncvDetectObjectsMultiScale_device(NCVMatrix<Ncv8u> &d_srcImg,
|
||||
|
||||
INCVMemAllocator &gpuAllocator,
|
||||
INCVMemAllocator &cpuAllocator,
|
||||
Ncv32u devPropMajor,
|
||||
Ncv32u devPropMinor,
|
||||
cudaDeviceProp &devProp,
|
||||
cudaStream_t cuStream);
|
||||
|
||||
|
||||
#define OBJDET_MASK_ELEMENT_INVALID_32U 0xFFFFFFFF
|
||||
#define HAAR_STDDEV_BORDER 1
|
||||
|
||||
|
||||
NCV_EXPORTS
|
||||
NCVStatus ncvApplyHaarClassifierCascade_device(NCVMatrix<Ncv32u> &d_integralImage,
|
||||
NCVMatrix<Ncv32f> &d_weights,
|
||||
NCVMatrixAlloc<Ncv32u> &d_pixelMask,
|
||||
@@ -391,11 +390,10 @@ NCVStatus ncvApplyHaarClassifierCascade_device(NCVMatrix<Ncv32u> &d_integralImag
|
||||
Ncv32f scaleArea,
|
||||
INCVMemAllocator &gpuAllocator,
|
||||
INCVMemAllocator &cpuAllocator,
|
||||
Ncv32u devPropMajor,
|
||||
Ncv32u devPropMinor,
|
||||
cudaDeviceProp &devProp,
|
||||
cudaStream_t cuStream);
|
||||
|
||||
|
||||
NCV_EXPORTS
|
||||
NCVStatus ncvApplyHaarClassifierCascade_host(NCVMatrix<Ncv32u> &h_integralImage,
|
||||
NCVMatrix<Ncv32f> &h_weights,
|
||||
NCVMatrixAlloc<Ncv32u> &h_pixelMask,
|
||||
@@ -409,7 +407,7 @@ NCVStatus ncvApplyHaarClassifierCascade_host(NCVMatrix<Ncv32u> &h_integralImage,
|
||||
Ncv32u pixelStep,
|
||||
Ncv32f scaleArea);
|
||||
|
||||
|
||||
NCV_EXPORTS
|
||||
NCVStatus ncvDrawRects_8u_device(Ncv8u *d_dst,
|
||||
Ncv32u dstStride,
|
||||
Ncv32u dstWidth,
|
||||
@@ -419,7 +417,7 @@ NCVStatus ncvDrawRects_8u_device(Ncv8u *d_dst,
|
||||
Ncv8u color,
|
||||
cudaStream_t cuStream);
|
||||
|
||||
|
||||
NCV_EXPORTS
|
||||
NCVStatus ncvDrawRects_32u_device(Ncv32u *d_dst,
|
||||
Ncv32u dstStride,
|
||||
Ncv32u dstWidth,
|
||||
@@ -429,7 +427,7 @@ NCVStatus ncvDrawRects_32u_device(Ncv32u *d_dst,
|
||||
Ncv32u color,
|
||||
cudaStream_t cuStream);
|
||||
|
||||
|
||||
NCV_EXPORTS
|
||||
NCVStatus ncvDrawRects_8u_host(Ncv8u *h_dst,
|
||||
Ncv32u dstStride,
|
||||
Ncv32u dstWidth,
|
||||
@@ -438,7 +436,7 @@ NCVStatus ncvDrawRects_8u_host(Ncv8u *h_dst,
|
||||
Ncv32u numRects,
|
||||
Ncv8u color);
|
||||
|
||||
|
||||
NCV_EXPORTS
|
||||
NCVStatus ncvDrawRects_32u_host(Ncv32u *h_dst,
|
||||
Ncv32u dstStride,
|
||||
Ncv32u dstWidth,
|
||||
@@ -450,7 +448,7 @@ NCVStatus ncvDrawRects_32u_host(Ncv32u *h_dst,
|
||||
|
||||
#define RECT_SIMILARITY_PROPORTION 0.2f
|
||||
|
||||
|
||||
NCV_EXPORTS
|
||||
NCVStatus ncvGrowDetectionsVector_device(NCVVector<Ncv32u> &pixelMask,
|
||||
Ncv32u numPixelMaskDetections,
|
||||
NCVVector<NcvRect32u> &hypotheses,
|
||||
@ -461,7 +459,7 @@ NCVStatus ncvGrowDetectionsVector_device(NCVVector<Ncv32u> &pixelMask,
|
||||
Ncv32f curScale,
|
||||
cudaStream_t cuStream);
|
||||
|
||||
|
||||
NCV_EXPORTS
|
||||
NCVStatus ncvGrowDetectionsVector_host(NCVVector<Ncv32u> &pixelMask,
|
||||
Ncv32u numPixelMaskDetections,
|
||||
NCVVector<NcvRect32u> &hypotheses,
|
||||
@@ -471,18 +469,18 @@ NCVStatus ncvGrowDetectionsVector_host(NCVVector<Ncv32u> &pixelMask,
|
||||
Ncv32u rectHeight,
|
||||
Ncv32f curScale);
|
||||
|
||||
|
||||
NCV_EXPORTS
|
||||
NCVStatus ncvFilterHypotheses_host(NCVVector<NcvRect32u> &hypotheses,
|
||||
Ncv32u &numHypotheses,
|
||||
Ncv32u minNeighbors,
|
||||
Ncv32f intersectEps,
|
||||
NCVVector<Ncv32u> *hypothesesWeights);
|
||||
|
||||
|
||||
NCV_EXPORTS
|
||||
NCVStatus ncvHaarGetClassifierSize(const std::string &filename, Ncv32u &numStages,
|
||||
Ncv32u &numNodes, Ncv32u &numFeatures);
|
||||
|
||||
|
||||
NCV_EXPORTS
|
||||
NCVStatus ncvHaarLoadFromFile_host(const std::string &filename,
|
||||
HaarClassifierCascadeDescriptor &haar,
|
||||
NCVVector<HaarStage64> &h_HaarStages,
|
||||
@@ -490,6 +488,7 @@ NCVStatus ncvHaarLoadFromFile_host(const std::string &filename,
|
||||
NCVVector<HaarFeature64> &h_HaarFeatures);
|
||||
|
||||
|
||||
NCV_EXPORTS
|
||||
NCVStatus ncvHaarStoreNVBIN_host(const std::string &filename,
|
||||
HaarClassifierCascadeDescriptor haar,
|
||||
NCVVector<HaarStage64> &h_HaarStages,
|
||||
|
modules/gpu/src/nvidia/NPP_staging/NPP_staging.cu (new file, 1704 lines)
File diff suppressed because it is too large.
modules/gpu/src/nvidia/NPP_staging/NPP_staging.hpp (new file, 637 lines)
@@ -0,0 +1,637 @@
|
||||
/*M///////////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
|
||||
//
|
||||
// By downloading, copying, installing or using the software you agree to this license.
|
||||
// If you do not agree to this license, do not download, install,
|
||||
// copy or use the software.
|
||||
//
|
||||
//
|
||||
// License Agreement
|
||||
// For Open Source Computer Vision Library
|
||||
//
|
||||
// Copyright (C) 2009-2010, NVIDIA Corporation, all rights reserved.
|
||||
// Third party copyrights are property of their respective owners.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without modification,
|
||||
// are permitted provided that the following conditions are met:
|
||||
//
|
||||
// * Redistribution's of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimer.
|
||||
//
|
||||
// * Redistribution's in binary form must reproduce the above copyright notice,
|
||||
// this list of conditions and the following disclaimer in the documentation
|
||||
// and/or other materials provided with the distribution.
|
||||
//
|
||||
// * The name of the copyright holders may not be used to endorse or promote products
|
||||
// derived from this software without specific prior written permission.
|
||||
//
|
||||
// This software is provided by the copyright holders and contributors "as is" and
|
||||
// any express or implied warranties, including, but not limited to, the implied
|
||||
// warranties of merchantability and fitness for a particular purpose are disclaimed.
|
||||
// In no event shall the Intel Corporation or contributors be liable for any direct,
|
||||
// indirect, incidental, special, exemplary, or consequential damages
|
||||
// (including, but not limited to, procurement of substitute goods or services;
|
||||
// loss of use, data, or profits; or business interruption) however caused
|
||||
// and on any theory of liability, whether in contract, strict liability,
|
||||
// or tort (including negligence or otherwise) arising in any way out of
|
||||
// the use of this software, even if advised of the possibility of such damage.
|
||||
//
|
||||
//M*/
|
||||
|
||||
#ifndef _npp_staging_hpp_
|
||||
#define _npp_staging_hpp_
|
||||
|
||||
#include "NCV.hpp"
|
||||
|
||||
|
||||
/**
|
||||
* \file NPP_staging.hpp
|
||||
* NPP Staging Library
|
||||
*/
|
||||
|
||||
|
||||
/** \defgroup core_npp NPPST Core
|
||||
* Basic functions for CUDA streams management.
|
||||
* @{
|
||||
*/
|
||||
|
||||
|
||||
/**
|
||||
* Gets an active CUDA stream used by NPPST
|
||||
* NOT THREAD SAFE
|
||||
* \return Current CUDA stream
|
||||
*/
|
||||
cudaStream_t nppStGetActiveCUDAstream();
|
||||
|
||||
|
||||
/**
|
||||
* Sets an active CUDA stream used by NPPST
|
||||
* NOT THREAD SAFE
|
||||
* \param cudaStream [IN] cudaStream CUDA stream to become current
|
||||
* \return CUDA stream used before
|
||||
*/
|
||||
cudaStream_t nppStSetActiveCUDAstream(cudaStream_t cudaStream);
|
||||
|
||||
|
||||
/*@}*/
|
||||
|
||||
|
||||
/** \defgroup nppi NPPST Image Processing
|
||||
* @{
|
||||
*/
|
||||
|
||||
|
||||
/**
|
||||
* Downsamples (decimates) an image using the nearest neighbor algorithm. 32-bit unsigned pixels, single channel.
|
||||
*
|
||||
* \param d_src [IN] Source image pointer (CUDA device memory)
|
||||
* \param srcStep [IN] Source image line step
|
||||
* \param d_dst [OUT] Destination image pointer (CUDA device memory)
|
||||
* \param dstStep [IN] Destination image line step
|
||||
* \param srcRoi [IN] Region of interest in the source image
|
||||
* \param scale [IN] Downsampling scale factor (positive integer)
|
||||
* \param readThruTexture [IN] Performance hint to cache source in texture (true) or read directly (false)
|
||||
*
|
||||
* \return NCV status code
|
||||
*/
|
||||
NCV_EXPORTS
|
||||
NCVStatus nppiStDownsampleNearest_32u_C1R(Ncv32u *d_src, Ncv32u srcStep,
|
||||
Ncv32u *d_dst, Ncv32u dstStep,
|
||||
NcvSize32u srcRoi, Ncv32u scale,
|
||||
NcvBool readThruTexture);
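A minimal call sequence for the device downsampler might look like the sketch below; it assumes the step arguments are byte strides (as elsewhere in NPP) and that the buffers come from cudaMallocPitch, neither of which is stated by this header.

    Ncv32u srcWidth = 1024, srcHeight = 768, scale = 2;
    Ncv32u *d_src = 0, *d_dst = 0;
    size_t srcPitch = 0, dstPitch = 0;
    cudaMallocPitch((void **)&d_src, &srcPitch, srcWidth * sizeof(Ncv32u), srcHeight);
    cudaMallocPitch((void **)&d_dst, &dstPitch, (srcWidth / scale) * sizeof(Ncv32u), srcHeight / scale);

    NcvSize32u srcRoi(srcWidth, srcHeight);
    NCVStatus st = nppiStDownsampleNearest_32u_C1R(d_src, (Ncv32u)srcPitch,
                                                   d_dst, (Ncv32u)dstPitch,
                                                   srcRoi, scale,
                                                   false);          // readThruTexture off
    if (st != NPPST_SUCCESS)
    {
        // handle the error
    }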
|
||||
|
||||
|
||||
/**
|
||||
* Downsamples (decimates) an image using the nearest neighbor algorithm. 32-bit signed pixels, single channel.
|
||||
* \see nppiStDownsampleNearest_32u_C1R
|
||||
*/
|
||||
NCV_EXPORTS
|
||||
NCVStatus nppiStDownsampleNearest_32s_C1R(Ncv32s *d_src, Ncv32u srcStep,
|
||||
Ncv32s *d_dst, Ncv32u dstStep,
|
||||
NcvSize32u srcRoi, Ncv32u scale,
|
||||
NcvBool readThruTexture);
|
||||
|
||||
|
||||
/**
|
||||
* Downsamples (decimates) an image using the nearest neighbor algorithm. 32-bit float pixels, single channel.
|
||||
* \see nppiStDownsampleNearest_32u_C1R
|
||||
*/
|
||||
NCV_EXPORTS
|
||||
NCVStatus nppiStDownsampleNearest_32f_C1R(Ncv32f *d_src, Ncv32u srcStep,
|
||||
Ncv32f *d_dst, Ncv32u dstStep,
|
||||
NcvSize32u srcRoi, Ncv32u scale,
|
||||
NcvBool readThruTexture);
|
||||
|
||||
|
||||
/**
|
||||
* Downsamples (decimates) an image using the nearest neighbor algorithm. 64-bit unsigned pixels, single channel.
|
||||
* \see nppiStDownsampleNearest_32u_C1R
|
||||
*/
|
||||
NCV_EXPORTS
|
||||
NCVStatus nppiStDownsampleNearest_64u_C1R(Ncv64u *d_src, Ncv32u srcStep,
|
||||
Ncv64u *d_dst, Ncv32u dstStep,
|
||||
NcvSize32u srcRoi, Ncv32u scale,
|
||||
NcvBool readThruTexture);
|
||||
|
||||
|
||||
/**
|
||||
* Downsamples (decimates) an image using the nearest neighbor algorithm. 64-bit signed pixels, single channel.
|
||||
* \see nppiStDownsampleNearest_32u_C1R
|
||||
*/
|
||||
NCV_EXPORTS
|
||||
NCVStatus nppiStDownsampleNearest_64s_C1R(Ncv64s *d_src, Ncv32u srcStep,
|
||||
Ncv64s *d_dst, Ncv32u dstStep,
|
||||
NcvSize32u srcRoi, Ncv32u scale,
|
||||
NcvBool readThruTexture);
|
||||
|
||||
|
||||
/**
|
||||
* Downsamples (decimates) an image using the nearest neighbor algorithm. 64-bit float pixels, single channel.
|
||||
* \see nppiStDownsampleNearest_32u_C1R
|
||||
*/
|
||||
NCV_EXPORTS
|
||||
NCVStatus nppiStDownsampleNearest_64f_C1R(Ncv64f *d_src, Ncv32u srcStep,
|
||||
Ncv64f *d_dst, Ncv32u dstStep,
|
||||
NcvSize32u srcRoi, Ncv32u scale,
|
||||
NcvBool readThruTexture);
|
||||
|
||||
|
||||
/**
|
||||
* Downsamples (decimates) an image using the nearest neighbor algorithm. 32-bit unsigned pixels, single channel. Host implementation.
|
||||
*
|
||||
* \param h_src [IN] Source image pointer (Host or pinned memory)
|
||||
* \param srcStep [IN] Source image line step
|
||||
* \param h_dst [OUT] Destination image pointer (Host or pinned memory)
|
||||
* \param dstStep [IN] Destination image line step
|
||||
* \param srcRoi [IN] Region of interest in the source image
|
||||
* \param scale [IN] Downsampling scale factor (positive integer)
|
||||
*
|
||||
* \return NCV status code
|
||||
*/
|
||||
NCV_EXPORTS
|
||||
NCVStatus nppiStDownsampleNearest_32u_C1R_host(Ncv32u *h_src, Ncv32u srcStep,
|
||||
Ncv32u *h_dst, Ncv32u dstStep,
|
||||
NcvSize32u srcRoi, Ncv32u scale);
|
||||
|
||||
|
||||
/**
|
||||
* Downsamples (decimates) an image using the nearest neighbor algorithm. 32-bit signed pixels, single channel. Host implementation.
|
||||
* \see nppiStDownsampleNearest_32u_C1R_host
|
||||
*/
|
||||
NCV_EXPORTS
|
||||
NCVStatus nppiStDownsampleNearest_32s_C1R_host(Ncv32s *h_src, Ncv32u srcStep,
|
||||
Ncv32s *h_dst, Ncv32u dstStep,
|
||||
NcvSize32u srcRoi, Ncv32u scale);
|
||||
|
||||
|
||||
/**
|
||||
* Downsamples (decimates) an image using the nearest neighbor algorithm. 32-bit float pixels, single channel. Host implementation.
|
||||
* \see nppiStDownsampleNearest_32u_C1R_host
|
||||
*/
|
||||
NCV_EXPORTS
|
||||
NCVStatus nppiStDownsampleNearest_32f_C1R_host(Ncv32f *h_src, Ncv32u srcStep,
|
||||
Ncv32f *h_dst, Ncv32u dstStep,
|
||||
NcvSize32u srcRoi, Ncv32u scale);
|
||||
|
||||
|
||||
/**
|
||||
* Downsamples (decimates) an image using the nearest neighbor algorithm. 64-bit unsigned pixels, single channel. Host implementation.
|
||||
* \see nppiStDownsampleNearest_32u_C1R_host
|
||||
*/
|
||||
NCV_EXPORTS
|
||||
NCVStatus nppiStDownsampleNearest_64u_C1R_host(Ncv64u *h_src, Ncv32u srcStep,
|
||||
Ncv64u *h_dst, Ncv32u dstStep,
|
||||
NcvSize32u srcRoi, Ncv32u scale);
|
||||
|
||||
|
||||
/**
|
||||
* Downsamples (decimates) an image using the nearest neighbor algorithm. 64-bit signed pixels, single channel. Host implementation.
|
||||
* \see nppiStDownsampleNearest_32u_C1R_host
|
||||
*/
|
||||
NCV_EXPORTS
|
||||
NCVStatus nppiStDownsampleNearest_64s_C1R_host(Ncv64s *h_src, Ncv32u srcStep,
|
||||
Ncv64s *h_dst, Ncv32u dstStep,
|
||||
NcvSize32u srcRoi, Ncv32u scale);
|
||||
|
||||
|
||||
/**
|
||||
* Downsamples (decimates) an image using the nearest neighbor algorithm. 64-bit float pixels, single channel. Host implementation.
|
||||
* \see nppiStDownsampleNearest_32u_C1R_host
|
||||
*/
|
||||
NCV_EXPORTS
|
||||
NCVStatus nppiStDownsampleNearest_64f_C1R_host(Ncv64f *h_src, Ncv32u srcStep,
|
||||
Ncv64f *h_dst, Ncv32u dstStep,
|
||||
NcvSize32u srcRoi, Ncv32u scale);
|
||||
|
||||
|
||||
/**
 * Computes standard deviation for each rectangular region of the input image using integral images.
 *
 * \param d_sum             [IN] Integral image pointer (CUDA device memory)
 * \param sumStep           [IN] Integral image line step
 * \param d_sqsum           [IN] Squared integral image pointer (CUDA device memory)
 * \param sqsumStep         [IN] Squared integral image line step
 * \param d_norm            [OUT] Stddev image pointer (CUDA device memory). Each pixel contains stddev of a rect with top-left corner at the original location in the image
 * \param normStep          [IN] Stddev image line step
 * \param roi               [IN] Region of interest in the source image
 * \param rect              [IN] Rectangular region to calculate stddev over
 * \param scaleArea         [IN] Multiplication factor to account for the decimated scale
 * \param readThruTexture   [IN] Performance hint to cache source in texture (true) or read directly (false)
 *
 * \return NCV status code
 */
NCV_EXPORTS
NCVStatus nppiStRectStdDev_32f_C1R(Ncv32u *d_sum, Ncv32u sumStep,
                                   Ncv64u *d_sqsum, Ncv32u sqsumStep,
                                   Ncv32f *d_norm, Ncv32u normStep,
                                   NcvSize32u roi, NcvRect32u rect,
                                   Ncv32f scaleArea, NcvBool readThruTexture);
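For orientation, a hedged usage sketch: d_sum, d_sqsum and d_norm are assumed to be device images prepared elsewhere (for example with nppiStIntegral_8u32u_C1R and nppiStSqrIntegral_8u64u_C1R), and all step values are assumed to be byte strides.

    NcvSize32u roi(640, 480);
    NcvRect32u rect(0, 0, 20, 20);              // 20x20 window evaluated at each ROI pixel
    NCVStatus st = nppiStRectStdDev_32f_C1R(d_sum,   sumStep,
                                            d_sqsum, sqsumStep,
                                            d_norm,  normStep,
                                            roi, rect,
                                            1.0f,                   // scaleArea: no area rescaling
                                            true);                  // prefer texture-cached reads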
|
||||
|
||||
|
||||
/**
|
||||
* Computes standard deviation for each rectangular region of the input image using integral images. Host implementation
|
||||
*
|
||||
* \param h_sum [IN] Integral image pointer (Host or pinned memory)
|
||||
* \param sumStep [IN] Integral image line step
|
||||
* \param h_sqsum [IN] Squared integral image pointer (Host or pinned memory)
|
||||
* \param sqsumStep [IN] Squared integral image line step
|
||||
* \param h_norm [OUT] Stddev image pointer (Host or pinned memory). Each pixel contains stddev of a rect with top-left corner at the original location in the image
|
||||
* \param normStep [IN] Stddev image line step
|
||||
* \param roi [IN] Region of interest in the source image
|
||||
* \param rect [IN] Rectangular region to calculate stddev over
|
||||
* \param scaleArea [IN] Multiplication factor to account decimated scale
|
||||
*
|
||||
* \return NCV status code
|
||||
*/
|
||||
NCV_EXPORTS
|
||||
NCVStatus nppiStRectStdDev_32f_C1R_host(Ncv32u *h_sum, Ncv32u sumStep,
|
||||
Ncv64u *h_sqsum, Ncv32u sqsumStep,
|
||||
Ncv32f *h_norm, Ncv32u normStep,
|
||||
NcvSize32u roi, NcvRect32u rect,
|
||||
Ncv32f scaleArea);
|
||||
|
||||
|
||||
/**
|
||||
* Transposes an image. 32-bit unsigned pixels, single channel
|
||||
*
|
||||
* \param d_src [IN] Source image pointer (CUDA device memory)
|
||||
* \param srcStride [IN] Source image line step
|
||||
* \param d_dst [OUT] Destination image pointer (CUDA device memory)
|
||||
* \param dstStride [IN] Destination image line step
|
||||
* \param srcRoi [IN] Region of interest of the source image
|
||||
*
|
||||
* \return NCV status code
|
||||
*/
|
||||
NCV_EXPORTS
|
||||
NCVStatus nppiStTranspose_32u_C1R(Ncv32u *d_src, Ncv32u srcStride,
|
||||
Ncv32u *d_dst, Ncv32u dstStride, NcvSize32u srcRoi);
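Transposition swaps width and height, so a W x H source needs an H x W destination. A sketch with hypothetical device buffers and byte strides:

    NcvSize32u srcRoi(640, 480);                // source is 640x480, destination must be 480x640
    NCVStatus st = nppiStTranspose_32u_C1R(d_src, srcStride,
                                           d_dst, dstStride,
                                           srcRoi);
    if (st != NPPST_SUCCESS)
    {
        // handle the error
    }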
|
||||
|
||||
|
||||
/**
|
||||
* Transposes an image. 32-bit signed pixels, single channel
|
||||
* \see nppiStTranspose_32u_C1R
|
||||
*/
|
||||
NCV_EXPORTS
|
||||
NCVStatus nppiStTranspose_32s_C1R(Ncv32s *d_src, Ncv32u srcStride,
|
||||
Ncv32s *d_dst, Ncv32u dstStride, NcvSize32u srcRoi);
|
||||
|
||||
|
||||
/**
|
||||
* Transposes an image. 32-bit float pixels, single channel
|
||||
* \see nppiStTranspose_32u_C1R
|
||||
*/
|
||||
NCV_EXPORTS
|
||||
NCVStatus nppiStTranspose_32f_C1R(Ncv32f *d_src, Ncv32u srcStride,
|
||||
Ncv32f *d_dst, Ncv32u dstStride, NcvSize32u srcRoi);
|
||||
|
||||
|
||||
/**
|
||||
* Transposes an image. 64-bit unsigned pixels, single channel
|
||||
* \see nppiStTranspose_32u_C1R
|
||||
*/
|
||||
NCV_EXPORTS
|
||||
NCVStatus nppiStTranspose_64u_C1R(Ncv64u *d_src, Ncv32u srcStride,
|
||||
Ncv64u *d_dst, Ncv32u dstStride, NcvSize32u srcRoi);
|
||||
|
||||
|
||||
/**
|
||||
* Transposes an image. 64-bit signed pixels, single channel
|
||||
* \see nppiStTranspose_32u_C1R
|
||||
*/
|
||||
NCV_EXPORTS
|
||||
NCVStatus nppiStTranspose_64s_C1R(Ncv64s *d_src, Ncv32u srcStride,
|
||||
Ncv64s *d_dst, Ncv32u dstStride, NcvSize32u srcRoi);
|
||||
|
||||
|
||||
/**
|
||||
* Transposes an image. 64-bit float pixels, single channel
|
||||
* \see nppiStTranspose_32u_C1R
|
||||
*/
|
||||
NCV_EXPORTS
|
||||
NCVStatus nppiStTranspose_64f_C1R(Ncv64f *d_src, Ncv32u srcStride,
|
||||
Ncv64f *d_dst, Ncv32u dstStride, NcvSize32u srcRoi);
|
||||
|
||||
|
||||
/**
|
||||
* Transposes an image. 32-bit unsigned pixels, single channel. Host implementation
|
||||
*
|
||||
* \param h_src [IN] Source image pointer (Host or pinned memory)
|
||||
* \param srcStride [IN] Source image line step
|
||||
* \param h_dst [OUT] Destination image pointer (Host or pinned memory)
|
||||
* \param dstStride [IN] Destination image line step
|
||||
* \param srcRoi [IN] Region of interest of the source image
|
||||
*
|
||||
* \return NCV status code
|
||||
*/
|
||||
NCV_EXPORTS
|
||||
NCVStatus nppiStTranspose_32u_C1R_host(Ncv32u *h_src, Ncv32u srcStride,
|
||||
Ncv32u *h_dst, Ncv32u dstStride, NcvSize32u srcRoi);
|
||||
|
||||
|
||||
/**
|
||||
* Transposes an image. 32-bit signed pixels, single channel. Host implementation
|
||||
* \see nppiStTranspose_32u_C1R_host
|
||||
*/
|
||||
NCV_EXPORTS
|
||||
NCVStatus nppiStTranspose_32s_C1R_host(Ncv32s *h_src, Ncv32u srcStride,
|
||||
Ncv32s *h_dst, Ncv32u dstStride, NcvSize32u srcRoi);
|
||||
|
||||
|
||||
/**
|
||||
* Transposes an image. 32-bit float pixels, single channel. Host implementation
|
||||
* \see nppiStTranspose_32u_C1R_host
|
||||
*/
|
||||
NCV_EXPORTS
|
||||
NCVStatus nppiStTranspose_32f_C1R_host(Ncv32f *h_src, Ncv32u srcStride,
|
||||
Ncv32f *h_dst, Ncv32u dstStride, NcvSize32u srcRoi);
|
||||
|
||||
|
||||
/**
|
||||
* Transposes an image. 64-bit unsigned pixels, single channel. Host implementation
|
||||
* \see nppiStTranspose_32u_C1R_host
|
||||
*/
|
||||
NCV_EXPORTS
|
||||
NCVStatus nppiStTranspose_64u_C1R_host(Ncv64u *h_src, Ncv32u srcStride,
|
||||
Ncv64u *h_dst, Ncv32u dstStride, NcvSize32u srcRoi);
|
||||
|
||||
|
||||
/**
|
||||
* Transposes an image. 64-bit signed pixels, single channel. Host implementation
|
||||
* \see nppiStTranspose_32u_C1R_host
|
||||
*/
|
||||
NCV_EXPORTS
|
||||
NCVStatus nppiStTranspose_64s_C1R_host(Ncv64s *h_src, Ncv32u srcStride,
|
||||
Ncv64s *h_dst, Ncv32u dstStride, NcvSize32u srcRoi);
|
||||
|
||||
|
||||
/**
|
||||
* Transposes an image. 64-bit float pixels, single channel. Host implementation
|
||||
* \see nppiStTranspose_32u_C1R_host
|
||||
*/
|
||||
NCV_EXPORTS
|
||||
NCVStatus nppiStTranspose_64f_C1R_host(Ncv64f *h_src, Ncv32u srcStride,
|
||||
Ncv64f *h_dst, Ncv32u dstStride, NcvSize32u srcRoi);
|
||||
|
||||
|
||||
/**
 * Calculates the size of the temporary buffer for integral image creation
 *
 * \param roiSize           [IN] Size of the input image
 * \param pBufsize          [OUT] Pointer to host variable that returns the size of the temporary buffer (in bytes)
 * \param devProp           [IN] CUDA device properties structure, containing texture alignment information
 *
 * \return NCV status code
 */
NCV_EXPORTS
NCVStatus nppiStIntegralGetSize_8u32u(NcvSize32u roiSize, Ncv32u *pBufsize, cudaDeviceProp &devProp);


/**
 * Calculates the size of the temporary buffer for integral image creation
 * \see nppiStIntegralGetSize_8u32u
 */
NCV_EXPORTS
NCVStatus nppiStIntegralGetSize_32f32f(NcvSize32u roiSize, Ncv32u *pBufsize, cudaDeviceProp &devProp);


/**
 * Creates an integral image representation for the input image
 *
 * \param d_src             [IN] Source image pointer (CUDA device memory)
 * \param srcStep           [IN] Source image line step
 * \param d_dst             [OUT] Destination integral image pointer (CUDA device memory)
 * \param dstStep           [IN] Destination image line step
 * \param roiSize           [IN] Region of interest of the source image
 * \param pBuffer           [IN] Pointer to the pre-allocated temporary buffer (CUDA device memory)
 * \param bufSize           [IN] Size of the pBuffer in bytes
 * \param devProp           [IN] CUDA device properties structure, containing texture alignment information
 *
 * \return NCV status code
 */
NCV_EXPORTS
NCVStatus nppiStIntegral_8u32u_C1R(Ncv8u *d_src, Ncv32u srcStep,
                                   Ncv32u *d_dst, Ncv32u dstStep, NcvSize32u roiSize,
                                   Ncv8u *pBuffer, Ncv32u bufSize, cudaDeviceProp &devProp);
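The GetSize/compute pairing follows the usual NPP pattern: query the scratch size, allocate the scratch buffer, then run the primitive. A sketch under the assumption that d_src/d_dst and the step values are prepared by the caller (the (width + 1) x (height + 1) destination layout is also an assumption, not something this header states):

    cudaDeviceProp devProp;
    cudaGetDeviceProperties(&devProp, 0);       // texture alignment info for device 0

    NcvSize32u roi(640, 480);
    Ncv32u bufSize = 0;
    NCVStatus st = nppiStIntegralGetSize_8u32u(roi, &bufSize, devProp);

    Ncv8u *pBuffer = 0;
    cudaMalloc((void **)&pBuffer, bufSize);     // scratch space for the primitive

    st = nppiStIntegral_8u32u_C1R(d_src, srcStep,
                                  d_dst, dstStep, roi,
                                  pBuffer, bufSize, devProp);
    cudaFree(pBuffer);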
|
||||
|
||||
|
||||
/**
|
||||
* Creates an integral image representation for the input image
|
||||
* \see nppiStIntegral_8u32u_C1R
|
||||
*/
|
||||
NCV_EXPORTS
|
||||
NCVStatus nppiStIntegral_32f32f_C1R(Ncv32f *d_src, Ncv32u srcStep,
|
||||
Ncv32f *d_dst, Ncv32u dstStep, NcvSize32u roiSize,
|
||||
Ncv8u *pBuffer, Ncv32u bufSize, cudaDeviceProp &devProp);
|
||||
|
||||
|
||||
/**
|
||||
* Creates an integral image representation for the input image. Host implementation
|
||||
*
|
||||
* \param h_src [IN] Source image pointer (Host or pinned memory)
|
||||
* \param srcStep [IN] Source image line step
|
||||
* \param h_dst [OUT] Destination integral image pointer (Host or pinned memory)
|
||||
* \param dstStep [IN] Destination image line step
|
||||
* \param roiSize [IN] Region of interest of the source image
|
||||
*
|
||||
* \return NCV status code
|
||||
*/
|
||||
NCV_EXPORTS
|
||||
NCVStatus nppiStIntegral_8u32u_C1R_host(Ncv8u *h_src, Ncv32u srcStep,
|
||||
Ncv32u *h_dst, Ncv32u dstStep, NcvSize32u roiSize);
|
||||
|
||||
|
||||
/**
|
||||
* Creates an integral image representation for the input image. Host implementation
|
||||
* \see nppiStIntegral_8u32u_C1R_host
|
||||
*/
|
||||
NCV_EXPORTS
|
||||
NCVStatus nppiStIntegral_32f32f_C1R_host(Ncv32f *h_src, Ncv32u srcStep,
|
||||
Ncv32f *h_dst, Ncv32u dstStep, NcvSize32u roiSize);
|
||||
|
||||
|
||||
/**
|
||||
* Calculates the size of the temporary buffer for squared integral image creation
|
||||
*
|
||||
* \param roiSize [IN] Size of the input image
|
||||
* \param pBufsize [OUT] Pointer to host variable that returns the size of the temporary buffer (in bytes)
|
||||
* \param devProp [IN] CUDA device properties structure, containing texture alignment information
|
||||
*
|
||||
* \return NCV status code
|
||||
*/
|
||||
NCV_EXPORTS
|
||||
NCVStatus nppiStSqrIntegralGetSize_8u64u(NcvSize32u roiSize, Ncv32u *pBufsize, cudaDeviceProp &devProp);
|
||||
|
||||
|
||||
/**
|
||||
* Creates a squared integral image representation for the input image
|
||||
*
|
||||
* \param d_src [IN] Source image pointer (CUDA device memory)
|
||||
* \param srcStep [IN] Source image line step
|
||||
* \param d_dst [OUT] Destination squared integral image pointer (CUDA device memory)
|
||||
* \param dstStep [IN] Destination image line step
|
||||
* \param roiSize [IN] Region of interest of the source image
|
||||
* \param pBuffer [IN] Pointer to the pre-allocated temporary buffer (CUDA device memory)
|
||||
* \param bufSize [IN] Size of the pBuffer in bytes
|
||||
* \param devProp [IN] CUDA device properties structure, containing texture alignment information
|
||||
*
|
||||
* \return NCV status code
|
||||
*/
|
||||
NCV_EXPORTS
|
||||
NCVStatus nppiStSqrIntegral_8u64u_C1R(Ncv8u *d_src, Ncv32u srcStep,
|
||||
Ncv64u *d_dst, Ncv32u dstStep, NcvSize32u roiSize,
|
||||
Ncv8u *pBuffer, Ncv32u bufSize, cudaDeviceProp &devProp);
|
||||
|
||||
|
||||
/**
|
||||
* Creates a squared integral image representation for the input image. Host implementation
|
||||
*
|
||||
* \param h_src [IN] Source image pointer (Host or pinned memory)
|
||||
* \param srcStep [IN] Source image line step
|
||||
* \param h_dst [OUT] Destination squared integral image pointer (Host or pinned memory)
|
||||
* \param dstStep [IN] Destination image line step
|
||||
* \param roiSize [IN] Region of interest of the source image
|
||||
*
|
||||
* \return NCV status code
|
||||
*/
|
||||
NCV_EXPORTS
|
||||
NCVStatus nppiStSqrIntegral_8u64u_C1R_host(Ncv8u *h_src, Ncv32u srcStep,
|
||||
Ncv64u *h_dst, Ncv32u dstStep, NcvSize32u roiSize);
|
||||
|
||||
|
||||
/*@}*/
|
||||
|
||||
|
||||
/** \defgroup npps NPPST Signal Processing
|
||||
* @{
|
||||
*/
|
||||
|
||||
|
||||
/**
 * Calculates the size of the temporary buffer for vector compaction. 32-bit unsigned values
 *
 * \param srcLen            [IN] Length of the input vector in elements
 * \param pBufsize          [OUT] Pointer to host variable that returns the size of the temporary buffer (in bytes)
 * \param devProp           [IN] CUDA device properties structure, containing texture alignment information
 *
 * \return NCV status code
 */
NCV_EXPORTS
NCVStatus nppsStCompactGetSize_32u(Ncv32u srcLen, Ncv32u *pBufsize, cudaDeviceProp &devProp);


/**
 * Calculates the size of the temporary buffer for vector compaction. 32-bit signed values
 * \see nppsStCompactGetSize_32u
 */
NCVStatus nppsStCompactGetSize_32s(Ncv32u srcLen, Ncv32u *pBufsize, cudaDeviceProp &devProp);


/**
 * Calculates the size of the temporary buffer for vector compaction. 32-bit float values
 * \see nppsStCompactGetSize_32u
 */
NCVStatus nppsStCompactGetSize_32f(Ncv32u srcLen, Ncv32u *pBufsize, cudaDeviceProp &devProp);


/**
 * Compacts the input vector by removing elements of specified value. 32-bit unsigned values
 *
 * \param d_src             [IN] Source vector pointer (CUDA device memory)
 * \param srcLen            [IN] Source vector length
 * \param d_dst             [OUT] Destination vector pointer (CUDA device memory)
 * \param p_dstLen          [OUT] Pointer to the destination vector length (Pinned memory or NULL)
 * \param elemRemove        [IN] The value to be removed
 * \param pBuffer           [IN] Pointer to the pre-allocated temporary buffer (CUDA device memory)
 * \param bufSize           [IN] Size of the pBuffer in bytes
 * \param devProp           [IN] CUDA device properties structure, containing texture alignment information
 *
 * \return NCV status code
 */
NCV_EXPORTS
NCVStatus nppsStCompact_32u(Ncv32u *d_src, Ncv32u srcLen,
                            Ncv32u *d_dst, Ncv32u *p_dstLen,
                            Ncv32u elemRemove, Ncv8u *pBuffer,
                            Ncv32u bufSize, cudaDeviceProp &devProp);
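Compaction follows the same GetSize-then-run pattern. The sketch below removes a sentinel value from a device vector; it assumes d_src, d_dst and srcLen are provided by the caller, with the output length written to pinned memory as the documentation above suggests.

    cudaDeviceProp devProp;
    cudaGetDeviceProperties(&devProp, 0);

    Ncv32u bufSize = 0;
    nppsStCompactGetSize_32u(srcLen, &bufSize, devProp);

    Ncv8u *pBuffer = 0;
    cudaMalloc((void **)&pBuffer, bufSize);

    Ncv32u *p_dstLen = 0;
    cudaMallocHost((void **)&p_dstLen, sizeof(Ncv32u));     // pinned, readable after the call

    NCVStatus st = nppsStCompact_32u(d_src, srcLen,
                                     d_dst, p_dstLen,
                                     0xFFFFFFFFu,           // elemRemove: the sentinel to drop
                                     pBuffer, bufSize, devProp);

    cudaFreeHost(p_dstLen);
    cudaFree(pBuffer);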
|
||||
|
||||
|
||||
/**
|
||||
* Compacts the input vector by removing elements of specified value. 32-bit signed values
|
||||
* \see nppsStCompact_32u
|
||||
*/
|
||||
NCV_EXPORTS
|
||||
NCVStatus nppsStCompact_32s(Ncv32s *d_src, Ncv32u srcLen,
|
||||
Ncv32s *d_dst, Ncv32u *p_dstLen,
|
||||
Ncv32s elemRemove, Ncv8u *pBuffer,
|
||||
Ncv32u bufSize, cudaDeviceProp &devProp);
|
||||
|
||||
|
||||
/**
|
||||
* Compacts the input vector by removing elements of specified value. 32-bit float values
|
||||
* \see nppsStCompact_32u
|
||||
*/
|
||||
NCV_EXPORTS
|
||||
NCVStatus nppsStCompact_32f(Ncv32f *d_src, Ncv32u srcLen,
|
||||
Ncv32f *d_dst, Ncv32u *p_dstLen,
|
||||
Ncv32f elemRemove, Ncv8u *pBuffer,
|
||||
Ncv32u bufSize, cudaDeviceProp &devProp);
|
||||
|
||||
|
||||
/**
|
||||
* Compacts the input vector by removing elements of specified value. 32-bit unsigned values. Host implementation
|
||||
*
|
||||
* \param h_src [IN] Source vector pointer (CUDA device memory)
|
||||
* \param srcLen [IN] Source vector length
|
||||
* \param h_dst [OUT] Destination vector pointer (CUDA device memory)
|
||||
* \param dstLen [OUT] Pointer to the destination vector length (can be NULL)
|
||||
* \param elemRemove [IN] The value to be removed
|
||||
*
|
||||
* \return NCV status code
|
||||
*/
|
||||
NCV_EXPORTS
|
||||
NCVStatus nppsStCompact_32u_host(Ncv32u *h_src, Ncv32u srcLen,
|
||||
Ncv32u *h_dst, Ncv32u *dstLen, Ncv32u elemRemove);
|
||||
|
||||
|
||||
/**
|
||||
* Compacts the input vector by removing elements of specified value. 32-bit signed values. Host implementation
|
||||
* \see nppsStCompact_32u_host
|
||||
*/
|
||||
NCV_EXPORTS
|
||||
NCVStatus nppsStCompact_32s_host(Ncv32s *h_src, Ncv32u srcLen,
|
||||
Ncv32s *h_dst, Ncv32u *dstLen, Ncv32s elemRemove);
|
||||
|
||||
|
||||
/**
|
||||
* Compacts the input vector by removing elements of specified value. 32-bit float values. Host implementation
|
||||
* \see nppsStCompact_32u_host
|
||||
*/
|
||||
NCV_EXPORTS
|
||||
NCVStatus nppsStCompact_32f_host(Ncv32f *h_src, Ncv32u srcLen,
|
||||
Ncv32f *h_dst, Ncv32u *dstLen, Ncv32f elemRemove);
|
||||
|
||||
|
||||
/*@}*/
|
||||
|
||||
|
||||
#endif // _npp_staging_hpp_
|
@ -40,15 +40,13 @@
|
||||
//M*/
|
||||
|
||||
|
||||
#include <precomp.hpp>
|
||||
|
||||
|
||||
#if !defined (HAVE_CUDA)
|
||||
|
||||
|
||||
#else /* !defined (HAVE_CUDA) */
|
||||
|
||||
|
||||
#include <ios>
|
||||
#include <stdarg.h>
|
||||
#include "NCV.hpp"
|
||||
|
||||
@ -94,17 +92,6 @@ void ncvSetDebugOutputHandler(NCVDebugOutputHandler *func)
|
||||
//==============================================================================
|
||||
|
||||
|
||||
NCVStatus GPUAlignmentValue(Ncv32u &alignment)
|
||||
{
|
||||
int curDev;
|
||||
cudaDeviceProp curProp;
|
||||
ncvAssertCUDAReturn(cudaGetDevice(&curDev), NCV_CUDA_ERROR);
|
||||
ncvAssertCUDAReturn(cudaGetDeviceProperties(&curProp, curDev), NCV_CUDA_ERROR);
|
||||
alignment = curProp.textureAlignment; //GPUAlignmentValue(curProp.major);
|
||||
return NCV_SUCCESS;
|
||||
}
|
||||
|
||||
|
||||
Ncv32u alignUp(Ncv32u what, Ncv32u alignment)
|
||||
{
|
||||
Ncv32u alignMask = alignment-1;
|
||||
@ -216,7 +203,7 @@ NCVMemStackAllocator::NCVMemStackAllocator(Ncv32u alignment)
|
||||
}
|
||||
|
||||
|
||||
NCVMemStackAllocator::NCVMemStackAllocator(NCVMemoryType memT, size_t capacity, Ncv32u alignment)
|
||||
NCVMemStackAllocator::NCVMemStackAllocator(NCVMemoryType memT, size_t capacity, Ncv32u alignment, void *reusePtr)
|
||||
:
|
||||
currentSize(0),
|
||||
_maxSize(0),
|
||||
@ -229,17 +216,26 @@ NCVMemStackAllocator::NCVMemStackAllocator(NCVMemoryType memT, size_t capacity,
|
||||
|
||||
allocBegin = NULL;
|
||||
|
||||
switch (memT)
|
||||
if (reusePtr == NULL)
|
||||
{
|
||||
case NCVMemoryTypeDevice:
|
||||
ncvAssertCUDAReturn(cudaMalloc(&allocBegin, capacity), );
|
||||
break;
|
||||
case NCVMemoryTypeHostPinned:
|
||||
ncvAssertCUDAReturn(cudaMallocHost(&allocBegin, capacity), );
|
||||
break;
|
||||
case NCVMemoryTypeHostPageable:
|
||||
allocBegin = (Ncv8u *)malloc(capacity);
|
||||
break;
|
||||
bReusesMemory = false;
|
||||
switch (memT)
|
||||
{
|
||||
case NCVMemoryTypeDevice:
|
||||
ncvAssertCUDAReturn(cudaMalloc(&allocBegin, capacity), );
|
||||
break;
|
||||
case NCVMemoryTypeHostPinned:
|
||||
ncvAssertCUDAReturn(cudaMallocHost(&allocBegin, capacity), );
|
||||
break;
|
||||
case NCVMemoryTypeHostPageable:
|
||||
allocBegin = (Ncv8u *)malloc(capacity);
|
||||
break;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
bReusesMemory = true;
|
||||
allocBegin = (Ncv8u *)reusePtr;
|
||||
}
|
||||
|
||||
if (capacity == 0)
|
||||
@ -260,18 +256,23 @@ NCVMemStackAllocator::~NCVMemStackAllocator()
|
||||
if (allocBegin != NULL)
|
||||
{
|
||||
ncvAssertPrintCheck(currentSize == 0, "NCVMemStackAllocator dtor:: not all objects were deallocated properly, forcing destruction");
|
||||
switch (_memType)
|
||||
|
||||
if (!bReusesMemory)
|
||||
{
|
||||
case NCVMemoryTypeDevice:
|
||||
ncvAssertCUDAReturn(cudaFree(allocBegin), );
|
||||
break;
|
||||
case NCVMemoryTypeHostPinned:
|
||||
ncvAssertCUDAReturn(cudaFreeHost(allocBegin), );
|
||||
break;
|
||||
case NCVMemoryTypeHostPageable:
|
||||
free(allocBegin);
|
||||
break;
|
||||
switch (_memType)
|
||||
{
|
||||
case NCVMemoryTypeDevice:
|
||||
ncvAssertCUDAReturn(cudaFree(allocBegin), );
|
||||
break;
|
||||
case NCVMemoryTypeHostPinned:
|
||||
ncvAssertCUDAReturn(cudaFreeHost(allocBegin), );
|
||||
break;
|
||||
case NCVMemoryTypeHostPageable:
|
||||
free(allocBegin);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
allocBegin = NULL;
|
||||
}
|
||||
}
|
||||
@ -356,14 +357,14 @@ size_t NCVMemStackAllocator::maxSize(void) const
|
||||
//===================================================================
|
||||
|
||||
|
||||
NCVMemNativeAllocator::NCVMemNativeAllocator(NCVMemoryType memT)
|
||||
NCVMemNativeAllocator::NCVMemNativeAllocator(NCVMemoryType memT, Ncv32u alignment)
|
||||
:
|
||||
currentSize(0),
|
||||
_maxSize(0),
|
||||
_memType(memT)
|
||||
_memType(memT),
|
||||
_alignment(alignment)
|
||||
{
|
||||
ncvAssertPrintReturn(memT != NCVMemoryTypeNone, "NCVMemNativeAllocator ctor:: counting not permitted for this allocator type", );
|
||||
ncvAssertPrintReturn(NCV_SUCCESS == GPUAlignmentValue(this->_alignment), "NCVMemNativeAllocator ctor:: couldn't get device _alignment", );
|
||||
}
|
||||
|
||||
|
@@ -42,8 +42,49 @@
#ifndef _ncv_hpp_
#define _ncv_hpp_

#if (defined WIN32 || defined _WIN32 || defined WINCE) && defined CVAPI_EXPORTS //&& !defined(__CUDACC__)
    #define NCV_EXPORTS __declspec(dllexport)
#else
    #define NCV_EXPORTS
#endif

#include <cuda_runtime.h>
#include "npp_staging.h"


//==============================================================================
//
// Compile-time assert functionality
//
//==============================================================================


/**
 * Compile-time assert namespace
 */
namespace NcvCTprep
{
    template <bool x>
    struct CT_ASSERT_FAILURE;

    template <>
    struct CT_ASSERT_FAILURE<true> {};

    template <int x>
    struct assertTest{};
}


#define NCV_CT_PREP_PASTE_AUX(a,b)     a##b                         ///< Concatenation indirection macro
#define NCV_CT_PREP_PASTE(a,b)         NCV_CT_PREP_PASTE_AUX(a, b)  ///< Concatenation macro


/**
 * Performs compile-time assertion of a condition on the file scope
 */
#define NCV_CT_ASSERT(X) \
    typedef NcvCTprep::assertTest<sizeof(NcvCTprep::CT_ASSERT_FAILURE< (bool)(X) >)> \
    NCV_CT_PREP_PASTE(__ct_assert_typedef_, __LINE__)
||||
|
||||
|
||||
|
||||
//==============================================================================
|
||||
@ -82,62 +123,72 @@ typedef float Ncv32f;
|
||||
typedef double Ncv64f;
|
||||
|
||||
|
||||
typedef struct
|
||||
struct NcvRect8u
|
||||
{
|
||||
Ncv8u x;
|
||||
Ncv8u y;
|
||||
Ncv8u width;
|
||||
Ncv8u height;
|
||||
} NcvRect8u;
|
||||
NcvRect8u() : x(0), y(0), width(0), height(0) {};
|
||||
NcvRect8u(Ncv8u x, Ncv8u y, Ncv8u width, Ncv8u height) : x(x), y(y), width(width), height(height) {}
|
||||
};
|
||||
|
||||
|
||||
typedef struct
|
||||
struct NcvRect32s
|
||||
{
|
||||
Ncv32s x; ///< x-coordinate of upper left corner.
|
||||
Ncv32s y; ///< y-coordinate of upper left corner.
|
||||
Ncv32s width; ///< Rectangle width.
|
||||
Ncv32s height; ///< Rectangle height.
|
||||
} NcvRect32s;
|
||||
NcvRect32s() : x(0), y(0), width(0), height(0) {};
|
||||
NcvRect32s(Ncv32s x, Ncv32s y, Ncv32s width, Ncv32s height) : x(x), y(y), width(width), height(height) {}
|
||||
};
|
||||
|
||||
|
||||
typedef struct
|
||||
struct NcvRect32u
|
||||
{
|
||||
Ncv32u x; ///< x-coordinate of upper left corner.
|
||||
Ncv32u y; ///< y-coordinate of upper left corner.
|
||||
Ncv32u width; ///< Rectangle width.
|
||||
Ncv32u height; ///< Rectangle height.
|
||||
} NcvRect32u;
|
||||
NcvRect32u() : x(0), y(0), width(0), height(0) {};
|
||||
NcvRect32u(Ncv32u x, Ncv32u y, Ncv32u width, Ncv32u height) : x(x), y(y), width(width), height(height) {}
|
||||
};
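The practical effect of replacing the typedef'd PODs with structs that have constructors is one-line value initialization, for example (illustrative only):

    NcvRect32u face;                        // default-constructed, all fields zero
    NcvRect32u window(16, 32, 64, 64);      // x, y, width, height
    NcvSize32u frame(640, 480);             // width, height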
|
||||
|
||||
|
||||
typedef struct
|
||||
struct NcvSize32s
|
||||
{
|
||||
Ncv32s width; ///< Rectangle width.
|
||||
Ncv32s height; ///< Rectangle height.
|
||||
} NcvSize32s;
|
||||
NcvSize32s() : width(0), height(0) {};
|
||||
NcvSize32s(Ncv32s width, Ncv32s height) : width(width), height(height) {}
|
||||
};
|
||||
|
||||
|
||||
typedef struct
|
||||
struct NcvSize32u
|
||||
{
|
||||
Ncv32u width; ///< Rectangle width.
|
||||
Ncv32u height; ///< Rectangle height.
|
||||
} NcvSize32u;
|
||||
NcvSize32u() : width(0), height(0) {};
|
||||
NcvSize32u(Ncv32u width, Ncv32u height) : width(width), height(height) {}
|
||||
};
|
||||
|
||||
|
||||
NPPST_CT_ASSERT(sizeof(NcvBool) <= 4);
|
||||
NPPST_CT_ASSERT(sizeof(Ncv64s) == 8);
|
||||
NPPST_CT_ASSERT(sizeof(Ncv64u) == 8);
|
||||
NPPST_CT_ASSERT(sizeof(Ncv32s) == 4);
|
||||
NPPST_CT_ASSERT(sizeof(Ncv32u) == 4);
|
||||
NPPST_CT_ASSERT(sizeof(Ncv16s) == 2);
|
||||
NPPST_CT_ASSERT(sizeof(Ncv16u) == 2);
|
||||
NPPST_CT_ASSERT(sizeof(Ncv8s) == 1);
|
||||
NPPST_CT_ASSERT(sizeof(Ncv8u) == 1);
|
||||
NPPST_CT_ASSERT(sizeof(Ncv32f) == 4);
|
||||
NPPST_CT_ASSERT(sizeof(Ncv64f) == 8);
|
||||
NPPST_CT_ASSERT(sizeof(NcvRect8u) == sizeof(Ncv32u));
|
||||
NPPST_CT_ASSERT(sizeof(NcvRect32s) == 4 * sizeof(Ncv32s));
|
||||
NPPST_CT_ASSERT(sizeof(NcvRect32u) == 4 * sizeof(Ncv32u));
|
||||
NPPST_CT_ASSERT(sizeof(NcvSize32u) == 2 * sizeof(Ncv32u));
|
||||
NCV_CT_ASSERT(sizeof(NcvBool) <= 4);
|
||||
NCV_CT_ASSERT(sizeof(Ncv64s) == 8);
|
||||
NCV_CT_ASSERT(sizeof(Ncv64u) == 8);
|
||||
NCV_CT_ASSERT(sizeof(Ncv32s) == 4);
|
||||
NCV_CT_ASSERT(sizeof(Ncv32u) == 4);
|
||||
NCV_CT_ASSERT(sizeof(Ncv16s) == 2);
|
||||
NCV_CT_ASSERT(sizeof(Ncv16u) == 2);
|
||||
NCV_CT_ASSERT(sizeof(Ncv8s) == 1);
|
||||
NCV_CT_ASSERT(sizeof(Ncv8u) == 1);
|
||||
NCV_CT_ASSERT(sizeof(Ncv32f) == 4);
|
||||
NCV_CT_ASSERT(sizeof(Ncv64f) == 8);
|
||||
NCV_CT_ASSERT(sizeof(NcvRect8u) == sizeof(Ncv32u));
|
||||
NCV_CT_ASSERT(sizeof(NcvRect32s) == 4 * sizeof(Ncv32s));
|
||||
NCV_CT_ASSERT(sizeof(NcvRect32u) == 4 * sizeof(Ncv32u));
|
||||
NCV_CT_ASSERT(sizeof(NcvSize32u) == 2 * sizeof(Ncv32u));
|
||||
|
||||
|
||||
//==============================================================================
|
||||
@ -162,13 +213,13 @@ const Ncv32u K_LOG2_WARP_SIZE = 5;
|
||||
#define NCV_CT_PREP_STRINGIZE(x) NCV_CT_PREP_STRINGIZE_AUX(x)
|
||||
|
||||
|
||||
void ncvDebugOutput(const char *msg, ...);
|
||||
NCV_EXPORTS void ncvDebugOutput(const char *msg, ...);
|
||||
|
||||
|
||||
typedef void NCVDebugOutputHandler(const char* msg);
|
||||
|
||||
|
||||
void ncvSetDebugOutputHandler(NCVDebugOutputHandler* func);
|
||||
NCV_EXPORTS void ncvSetDebugOutputHandler(NCVDebugOutputHandler* func);
|
||||
|
||||
|
||||
#define ncvAssertPrintCheck(pred, msg) \
|
||||
@ -222,6 +273,7 @@ void ncvSetDebugOutputHandler(NCVDebugOutputHandler* func);
|
||||
*/
|
||||
enum NCVStatus
|
||||
{
|
||||
//NCV statuses
|
||||
NCV_SUCCESS,
|
||||
|
||||
NCV_CUDA_ERROR,
|
||||
@ -257,6 +309,24 @@ enum NCVStatus
|
||||
NCV_NOIMPL_HAAR_TILTED_FEATURES,
|
||||
|
||||
NCV_WARNING_HAAR_DETECTIONS_VECTOR_OVERFLOW,
|
||||
|
||||
//NPP statuses
|
||||
NPPST_SUCCESS = NCV_SUCCESS, ///< Successful operation (same as NPP_NO_ERROR)
|
||||
NPPST_ERROR, ///< Unknown error
|
||||
NPPST_CUDA_KERNEL_EXECUTION_ERROR, ///< CUDA kernel execution error
|
||||
NPPST_NULL_POINTER_ERROR, ///< NULL pointer argument error
|
||||
NPPST_TEXTURE_BIND_ERROR, ///< CUDA texture binding error or non-zero offset returned
|
||||
NPPST_MEMCPY_ERROR, ///< CUDA memory copy error
|
||||
NPPST_MEM_ALLOC_ERR, ///< CUDA memory allocation error
|
||||
NPPST_MEMFREE_ERR, ///< CUDA memory deallocation error
|
||||
|
||||
//NPPST statuses
|
||||
NPPST_INVALID_ROI, ///< Invalid region of interest argument
|
||||
NPPST_INVALID_STEP, ///< Invalid image lines step argument (check sign, alignment, relation to image width)
|
||||
NPPST_INVALID_SCALE, ///< Invalid scale parameter passed
|
||||
NPPST_MEM_INSUFFICIENT_BUFFER, ///< Insufficient user-allocated buffer
|
||||
NPPST_MEM_RESIDENCE_ERROR, ///< Memory residence error detected (check if pointers should be device or pinned)
|
||||
NPPST_MEM_INTERNAL_ERROR, ///< Internal memory management error
|
||||
};
|
||||
|
||||
|
||||
@ -285,11 +355,11 @@ enum NCVStatus
|
||||
|
||||
typedef struct _NcvTimer *NcvTimer;
|
||||
|
||||
NcvTimer ncvStartTimer(void);
|
||||
NCV_EXPORTS NcvTimer ncvStartTimer(void);
|
||||
|
||||
double ncvEndQueryTimerUs(NcvTimer t);
|
||||
NCV_EXPORTS double ncvEndQueryTimerUs(NcvTimer t);
|
||||
|
||||
double ncvEndQueryTimerMs(NcvTimer t);
|
||||
NCV_EXPORTS double ncvEndQueryTimerMs(NcvTimer t);
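A typical timing pattern with these helpers might look as follows; the synchronization call is an assumption so that the measurement covers queued GPU work, and it is also assumed that the End call finalizes the timer handle.

    NcvTimer timer = ncvStartTimer();
    // ... launch kernels ...
    cudaThreadSynchronize();                        // wait for queued GPU work (CUDA 3.x API)
    double elapsedMs = ncvEndQueryTimerMs(timer);   // assumed to stop and release the timer
    ncvDebugOutput("elapsed: %f ms\n", elapsedMs);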
|
||||
|
||||
|
||||
//==============================================================================
|
||||
@ -299,16 +369,10 @@ double ncvEndQueryTimerMs(NcvTimer t);
|
||||
//==============================================================================
|
||||
|
||||
|
||||
/**
|
||||
* Alignment of GPU memory chunks in bytes
|
||||
*/
|
||||
NCVStatus GPUAlignmentValue(Ncv32u &alignment);
|
||||
|
||||
|
||||
/**
|
||||
* Calculates the aligned top bound value
|
||||
*/
|
||||
Ncv32u alignUp(Ncv32u what, Ncv32u alignment);
|
||||
NCV_EXPORTS Ncv32u alignUp(Ncv32u what, Ncv32u alignment);
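alignUp is typically used to round a byte count up to a device alignment, e.g. the texture alignment reported by cudaGetDeviceProperties (a usage sketch, not taken from this header):

    Ncv32u rowBytes   = 600 * sizeof(Ncv32u);               // 2400 bytes
    Ncv32u alignment  = (Ncv32u)devProp.textureAlignment;   // e.g. 512 on many GPUs
    Ncv32u pitchBytes = alignUp(rowBytes, alignment);       // 2400 -> 2560 when alignment is 512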
|
||||
|
||||
|
||||
/**
|
||||
@ -326,7 +390,7 @@ enum NCVMemoryType
|
||||
/**
|
||||
* NCVMemPtr
|
||||
*/
|
||||
struct NCVMemPtr
|
||||
struct NCV_EXPORTS NCVMemPtr
|
||||
{
|
||||
void *ptr;
|
||||
NCVMemoryType memtype;
|
||||
@ -337,7 +401,7 @@ struct NCVMemPtr
|
||||
/**
|
||||
* NCVMemSegment
|
||||
*/
|
||||
struct NCVMemSegment
|
||||
struct NCV_EXPORTS NCVMemSegment
|
||||
{
|
||||
NCVMemPtr begin;
|
||||
size_t size;
|
||||
@ -348,7 +412,7 @@ struct NCVMemSegment
|
||||
/**
|
||||
* INCVMemAllocator (Interface)
|
||||
*/
|
||||
class INCVMemAllocator
|
||||
class NCV_EXPORTS INCVMemAllocator
|
||||
{
|
||||
public:
|
||||
virtual ~INCVMemAllocator() = 0;
|
||||
@ -370,7 +434,7 @@ inline INCVMemAllocator::~INCVMemAllocator() {}
|
||||
/**
|
||||
* NCVMemStackAllocator
|
||||
*/
|
||||
class NCVMemStackAllocator : public INCVMemAllocator
|
||||
class NCV_EXPORTS NCVMemStackAllocator : public INCVMemAllocator
|
||||
{
|
||||
NCVMemStackAllocator();
|
||||
NCVMemStackAllocator(const NCVMemStackAllocator &);
|
||||
@ -378,7 +442,7 @@ class NCVMemStackAllocator : public INCVMemAllocator
|
||||
public:
|
||||
|
||||
explicit NCVMemStackAllocator(Ncv32u alignment);
|
||||
NCVMemStackAllocator(NCVMemoryType memT, size_t capacity, Ncv32u alignment);
|
||||
NCVMemStackAllocator(NCVMemoryType memT, size_t capacity, Ncv32u alignment, void *reusePtr=NULL);
|
||||
virtual ~NCVMemStackAllocator();
|
||||
|
||||
virtual NCVStatus alloc(NCVMemSegment &seg, size_t size);
|
||||
@ -400,17 +464,18 @@ private:
|
||||
Ncv8u *end;
|
||||
size_t currentSize;
|
||||
size_t _maxSize;
|
||||
NcvBool bReusesMemory;
|
||||
};
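With the added reusePtr argument the stack allocator can be layered over memory the caller already owns, so a pipeline can pre-size one scratch block and sub-allocate from it without per-object cudaMalloc calls. A sketch, assuming d_scratch/scratchSize describe an externally owned device buffer and devProp was filled by cudaGetDeviceProperties:

    NCVMemStackAllocator gpuAllocator(NCVMemoryTypeDevice, scratchSize,
                                      (Ncv32u)devProp.textureAlignment, d_scratch);

    {
        NCVMatrixAlloc<Ncv32u> integralImg(gpuAllocator, 641, 481);
        // ... use integralImg; its memory returns to the stack when it goes out of scope,
        // and the reused block itself is never freed by the allocator (bReusesMemory).
    }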
|
||||
|
||||
|
||||
/**
|
||||
* NCVMemNativeAllocator
|
||||
*/
|
||||
class NCVMemNativeAllocator : public INCVMemAllocator
|
||||
class NCV_EXPORTS NCVMemNativeAllocator : public INCVMemAllocator
|
||||
{
|
||||
public:
|
||||
|
||||
NCVMemNativeAllocator(NCVMemoryType memT);
|
||||
NCVMemNativeAllocator(NCVMemoryType memT, Ncv32u alignment);
|
||||
virtual ~NCVMemNativeAllocator();
|
||||
|
||||
virtual NCVStatus alloc(NCVMemSegment &seg, size_t size);
|
||||
@ -438,9 +503,9 @@ private:
|
||||
/**
|
||||
* Copy dispatcher
|
||||
*/
|
||||
NCVStatus memSegCopyHelper(void *dst, NCVMemoryType dstType,
|
||||
const void *src, NCVMemoryType srcType,
|
||||
size_t sz, cudaStream_t cuStream);
|
||||
NCV_EXPORTS NCVStatus memSegCopyHelper(void *dst, NCVMemoryType dstType,
|
||||
const void *src, NCVMemoryType srcType,
|
||||
size_t sz, cudaStream_t cuStream);
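memSegCopyHelper dispatches on the two memory types, so one call covers device-to-host, host-to-device and plain host copies. A sketch copying a device buffer into pageable host memory; count and d_data are hypothetical, and passing stream 0 is assumed to select the default stream.

    std::vector<Ncv32u> h_data(count);
    NCVStatus st = memSegCopyHelper(&h_data[0], NCVMemoryTypeHostPageable,
                                    d_data, NCVMemoryTypeDevice,
                                    count * sizeof(Ncv32u),
                                    0);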
|
||||
|
||||
|
||||
/**
|
||||
@ -514,6 +579,7 @@ class NCVVectorAlloc : public NCVVector<T>
|
||||
{
|
||||
NCVVectorAlloc();
|
||||
NCVVectorAlloc(const NCVVectorAlloc &);
|
||||
NCVVectorAlloc& operator=(const NCVVectorAlloc<T>&);
|
||||
|
||||
public:
|
||||
|
||||
@ -563,8 +629,7 @@ public:
|
||||
return allocatedMem;
|
||||
}
|
||||
|
||||
private:
|
||||
|
||||
private:
|
||||
INCVMemAllocator &allocator;
|
||||
NCVMemSegment allocatedMem;
|
||||
};
|
||||
@ -707,7 +772,7 @@ class NCVMatrixAlloc : public NCVMatrix<T>
|
||||
{
|
||||
NCVMatrixAlloc();
|
||||
NCVMatrixAlloc(const NCVMatrixAlloc &);
|
||||
|
||||
NCVMatrixAlloc& operator=(const NCVMatrixAlloc &);
|
||||
public:
|
||||
|
||||
NCVMatrixAlloc(INCVMemAllocator &allocator, Ncv32u width, Ncv32u height, Ncv32u pitch=0)
|
@@ -1,3 +1,51 @@
|
||||
|
||||
#ifndef _ncvruntimetemplates_hpp_
|
||||
#define _ncvruntimetemplates_hpp_
|
||||
|
||||
#include <stdarg.h>
|
||||
#include <vector>
|
||||
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
// The Loki Library
|
||||
// Copyright (c) 2001 by Andrei Alexandrescu
|
||||
@ -14,13 +62,6 @@
|
||||
// http://loki-lib.sourceforge.net/index.php?n=Main.License
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
#ifndef _ncvruntimetemplates_hpp_
|
||||
#define _ncvruntimetemplates_hpp_
|
||||
|
||||
#include <stdarg.h>
|
||||
#include <vector>
|
||||
|
||||
|
||||
namespace Loki
|
||||
{
|
||||
//==============================================================================
|
@ -68,51 +68,51 @@ namespace cv { namespace gpu { namespace device
|
||||
|
||||
//! Read Write Traits
|
||||
|
||||
template <size_t src_elem_size, size_t dst_elem_size>
|
||||
struct UnReadWriteTraits_
|
||||
{
|
||||
enum {shift=1};
|
||||
};
|
||||
template <size_t src_elem_size>
|
||||
struct UnReadWriteTraits_<src_elem_size, 1>
|
||||
{
|
||||
enum {shift=4};
|
||||
};
|
||||
template <size_t src_elem_size>
|
||||
struct UnReadWriteTraits_<src_elem_size, 2>
|
||||
{
|
||||
enum {shift=2};
|
||||
template <size_t src_elem_size, size_t dst_elem_size>
|
||||
struct UnReadWriteTraits_
|
||||
{
|
||||
enum {shift=1};
|
||||
};
|
||||
template <typename T, typename D> struct UnReadWriteTraits
|
||||
{
|
||||
enum {shift=UnReadWriteTraits_<sizeof(T), sizeof(D)>::shift};
|
||||
|
||||
typedef typename TypeVec<T, shift>::vec_t read_type;
|
||||
typedef typename TypeVec<D, shift>::vec_t write_type;
|
||||
template <size_t src_elem_size>
|
||||
struct UnReadWriteTraits_<src_elem_size, 1>
|
||||
{
|
||||
enum {shift=4};
|
||||
};
|
||||
template <size_t src_elem_size>
|
||||
struct UnReadWriteTraits_<src_elem_size, 2>
|
||||
{
|
||||
enum {shift=2};
|
||||
};
|
||||
template <typename T, typename D> struct UnReadWriteTraits
|
||||
{
|
||||
enum {shift=UnReadWriteTraits_<sizeof(T), sizeof(D)>::shift};
|
||||
|
||||
typedef typename TypeVec<T, shift>::vec_t read_type;
|
||||
typedef typename TypeVec<D, shift>::vec_t write_type;
|
||||
};
|
||||
|
||||
template <size_t src_elem_size1, size_t src_elem_size2, size_t dst_elem_size>
|
||||
struct BinReadWriteTraits_
|
||||
{
|
||||
enum {shift=1};
|
||||
template <size_t src_elem_size1, size_t src_elem_size2, size_t dst_elem_size>
|
||||
struct BinReadWriteTraits_
|
||||
{
|
||||
enum {shift=1};
|
||||
};
|
||||
template <size_t src_elem_size1, size_t src_elem_size2>
|
||||
struct BinReadWriteTraits_<src_elem_size1, src_elem_size2, 1>
|
||||
{
|
||||
enum {shift=4};
|
||||
template <size_t src_elem_size1, size_t src_elem_size2>
|
||||
struct BinReadWriteTraits_<src_elem_size1, src_elem_size2, 1>
|
||||
{
|
||||
enum {shift=4};
|
||||
};
|
||||
template <size_t src_elem_size1, size_t src_elem_size2>
|
||||
struct BinReadWriteTraits_<src_elem_size1, src_elem_size2, 2>
|
||||
{
|
||||
enum {shift=2};
|
||||
template <size_t src_elem_size1, size_t src_elem_size2>
|
||||
struct BinReadWriteTraits_<src_elem_size1, src_elem_size2, 2>
|
||||
{
|
||||
enum {shift=2};
|
||||
};
|
||||
template <typename T1, typename T2, typename D> struct BinReadWriteTraits
|
||||
{
|
||||
enum {shift=BinReadWriteTraits_<sizeof(T1), sizeof(T2), sizeof(D)>::shift};
|
||||
|
||||
typedef typename TypeVec<T1, shift>::vec_t read_type1;
|
||||
typedef typename TypeVec<T2, shift>::vec_t read_type2;
|
||||
typedef typename TypeVec<D , shift>::vec_t write_type;
|
||||
template <typename T1, typename T2, typename D> struct BinReadWriteTraits
|
||||
{
|
||||
enum {shift=BinReadWriteTraits_<sizeof(T1), sizeof(T2), sizeof(D)>::shift};
|
||||
|
||||
typedef typename TypeVec<T1, shift>::vec_t read_type1;
|
||||
typedef typename TypeVec<T2, shift>::vec_t read_type2;
|
||||
typedef typename TypeVec<D , shift>::vec_t write_type;
|
||||
};
|
||||
|
||||
//! Transform kernels
|
||||
@ -122,14 +122,14 @@ namespace cv { namespace gpu { namespace device
|
||||
{
|
||||
template <typename T, typename D, typename UnOp, typename Mask>
|
||||
static __device__ void unroll(const T& src, D& dst, const Mask& mask, UnOp& op, int x_shifted, int y)
|
||||
{
|
||||
{
|
||||
if (mask(y, x_shifted))
|
||||
dst.x = op(src.x);
|
||||
}
|
||||
|
||||
template <typename T1, typename T2, typename D, typename BinOp, typename Mask>
|
||||
static __device__ void unroll(const T1& src1, const T2& src2, D& dst, const Mask& mask, BinOp& op, int x_shifted, int y)
|
||||
{
|
||||
{
|
||||
if (mask(y, x_shifted))
|
||||
dst.x = op(src1.x, src2.x);
|
||||
}
|
||||
@ -138,18 +138,18 @@ namespace cv { namespace gpu { namespace device
|
||||
{
|
||||
template <typename T, typename D, typename UnOp, typename Mask>
|
||||
static __device__ void unroll(const T& src, D& dst, const Mask& mask, UnOp& op, int x_shifted, int y)
|
||||
{
|
||||
{
|
||||
if (mask(y, x_shifted))
|
||||
dst.x = op(src.x);
|
||||
dst.x = op(src.x);
|
||||
if (mask(y, x_shifted + 1))
|
||||
dst.y = op(src.y);
|
||||
}
|
||||
|
||||
template <typename T1, typename T2, typename D, typename BinOp, typename Mask>
|
||||
static __device__ void unroll(const T1& src1, const T2& src2, D& dst, const Mask& mask, BinOp& op, int x_shifted, int y)
|
||||
{
|
||||
{
|
||||
if (mask(y, x_shifted))
|
||||
dst.x = op(src1.x, src2.x);
|
||||
dst.x = op(src1.x, src2.x);
|
||||
if (mask(y, x_shifted + 1))
|
||||
dst.y = op(src1.y, src2.y);
|
||||
}
|
||||
@ -158,22 +158,22 @@ namespace cv { namespace gpu { namespace device
|
||||
{
|
||||
template <typename T, typename D, typename UnOp, typename Mask>
|
||||
static __device__ void unroll(const T& src, D& dst, const Mask& mask, UnOp& op, int x_shifted, int y)
|
||||
{
|
||||
{
|
||||
if (mask(y, x_shifted))
|
||||
dst.x = op(src.x);
|
||||
dst.x = op(src.x);
|
||||
if (mask(y, x_shifted + 1))
|
||||
dst.y = op(src.y);
|
||||
dst.y = op(src.y);
|
||||
if (mask(y, x_shifted + 2))
|
||||
dst.z = op(src.z);
|
||||
}
|
||||
|
||||
template <typename T1, typename T2, typename D, typename BinOp, typename Mask>
|
||||
static __device__ void unroll(const T1& src1, const T2& src2, D& dst, const Mask& mask, BinOp& op, int x_shifted, int y)
|
||||
{
|
||||
{
|
||||
if (mask(y, x_shifted))
|
||||
dst.x = op(src1.x, src2.x);
|
||||
dst.x = op(src1.x, src2.x);
|
||||
if (mask(y, x_shifted + 1))
|
||||
dst.y = op(src1.y, src2.y);
|
||||
dst.y = op(src1.y, src2.y);
|
||||
if (mask(y, x_shifted + 2))
|
||||
dst.z = op(src1.z, src2.z);
|
||||
}
|
||||
@ -182,65 +182,65 @@ namespace cv { namespace gpu { namespace device
|
||||
{
|
||||
template <typename T, typename D, typename UnOp, typename Mask>
|
||||
static __device__ void unroll(const T& src, D& dst, const Mask& mask, UnOp& op, int x_shifted, int y)
|
||||
{
|
||||
{
|
||||
if (mask(y, x_shifted))
|
||||
dst.x = op(src.x);
|
||||
dst.x = op(src.x);
|
||||
if (mask(y, x_shifted + 1))
|
||||
dst.y = op(src.y);
|
||||
dst.y = op(src.y);
|
||||
if (mask(y, x_shifted + 2))
|
||||
dst.z = op(src.z);
|
||||
dst.z = op(src.z);
|
||||
if (mask(y, x_shifted + 3))
|
||||
dst.w = op(src.w);
|
||||
}
|
||||
|
||||
template <typename T1, typename T2, typename D, typename BinOp, typename Mask>
|
||||
static __device__ void unroll(const T1& src1, const T2& src2, D& dst, const Mask& mask, BinOp& op, int x_shifted, int y)
|
||||
{
|
||||
{
|
||||
if (mask(y, x_shifted))
|
||||
dst.x = op(src1.x, src2.x);
|
||||
dst.x = op(src1.x, src2.x);
|
||||
if (mask(y, x_shifted + 1))
|
||||
dst.y = op(src1.y, src2.y);
|
||||
dst.y = op(src1.y, src2.y);
|
||||
if (mask(y, x_shifted + 2))
|
||||
dst.z = op(src1.z, src2.z);
|
||||
dst.z = op(src1.z, src2.z);
|
||||
if (mask(y, x_shifted + 3))
|
||||
dst.w = op(src1.w, src2.w);
|
||||
}
|
||||
};
|
||||
|
||||
template <typename T, typename D, typename UnOp, typename Mask>
|
||||
__global__ static void transformSmart(const DevMem2D_<T> src_, PtrStep_<D> dst_, const Mask mask, UnOp op)
|
||||
{
|
||||
typedef typename UnReadWriteTraits<T, D>::read_type read_type;
|
||||
typedef typename UnReadWriteTraits<T, D>::write_type write_type;
|
||||
const int shift = UnReadWriteTraits<T, D>::shift;
|
||||
|
||||
const int x = threadIdx.x + blockIdx.x * blockDim.x;
|
||||
const int y = threadIdx.y + blockIdx.y * blockDim.y;
|
||||
const int x_shifted = x * shift;
|
||||
|
||||
if (y < src_.rows)
|
||||
{
|
||||
const T* src = src_.ptr(y);
|
||||
D* dst = dst_.ptr(y);
|
||||
|
||||
if (x_shifted + shift - 1 < src_.cols)
|
||||
{
|
||||
read_type src_n_el = ((const read_type*)src)[x];
|
||||
write_type dst_n_el;
|
||||
|
||||
OpUnroller<shift>::unroll(src_n_el, dst_n_el, mask, op, x_shifted, y);
|
||||
|
||||
((write_type*)dst)[x] = dst_n_el;
|
||||
}
|
||||
else
|
||||
{
|
||||
for (int real_x = x_shifted; real_x < src_.cols; ++real_x)
|
||||
{
|
||||
if (mask(y, real_x))
|
||||
dst[real_x] = op(src[real_x]);
|
||||
}
|
||||
}
|
||||
}
|
||||
template <typename T, typename D, typename UnOp, typename Mask>
|
||||
__global__ static void transformSmart(const DevMem2D_<T> src_, PtrStep_<D> dst_, const Mask mask, UnOp op)
|
||||
{
|
||||
typedef typename UnReadWriteTraits<T, D>::read_type read_type;
|
||||
typedef typename UnReadWriteTraits<T, D>::write_type write_type;
|
||||
const int shift = UnReadWriteTraits<T, D>::shift;
|
||||
|
||||
const int x = threadIdx.x + blockIdx.x * blockDim.x;
|
||||
const int y = threadIdx.y + blockIdx.y * blockDim.y;
|
||||
const int x_shifted = x * shift;
|
||||
|
||||
if (y < src_.rows)
|
||||
{
|
||||
const T* src = src_.ptr(y);
|
||||
D* dst = dst_.ptr(y);
|
||||
|
||||
if (x_shifted + shift - 1 < src_.cols)
|
||||
{
|
||||
read_type src_n_el = ((const read_type*)src)[x];
|
||||
write_type dst_n_el;
|
||||
|
||||
OpUnroller<shift>::unroll(src_n_el, dst_n_el, mask, op, x_shifted, y);
|
||||
|
||||
((write_type*)dst)[x] = dst_n_el;
|
||||
}
|
||||
else
|
||||
{
|
||||
for (int real_x = x_shifted; real_x < src_.cols; ++real_x)
|
||||
{
|
||||
if (mask(y, real_x))
|
||||
dst[real_x] = op(src[real_x]);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
template <typename T, typename D, typename UnOp, typename Mask>
|
||||
@ -255,44 +255,44 @@ namespace cv { namespace gpu { namespace device
|
||||
}
|
||||
}
|
||||
|
||||
template <typename T1, typename T2, typename D, typename BinOp, typename Mask>
|
||||
__global__ static void transformSmart(const DevMem2D_<T1> src1_, const PtrStep_<T2> src2_, PtrStep_<D> dst_,
|
||||
const Mask mask, BinOp op)
|
||||
{
|
||||
typedef typename BinReadWriteTraits<T1, T2, D>::read_type1 read_type1;
|
||||
typedef typename BinReadWriteTraits<T1, T2, D>::read_type2 read_type2;
|
||||
typedef typename BinReadWriteTraits<T1, T2, D>::write_type write_type;
|
||||
const int shift = BinReadWriteTraits<T1, T2, D>::shift;
|
||||
|
||||
const int x = threadIdx.x + blockIdx.x * blockDim.x;
|
||||
const int y = threadIdx.y + blockIdx.y * blockDim.y;
|
||||
const int x_shifted = x * shift;
|
||||
|
||||
if (y < src1_.rows)
|
||||
{
|
||||
const T1* src1 = src1_.ptr(y);
|
||||
const T2* src2 = src2_.ptr(y);
|
||||
D* dst = dst_.ptr(y);
|
||||
|
||||
if (x_shifted + shift - 1 < src1_.cols)
|
||||
{
|
||||
read_type1 src1_n_el = ((const read_type1*)src1)[x];
|
||||
read_type2 src2_n_el = ((const read_type2*)src2)[x];
|
||||
write_type dst_n_el;
|
||||
|
||||
OpUnroller<shift>::unroll(src1_n_el, src2_n_el, dst_n_el, mask, op, x_shifted, y);
|
||||
|
||||
((write_type*)dst)[x] = dst_n_el;
|
||||
}
|
||||
else
|
||||
{
|
||||
for (int real_x = x_shifted; real_x < src1_.cols; ++real_x)
|
||||
{
|
||||
if (mask(y, real_x))
|
||||
dst[real_x] = op(src1[real_x], src2[real_x]);
|
||||
}
|
||||
}
|
||||
}
|
||||
template <typename T1, typename T2, typename D, typename BinOp, typename Mask>
|
||||
__global__ static void transformSmart(const DevMem2D_<T1> src1_, const PtrStep_<T2> src2_, PtrStep_<D> dst_,
|
||||
const Mask mask, BinOp op)
|
||||
{
|
||||
typedef typename BinReadWriteTraits<T1, T2, D>::read_type1 read_type1;
|
||||
typedef typename BinReadWriteTraits<T1, T2, D>::read_type2 read_type2;
|
||||
typedef typename BinReadWriteTraits<T1, T2, D>::write_type write_type;
|
||||
const int shift = BinReadWriteTraits<T1, T2, D>::shift;
|
||||
|
||||
const int x = threadIdx.x + blockIdx.x * blockDim.x;
|
||||
const int y = threadIdx.y + blockIdx.y * blockDim.y;
|
||||
const int x_shifted = x * shift;
|
||||
|
||||
if (y < src1_.rows)
|
||||
{
|
||||
const T1* src1 = src1_.ptr(y);
|
||||
const T2* src2 = src2_.ptr(y);
|
||||
D* dst = dst_.ptr(y);
|
||||
|
||||
if (x_shifted + shift - 1 < src1_.cols)
|
||||
{
|
||||
read_type1 src1_n_el = ((const read_type1*)src1)[x];
|
||||
read_type2 src2_n_el = ((const read_type2*)src2)[x];
|
||||
write_type dst_n_el;
|
||||
|
||||
OpUnroller<shift>::unroll(src1_n_el, src2_n_el, dst_n_el, mask, op, x_shifted, y);
|
||||
|
||||
((write_type*)dst)[x] = dst_n_el;
|
||||
}
|
||||
else
|
||||
{
|
||||
for (int real_x = x_shifted; real_x < src1_.cols; ++real_x)
|
||||
{
|
||||
if (mask(y, real_x))
|
||||
dst[real_x] = op(src1[real_x], src2[real_x]);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
template <typename T1, typename T2, typename D, typename BinOp, typename Mask>
|
||||
@ -355,11 +355,11 @@ namespace cv
        template <typename T, typename D, typename UnOp, typename Mask>
        static void call(const DevMem2D_<T>& src, const DevMem2D_<D>& dst, UnOp op, const Mask& mask,
                         cudaStream_t stream = 0)
        {
            const int shift = device::UnReadWriteTraits<T, D>::shift;

            dim3 threads(16, 16, 1);
            dim3 grid(1, 1, 1);

            grid.x = divUp(src.cols, threads.x * shift);
            grid.y = divUp(src.rows, threads.y);
@ -373,7 +373,7 @@ namespace cv
        template <typename T1, typename T2, typename D, typename BinOp, typename Mask>
        static void call(const DevMem2D_<T1>& src1, const DevMem2D_<T2>& src2, const DevMem2D_<D>& dst,
                         BinOp op, const Mask& mask, cudaStream_t stream = 0)
        {
            const int shift = device::BinReadWriteTraits<T1, T2, D>::shift;

            dim3 threads(16, 16, 1);
@ -392,7 +392,7 @@ namespace cv
        template <typename T, typename D, typename UnOp, typename Mask>
        static void transform_caller(const DevMem2D_<T>& src, const DevMem2D_<D>& dst, UnOp op, const Mask& mask,
                                     cudaStream_t stream = 0)
        {
            TransformChooser<device::VecTraits<T>::cn == 1 && device::VecTraits<D>::cn == 1 && device::UnReadWriteTraits<T, D>::shift != 1>::call(src, dst, op, mask, stream);
        }

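The transformSmart kernels above read "shift" elements per thread through a wide read_type value and hand the packed value to an OpUnroller<shift> helper, whose definition is not shown in this hunk. The fragment below is a minimal host-side sketch of the same compile-time lane-unrolling idea; the names LaneUnroller, Packed4 and apply are illustrative stand-ins, not the OpenCV GPU module's types.

#include <cstddef>

// Illustrative compile-time unroller: applies a binary op to N packed lanes,
// mirroring the role OpUnroller<shift> plays for the vectorized path above.
template <int N>
struct LaneUnroller
{
    template <typename V, typename Op>
    static void apply(const V& a, const V& b, V& out, Op op)
    {
        LaneUnroller<N - 1>::apply(a, b, out, op);            // lanes 0..N-2
        out.data[N - 1] = op(a.data[N - 1], b.data[N - 1]);   // lane N-1
    }
};

template <>
struct LaneUnroller<1>
{
    template <typename V, typename Op>
    static void apply(const V& a, const V& b, V& out, Op op)
    {
        out.data[0] = op(a.data[0], b.data[0]);
    }
};

// Packed type standing in for read_type/write_type.
struct Packed4 { int data[4]; };

static int add(int x, int y) { return x + y; }

int main()
{
    Packed4 a = {{1, 2, 3, 4}}, b = {{10, 20, 30, 40}}, c = {{0, 0, 0, 0}};
    LaneUnroller<4>::apply(a, b, c, add);   // c.data == {11, 22, 33, 44}
    return 0;
}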
@ -69,9 +69,9 @@
#include "cufft.h"
#include "opencv2/gpu/stream_accessor.hpp"
#include "npp.h"
#include "npp_staging.h"

#include "nvidia/NCV.hpp"

#include "nvidia/core/NCV.hpp"
#include "nvidia/NPP_staging/npp_staging.hpp"
#include "nvidia/NCVHaarObjectDetection.hpp"

#define CUDART_MINIMUM_REQUIRED_VERSION 3020

@ -1378,7 +1378,7 @@ cmpEpsFlt_(const _Tp* src1, const _Tp* src2, size_t total, int imaxdiff, size_t
    {
        _Tp a = src1[i], b = src2[i];
        if( a < 0 ) a ^= C; if( b < 0 ) b ^= C;
        _Tp d = std::abs(a - b);
        _Tp d = std::abs(double(a - b));
        if( d > imaxdiff )
        {
            idx = i + startidx;

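In the cmpEpsFlt_ hunk above, float values appear to be compared through their sign-magnitude integer representations (the "a ^= C" step remaps negative values so the integer difference bounds the distance in ULPs), and the change wraps the subtraction in double so the absolute difference cannot overflow the integer type. A small self-contained sketch of that comparison technique for 32-bit floats follows; the function name ulpDistance, the choice of mask constant and the memcpy-based reinterpretation are assumptions for illustration, not the test framework's actual code.

#include <cstdint>
#include <cstring>
#include <cmath>

// Distance between two floats measured roughly in units in the last place,
// computed on their sign-magnitude integer representations (illustrative).
static double ulpDistance(float x, float y)
{
    int32_t ix, iy;
    std::memcpy(&ix, &x, sizeof(ix));
    std::memcpy(&iy, &y, sizeof(iy));

    const int32_t C = 0x7fffffff;   // flip magnitude bits of negative values
    if (ix < 0) ix ^= C;
    if (iy < 0) iy ^= C;

    // Subtract in double so the absolute difference cannot overflow int32_t.
    return std::abs(double(ix) - double(iy));
}

// Usage: two floats are "almost equal" if their ULP distance is small, e.g.
// bool close = ulpDistance(1.0f, 1.0000001f) <= 4;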
@ -3,8 +3,8 @@
#
# ----------------------------------------------------------------------------

add_subdirectory(c)
add_subdirectory(cpp)
#add_subdirectory(c)
#add_subdirectory(cpp)
add_subdirectory(gpu)

if(0)

@ -14,10 +14,12 @@ if (BUILD_EXAMPLES)
        "${CMAKE_SOURCE_DIR}/modules/legacy/include"
        "${CMAKE_SOURCE_DIR}/modules/contrib/include"
        "${CMAKE_SOURCE_DIR}/modules/gpu/include"
        "${CMAKE_SOURCE_DIR}/modules/gpu/src/nvidia"
        "${CMAKE_SOURCE_DIR}/modules/gpu/src/nvidia/core"
    )

    if(HAVE_CUDA)
        include_directories(${CUDA_INCLUDE_DIRS})
        include_directories(${CUDA_INCLUDE_DIRS} ${CMAKE_SOURCE_DIR}/modules/gpu/src/nvidia ${CMAKE_SOURCE_DIR}/modules/gpu/src/nvidia/core)
    endif()

    if(CMAKE_COMPILER_IS_GNUCXX)

@ -1,60 +1,25 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2009-2010, NVIDIA Corporation, all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/

#pragma warning( disable : 4201 4408 4127 4100)
#include <cstdio>

#include "cvconfig.h"
#if !defined(HAVE_CUDA)
int main( int argc, const char** argv ) { return printf("Please compile the library with CUDA support."), -1; }
#else

#include <cuda_runtime.h>

#define CV_NO_BACKWARD_COMPATIBILITY

#include "opencv2/opencv.hpp"

#include "NCVHaarObjectDetection.hpp"


using namespace cv;
using namespace std;

const Size preferredVideoFrameSize(640, 480);

string preferredClassifier = "haarcascade_frontalface_alt.xml";
string wndTitle = "NVIDIA Computer Vision SDK :: Face Detection in Video Feed";
const Size2i preferredVideoFrameSize(640, 480);

std::string preferredClassifier = "haarcascade_frontalface_alt.xml";
std::string wndTitle = "NVIDIA Computer Vision SDK :: Face Detection in Video Feed";


void printSyntax(void)
@ -62,7 +27,6 @@ void printSyntax(void)
    printf("Syntax: FaceDetectionFeed.exe [-c cameranum | -v filename] classifier.xml\n");
}


void imagePrintf(Mat& img, int lineOffsY, Scalar color, const char *format, ...)
{
    int fontFace = CV_FONT_HERSHEY_PLAIN;
@ -83,7 +47,6 @@ void imagePrintf(Mat& img, int lineOffsY, Scalar color, const char *format, ...)
    va_end(arg_ptr);
}


NCVStatus process(Mat *srcdst,
                  Ncv32u width, Ncv32u height,
                  NcvBool bShowAllHypotheses, NcvBool bLargestFace,
@ -104,15 +67,16 @@ NCVStatus process(Mat *srcdst,
    ncvAssertReturn(d_src.isMemAllocated(), NCV_ALLOCATOR_BAD_ALLOC);
    NCVMatrixAlloc<Ncv8u> h_src(cpuAllocator, width, height);
    ncvAssertReturn(h_src.isMemAllocated(), NCV_ALLOCATOR_BAD_ALLOC);
    NCVVectorAlloc<NcvRect32u> d_rects(gpuAllocator, 100);
    NCVVectorAlloc<NcvRect32u> d_rects(gpuAllocator, 100);
    ncvAssertReturn(d_rects.isMemAllocated(), NCV_ALLOCATOR_BAD_ALLOC);

    Mat h_src_hdr(Size(width, height), CV_8U, h_src.ptr(), h_src.stride());
    NCV_SKIP_COND_BEGIN

    for (Ncv32u i=0; i<(Ncv32u)srcdst->rows; i++)
    {
        memcpy(h_src.ptr() + i * h_src.stride(), srcdst->ptr(i), srcdst->cols);
    }

    NCV_SKIP_COND_BEGIN

    (*srcdst).copyTo(h_src_hdr);

    ncvStat = h_src.copySolid(d_src, 0);
    ncvAssertReturnNcvStat(ncvStat);
    ncvAssertCUDAReturn(cudaStreamSynchronize(0), NCV_CUDA_ERROR);
@ -130,8 +94,9 @@ NCVStatus process(Mat *srcdst,
        haar.ClassifierSize,
        bShowAllHypotheses ? 0 : 4,
        1.2f, 1,
        (bLargestFace ? NCVPipeObjDet_FindLargestObject : 0) | NCVPipeObjDet_VisualizeInPlace,
        gpuAllocator, cpuAllocator, devProp.major, devProp.minor, 0);
        (bLargestFace ? NCVPipeObjDet_FindLargestObject : 0)
        | NCVPipeObjDet_VisualizeInPlace,
        gpuAllocator, cpuAllocator, devProp, 0);
    ncvAssertReturnNcvStat(ncvStat);
    ncvAssertCUDAReturn(cudaStreamSynchronize(0), NCV_CUDA_ERROR);

@ -141,14 +106,16 @@ NCVStatus process(Mat *srcdst,
    ncvAssertReturnNcvStat(ncvStat);
    ncvAssertCUDAReturn(cudaStreamSynchronize(0), NCV_CUDA_ERROR);

    h_src_hdr.copyTo(*srcdst);

    for (Ncv32u i=0; i<(Ncv32u)srcdst->rows; i++)
    {
        memcpy(srcdst->ptr(i), h_src.ptr() + i * h_src.stride(), srcdst->cols);
    }

    NCV_SKIP_COND_END

    return NCV_SUCCESS;
}


int main( int argc, const char** argv )
{
    NCVStatus ncvStat;
@ -160,14 +127,19 @@ int main( int argc, const char** argv )
    printf(" Space - Switch between NCV and OpenCV\n");
    printf(" L - Switch between FullSearch and LargestFace modes\n");
    printf(" U - Toggle unfiltered hypotheses visualization in FullSearch\n");

    if (argc != 4 && argc != 1)
        return printSyntax(), -1;


    VideoCapture capture;
    Size frameSize;
    bool bQuit = false;

    if (argc == 1 || strcmp(argv[1], "-c") == 0)
    Size2i frameSize;

    if (argc != 4 && argc != 1)
    {
        printSyntax();
        return -1;
    }

    if (argc == 1 || strcmp(argv[1], "-c") == 0)
    {
        // Camera input is specified
        int camIdx = (argc == 3) ? atoi(argv[2]) : 0;
@ -192,14 +164,26 @@ int main( int argc, const char** argv )
        return printSyntax(), -1;

    NcvBool bUseOpenCV = true;
    NcvBool bLargestFace = true;
    NcvBool bShowAllHypotheses = false;
    NcvBool bLargestFace = false; //LargestFace=true is used usually during training
    NcvBool bShowAllHypotheses = false;

    string classifierFile = (argc == 1) ? preferredClassifier : argv[3];

    CascadeClassifier classifierOpenCV;
    std::string classifierFile;
    if (argc == 1)
    {
        classifierFile = preferredClassifier;
    }
    else
    {
        classifierFile.assign(argv[3]);
    }

    if (!classifierOpenCV.load(classifierFile))
        return printf("Error (in OpenCV) opening classifier\n"), printSyntax(), -1;
    {
        printf("Error (in OpenCV) opening classifier\n");
        printSyntax();
        return -1;
    }

    int devId;
    ncvAssertCUDAReturn(cudaGetDevice(&devId), -1);
@ -214,9 +198,9 @@ int main( int argc, const char** argv )
    //
    //==============================================================================

    NCVMemNativeAllocator gpuCascadeAllocator(NCVMemoryTypeDevice);
    NCVMemNativeAllocator gpuCascadeAllocator(NCVMemoryTypeDevice, devProp.textureAlignment);
    ncvAssertPrintReturn(gpuCascadeAllocator.isInitialized(), "Error creating cascade GPU allocator", -1);
    NCVMemNativeAllocator cpuCascadeAllocator(NCVMemoryTypeHostPinned);
    NCVMemNativeAllocator cpuCascadeAllocator(NCVMemoryTypeHostPinned, devProp.textureAlignment);
    ncvAssertPrintReturn(cpuCascadeAllocator.isInitialized(), "Error creating cascade CPU allocator", -1);

    Ncv32u haarNumStages, haarNumNodes, haarNumFeatures;
@ -278,32 +262,36 @@ int main( int argc, const char** argv )
    // Main processing loop
    //
    //==============================================================================

    namedWindow(wndTitle, 1);

    namedWindow(wndTitle, 1);
    Mat frame, gray, frameDisp;

    for(;;)
    do
    {
        // For camera and video file, capture the next image
        // For camera and video file, capture the next image
        capture >> frame;
        if (frame.empty())
            break;


        Mat gray;
        cvtColor(frame, gray, CV_BGR2GRAY);

        //
        // process
        //

        NcvSize32u minSize = haar.ClassifierSize;
        if (bLargestFace)
        {
            Ncv32u ratioX = preferredVideoFrameSize.width / minSize.width;
            Ncv32u ratioY = preferredVideoFrameSize.height / minSize.height;
            Ncv32u ratioSmallest = std::min(ratioX, ratioY);
            ratioSmallest = (Ncv32u)std::max(ratioSmallest / 2.5f, 1.f);
            ratioSmallest = std::max((Ncv32u)(ratioSmallest / 2.5f), (Ncv32u)1);
            minSize.width *= ratioSmallest;
            minSize.height *= ratioSmallest;
        }


        Ncv32f avgTime;
        NcvTimer timer = ncvStartTimer();

        if (!bUseOpenCV)
@ -324,15 +312,16 @@ int main( int argc, const char** argv )
                rectsOpenCV,
                1.2f,
                bShowAllHypotheses && !bLargestFace ? 0 : 4,
                (bLargestFace ? CV_HAAR_FIND_BIGGEST_OBJECT : 0) | CV_HAAR_SCALE_IMAGE,
                (bLargestFace ? CV_HAAR_FIND_BIGGEST_OBJECT : 0)
                | CV_HAAR_SCALE_IMAGE,
                Size(minSize.width, minSize.height));

            for (size_t rt = 0; rt < rectsOpenCV.size(); ++rt)
                rectangle(gray, rectsOpenCV[rt], Scalar(255));
        }

        Ncv32f avgTime = (Ncv32f)ncvEndQueryTimerMs(timer);

        avgTime = (Ncv32f)ncvEndQueryTimerMs(timer);

        cvtColor(gray, frameDisp, CV_GRAY2BGR);

        imagePrintf(frameDisp, 0, CV_RGB(255, 0,0), "Space - Switch NCV%s / OpenCV%s", bUseOpenCV?"":" (ON)", bUseOpenCV?" (ON)":"");
@ -347,16 +336,25 @@ int main( int argc, const char** argv )
        case ' ':
            bUseOpenCV = !bUseOpenCV;
            break;
        case 'L':case 'l':
        case 'L':
        case 'l':
            bLargestFace = !bLargestFace;
            break;
        case 'U':case 'u':
        case 'U':
        case 'u':
            bShowAllHypotheses = !bShowAllHypotheses;
            break;
        case 27:
            return 0;
            bQuit = true;
            break;
        }
    }

    } while (!bQuit);

    cvDestroyWindow(wndTitle.c_str());

    return 0;
}


#endif

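The process() hunks above drop the per-row memcpy loops and instead wrap the pinned NCV host buffer in a cv::Mat header (h_src_hdr), so the whole frame moves with a single copyTo in each direction. The snippet below is a minimal, self-contained illustration of wrapping externally allocated memory in a Mat header; the buffer, stride and sizes are invented for the example and do not come from the sample.

#include <opencv2/opencv.hpp>
#include <vector>

int main()
{
    const int width = 640, height = 480;

    // Stand-in for the pinned host buffer owned by the NCV allocator;
    // the stride is padded to show a pitch wider than the row.
    const size_t stride = 768;
    std::vector<unsigned char> pinned(stride * height);

    cv::Mat frame(cv::Size(width, height), CV_8U, cv::Scalar(128));

    // Header over external memory: no allocation, Mat does not take ownership.
    cv::Mat h_src_hdr(cv::Size(width, height), CV_8U, pinned.data(), stride);

    frame.copyTo(h_src_hdr);   // one call replaces the row-by-row memcpy loop
    h_src_hdr.copyTo(frame);   // and the reverse copy after processing
    return 0;
}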
@ -3,12 +3,15 @@
# ----------------------------------------------------------------------------
project(opencv_test_gpu)

set(the_target "opencv_test_gpu")

file(GLOB test_srcs "src/*.cpp")
source_group("Src" FILES ${test_srcs})
file(GLOB test_hdrs "src/*.h*")
source_group("Src" FILES ${test_srcs})
source_group("Include" FILES ${test_hdrs})

set(the_target "opencv_test_gpu")


include_directories (
    "${CMAKE_SOURCE_DIR}/include/opencv"
@ -26,11 +29,21 @@ include_directories (
    "${CMAKE_SOURCE_DIR}/modules/ml/include"
    "${CMAKE_CURRENT_SOURCE_DIR}/src"
    "${CMAKE_CURRENT_BINARY_DIR}"
    "${CMAKE_SOURCE_DIR}/modules/gpu/src/nvidia"
    "${CMAKE_SOURCE_DIR}/modules/gpu/src/nvidia/core"
    )

include_directories(../cxts)

add_executable(${the_target} ${test_srcs} ${test_hdrs})
if(HAVE_CUDA)
    include_directories(${CUDA_INCLUDE_DIRS} ${CMAKE_SOURCE_DIR}/modules/gpu/src/nvidia ${CMAKE_SOURCE_DIR}/modules/gpu/src/nvidia/core ${CMAKE_SOURCE_DIR}/modules/gpu/src/nvidia/NPP_staging)

    file(GLOB nvidia "src/nvidia/*.*")
    SET(ncv_cpp ../../modules/gpu/src/nvidia/core/NCV.cpp)
    source_group("Src\\nvidia" FILES ${nvidia})
endif()

add_executable(${the_target} ${test_srcs} ${test_hdrs} ${nvidia} ${ncv_cpp})

# Additional target properties
set_target_properties(${the_target} PROPERTIES

127
tests/gpu/src/nvidia/NCVAutoTestLister.hpp
Normal file
@ -0,0 +1,127 @@
|
||||
/*
|
||||
* Copyright 1993-2010 NVIDIA Corporation. All rights reserved.
|
||||
*
|
||||
* NVIDIA Corporation and its licensors retain all intellectual
|
||||
* property and proprietary rights in and to this software and
|
||||
* related documentation and any modifications thereto.
|
||||
* Any use, reproduction, disclosure, or distribution of this
|
||||
* software and related documentation without an express license
|
||||
* agreement from NVIDIA Corporation is strictly prohibited.
|
||||
*/
|
||||
#ifndef _ncvautotestlister_hpp_
|
||||
#define _ncvautotestlister_hpp_
|
||||
|
||||
#include <vector>
|
||||
|
||||
#include "NCVTest.hpp"
|
||||
|
||||
class NCVAutoTestLister
|
||||
{
|
||||
public:
|
||||
|
||||
NCVAutoTestLister(std::string testSuiteName, NcvBool bStopOnFirstFail=false, NcvBool bCompactOutput=true)
|
||||
:
|
||||
testSuiteName(testSuiteName),
|
||||
bStopOnFirstFail(bStopOnFirstFail),
|
||||
bCompactOutput(bCompactOutput)
|
||||
{
|
||||
}
|
||||
|
||||
void add(INCVTest *test)
|
||||
{
|
||||
this->tests.push_back(test);
|
||||
}
|
||||
|
||||
void invoke()
|
||||
{
|
||||
Ncv32u nPassed = 0;
|
||||
Ncv32u nFailed = 0;
|
||||
Ncv32u nFailedMem = 0;
|
||||
|
||||
if (bCompactOutput)
|
||||
{
|
||||
printf("Test suite '%s' with %d tests\n",
|
||||
testSuiteName.c_str(),
|
||||
(int)(this->tests.size()));
|
||||
}
|
||||
|
||||
for (Ncv32u i=0; i<this->tests.size(); i++)
|
||||
{
|
||||
INCVTest &curTest = *tests[i];
|
||||
|
||||
NCVTestReport curReport;
|
||||
bool res = curTest.executeTest(curReport);
|
||||
|
||||
if (!bCompactOutput)
|
||||
{
|
||||
printf("Test %3i %16s; Consumed mem GPU = %8d, CPU = %8d; %s\n",
|
||||
i,
|
||||
curTest.getName().c_str(),
|
||||
curReport.statsNums["MemGPU"],
|
||||
curReport.statsNums["MemCPU"],
|
||||
curReport.statsText["rcode"].c_str());
|
||||
}
|
||||
|
||||
if (res)
|
||||
{
|
||||
nPassed++;
|
||||
if (bCompactOutput)
|
||||
{
|
||||
printf(".");
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
if (!curReport.statsText["rcode"].compare("FAILED"))
|
||||
{
|
||||
nFailed++;
|
||||
if (bCompactOutput)
|
||||
{
|
||||
printf("x");
|
||||
}
|
||||
if (bStopOnFirstFail)
|
||||
{
|
||||
break;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
nFailedMem++;
|
||||
if (bCompactOutput)
|
||||
{
|
||||
printf("m");
|
||||
}
|
||||
}
|
||||
}
|
||||
fflush(stdout);
|
||||
}
|
||||
if (bCompactOutput)
|
||||
{
|
||||
printf("\n");
|
||||
}
|
||||
|
||||
printf("Test suite '%s' complete: %d total, %d passed, %d memory errors, %d failed\n\n",
|
||||
testSuiteName.c_str(),
|
||||
(int)(this->tests.size()),
|
||||
nPassed,
|
||||
nFailedMem,
|
||||
nFailed);
|
||||
}
|
||||
|
||||
~NCVAutoTestLister()
|
||||
{
|
||||
for (Ncv32u i=0; i<this->tests.size(); i++)
|
||||
{
|
||||
delete tests[i];
|
||||
}
|
||||
}
|
||||
|
||||
private:
|
||||
|
||||
NcvBool bStopOnFirstFail;
|
||||
NcvBool bCompactOutput;
|
||||
std::string testSuiteName;
|
||||
std::vector<INCVTest *> tests;
|
||||
};
|
||||
|
||||
#endif // _ncvautotestlister_hpp_
|
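NCVAutoTestLister, defined in the new header just above, owns the heap-allocated tests added to it, runs them in invoke(), and deletes them in its destructor. A short usage sketch follows; the concrete test class (TestCompact) and the source-provider come from the other new files in this commit, but the specific sizes and names are only illustrative.

#include "NCVAutoTestLister.hpp"
#include "NCVTestSourceProvider.hpp"
#include "TestCompact.h"

int main()
{
    // Random 32-bit source data shared by the tests (illustrative parameters).
    NCVTestSourceProvider<Ncv32u> src32u(2010, 0, 0xFFFFFFFF, 4096, 4096);

    NCVAutoTestLister lister("NPPST Compact", /*bStopOnFirstFail=*/false, /*bCompactOutput=*/true);

    // The lister takes ownership of the tests and deletes them itself.
    lister.add(new TestCompact("compact_small", src32u, 1024, 0xC001C0DE, 30));
    lister.add(new TestCompact("compact_large", src32u, 1048576, 0xC001C0DE, 30));

    lister.invoke();   // runs every test and prints the pass/fail summary
    return 0;
}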
211
tests/gpu/src/nvidia/NCVTest.hpp
Normal file
@ -0,0 +1,211 @@
|
||||
/*
|
||||
* Copyright 1993-2010 NVIDIA Corporation. All rights reserved.
|
||||
*
|
||||
* NVIDIA Corporation and its licensors retain all intellectual
|
||||
* property and proprietary rights in and to this software and
|
||||
* related documentation and any modifications thereto.
|
||||
* Any use, reproduction, disclosure, or distribution of this
|
||||
* software and related documentation without an express license
|
||||
* agreement from NVIDIA Corporation is strictly prohibited.
|
||||
*/
|
||||
#ifndef _ncvtest_hpp_
|
||||
#define _ncvtest_hpp_
|
||||
|
||||
#pragma warning( disable : 4201 4408 4127 4100)
|
||||
|
||||
#include <string>
|
||||
#include <vector>
|
||||
#include <map>
|
||||
#include <memory>
|
||||
#include <algorithm>
|
||||
#include <fstream>
|
||||
|
||||
#include <cuda_runtime.h>
|
||||
#include "NPP_staging.hpp"
|
||||
|
||||
|
||||
struct NCVTestReport
|
||||
{
|
||||
std::map<std::string, Ncv32u> statsNums;
|
||||
std::map<std::string, std::string> statsText;
|
||||
};
|
||||
|
||||
|
||||
class INCVTest
|
||||
{
|
||||
public:
|
||||
virtual bool executeTest(NCVTestReport &report) = 0;
|
||||
virtual std::string getName() const = 0;
|
||||
};
|
||||
|
||||
|
||||
class NCVTestProvider : public INCVTest
|
||||
{
|
||||
public:
|
||||
|
||||
NCVTestProvider(std::string testName)
|
||||
:
|
||||
testName(testName)
|
||||
{
|
||||
int devId;
|
||||
ncvAssertPrintReturn(cudaSuccess == cudaGetDevice(&devId), "Error returned from cudaGetDevice", );
|
||||
ncvAssertPrintReturn(cudaSuccess == cudaGetDeviceProperties(&this->devProp, devId), "Error returned from cudaGetDeviceProperties", );
|
||||
}
|
||||
|
||||
virtual bool init() = 0;
|
||||
virtual bool process() = 0;
|
||||
virtual bool deinit() = 0;
|
||||
virtual bool toString(std::ofstream &strOut) = 0;
|
||||
|
||||
virtual std::string getName() const
|
||||
{
|
||||
return this->testName;
|
||||
}
|
||||
|
||||
virtual ~NCVTestProvider()
|
||||
{
|
||||
deinitMemory();
|
||||
}
|
||||
|
||||
virtual bool executeTest(NCVTestReport &report)
|
||||
{
|
||||
bool res;
|
||||
report.statsText["rcode"] = "FAILED";
|
||||
|
||||
res = initMemory(report);
|
||||
if (!res)
|
||||
{
|
||||
dumpToFile(report);
|
||||
deinitMemory();
|
||||
return false;
|
||||
}
|
||||
|
||||
res = init();
|
||||
if (!res)
|
||||
{
|
||||
dumpToFile(report);
|
||||
deinit();
|
||||
deinitMemory();
|
||||
return false;
|
||||
}
|
||||
|
||||
res = process();
|
||||
if (!res)
|
||||
{
|
||||
dumpToFile(report);
|
||||
deinit();
|
||||
deinitMemory();
|
||||
return false;
|
||||
}
|
||||
|
||||
res = deinit();
|
||||
if (!res)
|
||||
{
|
||||
dumpToFile(report);
|
||||
deinitMemory();
|
||||
return false;
|
||||
}
|
||||
|
||||
deinitMemory();
|
||||
|
||||
report.statsText["rcode"] = "Passed";
|
||||
return true;
|
||||
}
|
||||
|
||||
protected:
|
||||
|
||||
cudaDeviceProp devProp;
|
||||
std::auto_ptr<INCVMemAllocator> allocatorGPU;
|
||||
std::auto_ptr<INCVMemAllocator> allocatorCPU;
|
||||
|
||||
private:
|
||||
|
||||
std::string testName;
|
||||
|
||||
bool initMemory(NCVTestReport &report)
|
||||
{
|
||||
this->allocatorGPU.reset(new NCVMemStackAllocator(devProp.textureAlignment));
|
||||
this->allocatorCPU.reset(new NCVMemStackAllocator(devProp.textureAlignment));
|
||||
|
||||
if (!this->allocatorGPU.get()->isInitialized() ||
|
||||
!this->allocatorCPU.get()->isInitialized())
|
||||
{
|
||||
report.statsText["rcode"] = "Memory FAILED";
|
||||
return false;
|
||||
}
|
||||
|
||||
if (!this->process())
|
||||
{
|
||||
report.statsText["rcode"] = "Memory FAILED";
|
||||
return false;
|
||||
}
|
||||
|
||||
Ncv32u maxGPUsize = (Ncv32u)this->allocatorGPU.get()->maxSize();
|
||||
Ncv32u maxCPUsize = (Ncv32u)this->allocatorCPU.get()->maxSize();
|
||||
|
||||
report.statsNums["MemGPU"] = maxGPUsize;
|
||||
report.statsNums["MemCPU"] = maxCPUsize;
|
||||
|
||||
this->allocatorGPU.reset(new NCVMemStackAllocator(NCVMemoryTypeDevice, maxGPUsize, devProp.textureAlignment));
|
||||
|
||||
this->allocatorCPU.reset(new NCVMemStackAllocator(NCVMemoryTypeHostPinned, maxCPUsize, devProp.textureAlignment));
|
||||
|
||||
if (!this->allocatorGPU.get()->isInitialized() ||
|
||||
!this->allocatorCPU.get()->isInitialized())
|
||||
{
|
||||
report.statsText["rcode"] = "Memory FAILED";
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
void deinitMemory()
|
||||
{
|
||||
this->allocatorGPU.reset();
|
||||
this->allocatorCPU.reset();
|
||||
}
|
||||
|
||||
void dumpToFile(NCVTestReport &report)
|
||||
{
|
||||
bool bReasonMem = (0 == report.statsText["rcode"].compare("Memory FAILED"));
|
||||
std::string fname = "TestDump_";
|
||||
fname += (bReasonMem ? "m_" : "") + this->testName + ".log";
|
||||
std::ofstream stream(fname.c_str(), std::ios::trunc | std::ios::out);
|
||||
if (!stream.is_open()) return;
|
||||
|
||||
stream << "NCV Test Failure Log: " << this->testName << std::endl;
|
||||
stream << "====================================================" << std::endl << std::endl;
|
||||
stream << "Test initialization report: " << std::endl;
|
||||
for (std::map<std::string,std::string>::iterator it=report.statsText.begin();
|
||||
it != report.statsText.end(); it++)
|
||||
{
|
||||
stream << it->first << "=" << it->second << std::endl;
|
||||
}
|
||||
for (std::map<std::string,Ncv32u>::iterator it=report.statsNums.begin();
|
||||
it != report.statsNums.end(); it++)
|
||||
{
|
||||
stream << it->first << "=" << it->second << std::endl;
|
||||
}
|
||||
stream << std::endl;
|
||||
|
||||
stream << "Test initialization parameters: " << std::endl;
|
||||
bool bSerializeRes = false;
|
||||
try
|
||||
{
|
||||
bSerializeRes = this->toString(stream);
|
||||
}
|
||||
catch (...)
|
||||
{
|
||||
}
|
||||
|
||||
if (!bSerializeRes)
|
||||
{
|
||||
stream << "Couldn't retrieve object dump" << std::endl;
|
||||
}
|
||||
|
||||
stream.flush();
|
||||
}
|
||||
};
|
||||
|
||||
#endif // _ncvtest_hpp_
|
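NCVTestProvider::initMemory above runs each test in two passes: first with counting NCVMemStackAllocator instances that only record the peak requested size, then with real device and pinned-host stack allocators sized to exactly that peak. The fragment below restates that pattern on its own, reusing the NCV allocator API shown in this commit; the wrapper function runWithSizedAllocators and the runPass callback are hypothetical names for the sketch.

#include <memory>
#include <cuda_runtime.h>
#include "NCV.hpp"

// Two-pass sizing: measure with counting allocators, then allocate for real.
// runPass stands in for the test's process() call and receives the allocators.
template <typename F>
bool runWithSizedAllocators(const cudaDeviceProp &devProp, F runPass)
{
    // Pass 1: counting allocators record the maximum memory the pass requests.
    std::auto_ptr<INCVMemAllocator> gpuCounter(new NCVMemStackAllocator(devProp.textureAlignment));
    std::auto_ptr<INCVMemAllocator> cpuCounter(new NCVMemStackAllocator(devProp.textureAlignment));
    if (!runPass(*gpuCounter, *cpuCounter))
        return false;

    Ncv32u maxGPU = (Ncv32u)gpuCounter->maxSize();
    Ncv32u maxCPU = (Ncv32u)cpuCounter->maxSize();

    // Pass 2: real stack allocators sized to the measured peak usage.
    std::auto_ptr<INCVMemAllocator> gpu(new NCVMemStackAllocator(NCVMemoryTypeDevice, maxGPU, devProp.textureAlignment));
    std::auto_ptr<INCVMemAllocator> cpu(new NCVMemStackAllocator(NCVMemoryTypeHostPinned, maxCPU, devProp.textureAlignment));
    if (!gpu->isInitialized() || !cpu->isInitialized())
        return false;

    return runPass(*gpu, *cpu);
}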
161
tests/gpu/src/nvidia/NCVTestSourceProvider.hpp
Normal file
@ -0,0 +1,161 @@
|
||||
/*
|
||||
* Copyright 1993-2010 NVIDIA Corporation. All rights reserved.
|
||||
*
|
||||
* NVIDIA Corporation and its licensors retain all intellectual
|
||||
* property and proprietary rights in and to this software and
|
||||
* related documentation and any modifications thereto.
|
||||
* Any use, reproduction, disclosure, or distribution of this
|
||||
* software and related documentation without an express license
|
||||
* agreement from NVIDIA Corporation is strictly prohibited.
|
||||
*/
|
||||
#ifndef _ncvtestsourceprovider_hpp_
|
||||
#define _ncvtestsourceprovider_hpp_
|
||||
|
||||
#include <memory>
|
||||
|
||||
#include "NCV.hpp"
|
||||
#include <opencv2/highgui/highgui.hpp>
|
||||
|
||||
|
||||
template <class T>
|
||||
class NCVTestSourceProvider
|
||||
{
|
||||
public:
|
||||
|
||||
NCVTestSourceProvider(Ncv32u seed, T rangeLow, T rangeHigh, Ncv32u maxWidth, Ncv32u maxHeight)
|
||||
:
|
||||
bInit(false)
|
||||
{
|
||||
ncvAssertPrintReturn(rangeLow < rangeHigh, "NCVTestSourceProvider ctor:: Invalid range", );
|
||||
|
||||
int devId;
|
||||
cudaDeviceProp devProp;
|
||||
ncvAssertPrintReturn(cudaSuccess == cudaGetDevice(&devId), "Error returned from cudaGetDevice", );
|
||||
ncvAssertPrintReturn(cudaSuccess == cudaGetDeviceProperties(&devProp, devId), "Error returned from cudaGetDeviceProperties", );
|
||||
|
||||
//Ncv32u maxWpitch = alignUp(maxWidth * sizeof(T), devProp.textureAlignment);
|
||||
|
||||
allocatorCPU.reset(new NCVMemNativeAllocator(NCVMemoryTypeHostPinned, devProp.textureAlignment));
|
||||
data.reset(new NCVMatrixAlloc<T>(*this->allocatorCPU.get(), maxWidth, maxHeight));
|
||||
ncvAssertPrintReturn(data.get()->isMemAllocated(), "NCVTestSourceProvider ctor:: Matrix not allocated", );
|
||||
|
||||
this->dataWidth = maxWidth;
|
||||
this->dataHeight = maxHeight;
|
||||
|
||||
srand(seed);
|
||||
|
||||
for (Ncv32u i=0; i<maxHeight; i++)
|
||||
{
|
||||
for (Ncv32u j=0; j<data.get()->stride(); j++)
|
||||
{
|
||||
data.get()->ptr()[i * data.get()->stride() + j] =
|
||||
(T)(((1.0 * rand()) / RAND_MAX) * (rangeHigh - rangeLow) + rangeLow);
|
||||
}
|
||||
}
|
||||
|
||||
this->bInit = true;
|
||||
}
|
||||
|
||||
NCVTestSourceProvider(std::string pgmFilename)
|
||||
:
|
||||
bInit(false)
|
||||
{
|
||||
ncvAssertPrintReturn(sizeof(T) == 1, "NCVTestSourceProvider ctor:: PGM constructor complies only with 8bit types", );
|
||||
|
||||
cv::Mat image = cv::imread(pgmFilename);
|
||||
ncvAssertPrintReturn(!image.empty(), "NCVTestSourceProvider ctor:: PGM file error", );
|
||||
|
||||
int devId;
|
||||
cudaDeviceProp devProp;
|
||||
ncvAssertPrintReturn(cudaSuccess == cudaGetDevice(&devId), "Error returned from cudaGetDevice", );
|
||||
ncvAssertPrintReturn(cudaSuccess == cudaGetDeviceProperties(&devProp, devId), "Error returned from cudaGetDeviceProperties", );
|
||||
|
||||
allocatorCPU.reset(new NCVMemNativeAllocator(NCVMemoryTypeHostPinned, devProp.textureAlignment));
|
||||
data.reset(new NCVMatrixAlloc<T>(*this->allocatorCPU.get(), image.cols, image.rows));
|
||||
ncvAssertPrintReturn(data.get()->isMemAllocated(), "NCVTestSourceProvider ctor:: Matrix not allocated", );
|
||||
|
||||
this->dataWidth = image.cols;
|
||||
this->dataHeight = image.rows;
|
||||
|
||||
cv::Mat hdr(image.size(), CV_8UC1, data.get()->ptr(), data.get()->pitch());
|
||||
image.copyTo(hdr);
|
||||
|
||||
this->bInit = true;
|
||||
}
|
||||
|
||||
NcvBool fill(NCVMatrix<T> &dst)
|
||||
{
|
||||
ncvAssertReturn(this->isInit() &&
|
||||
dst.memType() == allocatorCPU.get()->memType(), false);
|
||||
|
||||
if (dst.width() == 0 || dst.height() == 0)
|
||||
{
|
||||
return true;
|
||||
}
|
||||
|
||||
for (Ncv32u i=0; i<dst.height(); i++)
|
||||
{
|
||||
Ncv32u srcLine = i % this->dataHeight;
|
||||
|
||||
Ncv32u srcFullChunks = dst.width() / this->dataWidth;
|
||||
for (Ncv32u j=0; j<srcFullChunks; j++)
|
||||
{
|
||||
memcpy(dst.ptr() + i * dst.stride() + j * this->dataWidth,
|
||||
this->data.get()->ptr() + this->data.get()->stride() * srcLine,
|
||||
this->dataWidth * sizeof(T));
|
||||
}
|
||||
|
||||
Ncv32u srcLastChunk = dst.width() % this->dataWidth;
|
||||
memcpy(dst.ptr() + i * dst.stride() + srcFullChunks * this->dataWidth,
|
||||
this->data.get()->ptr() + this->data.get()->stride() * srcLine,
|
||||
srcLastChunk * sizeof(T));
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
NcvBool fill(NCVVector<T> &dst)
|
||||
{
|
||||
ncvAssertReturn(this->isInit() &&
|
||||
dst.memType() == allocatorCPU.get()->memType(), false);
|
||||
|
||||
if (dst.length() == 0)
|
||||
{
|
||||
return true;
|
||||
}
|
||||
|
||||
Ncv32u srcLen = this->dataWidth * this->dataHeight;
|
||||
|
||||
Ncv32u srcFullChunks = (Ncv32u)dst.length() / srcLen;
|
||||
for (Ncv32u j=0; j<srcFullChunks; j++)
|
||||
{
|
||||
memcpy(dst.ptr() + j * srcLen, this->data.get()->ptr(), srcLen * sizeof(T));
|
||||
}
|
||||
|
||||
Ncv32u srcLastChunk = dst.length() % srcLen;
|
||||
memcpy(dst.ptr() + srcFullChunks * srcLen, this->data.get()->ptr(), srcLastChunk * sizeof(T));
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
~NCVTestSourceProvider()
|
||||
{
|
||||
data.reset();
|
||||
allocatorCPU.reset();
|
||||
}
|
||||
|
||||
private:
|
||||
|
||||
NcvBool isInit(void)
|
||||
{
|
||||
return this->bInit;
|
||||
}
|
||||
|
||||
NcvBool bInit;
|
||||
std::auto_ptr< INCVMemAllocator > allocatorCPU;
|
||||
std::auto_ptr< NCVMatrixAlloc<T> > data;
|
||||
Ncv32u dataWidth;
|
||||
Ncv32u dataHeight;
|
||||
};
|
||||
|
||||
#endif // _ncvtestsourceprovider_hpp_
|
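NCVTestSourceProvider::fill above replicates its cached data block across a destination of arbitrary size: whole horizontal chunks first, then the remainder of the row. A compact host-only restatement of that tiling loop with plain pointers is given below; the function name tileRow and the buffer sizes are invented for the example.

#include <cstring>
#include <vector>

// Tile a srcWidth-wide source row across a dstWidth-wide destination row:
// full chunks first, then the partial chunk at the end (same scheme as fill()).
static void tileRow(unsigned char *dst, size_t dstWidth,
                    const unsigned char *src, size_t srcWidth)
{
    size_t fullChunks = dstWidth / srcWidth;
    for (size_t j = 0; j < fullChunks; j++)
        std::memcpy(dst + j * srcWidth, src, srcWidth);

    size_t lastChunk = dstWidth % srcWidth;
    std::memcpy(dst + fullChunks * srcWidth, src, lastChunk);
}

int main()
{
    std::vector<unsigned char> src(64, 0xAB), dst(200);
    tileRow(dst.data(), dst.size(), src.data(), src.size());
    return 0;
}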
129
tests/gpu/src/nvidia/TestCompact.cpp
Normal file
@ -0,0 +1,129 @@
|
||||
/*
|
||||
* Copyright 1993-2010 NVIDIA Corporation. All rights reserved.
|
||||
*
|
||||
* NVIDIA Corporation and its licensors retain all intellectual
|
||||
* property and proprietary rights in and to this software and
|
||||
* related documentation and any modifications thereto.
|
||||
* Any use, reproduction, disclosure, or distribution of this
|
||||
* software and related documentation without an express license
|
||||
* agreement from NVIDIA Corporation is strictly prohibited.
|
||||
*/
|
||||
|
||||
#include "TestCompact.h"
|
||||
|
||||
|
||||
TestCompact::TestCompact(std::string testName, NCVTestSourceProvider<Ncv32u> &src,
|
||||
Ncv32u length, Ncv32u badElem, Ncv32u badElemPercentage)
|
||||
:
|
||||
NCVTestProvider(testName),
|
||||
src(src),
|
||||
length(length),
|
||||
badElem(badElem),
|
||||
badElemPercentage(badElemPercentage > 100 ? 100 : badElemPercentage)
|
||||
{
|
||||
}
|
||||
|
||||
|
||||
bool TestCompact::toString(std::ofstream &strOut)
|
||||
{
|
||||
strOut << "length=" << length << std::endl;
|
||||
strOut << "badElem=" << badElem << std::endl;
|
||||
strOut << "badElemPercentage=" << badElemPercentage << std::endl;
|
||||
return true;
|
||||
}
|
||||
|
||||
|
||||
bool TestCompact::init()
|
||||
{
|
||||
return true;
|
||||
}
|
||||
|
||||
|
||||
bool TestCompact::process()
|
||||
{
|
||||
NCVStatus ncvStat;
|
||||
bool rcode = false;
|
||||
|
||||
NCVVectorAlloc<Ncv32u> h_vecSrc(*this->allocatorCPU.get(), this->length);
|
||||
ncvAssertReturn(h_vecSrc.isMemAllocated(), false);
|
||||
NCVVectorAlloc<Ncv32u> d_vecSrc(*this->allocatorGPU.get(), this->length);
|
||||
ncvAssertReturn(d_vecSrc.isMemAllocated(), false);
|
||||
|
||||
NCVVectorAlloc<Ncv32u> h_vecDst(*this->allocatorCPU.get(), this->length);
|
||||
ncvAssertReturn(h_vecDst.isMemAllocated(), false);
|
||||
NCVVectorAlloc<Ncv32u> d_vecDst(*this->allocatorGPU.get(), this->length);
|
||||
ncvAssertReturn(d_vecDst.isMemAllocated(), false);
|
||||
NCVVectorAlloc<Ncv32u> h_vecDst_d(*this->allocatorCPU.get(), this->length);
|
||||
ncvAssertReturn(h_vecDst_d.isMemAllocated(), false);
|
||||
|
||||
NCV_SET_SKIP_COND(this->allocatorGPU.get()->isCounting());
|
||||
NCV_SKIP_COND_BEGIN
|
||||
ncvAssertReturn(this->src.fill(h_vecSrc), false);
|
||||
for (Ncv32u i=0; i<this->length; i++)
|
||||
{
|
||||
Ncv32u tmp = (h_vecSrc.ptr()[i]) & 0xFF;
|
||||
tmp = tmp * 99 / 255;
|
||||
if (tmp < this->badElemPercentage)
|
||||
{
|
||||
h_vecSrc.ptr()[i] = this->badElem;
|
||||
}
|
||||
}
|
||||
NCV_SKIP_COND_END
|
||||
|
||||
NCVVectorAlloc<Ncv32u> h_dstLen(*this->allocatorCPU.get(), 1);
|
||||
ncvAssertReturn(h_dstLen.isMemAllocated(), false);
|
||||
Ncv32u bufSize;
|
||||
ncvStat = nppsStCompactGetSize_32u(this->length, &bufSize, this->devProp);
|
||||
ncvAssertReturn(NPPST_SUCCESS == ncvStat, false);
|
||||
NCVVectorAlloc<Ncv8u> d_tmpBuf(*this->allocatorGPU.get(), bufSize);
|
||||
ncvAssertReturn(d_tmpBuf.isMemAllocated(), false);
|
||||
|
||||
Ncv32u h_outElemNum_h = 0;
|
||||
|
||||
NCV_SKIP_COND_BEGIN
|
||||
ncvStat = h_vecSrc.copySolid(d_vecSrc, 0);
|
||||
ncvAssertReturn(ncvStat == NPPST_SUCCESS, false);
|
||||
ncvStat = nppsStCompact_32u(d_vecSrc.ptr(), this->length,
|
||||
d_vecDst.ptr(), h_dstLen.ptr(), this->badElem,
|
||||
d_tmpBuf.ptr(), bufSize, this->devProp);
|
||||
ncvAssertReturn(ncvStat == NPPST_SUCCESS, false);
|
||||
ncvStat = d_vecDst.copySolid(h_vecDst_d, 0);
|
||||
ncvAssertReturn(ncvStat == NPPST_SUCCESS, false);
|
||||
|
||||
ncvStat = nppsStCompact_32u_host(h_vecSrc.ptr(), this->length, h_vecDst.ptr(), &h_outElemNum_h, this->badElem);
|
||||
ncvAssertReturn(ncvStat == NPPST_SUCCESS, false);
|
||||
NCV_SKIP_COND_END
|
||||
|
||||
//bit-to-bit check
|
||||
bool bLoopVirgin = true;
|
||||
|
||||
NCV_SKIP_COND_BEGIN
|
||||
if (h_dstLen.ptr()[0] != h_outElemNum_h)
|
||||
{
|
||||
bLoopVirgin = false;
|
||||
}
|
||||
else
|
||||
{
|
||||
for (Ncv32u i=0; bLoopVirgin && i < h_outElemNum_h; i++)
|
||||
{
|
||||
if (h_vecDst.ptr()[i] != h_vecDst_d.ptr()[i])
|
||||
{
|
||||
bLoopVirgin = false;
|
||||
}
|
||||
}
|
||||
}
|
||||
NCV_SKIP_COND_END
|
||||
|
||||
if (bLoopVirgin)
|
||||
{
|
||||
rcode = true;
|
||||
}
|
||||
|
||||
return rcode;
|
||||
}
|
||||
|
||||
|
||||
bool TestCompact::deinit()
|
||||
{
|
||||
return true;
|
||||
}
|
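TestCompact::process above validates nppsStCompact_32u against nppsStCompact_32u_host by compacting the same vector on the device and on the host and comparing the outputs element by element. The host reference amounts to the simple filtering loop sketched below; the function name compactHost is illustrative and is not part of the NPP_staging API.

#include <vector>
#include <cstdint>

// Host reference for stream compaction: keep every element that is not equal
// to badElem, preserving order; the result size is the compacted length.
static std::vector<uint32_t> compactHost(const std::vector<uint32_t> &src, uint32_t badElem)
{
    std::vector<uint32_t> dst;
    dst.reserve(src.size());
    for (size_t i = 0; i < src.size(); i++)
    {
        if (src[i] != badElem)
            dst.push_back(src[i]);
    }
    return dst;   // dst.size() corresponds to the h_outElemNum_h count in the test
}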
41
tests/gpu/src/nvidia/TestCompact.h
Normal file
@ -0,0 +1,41 @@
|
||||
/*
|
||||
* Copyright 1993-2010 NVIDIA Corporation. All rights reserved.
|
||||
*
|
||||
* NVIDIA Corporation and its licensors retain all intellectual
|
||||
* property and proprietary rights in and to this software and
|
||||
* related documentation and any modifications thereto.
|
||||
* Any use, reproduction, disclosure, or distribution of this
|
||||
* software and related documentation without an express license
|
||||
* agreement from NVIDIA Corporation is strictly prohibited.
|
||||
*/
|
||||
#ifndef _testhypothesescompact_h_
|
||||
#define _testhypothesescompact_h_
|
||||
|
||||
#include "NCVTest.hpp"
|
||||
#include "NCVTestSourceProvider.hpp"
|
||||
|
||||
|
||||
class TestCompact : public NCVTestProvider
|
||||
{
|
||||
public:
|
||||
|
||||
TestCompact(std::string testName, NCVTestSourceProvider<Ncv32u> &src,
|
||||
Ncv32u length, Ncv32u badElem, Ncv32u badElemPercentage);
|
||||
|
||||
virtual bool init();
|
||||
virtual bool process();
|
||||
virtual bool deinit();
|
||||
virtual bool toString(std::ofstream &strOut);
|
||||
|
||||
private:
|
||||
TestCompact(const TestCompact&);
|
||||
TestCompact& operator=(const TestCompact&);
|
||||
|
||||
|
||||
NCVTestSourceProvider<Ncv32u> &src;
|
||||
Ncv32u length;
|
||||
Ncv32u badElem;
|
||||
Ncv32u badElemPercentage;
|
||||
};
|
||||
|
||||
#endif // _testhypothesescompact_h_
|
163
tests/gpu/src/nvidia/TestDrawRects.cpp
Normal file
@ -0,0 +1,163 @@
|
||||
/*
|
||||
* Copyright 1993-2010 NVIDIA Corporation. All rights reserved.
|
||||
*
|
||||
* NVIDIA Corporation and its licensors retain all intellectual
|
||||
* property and proprietary rights in and to this software and
|
||||
* related documentation and any modifications thereto.
|
||||
* Any use, reproduction, disclosure, or distribution of this
|
||||
* software and related documentation without an express license
|
||||
* agreement from NVIDIA Corporation is strictly prohibited.
|
||||
*/
|
||||
|
||||
#include "TestDrawRects.h"
|
||||
#include "NCVHaarObjectDetection.hpp"
|
||||
|
||||
|
||||
template <class T>
|
||||
TestDrawRects<T>::TestDrawRects(std::string testName, NCVTestSourceProvider<T> &src, NCVTestSourceProvider<Ncv32u> &src32u,
|
||||
Ncv32u width, Ncv32u height, Ncv32u numRects, T color)
|
||||
:
|
||||
NCVTestProvider(testName),
|
||||
src(src),
|
||||
src32u(src32u),
|
||||
width(width),
|
||||
height(height),
|
||||
numRects(numRects),
|
||||
color(color)
|
||||
{
|
||||
}
|
||||
|
||||
|
||||
template <class T>
|
||||
bool TestDrawRects<T>::toString(std::ofstream &strOut)
|
||||
{
|
||||
strOut << "sizeof(T)=" << sizeof(T) << std::endl;
|
||||
strOut << "width=" << width << std::endl;
|
||||
strOut << "height=" << height << std::endl;
|
||||
strOut << "numRects=" << numRects << std::endl;
|
||||
strOut << "color=" << color << std::endl;
|
||||
return true;
|
||||
}
|
||||
|
||||
|
||||
template <class T>
|
||||
bool TestDrawRects<T>::init()
|
||||
{
|
||||
return true;
|
||||
}
|
||||
|
||||
|
||||
template <class T>
|
||||
bool TestDrawRects<T>::process()
|
||||
{
|
||||
NCVStatus ncvStat;
|
||||
bool rcode = false;
|
||||
|
||||
NCVMatrixAlloc<T> d_img(*this->allocatorGPU.get(), this->width, this->height);
|
||||
ncvAssertReturn(d_img.isMemAllocated(), false);
|
||||
NCVMatrixAlloc<T> h_img(*this->allocatorCPU.get(), this->width, this->height);
|
||||
ncvAssertReturn(h_img.isMemAllocated(), false);
|
||||
NCVMatrixAlloc<T> h_img_d(*this->allocatorCPU.get(), this->width, this->height);
|
||||
ncvAssertReturn(h_img_d.isMemAllocated(), false);
|
||||
|
||||
NCVVectorAlloc<NcvRect32u> d_rects(*this->allocatorGPU.get(), this->numRects);
|
||||
ncvAssertReturn(d_rects.isMemAllocated(), false);
|
||||
NCVVectorAlloc<NcvRect32u> h_rects(*this->allocatorCPU.get(), this->numRects);
|
||||
ncvAssertReturn(h_rects.isMemAllocated(), false);
|
||||
|
||||
NCV_SET_SKIP_COND(this->allocatorGPU.get()->isCounting());
|
||||
NCV_SKIP_COND_BEGIN
|
||||
ncvAssertReturn(this->src.fill(h_img), false);
|
||||
ncvStat = h_img.copySolid(d_img, 0);
|
||||
ncvAssertReturn(ncvStat == NCV_SUCCESS, false);
|
||||
ncvAssertCUDAReturn(cudaStreamSynchronize(0), false);
|
||||
|
||||
//fill vector of rectangles with random rects covering the input
|
||||
NCVVectorReuse<Ncv32u> h_rects_as32u(h_rects.getSegment());
|
||||
ncvAssertReturn(h_rects_as32u.isMemReused(), false);
|
||||
ncvAssertReturn(this->src32u.fill(h_rects_as32u), false);
|
||||
for (Ncv32u i=0; i<this->numRects; i++)
|
||||
{
|
||||
h_rects.ptr()[i].x = (Ncv32u)(((1.0 * h_rects.ptr()[i].x) / RAND_MAX) * (this->width-2));
|
||||
h_rects.ptr()[i].y = (Ncv32u)(((1.0 * h_rects.ptr()[i].y) / RAND_MAX) * (this->height-2));
|
||||
h_rects.ptr()[i].width = (Ncv32u)(((1.0 * h_rects.ptr()[i].width) / RAND_MAX) * (this->width+10 - h_rects.ptr()[i].x));
|
||||
h_rects.ptr()[i].height = (Ncv32u)(((1.0 * h_rects.ptr()[i].height) / RAND_MAX) * (this->height+10 - h_rects.ptr()[i].y));
|
||||
}
|
||||
ncvStat = h_rects.copySolid(d_rects, 0);
|
||||
ncvAssertReturn(ncvStat == NCV_SUCCESS, false);
|
||||
ncvAssertCUDAReturn(cudaStreamSynchronize(0), false);
|
||||
|
||||
if (sizeof(T) == sizeof(Ncv32u))
|
||||
{
|
||||
ncvStat = ncvDrawRects_32u_device((Ncv32u *)d_img.ptr(), d_img.stride(), this->width, this->height,
|
||||
(NcvRect32u *)d_rects.ptr(), this->numRects, this->color, 0);
|
||||
}
|
||||
else if (sizeof(T) == sizeof(Ncv8u))
|
||||
{
|
||||
ncvStat = ncvDrawRects_8u_device((Ncv8u *)d_img.ptr(), d_img.stride(), this->width, this->height,
|
||||
(NcvRect32u *)d_rects.ptr(), this->numRects, (Ncv8u)this->color, 0);
|
||||
}
|
||||
else
|
||||
{
|
||||
ncvAssertPrintReturn(false, "Incorrect drawrects test instance", false);
|
||||
}
|
||||
ncvAssertReturn(ncvStat == NCV_SUCCESS, false);
|
||||
NCV_SKIP_COND_END
|
||||
|
||||
ncvStat = d_img.copySolid(h_img_d, 0);
|
||||
ncvAssertReturn(ncvStat == NCV_SUCCESS, false);
|
||||
ncvAssertCUDAReturn(cudaStreamSynchronize(0), false);
|
||||
|
||||
NCV_SKIP_COND_BEGIN
|
||||
if (sizeof(T) == sizeof(Ncv32u))
|
||||
{
|
||||
ncvStat = ncvDrawRects_32u_host((Ncv32u *)h_img.ptr(), h_img.stride(), this->width, this->height,
|
||||
(NcvRect32u *)h_rects.ptr(), this->numRects, this->color);
|
||||
}
|
||||
else if (sizeof(T) == sizeof(Ncv8u))
|
||||
{
|
||||
ncvStat = ncvDrawRects_8u_host((Ncv8u *)h_img.ptr(), h_img.stride(), this->width, this->height,
|
||||
(NcvRect32u *)h_rects.ptr(), this->numRects, (Ncv8u)this->color);
|
||||
}
|
||||
else
|
||||
{
|
||||
ncvAssertPrintReturn(false, "Incorrect drawrects test instance", false);
|
||||
}
|
||||
ncvAssertReturn(ncvStat == NCV_SUCCESS, false);
|
||||
NCV_SKIP_COND_END
|
||||
|
||||
//bit-to-bit check
|
||||
bool bLoopVirgin = true;
|
||||
|
||||
NCV_SKIP_COND_BEGIN
|
||||
//const Ncv64f relEPS = 0.005;
|
||||
for (Ncv32u i=0; bLoopVirgin && i < h_img.height(); i++)
|
||||
{
|
||||
for (Ncv32u j=0; bLoopVirgin && j < h_img.width(); j++)
|
||||
{
|
||||
if (h_img.ptr()[h_img.stride()*i+j] != h_img_d.ptr()[h_img_d.stride()*i+j])
|
||||
{
|
||||
bLoopVirgin = false;
|
||||
}
|
||||
}
|
||||
}
|
||||
NCV_SKIP_COND_END
|
||||
|
||||
if (bLoopVirgin)
|
||||
{
|
||||
rcode = true;
|
||||
}
|
||||
|
||||
return rcode;
|
||||
}
|
||||
|
||||
|
||||
template <class T>
|
||||
bool TestDrawRects<T>::deinit()
|
||||
{
|
||||
return true;
|
||||
}
|
||||
|
||||
|
||||
template class TestDrawRects<Ncv8u>;
|
||||
template class TestDrawRects<Ncv32u>;
|
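TestDrawRects compares ncvDrawRects_*_device against the matching host routines on identical inputs. Conceptually the host side just writes each rectangle's one-pixel border into a pitched image, roughly as sketched below; the clamping behavior and the function name drawRectBorder are assumptions for illustration, not the NCV implementation.

#include <cstdint>
#include <algorithm>

// Draw the one-pixel border of a rectangle into a pitched 32-bit image.
// stride is in elements; the rectangle is clipped to the image bounds.
static void drawRectBorder(uint32_t *img, size_t stride, size_t width, size_t height,
                           size_t rx, size_t ry, size_t rw, size_t rh, uint32_t color)
{
    if (rx >= width || ry >= height || rw == 0 || rh == 0)
        return;
    size_t x1 = std::min(rx + rw, width);
    size_t y1 = std::min(ry + rh, height);

    for (size_t x = rx; x < x1; x++)              // top and bottom edges
    {
        img[ry * stride + x] = color;
        img[(y1 - 1) * stride + x] = color;
    }
    for (size_t y = ry; y < y1; y++)              // left and right edges
    {
        img[y * stride + rx] = color;
        img[y * stride + (x1 - 1)] = color;
    }
}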
44
tests/gpu/src/nvidia/TestDrawRects.h
Normal file
@ -0,0 +1,44 @@
|
||||
/*
|
||||
* Copyright 1993-2010 NVIDIA Corporation. All rights reserved.
|
||||
*
|
||||
* NVIDIA Corporation and its licensors retain all intellectual
|
||||
* property and proprietary rights in and to this software and
|
||||
* related documentation and any modifications thereto.
|
||||
* Any use, reproduction, disclosure, or distribution of this
|
||||
* software and related documentation without an express license
|
||||
* agreement from NVIDIA Corporation is strictly prohibited.
|
||||
*/
|
||||
#ifndef _testdrawrects_h_
|
||||
#define _testdrawrects_h_
|
||||
|
||||
#include "NCVTest.hpp"
|
||||
#include "NCVTestSourceProvider.hpp"
|
||||
|
||||
|
||||
template <class T>
|
||||
class TestDrawRects : public NCVTestProvider
|
||||
{
|
||||
public:
|
||||
|
||||
TestDrawRects(std::string testName, NCVTestSourceProvider<T> &src, NCVTestSourceProvider<Ncv32u> &src32u,
|
||||
Ncv32u width, Ncv32u height, Ncv32u numRects, T color);
|
||||
|
||||
virtual bool init();
|
||||
virtual bool process();
|
||||
virtual bool deinit();
|
||||
virtual bool toString(std::ofstream &strOut);
|
||||
|
||||
private:
|
||||
|
||||
TestDrawRects(const TestDrawRects&);
|
||||
TestDrawRects& operator=(const TestDrawRects&);
|
||||
|
||||
NCVTestSourceProvider<T> &src;
|
||||
NCVTestSourceProvider<Ncv32u> &src32u;
|
||||
Ncv32u width;
|
||||
Ncv32u height;
|
||||
Ncv32u numRects;
|
||||
T color;
|
||||
};
|
||||
|
||||
#endif // _testdrawrects_h_
|
267
tests/gpu/src/nvidia/TestHaarCascadeApplication.cpp
Normal file
@ -0,0 +1,267 @@
|
||||
/*
|
||||
* Copyright 1993-2010 NVIDIA Corporation. All rights reserved.
|
||||
*
|
||||
* NVIDIA Corporation and its licensors retain all intellectual
|
||||
* property and proprietary rights in and to this software and
|
||||
* related documentation and any modifications thereto.
|
||||
* Any use, reproduction, disclosure, or distribution of this
|
||||
* software and related documentation without an express license
|
||||
* agreement from NVIDIA Corporation is strictly prohibited.
|
||||
*/
|
||||
|
||||
#include <float.h>
|
||||
#include "TestHaarCascadeApplication.h"
|
||||
#include "NCVHaarObjectDetection.hpp"
|
||||
|
||||
|
||||
TestHaarCascadeApplication::TestHaarCascadeApplication(std::string testName, NCVTestSourceProvider<Ncv8u> &src,
|
||||
std::string cascadeName, Ncv32u width, Ncv32u height)
|
||||
:
|
||||
NCVTestProvider(testName),
|
||||
src(src),
|
||||
cascadeName(cascadeName),
|
||||
width(width),
|
||||
height(height)
|
||||
{
|
||||
}
|
||||
|
||||
|
||||
bool TestHaarCascadeApplication::toString(std::ofstream &strOut)
|
||||
{
|
||||
strOut << "cascadeName=" << cascadeName << std::endl;
|
||||
strOut << "width=" << width << std::endl;
|
||||
strOut << "height=" << height << std::endl;
|
||||
return true;
|
||||
}
|
||||
|
||||
|
||||
bool TestHaarCascadeApplication::init()
|
||||
{
|
||||
return true;
|
||||
}
|
||||
|
||||
|
||||
bool TestHaarCascadeApplication::process()
|
||||
{
|
||||
NCVStatus ncvStat;
|
||||
bool rcode = false;
|
||||
|
||||
Ncv32u numStages, numNodes, numFeatures;
|
||||
|
||||
ncvStat = ncvHaarGetClassifierSize(this->cascadeName, numStages, numNodes, numFeatures);
|
||||
ncvAssertReturn(ncvStat == NCV_SUCCESS, false);
|
||||
|
||||
NCVVectorAlloc<HaarStage64> h_HaarStages(*this->allocatorCPU.get(), numStages);
|
||||
ncvAssertReturn(h_HaarStages.isMemAllocated(), false);
|
||||
NCVVectorAlloc<HaarClassifierNode128> h_HaarNodes(*this->allocatorCPU.get(), numNodes);
|
||||
ncvAssertReturn(h_HaarNodes.isMemAllocated(), false);
|
||||
NCVVectorAlloc<HaarFeature64> h_HaarFeatures(*this->allocatorCPU.get(), numFeatures);
|
||||
ncvAssertReturn(h_HaarFeatures.isMemAllocated(), false);
|
||||
|
||||
NCVVectorAlloc<HaarStage64> d_HaarStages(*this->allocatorGPU.get(), numStages);
|
||||
ncvAssertReturn(d_HaarStages.isMemAllocated(), false);
|
||||
NCVVectorAlloc<HaarClassifierNode128> d_HaarNodes(*this->allocatorGPU.get(), numNodes);
|
||||
ncvAssertReturn(d_HaarNodes.isMemAllocated(), false);
|
||||
NCVVectorAlloc<HaarFeature64> d_HaarFeatures(*this->allocatorGPU.get(), numFeatures);
|
||||
ncvAssertReturn(d_HaarFeatures.isMemAllocated(), false);
|
||||
|
||||
HaarClassifierCascadeDescriptor haar;
|
||||
haar.ClassifierSize.width = haar.ClassifierSize.height = 1;
|
||||
haar.bNeedsTiltedII = false;
|
||||
haar.NumClassifierRootNodes = numNodes;
|
||||
haar.NumClassifierTotalNodes = numNodes;
|
||||
haar.NumFeatures = numFeatures;
|
||||
haar.NumStages = numStages;
|
||||
|
||||
NCV_SET_SKIP_COND(this->allocatorGPU.get()->isCounting());
|
||||
NCV_SKIP_COND_BEGIN
|
||||
|
||||
ncvStat = ncvHaarLoadFromFile_host(this->cascadeName, haar, h_HaarStages, h_HaarNodes, h_HaarFeatures);
|
||||
ncvAssertReturn(ncvStat == NCV_SUCCESS, false);
|
||||
|
||||
ncvAssertReturn(NCV_SUCCESS == h_HaarStages.copySolid(d_HaarStages, 0), false);
|
||||
ncvAssertReturn(NCV_SUCCESS == h_HaarNodes.copySolid(d_HaarNodes, 0), false);
|
||||
ncvAssertReturn(NCV_SUCCESS == h_HaarFeatures.copySolid(d_HaarFeatures, 0), false);
|
||||
ncvAssertCUDAReturn(cudaStreamSynchronize(0), false);
|
||||
|
||||
NCV_SKIP_COND_END
|
||||
|
||||
NcvSize32s srcRoi, srcIIRoi, searchRoi;
|
||||
srcRoi.width = this->width;
|
||||
srcRoi.height = this->height;
|
||||
srcIIRoi.width = srcRoi.width + 1;
|
||||
srcIIRoi.height = srcRoi.height + 1;
|
||||
searchRoi.width = srcIIRoi.width - haar.ClassifierSize.width;
|
||||
searchRoi.height = srcIIRoi.height - haar.ClassifierSize.height;
|
||||
if (searchRoi.width <= 0 || searchRoi.height <= 0)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
NcvSize32u searchRoiU(searchRoi.width, searchRoi.height);
|
||||
|
||||
NCVMatrixAlloc<Ncv8u> d_img(*this->allocatorGPU.get(), this->width, this->height);
|
||||
ncvAssertReturn(d_img.isMemAllocated(), false);
|
||||
NCVMatrixAlloc<Ncv8u> h_img(*this->allocatorCPU.get(), this->width, this->height);
|
||||
ncvAssertReturn(h_img.isMemAllocated(), false);
|
||||
|
||||
Ncv32u integralWidth = this->width + 1;
|
||||
Ncv32u integralHeight = this->height + 1;
|
||||
|
||||
NCVMatrixAlloc<Ncv32u> d_integralImage(*this->allocatorGPU.get(), integralWidth, integralHeight);
|
||||
ncvAssertReturn(d_integralImage.isMemAllocated(), false);
|
||||
NCVMatrixAlloc<Ncv64u> d_sqIntegralImage(*this->allocatorGPU.get(), integralWidth, integralHeight);
|
||||
ncvAssertReturn(d_sqIntegralImage.isMemAllocated(), false);
|
||||
NCVMatrixAlloc<Ncv32u> h_integralImage(*this->allocatorCPU.get(), integralWidth, integralHeight);
|
||||
ncvAssertReturn(h_integralImage.isMemAllocated(), false);
|
||||
NCVMatrixAlloc<Ncv64u> h_sqIntegralImage(*this->allocatorCPU.get(), integralWidth, integralHeight);
|
||||
ncvAssertReturn(h_sqIntegralImage.isMemAllocated(), false);
|
||||
|
||||
NCVMatrixAlloc<Ncv32f> d_rectStdDev(*this->allocatorGPU.get(), this->width, this->height);
|
||||
ncvAssertReturn(d_rectStdDev.isMemAllocated(), false);
|
||||
NCVMatrixAlloc<Ncv32u> d_pixelMask(*this->allocatorGPU.get(), this->width, this->height);
|
||||
ncvAssertReturn(d_pixelMask.isMemAllocated(), false);
|
||||
NCVMatrixAlloc<Ncv32f> h_rectStdDev(*this->allocatorCPU.get(), this->width, this->height);
|
||||
ncvAssertReturn(h_rectStdDev.isMemAllocated(), false);
|
||||
NCVMatrixAlloc<Ncv32u> h_pixelMask(*this->allocatorCPU.get(), this->width, this->height);
|
||||
ncvAssertReturn(h_pixelMask.isMemAllocated(), false);
|
||||
|
||||
NCVVectorAlloc<NcvRect32u> d_hypotheses(*this->allocatorGPU.get(), this->width * this->height);
|
||||
ncvAssertReturn(d_hypotheses.isMemAllocated(), false);
|
||||
NCVVectorAlloc<NcvRect32u> h_hypotheses(*this->allocatorCPU.get(), this->width * this->height);
|
||||
ncvAssertReturn(h_hypotheses.isMemAllocated(), false);
|
||||
|
||||
NCVStatus nppStat;
|
||||
Ncv32u szTmpBufIntegral, szTmpBufSqIntegral;
|
||||
nppStat = nppiStIntegralGetSize_8u32u(NcvSize32u(this->width, this->height), &szTmpBufIntegral, this->devProp);
|
||||
ncvAssertReturn(nppStat == NPPST_SUCCESS, false);
|
||||
nppStat = nppiStSqrIntegralGetSize_8u64u(NcvSize32u(this->width, this->height), &szTmpBufSqIntegral, this->devProp);
|
||||
ncvAssertReturn(nppStat == NPPST_SUCCESS, false);
|
||||
NCVVectorAlloc<Ncv8u> d_tmpIIbuf(*this->allocatorGPU.get(), std::max(szTmpBufIntegral, szTmpBufSqIntegral));
|
||||
ncvAssertReturn(d_tmpIIbuf.isMemAllocated(), false);
|
||||
|
||||
Ncv32u detectionsOnThisScale_d = 0;
|
||||
Ncv32u detectionsOnThisScale_h = 0;
|
||||
|
||||
NCV_SKIP_COND_BEGIN
|
||||
|
||||
ncvAssertReturn(this->src.fill(h_img), false);
|
||||
ncvStat = h_img.copySolid(d_img, 0);
|
||||
ncvAssertReturn(ncvStat == NCV_SUCCESS, false);
|
||||
ncvAssertCUDAReturn(cudaStreamSynchronize(0), false);
|
||||
|
||||
nppStat = nppiStIntegral_8u32u_C1R(d_img.ptr(), d_img.pitch(),
|
||||
d_integralImage.ptr(), d_integralImage.pitch(),
|
||||
NcvSize32u(d_img.width(), d_img.height()),
|
||||
d_tmpIIbuf.ptr(), szTmpBufIntegral, this->devProp);
|
||||
ncvAssertReturn(nppStat == NPPST_SUCCESS, false);
|
||||
|
||||
nppStat = nppiStSqrIntegral_8u64u_C1R(d_img.ptr(), d_img.pitch(),
|
||||
d_sqIntegralImage.ptr(), d_sqIntegralImage.pitch(),
|
||||
NcvSize32u(d_img.width(), d_img.height()),
|
||||
d_tmpIIbuf.ptr(), szTmpBufSqIntegral, this->devProp);
|
||||
ncvAssertReturn(nppStat == NPPST_SUCCESS, false);
|
||||
|
||||
const NcvRect32u rect(
|
||||
HAAR_STDDEV_BORDER,
|
||||
HAAR_STDDEV_BORDER,
|
||||
haar.ClassifierSize.width - 2*HAAR_STDDEV_BORDER,
|
||||
haar.ClassifierSize.height - 2*HAAR_STDDEV_BORDER);
|
||||
nppStat = nppiStRectStdDev_32f_C1R(
|
||||
d_integralImage.ptr(), d_integralImage.pitch(),
|
||||
d_sqIntegralImage.ptr(), d_sqIntegralImage.pitch(),
|
||||
d_rectStdDev.ptr(), d_rectStdDev.pitch(),
|
||||
NcvSize32u(searchRoi.width, searchRoi.height), rect,
|
||||
1.0f, true);
|
||||
ncvAssertReturn(nppStat == NPPST_SUCCESS, false);
|
||||
|
||||
ncvStat = d_integralImage.copySolid(h_integralImage, 0);
|
||||
ncvAssertReturn(ncvStat == NCV_SUCCESS, false);
|
||||
ncvStat = d_rectStdDev.copySolid(h_rectStdDev, 0);
|
||||
ncvAssertReturn(ncvStat == NCV_SUCCESS, false);
|
||||
|
||||
for (Ncv32u i=0; i<searchRoiU.height; i++)
|
||||
{
|
||||
for (Ncv32u j=0; j<h_pixelMask.stride(); j++)
|
||||
{
|
||||
if (j<searchRoiU.width)
|
||||
{
|
||||
h_pixelMask.ptr()[i*h_pixelMask.stride()+j] = (i << 16) | j;
|
||||
}
|
||||
else
|
||||
{
|
||||
h_pixelMask.ptr()[i*h_pixelMask.stride()+j] = OBJDET_MASK_ELEMENT_INVALID_32U;
|
||||
}
|
||||
}
|
||||
}
|
||||
ncvAssertReturn(cudaSuccess == cudaStreamSynchronize(0), false);
|
||||
|
||||
Ncv32u fpu_oldcw, fpu_cw;
|
||||
_controlfp_s(&fpu_cw, 0, 0);
|
||||
fpu_oldcw = fpu_cw;
|
||||
_controlfp_s(&fpu_cw, _PC_24, _MCW_PC);
|
||||
ncvStat = ncvApplyHaarClassifierCascade_host(
|
||||
h_integralImage, h_rectStdDev, h_pixelMask,
|
||||
detectionsOnThisScale_h,
|
||||
haar, h_HaarStages, h_HaarNodes, h_HaarFeatures, false,
|
||||
searchRoiU, 1, 1.0f);
|
||||
ncvAssertReturn(ncvStat == NCV_SUCCESS, false);
|
||||
_controlfp_s(&fpu_cw, fpu_oldcw, _MCW_PC);
|
||||
|
||||
NCV_SKIP_COND_END
|
||||
|
||||
int devId;
|
||||
ncvAssertCUDAReturn(cudaGetDevice(&devId), false);
|
||||
cudaDeviceProp devProp;
|
||||
ncvAssertCUDAReturn(cudaGetDeviceProperties(&devProp, devId), false);
|
||||
|
||||
ncvStat = ncvApplyHaarClassifierCascade_device(
|
||||
d_integralImage, d_rectStdDev, d_pixelMask,
|
||||
detectionsOnThisScale_d,
|
||||
haar, h_HaarStages, d_HaarStages, d_HaarNodes, d_HaarFeatures, false,
|
||||
searchRoiU, 1, 1.0f,
|
||||
*this->allocatorGPU.get(), *this->allocatorCPU.get(),
|
||||
devProp, 0);
|
||||
ncvAssertReturn(ncvStat == NCV_SUCCESS, false);
|
||||
|
||||
NCVMatrixAlloc<Ncv32u> h_pixelMask_d(*this->allocatorCPU.get(), this->width, this->height);
|
||||
ncvAssertReturn(h_pixelMask_d.isMemAllocated(), false);
|
||||
|
||||
//bit-to-bit check
|
||||
bool bLoopVirgin = true;
|
||||
|
||||
NCV_SKIP_COND_BEGIN
|
||||
|
||||
ncvStat = d_pixelMask.copySolid(h_pixelMask_d, 0);
|
||||
ncvAssertReturn(ncvStat == NCV_SUCCESS, false);
|
||||
|
||||
if (detectionsOnThisScale_d != detectionsOnThisScale_h)
|
||||
{
|
||||
bLoopVirgin = false;
|
||||
}
|
||||
else
|
||||
{
|
||||
std::sort(h_pixelMask_d.ptr(), h_pixelMask_d.ptr() + detectionsOnThisScale_d);
|
||||
for (Ncv32u i=0; i<detectionsOnThisScale_d && bLoopVirgin; i++)
|
||||
{
|
||||
if (h_pixelMask.ptr()[i] != h_pixelMask_d.ptr()[i])
|
||||
{
|
||||
bLoopVirgin = false;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
NCV_SKIP_COND_END
|
||||
|
||||
if (bLoopVirgin)
|
||||
{
|
||||
rcode = true;
|
||||
}
|
||||
|
||||
return rcode;
|
||||
}
|
||||
|
||||
|
||||
bool TestHaarCascadeApplication::deinit()
|
||||
{
|
||||
return true;
|
||||
}
|
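TestHaarCascadeApplication drives nppiStIntegral_8u32u_C1R and nppiStSqrIntegral_8u64u_C1R before applying the cascade, and the (width+1) x (height+1) output shapes reflect the usual zero-padded integral image. As a reference for what those calls produce, a plain host computation of such an integral image is sketched below; it is not the NPP_staging code itself, and the function name integralImage is illustrative.

#include <cstdint>
#include <vector>

// (width+1) x (height+1) integral image with a zero first row and column:
// ii[y+1][x+1] = sum of src[0..y][0..x].
static std::vector<uint32_t> integralImage(const std::vector<uint8_t> &src,
                                           size_t width, size_t height)
{
    std::vector<uint32_t> ii((width + 1) * (height + 1), 0);
    for (size_t y = 0; y < height; y++)
    {
        uint32_t rowSum = 0;
        for (size_t x = 0; x < width; x++)
        {
            rowSum += src[y * width + x];
            ii[(y + 1) * (width + 1) + (x + 1)] = ii[y * (width + 1) + (x + 1)] + rowSum;
        }
    }
    return ii;
}

// The sum over any rectangle [x0,x1) x [y0,y1) then costs four lookups:
// ii[y1][x1] - ii[y0][x1] - ii[y1][x0] + ii[y0][x0].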
41
tests/gpu/src/nvidia/TestHaarCascadeApplication.h
Normal file
@ -0,0 +1,41 @@
|
||||
/*
|
||||
* Copyright 1993-2010 NVIDIA Corporation. All rights reserved.
|
||||
*
|
||||
* NVIDIA Corporation and its licensors retain all intellectual
|
||||
* property and proprietary rights in and to this software and
|
||||
* related documentation and any modifications thereto.
|
||||
* Any use, reproduction, disclosure, or distribution of this
|
||||
* software and related documentation without an express license
|
||||
* agreement from NVIDIA Corporation is strictly prohibited.
|
||||
*/
|
||||
#ifndef _testhaarcascadeapplication_h_
|
||||
#define _testhaarcascadeapplication_h_
|
||||
|
||||
#include "NCVTest.hpp"
|
||||
#include "NCVTestSourceProvider.hpp"
|
||||
|
||||
|
||||
class TestHaarCascadeApplication : public NCVTestProvider
|
||||
{
|
||||
public:
|
||||
|
||||
TestHaarCascadeApplication(std::string testName, NCVTestSourceProvider<Ncv8u> &src,
|
||||
std::string cascadeName, Ncv32u width, Ncv32u height);
|
||||
|
||||
virtual bool init();
|
||||
virtual bool process();
|
||||
virtual bool deinit();
|
||||
virtual bool toString(std::ofstream &strOut);
|
||||
|
||||
private:
|
||||
TestHaarCascadeApplication(const TestHaarCascadeApplication&);
|
||||
TestHaarCascadeApplication& operator=(const TestHaarCascadeApplication&);
|
||||
|
||||
|
||||
NCVTestSourceProvider<Ncv8u> &src;
|
||||
std::string cascadeName;
|
||||
Ncv32u width;
|
||||
Ncv32u height;
|
||||
};
|
||||
|
||||
#endif // _testhaarcascadeapplication_h_
|
123
tests/gpu/src/nvidia/TestHaarCascadeLoader.cpp
Normal file
@ -0,0 +1,123 @@
/*
 * Copyright 1993-2010 NVIDIA Corporation. All rights reserved.
 *
 * NVIDIA Corporation and its licensors retain all intellectual
 * property and proprietary rights in and to this software and
 * related documentation and any modifications thereto.
 * Any use, reproduction, disclosure, or distribution of this
 * software and related documentation without an express license
 * agreement from NVIDIA Corporation is strictly prohibited.
 */

#include "TestHaarCascadeLoader.h"
#include "NCVHaarObjectDetection.hpp"


TestHaarCascadeLoader::TestHaarCascadeLoader(std::string testName, std::string cascadeName)
    :
    NCVTestProvider(testName),
    cascadeName(cascadeName)
{
}


bool TestHaarCascadeLoader::toString(std::ofstream &strOut)
{
    strOut << "cascadeName=" << cascadeName << std::endl;
    return true;
}


bool TestHaarCascadeLoader::init()
{
    return true;
}


bool TestHaarCascadeLoader::process()
{
    NCVStatus ncvStat;
    bool rcode = false;

    Ncv32u numStages, numNodes, numFeatures;
    Ncv32u numStages_2 = 0, numNodes_2 = 0, numFeatures_2 = 0;

    ncvStat = ncvHaarGetClassifierSize(this->cascadeName, numStages, numNodes, numFeatures);
    ncvAssertReturn(ncvStat == NCV_SUCCESS, false);

    NCVVectorAlloc<HaarStage64> h_HaarStages(*this->allocatorCPU.get(), numStages);
    ncvAssertReturn(h_HaarStages.isMemAllocated(), false);
    NCVVectorAlloc<HaarClassifierNode128> h_HaarNodes(*this->allocatorCPU.get(), numNodes);
    ncvAssertReturn(h_HaarNodes.isMemAllocated(), false);
    NCVVectorAlloc<HaarFeature64> h_HaarFeatures(*this->allocatorCPU.get(), numFeatures);
    ncvAssertReturn(h_HaarFeatures.isMemAllocated(), false);

    NCVVectorAlloc<HaarStage64> h_HaarStages_2(*this->allocatorCPU.get(), numStages);
    ncvAssertReturn(h_HaarStages_2.isMemAllocated(), false);
    NCVVectorAlloc<HaarClassifierNode128> h_HaarNodes_2(*this->allocatorCPU.get(), numNodes);
    ncvAssertReturn(h_HaarNodes_2.isMemAllocated(), false);
    NCVVectorAlloc<HaarFeature64> h_HaarFeatures_2(*this->allocatorCPU.get(), numFeatures);
    ncvAssertReturn(h_HaarFeatures_2.isMemAllocated(), false);

    HaarClassifierCascadeDescriptor haar;
    HaarClassifierCascadeDescriptor haar_2;

    NCV_SET_SKIP_COND(this->allocatorGPU.get()->isCounting());
    NCV_SKIP_COND_BEGIN

    const std::string testNvbinName = "test.nvbin";
    ncvStat = ncvHaarLoadFromFile_host(this->cascadeName, haar, h_HaarStages, h_HaarNodes, h_HaarFeatures);
    ncvAssertReturn(ncvStat == NCV_SUCCESS, false);

    ncvStat = ncvHaarStoreNVBIN_host(testNvbinName, haar, h_HaarStages, h_HaarNodes, h_HaarFeatures);
    ncvAssertReturn(ncvStat == NCV_SUCCESS, false);

    ncvStat = ncvHaarGetClassifierSize(testNvbinName, numStages_2, numNodes_2, numFeatures_2);
    ncvAssertReturn(ncvStat == NCV_SUCCESS, false);

    ncvStat = ncvHaarLoadFromFile_host(testNvbinName, haar_2, h_HaarStages_2, h_HaarNodes_2, h_HaarFeatures_2);
    ncvAssertReturn(ncvStat == NCV_SUCCESS, false);

    NCV_SKIP_COND_END

    //bit-to-bit check
    bool bLoopVirgin = true;

    NCV_SKIP_COND_BEGIN

    if (
        numStages_2 != numStages ||
        numNodes_2 != numNodes ||
        numFeatures_2 != numFeatures ||
        haar.NumStages != haar_2.NumStages ||
        haar.NumClassifierRootNodes != haar_2.NumClassifierRootNodes ||
        haar.NumClassifierTotalNodes != haar_2.NumClassifierTotalNodes ||
        haar.NumFeatures != haar_2.NumFeatures ||
        haar.ClassifierSize.width != haar_2.ClassifierSize.width ||
        haar.ClassifierSize.height != haar_2.ClassifierSize.height ||
        haar.bNeedsTiltedII != haar_2.bNeedsTiltedII ||
        haar.bHasStumpsOnly != haar_2.bHasStumpsOnly )
    {
        bLoopVirgin = false;
    }
    if (memcmp(h_HaarStages.ptr(), h_HaarStages_2.ptr(), haar.NumStages * sizeof(HaarStage64)) ||
        memcmp(h_HaarNodes.ptr(), h_HaarNodes_2.ptr(), haar.NumClassifierTotalNodes * sizeof(HaarClassifierNode128)) ||
        memcmp(h_HaarFeatures.ptr(), h_HaarFeatures_2.ptr(), haar.NumFeatures * sizeof(HaarFeature64)) )
    {
        bLoopVirgin = false;
    }
    NCV_SKIP_COND_END

    if (bLoopVirgin)
    {
        rcode = true;
    }

    return rcode;
}


bool TestHaarCascadeLoader::deinit()
{
    return true;
}
34
tests/gpu/src/nvidia/TestHaarCascadeLoader.h
Normal file
@@ -0,0 +1,34 @@
/*
 * Copyright 1993-2010 NVIDIA Corporation. All rights reserved.
 *
 * NVIDIA Corporation and its licensors retain all intellectual
 * property and proprietary rights in and to this software and
 * related documentation and any modifications thereto.
 * Any use, reproduction, disclosure, or distribution of this
 * software and related documentation without an express license
 * agreement from NVIDIA Corporation is strictly prohibited.
 */
#ifndef _testhaarcascadeloader_h_
#define _testhaarcascadeloader_h_

#include "NCVTest.hpp"
#include "NCVTestSourceProvider.hpp"


class TestHaarCascadeLoader : public NCVTestProvider
{
public:

    TestHaarCascadeLoader(std::string testName, std::string cascadeName);

    virtual bool init();
    virtual bool process();
    virtual bool deinit();
    virtual bool toString(std::ofstream &strOut);

private:

    std::string cascadeName;
};

#endif // _testhaarcascadeloader_h_
176
tests/gpu/src/nvidia/TestHypothesesFilter.cpp
Normal file
@@ -0,0 +1,176 @@
|
||||
/*
|
||||
* Copyright 1993-2010 NVIDIA Corporation. All rights reserved.
|
||||
*
|
||||
* NVIDIA Corporation and its licensors retain all intellectual
|
||||
* property and proprietary rights in and to this software and
|
||||
* related documentation and any modifications thereto.
|
||||
* Any use, reproduction, disclosure, or distribution of this
|
||||
* software and related documentation without an express license
|
||||
* agreement from NVIDIA Corporation is strictly prohibited.
|
||||
*/
|
||||
|
||||
#include "TestHypothesesFilter.h"
|
||||
#include "NCVHaarObjectDetection.hpp"
|
||||
|
||||
|
||||
TestHypothesesFilter::TestHypothesesFilter(std::string testName, NCVTestSourceProvider<Ncv32u> &src,
|
||||
Ncv32u numDstRects, Ncv32u minNeighbors, Ncv32f eps)
|
||||
:
|
||||
NCVTestProvider(testName),
|
||||
src(src),
|
||||
numDstRects(numDstRects),
|
||||
minNeighbors(minNeighbors),
|
||||
eps(eps)
|
||||
{
|
||||
}
|
||||
|
||||
|
||||
bool TestHypothesesFilter::toString(std::ofstream &strOut)
|
||||
{
|
||||
strOut << "numDstRects=" << numDstRects << std::endl;
|
||||
strOut << "minNeighbors=" << minNeighbors << std::endl;
|
||||
strOut << "eps=" << eps << std::endl;
|
||||
return true;
|
||||
}
|
||||
|
||||
|
||||
bool TestHypothesesFilter::init()
|
||||
{
|
||||
this->canvasWidth = 4096;
|
||||
this->canvasHeight = 4096;
|
||||
return true;
|
||||
}
|
||||
|
||||
|
||||
bool compareRects(const NcvRect32u &r1, const NcvRect32u &r2, Ncv32f eps)
|
||||
{
|
||||
double delta = eps*(std::min(r1.width, r2.width) + std::min(r1.height, r2.height))*0.5;
|
||||
return std::abs((Ncv32s)r1.x - (Ncv32s)r2.x) <= delta &&
|
||||
std::abs((Ncv32s)r1.y - (Ncv32s)r2.y) <= delta &&
|
||||
std::abs((Ncv32s)r1.x + (Ncv32s)r1.width - (Ncv32s)r2.x - (Ncv32s)r2.width) <= delta &&
|
||||
std::abs((Ncv32s)r1.y + (Ncv32s)r1.height - (Ncv32s)r2.y - (Ncv32s)r2.height) <= delta;
|
||||
}
|
||||
|
||||
|
||||
inline bool operator < (const NcvRect32u &a, const NcvRect32u &b)
|
||||
{
|
||||
return a.x < b.x;
|
||||
}
|
||||
|
||||
|
||||
bool TestHypothesesFilter::process()
|
||||
{
|
||||
NCVStatus ncvStat;
|
||||
bool rcode = false;
|
||||
|
||||
NCVVectorAlloc<Ncv32u> h_random32u(*this->allocatorCPU.get(), this->numDstRects * sizeof(NcvRect32u) / sizeof(Ncv32u));
|
||||
ncvAssertReturn(h_random32u.isMemAllocated(), false);
|
||||
|
||||
Ncv32u srcSlotSize = 2 * this->minNeighbors + 1;
|
||||
|
||||
NCVVectorAlloc<NcvRect32u> h_vecSrc(*this->allocatorCPU.get(), this->numDstRects*srcSlotSize);
|
||||
ncvAssertReturn(h_vecSrc.isMemAllocated(), false);
|
||||
NCVVectorAlloc<NcvRect32u> h_vecDst_groundTruth(*this->allocatorCPU.get(), this->numDstRects);
|
||||
ncvAssertReturn(h_vecDst_groundTruth.isMemAllocated(), false);
|
||||
|
||||
NCV_SET_SKIP_COND(this->allocatorCPU.get()->isCounting());
|
||||
|
||||
NCV_SKIP_COND_BEGIN
|
||||
ncvAssertReturn(this->src.fill(h_random32u), false);
|
||||
Ncv32u randCnt = 0;
|
||||
Ncv64f randVal;
|
||||
|
||||
for (Ncv32u i=0; i<this->numDstRects; i++)
|
||||
{
|
||||
h_vecDst_groundTruth.ptr()[i].x = i * this->canvasWidth / this->numDstRects + this->canvasWidth / (this->numDstRects * 4);
|
||||
h_vecDst_groundTruth.ptr()[i].y = i * this->canvasHeight / this->numDstRects + this->canvasHeight / (this->numDstRects * 4);
|
||||
h_vecDst_groundTruth.ptr()[i].width = this->canvasWidth / (this->numDstRects * 2);
|
||||
h_vecDst_groundTruth.ptr()[i].height = this->canvasHeight / (this->numDstRects * 2);
|
||||
|
||||
Ncv32u numNeighbors = this->minNeighbors + 1 + (Ncv32u)(((1.0 * h_random32u.ptr()[i]) * (this->minNeighbors + 1)) / 0xFFFFFFFF);
|
||||
numNeighbors = (numNeighbors > srcSlotSize) ? srcSlotSize : numNeighbors;
|
||||
|
||||
//fill in strong hypotheses (2 * ((1.0 * randVal) / 0xFFFFFFFF) - 1)
|
||||
for (Ncv32u j=0; j<numNeighbors; j++)
|
||||
{
|
||||
randVal = (1.0 * h_random32u.ptr()[randCnt++]) / 0xFFFFFFFF; randCnt = randCnt % h_random32u.length();
|
||||
h_vecSrc.ptr()[srcSlotSize * i + j].x =
|
||||
h_vecDst_groundTruth.ptr()[i].x +
|
||||
(Ncv32s)(h_vecDst_groundTruth.ptr()[i].width * this->eps * (randVal - 0.5));
|
||||
randVal = (1.0 * h_random32u.ptr()[randCnt++]) / 0xFFFFFFFF; randCnt = randCnt % h_random32u.length();
|
||||
h_vecSrc.ptr()[srcSlotSize * i + j].y =
|
||||
h_vecDst_groundTruth.ptr()[i].y +
|
||||
(Ncv32s)(h_vecDst_groundTruth.ptr()[i].height * this->eps * (randVal - 0.5));
|
||||
h_vecSrc.ptr()[srcSlotSize * i + j].width = h_vecDst_groundTruth.ptr()[i].width;
|
||||
h_vecSrc.ptr()[srcSlotSize * i + j].height = h_vecDst_groundTruth.ptr()[i].height;
|
||||
}
|
||||
|
||||
//generate weak hypotheses (to be removed in processing)
|
||||
for (Ncv32u j=numNeighbors; j<srcSlotSize; j++)
|
||||
{
|
||||
randVal = (1.0 * h_random32u.ptr()[randCnt++]) / 0xFFFFFFFF; randCnt = randCnt % h_random32u.length();
|
||||
h_vecSrc.ptr()[srcSlotSize * i + j].x =
|
||||
this->canvasWidth + h_vecDst_groundTruth.ptr()[i].x +
|
||||
(Ncv32s)(h_vecDst_groundTruth.ptr()[i].width * this->eps * (randVal - 0.5));
|
||||
randVal = (1.0 * h_random32u.ptr()[randCnt++]) / 0xFFFFFFFF; randCnt = randCnt % h_random32u.length();
|
||||
h_vecSrc.ptr()[srcSlotSize * i + j].y =
|
||||
this->canvasHeight + h_vecDst_groundTruth.ptr()[i].y +
|
||||
(Ncv32s)(h_vecDst_groundTruth.ptr()[i].height * this->eps * (randVal - 0.5));
|
||||
h_vecSrc.ptr()[srcSlotSize * i + j].width = h_vecDst_groundTruth.ptr()[i].width;
|
||||
h_vecSrc.ptr()[srcSlotSize * i + j].height = h_vecDst_groundTruth.ptr()[i].height;
|
||||
}
|
||||
}
|
||||
|
||||
//shuffle
|
||||
for (Ncv32u i=0; i<this->numDstRects*srcSlotSize-1; i++)
|
||||
{
|
||||
Ncv32u randVal = h_random32u.ptr()[randCnt++]; randCnt = randCnt % h_random32u.length();
|
||||
Ncv32u secondSwap = randVal % (this->numDstRects*srcSlotSize-1 - i);
|
||||
NcvRect32u tmp = h_vecSrc.ptr()[i + secondSwap];
|
||||
h_vecSrc.ptr()[i + secondSwap] = h_vecSrc.ptr()[i];
|
||||
h_vecSrc.ptr()[i] = tmp;
|
||||
}
|
||||
NCV_SKIP_COND_END
|
||||
|
||||
Ncv32u numHypothesesSrc = h_vecSrc.length();
|
||||
NCV_SKIP_COND_BEGIN
|
||||
ncvStat = ncvFilterHypotheses_host(h_vecSrc, numHypothesesSrc, this->minNeighbors, this->eps, NULL);
|
||||
ncvAssertReturn(ncvStat == NCV_SUCCESS, false);
|
||||
NCV_SKIP_COND_END
|
||||
|
||||
//verification
|
||||
bool bLoopVirgin = true;
|
||||
|
||||
NCV_SKIP_COND_BEGIN
|
||||
if (numHypothesesSrc != this->numDstRects)
|
||||
{
|
||||
bLoopVirgin = false;
|
||||
}
|
||||
else
|
||||
{
|
||||
std::vector<NcvRect32u> tmpRects(numHypothesesSrc);
|
||||
memcpy(&tmpRects[0], h_vecSrc.ptr(), numHypothesesSrc * sizeof(NcvRect32u));
|
||||
std::sort(tmpRects.begin(), tmpRects.end());
|
||||
for (Ncv32u i=0; i<numHypothesesSrc && bLoopVirgin; i++)
|
||||
{
|
||||
if (!compareRects(tmpRects[i], h_vecDst_groundTruth.ptr()[i], this->eps))
|
||||
{
|
||||
bLoopVirgin = false;
|
||||
}
|
||||
}
|
||||
}
|
||||
NCV_SKIP_COND_END
|
||||
|
||||
if (bLoopVirgin)
|
||||
{
|
||||
rcode = true;
|
||||
}
|
||||
|
||||
return rcode;
|
||||
}
|
||||
|
||||
|
||||
bool TestHypothesesFilter::deinit()
|
||||
{
|
||||
return true;
|
||||
}
|
44
tests/gpu/src/nvidia/TestHypothesesFilter.h
Normal file
@@ -0,0 +1,44 @@
/*
 * Copyright 1993-2010 NVIDIA Corporation. All rights reserved.
 *
 * NVIDIA Corporation and its licensors retain all intellectual
 * property and proprietary rights in and to this software and
 * related documentation and any modifications thereto.
 * Any use, reproduction, disclosure, or distribution of this
 * software and related documentation without an express license
 * agreement from NVIDIA Corporation is strictly prohibited.
 */
#ifndef _testhypothesesfilter_h_
#define _testhypothesesfilter_h_

#include "NCVTest.hpp"
#include "NCVTestSourceProvider.hpp"


class TestHypothesesFilter : public NCVTestProvider
{
public:

    TestHypothesesFilter(std::string testName, NCVTestSourceProvider<Ncv32u> &src,
                         Ncv32u numDstRects, Ncv32u minNeighbors, Ncv32f eps);

    virtual bool init();
    virtual bool process();
    virtual bool deinit();
    virtual bool toString(std::ofstream &strOut);

private:

    TestHypothesesFilter(const TestHypothesesFilter&);
    TestHypothesesFilter& operator=(const TestHypothesesFilter&);

    NCVTestSourceProvider<Ncv32u> &src;
    Ncv32u numDstRects;
    Ncv32u minNeighbors;
    Ncv32f eps;

    Ncv32u canvasWidth;
    Ncv32u canvasHeight;
};

#endif // _testhypothesesfilter_h_
134
tests/gpu/src/nvidia/TestHypothesesGrow.cpp
Normal file
@@ -0,0 +1,134 @@
|
||||
/*
|
||||
* Copyright 1993-2010 NVIDIA Corporation. All rights reserved.
|
||||
*
|
||||
* NVIDIA Corporation and its licensors retain all intellectual
|
||||
* property and proprietary rights in and to this software and
|
||||
* related documentation and any modifications thereto.
|
||||
* Any use, reproduction, disclosure, or distribution of this
|
||||
* software and related documentation without an express license
|
||||
* agreement from NVIDIA Corporation is strictly prohibited.
|
||||
*/
|
||||
|
||||
#include "TestHypothesesGrow.h"
|
||||
#include "NCVHaarObjectDetection.hpp"
|
||||
|
||||
|
||||
TestHypothesesGrow::TestHypothesesGrow(std::string testName, NCVTestSourceProvider<Ncv32u> &src,
|
||||
Ncv32u rectWidth, Ncv32u rectHeight, Ncv32f rectScale,
|
||||
Ncv32u maxLenSrc, Ncv32u lenSrc, Ncv32u maxLenDst, Ncv32u lenDst)
|
||||
:
|
||||
NCVTestProvider(testName),
|
||||
src(src),
|
||||
rectWidth(rectWidth),
|
||||
rectHeight(rectHeight),
|
||||
rectScale(rectScale),
|
||||
maxLenSrc(maxLenSrc),
|
||||
lenSrc(lenSrc),
|
||||
maxLenDst(maxLenDst),
|
||||
lenDst(lenDst)
|
||||
{
|
||||
}
|
||||
|
||||
|
||||
bool TestHypothesesGrow::toString(std::ofstream &strOut)
|
||||
{
|
||||
strOut << "rectWidth=" << rectWidth << std::endl;
|
||||
strOut << "rectHeight=" << rectHeight << std::endl;
|
||||
strOut << "rectScale=" << rectScale << std::endl;
|
||||
strOut << "maxLenSrc=" << maxLenSrc << std::endl;
|
||||
strOut << "lenSrc=" << lenSrc << std::endl;
|
||||
strOut << "maxLenDst=" << maxLenDst << std::endl;
|
||||
strOut << "lenDst=" << lenDst << std::endl;
|
||||
return true;
|
||||
}
|
||||
|
||||
|
||||
bool TestHypothesesGrow::init()
|
||||
{
|
||||
return true;
|
||||
}
|
||||
|
||||
|
||||
bool TestHypothesesGrow::process()
|
||||
{
|
||||
NCVStatus ncvStat;
|
||||
bool rcode = false;
|
||||
|
||||
NCVVectorAlloc<Ncv32u> h_vecSrc(*this->allocatorCPU.get(), this->maxLenSrc);
|
||||
ncvAssertReturn(h_vecSrc.isMemAllocated(), false);
|
||||
NCVVectorAlloc<Ncv32u> d_vecSrc(*this->allocatorGPU.get(), this->maxLenSrc);
|
||||
ncvAssertReturn(d_vecSrc.isMemAllocated(), false);
|
||||
|
||||
NCVVectorAlloc<NcvRect32u> h_vecDst(*this->allocatorCPU.get(), this->maxLenDst);
|
||||
ncvAssertReturn(h_vecDst.isMemAllocated(), false);
|
||||
NCVVectorAlloc<NcvRect32u> d_vecDst(*this->allocatorGPU.get(), this->maxLenDst);
|
||||
ncvAssertReturn(d_vecDst.isMemAllocated(), false);
|
||||
NCVVectorAlloc<NcvRect32u> h_vecDst_d(*this->allocatorCPU.get(), this->maxLenDst);
|
||||
ncvAssertReturn(h_vecDst_d.isMemAllocated(), false);
|
||||
|
||||
NCV_SET_SKIP_COND(this->allocatorGPU.get()->isCounting());
|
||||
|
||||
NCV_SKIP_COND_BEGIN
|
||||
ncvAssertReturn(this->src.fill(h_vecSrc), false);
|
||||
memset(h_vecDst.ptr(), 0, h_vecDst.length() * sizeof(NcvRect32u));
|
||||
NCVVectorReuse<Ncv32u> h_vecDst_as32u(h_vecDst.getSegment(), lenDst * sizeof(NcvRect32u) / sizeof(Ncv32u));
|
||||
ncvAssertReturn(h_vecDst_as32u.isMemReused(), false);
|
||||
ncvAssertReturn(this->src.fill(h_vecDst_as32u), false);
|
||||
memcpy(h_vecDst_d.ptr(), h_vecDst.ptr(), h_vecDst.length() * sizeof(NcvRect32u));
|
||||
NCV_SKIP_COND_END
|
||||
|
||||
ncvStat = h_vecSrc.copySolid(d_vecSrc, 0);
|
||||
ncvAssertReturn(ncvStat == NCV_SUCCESS, false);
|
||||
ncvStat = h_vecDst.copySolid(d_vecDst, 0);
|
||||
ncvAssertReturn(ncvStat == NCV_SUCCESS, false);
|
||||
ncvAssertCUDAReturn(cudaStreamSynchronize(0), false);
|
||||
|
||||
Ncv32u h_outElemNum_d = 0;
|
||||
Ncv32u h_outElemNum_h = 0;
|
||||
NCV_SKIP_COND_BEGIN
|
||||
h_outElemNum_d = this->lenDst;
|
||||
ncvStat = ncvGrowDetectionsVector_device(d_vecSrc, this->lenSrc,
|
||||
d_vecDst, h_outElemNum_d, this->maxLenDst,
|
||||
this->rectWidth, this->rectHeight, this->rectScale, 0);
|
||||
ncvAssertReturn(ncvStat == NCV_SUCCESS, false);
|
||||
ncvStat = d_vecDst.copySolid(h_vecDst_d, 0);
|
||||
ncvAssertReturn(ncvStat == NCV_SUCCESS, false);
|
||||
ncvAssertCUDAReturn(cudaStreamSynchronize(0), false);
|
||||
|
||||
h_outElemNum_h = this->lenDst;
|
||||
ncvStat = ncvGrowDetectionsVector_host(h_vecSrc, this->lenSrc,
|
||||
h_vecDst, h_outElemNum_h, this->maxLenDst,
|
||||
this->rectWidth, this->rectHeight, this->rectScale);
|
||||
ncvAssertReturn(ncvStat == NCV_SUCCESS, false);
|
||||
NCV_SKIP_COND_END
|
||||
|
||||
//bit-to-bit check
|
||||
bool bLoopVirgin = true;
|
||||
|
||||
NCV_SKIP_COND_BEGIN
|
||||
if (h_outElemNum_d != h_outElemNum_h)
|
||||
{
|
||||
bLoopVirgin = false;
|
||||
}
|
||||
else
|
||||
{
|
||||
if (memcmp(h_vecDst.ptr(), h_vecDst_d.ptr(), this->maxLenDst * sizeof(NcvRect32u)))
|
||||
{
|
||||
bLoopVirgin = false;
|
||||
}
|
||||
}
|
||||
NCV_SKIP_COND_END
|
||||
|
||||
if (bLoopVirgin)
|
||||
{
|
||||
rcode = true;
|
||||
}
|
||||
|
||||
return rcode;
|
||||
}
|
||||
|
||||
|
||||
bool TestHypothesesGrow::deinit()
|
||||
{
|
||||
return true;
|
||||
}
|
46
tests/gpu/src/nvidia/TestHypothesesGrow.h
Normal file
@@ -0,0 +1,46 @@
/*
 * Copyright 1993-2010 NVIDIA Corporation. All rights reserved.
 *
 * NVIDIA Corporation and its licensors retain all intellectual
 * property and proprietary rights in and to this software and
 * related documentation and any modifications thereto.
 * Any use, reproduction, disclosure, or distribution of this
 * software and related documentation without an express license
 * agreement from NVIDIA Corporation is strictly prohibited.
 */
#ifndef _testhypothesesgrow_h_
#define _testhypothesesgrow_h_

#include "NCVTest.hpp"
#include "NCVTestSourceProvider.hpp"


class TestHypothesesGrow : public NCVTestProvider
{
public:

    TestHypothesesGrow(std::string testName, NCVTestSourceProvider<Ncv32u> &src,
                       Ncv32u rectWidth, Ncv32u rectHeight, Ncv32f rectScale,
                       Ncv32u maxLenSrc, Ncv32u lenSrc, Ncv32u maxLenDst, Ncv32u lenDst);

    virtual bool init();
    virtual bool process();
    virtual bool deinit();
    virtual bool toString(std::ofstream &strOut);

private:
    TestHypothesesGrow(const TestHypothesesGrow&);
    TestHypothesesGrow& operator=(const TestHypothesesGrow&);


    NCVTestSourceProvider<Ncv32u> &src;
    Ncv32u rectWidth;
    Ncv32u rectHeight;
    Ncv32f rectScale;
    Ncv32u maxLenSrc;
    Ncv32u lenSrc;
    Ncv32u maxLenDst;
    Ncv32u lenDst;
};

#endif // _testhypothesesgrow_h_
185
tests/gpu/src/nvidia/TestIntegralImage.cpp
Normal file
@@ -0,0 +1,185 @@
|
||||
/*
|
||||
* Copyright 1993-2010 NVIDIA Corporation. All rights reserved.
|
||||
*
|
||||
* NVIDIA Corporation and its licensors retain all intellectual
|
||||
* property and proprietary rights in and to this software and
|
||||
* related documentation and any modifications thereto.
|
||||
* Any use, reproduction, disclosure, or distribution of this
|
||||
* software and related documentation without an express license
|
||||
* agreement from NVIDIA Corporation is strictly prohibited.
|
||||
*/
|
||||
|
||||
#include <math.h>
|
||||
#include "TestIntegralImage.h"
|
||||
|
||||
|
||||
template <class T_in, class T_out>
|
||||
TestIntegralImage<T_in, T_out>::TestIntegralImage(std::string testName, NCVTestSourceProvider<T_in> &src,
|
||||
Ncv32u width, Ncv32u height)
|
||||
:
|
||||
NCVTestProvider(testName),
|
||||
src(src),
|
||||
width(width),
|
||||
height(height)
|
||||
{
|
||||
}
|
||||
|
||||
|
||||
template <class T_in, class T_out>
|
||||
bool TestIntegralImage<T_in, T_out>::toString(std::ofstream &strOut)
|
||||
{
|
||||
strOut << "sizeof(T_in)=" << sizeof(T_in) << std::endl;
|
||||
strOut << "sizeof(T_out)=" << sizeof(T_out) << std::endl;
|
||||
strOut << "width=" << width << std::endl;
|
||||
strOut << "height=" << height << std::endl;
|
||||
return true;
|
||||
}
|
||||
|
||||
|
||||
template <class T_in, class T_out>
|
||||
bool TestIntegralImage<T_in, T_out>::init()
|
||||
{
|
||||
return true;
|
||||
}
|
||||
|
||||
|
||||
template <class T_in, class T_out>
|
||||
bool TestIntegralImage<T_in, T_out>::process()
|
||||
{
|
||||
NCVStatus ncvStat;
|
||||
bool rcode = false;
|
||||
|
||||
Ncv32u widthII = this->width + 1;
|
||||
Ncv32u heightII = this->height + 1;
|
||||
|
||||
NCVMatrixAlloc<T_in> d_img(*this->allocatorGPU.get(), this->width, this->height);
|
||||
ncvAssertReturn(d_img.isMemAllocated(), false);
|
||||
NCVMatrixAlloc<T_in> h_img(*this->allocatorCPU.get(), this->width, this->height);
|
||||
ncvAssertReturn(h_img.isMemAllocated(), false);
|
||||
NCVMatrixAlloc<T_out> d_imgII(*this->allocatorGPU.get(), widthII, heightII);
|
||||
ncvAssertReturn(d_imgII.isMemAllocated(), false);
|
||||
NCVMatrixAlloc<T_out> h_imgII(*this->allocatorCPU.get(), widthII, heightII);
|
||||
ncvAssertReturn(h_imgII.isMemAllocated(), false);
|
||||
NCVMatrixAlloc<T_out> h_imgII_d(*this->allocatorCPU.get(), widthII, heightII);
|
||||
ncvAssertReturn(h_imgII_d.isMemAllocated(), false);
|
||||
|
||||
Ncv32u bufSize;
|
||||
if (sizeof(T_in) == sizeof(Ncv8u))
|
||||
{
|
||||
ncvStat = nppiStIntegralGetSize_8u32u(NcvSize32u(this->width, this->height), &bufSize, this->devProp);
|
||||
ncvAssertReturn(NPPST_SUCCESS == ncvStat, false);
|
||||
}
|
||||
else if (sizeof(T_in) == sizeof(Ncv32f))
|
||||
{
|
||||
ncvStat = nppiStIntegralGetSize_32f32f(NcvSize32u(this->width, this->height), &bufSize, this->devProp);
|
||||
ncvAssertReturn(NPPST_SUCCESS == ncvStat, false);
|
||||
}
|
||||
else
|
||||
{
|
||||
ncvAssertPrintReturn(false, "Incorrect integral image test instance", false);
|
||||
}
|
||||
|
||||
NCVVectorAlloc<Ncv8u> d_tmpBuf(*this->allocatorGPU.get(), bufSize);
|
||||
ncvAssertReturn(d_tmpBuf.isMemAllocated(), false);
|
||||
|
||||
NCV_SET_SKIP_COND(this->allocatorGPU.get()->isCounting());
|
||||
NCV_SKIP_COND_BEGIN
|
||||
|
||||
ncvAssertReturn(this->src.fill(h_img), false);
|
||||
|
||||
ncvStat = h_img.copySolid(d_img, 0);
|
||||
ncvAssertReturn(ncvStat == NPPST_SUCCESS, false);
|
||||
|
||||
if (sizeof(T_in) == sizeof(Ncv8u))
|
||||
{
|
||||
ncvStat = nppiStIntegral_8u32u_C1R((Ncv8u *)d_img.ptr(), d_img.pitch(),
|
||||
(Ncv32u *)d_imgII.ptr(), d_imgII.pitch(),
|
||||
NcvSize32u(this->width, this->height),
|
||||
d_tmpBuf.ptr(), bufSize, this->devProp);
|
||||
ncvAssertReturn(ncvStat == NPPST_SUCCESS, false);
|
||||
}
|
||||
else if (sizeof(T_in) == sizeof(Ncv32f))
|
||||
{
|
||||
ncvStat = nppiStIntegral_32f32f_C1R((Ncv32f *)d_img.ptr(), d_img.pitch(),
|
||||
(Ncv32f *)d_imgII.ptr(), d_imgII.pitch(),
|
||||
NcvSize32u(this->width, this->height),
|
||||
d_tmpBuf.ptr(), bufSize, this->devProp);
|
||||
ncvAssertReturn(ncvStat == NPPST_SUCCESS, false);
|
||||
}
|
||||
else
|
||||
{
|
||||
ncvAssertPrintReturn(false, "Incorrect integral image test instance", false);
|
||||
}
|
||||
|
||||
ncvStat = d_imgII.copySolid(h_imgII_d, 0);
|
||||
ncvAssertReturn(ncvStat == NPPST_SUCCESS, false);
|
||||
|
||||
if (sizeof(T_in) == sizeof(Ncv8u))
|
||||
{
|
||||
ncvStat = nppiStIntegral_8u32u_C1R_host((Ncv8u *)h_img.ptr(), h_img.pitch(),
|
||||
(Ncv32u *)h_imgII.ptr(), h_imgII.pitch(),
|
||||
NcvSize32u(this->width, this->height));
|
||||
ncvAssertReturn(ncvStat == NPPST_SUCCESS, false);
|
||||
}
|
||||
else if (sizeof(T_in) == sizeof(Ncv32f))
|
||||
{
|
||||
ncvStat = nppiStIntegral_32f32f_C1R_host((Ncv32f *)h_img.ptr(), h_img.pitch(),
|
||||
(Ncv32f *)h_imgII.ptr(), h_imgII.pitch(),
|
||||
NcvSize32u(this->width, this->height));
|
||||
ncvAssertReturn(ncvStat == NPPST_SUCCESS, false);
|
||||
}
|
||||
else
|
||||
{
|
||||
ncvAssertPrintReturn(false, "Incorrect integral image test instance", false);
|
||||
}
|
||||
|
||||
NCV_SKIP_COND_END
|
||||
|
||||
//bit-to-bit check
|
||||
bool bLoopVirgin = true;
|
||||
|
||||
NCV_SKIP_COND_BEGIN
|
||||
for (Ncv32u i=0; bLoopVirgin && i < h_img.height() + 1; i++)
|
||||
{
|
||||
for (Ncv32u j=0; bLoopVirgin && j < h_img.width() + 1; j++)
|
||||
{
|
||||
if (sizeof(T_in) == sizeof(Ncv8u))
|
||||
{
|
||||
if (h_imgII.ptr()[h_imgII.stride()*i+j] != h_imgII_d.ptr()[h_imgII_d.stride()*i+j])
|
||||
{
|
||||
bLoopVirgin = false;
|
||||
}
|
||||
}
|
||||
else if (sizeof(T_in) == sizeof(Ncv32f))
|
||||
{
|
||||
if (fabsf((float)h_imgII.ptr()[h_imgII.stride()*i+j] - (float)h_imgII_d.ptr()[h_imgII_d.stride()*i+j]) > 0.01f)
|
||||
{
|
||||
bLoopVirgin = false;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
ncvAssertPrintReturn(false, "Incorrect integral image test instance", false);
|
||||
}
|
||||
}
|
||||
}
|
||||
NCV_SKIP_COND_END
|
||||
|
||||
if (bLoopVirgin)
|
||||
{
|
||||
rcode = true;
|
||||
}
|
||||
|
||||
return rcode;
|
||||
}
|
||||
|
||||
|
||||
template <class T_in, class T_out>
|
||||
bool TestIntegralImage<T_in, T_out>::deinit()
|
||||
{
|
||||
return true;
|
||||
}
|
||||
|
||||
|
||||
template class TestIntegralImage<Ncv8u, Ncv32u>;
|
||||
template class TestIntegralImage<Ncv32f, Ncv32f>;
|
40
tests/gpu/src/nvidia/TestIntegralImage.h
Normal file
@@ -0,0 +1,40 @@
/*
 * Copyright 1993-2010 NVIDIA Corporation. All rights reserved.
 *
 * NVIDIA Corporation and its licensors retain all intellectual
 * property and proprietary rights in and to this software and
 * related documentation and any modifications thereto.
 * Any use, reproduction, disclosure, or distribution of this
 * software and related documentation without an express license
 * agreement from NVIDIA Corporation is strictly prohibited.
 */
#ifndef _testintegralimage_h_
#define _testintegralimage_h_

#include "NCVTest.hpp"
#include "NCVTestSourceProvider.hpp"


template <class T_in, class T_out>
class TestIntegralImage : public NCVTestProvider
{
public:

    TestIntegralImage(std::string testName, NCVTestSourceProvider<T_in> &src,
                      Ncv32u width, Ncv32u height);

    virtual bool init();
    virtual bool process();
    virtual bool deinit();
    virtual bool toString(std::ofstream &strOut);

private:
    TestIntegralImage(const TestIntegralImage&);
    TestIntegralImage& operator=(const TestIntegralImage&);

    NCVTestSourceProvider<T_in> &src;
    Ncv32u width;
    Ncv32u height;
};

#endif // _testintegralimage_h_
117
tests/gpu/src/nvidia/TestIntegralImageSquared.cpp
Normal file
@@ -0,0 +1,117 @@
|
||||
/*
|
||||
* Copyright 1993-2010 NVIDIA Corporation. All rights reserved.
|
||||
*
|
||||
* NVIDIA Corporation and its licensors retain all intellectual
|
||||
* property and proprietary rights in and to this software and
|
||||
* related documentation and any modifications thereto.
|
||||
* Any use, reproduction, disclosure, or distribution of this
|
||||
* software and related documentation without an express license
|
||||
* agreement from NVIDIA Corporation is strictly prohibited.
|
||||
*/
|
||||
|
||||
#include "TestIntegralImageSquared.h"
|
||||
|
||||
|
||||
TestIntegralImageSquared::TestIntegralImageSquared(std::string testName, NCVTestSourceProvider<Ncv8u> &src,
|
||||
Ncv32u width, Ncv32u height)
|
||||
:
|
||||
NCVTestProvider(testName),
|
||||
src(src),
|
||||
width(width),
|
||||
height(height)
|
||||
{
|
||||
}
|
||||
|
||||
|
||||
bool TestIntegralImageSquared::toString(std::ofstream &strOut)
|
||||
{
|
||||
strOut << "width=" << width << std::endl;
|
||||
strOut << "height=" << height << std::endl;
|
||||
return true;
|
||||
}
|
||||
|
||||
|
||||
bool TestIntegralImageSquared::init()
|
||||
{
|
||||
return true;
|
||||
}
|
||||
|
||||
|
||||
bool TestIntegralImageSquared::process()
|
||||
{
|
||||
NCVStatus ncvStat;
|
||||
bool rcode = false;
|
||||
|
||||
Ncv32u widthSII = this->width + 1;
|
||||
Ncv32u heightSII = this->height + 1;
|
||||
|
||||
NCVMatrixAlloc<Ncv8u> d_img(*this->allocatorGPU.get(), this->width, this->height);
|
||||
ncvAssertReturn(d_img.isMemAllocated(), false);
|
||||
NCVMatrixAlloc<Ncv8u> h_img(*this->allocatorCPU.get(), this->width, this->height);
|
||||
ncvAssertReturn(h_img.isMemAllocated(), false);
|
||||
NCVMatrixAlloc<Ncv64u> d_imgSII(*this->allocatorGPU.get(), widthSII, heightSII);
|
||||
ncvAssertReturn(d_imgSII.isMemAllocated(), false);
|
||||
NCVMatrixAlloc<Ncv64u> h_imgSII(*this->allocatorCPU.get(), widthSII, heightSII);
|
||||
ncvAssertReturn(h_imgSII.isMemAllocated(), false);
|
||||
NCVMatrixAlloc<Ncv64u> h_imgSII_d(*this->allocatorCPU.get(), widthSII, heightSII);
|
||||
ncvAssertReturn(h_imgSII_d.isMemAllocated(), false);
|
||||
|
||||
Ncv32u bufSize;
|
||||
ncvStat = nppiStSqrIntegralGetSize_8u64u(NcvSize32u(this->width, this->height), &bufSize, this->devProp);
|
||||
ncvAssertReturn(NPPST_SUCCESS == ncvStat, false);
|
||||
NCVVectorAlloc<Ncv8u> d_tmpBuf(*this->allocatorGPU.get(), bufSize);
|
||||
ncvAssertReturn(d_tmpBuf.isMemAllocated(), false);
|
||||
|
||||
NCV_SET_SKIP_COND(this->allocatorGPU.get()->isCounting());
|
||||
NCV_SKIP_COND_BEGIN
|
||||
|
||||
ncvAssertReturn(this->src.fill(h_img), false);
|
||||
|
||||
ncvStat = h_img.copySolid(d_img, 0);
|
||||
ncvAssertReturn(ncvStat == NPPST_SUCCESS, false);
|
||||
|
||||
ncvStat = nppiStSqrIntegral_8u64u_C1R(d_img.ptr(), d_img.pitch(),
|
||||
d_imgSII.ptr(), d_imgSII.pitch(),
|
||||
NcvSize32u(this->width, this->height),
|
||||
d_tmpBuf.ptr(), bufSize, this->devProp);
|
||||
ncvAssertReturn(ncvStat == NPPST_SUCCESS, false);
|
||||
|
||||
ncvStat = d_imgSII.copySolid(h_imgSII_d, 0);
|
||||
ncvAssertReturn(ncvStat == NPPST_SUCCESS, false);
|
||||
|
||||
ncvStat = nppiStSqrIntegral_8u64u_C1R_host(h_img.ptr(), h_img.pitch(),
|
||||
h_imgSII.ptr(), h_imgSII.pitch(),
|
||||
NcvSize32u(this->width, this->height));
|
||||
ncvAssertReturn(ncvStat == NPPST_SUCCESS, false);
|
||||
|
||||
NCV_SKIP_COND_END
|
||||
|
||||
//bit-to-bit check
|
||||
bool bLoopVirgin = true;
|
||||
|
||||
NCV_SKIP_COND_BEGIN
|
||||
for (Ncv32u i=0; bLoopVirgin && i < h_img.height() + 1; i++)
|
||||
{
|
||||
for (Ncv32u j=0; bLoopVirgin && j < h_img.width() + 1; j++)
|
||||
{
|
||||
if (h_imgSII.ptr()[h_imgSII.stride()*i+j] != h_imgSII_d.ptr()[h_imgSII_d.stride()*i+j])
|
||||
{
|
||||
bLoopVirgin = false;
|
||||
}
|
||||
}
|
||||
}
|
||||
NCV_SKIP_COND_END
|
||||
|
||||
if (bLoopVirgin)
|
||||
{
|
||||
rcode = true;
|
||||
}
|
||||
|
||||
return rcode;
|
||||
}
|
||||
|
||||
|
||||
bool TestIntegralImageSquared::deinit()
|
||||
{
|
||||
return true;
|
||||
}
|
39
tests/gpu/src/nvidia/TestIntegralImageSquared.h
Normal file
@@ -0,0 +1,39 @@
/*
 * Copyright 1993-2010 NVIDIA Corporation. All rights reserved.
 *
 * NVIDIA Corporation and its licensors retain all intellectual
 * property and proprietary rights in and to this software and
 * related documentation and any modifications thereto.
 * Any use, reproduction, disclosure, or distribution of this
 * software and related documentation without an express license
 * agreement from NVIDIA Corporation is strictly prohibited.
 */
#ifndef _testintegralimagesquared_h_
#define _testintegralimagesquared_h_

#include "NCVTest.hpp"
#include "NCVTestSourceProvider.hpp"


class TestIntegralImageSquared : public NCVTestProvider
{
public:

    TestIntegralImageSquared(std::string testName, NCVTestSourceProvider<Ncv8u> &src,
                             Ncv32u width, Ncv32u height);

    virtual bool init();
    virtual bool process();
    virtual bool deinit();
    virtual bool toString(std::ofstream &strOut);

private:
    TestIntegralImageSquared(const TestIntegralImageSquared&);
    TestIntegralImageSquared& operator=(const TestIntegralImageSquared&);

    NCVTestSourceProvider<Ncv8u> &src;
    Ncv32u width;
    Ncv32u height;
};

#endif // _testintegralimagesquared_h_
180
tests/gpu/src/nvidia/TestRectStdDev.cpp
Normal file
@@ -0,0 +1,180 @@
|
||||
/*
|
||||
* Copyright 1993-2010 NVIDIA Corporation. All rights reserved.
|
||||
*
|
||||
* NVIDIA Corporation and its licensors retain all intellectual
|
||||
* property and proprietary rights in and to this software and
|
||||
* related documentation and any modifications thereto.
|
||||
* Any use, reproduction, disclosure, or distribution of this
|
||||
* software and related documentation without an express license
|
||||
* agreement from NVIDIA Corporation is strictly prohibited.
|
||||
*/
|
||||
|
||||
#include <math.h>
|
||||
|
||||
#include "TestRectStdDev.h"
|
||||
|
||||
|
||||
TestRectStdDev::TestRectStdDev(std::string testName, NCVTestSourceProvider<Ncv8u> &src,
|
||||
Ncv32u width, Ncv32u height, NcvRect32u rect, Ncv32f scaleFactor,
|
||||
NcvBool bTextureCache)
|
||||
:
|
||||
NCVTestProvider(testName),
|
||||
src(src),
|
||||
width(width),
|
||||
height(height),
|
||||
rect(rect),
|
||||
scaleFactor(scaleFactor),
|
||||
bTextureCache(bTextureCache)
|
||||
{
|
||||
}
|
||||
|
||||
|
||||
bool TestRectStdDev::toString(std::ofstream &strOut)
|
||||
{
|
||||
strOut << "width=" << width << std::endl;
|
||||
strOut << "height=" << height << std::endl;
|
||||
strOut << "rect=[" << rect.x << ", " << rect.y << ", " << rect.width << ", " << rect.height << "]\n";
|
||||
strOut << "scaleFactor=" << scaleFactor << std::endl;
|
||||
strOut << "bTextureCache=" << bTextureCache << std::endl;
|
||||
return true;
|
||||
}
|
||||
|
||||
|
||||
bool TestRectStdDev::init()
|
||||
{
|
||||
return true;
|
||||
}
|
||||
|
||||
|
||||
bool TestRectStdDev::process()
|
||||
{
|
||||
NCVStatus ncvStat;
|
||||
bool rcode = false;
|
||||
|
||||
Ncv32s _normWidth = (Ncv32s)this->width - this->rect.x - this->rect.width + 1;
|
||||
Ncv32s _normHeight = (Ncv32s)this->height - this->rect.y - this->rect.height + 1;
|
||||
if (_normWidth <= 0 || _normHeight <= 0)
|
||||
{
|
||||
return true;
|
||||
}
|
||||
Ncv32u normWidth = (Ncv32u)_normWidth;
|
||||
Ncv32u normHeight = (Ncv32u)_normHeight;
|
||||
NcvSize32u szNormRoi(normWidth, normHeight);
|
||||
|
||||
Ncv32u widthII = this->width + 1;
|
||||
Ncv32u heightII = this->height + 1;
|
||||
Ncv32u widthSII = this->width + 1;
|
||||
Ncv32u heightSII = this->height + 1;
|
||||
|
||||
NCVMatrixAlloc<Ncv8u> d_img(*this->allocatorGPU.get(), this->width, this->height);
|
||||
ncvAssertReturn(d_img.isMemAllocated(), false);
|
||||
NCVMatrixAlloc<Ncv8u> h_img(*this->allocatorCPU.get(), this->width, this->height);
|
||||
ncvAssertReturn(h_img.isMemAllocated(), false);
|
||||
|
||||
NCVMatrixAlloc<Ncv32u> d_imgII(*this->allocatorGPU.get(), widthII, heightII);
|
||||
ncvAssertReturn(d_imgII.isMemAllocated(), false);
|
||||
NCVMatrixAlloc<Ncv32u> h_imgII(*this->allocatorCPU.get(), widthII, heightII);
|
||||
ncvAssertReturn(h_imgII.isMemAllocated(), false);
|
||||
|
||||
NCVMatrixAlloc<Ncv64u> d_imgSII(*this->allocatorGPU.get(), widthSII, heightSII);
|
||||
ncvAssertReturn(d_imgSII.isMemAllocated(), false);
|
||||
NCVMatrixAlloc<Ncv64u> h_imgSII(*this->allocatorCPU.get(), widthSII, heightSII);
|
||||
ncvAssertReturn(h_imgSII.isMemAllocated(), false);
|
||||
|
||||
NCVMatrixAlloc<Ncv32f> d_norm(*this->allocatorGPU.get(), normWidth, normHeight);
|
||||
ncvAssertReturn(d_norm.isMemAllocated(), false);
|
||||
NCVMatrixAlloc<Ncv32f> h_norm(*this->allocatorCPU.get(), normWidth, normHeight);
|
||||
ncvAssertReturn(h_norm.isMemAllocated(), false);
|
||||
NCVMatrixAlloc<Ncv32f> h_norm_d(*this->allocatorCPU.get(), normWidth, normHeight);
|
||||
ncvAssertReturn(h_norm_d.isMemAllocated(), false);
|
||||
|
||||
Ncv32u bufSizeII, bufSizeSII;
|
||||
ncvStat = nppiStIntegralGetSize_8u32u(NcvSize32u(this->width, this->height), &bufSizeII, this->devProp);
|
||||
ncvAssertReturn(NPPST_SUCCESS == ncvStat, false);
|
||||
ncvStat = nppiStSqrIntegralGetSize_8u64u(NcvSize32u(this->width, this->height), &bufSizeSII, this->devProp);
|
||||
ncvAssertReturn(NPPST_SUCCESS == ncvStat, false);
|
||||
Ncv32u bufSize = bufSizeII > bufSizeSII ? bufSizeII : bufSizeSII;
|
||||
NCVVectorAlloc<Ncv8u> d_tmpBuf(*this->allocatorGPU.get(), bufSize);
|
||||
ncvAssertReturn(d_tmpBuf.isMemAllocated(), false);
|
||||
|
||||
NCV_SET_SKIP_COND(this->allocatorGPU.get()->isCounting());
|
||||
NCV_SKIP_COND_BEGIN
|
||||
ncvAssertReturn(this->src.fill(h_img), false);
|
||||
|
||||
ncvStat = h_img.copySolid(d_img, 0);
|
||||
ncvAssertReturn(ncvStat == NPPST_SUCCESS, false);
|
||||
|
||||
ncvStat = nppiStIntegral_8u32u_C1R(d_img.ptr(), d_img.pitch(),
|
||||
d_imgII.ptr(), d_imgII.pitch(),
|
||||
NcvSize32u(this->width, this->height),
|
||||
d_tmpBuf.ptr(), bufSize, this->devProp);
|
||||
ncvAssertReturn(ncvStat == NPPST_SUCCESS, false);
|
||||
|
||||
ncvStat = nppiStSqrIntegral_8u64u_C1R(d_img.ptr(), d_img.pitch(),
|
||||
d_imgSII.ptr(), d_imgSII.pitch(),
|
||||
NcvSize32u(this->width, this->height),
|
||||
d_tmpBuf.ptr(), bufSize, this->devProp);
|
||||
ncvAssertReturn(ncvStat == NPPST_SUCCESS, false);
|
||||
|
||||
ncvStat = nppiStRectStdDev_32f_C1R(d_imgII.ptr(), d_imgII.pitch(),
|
||||
d_imgSII.ptr(), d_imgSII.pitch(),
|
||||
d_norm.ptr(), d_norm.pitch(),
|
||||
szNormRoi, this->rect,
|
||||
this->scaleFactor,
|
||||
this->bTextureCache);
|
||||
ncvAssertReturn(ncvStat == NPPST_SUCCESS, false);
|
||||
|
||||
ncvStat = d_norm.copySolid(h_norm_d, 0);
|
||||
ncvAssertReturn(ncvStat == NPPST_SUCCESS, false);
|
||||
|
||||
ncvStat = nppiStIntegral_8u32u_C1R_host(h_img.ptr(), h_img.pitch(),
|
||||
h_imgII.ptr(), h_imgII.pitch(),
|
||||
NcvSize32u(this->width, this->height));
|
||||
ncvAssertReturn(ncvStat == NPPST_SUCCESS, false);
|
||||
|
||||
ncvStat = nppiStSqrIntegral_8u64u_C1R_host(h_img.ptr(), h_img.pitch(),
|
||||
h_imgSII.ptr(), h_imgSII.pitch(),
|
||||
NcvSize32u(this->width, this->height));
|
||||
ncvAssertReturn(ncvStat == NPPST_SUCCESS, false);
|
||||
|
||||
ncvStat = nppiStRectStdDev_32f_C1R_host(h_imgII.ptr(), h_imgII.pitch(),
|
||||
h_imgSII.ptr(), h_imgSII.pitch(),
|
||||
h_norm.ptr(), h_norm.pitch(),
|
||||
szNormRoi, this->rect,
|
||||
this->scaleFactor);
|
||||
ncvAssertReturn(ncvStat == NPPST_SUCCESS, false);
|
||||
NCV_SKIP_COND_END
|
||||
|
||||
//bit-to-bit check
|
||||
bool bLoopVirgin = true;
|
||||
|
||||
NCV_SKIP_COND_BEGIN
|
||||
const Ncv64f relEPS = 0.005;
|
||||
for (Ncv32u i=0; bLoopVirgin && i < h_norm.height(); i++)
|
||||
{
|
||||
for (Ncv32u j=0; bLoopVirgin && j < h_norm.width(); j++)
|
||||
{
|
||||
Ncv64f absErr = fabs(h_norm.ptr()[h_norm.stride()*i+j] - h_norm_d.ptr()[h_norm_d.stride()*i+j]);
|
||||
Ncv64f relErr = absErr / h_norm.ptr()[h_norm.stride()*i+j];
|
||||
|
||||
if (relErr > relEPS)
|
||||
{
|
||||
bLoopVirgin = false;
|
||||
}
|
||||
}
|
||||
}
|
||||
NCV_SKIP_COND_END
|
||||
|
||||
if (bLoopVirgin)
|
||||
{
|
||||
rcode = true;
|
||||
}
|
||||
|
||||
return rcode;
|
||||
}
|
||||
|
||||
|
||||
bool TestRectStdDev::deinit()
|
||||
{
|
||||
return true;
|
||||
}
|
44
tests/gpu/src/nvidia/TestRectStdDev.h
Normal file
@@ -0,0 +1,44 @@
/*
 * Copyright 1993-2010 NVIDIA Corporation. All rights reserved.
 *
 * NVIDIA Corporation and its licensors retain all intellectual
 * property and proprietary rights in and to this software and
 * related documentation and any modifications thereto.
 * Any use, reproduction, disclosure, or distribution of this
 * software and related documentation without an express license
 * agreement from NVIDIA Corporation is strictly prohibited.
 */
#ifndef _testrectstddev_h_
#define _testrectstddev_h_

#include "NCVTest.hpp"
#include "NCVTestSourceProvider.hpp"


class TestRectStdDev : public NCVTestProvider
{
public:

    TestRectStdDev(std::string testName, NCVTestSourceProvider<Ncv8u> &src,
                   Ncv32u width, Ncv32u height, NcvRect32u rect, Ncv32f scaleFactor,
                   NcvBool bTextureCache);

    virtual bool init();
    virtual bool process();
    virtual bool deinit();
    virtual bool toString(std::ofstream &strOut);

private:
    TestRectStdDev(const TestRectStdDev&);
    TestRectStdDev& operator=(const TestRectStdDev&);

    NCVTestSourceProvider<Ncv8u> &src;
    NcvRect32u rect;
    Ncv32u width;
    Ncv32u height;
    Ncv32f scaleFactor;

    NcvBool bTextureCache;
};

#endif // _testrectstddev_h_
161
tests/gpu/src/nvidia/TestResize.cpp
Normal file
@@ -0,0 +1,161 @@
|
||||
/*
|
||||
* Copyright 1993-2010 NVIDIA Corporation. All rights reserved.
|
||||
*
|
||||
* NVIDIA Corporation and its licensors retain all intellectual
|
||||
* property and proprietary rights in and to this software and
|
||||
* related documentation and any modifications thereto.
|
||||
* Any use, reproduction, disclosure, or distribution of this
|
||||
* software and related documentation without an express license
|
||||
* agreement from NVIDIA Corporation is strictly prohibited.
|
||||
*/
|
||||
|
||||
#include <math.h>
|
||||
|
||||
#include "TestResize.h"
|
||||
|
||||
|
||||
template <class T>
|
||||
TestResize<T>::TestResize(std::string testName, NCVTestSourceProvider<T> &src,
|
||||
Ncv32u width, Ncv32u height, Ncv32u scaleFactor, NcvBool bTextureCache)
|
||||
:
|
||||
NCVTestProvider(testName),
|
||||
src(src),
|
||||
width(width),
|
||||
height(height),
|
||||
scaleFactor(scaleFactor),
|
||||
bTextureCache(bTextureCache)
|
||||
{
|
||||
}
|
||||
|
||||
|
||||
template <class T>
|
||||
bool TestResize<T>::toString(std::ofstream &strOut)
|
||||
{
|
||||
strOut << "sizeof(T)=" << sizeof(T) << std::endl;
|
||||
strOut << "width=" << width << std::endl;
|
||||
strOut << "scaleFactor=" << scaleFactor << std::endl;
|
||||
strOut << "bTextureCache=" << bTextureCache << std::endl;
|
||||
return true;
|
||||
}
|
||||
|
||||
|
||||
template <class T>
|
||||
bool TestResize<T>::init()
|
||||
{
|
||||
return true;
|
||||
}
|
||||
|
||||
|
||||
template <class T>
|
||||
bool TestResize<T>::process()
|
||||
{
|
||||
NCVStatus ncvStat;
|
||||
bool rcode = false;
|
||||
|
||||
Ncv32s smallWidth = this->width / this->scaleFactor;
|
||||
Ncv32s smallHeight = this->height / this->scaleFactor;
|
||||
if (smallWidth == 0 || smallHeight == 0)
|
||||
{
|
||||
return true;
|
||||
}
|
||||
|
||||
NcvSize32u srcSize(this->width, this->height);
|
||||
|
||||
NCVMatrixAlloc<T> d_img(*this->allocatorGPU.get(), this->width, this->height);
|
||||
ncvAssertReturn(d_img.isMemAllocated(), false);
|
||||
NCVMatrixAlloc<T> h_img(*this->allocatorCPU.get(), this->width, this->height);
|
||||
ncvAssertReturn(h_img.isMemAllocated(), false);
|
||||
|
||||
NCVMatrixAlloc<T> d_small(*this->allocatorGPU.get(), smallWidth, smallHeight);
|
||||
ncvAssertReturn(d_small.isMemAllocated(), false);
|
||||
NCVMatrixAlloc<T> h_small(*this->allocatorCPU.get(), smallWidth, smallHeight);
|
||||
ncvAssertReturn(h_small.isMemAllocated(), false);
|
||||
NCVMatrixAlloc<T> h_small_d(*this->allocatorCPU.get(), smallWidth, smallHeight);
|
||||
ncvAssertReturn(h_small_d.isMemAllocated(), false);
|
||||
|
||||
NCV_SET_SKIP_COND(this->allocatorGPU.get()->isCounting());
|
||||
NCV_SKIP_COND_BEGIN
|
||||
ncvAssertReturn(this->src.fill(h_img), false);
|
||||
NCV_SKIP_COND_END
|
||||
|
||||
ncvStat = h_img.copySolid(d_img, 0);
|
||||
ncvAssertReturn(ncvStat == NPPST_SUCCESS, false);
|
||||
NCV_SKIP_COND_BEGIN
|
||||
if (sizeof(T) == sizeof(Ncv32u))
|
||||
{
|
||||
ncvStat = nppiStDownsampleNearest_32u_C1R((Ncv32u *)d_img.ptr(), d_img.pitch(),
|
||||
(Ncv32u *)d_small.ptr(), d_small.pitch(),
|
||||
srcSize, this->scaleFactor,
|
||||
this->bTextureCache);
|
||||
}
|
||||
else if (sizeof(T) == sizeof(Ncv64u))
|
||||
{
|
||||
ncvStat = nppiStDownsampleNearest_64u_C1R((Ncv64u *)d_img.ptr(), d_img.pitch(),
|
||||
(Ncv64u *)d_small.ptr(), d_small.pitch(),
|
||||
srcSize, this->scaleFactor,
|
||||
this->bTextureCache);
|
||||
}
|
||||
else
|
||||
{
|
||||
ncvAssertPrintReturn(false, "Incorrect downsample test instance", false);
|
||||
}
|
||||
ncvAssertReturn(ncvStat == NPPST_SUCCESS, false);
|
||||
NCV_SKIP_COND_END
|
||||
ncvStat = d_small.copySolid(h_small_d, 0);
|
||||
ncvAssertReturn(ncvStat == NPPST_SUCCESS, false);
|
||||
|
||||
NCV_SKIP_COND_BEGIN
|
||||
if (sizeof(T) == sizeof(Ncv32u))
|
||||
{
|
||||
ncvStat = nppiStDownsampleNearest_32u_C1R_host((Ncv32u *)h_img.ptr(), h_img.pitch(),
|
||||
(Ncv32u *)h_small.ptr(), h_small.pitch(),
|
||||
srcSize, this->scaleFactor);
|
||||
}
|
||||
else if (sizeof(T) == sizeof(Ncv64u))
|
||||
{
|
||||
ncvStat = nppiStDownsampleNearest_64u_C1R_host((Ncv64u *)h_img.ptr(), h_img.pitch(),
|
||||
(Ncv64u *)h_small.ptr(), h_small.pitch(),
|
||||
srcSize, this->scaleFactor);
|
||||
}
|
||||
else
|
||||
{
|
||||
ncvAssertPrintReturn(false, "Incorrect downsample test instance", false);
|
||||
}
|
||||
ncvAssertReturn(ncvStat == NPPST_SUCCESS, false);
|
||||
NCV_SKIP_COND_END
|
||||
|
||||
//bit-to-bit check
|
||||
bool bLoopVirgin = true;
|
||||
|
||||
NCV_SKIP_COND_BEGIN
|
||||
//const Ncv64f relEPS = 0.005;
|
||||
for (Ncv32u i=0; bLoopVirgin && i < h_small.height(); i++)
|
||||
{
|
||||
for (Ncv32u j=0; bLoopVirgin && j < h_small.width(); j++)
|
||||
{
|
||||
if (h_small.ptr()[h_small.stride()*i+j] != h_small_d.ptr()[h_small_d.stride()*i+j])
|
||||
{
|
||||
bLoopVirgin = false;
|
||||
}
|
||||
}
|
||||
}
|
||||
NCV_SKIP_COND_END
|
||||
|
||||
if (bLoopVirgin)
|
||||
{
|
||||
rcode = true;
|
||||
}
|
||||
|
||||
return rcode;
|
||||
}
|
||||
|
||||
|
||||
template <class T>
|
||||
bool TestResize<T>::deinit()
|
||||
{
|
||||
return true;
|
||||
}
|
||||
|
||||
|
||||
template class TestResize<Ncv32u>;
|
||||
template class TestResize<Ncv64u>;
|
42
tests/gpu/src/nvidia/TestResize.h
Normal file
@@ -0,0 +1,42 @@
/*
 * Copyright 1993-2010 NVIDIA Corporation. All rights reserved.
 *
 * NVIDIA Corporation and its licensors retain all intellectual
 * property and proprietary rights in and to this software and
 * related documentation and any modifications thereto.
 * Any use, reproduction, disclosure, or distribution of this
 * software and related documentation without an express license
 * agreement from NVIDIA Corporation is strictly prohibited.
 */
#ifndef _testresize_h_
#define _testresize_h_

#include "NCVTest.hpp"
#include "NCVTestSourceProvider.hpp"

template <class T>
class TestResize : public NCVTestProvider
{
public:

    TestResize(std::string testName, NCVTestSourceProvider<T> &src,
               Ncv32u width, Ncv32u height, Ncv32u scaleFactor, NcvBool bTextureCache);

    virtual bool init();
    virtual bool process();
    virtual bool deinit();
    virtual bool toString(std::ofstream &strOut);

private:
    TestResize(const TestResize&);
    TestResize& operator=(const TestResize&);

    NCVTestSourceProvider<T> &src;
    Ncv32u width;
    Ncv32u height;
    Ncv32u scaleFactor;

    NcvBool bTextureCache;
};

#endif // _testresize_h_
148
tests/gpu/src/nvidia/TestTranspose.cpp
Normal file
@@ -0,0 +1,148 @@
|
||||
/*
|
||||
* Copyright 1993-2010 NVIDIA Corporation. All rights reserved.
|
||||
*
|
||||
* NVIDIA Corporation and its licensors retain all intellectual
|
||||
* property and proprietary rights in and to this software and
|
||||
* related documentation and any modifications thereto.
|
||||
* Any use, reproduction, disclosure, or distribution of this
|
||||
* software and related documentation without an express license
|
||||
* agreement from NVIDIA Corporation is strictly prohibited.
|
||||
*/
|
||||
|
||||
#include <math.h>
|
||||
|
||||
#include "TestTranspose.h"
|
||||
|
||||
|
||||
template <class T>
|
||||
TestTranspose<T>::TestTranspose(std::string testName, NCVTestSourceProvider<T> &src,
|
||||
Ncv32u width, Ncv32u height)
|
||||
:
|
||||
NCVTestProvider(testName),
|
||||
src(src),
|
||||
width(width),
|
||||
height(height)
|
||||
{
|
||||
}
|
||||
|
||||
|
||||
template <class T>
|
||||
bool TestTranspose<T>::toString(std::ofstream &strOut)
|
||||
{
|
||||
strOut << "sizeof(T)=" << sizeof(T) << std::endl;
|
||||
strOut << "width=" << width << std::endl;
|
||||
return true;
|
||||
}
|
||||
|
||||
|
||||
template <class T>
|
||||
bool TestTranspose<T>::init()
|
||||
{
|
||||
return true;
|
||||
}
|
||||
|
||||
|
||||
template <class T>
|
||||
bool TestTranspose<T>::process()
|
||||
{
|
||||
NCVStatus ncvStat;
|
||||
bool rcode = false;
|
||||
|
||||
NcvSize32u srcSize(this->width, this->height);
|
||||
|
||||
NCVMatrixAlloc<T> d_img(*this->allocatorGPU.get(), this->width, this->height);
|
||||
ncvAssertReturn(d_img.isMemAllocated(), false);
|
||||
NCVMatrixAlloc<T> h_img(*this->allocatorCPU.get(), this->width, this->height);
|
||||
ncvAssertReturn(h_img.isMemAllocated(), false);
|
||||
|
||||
NCVMatrixAlloc<T> d_dst(*this->allocatorGPU.get(), this->height, this->width);
|
||||
ncvAssertReturn(d_dst.isMemAllocated(), false);
|
||||
NCVMatrixAlloc<T> h_dst(*this->allocatorCPU.get(), this->height, this->width);
|
||||
ncvAssertReturn(h_dst.isMemAllocated(), false);
|
||||
NCVMatrixAlloc<T> h_dst_d(*this->allocatorCPU.get(), this->height, this->width);
|
||||
ncvAssertReturn(h_dst_d.isMemAllocated(), false);
|
||||
|
||||
NCV_SET_SKIP_COND(this->allocatorGPU.get()->isCounting());
|
||||
NCV_SKIP_COND_BEGIN
|
||||
ncvAssertReturn(this->src.fill(h_img), false);
|
||||
NCV_SKIP_COND_END
|
||||
|
||||
ncvStat = h_img.copySolid(d_img, 0);
|
||||
ncvAssertReturn(ncvStat == NPPST_SUCCESS, false);
|
||||
NCV_SKIP_COND_BEGIN
|
||||
if (sizeof(T) == sizeof(Ncv32u))
|
||||
{
|
||||
ncvStat = nppiStTranspose_32u_C1R((Ncv32u *)d_img.ptr(), d_img.pitch(),
|
||||
(Ncv32u *)d_dst.ptr(), d_dst.pitch(),
|
||||
NcvSize32u(this->width, this->height));
|
||||
}
|
||||
else if (sizeof(T) == sizeof(Ncv64u))
|
||||
{
|
||||
ncvStat = nppiStTranspose_64u_C1R((Ncv64u *)d_img.ptr(), d_img.pitch(),
|
||||
(Ncv64u *)d_dst.ptr(), d_dst.pitch(),
|
||||
NcvSize32u(this->width, this->height));
|
||||
}
|
||||
else
|
||||
{
|
||||
ncvAssertPrintReturn(false, "Incorrect transpose test instance", false);
|
||||
}
|
||||
ncvAssertReturn(ncvStat == NPPST_SUCCESS, false);
|
||||
NCV_SKIP_COND_END
|
||||
ncvStat = d_dst.copySolid(h_dst_d, 0);
|
||||
ncvAssertReturn(ncvStat == NPPST_SUCCESS, false);
|
||||
|
||||
NCV_SKIP_COND_BEGIN
|
||||
if (sizeof(T) == sizeof(Ncv32u))
|
||||
{
|
||||
ncvStat = nppiStTranspose_32u_C1R_host((Ncv32u *)h_img.ptr(), h_img.pitch(),
|
||||
(Ncv32u *)h_dst.ptr(), h_dst.pitch(),
|
||||
NcvSize32u(this->width, this->height));
|
||||
}
|
||||
else if (sizeof(T) == sizeof(Ncv64u))
|
||||
{
|
||||
ncvStat = nppiStTranspose_64u_C1R_host((Ncv64u *)h_img.ptr(), h_img.pitch(),
|
||||
(Ncv64u *)h_dst.ptr(), h_dst.pitch(),
|
||||
NcvSize32u(this->width, this->height));
|
||||
}
|
||||
else
|
||||
{
|
||||
ncvAssertPrintReturn(false, "Incorrect transpose test instance", false);
|
||||
}
|
||||
ncvAssertReturn(ncvStat == NPPST_SUCCESS, false);
|
||||
NCV_SKIP_COND_END
|
||||
|
||||
//bit-to-bit check
|
||||
bool bLoopVirgin = true;
|
||||
|
||||
NCV_SKIP_COND_BEGIN
|
||||
//const Ncv64f relEPS = 0.005;
|
||||
for (Ncv32u i=0; bLoopVirgin && i < this->width; i++)
|
||||
{
|
||||
for (Ncv32u j=0; bLoopVirgin && j < this->height; j++)
|
||||
{
|
||||
if (h_dst.ptr()[h_dst.stride()*i+j] != h_dst_d.ptr()[h_dst_d.stride()*i+j])
|
||||
{
|
||||
bLoopVirgin = false;
|
||||
}
|
||||
}
|
||||
}
|
||||
NCV_SKIP_COND_END
|
||||
|
||||
if (bLoopVirgin)
|
||||
{
|
||||
rcode = true;
|
||||
}
|
||||
|
||||
return rcode;
|
||||
}
|
||||
|
||||
|
||||
template <class T>
|
||||
bool TestTranspose<T>::deinit()
|
||||
{
|
||||
return true;
|
||||
}
|
||||
|
||||
|
||||
template class TestTranspose<Ncv32u>;
|
||||
template class TestTranspose<Ncv64u>;
|
41
tests/gpu/src/nvidia/TestTranspose.h
Normal file
@@ -0,0 +1,41 @@
/*
 * Copyright 1993-2010 NVIDIA Corporation. All rights reserved.
 *
 * NVIDIA Corporation and its licensors retain all intellectual
 * property and proprietary rights in and to this software and
 * related documentation and any modifications thereto.
 * Any use, reproduction, disclosure, or distribution of this
 * software and related documentation without an express license
 * agreement from NVIDIA Corporation is strictly prohibited.
 */
#ifndef _testtranspose_h_
#define _testtranspose_h_


#include "NCVTest.hpp"
#include "NCVTestSourceProvider.hpp"


template <class T>
class TestTranspose : public NCVTestProvider
{
public:

    TestTranspose(std::string testName, NCVTestSourceProvider<T> &src,
                  Ncv32u width, Ncv32u height);

    virtual bool init();
    virtual bool process();
    virtual bool deinit();
    virtual bool toString(std::ofstream &strOut);

private:

    TestTranspose(const TestTranspose&);
    TestTranspose& operator=(const TestTranspose&);

    NCVTestSourceProvider<T> &src;
    Ncv32u width;
    Ncv32u height;
};

#endif // _testtranspose_h_
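As a usage note: main_nvidia.cpp below registers instances of this class through NCVAutoTestLister. A minimal sketch of running a single transpose test the same way is shown here; the helper name runSingleTransposeTest is hypothetical, while the constructor and method calls mirror those that appear in main_nvidia().

#include "NCVAutoTestLister.hpp"
#include "NCVTestSourceProvider.hpp"
#include "TestTranspose.h"

// Hypothetical helper: registers one 64x64 transpose test and runs it.
void runSingleTransposeTest()
{
    NCVAutoTestLister lister("NPPST Transpose (single)");
    NCVTestSourceProvider<Ncv32u> src(2010, 0, 0xFFFFFFFF, 4096, 4096);
    lister.add(new TestTranspose<Ncv32u>("TestTranspose_64x64", src, 64, 64));
    lister.invoke();
}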
346
tests/gpu/src/nvidia/main_nvidia.cpp
Normal file
@ -0,0 +1,346 @@
#pragma warning (disable : 4408 4201 4100)

#include <cstdio>

#include "NCV.hpp"
#include "NCVHaarObjectDetection.hpp"

#include "TestIntegralImage.h"
#include "TestIntegralImageSquared.h"
#include "TestRectStdDev.h"
#include "TestResize.h"
#include "TestCompact.h"
#include "TestTranspose.h"

#include "TestDrawRects.h"
#include "TestHypothesesGrow.h"
#include "TestHypothesesFilter.h"
#include "TestHaarCascadeLoader.h"
#include "TestHaarCascadeApplication.h"

#include "NCVAutoTestLister.hpp"
#include "NCVTestSourceProvider.hpp"


template <class T_in, class T_out>
void generateIntegralTests(NCVAutoTestLister &testLister, NCVTestSourceProvider<T_in> &src,
                           Ncv32u maxWidth, Ncv32u maxHeight)
{
    for (Ncv32f _i=1.0; _i<maxWidth; _i*=1.2f)
    {
        Ncv32u i = (Ncv32u)_i;
        char testName[80];
        sprintf_s(testName, sizeof(testName), "LinIntImgW%dH%d", i, 2);
        testLister.add(new TestIntegralImage<T_in, T_out>(testName, src, i, 2));
    }
    for (Ncv32f _i=1.0; _i<maxHeight; _i*=1.2f)
    {
        Ncv32u i = (Ncv32u)_i;
        char testName[80];
        sprintf_s(testName, sizeof(testName), "LinIntImgW%dH%d", 2, i);
        testLister.add(new TestIntegralImage<T_in, T_out>(testName, src, 2, i));
    }

    //test VGA
    testLister.add(new TestIntegralImage<T_in, T_out>("LinIntImg_VGA", src, 640, 480));

    //TODO: add tests of various resolutions up to 4096x4096
}


void generateSquaredIntegralTests(NCVAutoTestLister &testLister, NCVTestSourceProvider<Ncv8u> &src,
                                  Ncv32u maxWidth, Ncv32u maxHeight)
{
    for (Ncv32f _i=1.0; _i<maxWidth; _i*=1.2f)
    {
        Ncv32u i = (Ncv32u)_i;
        char testName[80];
        sprintf_s(testName, sizeof(testName), "SqIntImgW%dH%d", i, 32);
        testLister.add(new TestIntegralImageSquared(testName, src, i, 32));
    }
    for (Ncv32f _i=1.0; _i<maxHeight; _i*=1.2f)
    {
        Ncv32u i = (Ncv32u)_i;
        char testName[80];
        sprintf_s(testName, sizeof(testName), "SqIntImgW%dH%d", 32, i);
        testLister.add(new TestIntegralImageSquared(testName, src, 32, i));
    }

    //test VGA
    testLister.add(new TestIntegralImageSquared("SqLinIntImg_VGA", src, 640, 480));

    //TODO: add tests of various resolutions up to 4096x4096
}


void generateRectStdDevTests(NCVAutoTestLister &testLister, NCVTestSourceProvider<Ncv8u> &src,
                             Ncv32u maxWidth, Ncv32u maxHeight)
{
    NcvRect32u rect(1,1,18,18);

    for (Ncv32f _i=32; _i<maxHeight/2 && _i < maxWidth/2; _i*=1.2f)
    {
        Ncv32u i = (Ncv32u)_i;
        char testName[80];
        sprintf_s(testName, sizeof(testName), "RectStdDevW%dH%d", i*2, i);
        testLister.add(new TestRectStdDev(testName, src, i*2, i, rect, 1, true));
        testLister.add(new TestRectStdDev(testName, src, i*2, i, rect, 1.5, false));
        testLister.add(new TestRectStdDev(testName, src, i-1, i*2-1, rect, 1, false));
        testLister.add(new TestRectStdDev(testName, src, i-1, i*2-1, rect, 2.5, true));
    }

    //test VGA
    testLister.add(new TestRectStdDev("RectStdDev_VGA", src, 640, 480, rect, 1, true));

    //TODO: add tests of various resolutions up to 4096x4096
}


template <class T>
void generateResizeTests(NCVAutoTestLister &testLister, NCVTestSourceProvider<T> &src)
{
    //test VGA
    for (Ncv32u i=1; i<480; i+=3)
    {
        char testName[80];
        sprintf_s(testName, sizeof(testName), "TestResize_VGA_s%d", i);
        testLister.add(new TestResize<T>(testName, src, 640, 480, i, true));
        testLister.add(new TestResize<T>(testName, src, 640, 480, i, false));
    }

    //test HD
    for (Ncv32u i=1; i<1080; i+=5)
    {
        char testName[80];
        sprintf_s(testName, sizeof(testName), "TestResize_1080_s%d", i);
        testLister.add(new TestResize<T>(testName, src, 1920, 1080, i, true));
        testLister.add(new TestResize<T>(testName, src, 1920, 1080, i, false));
    }

    //TODO: add tests of various resolutions up to 4096x4096
}


void generateNPPSTVectorTests(NCVAutoTestLister &testLister, NCVTestSourceProvider<Ncv32u> &src, Ncv32u maxLength)
{
    //compaction
    for (Ncv32f _i=256.0; _i<maxLength; _i*=1.1f)
    {
        Ncv32u i = (Ncv32u)_i;
        char testName[80];
        sprintf_s(testName, sizeof(testName), "Compaction%d", i);
        testLister.add(new TestCompact(testName, src, i, 0xFFFFFFFF, 30));
    }
    for (Ncv32u i=1; i<260; i++)
    {
        char testName[80];
        sprintf_s(testName, sizeof(testName), "Compaction%d", i);
        testLister.add(new TestCompact(testName, src, i, 0xC001C0DE, 70));
        testLister.add(new TestCompact(testName, src, i, 0xC001C0DE, 0));
        testLister.add(new TestCompact(testName, src, i, 0xC001C0DE, 100));
    }
    for (Ncv32u i=256*256-256; i<256*256+257; i++)
    {
        char testName[80];
        sprintf_s(testName, sizeof(testName), "Compaction%d", i);
        testLister.add(new TestCompact(testName, src, i, 0xFFFFFFFF, 40));
    }
    for (Ncv32u i=256*256*256-10; i<256*256*256+10; i++)
    {
        char testName[80];
        sprintf_s(testName, sizeof(testName), "Compaction%d", i);
        testLister.add(new TestCompact(testName, src, i, 0x00000000, 2));
    }
}


template <class T>
void generateTransposeTests(NCVAutoTestLister &testLister, NCVTestSourceProvider<T> &src)
{
    for (int i=2; i<64; i+=4)
    {
        for (int j=2; j<64; j+=4)
        {
            char testName[80];
            sprintf_s(testName, sizeof(testName), "TestTranspose_%dx%d", i, j);
            testLister.add(new TestTranspose<T>(testName, src, i, j));
        }
    }

    for (int i=1; i<128; i+=1)
    {
        for (int j=1; j<2; j+=1)
        {
            char testName[80];
            sprintf_s(testName, sizeof(testName), "TestTranspose_%dx%d", i, j);
            testLister.add(new TestTranspose<T>(testName, src, i, j));
        }
    }

    testLister.add(new TestTranspose<T>("TestTranspose_VGA", src, 640, 480));
    testLister.add(new TestTranspose<T>("TestTranspose_HD1080", src, 1920, 1080));
}


template <class T>
void generateDrawRectsTests(NCVAutoTestLister &testLister, NCVTestSourceProvider<T> &src, NCVTestSourceProvider<Ncv32u> &src32u,
                            Ncv32u maxWidth, Ncv32u maxHeight)
{
    for (Ncv32f _i=16.0; _i<maxWidth; _i*=1.1f)
    {
        Ncv32u i = (Ncv32u)_i;
        Ncv32u j = maxHeight * i / maxWidth;
        if (!j) continue;
        char testName[80];
        sprintf_s(testName, sizeof(testName), "DrawRectsW%dH%d", i, j);

        if (sizeof(T) == sizeof(Ncv32u))
        {
            testLister.add(new TestDrawRects<T>(testName, src, src32u, i, j, i*j/1000+1, (T)0xFFFFFFFF));
        }
        else if (sizeof(T) == sizeof(Ncv8u))
        {
            testLister.add(new TestDrawRects<T>(testName, src, src32u, i, j, i*j/1000+1, (T)0xFF));
        }
        else
        {
            ncvAssertPrintCheck(false, "Attempted to instantiate non-existing DrawRects test suite");
        }
    }

    //test VGA
    testLister.add(new TestDrawRects<T>("DrawRects_VGA", src, src32u, 640, 480, 640*480/1000, (T)0xFF));

    //TODO: add tests of various resolutions up to 4096x4096
}


void generateVectorTests(NCVAutoTestLister &testLister, NCVTestSourceProvider<Ncv32u> &src, Ncv32u maxLength)
{
    //growth
    for (Ncv32f _i=10.0; _i<maxLength; _i*=1.1f)
    {
        Ncv32u i = (Ncv32u)_i;
        char testName[80];
        sprintf_s(testName, sizeof(testName), "VectorGrow%d", i);
        testLister.add(new TestHypothesesGrow(testName, src, 20, 20, 2.2f, i, i/2, i, i/4));
        testLister.add(new TestHypothesesGrow(testName, src, 10, 42, 1.2f, i, i, i, 0));
    }
    testLister.add(new TestHypothesesGrow("VectorGrow01b", src, 10, 42, 1.2f, 10, 0, 10, 1));
    testLister.add(new TestHypothesesGrow("VectorGrow11b", src, 10, 42, 1.2f, 10, 1, 10, 1));
    testLister.add(new TestHypothesesGrow("VectorGrow10b", src, 10, 42, 1.2f, 10, 1, 10, 0));
    testLister.add(new TestHypothesesGrow("VectorGrow00b", src, 10, 42, 1.2f, 10, 0, 10, 0));
}


void generateHypothesesFiltrationTests(NCVAutoTestLister &testLister, NCVTestSourceProvider<Ncv32u> &src, Ncv32u maxLength)
{
    for (Ncv32f _i=1.0; _i<maxLength; _i*=1.1f)
    {
        Ncv32u i = (Ncv32u)_i;
        char testName[80];
        sprintf_s(testName, sizeof(testName), "HypFilter%d", i);
        testLister.add(new TestHypothesesFilter(testName, src, i, 3, 0.2f));
        testLister.add(new TestHypothesesFilter(testName, src, i, 0, 0.2f));
        testLister.add(new TestHypothesesFilter(testName, src, i, 1, 0.1f));
    }
}


void generateHaarLoaderTests(NCVAutoTestLister &testLister)
{
    testLister.add(new TestHaarCascadeLoader("haarcascade_eye.xml", "haarcascade_eye.xml"));
    testLister.add(new TestHaarCascadeLoader("haarcascade_frontalface_alt.xml", "haarcascade_frontalface_alt.xml"));
    testLister.add(new TestHaarCascadeLoader("haarcascade_frontalface_alt2.xml", "haarcascade_frontalface_alt2.xml"));
    testLister.add(new TestHaarCascadeLoader("haarcascade_frontalface_alt_tree.xml", "haarcascade_frontalface_alt_tree.xml"));
    testLister.add(new TestHaarCascadeLoader("haarcascade_eye_tree_eyeglasses.xml", "haarcascade_eye_tree_eyeglasses.xml"));
}


void generateHaarApplicationTests(NCVAutoTestLister &testLister, NCVTestSourceProvider<Ncv8u> &src,
                                  Ncv32u maxWidth, Ncv32u maxHeight)
{
    for (Ncv32u i=20; i<512; i+=11)
    {
        for (Ncv32u j=20; j<128; j+=5)
        {
            char testName[80];
            sprintf_s(testName, sizeof(testName), "HaarAppl%d_%d", i, j);
            testLister.add(new TestHaarCascadeApplication(testName, src, "haarcascade_frontalface_alt.xml", j, i));
        }
    }
    for (Ncv32f _i=20.0; _i<maxWidth; _i*=1.1f)
    {
        Ncv32u i = (Ncv32u)_i;
        char testName[80];
        sprintf_s(testName, sizeof(testName), "HaarAppl%d", i);
        testLister.add(new TestHaarCascadeApplication(testName, src, "haarcascade_frontalface_alt.xml", i, i));
    }
}


static void devNullOutput(const char *msg)
{
}
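The runner installs devNullOutput to silence NCV debug messages during the sweep. When chasing a failing case it can help to forward them instead; the handler below is only a sketch (its name is not part of the commit), but the handler signature and ncvSetDebugOutputHandler are taken from the code in this file.

// Sketch: verbose alternative to devNullOutput that forwards NCV debug
// messages to stderr. Select it with ncvSetDebugOutputHandler(stderrOutput).
static void stderrOutput(const char *msg)
{
    fprintf(stderr, "NCV: %s\n", msg);
}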

int main_nvidia()
{
    printf("Testing NVIDIA Computer Vision SDK\n");
    printf("==================================\n");

    ncvSetDebugOutputHandler(devNullOutput);

    NCVAutoTestLister testListerII("NPPST Integral Image");//,,true, false);
    NCVAutoTestLister testListerSII("NPPST Squared Integral Image");//,,true, false);
    NCVAutoTestLister testListerRStdDev("NPPST RectStdDev");//,,true, false);
    NCVAutoTestLister testListerResize("NPPST Resize");//,,true, false);
    NCVAutoTestLister testListerNPPSTVectorOperations("NPPST Vector Operations");//,,true, false);
    NCVAutoTestLister testListerTranspose("NPPST Transpose");//,,true, false);

    NCVAutoTestLister testListerVectorOperations("Vector Operations");//,,true, false);
    NCVAutoTestLister testListerHaarLoader("Haar Cascade Loader");//,,true, false);
    NCVAutoTestLister testListerHaarAppl("Haar Cascade Application");//,,true, false);
    NCVAutoTestLister testListerHypFiltration("Hypotheses Filtration");//,,true, false);
    NCVAutoTestLister testListerVisualize("Visualization");//,,true, false);

    printf("Initializing data source providers\n");
    NCVTestSourceProvider<Ncv32u> testSrcRandom_32u(2010, 0, 0xFFFFFFFF, 4096, 4096);
    NCVTestSourceProvider<Ncv8u> testSrcRandom_8u(2010, 0, 255, 4096, 4096);
    NCVTestSourceProvider<Ncv64u> testSrcRandom_64u(2010, 0, 0xFFFFFFFFFFFFFFFF, 4096, 4096);
    NCVTestSourceProvider<Ncv8u> testSrcFacesVGA_8u("../../data/group_1_640x480_VGA.pgm");
    NCVTestSourceProvider<Ncv32f> testSrcRandom_32f(2010, -1.0f, 1.0f, 4096, 4096);

    printf("Generating NPPST test suites\n");
    generateIntegralTests<Ncv8u, Ncv32u>(testListerII, testSrcRandom_8u, 4096, 4096);
    generateIntegralTests<Ncv32f, Ncv32f>(testListerII, testSrcRandom_32f, 4096, 4096);
    generateSquaredIntegralTests(testListerSII, testSrcRandom_8u, 4096, 4096);
    generateRectStdDevTests(testListerRStdDev, testSrcRandom_8u, 4096, 4096);
    generateResizeTests(testListerResize, testSrcRandom_32u);
    generateResizeTests(testListerResize, testSrcRandom_64u);
    generateNPPSTVectorTests(testListerNPPSTVectorOperations, testSrcRandom_32u, 4096*4096);
    generateTransposeTests(testListerTranspose, testSrcRandom_32u);
    generateTransposeTests(testListerTranspose, testSrcRandom_64u);

    printf("Generating NCV test suites\n");
    generateDrawRectsTests(testListerVisualize, testSrcRandom_8u, testSrcRandom_32u, 4096, 4096);
    generateDrawRectsTests(testListerVisualize, testSrcRandom_32u, testSrcRandom_32u, 4096, 4096);
    generateVectorTests(testListerVectorOperations, testSrcRandom_32u, 4096*4096);
    generateHypothesesFiltrationTests(testListerHypFiltration, testSrcRandom_32u, 1024);
    generateHaarLoaderTests(testListerHaarLoader);
    generateHaarApplicationTests(testListerHaarAppl, testSrcFacesVGA_8u, 1280, 720);

    testListerII.invoke();
    testListerSII.invoke();
    testListerRStdDev.invoke();
    testListerResize.invoke();
    testListerNPPSTVectorOperations.invoke();
    testListerTranspose.invoke();
    testListerVisualize.invoke();
    testListerVectorOperations.invoke();
    testListerHypFiltration.invoke();
    testListerHaarLoader.invoke();
    testListerHaarAppl.invoke();

    return 0;
}
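One portability note on the generators above: sprintf_s belongs to the MSVC CRT, so this file as committed builds only with Visual C++. If it ever needs to compile under GCC/Clang, a small snprintf-based shim would suffice; the sketch below shows one way that could look and is an assumption, not part of the commit.

#if !defined(_MSC_VER)
#include <cstdio>
#include <cstdarg>
#include <cstddef>

// Sketch of a drop-in replacement for the MSVC-only sprintf_s(buf, size, fmt, ...)
// calls used by the test generators above.
static int sprintf_s(char *buffer, size_t bufferSize, const char *format, ...)
{
    va_list args;
    va_start(args, format);
    int written = vsnprintf(buffer, bufferSize, format, args);
    va_end(args);
    return written;
}
#endif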
63
tests/gpu/src/nvidia_tests.cpp
Normal file
@ -0,0 +1,63 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
//  By downloading, copying, installing or using the software you agree to this license.
//  If you do not agree to this license, do not download, install,
//  copy or use the software.
//
//
//                        Intel License Agreement
//                For Open Source Computer Vision Library
//
// Copyright (C) 2000, Intel Corporation, all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
//   * Redistribution's of source code must retain the above copyright notice,
//     this list of conditions and the following disclaimer.
//
//   * Redistribution's in binary form must reproduce the above copyright notice,
//     this list of conditions and the following disclaimer in the documentation
//     and/or other materials provided with the distribution.
//
//   * The name of Intel Corporation may not be used to endorse or promote products
//     derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/

#include "gputest.hpp"
#include "cvconfig.h"

class CV_NVidiaTestsCaller : public CvTest
{
public:
    CV_NVidiaTestsCaller() : CvTest("GPU-NVidia", "NVidia") {}
    virtual ~CV_NVidiaTestsCaller() {}

protected:

    void run( int )
    {
#if defined(HAVE_CUDA)
        int main_nvidia();
        main_nvidia();
        ts->set_failed_test_info(CvTS::OK);
#else
        ts->set_failed_test_info(CvTS::SKIPPED);
#endif
    }
} CV_NVidiaTestsCaller_test;
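The file-scope CV_NVidiaTestsCaller_test instance is what wires this suite into the gputest runner: CvTest registers each object with the test system from its constructor, so constructing a global is all the hookup required. Reduced to a sketch with a hypothetical registry (a stand-in, not OpenCV's real ts machinery), the idiom is:

#include <string>
#include <vector>

// Sketch of the constructor-time self-registration idiom; 'registry' is a
// stand-in for the internal list the test framework maintains.
struct SelfRegisteringTest
{
    explicit SelfRegisteringTest(const std::string &name) { registry().push_back(name); }

    static std::vector<std::string>& registry()
    {
        static std::vector<std::string> tests;
        return tests;
    }
};

static SelfRegisteringTest gpuNvidiaEntry("GPU-NVidia");  // registered before main() runs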