[~] Minor refactoring, clean-up
[+] Added 128-bit transpose
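The new 128-bit transpose entry points added by this commit take untyped pointers and treat each pixel as a 16-byte element. A minimal usage sketch for the device variant follows; it assumes the step arguments are counted in 128-bit (uint4) elements, as the uint4 cast inside the wrapper suggests, and the helper function name is illustrative only:

#include <cuda_runtime.h>
#include "NPP_staging.hpp"

// Sketch only: transpose a width x height image of 128-bit pixels on the device.
// d_src/d_dst are device buffers; srcStep/dstStep are assumed to be strides in
// 128-bit elements (adjust if the API actually expects byte strides).
NCVStatus transpose128Example(void *d_src, Ncv32u srcStep,
                              void *d_dst, Ncv32u dstStep,
                              Ncv32u width, Ncv32u height)
{
    NcvSize32u srcRoi;
    srcRoi.width  = width;
    srcRoi.height = height;
    return nppiStTranspose_128_C1R(d_src, srcStep, d_dst, dstStep, srcRoi);
}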

@@ -63,8 +63,6 @@
 #include "NCVRuntimeTemplates.hpp"
 #include "NCVHaarObjectDetection.hpp"

-void groupRectangles(std::vector<NcvRect32u> &hypotheses, int groupThreshold, double eps, std::vector<Ncv32u> *weights);
-

 //==============================================================================
 //
@@ -785,7 +783,6 @@ void applyHaarClassifierAnchorParallelDynTemplate(NcvBool tbInitMaskPositively,
 //Second parameter is the number of "dynamic" template parameters
 NCVRuntimeTemplateBool::KernelCaller<Loki::NullType, 5, applyHaarClassifierAnchorParallelFunctor>
 ::call( &functor,
-0xC001C0DE, //this is dummy int for the va_args C compatibility
 tbInitMaskPositively,
 tbCacheTextureIImg,
 tbCacheTextureCascade,
@@ -890,7 +887,6 @@ void applyHaarClassifierClassifierParallelDynTemplate(NcvBool tbCacheTextureIImg
 //Second parameter is the number of "dynamic" template parameters
 NCVRuntimeTemplateBool::KernelCaller<Loki::NullType, 3, applyHaarClassifierClassifierParallelFunctor>
 ::call( &functor,
-0xC001C0DE, //this is dummy int for the va_args C compatibility
 tbCacheTextureIImg,
 tbCacheTextureCascade,
 tbDoAtomicCompaction);
@@ -957,7 +953,6 @@ void initializeMaskVectorDynTemplate(NcvBool tbMaskByInmask,
 //Second parameter is the number of "dynamic" template parameters
 NCVRuntimeTemplateBool::KernelCaller<Loki::NullType, 2, initializeMaskVectorFunctor>
 ::call( &functor,
-0xC001C0DE, //this is dummy int for the va_args C compatibility
 tbMaskByInmask,
 tbDoAtomicCompaction);
 }
@@ -1554,172 +1549,6 @@ NCVStatus ncvGrowDetectionsVector_device(NCVVector<Ncv32u> &pixelMask,
 }


-//==============================================================================
-//
-// Visualize file
-//
-//==============================================================================
-
-
-const Ncv32u NUMTHREADS_DRAWRECTS = 32;
-const Ncv32u NUMTHREADS_DRAWRECTS_LOG2 = 5;
-
-
-template <class T>
-__global__ void drawRects(T *d_dst,
-Ncv32u dstStride,
-Ncv32u dstWidth,
-Ncv32u dstHeight,
-NcvRect32u *d_rects,
-Ncv32u numRects,
-T color)
-{
-Ncv32u blockId = blockIdx.y * 65535 + blockIdx.x;
-if (blockId > numRects * 4)
-{
-return;
-}
-
-NcvRect32u curRect = d_rects[blockId >> 2];
-NcvBool bVertical = blockId & 0x1;
-NcvBool bTopLeft = blockId & 0x2;
-
-Ncv32u pt0x, pt0y;
-if (bVertical)
-{
-Ncv32u numChunks = (curRect.height + NUMTHREADS_DRAWRECTS - 1) >> NUMTHREADS_DRAWRECTS_LOG2;
-
-pt0x = bTopLeft ? curRect.x : curRect.x + curRect.width - 1;
-pt0y = curRect.y;
-
-if (pt0x < dstWidth)
-{
-for (Ncv32u chunkId = 0; chunkId < numChunks; chunkId++)
-{
-Ncv32u ptY = pt0y + chunkId * NUMTHREADS_DRAWRECTS + threadIdx.x;
-if (ptY < pt0y + curRect.height && ptY < dstHeight)
-{
-d_dst[ptY * dstStride + pt0x] = color;
-}
-}
-}
-}
-else
-{
-Ncv32u numChunks = (curRect.width + NUMTHREADS_DRAWRECTS - 1) >> NUMTHREADS_DRAWRECTS_LOG2;
-
-pt0x = curRect.x;
-pt0y = bTopLeft ? curRect.y : curRect.y + curRect.height - 1;
-
-if (pt0y < dstHeight)
-{
-for (Ncv32u chunkId = 0; chunkId < numChunks; chunkId++)
-{
-Ncv32u ptX = pt0x + chunkId * NUMTHREADS_DRAWRECTS + threadIdx.x;
-if (ptX < pt0x + curRect.width && ptX < dstWidth)
-{
-d_dst[pt0y * dstStride + ptX] = color;
-}
-}
-}
-}
-}
-
-
-template <class T>
-static NCVStatus drawRectsWrapperDevice(T *d_dst,
-Ncv32u dstStride,
-Ncv32u dstWidth,
-Ncv32u dstHeight,
-NcvRect32u *d_rects,
-Ncv32u numRects,
-T color,
-cudaStream_t cuStream)
-{
-ncvAssertReturn(d_dst != NULL && d_rects != NULL, NCV_NULL_PTR);
-ncvAssertReturn(dstWidth > 0 && dstHeight > 0, NCV_DIMENSIONS_INVALID);
-ncvAssertReturn(dstStride >= dstWidth, NCV_INVALID_STEP);
-ncvAssertReturn(numRects <= dstWidth * dstHeight, NCV_DIMENSIONS_INVALID);
-
-if (numRects == 0)
-{
-return NCV_SUCCESS;
-}
-
-#if defined _SELF_TEST_
-T *h_dst;
-ncvAssertCUDAReturn(cudaMallocHost(&h_dst, dstStride * dstHeight * sizeof(T)), NCV_CUDA_ERROR);
-ncvAssertCUDAReturn(cudaMemcpy(h_dst, d_dst, dstStride * dstHeight * sizeof(T), cudaMemcpyDeviceToHost), NCV_CUDA_ERROR);
-NcvRect32s *h_rects;
-ncvAssertCUDAReturn(cudaMallocHost(&h_rects, numRects * sizeof(NcvRect32s)), NCV_CUDA_ERROR);
-ncvAssertCUDAReturn(cudaMemcpy(h_rects, d_rects, numRects * sizeof(NcvRect32s), cudaMemcpyDeviceToHost), NCV_CUDA_ERROR);
-ncvAssertReturnNcvStat(drawRectsWrapperHost(h_dst, dstStride, dstWidth, dstHeight, h_rects, numRects, color));
-#endif
-
-dim3 grid(numRects * 4);
-dim3 block(NUMTHREADS_DRAWRECTS);
-if (grid.x > 65535)
-{
-grid.y = (grid.x + 65534) / 65535;
-grid.x = 65535;
-}
-
-drawRects<T><<<grid, block>>>(d_dst, dstStride, dstWidth, dstHeight, d_rects, numRects, color);
-
-ncvAssertCUDAReturn(cudaGetLastError(), NCV_CUDA_ERROR);
-
-#if defined _SELF_TEST_
-T *h_dst_after;
-ncvAssertCUDAReturn(cudaMallocHost(&h_dst_after, dstStride * dstHeight * sizeof(T)), NCV_CUDA_ERROR);
-ncvAssertCUDAReturn(cudaMemcpy(h_dst_after, d_dst, dstStride * dstHeight * sizeof(T), cudaMemcpyDeviceToHost), NCV_CUDA_ERROR);
-bool bPass = true;
-for (Ncv32u i=0; i<dstHeight && bPass; i++)
-{
-for (Ncv32u j=0; j<dstWidth && bPass; j++)
-{
-if (h_dst[i*dstStride+j] != h_dst_after[i*dstStride+j])
-{
-printf("::drawRectsWrapperDevice self test failed: i=%d, j=%d, cpu=%d, gpu=%d\n", i, j, h_dst[i*dstStride+j], h_dst_after[i*dstStride+j]);
-bPass = false;
-}
-}
-}
-ncvAssertCUDAReturn(cudaFreeHost(h_dst_after), NCV_CUDA_ERROR);
-ncvAssertCUDAReturn(cudaFreeHost(h_dst), NCV_CUDA_ERROR);
-ncvAssertCUDAReturn(cudaFreeHost(h_rects), NCV_CUDA_ERROR);
-printf("::drawRectsWrapperDevice %s\n", bPass?"PASSED":"FAILED");
-#endif
-
-return NCV_SUCCESS;
-}
-
-
-NCVStatus ncvDrawRects_8u_device(Ncv8u *d_dst,
-Ncv32u dstStride,
-Ncv32u dstWidth,
-Ncv32u dstHeight,
-NcvRect32u *d_rects,
-Ncv32u numRects,
-Ncv8u color,
-cudaStream_t cuStream)
-{
-return drawRectsWrapperDevice(d_dst, dstStride, dstWidth, dstHeight, d_rects, numRects, color, cuStream);
-}
-
-
-NCVStatus ncvDrawRects_32u_device(Ncv32u *d_dst,
-Ncv32u dstStride,
-Ncv32u dstWidth,
-Ncv32u dstHeight,
-NcvRect32u *d_rects,
-Ncv32u numRects,
-Ncv32u color,
-cudaStream_t cuStream)
-{
-return drawRectsWrapperDevice(d_dst, dstStride, dstWidth, dstHeight, d_rects, numRects, color, cuStream);
-}
-
-
 //==============================================================================
 //
 // Pipeline file
@@ -1901,13 +1730,13 @@ NCVStatus ncvDetectObjectsMultiScale_device(NCVMatrix<Ncv8u> &d_srcImg,

 NCV_SKIP_COND_BEGIN

-nppStat = nppiStDownsampleNearest_32u_C1R(
+nppStat = nppiStDecimate_32u_C1R(
 d_integralImage.ptr(), d_integralImage.pitch(),
 d_scaledIntegralImage.ptr(), d_scaledIntegralImage.pitch(),
 srcIIRoi, scale, true);
 ncvAssertReturnNcvStat(nppStat);

-nppStat = nppiStDownsampleNearest_64u_C1R(
+nppStat = nppiStDecimate_64u_C1R(
 d_sqIntegralImage.ptr(), d_sqIntegralImage.pitch(),
 d_scaledSqIntegralImage.ptr(), d_scaledSqIntegralImage.pitch(),
 srcIIRoi, scale, true);
@@ -1969,7 +1798,7 @@ NCVStatus ncvDetectObjectsMultiScale_device(NCVMatrix<Ncv8u> &d_srcImg,
 }

 Ncv32u numStrongHypothesesNow = dstNumRects;
-ncvStat = ncvFilterHypotheses_host(
+ncvStat = ncvGroupRectangles_host(
 h_hypothesesIntermediate,
 numStrongHypothesesNow,
 minNeighbors,
@@ -2031,7 +1860,7 @@ NCVStatus ncvDetectObjectsMultiScale_device(NCVMatrix<Ncv8u> &d_srcImg,
 ncvAssertCUDAReturn(cudaStreamSynchronize(cuStream), NCV_CUDA_ERROR);
 }

-ncvStat = ncvFilterHypotheses_host(
+ncvStat = ncvGroupRectangles_host(
 h_hypothesesIntermediate,
 dstNumRects,
 minNeighbors,
@@ -2285,133 +2114,6 @@ NCVStatus ncvGrowDetectionsVector_host(NCVVector<Ncv32u> &pixelMask,
 }


-
-
-NCVStatus ncvFilterHypotheses_host(NCVVector<NcvRect32u> &hypotheses,
-Ncv32u &numHypotheses,
-Ncv32u minNeighbors,
-Ncv32f intersectEps,
-NCVVector<Ncv32u> *hypothesesWeights)
-{
-ncvAssertReturn(hypotheses.memType() == NCVMemoryTypeHostPageable ||
-hypotheses.memType() == NCVMemoryTypeHostPinned, NCV_MEM_RESIDENCE_ERROR);
-if (hypothesesWeights != NULL)
-{
-ncvAssertReturn(hypothesesWeights->memType() == NCVMemoryTypeHostPageable ||
-hypothesesWeights->memType() == NCVMemoryTypeHostPinned, NCV_MEM_RESIDENCE_ERROR);
-}
-
-if (numHypotheses == 0)
-{
-return NCV_SUCCESS;
-}
-
-std::vector<NcvRect32u> rects(numHypotheses);
-memcpy(&rects[0], hypotheses.ptr(), numHypotheses * sizeof(NcvRect32u));
-
-std::vector<Ncv32u> weights;
-if (hypothesesWeights != NULL)
-{
-groupRectangles(rects, minNeighbors, intersectEps, &weights);
-}
-else
-{
-groupRectangles(rects, minNeighbors, intersectEps, NULL);
-}
-
-numHypotheses = (Ncv32u)rects.size();
-if (numHypotheses > 0)
-{
-memcpy(hypotheses.ptr(), &rects[0], numHypotheses * sizeof(NcvRect32u));
-}
-
-if (hypothesesWeights != NULL)
-{
-memcpy(hypothesesWeights->ptr(), &weights[0], numHypotheses * sizeof(Ncv32u));
-}
-
-return NCV_SUCCESS;
-}
-
-
-template <class T>
-static NCVStatus drawRectsWrapperHost(T *h_dst,
-Ncv32u dstStride,
-Ncv32u dstWidth,
-Ncv32u dstHeight,
-NcvRect32u *h_rects,
-Ncv32u numRects,
-T color)
-{
-ncvAssertReturn(h_dst != NULL && h_rects != NULL, NCV_NULL_PTR);
-ncvAssertReturn(dstWidth > 0 && dstHeight > 0, NCV_DIMENSIONS_INVALID);
-ncvAssertReturn(dstStride >= dstWidth, NCV_INVALID_STEP);
-ncvAssertReturn(numRects != 0, NCV_SUCCESS);
-ncvAssertReturn(numRects <= dstWidth * dstHeight, NCV_DIMENSIONS_INVALID);
-
-for (Ncv32u i=0; i<numRects; i++)
-{
-NcvRect32u rect = h_rects[i];
-
-if (rect.x < dstWidth)
-{
-for (Ncv32u i=rect.y; i<rect.y+rect.height && i<dstHeight; i++)
-{
-h_dst[i*dstStride+rect.x] = color;
-}
-}
-if (rect.x+rect.width-1 < dstWidth)
-{
-for (Ncv32u i=rect.y; i<rect.y+rect.height && i<dstHeight; i++)
-{
-h_dst[i*dstStride+rect.x+rect.width-1] = color;
-}
-}
-if (rect.y < dstHeight)
-{
-for (Ncv32u j=rect.x; j<rect.x+rect.width && j<dstWidth; j++)
-{
-h_dst[rect.y*dstStride+j] = color;
-}
-}
-if (rect.y + rect.height - 1 < dstHeight)
-{
-for (Ncv32u j=rect.x; j<rect.x+rect.width && j<dstWidth; j++)
-{
-h_dst[(rect.y+rect.height-1)*dstStride+j] = color;
-}
-}
-}
-
-return NCV_SUCCESS;
-}
-
-
-NCVStatus ncvDrawRects_8u_host(Ncv8u *h_dst,
-Ncv32u dstStride,
-Ncv32u dstWidth,
-Ncv32u dstHeight,
-NcvRect32u *h_rects,
-Ncv32u numRects,
-Ncv8u color)
-{
-return drawRectsWrapperHost(h_dst, dstStride, dstWidth, dstHeight, h_rects, numRects, color);
-}
-
-
-NCVStatus ncvDrawRects_32u_host(Ncv32u *h_dst,
-Ncv32u dstStride,
-Ncv32u dstWidth,
-Ncv32u dstHeight,
-NcvRect32u *h_rects,
-Ncv32u numRects,
-Ncv32u color)
-{
-return drawRectsWrapperHost(h_dst, dstStride, dstWidth, dstHeight, h_rects, numRects, color);
-}
-
-
 NCVStatus loadFromXML(const std::string &filename,
 HaarClassifierCascadeDescriptor &haar,
 std::vector<HaarStage64> &haarStages,

@@ -346,8 +346,8 @@ enum
 NCVPipeObjDet_VisualizeInPlace = 0x004,
 };

-NCV_EXPORTS
-NCVStatus ncvDetectObjectsMultiScale_device(NCVMatrix<Ncv8u> &d_srcImg,
+NCV_EXPORTS NCVStatus ncvDetectObjectsMultiScale_device(NCVMatrix<Ncv8u> &d_srcImg,
 NcvSize32u srcRoi,
 NCVVector<NcvRect32u> &d_dstRects,
 Ncv32u &dstNumRects,
@@ -373,8 +373,8 @@ NCVStatus ncvDetectObjectsMultiScale_device(NCVMatrix<Ncv8u> &d_srcImg,
 #define OBJDET_MASK_ELEMENT_INVALID_32U 0xFFFFFFFF
 #define HAAR_STDDEV_BORDER 1

-NCV_EXPORTS
-NCVStatus ncvApplyHaarClassifierCascade_device(NCVMatrix<Ncv32u> &d_integralImage,
+NCV_EXPORTS NCVStatus ncvApplyHaarClassifierCascade_device(NCVMatrix<Ncv32u> &d_integralImage,
 NCVMatrix<Ncv32f> &d_weights,
 NCVMatrixAlloc<Ncv32u> &d_pixelMask,
 Ncv32u &numDetections,
@@ -392,8 +392,8 @@ NCVStatus ncvApplyHaarClassifierCascade_device(NCVMatrix<Ncv32u> &d_integralImag
 cudaDeviceProp &devProp,
 cudaStream_t cuStream);

-NCV_EXPORTS
-NCVStatus ncvApplyHaarClassifierCascade_host(NCVMatrix<Ncv32u> &h_integralImage,
+NCV_EXPORTS NCVStatus ncvApplyHaarClassifierCascade_host(NCVMatrix<Ncv32u> &h_integralImage,
 NCVMatrix<Ncv32f> &h_weights,
 NCVMatrixAlloc<Ncv32u> &h_pixelMask,
 Ncv32u &numDetections,
@@ -406,49 +406,11 @@ NCVStatus ncvApplyHaarClassifierCascade_host(NCVMatrix<Ncv32u> &h_integralImage,
 Ncv32u pixelStep,
 Ncv32f scaleArea);

-NCV_EXPORTS
-NCVStatus ncvDrawRects_8u_device(Ncv8u *d_dst,
-Ncv32u dstStride,
-Ncv32u dstWidth,
-Ncv32u dstHeight,
-NcvRect32u *d_rects,
-Ncv32u numRects,
-Ncv8u color,
-cudaStream_t cuStream);
-
-NCV_EXPORTS
-NCVStatus ncvDrawRects_32u_device(Ncv32u *d_dst,
-Ncv32u dstStride,
-Ncv32u dstWidth,
-Ncv32u dstHeight,
-NcvRect32u *d_rects,
-Ncv32u numRects,
-Ncv32u color,
-cudaStream_t cuStream);
-
-NCV_EXPORTS
-NCVStatus ncvDrawRects_8u_host(Ncv8u *h_dst,
-Ncv32u dstStride,
-Ncv32u dstWidth,
-Ncv32u dstHeight,
-NcvRect32u *h_rects,
-Ncv32u numRects,
-Ncv8u color);
-
-NCV_EXPORTS
-NCVStatus ncvDrawRects_32u_host(Ncv32u *h_dst,
-Ncv32u dstStride,
-Ncv32u dstWidth,
-Ncv32u dstHeight,
-NcvRect32u *h_rects,
-Ncv32u numRects,
-Ncv32u color);
-

 #define RECT_SIMILARITY_PROPORTION 0.2f

-NCV_EXPORTS
-NCVStatus ncvGrowDetectionsVector_device(NCVVector<Ncv32u> &pixelMask,
+NCV_EXPORTS NCVStatus ncvGrowDetectionsVector_device(NCVVector<Ncv32u> &pixelMask,
 Ncv32u numPixelMaskDetections,
 NCVVector<NcvRect32u> &hypotheses,
 Ncv32u &totalDetections,
@@ -458,8 +420,8 @@ NCVStatus ncvGrowDetectionsVector_device(NCVVector<Ncv32u> &pixelMask,
 Ncv32f curScale,
 cudaStream_t cuStream);

-NCV_EXPORTS
-NCVStatus ncvGrowDetectionsVector_host(NCVVector<Ncv32u> &pixelMask,
+NCV_EXPORTS NCVStatus ncvGrowDetectionsVector_host(NCVVector<Ncv32u> &pixelMask,
 Ncv32u numPixelMaskDetections,
 NCVVector<NcvRect32u> &hypotheses,
 Ncv32u &totalDetections,
@@ -468,27 +430,19 @@ NCVStatus ncvGrowDetectionsVector_host(NCVVector<Ncv32u> &pixelMask,
 Ncv32u rectHeight,
 Ncv32f curScale);

-NCV_EXPORTS
-NCVStatus ncvFilterHypotheses_host(NCVVector<NcvRect32u> &hypotheses,
-Ncv32u &numHypotheses,
-Ncv32u minNeighbors,
-Ncv32f intersectEps,
-NCVVector<Ncv32u> *hypothesesWeights);
-
-NCV_EXPORTS
-NCVStatus ncvHaarGetClassifierSize(const std::string &filename, Ncv32u &numStages,
+NCV_EXPORTS NCVStatus ncvHaarGetClassifierSize(const std::string &filename, Ncv32u &numStages,
 Ncv32u &numNodes, Ncv32u &numFeatures);

-NCV_EXPORTS
-NCVStatus ncvHaarLoadFromFile_host(const std::string &filename,
+NCV_EXPORTS NCVStatus ncvHaarLoadFromFile_host(const std::string &filename,
 HaarClassifierCascadeDescriptor &haar,
 NCVVector<HaarStage64> &h_HaarStages,
 NCVVector<HaarClassifierNode128> &h_HaarNodes,
 NCVVector<HaarFeature64> &h_HaarFeatures);


-NCV_EXPORTS
-NCVStatus ncvHaarStoreNVBIN_host(const std::string &filename,
+NCV_EXPORTS NCVStatus ncvHaarStoreNVBIN_host(const std::string &filename,
 HaarClassifierCascadeDescriptor haar,
 NCVVector<HaarStage64> &h_HaarStages,
 NCVVector<HaarClassifierNode128> &h_HaarNodes,

@@ -44,10 +44,6 @@
 #include <cuda_runtime.h>
 #include "NPP_staging.hpp"

-#if defined _SELF_TEST_
-#include <stdio.h>
-#endif
-

 texture<Ncv8u, 1, cudaReadModeElementType> tex8u;
 texture<Ncv32u, 1, cudaReadModeElementType> tex32u;
@@ -161,12 +157,6 @@ const Ncv32u NUM_SCAN_THREADS = 256;
 const Ncv32u LOG2_NUM_SCAN_THREADS = 8;


-struct T_true {};
-struct T_false {};
-template <typename T, typename U> struct is_same : T_false {};
-template <typename T> struct is_same<T, T> : T_true {};
-
-
 template<class T_in, class T_out>
 struct _scanElemOp
 {
@@ -175,13 +165,16 @@ struct _scanElemOp
 {
 return scanElemOp( elem, Int2Type<(int)tbDoSqr>() );
 }

 private:

 template <int v> struct Int2Type { enum { value = v }; };

 static inline __host__ __device__ T_out scanElemOp(T_in elem, Int2Type<0>)
 {
 return (T_out)elem;
 }

 static inline __host__ __device__ T_out scanElemOp(T_in elem, Int2Type<1>)
 {
 return (T_out)(elem*elem);
@@ -190,25 +183,25 @@ private:


 template<class T>
-inline __device__ T readElem(T *d_src, Ncv32u srcStride, Ncv32u curElemOffs);
+inline __device__ T readElem(T *d_src, Ncv32u texOffs, Ncv32u srcStride, Ncv32u curElemOffs);


 template<>
-inline __device__ Ncv8u readElem<Ncv8u>(Ncv8u *d_src, Ncv32u srcStride, Ncv32u curElemOffs)
+inline __device__ Ncv8u readElem<Ncv8u>(Ncv8u *d_src, Ncv32u texOffs, Ncv32u srcStride, Ncv32u curElemOffs)
 {
-return tex1Dfetch(tex8u, srcStride * blockIdx.x + curElemOffs);
+return tex1Dfetch(tex8u, texOffs + srcStride * blockIdx.x + curElemOffs);
 }


 template<>
-inline __device__ Ncv32u readElem<Ncv32u>(Ncv32u *d_src, Ncv32u srcStride, Ncv32u curElemOffs)
+inline __device__ Ncv32u readElem<Ncv32u>(Ncv32u *d_src, Ncv32u texOffs, Ncv32u srcStride, Ncv32u curElemOffs)
 {
 return d_src[curElemOffs];
 }


 template<>
-inline __device__ Ncv32f readElem<Ncv32f>(Ncv32f *d_src, Ncv32u srcStride, Ncv32u curElemOffs)
+inline __device__ Ncv32f readElem<Ncv32f>(Ncv32f *d_src, Ncv32u texOffs, Ncv32u srcStride, Ncv32u curElemOffs)
 {
 return d_src[curElemOffs];
 }
@@ -233,7 +226,7 @@ inline __device__ Ncv32f readElem<Ncv32f>(Ncv32f *d_src, Ncv32u srcStride, Ncv32
 * \return None
 */
 template <class T_in, class T_out, bool tbDoSqr>
-__global__ void scanRows(T_in *d_src, Ncv32u srcWidth, Ncv32u srcStride,
+__global__ void scanRows(T_in *d_src, Ncv32u texOffs, Ncv32u srcWidth, Ncv32u srcStride,
 T_out *d_II, Ncv32u IIstride)
 {
 //advance pointers to the current line
@@ -263,7 +256,7 @@ __global__ void scanRows(T_in *d_src, Ncv32u srcWidth, Ncv32u srcStride,
 if (curElemOffs < srcWidth)
 {
 //load elements
-curElem = readElem<T_in>(d_src, srcStride, curElemOffs);
+curElem = readElem<T_in>(d_src, texOffs, srcStride, curElemOffs);
 }
 curElemMod = _scanElemOp<T_in, T_out>::scanElemOp<tbDoSqr>(curElem);

@@ -298,55 +291,28 @@ NCVStatus scanRowsWrapperDevice(T_in *d_src, Ncv32u srcStride,
 T_out *d_dst, Ncv32u dstStride, NcvSize32u roi)
 {
 cudaChannelFormatDesc cfdTex;
+size_t alignmentOffset = 0;
 if (sizeof(T_in) == 1)
 {
 cfdTex = cudaCreateChannelDesc<Ncv8u>();
-size_t alignmentOffset;
 ncvAssertCUDAReturn(cudaBindTexture(&alignmentOffset, tex8u, d_src, cfdTex, roi.height * srcStride), NPPST_TEXTURE_BIND_ERROR);
-ncvAssertReturn(alignmentOffset==0, NPPST_TEXTURE_BIND_ERROR);
+if (alignmentOffset > 0)
+{
+ncvAssertCUDAReturn(cudaUnbindTexture(tex8u), NCV_CUDA_ERROR);
+ncvAssertCUDAReturn(cudaBindTexture(&alignmentOffset, tex8u, d_src, cfdTex, alignmentOffset + roi.height * srcStride), NPPST_TEXTURE_BIND_ERROR);
+}
 }
 scanRows
 <T_in, T_out, tbDoSqr>
 <<<roi.height, NUM_SCAN_THREADS, 0, nppStGetActiveCUDAstream()>>>
-(d_src, roi.width, srcStride, d_dst, dstStride);
+(d_src, (Ncv32u)alignmentOffset, roi.width, srcStride, d_dst, dstStride);
 ncvAssertCUDAReturn(cudaGetLastError(), NPPST_CUDA_KERNEL_EXECUTION_ERROR);

-#if defined _SELF_TEST_
-T_in *h_src;
-T_out *h_dst;
-ncvAssertCUDAReturn(cudaMallocHost(&h_src, srcStride * roi.height * sizeof(T_in)), NPPST_MEM_ALLOC_ERR);
-ncvAssertCUDAReturn(cudaMallocHost(&h_dst, dstStride * roi.height * sizeof(T_out)), NPPST_MEM_ALLOC_ERR);
-memset(h_src, 0, srcStride * roi.height * sizeof(T_in));
-memset(h_dst, 0, dstStride * roi.height * sizeof(T_out));
-ncvAssertCUDAReturn(cudaMemcpy(h_src, d_src, srcStride * roi.height * sizeof(T_in), cudaMemcpyDeviceToHost), NPPST_MEMCPY_ERROR);
-ncvAssertCUDAReturn(cudaMemcpy(h_dst, d_dst, dstStride * roi.height * sizeof(T_out), cudaMemcpyDeviceToHost), NPPST_MEMCPY_ERROR);
-NcvBool bPass = true;
-for (Ncv32u i=0; i<roi.height && bPass; i++)
-{
-T_out curElem = 0;
-for (Ncv32u j=0; j<roi.width+1 && bPass; j++)
-{
-if (curElem != h_dst[i * dstStride + j])
-{
-printf("CIntegralImage::scanRowsWrapperDevice self test failed: i=%d, j=%d, cpu=%d, gpu=%d\n", i, j, curElem, h_dst[i * dstStride + j]);
-bPass = false;
-}
-if (j < roi.width)
-{
-curElem += scanElemOp<T_op>(h_src[i*srcStride+j]);
-}
-}
-}
-ncvAssertCUDAReturn(cudaFreeHost(h_src), NPPST_MEMFREE_ERR);
-ncvAssertCUDAReturn(cudaFreeHost(h_dst), NPPST_MEMFREE_ERR);
-printf("CIntegralImage::scanRowsWrapperDevice %s\n", bPass?"PASSED":"FAILED");
-#endif
-
 return NPPST_SUCCESS;
 }


-Ncv32u getPaddedDimension(Ncv32u dim, Ncv32u elemTypeSize, Ncv32u allocatorAlignment)
+static Ncv32u getPaddedDimension(Ncv32u dim, Ncv32u elemTypeSize, Ncv32u allocatorAlignment)
 {
 Ncv32u alignMask = allocatorAlignment-1;
 Ncv32u inverseAlignMask = ~alignMask;
@@ -676,7 +642,7 @@ NCVStatus nppiStSqrIntegral_8u64u_C1R_host(Ncv8u *h_src, Ncv32u srcStep,

 //==============================================================================
 //
-// DownsampleNearest.cu
+// Decimate.cu
 //
 //==============================================================================

@@ -686,25 +652,25 @@ const Ncv32u NUM_DOWNSAMPLE_NEAREST_THREADS_Y = 8;


 template<class T, NcvBool tbCacheTexture>
-__device__ T getElem_DownsampleNearest(Ncv32u x, T *d_src);
+__device__ T getElem_Decimate(Ncv32u x, T *d_src);


 template<>
-__device__ Ncv32u getElem_DownsampleNearest<Ncv32u, true>(Ncv32u x, Ncv32u *d_src)
+__device__ Ncv32u getElem_Decimate<Ncv32u, true>(Ncv32u x, Ncv32u *d_src)
 {
 return tex1Dfetch(tex32u, x);
 }


 template<>
-__device__ Ncv32u getElem_DownsampleNearest<Ncv32u, false>(Ncv32u x, Ncv32u *d_src)
+__device__ Ncv32u getElem_Decimate<Ncv32u, false>(Ncv32u x, Ncv32u *d_src)
 {
 return d_src[x];
 }


 template<>
-__device__ Ncv64u getElem_DownsampleNearest<Ncv64u, true>(Ncv32u x, Ncv64u *d_src)
+__device__ Ncv64u getElem_Decimate<Ncv64u, true>(Ncv32u x, Ncv64u *d_src)
 {
 uint2 tmp = tex1Dfetch(tex64u, x);
 Ncv64u res = (Ncv64u)tmp.y;
@@ -715,14 +681,14 @@ __device__ Ncv64u getElem_DownsampleNearest<Ncv64u, true>(Ncv32u x, Ncv64u *d_sr


 template<>
-__device__ Ncv64u getElem_DownsampleNearest<Ncv64u, false>(Ncv32u x, Ncv64u *d_src)
+__device__ Ncv64u getElem_Decimate<Ncv64u, false>(Ncv32u x, Ncv64u *d_src)
 {
 return d_src[x];
 }


 template <class T, NcvBool tbCacheTexture>
-__global__ void downsampleNearest_C1R(T *d_src, Ncv32u srcStep, T *d_dst, Ncv32u dstStep,
+__global__ void decimate_C1R(T *d_src, Ncv32u srcStep, T *d_dst, Ncv32u dstStep,
 NcvSize32u dstRoi, Ncv32u scale)
 {
 int curX = blockIdx.x * blockDim.x + threadIdx.x;
@@ -733,12 +699,12 @@ __global__ void downsampleNearest_C1R(T *d_src, Ncv32u srcStep, T *d_dst, Ncv32u
 return;
 }

-d_dst[curY * dstStep + curX] = getElem_DownsampleNearest<T, tbCacheTexture>((curY * srcStep + curX) * scale, d_src);
+d_dst[curY * dstStep + curX] = getElem_Decimate<T, tbCacheTexture>((curY * srcStep + curX) * scale, d_src);
 }


 template <class T>
-static NCVStatus downsampleNearestWrapperDevice(T *d_src, Ncv32u srcStep,
+static NCVStatus decimateWrapperDevice(T *d_src, Ncv32u srcStep,
 T *d_dst, Ncv32u dstStep,
 NcvSize32u srcRoi, Ncv32u scale,
 NcvBool readThruTexture)
@@ -761,7 +727,7 @@ static NCVStatus downsampleNearestWrapperDevice(T *d_src, Ncv32u srcStep,

 if (!readThruTexture)
 {
-downsampleNearest_C1R
+decimate_C1R
 <T, false>
 <<<grid, block, 0, nppStGetActiveCUDAstream()>>>
 (d_src, srcStep, d_dst, dstStep, dstRoi, scale);
@@ -787,7 +753,7 @@ static NCVStatus downsampleNearestWrapperDevice(T *d_src, Ncv32u srcStep,
 ncvAssertReturn(alignmentOffset==0, NPPST_TEXTURE_BIND_ERROR);
 }

-downsampleNearest_C1R
+decimate_C1R
 <T, true>
 <<<grid, block, 0, nppStGetActiveCUDAstream()>>>
 (d_src, srcStep, d_dst, dstStep, dstRoi, scale);
@@ -795,39 +761,12 @@ static NCVStatus downsampleNearestWrapperDevice(T *d_src, Ncv32u srcStep,

 ncvAssertCUDAReturn(cudaGetLastError(), NPPST_CUDA_KERNEL_EXECUTION_ERROR);

-#if defined _SELF_TEST_
-T *h_src;
-T *h_dst;
-ncvAssertCUDAReturn(cudaMallocHost(&h_src, srcStep * srcRoi.height * sizeof(T)), NPPST_MEM_ALLOC_ERR);
-ncvAssertCUDAReturn(cudaMallocHost(&h_dst, dstStep * dstRoi.height * sizeof(T)), NPPST_MEM_ALLOC_ERR);
-ncvAssertCUDAReturn(cudaMemcpy(h_src, d_src, srcStep * srcRoi.height * sizeof(T), cudaMemcpyDeviceToHost), NPPST_MEMCPY_ERROR);
-ncvAssertCUDAReturn(cudaMemcpy(h_dst, d_dst, dstStep * dstRoi.height * sizeof(T), cudaMemcpyDeviceToHost), NPPST_MEMCPY_ERROR);
-
-bool bPass = true;
-
-for (Ncv32u i=0; i<dstRoi.height && bPass; i++)
-{
-for (Ncv32u j=0; j<dstRoi.width && bPass; j++)
-{
-if (h_dst[i*dstStep+j] != h_src[i*scale*srcStep + j*scale])
-{
-printf("::downsampleNearestWrapperDevice self test failed: i=%d, j=%d, cpu=%ld, gpu=%ld\n", i, j, (long long)h_src[i*scale*srcStep + j*scale], (long long)h_dst[i*dstStep+j]);
-bPass = false;
-}
-}
-}
-
-ncvAssertCUDAReturn(cudaFreeHost(h_src), NPPST_MEMFREE_ERR);
-ncvAssertCUDAReturn(cudaFreeHost(h_dst), NPPST_MEMFREE_ERR);
-printf("::downsampleNearestWrapperDevice %s\n", bPass?"PASSED":"FAILED");
-#endif
-
 return NPPST_SUCCESS;
 }


 template <class T>
-static NCVStatus downsampleNearestWrapperHost(T *h_src, Ncv32u srcStep,
+static NCVStatus decimateWrapperHost(T *h_src, Ncv32u srcStep,
 T *h_dst, Ncv32u dstStep,
 NcvSize32u srcRoi, Ncv32u scale)
 {
@@ -856,40 +795,40 @@ static NCVStatus downsampleNearestWrapperHost(T *h_src, Ncv32u srcStep,
 }


-#define implementNppDownsampleNearest(bit, typ) \
-NCVStatus nppiStDownsampleNearest_##bit##typ##_C1R(Ncv##bit##typ *d_src, Ncv32u srcStep, \
+#define implementNppDecimate(bit, typ) \
+NCVStatus nppiStDecimate_##bit##typ##_C1R(Ncv##bit##typ *d_src, Ncv32u srcStep, \
 Ncv##bit##typ *d_dst, Ncv32u dstStep, \
 NcvSize32u srcRoi, Ncv32u scale, NcvBool readThruTexture) \
 { \
-return downsampleNearestWrapperDevice<Ncv##bit##u>((Ncv##bit##u *)d_src, srcStep, \
+return decimateWrapperDevice<Ncv##bit##u>((Ncv##bit##u *)d_src, srcStep, \
 (Ncv##bit##u *)d_dst, dstStep, \
 srcRoi, scale, readThruTexture); \
 }


-#define implementNppDownsampleNearestHost(bit, typ) \
-NCVStatus nppiStDownsampleNearest_##bit##typ##_C1R_host(Ncv##bit##typ *h_src, Ncv32u srcStep, \
+#define implementNppDecimateHost(bit, typ) \
+NCVStatus nppiStDecimate_##bit##typ##_C1R_host(Ncv##bit##typ *h_src, Ncv32u srcStep, \
 Ncv##bit##typ *h_dst, Ncv32u dstStep, \
 NcvSize32u srcRoi, Ncv32u scale) \
 { \
-return downsampleNearestWrapperHost<Ncv##bit##u>((Ncv##bit##u *)h_src, srcStep, \
+return decimateWrapperHost<Ncv##bit##u>((Ncv##bit##u *)h_src, srcStep, \
 (Ncv##bit##u *)h_dst, dstStep, \
 srcRoi, scale); \
 }


-implementNppDownsampleNearest(32, u)
-implementNppDownsampleNearest(32, s)
-implementNppDownsampleNearest(32, f)
-implementNppDownsampleNearest(64, u)
-implementNppDownsampleNearest(64, s)
-implementNppDownsampleNearest(64, f)
-implementNppDownsampleNearestHost(32, u)
-implementNppDownsampleNearestHost(32, s)
-implementNppDownsampleNearestHost(32, f)
-implementNppDownsampleNearestHost(64, u)
-implementNppDownsampleNearestHost(64, s)
-implementNppDownsampleNearestHost(64, f)
+implementNppDecimate(32, u)
+implementNppDecimate(32, s)
+implementNppDecimate(32, f)
+implementNppDecimate(64, u)
+implementNppDecimate(64, s)
+implementNppDecimate(64, f)
+implementNppDecimateHost(32, u)
+implementNppDecimateHost(32, s)
+implementNppDecimateHost(32, f)
+implementNppDecimateHost(64, u)
+implementNppDecimateHost(64, s)
+implementNppDecimateHost(64, f)


 //==============================================================================
@@ -1051,46 +990,6 @@ NCVStatus nppiStRectStdDev_32f_C1R(Ncv32u *d_sum, Ncv32u sumStep,

 ncvAssertCUDAReturn(cudaGetLastError(), NPPST_CUDA_KERNEL_EXECUTION_ERROR);

-#if defined _SELF_TEST_
-Ncv32u *h_sum;
-Ncv64u *h_sqsum;
-Ncv32f *h_norm_d;
-Ncv32u ExtHeight = roi.height + rect.y + rect.height;
-ncvAssertCUDAReturn(cudaMallocHost(&h_sum, sumStep * ExtHeight * sizeof(Ncv32u)), NPPST_MEM_ALLOC_ERR);
-ncvAssertCUDAReturn(cudaMallocHost(&h_sqsum, sqsumStep * ExtHeight * sizeof(Ncv64u)), NPPST_MEM_ALLOC_ERR);
-ncvAssertCUDAReturn(cudaMallocHost(&h_norm_d, normStep * roi.height * sizeof(Ncv32u)), NPPST_MEM_ALLOC_ERR);
-ncvAssertCUDAReturn(cudaMemcpy(h_sum, d_sum, sumStep * ExtHeight * sizeof(Ncv32u), cudaMemcpyDeviceToHost), NPPST_MEMCPY_ERROR);
-ncvAssertCUDAReturn(cudaMemcpy(h_sqsum, d_sqsum, sqsumStep * ExtHeight * sizeof(Ncv64u), cudaMemcpyDeviceToHost), NPPST_MEMCPY_ERROR);
-ncvAssertCUDAReturn(cudaMemcpy(h_norm_d, d_norm, normStep * roi.height * sizeof(Ncv32f), cudaMemcpyDeviceToHost), NPPST_MEMCPY_ERROR);
-
-Ncv32f *h_norm_h;
-ncvAssertCUDAReturn(cudaMallocHost(&h_norm_h, normStep * roi.height * sizeof(Ncv32u)), NPPST_MEM_ALLOC_ERR);
-
-ncvAssertReturnNcvStat(nppRectStdDev_32f_C1R_host(h_sum, sqsumStep, h_sqsum, sqsumStep, h_norm_h, normStep, roi, rect, scaleArea));
-
-const Ncv64f relEPS = 0.005;
-bool bPass = true;
-for (Ncv32u i=0; i<roi.height && bPass; i++)
-{
-for (Ncv32u j=0; j<roi.width && bPass; j++)
-{
-Ncv64f absErr = fabs(h_norm_h[i * normStep + j] - h_norm_d[i * normStep + j]);
-Ncv64f relErr = absErr / h_norm_h[i * normStep + j];
-
-if (relErr > relEPS)
-{
-printf("::ncvRectStdDev_32f_C1R self test failed: i=%d, j=%d, cpu=%f, gpu=%f\n", i, j, h_norm_h[i * normStep + j], h_norm_d[i * normStep + j]);
-bPass = false;
-}
-}
-}
-ncvAssertCUDAReturn(cudaFreeHost(h_sum), NPPST_MEMFREE_ERR);
-ncvAssertCUDAReturn(cudaFreeHost(h_sqsum), NPPST_MEMFREE_ERR);
-ncvAssertCUDAReturn(cudaFreeHost(h_norm_d), NPPST_MEMFREE_ERR);
-ncvAssertCUDAReturn(cudaFreeHost(h_norm_h), NPPST_MEMFREE_ERR);
-printf("::ncvRectStdDev_32f_C1R %s\n", bPass?"PASSED":"FAILED");
-#endif
-
 return NPPST_SUCCESS;
 }

@@ -1251,34 +1150,6 @@ NCVStatus transposeWrapperDevice(T *d_src, Ncv32u srcStride,
 (d_src, srcStride, d_dst, dstStride, srcRoi);
 ncvAssertCUDAReturn(cudaGetLastError(), NPPST_CUDA_KERNEL_EXECUTION_ERROR);

-#if defined _SELF_TEST_
-Ncv32u widthExt = grid.x * TRANSPOSE_TILE_DIM;
-Ncv32u heightExt = grid.y * TRANSPOSE_TILE_DIM;
-T *h_src;
-T *h_dst;
-ncvAssertCUDAReturn(cudaMallocHost(&h_src, srcStride * heightExt * sizeof(T)), NPPST_MEM_ALLOC_ERR);
-ncvAssertCUDAReturn(cudaMallocHost(&h_dst, dstStride * widthExt * sizeof(T)), NPPST_MEM_ALLOC_ERR);
-memset(h_src, 0, srcStride * heightExt * sizeof(T));
-memset(h_dst, 0, dstStride * widthExt * sizeof(T));
-ncvAssertCUDAReturn(cudaMemcpy(h_src, d_src, srcStride * heightExt * sizeof(T), cudaMemcpyDeviceToHost), NPPST_MEMCPY_ERROR);
-ncvAssertCUDAReturn(cudaMemcpy(h_dst, d_dst, dstStride * widthExt * sizeof(T), cudaMemcpyDeviceToHost), NPPST_MEMCPY_ERROR);
-NcvBool bPass = true;
-for (Ncv32u i=0; i<srcRoi.height && bPass; i++)
-{
-for (Ncv32u j=0; j<srcRoi.width && bPass; j++)
-{
-if (h_src[i * srcStride + j] != h_dst[j * dstStride + i])
-{
-printf("CIntegralImage::transposeWrapperDevice self test failed: i=%d, j=%d, cpu=%d, gpu=%d\n", i, j, h_src[j * srcStride + i], h_dst[i * dstStride + j]);
-bPass = false;
-}
-}
-}
-ncvAssertCUDAReturn(cudaFreeHost(h_src), NPPST_MEMFREE_ERR);
-ncvAssertCUDAReturn(cudaFreeHost(h_dst), NPPST_MEMFREE_ERR);
-printf("CIntegralImage::transposeWrapperDevice %s\n", bPass?"PASSED":"FAILED");
-#endif
-
 return NPPST_SUCCESS;
 }

@@ -1341,6 +1212,20 @@ implementNppTransposeHost(64,s)
 implementNppTransposeHost(64,f)


+NCVStatus nppiStTranspose_128_C1R(void *d_src, Ncv32u srcStep,
+void *d_dst, Ncv32u dstStep, NcvSize32u srcRoi)
+{
+return transposeWrapperDevice<uint4>((uint4 *)d_src, srcStep, (uint4 *)d_dst, dstStep, srcRoi);
+}
+
+
+NCVStatus nppiStTranspose_128_C1R_host(void *d_src, Ncv32u srcStep,
+void *d_dst, Ncv32u dstStep, NcvSize32u srcRoi)
+{
+return transposeWrapperHost<uint4>((uint4 *)d_src, srcStep, (uint4 *)d_dst, dstStep, srcRoi);
+}
+
+
 //==============================================================================
 //
 // Compact.cu

@@ -96,7 +96,7 @@ cudaStream_t nppStSetActiveCUDAstream(cudaStream_t cudaStream);
 * \return NCV status code
 */
 NCV_EXPORTS
-NCVStatus nppiStDownsampleNearest_32u_C1R(Ncv32u *d_src, Ncv32u srcStep,
+NCVStatus nppiStDecimate_32u_C1R(Ncv32u *d_src, Ncv32u srcStep,
 Ncv32u *d_dst, Ncv32u dstStep,
 NcvSize32u srcRoi, Ncv32u scale,
 NcvBool readThruTexture);
@@ -104,10 +104,10 @@ NCVStatus nppiStDownsampleNearest_32u_C1R(Ncv32u *d_src, Ncv32u srcStep,

 /**
 * Downsamples (decimates) an image using the nearest neighbor algorithm. 32-bit signed pixels, single channel.
-* \see nppiStDownsampleNearest_32u_C1R
+* \see nppiStDecimate_32u_C1R
 */
 NCV_EXPORTS
-NCVStatus nppiStDownsampleNearest_32s_C1R(Ncv32s *d_src, Ncv32u srcStep,
+NCVStatus nppiStDecimate_32s_C1R(Ncv32s *d_src, Ncv32u srcStep,
 Ncv32s *d_dst, Ncv32u dstStep,
 NcvSize32u srcRoi, Ncv32u scale,
 NcvBool readThruTexture);
@@ -115,10 +115,10 @@ NCVStatus nppiStDownsampleNearest_32s_C1R(Ncv32s *d_src, Ncv32u srcStep,

 /**
 * Downsamples (decimates) an image using the nearest neighbor algorithm. 32-bit float pixels, single channel.
-* \see nppiStDownsampleNearest_32u_C1R
+* \see nppiStDecimate_32u_C1R
 */
 NCV_EXPORTS
-NCVStatus nppiStDownsampleNearest_32f_C1R(Ncv32f *d_src, Ncv32u srcStep,
+NCVStatus nppiStDecimate_32f_C1R(Ncv32f *d_src, Ncv32u srcStep,
 Ncv32f *d_dst, Ncv32u dstStep,
 NcvSize32u srcRoi, Ncv32u scale,
 NcvBool readThruTexture);
@@ -126,10 +126,10 @@ NCVStatus nppiStDownsampleNearest_32f_C1R(Ncv32f *d_src, Ncv32u srcStep,

 /**
 * Downsamples (decimates) an image using the nearest neighbor algorithm. 64-bit unsigned pixels, single channel.
-* \see nppiStDownsampleNearest_32u_C1R
+* \see nppiStDecimate_32u_C1R
 */
 NCV_EXPORTS
-NCVStatus nppiStDownsampleNearest_64u_C1R(Ncv64u *d_src, Ncv32u srcStep,
+NCVStatus nppiStDecimate_64u_C1R(Ncv64u *d_src, Ncv32u srcStep,
 Ncv64u *d_dst, Ncv32u dstStep,
 NcvSize32u srcRoi, Ncv32u scale,
 NcvBool readThruTexture);
@@ -137,10 +137,10 @@ NCVStatus nppiStDownsampleNearest_64u_C1R(Ncv64u *d_src, Ncv32u srcStep,

 /**
 * Downsamples (decimates) an image using the nearest neighbor algorithm. 64-bit signed pixels, single channel.
-* \see nppiStDownsampleNearest_32u_C1R
+* \see nppiStDecimate_32u_C1R
 */
 NCV_EXPORTS
-NCVStatus nppiStDownsampleNearest_64s_C1R(Ncv64s *d_src, Ncv32u srcStep,
+NCVStatus nppiStDecimate_64s_C1R(Ncv64s *d_src, Ncv32u srcStep,
 Ncv64s *d_dst, Ncv32u dstStep,
 NcvSize32u srcRoi, Ncv32u scale,
 NcvBool readThruTexture);
@@ -148,10 +148,10 @@ NCVStatus nppiStDownsampleNearest_64s_C1R(Ncv64s *d_src, Ncv32u srcStep,

 /**
 * Downsamples (decimates) an image using the nearest neighbor algorithm. 64-bit float pixels, single channel.
-* \see nppiStDownsampleNearest_32u_C1R
+* \see nppiStDecimate_32u_C1R
 */
 NCV_EXPORTS
-NCVStatus nppiStDownsampleNearest_64f_C1R(Ncv64f *d_src, Ncv32u srcStep,
+NCVStatus nppiStDecimate_64f_C1R(Ncv64f *d_src, Ncv32u srcStep,
 Ncv64f *d_dst, Ncv32u dstStep,
 NcvSize32u srcRoi, Ncv32u scale,
 NcvBool readThruTexture);
@@ -170,57 +170,57 @@ NCVStatus nppiStDownsampleNearest_64f_C1R(Ncv64f *d_src, Ncv32u srcStep,
 * \return NCV status code
 */
 NCV_EXPORTS
-NCVStatus nppiStDownsampleNearest_32u_C1R_host(Ncv32u *h_src, Ncv32u srcStep,
+NCVStatus nppiStDecimate_32u_C1R_host(Ncv32u *h_src, Ncv32u srcStep,
 Ncv32u *h_dst, Ncv32u dstStep,
 NcvSize32u srcRoi, Ncv32u scale);


 /**
 * Downsamples (decimates) an image using the nearest neighbor algorithm. 32-bit signed pixels, single channel. Host implementation.
-* \see nppiStDownsampleNearest_32u_C1R_host
+* \see nppiStDecimate_32u_C1R_host
 */
 NCV_EXPORTS
-NCVStatus nppiStDownsampleNearest_32s_C1R_host(Ncv32s *h_src, Ncv32u srcStep,
+NCVStatus nppiStDecimate_32s_C1R_host(Ncv32s *h_src, Ncv32u srcStep,
 Ncv32s *h_dst, Ncv32u dstStep,
 NcvSize32u srcRoi, Ncv32u scale);


 /**
 * Downsamples (decimates) an image using the nearest neighbor algorithm. 32-bit float pixels, single channel. Host implementation.
-* \see nppiStDownsampleNearest_32u_C1R_host
+* \see nppiStDecimate_32u_C1R_host
 */
 NCV_EXPORTS
-NCVStatus nppiStDownsampleNearest_32f_C1R_host(Ncv32f *h_src, Ncv32u srcStep,
+NCVStatus nppiStDecimate_32f_C1R_host(Ncv32f *h_src, Ncv32u srcStep,
 Ncv32f *h_dst, Ncv32u dstStep,
 NcvSize32u srcRoi, Ncv32u scale);


 /**
 * Downsamples (decimates) an image using the nearest neighbor algorithm. 64-bit unsigned pixels, single channel. Host implementation.
-* \see nppiStDownsampleNearest_32u_C1R_host
+* \see nppiStDecimate_32u_C1R_host
 */
 NCV_EXPORTS
-NCVStatus nppiStDownsampleNearest_64u_C1R_host(Ncv64u *h_src, Ncv32u srcStep,
+NCVStatus nppiStDecimate_64u_C1R_host(Ncv64u *h_src, Ncv32u srcStep,
 Ncv64u *h_dst, Ncv32u dstStep,
 NcvSize32u srcRoi, Ncv32u scale);


 /**
 * Downsamples (decimates) an image using the nearest neighbor algorithm. 64-bit signed pixels, single channel. Host implementation.
-* \see nppiStDownsampleNearest_32u_C1R_host
+* \see nppiStDecimate_32u_C1R_host
 */
 NCV_EXPORTS
-NCVStatus nppiStDownsampleNearest_64s_C1R_host(Ncv64s *h_src, Ncv32u srcStep,
+NCVStatus nppiStDecimate_64s_C1R_host(Ncv64s *h_src, Ncv32u srcStep,
 Ncv64s *h_dst, Ncv32u dstStep,
 NcvSize32u srcRoi, Ncv32u scale);


 /**
 * Downsamples (decimates) an image using the nearest neighbor algorithm. 64-bit float pixels, single channel. Host implementation.
-* \see nppiStDownsampleNearest_32u_C1R_host
+* \see nppiStDecimate_32u_C1R_host
 */
 NCV_EXPORTS
-NCVStatus nppiStDownsampleNearest_64f_C1R_host(Ncv64f *h_src, Ncv32u srcStep,
+NCVStatus nppiStDecimate_64f_C1R_host(Ncv64f *h_src, Ncv32u srcStep,
 Ncv64f *h_dst, Ncv32u dstStep,
 NcvSize32u srcRoi, Ncv32u scale);

@@ -333,6 +333,15 @@ NCVStatus nppiStTranspose_64f_C1R(Ncv64f *d_src, Ncv32u srcStride,
 Ncv64f *d_dst, Ncv32u dstStride, NcvSize32u srcRoi);


+/**
+* Transposes an image. 128-bit pixels of any type, single channel
+* \see nppiStTranspose_32u_C1R
+*/
+NCV_EXPORTS
+NCVStatus nppiStTranspose_128_C1R(void *d_src, Ncv32u srcStep,
+void *d_dst, Ncv32u dstStep, NcvSize32u srcRoi);
+
+
 /**
 * Transposes an image. 32-bit unsigned pixels, single channel. Host implementation
 *
@@ -394,6 +403,15 @@ NCVStatus nppiStTranspose_64f_C1R_host(Ncv64f *h_src, Ncv32u srcStride,
 Ncv64f *h_dst, Ncv32u dstStride, NcvSize32u srcRoi);


+/**
+* Transposes an image. 128-bit pixels of any type, single channel. Host implementation
+* \see nppiStTranspose_32u_C1R_host
+*/
+NCV_EXPORTS
+NCVStatus nppiStTranspose_128_C1R_host(void *d_src, Ncv32u srcStep,
+void *d_dst, Ncv32u dstStep, NcvSize32u srcRoi);
+
+
 /**
 * Calculates the size of the temporary buffer for integral image creation
 *
@@ -40,14 +40,9 @@
 //M*/


-#if !defined (HAVE_CUDA)


-#else /* !defined (HAVE_CUDA) */


 #include <ios>
 #include <stdarg.h>
+#include <vector>
 #include "NCV.hpp"


@@ -182,6 +177,78 @@ NCVStatus memSegCopyHelper(void *dst, NCVMemoryType dstType, const void *src, NC
 }


+NCVStatus memSegCopyHelper2D(void *dst, Ncv32u dstPitch, NCVMemoryType dstType,
+                             const void *src, Ncv32u srcPitch, NCVMemoryType srcType,
+                             Ncv32u widthbytes, Ncv32u height, cudaStream_t cuStream)
+{
+    NCVStatus ncvStat;
+    switch (dstType)
+    {
+    case NCVMemoryTypeHostPageable:
+    case NCVMemoryTypeHostPinned:
+        switch (srcType)
+        {
+        case NCVMemoryTypeHostPageable:
+        case NCVMemoryTypeHostPinned:
+            for (Ncv32u i=0; i<height; i++)
+            {
+                memcpy((char*)dst + i * dstPitch, (char*)src + i * srcPitch, widthbytes);
+            }
+            ncvStat = NCV_SUCCESS;
+            break;
+        case NCVMemoryTypeDevice:
+            if (cuStream != 0)
+            {
+                ncvAssertCUDAReturn(cudaMemcpy2DAsync(dst, dstPitch, src, srcPitch, widthbytes, height, cudaMemcpyDeviceToHost, cuStream), NCV_CUDA_ERROR);
+            }
+            else
+            {
+                ncvAssertCUDAReturn(cudaMemcpy2D(dst, dstPitch, src, srcPitch, widthbytes, height, cudaMemcpyDeviceToHost), NCV_CUDA_ERROR);
+            }
+            ncvStat = NCV_SUCCESS;
+            break;
+        default:
+            ncvStat = NCV_MEM_RESIDENCE_ERROR;
+        }
+        break;
+    case NCVMemoryTypeDevice:
+        switch (srcType)
+        {
+        case NCVMemoryTypeHostPageable:
+        case NCVMemoryTypeHostPinned:
+            if (cuStream != 0)
+            {
+                ncvAssertCUDAReturn(cudaMemcpy2DAsync(dst, dstPitch, src, srcPitch, widthbytes, height, cudaMemcpyHostToDevice, cuStream), NCV_CUDA_ERROR);
+            }
+            else
+            {
+                ncvAssertCUDAReturn(cudaMemcpy2D(dst, dstPitch, src, srcPitch, widthbytes, height, cudaMemcpyHostToDevice), NCV_CUDA_ERROR);
+            }
+            ncvStat = NCV_SUCCESS;
+            break;
+        case NCVMemoryTypeDevice:
+            if (cuStream != 0)
+            {
+                ncvAssertCUDAReturn(cudaMemcpy2DAsync(dst, dstPitch, src, srcPitch, widthbytes, height, cudaMemcpyDeviceToDevice, cuStream), NCV_CUDA_ERROR);
+            }
+            else
+            {
+                ncvAssertCUDAReturn(cudaMemcpy2D(dst, dstPitch, src, srcPitch, widthbytes, height, cudaMemcpyDeviceToDevice), NCV_CUDA_ERROR);
+            }
+            ncvStat = NCV_SUCCESS;
+            break;
+        default:
+            ncvStat = NCV_MEM_RESIDENCE_ERROR;
+        }
+        break;
+    default:
+        ncvStat = NCV_MEM_RESIDENCE_ERROR;
+    }
+
+    return ncvStat;
+}


 //===================================================================
 //
 // NCVMemStackAllocator class members implementation
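The new dispatcher uses a row-wise memcpy for host-to-host transfers and cudaMemcpy2D / cudaMemcpy2DAsync whenever device memory is involved; pitches and the copied width are all expressed in bytes. A hedged usage sketch (buffer names are illustrative, not from the commit):

#include <cuda_runtime.h>
#include <vector>

// Sketch only: copy a width x height block of Ncv32u from pageable host memory
// into a pitched device allocation through memSegCopyHelper2D.
static NCVStatus uploadRoiSketch(Ncv32u width, Ncv32u height)
{
    std::vector<Ncv32u> h_img(width * height, 0);

    void *d_img = 0;
    size_t d_pitch = 0;
    cudaMallocPitch(&d_img, &d_pitch, width * sizeof(Ncv32u), height);

    NCVStatus st = memSegCopyHelper2D(d_img, (Ncv32u)d_pitch, NCVMemoryTypeDevice,
                                      &h_img[0], (Ncv32u)(width * sizeof(Ncv32u)), NCVMemoryTypeHostPageable,
                                      (Ncv32u)(width * sizeof(Ncv32u)), height,
                                      0); // stream 0: the synchronous cudaMemcpy2D path is taken

    cudaFree(d_img);
    return st;
}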
@@ -195,8 +262,10 @@ NCVMemStackAllocator::NCVMemStackAllocator(Ncv32u alignment)
     _maxSize(0),
     allocBegin(NULL),
     begin(NULL),
+    end(NULL),
     _memType(NCVMemoryTypeNone),
-    _alignment(alignment)
+    _alignment(alignment),
+    bReusesMemory(false)
 {
     NcvBool bProperAlignment = (alignment & (alignment-1)) == 0;
     ncvAssertPrintCheck(bProperAlignment, "NCVMemStackAllocator ctor:: alignment not power of 2");
@@ -573,4 +642,264 @@ double ncvEndQueryTimerMs(NcvTimer t)
     return res;
 }


-#endif /* !defined (HAVE_CUDA) */
+//===================================================================
+//
+// Operations with rectangles
+//
+//===================================================================
+
+
+//from OpenCV
+void groupRectangles(std::vector<NcvRect32u> &hypotheses, int groupThreshold, double eps, std::vector<Ncv32u> *weights);
+
+
+NCVStatus ncvGroupRectangles_host(NCVVector<NcvRect32u> &hypotheses,
+                                  Ncv32u &numHypotheses,
+                                  Ncv32u minNeighbors,
+                                  Ncv32f intersectEps,
+                                  NCVVector<Ncv32u> *hypothesesWeights)
+{
+    ncvAssertReturn(hypotheses.memType() == NCVMemoryTypeHostPageable ||
+                    hypotheses.memType() == NCVMemoryTypeHostPinned, NCV_MEM_RESIDENCE_ERROR);
+    if (hypothesesWeights != NULL)
+    {
+        ncvAssertReturn(hypothesesWeights->memType() == NCVMemoryTypeHostPageable ||
+                        hypothesesWeights->memType() == NCVMemoryTypeHostPinned, NCV_MEM_RESIDENCE_ERROR);
+    }
+
+    if (numHypotheses == 0)
+    {
+        return NCV_SUCCESS;
+    }
+
+    std::vector<NcvRect32u> rects(numHypotheses);
+    memcpy(&rects[0], hypotheses.ptr(), numHypotheses * sizeof(NcvRect32u));
+
+    std::vector<Ncv32u> weights;
+    if (hypothesesWeights != NULL)
+    {
+        groupRectangles(rects, minNeighbors, intersectEps, &weights);
+    }
+    else
+    {
+        groupRectangles(rects, minNeighbors, intersectEps, NULL);
+    }
+
+    numHypotheses = (Ncv32u)rects.size();
+    if (numHypotheses > 0)
+    {
+        memcpy(hypotheses.ptr(), &rects[0], numHypotheses * sizeof(NcvRect32u));
+    }
+
+    if (hypothesesWeights != NULL)
+    {
+        memcpy(hypothesesWeights->ptr(), &weights[0], numHypotheses * sizeof(Ncv32u));
+    }
+
+    return NCV_SUCCESS;
+}
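ncvGroupRectangles_host is a thin host-side wrapper over OpenCV's groupRectangles that works in place on an NCVVector. A minimal sketch of the intended call pattern (parameter values are illustrative, not from the commit):

// Sketch only: merge raw per-window detections in place.
// h_hypotheses is assumed to be a host-resident (pageable or pinned)
// NCVVector<NcvRect32u>; numDetections holds the number of valid entries.
static NCVStatus groupDetectionsSketch(NCVVector<NcvRect32u> &h_hypotheses, Ncv32u &numDetections)
{
    const Ncv32u minNeighbors = 4;      // a rectangle survives only with enough confirming neighbors
    const Ncv32f intersectEps = 0.2f;   // relative overlap used when clustering hypotheses

    // On success numDetections is overwritten with the number of grouped rectangles,
    // which now occupy the beginning of h_hypotheses.
    return ncvGroupRectangles_host(h_hypotheses, numDetections, minNeighbors, intersectEps, NULL);
}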
+
+
+template <class T>
+static NCVStatus drawRectsWrapperHost(T *h_dst,
+                                      Ncv32u dstStride,
+                                      Ncv32u dstWidth,
+                                      Ncv32u dstHeight,
+                                      NcvRect32u *h_rects,
+                                      Ncv32u numRects,
+                                      T color)
+{
+    ncvAssertReturn(h_dst != NULL && h_rects != NULL, NCV_NULL_PTR);
+    ncvAssertReturn(dstWidth > 0 && dstHeight > 0, NCV_DIMENSIONS_INVALID);
+    ncvAssertReturn(dstStride >= dstWidth, NCV_INVALID_STEP);
+    ncvAssertReturn(numRects != 0, NCV_SUCCESS);
+    ncvAssertReturn(numRects <= dstWidth * dstHeight, NCV_DIMENSIONS_INVALID);
+
+    for (Ncv32u i=0; i<numRects; i++)
+    {
+        NcvRect32u rect = h_rects[i];
+
+        if (rect.x < dstWidth)
+        {
+            for (Ncv32u i=rect.y; i<rect.y+rect.height && i<dstHeight; i++)
+            {
+                h_dst[i*dstStride+rect.x] = color;
+            }
+        }
+        if (rect.x+rect.width-1 < dstWidth)
+        {
+            for (Ncv32u i=rect.y; i<rect.y+rect.height && i<dstHeight; i++)
+            {
+                h_dst[i*dstStride+rect.x+rect.width-1] = color;
+            }
+        }
+        if (rect.y < dstHeight)
+        {
+            for (Ncv32u j=rect.x; j<rect.x+rect.width && j<dstWidth; j++)
+            {
+                h_dst[rect.y*dstStride+j] = color;
+            }
+        }
+        if (rect.y + rect.height - 1 < dstHeight)
+        {
+            for (Ncv32u j=rect.x; j<rect.x+rect.width && j<dstWidth; j++)
+            {
+                h_dst[(rect.y+rect.height-1)*dstStride+j] = color;
+            }
+        }
+    }
+
+    return NCV_SUCCESS;
+}
+
+
+NCVStatus ncvDrawRects_8u_host(Ncv8u *h_dst,
+                               Ncv32u dstStride,
+                               Ncv32u dstWidth,
+                               Ncv32u dstHeight,
+                               NcvRect32u *h_rects,
+                               Ncv32u numRects,
+                               Ncv8u color)
+{
+    return drawRectsWrapperHost(h_dst, dstStride, dstWidth, dstHeight, h_rects, numRects, color);
+}
+
+
+NCVStatus ncvDrawRects_32u_host(Ncv32u *h_dst,
+                                Ncv32u dstStride,
+                                Ncv32u dstWidth,
+                                Ncv32u dstHeight,
+                                NcvRect32u *h_rects,
+                                Ncv32u numRects,
+                                Ncv32u color)
+{
+    return drawRectsWrapperHost(h_dst, dstStride, dstWidth, dstHeight, h_rects, numRects, color);
+}
+
+
+const Ncv32u NUMTHREADS_DRAWRECTS = 32;
+const Ncv32u NUMTHREADS_DRAWRECTS_LOG2 = 5;
+
+
+template <class T>
+__global__ void drawRects(T *d_dst,
+                          Ncv32u dstStride,
+                          Ncv32u dstWidth,
+                          Ncv32u dstHeight,
+                          NcvRect32u *d_rects,
+                          Ncv32u numRects,
+                          T color)
+{
+    Ncv32u blockId = blockIdx.y * 65535 + blockIdx.x;
+    if (blockId > numRects * 4)
+    {
+        return;
+    }
+
+    NcvRect32u curRect = d_rects[blockId >> 2];
+    NcvBool bVertical = blockId & 0x1;
+    NcvBool bTopLeft = blockId & 0x2;
+
+    Ncv32u pt0x, pt0y;
+    if (bVertical)
+    {
+        Ncv32u numChunks = (curRect.height + NUMTHREADS_DRAWRECTS - 1) >> NUMTHREADS_DRAWRECTS_LOG2;
+
+        pt0x = bTopLeft ? curRect.x : curRect.x + curRect.width - 1;
+        pt0y = curRect.y;
+
+        if (pt0x < dstWidth)
+        {
+            for (Ncv32u chunkId = 0; chunkId < numChunks; chunkId++)
+            {
+                Ncv32u ptY = pt0y + chunkId * NUMTHREADS_DRAWRECTS + threadIdx.x;
+                if (ptY < pt0y + curRect.height && ptY < dstHeight)
+                {
+                    d_dst[ptY * dstStride + pt0x] = color;
+                }
+            }
+        }
+    }
+    else
+    {
+        Ncv32u numChunks = (curRect.width + NUMTHREADS_DRAWRECTS - 1) >> NUMTHREADS_DRAWRECTS_LOG2;
+
+        pt0x = curRect.x;
+        pt0y = bTopLeft ? curRect.y : curRect.y + curRect.height - 1;
+
+        if (pt0y < dstHeight)
+        {
+            for (Ncv32u chunkId = 0; chunkId < numChunks; chunkId++)
+            {
+                Ncv32u ptX = pt0x + chunkId * NUMTHREADS_DRAWRECTS + threadIdx.x;
+                if (ptX < pt0x + curRect.width && ptX < dstWidth)
+                {
+                    d_dst[pt0y * dstStride + ptX] = color;
+                }
+            }
+        }
+    }
+}
+
+
+template <class T>
+static NCVStatus drawRectsWrapperDevice(T *d_dst,
+                                        Ncv32u dstStride,
+                                        Ncv32u dstWidth,
+                                        Ncv32u dstHeight,
+                                        NcvRect32u *d_rects,
+                                        Ncv32u numRects,
+                                        T color,
+                                        cudaStream_t cuStream)
+{
+    ncvAssertReturn(d_dst != NULL && d_rects != NULL, NCV_NULL_PTR);
+    ncvAssertReturn(dstWidth > 0 && dstHeight > 0, NCV_DIMENSIONS_INVALID);
+    ncvAssertReturn(dstStride >= dstWidth, NCV_INVALID_STEP);
+    ncvAssertReturn(numRects <= dstWidth * dstHeight, NCV_DIMENSIONS_INVALID);
+
+    if (numRects == 0)
+    {
+        return NCV_SUCCESS;
+    }
+
+    dim3 grid(numRects * 4);
+    dim3 block(NUMTHREADS_DRAWRECTS);
+    if (grid.x > 65535)
+    {
+        grid.y = (grid.x + 65534) / 65535;
+        grid.x = 65535;
+    }
+
+    drawRects<T><<<grid, block>>>(d_dst, dstStride, dstWidth, dstHeight, d_rects, numRects, color);
+
+    ncvAssertCUDAReturn(cudaGetLastError(), NCV_CUDA_ERROR);
+
+    return NCV_SUCCESS;
+}
+
+
+NCVStatus ncvDrawRects_8u_device(Ncv8u *d_dst,
+                                 Ncv32u dstStride,
+                                 Ncv32u dstWidth,
+                                 Ncv32u dstHeight,
+                                 NcvRect32u *d_rects,
+                                 Ncv32u numRects,
+                                 Ncv8u color,
+                                 cudaStream_t cuStream)
+{
+    return drawRectsWrapperDevice(d_dst, dstStride, dstWidth, dstHeight, d_rects, numRects, color, cuStream);
+}
+
+
+NCVStatus ncvDrawRects_32u_device(Ncv32u *d_dst,
+                                  Ncv32u dstStride,
+                                  Ncv32u dstWidth,
+                                  Ncv32u dstHeight,
+                                  NcvRect32u *d_rects,
+                                  Ncv32u numRects,
+                                  Ncv32u color,
+                                  cudaStream_t cuStream)
+{
+    return drawRectsWrapperDevice(d_dst, dstStride, dstWidth, dstHeight, d_rects, numRects, color, cuStream);
+}
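The device wrapper launches four blocks per rectangle, one per edge, and each block sweeps its edge in 32-pixel chunks. A hedged sketch of overlaying grouped detections on a pitched 32-bit device image (buffer names are illustrative; the stride is in elements, matching the wrapper above):

// Sketch only: draw numRects rectangles into a 32-bit device image.
static NCVStatus drawDetectionsSketch(Ncv32u *d_image, Ncv32u strideElems,
                                      Ncv32u width, Ncv32u height,
                                      NcvRect32u *d_rects, Ncv32u numRects,
                                      cudaStream_t stream)
{
    const Ncv32u color = 0xFFFFFFFF;    // packed value written into every edge pixel
    return ncvDrawRects_32u_device(d_image, strideElems, width, height,
                                   d_rects, numRects, color, stream);
}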
@@ -129,8 +129,8 @@ struct NcvRect8u
     Ncv8u y;
     Ncv8u width;
     Ncv8u height;
-    NcvRect8u() : x(0), y(0), width(0), height(0) {};
-    NcvRect8u(Ncv8u x, Ncv8u y, Ncv8u width, Ncv8u height) : x(x), y(y), width(width), height(height) {}
+    __host__ __device__ NcvRect8u() : x(0), y(0), width(0), height(0) {};
+    __host__ __device__ NcvRect8u(Ncv8u x, Ncv8u y, Ncv8u width, Ncv8u height) : x(x), y(y), width(width), height(height) {}
 };


@@ -140,8 +140,8 @@ struct NcvRect32s
     Ncv32s y; ///< y-coordinate of upper left corner.
     Ncv32s width; ///< Rectangle width.
     Ncv32s height; ///< Rectangle height.
-    NcvRect32s() : x(0), y(0), width(0), height(0) {};
-    NcvRect32s(Ncv32s x, Ncv32s y, Ncv32s width, Ncv32s height) : x(x), y(y), width(width), height(height) {}
+    __host__ __device__ NcvRect32s() : x(0), y(0), width(0), height(0) {};
+    __host__ __device__ NcvRect32s(Ncv32s x, Ncv32s y, Ncv32s width, Ncv32s height) : x(x), y(y), width(width), height(height) {}
 };


@@ -151,8 +151,8 @@ struct NcvRect32u
     Ncv32u y; ///< y-coordinate of upper left corner.
     Ncv32u width; ///< Rectangle width.
     Ncv32u height; ///< Rectangle height.
-    NcvRect32u() : x(0), y(0), width(0), height(0) {};
-    NcvRect32u(Ncv32u x, Ncv32u y, Ncv32u width, Ncv32u height) : x(x), y(y), width(width), height(height) {}
+    __host__ __device__ NcvRect32u() : x(0), y(0), width(0), height(0) {};
+    __host__ __device__ NcvRect32u(Ncv32u x, Ncv32u y, Ncv32u width, Ncv32u height) : x(x), y(y), width(width), height(height) {}
 };


@@ -160,8 +160,8 @@ struct NcvSize32s
 {
     Ncv32s width; ///< Rectangle width.
     Ncv32s height; ///< Rectangle height.
-    NcvSize32s() : width(0), height(0) {};
-    NcvSize32s(Ncv32s width, Ncv32s height) : width(width), height(height) {}
+    __host__ __device__ NcvSize32s() : width(0), height(0) {};
+    __host__ __device__ NcvSize32s(Ncv32s width, Ncv32s height) : width(width), height(height) {}
 };


@@ -169,8 +169,8 @@ struct NcvSize32u
 {
     Ncv32u width; ///< Rectangle width.
     Ncv32u height; ///< Rectangle height.
-    NcvSize32u() : width(0), height(0) {};
-    NcvSize32u(Ncv32u width, Ncv32u height) : width(width), height(height) {}
+    __host__ __device__ NcvSize32u() : width(0), height(0) {};
+    __host__ __device__ NcvSize32u(Ncv32u width, Ncv32u height) : width(width), height(height) {}
 };

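Marking the constructors __host__ __device__ lets these helper structs be constructed inside kernels as well as in host code. A small sketch of what now compiles (the kernel name is illustrative, not from the commit):

// Sketch only: each thread builds an NcvRect32u directly in device code.
__global__ void makeRectsSketch(NcvRect32u *d_out, Ncv32u n)
{
    Ncv32u i = blockIdx.x * blockDim.x + threadIdx.x;
    if (i < n)
    {
        d_out[i] = NcvRect32u(i, i, 10, 10);   // 10x10 rectangle at (i, i)
    }
}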
@@ -275,6 +275,7 @@ enum NCVStatus
 {
     //NCV statuses
     NCV_SUCCESS,
+    NCV_UNKNOWN_ERROR,

     NCV_CUDA_ERROR,
     NCV_NPP_ERROR,
@@ -501,13 +502,18 @@ private:


 /**
- * Copy dispatcher
+ * Copy dispatchers
 */
 NCV_EXPORTS NCVStatus memSegCopyHelper(void *dst, NCVMemoryType dstType,
                                        const void *src, NCVMemoryType srcType,
                                        size_t sz, cudaStream_t cuStream);


+NCV_EXPORTS NCVStatus memSegCopyHelper2D(void *dst, Ncv32u dstPitch, NCVMemoryType dstType,
+                                         const void *src, Ncv32u srcPitch, NCVMemoryType srcType,
+                                         Ncv32u widthbytes, Ncv32u height, cudaStream_t cuStream);
+
+
 /**
 * NCVVector (1D)
 */
@@ -532,7 +538,7 @@ public:
         _memtype = NCVMemoryTypeNone;
     }

-    NCVStatus copySolid(NCVVector<T> &dst, cudaStream_t cuStream, size_t howMuch=0)
+    NCVStatus copySolid(NCVVector<T> &dst, cudaStream_t cuStream, size_t howMuch=0) const
     {
         if (howMuch == 0)
         {
@@ -600,7 +606,6 @@ public:
         this->_memtype = this->allocatedMem.begin.memtype;
     }

-
     ~NCVVectorAlloc()
     {
         NCVStatus ncvStat;
@@ -611,19 +616,16 @@ public:
         this->clear();
     }

-
     NcvBool isMemAllocated() const
     {
         return (this->allocatedMem.begin.ptr != NULL) || (this->allocator.isCounting());
     }

-
     Ncv32u getAllocatorsAlignment() const
     {
         return allocator.alignment();
     }

-
     NCVMemSegment getSegment() const
     {
         return allocatedMem;
@@ -658,7 +660,6 @@ public:
         this->bReused = true;
     }

-
     NCVVectorReuse(const NCVMemSegment &memSegment, Ncv32u length)
     {
         this->bReused = false;
@@ -674,7 +675,6 @@ public:
         this->bReused = true;
     }

-
     NcvBool isMemReused() const
     {
         return this->bReused;
@@ -703,7 +703,6 @@ public:

     virtual ~NCVMatrix() {}

-
     void clear()
     {
         _ptr = NULL;
@@ -713,14 +712,13 @@ public:
         _memtype = NCVMemoryTypeNone;
     }

-
     Ncv32u stride() const
     {
         return _pitch / sizeof(T);
     }

-    NCVStatus copySolid(NCVMatrix<T> &dst, cudaStream_t cuStream, size_t howMuch=0)
+    //a side effect of this function is that it copies everything in a single chunk, so the "padding" will be overwritten
+    NCVStatus copySolid(NCVMatrix<T> &dst, cudaStream_t cuStream, size_t howMuch=0) const
     {
         if (howMuch == 0)
         {
@@ -748,6 +746,24 @@ public:
         return ncvStat;
     }

+    NCVStatus copy2D(NCVMatrix<T> &dst, NcvSize32u roi, cudaStream_t cuStream) const
+    {
+        ncvAssertReturn(this->width() >= roi.width && this->height() >= roi.height &&
+                        dst.width() >= roi.width && dst.height() >= roi.height, NCV_MEM_COPY_ERROR);
+        ncvAssertReturn((this->_ptr != NULL || this->_memtype == NCVMemoryTypeNone) &&
+                        (dst._ptr != NULL || dst._memtype == NCVMemoryTypeNone), NCV_NULL_PTR);
+
+        NCVStatus ncvStat = NCV_SUCCESS;
+        if (this->_memtype != NCVMemoryTypeNone)
+        {
+            ncvStat = memSegCopyHelper2D(dst._ptr, dst._pitch, dst._memtype,
+                                         this->_ptr, this->_pitch, this->_memtype,
+                                         roi.width * sizeof(T), roi.height, cuStream);
+        }
+
+        return ncvStat;
+    }
+
     T *ptr() const {return this->_ptr;}
     Ncv32u width() const {return this->_width;}
     Ncv32u height() const {return this->_height;}
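copy2D complements copySolid: instead of transferring the whole padded buffer in one chunk, it copies only a top-left ROI row by row through memSegCopyHelper2D. A hedged sketch (matrix objects are assumed to be already allocated and at least roi-sized):

// Sketch only: copy just the top-left roiW x roiH region between two pitched matrices.
static NCVStatus copyRoiSketch(const NCVMatrix<Ncv32u> &src, NCVMatrix<Ncv32u> &dst,
                               Ncv32u roiW, Ncv32u roiH, cudaStream_t stream)
{
    NcvSize32u roi(roiW, roiH);
    // Host/device residence of src and dst is resolved inside memSegCopyHelper2D.
    return src.copy2D(dst, roi, stream);
}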
@@ -817,19 +833,16 @@ public:
         this->clear();
     }

-
     NcvBool isMemAllocated() const
     {
         return (this->allocatedMem.begin.ptr != NULL) || (this->allocator.isCounting());
     }

-
     Ncv32u getAllocatorsAlignment() const
     {
         return allocator.alignment();
     }

-
     NCVMemSegment getSegment() const
     {
         return allocatedMem;
@@ -888,6 +901,23 @@ public:
         this->bReused = true;
     }

+    NCVMatrixReuse(const NCVMatrix<T> &mat, NcvRect32u roi)
+    {
+        this->bReused = false;
+        this->clear();
+
+        ncvAssertPrintReturn(roi.x < mat.width() && roi.y < mat.height() && \
+                             roi.x + roi.width <= mat.width() && roi.y + roi.height <= mat.height(),
+                             "NCVMatrixReuse ctor:: memory binding failed due to mismatching ROI and source matrix dims", );
+
+        this->_width = roi.width;
+        this->_height = roi.height;
+        this->_pitch = mat.pitch();
+        this->_ptr = mat.ptr() + roi.y * mat.stride() + roi.x;
+        this->_memtype = mat.memType();
+
+        this->bReused = true;
+    }
+
     NcvBool isMemReused() const
     {
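The new NCVMatrixReuse constructor binds a zero-copy view to a rectangular ROI of an existing matrix: the view inherits the source pitch and memory type, so it can be handed to any routine expecting an NCVMatrix<T>. A short sketch (sizes are illustrative, not from the commit):

// Sketch only: make a 100x100 view at offset (16, 16) inside an existing matrix.
static void roiViewSketch(NCVMatrix<Ncv32u> &fullFrame)
{
    NcvRect32u roi(16, 16, 100, 100);              // must lie fully inside fullFrame
    NCVMatrixReuse<Ncv32u> view(fullFrame, roi);   // no allocation, no copy

    ncvAssertPrintCheck(view.isMemReused(), "ROI binding failed");
}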
@@ -899,4 +929,27 @@ private:
     NcvBool bReused;
 };

+
+/**
+ * Operations with rectangles
+ */
+NCV_EXPORTS NCVStatus ncvGroupRectangles_host(NCVVector<NcvRect32u> &hypotheses, Ncv32u &numHypotheses,
+                                              Ncv32u minNeighbors, Ncv32f intersectEps, NCVVector<Ncv32u> *hypothesesWeights);
+
+NCV_EXPORTS NCVStatus ncvDrawRects_8u_host(Ncv8u *h_dst, Ncv32u dstStride, Ncv32u dstWidth, Ncv32u dstHeight,
+                                           NcvRect32u *h_rects, Ncv32u numRects, Ncv8u color);
+
+NCV_EXPORTS NCVStatus ncvDrawRects_32u_host(Ncv32u *h_dst, Ncv32u dstStride, Ncv32u dstWidth, Ncv32u dstHeight,
+                                            NcvRect32u *h_rects, Ncv32u numRects, Ncv32u color);
+
+NCV_EXPORTS NCVStatus ncvDrawRects_8u_device(Ncv8u *d_dst, Ncv32u dstStride, Ncv32u dstWidth, Ncv32u dstHeight,
+                                             NcvRect32u *d_rects, Ncv32u numRects, Ncv8u color, cudaStream_t cuStream);
+
+NCV_EXPORTS NCVStatus ncvDrawRects_32u_device(Ncv32u *d_dst, Ncv32u dstStride, Ncv32u dstWidth, Ncv32u dstHeight,
+                                              NcvRect32u *d_rects, Ncv32u numRects, Ncv32u color, cudaStream_t cuStream);
+
 #endif // _ncv_hpp_

@@ -150,14 +150,14 @@ namespace NCVRuntimeTemplateBool
     {
         //Convenience function used by the user
         //Takes a variable argument list, transforms it into a list
-        static void call(Func *functor, int dummy, ...)
+        static void call(Func *functor, ...)
         {
             //Vector used to collect arguments
             std::vector<int> templateParamList;

             //Variable argument list manipulation
             va_list listPointer;
-            va_start(listPointer, dummy);
+            va_start(listPointer, functor);
             //Collect parameters into the list
             for(int i=0; i<NumArguments; i++)
             {

@@ -134,7 +134,7 @@ bool TestHypothesesFilter::process()

     Ncv32u numHypothesesSrc = h_vecSrc.length();
     NCV_SKIP_COND_BEGIN
-    ncvStat = ncvFilterHypotheses_host(h_vecSrc, numHypothesesSrc, this->minNeighbors, this->eps, NULL);
+    ncvStat = ncvGroupRectangles_host(h_vecSrc, numHypothesesSrc, this->minNeighbors, this->eps, NULL);
     ncvAssertReturn(ncvStat == NCV_SUCCESS, false);
     NCV_SKIP_COND_END

@@ -83,14 +83,14 @@ bool TestResize<T>::process()
     NCV_SKIP_COND_BEGIN
     if (sizeof(T) == sizeof(Ncv32u))
     {
-        ncvStat = nppiStDownsampleNearest_32u_C1R((Ncv32u *)d_img.ptr(), d_img.pitch(),
+        ncvStat = nppiStDecimate_32u_C1R((Ncv32u *)d_img.ptr(), d_img.pitch(),
                                          (Ncv32u *)d_small.ptr(), d_small.pitch(),
                                          srcSize, this->scaleFactor,
                                          this->bTextureCache);
     }
     else if (sizeof(T) == sizeof(Ncv64u))
     {
-        ncvStat = nppiStDownsampleNearest_64u_C1R((Ncv64u *)d_img.ptr(), d_img.pitch(),
+        ncvStat = nppiStDecimate_64u_C1R((Ncv64u *)d_img.ptr(), d_img.pitch(),
                                          (Ncv64u *)d_small.ptr(), d_small.pitch(),
                                          srcSize, this->scaleFactor,
                                          this->bTextureCache);

@@ -107,13 +107,13 @@ bool TestResize<T>::process()
     NCV_SKIP_COND_BEGIN
     if (sizeof(T) == sizeof(Ncv32u))
     {
-        ncvStat = nppiStDownsampleNearest_32u_C1R_host((Ncv32u *)h_img.ptr(), h_img.pitch(),
+        ncvStat = nppiStDecimate_32u_C1R_host((Ncv32u *)h_img.ptr(), h_img.pitch(),
                                               (Ncv32u *)h_small.ptr(), h_small.pitch(),
                                               srcSize, this->scaleFactor);
     }
     else if (sizeof(T) == sizeof(Ncv64u))
     {
-        ncvStat = nppiStDownsampleNearest_64u_C1R_host((Ncv64u *)h_img.ptr(), h_img.pitch(),
+        ncvStat = nppiStDecimate_64u_C1R_host((Ncv64u *)h_img.ptr(), h_img.pitch(),
                                               (Ncv64u *)h_small.ptr(), h_small.pitch(),
                                               srcSize, this->scaleFactor);
     }
