Added additional error checks
This commit is contained in:
parent
79cc05d062
commit
ddf56fa629
@ -876,12 +876,12 @@ NCVStatus NCVBroxOpticalFlow(const NCVBroxOpticalFlowDescriptor desc,
|
|||||||
NcvRect32u dstROI (0, 0, level_width, level_height);
|
NcvRect32u dstROI (0, 0, level_width, level_height);
|
||||||
|
|
||||||
// frame 0
|
// frame 0
|
||||||
nppiStResize_32f_C1R (I0->ptr(), srcSize, prev_level_pitch, srcROI,
|
ncvAssertReturnNcvStat( nppiStResize_32f_C1R (I0->ptr(), srcSize, prev_level_pitch, srcROI,
|
||||||
level_frame0->ptr(), dstSize, level_width_aligned * sizeof (float), dstROI, scale_factor, scale_factor, nppStSupersample);
|
level_frame0->ptr(), dstSize, level_width_aligned * sizeof (float), dstROI, scale_factor, scale_factor, nppStSupersample) );
|
||||||
|
|
||||||
// frame 1
|
// frame 1
|
||||||
nppiStResize_32f_C1R (I1->ptr(), srcSize, prev_level_pitch, srcROI,
|
ncvAssertReturnNcvStat( nppiStResize_32f_C1R (I1->ptr(), srcSize, prev_level_pitch, srcROI,
|
||||||
level_frame1->ptr(), dstSize, level_width_aligned * sizeof (float), dstROI, scale_factor, scale_factor, nppStSupersample);
|
level_frame1->ptr(), dstSize, level_width_aligned * sizeof (float), dstROI, scale_factor, scale_factor, nppStSupersample) );
|
||||||
}
|
}
|
||||||
|
|
||||||
I0 = level_frame0.release();
|
I0 = level_frame0.release();
|
||||||
@ -962,32 +962,32 @@ NCVStatus NCVBroxOpticalFlow(const NCVBroxOpticalFlowDescriptor desc,
|
|||||||
NcvRect32u oROI(0, 0, kLevelWidth, kLevelHeight);
|
NcvRect32u oROI(0, 0, kLevelWidth, kLevelHeight);
|
||||||
|
|
||||||
// Ix0
|
// Ix0
|
||||||
nppiStFilterRowBorder_32f_C1R (I0->ptr(), srcSize, nSrcStep, Ix0.ptr(), srcSize, nSrcStep, oROI,
|
ncvAssertReturnNcvStat( nppiStFilterRowBorder_32f_C1R (I0->ptr(), srcSize, nSrcStep, Ix0.ptr(), srcSize, nSrcStep, oROI,
|
||||||
nppStBorderMirror, derivativeFilter.ptr(), kDFilterSize, kDFilterSize/2, 1.0f/12.0f);
|
nppStBorderMirror, derivativeFilter.ptr(), kDFilterSize, kDFilterSize/2, 1.0f/12.0f) );
|
||||||
|
|
||||||
// Iy0
|
// Iy0
|
||||||
nppiStFilterColumnBorder_32f_C1R (I0->ptr(), srcSize, nSrcStep, Iy0.ptr(), srcSize, nSrcStep, oROI,
|
ncvAssertReturnNcvStat( nppiStFilterColumnBorder_32f_C1R (I0->ptr(), srcSize, nSrcStep, Iy0.ptr(), srcSize, nSrcStep, oROI,
|
||||||
nppStBorderMirror, derivativeFilter.ptr(), kDFilterSize, kDFilterSize/2, 1.0f/12.0f);
|
nppStBorderMirror, derivativeFilter.ptr(), kDFilterSize, kDFilterSize/2, 1.0f/12.0f) );
|
||||||
|
|
||||||
// Ix
|
// Ix
|
||||||
nppiStFilterRowBorder_32f_C1R (I1->ptr(), srcSize, nSrcStep, Ix.ptr(), srcSize, nSrcStep, oROI,
|
ncvAssertReturnNcvStat( nppiStFilterRowBorder_32f_C1R (I1->ptr(), srcSize, nSrcStep, Ix.ptr(), srcSize, nSrcStep, oROI,
|
||||||
nppStBorderMirror, derivativeFilter.ptr(), kDFilterSize, kDFilterSize/2, 1.0f/12.0f);
|
nppStBorderMirror, derivativeFilter.ptr(), kDFilterSize, kDFilterSize/2, 1.0f/12.0f) );
|
||||||
|
|
||||||
// Iy
|
// Iy
|
||||||
nppiStFilterColumnBorder_32f_C1R (I1->ptr(), srcSize, nSrcStep, Iy.ptr(), srcSize, nSrcStep, oROI,
|
ncvAssertReturnNcvStat( nppiStFilterColumnBorder_32f_C1R (I1->ptr(), srcSize, nSrcStep, Iy.ptr(), srcSize, nSrcStep, oROI,
|
||||||
nppStBorderMirror, derivativeFilter.ptr(), kDFilterSize, kDFilterSize/2, 1.0f/12.0f);
|
nppStBorderMirror, derivativeFilter.ptr(), kDFilterSize, kDFilterSize/2, 1.0f/12.0f) );
|
||||||
|
|
||||||
// Ixx
|
// Ixx
|
||||||
nppiStFilterRowBorder_32f_C1R (Ix.ptr(), srcSize, nSrcStep, Ixx.ptr(), srcSize, nSrcStep, oROI,
|
ncvAssertReturnNcvStat( nppiStFilterRowBorder_32f_C1R (Ix.ptr(), srcSize, nSrcStep, Ixx.ptr(), srcSize, nSrcStep, oROI,
|
||||||
nppStBorderMirror, derivativeFilter.ptr(), kDFilterSize, kDFilterSize/2, 1.0f/12.0f);
|
nppStBorderMirror, derivativeFilter.ptr(), kDFilterSize, kDFilterSize/2, 1.0f/12.0f) );
|
||||||
|
|
||||||
// Iyy
|
// Iyy
|
||||||
nppiStFilterColumnBorder_32f_C1R (Iy.ptr(), srcSize, nSrcStep, Iyy.ptr(), srcSize, nSrcStep, oROI,
|
ncvAssertReturnNcvStat( nppiStFilterColumnBorder_32f_C1R (Iy.ptr(), srcSize, nSrcStep, Iyy.ptr(), srcSize, nSrcStep, oROI,
|
||||||
nppStBorderMirror, derivativeFilter.ptr(), kDFilterSize, kDFilterSize/2, 1.0f/12.0f);
|
nppStBorderMirror, derivativeFilter.ptr(), kDFilterSize, kDFilterSize/2, 1.0f/12.0f) );
|
||||||
|
|
||||||
// Ixy
|
// Ixy
|
||||||
nppiStFilterRowBorder_32f_C1R (Iy.ptr(), srcSize, nSrcStep, Ixy.ptr(), srcSize, nSrcStep, oROI,
|
ncvAssertReturnNcvStat( nppiStFilterRowBorder_32f_C1R (Iy.ptr(), srcSize, nSrcStep, Ixy.ptr(), srcSize, nSrcStep, oROI,
|
||||||
nppStBorderMirror, derivativeFilter.ptr(), kDFilterSize, kDFilterSize/2, 1.0f/12.0f);
|
nppStBorderMirror, derivativeFilter.ptr(), kDFilterSize, kDFilterSize/2, 1.0f/12.0f) );
|
||||||
|
|
||||||
ncvAssertCUDAReturn(cudaBindTexture2D(0, tex_Ix, Ix.ptr(), channel_desc, kLevelWidth, kLevelHeight, kPitchTex), NCV_CUDA_ERROR);
|
ncvAssertCUDAReturn(cudaBindTexture2D(0, tex_Ix, Ix.ptr(), channel_desc, kLevelWidth, kLevelHeight, kPitchTex), NCV_CUDA_ERROR);
|
||||||
ncvAssertCUDAReturn(cudaBindTexture2D(0, tex_Ixx, Ixx.ptr(), channel_desc, kLevelWidth, kLevelHeight, kPitchTex), NCV_CUDA_ERROR);
|
ncvAssertCUDAReturn(cudaBindTexture2D(0, tex_Ixx, Ixx.ptr(), channel_desc, kLevelWidth, kLevelHeight, kPitchTex), NCV_CUDA_ERROR);
|
||||||
@ -1029,6 +1029,8 @@ NCVStatus NCVBroxOpticalFlow(const NCVBroxOpticalFlowDescriptor desc,
|
|||||||
kLevelStride,
|
kLevelStride,
|
||||||
alpha,
|
alpha,
|
||||||
gamma);
|
gamma);
|
||||||
|
|
||||||
|
ncvAssertCUDALastErrorReturn(NCV_CUDA_ERROR);
|
||||||
|
|
||||||
ncvAssertCUDAReturn(cudaBindTexture(0, tex_diffusivity_x, diffusivity_x.ptr(), channel_desc, kLevelSizeInBytes), NCV_CUDA_ERROR);
|
ncvAssertCUDAReturn(cudaBindTexture(0, tex_diffusivity_x, diffusivity_x.ptr(), channel_desc, kLevelSizeInBytes), NCV_CUDA_ERROR);
|
||||||
ncvAssertCUDAReturn(cudaBindTexture(0, tex_diffusivity_y, diffusivity_y.ptr(), channel_desc, kLevelSizeInBytes), NCV_CUDA_ERROR);
|
ncvAssertCUDAReturn(cudaBindTexture(0, tex_diffusivity_y, diffusivity_y.ptr(), channel_desc, kLevelSizeInBytes), NCV_CUDA_ERROR);
|
||||||
@ -1039,6 +1041,8 @@ NCVStatus NCVBroxOpticalFlow(const NCVBroxOpticalFlowDescriptor desc,
|
|||||||
ncvAssertCUDAReturn(cudaBindTexture(0, tex_numerator_v, num_v.ptr(), channel_desc, kLevelSizeInBytes), NCV_CUDA_ERROR);
|
ncvAssertCUDAReturn(cudaBindTexture(0, tex_numerator_v, num_v.ptr(), channel_desc, kLevelSizeInBytes), NCV_CUDA_ERROR);
|
||||||
|
|
||||||
prepare_sor_stage_2<<<psor_blocks, psor_threads, 0, stream>>>(denom_u.ptr(), denom_v.ptr(), kLevelWidth, kLevelHeight, kLevelStride);
|
prepare_sor_stage_2<<<psor_blocks, psor_threads, 0, stream>>>(denom_u.ptr(), denom_v.ptr(), kLevelWidth, kLevelHeight, kLevelStride);
|
||||||
|
|
||||||
|
ncvAssertCUDALastErrorReturn(NCV_CUDA_ERROR);
|
||||||
|
|
||||||
// linear system coefficients
|
// linear system coefficients
|
||||||
ncvAssertCUDAReturn(cudaBindTexture(0, tex_diffusivity_x, diffusivity_x.ptr(), channel_desc, kLevelSizeInBytes), NCV_CUDA_ERROR);
|
ncvAssertCUDAReturn(cudaBindTexture(0, tex_diffusivity_x, diffusivity_x.ptr(), channel_desc, kLevelSizeInBytes), NCV_CUDA_ERROR);
|
||||||
@ -1073,6 +1077,8 @@ NCVStatus NCVBroxOpticalFlow(const NCVBroxOpticalFlowDescriptor desc,
|
|||||||
kLevelHeight,
|
kLevelHeight,
|
||||||
kLevelStride);
|
kLevelStride);
|
||||||
|
|
||||||
|
ncvAssertCUDALastErrorReturn(NCV_CUDA_ERROR);
|
||||||
|
|
||||||
ncvAssertCUDAReturn(cudaBindTexture(0, tex_du, du_new.ptr(), channel_desc, kLevelSizeInBytes), NCV_CUDA_ERROR);
|
ncvAssertCUDAReturn(cudaBindTexture(0, tex_du, du_new.ptr(), channel_desc, kLevelSizeInBytes), NCV_CUDA_ERROR);
|
||||||
ncvAssertCUDAReturn(cudaBindTexture(0, tex_dv, dv_new.ptr(), channel_desc, kLevelSizeInBytes), NCV_CUDA_ERROR);
|
ncvAssertCUDAReturn(cudaBindTexture(0, tex_dv, dv_new.ptr(), channel_desc, kLevelSizeInBytes), NCV_CUDA_ERROR);
|
||||||
|
|
||||||
@ -1089,6 +1095,8 @@ NCVStatus NCVBroxOpticalFlow(const NCVBroxOpticalFlowDescriptor desc,
|
|||||||
kLevelHeight,
|
kLevelHeight,
|
||||||
kLevelStride);
|
kLevelStride);
|
||||||
|
|
||||||
|
ncvAssertCUDALastErrorReturn(NCV_CUDA_ERROR);
|
||||||
|
|
||||||
ncvAssertCUDAReturn(cudaBindTexture(0, tex_du, du.ptr(), channel_desc, kLevelSizeInBytes), NCV_CUDA_ERROR);
|
ncvAssertCUDAReturn(cudaBindTexture(0, tex_du, du.ptr(), channel_desc, kLevelSizeInBytes), NCV_CUDA_ERROR);
|
||||||
ncvAssertCUDAReturn(cudaBindTexture(0, tex_dv, dv.ptr(), channel_desc, kLevelSizeInBytes), NCV_CUDA_ERROR);
|
ncvAssertCUDAReturn(cudaBindTexture(0, tex_dv, dv.ptr(), channel_desc, kLevelSizeInBytes), NCV_CUDA_ERROR);
|
||||||
}//end of solver loop
|
}//end of solver loop
|
||||||
@ -1096,7 +1104,9 @@ NCVStatus NCVBroxOpticalFlow(const NCVBroxOpticalFlowDescriptor desc,
|
|||||||
|
|
||||||
//update u and v
|
//update u and v
|
||||||
add(ptrU->ptr(), du.ptr(), kLevelSizeInPixels, stream);
|
add(ptrU->ptr(), du.ptr(), kLevelSizeInPixels, stream);
|
||||||
|
ncvAssertCUDALastErrorReturn(NCV_CUDA_ERROR);
|
||||||
add(ptrV->ptr(), dv.ptr(), kLevelSizeInPixels, stream);
|
add(ptrV->ptr(), dv.ptr(), kLevelSizeInPixels, stream);
|
||||||
|
ncvAssertCUDALastErrorReturn(NCV_CUDA_ERROR);
|
||||||
|
|
||||||
//prolongate using texture
|
//prolongate using texture
|
||||||
pyr.w.pop_back();
|
pyr.w.pop_back();
|
||||||
@ -1116,15 +1126,17 @@ NCVStatus NCVBroxOpticalFlow(const NCVBroxOpticalFlowDescriptor desc,
|
|||||||
NcvRect32u srcROI (0, 0, kLevelWidth, kLevelHeight);
|
NcvRect32u srcROI (0, 0, kLevelWidth, kLevelHeight);
|
||||||
NcvRect32u dstROI (0, 0, nw, nh);
|
NcvRect32u dstROI (0, 0, nw, nh);
|
||||||
|
|
||||||
nppiStResize_32f_C1R (ptrU->ptr(), srcSize, kLevelStride * sizeof (float), srcROI,
|
ncvAssertReturnNcvStat( nppiStResize_32f_C1R (ptrU->ptr(), srcSize, kLevelStride * sizeof (float), srcROI,
|
||||||
ptrUNew->ptr(), dstSize, ns * sizeof (float), dstROI, 1.0f/scale_factor, 1.0f/scale_factor, nppStBicubic);
|
ptrUNew->ptr(), dstSize, ns * sizeof (float), dstROI, 1.0f/scale_factor, 1.0f/scale_factor, nppStBicubic) );
|
||||||
|
|
||||||
ScaleVector(ptrUNew->ptr(), ptrUNew->ptr(), 1.0f/scale_factor, ns * nh, stream);
|
ScaleVector(ptrUNew->ptr(), ptrUNew->ptr(), 1.0f/scale_factor, ns * nh, stream);
|
||||||
|
ncvAssertCUDALastErrorReturn(NCV_CUDA_ERROR);
|
||||||
|
|
||||||
nppiStResize_32f_C1R (ptrV->ptr(), srcSize, kLevelStride * sizeof (float), srcROI,
|
ncvAssertReturnNcvStat( nppiStResize_32f_C1R (ptrV->ptr(), srcSize, kLevelStride * sizeof (float), srcROI,
|
||||||
ptrVNew->ptr(), dstSize, ns * sizeof (float), dstROI, 1.0f/scale_factor, 1.0f/scale_factor, nppStBicubic);
|
ptrVNew->ptr(), dstSize, ns * sizeof (float), dstROI, 1.0f/scale_factor, 1.0f/scale_factor, nppStBicubic) );
|
||||||
|
|
||||||
ScaleVector(ptrVNew->ptr(), ptrVNew->ptr(), 1.0f/scale_factor, ns * nh, stream);
|
ScaleVector(ptrVNew->ptr(), ptrVNew->ptr(), 1.0f/scale_factor, ns * nh, stream);
|
||||||
|
ncvAssertCUDALastErrorReturn(NCV_CUDA_ERROR);
|
||||||
|
|
||||||
cv::gpu::device::swap<FloatVector*>(ptrU, ptrUNew);
|
cv::gpu::device::swap<FloatVector*>(ptrU, ptrUNew);
|
||||||
cv::gpu::device::swap<FloatVector*>(ptrV, ptrVNew);
|
cv::gpu::device::swap<FloatVector*>(ptrV, ptrVNew);
|
||||||
@ -1143,7 +1155,6 @@ NCVStatus NCVBroxOpticalFlow(const NCVBroxOpticalFlowDescriptor desc,
|
|||||||
(vOut.ptr(), vOut.pitch(), ptrV->ptr(),
|
(vOut.ptr(), vOut.pitch(), ptrV->ptr(),
|
||||||
kSourcePitch, kSourceWidth*sizeof(float), kSourceHeight, cudaMemcpyDeviceToDevice, stream), NCV_CUDA_ERROR );
|
kSourcePitch, kSourceWidth*sizeof(float), kSourceHeight, cudaMemcpyDeviceToDevice, stream), NCV_CUDA_ERROR );
|
||||||
|
|
||||||
ncvAssertCUDAReturn(cudaGetLastError(), NCV_CUDA_ERROR);
|
|
||||||
ncvAssertCUDAReturn(cudaStreamSynchronize(stream), NCV_CUDA_ERROR);
|
ncvAssertCUDAReturn(cudaStreamSynchronize(stream), NCV_CUDA_ERROR);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -315,7 +315,8 @@ NCVStatus scanRowsWrapperDevice(T_in *d_src, Ncv32u srcStride,
|
|||||||
<T_in, T_out, tbDoSqr>
|
<T_in, T_out, tbDoSqr>
|
||||||
<<<roi.height, NUM_SCAN_THREADS, 0, nppStGetActiveCUDAstream()>>>
|
<<<roi.height, NUM_SCAN_THREADS, 0, nppStGetActiveCUDAstream()>>>
|
||||||
(d_src, (Ncv32u)alignmentOffset, roi.width, srcStride, d_dst, dstStride);
|
(d_src, (Ncv32u)alignmentOffset, roi.width, srcStride, d_dst, dstStride);
|
||||||
ncvAssertCUDAReturn(cudaGetLastError(), NPPST_CUDA_KERNEL_EXECUTION_ERROR);
|
|
||||||
|
ncvAssertCUDALastErrorReturn(NPPST_CUDA_KERNEL_EXECUTION_ERROR);
|
||||||
|
|
||||||
return NPPST_SUCCESS;
|
return NPPST_SUCCESS;
|
||||||
}
|
}
|
||||||
@ -768,7 +769,7 @@ static NCVStatus decimateWrapperDevice(T *d_src, Ncv32u srcStep,
|
|||||||
(d_src, srcStep, d_dst, dstStep, dstRoi, scale);
|
(d_src, srcStep, d_dst, dstStep, dstRoi, scale);
|
||||||
}
|
}
|
||||||
|
|
||||||
ncvAssertCUDAReturn(cudaGetLastError(), NPPST_CUDA_KERNEL_EXECUTION_ERROR);
|
ncvAssertCUDALastErrorReturn(NPPST_CUDA_KERNEL_EXECUTION_ERROR);
|
||||||
|
|
||||||
return NPPST_SUCCESS;
|
return NPPST_SUCCESS;
|
||||||
}
|
}
|
||||||
@ -997,7 +998,7 @@ NCVStatus nppiStRectStdDev_32f_C1R(Ncv32u *d_sum, Ncv32u sumStep,
|
|||||||
(NULL, sumStep, NULL, sqsumStep, d_norm, normStep, roi, rect, invRectArea);
|
(NULL, sumStep, NULL, sqsumStep, d_norm, normStep, roi, rect, invRectArea);
|
||||||
}
|
}
|
||||||
|
|
||||||
ncvAssertCUDAReturn(cudaGetLastError(), NPPST_CUDA_KERNEL_EXECUTION_ERROR);
|
ncvAssertCUDALastErrorReturn(NPPST_CUDA_KERNEL_EXECUTION_ERROR);
|
||||||
|
|
||||||
return NPPST_SUCCESS;
|
return NPPST_SUCCESS;
|
||||||
}
|
}
|
||||||
@ -1157,7 +1158,7 @@ NCVStatus transposeWrapperDevice(T *d_src, Ncv32u srcStride,
|
|||||||
<T>
|
<T>
|
||||||
<<<grid, block, 0, nppStGetActiveCUDAstream()>>>
|
<<<grid, block, 0, nppStGetActiveCUDAstream()>>>
|
||||||
(d_src, srcStride, d_dst, dstStride, srcRoi);
|
(d_src, srcStride, d_dst, dstStride, srcRoi);
|
||||||
ncvAssertCUDAReturn(cudaGetLastError(), NPPST_CUDA_KERNEL_EXECUTION_ERROR);
|
ncvAssertCUDALastErrorReturn(NPPST_CUDA_KERNEL_EXECUTION_ERROR);
|
||||||
|
|
||||||
return NPPST_SUCCESS;
|
return NPPST_SUCCESS;
|
||||||
}
|
}
|
||||||
@ -1407,7 +1408,8 @@ NCVStatus compactVector_32u_device(Ncv32u *d_src, Ncv32u srcLen,
|
|||||||
d_hierSums.ptr(),
|
d_hierSums.ptr(),
|
||||||
d_hierSums.ptr() + partSumOffsets[1],
|
d_hierSums.ptr() + partSumOffsets[1],
|
||||||
elemRemove);
|
elemRemove);
|
||||||
ncvAssertCUDAReturn(cudaGetLastError(), NPPST_CUDA_KERNEL_EXECUTION_ERROR);
|
|
||||||
|
ncvAssertCUDALastErrorReturn(NPPST_CUDA_KERNEL_EXECUTION_ERROR);
|
||||||
|
|
||||||
//calculate hierarchical partial sums
|
//calculate hierarchical partial sums
|
||||||
for (Ncv32u i=1; i<partSumNums.size()-1; i++)
|
for (Ncv32u i=1; i<partSumNums.size()-1; i++)
|
||||||
@ -1438,7 +1440,8 @@ NCVStatus compactVector_32u_device(Ncv32u *d_src, Ncv32u srcLen,
|
|||||||
NULL,
|
NULL,
|
||||||
NULL);
|
NULL);
|
||||||
}
|
}
|
||||||
ncvAssertCUDAReturn(cudaGetLastError(), NPPST_CUDA_KERNEL_EXECUTION_ERROR);
|
|
||||||
|
ncvAssertCUDALastErrorReturn(NPPST_CUDA_KERNEL_EXECUTION_ERROR);
|
||||||
}
|
}
|
||||||
|
|
||||||
//adjust hierarchical partial sums
|
//adjust hierarchical partial sums
|
||||||
@ -1454,7 +1457,8 @@ NCVStatus compactVector_32u_device(Ncv32u *d_src, Ncv32u srcLen,
|
|||||||
<<<grid, block, 0, nppStGetActiveCUDAstream()>>>
|
<<<grid, block, 0, nppStGetActiveCUDAstream()>>>
|
||||||
(d_hierSums.ptr() + partSumOffsets[i], partSumNums[i],
|
(d_hierSums.ptr() + partSumOffsets[i], partSumNums[i],
|
||||||
d_hierSums.ptr() + partSumOffsets[i+1]);
|
d_hierSums.ptr() + partSumOffsets[i+1]);
|
||||||
ncvAssertCUDAReturn(cudaGetLastError(), NPPST_CUDA_KERNEL_EXECUTION_ERROR);
|
|
||||||
|
ncvAssertCUDALastErrorReturn(NPPST_CUDA_KERNEL_EXECUTION_ERROR);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
@ -1466,7 +1470,8 @@ NCVStatus compactVector_32u_device(Ncv32u *d_src, Ncv32u srcLen,
|
|||||||
(d_src, srcLen,
|
(d_src, srcLen,
|
||||||
d_hierSums.ptr(),
|
d_hierSums.ptr(),
|
||||||
NULL, elemRemove);
|
NULL, elemRemove);
|
||||||
ncvAssertCUDAReturn(cudaGetLastError(), NPPST_CUDA_KERNEL_EXECUTION_ERROR);
|
|
||||||
|
ncvAssertCUDALastErrorReturn(NPPST_CUDA_KERNEL_EXECUTION_ERROR);
|
||||||
}
|
}
|
||||||
|
|
||||||
//compact source vector using indices
|
//compact source vector using indices
|
||||||
@ -1480,7 +1485,8 @@ NCVStatus compactVector_32u_device(Ncv32u *d_src, Ncv32u srcLen,
|
|||||||
<<<grid, block, 0, nppStGetActiveCUDAstream()>>>
|
<<<grid, block, 0, nppStGetActiveCUDAstream()>>>
|
||||||
(d_src, srcLen, d_hierSums.ptr(), d_dst,
|
(d_src, srcLen, d_hierSums.ptr(), d_dst,
|
||||||
elemRemove, d_numDstElements.ptr());
|
elemRemove, d_numDstElements.ptr());
|
||||||
ncvAssertCUDAReturn(cudaGetLastError(), NPPST_CUDA_KERNEL_EXECUTION_ERROR);
|
|
||||||
|
ncvAssertCUDALastErrorReturn(NPPST_CUDA_KERNEL_EXECUTION_ERROR);
|
||||||
|
|
||||||
//get number of dst elements
|
//get number of dst elements
|
||||||
if (dstLenPinned != NULL)
|
if (dstLenPinned != NULL)
|
||||||
@ -1773,6 +1779,7 @@ NCVStatus nppiStFilterRowBorder_32f_C1R(const Ncv32f *pSrc,
|
|||||||
case nppStBorderMirror:
|
case nppStBorderMirror:
|
||||||
FilterRowBorderMirror_32f_C1R <<<gridSize, ctaSize, 0, nppStGetActiveCUDAstream ()>>>
|
FilterRowBorderMirror_32f_C1R <<<gridSize, ctaSize, 0, nppStGetActiveCUDAstream ()>>>
|
||||||
(srcStep, pDst, dstSize, dstStep, oROI, nKernelSize, nAnchor, multiplier);
|
(srcStep, pDst, dstSize, dstStep, oROI, nKernelSize, nAnchor, multiplier);
|
||||||
|
ncvAssertCUDALastErrorReturn(NPPST_CUDA_KERNEL_EXECUTION_ERROR);
|
||||||
break;
|
break;
|
||||||
default:
|
default:
|
||||||
return NPPST_ERROR;
|
return NPPST_ERROR;
|
||||||
@ -1842,6 +1849,7 @@ NCVStatus nppiStFilterColumnBorder_32f_C1R(const Ncv32f *pSrc,
|
|||||||
case nppStBorderMirror:
|
case nppStBorderMirror:
|
||||||
FilterColumnBorderMirror_32f_C1R <<<gridSize, ctaSize, 0, nppStGetActiveCUDAstream ()>>>
|
FilterColumnBorderMirror_32f_C1R <<<gridSize, ctaSize, 0, nppStGetActiveCUDAstream ()>>>
|
||||||
(srcStep, pDst, dstSize, dstStep, oROI, nKernelSize, nAnchor, multiplier);
|
(srcStep, pDst, dstSize, dstStep, oROI, nKernelSize, nAnchor, multiplier);
|
||||||
|
ncvAssertCUDALastErrorReturn(NPPST_CUDA_KERNEL_EXECUTION_ERROR);
|
||||||
break;
|
break;
|
||||||
default:
|
default:
|
||||||
return NPPST_ERROR;
|
return NPPST_ERROR;
|
||||||
@ -1946,7 +1954,7 @@ NCVStatus BlendFrames(const Ncv32f *src0,
|
|||||||
BlendFramesKernel<<<blocks, threads, 0, nppStGetActiveCUDAstream ()>>>
|
BlendFramesKernel<<<blocks, threads, 0, nppStGetActiveCUDAstream ()>>>
|
||||||
(ufi, vfi, ubi, vbi, o1, o2, width, height, stride, theta, out);
|
(ufi, vfi, ubi, vbi, o1, o2, width, height, stride, theta, out);
|
||||||
|
|
||||||
ncvAssertCUDAReturn (cudaGetLastError (), NPPST_CUDA_KERNEL_EXECUTION_ERROR);
|
ncvAssertCUDALastErrorReturn(NPPST_CUDA_KERNEL_EXECUTION_ERROR);
|
||||||
|
|
||||||
return NPPST_SUCCESS;
|
return NPPST_SUCCESS;
|
||||||
}
|
}
|
||||||
@ -2262,6 +2270,8 @@ NCVStatus nppiStVectorWarp_PSF1x1_32f_C1(const Ncv32f *pSrc,
|
|||||||
ForwardWarpKernel_PSF1x1 <<<gridSize, ctaSize, 0, nppStGetActiveCUDAstream()>>>
|
ForwardWarpKernel_PSF1x1 <<<gridSize, ctaSize, 0, nppStGetActiveCUDAstream()>>>
|
||||||
(pU, pV, pSrc, srcSize.width, srcSize.height, vfStep, srcStep, timeScale, pDst);
|
(pU, pV, pSrc, srcSize.width, srcSize.height, vfStep, srcStep, timeScale, pDst);
|
||||||
|
|
||||||
|
ncvAssertCUDALastErrorReturn(NPPST_CUDA_KERNEL_EXECUTION_ERROR);
|
||||||
|
|
||||||
return NPPST_SUCCESS;
|
return NPPST_SUCCESS;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -2294,12 +2304,18 @@ NCVStatus nppiStVectorWarp_PSF2x2_32f_C1(const Ncv32f *pSrc,
|
|||||||
MemsetKernel <<<gridSize, ctaSize, 0, nppStGetActiveCUDAstream()>>>
|
MemsetKernel <<<gridSize, ctaSize, 0, nppStGetActiveCUDAstream()>>>
|
||||||
(0, srcSize.width, srcSize.height, pBuffer);
|
(0, srcSize.width, srcSize.height, pBuffer);
|
||||||
|
|
||||||
|
ncvAssertCUDALastErrorReturn(NPPST_CUDA_KERNEL_EXECUTION_ERROR);
|
||||||
|
|
||||||
ForwardWarpKernel_PSF2x2 <<<gridSize, ctaSize, 0, nppStGetActiveCUDAstream()>>>
|
ForwardWarpKernel_PSF2x2 <<<gridSize, ctaSize, 0, nppStGetActiveCUDAstream()>>>
|
||||||
(pU, pV, pSrc, srcSize.width, srcSize.height, vfStep, srcStep, timeScale, pBuffer, pDst);
|
(pU, pV, pSrc, srcSize.width, srcSize.height, vfStep, srcStep, timeScale, pBuffer, pDst);
|
||||||
|
|
||||||
|
ncvAssertCUDALastErrorReturn(NPPST_CUDA_KERNEL_EXECUTION_ERROR);
|
||||||
|
|
||||||
NormalizeKernel <<<gridSize, ctaSize, 0, nppStGetActiveCUDAstream()>>>
|
NormalizeKernel <<<gridSize, ctaSize, 0, nppStGetActiveCUDAstream()>>>
|
||||||
(pBuffer, srcSize.width, srcSize.height, srcStep, pDst);
|
(pBuffer, srcSize.width, srcSize.height, srcStep, pDst);
|
||||||
|
|
||||||
|
ncvAssertCUDALastErrorReturn(NPPST_CUDA_KERNEL_EXECUTION_ERROR);
|
||||||
|
|
||||||
return NPPST_SUCCESS;
|
return NPPST_SUCCESS;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -2557,5 +2573,7 @@ NCVStatus nppiStResize_32f_C1R(const Ncv32f *pSrc,
|
|||||||
status = NPPST_ERROR;
|
status = NPPST_ERROR;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
ncvAssertCUDALastErrorReturn(NPPST_CUDA_KERNEL_EXECUTION_ERROR);
|
||||||
|
|
||||||
return status;
|
return status;
|
||||||
}
|
}
|
||||||
|
@ -874,7 +874,7 @@ static NCVStatus drawRectsWrapperDevice(T *d_dst,
|
|||||||
|
|
||||||
drawRects<T><<<grid, block>>>(d_dst, dstStride, dstWidth, dstHeight, d_rects, numRects, color);
|
drawRects<T><<<grid, block>>>(d_dst, dstStride, dstWidth, dstHeight, d_rects, numRects, color);
|
||||||
|
|
||||||
ncvAssertCUDAReturn(cudaGetLastError(), NCV_CUDA_ERROR);
|
ncvAssertCUDALastErrorReturn(NCV_CUDA_ERROR);
|
||||||
|
|
||||||
return NCV_SUCCESS;
|
return NCV_SUCCESS;
|
||||||
}
|
}
|
||||||
|
@ -285,9 +285,16 @@ NCV_EXPORTS void ncvSetDebugOutputHandler(NCVDebugOutputHandler* func);
|
|||||||
#define ncvAssertCUDAReturn(cudacall, errCode) \
|
#define ncvAssertCUDAReturn(cudacall, errCode) \
|
||||||
do \
|
do \
|
||||||
{ \
|
{ \
|
||||||
cudaError_t resCall = cudacall; \
|
cudaError_t res = cudacall; \
|
||||||
cudaError_t resGLE = cudaGetLastError(); \
|
ncvAssertPrintReturn(cudaSuccess==res, "cudaError_t=" << res, errCode); \
|
||||||
ncvAssertPrintReturn(cudaSuccess==resCall && cudaSuccess==resGLE, "cudaError_t=" << (int)(resCall | resGLE), errCode); \
|
} while (0)
|
||||||
|
|
||||||
|
|
||||||
|
#define ncvAssertCUDALastErrorReturn(errCode) \
|
||||||
|
do \
|
||||||
|
{ \
|
||||||
|
cudaError_t res = cudaGetLastError(); \
|
||||||
|
ncvAssertPrintReturn(cudaSuccess==res, "cudaError_t=" << res, errCode); \
|
||||||
} while (0)
|
} while (0)
|
||||||
|
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user