Added additional error checks

This commit is contained in:
Vladislav Vinogradov 2012-01-25 07:32:35 +00:00
parent 79cc05d062
commit ddf56fa629
4 changed files with 73 additions and 37 deletions

View File

@ -876,12 +876,12 @@ NCVStatus NCVBroxOpticalFlow(const NCVBroxOpticalFlowDescriptor desc,
NcvRect32u dstROI (0, 0, level_width, level_height); NcvRect32u dstROI (0, 0, level_width, level_height);
// frame 0 // frame 0
nppiStResize_32f_C1R (I0->ptr(), srcSize, prev_level_pitch, srcROI, ncvAssertReturnNcvStat( nppiStResize_32f_C1R (I0->ptr(), srcSize, prev_level_pitch, srcROI,
level_frame0->ptr(), dstSize, level_width_aligned * sizeof (float), dstROI, scale_factor, scale_factor, nppStSupersample); level_frame0->ptr(), dstSize, level_width_aligned * sizeof (float), dstROI, scale_factor, scale_factor, nppStSupersample) );
// frame 1 // frame 1
nppiStResize_32f_C1R (I1->ptr(), srcSize, prev_level_pitch, srcROI, ncvAssertReturnNcvStat( nppiStResize_32f_C1R (I1->ptr(), srcSize, prev_level_pitch, srcROI,
level_frame1->ptr(), dstSize, level_width_aligned * sizeof (float), dstROI, scale_factor, scale_factor, nppStSupersample); level_frame1->ptr(), dstSize, level_width_aligned * sizeof (float), dstROI, scale_factor, scale_factor, nppStSupersample) );
} }
I0 = level_frame0.release(); I0 = level_frame0.release();
@ -962,32 +962,32 @@ NCVStatus NCVBroxOpticalFlow(const NCVBroxOpticalFlowDescriptor desc,
NcvRect32u oROI(0, 0, kLevelWidth, kLevelHeight); NcvRect32u oROI(0, 0, kLevelWidth, kLevelHeight);
// Ix0 // Ix0
nppiStFilterRowBorder_32f_C1R (I0->ptr(), srcSize, nSrcStep, Ix0.ptr(), srcSize, nSrcStep, oROI, ncvAssertReturnNcvStat( nppiStFilterRowBorder_32f_C1R (I0->ptr(), srcSize, nSrcStep, Ix0.ptr(), srcSize, nSrcStep, oROI,
nppStBorderMirror, derivativeFilter.ptr(), kDFilterSize, kDFilterSize/2, 1.0f/12.0f); nppStBorderMirror, derivativeFilter.ptr(), kDFilterSize, kDFilterSize/2, 1.0f/12.0f) );
// Iy0 // Iy0
nppiStFilterColumnBorder_32f_C1R (I0->ptr(), srcSize, nSrcStep, Iy0.ptr(), srcSize, nSrcStep, oROI, ncvAssertReturnNcvStat( nppiStFilterColumnBorder_32f_C1R (I0->ptr(), srcSize, nSrcStep, Iy0.ptr(), srcSize, nSrcStep, oROI,
nppStBorderMirror, derivativeFilter.ptr(), kDFilterSize, kDFilterSize/2, 1.0f/12.0f); nppStBorderMirror, derivativeFilter.ptr(), kDFilterSize, kDFilterSize/2, 1.0f/12.0f) );
// Ix // Ix
nppiStFilterRowBorder_32f_C1R (I1->ptr(), srcSize, nSrcStep, Ix.ptr(), srcSize, nSrcStep, oROI, ncvAssertReturnNcvStat( nppiStFilterRowBorder_32f_C1R (I1->ptr(), srcSize, nSrcStep, Ix.ptr(), srcSize, nSrcStep, oROI,
nppStBorderMirror, derivativeFilter.ptr(), kDFilterSize, kDFilterSize/2, 1.0f/12.0f); nppStBorderMirror, derivativeFilter.ptr(), kDFilterSize, kDFilterSize/2, 1.0f/12.0f) );
// Iy // Iy
nppiStFilterColumnBorder_32f_C1R (I1->ptr(), srcSize, nSrcStep, Iy.ptr(), srcSize, nSrcStep, oROI, ncvAssertReturnNcvStat( nppiStFilterColumnBorder_32f_C1R (I1->ptr(), srcSize, nSrcStep, Iy.ptr(), srcSize, nSrcStep, oROI,
nppStBorderMirror, derivativeFilter.ptr(), kDFilterSize, kDFilterSize/2, 1.0f/12.0f); nppStBorderMirror, derivativeFilter.ptr(), kDFilterSize, kDFilterSize/2, 1.0f/12.0f) );
// Ixx // Ixx
nppiStFilterRowBorder_32f_C1R (Ix.ptr(), srcSize, nSrcStep, Ixx.ptr(), srcSize, nSrcStep, oROI, ncvAssertReturnNcvStat( nppiStFilterRowBorder_32f_C1R (Ix.ptr(), srcSize, nSrcStep, Ixx.ptr(), srcSize, nSrcStep, oROI,
nppStBorderMirror, derivativeFilter.ptr(), kDFilterSize, kDFilterSize/2, 1.0f/12.0f); nppStBorderMirror, derivativeFilter.ptr(), kDFilterSize, kDFilterSize/2, 1.0f/12.0f) );
// Iyy // Iyy
nppiStFilterColumnBorder_32f_C1R (Iy.ptr(), srcSize, nSrcStep, Iyy.ptr(), srcSize, nSrcStep, oROI, ncvAssertReturnNcvStat( nppiStFilterColumnBorder_32f_C1R (Iy.ptr(), srcSize, nSrcStep, Iyy.ptr(), srcSize, nSrcStep, oROI,
nppStBorderMirror, derivativeFilter.ptr(), kDFilterSize, kDFilterSize/2, 1.0f/12.0f); nppStBorderMirror, derivativeFilter.ptr(), kDFilterSize, kDFilterSize/2, 1.0f/12.0f) );
// Ixy // Ixy
nppiStFilterRowBorder_32f_C1R (Iy.ptr(), srcSize, nSrcStep, Ixy.ptr(), srcSize, nSrcStep, oROI, ncvAssertReturnNcvStat( nppiStFilterRowBorder_32f_C1R (Iy.ptr(), srcSize, nSrcStep, Ixy.ptr(), srcSize, nSrcStep, oROI,
nppStBorderMirror, derivativeFilter.ptr(), kDFilterSize, kDFilterSize/2, 1.0f/12.0f); nppStBorderMirror, derivativeFilter.ptr(), kDFilterSize, kDFilterSize/2, 1.0f/12.0f) );
ncvAssertCUDAReturn(cudaBindTexture2D(0, tex_Ix, Ix.ptr(), channel_desc, kLevelWidth, kLevelHeight, kPitchTex), NCV_CUDA_ERROR); ncvAssertCUDAReturn(cudaBindTexture2D(0, tex_Ix, Ix.ptr(), channel_desc, kLevelWidth, kLevelHeight, kPitchTex), NCV_CUDA_ERROR);
ncvAssertCUDAReturn(cudaBindTexture2D(0, tex_Ixx, Ixx.ptr(), channel_desc, kLevelWidth, kLevelHeight, kPitchTex), NCV_CUDA_ERROR); ncvAssertCUDAReturn(cudaBindTexture2D(0, tex_Ixx, Ixx.ptr(), channel_desc, kLevelWidth, kLevelHeight, kPitchTex), NCV_CUDA_ERROR);
@ -1029,6 +1029,8 @@ NCVStatus NCVBroxOpticalFlow(const NCVBroxOpticalFlowDescriptor desc,
kLevelStride, kLevelStride,
alpha, alpha,
gamma); gamma);
ncvAssertCUDALastErrorReturn(NCV_CUDA_ERROR);
ncvAssertCUDAReturn(cudaBindTexture(0, tex_diffusivity_x, diffusivity_x.ptr(), channel_desc, kLevelSizeInBytes), NCV_CUDA_ERROR); ncvAssertCUDAReturn(cudaBindTexture(0, tex_diffusivity_x, diffusivity_x.ptr(), channel_desc, kLevelSizeInBytes), NCV_CUDA_ERROR);
ncvAssertCUDAReturn(cudaBindTexture(0, tex_diffusivity_y, diffusivity_y.ptr(), channel_desc, kLevelSizeInBytes), NCV_CUDA_ERROR); ncvAssertCUDAReturn(cudaBindTexture(0, tex_diffusivity_y, diffusivity_y.ptr(), channel_desc, kLevelSizeInBytes), NCV_CUDA_ERROR);
@ -1039,6 +1041,8 @@ NCVStatus NCVBroxOpticalFlow(const NCVBroxOpticalFlowDescriptor desc,
ncvAssertCUDAReturn(cudaBindTexture(0, tex_numerator_v, num_v.ptr(), channel_desc, kLevelSizeInBytes), NCV_CUDA_ERROR); ncvAssertCUDAReturn(cudaBindTexture(0, tex_numerator_v, num_v.ptr(), channel_desc, kLevelSizeInBytes), NCV_CUDA_ERROR);
prepare_sor_stage_2<<<psor_blocks, psor_threads, 0, stream>>>(denom_u.ptr(), denom_v.ptr(), kLevelWidth, kLevelHeight, kLevelStride); prepare_sor_stage_2<<<psor_blocks, psor_threads, 0, stream>>>(denom_u.ptr(), denom_v.ptr(), kLevelWidth, kLevelHeight, kLevelStride);
ncvAssertCUDALastErrorReturn(NCV_CUDA_ERROR);
// linear system coefficients // linear system coefficients
ncvAssertCUDAReturn(cudaBindTexture(0, tex_diffusivity_x, diffusivity_x.ptr(), channel_desc, kLevelSizeInBytes), NCV_CUDA_ERROR); ncvAssertCUDAReturn(cudaBindTexture(0, tex_diffusivity_x, diffusivity_x.ptr(), channel_desc, kLevelSizeInBytes), NCV_CUDA_ERROR);
@ -1073,6 +1077,8 @@ NCVStatus NCVBroxOpticalFlow(const NCVBroxOpticalFlowDescriptor desc,
kLevelHeight, kLevelHeight,
kLevelStride); kLevelStride);
ncvAssertCUDALastErrorReturn(NCV_CUDA_ERROR);
ncvAssertCUDAReturn(cudaBindTexture(0, tex_du, du_new.ptr(), channel_desc, kLevelSizeInBytes), NCV_CUDA_ERROR); ncvAssertCUDAReturn(cudaBindTexture(0, tex_du, du_new.ptr(), channel_desc, kLevelSizeInBytes), NCV_CUDA_ERROR);
ncvAssertCUDAReturn(cudaBindTexture(0, tex_dv, dv_new.ptr(), channel_desc, kLevelSizeInBytes), NCV_CUDA_ERROR); ncvAssertCUDAReturn(cudaBindTexture(0, tex_dv, dv_new.ptr(), channel_desc, kLevelSizeInBytes), NCV_CUDA_ERROR);
@ -1089,6 +1095,8 @@ NCVStatus NCVBroxOpticalFlow(const NCVBroxOpticalFlowDescriptor desc,
kLevelHeight, kLevelHeight,
kLevelStride); kLevelStride);
ncvAssertCUDALastErrorReturn(NCV_CUDA_ERROR);
ncvAssertCUDAReturn(cudaBindTexture(0, tex_du, du.ptr(), channel_desc, kLevelSizeInBytes), NCV_CUDA_ERROR); ncvAssertCUDAReturn(cudaBindTexture(0, tex_du, du.ptr(), channel_desc, kLevelSizeInBytes), NCV_CUDA_ERROR);
ncvAssertCUDAReturn(cudaBindTexture(0, tex_dv, dv.ptr(), channel_desc, kLevelSizeInBytes), NCV_CUDA_ERROR); ncvAssertCUDAReturn(cudaBindTexture(0, tex_dv, dv.ptr(), channel_desc, kLevelSizeInBytes), NCV_CUDA_ERROR);
}//end of solver loop }//end of solver loop
@ -1096,7 +1104,9 @@ NCVStatus NCVBroxOpticalFlow(const NCVBroxOpticalFlowDescriptor desc,
//update u and v //update u and v
add(ptrU->ptr(), du.ptr(), kLevelSizeInPixels, stream); add(ptrU->ptr(), du.ptr(), kLevelSizeInPixels, stream);
ncvAssertCUDALastErrorReturn(NCV_CUDA_ERROR);
add(ptrV->ptr(), dv.ptr(), kLevelSizeInPixels, stream); add(ptrV->ptr(), dv.ptr(), kLevelSizeInPixels, stream);
ncvAssertCUDALastErrorReturn(NCV_CUDA_ERROR);
//prolongate using texture //prolongate using texture
pyr.w.pop_back(); pyr.w.pop_back();
@ -1116,15 +1126,17 @@ NCVStatus NCVBroxOpticalFlow(const NCVBroxOpticalFlowDescriptor desc,
NcvRect32u srcROI (0, 0, kLevelWidth, kLevelHeight); NcvRect32u srcROI (0, 0, kLevelWidth, kLevelHeight);
NcvRect32u dstROI (0, 0, nw, nh); NcvRect32u dstROI (0, 0, nw, nh);
nppiStResize_32f_C1R (ptrU->ptr(), srcSize, kLevelStride * sizeof (float), srcROI, ncvAssertReturnNcvStat( nppiStResize_32f_C1R (ptrU->ptr(), srcSize, kLevelStride * sizeof (float), srcROI,
ptrUNew->ptr(), dstSize, ns * sizeof (float), dstROI, 1.0f/scale_factor, 1.0f/scale_factor, nppStBicubic); ptrUNew->ptr(), dstSize, ns * sizeof (float), dstROI, 1.0f/scale_factor, 1.0f/scale_factor, nppStBicubic) );
ScaleVector(ptrUNew->ptr(), ptrUNew->ptr(), 1.0f/scale_factor, ns * nh, stream); ScaleVector(ptrUNew->ptr(), ptrUNew->ptr(), 1.0f/scale_factor, ns * nh, stream);
ncvAssertCUDALastErrorReturn(NCV_CUDA_ERROR);
nppiStResize_32f_C1R (ptrV->ptr(), srcSize, kLevelStride * sizeof (float), srcROI, ncvAssertReturnNcvStat( nppiStResize_32f_C1R (ptrV->ptr(), srcSize, kLevelStride * sizeof (float), srcROI,
ptrVNew->ptr(), dstSize, ns * sizeof (float), dstROI, 1.0f/scale_factor, 1.0f/scale_factor, nppStBicubic); ptrVNew->ptr(), dstSize, ns * sizeof (float), dstROI, 1.0f/scale_factor, 1.0f/scale_factor, nppStBicubic) );
ScaleVector(ptrVNew->ptr(), ptrVNew->ptr(), 1.0f/scale_factor, ns * nh, stream); ScaleVector(ptrVNew->ptr(), ptrVNew->ptr(), 1.0f/scale_factor, ns * nh, stream);
ncvAssertCUDALastErrorReturn(NCV_CUDA_ERROR);
cv::gpu::device::swap<FloatVector*>(ptrU, ptrUNew); cv::gpu::device::swap<FloatVector*>(ptrU, ptrUNew);
cv::gpu::device::swap<FloatVector*>(ptrV, ptrVNew); cv::gpu::device::swap<FloatVector*>(ptrV, ptrVNew);
@ -1143,7 +1155,6 @@ NCVStatus NCVBroxOpticalFlow(const NCVBroxOpticalFlowDescriptor desc,
(vOut.ptr(), vOut.pitch(), ptrV->ptr(), (vOut.ptr(), vOut.pitch(), ptrV->ptr(),
kSourcePitch, kSourceWidth*sizeof(float), kSourceHeight, cudaMemcpyDeviceToDevice, stream), NCV_CUDA_ERROR ); kSourcePitch, kSourceWidth*sizeof(float), kSourceHeight, cudaMemcpyDeviceToDevice, stream), NCV_CUDA_ERROR );
ncvAssertCUDAReturn(cudaGetLastError(), NCV_CUDA_ERROR);
ncvAssertCUDAReturn(cudaStreamSynchronize(stream), NCV_CUDA_ERROR); ncvAssertCUDAReturn(cudaStreamSynchronize(stream), NCV_CUDA_ERROR);
} }

View File

@ -315,7 +315,8 @@ NCVStatus scanRowsWrapperDevice(T_in *d_src, Ncv32u srcStride,
<T_in, T_out, tbDoSqr> <T_in, T_out, tbDoSqr>
<<<roi.height, NUM_SCAN_THREADS, 0, nppStGetActiveCUDAstream()>>> <<<roi.height, NUM_SCAN_THREADS, 0, nppStGetActiveCUDAstream()>>>
(d_src, (Ncv32u)alignmentOffset, roi.width, srcStride, d_dst, dstStride); (d_src, (Ncv32u)alignmentOffset, roi.width, srcStride, d_dst, dstStride);
ncvAssertCUDAReturn(cudaGetLastError(), NPPST_CUDA_KERNEL_EXECUTION_ERROR);
ncvAssertCUDALastErrorReturn(NPPST_CUDA_KERNEL_EXECUTION_ERROR);
return NPPST_SUCCESS; return NPPST_SUCCESS;
} }
@ -768,7 +769,7 @@ static NCVStatus decimateWrapperDevice(T *d_src, Ncv32u srcStep,
(d_src, srcStep, d_dst, dstStep, dstRoi, scale); (d_src, srcStep, d_dst, dstStep, dstRoi, scale);
} }
ncvAssertCUDAReturn(cudaGetLastError(), NPPST_CUDA_KERNEL_EXECUTION_ERROR); ncvAssertCUDALastErrorReturn(NPPST_CUDA_KERNEL_EXECUTION_ERROR);
return NPPST_SUCCESS; return NPPST_SUCCESS;
} }
@ -997,7 +998,7 @@ NCVStatus nppiStRectStdDev_32f_C1R(Ncv32u *d_sum, Ncv32u sumStep,
(NULL, sumStep, NULL, sqsumStep, d_norm, normStep, roi, rect, invRectArea); (NULL, sumStep, NULL, sqsumStep, d_norm, normStep, roi, rect, invRectArea);
} }
ncvAssertCUDAReturn(cudaGetLastError(), NPPST_CUDA_KERNEL_EXECUTION_ERROR); ncvAssertCUDALastErrorReturn(NPPST_CUDA_KERNEL_EXECUTION_ERROR);
return NPPST_SUCCESS; return NPPST_SUCCESS;
} }
@ -1157,7 +1158,7 @@ NCVStatus transposeWrapperDevice(T *d_src, Ncv32u srcStride,
<T> <T>
<<<grid, block, 0, nppStGetActiveCUDAstream()>>> <<<grid, block, 0, nppStGetActiveCUDAstream()>>>
(d_src, srcStride, d_dst, dstStride, srcRoi); (d_src, srcStride, d_dst, dstStride, srcRoi);
ncvAssertCUDAReturn(cudaGetLastError(), NPPST_CUDA_KERNEL_EXECUTION_ERROR); ncvAssertCUDALastErrorReturn(NPPST_CUDA_KERNEL_EXECUTION_ERROR);
return NPPST_SUCCESS; return NPPST_SUCCESS;
} }
@ -1407,7 +1408,8 @@ NCVStatus compactVector_32u_device(Ncv32u *d_src, Ncv32u srcLen,
d_hierSums.ptr(), d_hierSums.ptr(),
d_hierSums.ptr() + partSumOffsets[1], d_hierSums.ptr() + partSumOffsets[1],
elemRemove); elemRemove);
ncvAssertCUDAReturn(cudaGetLastError(), NPPST_CUDA_KERNEL_EXECUTION_ERROR);
ncvAssertCUDALastErrorReturn(NPPST_CUDA_KERNEL_EXECUTION_ERROR);
//calculate hierarchical partial sums //calculate hierarchical partial sums
for (Ncv32u i=1; i<partSumNums.size()-1; i++) for (Ncv32u i=1; i<partSumNums.size()-1; i++)
@ -1438,7 +1440,8 @@ NCVStatus compactVector_32u_device(Ncv32u *d_src, Ncv32u srcLen,
NULL, NULL,
NULL); NULL);
} }
ncvAssertCUDAReturn(cudaGetLastError(), NPPST_CUDA_KERNEL_EXECUTION_ERROR);
ncvAssertCUDALastErrorReturn(NPPST_CUDA_KERNEL_EXECUTION_ERROR);
} }
//adjust hierarchical partial sums //adjust hierarchical partial sums
@ -1454,7 +1457,8 @@ NCVStatus compactVector_32u_device(Ncv32u *d_src, Ncv32u srcLen,
<<<grid, block, 0, nppStGetActiveCUDAstream()>>> <<<grid, block, 0, nppStGetActiveCUDAstream()>>>
(d_hierSums.ptr() + partSumOffsets[i], partSumNums[i], (d_hierSums.ptr() + partSumOffsets[i], partSumNums[i],
d_hierSums.ptr() + partSumOffsets[i+1]); d_hierSums.ptr() + partSumOffsets[i+1]);
ncvAssertCUDAReturn(cudaGetLastError(), NPPST_CUDA_KERNEL_EXECUTION_ERROR);
ncvAssertCUDALastErrorReturn(NPPST_CUDA_KERNEL_EXECUTION_ERROR);
} }
} }
else else
@ -1466,7 +1470,8 @@ NCVStatus compactVector_32u_device(Ncv32u *d_src, Ncv32u srcLen,
(d_src, srcLen, (d_src, srcLen,
d_hierSums.ptr(), d_hierSums.ptr(),
NULL, elemRemove); NULL, elemRemove);
ncvAssertCUDAReturn(cudaGetLastError(), NPPST_CUDA_KERNEL_EXECUTION_ERROR);
ncvAssertCUDALastErrorReturn(NPPST_CUDA_KERNEL_EXECUTION_ERROR);
} }
//compact source vector using indices //compact source vector using indices
@ -1480,7 +1485,8 @@ NCVStatus compactVector_32u_device(Ncv32u *d_src, Ncv32u srcLen,
<<<grid, block, 0, nppStGetActiveCUDAstream()>>> <<<grid, block, 0, nppStGetActiveCUDAstream()>>>
(d_src, srcLen, d_hierSums.ptr(), d_dst, (d_src, srcLen, d_hierSums.ptr(), d_dst,
elemRemove, d_numDstElements.ptr()); elemRemove, d_numDstElements.ptr());
ncvAssertCUDAReturn(cudaGetLastError(), NPPST_CUDA_KERNEL_EXECUTION_ERROR);
ncvAssertCUDALastErrorReturn(NPPST_CUDA_KERNEL_EXECUTION_ERROR);
//get number of dst elements //get number of dst elements
if (dstLenPinned != NULL) if (dstLenPinned != NULL)
@ -1773,6 +1779,7 @@ NCVStatus nppiStFilterRowBorder_32f_C1R(const Ncv32f *pSrc,
case nppStBorderMirror: case nppStBorderMirror:
FilterRowBorderMirror_32f_C1R <<<gridSize, ctaSize, 0, nppStGetActiveCUDAstream ()>>> FilterRowBorderMirror_32f_C1R <<<gridSize, ctaSize, 0, nppStGetActiveCUDAstream ()>>>
(srcStep, pDst, dstSize, dstStep, oROI, nKernelSize, nAnchor, multiplier); (srcStep, pDst, dstSize, dstStep, oROI, nKernelSize, nAnchor, multiplier);
ncvAssertCUDALastErrorReturn(NPPST_CUDA_KERNEL_EXECUTION_ERROR);
break; break;
default: default:
return NPPST_ERROR; return NPPST_ERROR;
@ -1842,6 +1849,7 @@ NCVStatus nppiStFilterColumnBorder_32f_C1R(const Ncv32f *pSrc,
case nppStBorderMirror: case nppStBorderMirror:
FilterColumnBorderMirror_32f_C1R <<<gridSize, ctaSize, 0, nppStGetActiveCUDAstream ()>>> FilterColumnBorderMirror_32f_C1R <<<gridSize, ctaSize, 0, nppStGetActiveCUDAstream ()>>>
(srcStep, pDst, dstSize, dstStep, oROI, nKernelSize, nAnchor, multiplier); (srcStep, pDst, dstSize, dstStep, oROI, nKernelSize, nAnchor, multiplier);
ncvAssertCUDALastErrorReturn(NPPST_CUDA_KERNEL_EXECUTION_ERROR);
break; break;
default: default:
return NPPST_ERROR; return NPPST_ERROR;
@ -1946,7 +1954,7 @@ NCVStatus BlendFrames(const Ncv32f *src0,
BlendFramesKernel<<<blocks, threads, 0, nppStGetActiveCUDAstream ()>>> BlendFramesKernel<<<blocks, threads, 0, nppStGetActiveCUDAstream ()>>>
(ufi, vfi, ubi, vbi, o1, o2, width, height, stride, theta, out); (ufi, vfi, ubi, vbi, o1, o2, width, height, stride, theta, out);
ncvAssertCUDAReturn (cudaGetLastError (), NPPST_CUDA_KERNEL_EXECUTION_ERROR); ncvAssertCUDALastErrorReturn(NPPST_CUDA_KERNEL_EXECUTION_ERROR);
return NPPST_SUCCESS; return NPPST_SUCCESS;
} }
@ -2262,6 +2270,8 @@ NCVStatus nppiStVectorWarp_PSF1x1_32f_C1(const Ncv32f *pSrc,
ForwardWarpKernel_PSF1x1 <<<gridSize, ctaSize, 0, nppStGetActiveCUDAstream()>>> ForwardWarpKernel_PSF1x1 <<<gridSize, ctaSize, 0, nppStGetActiveCUDAstream()>>>
(pU, pV, pSrc, srcSize.width, srcSize.height, vfStep, srcStep, timeScale, pDst); (pU, pV, pSrc, srcSize.width, srcSize.height, vfStep, srcStep, timeScale, pDst);
ncvAssertCUDALastErrorReturn(NPPST_CUDA_KERNEL_EXECUTION_ERROR);
return NPPST_SUCCESS; return NPPST_SUCCESS;
} }
@ -2294,12 +2304,18 @@ NCVStatus nppiStVectorWarp_PSF2x2_32f_C1(const Ncv32f *pSrc,
MemsetKernel <<<gridSize, ctaSize, 0, nppStGetActiveCUDAstream()>>> MemsetKernel <<<gridSize, ctaSize, 0, nppStGetActiveCUDAstream()>>>
(0, srcSize.width, srcSize.height, pBuffer); (0, srcSize.width, srcSize.height, pBuffer);
ncvAssertCUDALastErrorReturn(NPPST_CUDA_KERNEL_EXECUTION_ERROR);
ForwardWarpKernel_PSF2x2 <<<gridSize, ctaSize, 0, nppStGetActiveCUDAstream()>>> ForwardWarpKernel_PSF2x2 <<<gridSize, ctaSize, 0, nppStGetActiveCUDAstream()>>>
(pU, pV, pSrc, srcSize.width, srcSize.height, vfStep, srcStep, timeScale, pBuffer, pDst); (pU, pV, pSrc, srcSize.width, srcSize.height, vfStep, srcStep, timeScale, pBuffer, pDst);
ncvAssertCUDALastErrorReturn(NPPST_CUDA_KERNEL_EXECUTION_ERROR);
NormalizeKernel <<<gridSize, ctaSize, 0, nppStGetActiveCUDAstream()>>> NormalizeKernel <<<gridSize, ctaSize, 0, nppStGetActiveCUDAstream()>>>
(pBuffer, srcSize.width, srcSize.height, srcStep, pDst); (pBuffer, srcSize.width, srcSize.height, srcStep, pDst);
ncvAssertCUDALastErrorReturn(NPPST_CUDA_KERNEL_EXECUTION_ERROR);
return NPPST_SUCCESS; return NPPST_SUCCESS;
} }
@ -2557,5 +2573,7 @@ NCVStatus nppiStResize_32f_C1R(const Ncv32f *pSrc,
status = NPPST_ERROR; status = NPPST_ERROR;
} }
ncvAssertCUDALastErrorReturn(NPPST_CUDA_KERNEL_EXECUTION_ERROR);
return status; return status;
} }

View File

@ -874,7 +874,7 @@ static NCVStatus drawRectsWrapperDevice(T *d_dst,
drawRects<T><<<grid, block>>>(d_dst, dstStride, dstWidth, dstHeight, d_rects, numRects, color); drawRects<T><<<grid, block>>>(d_dst, dstStride, dstWidth, dstHeight, d_rects, numRects, color);
ncvAssertCUDAReturn(cudaGetLastError(), NCV_CUDA_ERROR); ncvAssertCUDALastErrorReturn(NCV_CUDA_ERROR);
return NCV_SUCCESS; return NCV_SUCCESS;
} }

View File

@ -285,9 +285,16 @@ NCV_EXPORTS void ncvSetDebugOutputHandler(NCVDebugOutputHandler* func);
#define ncvAssertCUDAReturn(cudacall, errCode) \ #define ncvAssertCUDAReturn(cudacall, errCode) \
do \ do \
{ \ { \
cudaError_t resCall = cudacall; \ cudaError_t res = cudacall; \
cudaError_t resGLE = cudaGetLastError(); \ ncvAssertPrintReturn(cudaSuccess==res, "cudaError_t=" << res, errCode); \
ncvAssertPrintReturn(cudaSuccess==resCall && cudaSuccess==resGLE, "cudaError_t=" << (int)(resCall | resGLE), errCode); \ } while (0)
#define ncvAssertCUDALastErrorReturn(errCode) \
do \
{ \
cudaError_t res = cudaGetLastError(); \
ncvAssertPrintReturn(cudaSuccess==res, "cudaError_t=" << res, errCode); \
} while (0) } while (0)