diff --git a/modules/gpu/include/opencv2/gpu/gpu.hpp b/modules/gpu/include/opencv2/gpu/gpu.hpp index da9fcf6ac..1b7efb985 100644 --- a/modules/gpu/include/opencv2/gpu/gpu.hpp +++ b/modules/gpu/include/opencv2/gpu/gpu.hpp @@ -1485,6 +1485,65 @@ namespace cv GpuMat maxPosBuffer; }; + ////////////////////////////////// Optical Flow ////////////////////////////////////////// + + class CV_EXPORTS BroxOpticalFlow + { + public: + BroxOpticalFlow(float alpha_, float gamma_, float scale_factor_, int inner_iterations_, int outer_iterations_, int solver_iterations_) : + alpha(alpha_), gamma(gamma_), scale_factor(scale_factor_), + inner_iterations(inner_iterations_), outer_iterations(outer_iterations_), solver_iterations(solver_iterations_) + { + } + + //! Compute optical flow + //! frame0 - source frame (supports only CV_32FC1 type) + //! frame1 - frame to track (with the same size and type as frame0) + //! u - flow horizontal component (along x axis) + //! v - flow vertical component (along y axis) + void operator ()(const GpuMat& frame0, const GpuMat& frame1, GpuMat& u, GpuMat& v, Stream& stream = Stream::Null()); + + //! flow smoothness + float alpha; + + //! gradient constancy importance + float gamma; + + //! pyramid scale factor + float scale_factor; + + //! number of lagged non-linearity iterations (inner loop) + int inner_iterations; + + //! number of warping iterations (number of pyramid levels) + int outer_iterations; + + //! number of linear system solver iterations + int solver_iterations; + + GpuMat buf; + }; + + //! Interpolate frames (images) using provided optical flow (displacement field). + //! frame0 - frame 0 (32-bit floating point images, single channel) + //! frame1 - frame 1 (the same type and size) + //! fu - forward horizontal displacement + //! fv - forward vertical displacement + //! bu - backward horizontal displacement + //! bv - backward vertical displacement + //! pos - new frame position + //! newFrame - new frame + //! buf - temporary buffer, will have width x 6*height size, CV_32FC1 type and contain 6 GpuMat; + //! occlusion masks 0, occlusion masks 1, + //! interpolated forward flow 0, interpolated forward flow 1, + //! interpolated backward flow 0, interpolated backward flow 1 + //! + CV_EXPORTS void interpolateFrames(const GpuMat& frame0, const GpuMat& frame1, + const GpuMat& fu, const GpuMat& fv, + const GpuMat& bu, const GpuMat& bv, + float pos, GpuMat& newFrame, GpuMat& buf, + Stream& stream = Stream::Null()); + } //! Speckle filtering - filters small connected components on diparity image. diff --git a/modules/gpu/src/nvidia/NCVBroxOpticalFlow.cu b/modules/gpu/src/nvidia/NCVBroxOpticalFlow.cu index bd11432ef..c08fe6dad 100644 --- a/modules/gpu/src/nvidia/NCVBroxOpticalFlow.cu +++ b/modules/gpu/src/nvidia/NCVBroxOpticalFlow.cu @@ -65,76 +65,6 @@ #include "opencv2/gpu/device/utility.hpp" -//////////////////////////////////////////// -template class shared_ptr -{ -public: - shared_ptr() : obj(0), refcount(0) {} - shared_ptr(_Tp* _obj); - ~shared_ptr() { release(); } - shared_ptr(const shared_ptr& ptr); - shared_ptr& operator = (const shared_ptr& ptr); - void addref() { if( refcount ) (*refcount)+=1; } - void release(); - void delete_obj() { if( obj ) delete obj; } - _Tp* operator -> () { return obj; } - const _Tp* operator -> () const { return obj; } - operator _Tp* () { return obj; } - operator const _Tp*() const { return obj; } -protected: - _Tp* obj; //< the object pointer. - int* refcount; //< the associated reference counter -}; - -template inline shared_ptr<_Tp>::shared_ptr(_Tp* _obj) : obj(_obj) -{ - if(obj) - { - refcount = new int; - *refcount = 1; - } - else - refcount = 0; -} - -template inline void shared_ptr<_Tp>::release() -{ - if( refcount) - { - *refcount -= 1; - if (*refcount == 0) - { - delete_obj(); - delete refcount; - } - } - refcount = 0; - obj = 0; -} - -template inline shared_ptr<_Tp>::shared_ptr(const shared_ptr<_Tp>& ptr) -{ - obj = ptr.obj; - refcount = ptr.refcount; - addref(); -} - -template inline shared_ptr<_Tp>& shared_ptr<_Tp>::operator = (const shared_ptr<_Tp>& ptr) -{ - int* _refcount = ptr.refcount; - if( _refcount ) - *_refcount += 1; - - release(); - obj = ptr.obj; - refcount = _refcount; - return *this; -} - - -//////////////////////////////////////////// -//using std::tr1::shared_ptr; - typedef NCVVectorAlloc FloatVector; ///////////////////////////////////////////////////////////////////////////////////////// @@ -738,6 +668,42 @@ void InitTextures() initTexture1D(tex_numerator_v); } +namespace +{ + struct ImagePyramid + { + std::vector img0; + std::vector img1; + + std::vector w; + std::vector h; + + explicit ImagePyramid(int outer_iterations) + { + img0.reserve(outer_iterations); + img1.reserve(outer_iterations); + + w.reserve(outer_iterations); + h.reserve(outer_iterations); + } + + ~ImagePyramid() + { + w.clear(); + h.clear(); + + for (int i = img0.size() - 1; i >= 0; --i) + { + delete img1[i]; + delete img0[i]; + } + + img0.clear(); + img1.clear(); + } + }; +} + ///////////////////////////////////////////////////////////////////////////////////////// // MAIN FUNCTION ///////////////////////////////////////////////////////////////////////////////////////// @@ -759,21 +725,19 @@ NCVStatus NCVBroxOpticalFlow(const NCVBroxOpticalFlowDescriptor desc, const Ncv32u kSourceHeight = frame0.height(); ncvAssertPrintReturn(frame1.width() == kSourceWidth && frame1.height() == kSourceHeight, "Frame dims do not match", NCV_INCONSISTENT_INPUT); - ncvAssertReturn(uOut.width() == kSourceWidth && vOut.width() == kSourceWidth && + ncvAssertReturn(uOut.width() == kSourceWidth && vOut.width() == kSourceWidth && uOut.height() == kSourceHeight && vOut.height() == kSourceHeight, NCV_INCONSISTENT_INPUT); ncvAssertReturn(gpu_mem_allocator.isInitialized(), NCV_ALLOCATOR_NOT_INITIALIZED); bool kSkipProcessing = gpu_mem_allocator.isCounting(); - cudaDeviceProp device_props; int cuda_device; - ncvAssertCUDAReturn(cudaGetDevice(&cuda_device), NCV_CUDA_ERROR); + cudaDeviceProp device_props; ncvAssertCUDAReturn(cudaGetDeviceProperties(&device_props, cuda_device), NCV_CUDA_ERROR); - Ncv32u alignmentValue = gpu_mem_allocator.alignment (); const Ncv32u kStrideAlignmentFloat = alignmentValue / sizeof(float); @@ -817,8 +781,7 @@ NCVStatus NCVBroxOpticalFlow(const NCVBroxOpticalFlowDescriptor desc, // temporary storage SAFE_VECTOR_DECL(device_buffer, gpu_mem_allocator, - alignUp(kSourceWidth, kStrideAlignmentFloat) - * alignUp(kSourceHeight, kStrideAlignmentFloat)); + alignUp(kSourceWidth, kStrideAlignmentFloat) * alignUp(kSourceHeight, kStrideAlignmentFloat)); // image derivatives SAFE_VECTOR_DECL(Ix, gpu_mem_allocator, kSizeInPixelsAligned); @@ -831,35 +794,31 @@ NCVStatus NCVBroxOpticalFlow(const NCVBroxOpticalFlowDescriptor desc, // spatial derivative filter size const int kDFilterSize = 5; - const float derivativeFilterHost[kDFilterSize] = {1.0f, -8.0f, 0.0f, 8.0f, -1.0f}; SAFE_VECTOR_DECL(derivativeFilter, gpu_mem_allocator, kDFilterSize); - ncvAssertCUDAReturn( - cudaMemcpy(derivativeFilter.ptr(), - derivativeFilterHost, - sizeof(float) * kDFilterSize, - cudaMemcpyHostToDevice), - NCV_CUDA_ERROR); + if (!kSkipProcessing) + { + const float derivativeFilterHost[kDFilterSize] = {1.0f, -8.0f, 0.0f, 8.0f, -1.0f}; - InitTextures(); + ncvAssertCUDAReturn(cudaMemcpy(derivativeFilter.ptr(), derivativeFilterHost, sizeof(float) * kDFilterSize, + cudaMemcpyHostToDevice), NCV_CUDA_ERROR); + + InitTextures(); + } //prepare image pyramid - std::vector< shared_ptr > img0_pyramid; - std::vector< shared_ptr > img1_pyramid; - - std::vector w_pyramid; - std::vector h_pyramid; + ImagePyramid pyr(desc.number_of_outer_iterations); cudaChannelFormatDesc channel_desc = cudaCreateChannelDesc(); float scale = 1.0f; //cuda arrays for frames - shared_ptr I0(new FloatVector(gpu_mem_allocator, kSizeInPixelsAligned)); - ncvAssertReturn(I0->isMemAllocated(), NCV_ALLOCATOR_BAD_ALLOC); + std::auto_ptr pI0(new FloatVector(gpu_mem_allocator, kSizeInPixelsAligned)); + ncvAssertReturn(pI0->isMemAllocated(), NCV_ALLOCATOR_BAD_ALLOC); - shared_ptr I1(new FloatVector(gpu_mem_allocator, kSizeInPixelsAligned)); - ncvAssertReturn(I1->isMemAllocated(), NCV_ALLOCATOR_BAD_ALLOC); + std::auto_ptr pI1(new FloatVector(gpu_mem_allocator, kSizeInPixelsAligned)); + ncvAssertReturn(pI1->isMemAllocated(), NCV_ALLOCATOR_BAD_ALLOC); if (!kSkipProcessing) { @@ -867,25 +826,29 @@ NCVStatus NCVBroxOpticalFlow(const NCVBroxOpticalFlowDescriptor desc, size_t dst_width_in_bytes = alignUp(kSourceWidth, kStrideAlignmentFloat) * sizeof(float); size_t src_width_in_bytes = kSourceWidth * sizeof(float); size_t src_pitch_in_bytes = frame0.pitch(); - ncvAssertCUDAReturn( cudaMemcpy2DAsync(I0->ptr(), dst_width_in_bytes, frame0.ptr(), + + ncvAssertCUDAReturn( cudaMemcpy2DAsync(pI0->ptr(), dst_width_in_bytes, frame0.ptr(), src_pitch_in_bytes, src_width_in_bytes, kSourceHeight, cudaMemcpyDeviceToDevice, stream), NCV_CUDA_ERROR ); - ncvAssertCUDAReturn( cudaMemcpy2DAsync(I1->ptr(), dst_width_in_bytes, frame1.ptr(), + ncvAssertCUDAReturn( cudaMemcpy2DAsync(pI1->ptr(), dst_width_in_bytes, frame1.ptr(), src_pitch_in_bytes, src_width_in_bytes, kSourceHeight, cudaMemcpyDeviceToDevice, stream), NCV_CUDA_ERROR ); } - //prepare pyramid - img0_pyramid.push_back(I0); - img1_pyramid.push_back(I1); + FloatVector* I0 = pI0.release(); + FloatVector* I1 = pI1.release(); - w_pyramid.push_back(kSourceWidth); - h_pyramid.push_back(kSourceHeight); + //prepare pyramid + pyr.img0.push_back(I0); + pyr.img1.push_back(I1); + + pyr.w.push_back(kSourceWidth); + pyr.h.push_back(kSourceHeight); scale *= scale_factor; Ncv32u prev_level_width = kSourceWidth; Ncv32u prev_level_height = kSourceHeight; - while((prev_level_width > 15) && (prev_level_height > 15) && (static_cast(img0_pyramid.size()) < desc.number_of_outer_iterations)) + while((prev_level_width > 15) && (prev_level_height > 15) && (static_cast(pyr.img0.size()) < desc.number_of_outer_iterations)) { //current resolution Ncv32u level_width = static_cast(ceilf(kSourceWidth * scale)); @@ -897,16 +860,16 @@ NCVStatus NCVBroxOpticalFlow(const NCVBroxOpticalFlowDescriptor desc, Ncv32u prev_level_pitch = alignUp(prev_level_width, kStrideAlignmentFloat) * sizeof(float); - shared_ptr level_frame0(new FloatVector(gpu_mem_allocator, buffer_size)); + std::auto_ptr level_frame0(new FloatVector(gpu_mem_allocator, buffer_size)); ncvAssertReturn(level_frame0->isMemAllocated(), NCV_ALLOCATOR_BAD_ALLOC); - shared_ptr level_frame1(new FloatVector(gpu_mem_allocator, buffer_size)); + std::auto_ptr level_frame1(new FloatVector(gpu_mem_allocator, buffer_size)); ncvAssertReturn(level_frame1->isMemAllocated(), NCV_ALLOCATOR_BAD_ALLOC); - ncvAssertCUDAReturn(cudaStreamSynchronize(stream), NCV_CUDA_ERROR); - if (!kSkipProcessing) { + ncvAssertCUDAReturn(cudaStreamSynchronize(stream), NCV_CUDA_ERROR); + NcvSize32u srcSize (prev_level_width, prev_level_height); NcvSize32u dstSize (level_width, level_height); NcvRect32u srcROI (0, 0, prev_level_width, prev_level_height); @@ -921,20 +884,20 @@ NCVStatus NCVBroxOpticalFlow(const NCVBroxOpticalFlowDescriptor desc, level_frame1->ptr(), dstSize, level_width_aligned * sizeof (float), dstROI, scale_factor, scale_factor, nppStSupersample); } - //store pointers - img0_pyramid.push_back(level_frame0); - img1_pyramid.push_back(level_frame1); + I0 = level_frame0.release(); + I1 = level_frame1.release(); - w_pyramid.push_back(level_width); - h_pyramid.push_back(level_height); + //store pointers + pyr.img0.push_back(I0); + pyr.img1.push_back(I1); + + pyr.w.push_back(level_width); + pyr.h.push_back(level_height); scale *= scale_factor; prev_level_width = level_width; prev_level_height = level_height; - - I0 = level_frame0; - I1 = level_frame1; } if (!kSkipProcessing) @@ -944,62 +907,56 @@ NCVStatus NCVBroxOpticalFlow(const NCVBroxOpticalFlowDescriptor desc, ncvAssertCUDAReturn(cudaMemsetAsync(v.ptr(), 0, kSizeInPixelsAligned * sizeof(float), stream), NCV_CUDA_ERROR); //select images with lowest resolution - size_t pitch = alignUp(w_pyramid.back(), kStrideAlignmentFloat) * sizeof(float); - ncvAssertCUDAReturn(cudaBindTexture2D(0, tex_I0, img0_pyramid.back()->ptr(), channel_desc, w_pyramid.back(), h_pyramid.back(), pitch), NCV_CUDA_ERROR); - ncvAssertCUDAReturn(cudaBindTexture2D(0, tex_I1, img1_pyramid.back()->ptr(), channel_desc, w_pyramid.back(), h_pyramid.back(), pitch), NCV_CUDA_ERROR); + size_t pitch = alignUp(pyr.w.back(), kStrideAlignmentFloat) * sizeof(float); + ncvAssertCUDAReturn(cudaBindTexture2D(0, tex_I0, pyr.img0.back()->ptr(), channel_desc, pyr.w.back(), pyr.h.back(), pitch), NCV_CUDA_ERROR); + ncvAssertCUDAReturn(cudaBindTexture2D(0, tex_I1, pyr.img1.back()->ptr(), channel_desc, pyr.w.back(), pyr.h.back(), pitch), NCV_CUDA_ERROR); ncvAssertCUDAReturn(cudaStreamSynchronize(stream), NCV_CUDA_ERROR); - } - FloatVector* ptrU = &u; - FloatVector* ptrV = &v; - FloatVector* ptrUNew = &u_new; - FloatVector* ptrVNew = &v_new; + FloatVector* ptrU = &u; + FloatVector* ptrV = &v; + FloatVector* ptrUNew = &u_new; + FloatVector* ptrVNew = &v_new; - std::vector< shared_ptr >::const_reverse_iterator img0Iter = img0_pyramid.rbegin(); - std::vector< shared_ptr >::const_reverse_iterator img1Iter = img1_pyramid.rbegin(); - //outer loop - //warping fixed point iteration - while(!w_pyramid.empty()) - { - //current grid dimensions - const Ncv32u kLevelWidth = w_pyramid.back(); - const Ncv32u kLevelHeight = h_pyramid.back(); - const Ncv32u kLevelStride = alignUp(kLevelWidth, kStrideAlignmentFloat); + std::vector::const_reverse_iterator img0Iter = pyr.img0.rbegin(); + std::vector::const_reverse_iterator img1Iter = pyr.img1.rbegin(); - //size of current image in bytes - const int kLevelSizeInBytes = kLevelStride * kLevelHeight * sizeof(float); - - //number of points at current resolution - const int kLevelSizeInPixels = kLevelStride * kLevelHeight; - - if (!kSkipProcessing) + //outer loop + //warping fixed point iteration + while(!pyr.w.empty()) { + //current grid dimensions + const Ncv32u kLevelWidth = pyr.w.back(); + const Ncv32u kLevelHeight = pyr.h.back(); + const Ncv32u kLevelStride = alignUp(kLevelWidth, kStrideAlignmentFloat); + + //size of current image in bytes + const int kLevelSizeInBytes = kLevelStride * kLevelHeight * sizeof(float); + + //number of points at current resolution + const int kLevelSizeInPixels = kLevelStride * kLevelHeight; + //initial guess for du and dv ncvAssertCUDAReturn(cudaMemsetAsync(du.ptr(), 0, kLevelSizeInBytes, stream), NCV_CUDA_ERROR); ncvAssertCUDAReturn(cudaMemsetAsync(dv.ptr(), 0, kLevelSizeInBytes, stream), NCV_CUDA_ERROR); - } - //texture format descriptor - cudaChannelFormatDesc channel_desc = cudaCreateChannelDesc(); + //texture format descriptor + cudaChannelFormatDesc channel_desc = cudaCreateChannelDesc(); - I0 = *img0Iter; - I1 = *img1Iter; + I0 = *img0Iter; + I1 = *img1Iter; - ++img0Iter; - ++img1Iter; + ++img0Iter; + ++img1Iter; - if (!kSkipProcessing) - { ncvAssertCUDAReturn(cudaBindTexture2D(0, tex_I0, I0->ptr(), channel_desc, kLevelWidth, kLevelHeight, kLevelStride*sizeof(float)), NCV_CUDA_ERROR); ncvAssertCUDAReturn(cudaBindTexture2D(0, tex_I1, I1->ptr(), channel_desc, kLevelWidth, kLevelHeight, kLevelStride*sizeof(float)), NCV_CUDA_ERROR); - } - //compute derivatives - dim3 dBlocks(iDivUp(kLevelWidth, 32), iDivUp(kLevelHeight, 6)); - dim3 dThreads(32, 6); - const int kPitchTex = kLevelStride * sizeof(float); - if (!kSkipProcessing) - { + //compute derivatives + dim3 dBlocks(iDivUp(kLevelWidth, 32), iDivUp(kLevelHeight, 6)); + dim3 dThreads(32, 6); + + const int kPitchTex = kLevelStride * sizeof(float); + NcvSize32u srcSize(kLevelWidth, kLevelHeight); Ncv32u nSrcStep = kLevelStride * sizeof(float); NcvRect32u oROI(0, 0, kLevelWidth, kLevelHeight); @@ -1031,10 +988,7 @@ NCVStatus NCVBroxOpticalFlow(const NCVBroxOpticalFlowDescriptor desc, // Ixy nppiStFilterRowBorder_32f_C1R (Iy.ptr(), srcSize, nSrcStep, Ixy.ptr(), srcSize, nSrcStep, oROI, nppStBorderMirror, derivativeFilter.ptr(), kDFilterSize, kDFilterSize/2, 1.0f/12.0f); - } - - if (!kSkipProcessing) - { + ncvAssertCUDAReturn(cudaBindTexture2D(0, tex_Ix, Ix.ptr(), channel_desc, kLevelWidth, kLevelHeight, kPitchTex), NCV_CUDA_ERROR); ncvAssertCUDAReturn(cudaBindTexture2D(0, tex_Ixx, Ixx.ptr(), channel_desc, kLevelWidth, kLevelHeight, kPitchTex), NCV_CUDA_ERROR); ncvAssertCUDAReturn(cudaBindTexture2D(0, tex_Ix0, Ix0.ptr(), channel_desc, kLevelWidth, kLevelHeight, kPitchTex), NCV_CUDA_ERROR); @@ -1049,23 +1003,19 @@ NCVStatus NCVBroxOpticalFlow(const NCVBroxOpticalFlowDescriptor desc, // flow increments ncvAssertCUDAReturn(cudaBindTexture(0, tex_du, du.ptr(), channel_desc, kLevelSizeInBytes), NCV_CUDA_ERROR); ncvAssertCUDAReturn(cudaBindTexture(0, tex_dv, dv.ptr(), channel_desc, kLevelSizeInBytes), NCV_CUDA_ERROR); - } + dim3 psor_blocks(iDivUp(kLevelWidth, PSOR_TILE_WIDTH), iDivUp(kLevelHeight, PSOR_TILE_HEIGHT)); + dim3 psor_threads(PSOR_TILE_WIDTH, PSOR_TILE_HEIGHT); - dim3 psor_blocks(iDivUp(kLevelWidth, PSOR_TILE_WIDTH), iDivUp(kLevelHeight, PSOR_TILE_HEIGHT)); - dim3 psor_threads(PSOR_TILE_WIDTH, PSOR_TILE_HEIGHT); + dim3 sor_blocks(iDivUp(kLevelWidth, SOR_TILE_WIDTH), iDivUp(kLevelHeight, SOR_TILE_HEIGHT)); + dim3 sor_threads(SOR_TILE_WIDTH, SOR_TILE_HEIGHT); - dim3 sor_blocks(iDivUp(kLevelWidth, SOR_TILE_WIDTH), iDivUp(kLevelHeight, SOR_TILE_HEIGHT)); - dim3 sor_threads(SOR_TILE_WIDTH, SOR_TILE_HEIGHT); - - // inner loop - // lagged nonlinearity fixed point iteration - ncvAssertCUDAReturn(cudaStreamSynchronize(stream), NCV_CUDA_ERROR); - for (Ncv32u current_inner_iteration = 0; current_inner_iteration < desc.number_of_inner_iterations; ++current_inner_iteration) - { - //compute coefficients - if (!kSkipProcessing) + // inner loop + // lagged nonlinearity fixed point iteration + ncvAssertCUDAReturn(cudaStreamSynchronize(stream), NCV_CUDA_ERROR); + for (Ncv32u current_inner_iteration = 0; current_inner_iteration < desc.number_of_inner_iterations; ++current_inner_iteration) { + //compute coefficients prepare_sor_stage_1_tex<<>> (diffusivity_x.ptr(), diffusivity_y.ptr(), @@ -1101,13 +1051,12 @@ NCVStatus NCVBroxOpticalFlow(const NCVBroxOpticalFlowDescriptor desc, ncvAssertCUDAReturn(cudaBindTexture(0, tex_inv_denominator_u, denom_u.ptr(), channel_desc, kLevelSizeInBytes), NCV_CUDA_ERROR); ncvAssertCUDAReturn(cudaBindTexture(0, tex_inv_denominator_v, denom_v.ptr(), channel_desc, kLevelSizeInBytes), NCV_CUDA_ERROR); - } - //solve linear system - for (Ncv32u solver_iteration = 0; solver_iteration < desc.number_of_solver_iterations; ++solver_iteration) - { - float omega = 1.99f; - if (!kSkipProcessing) + + //solve linear system + for (Ncv32u solver_iteration = 0; solver_iteration < desc.number_of_solver_iterations; ++solver_iteration) { + float omega = 1.99f; + ncvAssertCUDAReturn(cudaBindTexture(0, tex_du, du.ptr(), channel_desc, kLevelSizeInBytes), NCV_CUDA_ERROR); ncvAssertCUDAReturn(cudaBindTexture(0, tex_dv, dv.ptr(), channel_desc, kLevelSizeInBytes), NCV_CUDA_ERROR); @@ -1139,33 +1088,29 @@ NCVStatus NCVBroxOpticalFlow(const NCVBroxOpticalFlowDescriptor desc, kLevelWidth, kLevelHeight, kLevelStride); - } - ncvAssertCUDAReturn(cudaBindTexture(0, tex_du, du.ptr(), channel_desc, kLevelSizeInBytes), NCV_CUDA_ERROR); - ncvAssertCUDAReturn(cudaBindTexture(0, tex_dv, dv.ptr(), channel_desc, kLevelSizeInBytes), NCV_CUDA_ERROR); - }//end of solver loop - }// end of inner loop + ncvAssertCUDAReturn(cudaBindTexture(0, tex_du, du.ptr(), channel_desc, kLevelSizeInBytes), NCV_CUDA_ERROR); + ncvAssertCUDAReturn(cudaBindTexture(0, tex_dv, dv.ptr(), channel_desc, kLevelSizeInBytes), NCV_CUDA_ERROR); + }//end of solver loop + }// end of inner loop - //update u and v - if (!kSkipProcessing) - { + //update u and v add(ptrU->ptr(), du.ptr(), kLevelSizeInPixels, stream); add(ptrV->ptr(), dv.ptr(), kLevelSizeInPixels, stream); - } - //prolongate using texture - w_pyramid.pop_back(); - h_pyramid.pop_back(); - if (!w_pyramid.empty()) - { - //compute new image size - Ncv32u nw = w_pyramid.back(); - Ncv32u nh = h_pyramid.back(); - Ncv32u ns = alignUp(nw, kStrideAlignmentFloat); - dim3 p_blocks(iDivUp(nw, 32), iDivUp(nh, 8)); - dim3 p_threads(32, 8); - if (!kSkipProcessing) + //prolongate using texture + pyr.w.pop_back(); + pyr.h.pop_back(); + if (!pyr.w.empty()) { + //compute new image size + Ncv32u nw = pyr.w.back(); + Ncv32u nh = pyr.h.back(); + Ncv32u ns = alignUp(nw, kStrideAlignmentFloat); + + dim3 p_blocks(iDivUp(nw, 32), iDivUp(nh, 8)); + dim3 p_threads(32, 8); + NcvSize32u srcSize (kLevelWidth, kLevelHeight); NcvSize32u dstSize (nw, nh); NcvRect32u srcROI (0, 0, kLevelWidth, kLevelHeight); @@ -1180,27 +1125,27 @@ NCVStatus NCVBroxOpticalFlow(const NCVBroxOpticalFlowDescriptor desc, ptrVNew->ptr(), dstSize, ns * sizeof (float), dstROI, 1.0f/scale_factor, 1.0f/scale_factor, nppStBicubic); ScaleVector(ptrVNew->ptr(), ptrVNew->ptr(), 1.0f/scale_factor, ns * nh, stream); + + cv::gpu::device::swap(ptrU, ptrUNew); + cv::gpu::device::swap(ptrV, ptrVNew); } - - cv::gpu::device::swap(ptrU, ptrUNew); - cv::gpu::device::swap(ptrV, ptrVNew); + scale /= scale_factor; } - scale /= scale_factor; + + // end of warping iterations + ncvAssertCUDAReturn(cudaStreamSynchronize(stream), NCV_CUDA_ERROR); + + ncvAssertCUDAReturn( cudaMemcpy2DAsync + (uOut.ptr(), uOut.pitch(), ptrU->ptr(), + kSourcePitch, kSourceWidth*sizeof(float), kSourceHeight, cudaMemcpyDeviceToDevice, stream), NCV_CUDA_ERROR ); + + ncvAssertCUDAReturn( cudaMemcpy2DAsync + (vOut.ptr(), vOut.pitch(), ptrV->ptr(), + kSourcePitch, kSourceWidth*sizeof(float), kSourceHeight, cudaMemcpyDeviceToDevice, stream), NCV_CUDA_ERROR ); + + ncvAssertCUDAReturn(cudaGetLastError(), NCV_CUDA_ERROR); + ncvAssertCUDAReturn(cudaStreamSynchronize(stream), NCV_CUDA_ERROR); } - // end of warping iterations - ncvAssertCUDAReturn(cudaStreamSynchronize(stream), NCV_CUDA_ERROR); - - ncvAssertCUDAReturn( cudaMemcpy2DAsync - (uOut.ptr(), uOut.pitch(), ptrU->ptr(), - kSourcePitch, kSourceWidth*sizeof(float), kSourceHeight, cudaMemcpyDeviceToDevice, stream), NCV_CUDA_ERROR ); - - ncvAssertCUDAReturn( cudaMemcpy2DAsync - (vOut.ptr(), vOut.pitch(), ptrV->ptr(), - kSourcePitch, kSourceWidth*sizeof(float), kSourceHeight, cudaMemcpyDeviceToDevice, stream), NCV_CUDA_ERROR ); - - ncvAssertCUDAReturn(cudaGetLastError(), NCV_CUDA_ERROR); - ncvAssertCUDAReturn(cudaStreamSynchronize(stream), NCV_CUDA_ERROR); - return NCV_SUCCESS; } diff --git a/modules/gpu/src/optical_flow.cpp b/modules/gpu/src/optical_flow.cpp new file mode 100644 index 000000000..7891ef554 --- /dev/null +++ b/modules/gpu/src/optical_flow.cpp @@ -0,0 +1,198 @@ +/*M/////////////////////////////////////////////////////////////////////////////////////// +// +// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. +// +// By downloading, copying, installing or using the software you agree to this license. +// If you do not agree to this license, do not download, install, +// copy or use the software. +// +// +// License Agreement +// For Open Source Computer Vision Library +// +// Copyright (C) 2000-2008, Intel Corporation, all rights reserved. +// Copyright (C) 2009, Willow Garage Inc., all rights reserved. +// Third party copyrights are property of their respective owners. +// +// Redistribution and use in source and binary forms, with or without modification, +// are permitted provided that the following conditions are met: +// +// * Redistribution's of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// * Redistribution's in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other GpuMaterials provided with the distribution. +// +// * The name of the copyright holders may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// This software is provided by the copyright holders and contributors "as is" and +// any express or bpied warranties, including, but not limited to, the bpied +// warranties of merchantability and fitness for a particular purpose are disclaimed. +// In no event shall the Intel Corporation or contributors be liable for any direct, +// indirect, incidental, special, exemplary, or consequential damages +// (including, but not limited to, procurement of substitute goods or services; +// loss of use, data, or profits; or business interruption) however caused +// and on any theory of liability, whether in contract, strict liability, +// or tort (including negligence or otherwise) arising in any way out of +// the use of this software, even if advised of the possibility of such damage. +// +//M*/ + +#include "precomp.hpp" + +using namespace cv; +using namespace cv::gpu; +using namespace std; + +#if !defined (HAVE_CUDA) + +void cv::gpu::BroxOpticalFlow::operator ()(const GpuMat&, const GpuMat&, GpuMat&, GpuMat&, Stream&) { throw_nogpu(); } +void cv::gpu::interpolateFrames(const GpuMat&, const GpuMat&, const GpuMat&, const GpuMat&, const GpuMat&, const GpuMat&, float, GpuMat&, GpuMat&, Stream&) { throw_nogpu(); } + +#else + +namespace +{ + size_t getBufSize(const NCVBroxOpticalFlowDescriptor& desc, const NCVMatrix& frame0, const NCVMatrix& frame1, + NCVMatrix& u, NCVMatrix& v, const cudaDeviceProp& devProp) + { + NCVMemStackAllocator gpuCounter(static_cast(devProp.textureAlignment)); + CV_Assert(gpuCounter.isInitialized()); + + NCVStatus ncvStat = NCVBroxOpticalFlow(desc, gpuCounter, frame0, frame1, u, v, 0); + CV_Assert(ncvStat == NCV_SUCCESS); + + return gpuCounter.maxSize(); + } +} + +namespace +{ + void outputHandler(const char* msg) + { + CV_Error(CV_GpuApiCallError, msg); + } +} + +void cv::gpu::BroxOpticalFlow::operator ()(const GpuMat& frame0, const GpuMat& frame1, GpuMat& u, GpuMat& v, Stream& s) +{ + ncvSetDebugOutputHandler(outputHandler); + + CV_Assert(frame0.type() == CV_32FC1); + CV_Assert(frame1.size() == frame0.size() && frame1.type() == frame0.type()); + + u.create(frame0.size(), CV_32FC1); + v.create(frame0.size(), CV_32FC1); + + cudaDeviceProp devProp; + cudaSafeCall( cudaGetDeviceProperties(&devProp, getDevice()) ); + + NCVBroxOpticalFlowDescriptor desc; + + desc.alpha = alpha; + desc.gamma = gamma; + desc.scale_factor = scale_factor; + desc.number_of_inner_iterations = inner_iterations; + desc.number_of_outer_iterations = outer_iterations; + desc.number_of_solver_iterations = solver_iterations; + + NCVMemSegment frame0MemSeg; + frame0MemSeg.begin.memtype = NCVMemoryTypeDevice; + frame0MemSeg.begin.ptr = const_cast(frame0.data); + frame0MemSeg.size = frame0.step * frame0.rows; + + NCVMemSegment frame1MemSeg; + frame1MemSeg.begin.memtype = NCVMemoryTypeDevice; + frame1MemSeg.begin.ptr = const_cast(frame1.data); + frame1MemSeg.size = frame1.step * frame1.rows; + + NCVMemSegment uMemSeg; + uMemSeg.begin.memtype = NCVMemoryTypeDevice; + uMemSeg.begin.ptr = u.ptr(); + uMemSeg.size = u.step * u.rows; + + NCVMemSegment vMemSeg; + vMemSeg.begin.memtype = NCVMemoryTypeDevice; + vMemSeg.begin.ptr = v.ptr(); + vMemSeg.size = v.step * v.rows; + + NCVMatrixReuse frame0Mat(frame0MemSeg, devProp.textureAlignment, frame0.cols, frame0.rows, frame0.step); + NCVMatrixReuse frame1Mat(frame1MemSeg, devProp.textureAlignment, frame1.cols, frame1.rows, frame1.step); + NCVMatrixReuse uMat(uMemSeg, devProp.textureAlignment, u.cols, u.rows, u.step); + NCVMatrixReuse vMat(vMemSeg, devProp.textureAlignment, v.cols, v.rows, v.step); + + cudaStream_t stream = StreamAccessor::getStream(s); + + size_t bufSize = getBufSize(desc, frame0Mat, frame1Mat, uMat, vMat, devProp); + + ensureSizeIsEnough(1, bufSize, CV_8UC1, buf); + + NCVMemStackAllocator gpuAllocator(NCVMemoryTypeDevice, bufSize, static_cast(devProp.textureAlignment), buf.ptr()); + CV_Assert(gpuAllocator.isInitialized()); + + NCVStatus ncvStat = NCVBroxOpticalFlow(desc, gpuAllocator, frame0Mat, frame1Mat, uMat, vMat, stream); + CV_Assert(ncvStat == NCV_SUCCESS); +} + +void cv::gpu::interpolateFrames(const GpuMat& frame0, const GpuMat& frame1, const GpuMat& fu, const GpuMat& fv, const GpuMat& bu, const GpuMat& bv, + float pos, GpuMat& newFrame, GpuMat& buf, Stream& s) +{ + CV_Assert(frame0.type() == CV_32FC1); + CV_Assert(frame1.size() == frame0.size() && frame1.type() == frame0.type()); + CV_Assert(fu.size() == frame0.size() && fu.type() == frame0.type()); + CV_Assert(fv.size() == frame0.size() && fv.type() == frame0.type()); + CV_Assert(bu.size() == frame0.size() && bu.type() == frame0.type()); + CV_Assert(bv.size() == frame0.size() && bv.type() == frame0.type()); + + newFrame.create(frame0.size(), frame0.type()); + + buf.create(6 * frame0.rows, frame0.cols, CV_32FC1); + buf.setTo(Scalar::all(0)); + + // occlusion masks + GpuMat occ0 = buf.rowRange(0 * frame0.rows, 1 * frame0.rows); + GpuMat occ1 = buf.rowRange(1 * frame0.rows, 2 * frame0.rows); + + // interpolated forward flow + GpuMat fui = buf.rowRange(2 * frame0.rows, 3 * frame0.rows); + GpuMat fvi = buf.rowRange(3 * frame0.rows, 4 * frame0.rows); + + // interpolated backward flow + GpuMat bui = buf.rowRange(4 * frame0.rows, 5 * frame0.rows); + GpuMat bvi = buf.rowRange(5 * frame0.rows, 6 * frame0.rows); + + size_t step = frame0.step; + + CV_Assert(frame1.step == step && fu.step == step && fv.step == step && bu.step == step && bv.step == step && newFrame.step == step && buf.step == step); + + cudaStream_t stream = StreamAccessor::getStream(s); + NppStStreamHandler h(stream); + + NppStInterpolationState state; + + state.size = NcvSize32u(frame0.cols, frame0.rows); + state.nStep = static_cast(step); + state.pSrcFrame0 = const_cast(frame0.ptr()); + state.pSrcFrame1 = const_cast(frame1.ptr()); + state.pFU = const_cast(fu.ptr()); + state.pFV = const_cast(fv.ptr()); + state.pBU = const_cast(bu.ptr()); + state.pBV = const_cast(bv.ptr()); + state.pos = pos; + state.pNewFrame = newFrame.ptr(); + state.ppBuffers[0] = occ0.ptr(); + state.ppBuffers[1] = occ1.ptr(); + state.ppBuffers[2] = fui.ptr(); + state.ppBuffers[3] = fvi.ptr(); + state.ppBuffers[4] = bui.ptr(); + state.ppBuffers[5] = bvi.ptr(); + + nppSafeCall( nppiStInterpolateFrames(&state) ); + + if (stream == 0) + cudaSafeCall( cudaDeviceSynchronize() ); +} + +#endif /* HAVE_CUDA */ diff --git a/modules/gpu/src/precomp.hpp b/modules/gpu/src/precomp.hpp index ed331ccc9..ea5259b07 100644 --- a/modules/gpu/src/precomp.hpp +++ b/modules/gpu/src/precomp.hpp @@ -76,6 +76,7 @@ #include "nvidia/core/NCV.hpp" #include "nvidia/NPP_staging/NPP_staging.hpp" #include "nvidia/NCVHaarObjectDetection.hpp" + #include "nvidia/NCVBroxOpticalFlow.hpp" #define CUDART_MINIMUM_REQUIRED_VERSION 4000 #define NPP_MINIMUM_REQUIRED_VERSION 4000 diff --git a/modules/gpu/test/test_video.cpp b/modules/gpu/test/test_video.cpp new file mode 100644 index 000000000..76d8aebd0 --- /dev/null +++ b/modules/gpu/test/test_video.cpp @@ -0,0 +1,227 @@ +/*M/////////////////////////////////////////////////////////////////////////////////////// +// +// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. +// +// By downloading, copying, installing or using the software you agree to this license. +// If you do not agree to this license, do not download, install, +// copy or use the software. +// +// +// Intel License Agreement +// For Open Source Computer Vision Library +// +// Copyright (C) 2000, Intel Corporation, all rights reserved. +// Third party copyrights are property of their respective owners. +// +// Redistribution and use in source and binary forms, with or without modification, +// are permitted provided that the following conditions are met: +// +// * Redistribution's of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// * Redistribution's in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// * The name of Intel Corporation may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// This software is provided by the copyright holders and contributors "as is" and +// any express or implied warranties, including, but not limited to, the implied +// warranties of merchantability and fitness for a particular purpose are disclaimed. +// In no event shall the Intel Corporation or contributors be liable for any direct, +// indirect, incidental, special, exemplary, or consequential damages +// (including, but not limited to, procurement of substitute goods or services; +// loss of use, data, or profits; or business interruption) however caused +// and on any theory of liability, whether in contract, strict liability, +// or tort (including negligence or otherwise) arising in any way out of +// the use of this software, even if advised of the possibility of such damage. +// +//M*/ + +#include "test_precomp.hpp" + +#ifdef HAVE_CUDA + +//#define DUMP + +struct BroxOpticalFlow : testing::TestWithParam< cv::gpu::DeviceInfo > +{ + cv::gpu::DeviceInfo devInfo; + + cv::Mat frame0; + cv::Mat frame1; + + cv::Mat u_gold; + cv::Mat v_gold; + + virtual void SetUp() + { + devInfo = GetParam(); + + cv::gpu::setDevice(devInfo.deviceID()); + + frame0 = readImage("opticalflow/frame0.png", cv::IMREAD_GRAYSCALE); + ASSERT_FALSE(frame0.empty()); + frame0.convertTo(frame0, CV_32F, 1.0 / 255.0); + + frame1 = readImage("opticalflow/frame1.png", cv::IMREAD_GRAYSCALE); + ASSERT_FALSE(frame1.empty()); + frame1.convertTo(frame1, CV_32F, 1.0 / 255.0); + +#ifndef DUMP + + std::ifstream f((std::string(cvtest::TS::ptr()->get_data_path()) + "opticalflow/opticalflow_gold.bin").c_str(), std::ios_base::binary); + + int rows, cols; + + f.read((char*)&rows, sizeof(rows)); + f.read((char*)&cols, sizeof(cols)); + + u_gold.create(rows, cols, CV_32FC1); + + for (int i = 0; i < u_gold.rows; ++i) + f.read((char*)u_gold.ptr(i), u_gold.cols * sizeof(float)); + + v_gold.create(rows, cols, CV_32FC1); + + for (int i = 0; i < v_gold.rows; ++i) + f.read((char*)v_gold.ptr(i), v_gold.cols * sizeof(float)); + +#endif + } +}; + +TEST_P(BroxOpticalFlow, Regression) +{ + PRINT_PARAM(devInfo); + + cv::Mat u; + cv::Mat v; + + cv::gpu::BroxOpticalFlow d_flow(0.197f /*alpha*/, 50.0f /*gamma*/, 0.8f /*scale_factor*/, + 10 /*inner_iterations*/, 77 /*outer_iterations*/, 10 /*solver_iterations*/); + + ASSERT_NO_THROW( + cv::gpu::GpuMat d_u; + cv::gpu::GpuMat d_v; + d_flow(cv::gpu::GpuMat(frame0), cv::gpu::GpuMat(frame1), d_u, d_v); + d_u.download(u); + d_v.download(v); + d_flow.buf.release(); + ); + +#ifndef DUMP + + EXPECT_MAT_NEAR(u_gold, u, 0); + EXPECT_MAT_NEAR(v_gold, v, 0); + +#else + + std::ofstream f((std::string(cvtest::TS::ptr()->get_data_path()) + "opticalflow/opticalflow_gold.bin").c_str(), std::ios_base::binary); + + f.write((char*)&u.rows, sizeof(u.rows)); + f.write((char*)&u.cols, sizeof(u.cols)); + + for (int i = 0; i < u.rows; ++i) + f.write((char*)u.ptr(i), u.cols * sizeof(float)); + + for (int i = 0; i < v.rows; ++i) + f.write((char*)v.ptr(i), v.cols * sizeof(float)); + +#endif +} + +INSTANTIATE_TEST_CASE_P(Video, BroxOpticalFlow, testing::ValuesIn(devices())); + + + + + + +struct InterpolateFrames : testing::TestWithParam< cv::gpu::DeviceInfo > +{ + cv::gpu::DeviceInfo devInfo; + + cv::Mat frame0; + cv::Mat frame1; + + cv::Mat newFrame_gold; + + virtual void SetUp() + { + devInfo = GetParam(); + + cv::gpu::setDevice(devInfo.deviceID()); + + frame0 = readImage("opticalflow/frame0.png", cv::IMREAD_GRAYSCALE); + ASSERT_FALSE(frame0.empty()); + frame0.convertTo(frame0, CV_32F, 1.0 / 255.0); + + frame1 = readImage("opticalflow/frame1.png", cv::IMREAD_GRAYSCALE); + ASSERT_FALSE(frame1.empty()); + frame1.convertTo(frame1, CV_32F, 1.0 / 255.0); + +#ifndef DUMP + + std::ifstream f((std::string(cvtest::TS::ptr()->get_data_path()) + "opticalflow/interpolate_frames_gold.bin").c_str(), std::ios_base::binary); + + int rows, cols; + + f.read((char*)&rows, sizeof(rows)); + f.read((char*)&cols, sizeof(cols)); + + newFrame_gold.create(rows, cols, CV_32FC1); + + for (int i = 0; i < newFrame_gold.rows; ++i) + f.read((char*)newFrame_gold.ptr(i), newFrame_gold.cols * sizeof(float)); + +#endif + } +}; + +TEST_P(InterpolateFrames, Regression) +{ + PRINT_PARAM(devInfo); + + cv::Mat newFrame; + + cv::gpu::BroxOpticalFlow d_flow(0.197f /*alpha*/, 50.0f /*gamma*/, 0.8f /*scale_factor*/, + 10 /*inner_iterations*/, 77 /*outer_iterations*/, 10 /*solver_iterations*/); + + ASSERT_NO_THROW( + cv::gpu::GpuMat d_frame0(frame0); + cv::gpu::GpuMat d_frame1(frame1); + cv::gpu::GpuMat d_fu; + cv::gpu::GpuMat d_fv; + cv::gpu::GpuMat d_bu; + cv::gpu::GpuMat d_bv; + cv::gpu::GpuMat d_newFrame; + cv::gpu::GpuMat d_buf; + d_flow(d_frame0, d_frame1, d_fu, d_fv); + d_flow(d_frame1, d_frame0, d_bu, d_bv); + cv::gpu::interpolateFrames(d_frame0, d_frame1, d_fu, d_fv, d_bu, d_bv, 0.5f, d_newFrame, d_buf); + d_newFrame.download(newFrame); + d_flow.buf.release(); + ); + +#ifndef DUMP + + EXPECT_MAT_NEAR(newFrame_gold, newFrame, 1e-4); + +#else + + std::ofstream f((std::string(cvtest::TS::ptr()->get_data_path()) + "opticalflow/interpolate_frames_gold.bin").c_str(), std::ios_base::binary); + + f.write((char*)&newFrame.rows, sizeof(newFrame.rows)); + f.write((char*)&newFrame.cols, sizeof(newFrame.cols)); + + for (int i = 0; i < newFrame.rows; ++i) + f.write((char*)newFrame.ptr(i), newFrame.cols * sizeof(float)); + +#endif +} + +INSTANTIATE_TEST_CASE_P(Video, InterpolateFrames, testing::ValuesIn(devices())); + +#endif diff --git a/samples/gpu/optical_flow.cpp b/samples/gpu/optical_flow.cpp new file mode 100644 index 000000000..d92219bbe --- /dev/null +++ b/samples/gpu/optical_flow.cpp @@ -0,0 +1,340 @@ +#include +#include +#include + +#include "cvconfig.h" +#include "opencv2/core/core.hpp" +#include "opencv2/highgui/highgui.hpp" +#include "opencv2/gpu/gpu.hpp" + +#ifdef HAVE_CUDA +#include "NPP_staging/NPP_staging.hpp" +#endif + +using namespace std; +using namespace cv; +using namespace cv::gpu; + +#if !defined(HAVE_CUDA) + +int main(int argc, const char* argv[]) +{ + cout << "Please compile the library with CUDA support" << endl; + return -1; +} + +#else + +#define PARAM_INPUT "--input" +#define PARAM_SCALE "--scale" +#define PARAM_ALPHA "--alpha" +#define PARAM_GAMMA "--gamma" +#define PARAM_INNER "--inner" +#define PARAM_OUTER "--outer" +#define PARAM_SOLVER "--solver" +#define PARAM_TIME_STEP "--time-step" +#define PARAM_HELP "--help" + +void printHelp() +{ + cout << "Usage help:\n"; + cout << setiosflags(ios::left); + cout << "\t" << setw(15) << PARAM_ALPHA << " - set alpha\n"; + cout << "\t" << setw(15) << PARAM_GAMMA << " - set gamma\n"; + cout << "\t" << setw(15) << PARAM_INNER << " - set number of inner iterations\n"; + cout << "\t" << setw(15) << PARAM_INPUT << " - specify input file names (2 image files)\n"; + cout << "\t" << setw(15) << PARAM_OUTER << " - set number of outer iterations\n"; + cout << "\t" << setw(15) << PARAM_SCALE << " - set pyramid scale factor\n"; + cout << "\t" << setw(15) << PARAM_SOLVER << " - set number of basic solver iterations\n"; + cout << "\t" << setw(15) << PARAM_TIME_STEP << " - set frame interpolation time step\n"; + cout << "\t" << setw(15) << PARAM_HELP << " - display this help message\n"; +} + +int processCommandLine(int argc, const char* argv[], float& timeStep, string& frame0Name, string& frame1Name, BroxOpticalFlow& flow) +{ + timeStep = 0.25f; + + for (int iarg = 1; iarg < argc; ++iarg) + { + if (strcmp(argv[iarg], PARAM_INPUT) == 0) + { + if (iarg + 2 < argc) + { + frame0Name = argv[++iarg]; + frame1Name = argv[++iarg]; + } + else + return -1; + } + else if(strcmp(argv[iarg], PARAM_SCALE) == 0) + { + if (iarg + 1 < argc) + flow.scale_factor = static_cast(atof(argv[++iarg])); + else + return -1; + } + else if(strcmp(argv[iarg], PARAM_ALPHA) == 0) + { + if (iarg + 1 < argc) + flow.alpha = static_cast(atof(argv[++iarg])); + else + return -1; + } + else if(strcmp(argv[iarg], PARAM_GAMMA) == 0) + { + if (iarg + 1 < argc) + flow.gamma = static_cast(atof(argv[++iarg])); + else + return -1; + } + else if(strcmp(argv[iarg], PARAM_INNER) == 0) + { + if (iarg + 1 < argc) + flow.inner_iterations = atoi(argv[++iarg]); + else + return -1; + } + else if(strcmp(argv[iarg], PARAM_OUTER) == 0) + { + if (iarg + 1 < argc) + flow.outer_iterations = atoi(argv[++iarg]); + else + return -1; + } + else if(strcmp(argv[iarg], PARAM_SOLVER) == 0) + { + if (iarg + 1 < argc) + flow.solver_iterations = atoi(argv[++iarg]); + else + return -1; + } + else if(strcmp(argv[iarg], PARAM_TIME_STEP) == 0) + { + if (iarg + 1 < argc) + timeStep = static_cast(atof(argv[++iarg])); + else + return -1; + } + else if(strcmp(argv[iarg], PARAM_HELP) == 0) + { + printHelp(); + return 0; + } + } + return 0; +} + +template inline T clamp (T x, T a, T b) +{ + return ((x) > (a) ? ((x) < (b) ? (x) : (b)) : (a)); +} + +template inline T mapValue(T x, T a, T b, T c, T d) +{ + x = clamp(x, a, b); + return c + (d - c) * (x - a) / (b - a); +} + +void getFlowField(const Mat& u, const Mat& v, Mat& flowField) +{ + float maxDisplacement = 1.0f; + + for (int i = 0; i < u.rows; ++i) + { + const float* ptr_u = u.ptr(i); + const float* ptr_v = v.ptr(i); + + for (int j = 0; j < u.cols; ++j) + { + float d = max(fabsf(ptr_u[j]), fabsf(ptr_v[j])); + + if (d > maxDisplacement) + maxDisplacement = d; + } + } + + flowField.create(u.size(), CV_8UC4); + + for (int i = 0; i < flowField.rows; ++i) + { + const float* ptr_u = u.ptr(i); + const float* ptr_v = v.ptr(i); + + Vec4b* row = flowField.ptr(i); + + for (int j = 0; j < flowField.cols; ++j) + { + row[j][0] = 0; + row[j][1] = static_cast (mapValue (-ptr_v[j], -maxDisplacement, maxDisplacement, 0.0f, 255.0f)); + row[j][2] = static_cast (mapValue ( ptr_u[j], -maxDisplacement, maxDisplacement, 0.0f, 255.0f)); + row[j][3] = 255; + } + } +} + +int main(int argc, const char* argv[]) +{ + string frame0Name, frame1Name; + float timeStep = 0.01f; + + BroxOpticalFlow d_flow(0.197f /*alpha*/, 50.0f /*gamma*/, 0.8f /*scale_factor*/, + 10 /*inner_iterations*/, 77 /*outer_iterations*/, 10 /*solver_iterations*/); + + int result = processCommandLine(argc, argv, timeStep, frame0Name, frame1Name, d_flow); + if (argc == 1 || result) + { + printHelp(); + return result; + } + + if (frame0Name.empty() || frame1Name.empty()) + { + cout << "Missing input file names\n"; + return -1; + } + + Mat frame0Color = imread(frame0Name); + Mat frame1Color = imread(frame1Name); + + if (frame0Color.empty() || frame1Color.empty()) + { + cout << "Can't load input images\n"; + return -1; + } + + cout << "OpenCV / NVIDIA Computer Vision\n"; + cout << "Optical Flow Demo: Frame Interpolation\n"; + cout << "=========================================\n"; + cout << "Press:\n ESC to quit\n 'a' to move to the previous frame\n 's' to move to the next frame\n"; + + frame0Color.convertTo(frame0Color, CV_32F, 1.0 / 255.0); + frame1Color.convertTo(frame1Color, CV_32F, 1.0 / 255.0); + + Mat frame0Gray, frame1Gray; + + cvtColor(frame0Color, frame0Gray, COLOR_BGR2GRAY); + cvtColor(frame1Color, frame1Gray, COLOR_BGR2GRAY); + + GpuMat d_frame0(frame0Gray); + GpuMat d_frame1(frame1Gray); + + Mat fu, fv; + Mat bu, bv; + + GpuMat d_fu, d_fv; + GpuMat d_bu, d_bv; + + cout << "Estimating optical flow\nForward...\n"; + + d_flow(d_frame0, d_frame1, d_fu, d_fv); + d_flow(d_frame1, d_frame0, d_bu, d_bv); + + d_fu.download(fu); + d_fv.download(fv); + + d_bu.download(bu); + d_bv.download(bv); + + // first frame color components (GPU memory) + GpuMat d_b, d_g, d_r; + + // second frame color components (GPU memory) + GpuMat d_bt, d_gt, d_rt; + + // prepare color components on host and copy them to device memory + Mat channels[3]; + + cv::split(frame0Color, channels); + + d_b.upload(channels[0]); + d_g.upload(channels[1]); + d_r.upload(channels[2]); + + cv::split(frame1Color, channels); + + d_bt.upload(channels[0]); + d_gt.upload(channels[1]); + d_rt.upload(channels[2]); + + cout << "Interpolating...\n"; + cout.precision (4); + + // temporary buffer + GpuMat d_buf; + + // intermediate frame color components (GPU memory) + GpuMat d_rNew, d_gNew, d_bNew; + + GpuMat d_newFrame; + + vector frames; + frames.reserve(1.0f / timeStep + 2); + + frames.push_back(frame0Color); + + // compute interpolated frames + for (float timePos = timeStep; timePos < 1.0f; timePos += timeStep) + { + // interpolate blue channel + interpolateFrames(d_b, d_bt, d_fu, d_fv, d_bu, d_bv, timePos, d_bNew, d_buf); + // interpolate green channel + interpolateFrames(d_g, d_gt, d_fu, d_fv, d_bu, d_bv, timePos, d_gNew, d_buf); + // interpolate red channel + interpolateFrames(d_r, d_rt, d_fu, d_fv, d_bu, d_bv, timePos, d_rNew, d_buf); + + GpuMat channels[] = {d_bNew, d_gNew, d_rNew}; + merge(channels, 3, d_newFrame); + + Mat newFrame; + d_newFrame.download(newFrame); + + frames.push_back(newFrame); + + cout << timePos * 100.0f << "%\r"; + } + cout << setw (5) << "100%\n"; + + frames.push_back(frame1Color); + + int currentFrame; + currentFrame = 0; + + Mat flowFieldForward; + Mat flowFieldBackward; + + getFlowField(fu, fv, flowFieldForward); + getFlowField(bu, bv, flowFieldBackward); + + imshow("Forward flow", flowFieldForward); + imshow("Backward flow", flowFieldBackward); + + imshow("Interpolated frame", frames[currentFrame]); + + bool qPressed = false; + while (!qPressed) + { + int key = toupper(waitKey(10)); + switch (key) + { + case 27: + qPressed = true; + break; + case 'A': + if (currentFrame > 0) + --currentFrame; + + imshow("Interpolated frame", frames[currentFrame]); + break; + case 'S': + if (currentFrame < frames.size() - 1) + ++currentFrame; + + imshow("Interpolated frame", frames[currentFrame]); + break; + } + } + + return 0; +} + +#endif