move obsolete algorithms from cudaoptflow to cudalegacy

2014-12-31 15:35:23 +03:00
parent c4b2058d23
commit 19c6bbe7d9
11 changed files with 49 additions and 311 deletions
--- a/modules/cudaoptflow/src/bm.cpp
+++ b/modules/cudaoptflow/src/bm.cpp
@@ -1,204 +0,0 @@
-/*M///////////////////////////////////////////////////////////////////////////////////////
-//
-//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
-//
-//  By downloading, copying, installing or using the software you agree to this license.
-//  If you do not agree to this license, do not download, install,
-//  copy or use the software.
-//
-//
-//                           License Agreement
-//                For Open Source Computer Vision Library
-//
-// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
-// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
-// Third party copyrights are property of their respective owners.
-//
-// Redistribution and use in source and binary forms, with or without modification,
-// are permitted provided that the following conditions are met:
-//
-//   * Redistribution's of source code must retain the above copyright notice,
-//     this list of conditions and the following disclaimer.
-//
-//   * Redistribution's in binary form must reproduce the above copyright notice,
-//     this list of conditions and the following disclaimer in the documentation
-//     and/or other materials provided with the distribution.
-//
-//   * The name of the copyright holders may not be used to endorse or promote products
-//     derived from this software without specific prior written permission.
-//
-// This software is provided by the copyright holders and contributors "as is" and
-// any express or implied warranties, including, but not limited to, the implied
-// warranties of merchantability and fitness for a particular purpose are disclaimed.
-// In no event shall the Intel Corporation or contributors be liable for any direct,
-// indirect, incidental, special, exemplary, or consequential damages
-// (including, but not limited to, procurement of substitute goods or services;
-// loss of use, data, or profits; or business interruption) however caused
-// and on any theory of liability, whether in contract, strict liability,
-// or tort (including negligence or otherwise) arising in any way out of
-// the use of this software, even if advised of the possibility of such damage.
-//
-//M*/
-
-#include "precomp.hpp"
-
-using namespace cv;
-using namespace cv::cuda;
-
-#if !defined HAVE_CUDA || defined(CUDA_DISABLER)
-
-void cv::cuda::calcOpticalFlowBM(const GpuMat&, const GpuMat&, Size, Size, Size, bool, GpuMat&, GpuMat&, GpuMat&, Stream&) { throw_no_cuda(); }
-
-#else // HAVE_CUDA
-
-namespace optflowbm
-{
-    void calc(PtrStepSzb prev, PtrStepSzb curr, PtrStepSzf velx, PtrStepSzf vely, int2 blockSize, int2 shiftSize, bool usePrevious,
-              int maxX, int maxY, int acceptLevel, int escapeLevel, const short2* ss, int ssCount, cudaStream_t stream);
-}
-
-void cv::cuda::calcOpticalFlowBM(const GpuMat& prev, const GpuMat& curr, Size blockSize, Size shiftSize, Size maxRange, bool usePrevious, GpuMat& velx, GpuMat& vely, GpuMat& buf, Stream& st)
-{
-    CV_Assert( prev.type() == CV_8UC1 );
-    CV_Assert( curr.size() == prev.size() && curr.type() == prev.type() );
-
-    const Size velSize((prev.cols - blockSize.width + shiftSize.width) / shiftSize.width,
-                       (prev.rows - blockSize.height + shiftSize.height) / shiftSize.height);
-
-    velx.create(velSize, CV_32FC1);
-    vely.create(velSize, CV_32FC1);
-
-    // scanning scheme coordinates
-    std::vector<short2> ss((2 * maxRange.width + 1) * (2 * maxRange.height + 1));
-    int ssCount = 0;
-
-    // Calculate scanning scheme
-    const int minCount = std::min(maxRange.width, maxRange.height);
-
-    // use spiral search pattern
-    //
-    //     9 10 11 12
-    //     8  1  2 13
-    //     7  *  3 14
-    //     6  5  4 15
-    //... 20 19 18 17
-    //
-
-    for (int i = 0; i < minCount; ++i)
-    {
-        // four cycles along sides
-        int x = -i - 1, y = x;
-
-        // upper side
-        for (int j = -i; j <= i + 1; ++j, ++ssCount)
-        {
-            ss[ssCount].x = (short) ++x;
-            ss[ssCount].y = (short) y;
-        }
-
-        // right side
-        for (int j = -i; j <= i + 1; ++j, ++ssCount)
-        {
-            ss[ssCount].x = (short) x;
-            ss[ssCount].y = (short) ++y;
-        }
-
-        // bottom side
-        for (int j = -i; j <= i + 1; ++j, ++ssCount)
-        {
-            ss[ssCount].x = (short) --x;
-            ss[ssCount].y = (short) y;
-        }
-
-        // left side
-        for (int j = -i; j <= i + 1; ++j, ++ssCount)
-        {
-            ss[ssCount].x = (short) x;
-            ss[ssCount].y = (short) --y;
-        }
-    }
-
-    // the rest part
-    if (maxRange.width < maxRange.height)
-    {
-        const int xleft = -minCount;
-
-        // cycle by neighbor rings
-        for (int i = minCount; i < maxRange.height; ++i)
-        {
-            // two cycles by x
-            int y = -(i + 1);
-            int x = xleft;
-
-            // upper side
-            for (int j = -maxRange.width; j <= maxRange.width; ++j, ++ssCount, ++x)
-            {
-                ss[ssCount].x = (short) x;
-                ss[ssCount].y = (short) y;
-            }
-
-            x = xleft;
-            y = -y;
-
-            // bottom side
-            for (int j = -maxRange.width; j <= maxRange.width; ++j, ++ssCount, ++x)
-            {
-                ss[ssCount].x = (short) x;
-                ss[ssCount].y = (short) y;
-            }
-        }
-    }
-    else if (maxRange.width > maxRange.height)
-    {
-        const int yupper = -minCount;
-
-        // cycle by neighbor rings
-        for (int i = minCount; i < maxRange.width; ++i)
-        {
-            // two cycles by y
-            int x = -(i + 1);
-            int y = yupper;
-
-            // left side
-            for (int j = -maxRange.height; j <= maxRange.height; ++j, ++ssCount, ++y)
-            {
-                ss[ssCount].x = (short) x;
-                ss[ssCount].y = (short) y;
-            }
-
-            y = yupper;
-            x = -x;
-
-            // right side
-            for (int j = -maxRange.height; j <= maxRange.height; ++j, ++ssCount, ++y)
-            {
-                ss[ssCount].x = (short) x;
-                ss[ssCount].y = (short) y;
-            }
-        }
-    }
-
-    const cudaStream_t stream = StreamAccessor::getStream(st);
-
-    ensureSizeIsEnough(1, ssCount, CV_16SC2, buf);
-    if (stream == 0)
-        cudaSafeCall( cudaMemcpy(buf.data, &ss[0], ssCount * sizeof(short2), cudaMemcpyHostToDevice) );
-    else
-        cudaSafeCall( cudaMemcpyAsync(buf.data, &ss[0], ssCount * sizeof(short2), cudaMemcpyHostToDevice, stream) );
-
-    const int maxX = prev.cols - blockSize.width;
-    const int maxY = prev.rows - blockSize.height;
-
-    const int SMALL_DIFF = 2;
-    const int BIG_DIFF = 128;
-
-    const int blSize = blockSize.area();
-    const int acceptLevel = blSize * SMALL_DIFF;
-    const int escapeLevel = blSize * BIG_DIFF;
-
-    optflowbm::calc(prev, curr, velx, vely,
-                    make_int2(blockSize.width, blockSize.height), make_int2(shiftSize.width, shiftSize.height), usePrevious,
-                    maxX, maxY, acceptLevel, escapeLevel, buf.ptr<short2>(), ssCount, stream);
-}
-
-#endif // HAVE_CUDA
--- a/modules/cudaoptflow/src/bm_fast.cpp
+++ b/modules/cudaoptflow/src/bm_fast.cpp
@@ -1,90 +0,0 @@
-/*M///////////////////////////////////////////////////////////////////////////////////////
-//
-//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
-//
-//  By downloading, copying, installing or using the software you agree to this license.
-//  If you do not agree to this license, do not download, install,
-//  copy or use the software.
-//
-//
-//                           License Agreement
-//                For Open Source Computer Vision Library
-//
-// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
-// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
-// Third party copyrights are property of their respective owners.
-//
-// Redistribution and use in source and binary forms, with or without modification,
-// are permitted provided that the following conditions are met:
-//
-//   * Redistribution's of source code must retain the above copyright notice,
-//     this list of conditions and the following disclaimer.
-//
-//   * Redistribution's in binary form must reproduce the above copyright notice,
-//     this list of conditions and the following disclaimer in the documentation
-//     and/or other materials provided with the distribution.
-//
-//   * The name of the copyright holders may not be used to endorse or promote products
-//     derived from this software without specific prior written permission.
-//
-// This software is provided by the copyright holders and contributors "as is" and
-// any express or implied warranties, including, but not limited to, the implied
-// warranties of merchantability and fitness for a particular purpose are disclaimed.
-// In no event shall the Intel Corporation or contributors be liable for any direct,
-// indirect, incidental, special, exemplary, or consequential damages
-// (including, but not limited to, procurement of substitute goods or services;
-// loss of use, data, or profits; or business interruption) however caused
-// and on any theory of liability, whether in contract, strict liability,
-// or tort (including negligence or otherwise) arising in any way out of
-// the use of this software, even if advised of the possibility of such damage.
-//
-//M*/
-
-#include "precomp.hpp"
-
-using namespace cv;
-using namespace cv::cuda;
-
-#if !defined HAVE_CUDA || defined(CUDA_DISABLER)
-
-void cv::cuda::FastOpticalFlowBM::operator ()(const GpuMat&, const GpuMat&, GpuMat&, GpuMat&, int, int, Stream&) { throw_no_cuda(); }
-
-#else // HAVE_CUDA
-
-namespace optflowbm_fast
-{
-    void get_buffer_size(int src_cols, int src_rows, int search_window, int block_window, int& buffer_cols, int& buffer_rows);
-
-    template <typename T>
-    void calc(PtrStepSzb I0, PtrStepSzb I1, PtrStepSzf velx, PtrStepSzf vely, PtrStepi buffer, int search_window, int block_window, cudaStream_t stream);
-}
-
-void cv::cuda::FastOpticalFlowBM::operator ()(const GpuMat& I0, const GpuMat& I1, GpuMat& flowx, GpuMat& flowy, int search_window, int block_window, Stream& stream)
-{
-    CV_Assert( I0.type() == CV_8UC1 );
-    CV_Assert( I1.size() == I0.size() && I1.type() == I0.type() );
-
-    int border_size = search_window / 2 + block_window / 2;
-    Size esize = I0.size() + Size(border_size, border_size) * 2;
-
-    ensureSizeIsEnough(esize, I0.type(), extended_I0);
-    ensureSizeIsEnough(esize, I0.type(), extended_I1);
-
-    cuda::copyMakeBorder(I0, extended_I0, border_size, border_size, border_size, border_size, cv::BORDER_DEFAULT, Scalar(), stream);
-    cuda::copyMakeBorder(I1, extended_I1, border_size, border_size, border_size, border_size, cv::BORDER_DEFAULT, Scalar(), stream);
-
-    GpuMat I0_hdr = extended_I0(Rect(Point2i(border_size, border_size), I0.size()));
-    GpuMat I1_hdr = extended_I1(Rect(Point2i(border_size, border_size), I0.size()));
-
-    int bcols, brows;
-    optflowbm_fast::get_buffer_size(I0.cols, I0.rows, search_window, block_window, bcols, brows);
-
-    ensureSizeIsEnough(brows, bcols, CV_32SC1, buffer);
-
-    flowx.create(I0.size(), CV_32FC1);
-    flowy.create(I0.size(), CV_32FC1);
-
-    optflowbm_fast::calc<uchar>(I0_hdr, I1_hdr, flowx, flowy, buffer, search_window, block_window, StreamAccessor::getStream(stream));
-}
-
-#endif // HAVE_CUDA
--- a/modules/cudaoptflow/src/cuda/bm.cu
+++ b/modules/cudaoptflow/src/cuda/bm.cu
@@ -1,169 +0,0 @@
-/*M///////////////////////////////////////////////////////////////////////////////////////
-//
-//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
-//
-//  By downloading, copying, installing or using the software you agree to this license.
-//  If you do not agree to this license, do not download, install,
-//  copy or use the software.
-//
-//
-//                           License Agreement
-//                For Open Source Computer Vision Library
-//
-// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
-// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
-// Third party copyrights are property of their respective owners.
-//
-// Redistribution and use in source and binary forms, with or without modification,
-// are permitted provided that the following conditions are met:
-//
-//   * Redistribution's of source code must retain the above copyright notice,
-//     this list of conditions and the following disclaimer.
-//
-//   * Redistribution's in binary form must reproduce the above copyright notice,
-//     this list of conditions and the following disclaimer in the documentation
-//     and/or other materials provided with the distribution.
-//
-//   * The name of the copyright holders may not be used to endorse or promote products
-//     derived from this software without specific prior written permission.
-//
-// This software is provided by the copyright holders and contributors "as is" and
-// any express or implied warranties, including, but not limited to, the implied
-// warranties of merchantability and fitness for a particular purpose are disclaimed.
-// In no event shall the Intel Corporation or contributors be liable for any direct,
-// indirect, incidental, special, exemplary, or consequential damages
-// (including, but not limited to, procurement of substitute goods or services;
-// loss of use, data, or profits; or business interruption) however caused
-// and on any theory of liability, whether in contract, strict liability,
-// or tort (including negligence or otherwise) arising in any way out of
-// the use of this software, even if advised of the possibility of such damage.
-//
-//M*/
-
-#if !defined CUDA_DISABLER
-
-#include "opencv2/core/cuda/common.hpp"
-#include "opencv2/core/cuda/limits.hpp"
-#include "opencv2/core/cuda/functional.hpp"
-#include "opencv2/core/cuda/reduce.hpp"
-
-using namespace cv::cuda;
-using namespace cv::cuda::device;
-
-namespace optflowbm
-{
-    texture<uchar, cudaTextureType2D, cudaReadModeElementType> tex_prev(false, cudaFilterModePoint, cudaAddressModeClamp);
-    texture<uchar, cudaTextureType2D, cudaReadModeElementType> tex_curr(false, cudaFilterModePoint, cudaAddressModeClamp);
-
-    __device__ int cmpBlocks(int X1, int Y1, int X2, int Y2, int2 blockSize)
-    {
-        int s = 0;
-
-        for (int y = 0; y < blockSize.y; ++y)
-        {
-            for (int x = 0; x < blockSize.x; ++x)
-                s += ::abs(tex2D(tex_prev, X1 + x, Y1 + y) - tex2D(tex_curr, X2 + x, Y2 + y));
-        }
-
-        return s;
-    }
-
-    __global__ void calcOptFlowBM(PtrStepSzf velx, PtrStepf vely, const int2 blockSize, const int2 shiftSize, const bool usePrevious,
-                                  const int maxX, const int maxY, const int acceptLevel, const int escapeLevel,
-                                  const short2* ss, const int ssCount)
-    {
-        const int j = blockIdx.x * blockDim.x + threadIdx.x;
-        const int i = blockIdx.y * blockDim.y + threadIdx.y;
-
-        if (i >= velx.rows || j >= velx.cols)
-            return;
-
-        const int X1 = j * shiftSize.x;
-        const int Y1 = i * shiftSize.y;
-
-        const int offX = usePrevious ? __float2int_rn(velx(i, j)) : 0;
-        const int offY = usePrevious ? __float2int_rn(vely(i, j)) : 0;
-
-        int X2 = X1 + offX;
-        int Y2 = Y1 + offY;
-
-        int dist = numeric_limits<int>::max();
-
-        if (0 <= X2 && X2 <= maxX && 0 <= Y2 && Y2 <= maxY)
-            dist = cmpBlocks(X1, Y1, X2, Y2, blockSize);
-
-        int countMin = 1;
-        int sumx = offX;
-        int sumy = offY;
-
-        if (dist > acceptLevel)
-        {
-            // do brute-force search
-            for (int k = 0; k < ssCount; ++k)
-            {
-                const short2 ssVal = ss[k];
-
-                const int dx = offX + ssVal.x;
-                const int dy = offY + ssVal.y;
-
-                X2 = X1 + dx;
-                Y2 = Y1 + dy;
-
-                if (0 <= X2 && X2 <= maxX && 0 <= Y2 && Y2 <= maxY)
-                {
-                    const int tmpDist = cmpBlocks(X1, Y1, X2, Y2, blockSize);
-                    if (tmpDist < acceptLevel)
-                    {
-                        sumx = dx;
-                        sumy = dy;
-                        countMin = 1;
-                        break;
-                    }
-
-                    if (tmpDist < dist)
-                    {
-                        dist = tmpDist;
-                        sumx = dx;
-                        sumy = dy;
-                        countMin = 1;
-                    }
-                    else if (tmpDist == dist)
-                    {
-                        sumx += dx;
-                        sumy += dy;
-                        countMin++;
-                    }
-                }
-            }
-
-            if (dist > escapeLevel)
-            {
-                sumx = offX;
-                sumy = offY;
-                countMin = 1;
-            }
-        }
-
-        velx(i, j) = static_cast<float>(sumx) / countMin;
-        vely(i, j) = static_cast<float>(sumy) / countMin;
-    }
-
-    void calc(PtrStepSzb prev, PtrStepSzb curr, PtrStepSzf velx, PtrStepSzf vely, int2 blockSize, int2 shiftSize, bool usePrevious,
-              int maxX, int maxY, int acceptLevel, int escapeLevel, const short2* ss, int ssCount, cudaStream_t stream)
-    {
-        bindTexture(&tex_prev, prev);
-        bindTexture(&tex_curr, curr);
-
-        const dim3 block(32, 8);
-        const dim3 grid(divUp(velx.cols, block.x), divUp(vely.rows, block.y));
-
-        calcOptFlowBM<<<grid, block, 0, stream>>>(velx, vely, blockSize, shiftSize, usePrevious,
-                                                  maxX, maxY, acceptLevel,  escapeLevel, ss, ssCount);
-        cudaSafeCall( cudaGetLastError() );
-
-        if (stream == 0)
-            cudaSafeCall( cudaDeviceSynchronize() );
-    }
-}
-
-#endif // !defined CUDA_DISABLER
--- a/modules/cudaoptflow/src/cuda/bm_fast.cu
+++ b/modules/cudaoptflow/src/cuda/bm_fast.cu
@@ -1,295 +0,0 @@
-/*M///////////////////////////////////////////////////////////////////////////////////////
-//
-//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
-//
-//  By downloading, copying, installing or using the software you agree to this license.
-//  If you do not agree to this license, do not download, install,
-//  copy or use the software.
-//
-//
-//                           License Agreement
-//                For Open Source Computer Vision Library
-//
-// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
-// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
-// Third party copyrights are property of their respective owners.
-//
-// Redistribution and use in source and binary forms, with or without modification,
-// are permitted provided that the following conditions are met:
-//
-//   * Redistribution's of source code must retain the above copyright notice,
-//     this list of conditions and the following disclaimer.
-//
-//   * Redistribution's in binary form must reproduce the above copyright notice,
-//     this list of conditions and the following disclaimer in the documentation
-//     and/or other materials provided with the distribution.
-//
-//   * The name of the copyright holders may not be used to endorse or promote products
-//     derived from this software without specific prior written permission.
-//
-// This software is provided by the copyright holders and contributors "as is" and
-// any express or implied warranties, including, but not limited to, the implied
-// warranties of merchantability and fitness for a particular purpose are disclaimed.
-// In no event shall the Intel Corporation or contributors be liable for any direct,
-// indirect, incidental, special, exemplary, or consequential damages
-// (including, but not limited to, procurement of substitute goods or services;
-// loss of use, data, or profits; or business interruption) however caused
-// and on any theory of liability, whether in contract, strict liability,
-// or tort (including negligence or otherwise) arising in any way out of
-// the use of this software, even if advised of the possibility of such damage.
-//
-//M*/
-
-#if !defined CUDA_DISABLER
-
-#include "opencv2/core/cuda/common.hpp"
-#include "opencv2/core/cuda/limits.hpp"
-#include "opencv2/core/cuda/functional.hpp"
-#include "opencv2/core/cuda/reduce.hpp"
-
-using namespace cv::cuda;
-using namespace cv::cuda::device;
-
-namespace optflowbm_fast
-{
-    enum
-    {
-        CTA_SIZE = 128,
-
-        TILE_COLS = 128,
-        TILE_ROWS = 32,
-
-        STRIDE = CTA_SIZE
-    };
-
-    template <typename T> __device__ __forceinline__ int calcDist(T a, T b)
-    {
-        return ::abs(a - b);
-    }
-
-    template <class T> struct FastOptFlowBM
-    {
-
-        int search_radius;
-        int block_radius;
-
-        int search_window;
-        int block_window;
-
-        PtrStepSz<T> I0;
-        PtrStep<T> I1;
-
-        mutable PtrStepi buffer;
-
-        FastOptFlowBM(int search_window_, int block_window_,
-                      PtrStepSz<T> I0_, PtrStepSz<T> I1_,
-                      PtrStepi buffer_) :
-            search_radius(search_window_ / 2), block_radius(block_window_ / 2),
-            search_window(search_window_), block_window(block_window_),
-            I0(I0_), I1(I1_),
-            buffer(buffer_)
-        {
-        }
-
-        __device__ __forceinline__ void initSums_BruteForce(int i, int j, int* dist_sums, PtrStepi& col_sums, PtrStepi& up_col_sums) const
-        {
-            for (int index = threadIdx.x; index < search_window * search_window; index += STRIDE)
-            {
-                dist_sums[index] = 0;
-
-                for (int tx = 0; tx < block_window; ++tx)
-                    col_sums(tx, index) = 0;
-
-                int y = index / search_window;
-                int x = index - y * search_window;
-
-                int ay = i;
-                int ax = j;
-
-                int by = i + y - search_radius;
-                int bx = j + x - search_radius;
-
-                for (int tx = -block_radius; tx <= block_radius; ++tx)
-                {
-                    int col_sum = 0;
-                    for (int ty = -block_radius; ty <= block_radius; ++ty)
-                    {
-                        int dist = calcDist(I0(ay + ty, ax + tx), I1(by + ty, bx + tx));
-
-                        dist_sums[index] += dist;
-                        col_sum += dist;
-                    }
-
-                    col_sums(tx + block_radius, index) = col_sum;
-                }
-
-                up_col_sums(j, index) = col_sums(block_window - 1, index);
-            }
-        }
-
-        __device__ __forceinline__ void shiftRight_FirstRow(int i, int j, int first, int* dist_sums, PtrStepi& col_sums, PtrStepi& up_col_sums) const
-        {
-            for (int index = threadIdx.x; index < search_window * search_window; index += STRIDE)
-            {
-                int y = index / search_window;
-                int x = index - y * search_window;
-
-                int ay = i;
-                int ax = j + block_radius;
-
-                int by = i + y - search_radius;
-                int bx = j + x - search_radius + block_radius;
-
-                int col_sum = 0;
-
-                for (int ty = -block_radius; ty <= block_radius; ++ty)
-                    col_sum += calcDist(I0(ay + ty, ax), I1(by + ty, bx));
-
-                dist_sums[index] += col_sum - col_sums(first, index);
-
-                col_sums(first, index) = col_sum;
-                up_col_sums(j, index) = col_sum;
-            }
-        }
-
-        __device__ __forceinline__ void shiftRight_UpSums(int i, int j, int first, int* dist_sums, PtrStepi& col_sums, PtrStepi& up_col_sums) const
-        {
-            int ay = i;
-            int ax = j + block_radius;
-
-            T a_up   = I0(ay - block_radius - 1, ax);
-            T a_down = I0(ay + block_radius, ax);
-
-            for(int index = threadIdx.x; index < search_window * search_window; index += STRIDE)
-            {
-                int y = index / search_window;
-                int x = index - y * search_window;
-
-                int by = i + y - search_radius;
-                int bx = j + x - search_radius + block_radius;
-
-                T b_up   = I1(by - block_radius - 1, bx);
-                T b_down = I1(by + block_radius, bx);
-
-                int col_sum = up_col_sums(j, index) + calcDist(a_down, b_down) - calcDist(a_up, b_up);
-
-                dist_sums[index] += col_sum  - col_sums(first, index);
-                col_sums(first, index) = col_sum;
-                up_col_sums(j, index) = col_sum;
-            }
-        }
-
-        __device__ __forceinline__ void convolve_window(int i, int j, const int* dist_sums, float& velx, float& vely) const
-        {
-            int bestDist = numeric_limits<int>::max();
-            int bestInd = -1;
-
-            for (int index = threadIdx.x; index < search_window * search_window; index += STRIDE)
-            {
-                int curDist = dist_sums[index];
-                if (curDist < bestDist)
-                {
-                    bestDist = curDist;
-                    bestInd = index;
-                }
-            }
-
-            __shared__ int cta_dist_buffer[CTA_SIZE];
-            __shared__ int cta_ind_buffer[CTA_SIZE];
-
-            reduceKeyVal<CTA_SIZE>(cta_dist_buffer, bestDist, cta_ind_buffer, bestInd, threadIdx.x, less<int>());
-
-            if (threadIdx.x == 0)
-            {
-                int y = bestInd / search_window;
-                int x = bestInd - y * search_window;
-
-                velx = x - search_radius;
-                vely = y - search_radius;
-            }
-        }
-
-        __device__ __forceinline__ void operator()(PtrStepf velx, PtrStepf vely) const
-        {
-            int tbx = blockIdx.x * TILE_COLS;
-            int tby = blockIdx.y * TILE_ROWS;
-
-            int tex = ::min(tbx + TILE_COLS, I0.cols);
-            int tey = ::min(tby + TILE_ROWS, I0.rows);
-
-            PtrStepi col_sums;
-            col_sums.data = buffer.ptr(I0.cols + blockIdx.x * block_window) + blockIdx.y * search_window * search_window;
-            col_sums.step = buffer.step;
-
-            PtrStepi up_col_sums;
-            up_col_sums.data = buffer.data + blockIdx.y * search_window * search_window;
-            up_col_sums.step = buffer.step;
-
-            extern __shared__ int dist_sums[]; //search_window * search_window
-
-            int first = 0;
-
-            for (int i = tby; i < tey; ++i)
-            {
-                for (int j = tbx; j < tex; ++j)
-                {
-                    __syncthreads();
-
-                    if (j == tbx)
-                    {
-                        initSums_BruteForce(i, j, dist_sums, col_sums, up_col_sums);
-                        first = 0;
-                    }
-                    else
-                    {
-                        if (i == tby)
-                          shiftRight_FirstRow(i, j, first, dist_sums, col_sums, up_col_sums);
-                        else
-                          shiftRight_UpSums(i, j, first, dist_sums, col_sums, up_col_sums);
-
-                        first = (first + 1) % block_window;
-                    }
-
-                    __syncthreads();
-
-                    convolve_window(i, j, dist_sums, velx(i, j), vely(i, j));
-                }
-            }
-        }
-
-    };
-
-    template<typename T> __global__ void optflowbm_fast_kernel(const FastOptFlowBM<T> fbm, PtrStepf velx, PtrStepf vely)
-    {
-        fbm(velx, vely);
-    }
-
-    void get_buffer_size(int src_cols, int src_rows, int search_window, int block_window, int& buffer_cols, int& buffer_rows)
-    {
-        dim3 grid(divUp(src_cols, TILE_COLS), divUp(src_rows, TILE_ROWS));
-
-        buffer_cols = search_window * search_window * grid.y;
-        buffer_rows = src_cols + block_window * grid.x;
-    }
-
-    template <typename T>
-    void calc(PtrStepSzb I0, PtrStepSzb I1, PtrStepSzf velx, PtrStepSzf vely, PtrStepi buffer, int search_window, int block_window, cudaStream_t stream)
-    {
-        FastOptFlowBM<T> fbm(search_window, block_window, I0, I1, buffer);
-
-        dim3 block(CTA_SIZE, 1);
-        dim3 grid(divUp(I0.cols, TILE_COLS), divUp(I0.rows, TILE_ROWS));
-
-        size_t smem = search_window * search_window * sizeof(int);
-
-        optflowbm_fast_kernel<<<grid, block, smem, stream>>>(fbm, velx, vely);
-        cudaSafeCall ( cudaGetLastError () );
-
-        if (stream == 0)
-            cudaSafeCall( cudaDeviceSynchronize() );
-    }
-
-    template void calc<uchar>(PtrStepSzb I0, PtrStepSzb I1, PtrStepSzf velx, PtrStepSzf vely, PtrStepi buffer, int search_window, int block_window, cudaStream_t stream);
-}
-
-#endif // !defined CUDA_DISABLER
--- a/modules/cudaoptflow/src/cuda/needle_map.cu
+++ b/modules/cudaoptflow/src/cuda/needle_map.cu
@@ -1,220 +0,0 @@
-/*M///////////////////////////////////////////////////////////////////////////////////////
-//
-//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
-//
-//  By downloading, copying, installing or using the software you agree to this license.
-//  If you do not agree to this license, do not download, install,
-//  copy or use the software.
-//
-//
-//                           License Agreement
-//                For Open Source Computer Vision Library
-//
-// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
-// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
-// Third party copyrights are property of their respective owners.
-//
-// Redistribution and use in source and binary forms, with or without modification,
-// are permitted provided that the following conditions are met:
-//
-//   * Redistribution's of source code must retain the above copyright notice,
-//     this list of conditions and the following disclaimer.
-//
-//   * Redistribution's in binary form must reproduce the above copyright notice,
-//     this list of conditions and the following disclaimer in the documentation
-//     and/or other materials provided with the distribution.
-//
-//   * The name of the copyright holders may not be used to endorse or promote products
-//     derived from this software without specific prior written permission.
-//
-// This software is provided by the copyright holders and contributors "as is" and
-// any express or implied warranties, including, but not limited to, the implied
-// warranties of merchantability and fitness for a particular purpose are disclaimed.
-// In no event shall the Intel Corporation or contributors be liable for any direct,
-// indirect, incidental, special, exemplary, or consequential damages
-// (including, but not limited to, procurement of substitute goods or services;
-// loss of use, data, or profits; or business interruption) however caused
-// and on any theory of liability, whether in contract, strict liability,
-// or tort (including negligence or otherwise) arising in any way out of
-// the use of this software, even if advised of the possibility of such damage.
-//
-//M*/
-
-#if !defined CUDA_DISABLER
-
-#include "opencv2/core/cuda/common.hpp"
-
-namespace cv { namespace cuda { namespace device
-{
-    namespace optical_flow
-    {
-        #define NEEDLE_MAP_SCALE 16
-        #define NUM_VERTS_PER_ARROW 6
-
-        // Averages the flow fields u and v over cells of
-        // NEEDLE_MAP_SCALE x NEEDLE_MAP_SCALE pixels and writes one value per
-        // cell to u_avg / v_avg.
-        //
-        // Launch layout: one block per output cell (blockIdx.x = cell column,
-        // blockIdx.y = cell row) with exactly NEEDLE_MAP_SCALE threads along x,
-        // one thread per pixel column of the cell.  Uses 2 * NEEDLE_MAP_SCALE
-        // floats of static shared memory.  The reduction below requires
-        // NEEDLE_MAP_SCALE to be a power of two.
-        __global__ void NeedleMapAverageKernel(const PtrStepSzf u, const PtrStepf v, PtrStepf u_avg, PtrStepf v_avg)
-        {
-            __shared__ float smem[2 * NEEDLE_MAP_SCALE];
-
-            float* u_col_sum = smem;
-            float* v_col_sum = u_col_sum + NEEDLE_MAP_SCALE;
-
-            const int x = blockIdx.x * NEEDLE_MAP_SCALE + threadIdx.x;
-            const int y = blockIdx.y * NEEDLE_MAP_SCALE;
-
-            // Each thread accumulates its own pixel column of the cell in registers.
-            float u_sum = 0.0f;
-            float v_sum = 0.0f;
-
-            #pragma unroll
-            for (int i = 0; i < NEEDLE_MAP_SCALE; ++i)
-            {
-                // Rows are clamped to the last row of u; v is read at the same row.
-                const int row = ::min(y + i, u.rows - 1);
-
-                u_sum += u(row, x);
-                v_sum += v(row, x);
-            }
-
-            u_col_sum[threadIdx.x] = u_sum;
-            v_col_sum[threadIdx.x] = v_sum;
-
-            // All column sums must be visible before any thread reads a neighbour's slot.
-            __syncthreads();
-
-            // Tree reduction over the NEEDLE_MAP_SCALE column sums.  The stride is
-            // halved each pass and a barrier separates the passes, so every
-            // column contributes to slot 0 exactly once.  The barrier sits
-            // outside the divergent branch so that all threads reach it.
-            for (int stride = NEEDLE_MAP_SCALE / 2; stride > 0; stride >>= 1)
-            {
-                if (threadIdx.x < stride)
-                {
-                    u_col_sum[threadIdx.x] += u_col_sum[threadIdx.x + stride];
-                    v_col_sum[threadIdx.x] += v_col_sum[threadIdx.x + stride];
-                }
-
-                __syncthreads();
-            }
-
-            if (threadIdx.x == 0)
-            {
-                // Turn the cell total into a mean.
-                const float coeff = 1.0f / (NEEDLE_MAP_SCALE * NEEDLE_MAP_SCALE);
-
-                u_avg(blockIdx.y, blockIdx.x) = u_col_sum[0] * coeff;
-                v_avg(blockIdx.y, blockIdx.x) = v_col_sum[0] * coeff;
-            }
-        }
-
-        // Runs NeedleMapAverageKernel with one block of NEEDLE_MAP_SCALE threads
-        // for every element of u_avg, checks the launch for errors and then
-        // blocks until the device has finished.
-        void NeedleMapAverage_gpu(PtrStepSzf u, PtrStepSzf v, PtrStepSzf u_avg, PtrStepSzf v_avg)
-        {
-            const dim3 threads(NEEDLE_MAP_SCALE);
-            const dim3 cells(u_avg.cols, u_avg.rows);
-
-            NeedleMapAverageKernel<<<cells, threads>>>(u, v, u_avg, v_avg);
-
-            cudaSafeCall( cudaGetLastError() );
-            cudaSafeCall( cudaDeviceSynchronize() );
-        }
-
-        // Emits the geometry of one needle (NUM_VERTS_PER_ARROW vertices) for
-        // every element of the averaged flow field u_avg / v_avg.
-        //
-        // One thread handles one element; threads outside the field do nothing.
-        // For element (x, y) the output starts at float offset
-        // (y * u_avg.cols + x) * NUM_VERTS_PER_ARROW * 3.  Each vertex stores
-        // x * xscale, y * yscale and z in vertex_data; in color_data only the
-        // first float of each 3-float entry is written, with an angle-derived
-        // value.
-        __global__ void NeedleMapVertexKernel(const PtrStepSzf u_avg, const PtrStepf v_avg, float* vertex_data, float* color_data, float max_flow, float xscale, float yscale)
-        {
-            const int x = blockIdx.x * blockDim.x + threadIdx.x;
-            const int y = blockIdx.y * blockDim.y + threadIdx.y;
-
-            // Nothing to emit for threads that fall outside the flow field.
-            if (x >= u_avg.cols || y >= u_avg.rows)
-                return;
-
-            // Needle origin: the centre of the NEEDLE_MAP_SCALE-sized cell.
-            const float arrow_x = x * NEEDLE_MAP_SCALE + NEEDLE_MAP_SCALE / 2.0f;
-            const float arrow_y = y * NEEDLE_MAP_SCALE + NEEDLE_MAP_SCALE / 2.0f;
-
-            const float u_avg_val = u_avg(y, x);
-            const float v_avg_val = v_avg(y, x);
-
-            const float theta = ::atan2f(v_avg_val, u_avg_val);
-
-            // Needle length: flow magnitude relative to max_flow, capped at 14.
-            float r = ::sqrtf(v_avg_val * v_avg_val + u_avg_val * u_avg_val);
-            r = fmin(14.0f * (r / max_flow), 14.0f);
-
-            float3 v[NUM_VERTS_PER_ARROW];
-
-            // Vertices 0 and 5 sit on the origin, vertices 2 and 3 on the tip.
-            v[0] = make_float3(arrow_x, arrow_y, 1.0f);
-            v[5] = v[0];
-
-            v[2] = make_float3(arrow_x + r * ::cosf(theta), arrow_y + r * ::sinf(theta), 0.7f);
-            v[3] = v[2];
-
-            // Vertices 1 and 4 lie perpendicular to the needle, at most 2.5 from the origin.
-            r = ::fmin(r, 2.5f);
-
-            const float theta_minus = theta - CV_PI_F / 2.0f;
-            const float theta_plus  = theta + CV_PI_F / 2.0f;
-
-            v[1] = make_float3(arrow_x + r * ::cosf(theta_minus), arrow_y + r * ::sinf(theta_minus), 0.7f);
-            v[4] = make_float3(arrow_x + r * ::cosf(theta_plus), arrow_y + r * ::sinf(theta_plus), 0.7f);
-
-            // The same angle-derived value is stored for all vertices of the needle.
-            const float angle_value = (theta - CV_PI_F) / CV_PI_F * 180.0f;
-
-            const int base = (y * u_avg.cols + x) * NUM_VERTS_PER_ARROW * 3;
-
-            #pragma unroll
-            for (int k = 0; k < NUM_VERTS_PER_ARROW; ++k)
-            {
-                const int indx = base + 3 * k;
-
-                color_data[indx]      = angle_value;
-                vertex_data[indx]     = v[k].x * xscale;
-                vertex_data[indx + 1] = v[k].y * yscale;
-                vertex_data[indx + 2] = v[k].z;
-            }
-        }
-
-        // Runs NeedleMapVertexKernel over the averaged flow field, one thread per
-        // element, checks the launch for errors and then blocks until the device
-        // has finished.
-        void CreateOpticalFlowNeedleMap_gpu(PtrStepSzf u_avg, PtrStepSzf v_avg, float* vertex_buffer, float* color_data, float max_flow, float xscale, float yscale)
-        {
-            // 16 x 1 threads per block: dim3 leaves the unspecified y extent at 1.
-            const dim3 threads(16);
-            const dim3 blocks(divUp(u_avg.cols, threads.x), divUp(u_avg.rows, threads.y));
-
-            NeedleMapVertexKernel<<<blocks, threads>>>(u_avg, v_avg, vertex_buffer, color_data, max_flow, xscale, yscale);
-
-            cudaSafeCall( cudaGetLastError() );
-            cudaSafeCall( cudaDeviceSynchronize() );
-        }
-    }
-}}}
-
-#endif /* CUDA_DISABLER */
--- a/modules/cudaoptflow/src/interpolate_frames.cpp
+++ b/modules/cudaoptflow/src/interpolate_frames.cpp
@@ -1,113 +0,0 @@
-/*M///////////////////////////////////////////////////////////////////////////////////////
-//
-//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
-//
-//  By downloading, copying, installing or using the software you agree to this license.
-//  If you do not agree to this license, do not download, install,
-//  copy or use the software.
-//
-//
-//                           License Agreement
-//                For Open Source Computer Vision Library
-//
-// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
-// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
-// Third party copyrights are property of their respective owners.
-//
-// Redistribution and use in source and binary forms, with or without modification,
-// are permitted provided that the following conditions are met:
-//
-//   * Redistribution's of source code must retain the above copyright notice,
-//     this list of conditions and the following disclaimer.
-//
-//   * Redistribution's in binary form must reproduce the above copyright notice,
-//     this list of conditions and the following disclaimer in the documentation
-//     and/or other materials provided with the distribution.
-//
-//   * The name of the copyright holders may not be used to endorse or promote products
-//     derived from this software without specific prior written permission.
-//
-// This software is provided by the copyright holders and contributors "as is" and
-// any express or implied warranties, including, but not limited to, the implied
-// warranties of merchantability and fitness for a particular purpose are disclaimed.
-// In no event shall the Intel Corporation or contributors be liable for any direct,
-// indirect, incidental, special, exemplary, or consequential damages
-// (including, but not limited to, procurement of substitute goods or services;
-// loss of use, data, or profits; or business interruption) however caused
-// and on any theory of liability, whether in contract, strict liability,
-// or tort (including negligence or otherwise) arising in any way out of
-// the use of this software, even if advised of the possibility of such damage.
-//
-//M*/
-
-#include "precomp.hpp"
-
-using namespace cv;
-using namespace cv::cuda;
-
-#if !defined (HAVE_CUDA) || !defined (HAVE_OPENCV_CUDALEGACY) || defined (CUDA_DISABLER)
-
-// Stub compiled when CUDA or the cudalegacy module is unavailable, or when
-// CUDA_DISABLER is defined: every call reports the missing CUDA support.
-void cv::cuda::interpolateFrames(const GpuMat&, const GpuMat&, const GpuMat&,
-                                 const GpuMat&, const GpuMat&, const GpuMat&,
-                                 float, GpuMat&, GpuMat&, Stream&)
-{
-    throw_no_cuda();
-}
-
-#else
-
-// Interpolates a frame at position pos between frame0 and frame1 by handing
-// the frames and their flow fields to nppiStInterpolateFrames.
-//   frame0, frame1 - source frames, CV_32FC1
-//   fu, fv         - forward flow components
-//   bu, bv         - backward flow components
-//   pos            - position value passed through to the NCV state
-//   newFrame       - output, (re)allocated with the size and type of frame0
-//   buf            - work buffer, (re)allocated as six stacked frame-sized
-//                    CV_32FC1 planes and cleared to zero
-//   s              - stream used for the NCV call; when it maps to the default
-//                    CUDA stream the device is synchronized before returning
-// All inputs must match frame0 in size and type, and every matrix involved
-// must have the same row step.
-void cv::cuda::interpolateFrames(const GpuMat& frame0, const GpuMat& frame1, const GpuMat& fu, const GpuMat& fv, const GpuMat& bu, const GpuMat& bv,
-                                float pos, GpuMat& newFrame, GpuMat& buf, Stream& s)
-{
-    CV_Assert(frame0.type() == CV_32FC1);
-    CV_Assert(frame1.size() == frame0.size() && frame1.type() == frame0.type());
-    CV_Assert(fu.size() == frame0.size() && fu.type() == frame0.type());
-    CV_Assert(fv.size() == frame0.size() && fv.type() == frame0.type());
-    CV_Assert(bu.size() == frame0.size() && bu.type() == frame0.type());
-    CV_Assert(bv.size() == frame0.size() && bv.type() == frame0.type());
-
-    // Work planes inside buf, in order: two occlusion masks, interpolated
-    // forward flow (u, v), interpolated backward flow (u, v).
-    const int numPlanes = 6;
-
-    newFrame.create(frame0.size(), frame0.type());
-
-    buf.create(numPlanes * frame0.rows, frame0.cols, CV_32FC1);
-    buf.setTo(Scalar::all(0));
-
-    size_t step = frame0.step;
-
-    CV_Assert(frame1.step == step && fu.step == step && fv.step == step && bu.step == step && bv.step == step && newFrame.step == step && buf.step == step);
-
-    cudaStream_t stream = StreamAccessor::getStream(s);
-    NppStStreamHandler h(stream);
-
-    NppStInterpolationState state;
-
-    state.size       = NcvSize32u(frame0.cols, frame0.rows);
-    state.nStep      = static_cast<Ncv32u>(step);
-    state.pos        = pos;
-
-    state.pSrcFrame0 = const_cast<Ncv32f*>(frame0.ptr<Ncv32f>());
-    state.pSrcFrame1 = const_cast<Ncv32f*>(frame1.ptr<Ncv32f>());
-
-    state.pFU        = const_cast<Ncv32f*>(fu.ptr<Ncv32f>());
-    state.pFV        = const_cast<Ncv32f*>(fv.ptr<Ncv32f>());
-    state.pBU        = const_cast<Ncv32f*>(bu.ptr<Ncv32f>());
-    state.pBV        = const_cast<Ncv32f*>(bv.ptr<Ncv32f>());
-
-    state.pNewFrame  = newFrame.ptr<Ncv32f>();
-
-    // Each work plane is a frame-sized band of rows inside buf.
-    for (int plane = 0; plane < numPlanes; ++plane)
-    {
-        state.ppBuffers[plane] = buf.rowRange(plane * frame0.rows, (plane + 1) * frame0.rows).ptr<Ncv32f>();
-    }
-
-    ncvSafeCall( nppiStInterpolateFrames(&state) );
-
-    if (stream == 0)
-    {
-        cudaSafeCall( cudaDeviceSynchronize() );
-    }
-}
-
-#endif /* HAVE_CUDA */
--- a/modules/cudaoptflow/src/needle_map.cpp
+++ b/modules/cudaoptflow/src/needle_map.cpp
@@ -1,100 +0,0 @@
-/*M///////////////////////////////////////////////////////////////////////////////////////
-//
-//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
-//
-//  By downloading, copying, installing or using the software you agree to this license.
-//  If you do not agree to this license, do not download, install,
-//  copy or use the software.
-//
-//
-//                           License Agreement
-//                For Open Source Computer Vision Library
-//
-// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
-// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
-// Third party copyrights are property of their respective owners.
-//
-// Redistribution and use in source and binary forms, with or without modification,
-// are permitted provided that the following conditions are met:
-//
-//   * Redistribution's of source code must retain the above copyright notice,
-//     this list of conditions and the following disclaimer.
-//
-//   * Redistribution's in binary form must reproduce the above copyright notice,
-//     this list of conditions and the following disclaimer in the documentation
-//     and/or other materials provided with the distribution.
-//
-//   * The name of the copyright holders may not be used to endorse or promote products
-//     derived from this software without specific prior written permission.
-//
-// This software is provided by the copyright holders and contributors "as is" and
-// any express or implied warranties, including, but not limited to, the implied
-// warranties of merchantability and fitness for a particular purpose are disclaimed.
-// In no event shall the Intel Corporation or contributors be liable for any direct,
-// indirect, incidental, special, exemplary, or consequential damages
-// (including, but not limited to, procurement of substitute goods or services;
-// loss of use, data, or profits; or business interruption) however caused
-// and on any theory of liability, whether in contract, strict liability,
-// or tort (including negligence or otherwise) arising in any way out of
-// the use of this software, even if advised of the possibility of such damage.
-//
-//M*/
-
-#include "precomp.hpp"
-
-using namespace cv;
-using namespace cv::cuda;
-
-#if !defined (HAVE_CUDA) || defined (CUDA_DISABLER)
-
-// Stub compiled when CUDA is unavailable or CUDA_DISABLER is defined: every
-// call reports the missing CUDA support.
-void cv::cuda::createOpticalFlowNeedleMap(const GpuMat&, const GpuMat&,
-                                          GpuMat&, GpuMat&)
-{
-    throw_no_cuda();
-}
-
-#else
-
-// Forward declarations of the device-side launchers called by
-// createOpticalFlowNeedleMap below.
-namespace cv { namespace cuda { namespace device
-{
-    namespace optical_flow
-    {
-        // Fills u_avg / v_avg from the flow fields u / v.
-        void NeedleMapAverage_gpu(PtrStepSzf u, PtrStepSzf v, PtrStepSzf u_avg, PtrStepSzf v_avg);
-        // Writes needle vertex and colour data for the averaged flow into the raw float buffers.
-        void CreateOpticalFlowNeedleMap_gpu(PtrStepSzf u_avg, PtrStepSzf v_avg, float* vertex_buffer, float* color_data, float max_flow, float xscale, float yscale);
-    }
-}}}
-
-// Builds needle-map geometry for the flow field (u, v).
-//   u, v   - flow components, CV_32FC1 and of identical size
-//   vertex - output, 1 x N CV_32FC3 with N = x_needles * y_needles * 6;
-//            coordinates are scaled by 1 / u.cols and 1 / u.rows
-//   colors - output, 1 x N CV_32FC3, converted from HSV to RGB before returning
-// One needle is produced per complete 16 x 16 pixel cell of the flow field.
-// When the field is smaller than one cell in either dimension there is
-// nothing to draw and both outputs are returned empty.
-void cv::cuda::createOpticalFlowNeedleMap(const GpuMat& u, const GpuMat& v, GpuMat& vertex, GpuMat& colors)
-{
-    using namespace cv::cuda::device::optical_flow;
-
-    CV_Assert(u.type() == CV_32FC1);
-    CV_Assert(v.type() == u.type() && v.size() == u.size());
-
-    const int NEEDLE_MAP_SCALE = 16;
-
-    const int x_needles = u.cols / NEEDLE_MAP_SCALE;
-    const int y_needles = u.rows / NEEDLE_MAP_SCALE;
-
-    // An empty needle grid would make the averaging step launch a kernel with
-    // a zero-sized grid, which is an invalid launch configuration.
-    if (x_needles == 0 || y_needles == 0)
-    {
-        vertex.release();
-        colors.release();
-        return;
-    }
-
-    GpuMat u_avg(y_needles, x_needles, CV_32FC1);
-    GpuMat v_avg(y_needles, x_needles, CV_32FC1);
-
-    NeedleMapAverage_gpu(u, v, u_avg, v_avg);
-
-    const int NUM_VERTS_PER_ARROW = 6;
-
-    const int num_arrows = x_needles * y_needles * NUM_VERTS_PER_ARROW;
-
-    vertex.create(1, num_arrows, CV_32FC3);
-    colors.create(1, num_arrows, CV_32FC3);
-
-    // All colour channels start at 1; the device code then overwrites the
-    // first channel of every entry.
-    colors.setTo(Scalar::all(1.0));
-
-    // NOTE(review): only the maxima of the averaged components are used, so
-    // negative flow does not contribute and max_flow can be 0 - confirm that
-    // this is the intended normalisation.
-    double uMax, vMax;
-    cuda::minMax(u_avg, 0, &uMax);
-    cuda::minMax(v_avg, 0, &vMax);
-
-    float max_flow = static_cast<float>(std::sqrt(uMax * uMax + vMax * vMax));
-
-    CreateOpticalFlowNeedleMap_gpu(u_avg, v_avg, vertex.ptr<float>(), colors.ptr<float>(), max_flow, 1.0f / u.cols, 1.0f / u.rows);
-
-    cuda::cvtColor(colors, colors, COLOR_HSV2RGB);
-}
-
-#endif /* HAVE_CUDA */