From 3fb0bf6e996b5aa7c094d055578a520e7764aa6f Mon Sep 17 00:00:00 2001
From: Jin Ma <jinma06njuee@gmail.om>
Date: Fri, 16 Aug 2013 14:19:46 +0800
Subject: [PATCH 01/41] Added MOG and MOG2.

---
 modules/ocl/include/opencv2/ocl/ocl.hpp | 149 ++++++
 modules/ocl/perf/perf_bgfg.cpp          | 333 +++++++++++++
 modules/ocl/src/bgfg_mog.cpp            | 630 ++++++++++++++++++++++++
 modules/ocl/src/opencl/bgfg_mog.cl      | 543 ++++++++++++++++++++
 modules/ocl/test/test_bgfg.cpp          | 232 +++++++++
 modules/ocl/test/test_optflow.cpp       |   6 +-
 modules/ocl/test/utility.cpp            |  38 ++
 modules/ocl/test/utility.hpp            |   3 +
 samples/ocl/bgfg_segm.cpp               | 135 +++++
 9 files changed, 2066 insertions(+), 3 deletions(-)
 create mode 100644 modules/ocl/perf/perf_bgfg.cpp
 create mode 100644 modules/ocl/src/bgfg_mog.cpp
 create mode 100644 modules/ocl/src/opencl/bgfg_mog.cl
 create mode 100644 modules/ocl/test/test_bgfg.cpp
 create mode 100644 samples/ocl/bgfg_segm.cpp

diff --git a/modules/ocl/include/opencv2/ocl/ocl.hpp b/modules/ocl/include/opencv2/ocl/ocl.hpp
index aa0283fbe..f250646f5 100644
--- a/modules/ocl/include/opencv2/ocl/ocl.hpp
+++ b/modules/ocl/include/opencv2/ocl/ocl.hpp
@@ -1698,6 +1698,155 @@ namespace cv
         //    keys   = {1,    2,   3}   (CV_8UC1)
         //    values = {6,2, 10,5, 4,3} (CV_8UC2)
         void CV_EXPORTS sortByKey(oclMat& keys, oclMat& values, int method, bool isGreaterThan = false);
+        /*! Abstract base class for the OpenCL background subtractors MOG and MOG2. */
+        class CV_EXPORTS BackgroundSubtractor
+        {
+        public:
+            //! the virtual destructor, so that subtractors can be destroyed through a base-class pointer
+            virtual ~BackgroundSubtractor();
+            //! the update operator that takes the next video frame and returns the current foreground mask as 8-bit binary image.
+            virtual void operator()(const oclMat& image, oclMat& fgmask, float learningRate);
+            //  (not pure virtual: the base-class definition of operator() has an empty body)
+            //! computes a background image; pure virtual, so every concrete subtractor has to provide it
+            virtual void getBackgroundImage(oclMat& backgroundImage) const = 0;
+        };
+        /*!
+        Gaussian Mixture-based Background/Foreground Segmentation Algorithm
+
+        The class implements the following algorithm:
+        "An improved adaptive background mixture model for real-time tracking with shadow detection"
+        P. KaewTraKulPong and R. Bowden,
+        Proc. 2nd European Workshop on Advanced Video-Based Surveillance Systems, 2001.
+        http://personal.ee.surrey.ac.uk/Personal/R.Bowden/publications/avbs01/avbs01.pdf
+        */
+        class CV_EXPORTS MOG: public cv::ocl::BackgroundSubtractor
+        {
+        public:
+            //! the default constructor; a non-positive nmixtures selects the default (5), values above 8 are clamped to 8
+            MOG(int nmixtures = -1);
+
+            //! re-initialization method: reallocates and zeroes the model; frameType must be CV_8UC1, CV_8UC3 or CV_8UC4
+            void initialize(Size frameSize, int frameType);
+
+            //! the update operator; learningRate >= 1 rebuilds the model, a negative one selects 1/min(nframes, history)
+            void operator()(const oclMat& frame, oclMat& fgmask, float learningRate = 0.f);
+
+            //! computes a background image which is the mean of all background gaussians
+            void getBackgroundImage(oclMat& backgroundImage) const;
+
+            //! releases all inner buffers
+            void release();
+
+            int history;            //!< frame count bounding the automatic learning rate 1/min(nframes, history)
+            float varThreshold;     //!< forwarded unchanged to the OpenCL kernels
+            float backgroundRatio;  //!< forwarded unchanged to the OpenCL kernels
+            float noiseSigma;       //!< its square is passed to the learning kernel as the minimum variance
+
+        private:
+            int nmixtures_;         // number of mixture components per pixel (at most 8)
+
+            Size frameSize_;        // size of the frames the model was built for
+            int frameType_;         // type of those frames
+            int nframes_;           // frames processed since the last initialize()
+            // model buffers: each is created with (frame height * nmixtures_) rows and frame-width columns
+            oclMat weight_;
+            oclMat sortKey_;
+            oclMat mean_;
+            oclMat var_;
+        };
+
+        /*!
+        The class implements the following algorithm:
+        "Improved adaptive Gaussian mixture model for background subtraction"
+        Z.Zivkovic
+        International Conference Pattern Recognition, UK, August, 2004.
+        http://www.zoranz.net/Publications/zivkovic2004ICPR.pdf
+        */
+        class CV_EXPORTS MOG2: public cv::ocl::BackgroundSubtractor
+        {
+        public:
+            //! the default constructor
+            MOG2(int nmixtures = -1);
+
+            //! re-initialization method
+            void initialize(Size frameSize, int frameType);
+
+            //! the update operator
+            void operator()(const oclMat& frame, oclMat& fgmask, float learningRate = -1.0f);
+
+            //! computes a background image which is the mean of all background gaussians
+            void getBackgroundImage(oclMat& backgroundImage) const;
+
+            //! releases all inner buffers
+            void release();
+
+            // parameters
+            // you should call initialize after changing any of the parameters
+
+            int history;
+            //! nmixtures (the constructor argument) is the maximum allowed number of mixture components.
+            //! The actual number is determined dynamically per pixel.
+
+            float varThreshold;
+            // threshold on the squared Mahalanobis distance to decide if it is well described
+            // by the background model or not. Related to Cthr from the paper.
+            // This does not influence the update of the background. A typical value could be 4 sigma
+            // and that is varThreshold=4*4=16; Corresponds to Tb in the paper.
+
+            /////////////////////////
+            // less important parameters - things you might change but be careful
+            ////////////////////////
+
+            float backgroundRatio;
+            // corresponds to fTB=1-cf from the paper
+            // TB - threshold when the component becomes significant enough to be included into
+            // the background model. It is the TB=1-cf from the paper. So I use cf=0.1 => TB=0.9.
+            // For alpha=0.001 it means that the mode should exist for approximately 105 frames before
+            // it is considered foreground
+            // float noiseSigma;
+            float varThresholdGen;
+
+            //corresponds to Tg - threshold on the squared Mahalan. dist. to decide
+            //when a sample is close to the existing components. If it is not close
+            //to any a new component will be generated. I use 3 sigma => Tg=3*3=9.
+            //Smaller Tg leads to more generated components and higher Tg might
+            //lead to small number of components but they can grow too large
+            float fVarInit;
+            float fVarMin;
+            float fVarMax;
+
+            //initial variance for the newly generated components.
+            //It will influence the speed of adaptation. A good guess should be made.
+            //A simple way is to estimate the typical standard deviation from the images.
+            //I used here 10 as a reasonable value
+            // min and max can be used to further control the variance
+            float fCT; //CT - complexity reduction prior
+            //this is related to the number of samples needed to accept that a component
+            //actually exists. We use CT=0.05 of all the samples. By setting CT=0 you get
+            //the standard Stauffer&Grimson algorithm (maybe not exact but very similar)
+
+            //shadow detection parameters
+            bool bShadowDetection; //default 1 - do shadow detection
+            unsigned char nShadowDetection; //do shadow detection - insert this value as the detection result - 127 default value
+            float fTau;
+            // Tau - shadow threshold. The shadow is detected if the pixel is a darker
+            //version of the background. Tau is a threshold on how much darker the shadow can be.
+            //Tau= 0.5 means that if pixel is more than 2 times darker then it is not shadow
+            //See: Prati,Mikic,Trivedi,Cucchiarra,"Detecting Moving Shadows...",IEEE PAMI,2003.
+
+        private:
+            int nmixtures_;
+
+            Size frameSize_;
+            int frameType_;
+            int nframes_;
+
+            oclMat weight_;
+            oclMat variance_;
+            oclMat mean_;
+
+            oclMat bgmodelUsedModes_; //keep track of number of modes per pixel
+        };
     }
 }
 #if defined _MSC_VER && _MSC_VER >= 1200
diff --git a/modules/ocl/perf/perf_bgfg.cpp b/modules/ocl/perf/perf_bgfg.cpp
new file mode 100644
index 000000000..e7aad759f
--- /dev/null
+++ b/modules/ocl/perf/perf_bgfg.cpp
@@ -0,0 +1,333 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                           License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2010-2013, Multicoreware, Inc., all rights reserved.
+// Copyright (C) 2010-2013, Advanced Micro Devices, Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// @Authors
+//    Jin Ma, jin@multicorewareinc.com
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors as is and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+#include "precomp.hpp"
+using namespace cv;
+using namespace cv::ocl;
+
+void cvtFrameFmt(std::vector<Mat>& input, std::vector<Mat>& output, int output_cn)
+{
+    // Converts every RGB frame of input to gray (output_cn == 1) or RGBA (any other value).
+    const int code = (output_cn == 1) ? COLOR_RGB2GRAY : COLOR_RGB2RGBA;
+    CV_Assert(output.size() >= input.size()); // output[i] is indexed, never appended to
+    for(size_t i = 0; i < input.size(); i++)
+    {
+        cvtColor(input[i], output[i], code);
+    }
+}
+///////////// MOG////////////////////////
+PERFTEST(mog)
+{
+    const string inputFile[] = {"768x576.avi", "1920x1080.avi"};
+    int cn[] = {1, 3};
+    // Times MOG on the CPU and on OpenCL, then compares the per-frame foreground masks.
+    float learningRate[] = {0.0f, 0.01f};
+
+    for(unsigned int idx = 0; idx < sizeof(inputFile)/sizeof(string); idx++)
+    {
+        VideoCapture cap(inputFile[idx]);
+        ASSERT_TRUE(cap.isOpened());
+
+        Mat frame;
+        int nframe = 5;
+        // CPU masks are written straight into foreground_buf_cpu, one Mat per frame.
+        oclMat foreground_ocl;
+        std::vector<cv::Mat> frame_buffer_init;
+        std::vector<Mat> frame_buffer(nframe);
+        std::vector<oclMat> frame_buffer_ocl;
+        std::vector<Mat> foreground_buf_ocl;
+        std::vector<Mat> foreground_buf_cpu;
+        // The CPU reference model is created per subtest so that it always starts empty.
+        cv::ocl::MOG d_mog;
+        for(int i = 0; i < nframe; i++)
+        {
+            cap >> frame;
+            ASSERT_FALSE(frame.empty());
+            frame_buffer_init.push_back(frame.clone()); // deep copy: the capture reuses frame's buffer
+        }
+
+        for(unsigned int i = 0; i < sizeof(learningRate)/sizeof(float); i++)
+        {
+            for(unsigned int j = 0; j < sizeof(cn)/sizeof(int); j++)
+            {
+                SUBTEST << frame.cols << 'x' << frame.rows << ".avi; "<<"channels: "<<cn[j]<<"; learningRate: "<<learningRate[i];
+                if(cn[j]==1)
+                    cvtFrameFmt(frame_buffer_init, frame_buffer, 1);
+                else
+                    frame_buffer=frame_buffer_init;
+                BackgroundSubtractorMOG mog_cpu;
+                foreground_buf_cpu.assign(nframe, Mat());
+                CPU_ON;
+                for(int iter = 0; iter < nframe; iter++)
+                {
+                    // Each call fills its own Mat, so the stored masks cannot alias one buffer.
+                    mog_cpu(frame_buffer[iter], foreground_buf_cpu[iter], learningRate[i]);
+                }
+                CPU_OFF;
+
+                WARMUP_ON;
+                d_mog(oclMat(frame_buffer[0]), foreground_ocl, learningRate[i]);
+                WARMUP_OFF;
+
+                frame_buffer_ocl.clear();
+                for(int iter =0; iter < nframe; iter++)
+                    frame_buffer_ocl.push_back(oclMat(frame_buffer[iter]));
+
+                GPU_ON;
+                for(int iter = 0; iter < nframe; iter++)
+                {
+                    d_mog(frame_buffer_ocl[iter], foreground_ocl, learningRate[i]);
+                }
+                GPU_OFF;
+                d_mog.initialize(frame_buffer[0].size(), frame_buffer[0].type()); // compared pass starts from an empty model, like mog_cpu
+                foreground_buf_ocl.clear();
+                GPU_FULL_ON;
+                for(int iter = 0; iter < nframe; iter++)
+                {
+                    d_mog(oclMat(frame_buffer[iter]), foreground_ocl, learningRate[i]);
+                    cv::Mat temp;
+                    foreground_ocl.download(temp);
+                    foreground_buf_ocl.push_back(temp);
+                }
+                GPU_FULL_OFF;
+
+                for(int iter = 0; iter < nframe; iter++)
+                    TestSystem::instance().ExpectedMatNear(foreground_buf_ocl[iter], foreground_buf_cpu[iter], 0.0);
+
+            }
+        }
+        cap.release();
+        d_mog.release();
+    }
+}
+
+///////////// MOG2////////////////////////
+PERFTEST(mog2)
+{
+    const string inputFile[] = {"768x576.avi", "1920x1080.avi"};
+    int cn[] = {1, 3, 4};
+
+    for(unsigned int idx = 0; idx < sizeof(inputFile)/sizeof(string); idx++)
+    {
+        cv::VideoCapture cap(inputFile[idx]);
+        ASSERT_TRUE(cap.isOpened());
+
+        cv::Mat frame;
+        int nframe = 5;
+        std::vector<cv::Mat> frame_buffer_init;
+        std::vector<cv::Mat> frame_buffer(nframe);
+        std::vector<cv::ocl::oclMat> frame_buffer_ocl;
+        std::vector<cv::Mat> foreground_buf_ocl;
+        std::vector<cv::Mat> foreground_buf_cpu;
+        cv::ocl::oclMat foreground_ocl;
+
+        for(int i = 0; i < nframe; i++)
+        {
+            cap >> frame;
+            ASSERT_FALSE(frame.empty());
+            frame_buffer_init.push_back(frame.clone()); // deep copy: the capture reuses frame's buffer
+        }
+        cv::ocl::MOG2 d_mog;
+        d_mog.bShadowDetection = false; // the CPU reference below runs without shadow detection as well
+        for(unsigned int j = 0; j < sizeof(cn)/sizeof(int); j++)
+        {
+            SUBTEST << frame.cols << 'x' << frame.rows << ".avi; "<<"channels: "<<cn[j];
+
+            if(cn[j] == 3)
+                frame_buffer=frame_buffer_init;
+            else
+                cvtFrameFmt(frame_buffer_init, frame_buffer, cn[j]); // 1 -> gray, 4 -> RGBA
+
+            cv::BackgroundSubtractorMOG2 mog_cpu;
+            mog_cpu.set("detectShadows", false);
+            // CPU masks are written straight into foreground_buf_cpu, one Mat per frame.
+
+            foreground_buf_cpu.assign(nframe, cv::Mat());
+            CPU_ON;
+            for(int iter = 0; iter < nframe; iter++)
+            {
+                // Each call fills its own Mat, so the stored masks cannot alias one buffer.
+                mog_cpu(frame_buffer[iter], foreground_buf_cpu[iter]);
+            }
+            CPU_OFF;
+
+            WARMUP_ON;
+            d_mog(oclMat(frame_buffer[0]), foreground_ocl);
+            WARMUP_OFF;
+
+            frame_buffer_ocl.clear();
+
+            for(int iter =0; iter < nframe; iter++)
+                frame_buffer_ocl.push_back(oclMat(frame_buffer[iter]));
+
+            GPU_ON;
+            for(int iter = 0; iter < nframe; iter++)
+            {
+                d_mog(frame_buffer_ocl[iter], foreground_ocl);
+            }
+            GPU_OFF;
+
+            foreground_buf_ocl.clear();
+            d_mog.initialize(frame_buffer[0].size(), frame_buffer[0].type()); // compared pass starts from an empty model, like mog_cpu
+            GPU_FULL_ON;
+            for(int iter = 0; iter < nframe; iter++)
+            {
+                d_mog(oclMat(frame_buffer[iter]), foreground_ocl);
+
+                cv::Mat temp1;
+                foreground_ocl.download(temp1);
+                foreground_buf_ocl.push_back(temp1);
+            }
+            GPU_FULL_OFF;
+
+            for(int iter = 0; iter < nframe; iter++)
+                TestSystem::instance().ExpectedMatNear(foreground_buf_ocl[iter], foreground_buf_cpu[iter], 0.0);
+
+        }
+        cap.release();
+        d_mog.release();
+    }
+}
+
+///////////// MOG2GetBackgroundImage////////////////////////
+PERFTEST(mog2_GetBackgroundImage)
+{
+    const string inputFile[] = {"768x576.avi", "1920x1080.avi"};
+    int cn[] = {3};
+
+    for(unsigned int idx = 0; idx < sizeof(inputFile)/sizeof(string); idx++)
+    {
+        cv::VideoCapture cap(inputFile[idx]);
+        ASSERT_TRUE(cap.isOpened());
+
+        cv::Mat frame;
+        cap >> frame;
+        ASSERT_FALSE(frame.empty());
+
+        int nframe = 5;
+        std::vector<cv::Mat> frame_buffer_init;
+        std::vector<cv::Mat> frame_buffer(nframe);
+        std::vector<cv::ocl::oclMat> frame_buffer_ocl;
+        std::vector<cv::Mat> foreground_buf_ocl;
+        std::vector<cv::Mat> foreground_buf_cpu;
+
+        for(int i = 0; i < nframe; i++)
+        {
+            cap >> frame;
+            ASSERT_FALSE(frame.empty());
+            frame_buffer_init.push_back(frame.clone()); // deep copy: the capture reuses frame's buffer
+        }
+
+        for(unsigned int j = 0; j < sizeof(cn)/sizeof(int); j++)
+        {
+            SUBTEST << frame.cols << 'x' << frame.rows << ".avi; "<<"channels: "<<cn[j];
+
+            if(cn[j] == 1)
+                cvtFrameFmt(frame_buffer_init, frame_buffer, 1);
+            else
+                frame_buffer=frame_buffer_init;
+
+            cv::BackgroundSubtractorMOG2 mog_cpu;
+            cv::Mat foreground_cpu;
+            cv::Mat background_cpu;
+
+            mog_cpu(frame, foreground_cpu);
+            mog_cpu.getBackgroundImage(background_cpu);
+
+            foreground_cpu.release();
+            background_cpu.release();
+
+            cv::ocl::oclMat d_frame(frame);
+            cv::ocl::MOG2 d_mog;
+            cv::ocl::oclMat foreground_ocl;
+            cv::ocl::oclMat background_ocl;
+
+            frame_buffer_ocl.clear();
+            for(int iter =0; iter < nframe; iter++)
+                frame_buffer_ocl.push_back(oclMat(frame_buffer[iter]));
+
+            foreground_buf_cpu.assign(nframe, cv::Mat());
+            CPU_ON;
+            for(int iter = 0; iter < nframe; iter++)
+            {
+                mog_cpu(frame_buffer[iter], foreground_buf_cpu[iter]);
+            }
+            mog_cpu.getBackgroundImage(background_cpu);
+            CPU_OFF;
+
+            WARMUP_ON;
+            d_mog(d_frame, foreground_ocl);
+            WARMUP_OFF;
+
+            foreground_ocl.release();
+
+            GPU_ON;
+            for(int iter = 0; iter < nframe; iter++)
+            {
+                d_mog(frame_buffer_ocl[iter], foreground_ocl);
+            }
+            d_mog.getBackgroundImage(background_ocl);
+            GPU_OFF;
+
+            foreground_buf_ocl.clear();
+            // Restart the OpenCL model and replay the priming frame: same sequence as mog_cpu saw.
+            d_mog.initialize(frame.size(), frame.type());
+            d_mog(d_frame, foreground_ocl);
+            cv::Mat temp1;
+            GPU_FULL_ON;
+            for(int iter = 0; iter < nframe; iter++)
+            {
+                d_mog(oclMat(frame_buffer[iter]), foreground_ocl);
+                cv::Mat mask; // fresh Mat per frame so the stored masks do not alias
+                foreground_ocl.download(mask);
+                foreground_buf_ocl.push_back(mask);
+            }
+            d_mog.getBackgroundImage(background_ocl);
+            GPU_FULL_OFF;
+
+            background_ocl.download(temp1);
+            TestSystem::instance().ExpectedMatNear(temp1, background_cpu, 0.0);
+        }
+    }
+}
diff --git a/modules/ocl/src/bgfg_mog.cpp b/modules/ocl/src/bgfg_mog.cpp
new file mode 100644
index 000000000..33bf143d2
--- /dev/null
+++ b/modules/ocl/src/bgfg_mog.cpp
@@ -0,0 +1,630 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                           License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2010-2013, Multicoreware, Inc., all rights reserved.
+// Copyright (C) 2010-2013, Advanced Micro Devices, Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// @Authors
+//    Jin Ma, jin@multicorewareinc.com
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors as is and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#include "precomp.hpp"
+using namespace cv;
+using namespace cv::ocl;
+namespace cv 
+{
+    namespace ocl
+    {
+        extern const char* bgfg_mog; // program source handed to openCLExecuteKernel by the launchers in this file
+        // Constant block; the field names mirror the parameters of device::mog::loadConstants().
+        typedef struct _contant_struct
+        {
+            cl_float c_Tb;
+            cl_float c_TB;
+            cl_float c_Tg;
+            cl_float c_varInit;
+            cl_float c_varMin;
+            cl_float c_varMax;
+            cl_float c_tau;
+            cl_uchar c_shadowVal;
+        }contant_struct;
+        // Namespace-scope state (not per instance). NOTE(review): where these are assigned is outside the visible code.
+        cl_mem cl_constants = NULL; // released by MOG::release()
+        float c_TB;
+    }
+}
+
+#if _MSC_VER
+#define snprintf sprintf_s
+#endif
+
+namespace cv { namespace ocl { namespace device
+{
+    namespace mog
+    {   // mog_ocl: runs the learning kernel when learningRate > 0 and the non-learning kernel otherwise
+        void mog_ocl(const oclMat& frame, int cn, oclMat& fgmask, oclMat& weight, oclMat& sortKey, oclMat& mean, oclMat& var,
+            int nmixtures, float varThreshold, float learningRate, float backgroundRatio, float noiseSigma);
+        // called by MOG::getBackgroundImage with dst as the output image (definition only partly visible here)
+        void getBackgroundImage_ocl(int cn, const oclMat& weight, const oclMat& mean, oclMat& dst, int nmixtures, float backgroundRatio);
+        // NOTE(review): the three functions below are defined outside the visible part of this file
+        void loadConstants(float Tb, float TB, float Tg, float varInit, float varMin, float varMax, float tau, 
+                            unsigned char shadowVal);
+        // presumably the MOG2 counterpart of mog_ocl -- confirm against the definition
+        void mog2_ocl(const oclMat& frame, int cn, oclMat& fgmask, oclMat& modesUsed, oclMat& weight, oclMat& variance, oclMat& mean, 
+                      float alphaT, float prune, bool detectShadows, int nmixtures);
+        // presumably the MOG2 counterpart of getBackgroundImage_ocl -- confirm against the definition
+        void getBackgroundImage2_ocl(int cn, const oclMat& modesUsed, const oclMat& weight, const oclMat& mean, oclMat& dst, int nmixtures);
+    }
+}}}
+
+namespace mog
+{
+    const int defaultNMixtures = 5;                  // used when the MOG constructor gets a non-positive nmixtures
+    const int defaultHistory = 200;                  // initial value of MOG::history
+    const float defaultBackgroundRatio = 0.7f;       // initial value of MOG::backgroundRatio
+    const float defaultVarThreshold = 2.5f * 2.5f;   // initial value of MOG::varThreshold (2.5 squared)
+    const float defaultNoiseSigma = 30.0f * 0.5f;    // initial value of MOG::noiseSigma
+    const float defaultInitialWeight = 0.05f;        // NOTE(review): not referenced in the visible part of this file
+}
+void cv::ocl::BackgroundSubtractor::operator()(const oclMat&, oclMat&, float)
+{
+    // Empty default: the base class performs no update and ignores all three arguments.
+}
+cv::ocl::BackgroundSubtractor::~BackgroundSubtractor()
+{
+    // Nothing to release: the base class declares no data members.
+}
+
+cv::ocl::MOG::MOG(int nmixtures) :
+    history(mog::defaultHistory),
+    varThreshold(mog::defaultVarThreshold),
+    backgroundRatio(mog::defaultBackgroundRatio),
+    noiseSigma(mog::defaultNoiseSigma),
+    nmixtures_(std::min(nmixtures > 0 ? nmixtures : mog::defaultNMixtures, 8)),
+    frameSize_(0, 0), frameType_(0), nframes_(0)
+{   // A non-positive nmixtures selects the default; the component count is capped at 8.
+}
+
+void cv::ocl::MOG::initialize(cv::Size frameSize, int frameType)
+{
+    CV_Assert(frameType == CV_8UC1 || frameType == CV_8UC3 || frameType == CV_8UC4);
+
+    frameSize_ = frameSize;
+    frameType_ = frameType;
+
+    // Per pixel and mixture component the model keeps a weight (w), a sort key
+    // (w/sum_of_variances), a mean and a diagonal covariance, the last two with one
+    // value per channel. Every buffer gets frame height * nmixtures_ rows.
+    const int modelRows = frameSize_.height * nmixtures_;
+    const int modelCols = frameSize_.width;
+    const int perChannelType = CV_32FC(CV_MAT_CN(frameType));
+
+    weight_.create(modelRows, modelCols, CV_32FC1);
+    sortKey_.create(modelRows, modelCols, CV_32FC1);
+    mean_.create(modelRows, modelCols, perChannelType);
+    var_.create(modelRows, modelCols, perChannelType);
+
+    const cv::Scalar zero = cv::Scalar::all(0);
+    weight_.setTo(zero);
+    sortKey_.setTo(zero);
+    mean_.setTo(zero);
+    var_.setTo(zero);
+
+    nframes_ = 0; // the next operator() call is treated as the first frame
+}
+
+void cv::ocl::MOG::operator()(const cv::ocl::oclMat& frame, cv::ocl::oclMat& fgmask, float learningRate)
+{
+    CV_Assert(frame.depth() == CV_8U);
+    const int channels = frame.oclchannels();
+
+    // Rebuild the model on the first frame, for learningRate >= 1, or when size/channels changed.
+    const bool layoutChanged = frame.size() != frameSize_ || channels != mean_.oclchannels();
+    if (nframes_ == 0 || learningRate >= 1.0 || layoutChanged)
+        initialize(frame.size(), frame.type());
+
+    fgmask.create(frameSize_, CV_8UC1);
+    ++nframes_;
+
+    // A negative rate, or the first frame, selects the automatic rate 1 / min(nframes_, history).
+    if (!(learningRate >= 0.0f && nframes_ > 1))
+        learningRate = 1.0f / std::min(nframes_, history);
+    CV_Assert(learningRate >= 0.0f);
+
+    cv::ocl::device::mog::mog_ocl(frame, channels, fgmask, weight_, sortKey_, mean_, var_, nmixtures_,
+        varThreshold, learningRate, backgroundRatio, noiseSigma);
+}
+
+void cv::ocl::MOG::getBackgroundImage(oclMat& backgroundImage) const
+{
+    // The output takes the size and type recorded by the last initialize().
+    backgroundImage.create(frameSize_, frameType_);
+    const int channels = backgroundImage.oclchannels();
+    using cv::ocl::device::mog::getBackgroundImage_ocl;
+    getBackgroundImage_ocl(channels, weight_, mean_, backgroundImage, nmixtures_, backgroundRatio);
+}
+
+void cv::ocl::MOG::release()
+{
+    frameSize_ = Size(0, 0);
+    frameType_ = 0;
+    nframes_ = 0;
+    weight_.release();
+    sortKey_.release();
+    mean_.release();
+    var_.release();
+    // cl_constants is namespace-scope state: release it at most once and never while it is NULL.
+    if (cl_constants != NULL) { clReleaseMemObject(cl_constants); cl_constants = NULL; }
+}
+
+static void mog_withoutLearning(const oclMat& frame, int cn, oclMat& fgmask, oclMat& weight, oclMat& mean, oclMat& var,
+    int nmixtures, float varThreshold, float backgroundRatio)
+{
+    // Launches "mog_withoutLearning_kernel" over a frame.cols x frame.rows global range.
+    // Unlike mog_withLearning, no sort-key buffer, learning rate or minimum variance is passed.
+    // Steps and offsets are handed to the kernel in elements, not bytes.
+    Context* clCxt = Context::getContext();
+
+    size_t local_thread[] = {32, 8, 1};
+    // Explicit casts: int -> size_t inside braces is a narrowing conversion, ill-formed since C++11.
+    size_t global_thread[] = {(size_t)frame.cols, (size_t)frame.rows, 1};
+
+    int frame_step = (int)(frame.step/frame.elemSize());
+    int fgmask_step = (int)(fgmask.step/fgmask.elemSize());
+    int weight_step = (int)(weight.step/weight.elemSize());
+    int mean_step = (int)(mean.step/mean.elemSize());
+    int var_step = (int)(var.step/var.elemSize());
+
+    // Split each byte offset into a row index (y) and a column index in elements (x).
+    int fgmask_offset_y = (int)(fgmask.offset/fgmask.step);
+    int fgmask_offset_x = (int)(fgmask.offset%fgmask.step);
+    fgmask_offset_x = fgmask_offset_x/(int)fgmask.elemSize();
+
+    int frame_offset_y = (int)(frame.offset/frame.step);
+    int frame_offset_x = (int)(frame.offset%frame.step);
+    frame_offset_x = frame_offset_x/(int)frame.elemSize();
+
+    // CN1 is defined only for single-channel input; NMIXTURES is always passed as a build-time define.
+    char build_option[50];
+    snprintf(build_option, sizeof(build_option), "%s-D NMIXTURES=%d", cn == 1 ? "-D CN1 " : "", nmixtures);
+
+    String kernel_name = "mog_withoutLearning_kernel";
+    vector< pair<size_t, const void*> > args;
+
+    args.push_back(make_pair(sizeof(cl_mem), (void*)&frame.data));
+    args.push_back(make_pair(sizeof(cl_mem), (void*)&fgmask.data));
+    args.push_back(make_pair(sizeof(cl_mem), (void*)&weight.data));
+    args.push_back(make_pair(sizeof(cl_mem), (void*)&mean.data));
+    args.push_back(make_pair(sizeof(cl_mem), (void*)&var.data));
+
+    args.push_back(make_pair(sizeof(cl_int), (void*)&frame.rows));
+    args.push_back(make_pair(sizeof(cl_int), (void*)&frame.cols));
+
+    args.push_back(make_pair(sizeof(cl_int), (void*)&frame_step));
+    args.push_back(make_pair(sizeof(cl_int), (void*)&fgmask_step));
+    args.push_back(make_pair(sizeof(cl_int), (void*)&weight_step));
+    args.push_back(make_pair(sizeof(cl_int), (void*)&mean_step));
+    args.push_back(make_pair(sizeof(cl_int), (void*)&var_step));
+
+    args.push_back(make_pair(sizeof(cl_float), (void*)&varThreshold));
+    args.push_back(make_pair(sizeof(cl_float), (void*)&backgroundRatio));
+
+    args.push_back(make_pair(sizeof(cl_int), (void*)&fgmask_offset_x));
+    args.push_back(make_pair(sizeof(cl_int), (void*)&fgmask_offset_y));
+
+    args.push_back(make_pair(sizeof(cl_int), (void*)&frame_offset_x));
+    args.push_back(make_pair(sizeof(cl_int), (void*)&frame_offset_y));
+
+    openCLExecuteKernel(clCxt, &bgfg_mog, kernel_name, global_thread, local_thread, args, -1, -1, build_option);
+}
+
+
+static void mog_withLearning(const oclMat& frame, int cn, oclMat& fgmask, oclMat& weight, oclMat& sortKey, oclMat& mean, oclMat& var,
+    int nmixtures, float varThreshold, float backgroundRatio, float learningRate, float minVar)
+{
+    // Launches "mog_withLearning_kernel" over a frame.cols x frame.rows global range.
+    // In addition to the non-learning launcher it passes the sort-key buffer, learningRate and minVar.
+    // Steps and offsets are handed to the kernel in elements, not bytes.
+    Context* clCxt = Context::getContext();
+
+    size_t local_thread[] = {32, 8, 1};
+    // Explicit casts: int -> size_t inside braces is a narrowing conversion, ill-formed since C++11.
+    size_t global_thread[] = {(size_t)frame.cols, (size_t)frame.rows, 1};
+
+    int frame_step = (int)(frame.step/frame.elemSize());
+    int fgmask_step = (int)(fgmask.step/fgmask.elemSize());
+    int weight_step = (int)(weight.step/weight.elemSize());
+    int sortKey_step = (int)(sortKey.step/sortKey.elemSize());
+    int mean_step = (int)(mean.step/mean.elemSize());
+    int var_step = (int)(var.step/var.elemSize());
+
+    // Split each byte offset into a row index (y) and a column index in elements (x).
+    int fgmask_offset_y = (int)(fgmask.offset/fgmask.step);
+    int fgmask_offset_x = (int)(fgmask.offset%fgmask.step);
+    fgmask_offset_x = fgmask_offset_x/(int)fgmask.elemSize();
+
+    int frame_offset_y = (int)(frame.offset/frame.step);
+    int frame_offset_x = (int)(frame.offset%frame.step);
+    frame_offset_x = frame_offset_x/(int)frame.elemSize();
+
+    // CN1 is defined only for single-channel input; NMIXTURES is always passed as a build-time define.
+    char build_option[50];
+    snprintf(build_option, sizeof(build_option), "%s-D NMIXTURES=%d", cn == 1 ? "-D CN1 " : "", nmixtures);
+
+    String kernel_name = "mog_withLearning_kernel";
+    vector< pair<size_t, const void*> > args;
+
+    args.push_back(make_pair(sizeof(cl_mem), (void*)&frame.data));
+    args.push_back(make_pair(sizeof(cl_mem), (void*)&fgmask.data));
+    args.push_back(make_pair(sizeof(cl_mem), (void*)&weight.data));
+    args.push_back(make_pair(sizeof(cl_mem), (void*)&sortKey.data));
+    args.push_back(make_pair(sizeof(cl_mem), (void*)&mean.data));
+    args.push_back(make_pair(sizeof(cl_mem), (void*)&var.data));
+
+    args.push_back(make_pair(sizeof(cl_int), (void*)&frame.rows));
+    args.push_back(make_pair(sizeof(cl_int), (void*)&frame.cols));
+
+    args.push_back(make_pair(sizeof(cl_int), (void*)&frame_step));
+    args.push_back(make_pair(sizeof(cl_int), (void*)&fgmask_step));
+    args.push_back(make_pair(sizeof(cl_int), (void*)&weight_step));
+    args.push_back(make_pair(sizeof(cl_int), (void*)&sortKey_step));
+    args.push_back(make_pair(sizeof(cl_int), (void*)&mean_step));
+    args.push_back(make_pair(sizeof(cl_int), (void*)&var_step));
+
+    args.push_back(make_pair(sizeof(cl_float), (void*)&varThreshold));
+    args.push_back(make_pair(sizeof(cl_float), (void*)&backgroundRatio));
+    args.push_back(make_pair(sizeof(cl_float), (void*)&learningRate));
+    args.push_back(make_pair(sizeof(cl_float), (void*)&minVar));
+
+    args.push_back(make_pair(sizeof(cl_int), (void*)&fgmask_offset_x));
+    args.push_back(make_pair(sizeof(cl_int), (void*)&fgmask_offset_y));
+
+    args.push_back(make_pair(sizeof(cl_int), (void*)&frame_offset_x));
+    args.push_back(make_pair(sizeof(cl_int), (void*)&frame_offset_y));
+
+    openCLExecuteKernel(clCxt, &bgfg_mog, kernel_name, global_thread, local_thread, args, -1, -1, build_option);
+}
+
+void cv::ocl::device::mog::mog_ocl(const oclMat& frame, int cn, oclMat& fgmask, oclMat& weight, oclMat& sortKey, oclMat& mean, oclMat& var,
+    int nmixtures, float varThreshold, float learningRate, float backgroundRatio, float noiseSigma)
+{
+    // A learning rate that is not strictly positive selects the variant that takes no sortKey, rate or minimum variance.
+    if(!(learningRate > 0.0f))
+    {
+        mog_withoutLearning(frame, cn, fgmask, weight, mean, var, nmixtures, varThreshold, backgroundRatio);
+        return;
+    }
+    mog_withLearning(frame, cn, fgmask, weight, sortKey, mean, var, nmixtures, varThreshold, backgroundRatio, learningRate, noiseSigma * noiseSigma);
+}
+
+void cv::ocl::device::mog::getBackgroundImage_ocl(int cn, const oclMat& weight, const oclMat& mean, oclMat& dst, int nmixtures, float backgroundRatio)
+{
+    // Host-side launcher for "getBackgroundImage_kernel" from the bgfg_mog program.
+    // No offset of weight, mean or dst is forwarded to the kernel.
+    typedef pair<size_t, const void*> KernelArg;
+    Context* clCxt = Context::getContext();
+
+    // CN1 (single channel) and NMIXTURES reach the kernel as build-time macros.
+    char build_option[50];
+    snprintf(build_option, 50, cn == 1 ? "-D CN1 -D NMIXTURES=%d" : "-D NMIXTURES=%d", nmixtures);
+
+    // Row steps expressed in elements instead of bytes.
+    int weightStep = (int)(weight.step / weight.elemSize());
+    int meanStep   = (int)(mean.step / mean.elemSize());
+    int dstStep    = (int)(dst.step / dst.elemSize());
+
+    // Positional kernel arguments: buffers, dst size, element steps, background ratio.
+    vector<KernelArg> args;
+
+    args.push_back(KernelArg(sizeof(cl_mem), &weight.data));
+    args.push_back(KernelArg(sizeof(cl_mem), &mean.data));
+    args.push_back(KernelArg(sizeof(cl_mem), &dst.data));
+
+    args.push_back(KernelArg(sizeof(cl_int), &dst.rows));
+    args.push_back(KernelArg(sizeof(cl_int), &dst.cols));
+
+    args.push_back(KernelArg(sizeof(cl_int), &weightStep));
+    args.push_back(KernelArg(sizeof(cl_int), &meanStep));
+    args.push_back(KernelArg(sizeof(cl_int), &dstStep));
+
+    args.push_back(KernelArg(sizeof(cl_float), &backgroundRatio));
+
+    size_t local_thread[] = {32, 8, 1};
+    size_t global_thread[] = {dst.cols, dst.rows, 1};
+
+    String kernel_name = "getBackgroundImage_kernel";
+    openCLExecuteKernel(clCxt, &bgfg_mog, kernel_name, global_thread, local_thread, args, -1, -1, build_option);
+}
+
+void cv::ocl::device::mog::loadConstants(float Tb, float TB, float Tg, float varInit, float varMin, float varMax, float tau, unsigned char shadowVal)
+{
+    // Fills cl_constants with the given parameters and mirrors TB in the host-side c_TB.
+    // Both variance bounds are derived from the caller's values so a swapped pair is reordered, not collapsed.
+    const float varLo = cv::min(varMin, varMax);
+    const float varHi = cv::max(varMin, varMax);
+    c_TB = TB;
+
+    // NOTE(review): this struct is never freed and a buffer already held in cl_constants is not released - confirm load_constant's copy semantics before changing.
+    _contant_struct *constants = new _contant_struct;
+    constants->c_Tb = Tb;
+    constants->c_TB = TB;
+    constants->c_Tg = Tg;
+    constants->c_varInit = varInit;
+    constants->c_varMin = varLo;
+    constants->c_varMax = varHi;
+    constants->c_tau = tau;
+    constants->c_shadowVal = shadowVal;
+    cl_constants = load_constant(*((cl_context*)getoclContext()), *((cl_command_queue*)getoclCommandQueue()), (void *)constants, sizeof(_contant_struct));
+}
+
+void cv::ocl::device::mog::mog2_ocl(const oclMat& frame, int cn, oclMat& fgmask, oclMat& modesUsed, oclMat& weight, oclMat& variance,
+                                oclMat& mean, float alphaT, float prune, bool detectShadows, int nmixtures)
+{
+    // Host-side launcher for "mog2_kernel" from the bgfg_mog program.
+    // Kernel arguments are positional, so the queueing order below matters:
+    // buffers, frame size, element steps, update factors, shadow flag, the x/y
+    // offsets of the mask and of the frame, and finally the constant buffer.
+    typedef pair<size_t, const void*> KernelArg;
+    Context* clCxt = Context::getContext();
+
+    // The kernel receives both alphaT and its complement, plus the flag as an int.
+    const float alpha1 = 1.0f - alphaT;
+    cl_int detectShadows_flag = detectShadows ? 1 : 0;
+
+    // CN1 (single channel) and NMIXTURES reach the kernel as build-time macros.
+    char build_option[50];
+    snprintf(build_option, 50, cn == 1 ? "-D CN1 -D NMIXTURES=%d" : "-D NMIXTURES=%d", nmixtures);
+
+    // Row steps expressed in elements instead of bytes.
+    int frameStep     = (int)(frame.step / frame.elemSize());
+    int fgmaskStep    = (int)(fgmask.step / fgmask.elemSize());
+    int weightStep    = (int)(weight.step / weight.elemSize());
+    int modesUsedStep = (int)(modesUsed.step / modesUsed.elemSize());
+    int meanStep      = (int)(mean.step / mean.elemSize());
+    int varStep       = (int)(variance.step / variance.elemSize());
+
+    // Byte offsets of mask and frame split into element column (x) and row (y).
+    int fgmaskOffX = (int)(fgmask.offset % fgmask.step) / (int)fgmask.elemSize();
+    int fgmaskOffY = (int)(fgmask.offset / fgmask.step);
+    int frameOffX  = (int)(frame.offset % frame.step) / (int)frame.elemSize();
+    int frameOffY  = (int)(frame.offset / frame.step);
+
+    vector<KernelArg> args;
+
+    args.push_back(KernelArg(sizeof(cl_mem), &frame.data));
+    args.push_back(KernelArg(sizeof(cl_mem), &fgmask.data));
+    args.push_back(KernelArg(sizeof(cl_mem), &weight.data));
+    args.push_back(KernelArg(sizeof(cl_mem), &mean.data));
+    args.push_back(KernelArg(sizeof(cl_mem), &modesUsed.data));
+    args.push_back(KernelArg(sizeof(cl_mem), &variance.data));
+
+    args.push_back(KernelArg(sizeof(cl_int), &frame.rows));
+    args.push_back(KernelArg(sizeof(cl_int), &frame.cols));
+
+    args.push_back(KernelArg(sizeof(cl_int), &frameStep));
+    args.push_back(KernelArg(sizeof(cl_int), &fgmaskStep));
+    args.push_back(KernelArg(sizeof(cl_int), &weightStep));
+    args.push_back(KernelArg(sizeof(cl_int), &meanStep));
+    args.push_back(KernelArg(sizeof(cl_int), &modesUsedStep));
+    args.push_back(KernelArg(sizeof(cl_int), &varStep));
+
+    args.push_back(KernelArg(sizeof(cl_float), &alphaT));
+    args.push_back(KernelArg(sizeof(cl_float), &alpha1));
+    args.push_back(KernelArg(sizeof(cl_float), &prune));
+
+    args.push_back(KernelArg(sizeof(cl_int), &detectShadows_flag));
+
+    args.push_back(KernelArg(sizeof(cl_int), &fgmaskOffX));
+    args.push_back(KernelArg(sizeof(cl_int), &fgmaskOffY));
+
+    args.push_back(KernelArg(sizeof(cl_int), &frameOffX));
+    args.push_back(KernelArg(sizeof(cl_int), &frameOffY));
+
+    // cl_constants is the buffer assigned by loadConstants().
+    args.push_back(KernelArg(sizeof(cl_mem), &cl_constants));
+
+    // 32x8 work-groups over a frame.cols x frame.rows global range.
+    size_t local_thread[] = {32, 8, 1};
+    size_t global_thread[] = {frame.cols, frame.rows, 1};
+
+    String kernel_name = "mog2_kernel";
+    openCLExecuteKernel(clCxt, &bgfg_mog, kernel_name, global_thread, local_thread, args, -1, -1, build_option);
+}
+
+void cv::ocl::device::mog::getBackgroundImage2_ocl(int cn, const oclMat& modesUsed, const oclMat& weight, const oclMat& mean, oclMat& dst, int nmixtures)
+{
+    // Host-side launcher for "getBackgroundImage2_kernel" from the bgfg_mog program.
+    // The global range follows modesUsed; only dst has its offset forwarded.
+    typedef pair<size_t, const void*> KernelArg;
+    Context* clCxt = Context::getContext();
+
+    // CN1 (single channel) and NMIXTURES reach the kernel as build-time macros.
+    char build_option[50];
+    snprintf(build_option, 50, cn == 1 ? "-D CN1 -D NMIXTURES=%d" : "-D NMIXTURES=%d", nmixtures);
+
+    // Row steps expressed in elements instead of bytes.
+    int weightStep    = (int)(weight.step / weight.elemSize());
+    int modesUsedStep = (int)(modesUsed.step / modesUsed.elemSize());
+    int meanStep      = (int)(mean.step / mean.elemSize());
+    int dstStep       = (int)(dst.step / dst.elemSize());
+
+    // Byte offset of dst split into element column (x) and row (y).
+    int dstOffX = (int)(dst.offset % dst.step) / (int)dst.elemSize();
+    int dstOffY = (int)(dst.offset / dst.step);
+
+    // Positional kernel arguments: buffers, c_TB, map size, element steps, dst offset.
+    vector<KernelArg> args;
+
+    args.push_back(KernelArg(sizeof(cl_mem), &modesUsed.data));
+    args.push_back(KernelArg(sizeof(cl_mem), &weight.data));
+    args.push_back(KernelArg(sizeof(cl_mem), &mean.data));
+    args.push_back(KernelArg(sizeof(cl_mem), &dst.data));
+    args.push_back(KernelArg(sizeof(cl_float), &c_TB));
+
+    args.push_back(KernelArg(sizeof(cl_int), &modesUsed.rows));
+    args.push_back(KernelArg(sizeof(cl_int), &modesUsed.cols));
+
+    args.push_back(KernelArg(sizeof(cl_int), &modesUsedStep));
+    args.push_back(KernelArg(sizeof(cl_int), &weightStep));
+    args.push_back(KernelArg(sizeof(cl_int), &meanStep));
+    args.push_back(KernelArg(sizeof(cl_int), &dstStep));
+
+    args.push_back(KernelArg(sizeof(cl_int), &dstOffX));
+    args.push_back(KernelArg(sizeof(cl_int), &dstOffY));
+
+    size_t local_thread[] = {32, 8, 1};
+    size_t global_thread[] = {modesUsed.cols, modesUsed.rows, 1};
+
+    String kernel_name = "getBackgroundImage2_kernel";
+    openCLExecuteKernel(clCxt, &bgfg_mog, kernel_name, global_thread, local_thread, args, -1, -1, build_option);
+}
+
+/////////////////////////////////////////////////////////////////
+// MOG2
+
+namespace mog2
+{
+    // default parameters of gaussian background detection algorithm
+    const int defaultHistory = 500; // history length; MOG2::operator() falls back to learningRate = 1 / min(2 * nframes, history)
+    const float defaultVarThreshold = 4.0f * 4.0f; // uploaded as c_Tb: factor on the variance in the kernel's background test
+    const int defaultNMixtures = 5; // maximal number of Gaussians in mixture
+    const float defaultBackgroundRatio = 0.9f; // threshold sum of weights for background test
+    const float defaultVarThresholdGen = 3.0f * 3.0f; // uploaded as c_Tg: factor on the variance when testing whether a pixel fits a mode
+    const float defaultVarInit = 15.0f; // initial variance for new components
+    const float defaultVarMax = 5.0f * defaultVarInit; // upper clamp for an updated variance
+    const float defaultVarMin = 4.0f; // lower clamp for an updated variance
+
+    // additional parameters
+    const float defaultfCT = 0.05f; // complexity reduction prior constant 0 - no reduction of number of components
+    const unsigned char defaultnShadowDetection = 127; // value to use in the segmentation mask for shadows, set 0 not to do shadow detection
+    const float defaultfTau = 0.5f; // Tau - shadow threshold, see the paper for explanation
+}
+
+cv::ocl::MOG2::MOG2(int nmixtures) : frameSize_(0, 0), frameType_(0), nframes_(0)
+{
+    // A non-positive mixture count falls back to the default.
+    nmixtures_ = nmixtures <= 0 ? mog2::defaultNMixtures : nmixtures;
+
+    // Every tunable starts at its mog2:: default; shadow detection is switched on.
+    bShadowDetection = true;
+    nShadowDetection = mog2::defaultnShadowDetection;
+    fTau = mog2::defaultfTau;
+    fCT = mog2::defaultfCT;
+    history = mog2::defaultHistory;
+    backgroundRatio = mog2::defaultBackgroundRatio;
+    varThreshold = mog2::defaultVarThreshold;
+    varThresholdGen = mog2::defaultVarThresholdGen;
+    fVarInit = mog2::defaultVarInit;
+    fVarMin = mog2::defaultVarMin;
+    fVarMax = mog2::defaultVarMax;
+}
+
+void cv::ocl::MOG2::initialize(cv::Size frameSize, int frameType)
+{
+    using namespace cv::ocl::device::mog;
+    CV_Assert(frameType == CV_8UC1 || frameType == CV_8UC3 || frameType == CV_8UC4);
+
+    // Record the frame geometry and restart the frame counter.
+    nframes_ = 0;
+    frameType_ = frameType;
+    frameSize_ = frameSize;
+
+    // Model storage: nmixtures_ planes of frame size stacked vertically, holding
+    // the weight, the variance and the per-channel mean of every mode.
+    const int modelRows = frameSize_.height * nmixtures_;
+    const int modelCols = frameSize_.width;
+
+    weight_.create(modelRows, modelCols, CV_32FC1);
+    weight_.setTo(Scalar::all(0));
+
+    variance_.create(modelRows, modelCols, CV_32FC1);
+    variance_.setTo(Scalar::all(0));
+
+    mean_.create(modelRows, modelCols, CV_32FC(CV_MAT_CN(frameType)));
+    mean_.setTo(Scalar::all(0));
+
+    // Number of modes in use for each pixel; all zero at the start.
+    bgmodelUsedModes_.create(frameSize_, CV_8UC1);
+    bgmodelUsedModes_.setTo(cv::Scalar::all(0));
+
+    // Hand the current thresholds and variance limits to loadConstants().
+    loadConstants(varThreshold, backgroundRatio, varThresholdGen, fVarInit, fVarMin, fVarMax, fTau, nShadowDetection);
+}
+
+void cv::ocl::MOG2::operator()(const oclMat& frame, oclMat& fgmask, float learningRate)
+{
+    using namespace cv::ocl::device::mog;
+    // The model is (re)built on first use, for learningRate >= 1, or when the frame size or channel count changed.
+    const int frameCn = frame.oclchannels();
+    const bool rebuild = nframes_ == 0 || learningRate >= 1.0f || frame.size() != frameSize_ || frameCn != mean_.oclchannels();
+    if (rebuild)
+        initialize(frame.size(), frame.type());
+
+    fgmask.create(frameSize_, CV_8UC1);
+    fgmask.setTo(cv::Scalar::all(0));
+
+    // A negative rate, or the first frame after a rebuild, selects 1 / min(2 * nframes_, history).
+    ++nframes_;
+    if (!(learningRate >= 0.0f && nframes_ > 1))
+        learningRate = 1.0f / std::min(2 * nframes_, history);
+    CV_Assert(learningRate >= 0.0f);
+    mog2_ocl(frame, frameCn, fgmask, bgmodelUsedModes_, weight_, variance_, mean_, learningRate, -learningRate * fCT, bShadowDetection, nmixtures_);
+}
+
+void cv::ocl::MOG2::getBackgroundImage(oclMat& backgroundImage) const
+{
+    using namespace cv::ocl::device::mog;
+    // The output takes the frame size and type currently stored in this object.
+    backgroundImage.create(frameSize_, frameType_);
+    const int channels = backgroundImage.oclchannels();
+    cv::ocl::device::mog::getBackgroundImage2_ocl(channels, bgmodelUsedModes_, weight_, mean_, backgroundImage, nmixtures_);
+}
+
+void cv::ocl::MOG2::release()
+{
+    // Drop all model buffers, then return the bookkeeping fields to their constructed state.
+    bgmodelUsedModes_.release();
+    mean_.release();
+    variance_.release();
+    weight_.release();
+
+    nframes_ = 0;
+    frameType_ = 0;
+    frameSize_ = Size(0, 0);
+}
+
diff --git a/modules/ocl/src/opencl/bgfg_mog.cl b/modules/ocl/src/opencl/bgfg_mog.cl
new file mode 100644
index 000000000..4ad6a52f7
--- /dev/null
+++ b/modules/ocl/src/opencl/bgfg_mog.cl
@@ -0,0 +1,543 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                           License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2010-2013, Multicoreware, Inc., all rights reserved.
+// Copyright (C) 2010-2013, Advanced Micro Devices, Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// @Authors
+//    Jin Ma jin@multicorewareinc.com
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors as is and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#if defined (CN1)
+// Single-channel build: uchar pixels, float model values.
+#define T_FRAME uchar
+#define T_MEAN_VAR float
+#define CONVERT_TYPE convert_uchar_sat
+#define F_ZERO (0.0f)
+
+// Widens a pixel to the float type used by the model.
+float cvt(uchar val)
+{
+    return (float)val;
+}
+
+// Square of the value.
+float sqr(float val) { return val * val; }
+
+// Identity; counterpart of the channel sum in the multi-channel build.
+float sum(float val) { return val; }
+
+// Moves var towards diff^2 by learningRate and keeps the result at or above minVar.
+float clamp1(float var, float learningRate, float diff, float minVar)
+{
+    const float blended = var + learningRate * (diff * diff - var);
+    return fmax(blended, minVar);
+}
+#else
+// Multi-channel build: uchar4 pixels, float4 model values. Only the x, y and z
+// components take part in sqr(), sum() and clamp1(); w is ignored or zeroed.
+#define T_FRAME uchar4
+#define T_MEAN_VAR float4
+#define CONVERT_TYPE convert_uchar4_sat
+#define F_ZERO (0.0f, 0.0f, 0.0f, 0.0f)
+
+// Widens all four pixel components to float.
+float4 cvt(const uchar4 val)
+{
+    return convert_float4(val);
+}
+
+// Sum of the squared x, y and z components.
+float sqr(const float4 val)
+{
+    return val.x * val.x + val.y * val.y + val.z * val.z;
+}
+
+// Sum of the x, y and z components.
+float sum(const float4 val)
+{
+    return val.x + val.y + val.z;
+}
+
+// Per component: moves var towards diff^2 by learningRate and keeps the result
+// at or above minVar. The w component of the result is always 0.
+float4 clamp1(const float4 var, float learningRate, const float4 diff, float minVar)
+{
+    float4 blended = fmax(var + learningRate * (diff * diff - var), minVar);
+    blended.w = 0.0f;
+    return blended;
+}
+#endif
+
+typedef struct
+{
+    float c_Tb;        // factor on the variance in mog2_kernel's background and shadow tests
+    float c_TB;        // limit on the accumulated mode weight in the background and shadow tests
+    float c_Tg;        // factor on the variance when testing whether a pixel fits a mode
+    float c_varInit;   // variance assigned to a newly created mode
+    float c_varMin;    // lower clamp applied to an updated variance
+    float c_varMax;    // upper clamp applied to an updated variance
+    float c_tau;       // lower bound of the numerator/denominator ratio in the shadow test
+    uchar c_shadowVal; // mask value written for pixels classified as shadow
+}con_srtuct_t;
+
+void swap(__global float* ptr, int x, int y, int k, int rows, int ptr_step)
+{
+    float upper = ptr[((k + 1) * rows + y) * ptr_step + x];   // pixel (x, y) in plane k + 1
+    ptr[((k + 1) * rows + y) * ptr_step + x] = ptr[(k * rows + y) * ptr_step + x];
+    ptr[(k * rows + y) * ptr_step + x] = upper;               // plane k receives the saved value
+}
+
+void swap4(__global float4* ptr, int x, int y, int k, int rows, int ptr_step)
+{
+    float4 upper = ptr[((k + 1) * rows + y) * ptr_step + x];  // pixel (x, y) in plane k + 1
+    ptr[((k + 1) * rows + y) * ptr_step + x] = ptr[(k * rows + y) * ptr_step + x];
+    ptr[(k * rows + y) * ptr_step + x] = upper;               // plane k receives the saved value
+}
+
+__kernel void mog_withoutLearning_kernel(__global T_FRAME* frame, __global uchar* fgmask,
+    __global float* weight, __global T_MEAN_VAR* mean, __global T_MEAN_VAR* var,
+    int frame_row, int frame_col, int frame_step, int fgmask_step,
+    int weight_step, int mean_step, int var_step,
+    float varThreshold, float backgroundRatio, int fgmask_offset_x,
+    int fgmask_offset_y, int frame_offset_x, int frame_offset_y)
+{
+    // Classifies one pixel per work-item against its stored mixture; the model buffers are only read.
+    // Mode k of pixel (x, y) is element (k * frame_row + y) * step + x of weight, mean and var.
+    int x = get_global_id(0);
+    int y = get_global_id(1);
+    if (x < frame_col && y < frame_row)
+    {
+        T_MEAN_VAR pix = cvt(frame[(y + frame_offset_y) * frame_step + (x + frame_offset_x)]);
+
+        int kHit = -1;
+        int kForeground = -1;
+        // kHit: first mode whose squared distance to pix is below varThreshold * sum(var).
+        for (int k = 0; k < (NMIXTURES); ++k)
+        {
+            if (weight[(k * frame_row + y) * weight_step + x] < 1.192092896e-07f) // FLT_EPSILON
+                break;
+
+            T_MEAN_VAR mu = mean[(k * frame_row + y) * mean_step + x];
+            // The row index is scaled by var_step, exactly as for mean and weight above.
+            T_MEAN_VAR _var = var[(k * frame_row + y) * var_step + x];
+            T_MEAN_VAR diff = pix - mu;
+
+            if (sqr(diff) < varThreshold * sum(_var))
+            {
+                kHit = k;
+                break;
+            }
+        }
+        // kForeground: one past the mode at which the running weight sum exceeds backgroundRatio.
+        if (kHit >= 0)
+        {
+            float wsum = 0.0f;
+            for (int k = 0; k < (NMIXTURES); ++k)
+            {
+                wsum += weight[(k * frame_row + y) * weight_step + x];
+
+                if (wsum > backgroundRatio)
+                {
+                    kForeground = k + 1;
+                    break;
+                }
+            }
+        }
+        // Foreground (255) when no mode matched or the match lies at or after kForeground.
+        if(kHit < 0 || kHit >= kForeground)
+            fgmask[(y + fgmask_offset_y) * fgmask_step + (x + fgmask_offset_x)] = (uchar) (-1);
+        else
+            fgmask[(y + fgmask_offset_y) * fgmask_step + (x + fgmask_offset_x)] = (uchar) (0);
+
+    }
+}
+
+__kernel void mog_withLearning_kernel(__global T_FRAME* frame, __global uchar* fgmask,
+    __global float* weight, __global float* sortKey, __global T_MEAN_VAR* mean,
+    __global T_MEAN_VAR* var, int frame_row, int frame_col, int frame_step, int fgmask_step,
+    int weight_step, int sortKey_step, int mean_step, int var_step,
+    float varThreshold, float backgroundRatio, float learningRate, float minVar,
+    int fgmask_offset_x, int fgmask_offset_y, int frame_offset_x, int frame_offset_y)
+{
+    const float w0 = 0.05f;          // weight given to a newly created mode
+    const float sk0 = w0 / 30.0f;    // sort key of a new mode: w0 / sqrt(var0)
+    const float var0 = 900.f;        // per-channel variance of a new mode
+    // One work-item per pixel: match, update and re-sort the pixel's mixture, then classify it.
+    int x = get_global_id(0);
+    int y = get_global_id(1);
+    // Mode k of pixel (x, y) is element (k * frame_row + y) * step + x of each model buffer.
+    if(x < frame_col && y < frame_row)
+    {
+
+        float wsum = 0.0f;
+        int kHit = -1;
+        int kForeground = -1;
+        int k = 0;
+
+        T_MEAN_VAR pix = cvt(frame[(y + frame_offset_y) * frame_step + (x + frame_offset_x)]);
+        // Walk the modes in stored order until one matches pix or an empty one is reached.
+        for (; k < (NMIXTURES); ++k)
+        {
+            float w = weight[(k * frame_row + y) * weight_step + x];
+            wsum += w;
+
+            if (w < 1.192092896e-07f) // FLT_EPSILON
+                break;
+
+            T_MEAN_VAR mu = mean[(k * frame_row + y) * mean_step + x];
+            T_MEAN_VAR _var = var[(k * frame_row + y) * var_step + x];
+
+            T_MEAN_VAR diff = pix - mu;
+
+            if (sqr(diff) < varThreshold * sum(_var))
+            {
+                wsum -= w;
+                float dw = learningRate * (1.0f - w);
+
+                _var = clamp1(_var, learningRate, diff, minVar);
+                // Sort key is weight over standard deviation, matching sk0 = w0 / 30 = w0 / sqrt(var0).
+                float sortKey_prev = w / sqrt(sum(_var));
+                sortKey[(k * frame_row + y) * sortKey_step + x] = sortKey_prev;
+
+                float weight_prev = w + dw;
+                weight[(k * frame_row + y) * weight_step + x] = weight_prev;
+
+                T_MEAN_VAR mean_prev = mu + learningRate * diff;
+                mean[(k * frame_row + y) * mean_step + x] = mean_prev;
+
+                T_MEAN_VAR var_prev = _var;
+                var[(k * frame_row + y) * var_step + x] = var_prev;
+
+                int k1 = k - 1;
+                // Bubble the updated mode towards index 0 while its sort key exceeds its predecessor's.
+                if (k1 >= 0)
+                {
+                    float sortKey_next = sortKey[(k1 * frame_row + y) * sortKey_step + x];
+                    float weight_next = weight[(k1 * frame_row + y) * weight_step + x];
+                    T_MEAN_VAR mean_next = mean[(k1 * frame_row + y) * mean_step + x];
+                    T_MEAN_VAR var_next = var[(k1 * frame_row + y) * var_step + x];
+
+                    for (; sortKey_next < sortKey_prev && k1 >= 0; --k1)
+                    {
+                        sortKey[(k1 * frame_row + y) * sortKey_step + x] = sortKey_prev;
+                        sortKey[((k1 + 1) * frame_row + y) * sortKey_step + x] = sortKey_next;
+
+                        weight[(k1 * frame_row + y) * weight_step + x] = weight_prev;
+                        weight[((k1 + 1) * frame_row + y) * weight_step + x] = weight_next;
+
+                        mean[(k1 * frame_row + y) * mean_step + x] = mean_prev;
+                        mean[((k1 + 1) * frame_row + y) * mean_step + x] = mean_next;
+
+                        var[(k1 * frame_row + y) * var_step + x] = var_prev;
+                        var[((k1 + 1) * frame_row + y) * var_step + x] = var_next;
+
+                        sortKey_prev = sortKey_next;
+                        sortKey_next = k1 > 0 ? sortKey[((k1 - 1) * frame_row + y) * sortKey_step + x] : 0.0f;
+
+                        weight_prev = weight_next;
+                        weight_next = k1 > 0 ? weight[((k1 - 1) * frame_row + y) * weight_step + x] : 0.0f;
+
+                        mean_prev = mean_next;
+                        mean_next = k1 > 0 ? mean[((k1 - 1) * frame_row + y) * mean_step + x] : (T_MEAN_VAR)F_ZERO;
+
+                        var_prev = var_next;
+                        var_next = k1 > 0 ? var[((k1 - 1) * frame_row + y) * var_step + x] : (T_MEAN_VAR)F_ZERO;
+                    }
+                }
+
+                kHit = k1 + 1;
+                break;
+            }
+        }
+        // No match: (re)initialise the first empty mode, or the last mode when all are in use.
+        if (kHit < 0)
+        {
+            kHit = k = k < ((NMIXTURES) - 1) ? k : ((NMIXTURES) - 1);
+            wsum += w0 - weight[(k * frame_row + y) * weight_step + x];
+
+            weight[(k * frame_row + y) * weight_step + x] = w0;
+            mean[(k * frame_row + y) * mean_step + x] = pix;
+            #if defined (CN1)
+            var[(k * frame_row + y) * var_step + x] = (T_MEAN_VAR)(var0);
+            #else
+            var[(k * frame_row + y) * var_step + x] = (T_MEAN_VAR)(var0, var0, var0, var0);
+            #endif
+            sortKey[(k * frame_row + y) * sortKey_step + x] = sk0;
+        }
+        else
+        {
+            for( ; k < (NMIXTURES); k++)
+                wsum += weight[(k * frame_row + y) * weight_step + x];
+        }
+        // Rescale weights and sort keys by 1 / wsum and find where the running sum passes backgroundRatio.
+        float wscale = 1.0f / wsum;
+        wsum = 0;
+        for (k = 0; k < (NMIXTURES); ++k)
+        {
+            float w = weight[(k * frame_row + y) * weight_step + x];
+            wsum += w *= wscale;
+
+            weight[(k * frame_row + y) * weight_step + x] = w;
+            sortKey[(k * frame_row + y) * sortKey_step + x] *= wscale;
+
+            if (wsum > backgroundRatio && kForeground < 0)
+                kForeground = k + 1;
+        }
+        if(kHit >= kForeground)
+            fgmask[(y + fgmask_offset_y) * fgmask_step + (x + fgmask_offset_x)] = (uchar)(-1);
+        else
+            fgmask[(y + fgmask_offset_y) * fgmask_step + (x + fgmask_offset_x)] = (uchar)(0);
+    }
+}
+
+__kernel void getBackgroundImage_kernel(__global float* weight, __global T_MEAN_VAR* mean, __global T_FRAME* dst,
+    int dst_row, int dst_col, int weight_step, int mean_step, int dst_step,
+    float backgroundRatio)
+{
+    // One work-item per destination pixel: accumulates weight * mean over the
+    // leading modes until their weights exceed backgroundRatio, then normalises.
+    const int col = get_global_id(0);
+    const int row = get_global_id(1);
+    if (col >= dst_col || row >= dst_row)
+        return;
+
+    T_MEAN_VAR acc = (T_MEAN_VAR)F_ZERO;
+    float wAcc = 0.0f;
+
+    for (int k = 0; k < (NMIXTURES); ++k)
+    {
+        const int plane = k * dst_row + row;   // row of mode k for this pixel
+        const float w = weight[plane * weight_step + col];
+
+        acc = acc + w * mean[plane * mean_step + col];
+        wAcc += w;
+        if (wAcc > backgroundRatio)
+            break;
+    }
+
+    // Written without any dst offset; CONVERT_TYPE is the saturating uchar conversion.
+    dst[row * dst_step + col] = CONVERT_TYPE(acc * (1.f / wAcc));
+}
+
+__kernel void mog2_kernel(__global T_FRAME * frame, __global uchar* fgmask, __global float* weight, __global T_MEAN_VAR * mean,
+        __global uchar* modesUsed, __global float* variance, int frame_row, int frame_col, int frame_step,
+        int fgmask_step, int weight_step, int mean_step, int modesUsed_step, int var_step, float alphaT, float alpha1, float prune,
+        int detectShadows_flag, int fgmask_offset_x, int fgmask_offset_y, int frame_offset_x, int frame_offset_y, __constant con_srtuct_t* constants)
+{
+    int x = get_global_id(0);
+    int y = get_global_id(1);
+    // One work-item per pixel: updates the pixel's mixture in place and writes 0, the shadow value or 255 to fgmask.
+    if(x < frame_col && y < frame_row)
+    {
+        T_MEAN_VAR pix = cvt(frame[(y + frame_offset_y) * frame_step + x + frame_offset_x]);
+
+        bool background = false; // true - the pixel classified as background
+
+        bool fitsPDF = false; //if it remains zero a new GMM mode will be added
+
+        int nmodes = modesUsed[y * modesUsed_step + x];
+        int nNewModes = nmodes; //current number of modes in GMM
+
+        float totalWeight = 0.0f;
+        // Decay every mode's weight; the first mode that fits pix is also updated and re-sorted.
+        for (int mode = 0; mode < nmodes; ++mode)
+        {
+            float _weight = alpha1 * weight[(mode * frame_row + y) * weight_step + x] + prune;
+            int swap_count = 0; // how many slots the matched mode moved towards index 0
+            if (!fitsPDF)
+            {
+                float var = variance[(mode * frame_row + y) * var_step + x];
+
+                T_MEAN_VAR _mean = mean[(mode * frame_row + y) * mean_step + x];
+
+                T_MEAN_VAR diff = _mean - pix;
+                float dist2 = sqr(diff);
+
+                if (totalWeight < constants -> c_TB && dist2 < constants -> c_Tb * var)
+                    background = true;
+
+                if (dist2 < constants -> c_Tg * var)
+                {
+                    fitsPDF = true;
+                    _weight += alphaT;
+                    float k = alphaT / _weight;
+                    mean[(mode * frame_row + y) * mean_step + x] = _mean - k * diff;
+                    float varnew = var + k * (dist2 - var);
+                    varnew = fmax(varnew, constants -> c_varMin);
+                    varnew = fmin(varnew, constants -> c_varMax);
+                    variance[(mode * frame_row + y) * var_step + x] = varnew;
+                    for (int i = mode; i > 0; --i)
+                    {
+                        if (_weight < weight[((i - 1) * frame_row + y) * weight_step + x])
+                            break;
+                        swap_count++;
+                        swap(weight, x, y, i - 1, frame_row, weight_step);
+                        swap(variance, x, y, i - 1, frame_row, var_step);
+                        #if defined (CN1)
+                        swap(mean, x, y, i - 1, frame_row, mean_step);
+                        #else
+                        swap4(mean, x, y, i - 1, frame_row, mean_step);
+                        #endif
+                    }
+                }
+            } // !fitsPDF
+
+            if (_weight < -prune)
+            {
+                _weight = 0.0;
+                nmodes--;
+            }
+            // Store at the slot the mode now occupies; slot `mode` may hold a mode that was swapped down.
+            weight[((mode - swap_count) * frame_row + y) * weight_step + x] = _weight; //update weight by the calculated value
+            totalWeight += _weight;
+        }
+
+        totalWeight = 1.f / totalWeight;
+        for (int mode = 0; mode < nmodes; ++mode)
+            weight[(mode * frame_row + y) * weight_step + x] *= totalWeight;
+
+        nmodes = nNewModes;
+        // Nothing matched: start a new mode at pix, replacing the last one when all NMIXTURES are in use.
+        if (!fitsPDF)
+        {
+            int mode = nmodes == (NMIXTURES) ? (NMIXTURES) - 1 : nmodes++;
+
+            if (nmodes == 1)
+                weight[(mode * frame_row + y) * weight_step + x] = 1.f;
+            else
+            {
+                weight[(mode * frame_row + y) * weight_step + x] = alphaT;
+
+                for (int i = 0; i < nmodes - 1; ++i)
+                    weight[(i * frame_row + y) * weight_step + x] *= alpha1;
+            }
+
+            mean[(mode * frame_row + y) * mean_step + x] = pix;
+            variance[(mode * frame_row + y) * var_step + x] = constants -> c_varInit;
+
+            for (int i = nmodes - 1; i > 0; --i)
+            {
+                // check one up
+                if (alphaT < weight[((i - 1) * frame_row + y) * weight_step + x])
+                    break;
+
+                swap(weight, x, y, i - 1, frame_row, weight_step);
+                swap(variance, x, y, i - 1, frame_row, var_step);
+                #if defined (CN1)
+                swap(mean, x, y, i - 1, frame_row, mean_step);
+                #else
+                swap4(mean, x, y, i - 1, frame_row, mean_step);
+                #endif
+            }
+        }
+
+        modesUsed[y * modesUsed_step + x] = nmodes;
+        // Shadow test: runs only for non-background pixels when the flag is set.
+        bool isShadow = false;
+        if (detectShadows_flag && !background)
+        {
+            float tWeight = 0.0f;
+
+            for (int mode = 0; mode < nmodes; ++mode)
+            {
+                T_MEAN_VAR _mean = mean[(mode * frame_row + y) * mean_step + x];
+
+                T_MEAN_VAR pix_mean = pix * _mean;
+
+                float numerator = sum(pix_mean);
+                float denominator = sqr(_mean);
+
+                if (denominator == 0)
+                    break;
+                // a = numerator / denominator must lie in [c_tau, 1] for the shadow check to apply.
+                if (numerator <= denominator && numerator >= constants -> c_tau * denominator)
+                {
+                    float a = numerator / denominator;
+
+                    T_MEAN_VAR dD = a * _mean - pix;
+
+                    if (sqr(dD) < constants -> c_Tb * variance[(mode * frame_row + y) * var_step + x] * a * a)
+                    {
+                        isShadow = true;
+                        break;
+                    }
+                }
+
+                tWeight += weight[(mode * frame_row + y) * weight_step + x];
+                if (tWeight > constants -> c_TB)
+                    break;
+            }
+        }
+
+        fgmask[(y + fgmask_offset_y) * fgmask_step + x + fgmask_offset_x] = background ? 0 : isShadow ? constants -> c_shadowVal : 255;
+    }
+}
+
+__kernel void getBackgroundImage2_kernel(__global uchar* modesUsed, __global float* weight, __global T_MEAN_VAR* mean,
+    __global T_FRAME* dst, float c_TB, int modesUsed_row, int modesUsed_col, int modesUsed_step, int weight_step,
+    int mean_step, int dst_step, int dst_x, int dst_y)
+{
+    // One work-item per pixel of the modesUsed map: accumulates weight * mean over
+    // the pixel's leading modes until their weights exceed c_TB, then normalises.
+    const int col = get_global_id(0);
+    const int row = get_global_id(1);
+    if (col >= modesUsed_col || row >= modesUsed_row)
+        return;
+
+    // Only the modes recorded as in use for this pixel are visited.
+    const int used = modesUsed[row * modesUsed_step + col];
+
+    T_MEAN_VAR acc = (T_MEAN_VAR)F_ZERO;
+    float wAcc = 0.0f;
+
+    for (int k = 0; k < used; ++k)
+    {
+        const int plane = k * modesUsed_row + row;   // row of mode k for this pixel
+        const float w = weight[plane * weight_step + col];
+
+        acc = acc + w * mean[plane * mean_step + col];
+        wAcc += w;
+        if (wAcc > c_TB)
+            break;
+    }
+
+    // dst_x / dst_y shift the write position inside dst;
+    // CONVERT_TYPE is the saturating uchar conversion.
+    dst[(row + dst_y) * dst_step + col + dst_x] = CONVERT_TYPE(acc * (1.f / wAcc));
+}
diff --git a/modules/ocl/test/test_bgfg.cpp b/modules/ocl/test/test_bgfg.cpp
new file mode 100644
index 000000000..f2bda321b
--- /dev/null
+++ b/modules/ocl/test/test_bgfg.cpp
@@ -0,0 +1,232 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                           License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2010-2013, Multicoreware, Inc., all rights reserved.
+// Copyright (C) 2010-2013, Advanced Micro Devices, Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// @Authors
+//    Jin Ma, jin@multicorewareinc.com
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#include "precomp.hpp"
+
+#ifdef HAVE_OPENCL
+
+using namespace cv;
+using namespace cv::ocl;
+using namespace cvtest;
+using namespace testing;
+using namespace std;
+
+extern string workdir;
+//////////////////////////////////////////////////////
+// MOG
+
+namespace
+{
+    IMPLEMENT_PARAM_CLASS(UseGray, bool)
+    IMPLEMENT_PARAM_CLASS(LearningRate, double)
+}
+
+// Fixture for the cv::ocl::MOG tests; parameters are (UseGray, LearningRate, bool useRoi).
+PARAM_TEST_CASE(mog, UseGray, LearningRate, bool)
+{
+    bool useGray;        // frames are converted to grayscale before being fed to the models
+    double learningRate; // learning rate passed to both the OpenCL and the CPU model
+    bool useRoi;         // forwarded to createMat_ocl()/loadMat_ocl()
+
+    // Copies the test parameters into the members above.
+    virtual void SetUp()
+    {
+        useGray = GET_PARAM(0);
+        learningRate = GET_PARAM(1);
+        useRoi = GET_PARAM(2);
+    }
+};
+
+// Feeds 10 video frames to cv::ocl::MOG and to the CPU cv::BackgroundSubtractorMOG and compares the masks.
+TEST_P(mog, Update)
+{
+    std::string inputFile = string(cvtest::TS::ptr()->get_data_path()) + "gpu/768x576.avi";
+    cv::VideoCapture cap(inputFile);
+    ASSERT_TRUE(cap.isOpened());
+
+    cv::Mat frame;
+    cap >> frame;
+    ASSERT_FALSE(frame.empty());
+    // OpenCL model under test; the mask matrix is pre-created through createMat_ocl().
+    cv::ocl::MOG mog;
+    cv::ocl::oclMat foreground = createMat_ocl(frame.size(), CV_8UC1, useRoi);
+    // CPU reference model and its mask.
+    cv::BackgroundSubtractorMOG mog_gold;
+    cv::Mat foreground_gold;
+
+    for (int i = 0; i < 10; ++i)
+    {
+        cap >> frame;
+        ASSERT_FALSE(frame.empty());
+
+        if (useGray)
+        {
+            cv::Mat temp;
+            cv::cvtColor(frame, temp, cv::COLOR_BGR2GRAY);
+            cv::swap(temp, frame);
+        }
+
+        mog(loadMat_ocl(frame, useRoi), foreground, (float)learningRate);
+        mog_gold(frame, foreground_gold, learningRate);
+
+        EXPECT_MAT_NEAR(foreground_gold, foreground, 0.0);
+    }
+}
+INSTANTIATE_TEST_CASE_P(OCL_Video, mog, testing::Combine(
+    testing::Values(UseGray(false), UseGray(true)),
+    testing::Values(LearningRate(0.0), LearningRate(0.01)),
+    Values(true, false)));
+
+//////////////////////////////////////////////////////
+// MOG2
+
+namespace
+{
+    IMPLEMENT_PARAM_CLASS(DetectShadow, bool)
+}
+
+PARAM_TEST_CASE(mog2, UseGray, DetectShadow, bool) // fixture for the cv::ocl::MOG2 tests
+{
+    bool useGray;      // frames are converted to grayscale before being fed to the models
+    bool detectShadow; // applied to both the OpenCL and the CPU model
+    bool useRoi;       // forwarded to createMat_ocl()/loadMat_ocl()
+    virtual void SetUp() // copies the test parameters into the members above
+    {
+        useGray = GET_PARAM(0);
+        detectShadow = GET_PARAM(1);
+        useRoi = GET_PARAM(2);
+    }
+};
+
+// Feeds 10 video frames to cv::ocl::MOG2 and to the CPU cv::BackgroundSubtractorMOG2 and compares the masks.
+TEST_P(mog2, Update)
+{
+    std::string inputFile = string(cvtest::TS::ptr()->get_data_path()) + "gpu/768x576.avi";
+    cv::VideoCapture cap(inputFile);
+    ASSERT_TRUE(cap.isOpened());
+
+    cv::Mat frame;
+    cap >> frame;
+    ASSERT_FALSE(frame.empty());
+    // OpenCL model under test; the mask matrix is pre-created through createMat_ocl().
+    cv::ocl::MOG2 mog2;
+    mog2.bShadowDetection = detectShadow;
+    cv::ocl::oclMat foreground = createMat_ocl(frame.size(), CV_8UC1, useRoi);
+    // CPU reference model, configured with the same shadow-detection setting.
+    cv::BackgroundSubtractorMOG2 mog2_gold;
+    mog2_gold.set("detectShadows", detectShadow);
+    cv::Mat foreground_gold;
+
+    for (int i = 0; i < 10; ++i)
+    {
+        cap >> frame;
+        ASSERT_FALSE(frame.empty());
+
+        if (useGray)
+        {
+            cv::Mat temp;
+            cv::cvtColor(frame, temp, cv::COLOR_BGR2GRAY);
+            cv::swap(temp, frame);
+        }
+
+        mog2(loadMat_ocl(frame, useRoi), foreground);
+        mog2_gold(frame, foreground_gold);
+
+        if (detectShadow)
+        {
+            // With shadow detection the masks are compared by similarity (1e-2) instead of exactly.
+            EXPECT_MAT_SIMILAR(foreground_gold, foreground, 1e-2);
+        }
+        else
+        {
+            EXPECT_MAT_NEAR(foreground_gold, foreground, 0);
+        }
+    }
+}
+
+TEST_P(mog2, getBackgroundImage)
+{
+    if (useGray)
+        return;
+
+    std::string inputFile = string(cvtest::TS::ptr()->get_data_path()) + "gpu/768x576.avi";
+    cv::VideoCapture cap(inputFile);
+    ASSERT_TRUE(cap.isOpened());
+
+    cv::Mat frame;
+    // Updates the OpenCL model and the CPU reference with the same 10 frames, then compares their background images.
+    cv::ocl::MOG2 mog2;
+    mog2.bShadowDetection = detectShadow;
+    cv::ocl::oclMat foreground;
+
+    cv::BackgroundSubtractorMOG2 mog2_gold;
+    mog2_gold.set("detectShadows", detectShadow);
+    cv::Mat foreground_gold;
+    // The foreground masks produced in this loop are not checked by this test.
+    for (int i = 0; i < 10; ++i)
+    {
+        cap >> frame;
+        ASSERT_FALSE(frame.empty());
+
+        mog2(loadMat_ocl(frame, useRoi), foreground);
+
+        mog2_gold(frame, foreground_gold);
+    }
+
+    cv::ocl::oclMat background = createMat_ocl(frame.size(), frame.type(), useRoi);
+    mog2.getBackgroundImage(background);
+
+    cv::Mat background_gold;
+    mog2_gold.getBackgroundImage(background_gold);
+    // Compared with a tolerance argument of 1.0 rather than exactly.
+    EXPECT_MAT_NEAR(background_gold, background, 1.0);
+}
+
+INSTANTIATE_TEST_CASE_P(OCL_Video, mog2, testing::Combine(
+    testing::Values(UseGray(true), UseGray(false)),
+    testing::Values(DetectShadow(true), DetectShadow(false)),
+    Values(true, false)));
+
+#endif
\ No newline at end of file
diff --git a/modules/ocl/test/test_optflow.cpp b/modules/ocl/test/test_optflow.cpp
index 4693d46dd..8fcc105a1 100644
--- a/modules/ocl/test/test_optflow.cpp
+++ b/modules/ocl/test/test_optflow.cpp
@@ -146,10 +146,10 @@ PARAM_TEST_CASE(TVL1, bool)
 
 TEST_P(TVL1, Accuracy)
 {
-    cv::Mat frame0 = readImage("gpu/opticalflow/rubberwhale1.png", cv::IMREAD_GRAYSCALE);
+    cv::Mat frame0 = readImage("gpu/opticalflow/rubberwhale1.png", cv::IMREAD_GRAYSCALE);
     ASSERT_FALSE(frame0.empty());
 
-    cv::Mat frame1 = readImage("gpu/opticalflow/rubberwhale2.png", cv::IMREAD_GRAYSCALE);
+    cv::Mat frame1 = readImage("gpu/opticalflow/rubberwhale2.png", cv::IMREAD_GRAYSCALE);
     ASSERT_FALSE(frame1.empty());
 
     cv::ocl::OpticalFlowDual_TVL1_OCL d_alg;
@@ -168,7 +168,7 @@ TEST_P(TVL1, Accuracy)
     EXPECT_MAT_SIMILAR(gold[0], d_flowx, 3e-3);
     EXPECT_MAT_SIMILAR(gold[1], d_flowy, 3e-3);
 }
-INSTANTIATE_TEST_CASE_P(OCL_Video, TVL1, Values(true, false));
+INSTANTIATE_TEST_CASE_P(OCL_Video, TVL1, Values(false, true));
 
 
 /////////////////////////////////////////////////////////////////////////////////////////////////
diff --git a/modules/ocl/test/utility.cpp b/modules/ocl/test/utility.cpp
index 440a89d4a..750c3c82b 100644
--- a/modules/ocl/test/utility.cpp
+++ b/modules/ocl/test/utility.cpp
@@ -100,6 +100,44 @@ Mat randomMat(Size size, int type, double minVal, double maxVal)
     return randomMat(TS::ptr()->get_rng(), size, type, minVal, maxVal, false);
 }
 
+cv::ocl::oclMat createMat_ocl(Size size, int type, bool useRoi)
+{
+    Size size0 = size;
+    // With useRoi, enlarge the allocation in each dimension by a randomInt(5, 15) amount.
+    if (useRoi)
+    {
+        size0.width += randomInt(5, 15);
+        size0.height += randomInt(5, 15);
+    }
+
+    cv::ocl::oclMat d_m(size0, type);
+    // If the allocation was enlarged, return the centered sub-matrix of the requested size.
+    if (size0 != size)
+        d_m = d_m(Rect((size0.width - size.width) / 2, (size0.height - size.height) / 2, size.width, size.height));
+
+    return d_m;
+}
+
+cv::ocl::oclMat loadMat_ocl(const Mat& m, bool useRoi)
+{
+    // Uploads the host matrix m to the device and returns the device copy.
+    // The destination is obtained from createMat_ocl(), so with useRoi it
+    // is a sub-matrix of a larger padded allocation instead of a
+    // standalone matrix.
+    //
+    // Only 8-bit one- and three-channel inputs are accepted.
+    CV_Assert(m.type() == CV_8UC1 || m.type() == CV_8UC3);
+
+    cv::ocl::oclMat d_m = createMat_ocl(m.size(), m.type(), useRoi);
+
+    // Upload into a temporary device matrix first, then copy it into the
+    // destination on the device.
+    // NOTE(review): copyTo() is expected to reuse d_m (and keep its ROI)
+    // because size and type already match - confirm.
+    cv::ocl::oclMat m_ocl(m);
+    m_ocl.copyTo(d_m);
+    return d_m;
+}
 /*
 void showDiff(InputArray gold_, InputArray actual_, double eps)
 {
diff --git a/modules/ocl/test/utility.hpp b/modules/ocl/test/utility.hpp
index 0b101ec50..1e17c6dbc 100644
--- a/modules/ocl/test/utility.hpp
+++ b/modules/ocl/test/utility.hpp
@@ -70,6 +70,9 @@ double checkNorm(const cv::Mat &m);
 double checkNorm(const cv::Mat &m1, const cv::Mat &m2);
 double checkSimilarity(const cv::Mat &m1, const cv::Mat &m2);
 
+// oclMat test helpers: create a device matrix / upload a host matrix, optionally as a ROI of a larger padded allocation
+cv::ocl::oclMat createMat_ocl(cv::Size size, int type, bool useRoi = false);
+cv::ocl::oclMat loadMat_ocl(const cv::Mat& m, bool useRoi = false);
 #define EXPECT_MAT_NORM(mat, eps) \
 { \
     EXPECT_LE(checkNorm(cv::Mat(mat)), eps) \
diff --git a/samples/ocl/bgfg_segm.cpp b/samples/ocl/bgfg_segm.cpp
new file mode 100644
index 000000000..410f34693
--- /dev/null
+++ b/samples/ocl/bgfg_segm.cpp
@@ -0,0 +1,135 @@
+#include <iostream>
+#include <string>
+
+#include "opencv2/core/core.hpp"
+#include "opencv2/ocl/ocl.hpp"
+#include "opencv2/highgui/highgui.hpp"
+
+using namespace std;
+using namespace cv;
+using namespace cv::ocl;
+
+#define M_MOG  1
+#define M_MOG2 2
+
+int main(int argc, const char** argv)
+{
+    // Sample: runs the cv::ocl MOG or MOG2 background subtractor on a camera or video stream and shows the results.
+    cv::CommandLineParser cmd(argc, argv,
+        "{ c | camera | false       | use camera }"
+        "{ f | file   | 768x576.avi | input video file }"
+        "{ m | method | mog         | method (mog, mog2) }"
+        "{ h | help   | false       | print help message }");
+
+    if (cmd.get<bool>("help"))
+    {
+        cout << "Usage : bgfg_segm [options]" << endl;
+        cout << "Available options:" << endl;
+        cmd.printParams();
+        return 0;
+    }
+
+    bool useCamera = cmd.get<bool>("camera");
+    string file = cmd.get<string>("file");
+    string method = cmd.get<string>("method");
+
+    if (method != "mog" && method != "mog2")
+    {
+        cerr << "Incorrect method" << endl;
+        return -1;
+    }
+
+    int m = method == "mog" ? M_MOG : M_MOG2;
+
+    VideoCapture cap;
+    if (useCamera)
+        cap.open(0);
+    else
+        cap.open(file);
+
+    if (!cap.isOpened())
+    {
+        cerr << "can not open camera or video file" << endl;
+        return -1;
+    }
+
+    std::vector<cv::ocl::Info>info;
+    cv::ocl::getDevice(info);
+
+    Mat frame;
+    cap >> frame;
+    // Stop early on an empty first frame: it sizes the device buffers and is fed to the model below.
+    if (frame.empty())
+    {
+        cerr << "can not read the first frame" << endl;
+        return -1;
+    }
+
+    oclMat d_frame(frame);
+
+    cv::ocl::MOG mog;
+    cv::ocl::MOG2 mog2;
+    oclMat d_fgmask;
+    oclMat d_fgimg;
+    oclMat d_bgimg;
+    d_fgimg.create(d_frame.size(), d_frame.type());
+
+    Mat fgmask;
+    Mat fgimg;
+    Mat bgimg;
+
+    switch (m)
+    {
+    case M_MOG:
+        mog(d_frame, d_fgmask, 0.01f);
+        break;
+    case M_MOG2:
+        mog2(d_frame, d_fgmask);
+        break;
+    }
+
+    for(;;)
+    {
+        cap >> frame;
+        if (frame.empty())
+            break;
+        d_frame.upload(frame);
+
+        int64 start = cv::getTickCount();
+        //update the model
+        switch (m)
+        {
+        case M_MOG:
+            mog(d_frame, d_fgmask, 0.01f);
+            mog.getBackgroundImage(d_bgimg);
+            break;
+        case M_MOG2:
+            mog2(d_frame, d_fgmask);
+            mog2.getBackgroundImage(d_bgimg);
+            break;
+        }
+
+        double fps = cv::getTickFrequency() / (cv::getTickCount() - start);
+        std::cout << "FPS : " << fps << std::endl;
+
+        d_fgimg.setTo(Scalar::all(0));
+        d_frame.copyTo(d_fgimg, d_fgmask);
+
+        d_fgmask.download(fgmask);
+        d_fgimg.download(fgimg);
+        if (!d_bgimg.empty())
+            d_bgimg.download(bgimg);
+
+        imshow("image", frame);
+        imshow("foreground mask", fgmask);
+        imshow("foreground image", fgimg);
+        if (!bgimg.empty())
+            imshow("mean background image", bgimg);
+
+        int key = waitKey(30);
+        if (key == 27)
+            break;
+    }
+
+    return 0;
+}

From 8feaadc69f5467201913d06eff678588034d9dce Mon Sep 17 00:00:00 2001
From: Jin Ma <jinma06njuee@gmail.om>
Date: Fri, 16 Aug 2013 15:24:55 +0800
Subject: [PATCH 02/41] Resolved a compiling error under Linux.

---
 modules/ocl/src/bgfg_mog.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/modules/ocl/src/bgfg_mog.cpp b/modules/ocl/src/bgfg_mog.cpp
index 33bf143d2..0bdfe6f2f 100644
--- a/modules/ocl/src/bgfg_mog.cpp
+++ b/modules/ocl/src/bgfg_mog.cpp
@@ -69,7 +69,7 @@ namespace cv
     }
 }
 
-#if _MSC_VER
+#if defined _MSC_VER
 #define snprintf sprintf_s
 #endif
 

From 916b92bc3b323a5e73aae18cef4a9aebfda11035 Mon Sep 17 00:00:00 2001
From: Jin Ma <jinma06njuee@gmail.om>
Date: Fri, 16 Aug 2013 15:33:43 +0800
Subject: [PATCH 03/41] Resolved a compiling warning under Windows.

---
 modules/ocl/perf/perf_bgfg.cpp | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/modules/ocl/perf/perf_bgfg.cpp b/modules/ocl/perf/perf_bgfg.cpp
index e7aad759f..358b8ff38 100644
--- a/modules/ocl/perf/perf_bgfg.cpp
+++ b/modules/ocl/perf/perf_bgfg.cpp
@@ -48,9 +48,9 @@ using namespace cv::ocl;
 
 void cvtFrameFmt(std::vector<Mat>& input, std::vector<Mat>& output, int output_cn)
 {
-    for(int i=0; i<input.size(); i++)
+    for(int i = 0; i< (int)(input.size()); i++)
     {
-        if(output_cn==1)
+        if(output_cn == 1)
             cvtColor(input[i], output[i], COLOR_RGB2GRAY);
         else
             cvtColor(input[i], output[i], COLOR_RGB2RGBA);

From a3aa7966ae60a885ec9d337e7ec38268bb673bf8 Mon Sep 17 00:00:00 2001
From: Jin Ma <jinma06njuee@gmail.om>
Date: Fri, 16 Aug 2013 16:07:53 +0800
Subject: [PATCH 04/41] Resolved compiling errors under Linux.

---
 modules/ocl/perf/perf_bgfg.cpp | 2 +-
 modules/ocl/test/test_bgfg.cpp | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/modules/ocl/perf/perf_bgfg.cpp b/modules/ocl/perf/perf_bgfg.cpp
index 358b8ff38..a75d6b8b9 100644
--- a/modules/ocl/perf/perf_bgfg.cpp
+++ b/modules/ocl/perf/perf_bgfg.cpp
@@ -46,7 +46,7 @@
 using namespace cv;
 using namespace cv::ocl;
 
-void cvtFrameFmt(std::vector<Mat>& input, std::vector<Mat>& output, int output_cn)
+static void cvtFrameFmt(std::vector<Mat>& input, std::vector<Mat>& output, int output_cn)
 {
     for(int i = 0; i< (int)(input.size()); i++)
     {
diff --git a/modules/ocl/test/test_bgfg.cpp b/modules/ocl/test/test_bgfg.cpp
index f2bda321b..f5afd12ee 100644
--- a/modules/ocl/test/test_bgfg.cpp
+++ b/modules/ocl/test/test_bgfg.cpp
@@ -43,7 +43,7 @@
 //
 //M*/
 
-#include "precomp.hpp"
+#include "test_precomp.hpp"
 
 #ifdef HAVE_OPENCL
 

From 1bcd1fd3a2db3c8741e63ff976aab17ac87b23d8 Mon Sep 17 00:00:00 2001
From: Jin Ma <jinma06njuee@gmail.om>
Date: Fri, 16 Aug 2013 17:39:17 +0800
Subject: [PATCH 05/41] Used perf_precomp.hpp instead of precomp.hpp for test.

---
 modules/ocl/perf/perf_bgfg.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/modules/ocl/perf/perf_bgfg.cpp b/modules/ocl/perf/perf_bgfg.cpp
index a75d6b8b9..d507a3b5e 100644
--- a/modules/ocl/perf/perf_bgfg.cpp
+++ b/modules/ocl/perf/perf_bgfg.cpp
@@ -42,7 +42,7 @@
 // the use of this software, even if advised of the possibility of such damage.
 //
 //M*/
-#include "precomp.hpp"
+#include "perf_precomp.hpp"
 using namespace cv;
 using namespace cv::ocl;
 

From c1de14c27ac555fe6e60fff1f3fd307a26e79a24 Mon Sep 17 00:00:00 2001
From: kdrobnyh <klim.drobnyh@gmail.com>
Date: Sat, 17 Aug 2013 20:31:03 +0400
Subject: [PATCH 06/41] Fix bilateralFilter function

---
 modules/imgproc/src/smooth.cpp                | 68 ++++++++++++-------
 .../imgproc/test/test_bilateral_filter.cpp    |  2 +-
 2 files changed, 46 insertions(+), 24 deletions(-)

diff --git a/modules/imgproc/src/smooth.cpp b/modules/imgproc/src/smooth.cpp
index 00be08618..3dad2c087 100644
--- a/modules/imgproc/src/smooth.cpp
+++ b/modules/imgproc/src/smooth.cpp
@@ -1875,6 +1875,41 @@ private:
     float *space_weight, *color_weight;
 };
 
+#if defined (HAVE_IPP) && (IPP_VERSION_MAJOR >= 7)
+// Parallel loop body that runs the IPP 8-bit single-channel bilateral filter on one band of destination rows.
+class IPPBilateralFilter_8u_Invoker : public ParallelLoopBody
+{
+public:
+    IPPBilateralFilter_8u_Invoker(Mat &_src, Mat &_dst, double _sigma_color, double _sigma_space, int _radius, bool *_ok) :
+      ParallelLoopBody(), src(_src), dst(_dst), sigma_color(_sigma_color), sigma_space(_sigma_space), radius(_radius), ok(_ok)
+      {
+          *ok = true;
+      }
+      // Filters rows [range.start, range.end) of dst; sets *ok to false when the IPP filter call returns a negative status.
+      virtual void operator() (const Range& range) const
+      {
+          int d = radius * 2 + 1;
+          IppiSize kernel = {d, d};
+          IppiSize roi={dst.cols, range.end - range.start};
+          int bufsize=0;
+          ippiFilterBilateralGetBufSize_8u_C1R( ippiFilterBilateralGauss, roi, kernel, &bufsize);
+          AutoBuffer<uchar> buf(bufsize + 32); // slack: alignPtr() below may advance the pointer by up to 31 bytes
+          IppiFilterBilateralSpec *pSpec = (IppiFilterBilateralSpec *)alignPtr(&buf[0], 32);
+          ippiFilterBilateralInit_8u_C1R( ippiFilterBilateralGauss, kernel, (Ipp32f)sigma_color, (Ipp32f)sigma_space, 1, pSpec );
+          if( ippiFilterBilateral_8u_C1R( src.ptr<uchar>(range.start) + radius * ((int)src.step[0] + 1), (int)src.step[0], dst.ptr<uchar>(range.start), (int)dst.step[0], roi, kernel, pSpec ) < 0)
+              *ok = false;
+      }
+private:
+    Mat &src;
+    Mat &dst;
+    double sigma_color;
+    double sigma_space;
+    int radius;
+    bool *ok;
+    const IPPBilateralFilter_8u_Invoker& operator= (const IPPBilateralFilter_8u_Invoker&);
+};
+#endif
+
 static void
 bilateralFilter_8u( const Mat& src, Mat& dst, int d,
     double sigma_color, double sigma_space,
@@ -1904,32 +1939,19 @@ bilateralFilter_8u( const Mat& src, Mat& dst, int d,
     radius = MAX(radius, 1);
     d = radius*2 + 1;
 
-#if 0 && defined HAVE_IPP && (IPP_VERSION_MAJOR >= 7)
-    if(cn == 1)
-    {
-        IppiSize kernel = {d, d};
-        IppiSize roi={src.cols, src.rows};
-        int bufsize=0;
-        ippiFilterBilateralGetBufSize_8u_C1R( ippiFilterBilateralGauss, roi, kernel, &bufsize);
-        AutoBuffer<uchar> buf(bufsize+128);
-        IppiFilterBilateralSpec *pSpec = (IppiFilterBilateralSpec *)alignPtr(&buf[0], 32);
-        ippiFilterBilateralInit_8u_C1R( ippiFilterBilateralGauss, kernel, sigma_color*sigma_color, sigma_space*sigma_space, 1, pSpec );
-        Mat tsrc;
-        const Mat* psrc = &src;
-        if( src.data == dst.data )
-        {
-            src.copyTo(tsrc);
-            psrc = &tsrc;
-        }
-        if( ippiFilterBilateral_8u_C1R(psrc->data, (int)psrc->step[0],
-                                       dst.data, (int)dst.step[0],
-                                       roi, kernel, pSpec) >= 0 )
-            return;
-    }
-#endif
     Mat temp;
     copyMakeBorder( src, temp, radius, radius, radius, radius, borderType );
 
+#if defined HAVE_IPP && (IPP_VERSION_MAJOR >= 7)
+    if( cn == 1 )
+    {
+        bool ok;
+        IPPBilateralFilter_8u_Invoker body(temp, dst, sigma_color * sigma_color, sigma_space * sigma_space, radius, &ok );
+        parallel_for_(Range(0, dst.rows), body, dst.total()/(double)(1<<16));
+        if( ok ) return;
+    }
+#endif
+    
     vector<float> _color_weight(cn*256);
     vector<float> _space_weight(d*d);
     vector<int> _space_ofs(d*d);
diff --git a/modules/imgproc/test/test_bilateral_filter.cpp b/modules/imgproc/test/test_bilateral_filter.cpp
index 2d45fdcf7..0bfc3dc4c 100644
--- a/modules/imgproc/test/test_bilateral_filter.cpp
+++ b/modules/imgproc/test/test_bilateral_filter.cpp
@@ -251,7 +251,7 @@ namespace cvtest
 
     int CV_BilateralFilterTest::validate_test_results(int test_case_index)
     {
-        static const double eps = 1;
+        static const double eps = 4;
 
         Mat reference_dst, reference_src;
         if (_src.depth() == CV_32F)

From 1e8194fd3ccf37bb894f4af410f1fc62ff2c8b23 Mon Sep 17 00:00:00 2001
From: Jin Ma <jinma06njuee@gmail.om>
Date: Thu, 22 Aug 2013 15:42:07 +0800
Subject: [PATCH 07/41] Optimized mog and mog2, which have much better
 performance.

---
 modules/ocl/src/bgfg_mog.cpp       |  15 ++-
 modules/ocl/src/opencl/bgfg_mog.cl | 207 ++++++++++++++---------------
 modules/ocl/test/test_bgfg.cpp     |   2 +-
 3 files changed, 114 insertions(+), 110 deletions(-)

diff --git a/modules/ocl/src/bgfg_mog.cpp b/modules/ocl/src/bgfg_mog.cpp
index 0bdfe6f2f..c079c6b8f 100644
--- a/modules/ocl/src/bgfg_mog.cpp
+++ b/modules/ocl/src/bgfg_mog.cpp
@@ -254,7 +254,7 @@ static void mog_withoutLearning(const oclMat& frame, int cn, oclMat& fgmask, ocl
 }
 
 
-static void mog_withLearning(const oclMat& frame, int cn, oclMat& fgmask, oclMat& weight, oclMat& sortKey, oclMat& mean, oclMat& var,
+static void mog_withLearning(const oclMat& frame, int cn, oclMat& fgmask_raw, oclMat& weight, oclMat& sortKey, oclMat& mean, oclMat& var,
     int nmixtures, float varThreshold, float backgroundRatio, float learningRate, float minVar)
 {
     Context* clCxt = Context::getContext();
@@ -262,6 +262,8 @@ static void mog_withLearning(const oclMat& frame, int cn, oclMat& fgmask, oclMat
     size_t local_thread[] = {32, 8, 1};
     size_t global_thread[] = {frame.cols, frame.rows, 1};
 
+    oclMat fgmask(fgmask_raw.size(), CV_32SC1);
+
     int frame_step = (int)(frame.step/frame.elemSize());
     int fgmask_step = (int)(fgmask.step/fgmask.elemSize());
     int weight_step = (int)(weight.step/weight.elemSize());
@@ -318,6 +320,8 @@ static void mog_withLearning(const oclMat& frame, int cn, oclMat& fgmask, oclMat
     args.push_back(make_pair(sizeof(cl_int), (void*)&frame_offset_y));
 
     openCLExecuteKernel(clCxt, &bgfg_mog, kernel_name, global_thread, local_thread, args, -1, -1, build_option);
+    fgmask.convertTo(fgmask, CV_8U);
+    fgmask.copyTo(fgmask_raw);
 }
 
 void cv::ocl::device::mog::mog_ocl(const oclMat& frame, int cn, oclMat& fgmask, oclMat& weight, oclMat& sortKey, oclMat& mean, oclMat& var,
@@ -392,9 +396,11 @@ void cv::ocl::device::mog::loadConstants(float Tb, float TB, float Tg, float var
         (void *)constants, sizeof(_contant_struct));
 }
 
-void cv::ocl::device::mog::mog2_ocl(const oclMat& frame, int cn, oclMat& fgmask, oclMat& modesUsed, oclMat& weight, oclMat& variance, 
+void cv::ocl::device::mog::mog2_ocl(const oclMat& frame, int cn, oclMat& fgmaskRaw, oclMat& modesUsed, oclMat& weight, oclMat& variance, 
                                 oclMat& mean, float alphaT, float prune, bool detectShadows, int nmixtures)
 {
+    oclMat fgmask(fgmaskRaw.size(), CV_32SC1);
+
     Context* clCxt = Context::getContext();
 
     const float alpha1 = 1.0f - alphaT;
@@ -464,6 +470,9 @@ void cv::ocl::device::mog::mog2_ocl(const oclMat& frame, int cn, oclMat& fgmask,
     args.push_back(make_pair(sizeof(cl_mem), (void*)&cl_constants));
 
     openCLExecuteKernel(clCxt, &bgfg_mog, kernel_name, global_thread, local_thread, args, -1, -1, build_option);
+
+    fgmask.convertTo(fgmask, CV_8U);
+    fgmask.copyTo(fgmaskRaw);
 }
 
 void cv::ocl::device::mog::getBackgroundImage2_ocl(int cn, const oclMat& modesUsed, const oclMat& weight, const oclMat& mean, oclMat& dst, int nmixtures)
@@ -580,7 +589,7 @@ void cv::ocl::MOG2::initialize(cv::Size frameSize, int frameType)
     mean_.setTo(Scalar::all(0));
 
     //make the array for keeping track of the used modes per pixel - all zeros at start
-    bgmodelUsedModes_.create(frameSize_, CV_8UC1);
+    bgmodelUsedModes_.create(frameSize_, CV_32FC1);
     bgmodelUsedModes_.setTo(cv::Scalar::all(0));
 
     loadConstants(varThreshold, backgroundRatio, varThresholdGen, fVarInit, fVarMin, fVarMax, fTau, nShadowDetection);
diff --git a/modules/ocl/src/opencl/bgfg_mog.cl b/modules/ocl/src/opencl/bgfg_mog.cl
index 4ad6a52f7..77bdb9c2a 100644
--- a/modules/ocl/src/opencl/bgfg_mog.cl
+++ b/modules/ocl/src/opencl/bgfg_mog.cl
@@ -188,7 +188,7 @@ __kernel void mog_withoutLearning_kernel(__global T_FRAME* frame, __global uchar
     }
 }
 
-__kernel void mog_withLearning_kernel(__global T_FRAME* frame, __global uchar* fgmask,
+__kernel void mog_withLearning_kernel(__global T_FRAME* frame, __global int* fgmask,
     __global float* weight, __global float* sortKey, __global T_MEAN_VAR* mean, 
     __global T_MEAN_VAR* var, int frame_row, int frame_col, int frame_step, int fgmask_step,
     int weight_step, int sortKey_step, int mean_step, int var_step,
@@ -202,130 +202,125 @@ __kernel void mog_withLearning_kernel(__global T_FRAME* frame, __global uchar* f
     int x = get_global_id(0);
     int y = get_global_id(1);
 
-    if(x < frame_col && y < frame_row)
+    if(x >= frame_col || y >= frame_row) return;
+    float wsum = 0.0f;
+    int kHit = -1;
+    int kForeground = -1;
+    int k = 0;
+
+    T_MEAN_VAR pix = cvt(frame[(y + frame_offset_y) * frame_step + (x + frame_offset_x)]);
+
+    for (; k < (NMIXTURES); ++k)
     {
+        float w = weight[(k * frame_row + y) * weight_step + x];
+        wsum += w;
 
-        float wsum = 0.0f;
-        int kHit = -1;
-        int kForeground = -1;
-        int k = 0;
+        if (w < 1.192092896e-07f)
+            break;
 
-        T_MEAN_VAR pix = cvt(frame[(y + frame_offset_y) * frame_step + (x + frame_offset_x)]);
-    
-        for (; k < (NMIXTURES); ++k)
+        T_MEAN_VAR mu = mean[(k * frame_row + y) * mean_step + x];
+        T_MEAN_VAR _var = var[(k * frame_row + y) * var_step + x];
+
+        float sortKey_prev, weight_prev;
+        T_MEAN_VAR mean_prev, var_prev;
+        if (sqr(pix - mu) < varThreshold * sum(_var))
         {
-            float w = weight[(k * frame_row + y) * weight_step + x];
-            wsum += w;
+            wsum -= w;
+            float dw = learningRate * (1.0f - w);
 
-            if (w < 1.192092896e-07f)
-                break;
+            _var = clamp1(_var, learningRate, pix - mu, minVar);
 
-            T_MEAN_VAR mu = mean[(k * frame_row + y) * mean_step + x];
-            T_MEAN_VAR _var = var[(k * frame_row + y) * var_step + x];
+            sortKey_prev = w / sqr(sum(_var));
+            sortKey[(k * frame_row + y) * sortKey_step + x] = sortKey_prev;
 
-            T_MEAN_VAR diff = pix - mu;
+            weight_prev = w + dw;
+            weight[(k * frame_row + y) * weight_step + x] = weight_prev;
 
-            if (sqr(diff) < varThreshold * sum(_var))
+            mean_prev = mu + learningRate * (pix - mu);
+            mean[(k * frame_row + y) * mean_step + x] = mean_prev;
+
+            var_prev = _var;
+            var[(k * frame_row + y) * var_step + x] = var_prev;
+        }
+
+        int k1 = k - 1;
+
+        if (k1 >= 0 && sqr(pix - mu) < varThreshold * sum(_var))
+        {
+            float sortKey_next = sortKey[(k1 * frame_row + y) * sortKey_step + x];
+            float weight_next = weight[(k1 * frame_row + y) * weight_step + x];
+            T_MEAN_VAR mean_next = mean[(k1 * frame_row + y) * mean_step + x];
+            T_MEAN_VAR var_next = var[(k1 * frame_row + y) * var_step + x];
+
+            for (; sortKey_next < sortKey_prev && k1 >= 0; --k1)
             {
-                wsum -= w;
-                float dw = learningRate * (1.0f - w);
+                sortKey[(k1 * frame_row + y) * sortKey_step + x] = sortKey_prev;
+                sortKey[((k1 + 1) * frame_row + y) * sortKey_step + x] = sortKey_next;
 
-                _var = clamp1(_var, learningRate, diff, minVar);
+                weight[(k1 * frame_row + y) * weight_step + x] = weight_prev;
+                weight[((k1 + 1) * frame_row + y) * weight_step + x] = weight_next;
 
-                float sortKey_prev = w / sqr(sum(_var));
-                sortKey[(k * frame_row + y) * sortKey_step + x] = sortKey_prev;
+                mean[(k1 * frame_row + y) * mean_step + x] = mean_prev;
+                mean[((k1 + 1) * frame_row + y) * mean_step + x] = mean_next;
 
-                float weight_prev = w + dw;
-                weight[(k * frame_row + y) * weight_step + x] = weight_prev;
+                var[(k1 * frame_row + y) * var_step + x] = var_prev;
+                var[((k1 + 1) * frame_row + y) * var_step + x] = var_next;
 
-                T_MEAN_VAR mean_prev = mu + learningRate * diff;
-                mean[(k * frame_row + y) * mean_step + x] = mean_prev;
+                sortKey_prev = sortKey_next;
+                sortKey_next = k1 > 0 ? sortKey[((k1 - 1) * frame_row + y) * sortKey_step + x] : 0.0f;
 
-                T_MEAN_VAR var_prev = _var;
-                var[(k * frame_row + y) * var_step + x] = var_prev;
+                weight_prev = weight_next;
+                weight_next = k1 > 0 ? weight[((k1 - 1) * frame_row + y) * weight_step + x] : 0.0f;
 
-                int k1 = k - 1;
+                mean_prev = mean_next;
+                mean_next = k1 > 0 ? mean[((k1 - 1) * frame_row + y) * mean_step + x] : (T_MEAN_VAR)F_ZERO;
 
-                if (k1 >= 0)
-                {
-                    float sortKey_next = sortKey[(k1 * frame_row + y) * sortKey_step + x];
-                    float weight_next = weight[(k1 * frame_row + y) * weight_step + x];
-                    T_MEAN_VAR mean_next = mean[(k1 * frame_row + y) * mean_step + x];
-                    T_MEAN_VAR var_next = var[(k1 * frame_row + y) * var_step + x];
-
-                    for (; sortKey_next < sortKey_prev && k1 >= 0; --k1)
-                    {
-                        sortKey[(k1 * frame_row + y) * sortKey_step + x] = sortKey_prev;
-                        sortKey[((k1 + 1) * frame_row + y) * sortKey_step + x] = sortKey_next;
-
-                        weight[(k1 * frame_row + y) * weight_step + x] = weight_prev;
-                        weight[((k1 + 1) * frame_row + y) * weight_step + x] = weight_next;
-
-                        mean[(k1 * frame_row + y) * mean_step + x] = mean_prev;
-                        mean[((k1 + 1) * frame_row + y) * mean_step + x] = mean_next;
-
-                        var[(k1 * frame_row + y) * var_step + x] = var_prev;
-                        var[((k1 + 1) * frame_row + y) * var_step + x] = var_next;
-
-                        sortKey_prev = sortKey_next;
-                        sortKey_next = k1 > 0 ? sortKey[((k1 - 1) * frame_row + y) * sortKey_step + x] : 0.0f;
-
-                        weight_prev = weight_next;
-                        weight_next = k1 > 0 ? weight[((k1 - 1) * frame_row + y) * weight_step + x] : 0.0f;
-
-                        mean_prev = mean_next;
-                        mean_next = k1 > 0 ? mean[((k1 - 1) * frame_row + y) * mean_step + x] : (T_MEAN_VAR)F_ZERO;
-
-                        var_prev = var_next;
-                        var_next = k1 > 0 ? var[((k1 - 1) * frame_row + y) * var_step + x] : (T_MEAN_VAR)F_ZERO;
-                    }
-                }
-
-                kHit = k1 + 1;
-                break;
+                var_prev = var_next;
+                var_next = k1 > 0 ? var[((k1 - 1) * frame_row + y) * var_step + x] : (T_MEAN_VAR)F_ZERO;
             }
         }
 
-        if (kHit < 0)
-        {
-            kHit = k = k < ((NMIXTURES) - 1) ? k : ((NMIXTURES) - 1);
-            wsum += w0 - weight[(k * frame_row + y) * weight_step + x];
-
-            weight[(k * frame_row + y) * weight_step + x] = w0;
-            mean[(k * frame_row + y) * mean_step + x] = pix;
-            #if defined (CN1)
-            var[(k * frame_row + y) * var_step + x] = (T_MEAN_VAR)(var0);
-            #else
-            var[(k * frame_row + y) * var_step + x] = (T_MEAN_VAR)(var0, var0, var0, var0);
-            #endif
-            sortKey[(k * frame_row + y) * sortKey_step + x] = sk0;
-        }
-        else
-        {
-            for( ; k < (NMIXTURES); k++)
-                wsum += weight[(k * frame_row + y) * weight_step + x];
-        }
-
-        float wscale = 1.0f / wsum;
-        wsum = 0;
-        for (k = 0; k < (NMIXTURES); ++k)
-        {
-            float w = weight[(k * frame_row + y) * weight_step + x];
-            wsum += w *= wscale;
-
-            weight[(k * frame_row + y) * weight_step + x] = w;
-            sortKey[(k * frame_row + y) * sortKey_step + x] *= wscale;
-
-            if (wsum > backgroundRatio && kForeground < 0)
-                kForeground = k + 1;
-        }
-        if(kHit >= kForeground)
-            fgmask[(y + fgmask_offset_y) * fgmask_step + (x + fgmask_offset_x)] = (uchar)(-1);
-        else
-            fgmask[(y + fgmask_offset_y) * fgmask_step + (x + fgmask_offset_x)] = (uchar)(0);
+        kHit = k1 + 1;
+        break;
     }
+
+    if (kHit < 0)
+    {
+        kHit = k = k < ((NMIXTURES) - 1) ? k : ((NMIXTURES) - 1);
+        wsum += w0 - weight[(k * frame_row + y) * weight_step + x];
+
+        weight[(k * frame_row + y) * weight_step + x] = w0;
+        mean[(k * frame_row + y) * mean_step + x] = pix;
+#if defined (CN1)
+        var[(k * frame_row + y) * var_step + x] = (T_MEAN_VAR)(var0);
+#else
+        var[(k * frame_row + y) * var_step + x] = (T_MEAN_VAR)(var0, var0, var0, var0);
+#endif
+        sortKey[(k * frame_row + y) * sortKey_step + x] = sk0;
+    }
+    else
+    {
+        for( ; k < (NMIXTURES); k++)
+            wsum += weight[(k * frame_row + y) * weight_step + x];
+    }
+
+    float wscale = 1.0f / wsum;
+    wsum = 0;
+    for (k = 0; k < (NMIXTURES); ++k)
+    {
+        float w = weight[(k * frame_row + y) * weight_step + x];
+        w *= wscale;
+        wsum += w;
+
+        weight[(k * frame_row + y) * weight_step + x] = w;
+        sortKey[(k * frame_row + y) * sortKey_step + x] *= wscale;
+
+        kForeground = select(kForeground, k + 1, wsum > backgroundRatio && kForeground < 0);
+    }
+    fgmask[(y + fgmask_offset_y) * fgmask_step + (x + fgmask_offset_x)] = (uchar)(-(kHit >= kForeground));
 }
 
+
 __kernel void getBackgroundImage_kernel(__global float* weight, __global T_MEAN_VAR* mean, __global T_FRAME* dst,
     int dst_row, int dst_col, int weight_step, int mean_step, int dst_step, 
     float backgroundRatio)
@@ -355,8 +350,8 @@ __kernel void getBackgroundImage_kernel(__global float* weight, __global T_MEAN_
     }
 }
 
-__kernel void mog2_kernel(__global T_FRAME * frame, __global uchar* fgmask, __global float* weight, __global T_MEAN_VAR * mean,
-        __global uchar* modesUsed, __global float* variance, int frame_row, int frame_col, int frame_step, 
+__kernel void mog2_kernel(__global T_FRAME * frame, __global int* fgmask, __global float* weight, __global T_MEAN_VAR * mean,
+        __global int* modesUsed, __global float* variance, int frame_row, int frame_col, int frame_step, 
         int fgmask_step, int weight_step, int mean_step, int modesUsed_step, int var_step, float alphaT, float alpha1, float prune, 
         int detectShadows_flag, int fgmask_offset_x, int fgmask_offset_y, int frame_offset_x, int frame_offset_y, __constant con_srtuct_t* constants)
 {
@@ -509,7 +504,7 @@ __kernel void mog2_kernel(__global T_FRAME * frame, __global uchar* fgmask, __gl
     }
 }
 
-__kernel void getBackgroundImage2_kernel(__global uchar* modesUsed, __global float* weight, __global T_MEAN_VAR* mean,
+__kernel void getBackgroundImage2_kernel(__global int* modesUsed, __global float* weight, __global T_MEAN_VAR* mean,
     __global T_FRAME* dst, float c_TB, int modesUsed_row, int modesUsed_col, int modesUsed_step, int weight_step, 
     int mean_step, int dst_step, int dst_x, int dst_y)
 {
diff --git a/modules/ocl/test/test_bgfg.cpp b/modules/ocl/test/test_bgfg.cpp
index f5afd12ee..e35f26e3b 100644
--- a/modules/ocl/test/test_bgfg.cpp
+++ b/modules/ocl/test/test_bgfg.cpp
@@ -191,7 +191,7 @@ TEST_P(mog2, getBackgroundImage)
     if (useGray)
         return;
 
-    std::string inputFile = string(cvtest::TS::ptr()->get_data_path()) + "gpu/768x576.avi";
+    std::string inputFile = string(cvtest::TS::ptr()->get_data_path()) + "video/768x576.avi";
     cv::VideoCapture cap(inputFile);
     ASSERT_TRUE(cap.isOpened());
 

From 26b5eb3e3990b31be90ff3ca0afe18eaf68768da Mon Sep 17 00:00:00 2001
From: yao <bitwangyaoyao@gmail.com>
Date: Thu, 29 Aug 2013 10:48:15 +0800
Subject: [PATCH 08/41] add adaptive bilateral filter (cpp and ocl version)

---
 modules/imgproc/doc/filtering.rst             |  22 +
 .../include/opencv2/imgproc/imgproc.hpp       |   4 +
 modules/imgproc/src/smooth.cpp                | 230 ++++++++++
 modules/ocl/doc/image_filtering.rst           |   2 +-
 modules/ocl/include/opencv2/ocl/ocl.hpp       |  10 +-
 modules/ocl/perf/perf_filters.cpp             |  79 ++++
 modules/ocl/src/filtering.cpp                 |  98 ++++
 .../opencl/filtering_adaptive_bilateral.cl    | 424 ++++++++++++++++++
 modules/ocl/test/test_filters.cpp             |  76 ++++
 modules/ocl/test/test_imgproc.cpp             |  65 ---
 samples/ocl/adaptive_bilateral_filter.cpp     |  51 +++
 11 files changed, 994 insertions(+), 67 deletions(-)
 create mode 100644 modules/ocl/src/opencl/filtering_adaptive_bilateral.cl
 create mode 100644 samples/ocl/adaptive_bilateral_filter.cpp

diff --git a/modules/imgproc/doc/filtering.rst b/modules/imgproc/doc/filtering.rst
index 3d230d1ca..1816c6a43 100755
--- a/modules/imgproc/doc/filtering.rst
+++ b/modules/imgproc/doc/filtering.rst
@@ -412,6 +412,28 @@ http://www.dai.ed.ac.uk/CVonline/LOCAL\_COPIES/MANDUCHI1/Bilateral\_Filtering.ht
 This filter does not work inplace.
 
 
+adaptiveBilateralFilter
+-----------------------
+Applies the adaptive bilateral filter to an image.
+
+.. ocv:function:: void adaptiveBilateralFilter( InputArray src, OutputArray dst, Size ksize, double sigmaSpace, Point anchor=Point(-1, -1), int borderType=BORDER_DEFAULT )
+
+.. ocv:pyfunction:: cv2.adaptiveBilateralFilter(src, ksize, sigmaSpace[, dst[, anchor[, borderType]]]) -> dst
+
+    :param src: Source 8-bit, 1-channel or 3-channel image.
+
+    :param dst: Destination image of the same size and type as  ``src`` .
+
+    :param ksize: filter kernel size.
+
+    :param sigmaSpace: Filter sigma in the coordinate space. Its meaning is similar to that of ``sigmaSpace`` in ``bilateralFilter``.
+
+    :param anchor: anchor point; the default value ``Point(-1,-1)`` means that the anchor is at the kernel center. Only the default value is currently supported.
+
+    :param borderType: border mode used to extrapolate pixels outside of the image.
+
+The function applies adaptive bilateral filtering to the input image. This filter is similar to ``bilateralFilter`` in that both dissimilarity from and distance to the center pixel are penalized. Instead of a user-supplied ``sigmaColor``, the variance of the pixel values in the neighbourhood is used.
+
 
 
 blur
diff --git a/modules/imgproc/include/opencv2/imgproc/imgproc.hpp b/modules/imgproc/include/opencv2/imgproc/imgproc.hpp
index f51bbaab7..1981a61d9 100644
--- a/modules/imgproc/include/opencv2/imgproc/imgproc.hpp
+++ b/modules/imgproc/include/opencv2/imgproc/imgproc.hpp
@@ -398,6 +398,10 @@ CV_EXPORTS_W void GaussianBlur( InputArray src,
 CV_EXPORTS_W void bilateralFilter( InputArray src, OutputArray dst, int d,
                                    double sigmaColor, double sigmaSpace,
                                    int borderType=BORDER_DEFAULT );
+//! smooths the image using adaptive bilateral filter
+CV_EXPORTS_W void adaptiveBilateralFilter( InputArray src, OutputArray dst, Size ksize,
+                                           double sigmaSpace, Point anchor=Point(-1, -1),
+                                           int borderType=BORDER_DEFAULT );
 //! smooths the image using the box filter. Each pixel is processed in O(1) time
 CV_EXPORTS_W void boxFilter( InputArray src, OutputArray dst, int ddepth,
                              Size ksize, Point anchor=Point(-1,-1),
diff --git a/modules/imgproc/src/smooth.cpp b/modules/imgproc/src/smooth.cpp
index 00be08618..e38487aa5 100644
--- a/modules/imgproc/src/smooth.cpp
+++ b/modules/imgproc/src/smooth.cpp
@@ -2250,6 +2250,236 @@ void cv::bilateralFilter( InputArray _src, OutputArray _dst, int d,
         "Bilateral filtering is only implemented for 8u and 32f images" );
 }
 
+
+/****************************************************************************************\
+                                  Adaptive Bilateral Filtering
+\****************************************************************************************/
+
+namespace cv
+{
+#define CALCVAR 1
+#define FIXED_WEIGHT 0
+
+class adaptiveBilateralFilter_8u_Invoker :
+    public ParallelLoopBody
+{
+public:
+    adaptiveBilateralFilter_8u_Invoker(Mat& _dest, const Mat& _temp, Size _ksize, double _sigma_space, Point _anchor) :
+        temp(&_temp), dest(&_dest), ksize(_ksize), sigma_space(_sigma_space), anchor(_anchor)
+    {
+        if( sigma_space <= 0 )
+            sigma_space = 1;
+        CV_Assert((ksize.width & 1) && (ksize.height & 1));
+        space_weight.resize(ksize.width * ksize.height);
+        double sigma2 = sigma_space * sigma_space;
+        int idx = 0;
+        int w = ksize.width / 2;
+        int h = ksize.height / 2;
+        for(int y=-h; y<=h; y++)
+            for(int x=-w; x<=w; x++)
+        {
+            space_weight[idx++] = (float)(sigma2 / (sigma2 + x * x + y * y));
+        }
+    }
+    virtual void operator()(const Range& range) const
+    {
+        int cn = dest->channels();
+        int anX = anchor.x;
+
+        const uchar *tptr;
+
+        for(int i = range.start;i < range.end; i++)
+        {
+            int startY = i;
+            if(cn == 1)
+            {
+                float var;
+                int currVal;
+                int sumVal = 0;
+                int sumValSqr = 0;
+                int currValCenter;
+                int currWRTCenter;
+                float weight;
+                float totalWeight = 0.;
+                float tmpSum = 0.;
+
+                for(int j = 0;j < dest->cols *cn; j+=cn)
+                {
+                    sumVal = 0;
+                    sumValSqr= 0;
+                    totalWeight = 0.;
+                    tmpSum = 0.;
+
+                    // Top row: don't sum the very last element
+                    int startLMJ = 0;
+                    int endLMJ  = ksize.width  - 1;
+                    int howManyAll = (anX *2 +1)*(ksize.width );
+#if CALCVAR
+                    for(int x = startLMJ; x< endLMJ; x++)
+                    {
+                        tptr = temp->ptr(startY + x) +j;
+                        for(int y=-anX; y<=anX; y++)
+                        {
+                            currVal = tptr[cn*(y+anX)];
+                            sumVal += currVal;
+                            sumValSqr += (currVal *currVal);
+                        }
+                    }
+                    var = ( (sumValSqr * howManyAll)- sumVal * sumVal )  /  ( (float)(howManyAll*howManyAll));
+#else
+                    var = 900.0;
+#endif
+                    startLMJ = 0;
+                    endLMJ = ksize.width;
+                    tptr = temp->ptr(startY + (startLMJ+ endLMJ)/2);
+                    currValCenter =tptr[j+cn*anX];
+                    for(int x = startLMJ; x< endLMJ; x++)
+                    {
+                        tptr = temp->ptr(startY + x) +j;
+                        for(int y=-anX; y<=anX; y++)
+                        {
+#if FIXED_WEIGHT
+                            weight = 1.0;
+#else
+                            currVal = tptr[cn*(y+anX)];
+                            currWRTCenter = currVal - currValCenter;
+
+                            weight = var / ( var + (currWRTCenter * currWRTCenter) ) * space_weight[x*ksize.width+y+anX];;
+#endif
+                            tmpSum += ((float)tptr[cn*(y+anX)] * weight);
+                            totalWeight += weight;
+                        }
+                    }
+                    tmpSum /= totalWeight;
+
+                   dest->at<uchar>(startY ,j)= static_cast<uchar>(tmpSum);
+                }
+            }
+            else
+            {
+                assert(cn == 3);
+                float var_b, var_g, var_r;
+                int currVal_b, currVal_g, currVal_r;
+                int sumVal_b= 0, sumVal_g= 0, sumVal_r= 0;
+                int sumValSqr_b= 0, sumValSqr_g= 0, sumValSqr_r= 0;
+                int currValCenter_b= 0, currValCenter_g= 0, currValCenter_r= 0;
+                int currWRTCenter_b, currWRTCenter_g, currWRTCenter_r;
+                float weight_b, weight_g, weight_r;
+                float totalWeight_b= 0., totalWeight_g= 0., totalWeight_r= 0.;
+                float tmpSum_b = 0., tmpSum_g= 0., tmpSum_r = 0.;
+
+                for(int j = 0;j < dest->cols *cn; j+=cn)
+                {
+                    sumVal_b= 0, sumVal_g= 0, sumVal_r= 0;
+                    sumValSqr_b= 0, sumValSqr_g= 0, sumValSqr_r= 0;
+                    totalWeight_b= 0., totalWeight_g= 0., totalWeight_r= 0.;
+                    tmpSum_b = 0., tmpSum_g= 0., tmpSum_r = 0.;
+
+                    // Top row: don't sum the very last element
+                    int startLMJ = 0;
+                    int endLMJ  = ksize.width - 1;
+                    int howManyAll = (anX *2 +1)*(ksize.width);
+#if CALCVAR
+                    for(int x = startLMJ; x< endLMJ; x++)
+                    {
+                        tptr = temp->ptr(startY + x) +j;
+                        for(int y=-anX; y<=anX; y++)
+                        {
+                            currVal_b = tptr[cn*(y+anX)], currVal_g = tptr[cn*(y+anX)+1], currVal_r =tptr[cn*(y+anX)+2];
+                            sumVal_b += currVal_b;
+                            sumVal_g += currVal_g;
+                            sumVal_r += currVal_r;
+                            sumValSqr_b += (currVal_b *currVal_b);
+                            sumValSqr_g += (currVal_g *currVal_g);
+                            sumValSqr_r += (currVal_r *currVal_r);
+                        }
+                    }
+                    var_b = ( (sumValSqr_b * howManyAll)- sumVal_b * sumVal_b )  /  ( (float)(howManyAll*howManyAll));
+                    var_g = ( (sumValSqr_g * howManyAll)- sumVal_g * sumVal_g )  /  ( (float)(howManyAll*howManyAll));
+                    var_r = ( (sumValSqr_r * howManyAll)- sumVal_r * sumVal_r )  /  ( (float)(howManyAll*howManyAll));
+#else
+                    var_b = 900.0; var_g = 900.0;var_r = 900.0;
+#endif
+                    startLMJ = 0;
+                    endLMJ = ksize.width;
+                    tptr = temp->ptr(startY + (startLMJ+ endLMJ)/2) + j;
+                    currValCenter_b =tptr[cn*anX], currValCenter_g =tptr[cn*anX+1], currValCenter_r =tptr[cn*anX+2];
+                    for(int x = startLMJ; x< endLMJ; x++)
+                    {
+                        tptr = temp->ptr(startY + x) +j;
+                        for(int y=-anX; y<=anX; y++)
+                        {
+#if FIXED_WEIGHT
+                            weight_b = 1.0;
+                            weight_g = 1.0;
+                            weight_r = 1.0;
+#else
+                            currVal_b = tptr[cn*(y+anX)];currVal_g=tptr[cn*(y+anX)+1];currVal_r=tptr[cn*(y+anX)+2];
+                            currWRTCenter_b = currVal_b - currValCenter_b;
+                            currWRTCenter_g = currVal_g - currValCenter_g;
+                            currWRTCenter_r = currVal_r - currValCenter_r;
+
+                            float cur_spw = space_weight[x*ksize.width+y+anX];
+                            weight_b = var_b / ( var_b + (currWRTCenter_b * currWRTCenter_b) ) * cur_spw;
+                            weight_g = var_g / ( var_g + (currWRTCenter_g * currWRTCenter_g) ) * cur_spw;
+                            weight_r = var_r / ( var_r + (currWRTCenter_r * currWRTCenter_r) ) * cur_spw;
+#endif
+                            tmpSum_b += ((float)tptr[cn*(y+anX)]   * weight_b);
+                            tmpSum_g += ((float)tptr[cn*(y+anX)+1] * weight_g);
+                            tmpSum_r += ((float)tptr[cn*(y+anX)+2] * weight_r);
+                            totalWeight_b += weight_b, totalWeight_g += weight_g, totalWeight_r += weight_r;
+                        }
+                    }
+                    tmpSum_b /= totalWeight_b;
+                    tmpSum_g /= totalWeight_g;
+                    tmpSum_r /= totalWeight_r;
+
+                    dest->at<uchar>(startY,j  )= static_cast<uchar>(tmpSum_b);
+                    dest->at<uchar>(startY,j+1)= static_cast<uchar>(tmpSum_g);
+                    dest->at<uchar>(startY,j+2)= static_cast<uchar>(tmpSum_r);
+                }
+            }
+        }
+    }
+private:
+    const Mat *temp;
+    Mat *dest;
+    Size ksize;
+    double sigma_space;
+    Point anchor;
+    vector<float> space_weight;
+};
+static void adaptiveBilateralFilter_8u( const Mat& src, Mat& dst, Size ksize, double sigmaSpace, Point anchor, int borderType )
+{
+    Size size = src.size();
+
+    CV_Assert( (src.type() == CV_8UC1 || src.type() == CV_8UC3) &&
+              src.type() == dst.type() && src.size() == dst.size() &&
+              src.data != dst.data );
+    Mat temp;
+    copyMakeBorder(src, temp, anchor.x, anchor.y, anchor.x, anchor.y, borderType);
+
+    adaptiveBilateralFilter_8u_Invoker body(dst, temp, ksize, sigmaSpace, anchor);
+    parallel_for_(Range(0, size.height), body, dst.total()/(double)(1<<16));
+}
+}
+void cv::adaptiveBilateralFilter( InputArray _src, OutputArray _dst, Size ksize,
+                                  double sigmaSpace, Point anchor, int borderType )
+{
+    Mat src = _src.getMat();
+    _dst.create(src.size(), src.type());
+    Mat dst = _dst.getMat();
+
+    CV_Assert(src.type() == CV_8UC1 || src.type() == CV_8UC3);
+
+    anchor = normalizeAnchor(anchor,ksize);
+    if( src.depth() == CV_8U )
+        adaptiveBilateralFilter_8u( src, dst, ksize, sigmaSpace, anchor, borderType );
+    else
+        CV_Error( CV_StsUnsupportedFormat,
+        "Adaptive Bilateral filtering is only implemented for 8u images" );
+}
+
 //////////////////////////////////////////////////////////////////////////////////////////
 
 CV_IMPL void
diff --git a/modules/ocl/doc/image_filtering.rst b/modules/ocl/doc/image_filtering.rst
index ce89e85de..1f90eedda 100644
--- a/modules/ocl/doc/image_filtering.rst
+++ b/modules/ocl/doc/image_filtering.rst
@@ -127,7 +127,7 @@ ocl::bilateralFilter
 --------------------
 Returns void
 
-.. ocv:function:: void ocl::bilateralFilter(const oclMat &src, oclMat &dst, int d, double sigmaColor, double sigmaSpave, int borderType=BORDER_DEFAULT)
+.. ocv:function:: void ocl::bilateralFilter(const oclMat &src, oclMat &dst, int d, double sigmaColor, double sigmaSpace, int borderType=BORDER_DEFAULT)
 
     :param src: The source image
 
diff --git a/modules/ocl/include/opencv2/ocl/ocl.hpp b/modules/ocl/include/opencv2/ocl/ocl.hpp
index 5b3642d03..f2b858caa 100644
--- a/modules/ocl/include/opencv2/ocl/ocl.hpp
+++ b/modules/ocl/include/opencv2/ocl/ocl.hpp
@@ -520,7 +520,15 @@ namespace cv
 
         //! bilateralFilter
         // supports 8UC1 8UC4
-        CV_EXPORTS void bilateralFilter(const oclMat& src, oclMat& dst, int d, double sigmaColor, double sigmaSpave, int borderType=BORDER_DEFAULT);
+        CV_EXPORTS void bilateralFilter(const oclMat& src, oclMat& dst, int d, double sigmaColor, double sigmaSpace, int borderType=BORDER_DEFAULT);
+
+        //! Applies an adaptive bilateral filter to the input image
+        //  This is not truly a bilateral filter. Instead of using user-provided fixed parameters,
+        //  the function calculates a constant for each window based on the local standard deviation,
+        //  and uses this constant to do the filtering.
+        //  supports 8UC1 8UC3
+        CV_EXPORTS void adaptiveBilateralFilter(const oclMat& src, oclMat& dst, Size ksize, double sigmaSpace, Point anchor = Point(-1, -1), int borderType=BORDER_DEFAULT);
+
         //! computes exponent of each matrix element (b = e**a)
         // supports only CV_32FC1 type
         CV_EXPORTS void exp(const oclMat &a, oclMat &b);
diff --git a/modules/ocl/perf/perf_filters.cpp b/modules/ocl/perf/perf_filters.cpp
index 28c290096..5f510d63f 100644
--- a/modules/ocl/perf/perf_filters.cpp
+++ b/modules/ocl/perf/perf_filters.cpp
@@ -321,3 +321,82 @@ PERF_TEST_P(filter2DFixture, filter2D,
     else
         OCL_PERF_ELSE
 }
+
+///////////// Bilateral////////////////////////
+
+typedef Size_MatType BilateralFixture;
+
+PERF_TEST_P(BilateralFixture, Bilateral,
+            ::testing::Combine(OCL_TYPICAL_MAT_SIZES,
+                               OCL_PERF_ENUM(CV_8UC1, CV_8UC3)))
+{
+    const Size_MatType_t params = GetParam();
+    const Size srcSize = get<0>(params);
+    const int type = get<1>(params), d = 7;
+	double sigmacolor = 50.0, sigmaspace = 50.0;
+
+    Mat src(srcSize, type), dst(srcSize, type);
+    declare.in(src, WARMUP_RNG).out(dst);
+
+    if (srcSize == OCL_SIZE_4000 && type == CV_8UC3)
+        declare.time(8);
+
+    if (RUN_OCL_IMPL)
+    {
+        ocl::oclMat oclSrc(src), oclDst(srcSize, type);
+
+        OCL_TEST_CYCLE() cv::ocl::bilateralFilter(oclSrc, oclDst, d, sigmacolor, sigmaspace);
+
+        oclDst.download(dst);
+
+        SANITY_CHECK(dst);
+    }
+    else if (RUN_PLAIN_IMPL)
+    {
+        TEST_CYCLE() cv::bilateralFilter(src, dst, d, sigmacolor, sigmaspace);
+
+        SANITY_CHECK(dst);
+    }
+    else
+        OCL_PERF_ELSE
+}
+
+///////////// adaptiveBilateral////////////////////////
+
+typedef Size_MatType adaptiveBilateralFixture;
+
+PERF_TEST_P(adaptiveBilateralFixture, adaptiveBilateral,
+            ::testing::Combine(OCL_TYPICAL_MAT_SIZES,
+                               OCL_PERF_ENUM(CV_8UC1, CV_8UC3)))
+{
+    const Size_MatType_t params = GetParam();
+    const Size srcSize = get<0>(params);
+    const int type = get<1>(params);
+	double sigmaspace = 10.0;
+	Size ksize(9,9);
+
+    Mat src(srcSize, type), dst(srcSize, type);
+    declare.in(src, WARMUP_RNG).out(dst);
+
+    if (srcSize == OCL_SIZE_4000)
+        declare.time(15);
+
+    if (RUN_OCL_IMPL)
+    {
+        ocl::oclMat oclSrc(src), oclDst(srcSize, type);
+
+        OCL_TEST_CYCLE() cv::ocl::adaptiveBilateralFilter(oclSrc, oclDst, ksize, sigmaspace);
+
+        oclDst.download(dst);
+
+        SANITY_CHECK(dst, 1.);
+    }
+    else if (RUN_PLAIN_IMPL)
+    {
+        TEST_CYCLE() cv::adaptiveBilateralFilter(src, dst, ksize, sigmaspace);
+
+        SANITY_CHECK(dst);
+    }
+    else
+        OCL_PERF_ELSE
+}
diff --git a/modules/ocl/src/filtering.cpp b/modules/ocl/src/filtering.cpp
index a08f0ed2b..c0557980b 100644
--- a/modules/ocl/src/filtering.cpp
+++ b/modules/ocl/src/filtering.cpp
@@ -64,6 +64,7 @@ extern const char *filter_sep_row;
 extern const char *filter_sep_col;
 extern const char *filtering_laplacian;
 extern const char *filtering_morph;
+extern const char *filtering_adaptive_bilateral;
 }
 }
 
@@ -1616,3 +1617,100 @@ void cv::ocl::GaussianBlur(const oclMat &src, oclMat &dst, Size ksize, double si
     Ptr<FilterEngine_GPU> f = createGaussianFilter_GPU(src.type(), ksize, sigma1, sigma2, bordertype);
     f->apply(src, dst);
 }
+
+////////////////////////////////////////////////////////////////////////////////////////////////////
+// Adaptive Bilateral Filter: builds the spatial-weight LUT on the host, wraps it in an oclMat and
+// launches the "edgeEnhancingFilter" kernel. Accepts CV_8UC1 / CV_8UC3 sources and an odd ksize.
+void cv::ocl::adaptiveBilateralFilter(const oclMat& src, oclMat& dst, Size ksize, double sigmaSpace, Point anchor, int borderType)
+{
+    CV_Assert((ksize.width & 1) && (ksize.height & 1));  // ksize must be odd
+    CV_Assert(src.type() == CV_8UC1 || src.type() == CV_8UC3);  // source must be 8-bit with 1 or 3 channels
+    if( sigmaSpace <= 0 )
+        sigmaSpace = 1;
+    Mat lut(Size(ksize.width, ksize.height), CV_32FC1);
+    double sigma2 = sigmaSpace * sigmaSpace;
+    int idx = 0;
+    int w = ksize.width / 2;
+    int h = ksize.height / 2;
+    for(int y=-h; y<=h; y++)
+        for(int x=-w; x<=w; x++)
+    {
+        lut.at<float>(idx++) = static_cast<float>(sigma2 / (sigma2 + x * x + y * y));
+    }
+    oclMat dlut(lut);
+    int depth = src.depth();
+    int cn = src.oclchannels();
+
+    normalizeAnchor(anchor, ksize);
+    const static String kernelName = "edgeEnhancingFilter";
+
+    dst.create(src.size(), src.type());
+
+    char btype[30];
+    switch(borderType)
+    {
+    case BORDER_CONSTANT:
+        sprintf(btype, "BORDER_CONSTANT");
+        break;
+    case BORDER_REPLICATE:
+        sprintf(btype, "BORDER_REPLICATE");
+        break;
+    case BORDER_REFLECT:
+        sprintf(btype, "BORDER_REFLECT");
+        break;
+    case BORDER_WRAP:
+        sprintf(btype, "BORDER_WRAP");
+        break;
+    case BORDER_REFLECT101:
+        sprintf(btype, "BORDER_REFLECT_101");
+        break;
+    default:
+        CV_Error(CV_StsBadArg, "This border type is not supported");
+        break;
+    }
+
+    //tuning constants; deliberately NOT static: EXTRA depends on ksize and must be recomputed on every call
+    const size_t blockSizeX = 64, blockSizeY = 1, EXTRA = ksize.height - 1;
+
+    //Normalize the result by default
+    const float alpha = ksize.height * ksize.width;
+
+    const size_t gSize = blockSizeX - ksize.width / 2 * 2;
+    const size_t globalSizeX = (src.cols) % gSize == 0 ?
+        src.cols / gSize * blockSizeX :
+        (src.cols / gSize + 1) * blockSizeX;
+    const size_t rows_per_thread = 1 + EXTRA;
+    const size_t globalSizeY = ((src.rows + rows_per_thread - 1) / rows_per_thread) % blockSizeY == 0 ?
+        ((src.rows + rows_per_thread - 1) / rows_per_thread) :
+        (((src.rows + rows_per_thread - 1) / rows_per_thread) / blockSizeY + 1) * blockSizeY;
+
+    size_t globalThreads[3] = { globalSizeX, globalSizeY, 1};
+    size_t localThreads[3]  = { blockSizeX, blockSizeY, 1};
+
+    char build_options[250];
+
+    //Kernel compile-time options: EXTRA rows per work-item, THREADS (= blockSizeX), anchor, kernel size, border mode
+    sprintf(build_options,
+        "-D VAR_PER_CHANNEL=1 -D CALCVAR=1 -D FIXED_WEIGHT=0 -D EXTRA=%d"
+        " -D THREADS=%d -D anX=%d -D anY=%d -D ksX=%d -D ksY=%d -D %s",
+        static_cast<int>(EXTRA), static_cast<int>(blockSizeX), anchor.x, anchor.y, ksize.width, ksize.height, btype);
+
+    std::vector<pair<size_t , const void *> > args;
+    args.push_back(std::make_pair(sizeof(cl_mem), &src.data));
+    args.push_back(std::make_pair(sizeof(cl_mem), &dst.data));
+    args.push_back(std::make_pair(sizeof(cl_float), (void *)&alpha));
+    args.push_back(std::make_pair(sizeof(cl_int), (void *)&src.offset));
+    args.push_back(std::make_pair(sizeof(cl_int), (void *)&src.wholerows));
+    args.push_back(std::make_pair(sizeof(cl_int), (void *)&src.wholecols));
+    args.push_back(std::make_pair(sizeof(cl_int), (void *)&src.step));
+    args.push_back(std::make_pair(sizeof(cl_int), (void *)&dst.offset));
+    args.push_back(std::make_pair(sizeof(cl_int), (void *)&dst.rows));
+    args.push_back(std::make_pair(sizeof(cl_int), (void *)&dst.cols));
+    args.push_back(std::make_pair(sizeof(cl_int), (void *)&dst.step));
+    args.push_back(std::make_pair(sizeof(cl_mem), &dlut.data));
+    int lut_step = dlut.step1();
+    args.push_back(std::make_pair(sizeof(cl_int), (void *)&lut_step));
+
+    openCLExecuteKernel(Context::getContext(), &filtering_adaptive_bilateral, kernelName,
+        globalThreads, localThreads, args, cn, depth, build_options);
+}
\ No newline at end of file
diff --git a/modules/ocl/src/opencl/filtering_adaptive_bilateral.cl b/modules/ocl/src/opencl/filtering_adaptive_bilateral.cl
new file mode 100644
index 000000000..a8e0fd17e
--- /dev/null
+++ b/modules/ocl/src/opencl/filtering_adaptive_bilateral.cl
@@ -0,0 +1,424 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                           License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2010-2013, Multicoreware, Inc., all rights reserved.
+// Copyright (C) 2010-2013, Advanced Micro Devices, Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// @Authors
+//    Harris Gasparakis, harris.gasparakis@amd.com
+//    Xiaopeng Fu, fuxiaopeng2222@163.com
+//    Yao Wang, bitwangyaoyao@gmail.com
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors as is and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+
+#ifdef BORDER_REPLICATE
+//BORDER_REPLICATE:     aaaaaa|abcdefgh|hhhhhhh
+#define ADDR_L(i, l_edge, r_edge)  ((i) <  (l_edge) ? (l_edge)   : (i))
+#define ADDR_R(i, r_edge, addr)    ((i) >= (r_edge) ? (r_edge)-1 : (addr))
+#define ADDR_H(i, t_edge, b_edge)  ((i) <  (t_edge) ? (t_edge)   :(i))
+#define ADDR_B(i, b_edge, addr)    ((i) >= (b_edge) ? (b_edge)-1 :(addr))
+#endif
+
+#ifdef BORDER_REFLECT
+//BORDER_REFLECT:       fedcba|abcdefgh|hgfedcb
+#define ADDR_L(i, l_edge, r_edge)  ((i) <  (l_edge) ? -(i)-1               : (i))
+#define ADDR_R(i, r_edge, addr)    ((i) >= (r_edge) ? -(i)-1+((r_edge)<<1) : (addr))
+#define ADDR_H(i, t_edge, b_edge)  ((i) <  (t_edge) ? -(i)-1 : (i))
+#define ADDR_B(i, b_edge, addr)    ((i) >= (b_edge) ? -(i)-1+((b_edge)<<1) : (addr))
+#endif
+
+#ifdef BORDER_REFLECT_101
+//BORDER_REFLECT_101:   gfedcb|abcdefgh|gfedcba
+#define ADDR_L(i, l_edge, r_edge)  ((i) <  (l_edge) ? -(i)                 : (i))
+#define ADDR_R(i, r_edge, addr)    ((i) >= (r_edge) ? -(i)-2+((r_edge)<<1) : (addr))
+#define ADDR_H(i, t_edge, b_edge)  ((i) <  (t_edge) ? -(i)                 : (i))
+#define ADDR_B(i, b_edge, addr)    ((i) >= (b_edge) ? -(i)-2+((b_edge)<<1) : (addr))
+#endif
+
+//blur function does not support BORDER_WRAP
+#ifdef BORDER_WRAP
+//BORDER_WRAP:          cdefgh|abcdefgh|abcdefg
+#define ADDR_L(i, l_edge, r_edge)  ((i) <  (l_edge) ? (i)+(r_edge) : (i))
+#define ADDR_R(i, r_edge, addr)    ((i) >= (r_edge) ? (i)-(r_edge) : (addr))
+#define ADDR_H(i, t_edge, b_edge)  ((i) <  (t_edge) ? (i)+(b_edge) : (i))
+#define ADDR_B(i, b_edge, addr)    ((i) >= (b_edge) ? (i)-(b_edge) : (addr))
+#endif
+
+__kernel void
+edgeEnhancingFilter_C4_D0(
+    __global const uchar4 * restrict src,
+    __global uchar4 *dst,
+    float alpha,
+    int src_offset,
+    int src_whole_rows,
+    int src_whole_cols,
+    int src_step,
+    int dst_offset,
+    int dst_rows,
+    int dst_cols,
+    int dst_step,
+    __global const float* lut,
+    int lut_step)
+{
+    int col = get_local_id(0);
+    const int gX = get_group_id(0);
+    const int gY = get_group_id(1);
+
+    int src_x_off = (src_offset % src_step) >> 2;
+    int src_y_off = src_offset / src_step;
+    int dst_x_off = (dst_offset % dst_step) >> 2;
+    int dst_y_off = dst_offset / dst_step;
+
+    int startX = gX * (THREADS-ksX+1) - anX + src_x_off;
+    int startY = (gY * (1+EXTRA)) - anY + src_y_off;
+
+    int dst_startX = gX * (THREADS-ksX+1) + dst_x_off;
+    int dst_startY = (gY * (1+EXTRA)) + dst_y_off;
+
+    int posX = dst_startX - dst_x_off + col;
+    int posY = (gY * (1+EXTRA))	;
+
+    __local uchar4 data[ksY+EXTRA][THREADS];
+
+    float4 tmp_sum[1+EXTRA];
+    for(int tmpint = 0; tmpint < 1+EXTRA; tmpint++)
+    {
+        tmp_sum[tmpint] = (float4)(0,0,0,0);
+    }
+
+#ifdef BORDER_CONSTANT
+    bool con;
+    uchar4 ss;
+    for(int j = 0;	j < ksY+EXTRA; j++)
+    {
+        con = (startX+col >= 0 && startX+col < src_whole_cols && startY+j >= 0 && startY+j < src_whole_rows);
+
+        int cur_col = clamp(startX + col, 0, src_whole_cols);
+        if(con)
+        {
+            ss = src[(startY+j)*(src_step>>2) + cur_col];
+        }
+
+        data[j][col] = con ? ss : (uchar4)0;
+    }
+#else
+    for(int j= 0; j < ksY+EXTRA; j++)
+    {
+        int selected_row;
+        int selected_col;
+        selected_row = ADDR_H(startY+j, 0, src_whole_rows);
+        selected_row = ADDR_B(startY+j, src_whole_rows, selected_row);
+
+        selected_col = ADDR_L(startX+col, 0, src_whole_cols);
+        selected_col = ADDR_R(startX+col, src_whole_cols, selected_col);
+
+        data[j][col] = src[selected_row * (src_step>>2) + selected_col];
+    }
+#endif
+
+    barrier(CLK_LOCAL_MEM_FENCE);
+
+    float4 var[1+EXTRA];
+
+#if VAR_PER_CHANNEL
+    float4 weight;
+    float4 totalWeight = (float4)(0,0,0,0);
+#else
+    float weight;
+    float totalWeight = 0;
+#endif
+
+    int4 currValCenter;
+    int4 currWRTCenter;
+
+    int4 sumVal = 0;
+    int4 sumValSqr = 0;
+
+    if(col < (THREADS-(ksX-1)))
+    {
+        int4 currVal;
+
+        int howManyAll = (2*anX+1)*(ksY);
+
+        //find variance of all data
+        int startLMj;
+        int endLMj ;
+#if CALCVAR
+        // Top row: don't sum the very last element
+        for(int extraCnt = 0; extraCnt <=EXTRA; extraCnt++)
+        {
+            startLMj = extraCnt;
+            endLMj =  ksY+extraCnt-1;
+            sumVal =0;
+            sumValSqr=0;
+            for(int j = startLMj; j < endLMj; j++)
+            {
+                for(int i=-anX; i<=anX; i++)
+                {
+                    currVal	= convert_int4(data[j][col+anX+i])	;
+
+                    sumVal += currVal;
+                    sumValSqr += mul24(currVal, currVal);
+                }
+            }
+            var[extraCnt] = convert_float4( ( (sumValSqr * howManyAll)- mul24(sumVal , sumVal) ) ) /  ( (float)(howManyAll*howManyAll) ) ;
+#else
+        var[extraCnt] = (float4)(900.0, 900.0, 900.0, 0.0);
+#endif
+        }
+
+        for(int extraCnt = 0; extraCnt <= EXTRA; extraCnt++)
+        {
+
+            // top row: include the very first element, even on first time
+            startLMj = extraCnt;
+            // go all the way, unless this is the last local mem chunk,
+            // then stay within limits - 1
+            endLMj =  extraCnt + ksY;
+
+            // Top row: don't sum the very last element
+            currValCenter = convert_int4( data[ (startLMj + endLMj)/2][col+anX] );
+
+            for(int j = startLMj, lut_j = 0; j < endLMj; j++, lut_j++)
+            {
+                for(int i=-anX; i<=anX; i++)
+                {
+#if FIXED_WEIGHT
+#if VAR_PER_CHANNEL
+                    weight.x = 1.0f;
+                    weight.y = 1.0f;
+                    weight.z = 1.0f;
+                    weight.w = 1.0f;
+#else
+                    weight = 1.0f;
+#endif
+#else
+                    currVal	= convert_int4(data[j][col+anX+i])	;
+                    currWRTCenter = currVal-currValCenter;
+
+#if VAR_PER_CHANNEL
+                    weight = var[extraCnt] / (var[extraCnt] + convert_float4(currWRTCenter * currWRTCenter)) * (float4)(lut[lut_j*lut_step+anX+i]);
+                    //weight.x = var[extraCnt].x / ( var[extraCnt].x + (float) mul24(currWRTCenter.x , currWRTCenter.x) ) ;
+                    //weight.y = var[extraCnt].y / ( var[extraCnt].y + (float) mul24(currWRTCenter.y , currWRTCenter.y) ) ;
+                    //weight.z = var[extraCnt].z / ( var[extraCnt].z + (float) mul24(currWRTCenter.z , currWRTCenter.z) ) ;
+                    //weight.w = 0;
+#else
+                    weight = 1.0f/(1.0f+( mul24(currWRTCenter.x, currWRTCenter.x) + mul24(currWRTCenter.y, currWRTCenter.y) +  mul24(currWRTCenter.z, currWRTCenter.z))/(var.x+var.y+var.z));
+#endif
+#endif
+                    tmp_sum[extraCnt] += convert_float4(data[j][col+anX+i]) * weight;
+                    totalWeight += weight;
+                }
+            }
+
+            tmp_sum[extraCnt] /= totalWeight;
+
+            if(posX >= 0 && posX < dst_cols && (posY+extraCnt) >= 0 && (posY+extraCnt) < dst_rows)
+            {
+                dst[(dst_startY+extraCnt) * (dst_step>>2)+ dst_startX + col] = convert_uchar4(tmp_sum[extraCnt]);
+            }
+
+#if VAR_PER_CHANNEL
+            totalWeight = (float4)(0,0,0,0);
+#else
+            totalWeight = 0;
+#endif
+        }
+    }
+}
+
+// edgeEnhancingFilter_C1_D0: uchar filter; each output is a weighted mean over a (2*anX+1) x ksY window, weighted by lut and local variance.
+__kernel void
+edgeEnhancingFilter_C1_D0(
+    __global const uchar * restrict src,
+    __global uchar *dst,
+    float alpha,                    // not referenced in this kernel body
+    int src_offset,                 // split below into x/y offsets using src_step
+    int src_whole_rows,             // row bound used by the border handling of src reads
+    int src_whole_cols,             // column bound used by the border handling of src reads
+    int src_step,                   // src row pitch; multiplied directly with the row index
+    int dst_offset,                 // split below into x/y offsets using dst_step
+    int dst_rows,                   // writes are limited to rows [0, dst_rows)
+    int dst_cols,                   // writes are limited to columns [0, dst_cols)
+    int dst_step,                   // dst row pitch; multiplied directly with the row index
+    __global const float * lut,     // per-tap weights, read as lut[window_row*lut_step + window_col]
+    int lut_step)                   // lut row pitch in floats
+{
+    // THREADS, EXTRA, ksX, ksY, anX, anY are macros not defined in this view (presumably build options - confirm on the host side).
+    int col = get_local_id(0);
+    const int gX = get_group_id(0);
+    const int gY = get_group_id(1);
+
+    int src_x_off = (src_offset % src_step);
+    int src_y_off = src_offset / src_step;
+    int dst_x_off = (dst_offset % dst_step);
+    int dst_y_off = dst_offset / dst_step;
+
+    int startX = gX * (THREADS-ksX+1) - anX + src_x_off;   // leftmost source column loaded by this work-group
+    int startY = (gY * (1+EXTRA)) - anY + src_y_off;       // topmost source row loaded by this work-group
+
+    int dst_startX = gX * (THREADS-ksX+1) + dst_x_off;
+    int dst_startY = (gY * (1+EXTRA)) + dst_y_off;
+
+    int posX = dst_startX - dst_x_off + col;   // output column without the dst offset; checked against dst_cols
+    int posY = (gY * (1+EXTRA))	;              // first output row without the dst offset; checked against dst_rows
+
+    __local uchar data[ksY+EXTRA][THREADS];    // local-memory tile: ksY+EXTRA rows, each work-item fills column `col`
+
+    float tmp_sum[1+EXTRA];
+    for(int tmpint = 0; tmpint < 1+EXTRA; tmpint++)
+    {
+        tmp_sum[tmpint] = (float)(0);
+    }
+
+#ifdef BORDER_CONSTANT
+    bool con;
+    uchar ss;
+    for(int j = 0;	j < ksY+EXTRA; j++)
+    {
+        con = (startX+col >= 0 && startX+col < src_whole_cols && startY+j >= 0 && startY+j < src_whole_rows);
+
+        int cur_col = clamp(startX + col, 0, src_whole_cols);   // only used for the read below, i.e. when con is true
+        if(con)
+        {
+            ss = src[(startY+j)*(src_step) + cur_col];
+        }
+
+        data[j][col] = con ? ss : 0;   // positions outside the source read as 0
+    }
+#else
+    for(int j= 0; j < ksY+EXTRA; j++)
+    {
+        int selected_row;
+        int selected_col;
+        selected_row = ADDR_H(startY+j, 0, src_whole_rows);   // ADDR_* are defined outside this view; presumably border remapping - confirm
+        selected_row = ADDR_B(startY+j, src_whole_rows, selected_row);
+
+        selected_col = ADDR_L(startX+col, 0, src_whole_cols);
+        selected_col = ADDR_R(startX+col, src_whole_cols, selected_col);
+
+        data[j][col] = src[selected_row * (src_step) + selected_col];
+    }
+#endif
+
+    barrier(CLK_LOCAL_MEM_FENCE);   // the tile must be complete before neighbouring columns are read
+
+    float var[1+EXTRA];
+
+    float weight;
+    float totalWeight = 0;
+
+    int currValCenter;
+    int currWRTCenter;
+
+    int sumVal = 0;
+    int sumValSqr = 0;
+
+    if(col < (THREADS-(ksX-1)))   // the remaining work-items only contributed tile loads
+    {
+        int currVal;
+
+        int howManyAll = (2*anX+1)*(ksY);
+
+        //find variance of all data
+        int startLMj;
+        int endLMj;
+        // Per-output-row variance; the loop wraps both CALCVAR branches so either one compiles.
+        for(int extraCnt=0; extraCnt<=EXTRA; extraCnt++)
+        {
+#if CALCVAR
+            startLMj = extraCnt;
+            endLMj =  ksY+extraCnt-1;   // the last window row is left out of the sums
+            sumVal = 0;
+            sumValSqr =0;
+            for(int j = startLMj; j < endLMj; j++)
+            {
+                for(int i=-anX; i<=anX; i++)
+                {
+                    currVal	= (uint)(data[j][col+anX+i])	;
+
+                    sumVal += currVal;
+                    sumValSqr += mul24(currVal, currVal);
+                }
+            }
+            var[extraCnt] = (float)( ( (sumValSqr * howManyAll)- mul24(sumVal , sumVal) ) ) /  ( (float)(howManyAll*howManyAll) ) ;
+#else
+            var[extraCnt] = (float)(900.0);   // fixed variance when CALCVAR is 0
+#endif
+        }
+
+        for(int extraCnt = 0; extraCnt <= EXTRA; extraCnt++)
+        {
+
+            // first tile row of the window for this output row
+            startLMj = extraCnt;
+            // one past the last tile row of the window: all ksY rows are used here
+            // (the CALCVAR variance pass above stops one row earlier)
+            endLMj =  extraCnt + ksY;
+
+            // value at the centre of the window (tile row extraCnt + ksY/2, tile column col+anX)
+            currValCenter = (int)( data[ (startLMj + endLMj)/2][col+anX] );
+
+            for(int j = startLMj, lut_j = 0; j < endLMj; j++, lut_j++)
+            {
+                for(int i=-anX; i<=anX; i++)
+                {
+#if FIXED_WEIGHT
+                    weight = 1.0f;
+#else
+                    currVal	= (int)(data[j][col+anX+i])	;
+                    currWRTCenter = currVal-currValCenter;
+
+                    weight = var[extraCnt] / (var[extraCnt] + (float)mul24(currWRTCenter,currWRTCenter)) * lut[lut_j*lut_step+anX+i] ;
+#endif
+                    tmp_sum[extraCnt] += (float)(data[j][col+anX+i] * weight);
+                    totalWeight += weight;
+                }
+            }
+
+            tmp_sum[extraCnt] /= totalWeight;   // NOTE(review): var[extraCnt] == 0 makes the centre-tap weight 0/0 - confirm this cannot occur
+
+
+            if(posX >= 0 && posX < dst_cols && (posY+extraCnt) >= 0 && (posY+extraCnt) < dst_rows)
+            {
+                dst[(dst_startY+extraCnt) * (dst_step)+ dst_startX + col] = (uchar)(tmp_sum[extraCnt]);
+            }
+
+            totalWeight = 0;   // reset the accumulator for the next output row
+        }
+    }
+}
diff --git a/modules/ocl/test/test_filters.cpp b/modules/ocl/test/test_filters.cpp
index c98c8f40d..4a22ec503 100644
--- a/modules/ocl/test/test_filters.cpp
+++ b/modules/ocl/test/test_filters.cpp
@@ -353,6 +353,69 @@ TEST_P(Filter2D, Mat)
         Near(1);
     }
 }
+////////////////////////////////////////////////////////////////////////////////////////////////////
+// Bilateral: fixture holding the parameters used by the bilateralFilter comparison test
+struct Bilateral : FilterTestBase
+{
+    int type;                       // from test parameter 0
+    cv::Size ksize;                 // from test parameter 1
+    int bordertype;                 // from test parameter 3 (parameter 2 is not read)
+    double sigmacolor, sigmaspace;  // drawn from the test RNG in SetUp()
+
+    virtual void SetUp()            // reads the parameters, calls Init(type), then picks the sigmas
+    {
+        type = GET_PARAM(0);
+        ksize = GET_PARAM(1);
+        bordertype = GET_PARAM(3);
+        Init(type);
+        cv::RNG &rng = TS::ptr()->get_rng();
+        sigmacolor = rng.uniform(20, 100);   // integer bounds 20 and 100
+        sigmaspace = rng.uniform(10, 40);    // integer bounds 10 and 40
+    }
+};
+
+TEST_P(Bilateral, Mat)   // runs cv::bilateralFilter and cv::ocl::bilateralFilter with the same arguments, LOOP_TIMES times
+{
+    for(int j = 0; j < LOOP_TIMES; j++)
+    {
+        random_roi();   // helper defined outside this view; presumably re-picks the ROIs used below - confirm
+        cv::bilateralFilter(mat1_roi, dst_roi, ksize.width, sigmacolor, sigmaspace, bordertype);   // only ksize.width is passed on
+        cv::ocl::bilateralFilter(gmat1, gdst, ksize.width, sigmacolor, sigmaspace, bordertype);
+        Near(1);        // helper defined outside this view; presumably compares both results with tolerance 1 - confirm
+    }
+
+}
+
+////////////////////////////////////////////////////////////////////////////////////////////////////
+// AdaptiveBilateral: fixture holding the parameters used by the adaptiveBilateralFilter comparison test
+struct AdaptiveBilateral : FilterTestBase
+{
+    int type;          // from test parameter 0
+    cv::Size ksize;    // from test parameter 1
+    int bordertype;    // from test parameter 3 (parameter 2 is not read)
+    Point anchor;      // always set to (-1,-1) in SetUp()
+    virtual void SetUp()   // reads the parameters, calls Init(type), then fixes the anchor
+    {
+        type = GET_PARAM(0);
+        ksize = GET_PARAM(1);
+        bordertype = GET_PARAM(3);
+        Init(type);
+        anchor = Point(-1,-1);
+    }
+};
+
+TEST_P(AdaptiveBilateral, Mat)   // runs cv::adaptiveBilateralFilter and cv::ocl::adaptiveBilateralFilter with the same arguments
+{
+    for(int j = 0; j < LOOP_TIMES; j++)
+    {
+        random_roi();   // helper defined outside this view; presumably re-picks the ROIs used below - confirm
+        cv::adaptiveBilateralFilter(mat1_roi, dst_roi, ksize, 5, anchor, bordertype);   // fourth argument fixed at 5 (presumably sigmaSpace - confirm)
+        cv::ocl::adaptiveBilateralFilter(gmat1, gdst, ksize, 5, anchor, bordertype);
+        Near(1);        // helper defined outside this view; presumably compares both results with tolerance 1 - confirm
+    }
+
+}
+
 INSTANTIATE_TEST_CASE_P(Filter, Blur, Combine(
                         Values(CV_8UC1, CV_8UC3, CV_8UC4, CV_32FC1, CV_32FC4),
                         Values(cv::Size(3, 3), cv::Size(5, 5), cv::Size(7, 7)),
@@ -400,4 +463,17 @@ INSTANTIATE_TEST_CASE_P(Filter, Filter2D, testing::Combine(
                         Values(Size(0, 0)), //not use
                         Values((MatType)cv::BORDER_CONSTANT, (MatType)cv::BORDER_REFLECT101, (MatType)cv::BORDER_REPLICATE, (MatType)cv::BORDER_REFLECT)));
 
+INSTANTIATE_TEST_CASE_P(Filter, Bilateral, Combine(
+                        Values(CV_8UC1, CV_8UC3),           // parameter 0: read into Bilateral::type
+                        Values(Size(5, 5), Size(9, 9)),     // parameter 1: read into Bilateral::ksize
+                        Values(Size(0, 0)), // parameter 2: not used (Bilateral::SetUp never reads it)
+                        Values((MatType)cv::BORDER_CONSTANT, (MatType)cv::BORDER_REPLICATE,   // parameter 3: border modes
+                               (MatType)cv::BORDER_REFLECT, (MatType)cv::BORDER_WRAP, (MatType)cv::BORDER_REFLECT_101)));
+
+INSTANTIATE_TEST_CASE_P(Filter, AdaptiveBilateral, Combine(
+                        Values(CV_8UC1, CV_8UC3),           // parameter 0: read into AdaptiveBilateral::type
+                        Values(Size(5, 5), Size(9, 9)),     // parameter 1: read into AdaptiveBilateral::ksize
+                        Values(Size(0, 0)), // parameter 2: not used (AdaptiveBilateral::SetUp never reads it)
+                        Values((MatType)cv::BORDER_CONSTANT, (MatType)cv::BORDER_REPLICATE,   // parameter 3: border modes (no BORDER_WRAP here)
+                               (MatType)cv::BORDER_REFLECT,  (MatType)cv::BORDER_REFLECT_101)));
 #endif // HAVE_OPENCL
diff --git a/modules/ocl/test/test_imgproc.cpp b/modules/ocl/test/test_imgproc.cpp
index 46cd257c8..426fcef3f 100644
--- a/modules/ocl/test/test_imgproc.cpp
+++ b/modules/ocl/test/test_imgproc.cpp
@@ -475,56 +475,6 @@ TEST_P(equalizeHist, Mat)
 }
 
 
-
-
-
-////////////////////////////////bilateralFilter////////////////////////////////////////////
-
-struct bilateralFilter : ImgprocTestBase {};
-
-TEST_P(bilateralFilter, Mat)
-{
-    double sigmacolor = 50.0;
-    int radius = 9;
-    int d = 2 * radius + 1;
-    double sigmaspace = 20.0;
-    int bordertype[] = {cv::BORDER_CONSTANT, cv::BORDER_REPLICATE, cv::BORDER_REFLECT, cv::BORDER_WRAP, cv::BORDER_REFLECT_101};
-    //const char *borderstr[] = {"BORDER_CONSTANT", "BORDER_REPLICATE", "BORDER_REFLECT", "BORDER_WRAP", "BORDER_REFLECT_101"};
-
-    if (mat1.depth() != CV_8U || mat1.type() != dst.type())
-    {
-        cout << "Unsupported type" << endl;
-        EXPECT_DOUBLE_EQ(0.0, 0.0);
-    }
-    else
-    {
-        for(size_t i = 0; i < sizeof(bordertype) / sizeof(int); i++)
-            for(int j = 0; j < LOOP_TIMES; j++)
-            {
-                random_roi();
-                if(((bordertype[i] != cv::BORDER_CONSTANT) && (bordertype[i] != cv::BORDER_REPLICATE) && (mat1_roi.cols <= radius)) || (mat1_roi.cols <= radius) || (mat1_roi.rows <= radius) || (mat1_roi.rows <= radius))
-                {
-                    continue;
-                }
-                //if((dstx>=radius) && (dsty >= radius) && (dstx+cldst_roi.cols+radius <=cldst_roi.wholecols) && (dsty+cldst_roi.rows+radius <= cldst_roi.wholerows))
-                //{
-                //	dst_roi.adjustROI(radius, radius, radius, radius);
-                //	cldst_roi.adjustROI(radius, radius, radius, radius);
-                //}
-                //else
-                //{
-                //	continue;
-                //}
-
-                cv::bilateralFilter(mat1_roi, dst_roi, d, sigmacolor, sigmaspace, bordertype[i] | cv::BORDER_ISOLATED);
-                cv::ocl::bilateralFilter(clmat1_roi, cldst_roi, d, sigmacolor, sigmaspace, bordertype[i] | cv::BORDER_ISOLATED);
-                Near(1.);
-            }
-    }
-}
-
-
-
 ////////////////////////////////copyMakeBorder////////////////////////////////////////////
 
 struct CopyMakeBorder : ImgprocTestBase {};
@@ -1622,21 +1572,6 @@ INSTANTIATE_TEST_CASE_P(ImgprocTestBase, equalizeHist, Combine(
                             NULL_TYPE,
                             Values(false))); // Values(false) is the reserved parameter
 
-//INSTANTIATE_TEST_CASE_P(ImgprocTestBase, bilateralFilter, Combine(
-//	ONE_TYPE(CV_8UC1),
-//	NULL_TYPE,
-//	ONE_TYPE(CV_8UC1),
-//	NULL_TYPE,
-//	NULL_TYPE,
-//	Values(false))); // Values(false) is the reserved parameter
-INSTANTIATE_TEST_CASE_P(ImgprocTestBase, bilateralFilter, Combine(
-                            Values(CV_8UC1, CV_8UC3),
-                            NULL_TYPE,
-                            Values(CV_8UC1, CV_8UC3),
-                            NULL_TYPE,
-                            NULL_TYPE,
-                            Values(false))); // Values(false) is the reserved parameter
-
 
 INSTANTIATE_TEST_CASE_P(ImgprocTestBase, CopyMakeBorder, Combine(
                             Values(CV_8UC1, CV_8UC3, CV_8UC4, CV_32SC1, CV_32SC3, CV_32SC4, CV_32FC1, CV_32FC3, CV_32FC4),
diff --git a/samples/ocl/adaptive_bilateral_filter.cpp b/samples/ocl/adaptive_bilateral_filter.cpp
new file mode 100644
index 000000000..df226b195
--- /dev/null
+++ b/samples/ocl/adaptive_bilateral_filter.cpp
@@ -0,0 +1,51 @@
+// This sample shows the difference between the adaptive bilateral filter and the plain bilateral filter.
+#include "opencv2/core/core.hpp"
+#include "opencv2/imgproc/imgproc.hpp"
+#include "opencv2/highgui/highgui.hpp"
+#include "opencv2/ocl/ocl.hpp"
+
+using namespace cv;
+using namespace std;
+
+// Reads the input image, filters it on the CPU and through the ocl module, and shows the four images until a key is pressed.
+int main( int argc, const char** argv )
+{
+    const char* keys =
+        "{ i | input   |          | specify input image }"
+        "{ k | ksize   |     5    | specify kernel size }";
+    CommandLineParser cmd(argc, argv, keys);
+    string src_path = cmd.get<string>("i");
+    int ks = cmd.get<int>("k");
+    const char * winName[] = {"input", "adaptive bilateral CPU", "adaptive bilateral OpenCL", "bilateralFilter OpenCL"};
+
+    Mat src = imread(src_path);
+    Mat abFilterCPU;
+    if(src.empty()){
+        //cout << "error read image: " << src_path << endl;
+        return -1;   // exits without any message when the image could not be read
+    }
+
+    std::vector<ocl::Info> infos;
+    ocl::getDevice(infos);   // infos is not used again in this function
+
+    ocl::oclMat dsrc(src), dABFilter, dBFilter;
+
+    Size ksize(ks, ks);      // square kernel built from the -k option
+    adaptiveBilateralFilter(src,abFilterCPU, ksize, 10);         // CPU version
+    ocl::adaptiveBilateralFilter(dsrc, dABFilter, ksize, 10);    // ocl version, same ksize and fourth argument
+    ocl::bilateralFilter(dsrc, dBFilter, ks, 30, 9);             // ocl bilateral filter for comparison
+
+    Mat abFilter = dABFilter;   // oclMat converted to Mat so it can be passed to imshow
+    Mat bFilter = dBFilter;
+    imshow(winName[0], src);
+
+    imshow(winName[1], abFilterCPU);
+
+    imshow(winName[2], abFilter);
+
+    imshow(winName[3], bFilter);
+
+    waitKey();
+    return 0;
+
+}
\ No newline at end of file

From ab235cda74577aa93a65b0da959b4273e5498575 Mon Sep 17 00:00:00 2001
From: yao <bitwangyaoyao@gmail.com>
Date: Thu, 29 Aug 2013 11:59:19 +0800
Subject: [PATCH 09/41] fix warnings

---
 modules/ocl/perf/perf_filters.cpp | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/modules/ocl/perf/perf_filters.cpp b/modules/ocl/perf/perf_filters.cpp
index 5f510d63f..aa562412b 100644
--- a/modules/ocl/perf/perf_filters.cpp
+++ b/modules/ocl/perf/perf_filters.cpp
@@ -333,7 +333,7 @@ PERF_TEST_P(BilateralFixture, Bilateral,
     const Size_MatType_t params = GetParam();
     const Size srcSize = get<0>(params);
     const int type = get<1>(params), d = 7;
-	double sigmacolor = 50.0, sigmaspace = 50.0;
+    double sigmacolor = 50.0, sigmaspace = 50.0;
 
     Mat src(srcSize, type), dst(srcSize, type);
     declare.in(src, WARMUP_RNG).out(dst);
@@ -372,8 +372,8 @@ PERF_TEST_P(adaptiveBilateralFixture, adaptiveBilateral,
     const Size_MatType_t params = GetParam();
     const Size srcSize = get<0>(params);
     const int type = get<1>(params);
-	double sigmaspace = 10.0;
-	Size ksize(9,9);
+    double sigmaspace = 10.0;
+    Size ksize(9,9);
 
     Mat src(srcSize, type), dst(srcSize, type);
     declare.in(src, WARMUP_RNG).out(dst);

From 5728612f95bf1fa19debdd7245b6f956cc70781f Mon Sep 17 00:00:00 2001
From: Jin Ma <jinma06njuee@gmail.om>
Date: Thu, 29 Aug 2013 14:05:56 +0800
Subject: [PATCH 10/41] Removed the trailing whitespace

---
 modules/ocl/src/bgfg_mog.cpp       | 13 ++++++-------
 modules/ocl/src/opencl/bgfg_mog.cl | 17 +++++++----------
 2 files changed, 13 insertions(+), 17 deletions(-)

diff --git a/modules/ocl/src/bgfg_mog.cpp b/modules/ocl/src/bgfg_mog.cpp
index c079c6b8f..d39f86394 100644
--- a/modules/ocl/src/bgfg_mog.cpp
+++ b/modules/ocl/src/bgfg_mog.cpp
@@ -46,7 +46,7 @@
 #include "precomp.hpp"
 using namespace cv;
 using namespace cv::ocl;
-namespace cv 
+namespace cv
 {
     namespace ocl
     {
@@ -82,10 +82,10 @@ namespace cv { namespace ocl { namespace device
 
         void getBackgroundImage_ocl(int cn, const oclMat& weight, const oclMat& mean, oclMat& dst, int nmixtures, float backgroundRatio);
 
-        void loadConstants(float Tb, float TB, float Tg, float varInit, float varMin, float varMax, float tau, 
+        void loadConstants(float Tb, float TB, float Tg, float varInit, float varMin, float varMax, float tau,
                             unsigned char shadowVal);
 
-        void mog2_ocl(const oclMat& frame, int cn, oclMat& fgmask, oclMat& modesUsed, oclMat& weight, oclMat& variance, oclMat& mean, 
+        void mog2_ocl(const oclMat& frame, int cn, oclMat& fgmask, oclMat& modesUsed, oclMat& weight, oclMat& variance, oclMat& mean,
                       float alphaT, float prune, bool detectShadows, int nmixtures);
 
         void getBackgroundImage2_ocl(int cn, const oclMat& modesUsed, const oclMat& weight, const oclMat& mean, oclMat& dst, int nmixtures);
@@ -392,11 +392,11 @@ void cv::ocl::device::mog::loadConstants(float Tb, float TB, float Tg, float var
     constants->c_tau = tau;
     constants->c_shadowVal = shadowVal;
 
-    cl_constants = load_constant(*((cl_context*)getoclContext()), *((cl_command_queue*)getoclCommandQueue()), 
+    cl_constants = load_constant(*((cl_context*)getoclContext()), *((cl_command_queue*)getoclCommandQueue()),
         (void *)constants, sizeof(_contant_struct));
 }
 
-void cv::ocl::device::mog::mog2_ocl(const oclMat& frame, int cn, oclMat& fgmaskRaw, oclMat& modesUsed, oclMat& weight, oclMat& variance, 
+void cv::ocl::device::mog::mog2_ocl(const oclMat& frame, int cn, oclMat& fgmaskRaw, oclMat& modesUsed, oclMat& weight, oclMat& variance,
                                 oclMat& mean, float alphaT, float prune, bool detectShadows, int nmixtures)
 {
     oclMat fgmask(fgmaskRaw.size(), CV_32SC1);
@@ -635,5 +635,4 @@ void cv::ocl::MOG2::release()
     mean_.release();
 
     bgmodelUsedModes_.release();
-}
-
+}
\ No newline at end of file
diff --git a/modules/ocl/src/opencl/bgfg_mog.cl b/modules/ocl/src/opencl/bgfg_mog.cl
index 77bdb9c2a..2e269999a 100644
--- a/modules/ocl/src/opencl/bgfg_mog.cl
+++ b/modules/ocl/src/opencl/bgfg_mog.cl
@@ -134,7 +134,7 @@ __kernel void mog_withoutLearning_kernel(__global T_FRAME* frame, __global uchar
     __global float* weight, __global T_MEAN_VAR* mean, __global T_MEAN_VAR* var,
     int frame_row, int frame_col, int frame_step, int fgmask_step,
     int weight_step, int mean_step, int var_step,
-    float varThreshold, float backgroundRatio, int fgmask_offset_x, 
+    float varThreshold, float backgroundRatio, int fgmask_offset_x,
     int fgmask_offset_y, int frame_offset_x, int frame_offset_y)
 {
     int x = get_global_id(0);
@@ -142,7 +142,6 @@ __kernel void mog_withoutLearning_kernel(__global T_FRAME* frame, __global uchar
 
     if (x < frame_col && y < frame_row)
     {
-    
         T_MEAN_VAR pix = cvt(frame[(y + frame_offset_y) * frame_step + (x + frame_offset_x)]);
 
         int kHit = -1;
@@ -179,20 +178,18 @@ __kernel void mog_withoutLearning_kernel(__global T_FRAME* frame, __global uchar
                 }
             }
         }
-        
         if(kHit < 0 || kHit >= kForeground)
             fgmask[(y + fgmask_offset_y) * fgmask_step + (x + fgmask_offset_x)] = (uchar) (-1);
         else
             fgmask[(y + fgmask_offset_y) * fgmask_step + (x + fgmask_offset_x)] = (uchar) (0);
-        
     }
 }
 
 __kernel void mog_withLearning_kernel(__global T_FRAME* frame, __global int* fgmask,
-    __global float* weight, __global float* sortKey, __global T_MEAN_VAR* mean, 
+    __global float* weight, __global float* sortKey, __global T_MEAN_VAR* mean,
     __global T_MEAN_VAR* var, int frame_row, int frame_col, int frame_step, int fgmask_step,
     int weight_step, int sortKey_step, int mean_step, int var_step,
-    float varThreshold, float backgroundRatio, float learningRate, float minVar, 
+    float varThreshold, float backgroundRatio, float learningRate, float minVar,
     int fgmask_offset_x, int fgmask_offset_y, int frame_offset_x, int frame_offset_y)
 {
     const float w0 = 0.05f;
@@ -322,7 +319,7 @@ __kernel void mog_withLearning_kernel(__global T_FRAME* frame, __global int* fgm
 
 
 __kernel void getBackgroundImage_kernel(__global float* weight, __global T_MEAN_VAR* mean, __global T_FRAME* dst,
-    int dst_row, int dst_col, int weight_step, int mean_step, int dst_step, 
+    int dst_row, int dst_col, int weight_step, int mean_step, int dst_step,
     float backgroundRatio)
 {
     int x = get_global_id(0);
@@ -351,8 +348,8 @@ __kernel void getBackgroundImage_kernel(__global float* weight, __global T_MEAN_
 }
 
 __kernel void mog2_kernel(__global T_FRAME * frame, __global int* fgmask, __global float* weight, __global T_MEAN_VAR * mean,
-        __global int* modesUsed, __global float* variance, int frame_row, int frame_col, int frame_step, 
-        int fgmask_step, int weight_step, int mean_step, int modesUsed_step, int var_step, float alphaT, float alpha1, float prune, 
+        __global int* modesUsed, __global float* variance, int frame_row, int frame_col, int frame_step,
+        int fgmask_step, int weight_step, int mean_step, int modesUsed_step, int var_step, float alphaT, float alpha1, float prune,
         int detectShadows_flag, int fgmask_offset_x, int fgmask_offset_y, int frame_offset_x, int frame_offset_y, __constant con_srtuct_t* constants)
 {
     int x = get_global_id(0);
@@ -505,7 +502,7 @@ __kernel void mog2_kernel(__global T_FRAME * frame, __global int* fgmask, __glob
 }
 
 __kernel void getBackgroundImage2_kernel(__global int* modesUsed, __global float* weight, __global T_MEAN_VAR* mean,
-    __global T_FRAME* dst, float c_TB, int modesUsed_row, int modesUsed_col, int modesUsed_step, int weight_step, 
+    __global T_FRAME* dst, float c_TB, int modesUsed_row, int modesUsed_col, int modesUsed_step, int weight_step,
     int mean_step, int dst_step, int dst_x, int dst_y)
 {
     int x = get_global_id(0);

From 14e083f1e0736ef0119a2cb3588b179c7d06a5ee Mon Sep 17 00:00:00 2001
From: Jin Ma <jinma06njuee@gmail.om>
Date: Thu, 29 Aug 2013 14:08:56 +0800
Subject: [PATCH 11/41] Removed trailing whitespace

---
 modules/ocl/test/utility.cpp | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/modules/ocl/test/utility.cpp b/modules/ocl/test/utility.cpp
index 750c3c82b..5d3195719 100644
--- a/modules/ocl/test/utility.cpp
+++ b/modules/ocl/test/utility.cpp
@@ -119,7 +119,7 @@ cv::ocl::oclMat createMat_ocl(Size size, int type, bool useRoi)
 }
 
 cv::ocl::oclMat loadMat_ocl(const Mat& m, bool useRoi)
-{ 
+{
     CV_Assert(m.type() == CV_8UC1 || m.type() == CV_8UC3);
     cv::ocl::oclMat d_m;
     d_m = createMat_ocl(m.size(), m.type(), useRoi);
@@ -130,11 +130,11 @@ cv::ocl::oclMat loadMat_ocl(const Mat& m, bool useRoi)
     d_m.locateROI(ls, pt);
 
     Rect roi(pt.x, pt.y, d_m.size().width, d_m.size().height);
-    
+
     cv::ocl::oclMat m_ocl(m);
 
     cv::ocl::oclMat d_m_roi(d_m, roi);
-    
+
     m_ocl.copyTo(d_m);
     return d_m;
 }
@@ -289,4 +289,3 @@ double checkRectSimilarity(Size sz, std::vector<Rect>& ob1, std::vector<Rect>& o
     }
     return final_test_result;
 }
-

From 0233c4c198a359014307e7da11b66b6f5f1f14ea Mon Sep 17 00:00:00 2001
From: Jin Ma <jinma06njuee@gmail.om>
Date: Thu, 29 Aug 2013 15:49:02 +0800
Subject: [PATCH 12/41] Removed whitespace.

---
 modules/ocl/src/bgfg_mog.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/modules/ocl/src/bgfg_mog.cpp b/modules/ocl/src/bgfg_mog.cpp
index d39f86394..3051ac82f 100644
--- a/modules/ocl/src/bgfg_mog.cpp
+++ b/modules/ocl/src/bgfg_mog.cpp
@@ -330,7 +330,7 @@ void cv::ocl::device::mog::mog_ocl(const oclMat& frame, int cn, oclMat& fgmask,
     const float minVar = noiseSigma * noiseSigma;
 
     if(learningRate > 0.0f)
-        mog_withLearning(frame, cn, fgmask, weight, sortKey, mean, var, nmixtures, 
+        mog_withLearning(frame, cn, fgmask, weight, sortKey, mean, var, nmixtures,
                          varThreshold, backgroundRatio, learningRate, minVar);
     else
         mog_withoutLearning(frame, cn, fgmask, weight, mean, var, nmixtures, varThreshold, backgroundRatio);

From 4f3349ffe43a8b486f6e7358350dcedc8b31ab88 Mon Sep 17 00:00:00 2001
From: Jin Ma <jinma06njuee@gmail.om>
Date: Thu, 29 Aug 2013 16:24:26 +0800
Subject: [PATCH 13/41] Added perf namespace.

---
 modules/ocl/perf/perf_bgfg.cpp | 1 +
 1 file changed, 1 insertion(+)

diff --git a/modules/ocl/perf/perf_bgfg.cpp b/modules/ocl/perf/perf_bgfg.cpp
index d507a3b5e..5bf406daf 100644
--- a/modules/ocl/perf/perf_bgfg.cpp
+++ b/modules/ocl/perf/perf_bgfg.cpp
@@ -43,6 +43,7 @@
 //
 //M*/
 #include "perf_precomp.hpp"
+using namespace perf;
 using namespace cv;
 using namespace cv::ocl;
 

From 114f3266d801a23dcdbbda70b5cbb99026ac4fd4 Mon Sep 17 00:00:00 2001
From: Jin Ma <jinma06njuee@gmail.om>
Date: Thu, 29 Aug 2013 17:35:47 +0800
Subject: [PATCH 14/41] Removed performance test.

---
 modules/ocl/perf/perf_bgfg.cpp | 334 ---------------------------------
 1 file changed, 334 deletions(-)
 delete mode 100644 modules/ocl/perf/perf_bgfg.cpp

diff --git a/modules/ocl/perf/perf_bgfg.cpp b/modules/ocl/perf/perf_bgfg.cpp
deleted file mode 100644
index 5bf406daf..000000000
--- a/modules/ocl/perf/perf_bgfg.cpp
+++ /dev/null
@@ -1,334 +0,0 @@
-/*M///////////////////////////////////////////////////////////////////////////////////////
-//
-//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
-//
-//  By downloading, copying, installing or using the software you agree to this license.
-//  If you do not agree to this license, do not download, install,
-//  copy or use the software.
-//
-//
-//                           License Agreement
-//                For Open Source Computer Vision Library
-//
-// Copyright (C) 2010-2013, Multicoreware, Inc., all rights reserved.
-// Copyright (C) 2010-2013, Advanced Micro Devices, Inc., all rights reserved.
-// Third party copyrights are property of their respective owners.
-//
-// @Authors
-//    Jin Ma, jin@multicorewareinc.com
-//
-// Redistribution and use in source and binary forms, with or without modification,
-// are permitted provided that the following conditions are met:
-//
-//   * Redistribution's of source code must retain the above copyright notice,
-//     this list of conditions and the following disclaimer.
-//
-//   * Redistribution's in binary form must reproduce the above copyright notice,
-//     this list of conditions and the following disclaimer in the documentation
-//     and/or other oclMaterials provided with the distribution.
-//
-//   * The name of the copyright holders may not be used to endorse or promote products
-//     derived from this software without specific prior written permission.
-//
-// This software is provided by the copyright holders and contributors as is and
-// any express or implied warranties, including, but not limited to, the implied
-// warranties of merchantability and fitness for a particular purpose are disclaimed.
-// In no event shall the Intel Corporation or contributors be liable for any direct,
-// indirect, incidental, special, exemplary, or consequential damages
-// (including, but not limited to, procurement of substitute goods or services;
-// loss of use, data, or profits; or business interruption) however caused
-// and on any theory of liability, whether in contract, strict liability,
-// or tort (including negligence or otherwise) arising in any way out of
-// the use of this software, even if advised of the possibility of such damage.
-//
-//M*/
-#include "perf_precomp.hpp"
-using namespace perf;
-using namespace cv;
-using namespace cv::ocl;
-
-static void cvtFrameFmt(std::vector<Mat>& input, std::vector<Mat>& output, int output_cn)
-{
-    for(int i = 0; i< (int)(input.size()); i++)
-    {
-        if(output_cn == 1)
-            cvtColor(input[i], output[i], COLOR_RGB2GRAY);
-        else
-            cvtColor(input[i], output[i], COLOR_RGB2RGBA);
-    }
-}
-///////////// MOG////////////////////////
-PERFTEST(mog)
-{
-    const string inputFile[] = {"768x576.avi", "1920x1080.avi"};
-    int cn[] = {1, 3};
-
-    float learningRate[] = {0.0f, 0.01f};
-
-    for(unsigned int idx = 0; idx < sizeof(inputFile)/sizeof(string); idx++)
-    {
-        VideoCapture cap(inputFile[idx]);
-        ASSERT_TRUE(cap.isOpened());
-
-        Mat frame;
-        int nframe = 5;
-        Mat foreground_cpu;
-        oclMat foreground_ocl;
-        std::vector<cv::Mat> frame_buffer_init;
-        std::vector<Mat> frame_buffer(nframe);
-        std::vector<oclMat> frame_buffer_ocl;
-        std::vector<Mat> foreground_buf_ocl;
-        std::vector<Mat> foreground_buf_cpu;
-        BackgroundSubtractorMOG mog_cpu;
-        cv::ocl::MOG d_mog;
-        for(int i = 0; i < nframe; i++)
-        {
-            cap >> frame;
-            ASSERT_FALSE(frame.empty());
-            frame_buffer_init.push_back(frame);
-        }
-
-        for(unsigned int i = 0; i < sizeof(learningRate)/sizeof(float); i++)
-        {
-            for(unsigned int j = 0; j < sizeof(cn)/sizeof(int); j++)
-            {
-                SUBTEST << frame.cols << 'x' << frame.rows << ".avi; "<<"channels: "<<cn[j]<<"; learningRate: "<<learningRate[i];
-                if(cn[j]==1)
-                    cvtFrameFmt(frame_buffer_init, frame_buffer, 1);
-                else
-                    frame_buffer=frame_buffer_init;
-
-                foreground_buf_cpu.clear();
-                CPU_ON;
-                for(int iter = 0; iter < nframe; iter++)
-                {
-                    mog_cpu(frame_buffer[iter], foreground_cpu, learningRate[i]);
-                    foreground_buf_cpu.push_back(foreground_cpu);
-                }
-                CPU_OFF;
-
-                WARMUP_ON;
-                d_mog(oclMat(frame_buffer[0]), foreground_ocl, learningRate[i]);
-                WARMUP_OFF;
-
-                frame_buffer_ocl.clear();
-                for(int iter =0; iter < nframe; iter++)
-                    frame_buffer_ocl.push_back(oclMat(frame_buffer[iter]));
-
-                GPU_ON;
-                for(int iter = 0; iter < nframe; iter++)
-                {
-                    d_mog(frame_buffer_ocl[iter], foreground_ocl, learningRate[i]);
-                }
-                GPU_OFF;
-
-                foreground_buf_ocl.clear();
-                GPU_FULL_ON;
-                for(int iter = 0; iter < nframe; iter++)
-                {
-                    d_mog(oclMat(frame_buffer[iter]), foreground_ocl, learningRate[i]);
-                    cv::Mat temp;
-                    foreground_ocl.download(temp);
-                    foreground_buf_ocl.push_back(temp);
-                }
-                GPU_FULL_OFF;
-
-                for(int iter = 0; iter < nframe; iter++)
-                    TestSystem::instance().ExpectedMatNear(foreground_buf_ocl[iter], foreground_buf_cpu[iter], 0.0);
-
-            }
-        }
-        cap.release();
-        d_mog.release();
-    }
-}
-
-///////////// MOG2////////////////////////
-PERFTEST(mog2)
-{
-    const string inputFile[] = {"768x576.avi", "1920x1080.avi"};
-    int cn[] = {1, 3, 4};
-
-    for(unsigned int idx = 0; idx < sizeof(inputFile)/sizeof(string); idx++)
-    {
-        cv::VideoCapture cap(inputFile[idx]);
-        ASSERT_TRUE(cap.isOpened());
-
-        cv::Mat frame;
-        int nframe = 5;
-        std::vector<cv::Mat> frame_buffer_init;
-        std::vector<cv::Mat> frame_buffer(nframe);
-        std::vector<cv::ocl::oclMat> frame_buffer_ocl;
-        std::vector<cv::Mat> foreground_buf_ocl;
-        std::vector<cv::Mat> foreground_buf_cpu;
-        cv::ocl::oclMat foreground_ocl;
-
-        for(int i = 0; i < nframe; i++)
-        {
-            cap >> frame;
-            ASSERT_FALSE(frame.empty());
-            frame_buffer_init.push_back(frame);
-        }
-        cv::ocl::MOG2 d_mog;
-
-        for(unsigned int j = 0; j < sizeof(cn)/sizeof(int); j++)
-        {
-            SUBTEST << frame.cols << 'x' << frame.rows << ".avi; "<<"channels: "<<cn[j];
-
-            if(cn[j] == 1)
-                cvtFrameFmt(frame_buffer_init, frame_buffer, 1);
-            else
-                frame_buffer=frame_buffer_init;
-
-            cv::BackgroundSubtractorMOG2 mog_cpu;
-            mog_cpu.set("detectShadows", false);
-            cv::Mat foreground_cpu;
-
-            foreground_buf_cpu.clear();
-            CPU_ON;
-            for(int iter = 0; iter < nframe; iter++)
-            {
-                mog_cpu(frame_buffer[iter], foreground_cpu);
-                foreground_buf_cpu.push_back(foreground_cpu);
-            }
-            CPU_OFF;
-
-            WARMUP_ON;
-            d_mog(oclMat(frame_buffer[0]), foreground_ocl);
-            WARMUP_OFF;
-
-            frame_buffer_ocl.clear();
-
-            for(int iter =0; iter < nframe; iter++)
-                frame_buffer_ocl.push_back(oclMat(frame_buffer[iter]));
-
-            GPU_ON;
-            for(int iter = 0; iter < nframe; iter++)
-            {
-                d_mog(frame_buffer_ocl[iter], foreground_ocl);
-            }
-            GPU_OFF;
-
-            foreground_buf_ocl.clear();
-
-            GPU_FULL_ON;
-            for(int iter = 0; iter < nframe; iter++)
-            {
-                d_mog(oclMat(frame_buffer[iter]), foreground_ocl);
-
-                cv::Mat temp1;
-                foreground_ocl.download(temp1);
-                foreground_buf_ocl.push_back(temp1);
-            }
-            GPU_FULL_OFF;
-
-            for(int iter = 0; iter < nframe; iter++)
-                TestSystem::instance().ExpectedMatNear(foreground_buf_ocl[iter], foreground_buf_cpu[iter], 0.0);
-
-        }
-        cap.release();
-        d_mog.release();
-    }
-}
-
-///////////// MOG2GetBackgroundImage////////////////////////
-PERFTEST(mog2_GetBackgroundImage)
-{
-    const string inputFile[] = {"768x576.avi", "1920x1080.avi"};
-    int cn[] = {3};
-
-    for(unsigned int idx = 0; idx < sizeof(inputFile)/sizeof(string); idx++)
-    {
-        cv::VideoCapture cap(inputFile[idx]);
-        ASSERT_TRUE(cap.isOpened());
-
-        cv::Mat frame;
-        cap >> frame;
-        ASSERT_FALSE(frame.empty());
-
-        int nframe = 5;
-        std::vector<cv::Mat> frame_buffer_init;
-        std::vector<cv::Mat> frame_buffer(nframe);
-        std::vector<cv::ocl::oclMat> frame_buffer_ocl;
-        std::vector<cv::Mat> foreground_buf_ocl;
-        std::vector<cv::Mat> foreground_buf_cpu;
-
-        for(int i = 0; i < nframe; i++)
-        {
-            cap >> frame;
-            ASSERT_FALSE(frame.empty());
-            frame_buffer_init.push_back(frame);
-        }
-
-        for(unsigned int j = 0; j < sizeof(cn)/sizeof(int); j++)
-        {
-            SUBTEST << frame.cols << 'x' << frame.rows << ".avi; "<<"channels: "<<cn[j];
-
-            frame_buffer = frame_buffer_init;
-            cv::Mat temp;
-
-            if(cn[j] == 1)
-                cvtFrameFmt(frame_buffer_init, frame_buffer, 1);
-            else
-                frame_buffer=frame_buffer_init;
-
-            cv::BackgroundSubtractorMOG2 mog_cpu;
-            cv::Mat foreground_cpu;
-            cv::Mat background_cpu;
-
-            mog_cpu(frame, foreground_cpu);
-            mog_cpu.getBackgroundImage(background_cpu);
-
-            foreground_cpu.release();
-            background_cpu.release();
-
-            cv::ocl::oclMat d_frame(frame);
-            cv::ocl::MOG2 d_mog;
-            cv::ocl::oclMat foreground_ocl;
-            cv::ocl::oclMat background_ocl;
-
-            for(int iter =0; iter < nframe; iter++)
-                frame_buffer_ocl.push_back(oclMat(frame_buffer[iter]));
-
-            CPU_ON;
-            for(int iter = 0; iter < nframe; iter++)
-            {
-                mog_cpu(frame_buffer[iter], foreground_cpu);
-                foreground_buf_cpu.push_back(foreground_cpu);
-            }
-            mog_cpu.getBackgroundImage(background_cpu);
-            CPU_OFF;
-
-            WARMUP_ON;
-            d_mog(d_frame, foreground_ocl);
-            WARMUP_OFF;
-
-            foreground_ocl.release();
-
-            GPU_ON;
-            for(int iter = 0; iter < nframe; iter++)
-            {
-                d_mog(frame_buffer_ocl[iter], foreground_ocl);
-            }
-            d_mog.getBackgroundImage(background_ocl);
-            GPU_OFF;
-
-            foreground_buf_ocl.clear();
-
-            cv::Mat temp1;
-            GPU_FULL_ON;
-            for(int iter = 0; iter < nframe; iter++)
-            {
-                d_mog(oclMat(frame_buffer[iter]), foreground_ocl);
-
-                foreground_ocl.download(temp1);
-                foreground_buf_ocl.push_back(temp1);
-            }
-            d_mog.getBackgroundImage(background_ocl);
-            GPU_FULL_OFF;
-
-            background_ocl.download(temp1);
-            TestSystem::instance().ExpectedMatNear(temp1, background_cpu, 0.0);
-        }
-    }
-}

From 81a6ae791b991d387d1ad97b1b75a97bc5d94d91 Mon Sep 17 00:00:00 2001
From: Nghia Ho <nghiaho12@yahoo.com>
Date: Sat, 31 Aug 2013 23:35:03 +1000
Subject: [PATCH 15/41] changed int -> size_t when accessing std::vector

---
 .../objdetect/cascade_classifier/cascade_classifier.rst       | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/doc/tutorials/objdetect/cascade_classifier/cascade_classifier.rst b/doc/tutorials/objdetect/cascade_classifier/cascade_classifier.rst
index 03080fec5..8a4f25cc2 100644
--- a/doc/tutorials/objdetect/cascade_classifier/cascade_classifier.rst
+++ b/doc/tutorials/objdetect/cascade_classifier/cascade_classifier.rst
@@ -90,7 +90,7 @@ This tutorial code's is shown lines below. You can also download it from `here <
     //-- Detect faces
     face_cascade.detectMultiScale( frame_gray, faces, 1.1, 2, 0|CV_HAAR_SCALE_IMAGE, Size(30, 30) );
 
-    for( int i = 0; i < faces.size(); i++ )
+    for( size_t i = 0; i < faces.size(); i++ )
     {
       Point center( faces[i].x + faces[i].width*0.5, faces[i].y + faces[i].height*0.5 );
       ellipse( frame, center, Size( faces[i].width*0.5, faces[i].height*0.5), 0, 0, 360, Scalar( 255, 0, 255 ), 4, 8, 0 );
@@ -101,7 +101,7 @@ This tutorial code's is shown lines below. You can also download it from `here <
       //-- In each face, detect eyes
       eyes_cascade.detectMultiScale( faceROI, eyes, 1.1, 2, 0 |CV_HAAR_SCALE_IMAGE, Size(30, 30) );
 
-      for( int j = 0; j < eyes.size(); j++ )
+      for( size_t j = 0; j < eyes.size(); j++ )
        {
          Point center( faces[i].x + eyes[j].x + eyes[j].width*0.5, faces[i].y + eyes[j].y + eyes[j].height*0.5 );
          int radius = cvRound( (eyes[j].width + eyes[j].height)*0.25 );

From 3c2a8912ee3b239ac8b22de4233de82eb7f7007f Mon Sep 17 00:00:00 2001
From: peng xiao <hisenxpress@gmail.com>
Date: Mon, 2 Sep 2013 10:06:01 +0800
Subject: [PATCH 16/41] Let clAmdBlas library initialize once during program
 lifetime.

---
 modules/ocl/src/gemm.cpp           | 47 ++++++++++++++++++++++++++++--
 modules/ocl/src/initialization.cpp |  3 ++
 2 files changed, 48 insertions(+), 2 deletions(-)

diff --git a/modules/ocl/src/gemm.cpp b/modules/ocl/src/gemm.cpp
index 44f23da69..6e04baca4 100644
--- a/modules/ocl/src/gemm.cpp
+++ b/modules/ocl/src/gemm.cpp
@@ -46,16 +46,59 @@
 #include <iomanip>
 #include "precomp.hpp"
 
+namespace cv { namespace ocl {
+
+// used for clAmdBlas library to avoid redundant setup/teardown
+void clBlasSetup();
+void clBlasTeardown();
+
+}} /* namespace cv { namespace ocl */
+
+
 #if !defined HAVE_CLAMDBLAS
 void cv::ocl::gemm(const oclMat&, const oclMat&, double,
                    const oclMat&, double, oclMat&, int)
 {
     CV_Error(CV_StsNotImplemented, "OpenCL BLAS is not implemented");
 }
+
+void cv::ocl::clBlasSetup() // stub compiled when HAVE_CLAMDBLAS is not defined
+{
+    CV_Error(CV_StsNotImplemented, "OpenCL BLAS is not implemented"); // always raises: there is no BLAS backend to set up
+}
+
+void cv::ocl::clBlasTeardown() // stub compiled when HAVE_CLAMDBLAS is not defined
+{
+    // Deliberately a no-op: Info::release() and Info::~Info() call this unconditionally, so raising here would fail every build without clAmdBlas (and throw from a destructor).
+}
+
 #else
 #include "clAmdBlas.h"
 using namespace cv;
 
+static bool clBlasInitialized = false;
+static Mutex cs;
+
+// Initializes clAmdBlas on first use; calls made while it is already initialized return immediately.
+void cv::ocl::clBlasSetup()
+{
+    AutoLock guard(cs); // the flag is only read and written under this lock
+    if(clBlasInitialized)
+        return;
+    openCLSafeCall(clAmdBlasSetup());
+    clBlasInitialized = true;
+}
+
+// Shuts clAmdBlas down if clBlasSetup() has initialized it; otherwise does nothing.
+void cv::ocl::clBlasTeardown()
+{
+    AutoLock guard(cs); // the flag is only read and written under this lock
+    if(!clBlasInitialized)
+        return;
+    clAmdBlasTeardown();
+    clBlasInitialized = false;
+}
+
 void cv::ocl::gemm(const oclMat &src1, const oclMat &src2, double alpha,
                    const oclMat &src3, double beta, oclMat &dst, int flags)
 {
@@ -71,7 +114,8 @@ void cv::ocl::gemm(const oclMat &src1, const oclMat &src2, double alpha,
         dst.create(src1.rows, src2.cols, src1.type());
         dst.setTo(Scalar::all(0));
     }
-    openCLSafeCall( clAmdBlasSetup() );
+
+    clBlasSetup();
 
     const clAmdBlasTranspose transA = (cv::GEMM_1_T & flags) ? clAmdBlasTrans : clAmdBlasNoTrans;
     const clAmdBlasTranspose transB = (cv::GEMM_2_T & flags) ? clAmdBlasTrans : clAmdBlasNoTrans;
@@ -156,6 +200,5 @@ void cv::ocl::gemm(const oclMat &src1, const oclMat &src2, double alpha,
     }
     break;
     }
-    clAmdBlasTeardown();
 }
 #endif
diff --git a/modules/ocl/src/initialization.cpp b/modules/ocl/src/initialization.cpp
index b990e09fe..564b40357 100644
--- a/modules/ocl/src/initialization.cpp
+++ b/modules/ocl/src/initialization.cpp
@@ -68,6 +68,7 @@ namespace cv
     namespace ocl
     {
         extern void fft_teardown();
+        extern void clBlasTeardown();
         /*
          * The binary caching system to eliminate redundant program source compilation.
          * Strictly, this is not a cache because we do not implement evictions right now.
@@ -1050,6 +1051,7 @@ namespace cv
         void Info::release()
         {
             fft_teardown();
+            clBlasTeardown();
             impl->release();
             impl = new Impl;
             DeviceName.clear();
@@ -1058,6 +1060,7 @@ namespace cv
         Info::~Info()
         {
             fft_teardown();
+            clBlasTeardown();
             impl->release();
         }
 

From 7e638cb0b54b4c37bcfdbcc8a8191fa5f986fbfb Mon Sep 17 00:00:00 2001
From: StevenPuttemans <steven.puttemans@lessius.eu>
Date: Fri, 30 Aug 2013 14:21:11 +0200
Subject: [PATCH 17/41] Bugfix 3115: Added note to documentation for python
 version for facerecognizer interface + whitespace removal

---
 modules/contrib/doc/facerec/facerec_api.rst | 2 ++
 samples/python2/facerec_demo.py             | 5 +++++
 2 files changed, 7 insertions(+)

diff --git a/modules/contrib/doc/facerec/facerec_api.rst b/modules/contrib/doc/facerec/facerec_api.rst
index 9e8170d48..3100cfd8f 100644
--- a/modules/contrib/doc/facerec/facerec_api.rst
+++ b/modules/contrib/doc/facerec/facerec_api.rst
@@ -70,6 +70,8 @@ Moreover every :ocv:class:`FaceRecognizer` supports the:
 
 * **Loading/Saving** the model state from/to a given XML or YAML.
 
+.. note:: When using the FaceRecognizer interface in combination with Python, please stick to Python 2. Some underlying scripts like create_csv will not work in other versions, like Python 3.
+
 Setting the Thresholds
 +++++++++++++++++++++++
 
diff --git a/samples/python2/facerec_demo.py b/samples/python2/facerec_demo.py
index 1b0adcc21..9eeb04e0b 100755
--- a/samples/python2/facerec_demo.py
+++ b/samples/python2/facerec_demo.py
@@ -31,6 +31,11 @@
 # ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 # POSSIBILITY OF SUCH DAMAGE.
 
+# ------------------------------------------------------------------------------------------------
+# Note:
+# When using the FaceRecognizer interface in combination with Python, please stick to Python 2.
+# Some underlying scripts like create_csv will not work in other versions, like Python 3.
+# ------------------------------------------------------------------------------------------------
 
 import os
 import sys

From e6ec3dd17f9fe5165de49106c935fc1117f90615 Mon Sep 17 00:00:00 2001
From: kdrobnyh <klim.drobnyh@gmail.com>
Date: Sun, 18 Aug 2013 02:13:44 +0400
Subject: [PATCH 18/41] Add IPP support in resize, warpAffine, warpPerspective
 functions

---
 modules/imgproc/src/imgwarp.cpp | 311 ++++++++++++++++++++++++++++++++
 1 file changed, 311 insertions(+)

diff --git a/modules/imgproc/src/imgwarp.cpp b/modules/imgproc/src/imgwarp.cpp
index e6c189421..4c9063da5 100644
--- a/modules/imgproc/src/imgwarp.cpp
+++ b/modules/imgproc/src/imgwarp.cpp
@@ -50,9 +50,73 @@
 #include <iostream>
 #include <vector>
 
+#if defined (HAVE_IPP) && (IPP_VERSION_MAJOR >= 7)
+static IppStatus sts = ippInit();
+#endif 
+
 namespace cv
 {
 
+#if defined (HAVE_IPP) && (IPP_VERSION_MAJOR >= 7)
+    typedef IppStatus (CV_STDCALL* ippiSetFunc)(const void*, void *, int, IppiSize);
+    typedef IppStatus (CV_STDCALL* ippiWarpPerspectiveBackFunc)(const void*, IppiSize, int, IppiRect, void *, int, IppiRect, double [3][3], int);
+    typedef IppStatus (CV_STDCALL* ippiWarpAffineBackFunc)(const void*, IppiSize, int, IppiRect, void *, int, IppiRect, double [2][3], int);
+    typedef IppStatus (CV_STDCALL* ippiResizeSqrPixelFunc)(const void*, IppiSize, int, IppiRect, void*, int, IppiRect, double, double, double, double, int, Ipp8u *);
+
+    // Casts the first `channels` components of `value` to Type and fills the ROI through `func`; true when the IPP status is non-negative.
+    template <int channels, typename Type>
+    bool IPPSetSimple(cv::Scalar value, void *dataPointer, int step, IppiSize &size, ippiSetFunc func)
+    {
+        Type fill[channels];
+        for( int c = 0; c != channels; ++c ) fill[c] = (Type)value[c];
+        return func(fill, dataPointer, step, size) >= 0;
+    }
+
+    // Fills a destination ROI with a constant value through the IPP ippiSet_* family.
+    //   value       - fill value; the first `channels` components are used
+    //   dataPointer - address of the first ROI pixel
+    //   step        - row stride, forwarded unchanged to ippiSet_*
+    //   size        - ROI extent
+    //   channels    - 1, 3 or 4
+    //   depth       - CV_8U, CV_16U or CV_32F
+    // Returns true when a matching ippiSet_* variant exists and reports a non-negative
+    // status; every other channel/depth combination returns false.
+    bool IPPSet(const cv::Scalar &value, void *dataPointer, int step, IppiSize &size, int channels, int depth)
+    {
+        switch( channels )
+        {
+        case 1:
+            if( depth == CV_8U )
+                return ippiSet_8u_C1R((Ipp8u)value[0], (Ipp8u *)dataPointer, step, size) >= 0;
+            if( depth == CV_16U )
+                return ippiSet_16u_C1R((Ipp16u)value[0], (Ipp16u *)dataPointer, step, size) >= 0;
+            if( depth == CV_32F )
+                return ippiSet_32f_C1R((Ipp32f)value[0], (Ipp32f *)dataPointer, step, size) >= 0;
+            break;
+        // 3- and 4-channel fills go through IPPSetSimple, which builds the per-channel array.
+        case 3:
+            if( depth == CV_8U )
+                return IPPSetSimple<3, Ipp8u>(value, dataPointer, step, size, (ippiSetFunc)ippiSet_8u_C3R);
+            if( depth == CV_16U )
+                return IPPSetSimple<3, Ipp16u>(value, dataPointer, step, size, (ippiSetFunc)ippiSet_16u_C3R);
+            if( depth == CV_32F )
+                return IPPSetSimple<3, Ipp32f>(value, dataPointer, step, size, (ippiSetFunc)ippiSet_32f_C3R);
+            break;
+        case 4:
+            if( depth == CV_8U )
+                return IPPSetSimple<4, Ipp8u>(value, dataPointer, step, size, (ippiSetFunc)ippiSet_8u_C4R);
+            if( depth == CV_16U )
+                return IPPSetSimple<4, Ipp16u>(value, dataPointer, step, size, (ippiSetFunc)ippiSet_16u_C4R);
+            if( depth == CV_32F )
+                return IPPSetSimple<4, Ipp32f>(value, dataPointer, step, size, (ippiSetFunc)ippiSet_32f_C4R);
+            break;
+        default:
+            break;
+        }
+        return false;
+    }
+#endif
+
 /************** interpolation formulas and tables ***************/
 
 const int INTER_RESIZE_COEF_BITS=11;
@@ -1604,6 +1668,45 @@ static int computeResizeAreaTab( int ssize, int dsize, int cn, double scale, Dec
     return k;
 }
 
+#if defined (HAVE_IPP) && (IPP_VERSION_MAJOR >= 7)
+// ParallelLoopBody that resizes bands of source rows into dst through the supplied IPP resize function; *ok is cleared if any band fails.
+class IPPresizeInvoker :
+    public ParallelLoopBody
+{
+public:
+    IPPresizeInvoker(Mat &_src, Mat &_dst, double &_inv_scale_x, double &_inv_scale_y, int _mode, ippiResizeSqrPixelFunc _func, bool *_ok) :
+      ParallelLoopBody(), src(_src), dst(_dst), inv_scale_x(_inv_scale_x), inv_scale_y(_inv_scale_y), mode(_mode), func(_func), ok(_ok)
+      {
+          *ok = true; // optimistic start; operator() only ever clears it
+      }
+
+      // Resizes source rows [range.start, range.end) into the corresponding destination band.
+      virtual void operator() (const Range& range) const
+      {
+          int cn = src.channels();
+          IppiRect srcroi = { 0, range.start, src.cols, range.end - range.start };
+          int dsty = CV_IMIN(cvRound(range.start * inv_scale_y), dst.rows);
+          int dstwidth = CV_IMIN(cvRound(src.cols * inv_scale_x), dst.cols);
+          int dstheight = CV_IMIN(cvRound(range.end * inv_scale_y), dst.rows);
+          IppiRect dstroi = { 0, dsty, dstwidth, dstheight - dsty };
+          int bufsize = 0;
+          // No valid size or no scratch buffer means failure is reported through *ok instead of calling IPP with a bad buffer.
+          Ipp8u *buf = ippiResizeGetBufSize( srcroi, dstroi, cn, mode, &bufsize ) < 0 ? 0 : ippsMalloc_8u( bufsize );
+          if( !buf || func( src.data, ippiSize(src.cols, src.rows), (int)src.step[0], srcroi, dst.data, (int)dst.step[0], dstroi, inv_scale_x, inv_scale_y, 0, 0, mode, buf ) < 0 )
+              *ok = false;
+          if( buf ) ippsFree(buf);
+      }
+private:
+    Mat &src;
+    Mat &dst;
+    double inv_scale_x;
+    double inv_scale_y;
+    int mode;
+    ippiResizeSqrPixelFunc func;
+    bool *ok;
+    const IPPresizeInvoker& operator= (const IPPresizeInvoker&);
+};
+#endif
 
 }
 
@@ -1745,6 +1848,39 @@ void cv::resize( InputArray _src, OutputArray _dst, Size dsize,
     int depth = src.depth(), cn = src.channels();
     double scale_x = 1./inv_scale_x, scale_y = 1./inv_scale_y;
     int k, sx, sy, dx, dy;
+    
+#if defined (HAVE_IPP) && (IPP_VERSION_MAJOR >= 7)
+    int mode = 
+        interpolation == INTER_LINEAR ? IPPI_INTER_LINEAR :
+        interpolation == INTER_NEAREST ? IPPI_INTER_NN :
+        interpolation == INTER_CUBIC ? IPPI_INTER_CUBIC :
+        interpolation == INTER_AREA && inv_scale_x * inv_scale_y > 1 ? IPPI_INTER_NN :
+        0;
+    int type = src.type();
+    ippiResizeSqrPixelFunc ippFunc = 
+        type == CV_8UC1 ? (ippiResizeSqrPixelFunc)ippiResizeSqrPixel_8u_C1R : 
+        type == CV_8UC3 ? (ippiResizeSqrPixelFunc)ippiResizeSqrPixel_8u_C3R :
+        type == CV_8UC4 ? (ippiResizeSqrPixelFunc)ippiResizeSqrPixel_8u_C4R :
+        type == CV_16UC1 ? (ippiResizeSqrPixelFunc)ippiResizeSqrPixel_16u_C1R :
+        type == CV_16UC3 ? (ippiResizeSqrPixelFunc)ippiResizeSqrPixel_16u_C3R :
+        type == CV_16UC4 ? (ippiResizeSqrPixelFunc)ippiResizeSqrPixel_16u_C4R :
+        type == CV_16SC1 ? (ippiResizeSqrPixelFunc)ippiResizeSqrPixel_16s_C1R :
+        type == CV_16SC3 ? (ippiResizeSqrPixelFunc)ippiResizeSqrPixel_16s_C3R :
+        type == CV_16SC4 ? (ippiResizeSqrPixelFunc)ippiResizeSqrPixel_16s_C4R :
+        type == CV_32FC1 ? (ippiResizeSqrPixelFunc)ippiResizeSqrPixel_32f_C1R :
+        type == CV_32FC3 ? (ippiResizeSqrPixelFunc)ippiResizeSqrPixel_32f_C3R :
+        type == CV_32FC4 ? (ippiResizeSqrPixelFunc)ippiResizeSqrPixel_32f_C4R : 
+        0;
+    if( ippFunc && mode )
+    {
+        bool ok;
+        Range range(0, src.rows);
+        IPPresizeInvoker invoker(src, dst, inv_scale_x, inv_scale_y, mode, ippFunc, &ok);
+        parallel_for_(range, invoker, dst.total()/(double)(1<<16));
+        if( ok )
+            return;
+    }
+#endif
 
     if( interpolation == INTER_NEAREST )
     {
@@ -3257,6 +3393,49 @@ private:
     double *M;
 };
 
+#if defined (HAVE_IPP) && (IPP_VERSION_MAJOR >= 7)
+// ParallelLoopBody that warps bands of destination rows through the supplied ippiWarpAffineBackFunc; *ok is cleared if any band fails.
+class IPPwarpAffineInvoker : public ParallelLoopBody
+{
+public:
+    IPPwarpAffineInvoker(Mat &_src, Mat &_dst, double (&_coeffs)[2][3], int &_interpolation, int &_borderType, const Scalar &_borderValue, ippiWarpAffineBackFunc _func, bool *_ok) :
+      ParallelLoopBody(), src(_src), dst(_dst), coeffs(_coeffs), mode(_interpolation), borderType(_borderType), borderValue(_borderValue), func(_func), ok(_ok)
+      {
+          *ok = true;
+      }
+
+      virtual void operator() (const Range& range) const
+      {
+          const int bandRows = range.end - range.start;
+          if( borderType == BORDER_CONSTANT ) // fill the band with borderValue before warping into it
+          {
+              IppiSize bandSize = { dst.cols, bandRows };
+              void *bandStart = dst.data + dst.step[0] * range.start;
+              if( !IPPSet( borderValue, bandStart, (int)dst.step[0], bandSize, src.channels(), src.depth() ) )
+              {
+                  *ok = false;
+                  return;
+              }
+          }
+          IppiSize fullSrcSize = { src.cols, src.rows };
+          IppiRect fullSrcRoi = { 0, 0, src.cols, src.rows };
+          IppiRect bandRoi = { 0, range.start, dst.cols, bandRows };
+          if( func( src.data, fullSrcSize, (int)src.step[0], fullSrcRoi, dst.data, (int)dst.step[0], bandRoi, coeffs, mode ) < 0 ) // Aug 2013: problem in IPP 7.1, 8.0 : sometimes function return ippStsCoeffErr
+              *ok = false;
+      }
+private:
+    Mat &src;
+    Mat &dst;
+    double (&coeffs)[2][3];
+    int mode;
+    int borderType;
+    Scalar borderValue;
+    ippiWarpAffineBackFunc func;
+    bool *ok;
+    const IPPwarpAffineInvoker& operator= (const IPPwarpAffineInvoker&);
+};
+#endif
+
 }
 
 
@@ -3303,6 +3482,50 @@ void cv::warpAffine( InputArray _src, OutputArray _dst,
     const int AB_BITS = MAX(10, (int)INTER_BITS);
     const int AB_SCALE = 1 << AB_BITS;
 
+#if defined (HAVE_IPP) && (IPP_VERSION_MAJOR >= 7)
+    int depth = src.depth();
+    int channels = src.channels();
+    if( ( depth == CV_8U || depth == CV_16U || depth == CV_32F ) && 
+        ( channels == 1 || channels == 3 || channels == 4 ) && 
+        ( borderType == cv::BORDER_TRANSPARENT || ( borderType == cv::BORDER_CONSTANT ) ) )
+    {
+        int type = src.type();
+        ippiWarpAffineBackFunc ippFunc = 
+            type == CV_8UC1 ? (ippiWarpAffineBackFunc)ippiWarpAffineBack_8u_C1R :
+            type == CV_8UC3 ? (ippiWarpAffineBackFunc)ippiWarpAffineBack_8u_C3R :
+            type == CV_8UC4 ? (ippiWarpAffineBackFunc)ippiWarpAffineBack_8u_C4R :
+            type == CV_16UC1 ? (ippiWarpAffineBackFunc)ippiWarpAffineBack_16u_C1R :
+            type == CV_16UC3 ? (ippiWarpAffineBackFunc)ippiWarpAffineBack_16u_C3R :
+            type == CV_16UC4 ? (ippiWarpAffineBackFunc)ippiWarpAffineBack_16u_C4R :
+            type == CV_32FC1 ? (ippiWarpAffineBackFunc)ippiWarpAffineBack_32f_C1R :
+            type == CV_32FC3 ? (ippiWarpAffineBackFunc)ippiWarpAffineBack_32f_C3R :
+            type == CV_32FC4 ? (ippiWarpAffineBackFunc)ippiWarpAffineBack_32f_C4R :
+            0;
+        int mode = 
+            flags == INTER_LINEAR ? IPPI_INTER_LINEAR :
+            flags == INTER_NEAREST ? IPPI_INTER_NN :
+            flags == INTER_CUBIC ? IPPI_INTER_CUBIC : 
+            0;
+        if( mode && ippFunc )
+        {
+            double coeffs[2][3];
+            for( int i = 0; i < 2; i++ )
+            {
+                for( int j = 0; j < 3; j++ )
+                {
+                    coeffs[i][j] = matM.at<double>(i, j);
+                }
+            }
+            bool ok;
+            Range range(0, dst.rows);
+            IPPwarpAffineInvoker invoker(src, dst, coeffs, mode, borderType, borderValue, ippFunc, &ok);
+            parallel_for_(range, invoker, dst.total()/(double)(1<<16));
+            if( ok )
+                return;
+        }
+    }
+#endif
+    
     for( x = 0; x < dst.cols; x++ )
     {
         adelta[x] = saturate_cast<int>(M[0]*x*AB_SCALE);
@@ -3410,6 +3633,50 @@ private:
     Scalar borderValue;
 };
 
+#if defined (HAVE_IPP) && (IPP_VERSION_MAJOR >= 7)
+// ParallelLoopBody that warps bands of destination rows through the supplied ippiWarpPerspectiveBackFunc; *ok is cleared if any band fails.
+class IPPwarpPerspectiveInvoker : public ParallelLoopBody
+{
+public:
+    IPPwarpPerspectiveInvoker(Mat &_src, Mat &_dst, double (&_coeffs)[3][3], int &_interpolation, int &_borderType, const Scalar &_borderValue, ippiWarpPerspectiveBackFunc _func, bool *_ok) :
+      ParallelLoopBody(), src(_src), dst(_dst), coeffs(_coeffs), mode(_interpolation), borderType(_borderType), borderValue(_borderValue), func(_func), ok(_ok)
+      {
+          *ok = true;
+      }
+
+      virtual void operator() (const Range& range) const
+      {
+          const int bandRows = range.end - range.start;
+          if( borderType == BORDER_CONSTANT ) // fill the band with borderValue before warping into it
+          {
+              IppiSize bandSize = {dst.cols, bandRows};
+              void *bandStart = dst.data + dst.step[0] * range.start;
+              if( !IPPSet( borderValue, bandStart, (int)dst.step[0], bandSize, src.channels(), src.depth() ) )
+              {
+                  *ok = false;
+                  return;
+              }
+          }
+
+          IppiSize fullSrcSize = {src.cols, src.rows};
+          IppiRect fullSrcRoi = {0, 0, src.cols, src.rows};
+          IppiRect bandRoi = {0, range.start, dst.cols, bandRows};
+          if( func(src.data, fullSrcSize, (int)src.step[0], fullSrcRoi, dst.data, (int)dst.step[0], bandRoi, coeffs, mode) < 0 )
+              *ok = false;
+      }
+private:
+    Mat &src;
+    Mat &dst;
+    double (&coeffs)[3][3];
+    int mode;
+    int borderType;
+    const Scalar borderValue;
+    ippiWarpPerspectiveBackFunc func;
+    bool *ok;
+    const IPPwarpPerspectiveInvoker& operator= (const IPPwarpPerspectiveInvoker&);
+};
+#endif
+
 }
 
 void cv::warpPerspective( InputArray _src, OutputArray _dst, InputArray _M0,
@@ -3439,6 +3706,50 @@ void cv::warpPerspective( InputArray _src, OutputArray _dst, InputArray _M0,
 
     if( !(flags & WARP_INVERSE_MAP) )
          invert(matM, matM);
+         
+#if defined (HAVE_IPP) && (IPP_VERSION_MAJOR >= 7)
+    int depth = src.depth();
+    int channels = src.channels();
+    if( ( depth == CV_8U || depth == CV_16U || depth == CV_32F ) && 
+        ( channels == 1 || channels == 3 || channels == 4 ) && 
+        ( borderType == cv::BORDER_TRANSPARENT || borderType == cv::BORDER_CONSTANT ) )
+    {
+        int type = src.type();
+        ippiWarpPerspectiveBackFunc ippFunc = 
+            type == CV_8UC1 ? (ippiWarpPerspectiveBackFunc)ippiWarpPerspectiveBack_8u_C1R :
+            type == CV_8UC3 ? (ippiWarpPerspectiveBackFunc)ippiWarpPerspectiveBack_8u_C3R :
+            type == CV_8UC4 ? (ippiWarpPerspectiveBackFunc)ippiWarpPerspectiveBack_8u_C4R :
+            type == CV_16UC1 ? (ippiWarpPerspectiveBackFunc)ippiWarpPerspectiveBack_16u_C1R :
+            type == CV_16UC3 ? (ippiWarpPerspectiveBackFunc)ippiWarpPerspectiveBack_16u_C3R :
+            type == CV_16UC4 ? (ippiWarpPerspectiveBackFunc)ippiWarpPerspectiveBack_16u_C4R :
+            type == CV_32FC1 ? (ippiWarpPerspectiveBackFunc)ippiWarpPerspectiveBack_32f_C1R :
+            type == CV_32FC3 ? (ippiWarpPerspectiveBackFunc)ippiWarpPerspectiveBack_32f_C3R :
+            type == CV_32FC4 ? (ippiWarpPerspectiveBackFunc)ippiWarpPerspectiveBack_32f_C4R :
+            0;
+        int mode = 
+            flags == INTER_LINEAR ? IPPI_INTER_LINEAR :
+            flags == INTER_NEAREST ? IPPI_INTER_NN :
+            flags == INTER_CUBIC ? IPPI_INTER_CUBIC : 
+            0;
+        if( mode && ippFunc )
+        {
+            double coeffs[3][3];
+            for( int i = 0; i < 3; i++ )
+            {
+                for( int j = 0; j < 3; j++ )
+                {
+                    coeffs[i][j] = matM.at<double>(i, j);
+                }
+            }
+            bool ok;
+            Range range(0, dst.rows);
+            IPPwarpPerspectiveInvoker invoker(src, dst, coeffs, mode, borderType, borderValue, ippFunc, &ok);
+            parallel_for_(range, invoker, dst.total()/(double)(1<<16));
+            if( ok )
+                return;
+        }
+    }
+#endif
 
     Range range(0, dst.rows);
     warpPerspectiveInvoker invoker(src, dst, M, interpolation, borderType, borderValue);

From e85e4d3ab90da55d0461c63f1502278ee34f4143 Mon Sep 17 00:00:00 2001
From: Vadim Pisarevsky <vadim.pisarevsky@gmail.com>
Date: Mon, 2 Sep 2013 18:34:50 +0400
Subject: [PATCH 19/41] fixed bug in IPP-accelerated morphology; added several
 IPP imgwarp functions (by Klim)

---
 modules/imgproc/src/imgwarp.cpp | 57 +++++++++++++++------------------
 modules/imgproc/src/morph.cpp   | 29 ++++++++---------
 2 files changed, 39 insertions(+), 47 deletions(-)

diff --git a/modules/imgproc/src/imgwarp.cpp b/modules/imgproc/src/imgwarp.cpp
index 4c9063da5..a4fda282d 100644
--- a/modules/imgproc/src/imgwarp.cpp
+++ b/modules/imgproc/src/imgwarp.cpp
@@ -52,7 +52,7 @@
 
 #if defined (HAVE_IPP) && (IPP_VERSION_MAJOR >= 7)
 static IppStatus sts = ippInit();
-#endif 
+#endif
 
 namespace cv
 {
@@ -76,15 +76,15 @@ namespace cv
     {
         if( channels == 1 )
         {
-            switch( depth ) 
-            { 
-            case CV_8U: 
+            switch( depth )
+            {
+            case CV_8U:
                 return ippiSet_8u_C1R((Ipp8u)value[0], (Ipp8u *)dataPointer, step, size) >= 0;
             case CV_16U:
                 return ippiSet_16u_C1R((Ipp16u)value[0], (Ipp16u *)dataPointer, step, size) >= 0;
             case CV_32F:
                 return ippiSet_32f_C1R((Ipp32f)value[0], (Ipp32f *)dataPointer, step, size) >= 0;
-            }  
+            }
         }
         else
         {
@@ -98,7 +98,7 @@ namespace cv
                     return IPPSetSimple<3, Ipp16u>(value, dataPointer, step, size, (ippiSetFunc)ippiSet_16u_C3R);
                 case CV_32F:
                     return IPPSetSimple<3, Ipp32f>(value, dataPointer, step, size, (ippiSetFunc)ippiSet_32f_C3R);
-                }  
+                }
             }
             else if( channels == 4 )
             {
@@ -110,7 +110,7 @@ namespace cv
                     return IPPSetSimple<4, Ipp16u>(value, dataPointer, step, size, (ippiSetFunc)ippiSet_16u_C4R);
                 case CV_32F:
                     return IPPSetSimple<4, Ipp32f>(value, dataPointer, step, size, (ippiSetFunc)ippiSet_32f_C4R);
-                } 
+                }
             }
         }
         return false;
@@ -1848,17 +1848,12 @@ void cv::resize( InputArray _src, OutputArray _dst, Size dsize,
     int depth = src.depth(), cn = src.channels();
     double scale_x = 1./inv_scale_x, scale_y = 1./inv_scale_y;
     int k, sx, sy, dx, dy;
-    
+
 #if defined (HAVE_IPP) && (IPP_VERSION_MAJOR >= 7)
-    int mode = 
-        interpolation == INTER_LINEAR ? IPPI_INTER_LINEAR :
-        interpolation == INTER_NEAREST ? IPPI_INTER_NN :
-        interpolation == INTER_CUBIC ? IPPI_INTER_CUBIC :
-        interpolation == INTER_AREA && inv_scale_x * inv_scale_y > 1 ? IPPI_INTER_NN :
-        0;
+    int mode = interpolation == INTER_LINEAR ? IPPI_INTER_LINEAR : 0;
     int type = src.type();
-    ippiResizeSqrPixelFunc ippFunc = 
-        type == CV_8UC1 ? (ippiResizeSqrPixelFunc)ippiResizeSqrPixel_8u_C1R : 
+    ippiResizeSqrPixelFunc ippFunc =
+        type == CV_8UC1 ? (ippiResizeSqrPixelFunc)ippiResizeSqrPixel_8u_C1R :
         type == CV_8UC3 ? (ippiResizeSqrPixelFunc)ippiResizeSqrPixel_8u_C3R :
         type == CV_8UC4 ? (ippiResizeSqrPixelFunc)ippiResizeSqrPixel_8u_C4R :
         type == CV_16UC1 ? (ippiResizeSqrPixelFunc)ippiResizeSqrPixel_16u_C1R :
@@ -1869,9 +1864,9 @@ void cv::resize( InputArray _src, OutputArray _dst, Size dsize,
         type == CV_16SC4 ? (ippiResizeSqrPixelFunc)ippiResizeSqrPixel_16s_C4R :
         type == CV_32FC1 ? (ippiResizeSqrPixelFunc)ippiResizeSqrPixel_32f_C1R :
         type == CV_32FC3 ? (ippiResizeSqrPixelFunc)ippiResizeSqrPixel_32f_C3R :
-        type == CV_32FC4 ? (ippiResizeSqrPixelFunc)ippiResizeSqrPixel_32f_C4R : 
+        type == CV_32FC4 ? (ippiResizeSqrPixelFunc)ippiResizeSqrPixel_32f_C4R :
         0;
-    if( ippFunc && mode )
+    if( ippFunc && mode != 0 )
     {
         bool ok;
         Range range(0, src.rows);
@@ -3485,12 +3480,12 @@ void cv::warpAffine( InputArray _src, OutputArray _dst,
 #if defined (HAVE_IPP) && (IPP_VERSION_MAJOR >= 7)
     int depth = src.depth();
     int channels = src.channels();
-    if( ( depth == CV_8U || depth == CV_16U || depth == CV_32F ) && 
-        ( channels == 1 || channels == 3 || channels == 4 ) && 
+    if( ( depth == CV_8U || depth == CV_16U || depth == CV_32F ) &&
+        ( channels == 1 || channels == 3 || channels == 4 ) &&
         ( borderType == cv::BORDER_TRANSPARENT || ( borderType == cv::BORDER_CONSTANT ) ) )
     {
         int type = src.type();
-        ippiWarpAffineBackFunc ippFunc = 
+        ippiWarpAffineBackFunc ippFunc =
             type == CV_8UC1 ? (ippiWarpAffineBackFunc)ippiWarpAffineBack_8u_C1R :
             type == CV_8UC3 ? (ippiWarpAffineBackFunc)ippiWarpAffineBack_8u_C3R :
             type == CV_8UC4 ? (ippiWarpAffineBackFunc)ippiWarpAffineBack_8u_C4R :
@@ -3501,10 +3496,10 @@ void cv::warpAffine( InputArray _src, OutputArray _dst,
             type == CV_32FC3 ? (ippiWarpAffineBackFunc)ippiWarpAffineBack_32f_C3R :
             type == CV_32FC4 ? (ippiWarpAffineBackFunc)ippiWarpAffineBack_32f_C4R :
             0;
-        int mode = 
+        int mode =
             flags == INTER_LINEAR ? IPPI_INTER_LINEAR :
             flags == INTER_NEAREST ? IPPI_INTER_NN :
-            flags == INTER_CUBIC ? IPPI_INTER_CUBIC : 
+            flags == INTER_CUBIC ? IPPI_INTER_CUBIC :
             0;
         if( mode && ippFunc )
         {
@@ -3525,7 +3520,7 @@ void cv::warpAffine( InputArray _src, OutputArray _dst,
         }
     }
 #endif
-    
+
     for( x = 0; x < dst.cols; x++ )
     {
         adelta[x] = saturate_cast<int>(M[0]*x*AB_SCALE);
@@ -3638,7 +3633,7 @@ class IPPwarpPerspectiveInvoker :
     public ParallelLoopBody
 {
 public:
-    IPPwarpPerspectiveInvoker(Mat &_src, Mat &_dst, double (&_coeffs)[3][3], int &_interpolation, int &_borderType, const Scalar &_borderValue, ippiWarpPerspectiveBackFunc _func, bool *_ok) : 
+    IPPwarpPerspectiveInvoker(Mat &_src, Mat &_dst, double (&_coeffs)[3][3], int &_interpolation, int &_borderType, const Scalar &_borderValue, ippiWarpPerspectiveBackFunc _func, bool *_ok) :
       ParallelLoopBody(), src(_src), dst(_dst), mode(_interpolation), coeffs(_coeffs), borderType(_borderType), borderValue(_borderValue), func(_func), ok(_ok)
       {
           *ok = true;
@@ -3706,16 +3701,16 @@ void cv::warpPerspective( InputArray _src, OutputArray _dst, InputArray _M0,
 
     if( !(flags & WARP_INVERSE_MAP) )
          invert(matM, matM);
-         
+
 #if defined (HAVE_IPP) && (IPP_VERSION_MAJOR >= 7)
     int depth = src.depth();
     int channels = src.channels();
-    if( ( depth == CV_8U || depth == CV_16U || depth == CV_32F ) && 
-        ( channels == 1 || channels == 3 || channels == 4 ) && 
+    if( ( depth == CV_8U || depth == CV_16U || depth == CV_32F ) &&
+        ( channels == 1 || channels == 3 || channels == 4 ) &&
         ( borderType == cv::BORDER_TRANSPARENT || borderType == cv::BORDER_CONSTANT ) )
     {
         int type = src.type();
-        ippiWarpPerspectiveBackFunc ippFunc = 
+        ippiWarpPerspectiveBackFunc ippFunc =
             type == CV_8UC1 ? (ippiWarpPerspectiveBackFunc)ippiWarpPerspectiveBack_8u_C1R :
             type == CV_8UC3 ? (ippiWarpPerspectiveBackFunc)ippiWarpPerspectiveBack_8u_C3R :
             type == CV_8UC4 ? (ippiWarpPerspectiveBackFunc)ippiWarpPerspectiveBack_8u_C4R :
@@ -3726,10 +3721,10 @@ void cv::warpPerspective( InputArray _src, OutputArray _dst, InputArray _M0,
             type == CV_32FC3 ? (ippiWarpPerspectiveBackFunc)ippiWarpPerspectiveBack_32f_C3R :
             type == CV_32FC4 ? (ippiWarpPerspectiveBackFunc)ippiWarpPerspectiveBack_32f_C4R :
             0;
-        int mode = 
+        int mode =
             flags == INTER_LINEAR ? IPPI_INTER_LINEAR :
             flags == INTER_NEAREST ? IPPI_INTER_NN :
-            flags == INTER_CUBIC ? IPPI_INTER_CUBIC : 
+            flags == INTER_CUBIC ? IPPI_INTER_CUBIC :
             0;
         if( mode && ippFunc )
         {
diff --git a/modules/imgproc/src/morph.cpp b/modules/imgproc/src/morph.cpp
index b8bb7cf38..19636bc96 100644
--- a/modules/imgproc/src/morph.cpp
+++ b/modules/imgproc/src/morph.cpp
@@ -1213,11 +1213,10 @@ static bool IPPMorphReplicate(int op, const Mat &src, Mat &dst, const Mat &kerne
 }
 
 static bool IPPMorphOp(int op, InputArray _src, OutputArray _dst,
-    InputArray _kernel,
-    const Point &anchor, int iterations,
+    const Mat& _kernel, Point anchor, int iterations,
     int borderType, const Scalar &borderValue)
 {
-    Mat src = _src.getMat(), kernel = _kernel.getMat();
+    Mat src = _src.getMat(), kernel = _kernel;
     if( !( src.depth() == CV_8U || src.depth() == CV_32F ) || ( iterations > 1 ) ||
         !( borderType == cv::BORDER_REPLICATE || (borderType == cv::BORDER_CONSTANT && borderValue == morphologyDefaultBorderValue()) )
         || !( op == MORPH_DILATE || op == MORPH_ERODE) )
@@ -1248,9 +1247,6 @@ static bool IPPMorphOp(int op, InputArray _src, OutputArray _dst,
 
     }
     Size ksize = kernel.data ? kernel.size() : Size(3,3);
-    Point normanchor = normalizeAnchor(anchor, ksize);
-
-    CV_Assert( normanchor.inside(Rect(0, 0, ksize.width, ksize.height)) );
 
     _dst.create( src.size(), src.type() );
     Mat dst = _dst.getMat();
@@ -1265,7 +1261,7 @@ static bool IPPMorphOp(int op, InputArray _src, OutputArray _dst,
     if( !kernel.data )
     {
         ksize = Size(1+iterations*2,1+iterations*2);
-        normanchor = Point(iterations, iterations);
+        anchor = Point(iterations, iterations);
         rectKernel = true;
         iterations = 1;
     }
@@ -1273,7 +1269,7 @@ static bool IPPMorphOp(int op, InputArray _src, OutputArray _dst,
     {
         ksize = Size(ksize.width + (iterations-1)*(ksize.width-1),
              ksize.height + (iterations-1)*(ksize.height-1)),
-        normanchor = Point(normanchor.x*iterations, normanchor.y*iterations);
+        anchor = Point(anchor.x*iterations, anchor.y*iterations);
         kernel = Mat();
         rectKernel = true;
         iterations = 1;
@@ -1283,7 +1279,7 @@ static bool IPPMorphOp(int op, InputArray _src, OutputArray _dst,
     if( iterations > 1 )
         return false;
 
-    return IPPMorphReplicate( op, src, dst, kernel, ksize, normanchor, rectKernel );
+    return IPPMorphReplicate( op, src, dst, kernel, ksize, anchor, rectKernel );
 }
 #endif
 
@@ -1292,18 +1288,19 @@ static void morphOp( int op, InputArray _src, OutputArray _dst,
                      Point anchor, int iterations,
                      int borderType, const Scalar& borderValue )
 {
-
-#if defined (HAVE_IPP) && (IPP_VERSION_MAJOR >= 7)
-    if( IPPMorphOp(op, _src, _dst, _kernel, anchor, iterations, borderType, borderValue) )
-        return;
-#endif
-
-    Mat src = _src.getMat(), kernel = _kernel.getMat();
+    Mat kernel = _kernel.getMat();
     Size ksize = kernel.data ? kernel.size() : Size(3,3);
     anchor = normalizeAnchor(anchor, ksize);
 
     CV_Assert( anchor.inside(Rect(0, 0, ksize.width, ksize.height)) );
 
+#if defined (HAVE_IPP) && (IPP_VERSION_MAJOR >= 7)
+    if( IPPMorphOp(op, _src, _dst, kernel, anchor, iterations, borderType, borderValue) )
+        return;
+#endif
+
+    Mat src = _src.getMat();
+
     _dst.create( src.size(), src.type() );
     Mat dst = _dst.getMat();
 

From 7b3e3f69fb8600a43d51a464a812f3c00d770194 Mon Sep 17 00:00:00 2001
From: peng xiao <hisenxpress@gmail.com>
Date: Tue, 3 Sep 2013 09:31:13 +0800
Subject: [PATCH 20/41] Modify according to @alalek.

---
 modules/ocl/src/gemm.cpp | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/modules/ocl/src/gemm.cpp b/modules/ocl/src/gemm.cpp
index 6e04baca4..a9533b5d8 100644
--- a/modules/ocl/src/gemm.cpp
+++ b/modules/ocl/src/gemm.cpp
@@ -81,11 +81,14 @@ static Mutex cs;
 
 void cv::ocl::clBlasSetup()
 {
-    AutoLock al(cs);
     if(!clBlasInitialized)
     {
-        openCLSafeCall(clAmdBlasSetup());
-        clBlasInitialized = true;
+        AutoLock al(cs); // the lock is taken only when the unlocked check above saw the flag unset
+        if(!clBlasInitialized) // re-check under the lock: another caller may have completed setup in the meantime
+        {
+            openCLSafeCall(clAmdBlasSetup());
+            clBlasInitialized = true; // NOTE(review): the outer check reads this flag without holding cs - confirm its declaration makes that read safe
+        }
     }
 }
 

From ea165394484460f6907c32d447003baab89eaeb4 Mon Sep 17 00:00:00 2001
From: pengxiao <pengxiao@outlook.com>
Date: Tue, 3 Sep 2013 10:30:37 +0800
Subject: [PATCH 21/41] Fix a crash of ocl program if clAmdBlas is not linked.

---
 modules/ocl/src/gemm.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/modules/ocl/src/gemm.cpp b/modules/ocl/src/gemm.cpp
index a9533b5d8..7e31cdbf4 100644
--- a/modules/ocl/src/gemm.cpp
+++ b/modules/ocl/src/gemm.cpp
@@ -69,7 +69,7 @@ void cv::ocl::clBlasSetup()
 
 void cv::ocl::clBlasTeardown()
 {
-    CV_Error(CV_StsNotImplemented, "OpenCL BLAS is not implemented");
+    // Intentionally a no-op: calling teardown in this build branch must not raise an error.
 }
 
 #else

From a70bdfc13f8279ee86be1a0281fbd9ee357245b7 Mon Sep 17 00:00:00 2001
From: yao <bitwangyaoyao@gmail.com>
Date: Tue, 3 Sep 2013 15:02:18 +0800
Subject: [PATCH 22/41] a little fix to tests and sample

---
 modules/ocl/test/test_imgproc.cpp | 24 ++++++++++--------------
 samples/ocl/clahe.cpp             |  4 ++++
 2 files changed, 14 insertions(+), 14 deletions(-)

diff --git a/modules/ocl/test/test_imgproc.cpp b/modules/ocl/test/test_imgproc.cpp
index 46cd257c8..f723e13bc 100644
--- a/modules/ocl/test/test_imgproc.cpp
+++ b/modules/ocl/test/test_imgproc.cpp
@@ -1396,14 +1396,10 @@ TEST_P(calcHist, Mat)
 }
 ///////////////////////////////////////////////////////////////////////////////////////////////////////
 // CLAHE
-namespace
-{
-    IMPLEMENT_PARAM_CLASS(ClipLimit, double)
-}
 
-PARAM_TEST_CASE(CLAHE, cv::Size, ClipLimit)
+PARAM_TEST_CASE(CLAHE, cv::Size, double)
 {
-    cv::Size size;
+    cv::Size gridSize;
     double clipLimit;
 
     cv::Mat src;
@@ -1414,22 +1410,22 @@ PARAM_TEST_CASE(CLAHE, cv::Size, ClipLimit)
 
     virtual void SetUp()
     {
-        size = GET_PARAM(0);
+        gridSize = GET_PARAM(0);
         clipLimit = GET_PARAM(1);
 
         cv::RNG &rng = TS::ptr()->get_rng();
-        src = randomMat(rng, size, CV_8UC1, 0, 256, false);
+        src = randomMat(rng, cv::Size(MWIDTH, MHEIGHT), CV_8UC1, 0, 256, false);
         g_src.upload(src);
     }
 };
 
 TEST_P(CLAHE, Accuracy)
 {
-    cv::Ptr<cv::CLAHE> clahe = cv::ocl::createCLAHE(clipLimit);
+    cv::Ptr<cv::CLAHE> clahe = cv::ocl::createCLAHE(clipLimit, gridSize);
     clahe->apply(g_src, g_dst);
     cv::Mat dst(g_dst);
 
-    cv::Ptr<cv::CLAHE> clahe_gold = cv::createCLAHE(clipLimit);
+    cv::Ptr<cv::CLAHE> clahe_gold = cv::createCLAHE(clipLimit, gridSize);
     clahe_gold->apply(src, dst_gold);
 
     EXPECT_MAT_NEAR(dst_gold, dst, 1.0);
@@ -1725,10 +1721,10 @@ INSTANTIATE_TEST_CASE_P(histTestBase, calcHist, Combine(
                             ONE_TYPE(CV_32SC1) //no use
                         ));
 
-INSTANTIATE_TEST_CASE_P(ImgProc, CLAHE, Combine(
-                        Values(cv::Size(128, 128), cv::Size(113, 113), cv::Size(1300, 1300)),
-                        Values(0.0, 40.0)));
+INSTANTIATE_TEST_CASE_P(Imgproc, CLAHE, Combine(
+                        Values(cv::Size(4, 4), cv::Size(32, 8), cv::Size(8, 64)),
+                        Values(0.0, 10.0, 62.0, 300.0)));
 
-INSTANTIATE_TEST_CASE_P(OCL_ImgProc, ColumnSum, DIFFERENT_SIZES);
+INSTANTIATE_TEST_CASE_P(Imgproc, ColumnSum, DIFFERENT_SIZES);
 
 #endif // HAVE_OPENCL
diff --git a/samples/ocl/clahe.cpp b/samples/ocl/clahe.cpp
index c2f4b27bf..1fbf49fac 100644
--- a/samples/ocl/clahe.cpp
+++ b/samples/ocl/clahe.cpp
@@ -44,6 +44,10 @@ int main(int argc, char** argv)
     namedWindow("CLAHE");
     createTrackbar("Tile Size", "CLAHE", &tilesize, 32, (TrackbarCallback)TSize_Callback);
     createTrackbar("Clip Limit", "CLAHE", &cliplimit, 20, (TrackbarCallback)Clip_Callback);
+
+    vector<ocl::Info> info;
+    CV_Assert(ocl::getDevice(info));
+
     Mat frame, outframe;
     ocl::oclMat d_outframe;
 

From e528f39deff8f1b31ff53c9e3676e07b8227d0ed Mon Sep 17 00:00:00 2001
From: Jin Ma <jinma06njuee@gmail.om>
Date: Wed, 4 Sep 2013 14:37:46 +0800
Subject: [PATCH 23/41] Added the performance test for mog and mog2.

---
 modules/ocl/perf/perf_bgfg.cpp | 337 +++++++++++++++++++++++++++++++++
 1 file changed, 337 insertions(+)
 create mode 100644 modules/ocl/perf/perf_bgfg.cpp

diff --git a/modules/ocl/perf/perf_bgfg.cpp b/modules/ocl/perf/perf_bgfg.cpp
new file mode 100644
index 000000000..188dffa13
--- /dev/null
+++ b/modules/ocl/perf/perf_bgfg.cpp
@@ -0,0 +1,337 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                           License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved.
+// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// @Authors
+//    Fangfang Bai, fangfang@multicorewareinc.com
+//    Jin Ma,       jin@multicorewareinc.com
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors as is and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+#include "perf_precomp.hpp"
+
+///////////// MOG / MOG2 background subtraction ////////////////////////
+
+using namespace perf;
+using std::tr1::get;
+using std::tr1::tuple;
+using std::tr1::make_tuple;
+
+#if defined(HAVE_XINE)         || \
+    defined(HAVE_GSTREAMER)    || \
+    defined(HAVE_QUICKTIME)    || \
+    defined(HAVE_AVFOUNDATION) || \
+    defined(HAVE_FFMPEG)       || \
+    defined(WIN32) /* assume that we have ffmpeg */
+
+#  define BUILD_WITH_VIDEO_INPUT_SUPPORT 1
+#else
+#  define BUILD_WITH_VIDEO_INPUT_SUPPORT 0
+#endif
+
+#if BUILD_WITH_VIDEO_INPUT_SUPPORT
+
+typedef tuple<string, int, double> VideoMOGParamType;
+typedef TestBaseWithParam<VideoMOGParamType> VideoMOGFixture;
+
+PERF_TEST_P(VideoMOGFixture, Video_MOG, // runs MOG on a video clip with either the plain (cv::) or the OCL implementation
+            ::testing::Combine(::testing::Values("768x576.avi", "1920x1080.avi"), // input clip, resolved through getDataPath
+            ::testing::Values(1, 3), // cn: 1 -> frames converted to gray; any other value -> converted to BGRA
+            ::testing::Values(0.0, 0.01))) // learning rate, cast to float below
+{
+    VideoMOGParamType params = GetParam();
+
+    const string inputFile = perf::TestBase::getDataPath(get<0>(params));
+    const int cn = get<1>(params);
+    const float learningRate = static_cast<float>(get<2>(params));
+    
+    cv::VideoCapture cap(inputFile);
+    ASSERT_TRUE(cap.isOpened());
+
+    cv::Mat frame;
+
+    cap >> frame;
+    ASSERT_FALSE(frame.empty());
+
+    cv::Mat temp;
+    if (cn == 1)
+        cv::cvtColor(frame, temp, cv::COLOR_BGR2GRAY);
+    else
+        cv::cvtColor(frame, temp, cv::COLOR_BGR2BGRA); // NOTE(review): cn == 3 yields a 4-channel BGRA frame - confirm intended
+    cv::swap(temp, frame); // frame now holds the converted image
+
+    if(RUN_PLAIN_IMPL)
+    {
+        cv::BackgroundSubtractorMOG mog;
+        cv::Mat foreground;
+
+        mog(frame, foreground, learningRate); // first frame is fed once before the loop, outside TEST_CYCLE
+
+        for (int i = 0; i < 10; ++i) // ten further frames from the same capture
+        {
+            cap >> frame;
+            ASSERT_FALSE(frame.empty());
+
+            cv::Mat temp; // NOTE(review): shadows the outer 'temp'
+            if (cn == 1)
+                cv::cvtColor(frame, temp, cv::COLOR_BGR2GRAY);
+            else
+                cv::cvtColor(frame, temp, cv::COLOR_BGR2BGRA);
+
+            cv::swap(temp, frame);
+
+            TEST_CYCLE() // only the model update below is inside the cycle; capture and conversion are not
+            mog(frame, foreground, learningRate);
+
+            SANITY_CHECK(foreground); // invoked on every loop iteration
+        }
+    }else if(RUN_OCL_IMPL)
+    {
+        cv::ocl::oclMat d_frame(frame);
+        cv::ocl::MOG d_mog;
+        cv::ocl::oclMat foreground;
+        cv::Mat foreground_h; // host-side copy of the mask for the sanity check
+
+        d_mog(d_frame, foreground, learningRate); // first frame is fed once before the loop, outside OCL_TEST_CYCLE
+
+        for (int i = 0; i < 10; ++i)
+        {
+            cap >> frame;
+            ASSERT_FALSE(frame.empty());
+
+            cv::Mat temp; // NOTE(review): shadows the outer 'temp'
+            if (cn == 1)
+                cv::cvtColor(frame, temp, cv::COLOR_BGR2GRAY);
+            else
+                cv::cvtColor(frame, temp, cv::COLOR_BGR2BGRA);
+            cv::swap(temp, frame);
+
+            d_frame.upload(frame); // upload happens before, not inside, OCL_TEST_CYCLE
+
+            OCL_TEST_CYCLE()
+            d_mog(d_frame, foreground, learningRate);
+
+            foreground.download(foreground_h);
+            SANITY_CHECK(foreground_h); // invoked on every loop iteration
+        }
+    }else
+        OCL_PERF_ELSE
+}
+#endif
+
+#if BUILD_WITH_VIDEO_INPUT_SUPPORT
+
+typedef tuple<string, int> VideoMOG2ParamType;
+typedef TestBaseWithParam<VideoMOG2ParamType> VideoMOG2Fixture;
+
+PERF_TEST_P(VideoMOG2Fixture, Video_MOG2, // runs MOG2 on a video clip with either the plain (cv::) or the OCL implementation
+            ::testing::Combine(::testing::Values("768x576.avi", "1920x1080.avi"), // input clip, resolved through getDataPath
+            ::testing::Values(1, 3))) // cn: 1 -> frames converted to gray; any other value -> converted to BGRA
+{
+    VideoMOG2ParamType params = GetParam();
+
+    const string inputFile = perf::TestBase::getDataPath(get<0>(params));
+    const int cn = get<1>(params);
+
+    cv::VideoCapture cap(inputFile);
+    ASSERT_TRUE(cap.isOpened());
+
+    cv::Mat frame;
+
+    cap >> frame;
+    ASSERT_FALSE(frame.empty());
+
+    cv::Mat temp;
+    if (cn == 1)
+        cv::cvtColor(frame, temp, cv::COLOR_BGR2GRAY);
+    else
+        cv::cvtColor(frame, temp, cv::COLOR_BGR2BGRA); // NOTE(review): cn == 3 yields a 4-channel BGRA frame - confirm intended
+    cv::swap(temp, frame); // frame now holds the converted image
+
+    if(RUN_PLAIN_IMPL)
+    {
+        cv::BackgroundSubtractorMOG2 mog2;
+        cv::Mat foreground;
+
+        mog2.set("detectShadows", false); // NOTE(review): the OCL branch below makes no corresponding call in this test
+        mog2(frame, foreground); // first frame is fed once before the loop, outside TEST_CYCLE
+
+        for (int i = 0; i < 10; ++i) // ten further frames from the same capture
+        {
+            cap >> frame;
+            ASSERT_FALSE(frame.empty());
+
+            cv::Mat temp; // NOTE(review): shadows the outer 'temp'
+            if (cn == 1)
+                cv::cvtColor(frame, temp, cv::COLOR_BGR2GRAY);
+            else
+                cv::cvtColor(frame, temp, cv::COLOR_BGR2BGRA);
+
+            cv::swap(temp, frame);
+
+            TEST_CYCLE() // only the model update below is inside the cycle; capture and conversion are not
+                mog2(frame, foreground);
+
+            SANITY_CHECK(foreground); // invoked on every loop iteration
+        }
+    }else if(RUN_OCL_IMPL)
+    {
+        cv::ocl::oclMat d_frame(frame);
+        cv::ocl::MOG2 d_mog2;
+        cv::ocl::oclMat foreground;
+        cv::Mat foreground_h; // host-side copy of the mask for the sanity check
+
+        d_mog2(d_frame, foreground); // first frame is fed once before the loop, outside OCL_TEST_CYCLE
+
+        for (int i = 0; i < 10; ++i)
+        {
+            cap >> frame;
+            ASSERT_FALSE(frame.empty());
+
+            cv::Mat temp; // NOTE(review): shadows the outer 'temp'
+            if (cn == 1)
+                cv::cvtColor(frame, temp, cv::COLOR_BGR2GRAY);
+            else
+                cv::cvtColor(frame, temp, cv::COLOR_BGR2BGRA);
+            cv::swap(temp, frame);
+
+            d_frame.upload(frame); // upload happens before, not inside, OCL_TEST_CYCLE
+
+            OCL_TEST_CYCLE()
+                d_mog2(d_frame, foreground);
+
+            foreground.download(foreground_h);
+            SANITY_CHECK(foreground_h); // invoked on every loop iteration
+        }
+    }else
+        OCL_PERF_ELSE
+}
+#endif
+
+#if BUILD_WITH_VIDEO_INPUT_SUPPORT
+
+typedef TestBaseWithParam<VideoMOG2ParamType> Video_MOG2GetBackgroundImage;
+
+PERF_TEST_P(Video_MOG2GetBackgroundImage, Video_MOG2, // feeds 11 frames to MOG2, then exercises getBackgroundImage; test name repeats "Video_MOG2" under a different fixture
+            ::testing::Combine(::testing::Values("768x576.avi", "1920x1080.avi"), // input clip, resolved through getDataPath
+            ::testing::Values(1, 3))) // cn: 1 -> frames converted to gray; any other value -> converted to BGRA
+{
+    VideoMOG2ParamType params = GetParam();
+
+    const string inputFile = perf::TestBase::getDataPath(get<0>(params));
+    const int cn = get<1>(params);
+
+    cv::VideoCapture cap(inputFile);
+    ASSERT_TRUE(cap.isOpened());
+
+    cv::Mat frame;
+
+    cap >> frame;
+    ASSERT_FALSE(frame.empty());
+
+    cv::Mat temp;
+    if (cn == 1)
+        cv::cvtColor(frame, temp, cv::COLOR_BGR2GRAY);
+    else
+        cv::cvtColor(frame, temp, cv::COLOR_BGR2BGRA); // NOTE(review): cn == 3 yields a 4-channel BGRA frame - confirm intended
+    cv::swap(temp, frame); // frame now holds the converted image
+
+    if(RUN_PLAIN_IMPL)
+    {
+        cv::BackgroundSubtractorMOG2 mog2;
+        cv::Mat foreground;
+
+        mog2.set("detectShadows", false); // NOTE(review): the OCL branch below makes no corresponding call in this test
+        mog2(frame, foreground); // first frame is fed once before the loop
+
+        for (int i = 0; i < 10; ++i) // ten further frames to build up the model
+        {
+            cap >> frame;
+            ASSERT_FALSE(frame.empty());
+
+            cv::Mat temp; // NOTE(review): shadows the outer 'temp'
+            if (cn == 1)
+                cv::cvtColor(frame, temp, cv::COLOR_BGR2GRAY);
+            else
+                cv::cvtColor(frame, temp, cv::COLOR_BGR2BGRA);
+
+            cv::swap(temp, frame);
+
+            TEST_CYCLE()
+                mog2(frame, foreground);
+        }
+        cv::Mat background;
+        TEST_CYCLE() 
+            mog2.getBackgroundImage(background);
+
+        SANITY_CHECK(background);
+    }else if(RUN_OCL_IMPL)
+    {
+        cv::ocl::oclMat d_frame(frame);
+        cv::ocl::MOG2 d_mog2;
+        cv::ocl::oclMat foreground;
+        cv::Mat background_h; // host-side copy of the background for the sanity check
+
+        d_mog2(d_frame, foreground); // first frame is fed once before the loop
+
+        for (int i = 0; i < 10; ++i) // ten further frames to build up the model
+        {
+            cap >> frame;
+            ASSERT_FALSE(frame.empty());
+
+            cv::Mat temp; // NOTE(review): shadows the outer 'temp'
+            if (cn == 1)
+                cv::cvtColor(frame, temp, cv::COLOR_BGR2GRAY);
+            else
+                cv::cvtColor(frame, temp, cv::COLOR_BGR2BGRA);
+            cv::swap(temp, frame);
+
+            d_frame.upload(frame);
+            d_mog2(d_frame, foreground); // not wrapped in OCL_TEST_CYCLE, unlike the plain branch's per-frame TEST_CYCLE
+        }
+        cv::ocl::oclMat background;
+        OCL_TEST_CYCLE() // only getBackgroundImage is inside the cycle in this branch
+            d_mog2.getBackgroundImage(background);
+
+        background.download(background_h);
+        SANITY_CHECK(background_h);
+    }else
+        OCL_PERF_ELSE
+}
+#endif
+

From dd73016c8ba1b70dd371c44995f51f3c206c2636 Mon Sep 17 00:00:00 2001
From: Jin Ma <jinma06njuee@gmail.om>
Date: Wed, 4 Sep 2013 15:00:36 +0800
Subject: [PATCH 24/41] Removed whitespace.

---
 modules/ocl/perf/perf_bgfg.cpp | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/modules/ocl/perf/perf_bgfg.cpp b/modules/ocl/perf/perf_bgfg.cpp
index 188dffa13..185f704b3 100644
--- a/modules/ocl/perf/perf_bgfg.cpp
+++ b/modules/ocl/perf/perf_bgfg.cpp
@@ -79,7 +79,7 @@ PERF_TEST_P(VideoMOGFixture, Video_MOG,
     const string inputFile = perf::TestBase::getDataPath(get<0>(params));
     const int cn = get<1>(params);
     const float learningRate = static_cast<float>(get<2>(params));
-    
+
     cv::VideoCapture cap(inputFile);
     ASSERT_TRUE(cap.isOpened());
 
@@ -296,7 +296,7 @@ PERF_TEST_P(Video_MOG2GetBackgroundImage, Video_MOG2,
                 mog2(frame, foreground);
         }
         cv::Mat background;
-        TEST_CYCLE() 
+        TEST_CYCLE()
             mog2.getBackgroundImage(background);
 
         SANITY_CHECK(background);
@@ -333,5 +333,4 @@ PERF_TEST_P(Video_MOG2GetBackgroundImage, Video_MOG2,
     }else
         OCL_PERF_ELSE
 }
-#endif
-
+#endif
\ No newline at end of file

From f538e503853b42c43ddf5ffb59338e2c3e38c805 Mon Sep 17 00:00:00 2001
From: ilya-lavrenov <ilya.lavrenov@itseez.com>
Date: Wed, 4 Sep 2013 11:56:22 +0400
Subject: [PATCH 25/41] updated .gitignore

---
 .gitignore | 1 +
 1 file changed, 1 insertion(+)

diff --git a/.gitignore b/.gitignore
index 4fd406edd..0bcffd726 100644
--- a/.gitignore
+++ b/.gitignore
@@ -7,3 +7,4 @@ tegra/
 .sw[a-z]
 .*.swp
 tags
+*.autosave

From a9975b144a22e76228125eb0a25f78ec13db6815 Mon Sep 17 00:00:00 2001
From: Roman Donchenko <roman.donchenko@itseez.com>
Date: Wed, 4 Sep 2013 14:06:34 +0400
Subject: [PATCH 26/41] Fixed a bug in FLANN resulting in uninitialized
 accesses.

This is fixed upstream in mariusmuja/flann@b615f26, but that fix
would break binary compatibility, so I had to make a different one.

Since the bug isn't quite obvious, here's an explanation. In the const
version of any::cast, if policy is a small_any_policy, its get_value
returns its input argument. So r becomes a pointer to obj, and the
return value is a reference to a local variable, which is invalidated
when the function exits.
---
 modules/flann/include/opencv2/flann/any.h | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/modules/flann/include/opencv2/flann/any.h b/modules/flann/include/opencv2/flann/any.h
index 89189c64e..7140b2a08 100644
--- a/modules/flann/include/opencv2/flann/any.h
+++ b/modules/flann/include/opencv2/flann/any.h
@@ -255,8 +255,7 @@ public:
     const T& cast() const
     {
         if (policy->type() != typeid(T)) throw anyimpl::bad_any_cast();
-        void* obj = const_cast<void*>(object);
-        T* r = reinterpret_cast<T*>(policy->get_value(&obj));
+        T* r = reinterpret_cast<T*>(policy->get_value(const_cast<void **>(&object)));
         return *r;
     }
 

From 6ebfa87181af66511098c49b317493bdf854abaf Mon Sep 17 00:00:00 2001
From: Roman Donchenko <roman.donchenko@itseez.com>
Date: Wed, 4 Sep 2013 16:13:27 +0400
Subject: [PATCH 27/41] Delete a bunch more trailing whitespace that slipped
 through the cracks.

---
 modules/imgproc/src/color.cpp                 | 38 +++++++++----------
 modules/imgproc/src/smooth.cpp                |  2 +-
 modules/java/android_test/AndroidManifest.xml |  4 +-
 modules/java/android_test/res/layout/main.xml |  6 +--
 4 files changed, 25 insertions(+), 25 deletions(-)

diff --git a/modules/imgproc/src/color.cpp b/modules/imgproc/src/color.cpp
index 9469925ec..bac63ad6f 100644
--- a/modules/imgproc/src/color.cpp
+++ b/modules/imgproc/src/color.cpp
@@ -398,7 +398,7 @@ private:
 
 struct IPPColor2GrayFunctor
 {
-    IPPColor2GrayFunctor(ippiColor2GrayFunc _func) : func(_func) 
+    IPPColor2GrayFunctor(ippiColor2GrayFunc _func) : func(_func)
     {
         coeffs[0] = 0.114f;
         coeffs[1] = 0.587f;
@@ -454,7 +454,7 @@ struct IPPReorderGeneralFunctor
     }
     bool operator()(const void *src, int srcStep, void *dst, int dstStep, int cols, int rows) const
     {
-        Mat temp; 
+        Mat temp;
         temp.create(rows, cols, CV_MAKETYPE(depth, 3));
         if(func1(src, srcStep, temp.data, (int)temp.step[0], ippiSize(cols, rows), order) < 0)
             return false;
@@ -478,7 +478,7 @@ struct IPPGeneralReorderFunctor
     }
     bool operator()(const void *src, int srcStep, void *dst, int dstStep, int cols, int rows) const
     {
-        Mat temp; 
+        Mat temp;
         temp.create(rows, cols, CV_MAKETYPE(depth, 3));
         if(func1(src, srcStep, temp.data, (int)temp.step[0], ippiSize(cols, rows)) < 0)
             return false;
@@ -3651,8 +3651,8 @@ void cv::cvtColor( InputArray _src, OutputArray _dst, int code, int dcn )
 
             _dst.create( sz, CV_MAKETYPE(depth, dcn));
             dst = _dst.getMat();
-            
-#if defined (HAVE_IPP) && (IPP_VERSION_MAJOR >= 7)          
+
+#if defined (HAVE_IPP) && (IPP_VERSION_MAJOR >= 7)
             if( code == CV_BGR2BGRA || code == CV_RGB2RGBA)
             {
                 if ( CvtColorIPPLoop(src, dst, IPPReorderFunctor(ippiSwapChannelsC3C4RTab[depth], 0, 1, 2)) )
@@ -3737,7 +3737,7 @@ void cv::cvtColor( InputArray _src, OutputArray _dst, int code, int dcn )
             CV_Assert( scn == 3 || scn == 4 );
             _dst.create(sz, CV_MAKETYPE(depth, 1));
             dst = _dst.getMat();
-            
+
 #if defined (HAVE_IPP) && (IPP_VERSION_MAJOR >= 7)
             if( code == CV_BGR2GRAY )
             {
@@ -3789,13 +3789,13 @@ void cv::cvtColor( InputArray _src, OutputArray _dst, int code, int dcn )
             CV_Assert( scn == 1 && (dcn == 3 || dcn == 4));
             _dst.create(sz, CV_MAKETYPE(depth, dcn));
             dst = _dst.getMat();
-            
+
 #if defined (HAVE_IPP) && (IPP_VERSION_MAJOR >= 7)
             if( code == CV_GRAY2BGR )
             {
                 if( CvtColorIPPLoop(src, dst, IPPGray2BGRFunctor(ippiCopyP3C3RTab[depth])) )
                     return;
-            } 
+            }
             else if( code == CV_GRAY2BGRA )
             {
                 if( CvtColorIPPLoop(src, dst, IPPGray2BGRAFunctor(ippiCopyP3C3RTab[depth], ippiSwapChannelsC3C4RTab[depth], depth)) )
@@ -3882,7 +3882,7 @@ void cv::cvtColor( InputArray _src, OutputArray _dst, int code, int dcn )
 
             _dst.create(sz, CV_MAKETYPE(depth, 3));
             dst = _dst.getMat();
-            
+
 #if defined (HAVE_IPP) && (IPP_VERSION_MAJOR >= 7)
             if( code == CV_BGR2XYZ && scn == 3 )
             {
@@ -3898,7 +3898,7 @@ void cv::cvtColor( InputArray _src, OutputArray _dst, int code, int dcn )
             {
                 if( CvtColorIPPLoopCopy(src, dst, IPPGeneralFunctor(ippiRGB2XYZTab[depth])) )
                     return;
-            } 
+            }
             else if( code == CV_RGB2XYZ && scn == 4 )
             {
                 if( CvtColorIPPLoop(src, dst, IPPReorderGeneralFunctor(ippiSwapChannelsC4C3RTab[depth], ippiRGB2XYZTab[depth], 0, 1, 2, depth)) )
@@ -3921,7 +3921,7 @@ void cv::cvtColor( InputArray _src, OutputArray _dst, int code, int dcn )
 
             _dst.create(sz, CV_MAKETYPE(depth, dcn));
             dst = _dst.getMat();
-            
+
 #if defined (HAVE_IPP) && (IPP_VERSION_MAJOR >= 7)
             if( code == CV_XYZ2BGR && dcn == 3 )
             {
@@ -3964,7 +3964,7 @@ void cv::cvtColor( InputArray _src, OutputArray _dst, int code, int dcn )
 
             _dst.create(sz, CV_MAKETYPE(depth, 3));
             dst = _dst.getMat();
-            
+
 #if defined (HAVE_IPP) && (IPP_VERSION_MAJOR >= 7)
             if( depth == CV_8U || depth == CV_16U )
             {
@@ -3982,12 +3982,12 @@ void cv::cvtColor( InputArray _src, OutputArray _dst, int code, int dcn )
                 {
                     if( CvtColorIPPLoopCopy(src, dst, IPPGeneralFunctor(ippiRGB2HSVTab[depth])) )
                         return;
-                } 
+                }
                 else if( code == CV_RGB2HSV_FULL && scn == 4 )
                 {
                     if( CvtColorIPPLoop(src, dst, IPPReorderGeneralFunctor(ippiSwapChannelsC4C3RTab[depth], ippiRGB2HSVTab[depth], 0, 1, 2, depth)) )
                         return;
-                } 
+                }
                 else if( code == CV_BGR2HLS_FULL && scn == 3 )
                 {
                     if( CvtColorIPPLoopCopy(src, dst, IPPReorderGeneralFunctor(ippiSwapChannelsC3RTab[depth], ippiRGB2HLSTab[depth], 2, 1, 0, depth)) )
@@ -4002,7 +4002,7 @@ void cv::cvtColor( InputArray _src, OutputArray _dst, int code, int dcn )
                 {
                     if( CvtColorIPPLoopCopy(src, dst, IPPGeneralFunctor(ippiRGB2HLSTab[depth])) )
                         return;
-                } 
+                }
                 else if( code == CV_RGB2HLS_FULL && scn == 4 )
                 {
                     if( CvtColorIPPLoop(src, dst, IPPReorderGeneralFunctor(ippiSwapChannelsC4C3RTab[depth], ippiRGB2HLSTab[depth], 0, 1, 2, depth)) )
@@ -4045,7 +4045,7 @@ void cv::cvtColor( InputArray _src, OutputArray _dst, int code, int dcn )
 
             _dst.create(sz, CV_MAKETYPE(depth, dcn));
             dst = _dst.getMat();
-            
+
 #if defined (HAVE_IPP) && (IPP_VERSION_MAJOR >= 7)
             if( depth == CV_8U || depth == CV_16U )
             {
@@ -4063,12 +4063,12 @@ void cv::cvtColor( InputArray _src, OutputArray _dst, int code, int dcn )
                 {
                     if( CvtColorIPPLoopCopy(src, dst, IPPGeneralFunctor(ippiHSV2RGBTab[depth])) )
                         return;
-                } 
+                }
                 else if( code == CV_HSV2RGB_FULL && dcn == 4 )
                 {
                     if( CvtColorIPPLoop(src, dst, IPPGeneralReorderFunctor(ippiHSV2RGBTab[depth], ippiSwapChannelsC3C4RTab[depth], 0, 1, 2, depth)) )
                         return;
-                } 
+                }
                 else if( code == CV_HLS2BGR_FULL && dcn == 3 )
                 {
                     if( CvtColorIPPLoopCopy(src, dst, IPPGeneralReorderFunctor(ippiHLS2RGBTab[depth], ippiSwapChannelsC3RTab[depth], 2, 1, 0, depth)) )
@@ -4083,7 +4083,7 @@ void cv::cvtColor( InputArray _src, OutputArray _dst, int code, int dcn )
                 {
                     if( CvtColorIPPLoopCopy(src, dst, IPPGeneralFunctor(ippiHLS2RGBTab[depth])) )
                         return;
-                } 
+                }
                 else if( code == CV_HLS2RGB_FULL && dcn == 4 )
                 {
                     if( CvtColorIPPLoop(src, dst, IPPGeneralReorderFunctor(ippiHLS2RGBTab[depth], ippiSwapChannelsC3C4RTab[depth], 0, 1, 2, depth)) )
diff --git a/modules/imgproc/src/smooth.cpp b/modules/imgproc/src/smooth.cpp
index 3dad2c087..3d42e3be5 100644
--- a/modules/imgproc/src/smooth.cpp
+++ b/modules/imgproc/src/smooth.cpp
@@ -1951,7 +1951,7 @@ bilateralFilter_8u( const Mat& src, Mat& dst, int d,
         if( ok ) return;
     }
 #endif
-    
+
     vector<float> _color_weight(cn*256);
     vector<float> _space_weight(d*d);
     vector<int> _space_ofs(d*d);
diff --git a/modules/java/android_test/AndroidManifest.xml b/modules/java/android_test/AndroidManifest.xml
index dfe25fff0..81f2bc134 100644
--- a/modules/java/android_test/AndroidManifest.xml
+++ b/modules/java/android_test/AndroidManifest.xml
@@ -3,7 +3,7 @@
       package="org.opencv.test"
       android:versionCode="1"
       android:versionName="1.0">
-    
+
     <uses-sdk android:minSdkVersion="8" />
 
     <!-- We add an application tag here just so that we can indicate that
@@ -20,7 +20,7 @@
     <instrumentation android:name="org.opencv.test.OpenCVTestRunner"
                      android:targetPackage="org.opencv.test"
                      android:label="Tests for org.opencv"/>
-    
+
     <uses-permission android:name="android.permission.CAMERA"/>
     <uses-feature android:name="android.hardware.camera" />
     <uses-feature android:name="android.hardware.camera.autofocus" />
diff --git a/modules/java/android_test/res/layout/main.xml b/modules/java/android_test/res/layout/main.xml
index 3a5f117d3..1fb4479c2 100644
--- a/modules/java/android_test/res/layout/main.xml
+++ b/modules/java/android_test/res/layout/main.xml
@@ -4,9 +4,9 @@
     android:layout_width="fill_parent"
     android:layout_height="fill_parent"
     >
-<TextView  
-    android:layout_width="fill_parent" 
-    android:layout_height="wrap_content" 
+<TextView
+    android:layout_width="fill_parent"
+    android:layout_height="wrap_content"
     android:text="@string/hello"
     />
 </LinearLayout>

From e5735282dacf31abd54f47c0133fd3e7b329dca9 Mon Sep 17 00:00:00 2001
From: Roman Donchenko <roman.donchenko@itseez.com>
Date: Wed, 4 Sep 2013 18:41:19 +0400
Subject: [PATCH 28/41] Added the ability to have samples inside a module.

---
 cmake/OpenCVModule.cmake | 44 ++++++++++++++++++++++++++++++++++++++++
 1 file changed, 44 insertions(+)

diff --git a/cmake/OpenCVModule.cmake b/cmake/OpenCVModule.cmake
index 80c07bda3..4076d9fab 100644
--- a/cmake/OpenCVModule.cmake
+++ b/cmake/OpenCVModule.cmake
@@ -33,6 +33,7 @@
 #   <add extra installation rules>
 #   ocv_add_accuracy_tests(<extra dependencies>)
 #   ocv_add_perf_tests(<extra dependencies>)
+#   ocv_add_samples(<extra dependencies>)
 #
 #
 # If module have no "extra" then you can define it in one line:
@@ -578,6 +579,7 @@ macro(ocv_define_module module_name)
 
   ocv_add_accuracy_tests()
   ocv_add_perf_tests()
+  ocv_add_samples()
 endmacro()
 
 # ensures that all passed modules are available
@@ -722,6 +724,48 @@ function(ocv_add_accuracy_tests)
   endif()
 endfunction()
 
+function(ocv_add_samples)
+  set(samples_path "${CMAKE_CURRENT_SOURCE_DIR}/samples")
+  string(REGEX REPLACE "^opencv_" "" module_id ${the_module})
+
+  if(BUILD_EXAMPLES AND EXISTS "${samples_path}")
+    set(samples_deps ${the_module} ${OPENCV_MODULE_${the_module}_DEPS} opencv_highgui ${ARGN})
+    ocv_check_dependencies(${samples_deps})
+
+    if(OCV_DEPENDENCIES_FOUND)
+      file(GLOB sample_sources "${samples_path}/*.cpp")
+      ocv_include_modules(${OPENCV_MODULE_${the_module}_DEPS})
+
+      foreach(source ${sample_sources})
+        get_filename_component(name "${source}" NAME_WE)
+        set(the_target "example_${module_id}_${name}")
+
+        add_executable(${the_target} "${source}")
+        target_link_libraries(${the_target} ${samples_deps})
+
+        set_target_properties(${the_target} PROPERTIES PROJECT_LABEL "(sample) ${name}")
+
+        if(ENABLE_SOLUTION_FOLDERS)
+          set_target_properties(${the_target} PROPERTIES
+            OUTPUT_NAME "${module_id}-example-${name}"
+            FOLDER "samples/${module_id}")
+        endif()
+
+        if(WIN32)
+          install(TARGETS ${the_target} RUNTIME DESTINATION "samples/${module_id}" COMPONENT main)
+        endif()
+      endforeach()
+    endif()
+  endif()
+
+  if(INSTALL_C_EXAMPLES AND NOT WIN32 AND EXISTS "${samples_path}")
+    file(GLOB sample_files "${samples_path}/*")
+    install(FILES ${sample_files}
+            DESTINATION share/OpenCV/samples/${module_id}
+            PERMISSIONS OWNER_READ GROUP_READ WORLD_READ)
+  endif()
+endfunction()
+
 # internal macro; finds all link dependencies of the module
 # should be used at the end of CMake processing
 macro(__ocv_track_module_link_dependencies the_module optkind)

From 0f8f48965896d2d95f68b6a9486cb7ab8a7dad45 Mon Sep 17 00:00:00 2001
From: ilya-lavrenov <ilya.lavrenov@itseez.com>
Date: Wed, 4 Sep 2013 12:44:52 +0400
Subject: [PATCH 29/41] changed data path and eps

---
 modules/ocl/test/test_bgfg.cpp    | 15 +++++----------
 modules/ocl/test/test_optflow.cpp |  4 ++--
 2 files changed, 7 insertions(+), 12 deletions(-)

diff --git a/modules/ocl/test/test_bgfg.cpp b/modules/ocl/test/test_bgfg.cpp
index e35f26e3b..aea361979 100644
--- a/modules/ocl/test/test_bgfg.cpp
+++ b/modules/ocl/test/test_bgfg.cpp
@@ -142,7 +142,7 @@ PARAM_TEST_CASE(mog2, UseGray, DetectShadow, bool)
 
 TEST_P(mog2, Update)
 {
-    std::string inputFile = string(cvtest::TS::ptr()->get_data_path()) + "gpu/768x576.avi";
+    std::string inputFile = string(cvtest::TS::ptr()->get_data_path()) + "gpu/video/768x576.avi";
     cv::VideoCapture cap(inputFile);
     ASSERT_TRUE(cap.isOpened());
 
@@ -175,14 +175,9 @@ TEST_P(mog2, Update)
         mog2_gold(frame, foreground_gold);
 
         if (detectShadow)
-        {
-
-            EXPECT_MAT_SIMILAR(foreground_gold, foreground, 1e-2);
-        }
+            EXPECT_MAT_SIMILAR(foreground_gold, foreground, 15e-3)
         else
-        {
-            EXPECT_MAT_NEAR(foreground_gold, foreground, 0);
-        }
+            EXPECT_MAT_NEAR(foreground_gold, foreground, 0)
     }
 }
 
@@ -191,7 +186,7 @@ TEST_P(mog2, getBackgroundImage)
     if (useGray)
         return;
 
-    std::string inputFile = string(cvtest::TS::ptr()->get_data_path()) + "video/768x576.avi";
+    std::string inputFile = string(cvtest::TS::ptr()->get_data_path()) + "gpu/video/768x576.avi";
     cv::VideoCapture cap(inputFile);
     ASSERT_TRUE(cap.isOpened());
 
@@ -229,4 +224,4 @@ INSTANTIATE_TEST_CASE_P(OCL_Video, mog2, testing::Combine(
     testing::Values(DetectShadow(true), DetectShadow(false)),
     Values(true, false)));
 
-#endif
\ No newline at end of file
+#endif
diff --git a/modules/ocl/test/test_optflow.cpp b/modules/ocl/test/test_optflow.cpp
index 47bd807d9..57aeabac2 100644
--- a/modules/ocl/test/test_optflow.cpp
+++ b/modules/ocl/test/test_optflow.cpp
@@ -146,10 +146,10 @@ PARAM_TEST_CASE(TVL1, bool)
 
 TEST_P(TVL1, Accuracy)
 {
-    cv::Mat frame0 = readImage("F:/mcw/opencv/opencv/samples/gpu/rubberwhale1.png", cv::IMREAD_GRAYSCALE);
+    cv::Mat frame0 = readImage("gpu/opticalflow/rubberwhale1.png", cv::IMREAD_GRAYSCALE);
     ASSERT_FALSE(frame0.empty());
 
-    cv::Mat frame1 = readImage("../../../opencv/samples/gpu/rubberwhale2.png", cv::IMREAD_GRAYSCALE);
+    cv::Mat frame1 = readImage("gpu/opticalflow/rubberwhale2.png", cv::IMREAD_GRAYSCALE);
     ASSERT_FALSE(frame1.empty());
 
     cv::ocl::OpticalFlowDual_TVL1_OCL d_alg;

From 36e0891480094889da07cfe0d4d27c2149daf43e Mon Sep 17 00:00:00 2001
From: Roman Donchenko <roman.donchenko@itseez.com>
Date: Wed, 4 Sep 2013 18:52:03 +0400
Subject: [PATCH 30/41] Fixed OCL samples' install path being inconsistent with
 the other samples.

---
 samples/ocl/CMakeLists.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/samples/ocl/CMakeLists.txt b/samples/ocl/CMakeLists.txt
index ddbaf26bc..9b04dc397 100644
--- a/samples/ocl/CMakeLists.txt
+++ b/samples/ocl/CMakeLists.txt
@@ -54,6 +54,6 @@ endif()
 if (INSTALL_C_EXAMPLES AND NOT WIN32)
   file(GLOB install_list *.c *.cpp *.jpg *.png *.data makefile.* build_all.sh *.dsp *.cmd )
   install(FILES ${install_list}
-          DESTINATION share/opencv/samples/${project}
+          DESTINATION share/OpenCV/samples/${project}
           PERMISSIONS OWNER_READ GROUP_READ WORLD_READ)
 endif()

From 722bdc52a02c1fa6ccdd90c012b1abd49f6ff30c Mon Sep 17 00:00:00 2001
From: ilya-lavrenov <ilya.lavrenov@itseez.com>
Date: Wed, 4 Sep 2013 11:55:00 +0400
Subject: [PATCH 31/41] enabled gemm and dft perf tests

---
 modules/ocl/perf/perf_fft.cpp     |  9 +++++++--
 modules/ocl/perf/perf_gemm.cpp    | 11 +++++++----
 modules/ocl/perf/perf_precomp.hpp |  3 ++-
 3 files changed, 16 insertions(+), 7 deletions(-)

diff --git a/modules/ocl/perf/perf_fft.cpp b/modules/ocl/perf/perf_fft.cpp
index 840f009a3..4cba47e96 100644
--- a/modules/ocl/perf/perf_fft.cpp
+++ b/modules/ocl/perf/perf_fft.cpp
@@ -43,6 +43,7 @@
 // the use of this software, even if advised of the possibility of such damage.
 //
 //M*/
+
 #include "perf_precomp.hpp"
 
 using namespace perf;
@@ -51,7 +52,9 @@ using namespace perf;
 
 typedef TestBaseWithParam<Size> dftFixture;
 
-PERF_TEST_P(dftFixture, DISABLED_dft, OCL_TYPICAL_MAT_SIZES) // TODO not implemented
+#ifdef HAVE_CLAMDFFT
+
+PERF_TEST_P(dftFixture, dft, OCL_TYPICAL_MAT_SIZES)
 {
     const Size srcSize = GetParam();
 
@@ -70,7 +73,7 @@ PERF_TEST_P(dftFixture, DISABLED_dft, OCL_TYPICAL_MAT_SIZES) // TODO not impleme
 
         oclDst.download(dst);
 
-        SANITY_CHECK(dst);
+        SANITY_CHECK(dst, 1.5);
     }
     else if (RUN_PLAIN_IMPL)
     {
@@ -81,3 +84,5 @@ PERF_TEST_P(dftFixture, DISABLED_dft, OCL_TYPICAL_MAT_SIZES) // TODO not impleme
     else
         OCL_PERF_ELSE
 }
+
+#endif
diff --git a/modules/ocl/perf/perf_gemm.cpp b/modules/ocl/perf/perf_gemm.cpp
index aaa1dac23..803e1f91b 100644
--- a/modules/ocl/perf/perf_gemm.cpp
+++ b/modules/ocl/perf/perf_gemm.cpp
@@ -51,8 +51,9 @@ using namespace perf;
 
 typedef TestBaseWithParam<Size> gemmFixture;
 
-PERF_TEST_P(gemmFixture, DISABLED_gemm,
-            ::testing::Values(OCL_SIZE_1000, OCL_SIZE_2000)) // TODO not implemented
+#ifdef HAVE_CLAMDBLAS
+
+PERF_TEST_P(gemmFixture, gemm, ::testing::Values(OCL_SIZE_1000, OCL_SIZE_2000))
 {
     const Size srcSize = GetParam();
 
@@ -72,14 +73,16 @@ PERF_TEST_P(gemmFixture, DISABLED_gemm,
 
         oclDst.download(dst);
 
-        SANITY_CHECK(dst);
+        SANITY_CHECK(dst, 0.01);
     }
     else if (RUN_PLAIN_IMPL)
     {
         TEST_CYCLE() cv::gemm(src1, src2, 1.0, src3, 1.0, dst);
 
-        SANITY_CHECK(dst);
+        SANITY_CHECK(dst, 0.01);
     }
     else
         OCL_PERF_ELSE
 }
+
+#endif
diff --git a/modules/ocl/perf/perf_precomp.hpp b/modules/ocl/perf/perf_precomp.hpp
index a93d74eb2..a6d9eab40 100644
--- a/modules/ocl/perf/perf_precomp.hpp
+++ b/modules/ocl/perf/perf_precomp.hpp
@@ -59,6 +59,7 @@
 #include <vector>
 #include <numeric>
 
+#include "cvconfig.h"
 #include "opencv2/core/core.hpp"
 #include "opencv2/imgproc/imgproc.hpp"
 #include "opencv2/highgui/highgui.hpp"
@@ -93,7 +94,7 @@ using namespace cv;
 
 #ifdef HAVE_OPENCV_GPU
 #define OCL_PERF_ELSE               \
-        if (RUN_GPU_IMPL)          \
+        if (RUN_GPU_IMPL)           \
             CV_TEST_FAIL_NO_IMPL(); \
         else                        \
             CV_TEST_FAIL_NO_IMPL();

From 1366df8250459821bf7252e4f4123e961cb3067b Mon Sep 17 00:00:00 2001
From: Jin Ma <jinma06njuee@gmail.om>
Date: Thu, 5 Sep 2013 19:33:27 +0800
Subject: [PATCH 32/41] MOG&MOG2: Modified the performance test according to
 the feedback of the community.

---
 modules/ocl/perf/perf_bgfg.cpp | 270 +++++++++++++--------------------
 1 file changed, 103 insertions(+), 167 deletions(-)

diff --git a/modules/ocl/perf/perf_bgfg.cpp b/modules/ocl/perf/perf_bgfg.cpp
index 185f704b3..619145bae 100644
--- a/modules/ocl/perf/perf_bgfg.cpp
+++ b/modules/ocl/perf/perf_bgfg.cpp
@@ -44,33 +44,62 @@
 //
 //M*/
 #include "perf_precomp.hpp"
-
-///////////// PyrLKOpticalFlow ////////////////////////
-
 using namespace perf;
-using std::tr1::get;
-using std::tr1::tuple;
-using std::tr1::make_tuple;
-
+using namespace std;
+using namespace cv::ocl;
+using namespace cv;
 #if defined(HAVE_XINE)         || \
     defined(HAVE_GSTREAMER)    || \
     defined(HAVE_QUICKTIME)    || \
     defined(HAVE_AVFOUNDATION) || \
     defined(HAVE_FFMPEG)       || \
-    defined(WIN32) /* assume that we have ffmpeg */
+    defined(WIN32)
 
 #  define BUILD_WITH_VIDEO_INPUT_SUPPORT 1
 #else
 #  define BUILD_WITH_VIDEO_INPUT_SUPPORT 0
 #endif
 
+static void cvtFrameFmt(vector<Mat>& input, vector<Mat>& output, int output_cn)
+{
+    for(int i = 0; i< (int)(input.size()); i++)
+    {
+        if(output_cn == 1)
+            cvtColor(input[i], output[i], COLOR_RGB2GRAY);
+        else
+            cvtColor(input[i], output[i], COLOR_RGB2RGBA);
+    }
+}
+
+static void prepareData(VideoCapture& cap, int cn, vector<Mat>& frame_buffer, vector<oclMat>& frame_buffer_ocl)
+{
+    cv::Mat frame;
+    std::vector<Mat> frame_buffer_init;
+    int nFrame = (int)frame_buffer.size();
+    for(int i = 0; i < nFrame; i++)
+    {
+        cap >> frame;
+        ASSERT_FALSE(frame.empty());
+        frame_buffer_init.push_back(frame);
+    }
+
+    if(cn == 1)
+        cvtFrameFmt(frame_buffer_init, frame_buffer, 1);
+    else
+        frame_buffer = frame_buffer_init;
+
+    for(int i = 0; i < nFrame; i++)
+        frame_buffer_ocl.push_back(cv::ocl::oclMat(frame_buffer[i]));
+}
+
+///////////// MOG ////////////////////////
 #if BUILD_WITH_VIDEO_INPUT_SUPPORT
 
 typedef tuple<string, int, double> VideoMOGParamType;
 typedef TestBaseWithParam<VideoMOGParamType> VideoMOGFixture;
 
-PERF_TEST_P(VideoMOGFixture, Video_MOG,
-            ::testing::Combine(::testing::Values("768x576.avi", "1920x1080.avi"),
+PERF_TEST_P(VideoMOGFixture, MOG,
+            ::testing::Combine(::testing::Values("gpu/video/768x576.avi", "gpu/video/1920x1080.avi"),
             ::testing::Values(1, 3),
             ::testing::Values(0.0, 0.01)))
 {
@@ -80,108 +109,72 @@ PERF_TEST_P(VideoMOGFixture, Video_MOG,
     const int cn = get<1>(params);
     const float learningRate = static_cast<float>(get<2>(params));
 
+    const int nFrame = 5;
+
+    Mat foreground_cpu;
+    std::vector<Mat> frame_buffer(nFrame);
+    std::vector<oclMat> frame_buffer_ocl;
+
     cv::VideoCapture cap(inputFile);
     ASSERT_TRUE(cap.isOpened());
 
-    cv::Mat frame;
-
-    cap >> frame;
-    ASSERT_FALSE(frame.empty());
-
-    cv::Mat temp;
-    if (cn == 1)
-        cv::cvtColor(frame, temp, cv::COLOR_BGR2GRAY);
-    else
-        cv::cvtColor(frame, temp, cv::COLOR_BGR2BGRA);
-    cv::swap(temp, frame);
+    prepareData(cap, cn, frame_buffer, frame_buffer_ocl);
 
     if(RUN_PLAIN_IMPL)
     {
         cv::BackgroundSubtractorMOG mog;
         cv::Mat foreground;
 
-        mog(frame, foreground, learningRate);
-
-        for (int i = 0; i < 10; ++i)
+        TEST_CYCLE()
         {
-            cap >> frame;
-            ASSERT_FALSE(frame.empty());
-
-            cv::Mat temp;
-            if (cn == 1)
-                cv::cvtColor(frame, temp, cv::COLOR_BGR2GRAY);
-            else
-                cv::cvtColor(frame, temp, cv::COLOR_BGR2BGRA);
-
-            cv::swap(temp, frame);
-
-            TEST_CYCLE()
-            mog(frame, foreground, learningRate);
-
-            SANITY_CHECK(foreground);
+            for (int i = 0; i < nFrame; i++)
+            {
+                mog(frame_buffer[i], foreground, learningRate);
+            }
         }
+        SANITY_CHECK(foreground);
     }else if(RUN_OCL_IMPL)
     {
-        cv::ocl::oclMat d_frame(frame);
         cv::ocl::MOG d_mog;
         cv::ocl::oclMat foreground;
         cv::Mat foreground_h;
-
-        d_mog(d_frame, foreground, learningRate);
-
-        for (int i = 0; i < 10; ++i)
+        OCL_TEST_CYCLE()
         {
-            cap >> frame;
-            ASSERT_FALSE(frame.empty());
-
-            cv::Mat temp;
-            if (cn == 1)
-                cv::cvtColor(frame, temp, cv::COLOR_BGR2GRAY);
-            else
-                cv::cvtColor(frame, temp, cv::COLOR_BGR2BGRA);
-            cv::swap(temp, frame);
-
-            d_frame.upload(frame);
-
-            OCL_TEST_CYCLE()
-            d_mog(d_frame, foreground, learningRate);
-
-            foreground.download(foreground_h);
-            SANITY_CHECK(foreground_h);
+            for (int i = 0; i < nFrame; ++i)
+            {
+                d_mog(frame_buffer_ocl[i], foreground, learningRate);
+            }
         }
+        foreground.download(foreground_h);
+        SANITY_CHECK(foreground_h);
     }else
         OCL_PERF_ELSE
 }
 #endif
 
+///////////// MOG2 ////////////////////////
 #if BUILD_WITH_VIDEO_INPUT_SUPPORT
 
 typedef tuple<string, int> VideoMOG2ParamType;
 typedef TestBaseWithParam<VideoMOG2ParamType> VideoMOG2Fixture;
 
-PERF_TEST_P(VideoMOG2Fixture, Video_MOG2,
-            ::testing::Combine(::testing::Values("768x576.avi", "1920x1080.avi"),
+PERF_TEST_P(VideoMOG2Fixture, MOG2,
+            ::testing::Combine(::testing::Values("gpu/video/768x576.avi", "gpu/video/1920x1080.avi"),
             ::testing::Values(1, 3)))
 {
     VideoMOG2ParamType params = GetParam();
 
     const string inputFile = perf::TestBase::getDataPath(get<0>(params));
     const int cn = get<1>(params);
+    int nFrame = 5;
+
+    std::vector<cv::Mat> frame_buffer(nFrame);
+    std::vector<cv::ocl::oclMat> frame_buffer_ocl;
 
     cv::VideoCapture cap(inputFile);
     ASSERT_TRUE(cap.isOpened());
 
-    cv::Mat frame;
-
-    cap >> frame;
-    ASSERT_FALSE(frame.empty());
-
-    cv::Mat temp;
-    if (cn == 1)
-        cv::cvtColor(frame, temp, cv::COLOR_BGR2GRAY);
-    else
-        cv::cvtColor(frame, temp, cv::COLOR_BGR2BGRA);
-    cv::swap(temp, frame);
+    prepareData(cap, cn, frame_buffer, frame_buffer_ocl);
 
     if(RUN_PLAIN_IMPL)
     {
@@ -189,145 +182,88 @@ PERF_TEST_P(VideoMOG2Fixture, Video_MOG2,
         cv::Mat foreground;
 
         mog2.set("detectShadows", false);
-        mog2(frame, foreground);
 
-        for (int i = 0; i < 10; ++i)
+        TEST_CYCLE()
         {
-            cap >> frame;
-            ASSERT_FALSE(frame.empty());
-
-            cv::Mat temp;
-            if (cn == 1)
-                cv::cvtColor(frame, temp, cv::COLOR_BGR2GRAY);
-            else
-                cv::cvtColor(frame, temp, cv::COLOR_BGR2BGRA);
-
-            cv::swap(temp, frame);
-
-            TEST_CYCLE()
-                mog2(frame, foreground);
-
-            SANITY_CHECK(foreground);
+            for (int i = 0; i < nFrame; i++)
+            {
+                mog2(frame_buffer[i], foreground);
+            }
         }
+        SANITY_CHECK(foreground);
     }else if(RUN_OCL_IMPL)
     {
-        cv::ocl::oclMat d_frame(frame);
         cv::ocl::MOG2 d_mog2;
         cv::ocl::oclMat foreground;
         cv::Mat foreground_h;
 
-        d_mog2(d_frame, foreground);
-
-        for (int i = 0; i < 10; ++i)
+        OCL_TEST_CYCLE()
         {
-            cap >> frame;
-            ASSERT_FALSE(frame.empty());
-
-            cv::Mat temp;
-            if (cn == 1)
-                cv::cvtColor(frame, temp, cv::COLOR_BGR2GRAY);
-            else
-                cv::cvtColor(frame, temp, cv::COLOR_BGR2BGRA);
-            cv::swap(temp, frame);
-
-            d_frame.upload(frame);
-
-            OCL_TEST_CYCLE()
-                d_mog2(d_frame, foreground);
-
-            foreground.download(foreground_h);
-            SANITY_CHECK(foreground_h);
+            for (int i = 0; i < nFrame; i++)
+            {
+                d_mog2(frame_buffer_ocl[i], foreground);
+            }
         }
+        foreground.download(foreground_h);
+        SANITY_CHECK(foreground_h);
     }else
         OCL_PERF_ELSE
 }
 #endif
 
+///////////// MOG2_GetBackgroundImage //////////////////
 #if BUILD_WITH_VIDEO_INPUT_SUPPORT
 
 typedef TestBaseWithParam<VideoMOG2ParamType> Video_MOG2GetBackgroundImage;
 
-PERF_TEST_P(Video_MOG2GetBackgroundImage, Video_MOG2,
-            ::testing::Combine(::testing::Values("768x576.avi", "1920x1080.avi"),
+PERF_TEST_P(Video_MOG2GetBackgroundImage, MOG2,
+            ::testing::Combine(::testing::Values("gpu/video/768x576.avi", "gpu/video/1920x1080.avi"),
             ::testing::Values(1, 3)))
 {
     VideoMOG2ParamType params = GetParam();
 
     const string inputFile = perf::TestBase::getDataPath(get<0>(params));
     const int cn = get<1>(params);
+    int nFrame = 5;
+
+    std::vector<cv::Mat> frame_buffer(nFrame);
+    std::vector<cv::ocl::oclMat> frame_buffer_ocl;
 
     cv::VideoCapture cap(inputFile);
     ASSERT_TRUE(cap.isOpened());
 
-    cv::Mat frame;
-
-    cap >> frame;
-    ASSERT_FALSE(frame.empty());
-
-    cv::Mat temp;
-    if (cn == 1)
-        cv::cvtColor(frame, temp, cv::COLOR_BGR2GRAY);
-    else
-        cv::cvtColor(frame, temp, cv::COLOR_BGR2BGRA);
-    cv::swap(temp, frame);
+    prepareData(cap, cn, frame_buffer, frame_buffer_ocl);
 
     if(RUN_PLAIN_IMPL)
     {
         cv::BackgroundSubtractorMOG2 mog2;
         cv::Mat foreground;
-
-        mog2.set("detectShadows", false);
-        mog2(frame, foreground);
-
-        for (int i = 0; i < 10; ++i)
-        {
-            cap >> frame;
-            ASSERT_FALSE(frame.empty());
-
-            cv::Mat temp;
-            if (cn == 1)
-                cv::cvtColor(frame, temp, cv::COLOR_BGR2GRAY);
-            else
-                cv::cvtColor(frame, temp, cv::COLOR_BGR2BGRA);
-
-            cv::swap(temp, frame);
-
-            TEST_CYCLE()
-                mog2(frame, foreground);
-        }
         cv::Mat background;
+        mog2.set("detectShadows", false);
         TEST_CYCLE()
+        {
+            for (int i = 0; i < nFrame; i++)
+            {
+                mog2(frame_buffer[i], foreground);
+            }
             mog2.getBackgroundImage(background);
-
+        }
         SANITY_CHECK(background);
     }else if(RUN_OCL_IMPL)
     {
-        cv::ocl::oclMat d_frame(frame);
         cv::ocl::MOG2 d_mog2;
         cv::ocl::oclMat foreground;
         cv::Mat background_h;
-
-        d_mog2(d_frame, foreground);
-
-        for (int i = 0; i < 10; ++i)
-        {
-            cap >> frame;
-            ASSERT_FALSE(frame.empty());
-
-            cv::Mat temp;
-            if (cn == 1)
-                cv::cvtColor(frame, temp, cv::COLOR_BGR2GRAY);
-            else
-                cv::cvtColor(frame, temp, cv::COLOR_BGR2BGRA);
-            cv::swap(temp, frame);
-
-            d_frame.upload(frame);
-            d_mog2(d_frame, foreground);
-        }
         cv::ocl::oclMat background;
-        OCL_TEST_CYCLE()
-            d_mog2.getBackgroundImage(background);
 
+        OCL_TEST_CYCLE()
+        {
+            for (int i = 0; i < nFrame; i++)
+            {
+                d_mog2(frame_buffer_ocl[i], foreground);
+            }
+            d_mog2.getBackgroundImage(background);
+        }
         background.download(background_h);
         SANITY_CHECK(background_h);
     }else

From be98cd2f7f87d9949d2a847110c8aeb7a1e6d665 Mon Sep 17 00:00:00 2001
From: Ilya Lavrenov <ilya.lavrenov@itseez.com>
Date: Thu, 5 Sep 2013 19:25:03 +0400
Subject: [PATCH 33/41] replaced explicit __stdcall by CV_STDCALL

---
 modules/imgproc/src/color.cpp | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/modules/imgproc/src/color.cpp b/modules/imgproc/src/color.cpp
index bac63ad6f..ba7426be3 100644
--- a/modules/imgproc/src/color.cpp
+++ b/modules/imgproc/src/color.cpp
@@ -253,19 +253,19 @@ bool CvtColorIPPLoopCopy(Mat& src, Mat& dst, const Cvt& cvt)
     return ok;
 }
 
-IppStatus __stdcall ippiSwapChannels_8u_C3C4Rf(const Ipp8u* pSrc, int srcStep, Ipp8u* pDst, int dstStep,
+static IppStatus CV_STDCALL ippiSwapChannels_8u_C3C4Rf(const Ipp8u* pSrc, int srcStep, Ipp8u* pDst, int dstStep,
          IppiSize roiSize, const int *dstOrder)
 {
     return ippiSwapChannels_8u_C3C4R(pSrc, srcStep, pDst, dstStep, roiSize, dstOrder, MAX_IPP8u);
 }
 
-IppStatus __stdcall ippiSwapChannels_16u_C3C4Rf(const Ipp16u* pSrc, int srcStep, Ipp16u* pDst, int dstStep,
+static IppStatus CV_STDCALL ippiSwapChannels_16u_C3C4Rf(const Ipp16u* pSrc, int srcStep, Ipp16u* pDst, int dstStep,
          IppiSize roiSize, const int *dstOrder)
 {
     return ippiSwapChannels_16u_C3C4R(pSrc, srcStep, pDst, dstStep, roiSize, dstOrder, MAX_IPP16u);
 }
 
-IppStatus __stdcall ippiSwapChannels_32f_C3C4Rf(const Ipp32f* pSrc, int srcStep, Ipp32f* pDst, int dstStep,
+static IppStatus CV_STDCALL ippiSwapChannels_32f_C3C4Rf(const Ipp32f* pSrc, int srcStep, Ipp32f* pDst, int dstStep,
          IppiSize roiSize, const int *dstOrder)
 {
     return ippiSwapChannels_32f_C3C4R(pSrc, srcStep, pDst, dstStep, roiSize, dstOrder, MAX_IPP32f);

From 172242a94e6a9ab6ee20912c3f7899e900e7664f Mon Sep 17 00:00:00 2001
From: Jin Ma <jinma06njuee@gmail.om>
Date: Fri, 6 Sep 2013 13:39:41 +0800
Subject: [PATCH 34/41] Revised performance test according to the feedback of
 the community.

---
 modules/ocl/perf/perf_bgfg.cpp | 103 +++++++++++++++++++--------------
 1 file changed, 58 insertions(+), 45 deletions(-)

diff --git a/modules/ocl/perf/perf_bgfg.cpp b/modules/ocl/perf/perf_bgfg.cpp
index 619145bae..ec96c9007 100644
--- a/modules/ocl/perf/perf_bgfg.cpp
+++ b/modules/ocl/perf/perf_bgfg.cpp
@@ -48,6 +48,8 @@ using namespace perf;
 using namespace std;
 using namespace cv::ocl;
 using namespace cv;
+using std::tr1::tuple;
+using std::tr1::get;
 #if defined(HAVE_XINE)         || \
     defined(HAVE_GSTREAMER)    || \
     defined(HAVE_QUICKTIME)    || \
@@ -60,6 +62,7 @@ using namespace cv;
 #  define BUILD_WITH_VIDEO_INPUT_SUPPORT 0
 #endif
 
+#if BUILD_WITH_VIDEO_INPUT_SUPPORT
 static void cvtFrameFmt(vector<Mat>& input, vector<Mat>& output, int output_cn)
 {
     for(int i = 0; i< (int)(input.size()); i++)
@@ -70,8 +73,8 @@ static void cvtFrameFmt(vector<Mat>& input, vector<Mat>& output, int output_cn)
             cvtColor(input[i], output[i], COLOR_RGB2RGBA);
     }
 }
-
-static void prepareData(VideoCapture& cap, int cn, vector<Mat>& frame_buffer, vector<oclMat>& frame_buffer_ocl)
+//prepare data for CPU
+static void prepareData(VideoCapture& cap, int cn, vector<Mat>& frame_buffer)
 {
     cv::Mat frame;
     std::vector<Mat> frame_buffer_init;
@@ -87,11 +90,14 @@ static void prepareData(VideoCapture& cap, int cn, vector<Mat>& frame_buffer, ve
         cvtFrameFmt(frame_buffer_init, frame_buffer, 1);
     else
         frame_buffer = frame_buffer_init;
-
-    for(int i = 0; i < nFrame; i++)
+}
+//copy CPU data to GPU
+static void prepareData(vector<Mat>& frame_buffer, vector<oclMat>& frame_buffer_ocl)
+{
+    for(int i = 0; i < (int)frame_buffer.size(); i++)
         frame_buffer_ocl.push_back(cv::ocl::oclMat(frame_buffer[i]));
 }
-
+#endif
 ///////////// MOG ////////////////////////
 #if BUILD_WITH_VIDEO_INPUT_SUPPORT
 
@@ -118,15 +124,16 @@ PERF_TEST_P(VideoMOGFixture, MOG,
     cv::VideoCapture cap(inputFile);
     ASSERT_TRUE(cap.isOpened());
 
-    prepareData(cap, cn, frame_buffer, frame_buffer_ocl);
+    prepareData(cap, cn, frame_buffer);
 
+    cv::Mat foreground;
+    cv::ocl::oclMat foreground_d;
     if(RUN_PLAIN_IMPL)
     {
-        cv::BackgroundSubtractorMOG mog;
-        cv::Mat foreground;
-
         TEST_CYCLE()
         {
+            cv::BackgroundSubtractorMOG mog;
+            foreground.release();
             for (int i = 0; i < nFrame; i++)
             {
                 mog(frame_buffer[i], foreground, learningRate);
@@ -135,18 +142,19 @@ PERF_TEST_P(VideoMOGFixture, MOG,
         SANITY_CHECK(foreground);
     }else if(RUN_OCL_IMPL)
     {
-        cv::ocl::MOG d_mog;
-        cv::ocl::oclMat foreground;
-        cv::Mat foreground_h;
+        prepareData(frame_buffer, frame_buffer_ocl);
+        CV_Assert((int)(frame_buffer_ocl.size()) == nFrame);
         OCL_TEST_CYCLE()
         {
+            cv::ocl::MOG d_mog;
+            foreground_d.release();
             for (int i = 0; i < nFrame; ++i)
             {
-                d_mog(frame_buffer_ocl[i], foreground, learningRate);
+                d_mog(frame_buffer_ocl[i], foreground_d, learningRate);
             }
         }
-        foreground.download(foreground_h);
-        SANITY_CHECK(foreground_h);
+        foreground_d.download(foreground);
+        SANITY_CHECK(foreground);
     }else
         OCL_PERF_ELSE
 }
@@ -173,18 +181,18 @@ PERF_TEST_P(VideoMOG2Fixture, MOG2,
 
     cv::VideoCapture cap(inputFile);
     ASSERT_TRUE(cap.isOpened());
-
-    prepareData(cap, cn, frame_buffer, frame_buffer_ocl);
+    prepareData(cap, cn, frame_buffer);
+    cv::Mat foreground;
+    cv::ocl::oclMat foreground_d;
 
     if(RUN_PLAIN_IMPL)
     {
-        cv::BackgroundSubtractorMOG2 mog2;
-        cv::Mat foreground;
-
-        mog2.set("detectShadows", false);
-
         TEST_CYCLE()
         {
+            cv::BackgroundSubtractorMOG2 mog2;
+            mog2.set("detectShadows", false);
+            foreground.release();
+
             for (int i = 0; i < nFrame; i++)
             {
                 mog2(frame_buffer[i], foreground);
@@ -193,19 +201,19 @@ PERF_TEST_P(VideoMOG2Fixture, MOG2,
         SANITY_CHECK(foreground);
     }else if(RUN_OCL_IMPL)
     {
-        cv::ocl::MOG2 d_mog2;
-        cv::ocl::oclMat foreground;
-        cv::Mat foreground_h;
-
+        prepareData(frame_buffer, frame_buffer_ocl);
+        CV_Assert((int)(frame_buffer_ocl.size()) == nFrame);
         OCL_TEST_CYCLE()
         {
+            cv::ocl::MOG2 d_mog2;
+            foreground_d.release();
             for (int i = 0; i < nFrame; i++)
             {
-                d_mog2(frame_buffer_ocl[i], foreground);
+                d_mog2(frame_buffer_ocl[i], foreground_d);
             }
         }
-        foreground.download(foreground_h);
-        SANITY_CHECK(foreground_h);
+        foreground_d.download(foreground);
+        SANITY_CHECK(foreground);
     }else
         OCL_PERF_ELSE
 }
@@ -218,7 +226,7 @@ typedef TestBaseWithParam<VideoMOG2ParamType> Video_MOG2GetBackgroundImage;
 
 PERF_TEST_P(Video_MOG2GetBackgroundImage, MOG2,
             ::testing::Combine(::testing::Values("gpu/video/768x576.avi", "gpu/video/1920x1080.avi"),
-            ::testing::Values(1, 3)))
+            ::testing::Values(3)))
 {
     VideoMOG2ParamType params = GetParam();
 
@@ -232,16 +240,21 @@ PERF_TEST_P(Video_MOG2GetBackgroundImage, MOG2,
     cv::VideoCapture cap(inputFile);
     ASSERT_TRUE(cap.isOpened());
 
-    prepareData(cap, cn, frame_buffer, frame_buffer_ocl);
+    prepareData(cap, cn, frame_buffer);
+
+    cv::Mat foreground;
+    cv::Mat background;
+    cv::ocl::oclMat foreground_d;
+    cv::ocl::oclMat background_d;
 
     if(RUN_PLAIN_IMPL)
     {
-        cv::BackgroundSubtractorMOG2 mog2;
-        cv::Mat foreground;
-        cv::Mat background;
-        mog2.set("detectShadows", false);
         TEST_CYCLE()
         {
+            cv::BackgroundSubtractorMOG2 mog2;
+            mog2.set("detectShadows", false);
+            foreground.release();
+            background.release();
             for (int i = 0; i < nFrame; i++)
             {
                 mog2(frame_buffer[i], foreground);
@@ -251,22 +264,22 @@ PERF_TEST_P(Video_MOG2GetBackgroundImage, MOG2,
         SANITY_CHECK(background);
     }else if(RUN_OCL_IMPL)
     {
-        cv::ocl::MOG2 d_mog2;
-        cv::ocl::oclMat foreground;
-        cv::Mat background_h;
-        cv::ocl::oclMat background;
-
+        prepareData(frame_buffer, frame_buffer_ocl);
+        CV_Assert((int)(frame_buffer_ocl.size()) == nFrame);
         OCL_TEST_CYCLE()
         {
+            cv::ocl::MOG2 d_mog2;
+            foreground_d.release();
+            background_d.release();
             for (int i = 0; i < nFrame; i++)
             {
-                d_mog2(frame_buffer_ocl[i], foreground);
+                d_mog2(frame_buffer_ocl[i], foreground_d);
             }
-            d_mog2.getBackgroundImage(background);
+            d_mog2.getBackgroundImage(background_d);
         }
-        background.download(background_h);
-        SANITY_CHECK(background_h);
+        background_d.download(background);
+        SANITY_CHECK(background);
     }else
         OCL_PERF_ELSE
 }
-#endif
\ No newline at end of file
+#endif

From 7d62fdc62c89becc229aead1dce8cafe5e153e7f Mon Sep 17 00:00:00 2001
From: Roman Donchenko <roman.donchenko@itseez.com>
Date: Fri, 6 Sep 2013 12:46:58 +0400
Subject: [PATCH 35/41] GPU resize: removed a few parameter names to avoid
 unused parameter warnings.

---
 modules/gpu/src/cuda/resize.cu | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/modules/gpu/src/cuda/resize.cu b/modules/gpu/src/cuda/resize.cu
index dc9f462d1..110e62d03 100644
--- a/modules/gpu/src/cuda/resize.cu
+++ b/modules/gpu/src/cuda/resize.cu
@@ -194,7 +194,7 @@ namespace cv { namespace gpu { namespace device
     }
 
     template <typename T>
-    void call_resize_nearest_tex(const PtrStepSz<T>& src, const PtrStepSz<T>& srcWhole, int yoff, int xoff, const PtrStepSz<T>& dst, float fy, float fx)
+    void call_resize_nearest_tex(const PtrStepSz<T>& /*src*/, const PtrStepSz<T>& srcWhole, int yoff, int xoff, const PtrStepSz<T>& dst, float fy, float fx)
     {
         const dim3 block(32, 8);
         const dim3 grid(divUp(dst.cols, block.x), divUp(dst.rows, block.y));
@@ -301,7 +301,7 @@ namespace cv { namespace gpu { namespace device
 
     template <typename T> struct ResizeNearestDispatcher
     {
-        static void call(const PtrStepSz<T>& src, const PtrStepSz<T>& srcWhole, int yoff, int xoff, const PtrStepSz<T>& dst, float fy, float fx, cudaStream_t stream)
+        static void call(const PtrStepSz<T>& src, const PtrStepSz<T>& /*srcWhole*/, int /*yoff*/, int /*xoff*/, const PtrStepSz<T>& dst, float fy, float fx, cudaStream_t stream)
         {
             call_resize_nearest_glob(src, dst, fy, fx, stream);
         }

From 27947474ec52a0866b3425f0cc13dfeeadd515e1 Mon Sep 17 00:00:00 2001
From: ilya-lavrenov <ilya.lavrenov@itseez.com>
Date: Fri, 6 Sep 2013 13:25:56 +0400
Subject: [PATCH 36/41] fixed data path

---
 modules/ocl/test/test_bgfg.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/modules/ocl/test/test_bgfg.cpp b/modules/ocl/test/test_bgfg.cpp
index aea361979..bdf3841be 100644
--- a/modules/ocl/test/test_bgfg.cpp
+++ b/modules/ocl/test/test_bgfg.cpp
@@ -81,7 +81,7 @@ PARAM_TEST_CASE(mog, UseGray, LearningRate, bool)
 
 TEST_P(mog, Update)
 {
-    std::string inputFile = string(cvtest::TS::ptr()->get_data_path()) + "gpu/768x576.avi";
+    std::string inputFile = string(cvtest::TS::ptr()->get_data_path()) + "gpu/video/768x576.avi";
     cv::VideoCapture cap(inputFile);
     ASSERT_TRUE(cap.isOpened());
 

From 2de8487e5844a11a99bad39925bb2074be747ed3 Mon Sep 17 00:00:00 2001
From: Roman Donchenko <roman.donchenko@itseez.com>
Date: Fri, 6 Sep 2013 15:53:42 +0400
Subject: [PATCH 37/41] Fixed a few tests that use uninitialized inputs.

---
 modules/imgproc/test/test_imgwarp.cpp | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/modules/imgproc/test/test_imgwarp.cpp b/modules/imgproc/test/test_imgwarp.cpp
index 4ea3b76ba..53f7d5c2f 100644
--- a/modules/imgproc/test/test_imgwarp.cpp
+++ b/modules/imgproc/test/test_imgwarp.cpp
@@ -1424,7 +1424,7 @@ TEST(Imgproc_fitLine_vector_2d, regression)
 
 TEST(Imgproc_fitLine_Mat_2dC2, regression)
 {
-    cv::Mat mat1(3, 1, CV_32SC2);
+    cv::Mat mat1 = Mat::zeros(3, 1, CV_32SC2);
     std::vector<float> line1;
 
     cv::fitLine(mat1, line1, CV_DIST_L2, 0 ,0 ,0);
@@ -1444,7 +1444,7 @@ TEST(Imgproc_fitLine_Mat_2dC1, regression)
 
 TEST(Imgproc_fitLine_Mat_3dC3, regression)
 {
-    cv::Mat mat1(2, 1, CV_32SC3);
+    cv::Mat mat1 = Mat::zeros(2, 1, CV_32SC3);
     std::vector<float> line1;
 
     cv::fitLine(mat1, line1, CV_DIST_L2, 0 ,0 ,0);
@@ -1454,7 +1454,7 @@ TEST(Imgproc_fitLine_Mat_3dC3, regression)
 
 TEST(Imgproc_fitLine_Mat_3dC1, regression)
 {
-    cv::Mat mat2(2, 3, CV_32SC1);
+    cv::Mat mat2 = Mat::zeros(2, 3, CV_32SC1);
     std::vector<float> line2;
 
     cv::fitLine(mat2, line2, CV_DIST_L2, 0 ,0 ,0);

From 4f109d12924d1ac5e097e3fec707274cfbb5d7ec Mon Sep 17 00:00:00 2001
From: Roman Donchenko <roman.donchenko@itseez.com>
Date: Fri, 6 Sep 2013 16:02:41 +0400
Subject: [PATCH 38/41] Fixed a memory access error in
 CV_Remap_Test::generate_test_data.

begin_x[1] is not the second component of the element, but the element
after the one pointed to by begin_x. When begin_x points to the last
element, that line overwrites data past the end of the allocation, which,
during my tests, happened to contain the reference count for the matrix.
Hilarity ensues.
---
 modules/imgproc/test/test_imgwarp_strict.cpp | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/modules/imgproc/test/test_imgwarp_strict.cpp b/modules/imgproc/test/test_imgwarp_strict.cpp
index 59b851c45..064ba9356 100644
--- a/modules/imgproc/test/test_imgwarp_strict.cpp
+++ b/modules/imgproc/test/test_imgwarp_strict.cpp
@@ -679,8 +679,8 @@ void CV_Remap_Test::generate_test_data()
             MatIterator_<Vec2s> begin_x = mapx.begin<Vec2s>(), end_x = mapx.end<Vec2s>();
             for ( ; begin_x != end_x; ++begin_x)
             {
-                begin_x[0] = static_cast<short>(rng.uniform(static_cast<int>(_n), std::max(src.cols + n - 1, 0)));
-                begin_x[1] = static_cast<short>(rng.uniform(static_cast<int>(_n), std::max(src.rows + n - 1, 0)));
+                (*begin_x)[0] = static_cast<short>(rng.uniform(static_cast<int>(_n), std::max(src.cols + n - 1, 0)));
+                (*begin_x)[1] = static_cast<short>(rng.uniform(static_cast<int>(_n), std::max(src.rows + n - 1, 0)));
             }
 
             if (interpolation != INTER_NEAREST)

From 2fb0d23104a4d6e7607269b51357974eae4a52e6 Mon Sep 17 00:00:00 2001
From: Jin Ma <jinma06njuee@gmail.om>
Date: Sat, 7 Sep 2013 10:40:30 +0800
Subject: [PATCH 39/41] Removed unnecessary process for conversion to 4-channel
 format.

---
 modules/ocl/perf/perf_bgfg.cpp | 9 +++------
 1 file changed, 3 insertions(+), 6 deletions(-)

diff --git a/modules/ocl/perf/perf_bgfg.cpp b/modules/ocl/perf/perf_bgfg.cpp
index ec96c9007..ecf112702 100644
--- a/modules/ocl/perf/perf_bgfg.cpp
+++ b/modules/ocl/perf/perf_bgfg.cpp
@@ -63,14 +63,11 @@ using std::tr1::get;
 #endif
 
 #if BUILD_WITH_VIDEO_INPUT_SUPPORT
-static void cvtFrameFmt(vector<Mat>& input, vector<Mat>& output, int output_cn)
+static void cvtFrameFmt(vector<Mat>& input, vector<Mat>& output)
 {
     for(int i = 0; i< (int)(input.size()); i++)
     {
-        if(output_cn == 1)
-            cvtColor(input[i], output[i], COLOR_RGB2GRAY);
-        else
-            cvtColor(input[i], output[i], COLOR_RGB2RGBA);
+        cvtColor(input[i], output[i], COLOR_RGB2GRAY);
     }
 }
 //prepare data for CPU
@@ -87,7 +84,7 @@ static void prepareData(VideoCapture& cap, int cn, vector<Mat>& frame_buffer)
     }
 
     if(cn == 1)
-        cvtFrameFmt(frame_buffer_init, frame_buffer, 1);
+        cvtFrameFmt(frame_buffer_init, frame_buffer);
     else
         frame_buffer = frame_buffer_init;
 }

From c439702f0c28ebb57e1ef7d9f99bb33711e51fd4 Mon Sep 17 00:00:00 2001
From: Vladislav Vinogradov <vlad.vinogradov@itseez.com>
Date: Mon, 9 Sep 2013 10:51:44 +0400
Subject: [PATCH 40/41] added missing gpu::matchTemplate implementation for
 non-CUDA build

---
 modules/gpu/src/match_template.cpp | 1 +
 1 file changed, 1 insertion(+)

diff --git a/modules/gpu/src/match_template.cpp b/modules/gpu/src/match_template.cpp
index 4643f3b52..2587803da 100644
--- a/modules/gpu/src/match_template.cpp
+++ b/modules/gpu/src/match_template.cpp
@@ -49,6 +49,7 @@ using namespace std;
 #if !defined (HAVE_CUDA) || defined (CUDA_DISABLER)
 
 void cv::gpu::matchTemplate(const GpuMat&, const GpuMat&, GpuMat&, int, Stream&) { throw_nogpu(); }
+void cv::gpu::matchTemplate(const GpuMat&, const GpuMat&, GpuMat&, int, MatchTemplateBuf&, Stream&) { throw_nogpu(); }
 
 #else
 

From 40212e1eb9348a1a6d578d9906c677931af605ca Mon Sep 17 00:00:00 2001
From: Alexander Smorkalov <alexander.smorkalov@itseez.com>
Date: Mon, 9 Sep 2013 13:53:49 +0400
Subject: [PATCH 41/41] Link to Hough Circle Transform sample fixed in
 tutorial.

---
 doc/tutorials/imgproc/imgtrans/hough_circle/hough_circle.rst | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/doc/tutorials/imgproc/imgtrans/hough_circle/hough_circle.rst b/doc/tutorials/imgproc/imgtrans/hough_circle/hough_circle.rst
index 6d307beec..39590eb42 100644
--- a/doc/tutorials/imgproc/imgtrans/hough_circle/hough_circle.rst
+++ b/doc/tutorials/imgproc/imgtrans/hough_circle/hough_circle.rst
@@ -40,7 +40,7 @@ Code
    * Display the detected circle in a window.
 
    .. |TutorialHoughCirclesSimpleDownload| replace:: here
-   .. _TutorialHoughCirclesSimpleDownload: http://code.opencv.org/projects/opencv/repository/revisions/master/raw/samples/cpp/houghlines.cpp
+   .. _TutorialHoughCirclesSimpleDownload: http://code.opencv.org/projects/opencv/repository/revisions/master/raw/samples/cpp/houghcircles.cpp
    .. |TutorialHoughCirclesFancyDownload| replace:: here
    .. _TutorialHoughCirclesFancyDownload: http://code.opencv.org/projects/opencv/repository/revisions/master/raw/samples/cpp/tutorial_code/ImgTrans/HoughCircle_Demo.cpp