From 3fb0bf6e996b5aa7c094d055578a520e7764aa6f Mon Sep 17 00:00:00 2001 From: Jin Ma Date: Fri, 16 Aug 2013 14:19:46 +0800 Subject: [PATCH 01/41] Added MOG and MOG2. --- modules/ocl/include/opencv2/ocl/ocl.hpp | 149 ++++++ modules/ocl/perf/perf_bgfg.cpp | 333 +++++++++++++ modules/ocl/src/bgfg_mog.cpp | 630 ++++++++++++++++++++++++ modules/ocl/src/opencl/bgfg_mog.cl | 543 ++++++++++++++++++++ modules/ocl/test/test_bgfg.cpp | 232 +++++++++ modules/ocl/test/test_optflow.cpp | 6 +- modules/ocl/test/utility.cpp | 38 ++ modules/ocl/test/utility.hpp | 3 + samples/ocl/bgfg_segm.cpp | 135 +++++ 9 files changed, 2066 insertions(+), 3 deletions(-) create mode 100644 modules/ocl/perf/perf_bgfg.cpp create mode 100644 modules/ocl/src/bgfg_mog.cpp create mode 100644 modules/ocl/src/opencl/bgfg_mog.cl create mode 100644 modules/ocl/test/test_bgfg.cpp create mode 100644 samples/ocl/bgfg_segm.cpp diff --git a/modules/ocl/include/opencv2/ocl/ocl.hpp b/modules/ocl/include/opencv2/ocl/ocl.hpp index aa0283fbe..f250646f5 100644 --- a/modules/ocl/include/opencv2/ocl/ocl.hpp +++ b/modules/ocl/include/opencv2/ocl/ocl.hpp @@ -1698,6 +1698,155 @@ namespace cv // keys = {1, 2, 3} (CV_8UC1) // values = {6,2, 10,5, 4,3} (CV_8UC2) void CV_EXPORTS sortByKey(oclMat& keys, oclMat& values, int method, bool isGreaterThan = false); + /*!Base class for MOG and MOG2!*/ + class CV_EXPORTS BackgroundSubtractor + { + public: + //! the virtual destructor + virtual ~BackgroundSubtractor(); + //! the update operator that takes the next video frame and returns the current foreground mask as 8-bit binary image. + virtual void operator()(const oclMat& image, oclMat& fgmask, float learningRate); + + //! computes a background image + virtual void getBackgroundImage(oclMat& backgroundImage) const = 0; + }; + /*! 
+         Gaussian Mixture-based Background/Foreground Segmentation Algorithm
+
+         The class implements the following algorithm:
+         "An improved adaptive background mixture model for real-time tracking with shadow detection"
+         P. KaewTraKulPong and R. Bowden,
+         Proc. 2nd European Workshop on Advanced Video-Based Surveillance Systems, 2001."
+         http://personal.ee.surrey.ac.uk/Personal/R.Bowden/publications/avbs01/avbs01.pdf
+        */
+        class CV_EXPORTS MOG: public cv::ocl::BackgroundSubtractor
+        {
+        public:
+            //! the default constructor
+            MOG(int nmixtures = -1);
+
+            //! re-initialization method
+            void initialize(Size frameSize, int frameType);
+
+            //! the update operator
+            void operator()(const oclMat& frame, oclMat& fgmask, float learningRate = 0.f);
+
+            //! computes a background image which is the mean of all background gaussians
+            void getBackgroundImage(oclMat& backgroundImage) const;
+
+            //! releases all inner buffers
+            void release();
+
+            int history;
+            float varThreshold;
+            float backgroundRatio;
+            float noiseSigma;
+
+        private:
+            int nmixtures_;
+
+            Size frameSize_;
+            int frameType_;
+            int nframes_;
+
+            oclMat weight_;
+            oclMat sortKey_;
+            oclMat mean_;
+            oclMat var_;
+        };
+
+        /*!
+         The class implements the following algorithm:
+         "Improved adaptive Gaussian mixture model for background subtraction"
+         Z.Zivkovic
+         International Conference Pattern Recognition, UK, August, 2004.
+         http://www.zoranz.net/Publications/zivkovic2004ICPR.pdf
+        */
+        class CV_EXPORTS MOG2: public cv::ocl::BackgroundSubtractor
+        {
+        public:
+            //! the default constructor
+            MOG2(int nmixtures = -1);
+
+            //! re-initialization method
+            void initialize(Size frameSize, int frameType);
+
+            //! the update operator
+            void operator()(const oclMat& frame, oclMat& fgmask, float learningRate = -1.0f);
+
+            //! computes a background image which is the mean of all background gaussians
+            void getBackgroundImage(oclMat& backgroundImage) const;
+
+            //! releases all inner buffers
+            void release();
+
+            // parameters
+            // you should call initialize after changing parameters
+
+            int history;
+
+            //! NOTE: the maximum allowed number of mixture components is the constructor argument (stored in nmixtures_).
+            //! The actual number used is determined dynamically per pixel
+            float varThreshold;
+            // threshold on the squared Mahalanobis distance to decide if it is well described
+            // by the background model or not. Related to Cthr from the paper.
+            // This does not influence the update of the background. A typical value could be 4 sigma
+            // and that is varThreshold=4*4=16; Corresponds to Tb in the paper.
+
+            /////////////////////////
+            // less important parameters - things you might change but be careful
+            ////////////////////////
+
+            float backgroundRatio;
+            // corresponds to fTB=1-cf from the paper
+            // TB - threshold when the component becomes significant enough to be included into
+            // the background model. It is the TB=1-cf from the paper. So I use cf=0.1 => TB=0.9
+            // For alpha=0.001 it means that the mode should exist for approximately 105 frames before
+            // it is considered foreground
+            // float noiseSigma;
+            float varThresholdGen;
+
+            //corresponds to Tg - threshold on the squared Mahalan. dist. to decide
+            //when a sample is close to the existing components. If it is not close
+            //to any a new component will be generated. I use 3 sigma => Tg=3*3=9.
+            //Smaller Tg leads to more generated components and higher Tg might
+            //lead to a small number of components but they can grow too large
+            float fVarInit;
+            float fVarMin;
+            float fVarMax;
+
+            //initial variance for the newly generated components.
+            //It will influence the speed of adaptation. A good guess should be made.
+            //A simple way is to estimate the typical standard deviation from the images.
+            //The default used here is 15 (mog2::defaultVarInit)
+            // min and max can be used to further control the variance
+            float fCT; //CT - complexity reduction prior
+            //this is related to the number of samples needed to accept that a component
+            //actually exists. We use CT=0.05 of all the samples. By setting CT=0 you get
+            //the standard Stauffer&Grimson algorithm (maybe not exact but very similar)
+
+            //shadow detection parameters
+            bool bShadowDetection; //default 1 - do shadow detection
+            unsigned char nShadowDetection; //do shadow detection - insert this value as the detection result - 127 default value
+            float fTau;
+            // Tau - shadow threshold. The shadow is detected if the pixel is a darker
+            //version of the background. Tau is a threshold on how much darker the shadow can be.
+            //Tau= 0.5 means that if the pixel is more than 2 times darker then it is not shadow
+            //See: Prati,Mikic,Trivedi,Cucchiara,"Detecting Moving Shadows...",IEEE PAMI,2003.
+
+        private:
+            int nmixtures_;
+
+            Size frameSize_;
+            int frameType_;
+            int nframes_;
+
+            oclMat weight_;
+            oclMat variance_;
+            oclMat mean_;
+
+            oclMat bgmodelUsedModes_; //keep track of number of modes per pixel
+        };
     }
 }
 #if defined _MSC_VER && _MSC_VER >= 1200
diff --git a/modules/ocl/perf/perf_bgfg.cpp b/modules/ocl/perf/perf_bgfg.cpp
new file mode 100644
index 000000000..e7aad759f
--- /dev/null
+++ b/modules/ocl/perf/perf_bgfg.cpp
@@ -0,0 +1,333 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                           License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2010-2013, Multicoreware, Inc., all rights reserved.
+// Copyright (C) 2010-2013, Advanced Micro Devices, Inc., all rights reserved.
+// Third party copyrights are property of their respective owners. +// +// @Authors +// Jin Ma, jin@multicorewareinc.com +// +// Redistribution and use in source and binary forms, with or without modification, +// are permitted provided that the following conditions are met: +// +// * Redistribution's of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// * Redistribution's in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// * The name of the copyright holders may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// This software is provided by the copyright holders and contributors as is and +// any express or implied warranties, including, but not limited to, the implied +// warranties of merchantability and fitness for a particular purpose are disclaimed. +// In no event shall the Intel Corporation or contributors be liable for any direct, +// indirect, incidental, special, exemplary, or consequential damages +// (including, but not limited to, procurement of substitute goods or services; +// loss of use, data, or profits; or business interruption) however caused +// and on any theory of liability, whether in contract, strict liability, +// or tort (including negligence or otherwise) arising in any way out of +// the use of this software, even if advised of the possibility of such damage. 
+// +//M*/ +#include "precomp.hpp" +using namespace cv; +using namespace cv::ocl; + +void cvtFrameFmt(std::vector& input, std::vector& output, int output_cn) +{ + for(int i=0; i frame_buffer_init; + std::vector frame_buffer(nframe); + std::vector frame_buffer_ocl; + std::vector foreground_buf_ocl; + std::vector foreground_buf_cpu; + BackgroundSubtractorMOG mog_cpu; + cv::ocl::MOG d_mog; + for(int i = 0; i < nframe; i++) + { + cap >> frame; + ASSERT_FALSE(frame.empty()); + frame_buffer_init.push_back(frame); + } + + for(unsigned int i = 0; i < sizeof(learningRate)/sizeof(float); i++) + { + for(unsigned int j = 0; j < sizeof(cn)/sizeof(int); j++) + { + SUBTEST << frame.cols << 'x' << frame.rows << ".avi; "<<"channels: "< frame_buffer_init; + std::vector frame_buffer(nframe); + std::vector frame_buffer_ocl; + std::vector foreground_buf_ocl; + std::vector foreground_buf_cpu; + cv::ocl::oclMat foreground_ocl; + + for(int i = 0; i < nframe; i++) + { + cap >> frame; + ASSERT_FALSE(frame.empty()); + frame_buffer_init.push_back(frame); + } + cv::ocl::MOG2 d_mog; + + for(unsigned int j = 0; j < sizeof(cn)/sizeof(int); j++) + { + SUBTEST << frame.cols << 'x' << frame.rows << ".avi; "<<"channels: "<> frame; + ASSERT_FALSE(frame.empty()); + + int nframe = 5; + std::vector frame_buffer_init; + std::vector frame_buffer(nframe); + std::vector frame_buffer_ocl; + std::vector foreground_buf_ocl; + std::vector foreground_buf_cpu; + + for(int i = 0; i < nframe; i++) + { + cap >> frame; + ASSERT_FALSE(frame.empty()); + frame_buffer_init.push_back(frame); + } + + for(unsigned int j = 0; j < sizeof(cn)/sizeof(int); j++) + { + SUBTEST << frame.cols << 'x' << frame.rows << ".avi; "<<"channels: "< 0 ? 
nmixtures : mog::defaultNMixtures, 8); + history = mog::defaultHistory; + varThreshold = mog::defaultVarThreshold; + backgroundRatio = mog::defaultBackgroundRatio; + noiseSigma = mog::defaultNoiseSigma; +} + +void cv::ocl::MOG::initialize(cv::Size frameSize, int frameType) +{ + CV_Assert(frameType == CV_8UC1 || frameType == CV_8UC3 || frameType == CV_8UC4); + + frameSize_ = frameSize; + frameType_ = frameType; + + int ch = CV_MAT_CN(frameType); + int work_ch = ch; + + // for each gaussian mixture of each pixel bg model we store + // the mixture sort key (w/sum_of_variances), the mixture weight (w), + // the mean (nchannels values) and + // the diagonal covariance matrix (another nchannels values) + + weight_.create(frameSize.height * nmixtures_, frameSize_.width, CV_32FC1); + sortKey_.create(frameSize.height * nmixtures_, frameSize_.width, CV_32FC1); + mean_.create(frameSize.height * nmixtures_, frameSize_.width, CV_32FC(work_ch)); + var_.create(frameSize.height * nmixtures_, frameSize_.width, CV_32FC(work_ch)); + + weight_.setTo(cv::Scalar::all(0)); + sortKey_.setTo(cv::Scalar::all(0)); + mean_.setTo(cv::Scalar::all(0)); + var_.setTo(cv::Scalar::all(0)); + + nframes_ = 0; +} + +void cv::ocl::MOG::operator()(const cv::ocl::oclMat& frame, cv::ocl::oclMat& fgmask, float learningRate) +{ + using namespace cv::ocl::device::mog; + + CV_Assert(frame.depth() == CV_8U); + + int ch = frame.oclchannels(); + int work_ch = ch; + + if (nframes_ == 0 || learningRate >= 1.0 || frame.size() != frameSize_ || work_ch != mean_.oclchannels()) + initialize(frame.size(), frame.type()); + + fgmask.create(frameSize_, CV_8UC1); + + ++nframes_; + learningRate = learningRate >= 0.0f && nframes_ > 1 ? 
learningRate : 1.0f / std::min(nframes_, history); + CV_Assert(learningRate >= 0.0f); + + mog_ocl(frame, ch, fgmask, weight_, sortKey_, mean_, var_, nmixtures_, + varThreshold, learningRate, backgroundRatio, noiseSigma); +} + +void cv::ocl::MOG::getBackgroundImage(oclMat& backgroundImage) const +{ + using namespace cv::ocl::device::mog; + + backgroundImage.create(frameSize_, frameType_); + + cv::ocl::device::mog::getBackgroundImage_ocl(backgroundImage.oclchannels(), weight_, mean_, backgroundImage, nmixtures_, backgroundRatio); +} + +void cv::ocl::MOG::release() +{ + frameSize_ = Size(0, 0); + frameType_ = 0; + nframes_ = 0; + + weight_.release(); + sortKey_.release(); + mean_.release(); + var_.release(); + clReleaseMemObject(cl_constants); +} + +static void mog_withoutLearning(const oclMat& frame, int cn, oclMat& fgmask, oclMat& weight, oclMat& mean, oclMat& var, + int nmixtures, float varThreshold, float backgroundRatio) +{ + Context* clCxt = Context::getContext(); + + size_t local_thread[] = {32, 8, 1}; + size_t global_thread[] = {frame.cols, frame.rows, 1}; + + int frame_step = (int)(frame.step/frame.elemSize()); + int fgmask_step = (int)(fgmask.step/fgmask.elemSize()); + int weight_step = (int)(weight.step/weight.elemSize()); + int mean_step = (int)(mean.step/mean.elemSize()); + int var_step = (int)(var.step/var.elemSize()); + + int fgmask_offset_y = (int)(fgmask.offset/fgmask.step); + int fgmask_offset_x = (int)(fgmask.offset%fgmask.step); + fgmask_offset_x = fgmask_offset_x/(int)fgmask.elemSize(); + + int frame_offset_y = (int)(frame.offset/frame.step); + int frame_offset_x = (int)(frame.offset%frame.step); + frame_offset_x = frame_offset_x/(int)frame.elemSize(); + + char build_option[50]; + if(cn == 1) + { + snprintf(build_option, 50, "-D CN1 -D NMIXTURES=%d", nmixtures); + }else + { + snprintf(build_option, 50, "-D NMIXTURES=%d", nmixtures); + } + + String kernel_name = "mog_withoutLearning_kernel"; + vector< pair > args; + + 
args.push_back(make_pair(sizeof(cl_mem), (void*)&frame.data)); + args.push_back(make_pair(sizeof(cl_mem), (void*)&fgmask.data)); + args.push_back(make_pair(sizeof(cl_mem), (void*)&weight.data)); + args.push_back(make_pair(sizeof(cl_mem), (void*)&mean.data)); + args.push_back(make_pair(sizeof(cl_mem), (void*)&var.data)); + + args.push_back(make_pair(sizeof(cl_int), (void*)&frame.rows)); + args.push_back(make_pair(sizeof(cl_int), (void*)&frame.cols)); + + args.push_back(make_pair(sizeof(cl_int), (void*)&frame_step)); + args.push_back(make_pair(sizeof(cl_int), (void*)&fgmask_step)); + args.push_back(make_pair(sizeof(cl_int), (void*)&weight_step)); + args.push_back(make_pair(sizeof(cl_int), (void*)&mean_step)); + args.push_back(make_pair(sizeof(cl_int), (void*)&var_step)); + + args.push_back(make_pair(sizeof(cl_float), (void*)&varThreshold)); + args.push_back(make_pair(sizeof(cl_float), (void*)&backgroundRatio)); + + args.push_back(make_pair(sizeof(cl_int), (void*)&fgmask_offset_x)); + args.push_back(make_pair(sizeof(cl_int), (void*)&fgmask_offset_y)); + + args.push_back(make_pair(sizeof(cl_int), (void*)&frame_offset_x)); + args.push_back(make_pair(sizeof(cl_int), (void*)&frame_offset_y)); + + openCLExecuteKernel(clCxt, &bgfg_mog, kernel_name, global_thread, local_thread, args, -1, -1, build_option); +} + + +static void mog_withLearning(const oclMat& frame, int cn, oclMat& fgmask, oclMat& weight, oclMat& sortKey, oclMat& mean, oclMat& var, + int nmixtures, float varThreshold, float backgroundRatio, float learningRate, float minVar) +{ + Context* clCxt = Context::getContext(); + + size_t local_thread[] = {32, 8, 1}; + size_t global_thread[] = {frame.cols, frame.rows, 1}; + + int frame_step = (int)(frame.step/frame.elemSize()); + int fgmask_step = (int)(fgmask.step/fgmask.elemSize()); + int weight_step = (int)(weight.step/weight.elemSize()); + int sortKey_step = (int)(sortKey.step/sortKey.elemSize()); + int mean_step = (int)(mean.step/mean.elemSize()); + int var_step = 
(int)(var.step/var.elemSize()); + + int fgmask_offset_y = (int)(fgmask.offset/fgmask.step); + int fgmask_offset_x = (int)(fgmask.offset%fgmask.step); + fgmask_offset_x = fgmask_offset_x/(int)fgmask.elemSize(); + + int frame_offset_y = (int)(frame.offset/frame.step); + int frame_offset_x = (int)(frame.offset%frame.step); + frame_offset_x = frame_offset_x/(int)frame.elemSize(); + + char build_option[50]; + if(cn == 1) + { + snprintf(build_option, 50, "-D CN1 -D NMIXTURES=%d", nmixtures); + }else + { + snprintf(build_option, 50, "-D NMIXTURES=%d", nmixtures); + } + + String kernel_name = "mog_withLearning_kernel"; + vector< pair > args; + + args.push_back(make_pair(sizeof(cl_mem), (void*)&frame.data)); + args.push_back(make_pair(sizeof(cl_mem), (void*)&fgmask.data)); + args.push_back(make_pair(sizeof(cl_mem), (void*)&weight.data)); + args.push_back(make_pair(sizeof(cl_mem), (void*)&sortKey.data)); + args.push_back(make_pair(sizeof(cl_mem), (void*)&mean.data)); + args.push_back(make_pair(sizeof(cl_mem), (void*)&var.data)); + + args.push_back(make_pair(sizeof(cl_int), (void*)&frame.rows)); + args.push_back(make_pair(sizeof(cl_int), (void*)&frame.cols)); + + args.push_back(make_pair(sizeof(cl_int), (void*)&frame_step)); + args.push_back(make_pair(sizeof(cl_int), (void*)&fgmask_step)); + args.push_back(make_pair(sizeof(cl_int), (void*)&weight_step)); + args.push_back(make_pair(sizeof(cl_int), (void*)&sortKey_step)); + args.push_back(make_pair(sizeof(cl_int), (void*)&mean_step)); + args.push_back(make_pair(sizeof(cl_int), (void*)&var_step)); + + args.push_back(make_pair(sizeof(cl_float), (void*)&varThreshold)); + args.push_back(make_pair(sizeof(cl_float), (void*)&backgroundRatio)); + args.push_back(make_pair(sizeof(cl_float), (void*)&learningRate)); + args.push_back(make_pair(sizeof(cl_float), (void*)&minVar)); + + args.push_back(make_pair(sizeof(cl_int), (void*)&fgmask_offset_x)); + args.push_back(make_pair(sizeof(cl_int), (void*)&fgmask_offset_y)); + + 
args.push_back(make_pair(sizeof(cl_int), (void*)&frame_offset_x)); + args.push_back(make_pair(sizeof(cl_int), (void*)&frame_offset_y)); + + openCLExecuteKernel(clCxt, &bgfg_mog, kernel_name, global_thread, local_thread, args, -1, -1, build_option); +} + +void cv::ocl::device::mog::mog_ocl(const oclMat& frame, int cn, oclMat& fgmask, oclMat& weight, oclMat& sortKey, oclMat& mean, oclMat& var, + int nmixtures, float varThreshold, float learningRate, float backgroundRatio, float noiseSigma) +{ + const float minVar = noiseSigma * noiseSigma; + + if(learningRate > 0.0f) + mog_withLearning(frame, cn, fgmask, weight, sortKey, mean, var, nmixtures, + varThreshold, backgroundRatio, learningRate, minVar); + else + mog_withoutLearning(frame, cn, fgmask, weight, mean, var, nmixtures, varThreshold, backgroundRatio); +} + +void cv::ocl::device::mog::getBackgroundImage_ocl(int cn, const oclMat& weight, const oclMat& mean, oclMat& dst, int nmixtures, float backgroundRatio) +{ + Context* clCxt = Context::getContext(); + + size_t local_thread[] = {32, 8, 1}; + size_t global_thread[] = {dst.cols, dst.rows, 1}; + + int weight_step = (int)(weight.step/weight.elemSize()); + int mean_step = (int)(mean.step/mean.elemSize()); + int dst_step = (int)(dst.step/dst.elemSize()); + + char build_option[50]; + if(cn == 1) + { + snprintf(build_option, 50, "-D CN1 -D NMIXTURES=%d", nmixtures); + }else + { + snprintf(build_option, 50, "-D NMIXTURES=%d", nmixtures); + } + + String kernel_name = "getBackgroundImage_kernel"; + vector< pair > args; + + args.push_back(make_pair(sizeof(cl_mem), (void*)&weight.data)); + args.push_back(make_pair(sizeof(cl_mem), (void*)&mean.data)); + args.push_back(make_pair(sizeof(cl_mem), (void*)&dst.data)); + + args.push_back(make_pair(sizeof(cl_int), (void*)&dst.rows)); + args.push_back(make_pair(sizeof(cl_int), (void*)&dst.cols)); + + args.push_back(make_pair(sizeof(cl_int), (void*)&weight_step)); + args.push_back(make_pair(sizeof(cl_int), (void*)&mean_step)); + 
args.push_back(make_pair(sizeof(cl_int), (void*)&dst_step));
+
+    args.push_back(make_pair(sizeof(cl_float), (void*)&backgroundRatio));
+
+    openCLExecuteKernel(clCxt, &bgfg_mog, kernel_name, global_thread, local_thread, args, -1, -1, build_option);
+}
+// Packs the MOG2 tuning constants into cl_constants (via load_constant) and caches TB in c_TB for getBackgroundImage2_ocl.
+void cv::ocl::device::mog::loadConstants(float Tb, float TB, float Tg, float varInit, float varMin, float varMax, float tau, unsigned char shadowVal)
+{
+    const float varLo = cv::min(varMin, varMax); // order the bounds from the ORIGINAL arguments: overwriting varMin
+    const float varHi = cv::max(varMin, varMax); // first made a swapped pair collapse to varMin == varMax
+
+    c_TB = TB;
+
+    _contant_struct *constants = new _contant_struct; // NOTE(review): never deleted in this function - confirm load_constant copies synchronously, then switch to a stack object
+    constants->c_Tb = Tb;
+    constants->c_TB = TB;
+    constants->c_Tg = Tg;
+    constants->c_varInit = varInit;
+    constants->c_varMin = varLo;
+    constants->c_varMax = varHi;
+    constants->c_tau = tau;
+    constants->c_shadowVal = shadowVal;
+
+    cl_constants = load_constant(*((cl_context*)getoclContext()), *((cl_command_queue*)getoclCommandQueue()),
+        (void *)constants, sizeof(_contant_struct));
+}
+
+void cv::ocl::device::mog::mog2_ocl(const oclMat& frame, int cn, oclMat& fgmask, oclMat& modesUsed, oclMat& weight, oclMat& variance,
+                                oclMat& mean, float alphaT, float prune, bool detectShadows, int nmixtures)
+{
+    Context* clCxt = Context::getContext();
+
+    const float alpha1 = 1.0f - alphaT;
+
+    cl_int detectShadows_flag = 0;
+    if(detectShadows)
+        detectShadows_flag = 1;
+
+    size_t local_thread[] = {32, 8, 1};
+    size_t global_thread[] = {frame.cols, frame.rows, 1};
+
+    int frame_step = (int)(frame.step/frame.elemSize());
+    int fgmask_step = (int)(fgmask.step/fgmask.elemSize());
+    int weight_step = (int)(weight.step/weight.elemSize());
+    int modesUsed_step = (int)(modesUsed.step/modesUsed.elemSize());
+    int mean_step = (int)(mean.step/mean.elemSize());
+    int var_step = (int)(variance.step/variance.elemSize());
+
+    int fgmask_offset_y = (int)(fgmask.offset/fgmask.step);
+    int fgmask_offset_x = (int)(fgmask.offset%fgmask.step);
+    fgmask_offset_x = fgmask_offset_x/(int)fgmask.elemSize();
+
+    int frame_offset_y =
(int)(frame.offset/frame.step); + int frame_offset_x = (int)(frame.offset%frame.step); + frame_offset_x = frame_offset_x/(int)frame.elemSize(); + + String kernel_name = "mog2_kernel"; + vector< pair > args; + + char build_option[50]; + if(cn == 1) + { + snprintf(build_option, 50, "-D CN1 -D NMIXTURES=%d", nmixtures); + }else + { + snprintf(build_option, 50, "-D NMIXTURES=%d", nmixtures); + } + + args.push_back(make_pair(sizeof(cl_mem), (void*)&frame.data)); + args.push_back(make_pair(sizeof(cl_mem), (void*)&fgmask.data)); + args.push_back(make_pair(sizeof(cl_mem), (void*)&weight.data)); + args.push_back(make_pair(sizeof(cl_mem), (void*)&mean.data)); + args.push_back(make_pair(sizeof(cl_mem), (void*)&modesUsed.data)); + args.push_back(make_pair(sizeof(cl_mem), (void*)&variance.data)); + + args.push_back(make_pair(sizeof(cl_int), (void*)&frame.rows)); + args.push_back(make_pair(sizeof(cl_int), (void*)&frame.cols)); + + args.push_back(make_pair(sizeof(cl_int), (void*)&frame_step)); + args.push_back(make_pair(sizeof(cl_int), (void*)&fgmask_step)); + args.push_back(make_pair(sizeof(cl_int), (void*)&weight_step)); + args.push_back(make_pair(sizeof(cl_int), (void*)&mean_step)); + args.push_back(make_pair(sizeof(cl_int), (void*)&modesUsed_step)); + args.push_back(make_pair(sizeof(cl_int), (void*)&var_step)); + + args.push_back(make_pair(sizeof(cl_float), (void*)&alphaT)); + args.push_back(make_pair(sizeof(cl_float), (void*)&alpha1)); + args.push_back(make_pair(sizeof(cl_float), (void*)&prune)); + + args.push_back(make_pair(sizeof(cl_int), (void*)&detectShadows_flag)); + + args.push_back(make_pair(sizeof(cl_int), (void*)&fgmask_offset_x)); + args.push_back(make_pair(sizeof(cl_int), (void*)&fgmask_offset_y)); + + args.push_back(make_pair(sizeof(cl_int), (void*)&frame_offset_x)); + args.push_back(make_pair(sizeof(cl_int), (void*)&frame_offset_y)); + args.push_back(make_pair(sizeof(cl_mem), (void*)&cl_constants)); + + openCLExecuteKernel(clCxt, &bgfg_mog, kernel_name, 
global_thread, local_thread, args, -1, -1, build_option); +} + +void cv::ocl::device::mog::getBackgroundImage2_ocl(int cn, const oclMat& modesUsed, const oclMat& weight, const oclMat& mean, oclMat& dst, int nmixtures) +{ + Context* clCxt = Context::getContext(); + + size_t local_thread[] = {32, 8, 1}; + size_t global_thread[] = {modesUsed.cols, modesUsed.rows, 1}; + + int weight_step = (int)(weight.step/weight.elemSize()); + int modesUsed_step = (int)(modesUsed.step/modesUsed.elemSize()); + int mean_step = (int)(mean.step/mean.elemSize()); + int dst_step = (int)(dst.step/dst.elemSize()); + + int dst_y = (int)(dst.offset/dst.step); + int dst_x = (int)(dst.offset%dst.step); + dst_x = dst_x/(int)dst.elemSize(); + + String kernel_name = "getBackgroundImage2_kernel"; + vector< pair > args; + + char build_option[50]; + if(cn == 1) + { + snprintf(build_option, 50, "-D CN1 -D NMIXTURES=%d", nmixtures); + }else + { + snprintf(build_option, 50, "-D NMIXTURES=%d", nmixtures); + } + + args.push_back(make_pair(sizeof(cl_mem), (void*)&modesUsed.data)); + args.push_back(make_pair(sizeof(cl_mem), (void*)&weight.data)); + args.push_back(make_pair(sizeof(cl_mem), (void*)&mean.data)); + args.push_back(make_pair(sizeof(cl_mem), (void*)&dst.data)); + args.push_back(make_pair(sizeof(cl_float), (void*)&c_TB)); + + args.push_back(make_pair(sizeof(cl_int), (void*)&modesUsed.rows)); + args.push_back(make_pair(sizeof(cl_int), (void*)&modesUsed.cols)); + + args.push_back(make_pair(sizeof(cl_int), (void*)&modesUsed_step)); + args.push_back(make_pair(sizeof(cl_int), (void*)&weight_step)); + args.push_back(make_pair(sizeof(cl_int), (void*)&mean_step)); + args.push_back(make_pair(sizeof(cl_int), (void*)&dst_step)); + + args.push_back(make_pair(sizeof(cl_int), (void*)&dst_x)); + args.push_back(make_pair(sizeof(cl_int), (void*)&dst_y)); + + openCLExecuteKernel(clCxt, &bgfg_mog, kernel_name, global_thread, local_thread, args, -1, -1, build_option); +} + 
+///////////////////////////////////////////////////////////////// +// MOG2 + +namespace mog2 +{ + // default parameters of gaussian background detection algorithm + const int defaultHistory = 500; // Learning rate; alpha = 1/defaultHistory2 + const float defaultVarThreshold = 4.0f * 4.0f; + const int defaultNMixtures = 5; // maximal number of Gaussians in mixture + const float defaultBackgroundRatio = 0.9f; // threshold sum of weights for background test + const float defaultVarThresholdGen = 3.0f * 3.0f; + const float defaultVarInit = 15.0f; // initial variance for new components + const float defaultVarMax = 5.0f * defaultVarInit; + const float defaultVarMin = 4.0f; + + // additional parameters + const float defaultfCT = 0.05f; // complexity reduction prior constant 0 - no reduction of number of components + const unsigned char defaultnShadowDetection = 127; // value to use in the segmentation mask for shadows, set 0 not to do shadow detection + const float defaultfTau = 0.5f; // Tau - shadow threshold, see the paper for explanation +} + +cv::ocl::MOG2::MOG2(int nmixtures) : frameSize_(0, 0), frameType_(0), nframes_(0) +{ + nmixtures_ = nmixtures > 0 ? 
nmixtures : mog2::defaultNMixtures; + + history = mog2::defaultHistory; + varThreshold = mog2::defaultVarThreshold; + bShadowDetection = true; + + backgroundRatio = mog2::defaultBackgroundRatio; + fVarInit = mog2::defaultVarInit; + fVarMax = mog2::defaultVarMax; + fVarMin = mog2::defaultVarMin; + + varThresholdGen = mog2::defaultVarThresholdGen; + fCT = mog2::defaultfCT; + nShadowDetection = mog2::defaultnShadowDetection; + fTau = mog2::defaultfTau; +} + +void cv::ocl::MOG2::initialize(cv::Size frameSize, int frameType) +{ + using namespace cv::ocl::device::mog; + CV_Assert(frameType == CV_8UC1 || frameType == CV_8UC3 || frameType == CV_8UC4); + + frameSize_ = frameSize; + frameType_ = frameType; + nframes_ = 0; + + int ch = CV_MAT_CN(frameType); + int work_ch = ch; + + // for each gaussian mixture of each pixel bg model we store ... + // the mixture weight (w), + // the mean (nchannels values) and + // the covariance + weight_.create(frameSize.height * nmixtures_, frameSize_.width, CV_32FC1); + weight_.setTo(Scalar::all(0)); + + variance_.create(frameSize.height * nmixtures_, frameSize_.width, CV_32FC1); + variance_.setTo(Scalar::all(0)); + + mean_.create(frameSize.height * nmixtures_, frameSize_.width, CV_32FC(work_ch)); //4 channels + mean_.setTo(Scalar::all(0)); + + //make the array for keeping track of the used modes per pixel - all zeros at start + bgmodelUsedModes_.create(frameSize_, CV_8UC1); + bgmodelUsedModes_.setTo(cv::Scalar::all(0)); + + loadConstants(varThreshold, backgroundRatio, varThresholdGen, fVarInit, fVarMin, fVarMax, fTau, nShadowDetection); +} + +void cv::ocl::MOG2::operator()(const oclMat& frame, oclMat& fgmask, float learningRate) +{ + using namespace cv::ocl::device::mog; + + int ch = frame.oclchannels(); + int work_ch = ch; + + if (nframes_ == 0 || learningRate >= 1.0f || frame.size() != frameSize_ || work_ch != mean_.oclchannels()) + initialize(frame.size(), frame.type()); + + fgmask.create(frameSize_, CV_8UC1); + 
fgmask.setTo(cv::Scalar::all(0)); + + ++nframes_; + learningRate = learningRate >= 0.0f && nframes_ > 1 ? learningRate : 1.0f / std::min(2 * nframes_, history); + CV_Assert(learningRate >= 0.0f); + + mog2_ocl(frame, frame.oclchannels(), fgmask, bgmodelUsedModes_, weight_, variance_, mean_, learningRate, -learningRate * fCT, bShadowDetection, nmixtures_); +} + +void cv::ocl::MOG2::getBackgroundImage(oclMat& backgroundImage) const +{ + using namespace cv::ocl::device::mog; + + backgroundImage.create(frameSize_, frameType_); + + cv::ocl::device::mog::getBackgroundImage2_ocl(backgroundImage.oclchannels(), bgmodelUsedModes_, weight_, mean_, backgroundImage, nmixtures_); +} + +void cv::ocl::MOG2::release() +{ + frameSize_ = Size(0, 0); + frameType_ = 0; + nframes_ = 0; + + weight_.release(); + variance_.release(); + mean_.release(); + + bgmodelUsedModes_.release(); +} + diff --git a/modules/ocl/src/opencl/bgfg_mog.cl b/modules/ocl/src/opencl/bgfg_mog.cl new file mode 100644 index 000000000..4ad6a52f7 --- /dev/null +++ b/modules/ocl/src/opencl/bgfg_mog.cl @@ -0,0 +1,543 @@ +/*M/////////////////////////////////////////////////////////////////////////////////////// +// +// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. +// +// By downloading, copying, installing or using the software you agree to this license. +// If you do not agree to this license, do not download, install, +// copy or use the software. +// +// +// License Agreement +// For Open Source Computer Vision Library +// +// Copyright (C) 2010-2013, Multicoreware, Inc., all rights reserved. +// Copyright (C) 2010-2013, Advanced Micro Devices, Inc., all rights reserved. +// Third party copyrights are property of their respective owners. 
+// +// @Authors +// Jin Ma jin@multicorewareinc.com +// +// Redistribution and use in source and binary forms, with or without modification, +// are permitted provided that the following conditions are met: +// +// * Redistribution's of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// * Redistribution's in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// * The name of the copyright holders may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// This software is provided by the copyright holders and contributors as is and +// any express or implied warranties, including, but not limited to, the implied +// warranties of merchantability and fitness for a particular purpose are disclaimed. +// In no event shall the Intel Corporation or contributors be liable for any direct, +// indirect, incidental, special, exemplary, or consequential damages +// (including, but not limited to, procurement of substitute goods or services; +// loss of use, data, or profits; or business interruption) however caused +// and on any theory of liability, whether in contract, strict liability, +// or tort (including negligence or otherwise) arising in any way out of +// the use of this software, even if advised of the possibility of such damage. 
+// +//M*/ + +#if defined (CN1) +#define T_FRAME uchar +#define T_MEAN_VAR float +#define CONVERT_TYPE convert_uchar_sat +#define F_ZERO (0.0f) +float cvt(uchar val) +{ + return val; +} + +float sqr(float val) +{ + return val * val; +} + +float sum(float val) +{ + return val; +} + +float clamp1(float var, float learningRate, float diff, float minVar) +{ + return fmax(var + learningRate * (diff * diff - var), minVar); +} +#else +#define T_FRAME uchar4 +#define T_MEAN_VAR float4 +#define CONVERT_TYPE convert_uchar4_sat +#define F_ZERO (0.0f, 0.0f, 0.0f, 0.0f) +float4 cvt(const uchar4 val) +{ + float4 result; + result.x = val.x; + result.y = val.y; + result.z = val.z; + result.w = val.w; + + return result; +} + +float sqr(const float4 val) +{ + return val.x * val.x + val.y * val.y + val.z * val.z; +} + +float sum(const float4 val) +{ + return (val.x + val.y + val.z); +} + +float4 clamp1(const float4 var, float learningRate, const float4 diff, float minVar) +{ + float4 result; + result.x = fmax(var.x + learningRate * (diff.x * diff.x - var.x), minVar); + result.y = fmax(var.y + learningRate * (diff.y * diff.y - var.y), minVar); + result.z = fmax(var.z + learningRate * (diff.z * diff.z - var.z), minVar); + result.w = 0.0f; + return result; +} +#endif + +typedef struct +{ + float c_Tb; + float c_TB; + float c_Tg; + float c_varInit; + float c_varMin; + float c_varMax; + float c_tau; + uchar c_shadowVal; +}con_srtuct_t; + +void swap(__global float* ptr, int x, int y, int k, int rows, int ptr_step) +{ + float val = ptr[(k * rows + y) * ptr_step + x]; + ptr[(k * rows + y) * ptr_step + x] = ptr[((k + 1) * rows + y) * ptr_step + x]; + ptr[((k + 1) * rows + y) * ptr_step + x] = val; +} + +void swap4(__global float4* ptr, int x, int y, int k, int rows, int ptr_step) +{ + float4 val = ptr[(k * rows + y) * ptr_step + x]; + ptr[(k * rows + y) * ptr_step + x] = ptr[((k + 1) * rows + y) * ptr_step + x]; + ptr[((k + 1) * rows + y) * ptr_step + x] = val; +} + +__kernel void 
mog_withoutLearning_kernel(__global T_FRAME* frame, __global uchar* fgmask, + __global float* weight, __global T_MEAN_VAR* mean, __global T_MEAN_VAR* var, + int frame_row, int frame_col, int frame_step, int fgmask_step, + int weight_step, int mean_step, int var_step, + float varThreshold, float backgroundRatio, int fgmask_offset_x, + int fgmask_offset_y, int frame_offset_x, int frame_offset_y) +{ /* Classifies pixel (x, y) against the stored mixture without modifying weight/mean/var: writes 255 to fgmask for foreground, 0 for background. */ + int x = get_global_id(0); + int y = get_global_id(1); + + if (x < frame_col && y < frame_row) + { + + T_MEAN_VAR pix = cvt(frame[(y + frame_offset_y) * frame_step + (x + frame_offset_x)]); + + int kHit = -1; + int kForeground = -1; + + for (int k = 0; k < (NMIXTURES); ++k) + { + if (weight[(k * frame_row + y) * weight_step + x] < 1.192092896e-07f) + break; + + T_MEAN_VAR mu = mean[(k * frame_row + y) * mean_step + x]; + T_MEAN_VAR _var = var[(k * frame_row + y) * var_step + x]; /* row offset is scaled by var_step, same layout as weight and mean */ + + T_MEAN_VAR diff = pix - mu; + + if (sqr(diff) < varThreshold * sum(_var)) + { + kHit = k; + break; + } + } + + if (kHit >= 0) + { + float wsum = 0.0f; + for (int k = 0; k < (NMIXTURES); ++k) + { + wsum += weight[(k * frame_row + y) * weight_step + x]; + + if (wsum > backgroundRatio) + { + kForeground = k + 1; + break; + } + } + } + + if(kHit < 0 || kHit >= kForeground) + fgmask[(y + fgmask_offset_y) * fgmask_step + (x + fgmask_offset_x)] = (uchar) (-1); + else + fgmask[(y + fgmask_offset_y) * fgmask_step + (x + fgmask_offset_x)] = (uchar) (0); + + } +} + +__kernel void mog_withLearning_kernel(__global T_FRAME* frame, __global uchar* fgmask, + __global float* weight, __global float* sortKey, __global T_MEAN_VAR* mean, + __global T_MEAN_VAR* var, int frame_row, int frame_col, int frame_step, int fgmask_step, + int weight_step, int sortKey_step, int mean_step, int var_step, + float varThreshold, float backgroundRatio, float learningRate, float minVar, + int fgmask_offset_x, int fgmask_offset_y, int frame_offset_x, int frame_offset_y) +{ + const float w0 = 0.05f; + const float sk0 = w0 / 30.0f; +
const float var0 = 900.f; + + int x = get_global_id(0); + int y = get_global_id(1); + + if(x < frame_col && y < frame_row) + { + + float wsum = 0.0f; + int kHit = -1; + int kForeground = -1; + int k = 0; + + T_MEAN_VAR pix = cvt(frame[(y + frame_offset_y) * frame_step + (x + frame_offset_x)]); + + for (; k < (NMIXTURES); ++k) + { + float w = weight[(k * frame_row + y) * weight_step + x]; + wsum += w; + + if (w < 1.192092896e-07f) + break; + + T_MEAN_VAR mu = mean[(k * frame_row + y) * mean_step + x]; + T_MEAN_VAR _var = var[(k * frame_row + y) * var_step + x]; + + T_MEAN_VAR diff = pix - mu; + + if (sqr(diff) < varThreshold * sum(_var)) + { + wsum -= w; + float dw = learningRate * (1.0f - w); + + _var = clamp1(_var, learningRate, diff, minVar); + + float sortKey_prev = w / sqr(sum(_var)); + sortKey[(k * frame_row + y) * sortKey_step + x] = sortKey_prev; + + float weight_prev = w + dw; + weight[(k * frame_row + y) * weight_step + x] = weight_prev; + + T_MEAN_VAR mean_prev = mu + learningRate * diff; + mean[(k * frame_row + y) * mean_step + x] = mean_prev; + + T_MEAN_VAR var_prev = _var; + var[(k * frame_row + y) * var_step + x] = var_prev; + + int k1 = k - 1; + + if (k1 >= 0) + { + float sortKey_next = sortKey[(k1 * frame_row + y) * sortKey_step + x]; + float weight_next = weight[(k1 * frame_row + y) * weight_step + x]; + T_MEAN_VAR mean_next = mean[(k1 * frame_row + y) * mean_step + x]; + T_MEAN_VAR var_next = var[(k1 * frame_row + y) * var_step + x]; + + for (; sortKey_next < sortKey_prev && k1 >= 0; --k1) + { + sortKey[(k1 * frame_row + y) * sortKey_step + x] = sortKey_prev; + sortKey[((k1 + 1) * frame_row + y) * sortKey_step + x] = sortKey_next; + + weight[(k1 * frame_row + y) * weight_step + x] = weight_prev; + weight[((k1 + 1) * frame_row + y) * weight_step + x] = weight_next; + + mean[(k1 * frame_row + y) * mean_step + x] = mean_prev; + mean[((k1 + 1) * frame_row + y) * mean_step + x] = mean_next; + + var[(k1 * frame_row + y) * var_step + x] = var_prev; + 
var[((k1 + 1) * frame_row + y) * var_step + x] = var_next; + + sortKey_prev = sortKey_next; + sortKey_next = k1 > 0 ? sortKey[((k1 - 1) * frame_row + y) * sortKey_step + x] : 0.0f; + + weight_prev = weight_next; + weight_next = k1 > 0 ? weight[((k1 - 1) * frame_row + y) * weight_step + x] : 0.0f; + + mean_prev = mean_next; + mean_next = k1 > 0 ? mean[((k1 - 1) * frame_row + y) * mean_step + x] : (T_MEAN_VAR)F_ZERO; + + var_prev = var_next; + var_next = k1 > 0 ? var[((k1 - 1) * frame_row + y) * var_step + x] : (T_MEAN_VAR)F_ZERO; + } + } + + kHit = k1 + 1; + break; + } + } + + if (kHit < 0) + { + kHit = k = k < ((NMIXTURES) - 1) ? k : ((NMIXTURES) - 1); + wsum += w0 - weight[(k * frame_row + y) * weight_step + x]; + + weight[(k * frame_row + y) * weight_step + x] = w0; + mean[(k * frame_row + y) * mean_step + x] = pix; + #if defined (CN1) + var[(k * frame_row + y) * var_step + x] = (T_MEAN_VAR)(var0); + #else + var[(k * frame_row + y) * var_step + x] = (T_MEAN_VAR)(var0, var0, var0, var0); + #endif + sortKey[(k * frame_row + y) * sortKey_step + x] = sk0; + } + else + { + for( ; k < (NMIXTURES); k++) + wsum += weight[(k * frame_row + y) * weight_step + x]; + } + + float wscale = 1.0f / wsum; + wsum = 0; + for (k = 0; k < (NMIXTURES); ++k) + { + float w = weight[(k * frame_row + y) * weight_step + x]; + wsum += w *= wscale; + + weight[(k * frame_row + y) * weight_step + x] = w; + sortKey[(k * frame_row + y) * sortKey_step + x] *= wscale; + + if (wsum > backgroundRatio && kForeground < 0) + kForeground = k + 1; + } + if(kHit >= kForeground) + fgmask[(y + fgmask_offset_y) * fgmask_step + (x + fgmask_offset_x)] = (uchar)(-1); + else + fgmask[(y + fgmask_offset_y) * fgmask_step + (x + fgmask_offset_x)] = (uchar)(0); + } +} + +__kernel void getBackgroundImage_kernel(__global float* weight, __global T_MEAN_VAR* mean, __global T_FRAME* dst, + int dst_row, int dst_col, int weight_step, int mean_step, int dst_step, + float backgroundRatio) +{ + int x = get_global_id(0); + int 
y = get_global_id(1); + + if(x < dst_col && y < dst_row) + { + T_MEAN_VAR meanVal = (T_MEAN_VAR)F_ZERO; + float totalWeight = 0.0f; + + for (int mode = 0; mode < (NMIXTURES); ++mode) + { + float _weight = weight[(mode * dst_row + y) * weight_step + x]; + + T_MEAN_VAR _mean = mean[(mode * dst_row + y) * mean_step + x]; + meanVal = meanVal + _weight * _mean; + + totalWeight += _weight; + + if(totalWeight > backgroundRatio) + break; + } + meanVal = meanVal * (1.f / totalWeight); + dst[y * dst_step + x] = CONVERT_TYPE(meanVal); + } +} + +__kernel void mog2_kernel(__global T_FRAME * frame, __global uchar* fgmask, __global float* weight, __global T_MEAN_VAR * mean, + __global uchar* modesUsed, __global float* variance, int frame_row, int frame_col, int frame_step, + int fgmask_step, int weight_step, int mean_step, int modesUsed_step, int var_step, float alphaT, float alpha1, float prune, + int detectShadows_flag, int fgmask_offset_x, int fgmask_offset_y, int frame_offset_x, int frame_offset_y, __constant con_srtuct_t* constants) +{ + int x = get_global_id(0); + int y = get_global_id(1); + + if(x < frame_col && y < frame_row) + { + T_MEAN_VAR pix = cvt(frame[(y + frame_offset_y) * frame_step + x + frame_offset_x]); + + bool background = false; // true - the pixel classified as background + + bool fitsPDF = false; //if it remains zero a new GMM mode will be added + + int nmodes = modesUsed[y * modesUsed_step + x]; + int nNewModes = nmodes; //current number of modes in GMM + + float totalWeight = 0.0f; + + for (int mode = 0; mode < nmodes; ++mode) + { + float _weight = alpha1 * weight[(mode * frame_row + y) * weight_step + x] + prune; + + if (!fitsPDF) + { + float var = variance[(mode * frame_row + y) * var_step + x]; + + T_MEAN_VAR _mean = mean[(mode * frame_row + y) * mean_step + x]; + + T_MEAN_VAR diff = _mean - pix; + float dist2 = sqr(diff); + + if (totalWeight < constants -> c_TB && dist2 < constants -> c_Tb * var) + background = true; + + if (dist2 < constants -> 
c_Tg * var) + { + fitsPDF = true; + _weight += alphaT; + float k = alphaT / _weight; + mean[(mode * frame_row + y) * mean_step + x] = _mean - k * diff; + float varnew = var + k * (dist2 - var); + varnew = fmax(varnew, constants -> c_varMin); + varnew = fmin(varnew, constants -> c_varMax); + + variance[(mode * frame_row + y) * var_step + x] = varnew; + for (int i = mode; i > 0; --i) + { + if (_weight < weight[((i - 1) * frame_row + y) * weight_step + x]) + break; + swap(weight, x, y, i - 1, frame_row, weight_step); + swap(variance, x, y, i - 1, frame_row, var_step); + #if defined (CN1) + swap(mean, x, y, i - 1, frame_row, mean_step); + #else + swap4(mean, x, y, i - 1, frame_row, mean_step); + #endif + } + } + } // !fitsPDF + + if (_weight < -prune) + { + _weight = 0.0; + nmodes--; + } + + weight[(mode * frame_row + y) * weight_step + x] = _weight; //update weight by the calculated value + totalWeight += _weight; + } + + totalWeight = 1.f / totalWeight; + for (int mode = 0; mode < nmodes; ++mode) + weight[(mode * frame_row + y) * weight_step + x] *= totalWeight; + + nmodes = nNewModes; + + if (!fitsPDF) + { + int mode = nmodes == (NMIXTURES) ? 
(NMIXTURES) - 1 : nmodes++; + + if (nmodes == 1) + weight[(mode * frame_row + y) * weight_step + x] = 1.f; + else + { + weight[(mode * frame_row + y) * weight_step + x] = alphaT; + + for (int i = 0; i < nmodes - 1; ++i) + weight[(i * frame_row + y) * weight_step + x] *= alpha1; + } + + mean[(mode * frame_row + y) * mean_step + x] = pix; + variance[(mode * frame_row + y) * var_step + x] = constants -> c_varInit; + + for (int i = nmodes - 1; i > 0; --i) + { + // check one up + if (alphaT < weight[((i - 1) * frame_row + y) * weight_step + x]) + break; + + swap(weight, x, y, i - 1, frame_row, weight_step); + swap(variance, x, y, i - 1, frame_row, var_step); + #if defined (CN1) + swap(mean, x, y, i - 1, frame_row, mean_step); + #else + swap4(mean, x, y, i - 1, frame_row, mean_step); + #endif + } + } + + modesUsed[y * modesUsed_step + x] = nmodes; + + bool isShadow = false; + if (detectShadows_flag && !background) + { + float tWeight = 0.0f; + + for (int mode = 0; mode < nmodes; ++mode) + { + T_MEAN_VAR _mean = mean[(mode * frame_row + y) * mean_step + x]; + + T_MEAN_VAR pix_mean = pix * _mean; + + float numerator = sum(pix_mean); + float denominator = sqr(_mean); + + if (denominator == 0) + break; + + if (numerator <= denominator && numerator >= constants -> c_tau * denominator) + { + float a = numerator / denominator; + + T_MEAN_VAR dD = a * _mean - pix; + + if (sqr(dD) < constants -> c_Tb * variance[(mode * frame_row + y) * var_step + x] * a * a) + { + isShadow = true; + break; + } + } + + tWeight += weight[(mode * frame_row + y) * weight_step + x]; + if (tWeight > constants -> c_TB) + break; + } + } + + fgmask[(y + fgmask_offset_y) * fgmask_step + x + fgmask_offset_x] = background ? 0 : isShadow ? 
constants -> c_shadowVal : 255; + } +} + +__kernel void getBackgroundImage2_kernel(__global uchar* modesUsed, __global float* weight, __global T_MEAN_VAR* mean, + __global T_FRAME* dst, float c_TB, int modesUsed_row, int modesUsed_col, int modesUsed_step, int weight_step, + int mean_step, int dst_step, int dst_x, int dst_y) +{ + int x = get_global_id(0); + int y = get_global_id(1); + + if(x < modesUsed_col && y < modesUsed_row) + { + int nmodes = modesUsed[y * modesUsed_step + x]; + + T_MEAN_VAR meanVal = (T_MEAN_VAR)F_ZERO; + + float totalWeight = 0.0f; + + for (int mode = 0; mode < nmodes; ++mode) + { + float _weight = weight[(mode * modesUsed_row + y) * weight_step + x]; + + T_MEAN_VAR _mean = mean[(mode * modesUsed_row + y) * mean_step + x]; + meanVal = meanVal + _weight * _mean; + + totalWeight += _weight; + + if(totalWeight > c_TB) + break; + } + + meanVal = meanVal * (1.f / totalWeight); + dst[(y + dst_y) * dst_step + x + dst_x] = CONVERT_TYPE(meanVal); + } +} diff --git a/modules/ocl/test/test_bgfg.cpp b/modules/ocl/test/test_bgfg.cpp new file mode 100644 index 000000000..f2bda321b --- /dev/null +++ b/modules/ocl/test/test_bgfg.cpp @@ -0,0 +1,232 @@ +/*M/////////////////////////////////////////////////////////////////////////////////////// +// +// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. +// +// By downloading, copying, installing or using the software you agree to this license. +// If you do not agree to this license, do not download, install, +// copy or use the software. +// +// +// License Agreement +// For Open Source Computer Vision Library +// +// Copyright (C) 2010-2013, Multicoreware, Inc., all rights reserved. +// Copyright (C) 2010-2013, Advanced Micro Devices, Inc., all rights reserved. +// Third party copyrights are property of their respective owners. 
+// +// @Authors +// Jin Ma, jin@multicorewareinc.com +// +// Redistribution and use in source and binary forms, with or without modification, +// are permitted provided that the following conditions are met: +// +// * Redistribution's of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// * Redistribution's in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// * The name of the copyright holders may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// This software is provided by the copyright holders and contributors "as is" and +// any express or implied warranties, including, but not limited to, the implied +// warranties of merchantability and fitness for a particular purpose are disclaimed. +// In no event shall the Intel Corporation or contributors be liable for any direct, +// indirect, incidental, special, exemplary, or consequential damages +// (including, but not limited to, procurement of substitute goods or services; +// loss of use, data, or profits; or business interruption) however caused +// and on any theory of liability, whether in contract, strict liability, +// or tort (including negligence or otherwise) arising in any way out of +// the use of this software, even if advised of the possibility of such damage.
+// +//M*/ + +#include "precomp.hpp" + +#ifdef HAVE_OPENCL + +using namespace cv; +using namespace cv::ocl; +using namespace cvtest; +using namespace testing; +using namespace std; + +extern string workdir; +////////////////////////////////////////////////////// +// MOG + +namespace +{ + IMPLEMENT_PARAM_CLASS(UseGray, bool) + IMPLEMENT_PARAM_CLASS(LearningRate, double) +} + +PARAM_TEST_CASE(mog, UseGray, LearningRate, bool) +{ + bool useGray; + double learningRate; + bool useRoi; + + virtual void SetUp() + { + useGray = GET_PARAM(0); + + learningRate = GET_PARAM(1); + + useRoi = GET_PARAM(2); + } +}; + +TEST_P(mog, Update) +{ + std::string inputFile = string(cvtest::TS::ptr()->get_data_path()) + "gpu/768x576.avi"; + cv::VideoCapture cap(inputFile); + ASSERT_TRUE(cap.isOpened()); + + cv::Mat frame; + cap >> frame; + ASSERT_FALSE(frame.empty()); + + cv::ocl::MOG mog; + cv::ocl::oclMat foreground = createMat_ocl(frame.size(), CV_8UC1, useRoi); + + cv::BackgroundSubtractorMOG mog_gold; + cv::Mat foreground_gold; + + for (int i = 0; i < 10; ++i) + { + cap >> frame; + ASSERT_FALSE(frame.empty()); + + if (useGray) + { + cv::Mat temp; + cv::cvtColor(frame, temp, cv::COLOR_BGR2GRAY); + cv::swap(temp, frame); + } + + mog(loadMat_ocl(frame, useRoi), foreground, (float)learningRate); + + mog_gold(frame, foreground_gold, learningRate); + + EXPECT_MAT_NEAR(foreground_gold, foreground, 0.0); + } +} +INSTANTIATE_TEST_CASE_P(OCL_Video, mog, testing::Combine( + testing::Values(UseGray(false), UseGray(true)), + testing::Values(LearningRate(0.0), LearningRate(0.01)), + Values(true, false))); + +////////////////////////////////////////////////////// +// MOG2 + +namespace +{ + IMPLEMENT_PARAM_CLASS(DetectShadow, bool) +} + +PARAM_TEST_CASE(mog2, UseGray, DetectShadow, bool) +{ + bool useGray; + bool detectShadow; + bool useRoi; + virtual void SetUp() + { + useGray = GET_PARAM(0); + detectShadow = GET_PARAM(1); + useRoi = GET_PARAM(2); + } +}; + +TEST_P(mog2, Update) +{ + std::string 
inputFile = string(cvtest::TS::ptr()->get_data_path()) + "gpu/768x576.avi"; + cv::VideoCapture cap(inputFile); + ASSERT_TRUE(cap.isOpened()); + + cv::Mat frame; + cap >> frame; + ASSERT_FALSE(frame.empty()); + + cv::ocl::MOG2 mog2; + mog2.bShadowDetection = detectShadow; + cv::ocl::oclMat foreground = createMat_ocl(frame.size(), CV_8UC1, useRoi); + + cv::BackgroundSubtractorMOG2 mog2_gold; + mog2_gold.set("detectShadows", detectShadow); + cv::Mat foreground_gold; + + for (int i = 0; i < 10; ++i) + { + cap >> frame; + ASSERT_FALSE(frame.empty()); + + if (useGray) + { + cv::Mat temp; + cv::cvtColor(frame, temp, cv::COLOR_BGR2GRAY); + cv::swap(temp, frame); + } + + mog2(loadMat_ocl(frame, useRoi), foreground); + + mog2_gold(frame, foreground_gold); + + if (detectShadow) + { + + EXPECT_MAT_SIMILAR(foreground_gold, foreground, 1e-2); + } + else + { + EXPECT_MAT_NEAR(foreground_gold, foreground, 0); + } + } +} + +TEST_P(mog2, getBackgroundImage) +{ + if (useGray) + return; + + std::string inputFile = string(cvtest::TS::ptr()->get_data_path()) + "gpu/768x576.avi"; + cv::VideoCapture cap(inputFile); + ASSERT_TRUE(cap.isOpened()); + + cv::Mat frame; + + cv::ocl::MOG2 mog2; + mog2.bShadowDetection = detectShadow; + cv::ocl::oclMat foreground; + + cv::BackgroundSubtractorMOG2 mog2_gold; + mog2_gold.set("detectShadows", detectShadow); + cv::Mat foreground_gold; + + for (int i = 0; i < 10; ++i) + { + cap >> frame; + ASSERT_FALSE(frame.empty()); + + mog2(loadMat_ocl(frame, useRoi), foreground); + + mog2_gold(frame, foreground_gold); + } + + cv::ocl::oclMat background = createMat_ocl(frame.size(), frame.type(), useRoi); + mog2.getBackgroundImage(background); + + cv::Mat background_gold; + mog2_gold.getBackgroundImage(background_gold); + + EXPECT_MAT_NEAR(background_gold, background, 1.0); +} + +INSTANTIATE_TEST_CASE_P(OCL_Video, mog2, testing::Combine( + testing::Values(UseGray(true), UseGray(false)), + testing::Values(DetectShadow(true), DetectShadow(false)), + Values(true, 
false))); + +#endif \ No newline at end of file diff --git a/modules/ocl/test/test_optflow.cpp b/modules/ocl/test/test_optflow.cpp index 4693d46dd..8fcc105a1 100644 --- a/modules/ocl/test/test_optflow.cpp +++ b/modules/ocl/test/test_optflow.cpp @@ -146,10 +146,10 @@ PARAM_TEST_CASE(TVL1, bool) TEST_P(TVL1, Accuracy) { - cv::Mat frame0 = readImage("gpu/opticalflow/rubberwhale1.png", cv::IMREAD_GRAYSCALE); + cv::Mat frame0 = readImage("F:/mcw/opencv/opencv/samples/gpu/rubberwhale1.png", cv::IMREAD_GRAYSCALE); ASSERT_FALSE(frame0.empty()); - cv::Mat frame1 = readImage("gpu/opticalflow/rubberwhale2.png", cv::IMREAD_GRAYSCALE); + cv::Mat frame1 = readImage("../../../opencv/samples/gpu/rubberwhale2.png", cv::IMREAD_GRAYSCALE); ASSERT_FALSE(frame1.empty()); cv::ocl::OpticalFlowDual_TVL1_OCL d_alg; @@ -168,7 +168,7 @@ TEST_P(TVL1, Accuracy) EXPECT_MAT_SIMILAR(gold[0], d_flowx, 3e-3); EXPECT_MAT_SIMILAR(gold[1], d_flowy, 3e-3); } -INSTANTIATE_TEST_CASE_P(OCL_Video, TVL1, Values(true, false)); +INSTANTIATE_TEST_CASE_P(OCL_Video, TVL1, Values(false, true)); ///////////////////////////////////////////////////////////////////////////////////////////////// diff --git a/modules/ocl/test/utility.cpp b/modules/ocl/test/utility.cpp index 440a89d4a..750c3c82b 100644 --- a/modules/ocl/test/utility.cpp +++ b/modules/ocl/test/utility.cpp @@ -100,6 +100,44 @@ Mat randomMat(Size size, int type, double minVal, double maxVal) return randomMat(TS::ptr()->get_rng(), size, type, minVal, maxVal, false); } +cv::ocl::oclMat createMat_ocl(Size size, int type, bool useRoi) +{ + Size size0 = size; + + if (useRoi) + { + size0.width += randomInt(5, 15); + size0.height += randomInt(5, 15); + } + + cv::ocl::oclMat d_m(size0, type); + + if (size0 != size) + d_m = d_m(Rect((size0.width - size.width) / 2, (size0.height - size.height) / 2, size.width, size.height)); + + return d_m; +} + +cv::ocl::oclMat loadMat_ocl(const Mat& m, bool useRoi) +{ + CV_Assert(m.type() == CV_8UC1 || m.type() == CV_8UC3); + 
cv::ocl::oclMat d_m; + d_m = createMat_ocl(m.size(), m.type(), useRoi); + + Size ls; + Point pt; + + d_m.locateROI(ls, pt); + + Rect roi(pt.x, pt.y, d_m.size().width, d_m.size().height); + + cv::ocl::oclMat m_ocl(m); + + cv::ocl::oclMat d_m_roi(d_m, roi); + + m_ocl.copyTo(d_m); + return d_m; +} /* void showDiff(InputArray gold_, InputArray actual_, double eps) { diff --git a/modules/ocl/test/utility.hpp b/modules/ocl/test/utility.hpp index 0b101ec50..1e17c6dbc 100644 --- a/modules/ocl/test/utility.hpp +++ b/modules/ocl/test/utility.hpp @@ -70,6 +70,9 @@ double checkNorm(const cv::Mat &m); double checkNorm(const cv::Mat &m1, const cv::Mat &m2); double checkSimilarity(const cv::Mat &m1, const cv::Mat &m2); +//oclMat create +cv::ocl::oclMat createMat_ocl(cv::Size size, int type, bool useRoi = false); +cv::ocl::oclMat loadMat_ocl(const cv::Mat& m, bool useRoi = false); #define EXPECT_MAT_NORM(mat, eps) \ { \ EXPECT_LE(checkNorm(cv::Mat(mat)), eps) \ diff --git a/samples/ocl/bgfg_segm.cpp b/samples/ocl/bgfg_segm.cpp new file mode 100644 index 000000000..410f34693 --- /dev/null +++ b/samples/ocl/bgfg_segm.cpp @@ -0,0 +1,135 @@ +#include +#include + +#include "opencv2/core/core.hpp" +#include "opencv2/ocl/ocl.hpp" +#include "opencv2/highgui/highgui.hpp" + +using namespace std; +using namespace cv; +using namespace cv::ocl; + +#define M_MOG 1 +#define M_MOG2 2 + +int main(int argc, const char** argv) +{ + + cv::CommandLineParser cmd(argc, argv, + "{ c | camera | false | use camera }" + "{ f | file | 768x576.avi | input video file }" + "{ m | method | mog | method (mog, mog2) }" + "{ h | help | false | print help message }"); + + if (cmd.get("help")) + { + cout << "Usage : bgfg_segm [options]" << endl; + cout << "Avaible options:" << endl; + cmd.printParams(); + return 0; + } + + bool useCamera = cmd.get("camera"); + string file = cmd.get("file"); + string method = cmd.get("method"); + + if (method != "mog" && method != "mog2") + { + cerr << "Incorrect method" << endl; + 
return -1; + } + + int m = method == "mog" ? M_MOG : M_MOG2; + + VideoCapture cap; + + if (useCamera) + cap.open(0); + else + cap.open(file); + + if (!cap.isOpened()) + { + cerr << "can not open camera or video file" << endl; + return -1; + } + + std::vectorinfo; + cv::ocl::getDevice(info); + + Mat frame; + cap >> frame; + + oclMat d_frame(frame); + + cv::ocl::MOG mog; + cv::ocl::MOG2 mog2; + + oclMat d_fgmask; + oclMat d_fgimg; + oclMat d_bgimg; + + d_fgimg.create(d_frame.size(), d_frame.type()); + + Mat fgmask; + Mat fgimg; + Mat bgimg; + + switch (m) + { + case M_MOG: + mog(d_frame, d_fgmask, 0.01f); + break; + + case M_MOG2: + mog2(d_frame, d_fgmask); + break; + } + + for(;;) + { + cap >> frame; + if (frame.empty()) + break; + d_frame.upload(frame); + + int64 start = cv::getTickCount(); + + //update the model + switch (m) + { + case M_MOG: + mog(d_frame, d_fgmask, 0.01f); + mog.getBackgroundImage(d_bgimg); + break; + + case M_MOG2: + mog2(d_frame, d_fgmask); + mog2.getBackgroundImage(d_bgimg); + break; + } + + double fps = cv::getTickFrequency() / (cv::getTickCount() - start); + std::cout << "FPS : " << fps << std::endl; + + d_fgimg.setTo(Scalar::all(0)); + d_frame.copyTo(d_fgimg, d_fgmask); + + d_fgmask.download(fgmask); + d_fgimg.download(fgimg); + if (!d_bgimg.empty()) + d_bgimg.download(bgimg); + + imshow("image", frame); + imshow("foreground mask", fgmask); + imshow("foreground image", fgimg); + if (!bgimg.empty()) + imshow("mean background image", bgimg); + + int key = waitKey(30); + if (key == 27) + break; + } + + return 0; +} From 8feaadc69f5467201913d06eff678588034d9dce Mon Sep 17 00:00:00 2001 From: Jin Ma Date: Fri, 16 Aug 2013 15:24:55 +0800 Subject: [PATCH 02/41] Resolved a compiling error under Linux. 
--- modules/ocl/src/bgfg_mog.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/ocl/src/bgfg_mog.cpp b/modules/ocl/src/bgfg_mog.cpp index 33bf143d2..0bdfe6f2f 100644 --- a/modules/ocl/src/bgfg_mog.cpp +++ b/modules/ocl/src/bgfg_mog.cpp @@ -69,7 +69,7 @@ namespace cv } } -#if _MSC_VER +#if defined _MSC_VER #define snprintf sprintf_s #endif From 916b92bc3b323a5e73aae18cef4a9aebfda11035 Mon Sep 17 00:00:00 2001 From: Jin Ma Date: Fri, 16 Aug 2013 15:33:43 +0800 Subject: [PATCH 03/41] Resolved a compiling warning under Windows. --- modules/ocl/perf/perf_bgfg.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/modules/ocl/perf/perf_bgfg.cpp b/modules/ocl/perf/perf_bgfg.cpp index e7aad759f..358b8ff38 100644 --- a/modules/ocl/perf/perf_bgfg.cpp +++ b/modules/ocl/perf/perf_bgfg.cpp @@ -48,9 +48,9 @@ using namespace cv::ocl; void cvtFrameFmt(std::vector& input, std::vector& output, int output_cn) { - for(int i=0; i Date: Fri, 16 Aug 2013 16:07:53 +0800 Subject: [PATCH 04/41] Resolved compiling errors under Linux. 
--- modules/ocl/perf/perf_bgfg.cpp | 2 +- modules/ocl/test/test_bgfg.cpp | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/modules/ocl/perf/perf_bgfg.cpp b/modules/ocl/perf/perf_bgfg.cpp index 358b8ff38..a75d6b8b9 100644 --- a/modules/ocl/perf/perf_bgfg.cpp +++ b/modules/ocl/perf/perf_bgfg.cpp @@ -46,7 +46,7 @@ using namespace cv; using namespace cv::ocl; -void cvtFrameFmt(std::vector& input, std::vector& output, int output_cn) +static void cvtFrameFmt(std::vector& input, std::vector& output, int output_cn) { for(int i = 0; i< (int)(input.size()); i++) { diff --git a/modules/ocl/test/test_bgfg.cpp b/modules/ocl/test/test_bgfg.cpp index f2bda321b..f5afd12ee 100644 --- a/modules/ocl/test/test_bgfg.cpp +++ b/modules/ocl/test/test_bgfg.cpp @@ -43,7 +43,7 @@ // //M*/ -#include "precomp.hpp" +#include "test_precomp.hpp" #ifdef HAVE_OPENCL From 1bcd1fd3a2db3c8741e63ff976aab17ac87b23d8 Mon Sep 17 00:00:00 2001 From: Jin Ma Date: Fri, 16 Aug 2013 17:39:17 +0800 Subject: [PATCH 05/41] Used perf_precomp.hpp instead of precomp.hpp for test. --- modules/ocl/perf/perf_bgfg.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/ocl/perf/perf_bgfg.cpp b/modules/ocl/perf/perf_bgfg.cpp index a75d6b8b9..d507a3b5e 100644 --- a/modules/ocl/perf/perf_bgfg.cpp +++ b/modules/ocl/perf/perf_bgfg.cpp @@ -42,7 +42,7 @@ // the use of this software, even if advised of the possibility of such damage. 
// +//M*/ -#include "precomp.hpp" +#include "perf_precomp.hpp" using namespace cv; using namespace cv::ocl; From c1de14c27ac555fe6e60fff1f3fd307a26e79a24 Mon Sep 17 00:00:00 2001 From: kdrobnyh Date: Sat, 17 Aug 2013 20:31:03 +0400 Subject: [PATCH 06/41] Fix bilateralFilter function --- modules/imgproc/src/smooth.cpp | 68 ++++++++++++------- .../imgproc/test/test_bilateral_filter.cpp | 2 +- 2 files changed, 46 insertions(+), 24 deletions(-) diff --git a/modules/imgproc/src/smooth.cpp b/modules/imgproc/src/smooth.cpp index 00be08618..3dad2c087 100644 --- a/modules/imgproc/src/smooth.cpp +++ b/modules/imgproc/src/smooth.cpp @@ -1875,6 +1875,41 @@ private: float *space_weight, *color_weight; }; +#if defined (HAVE_IPP) && (IPP_VERSION_MAJOR >= 7) +class IPPBilateralFilter_8u_Invoker : /* parallel_for_ body: runs the IPP 8u C1 bilateral filter on a band of dst rows, reading from the border-padded src; clears *ok if IPP reports an error */ + public ParallelLoopBody +{ +public: + IPPBilateralFilter_8u_Invoker(Mat &_src, Mat &_dst, double _sigma_color, double _sigma_space, int _radius, bool *_ok) : + ParallelLoopBody(), src(_src), dst(_dst), sigma_color(_sigma_color), sigma_space(_sigma_space), radius(_radius), ok(_ok) + { + *ok = true; + } + + virtual void operator() (const Range& range) const + { + int d = radius * 2 + 1; + IppiSize kernel = {d, d}; + IppiSize roi={dst.cols, range.end - range.start}; + int bufsize=0; + ippiFilterBilateralGetBufSize_8u_C1R( ippiFilterBilateralGauss, roi, kernel, &bufsize); + AutoBuffer buf(bufsize + 32); /* headroom: alignPtr below may advance the pointer by up to 31 bytes */ + IppiFilterBilateralSpec *pSpec = (IppiFilterBilateralSpec *)alignPtr(&buf[0], 32); + ippiFilterBilateralInit_8u_C1R( ippiFilterBilateralGauss, kernel, (Ipp32f)sigma_color, (Ipp32f)sigma_space, 1, pSpec ); + if( ippiFilterBilateral_8u_C1R( src.ptr(range.start) + radius * ((int)src.step[0] + 1), (int)src.step[0], dst.ptr(range.start), (int)dst.step[0], roi, kernel, pSpec ) < 0) + *ok = false; + } +private: + Mat &src; + Mat &dst; + double sigma_color; + double sigma_space; + int radius; + bool *ok; + const IPPBilateralFilter_8u_Invoker& operator= (const IPPBilateralFilter_8u_Invoker&); +}; +#endif
+ static void bilateralFilter_8u( const Mat& src, Mat& dst, int d, double sigma_color, double sigma_space, @@ -1904,32 +1939,19 @@ bilateralFilter_8u( const Mat& src, Mat& dst, int d, radius = MAX(radius, 1); d = radius*2 + 1; -#if 0 && defined HAVE_IPP && (IPP_VERSION_MAJOR >= 7) - if(cn == 1) - { - IppiSize kernel = {d, d}; - IppiSize roi={src.cols, src.rows}; - int bufsize=0; - ippiFilterBilateralGetBufSize_8u_C1R( ippiFilterBilateralGauss, roi, kernel, &bufsize); - AutoBuffer buf(bufsize+128); - IppiFilterBilateralSpec *pSpec = (IppiFilterBilateralSpec *)alignPtr(&buf[0], 32); - ippiFilterBilateralInit_8u_C1R( ippiFilterBilateralGauss, kernel, sigma_color*sigma_color, sigma_space*sigma_space, 1, pSpec ); - Mat tsrc; - const Mat* psrc = &src; - if( src.data == dst.data ) - { - src.copyTo(tsrc); - psrc = &tsrc; - } - if( ippiFilterBilateral_8u_C1R(psrc->data, (int)psrc->step[0], - dst.data, (int)dst.step[0], - roi, kernel, pSpec) >= 0 ) - return; - } -#endif Mat temp; copyMakeBorder( src, temp, radius, radius, radius, radius, borderType ); +#if defined HAVE_IPP && (IPP_VERSION_MAJOR >= 7) + if( cn == 1 ) + { + bool ok; + IPPBilateralFilter_8u_Invoker body(temp, dst, sigma_color * sigma_color, sigma_space * sigma_space, radius, &ok ); + parallel_for_(Range(0, dst.rows), body, dst.total()/(double)(1<<16)); + if( ok ) return; + } +#endif + vector _color_weight(cn*256); vector _space_weight(d*d); vector _space_ofs(d*d); diff --git a/modules/imgproc/test/test_bilateral_filter.cpp b/modules/imgproc/test/test_bilateral_filter.cpp index 2d45fdcf7..0bfc3dc4c 100644 --- a/modules/imgproc/test/test_bilateral_filter.cpp +++ b/modules/imgproc/test/test_bilateral_filter.cpp @@ -251,7 +251,7 @@ namespace cvtest int CV_BilateralFilterTest::validate_test_results(int test_case_index) { - static const double eps = 1; + static const double eps = 4; Mat reference_dst, reference_src; if (_src.depth() == CV_32F) From 1e8194fd3ccf37bb894f4af410f1fc62ff2c8b23 Mon Sep 17 00:00:00 2001 
From: Jin Ma Date: Thu, 22 Aug 2013 15:42:07 +0800 Subject: [PATCH 07/41] Optimized mog and mog2, which have much better performance. --- modules/ocl/src/bgfg_mog.cpp | 15 ++- modules/ocl/src/opencl/bgfg_mog.cl | 207 ++++++++++++++--------------- modules/ocl/test/test_bgfg.cpp | 2 +- 3 files changed, 114 insertions(+), 110 deletions(-) diff --git a/modules/ocl/src/bgfg_mog.cpp b/modules/ocl/src/bgfg_mog.cpp index 0bdfe6f2f..c079c6b8f 100644 --- a/modules/ocl/src/bgfg_mog.cpp +++ b/modules/ocl/src/bgfg_mog.cpp @@ -254,7 +254,7 @@ static void mog_withoutLearning(const oclMat& frame, int cn, oclMat& fgmask, ocl } -static void mog_withLearning(const oclMat& frame, int cn, oclMat& fgmask, oclMat& weight, oclMat& sortKey, oclMat& mean, oclMat& var, +static void mog_withLearning(const oclMat& frame, int cn, oclMat& fgmask_raw, oclMat& weight, oclMat& sortKey, oclMat& mean, oclMat& var, int nmixtures, float varThreshold, float backgroundRatio, float learningRate, float minVar) { Context* clCxt = Context::getContext(); @@ -262,6 +262,8 @@ static void mog_withLearning(const oclMat& frame, int cn, oclMat& fgmask, oclMat size_t local_thread[] = {32, 8, 1}; size_t global_thread[] = {frame.cols, frame.rows, 1}; + oclMat fgmask(fgmask_raw.size(), CV_32SC1); + int frame_step = (int)(frame.step/frame.elemSize()); int fgmask_step = (int)(fgmask.step/fgmask.elemSize()); int weight_step = (int)(weight.step/weight.elemSize()); @@ -318,6 +320,8 @@ static void mog_withLearning(const oclMat& frame, int cn, oclMat& fgmask, oclMat args.push_back(make_pair(sizeof(cl_int), (void*)&frame_offset_y)); openCLExecuteKernel(clCxt, &bgfg_mog, kernel_name, global_thread, local_thread, args, -1, -1, build_option); + fgmask.convertTo(fgmask, CV_8U); + fgmask.copyTo(fgmask_raw); } void cv::ocl::device::mog::mog_ocl(const oclMat& frame, int cn, oclMat& fgmask, oclMat& weight, oclMat& sortKey, oclMat& mean, oclMat& var, @@ -392,9 +396,11 @@ void cv::ocl::device::mog::loadConstants(float Tb, float TB, 
float Tg, float var (void *)constants, sizeof(_contant_struct)); } -void cv::ocl::device::mog::mog2_ocl(const oclMat& frame, int cn, oclMat& fgmask, oclMat& modesUsed, oclMat& weight, oclMat& variance, +void cv::ocl::device::mog::mog2_ocl(const oclMat& frame, int cn, oclMat& fgmaskRaw, oclMat& modesUsed, oclMat& weight, oclMat& variance, oclMat& mean, float alphaT, float prune, bool detectShadows, int nmixtures) { + oclMat fgmask(fgmaskRaw.size(), CV_32SC1); + Context* clCxt = Context::getContext(); const float alpha1 = 1.0f - alphaT; @@ -464,6 +470,9 @@ void cv::ocl::device::mog::mog2_ocl(const oclMat& frame, int cn, oclMat& fgmask, args.push_back(make_pair(sizeof(cl_mem), (void*)&cl_constants)); openCLExecuteKernel(clCxt, &bgfg_mog, kernel_name, global_thread, local_thread, args, -1, -1, build_option); + + fgmask.convertTo(fgmask, CV_8U); + fgmask.copyTo(fgmaskRaw); } void cv::ocl::device::mog::getBackgroundImage2_ocl(int cn, const oclMat& modesUsed, const oclMat& weight, const oclMat& mean, oclMat& dst, int nmixtures) @@ -580,7 +589,7 @@ void cv::ocl::MOG2::initialize(cv::Size frameSize, int frameType) mean_.setTo(Scalar::all(0)); //make the array for keeping track of the used modes per pixel - all zeros at start - bgmodelUsedModes_.create(frameSize_, CV_8UC1); + bgmodelUsedModes_.create(frameSize_, CV_32FC1); bgmodelUsedModes_.setTo(cv::Scalar::all(0)); loadConstants(varThreshold, backgroundRatio, varThresholdGen, fVarInit, fVarMin, fVarMax, fTau, nShadowDetection); diff --git a/modules/ocl/src/opencl/bgfg_mog.cl b/modules/ocl/src/opencl/bgfg_mog.cl index 4ad6a52f7..77bdb9c2a 100644 --- a/modules/ocl/src/opencl/bgfg_mog.cl +++ b/modules/ocl/src/opencl/bgfg_mog.cl @@ -188,7 +188,7 @@ __kernel void mog_withoutLearning_kernel(__global T_FRAME* frame, __global uchar } } -__kernel void mog_withLearning_kernel(__global T_FRAME* frame, __global uchar* fgmask, +__kernel void mog_withLearning_kernel(__global T_FRAME* frame, __global int* fgmask, __global float* weight, 
__global float* sortKey, __global T_MEAN_VAR* mean, __global T_MEAN_VAR* var, int frame_row, int frame_col, int frame_step, int fgmask_step, int weight_step, int sortKey_step, int mean_step, int var_step, @@ -202,130 +202,125 @@ __kernel void mog_withLearning_kernel(__global T_FRAME* frame, __global uchar* f int x = get_global_id(0); int y = get_global_id(1); - if(x < frame_col && y < frame_row) + if(x >= frame_col || y >= frame_row) return; + float wsum = 0.0f; + int kHit = -1; + int kForeground = -1; + int k = 0; + + T_MEAN_VAR pix = cvt(frame[(y + frame_offset_y) * frame_step + (x + frame_offset_x)]); + + for (; k < (NMIXTURES); ++k) { + float w = weight[(k * frame_row + y) * weight_step + x]; + wsum += w; - float wsum = 0.0f; - int kHit = -1; - int kForeground = -1; - int k = 0; + if (w < 1.192092896e-07f) + break; - T_MEAN_VAR pix = cvt(frame[(y + frame_offset_y) * frame_step + (x + frame_offset_x)]); - - for (; k < (NMIXTURES); ++k) + T_MEAN_VAR mu = mean[(k * frame_row + y) * mean_step + x]; + T_MEAN_VAR _var = var[(k * frame_row + y) * var_step + x]; + + float sortKey_prev, weight_prev; + T_MEAN_VAR mean_prev, var_prev; + if (sqr(pix - mu) < varThreshold * sum(_var)) { - float w = weight[(k * frame_row + y) * weight_step + x]; - wsum += w; + wsum -= w; + float dw = learningRate * (1.0f - w); - if (w < 1.192092896e-07f) - break; + _var = clamp1(_var, learningRate, pix - mu, minVar); - T_MEAN_VAR mu = mean[(k * frame_row + y) * mean_step + x]; - T_MEAN_VAR _var = var[(k * frame_row + y) * var_step + x]; + sortKey_prev = w / sqr(sum(_var)); + sortKey[(k * frame_row + y) * sortKey_step + x] = sortKey_prev; - T_MEAN_VAR diff = pix - mu; + weight_prev = w + dw; + weight[(k * frame_row + y) * weight_step + x] = weight_prev; - if (sqr(diff) < varThreshold * sum(_var)) + mean_prev = mu + learningRate * (pix - mu); + mean[(k * frame_row + y) * mean_step + x] = mean_prev; + + var_prev = _var; + var[(k * frame_row + y) * var_step + x] = var_prev; + } + + int k1 = k - 
1; + + if (k1 >= 0 && sqr(pix - mu) < varThreshold * sum(_var)) + { + float sortKey_next = sortKey[(k1 * frame_row + y) * sortKey_step + x]; + float weight_next = weight[(k1 * frame_row + y) * weight_step + x]; + T_MEAN_VAR mean_next = mean[(k1 * frame_row + y) * mean_step + x]; + T_MEAN_VAR var_next = var[(k1 * frame_row + y) * var_step + x]; + + for (; sortKey_next < sortKey_prev && k1 >= 0; --k1) { - wsum -= w; - float dw = learningRate * (1.0f - w); + sortKey[(k1 * frame_row + y) * sortKey_step + x] = sortKey_prev; + sortKey[((k1 + 1) * frame_row + y) * sortKey_step + x] = sortKey_next; - _var = clamp1(_var, learningRate, diff, minVar); + weight[(k1 * frame_row + y) * weight_step + x] = weight_prev; + weight[((k1 + 1) * frame_row + y) * weight_step + x] = weight_next; - float sortKey_prev = w / sqr(sum(_var)); - sortKey[(k * frame_row + y) * sortKey_step + x] = sortKey_prev; + mean[(k1 * frame_row + y) * mean_step + x] = mean_prev; + mean[((k1 + 1) * frame_row + y) * mean_step + x] = mean_next; - float weight_prev = w + dw; - weight[(k * frame_row + y) * weight_step + x] = weight_prev; + var[(k1 * frame_row + y) * var_step + x] = var_prev; + var[((k1 + 1) * frame_row + y) * var_step + x] = var_next; - T_MEAN_VAR mean_prev = mu + learningRate * diff; - mean[(k * frame_row + y) * mean_step + x] = mean_prev; + sortKey_prev = sortKey_next; + sortKey_next = k1 > 0 ? sortKey[((k1 - 1) * frame_row + y) * sortKey_step + x] : 0.0f; - T_MEAN_VAR var_prev = _var; - var[(k * frame_row + y) * var_step + x] = var_prev; + weight_prev = weight_next; + weight_next = k1 > 0 ? weight[((k1 - 1) * frame_row + y) * weight_step + x] : 0.0f; - int k1 = k - 1; + mean_prev = mean_next; + mean_next = k1 > 0 ? 
mean[((k1 - 1) * frame_row + y) * mean_step + x] : (T_MEAN_VAR)F_ZERO; - if (k1 >= 0) - { - float sortKey_next = sortKey[(k1 * frame_row + y) * sortKey_step + x]; - float weight_next = weight[(k1 * frame_row + y) * weight_step + x]; - T_MEAN_VAR mean_next = mean[(k1 * frame_row + y) * mean_step + x]; - T_MEAN_VAR var_next = var[(k1 * frame_row + y) * var_step + x]; - - for (; sortKey_next < sortKey_prev && k1 >= 0; --k1) - { - sortKey[(k1 * frame_row + y) * sortKey_step + x] = sortKey_prev; - sortKey[((k1 + 1) * frame_row + y) * sortKey_step + x] = sortKey_next; - - weight[(k1 * frame_row + y) * weight_step + x] = weight_prev; - weight[((k1 + 1) * frame_row + y) * weight_step + x] = weight_next; - - mean[(k1 * frame_row + y) * mean_step + x] = mean_prev; - mean[((k1 + 1) * frame_row + y) * mean_step + x] = mean_next; - - var[(k1 * frame_row + y) * var_step + x] = var_prev; - var[((k1 + 1) * frame_row + y) * var_step + x] = var_next; - - sortKey_prev = sortKey_next; - sortKey_next = k1 > 0 ? sortKey[((k1 - 1) * frame_row + y) * sortKey_step + x] : 0.0f; - - weight_prev = weight_next; - weight_next = k1 > 0 ? weight[((k1 - 1) * frame_row + y) * weight_step + x] : 0.0f; - - mean_prev = mean_next; - mean_next = k1 > 0 ? mean[((k1 - 1) * frame_row + y) * mean_step + x] : (T_MEAN_VAR)F_ZERO; - - var_prev = var_next; - var_next = k1 > 0 ? var[((k1 - 1) * frame_row + y) * var_step + x] : (T_MEAN_VAR)F_ZERO; - } - } - - kHit = k1 + 1; - break; + var_prev = var_next; + var_next = k1 > 0 ? var[((k1 - 1) * frame_row + y) * var_step + x] : (T_MEAN_VAR)F_ZERO; } } - if (kHit < 0) - { - kHit = k = k < ((NMIXTURES) - 1) ? 
k : ((NMIXTURES) - 1); - wsum += w0 - weight[(k * frame_row + y) * weight_step + x]; - - weight[(k * frame_row + y) * weight_step + x] = w0; - mean[(k * frame_row + y) * mean_step + x] = pix; - #if defined (CN1) - var[(k * frame_row + y) * var_step + x] = (T_MEAN_VAR)(var0); - #else - var[(k * frame_row + y) * var_step + x] = (T_MEAN_VAR)(var0, var0, var0, var0); - #endif - sortKey[(k * frame_row + y) * sortKey_step + x] = sk0; - } - else - { - for( ; k < (NMIXTURES); k++) - wsum += weight[(k * frame_row + y) * weight_step + x]; - } - - float wscale = 1.0f / wsum; - wsum = 0; - for (k = 0; k < (NMIXTURES); ++k) - { - float w = weight[(k * frame_row + y) * weight_step + x]; - wsum += w *= wscale; - - weight[(k * frame_row + y) * weight_step + x] = w; - sortKey[(k * frame_row + y) * sortKey_step + x] *= wscale; - - if (wsum > backgroundRatio && kForeground < 0) - kForeground = k + 1; - } - if(kHit >= kForeground) - fgmask[(y + fgmask_offset_y) * fgmask_step + (x + fgmask_offset_x)] = (uchar)(-1); - else - fgmask[(y + fgmask_offset_y) * fgmask_step + (x + fgmask_offset_x)] = (uchar)(0); + kHit = k1 + 1; + break; } + + if (kHit < 0) + { + kHit = k = k < ((NMIXTURES) - 1) ? 
k : ((NMIXTURES) - 1); + wsum += w0 - weight[(k * frame_row + y) * weight_step + x]; + + weight[(k * frame_row + y) * weight_step + x] = w0; + mean[(k * frame_row + y) * mean_step + x] = pix; +#if defined (CN1) + var[(k * frame_row + y) * var_step + x] = (T_MEAN_VAR)(var0); +#else + var[(k * frame_row + y) * var_step + x] = (T_MEAN_VAR)(var0, var0, var0, var0); +#endif + sortKey[(k * frame_row + y) * sortKey_step + x] = sk0; + } + else + { + for( ; k < (NMIXTURES); k++) + wsum += weight[(k * frame_row + y) * weight_step + x]; + } + + float wscale = 1.0f / wsum; + wsum = 0; + for (k = 0; k < (NMIXTURES); ++k) + { + float w = weight[(k * frame_row + y) * weight_step + x]; + w *= wscale; + wsum += w; + + weight[(k * frame_row + y) * weight_step + x] = w; + sortKey[(k * frame_row + y) * sortKey_step + x] *= wscale; + + kForeground = select(kForeground, k + 1, wsum > backgroundRatio && kForeground < 0); + } + fgmask[(y + fgmask_offset_y) * fgmask_step + (x + fgmask_offset_x)] = (uchar)(-(kHit >= kForeground)); } + __kernel void getBackgroundImage_kernel(__global float* weight, __global T_MEAN_VAR* mean, __global T_FRAME* dst, int dst_row, int dst_col, int weight_step, int mean_step, int dst_step, float backgroundRatio) @@ -355,8 +350,8 @@ __kernel void getBackgroundImage_kernel(__global float* weight, __global T_MEAN_ } } -__kernel void mog2_kernel(__global T_FRAME * frame, __global uchar* fgmask, __global float* weight, __global T_MEAN_VAR * mean, - __global uchar* modesUsed, __global float* variance, int frame_row, int frame_col, int frame_step, +__kernel void mog2_kernel(__global T_FRAME * frame, __global int* fgmask, __global float* weight, __global T_MEAN_VAR * mean, + __global int* modesUsed, __global float* variance, int frame_row, int frame_col, int frame_step, int fgmask_step, int weight_step, int mean_step, int modesUsed_step, int var_step, float alphaT, float alpha1, float prune, int detectShadows_flag, int fgmask_offset_x, int fgmask_offset_y, int 
frame_offset_x, int frame_offset_y, __constant con_srtuct_t* constants) { @@ -509,7 +504,7 @@ __kernel void mog2_kernel(__global T_FRAME * frame, __global uchar* fgmask, __gl } } -__kernel void getBackgroundImage2_kernel(__global uchar* modesUsed, __global float* weight, __global T_MEAN_VAR* mean, +__kernel void getBackgroundImage2_kernel(__global int* modesUsed, __global float* weight, __global T_MEAN_VAR* mean, __global T_FRAME* dst, float c_TB, int modesUsed_row, int modesUsed_col, int modesUsed_step, int weight_step, int mean_step, int dst_step, int dst_x, int dst_y) { diff --git a/modules/ocl/test/test_bgfg.cpp b/modules/ocl/test/test_bgfg.cpp index f5afd12ee..e35f26e3b 100644 --- a/modules/ocl/test/test_bgfg.cpp +++ b/modules/ocl/test/test_bgfg.cpp @@ -191,7 +191,7 @@ TEST_P(mog2, getBackgroundImage) if (useGray) return; - std::string inputFile = string(cvtest::TS::ptr()->get_data_path()) + "gpu/768x576.avi"; + std::string inputFile = string(cvtest::TS::ptr()->get_data_path()) + "video/768x576.avi"; cv::VideoCapture cap(inputFile); ASSERT_TRUE(cap.isOpened()); From 26b5eb3e3990b31be90ff3ca0afe18eaf68768da Mon Sep 17 00:00:00 2001 From: yao Date: Thu, 29 Aug 2013 10:48:15 +0800 Subject: [PATCH 08/41] add adaptive bilateral filter (cpp and ocl version) --- modules/imgproc/doc/filtering.rst | 22 + .../include/opencv2/imgproc/imgproc.hpp | 4 + modules/imgproc/src/smooth.cpp | 230 ++++++++++ modules/ocl/doc/image_filtering.rst | 2 +- modules/ocl/include/opencv2/ocl/ocl.hpp | 10 +- modules/ocl/perf/perf_filters.cpp | 79 ++++ modules/ocl/src/filtering.cpp | 98 ++++ .../opencl/filtering_adaptive_bilateral.cl | 424 ++++++++++++++++++ modules/ocl/test/test_filters.cpp | 76 ++++ modules/ocl/test/test_imgproc.cpp | 65 --- samples/ocl/adaptive_bilateral_filter.cpp | 51 +++ 11 files changed, 994 insertions(+), 67 deletions(-) create mode 100644 modules/ocl/src/opencl/filtering_adaptive_bilateral.cl create mode 100644 samples/ocl/adaptive_bilateral_filter.cpp diff --git 
a/modules/imgproc/doc/filtering.rst b/modules/imgproc/doc/filtering.rst index 3d230d1ca..1816c6a43 100755 --- a/modules/imgproc/doc/filtering.rst +++ b/modules/imgproc/doc/filtering.rst @@ -412,6 +412,28 @@ http://www.dai.ed.ac.uk/CVonline/LOCAL\_COPIES/MANDUCHI1/Bilateral\_Filtering.ht This filter does not work inplace. +adaptiveBilateralFilter +----------------------- +Applies the adaptive bilateral filter to an image. + +.. ocv:function:: void adaptiveBilateralFilter( InputArray src, OutputArray dst, Size ksize, double sigmaSpace, Point anchor=Point(-1, -1), int borderType=BORDER_DEFAULT ) + +.. ocv:pyfunction:: cv2.adaptiveBilateralFilter(src, ksize, sigmaSpace[, dst[, anchor[, borderType]]]) -> dst + + :param src: Source 8-bit, 1-channel or 3-channel image. + + :param dst: Destination image of the same size and type as ``src`` . + + :param ksize: Filter kernel size. Both the width and the height must be odd. + + :param sigmaSpace: Filter sigma in the coordinate space. It has a meaning similar to that of ``sigmaSpace`` in ``bilateralFilter``. + + :param anchor: Anchor point; the default value ``Point(-1,-1)`` means that the anchor is at the kernel center. Only the default value is currently supported. + + :param borderType: Border mode used to extrapolate pixels outside of the image. + +The function applies adaptive bilateral filtering to the input image. This filter is similar to ``bilateralFilter`` in that both dissimilarity from and distance to the center pixel are penalized. Instead of using ``sigmaColor``, it employs the variance of the pixel values in the neighbourhood. 
+ blur diff --git a/modules/imgproc/include/opencv2/imgproc/imgproc.hpp b/modules/imgproc/include/opencv2/imgproc/imgproc.hpp index f51bbaab7..1981a61d9 100644 --- a/modules/imgproc/include/opencv2/imgproc/imgproc.hpp +++ b/modules/imgproc/include/opencv2/imgproc/imgproc.hpp @@ -398,6 +398,10 @@ CV_EXPORTS_W void GaussianBlur( InputArray src, CV_EXPORTS_W void bilateralFilter( InputArray src, OutputArray dst, int d, double sigmaColor, double sigmaSpace, int borderType=BORDER_DEFAULT ); +//! smooths the image using adaptive bilateral filter +CV_EXPORTS_W void adaptiveBilateralFilter( InputArray src, OutputArray dst, Size ksize, + double sigmaSpace, Point anchor=Point(-1, -1), + int borderType=BORDER_DEFAULT ); //! smooths the image using the box filter. Each pixel is processed in O(1) time CV_EXPORTS_W void boxFilter( InputArray src, OutputArray dst, int ddepth, Size ksize, Point anchor=Point(-1,-1), diff --git a/modules/imgproc/src/smooth.cpp b/modules/imgproc/src/smooth.cpp index 00be08618..e38487aa5 100644 --- a/modules/imgproc/src/smooth.cpp +++ b/modules/imgproc/src/smooth.cpp @@ -2250,6 +2250,236 @@ void cv::bilateralFilter( InputArray _src, OutputArray _dst, int d, "Bilateral filtering is only implemented for 8u and 32f images" ); } + +/****************************************************************************************\ + Adaptive Bilateral Filtering +\****************************************************************************************/ + +namespace cv +{ +#define CALCVAR 1 +#define FIXED_WEIGHT 0 + +class adaptiveBilateralFilter_8u_Invoker : + public ParallelLoopBody +{ +public: + adaptiveBilateralFilter_8u_Invoker(Mat& _dest, const Mat& _temp, Size _ksize, double _sigma_space, Point _anchor) : + temp(&_temp), dest(&_dest), ksize(_ksize), sigma_space(_sigma_space), anchor(_anchor) + { + if( sigma_space <= 0 ) + sigma_space = 1; + CV_Assert((ksize.width & 1) && (ksize.height & 1)); + space_weight.resize(ksize.width * ksize.height); + double sigma2 = 
sigma_space * sigma_space; + int idx = 0; + int w = ksize.width / 2; + int h = ksize.height / 2; + for(int y=-h; y<=h; y++) + for(int x=-w; x<=w; x++) + { + space_weight[idx++] = (float)(sigma2 / (sigma2 + x * x + y * y)); + } + } + virtual void operator()(const Range& range) const + { + int cn = dest->channels(); + int anX = anchor.x; + + const uchar *tptr; + + for(int i = range.start;i < range.end; i++) + { + int startY = i; + if(cn == 1) + { + float var; + int currVal; + int sumVal = 0; + int sumValSqr = 0; + int currValCenter; + int currWRTCenter; + float weight; + float totalWeight = 0.; + float tmpSum = 0.; + + for(int j = 0;j < dest->cols *cn; j+=cn) + { + sumVal = 0; + sumValSqr= 0; + totalWeight = 0.; + tmpSum = 0.; + + // Top row: don't sum the very last element + int startLMJ = 0; + int endLMJ = ksize.width - 1; + int howManyAll = (anX *2 +1)*(ksize.width ); +#if CALCVAR + for(int x = startLMJ; x< endLMJ; x++) + { + tptr = temp->ptr(startY + x) +j; + for(int y=-anX; y<=anX; y++) + { + currVal = tptr[cn*(y+anX)]; + sumVal += currVal; + sumValSqr += (currVal *currVal); + } + } + var = ( (sumValSqr * howManyAll)- sumVal * sumVal ) / ( (float)(howManyAll*howManyAll)); +#else + var = 900.0; +#endif + startLMJ = 0; + endLMJ = ksize.width; + tptr = temp->ptr(startY + (startLMJ+ endLMJ)/2); + currValCenter =tptr[j+cn*anX]; + for(int x = startLMJ; x< endLMJ; x++) + { + tptr = temp->ptr(startY + x) +j; + for(int y=-anX; y<=anX; y++) + { +#if FIXED_WEIGHT + weight = 1.0; +#else + currVal = tptr[cn*(y+anX)]; + currWRTCenter = currVal - currValCenter; + + weight = var / ( var + (currWRTCenter * currWRTCenter) ) * space_weight[x*ksize.width+y+anX];; +#endif + tmpSum += ((float)tptr[cn*(y+anX)] * weight); + totalWeight += weight; + } + } + tmpSum /= totalWeight; + + dest->at(startY ,j)= static_cast(tmpSum); + } + } + else + { + assert(cn == 3); + float var_b, var_g, var_r; + int currVal_b, currVal_g, currVal_r; + int sumVal_b= 0, sumVal_g= 0, sumVal_r= 0; + int 
sumValSqr_b= 0, sumValSqr_g= 0, sumValSqr_r= 0; + int currValCenter_b= 0, currValCenter_g= 0, currValCenter_r= 0; + int currWRTCenter_b, currWRTCenter_g, currWRTCenter_r; + float weight_b, weight_g, weight_r; + float totalWeight_b= 0., totalWeight_g= 0., totalWeight_r= 0.; + float tmpSum_b = 0., tmpSum_g= 0., tmpSum_r = 0.; + + for(int j = 0;j < dest->cols *cn; j+=cn) + { + sumVal_b= 0, sumVal_g= 0, sumVal_r= 0; + sumValSqr_b= 0, sumValSqr_g= 0, sumValSqr_r= 0; + totalWeight_b= 0., totalWeight_g= 0., totalWeight_r= 0.; + tmpSum_b = 0., tmpSum_g= 0., tmpSum_r = 0.; + + // Top row: don't sum the very last element + int startLMJ = 0; + int endLMJ = ksize.width - 1; + int howManyAll = (anX *2 +1)*(ksize.width); +#if CALCVAR + for(int x = startLMJ; x< endLMJ; x++) + { + tptr = temp->ptr(startY + x) +j; + for(int y=-anX; y<=anX; y++) + { + currVal_b = tptr[cn*(y+anX)], currVal_g = tptr[cn*(y+anX)+1], currVal_r =tptr[cn*(y+anX)+2]; + sumVal_b += currVal_b; + sumVal_g += currVal_g; + sumVal_r += currVal_r; + sumValSqr_b += (currVal_b *currVal_b); + sumValSqr_g += (currVal_g *currVal_g); + sumValSqr_r += (currVal_r *currVal_r); + } + } + var_b = ( (sumValSqr_b * howManyAll)- sumVal_b * sumVal_b ) / ( (float)(howManyAll*howManyAll)); + var_g = ( (sumValSqr_g * howManyAll)- sumVal_g * sumVal_g ) / ( (float)(howManyAll*howManyAll)); + var_r = ( (sumValSqr_r * howManyAll)- sumVal_r * sumVal_r ) / ( (float)(howManyAll*howManyAll)); +#else + var_b = 900.0; var_g = 900.0;var_r = 900.0; +#endif + startLMJ = 0; + endLMJ = ksize.width; + tptr = temp->ptr(startY + (startLMJ+ endLMJ)/2) + j; + currValCenter_b =tptr[cn*anX], currValCenter_g =tptr[cn*anX+1], currValCenter_r =tptr[cn*anX+2]; + for(int x = startLMJ; x< endLMJ; x++) + { + tptr = temp->ptr(startY + x) +j; + for(int y=-anX; y<=anX; y++) + { +#if FIXED_WEIGHT + weight_b = 1.0; + weight_g = 1.0; + weight_r = 1.0; +#else + currVal_b = tptr[cn*(y+anX)];currVal_g=tptr[cn*(y+anX)+1];currVal_r=tptr[cn*(y+anX)+2]; + currWRTCenter_b = 
currVal_b - currValCenter_b; + currWRTCenter_g = currVal_g - currValCenter_g; + currWRTCenter_r = currVal_r - currValCenter_r; + + float cur_spw = space_weight[x*ksize.width+y+anX]; + weight_b = var_b / ( var_b + (currWRTCenter_b * currWRTCenter_b) ) * cur_spw; + weight_g = var_g / ( var_g + (currWRTCenter_g * currWRTCenter_g) ) * cur_spw; + weight_r = var_r / ( var_r + (currWRTCenter_r * currWRTCenter_r) ) * cur_spw; +#endif + tmpSum_b += ((float)tptr[cn*(y+anX)] * weight_b); + tmpSum_g += ((float)tptr[cn*(y+anX)+1] * weight_g); + tmpSum_r += ((float)tptr[cn*(y+anX)+2] * weight_r); + totalWeight_b += weight_b, totalWeight_g += weight_g, totalWeight_r += weight_r; + } + } + tmpSum_b /= totalWeight_b; + tmpSum_g /= totalWeight_g; + tmpSum_r /= totalWeight_r; + + dest->at(startY,j )= static_cast(tmpSum_b); + dest->at(startY,j+1)= static_cast(tmpSum_g); + dest->at(startY,j+2)= static_cast(tmpSum_r); + } + } + } + } +private: + const Mat *temp; + Mat *dest; + Size ksize; + double sigma_space; + Point anchor; + vector space_weight; +}; +static void adaptiveBilateralFilter_8u( const Mat& src, Mat& dst, Size ksize, double sigmaSpace, Point anchor, int borderType ) +{ + Size size = src.size(); + + CV_Assert( (src.type() == CV_8UC1 || src.type() == CV_8UC3) && + src.type() == dst.type() && src.size() == dst.size() && + src.data != dst.data ); + Mat temp; + copyMakeBorder(src, temp, anchor.x, anchor.y, anchor.x, anchor.y, borderType); + + adaptiveBilateralFilter_8u_Invoker body(dst, temp, ksize, sigmaSpace, anchor); + parallel_for_(Range(0, size.height), body, dst.total()/(double)(1<<16)); +} +} +void cv::adaptiveBilateralFilter( InputArray _src, OutputArray _dst, Size ksize, + double sigmaSpace, Point anchor, int borderType ) +{ + Mat src = _src.getMat(); + _dst.create(src.size(), src.type()); + Mat dst = _dst.getMat(); + + CV_Assert(src.type() == CV_8UC1 || src.type() == CV_8UC3); + + anchor = normalizeAnchor(anchor,ksize); + if( src.depth() == CV_8U ) + 
adaptiveBilateralFilter_8u( src, dst, ksize, sigmaSpace, anchor, borderType ); + else + CV_Error( CV_StsUnsupportedFormat, + "Adaptive Bilateral filtering is only implemented for 8u images" ); +} + ////////////////////////////////////////////////////////////////////////////////////////// CV_IMPL void diff --git a/modules/ocl/doc/image_filtering.rst b/modules/ocl/doc/image_filtering.rst index ce89e85de..1f90eedda 100644 --- a/modules/ocl/doc/image_filtering.rst +++ b/modules/ocl/doc/image_filtering.rst @@ -127,7 +127,7 @@ ocl::bilateralFilter -------------------- Returns void -.. ocv:function:: void ocl::bilateralFilter(const oclMat &src, oclMat &dst, int d, double sigmaColor, double sigmaSpave, int borderType=BORDER_DEFAULT) +.. ocv:function:: void ocl::bilateralFilter(const oclMat &src, oclMat &dst, int d, double sigmaColor, double sigmaSpace, int borderType=BORDER_DEFAULT) :param src: The source image diff --git a/modules/ocl/include/opencv2/ocl/ocl.hpp b/modules/ocl/include/opencv2/ocl/ocl.hpp index 5b3642d03..f2b858caa 100644 --- a/modules/ocl/include/opencv2/ocl/ocl.hpp +++ b/modules/ocl/include/opencv2/ocl/ocl.hpp @@ -520,7 +520,15 @@ namespace cv //! bilateralFilter // supports 8UC1 8UC4 - CV_EXPORTS void bilateralFilter(const oclMat& src, oclMat& dst, int d, double sigmaColor, double sigmaSpave, int borderType=BORDER_DEFAULT); + CV_EXPORTS void bilateralFilter(const oclMat& src, oclMat& dst, int d, double sigmaColor, double sigmaSpace, int borderType=BORDER_DEFAULT); + + //! Applies an adaptive bilateral filter to the input image + // This is not truly a bilateral filter. Instead of using user provided fixed parameters, + // the function calculates a constant at each window based on local standard deviation, + // and use this constant to do filtering. + // supports 8UC1 8UC3 + CV_EXPORTS void adaptiveBilateralFilter(const oclMat& src, oclMat& dst, Size ksize, double sigmaSpace, Point anchor = Point(-1, -1), int borderType=BORDER_DEFAULT); + //! 
computes exponent of each matrix element (b = e**a) // supports only CV_32FC1 type CV_EXPORTS void exp(const oclMat &a, oclMat &b); diff --git a/modules/ocl/perf/perf_filters.cpp b/modules/ocl/perf/perf_filters.cpp index 28c290096..5f510d63f 100644 --- a/modules/ocl/perf/perf_filters.cpp +++ b/modules/ocl/perf/perf_filters.cpp @@ -321,3 +321,82 @@ PERF_TEST_P(filter2DFixture, filter2D, else OCL_PERF_ELSE } + +///////////// Bilateral//////////////////////// + +typedef Size_MatType BilateralFixture; + +PERF_TEST_P(BilateralFixture, Bilateral, + ::testing::Combine(OCL_TYPICAL_MAT_SIZES, + OCL_PERF_ENUM(CV_8UC1, CV_8UC3))) +{ + const Size_MatType_t params = GetParam(); + const Size srcSize = get<0>(params); + const int type = get<1>(params), d = 7; + double sigmacolor = 50.0, sigmaspace = 50.0; + + Mat src(srcSize, type), dst(srcSize, type); + declare.in(src, WARMUP_RNG).out(dst); + + if (srcSize == OCL_SIZE_4000 && type == CV_8UC3) + declare.time(8); + + if (RUN_OCL_IMPL) + { + ocl::oclMat oclSrc(src), oclDst(srcSize, type); + + OCL_TEST_CYCLE() cv::ocl::bilateralFilter(oclSrc, oclDst, d, sigmacolor, sigmaspace); + + oclDst.download(dst); + + SANITY_CHECK(dst); + } + else if (RUN_PLAIN_IMPL) + { + TEST_CYCLE() cv::bilateralFilter(src, dst, d, sigmacolor, sigmaspace); + + SANITY_CHECK(dst); + } + else + OCL_PERF_ELSE +} + +///////////// adaptiveBilateral//////////////////////// + +typedef Size_MatType adaptiveBilateralFixture; + +PERF_TEST_P(adaptiveBilateralFixture, adaptiveBilateral, + ::testing::Combine(OCL_TYPICAL_MAT_SIZES, + OCL_PERF_ENUM(CV_8UC1, CV_8UC3))) +{ + const Size_MatType_t params = GetParam(); + const Size srcSize = get<0>(params); + const int type = get<1>(params); + double sigmaspace = 10.0; + Size ksize(9,9); + + Mat src(srcSize, type), dst(srcSize, type); + declare.in(src, WARMUP_RNG).out(dst); + + if (srcSize == OCL_SIZE_4000) + declare.time(15); + + if (RUN_OCL_IMPL) + { + ocl::oclMat oclSrc(src), oclDst(srcSize, type); + + OCL_TEST_CYCLE() 
cv::ocl::adaptiveBilateralFilter(oclSrc, oclDst, ksize, sigmaspace); + + oclDst.download(dst); + + SANITY_CHECK(dst, 1.); + } + else if (RUN_PLAIN_IMPL) + { + TEST_CYCLE() cv::adaptiveBilateralFilter(src, dst, ksize, sigmaspace); + + SANITY_CHECK(dst); + } + else + OCL_PERF_ELSE +} diff --git a/modules/ocl/src/filtering.cpp b/modules/ocl/src/filtering.cpp index a08f0ed2b..c0557980b 100644 --- a/modules/ocl/src/filtering.cpp +++ b/modules/ocl/src/filtering.cpp @@ -64,6 +64,7 @@ extern const char *filter_sep_row; extern const char *filter_sep_col; extern const char *filtering_laplacian; extern const char *filtering_morph; +extern const char *filtering_adaptive_bilateral; } } @@ -1616,3 +1617,100 @@ void cv::ocl::GaussianBlur(const oclMat &src, oclMat &dst, Size ksize, double si Ptr f = createGaussianFilter_GPU(src.type(), ksize, sigma1, sigma2, bordertype); f->apply(src, dst); }
+
+////////////////////////////////////////////////////////////////////////////////////////////////////
+// Adaptive Bilateral Filter
+
+// Applies the adaptive bilateral filter to an 8UC1 or 8UC3 image on the OpenCL device.
+// Builds the table of spatial weights sigma^2 / (sigma^2 + x^2 + y^2) on the host, uploads it
+// and launches the "edgeEnhancingFilter" kernel, passing the kernel geometry and the border
+// mode as build-time defines. 'ksize' must be odd in both dimensions; 'dst' is (re)allocated
+// with the size and type of 'src'.
+void cv::ocl::adaptiveBilateralFilter(const oclMat& src, oclMat& dst, Size ksize, double sigmaSpace, Point anchor, int borderType)
+{
+    CV_Assert((ksize.width & 1) && (ksize.height & 1));  // ksize must be odd
+    CV_Assert(src.type() == CV_8UC1 || src.type() == CV_8UC3);  // source must be an 8-bit image with 1 or 3 channels
+
+    //the following constants may be adjusted for performance concerns
+    const size_t blockSizeX = 64, blockSizeY = 1;
+    // Each work-group produces blockSizeX - (ksize.width - 1) output columns; the kernel must
+    // therefore be narrower than the work-group, otherwise that count is zero or wraps around.
+    CV_Assert(ksize.width / 2 * 2 < (int)blockSizeX);
+
+    if( sigmaSpace <= 0 )
+        sigmaSpace = 1;
+    Mat lut(Size(ksize.width, ksize.height), CV_32FC1);
+    double sigma2 = sigmaSpace * sigmaSpace;
+    int w = ksize.width / 2;
+    int h = ksize.height / 2;
+    for(int y=-h; y<=h; y++)
+        for(int x=-w; x<=w; x++)
+        {
+            lut.at<float>(y + h, x + w) = (float)(sigma2 / (sigma2 + x * x + y * y));
+        }
+    oclMat dlut(lut);
+    int depth = src.depth();
+    int cn = src.oclchannels();
+
+    normalizeAnchor(anchor, ksize);
+    const static String kernelName = "edgeEnhancingFilter";
+
+    dst.create(src.size(), src.type());
+
+    // name of the border-mode macro handed to the kernel
+    const char* btype = 0;
+    switch(borderType)
+    {
+    case BORDER_CONSTANT:
+        btype = "BORDER_CONSTANT";
+        break;
+    case BORDER_REPLICATE:
+        btype = "BORDER_REPLICATE";
+        break;
+    case BORDER_REFLECT:
+        btype = "BORDER_REFLECT";
+        break;
+    case BORDER_WRAP:
+        btype = "BORDER_WRAP";
+        break;
+    case BORDER_REFLECT101:
+        btype = "BORDER_REFLECT_101";
+        break;
+    default:
+        CV_Error(CV_StsBadArg, "This border type is not supported");
+        break;
+    }
+
+    // Number of additional rows handled by each work-item. It depends on the kernel height of
+    // this very call and therefore must not be a function-local static, which would keep the
+    // value computed by the first call.
+    const size_t EXTRA = ksize.height - 1;
+
+    //Normalize the result by default
+    const float alpha = (float)(ksize.height * ksize.width);
+
+    const size_t gSize = blockSizeX - ksize.width / 2 * 2;
+    const size_t globalSizeX = (src.cols) % gSize == 0 ?
+        src.cols / gSize * blockSizeX :
+        (src.cols / gSize + 1) * blockSizeX;
+    const size_t rows_per_thread = 1 + EXTRA;
+    const size_t globalSizeY = ((src.rows + rows_per_thread - 1) / rows_per_thread) % blockSizeY == 0 ?
+        ((src.rows + rows_per_thread - 1) / rows_per_thread) :
+        (((src.rows + rows_per_thread - 1) / rows_per_thread) / blockSizeY + 1) * blockSizeY;
+
+    size_t globalThreads[3] = { globalSizeX, globalSizeY, 1};
+    size_t localThreads[3] = { blockSizeX, blockSizeY, 1};
+
+    char build_options[250];
+
+    // Kernel geometry is passed at build time: rows per work-item (EXTRA), work-group width
+    // (THREADS), anchor (anX, anY), kernel size (ksX, ksY) and the border-mode macro.
+    sprintf(build_options,
+        "-D VAR_PER_CHANNEL=1 -D CALCVAR=1 -D FIXED_WEIGHT=0 -D EXTRA=%d"
+        " -D THREADS=%d -D anX=%d -D anY=%d -D ksX=%d -D ksY=%d -D %s",
+        static_cast<int>(EXTRA), static_cast<int>(blockSizeX), anchor.x, anchor.y, ksize.width, ksize.height, btype);
+
+    std::vector<std::pair<size_t, const void *> > args;
+    args.push_back(std::make_pair(sizeof(cl_mem), &src.data));
+    args.push_back(std::make_pair(sizeof(cl_mem), &dst.data));
+    args.push_back(std::make_pair(sizeof(cl_float), (void *)&alpha));
+    args.push_back(std::make_pair(sizeof(cl_int), (void *)&src.offset));
+    args.push_back(std::make_pair(sizeof(cl_int), (void *)&src.wholerows));
+    args.push_back(std::make_pair(sizeof(cl_int), (void *)&src.wholecols));
+    args.push_back(std::make_pair(sizeof(cl_int), (void *)&src.step));
+    args.push_back(std::make_pair(sizeof(cl_int), (void *)&dst.offset));
+    args.push_back(std::make_pair(sizeof(cl_int), (void *)&dst.rows));
+    args.push_back(std::make_pair(sizeof(cl_int), (void *)&dst.cols));
+    args.push_back(std::make_pair(sizeof(cl_int), (void *)&dst.step));
+    args.push_back(std::make_pair(sizeof(cl_mem), &dlut.data));
+    int lut_step = dlut.step1();
+    args.push_back(std::make_pair(sizeof(cl_int), (void *)&lut_step));
+
+    openCLExecuteKernel(Context::getContext(), &filtering_adaptive_bilateral, kernelName,
+        globalThreads, localThreads, args, cn, depth, build_options);
+}
\ No newline at end of file diff --git a/modules/ocl/src/opencl/filtering_adaptive_bilateral.cl b/modules/ocl/src/opencl/filtering_adaptive_bilateral.cl new file mode 100644 index 000000000..a8e0fd17e --- /dev/null +++ b/modules/ocl/src/opencl/filtering_adaptive_bilateral.cl @@ -0,0 +1,424 @@ +/*M/////////////////////////////////////////////////////////////////////////////////////// +// +// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. +// +// By downloading, copying, installing or using the software you agree to this license. 
+// If you do not agree to this license, do not download, install, +// copy or use the software. +// +// +// License Agreement +// For Open Source Computer Vision Library +// +// Copyright (C) 2010-2013, Multicoreware, Inc., all rights reserved. +// Copyright (C) 2010-2013, Advanced Micro Devices, Inc., all rights reserved. +// Third party copyrights are property of their respective owners. +// +// @Authors +// Harris Gasparakis, harris.gasparakis@amd.com +// Xiaopeng Fu, fuxiaopeng2222@163.com +// Yao Wang, bitwangyaoyao@gmail.com +// +// Redistribution and use in source and binary forms, with or without modification, +// are permitted provided that the following conditions are met: +// +// * Redistribution's of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// * Redistribution's in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other oclMaterials provided with the distribution. +// +// * The name of the copyright holders may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// This software is provided by the copyright holders and contributors as is and +// any express or implied warranties, including, but not limited to, the implied +// warranties of merchantability and fitness for a particular purpose are disclaimed. 
+// In no event shall the Intel Corporation or contributors be liable for any direct, +// indirect, incidental, special, exemplary, or consequential damages +// (including, but not limited to, procurement of substitute goods or services; +// loss of use, data, or profits; or business interruption) however caused +// and on any theory of liability, whether in contract, strict liability, +// or tort (including negligence or otherwise) arising in any way out of +// the use of this software, even if advised of the possibility of such damage. +// +//M*/ + + +#ifdef BORDER_REPLICATE +//BORDER_REPLICATE: aaaaaa|abcdefgh|hhhhhhh +#define ADDR_L(i, l_edge, r_edge) ((i) < (l_edge) ? (l_edge) : (i)) +#define ADDR_R(i, r_edge, addr) ((i) >= (r_edge) ? (r_edge)-1 : (addr)) +#define ADDR_H(i, t_edge, b_edge) ((i) < (t_edge) ? (t_edge) :(i)) +#define ADDR_B(i, b_edge, addr) ((i) >= (b_edge) ? (b_edge)-1 :(addr)) +#endif + +#ifdef BORDER_REFLECT +//BORDER_REFLECT: fedcba|abcdefgh|hgfedcb +#define ADDR_L(i, l_edge, r_edge) ((i) < (l_edge) ? -(i)-1 : (i)) +#define ADDR_R(i, r_edge, addr) ((i) >= (r_edge) ? -(i)-1+((r_edge)<<1) : (addr)) +#define ADDR_H(i, t_edge, b_edge) ((i) < (t_edge) ? -(i)-1 : (i)) +#define ADDR_B(i, b_edge, addr) ((i) >= (b_edge) ? -(i)-1+((b_edge)<<1) : (addr)) +#endif + +#ifdef BORDER_REFLECT_101 +//BORDER_REFLECT_101: gfedcb|abcdefgh|gfedcba +#define ADDR_L(i, l_edge, r_edge) ((i) < (l_edge) ? -(i) : (i)) +#define ADDR_R(i, r_edge, addr) ((i) >= (r_edge) ? -(i)-2+((r_edge)<<1) : (addr)) +#define ADDR_H(i, t_edge, b_edge) ((i) < (t_edge) ? -(i) : (i)) +#define ADDR_B(i, b_edge, addr) ((i) >= (b_edge) ? -(i)-2+((b_edge)<<1) : (addr)) +#endif + +//blur function does not support BORDER_WRAP +#ifdef BORDER_WRAP +//BORDER_WRAP: cdefgh|abcdefgh|abcdefg +#define ADDR_L(i, l_edge, r_edge) ((i) < (l_edge) ? (i)+(r_edge) : (i)) +#define ADDR_R(i, r_edge, addr) ((i) >= (r_edge) ? (i)-(r_edge) : (addr)) +#define ADDR_H(i, t_edge, b_edge) ((i) < (t_edge) ? 
(i)+(b_edge) : (i)) +#define ADDR_B(i, b_edge, addr) ((i) >= (b_edge) ? (i)-(b_edge) : (addr)) +#endif + +__kernel void +edgeEnhancingFilter_C4_D0( + __global const uchar4 * restrict src, + __global uchar4 *dst, + float alpha, + int src_offset, + int src_whole_rows, + int src_whole_cols, + int src_step, + int dst_offset, + int dst_rows, + int dst_cols, + int dst_step, + __global const float* lut, + int lut_step) +{ + int col = get_local_id(0); + const int gX = get_group_id(0); + const int gY = get_group_id(1); + + int src_x_off = (src_offset % src_step) >> 2; + int src_y_off = src_offset / src_step; + int dst_x_off = (dst_offset % dst_step) >> 2; + int dst_y_off = dst_offset / dst_step; + + int startX = gX * (THREADS-ksX+1) - anX + src_x_off; + int startY = (gY * (1+EXTRA)) - anY + src_y_off; + + int dst_startX = gX * (THREADS-ksX+1) + dst_x_off; + int dst_startY = (gY * (1+EXTRA)) + dst_y_off; + + int posX = dst_startX - dst_x_off + col; + int posY = (gY * (1+EXTRA)) ; + + __local uchar4 data[ksY+EXTRA][THREADS]; + + float4 tmp_sum[1+EXTRA]; + for(int tmpint = 0; tmpint < 1+EXTRA; tmpint++) + { + tmp_sum[tmpint] = (float4)(0,0,0,0); + } + +#ifdef BORDER_CONSTANT + bool con; + uchar4 ss; + for(int j = 0; j < ksY+EXTRA; j++) + { + con = (startX+col >= 0 && startX+col < src_whole_cols && startY+j >= 0 && startY+j < src_whole_rows); + + int cur_col = clamp(startX + col, 0, src_whole_cols); + if(con) + { + ss = src[(startY+j)*(src_step>>2) + cur_col]; + } + + data[j][col] = con ? 
ss : (uchar4)0; + } +#else + for(int j= 0; j < ksY+EXTRA; j++) + { + int selected_row; + int selected_col; + selected_row = ADDR_H(startY+j, 0, src_whole_rows); + selected_row = ADDR_B(startY+j, src_whole_rows, selected_row); + + selected_col = ADDR_L(startX+col, 0, src_whole_cols); + selected_col = ADDR_R(startX+col, src_whole_cols, selected_col); + + data[j][col] = src[selected_row * (src_step>>2) + selected_col]; + } +#endif + + barrier(CLK_LOCAL_MEM_FENCE); + + float4 var[1+EXTRA]; + +#if VAR_PER_CHANNEL + float4 weight; + float4 totalWeight = (float4)(0,0,0,0); +#else + float weight; + float totalWeight = 0; +#endif + + int4 currValCenter; + int4 currWRTCenter; + + int4 sumVal = 0; + int4 sumValSqr = 0; + + if(col < (THREADS-(ksX-1))) + { + int4 currVal; + + int howManyAll = (2*anX+1)*(ksY); + + //find variance of all data + int startLMj; + int endLMj ; +#if CALCVAR + // Top row: don't sum the very last element + for(int extraCnt = 0; extraCnt <=EXTRA; extraCnt++) + { + startLMj = extraCnt; + endLMj = ksY+extraCnt-1; + sumVal =0; + sumValSqr=0; + for(int j = startLMj; j < endLMj; j++) + { + for(int i=-anX; i<=anX; i++) + { + currVal = convert_int4(data[j][col+anX+i]) ; + + sumVal += currVal; + sumValSqr += mul24(currVal, currVal); + } + } + var[extraCnt] = convert_float4( ( (sumValSqr * howManyAll)- mul24(sumVal , sumVal) ) ) / ( (float)(howManyAll*howManyAll) ) ; +#else + var[extraCnt] = (float4)(900.0, 900.0, 900.0, 0.0); +#endif + } + + for(int extraCnt = 0; extraCnt <= EXTRA; extraCnt++) + { + + // top row: include the very first element, even on first time + startLMj = extraCnt; + // go all the way, unless this is the last local mem chunk, + // then stay within limits - 1 + endLMj = extraCnt + ksY; + + // Top row: don't sum the very last element + currValCenter = convert_int4( data[ (startLMj + endLMj)/2][col+anX] ); + + for(int j = startLMj, lut_j = 0; j < endLMj; j++, lut_j++) + { + for(int i=-anX; i<=anX; i++) + { +#if FIXED_WEIGHT +#if VAR_PER_CHANNEL 
+ weight.x = 1.0f; + weight.y = 1.0f; + weight.z = 1.0f; + weight.w = 1.0f; +#else + weight = 1.0f; +#endif +#else + currVal = convert_int4(data[j][col+anX+i]) ; + currWRTCenter = currVal-currValCenter; + +#if VAR_PER_CHANNEL + weight = var[extraCnt] / (var[extraCnt] + convert_float4(currWRTCenter * currWRTCenter)) * (float4)(lut[lut_j*lut_step+anX+i]); + //weight.x = var[extraCnt].x / ( var[extraCnt].x + (float) mul24(currWRTCenter.x , currWRTCenter.x) ) ; + //weight.y = var[extraCnt].y / ( var[extraCnt].y + (float) mul24(currWRTCenter.y , currWRTCenter.y) ) ; + //weight.z = var[extraCnt].z / ( var[extraCnt].z + (float) mul24(currWRTCenter.z , currWRTCenter.z) ) ; + //weight.w = 0; +#else + weight = 1.0f/(1.0f+( mul24(currWRTCenter.x, currWRTCenter.x) + mul24(currWRTCenter.y, currWRTCenter.y) + mul24(currWRTCenter.z, currWRTCenter.z))/(var.x+var.y+var.z)); +#endif +#endif + tmp_sum[extraCnt] += convert_float4(data[j][col+anX+i]) * weight; + totalWeight += weight; + } + } + + tmp_sum[extraCnt] /= totalWeight; + + if(posX >= 0 && posX < dst_cols && (posY+extraCnt) >= 0 && (posY+extraCnt) < dst_rows) + { + dst[(dst_startY+extraCnt) * (dst_step>>2)+ dst_startX + col] = convert_uchar4(tmp_sum[extraCnt]); + } + +#if VAR_PER_CHANNEL + totalWeight = (float4)(0,0,0,0); +#else + totalWeight = 0; +#endif + } + } +} + + +__kernel void +edgeEnhancingFilter_C1_D0( + __global const uchar * restrict src, + __global uchar *dst, + float alpha, + int src_offset, + int src_whole_rows, + int src_whole_cols, + int src_step, + int dst_offset, + int dst_rows, + int dst_cols, + int dst_step, + __global const float * lut, + int lut_step) +{ + int col = get_local_id(0); + const int gX = get_group_id(0); + const int gY = get_group_id(1); + + int src_x_off = (src_offset % src_step); + int src_y_off = src_offset / src_step; + int dst_x_off = (dst_offset % dst_step); + int dst_y_off = dst_offset / dst_step; + + int startX = gX * (THREADS-ksX+1) - anX + src_x_off; + int startY = (gY * (1+EXTRA)) 
- anY + src_y_off; + + int dst_startX = gX * (THREADS-ksX+1) + dst_x_off; + int dst_startY = (gY * (1+EXTRA)) + dst_y_off; + + int posX = dst_startX - dst_x_off + col; + int posY = (gY * (1+EXTRA)) ; + + __local uchar data[ksY+EXTRA][THREADS]; + + float tmp_sum[1+EXTRA]; + for(int tmpint = 0; tmpint < 1+EXTRA; tmpint++) + { + tmp_sum[tmpint] = (float)(0); + } + +#ifdef BORDER_CONSTANT + bool con; + uchar ss; + for(int j = 0; j < ksY+EXTRA; j++) + { + con = (startX+col >= 0 && startX+col < src_whole_cols && startY+j >= 0 && startY+j < src_whole_rows); + + int cur_col = clamp(startX + col, 0, src_whole_cols); + if(con) + { + ss = src[(startY+j)*(src_step) + cur_col]; + } + + data[j][col] = con ? ss : 0; + } +#else + for(int j= 0; j < ksY+EXTRA; j++) + { + int selected_row; + int selected_col; + selected_row = ADDR_H(startY+j, 0, src_whole_rows); + selected_row = ADDR_B(startY+j, src_whole_rows, selected_row); + + selected_col = ADDR_L(startX+col, 0, src_whole_cols); + selected_col = ADDR_R(startX+col, src_whole_cols, selected_col); + + data[j][col] = src[selected_row * (src_step) + selected_col]; + } +#endif + + barrier(CLK_LOCAL_MEM_FENCE); + + float var[1+EXTRA]; + + float weight; + float totalWeight = 0; + + int currValCenter; + int currWRTCenter; + + int sumVal = 0; + int sumValSqr = 0; + + if(col < (THREADS-(ksX-1))) + { + int currVal; + + int howManyAll = (2*anX+1)*(ksY); + + //find variance of all data + int startLMj; + int endLMj; +#if CALCVAR + // Top row: don't sum the very last element + for(int extraCnt=0; extraCnt<=EXTRA; extraCnt++) + { + startLMj = extraCnt; + endLMj = ksY+extraCnt-1; + sumVal = 0; + sumValSqr =0; + for(int j = startLMj; j < endLMj; j++) + { + for(int i=-anX; i<=anX; i++) + { + currVal = (uint)(data[j][col+anX+i]) ; + + sumVal += currVal; + sumValSqr += mul24(currVal, currVal); + } + } + var[extraCnt] = (float)( ( (sumValSqr * howManyAll)- mul24(sumVal , sumVal) ) ) / ( (float)(howManyAll*howManyAll) ) ; +#else + var[extraCnt] = 
(float)(900.0); +#endif + } + + for(int extraCnt = 0; extraCnt <= EXTRA; extraCnt++) + { + + // top row: include the very first element, even on first time + startLMj = extraCnt; + // go all the way, unless this is the last local mem chunk, + // then stay within limits - 1 + endLMj = extraCnt + ksY; + + // Top row: don't sum the very last element + currValCenter = (int)( data[ (startLMj + endLMj)/2][col+anX] ); + + for(int j = startLMj, lut_j = 0; j < endLMj; j++, lut_j++) + { + for(int i=-anX; i<=anX; i++) + { +#if FIXED_WEIGHT + weight = 1.0f; +#else + currVal = (int)(data[j][col+anX+i]) ; + currWRTCenter = currVal-currValCenter; + + weight = var[extraCnt] / (var[extraCnt] + (float)mul24(currWRTCenter,currWRTCenter)) * lut[lut_j*lut_step+anX+i] ; +#endif + tmp_sum[extraCnt] += (float)(data[j][col+anX+i] * weight); + totalWeight += weight; + } + } + + tmp_sum[extraCnt] /= totalWeight; + + + if(posX >= 0 && posX < dst_cols && (posY+extraCnt) >= 0 && (posY+extraCnt) < dst_rows) + { + dst[(dst_startY+extraCnt) * (dst_step)+ dst_startX + col] = (uchar)(tmp_sum[extraCnt]); + } + + totalWeight = 0; + } + } +} diff --git a/modules/ocl/test/test_filters.cpp b/modules/ocl/test/test_filters.cpp index c98c8f40d..4a22ec503 100644 --- a/modules/ocl/test/test_filters.cpp +++ b/modules/ocl/test/test_filters.cpp @@ -353,6 +353,69 @@ TEST_P(Filter2D, Mat) Near(1); } } +//////////////////////////////////////////////////////////////////////////////////////////////////// +// Bilateral +struct Bilateral : FilterTestBase +{ + int type; + cv::Size ksize; + int bordertype; + double sigmacolor, sigmaspace; + + virtual void SetUp() + { + type = GET_PARAM(0); + ksize = GET_PARAM(1); + bordertype = GET_PARAM(3); + Init(type); + cv::RNG &rng = TS::ptr()->get_rng(); + sigmacolor = rng.uniform(20, 100); + sigmaspace = rng.uniform(10, 40); + } +}; + +TEST_P(Bilateral, Mat) +{ + for(int j = 0; j < LOOP_TIMES; j++) + { + random_roi(); + cv::bilateralFilter(mat1_roi, dst_roi, ksize.width, 
sigmacolor, sigmaspace, bordertype); + cv::ocl::bilateralFilter(gmat1, gdst, ksize.width, sigmacolor, sigmaspace, bordertype); + Near(1); + } + +} + +//////////////////////////////////////////////////////////////////////////////////////////////////// +// AdaptiveBilateral +struct AdaptiveBilateral : FilterTestBase +{ + int type; + cv::Size ksize; + int bordertype; + Point anchor; + virtual void SetUp() + { + type = GET_PARAM(0); + ksize = GET_PARAM(1); + bordertype = GET_PARAM(3); + Init(type); + anchor = Point(-1,-1); + } +}; + +TEST_P(AdaptiveBilateral, Mat) +{ + for(int j = 0; j < LOOP_TIMES; j++) + { + random_roi(); + cv::adaptiveBilateralFilter(mat1_roi, dst_roi, ksize, 5, anchor, bordertype); + cv::ocl::adaptiveBilateralFilter(gmat1, gdst, ksize, 5, anchor, bordertype); + Near(1); + } + +} + INSTANTIATE_TEST_CASE_P(Filter, Blur, Combine( Values(CV_8UC1, CV_8UC3, CV_8UC4, CV_32FC1, CV_32FC4), Values(cv::Size(3, 3), cv::Size(5, 5), cv::Size(7, 7)), @@ -400,4 +463,17 @@ INSTANTIATE_TEST_CASE_P(Filter, Filter2D, testing::Combine( Values(Size(0, 0)), //not use Values((MatType)cv::BORDER_CONSTANT, (MatType)cv::BORDER_REFLECT101, (MatType)cv::BORDER_REPLICATE, (MatType)cv::BORDER_REFLECT))); +INSTANTIATE_TEST_CASE_P(Filter, Bilateral, Combine( + Values(CV_8UC1, CV_8UC3), + Values(Size(5, 5), Size(9, 9)), + Values(Size(0, 0)), //not use + Values((MatType)cv::BORDER_CONSTANT, (MatType)cv::BORDER_REPLICATE, + (MatType)cv::BORDER_REFLECT, (MatType)cv::BORDER_WRAP, (MatType)cv::BORDER_REFLECT_101))); + +INSTANTIATE_TEST_CASE_P(Filter, AdaptiveBilateral, Combine( + Values(CV_8UC1, CV_8UC3), + Values(Size(5, 5), Size(9, 9)), + Values(Size(0, 0)), //not use + Values((MatType)cv::BORDER_CONSTANT, (MatType)cv::BORDER_REPLICATE, + (MatType)cv::BORDER_REFLECT, (MatType)cv::BORDER_REFLECT_101))); #endif // HAVE_OPENCL diff --git a/modules/ocl/test/test_imgproc.cpp b/modules/ocl/test/test_imgproc.cpp index 46cd257c8..426fcef3f 100644 --- a/modules/ocl/test/test_imgproc.cpp +++ 
b/modules/ocl/test/test_imgproc.cpp @@ -475,56 +475,6 @@ TEST_P(equalizeHist, Mat) } - - - -////////////////////////////////bilateralFilter//////////////////////////////////////////// - -struct bilateralFilter : ImgprocTestBase {}; - -TEST_P(bilateralFilter, Mat) -{ - double sigmacolor = 50.0; - int radius = 9; - int d = 2 * radius + 1; - double sigmaspace = 20.0; - int bordertype[] = {cv::BORDER_CONSTANT, cv::BORDER_REPLICATE, cv::BORDER_REFLECT, cv::BORDER_WRAP, cv::BORDER_REFLECT_101}; - //const char *borderstr[] = {"BORDER_CONSTANT", "BORDER_REPLICATE", "BORDER_REFLECT", "BORDER_WRAP", "BORDER_REFLECT_101"}; - - if (mat1.depth() != CV_8U || mat1.type() != dst.type()) - { - cout << "Unsupported type" << endl; - EXPECT_DOUBLE_EQ(0.0, 0.0); - } - else - { - for(size_t i = 0; i < sizeof(bordertype) / sizeof(int); i++) - for(int j = 0; j < LOOP_TIMES; j++) - { - random_roi(); - if(((bordertype[i] != cv::BORDER_CONSTANT) && (bordertype[i] != cv::BORDER_REPLICATE) && (mat1_roi.cols <= radius)) || (mat1_roi.cols <= radius) || (mat1_roi.rows <= radius) || (mat1_roi.rows <= radius)) - { - continue; - } - //if((dstx>=radius) && (dsty >= radius) && (dstx+cldst_roi.cols+radius <=cldst_roi.wholecols) && (dsty+cldst_roi.rows+radius <= cldst_roi.wholerows)) - //{ - // dst_roi.adjustROI(radius, radius, radius, radius); - // cldst_roi.adjustROI(radius, radius, radius, radius); - //} - //else - //{ - // continue; - //} - - cv::bilateralFilter(mat1_roi, dst_roi, d, sigmacolor, sigmaspace, bordertype[i] | cv::BORDER_ISOLATED); - cv::ocl::bilateralFilter(clmat1_roi, cldst_roi, d, sigmacolor, sigmaspace, bordertype[i] | cv::BORDER_ISOLATED); - Near(1.); - } - } -} - - - ////////////////////////////////copyMakeBorder//////////////////////////////////////////// struct CopyMakeBorder : ImgprocTestBase {}; @@ -1622,21 +1572,6 @@ INSTANTIATE_TEST_CASE_P(ImgprocTestBase, equalizeHist, Combine( NULL_TYPE, Values(false))); // Values(false) is the reserved parameter 
-//INSTANTIATE_TEST_CASE_P(ImgprocTestBase, bilateralFilter, Combine( -// ONE_TYPE(CV_8UC1), -// NULL_TYPE, -// ONE_TYPE(CV_8UC1), -// NULL_TYPE, -// NULL_TYPE, -// Values(false))); // Values(false) is the reserved parameter -INSTANTIATE_TEST_CASE_P(ImgprocTestBase, bilateralFilter, Combine( - Values(CV_8UC1, CV_8UC3), - NULL_TYPE, - Values(CV_8UC1, CV_8UC3), - NULL_TYPE, - NULL_TYPE, - Values(false))); // Values(false) is the reserved parameter - INSTANTIATE_TEST_CASE_P(ImgprocTestBase, CopyMakeBorder, Combine( Values(CV_8UC1, CV_8UC3, CV_8UC4, CV_32SC1, CV_32SC3, CV_32SC4, CV_32FC1, CV_32FC3, CV_32FC4), diff --git a/samples/ocl/adaptive_bilateral_filter.cpp b/samples/ocl/adaptive_bilateral_filter.cpp new file mode 100644 index 000000000..df226b195 --- /dev/null +++ b/samples/ocl/adaptive_bilateral_filter.cpp @@ -0,0 +1,51 @@ +// This sample shows the difference of adaptive bilateral filter and bilateral filter. +#include "opencv2/core/core.hpp" +#include "opencv2/imgproc/imgproc.hpp" +#include "opencv2/highgui/highgui.hpp" +#include "opencv2/ocl/ocl.hpp" + +using namespace cv; +using namespace std; + + +int main( int argc, const char** argv ) +{ + const char* keys = + "{ i | input | | specify input image }" + "{ k | ksize | 5 | specify kernel size }"; + CommandLineParser cmd(argc, argv, keys); + string src_path = cmd.get("i"); + int ks = cmd.get("k"); + const char * winName[] = {"input", "adaptive bilateral CPU", "adaptive bilateral OpenCL", "bilateralFilter OpenCL"}; + + Mat src = imread(src_path); + Mat abFilterCPU; + if(src.empty()){ + //cout << "error read image: " << src_path << endl; + return -1; + } + + std::vector infos; + ocl::getDevice(infos); + + ocl::oclMat dsrc(src), dABFilter, dBFilter; + + Size ksize(ks, ks); + adaptiveBilateralFilter(src,abFilterCPU, ksize, 10); + ocl::adaptiveBilateralFilter(dsrc, dABFilter, ksize, 10); + ocl::bilateralFilter(dsrc, dBFilter, ks, 30, 9); + + Mat abFilter = dABFilter; + Mat bFilter = dBFilter; + imshow(winName[0], 
src); + + imshow(winName[1], abFilterCPU); + + imshow(winName[2], abFilter); + + imshow(winName[3], bFilter); + + waitKey(); + return 0; + +} \ No newline at end of file From ab235cda74577aa93a65b0da959b4273e5498575 Mon Sep 17 00:00:00 2001 From: yao Date: Thu, 29 Aug 2013 11:59:19 +0800 Subject: [PATCH 09/41] fix warnings --- modules/ocl/perf/perf_filters.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/modules/ocl/perf/perf_filters.cpp b/modules/ocl/perf/perf_filters.cpp index 5f510d63f..aa562412b 100644 --- a/modules/ocl/perf/perf_filters.cpp +++ b/modules/ocl/perf/perf_filters.cpp @@ -333,7 +333,7 @@ PERF_TEST_P(BilateralFixture, Bilateral, const Size_MatType_t params = GetParam(); const Size srcSize = get<0>(params); const int type = get<1>(params), d = 7; - double sigmacolor = 50.0, sigmaspace = 50.0; + double sigmacolor = 50.0, sigmaspace = 50.0; Mat src(srcSize, type), dst(srcSize, type); declare.in(src, WARMUP_RNG).out(dst); @@ -372,8 +372,8 @@ PERF_TEST_P(adaptiveBilateralFixture, adaptiveBilateral, const Size_MatType_t params = GetParam(); const Size srcSize = get<0>(params); const int type = get<1>(params); - double sigmaspace = 10.0; - Size ksize(9,9); + double sigmaspace = 10.0; + Size ksize(9,9); Mat src(srcSize, type), dst(srcSize, type); declare.in(src, WARMUP_RNG).out(dst); From 5728612f95bf1fa19debdd7245b6f956cc70781f Mon Sep 17 00:00:00 2001 From: Jin Ma Date: Thu, 29 Aug 2013 14:05:56 +0800 Subject: [PATCH 10/41] Removed the trailing whitespace --- modules/ocl/src/bgfg_mog.cpp | 13 ++++++------- modules/ocl/src/opencl/bgfg_mog.cl | 17 +++++++---------- 2 files changed, 13 insertions(+), 17 deletions(-) diff --git a/modules/ocl/src/bgfg_mog.cpp b/modules/ocl/src/bgfg_mog.cpp index c079c6b8f..d39f86394 100644 --- a/modules/ocl/src/bgfg_mog.cpp +++ b/modules/ocl/src/bgfg_mog.cpp @@ -46,7 +46,7 @@ #include "precomp.hpp" using namespace cv; using namespace cv::ocl; -namespace cv +namespace cv { namespace ocl { @@ -82,10 
+82,10 @@ namespace cv { namespace ocl { namespace device void getBackgroundImage_ocl(int cn, const oclMat& weight, const oclMat& mean, oclMat& dst, int nmixtures, float backgroundRatio); - void loadConstants(float Tb, float TB, float Tg, float varInit, float varMin, float varMax, float tau, + void loadConstants(float Tb, float TB, float Tg, float varInit, float varMin, float varMax, float tau, unsigned char shadowVal); - void mog2_ocl(const oclMat& frame, int cn, oclMat& fgmask, oclMat& modesUsed, oclMat& weight, oclMat& variance, oclMat& mean, + void mog2_ocl(const oclMat& frame, int cn, oclMat& fgmask, oclMat& modesUsed, oclMat& weight, oclMat& variance, oclMat& mean, float alphaT, float prune, bool detectShadows, int nmixtures); void getBackgroundImage2_ocl(int cn, const oclMat& modesUsed, const oclMat& weight, const oclMat& mean, oclMat& dst, int nmixtures); @@ -392,11 +392,11 @@ void cv::ocl::device::mog::loadConstants(float Tb, float TB, float Tg, float var constants->c_tau = tau; constants->c_shadowVal = shadowVal; - cl_constants = load_constant(*((cl_context*)getoclContext()), *((cl_command_queue*)getoclCommandQueue()), + cl_constants = load_constant(*((cl_context*)getoclContext()), *((cl_command_queue*)getoclCommandQueue()), (void *)constants, sizeof(_contant_struct)); } -void cv::ocl::device::mog::mog2_ocl(const oclMat& frame, int cn, oclMat& fgmaskRaw, oclMat& modesUsed, oclMat& weight, oclMat& variance, +void cv::ocl::device::mog::mog2_ocl(const oclMat& frame, int cn, oclMat& fgmaskRaw, oclMat& modesUsed, oclMat& weight, oclMat& variance, oclMat& mean, float alphaT, float prune, bool detectShadows, int nmixtures) { oclMat fgmask(fgmaskRaw.size(), CV_32SC1); @@ -635,5 +635,4 @@ void cv::ocl::MOG2::release() mean_.release(); bgmodelUsedModes_.release(); -} - +} \ No newline at end of file diff --git a/modules/ocl/src/opencl/bgfg_mog.cl b/modules/ocl/src/opencl/bgfg_mog.cl index 77bdb9c2a..2e269999a 100644 --- a/modules/ocl/src/opencl/bgfg_mog.cl +++ 
b/modules/ocl/src/opencl/bgfg_mog.cl @@ -134,7 +134,7 @@ __kernel void mog_withoutLearning_kernel(__global T_FRAME* frame, __global uchar __global float* weight, __global T_MEAN_VAR* mean, __global T_MEAN_VAR* var, int frame_row, int frame_col, int frame_step, int fgmask_step, int weight_step, int mean_step, int var_step, - float varThreshold, float backgroundRatio, int fgmask_offset_x, + float varThreshold, float backgroundRatio, int fgmask_offset_x, int fgmask_offset_y, int frame_offset_x, int frame_offset_y) { int x = get_global_id(0); @@ -142,7 +142,6 @@ __kernel void mog_withoutLearning_kernel(__global T_FRAME* frame, __global uchar if (x < frame_col && y < frame_row) { - T_MEAN_VAR pix = cvt(frame[(y + frame_offset_y) * frame_step + (x + frame_offset_x)]); int kHit = -1; @@ -179,20 +178,18 @@ __kernel void mog_withoutLearning_kernel(__global T_FRAME* frame, __global uchar } } } - if(kHit < 0 || kHit >= kForeground) fgmask[(y + fgmask_offset_y) * fgmask_step + (x + fgmask_offset_x)] = (uchar) (-1); else fgmask[(y + fgmask_offset_y) * fgmask_step + (x + fgmask_offset_x)] = (uchar) (0); - } } __kernel void mog_withLearning_kernel(__global T_FRAME* frame, __global int* fgmask, - __global float* weight, __global float* sortKey, __global T_MEAN_VAR* mean, + __global float* weight, __global float* sortKey, __global T_MEAN_VAR* mean, __global T_MEAN_VAR* var, int frame_row, int frame_col, int frame_step, int fgmask_step, int weight_step, int sortKey_step, int mean_step, int var_step, - float varThreshold, float backgroundRatio, float learningRate, float minVar, + float varThreshold, float backgroundRatio, float learningRate, float minVar, int fgmask_offset_x, int fgmask_offset_y, int frame_offset_x, int frame_offset_y) { const float w0 = 0.05f; @@ -322,7 +319,7 @@ __kernel void mog_withLearning_kernel(__global T_FRAME* frame, __global int* fgm __kernel void getBackgroundImage_kernel(__global float* weight, __global T_MEAN_VAR* mean, __global T_FRAME* dst, - int 
dst_row, int dst_col, int weight_step, int mean_step, int dst_step, + int dst_row, int dst_col, int weight_step, int mean_step, int dst_step, float backgroundRatio) { int x = get_global_id(0); @@ -351,8 +348,8 @@ __kernel void getBackgroundImage_kernel(__global float* weight, __global T_MEAN_ } __kernel void mog2_kernel(__global T_FRAME * frame, __global int* fgmask, __global float* weight, __global T_MEAN_VAR * mean, - __global int* modesUsed, __global float* variance, int frame_row, int frame_col, int frame_step, - int fgmask_step, int weight_step, int mean_step, int modesUsed_step, int var_step, float alphaT, float alpha1, float prune, + __global int* modesUsed, __global float* variance, int frame_row, int frame_col, int frame_step, + int fgmask_step, int weight_step, int mean_step, int modesUsed_step, int var_step, float alphaT, float alpha1, float prune, int detectShadows_flag, int fgmask_offset_x, int fgmask_offset_y, int frame_offset_x, int frame_offset_y, __constant con_srtuct_t* constants) { int x = get_global_id(0); @@ -505,7 +502,7 @@ __kernel void mog2_kernel(__global T_FRAME * frame, __global int* fgmask, __glob } __kernel void getBackgroundImage2_kernel(__global int* modesUsed, __global float* weight, __global T_MEAN_VAR* mean, - __global T_FRAME* dst, float c_TB, int modesUsed_row, int modesUsed_col, int modesUsed_step, int weight_step, + __global T_FRAME* dst, float c_TB, int modesUsed_row, int modesUsed_col, int modesUsed_step, int weight_step, int mean_step, int dst_step, int dst_x, int dst_y) { int x = get_global_id(0); From 14e083f1e0736ef0119a2cb3588b179c7d06a5ee Mon Sep 17 00:00:00 2001 From: Jin Ma Date: Thu, 29 Aug 2013 14:08:56 +0800 Subject: [PATCH 11/41] Removed trailing whitespace --- modules/ocl/test/utility.cpp | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/modules/ocl/test/utility.cpp b/modules/ocl/test/utility.cpp index 750c3c82b..5d3195719 100644 --- a/modules/ocl/test/utility.cpp +++ 
b/modules/ocl/test/utility.cpp @@ -119,7 +119,7 @@ cv::ocl::oclMat createMat_ocl(Size size, int type, bool useRoi) } cv::ocl::oclMat loadMat_ocl(const Mat& m, bool useRoi) -{ +{ CV_Assert(m.type() == CV_8UC1 || m.type() == CV_8UC3); cv::ocl::oclMat d_m; d_m = createMat_ocl(m.size(), m.type(), useRoi); @@ -130,11 +130,11 @@ cv::ocl::oclMat loadMat_ocl(const Mat& m, bool useRoi) d_m.locateROI(ls, pt); Rect roi(pt.x, pt.y, d_m.size().width, d_m.size().height); - + cv::ocl::oclMat m_ocl(m); cv::ocl::oclMat d_m_roi(d_m, roi); - + m_ocl.copyTo(d_m); return d_m; } @@ -289,4 +289,3 @@ double checkRectSimilarity(Size sz, std::vector& ob1, std::vector& o } return final_test_result; } - From 0233c4c198a359014307e7da11b66b6f5f1f14ea Mon Sep 17 00:00:00 2001 From: Jin Ma Date: Thu, 29 Aug 2013 15:49:02 +0800 Subject: [PATCH 12/41] Removed whitespace. --- modules/ocl/src/bgfg_mog.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/ocl/src/bgfg_mog.cpp b/modules/ocl/src/bgfg_mog.cpp index d39f86394..3051ac82f 100644 --- a/modules/ocl/src/bgfg_mog.cpp +++ b/modules/ocl/src/bgfg_mog.cpp @@ -330,7 +330,7 @@ void cv::ocl::device::mog::mog_ocl(const oclMat& frame, int cn, oclMat& fgmask, const float minVar = noiseSigma * noiseSigma; if(learningRate > 0.0f) - mog_withLearning(frame, cn, fgmask, weight, sortKey, mean, var, nmixtures, + mog_withLearning(frame, cn, fgmask, weight, sortKey, mean, var, nmixtures, varThreshold, backgroundRatio, learningRate, minVar); else mog_withoutLearning(frame, cn, fgmask, weight, mean, var, nmixtures, varThreshold, backgroundRatio); From 4f3349ffe43a8b486f6e7358350dcedc8b31ab88 Mon Sep 17 00:00:00 2001 From: Jin Ma Date: Thu, 29 Aug 2013 16:24:26 +0800 Subject: [PATCH 13/41] Added perf namespace. 
--- modules/ocl/perf/perf_bgfg.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/modules/ocl/perf/perf_bgfg.cpp b/modules/ocl/perf/perf_bgfg.cpp index d507a3b5e..5bf406daf 100644 --- a/modules/ocl/perf/perf_bgfg.cpp +++ b/modules/ocl/perf/perf_bgfg.cpp @@ -43,6 +43,7 @@ // //M*/ #include "perf_precomp.hpp" +using namespace perf; using namespace cv; using namespace cv::ocl; From 114f3266d801a23dcdbbda70b5cbb99026ac4fd4 Mon Sep 17 00:00:00 2001 From: Jin Ma Date: Thu, 29 Aug 2013 17:35:47 +0800 Subject: [PATCH 14/41] Removed performance test. --- modules/ocl/perf/perf_bgfg.cpp | 334 --------------------------------- 1 file changed, 334 deletions(-) delete mode 100644 modules/ocl/perf/perf_bgfg.cpp diff --git a/modules/ocl/perf/perf_bgfg.cpp b/modules/ocl/perf/perf_bgfg.cpp deleted file mode 100644 index 5bf406daf..000000000 --- a/modules/ocl/perf/perf_bgfg.cpp +++ /dev/null @@ -1,334 +0,0 @@ -/*M/////////////////////////////////////////////////////////////////////////////////////// -// -// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. -// -// By downloading, copying, installing or using the software you agree to this license. -// If you do not agree to this license, do not download, install, -// copy or use the software. -// -// -// License Agreement -// For Open Source Computer Vision Library -// -// Copyright (C) 2010-2013, Multicoreware, Inc., all rights reserved. -// Copyright (C) 2010-2013, Advanced Micro Devices, Inc., all rights reserved. -// Third party copyrights are property of their respective owners. -// -// @Authors -// Jin Ma, jin@multicorewareinc.com -// -// Redistribution and use in source and binary forms, with or without modification, -// are permitted provided that the following conditions are met: -// -// * Redistribution's of source code must retain the above copyright notice, -// this list of conditions and the following disclaimer. 
-// -// * Redistribution's in binary form must reproduce the above copyright notice, -// this list of conditions and the following disclaimer in the documentation -// and/or other oclMaterials provided with the distribution. -// -// * The name of the copyright holders may not be used to endorse or promote products -// derived from this software without specific prior written permission. -// -// This software is provided by the copyright holders and contributors as is and -// any express or implied warranties, including, but not limited to, the implied -// warranties of merchantability and fitness for a particular purpose are disclaimed. -// In no event shall the Intel Corporation or contributors be liable for any direct, -// indirect, incidental, special, exemplary, or consequential damages -// (including, but not limited to, procurement of substitute goods or services; -// loss of use, data, or profits; or business interruption) however caused -// and on any theory of liability, whether in contract, strict liability, -// or tort (including negligence or otherwise) arising in any way out of -// the use of this software, even if advised of the possibility of such damage. 
-// -//M*/ -#include "perf_precomp.hpp" -using namespace perf; -using namespace cv; -using namespace cv::ocl; - -static void cvtFrameFmt(std::vector& input, std::vector& output, int output_cn) -{ - for(int i = 0; i< (int)(input.size()); i++) - { - if(output_cn == 1) - cvtColor(input[i], output[i], COLOR_RGB2GRAY); - else - cvtColor(input[i], output[i], COLOR_RGB2RGBA); - } -} -///////////// MOG//////////////////////// -PERFTEST(mog) -{ - const string inputFile[] = {"768x576.avi", "1920x1080.avi"}; - int cn[] = {1, 3}; - - float learningRate[] = {0.0f, 0.01f}; - - for(unsigned int idx = 0; idx < sizeof(inputFile)/sizeof(string); idx++) - { - VideoCapture cap(inputFile[idx]); - ASSERT_TRUE(cap.isOpened()); - - Mat frame; - int nframe = 5; - Mat foreground_cpu; - oclMat foreground_ocl; - std::vector frame_buffer_init; - std::vector frame_buffer(nframe); - std::vector frame_buffer_ocl; - std::vector foreground_buf_ocl; - std::vector foreground_buf_cpu; - BackgroundSubtractorMOG mog_cpu; - cv::ocl::MOG d_mog; - for(int i = 0; i < nframe; i++) - { - cap >> frame; - ASSERT_FALSE(frame.empty()); - frame_buffer_init.push_back(frame); - } - - for(unsigned int i = 0; i < sizeof(learningRate)/sizeof(float); i++) - { - for(unsigned int j = 0; j < sizeof(cn)/sizeof(int); j++) - { - SUBTEST << frame.cols << 'x' << frame.rows << ".avi; "<<"channels: "< frame_buffer_init; - std::vector frame_buffer(nframe); - std::vector frame_buffer_ocl; - std::vector foreground_buf_ocl; - std::vector foreground_buf_cpu; - cv::ocl::oclMat foreground_ocl; - - for(int i = 0; i < nframe; i++) - { - cap >> frame; - ASSERT_FALSE(frame.empty()); - frame_buffer_init.push_back(frame); - } - cv::ocl::MOG2 d_mog; - - for(unsigned int j = 0; j < sizeof(cn)/sizeof(int); j++) - { - SUBTEST << frame.cols << 'x' << frame.rows << ".avi; "<<"channels: "<> frame; - ASSERT_FALSE(frame.empty()); - - int nframe = 5; - std::vector frame_buffer_init; - std::vector frame_buffer(nframe); - std::vector frame_buffer_ocl; - 
std::vector foreground_buf_ocl; - std::vector foreground_buf_cpu; - - for(int i = 0; i < nframe; i++) - { - cap >> frame; - ASSERT_FALSE(frame.empty()); - frame_buffer_init.push_back(frame); - } - - for(unsigned int j = 0; j < sizeof(cn)/sizeof(int); j++) - { - SUBTEST << frame.cols << 'x' << frame.rows << ".avi; "<<"channels: "< Date: Sat, 31 Aug 2013 23:35:03 +1000 Subject: [PATCH 15/41] changed int -> size_t when accessing std::vector --- .../objdetect/cascade_classifier/cascade_classifier.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/doc/tutorials/objdetect/cascade_classifier/cascade_classifier.rst b/doc/tutorials/objdetect/cascade_classifier/cascade_classifier.rst index 03080fec5..8a4f25cc2 100644 --- a/doc/tutorials/objdetect/cascade_classifier/cascade_classifier.rst +++ b/doc/tutorials/objdetect/cascade_classifier/cascade_classifier.rst @@ -90,7 +90,7 @@ This tutorial code's is shown lines below. You can also download it from `here < //-- Detect faces face_cascade.detectMultiScale( frame_gray, faces, 1.1, 2, 0|CV_HAAR_SCALE_IMAGE, Size(30, 30) ); - for( int i = 0; i < faces.size(); i++ ) + for( size_t i = 0; i < faces.size(); i++ ) { Point center( faces[i].x + faces[i].width*0.5, faces[i].y + faces[i].height*0.5 ); ellipse( frame, center, Size( faces[i].width*0.5, faces[i].height*0.5), 0, 0, 360, Scalar( 255, 0, 255 ), 4, 8, 0 ); @@ -101,7 +101,7 @@ This tutorial code's is shown lines below. 
You can also download it from `here < //-- In each face, detect eyes eyes_cascade.detectMultiScale( faceROI, eyes, 1.1, 2, 0 |CV_HAAR_SCALE_IMAGE, Size(30, 30) ); - for( int j = 0; j < eyes.size(); j++ ) + for( size_t j = 0; j < eyes.size(); j++ ) { Point center( faces[i].x + eyes[j].x + eyes[j].width*0.5, faces[i].y + eyes[j].y + eyes[j].height*0.5 ); int radius = cvRound( (eyes[j].width + eyes[j].height)*0.25 ); From 3c2a8912ee3b239ac8b22de4233de82eb7f7007f Mon Sep 17 00:00:00 2001 From: peng xiao Date: Mon, 2 Sep 2013 10:06:01 +0800 Subject: [PATCH 16/41] Let clAmdBlas library initialize once during program lifetime. --- modules/ocl/src/gemm.cpp | 47 ++++++++++++++++++++++++++++-- modules/ocl/src/initialization.cpp | 3 ++ 2 files changed, 48 insertions(+), 2 deletions(-) diff --git a/modules/ocl/src/gemm.cpp b/modules/ocl/src/gemm.cpp index 44f23da69..6e04baca4 100644 --- a/modules/ocl/src/gemm.cpp +++ b/modules/ocl/src/gemm.cpp @@ -46,16 +46,59 @@ #include #include "precomp.hpp" +namespace cv { namespace ocl { + +// used for clAmdBlas library to avoid redundant setup/teardown +void clBlasSetup(); +void clBlasTeardown(); + +}} /* namespace cv { namespace ocl */ + + #if !defined HAVE_CLAMDBLAS void cv::ocl::gemm(const oclMat&, const oclMat&, double, const oclMat&, double, oclMat&, int) { CV_Error(CV_StsNotImplemented, "OpenCL BLAS is not implemented"); } + +void cv::ocl::clBlasSetup() +{ + CV_Error(CV_StsNotImplemented, "OpenCL BLAS is not implemented"); +} + +void cv::ocl::clBlasTeardown() +{ + CV_Error(CV_StsNotImplemented, "OpenCL BLAS is not implemented"); +} + #else #include "clAmdBlas.h" using namespace cv; +static bool clBlasInitialized = false; +static Mutex cs; + +void cv::ocl::clBlasSetup() +{ + AutoLock al(cs); + if(!clBlasInitialized) + { + openCLSafeCall(clAmdBlasSetup()); + clBlasInitialized = true; + } +} + +void cv::ocl::clBlasTeardown() +{ + AutoLock al(cs); + if(clBlasInitialized) + { + clAmdBlasTeardown(); + clBlasInitialized = false; + } +} + 
void cv::ocl::gemm(const oclMat &src1, const oclMat &src2, double alpha, const oclMat &src3, double beta, oclMat &dst, int flags) { @@ -71,7 +114,8 @@ void cv::ocl::gemm(const oclMat &src1, const oclMat &src2, double alpha, dst.create(src1.rows, src2.cols, src1.type()); dst.setTo(Scalar::all(0)); } - openCLSafeCall( clAmdBlasSetup() ); + + clBlasSetup(); const clAmdBlasTranspose transA = (cv::GEMM_1_T & flags) ? clAmdBlasTrans : clAmdBlasNoTrans; const clAmdBlasTranspose transB = (cv::GEMM_2_T & flags) ? clAmdBlasTrans : clAmdBlasNoTrans; @@ -156,6 +200,5 @@ void cv::ocl::gemm(const oclMat &src1, const oclMat &src2, double alpha, } break; } - clAmdBlasTeardown(); } #endif diff --git a/modules/ocl/src/initialization.cpp b/modules/ocl/src/initialization.cpp index b990e09fe..564b40357 100644 --- a/modules/ocl/src/initialization.cpp +++ b/modules/ocl/src/initialization.cpp @@ -68,6 +68,7 @@ namespace cv namespace ocl { extern void fft_teardown(); + extern void clBlasTeardown(); /* * The binary caching system to eliminate redundant program source compilation. * Strictly, this is not a cache because we do not implement evictions right now. 
@@ -1050,6 +1051,7 @@ namespace cv void Info::release() { fft_teardown(); + clBlasTeardown(); impl->release(); impl = new Impl; DeviceName.clear(); @@ -1058,6 +1060,7 @@ namespace cv Info::~Info() { fft_teardown(); + clBlasTeardown(); impl->release(); } From 7e638cb0b54b4c37bcfdbcc8a8191fa5f986fbfb Mon Sep 17 00:00:00 2001 From: StevenPuttemans Date: Fri, 30 Aug 2013 14:21:11 +0200 Subject: [PATCH 17/41] Bugfix 3115: Added not to documentation for python version for facerecognizer interface + white + whitespaces remove --- modules/contrib/doc/facerec/facerec_api.rst | 2 ++ samples/python2/facerec_demo.py | 5 +++++ 2 files changed, 7 insertions(+) diff --git a/modules/contrib/doc/facerec/facerec_api.rst b/modules/contrib/doc/facerec/facerec_api.rst index 9e8170d48..3100cfd8f 100644 --- a/modules/contrib/doc/facerec/facerec_api.rst +++ b/modules/contrib/doc/facerec/facerec_api.rst @@ -70,6 +70,8 @@ Moreover every :ocv:class:`FaceRecognizer` supports the: * **Loading/Saving** the model state from/to a given XML or YAML. +.. note:: When using the FaceRecognizer interface in combination with Python, please stick to Python 2. Some underlying scripts like create_csv will not work in other versions, like Python 3. + Setting the Thresholds +++++++++++++++++++++++ diff --git a/samples/python2/facerec_demo.py b/samples/python2/facerec_demo.py index 1b0adcc21..9eeb04e0b 100755 --- a/samples/python2/facerec_demo.py +++ b/samples/python2/facerec_demo.py @@ -31,6 +31,11 @@ # ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE # POSSIBILITY OF SUCH DAMAGE. +# ------------------------------------------------------------------------------------------------ +# Note: +# When using the FaceRecognizer interface in combination with Python, please stick to Python 2. +# Some underlying scripts like create_csv will not work in other versions, like Python 3. 
+# ------------------------------------------------------------------------------------------------ import os import sys From e6ec3dd17f9fe5165de49106c935fc1117f90615 Mon Sep 17 00:00:00 2001 From: kdrobnyh Date: Sun, 18 Aug 2013 02:13:44 +0400 Subject: [PATCH 18/41] Add IPP support in resize, warpAffine, warpPerspective functions --- modules/imgproc/src/imgwarp.cpp | 311 ++++++++++++++++++++++++++++++++ 1 file changed, 311 insertions(+) diff --git a/modules/imgproc/src/imgwarp.cpp b/modules/imgproc/src/imgwarp.cpp index e6c189421..4c9063da5 100644 --- a/modules/imgproc/src/imgwarp.cpp +++ b/modules/imgproc/src/imgwarp.cpp @@ -50,9 +50,73 @@ #include #include +#if defined (HAVE_IPP) && (IPP_VERSION_MAJOR >= 7) +static IppStatus sts = ippInit(); +#endif + namespace cv { +#if defined (HAVE_IPP) && (IPP_VERSION_MAJOR >= 7) + typedef IppStatus (CV_STDCALL* ippiSetFunc)(const void*, void *, int, IppiSize); + typedef IppStatus (CV_STDCALL* ippiWarpPerspectiveBackFunc)(const void*, IppiSize, int, IppiRect, void *, int, IppiRect, double [3][3], int); + typedef IppStatus (CV_STDCALL* ippiWarpAffineBackFunc)(const void*, IppiSize, int, IppiRect, void *, int, IppiRect, double [2][3], int); + typedef IppStatus (CV_STDCALL* ippiResizeSqrPixelFunc)(const void*, IppiSize, int, IppiRect, void*, int, IppiRect, double, double, double, double, int, Ipp8u *); + + template + bool IPPSetSimple(cv::Scalar value, void *dataPointer, int step, IppiSize &size, ippiSetFunc func) + { + Type values[channels]; + for( int i = 0; i < channels; i++ ) + values[i] = (Type)value[i]; + return func(values, dataPointer, step, size) >= 0; + } + + bool IPPSet(const cv::Scalar &value, void *dataPointer, int step, IppiSize &size, int channels, int depth) + { + if( channels == 1 ) + { + switch( depth ) + { + case CV_8U: + return ippiSet_8u_C1R((Ipp8u)value[0], (Ipp8u *)dataPointer, step, size) >= 0; + case CV_16U: + return ippiSet_16u_C1R((Ipp16u)value[0], (Ipp16u *)dataPointer, step, size) >= 0; + case 
CV_32F: + return ippiSet_32f_C1R((Ipp32f)value[0], (Ipp32f *)dataPointer, step, size) >= 0; + } + } + else + { + if( channels == 3 ) + { + switch( depth ) + { + case CV_8U: + return IPPSetSimple<3, Ipp8u>(value, dataPointer, step, size, (ippiSetFunc)ippiSet_8u_C3R); + case CV_16U: + return IPPSetSimple<3, Ipp16u>(value, dataPointer, step, size, (ippiSetFunc)ippiSet_16u_C3R); + case CV_32F: + return IPPSetSimple<3, Ipp32f>(value, dataPointer, step, size, (ippiSetFunc)ippiSet_32f_C3R); + } + } + else if( channels == 4 ) + { + switch( depth ) + { + case CV_8U: + return IPPSetSimple<4, Ipp8u>(value, dataPointer, step, size, (ippiSetFunc)ippiSet_8u_C4R); + case CV_16U: + return IPPSetSimple<4, Ipp16u>(value, dataPointer, step, size, (ippiSetFunc)ippiSet_16u_C4R); + case CV_32F: + return IPPSetSimple<4, Ipp32f>(value, dataPointer, step, size, (ippiSetFunc)ippiSet_32f_C4R); + } + } + } + return false; + } +#endif + /************** interpolation formulas and tables ***************/ const int INTER_RESIZE_COEF_BITS=11; @@ -1604,6 +1668,45 @@ static int computeResizeAreaTab( int ssize, int dsize, int cn, double scale, Dec return k; } +#if defined (HAVE_IPP) && (IPP_VERSION_MAJOR >= 7) +class IPPresizeInvoker : + public ParallelLoopBody +{ +public: + IPPresizeInvoker(Mat &_src, Mat &_dst, double &_inv_scale_x, double &_inv_scale_y, int _mode, ippiResizeSqrPixelFunc _func, bool *_ok) : + ParallelLoopBody(), src(_src), dst(_dst), inv_scale_x(_inv_scale_x), inv_scale_y(_inv_scale_y), mode(_mode), func(_func), ok(_ok) + { + *ok = true; + } + + virtual void operator() (const Range& range) const + { + int cn = src.channels(); + IppiRect srcroi = { 0, range.start, src.cols, range.end - range.start }; + int dsty = CV_IMIN(cvRound(range.start * inv_scale_y), dst.rows); + int dstwidth = CV_IMIN(cvRound(src.cols * inv_scale_x), dst.cols); + int dstheight = CV_IMIN(cvRound(range.end * inv_scale_y), dst.rows); + IppiRect dstroi = { 0, dsty, dstwidth, dstheight - dsty }; + int bufsize; + 
ippiResizeGetBufSize( srcroi, dstroi, cn, mode, &bufsize ); + Ipp8u *buf; + buf = ippsMalloc_8u( bufsize ); + IppStatus sts; + if( func( src.data, ippiSize(src.cols, src.rows), (int)src.step[0], srcroi, dst.data, (int)dst.step[0], dstroi, inv_scale_x, inv_scale_y, 0, 0, mode, buf ) < 0 ) + *ok = false; + ippsFree(buf); + } +private: + Mat &src; + Mat &dst; + double inv_scale_x; + double inv_scale_y; + int mode; + ippiResizeSqrPixelFunc func; + bool *ok; + const IPPresizeInvoker& operator= (const IPPresizeInvoker&); +}; +#endif } @@ -1745,6 +1848,39 @@ void cv::resize( InputArray _src, OutputArray _dst, Size dsize, int depth = src.depth(), cn = src.channels(); double scale_x = 1./inv_scale_x, scale_y = 1./inv_scale_y; int k, sx, sy, dx, dy; + +#if defined (HAVE_IPP) && (IPP_VERSION_MAJOR >= 7) + int mode = + interpolation == INTER_LINEAR ? IPPI_INTER_LINEAR : + interpolation == INTER_NEAREST ? IPPI_INTER_NN : + interpolation == INTER_CUBIC ? IPPI_INTER_CUBIC : + interpolation == INTER_AREA && inv_scale_x * inv_scale_y > 1 ? IPPI_INTER_NN : + 0; + int type = src.type(); + ippiResizeSqrPixelFunc ippFunc = + type == CV_8UC1 ? (ippiResizeSqrPixelFunc)ippiResizeSqrPixel_8u_C1R : + type == CV_8UC3 ? (ippiResizeSqrPixelFunc)ippiResizeSqrPixel_8u_C3R : + type == CV_8UC4 ? (ippiResizeSqrPixelFunc)ippiResizeSqrPixel_8u_C4R : + type == CV_16UC1 ? (ippiResizeSqrPixelFunc)ippiResizeSqrPixel_16u_C1R : + type == CV_16UC3 ? (ippiResizeSqrPixelFunc)ippiResizeSqrPixel_16u_C3R : + type == CV_16UC4 ? (ippiResizeSqrPixelFunc)ippiResizeSqrPixel_16u_C4R : + type == CV_16SC1 ? (ippiResizeSqrPixelFunc)ippiResizeSqrPixel_16s_C1R : + type == CV_16SC3 ? (ippiResizeSqrPixelFunc)ippiResizeSqrPixel_16s_C3R : + type == CV_16SC4 ? (ippiResizeSqrPixelFunc)ippiResizeSqrPixel_16s_C4R : + type == CV_32FC1 ? (ippiResizeSqrPixelFunc)ippiResizeSqrPixel_32f_C1R : + type == CV_32FC3 ? (ippiResizeSqrPixelFunc)ippiResizeSqrPixel_32f_C3R : + type == CV_32FC4 ? 
(ippiResizeSqrPixelFunc)ippiResizeSqrPixel_32f_C4R : + 0; + if( ippFunc && mode ) + { + bool ok; + Range range(0, src.rows); + IPPresizeInvoker invoker(src, dst, inv_scale_x, inv_scale_y, mode, ippFunc, &ok); + parallel_for_(range, invoker, dst.total()/(double)(1<<16)); + if( ok ) + return; + } +#endif if( interpolation == INTER_NEAREST ) { @@ -3257,6 +3393,49 @@ private: double *M; }; +#if defined (HAVE_IPP) && (IPP_VERSION_MAJOR >= 7) +class IPPwarpAffineInvoker : + public ParallelLoopBody +{ +public: + IPPwarpAffineInvoker(Mat &_src, Mat &_dst, double (&_coeffs)[2][3], int &_interpolation, int &_borderType, const Scalar &_borderValue, ippiWarpAffineBackFunc _func, bool *_ok) : + ParallelLoopBody(), src(_src), dst(_dst), mode(_interpolation), coeffs(_coeffs), borderType(_borderType), borderValue(_borderValue), func(_func), ok(_ok) + { + *ok = true; + } + + virtual void operator() (const Range& range) const + { + IppiSize srcsize = { src.cols, src.rows }; + IppiRect srcroi = { 0, 0, src.cols, src.rows }; + IppiRect dstroi = { 0, range.start, dst.cols, range.end - range.start }; + int cnn = src.channels(); + if( borderType == BORDER_CONSTANT ) + { + IppiSize setSize = { dst.cols, range.end - range.start }; + void *dataPointer = dst.data + dst.step[0] * range.start; + if( !IPPSet( borderValue, dataPointer, (int)dst.step[0], setSize, cnn, src.depth() ) ) + { + *ok = false; + return; + } + } + if( func( src.data, srcsize, (int)src.step[0], srcroi, dst.data, (int)dst.step[0], dstroi, coeffs, mode ) < 0) ////Aug 2013: problem in IPP 7.1, 8.0 : sometimes function return ippStsCoeffErr + *ok = false; + } +private: + Mat &src; + Mat &dst; + double (&coeffs)[2][3]; + int mode; + int borderType; + Scalar borderValue; + ippiWarpAffineBackFunc func; + bool *ok; + const IPPwarpAffineInvoker& operator= (const IPPwarpAffineInvoker&); +}; +#endif + } @@ -3303,6 +3482,50 @@ void cv::warpAffine( InputArray _src, OutputArray _dst, const int AB_BITS = MAX(10, (int)INTER_BITS); const 
int AB_SCALE = 1 << AB_BITS; +#if defined (HAVE_IPP) && (IPP_VERSION_MAJOR >= 7) + int depth = src.depth(); + int channels = src.channels(); + if( ( depth == CV_8U || depth == CV_16U || depth == CV_32F ) && + ( channels == 1 || channels == 3 || channels == 4 ) && + ( borderType == cv::BORDER_TRANSPARENT || ( borderType == cv::BORDER_CONSTANT ) ) ) + { + int type = src.type(); + ippiWarpAffineBackFunc ippFunc = + type == CV_8UC1 ? (ippiWarpAffineBackFunc)ippiWarpAffineBack_8u_C1R : + type == CV_8UC3 ? (ippiWarpAffineBackFunc)ippiWarpAffineBack_8u_C3R : + type == CV_8UC4 ? (ippiWarpAffineBackFunc)ippiWarpAffineBack_8u_C4R : + type == CV_16UC1 ? (ippiWarpAffineBackFunc)ippiWarpAffineBack_16u_C1R : + type == CV_16UC3 ? (ippiWarpAffineBackFunc)ippiWarpAffineBack_16u_C3R : + type == CV_16UC4 ? (ippiWarpAffineBackFunc)ippiWarpAffineBack_16u_C4R : + type == CV_32FC1 ? (ippiWarpAffineBackFunc)ippiWarpAffineBack_32f_C1R : + type == CV_32FC3 ? (ippiWarpAffineBackFunc)ippiWarpAffineBack_32f_C3R : + type == CV_32FC4 ? (ippiWarpAffineBackFunc)ippiWarpAffineBack_32f_C4R : + 0; + int mode = + flags == INTER_LINEAR ? IPPI_INTER_LINEAR : + flags == INTER_NEAREST ? IPPI_INTER_NN : + flags == INTER_CUBIC ? 
IPPI_INTER_CUBIC : + 0; + if( mode && ippFunc ) + { + double coeffs[2][3]; + for( int i = 0; i < 2; i++ ) + { + for( int j = 0; j < 3; j++ ) + { + coeffs[i][j] = matM.at(i, j); + } + } + bool ok; + Range range(0, dst.rows); + IPPwarpAffineInvoker invoker(src, dst, coeffs, mode, borderType, borderValue, ippFunc, &ok); + parallel_for_(range, invoker, dst.total()/(double)(1<<16)); + if( ok ) + return; + } + } +#endif + for( x = 0; x < dst.cols; x++ ) { adelta[x] = saturate_cast(M[0]*x*AB_SCALE); @@ -3410,6 +3633,50 @@ private: Scalar borderValue; }; +#if defined (HAVE_IPP) && (IPP_VERSION_MAJOR >= 7) +class IPPwarpPerspectiveInvoker : + public ParallelLoopBody +{ +public: + IPPwarpPerspectiveInvoker(Mat &_src, Mat &_dst, double (&_coeffs)[3][3], int &_interpolation, int &_borderType, const Scalar &_borderValue, ippiWarpPerspectiveBackFunc _func, bool *_ok) : + ParallelLoopBody(), src(_src), dst(_dst), mode(_interpolation), coeffs(_coeffs), borderType(_borderType), borderValue(_borderValue), func(_func), ok(_ok) + { + *ok = true; + } + + virtual void operator() (const Range& range) const + { + IppiSize srcsize = {src.cols, src.rows}; + IppiRect srcroi = {0, 0, src.cols, src.rows}; + IppiRect dstroi = {0, range.start, dst.cols, range.end - range.start}; + int cnn = src.channels(); + + if( borderType == BORDER_CONSTANT ) + { + IppiSize setSize = {dst.cols, range.end - range.start}; + void *dataPointer = dst.data + dst.step[0] * range.start; + if( !IPPSet( borderValue, dataPointer, (int)dst.step[0], setSize, cnn, src.depth() ) ) + { + *ok = false; + return; + } + } + if( func(src.data, srcsize, (int)src.step[0], srcroi, dst.data, (int)dst.step[0], dstroi, coeffs, mode) < 0) + *ok = false; + } +private: + Mat &src; + Mat &dst; + double (&coeffs)[3][3]; + int mode; + int borderType; + const Scalar borderValue; + ippiWarpPerspectiveBackFunc func; + bool *ok; + const IPPwarpPerspectiveInvoker& operator= (const IPPwarpPerspectiveInvoker&); +}; +#endif + } void 
cv::warpPerspective( InputArray _src, OutputArray _dst, InputArray _M0, @@ -3439,6 +3706,50 @@ void cv::warpPerspective( InputArray _src, OutputArray _dst, InputArray _M0, if( !(flags & WARP_INVERSE_MAP) ) invert(matM, matM); + +#if defined (HAVE_IPP) && (IPP_VERSION_MAJOR >= 7) + int depth = src.depth(); + int channels = src.channels(); + if( ( depth == CV_8U || depth == CV_16U || depth == CV_32F ) && + ( channels == 1 || channels == 3 || channels == 4 ) && + ( borderType == cv::BORDER_TRANSPARENT || borderType == cv::BORDER_CONSTANT ) ) + { + int type = src.type(); + ippiWarpPerspectiveBackFunc ippFunc = + type == CV_8UC1 ? (ippiWarpPerspectiveBackFunc)ippiWarpPerspectiveBack_8u_C1R : + type == CV_8UC3 ? (ippiWarpPerspectiveBackFunc)ippiWarpPerspectiveBack_8u_C3R : + type == CV_8UC4 ? (ippiWarpPerspectiveBackFunc)ippiWarpPerspectiveBack_8u_C4R : + type == CV_16UC1 ? (ippiWarpPerspectiveBackFunc)ippiWarpPerspectiveBack_16u_C1R : + type == CV_16UC3 ? (ippiWarpPerspectiveBackFunc)ippiWarpPerspectiveBack_16u_C3R : + type == CV_16UC4 ? (ippiWarpPerspectiveBackFunc)ippiWarpPerspectiveBack_16u_C4R : + type == CV_32FC1 ? (ippiWarpPerspectiveBackFunc)ippiWarpPerspectiveBack_32f_C1R : + type == CV_32FC3 ? (ippiWarpPerspectiveBackFunc)ippiWarpPerspectiveBack_32f_C3R : + type == CV_32FC4 ? (ippiWarpPerspectiveBackFunc)ippiWarpPerspectiveBack_32f_C4R : + 0; + int mode = + flags == INTER_LINEAR ? IPPI_INTER_LINEAR : + flags == INTER_NEAREST ? IPPI_INTER_NN : + flags == INTER_CUBIC ? 
IPPI_INTER_CUBIC : + 0; + if( mode && ippFunc ) + { + double coeffs[3][3]; + for( int i = 0; i < 3; i++ ) + { + for( int j = 0; j < 3; j++ ) + { + coeffs[i][j] = matM.at(i, j); + } + } + bool ok; + Range range(0, dst.rows); + IPPwarpPerspectiveInvoker invoker(src, dst, coeffs, mode, borderType, borderValue, ippFunc, &ok); + parallel_for_(range, invoker, dst.total()/(double)(1<<16)); + if( ok ) + return; + } + } +#endif Range range(0, dst.rows); warpPerspectiveInvoker invoker(src, dst, M, interpolation, borderType, borderValue); From e85e4d3ab90da55d0461c63f1502278ee34f4143 Mon Sep 17 00:00:00 2001 From: Vadim Pisarevsky Date: Mon, 2 Sep 2013 18:34:50 +0400 Subject: [PATCH 19/41] fixed bug in IPP-accelerated morphology; added several IPP imgwarp functions (by Klim) --- modules/imgproc/src/imgwarp.cpp | 57 +++++++++++++++------------------ modules/imgproc/src/morph.cpp | 29 ++++++++--------- 2 files changed, 39 insertions(+), 47 deletions(-) diff --git a/modules/imgproc/src/imgwarp.cpp b/modules/imgproc/src/imgwarp.cpp index 4c9063da5..a4fda282d 100644 --- a/modules/imgproc/src/imgwarp.cpp +++ b/modules/imgproc/src/imgwarp.cpp @@ -52,7 +52,7 @@ #if defined (HAVE_IPP) && (IPP_VERSION_MAJOR >= 7) static IppStatus sts = ippInit(); -#endif +#endif namespace cv { @@ -76,15 +76,15 @@ namespace cv { if( channels == 1 ) { - switch( depth ) - { - case CV_8U: + switch( depth ) + { + case CV_8U: return ippiSet_8u_C1R((Ipp8u)value[0], (Ipp8u *)dataPointer, step, size) >= 0; case CV_16U: return ippiSet_16u_C1R((Ipp16u)value[0], (Ipp16u *)dataPointer, step, size) >= 0; case CV_32F: return ippiSet_32f_C1R((Ipp32f)value[0], (Ipp32f *)dataPointer, step, size) >= 0; - } + } } else { @@ -98,7 +98,7 @@ namespace cv return IPPSetSimple<3, Ipp16u>(value, dataPointer, step, size, (ippiSetFunc)ippiSet_16u_C3R); case CV_32F: return IPPSetSimple<3, Ipp32f>(value, dataPointer, step, size, (ippiSetFunc)ippiSet_32f_C3R); - } + } } else if( channels == 4 ) { @@ -110,7 +110,7 @@ namespace cv 
return IPPSetSimple<4, Ipp16u>(value, dataPointer, step, size, (ippiSetFunc)ippiSet_16u_C4R); case CV_32F: return IPPSetSimple<4, Ipp32f>(value, dataPointer, step, size, (ippiSetFunc)ippiSet_32f_C4R); - } + } } } return false; @@ -1848,17 +1848,12 @@ void cv::resize( InputArray _src, OutputArray _dst, Size dsize, int depth = src.depth(), cn = src.channels(); double scale_x = 1./inv_scale_x, scale_y = 1./inv_scale_y; int k, sx, sy, dx, dy; - + #if defined (HAVE_IPP) && (IPP_VERSION_MAJOR >= 7) - int mode = - interpolation == INTER_LINEAR ? IPPI_INTER_LINEAR : - interpolation == INTER_NEAREST ? IPPI_INTER_NN : - interpolation == INTER_CUBIC ? IPPI_INTER_CUBIC : - interpolation == INTER_AREA && inv_scale_x * inv_scale_y > 1 ? IPPI_INTER_NN : - 0; + int mode = interpolation == INTER_LINEAR ? IPPI_INTER_LINEAR : 0; int type = src.type(); - ippiResizeSqrPixelFunc ippFunc = - type == CV_8UC1 ? (ippiResizeSqrPixelFunc)ippiResizeSqrPixel_8u_C1R : + ippiResizeSqrPixelFunc ippFunc = + type == CV_8UC1 ? (ippiResizeSqrPixelFunc)ippiResizeSqrPixel_8u_C1R : type == CV_8UC3 ? (ippiResizeSqrPixelFunc)ippiResizeSqrPixel_8u_C3R : type == CV_8UC4 ? (ippiResizeSqrPixelFunc)ippiResizeSqrPixel_8u_C4R : type == CV_16UC1 ? (ippiResizeSqrPixelFunc)ippiResizeSqrPixel_16u_C1R : @@ -1869,9 +1864,9 @@ void cv::resize( InputArray _src, OutputArray _dst, Size dsize, type == CV_16SC4 ? (ippiResizeSqrPixelFunc)ippiResizeSqrPixel_16s_C4R : type == CV_32FC1 ? (ippiResizeSqrPixelFunc)ippiResizeSqrPixel_32f_C1R : type == CV_32FC3 ? (ippiResizeSqrPixelFunc)ippiResizeSqrPixel_32f_C3R : - type == CV_32FC4 ? (ippiResizeSqrPixelFunc)ippiResizeSqrPixel_32f_C4R : + type == CV_32FC4 ? 
(ippiResizeSqrPixelFunc)ippiResizeSqrPixel_32f_C4R : 0; - if( ippFunc && mode ) + if( ippFunc && mode != 0 ) { bool ok; Range range(0, src.rows); @@ -3485,12 +3480,12 @@ void cv::warpAffine( InputArray _src, OutputArray _dst, #if defined (HAVE_IPP) && (IPP_VERSION_MAJOR >= 7) int depth = src.depth(); int channels = src.channels(); - if( ( depth == CV_8U || depth == CV_16U || depth == CV_32F ) && - ( channels == 1 || channels == 3 || channels == 4 ) && + if( ( depth == CV_8U || depth == CV_16U || depth == CV_32F ) && + ( channels == 1 || channels == 3 || channels == 4 ) && ( borderType == cv::BORDER_TRANSPARENT || ( borderType == cv::BORDER_CONSTANT ) ) ) { int type = src.type(); - ippiWarpAffineBackFunc ippFunc = + ippiWarpAffineBackFunc ippFunc = type == CV_8UC1 ? (ippiWarpAffineBackFunc)ippiWarpAffineBack_8u_C1R : type == CV_8UC3 ? (ippiWarpAffineBackFunc)ippiWarpAffineBack_8u_C3R : type == CV_8UC4 ? (ippiWarpAffineBackFunc)ippiWarpAffineBack_8u_C4R : @@ -3501,10 +3496,10 @@ void cv::warpAffine( InputArray _src, OutputArray _dst, type == CV_32FC3 ? (ippiWarpAffineBackFunc)ippiWarpAffineBack_32f_C3R : type == CV_32FC4 ? (ippiWarpAffineBackFunc)ippiWarpAffineBack_32f_C4R : 0; - int mode = + int mode = flags == INTER_LINEAR ? IPPI_INTER_LINEAR : flags == INTER_NEAREST ? IPPI_INTER_NN : - flags == INTER_CUBIC ? IPPI_INTER_CUBIC : + flags == INTER_CUBIC ? 
IPPI_INTER_CUBIC : 0; if( mode && ippFunc ) { @@ -3525,7 +3520,7 @@ void cv::warpAffine( InputArray _src, OutputArray _dst, } } #endif - + for( x = 0; x < dst.cols; x++ ) { adelta[x] = saturate_cast(M[0]*x*AB_SCALE); @@ -3638,7 +3633,7 @@ class IPPwarpPerspectiveInvoker : public ParallelLoopBody { public: - IPPwarpPerspectiveInvoker(Mat &_src, Mat &_dst, double (&_coeffs)[3][3], int &_interpolation, int &_borderType, const Scalar &_borderValue, ippiWarpPerspectiveBackFunc _func, bool *_ok) : + IPPwarpPerspectiveInvoker(Mat &_src, Mat &_dst, double (&_coeffs)[3][3], int &_interpolation, int &_borderType, const Scalar &_borderValue, ippiWarpPerspectiveBackFunc _func, bool *_ok) : ParallelLoopBody(), src(_src), dst(_dst), mode(_interpolation), coeffs(_coeffs), borderType(_borderType), borderValue(_borderValue), func(_func), ok(_ok) { *ok = true; @@ -3706,16 +3701,16 @@ void cv::warpPerspective( InputArray _src, OutputArray _dst, InputArray _M0, if( !(flags & WARP_INVERSE_MAP) ) invert(matM, matM); - + #if defined (HAVE_IPP) && (IPP_VERSION_MAJOR >= 7) int depth = src.depth(); int channels = src.channels(); - if( ( depth == CV_8U || depth == CV_16U || depth == CV_32F ) && - ( channels == 1 || channels == 3 || channels == 4 ) && + if( ( depth == CV_8U || depth == CV_16U || depth == CV_32F ) && + ( channels == 1 || channels == 3 || channels == 4 ) && ( borderType == cv::BORDER_TRANSPARENT || borderType == cv::BORDER_CONSTANT ) ) { int type = src.type(); - ippiWarpPerspectiveBackFunc ippFunc = + ippiWarpPerspectiveBackFunc ippFunc = type == CV_8UC1 ? (ippiWarpPerspectiveBackFunc)ippiWarpPerspectiveBack_8u_C1R : type == CV_8UC3 ? (ippiWarpPerspectiveBackFunc)ippiWarpPerspectiveBack_8u_C3R : type == CV_8UC4 ? (ippiWarpPerspectiveBackFunc)ippiWarpPerspectiveBack_8u_C4R : @@ -3726,10 +3721,10 @@ void cv::warpPerspective( InputArray _src, OutputArray _dst, InputArray _M0, type == CV_32FC3 ? (ippiWarpPerspectiveBackFunc)ippiWarpPerspectiveBack_32f_C3R : type == CV_32FC4 ? 
(ippiWarpPerspectiveBackFunc)ippiWarpPerspectiveBack_32f_C4R : 0; - int mode = + int mode = flags == INTER_LINEAR ? IPPI_INTER_LINEAR : flags == INTER_NEAREST ? IPPI_INTER_NN : - flags == INTER_CUBIC ? IPPI_INTER_CUBIC : + flags == INTER_CUBIC ? IPPI_INTER_CUBIC : 0; if( mode && ippFunc ) { diff --git a/modules/imgproc/src/morph.cpp b/modules/imgproc/src/morph.cpp index b8bb7cf38..19636bc96 100644 --- a/modules/imgproc/src/morph.cpp +++ b/modules/imgproc/src/morph.cpp @@ -1213,11 +1213,10 @@ static bool IPPMorphReplicate(int op, const Mat &src, Mat &dst, const Mat &kerne } static bool IPPMorphOp(int op, InputArray _src, OutputArray _dst, - InputArray _kernel, - const Point &anchor, int iterations, + const Mat& _kernel, Point anchor, int iterations, int borderType, const Scalar &borderValue) { - Mat src = _src.getMat(), kernel = _kernel.getMat(); + Mat src = _src.getMat(), kernel = _kernel; if( !( src.depth() == CV_8U || src.depth() == CV_32F ) || ( iterations > 1 ) || !( borderType == cv::BORDER_REPLICATE || (borderType == cv::BORDER_CONSTANT && borderValue == morphologyDefaultBorderValue()) ) || !( op == MORPH_DILATE || op == MORPH_ERODE) ) @@ -1248,9 +1247,6 @@ static bool IPPMorphOp(int op, InputArray _src, OutputArray _dst, } Size ksize = kernel.data ? 
kernel.size() : Size(3,3); - Point normanchor = normalizeAnchor(anchor, ksize); - - CV_Assert( normanchor.inside(Rect(0, 0, ksize.width, ksize.height)) ); _dst.create( src.size(), src.type() ); Mat dst = _dst.getMat(); @@ -1265,7 +1261,7 @@ static bool IPPMorphOp(int op, InputArray _src, OutputArray _dst, if( !kernel.data ) { ksize = Size(1+iterations*2,1+iterations*2); - normanchor = Point(iterations, iterations); + anchor = Point(iterations, iterations); rectKernel = true; iterations = 1; } @@ -1273,7 +1269,7 @@ static bool IPPMorphOp(int op, InputArray _src, OutputArray _dst, { ksize = Size(ksize.width + (iterations-1)*(ksize.width-1), ksize.height + (iterations-1)*(ksize.height-1)), - normanchor = Point(normanchor.x*iterations, normanchor.y*iterations); + anchor = Point(anchor.x*iterations, anchor.y*iterations); kernel = Mat(); rectKernel = true; iterations = 1; @@ -1283,7 +1279,7 @@ static bool IPPMorphOp(int op, InputArray _src, OutputArray _dst, if( iterations > 1 ) return false; - return IPPMorphReplicate( op, src, dst, kernel, ksize, normanchor, rectKernel ); + return IPPMorphReplicate( op, src, dst, kernel, ksize, anchor, rectKernel ); } #endif @@ -1292,18 +1288,19 @@ static void morphOp( int op, InputArray _src, OutputArray _dst, Point anchor, int iterations, int borderType, const Scalar& borderValue ) { - -#if defined (HAVE_IPP) && (IPP_VERSION_MAJOR >= 7) - if( IPPMorphOp(op, _src, _dst, _kernel, anchor, iterations, borderType, borderValue) ) - return; -#endif - - Mat src = _src.getMat(), kernel = _kernel.getMat(); + Mat kernel = _kernel.getMat(); Size ksize = kernel.data ? 
kernel.size() : Size(3,3); anchor = normalizeAnchor(anchor, ksize); CV_Assert( anchor.inside(Rect(0, 0, ksize.width, ksize.height)) ); +#if defined (HAVE_IPP) && (IPP_VERSION_MAJOR >= 7) + if( IPPMorphOp(op, _src, _dst, kernel, anchor, iterations, borderType, borderValue) ) + return; +#endif + + Mat src = _src.getMat(); + _dst.create( src.size(), src.type() ); Mat dst = _dst.getMat(); From 7b3e3f69fb8600a43d51a464a812f3c00d770194 Mon Sep 17 00:00:00 2001 From: peng xiao Date: Tue, 3 Sep 2013 09:31:13 +0800 Subject: [PATCH 20/41] Modify according to @alalek. --- modules/ocl/src/gemm.cpp | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/modules/ocl/src/gemm.cpp b/modules/ocl/src/gemm.cpp index 6e04baca4..a9533b5d8 100644 --- a/modules/ocl/src/gemm.cpp +++ b/modules/ocl/src/gemm.cpp @@ -81,11 +81,14 @@ static Mutex cs; void cv::ocl::clBlasSetup() { - AutoLock al(cs); if(!clBlasInitialized) { - openCLSafeCall(clAmdBlasSetup()); - clBlasInitialized = true; + AutoLock al(cs); + if(!clBlasInitialized) + { + openCLSafeCall(clAmdBlasSetup()); + clBlasInitialized = true; + } } } From ea165394484460f6907c32d447003baab89eaeb4 Mon Sep 17 00:00:00 2001 From: pengxiao Date: Tue, 3 Sep 2013 10:30:37 +0800 Subject: [PATCH 21/41] Fix a crash of ocl program if clAmdBlas is not linked. 
--- modules/ocl/src/gemm.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/ocl/src/gemm.cpp b/modules/ocl/src/gemm.cpp index a9533b5d8..7e31cdbf4 100644 --- a/modules/ocl/src/gemm.cpp +++ b/modules/ocl/src/gemm.cpp @@ -69,7 +69,7 @@ void cv::ocl::clBlasSetup() void cv::ocl::clBlasTeardown() { - CV_Error(CV_StsNotImplemented, "OpenCL BLAS is not implemented"); + //intentionally do nothing } #else From a70bdfc13f8279ee86be1a0281fbd9ee357245b7 Mon Sep 17 00:00:00 2001 From: yao Date: Tue, 3 Sep 2013 15:02:18 +0800 Subject: [PATCH 22/41] a little fix to tests and sample --- modules/ocl/test/test_imgproc.cpp | 24 ++++++++++-------------- samples/ocl/clahe.cpp | 4 ++++ 2 files changed, 14 insertions(+), 14 deletions(-) diff --git a/modules/ocl/test/test_imgproc.cpp b/modules/ocl/test/test_imgproc.cpp index 46cd257c8..f723e13bc 100644 --- a/modules/ocl/test/test_imgproc.cpp +++ b/modules/ocl/test/test_imgproc.cpp @@ -1396,14 +1396,10 @@ TEST_P(calcHist, Mat) } /////////////////////////////////////////////////////////////////////////////////////////////////////// // CLAHE -namespace -{ - IMPLEMENT_PARAM_CLASS(ClipLimit, double) -} -PARAM_TEST_CASE(CLAHE, cv::Size, ClipLimit) +PARAM_TEST_CASE(CLAHE, cv::Size, double) { - cv::Size size; + cv::Size gridSize; double clipLimit; cv::Mat src; @@ -1414,22 +1410,22 @@ PARAM_TEST_CASE(CLAHE, cv::Size, ClipLimit) virtual void SetUp() { - size = GET_PARAM(0); + gridSize = GET_PARAM(0); clipLimit = GET_PARAM(1); cv::RNG &rng = TS::ptr()->get_rng(); - src = randomMat(rng, size, CV_8UC1, 0, 256, false); + src = randomMat(rng, cv::Size(MWIDTH, MHEIGHT), CV_8UC1, 0, 256, false); g_src.upload(src); } }; TEST_P(CLAHE, Accuracy) { - cv::Ptr clahe = cv::ocl::createCLAHE(clipLimit); + cv::Ptr clahe = cv::ocl::createCLAHE(clipLimit, gridSize); clahe->apply(g_src, g_dst); cv::Mat dst(g_dst); - cv::Ptr clahe_gold = cv::createCLAHE(clipLimit); + cv::Ptr clahe_gold = cv::createCLAHE(clipLimit, gridSize); 
clahe_gold->apply(src, dst_gold); EXPECT_MAT_NEAR(dst_gold, dst, 1.0); @@ -1725,10 +1721,10 @@ INSTANTIATE_TEST_CASE_P(histTestBase, calcHist, Combine( ONE_TYPE(CV_32SC1) //no use )); -INSTANTIATE_TEST_CASE_P(ImgProc, CLAHE, Combine( - Values(cv::Size(128, 128), cv::Size(113, 113), cv::Size(1300, 1300)), - Values(0.0, 40.0))); +INSTANTIATE_TEST_CASE_P(Imgproc, CLAHE, Combine( + Values(cv::Size(4, 4), cv::Size(32, 8), cv::Size(8, 64)), + Values(0.0, 10.0, 62.0, 300.0))); -INSTANTIATE_TEST_CASE_P(OCL_ImgProc, ColumnSum, DIFFERENT_SIZES); +INSTANTIATE_TEST_CASE_P(Imgproc, ColumnSum, DIFFERENT_SIZES); #endif // HAVE_OPENCL diff --git a/samples/ocl/clahe.cpp b/samples/ocl/clahe.cpp index c2f4b27bf..1fbf49fac 100644 --- a/samples/ocl/clahe.cpp +++ b/samples/ocl/clahe.cpp @@ -44,6 +44,10 @@ int main(int argc, char** argv) namedWindow("CLAHE"); createTrackbar("Tile Size", "CLAHE", &tilesize, 32, (TrackbarCallback)TSize_Callback); createTrackbar("Clip Limit", "CLAHE", &cliplimit, 20, (TrackbarCallback)Clip_Callback); + + vector info; + CV_Assert(ocl::getDevice(info)); + Mat frame, outframe; ocl::oclMat d_outframe; From e528f39deff8f1b31ff53c9e3676e07b8227d0ed Mon Sep 17 00:00:00 2001 From: Jin Ma Date: Wed, 4 Sep 2013 14:37:46 +0800 Subject: [PATCH 23/41] Added the performance test for mog and mog2. --- modules/ocl/perf/perf_bgfg.cpp | 337 +++++++++++++++++++++++++++++++++ 1 file changed, 337 insertions(+) create mode 100644 modules/ocl/perf/perf_bgfg.cpp diff --git a/modules/ocl/perf/perf_bgfg.cpp b/modules/ocl/perf/perf_bgfg.cpp new file mode 100644 index 000000000..188dffa13 --- /dev/null +++ b/modules/ocl/perf/perf_bgfg.cpp @@ -0,0 +1,337 @@ +/*M/////////////////////////////////////////////////////////////////////////////////////// +// +// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. +// +// By downloading, copying, installing or using the software you agree to this license. 
+// If you do not agree to this license, do not download, install, +// copy or use the software. +// +// +// License Agreement +// For Open Source Computer Vision Library +// +// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved. +// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved. +// Third party copyrights are property of their respective owners. +// +// @Authors +// Fangfang Bai, fangfang@multicorewareinc.com +// Jin Ma, jin@multicorewareinc.com +// +// Redistribution and use in source and binary forms, with or without modification, +// are permitted provided that the following conditions are met: +// +// * Redistribution's of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// * Redistribution's in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// * The name of the copyright holders may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// This software is provided by the copyright holders and contributors as is and +// any express or implied warranties, including, but not limited to, the implied +// warranties of merchantability and fitness for a particular purpose are disclaimed. +// In no event shall the Intel Corporation or contributors be liable for any direct, +// indirect, incidental, special, exemplary, or consequential damages +// (including, but not limited to, procurement of substitute goods or services; +// loss of use, data, or profits; or business interruption) however caused +// and on any theory of liability, whether in contract, strict liability, +// or tort (including negligence or otherwise) arising in any way out of +// the use of this software, even if advised of the possibility of such damage.
+// +//M*/ +#include "perf_precomp.hpp" + +///////////// PyrLKOpticalFlow //////////////////////// + +using namespace perf; +using std::tr1::get; +using std::tr1::tuple; +using std::tr1::make_tuple; + +#if defined(HAVE_XINE) || \ + defined(HAVE_GSTREAMER) || \ + defined(HAVE_QUICKTIME) || \ + defined(HAVE_AVFOUNDATION) || \ + defined(HAVE_FFMPEG) || \ + defined(WIN32) /* assume that we have ffmpeg */ + +# define BUILD_WITH_VIDEO_INPUT_SUPPORT 1 +#else +# define BUILD_WITH_VIDEO_INPUT_SUPPORT 0 +#endif + +#if BUILD_WITH_VIDEO_INPUT_SUPPORT + +typedef tuple<string, int, double> VideoMOGParamType; +typedef TestBaseWithParam<VideoMOGParamType> VideoMOGFixture; + +PERF_TEST_P(VideoMOGFixture, Video_MOG, + ::testing::Combine(::testing::Values("768x576.avi", "1920x1080.avi"), + ::testing::Values(1, 3), + ::testing::Values(0.0, 0.01))) +{ + VideoMOGParamType params = GetParam(); + + const string inputFile = perf::TestBase::getDataPath(get<0>(params)); + const int cn = get<1>(params); + const float learningRate = static_cast<float>(get<2>(params)); + + cv::VideoCapture cap(inputFile); + ASSERT_TRUE(cap.isOpened()); + + cv::Mat frame; + + cap >> frame; + ASSERT_FALSE(frame.empty()); + + cv::Mat temp; + if (cn == 1) + cv::cvtColor(frame, temp, cv::COLOR_BGR2GRAY); + else + cv::cvtColor(frame, temp, cv::COLOR_BGR2BGRA); + cv::swap(temp, frame); + + if(RUN_PLAIN_IMPL) + { + cv::BackgroundSubtractorMOG mog; + cv::Mat foreground; + + mog(frame, foreground, learningRate); + + for (int i = 0; i < 10; ++i) + { + cap >> frame; + ASSERT_FALSE(frame.empty()); + + cv::Mat temp; + if (cn == 1) + cv::cvtColor(frame, temp, cv::COLOR_BGR2GRAY); + else + cv::cvtColor(frame, temp, cv::COLOR_BGR2BGRA); + + cv::swap(temp, frame); + + TEST_CYCLE() + mog(frame, foreground, learningRate); + + SANITY_CHECK(foreground); + } + }else if(RUN_OCL_IMPL) + { + cv::ocl::oclMat d_frame(frame); + cv::ocl::MOG d_mog; + cv::ocl::oclMat foreground; + cv::Mat foreground_h; + + d_mog(d_frame, foreground, learningRate); + + for (int i = 0; i < 10; ++i) + { + 
cap >> frame; + ASSERT_FALSE(frame.empty()); + + cv::Mat temp; + if (cn == 1) + cv::cvtColor(frame, temp, cv::COLOR_BGR2GRAY); + else + cv::cvtColor(frame, temp, cv::COLOR_BGR2BGRA); + cv::swap(temp, frame); + + d_frame.upload(frame); + + OCL_TEST_CYCLE() + d_mog(d_frame, foreground, learningRate); + + foreground.download(foreground_h); + SANITY_CHECK(foreground_h); + } + }else + OCL_PERF_ELSE +} +#endif + +#if BUILD_WITH_VIDEO_INPUT_SUPPORT + +typedef tuple<string, int> VideoMOG2ParamType; +typedef TestBaseWithParam<VideoMOG2ParamType> VideoMOG2Fixture; + +PERF_TEST_P(VideoMOG2Fixture, Video_MOG2, + ::testing::Combine(::testing::Values("768x576.avi", "1920x1080.avi"), + ::testing::Values(1, 3))) +{ + VideoMOG2ParamType params = GetParam(); + + const string inputFile = perf::TestBase::getDataPath(get<0>(params)); + const int cn = get<1>(params); + + cv::VideoCapture cap(inputFile); + ASSERT_TRUE(cap.isOpened()); + + cv::Mat frame; + + cap >> frame; + ASSERT_FALSE(frame.empty()); + + cv::Mat temp; + if (cn == 1) + cv::cvtColor(frame, temp, cv::COLOR_BGR2GRAY); + else + cv::cvtColor(frame, temp, cv::COLOR_BGR2BGRA); + cv::swap(temp, frame); + + if(RUN_PLAIN_IMPL) + { + cv::BackgroundSubtractorMOG2 mog2; + cv::Mat foreground; + + mog2.set("detectShadows", false); + mog2(frame, foreground); + + for (int i = 0; i < 10; ++i) + { + cap >> frame; + ASSERT_FALSE(frame.empty()); + + cv::Mat temp; + if (cn == 1) + cv::cvtColor(frame, temp, cv::COLOR_BGR2GRAY); + else + cv::cvtColor(frame, temp, cv::COLOR_BGR2BGRA); + + cv::swap(temp, frame); + + TEST_CYCLE() + mog2(frame, foreground); + + SANITY_CHECK(foreground); + } + }else if(RUN_OCL_IMPL) + { + cv::ocl::oclMat d_frame(frame); + cv::ocl::MOG2 d_mog2; + cv::ocl::oclMat foreground; + cv::Mat foreground_h; + + d_mog2(d_frame, foreground); + + for (int i = 0; i < 10; ++i) + { + cap >> frame; + ASSERT_FALSE(frame.empty()); + + cv::Mat temp; + if (cn == 1) + cv::cvtColor(frame, temp, cv::COLOR_BGR2GRAY); + else + cv::cvtColor(frame, temp, cv::COLOR_BGR2BGRA); + 
cv::swap(temp, frame); + + d_frame.upload(frame); + + OCL_TEST_CYCLE() + d_mog2(d_frame, foreground); + + foreground.download(foreground_h); + SANITY_CHECK(foreground_h); + } + }else + OCL_PERF_ELSE +} +#endif + +#if BUILD_WITH_VIDEO_INPUT_SUPPORT + +typedef TestBaseWithParam<VideoMOG2ParamType> Video_MOG2GetBackgroundImage; + +PERF_TEST_P(Video_MOG2GetBackgroundImage, Video_MOG2, + ::testing::Combine(::testing::Values("768x576.avi", "1920x1080.avi"), + ::testing::Values(1, 3))) +{ + VideoMOG2ParamType params = GetParam(); + + const string inputFile = perf::TestBase::getDataPath(get<0>(params)); + const int cn = get<1>(params); + + cv::VideoCapture cap(inputFile); + ASSERT_TRUE(cap.isOpened()); + + cv::Mat frame; + + cap >> frame; + ASSERT_FALSE(frame.empty()); + + cv::Mat temp; + if (cn == 1) + cv::cvtColor(frame, temp, cv::COLOR_BGR2GRAY); + else + cv::cvtColor(frame, temp, cv::COLOR_BGR2BGRA); + cv::swap(temp, frame); + + if(RUN_PLAIN_IMPL) + { + cv::BackgroundSubtractorMOG2 mog2; + cv::Mat foreground; + + mog2.set("detectShadows", false); + mog2(frame, foreground); + + for (int i = 0; i < 10; ++i) + { + cap >> frame; + ASSERT_FALSE(frame.empty()); + + cv::Mat temp; + if (cn == 1) + cv::cvtColor(frame, temp, cv::COLOR_BGR2GRAY); + else + cv::cvtColor(frame, temp, cv::COLOR_BGR2BGRA); + + cv::swap(temp, frame); + + TEST_CYCLE() + mog2(frame, foreground); + } + cv::Mat background; + TEST_CYCLE() + mog2.getBackgroundImage(background); + + SANITY_CHECK(background); + }else if(RUN_OCL_IMPL) + { + cv::ocl::oclMat d_frame(frame); + cv::ocl::MOG2 d_mog2; + cv::ocl::oclMat foreground; + cv::Mat background_h; + + d_mog2(d_frame, foreground); + + for (int i = 0; i < 10; ++i) + { + cap >> frame; + ASSERT_FALSE(frame.empty()); + + cv::Mat temp; + if (cn == 1) + cv::cvtColor(frame, temp, cv::COLOR_BGR2GRAY); + else + cv::cvtColor(frame, temp, cv::COLOR_BGR2BGRA); + cv::swap(temp, frame); + + d_frame.upload(frame); + d_mog2(d_frame, foreground); + } + cv::ocl::oclMat background; + 
OCL_TEST_CYCLE() + d_mog2.getBackgroundImage(background); + + background.download(background_h); + SANITY_CHECK(background_h); + }else + OCL_PERF_ELSE +} +#endif + From dd73016c8ba1b70dd371c44995f51f3c206c2636 Mon Sep 17 00:00:00 2001 From: Jin Ma Date: Wed, 4 Sep 2013 15:00:36 +0800 Subject: [PATCH 24/41] Removed whitespace. --- modules/ocl/perf/perf_bgfg.cpp | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/modules/ocl/perf/perf_bgfg.cpp b/modules/ocl/perf/perf_bgfg.cpp index 188dffa13..185f704b3 100644 --- a/modules/ocl/perf/perf_bgfg.cpp +++ b/modules/ocl/perf/perf_bgfg.cpp @@ -79,7 +79,7 @@ PERF_TEST_P(VideoMOGFixture, Video_MOG, const string inputFile = perf::TestBase::getDataPath(get<0>(params)); const int cn = get<1>(params); const float learningRate = static_cast<float>(get<2>(params)); - + cv::VideoCapture cap(inputFile); ASSERT_TRUE(cap.isOpened()); @@ -296,7 +296,7 @@ PERF_TEST_P(Video_MOG2GetBackgroundImage, Video_MOG2, mog2(frame, foreground); } cv::Mat background; - TEST_CYCLE() + TEST_CYCLE() mog2.getBackgroundImage(background); SANITY_CHECK(background); @@ -333,5 +333,4 @@ PERF_TEST_P(Video_MOG2GetBackgroundImage, Video_MOG2, }else OCL_PERF_ELSE } -#endif - +#endif \ No newline at end of file From f538e503853b42c43ddf5ffb59338e2c3e38c805 Mon Sep 17 00:00:00 2001 From: ilya-lavrenov Date: Wed, 4 Sep 2013 11:56:22 +0400 Subject: [PATCH 25/41] updated .gitignore --- .gitignore | 1 + 1 file changed, 1 insertion(+) diff --git a/.gitignore b/.gitignore index 4fd406edd..0bcffd726 100644 --- a/.gitignore +++ b/.gitignore @@ -7,3 +7,4 @@ tegra/ .sw[a-z] .*.swp tags +*.autosave From a9975b144a22e76228125eb0a25f78ec13db6815 Mon Sep 17 00:00:00 2001 From: Roman Donchenko Date: Wed, 4 Sep 2013 14:06:34 +0400 Subject: [PATCH 26/41] Fixed a bug in FLANN resulting in uninitialized accesses. This is fixed upstream in mariusmuja/flann@b615f26, but that fix would break binary compatibility, so I had to make a different one.
Since the bug isn't quite obvious, here's an explanation. In the const version of any::cast, if policy is a small_any_policy, its get_value returns its input argument. So r becomes a pointer to obj, and the return value is a reference to a local variable, which is invalidated when the function exits. --- modules/flann/include/opencv2/flann/any.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/modules/flann/include/opencv2/flann/any.h b/modules/flann/include/opencv2/flann/any.h index 89189c64e..7140b2a08 100644 --- a/modules/flann/include/opencv2/flann/any.h +++ b/modules/flann/include/opencv2/flann/any.h @@ -255,8 +255,7 @@ public: const T& cast() const { if (policy->type() != typeid(T)) throw anyimpl::bad_any_cast(); - void* obj = const_cast<void*>(object); - T* r = reinterpret_cast<T*>(policy->get_value(&obj)); + T* r = reinterpret_cast<T*>(policy->get_value(const_cast<void **>(&object))); return *r; } From 6ebfa87181af66511098c49b317493bdf854abaf Mon Sep 17 00:00:00 2001 From: Roman Donchenko Date: Wed, 4 Sep 2013 16:13:27 +0400 Subject: [PATCH 27/41] Delete a bunch more trailing whitespace that slipped through the cracks.
--- modules/imgproc/src/color.cpp | 38 +++++++++---------- modules/imgproc/src/smooth.cpp | 2 +- modules/java/android_test/AndroidManifest.xml | 4 +- modules/java/android_test/res/layout/main.xml | 6 +-- 4 files changed, 25 insertions(+), 25 deletions(-) diff --git a/modules/imgproc/src/color.cpp b/modules/imgproc/src/color.cpp index 9469925ec..bac63ad6f 100644 --- a/modules/imgproc/src/color.cpp +++ b/modules/imgproc/src/color.cpp @@ -398,7 +398,7 @@ private: struct IPPColor2GrayFunctor { - IPPColor2GrayFunctor(ippiColor2GrayFunc _func) : func(_func) + IPPColor2GrayFunctor(ippiColor2GrayFunc _func) : func(_func) { coeffs[0] = 0.114f; coeffs[1] = 0.587f; @@ -454,7 +454,7 @@ struct IPPReorderGeneralFunctor } bool operator()(const void *src, int srcStep, void *dst, int dstStep, int cols, int rows) const { - Mat temp; + Mat temp; temp.create(rows, cols, CV_MAKETYPE(depth, 3)); if(func1(src, srcStep, temp.data, (int)temp.step[0], ippiSize(cols, rows), order) < 0) return false; @@ -478,7 +478,7 @@ struct IPPGeneralReorderFunctor } bool operator()(const void *src, int srcStep, void *dst, int dstStep, int cols, int rows) const { - Mat temp; + Mat temp; temp.create(rows, cols, CV_MAKETYPE(depth, 3)); if(func1(src, srcStep, temp.data, (int)temp.step[0], ippiSize(cols, rows)) < 0) return false; @@ -3651,8 +3651,8 @@ void cv::cvtColor( InputArray _src, OutputArray _dst, int code, int dcn ) _dst.create( sz, CV_MAKETYPE(depth, dcn)); dst = _dst.getMat(); - -#if defined (HAVE_IPP) && (IPP_VERSION_MAJOR >= 7) + +#if defined (HAVE_IPP) && (IPP_VERSION_MAJOR >= 7) if( code == CV_BGR2BGRA || code == CV_RGB2RGBA) { if ( CvtColorIPPLoop(src, dst, IPPReorderFunctor(ippiSwapChannelsC3C4RTab[depth], 0, 1, 2)) ) @@ -3737,7 +3737,7 @@ void cv::cvtColor( InputArray _src, OutputArray _dst, int code, int dcn ) CV_Assert( scn == 3 || scn == 4 ); _dst.create(sz, CV_MAKETYPE(depth, 1)); dst = _dst.getMat(); - + #if defined (HAVE_IPP) && (IPP_VERSION_MAJOR >= 7) if( code == CV_BGR2GRAY ) { @@ 
-3789,13 +3789,13 @@ void cv::cvtColor( InputArray _src, OutputArray _dst, int code, int dcn ) CV_Assert( scn == 1 && (dcn == 3 || dcn == 4)); _dst.create(sz, CV_MAKETYPE(depth, dcn)); dst = _dst.getMat(); - + #if defined (HAVE_IPP) && (IPP_VERSION_MAJOR >= 7) if( code == CV_GRAY2BGR ) { if( CvtColorIPPLoop(src, dst, IPPGray2BGRFunctor(ippiCopyP3C3RTab[depth])) ) return; - } + } else if( code == CV_GRAY2BGRA ) { if( CvtColorIPPLoop(src, dst, IPPGray2BGRAFunctor(ippiCopyP3C3RTab[depth], ippiSwapChannelsC3C4RTab[depth], depth)) ) @@ -3882,7 +3882,7 @@ void cv::cvtColor( InputArray _src, OutputArray _dst, int code, int dcn ) _dst.create(sz, CV_MAKETYPE(depth, 3)); dst = _dst.getMat(); - + #if defined (HAVE_IPP) && (IPP_VERSION_MAJOR >= 7) if( code == CV_BGR2XYZ && scn == 3 ) { @@ -3898,7 +3898,7 @@ void cv::cvtColor( InputArray _src, OutputArray _dst, int code, int dcn ) { if( CvtColorIPPLoopCopy(src, dst, IPPGeneralFunctor(ippiRGB2XYZTab[depth])) ) return; - } + } else if( code == CV_RGB2XYZ && scn == 4 ) { if( CvtColorIPPLoop(src, dst, IPPReorderGeneralFunctor(ippiSwapChannelsC4C3RTab[depth], ippiRGB2XYZTab[depth], 0, 1, 2, depth)) ) @@ -3921,7 +3921,7 @@ void cv::cvtColor( InputArray _src, OutputArray _dst, int code, int dcn ) _dst.create(sz, CV_MAKETYPE(depth, dcn)); dst = _dst.getMat(); - + #if defined (HAVE_IPP) && (IPP_VERSION_MAJOR >= 7) if( code == CV_XYZ2BGR && dcn == 3 ) { @@ -3964,7 +3964,7 @@ void cv::cvtColor( InputArray _src, OutputArray _dst, int code, int dcn ) _dst.create(sz, CV_MAKETYPE(depth, 3)); dst = _dst.getMat(); - + #if defined (HAVE_IPP) && (IPP_VERSION_MAJOR >= 7) if( depth == CV_8U || depth == CV_16U ) { @@ -3982,12 +3982,12 @@ void cv::cvtColor( InputArray _src, OutputArray _dst, int code, int dcn ) { if( CvtColorIPPLoopCopy(src, dst, IPPGeneralFunctor(ippiRGB2HSVTab[depth])) ) return; - } + } else if( code == CV_RGB2HSV_FULL && scn == 4 ) { if( CvtColorIPPLoop(src, dst, IPPReorderGeneralFunctor(ippiSwapChannelsC4C3RTab[depth], 
ippiRGB2HSVTab[depth], 0, 1, 2, depth)) ) return; - } + } else if( code == CV_BGR2HLS_FULL && scn == 3 ) { if( CvtColorIPPLoopCopy(src, dst, IPPReorderGeneralFunctor(ippiSwapChannelsC3RTab[depth], ippiRGB2HLSTab[depth], 2, 1, 0, depth)) ) @@ -4002,7 +4002,7 @@ void cv::cvtColor( InputArray _src, OutputArray _dst, int code, int dcn ) { if( CvtColorIPPLoopCopy(src, dst, IPPGeneralFunctor(ippiRGB2HLSTab[depth])) ) return; - } + } else if( code == CV_RGB2HLS_FULL && scn == 4 ) { if( CvtColorIPPLoop(src, dst, IPPReorderGeneralFunctor(ippiSwapChannelsC4C3RTab[depth], ippiRGB2HLSTab[depth], 0, 1, 2, depth)) ) @@ -4045,7 +4045,7 @@ void cv::cvtColor( InputArray _src, OutputArray _dst, int code, int dcn ) _dst.create(sz, CV_MAKETYPE(depth, dcn)); dst = _dst.getMat(); - + #if defined (HAVE_IPP) && (IPP_VERSION_MAJOR >= 7) if( depth == CV_8U || depth == CV_16U ) { @@ -4063,12 +4063,12 @@ void cv::cvtColor( InputArray _src, OutputArray _dst, int code, int dcn ) { if( CvtColorIPPLoopCopy(src, dst, IPPGeneralFunctor(ippiHSV2RGBTab[depth])) ) return; - } + } else if( code == CV_HSV2RGB_FULL && dcn == 4 ) { if( CvtColorIPPLoop(src, dst, IPPGeneralReorderFunctor(ippiHSV2RGBTab[depth], ippiSwapChannelsC3C4RTab[depth], 0, 1, 2, depth)) ) return; - } + } else if( code == CV_HLS2BGR_FULL && dcn == 3 ) { if( CvtColorIPPLoopCopy(src, dst, IPPGeneralReorderFunctor(ippiHLS2RGBTab[depth], ippiSwapChannelsC3RTab[depth], 2, 1, 0, depth)) ) @@ -4083,7 +4083,7 @@ void cv::cvtColor( InputArray _src, OutputArray _dst, int code, int dcn ) { if( CvtColorIPPLoopCopy(src, dst, IPPGeneralFunctor(ippiHLS2RGBTab[depth])) ) return; - } + } else if( code == CV_HLS2RGB_FULL && dcn == 4 ) { if( CvtColorIPPLoop(src, dst, IPPGeneralReorderFunctor(ippiHLS2RGBTab[depth], ippiSwapChannelsC3C4RTab[depth], 0, 1, 2, depth)) ) diff --git a/modules/imgproc/src/smooth.cpp b/modules/imgproc/src/smooth.cpp index 3dad2c087..3d42e3be5 100644 --- a/modules/imgproc/src/smooth.cpp +++ b/modules/imgproc/src/smooth.cpp @@ 
-1951,7 +1951,7 @@ bilateralFilter_8u( const Mat& src, Mat& dst, int d, if( ok ) return; } #endif - + vector _color_weight(cn*256); vector _space_weight(d*d); vector _space_ofs(d*d); diff --git a/modules/java/android_test/AndroidManifest.xml b/modules/java/android_test/AndroidManifest.xml index dfe25fff0..81f2bc134 100644 --- a/modules/java/android_test/AndroidManifest.xml +++ b/modules/java/android_test/AndroidManifest.xml @@ -3,7 +3,7 @@ package="org.opencv.test" android:versionCode="1" android:versionName="1.0"> - +