From 9b3c318e8536ce58d3007d0b298c338b56aecc56 Mon Sep 17 00:00:00 2001 From: peng xiao Date: Wed, 26 Jun 2013 08:43:23 +0800 Subject: [PATCH 01/18] Add super resolution's OpenCL implementation. Accuracy and performance tests are also provided. --- modules/superres/CMakeLists.txt | 2 +- .../include/opencv2/superres/optical_flow.hpp | 2 + .../include/opencv2/superres/superres.hpp | 1 + modules/superres/perf/perf_superres_ocl.cpp | 138 ++++ modules/superres/src/btv_l1_ocl.cpp | 748 ++++++++++++++++++ modules/superres/src/frame_source.cpp | 14 +- modules/superres/src/input_array_utility.cpp | 121 ++- modules/superres/src/input_array_utility.hpp | 7 + modules/superres/src/opencl/superres_btvl1.cl | 261 ++++++ modules/superres/src/optical_flow.cpp | 192 +++++ modules/superres/src/precomp.hpp | 4 + modules/superres/test/test_superres.cpp | 9 +- 12 files changed, 1483 insertions(+), 16 deletions(-) create mode 100644 modules/superres/perf/perf_superres_ocl.cpp create mode 100644 modules/superres/src/btv_l1_ocl.cpp create mode 100644 modules/superres/src/opencl/superres_btvl1.cl diff --git a/modules/superres/CMakeLists.txt b/modules/superres/CMakeLists.txt index 6c6022c72..44e9dc0f3 100644 --- a/modules/superres/CMakeLists.txt +++ b/modules/superres/CMakeLists.txt @@ -4,4 +4,4 @@ endif() set(the_description "Super Resolution") ocv_warnings_disable(CMAKE_CXX_FLAGS /wd4127 -Wundef) -ocv_define_module(superres opencv_imgproc opencv_video OPTIONAL opencv_gpu opencv_highgui) +ocv_define_module(superres opencv_imgproc opencv_video OPTIONAL opencv_gpu opencv_highgui opencv_ocl) diff --git a/modules/superres/include/opencv2/superres/optical_flow.hpp b/modules/superres/include/opencv2/superres/optical_flow.hpp index 7a0ed833f..13ea9905c 100644 --- a/modules/superres/include/opencv2/superres/optical_flow.hpp +++ b/modules/superres/include/opencv2/superres/optical_flow.hpp @@ -63,10 +63,12 @@ namespace cv CV_EXPORTS Ptr createOptFlow_DualTVL1(); CV_EXPORTS Ptr createOptFlow_DualTVL1_GPU(); + CV_EXPORTS Ptr createOptFlow_DualTVL1_OCL(); CV_EXPORTS Ptr createOptFlow_Brox_GPU(); CV_EXPORTS Ptr createOptFlow_PyrLK_GPU(); + CV_EXPORTS Ptr createOptFlow_PyrLK_OCL(); } } diff --git a/modules/superres/include/opencv2/superres/superres.hpp b/modules/superres/include/opencv2/superres/superres.hpp index 1245c122a..8daeb5ba0 100644 --- a/modules/superres/include/opencv2/superres/superres.hpp +++ b/modules/superres/include/opencv2/superres/superres.hpp @@ -92,6 +92,7 @@ namespace cv // Dennis Mitzel, Thomas Pock, Thomas Schoenemann, Daniel Cremers. Video Super Resolution using Duality Based TV-L1 Optical Flow. CV_EXPORTS Ptr createSuperResolution_BTVL1(); CV_EXPORTS Ptr createSuperResolution_BTVL1_GPU(); + CV_EXPORTS Ptr createSuperResolution_BTVL1_OCL(); } } diff --git a/modules/superres/perf/perf_superres_ocl.cpp b/modules/superres/perf/perf_superres_ocl.cpp new file mode 100644 index 000000000..524be2dcf --- /dev/null +++ b/modules/superres/perf/perf_superres_ocl.cpp @@ -0,0 +1,138 @@ +/*M/////////////////////////////////////////////////////////////////////////////////////// +// +// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. +// +// By downloading, copying, installing or using the software you agree to this license. +// If you do not agree to this license, do not download, install, +// copy or use the software. +// +// +// License Agreement +// For Open Source Computer Vision Library +// +// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved. +// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved. +// Third party copyrights are property of their respective owners. +// +// Redistribution and use in source and binary forms, with or without modification, +// are permitted provided that the following conditions are met: +// +// * Redistribution's of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// * Redistribution's in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// * The name of the copyright holders may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// This software is provided by the copyright holders and contributors "as is" and +// any express or implied warranties, including, but not limited to, the implied +// warranties of merchantability and fitness for a particular purpose are disclaimed. +// In no event shall the Intel Corporation or contributors be liable for any direct, +// indirect, incidental, special, exemplary, or consequential damages +// (including, but not limited to, procurement of substitute goods or services; +// loss of use, data, or profits; or business interruption) however caused +// and on any theory of liability, whether in contract, strict liability, +// or tort (including negligence or otherwise) arising in any way out of +// the use of this software, even if advised of the possibility of such damage. +// +//M*/ + +#include "perf_precomp.hpp" +#include "opencv2/ocl/ocl.hpp" + +#ifdef HAVE_OPENCL + +using namespace std; +using namespace testing; +using namespace perf; +using namespace cv; +using namespace cv::superres; + +namespace +{ + class OneFrameSource_OCL : public FrameSource + { + public: + explicit OneFrameSource_OCL(const ocl::oclMat& frame) : frame_(frame) {} + + void nextFrame(OutputArray) + { + } + + void nextFrame(ocl::oclMat& frame) + { + frame_.copyTo(frame); + } + void reset() + { + } + + private: + ocl::oclMat frame_; + }; + + + class ZeroOpticalFlowOCL : public DenseOpticalFlowExt + { + public: + void calc(ocl::oclMat& frame0, ocl::oclMat&, ocl::oclMat& flow1, ocl::oclMat& flow2) + { + cv::Size size = frame0.size(); + + flow1.create(size, CV_32FC1); + flow2.create(size, CV_32FC1); + + flow1.setTo(Scalar::all(0)); + flow2.setTo(Scalar::all(0)); + } + + void collectGarbage() + { + } + }; +} + +PERF_TEST_P(Size_MatType, SuperResolution_BTVL1_OCL, + Combine(Values(szSmall64, szSmall128), + Values(MatType(CV_8UC1), MatType(CV_8UC3)))) +{ + std::vectorinfo; + cv::ocl::getDevice(info); + + declare.time(5 * 60); + + const Size size = get<0>(GetParam()); + const int type = get<1>(GetParam()); + + Mat frame(size, type); + declare.in(frame, WARMUP_RNG); + + ocl::oclMat frame_ocl; + frame_ocl.upload(frame); + + + const int scale = 2; + const int iterations = 50; + const int temporalAreaRadius = 1; + Ptr opticalFlowOcl(new ZeroOpticalFlowOCL); + + Ptr superRes_ocl = createSuperResolution_BTVL1_OCL(); + + superRes_ocl->set("scale", scale); + superRes_ocl->set("iterations", iterations); + superRes_ocl->set("temporalAreaRadius", temporalAreaRadius); + superRes_ocl->set("opticalFlow", opticalFlowOcl); + + superRes_ocl->setInput(new OneFrameSource_OCL(frame_ocl)); + + ocl::oclMat dst_ocl; + superRes_ocl->nextFrame(dst_ocl); + + TEST_CYCLE_N(10) superRes_ocl->nextFrame(dst_ocl); + frame_ocl.release(); + CPU_SANITY_CHECK(dst_ocl); +} +#endif \ No newline at end of file diff --git a/modules/superres/src/btv_l1_ocl.cpp b/modules/superres/src/btv_l1_ocl.cpp new file mode 100644 index 000000000..5f9e32675 --- /dev/null +++ b/modules/superres/src/btv_l1_ocl.cpp @@ -0,0 +1,748 @@ +/*M/////////////////////////////////////////////////////////////////////////////////////// +// +// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. +// +// By downloading, copying, installing or using the software you agree to this license. +// If you do not agree to this license, do not download, install, +// copy or use the software. +// +// +// License Agreement +// For Open Source Computer Vision Library +// +// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved. +// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved. +// Third party copyrights are property of their respective owners. +// +// @Authors +// Jin Ma, jin@multicorewareinc.com +// Redistribution and use in source and binary forms, with or without modification, +// are permitted provided that the following conditions are met: +// +// * Redistribution's of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// * Redistribution's in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// * The name of the copyright holders may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// This software is provided by the copyright holders and contributors "as is" and +// any express or implied warranties, including, but not limited to, the implied +// warranties of merchantability and fitness for a particular purpose are disclaimed. +// In no event shall the Intel Corporation or contributors be liable for any direct, +// indirect, incidental, special, exemplary, or consequential damages +// (including, but not limited to, procurement of substitute goods or services; +// loss of use, data, or profits; or business interruption) however caused +// and on any theory of liability, whether in contract, strict liability, +// or tort (including negligence or otherwise) arising in any way out of +// the use of this software, even if advised of the possibility of such damage. +// +//M*/ + +// S. Farsiu , D. Robinson, M. Elad, P. Milanfar. Fast and robust multiframe super resolution. +// Dennis Mitzel, Thomas Pock, Thomas Schoenemann, Daniel Cremers. Video Super Resolution using Duality Based TV-L1 Optical Flow. + +#include "precomp.hpp" + +#if !defined(HAVE_OPENCL) || !defined(HAVE_OPENCV_OCL) + +cv::Ptr cv::superres::createSuperResolution_BTVL1_OCL() +{ + CV_Error(CV_StsNotImplemented, "The called functionality is disabled for current build or platform"); + return Ptr(); +} + +#else + +using namespace std; +using namespace cv; +using namespace cv::ocl; +using namespace cv::superres; +using namespace cv::superres::detail; + +namespace cv +{ + namespace ocl + { + extern const char* superres_btvl1; + + float* btvWeights_ = NULL; + size_t btvWeights_size = 0; + } +} + +namespace btv_l1_device_ocl +{ + void buildMotionMaps(const oclMat& forwardMotionX, const oclMat& forwardMotionY, + const oclMat& backwardMotionX, const oclMat& bacwardMotionY, + oclMat& forwardMapX, oclMat& forwardMapY, + oclMat& backwardMapX, oclMat& backwardMapY); + + void upscale(const oclMat& src, oclMat& dst, int scale); + + float diffSign(float a, float b); + + Point3f diffSign(Point3f a, Point3f b); + + void diffSign(const oclMat& src1, const oclMat& src2, oclMat& dst); + + void calcBtvRegularization(const oclMat& src, oclMat& dst, int ksize); +} + +void btv_l1_device_ocl::buildMotionMaps(const oclMat& forwardMotionX, const oclMat& forwardMotionY, + const oclMat& backwardMotionX, const oclMat& backwardMotionY, + oclMat& forwardMapX, oclMat& forwardMapY, + oclMat& backwardMapX, oclMat& backwardMapY) +{ + Context* clCxt = Context::getContext(); + + size_t local_thread[] = {32, 8, 1}; + size_t global_thread[] = {forwardMapX.cols, forwardMapX.rows, 1}; + + int forwardMotionX_step = (int)(forwardMotionX.step/forwardMotionX.elemSize()); + int forwardMotionY_step = (int)(forwardMotionY.step/forwardMotionY.elemSize()); + int backwardMotionX_step = (int)(backwardMotionX.step/backwardMotionX.elemSize()); + int backwardMotionY_step = (int)(backwardMotionY.step/backwardMotionY.elemSize()); + int forwardMapX_step = (int)(forwardMapX.step/forwardMapX.elemSize()); + int forwardMapY_step = (int)(forwardMapY.step/forwardMapY.elemSize()); + int backwardMapX_step = (int)(backwardMapX.step/backwardMapX.elemSize()); + int backwardMapY_step = (int)(backwardMapY.step/backwardMapY.elemSize()); + + String kernel_name = "buildMotionMapsKernel"; + vector< pair > args; + + args.push_back(make_pair(sizeof(cl_mem), (void*)&forwardMotionX.data)); + args.push_back(make_pair(sizeof(cl_mem), (void*)&forwardMotionY.data)); + args.push_back(make_pair(sizeof(cl_mem), (void*)&backwardMotionX.data)); + args.push_back(make_pair(sizeof(cl_mem), (void*)&backwardMotionY.data)); + args.push_back(make_pair(sizeof(cl_mem), (void*)&forwardMapX.data)); + args.push_back(make_pair(sizeof(cl_mem), (void*)&forwardMapY.data)); + args.push_back(make_pair(sizeof(cl_mem), (void*)&backwardMapX.data)); + args.push_back(make_pair(sizeof(cl_mem), (void*)&backwardMapY.data)); + + args.push_back(make_pair(sizeof(cl_int), (void*)&forwardMotionX.rows)); + args.push_back(make_pair(sizeof(cl_int), (void*)&forwardMotionY.cols)); + + args.push_back(make_pair(sizeof(cl_int), (void*)&forwardMotionX_step)); + args.push_back(make_pair(sizeof(cl_int), (void*)&forwardMotionY_step)); + args.push_back(make_pair(sizeof(cl_int), (void*)&backwardMotionX_step)); + args.push_back(make_pair(sizeof(cl_int), (void*)&backwardMotionY_step)); + args.push_back(make_pair(sizeof(cl_int), (void*)&forwardMapX_step)); + args.push_back(make_pair(sizeof(cl_int), (void*)&forwardMapY_step)); + args.push_back(make_pair(sizeof(cl_int), (void*)&backwardMapX_step)); + args.push_back(make_pair(sizeof(cl_int), (void*)&backwardMapY_step)); + + openCLExecuteKernel(clCxt, &superres_btvl1, kernel_name, global_thread, local_thread, args, -1, -1); +} + +void btv_l1_device_ocl::upscale(const oclMat& src, oclMat& dst, int scale) +{ + Context* clCxt = Context::getContext(); + + size_t local_thread[] = {32, 8, 1}; + size_t global_thread[] = {src.cols, src.rows, 1}; + + int src_step = (int)(src.step/src.elemSize()); + int dst_step = (int)(dst.step/dst.elemSize()); + + String kernel_name = "upscaleKernel"; + vector< pair > args; + + int cn = src.oclchannels(); + + args.push_back(make_pair(sizeof(cl_mem), (void*)&src.data)); + args.push_back(make_pair(sizeof(cl_mem), (void*)&dst.data)); + args.push_back(make_pair(sizeof(cl_int), (void*)&src_step)); + args.push_back(make_pair(sizeof(cl_int), (void*)&dst_step)); + args.push_back(make_pair(sizeof(cl_int), (void*)&src.rows)); + args.push_back(make_pair(sizeof(cl_int), (void*)&src.cols)); + args.push_back(make_pair(sizeof(cl_int), (void*)&scale)); + args.push_back(make_pair(sizeof(cl_int), (void*)&cn)); + + openCLExecuteKernel(clCxt, &superres_btvl1, kernel_name, global_thread, local_thread, args, -1, -1); + +} + +float btv_l1_device_ocl::diffSign(float a, float b) +{ + return a > b ? 1.0f : a < b ? -1.0f : 0.0f; +} + +Point3f btv_l1_device_ocl::diffSign(Point3f a, Point3f b) +{ + return Point3f( + a.x > b.x ? 1.0f : a.x < b.x ? -1.0f : 0.0f, + a.y > b.y ? 1.0f : a.y < b.y ? -1.0f : 0.0f, + a.z > b.z ? 1.0f : a.z < b.z ? -1.0f : 0.0f + ); +} + +void btv_l1_device_ocl::diffSign(const oclMat& src1, const oclMat& src2, oclMat& dst) +{ + Context* clCxt = Context::getContext(); + + oclMat src1_ = src1.reshape(1); + oclMat src2_ = src2.reshape(1); + oclMat dst_ = dst.reshape(1); + + int src1_step = (int)(src1_.step/src1_.elemSize()); + int src2_step = (int)(src2_.step/src2_.elemSize()); + int dst_step = (int)(dst_.step/dst_.elemSize()); + + size_t local_thread[] = {32, 8, 1}; + size_t global_thread[] = {src1_.cols, src1_.rows, 1}; + + String kernel_name = "diffSignKernel"; + vector< pair > args; + + args.push_back(make_pair(sizeof(cl_mem), (void*)&src1_.data)); + args.push_back(make_pair(sizeof(cl_mem), (void*)&src2_.data)); + args.push_back(make_pair(sizeof(cl_mem), (void*)&dst_.data)); + + args.push_back(make_pair(sizeof(cl_int), (void*)&src1_.rows)); + args.push_back(make_pair(sizeof(cl_int), (void*)&src1_.cols)); + args.push_back(make_pair(sizeof(cl_int), (void*)&dst_step)); + args.push_back(make_pair(sizeof(cl_int), (void*)&src1_step)); + args.push_back(make_pair(sizeof(cl_int), (void*)&src2_step)); + + openCLExecuteKernel(clCxt, &superres_btvl1, kernel_name, global_thread, local_thread, args, -1, -1); +} + +void btv_l1_device_ocl::calcBtvRegularization(const oclMat& src, oclMat& dst, int ksize) +{ + Context* clCxt = Context::getContext(); + + oclMat src_ = src.reshape(1); + oclMat dst_ = dst.reshape(1); + + size_t local_thread[] = {32, 8, 1}; + size_t global_thread[] = {src.cols, src.rows, 1}; + + int src_step = (int)(src_.step/src_.elemSize()); + int dst_step = (int)(dst_.step/dst_.elemSize()); + + String kernel_name = "calcBtvRegularizationKernel"; + vector< pair > args; + + int cn = src.oclchannels(); + + cl_mem c_btvRegWeights; + size_t count = btvWeights_size * sizeof(float); + c_btvRegWeights = openCLCreateBuffer(clCxt, CL_MEM_READ_ONLY, count); + int cl_safe_check = clEnqueueWriteBuffer((cl_command_queue)clCxt->oclCommandQueue(), c_btvRegWeights, 1, 0, count, btvWeights_, 0, NULL, NULL); + CV_Assert(cl_safe_check == CL_SUCCESS); + + args.push_back(make_pair(sizeof(cl_mem), (void*)&src_.data)); + args.push_back(make_pair(sizeof(cl_mem), (void*)&dst_.data)); + args.push_back(make_pair(sizeof(cl_int), (void*)&src_step)); + args.push_back(make_pair(sizeof(cl_int), (void*)&dst_step)); + args.push_back(make_pair(sizeof(cl_int), (void*)&src.rows)); + args.push_back(make_pair(sizeof(cl_int), (void*)&src.cols)); + args.push_back(make_pair(sizeof(cl_int), (void*)&ksize)); + args.push_back(make_pair(sizeof(cl_int), (void*)&cn)); + args.push_back(make_pair(sizeof(cl_mem), (void*)&c_btvRegWeights)); + + openCLExecuteKernel(clCxt, &superres_btvl1, kernel_name, global_thread, local_thread, args, -1, -1); + cl_safe_check = clReleaseMemObject(c_btvRegWeights); + CV_Assert(cl_safe_check == CL_SUCCESS); +} + +namespace +{ + void calcRelativeMotions(const vector >& forwardMotions, const vector >& backwardMotions, + vector >& relForwardMotions, vector >& relBackwardMotions, + int baseIdx, Size size) + { + const int count = static_cast(forwardMotions.size()); + + relForwardMotions.resize(count); + relForwardMotions[baseIdx].first.create(size, CV_32FC1); + relForwardMotions[baseIdx].first.setTo(Scalar::all(0)); + relForwardMotions[baseIdx].second.create(size, CV_32FC1); + relForwardMotions[baseIdx].second.setTo(Scalar::all(0)); + + relBackwardMotions.resize(count); + relBackwardMotions[baseIdx].first.create(size, CV_32FC1); + relBackwardMotions[baseIdx].first.setTo(Scalar::all(0)); + relBackwardMotions[baseIdx].second.create(size, CV_32FC1); + relBackwardMotions[baseIdx].second.setTo(Scalar::all(0)); + + for (int i = baseIdx - 1; i >= 0; --i) + { + ocl::add(relForwardMotions[i + 1].first, forwardMotions[i].first, relForwardMotions[i].first); + ocl::add(relForwardMotions[i + 1].second, forwardMotions[i].second, relForwardMotions[i].second); + + ocl::add(relBackwardMotions[i + 1].first, backwardMotions[i + 1].first, relBackwardMotions[i].first); + ocl::add(relBackwardMotions[i + 1].second, backwardMotions[i + 1].second, relBackwardMotions[i].second); + } + + for (int i = baseIdx + 1; i < count; ++i) + { + ocl::add(relForwardMotions[i - 1].first, backwardMotions[i].first, relForwardMotions[i].first); + ocl::add(relForwardMotions[i - 1].second, backwardMotions[i].second, relForwardMotions[i].second); + + ocl::add(relBackwardMotions[i - 1].first, forwardMotions[i - 1].first, relBackwardMotions[i].first); + ocl::add(relBackwardMotions[i - 1].second, forwardMotions[i - 1].second, relBackwardMotions[i].second); + } + } + + void upscaleMotions(const vector >& lowResMotions, vector >& highResMotions, int scale) + { + highResMotions.resize(lowResMotions.size()); + + for (size_t i = 0; i < lowResMotions.size(); ++i) + { + ocl::resize(lowResMotions[i].first, highResMotions[i].first, Size(), scale, scale, INTER_LINEAR); + ocl::resize(lowResMotions[i].second, highResMotions[i].second, Size(), scale, scale, INTER_LINEAR); + + ocl::multiply(scale, highResMotions[i].first, highResMotions[i].first); + ocl::multiply(scale, highResMotions[i].second, highResMotions[i].second); + } + } + + void buildMotionMaps(const pair& forwardMotion, const pair& backwardMotion, + pair& forwardMap, pair& backwardMap) + { + forwardMap.first.create(forwardMotion.first.size(), CV_32FC1); + forwardMap.second.create(forwardMotion.first.size(), CV_32FC1); + + backwardMap.first.create(forwardMotion.first.size(), CV_32FC1); + backwardMap.second.create(forwardMotion.first.size(), CV_32FC1); + + btv_l1_device_ocl::buildMotionMaps(forwardMotion.first, forwardMotion.second, + backwardMotion.first, backwardMotion.second, + forwardMap.first, forwardMap.second, + backwardMap.first, backwardMap.second); + } + + void upscale(const oclMat& src, oclMat& dst, int scale) + { + CV_Assert( src.channels() == 1 || src.channels() == 3 || src.channels() == 4 ); + + dst.create(src.rows * scale, src.cols * scale, src.type()); + dst.setTo(Scalar::all(0)); + + btv_l1_device_ocl::upscale(src, dst, scale); + } + + void diffSign(const oclMat& src1, const oclMat& src2, oclMat& dst) + { + dst.create(src1.size(), src1.type()); + + btv_l1_device_ocl::diffSign(src1, src2, dst); + } + + void calcBtvWeights(int btvKernelSize, double alpha, vector& btvWeights) + { + const size_t size = btvKernelSize * btvKernelSize; + + btvWeights.resize(size); + + const int ksize = (btvKernelSize - 1) / 2; + const float alpha_f = static_cast(alpha); + + for (int m = 0, ind = 0; m <= ksize; ++m) + { + for (int l = ksize; l + m >= 0; --l, ++ind) + btvWeights[ind] = pow(alpha_f, std::abs(m) + std::abs(l)); + } + + btvWeights_ = &btvWeights[0]; + btvWeights_size = size; + } + + void calcBtvRegularization(const oclMat& src, oclMat& dst, int btvKernelSize) + { + dst.create(src.size(), src.type()); + dst.setTo(Scalar::all(0)); + + const int ksize = (btvKernelSize - 1) / 2; + + btv_l1_device_ocl::calcBtvRegularization(src, dst, ksize); + } + + class BTVL1_OCL_Base + { + public: + BTVL1_OCL_Base(); + + void process(const vector& src, oclMat& dst, + const vector >& forwardMotions, const vector >& backwardMotions, + int baseIdx); + + void collectGarbage(); + + protected: + int scale_; + int iterations_; + double lambda_; + double tau_; + double alpha_; + int btvKernelSize_; + int blurKernelSize_; + double blurSigma_; + Ptr opticalFlow_; + + private: + vector > filters_; + int curBlurKernelSize_; + double curBlurSigma_; + int curSrcType_; + + vector btvWeights_; + int curBtvKernelSize_; + double curAlpha_; + + vector > lowResForwardMotions_; + vector > lowResBackwardMotions_; + + vector > highResForwardMotions_; + vector > highResBackwardMotions_; + + vector > forwardMaps_; + vector > backwardMaps_; + + oclMat highRes_; + + vector diffTerms_; + vector a_, b_, c_; + oclMat regTerm_; + }; + + BTVL1_OCL_Base::BTVL1_OCL_Base() + { + scale_ = 4; + iterations_ = 180; + lambda_ = 0.03; + tau_ = 1.3; + alpha_ = 0.7; + btvKernelSize_ = 7; + blurKernelSize_ = 5; + blurSigma_ = 0.0; + opticalFlow_ = createOptFlow_DualTVL1_OCL(); + + curBlurKernelSize_ = -1; + curBlurSigma_ = -1.0; + curSrcType_ = -1; + + curBtvKernelSize_ = -1; + curAlpha_ = -1.0; + } + + void BTVL1_OCL_Base::process(const vector& src, oclMat& dst, + const vector >& forwardMotions, const vector >& backwardMotions, + int baseIdx) + { + CV_Assert( scale_ > 1 ); + CV_Assert( iterations_ > 0 ); + CV_Assert( tau_ > 0.0 ); + CV_Assert( alpha_ > 0.0 ); + CV_Assert( btvKernelSize_ > 0 && btvKernelSize_ <= 16 ); + CV_Assert( blurKernelSize_ > 0 ); + CV_Assert( blurSigma_ >= 0.0 ); + + // update blur filter and btv weights + + if (filters_.size() != src.size() || blurKernelSize_ != curBlurKernelSize_ || blurSigma_ != curBlurSigma_ || src[0].type() != curSrcType_) + { + filters_.resize(src.size()); + for (size_t i = 0; i < src.size(); ++i) + filters_[i] = cv::ocl::createGaussianFilter_GPU(src[0].type(), Size(blurKernelSize_, blurKernelSize_), blurSigma_); + curBlurKernelSize_ = blurKernelSize_; + curBlurSigma_ = blurSigma_; + curSrcType_ = src[0].type(); + } + + if (btvWeights_.empty() || btvKernelSize_ != curBtvKernelSize_ || alpha_ != curAlpha_) + { + calcBtvWeights(btvKernelSize_, alpha_, btvWeights_); + curBtvKernelSize_ = btvKernelSize_; + curAlpha_ = alpha_; + } + + // calc motions between input frames + + calcRelativeMotions(forwardMotions, backwardMotions, + lowResForwardMotions_, lowResBackwardMotions_, + baseIdx, src[0].size()); + + upscaleMotions(lowResForwardMotions_, highResForwardMotions_, scale_); + upscaleMotions(lowResBackwardMotions_, highResBackwardMotions_, scale_); + + forwardMaps_.resize(highResForwardMotions_.size()); + backwardMaps_.resize(highResForwardMotions_.size()); + for (size_t i = 0; i < highResForwardMotions_.size(); ++i) + { + buildMotionMaps(highResForwardMotions_[i], highResBackwardMotions_[i], forwardMaps_[i], backwardMaps_[i]); + } + // initial estimation + + const Size lowResSize = src[0].size(); + const Size highResSize(lowResSize.width * scale_, lowResSize.height * scale_); + + ocl::resize(src[baseIdx], highRes_, highResSize, 0, 0, INTER_LINEAR); + + // iterations + + diffTerms_.resize(src.size()); + a_.resize(src.size()); + b_.resize(src.size()); + c_.resize(src.size()); + + for (int i = 0; i < iterations_; ++i) + { + for (size_t k = 0; k < src.size(); ++k) + { + diffTerms_[k].create(highRes_.size(), highRes_.type()); + a_[k].create(highRes_.size(), highRes_.type()); + b_[k].create(highRes_.size(), highRes_.type()); + c_[k].create(lowResSize, highRes_.type()); + + // a = M * Ih + ocl::remap(highRes_, a_[k], backwardMaps_[k].first, backwardMaps_[k].second, INTER_NEAREST, BORDER_CONSTANT, Scalar()); + // b = HM * Ih + filters_[k]->apply(a_[k], b_[k], Rect(0,0,-1,-1)); + // c = DHF * Ih + ocl::resize(b_[k], c_[k], lowResSize, 0, 0, INTER_NEAREST); + + diffSign(src[k], c_[k], c_[k]); + + // a = Dt * diff + upscale(c_[k], a_[k], scale_); + // b = HtDt * diff + filters_[k]->apply(a_[k], b_[k], Rect(0,0,-1,-1)); + // diffTerm = MtHtDt * diff + ocl::remap(b_[k], diffTerms_[k], forwardMaps_[k].first, forwardMaps_[k].second, INTER_NEAREST, BORDER_CONSTANT, Scalar()); + } + + if (lambda_ > 0) + { + calcBtvRegularization(highRes_, regTerm_, btvKernelSize_); + ocl::addWeighted(highRes_, 1.0, regTerm_, -tau_ * lambda_, 0.0, highRes_); + } + + for (size_t k = 0; k < src.size(); ++k) + { + ocl::addWeighted(highRes_, 1.0, diffTerms_[k], tau_, 0.0, highRes_); + } + } + + Rect inner(btvKernelSize_, btvKernelSize_, highRes_.cols - 2 * btvKernelSize_, highRes_.rows - 2 * btvKernelSize_); + highRes_(inner).copyTo(dst); + } + + void BTVL1_OCL_Base::collectGarbage() + { + filters_.clear(); + + lowResForwardMotions_.clear(); + lowResBackwardMotions_.clear(); + + highResForwardMotions_.clear(); + highResBackwardMotions_.clear(); + + forwardMaps_.clear(); + backwardMaps_.clear(); + + highRes_.release(); + + diffTerms_.clear(); + a_.clear(); + b_.clear(); + c_.clear(); + regTerm_.release(); + } + + //////////////////////////////////////////////////////////// + + class BTVL1_OCL : public SuperResolution, private BTVL1_OCL_Base + { + public: + AlgorithmInfo* info() const; + + BTVL1_OCL(); + + void collectGarbage(); + + protected: + void initImpl(Ptr& frameSource); + void processImpl(Ptr& frameSource, OutputArray output); + + private: + int temporalAreaRadius_; + + void readNextFrame(Ptr& frameSource); + void processFrame(int idx); + + oclMat curFrame_; + oclMat prevFrame_; + + vector frames_; + vector > forwardMotions_; + vector > backwardMotions_; + vector outputs_; + + int storePos_; + int procPos_; + int outPos_; + + vector srcFrames_; + vector > srcForwardMotions_; + vector > srcBackwardMotions_; + oclMat finalOutput_; + }; + + CV_INIT_ALGORITHM(BTVL1_OCL, "SuperResolution.BTVL1_OCL", + obj.info()->addParam(obj, "scale", obj.scale_, false, 0, 0, "Scale factor."); + obj.info()->addParam(obj, "iterations", obj.iterations_, false, 0, 0, "Iteration count."); + obj.info()->addParam(obj, "tau", obj.tau_, false, 0, 0, "Asymptotic value of steepest descent method."); + obj.info()->addParam(obj, "lambda", obj.lambda_, false, 0, 0, "Weight parameter to balance data term and smoothness term."); + obj.info()->addParam(obj, "alpha", obj.alpha_, false, 0, 0, "Parameter of spacial distribution in Bilateral-TV."); + obj.info()->addParam(obj, "btvKernelSize", obj.btvKernelSize_, false, 0, 0, "Kernel size of Bilateral-TV filter."); + obj.info()->addParam(obj, "blurKernelSize", obj.blurKernelSize_, false, 0, 0, "Gaussian blur kernel size."); + obj.info()->addParam(obj, "blurSigma", obj.blurSigma_, false, 0, 0, "Gaussian blur sigma."); + obj.info()->addParam(obj, "temporalAreaRadius", obj.temporalAreaRadius_, false, 0, 0, "Radius of the temporal search area."); + obj.info()->addParam(obj, "opticalFlow", obj.opticalFlow_, false, 0, 0, "Dense optical flow algorithm.")); + + BTVL1_OCL::BTVL1_OCL() + { + temporalAreaRadius_ = 4; + } + + void BTVL1_OCL::collectGarbage() + { + curFrame_.release(); + prevFrame_.release(); + + frames_.clear(); + forwardMotions_.clear(); + backwardMotions_.clear(); + outputs_.clear(); + + srcFrames_.clear(); + srcForwardMotions_.clear(); + srcBackwardMotions_.clear(); + finalOutput_.release(); + + SuperResolution::collectGarbage(); + BTVL1_OCL_Base::collectGarbage(); + } + + void BTVL1_OCL::initImpl(Ptr& frameSource) + { + const int cacheSize = 2 * temporalAreaRadius_ + 1; + + frames_.resize(cacheSize); + forwardMotions_.resize(cacheSize); + backwardMotions_.resize(cacheSize); + outputs_.resize(cacheSize); + + storePos_ = -1; + + for (int t = -temporalAreaRadius_; t <= temporalAreaRadius_; ++t) + readNextFrame(frameSource); + + for (int i = 0; i <= temporalAreaRadius_; ++i) + processFrame(i); + + procPos_ = temporalAreaRadius_; + outPos_ = -1; + } + + void BTVL1_OCL::processImpl(Ptr& frameSource, OutputArray _output) + { + if (outPos_ >= storePos_) + { + if(_output.kind() == _InputArray::OCL_MAT) + { + getOclMatRef(_output).release(); + } + else + { + _output.release(); + } + return; + } + + readNextFrame(frameSource); + + if (procPos_ < storePos_) + { + ++procPos_; + processFrame(procPos_); + } + + ++outPos_; + const oclMat& curOutput = at(outPos_, outputs_); + + if (_output.kind() == _InputArray::OCL_MAT) + curOutput.convertTo(getOclMatRef(_output), CV_8U); + else + { + curOutput.convertTo(finalOutput_, CV_8U); + arrCopy(finalOutput_, _output); + } + } + + void BTVL1_OCL::readNextFrame(Ptr& frameSource) + { + curFrame_.release(); + frameSource->nextFrame(curFrame_); + + if (curFrame_.empty()) + return; + + ++storePos_; + curFrame_.convertTo(at(storePos_, frames_), CV_32F); + + if (storePos_ > 0) + { + pair& forwardMotion = at(storePos_ - 1, forwardMotions_); + pair& backwardMotion = at(storePos_, backwardMotions_); + + opticalFlow_->calc(prevFrame_, curFrame_, forwardMotion.first, forwardMotion.second); + opticalFlow_->calc(curFrame_, prevFrame_, backwardMotion.first, backwardMotion.second); + } + + curFrame_.copyTo(prevFrame_); + } + + void BTVL1_OCL::processFrame(int idx) + { + const int startIdx = max(idx - temporalAreaRadius_, 0); + const int procIdx = idx; + const int endIdx = min(startIdx + 2 * temporalAreaRadius_, storePos_); + + const int count = endIdx - startIdx + 1; + + srcFrames_.resize(count); + srcForwardMotions_.resize(count); + srcBackwardMotions_.resize(count); + + int baseIdx = -1; + + for (int i = startIdx, k = 0; i <= endIdx; ++i, ++k) + { + if (i == procIdx) + baseIdx = k; + + srcFrames_[k] = at(i, frames_); + + if (i < endIdx) + srcForwardMotions_[k] = at(i, forwardMotions_); + if (i > startIdx) + srcBackwardMotions_[k] = at(i, backwardMotions_); + } + + process(srcFrames_, at(idx, outputs_), srcForwardMotions_, srcBackwardMotions_, baseIdx); + } +} + +Ptr cv::superres::createSuperResolution_BTVL1_OCL() +{ + return new BTVL1_OCL; +} +#endif \ No newline at end of file diff --git a/modules/superres/src/frame_source.cpp b/modules/superres/src/frame_source.cpp index 052141616..20e45d951 100644 --- a/modules/superres/src/frame_source.cpp +++ b/modules/superres/src/frame_source.cpp @@ -119,11 +119,23 @@ namespace { vc_ >> _frame.getMatRef(); } - else + else if(_frame.kind() == _InputArray::GPU_MAT) { vc_ >> frame_; arrCopy(frame_, _frame); } + else if(_frame.kind() == _InputArray::OCL_MAT) + { + vc_ >> frame_; + if(!frame_.empty()) + { + arrCopy(frame_, _frame); + } + } + else + { + //should never get here + } } class VideoFrameSource : public CaptureFrameSource diff --git a/modules/superres/src/input_array_utility.cpp b/modules/superres/src/input_array_utility.cpp index 5a6682526..075cf9514 100644 --- a/modules/superres/src/input_array_utility.cpp +++ b/modules/superres/src/input_array_utility.cpp @@ -125,30 +125,59 @@ namespace { src.getGpuMat().copyTo(dst.getGpuMatRef()); } +#ifdef HAVE_OPENCV_OCL + void ocl2mat(InputArray src, OutputArray dst) + { + dst.getMatRef() = (Mat)ocl::getOclMatRef(src); + } + void mat2ocl(InputArray src, OutputArray dst) + { + Mat m = src.getMat(); + ocl::getOclMatRef(dst) = (ocl::oclMat)m; + } + void ocl2ocl(InputArray src, OutputArray dst) + { + ocl::getOclMatRef(src).copyTo(ocl::getOclMatRef(dst)); + } +#else + void ocl2mat(InputArray, OutputArray) + { + CV_Error(CV_StsNotImplemented, "The called functionality is disabled for current build or platform");; + } + void mat2ocl(InputArray, OutputArray) + { + CV_Error(CV_StsNotImplemented, "The called functionality is disabled for current build or platform");; + } + void ocl2ocl(InputArray, OutputArray) + { + CV_Error(CV_StsNotImplemented, "The called functionality is disabled for current build or platform"); + } +#endif } void cv::superres::arrCopy(InputArray src, OutputArray dst) { typedef void (*func_t)(InputArray src, OutputArray dst); - static const func_t funcs[10][10] = + static const func_t funcs[11][11] = { {0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, - {0, mat2mat, mat2mat, mat2mat, mat2mat, mat2mat, mat2mat, arr2buf, arr2tex, mat2gpu}, - {0, mat2mat, mat2mat, mat2mat, mat2mat, mat2mat, mat2mat, arr2buf, arr2tex, mat2gpu}, - {0, mat2mat, mat2mat, mat2mat, mat2mat, mat2mat, mat2mat, arr2buf, arr2tex, mat2gpu}, - {0, mat2mat, mat2mat, mat2mat, mat2mat, mat2mat, mat2mat, arr2buf, arr2tex, mat2gpu}, - {0, mat2mat, mat2mat, mat2mat, mat2mat, mat2mat, mat2mat, arr2buf, arr2tex, mat2gpu}, - {0, mat2mat, mat2mat, mat2mat, mat2mat, mat2mat, mat2mat, arr2buf, arr2tex, mat2gpu}, - {0, buf2arr, buf2arr, buf2arr, buf2arr, buf2arr, buf2arr, buf2arr, buf2arr, buf2arr}, - {0, tex2arr, tex2arr, tex2arr, tex2arr, tex2arr, tex2arr, tex2arr, tex2arr, tex2arr}, - {0, gpu2mat, gpu2mat, gpu2mat, gpu2mat, gpu2mat, gpu2mat, arr2buf, arr2tex, gpu2gpu} + {0, mat2mat, mat2mat, mat2mat, mat2mat, mat2mat, mat2mat, arr2buf, arr2tex, mat2gpu, mat2ocl}, + {0, mat2mat, mat2mat, mat2mat, mat2mat, mat2mat, mat2mat, arr2buf, arr2tex, mat2gpu, mat2ocl}, + {0, mat2mat, mat2mat, mat2mat, mat2mat, mat2mat, mat2mat, arr2buf, arr2tex, mat2gpu, mat2ocl}, + {0, mat2mat, mat2mat, mat2mat, mat2mat, mat2mat, mat2mat, arr2buf, arr2tex, mat2gpu, mat2ocl}, + {0, mat2mat, mat2mat, mat2mat, mat2mat, mat2mat, mat2mat, arr2buf, arr2tex, mat2gpu, mat2ocl}, + {0, mat2mat, mat2mat, mat2mat, mat2mat, mat2mat, mat2mat, arr2buf, arr2tex, mat2gpu, mat2ocl}, + {0, buf2arr, buf2arr, buf2arr, buf2arr, buf2arr, buf2arr, buf2arr, buf2arr, buf2arr, 0 }, + {0, tex2arr, tex2arr, tex2arr, tex2arr, tex2arr, tex2arr, tex2arr, tex2arr, tex2arr, 0 }, + {0, gpu2mat, gpu2mat, gpu2mat, gpu2mat, gpu2mat, gpu2mat, arr2buf, arr2tex, gpu2gpu, 0 }, + {0, ocl2mat, ocl2mat, ocl2mat, ocl2mat, ocl2mat, ocl2mat, 0, 0, 0, ocl2ocl} }; const int src_kind = src.kind() >> _InputArray::KIND_SHIFT; const int dst_kind = dst.kind() >> _InputArray::KIND_SHIFT; - CV_DbgAssert( src_kind >= 0 && src_kind < 10 ); - CV_DbgAssert( dst_kind >= 0 && dst_kind < 10 ); + CV_DbgAssert( src_kind >= 0 && src_kind < 11 ); + CV_DbgAssert( dst_kind >= 0 && dst_kind < 11 ); const func_t func = funcs[src_kind][dst_kind]; CV_DbgAssert( func != 0 ); @@ -190,7 +219,6 @@ namespace break; } } - void convertToDepth(InputArray src, OutputArray dst, int depth) { CV_Assert( src.depth() <= CV_64F ); @@ -271,3 +299,70 @@ GpuMat cv::superres::convertToType(const GpuMat& src, int type, GpuMat& buf0, Gp convertToDepth(buf0, buf1, depth); return buf1; } +#ifdef HAVE_OPENCV_OCL +namespace +{ + // TODO(pengx17): remove these overloaded functions until IntputArray fully supports oclMat + void convertToCn(const ocl::oclMat& src, ocl::oclMat& dst, int cn) + { + CV_Assert( src.channels() == 1 || src.channels() == 3 || src.channels() == 4 ); + CV_Assert( cn == 1 || cn == 3 || cn == 4 ); + + static const int codes[5][5] = + { + {-1, -1, -1, -1, -1}, + {-1, -1, -1, COLOR_GRAY2BGR, COLOR_GRAY2BGRA}, + {-1, -1, -1, -1, -1}, + {-1, COLOR_BGR2GRAY, -1, -1, COLOR_BGR2BGRA}, + {-1, COLOR_BGRA2GRAY, -1, COLOR_BGRA2BGR, -1}, + }; + + const int code = codes[src.channels()][cn]; + CV_DbgAssert( code >= 0 ); + + ocl::cvtColor(src, dst, code, cn); + } + void convertToDepth(const ocl::oclMat& src, ocl::oclMat& dst, int depth) + { + CV_Assert( src.depth() <= CV_64F ); + CV_Assert( depth == CV_8U || depth == CV_32F ); + + static const double maxVals[] = + { + std::numeric_limits::max(), + std::numeric_limits::max(), + std::numeric_limits::max(), + std::numeric_limits::max(), + std::numeric_limits::max(), + 1.0, + 1.0, + }; + const double scale = maxVals[depth] / maxVals[src.depth()]; + src.convertTo(dst, depth, scale); + } +} +ocl::oclMat cv::superres::convertToType(const ocl::oclMat& src, int type, ocl::oclMat& buf0, ocl::oclMat& buf1) +{ + if (src.type() == type) + return src; + + const int depth = CV_MAT_DEPTH(type); + const int cn = CV_MAT_CN(type); + + if (src.depth() == depth) + { + convertToCn(src, buf0, cn); + return buf0; + } + + if (src.channels() == cn) + { + convertToDepth(src, buf1, depth); + return buf1; + } + + convertToCn(src, buf0, cn); + convertToDepth(buf0, buf1, depth); + return buf1; +} +#endif diff --git a/modules/superres/src/input_array_utility.hpp b/modules/superres/src/input_array_utility.hpp index 975783dc6..9fa63da53 100644 --- a/modules/superres/src/input_array_utility.hpp +++ b/modules/superres/src/input_array_utility.hpp @@ -45,6 +45,9 @@ #include "opencv2/core/core.hpp" #include "opencv2/core/gpumat.hpp" +#ifdef HAVE_OPENCV_OCL +#include "opencv2/ocl/ocl.hpp" +#endif namespace cv { @@ -57,6 +60,10 @@ namespace cv CV_EXPORTS Mat convertToType(const Mat& src, int type, Mat& buf0, Mat& buf1); CV_EXPORTS gpu::GpuMat convertToType(const gpu::GpuMat& src, int type, gpu::GpuMat& buf0, gpu::GpuMat& buf1); + +#ifdef HAVE_OPENCV_OCL + CV_EXPORTS ocl::oclMat convertToType(const ocl::oclMat& src, int type, ocl::oclMat& buf0, ocl::oclMat& buf1); +#endif } } diff --git a/modules/superres/src/opencl/superres_btvl1.cl b/modules/superres/src/opencl/superres_btvl1.cl new file mode 100644 index 000000000..0efa1709c --- /dev/null +++ b/modules/superres/src/opencl/superres_btvl1.cl @@ -0,0 +1,261 @@ +/*M/////////////////////////////////////////////////////////////////////////////////////// +// +// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. +// +// By downloading, copying, installing or using the software you agree to this license. +// If you do not agree to this license, do not download, install, +// copy or use the software. +// +// +// License Agreement +// For Open Source Computer Vision Library +// +// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved. +// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved. +// Third party copyrights are property of their respective owners. +// +// @Authors +// Jin Ma jin@multicorewareinc.com +// +// Redistribution and use in source and binary forms, with or without modification, +// are permitted provided that the following conditions are met: +// +// * Redistribution's of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// * Redistribution's in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other oclMaterials provided with the distribution. +// +// * The name of the copyright holders may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// This software is provided by the copyright holders and contributors as is and +// any express or implied warranties, including, but not limited to, the implied +// warranties of merchantability and fitness for a particular purpose are disclaimed. +// In no event shall the Intel Corporation or contributors be liable for any direct, +// indirect, incidental, special, exemplary, or consequential damages +// (including, but not limited to, procurement of substitute goods or services; +// loss of use, data, or profits; or business interruption) however caused +// and on any theory of liability, whether in contract, strict liability, +// or tort (including negligence or otherwise) arising in any way out of +// the use of this software, even if advised of the possibility of such damage. +// +//M*/ + +__kernel void buildMotionMapsKernel(__global float* forwardMotionX, + __global float* forwardMotionY, + __global float* backwardMotionX, + __global float* backwardMotionY, + __global float* forwardMapX, + __global float* forwardMapY, + __global float* backwardMapX, + __global float* backwardMapY, + int forwardMotionX_row, + int forwardMotionX_col, + int forwardMotionX_step, + int forwardMotionY_step, + int backwardMotionX_step, + int backwardMotionY_step, + int forwardMapX_step, + int forwardMapY_step, + int backwardMapX_step, + int backwardMapY_step + ) +{ + int x = get_global_id(0); + int y = get_global_id(1); + + if(x < forwardMotionX_col && y < forwardMotionX_row) + { + float fx = forwardMotionX[y * forwardMotionX_step + x]; + float fy = forwardMotionY[y * forwardMotionY_step + x]; + + float bx = backwardMotionX[y * backwardMotionX_step + x]; + float by = backwardMotionY[y * backwardMotionY_step + x]; + + forwardMapX[y * forwardMapX_step + x] = x + bx; + forwardMapY[y * forwardMapY_step + x] = y + by; + + backwardMapX[y * backwardMapX_step + x] = x + fx; + backwardMapY[y * backwardMapY_step + x] = y + fy; + } +} + +__kernel void upscaleKernel(__global float* src, + __global float* dst, + int src_step, + int dst_step, + int src_row, + int src_col, + int scale, + int channels + ) +{ + int x = get_global_id(0); + int y = get_global_id(1); + + if(x < src_col && y < src_row) + { + if(channels == 1) + { + dst[y * scale * dst_step + x * scale] = src[y * src_step + x]; + }else if(channels == 3) + { + dst[y * channels * scale * dst_step + 3 * x * scale + 0] = src[y * channels * src_step + 3 * x + 0]; + dst[y * channels * scale * dst_step + 3 * x * scale + 1] = src[y * channels * src_step + 3 * x + 1]; + dst[y * channels * scale * dst_step + 3 * x * scale + 2] = src[y * channels * src_step + 3 * x + 2]; + }else + { + dst[y * channels * scale * dst_step + 4 * x * scale + 0] = src[y * channels * src_step + 4 * x + 0]; + dst[y * channels * scale * dst_step + 4 * x * scale + 1] = src[y * channels * src_step + 4 * x + 1]; + dst[y * channels * scale * dst_step + 4 * x * scale + 2] = src[y * channels * src_step + 4 * x + 2]; + dst[y * channels * scale * dst_step + 4 * x * scale + 3] = src[y * channels * src_step + 4 * x + 3]; + } + } +} + + +float diffSign(float a, float b) +{ + return a > b ? 1.0f : a < b ? -1.0f : 0.0f; +} + +float3 diffSign3(float3 a, float3 b) +{ + float3 pos; + pos.x = a.x > b.x ? 1.0f : a.x < b.x ? -1.0f : 0.0f; + pos.y = a.y > b.y ? 1.0f : a.y < b.y ? -1.0f : 0.0f; + pos.z = a.z > b.z ? 1.0f : a.z < b.z ? -1.0f : 0.0f; + return pos; +} + +float4 diffSign4(float4 a, float4 b) +{ + float4 pos; + pos.x = a.x > b.x ? 1.0f : a.x < b.x ? -1.0f : 0.0f; + pos.y = a.y > b.y ? 1.0f : a.y < b.y ? -1.0f : 0.0f; + pos.z = a.z > b.z ? 1.0f : a.z < b.z ? -1.0f : 0.0f; + pos.w = 0.0f; + return pos; +} + +__kernel void diffSignKernel(__global float* src1, + __global float* src2, + __global float* dst, + int src1_row, + int src1_col, + int dst_step, + int src1_step, + int src2_step) +{ + int x = get_global_id(0); + int y = get_global_id(1); + + if(x < src1_col && y < src1_row) + { + dst[y * dst_step + x] = diffSign(src1[y * src1_step + x], src2[y * src2_step + x]); + } + barrier(CLK_LOCAL_MEM_FENCE); +} + +__kernel void calcBtvRegularizationKernel(__global float* src, + __global float* dst, + int src_step, + int dst_step, + int src_row, + int src_col, + int ksize, + int channels, + __global float* c_btvRegWeights + ) +{ + int x = get_global_id(0) + ksize; + int y = get_global_id(1) + ksize; + + if ((y < src_row - ksize) && (x < src_col - ksize)) + { + if(channels == 1) + { + const float srcVal = src[y * src_step + x]; + float dstVal = 0.0f; + + for (int m = 0, count = 0; m <= ksize; ++m) + { + for (int l = ksize; l + m >= 0; --l, ++count) + dstVal = dstVal + c_btvRegWeights[count] * (diffSign(srcVal, src[(y + m) * src_step + (x + l)]) - diffSign(src[(y - m) * src_step + (x - l)], srcVal)); + } + dst[y * dst_step + x] = dstVal; + }else if(channels == 3) + { + float3 srcVal; + srcVal.x = src[y * src_step + 3 * x + 0]; + srcVal.y = src[y * src_step + 3 * x + 1]; + srcVal.z = src[y * src_step + 3 * x + 2]; + + float3 dstVal; + dstVal.x = 0.0f; + dstVal.y = 0.0f; + dstVal.z = 0.0f; + + for (int m = 0, count = 0; m <= ksize; ++m) + { + for (int l = ksize; l + m >= 0; --l, ++count) + { + float3 src1; + src1.x = src[(y + m) * src_step + 3 * (x + l) + 0]; + src1.y = src[(y + m) * src_step + 3 * (x + l) + 1]; + src1.z = src[(y + m) * src_step + 3 * (x + l) + 2]; + + float3 src2; + src2.x = src[(y - m) * src_step + 3 * (x - l) + 0]; + src2.y = src[(y - m) * src_step + 3 * (x - l) + 1]; + src2.z = src[(y - m) * src_step + 3 * (x - l) + 2]; + + dstVal = dstVal + c_btvRegWeights[count] * (diffSign3(srcVal, src1) - diffSign3(src2, srcVal)); + } + } + dst[y * dst_step + 3 * x + 0] = dstVal.x; + dst[y * dst_step + 3 * x + 1] = dstVal.y; + dst[y * dst_step + 3 * x + 2] = dstVal.z; + }else + { + float4 srcVal; + srcVal.x = src[y * src_step + 4 * x + 0];//r type =float + srcVal.y = src[y * src_step + 4 * x + 1];//g + srcVal.z = src[y * src_step + 4 * x + 2];//b + srcVal.w = src[y * src_step + 4 * x + 3];//a + + float4 dstVal; + dstVal.x = 0.0f; + dstVal.y = 0.0f; + dstVal.z = 0.0f; + dstVal.w = 0.0f; + + for (int m = 0, count = 0; m <= ksize; ++m) + { + for (int l = ksize; l + m >= 0; --l, ++count) + { + float4 src1; + src1.x = src[(y + m) * src_step + 4 * (x + l) + 0]; + src1.y = src[(y + m) * src_step + 4 * (x + l) + 1]; + src1.z = src[(y + m) * src_step + 4 * (x + l) + 2]; + src1.w = src[(y + m) * src_step + 4 * (x + l) + 3]; + + float4 src2; + src2.x = src[(y - m) * src_step + 4 * (x - l) + 0]; + src2.y = src[(y - m) * src_step + 4 * (x - l) + 1]; + src2.z = src[(y - m) * src_step + 4 * (x - l) + 2]; + src2.w = src[(y - m) * src_step + 4 * (x - l) + 3]; + + dstVal = dstVal + c_btvRegWeights[count] * (diffSign4(srcVal, src1) - diffSign4(src2, srcVal)); + + } + } + dst[y * dst_step + 4 * x + 0] = dstVal.x; + dst[y * dst_step + 4 * x + 1] = dstVal.y; + dst[y * dst_step + 4 * x + 2] = dstVal.z; + dst[y * dst_step + 4 * x + 3] = dstVal.w; + } + } +} \ No newline at end of file diff --git a/modules/superres/src/optical_flow.cpp b/modules/superres/src/optical_flow.cpp index 12642175d..6947d1901 100644 --- a/modules/superres/src/optical_flow.cpp +++ b/modules/superres/src/optical_flow.cpp @@ -719,3 +719,195 @@ Ptr cv::superres::createOptFlow_DualTVL1_GPU() } #endif // HAVE_OPENCV_GPU +#ifdef HAVE_OPENCV_OCL + +namespace +{ + class oclOpticalFlow : public DenseOpticalFlowExt + { + public: + explicit oclOpticalFlow(int work_type); + + void calc(InputArray frame0, InputArray frame1, OutputArray flow1, OutputArray flow2); + void collectGarbage(); + + protected: + virtual void impl(const cv::ocl::oclMat& input0, const cv::ocl::oclMat& input1, cv::ocl::oclMat& dst1, cv::ocl::oclMat& dst2) = 0; + + private: + int work_type_; + cv::ocl::oclMat buf_[6]; + cv::ocl::oclMat u_, v_, flow_; + }; + + oclOpticalFlow::oclOpticalFlow(int work_type) : work_type_(work_type) + { + } + + void oclOpticalFlow::calc(InputArray frame0, InputArray frame1, OutputArray flow1, OutputArray flow2) + { + ocl::oclMat& _frame0 = ocl::getOclMatRef(frame0); + ocl::oclMat& _frame1 = ocl::getOclMatRef(frame1); + ocl::oclMat& _flow1 = ocl::getOclMatRef(flow1); + ocl::oclMat& _flow2 = ocl::getOclMatRef(flow2); + + CV_Assert( _frame1.type() == _frame0.type() ); + CV_Assert( _frame1.size() == _frame0.size() ); + + cv::ocl::oclMat input0_ = convertToType(_frame0, work_type_, buf_[2], buf_[3]); + cv::ocl::oclMat input1_ = convertToType(_frame1, work_type_, buf_[4], buf_[5]); + + impl(input0_, input1_, u_, v_);//go to tvl1 algorithm + + u_.copyTo(_flow1); + v_.copyTo(_flow2); + } + + void oclOpticalFlow::collectGarbage() + { + for (int i = 0; i < 6; ++i) + buf_[i].release(); + u_.release(); + v_.release(); + flow_.release(); + } +} +/////////////////////////////////////////////////////////////////// +// PyrLK_OCL + +namespace +{ + class PyrLK_OCL : public oclOpticalFlow + { + public: + AlgorithmInfo* info() const; + + PyrLK_OCL(); + + void collectGarbage(); + + protected: + void impl(const ocl::oclMat& input0, const ocl::oclMat& input1, ocl::oclMat& dst1, ocl::oclMat& dst2); + + private: + int winSize_; + int maxLevel_; + int iterations_; + + ocl::PyrLKOpticalFlow alg_; + }; + + CV_INIT_ALGORITHM(PyrLK_OCL, "DenseOpticalFlowExt.PyrLK_OCL", + obj.info()->addParam(obj, "winSize", obj.winSize_); + obj.info()->addParam(obj, "maxLevel", obj.maxLevel_); + obj.info()->addParam(obj, "iterations", obj.iterations_)); + + PyrLK_OCL::PyrLK_OCL() : oclOpticalFlow(CV_8UC1) + { + winSize_ = alg_.winSize.width; + maxLevel_ = alg_.maxLevel; + iterations_ = alg_.iters; + } + + void PyrLK_OCL::impl(const cv::ocl::oclMat& input0, const cv::ocl::oclMat& input1, cv::ocl::oclMat& dst1, cv::ocl::oclMat& dst2) + { + alg_.winSize.width = winSize_; + alg_.winSize.height = winSize_; + alg_.maxLevel = maxLevel_; + alg_.iters = iterations_; + + alg_.dense(input0, input1, dst1, dst2); + } + + void PyrLK_OCL::collectGarbage() + { + alg_.releaseMemory(); + oclOpticalFlow::collectGarbage(); + } +} + +Ptr cv::superres::createOptFlow_PyrLK_OCL() +{ + return new PyrLK_OCL; +} + +/////////////////////////////////////////////////////////////////// +// DualTVL1_OCL + +namespace +{ + class DualTVL1_OCL : public oclOpticalFlow + { + public: + AlgorithmInfo* info() const; + + DualTVL1_OCL(); + + void collectGarbage(); + + protected: + void impl(const cv::ocl::oclMat& input0, const cv::ocl::oclMat& input1, cv::ocl::oclMat& dst1, cv::ocl::oclMat& dst2); + + private: + double tau_; + double lambda_; + double theta_; + int nscales_; + int warps_; + double epsilon_; + int iterations_; + bool useInitialFlow_; + + ocl::OpticalFlowDual_TVL1_OCL alg_; + }; + + CV_INIT_ALGORITHM(DualTVL1_OCL, "DenseOpticalFlowExt.DualTVL1_OCL", + obj.info()->addParam(obj, "tau", obj.tau_); + obj.info()->addParam(obj, "lambda", obj.lambda_); + obj.info()->addParam(obj, "theta", obj.theta_); + obj.info()->addParam(obj, "nscales", obj.nscales_); + obj.info()->addParam(obj, "warps", obj.warps_); + obj.info()->addParam(obj, "epsilon", obj.epsilon_); + obj.info()->addParam(obj, "iterations", obj.iterations_); + obj.info()->addParam(obj, "useInitialFlow", obj.useInitialFlow_)); + + DualTVL1_OCL::DualTVL1_OCL() : oclOpticalFlow(CV_8UC1) + { + tau_ = alg_.tau; + lambda_ = alg_.lambda; + theta_ = alg_.theta; + nscales_ = alg_.nscales; + warps_ = alg_.warps; + epsilon_ = alg_.epsilon; + iterations_ = alg_.iterations; + useInitialFlow_ = alg_.useInitialFlow; + } + + void DualTVL1_OCL::impl(const cv::ocl::oclMat& input0, const cv::ocl::oclMat& input1, cv::ocl::oclMat& dst1, cv::ocl::oclMat& dst2) + { + alg_.tau = tau_; + alg_.lambda = lambda_; + alg_.theta = theta_; + alg_.nscales = nscales_; + alg_.warps = warps_; + alg_.epsilon = epsilon_; + alg_.iterations = iterations_; + alg_.useInitialFlow = useInitialFlow_; + + alg_(input0, input1, dst1, dst2); + + } + + void DualTVL1_OCL::collectGarbage() + { + alg_.collectGarbage(); + oclOpticalFlow::collectGarbage(); + } +} + +Ptr cv::superres::createOptFlow_DualTVL1_OCL() +{ + return new DualTVL1_OCL; +} + +#endif \ No newline at end of file diff --git a/modules/superres/src/precomp.hpp b/modules/superres/src/precomp.hpp index 82b591b3c..51e6c336f 100644 --- a/modules/superres/src/precomp.hpp +++ b/modules/superres/src/precomp.hpp @@ -65,6 +65,10 @@ #endif #endif +#ifdef HAVE_OPENCV_OCL + #include "opencv2/ocl/private/util.hpp" +#endif + #ifdef HAVE_OPENCV_HIGHGUI #include "opencv2/highgui/highgui.hpp" #endif diff --git a/modules/superres/test/test_superres.cpp b/modules/superres/test/test_superres.cpp index b4a546c62..9aa9a44bf 100644 --- a/modules/superres/test/test_superres.cpp +++ b/modules/superres/test/test_superres.cpp @@ -274,5 +274,12 @@ TEST_F(SuperResolution, BTVL1_GPU) { RunTest(cv::superres::createSuperResolution_BTVL1_GPU()); } - #endif +#if defined(HAVE_OPENCV_OCL) && defined(HAVE_OPENCL) +TEST_F(SuperResolution, BTVL1_OCL) +{ + std::vector infos; + cv::ocl::getDevice(infos); + RunTest(cv::superres::createSuperResolution_BTVL1_OCL()); +} +#endif From 30239ad58e0d9312685c347ccd2fc241ed701ca0 Mon Sep 17 00:00:00 2001 From: peng xiao Date: Wed, 26 Jun 2013 16:06:05 +0800 Subject: [PATCH 02/18] Fix build error. --- modules/superres/perf/perf_superres_ocl.cpp | 36 +++++++++++++-------- 1 file changed, 22 insertions(+), 14 deletions(-) diff --git a/modules/superres/perf/perf_superres_ocl.cpp b/modules/superres/perf/perf_superres_ocl.cpp index 524be2dcf..e75a8775d 100644 --- a/modules/superres/perf/perf_superres_ocl.cpp +++ b/modules/superres/perf/perf_superres_ocl.cpp @@ -41,10 +41,10 @@ //M*/ #include "perf_precomp.hpp" -#include "opencv2/ocl/ocl.hpp" #ifdef HAVE_OPENCL +#include "opencv2/ocl/ocl.hpp" using namespace std; using namespace testing; using namespace perf; @@ -58,13 +58,9 @@ namespace public: explicit OneFrameSource_OCL(const ocl::oclMat& frame) : frame_(frame) {} - void nextFrame(OutputArray) + void nextFrame(OutputArray frame) { - } - - void nextFrame(ocl::oclMat& frame) - { - frame_.copyTo(frame); + ocl::getOclMatRef(frame) = frame_; } void reset() { @@ -78,15 +74,27 @@ namespace class ZeroOpticalFlowOCL : public DenseOpticalFlowExt { public: - void calc(ocl::oclMat& frame0, ocl::oclMat&, ocl::oclMat& flow1, ocl::oclMat& flow2) + void calc(InputArray frame0, InputArray, OutputArray flow1, OutputArray flow2) { - cv::Size size = frame0.size(); + ocl::oclMat& frame0_ = ocl::getOclMatRef(frame0); + ocl::oclMat& flow1_ = ocl::getOclMatRef(flow1); + ocl::oclMat& flow2_ = ocl::getOclMatRef(flow2); - flow1.create(size, CV_32FC1); - flow2.create(size, CV_32FC1); + cv::Size size = frame0_.size(); - flow1.setTo(Scalar::all(0)); - flow2.setTo(Scalar::all(0)); + if(!flow2.needed()) + { + flow1_.create(size, CV_32FC2); + flow1_.setTo(Scalar::all(0)); + } + else + { + flow1_.create(size, CV_32FC1); + flow2_.create(size, CV_32FC1); + + flow1_.setTo(Scalar::all(0)); + flow2_.setTo(Scalar::all(0)); + } } void collectGarbage() @@ -135,4 +143,4 @@ PERF_TEST_P(Size_MatType, SuperResolution_BTVL1_OCL, frame_ocl.release(); CPU_SANITY_CHECK(dst_ocl); } -#endif \ No newline at end of file +#endif From 8d8dc29ced791a2a2de1fe2aada4adfacddbc15e Mon Sep 17 00:00:00 2001 From: yao Date: Wed, 3 Jul 2013 13:13:04 +0800 Subject: [PATCH 03/18] add kmeans --- modules/ocl/include/opencv2/ocl/ocl.hpp | 12 + modules/ocl/src/kmeans.cpp | 471 ++++++++++++++++++++++++ modules/ocl/src/opencl/kmeans_kernel.cl | 83 +++++ modules/ocl/test/test_kmeans.cpp | 162 ++++++++ 4 files changed, 728 insertions(+) create mode 100644 modules/ocl/src/kmeans.cpp create mode 100644 modules/ocl/src/opencl/kmeans_kernel.cl create mode 100644 modules/ocl/test/test_kmeans.cpp diff --git a/modules/ocl/include/opencv2/ocl/ocl.hpp b/modules/ocl/include/opencv2/ocl/ocl.hpp index fa11177c4..7dcf96fd5 100644 --- a/modules/ocl/include/opencv2/ocl/ocl.hpp +++ b/modules/ocl/include/opencv2/ocl/ocl.hpp @@ -834,6 +834,18 @@ namespace cv CV_EXPORTS void cornerMinEigenVal_dxdy(const oclMat &src, oclMat &dst, oclMat &Dx, oclMat &Dy, int blockSize, int ksize, int bordertype = cv::BORDER_DEFAULT); + /////////////////////////////////// ML /////////////////////////////////////////// + + //! Compute closest centers for each lines in source and lable it after center's index + // supports CV_32FC1/CV_32FC2/CV_32FC4 data type + void DistanceComputer(oclMat &dists, oclMat &labels, const oclMat &src, const oclMat ¢ers); + + //!Does k-means procedure on GPU + // supports CV_32FC1/CV_32FC2/CV_32FC4 data type + CV_EXPORTS double kmeans(const oclMat &src, int K, oclMat &bestLabels, + TermCriteria criteria, int attemps, int flags, oclMat ¢ers); + + //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// ///////////////////////////////////////////CascadeClassifier////////////////////////////////////////////////////////////////// /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// diff --git a/modules/ocl/src/kmeans.cpp b/modules/ocl/src/kmeans.cpp new file mode 100644 index 000000000..3922fd8dc --- /dev/null +++ b/modules/ocl/src/kmeans.cpp @@ -0,0 +1,471 @@ +/*M/////////////////////////////////////////////////////////////////////////////////////// +// +// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. +// +// By downloading, copying, installing or using the software you agree to this license. +// If you do not agree to this license, do not download, install, +// copy or use the software. +// +// +// License Agreement +// For Open Source Computer Vision Library +// +// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved. +// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved. +// Third party copyrights are property of their respective owners. +// +// @Authors +// Xiaopeng Fu, fuxiaopeng2222@163.com +// +// Redistribution and use in source and binary forms, with or without modification, +// are permitted provided that the following conditions are met: +// +// * Redistribution's of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// * Redistribution's in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other oclMaterials provided with the distribution. +// +// * The name of the copyright holders may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// This software is provided by the copyright holders and contributors as is and +// any express or implied warranties, including, but not limited to, the implied +// warranties of merchantability and fitness for a particular purpose are disclaimed. +// In no event shall the Intel Corporation or contributors be liable for any direct, +// indirect, incidental, special, exemplary, or consequential damages +// (including, but not limited to, procurement of substitute goods or services; +// loss of use, data, or profits; or business interruption) however caused +// and on any theory of liability, whether in contract, strict liability, +// or tort (including negligence or otherwise) arising in any way out of +// the use of this software, even if advised of the possibility of such damage. +// +//M*/ + +#include +#include "precomp.hpp" + +using namespace cv; +using namespace ocl; + +namespace cv +{ + namespace ocl + { + ////////////////////////////////////OpenCL kernel strings////////////////////////// + extern const char *kmeans_kernel; + } +} +////////////////////////////////////////////////////////////////////////// +//////////////////common///////////////////////////////////////////////// +/////////////////////////////////////////////////////////////////////// +void swap( Mat& a, Mat& b ) +{ + std::swap(a.flags, b.flags); + std::swap(a.dims, b.dims); + std::swap(a.rows, b.rows); + std::swap(a.cols, b.cols); + std::swap(a.data, b.data); + std::swap(a.refcount, b.refcount); + std::swap(a.datastart, b.datastart); + std::swap(a.dataend, b.dataend); + std::swap(a.datalimit, b.datalimit); + std::swap(a.allocator, b.allocator); + + std::swap(a.size.p, b.size.p); + std::swap(a.step.p, b.step.p); + std::swap(a.step.buf[0], b.step.buf[0]); + std::swap(a.step.buf[1], b.step.buf[1]); + + if( a.step.p == b.step.buf ) + { + a.step.p = a.step.buf; + a.size.p = &a.rows; + } + + if( b.step.p == a.step.buf ) + { + b.step.p = b.step.buf; + b.size.p = &b.rows; + } +} + +static void generateRandomCenter(const vector& box, float* center, RNG& rng) +{ + size_t j, dims = box.size(); + float margin = 1.f/dims; + for( j = 0; j < dims; j++ ) + center[j] = ((float)rng*(1.f+margin*2.f)-margin)*(box[j][1] - box[j][0]) + box[j][0]; +} + +// This class is copied from matrix.cpp in core module. +class KMeansPPDistanceComputer : public ParallelLoopBody +{ +public: + KMeansPPDistanceComputer( float *_tdist2, + const float *_data, + const float *_dist, + int _dims, + size_t _step, + size_t _stepci ) + : tdist2(_tdist2), + data(_data), + dist(_dist), + dims(_dims), + step(_step), + stepci(_stepci) { } + + void operator()( const cv::Range& range ) const + { + const int begin = range.start; + const int end = range.end; + + for ( int i = begin; i _centers(K); + int* centers = &_centers[0]; + vector _dist(N*3); + float* dist = &_dist[0], *tdist = dist + N, *tdist2 = tdist + N; + double sum0 = 0; + + centers[0] = (unsigned)rng % N; + + for( i = 0; i < N; i++ ) + { + dist[i] = normL2Sqr_(data + step*i, data + step*centers[0], dims); + sum0 += dist[i]; + } + + for( k = 1; k < K; k++ ) + { + double bestSum = DBL_MAX; + int bestCenter = -1; + + for( j = 0; j < trials; j++ ) + { + double p = (double)rng*sum0, s = 0; + for( i = 0; i < N-1; i++ ) + if( (p -= dist[i]) <= 0 ) + break; + int ci = i; + + parallel_for_(Range(0, N), + KMeansPPDistanceComputer(tdist2, data, dist, dims, step, step*ci)); + for( i = 0; i < N; i++ ) + { + s += tdist2[i]; + } + + if( s < bestSum ) + { + bestSum = s; + bestCenter = ci; + std::swap(tdist, tdist2); + } + } + centers[k] = bestCenter; + sum0 = bestSum; + std::swap(dist, tdist); + } + + for( k = 0; k < K; k++ ) + { + const float* src = data + step*centers[k]; + float* dst = _out_centers.ptr(k); + for( j = 0; j < dims; j++ ) + dst[j] = src[j]; + } +} + +void cv::ocl::DistanceComputer(oclMat &dists, oclMat &labels, const oclMat &src, const oclMat ¢ers) +{ + //if(src.clCxt -> impl -> double_support == 0 && src.type() == CV_64F) + //{ + // CV_Error(CV_GpuNotSupported, "Selected device don't support double\r\n"); + // return; + //} + + Context *clCxt = src.clCxt; + int labels_step = (int)(labels.step/labels.elemSize()); + string kernelname = "kmeansComputeDistance"; + int threadNum = src.rows > 256 ? 256 : src.rows; + size_t localThreads[3] = {1, threadNum, 1}; + size_t globalThreads[3] = {1, src.rows, 1}; + + vector > args; + args.push_back(make_pair(sizeof(cl_int), (void *)&labels_step)); + args.push_back(make_pair(sizeof(cl_int), (void *)¢ers.rows)); + args.push_back(make_pair(sizeof(cl_mem), (void *)&src.data)); + args.push_back(make_pair(sizeof(cl_mem), (void *)&labels.data)); + args.push_back(make_pair(sizeof(cl_int), (void *)¢ers.cols)); + args.push_back(make_pair(sizeof(cl_int), (void *)&src.rows)); + args.push_back(make_pair(sizeof(cl_mem), (void *)¢ers.data)); + args.push_back(make_pair(sizeof(cl_mem), (void*)&dists.data)); + + openCLExecuteKernel(clCxt, &kmeans_kernel, kernelname, globalThreads, localThreads, args, -1, -1, NULL); +} +///////////////////////////////////k - means ///////////////////////////////////////////////////////// +double cv::ocl::kmeans(const oclMat &_src, int K, oclMat &_bestLabels, + TermCriteria criteria, int attempts, int flags, oclMat &_centers) +{ + const int SPP_TRIALS = 3; + bool isrow = _src.rows == 1 && _src.oclchannels() > 1; + int N = !isrow ? _src.rows : _src.cols; + int dims = (!isrow ? _src.cols : 1) * _src.oclchannels(); + int type = _src.depth(); + + attempts = std::max(attempts, 1); + CV_Assert(type == CV_32F && K > 0 ); + CV_Assert( N >= K ); + + Mat _labels; + if( flags & CV_KMEANS_USE_INITIAL_LABELS ) + { + CV_Assert( (_bestLabels.cols == 1 || _bestLabels.rows == 1) && + _bestLabels.cols * _bestLabels.rows == N && + _bestLabels.type() == CV_32S ); + _bestLabels.download(_labels); + } + else + { + if( !((_bestLabels.cols == 1 || _bestLabels.rows == 1) && + _bestLabels.cols * _bestLabels.rows == N && + _bestLabels.type() == CV_32S && + _bestLabels.isContinuous())) + _bestLabels.create(N, 1, CV_32S); + _labels.create(_bestLabels.size(), _bestLabels.type()); + } + int* labels = _labels.ptr(); + + Mat data; + _src.download(data); + Mat centers(K, dims, type), old_centers(K, dims, type), temp(1, dims, type); + vector counters(K); + vector _box(dims); + Vec2f* box = &_box[0]; + double best_compactness = DBL_MAX, compactness = 0; + RNG& rng = theRNG(); + int a, iter, i, j, k; + + if( criteria.type & TermCriteria::EPS ) + criteria.epsilon = std::max(criteria.epsilon, 0.); + else + criteria.epsilon = FLT_EPSILON; + criteria.epsilon *= criteria.epsilon; + + if( criteria.type & TermCriteria::COUNT ) + criteria.maxCount = std::min(std::max(criteria.maxCount, 2), 100); + else + criteria.maxCount = 100; + + if( K == 1 ) + { + attempts = 1; + criteria.maxCount = 2; + } + + const float* sample = data.ptr(); + for( j = 0; j < dims; j++ ) + box[j] = Vec2f(sample[j], sample[j]); + + for( i = 1; i < N; i++ ) + { + sample = data.ptr(i); + for( j = 0; j < dims; j++ ) + { + float v = sample[j]; + box[j][0] = std::min(box[j][0], v); + box[j][1] = std::max(box[j][1], v); + } + } + + for( a = 0; a < attempts; a++ ) + { + double max_center_shift = DBL_MAX; + for( iter = 0;; ) + { + swap(centers, old_centers); + + if( iter == 0 && (a > 0 || !(flags & KMEANS_USE_INITIAL_LABELS)) ) + { + if( flags & KMEANS_PP_CENTERS ) + generateCentersPP(data, centers, K, rng, SPP_TRIALS); + else + { + for( k = 0; k < K; k++ ) + generateRandomCenter(_box, centers.ptr(k), rng); + } + } + else + { + if( iter == 0 && a == 0 && (flags & KMEANS_USE_INITIAL_LABELS) ) + { + for( i = 0; i < N; i++ ) + CV_Assert( (unsigned)labels[i] < (unsigned)K ); + } + + // compute centers + centers = Scalar(0); + for( k = 0; k < K; k++ ) + counters[k] = 0; + + for( i = 0; i < N; i++ ) + { + sample = data.ptr(i); + k = labels[i]; + float* center = centers.ptr(k); + j=0; + #if CV_ENABLE_UNROLLED + for(; j <= dims - 4; j += 4 ) + { + float t0 = center[j] + sample[j]; + float t1 = center[j+1] + sample[j+1]; + + center[j] = t0; + center[j+1] = t1; + + t0 = center[j+2] + sample[j+2]; + t1 = center[j+3] + sample[j+3]; + + center[j+2] = t0; + center[j+3] = t1; + } + #endif + for( ; j < dims; j++ ) + center[j] += sample[j]; + counters[k]++; + } + + if( iter > 0 ) + max_center_shift = 0; + + for( k = 0; k < K; k++ ) + { + if( counters[k] != 0 ) + continue; + + // if some cluster appeared to be empty then: + // 1. find the biggest cluster + // 2. find the farthest from the center point in the biggest cluster + // 3. exclude the farthest point from the biggest cluster and form a new 1-point cluster. + int max_k = 0; + for( int k1 = 1; k1 < K; k1++ ) + { + if( counters[max_k] < counters[k1] ) + max_k = k1; + } + + double max_dist = 0; + int farthest_i = -1; + float* new_center = centers.ptr(k); + float* old_center = centers.ptr(max_k); + float* _old_center = temp.ptr(); // normalized + float scale = 1.f/counters[max_k]; + for( j = 0; j < dims; j++ ) + _old_center[j] = old_center[j]*scale; + + for( i = 0; i < N; i++ ) + { + if( labels[i] != max_k ) + continue; + sample = data.ptr(i); + double dist = normL2Sqr_(sample, _old_center, dims); + + if( max_dist <= dist ) + { + max_dist = dist; + farthest_i = i; + } + } + + counters[max_k]--; + counters[k]++; + labels[farthest_i] = k; + sample = data.ptr(farthest_i); + + for( j = 0; j < dims; j++ ) + { + old_center[j] -= sample[j]; + new_center[j] += sample[j]; + } + } + + for( k = 0; k < K; k++ ) + { + float* center = centers.ptr(k); + CV_Assert( counters[k] != 0 ); + + float scale = 1.f/counters[k]; + for( j = 0; j < dims; j++ ) + center[j] *= scale; + + if( iter > 0 ) + { + double dist = 0; + const float* old_center = old_centers.ptr(k); + for( j = 0; j < dims; j++ ) + { + double t = center[j] - old_center[j]; + dist += t*t; + } + max_center_shift = std::max(max_center_shift, dist); + } + } + } + + if( ++iter == MAX(criteria.maxCount, 2) || max_center_shift <= criteria.epsilon ) + break; + + // assign labels + oclMat _dists(1, N, CV_64F); + + _bestLabels.upload(_labels); + _centers.upload(centers); + DistanceComputer(_dists, _bestLabels, _src, _centers); + + Mat dists; + _dists.download(dists); + _bestLabels.download(_labels); + + double* dist = dists.ptr(0); + compactness = 0; + for( i = 0; i < N; i++ ) + { + compactness += dist[i]; + } + } + + if( compactness < best_compactness ) + { + best_compactness = compactness; + } + } + + return best_compactness; +} + diff --git a/modules/ocl/src/opencl/kmeans_kernel.cl b/modules/ocl/src/opencl/kmeans_kernel.cl new file mode 100644 index 000000000..d6f6c721a --- /dev/null +++ b/modules/ocl/src/opencl/kmeans_kernel.cl @@ -0,0 +1,83 @@ +/*M/////////////////////////////////////////////////////////////////////////////////////// +// +// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. +// +// By downloading, copying, installing or using the software you agree to this license. +// If you do not agree to this license, do not download, install, +// copy or use the software. +// +// +// License Agreement +// For Open Source Computer Vision Library +// +// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved. +// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved. +// Third party copyrights are property of their respective owners. +// +// @Authors +// Xiaopeng Fu, fuxiaopeng2222@163.com +// +// Redistribution and use in source and binary forms, with or without modification, +// are permitted provided that the following conditions are met: +// +// * Redistribution's of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// * Redistribution's in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other GpuMaterials provided with the distribution. +// +// * The name of the copyright holders may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// This software is provided by the copyright holders and contributors as is and +// any express or implied warranties, including, but not limited to, the implied +// warranties of merchantability and fitness for a particular purpose are disclaimed. +// In no event shall the Intel Corporation or contributors be liable for any direct, +// indirect, incidental, special, exemplary, or consequential damages +// (including, but not limited to, procurement of substitute goods or services; +// loss of use, data, or profits; or business interruption) however caused +// and on any theory of liability, whether in contract, strict liability, +// or tort (including negligence or otherwise) arising in any way out of +// the use of this software, even if advised of the possibility of such damage. +// +//M*/ + +__kernel void kmeansComputeDistance( + int label_step, int K, + __global float *src, + __global int *labels, int dims, int rows, + __global float *centers, + __global float *dists) +{ + int gid = get_global_id(1); + + float dist, euDist, min; + int minCentroid; + + if(gid >= rows) + return; + + for(int i = 0 ;i < K; i++) + { + euDist = 0; + for(int j = 0; j < dims; j++) + { + dist = (src[j + gid * dims] + - centers[j + i * dims]); + euDist += dist * dist; + } + + if(i == 0) + { + min = euDist; + minCentroid = 0; + } else if(euDist < min) + { + min = euDist; + minCentroid = i; + } + } + dists[gid] = min; + labels[label_step * gid] = minCentroid; +} diff --git a/modules/ocl/test/test_kmeans.cpp b/modules/ocl/test/test_kmeans.cpp new file mode 100644 index 000000000..ebade3bbc --- /dev/null +++ b/modules/ocl/test/test_kmeans.cpp @@ -0,0 +1,162 @@ +/*M/////////////////////////////////////////////////////////////////////////////////////// +// +// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. +// +// By downloading, copying, installing or using the software you agree to this license. +// If you do not agree to this license, do not download, install, +// copy or use the software. +// +// +// License Agreement +// For Open Source Computer Vision Library +// +// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved. +// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved. +// Third party copyrights are property of their respective owners. +// +// @Authors +// Erping Pang, pang_er_ping@163.com +// Xiaopeng Fu, fuxiaopeng2222@163.com +// +// Redistribution and use in source and binary forms, with or without modification, +// are permitted provided that the following conditions are met: +// +// * Redistribution's of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// * Redistribution's in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other oclMaterials provided with the distribution. +// +// * The name of the copyright holders may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// This software is provided by the copyright holders and contributors "as is" and +// any express or implied warranties, including, but not limited to, the implied +// warranties of merchantability and fitness for a particular purpose are disclaimed. +// In no event shall the Intel Corporation or contributors be liable for any direct, +// indirect, incidental, special, exemplary, or consequential damages +// (including, but not limited to, procurement of substitute goods or services; +// loss of use, data, or profits; or business interruption) however caused +// and on any theory of liability, whether in contract, strict liability, +// or tort (including negligence or otherwise) arising in any way out of +// the use of this software, even if advised of the possibility of such damage. +// +//M*/ + +#include "precomp.hpp" + +#ifdef HAVE_OPENCL + +using namespace cvtest; +using namespace testing; +using namespace std; +using namespace cv; + +#define OCL_KMEANS_USE_INITIAL_LABELS 1 +#define OCL_KMEANS_PP_CENTERS 2 + +PARAM_TEST_CASE(Kmeans, int, int, int) +{ + int type; + int K; + int flags; + cv::Mat src ; + ocl::oclMat d_src, d_dists; + + Mat labels, centers; + ocl::oclMat d_labels, d_centers; + cv::RNG rng ; + virtual void SetUp(){ + K = GET_PARAM(0); + type = GET_PARAM(1); + flags = GET_PARAM(2); + rng = TS::ptr()->get_rng(); + + // MWIDTH=256, MHEIGHT=256. defined in utility.hpp + cv::Size size = cv::Size(MWIDTH, MHEIGHT); + src.create(size, type); + int row_idx = 0; + const int max_neighbour = MHEIGHT / K - 1; + CV_Assert(K <= MWIDTH); + for(int i = 0; i < K; i++ ) + { + Mat center_row_header = src.row(row_idx); + center_row_header.setTo(0); + int nchannel = center_row_header.channels(); + for(int j = 0; j < nchannel; j++) + center_row_header.at(0, i*nchannel+j) = 50000.0; + + for(int j = 0; (j < max_neighbour) || + (i == K-1 && j < max_neighbour + MHEIGHT%K); j ++) + { + Mat cur_row_header = src.row(row_idx + 1 + j); + center_row_header.copyTo(cur_row_header); + Mat tmpmat = randomMat(rng, cur_row_header.size(), cur_row_header.type(), -200, 200, false); + cur_row_header += tmpmat; + } + row_idx += 1 + max_neighbour; + } + } +}; +TEST_P(Kmeans, Mat){ + + if(flags & KMEANS_USE_INITIAL_LABELS) + { + // inital a given labels + labels.create(src.rows, 1, CV_32S); + int *label = labels.ptr(); + for(int i = 0; i < src.rows; i++) + label[i] = rng.uniform(0, K); + d_labels.upload(labels); + } + d_src.upload(src); + + for(int j = 0; j < LOOP_TIMES; j++) + { + kmeans(src, K, labels, + TermCriteria( CV_TERMCRIT_EPS+CV_TERMCRIT_ITER, 100, 0), + 1, flags, centers); + + ocl::kmeans(d_src, K, d_labels, + TermCriteria( CV_TERMCRIT_EPS+CV_TERMCRIT_ITER, 100, 0), + 1, flags, d_centers); + + Mat dd_labels(d_labels); + Mat dd_centers(d_centers); + if(flags & KMEANS_USE_INITIAL_LABELS) + { + EXPECT_MAT_NEAR(labels, dd_labels, 0); + EXPECT_MAT_NEAR(centers, dd_centers, 1e-3); + } + else + { + int row_idx = 0; + for(int i = 0; i < K; i++) + { + // verify lables with ground truth resutls + int label = labels.at(row_idx); + int header_label = dd_labels.at(row_idx); + for(int j = 0; (j < MHEIGHT/K)||(i == K-1 && j < MHEIGHT/K+MHEIGHT%K); j++) + { + ASSERT_NEAR(labels.at(row_idx+j), label, 0); + ASSERT_NEAR(dd_labels.at(row_idx+j), header_label, 0); + } + + // verify centers + float *center = centers.ptr(label); + float *header_center = dd_centers.ptr(header_label); + for(int t = 0; t < centers.cols; t++) + ASSERT_NEAR(center[t], header_center[t], 1e-3); + + row_idx += MHEIGHT/K; + } + } + } +} +INSTANTIATE_TEST_CASE_P(OCL_ML, Kmeans, Combine( + Values(3, 5, 8), + Values(CV_32FC1, CV_32FC2, CV_32FC4), + Values(OCL_KMEANS_USE_INITIAL_LABELS/*, OCL_KMEANS_PP_CENTERS*/))); + +#endif From a26c4fa2a2bc974f8f8bdd902dce56fc8d0944aa Mon Sep 17 00:00:00 2001 From: Heinz Hofbauer Date: Wed, 3 Jul 2013 14:58:40 +0200 Subject: [PATCH 04/18] Bugfix for an overlapping size of image and template for matchTemplate. Example: img of size 10x10 and templ of size 11x9. In subsequent code this will results in either width or height of corrSize to be zero (0). Line 261 will call crossCorr which will then have a zero size of either blocksize.width or blocksize.height resulting in a division by zero crach in lines 137 or 138. --- modules/imgproc/src/templmatch.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/modules/imgproc/src/templmatch.cpp b/modules/imgproc/src/templmatch.cpp index ec7a92a22..18d7da9d9 100644 --- a/modules/imgproc/src/templmatch.cpp +++ b/modules/imgproc/src/templmatch.cpp @@ -248,6 +248,8 @@ void cv::matchTemplate( InputArray _img, InputArray _templ, OutputArray _result, CV_Assert( (img.depth() == CV_8U || img.depth() == CV_32F) && img.type() == templ.type() ); + CV_Assert( img.rows >= templ.rows && img.cols >= templ.cols); + Size corrSize(img.cols - templ.cols + 1, img.rows - templ.rows + 1); _result.create(corrSize, CV_32F); Mat result = _result.getMat(); From c23510785bd61bf4ab287e40d2caca060aa94724 Mon Sep 17 00:00:00 2001 From: yao Date: Thu, 4 Jul 2013 08:59:42 +0800 Subject: [PATCH 05/18] remove the redundant function --- modules/ocl/src/kmeans.cpp | 33 --------------------------------- 1 file changed, 33 deletions(-) diff --git a/modules/ocl/src/kmeans.cpp b/modules/ocl/src/kmeans.cpp index 3922fd8dc..e1a91caa5 100644 --- a/modules/ocl/src/kmeans.cpp +++ b/modules/ocl/src/kmeans.cpp @@ -57,39 +57,6 @@ namespace cv extern const char *kmeans_kernel; } } -////////////////////////////////////////////////////////////////////////// -//////////////////common///////////////////////////////////////////////// -/////////////////////////////////////////////////////////////////////// -void swap( Mat& a, Mat& b ) -{ - std::swap(a.flags, b.flags); - std::swap(a.dims, b.dims); - std::swap(a.rows, b.rows); - std::swap(a.cols, b.cols); - std::swap(a.data, b.data); - std::swap(a.refcount, b.refcount); - std::swap(a.datastart, b.datastart); - std::swap(a.dataend, b.dataend); - std::swap(a.datalimit, b.datalimit); - std::swap(a.allocator, b.allocator); - - std::swap(a.size.p, b.size.p); - std::swap(a.step.p, b.step.p); - std::swap(a.step.buf[0], b.step.buf[0]); - std::swap(a.step.buf[1], b.step.buf[1]); - - if( a.step.p == b.step.buf ) - { - a.step.p = a.step.buf; - a.size.p = &a.rows; - } - - if( b.step.p == a.step.buf ) - { - b.step.p = b.step.buf; - b.size.p = &b.rows; - } -} static void generateRandomCenter(const vector& box, float* center, RNG& rng) { From eaa29110e1620aea589b7953e9d29d66a902a2ea Mon Sep 17 00:00:00 2001 From: Alexander Pacha Date: Thu, 4 Jul 2013 16:36:29 +1200 Subject: [PATCH 06/18] Fixed issue 3130 (http://code.opencv.org/issues/3130), where one argument of the BRISK-call was ignored. Previously it was not possible to use BRISK without creating descriptors. Now it behaves like ORB (and how it is documented), and you can call BRISK to just generate feature points and no descriptors. --- modules/features2d/src/brisk.cpp | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/modules/features2d/src/brisk.cpp b/modules/features2d/src/brisk.cpp index d1fa0c9c8..622f77229 100644 --- a/modules/features2d/src/brisk.cpp +++ b/modules/features2d/src/brisk.cpp @@ -525,7 +525,11 @@ BRISK::operator()( InputArray _image, InputArray _mask, vector& keypoi bool doOrientation=true; if (useProvidedKeypoints) doOrientation = false; - computeDescriptorsAndOrOrientation(_image, _mask, keypoints, _descriptors, true, doOrientation, + + // If the user specified cv::noArray(), this will yield false. Otherwise it will return true. + bool doDescriptors = _descriptors.needed(); + + computeDescriptorsAndOrOrientation(_image, _mask, keypoints, _descriptors, doDescriptors, doOrientation, useProvidedKeypoints); } From 88ed74a7ecbfc88b1f394e6cc8b4a20aef5a1f2b Mon Sep 17 00:00:00 2001 From: yao Date: Fri, 5 Jul 2013 08:59:21 +0800 Subject: [PATCH 07/18] fix the function name --- modules/ocl/include/opencv2/ocl/ocl.hpp | 2 +- modules/ocl/src/kmeans.cpp | 36 ++++++++++++------------- modules/ocl/src/opencl/kmeans_kernel.cl | 15 ++++++----- 3 files changed, 27 insertions(+), 26 deletions(-) diff --git a/modules/ocl/include/opencv2/ocl/ocl.hpp b/modules/ocl/include/opencv2/ocl/ocl.hpp index 7dcf96fd5..e7b133e67 100644 --- a/modules/ocl/include/opencv2/ocl/ocl.hpp +++ b/modules/ocl/include/opencv2/ocl/ocl.hpp @@ -838,7 +838,7 @@ namespace cv //! Compute closest centers for each lines in source and lable it after center's index // supports CV_32FC1/CV_32FC2/CV_32FC4 data type - void DistanceComputer(oclMat &dists, oclMat &labels, const oclMat &src, const oclMat ¢ers); + CV_EXPORTS void distanceToCenters(oclMat &dists, oclMat &labels, const oclMat &src, const oclMat ¢ers); //!Does k-means procedure on GPU // supports CV_32FC1/CV_32FC2/CV_32FC4 data type diff --git a/modules/ocl/src/kmeans.cpp b/modules/ocl/src/kmeans.cpp index e1a91caa5..22f86600a 100644 --- a/modules/ocl/src/kmeans.cpp +++ b/modules/ocl/src/kmeans.cpp @@ -51,11 +51,11 @@ using namespace ocl; namespace cv { - namespace ocl - { - ////////////////////////////////////OpenCL kernel strings////////////////////////// - extern const char *kmeans_kernel; - } +namespace ocl +{ +////////////////////////////////////OpenCL kernel strings////////////////////////// +extern const char *kmeans_kernel; +} } static void generateRandomCenter(const vector& box, float* center, RNG& rng) @@ -142,7 +142,7 @@ static void generateCentersPP(const Mat& _data, Mat& _out_centers, int ci = i; parallel_for_(Range(0, N), - KMeansPPDistanceComputer(tdist2, data, dist, dims, step, step*ci)); + KMeansPPDistanceComputer(tdist2, data, dist, dims, step, step*ci)); for( i = 0; i < N; i++ ) { s += tdist2[i]; @@ -169,7 +169,7 @@ static void generateCentersPP(const Mat& _data, Mat& _out_centers, } } -void cv::ocl::DistanceComputer(oclMat &dists, oclMat &labels, const oclMat &src, const oclMat ¢ers) +void cv::ocl::distanceToCenters(oclMat &dists, oclMat &labels, const oclMat &src, const oclMat ¢ers) { //if(src.clCxt -> impl -> double_support == 0 && src.type() == CV_64F) //{ @@ -179,7 +179,7 @@ void cv::ocl::DistanceComputer(oclMat &dists, oclMat &labels, const oclMat &src, Context *clCxt = src.clCxt; int labels_step = (int)(labels.step/labels.elemSize()); - string kernelname = "kmeansComputeDistance"; + string kernelname = "distanceToCenters"; int threadNum = src.rows > 256 ? 256 : src.rows; size_t localThreads[3] = {1, threadNum, 1}; size_t globalThreads[3] = {1, src.rows, 1}; @@ -198,7 +198,7 @@ void cv::ocl::DistanceComputer(oclMat &dists, oclMat &labels, const oclMat &src, } ///////////////////////////////////k - means ///////////////////////////////////////////////////////// double cv::ocl::kmeans(const oclMat &_src, int K, oclMat &_bestLabels, - TermCriteria criteria, int attempts, int flags, oclMat &_centers) + TermCriteria criteria, int attempts, int flags, oclMat &_centers) { const int SPP_TRIALS = 3; bool isrow = _src.rows == 1 && _src.oclchannels() > 1; @@ -214,16 +214,16 @@ double cv::ocl::kmeans(const oclMat &_src, int K, oclMat &_bestLabels, if( flags & CV_KMEANS_USE_INITIAL_LABELS ) { CV_Assert( (_bestLabels.cols == 1 || _bestLabels.rows == 1) && - _bestLabels.cols * _bestLabels.rows == N && - _bestLabels.type() == CV_32S ); + _bestLabels.cols * _bestLabels.rows == N && + _bestLabels.type() == CV_32S ); _bestLabels.download(_labels); } else { if( !((_bestLabels.cols == 1 || _bestLabels.rows == 1) && - _bestLabels.cols * _bestLabels.rows == N && - _bestLabels.type() == CV_32S && - _bestLabels.isContinuous())) + _bestLabels.cols * _bestLabels.rows == N && + _bestLabels.type() == CV_32S && + _bestLabels.isContinuous())) _bestLabels.create(N, 1, CV_32S); _labels.create(_bestLabels.size(), _bestLabels.type()); } @@ -307,7 +307,7 @@ double cv::ocl::kmeans(const oclMat &_src, int K, oclMat &_bestLabels, k = labels[i]; float* center = centers.ptr(k); j=0; - #if CV_ENABLE_UNROLLED +#if CV_ENABLE_UNROLLED for(; j <= dims - 4; j += 4 ) { float t0 = center[j] + sample[j]; @@ -322,7 +322,7 @@ double cv::ocl::kmeans(const oclMat &_src, int K, oclMat &_bestLabels, center[j+2] = t0; center[j+3] = t1; } - #endif +#endif for( ; j < dims; j++ ) center[j] += sample[j]; counters[k]++; @@ -410,10 +410,10 @@ double cv::ocl::kmeans(const oclMat &_src, int K, oclMat &_bestLabels, // assign labels oclMat _dists(1, N, CV_64F); - + _bestLabels.upload(_labels); _centers.upload(centers); - DistanceComputer(_dists, _bestLabels, _src, _centers); + distanceToCenters(_dists, _bestLabels, _src, _centers); Mat dists; _dists.download(dists); diff --git a/modules/ocl/src/opencl/kmeans_kernel.cl b/modules/ocl/src/opencl/kmeans_kernel.cl index d6f6c721a..c6af0ad24 100644 --- a/modules/ocl/src/opencl/kmeans_kernel.cl +++ b/modules/ocl/src/opencl/kmeans_kernel.cl @@ -43,7 +43,7 @@ // //M*/ -__kernel void kmeansComputeDistance( +__kernel void distanceToCenters( int label_step, int K, __global float *src, __global int *labels, int dims, int rows, @@ -51,20 +51,20 @@ __kernel void kmeansComputeDistance( __global float *dists) { int gid = get_global_id(1); - + float dist, euDist, min; int minCentroid; - + if(gid >= rows) return; - for(int i = 0 ;i < K; i++) + for(int i = 0 ; i < K; i++) { euDist = 0; for(int j = 0; j < dims; j++) { - dist = (src[j + gid * dims] - - centers[j + i * dims]); + dist = (src[j + gid * dims] + - centers[j + i * dims]); euDist += dist * dist; } @@ -72,7 +72,8 @@ __kernel void kmeansComputeDistance( { min = euDist; minCentroid = 0; - } else if(euDist < min) + } + else if(euDist < min) { min = euDist; minCentroid = i; From fcb4c0e51c282bee7c0a1319d7ccb2008a5b2c93 Mon Sep 17 00:00:00 2001 From: Andrey Pavlenko Date: Mon, 8 Jul 2013 21:12:21 +0400 Subject: [PATCH 08/18] fixing working with test data and a small fix for init code - set init value for `numsdev` to prevent use of uninitialized value - stop use of 'workdir' and files from samples - forcing use of 'opencv_extra' instead Note: set OPENCV_TEST_DATA_PATH to full path to 'opencv_extra/testdata' (gitolite@code.opencv.org:opencv_extra.git) before running the test! --- modules/ocl/src/initialization.cpp | 2 +- modules/ocl/test/main.cpp | 5 +---- modules/ocl/test/test_calib3d.cpp | 19 +++++++++---------- modules/ocl/test/test_canny.cpp | 3 +-- modules/ocl/test/test_moments.cpp | 2 +- modules/ocl/test/test_objdetect.cpp | 24 +++++++----------------- modules/ocl/test/test_optflow.cpp | 14 +++++++------- 7 files changed, 27 insertions(+), 42 deletions(-) diff --git a/modules/ocl/src/initialization.cpp b/modules/ocl/src/initialization.cpp index d1ad695ff..5d8151795 100644 --- a/modules/ocl/src/initialization.cpp +++ b/modules/ocl/src/initialization.cpp @@ -319,7 +319,7 @@ namespace cv char clVersion[256]; for (unsigned i = 0; i < numPlatforms; ++i) { - cl_uint numsdev; + cl_uint numsdev = 0; cl_int status = clGetDeviceIDs(platforms[i], devicetype, 0, NULL, &numsdev); if(status != CL_DEVICE_NOT_FOUND) openCLVerifyCall(status); diff --git a/modules/ocl/test/main.cpp b/modules/ocl/test/main.cpp index 4ba02cf9b..1250691a1 100644 --- a/modules/ocl/test/main.cpp +++ b/modules/ocl/test/main.cpp @@ -73,14 +73,12 @@ void print_info() #endif } -std::string workdir; int main(int argc, char **argv) { - TS::ptr()->init("ocl"); + TS::ptr()->init("."); InitGoogleTest(&argc, argv); const char *keys = "{ h | help | false | print help message }" - "{ w | workdir | ../../../samples/c/| set working directory }" "{ t | type | gpu | set device type:cpu or gpu}" "{ p | platform | 0 | set platform id }" "{ d | device | 0 | set device id }"; @@ -92,7 +90,6 @@ int main(int argc, char **argv) cmd.printParams(); return 0; } - workdir = cmd.get("workdir"); string type = cmd.get("type"); unsigned int pid = cmd.get("platform"); int device = cmd.get("device"); diff --git a/modules/ocl/test/test_calib3d.cpp b/modules/ocl/test/test_calib3d.cpp index 14fb31f53..950f19d3c 100644 --- a/modules/ocl/test/test_calib3d.cpp +++ b/modules/ocl/test/test_calib3d.cpp @@ -50,7 +50,6 @@ using namespace cv; -extern std::string workdir; PARAM_TEST_CASE(StereoMatchBM, int, int) { int n_disp; @@ -66,9 +65,9 @@ PARAM_TEST_CASE(StereoMatchBM, int, int) TEST_P(StereoMatchBM, Regression) { - Mat left_image = readImage("stereobm/aloe-L.png", IMREAD_GRAYSCALE); - Mat right_image = readImage("stereobm/aloe-R.png", IMREAD_GRAYSCALE); - Mat disp_gold = readImage("stereobm/aloe-disp.png", IMREAD_GRAYSCALE); + Mat left_image = readImage("gpu/stereobm/aloe-L.png", IMREAD_GRAYSCALE); + Mat right_image = readImage("gpu/stereobm/aloe-R.png", IMREAD_GRAYSCALE); + Mat disp_gold = readImage("gpu/stereobm/aloe-disp.png", IMREAD_GRAYSCALE); ocl::oclMat d_left, d_right; ocl::oclMat d_disp(left_image.size(), CV_8U); Mat disp; @@ -113,9 +112,9 @@ PARAM_TEST_CASE(StereoMatchBP, int, int, int, float, float, float, float) }; TEST_P(StereoMatchBP, Regression) { - Mat left_image = readImage("stereobp/aloe-L.png"); - Mat right_image = readImage("stereobp/aloe-R.png"); - Mat disp_gold = readImage("stereobp/aloe-disp.png", IMREAD_GRAYSCALE); + Mat left_image = readImage("gpu/stereobp/aloe-L.png"); + Mat right_image = readImage("gpu/stereobp/aloe-R.png"); + Mat disp_gold = readImage("gpu/stereobp/aloe-disp.png", IMREAD_GRAYSCALE); ocl::oclMat d_left, d_right; ocl::oclMat d_disp; Mat disp; @@ -166,9 +165,9 @@ PARAM_TEST_CASE(StereoMatchConstSpaceBP, int, int, int, int, float, float, float }; TEST_P(StereoMatchConstSpaceBP, Regression) { - Mat left_image = readImage("csstereobp/aloe-L.png"); - Mat right_image = readImage("csstereobp/aloe-R.png"); - Mat disp_gold = readImage("csstereobp/aloe-disp.png", IMREAD_GRAYSCALE); + Mat left_image = readImage("gpu/csstereobp/aloe-L.png"); + Mat right_image = readImage("gpu/csstereobp/aloe-R.png"); + Mat disp_gold = readImage("gpu/csstereobp/aloe-disp.png", IMREAD_GRAYSCALE); ocl::oclMat d_left, d_right; ocl::oclMat d_disp; diff --git a/modules/ocl/test/test_canny.cpp b/modules/ocl/test/test_canny.cpp index 10032e897..b378b2281 100644 --- a/modules/ocl/test/test_canny.cpp +++ b/modules/ocl/test/test_canny.cpp @@ -48,7 +48,6 @@ //////////////////////////////////////////////////////// // Canny -extern std::string workdir; IMPLEMENT_PARAM_CLASS(AppertureSize, int); IMPLEMENT_PARAM_CLASS(L2gradient, bool); @@ -67,7 +66,7 @@ PARAM_TEST_CASE(Canny, AppertureSize, L2gradient) TEST_P(Canny, Accuracy) { - cv::Mat img = readImage(workdir + "fruits.jpg", cv::IMREAD_GRAYSCALE); + cv::Mat img = readImage("cv/shared/fruits.png", cv::IMREAD_GRAYSCALE); ASSERT_FALSE(img.empty()); double low_thresh = 50.0; diff --git a/modules/ocl/test/test_moments.cpp b/modules/ocl/test/test_moments.cpp index 86f4779d6..e3ab1fa3c 100644 --- a/modules/ocl/test/test_moments.cpp +++ b/modules/ocl/test/test_moments.cpp @@ -45,7 +45,7 @@ TEST_P(MomentsTest, Mat) { if(test_contours) { - Mat src = imread( workdir + "../cpp/pic3.png", IMREAD_GRAYSCALE ); + Mat src = readImage( "cv/shared/pic3.png", IMREAD_GRAYSCALE ); ASSERT_FALSE(src.empty()); Mat canny_output; vector > contours; diff --git a/modules/ocl/test/test_objdetect.cpp b/modules/ocl/test/test_objdetect.cpp index bc719b097..e9b571e60 100644 --- a/modules/ocl/test/test_objdetect.cpp +++ b/modules/ocl/test/test_objdetect.cpp @@ -63,11 +63,8 @@ PARAM_TEST_CASE(HOG, Size, int) { winSize = GET_PARAM(0); type = GET_PARAM(1); - img_rgb = readImage(workdir + "../gpu/road.png"); - if(img_rgb.empty()) - { - std::cout << "Couldn't read road.png" << std::endl; - } + img_rgb = readImage("gpu/hog/road.png"); + ASSERT_FALSE(img_rgb.empty()); } }; @@ -211,18 +208,11 @@ PARAM_TEST_CASE(Haar, int, CascadeName) virtual void SetUp() { flags = GET_PARAM(0); - cascadeName = (workdir + "../../data/haarcascades/").append(GET_PARAM(1)); - if( (!cascade.load( cascadeName )) || (!cpucascade.load(cascadeName)) ) - { - std::cout << "ERROR: Could not load classifier cascade" << std::endl; - return; - } - img = readImage(workdir + "lena.jpg", IMREAD_GRAYSCALE); - if(img.empty()) - { - std::cout << "Couldn't read lena.jpg" << std::endl; - return ; - } + cascadeName = (string(cvtest::TS::ptr()->get_data_path()) + "cv/cascadeandhog/cascades/").append(GET_PARAM(1)); + ASSERT_TRUE(cascade.load( cascadeName )); + ASSERT_TRUE(cpucascade.load(cascadeName)); + img = readImage("cv/shared/lena.png", IMREAD_GRAYSCALE); + ASSERT_FALSE(img.empty()); equalizeHist(img, img); d_img.upload(img); } diff --git a/modules/ocl/test/test_optflow.cpp b/modules/ocl/test/test_optflow.cpp index 34adb352c..941ade129 100644 --- a/modules/ocl/test/test_optflow.cpp +++ b/modules/ocl/test/test_optflow.cpp @@ -75,7 +75,7 @@ PARAM_TEST_CASE(GoodFeaturesToTrack, MinDistance) TEST_P(GoodFeaturesToTrack, Accuracy) { - cv::Mat frame = readImage(workdir + "../gpu/rubberwhale1.png", cv::IMREAD_GRAYSCALE); + cv::Mat frame = readImage("gpu/opticalflow/rubberwhale1.png", cv::IMREAD_GRAYSCALE); ASSERT_FALSE(frame.empty()); int maxCorners = 1000; @@ -146,10 +146,10 @@ PARAM_TEST_CASE(TVL1, bool) TEST_P(TVL1, Accuracy) { - cv::Mat frame0 = readImage(workdir + "../gpu/rubberwhale1.png", cv::IMREAD_GRAYSCALE); + cv::Mat frame0 = readImage("gpu/opticalflow/rubberwhale1.png", cv::IMREAD_GRAYSCALE); ASSERT_FALSE(frame0.empty()); - cv::Mat frame1 = readImage(workdir + "../gpu/rubberwhale2.png", cv::IMREAD_GRAYSCALE); + cv::Mat frame1 = readImage("gpu/opticalflow/rubberwhale2.png", cv::IMREAD_GRAYSCALE); ASSERT_FALSE(frame1.empty()); cv::ocl::OpticalFlowDual_TVL1_OCL d_alg; @@ -188,10 +188,10 @@ PARAM_TEST_CASE(Sparse, bool, bool) TEST_P(Sparse, Mat) { - cv::Mat frame0 = readImage(workdir + "../gpu/rubberwhale1.png", useGray ? cv::IMREAD_GRAYSCALE : cv::IMREAD_COLOR); + cv::Mat frame0 = readImage("gpu/opticalflow/rubberwhale1.png", useGray ? cv::IMREAD_GRAYSCALE : cv::IMREAD_COLOR); ASSERT_FALSE(frame0.empty()); - cv::Mat frame1 = readImage(workdir + "../gpu/rubberwhale2.png", useGray ? cv::IMREAD_GRAYSCALE : cv::IMREAD_COLOR); + cv::Mat frame1 = readImage("gpu/opticalflow/rubberwhale2.png", useGray ? cv::IMREAD_GRAYSCALE : cv::IMREAD_COLOR); ASSERT_FALSE(frame1.empty()); cv::Mat gray_frame; @@ -301,10 +301,10 @@ PARAM_TEST_CASE(Farneback, PyrScale, PolyN, FarnebackOptFlowFlags, UseInitFlow) TEST_P(Farneback, Accuracy) { - cv::Mat frame0 = imread(workdir + "/rubberwhale1.png", cv::IMREAD_GRAYSCALE); + cv::Mat frame0 = readImage("gpu/opticalflow/rubberwhale1.png", cv::IMREAD_GRAYSCALE); ASSERT_FALSE(frame0.empty()); - cv::Mat frame1 = imread(workdir + "/rubberwhale2.png", cv::IMREAD_GRAYSCALE); + cv::Mat frame1 = readImage("gpu/opticalflow/rubberwhale2.png", cv::IMREAD_GRAYSCALE); ASSERT_FALSE(frame1.empty()); double polySigma = polyN <= 5 ? 1.1 : 1.5; From f77d1f57eee1a8b9160b5bd2e312736bd6294b9e Mon Sep 17 00:00:00 2001 From: Alexander Smorkalov Date: Tue, 9 Jul 2013 16:07:55 +0400 Subject: [PATCH 09/18] V4L and V4L2 based Video capture bug fix (Bugfix #3144). --- modules/highgui/src/cap.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/highgui/src/cap.cpp b/modules/highgui/src/cap.cpp index cc92da3d0..0d0fd41dd 100644 --- a/modules/highgui/src/cap.cpp +++ b/modules/highgui/src/cap.cpp @@ -220,8 +220,8 @@ CV_IMPL CvCapture * cvCreateCameraCapture (int index) return capture; break; #endif -#ifdef HAVE_VFW case CV_CAP_VFW: +#ifdef HAVE_VFW capture = cvCreateCameraCapture_VFW (index); if (capture) return capture; From 5032240c9d2aa9edbbec6978add572213e26af2f Mon Sep 17 00:00:00 2001 From: peng xiao Date: Wed, 10 Jul 2013 14:51:40 +0800 Subject: [PATCH 10/18] Fix ocl gftt file name typo. --- modules/ocl/src/{gfft.cpp => gftt.cpp} | 10 +++++----- .../src/opencl/{imgproc_gfft.cl => imgproc_gftt.cl} | 0 2 files changed, 5 insertions(+), 5 deletions(-) rename modules/ocl/src/{gfft.cpp => gftt.cpp} (97%) rename modules/ocl/src/opencl/{imgproc_gfft.cl => imgproc_gftt.cl} (100%) diff --git a/modules/ocl/src/gfft.cpp b/modules/ocl/src/gftt.cpp similarity index 97% rename from modules/ocl/src/gfft.cpp rename to modules/ocl/src/gftt.cpp index 7fd5e3a17..06844177a 100644 --- a/modules/ocl/src/gfft.cpp +++ b/modules/ocl/src/gftt.cpp @@ -55,7 +55,7 @@ namespace cv namespace ocl { ///////////////////////////OpenCL kernel strings/////////////////////////// - extern const char *imgproc_gfft; + extern const char *imgproc_gftt; } } @@ -133,7 +133,7 @@ struct Sorter for(int passOfStage = 0; passOfStage < stage + 1; ++passOfStage) { args[4] = std::make_pair(sizeof(cl_int), (void *)&passOfStage); - openCLExecuteKernel(cxt, &imgproc_gfft, kernelname, globalThreads, localThreads, args, -1, -1); + openCLExecuteKernel(cxt, &imgproc_gftt, kernelname, globalThreads, localThreads, args, -1, -1); } } } @@ -160,12 +160,12 @@ struct Sorter args.push_back( std::make_pair( sizeof(cl_int), (void*)&count) ); args.push_back( std::make_pair( lds_size, (void*)NULL) ); - openCLExecuteKernel(cxt, &imgproc_gfft, kernelname, globalThreads, localThreads, args, -1, -1); + openCLExecuteKernel(cxt, &imgproc_gftt, kernelname, globalThreads, localThreads, args, -1, -1); //final kernelname = "sortCorners_selectionSortFinal"; args.pop_back(); - openCLExecuteKernel(cxt, &imgproc_gfft, kernelname, globalThreads, localThreads, args, -1, -1); + openCLExecuteKernel(cxt, &imgproc_gftt, kernelname, globalThreads, localThreads, args, -1, -1); } }; @@ -201,7 +201,7 @@ int findCorners_caller( size_t localThreads[3] = {16, 16, 1}; const char * opt = mask.empty() ? "" : "-D WITH_MASK"; - openCLExecuteKernel(cxt, &imgproc_gfft, kernelname, globalThreads, localThreads, args, -1, -1, opt); + openCLExecuteKernel(cxt, &imgproc_gftt, kernelname, globalThreads, localThreads, args, -1, -1, opt); return std::min(Mat(g_counter).at(0), max_count); } }//unnamed namespace diff --git a/modules/ocl/src/opencl/imgproc_gfft.cl b/modules/ocl/src/opencl/imgproc_gftt.cl similarity index 100% rename from modules/ocl/src/opencl/imgproc_gfft.cl rename to modules/ocl/src/opencl/imgproc_gftt.cl From 63d151a29c08d3fa0027224b197a2ec53edb4137 Mon Sep 17 00:00:00 2001 From: Roman Donchenko Date: Fri, 5 Jul 2013 16:10:28 +0400 Subject: [PATCH 11/18] xls-report.py: add ability to specify arbitrary sheet properties --- modules/ts/misc/xls-report.py | 24 ++++++++++++++++++------ 1 file changed, 18 insertions(+), 6 deletions(-) diff --git a/modules/ts/misc/xls-report.py b/modules/ts/misc/xls-report.py index 131f3fab5..b0839581c 100755 --- a/modules/ts/misc/xls-report.py +++ b/modules/ts/misc/xls-report.py @@ -64,6 +64,10 @@ Name for the sheet. If this parameter is missing, the name of sheet's directory will be used. + * 'sheet_properties': [(string, string)] + List of arbitrary (key, value) pairs that somehow describe the sheet. Will be + dumped into the first row of the sheet in string form. + Note that all keys are optional, although to get useful results, you'll want to specify at least 'configurations' and 'configuration_matchers'. @@ -231,24 +235,32 @@ def main(): sheet = wb.add_sheet(sheet_conf.get('sheet_name', os.path.basename(os.path.abspath(sheet_path)))) - sheet.row(0).height = 800 + sheet_properties = sheet_conf.get('sheet_properties', []) + + sheet.write(0, 0, 'Properties:') + + sheet.write(0, 1, + 'N/A' if len(sheet_properties) == 0 else + ' '.join(str(k) + '=' + repr(v) for (k, v) in sheet_properties)) + + sheet.row(2).height = 800 sheet.panes_frozen = True sheet.remove_splits = True - sheet.horz_split_pos = 1 - sheet.horz_split_first_visible = 1 + sheet.horz_split_pos = 3 + sheet.horz_split_first_visible = 3 sheet_comparisons = sheet_conf.get('comparisons', []) - for i, w in enumerate([2000, 15000, 2500, 2000, 15000] + for i, w in enumerate([2500, 15000, 2500, 2000, 15000] + (len(config_names) + 1 + len(sheet_comparisons)) * [4000]): sheet.col(i).width = w for i, caption in enumerate(['Module', 'Test', 'Image\nsize', 'Data\ntype', 'Parameters'] + config_names + [None] + [comp['to'] + '\nvs\n' + comp['from'] for comp in sheet_comparisons]): - sheet.row(0).write(i, caption, header_style) + sheet.row(2).write(i, caption, header_style) - row = 1 + row = 3 module_colors = sheet_conf.get('module_colors', {}) module_styles = {module: xlwt.easyxf('pattern: pattern solid, fore_color {}'.format(color)) From 1080c4295ae74dfcd3a05f82e8a64104e4f9d80d Mon Sep 17 00:00:00 2001 From: Roman Donchenko Date: Fri, 5 Jul 2013 18:41:03 +0400 Subject: [PATCH 12/18] xls-report.py: removed image size and type from the list of other parameters. Also, shrunk the corresponding column and the test name column. --- modules/ts/misc/xls-report.py | 20 ++++++++++++++------ 1 file changed, 14 insertions(+), 6 deletions(-) diff --git a/modules/ts/misc/xls-report.py b/modules/ts/misc/xls-report.py index b0839581c..b12048431 100755 --- a/modules/ts/misc/xls-report.py +++ b/modules/ts/misc/xls-report.py @@ -251,11 +251,11 @@ def main(): sheet_comparisons = sheet_conf.get('comparisons', []) - for i, w in enumerate([2500, 15000, 2500, 2000, 15000] + for i, w in enumerate([2500, 10000, 2500, 2000, 7500] + (len(config_names) + 1 + len(sheet_comparisons)) * [4000]): sheet.col(i).width = w - for i, caption in enumerate(['Module', 'Test', 'Image\nsize', 'Data\ntype', 'Parameters'] + for i, caption in enumerate(['Module', 'Test', 'Image\nsize', 'Data\ntype', 'Other parameters'] + config_names + [None] + [comp['to'] + '\nvs\n' + comp['from'] for comp in sheet_comparisons]): sheet.row(2).write(i, caption, header_style) @@ -271,11 +271,19 @@ def main(): sheet.write(row, 0, module, module_styles.get(module, xlwt.Style.default_style)) sheet.write(row, 1, test) - param_list = param[1:-1].split(", ") - sheet.write(row, 2, next(ifilter(re_image_size.match, param_list), None)) - sheet.write(row, 3, next(ifilter(re_data_type.match, param_list), None)) + param_list = param[1:-1].split(', ') if param.startswith('(') and param.endswith(')') else [param] - sheet.row(row).write(4, param) + image_size = next(ifilter(re_image_size.match, param_list), None) + if image_size is not None: + sheet.write(row, 2, image_size) + del param_list[param_list.index(image_size)] + + data_type = next(ifilter(re_data_type.match, param_list), None) + if data_type is not None: + sheet.write(row, 3, data_type) + del param_list[param_list.index(data_type)] + + sheet.row(row).write(4, ' | '.join(param_list)) for i, c in enumerate(config_names): if c in configs: sheet.write(row, 5 + i, configs[c], time_style) From 215b3e749fe35577bfd5789fcb1977e681cde8a4 Mon Sep 17 00:00:00 2001 From: Roman Donchenko Date: Fri, 5 Jul 2013 19:05:42 +0400 Subject: [PATCH 13/18] Added to the test log parser a crude ability to detect non-implemented tests. --- modules/ts/misc/testlog_parser.py | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/modules/ts/misc/testlog_parser.py b/modules/ts/misc/testlog_parser.py index 5d478645b..841ad2e13 100755 --- a/modules/ts/misc/testlog_parser.py +++ b/modules/ts/misc/testlog_parser.py @@ -13,10 +13,17 @@ class TestInfo(object): self.name = xmlnode.getAttribute("name") self.value_param = xmlnode.getAttribute("value_param") self.type_param = xmlnode.getAttribute("type_param") - if xmlnode.getElementsByTagName("failure"): - self.status = "failed" + + failures = xmlnode.getElementsByTagName("failure") + if len(failures) > 0: + if any("No equivalent implementation" in f.getAttribute("message") + for f in failures): + self.status = "notimpl" + else: + self.status = "failed" else: self.status = xmlnode.getAttribute("status") + if self.name.startswith("DISABLED_"): self.status = "disabled" self.fixture = self.fixture.replace("DISABLED_", "") From 5b2dc26f2ce9be1227910cfa45292fbfead73ab8 Mon Sep 17 00:00:00 2001 From: Roman Donchenko Date: Tue, 9 Jul 2013 18:57:22 +0400 Subject: [PATCH 14/18] Made the crude ability less crude. --- modules/ts/include/opencv2/ts/ts.hpp | 7 +++++++ modules/ts/misc/testlog_parser.py | 12 ++++++------ 2 files changed, 13 insertions(+), 6 deletions(-) diff --git a/modules/ts/include/opencv2/ts/ts.hpp b/modules/ts/include/opencv2/ts/ts.hpp index fcef5896c..ea0c97911 100644 --- a/modules/ts/include/opencv2/ts/ts.hpp +++ b/modules/ts/include/opencv2/ts/ts.hpp @@ -578,6 +578,13 @@ int main(int argc, char **argv) \ return RUN_ALL_TESTS(); \ } +// This usually only makes sense in perf tests with several implementations, +// some of which are not available. +#define CV_TEST_FAIL_NO_IMPL() do { \ + ::testing::Test::RecordProperty("custom_status", "noimpl"); \ + FAIL() << "No equivalent implementation."; \ +} while (0) + #endif #include "ts_perf.hpp" diff --git a/modules/ts/misc/testlog_parser.py b/modules/ts/misc/testlog_parser.py index 841ad2e13..4ab0a3ef2 100755 --- a/modules/ts/misc/testlog_parser.py +++ b/modules/ts/misc/testlog_parser.py @@ -14,13 +14,13 @@ class TestInfo(object): self.value_param = xmlnode.getAttribute("value_param") self.type_param = xmlnode.getAttribute("type_param") + custom_status = xmlnode.getAttribute("custom_status") failures = xmlnode.getElementsByTagName("failure") - if len(failures) > 0: - if any("No equivalent implementation" in f.getAttribute("message") - for f in failures): - self.status = "notimpl" - else: - self.status = "failed" + + if len(custom_status) > 0: + self.status = custom_status + elif len(failures) > 0: + self.status = "failed" else: self.status = xmlnode.getAttribute("status") From ea3239a00efa23e2017c936d0219cfa2708ff616 Mon Sep 17 00:00:00 2001 From: Roman Donchenko Date: Wed, 10 Jul 2013 14:50:51 +0400 Subject: [PATCH 15/18] xls-report.py: Added an option to show per-pixel times --- modules/ts/misc/xls-report.py | 91 ++++++++++++++++++++++++++++------- 1 file changed, 74 insertions(+), 17 deletions(-) diff --git a/modules/ts/misc/xls-report.py b/modules/ts/misc/xls-report.py index b12048431..d5db73d48 100755 --- a/modules/ts/misc/xls-report.py +++ b/modules/ts/misc/xls-report.py @@ -104,6 +104,7 @@ bad_speedup_style = xlwt.easyxf('font: color red', num_format_str='#0.00') no_speedup_style = no_time_style error_speedup_style = xlwt.easyxf('pattern: pattern solid, fore_color orange') header_style = xlwt.easyxf('font: bold true; alignment: horizontal centre, vertical top, wrap True') +subheader_style = xlwt.easyxf('alignment: horizontal centre, vertical top') class Collector(object): def __init__(self, config_match_func, include_unmatched): @@ -193,6 +194,8 @@ def main(): arg_parser.add_argument('-c', '--config', metavar='CONF', help='global configuration file') arg_parser.add_argument('--include-unmatched', action='store_true', help='include results from XML files that were not recognized by configuration matchers') + arg_parser.add_argument('--show-times-per-pixel', action='store_true', + help='for tests that have an image size parameter, show per-pixel time, as well as total time') args = arg_parser.parse_args() @@ -246,21 +249,53 @@ def main(): sheet.row(2).height = 800 sheet.panes_frozen = True sheet.remove_splits = True - sheet.horz_split_pos = 3 - sheet.horz_split_first_visible = 3 sheet_comparisons = sheet_conf.get('comparisons', []) - for i, w in enumerate([2500, 10000, 2500, 2000, 7500] - + (len(config_names) + 1 + len(sheet_comparisons)) * [4000]): - sheet.col(i).width = w + row = 2 - for i, caption in enumerate(['Module', 'Test', 'Image\nsize', 'Data\ntype', 'Other parameters'] - + config_names + [None] - + [comp['to'] + '\nvs\n' + comp['from'] for comp in sheet_comparisons]): - sheet.row(2).write(i, caption, header_style) + col = 0 - row = 3 + for (w, caption) in [ + (2500, 'Module'), + (10000, 'Test'), + (2500, 'Image\nsize'), + (2000, 'Data\ntype'), + (7500, 'Other parameters')]: + sheet.col(col).width = w + if args.show_times_per_pixel: + sheet.write_merge(row, row + 1, col, col, caption, header_style) + else: + sheet.write(row, col, caption, header_style) + col += 1 + + for config_name in config_names: + if args.show_times_per_pixel: + sheet.col(col).width = 3000 + sheet.col(col + 1).width = 3000 + sheet.write_merge(row, row, col, col + 1, config_name, header_style) + sheet.write(row + 1, col, 'total, ms', subheader_style) + sheet.write(row + 1, col + 1, 'per pixel, ns', subheader_style) + col += 2 + else: + sheet.col(col).width = 4000 + sheet.write(row, col, config_name, header_style) + col += 1 + + col += 1 # blank column between configurations and comparisons + + for comp in sheet_comparisons: + sheet.col(col).width = 4000 + caption = comp['to'] + '\nvs\n' + comp['from'] + if args.show_times_per_pixel: + sheet.write_merge(row, row + 1, col, col, caption, header_style) + else: + sheet.write(row, col, caption, header_style) + + row += 2 if args.show_times_per_pixel else 1 + + sheet.horz_split_pos = row + sheet.horz_split_first_visible = row module_colors = sheet_conf.get('module_colors', {}) module_styles = {module: xlwt.easyxf('pattern: pattern solid, fore_color {}'.format(color)) @@ -284,16 +319,36 @@ def main(): del param_list[param_list.index(data_type)] sheet.row(row).write(4, ' | '.join(param_list)) - for i, c in enumerate(config_names): - if c in configs: - sheet.write(row, 5 + i, configs[c], time_style) - else: - sheet.write(row, 5 + i, None, no_time_style) - for i, comp in enumerate(sheet_comparisons): + col = 5 + + for c in config_names: + if c in configs: + sheet.write(row, col, configs[c], time_style) + else: + sheet.write(row, col, None, no_time_style) + col += 1 + if args.show_times_per_pixel: + sheet.write(row, col, + xlwt.Formula( + ''' + {0} * 1000000 / ( + VALUE(MID({1}; 1; SEARCH("x"; {1}) - 1)) + * VALUE(MID({1}; SEARCH("x"; {1}) + 1; LEN({1}))) + ) + '''.replace('\n', '').replace(' ', '').format( + xlwt.Utils.rowcol_to_cell(row, col - 1), + xlwt.Utils.rowcol_to_cell(row, 2) + ) + ), + time_style) + col += 1 + + col += 1 # blank column + + for comp in sheet_comparisons: cmp_from = configs.get(comp["from"]) cmp_to = configs.get(comp["to"]) - col = 5 + len(config_names) + 1 + i if isinstance(cmp_from, numbers.Number) and isinstance(cmp_to, numbers.Number): try: @@ -306,6 +361,8 @@ def main(): else: sheet.write(row, col, None, no_speedup_style) + col += 1 + row += 1 if row % 1000 == 0: sheet.flush_row_data() From 2b2e02166ef81973e040ff36e9812d8e83599380 Mon Sep 17 00:00:00 2001 From: Andrey Pavlenko Date: Wed, 10 Jul 2013 15:12:39 +0400 Subject: [PATCH 16/18] setting version to 2.4.6.1 (hot-fix release for Linux camera support) --- modules/core/include/opencv2/core/version.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/core/include/opencv2/core/version.hpp b/modules/core/include/opencv2/core/version.hpp index 0a7760873..ba71a8259 100644 --- a/modules/core/include/opencv2/core/version.hpp +++ b/modules/core/include/opencv2/core/version.hpp @@ -50,7 +50,7 @@ #define CV_VERSION_EPOCH 2 #define CV_VERSION_MAJOR 4 #define CV_VERSION_MINOR 6 -#define CV_VERSION_REVISION 0 +#define CV_VERSION_REVISION 1 #define CVAUX_STR_EXP(__A) #__A #define CVAUX_STR(__A) CVAUX_STR_EXP(__A) From 05aeb7083114f5cb72cefc433a987ecfc19d6cad Mon Sep 17 00:00:00 2001 From: Nikita Manovich Date: Wed, 10 Jul 2013 17:43:47 +0400 Subject: [PATCH 17/18] Fixed VideoCapture::open() does not release previous capture sources (Bug #3150). VideoCapture didn't call release method and just ignored the new capture sources. OpenCV documentation: bool VideoCapture::open(const string& filename); bool VideoCapture::open(int device); The methods first call VideoCapture::release() to close the already opened file or camera. --- modules/highgui/src/cap.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/modules/highgui/src/cap.cpp b/modules/highgui/src/cap.cpp index 0d0fd41dd..b5cdc5e9f 100644 --- a/modules/highgui/src/cap.cpp +++ b/modules/highgui/src/cap.cpp @@ -489,14 +489,14 @@ VideoCapture::~VideoCapture() bool VideoCapture::open(const string& filename) { - if (!isOpened()) + if (isOpened()) release(); cap = cvCreateFileCapture(filename.c_str()); return isOpened(); } bool VideoCapture::open(int device) { - if (!isOpened()) + if (isOpened()) release(); cap = cvCreateCameraCapture(device); return isOpened(); } From c55dc3e8ab493f4c98a339389f3e8640455042c6 Mon Sep 17 00:00:00 2001 From: Nikita Manovich Date: Thu, 11 Jul 2013 16:06:25 +0400 Subject: [PATCH 18/18] Fixed "FileStorage (minor) error in doc (Feature #3151)" It was just a typo in the documentation. --- modules/core/doc/xml_yaml_persistence.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/core/doc/xml_yaml_persistence.rst b/modules/core/doc/xml_yaml_persistence.rst index 28bae2450..42fa40161 100644 --- a/modules/core/doc/xml_yaml_persistence.rst +++ b/modules/core/doc/xml_yaml_persistence.rst @@ -11,7 +11,7 @@ You can store and then restore various OpenCV data structures to/from XML (http: Use the following procedure to write something to XML or YAML: #. Create new :ocv:class:`FileStorage` and open it for writing. It can be done with a single call to :ocv:func:`FileStorage::FileStorage` constructor that takes a filename, or you can use the default constructor and then call :ocv:func:`FileStorage::open`. Format of the file (XML or YAML) is determined from the filename extension (".xml" and ".yml"/".yaml", respectively) - #. Write all the data you want using the streaming operator ``>>``, just like in the case of STL streams. + #. Write all the data you want using the streaming operator ``<<``, just like in the case of STL streams. #. Close the file using :ocv:func:`FileStorage::release`. ``FileStorage`` destructor also closes the file. Here is an example: ::