update some of the functions in ocl module to the latest version

This commit is contained in:
yao 2012-09-03 17:03:37 +08:00
parent 3fb3851c7a
commit 0fdb55a54d
24 changed files with 2931 additions and 1728 deletions

View File

@ -901,7 +901,7 @@ namespace cv
oclMat dx_buf, dy_buf; oclMat dx_buf, dy_buf;
oclMat edgeBuf; oclMat edgeBuf;
oclMat trackBuf1, trackBuf2; oclMat trackBuf1, trackBuf2;
oclMat counter; void * counter;
Ptr<FilterEngine_GPU> filterDX, filterDY; Ptr<FilterEngine_GPU> filterDX, filterDY;
}; };
@ -981,6 +981,9 @@ namespace cv
int nlevels; int nlevels;
protected: protected:
// initialize buffers; only need to do once in case of multiscale detection
void init_buffer(const oclMat& img, Size win_stride);
void computeBlockHistograms(const oclMat& img); void computeBlockHistograms(const oclMat& img);
void computeGradient(const oclMat& img, oclMat& grad, oclMat& qangle); void computeGradient(const oclMat& img, oclMat& grad, oclMat& qangle);
@ -1004,7 +1007,11 @@ namespace cv
// Gradients conputation results // Gradients conputation results
oclMat grad, qangle; oclMat grad, qangle;
std::vector<oclMat> image_scales; // scaled image
oclMat image_scale;
// effect size of input image (might be different from original size after scaling)
Size effect_size;
}; };
//! Speeded up robust features, port from GPU module. //! Speeded up robust features, port from GPU module.

View File

@ -0,0 +1,122 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved.
// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// @Authors
// Fangfang Bai, fangfang@multicorewareinc.com
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other oclMaterials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors as is and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#include "precomp.hpp"
#include <iomanip>
#ifdef HAVE_OPENCL
using namespace cv;
using namespace cv::ocl;
using namespace cvtest;
using namespace testing;
using namespace std;
PARAM_TEST_CASE(Blend, MatType, int)
{
int type;
int channels;
std::vector<cv::ocl::Info> oclinfo;
virtual void SetUp()
{
type = GET_PARAM(0);
channels = GET_PARAM(1);
//int devnums = getDevice(oclinfo);
//CV_Assert(devnums > 0);
//cv::ocl::setBinpath(CLBINPATH);
}
};
TEST_P(Blend, Performance)
{
cv::Size size(MWIDTH, MHEIGHT);
cv::Mat img1_host = randomMat(size, CV_MAKETYPE(type, channels), 0, type == CV_8U ? 255.0 : 1.0);
cv::Mat img2_host = randomMat(size, CV_MAKETYPE(type, channels), 0, type == CV_8U ? 255.0 : 1.0);
cv::Mat weights1 = randomMat(size, CV_32F, 0, 1);
cv::Mat weights2 = randomMat(size, CV_32F, 0, 1);
cv::ocl::oclMat gimg1(size, CV_MAKETYPE(type, channels)), gimg2(size, CV_MAKETYPE(type, channels)), gweights1(size, CV_32F), gweights2(size, CV_32F);
cv::ocl::oclMat gdst(size, CV_MAKETYPE(type, channels));
double totalgputick_all = 0;
double totalgputick_kernel = 0;
double t1 = 0;
double t2 = 0;
for (int j = 0; j < LOOP_TIMES + 1; j ++) //LOOP_TIMES=100
{
t1 = (double)cvGetTickCount();
cv::ocl::oclMat gimg1 = cv::ocl::oclMat(img1_host);
cv::ocl::oclMat gimg2 = cv::ocl::oclMat(img2_host);
cv::ocl::oclMat gweights1 = cv::ocl::oclMat(weights1);
cv::ocl::oclMat gweights2 = cv::ocl::oclMat(weights1);
t2 = (double)cvGetTickCount();
cv::ocl::blendLinear(gimg1, gimg2, gweights1, gweights2, gdst);
t2 = (double)cvGetTickCount() - t2;
cv::Mat m;
gdst.download(m);
t1 = (double)cvGetTickCount() - t1;
if (j == 0)
{
continue;
}
totalgputick_all = t1 + totalgputick_all;
totalgputick_kernel = t2 + totalgputick_kernel;
};
cout << "average gpu total runtime is " << totalgputick_all / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl;
cout << "average gpu runtime without data transfering is " << totalgputick_kernel / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl;
}
INSTANTIATE_TEST_CASE_P(GPU_ImgProc, Blend, Combine(
Values(CV_8U, CV_32F), Values(1, 4)));
#endif

View File

@ -0,0 +1,155 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved.
// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// @Authors
// Fangfang Bai, fangfang@multicorewareinc.com
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other oclMaterials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors as is and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#include "precomp.hpp"
#include <iomanip>
#ifdef HAVE_OPENCL
using namespace cv;
using namespace cv::ocl;
using namespace cvtest;
using namespace testing;
using namespace std;
#define FILTER_IMAGE "../../../samples/gpu/road.png"
#ifndef MWC_TEST_UTILITY
#define MWC_TEST_UTILITY
// Param class
#ifndef IMPLEMENT_PARAM_CLASS
#define IMPLEMENT_PARAM_CLASS(name, type) \
class name \
{ \
public: \
name ( type arg = type ()) : val_(arg) {} \
operator type () const {return val_;} \
private: \
type val_; \
}; \
inline void PrintTo( name param, std::ostream* os) \
{ \
*os << #name << "(" << testing::PrintToString(static_cast< type >(param)) << ")"; \
}
IMPLEMENT_PARAM_CLASS(Channels, int)
#endif // IMPLEMENT_PARAM_CLASS
#endif // MWC_TEST_UTILITY
////////////////////////////////////////////////////////
// Canny1
IMPLEMENT_PARAM_CLASS(AppertureSize, int);
IMPLEMENT_PARAM_CLASS(L2gradient, bool);
PARAM_TEST_CASE(Canny1, AppertureSize, L2gradient)
{
int apperture_size;
bool useL2gradient;
//std::vector<cv::ocl::Info> oclinfo;
virtual void SetUp()
{
apperture_size = GET_PARAM(0);
useL2gradient = GET_PARAM(1);
//int devnums = getDevice(oclinfo);
//CV_Assert(devnums > 0);
}
};
TEST_P(Canny1, Performance)
{
cv::Mat img = readImage(FILTER_IMAGE,cv::IMREAD_GRAYSCALE);
ASSERT_FALSE(img.empty());
double low_thresh = 100.0;
double high_thresh = 150.0;
cv::Mat edges_gold;
cv::ocl::oclMat edges;
double totalgputick=0;
double totalgputick_kernel=0;
double t1=0;
double t2=0;
for(int j = 0; j < LOOP_TIMES+1; j ++)
{
t1 = (double)cvGetTickCount();//gpu start1
cv::ocl::oclMat ocl_img = cv::ocl::oclMat(img);//upload
t2=(double)cvGetTickCount();//kernel
cv::ocl::Canny(ocl_img, edges, low_thresh, high_thresh, apperture_size, useL2gradient);
t2 = (double)cvGetTickCount() - t2;//kernel
cv::Mat cpu_dst;
edges.download (cpu_dst);//download
t1 = (double)cvGetTickCount() - t1;//gpu end1
if(j == 0)
continue;
totalgputick=t1+totalgputick;
totalgputick_kernel=t2+totalgputick_kernel;
}
cout << "average gpu runtime is " << totalgputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
cout << "average gpu runtime without data transfer is " << totalgputick_kernel/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
}
INSTANTIATE_TEST_CASE_P(GPU_ImgProc, Canny1, testing::Combine(
testing::Values(AppertureSize(3), AppertureSize(5)),
testing::Values(L2gradient(false), L2gradient(true))));
#endif //Have opencl

View File

@ -0,0 +1,120 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved.
// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// @Authors
// Fangfang Bai fangfang@multicorewareinc.com
//
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other oclMaterials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#include "precomp.hpp"
#include <iomanip>
using namespace cv;
using namespace cv::ocl;
using namespace cvtest;
using namespace testing;
using namespace std;
///////////////////////////////////////////////////////////////////////////////
/// ColumnSum
#ifdef HAVE_OPENCL
////////////////////////////////////////////////////////////////////////
// ColumnSum
PARAM_TEST_CASE(ColumnSum)
{
cv::Mat src;
//std::vector<cv::ocl::Info> oclinfo;
virtual void SetUp()
{
//int devnums = getDevice(oclinfo);
//CV_Assert(devnums > 0);
}
};
TEST_F(ColumnSum, Performance)
{
cv::Size size(MWIDTH,MHEIGHT);
cv::Mat src = randomMat(size, CV_32FC1);
cv::ocl::oclMat d_dst;
double totalgputick=0;
double totalgputick_kernel=0;
double t1=0;
double t2=0;
for(int j = 0; j < LOOP_TIMES+1; j ++)
{
t1 = (double)cvGetTickCount();//gpu start1
cv::ocl::oclMat d_src(src);
t2=(double)cvGetTickCount();//kernel
cv::ocl::columnSum(d_src,d_dst);
t2 = (double)cvGetTickCount() - t2;//kernel
cv::Mat cpu_dst;
d_dst.download (cpu_dst);//download
t1 = (double)cvGetTickCount() - t1;//gpu end1
if(j == 0)
continue;
totalgputick=t1+totalgputick;
totalgputick_kernel=t2+totalgputick_kernel;
}
cout << "average gpu runtime is " << totalgputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
cout << "average gpu runtime without data transfer is " << totalgputick_kernel/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
}
#endif

View File

@ -0,0 +1,126 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved.
// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// @Authors
// Fangfangbai, fangfang@multicorewareinc.com
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other oclMaterials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors as is and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#include "precomp.hpp"
using namespace std;
#ifdef HAVE_CLAMDFFT
////////////////////////////////////////////////////////////////////////////
// Dft
PARAM_TEST_CASE(Dft, cv::Size, bool)
{
cv::Size dft_size;
bool dft_rows;
vector<cv::ocl::Info> info;
virtual void SetUp()
{
dft_size = GET_PARAM(0);
dft_rows = GET_PARAM(1);
cv::ocl::getDevice(info);
}
};
TEST_P(Dft, C2C)
{
cv::Mat a = randomMat(dft_size, CV_32FC2, 0.0, 10.0);
int flags = 0;
flags |= dft_rows ? cv::DFT_ROWS : 0;
cv::ocl::oclMat d_b;
double totalgputick=0;
double totalgputick_kernel=0;
double t1=0;
double t2=0;
for(int j = 0; j < LOOP_TIMES+1; j ++)
{
t1 = (double)cvGetTickCount();//gpu start1
cv::ocl::oclMat ga=cv::ocl::oclMat(a);//upload
t2=(double)cvGetTickCount();//kernel
cv::ocl::dft(ga, d_b, a.size(), flags);
t2 = (double)cvGetTickCount() - t2;//kernel
cv::Mat cpu_dst;
d_b.download (cpu_dst);//download
t1 = (double)cvGetTickCount() - t1;//gpu end1
if(j == 0)
continue;
totalgputick=t1+totalgputick;
totalgputick_kernel=t2+totalgputick_kernel;
}
cout << "average gpu runtime is " << totalgputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
cout << "average gpu runtime without data transfer is " << totalgputick_kernel/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
}
TEST_P(Dft, R2CthenC2R)
{
cv::Mat a = randomMat(dft_size, CV_32FC1, 0.0, 10.0);
int flags = 0;
//flags |= dft_rows ? cv::DFT_ROWS : 0; // not supported yet
cv::ocl::oclMat d_b, d_c;
cv::ocl::dft(cv::ocl::oclMat(a), d_b, a.size(), flags);
cv::ocl::dft(d_b, d_c, a.size(), flags + cv::DFT_INVERSE + cv::DFT_REAL_OUTPUT);
EXPECT_MAT_NEAR(a, d_c, a.size().area() * 1e-4, "");
}
//INSTANTIATE_TEST_CASE_P(ocl_DFT, Dft, testing::Combine(
// testing::Values(cv::Size(1280, 1024), cv::Size(1920, 1080),cv::Size(1800, 1500)),
// testing::Values(false, true)));
#endif // HAVE_CLAMDFFT

View File

@ -0,0 +1,113 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved.
// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// @Authors
// Fangfang Bai, fangfang@multicorewareinc.com
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other oclMaterials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors as is and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#include "precomp.hpp"
using namespace std;
#ifdef HAVE_CLAMDBLAS
////////////////////////////////////////////////////////////////////////////
// GEMM
PARAM_TEST_CASE(Gemm, int, cv::Size, int)
{
int type;
cv::Size mat_size;
int flags;
vector<cv::ocl::Info> info;
virtual void SetUp()
{
type = GET_PARAM(0);
mat_size = GET_PARAM(1);
flags = GET_PARAM(2);
cv::ocl::getDevice(info);
}
};
TEST_P(Gemm, Performance)
{
cv::Mat a = randomMat(mat_size, type, 0.0, 10.0);
cv::Mat b = randomMat(mat_size, type, 0.0, 10.0);
cv::Mat c = randomMat(mat_size, type, 0.0, 10.0);
cv::ocl::oclMat ocl_dst;
double totalgputick=0;
double totalgputick_kernel=0;
double t1=0;
double t2=0;
for(int j = 0; j < LOOP_TIMES+1; j ++)
{
t1 = (double)cvGetTickCount();//gpu start1
cv::ocl::oclMat ga = cv::ocl::oclMat(a);//upload
cv::ocl::oclMat gb = cv::ocl::oclMat(b);//upload
cv::ocl::oclMat gc = cv::ocl::oclMat(c);//upload
t2=(double)cvGetTickCount();//kernel
cv::ocl::gemm(ga, gb, 1.0,gc, 1.0, ocl_dst, flags);
t2 = (double)cvGetTickCount() - t2;//kernel
cv::Mat cpu_dst;
ocl_dst.download (cpu_dst);//download
t1 = (double)cvGetTickCount() - t1;//gpu end
if(j == 0)
continue;
totalgputick=t1+totalgputick;
totalgputick_kernel=t2+totalgputick_kernel;
}
cout << "average gpu runtime is " << totalgputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
cout << "average gpu runtime without data transfer is " << totalgputick_kernel/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
}
INSTANTIATE_TEST_CASE_P(ocl_gemm, Gemm, testing::Combine(
testing::Values(CV_32FC1, CV_32FC2/* , CV_64FC1, CV_64FC2*/),
testing::Values(cv::Size(512, 512), cv::Size(1024, 1024)),
testing::Values(0, cv::GEMM_1_T, cv::GEMM_2_T, cv::GEMM_1_T + cv::GEMM_2_T)));
#endif

View File

@ -0,0 +1,218 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// Intel License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved.
// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// @Authors
// Fangfang BAI, fangfang@multicorewareinc.com
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of Intel Corporation may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#include "precomp.hpp"
#include "opencv2/core/core.hpp"
#include <iomanip>
using namespace std;
#ifdef HAVE_OPENCL
PARAM_TEST_CASE(HOG,cv::Size,int)
{
cv::Size winSize;
int type;
std::vector<cv::ocl::Info> oclinfo;
virtual void SetUp()
{
winSize = GET_PARAM(0);
type = GET_PARAM(1);
int devnums = getDevice(oclinfo);
CV_Assert(devnums > 0);
}
};
TEST_P(HOG, GetDescriptors)
{
// Load image
cv::Mat img_rgb = readImage("D:road.png");
ASSERT_FALSE(img_rgb.empty());
// Convert image
cv::Mat img;
switch (type)
{
case CV_8UC1:
cv::cvtColor(img_rgb, img, CV_BGR2GRAY);
break;
case CV_8UC4:
default:
cv::cvtColor(img_rgb, img, CV_BGR2BGRA);
break;
}
// HOGs
cv::ocl::HOGDescriptor ocl_hog;
ocl_hog.gamma_correction = true;
// Compute descriptor
cv::ocl::oclMat d_descriptors;
//down_descriptors = down_descriptors.reshape(0, down_descriptors.cols * down_descriptors.rows);
double totalgputick=0;
double totalgputick_kernel=0;
double t1=0;
double t2=0;
for(int j = 0; j < LOOP_TIMES+1; j ++)
{
t1 = (double)cvGetTickCount();//gpu start1
cv::ocl::oclMat d_img=cv::ocl::oclMat(img);//upload
t2=(double)cvGetTickCount();//kernel
ocl_hog.getDescriptors(d_img, ocl_hog.win_size, d_descriptors, ocl_hog.DESCR_FORMAT_COL_BY_COL);
t2 = (double)cvGetTickCount() - t2;//kernel
cv::Mat down_descriptors;
d_descriptors.download(down_descriptors);
t1 = (double)cvGetTickCount() - t1;//gpu end1
if(j == 0)
continue;
totalgputick=t1+totalgputick;
totalgputick_kernel=t2+totalgputick_kernel;
}
cout << "average gpu runtime is " << totalgputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
cout << "average gpu runtime without data transfer is " << totalgputick_kernel/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
}
TEST_P(HOG, Detect)
{
// Load image
cv::Mat img_rgb = readImage("D:road.png");
ASSERT_FALSE(img_rgb.empty());
// Convert image
cv::Mat img;
switch (type)
{
case CV_8UC1:
cv::cvtColor(img_rgb, img, CV_BGR2GRAY);
break;
case CV_8UC4:
default:
cv::cvtColor(img_rgb, img, CV_BGR2BGRA);
break;
}
// HOGs
if ((winSize != cv::Size(48, 96)) && (winSize != cv::Size(64, 128)))
winSize = cv::Size(64, 128);
cv::ocl::HOGDescriptor ocl_hog(winSize);
ocl_hog.gamma_correction = true;
cv::HOGDescriptor hog;
hog.winSize = winSize;
hog.gammaCorrection = true;
if (winSize.width == 48 && winSize.height == 96)
{
// daimler's base
ocl_hog.setSVMDetector(ocl_hog.getPeopleDetector48x96());
hog.setSVMDetector(hog.getDaimlerPeopleDetector());
}
else if (winSize.width == 64 && winSize.height == 128)
{
ocl_hog.setSVMDetector(ocl_hog.getPeopleDetector64x128());
hog.setSVMDetector(hog.getDefaultPeopleDetector());
}
else
{
ocl_hog.setSVMDetector(ocl_hog.getDefaultPeopleDetector());
hog.setSVMDetector(hog.getDefaultPeopleDetector());
}
// OpenCL detection
std::vector<cv::Point> d_v_locations;
double totalgputick=0;
double totalgputick_kernel=0;
double t1=0;
double t2=0;
for(int j = 0; j < LOOP_TIMES+1; j ++)
{
t1 = (double)cvGetTickCount();//gpu start1
cv::ocl::oclMat d_img=cv::ocl::oclMat(img);//upload
t2=(double)cvGetTickCount();//kernel
ocl_hog.detect(d_img, d_v_locations, 0);
t2 = (double)cvGetTickCount() - t2;//kernel
t1 = (double)cvGetTickCount() - t1;//gpu end1
if(j == 0)
continue;
totalgputick=t1+totalgputick;
totalgputick_kernel=t2+totalgputick_kernel;
}
cout << "average gpu runtime is " << totalgputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
cout << "average gpu runtime without data transfer is " << totalgputick_kernel/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
}
INSTANTIATE_TEST_CASE_P(OCL_ObjDetect, HOG, testing::Combine(
testing::Values(cv::Size(64, 128), cv::Size(48, 96)),
testing::Values(MatType(CV_8UC1), MatType(CV_8UC4))));
#endif //HAVE_OPENCL

View File

@ -0,0 +1,232 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved.
// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// @Authors
// Fangfang Bai, fangfang@multicorewareinc.com
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other oclMaterials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors as is and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#include "precomp.hpp"
#include <iomanip>
#ifdef HAVE_OPENCL
using namespace cv;
using namespace cv::ocl;
using namespace cvtest;
using namespace testing;
using namespace std;
#ifndef MWC_TEST_UTILITY
#define MWC_TEST_UTILITY
//////// Utility
#ifndef DIFFERENT_SIZES
#else
#undef DIFFERENT_SIZES
#endif
#define DIFFERENT_SIZES testing::Values(cv::Size(256, 256), cv::Size(3000, 3000))
// Param class
#ifndef IMPLEMENT_PARAM_CLASS
#define IMPLEMENT_PARAM_CLASS(name, type) \
class name \
{ \
public: \
name ( type arg = type ()) : val_(arg) {} \
operator type () const {return val_;} \
private: \
type val_; \
}; \
inline void PrintTo( name param, std::ostream* os) \
{ \
*os << #name << "(" << testing::PrintToString(static_cast< type >(param)) << ")"; \
}
IMPLEMENT_PARAM_CLASS(Channels, int)
#endif // IMPLEMENT_PARAM_CLASS
#endif // MWC_TEST_UTILITY
////////////////////////////////////////////////////////////////////////////////
// MatchTemplate
#define ALL_TEMPLATE_METHODS testing::Values(TemplateMethod(cv::TM_SQDIFF), TemplateMethod(cv::TM_CCORR), TemplateMethod(cv::TM_CCOEFF), TemplateMethod(cv::TM_SQDIFF_NORMED), TemplateMethod(cv::TM_CCORR_NORMED), TemplateMethod(cv::TM_CCOEFF_NORMED))
IMPLEMENT_PARAM_CLASS(TemplateSize, cv::Size);
const char* TEMPLATE_METHOD_NAMES[6] = {"TM_SQDIFF", "TM_SQDIFF_NORMED", "TM_CCORR", "TM_CCORR_NORMED", "TM_CCOEFF", "TM_CCOEFF_NORMED"};
PARAM_TEST_CASE(MatchTemplate, cv::Size, TemplateSize, Channels, TemplateMethod)
{
cv::Size size;
cv::Size templ_size;
int cn;
int method;
//vector<cv::ocl::Info> oclinfo;
virtual void SetUp()
{
size = GET_PARAM(0);
templ_size = GET_PARAM(1);
cn = GET_PARAM(2);
method = GET_PARAM(3);
//int devnums = getDevice(oclinfo);
//CV_Assert(devnums > 0);
}
};
struct MatchTemplate8U : MatchTemplate {};
TEST_P(MatchTemplate8U, Performance)
{
std::cout << "Method: " << TEMPLATE_METHOD_NAMES[method] << std::endl;
std::cout << "Image Size: (" << size.width << ", " << size.height << ")"<< std::endl;
std::cout << "Template Size: (" << templ_size.width << ", " << templ_size.height << ")"<< std::endl;
std::cout << "Channels: " << cn << std::endl;
cv::Mat image = randomMat(size, CV_MAKETYPE(CV_8U, cn));
cv::Mat templ = randomMat(templ_size, CV_MAKETYPE(CV_8U, cn));
cv::Mat dst_gold;
cv::ocl::oclMat dst;
double totalgputick=0;
double totalgputick_kernel=0;
double t1=0;
double t2=0;
for(int j = 0; j < LOOP_TIMES+1; j ++)
{
t1 = (double)cvGetTickCount();//gpu start1
cv::ocl::oclMat ocl_image = cv::ocl::oclMat(image);//upload
cv::ocl::oclMat ocl_templ = cv::ocl::oclMat(templ);//upload
t2=(double)cvGetTickCount();//kernel
cv::ocl::matchTemplate(ocl_image, ocl_templ, dst, method);
t2 = (double)cvGetTickCount() - t2;//kernel
cv::Mat cpu_dst;
dst.download (cpu_dst);//download
t1 = (double)cvGetTickCount() - t1;//gpu end1
if(j == 0)
continue;
totalgputick=t1+totalgputick;
totalgputick_kernel=t2+totalgputick_kernel;
}
cout << "average gpu runtime is " << totalgputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
cout << "average gpu runtime without data transfer is " << totalgputick_kernel/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
}
struct MatchTemplate32F : MatchTemplate {};
TEST_P(MatchTemplate32F, Performance)
{
std::cout << "Method: " << TEMPLATE_METHOD_NAMES[method] << std::endl;
std::cout << "Image Size: (" << size.width << ", " << size.height << ")"<< std::endl;
std::cout << "Template Size: (" << templ_size.width << ", " << templ_size.height << ")"<< std::endl;
std::cout << "Channels: " << cn << std::endl;
cv::Mat image = randomMat(size, CV_MAKETYPE(CV_32F, cn));
cv::Mat templ = randomMat(templ_size, CV_MAKETYPE(CV_32F, cn));
cv::Mat dst_gold;
cv::ocl::oclMat dst;
double totalgputick=0;
double totalgputick_kernel=0;
double t1=0;
double t2=0;
for(int j = 0; j < LOOP_TIMES; j ++)
{
t1 = (double)cvGetTickCount();//gpu start1
cv::ocl::oclMat ocl_image = cv::ocl::oclMat(image);//upload
cv::ocl::oclMat ocl_templ = cv::ocl::oclMat(templ);//upload
t2=(double)cvGetTickCount();//kernel
cv::ocl::matchTemplate(ocl_image, ocl_templ, dst, method);
t2 = (double)cvGetTickCount() - t2;//kernel
cv::Mat cpu_dst;
dst.download (cpu_dst);//download
t1 = (double)cvGetTickCount() - t1;//gpu end1
totalgputick=t1+totalgputick;
totalgputick_kernel=t2+totalgputick_kernel;
}
cout << "average gpu runtime is " << totalgputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
cout << "average gpu runtime without data transfer is " << totalgputick_kernel/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
}
INSTANTIATE_TEST_CASE_P(GPU_ImgProc, MatchTemplate8U,
testing::Combine(
testing::Values(cv::Size(1280, 1024), cv::Size(MWIDTH, MHEIGHT),cv::Size(1800, 1500)),
testing::Values(TemplateSize(cv::Size(5, 5)), TemplateSize(cv::Size(16, 16))/*, TemplateSize(cv::Size(30, 30))*/),
testing::Values(Channels(1), Channels(4)/*, Channels(3)*/),
ALL_TEMPLATE_METHODS
)
);
INSTANTIATE_TEST_CASE_P(GPU_ImgProc, MatchTemplate32F, testing::Combine(
testing::Values(cv::Size(1280, 1024), cv::Size(MWIDTH, MHEIGHT),cv::Size(1800, 1500)),
testing::Values(TemplateSize(cv::Size(5, 5)), TemplateSize(cv::Size(16, 16))/*, TemplateSize(cv::Size(30, 30))*/),
testing::Values(Channels(1), Channels(4) /*, Channels(3)*/),
testing::Values(TemplateMethod(cv::TM_SQDIFF), TemplateMethod(cv::TM_CCORR))));
#endif //HAVE_OPENCL

View File

@ -0,0 +1,137 @@
///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved.
// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// @Authors
// fangfang bai, fangfang@multicorewareinc.com
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other oclMaterials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#include "precomp.hpp"
#include <iomanip>
#ifdef HAVE_OPENCL
using namespace cv;
using namespace cv::ocl;
using namespace cvtest;
using namespace testing;
using namespace std;
PARAM_TEST_CASE(PyrDown, MatType, int)
{
int type;
int channels;
//src mat
cv::Mat mat1;
cv::Mat dst;
//std::vector<cv::ocl::Info> oclinfo;
//ocl dst mat for testing
cv::ocl::oclMat gmat1;
cv::ocl::oclMat gdst;
virtual void SetUp()
{
type = GET_PARAM(0);
channels = GET_PARAM(1);
//int devnums = getDevice(oclinfo);
//CV_Assert(devnums > 0);
}
};
#define VARNAME(A) string(#A);
////////////////////////////////PyrDown/////////////////////////////////////////////////
TEST_P(PyrDown, Mat)
{
cv::Size size(MWIDTH, MHEIGHT);
cv::RNG &rng = TS::ptr()->get_rng();
mat1 = randomMat(rng, size, CV_MAKETYPE(type, channels), 5, 16, false);
cv::ocl::oclMat gdst;
double totalgputick = 0;
double totalgputick_kernel = 0;
double t1 = 0;
double t2 = 0;
for (int j = 0; j < LOOP_TIMES + 1; j ++)
{
t1 = (double)cvGetTickCount();//gpu start1
cv::ocl::oclMat gmat1(mat1);
t2 = (double)cvGetTickCount(); //kernel
cv::ocl::pyrDown(gmat1, gdst);
t2 = (double)cvGetTickCount() - t2;//kernel
cv::Mat cpu_dst;
gdst.download(cpu_dst);
t1 = (double)cvGetTickCount() - t1;//gpu end1
if (j == 0)
{
continue;
}
totalgputick = t1 + totalgputick;
totalgputick_kernel = t2 + totalgputick_kernel;
}
cout << "average gpu runtime is " << totalgputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl;
cout << "average gpu runtime without data transfer is " << totalgputick_kernel / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl;
}
//********test****************
INSTANTIATE_TEST_CASE_P(GPU_ImgProc, PyrDown, Combine(
Values(CV_8U, CV_32F), Values(1, 4)));
#endif // HAVE_OPENCL

View File

@ -0,0 +1,122 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved.
// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// @Authors
// fangfang bai fangfang@multicorewareinc.com
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other oclMaterials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#include "opencv2/core/core.hpp"
#include "precomp.hpp"
#include <iomanip>
#ifdef HAVE_OPENCL
using namespace cv;
using namespace cv::ocl;
using namespace cvtest;
using namespace testing;
using namespace std;
PARAM_TEST_CASE(PyrUp, MatType, int)
{
int type;
int channels;
//std::vector<cv::ocl::Info> oclinfo;
virtual void SetUp()
{
type = GET_PARAM(0);
channels = GET_PARAM(1);
//int devnums = getDevice(oclinfo);
//CV_Assert(devnums > 0);
}
};
TEST_P(PyrUp, Performance)
{
cv::Size size(MWIDTH, MHEIGHT);
cv::Mat src = randomMat(size, CV_MAKETYPE(type, channels));
cv::Mat dst_gold;
cv::ocl::oclMat dst;
double totalgputick = 0;
double totalgputick_kernel = 0;
double t1 = 0;
double t2 = 0;
for (int j = 0; j < LOOP_TIMES + 1; j ++)
{
t1 = (double)cvGetTickCount();//gpu start1
cv::ocl::oclMat srcMat = cv::ocl::oclMat(src);//upload
t2 = (double)cvGetTickCount(); //kernel
cv::ocl::pyrUp(srcMat, dst);
t2 = (double)cvGetTickCount() - t2;//kernel
cv::Mat cpu_dst;
dst.download(cpu_dst); //download
t1 = (double)cvGetTickCount() - t1;//gpu end1
if (j == 0)
{
continue;
}
totalgputick = t1 + totalgputick;
totalgputick_kernel = t2 + totalgputick_kernel;
}
cout << "average gpu runtime is " << totalgputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl;
cout << "average gpu runtime without data transfer is " << totalgputick_kernel / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl;
}
INSTANTIATE_TEST_CASE_P(GPU_ImgProc, PyrUp, Combine(
Values(CV_8U, CV_32F), Values(1, 4)));
#endif // HAVE_OPENCL

View File

@ -88,11 +88,11 @@ void cv::ocl::CannyBuf::create(const Size& image_size, int apperture_size)
Mat kx, ky; Mat kx, ky;
if (!filterDX) if (!filterDX)
{ {
filterDX = createDerivFilter_GPU(CV_32F, CV_32F, 1, 0, apperture_size, BORDER_REPLICATE); filterDX = createDerivFilter_GPU(CV_8U, CV_32S, 1, 0, apperture_size, BORDER_REPLICATE);
} }
if (!filterDY) if (!filterDY)
{ {
filterDY = createDerivFilter_GPU(CV_32F, CV_32F, 0, 1, apperture_size, BORDER_REPLICATE); filterDY = createDerivFilter_GPU(CV_8U, CV_32S, 0, 1, apperture_size, BORDER_REPLICATE);
} }
} }
edgeBuf.create(image_size.height + 2, image_size.width + 2, CV_32FC1); edgeBuf.create(image_size.height + 2, image_size.width + 2, CV_32FC1);
@ -100,7 +100,10 @@ void cv::ocl::CannyBuf::create(const Size& image_size, int apperture_size)
trackBuf1.create(1, image_size.width * image_size.height, CV_16UC2); trackBuf1.create(1, image_size.width * image_size.height, CV_16UC2);
trackBuf2.create(1, image_size.width * image_size.height, CV_16UC2); trackBuf2.create(1, image_size.width * image_size.height, CV_16UC2);
counter.create(1,1, CV_32SC1); float counter_f [1] = { 0 };
int err = 0;
counter = clCreateBuffer( Context::getContext()->impl->clContext, CL_MEM_COPY_HOST_PTR, sizeof(float), counter_f, &err );
openCLSafeCall(err);
} }
void cv::ocl::CannyBuf::release() void cv::ocl::CannyBuf::release()
@ -112,7 +115,7 @@ void cv::ocl::CannyBuf::release()
edgeBuf.release(); edgeBuf.release();
trackBuf1.release(); trackBuf1.release();
trackBuf2.release(); trackBuf2.release();
counter.release(); openCLFree(counter);
} }
namespace cv { namespace ocl { namespace cv { namespace ocl {
@ -125,9 +128,9 @@ namespace cv { namespace ocl {
void calcMap_gpu(oclMat& dx, oclMat& dy, oclMat& mag, oclMat& map, int rows, int cols, float low_thresh, float high_thresh); void calcMap_gpu(oclMat& dx, oclMat& dy, oclMat& mag, oclMat& map, int rows, int cols, float low_thresh, float high_thresh);
void edgesHysteresisLocal_gpu(oclMat& map, oclMat& st1, oclMat& counter, int rows, int cols); void edgesHysteresisLocal_gpu(oclMat& map, oclMat& st1, void * counter, int rows, int cols);
void edgesHysteresisGlobal_gpu(oclMat& map, oclMat& st1, oclMat& st2, oclMat& counter, int rows, int cols); void edgesHysteresisGlobal_gpu(oclMat& map, oclMat& st1, oclMat& st2, void * counter, int rows, int cols);
void getEdges_gpu(oclMat& map, oclMat& dst, int rows, int cols); void getEdges_gpu(oclMat& map, oclMat& dst, int rows, int cols);
} }
@ -164,11 +167,10 @@ void cv::ocl::Canny(const oclMat& src, CannyBuf& buf, oclMat& dst, double low_th
std::swap( low_thresh, high_thresh ); std::swap( low_thresh, high_thresh );
dst.create(src.size(), CV_8U); dst.create(src.size(), CV_8U);
dst.setTo(Scalar::all(0)); //dst.setTo(Scalar::all(0));
buf.create(src.size(), apperture_size); buf.create(src.size(), apperture_size);
buf.edgeBuf.setTo(Scalar::all(0)); //buf.edgeBuf.setTo(Scalar::all(0));
buf.counter.setTo(Scalar::all(0));
if (apperture_size == 3) if (apperture_size == 3)
{ {
@ -178,17 +180,8 @@ void cv::ocl::Canny(const oclMat& src, CannyBuf& buf, oclMat& dst, double low_th
} }
else else
{ {
// FIXME: buf.filterDX->apply(src, buf.dx);
// current ocl implementation requires the src and dst having same type buf.filterDY->apply(src, buf.dy);
// convertTo is time consuming so this may be optimized later.
oclMat src_omat32f = src;
src.convertTo(src_omat32f, CV_32F); // FIXME
buf.filterDX->apply(src_omat32f, buf.dx);
buf.filterDY->apply(src_omat32f, buf.dy);
buf.dx.convertTo(buf.dx, CV_32S); // FIXME
buf.dy.convertTo(buf.dy, CV_32S); // FIXME
calcMagnitude_gpu(buf.dx, buf.dy, buf.edgeBuf, src.rows, src.cols, L2gradient); calcMagnitude_gpu(buf.dx, buf.dy, buf.edgeBuf, src.rows, src.cols, L2gradient);
} }
@ -210,12 +203,11 @@ void cv::ocl::Canny(const oclMat& dx, const oclMat& dy, CannyBuf& buf, oclMat& d
std::swap( low_thresh, high_thresh); std::swap( low_thresh, high_thresh);
dst.create(dx.size(), CV_8U); dst.create(dx.size(), CV_8U);
dst.setTo(Scalar::all(0)); //dst.setTo(Scalar::all(0));
buf.dx = dx; buf.dy = dy; buf.dx = dx; buf.dy = dy;
buf.create(dx.size(), -1); buf.create(dx.size(), -1);
buf.edgeBuf.setTo(Scalar::all(0)); //buf.edgeBuf.setTo(Scalar::all(0));
buf.counter.setTo(Scalar::all(0));
calcMagnitude_gpu(buf.dx, buf.dy, buf.edgeBuf, dx.rows, dx.cols, L2gradient); calcMagnitude_gpu(buf.dx, buf.dy, buf.edgeBuf, dx.rows, dx.cols, L2gradient);
CannyCaller(buf, dst, static_cast<float>(low_thresh), static_cast<float>(high_thresh)); CannyCaller(buf, dst, static_cast<float>(low_thresh), static_cast<float>(high_thresh));
@ -342,7 +334,7 @@ void canny::calcMap_gpu(oclMat& dx, oclMat& dy, oclMat& mag, oclMat& map, int ro
openCLExecuteKernel(clCxt, &imgproc_canny, kernelName, globalThreads, localThreads, args, -1, -1); openCLExecuteKernel(clCxt, &imgproc_canny, kernelName, globalThreads, localThreads, args, -1, -1);
} }
void canny::edgesHysteresisLocal_gpu(oclMat& map, oclMat& st1, oclMat& counter, int rows, int cols) void canny::edgesHysteresisLocal_gpu(oclMat& map, oclMat& st1, void * counter, int rows, int cols)
{ {
Context *clCxt = map.clCxt; Context *clCxt = map.clCxt;
string kernelName = "edgesHysteresisLocal"; string kernelName = "edgesHysteresisLocal";
@ -350,7 +342,7 @@ void canny::edgesHysteresisLocal_gpu(oclMat& map, oclMat& st1, oclMat& counter,
args.push_back( make_pair( sizeof(cl_mem), (void *)&map.data)); args.push_back( make_pair( sizeof(cl_mem), (void *)&map.data));
args.push_back( make_pair( sizeof(cl_mem), (void *)&st1.data)); args.push_back( make_pair( sizeof(cl_mem), (void *)&st1.data));
args.push_back( make_pair( sizeof(cl_mem), (void *)&counter.data)); args.push_back( make_pair( sizeof(cl_mem), (void *)&counter));
args.push_back( make_pair( sizeof(cl_int), (void *)&rows)); args.push_back( make_pair( sizeof(cl_int), (void *)&rows));
args.push_back( make_pair( sizeof(cl_int), (void *)&cols)); args.push_back( make_pair( sizeof(cl_int), (void *)&cols));
args.push_back( make_pair( sizeof(cl_int), (void *)&map.step)); args.push_back( make_pair( sizeof(cl_int), (void *)&map.step));
@ -362,10 +354,10 @@ void canny::edgesHysteresisLocal_gpu(oclMat& map, oclMat& st1, oclMat& counter,
openCLExecuteKernel(clCxt, &imgproc_canny, kernelName, globalThreads, localThreads, args, -1, -1); openCLExecuteKernel(clCxt, &imgproc_canny, kernelName, globalThreads, localThreads, args, -1, -1);
} }
void canny::edgesHysteresisGlobal_gpu(oclMat& map, oclMat& st1, oclMat& st2, oclMat& counter, int rows, int cols) void canny::edgesHysteresisGlobal_gpu(oclMat& map, oclMat& st1, oclMat& st2, void * counter, int rows, int cols)
{ {
unsigned int count = Mat(counter).at<unsigned int>(0); unsigned int count;
openCLSafeCall(clEnqueueReadBuffer(Context::getContext()->impl->clCmdQueue, (cl_mem)counter, 1, 0, sizeof(float), &count, NULL, NULL, NULL));
Context *clCxt = map.clCxt; Context *clCxt = map.clCxt;
string kernelName = "edgesHysteresisGlobal"; string kernelName = "edgesHysteresisGlobal";
vector< pair<size_t, const void *> > args; vector< pair<size_t, const void *> > args;
@ -375,13 +367,13 @@ void canny::edgesHysteresisGlobal_gpu(oclMat& map, oclMat& st1, oclMat& st2, ocl
while(count > 0) while(count > 0)
{ {
counter.setTo(0); //counter.setTo(0);
args.clear(); args.clear();
size_t globalThreads[3] = {std::min(count, 65535u) * 128, DIVUP(count, 65535), 1}; size_t globalThreads[3] = {std::min(count, 65535u) * 128, DIVUP(count, 65535), 1};
args.push_back( make_pair( sizeof(cl_mem), (void *)&map.data)); args.push_back( make_pair( sizeof(cl_mem), (void *)&map.data));
args.push_back( make_pair( sizeof(cl_mem), (void *)&st1.data)); args.push_back( make_pair( sizeof(cl_mem), (void *)&st1.data));
args.push_back( make_pair( sizeof(cl_mem), (void *)&st2.data)); args.push_back( make_pair( sizeof(cl_mem), (void *)&st2.data));
args.push_back( make_pair( sizeof(cl_mem), (void *)&counter.data)); args.push_back( make_pair( sizeof(cl_mem), (void *)&counter));
args.push_back( make_pair( sizeof(cl_int), (void *)&rows)); args.push_back( make_pair( sizeof(cl_int), (void *)&rows));
args.push_back( make_pair( sizeof(cl_int), (void *)&cols)); args.push_back( make_pair( sizeof(cl_int), (void *)&cols));
args.push_back( make_pair( sizeof(cl_int), (void *)&count)); args.push_back( make_pair( sizeof(cl_int), (void *)&count));
@ -389,7 +381,7 @@ void canny::edgesHysteresisGlobal_gpu(oclMat& map, oclMat& st1, oclMat& st2, ocl
args.push_back( make_pair( sizeof(cl_int), (void *)&map.offset)); args.push_back( make_pair( sizeof(cl_int), (void *)&map.offset));
openCLExecuteKernel(clCxt, &imgproc_canny, kernelName, globalThreads, localThreads, args, -1, -1); openCLExecuteKernel(clCxt, &imgproc_canny, kernelName, globalThreads, localThreads, args, -1, -1);
count = Mat(counter).at<unsigned int>(0); openCLSafeCall(clEnqueueReadBuffer(Context::getContext()->impl->clCmdQueue, (cl_mem)counter, 1, 0, sizeof(float), &count, NULL, NULL, NULL));
std::swap(st1, st2); std::swap(st1, st2);
} }
#undef DIVUP #undef DIVUP

View File

@ -67,7 +67,9 @@ namespace cv
void cv::ocl::columnSum(const oclMat& src,oclMat& dst) void cv::ocl::columnSum(const oclMat& src,oclMat& dst)
{ {
CV_Assert(src.type() == CV_32FC1 && dst.type() == CV_32FC1 && src.size() == dst.size()); CV_Assert(src.type() == CV_32FC1);
dst.create(src.size(), src.type());
Context *clCxt = src.clCxt; Context *clCxt = src.clCxt;

View File

@ -119,6 +119,8 @@ namespace cv { namespace ocl { namespace device
float angle_scale, cv::ocl::oclMat& grad, cv::ocl::oclMat& qangle, bool correct_gamma); float angle_scale, cv::ocl::oclMat& grad, cv::ocl::oclMat& qangle, bool correct_gamma);
void compute_gradients_8UC4(int height, int width, const cv::ocl::oclMat& img, void compute_gradients_8UC4(int height, int width, const cv::ocl::oclMat& img,
float angle_scale, cv::ocl::oclMat& grad, cv::ocl::oclMat& qangle, bool correct_gamma); float angle_scale, cv::ocl::oclMat& grad, cv::ocl::oclMat& qangle, bool correct_gamma);
void resize( const oclMat &src, oclMat &dst, const Size sz);
} }
}}} }}}
@ -150,6 +152,8 @@ cv::ocl::HOGDescriptor::HOGDescriptor(Size win_size_, Size block_size_, Size blo
cv::Size blocks_per_win = numPartsWithin(win_size, block_size, block_stride); cv::Size blocks_per_win = numPartsWithin(win_size, block_size, block_stride);
hog::set_up_constants(nbins, block_stride.width, block_stride.height, blocks_per_win.width, blocks_per_win.height); hog::set_up_constants(nbins, block_stride.width, block_stride.height, blocks_per_win.width, blocks_per_win.height);
effect_size = Size(0, 0);
} }
size_t cv::ocl::HOGDescriptor::getDescriptorSize() const size_t cv::ocl::HOGDescriptor::getDescriptorSize() const
@ -199,22 +203,37 @@ void cv::ocl::HOGDescriptor::setSVMDetector(const vector<float>& _detector)
CV_Assert(checkDetectorSize()); CV_Assert(checkDetectorSize());
} }
void cv::ocl::HOGDescriptor::init_buffer(const oclMat& img, Size win_stride)
{
if (!image_scale.empty())
return;
if (effect_size == Size(0, 0))
effect_size = img.size();
grad.create(img.size(), CV_32FC2);
qangle.create(img.size(), CV_8UC2);
const size_t block_hist_size = getBlockHistogramSize();
const Size blocks_per_img = numPartsWithin(img.size(), block_size, block_stride);
block_hists.create(1, static_cast<int>(block_hist_size * blocks_per_img.area()), CV_32F);
Size wins_per_img = numPartsWithin(img.size(), win_size, win_stride);
labels.create(1, wins_per_img.area(), CV_8U);
}
void cv::ocl::HOGDescriptor::computeGradient(const oclMat& img, oclMat& grad, oclMat& qangle) void cv::ocl::HOGDescriptor::computeGradient(const oclMat& img, oclMat& grad, oclMat& qangle)
{ {
CV_Assert(img.type() == CV_8UC1 || img.type() == CV_8UC4); CV_Assert(img.type() == CV_8UC1 || img.type() == CV_8UC4);
grad.create(img.size(), CV_32FC2);
qangle.create(img.size(), CV_8UC2);
float angleScale = (float)(nbins / CV_PI); float angleScale = (float)(nbins / CV_PI);
switch (img.type()) switch (img.type())
{ {
case CV_8UC1: case CV_8UC1:
hog::compute_gradients_8UC1(img.rows, img.cols, img, angleScale, grad, qangle, gamma_correction); hog::compute_gradients_8UC1(effect_size.height, effect_size.width, img, angleScale, grad, qangle, gamma_correction);
break; break;
case CV_8UC4: case CV_8UC4:
hog::compute_gradients_8UC4(img.rows, img.cols, img, angleScale, grad, qangle, gamma_correction); hog::compute_gradients_8UC4(effect_size.height, effect_size.width, img, angleScale, grad, qangle, gamma_correction);
break; break;
} }
} }
@ -224,14 +243,11 @@ void cv::ocl::HOGDescriptor::computeBlockHistograms(const oclMat& img)
{ {
computeGradient(img, grad, qangle); computeGradient(img, grad, qangle);
size_t block_hist_size = getBlockHistogramSize(); hog::compute_hists(nbins, block_stride.width, block_stride.height, effect_size.height, effect_size.width,
Size blocks_per_img = numPartsWithin(img.size(), block_size, block_stride); grad, qangle, (float)getWinSigma(), block_hists);
block_hists.create(1, static_cast<int>(block_hist_size * blocks_per_img.area()), CV_32F); hog::normalize_hists(nbins, block_stride.width, block_stride.height, effect_size.height, effect_size.width,
block_hists, (float)threshold_L2hys);
hog::compute_hists(nbins, block_stride.width, block_stride.height, img.rows, img.cols, grad, qangle, (float)getWinSigma(), block_hists);
hog::normalize_hists(nbins, block_stride.width, block_stride.height, img.rows, img.cols, block_hists, (float)threshold_L2hys);
} }
@ -239,11 +255,13 @@ void cv::ocl::HOGDescriptor::getDescriptors(const oclMat& img, Size win_stride,
{ {
CV_Assert(win_stride.width % block_stride.width == 0 && win_stride.height % block_stride.height == 0); CV_Assert(win_stride.width % block_stride.width == 0 && win_stride.height % block_stride.height == 0);
init_buffer(img, win_stride);
computeBlockHistograms(img); computeBlockHistograms(img);
const size_t block_hist_size = getBlockHistogramSize(); const size_t block_hist_size = getBlockHistogramSize();
Size blocks_per_win = numPartsWithin(win_size, block_size, block_stride); Size blocks_per_win = numPartsWithin(win_size, block_size, block_stride);
Size wins_per_img = numPartsWithin(img.size(), win_size, win_stride); Size wins_per_img = numPartsWithin(effect_size, win_size, win_stride);
descriptors.create(wins_per_img.area(), static_cast<int>(blocks_per_win.area() * block_hist_size), CV_32F); descriptors.create(wins_per_img.area(), static_cast<int>(blocks_per_win.area() * block_hist_size), CV_32F);
@ -251,11 +269,11 @@ void cv::ocl::HOGDescriptor::getDescriptors(const oclMat& img, Size win_stride,
{ {
case DESCR_FORMAT_ROW_BY_ROW: case DESCR_FORMAT_ROW_BY_ROW:
hog::extract_descrs_by_rows(win_size.height, win_size.width, block_stride.height, block_stride.width, hog::extract_descrs_by_rows(win_size.height, win_size.width, block_stride.height, block_stride.width,
win_stride.height, win_stride.width, img.rows, img.cols, block_hists, descriptors); win_stride.height, win_stride.width, effect_size.height, effect_size.width, block_hists, descriptors);
break; break;
case DESCR_FORMAT_COL_BY_COL: case DESCR_FORMAT_COL_BY_COL:
hog::extract_descrs_by_cols(win_size.height, win_size.width, block_stride.height, block_stride.width, hog::extract_descrs_by_cols(win_size.height, win_size.width, block_stride.height, block_stride.width,
win_stride.height, win_stride.width, img.rows, img.cols, block_hists, descriptors); win_stride.height, win_stride.width, effect_size.height, effect_size.width, block_hists, descriptors);
break; break;
default: default:
CV_Error(CV_StsBadArg, "Unknown descriptor format"); CV_Error(CV_StsBadArg, "Unknown descriptor format");
@ -272,22 +290,21 @@ void cv::ocl::HOGDescriptor::detect(const oclMat& img, vector<Point>& hits, doub
if (detector.empty()) if (detector.empty())
return; return;
computeBlockHistograms(img);
if (win_stride == Size()) if (win_stride == Size())
win_stride = block_stride; win_stride = block_stride;
else else
CV_Assert(win_stride.width % block_stride.width == 0 && win_stride.height % block_stride.height == 0); CV_Assert(win_stride.width % block_stride.width == 0 && win_stride.height % block_stride.height == 0);
init_buffer(img, win_stride);
Size wins_per_img = numPartsWithin(img.size(), win_size, win_stride); computeBlockHistograms(img);
labels.create(1, wins_per_img.area(), CV_8U);
hog::classify_hists(win_size.height, win_size.width, block_stride.height, block_stride.width, hog::classify_hists(win_size.height, win_size.width, block_stride.height, block_stride.width,
win_stride.height, win_stride.width, img.rows, img.cols, block_hists, win_stride.height, win_stride.width, effect_size.height, effect_size.width, block_hists,
detector, (float)free_coef, (float)hit_threshold, labels); detector, (float)free_coef, (float)hit_threshold, labels);
labels.download(labels_host); labels.download(labels_host);
unsigned char* vec = labels_host.ptr(); unsigned char* vec = labels_host.ptr();
Size wins_per_img = numPartsWithin(effect_size, win_size, win_stride);
for (int i = 0; i < wins_per_img.area(); i++) for (int i = 0; i < wins_per_img.area(); i++)
{ {
int y = i / wins_per_img.width; int y = i / wins_per_img.width;
@ -303,6 +320,7 @@ void cv::ocl::HOGDescriptor::detectMultiScale(const oclMat& img, vector<Rect>& f
Size win_stride, Size padding, double scale0, int group_threshold) Size win_stride, Size padding, double scale0, int group_threshold)
{ {
CV_Assert(img.type() == CV_8UC1 || img.type() == CV_8UC4); CV_Assert(img.type() == CV_8UC1 || img.type() == CV_8UC4);
CV_Assert(scale0 > 1);
vector<double> level_scale; vector<double> level_scale;
double scale = 1.; double scale = 1.;
@ -318,27 +336,30 @@ void cv::ocl::HOGDescriptor::detectMultiScale(const oclMat& img, vector<Rect>& f
} }
levels = std::max(levels, 1); levels = std::max(levels, 1);
level_scale.resize(levels); level_scale.resize(levels);
image_scales.resize(levels);
std::vector<Rect> all_candidates; std::vector<Rect> all_candidates;
vector<Point> locations; vector<Point> locations;
if (win_stride == Size())
win_stride = block_stride;
else
CV_Assert(win_stride.width % block_stride.width == 0 && win_stride.height % block_stride.height == 0);
init_buffer(img, win_stride);
image_scale.create(img.size(), img.type());
for (size_t i = 0; i < level_scale.size(); i++) for (size_t i = 0; i < level_scale.size(); i++)
{ {
scale = level_scale[i]; scale = level_scale[i];
Size sz(cvRound(img.cols / scale), cvRound(img.rows / scale)); effect_size = Size(cvRound(img.cols / scale), cvRound(img.rows / scale));
oclMat smaller_img; if (effect_size == img.size())
{
if (sz == img.size()) detect(img, locations, hit_threshold, win_stride, padding);
smaller_img = img; }
else else
{ {
image_scales[i].create(sz, img.type()); hog::resize( img, image_scale, effect_size);
resize(img, image_scales[i], image_scales[i].size(), 0, 0, INTER_LINEAR); detect(image_scale, locations, hit_threshold, win_stride, padding);
smaller_img = image_scales[i];
} }
detect(smaller_img, locations, hit_threshold, win_stride, padding);
Size scaled_win_size(cvRound(win_size.width * scale), cvRound(win_size.height * scale)); Size scaled_win_size(cvRound(win_size.width * scale), cvRound(win_size.height * scale));
for (size_t j = 0; j < locations.size(); j++) for (size_t j = 0; j < locations.size(); j++)
all_candidates.push_back(Rect(Point2d((CvPoint)locations[j]) * scale, scaled_win_size)); all_candidates.push_back(Rect(Point2d((CvPoint)locations[j]) * scale, scaled_win_size));
@ -1784,4 +1805,36 @@ void cv::ocl::device::hog::compute_gradients_8UC4(int height, int width, const c
openCLExecuteKernel(clCxt, &objdetect_hog, kernelName, globalThreads, localThreads, args, -1, -1); openCLExecuteKernel(clCxt, &objdetect_hog, kernelName, globalThreads, localThreads, args, -1, -1);
} }
void cv::ocl::device::hog::resize( const oclMat &src, oclMat &dst, const Size sz)
{
CV_Assert( (src.channels() == dst.channels()) );
Context *clCxt = Context::getContext();
string kernelName = (src.type() == CV_8UC1) ? "resize_8UC1_kernel" : "resize_8UC4_kernel";
size_t blkSizeX = 16, blkSizeY = 16;
size_t glbSizeX = sz.width % blkSizeX == 0 ? sz.width : (sz.width / blkSizeX + 1) * blkSizeX;
size_t glbSizeY = sz.height % blkSizeY == 0 ? sz.height : (sz.height / blkSizeY + 1) * blkSizeY;
size_t globalThreads[3] = {glbSizeX, glbSizeY, 1};
size_t localThreads[3] = {blkSizeX, blkSizeY, 1};
float ifx = (float)src.cols / sz.width;
float ify = (float)src.rows / sz.height;
vector< pair<size_t, const void *> > args;
args.push_back( make_pair(sizeof(cl_mem), (void *)&dst.data));
args.push_back( make_pair(sizeof(cl_mem), (void *)&src.data));
args.push_back( make_pair(sizeof(cl_int), (void *)&dst.offset));
args.push_back( make_pair(sizeof(cl_int), (void *)&src.offset));
args.push_back( make_pair(sizeof(cl_int), (void *)&dst.step));
args.push_back( make_pair(sizeof(cl_int), (void *)&src.step));
args.push_back( make_pair(sizeof(cl_int), (void *)&src.cols));
args.push_back( make_pair(sizeof(cl_int), (void *)&src.rows));
args.push_back( make_pair(sizeof(cl_int), (void *)&sz.width));
args.push_back( make_pair(sizeof(cl_int), (void *)&sz.height));
args.push_back( make_pair(sizeof(cl_float), (void *)&ifx));
args.push_back( make_pair(sizeof(cl_float), (void *)&ify));
openCLExecuteKernel(clCxt, &objdetect_hog, kernelName, globalThreads, localThreads, args, -1, -1);
}
#endif #endif

View File

@ -67,32 +67,6 @@ __kernel void BlendLinear_C1_D0(
} }
} }
__kernel void BlendLinear_C3_D0(
__global uchar *dst,
__global uchar *img1,
__global uchar *img2,
__global float *weight1,
__global float *weight2,
int rows,
int cols,
int istep,
int wstep
)
{
int idx = get_global_id(0);
int idy = get_global_id(1);
int x = idx / 3;
int y = idy;
if (x < cols && y < rows)
{
int pos = idy * istep + idx;
int wpos = idy * (wstep /sizeof(float)) + x;
float w1 = weight1[wpos];
float w2 = weight2[wpos];
dst[pos] = (img1[pos] * w1 + img2[pos] * w2) / (w1 + w2 + 1e-5f);
}
}
__kernel void BlendLinear_C4_D0( __kernel void BlendLinear_C4_D0(
__global uchar *dst, __global uchar *dst,
__global uchar *img1, __global uchar *img1,
@ -143,32 +117,6 @@ __kernel void BlendLinear_C1_D5(
} }
} }
__kernel void BlendLinear_C3_D5(
__global float *dst,
__global float *img1,
__global float *img2,
__global float *weight1,
__global float *weight2,
int rows,
int cols,
int istep,
int wstep
)
{
int idx = get_global_id(0);
int idy = get_global_id(1);
int x = idx / 3;
int y = idy;
if (x < cols && y < rows)
{
int pos = idy * (istep / sizeof(float)) + idx;
int wpos = idy * (wstep /sizeof(float)) + x;
float w1 = weight1[wpos];
float w2 = weight2[wpos];
dst[pos] = (img1[pos] * w1 + img2[pos] * w2) / (w1 + w2 + 1e-5f);
}
}
__kernel void BlendLinear_C4_D5( __kernel void BlendLinear_C4_D5(
__global float *dst, __global float *dst,
__global float *img1, __global float *img1,
@ -194,3 +142,4 @@ __kernel void BlendLinear_C4_D5(
dst[pos] = (img1[pos] * w1 + img2[pos] * w2) / (w1 + w2 + 1e-5f); dst[pos] = (img1[pos] * w1 + img2[pos] * w2) / (w1 + w2 + 1e-5f);
} }
} }

View File

@ -70,8 +70,8 @@ inline float calc(int x, int y)
// dx_buf output dx buffer // dx_buf output dx buffer
// dy_buf output dy buffer // dy_buf output dy buffer
__kernel __kernel
void calcSobelRowPass void calcSobelRowPass
( (
__global const uchar * src, __global const uchar * src,
__global int * dx_buf, __global int * dx_buf,
__global int * dy_buf, __global int * dy_buf,
@ -83,7 +83,7 @@ void calcSobelRowPass
int dx_buf_offset, int dx_buf_offset,
int dy_buf_step, int dy_buf_step,
int dy_buf_offset int dy_buf_offset
) )
{ {
//src_step /= sizeof(*src); //src_step /= sizeof(*src);
//src_offset /= sizeof(*src); //src_offset /= sizeof(*src);
@ -128,8 +128,8 @@ void calcSobelRowPass
// dy direvitive in y direction output // dy direvitive in y direction output
// mag magnitude direvitive of xy output // mag magnitude direvitive of xy output
__kernel __kernel
void calcMagnitude_buf void calcMagnitude_buf
( (
__global const int * dx_buf, __global const int * dx_buf,
__global const int * dy_buf, __global const int * dy_buf,
__global int * dx, __global int * dx,
@ -147,7 +147,7 @@ void calcMagnitude_buf
int dy_offset, int dy_offset,
int mag_step, int mag_step,
int mag_offset int mag_offset
) )
{ {
dx_buf_step /= sizeof(*dx_buf); dx_buf_step /= sizeof(*dx_buf);
dx_buf_offset /= sizeof(*dx_buf); dx_buf_offset /= sizeof(*dx_buf);
@ -205,8 +205,8 @@ void calcMagnitude_buf
// dy direvitive in y direction output // dy direvitive in y direction output
// mag magnitude direvitive of xy output // mag magnitude direvitive of xy output
__kernel __kernel
void calcMagnitude void calcMagnitude
( (
__global const int * dx, __global const int * dx,
__global const int * dy, __global const int * dy,
__global float * mag, __global float * mag,
@ -218,7 +218,7 @@ void calcMagnitude
int dy_offset, int dy_offset,
int mag_step, int mag_step,
int mag_offset int mag_offset
) )
{ {
dx_step /= sizeof(*dx); dx_step /= sizeof(*dx);
dx_offset /= sizeof(*dx); dx_offset /= sizeof(*dx);
@ -261,8 +261,8 @@ void calcMagnitude
// mag magnitudes calculated from calcMagnitude function // mag magnitudes calculated from calcMagnitude function
// map output containing raw edge types // map output containing raw edge types
__kernel __kernel
void calcMap void calcMap
( (
__global const int * dx, __global const int * dx,
__global const int * dy, __global const int * dy,
__global const float * mag, __global const float * mag,
@ -279,7 +279,7 @@ void calcMap
int mag_offset, int mag_offset,
int map_step, int map_step,
int map_offset int map_offset
) )
{ {
dx_step /= sizeof(*dx); dx_step /= sizeof(*dx);
dx_offset /= sizeof(*dx); dx_offset /= sizeof(*dx);
@ -361,8 +361,8 @@ void calcMap
// non local memory version // non local memory version
__kernel __kernel
void calcMap_2 void calcMap_2
( (
__global const int * dx, __global const int * dx,
__global const int * dy, __global const int * dy,
__global const float * mag, __global const float * mag,
@ -379,7 +379,7 @@ void calcMap_2
int mag_offset, int mag_offset,
int map_step, int map_step,
int map_offset int map_offset
) )
{ {
dx_step /= sizeof(*dx); dx_step /= sizeof(*dx);
dx_offset /= sizeof(*dx); dx_offset /= sizeof(*dx);
@ -440,8 +440,8 @@ void calcMap_2
// [256, 1, 1] threaded, local memory version // [256, 1, 1] threaded, local memory version
__kernel __kernel
void calcMap_3 void calcMap_3
( (
__global const int * dx, __global const int * dx,
__global const int * dy, __global const int * dy,
__global const float * mag, __global const float * mag,
@ -458,7 +458,7 @@ void calcMap_3
int mag_offset, int mag_offset,
int map_step, int map_step,
int map_offset int map_offset
) )
{ {
dx_step /= sizeof(*dx); dx_step /= sizeof(*dx);
dx_offset /= sizeof(*dx); dx_offset /= sizeof(*dx);
@ -556,8 +556,8 @@ void calcMap_3
// st the potiential edge points found in this kernel call // st the potiential edge points found in this kernel call
// counter the number of potiential edge points // counter the number of potiential edge points
__kernel __kernel
void edgesHysteresisLocal void edgesHysteresisLocal
( (
__global int * map, __global int * map,
__global ushort2 * st, __global ushort2 * st,
volatile __global unsigned int * counter, volatile __global unsigned int * counter,
@ -565,7 +565,7 @@ void edgesHysteresisLocal
int cols, int cols,
int map_step, int map_step,
int map_offset int map_offset
) )
{ {
map_step /= sizeof(*map); map_step /= sizeof(*map);
map_offset /= sizeof(*map); map_offset /= sizeof(*map);
@ -599,7 +599,7 @@ void edgesHysteresisLocal
{ {
int n; int n;
#pragma unroll #pragma unroll
for (int k = 0; k < 16; ++k) for (int k = 0; k < 16; ++k)
{ {
n = 0; n = 0;
@ -653,8 +653,8 @@ __constant c_dy[8] = {-1, -1, -1, 0, 0, 1, 1, 1};
#define stack_size 512 #define stack_size 512
__kernel __kernel
void edgesHysteresisGlobal void edgesHysteresisGlobal
( (
__global int * map, __global int * map,
__global ushort2 * st1, __global ushort2 * st1,
__global ushort2 * st2, __global ushort2 * st2,
@ -664,7 +664,7 @@ void edgesHysteresisGlobal
int count, int count,
int map_step, int map_step,
int map_offset int map_offset
) )
{ {
map_step /= sizeof(*map); map_step /= sizeof(*map);
@ -684,6 +684,12 @@ void edgesHysteresisGlobal
__local ushort2 s_st[stack_size]; __local ushort2 s_st[stack_size];
if(gidx + gidy == 0)
{
*counter = 0;
}
barrier(CLK_GLOBAL_MEM_FENCE);
if(lidx == 0) if(lidx == 0)
{ {
s_counter = 0; s_counter = 0;
@ -770,8 +776,8 @@ void edgesHysteresisGlobal
// map edge type mappings // map edge type mappings
// dst edge output // dst edge output
__kernel __kernel
void getEdges void getEdges
( (
__global const int * map, __global const int * map,
__global uchar * dst, __global uchar * dst,
int rows, int rows,
@ -780,7 +786,7 @@ void getEdges
int map_offset, int map_offset,
int dst_step, int dst_step,
int dst_offset int dst_offset
) )
{ {
map_step /= sizeof(*map); map_step /= sizeof(*map);
map_offset /= sizeof(*map); map_offset /= sizeof(*map);

View File

@ -58,17 +58,9 @@
// Image read mode // Image read mode
__constant sampler_t sampler = CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_CLAMP_TO_EDGE | CLK_FILTER_NEAREST; __constant sampler_t sampler = CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_CLAMP_TO_EDGE | CLK_FILTER_NEAREST;
#define FLT_EPSILON (1e-15)
#define CV_PI_F 3.14159265f #define CV_PI_F 3.14159265f
// print greyscale image to show image layout
__kernel void printImage(image2d_t img)
{
printf("(%d, %d) - %3d \n",
get_global_id(0),
get_global_id(1),
read_imageui(img, (int2)(get_global_id(0), get_global_id(1))).x
);
}
// Use integral image to calculate haar wavelets. // Use integral image to calculate haar wavelets.
// N = 2 // N = 2
@ -444,7 +436,6 @@ __kernel
float val0 = N9[localLin]; float val0 = N9[localLin];
if (val0 > c_hessianThreshold) if (val0 > c_hessianThreshold)
{ {
//printf(\"(%3d, %3d) N9[%3d]=%7.1f val0=%7.1f\\n\", l_x, l_y, localLin - zoff, N9[localLin], val0);
// Coordinates for the start of the wavelet in the sum image. There // Coordinates for the start of the wavelet in the sum image. There
// is some integer division involved, so don't try to simplify this // is some integer division involved, so don't try to simplify this
// (cancel out sampleStep) without checking the result is the same // (cancel out sampleStep) without checking the result is the same
@ -726,6 +717,7 @@ __kernel
__global float* featureSize = keypoints + SIZE_ROW * keypoints_step; __global float* featureSize = keypoints + SIZE_ROW * keypoints_step;
__global float* featureDir = keypoints + ANGLE_ROW * keypoints_step; __global float* featureDir = keypoints + ANGLE_ROW * keypoints_step;
volatile __local float s_X[128]; volatile __local float s_X[128];
volatile __local float s_Y[128]; volatile __local float s_Y[128];
volatile __local float s_angle[128]; volatile __local float s_angle[128];
@ -737,6 +729,7 @@ __kernel
and building the keypoint descriptor are defined relative to 's' */ and building the keypoint descriptor are defined relative to 's' */
const float s = featureSize[get_group_id(0)] * 1.2f / 9.0f; const float s = featureSize[get_group_id(0)] * 1.2f / 9.0f;
/* To find the dominant orientation, the gradients in x and y are /* To find the dominant orientation, the gradients in x and y are
sampled in a circle of radius 6s using wavelets of size 4s. sampled in a circle of radius 6s using wavelets of size 4s.
We ensure the gradient wavelet size is even to ensure the We ensure the gradient wavelet size is even to ensure the
@ -765,9 +758,11 @@ __kernel
Y = c_aptW[tid] * icvCalcHaarPatternSum_2(sumTex, c_NY, 4, grad_wav_size, y, x); Y = c_aptW[tid] * icvCalcHaarPatternSum_2(sumTex, c_NY, 4, grad_wav_size, y, x);
angle = atan2(Y, X); angle = atan2(Y, X);
if (angle < 0) if (angle < 0)
angle += 2.0f * CV_PI_F; angle += 2.0f * CV_PI_F;
angle *= 180.0f / CV_PI_F; angle *= 180.0f / CV_PI_F;
} }
} }
s_X[tid] = X; s_X[tid] = X;
@ -807,7 +802,6 @@ __kernel
sumx += s_X[get_local_id(0) + 96]; sumx += s_X[get_local_id(0) + 96];
sumy += s_Y[get_local_id(0) + 96]; sumy += s_Y[get_local_id(0) + 96];
} }
reduce_32_sum(s_sumx + get_local_id(1) * 32, sumx, get_local_id(0)); reduce_32_sum(s_sumx + get_local_id(1) * 32, sumx, get_local_id(0));
reduce_32_sum(s_sumy + get_local_id(1) * 32, sumy, get_local_id(0)); reduce_32_sum(s_sumy + get_local_id(1) * 32, sumy, get_local_id(0));
@ -818,10 +812,8 @@ __kernel
bestx = sumx; bestx = sumx;
besty = sumy; besty = sumy;
} }
barrier(CLK_LOCAL_MEM_FENCE); barrier(CLK_LOCAL_MEM_FENCE);
} }
if (get_local_id(0) == 0) if (get_local_id(0) == 0)
{ {
s_X[get_local_id(1)] = bestx; s_X[get_local_id(1)] = bestx;
@ -846,6 +838,10 @@ __kernel
kp_dir += 2.0f * CV_PI_F; kp_dir += 2.0f * CV_PI_F;
kp_dir *= 180.0f / CV_PI_F; kp_dir *= 180.0f / CV_PI_F;
kp_dir = 360.0f - kp_dir;
if (fabs(kp_dir - 360.f) < FLT_EPSILON)
kp_dir = 0.f;
featureDir[get_group_id(0)] = kp_dir; featureDir[get_group_id(0)] = kp_dir;
} }
} }
@ -940,7 +936,10 @@ void calc_dx_dy(
const float centerX = featureX[get_group_id(0)]; const float centerX = featureX[get_group_id(0)];
const float centerY = featureY[get_group_id(0)]; const float centerY = featureY[get_group_id(0)];
const float size = featureSize[get_group_id(0)]; const float size = featureSize[get_group_id(0)];
const float descriptor_dir = featureDir[get_group_id(0)] * (float)(CV_PI_F / 180.0f); float descriptor_dir = 360.0f - featureDir[get_group_id(0)];
if (fabs(descriptor_dir - 360.f) < FLT_EPSILON)
descriptor_dir = 0.f;
descriptor_dir *= (float)(CV_PI_F / 180.0f);
/* The sampling intervals and wavelet sized for selecting an orientation /* The sampling intervals and wavelet sized for selecting an orientation
and building the keypoint descriptor are defined relative to 's' */ and building the keypoint descriptor are defined relative to 's' */

View File

@ -448,3 +448,42 @@ __kernel void compute_gradients_8UC1_kernel(const int height, const int width, c
grad[ ((gidY * grad_quadstep + x) << 1) + 1 ] = mag * ang; grad[ ((gidY * grad_quadstep + x) << 1) + 1 ] = mag * ang;
} }
} }
//----------------------------------------------------------------------------
// Resize
__kernel void resize_8UC4_kernel(__global uchar4 * dst, __global const uchar4 * src,
int dst_offset, int src_offset, int dst_step, int src_step,
int src_cols, int src_rows, int dst_cols, int dst_rows, float ifx, float ify )
{
int dx = get_global_id(0);
int dy = get_global_id(1);
int sx = (int)floor(dx*ifx+0.5f);
int sy = (int)floor(dy*ify+0.5f);
sx = min(sx, src_cols-1);
sy = min(sy, src_rows-1);
int dpos = (dst_offset>>2) + dy * (dst_step>>2) + dx;
int spos = (src_offset>>2) + sy * (src_step>>2) + sx;
if(dx<dst_cols && dy<dst_rows)
dst[dpos] = src[spos];
}
__kernel void resize_8UC1_kernel(__global uchar * dst, __global const uchar * src,
int dst_offset, int src_offset, int dst_step, int src_step,
int src_cols, int src_rows, int dst_cols, int dst_rows, float ifx, float ify )
{
int dx = get_global_id(0);
int dy = get_global_id(1);
int sx = (int)floor(dx*ifx+0.5f);
int sy = (int)floor(dy*ify+0.5f);
sx = min(sx, src_cols-1);
sy = min(sy, src_rows-1);
int dpos = dst_offset + dy * dst_step + dx;
int spos = src_offset + sy * src_step + sx;
if(dx<dst_cols && dy<dst_rows)
dst[dpos] = src[spos];
}

View File

@ -51,8 +51,6 @@ using namespace cv;
using namespace cv::ocl; using namespace cv::ocl;
using namespace std; using namespace std;
#define EXT_FP64 0
#if !defined (HAVE_OPENCL) #if !defined (HAVE_OPENCL)
void cv::ocl::matchTemplate(const oclMat&, const oclMat&, oclMat&) { throw_nogpu(); } void cv::ocl::matchTemplate(const oclMat&, const oclMat&, oclMat&) { throw_nogpu(); }
#else #else
@ -113,7 +111,6 @@ namespace cv { namespace ocl
return 0; return 0;
} }
////////////////////////////////////////////////////////////////////// //////////////////////////////////////////////////////////////////////
// SQDIFF // SQDIFF
void matchTemplate_SQDIFF( void matchTemplate_SQDIFF(
@ -137,11 +134,11 @@ namespace cv { namespace ocl
{ {
matchTemplate_CCORR(image,templ,result,buf); matchTemplate_CCORR(image,templ,result,buf);
buf.image_sums.resize(1); buf.image_sums.resize(1);
buf.image_sqsums.resize(1);
integral(image.reshape(1), buf.image_sums[0], buf.image_sqsums[0]);
#if EXT_FP64 && SQRSUM_FIXED integral(image.reshape(1), buf.image_sums[0]);
#if SQRSUM_FIXED
unsigned long long templ_sqsum = (unsigned long long)sqrSum(templ.reshape(1))[0]; unsigned long long templ_sqsum = (unsigned long long)sqrSum(templ.reshape(1))[0];
#else #else
Mat sqr_mat = templ.reshape(1); Mat sqr_mat = templ.reshape(1);
@ -237,16 +234,12 @@ namespace cv { namespace ocl
buf.image_sqsums.resize(1); buf.image_sqsums.resize(1);
integral(image.reshape(1), buf.image_sums[0], buf.image_sqsums[0]); integral(image.reshape(1), buf.image_sums[0], buf.image_sqsums[0]);
#if EXT_FP64 && SQRSUM_FIXED #if SQRSUM_FIXED
unsigned long long templ_sqsum = (unsigned long long)sqrSum(templ.reshape(1))[0]; unsigned long long templ_sqsum = (unsigned long long)sqrSum(templ.reshape(1))[0];
#elif EXT_FP64 #else
oclMat templ_c1 = templ.reshape(1); oclMat templ_c1 = templ.reshape(1);
multiply(templ_c1, templ_c1, templ_c1); multiply(templ_c1, templ_c1, templ_c1);
unsigned long long templ_sqsum = (unsigned long long)sum(templ_c1)[0]; unsigned long long templ_sqsum = (unsigned long long)sum(templ_c1)[0];
#else
Mat m_templ_c1 = templ.reshape(1);
multiply(m_templ_c1, m_templ_c1, m_templ_c1);
unsigned long long templ_sqsum = (unsigned long long)sum(m_templ_c1)[0];
#endif #endif
Context *clCxt = image.clCxt; Context *clCxt = image.clCxt;
string kernelName = "normalizeKernel"; string kernelName = "normalizeKernel";
@ -332,17 +325,10 @@ namespace cv { namespace ocl
if(image.channels() == 1) if(image.channels() == 1)
{ {
buf.image_sums.resize(1); buf.image_sums.resize(1);
// FIXME: temp fix for incorrect integral kernel integral(image, buf.image_sums[0]);
oclMat tmp_oclmat;
integral(image, buf.image_sums[0], tmp_oclmat);
float templ_sum = 0; float templ_sum = 0;
#if EXT_FP64
templ_sum = (float)sum(templ)[0] / templ.size().area(); templ_sum = (float)sum(templ)[0] / templ.size().area();
#else
Mat o_templ = templ;
templ_sum = (float)sum(o_templ)[0] / o_templ.size().area(); // temp fix for non-double supported machine
#endif
args.push_back( make_pair( sizeof(cl_mem), (void *)&buf.image_sums[0].data) ); args.push_back( make_pair( sizeof(cl_mem), (void *)&buf.image_sums[0].data) );
args.push_back( make_pair( sizeof(cl_int), (void *)&buf.image_sums[0].offset) ); args.push_back( make_pair( sizeof(cl_int), (void *)&buf.image_sums[0].offset) );
args.push_back( make_pair( sizeof(cl_int), (void *)&buf.image_sums[0].step) ); args.push_back( make_pair( sizeof(cl_int), (void *)&buf.image_sums[0].step) );
@ -351,29 +337,13 @@ namespace cv { namespace ocl
else else
{ {
Vec4f templ_sum = Vec4f::all(0); Vec4f templ_sum = Vec4f::all(0);
#if EXT_FP64
split(image,buf.images); split(image,buf.images);
templ_sum = sum(templ) / templ.size().area(); templ_sum = sum(templ) / templ.size().area();
#else
// temp fix for non-double supported machine
Mat o_templ = templ, o_image = image;
vector<Mat> o_mat_vector;
o_mat_vector.resize(image.channels());
buf.images.resize(image.channels());
split(o_image, o_mat_vector);
for(int i = 0; i < o_mat_vector.size(); i ++)
{
buf.images[i] = oclMat(o_mat_vector[i]);
}
templ_sum = sum(o_templ) / templ.size().area();
#endif
buf.image_sums.resize(buf.images.size()); buf.image_sums.resize(buf.images.size());
for(int i = 0; i < image.channels(); i ++) for(int i = 0; i < image.channels(); i ++)
{ {
// FIXME: temp fix for incorrect integral kernel integral(buf.images[i], buf.image_sums[i]);
oclMat omat_temp;
integral(buf.images[i], buf.image_sums[i], omat_temp);
} }
switch(image.channels()) switch(image.channels())
{ {
@ -432,10 +402,9 @@ namespace cv { namespace ocl
integral(image, buf.image_sums[0], buf.image_sqsums[0]); integral(image, buf.image_sums[0], buf.image_sqsums[0]);
float templ_sum = 0; float templ_sum = 0;
float templ_sqsum = 0; float templ_sqsum = 0;
#if EXT_FP64
templ_sum = (float)sum(templ)[0]; templ_sum = (float)sum(templ)[0];
#if SQRSUM_FIXED #if SQRSUM_FIXED
templ_sqsum = sqrSum(templ); templ_sqsum = sqrSum(templ)[0];
#else #else
oclMat templ_sqr = templ; oclMat templ_sqr = templ;
multiply(templ,templ, templ_sqr); multiply(templ,templ, templ_sqr);
@ -443,13 +412,7 @@ namespace cv { namespace ocl
#endif //SQRSUM_FIXED #endif //SQRSUM_FIXED
templ_sqsum -= scale * templ_sum * templ_sum; templ_sqsum -= scale * templ_sum * templ_sum;
templ_sum *= scale; templ_sum *= scale;
#else
// temp fix for non-double supported machine
Mat o_templ = templ;
templ_sum = (float)sum(o_templ)[0];
templ_sqsum = sum(o_templ.mul(o_templ))[0] - scale * templ_sum * templ_sum;
templ_sum *= scale;
#endif
args.push_back( make_pair( sizeof(cl_mem), (void *)&buf.image_sums[0].data) ); args.push_back( make_pair( sizeof(cl_mem), (void *)&buf.image_sums[0].data) );
args.push_back( make_pair( sizeof(cl_int), (void *)&buf.image_sums[0].offset) ); args.push_back( make_pair( sizeof(cl_int), (void *)&buf.image_sums[0].offset) );
args.push_back( make_pair( sizeof(cl_int), (void *)&buf.image_sums[0].step) ); args.push_back( make_pair( sizeof(cl_int), (void *)&buf.image_sums[0].step) );
@ -463,7 +426,7 @@ namespace cv { namespace ocl
{ {
Vec4f templ_sum = Vec4f::all(0); Vec4f templ_sum = Vec4f::all(0);
Vec4f templ_sqsum = Vec4f::all(0); Vec4f templ_sqsum = Vec4f::all(0);
#if EXT_FP64
split(image,buf.images); split(image,buf.images);
templ_sum = sum(templ); templ_sum = sum(templ);
#if SQRSUM_FIXED #if SQRSUM_FIXED
@ -475,21 +438,6 @@ namespace cv { namespace ocl
#endif //SQRSUM_FIXED #endif //SQRSUM_FIXED
templ_sqsum -= scale * templ_sum * templ_sum; templ_sqsum -= scale * templ_sum * templ_sum;
#else
// temp fix for non-double supported machine
Mat o_templ = templ, o_image = image;
vector<Mat> o_mat_vector;
o_mat_vector.resize(image.channels());
buf.images.resize(image.channels());
split(o_image, o_mat_vector);
for(int i = 0; i < o_mat_vector.size(); i ++)
{
buf.images[i] = oclMat(o_mat_vector[i]);
}
templ_sum = sum(o_templ);
templ_sqsum = sum(o_templ.mul(o_templ));
#endif
float templ_sqsum_sum = 0; float templ_sqsum_sum = 0;
for(int i = 0; i < image.channels(); i ++) for(int i = 0; i < image.channels(); i ++)
{ {

View File

@ -1,4 +1,49 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved.
// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// @Authors
// Dachuan Zhao, dachuan@multicorewareinc.com
// Yao Wang, yao@multicorewareinc.com
//
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other oclMaterials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#include "precomp.hpp" #include "precomp.hpp"
using namespace cv; using namespace cv;
@ -24,7 +69,6 @@ namespace cv
template<typename T> template<typename T>
void pyrdown_run(const oclMat &src, const oclMat &dst) void pyrdown_run(const oclMat &src, const oclMat &dst)
{ {
CV_Assert(src.cols / 2 == dst.cols && src.rows / 2 == dst.rows);
CV_Assert(src.type() == dst.type()); CV_Assert(src.type() == dst.type());
CV_Assert(src.depth() != CV_8S); CV_Assert(src.depth() != CV_8S);
@ -108,7 +152,7 @@ void cv::ocl::pyrDown(const oclMat& src, oclMat& dst)
dst.create((src.rows + 1) / 2, (src.cols + 1) / 2, src.type()); dst.create((src.rows + 1) / 2, (src.cols + 1) / 2, src.type());
//dst.step = dst.rows; dst.download_channels = src.download_channels;
pyrdown_run(src, dst); pyrdown_run(src, dst);
} }

View File

@ -16,6 +16,7 @@
// //
// @Authors // @Authors
// Zhang Chunpeng chunpeng@multicorewareinc.com // Zhang Chunpeng chunpeng@multicorewareinc.com
// Yao Wang, yao@multicorewareinc.com
// //
// //
// Redistribution and use in source and binary forms, with or without modification, // Redistribution and use in source and binary forms, with or without modification,
@ -63,6 +64,7 @@ namespace cv { namespace ocl
void pyrUp(const cv::ocl::oclMat& src,cv::ocl::oclMat& dst) void pyrUp(const cv::ocl::oclMat& src,cv::ocl::oclMat& dst)
{ {
dst.create(src.rows * 2, src.cols * 2, src.type()); dst.create(src.rows * 2, src.cols * 2, src.type());
dst.download_channels=src.download_channels;
Context *clCxt = src.clCxt; Context *clCxt = src.clCxt;
const std::string kernelName = "pyrUp"; const std::string kernelName = "pyrUp";

View File

@ -149,8 +149,7 @@ namespace
//loadGlobalConstants(maxCandidates, maxFeatures, img_rows, img_cols, surf_.nOctaveLayers, static_cast<float>(surf_.hessianThreshold)); //loadGlobalConstants(maxCandidates, maxFeatures, img_rows, img_cols, surf_.nOctaveLayers, static_cast<float>(surf_.hessianThreshold));
bindImgTex(img); bindImgTex(img);
oclMat integral_sqsum; integral(img, surf_.sum); // the two argumented integral version is incorrect
integral(img, surf_.sum, integral_sqsum); // the two argumented integral version is incorrect
bindSumTex(surf_.sum); bindSumTex(surf_.sum);
maskSumTex = 0; maskSumTex = 0;

View File

@ -74,12 +74,10 @@ PARAM_TEST_CASE(ColumnSum, cv::Size, bool )
TEST_P(ColumnSum, Accuracy) TEST_P(ColumnSum, Accuracy)
{ {
cv::Mat src = randomMat(size, CV_32FC1); cv::Mat src = randomMat(size, CV_32FC1);
//cv::Mat src(size,CV_32FC1); cv::ocl::oclMat d_dst;
cv::ocl::oclMat d_src(src);
//cv::ocl::oclMat d_dst = ::createMat(size,src.type(),useRoi); cv::ocl::columnSum(d_src,d_dst);
cv::ocl::oclMat d_dst = loadMat(src,useRoi);
cv::ocl::columnSum(loadMat(src,useRoi),d_dst);
cv::Mat dst(d_dst); cv::Mat dst(d_dst);

View File

@ -16,6 +16,7 @@
// //
// @Authors // @Authors
// Dachuan Zhao, dachuan@multicorewareinc.com // Dachuan Zhao, dachuan@multicorewareinc.com
// Yao Wang yao@multicorewareinc.com
// //
// Redistribution and use in source and binary forms, with or without modification, // Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met: // are permitted provided that the following conditions are met:
@ -43,9 +44,6 @@
// //
//M*/ //M*/
//#define PRINT_CPU_TIME 1000
//#define PRINT_TIME
#include "precomp.hpp" #include "precomp.hpp"
#include <iomanip> #include <iomanip>
@ -58,66 +56,15 @@ using namespace cvtest;
using namespace testing; using namespace testing;
using namespace std; using namespace std;
PARAM_TEST_CASE(PyrDown, MatType, bool) PARAM_TEST_CASE(PyrDown, MatType, int)
{ {
int type; int type;
cv::Scalar val; int channels;
//src mat
cv::Mat mat1;
cv::Mat mat2;
cv::Mat mask;
cv::Mat dst;
cv::Mat dst1; //bak, for two outputs
// set up roi
int roicols;
int roirows;
int src1x;
int src1y;
int src2x;
int src2y;
int dstx;
int dsty;
int maskx;
int masky;
//src mat with roi
cv::Mat mat1_roi;
cv::Mat mat2_roi;
cv::Mat mask_roi;
cv::Mat dst_roi;
cv::Mat dst1_roi; //bak
//std::vector<cv::ocl::Info> oclinfo;
//ocl dst mat for testing
cv::ocl::oclMat gdst_whole;
cv::ocl::oclMat gdst1_whole; //bak
//ocl mat with roi
cv::ocl::oclMat gmat1;
cv::ocl::oclMat gmat2;
cv::ocl::oclMat gdst;
cv::ocl::oclMat gdst1; //bak
cv::ocl::oclMat gmask;
virtual void SetUp() virtual void SetUp()
{ {
type = GET_PARAM(0); type = GET_PARAM(0);
channels = GET_PARAM(1);
cv::RNG &rng = TS::ptr()->get_rng();
cv::Size size(MWIDTH, MHEIGHT);
mat1 = randomMat(rng, size, type, 5, 16, false);
mat2 = randomMat(rng, size, type, 5, 16, false);
dst = randomMat(rng, size, type, 5, 16, false);
dst1 = randomMat(rng, size, type, 5, 16, false);
mask = randomMat(rng, size, CV_8UC1, 0, 2, false);
cv::threshold(mask, mask, 0.5, 255., CV_8UC1);
val = cv::Scalar(rng.uniform(-10.0, 10.0), rng.uniform(-10.0, 10.0), rng.uniform(-10.0, 10.0), rng.uniform(-10.0, 10.0));
//int devnums = getDevice(oclinfo); //int devnums = getDevice(oclinfo);
//CV_Assert(devnums > 0); //CV_Assert(devnums > 0);
@ -127,169 +74,36 @@ PARAM_TEST_CASE(PyrDown, MatType, bool)
void Cleanup() void Cleanup()
{ {
mat1.release();
mat2.release();
mask.release();
dst.release();
dst1.release();
mat1_roi.release();
mat2_roi.release();
mask_roi.release();
dst_roi.release();
dst1_roi.release();
gdst_whole.release();
gdst1_whole.release();
gmat1.release();
gmat2.release();
gdst.release();
gdst1.release();
gmask.release();
}
void random_roi()
{
cv::RNG &rng = TS::ptr()->get_rng();
#ifdef RANDOMROI
//randomize ROI
roicols = rng.uniform(1, mat1.cols);
roirows = rng.uniform(1, mat1.rows);
src1x = rng.uniform(0, mat1.cols - roicols);
src1y = rng.uniform(0, mat1.rows - roirows);
dstx = rng.uniform(0, dst.cols - roicols);
dsty = rng.uniform(0, dst.rows - roirows);
#else
roicols = mat1.cols;
roirows = mat1.rows;
src1x = 0;
src1y = 0;
dstx = 0;
dsty = 0;
#endif
maskx = rng.uniform(0, mask.cols - roicols);
masky = rng.uniform(0, mask.rows - roirows);
src2x = rng.uniform(0, mat2.cols - roicols);
src2y = rng.uniform(0, mat2.rows - roirows);
mat1_roi = mat1(Rect(src1x, src1y, roicols, roirows));
mat2_roi = mat2(Rect(src2x, src2y, roicols, roirows));
mask_roi = mask(Rect(maskx, masky, roicols, roirows));
dst_roi = dst(Rect(dstx, dsty, roicols, roirows));
dst1_roi = dst1(Rect(dstx, dsty, roicols, roirows));
gdst_whole = dst;
gdst = gdst_whole(Rect(dstx, dsty, roicols, roirows));
gdst1_whole = dst1;
gdst1 = gdst1_whole(Rect(dstx, dsty, roicols, roirows));
gmat1 = mat1_roi;
gmat2 = mat2_roi;
gmask = mask_roi; //end
} }
}; };
#define VARNAME(A) string(#A);
void PrePrint()
{
//for(int i = 0; i < MHEIGHT; i++)
//{
// printf("(%d) ", i);
// for(int k = 0; k < MWIDTH; k++)
// {
// printf("%d ", mat1_roi.data[i * MHEIGHT + k]);
// }
// printf("\n");
//}
}
void PostPrint()
{
//dst_roi.convertTo(dst_roi,CV_32S);
//cpu_dst.convertTo(cpu_dst,CV_32S);
//dst_roi -= cpu_dst;
//cpu_dst -= dst_roi;
//for(int i = 0; i < MHEIGHT / 2; i++)
//{
// printf("(%d) ", i);
// for(int k = 0; k < MWIDTH / 2; k++)
// {
// if(gmat1.depth() == 0)
// {
// if(gmat1.channels() == 1)
// {
// printf("%d ", dst_roi.data[i * MHEIGHT / 2 + k]);
// }
// else
// {
// printf("%d ", ((unsigned*)dst_roi.data)[i * MHEIGHT / 2 + k]);
// }
// }
// else if(gmat1.depth() == 5)
// {
// printf("%.6f ", ((float*)dst_roi.data)[i * MHEIGHT / 2 + k]);
// }
// }
// printf("\n");
//}
//for(int i = 0; i < MHEIGHT / 2; i++)
//{
// printf("(%d) ", i);
// for(int k = 0; k < MWIDTH / 2; k++)
// {
// if(gmat1.depth() == 0)
// {
// if(gmat1.channels() == 1)
// {
// printf("%d ", cpu_dst.data[i * MHEIGHT / 2 + k]);
// }
// else
// {
// printf("%d ", ((unsigned*)cpu_dst.data)[i * MHEIGHT / 2 + k]);
// }
// }
// else if(gmat1.depth() == 5)
// {
// printf("%.6f ", ((float*)cpu_dst.data)[i * MHEIGHT / 2 + k]);
// }
// }
// printf("\n");
//}
}
////////////////////////////////PyrDown/////////////////////////////////////////////////
//struct PyrDown : ArithmTestBase {};
TEST_P(PyrDown, Mat) TEST_P(PyrDown, Mat)
{ {
for(int j = 0; j < LOOP_TIMES; j++) for(int j = 0; j < LOOP_TIMES; j++)
{ {
random_roi(); cv::Size size(MWIDTH, MHEIGHT);
cv::RNG &rng = TS::ptr()->get_rng();
cv::Mat src=randomMat(rng, size, CV_MAKETYPE(type, channels), 0, 100, false);
cv::pyrDown(mat1_roi, dst_roi); cv::ocl::oclMat gsrc(src), gdst;
cv::ocl::pyrDown(gmat1, gdst); cv::Mat dst_cpu;
cv::pyrDown(src, dst_cpu);
cv::ocl::pyrDown(gsrc, gdst);
cv::Mat cpu_dst; cv::Mat dst;
gdst.download(cpu_dst); gdst.download(dst);
char s[1024]; char s[1024]={0};
sprintf(s, "roicols=%d,roirows=%d,src1x=%d,src1y=%d,dstx=%d,dsty=%d,maskx=%d,masky=%d,src2x=%d,src2y=%d", roicols, roirows, src1x, src1y, dstx, dsty, maskx, masky, src2x, src2y);
EXPECT_MAT_NEAR(dst_roi, cpu_dst, dst_roi.depth() == CV_32F ? 1e-5f : 1.0f, s); EXPECT_MAT_NEAR(dst, dst_cpu, dst.depth() == CV_32F ? 1e-4f : 1.0f, s);
Cleanup(); Cleanup();
} }
} }
//********test****************
INSTANTIATE_TEST_CASE_P(GPU_ImgProc, PyrDown, Combine( INSTANTIATE_TEST_CASE_P(GPU_ImgProc, PyrDown, Combine(
Values(CV_8UC1, CV_8UC4, CV_32FC1, CV_32FC4), Values(CV_8U, CV_32F), Values(1, 3, 4)));
Values(false))); // Values(false) is the reserved parameter
#endif // HAVE_OPENCL #endif // HAVE_OPENCL

View File

@ -16,6 +16,7 @@
// //
// @Authors // @Authors
// Zhang Chunpeng chunpeng@multicorewareinc.com // Zhang Chunpeng chunpeng@multicorewareinc.com
// Yao Wang yao@multicorewareinc.com
// //
// Redistribution and use in source and binary forms, with or without modification, // Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met: // are permitted provided that the following conditions are met:
@ -48,44 +49,49 @@
#ifdef HAVE_OPENCL #ifdef HAVE_OPENCL
using namespace cv;
using namespace cvtest;
using namespace testing;
using namespace std;
PARAM_TEST_CASE(PyrUp,cv::Size,int) PARAM_TEST_CASE(PyrUp, MatType, int)
{ {
cv::Size size;
int type; int type;
int channels;
//std::vector<cv::ocl::Info> oclinfo; //std::vector<cv::ocl::Info> oclinfo;
virtual void SetUp() virtual void SetUp()
{ {
//int devnums = cv::ocl::getDevice(oclinfo, OPENCV_DEFAULT_OPENCL_DEVICE); //int devnums = cv::ocl::getDevice(oclinfo, OPENCV_DEFAULT_OPENCL_DEVICE);
//CV_Assert(devnums > 0); //CV_Assert(devnums > 0);
size = GET_PARAM(0); type = GET_PARAM(0);
type = GET_PARAM(1); channels = GET_PARAM(1);
} }
}; };
TEST_P(PyrUp,Accuracy) TEST_P(PyrUp,Accuracy)
{ {
cv::Mat src = randomMat(size,type); for(int j = 0; j < LOOP_TIMES; j++)
{
Size size(MWIDTH, MHEIGHT);
cv::Mat dst_gold; Mat src = randomMat(size,CV_MAKETYPE(type, channels));
cv::pyrUp(src,dst_gold); Mat dst_gold;
pyrUp(src,dst_gold);
cv::ocl::oclMat dst; ocl::oclMat dst;
cv::ocl::oclMat srcMat(src); ocl::oclMat srcMat(src);
cv::ocl::pyrUp(srcMat,dst); ocl::pyrUp(srcMat,dst);
Mat cpu_dst;
dst.download(cpu_dst);
char s[100]={0}; char s[100]={0};
EXPECT_MAT_NEAR(dst_gold, dst, (src.depth() == CV_32F ? 1e-4f : 1.0),s); EXPECT_MAT_NEAR(dst_gold, cpu_dst, (src.depth() == CV_32F ? 1e-4f : 1.0),s);
}
} }
#if 1
INSTANTIATE_TEST_CASE_P(GPU_ImgProc, PyrUp, testing::Combine( INSTANTIATE_TEST_CASE_P(GPU_ImgProc, PyrUp, testing::Combine(
testing::Values(cv::Size(32, 32)), Values(CV_8U, CV_32F), Values(1, 3, 4)));
testing::Values(MatType(CV_8UC1),MatType(CV_16UC1),MatType(CV_32FC1),MatType(CV_8UC4),
MatType(CV_16UC4),MatType(CV_32FC4))));
#endif
#endif // HAVE_OPENCL #endif // HAVE_OPENCL