remove redundant OPENCL_DIR flag
remove as much warnings as possible use enum instead of MACRO for ocl.hpp add command line parser in accuracy test and perf test some bug fix for arthim functions
This commit is contained in:
parent
b6a2717c2b
commit
5df77a841e
@ -2,7 +2,7 @@ if(APPLE)
|
|||||||
set(OPENCL_FOUND YES)
|
set(OPENCL_FOUND YES)
|
||||||
set(OPENCL_LIBRARIES "-framework OpenCL")
|
set(OPENCL_LIBRARIES "-framework OpenCL")
|
||||||
else()
|
else()
|
||||||
find_package(OpenCL QUIET)
|
#find_package(OpenCL QUIET)
|
||||||
if(WITH_OPENCLAMDFFT)
|
if(WITH_OPENCLAMDFFT)
|
||||||
find_path(CLAMDFFT_INCLUDE_DIR
|
find_path(CLAMDFFT_INCLUDE_DIR
|
||||||
NAMES clAmdFft.h)
|
NAMES clAmdFft.h)
|
||||||
|
@ -57,13 +57,15 @@ namespace cv
|
|||||||
namespace ocl
|
namespace ocl
|
||||||
{
|
{
|
||||||
using std::auto_ptr;
|
using std::auto_ptr;
|
||||||
|
enum
|
||||||
#define CVCL_DEVICE_TYPE_DEFAULT (1 << 0)
|
{
|
||||||
#define CVCL_DEVICE_TYPE_CPU (1 << 1)
|
CVCL_DEVICE_TYPE_DEFAULT = (1 << 0),
|
||||||
#define CVCL_DEVICE_TYPE_GPU (1 << 2)
|
CVCL_DEVICE_TYPE_CPU = (1 << 1),
|
||||||
#define CVCL_DEVICE_TYPE_ACCELERATOR (1 << 3)
|
CVCL_DEVICE_TYPE_GPU = (1 << 2),
|
||||||
//#define CVCL_DEVICE_TYPE_CUSTOM (1 << 4)
|
CVCL_DEVICE_TYPE_ACCELERATOR = (1 << 3),
|
||||||
#define CVCL_DEVICE_TYPE_ALL 0xFFFFFFFF
|
//CVCL_DEVICE_TYPE_CUSTOM = (1 << 4)
|
||||||
|
CVCL_DEVICE_TYPE_ALL = 0xFFFFFFFF
|
||||||
|
};
|
||||||
//this class contains ocl runtime information
|
//this class contains ocl runtime information
|
||||||
class CV_EXPORTS Info
|
class CV_EXPORTS Info
|
||||||
{
|
{
|
||||||
@ -825,7 +827,6 @@ namespace cv
|
|||||||
|
|
||||||
};
|
};
|
||||||
|
|
||||||
#ifdef HAVE_CLAMDFFT
|
|
||||||
///////////////////////////////////////// clAmdFft related /////////////////////////////////////////
|
///////////////////////////////////////// clAmdFft related /////////////////////////////////////////
|
||||||
//! Performs a forward or inverse discrete Fourier transform (1D or 2D) of floating point matrix.
|
//! Performs a forward or inverse discrete Fourier transform (1D or 2D) of floating point matrix.
|
||||||
//! Param dft_size is the size of DFT transform.
|
//! Param dft_size is the size of DFT transform.
|
||||||
@ -839,16 +840,13 @@ namespace cv
|
|||||||
// real to complex dft output is not the same with cpu version
|
// real to complex dft output is not the same with cpu version
|
||||||
// real to complex and complex to real does not support DFT_ROWS
|
// real to complex and complex to real does not support DFT_ROWS
|
||||||
CV_EXPORTS void dft(const oclMat &src, oclMat &dst, Size dft_size = Size(0, 0), int flags = 0);
|
CV_EXPORTS void dft(const oclMat &src, oclMat &dst, Size dft_size = Size(0, 0), int flags = 0);
|
||||||
#endif // HAVE_CLAMDFFT
|
|
||||||
|
|
||||||
#ifdef HAVE_CLAMDBLAS
|
|
||||||
//! implements generalized matrix product algorithm GEMM from BLAS
|
//! implements generalized matrix product algorithm GEMM from BLAS
|
||||||
// The functionality requires clAmdBlas library
|
// The functionality requires clAmdBlas library
|
||||||
// only support type CV_32FC1
|
// only support type CV_32FC1
|
||||||
// flag GEMM_3_T is not supported
|
// flag GEMM_3_T is not supported
|
||||||
CV_EXPORTS void gemm(const oclMat &src1, const oclMat &src2, double alpha,
|
CV_EXPORTS void gemm(const oclMat &src1, const oclMat &src2, double alpha,
|
||||||
const oclMat &src3, double beta, oclMat &dst, int flags = 0);
|
const oclMat &src3, double beta, oclMat &dst, int flags = 0);
|
||||||
#endif
|
|
||||||
|
|
||||||
//////////////// HOG (Histogram-of-Oriented-Gradients) Descriptor and Object Detector //////////////
|
//////////////// HOG (Histogram-of-Oriented-Gradients) Descriptor and Object Detector //////////////
|
||||||
|
|
||||||
|
@ -73,22 +73,86 @@ void print_info()
|
|||||||
#endif
|
#endif
|
||||||
|
|
||||||
}
|
}
|
||||||
|
std::string workdir;
|
||||||
int main(int argc, char **argv)
|
int main(int argc, char **argv)
|
||||||
{
|
{
|
||||||
std::vector<cv::ocl::Info> oclinfo;
|
|
||||||
TS::ptr()->init("ocl");
|
TS::ptr()->init("ocl");
|
||||||
InitGoogleTest(&argc, argv);
|
InitGoogleTest(&argc, argv);
|
||||||
|
const char *keys =
|
||||||
|
|
||||||
|
"{ h | help | false | print help message }"
|
||||||
|
|
||||||
|
"{ w | workdir | ../../../samples/c/| set working directory }"
|
||||||
|
|
||||||
|
"{ t | type | gpu | set device type:cpu or gpu}"
|
||||||
|
|
||||||
|
"{ p | platform | 0 | set platform id }"
|
||||||
|
|
||||||
|
"{ d | device | 0 | set device id }";
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
CommandLineParser cmd(argc, argv, keys);
|
||||||
|
|
||||||
|
if (cmd.get<bool>("help"))
|
||||||
|
|
||||||
|
{
|
||||||
|
|
||||||
|
cout << "Avaible options besides goole test option:" << endl;
|
||||||
|
|
||||||
|
cmd.printParams();
|
||||||
|
}
|
||||||
|
|
||||||
|
workdir = cmd.get<string>("workdir");
|
||||||
|
|
||||||
|
string type = cmd.get<string>("type");
|
||||||
|
|
||||||
|
unsigned int pid = cmd.get<unsigned int>("platform");
|
||||||
|
|
||||||
|
int device = cmd.get<int>("device");
|
||||||
|
|
||||||
|
|
||||||
print_info();
|
print_info();
|
||||||
int devnums = getDevice(oclinfo);
|
int flag = CVCL_DEVICE_TYPE_GPU;
|
||||||
if(devnums < 1)
|
|
||||||
|
if(type == "cpu")
|
||||||
|
|
||||||
{
|
{
|
||||||
std::cout << "no device found\n";
|
|
||||||
return -1;
|
flag = CVCL_DEVICE_TYPE_CPU;
|
||||||
|
|
||||||
}
|
}
|
||||||
//if you want to use undefault device, set it here
|
std::vector<cv::ocl::Info> oclinfo;
|
||||||
//setDevice(oclinfo[0]);
|
int devnums = getDevice(oclinfo);
|
||||||
|
if(devnums <= device || device < 0)
|
||||||
|
|
||||||
|
{
|
||||||
|
|
||||||
|
std::cout << "device invalid\n";
|
||||||
|
|
||||||
|
return -1;
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
if(pid >= oclinfo.size())
|
||||||
|
|
||||||
|
{
|
||||||
|
|
||||||
|
std::cout << "platform invalid\n";
|
||||||
|
|
||||||
|
return -1;
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
if(pid != 0 || device != 0)
|
||||||
|
|
||||||
|
{
|
||||||
|
|
||||||
|
setDevice(oclinfo[pid], device);
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
cout << "Device type:" << type << endl << "Device name:" << oclinfo[pid].DeviceName[device] << endl;
|
||||||
setBinpath(CLBINPATH);
|
setBinpath(CLBINPATH);
|
||||||
return RUN_ALL_TESTS();
|
return RUN_ALL_TESTS();
|
||||||
}
|
}
|
||||||
|
@ -2672,13 +2672,13 @@ TEST_P(CountNonZero, MAT)
|
|||||||
Has_roi(k);
|
Has_roi(k);
|
||||||
|
|
||||||
t0 = (double)cvGetTickCount();//cpu start
|
t0 = (double)cvGetTickCount();//cpu start
|
||||||
int cpures = cv::countNonZero(mat1_roi);
|
cv::countNonZero(mat1_roi);
|
||||||
t0 = (double)cvGetTickCount() - t0;//cpu end
|
t0 = (double)cvGetTickCount() - t0;//cpu end
|
||||||
|
|
||||||
t1 = (double)cvGetTickCount();//gpu start1
|
t1 = (double)cvGetTickCount();//gpu start1
|
||||||
gmat1 = mat1_roi;
|
gmat1 = mat1_roi;
|
||||||
t2 = (double)cvGetTickCount(); //kernel
|
t2 = (double)cvGetTickCount(); //kernel
|
||||||
int gpures = cv::ocl::countNonZero(gmat1);
|
cv::ocl::countNonZero(gmat1);
|
||||||
t2 = (double)cvGetTickCount() - t2;//kernel
|
t2 = (double)cvGetTickCount() - t2;//kernel
|
||||||
t1 = (double)cvGetTickCount() - t1;//gpu end1
|
t1 = (double)cvGetTickCount() - t1;//gpu end1
|
||||||
if(j == 0)
|
if(j == 0)
|
||||||
@ -2713,7 +2713,7 @@ TEST_P(CountNonZero, MAT)
|
|||||||
{
|
{
|
||||||
cout << "\nwith roi:";
|
cout << "\nwith roi:";
|
||||||
};
|
};
|
||||||
int gpures = cv::ocl::countNonZero(gmat1);
|
cv::ocl::countNonZero(gmat1);
|
||||||
};
|
};
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
@ -52,8 +52,6 @@ using namespace cvtest;
|
|||||||
using namespace testing;
|
using namespace testing;
|
||||||
using namespace std;
|
using namespace std;
|
||||||
|
|
||||||
#define FILTER_IMAGE "../../../samples/gpu/road.png"
|
|
||||||
|
|
||||||
#ifndef MWC_TEST_UTILITY
|
#ifndef MWC_TEST_UTILITY
|
||||||
#define MWC_TEST_UTILITY
|
#define MWC_TEST_UTILITY
|
||||||
|
|
||||||
@ -79,7 +77,7 @@ IMPLEMENT_PARAM_CLASS(Channels, int)
|
|||||||
|
|
||||||
////////////////////////////////////////////////////////
|
////////////////////////////////////////////////////////
|
||||||
// Canny1
|
// Canny1
|
||||||
|
extern std::string workdir;
|
||||||
IMPLEMENT_PARAM_CLASS(AppertureSize, int);
|
IMPLEMENT_PARAM_CLASS(AppertureSize, int);
|
||||||
IMPLEMENT_PARAM_CLASS(L2gradient, bool);
|
IMPLEMENT_PARAM_CLASS(L2gradient, bool);
|
||||||
|
|
||||||
@ -101,7 +99,7 @@ PARAM_TEST_CASE(Canny1, AppertureSize, L2gradient)
|
|||||||
|
|
||||||
TEST_P(Canny1, Performance)
|
TEST_P(Canny1, Performance)
|
||||||
{
|
{
|
||||||
cv::Mat img = readImage(FILTER_IMAGE, cv::IMREAD_GRAYSCALE);
|
cv::Mat img = readImage(workdir + "fruits.jpg", cv::IMREAD_GRAYSCALE);
|
||||||
ASSERT_FALSE(img.empty());
|
ASSERT_FALSE(img.empty());
|
||||||
|
|
||||||
double low_thresh = 100.0;
|
double low_thresh = 100.0;
|
||||||
|
@ -1184,11 +1184,11 @@ INSTANTIATE_TEST_CASE_P(Filters, Laplacian, Combine(
|
|||||||
|
|
||||||
//INSTANTIATE_TEST_CASE_P(Filter, ErodeDilate, Combine(Values(CV_8UC1, CV_8UC4, CV_32FC1, CV_32FC4), Values(1, 2, 3)));
|
//INSTANTIATE_TEST_CASE_P(Filter, ErodeDilate, Combine(Values(CV_8UC1, CV_8UC4, CV_32FC1, CV_32FC4), Values(1, 2, 3)));
|
||||||
|
|
||||||
INSTANTIATE_TEST_CASE_P(Filter, Erode, Combine(Values(CV_8UC1, CV_8UC4, CV_32FC1, CV_32FC4), Values(false)));
|
INSTANTIATE_TEST_CASE_P(Filter, Erode, Combine(Values(CV_8UC1, CV_8UC1), Values(false)));
|
||||||
|
|
||||||
//INSTANTIATE_TEST_CASE_P(Filter, ErodeDilate, Combine(Values(CV_8UC1, CV_8UC4, CV_32FC1, CV_32FC4), Values(1, 2, 3)));
|
//INSTANTIATE_TEST_CASE_P(Filter, ErodeDilate, Combine(Values(CV_8UC1, CV_8UC4, CV_32FC1, CV_32FC4), Values(1, 2, 3)));
|
||||||
|
|
||||||
INSTANTIATE_TEST_CASE_P(Filter, Dilate, Combine(Values(CV_8UC1, CV_8UC4, CV_32FC1, CV_32FC4), Values(false)));
|
INSTANTIATE_TEST_CASE_P(Filter, Dilate, Combine(Values(CV_8UC1, CV_8UC1), Values(false)));
|
||||||
|
|
||||||
|
|
||||||
INSTANTIATE_TEST_CASE_P(Filter, Sobel, Combine(Values(CV_8UC1, CV_32FC1),
|
INSTANTIATE_TEST_CASE_P(Filter, Sobel, Combine(Values(CV_8UC1, CV_32FC1),
|
||||||
|
@ -52,7 +52,7 @@ using namespace cvtest;
|
|||||||
using namespace testing;
|
using namespace testing;
|
||||||
using namespace std;
|
using namespace std;
|
||||||
using namespace cv;
|
using namespace cv;
|
||||||
|
extern std::string workdir;
|
||||||
struct getRect
|
struct getRect
|
||||||
{
|
{
|
||||||
Rect operator ()(const CvAvgComp &e) const
|
Rect operator ()(const CvAvgComp &e) const
|
||||||
@ -80,9 +80,6 @@ PARAM_TEST_CASE(HaarTestBase, int, int)
|
|||||||
if( (!cascade.load( cascadeName )) || (!cpucascade.load(cascadeName)))
|
if( (!cascade.load( cascadeName )) || (!cpucascade.load(cascadeName)))
|
||||||
{
|
{
|
||||||
cout << "ERROR: Could not load classifier cascade" << endl;
|
cout << "ERROR: Could not load classifier cascade" << endl;
|
||||||
cout << "Usage: facedetect [--cascade=<cascade_path>]\n"
|
|
||||||
" [--scale[=<image scale>\n"
|
|
||||||
" [filename|camera_index]\n" << endl ;
|
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
//int devnums = getDevice(oclinfo);
|
//int devnums = getDevice(oclinfo);
|
||||||
@ -99,16 +96,16 @@ struct Haar : HaarTestBase {};
|
|||||||
|
|
||||||
TEST_F(Haar, FaceDetect)
|
TEST_F(Haar, FaceDetect)
|
||||||
{
|
{
|
||||||
string imgName = "../../../samples/c/lena.jpg";
|
string imgName = workdir + "lena.jpg";
|
||||||
Mat img = imread( imgName, 1 );
|
Mat img = imread( imgName, 1 );
|
||||||
|
|
||||||
if(img.empty())
|
if(img.empty())
|
||||||
{
|
{
|
||||||
std::cout << "Couldn't read test" << index << ".jpg" << std::endl;
|
std::cout << imgName << std::endl;
|
||||||
return ;
|
return ;
|
||||||
}
|
}
|
||||||
|
|
||||||
int i = 0;
|
//int i = 0;
|
||||||
double t = 0;
|
double t = 0;
|
||||||
vector<Rect> faces, oclfaces;
|
vector<Rect> faces, oclfaces;
|
||||||
|
|
||||||
|
@ -53,8 +53,7 @@ using namespace cv::ocl;
|
|||||||
using namespace cvtest;
|
using namespace cvtest;
|
||||||
using namespace testing;
|
using namespace testing;
|
||||||
using namespace std;
|
using namespace std;
|
||||||
|
extern std::string workdir;
|
||||||
#define FILTER_IMAGE "../../../samples/gpu/road.png"
|
|
||||||
|
|
||||||
#ifndef MWC_TEST_UTILITY
|
#ifndef MWC_TEST_UTILITY
|
||||||
#define MWC_TEST_UTILITY
|
#define MWC_TEST_UTILITY
|
||||||
@ -100,15 +99,15 @@ PARAM_TEST_CASE(HOG, WinSizw48, bool)
|
|||||||
|
|
||||||
TEST_P(HOG, Performance)
|
TEST_P(HOG, Performance)
|
||||||
{
|
{
|
||||||
cv::Mat img = readImage(FILTER_IMAGE, cv::IMREAD_GRAYSCALE);
|
cv::Mat img = readImage(workdir + "lena.jpg", cv::IMREAD_GRAYSCALE);
|
||||||
ASSERT_FALSE(img.empty());
|
ASSERT_FALSE(img.empty());
|
||||||
|
|
||||||
// define HOG related arguments
|
// define HOG related arguments
|
||||||
float scale = 1.05;
|
float scale = 1.05;
|
||||||
int nlevels = 13;
|
//int nlevels = 13;
|
||||||
float gr_threshold = 8;
|
float gr_threshold = 8;
|
||||||
float hit_threshold = 1.4;
|
float hit_threshold = 1.4;
|
||||||
bool hit_threshold_auto = true;
|
//bool hit_threshold_auto = true;
|
||||||
|
|
||||||
int win_width = is48 ? 48 : 64;
|
int win_width = is48 ? 48 : 64;
|
||||||
int win_stride_width = 8;
|
int win_stride_width = 8;
|
||||||
|
@ -1246,6 +1246,7 @@ TEST_P(Remap, Mat)
|
|||||||
}
|
}
|
||||||
int bordertype[] = {cv::BORDER_CONSTANT, cv::BORDER_REPLICATE/*,BORDER_REFLECT,BORDER_WRAP,BORDER_REFLECT_101*/};
|
int bordertype[] = {cv::BORDER_CONSTANT, cv::BORDER_REPLICATE/*,BORDER_REFLECT,BORDER_WRAP,BORDER_REFLECT_101*/};
|
||||||
const char *borderstr[] = {"BORDER_CONSTANT", "BORDER_REPLICATE"/*, "BORDER_REFLECT","BORDER_WRAP","BORDER_REFLECT_101"*/};
|
const char *borderstr[] = {"BORDER_CONSTANT", "BORDER_REPLICATE"/*, "BORDER_REFLECT","BORDER_WRAP","BORDER_REFLECT_101"*/};
|
||||||
|
cout << borderstr[0] << endl;
|
||||||
#ifndef PRINT_KERNEL_RUN_TIME
|
#ifndef PRINT_KERNEL_RUN_TIME
|
||||||
double totalcputick = 0;
|
double totalcputick = 0;
|
||||||
double totalgputick = 0;
|
double totalgputick = 0;
|
||||||
|
@ -714,7 +714,6 @@ TEST_P(DataTransfer, perf)
|
|||||||
totaluploadtick = t0 + totaluploadtick;
|
totaluploadtick = t0 + totaluploadtick;
|
||||||
totaldownloadtick = t1 + totaldownloadtick;
|
totaldownloadtick = t1 + totaldownloadtick;
|
||||||
}
|
}
|
||||||
EXPECT_MAT_SIMILAR(mat, cpu_dst, 0.0);
|
|
||||||
totaltick = totaluploadtick + totaldownloadtick;
|
totaltick = totaluploadtick + totaldownloadtick;
|
||||||
cout << "average upload time is " << totaluploadtick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl;
|
cout << "average upload time is " << totaluploadtick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl;
|
||||||
cout << "average download time is " << totaldownloadtick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl;
|
cout << "average download time is " << totaldownloadtick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl;
|
||||||
|
@ -450,7 +450,7 @@ TEST_P(Split, Accuracy)
|
|||||||
for(int j = LOOPROISTART; j < LOOPROIEND; j ++)
|
for(int j = LOOPROISTART; j < LOOPROIEND; j ++)
|
||||||
{
|
{
|
||||||
Has_roi(j);
|
Has_roi(j);
|
||||||
cv::Mat dev_dst[4] = {dst1_roi, dst2_roi, dst3_roi, dst4_roi};
|
//cv::Mat dev_dst[4] = {dst1_roi, dst2_roi, dst3_roi, dst4_roi};
|
||||||
cv::ocl::oclMat dev_gdst[4] = {gdst1, gdst2, gdst3, gdst4};
|
cv::ocl::oclMat dev_gdst[4] = {gdst1, gdst2, gdst3, gdst4};
|
||||||
gdst1_whole = dst1;
|
gdst1_whole = dst1;
|
||||||
gdst1 = gdst1_whole(Rect(dst1x, dst1y, roicols, roirows));
|
gdst1 = gdst1_whole(Rect(dst1x, dst1y, roicols, roirows));
|
||||||
|
@ -54,11 +54,11 @@ using namespace cvtest;
|
|||||||
using namespace testing;
|
using namespace testing;
|
||||||
using namespace std;
|
using namespace std;
|
||||||
|
|
||||||
#define FILTER_IMAGE "../../../samples/gpu/road.png"
|
extern std::string workdir;
|
||||||
|
|
||||||
TEST(SURF, Performance)
|
TEST(SURF, Performance)
|
||||||
{
|
{
|
||||||
cv::Mat img = readImage(FILTER_IMAGE, cv::IMREAD_GRAYSCALE);
|
cv::Mat img = readImage(workdir+"lena.jpg", cv::IMREAD_GRAYSCALE);
|
||||||
ASSERT_FALSE(img.empty());
|
ASSERT_FALSE(img.empty());
|
||||||
|
|
||||||
ocl::SURF_OCL d_surf;
|
ocl::SURF_OCL d_surf;
|
||||||
|
@ -772,7 +772,7 @@ Scalar arithmetic_sum(const oclMat &src, int type = 0)
|
|||||||
{
|
{
|
||||||
size_t groupnum = src.clCxt->impl->maxComputeUnits;
|
size_t groupnum = src.clCxt->impl->maxComputeUnits;
|
||||||
CV_Assert(groupnum != 0);
|
CV_Assert(groupnum != 0);
|
||||||
int vlen = src.oclchannels() == 3 ? 12 : 8, dbsize = groupnum * vlen, status;
|
int vlen = src.oclchannels() == 3 ? 12 : 8, dbsize = groupnum * vlen;
|
||||||
Context *clCxt = src.clCxt;
|
Context *clCxt = src.clCxt;
|
||||||
T *p = new T[dbsize];
|
T *p = new T[dbsize];
|
||||||
cl_mem dstBuffer = openCLCreateBuffer(clCxt, CL_MEM_WRITE_ONLY, dbsize * sizeof(T));
|
cl_mem dstBuffer = openCLCreateBuffer(clCxt, CL_MEM_WRITE_ONLY, dbsize * sizeof(T));
|
||||||
@ -930,7 +930,7 @@ template <typename T> void arithmetic_minMax(const oclMat &src, double *minVal,
|
|||||||
CV_Assert(groupnum != 0);
|
CV_Assert(groupnum != 0);
|
||||||
groupnum = groupnum * 2;
|
groupnum = groupnum * 2;
|
||||||
int vlen = 8;
|
int vlen = 8;
|
||||||
int dbsize = groupnum * 2 * vlen * sizeof(T) , status;
|
int dbsize = groupnum * 2 * vlen * sizeof(T) ;
|
||||||
Context *clCxt = src.clCxt;
|
Context *clCxt = src.clCxt;
|
||||||
cl_mem dstBuffer = openCLCreateBuffer(clCxt, CL_MEM_WRITE_ONLY, dbsize);
|
cl_mem dstBuffer = openCLCreateBuffer(clCxt, CL_MEM_WRITE_ONLY, dbsize);
|
||||||
*minVal = std::numeric_limits<double>::max() , *maxVal = -std::numeric_limits<double>::max();
|
*minVal = std::numeric_limits<double>::max() , *maxVal = -std::numeric_limits<double>::max();
|
||||||
@ -945,11 +945,11 @@ template <typename T> void arithmetic_minMax(const oclMat &src, double *minVal,
|
|||||||
T *p = new T[groupnum * vlen * 2];
|
T *p = new T[groupnum * vlen * 2];
|
||||||
memset(p, 0, dbsize);
|
memset(p, 0, dbsize);
|
||||||
openCLReadBuffer(clCxt, dstBuffer, (void *)p, dbsize);
|
openCLReadBuffer(clCxt, dstBuffer, (void *)p, dbsize);
|
||||||
for(int i = 0; i < vlen * groupnum; i++)
|
for(int i = 0; i < vlen * (int)groupnum; i++)
|
||||||
{
|
{
|
||||||
*minVal = *minVal < p[i] ? *minVal : p[i];
|
*minVal = *minVal < p[i] ? *minVal : p[i];
|
||||||
}
|
}
|
||||||
for(int i = vlen * groupnum; i < 2 * vlen * groupnum; i++)
|
for(int i = vlen * (int)groupnum; i < 2 * vlen * (int)groupnum; i++)
|
||||||
{
|
{
|
||||||
*maxVal = *maxVal > p[i] ? *maxVal : p[i];
|
*maxVal = *maxVal > p[i] ? *maxVal : p[i];
|
||||||
}
|
}
|
||||||
@ -1606,7 +1606,7 @@ void arithmetic_minMaxLoc(const oclMat &src, double *minVal, double *maxVal,
|
|||||||
size_t groupnum = src.clCxt->impl->maxComputeUnits;
|
size_t groupnum = src.clCxt->impl->maxComputeUnits;
|
||||||
CV_Assert(groupnum != 0);
|
CV_Assert(groupnum != 0);
|
||||||
int minloc = -1 , maxloc = -1;
|
int minloc = -1 , maxloc = -1;
|
||||||
int vlen = 4, dbsize = groupnum * vlen * 4 * sizeof(T) , status;
|
int vlen = 4, dbsize = groupnum * vlen * 4 * sizeof(T) ;
|
||||||
Context *clCxt = src.clCxt;
|
Context *clCxt = src.clCxt;
|
||||||
cl_mem dstBuffer = openCLCreateBuffer(clCxt, CL_MEM_WRITE_ONLY, dbsize);
|
cl_mem dstBuffer = openCLCreateBuffer(clCxt, CL_MEM_WRITE_ONLY, dbsize);
|
||||||
*minVal = std::numeric_limits<double>::max() , *maxVal = -std::numeric_limits<double>::max();
|
*minVal = std::numeric_limits<double>::max() , *maxVal = -std::numeric_limits<double>::max();
|
||||||
@ -1621,15 +1621,15 @@ void arithmetic_minMaxLoc(const oclMat &src, double *minVal, double *maxVal,
|
|||||||
T *p = new T[groupnum * vlen * 4];
|
T *p = new T[groupnum * vlen * 4];
|
||||||
memset(p, 0, dbsize);
|
memset(p, 0, dbsize);
|
||||||
openCLReadBuffer(clCxt, dstBuffer, (void *)p, dbsize);
|
openCLReadBuffer(clCxt, dstBuffer, (void *)p, dbsize);
|
||||||
for(int i = 0; i < vlen * groupnum; i++)
|
for(int i = 0; i < vlen * (int)groupnum; i++)
|
||||||
{
|
{
|
||||||
*minVal = (*minVal < p[i] || p[i + 2 * vlen * groupnum] == -1) ? *minVal : p[i];
|
*minVal = (*minVal < p[i] || p[i + 2 * vlen * groupnum] == -1) ? *minVal : p[i];
|
||||||
minloc = (*minVal < p[i] || p[i + 2 * vlen * groupnum] == -1) ? minloc : p[i + 2 * vlen * groupnum];
|
minloc = (*minVal < p[i] || p[i + 2 * vlen * groupnum] == -1) ? minloc : cvRound(p[i + 2 * vlen * groupnum]);
|
||||||
}
|
}
|
||||||
for(int i = vlen * groupnum; i < 2 * vlen * groupnum; i++)
|
for(int i = vlen * (int)groupnum; i < 2 * vlen * (int)groupnum; i++)
|
||||||
{
|
{
|
||||||
*maxVal = (*maxVal > p[i] || p[i + 2 * vlen * groupnum] == -1) ? *maxVal : p[i];
|
*maxVal = (*maxVal > p[i] || p[i + 2 * vlen * groupnum] == -1) ? *maxVal : p[i];
|
||||||
maxloc = (*maxVal > p[i] || p[i + 2 * vlen * groupnum] == -1) ? maxloc : p[i + 2 * vlen * groupnum];
|
maxloc = (*maxVal > p[i] || p[i + 2 * vlen * groupnum] == -1) ? maxloc : cvRound(p[i + 2 * vlen * groupnum]);
|
||||||
}
|
}
|
||||||
|
|
||||||
int pre_rows = src.offset / src.step;
|
int pre_rows = src.offset / src.step;
|
||||||
@ -1717,7 +1717,7 @@ int cv::ocl::countNonZero(const oclMat &src)
|
|||||||
}
|
}
|
||||||
CV_Assert(groupnum != 0);
|
CV_Assert(groupnum != 0);
|
||||||
groupnum = groupnum * 2;
|
groupnum = groupnum * 2;
|
||||||
int vlen = 8 , dbsize = groupnum * vlen, status;
|
int vlen = 8 , dbsize = groupnum * vlen;
|
||||||
//cl_ulong start, end;
|
//cl_ulong start, end;
|
||||||
Context *clCxt = src.clCxt;
|
Context *clCxt = src.clCxt;
|
||||||
string kernelName = "arithm_op_nonzero";
|
string kernelName = "arithm_op_nonzero";
|
||||||
|
@ -227,8 +227,8 @@ void matchUnrolledCached(const oclMat &query, const oclMat &train, const oclMat
|
|||||||
}
|
}
|
||||||
|
|
||||||
template < int BLOCK_SIZE, int MAX_DESC_LEN, typename T/*, typename Mask*/ >
|
template < int BLOCK_SIZE, int MAX_DESC_LEN, typename T/*, typename Mask*/ >
|
||||||
void matchUnrolledCached(const oclMat query, const oclMat *trains, int n, const oclMat mask,
|
void matchUnrolledCached(const oclMat /*query*/, const oclMat * /*trains*/, int /*n*/, const oclMat /*mask*/,
|
||||||
const oclMat &bestTrainIdx, const oclMat &bestImgIdx, const oclMat &bestDistance, int distType)
|
const oclMat &/*bestTrainIdx*/, const oclMat & /*bestImgIdx*/, const oclMat & /*bestDistance*/, int /*distType*/)
|
||||||
{
|
{
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -266,8 +266,8 @@ void match(const oclMat &query, const oclMat &train, const oclMat &mask,
|
|||||||
}
|
}
|
||||||
|
|
||||||
template < int BLOCK_SIZE, typename T/*, typename Mask*/ >
|
template < int BLOCK_SIZE, typename T/*, typename Mask*/ >
|
||||||
void match(const oclMat query, const oclMat *trains, int n, const oclMat mask,
|
void match(const oclMat /*query*/, const oclMat * /*trains*/, int /*n*/, const oclMat /*mask*/,
|
||||||
const oclMat &bestTrainIdx, const oclMat &bestImgIdx, const oclMat &bestDistance, int distType)
|
const oclMat &/*bestTrainIdx*/, const oclMat & /*bestImgIdx*/, const oclMat & /*bestDistance*/, int /*distType*/)
|
||||||
{
|
{
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -796,7 +796,7 @@ void match2Dispatcher(const oclMat &query, const oclMat &train, const oclMat &ma
|
|||||||
}
|
}
|
||||||
|
|
||||||
template <int BLOCK_SIZE>
|
template <int BLOCK_SIZE>
|
||||||
void findKnnMatch(int k, const oclMat &trainIdx, const oclMat &distance, const oclMat &allDist, int distType)
|
void findKnnMatch(int k, const oclMat &trainIdx, const oclMat &distance, const oclMat &allDist, int /*distType*/)
|
||||||
{
|
{
|
||||||
cv::ocl::Context *ctx = trainIdx.clCxt;
|
cv::ocl::Context *ctx = trainIdx.clCxt;
|
||||||
size_t globalSize[] = {trainIdx.rows * BLOCK_SIZE, 1, 1};
|
size_t globalSize[] = {trainIdx.rows * BLOCK_SIZE, 1, 1};
|
||||||
@ -1406,7 +1406,7 @@ void cv::ocl::BruteForceMatcher_OCL_base::knnMatch(const oclMat &query, const oc
|
|||||||
}
|
}
|
||||||
|
|
||||||
void cv::ocl::BruteForceMatcher_OCL_base::knnMatch2Collection(const oclMat &query, const oclMat &trainCollection,
|
void cv::ocl::BruteForceMatcher_OCL_base::knnMatch2Collection(const oclMat &query, const oclMat &trainCollection,
|
||||||
oclMat &trainIdx, oclMat &imgIdx, oclMat &distance, const oclMat &maskCollection)
|
oclMat &trainIdx, oclMat &imgIdx, oclMat &distance, const oclMat &/*maskCollection*/)
|
||||||
{
|
{
|
||||||
if (query.empty() || trainCollection.empty())
|
if (query.empty() || trainCollection.empty())
|
||||||
return;
|
return;
|
||||||
@ -1702,7 +1702,7 @@ void cv::ocl::BruteForceMatcher_OCL_base::radiusMatch(const oclMat &query, const
|
|||||||
}
|
}
|
||||||
|
|
||||||
void cv::ocl::BruteForceMatcher_OCL_base::radiusMatchCollection(const oclMat &query, oclMat &trainIdx, oclMat &imgIdx, oclMat &distance,
|
void cv::ocl::BruteForceMatcher_OCL_base::radiusMatchCollection(const oclMat &query, oclMat &trainIdx, oclMat &imgIdx, oclMat &distance,
|
||||||
oclMat &nMatches, float maxDistance, const vector<oclMat> &masks)
|
oclMat &nMatches, float /*maxDistance*/, const vector<oclMat> &masks)
|
||||||
{
|
{
|
||||||
if (query.empty() || empty())
|
if (query.empty() || empty())
|
||||||
return;
|
return;
|
||||||
|
@ -51,13 +51,17 @@ using namespace cv;
|
|||||||
using namespace cv::ocl;
|
using namespace cv::ocl;
|
||||||
using namespace std;
|
using namespace std;
|
||||||
|
|
||||||
#if !defined (HAVE_OPENCL)
|
#if !defined HAVE_OPENCL
|
||||||
void cv::ocl::dft(const oclMat &src, oclMat &dst, int flags)
|
void cv::ocl::dft(const oclMat &src, oclMat &dst, int flags)
|
||||||
{
|
{
|
||||||
throw_nogpu();
|
throw_nogpu();
|
||||||
}
|
}
|
||||||
|
#elif !defined HAVE_CLAMDFFT
|
||||||
|
void cv::ocl::dft(const oclMat &src, oclMat &dst, int flags)
|
||||||
|
{
|
||||||
|
CV_Error(CV_StsNotImplemented, "OpenCL DFT is not implemented");
|
||||||
|
}
|
||||||
#else
|
#else
|
||||||
|
|
||||||
#include <clAmdFft.h>
|
#include <clAmdFft.h>
|
||||||
|
|
||||||
namespace cv
|
namespace cv
|
||||||
|
@ -340,12 +340,12 @@ void GPUErode(const oclMat &src, oclMat &dst, oclMat &mat_kernel, Size &ksize, c
|
|||||||
Context *clCxt = src.clCxt;
|
Context *clCxt = src.clCxt;
|
||||||
string kernelName;
|
string kernelName;
|
||||||
size_t localThreads[3] = {16, 16, 1};
|
size_t localThreads[3] = {16, 16, 1};
|
||||||
size_t globalThreads[3] = {(src.cols + localThreads[0]) / localThreads[0] *localThreads[0], (src.rows + localThreads[1]) / localThreads[1] *localThreads[1], 1};
|
size_t globalThreads[3] = {(src.cols + localThreads[0] - 1) / localThreads[0] *localThreads[0], (src.rows + localThreads[1] - 1) / localThreads[1] *localThreads[1], 1};
|
||||||
|
|
||||||
if(src.type() == CV_8UC1)
|
if(src.type() == CV_8UC1)
|
||||||
{
|
{
|
||||||
kernelName = "morph_C1_D0";
|
kernelName = "morph_C1_D0";
|
||||||
globalThreads[0] = ((src.cols + 3) / 4 + localThreads[0]) / localThreads[0] * localThreads[0];
|
globalThreads[0] = ((src.cols + 3) / 4 + localThreads[0] - 1) / localThreads[0] * localThreads[0];
|
||||||
CV_Assert( localThreads[0]*localThreads[1] * 8 >= (localThreads[0] * 4 + ksize.width - 1) * (localThreads[1] + ksize.height - 1) );
|
CV_Assert( localThreads[0]*localThreads[1] * 8 >= (localThreads[0] * 4 + ksize.width - 1) * (localThreads[1] + ksize.height - 1) );
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
@ -489,7 +489,7 @@ namespace
|
|||||||
MorphologyFilterEngine_GPU(const Ptr<BaseFilter_GPU> &filter2D_, int iters_) :
|
MorphologyFilterEngine_GPU(const Ptr<BaseFilter_GPU> &filter2D_, int iters_) :
|
||||||
Filter2DEngine_GPU(filter2D_), iters(iters_) {}
|
Filter2DEngine_GPU(filter2D_), iters(iters_) {}
|
||||||
|
|
||||||
virtual void apply(const oclMat &src, oclMat &dst, Rect roi = Rect(0, 0, -1, -1))
|
virtual void apply(const oclMat &src, oclMat &dst)
|
||||||
{
|
{
|
||||||
Filter2DEngine_GPU::apply(src, dst);
|
Filter2DEngine_GPU::apply(src, dst);
|
||||||
//if (iters > 1)
|
//if (iters > 1)
|
||||||
@ -778,7 +778,7 @@ namespace
|
|||||||
virtual void apply(const oclMat &src, oclMat &dst, Rect roi = Rect(0, 0, -1, -1))
|
virtual void apply(const oclMat &src, oclMat &dst, Rect roi = Rect(0, 0, -1, -1))
|
||||||
{
|
{
|
||||||
Size src_size = src.size();
|
Size src_size = src.size();
|
||||||
int src_type = src.type();
|
//int src_type = src.type();
|
||||||
|
|
||||||
int cn = src.oclchannels();
|
int cn = src.oclchannels();
|
||||||
//dst.create(src_size, src_type);
|
//dst.create(src_size, src_type);
|
||||||
|
@ -50,11 +50,18 @@
|
|||||||
|
|
||||||
#include "clAmdBlas.h"
|
#include "clAmdBlas.h"
|
||||||
|
|
||||||
#if !defined (HAVE_OPENCL)
|
#if !defined HAVE_OPENCL
|
||||||
void cv::ocl::dft(const oclMat &src, oclMat &dst, int flags)
|
void cv::ocl::gemm(const oclMat &src1, const oclMat &src2, double alpha,
|
||||||
|
const oclMat &src3, double beta, oclMat &dst, int flags)
|
||||||
{
|
{
|
||||||
throw_nogpu();
|
throw_nogpu();
|
||||||
}
|
}
|
||||||
|
#elif !defined HAVE_CLAMDBLAS
|
||||||
|
void cv::ocl::gemm(const oclMat &src1, const oclMat &src2, double alpha,
|
||||||
|
const oclMat &src3, double beta, oclMat &dst, int flags)
|
||||||
|
{
|
||||||
|
CV_Error(CV_StsNotImplemented, "OpenCL BLAS is not implemented");
|
||||||
|
}
|
||||||
#else
|
#else
|
||||||
|
|
||||||
using namespace cv;
|
using namespace cv;
|
||||||
|
@ -813,14 +813,9 @@ gpuSetHaarClassifierCascade( CvHaarClassifierCascade *_cascade
|
|||||||
CvHaarFeature *feature =
|
CvHaarFeature *feature =
|
||||||
&_cascade->stage_classifier[i].classifier[j].haar_feature[l];
|
&_cascade->stage_classifier[i].classifier[j].haar_feature[l];
|
||||||
GpuHidHaarTreeNode *hidnode = &stage_classifier[i].classifier[j].node[l];
|
GpuHidHaarTreeNode *hidnode = &stage_classifier[i].classifier[j].node[l];
|
||||||
double sum0 = 0, area0 = 0;
|
|
||||||
CvRect r[3];
|
CvRect r[3];
|
||||||
|
|
||||||
int base_w = -1, base_h = -1;
|
|
||||||
int new_base_w = 0, new_base_h = 0;
|
|
||||||
int kx, ky;
|
|
||||||
int flagx = 0, flagy = 0;
|
|
||||||
int x0 = 0, y0 = 0;
|
|
||||||
int nr;
|
int nr;
|
||||||
|
|
||||||
/* align blocks */
|
/* align blocks */
|
||||||
@ -872,7 +867,6 @@ CvSeq *cv::ocl::OclCascadeClassifier::oclHaarDetectObjects( oclMat &gimg, CvMemS
|
|||||||
std::vector<cv::Rect> rectList;
|
std::vector<cv::Rect> rectList;
|
||||||
std::vector<int> rweights;
|
std::vector<int> rweights;
|
||||||
double factor;
|
double factor;
|
||||||
int coi;
|
|
||||||
int datasize;
|
int datasize;
|
||||||
int totalclassifier;
|
int totalclassifier;
|
||||||
|
|
||||||
@ -885,9 +879,9 @@ CvSeq *cv::ocl::OclCascadeClassifier::oclHaarDetectObjects( oclMat &gimg, CvMemS
|
|||||||
int *candidate;
|
int *candidate;
|
||||||
cl_int status;
|
cl_int status;
|
||||||
|
|
||||||
bool doCannyPruning = (flags & CV_HAAR_DO_CANNY_PRUNING) != 0;
|
// bool doCannyPruning = (flags & CV_HAAR_DO_CANNY_PRUNING) != 0;
|
||||||
bool findBiggestObject = (flags & CV_HAAR_FIND_BIGGEST_OBJECT) != 0;
|
bool findBiggestObject = (flags & CV_HAAR_FIND_BIGGEST_OBJECT) != 0;
|
||||||
bool roughSearch = (flags & CV_HAAR_DO_ROUGH_SEARCH) != 0;
|
// bool roughSearch = (flags & CV_HAAR_DO_ROUGH_SEARCH) != 0;
|
||||||
|
|
||||||
//the Intel HD Graphics is unsupported
|
//the Intel HD Graphics is unsupported
|
||||||
if (gimg.clCxt->impl->devName.find("Intel(R) HD Graphics") != string::npos)
|
if (gimg.clCxt->impl->devName.find("Intel(R) HD Graphics") != string::npos)
|
||||||
@ -1015,7 +1009,6 @@ CvSeq *cv::ocl::OclCascadeClassifier::oclHaarDetectObjects( oclMat &gimg, CvMemS
|
|||||||
gimgroi = gsum(roi);
|
gimgroi = gsum(roi);
|
||||||
gimgroisq = gsqsum(roi);
|
gimgroisq = gsqsum(roi);
|
||||||
//scaleinfo[i].rows = gimgroi.rows;
|
//scaleinfo[i].rows = gimgroi.rows;
|
||||||
int ystep = 1; // factor > 2 ? 1 : 2;
|
|
||||||
int width = gimgroi.cols - 1 - cascade->orig_window_size.width;
|
int width = gimgroi.cols - 1 - cascade->orig_window_size.width;
|
||||||
int height = gimgroi.rows - 1 - cascade->orig_window_size.height;
|
int height = gimgroi.rows - 1 - cascade->orig_window_size.height;
|
||||||
scaleinfo[i].width_height = (width << 16) | height;
|
scaleinfo[i].width_height = (width << 16) | height;
|
||||||
@ -1109,7 +1102,7 @@ CvSeq *cv::ocl::OclCascadeClassifier::oclHaarDetectObjects( oclMat &gimg, CvMemS
|
|||||||
pq.s[2] = gcascade->pq2;
|
pq.s[2] = gcascade->pq2;
|
||||||
pq.s[3] = gcascade->pq3;
|
pq.s[3] = gcascade->pq3;
|
||||||
float correction = gcascade->inv_window_area;
|
float correction = gcascade->inv_window_area;
|
||||||
int argcount = 0;
|
|
||||||
//int grpnumperline = ((m + localThreads[0] - 1) / localThreads[0]);
|
//int grpnumperline = ((m + localThreads[0] - 1) / localThreads[0]);
|
||||||
//int totalgrp = ((n + localThreads[1] - 1) / localThreads[1])*grpnumperline;
|
//int totalgrp = ((n + localThreads[1] - 1) / localThreads[1])*grpnumperline;
|
||||||
// openCLVerifyKernel(gsum.clCxt, kernel, &blocksize, globalThreads, localThreads);
|
// openCLVerifyKernel(gsum.clCxt, kernel, &blocksize, globalThreads, localThreads);
|
||||||
@ -1184,7 +1177,6 @@ CvSeq *cv::ocl::OclCascadeClassifier::oclHaarDetectObjects( oclMat &gimg, CvMemS
|
|||||||
{
|
{
|
||||||
CvSize winsize0 = cascade->orig_window_size;
|
CvSize winsize0 = cascade->orig_window_size;
|
||||||
int n_factors = 0;
|
int n_factors = 0;
|
||||||
int flag = 0;
|
|
||||||
oclMat gsum;
|
oclMat gsum;
|
||||||
oclMat gsqsum;
|
oclMat gsqsum;
|
||||||
cv::ocl::integral(gimg, gsum, gsqsum);
|
cv::ocl::integral(gimg, gsum, gsqsum);
|
||||||
@ -1276,7 +1268,6 @@ CvSeq *cv::ocl::OclCascadeClassifier::oclHaarDetectObjects( oclMat &gimg, CvMemS
|
|||||||
scaleinfo[i].imgoff = 0;
|
scaleinfo[i].imgoff = 0;
|
||||||
scaleinfo[i].factor = factor;
|
scaleinfo[i].factor = factor;
|
||||||
int startnodenum = nodenum * i;
|
int startnodenum = nodenum * i;
|
||||||
int argcounts = 0;
|
|
||||||
float factor2 = (float)factor;
|
float factor2 = (float)factor;
|
||||||
/*
|
/*
|
||||||
openCLSafeCall(clSetKernelArg(kernel2, argcounts++, sizeof(cl_mem), (void *)&nodebuffer));
|
openCLSafeCall(clSetKernelArg(kernel2, argcounts++, sizeof(cl_mem), (void *)&nodebuffer));
|
||||||
@ -1294,7 +1285,6 @@ CvSeq *cv::ocl::OclCascadeClassifier::oclHaarDetectObjects( oclMat &gimg, CvMemS
|
|||||||
args1.push_back ( make_pair(sizeof(cl_int) , (void *)&startnodenum ));
|
args1.push_back ( make_pair(sizeof(cl_int) , (void *)&startnodenum ));
|
||||||
|
|
||||||
size_t globalThreads2[3] = {nodenum, 1, 1};
|
size_t globalThreads2[3] = {nodenum, 1, 1};
|
||||||
size_t localThreads2[3] = {256, 1, 1};
|
|
||||||
|
|
||||||
openCLExecuteKernel(gsum.clCxt, &haarobjectdetect_scaled2, "gpuscaleclassifier", globalThreads2, NULL/*localThreads2*/, args1, -1, -1);
|
openCLExecuteKernel(gsum.clCxt, &haarobjectdetect_scaled2, "gpuscaleclassifier", globalThreads2, NULL/*localThreads2*/, args1, -1, -1);
|
||||||
|
|
||||||
@ -2478,13 +2468,13 @@ else
|
|||||||
// } /* j */
|
// } /* j */
|
||||||
// }
|
// }
|
||||||
//}
|
//}
|
||||||
|
/*
|
||||||
CV_INLINE
|
CV_INLINE
|
||||||
double gpuEvalHidHaarClassifier( GpuHidHaarClassifier *classifier,
|
double gpuEvalHidHaarClassifier( GpuHidHaarClassifier *classifier,
|
||||||
double variance_norm_factor,
|
double variance_norm_factor,
|
||||||
size_t p_offset )
|
size_t p_offset )
|
||||||
{
|
{
|
||||||
/*
|
|
||||||
int idx = 0;
|
int idx = 0;
|
||||||
do
|
do
|
||||||
{
|
{
|
||||||
@ -2501,14 +2491,15 @@ double gpuEvalHidHaarClassifier( GpuHidHaarClassifier *classifier,
|
|||||||
}
|
}
|
||||||
while( idx > 0 );
|
while( idx > 0 );
|
||||||
return classifier->alpha[-idx];
|
return classifier->alpha[-idx];
|
||||||
*/
|
|
||||||
return 0.;
|
return 0.;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
*/
|
||||||
CV_IMPL int
|
CV_IMPL int
|
||||||
gpuRunHaarClassifierCascade( const CvHaarClassifierCascade *_cascade,
|
gpuRunHaarClassifierCascade( /*const CvHaarClassifierCascade *_cascade,
|
||||||
CvPoint pt, int start_stage )
|
CvPoint pt, int start_stage */)
|
||||||
{
|
{
|
||||||
/*
|
/*
|
||||||
int result = -1;
|
int result = -1;
|
||||||
@ -2620,7 +2611,7 @@ namespace cv
|
|||||||
for( y = y1; y < y2; y += ystep )
|
for( y = y1; y < y2; y += ystep )
|
||||||
for( x = 0; x < ssz.width; x += ystep )
|
for( x = 0; x < ssz.width; x += ystep )
|
||||||
{
|
{
|
||||||
if( gpuRunHaarClassifierCascade( cascade, cvPoint(x, y), 0 ) > 0 )
|
if( gpuRunHaarClassifierCascade( /*cascade, cvPoint(x, y), 0*/ ) > 0 )
|
||||||
vec->push_back(Rect(cvRound(x * factor), cvRound(y * factor),
|
vec->push_back(Rect(cvRound(x * factor), cvRound(y * factor),
|
||||||
winSize.width, winSize.height));
|
winSize.width, winSize.height));
|
||||||
}
|
}
|
||||||
@ -2679,7 +2670,7 @@ namespace cv
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
int result = gpuRunHaarClassifierCascade( cascade, cvPoint(x, y), 0 );
|
int result = gpuRunHaarClassifierCascade(/* cascade, cvPoint(x, y), 0 */);
|
||||||
if( result > 0 )
|
if( result > 0 )
|
||||||
vec->push_back(Rect(x, y, winsize.width, winsize.height));
|
vec->push_back(Rect(x, y, winsize.width, winsize.height));
|
||||||
ixstep = result != 0 ? 1 : 2;
|
ixstep = result != 0 ? 1 : 2;
|
||||||
|
@ -296,9 +296,9 @@ namespace cv
|
|||||||
kernelName = "remapNNF1Constant";
|
kernelName = "remapNNF1Constant";
|
||||||
}
|
}
|
||||||
|
|
||||||
int channels = dst.oclchannels();
|
//int channels = dst.oclchannels();
|
||||||
int depth = dst.depth();
|
//int depth = dst.depth();
|
||||||
int type = src.type();
|
//int type = src.type();
|
||||||
size_t blkSizeX = 16, blkSizeY = 16;
|
size_t blkSizeX = 16, blkSizeY = 16;
|
||||||
size_t glbSizeX;
|
size_t glbSizeX;
|
||||||
int cols = dst.cols;
|
int cols = dst.cols;
|
||||||
@ -308,7 +308,7 @@ namespace cv
|
|||||||
glbSizeX = cols % blkSizeX == 0 ? cols : (cols / blkSizeX + 1) * blkSizeX;
|
glbSizeX = cols % blkSizeX == 0 ? cols : (cols / blkSizeX + 1) * blkSizeX;
|
||||||
|
|
||||||
}
|
}
|
||||||
else if(src.type() == CV_8UC3 || src.type() == CV_8UC4 || src.type() == CV_32FC1)
|
else if(src.type() == CV_32FC1 && interpolation == INTER_LINEAR)
|
||||||
{
|
{
|
||||||
cols = (dst.cols + (dst.offset >> 2) % 4 + 3) / 4;
|
cols = (dst.cols + (dst.offset >> 2) % 4 + 3) / 4;
|
||||||
glbSizeX = cols % blkSizeX == 0 ? cols : (cols / blkSizeX + 1) * blkSizeX;
|
glbSizeX = cols % blkSizeX == 0 ? cols : (cols / blkSizeX + 1) * blkSizeX;
|
||||||
@ -322,73 +322,6 @@ namespace cv
|
|||||||
size_t glbSizeY = dst.rows % blkSizeY == 0 ? dst.rows : (dst.rows / blkSizeY + 1) * blkSizeY;
|
size_t glbSizeY = dst.rows % blkSizeY == 0 ? dst.rows : (dst.rows / blkSizeY + 1) * blkSizeY;
|
||||||
size_t globalThreads[3] = {glbSizeX, glbSizeY, 1};
|
size_t globalThreads[3] = {glbSizeX, glbSizeY, 1};
|
||||||
size_t localThreads[3] = {blkSizeX, blkSizeY, 1};
|
size_t localThreads[3] = {blkSizeX, blkSizeY, 1};
|
||||||
/*
|
|
||||||
/////////////////////////////
|
|
||||||
//using the image buffer
|
|
||||||
/////////////////////////////
|
|
||||||
|
|
||||||
size_t image_row_pitch = 0;
|
|
||||||
cl_int err1, err2, err3;
|
|
||||||
cl_mem_flags flags1 = CL_MEM_READ_ONLY;
|
|
||||||
cl_image_format format;
|
|
||||||
if(src.type() == CV_8UC1)
|
|
||||||
{
|
|
||||||
format.image_channel_order = CL_R;
|
|
||||||
format.image_channel_data_type = CL_UNSIGNED_INT8;
|
|
||||||
}
|
|
||||||
else if(src.type() == CV_8UC4)
|
|
||||||
{
|
|
||||||
format.image_channel_order = CL_RGBA;
|
|
||||||
format.image_channel_data_type = CL_UNSIGNED_INT8;
|
|
||||||
}
|
|
||||||
else if(src.type() == CV_32FC1)
|
|
||||||
{
|
|
||||||
format.image_channel_order = CL_R;
|
|
||||||
format.image_channel_data_type = CL_FLOAT;
|
|
||||||
}
|
|
||||||
else if(src.type() == CV_32FC4)
|
|
||||||
{
|
|
||||||
format.image_channel_order = CL_RGBA;
|
|
||||||
format.image_channel_data_type = CL_FLOAT;
|
|
||||||
}
|
|
||||||
cl_mem srcImage = clCreateImage2D(clCxt->impl->clContext, flags1, &format, src.cols, src.rows,
|
|
||||||
image_row_pitch, NULL, &err1);
|
|
||||||
if(err1 != CL_SUCCESS)
|
|
||||||
{
|
|
||||||
printf("Error creating CL image buffer, error code %d\n", err1);
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
const size_t src_origin[3] = {0, 0, 0};
|
|
||||||
const size_t region[3] = {src.cols, src.rows, 1};
|
|
||||||
cl_event BtoI_event, ItoB_event;
|
|
||||||
err3 = clEnqueueCopyBufferToImage(clCxt->impl->clCmdQueue, (cl_mem)src.data, srcImage,
|
|
||||||
0, src_origin, region, 0, NULL, NULL);
|
|
||||||
if(err3 != CL_SUCCESS)
|
|
||||||
{
|
|
||||||
printf("Error copying buffer to image\n");
|
|
||||||
printf("Error code %d \n", err3);
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
// clWaitForEvents(1, &BtoI_event);
|
|
||||||
|
|
||||||
cl_int ret;
|
|
||||||
Mat test(src.rows, src.cols, CV_8UC1);
|
|
||||||
memset(test.data, 0, src.rows*src.cols);
|
|
||||||
ret = clEnqueueReadImage(clCxt->impl->clCmdQueue, srcImage, CL_TRUE,
|
|
||||||
src_origin, region, 0, 0, test.data, NULL, NULL, &ItoB_event);
|
|
||||||
if(ret != CL_SUCCESS)
|
|
||||||
{
|
|
||||||
printf("read image error, %d ", ret);
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
clWaitForEvents(1, &ItoB_event);
|
|
||||||
|
|
||||||
cout << "src" << endl;
|
|
||||||
cout << src << endl;
|
|
||||||
cout<<"image:"<<endl;
|
|
||||||
cout<< test << endl;
|
|
||||||
|
|
||||||
*/
|
|
||||||
|
|
||||||
|
|
||||||
vector< pair<size_t, const void *> > args;
|
vector< pair<size_t, const void *> > args;
|
||||||
@ -396,7 +329,6 @@ namespace cv
|
|||||||
{
|
{
|
||||||
args.push_back( make_pair(sizeof(cl_mem), (void *)&dst.data));
|
args.push_back( make_pair(sizeof(cl_mem), (void *)&dst.data));
|
||||||
args.push_back( make_pair(sizeof(cl_mem), (void *)&src.data));
|
args.push_back( make_pair(sizeof(cl_mem), (void *)&src.data));
|
||||||
// args.push_back( make_pair(sizeof(cl_mem),(void*)&srcImage)); //imageBuffer
|
|
||||||
args.push_back( make_pair(sizeof(cl_mem), (void *)&map1.data));
|
args.push_back( make_pair(sizeof(cl_mem), (void *)&map1.data));
|
||||||
args.push_back( make_pair(sizeof(cl_int), (void *)&dst.offset));
|
args.push_back( make_pair(sizeof(cl_int), (void *)&dst.offset));
|
||||||
args.push_back( make_pair(sizeof(cl_int), (void *)&src.offset));
|
args.push_back( make_pair(sizeof(cl_int), (void *)&src.offset));
|
||||||
@ -425,7 +357,6 @@ namespace cv
|
|||||||
{
|
{
|
||||||
args.push_back( make_pair(sizeof(cl_mem), (void *)&dst.data));
|
args.push_back( make_pair(sizeof(cl_mem), (void *)&dst.data));
|
||||||
args.push_back( make_pair(sizeof(cl_mem), (void *)&src.data));
|
args.push_back( make_pair(sizeof(cl_mem), (void *)&src.data));
|
||||||
// args.push_back( make_pair(sizeof(cl_mem),(void*)&srcImage)); //imageBuffer
|
|
||||||
args.push_back( make_pair(sizeof(cl_mem), (void *)&map1.data));
|
args.push_back( make_pair(sizeof(cl_mem), (void *)&map1.data));
|
||||||
args.push_back( make_pair(sizeof(cl_mem), (void *)&map2.data));
|
args.push_back( make_pair(sizeof(cl_mem), (void *)&map2.data));
|
||||||
args.push_back( make_pair(sizeof(cl_int), (void *)&dst.offset));
|
args.push_back( make_pair(sizeof(cl_int), (void *)&dst.offset));
|
||||||
@ -1369,10 +1300,10 @@ namespace cv
|
|||||||
if( src.depth() != CV_8U || src.oclchannels() != 4 )
|
if( src.depth() != CV_8U || src.oclchannels() != 4 )
|
||||||
CV_Error( CV_StsUnsupportedFormat, "Only 8-bit, 4-channel images are supported" );
|
CV_Error( CV_StsUnsupportedFormat, "Only 8-bit, 4-channel images are supported" );
|
||||||
|
|
||||||
if(src.clCxt->impl->double_support == 0)
|
// if(src.clCxt->impl->double_support == 0)
|
||||||
{
|
// {
|
||||||
CV_Error( CV_GpuNotSupported, "Selected device doesn't support double, so a deviation is exists.\nIf the accuracy is acceptable, the error can be ignored.\n");
|
// CV_Error( CV_GpuNotSupported, "Selected device doesn't support double, so a deviation exists.\nIf the accuracy is acceptable, the error can be ignored.\n");
|
||||||
}
|
// }
|
||||||
|
|
||||||
dst.create( src.size(), CV_8UC4 );
|
dst.create( src.size(), CV_8UC4 );
|
||||||
|
|
||||||
@ -1437,10 +1368,10 @@ namespace cv
|
|||||||
if( src.depth() != CV_8U || src.oclchannels() != 4 )
|
if( src.depth() != CV_8U || src.oclchannels() != 4 )
|
||||||
CV_Error( CV_StsUnsupportedFormat, "Only 8-bit, 4-channel images are supported" );
|
CV_Error( CV_StsUnsupportedFormat, "Only 8-bit, 4-channel images are supported" );
|
||||||
|
|
||||||
if(src.clCxt->impl->double_support == 0)
|
// if(src.clCxt->impl->double_support == 0)
|
||||||
{
|
// {
|
||||||
CV_Error( CV_GpuNotSupported, "Selected device doesn't support double, so a deviation is exists.\nIf the accuracy is acceptable, the error can be ignored.\n");
|
// CV_Error( CV_GpuNotSupported, "Selected device doesn't support double, so a deviation exists.\nIf the accuracy is acceptable, the error can be ignored.\n");
|
||||||
}
|
// }
|
||||||
|
|
||||||
dstr.create( src.size(), CV_8UC4 );
|
dstr.create( src.size(), CV_8UC4 );
|
||||||
dstsp.create( src.size(), CV_16SC2 );
|
dstsp.create( src.size(), CV_16SC2 );
|
||||||
@ -1603,7 +1534,7 @@ namespace cv
|
|||||||
int borderType )
|
int borderType )
|
||||||
{
|
{
|
||||||
int cn = src.channels();
|
int cn = src.channels();
|
||||||
int i, j, k, maxk, radius;
|
int i, j, maxk, radius;
|
||||||
Size size = src.size();
|
Size size = src.size();
|
||||||
|
|
||||||
CV_Assert( (src.channels() == 1 || src.channels() == 3) &&
|
CV_Assert( (src.channels() == 1 || src.channels() == 3) &&
|
||||||
|
@ -424,7 +424,7 @@ namespace cv
|
|||||||
|
|
||||||
void openCLCopyBuffer2D(Context *clCxt, void *dst, size_t dpitch, int dst_offset,
|
void openCLCopyBuffer2D(Context *clCxt, void *dst, size_t dpitch, int dst_offset,
|
||||||
const void *src, size_t spitch,
|
const void *src, size_t spitch,
|
||||||
size_t width, size_t height, int src_offset, enum openCLMemcpyKind kind)
|
size_t width, size_t height, int src_offset)
|
||||||
{
|
{
|
||||||
size_t src_origin[3] = {src_offset % spitch, src_offset / spitch, 0};
|
size_t src_origin[3] = {src_offset % spitch, src_offset / spitch, 0};
|
||||||
size_t dst_origin[3] = {dst_offset % dpitch, dst_offset / dpitch, 0};
|
size_t dst_origin[3] = {dst_offset % dpitch, dst_offset / dpitch, 0};
|
||||||
@ -451,7 +451,7 @@ namespace cv
|
|||||||
}
|
}
|
||||||
int savetofile(const Context *clcxt, cl_program &program, const char *fileName)
|
int savetofile(const Context *clcxt, cl_program &program, const char *fileName)
|
||||||
{
|
{
|
||||||
cl_int status;
|
//cl_int status;
|
||||||
size_t numDevices = 1;
|
size_t numDevices = 1;
|
||||||
cl_device_id *devices = clcxt->impl->devices;
|
cl_device_id *devices = clcxt->impl->devices;
|
||||||
//figure out the sizes of each of the binaries.
|
//figure out the sizes of each of the binaries.
|
||||||
@ -507,7 +507,7 @@ namespace cv
|
|||||||
FILE *fp = fopen(fileName, "wb+");
|
FILE *fp = fopen(fileName, "wb+");
|
||||||
if(fp == NULL)
|
if(fp == NULL)
|
||||||
{
|
{
|
||||||
char *temp;
|
char *temp = NULL;
|
||||||
sprintf(temp, "Failed to load kernel file : %s\r\n", fileName);
|
sprintf(temp, "Failed to load kernel file : %s\r\n", fileName);
|
||||||
CV_Error(CV_GpuApiCallError, temp);
|
CV_Error(CV_GpuApiCallError, temp);
|
||||||
}
|
}
|
||||||
@ -639,8 +639,7 @@ namespace cv
|
|||||||
return kernel;
|
return kernel;
|
||||||
}
|
}
|
||||||
|
|
||||||
void openCLVerifyKernel(const Context *clCxt, cl_kernel kernel, size_t *blockSize,
|
void openCLVerifyKernel(const Context *clCxt, cl_kernel kernel, size_t *localThreads)
|
||||||
size_t *globalThreads, size_t *localThreads)
|
|
||||||
{
|
{
|
||||||
size_t kernelWorkGroupSize;
|
size_t kernelWorkGroupSize;
|
||||||
openCLSafeCall(clGetKernelWorkGroupInfo(kernel, clCxt->impl->devices[0],
|
openCLSafeCall(clGetKernelWorkGroupInfo(kernel, clCxt->impl->devices[0],
|
||||||
@ -679,10 +678,10 @@ namespace cv
|
|||||||
globalThreads[1] = divUp(globalThreads[1], localThreads[1]) * localThreads[1];
|
globalThreads[1] = divUp(globalThreads[1], localThreads[1]) * localThreads[1];
|
||||||
globalThreads[2] = divUp(globalThreads[2], localThreads[2]) * localThreads[2];
|
globalThreads[2] = divUp(globalThreads[2], localThreads[2]) * localThreads[2];
|
||||||
|
|
||||||
size_t blockSize = localThreads[0] * localThreads[1] * localThreads[2];
|
//size_t blockSize = localThreads[0] * localThreads[1] * localThreads[2];
|
||||||
cv::ocl::openCLVerifyKernel(clCxt, kernel, &blockSize, globalThreads, localThreads);
|
cv::ocl::openCLVerifyKernel(clCxt, kernel, localThreads);
|
||||||
}
|
}
|
||||||
for(int i = 0; i < args.size(); i ++)
|
for(size_t i = 0; i < args.size(); i ++)
|
||||||
openCLSafeCall(clSetKernelArg(kernel, i, args[i].first, args[i].second));
|
openCLSafeCall(clSetKernelArg(kernel, i, args[i].first, args[i].second));
|
||||||
|
|
||||||
#ifndef PRINT_KERNEL_RUN_TIME
|
#ifndef PRINT_KERNEL_RUN_TIME
|
||||||
@ -897,7 +896,7 @@ namespace cv
|
|||||||
impl->maxComputeUnits = m.impl->maxComputeUnits;
|
impl->maxComputeUnits = m.impl->maxComputeUnits;
|
||||||
impl->double_support = m.impl->double_support;
|
impl->double_support = m.impl->double_support;
|
||||||
memcpy(impl->extra_options, m.impl->extra_options, 512);
|
memcpy(impl->extra_options, m.impl->extra_options, 512);
|
||||||
for(int i = 0; i < m.impl->devices.size(); i++)
|
for(size_t i = 0; i < m.impl->devices.size(); i++)
|
||||||
{
|
{
|
||||||
impl->devices.push_back(m.impl->devices[i]);
|
impl->devices.push_back(m.impl->devices[i]);
|
||||||
impl->devName.push_back(m.impl->devName[i]);
|
impl->devName.push_back(m.impl->devName[i]);
|
||||||
|
@ -65,7 +65,6 @@ __kernel void addWeighted_D0 (__global uchar *src1,int src1_step,int src1_offset
|
|||||||
{
|
{
|
||||||
|
|
||||||
x = x << 2;
|
x = x << 2;
|
||||||
|
|
||||||
#define dst_align (dst_offset & 3)
|
#define dst_align (dst_offset & 3)
|
||||||
int src1_index = mad24(y, src1_step, x + src1_offset - dst_align);
|
int src1_index = mad24(y, src1_step, x + src1_offset - dst_align);
|
||||||
int src2_index = mad24(y, src2_step, x + src2_offset - dst_align);
|
int src2_index = mad24(y, src2_step, x + src2_offset - dst_align);
|
||||||
@ -130,9 +129,23 @@ __kernel void addWeighted_D2 (__global ushort *src1, int src1_step,int src1_offs
|
|||||||
int dst_start = mad24(y, dst_step, dst_offset);
|
int dst_start = mad24(y, dst_step, dst_offset);
|
||||||
int dst_end = mad24(y, dst_step, dst_offset + dst_step1);
|
int dst_end = mad24(y, dst_step, dst_offset + dst_step1);
|
||||||
int dst_index = mad24(y, dst_step, dst_offset +( x<< 1) & (int)0xfffffff8);
|
int dst_index = mad24(y, dst_step, dst_offset +( x<< 1) & (int)0xfffffff8);
|
||||||
|
int src1_index_fix = src1_index < 0 ? 0 : src1_index;
|
||||||
|
int src2_index_fix = src2_index < 0 ? 0 : src2_index;
|
||||||
|
ushort4 src1_data = vload4(0, (__global ushort *)((__global char *)src1 + src1_index_fix));
|
||||||
|
ushort4 src2_data = vload4(0, (__global ushort *)((__global char *)src2 + src2_index_fix));
|
||||||
|
if(src1_index < 0)
|
||||||
|
{
|
||||||
|
ushort4 tmp;
|
||||||
|
tmp.xyzw = (src1_index == -2) ? src1_data.zwxy:src1_data.yzwx;
|
||||||
|
src1_data.xyzw = (src1_index == -1) ? src1_data.wxyz:tmp.xyzw;
|
||||||
|
}
|
||||||
|
if(src2_index < 0)
|
||||||
|
{
|
||||||
|
ushort4 tmp;
|
||||||
|
tmp.xyzw = (src2_index == -2) ? src2_data.zwxy:src2_data.yzwx;
|
||||||
|
src2_data.xyzw = (src2_index == -1) ? src2_data.wxyz:tmp.xyzw;
|
||||||
|
}
|
||||||
|
|
||||||
ushort4 src1_data = vload4(0, (__global ushort *)((__global char *)src1 + src1_index));
|
|
||||||
ushort4 src2_data = vload4(0, (__global ushort *)((__global char *)src2 + src2_index));
|
|
||||||
|
|
||||||
ushort4 dst_data = *((__global ushort4 *)((__global char *)dst + dst_index));
|
ushort4 dst_data = *((__global ushort4 *)((__global char *)dst + dst_index));
|
||||||
// int4 tmp = convert_int4_sat(src1_data) * alpha + convert_int4_sat(src2_data) * beta + gama;
|
// int4 tmp = convert_int4_sat(src1_data) * alpha + convert_int4_sat(src2_data) * beta + gama;
|
||||||
@ -177,9 +190,23 @@ __kernel void addWeighted_D3 (__global short *src1, int src1_step,int src1_offse
|
|||||||
int dst_end = mad24(y, dst_step, dst_offset + dst_step1);
|
int dst_end = mad24(y, dst_step, dst_offset + dst_step1);
|
||||||
int dst_index = mad24(y, dst_step, dst_offset +( x<< 1) - (dst_align << 1 ));
|
int dst_index = mad24(y, dst_step, dst_offset +( x<< 1) - (dst_align << 1 ));
|
||||||
|
|
||||||
short4 src1_data = vload4(0, (__global short *)((__global char *)src1 + src1_index));
|
int src1_index_fix = src1_index < 0 ? 0 : src1_index;
|
||||||
short4 src2_data = vload4(0, (__global short *)((__global char *)src2 + src2_index));
|
int src2_index_fix = src2_index < 0 ? 0 : src2_index;
|
||||||
|
short4 src1_data = vload4(0, (__global short *)((__global char *)src1 + src1_index_fix));
|
||||||
|
short4 src2_data = vload4(0, (__global short *)((__global char *)src2 + src2_index_fix));
|
||||||
|
|
||||||
|
if(src1_index < 0)
|
||||||
|
{
|
||||||
|
short4 tmp;
|
||||||
|
tmp.xyzw = (src1_index == -2) ? src1_data.zwxy:src1_data.yzwx;
|
||||||
|
src1_data.xyzw = (src1_index == -1) ? src1_data.wxyz:tmp.xyzw;
|
||||||
|
}
|
||||||
|
if(src2_index < 0)
|
||||||
|
{
|
||||||
|
short4 tmp;
|
||||||
|
tmp.xyzw = (src2_index == -2) ? src2_data.zwxy:src2_data.yzwx;
|
||||||
|
src2_data.xyzw = (src2_index == -1) ? src2_data.wxyz:tmp.xyzw;
|
||||||
|
}
|
||||||
short4 dst_data = *((__global short4 *)((__global char *)dst + dst_index));
|
short4 dst_data = *((__global short4 *)((__global char *)dst + dst_index));
|
||||||
// int4 tmp = convert_int4_sat(src1_data) * alpha + convert_int4_sat(src2_data) * beta + gama;
|
// int4 tmp = convert_int4_sat(src1_data) * alpha + convert_int4_sat(src2_data) * beta + gama;
|
||||||
int4 tmp;
|
int4 tmp;
|
||||||
@ -225,8 +252,23 @@ __kernel void addWeighted_D4 (__global int *src1, int src1_step,int src1_offset,
|
|||||||
int dst_end = mad24(y, dst_step, dst_offset + dst_step1);
|
int dst_end = mad24(y, dst_step, dst_offset + dst_step1);
|
||||||
int dst_index = mad24(y, dst_step, dst_offset + (x << bitOfInt) -(dst_align << bitOfInt));
|
int dst_index = mad24(y, dst_step, dst_offset + (x << bitOfInt) -(dst_align << bitOfInt));
|
||||||
|
|
||||||
int4 src1_data = vload4(0, (__global int *)((__global char *)src1 + src1_index));
|
int src1_index_fix = src1_index < 0 ? 0 : src1_index;
|
||||||
int4 src2_data = vload4(0, (__global int *)((__global char *)src2 + src2_index));
|
int src2_index_fix = src2_index < 0 ? 0 : src2_index;
|
||||||
|
int4 src1_data = vload4(0, (__global int *)((__global char *)src1 + src1_index_fix));
|
||||||
|
int4 src2_data = vload4(0, (__global int *)((__global char *)src2 + src2_index_fix));
|
||||||
|
|
||||||
|
if(src1_index < 0)
|
||||||
|
{
|
||||||
|
int4 tmp;
|
||||||
|
tmp.xyzw = (src1_index == -2) ? src1_data.zwxy:src1_data.yzwx;
|
||||||
|
src1_data.xyzw = (src1_index == -1) ? src1_data.wxyz:tmp.xyzw;
|
||||||
|
}
|
||||||
|
if(src2_index < 0)
|
||||||
|
{
|
||||||
|
int4 tmp;
|
||||||
|
tmp.xyzw = (src2_index == -2) ? src2_data.zwxy:src2_data.yzwx;
|
||||||
|
src2_data.xyzw = (src2_index == -1) ? src2_data.wxyz:tmp.xyzw;
|
||||||
|
}
|
||||||
int4 dst_data = *((__global int4 *)((__global char *)dst + dst_index));
|
int4 dst_data = *((__global int4 *)((__global char *)dst + dst_index));
|
||||||
// double4 tmp = convert_double4(src1_data) * alpha + convert_double4(src2_data) * beta + gama ;
|
// double4 tmp = convert_double4(src1_data) * alpha + convert_double4(src2_data) * beta + gama ;
|
||||||
float4 tmp;
|
float4 tmp;
|
||||||
@ -271,9 +313,23 @@ __kernel void addWeighted_D5 (__global float *src1,int src1_step,int src1_offset
|
|||||||
int dst_end = mad24(y, dst_step, dst_offset + dst_step1);
|
int dst_end = mad24(y, dst_step, dst_offset + dst_step1);
|
||||||
int dst_index = mad24(y, dst_step, dst_offset + (x << 2) -(dst_align << 2));
|
int dst_index = mad24(y, dst_step, dst_offset + (x << 2) -(dst_align << 2));
|
||||||
|
|
||||||
float4 src1_data = vload4(0, (__global float *)((__global char *)src1 + src1_index));
|
int src1_index_fix = src1_index < 0 ? 0 : src1_index;
|
||||||
float4 src2_data = vload4(0, (__global float *)((__global char *)src2 + src2_index));
|
int src2_index_fix = src2_index < 0 ? 0 : src2_index;
|
||||||
|
float4 src1_data = vload4(0, (__global float *)((__global char *)src1 + src1_index_fix));
|
||||||
|
float4 src2_data = vload4(0, (__global float *)((__global char *)src2 + src2_index_fix));
|
||||||
float4 dst_data = *((__global float4 *)((__global char *)dst + dst_index));
|
float4 dst_data = *((__global float4 *)((__global char *)dst + dst_index));
|
||||||
|
if(src1_index < 0)
|
||||||
|
{
|
||||||
|
float4 tmp;
|
||||||
|
tmp.xyzw = (src1_index == -2) ? src1_data.zwxy:src1_data.yzwx;
|
||||||
|
src1_data.xyzw = (src1_index == -1) ? src1_data.wxyz:tmp.xyzw;
|
||||||
|
}
|
||||||
|
if(src2_index < 0)
|
||||||
|
{
|
||||||
|
float4 tmp;
|
||||||
|
tmp.xyzw = (src2_index == -2) ? src2_data.zwxy:src2_data.yzwx;
|
||||||
|
src2_data.xyzw = (src2_index == -1) ? src2_data.wxyz:tmp.xyzw;
|
||||||
|
}
|
||||||
// double4 tmp = convert_double4(src1_data) * alpha + convert_double4(src2_data) * beta + gama ;
|
// double4 tmp = convert_double4(src1_data) * alpha + convert_double4(src2_data) * beta + gama ;
|
||||||
|
|
||||||
// float4 tmp_data =(src1_data) * alpha + (src2_data) * beta + gama ;
|
// float4 tmp_data =(src1_data) * alpha + (src2_data) * beta + gama ;
|
||||||
@ -319,9 +375,23 @@ __kernel void addWeighted_D6 (__global double *src1, int src1_step,int src1_offs
|
|||||||
int dst_end = mad24(y, dst_step, dst_offset + dst_step1);
|
int dst_end = mad24(y, dst_step, dst_offset + dst_step1);
|
||||||
int dst_index = mad24(y, dst_step, dst_offset + (x << 3) -(dst_align << 3));
|
int dst_index = mad24(y, dst_step, dst_offset + (x << 3) -(dst_align << 3));
|
||||||
|
|
||||||
double4 src1_data = vload4(0, (__global double *)((__global char *)src1 + src1_index));
|
int src1_index_fix = src1_index < 0 ? 0 : src1_index;
|
||||||
double4 src2_data = vload4(0, (__global double *)((__global char *)src2 + src2_index));
|
int src2_index_fix = src2_index < 0 ? 0 : src2_index;
|
||||||
|
double4 src1_data = vload4(0, (__global double *)((__global char *)src1 + src1_index_fix));
|
||||||
|
double4 src2_data = vload4(0, (__global double *)((__global char *)src2 + src2_index_fix));
|
||||||
double4 dst_data = *((__global double4 *)((__global char *)dst + dst_index));
|
double4 dst_data = *((__global double4 *)((__global char *)dst + dst_index));
|
||||||
|
if(src1_index < 0)
|
||||||
|
{
|
||||||
|
double4 tmp;
|
||||||
|
tmp.xyzw = (src1_index == -2) ? src1_data.zwxy:src1_data.yzwx;
|
||||||
|
src1_data.xyzw = (src1_index == -1) ? src1_data.wxyz:tmp.xyzw;
|
||||||
|
}
|
||||||
|
if(src2_index < 0)
|
||||||
|
{
|
||||||
|
double4 tmp;
|
||||||
|
tmp.xyzw = (src2_index == -2) ? src2_data.zwxy:src2_data.yzwx;
|
||||||
|
src2_data.xyzw = (src2_index == -1) ? src2_data.wxyz:tmp.xyzw;
|
||||||
|
}
|
||||||
// double4 tmp_data = (src1_data) * alpha + (src2_data) * beta + gama ;
|
// double4 tmp_data = (src1_data) * alpha + (src2_data) * beta + gama ;
|
||||||
double4 tmp_data;
|
double4 tmp_data;
|
||||||
tmp_data.x = src1_data.x * alpha + src2_data.x * beta + gama;
|
tmp_data.x = src1_data.x * alpha + src2_data.x * beta + gama;
|
||||||
|
@ -69,9 +69,23 @@ __kernel void arithm_bitwise_and_D0 (__global uchar *src1, int src1_step, int sr
|
|||||||
int dst_start = mad24(y, dst_step, dst_offset);
|
int dst_start = mad24(y, dst_step, dst_offset);
|
||||||
int dst_end = mad24(y, dst_step, dst_offset + dst_step1);
|
int dst_end = mad24(y, dst_step, dst_offset + dst_step1);
|
||||||
int dst_index = mad24(y, dst_step, dst_offset + x & (int)0xfffffffc);
|
int dst_index = mad24(y, dst_step, dst_offset + x & (int)0xfffffffc);
|
||||||
|
int src1_index_fix = src1_index < 0 ? 0 : src1_index;
|
||||||
|
int src2_index_fix = src2_index < 0 ? 0 : src2_index;
|
||||||
|
uchar4 src1_data = vload4(0, src1 + src1_index_fix);
|
||||||
|
uchar4 src2_data = vload4(0, src2 + src2_index_fix);
|
||||||
|
|
||||||
uchar4 src1_data = vload4(0, src1 + src1_index);
|
if(src1_index < 0)
|
||||||
uchar4 src2_data = vload4(0, src2 + src2_index);
|
{
|
||||||
|
uchar4 tmp;
|
||||||
|
tmp.xyzw = (src1_index == -2) ? src1_data.zwxy:src1_data.yzwx;
|
||||||
|
src1_data.xyzw = (src1_index == -1) ? src1_data.wxyz:tmp.xyzw;
|
||||||
|
}
|
||||||
|
if(src2_index < 0)
|
||||||
|
{
|
||||||
|
uchar4 tmp;
|
||||||
|
tmp.xyzw = (src2_index == -2) ? src2_data.zwxy:src2_data.yzwx;
|
||||||
|
src2_data.xyzw = (src2_index == -1) ? src2_data.wxyz:tmp.xyzw;
|
||||||
|
}
|
||||||
|
|
||||||
uchar4 dst_data = *((__global uchar4 *)(dst + dst_index));
|
uchar4 dst_data = *((__global uchar4 *)(dst + dst_index));
|
||||||
uchar4 tmp_data = src1_data & src2_data;
|
uchar4 tmp_data = src1_data & src2_data;
|
||||||
@ -106,9 +120,23 @@ __kernel void arithm_bitwise_and_D1 (__global char *src1, int src1_step, int src
|
|||||||
int dst_end = mad24(y, dst_step, dst_offset + dst_step1);
|
int dst_end = mad24(y, dst_step, dst_offset + dst_step1);
|
||||||
int dst_index = mad24(y, dst_step, dst_offset + x & (int)0xfffffffc);
|
int dst_index = mad24(y, dst_step, dst_offset + x & (int)0xfffffffc);
|
||||||
|
|
||||||
char4 src1_data = vload4(0, src1 + src1_index);
|
int src1_index_fix = src1_index < 0 ? 0 : src1_index;
|
||||||
char4 src2_data = vload4(0, src2 + src2_index);
|
int src2_index_fix = src2_index < 0 ? 0 : src2_index;
|
||||||
|
char4 src1_data = vload4(0, src1 + src1_index_fix);
|
||||||
|
char4 src2_data = vload4(0, src2 + src2_index_fix);
|
||||||
|
|
||||||
|
if(src1_index < 0)
|
||||||
|
{
|
||||||
|
char4 tmp;
|
||||||
|
tmp.xyzw = (src1_index == -2) ? src1_data.zwxy:src1_data.yzwx;
|
||||||
|
src1_data.xyzw = (src1_index == -1) ? src1_data.wxyz:tmp.xyzw;
|
||||||
|
}
|
||||||
|
if(src2_index < 0)
|
||||||
|
{
|
||||||
|
char4 tmp;
|
||||||
|
tmp.xyzw = (src2_index == -2) ? src2_data.zwxy:src2_data.yzwx;
|
||||||
|
src2_data.xyzw = (src2_index == -1) ? src2_data.wxyz:tmp.xyzw;
|
||||||
|
}
|
||||||
char4 dst_data = *((__global char4 *)(dst + dst_index));
|
char4 dst_data = *((__global char4 *)(dst + dst_index));
|
||||||
char4 tmp_data = src1_data & src2_data;
|
char4 tmp_data = src1_data & src2_data;
|
||||||
|
|
||||||
@ -143,9 +171,23 @@ __kernel void arithm_bitwise_and_D2 (__global ushort *src1, int src1_step, int s
|
|||||||
int dst_end = mad24(y, dst_step, dst_offset + dst_step1);
|
int dst_end = mad24(y, dst_step, dst_offset + dst_step1);
|
||||||
int dst_index = mad24(y, dst_step, dst_offset + (x << 1) & (int)0xfffffff8);
|
int dst_index = mad24(y, dst_step, dst_offset + (x << 1) & (int)0xfffffff8);
|
||||||
|
|
||||||
ushort4 src1_data = vload4(0, (__global ushort *)((__global char *)src1 + src1_index));
|
int src1_index_fix = src1_index < 0 ? 0 : src1_index;
|
||||||
ushort4 src2_data = vload4(0, (__global ushort *)((__global char *)src2 + src2_index));
|
int src2_index_fix = src2_index < 0 ? 0 : src2_index;
|
||||||
|
ushort4 src1_data = vload4(0, (__global ushort *)((__global char *)src1 + src1_index_fix));
|
||||||
|
ushort4 src2_data = vload4(0, (__global ushort *)((__global char *)src2 + src2_index_fix));
|
||||||
|
|
||||||
|
if(src1_index < 0)
|
||||||
|
{
|
||||||
|
ushort4 tmp;
|
||||||
|
tmp.xyzw = (src1_index == -2) ? src1_data.zwxy:src1_data.yzwx;
|
||||||
|
src1_data.xyzw = (src1_index == -1) ? src1_data.wxyz:tmp.xyzw;
|
||||||
|
}
|
||||||
|
if(src2_index < 0)
|
||||||
|
{
|
||||||
|
ushort4 tmp;
|
||||||
|
tmp.xyzw = (src2_index == -2) ? src2_data.zwxy:src2_data.yzwx;
|
||||||
|
src2_data.xyzw = (src2_index == -1) ? src2_data.wxyz:tmp.xyzw;
|
||||||
|
}
|
||||||
ushort4 dst_data = *((__global ushort4 *)((__global char *)dst + dst_index));
|
ushort4 dst_data = *((__global ushort4 *)((__global char *)dst + dst_index));
|
||||||
ushort4 tmp_data = src1_data & src2_data;
|
ushort4 tmp_data = src1_data & src2_data;
|
||||||
|
|
||||||
@ -181,9 +223,23 @@ __kernel void arithm_bitwise_and_D3 (__global short *src1, int src1_step, int sr
|
|||||||
int dst_end = mad24(y, dst_step, dst_offset + dst_step1);
|
int dst_end = mad24(y, dst_step, dst_offset + dst_step1);
|
||||||
int dst_index = mad24(y, dst_step, dst_offset + (x << 1) & (int)0xfffffff8);
|
int dst_index = mad24(y, dst_step, dst_offset + (x << 1) & (int)0xfffffff8);
|
||||||
|
|
||||||
short4 src1_data = vload4(0, (__global short *)((__global char *)src1 + src1_index));
|
int src1_index_fix = src1_index < 0 ? 0 : src1_index;
|
||||||
short4 src2_data = vload4(0, (__global short *)((__global char *)src2 + src2_index));
|
int src2_index_fix = src2_index < 0 ? 0 : src2_index;
|
||||||
|
short4 src1_data = vload4(0, (__global short *)((__global char *)src1 + src1_index_fix));
|
||||||
|
short4 src2_data = vload4(0, (__global short *)((__global char *)src2 + src2_index_fix));
|
||||||
|
|
||||||
|
if(src1_index < 0)
|
||||||
|
{
|
||||||
|
short4 tmp;
|
||||||
|
tmp.xyzw = (src1_index == -2) ? src1_data.zwxy:src1_data.yzwx;
|
||||||
|
src1_data.xyzw = (src1_index == -1) ? src1_data.wxyz:tmp.xyzw;
|
||||||
|
}
|
||||||
|
if(src2_index < 0)
|
||||||
|
{
|
||||||
|
short4 tmp;
|
||||||
|
tmp.xyzw = (src2_index == -2) ? src2_data.zwxy:src2_data.yzwx;
|
||||||
|
src2_data.xyzw = (src2_index == -1) ? src2_data.wxyz:tmp.xyzw;
|
||||||
|
}
|
||||||
short4 dst_data = *((__global short4 *)((__global char *)dst + dst_index));
|
short4 dst_data = *((__global short4 *)((__global char *)dst + dst_index));
|
||||||
short4 tmp_data = src1_data & src2_data;
|
short4 tmp_data = src1_data & src2_data;
|
||||||
|
|
||||||
|
@ -67,12 +67,19 @@ __kernel void arithm_bitwise_not_D0 (__global uchar *src1, int src1_step, int sr
|
|||||||
int dst_start = mad24(y, dst_step, dst_offset);
|
int dst_start = mad24(y, dst_step, dst_offset);
|
||||||
int dst_end = mad24(y, dst_step, dst_offset + dst_step1);
|
int dst_end = mad24(y, dst_step, dst_offset + dst_step1);
|
||||||
int dst_index = mad24(y, dst_step, dst_offset + x & (int)0xfffffffc);
|
int dst_index = mad24(y, dst_step, dst_offset + x & (int)0xfffffffc);
|
||||||
|
int src1_index_fix = src1_index < 0 ? 0 : src1_index;
|
||||||
uchar4 src1_data = vload4(0, src1 + src1_index);
|
uchar4 src1_data = vload4(0, src1 + src1_index_fix);
|
||||||
|
|
||||||
uchar4 dst_data = *((__global uchar4 *)(dst + dst_index));
|
uchar4 dst_data = *((__global uchar4 *)(dst + dst_index));
|
||||||
uchar4 tmp_data = ~ src1_data;
|
uchar4 tmp_data = ~ src1_data;
|
||||||
|
|
||||||
|
/* if(src1_index < 0)
|
||||||
|
{
|
||||||
|
uchar4 tmp;
|
||||||
|
tmp.xyzw = (src1_index == -2) ? src1_data.zwxy:src1_data.yzwx;
|
||||||
|
src1_data.xyzw = (src1_index == -1) ? src1_data.wxyz:tmp.xyzw;
|
||||||
|
}
|
||||||
|
*/
|
||||||
dst_data.x = ((dst_index + 0 >= dst_start) && (dst_index + 0 < dst_end)) ? tmp_data.x : dst_data.x;
|
dst_data.x = ((dst_index + 0 >= dst_start) && (dst_index + 0 < dst_end)) ? tmp_data.x : dst_data.x;
|
||||||
dst_data.y = ((dst_index + 1 >= dst_start) && (dst_index + 1 < dst_end)) ? tmp_data.y : dst_data.y;
|
dst_data.y = ((dst_index + 1 >= dst_start) && (dst_index + 1 < dst_end)) ? tmp_data.y : dst_data.y;
|
||||||
dst_data.z = ((dst_index + 2 >= dst_start) && (dst_index + 2 < dst_end)) ? tmp_data.z : dst_data.z;
|
dst_data.z = ((dst_index + 2 >= dst_start) && (dst_index + 2 < dst_end)) ? tmp_data.z : dst_data.z;
|
||||||
|
@ -69,10 +69,22 @@ __kernel void arithm_bitwise_or_D0 (__global uchar *src1, int src1_step, int src
|
|||||||
int dst_start = mad24(y, dst_step, dst_offset);
|
int dst_start = mad24(y, dst_step, dst_offset);
|
||||||
int dst_end = mad24(y, dst_step, dst_offset + dst_step1);
|
int dst_end = mad24(y, dst_step, dst_offset + dst_step1);
|
||||||
int dst_index = mad24(y, dst_step, dst_offset + x & (int)0xfffffffc);
|
int dst_index = mad24(y, dst_step, dst_offset + x & (int)0xfffffffc);
|
||||||
|
int src1_index_fix = src1_index < 0 ? 0 : src1_index;
|
||||||
uchar4 src1_data = vload4(0, src1 + src1_index);
|
int src2_index_fix = src2_index < 0 ? 0 : src2_index;
|
||||||
uchar4 src2_data = vload4(0, src2 + src2_index);
|
uchar4 src1_data = vload4(0, src1 + src1_index_fix);
|
||||||
|
uchar4 src2_data = vload4(0, src2 + src2_index_fix);
|
||||||
|
if(src1_index < 0)
|
||||||
|
{
|
||||||
|
uchar4 tmp;
|
||||||
|
tmp.xyzw = (src1_index == -2) ? src1_data.zwxy:src1_data.yzwx;
|
||||||
|
src1_data.xyzw = (src1_index == -1) ? src1_data.wxyz:tmp.xyzw;
|
||||||
|
}
|
||||||
|
if(src2_index < 0)
|
||||||
|
{
|
||||||
|
uchar4 tmp;
|
||||||
|
tmp.xyzw = (src2_index == -2) ? src2_data.zwxy:src2_data.yzwx;
|
||||||
|
src2_data.xyzw = (src2_index == -1) ? src2_data.wxyz:tmp.xyzw;
|
||||||
|
}
|
||||||
uchar4 dst_data = *((__global uchar4 *)(dst + dst_index));
|
uchar4 dst_data = *((__global uchar4 *)(dst + dst_index));
|
||||||
uchar4 tmp_data = src1_data | src2_data;
|
uchar4 tmp_data = src1_data | src2_data;
|
||||||
|
|
||||||
|
@ -70,9 +70,23 @@ __kernel void arithm_bitwise_xor_D0 (__global uchar *src1, int src1_step, int sr
|
|||||||
int dst_end = mad24(y, dst_step, dst_offset + dst_step1);
|
int dst_end = mad24(y, dst_step, dst_offset + dst_step1);
|
||||||
int dst_index = mad24(y, dst_step, dst_offset + x & (int)0xfffffffc);
|
int dst_index = mad24(y, dst_step, dst_offset + x & (int)0xfffffffc);
|
||||||
|
|
||||||
uchar4 src1_data = vload4(0, src1 + src1_index);
|
int src1_index_fix = src1_index < 0 ? 0 : src1_index;
|
||||||
uchar4 src2_data = vload4(0, src2 + src2_index);
|
int src2_index_fix = src2_index < 0 ? 0 : src2_index;
|
||||||
|
uchar4 src1_data = vload4(0, src1 + src1_index_fix);
|
||||||
|
uchar4 src2_data = vload4(0, src2 + src2_index_fix);
|
||||||
|
|
||||||
|
if(src1_index < 0)
|
||||||
|
{
|
||||||
|
uchar4 tmp;
|
||||||
|
tmp.xyzw = (src1_index == -2) ? src1_data.zwxy:src1_data.yzwx;
|
||||||
|
src1_data.xyzw = (src1_index == -1) ? src1_data.wxyz:tmp.xyzw;
|
||||||
|
}
|
||||||
|
if(src2_index < 0)
|
||||||
|
{
|
||||||
|
uchar4 tmp;
|
||||||
|
tmp.xyzw = (src2_index == -2) ? src2_data.zwxy:src2_data.yzwx;
|
||||||
|
src2_data.xyzw = (src2_index == -1) ? src2_data.wxyz:tmp.xyzw;
|
||||||
|
}
|
||||||
uchar4 dst_data = *((__global uchar4 *)(dst + dst_index));
|
uchar4 dst_data = *((__global uchar4 *)(dst + dst_index));
|
||||||
uchar4 tmp_data = src1_data ^ src2_data;
|
uchar4 tmp_data = src1_data ^ src2_data;
|
||||||
|
|
||||||
@ -106,9 +120,23 @@ __kernel void arithm_bitwise_xor_D1 (__global char *src1, int src1_step, int src
|
|||||||
int dst_end = mad24(y, dst_step, dst_offset + dst_step1);
|
int dst_end = mad24(y, dst_step, dst_offset + dst_step1);
|
||||||
int dst_index = mad24(y, dst_step, dst_offset + x & (int)0xfffffffc);
|
int dst_index = mad24(y, dst_step, dst_offset + x & (int)0xfffffffc);
|
||||||
|
|
||||||
char4 src1_data = vload4(0, src1 + src1_index);
|
int src1_index_fix = src1_index < 0 ? 0 : src1_index;
|
||||||
char4 src2_data = vload4(0, src2 + src2_index);
|
int src2_index_fix = src2_index < 0 ? 0 : src2_index;
|
||||||
|
char4 src1_data = vload4(0, src1 + src1_index_fix);
|
||||||
|
char4 src2_data = vload4(0, src2 + src2_index_fix);
|
||||||
|
|
||||||
|
if(src1_index < 0)
|
||||||
|
{
|
||||||
|
char4 tmp;
|
||||||
|
tmp.xyzw = (src1_index == -2) ? src1_data.zwxy:src1_data.yzwx;
|
||||||
|
src1_data.xyzw = (src1_index == -1) ? src1_data.wxyz:tmp.xyzw;
|
||||||
|
}
|
||||||
|
if(src2_index < 0)
|
||||||
|
{
|
||||||
|
char4 tmp;
|
||||||
|
tmp.xyzw = (src2_index == -2) ? src2_data.zwxy:src2_data.yzwx;
|
||||||
|
src2_data.xyzw = (src2_index == -1) ? src2_data.wxyz:tmp.xyzw;
|
||||||
|
}
|
||||||
char4 dst_data = *((__global char4 *)(dst + dst_index));
|
char4 dst_data = *((__global char4 *)(dst + dst_index));
|
||||||
char4 tmp_data = src1_data ^ src2_data;
|
char4 tmp_data = src1_data ^ src2_data;
|
||||||
|
|
||||||
@ -143,9 +171,23 @@ __kernel void arithm_bitwise_xor_D2 (__global ushort *src1, int src1_step, int s
|
|||||||
int dst_end = mad24(y, dst_step, dst_offset + dst_step1);
|
int dst_end = mad24(y, dst_step, dst_offset + dst_step1);
|
||||||
int dst_index = mad24(y, dst_step, dst_offset + (x << 1) & (int)0xfffffff8);
|
int dst_index = mad24(y, dst_step, dst_offset + (x << 1) & (int)0xfffffff8);
|
||||||
|
|
||||||
ushort4 src1_data = vload4(0, (__global ushort *)((__global char *)src1 + src1_index));
|
int src1_index_fix = src1_index < 0 ? 0 : src1_index;
|
||||||
ushort4 src2_data = vload4(0, (__global ushort *)((__global char *)src2 + src2_index));
|
int src2_index_fix = src2_index < 0 ? 0 : src2_index;
|
||||||
|
ushort4 src1_data = vload4(0, (__global ushort *)((__global char *)src1 + src1_index_fix));
|
||||||
|
ushort4 src2_data = vload4(0, (__global ushort *)((__global char *)src2 + src2_index_fix));
|
||||||
|
|
||||||
|
if(src1_index < 0)
|
||||||
|
{
|
||||||
|
ushort4 tmp;
|
||||||
|
tmp.xyzw = (src1_index == -2) ? src1_data.zwxy:src1_data.yzwx;
|
||||||
|
src1_data.xyzw = (src1_index == -1) ? src1_data.wxyz:tmp.xyzw;
|
||||||
|
}
|
||||||
|
if(src2_index < 0)
|
||||||
|
{
|
||||||
|
ushort4 tmp;
|
||||||
|
tmp.xyzw = (src2_index == -2) ? src2_data.zwxy:src2_data.yzwx;
|
||||||
|
src2_data.xyzw = (src2_index == -1) ? src2_data.wxyz:tmp.xyzw;
|
||||||
|
}
|
||||||
ushort4 dst_data = *((__global ushort4 *)((__global char *)dst + dst_index));
|
ushort4 dst_data = *((__global ushort4 *)((__global char *)dst + dst_index));
|
||||||
ushort4 tmp_data = src1_data ^ src2_data;
|
ushort4 tmp_data = src1_data ^ src2_data;
|
||||||
|
|
||||||
@ -181,10 +223,28 @@ __kernel void arithm_bitwise_xor_D3 (__global short *src1, int src1_step, int sr
|
|||||||
int dst_end = mad24(y, dst_step, dst_offset + dst_step1);
|
int dst_end = mad24(y, dst_step, dst_offset + dst_step1);
|
||||||
int dst_index = mad24(y, dst_step, dst_offset + (x << 1) & (int)0xfffffff8);
|
int dst_index = mad24(y, dst_step, dst_offset + (x << 1) & (int)0xfffffff8);
|
||||||
|
|
||||||
short4 src1_data = vload4(0, (__global short *)((__global char *)src1 + src1_index));
|
int src1_index_fix = src1_index < 0 ? 0 : src1_index;
|
||||||
short4 src2_data = vload4(0, (__global short *)((__global char *)src2 + src2_index));
|
int src2_index_fix = src2_index < 0 ? 0 : src2_index;
|
||||||
|
short4 src1_data = vload4(0, (__global short *)((__global char *)src1 + src1_index_fix));
|
||||||
|
short4 src2_data = vload4(0, (__global short *)((__global char *)src2 + src2_index_fix));
|
||||||
|
|
||||||
short4 dst_data = *((__global short4 *)((__global char *)dst + dst_index));
|
short4 dst_data = *((__global short4 *)((__global char *)dst + dst_index));
|
||||||
|
|
||||||
|
if(src1_index < 0)
|
||||||
|
{
|
||||||
|
short4 tmp;
|
||||||
|
tmp.xyzw = (src1_index == -2) ? src1_data.zwxy:src1_data.yzwx;
|
||||||
|
src1_data.xyzw = (src1_index == -1) ? src1_data.wxyz:tmp.xyzw;
|
||||||
|
}
|
||||||
|
if(src2_index < 0)
|
||||||
|
{
|
||||||
|
short4 tmp;
|
||||||
|
tmp.xyzw = (src2_index == -2) ? src2_data.zwxy:src2_data.yzwx;
|
||||||
|
src2_data.xyzw = (src2_index == -1) ? src2_data.wxyz:tmp.xyzw;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
short4 tmp_data = src1_data ^ src2_data;
|
short4 tmp_data = src1_data ^ src2_data;
|
||||||
|
|
||||||
dst_data.x = ((dst_index + 0 >= dst_start) && (dst_index + 0 < dst_end)) ? tmp_data.x : dst_data.x;
|
dst_data.x = ((dst_index + 0 >= dst_start) && (dst_index + 0 < dst_end)) ? tmp_data.x : dst_data.x;
|
||||||
|
@ -69,9 +69,24 @@ __kernel void arithm_compare_eq_D0 (__global uchar *src1, int src1_step, int src
|
|||||||
int dst_start = mad24(y, dst_step, dst_offset);
|
int dst_start = mad24(y, dst_step, dst_offset);
|
||||||
int dst_end = mad24(y, dst_step, dst_offset + dst_step1);
|
int dst_end = mad24(y, dst_step, dst_offset + dst_step1);
|
||||||
int dst_index = mad24(y, dst_step, dst_offset + x & (int)0xfffffffc);
|
int dst_index = mad24(y, dst_step, dst_offset + x & (int)0xfffffffc);
|
||||||
|
int src1_index_fix = src1_index < 0 ? 0 : src1_index;
|
||||||
|
int src2_index_fix = src2_index < 0 ? 0 : src2_index;
|
||||||
|
uchar4 src1_data = vload4(0, src1 + src1_index_fix);
|
||||||
|
uchar4 src2_data = vload4(0, src2 + src2_index_fix);
|
||||||
|
if(src1_index < 0)
|
||||||
|
{
|
||||||
|
uchar4 tmp;
|
||||||
|
tmp.xyzw = (src1_index == -2) ? src1_data.zwxy:src1_data.yzwx;
|
||||||
|
src1_data.xyzw = (src1_index == -1) ? src1_data.wxyz:tmp.xyzw;
|
||||||
|
}
|
||||||
|
if(src2_index < 0)
|
||||||
|
{
|
||||||
|
uchar4 tmp;
|
||||||
|
tmp.xyzw = (src2_index == -2) ? src2_data.zwxy:src2_data.yzwx;
|
||||||
|
src2_data.xyzw = (src2_index == -1) ? src2_data.wxyz:tmp.xyzw;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
uchar4 src1_data = vload4(0, src1 + src1_index);
|
|
||||||
uchar4 src2_data = vload4(0, src2 + src2_index);
|
|
||||||
|
|
||||||
uchar4 dst_data = *((__global uchar4 *)(dst + dst_index));
|
uchar4 dst_data = *((__global uchar4 *)(dst + dst_index));
|
||||||
uchar4 tmp_data = convert_uchar4((src1_data == src2_data));
|
uchar4 tmp_data = convert_uchar4((src1_data == src2_data));
|
||||||
@ -85,7 +100,8 @@ __kernel void arithm_compare_eq_D0 (__global uchar *src1, int src1_step, int src
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
__kernel void arithm_compare_eq_D2 (__global ushort *src1, int src1_step, int src1_offset,
|
|
||||||
|
__kernel void arithm_compare_ne_D2 (__global ushort *src1, int src1_step, int src1_offset,
|
||||||
__global ushort *src2, int src2_step, int src2_offset,
|
__global ushort *src2, int src2_step, int src2_offset,
|
||||||
__global uchar *dst, int dst_step, int dst_offset,
|
__global uchar *dst, int dst_step, int dst_offset,
|
||||||
int rows, int cols, int dst_step1)
|
int rows, int cols, int dst_step1)
|
||||||
@ -106,8 +122,22 @@ __kernel void arithm_compare_eq_D2 (__global ushort *src1, int src1_step, int sr
|
|||||||
int dst_end = mad24(y, dst_step, dst_offset + dst_step1);
|
int dst_end = mad24(y, dst_step, dst_offset + dst_step1);
|
||||||
int dst_index = mad24(y, dst_step, dst_offset + x & (int)0xfffffffc);
|
int dst_index = mad24(y, dst_step, dst_offset + x & (int)0xfffffffc);
|
||||||
|
|
||||||
|
int src1_index_fix = src1_index < 0 ? 0 : src1_index;
|
||||||
|
int src2_index_fix = src2_index < 0 ? 0 : src2_index;
|
||||||
ushort4 src1_data = vload4(0, (__global ushort *)((__global char *)src1 + src1_index));
|
ushort4 src1_data = vload4(0, (__global ushort *)((__global char *)src1 + src1_index));
|
||||||
ushort4 src2_data = vload4(0, (__global ushort *)((__global char *)src2 + src2_index));
|
ushort4 src2_data = vload4(0, (__global ushort *)((__global char *)src2 + src2_index));
|
||||||
|
if(src1_index < 0)
|
||||||
|
{
|
||||||
|
ushort4 tmp;
|
||||||
|
tmp.xyzw = (src1_index == -2) ? src1_data.zwxy:src1_data.yzwx;
|
||||||
|
src1_data.xyzw = (src1_index == -1) ? src1_data.wxyz:tmp.xyzw;
|
||||||
|
}
|
||||||
|
if(src2_index < 0)
|
||||||
|
{
|
||||||
|
ushort4 tmp;
|
||||||
|
tmp.xyzw = (src2_index == -2) ? src2_data.zwxy:src2_data.yzwx;
|
||||||
|
src2_data.xyzw = (src2_index == -1) ? src2_data.wxyz:tmp.xyzw;
|
||||||
|
}
|
||||||
|
|
||||||
uchar4 dst_data = *((__global uchar4 *)(dst + dst_index));
|
uchar4 dst_data = *((__global uchar4 *)(dst + dst_index));
|
||||||
uchar4 tmp_data = convert_uchar4((src1_data == src2_data));
|
uchar4 tmp_data = convert_uchar4((src1_data == src2_data));
|
||||||
@ -122,7 +152,6 @@ __kernel void arithm_compare_eq_D2 (__global ushort *src1, int src1_step, int sr
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
__kernel void arithm_compare_eq_D3 (__global short *src1, int src1_step, int src1_offset,
|
__kernel void arithm_compare_eq_D3 (__global short *src1, int src1_step, int src1_offset,
|
||||||
__global short *src2, int src2_step, int src2_offset,
|
__global short *src2, int src2_step, int src2_offset,
|
||||||
__global uchar *dst, int dst_step, int dst_offset,
|
__global uchar *dst, int dst_step, int dst_offset,
|
||||||
@ -143,9 +172,25 @@ __kernel void arithm_compare_eq_D3 (__global short *src1, int src1_step, int src
|
|||||||
int dst_start = mad24(y, dst_step, dst_offset);
|
int dst_start = mad24(y, dst_step, dst_offset);
|
||||||
int dst_end = mad24(y, dst_step, dst_offset + dst_step1);
|
int dst_end = mad24(y, dst_step, dst_offset + dst_step1);
|
||||||
int dst_index = mad24(y, dst_step, dst_offset + x & (int)0xfffffffc);
|
int dst_index = mad24(y, dst_step, dst_offset + x & (int)0xfffffffc);
|
||||||
|
int src1_index_fix = src1_index < 0 ? 0 : src1_index;
|
||||||
|
int src2_index_fix = src2_index < 0 ? 0 : src2_index;
|
||||||
short4 src1_data = vload4(0, (__global short *)((__global char *)src1 + src1_index));
|
short4 src1_data = vload4(0, (__global short *)((__global char *)src1 + src1_index));
|
||||||
short4 src2_data = vload4(0, (__global short *)((__global char *)src2 + src2_index));
|
short4 src2_data = vload4(0, (__global short *)((__global char *)src2 + src2_index));
|
||||||
|
if(src1_index < 0)
|
||||||
|
{
|
||||||
|
short4 tmp;
|
||||||
|
tmp.xyzw = (src1_index == -2) ? src1_data.zwxy:src1_data.yzwx;
|
||||||
|
src1_data.xyzw = (src1_index == -1) ? src1_data.wxyz:tmp.xyzw;
|
||||||
|
}
|
||||||
|
if(src2_index < 0)
|
||||||
|
{
|
||||||
|
short4 tmp;
|
||||||
|
tmp.xyzw = (src2_index == -2) ? src2_data.zwxy:src2_data.yzwx;
|
||||||
|
src2_data.xyzw = (src2_index == -1) ? src2_data.wxyz:tmp.xyzw;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
uchar4 dst_data = *((__global uchar4 *)(dst + dst_index));
|
uchar4 dst_data = *((__global uchar4 *)(dst + dst_index));
|
||||||
uchar4 tmp_data = convert_uchar4((src1_data == src2_data));
|
uchar4 tmp_data = convert_uchar4((src1_data == src2_data));
|
||||||
@ -179,9 +224,24 @@ __kernel void arithm_compare_eq_D4 (__global int *src1, int src1_step, int src1_
|
|||||||
int dst_start = mad24(y, dst_step, dst_offset);
|
int dst_start = mad24(y, dst_step, dst_offset);
|
||||||
int dst_end = mad24(y, dst_step, dst_offset + dst_step1);
|
int dst_end = mad24(y, dst_step, dst_offset + dst_step1);
|
||||||
int dst_index = mad24(y, dst_step, dst_offset + x & (int)0xfffffffc);
|
int dst_index = mad24(y, dst_step, dst_offset + x & (int)0xfffffffc);
|
||||||
|
int src1_index_fix = src1_index < 0 ? 0 : src1_index;
|
||||||
|
int src2_index_fix = src2_index < 0 ? 0 : src2_index;
|
||||||
|
|
||||||
int4 src1_data = vload4(0, (__global int *)((__global char *)src1 + src1_index));
|
int4 src1_data = vload4(0, (__global int *)((__global char *)src1 + src1_index));
|
||||||
int4 src2_data = vload4(0, (__global int *)((__global char *)src2 + src2_index));
|
int4 src2_data = vload4(0, (__global int *)((__global char *)src2 + src2_index));
|
||||||
|
if(src1_index < 0)
|
||||||
|
{
|
||||||
|
int4 tmp;
|
||||||
|
tmp.xyzw = (src1_index == -2) ? src1_data.zwxy:src1_data.yzwx;
|
||||||
|
src1_data.xyzw = (src1_index == -1) ? src1_data.wxyz:tmp.xyzw;
|
||||||
|
}
|
||||||
|
if(src2_index < 0)
|
||||||
|
{
|
||||||
|
int4 tmp;
|
||||||
|
tmp.xyzw = (src2_index == -2) ? src2_data.zwxy:src2_data.yzwx;
|
||||||
|
src2_data.xyzw = (src2_index == -1) ? src2_data.wxyz:tmp.xyzw;
|
||||||
|
}
|
||||||
|
|
||||||
uchar4 dst_data = *((__global uchar4 *)(dst + dst_index));
|
uchar4 dst_data = *((__global uchar4 *)(dst + dst_index));
|
||||||
uchar4 tmp_data = convert_uchar4((src1_data == src2_data));
|
uchar4 tmp_data = convert_uchar4((src1_data == src2_data));
|
||||||
|
|
||||||
@ -212,9 +272,17 @@ __kernel void arithm_compare_eq_D5 (__global float *src1, int src1_step, int src
|
|||||||
int dst_start = mad24(y, dst_step, dst_offset);
|
int dst_start = mad24(y, dst_step, dst_offset);
|
||||||
int dst_end = mad24(y, dst_step, dst_offset + dst_step1);
|
int dst_end = mad24(y, dst_step, dst_offset + dst_step1);
|
||||||
int dst_index = mad24(y, dst_step, dst_offset + x & (int)0xfffffffc);
|
int dst_index = mad24(y, dst_step, dst_offset + x & (int)0xfffffffc);
|
||||||
|
int src1_index_fix = src1_index < 0 ? 0 : src1_index;
|
||||||
|
int src2_index_fix = src2_index < 0 ? 0 : src2_index;
|
||||||
|
float4 src1_data = vload4(0, (__global float *)((__global char *)src1 + src1_index_fix));
|
||||||
|
float4 src2_data = vload4(0, (__global float *)((__global char *)src2 + src2_index_fix)); if(src2_index < 0)
|
||||||
|
{
|
||||||
|
float4 tmp;
|
||||||
|
tmp.xyzw = (src2_index == -2) ? src2_data.zwxy:src2_data.yzwx;
|
||||||
|
src2_data.xyzw = (src2_index == -1) ? src2_data.wxyz:tmp.xyzw;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
float4 src1_data = vload4(0, (__global float *)((__global char *)src1 + src1_index));
|
|
||||||
float4 src2_data = vload4(0, (__global float *)((__global char *)src2 + src2_index));
|
|
||||||
uchar4 dst_data = *((__global uchar4 *)(dst + dst_index));
|
uchar4 dst_data = *((__global uchar4 *)(dst + dst_index));
|
||||||
uchar4 tmp_data = convert_uchar4((src1_data == src2_data));
|
uchar4 tmp_data = convert_uchar4((src1_data == src2_data));
|
||||||
|
|
||||||
@ -246,9 +314,24 @@ __kernel void arithm_compare_eq_D6 (__global double *src1, int src1_step, int sr
|
|||||||
int dst_start = mad24(y, dst_step, dst_offset);
|
int dst_start = mad24(y, dst_step, dst_offset);
|
||||||
int dst_end = mad24(y, dst_step, dst_offset + dst_step1);
|
int dst_end = mad24(y, dst_step, dst_offset + dst_step1);
|
||||||
int dst_index = mad24(y, dst_step, dst_offset + x & (int)0xfffffffc);
|
int dst_index = mad24(y, dst_step, dst_offset + x & (int)0xfffffffc);
|
||||||
|
int src1_index_fix = src1_index < 0 ? 0 : src1_index;
|
||||||
|
int src2_index_fix = src2_index < 0 ? 0 : src2_index;
|
||||||
|
double4 src1_data = vload4(0, (__global double *)((__global char *)src1 + src1_index_fix));
|
||||||
|
double4 src2_data = vload4(0, (__global double *)((__global char *)src2 + src2_index_fix));
|
||||||
|
if(src1_index < 0)
|
||||||
|
{
|
||||||
|
double4 tmp;
|
||||||
|
tmp.xyzw = (src1_index == -2) ? src1_data.zwxy:src1_data.yzwx;
|
||||||
|
src1_data.xyzw = (src1_index == -1) ? src1_data.wxyz:tmp.xyzw;
|
||||||
|
}
|
||||||
|
if(src2_index < 0)
|
||||||
|
{
|
||||||
|
double4 tmp;
|
||||||
|
tmp.xyzw = (src2_index == -2) ? src2_data.zwxy:src2_data.yzwx;
|
||||||
|
src2_data.xyzw = (src2_index == -1) ? src2_data.wxyz:tmp.xyzw;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
double4 src1_data = vload4(0, (__global double *)((__global char *)src1 + src1_index));
|
|
||||||
double4 src2_data = vload4(0, (__global double *)((__global char *)src2 + src2_index));
|
|
||||||
uchar4 dst_data = *((__global uchar4 *)(dst + dst_index));
|
uchar4 dst_data = *((__global uchar4 *)(dst + dst_index));
|
||||||
uchar4 tmp_data = convert_uchar4((src1_data == src2_data));
|
uchar4 tmp_data = convert_uchar4((src1_data == src2_data));
|
||||||
|
|
||||||
@ -282,9 +365,24 @@ __kernel void arithm_compare_gt_D0 (__global uchar *src1, int src1_step, int src
|
|||||||
int dst_start = mad24(y, dst_step, dst_offset);
|
int dst_start = mad24(y, dst_step, dst_offset);
|
||||||
int dst_end = mad24(y, dst_step, dst_offset + dst_step1);
|
int dst_end = mad24(y, dst_step, dst_offset + dst_step1);
|
||||||
int dst_index = mad24(y, dst_step, dst_offset + x & (int)0xfffffffc);
|
int dst_index = mad24(y, dst_step, dst_offset + x & (int)0xfffffffc);
|
||||||
|
int src1_index_fix = src1_index < 0 ? 0 : src1_index;
|
||||||
|
int src2_index_fix = src2_index < 0 ? 0 : src2_index;
|
||||||
|
uchar4 src1_data = vload4(0, src1 + src1_index_fix);
|
||||||
|
uchar4 src2_data = vload4(0, src2 + src2_index_fix);
|
||||||
|
if(src1_index < 0)
|
||||||
|
{
|
||||||
|
uchar4 tmp;
|
||||||
|
tmp.xyzw = (src1_index == -2) ? src1_data.zwxy:src1_data.yzwx;
|
||||||
|
src1_data.xyzw = (src1_index == -1) ? src1_data.wxyz:tmp.xyzw;
|
||||||
|
}
|
||||||
|
if(src2_index < 0)
|
||||||
|
{
|
||||||
|
uchar4 tmp;
|
||||||
|
tmp.xyzw = (src2_index == -2) ? src2_data.zwxy:src2_data.yzwx;
|
||||||
|
src2_data.xyzw = (src2_index == -1) ? src2_data.wxyz:tmp.xyzw;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
uchar4 src1_data = vload4(0, src1 + src1_index);
|
|
||||||
uchar4 src2_data = vload4(0, src2 + src2_index);
|
|
||||||
|
|
||||||
uchar4 dst_data = *((__global uchar4 *)(dst + dst_index));
|
uchar4 dst_data = *((__global uchar4 *)(dst + dst_index));
|
||||||
uchar4 tmp_data = convert_uchar4((src1_data > src2_data));
|
uchar4 tmp_data = convert_uchar4((src1_data > src2_data));
|
||||||
@ -318,9 +416,24 @@ __kernel void arithm_compare_gt_D2 (__global ushort *src1, int src1_step, int sr
|
|||||||
int dst_start = mad24(y, dst_step, dst_offset);
|
int dst_start = mad24(y, dst_step, dst_offset);
|
||||||
int dst_end = mad24(y, dst_step, dst_offset + dst_step1);
|
int dst_end = mad24(y, dst_step, dst_offset + dst_step1);
|
||||||
int dst_index = mad24(y, dst_step, dst_offset + x & (int)0xfffffffc);
|
int dst_index = mad24(y, dst_step, dst_offset + x & (int)0xfffffffc);
|
||||||
|
int src1_index_fix = src1_index < 0 ? 0 : src1_index;
|
||||||
|
int src2_index_fix = src2_index < 0 ? 0 : src2_index;
|
||||||
ushort4 src1_data = vload4(0, (__global ushort *)((__global char *)src1 + src1_index));
|
ushort4 src1_data = vload4(0, (__global ushort *)((__global char *)src1 + src1_index));
|
||||||
ushort4 src2_data = vload4(0, (__global ushort *)((__global char *)src2 + src2_index));
|
ushort4 src2_data = vload4(0, (__global ushort *)((__global char *)src2 + src2_index));
|
||||||
|
if(src1_index < 0)
|
||||||
|
{
|
||||||
|
ushort4 tmp;
|
||||||
|
tmp.xyzw = (src1_index == -2) ? src1_data.zwxy:src1_data.yzwx;
|
||||||
|
src1_data.xyzw = (src1_index == -1) ? src1_data.wxyz:tmp.xyzw;
|
||||||
|
}
|
||||||
|
if(src2_index < 0)
|
||||||
|
{
|
||||||
|
ushort4 tmp;
|
||||||
|
tmp.xyzw = (src2_index == -2) ? src2_data.zwxy:src2_data.yzwx;
|
||||||
|
src2_data.xyzw = (src2_index == -1) ? src2_data.wxyz:tmp.xyzw;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
uchar4 dst_data = *((__global uchar4 *)(dst + dst_index));
|
uchar4 dst_data = *((__global uchar4 *)(dst + dst_index));
|
||||||
uchar4 tmp_data = convert_uchar4((src1_data > src2_data));
|
uchar4 tmp_data = convert_uchar4((src1_data > src2_data));
|
||||||
@ -356,9 +469,24 @@ __kernel void arithm_compare_gt_D3 (__global short *src1, int src1_step, int src
|
|||||||
int dst_start = mad24(y, dst_step, dst_offset);
|
int dst_start = mad24(y, dst_step, dst_offset);
|
||||||
int dst_end = mad24(y, dst_step, dst_offset + dst_step1);
|
int dst_end = mad24(y, dst_step, dst_offset + dst_step1);
|
||||||
int dst_index = mad24(y, dst_step, dst_offset + x & (int)0xfffffffc);
|
int dst_index = mad24(y, dst_step, dst_offset + x & (int)0xfffffffc);
|
||||||
|
int src1_index_fix = src1_index < 0 ? 0 : src1_index;
|
||||||
|
int src2_index_fix = src2_index < 0 ? 0 : src2_index;
|
||||||
short4 src1_data = vload4(0, (__global short *)((__global char *)src1 + src1_index));
|
short4 src1_data = vload4(0, (__global short *)((__global char *)src1 + src1_index));
|
||||||
short4 src2_data = vload4(0, (__global short *)((__global char *)src2 + src2_index));
|
short4 src2_data = vload4(0, (__global short *)((__global char *)src2 + src2_index));
|
||||||
|
if(src1_index < 0)
|
||||||
|
{
|
||||||
|
short4 tmp;
|
||||||
|
tmp.xyzw = (src1_index == -2) ? src1_data.zwxy:src1_data.yzwx;
|
||||||
|
src1_data.xyzw = (src1_index == -1) ? src1_data.wxyz:tmp.xyzw;
|
||||||
|
}
|
||||||
|
if(src2_index < 0)
|
||||||
|
{
|
||||||
|
short4 tmp;
|
||||||
|
tmp.xyzw = (src2_index == -2) ? src2_data.zwxy:src2_data.yzwx;
|
||||||
|
src2_data.xyzw = (src2_index == -1) ? src2_data.wxyz:tmp.xyzw;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
uchar4 dst_data = *((__global uchar4 *)(dst + dst_index));
|
uchar4 dst_data = *((__global uchar4 *)(dst + dst_index));
|
||||||
uchar4 tmp_data = convert_uchar4((src1_data > src2_data));
|
uchar4 tmp_data = convert_uchar4((src1_data > src2_data));
|
||||||
@ -390,9 +518,25 @@ __kernel void arithm_compare_gt_D4 (__global int *src1, int src1_step, int src1_
|
|||||||
int dst_start = mad24(y, dst_step, dst_offset);
|
int dst_start = mad24(y, dst_step, dst_offset);
|
||||||
int dst_end = mad24(y, dst_step, dst_offset + dst_step1);
|
int dst_end = mad24(y, dst_step, dst_offset + dst_step1);
|
||||||
int dst_index = mad24(y, dst_step, dst_offset + x & (int)0xfffffffc);
|
int dst_index = mad24(y, dst_step, dst_offset + x & (int)0xfffffffc);
|
||||||
|
int src1_index_fix = src1_index < 0 ? 0 : src1_index;
|
||||||
|
int src2_index_fix = src2_index < 0 ? 0 : src2_index;
|
||||||
|
|
||||||
int4 src1_data = vload4(0, (__global int *)((__global char *)src1 + src1_index));
|
int4 src1_data = vload4(0, (__global int *)((__global char *)src1 + src1_index));
|
||||||
int4 src2_data = vload4(0, (__global int *)((__global char *)src2 + src2_index));
|
int4 src2_data = vload4(0, (__global int *)((__global char *)src2 + src2_index));
|
||||||
|
if(src1_index < 0)
|
||||||
|
{
|
||||||
|
int4 tmp;
|
||||||
|
tmp.xyzw = (src1_index == -2) ? src1_data.zwxy:src1_data.yzwx;
|
||||||
|
src1_data.xyzw = (src1_index == -1) ? src1_data.wxyz:tmp.xyzw;
|
||||||
|
}
|
||||||
|
if(src2_index < 0)
|
||||||
|
{
|
||||||
|
int4 tmp;
|
||||||
|
tmp.xyzw = (src2_index == -2) ? src2_data.zwxy:src2_data.yzwx;
|
||||||
|
src2_data.xyzw = (src2_index == -1) ? src2_data.wxyz:tmp.xyzw;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
uchar4 dst_data = *((__global uchar4 *)(dst + dst_index));
|
uchar4 dst_data = *((__global uchar4 *)(dst + dst_index));
|
||||||
uchar4 tmp_data = convert_uchar4((src1_data > src2_data));
|
uchar4 tmp_data = convert_uchar4((src1_data > src2_data));
|
||||||
|
|
||||||
@ -423,9 +567,24 @@ __kernel void arithm_compare_gt_D5 (__global float *src1, int src1_step, int src
|
|||||||
int dst_start = mad24(y, dst_step, dst_offset);
|
int dst_start = mad24(y, dst_step, dst_offset);
|
||||||
int dst_end = mad24(y, dst_step, dst_offset + dst_step1);
|
int dst_end = mad24(y, dst_step, dst_offset + dst_step1);
|
||||||
int dst_index = mad24(y, dst_step, dst_offset + x & (int)0xfffffffc);
|
int dst_index = mad24(y, dst_step, dst_offset + x & (int)0xfffffffc);
|
||||||
|
int src1_index_fix = src1_index < 0 ? 0 : src1_index;
|
||||||
|
int src2_index_fix = src2_index < 0 ? 0 : src2_index;
|
||||||
|
float4 src1_data = vload4(0, (__global float *)((__global char *)src1 + src1_index_fix));
|
||||||
|
float4 src2_data = vload4(0, (__global float *)((__global char *)src2 + src2_index_fix));
|
||||||
|
if(src1_index < 0)
|
||||||
|
{
|
||||||
|
float4 tmp;
|
||||||
|
tmp.xyzw = (src1_index == -2) ? src1_data.zwxy:src1_data.yzwx;
|
||||||
|
src1_data.xyzw = (src1_index == -1) ? src1_data.wxyz:tmp.xyzw;
|
||||||
|
}
|
||||||
|
if(src2_index < 0)
|
||||||
|
{
|
||||||
|
float4 tmp;
|
||||||
|
tmp.xyzw = (src2_index == -2) ? src2_data.zwxy:src2_data.yzwx;
|
||||||
|
src2_data.xyzw = (src2_index == -1) ? src2_data.wxyz:tmp.xyzw;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
float4 src1_data = vload4(0, (__global float *)((__global char *)src1 + src1_index));
|
|
||||||
float4 src2_data = vload4(0, (__global float *)((__global char *)src2 + src2_index));
|
|
||||||
uchar4 dst_data = *((__global uchar4 *)(dst + dst_index));
|
uchar4 dst_data = *((__global uchar4 *)(dst + dst_index));
|
||||||
uchar4 tmp_data = convert_uchar4((src1_data > src2_data));
|
uchar4 tmp_data = convert_uchar4((src1_data > src2_data));
|
||||||
|
|
||||||
@ -457,9 +616,24 @@ __kernel void arithm_compare_gt_D6 (__global double *src1, int src1_step, int sr
|
|||||||
int dst_start = mad24(y, dst_step, dst_offset);
|
int dst_start = mad24(y, dst_step, dst_offset);
|
||||||
int dst_end = mad24(y, dst_step, dst_offset + dst_step1);
|
int dst_end = mad24(y, dst_step, dst_offset + dst_step1);
|
||||||
int dst_index = mad24(y, dst_step, dst_offset + x & (int)0xfffffffc);
|
int dst_index = mad24(y, dst_step, dst_offset + x & (int)0xfffffffc);
|
||||||
|
int src1_index_fix = src1_index < 0 ? 0 : src1_index;
|
||||||
|
int src2_index_fix = src2_index < 0 ? 0 : src2_index;
|
||||||
|
double4 src1_data = vload4(0, (__global double *)((__global char *)src1 + src1_index_fix));
|
||||||
|
double4 src2_data = vload4(0, (__global double *)((__global char *)src2 + src2_index_fix));
|
||||||
|
if(src1_index < 0)
|
||||||
|
{
|
||||||
|
double4 tmp;
|
||||||
|
tmp.xyzw = (src1_index == -2) ? src1_data.zwxy:src1_data.yzwx;
|
||||||
|
src1_data.xyzw = (src1_index == -1) ? src1_data.wxyz:tmp.xyzw;
|
||||||
|
}
|
||||||
|
if(src2_index < 0)
|
||||||
|
{
|
||||||
|
double4 tmp;
|
||||||
|
tmp.xyzw = (src2_index == -2) ? src2_data.zwxy:src2_data.yzwx;
|
||||||
|
src2_data.xyzw = (src2_index == -1) ? src2_data.wxyz:tmp.xyzw;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
double4 src1_data = vload4(0, (__global double *)((__global char *)src1 + src1_index));
|
|
||||||
double4 src2_data = vload4(0, (__global double *)((__global char *)src2 + src2_index));
|
|
||||||
uchar4 dst_data = *((__global uchar4 *)(dst + dst_index));
|
uchar4 dst_data = *((__global uchar4 *)(dst + dst_index));
|
||||||
uchar4 tmp_data = convert_uchar4((src1_data > src2_data));
|
uchar4 tmp_data = convert_uchar4((src1_data > src2_data));
|
||||||
|
|
||||||
@ -494,8 +668,24 @@ __kernel void arithm_compare_ge_D0 (__global uchar *src1, int src1_step, int src
|
|||||||
int dst_end = mad24(y, dst_step, dst_offset + dst_step1);
|
int dst_end = mad24(y, dst_step, dst_offset + dst_step1);
|
||||||
int dst_index = mad24(y, dst_step, dst_offset + x & (int)0xfffffffc);
|
int dst_index = mad24(y, dst_step, dst_offset + x & (int)0xfffffffc);
|
||||||
|
|
||||||
uchar4 src1_data = vload4(0, src1 + src1_index);
|
int src1_index_fix = src1_index < 0 ? 0 : src1_index;
|
||||||
uchar4 src2_data = vload4(0, src2 + src2_index);
|
int src2_index_fix = src2_index < 0 ? 0 : src2_index;
|
||||||
|
uchar4 src1_data = vload4(0, src1 + src1_index_fix);
|
||||||
|
uchar4 src2_data = vload4(0, src2 + src2_index_fix);
|
||||||
|
if(src1_index < 0)
|
||||||
|
{
|
||||||
|
uchar4 tmp;
|
||||||
|
tmp.xyzw = (src1_index == -2) ? src1_data.zwxy:src1_data.yzwx;
|
||||||
|
src1_data.xyzw = (src1_index == -1) ? src1_data.wxyz:tmp.xyzw;
|
||||||
|
}
|
||||||
|
if(src2_index < 0)
|
||||||
|
{
|
||||||
|
uchar4 tmp;
|
||||||
|
tmp.xyzw = (src2_index == -2) ? src2_data.zwxy:src2_data.yzwx;
|
||||||
|
src2_data.xyzw = (src2_index == -1) ? src2_data.wxyz:tmp.xyzw;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
uchar4 dst_data = *((__global uchar4 *)(dst + dst_index));
|
uchar4 dst_data = *((__global uchar4 *)(dst + dst_index));
|
||||||
uchar4 tmp_data = convert_uchar4((src1_data >= src2_data));
|
uchar4 tmp_data = convert_uchar4((src1_data >= src2_data));
|
||||||
@ -532,8 +722,25 @@ __kernel void arithm_compare_ge_D2 (__global ushort *src1, int src1_step, int sr
|
|||||||
int dst_end = mad24(y, dst_step, dst_offset + dst_step1);
|
int dst_end = mad24(y, dst_step, dst_offset + dst_step1);
|
||||||
int dst_index = mad24(y, dst_step, dst_offset + x & (int)0xfffffffc);
|
int dst_index = mad24(y, dst_step, dst_offset + x & (int)0xfffffffc);
|
||||||
|
|
||||||
|
int src1_index_fix = src1_index < 0 ? 0 : src1_index;
|
||||||
|
int src2_index_fix = src2_index < 0 ? 0 : src2_index;
|
||||||
ushort4 src1_data = vload4(0, (__global ushort *)((__global char *)src1 + src1_index));
|
ushort4 src1_data = vload4(0, (__global ushort *)((__global char *)src1 + src1_index));
|
||||||
ushort4 src2_data = vload4(0, (__global ushort *)((__global char *)src2 + src2_index));
|
ushort4 src2_data = vload4(0, (__global ushort *)((__global char *)src2 + src2_index));
|
||||||
|
if(src1_index < 0)
|
||||||
|
{
|
||||||
|
ushort4 tmp;
|
||||||
|
tmp.xyzw = (src1_index == -2) ? src1_data.zwxy:src1_data.yzwx;
|
||||||
|
src1_data.xyzw = (src1_index == -1) ? src1_data.wxyz:tmp.xyzw;
|
||||||
|
}
|
||||||
|
if(src2_index < 0)
|
||||||
|
{
|
||||||
|
ushort4 tmp;
|
||||||
|
tmp.xyzw = (src2_index == -2) ? src2_data.zwxy:src2_data.yzwx;
|
||||||
|
src2_data.xyzw = (src2_index == -1) ? src2_data.wxyz:tmp.xyzw;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
uchar4 dst_data = *((__global uchar4 *)(dst + dst_index));
|
uchar4 dst_data = *((__global uchar4 *)(dst + dst_index));
|
||||||
uchar4 tmp_data = convert_uchar4((src1_data >= src2_data));
|
uchar4 tmp_data = convert_uchar4((src1_data >= src2_data));
|
||||||
@ -570,8 +777,24 @@ __kernel void arithm_compare_ge_D3 (__global short *src1, int src1_step, int src
|
|||||||
int dst_end = mad24(y, dst_step, dst_offset + dst_step1);
|
int dst_end = mad24(y, dst_step, dst_offset + dst_step1);
|
||||||
int dst_index = mad24(y, dst_step, dst_offset + x & (int)0xfffffffc);
|
int dst_index = mad24(y, dst_step, dst_offset + x & (int)0xfffffffc);
|
||||||
|
|
||||||
|
int src1_index_fix = src1_index < 0 ? 0 : src1_index;
|
||||||
|
int src2_index_fix = src2_index < 0 ? 0 : src2_index;
|
||||||
short4 src1_data = vload4(0, (__global short *)((__global char *)src1 + src1_index));
|
short4 src1_data = vload4(0, (__global short *)((__global char *)src1 + src1_index));
|
||||||
short4 src2_data = vload4(0, (__global short *)((__global char *)src2 + src2_index));
|
short4 src2_data = vload4(0, (__global short *)((__global char *)src2 + src2_index));
|
||||||
|
if(src1_index < 0)
|
||||||
|
{
|
||||||
|
short4 tmp;
|
||||||
|
tmp.xyzw = (src1_index == -2) ? src1_data.zwxy:src1_data.yzwx;
|
||||||
|
src1_data.xyzw = (src1_index == -1) ? src1_data.wxyz:tmp.xyzw;
|
||||||
|
}
|
||||||
|
if(src2_index < 0)
|
||||||
|
{
|
||||||
|
short4 tmp;
|
||||||
|
tmp.xyzw = (src2_index == -2) ? src2_data.zwxy:src2_data.yzwx;
|
||||||
|
src2_data.xyzw = (src2_index == -1) ? src2_data.wxyz:tmp.xyzw;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
uchar4 dst_data = *((__global uchar4 *)(dst + dst_index));
|
uchar4 dst_data = *((__global uchar4 *)(dst + dst_index));
|
||||||
uchar4 tmp_data = convert_uchar4((src1_data >= src2_data));
|
uchar4 tmp_data = convert_uchar4((src1_data >= src2_data));
|
||||||
@ -605,8 +828,23 @@ __kernel void arithm_compare_ge_D4 (__global int *src1, int src1_step, int src1_
|
|||||||
int dst_end = mad24(y, dst_step, dst_offset + dst_step1);
|
int dst_end = mad24(y, dst_step, dst_offset + dst_step1);
|
||||||
int dst_index = mad24(y, dst_step, dst_offset + x & (int)0xfffffffc);
|
int dst_index = mad24(y, dst_step, dst_offset + x & (int)0xfffffffc);
|
||||||
|
|
||||||
|
int src1_index_fix = src1_index < 0 ? 0 : src1_index;
|
||||||
|
int src2_index_fix = src2_index < 0 ? 0 : src2_index;
|
||||||
|
|
||||||
int4 src1_data = vload4(0, (__global int *)((__global char *)src1 + src1_index));
|
int4 src1_data = vload4(0, (__global int *)((__global char *)src1 + src1_index));
|
||||||
int4 src2_data = vload4(0, (__global int *)((__global char *)src2 + src2_index));
|
int4 src2_data = vload4(0, (__global int *)((__global char *)src2 + src2_index));
|
||||||
|
if(src1_index < 0)
|
||||||
|
{
|
||||||
|
int4 tmp;
|
||||||
|
tmp.xyzw = (src1_index == -2) ? src1_data.zwxy:src1_data.yzwx;
|
||||||
|
src1_data.xyzw = (src1_index == -1) ? src1_data.wxyz:tmp.xyzw;
|
||||||
|
}
|
||||||
|
if(src2_index < 0)
|
||||||
|
{
|
||||||
|
int4 tmp;
|
||||||
|
tmp.xyzw = (src2_index == -2) ? src2_data.zwxy:src2_data.yzwx;
|
||||||
|
src2_data.xyzw = (src2_index == -1) ? src2_data.wxyz:tmp.xyzw;
|
||||||
|
}
|
||||||
uchar4 dst_data = *((__global uchar4 *)(dst + dst_index));
|
uchar4 dst_data = *((__global uchar4 *)(dst + dst_index));
|
||||||
uchar4 tmp_data = convert_uchar4((src1_data >= src2_data));
|
uchar4 tmp_data = convert_uchar4((src1_data >= src2_data));
|
||||||
|
|
||||||
@ -639,8 +877,24 @@ __kernel void arithm_compare_ge_D5 (__global float *src1, int src1_step, int src
|
|||||||
int dst_end = mad24(y, dst_step, dst_offset + dst_step1);
|
int dst_end = mad24(y, dst_step, dst_offset + dst_step1);
|
||||||
int dst_index = mad24(y, dst_step, dst_offset + x & (int)0xfffffffc);
|
int dst_index = mad24(y, dst_step, dst_offset + x & (int)0xfffffffc);
|
||||||
|
|
||||||
float4 src1_data = vload4(0, (__global float *)((__global char *)src1 + src1_index));
|
int src1_index_fix = src1_index < 0 ? 0 : src1_index;
|
||||||
float4 src2_data = vload4(0, (__global float *)((__global char *)src2 + src2_index));
|
int src2_index_fix = src2_index < 0 ? 0 : src2_index;
|
||||||
|
float4 src1_data = vload4(0, (__global float *)((__global char *)src1 + src1_index_fix));
|
||||||
|
float4 src2_data = vload4(0, (__global float *)((__global char *)src2 + src2_index_fix));
|
||||||
|
if(src1_index < 0)
|
||||||
|
{
|
||||||
|
|
||||||
|
float4 tmp;
|
||||||
|
tmp.xyzw = (src1_index == -2) ? src1_data.zwxy:src1_data.yzwx;
|
||||||
|
src1_data.xyzw = (src1_index == -1) ? src1_data.wxyz:tmp.xyzw;
|
||||||
|
}
|
||||||
|
if(src2_index < 0)
|
||||||
|
{
|
||||||
|
float4 tmp;
|
||||||
|
tmp.xyzw = (src2_index == -2) ? src2_data.zwxy:src2_data.yzwx;
|
||||||
|
src2_data.xyzw = (src2_index == -1) ? src2_data.wxyz:tmp.xyzw;
|
||||||
|
}
|
||||||
|
|
||||||
uchar4 dst_data = *((__global uchar4 *)(dst + dst_index));
|
uchar4 dst_data = *((__global uchar4 *)(dst + dst_index));
|
||||||
uchar4 tmp_data = convert_uchar4((src1_data >= src2_data));
|
uchar4 tmp_data = convert_uchar4((src1_data >= src2_data));
|
||||||
|
|
||||||
@ -673,10 +927,22 @@ __kernel void arithm_compare_ge_D6 (__global double *src1, int src1_step, int sr
|
|||||||
int dst_start = mad24(y, dst_step, dst_offset);
|
int dst_start = mad24(y, dst_step, dst_offset);
|
||||||
int dst_end = mad24(y, dst_step, dst_offset + dst_step1);
|
int dst_end = mad24(y, dst_step, dst_offset + dst_step1);
|
||||||
int dst_index = mad24(y, dst_step, dst_offset + x & (int)0xfffffffc);
|
int dst_index = mad24(y, dst_step, dst_offset + x & (int)0xfffffffc);
|
||||||
|
int src1_index_fix = src1_index < 0 ? 0 : src1_index;
|
||||||
double4 src1_data = vload4(0, (__global double *)((__global char *)src1 + src1_index));
|
int src2_index_fix = src2_index < 0 ? 0 : src2_index;
|
||||||
double4 src2_data = vload4(0, (__global double *)((__global char *)src2 + src2_index));
|
double4 src1_data = vload4(0, (__global double *)((__global char *)src1 + src1_index_fix));
|
||||||
uchar4 dst_data = *((__global uchar4 *)(dst + dst_index));
|
double4 src2_data = vload4(0, (__global double *)((__global char *)src2 + src2_index_fix));
|
||||||
|
if(src1_index < 0)
|
||||||
|
{
|
||||||
|
double4 tmp;
|
||||||
|
tmp.xyzw = (src1_index == -2) ? src1_data.zwxy:src1_data.yzwx;
|
||||||
|
src1_data.xyzw = (src1_index == -1) ? src1_data.wxyz:tmp.xyzw;
|
||||||
|
}
|
||||||
|
if(src2_index < 0)
|
||||||
|
{
|
||||||
|
double4 tmp;
|
||||||
|
tmp.xyzw = (src2_index == -2) ? src2_data.zwxy:src2_data.yzwx;
|
||||||
|
src2_data.xyzw = (src2_index == -1) ? src2_data.wxyz:tmp.xyzw;
|
||||||
|
} uchar4 dst_data = *((__global uchar4 *)(dst + dst_index));
|
||||||
uchar4 tmp_data = convert_uchar4((src1_data >= src2_data));
|
uchar4 tmp_data = convert_uchar4((src1_data >= src2_data));
|
||||||
|
|
||||||
dst_data.x = ((dst_index + 0 >= dst_start) && (dst_index + 0 < dst_end)) ? tmp_data.x : dst_data.x;
|
dst_data.x = ((dst_index + 0 >= dst_start) && (dst_index + 0 < dst_end)) ? tmp_data.x : dst_data.x;
|
||||||
|
@ -65,9 +65,23 @@ __kernel void arithm_compare_ne_D0 (__global uchar *src1, int src1_step, int src
|
|||||||
int dst_start = mad24(y, dst_step, dst_offset);
|
int dst_start = mad24(y, dst_step, dst_offset);
|
||||||
int dst_end = mad24(y, dst_step, dst_offset + dst_step1);
|
int dst_end = mad24(y, dst_step, dst_offset + dst_step1);
|
||||||
int dst_index = mad24(y, dst_step, dst_offset + x & (int)0xfffffffc);
|
int dst_index = mad24(y, dst_step, dst_offset + x & (int)0xfffffffc);
|
||||||
|
int src1_index_fix = src1_index < 0 ? 0 : src1_index;
|
||||||
|
int src2_index_fix = src2_index < 0 ? 0 : src2_index;
|
||||||
|
uchar4 src1_data = vload4(0, src1 + src1_index_fix);
|
||||||
|
uchar4 src2_data = vload4(0, src2 + src2_index_fix);
|
||||||
|
if(src1_index < 0)
|
||||||
|
{
|
||||||
|
uchar4 tmp;
|
||||||
|
tmp.xyzw = (src1_index == -2) ? src1_data.zwxy:src1_data.yzwx;
|
||||||
|
src1_data.xyzw = (src1_index == -1) ? src1_data.wxyz:tmp.xyzw;
|
||||||
|
}
|
||||||
|
if(src2_index < 0)
|
||||||
|
{
|
||||||
|
uchar4 tmp;
|
||||||
|
tmp.xyzw = (src2_index == -2) ? src2_data.zwxy:src2_data.yzwx;
|
||||||
|
src2_data.xyzw = (src2_index == -1) ? src2_data.wxyz:tmp.xyzw;
|
||||||
|
}
|
||||||
|
|
||||||
uchar4 src1_data = vload4(0, src1 + src1_index);
|
|
||||||
uchar4 src2_data = vload4(0, src2 + src2_index);
|
|
||||||
|
|
||||||
uchar4 dst_data = *((__global uchar4 *)(dst + dst_index));
|
uchar4 dst_data = *((__global uchar4 *)(dst + dst_index));
|
||||||
uchar4 tmp_data = convert_uchar4((src1_data != src2_data));
|
uchar4 tmp_data = convert_uchar4((src1_data != src2_data));
|
||||||
@ -104,8 +118,22 @@ __kernel void arithm_compare_ne_D2 (__global ushort *src1, int src1_step, int sr
|
|||||||
int dst_end = mad24(y, dst_step, dst_offset + dst_step1);
|
int dst_end = mad24(y, dst_step, dst_offset + dst_step1);
|
||||||
int dst_index = mad24(y, dst_step, dst_offset + x & (int)0xfffffffc);
|
int dst_index = mad24(y, dst_step, dst_offset + x & (int)0xfffffffc);
|
||||||
|
|
||||||
|
int src1_index_fix = src1_index < 0 ? 0 : src1_index;
|
||||||
|
int src2_index_fix = src2_index < 0 ? 0 : src2_index;
|
||||||
ushort4 src1_data = vload4(0, (__global ushort *)((__global char *)src1 + src1_index));
|
ushort4 src1_data = vload4(0, (__global ushort *)((__global char *)src1 + src1_index));
|
||||||
ushort4 src2_data = vload4(0, (__global ushort *)((__global char *)src2 + src2_index));
|
ushort4 src2_data = vload4(0, (__global ushort *)((__global char *)src2 + src2_index));
|
||||||
|
if(src1_index < 0)
|
||||||
|
{
|
||||||
|
ushort4 tmp;
|
||||||
|
tmp.xyzw = (src1_index == -2) ? src1_data.zwxy:src1_data.yzwx;
|
||||||
|
src1_data.xyzw = (src1_index == -1) ? src1_data.wxyz:tmp.xyzw;
|
||||||
|
}
|
||||||
|
if(src2_index < 0)
|
||||||
|
{
|
||||||
|
ushort4 tmp;
|
||||||
|
tmp.xyzw = (src2_index == -2) ? src2_data.zwxy:src2_data.yzwx;
|
||||||
|
src2_data.xyzw = (src2_index == -1) ? src2_data.wxyz:tmp.xyzw;
|
||||||
|
}
|
||||||
|
|
||||||
uchar4 dst_data = *((__global uchar4 *)(dst + dst_index));
|
uchar4 dst_data = *((__global uchar4 *)(dst + dst_index));
|
||||||
uchar4 tmp_data = convert_uchar4((src1_data != src2_data));
|
uchar4 tmp_data = convert_uchar4((src1_data != src2_data));
|
||||||
@ -141,9 +169,23 @@ __kernel void arithm_compare_ne_D3 (__global short *src1, int src1_step, int src
|
|||||||
int dst_start = mad24(y, dst_step, dst_offset);
|
int dst_start = mad24(y, dst_step, dst_offset);
|
||||||
int dst_end = mad24(y, dst_step, dst_offset + dst_step1);
|
int dst_end = mad24(y, dst_step, dst_offset + dst_step1);
|
||||||
int dst_index = mad24(y, dst_step, dst_offset + x & (int)0xfffffffc);
|
int dst_index = mad24(y, dst_step, dst_offset + x & (int)0xfffffffc);
|
||||||
|
int src1_index_fix = src1_index < 0 ? 0 : src1_index;
|
||||||
|
int src2_index_fix = src2_index < 0 ? 0 : src2_index;
|
||||||
short4 src1_data = vload4(0, (__global short *)((__global char *)src1 + src1_index));
|
short4 src1_data = vload4(0, (__global short *)((__global char *)src1 + src1_index));
|
||||||
short4 src2_data = vload4(0, (__global short *)((__global char *)src2 + src2_index));
|
short4 src2_data = vload4(0, (__global short *)((__global char *)src2 + src2_index));
|
||||||
|
if(src1_index < 0)
|
||||||
|
{
|
||||||
|
short4 tmp;
|
||||||
|
tmp.xyzw = (src1_index == -2) ? src1_data.zwxy:src1_data.yzwx;
|
||||||
|
src1_data.xyzw = (src1_index == -1) ? src1_data.wxyz:tmp.xyzw;
|
||||||
|
}
|
||||||
|
if(src2_index < 0)
|
||||||
|
{
|
||||||
|
short4 tmp;
|
||||||
|
tmp.xyzw = (src2_index == -2) ? src2_data.zwxy:src2_data.yzwx;
|
||||||
|
src2_data.xyzw = (src2_index == -1) ? src2_data.wxyz:tmp.xyzw;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
uchar4 dst_data = *((__global uchar4 *)(dst + dst_index));
|
uchar4 dst_data = *((__global uchar4 *)(dst + dst_index));
|
||||||
uchar4 tmp_data = convert_uchar4((src1_data != src2_data));
|
uchar4 tmp_data = convert_uchar4((src1_data != src2_data));
|
||||||
@ -176,8 +218,24 @@ __kernel void arithm_compare_ne_D4 (__global int *src1, int src1_step, int src1_
|
|||||||
int dst_end = mad24(y, dst_step, dst_offset + dst_step1);
|
int dst_end = mad24(y, dst_step, dst_offset + dst_step1);
|
||||||
int dst_index = mad24(y, dst_step, dst_offset + x & (int)0xfffffffc);
|
int dst_index = mad24(y, dst_step, dst_offset + x & (int)0xfffffffc);
|
||||||
|
|
||||||
|
int src1_index_fix = src1_index < 0 ? 0 : src1_index;
|
||||||
|
int src2_index_fix = src2_index < 0 ? 0 : src2_index;
|
||||||
|
|
||||||
int4 src1_data = vload4(0, (__global int *)((__global char *)src1 + src1_index));
|
int4 src1_data = vload4(0, (__global int *)((__global char *)src1 + src1_index));
|
||||||
int4 src2_data = vload4(0, (__global int *)((__global char *)src2 + src2_index));
|
int4 src2_data = vload4(0, (__global int *)((__global char *)src2 + src2_index));
|
||||||
|
if(src1_index < 0)
|
||||||
|
{
|
||||||
|
int4 tmp;
|
||||||
|
tmp.xyzw = (src1_index == -2) ? src1_data.zwxy:src1_data.yzwx;
|
||||||
|
src1_data.xyzw = (src1_index == -1) ? src1_data.wxyz:tmp.xyzw;
|
||||||
|
}
|
||||||
|
if(src2_index < 0)
|
||||||
|
{
|
||||||
|
int4 tmp;
|
||||||
|
tmp.xyzw = (src2_index == -2) ? src2_data.zwxy:src2_data.yzwx;
|
||||||
|
src2_data.xyzw = (src2_index == -1) ? src2_data.wxyz:tmp.xyzw;
|
||||||
|
}
|
||||||
|
|
||||||
uchar4 dst_data = *((__global uchar4 *)(dst + dst_index));
|
uchar4 dst_data = *((__global uchar4 *)(dst + dst_index));
|
||||||
uchar4 tmp_data = convert_uchar4((src1_data != src2_data));
|
uchar4 tmp_data = convert_uchar4((src1_data != src2_data));
|
||||||
|
|
||||||
@ -208,9 +266,22 @@ __kernel void arithm_compare_ne_D5 (__global float *src1, int src1_step, int src
|
|||||||
int dst_start = mad24(y, dst_step, dst_offset);
|
int dst_start = mad24(y, dst_step, dst_offset);
|
||||||
int dst_end = mad24(y, dst_step, dst_offset + dst_step1);
|
int dst_end = mad24(y, dst_step, dst_offset + dst_step1);
|
||||||
int dst_index = mad24(y, dst_step, dst_offset + x & (int)0xfffffffc);
|
int dst_index = mad24(y, dst_step, dst_offset + x & (int)0xfffffffc);
|
||||||
|
int src1_index_fix = src1_index < 0 ? 0 : src1_index;
|
||||||
|
int src2_index_fix = src2_index < 0 ? 0 : src2_index;
|
||||||
|
float4 src1_data = vload4(0, (__global float *)((__global char *)src1 + src1_index_fix));
|
||||||
|
float4 src2_data = vload4(0, (__global float *)((__global char *)src2 + src2_index_fix)); if(src1_index < 0)
|
||||||
|
{
|
||||||
|
float4 tmp;
|
||||||
|
tmp.xyzw = (src1_index == -2) ? src1_data.zwxy:src1_data.yzwx;
|
||||||
|
src1_data.xyzw = (src1_index == -1) ? src1_data.wxyz:tmp.xyzw;
|
||||||
|
}
|
||||||
|
if(src2_index < 0)
|
||||||
|
{
|
||||||
|
float4 tmp;
|
||||||
|
tmp.xyzw = (src2_index == -2) ? src2_data.zwxy:src2_data.yzwx;
|
||||||
|
src2_data.xyzw = (src2_index == -1) ? src2_data.wxyz:tmp.xyzw;
|
||||||
|
}
|
||||||
|
|
||||||
float4 src1_data = vload4(0, (__global float *)((__global char *)src1 + src1_index));
|
|
||||||
float4 src2_data = vload4(0, (__global float *)((__global char *)src2 + src2_index));
|
|
||||||
uchar4 dst_data = *((__global uchar4 *)(dst + dst_index));
|
uchar4 dst_data = *((__global uchar4 *)(dst + dst_index));
|
||||||
uchar4 tmp_data = convert_uchar4((src1_data != src2_data));
|
uchar4 tmp_data = convert_uchar4((src1_data != src2_data));
|
||||||
|
|
||||||
@ -242,9 +313,24 @@ __kernel void arithm_compare_ne_D6 (__global double *src1, int src1_step, int sr
|
|||||||
int dst_start = mad24(y, dst_step, dst_offset);
|
int dst_start = mad24(y, dst_step, dst_offset);
|
||||||
int dst_end = mad24(y, dst_step, dst_offset + dst_step1);
|
int dst_end = mad24(y, dst_step, dst_offset + dst_step1);
|
||||||
int dst_index = mad24(y, dst_step, dst_offset + x & (int)0xfffffffc);
|
int dst_index = mad24(y, dst_step, dst_offset + x & (int)0xfffffffc);
|
||||||
|
int src1_index_fix = src1_index < 0 ? 0 : src1_index;
|
||||||
|
int src2_index_fix = src2_index < 0 ? 0 : src2_index;
|
||||||
|
double4 src1_data = vload4(0, (__global double *)((__global char *)src1 + src1_index_fix));
|
||||||
|
double4 src2_data = vload4(0, (__global double *)((__global char *)src2 + src2_index_fix));
|
||||||
|
if(src1_index < 0)
|
||||||
|
{
|
||||||
|
double4 tmp;
|
||||||
|
tmp.xyzw = (src1_index == -2) ? src1_data.zwxy:src1_data.yzwx;
|
||||||
|
src1_data.xyzw = (src1_index == -1) ? src1_data.wxyz:tmp.xyzw;
|
||||||
|
}
|
||||||
|
if(src2_index < 0)
|
||||||
|
{
|
||||||
|
double4 tmp;
|
||||||
|
tmp.xyzw = (src2_index == -2) ? src2_data.zwxy:src2_data.yzwx;
|
||||||
|
src2_data.xyzw = (src2_index == -1) ? src2_data.wxyz:tmp.xyzw;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
double4 src1_data = vload4(0, (__global double *)((__global char *)src1 + src1_index));
|
|
||||||
double4 src2_data = vload4(0, (__global double *)((__global char *)src2 + src2_index));
|
|
||||||
uchar4 dst_data = *((__global uchar4 *)(dst + dst_index));
|
uchar4 dst_data = *((__global uchar4 *)(dst + dst_index));
|
||||||
uchar4 tmp_data = convert_uchar4((src1_data != src2_data));
|
uchar4 tmp_data = convert_uchar4((src1_data != src2_data));
|
||||||
|
|
||||||
@ -279,9 +365,23 @@ __kernel void arithm_compare_lt_D0 (__global uchar *src1, int src1_step, int src
|
|||||||
int dst_start = mad24(y, dst_step, dst_offset);
|
int dst_start = mad24(y, dst_step, dst_offset);
|
||||||
int dst_end = mad24(y, dst_step, dst_offset + dst_step1);
|
int dst_end = mad24(y, dst_step, dst_offset + dst_step1);
|
||||||
int dst_index = mad24(y, dst_step, dst_offset + x & (int)0xfffffffc);
|
int dst_index = mad24(y, dst_step, dst_offset + x & (int)0xfffffffc);
|
||||||
|
int src1_index_fix = src1_index < 0 ? 0 : src1_index;
|
||||||
|
int src2_index_fix = src2_index < 0 ? 0 : src2_index;
|
||||||
|
uchar4 src1_data = vload4(0, src1 + src1_index_fix);
|
||||||
|
uchar4 src2_data = vload4(0, src2 + src2_index_fix);
|
||||||
|
if(src1_index < 0)
|
||||||
|
{
|
||||||
|
uchar4 tmp;
|
||||||
|
tmp.xyzw = (src1_index == -2) ? src1_data.zwxy:src1_data.yzwx;
|
||||||
|
src1_data.xyzw = (src1_index == -1) ? src1_data.wxyz:tmp.xyzw;
|
||||||
|
}
|
||||||
|
if(src2_index < 0)
|
||||||
|
{
|
||||||
|
uchar4 tmp;
|
||||||
|
tmp.xyzw = (src2_index == -2) ? src2_data.zwxy:src2_data.yzwx;
|
||||||
|
src2_data.xyzw = (src2_index == -1) ? src2_data.wxyz:tmp.xyzw;
|
||||||
|
}
|
||||||
|
|
||||||
uchar4 src1_data = vload4(0, src1 + src1_index);
|
|
||||||
uchar4 src2_data = vload4(0, src2 + src2_index);
|
|
||||||
|
|
||||||
uchar4 dst_data = *((__global uchar4 *)(dst + dst_index));
|
uchar4 dst_data = *((__global uchar4 *)(dst + dst_index));
|
||||||
uchar4 tmp_data = convert_uchar4((src1_data < src2_data));
|
uchar4 tmp_data = convert_uchar4((src1_data < src2_data));
|
||||||
@ -318,8 +418,23 @@ __kernel void arithm_compare_lt_D2 (__global ushort *src1, int src1_step, int sr
|
|||||||
int dst_end = mad24(y, dst_step, dst_offset + dst_step1);
|
int dst_end = mad24(y, dst_step, dst_offset + dst_step1);
|
||||||
int dst_index = mad24(y, dst_step, dst_offset + x & (int)0xfffffffc);
|
int dst_index = mad24(y, dst_step, dst_offset + x & (int)0xfffffffc);
|
||||||
|
|
||||||
|
int src1_index_fix = src1_index < 0 ? 0 : src1_index;
|
||||||
|
int src2_index_fix = src2_index < 0 ? 0 : src2_index;
|
||||||
ushort4 src1_data = vload4(0, (__global ushort *)((__global char *)src1 + src1_index));
|
ushort4 src1_data = vload4(0, (__global ushort *)((__global char *)src1 + src1_index));
|
||||||
ushort4 src2_data = vload4(0, (__global ushort *)((__global char *)src2 + src2_index));
|
ushort4 src2_data = vload4(0, (__global ushort *)((__global char *)src2 + src2_index));
|
||||||
|
if(src1_index < 0)
|
||||||
|
{
|
||||||
|
ushort4 tmp;
|
||||||
|
tmp.xyzw = (src1_index == -2) ? src1_data.zwxy:src1_data.yzwx;
|
||||||
|
src1_data.xyzw = (src1_index == -1) ? src1_data.wxyz:tmp.xyzw;
|
||||||
|
}
|
||||||
|
if(src2_index < 0)
|
||||||
|
{
|
||||||
|
ushort4 tmp;
|
||||||
|
tmp.xyzw = (src2_index == -2) ? src2_data.zwxy:src2_data.yzwx;
|
||||||
|
src2_data.xyzw = (src2_index == -1) ? src2_data.wxyz:tmp.xyzw;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
uchar4 dst_data = *((__global uchar4 *)(dst + dst_index));
|
uchar4 dst_data = *((__global uchar4 *)(dst + dst_index));
|
||||||
uchar4 tmp_data = convert_uchar4((src1_data < src2_data));
|
uchar4 tmp_data = convert_uchar4((src1_data < src2_data));
|
||||||
@ -355,9 +470,24 @@ __kernel void arithm_compare_lt_D3 (__global short *src1, int src1_step, int src
|
|||||||
int dst_start = mad24(y, dst_step, dst_offset);
|
int dst_start = mad24(y, dst_step, dst_offset);
|
||||||
int dst_end = mad24(y, dst_step, dst_offset + dst_step1);
|
int dst_end = mad24(y, dst_step, dst_offset + dst_step1);
|
||||||
int dst_index = mad24(y, dst_step, dst_offset + x & (int)0xfffffffc);
|
int dst_index = mad24(y, dst_step, dst_offset + x & (int)0xfffffffc);
|
||||||
|
int src1_index_fix = src1_index < 0 ? 0 : src1_index;
|
||||||
|
int src2_index_fix = src2_index < 0 ? 0 : src2_index;
|
||||||
short4 src1_data = vload4(0, (__global short *)((__global char *)src1 + src1_index));
|
short4 src1_data = vload4(0, (__global short *)((__global char *)src1 + src1_index));
|
||||||
short4 src2_data = vload4(0, (__global short *)((__global char *)src2 + src2_index));
|
short4 src2_data = vload4(0, (__global short *)((__global char *)src2 + src2_index));
|
||||||
|
if(src1_index < 0)
|
||||||
|
{
|
||||||
|
short4 tmp;
|
||||||
|
tmp.xyzw = (src1_index == -2) ? src1_data.zwxy:src1_data.yzwx;
|
||||||
|
src1_data.xyzw = (src1_index == -1) ? src1_data.wxyz:tmp.xyzw;
|
||||||
|
}
|
||||||
|
if(src2_index < 0)
|
||||||
|
{
|
||||||
|
short4 tmp;
|
||||||
|
tmp.xyzw = (src2_index == -2) ? src2_data.zwxy:src2_data.yzwx;
|
||||||
|
src2_data.xyzw = (src2_index == -1) ? src2_data.wxyz:tmp.xyzw;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
uchar4 dst_data = *((__global uchar4 *)(dst + dst_index));
|
uchar4 dst_data = *((__global uchar4 *)(dst + dst_index));
|
||||||
uchar4 tmp_data = convert_uchar4((src1_data < src2_data));
|
uchar4 tmp_data = convert_uchar4((src1_data < src2_data));
|
||||||
@ -390,8 +520,27 @@ __kernel void arithm_compare_lt_D4 (__global int *src1, int src1_step, int src1_
|
|||||||
int dst_end = mad24(y, dst_step, dst_offset + dst_step1);
|
int dst_end = mad24(y, dst_step, dst_offset + dst_step1);
|
||||||
int dst_index = mad24(y, dst_step, dst_offset + x & (int)0xfffffffc);
|
int dst_index = mad24(y, dst_step, dst_offset + x & (int)0xfffffffc);
|
||||||
|
|
||||||
|
int src1_index_fix = src1_index < 0 ? 0 : src1_index;
|
||||||
|
int src2_index_fix = src2_index < 0 ? 0 : src2_index;
|
||||||
|
|
||||||
int4 src1_data = vload4(0, (__global int *)((__global char *)src1 + src1_index));
|
int4 src1_data = vload4(0, (__global int *)((__global char *)src1 + src1_index));
|
||||||
int4 src2_data = vload4(0, (__global int *)((__global char *)src2 + src2_index));
|
int4 src2_data = vload4(0, (__global int *)((__global char *)src2 + src2_index));
|
||||||
|
if(src1_index < 0)
|
||||||
|
{
|
||||||
|
int4 tmp;
|
||||||
|
tmp.xyzw = (src1_index == -2) ? src1_data.zwxy:src1_data.yzwx;
|
||||||
|
src1_data.xyzw = (src1_index == -1) ? src1_data.wxyz:tmp.xyzw;
|
||||||
|
}
|
||||||
|
if(src2_index < 0)
|
||||||
|
{
|
||||||
|
int4 tmp;
|
||||||
|
tmp.xyzw = (src2_index == -2) ? src2_data.zwxy:src2_data.yzwx;
|
||||||
|
src2_data.xyzw = (src2_index == -1) ? src2_data.wxyz:tmp.xyzw;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
uchar4 dst_data = *((__global uchar4 *)(dst + dst_index));
|
uchar4 dst_data = *((__global uchar4 *)(dst + dst_index));
|
||||||
uchar4 tmp_data = convert_uchar4((src1_data < src2_data));
|
uchar4 tmp_data = convert_uchar4((src1_data < src2_data));
|
||||||
|
|
||||||
@ -422,9 +571,24 @@ __kernel void arithm_compare_lt_D5 (__global float *src1, int src1_step, int src
|
|||||||
int dst_start = mad24(y, dst_step, dst_offset);
|
int dst_start = mad24(y, dst_step, dst_offset);
|
||||||
int dst_end = mad24(y, dst_step, dst_offset + dst_step1);
|
int dst_end = mad24(y, dst_step, dst_offset + dst_step1);
|
||||||
int dst_index = mad24(y, dst_step, dst_offset + x & (int)0xfffffffc);
|
int dst_index = mad24(y, dst_step, dst_offset + x & (int)0xfffffffc);
|
||||||
|
int src1_index_fix = src1_index < 0 ? 0 : src1_index;
|
||||||
|
int src2_index_fix = src2_index < 0 ? 0 : src2_index;
|
||||||
|
float4 src1_data = vload4(0, (__global float *)((__global char *)src1 + src1_index_fix));
|
||||||
|
float4 src2_data = vload4(0, (__global float *)((__global char *)src2 + src2_index_fix));
|
||||||
|
if(src1_index < 0)
|
||||||
|
{
|
||||||
|
float4 tmp;
|
||||||
|
tmp.xyzw = (src1_index == -2) ? src1_data.zwxy:src1_data.yzwx;
|
||||||
|
src1_data.xyzw = (src1_index == -1) ? src1_data.wxyz:tmp.xyzw;
|
||||||
|
}
|
||||||
|
if(src2_index < 0)
|
||||||
|
{
|
||||||
|
float4 tmp;
|
||||||
|
tmp.xyzw = (src2_index == -2) ? src2_data.zwxy:src2_data.yzwx;
|
||||||
|
src2_data.xyzw = (src2_index == -1) ? src2_data.wxyz:tmp.xyzw;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
float4 src1_data = vload4(0, (__global float *)((__global char *)src1 + src1_index));
|
|
||||||
float4 src2_data = vload4(0, (__global float *)((__global char *)src2 + src2_index));
|
|
||||||
uchar4 dst_data = *((__global uchar4 *)(dst + dst_index));
|
uchar4 dst_data = *((__global uchar4 *)(dst + dst_index));
|
||||||
uchar4 tmp_data = convert_uchar4((src1_data < src2_data));
|
uchar4 tmp_data = convert_uchar4((src1_data < src2_data));
|
||||||
|
|
||||||
@ -456,9 +620,24 @@ __kernel void arithm_compare_lt_D6 (__global double *src1, int src1_step, int sr
|
|||||||
int dst_start = mad24(y, dst_step, dst_offset);
|
int dst_start = mad24(y, dst_step, dst_offset);
|
||||||
int dst_end = mad24(y, dst_step, dst_offset + dst_step1);
|
int dst_end = mad24(y, dst_step, dst_offset + dst_step1);
|
||||||
int dst_index = mad24(y, dst_step, dst_offset + x & (int)0xfffffffc);
|
int dst_index = mad24(y, dst_step, dst_offset + x & (int)0xfffffffc);
|
||||||
|
int src1_index_fix = src1_index < 0 ? 0 : src1_index;
|
||||||
|
int src2_index_fix = src2_index < 0 ? 0 : src2_index;
|
||||||
|
double4 src1_data = vload4(0, (__global double *)((__global char *)src1 + src1_index_fix));
|
||||||
|
double4 src2_data = vload4(0, (__global double *)((__global char *)src2 + src2_index_fix));
|
||||||
|
if(src1_index < 0)
|
||||||
|
{
|
||||||
|
double4 tmp;
|
||||||
|
tmp.xyzw = (src1_index == -2) ? src1_data.zwxy:src1_data.yzwx;
|
||||||
|
src1_data.xyzw = (src1_index == -1) ? src1_data.wxyz:tmp.xyzw;
|
||||||
|
}
|
||||||
|
if(src2_index < 0)
|
||||||
|
{
|
||||||
|
double4 tmp;
|
||||||
|
tmp.xyzw = (src2_index == -2) ? src2_data.zwxy:src2_data.yzwx;
|
||||||
|
src2_data.xyzw = (src2_index == -1) ? src2_data.wxyz:tmp.xyzw;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
double4 src1_data = vload4(0, (__global double *)((__global char *)src1 + src1_index));
|
|
||||||
double4 src2_data = vload4(0, (__global double *)((__global char *)src2 + src2_index));
|
|
||||||
uchar4 dst_data = *((__global uchar4 *)(dst + dst_index));
|
uchar4 dst_data = *((__global uchar4 *)(dst + dst_index));
|
||||||
uchar4 tmp_data = convert_uchar4((src1_data < src2_data));
|
uchar4 tmp_data = convert_uchar4((src1_data < src2_data));
|
||||||
|
|
||||||
@ -492,9 +671,24 @@ __kernel void arithm_compare_le_D0 (__global uchar *src1, int src1_step, int src
|
|||||||
int dst_start = mad24(y, dst_step, dst_offset);
|
int dst_start = mad24(y, dst_step, dst_offset);
|
||||||
int dst_end = mad24(y, dst_step, dst_offset + dst_step1);
|
int dst_end = mad24(y, dst_step, dst_offset + dst_step1);
|
||||||
int dst_index = mad24(y, dst_step, dst_offset + x & (int)0xfffffffc);
|
int dst_index = mad24(y, dst_step, dst_offset + x & (int)0xfffffffc);
|
||||||
|
int src1_index_fix = src1_index < 0 ? 0 : src1_index;
|
||||||
|
int src2_index_fix = src2_index < 0 ? 0 : src2_index;
|
||||||
|
uchar4 src1_data = vload4(0, src1 + src1_index_fix);
|
||||||
|
uchar4 src2_data = vload4(0, src2 + src2_index_fix);
|
||||||
|
if(src1_index < 0)
|
||||||
|
{
|
||||||
|
uchar4 tmp;
|
||||||
|
tmp.xyzw = (src1_index == -2) ? src1_data.zwxy:src1_data.yzwx;
|
||||||
|
src1_data.xyzw = (src1_index == -1) ? src1_data.wxyz:tmp.xyzw;
|
||||||
|
}
|
||||||
|
if(src2_index < 0)
|
||||||
|
{
|
||||||
|
uchar4 tmp;
|
||||||
|
tmp.xyzw = (src2_index == -2) ? src2_data.zwxy:src2_data.yzwx;
|
||||||
|
src2_data.xyzw = (src2_index == -1) ? src2_data.wxyz:tmp.xyzw;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
uchar4 src1_data = vload4(0, src1 + src1_index);
|
|
||||||
uchar4 src2_data = vload4(0, src2 + src2_index);
|
|
||||||
|
|
||||||
uchar4 dst_data = *((__global uchar4 *)(dst + dst_index));
|
uchar4 dst_data = *((__global uchar4 *)(dst + dst_index));
|
||||||
uchar4 tmp_data = convert_uchar4((src1_data <= src2_data));
|
uchar4 tmp_data = convert_uchar4((src1_data <= src2_data));
|
||||||
@ -530,9 +724,24 @@ __kernel void arithm_compare_le_D2 (__global ushort *src1, int src1_step, int sr
|
|||||||
int dst_start = mad24(y, dst_step, dst_offset);
|
int dst_start = mad24(y, dst_step, dst_offset);
|
||||||
int dst_end = mad24(y, dst_step, dst_offset + dst_step1);
|
int dst_end = mad24(y, dst_step, dst_offset + dst_step1);
|
||||||
int dst_index = mad24(y, dst_step, dst_offset + x & (int)0xfffffffc);
|
int dst_index = mad24(y, dst_step, dst_offset + x & (int)0xfffffffc);
|
||||||
|
int src1_index_fix = src1_index < 0 ? 0 : src1_index;
|
||||||
|
int src2_index_fix = src2_index < 0 ? 0 : src2_index;
|
||||||
ushort4 src1_data = vload4(0, (__global ushort *)((__global char *)src1 + src1_index));
|
ushort4 src1_data = vload4(0, (__global ushort *)((__global char *)src1 + src1_index));
|
||||||
ushort4 src2_data = vload4(0, (__global ushort *)((__global char *)src2 + src2_index));
|
ushort4 src2_data = vload4(0, (__global ushort *)((__global char *)src2 + src2_index));
|
||||||
|
if(src1_index < 0)
|
||||||
|
{
|
||||||
|
ushort4 tmp;
|
||||||
|
tmp.xyzw = (src1_index == -2) ? src1_data.zwxy:src1_data.yzwx;
|
||||||
|
src1_data.xyzw = (src1_index == -1) ? src1_data.wxyz:tmp.xyzw;
|
||||||
|
}
|
||||||
|
if(src2_index < 0)
|
||||||
|
{
|
||||||
|
ushort4 tmp;
|
||||||
|
tmp.xyzw = (src2_index == -2) ? src2_data.zwxy:src2_data.yzwx;
|
||||||
|
src2_data.xyzw = (src2_index == -1) ? src2_data.wxyz:tmp.xyzw;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
uchar4 dst_data = *((__global uchar4 *)(dst + dst_index));
|
uchar4 dst_data = *((__global uchar4 *)(dst + dst_index));
|
||||||
uchar4 tmp_data = convert_uchar4((src1_data <= src2_data));
|
uchar4 tmp_data = convert_uchar4((src1_data <= src2_data));
|
||||||
@ -568,9 +777,24 @@ __kernel void arithm_compare_le_D3 (__global short *src1, int src1_step, int src
|
|||||||
int dst_start = mad24(y, dst_step, dst_offset);
|
int dst_start = mad24(y, dst_step, dst_offset);
|
||||||
int dst_end = mad24(y, dst_step, dst_offset + dst_step1);
|
int dst_end = mad24(y, dst_step, dst_offset + dst_step1);
|
||||||
int dst_index = mad24(y, dst_step, dst_offset + x & (int)0xfffffffc);
|
int dst_index = mad24(y, dst_step, dst_offset + x & (int)0xfffffffc);
|
||||||
|
int src1_index_fix = src1_index < 0 ? 0 : src1_index;
|
||||||
|
int src2_index_fix = src2_index < 0 ? 0 : src2_index;
|
||||||
short4 src1_data = vload4(0, (__global short *)((__global char *)src1 + src1_index));
|
short4 src1_data = vload4(0, (__global short *)((__global char *)src1 + src1_index));
|
||||||
short4 src2_data = vload4(0, (__global short *)((__global char *)src2 + src2_index));
|
short4 src2_data = vload4(0, (__global short *)((__global char *)src2 + src2_index));
|
||||||
|
if(src1_index < 0)
|
||||||
|
{
|
||||||
|
short4 tmp;
|
||||||
|
tmp.xyzw = (src1_index == -2) ? src1_data.zwxy:src1_data.yzwx;
|
||||||
|
src1_data.xyzw = (src1_index == -1) ? src1_data.wxyz:tmp.xyzw;
|
||||||
|
}
|
||||||
|
if(src2_index < 0)
|
||||||
|
{
|
||||||
|
short4 tmp;
|
||||||
|
tmp.xyzw = (src2_index == -2) ? src2_data.zwxy:src2_data.yzwx;
|
||||||
|
src2_data.xyzw = (src2_index == -1) ? src2_data.wxyz:tmp.xyzw;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
uchar4 dst_data = *((__global uchar4 *)(dst + dst_index));
|
uchar4 dst_data = *((__global uchar4 *)(dst + dst_index));
|
||||||
uchar4 tmp_data = convert_uchar4((src1_data <= src2_data));
|
uchar4 tmp_data = convert_uchar4((src1_data <= src2_data));
|
||||||
@ -602,9 +826,24 @@ __kernel void arithm_compare_le_D4 (__global int *src1, int src1_step, int src1_
|
|||||||
int dst_start = mad24(y, dst_step, dst_offset);
|
int dst_start = mad24(y, dst_step, dst_offset);
|
||||||
int dst_end = mad24(y, dst_step, dst_offset + dst_step1);
|
int dst_end = mad24(y, dst_step, dst_offset + dst_step1);
|
||||||
int dst_index = mad24(y, dst_step, dst_offset + x & (int)0xfffffffc);
|
int dst_index = mad24(y, dst_step, dst_offset + x & (int)0xfffffffc);
|
||||||
|
int src1_index_fix = src1_index < 0 ? 0 : src1_index;
|
||||||
|
int src2_index_fix = src2_index < 0 ? 0 : src2_index;
|
||||||
|
|
||||||
int4 src1_data = vload4(0, (__global int *)((__global char *)src1 + src1_index));
|
int4 src1_data = vload4(0, (__global int *)((__global char *)src1 + src1_index));
|
||||||
int4 src2_data = vload4(0, (__global int *)((__global char *)src2 + src2_index));
|
int4 src2_data = vload4(0, (__global int *)((__global char *)src2 + src2_index));
|
||||||
|
if(src1_index < 0)
|
||||||
|
{
|
||||||
|
int4 tmp;
|
||||||
|
tmp.xyzw = (src1_index == -2) ? src1_data.zwxy:src1_data.yzwx;
|
||||||
|
src1_data.xyzw = (src1_index == -1) ? src1_data.wxyz:tmp.xyzw;
|
||||||
|
}
|
||||||
|
if(src2_index < 0)
|
||||||
|
{
|
||||||
|
int4 tmp;
|
||||||
|
tmp.xyzw = (src2_index == -2) ? src2_data.zwxy:src2_data.yzwx;
|
||||||
|
src2_data.xyzw = (src2_index == -1) ? src2_data.wxyz:tmp.xyzw;
|
||||||
|
}
|
||||||
|
|
||||||
uchar4 dst_data = *((__global uchar4 *)(dst + dst_index));
|
uchar4 dst_data = *((__global uchar4 *)(dst + dst_index));
|
||||||
uchar4 tmp_data =convert_uchar4((src1_data <= src2_data));
|
uchar4 tmp_data =convert_uchar4((src1_data <= src2_data));
|
||||||
|
|
||||||
@ -635,9 +874,23 @@ __kernel void arithm_compare_le_D5 (__global float *src1, int src1_step, int src
|
|||||||
int dst_start = mad24(y, dst_step, dst_offset);
|
int dst_start = mad24(y, dst_step, dst_offset);
|
||||||
int dst_end = mad24(y, dst_step, dst_offset + dst_step1);
|
int dst_end = mad24(y, dst_step, dst_offset + dst_step1);
|
||||||
int dst_index = mad24(y, dst_step, dst_offset + x & (int)0xfffffffc);
|
int dst_index = mad24(y, dst_step, dst_offset + x & (int)0xfffffffc);
|
||||||
|
int src1_index_fix = src1_index < 0 ? 0 : src1_index;
|
||||||
|
int src2_index_fix = src2_index < 0 ? 0 : src2_index;
|
||||||
|
float4 src1_data = vload4(0, (__global float *)((__global char *)src1 + src1_index_fix));
|
||||||
|
float4 src2_data = vload4(0, (__global float *)((__global char *)src2 + src2_index_fix));
|
||||||
|
if(src1_index < 0)
|
||||||
|
{
|
||||||
|
float4 tmp;
|
||||||
|
tmp.xyzw = (src1_index == -2) ? src1_data.zwxy:src1_data.yzwx;
|
||||||
|
src1_data.xyzw = (src1_index == -1) ? src1_data.wxyz:tmp.xyzw;
|
||||||
|
}
|
||||||
|
if(src2_index < 0)
|
||||||
|
{
|
||||||
|
float4 tmp;
|
||||||
|
tmp.xyzw = (src2_index == -2) ? src2_data.zwxy:src2_data.yzwx;
|
||||||
|
src2_data.xyzw = (src2_index == -1) ? src2_data.wxyz:tmp.xyzw;
|
||||||
|
}
|
||||||
|
|
||||||
float4 src1_data = vload4(0, (__global float *)((__global char *)src1 + src1_index));
|
|
||||||
float4 src2_data = vload4(0, (__global float *)((__global char *)src2 + src2_index));
|
|
||||||
uchar4 dst_data = *((__global uchar4 *)(dst + dst_index));
|
uchar4 dst_data = *((__global uchar4 *)(dst + dst_index));
|
||||||
uchar4 tmp_data = convert_uchar4((src1_data <= src2_data));
|
uchar4 tmp_data = convert_uchar4((src1_data <= src2_data));
|
||||||
|
|
||||||
@ -669,9 +922,24 @@ __kernel void arithm_compare_le_D6 (__global double *src1, int src1_step, int sr
|
|||||||
int dst_start = mad24(y, dst_step, dst_offset);
|
int dst_start = mad24(y, dst_step, dst_offset);
|
||||||
int dst_end = mad24(y, dst_step, dst_offset + dst_step1);
|
int dst_end = mad24(y, dst_step, dst_offset + dst_step1);
|
||||||
int dst_index = mad24(y, dst_step, dst_offset + x & (int)0xfffffffc);
|
int dst_index = mad24(y, dst_step, dst_offset + x & (int)0xfffffffc);
|
||||||
|
int src1_index_fix = src1_index < 0 ? 0 : src1_index;
|
||||||
|
int src2_index_fix = src2_index < 0 ? 0 : src2_index;
|
||||||
|
double4 src1_data = vload4(0, (__global double *)((__global char *)src1 + src1_index_fix));
|
||||||
|
double4 src2_data = vload4(0, (__global double *)((__global char *)src2 + src2_index_fix));
|
||||||
|
if(src1_index < 0)
|
||||||
|
{
|
||||||
|
double4 tmp;
|
||||||
|
tmp.xyzw = (src1_index == -2) ? src1_data.zwxy:src1_data.yzwx;
|
||||||
|
src1_data.xyzw = (src1_index == -1) ? src1_data.wxyz:tmp.xyzw;
|
||||||
|
}
|
||||||
|
if(src2_index < 0)
|
||||||
|
{
|
||||||
|
double4 tmp;
|
||||||
|
tmp.xyzw = (src2_index == -2) ? src2_data.zwxy:src2_data.yzwx;
|
||||||
|
src2_data.xyzw = (src2_index == -1) ? src2_data.wxyz:tmp.xyzw;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
double4 src1_data = vload4(0, (__global double *)((__global char *)src1 + src1_index));
|
|
||||||
double4 src2_data = vload4(0, (__global double *)((__global char *)src2 + src2_index));
|
|
||||||
uchar4 dst_data = *((__global uchar4 *)(dst + dst_index));
|
uchar4 dst_data = *((__global uchar4 *)(dst + dst_index));
|
||||||
uchar4 tmp_data = convert_uchar4((src1_data <= src2_data));
|
uchar4 tmp_data = convert_uchar4((src1_data <= src2_data));
|
||||||
|
|
||||||
|
@ -74,9 +74,22 @@ __kernel void magnitudeSqr_C1_D5 (__global float *src1,int src1_step,int src1_of
|
|||||||
int dst_start = mad24(y, dst_step, dst_offset);
|
int dst_start = mad24(y, dst_step, dst_offset);
|
||||||
int dst_end = mad24(y, dst_step, dst_offset + dst_step1);
|
int dst_end = mad24(y, dst_step, dst_offset + dst_step1);
|
||||||
int dst_index = mad24(y, dst_step, dst_offset + (x << 2) -(dst_align << 2));
|
int dst_index = mad24(y, dst_step, dst_offset + (x << 2) -(dst_align << 2));
|
||||||
|
int src1_index_fix = src1_index < 0 ? 0 : src1_index;
|
||||||
float4 src1_data = vload4(0, (__global float *)((__global char *)src1 + src1_index));
|
int src2_index_fix = src2_index < 0 ? 0 : src2_index;
|
||||||
float4 src2_data = vload4(0, (__global float *)((__global char *)src2 + src2_index));
|
float4 src1_data = vload4(0, (__global float *)((__global char *)src1 + src1_index_fix));
|
||||||
|
float4 src2_data = vload4(0, (__global float *)((__global char *)src2 + src2_index_fix));
|
||||||
|
if(src1_index < 0)
|
||||||
|
{
|
||||||
|
float4 tmp;
|
||||||
|
tmp.xyzw = (src1_index == -2) ? src1_data.zwxy:src1_data.yzwx;
|
||||||
|
src1_data.xyzw = (src1_index == -1) ? src1_data.wxyz:tmp.xyzw;
|
||||||
|
}
|
||||||
|
if(src2_index < 0)
|
||||||
|
{
|
||||||
|
float4 tmp;
|
||||||
|
tmp.xyzw = (src2_index == -2) ? src2_data.zwxy:src2_data.yzwx;
|
||||||
|
src2_data.xyzw = (src2_index == -1) ? src2_data.wxyz:tmp.xyzw;
|
||||||
|
}
|
||||||
float4 dst_data = *((__global float4 *)((__global char *)dst + dst_index));
|
float4 dst_data = *((__global float4 *)((__global char *)dst + dst_index));
|
||||||
|
|
||||||
float4 tmp_data ;
|
float4 tmp_data ;
|
||||||
@ -125,8 +138,19 @@ __kernel void magnitudeSqr_C2_D5 (__global float *src1,int src1_step,int src1_of
|
|||||||
int dst_start = mad24(y, dst_step, dst_offset);
|
int dst_start = mad24(y, dst_step, dst_offset);
|
||||||
int dst_end = mad24(y, dst_step, dst_offset + dst_step1);
|
int dst_end = mad24(y, dst_step, dst_offset + dst_step1);
|
||||||
int dst_index = mad24(y, dst_step, dst_offset + (x << 2) -(dst_align << 2));
|
int dst_index = mad24(y, dst_step, dst_offset + (x << 2) -(dst_align << 2));
|
||||||
|
int src1_index_fix = src1_index < 0 ? 0 : src1_index;
|
||||||
|
|
||||||
|
float8 src1_data = vload8(0, (__global float *)((__global char *)src1 + src1_index_fix));
|
||||||
|
|
||||||
|
if(src1_index==-6)
|
||||||
|
src1_data.s01234567 = src1_data.s67012345;
|
||||||
|
if(src1_index==-4)
|
||||||
|
src1_data.s01234567 = src1_data.s45670123;
|
||||||
|
if(src1_index== -2)
|
||||||
|
src1_data.s01234567 = src1_data.s23456701;
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
float8 src1_data = vload8(0, (__global float *)((__global char *)src1 + src1_index));
|
|
||||||
float4 dst_data = *((__global float4 *)((__global char *)dst + dst_index));
|
float4 dst_data = *((__global float4 *)((__global char *)dst + dst_index));
|
||||||
|
|
||||||
float4 tmp_data ;
|
float4 tmp_data ;
|
||||||
|
File diff suppressed because it is too large
Load Diff
@ -288,8 +288,22 @@ __kernel void split_vector_C2_D0 (__global uchar *mat_src, int src_step, int s
|
|||||||
int dst1_end = mad24(y, dst1_step, dst1_offset + dst_step1);
|
int dst1_end = mad24(y, dst1_step, dst1_offset + dst_step1);
|
||||||
int dst1_idx = mad24(y, dst1_step, dst1_offset + x & (int)0xfffffffc);
|
int dst1_idx = mad24(y, dst1_step, dst1_offset + x & (int)0xfffffffc);
|
||||||
|
|
||||||
|
int src1_index_fix = src_idx_0 < 0 ? 0 : src_idx_0;
|
||||||
|
int src2_index_fix = src_idx_1 < 0 ? 0 : src_idx_1;
|
||||||
uchar8 src_data_0 = vload8(0, mat_src + src_idx_0);
|
uchar8 src_data_0 = vload8(0, mat_src + src_idx_0);
|
||||||
uchar8 src_data_1 = vload8(0, mat_src + src_idx_1);
|
uchar8 src_data_1 = vload8(0, mat_src + src_idx_1);
|
||||||
|
if(src_idx_0 == -6)
|
||||||
|
src_data_0.s01234567 = src_data_0.s67012345;
|
||||||
|
if(src_idx_0 == -4)
|
||||||
|
src_data_0.s01234567 = src_data_0.s45670123;
|
||||||
|
if(src_idx_0 == -2)
|
||||||
|
src_data_0.s01234567 = src_data_0.s23456701;
|
||||||
|
if(src_idx_1 == -6)
|
||||||
|
src_data_1.s01234567 = src_data_1.s67012345;
|
||||||
|
if(src_idx_1 == -4)
|
||||||
|
src_data_1.s01234567 = src_data_1.s45670123;
|
||||||
|
if(src_idx_1 == -2)
|
||||||
|
src_data_1.s01234567 = src_data_1.s23456701;
|
||||||
|
|
||||||
uchar4 dst0_data = *((__global uchar4 *)(mat_dst0 + dst0_idx));
|
uchar4 dst0_data = *((__global uchar4 *)(mat_dst0 + dst0_idx));
|
||||||
uchar4 dst1_data = *((__global uchar4 *)(mat_dst1 + dst1_idx));
|
uchar4 dst1_data = *((__global uchar4 *)(mat_dst1 + dst1_idx));
|
||||||
@ -546,10 +560,22 @@ __kernel void split_vector_C2_D1 (__global char *mat_src, int src_step, int sr
|
|||||||
int dst1_start = mad24(y, dst1_step, dst1_offset);
|
int dst1_start = mad24(y, dst1_step, dst1_offset);
|
||||||
int dst1_end = mad24(y, dst1_step, dst1_offset + dst_step1);
|
int dst1_end = mad24(y, dst1_step, dst1_offset + dst_step1);
|
||||||
int dst1_idx = mad24(y, dst1_step, dst1_offset + x & (int)0xfffffffc);
|
int dst1_idx = mad24(y, dst1_step, dst1_offset + x & (int)0xfffffffc);
|
||||||
|
int src1_index_fix = src_idx_0 < 0 ? 0 : src_idx_0;
|
||||||
|
int src2_index_fix = src_idx_1 < 0 ? 0 : src_idx_1;
|
||||||
char8 src_data_0 = vload8(0, mat_src + src_idx_0);
|
char8 src_data_0 = vload8(0, mat_src + src_idx_0);
|
||||||
char8 src_data_1 = vload8(0, mat_src + src_idx_1);
|
char8 src_data_1 = vload8(0, mat_src + src_idx_1);
|
||||||
|
if(src_idx_0 == -6)
|
||||||
|
src_data_0.s01234567 = src_data_0.s67012345;
|
||||||
|
if(src_idx_0 == -4)
|
||||||
|
src_data_0.s01234567 = src_data_0.s45670123;
|
||||||
|
if(src_idx_0 == -2)
|
||||||
|
src_data_0.s01234567 = src_data_0.s23456701;
|
||||||
|
if(src_idx_1 == -6)
|
||||||
|
src_data_1.s01234567 = src_data_1.s67012345;
|
||||||
|
if(src_idx_1 == -4)
|
||||||
|
src_data_1.s01234567 = src_data_1.s45670123;
|
||||||
|
if(src_idx_1 == -2)
|
||||||
|
src_data_1.s01234567 = src_data_1.s23456701;
|
||||||
char4 dst0_data = *((__global char4 *)(mat_dst0 + dst0_idx));
|
char4 dst0_data = *((__global char4 *)(mat_dst0 + dst0_idx));
|
||||||
char4 dst1_data = *((__global char4 *)(mat_dst1 + dst1_idx));
|
char4 dst1_data = *((__global char4 *)(mat_dst1 + dst1_idx));
|
||||||
|
|
||||||
@ -604,7 +630,14 @@ __kernel void split_vector_C4_D2 (__global ushort *mat_src, int src_step, int
|
|||||||
int dst3_end = mad24(y, dst3_step, dst3_offset + dst_step1);
|
int dst3_end = mad24(y, dst3_step, dst3_offset + dst_step1);
|
||||||
int dst3_idx = mad24(y, dst3_step, dst3_offset + (x << 1) & (int)0xfffffffc);
|
int dst3_idx = mad24(y, dst3_step, dst3_offset + (x << 1) & (int)0xfffffffc);
|
||||||
|
|
||||||
|
int src1_index_fix = src_idx_0 < 0 ? 0 : src_idx_0;
|
||||||
ushort8 src_data0 = vload8(0,(__global ushort *)((__global char *)mat_src + src_idx_0));
|
ushort8 src_data0 = vload8(0,(__global ushort *)((__global char *)mat_src + src_idx_0));
|
||||||
|
if(src_idx_0 == -6)
|
||||||
|
src_data0.s01234567 = src_data0.s67012345;
|
||||||
|
if(src_idx_0 == -4)
|
||||||
|
src_data0.s01234567 = src_data0.s45670123;
|
||||||
|
if(src_idx_0 == -2)
|
||||||
|
src_data0.s01234567 = src_data0.s23456701;
|
||||||
ushort4 src_data1 = *((__global ushort4 *)((__global char *)mat_src + src_idx_1));
|
ushort4 src_data1 = *((__global ushort4 *)((__global char *)mat_src + src_idx_1));
|
||||||
|
|
||||||
ushort2 dst0_data = *((__global ushort2 *)((__global char *)mat_dst0 + dst0_idx));
|
ushort2 dst0_data = *((__global ushort2 *)((__global char *)mat_dst0 + dst0_idx));
|
||||||
@ -727,8 +760,22 @@ __kernel void split_vector_C2_D2 (__global ushort *mat_src, int src_step, int
|
|||||||
int dst1_end = mad24(y, dst1_step, dst1_offset + dst_step1);
|
int dst1_end = mad24(y, dst1_step, dst1_offset + dst_step1);
|
||||||
int dst1_idx = mad24(y, dst1_step, dst1_offset + (x << 1) & (int)0xfffffffc);
|
int dst1_idx = mad24(y, dst1_step, dst1_offset + (x << 1) & (int)0xfffffffc);
|
||||||
|
|
||||||
ushort4 src_data_0 = vload4(0, (__global ushort *)((__global char *)mat_src + src_idx_0));
|
int src1_index_fix = src_idx_0 < 0 ? 0 : src_idx_0;
|
||||||
ushort4 src_data_1 = vload4(0, (__global ushort *)((__global char *)mat_src + src_idx_1));
|
int src2_index_fix = src_idx_1 < 0 ? 0 : src_idx_1;
|
||||||
|
ushort4 src_data_0 = vload4(0, (__global ushort *)((__global char *)mat_src + src1_index_fix));
|
||||||
|
ushort4 src_data_1 = vload4(0, (__global ushort *)((__global char *)mat_src + src2_index_fix));
|
||||||
|
if(src_idx_0 < 0)
|
||||||
|
{
|
||||||
|
ushort4 tmp;
|
||||||
|
tmp.xyzw = (src_idx_0 == -2) ? src_data_0.zwxy : src_data_0.yzwx;
|
||||||
|
src_data_0.xyzw = (src_idx_1 == -1) ? src_data_0.wxyz:tmp.xyzw;
|
||||||
|
}
|
||||||
|
if(src_idx_1 < 0)
|
||||||
|
{
|
||||||
|
ushort4 tmp;
|
||||||
|
tmp.xyzw = (src_idx_1 == -2) ? src_data_1.zwxy : src_data_1.yzwx;
|
||||||
|
src_data_1.xyzw = (src_idx_1 == -1) ? src_data_1.wxyz : tmp.xyzw;
|
||||||
|
}
|
||||||
|
|
||||||
ushort2 dst0_data = *((__global ushort2 *)((__global char *)mat_dst0 + dst0_idx));
|
ushort2 dst0_data = *((__global ushort2 *)((__global char *)mat_dst0 + dst0_idx));
|
||||||
ushort2 dst1_data = *((__global ushort2 *)((__global char *)mat_dst1 + dst1_idx));
|
ushort2 dst1_data = *((__global ushort2 *)((__global char *)mat_dst1 + dst1_idx));
|
||||||
@ -778,8 +825,16 @@ __kernel void split_vector_C4_D3 (__global short *mat_src, int src_step, int s
|
|||||||
int dst3_start = mad24(y, dst3_step, dst3_offset);
|
int dst3_start = mad24(y, dst3_step, dst3_offset);
|
||||||
int dst3_end = mad24(y, dst3_step, dst3_offset + dst_step1);
|
int dst3_end = mad24(y, dst3_step, dst3_offset + dst_step1);
|
||||||
int dst3_idx = mad24(y, dst3_step, dst3_offset + (x << 1) & (int)0xfffffffc);
|
int dst3_idx = mad24(y, dst3_step, dst3_offset + (x << 1) & (int)0xfffffffc);
|
||||||
|
int src1_index_fix = src_idx_0 < 0 ? 0 : src_idx_0;
|
||||||
short8 src_data0 = vload8(0,(__global short *)((__global char *)mat_src + src_idx_0));
|
short8 src_data0 = vload8(0,(__global short *)((__global char *)mat_src + src_idx_0));
|
||||||
|
|
||||||
|
if(src_idx_0 == -6)
|
||||||
|
src_data0.s01234567 = src_data0.s67012345;
|
||||||
|
if(src_idx_0 == -4)
|
||||||
|
src_data0.s01234567 = src_data0.s45670123;
|
||||||
|
if(src_idx_0 == -2)
|
||||||
|
src_data0.s01234567 = src_data0.s23456701;
|
||||||
|
|
||||||
short4 src_data1 = *((__global short4 *)((__global char *)mat_src + src_idx_1));
|
short4 src_data1 = *((__global short4 *)((__global char *)mat_src + src_idx_1));
|
||||||
|
|
||||||
short2 dst0_data = *((__global short2 *)((__global char *)mat_dst0 + dst0_idx));
|
short2 dst0_data = *((__global short2 *)((__global char *)mat_dst0 + dst0_idx));
|
||||||
@ -901,9 +956,23 @@ __kernel void split_vector_C2_D3 (__global short *mat_src, int src_step, int s
|
|||||||
int dst1_start = mad24(y, dst1_step, dst1_offset);
|
int dst1_start = mad24(y, dst1_step, dst1_offset);
|
||||||
int dst1_end = mad24(y, dst1_step, dst1_offset + dst_step1);
|
int dst1_end = mad24(y, dst1_step, dst1_offset + dst_step1);
|
||||||
int dst1_idx = mad24(y, dst1_step, dst1_offset + (x << 1) & (int)0xfffffffc);
|
int dst1_idx = mad24(y, dst1_step, dst1_offset + (x << 1) & (int)0xfffffffc);
|
||||||
|
int src1_index_fix = src_idx_0 < 0 ? 0 : src_idx_0;
|
||||||
|
int src2_index_fix = src_idx_1 < 0 ? 0 : src_idx_1;
|
||||||
short4 src_data_0 = vload4(0, (__global short *)((__global char *)mat_src + src_idx_0));
|
short4 src_data_0 = vload4(0, (__global short *)((__global char *)mat_src + src_idx_0));
|
||||||
short4 src_data_1 = vload4(0, (__global short *)((__global char *)mat_src + src_idx_1));
|
short4 src_data_1 = vload4(0, (__global short *)((__global char *)mat_src + src_idx_1));
|
||||||
|
if(src_idx_0 < 0)
|
||||||
|
{
|
||||||
|
short4 tmp;
|
||||||
|
tmp.xyzw = (src_idx_0 == -2) ? src_data_0.zwxy : src_data_0.yzwx;
|
||||||
|
src_data_0.xyzw = (src_idx_0 == -1) ? src_data_0.wxyz:tmp.xyzw;
|
||||||
|
}
|
||||||
|
if(src_idx_1< 0)
|
||||||
|
{
|
||||||
|
short4 tmp;
|
||||||
|
tmp.xyzw = ( src_idx_1== -2) ? src_data_1.zwxy : src_data_1.yzwx;
|
||||||
|
src_data_1.xyzw = ( src_idx_1== -1) ? src_data_1.wxyz : tmp.xyzw;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
short2 dst0_data = *((__global short2 *)((__global char *)mat_dst0 + dst0_idx));
|
short2 dst0_data = *((__global short2 *)((__global char *)mat_dst0 + dst0_idx));
|
||||||
short2 dst1_data = *((__global short2 *)((__global char *)mat_dst1 + dst1_idx));
|
short2 dst1_data = *((__global short2 *)((__global char *)mat_dst1 + dst1_idx));
|
||||||
|
@ -124,7 +124,7 @@ namespace cv
|
|||||||
result.create(image.rows - templ.rows + 1, image.cols - templ.cols + 1, CV_32F);
|
result.create(image.rows - templ.rows + 1, image.cols - templ.cols + 1, CV_32F);
|
||||||
if (templ.size().area() < getTemplateThreshold(CV_TM_SQDIFF, image.depth()))
|
if (templ.size().area() < getTemplateThreshold(CV_TM_SQDIFF, image.depth()))
|
||||||
{
|
{
|
||||||
matchTemplateNaive_SQDIFF(image, templ, result, image.channels());
|
matchTemplateNaive_SQDIFF(image, templ, result, image.oclchannels());
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
@ -172,7 +172,7 @@ namespace cv
|
|||||||
CV_Assert((image.depth() == CV_8U && templ.depth() == CV_8U )
|
CV_Assert((image.depth() == CV_8U && templ.depth() == CV_8U )
|
||||||
|| ((image.depth() == CV_32F && templ.depth() == CV_32F) && result.depth() == CV_32F)
|
|| ((image.depth() == CV_32F && templ.depth() == CV_32F) && result.depth() == CV_32F)
|
||||||
);
|
);
|
||||||
CV_Assert(image.channels() == templ.channels() && (image.channels() == 1 || image.oclchannels() == 4) && result.channels() == 1);
|
CV_Assert(image.oclchannels() == templ.oclchannels() && (image.oclchannels() == 1 || image.oclchannels() == 4) && result.oclchannels() == 1);
|
||||||
CV_Assert(result.rows == image.rows - templ.rows + 1 && result.cols == image.cols - templ.cols + 1);
|
CV_Assert(result.rows == image.rows - templ.rows + 1 && result.cols == image.cols - templ.cols + 1);
|
||||||
|
|
||||||
Context *clCxt = image.clCxt;
|
Context *clCxt = image.clCxt;
|
||||||
@ -209,7 +209,7 @@ namespace cv
|
|||||||
result.create(image.rows - templ.rows + 1, image.cols - templ.cols + 1, CV_32F);
|
result.create(image.rows - templ.rows + 1, image.cols - templ.cols + 1, CV_32F);
|
||||||
if (templ.size().area() < getTemplateThreshold(CV_TM_SQDIFF, image.depth()))
|
if (templ.size().area() < getTemplateThreshold(CV_TM_SQDIFF, image.depth()))
|
||||||
{
|
{
|
||||||
matchTemplateNaive_CCORR(image, templ, result, image.channels());
|
matchTemplateNaive_CCORR(image, templ, result, image.oclchannels());
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
@ -220,8 +220,8 @@ namespace cv
|
|||||||
image.convertTo(buf.imagef, CV_32F);
|
image.convertTo(buf.imagef, CV_32F);
|
||||||
templ.convertTo(buf.templf, CV_32F);
|
templ.convertTo(buf.templf, CV_32F);
|
||||||
}
|
}
|
||||||
CV_Assert(image.channels() == 1);
|
CV_Assert(image.oclchannels() == 1);
|
||||||
oclMat o_result(image.size(), CV_MAKETYPE(CV_32F, image.channels()));
|
oclMat o_result(image.size(), CV_MAKETYPE(CV_32F, image.oclchannels()));
|
||||||
filter2D(buf.imagef, o_result, CV_32F, buf.templf, Point(0, 0));
|
filter2D(buf.imagef, o_result, CV_32F, buf.templf, Point(0, 0));
|
||||||
result = o_result(Rect(0, 0, image.rows - templ.rows + 1, image.cols - templ.cols + 1));
|
result = o_result(Rect(0, 0, image.rows - templ.rows + 1, image.cols - templ.cols + 1));
|
||||||
}
|
}
|
||||||
@ -265,7 +265,7 @@ namespace cv
|
|||||||
CV_Assert((image.depth() == CV_8U && templ.depth() == CV_8U )
|
CV_Assert((image.depth() == CV_8U && templ.depth() == CV_8U )
|
||||||
|| ((image.depth() == CV_32F && templ.depth() == CV_32F) && result.depth() == CV_32F)
|
|| ((image.depth() == CV_32F && templ.depth() == CV_32F) && result.depth() == CV_32F)
|
||||||
);
|
);
|
||||||
CV_Assert(image.channels() == templ.channels() && (image.oclchannels() == 1 || image.oclchannels() == 4) && result.channels() == 1);
|
CV_Assert(image.oclchannels() == templ.oclchannels() && (image.oclchannels() == 1 || image.oclchannels() == 4) && result.oclchannels() == 1);
|
||||||
CV_Assert(result.rows == image.rows - templ.rows + 1 && result.cols == image.cols - templ.cols + 1);
|
CV_Assert(result.rows == image.rows - templ.rows + 1 && result.cols == image.cols - templ.cols + 1);
|
||||||
|
|
||||||
Context *clCxt = image.clCxt;
|
Context *clCxt = image.clCxt;
|
||||||
@ -320,7 +320,7 @@ namespace cv
|
|||||||
args.push_back( make_pair( sizeof(cl_int), (void *)&result.offset));
|
args.push_back( make_pair( sizeof(cl_int), (void *)&result.offset));
|
||||||
args.push_back( make_pair( sizeof(cl_int), (void *)&result.step));
|
args.push_back( make_pair( sizeof(cl_int), (void *)&result.step));
|
||||||
// to be continued in the following section
|
// to be continued in the following section
|
||||||
if(image.channels() == 1)
|
if(image.oclchannels() == 1)
|
||||||
{
|
{
|
||||||
buf.image_sums.resize(1);
|
buf.image_sums.resize(1);
|
||||||
integral(image, buf.image_sums[0]);
|
integral(image, buf.image_sums[0]);
|
||||||
@ -340,7 +340,7 @@ namespace cv
|
|||||||
buf.image_sums.resize(buf.images.size());
|
buf.image_sums.resize(buf.images.size());
|
||||||
|
|
||||||
|
|
||||||
for(int i = 0; i < image.channels(); i ++)
|
for(int i = 0; i < image.oclchannels(); i ++)
|
||||||
{
|
{
|
||||||
integral(buf.images[i], buf.image_sums[i]);
|
integral(buf.images[i], buf.image_sums[i]);
|
||||||
}
|
}
|
||||||
@ -394,7 +394,7 @@ namespace cv
|
|||||||
args.push_back( make_pair( sizeof(cl_int), (void *)&result.step));
|
args.push_back( make_pair( sizeof(cl_int), (void *)&result.step));
|
||||||
args.push_back( make_pair( sizeof(cl_float), (void *)&scale) );
|
args.push_back( make_pair( sizeof(cl_float), (void *)&scale) );
|
||||||
// to be continued in the following section
|
// to be continued in the following section
|
||||||
if(image.channels() == 1)
|
if(image.oclchannels() == 1)
|
||||||
{
|
{
|
||||||
buf.image_sums.resize(1);
|
buf.image_sums.resize(1);
|
||||||
buf.image_sqsums.resize(1);
|
buf.image_sqsums.resize(1);
|
||||||
|
@ -126,7 +126,7 @@ namespace cv
|
|||||||
|
|
||||||
////////////////////////////////////////////////////////////////////////
|
////////////////////////////////////////////////////////////////////////
|
||||||
// convert_C3C4
|
// convert_C3C4
|
||||||
void convert_C3C4(const cl_mem &src, oclMat &dst, int srcStep)
|
void convert_C3C4(const cl_mem &src, oclMat &dst)
|
||||||
{
|
{
|
||||||
int dstStep_in_pixel = dst.step1() / dst.oclchannels();
|
int dstStep_in_pixel = dst.step1() / dst.oclchannels();
|
||||||
int pixel_end = dst.wholecols * dst.wholerows - 1;
|
int pixel_end = dst.wholecols * dst.wholerows - 1;
|
||||||
@ -174,7 +174,7 @@ void convert_C3C4(const cl_mem &src, oclMat &dst, int srcStep)
|
|||||||
}
|
}
|
||||||
////////////////////////////////////////////////////////////////////////
|
////////////////////////////////////////////////////////////////////////
|
||||||
// convert_C4C3
|
// convert_C4C3
|
||||||
void convert_C4C3(const oclMat &src, cl_mem &dst, int dstStep)
|
void convert_C4C3(const oclMat &src, cl_mem &dst)
|
||||||
{
|
{
|
||||||
int srcStep_in_pixel = src.step1() / src.oclchannels();
|
int srcStep_in_pixel = src.step1() / src.oclchannels();
|
||||||
int pixel_end = src.wholecols * src.wholerows - 1;
|
int pixel_end = src.wholecols * src.wholerows - 1;
|
||||||
@ -245,7 +245,7 @@ void cv::ocl::oclMat::upload(const Mat &m)
|
|||||||
openCLVerifyCall(err);
|
openCLVerifyCall(err);
|
||||||
|
|
||||||
openCLMemcpy2D(clCxt, temp, pitch, m.datastart, m.step, wholeSize.width * m.elemSize(), wholeSize.height, clMemcpyHostToDevice, 3);
|
openCLMemcpy2D(clCxt, temp, pitch, m.datastart, m.step, wholeSize.width * m.elemSize(), wholeSize.height, clMemcpyHostToDevice, 3);
|
||||||
convert_C3C4(temp, *this, pitch);
|
convert_C3C4(temp, *this);
|
||||||
//int* cputemp=new int[wholeSize.height*wholeSize.width * 3];
|
//int* cputemp=new int[wholeSize.height*wholeSize.width * 3];
|
||||||
//int* cpudata=new int[this->step*this->wholerows/sizeof(int)];
|
//int* cpudata=new int[this->step*this->wholerows/sizeof(int)];
|
||||||
//openCLSafeCall(clEnqueueReadBuffer(clCxt->impl->clCmdQueue, temp, CL_TRUE,
|
//openCLSafeCall(clEnqueueReadBuffer(clCxt->impl->clCmdQueue, temp, CL_TRUE,
|
||||||
@ -296,7 +296,7 @@ void cv::ocl::oclMat::download(cv::Mat &m) const
|
|||||||
(pitch * wholerows + tail_padding - 1) / tail_padding * tail_padding, 0, &err);
|
(pitch * wholerows + tail_padding - 1) / tail_padding * tail_padding, 0, &err);
|
||||||
openCLVerifyCall(err);
|
openCLVerifyCall(err);
|
||||||
|
|
||||||
convert_C4C3(*this, temp, pitch / m.elemSize1());
|
convert_C4C3(*this, temp);
|
||||||
openCLMemcpy2D(clCxt, m.data, m.step, temp, pitch, wholecols * m.elemSize(), wholerows, clMemcpyDeviceToHost, 3);
|
openCLMemcpy2D(clCxt, m.data, m.step, temp, pitch, wholecols * m.elemSize(), wholerows, clMemcpyDeviceToHost, 3);
|
||||||
//int* cputemp=new int[wholecols*wholerows * 3];
|
//int* cputemp=new int[wholecols*wholerows * 3];
|
||||||
//int* cpudata=new int[this->step*this->wholerows/sizeof(int)];
|
//int* cpudata=new int[this->step*this->wholerows/sizeof(int)];
|
||||||
@ -382,7 +382,7 @@ void cv::ocl::oclMat::copyTo( oclMat &m ) const
|
|||||||
CV_DbgAssert(!this->empty());
|
CV_DbgAssert(!this->empty());
|
||||||
m.create(size(), type());
|
m.create(size(), type());
|
||||||
openCLCopyBuffer2D(clCxt, m.data, m.step, m.offset,
|
openCLCopyBuffer2D(clCxt, m.data, m.step, m.offset,
|
||||||
data, step, cols * elemSize(), rows, offset, clMemcpyDeviceToDevice);
|
data, step, cols * elemSize(), rows, offset);
|
||||||
}
|
}
|
||||||
|
|
||||||
void cv::ocl::oclMat::copyTo( oclMat &mat, const oclMat &mask) const
|
void cv::ocl::oclMat::copyTo( oclMat &mat, const oclMat &mask) const
|
||||||
|
@ -85,10 +85,10 @@ namespace cv
|
|||||||
globalThreads[1] = divUp(globalThreads[1], localThreads[1]) * localThreads[1];
|
globalThreads[1] = divUp(globalThreads[1], localThreads[1]) * localThreads[1];
|
||||||
globalThreads[2] = divUp(globalThreads[2], localThreads[2]) * localThreads[2];
|
globalThreads[2] = divUp(globalThreads[2], localThreads[2]) * localThreads[2];
|
||||||
|
|
||||||
size_t blockSize = localThreads[0] * localThreads[1] * localThreads[2];
|
//size_t blockSize = localThreads[0] * localThreads[1] * localThreads[2];
|
||||||
cv::ocl::openCLVerifyKernel(clCxt, kernel, &blockSize, globalThreads, localThreads);
|
cv::ocl::openCLVerifyKernel(clCxt, kernel, localThreads);
|
||||||
}
|
}
|
||||||
for(int i = 0; i < args.size(); i ++)
|
for(size_t i = 0; i < args.size(); i ++)
|
||||||
openCLSafeCall(clSetKernelArg(kernel, i, args[i].first, args[i].second));
|
openCLSafeCall(clSetKernelArg(kernel, i, args[i].first, args[i].second));
|
||||||
|
|
||||||
openCLSafeCall(clEnqueueNDRangeKernel(clCxt->impl->clCmdQueue, kernel, 3, NULL, globalThreads,
|
openCLSafeCall(clEnqueueNDRangeKernel(clCxt->impl->clCmdQueue, kernel, 3, NULL, globalThreads,
|
||||||
|
@ -47,7 +47,7 @@
|
|||||||
#define __OPENCV_PRECOMP_H__
|
#define __OPENCV_PRECOMP_H__
|
||||||
|
|
||||||
#if _MSC_VER >= 1200
|
#if _MSC_VER >= 1200
|
||||||
#pragma warning( disable: 4251 4710 4711 4514 4996 )
|
#pragma warning( disable: 4244 4251 4710 4711 4514 4996 )
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#ifdef HAVE_CVCONFIG_H
|
#ifdef HAVE_CVCONFIG_H
|
||||||
@ -81,7 +81,7 @@
|
|||||||
#if defined __APPLE__
|
#if defined __APPLE__
|
||||||
#include <OpenCL/OpenCL.h>
|
#include <OpenCL/OpenCL.h>
|
||||||
#else
|
#else
|
||||||
#include <CL/cl.h>
|
#include <CL/opencl.h>
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#include "safe_call.hpp"
|
#include "safe_call.hpp"
|
||||||
@ -100,7 +100,7 @@ namespace cv
|
|||||||
size_t width, size_t height, enum openCLMemcpyKind kind, int channels = -1);
|
size_t width, size_t height, enum openCLMemcpyKind kind, int channels = -1);
|
||||||
void openCLCopyBuffer2D(Context *clCxt, void *dst, size_t dpitch, int dst_offset,
|
void openCLCopyBuffer2D(Context *clCxt, void *dst, size_t dpitch, int dst_offset,
|
||||||
const void *src, size_t spitch,
|
const void *src, size_t spitch,
|
||||||
size_t width, size_t height, int src_offset, enum openCLMemcpyKind kind);
|
size_t width, size_t height, int src_offset);
|
||||||
void openCLFree(void *devPtr);
|
void openCLFree(void *devPtr);
|
||||||
cl_mem openCLCreateBuffer(Context *clCxt, size_t flag, size_t size);
|
cl_mem openCLCreateBuffer(Context *clCxt, size_t flag, size_t size);
|
||||||
void openCLReadBuffer(Context *clCxt, cl_mem dst_buffer, void *host_buffer, size_t size);
|
void openCLReadBuffer(Context *clCxt, cl_mem dst_buffer, void *host_buffer, size_t size);
|
||||||
@ -108,8 +108,7 @@ namespace cv
|
|||||||
const char **source, string kernelName);
|
const char **source, string kernelName);
|
||||||
cl_kernel openCLGetKernelFromSource(const Context *clCxt,
|
cl_kernel openCLGetKernelFromSource(const Context *clCxt,
|
||||||
const char **source, string kernelName, const char *build_options);
|
const char **source, string kernelName, const char *build_options);
|
||||||
void openCLVerifyKernel(const Context *clCxt, cl_kernel kernel, size_t *blockSize,
|
void openCLVerifyKernel(const Context *clCxt, cl_kernel kernel, size_t *localThreads);
|
||||||
size_t *globalThreads, size_t *localThreads);
|
|
||||||
void openCLExecuteKernel(Context *clCxt , const char **source, string kernelName, vector< std::pair<size_t, const void *> > &args,
|
void openCLExecuteKernel(Context *clCxt , const char **source, string kernelName, vector< std::pair<size_t, const void *> > &args,
|
||||||
int globalcols , int globalrows, size_t blockSize = 16, int kernel_expand_depth = -1, int kernel_expand_channel = -1);
|
int globalcols , int globalrows, size_t blockSize = 16, int kernel_expand_depth = -1, int kernel_expand_channel = -1);
|
||||||
void openCLExecuteKernel_(Context *clCxt , const char **source, string kernelName,
|
void openCLExecuteKernel_(Context *clCxt , const char **source, string kernelName,
|
||||||
|
@ -61,6 +61,7 @@ namespace cv
|
|||||||
extern const char *pyrlk;
|
extern const char *pyrlk;
|
||||||
extern const char *operator_setTo;
|
extern const char *operator_setTo;
|
||||||
extern const char *operator_convertTo;
|
extern const char *operator_convertTo;
|
||||||
|
extern const char *operator_copyToM;
|
||||||
extern const char *arithm_mul;
|
extern const char *arithm_mul;
|
||||||
extern const char *pyr_down;
|
extern const char *pyr_down;
|
||||||
}
|
}
|
||||||
@ -397,6 +398,71 @@ oclMat &setTo(oclMat &src, const Scalar &scalar)
|
|||||||
return src;
|
return src;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
///////////////////////////////////////////////////////////////////////////
|
||||||
|
////////////////////////////////// CopyTo /////////////////////////////////
|
||||||
|
///////////////////////////////////////////////////////////////////////////
|
||||||
|
void copy_to_with_mask_cus(const oclMat &src, oclMat &dst, const oclMat &mask, string kernelName)
|
||||||
|
{
|
||||||
|
CV_DbgAssert( dst.rows == mask.rows && dst.cols == mask.cols &&
|
||||||
|
src.rows == dst.rows && src.cols == dst.cols
|
||||||
|
&& mask.type() == CV_8UC1);
|
||||||
|
|
||||||
|
vector<pair<size_t , const void *> > args;
|
||||||
|
|
||||||
|
std::string string_types[4][7] = {{"uchar", "char", "ushort", "short", "int", "float", "double"},
|
||||||
|
{"uchar2", "char2", "ushort2", "short2", "int2", "float2", "double2"},
|
||||||
|
{"uchar3", "char3", "ushort3", "short3", "int3", "float3", "double3"},
|
||||||
|
{"uchar4", "char4", "ushort4", "short4", "int4", "float4", "double4"}
|
||||||
|
};
|
||||||
|
char compile_option[32];
|
||||||
|
sprintf(compile_option, "-D GENTYPE=%s", string_types[dst.oclchannels() - 1][dst.depth()].c_str());
|
||||||
|
size_t localThreads[3] = {16, 16, 1};
|
||||||
|
size_t globalThreads[3];
|
||||||
|
|
||||||
|
globalThreads[0] = divUp(dst.cols, localThreads[0]) * localThreads[0];
|
||||||
|
globalThreads[1] = divUp(dst.rows, localThreads[1]) * localThreads[1];
|
||||||
|
globalThreads[2] = 1;
|
||||||
|
|
||||||
|
int dststep_in_pixel = dst.step / dst.elemSize(), dstoffset_in_pixel = dst.offset / dst.elemSize();
|
||||||
|
int srcstep_in_pixel = src.step / src.elemSize(), srcoffset_in_pixel = src.offset / src.elemSize();
|
||||||
|
|
||||||
|
args.push_back( make_pair( sizeof(cl_mem) , (void *)&src.data ));
|
||||||
|
args.push_back( make_pair( sizeof(cl_mem) , (void *)&dst.data ));
|
||||||
|
args.push_back( make_pair( sizeof(cl_mem) , (void *)&mask.data ));
|
||||||
|
args.push_back( make_pair( sizeof(cl_int) , (void *)&src.cols ));
|
||||||
|
args.push_back( make_pair( sizeof(cl_int) , (void *)&src.rows ));
|
||||||
|
args.push_back( make_pair( sizeof(cl_int) , (void *)&srcstep_in_pixel ));
|
||||||
|
args.push_back( make_pair( sizeof(cl_int) , (void *)&srcoffset_in_pixel ));
|
||||||
|
args.push_back( make_pair( sizeof(cl_int) , (void *)&dststep_in_pixel ));
|
||||||
|
args.push_back( make_pair( sizeof(cl_int) , (void *)&dstoffset_in_pixel ));
|
||||||
|
args.push_back( make_pair( sizeof(cl_int) , (void *)&mask.step ));
|
||||||
|
args.push_back( make_pair( sizeof(cl_int) , (void *)&mask.offset ));
|
||||||
|
|
||||||
|
openCLExecuteKernel2(dst.clCxt , &operator_copyToM, kernelName, globalThreads,
|
||||||
|
localThreads, args, -1, -1, compile_option, CLFLUSH);
|
||||||
|
}
|
||||||
|
|
||||||
|
void copyTo(const oclMat &src, oclMat &m )
|
||||||
|
{
|
||||||
|
CV_DbgAssert(!src.empty());
|
||||||
|
m.create(src.size(), src.type());
|
||||||
|
openCLCopyBuffer2D(src.clCxt, m.data, m.step, m.offset,
|
||||||
|
src.data, src.step, src.cols * src.elemSize(), src.rows, src.offset);
|
||||||
|
}
|
||||||
|
|
||||||
|
void copyTo(const oclMat &src, oclMat &mat, const oclMat &mask)
|
||||||
|
{
|
||||||
|
if (mask.empty())
|
||||||
|
{
|
||||||
|
copyTo(src, mat);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
mat.create(src.size(), src.type());
|
||||||
|
copy_to_with_mask_cus(src, mat, mask, "copy_to_with_mask");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
void arithmetic_run(const oclMat &src1, oclMat &dst, string kernelName, const char **kernelString, void *_scalar)
|
void arithmetic_run(const oclMat &src1, oclMat &dst, string kernelName, const char **kernelString, void *_scalar)
|
||||||
{
|
{
|
||||||
if(src1.clCxt -> impl -> double_support == 0 && src1.type() == CV_64F)
|
if(src1.clCxt -> impl -> double_support == 0 && src1.type() == CV_64F)
|
||||||
@ -879,20 +945,23 @@ void cv::ocl::PyrLKOpticalFlow::dense(const oclMat &prevImg, const oclMat &nextI
|
|||||||
nextPyr_.resize(maxLevel + 1);
|
nextPyr_.resize(maxLevel + 1);
|
||||||
|
|
||||||
prevPyr_[0] = prevImg;
|
prevPyr_[0] = prevImg;
|
||||||
nextImg.convertTo(nextPyr_[0], CV_32F);
|
//nextImg.convertTo(nextPyr_[0], CV_32F);
|
||||||
|
convertTo(nextImg, nextPyr_[0], CV_32F);
|
||||||
|
|
||||||
for (int level = 1; level <= maxLevel; ++level)
|
for (int level = 1; level <= maxLevel; ++level)
|
||||||
{
|
{
|
||||||
pyrDown(prevPyr_[level - 1], prevPyr_[level]);
|
pyrDown_cus(prevPyr_[level - 1], prevPyr_[level]);
|
||||||
pyrDown(nextPyr_[level - 1], nextPyr_[level]);
|
pyrDown_cus(nextPyr_[level - 1], nextPyr_[level]);
|
||||||
}
|
}
|
||||||
|
|
||||||
ensureSizeIsEnough(prevImg.size(), CV_32FC1, uPyr_[0]);
|
ensureSizeIsEnough(prevImg.size(), CV_32FC1, uPyr_[0]);
|
||||||
ensureSizeIsEnough(prevImg.size(), CV_32FC1, vPyr_[0]);
|
ensureSizeIsEnough(prevImg.size(), CV_32FC1, vPyr_[0]);
|
||||||
ensureSizeIsEnough(prevImg.size(), CV_32FC1, uPyr_[1]);
|
ensureSizeIsEnough(prevImg.size(), CV_32FC1, uPyr_[1]);
|
||||||
ensureSizeIsEnough(prevImg.size(), CV_32FC1, vPyr_[1]);
|
ensureSizeIsEnough(prevImg.size(), CV_32FC1, vPyr_[1]);
|
||||||
uPyr_[1].setTo(Scalar::all(0));
|
//uPyr_[1].setTo(Scalar::all(0));
|
||||||
vPyr_[1].setTo(Scalar::all(0));
|
//vPyr_[1].setTo(Scalar::all(0));
|
||||||
|
setTo(uPyr_[1], Scalar::all(0));
|
||||||
|
setTo(vPyr_[1], Scalar::all(0));
|
||||||
|
|
||||||
Size winSize2i(winSize.width, winSize.height);
|
Size winSize2i(winSize.width, winSize.height);
|
||||||
|
|
||||||
@ -909,8 +978,12 @@ void cv::ocl::PyrLKOpticalFlow::dense(const oclMat &prevImg, const oclMat &nextI
|
|||||||
idx = idx2;
|
idx = idx2;
|
||||||
}
|
}
|
||||||
|
|
||||||
uPyr_[idx].copyTo(u);
|
//uPyr_[idx].copyTo(u);
|
||||||
vPyr_[idx].copyTo(v);
|
//vPyr_[idx].copyTo(v);
|
||||||
|
copyTo(uPyr_[idx], u);
|
||||||
|
copyTo(vPyr_[idx], v);
|
||||||
|
|
||||||
|
clFinish(prevImg.clCxt->impl->clCmdQueue);
|
||||||
}
|
}
|
||||||
|
|
||||||
#endif /* !defined (HAVE_CUDA) */
|
#endif /* !defined (HAVE_CUDA) */
|
||||||
|
@ -73,22 +73,53 @@ void print_info()
|
|||||||
#endif
|
#endif
|
||||||
|
|
||||||
}
|
}
|
||||||
|
std::string workdir;
|
||||||
int main(int argc, char **argv)
|
int main(int argc, char **argv)
|
||||||
{
|
{
|
||||||
TS::ptr()->init("ocl");
|
TS::ptr()->init("ocl");
|
||||||
InitGoogleTest(&argc, argv);
|
InitGoogleTest(&argc, argv);
|
||||||
|
const char *keys =
|
||||||
|
"{ h | help | false | print help message }"
|
||||||
|
"{ w | workdir | ../../../samples/c/| set working directory }"
|
||||||
|
"{ t | type | gpu | set device type:cpu or gpu}"
|
||||||
|
"{ p | platform | 0 | set platform id }"
|
||||||
|
"{ d | device | 0 | set device id }";
|
||||||
|
|
||||||
|
CommandLineParser cmd(argc, argv, keys);
|
||||||
|
if (cmd.get<bool>("help"))
|
||||||
|
{
|
||||||
|
cout << "Avaible options besides goole test option:" << endl;
|
||||||
|
cmd.printParams();
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
workdir = cmd.get<string>("workdir");
|
||||||
|
string type = cmd.get<string>("type");
|
||||||
|
unsigned int pid = cmd.get<unsigned int>("platform");
|
||||||
|
int device = cmd.get<int>("device");
|
||||||
|
|
||||||
print_info();
|
print_info();
|
||||||
|
int flag = CVCL_DEVICE_TYPE_GPU;
|
||||||
std::vector<cv::ocl::Info> oclinfo;
|
if(type == "cpu")
|
||||||
int devnums = getDevice(oclinfo);
|
|
||||||
if(devnums < 1)
|
|
||||||
{
|
{
|
||||||
std::cout << "no device found\n";
|
flag = CVCL_DEVICE_TYPE_CPU;
|
||||||
|
}
|
||||||
|
std::vector<cv::ocl::Info> oclinfo;
|
||||||
|
int devnums = getDevice(oclinfo, flag);
|
||||||
|
if(devnums <= device || device < 0)
|
||||||
|
{
|
||||||
|
std::cout << "device invalid\n";
|
||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
//setDevice(oclinfo[1]);
|
if(pid >= oclinfo.size())
|
||||||
|
{
|
||||||
|
std::cout << "platform invalid\n";
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
if(pid != 0 || device != 0)
|
||||||
|
{
|
||||||
|
setDevice(oclinfo[pid], device);
|
||||||
|
}
|
||||||
|
cout << "Device type:" << type << endl << "Device name:" << oclinfo[pid].DeviceName[device] << endl;
|
||||||
return RUN_ALL_TESTS();
|
return RUN_ALL_TESTS();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -67,7 +67,5 @@
|
|||||||
#include "interpolation.hpp"
|
#include "interpolation.hpp"
|
||||||
//#include "add_test_info.h"
|
//#include "add_test_info.h"
|
||||||
|
|
||||||
#define OPENCV_DEFAULT_OPENCL_DEVICE CVCL_DEVICE_TYPE_GPU
|
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
@ -133,10 +133,9 @@ PARAM_TEST_CASE(ArithmTestBase, MatType, bool)
|
|||||||
|
|
||||||
void random_roi()
|
void random_roi()
|
||||||
{
|
{
|
||||||
cv::RNG &rng = TS::ptr()->get_rng();
|
|
||||||
|
|
||||||
#ifdef RANDOMROI
|
#ifdef RANDOMROI
|
||||||
//randomize ROI
|
//randomize ROI
|
||||||
|
cv::RNG &rng = TS::ptr()->get_rng();
|
||||||
roicols = rng.uniform(1, mat1.cols);
|
roicols = rng.uniform(1, mat1.cols);
|
||||||
roirows = rng.uniform(1, mat1.rows);
|
roirows = rng.uniform(1, mat1.rows);
|
||||||
src1x = rng.uniform(0, mat1.cols - roicols);
|
src1x = rng.uniform(0, mat1.cols - roicols);
|
||||||
@ -234,7 +233,7 @@ TEST_P(Exp, Mat)
|
|||||||
|
|
||||||
char s[1024];
|
char s[1024];
|
||||||
sprintf(s, "roicols=%d,roirows=%d,src1x=%d,src1y=%d,dstx=%d,dsty=%d,maskx=%d,masky=%d,src2x=%d,src2y=%d", roicols, roirows, src1x, src1y, dstx, dsty, maskx, masky, src2x, src2y);
|
sprintf(s, "roicols=%d,roirows=%d,src1x=%d,src1y=%d,dstx=%d,dsty=%d,maskx=%d,masky=%d,src2x=%d,src2y=%d", roicols, roirows, src1x, src1y, dstx, dsty, maskx, masky, src2x, src2y);
|
||||||
EXPECT_MAT_NEAR(dst, cpu_dst, 1, s);
|
EXPECT_MAT_NEAR(dst, cpu_dst, 2, s);
|
||||||
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -855,7 +854,7 @@ TEST_P(MinMaxLoc, MAT)
|
|||||||
cv::Point minLoc_, maxLoc_;
|
cv::Point minLoc_, maxLoc_;
|
||||||
cv::ocl::minMaxLoc(gmat1, &minVal_, &maxVal_, &minLoc_, &maxLoc_, cv::ocl::oclMat());
|
cv::ocl::minMaxLoc(gmat1, &minVal_, &maxVal_, &minLoc_, &maxLoc_, cv::ocl::oclMat());
|
||||||
|
|
||||||
double error0, error1, minlocVal, minlocVal_, maxlocVal, maxlocVal_;
|
double error0 = 0., error1 = 0., minlocVal = 0., minlocVal_ = 0., maxlocVal = 0., maxlocVal_ = 0.;
|
||||||
if(depth == 0)
|
if(depth == 0)
|
||||||
{
|
{
|
||||||
minlocVal = mat1_roi.at<unsigned char>(minLoc);
|
minlocVal = mat1_roi.at<unsigned char>(minLoc);
|
||||||
@ -975,7 +974,7 @@ TEST_P(MinMaxLoc, MASK)
|
|||||||
cv::Point minLoc_, maxLoc_;
|
cv::Point minLoc_, maxLoc_;
|
||||||
cv::ocl::minMaxLoc(gmat1, &minVal_, &maxVal_, &minLoc_, &maxLoc_, gmask);
|
cv::ocl::minMaxLoc(gmat1, &minVal_, &maxVal_, &minLoc_, &maxLoc_, gmask);
|
||||||
|
|
||||||
double error0, error1, minlocVal, minlocVal_, maxlocVal, maxlocVal_;
|
double error0 = 0., error1 = 0., minlocVal = 0., minlocVal_ = 0., maxlocVal = 0., maxlocVal_ = 0.;
|
||||||
if(minLoc_.x == -1 || minLoc_.y == -1 || maxLoc_.x == -1 || maxLoc_.y == -1) continue;
|
if(minLoc_.x == -1 || minLoc_.y == -1 || maxLoc_.x == -1 || maxLoc_.y == -1) continue;
|
||||||
if(depth == 0)
|
if(depth == 0)
|
||||||
{
|
{
|
||||||
|
@ -45,16 +45,11 @@
|
|||||||
|
|
||||||
#include "precomp.hpp"
|
#include "precomp.hpp"
|
||||||
#ifdef HAVE_OPENCL
|
#ifdef HAVE_OPENCL
|
||||||
#ifdef WIN32
|
|
||||||
#define FILTER_IMAGE "C:/Users/Public/Pictures/Sample Pictures/Penguins.jpg"
|
|
||||||
#else
|
|
||||||
#define FILTER_IMAGE "/Users/Test/Valve_original.PNG" // user need to specify a valid image path
|
|
||||||
#endif
|
|
||||||
#define SHOW_RESULT 0
|
#define SHOW_RESULT 0
|
||||||
|
|
||||||
////////////////////////////////////////////////////////
|
////////////////////////////////////////////////////////
|
||||||
// Canny
|
// Canny
|
||||||
|
extern std::string workdir;
|
||||||
IMPLEMENT_PARAM_CLASS(AppertureSize, int);
|
IMPLEMENT_PARAM_CLASS(AppertureSize, int);
|
||||||
IMPLEMENT_PARAM_CLASS(L2gradient, bool);
|
IMPLEMENT_PARAM_CLASS(L2gradient, bool);
|
||||||
|
|
||||||
@ -76,7 +71,7 @@ PARAM_TEST_CASE(Canny, AppertureSize, L2gradient)
|
|||||||
|
|
||||||
TEST_P(Canny, Accuracy)
|
TEST_P(Canny, Accuracy)
|
||||||
{
|
{
|
||||||
cv::Mat img = readImage(FILTER_IMAGE, cv::IMREAD_GRAYSCALE);
|
cv::Mat img = readImage(workdir + "fruits.jpg", cv::IMREAD_GRAYSCALE);
|
||||||
ASSERT_FALSE(img.empty());
|
ASSERT_FALSE(img.empty());
|
||||||
|
|
||||||
double low_thresh = 50.0;
|
double low_thresh = 50.0;
|
||||||
|
@ -52,7 +52,7 @@ using namespace cvtest;
|
|||||||
using namespace testing;
|
using namespace testing;
|
||||||
using namespace std;
|
using namespace std;
|
||||||
using namespace cv;
|
using namespace cv;
|
||||||
|
extern string workdir;
|
||||||
struct getRect
|
struct getRect
|
||||||
{
|
{
|
||||||
Rect operator ()(const CvAvgComp &e) const
|
Rect operator ()(const CvAvgComp &e) const
|
||||||
@ -75,14 +75,11 @@ PARAM_TEST_CASE(HaarTestBase, int, int)
|
|||||||
{
|
{
|
||||||
scale = 1.0;
|
scale = 1.0;
|
||||||
index = 0;
|
index = 0;
|
||||||
string cascadeName = "../../../data/haarcascades/haarcascade_frontalface_alt.xml";
|
string cascadeName = workdir + "../../data/haarcascades/haarcascade_frontalface_alt.xml";
|
||||||
|
|
||||||
if( (!cascade.load( cascadeName )) || (!cpucascade.load(cascadeName)))
|
if( (!cascade.load( cascadeName )) || (!cpucascade.load(cascadeName)))
|
||||||
{
|
{
|
||||||
cout << "ERROR: Could not load classifier cascade" << endl;
|
cout << "ERROR: Could not load classifier cascade" << endl;
|
||||||
cout << "Usage: facedetect [--cascade=<cascade_path>]\n"
|
|
||||||
" [--scale[=<image scale>\n"
|
|
||||||
" [filename|camera_index]\n" << endl ;
|
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
//int devnums = getDevice(oclinfo);
|
//int devnums = getDevice(oclinfo);
|
||||||
@ -99,17 +96,17 @@ struct Haar : HaarTestBase {};
|
|||||||
|
|
||||||
TEST_F(Haar, FaceDetect)
|
TEST_F(Haar, FaceDetect)
|
||||||
{
|
{
|
||||||
string imgName = "../../../samples/c/lena.jpg";
|
string imgName = workdir + "lena.jpg";
|
||||||
Mat img = imread( imgName, 1 );
|
Mat img = imread( imgName, 1 );
|
||||||
|
|
||||||
if(img.empty())
|
if(img.empty())
|
||||||
{
|
{
|
||||||
std::cout << "Couldn't read test" << index << ".jpg" << std::endl;
|
std::cout << "Couldn't read " << imgName << std::endl;
|
||||||
return ;
|
return ;
|
||||||
}
|
}
|
||||||
|
|
||||||
int i = 0;
|
//int i = 0;
|
||||||
double t = 0;
|
//double t = 0;
|
||||||
vector<Rect> faces, oclfaces;
|
vector<Rect> faces, oclfaces;
|
||||||
|
|
||||||
const static Scalar colors[] = { CV_RGB(0, 0, 255),
|
const static Scalar colors[] = { CV_RGB(0, 0, 255),
|
||||||
|
@ -48,7 +48,7 @@
|
|||||||
using namespace std;
|
using namespace std;
|
||||||
#ifdef HAVE_OPENCL
|
#ifdef HAVE_OPENCL
|
||||||
|
|
||||||
|
extern string workdir;
|
||||||
PARAM_TEST_CASE(HOG, cv::Size, int)
|
PARAM_TEST_CASE(HOG, cv::Size, int)
|
||||||
{
|
{
|
||||||
cv::Size winSize;
|
cv::Size winSize;
|
||||||
@ -63,7 +63,7 @@ PARAM_TEST_CASE(HOG, cv::Size, int)
|
|||||||
TEST_P(HOG, GetDescriptors)
|
TEST_P(HOG, GetDescriptors)
|
||||||
{
|
{
|
||||||
// Load image
|
// Load image
|
||||||
cv::Mat img_rgb = readImage("../../../samples/gpu/road.png");
|
cv::Mat img_rgb = readImage(workdir + "lena.jpg");
|
||||||
ASSERT_FALSE(img_rgb.empty());
|
ASSERT_FALSE(img_rgb.empty());
|
||||||
|
|
||||||
// Convert image
|
// Convert image
|
||||||
@ -120,7 +120,7 @@ bool match_rect(cv::Rect r1, cv::Rect r2, int threshold)
|
|||||||
TEST_P(HOG, Detect)
|
TEST_P(HOG, Detect)
|
||||||
{
|
{
|
||||||
// Load image
|
// Load image
|
||||||
cv::Mat img_rgb = readImage("../../../samples/gpu/road.png");
|
cv::Mat img_rgb = readImage(workdir + "lena.jpg");
|
||||||
ASSERT_FALSE(img_rgb.empty());
|
ASSERT_FALSE(img_rgb.empty());
|
||||||
|
|
||||||
// Convert image
|
// Convert image
|
||||||
|
@ -1008,7 +1008,7 @@ TEST_P(Remap, Mat)
|
|||||||
int bordertype[] = {cv::BORDER_CONSTANT, cv::BORDER_REPLICATE/*,BORDER_REFLECT,BORDER_WRAP,BORDER_REFLECT_101*/};
|
int bordertype[] = {cv::BORDER_CONSTANT, cv::BORDER_REPLICATE/*,BORDER_REFLECT,BORDER_WRAP,BORDER_REFLECT_101*/};
|
||||||
const char *borderstr[] = {"BORDER_CONSTANT", "BORDER_REPLICATE"/*, "BORDER_REFLECT","BORDER_WRAP","BORDER_REFLECT_101"*/};
|
const char *borderstr[] = {"BORDER_CONSTANT", "BORDER_REPLICATE"/*, "BORDER_REFLECT","BORDER_WRAP","BORDER_REFLECT_101"*/};
|
||||||
// for(int i = 0; i < sizeof(bordertype)/sizeof(int); i++)
|
// for(int i = 0; i < sizeof(bordertype)/sizeof(int); i++)
|
||||||
for(int j = 0; j < 100; j++)
|
for(int j = 0; j < LOOP_TIMES; j++)
|
||||||
{
|
{
|
||||||
random_roi();
|
random_roi();
|
||||||
cv::remap(src_roi, dst_roi, map1_roi, map2_roi, interpolation, bordertype[0], val);
|
cv::remap(src_roi, dst_roi, map1_roi, map2_roi, interpolation, bordertype[0], val);
|
||||||
@ -1017,7 +1017,7 @@ TEST_P(Remap, Mat)
|
|||||||
gdst.download(cpu_dst);
|
gdst.download(cpu_dst);
|
||||||
|
|
||||||
char sss[1024];
|
char sss[1024];
|
||||||
sprintf(sss, "src_roicols=%d,src_roirows=%d,dst_roicols=%d,dst_roirows=%d,src1x =%d,src1y=%d,dstx=%d,dsty=%d", src_roicols, src_roirows, dst_roicols, dst_roirows, srcx, srcy, dstx, dsty);
|
sprintf(sss, "src_roicols=%d,src_roirows=%d,dst_roicols=%d,dst_roirows=%d,src1x =%d,src1y=%d,dstx=%d,dsty=%d bordertype=%s", src_roicols, src_roirows, dst_roicols, dst_roirows, srcx, srcy, dstx, dsty, borderstr[0]);
|
||||||
|
|
||||||
|
|
||||||
if(interpolation == 0)
|
if(interpolation == 0)
|
||||||
@ -1371,7 +1371,9 @@ TEST_P(meanShiftFiltering, Mat)
|
|||||||
gdst.download(cpu_gdst);
|
gdst.download(cpu_gdst);
|
||||||
|
|
||||||
char sss[1024];
|
char sss[1024];
|
||||||
|
char warning[300] = "Warning: If the selected device doesn't support double, a deviation will exist.\nIf the accuracy is acceptable, please ignore it.\n";
|
||||||
sprintf(sss, "roicols=%d,roirows=%d,srcx=%d,srcy=%d,dstx=%d,dsty=%d\n", roicols, roirows, srcx, srcy, dstx, dsty);
|
sprintf(sss, "roicols=%d,roirows=%d,srcx=%d,srcy=%d,dstx=%d,dsty=%d\n", roicols, roirows, srcx, srcy, dstx, dsty);
|
||||||
|
strcat(sss, warning);
|
||||||
EXPECT_MAT_NEAR(dst, cpu_gdst, 0.0, sss);
|
EXPECT_MAT_NEAR(dst, cpu_gdst, 0.0, sss);
|
||||||
|
|
||||||
}
|
}
|
||||||
@ -1397,7 +1399,9 @@ TEST_P(meanShiftProc, Mat)
|
|||||||
gdstCoor.download(cpu_gdstCoor);
|
gdstCoor.download(cpu_gdstCoor);
|
||||||
|
|
||||||
char sss[1024];
|
char sss[1024];
|
||||||
|
char warning[300] = "Warning: If the selected device doesn't support double, a deviation will exist.\nIf the accuracy is acceptable, please ignore it.\n";
|
||||||
sprintf(sss, "roicols=%d,roirows=%d,srcx=%d,srcy=%d,dstx=%d,dsty=%d\n", roicols, roirows, srcx, srcy, dstx, dsty);
|
sprintf(sss, "roicols=%d,roirows=%d,srcx=%d,srcy=%d,dstx=%d,dsty=%d\n", roicols, roirows, srcx, srcy, dstx, dsty);
|
||||||
|
strcat(sss, warning);
|
||||||
EXPECT_MAT_NEAR(dst, cpu_gdst, 0.0, sss);
|
EXPECT_MAT_NEAR(dst, cpu_gdst, 0.0, sss);
|
||||||
EXPECT_MAT_NEAR(dstCoor, cpu_gdstCoor, 0.0, sss);
|
EXPECT_MAT_NEAR(dstCoor, cpu_gdstCoor, 0.0, sss);
|
||||||
}
|
}
|
||||||
@ -1740,7 +1744,7 @@ INSTANTIATE_TEST_CASE_P(Imgproc, meanShiftProc, Combine(
|
|||||||
));
|
));
|
||||||
|
|
||||||
INSTANTIATE_TEST_CASE_P(Imgproc, Remap, Combine(
|
INSTANTIATE_TEST_CASE_P(Imgproc, Remap, Combine(
|
||||||
Values(CV_8UC1, CV_8UC3, CV_8UC4, CV_32FC1, CV_32FC4),
|
Values(CV_8UC1, CV_8UC3, CV_8UC4, CV_32FC1, CV_32FC3, CV_32FC4),
|
||||||
Values(CV_32FC1, CV_16SC2, CV_32FC2), Values(-1, CV_32FC1),
|
Values(CV_32FC1, CV_16SC2, CV_32FC2), Values(-1, CV_32FC1),
|
||||||
Values((int)cv::INTER_NEAREST, (int)cv::INTER_LINEAR),
|
Values((int)cv::INTER_NEAREST, (int)cv::INTER_LINEAR),
|
||||||
Values((int)cv::BORDER_CONSTANT)));
|
Values((int)cv::BORDER_CONSTANT)));
|
||||||
@ -1751,7 +1755,7 @@ INSTANTIATE_TEST_CASE_P(histTestBase, calcHist, Combine(
|
|||||||
ONE_TYPE(CV_32SC1) //no use
|
ONE_TYPE(CV_32SC1) //no use
|
||||||
));
|
));
|
||||||
|
|
||||||
INSTANTIATE_TEST_CASE_P(ConvolveTestBase, Convolve, Combine(
|
//INSTANTIATE_TEST_CASE_P(ConvolveTestBase, Convolve, Combine(
|
||||||
Values(CV_32FC1, CV_32FC1),
|
// Values(CV_32FC1, CV_32FC1),
|
||||||
Values(false))); // Values(false) is the reserved parameter
|
// Values(false))); // Values(false) is the reserved parameter
|
||||||
#endif // HAVE_OPENCL
|
#endif // HAVE_OPENCL
|
||||||
|
@ -44,7 +44,7 @@
|
|||||||
|
|
||||||
|
|
||||||
#include "precomp.hpp"
|
#include "precomp.hpp"
|
||||||
#define PERF_TEST 0
|
//#define PERF_TEST 0
|
||||||
#ifdef HAVE_OPENCL
|
#ifdef HAVE_OPENCL
|
||||||
////////////////////////////////////////////////////////////////////////////////
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
// MatchTemplate
|
// MatchTemplate
|
||||||
@ -157,18 +157,18 @@ TEST_P(MatchTemplate32F, Accuracy)
|
|||||||
#endif // PERF_TEST
|
#endif // PERF_TEST
|
||||||
}
|
}
|
||||||
|
|
||||||
//INSTANTIATE_TEST_CASE_P(GPU_ImgProc, MatchTemplate8U,
|
INSTANTIATE_TEST_CASE_P(GPU_ImgProc, MatchTemplate8U,
|
||||||
// testing::Combine(
|
testing::Combine(
|
||||||
// MTEMP_SIZES,
|
MTEMP_SIZES,
|
||||||
// testing::Values(TemplateSize(cv::Size(5, 5)), TemplateSize(cv::Size(16, 16))/*, TemplateSize(cv::Size(30, 30))*/),
|
testing::Values(TemplateSize(cv::Size(5, 5)), TemplateSize(cv::Size(16, 16))/*, TemplateSize(cv::Size(30, 30))*/),
|
||||||
// testing::Values(Channels(1), Channels(3), Channels(4)),
|
testing::Values(Channels(1), Channels(3), Channels(4)),
|
||||||
// ALL_TEMPLATE_METHODS
|
ALL_TEMPLATE_METHODS
|
||||||
// )
|
)
|
||||||
// );
|
);
|
||||||
//
|
|
||||||
//INSTANTIATE_TEST_CASE_P(GPU_ImgProc, MatchTemplate32F, testing::Combine(
|
INSTANTIATE_TEST_CASE_P(GPU_ImgProc, MatchTemplate32F, testing::Combine(
|
||||||
// MTEMP_SIZES,
|
MTEMP_SIZES,
|
||||||
// testing::Values(TemplateSize(cv::Size(5, 5)), TemplateSize(cv::Size(16, 16))/*, TemplateSize(cv::Size(30, 30))*/),
|
testing::Values(TemplateSize(cv::Size(5, 5)), TemplateSize(cv::Size(16, 16))/*, TemplateSize(cv::Size(30, 30))*/),
|
||||||
// testing::Values(Channels(1), Channels(3), Channels(4)),
|
testing::Values(Channels(1), Channels(3), Channels(4)),
|
||||||
// testing::Values(TemplateMethod(cv::TM_SQDIFF), TemplateMethod(cv::TM_CCORR))));
|
testing::Values(TemplateMethod(cv::TM_SQDIFF), TemplateMethod(cv::TM_CCORR))));
|
||||||
#endif
|
#endif
|
||||||
|
@ -54,7 +54,7 @@ using namespace std;
|
|||||||
|
|
||||||
/////////////////////////////////////////////////////////////////////////////////////////////////
|
/////////////////////////////////////////////////////////////////////////////////////////////////
|
||||||
// BroxOpticalFlow
|
// BroxOpticalFlow
|
||||||
|
extern string workdir;
|
||||||
#define BROX_OPTICAL_FLOW_DUMP_FILE "opticalflow/brox_optical_flow.bin"
|
#define BROX_OPTICAL_FLOW_DUMP_FILE "opticalflow/brox_optical_flow.bin"
|
||||||
#define BROX_OPTICAL_FLOW_DUMP_FILE_CC20 "opticalflow/brox_optical_flow_cc20.bin"
|
#define BROX_OPTICAL_FLOW_DUMP_FILE_CC20 "opticalflow/brox_optical_flow_cc20.bin"
|
||||||
|
|
||||||
@ -78,10 +78,10 @@ PARAM_TEST_CASE(Sparse, bool, bool)
|
|||||||
|
|
||||||
TEST_P(Sparse, Mat)
|
TEST_P(Sparse, Mat)
|
||||||
{
|
{
|
||||||
cv::Mat frame0 = readImage("../../../samples/gpu/rubberwhale1.png", useGray ? cv::IMREAD_GRAYSCALE : cv::IMREAD_COLOR);
|
cv::Mat frame0 = readImage(workdir + "../gpu/rubberwhale1.png", useGray ? cv::IMREAD_GRAYSCALE : cv::IMREAD_COLOR);
|
||||||
ASSERT_FALSE(frame0.empty());
|
ASSERT_FALSE(frame0.empty());
|
||||||
|
|
||||||
cv::Mat frame1 = readImage("../../../samples/gpu/rubberwhale2.png", useGray ? cv::IMREAD_GRAYSCALE : cv::IMREAD_COLOR);
|
cv::Mat frame1 = readImage(workdir + "../gpu/rubberwhale2.png", useGray ? cv::IMREAD_GRAYSCALE : cv::IMREAD_COLOR);
|
||||||
ASSERT_FALSE(frame1.empty());
|
ASSERT_FALSE(frame1.empty());
|
||||||
|
|
||||||
cv::Mat gray_frame;
|
cv::Mat gray_frame;
|
||||||
|
Loading…
x
Reference in New Issue
Block a user