Merge branch '2.4'

This commit is contained in:
Andrey Kamaev
2013-04-05 19:52:42 +04:00
154 changed files with 11355 additions and 16795 deletions

View File

@@ -239,7 +239,7 @@ protected:
}
};
TEST(Calib3d_SolvePnPRansac, accuracy) { CV_solvePnPRansac_Test test; test.safe_run(); }
TEST(DISABLED_Calib3d_SolvePnPRansac, accuracy) { CV_solvePnPRansac_Test test; test.safe_run(); }
TEST(Calib3d_SolvePnP, accuracy) { CV_solvePnP_Test test; test.safe_run(); }

View File

@@ -460,14 +460,29 @@ void CV_StereoMatchingTest::run(int)
continue;
}
int dispScaleFactor = datasetsParams[datasetName].dispScaleFactor;
Mat tmp; trueLeftDisp.convertTo( tmp, CV_32FC1, 1.f/dispScaleFactor ); trueLeftDisp = tmp; tmp.release();
Mat tmp;
trueLeftDisp.convertTo( tmp, CV_32FC1, 1.f/dispScaleFactor );
trueLeftDisp = tmp;
tmp.release();
if( !trueRightDisp.empty() )
trueRightDisp.convertTo( tmp, CV_32FC1, 1.f/dispScaleFactor ); trueRightDisp = tmp; tmp.release();
{
trueRightDisp.convertTo( tmp, CV_32FC1, 1.f/dispScaleFactor );
trueRightDisp = tmp;
tmp.release();
}
Mat leftDisp, rightDisp;
int ignBorder = max(runStereoMatchingAlgorithm(leftImg, rightImg, leftDisp, rightDisp, ci), EVAL_IGNORE_BORDER);
leftDisp.convertTo( tmp, CV_32FC1 ); leftDisp = tmp; tmp.release();
rightDisp.convertTo( tmp, CV_32FC1 ); rightDisp = tmp; tmp.release();
leftDisp.convertTo( tmp, CV_32FC1 );
leftDisp = tmp;
tmp.release();
rightDisp.convertTo( tmp, CV_32FC1 );
rightDisp = tmp;
tmp.release();
int tempCode = processStereoMatchingResults( resFS, ci, isWrite,
leftImg, rightImg, trueLeftDisp, trueRightDisp, leftDisp, rightDisp, QualityEvalParams(ignBorder));
@@ -531,7 +546,8 @@ int CV_StereoMatchingTest::processStereoMatchingResults( FileStorage& fs, int ca
// rightDisp is not used in the current test version
int code = cvtest::TS::OK;
assert( fs.isOpened() );
assert( trueLeftDisp.type() == CV_32FC1 && trueRightDisp.type() == CV_32FC1 );
assert( trueLeftDisp.type() == CV_32FC1 );
assert( trueRightDisp.empty() || trueRightDisp.type() == CV_32FC1 );
assert( leftDisp.type() == CV_32FC1 && rightDisp.type() == CV_32FC1 );
// get masks for unknown ground truth disparity values

View File

@@ -7,7 +7,7 @@ Face Recognition with OpenCV
Introduction
============
`OpenCV (Open Source Computer Vision) <http://opencv.willowgarage.com>`_ is a popular computer vision library started by `Intel <http://www.intel.com>`_ in 1999. The cross-platform library sets its focus on real-time image processing and includes patent-free implementations of the latest computer vision algorithms. In 2008 `Willow Garage <http://www.willowgarage.com>`_ took over support and OpenCV 2.3.1 now comes with a programming interface to C, C++, `Python <http://www.python.org>`_ and `Android <http://www.android.com>`_. OpenCV is released under a BSD license so it is used in academic projects and commercial products alike.
`OpenCV (Open Source Computer Vision) <http://opencv.org>`_ is a popular computer vision library started by `Intel <http://www.intel.com>`_ in 1999. The cross-platform library sets its focus on real-time image processing and includes patent-free implementations of the latest computer vision algorithms. In 2008 `Willow Garage <http://www.willowgarage.com>`_ took over support and OpenCV 2.3.1 now comes with a programming interface to C, C++, `Python <http://www.python.org>`_ and `Android <http://www.android.com>`_. OpenCV is released under a BSD license so it is used in academic projects and commercial products alike.
OpenCV 2.4 now comes with the very new :ocv:class:`FaceRecognizer` class for face recognition, so you can start experimenting with face recognition right away. This document is the guide I wished for when I was working my way into face recognition. It shows you how to perform face recognition with :ocv:class:`FaceRecognizer` in OpenCV (with full source code listings) and gives you an introduction to the algorithms behind it. I'll also show how to create the visualizations you can find in many publications, because a lot of people asked for them.

View File

@@ -6,7 +6,7 @@ project(facerec_cpp_samples)
#SET(OpenCV_DIR /path/to/your/opencv/installation)
# packages
find_package(OpenCV REQUIRED) # http://opencv.willowgarage.com
find_package(OpenCV REQUIRED) # http://opencv.org
# probably you should loop through the sample files here
add_executable(facerec_demo facerec_demo.cpp)

View File

@@ -45,4 +45,4 @@
#error this is a compatibility header which should not be used inside the OpenCV library
#endif
#include "opencv2/contrib.hpp"
#include "opencv2/contrib.hpp"

View File

@@ -1106,7 +1106,7 @@ void LevMarqSparse::bundleAdjust( std::vector<Point3d>& points, //positions of p
Mat rot_vec = levmarP.rowRange(i*num_cam_param, i*num_cam_param+3);
Rodrigues( rot_vec, R[i] );
//translation
T[i] = levmarP.rowRange(i*num_cam_param + 3, i*num_cam_param+6);
levmarP.rowRange(i*num_cam_param + 3, i*num_cam_param+6).copyTo(T[i]);
//intrinsic camera matrix
double* intr_data = (double*)cameraMatrix[i].data;

View File

@@ -380,6 +380,7 @@ void CvFuzzyMeanShiftTracker::SearchWindow::initDepthValues(IplImage *maskImage,
{
if (*depthData)
{
d = *depthData;
m1 += d;
if (d < mind)
mind = d;

View File

@@ -4,7 +4,7 @@ Introduction
.. highlight:: cpp
OpenCV (Open Source Computer Vision Library: http://opencv.willowgarage.com/wiki/) is an open-source BSD-licensed library that includes several hundreds of computer vision algorithms. The document describes the so-called OpenCV 2.x API, which is essentially a C++ API, as opposite to the C-based OpenCV 1.x API. The latter is described in opencv1x.pdf.
OpenCV (Open Source Computer Vision Library: http://opencv.org) is an open-source BSD-licensed library that includes several hundreds of computer vision algorithms. The document describes the so-called OpenCV 2.x API, which is essentially a C++ API, as opposed to the C-based OpenCV 1.x API. The latter is described in opencv1x.pdf.
OpenCV has a modular structure, which means that the package includes several shared or static libraries. The following modules are available:

View File

@@ -45,4 +45,4 @@
#error this is a compatibility header which should not be used inside the OpenCV library
#endif
#include "opencv2/core.hpp"
#include "opencv2/core.hpp"

View File

@@ -360,6 +360,8 @@ CV_INLINE int cvRound( double value )
fistp t;
}
return t;
#elif defined _MSC_VER && defined _M_ARM && defined HAVE_TEGRA_OPTIMIZATION
TEGRA_ROUND(value);
#elif defined HAVE_LRINT || defined CV_ICC || defined __GNUC__
# ifdef HAVE_TEGRA_OPTIMIZATION
TEGRA_ROUND(value);
@@ -367,8 +369,12 @@ CV_INLINE int cvRound( double value )
return (int)lrint(value);
# endif
#else
// while this is not IEEE754-compliant rounding, it's usually a good enough approximation
return (int)(value + (value >= 0 ? 0.5 : -0.5));
double intpart, fractpart;
fractpart = modf(value, &intpart);
if ((fabs(fractpart) != 0.5) || ((((int)intpart) % 2) != 0))
return (int)(value + (value >= 0 ? 0.5 : -0.5));
else
return (int)intpart;
#endif
}

View File

@@ -1704,6 +1704,7 @@ public:
SparseMatConstIterator_();
//! the full constructor setting the iterator to the first sparse matrix element
SparseMatConstIterator_(const SparseMat_<_Tp>* _m);
SparseMatConstIterator_(const SparseMat* _m);
//! the copy constructor
SparseMatConstIterator_(const SparseMatConstIterator_& it);
@@ -1740,6 +1741,7 @@ public:
SparseMatIterator_();
//! the full constructor setting the iterator to the first sparse matrix element
SparseMatIterator_(SparseMat_<_Tp>* _m);
SparseMatIterator_(SparseMat* _m);
//! the copy constructor
SparseMatIterator_(const SparseMatIterator_& it);

View File

@@ -2587,6 +2587,13 @@ SparseMatConstIterator_<_Tp>::SparseMatConstIterator_(const SparseMat_<_Tp>* _m)
: SparseMatConstIterator(_m)
{}
// Constructs a typed const iterator from an untyped SparseMat pointer,
// forwarding the pointer to the SparseMatConstIterator base constructor.
template<typename _Tp> inline
SparseMatConstIterator_<_Tp>::SparseMatConstIterator_(const SparseMat* _m)
: SparseMatConstIterator(_m)
{
// The untyped matrix must report exactly the element type of this iterator.
// NOTE(review): _m is dereferenced unconditionally here - confirm callers never pass a null pointer.
CV_Assert( _m->type() == DataType<_Tp>::type );
}
template<typename _Tp> inline
SparseMatConstIterator_<_Tp>::SparseMatConstIterator_(const SparseMatConstIterator_<_Tp>& it)
: SparseMatConstIterator(it)
@@ -2634,6 +2641,11 @@ SparseMatIterator_<_Tp>::SparseMatIterator_(SparseMat_<_Tp>* _m)
: SparseMatConstIterator_<_Tp>(_m)
{}
// Constructs a typed mutable iterator from an untyped SparseMat pointer by
// delegating to the SparseMatConstIterator_<_Tp> constructor.
// NOTE(review): presumably this selects the const SparseMat* overload, which asserts the element type - confirm.
template<typename _Tp> inline
SparseMatIterator_<_Tp>::SparseMatIterator_(SparseMat* _m)
: SparseMatConstIterator_<_Tp>(_m)
{}
template<typename _Tp> inline
SparseMatIterator_<_Tp>::SparseMatIterator_(const SparseMatIterator_<_Tp>& it)
: SparseMatConstIterator_<_Tp>(it)

View File

@@ -577,10 +577,10 @@ JacobiSVDImpl_(_Tp* At, size_t astep, _Tp* _W, _Tp* Vt, size_t vstep,
continue;
p *= 2;
double beta = a - b, gamma = hypot((double)p, beta), delta;
double beta = a - b, gamma = hypot((double)p, beta);
if( beta < 0 )
{
delta = (gamma - beta)*0.5;
double delta = (gamma - beta)*0.5;
s = (_Tp)std::sqrt(delta/gamma);
c = (_Tp)(p/(gamma*s*2));
}
@@ -588,36 +588,18 @@ JacobiSVDImpl_(_Tp* At, size_t astep, _Tp* _W, _Tp* Vt, size_t vstep,
{
c = (_Tp)std::sqrt((gamma + beta)/(gamma*2));
s = (_Tp)(p/(gamma*c*2));
delta = p*p*0.5/(gamma + beta);
}
W[i] += delta;
W[j] -= delta;
if( iter % 2 != 0 && W[i] > 0 && W[j] > 0 )
a = b = 0;
for( k = 0; k < m; k++ )
{
k = vblas.givens(Ai, Aj, m, c, s);
_Tp t0 = c*Ai[k] + s*Aj[k];
_Tp t1 = -s*Ai[k] + c*Aj[k];
Ai[k] = t0; Aj[k] = t1;
for( ; k < m; k++ )
{
_Tp t0 = c*Ai[k] + s*Aj[k];
_Tp t1 = -s*Ai[k] + c*Aj[k];
Ai[k] = t0; Aj[k] = t1;
}
}
else
{
a = b = 0;
for( k = 0; k < m; k++ )
{
_Tp t0 = c*Ai[k] + s*Aj[k];
_Tp t1 = -s*Ai[k] + c*Aj[k];
Ai[k] = t0; Aj[k] = t1;
a += (double)t0*t0; b += (double)t1*t1;
}
W[i] = a; W[j] = b;
a += (double)t0*t0; b += (double)t1*t1;
}
W[i] = a; W[j] = b;
changed = true;

View File

@@ -324,7 +324,7 @@ void MatOp::augAssignXor(const MatExpr& expr, Mat& m) const
{
Mat temp;
expr.op->assign(expr, temp);
m /= temp;
m ^= temp;
}

View File

@@ -183,7 +183,7 @@ static void finalizeHdr(Mat& m)
void Mat::create(int d, const int* _sizes, int _type)
{
int i;
CV_Assert(0 <= d && _sizes && d <= CV_MAX_DIM && _sizes);
CV_Assert(0 <= d && d <= CV_MAX_DIM && _sizes);
_type = CV_MAT_TYPE(_type);
if( data && (d == dims || (d == 1 && dims <= 2)) && _type == type() )

View File

@@ -1551,3 +1551,16 @@ TEST(Core_Add, AddToColumnWhen4Rows)
ASSERT_EQ(0, countNonZero(m1 - m2));
}
// Checks cvRound on a whole number, on ordinary fractions of both signs,
// and on exact .5 ties of both signs.
TEST(Core_round, CvRound)
{
ASSERT_EQ(2, cvRound(2.0));
ASSERT_EQ(2, cvRound(2.1));
ASSERT_EQ(-2, cvRound(-2.1));
ASSERT_EQ(3, cvRound(2.8));
ASSERT_EQ(-3, cvRound(-2.8));
// Exact halves are expected to go to the nearest even integer.
ASSERT_EQ(2, cvRound(2.5));
ASSERT_EQ(4, cvRound(3.5));
ASSERT_EQ(-2, cvRound(-2.5));
ASSERT_EQ(-4, cvRound(-3.5));
}

View File

@@ -48,7 +48,7 @@ Maximally stable extremal region extractor. ::
};
The class encapsulates all the parameters of the MSER extraction algorithm (see
http://en.wikipedia.org/wiki/Maximally_stable_extremal_regions). Also see http://opencv.willowgarage.com/wiki/documentation/cpp/features2d/MSER for useful comments and parameters description.
http://en.wikipedia.org/wiki/Maximally_stable_extremal_regions). Also see http://code.opencv.org/projects/opencv/wiki/MSER for useful comments and parameters description.
ORB

View File

@@ -69,7 +69,7 @@ struct KeypointResponseGreater
void KeyPointsFilter::retainBest(std::vector<KeyPoint>& keypoints, int n_points)
{
//this is only necessary if the keypoints size is greater than the number of desired points.
if( n_points > 0 && keypoints.size() > (size_t)n_points )
if( n_points >= 0 && keypoints.size() > (size_t)n_points )
{
if (n_points==0)
{

View File

@@ -421,7 +421,6 @@ struct Hamming
ResultType operator()(Iterator1 a, Iterator2 b, size_t size, ResultType /*worst_dist*/ = -1) const
{
ResultType result = 0;
#ifdef __GNUC__
#ifdef __ARM_NEON__
{
uint32x4_t bits = vmovq_n_u32(0);
@@ -438,7 +437,7 @@ struct Hamming
result = vgetq_lane_s32 (vreinterpretq_s32_u64(bitSet2),0);
result += vgetq_lane_s32 (vreinterpretq_s32_u64(bitSet2),2);
}
#else
#elif __GNUC__
{
//for portability just use unsigned long -- and use the __builtin_popcountll (see docs for __builtin_popcountll)
typedef unsigned long long pop_t;
@@ -458,8 +457,8 @@ struct Hamming
result += __builtin_popcountll(a_final ^ b_final);
}
}
#endif //NEON
#else
#else // NO NEON and NOT GNUC
typedef unsigned long long pop_t;
HammingLUT lut;
result = lut(reinterpret_cast<const unsigned char*> (a),
reinterpret_cast<const unsigned char*> (b), size * sizeof(pop_t));

View File

@@ -29,8 +29,6 @@ if(HAVE_CUDA)
source_group("Src\\NVidia" FILES ${ncv_files})
ocv_include_directories("src/nvidia" "src/nvidia/core" "src/nvidia/NPP_staging" ${CUDA_INCLUDE_DIRS})
ocv_warnings_disable(CMAKE_CXX_FLAGS -Wundef -Wmissing-declarations -Wshadow -Wunused-parameter /wd4211 /wd4201 /wd4100 /wd4505 /wd4408)
string(REPLACE "-Wsign-promo" "" CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}")
#set (CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS} "-Xcompiler;/EHsc-;")
if(MSVC)
if(NOT ENABLE_NOISY_WARNINGS)

View File

@@ -1007,7 +1007,7 @@ PERF_TEST_P(Video_Cn_MaxFeatures, Video_GMG,
#if defined(HAVE_NVCUVID) && BUILD_WITH_VIDEO_INPUT_SUPPORT
PERF_TEST_P(Video, Video_VideoReader, Values("gpu/video/768x576.avi", "gpu/video/1920x1080.avi"))
PERF_TEST_P(Video, DISABLED_Video_VideoReader, Values("gpu/video/768x576.avi", "gpu/video/1920x1080.avi"))
{
declare.time(20);
@@ -1044,7 +1044,7 @@ PERF_TEST_P(Video, Video_VideoReader, Values("gpu/video/768x576.avi", "gpu/video
#if defined(HAVE_NVCUVID) && defined(WIN32)
PERF_TEST_P(Video, Video_VideoWriter, Values("gpu/video/768x576.avi", "gpu/video/1920x1080.avi"))
PERF_TEST_P(Video, DISABLED_Video_VideoWriter, Values("gpu/video/768x576.avi", "gpu/video/1920x1080.avi"))
{
declare.time(30);

View File

@@ -1793,10 +1793,10 @@ void cv::gpu::exp(const GpuMat& src, GpuMat& dst, Stream& stream)
namespace arithm
{
void cmpMatEq_v4(PtrStepSz<uint> src1, PtrStepSz<uint> src2, PtrStepSz<uint> dst, cudaStream_t stream);
void cmpMatNe_v4(PtrStepSz<uint> src1, PtrStepSz<uint> src2, PtrStepSz<uint> dst, cudaStream_t stream);
void cmpMatLt_v4(PtrStepSz<uint> src1, PtrStepSz<uint> src2, PtrStepSz<uint> dst, cudaStream_t stream);
void cmpMatLe_v4(PtrStepSz<uint> src1, PtrStepSz<uint> src2, PtrStepSz<uint> dst, cudaStream_t stream);
void cmpMatEq_v4(PtrStepSz<unsigned int> src1, PtrStepSz<unsigned int> src2, PtrStepSz<unsigned int> dst, cudaStream_t stream);
void cmpMatNe_v4(PtrStepSz<unsigned int> src1, PtrStepSz<unsigned int> src2, PtrStepSz<unsigned int> dst, cudaStream_t stream);
void cmpMatLt_v4(PtrStepSz<unsigned int> src1, PtrStepSz<unsigned int> src2, PtrStepSz<unsigned int> dst, cudaStream_t stream);
void cmpMatLe_v4(PtrStepSz<unsigned int> src1, PtrStepSz<unsigned int> src2, PtrStepSz<unsigned int> dst, cudaStream_t stream);
template <typename T> void cmpMatEq(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream);
template <typename T> void cmpMatNe(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream);
@@ -1820,7 +1820,7 @@ void cv::gpu::compare(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, int c
{cmpMatEq<double> , cmpMatNe<double> , cmpMatLt<double> , cmpMatLe<double> }
};
typedef void (*func_v4_t)(PtrStepSz<uint> src1, PtrStepSz<uint> src2, PtrStepSz<uint> dst, cudaStream_t stream);
typedef void (*func_v4_t)(PtrStepSz<unsigned int> src1, PtrStepSz<unsigned int> src2, PtrStepSz<unsigned int> dst, cudaStream_t stream);
static const func_v4_t funcs_v4[] =
{
cmpMatEq_v4, cmpMatNe_v4, cmpMatLt_v4, cmpMatLe_v4

View File

@@ -129,6 +129,17 @@ void cv::gpu::OpticalFlowDual_TVL1_GPU::operator ()(const GpuMat& I0, const GpuM
gpu::multiply(u1s[s], Scalar::all(0.5), u1s[s]);
gpu::multiply(u2s[s], Scalar::all(0.5), u2s[s]);
}
else
{
u1s[s].create(I0s[s].size(), CV_32FC1);
u2s[s].create(I0s[s].size(), CV_32FC1);
}
}
if (!useInitialFlow)
{
u1s[nscales-1].setTo(Scalar::all(0));
u2s[nscales-1].setTo(Scalar::all(0));
}
// pyramidal structure for computing the optical flow
@@ -173,18 +184,9 @@ void cv::gpu::OpticalFlowDual_TVL1_GPU::procOneScale(const GpuMat& I0, const Gpu
CV_DbgAssert( I1.size() == I0.size() );
CV_DbgAssert( I1.type() == I0.type() );
CV_DbgAssert( u1.empty() || u1.size() == I0.size() );
CV_DbgAssert( u1.size() == I0.size() );
CV_DbgAssert( u2.size() == u1.size() );
if (u1.empty())
{
u1.create(I0.size(), CV_32FC1);
u1.setTo(Scalar::all(0));
u2.create(I0.size(), CV_32FC1);
u2.setTo(Scalar::all(0));
}
GpuMat I1x = I1x_buf(Rect(0, 0, I0.cols, I0.rows));
GpuMat I1y = I1y_buf(Rect(0, 0, I0.cols, I0.rows));
centeredGradient(I1, I1x, I1y);

View File

@@ -95,7 +95,7 @@ if(HAVE_QT)
if(${_have_flag})
set_source_files_properties(${_RCC_OUTFILES} PROPERTIES COMPILE_FLAGS -Wno-missing-declarations)
endif()
elseif(WIN32)
elseif(HAVE_WIN32UI)
list(APPEND highgui_srcs src/window_w32.cpp)
elseif(HAVE_GTK)
list(APPEND highgui_srcs src/window_gtk.cpp)
@@ -111,9 +111,21 @@ elseif(APPLE)
endif()
endif()
if(WIN32)
list(APPEND highgui_srcs src/cap_vfw.cpp src/cap_cmu.cpp src/cap_dshow.cpp)
endif(WIN32)
if(WIN32 AND NOT ARM)
list(APPEND highgui_srcs src/cap_cmu.cpp)
endif()
if (WIN32 AND HAVE_DSHOW)
list(APPEND highgui_srcs src/cap_dshow.cpp)
endif()
if (WIN32 AND HAVE_MSMF)
list(APPEND highgui_srcs src/cap_msmf.cpp)
endif()
if (WIN32 AND HAVE_VFW)
list(APPEND highgui_srcs src/cap_vfw.cpp)
endif()
if(HAVE_XINE)
list(APPEND highgui_srcs src/cap_xine.cpp)

View File

@@ -298,6 +298,7 @@ enum
CV_CAP_UNICAP =600, // Unicap drivers
CV_CAP_DSHOW =700, // DirectShow (via videoInput)
CV_CAP_MSMF =1400, // Microsoft Media Foundation (via videoInput)
CV_CAP_PVAPI =800, // PvAPI, Prosilica GigE SDK

View File

@@ -20,9 +20,9 @@
defined(HAVE_GSTREAMER) || \
defined(HAVE_QUICKTIME) || \
defined(HAVE_AVFOUNDATION) || \
/*defined(HAVE_OPENNI) || too specialized */ \
defined(HAVE_FFMPEG) || \
defined(WIN32) /* assume that we have ffmpeg */
defined(HAVE_VFW)
/*defined(HAVE_OPENNI) too specialized */ \
# define BUILD_WITH_VIDEO_INPUT_SUPPORT 1
#else
@@ -34,7 +34,7 @@
defined(HAVE_QUICKTIME) || \
defined(HAVE_AVFOUNDATION) || \
defined(HAVE_FFMPEG) || \
defined(WIN32) /* assume that we have ffmpeg */
defined(HAVE_VFW)
# define BUILD_WITH_VIDEO_OUTPUT_SUPPORT 1
#else
# define BUILD_WITH_VIDEO_OUTPUT_SUPPORT 0

View File

@@ -114,7 +114,7 @@ CV_IMPL CvCapture * cvCreateCameraCapture (int index)
{
int domains[] =
{
#ifdef HAVE_VIDEOINPUT
#ifdef HAVE_DSHOW
CV_CAP_DSHOW,
#endif
#if 1
@@ -168,7 +168,8 @@ CV_IMPL CvCapture * cvCreateCameraCapture (int index)
// try every possibly installed camera API
for (int i = 0; domains[i] >= 0; i++)
{
#if defined(HAVE_VIDEOINPUT) || \
#if defined(HAVE_DSHOW) || \
defined(HAVE_MSMF) || \
defined(HAVE_TYZX) || \
defined(HAVE_VFW) || \
defined(HAVE_LIBV4L) || \
@@ -195,11 +196,18 @@ CV_IMPL CvCapture * cvCreateCameraCapture (int index)
switch (domains[i])
{
#ifdef HAVE_VIDEOINPUT
#ifdef HAVE_MSMF
case CV_CAP_MSMF:
capture = cvCreateCameraCapture_MSMF (index);
if (capture)
return capture;
break;
#endif
#ifdef HAVE_DSHOW
case CV_CAP_DSHOW:
capture = cvCreateCameraCapture_DShow (index);
if (capture)
return capture;
capture = cvCreateCameraCapture_DShow (index);
if (capture)
return capture;
break;
#endif

View File

@@ -41,7 +41,7 @@
#include "precomp.hpp"
#if (defined WIN32 || defined _WIN32) && defined HAVE_VIDEOINPUT
#if (defined WIN32 || defined _WIN32) && defined HAVE_DSHOW
/*
DirectShow-based Video Capturing module is based on
@@ -3098,6 +3098,7 @@ HRESULT videoInput::routeCrossbar(ICaptureGraphBuilder2 **ppBuild, IBaseFilter *
return hr;
}
/********************* Capturing video from camera via DirectShow *********************/
class CvCaptureCAM_DShow : public CvCapture

View File

@@ -209,7 +209,7 @@ CvCapture* cvCreateFileCapture_FFMPEG_proxy(const char * filename)
if( result->open( filename ))
return result;
delete result;
#if defined WIN32 || defined _WIN32
#ifdef HAVE_VFW
return cvCreateFileCapture_VFW(filename);
#else
return 0;
@@ -263,9 +263,9 @@ CvVideoWriter* cvCreateVideoWriter_FFMPEG_proxy( const char* filename, int fourc
if( result->open( filename, fourcc, fps, frameSize, isColor != 0 ))
return result;
delete result;
#if defined WIN32 || defined _WIN32
return cvCreateVideoWriter_VFW(filename, fourcc, fps, frameSize, isColor);
#else
#ifdef HAVE_VFW
return cvCreateVideoWriter_VFW(filename, fourcc, fps, frameSize, isColor);
#else
return 0;
#endif
}

View File

@@ -153,6 +153,14 @@ extern "C" {
#define AVERROR_EOF (-MKTAG( 'E','O','F',' '))
#endif
// Compatibility shims for the codec-id enum. For libavcodec builds >= 54.25.0
// CV_CODEC_ID expands to AVCodecID and CV_CODEC(name) prepends AV_ to the
// enumerator name; for older builds they expand to CodecID and the bare name.
#if LIBAVCODEC_BUILD >= CALC_FFMPEG_VERSION(54,25,0)
# define CV_CODEC_ID AVCodecID
# define CV_CODEC(name) AV_##name
#else
# define CV_CODEC_ID CodecID
# define CV_CODEC(name) name
#endif
static int get_number_of_cpus(void)
{
#if LIBAVFORMAT_BUILD < CALC_FFMPEG_VERSION(52, 111, 0)
@@ -1026,7 +1034,7 @@ static const char * icvFFMPEGErrStr(int err)
/* function internal to FFMPEG (libavformat/riff.c) to lookup codec id by fourcc tag*/
extern "C" {
enum CodecID codec_get_bmp_id(unsigned int tag);
enum CV_CODEC_ID codec_get_bmp_id(unsigned int tag);
}
void CvVideoWriter_FFMPEG::init()
@@ -1078,7 +1086,7 @@ static AVFrame * icv_alloc_picture_FFMPEG(int pix_fmt, int width, int height, bo
/* add a video output stream to the container */
static AVStream *icv_add_video_stream_FFMPEG(AVFormatContext *oc,
CodecID codec_id,
CV_CODEC_ID codec_id,
int w, int h, int bitrate,
double fps, int pixel_format)
{
@@ -1110,7 +1118,7 @@ static AVStream *icv_add_video_stream_FFMPEG(AVFormatContext *oc,
c->codec_id = oc->oformat->video_codec;
#endif
if(codec_id != CODEC_ID_NONE){
if(codec_id != CV_CODEC(CODEC_ID_NONE)){
c->codec_id = codec_id;
}
@@ -1179,10 +1187,10 @@ static AVStream *icv_add_video_stream_FFMPEG(AVFormatContext *oc,
c->gop_size = 12; /* emit one intra frame every twelve frames at most */
c->pix_fmt = (PixelFormat) pixel_format;
if (c->codec_id == CODEC_ID_MPEG2VIDEO) {
if (c->codec_id == CV_CODEC(CODEC_ID_MPEG2VIDEO)) {
c->max_b_frames = 2;
}
if (c->codec_id == CODEC_ID_MPEG1VIDEO || c->codec_id == CODEC_ID_MSMPEG4V3){
if (c->codec_id == CV_CODEC(CODEC_ID_MPEG1VIDEO) || c->codec_id == CV_CODEC(CODEC_ID_MSMPEG4V3)){
/* needed to avoid using macroblocks in which some coeffs overflow
this doesn't happen with normal video, it just happens here as the
motion of the chroma plane doesn't match the luma plane */
@@ -1290,7 +1298,7 @@ bool CvVideoWriter_FFMPEG::writeFrame( const unsigned char* data, int step, int
#if LIBAVFORMAT_BUILD < 5231
// It is not needed in the latest versions of the ffmpeg
if( c->codec_id == CODEC_ID_RAWVIDEO && origin != 1 )
if( c->codec_id == CV_CODEC(CODEC_ID_RAWVIDEO) && origin != 1 )
{
if( !temp_image.data )
{
@@ -1477,7 +1485,7 @@ void CvVideoWriter_FFMPEG::close()
bool CvVideoWriter_FFMPEG::open( const char * filename, int fourcc,
double fps, int width, int height, bool is_color )
{
CodecID codec_id = CODEC_ID_NONE;
CV_CODEC_ID codec_id = CV_CODEC(CODEC_ID_NONE);
int err, codec_pix_fmt;
double bitrate_scale = 1;
@@ -1518,11 +1526,11 @@ bool CvVideoWriter_FFMPEG::open( const char * filename, int fourcc,
/* Lookup codec_id for given fourcc */
#if LIBAVCODEC_VERSION_INT<((51<<16)+(49<<8)+0)
if( (codec_id = codec_get_bmp_id( fourcc )) == CODEC_ID_NONE )
if( (codec_id = codec_get_bmp_id( fourcc )) == CV_CODEC(CODEC_ID_NONE) )
return false;
#else
const struct AVCodecTag * tags[] = { codec_bmp_tags, NULL};
if( (codec_id = av_codec_get_id(tags, fourcc)) == CODEC_ID_NONE )
if( (codec_id = av_codec_get_id(tags, fourcc)) == CV_CODEC(CODEC_ID_NONE) )
return false;
#endif
@@ -1544,20 +1552,20 @@ bool CvVideoWriter_FFMPEG::open( const char * filename, int fourcc,
// set a few optimal pixel formats for lossless codecs of interest..
switch (codec_id) {
#if LIBAVCODEC_VERSION_INT>((50<<16)+(1<<8)+0)
case CODEC_ID_JPEGLS:
case CV_CODEC(CODEC_ID_JPEGLS):
// BGR24 or GRAY8 depending on is_color...
codec_pix_fmt = input_pix_fmt;
break;
#endif
case CODEC_ID_HUFFYUV:
case CV_CODEC(CODEC_ID_HUFFYUV):
codec_pix_fmt = PIX_FMT_YUV422P;
break;
case CODEC_ID_MJPEG:
case CODEC_ID_LJPEG:
case CV_CODEC(CODEC_ID_MJPEG):
case CV_CODEC(CODEC_ID_LJPEG):
codec_pix_fmt = PIX_FMT_YUVJ420P;
bitrate_scale = 3;
break;
case CODEC_ID_RAWVIDEO:
case CV_CODEC(CODEC_ID_RAWVIDEO):
codec_pix_fmt = input_pix_fmt == PIX_FMT_GRAY8 ||
input_pix_fmt == PIX_FMT_GRAY16LE ||
input_pix_fmt == PIX_FMT_GRAY16BE ? input_pix_fmt : PIX_FMT_YUV420P;
@@ -1788,7 +1796,7 @@ struct OutputMediaStream_FFMPEG
void write(unsigned char* data, int size, int keyFrame);
// add a video output stream to the container
static AVStream* addVideoStream(AVFormatContext *oc, CodecID codec_id, int w, int h, int bitrate, double fps, PixelFormat pixel_format);
static AVStream* addVideoStream(AVFormatContext *oc, CV_CODEC_ID codec_id, int w, int h, int bitrate, double fps, PixelFormat pixel_format);
AVOutputFormat* fmt_;
AVFormatContext* oc_;
@@ -1835,7 +1843,7 @@ void OutputMediaStream_FFMPEG::close()
}
}
AVStream* OutputMediaStream_FFMPEG::addVideoStream(AVFormatContext *oc, CodecID codec_id, int w, int h, int bitrate, double fps, PixelFormat pixel_format)
AVStream* OutputMediaStream_FFMPEG::addVideoStream(AVFormatContext *oc, CV_CODEC_ID codec_id, int w, int h, int bitrate, double fps, PixelFormat pixel_format)
{
#if LIBAVFORMAT_BUILD >= CALC_FFMPEG_VERSION(53, 10, 0)
AVStream* st = avformat_new_stream(oc, 0);
@@ -1915,10 +1923,10 @@ AVStream* OutputMediaStream_FFMPEG::addVideoStream(AVFormatContext *oc, CodecID
c->gop_size = 12; // emit one intra frame every twelve frames at most
c->pix_fmt = pixel_format;
if (c->codec_id == CODEC_ID_MPEG2VIDEO)
if (c->codec_id == CV_CODEC(CODEC_ID_MPEG2VIDEO))
c->max_b_frames = 2;
if (c->codec_id == CODEC_ID_MPEG1VIDEO || c->codec_id == CODEC_ID_MSMPEG4V3)
if (c->codec_id == CV_CODEC(CODEC_ID_MPEG1VIDEO) || c->codec_id == CV_CODEC(CODEC_ID_MSMPEG4V3))
{
// needed to avoid using macroblocks in which some coeffs overflow
// this doesnt happen with normal video, it just happens here as the
@@ -1955,7 +1963,7 @@ bool OutputMediaStream_FFMPEG::open(const char* fileName, int width, int height,
if (!fmt_)
return false;
CodecID codec_id = CODEC_ID_H264;
CV_CODEC_ID codec_id = CV_CODEC(CODEC_ID_H264);
// alloc memory for context
#if LIBAVFORMAT_BUILD >= CALC_FFMPEG_VERSION(53, 2, 0)
@@ -2156,23 +2164,23 @@ bool InputMediaStream_FFMPEG::open(const char* fileName, int* codec, int* chroma
switch (enc->codec_id)
{
case CODEC_ID_MPEG1VIDEO:
case CV_CODEC(CODEC_ID_MPEG1VIDEO):
*codec = ::VideoCodec_MPEG1;
break;
case CODEC_ID_MPEG2VIDEO:
case CV_CODEC(CODEC_ID_MPEG2VIDEO):
*codec = ::VideoCodec_MPEG2;
break;
case CODEC_ID_MPEG4:
case CV_CODEC(CODEC_ID_MPEG4):
*codec = ::VideoCodec_MPEG4;
break;
case CODEC_ID_VC1:
case CV_CODEC(CODEC_ID_VC1):
*codec = ::VideoCodec_VC1;
break;
case CODEC_ID_H264:
case CV_CODEC(CODEC_ID_H264):
*codec = ::VideoCodec_H264;
break;

View File

@@ -1714,6 +1714,7 @@ static void icvCloseCAM_V4L( CvCaptureCAM_V4L* capture ){
#endif
free(capture->deviceName);
capture->deviceName = NULL;
//v4l2_free_ranges(capture);
//cvFree((void **)capture);
}

File diff suppressed because it is too large Load Diff

View File

@@ -406,7 +406,7 @@ bool CvCaptureCAM_VFW::open( int wIndex )
fourcc = (DWORD)-1;
memset( &caps, 0, sizeof(caps));
capDriverGetCaps( hWndC, &caps, sizeof(&caps));
capDriverGetCaps( hWndC, &caps, sizeof(caps));
::MoveWindow( hWndC, 0, 0, 320, 240, TRUE );
capSetUserData( hWndC, (size_t)this );
capSetCallbackOnFrame( hWndC, frameCallback );

View File

@@ -103,14 +103,6 @@ struct CvVideoWriter
virtual bool writeFrame(const IplImage*) { return false; }
};
#if defined WIN32 || defined _WIN32
#define HAVE_VFW 1
/* uncomment to enable CMUCamera1394 fireware camera module */
//#define HAVE_CMU1394 1
#endif
CvCapture * cvCreateCameraCapture_V4L( int index );
CvCapture * cvCreateCameraCapture_DC1394( int index );
CvCapture * cvCreateCameraCapture_DC1394_2( int index );
@@ -126,6 +118,7 @@ CvVideoWriter* cvCreateVideoWriter_Win32( const char* filename, int fourcc,
CvVideoWriter* cvCreateVideoWriter_VFW( const char* filename, int fourcc,
double fps, CvSize frameSize, int is_color );
CvCapture* cvCreateCameraCapture_DShow( int index );
CvCapture* cvCreateCameraCapture_MSMF( int index );
CvCapture* cvCreateCameraCapture_OpenNI( int index );
CvCapture* cvCreateFileCapture_OpenNI( const char* filename );
CvCapture* cvCreateCameraCapture_Android( int index );

View File

@@ -57,7 +57,7 @@ CV_IMPL void cvSetWindowProperty(const char* name, int prop_id, double prop_valu
#if defined (HAVE_QT)
cvSetModeWindow_QT(name,prop_value);
#elif defined WIN32 || defined _WIN32
#elif defined(HAVE_WIN32UI)
cvSetModeWindow_W32(name,prop_value);
#elif defined (HAVE_GTK)
cvSetModeWindow_GTK(name,prop_value);
@@ -96,7 +96,7 @@ CV_IMPL double cvGetWindowProperty(const char* name, int prop_id)
#if defined (HAVE_QT)
return cvGetModeWindow_QT(name);
#elif defined WIN32 || defined _WIN32
#elif defined(HAVE_WIN32UI)
return cvGetModeWindow_W32(name);
#elif defined (HAVE_GTK)
return cvGetModeWindow_GTK(name);
@@ -113,7 +113,7 @@ CV_IMPL double cvGetWindowProperty(const char* name, int prop_id)
#if defined (HAVE_QT)
return cvGetPropWindow_QT(name);
#elif defined WIN32 || defined _WIN32
#elif defined(HAVE_WIN32UI)
return cvGetPropWindowAutoSize_W32(name);
#elif defined (HAVE_GTK)
return cvGetPropWindowAutoSize_GTK(name);
@@ -126,7 +126,7 @@ CV_IMPL double cvGetWindowProperty(const char* name, int prop_id)
#if defined (HAVE_QT)
return cvGetRatioWindow_QT(name);
#elif defined WIN32 || defined _WIN32
#elif defined(HAVE_WIN32UI)
return cvGetRatioWindow_W32(name);
#elif defined (HAVE_GTK)
return cvGetRatioWindow_GTK(name);
@@ -139,7 +139,7 @@ CV_IMPL double cvGetWindowProperty(const char* name, int prop_id)
#if defined (HAVE_QT)
return cvGetOpenGlProp_QT(name);
#elif defined WIN32 || defined _WIN32
#elif defined(HAVE_WIN32UI)
return cvGetOpenGlProp_W32(name);
#elif defined (HAVE_GTK)
return cvGetOpenGlProp_GTK(name);
@@ -440,11 +440,11 @@ int cv::createButton(const String&, ButtonCallback, void*, int , bool )
#endif
#if defined WIN32 || defined _WIN32 // see window_w32.cpp
#if defined(HAVE_WIN32UI) // see window_w32.cpp
#elif defined (HAVE_GTK) // see window_gtk.cpp
#elif defined (HAVE_COCOA) // see window_carbon.cpp
#elif defined (HAVE_COCOA) // see window_carbon.cpp
#elif defined (HAVE_CARBON)
#elif defined (HAVE_QT) //YV see window_QT.cpp
#elif defined (HAVE_QT) //YV see window_QT.cpp
#else

View File

@@ -176,7 +176,7 @@ TEST(Highgui_Video, ffmpeg_image) { CV_FFmpegReadImageTest test; test.safe_run()
#endif
#if defined(HAVE_FFMPEG) || defined(WIN32) || defined(_WIN32)
#if defined(HAVE_FFMPEG)
//////////////////////////////// Parallel VideoWriters and VideoCaptures ////////////////////////////////////

View File

@@ -43,7 +43,7 @@
#include "test_precomp.hpp"
#include "opencv2/highgui.hpp"
#if defined HAVE_GTK || defined HAVE_QT || defined WIN32 || defined _WIN32 || defined HAVE_CARBON || defined HAVE_COCOA
#if defined HAVE_GTK || defined HAVE_QT || defined HAVE_WIN32UI || defined HAVE_CARBON || defined HAVE_COCOA
using namespace cv;
using namespace std;

View File

@@ -16,7 +16,7 @@
#include "opencv2/core/private.hpp"
#if defined(HAVE_VIDEOINPUT) || \
#if defined(HAVE_DSHOW) || \
defined(HAVE_TYZX) || \
defined(HAVE_VFW) || \
defined(HAVE_LIBV4L) || \
@@ -32,7 +32,7 @@
defined(HAVE_OPENNI) || \
defined(HAVE_XIMEA) || \
defined(HAVE_AVFOUNDATION) || \
defined(HAVE_GIGE_API) || \
defined(HAVE_GIGE_API) || \
(0)
//defined(HAVE_ANDROID_NATIVE_CAMERA) || - enable after #1193
# define BUILD_WITH_CAMERA_SUPPORT 1
@@ -45,9 +45,7 @@
defined(HAVE_QUICKTIME) || \
defined(HAVE_AVFOUNDATION) || \
/*defined(HAVE_OPENNI) || too specialized */ \
defined(HAVE_FFMPEG) || \
defined(WIN32) /* assume that we have ffmpeg */
defined(HAVE_FFMPEG)
# define BUILD_WITH_VIDEO_INPUT_SUPPORT 1
#else
# define BUILD_WITH_VIDEO_INPUT_SUPPORT 0
@@ -57,8 +55,7 @@
defined(HAVE_GSTREAMER) || \
defined(HAVE_QUICKTIME) || \
defined(HAVE_AVFOUNDATION) || \
defined(HAVE_FFMPEG) || \
defined(WIN32) /* assume that we have ffmpeg */
defined(HAVE_FFMPEG)
# define BUILD_WITH_VIDEO_OUTPUT_SUPPORT 1
#else
# define BUILD_WITH_VIDEO_OUTPUT_SUPPORT 0

View File

@@ -1931,7 +1931,7 @@ void cv::resize( InputArray _src, OutputArray _dst, Size dsize,
#ifdef HAVE_TEGRA_OPTIMIZATION
if (tegra::resize(src, dst, inv_scale_x, inv_scale_y, interpolation))
if (tegra::resize(src, dst, (float)inv_scale_x, (float)inv_scale_y, interpolation))
return;
#endif
@@ -3858,7 +3858,7 @@ cv2DRotationMatrix( CvPoint2D32f center, double angle,
double scale, CvMat* matrix )
{
cv::Mat M0 = cv::cvarrToMat(matrix), M = cv::getRotationMatrix2D(center, angle, scale);
CV_Assert( M.size() == M.size() );
CV_Assert( M.size() == M0.size() );
M.convertTo(M0, M0.type());
return matrix;
}
@@ -3871,7 +3871,7 @@ cvGetPerspectiveTransform( const CvPoint2D32f* src,
{
cv::Mat M0 = cv::cvarrToMat(matrix),
M = cv::getPerspectiveTransform((const cv::Point2f*)src, (const cv::Point2f*)dst);
CV_Assert( M.size() == M.size() );
CV_Assert( M.size() == M0.size() );
M.convertTo(M0, M0.type());
return matrix;
}

View File

@@ -283,7 +283,14 @@ if(BUILD_FAT_JAVA_LIB)
if(__extradeps)
list(REMOVE_ITEM __deps ${__extradeps})
endif()
target_link_libraries(${the_module} -Wl,-whole-archive ${__deps} -Wl,-no-whole-archive ${__extradeps} ${OPENCV_LINKER_LIBS})
if(APPLE)
foreach(_dep ${__deps})
target_link_libraries(${the_module} -Wl,-force_load "${_dep}")
endforeach()
else()
target_link_libraries(${the_module} -Wl,-whole-archive ${__deps} -Wl,-no-whole-archive)
endif()
target_link_libraries(${the_module} ${__extradeps} ${OPENCV_LINKER_LIBS})
else()
target_link_libraries(${the_module} ${OPENCV_MODULE_${the_module}_DEPS} ${OPENCV_LINKER_LIBS})
endif()

View File

@@ -14,7 +14,7 @@ ocv_list_filterout(opencv_test_java_files ".svn")
# copy sources out from the build tree
set(opencv_test_java_file_deps "")
foreach(f ${opencv_test_java_files} ${ANDROID_MANIFEST_FILE})
foreach(f ${opencv_test_java_files} ${ANDROID_MANIFEST_FILE} ".classpath" ".project")
add_custom_command(
OUTPUT "${opencv_test_java_bin_dir}/${f}"
COMMAND ${CMAKE_COMMAND} -E copy "${CMAKE_CURRENT_SOURCE_DIR}/${f}" "${opencv_test_java_bin_dir}/${f}"

View File

@@ -1,5 +1,6 @@
package org.opencv.test.features2d;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
@@ -204,7 +205,17 @@ public class BruteForceHammingDescriptorMatcherTest extends OpenCVTestCase {
}
public void testRadiusMatchMatListOfListOfDMatchFloat() {
fail("Not yet implemented");
Mat train = getTrainDescriptors();
Mat query = getQueryDescriptors();
ArrayList<MatOfDMatch> matches = new ArrayList<MatOfDMatch>();
matcher.radiusMatch(query, train, matches, 50.f);
assertEquals(matches.size(), 4);
assertTrue(matches.get(0).empty());
assertMatEqual(matches.get(1), new MatOfDMatch(truth[1]), EPS);
assertMatEqual(matches.get(2), new MatOfDMatch(truth[2]), EPS);
assertTrue(matches.get(3).empty());
}
public void testRadiusMatchMatListOfListOfDMatchFloatListOfMat() {

View File

@@ -54,6 +54,9 @@ public abstract class CameraBridgeViewBase extends SurfaceView implements Surfac
/**
 * Constructs the view for a caller-supplied camera index.
 *
 * Stores the index in mCameraIndex, registers this view as a callback on
 * its own surface holder, and resets both maximum frame dimensions to
 * MAX_UNSPECIFIED.
 *
 * @param context  forwarded unchanged to the superclass constructor
 * @param cameraId value stored in mCameraIndex
 */
public CameraBridgeViewBase(Context context, int cameraId) {
super(context);
mCameraIndex = cameraId;
// Register for surface holder callbacks on this view.
getHolder().addCallback(this);
// No explicit frame size limit until a caller sets one.
mMaxWidth = MAX_UNSPECIFIED;
mMaxHeight = MAX_UNSPECIFIED;
}
public CameraBridgeViewBase(Context context, AttributeSet attrs) {

View File

@@ -60,7 +60,6 @@ public class JavaCameraView extends CameraBridgeViewBase implements PreviewCallb
public JavaCameraView(Context context, AttributeSet attrs) {
super(context, attrs);
Log.d(TAG, "Java camera view ctor");
}
protected boolean initializeCamera(int width, int height) {
@@ -237,10 +236,8 @@ public class JavaCameraView extends CameraBridgeViewBase implements PreviewCallb
}
public void onPreviewFrame(byte[] frame, Camera arg1) {
Log.i(TAG, "Preview Frame received. Need to create MAT and deliver it to clients");
Log.i(TAG, "Frame size is " + frame.length);
synchronized (this)
{
Log.d(TAG, "Preview Frame received. Frame size: " + frame.length);
synchronized (this) {
mFrameChain[1 - mChainIdx].put(0, 0, frame);
this.notify();
}
@@ -248,8 +245,7 @@ public class JavaCameraView extends CameraBridgeViewBase implements PreviewCallb
mCamera.addCallbackBuffer(mBuffer);
}
private class JavaCameraFrame implements CvCameraViewFrame
{
private class JavaCameraFrame implements CvCameraViewFrame {
/**
 * Returns the region of mYuvFrameData selected by
 * submat(0, mHeight, 0, mWidth).
 */
public Mat gray() {
    final Mat region = mYuvFrameData.submat(0, mHeight, 0, mWidth);
    return region;
}

View File

@@ -22,6 +22,12 @@ public class OpenCVLoader
*/
public static final String OPENCV_VERSION_2_4_4 = "2.4.4";
/**
* OpenCV Library version 2.4.5.
*/
public static final String OPENCV_VERSION_2_4_5 = "2.4.5";
/**
* Loads and initializes OpenCV library from current application package. Roughly, it's an analog of system.loadLibrary("opencv_java").
* @return Returns true if initialization of OpenCV was successful.

View File

@@ -16,8 +16,8 @@ public class MatOfDMatch extends Mat {
protected MatOfDMatch(long addr) {
super(addr);
if(checkVector(_channels, _depth) < 0 )
throw new IllegalArgumentException("Incomatible Mat");
if( !empty() && checkVector(_channels, _depth) < 0 )
throw new IllegalArgumentException("Incomatible Mat: " + toString());
//FIXME: do we need release() here?
}
@@ -27,8 +27,8 @@ public class MatOfDMatch extends Mat {
public MatOfDMatch(Mat m) {
super(m, Range.all());
if(checkVector(_channels, _depth) < 0 )
throw new IllegalArgumentException("Incomatible Mat");
if( !empty() && checkVector(_channels, _depth) < 0 )
throw new IllegalArgumentException("Incomatible Mat: " + toString());
//FIXME: do we need release() here?
}

View File

@@ -429,10 +429,11 @@ void CvBlobTrackerAuto1::Process(IplImage* pImg, IplImage* pMask)
for(i=0; i<NewBlobList.GetBlobNum(); ++i)
{
CvBlob* pBN = NewBlobList.GetBlob(i);
pBN->ID = m_NextBlobID;
if(pBN && pBN->w >= CV_BLOB_MINW && pBN->h >= CV_BLOB_MINH)
{
pBN->ID = m_NextBlobID;
CvBlob* pB = m_pBT->AddBlob(pBN, pImg, pmask );
if(pB)
{

View File

@@ -235,7 +235,7 @@ void CvCalibFilter::SetCameraCount( int count )
cvReleaseMat( &rectMap[i][1] );
}
memset( latestCounts, 0, sizeof(latestPoints) );
memset( latestCounts, 0, sizeof(latestCounts) );
maxPoints = 0;
cameraCount = count;
}

View File

@@ -2115,7 +2115,7 @@ CV_IMPL IplImage* icvCreateIsometricImage( IplImage* src, IplImage* dst,
if( !dst || dst->depth != desired_depth ||
dst->nChannels != desired_num_channels ||
dst_size.width != src_size.width ||
dst_size.height != dst_size.height )
dst_size.height != src_size.height )
{
cvReleaseImage( &dst );
dst = cvCreateImage( src_size, desired_depth, desired_num_channels );

View File

@@ -58,9 +58,8 @@ namespace
IMPLEMENT_PARAM_CLASS(SURF_Upright, bool)
}
PARAM_TEST_CASE(SURF, cv::gpu::DeviceInfo, SURF_HessianThreshold, SURF_Octaves, SURF_OctaveLayers, SURF_Extended, SURF_Upright)
PARAM_TEST_CASE(SURF, SURF_HessianThreshold, SURF_Octaves, SURF_OctaveLayers, SURF_Extended, SURF_Upright)
{
cv::gpu::DeviceInfo devInfo;
double hessianThreshold;
int nOctaves;
int nOctaveLayers;
@@ -69,14 +68,11 @@ PARAM_TEST_CASE(SURF, cv::gpu::DeviceInfo, SURF_HessianThreshold, SURF_Octaves,
virtual void SetUp()
{
devInfo = GET_PARAM(0);
hessianThreshold = GET_PARAM(1);
nOctaves = GET_PARAM(2);
nOctaveLayers = GET_PARAM(3);
extended = GET_PARAM(4);
upright = GET_PARAM(5);
cv::gpu::setDevice(devInfo.deviceID());
hessianThreshold = GET_PARAM(0);
nOctaves = GET_PARAM(1);
nOctaveLayers = GET_PARAM(2);
extended = GET_PARAM(3);
upright = GET_PARAM(4);
}
};
@@ -93,39 +89,24 @@ GPU_TEST_P(SURF, Detector)
surf.upright = upright;
surf.keypointsRatio = 0.05f;
if (!supportFeature(devInfo, cv::gpu::GLOBAL_ATOMICS))
{
try
{
std::vector<cv::KeyPoint> keypoints;
surf(loadMat(image), cv::gpu::GpuMat(), keypoints);
}
catch (const cv::Exception& e)
{
ASSERT_EQ(CV_StsNotImplemented, e.code);
}
}
else
{
std::vector<cv::KeyPoint> keypoints;
surf(loadMat(image), cv::gpu::GpuMat(), keypoints);
std::vector<cv::KeyPoint> keypoints;
surf(loadMat(image), cv::gpu::GpuMat(), keypoints);
cv::SURF surf_gold;
surf_gold.hessianThreshold = hessianThreshold;
surf_gold.nOctaves = nOctaves;
surf_gold.nOctaveLayers = nOctaveLayers;
surf_gold.extended = extended;
surf_gold.upright = upright;
cv::SURF surf_gold;
surf_gold.hessianThreshold = hessianThreshold;
surf_gold.nOctaves = nOctaves;
surf_gold.nOctaveLayers = nOctaveLayers;
surf_gold.extended = extended;
surf_gold.upright = upright;
std::vector<cv::KeyPoint> keypoints_gold;
surf_gold(image, cv::noArray(), keypoints_gold);
std::vector<cv::KeyPoint> keypoints_gold;
surf_gold(image, cv::noArray(), keypoints_gold);
ASSERT_EQ(keypoints_gold.size(), keypoints.size());
int matchedCount = getMatchedPointsCount(keypoints_gold, keypoints);
double matchedRatio = static_cast<double>(matchedCount) / keypoints_gold.size();
ASSERT_EQ(keypoints_gold.size(), keypoints.size());
int matchedCount = getMatchedPointsCount(keypoints_gold, keypoints);
double matchedRatio = static_cast<double>(matchedCount) / keypoints_gold.size();
EXPECT_GT(matchedRatio, 0.95);
}
EXPECT_GT(matchedRatio, 0.95);
}
GPU_TEST_P(SURF, Detector_Masked)
@@ -144,39 +125,24 @@ GPU_TEST_P(SURF, Detector_Masked)
surf.upright = upright;
surf.keypointsRatio = 0.05f;
if (!supportFeature(devInfo, cv::gpu::GLOBAL_ATOMICS))
{
try
{
std::vector<cv::KeyPoint> keypoints;
surf(loadMat(image), loadMat(mask), keypoints);
}
catch (const cv::Exception& e)
{
ASSERT_EQ(CV_StsNotImplemented, e.code);
}
}
else
{
std::vector<cv::KeyPoint> keypoints;
surf(loadMat(image), loadMat(mask), keypoints);
std::vector<cv::KeyPoint> keypoints;
surf(loadMat(image), loadMat(mask), keypoints);
cv::SURF surf_gold;
surf_gold.hessianThreshold = hessianThreshold;
surf_gold.nOctaves = nOctaves;
surf_gold.nOctaveLayers = nOctaveLayers;
surf_gold.extended = extended;
surf_gold.upright = upright;
cv::SURF surf_gold;
surf_gold.hessianThreshold = hessianThreshold;
surf_gold.nOctaves = nOctaves;
surf_gold.nOctaveLayers = nOctaveLayers;
surf_gold.extended = extended;
surf_gold.upright = upright;
std::vector<cv::KeyPoint> keypoints_gold;
surf_gold(image, mask, keypoints_gold);
std::vector<cv::KeyPoint> keypoints_gold;
surf_gold(image, mask, keypoints_gold);
ASSERT_EQ(keypoints_gold.size(), keypoints.size());
int matchedCount = getMatchedPointsCount(keypoints_gold, keypoints);
double matchedRatio = static_cast<double>(matchedCount) / keypoints_gold.size();
ASSERT_EQ(keypoints_gold.size(), keypoints.size());
int matchedCount = getMatchedPointsCount(keypoints_gold, keypoints);
double matchedRatio = static_cast<double>(matchedCount) / keypoints_gold.size();
EXPECT_GT(matchedRatio, 0.95);
}
EXPECT_GT(matchedRatio, 0.95);
}
GPU_TEST_P(SURF, Descriptor)
@@ -199,43 +165,26 @@ GPU_TEST_P(SURF, Descriptor)
surf_gold.extended = extended;
surf_gold.upright = upright;
if (!supportFeature(devInfo, cv::gpu::GLOBAL_ATOMICS))
{
try
{
std::vector<cv::KeyPoint> keypoints;
cv::gpu::GpuMat descriptors;
surf(loadMat(image), cv::gpu::GpuMat(), keypoints, descriptors);
}
catch (const cv::Exception& e)
{
ASSERT_EQ(CV_StsNotImplemented, e.code);
}
}
else
{
std::vector<cv::KeyPoint> keypoints;
surf_gold(image, cv::noArray(), keypoints);
std::vector<cv::KeyPoint> keypoints;
surf_gold(image, cv::noArray(), keypoints);
cv::gpu::GpuMat descriptors;
surf(loadMat(image), cv::gpu::GpuMat(), keypoints, descriptors, true);
cv::gpu::GpuMat descriptors;
surf(loadMat(image), cv::gpu::GpuMat(), keypoints, descriptors, true);
cv::Mat descriptors_gold;
surf_gold(image, cv::noArray(), keypoints, descriptors_gold, true);
cv::Mat descriptors_gold;
surf_gold(image, cv::noArray(), keypoints, descriptors_gold, true);
cv::BFMatcher matcher(cv::NORM_L2);
std::vector<cv::DMatch> matches;
matcher.match(descriptors_gold, cv::Mat(descriptors), matches);
cv::BFMatcher matcher(cv::NORM_L2);
std::vector<cv::DMatch> matches;
matcher.match(descriptors_gold, cv::Mat(descriptors), matches);
int matchedCount = getMatchedPointsCount(keypoints, keypoints, matches);
double matchedRatio = static_cast<double>(matchedCount) / keypoints.size();
int matchedCount = getMatchedPointsCount(keypoints, keypoints, matches);
double matchedRatio = static_cast<double>(matchedCount) / keypoints.size();
EXPECT_GT(matchedRatio, 0.6);
}
EXPECT_GT(matchedRatio, 0.6);
}
INSTANTIATE_TEST_CASE_P(GPU_Features2D, SURF, testing::Combine(
ALL_DEVICES,
testing::Values(SURF_HessianThreshold(100.0), SURF_HessianThreshold(500.0), SURF_HessianThreshold(1000.0)),
testing::Values(SURF_Octaves(3), SURF_Octaves(4)),
testing::Values(SURF_OctaveLayers(2), SURF_OctaveLayers(3)),
@@ -245,17 +194,15 @@ INSTANTIATE_TEST_CASE_P(GPU_Features2D, SURF, testing::Combine(
//////////////////////////////////////////////////////
// VIBE
PARAM_TEST_CASE(VIBE, cv::gpu::DeviceInfo, cv::Size, MatType, UseRoi)
PARAM_TEST_CASE(VIBE, cv::Size, MatType, UseRoi)
{
};
GPU_TEST_P(VIBE, Accuracy)
{
const cv::gpu::DeviceInfo devInfo = GET_PARAM(0);
cv::gpu::setDevice(devInfo.deviceID());
const cv::Size size = GET_PARAM(1);
const int type = GET_PARAM(2);
const bool useRoi = GET_PARAM(3);
const cv::Size size = GET_PARAM(0);
const int type = GET_PARAM(1);
const bool useRoi = GET_PARAM(2);
const cv::Mat fullfg(size, CV_8UC1, cv::Scalar::all(255));
@@ -278,7 +225,6 @@ GPU_TEST_P(VIBE, Accuracy)
}
INSTANTIATE_TEST_CASE_P(GPU_Video, VIBE, testing::Combine(
ALL_DEVICES,
DIFFERENT_SIZES,
testing::Values(MatType(CV_8UC1), MatType(CV_8UC3), MatType(CV_8UC4)),
WHOLE_SUBMAT));

View File

@@ -1,73 +1,3 @@
#include "test_precomp.hpp"
#if defined(HAVE_OPENCV_GPU) && defined(HAVE_CUDA)
using namespace cv;
using namespace cv::gpu;
using namespace cvtest;
using namespace testing;
int main(int argc, char **argv)
{
try
{
const char* keys =
"{ h | help ? | false | Print help}"
"{ i | info | false | Print information about system and exit }"
"{ d | device | -1 | Device on which tests will be executed (-1 means all devices) }"
;
CommandLineParser cmd(argc, (const char**)argv, keys);
if (cmd.get<bool>("help"))
{
cmd.printMessage();
return 0;
}
printCudaInfo();
if (cmd.get<bool>("info"))
{
return 0;
}
int device = cmd.get<int>("device");
if (device < 0)
{
DeviceManager::instance().loadAll();
std::cout << "Run tests on all supported devices \n" << std::endl;
}
else
{
DeviceManager::instance().load(device);
DeviceInfo info(device);
std::cout << "Run tests on device " << device << " [" << info.name() << "] \n" << std::endl;
}
TS::ptr()->init("cv");
InitGoogleTest(&argc, argv);
return RUN_ALL_TESTS();
}
catch (const std::exception& e)
{
std::cerr << e.what() << std::endl;
return -1;
}
catch (...)
{
std::cerr << "Unknown error" << std::endl;
return -1;
}
return 0;
}
#else // HAVE_CUDA
CV_TEST_MAIN("cv")
#endif // HAVE_CUDA

View File

@@ -15,14 +15,16 @@
#include "opencv2/highgui.hpp"
#include "opencv2/nonfree.hpp"
#include "opencv2/ts/gpu_test.hpp"
#include "opencv2/opencv_modules.hpp"
#ifdef HAVE_OPENCV_OCL
# include "opencv2/nonfree/ocl.hpp"
#endif
#if defined(HAVE_OPENCV_GPU) && defined(HAVE_CUDA)
#include "opencv2/ts/gpu_test.hpp"
#include "opencv2/nonfree/gpu.hpp"
#ifdef HAVE_OPENCV_GPU
# include "opencv2/nonfree/gpu.hpp"
#endif
#endif

View File

@@ -109,17 +109,6 @@ static int getMatchedPointsCount(const std::vector<cv::KeyPoint>& keypoints1, co
return validCount;
}
// Opens the declaration of a parameterized test fixture: expands to
// `struct name` deriving from testing::TestWithParam over a std::tr1::tuple
// of the listed parameter types. The caller supplies the struct body.
#define PARAM_TEST_CASE(name, ...) struct name : testing::TestWithParam< std::tr1::tuple< __VA_ARGS__ > >
// Defines, inside an anonymous namespace, a thin wrapper class `name` around a
// single value of `type`:
//   - constructible from `type` (default: a value-initialized `type`),
//   - implicitly convertible back to `type`,
//   - printable through PrintTo as "<name>=<value>", using
//     testing::PrintToString for the value.
#define IMPLEMENT_PARAM_CLASS(name, type) \
namespace { class name { \
public: \
name ( type arg = type ()) : val_(arg) {} \
operator type () const {return val_;} \
private: \
type val_; \
}; \
inline void PrintTo( name param, std::ostream* os) {*os << #name << "=" << testing::PrintToString(static_cast< type >(param));}}
IMPLEMENT_PARAM_CLASS(HessianThreshold, double)
IMPLEMENT_PARAM_CLASS(Octaves, int)
IMPLEMENT_PARAM_CLASS(OctaveLayers, int)

View File

@@ -3,5 +3,5 @@ if(NOT HAVE_OPENCL)
endif()
set(the_description "OpenCL-accelerated Computer Vision")
ocv_define_module(ocl opencv_core opencv_imgproc opencv_objdetect opencv_video)
ocv_define_module(ocl opencv_core opencv_imgproc opencv_objdetect opencv_video opencv_features2d)
ocv_warnings_disable(CMAKE_CXX_FLAGS -Wshadow)

View File

@@ -50,7 +50,6 @@
#include "opencv2/core.hpp"
#include "opencv2/imgproc.hpp"
#include "opencv2/objdetect.hpp"
//#include "opencv2/features2d.hpp"
namespace cv
{
@@ -125,6 +124,9 @@ namespace cv
CV_EXPORTS void* getoclCommandQueue();
//explicit call clFinish. The global command queue will be used.
CV_EXPORTS void finish();
//this function enable ocl module to use customized cl_context and cl_command_queue
//getDevice also need to be called before this function
CV_EXPORTS void setDeviceEx(Info &oclinfo, void *ctx, void *qu, int devnum = 0);
@@ -1714,6 +1716,36 @@ namespace cv
private:
oclMat minSSD, leBuf, riBuf;
};
// Functor-style class exposing two operator() overloads that write a
// disparity oclMat, configured through the public fields below.
// NOTE(review): only the declaration is visible here; the meaning of each
// parameter is inferred from its name and must be confirmed against the
// implementation.
class CV_EXPORTS StereoBeliefPropagation
{
public:
// Defaults used by the first constructor's default arguments.
enum { DEFAULT_NDISP = 64 };
enum { DEFAULT_ITERS = 5 };
enum { DEFAULT_LEVELS = 5 };
// Takes an image size and three non-const int references; presumably fills
// ndisp, iters and levels with suggested values for that size -- TODO confirm.
static void estimateRecommendedParams(int width, int height, int &ndisp, int &iters, int &levels);
// Constructor with all arguments defaulted; msg_type defaults to CV_16S.
explicit StereoBeliefPropagation(int ndisp = DEFAULT_NDISP,
int iters = DEFAULT_ITERS,
int levels = DEFAULT_LEVELS,
int msg_type = CV_16S);
// Constructor taking the four float terms explicitly; msg_type defaults to
// CV_32F here (note: different default from the constructor above).
StereoBeliefPropagation(int ndisp, int iters, int levels,
float max_data_term, float data_weight,
float max_disc_term, float disc_single_jump,
int msg_type = CV_32F);
// Takes a left/right image pair and an output disparity matrix.
void operator()(const oclMat &left, const oclMat &right, oclMat &disparity);
// Takes a single `data` matrix and an output disparity matrix; presumably
// `data` is a precomputed cost volume -- TODO confirm.
void operator()(const oclMat &data, oclMat &disparity);
// Public configuration fields, named like the constructor parameters.
int ndisp;
int iters;
int levels;
float max_data_term;
float data_weight;
float max_disc_term;
float disc_single_jump;
int msg_type;
private:
// Internal oclMat members; presumably working buffers reused between
// calls -- TODO confirm.
oclMat u, d, l, r, u2, d2, l2, r2;
std::vector<oclMat> datas;
oclMat out;
};
}
}
#if defined _MSC_VER && _MSC_VER >= 1200

View File

@@ -1,120 +0,0 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// Intel License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000, Intel Corporation, all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of Intel Corporation may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#ifndef __OPENCV_TEST_INTERPOLATION_HPP__
#define __OPENCV_TEST_INTERPOLATION_HPP__
// Reads channel `c` of element (y, x) from `src`, resolving out-of-range
// coordinates according to `border_type`.
//
// For cv::BORDER_CONSTANT, coordinates outside the matrix yield `borderVal`
// for that channel (saturated to T). For every other border type, both
// coordinates are first mapped through cv::borderInterpolate.
template <typename T> T readVal(const cv::Mat &src, int y, int x, int c, int border_type, cv::Scalar borderVal = cv::Scalar())
{
    if (border_type != cv::BORDER_CONSTANT)
    {
        const int row = cv::borderInterpolate(y, src.rows, border_type);
        const int col = cv::borderInterpolate(x, src.cols, border_type);
        return src.at<T>(row, col * src.channels() + c);
    }

    const bool inside = y >= 0 && y < src.rows && x >= 0 && x < src.cols;
    if (inside)
        return src.at<T>(y, x * src.channels() + c);

    return cv::saturate_cast<T>(borderVal.val[c]);
}
// Samples a matrix by flooring the floating-point coordinates and reading
// that single element through readVal.
template <typename T> struct NearestInterpolator
{
    // Returns channel `c` of the element at (floor(y), floor(x)), with
    // out-of-range coordinates handled by readVal's border logic.
    static T getValue(const cv::Mat &src, float y, float x, int c, int border_type, cv::Scalar borderVal = cv::Scalar())
    {
        const int row = cvFloor(y);
        const int col = cvFloor(x);
        return readVal<T>(src, row, col, c, border_type, borderVal);
    }
};
// Samples a matrix as a weighted sum of the four elements surrounding the
// (shifted) floating-point coordinate.
template <typename T> struct LinearInterpolator
{
// Returns channel `c` interpolated at (y, x). Neighbours that fall outside
// the matrix are resolved by readVal according to `border_type`/`borderVal`.
static T getValue(const cv::Mat &src, float y, float x, int c, int border_type, cv::Scalar borderVal = cv::Scalar())
{
// Shift both coordinates by half a pixel before locating the neighbours
// (presumably a pixel-centre convention -- confirm against the caller).
x -= 0.5f;
y -= 0.5f;
// (x1, y1) is the top-left neighbour, (x2, y2) the bottom-right one.
int x1 = cvFloor(x);
int y1 = cvFloor(y);
int x2 = x1 + 1;
int y2 = y1 + 1;
// Accumulate each neighbour weighted by the product of its distances to
// the opposite corner; the four weights sum to 1.
float res = 0;
res += readVal<T>(src, y1, x1, c, border_type, borderVal) * ((x2 - x) * (y2 - y));
res += readVal<T>(src, y1, x2, c, border_type, borderVal) * ((x - x1) * (y2 - y));
res += readVal<T>(src, y2, x1, c, border_type, borderVal) * ((x2 - x) * (y - y1));
res += readVal<T>(src, y2, x2, c, border_type, borderVal) * ((x - x1) * (y - y1));
// Convert the float accumulator back to the element type with saturation.
return cv::saturate_cast<T>(res);
}
};
// Samples a matrix by evaluating a cubic polynomial over a 4x4 neighbourhood.
template <typename T> struct CubicInterpolator
{
// Evaluates a cubic polynomial in `x` built from four samples. Algebraically
// the result is p[1] at x == 0 and p[2] at x == 1. The expression is computed
// in double (because of the double literals) and narrowed to float on return.
static float getValue(float p[4], float x)
{
return p[1] + 0.5 * x * (p[2] - p[0] + x * (2.0 * p[0] - 5.0 * p[1] + 4.0 * p[2] - p[3] + x * (3.0 * (p[1] - p[2]) + p[3] - p[0])));
}
// Two-pass evaluation: each of the four rows p[0..3] is evaluated at `x`,
// then the four row results are evaluated at `y`.
static float getValue(float p[4][4], float x, float y)
{
float arr[4];
arr[0] = getValue(p[0], x);
arr[1] = getValue(p[1], x);
arr[2] = getValue(p[2], x);
arr[3] = getValue(p[3], x);
return getValue(arr, y);
}
// Returns channel `c` interpolated at (y, x). Neighbours outside the matrix
// are resolved by readVal according to `border_type`/`borderVal`.
static T getValue(const cv::Mat &src, float y, float x, int c, int border_type, cv::Scalar borderVal = cv::Scalar())
{
// Nearest integer position; the neighbourhood spans rows iy-2..iy+1 and
// columns ix-2..ix+1.
int ix = cvRound(x);
int iy = cvRound(y);
// Each readVal<T> result is converted to float when stored.
float vals[4][4] =
{
{readVal<T>(src, iy - 2, ix - 2, c, border_type, borderVal), readVal<T>(src, iy - 2, ix - 1, c, border_type, borderVal), readVal<T>(src, iy - 2, ix, c, border_type, borderVal), readVal<T>(src, iy - 2, ix + 1, c, border_type, borderVal)},
{readVal<T>(src, iy - 1, ix - 2, c, border_type, borderVal), readVal<T>(src, iy - 1, ix - 1, c, border_type, borderVal), readVal<T>(src, iy - 1, ix, c, border_type, borderVal), readVal<T>(src, iy - 1, ix + 1, c, border_type, borderVal)},
{readVal<T>(src, iy , ix - 2, c, border_type, borderVal), readVal<T>(src, iy , ix - 1, c, border_type, borderVal), readVal<T>(src, iy , ix, c, border_type, borderVal), readVal<T>(src, iy , ix + 1, c, border_type, borderVal)},
{readVal<T>(src, iy + 1, ix - 2, c, border_type, borderVal), readVal<T>(src, iy + 1, ix - 1, c, border_type, borderVal), readVal<T>(src, iy + 1, ix, c, border_type, borderVal), readVal<T>(src, iy + 1, ix + 1, c, border_type, borderVal)},
};
// The fractional arguments are computed in double and implicitly narrowed to
// float by the call.
// NOTE(review): the 1-D evaluator yields p[1] at 0 and p[2] at 1, whereas the
// argument passed here is (offset + 2.0) / 4.0 -- confirm this scaling is the
// intended one.
return cv::saturate_cast<T>(getValue(vals, (x - ix + 2.0) / 4.0, (y - iy + 2.0) / 4.0));
}
};
#endif // __OPENCV_TEST_INTERPOLATION_HPP__

View File

@@ -7,12 +7,13 @@
// copy or use the software.
//
//
// Intel License Agreement
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000, Intel Corporation, all rights reserved.
// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved.
// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
@@ -21,12 +22,12 @@
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
// and/or other materials provided with the distribution.
//
// * The name of Intel Corporation may not be used to endorse or promote products
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// This software is provided by the copyright holders and contributors as is and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
@@ -41,99 +42,118 @@
#include "precomp.hpp"
#ifdef HAVE_OPENCL
using namespace std;
using namespace cv;
using namespace cv::ocl;
using namespace cvtest;
using namespace testing;
void print_info()
int main(int argc, const char *argv[])
{
printf("\n");
#if defined _WIN32
# if defined _WIN64
puts("OS: Windows 64");
# else
puts("OS: Windows 32");
# endif
#elif defined linux
# if defined _LP64
puts("OS: Linux 64");
# else
puts("OS: Linux 32");
# endif
#elif defined __APPLE__
# if defined _LP64
puts("OS: Apple 64");
# else
puts("OS: Apple 32");
# endif
#endif
vector<ocl::Info> oclinfo;
int num_devices = getDevice(oclinfo);
if (num_devices < 1)
{
cerr << "no device found\n";
return -1;
}
int devidx = 0;
for (size_t i = 0; i < oclinfo.size(); i++)
{
for (size_t j = 0; j < oclinfo[i].DeviceName.size(); j++)
{
printf("device %d: %s\n", devidx++, oclinfo[i].DeviceName[j].c_str());
}
}
redirectError(cvErrorCallback);
}
std::string workdir;
int main(int argc, char **argv)
{
TS::ptr()->init("ocl");
InitGoogleTest(&argc, argv);
const char *keys =
"{ h | false | print help message }"
"{ w | ../../../samples/c/| set working directory i.e. -w=C:\\}"
"{ t | gpu | set device type:i.e. -t=cpu or gpu}"
"{ p | 0 | set platform id i.e. -p=0}"
"{ d | 0 | set device id i.e. -d=0}";
"{ h help | false | print help message }"
"{ f filter | | filter for test }"
"{ w workdir | | set working directory }"
"{ l list | false | show all tests }"
"{ d device | 0 | device id }"
"{ i iters | 10 | iteration count }"
"{ m warmup | 1 | gpu warm up iteration count}"
"{ t xtop | 1.1 | xfactor top boundary}"
"{ b xbottom | 0.9 | xfactor bottom boundary}"
"{ v verify | false | only run gpu once to verify if problems occur}";
CommandLineParser cmd(argc, argv, keys);
if (cmd.get<string>("h")=="true")
if (cmd.has("help"))
{
cout << "Avaible options besides goole test option:" << endl;
cout << "Avaible options:" << endl;
cmd.printMessage();
return 0;
}
workdir = cmd.get<string>("w");
string type = cmd.get<string>("t");
unsigned int pid = cmd.get<unsigned int>("p");
int device = cmd.get<int>("d");
print_info();
// int flag = CVCL_DEVICE_TYPE_GPU;
// if(type == "cpu")
// {
// flag = CVCL_DEVICE_TYPE_CPU;
// }
std::vector<cv::ocl::Info> oclinfo;
int devnums = getDevice(oclinfo);
if(devnums <= device || device < 0)
int device = cmd.get<int>("device");
if (device < 0 || device >= num_devices)
{
std::cout << "device invalid\n";
cerr << "Invalid device ID" << endl;
return -1;
}
if(pid >= oclinfo.size())
if (cmd.get<bool>("verify"))
{
std::cout << "platform invalid\n";
return -1;
TestSystem::instance().setNumIters(1);
TestSystem::instance().setGPUWarmupIters(0);
TestSystem::instance().setCPUIters(0);
}
if(pid != 0 || device != 0)
devidx = 0;
for (size_t i = 0; i < oclinfo.size(); i++)
{
setDevice(oclinfo[pid], device);
for (size_t j = 0; j < oclinfo[i].DeviceName.size(); j++, devidx++)
{
if (device == devidx)
{
ocl::setDevice(oclinfo[i], (int)j);
TestSystem::instance().setRecordName(oclinfo[i].DeviceName[j]);
printf("\nuse %d: %s\n", devidx, oclinfo[i].DeviceName[j].c_str());
goto END_DEV;
}
}
}
cout << "Device type:" << type << endl << "Device name:" << oclinfo[pid].DeviceName[device] << endl;
setBinpath(CLBINPATH);
return RUN_ALL_TESTS();
}
END_DEV:
#else // DON'T HAVE_OPENCL
string filter = cmd.get<string>("filter");
string workdir = cmd.get<string>("workdir");
bool list = cmd.has("list");
int iters = cmd.get<int>("iters");
int wu_iters = cmd.get<int>("warmup");
double x_top = cmd.get<double>("xtop");
double x_bottom = cmd.get<double>("xbottom");
TestSystem::instance().setTopThreshold(x_top);
TestSystem::instance().setBottomThreshold(x_bottom);
if (!filter.empty())
{
TestSystem::instance().setTestFilter(filter);
}
if (!workdir.empty())
{
if (workdir[workdir.size() - 1] != '/' && workdir[workdir.size() - 1] != '\\')
{
workdir += '/';
}
TestSystem::instance().setWorkingDir(workdir);
}
if (list)
{
TestSystem::instance().setListMode(true);
}
TestSystem::instance().setNumIters(iters);
TestSystem::instance().setGPUWarmupIters(wu_iters);
TestSystem::instance().run();
int main()
{
printf("OpenCV was built without OpenCL support\n");
return 0;
}
#endif // HAVE_OPENCL
}

File diff suppressed because it is too large Load Diff

View File

@@ -44,79 +44,77 @@
//M*/
#include "precomp.hpp"
#include <iomanip>
#ifdef HAVE_OPENCL
using namespace cv;
using namespace cv::ocl;
using namespace cvtest;
using namespace testing;
using namespace std;
PARAM_TEST_CASE(Blend, MatType, int)
///////////// blend ////////////////////////
template <typename T>
void blendLinearGold(const cv::Mat &img1, const cv::Mat &img2, const cv::Mat &weights1, const cv::Mat &weights2, cv::Mat &result_gold)
{
int type;
int channels;
std::vector<cv::ocl::Info> oclinfo;
result_gold.create(img1.size(), img1.type());
virtual void SetUp()
int cn = img1.channels();
for (int y = 0; y < img1.rows; ++y)
{
const float *weights1_row = weights1.ptr<float>(y);
const float *weights2_row = weights2.ptr<float>(y);
const T *img1_row = img1.ptr<T>(y);
const T *img2_row = img2.ptr<T>(y);
T *result_gold_row = result_gold.ptr<T>(y);
type = GET_PARAM(0);
channels = GET_PARAM(1);
//int devnums = getDevice(oclinfo);
//CV_Assert(devnums > 0);
//cv::ocl::setBinpath(CLBINPATH);
}
};
TEST_P(Blend, Performance)
{
cv::Size size(MWIDTH, MHEIGHT);
cv::Mat img1_host = randomMat(size, CV_MAKETYPE(type, channels), 0, type == CV_8U ? 255.0 : 1.0);
cv::Mat img2_host = randomMat(size, CV_MAKETYPE(type, channels), 0, type == CV_8U ? 255.0 : 1.0);
cv::Mat weights1 = randomMat(size, CV_32F, 0, 1);
cv::Mat weights2 = randomMat(size, CV_32F, 0, 1);
cv::ocl::oclMat gimg1(size, CV_MAKETYPE(type, channels)), gimg2(size, CV_MAKETYPE(type, channels)), gweights1(size, CV_32F), gweights2(size, CV_32F);
cv::ocl::oclMat gdst(size, CV_MAKETYPE(type, channels));
double totalgputick_all = 0;
double totalgputick_kernel = 0;
double t1 = 0;
double t2 = 0;
for (int j = 0; j < LOOP_TIMES + 1; j ++) //LOOP_TIMES=100
{
t1 = (double)cvGetTickCount();
cv::ocl::oclMat gimg1 = cv::ocl::oclMat(img1_host);
cv::ocl::oclMat gimg2 = cv::ocl::oclMat(img2_host);
cv::ocl::oclMat gweights1 = cv::ocl::oclMat(weights1);
cv::ocl::oclMat gweights2 = cv::ocl::oclMat(weights1);
t2 = (double)cvGetTickCount();
cv::ocl::blendLinear(gimg1, gimg2, gweights1, gweights2, gdst);
t2 = (double)cvGetTickCount() - t2;
cv::Mat m;
gdst.download(m);
t1 = (double)cvGetTickCount() - t1;
if (j == 0)
for (int x = 0; x < img1.cols * cn; ++x)
{
continue;
float w1 = weights1_row[x / cn];
float w2 = weights2_row[x / cn];
result_gold_row[x] = static_cast<T>((img1_row[x] * w1 + img2_row[x] * w2) / (w1 + w2 + 1e-5f));
}
totalgputick_all = t1 + totalgputick_all;
totalgputick_kernel = t2 + totalgputick_kernel;
};
cout << "average gpu total runtime is " << totalgputick_all / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl;
cout << "average gpu runtime without data transfering is " << totalgputick_kernel / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl;
}
}
TEST(blend)
{
Mat src1, src2, weights1, weights2, dst;
ocl::oclMat d_src1, d_src2, d_weights1, d_weights2, d_dst;
INSTANTIATE_TEST_CASE_P(GPU_ImgProc, Blend, Combine(
Values(CV_8U, CV_32F), Values(1, 4)));
#endif
int all_type[] = {CV_8UC1, CV_8UC4};
std::string type_name[] = {"CV_8UC1", "CV_8UC4"};
for (int size = Min_Size; size <= Max_Size; size *= Multiple)
{
for (size_t j = 0; j < sizeof(all_type) / sizeof(int); j++)
{
SUBTEST << size << 'x' << size << "; " << type_name[j] << " and CV_32FC1";
gen(src1, size, size, all_type[j], 0, 256);
gen(src2, size, size, all_type[j], 0, 256);
gen(weights1, size, size, CV_32FC1, 0, 1);
gen(weights2, size, size, CV_32FC1, 0, 1);
blendLinearGold<uchar>(src1, src2, weights1, weights2, dst);
CPU_ON;
blendLinearGold<uchar>(src1, src2, weights1, weights2, dst);
CPU_OFF;
d_src1.upload(src1);
d_src2.upload(src2);
d_weights1.upload(weights1);
d_weights2.upload(weights2);
WARMUP_ON;
ocl::blendLinear(d_src1, d_src2, d_weights1, d_weights2, d_dst);
WARMUP_OFF;
GPU_ON;
ocl::blendLinear(d_src1, d_src2, d_weights1, d_weights2, d_dst);
;
GPU_OFF;
GPU_FULL_ON;
d_src1.upload(src1);
d_src2.upload(src2);
d_weights1.upload(weights1);
d_weights2.upload(weights2);
ocl::blendLinear(d_src1, d_src2, d_weights1, d_weights2, d_dst);
d_dst.download(dst);
GPU_FULL_OFF;
}
}
}

View File

@@ -0,0 +1,150 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved.
// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// @Authors
// Fangfang Bai, fangfang@multicorewareinc.com
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors as is and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#include "precomp.hpp"
//////////////////// BruteForceMatch /////////////////
// Benchmark: brute-force descriptor matching on the CPU (BFMatcher, NORM_L2)
// versus the OpenCL matcher (ocl::BruteForceMatcher_OCL_base, L2Dist).
//
// For every `size` from Min_Size to Max_Size (multiplied by Multiple on each
// step) three subtests are run: match, knnMatch (k = 2) and radiusMatch.
// Every subtest has the same layout:
//   * one CPU call outside CPU_ON/CPU_OFF, then the same call inside that pair;
//   * the device-only "*Single" call inside WARMUP_ON/WARMUP_OFF and again
//     inside GPU_ON/GPU_OFF;
//   * upload of both descriptor sets plus the overload that fills the host
//     `matches` container, inside GPU_FULL_ON/GPU_FULL_OFF.
// The CPU and OpenCL results are not compared with each other in this block.
// NOTE(review): the SUBTEST and *_ON/*_OFF macros are defined outside this
// file; they presumably name a subtest and delimit the measured regions --
// confirm against the perf precomp header.
TEST(BruteForceMatcher)
{
    for (int size = Min_Size; size <= Max_Size; size *= Multiple)
    {
        // Init CPU matcher
        int desc_len = 64;

        BFMatcher matcher(NORM_L2);

        // NOTE(review): presumably `size` descriptors of `desc_len` floats in
        // [0, 1) each -- gen() is defined elsewhere, confirm its argument order.
        Mat query;
        gen(query, size, desc_len, CV_32F, 0, 1);

        Mat train;
        gen(train, size, desc_len, CV_32F, 0, 1);

        // Output: two slots because the "match" subtest writes into matches[0]
        // while knnMatch/radiusMatch take the whole container.
        vector< vector<DMatch> > matches(2);

        // Init GPU matcher
        ocl::BruteForceMatcher_OCL_base d_matcher(ocl::BruteForceMatcher_OCL_base::L2Dist);

        ocl::oclMat d_query(query);
        ocl::oclMat d_train(train);

        ocl::oclMat d_trainIdx, d_distance, d_allDist, d_nMatches;

        // ---- match --------------------------------------------------------
        SUBTEST << size << "; match";

        // Run once outside the CPU_ON/CPU_OFF pair before the bracketed call.
        matcher.match(query, train, matches[0]);

        CPU_ON;
        matcher.match(query, train, matches[0]);
        CPU_OFF;

        WARMUP_ON;
        d_matcher.matchSingle(d_query, d_train, d_trainIdx, d_distance);
        WARMUP_OFF;

        GPU_ON;
        d_matcher.matchSingle(d_query, d_train, d_trainIdx, d_distance);
        GPU_OFF;

        GPU_FULL_ON;
        d_query.upload(query);
        d_train.upload(train);
        d_matcher.match(d_query, d_train, matches[0]);
        GPU_FULL_OFF;

        // ---- knnMatch (k = 2) ---------------------------------------------
        SUBTEST << size << "; knnMatch";

        matcher.knnMatch(query, train, matches, 2);

        CPU_ON;
        matcher.knnMatch(query, train, matches, 2);
        CPU_OFF;

        WARMUP_ON;
        d_matcher.knnMatchSingle(d_query, d_train, d_trainIdx, d_distance, d_allDist, 2);
        WARMUP_OFF;

        GPU_ON;
        d_matcher.knnMatchSingle(d_query, d_train, d_trainIdx, d_distance, d_allDist, 2);
        GPU_OFF;

        GPU_FULL_ON;
        d_query.upload(query);
        d_train.upload(train);
        d_matcher.knnMatch(d_query, d_train, matches, 2);
        GPU_FULL_OFF;

        // ---- radiusMatch --------------------------------------------------
        SUBTEST << size << "; radiusMatch";

        float max_distance = 2.0f;

        matcher.radiusMatch(query, train, matches, max_distance);

        CPU_ON;
        matcher.radiusMatch(query, train, matches, max_distance);
        CPU_OFF;

        // Drop the index buffer left over from the knnMatch subtest before the
        // same variable is handed to radiusMatchSingle.
        d_trainIdx.release();

        WARMUP_ON;
        d_matcher.radiusMatchSingle(d_query, d_train, d_trainIdx, d_distance, d_nMatches, max_distance);
        WARMUP_OFF;

        GPU_ON;
        d_matcher.radiusMatchSingle(d_query, d_train, d_trainIdx, d_distance, d_nMatches, max_distance);
        GPU_OFF;

        GPU_FULL_ON;
        d_query.upload(query);
        d_train.upload(train);
        d_matcher.radiusMatch(d_query, d_train, matches, max_distance);
        GPU_FULL_OFF;
    }
}

View File

@@ -42,112 +42,42 @@
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#include "precomp.hpp"
#include <iomanip>
#ifdef HAVE_OPENCL
using namespace cv;
using namespace cv::ocl;
using namespace cvtest;
using namespace testing;
using namespace std;
#ifndef MWC_TEST_UTILITY
#define MWC_TEST_UTILITY
// Param class
//
// IMPLEMENT_PARAM_CLASS(name, type) generates:
//   * a class `name` holding a single private `type` value (val_), with a
//     non-explicit constructor taking `type` (defaulting to `type()`) and an
//     implicit conversion operator back to `type`;
//   * an inline PrintTo(name, std::ostream*) overload that writes
//     "<name>(<value>)", formatting the value with testing::PrintToString.
// The whole definition is guarded by #ifndef, so an IMPLEMENT_PARAM_CLASS
// macro that is already defined takes precedence.
// NOTE(review): the Channels instantiation below sits inside the same guard,
// so it is skipped whenever the macro was already defined -- confirm that a
// Channels type is then provided elsewhere.
#ifndef IMPLEMENT_PARAM_CLASS
#define IMPLEMENT_PARAM_CLASS(name, type) \
class name \
{ \
public: \
name ( type arg = type ()) : val_(arg) {} \
operator type () const {return val_;} \
private: \
type val_; \
}; \
inline void PrintTo( name param, std::ostream* os) \
{ \
*os << #name << "(" << testing::PrintToString(static_cast< type >(param)) << ")"; \
}
IMPLEMENT_PARAM_CLASS(Channels, int)
#endif // IMPLEMENT_PARAM_CLASS
#endif // MWC_TEST_UTILITY
////////////////////////////////////////////////////////
// Canny1
extern std::string workdir;
IMPLEMENT_PARAM_CLASS(AppertureSize, int);
IMPLEMENT_PARAM_CLASS(L2gradient, bool);
PARAM_TEST_CASE(Canny1, AppertureSize, L2gradient)
///////////// Canny ////////////////////////
TEST(Canny)
{
int apperture_size;
bool useL2gradient;
//std::vector<cv::ocl::Info> oclinfo;
Mat img = imread(abspath("aloeL.jpg"), CV_LOAD_IMAGE_GRAYSCALE);
virtual void SetUp()
if (img.empty())
{
apperture_size = GET_PARAM(0);
useL2gradient = GET_PARAM(1);
//int devnums = getDevice(oclinfo);
//CV_Assert(devnums > 0);
}
};
TEST_P(Canny1, Performance)
{
cv::Mat img = readImage(workdir + "fruits.jpg", cv::IMREAD_GRAYSCALE);
ASSERT_FALSE(img.empty());
double low_thresh = 100.0;
double high_thresh = 150.0;
cv::Mat edges_gold;
cv::ocl::oclMat edges;
double totalgputick = 0;
double totalgputick_kernel = 0;
double t1 = 0;
double t2 = 0;
for(int j = 0; j < LOOP_TIMES + 1; j ++)
{
t1 = (double)cvGetTickCount();//gpu start1
cv::ocl::oclMat ocl_img = cv::ocl::oclMat(img);//upload
t2 = (double)cvGetTickCount(); //kernel
cv::ocl::Canny(ocl_img, edges, low_thresh, high_thresh, apperture_size, useL2gradient);
t2 = (double)cvGetTickCount() - t2;//kernel
cv::Mat cpu_dst;
edges.download (cpu_dst);//download
t1 = (double)cvGetTickCount() - t1;//gpu end1
if(j == 0)
continue;
totalgputick = t1 + totalgputick;
totalgputick_kernel = t2 + totalgputick_kernel;
throw runtime_error("can't open aloeL.jpg");
}
cout << "average gpu runtime is " << totalgputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl;
cout << "average gpu runtime without data transfer is " << totalgputick_kernel / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl;
SUBTEST << img.cols << 'x' << img.rows << "; aloeL.jpg" << "; edges" << "; CV_8UC1";
Mat edges(img.size(), CV_8UC1);
}
CPU_ON;
Canny(img, edges, 50.0, 100.0);
CPU_OFF;
INSTANTIATE_TEST_CASE_P(GPU_ImgProc, Canny1, testing::Combine(
testing::Values(AppertureSize(3), AppertureSize(5)),
testing::Values(L2gradient(false), L2gradient(true))));
ocl::oclMat d_img(img);
ocl::oclMat d_edges;
ocl::CannyBuf d_buf;
WARMUP_ON;
ocl::Canny(d_img, d_buf, d_edges, 50.0, 100.0);
WARMUP_OFF;
GPU_ON;
ocl::Canny(d_img, d_buf, d_edges, 50.0, 100.0);
;
GPU_OFF;
#endif //Have opencl
GPU_FULL_ON;
d_img.upload(img);
ocl::Canny(d_img, d_buf, d_edges, 50.0, 100.0);
d_edges.download(edges);
GPU_FULL_OFF;
}

View File

@@ -0,0 +1,91 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved.
// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// @Authors
// Fangfang Bai, fangfang@multicorewareinc.com
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors as is and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#include "precomp.hpp"
///////////// cvtColor////////////////////////
// Benchmark: RGBA -> gray colour conversion (CV_RGBA2GRAY) with cv::cvtColor
// on the host and ocl::cvtColor on the device, for square CV_8UC4 inputs whose
// side runs from Min_Size to Max_Size (multiplied by Multiple on each step).
// NOTE(review): SUBTEST and the *_ON/*_OFF macros are defined outside this
// file; they presumably label the subtest and delimit the measured regions.
TEST(cvtColor)
{
    Mat hostSrc, hostDst;
    ocl::oclMat devSrc, devDst;

    // Input formats under test and their printable names (kept in step by index).
    int formats[] = {CV_8UC4};
    std::string formatNames[] = {"CV_8UC4"};
    const size_t formatCount = sizeof(formats) / sizeof(formats[0]);

    for (int side = Min_Size; side <= Max_Size; side *= Multiple)
    {
        for (size_t k = 0; k < formatCount; ++k)
        {
            gen(hostSrc, side, side, formats[k], 0, 256);

            SUBTEST << side << "x" << side << "; " << formatNames[k] << " ; CV_RGBA2GRAY";

            // One host call outside the CPU_ON/CPU_OFF pair, then the bracketed one.
            cvtColor(hostSrc, hostDst, CV_RGBA2GRAY, 4);

            CPU_ON;
            cvtColor(hostSrc, hostDst, CV_RGBA2GRAY, 4);
            CPU_OFF;

            devSrc.upload(hostSrc);

            WARMUP_ON;
            ocl::cvtColor(devSrc, devDst, CV_RGBA2GRAY, 4);
            WARMUP_OFF;

            // Device call only; the input is already uploaded.
            GPU_ON;
            ocl::cvtColor(devSrc, devDst, CV_RGBA2GRAY, 4);
            GPU_OFF;

            // Upload + device call + download back into the host matrix.
            GPU_FULL_ON;
            devSrc.upload(hostSrc);
            ocl::cvtColor(devSrc, devDst, CV_RGBA2GRAY, 4);
            devDst.download(hostDst);
            GPU_FULL_OFF;
        }
    }
}

View File

@@ -15,8 +15,7 @@
// Third party copyrights are property of their respective owners.
//
// @Authors
// Fangfang Bai fangfang@multicorewareinc.com
//
// Fangfang Bai, fangfang@multicorewareinc.com
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
@@ -31,7 +30,7 @@
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// This software is provided by the copyright holders and contributors as is and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
@@ -43,78 +42,47 @@
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#include "precomp.hpp"
#include <iomanip>
using namespace cv;
using namespace cv::ocl;
using namespace cvtest;
using namespace testing;
using namespace std;
///////////////////////////////////////////////////////////////////////////////
/// ColumnSum
#ifdef HAVE_OPENCL
////////////////////////////////////////////////////////////////////////
// ColumnSum
PARAM_TEST_CASE(ColumnSum)
///////////// columnSum////////////////////////
TEST(columnSum)
{
cv::Mat src;
//std::vector<cv::ocl::Info> oclinfo;
Mat src, dst;
ocl::oclMat d_src, d_dst;
virtual void SetUp()
for (int size = Min_Size; size <= Max_Size; size *= Multiple)
{
//int devnums = getDevice(oclinfo);
//CV_Assert(devnums > 0);
SUBTEST << size << 'x' << size << "; CV_32FC1";
gen(src, size, size, CV_32FC1, 0, 256);
CPU_ON;
dst.create(src.size(), src.type());
for (int i = 1; i < src.rows; ++i)
{
for (int j = 0; j < src.cols; ++j)
{
dst.at<float>(i, j) = src.at<float>(i, j) += src.at<float>(i - 1, j);
}
}
CPU_OFF;
d_src.upload(src);
WARMUP_ON;
ocl::columnSum(d_src, d_dst);
WARMUP_OFF;
GPU_ON;
ocl::columnSum(d_src, d_dst);
;
GPU_OFF;
GPU_FULL_ON;
d_src.upload(src);
ocl::columnSum(d_src, d_dst);
d_dst.download(dst);
GPU_FULL_OFF;
}
};
TEST_F(ColumnSum, Performance)
{
cv::Size size(MWIDTH, MHEIGHT);
cv::Mat src = randomMat(size, CV_32FC1);
cv::ocl::oclMat d_dst;
double totalgputick = 0;
double totalgputick_kernel = 0;
double t1 = 0;
double t2 = 0;
for(int j = 0; j < LOOP_TIMES + 1; j ++)
{
t1 = (double)cvGetTickCount();//gpu start1
cv::ocl::oclMat d_src(src);
t2 = (double)cvGetTickCount(); //kernel
cv::ocl::columnSum(d_src, d_dst);
t2 = (double)cvGetTickCount() - t2;//kernel
cv::Mat cpu_dst;
d_dst.download (cpu_dst);//download
t1 = (double)cvGetTickCount() - t1;//gpu end1
if(j == 0)
continue;
totalgputick = t1 + totalgputick;
totalgputick_kernel = t2 + totalgputick_kernel;
}
cout << "average gpu runtime is " << totalgputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl;
cout << "average gpu runtime without data transfer is " << totalgputick_kernel / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl;
}
#endif
}

View File

@@ -15,7 +15,7 @@
// Third party copyrights are property of their respective owners.
//
// @Authors
// Fangfangbai, fangfang@multicorewareinc.com
// Fangfang Bai, fangfang@multicorewareinc.com
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
@@ -42,85 +42,48 @@
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#include "precomp.hpp"
using namespace std;
#ifdef HAVE_CLAMDFFT
////////////////////////////////////////////////////////////////////////////
// Dft
PARAM_TEST_CASE(Dft, cv::Size, bool)
///////////// dft ////////////////////////
TEST(dft)
{
cv::Size dft_size;
bool dft_rows;
vector<cv::ocl::Info> info;
virtual void SetUp()
Mat src, dst;
ocl::oclMat d_src, d_dst;
int all_type[] = {CV_32FC1, CV_32FC2};
std::string type_name[] = {"CV_32FC1", "CV_32FC2"};
for (int size = Min_Size; size <= Max_Size; size *= Multiple)
{
dft_size = GET_PARAM(0);
dft_rows = GET_PARAM(1);
cv::ocl::getDevice(info);
}
};
for (size_t j = 0; j < sizeof(all_type) / sizeof(int); j++)
{
SUBTEST << size << 'x' << size << "; " << type_name[j] << " ; complex-to-complex";
TEST_P(Dft, C2C)
{
cv::Mat a = randomMat(dft_size, CV_32FC2, 0.0, 10.0);
int flags = 0;
flags |= dft_rows ? cv::DFT_ROWS : 0;
gen(src, size, size, all_type[j], Scalar::all(0), Scalar::all(1));
cv::ocl::oclMat d_b;
dft(src, dst);
double totalgputick = 0;
double totalgputick_kernel = 0;
double t1 = 0;
double t2 = 0;
CPU_ON;
dft(src, dst);
CPU_OFF;
for(int j = 0; j < LOOP_TIMES + 1; j ++)
{
d_src.upload(src);
t1 = (double)cvGetTickCount();//gpu start1
WARMUP_ON;
ocl::dft(d_src, d_dst, Size(size, size));
WARMUP_OFF;
cv::ocl::oclMat ga = cv::ocl::oclMat(a); //upload
GPU_ON;
ocl::dft(d_src, d_dst, Size(size, size));
;
GPU_OFF;
t2 = (double)cvGetTickCount(); //kernel
cv::ocl::dft(ga, d_b, a.size(), flags);
t2 = (double)cvGetTickCount() - t2;//kernel
cv::Mat cpu_dst;
d_b.download (cpu_dst);//download
t1 = (double)cvGetTickCount() - t1;//gpu end1
if(j == 0)
continue;
totalgputick = t1 + totalgputick;
totalgputick_kernel = t2 + totalgputick_kernel;
GPU_FULL_ON;
d_src.upload(src);
ocl::dft(d_src, d_dst, Size(size, size));
d_dst.download(dst);
GPU_FULL_OFF;
}
}
cout << "average gpu runtime is " << totalgputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl;
cout << "average gpu runtime without data transfer is " << totalgputick_kernel / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl;
}
TEST_P(Dft, R2CthenC2R)
{
cv::Mat a = randomMat(dft_size, CV_32FC1, 0.0, 10.0);
int flags = 0;
//flags |= dft_rows ? cv::DFT_ROWS : 0; // not supported yet
cv::ocl::oclMat d_b, d_c;
cv::ocl::dft(cv::ocl::oclMat(a), d_b, a.size(), flags);
cv::ocl::dft(d_b, d_c, a.size(), flags + cv::DFT_INVERSE + cv::DFT_REAL_OUTPUT);
EXPECT_MAT_NEAR(a, d_c, a.size().area() * 1e-4, "");
}
//INSTANTIATE_TEST_CASE_P(ocl_DFT, Dft, testing::Combine(
// testing::Values(cv::Size(1280, 1024), cv::Size(1920, 1080),cv::Size(1800, 1500)),
// testing::Values(false, true)));
#endif // HAVE_CLAMDFFT
}

File diff suppressed because it is too large Load Diff

View File

@@ -16,6 +16,7 @@
//
// @Authors
// Fangfang Bai, fangfang@multicorewareinc.com
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
@@ -41,73 +42,47 @@
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#include "precomp.hpp"
using namespace std;
#ifdef HAVE_CLAMDBLAS
////////////////////////////////////////////////////////////////////////////
// GEMM
PARAM_TEST_CASE(Gemm, int, cv::Size, int)
///////////// gemm ////////////////////////
TEST(gemm)
{
int type;
cv::Size mat_size;
int flags;
vector<cv::ocl::Info> info;
virtual void SetUp()
Mat src1, src2, src3, dst;
ocl::oclMat d_src1, d_src2, d_src3, d_dst;
for (int size = Min_Size; size <= Max_Size; size *= Multiple)
{
type = GET_PARAM(0);
mat_size = GET_PARAM(1);
flags = GET_PARAM(2);
SUBTEST << size << 'x' << size;
cv::ocl::getDevice(info);
gen(src1, size, size, CV_32FC1, Scalar::all(-10), Scalar::all(10));
gen(src2, size, size, CV_32FC1, Scalar::all(-10), Scalar::all(10));
gen(src3, size, size, CV_32FC1, Scalar::all(-10), Scalar::all(10));
gemm(src1, src2, 1.0, src3, 1.0, dst);
CPU_ON;
gemm(src1, src2, 1.0, src3, 1.0, dst);
CPU_OFF;
d_src1.upload(src1);
d_src2.upload(src2);
d_src3.upload(src3);
WARMUP_ON;
ocl::gemm(d_src1, d_src2, 1.0, d_src3, 1.0, d_dst);
WARMUP_OFF;
GPU_ON;
ocl::gemm(d_src1, d_src2, 1.0, d_src3, 1.0, d_dst);
;
GPU_OFF;
GPU_FULL_ON;
d_src1.upload(src1);
d_src2.upload(src2);
d_src3.upload(src3);
ocl::gemm(d_src1, d_src2, 1.0, d_src3, 1.0, d_dst);
d_dst.download(dst);
GPU_FULL_OFF;
}
};
TEST_P(Gemm, Performance)
{
cv::Mat a = randomMat(mat_size, type, 0.0, 10.0);
cv::Mat b = randomMat(mat_size, type, 0.0, 10.0);
cv::Mat c = randomMat(mat_size, type, 0.0, 10.0);
cv::ocl::oclMat ocl_dst;
double totalgputick = 0;
double totalgputick_kernel = 0;
double t1 = 0;
double t2 = 0;
for(int j = 0; j < LOOP_TIMES + 1; j ++)
{
t1 = (double)cvGetTickCount();//gpu start1
cv::ocl::oclMat ga = cv::ocl::oclMat(a);//upload
cv::ocl::oclMat gb = cv::ocl::oclMat(b);//upload
cv::ocl::oclMat gc = cv::ocl::oclMat(c);//upload
t2 = (double)cvGetTickCount(); //kernel
cv::ocl::gemm(ga, gb, 1.0, gc, 1.0, ocl_dst, flags);
t2 = (double)cvGetTickCount() - t2;//kernel
cv::Mat cpu_dst;
ocl_dst.download (cpu_dst);//download
t1 = (double)cvGetTickCount() - t1;//gpu end
if(j == 0)
continue;
totalgputick = t1 + totalgputick;
totalgputick_kernel = t2 + totalgputick_kernel;
}
cout << "average gpu runtime is " << totalgputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl;
cout << "average gpu runtime without data transfer is " << totalgputick_kernel / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl;
}
INSTANTIATE_TEST_CASE_P(ocl_gemm, Gemm, testing::Combine(
testing::Values(CV_32FC1, CV_32FC2/* , CV_64FC1, CV_64FC2*/),
testing::Values(cv::Size(512, 512), cv::Size(1024, 1024)),
testing::Values(0, (int)cv::GEMM_1_T, (int)cv::GEMM_2_T, (int)(cv::GEMM_1_T + cv::GEMM_2_T))));
#endif
}

View File

@@ -10,12 +10,12 @@
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2010-2012, Institute Of Software Chinese Academy Of Science, all rights reserved.
// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved.
// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// @Authors
// Jia Haipeng, jiahaipeng95@gmail.com
// Fangfang Bai, fangfang@multicorewareinc.com
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
@@ -30,7 +30,7 @@
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// This software is provided by the copyright holders and contributors as is and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
@@ -42,132 +42,97 @@
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#include "precomp.hpp"
#ifdef HAVE_OPENCL
///////////// Haar ////////////////////////
namespace cv
{
namespace ocl
{
using namespace cvtest;
using namespace testing;
using namespace std;
using namespace cv;
extern std::string workdir;
struct getRect
{
Rect operator ()(const CvAvgComp &e) const
Rect operator()(const CvAvgComp &e) const
{
return e.rect;
}
};
PARAM_TEST_CASE(HaarTestBase, int, int)
class CascadeClassifier_GPU : public OclCascadeClassifier
{
//std::vector<cv::ocl::Info> oclinfo;
cv::ocl::OclCascadeClassifier cascade, nestedCascade;
cv::CascadeClassifier cpucascade, cpunestedCascade;
// Mat img;
double scale;
int index;
virtual void SetUp()
public:
void detectMultiScale(oclMat &image,
CV_OUT std::vector<cv::Rect>& faces,
double scaleFactor = 1.1,
int minNeighbors = 3, int flags = 0,
Size minSize = Size(),
Size maxSize = Size())
{
scale = 1.0;
index = 0;
string cascadeName = "../../../data/haarcascades/haarcascade_frontalface_alt.xml";
if( (!cascade.load( cascadeName )) || (!cpucascade.load(cascadeName)))
{
cout << "ERROR: Could not load classifier cascade" << endl;
return;
}
//int devnums = getDevice(oclinfo);
//CV_Assert(devnums>0);
////if you want to use undefault device, set it here
////setDevice(oclinfo[0]);
//cv::ocl::setBinpath("E:\\");
(void)maxSize;
MemStorage storage(cvCreateMemStorage(0));
//CvMat img=image;
CvSeq *objs = oclHaarDetectObjects(image, storage, scaleFactor, minNeighbors, flags, minSize);
vector<CvAvgComp> vecAvgComp;
Seq<CvAvgComp>(objs).copyTo(vecAvgComp);
faces.resize(vecAvgComp.size());
std::transform(vecAvgComp.begin(), vecAvgComp.end(), faces.begin(), getRect());
}
};
////////////////////////////////faceDetect/////////////////////////////////////////////////
struct Haar : HaarTestBase {};
TEST_F(Haar, FaceDetect)
{
string imgName = workdir + "lena.jpg";
Mat img = imread( imgName, 1 );
if(img.empty())
{
std::cout << imgName << std::endl;
return ;
}
//int i = 0;
double t = 0;
vector<Rect> faces, oclfaces;
// const static Scalar colors[] = { CV_RGB(0, 0, 255),
// CV_RGB(0, 128, 255),
// CV_RGB(0, 255, 255),
// CV_RGB(0, 255, 0),
// CV_RGB(255, 128, 0),
// CV_RGB(255, 255, 0),
// CV_RGB(255, 0, 0),
// CV_RGB(255, 0, 255)
// } ;
Mat gray, smallImg(cvRound (img.rows / scale), cvRound(img.cols / scale), CV_8UC1 );
MemStorage storage(cvCreateMemStorage(0));
cvtColor( img, gray, CV_BGR2GRAY );
resize( gray, smallImg, smallImg.size(), 0, 0, INTER_LINEAR );
equalizeHist( smallImg, smallImg );
t = (double)cvGetTickCount();
for(int k = 0; k < LOOP_TIMES; k++)
{
cpucascade.detectMultiScale( smallImg, faces, 1.1,
3, 0
| CV_HAAR_SCALE_IMAGE
, Size(30, 30), Size(0, 0) );
}
t = (double)cvGetTickCount() - t ;
printf( "cpudetection time = %g ms\n", t / (LOOP_TIMES * (double)cvGetTickFrequency() * 1000.) );
cv::ocl::oclMat image;
CvSeq *_objects=NULL;
t = (double)cvGetTickCount();
for(int k = 0; k < LOOP_TIMES; k++)
{
image.upload(smallImg);
_objects = cascade.oclHaarDetectObjects( image, storage, 1.1,
3, 0
| CV_HAAR_SCALE_IMAGE
, Size(30, 30), Size(0, 0) );
}
t = (double)cvGetTickCount() - t ;
printf( "ocldetection time = %g ms\n", t / (LOOP_TIMES * (double)cvGetTickFrequency() * 1000.) );
vector<CvAvgComp> vecAvgComp;
Seq<CvAvgComp>(_objects).copyTo(vecAvgComp);
oclfaces.resize(vecAvgComp.size());
std::transform(vecAvgComp.begin(), vecAvgComp.end(), oclfaces.begin(), getRect());
//for( vector<Rect>::const_iterator r = faces.begin(); r != faces.end(); r++, i++ )
//{
// Mat smallImgROI;
// Point center;
// Scalar color = colors[i%8];
// int radius;
// center.x = cvRound((r->x + r->width*0.5)*scale);
// center.y = cvRound((r->y + r->height*0.5)*scale);
// radius = cvRound((r->width + r->height)*0.25*scale);
// circle( img, center, radius, color, 3, 8, 0 );
//}
//namedWindow("result");
//imshow("result",img);
//waitKey(0);
//destroyAllWindows();
}
#endif // HAVE_OPENCL
}
// Benchmark: Haar-cascade face detection on "basketball1.png" (loaded as
// grayscale) with cv::CascadeClassifier on the host and
// ocl::CascadeClassifier_GPU on the device, both using
// "haarcascade_frontalface_alt.xml". Throws std::runtime_error when the image
// or either cascade cannot be loaded.
// NOTE(review): SUBTEST and the *_ON/*_OFF macros are defined outside this
// file; they presumably label the subtest and delimit the measured regions.
TEST(Haar)
{
    // Detection parameters shared by every detectMultiScale call below.
    const double scaleStep = 1.1;
    const int minNeighbours = 2;
    const int detectFlags = 0 | CV_HAAR_SCALE_IMAGE;
    const Size minFace(30, 30);

    Mat frame = imread(abspath("basketball1.png"), CV_LOAD_IMAGE_GRAYSCALE);
    if (frame.empty())
        throw runtime_error("can't open basketball1.png");

    CascadeClassifier cpuCascade;
    if (!cpuCascade.load(abspath("haarcascade_frontalface_alt.xml")))
        throw runtime_error("can't load haarcascade_frontalface_alt.xml");

    vector<Rect> detections;

    SUBTEST << frame.cols << "x" << frame.rows << "; scale image";

    CPU_ON;
    cpuCascade.detectMultiScale(frame, detections,
                                scaleStep, minNeighbours, detectFlags, minFace);
    CPU_OFF;

    ocl::CascadeClassifier_GPU oclCascade;
    if (!oclCascade.load(abspath("haarcascade_frontalface_alt.xml")))
        throw runtime_error("can't load haarcascade_frontalface_alt.xml");

    ocl::oclMat d_frame(frame);

    // The result vector is emptied before the warm-up and the device-only run.
    detections.clear();

    WARMUP_ON;
    oclCascade.detectMultiScale(d_frame, detections,
                                scaleStep, minNeighbours, detectFlags, minFace);
    WARMUP_OFF;

    detections.clear();

    GPU_ON;
    oclCascade.detectMultiScale(d_frame, detections,
                                scaleStep, minNeighbours, detectFlags, minFace);
    GPU_OFF;

    // Upload + detection in one bracketed region.
    GPU_FULL_ON;
    d_frame.upload(frame);
    oclCascade.detectMultiScale(d_frame, detections,
                                scaleStep, minNeighbours, detectFlags, minFace);
    GPU_FULL_OFF;
}

View File

@@ -15,7 +15,7 @@
// Third party copyrights are property of their respective owners.
//
// @Authors
// Peng Xiao, pengxiao@multicorewareinc.com
// Fangfang Bai, fangfang@multicorewareinc.com
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
@@ -42,125 +42,47 @@
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#include "precomp.hpp"
#include <iomanip>
#ifdef HAVE_OPENCL
using namespace cv;
using namespace cv::ocl;
using namespace cvtest;
using namespace testing;
using namespace std;
extern std::string workdir;
#ifndef MWC_TEST_UTILITY
#define MWC_TEST_UTILITY
// Param class
//
// IMPLEMENT_PARAM_CLASS(name, type) generates:
//   * a class `name` holding a single private `type` value (val_), with a
//     non-explicit constructor taking `type` (defaulting to `type()`) and an
//     implicit conversion operator back to `type`;
//   * an inline PrintTo(name, std::ostream*) overload that writes
//     "<name>(<value>)", formatting the value with testing::PrintToString.
// The definition is guarded by #ifndef, so an IMPLEMENT_PARAM_CLASS macro
// that is already defined takes precedence.
#ifndef IMPLEMENT_PARAM_CLASS
#define IMPLEMENT_PARAM_CLASS(name, type) \
class name \
{ \
public: \
name ( type arg = type ()) : val_(arg) {} \
operator type () const {return val_;} \
private: \
type val_; \
}; \
inline void PrintTo( name param, std::ostream* os) \
{ \
*os << #name << "(" << testing::PrintToString(static_cast< type >(param)) << ")"; \
}
#endif // IMPLEMENT_PARAM_CLASS
#endif // MWC_TEST_UTILITY
IMPLEMENT_PARAM_CLASS(WinSizw48, bool);
PARAM_TEST_CASE(HOG, WinSizw48, bool)
///////////// HOG////////////////////////
TEST(HOG)
{
bool is48;
vector<float> detector;
virtual void SetUp()
Mat src = imread(abspath("road.png"), cv::IMREAD_GRAYSCALE);
if (src.empty())
{
is48 = GET_PARAM(0);
if(is48)
{
detector = cv::ocl::HOGDescriptor::getPeopleDetector48x96();
}
else
{
detector = cv::ocl::HOGDescriptor::getPeopleDetector64x128();
}
}
};
TEST_P(HOG, Performance)
{
cv::Mat img = readImage(workdir + "lena.jpg", cv::IMREAD_GRAYSCALE);
ASSERT_FALSE(img.empty());
// define HOG related arguments
float scale = 1.05f;
//int nlevels = 13;
int gr_threshold = 8;
float hit_threshold = 1.4f;
//bool hit_threshold_auto = true;
int win_width = is48 ? 48 : 64;
int win_stride_width = 8;
int win_stride_height = 8;
bool gamma_corr = true;
Size win_size(win_width, win_width * 2); //(64, 128) or (48, 96)
Size win_stride(win_stride_width, win_stride_height);
cv::ocl::HOGDescriptor gpu_hog(win_size, Size(16, 16), Size(8, 8), Size(8, 8), 9,
cv::ocl::HOGDescriptor::DEFAULT_WIN_SIGMA, 0.2, gamma_corr,
cv::ocl::HOGDescriptor::DEFAULT_NLEVELS);
gpu_hog.setSVMDetector(detector);
double totalgputick = 0;
double totalgputick_kernel = 0;
double t1 = 0;
double t2 = 0;
for(int j = 0; j < LOOP_TIMES + 1; j ++)
{
t1 = (double)cvGetTickCount();//gpu start1
ocl::oclMat d_src(img);//upload
t2 = (double)cvGetTickCount(); //kernel
vector<Rect> found;
gpu_hog.detectMultiScale(d_src, found, hit_threshold, win_stride,
Size(0, 0), scale, gr_threshold);
t2 = (double)cvGetTickCount() - t2;//kernel
// no download time for HOG
t1 = (double)cvGetTickCount() - t1;//gpu end1
if(j == 0)
continue;
totalgputick = t1 + totalgputick;
totalgputick_kernel = t2 + totalgputick_kernel;
throw runtime_error("can't open road.png");
}
cout << "average gpu runtime is " << totalgputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl;
cout << "average gpu runtime without data transfer is " << totalgputick_kernel / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl;
}
cv::HOGDescriptor hog;
hog.setSVMDetector(hog.getDefaultPeopleDetector());
std::vector<cv::Rect> found_locations;
INSTANTIATE_TEST_CASE_P(GPU_ObjDetect, HOG, testing::Combine(testing::Values(WinSizw48(false), WinSizw48(true)), testing::Values(false)));
SUBTEST << 768 << 'x' << 576 << "; road.png";
#endif //Have opencl
hog.detectMultiScale(src, found_locations);
CPU_ON;
hog.detectMultiScale(src, found_locations);
CPU_OFF;
cv::ocl::HOGDescriptor ocl_hog;
ocl_hog.setSVMDetector(ocl_hog.getDefaultPeopleDetector());
ocl::oclMat d_src;
d_src.upload(src);
WARMUP_ON;
ocl_hog.detectMultiScale(d_src, found_locations);
WARMUP_OFF;
GPU_ON;
ocl_hog.detectMultiScale(d_src, found_locations);
;
GPU_OFF;
GPU_FULL_ON;
d_src.upload(src);
ocl_hog.detectMultiScale(d_src, found_locations);
GPU_FULL_OFF;
}

File diff suppressed because it is too large Load Diff

View File

@@ -42,191 +42,105 @@
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#include "precomp.hpp"
#include <iomanip>
#ifdef HAVE_OPENCL
using namespace cv;
using namespace cv::ocl;
using namespace cvtest;
using namespace testing;
using namespace std;
#ifndef MWC_TEST_UTILITY
#define MWC_TEST_UTILITY
//////// Utility
#ifndef DIFFERENT_SIZES
#else
#undef DIFFERENT_SIZES
#endif
#define DIFFERENT_SIZES testing::Values(cv::Size(256, 256), cv::Size(3000, 3000))
// Param class
#ifndef IMPLEMENT_PARAM_CLASS
// Declares a thin wrapper class `name` around a single value of `type`, giving a
// test parameter its own distinct type. The wrapper is implicitly constructible
// from `type` (defaulting to a value-initialized `type`) and converts back to
// `type` implicitly. A matching PrintTo overload writes it as name(value).
#define IMPLEMENT_PARAM_CLASS(name, type) \
class name \
{ \
public: \
name ( type arg = type ()) : val_(arg) {} \
operator type () const {return val_;} \
private: \
type val_; \
}; \
inline void PrintTo( name param, std::ostream* os) \
{ \
*os << #name << "(" << testing::PrintToString(static_cast< type >(param)) << ")"; \
}
IMPLEMENT_PARAM_CLASS(Channels, int)
#endif // IMPLEMENT_PARAM_CLASS
#endif // MWC_TEST_UTILITY
////////////////////////////////////////////////////////////////////////////////
// MatchTemplate
#define ALL_TEMPLATE_METHODS testing::Values(TemplateMethod(cv::TM_SQDIFF), TemplateMethod(cv::TM_CCORR), TemplateMethod(cv::TM_CCOEFF), TemplateMethod(cv::TM_SQDIFF_NORMED), TemplateMethod(cv::TM_CCORR_NORMED), TemplateMethod(cv::TM_CCOEFF_NORMED))
IMPLEMENT_PARAM_CLASS(TemplateSize, cv::Size);
const char *TEMPLATE_METHOD_NAMES[6] = {"TM_SQDIFF", "TM_SQDIFF_NORMED", "TM_CCORR", "TM_CCORR_NORMED", "TM_CCOEFF", "TM_CCOEFF_NORMED"};
PARAM_TEST_CASE(MatchTemplate, cv::Size, TemplateSize, Channels, TemplateMethod)
/////////// matchTemplate ////////////////////////
//void InitMatchTemplate()
//{
// Mat src; gen(src, 500, 500, CV_32F, 0, 1);
// Mat templ; gen(templ, 500, 500, CV_32F, 0, 1);
// ocl::oclMat d_src(src), d_templ(templ), d_dst;
// ocl::matchTemplate(d_src, d_templ, d_dst, CV_TM_CCORR);
//}
TEST(matchTemplate)
{
cv::Size size;
cv::Size templ_size;
int cn;
int method;
//vector<cv::ocl::Info> oclinfo;
//InitMatchTemplate();
virtual void SetUp()
Mat src, templ, dst;
int templ_size = 5;
for (int size = Min_Size; size <= Max_Size; size *= Multiple)
{
size = GET_PARAM(0);
templ_size = GET_PARAM(1);
cn = GET_PARAM(2);
method = GET_PARAM(3);
//int devnums = getDevice(oclinfo);
//CV_Assert(devnums > 0);
int all_type[] = {CV_32FC1, CV_32FC4};
std::string type_name[] = {"CV_32FC1", "CV_32FC4"};
for (size_t j = 0; j < sizeof(all_type) / sizeof(int); j++)
{
for(templ_size = 5; templ_size <= 5; templ_size *= 5)
{
gen(src, size, size, all_type[j], 0, 1);
SUBTEST << src.cols << 'x' << src.rows << "; " << type_name[j] << "; templ " << templ_size << 'x' << templ_size << "; CCORR";
gen(templ, templ_size, templ_size, all_type[j], 0, 1);
matchTemplate(src, templ, dst, CV_TM_CCORR);
CPU_ON;
matchTemplate(src, templ, dst, CV_TM_CCORR);
CPU_OFF;
ocl::oclMat d_src(src), d_templ, d_dst;
d_templ.upload(templ);
WARMUP_ON;
ocl::matchTemplate(d_src, d_templ, d_dst, CV_TM_CCORR);
WARMUP_OFF;
GPU_ON;
ocl::matchTemplate(d_src, d_templ, d_dst, CV_TM_CCORR);
;
GPU_OFF;
GPU_FULL_ON;
d_src.upload(src);
d_templ.upload(templ);
ocl::matchTemplate(d_src, d_templ, d_dst, CV_TM_CCORR);
d_dst.download(dst);
GPU_FULL_OFF;
}
}
int all_type_8U[] = {CV_8UC1};
std::string type_name_8U[] = {"CV_8UC1"};
for (size_t j = 0; j < sizeof(all_type_8U) / sizeof(int); j++)
{
for(templ_size = 5; templ_size <= 5; templ_size *= 5)
{
SUBTEST << src.cols << 'x' << src.rows << "; " << type_name_8U[j] << "; templ " << templ_size << 'x' << templ_size << "; CCORR_NORMED";
gen(src, size, size, all_type_8U[j], 0, 255);
gen(templ, templ_size, templ_size, all_type_8U[j], 0, 255);
matchTemplate(src, templ, dst, CV_TM_CCORR_NORMED);
CPU_ON;
matchTemplate(src, templ, dst, CV_TM_CCORR_NORMED);
CPU_OFF;
ocl::oclMat d_src(src);
ocl::oclMat d_templ(templ), d_dst;
WARMUP_ON;
ocl::matchTemplate(d_src, d_templ, d_dst, CV_TM_CCORR_NORMED);
WARMUP_OFF;
GPU_ON;
ocl::matchTemplate(d_src, d_templ, d_dst, CV_TM_CCORR_NORMED);
;
GPU_OFF;
GPU_FULL_ON;
d_src.upload(src);
d_templ.upload(templ);
ocl::matchTemplate(d_src, d_templ, d_dst, CV_TM_CCORR_NORMED);
d_dst.download(dst);
GPU_FULL_OFF;
}
}
}
};
struct MatchTemplate8U : MatchTemplate {};
// Performance run of cv::ocl::matchTemplate on 8-bit images for the fixture's
// image size, template size, channel count and method. Prints the parameters,
// then the average time of the whole device round trip (upload, matchTemplate,
// download) and of the matchTemplate call alone. Nothing is asserted.
TEST_P(MatchTemplate8U, Performance)
{
    std::cout << "Method: " << TEMPLATE_METHOD_NAMES[method] << std::endl;
    std::cout << "Image Size: (" << size.width << ", " << size.height << ")" << std::endl;
    std::cout << "Template Size: (" << templ_size.width << ", " << templ_size.height << ")" << std::endl;
    std::cout << "Channels: " << cn << std::endl;

    const int matType = CV_MAKETYPE(CV_8U, cn);
    cv::Mat image = randomMat(size, matType);
    cv::Mat templ = randomMat(templ_size, matType);
    cv::Mat dst_gold;
    cv::ocl::oclMat dst;

    double roundTripTicks = 0; // upload + matchTemplate + download
    double callTicks = 0;      // matchTemplate call only

    for (int pass = 0; pass < LOOP_TIMES + 1; ++pass)
    {
        const double roundTripStart = static_cast<double>(cvGetTickCount());
        cv::ocl::oclMat ocl_image(image); // upload
        cv::ocl::oclMat ocl_templ(templ); // upload

        const double callStart = static_cast<double>(cvGetTickCount());
        cv::ocl::matchTemplate(ocl_image, ocl_templ, dst, method);
        const double callElapsed = static_cast<double>(cvGetTickCount()) - callStart;

        cv::Mat cpu_dst;
        dst.download(cpu_dst); // download
        const double roundTripElapsed = static_cast<double>(cvGetTickCount()) - roundTripStart;

        // Pass 0 is a warm-up: it is measured but left out of the totals.
        if (pass != 0)
        {
            roundTripTicks += roundTripElapsed;
            callTicks += callElapsed;
        }
    }

    // Turns a tick total over LOOP_TIMES passes into an average in milliseconds.
    const double msDivisor = (double)cvGetTickFrequency() * LOOP_TIMES * 1000.;
    cout << "average gpu runtime is " << roundTripTicks / msDivisor << "ms" << endl;
    cout << "average gpu runtime without data transfer is " << callTicks / msDivisor << "ms" << endl;
}
struct MatchTemplate32F : MatchTemplate {};
// Performance run of cv::ocl::matchTemplate on 32-bit float images for the
// fixture's image size, template size, channel count and method. Prints the
// parameters, then the average time of the whole device round trip (upload,
// matchTemplate, download) and of the matchTemplate call alone. Nothing is
// asserted.
TEST_P(MatchTemplate32F, Performance)
{
    std::cout << "Method: " << TEMPLATE_METHOD_NAMES[method] << std::endl;
    std::cout << "Image Size: (" << size.width << ", " << size.height << ")" << std::endl;
    std::cout << "Template Size: (" << templ_size.width << ", " << templ_size.height << ")" << std::endl;
    std::cout << "Channels: " << cn << std::endl;

    cv::Mat image = randomMat(size, CV_MAKETYPE(CV_32F, cn));
    cv::Mat templ = randomMat(templ_size, CV_MAKETYPE(CV_32F, cn));
    cv::ocl::oclMat dst;

    double totalgputick = 0;        // upload + matchTemplate + download
    double totalgputick_kernel = 0; // matchTemplate call only
    double t1 = 0;
    double t2 = 0;

    // Run one extra pass and discard pass 0 as a warm-up, exactly as
    // MatchTemplate8U does. Previously the first pass was included, so any
    // one-time cost of the first call leaked into the reported average.
    for (int j = 0; j < LOOP_TIMES + 1; j++)
    {
        t1 = (double)cvGetTickCount(); // round trip start
        cv::ocl::oclMat ocl_image = cv::ocl::oclMat(image); // upload
        cv::ocl::oclMat ocl_templ = cv::ocl::oclMat(templ); // upload

        t2 = (double)cvGetTickCount(); // call start
        cv::ocl::matchTemplate(ocl_image, ocl_templ, dst, method);
        t2 = (double)cvGetTickCount() - t2; // call end

        cv::Mat cpu_dst;
        dst.download(cpu_dst); // download
        t1 = (double)cvGetTickCount() - t1; // round trip end

        if (j == 0)
            continue;

        totalgputick = t1 + totalgputick;
        totalgputick_kernel = t2 + totalgputick_kernel;
    }

    // Totals cover exactly LOOP_TIMES measured passes; report averages in ms.
    cout << "average gpu runtime is " << totalgputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl;
    cout << "average gpu runtime without data transfer is " << totalgputick_kernel / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl;
}
// Instantiates the 8-bit matchTemplate performance test for every combination of
// the image sizes, template sizes, channel counts and template-matching methods
// listed below (the commented-out values are currently excluded).
INSTANTIATE_TEST_CASE_P(GPU_ImgProc, MatchTemplate8U,
testing::Combine(
testing::Values(cv::Size(1280, 1024), cv::Size(MWIDTH, MHEIGHT), cv::Size(1800, 1500)),
testing::Values(TemplateSize(cv::Size(5, 5)), TemplateSize(cv::Size(16, 16))/*, TemplateSize(cv::Size(30, 30))*/),
testing::Values(Channels(1), Channels(4)/*, Channels(3)*/),
ALL_TEMPLATE_METHODS
)
);
// Instantiates the 32-bit float variant over the same image sizes, template
// sizes and channel counts, but only for the TM_SQDIFF and TM_CCORR methods.
INSTANTIATE_TEST_CASE_P(GPU_ImgProc, MatchTemplate32F, testing::Combine(
testing::Values(cv::Size(1280, 1024), cv::Size(MWIDTH, MHEIGHT), cv::Size(1800, 1500)),
testing::Values(TemplateSize(cv::Size(5, 5)), TemplateSize(cv::Size(16, 16))/*, TemplateSize(cv::Size(30, 30))*/),
testing::Values(Channels(1), Channels(4) /*, Channels(3)*/),
testing::Values(TemplateMethod(cv::TM_SQDIFF), TemplateMethod(cv::TM_CCORR))));
#endif //HAVE_OPENCL
}

View File

@@ -10,12 +10,12 @@
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2010-2012, Institute Of Software Chinese Academy Of Science, all rights reserved.
// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved.
// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// @Authors
// Jia Haipeng, jiahaipeng95@gmail.com
// Fangfang Bai, fangfang@multicorewareinc.com
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
@@ -30,7 +30,7 @@
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// This software is provided by the copyright holders and contributors as is and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
@@ -42,697 +42,140 @@
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#include "precomp.hpp"
#ifdef HAVE_OPENCL
using namespace cvtest;
using namespace testing;
using namespace std;
using namespace cv::ocl;
////////////////////////////////convertTo/////////////////////////////////////////////////
PARAM_TEST_CASE(ConvertToTestBase, MatType, MatType)
///////////// ConvertTo////////////////////////
TEST(ConvertTo)
{
int type;
int dst_type;
Mat src, dst;
ocl::oclMat d_src, d_dst;
//src mat
cv::Mat mat;
cv::Mat dst;
int all_type[] = {CV_8UC1, CV_8UC4};
std::string type_name[] = {"CV_8UC1", "CV_8UC4"};
// set up roi
int roicols;
int roirows;
int srcx;
int srcy;
int dstx;
int dsty;
//src mat with roi
cv::Mat mat_roi;
cv::Mat dst_roi;
//std::vector<cv::ocl::Info> oclinfo;
//ocl dst mat for testing
cv::ocl::oclMat gdst_whole;
//ocl mat with roi
cv::ocl::oclMat gmat;
cv::ocl::oclMat gdst;
virtual void SetUp()
for (int size = Min_Size; size <= Max_Size; size *= Multiple)
{
type = GET_PARAM(0);
dst_type = GET_PARAM(1);
for (size_t j = 0; j < sizeof(all_type) / sizeof(int); j++)
{
SUBTEST << size << 'x' << size << "; " << type_name[j] << " to 32FC1";
cv::RNG &rng = TS::ptr()->get_rng();
cv::Size size(MWIDTH, MHEIGHT);
gen(src, size, size, all_type[j], 0, 256);
//gen(dst, size, size, all_type[j], 0, 256);
//d_dst.upload(dst);
src.convertTo(dst, CV_32FC1);
CPU_ON;
src.convertTo(dst, CV_32FC1);
CPU_OFF;
d_src.upload(src);
WARMUP_ON;
d_src.convertTo(d_dst, CV_32FC1);
WARMUP_OFF;
GPU_ON;
d_src.convertTo(d_dst, CV_32FC1);
;
GPU_OFF;
GPU_FULL_ON;
d_src.upload(src);
d_src.convertTo(d_dst, CV_32FC1);
d_dst.download(dst);
GPU_FULL_OFF;
}
mat = randomMat(rng, size, type, 5, 16, false);
dst = randomMat(rng, size, type, 5, 16, false);
//int devnums = getDevice(oclinfo);
//CV_Assert(devnums > 0);
////if you want to use undefault device, set it here
////setDevice(oclinfo[0]);
//setBinpath(CLBINPATH);
}
void Has_roi(int b)
{
//cv::RNG& rng = TS::ptr()->get_rng();
if(b)
{
//randomize ROI
roicols = mat.cols - 1; //start
roirows = mat.rows - 1;
srcx = 1;
srcy = 1;
dstx = 1;
dsty = 1;
}
else
{
roicols = mat.cols;
roirows = mat.rows;
srcx = 0;
srcy = 0;
dstx = 0;
dsty = 0;
};
mat_roi = mat(Rect(srcx, srcy, roicols, roirows));
dst_roi = dst(Rect(dstx, dsty, roicols, roirows));
//gdst_whole = dst;
//gdst = gdst_whole(Rect(dstx,dsty,roicols,roirows));
//gmat = mat_roi;
}
};
struct ConvertTo : ConvertToTestBase {};
// Benchmark of Mat::convertTo against oclMat::convertTo for the fixture's source
// matrix and destination type. Despite the test name, nothing is asserted.
//
// Without PRINT_KERNEL_RUN_TIME: for each ROI mode k in [LOOPROISTART, LOOPROIEND)
// it times the CPU call (t0), the whole device path of upload + convertTo +
// download (t1) and the device convertTo call alone (t2), then prints the
// averages in milliseconds.
// With PRINT_KERNEL_RUN_TIME: it only runs the device conversion once per ROI
// mode and prints a label; no timing is done in this function.
TEST_P(ConvertTo, Accuracy)
{
#ifndef PRINT_KERNEL_RUN_TIME
double totalcputick = 0;
double totalgputick = 0;
double totalgputick_kernel = 0;
double t0 = 0;
double t1 = 0;
double t2 = 0;
for(int k = LOOPROISTART; k < LOOPROIEND; k++)
{
// totals are per ROI mode
totalcputick = 0;
totalgputick = 0;
totalgputick_kernel = 0;
// LOOP_TIMES measured passes plus one leading warm-up pass
for(int j = 0; j < LOOP_TIMES + 1; j ++)
{
Has_roi(k);
t0 = (double)cvGetTickCount();//cpu start
mat_roi.convertTo(dst_roi, dst_type);
t0 = (double)cvGetTickCount() - t0;//cpu end
t1 = (double)cvGetTickCount();//gpu start1
// assigning host matrices to oclMat objects; counted as upload time in t1
gdst_whole = dst;
gdst = gdst_whole(Rect(dstx, dsty, roicols, roirows));
gmat = mat_roi;
t2 = (double)cvGetTickCount(); //kernel
gmat.convertTo(gdst, dst_type);
t2 = (double)cvGetTickCount() - t2;//kernel
cv::Mat cpu_dst;
gdst_whole.download (cpu_dst);//download
t1 = (double)cvGetTickCount() - t1;//gpu end1
// pass 0 is measured but excluded from the totals
if(j == 0)
continue;
totalgputick = t1 + totalgputick;
totalcputick = t0 + totalcputick;
totalgputick_kernel = t2 + totalgputick_kernel;
}
if(k == 0)
{
cout << "no roi\n";
}
else
{
cout << "with roi\n";
};
// tick totals over LOOP_TIMES passes converted to an average in ms
cout << "average cpu runtime is " << totalcputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl;
cout << "average gpu runtime is " << totalgputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl;
cout << "average gpu runtime without data transfer is " << totalgputick_kernel / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl;
}
#else
for(int j = LOOPROISTART; j < LOOPROIEND; j ++)
{
Has_roi(j);
gdst_whole = dst;
gdst = gdst_whole(Rect(dstx, dsty, roicols, roirows));
gmat = mat_roi;
if(j == 0)
{
cout << "no roi:";
}
else
{
cout << "\nwith roi:";
};
gmat.convertTo(gdst, dst_type);
};
#endif
}
///////////////////////////////////////////copyto/////////////////////////////////////////////////////////////
PARAM_TEST_CASE(CopyToTestBase, MatType, bool)
///////////// copyTo////////////////////////
TEST(copyTo)
{
int type;
Mat src, dst;
ocl::oclMat d_src, d_dst;
cv::Mat mat;
cv::Mat mask;
cv::Mat dst;
int all_type[] = {CV_8UC1, CV_8UC4};
std::string type_name[] = {"CV_8UC1", "CV_8UC4"};
// set up roi
int roicols;
int roirows;
int srcx;
int srcy;
int dstx;
int dsty;
int maskx;
int masky;
//src mat with roi
cv::Mat mat_roi;
cv::Mat mask_roi;
cv::Mat dst_roi;
//std::vector<cv::ocl::Info> oclinfo;
//ocl dst mat for testing
cv::ocl::oclMat gdst_whole;
//ocl mat with roi
cv::ocl::oclMat gmat;
cv::ocl::oclMat gdst;
cv::ocl::oclMat gmask;
virtual void SetUp()
for (int size = Min_Size; size <= Max_Size; size *= Multiple)
{
type = GET_PARAM(0);
for (size_t j = 0; j < sizeof(all_type) / sizeof(int); j++)
{
SUBTEST << size << 'x' << size << "; " << type_name[j] ;
cv::RNG &rng = TS::ptr()->get_rng();
cv::Size size(MWIDTH, MHEIGHT);
gen(src, size, size, all_type[j], 0, 256);
//gen(dst, size, size, all_type[j], 0, 256);
mat = randomMat(rng, size, type, 5, 16, false);
dst = randomMat(rng, size, type, 5, 16, false);
mask = randomMat(rng, size, CV_8UC1, 0, 2, false);
//d_dst.upload(dst);
src.copyTo(dst);
CPU_ON;
src.copyTo(dst);
CPU_OFF;
d_src.upload(src);
WARMUP_ON;
d_src.copyTo(d_dst);
WARMUP_OFF;
GPU_ON;
d_src.copyTo(d_dst);
;
GPU_OFF;
GPU_FULL_ON;
d_src.upload(src);
d_src.copyTo(d_dst);
d_dst.download(dst);
GPU_FULL_OFF;
}
cv::threshold(mask, mask, 0.5, 255., CV_8UC1);
//int devnums = getDevice(oclinfo);
//CV_Assert(devnums > 0);
////if you want to use undefault device, set it here
////setDevice(oclinfo[0]);
//setBinpath(CLBINPATH);
}
void Has_roi(int b)
{
//cv::RNG& rng = TS::ptr()->get_rng();
if(b)
{
//randomize ROI
roicols = mat.cols - 1; //start
roirows = mat.rows - 1;
srcx = 1;
srcy = 1;
dstx = 1;
dsty = 1;
maskx = 1;
masky = 1;
}
else
{
roicols = mat.cols;
roirows = mat.rows;
srcx = 0;
srcy = 0;
dstx = 0;
dsty = 0;
maskx = 0;
masky = 0;
};
mat_roi = mat(Rect(srcx, srcy, roicols, roirows));
mask_roi = mask(Rect(maskx, masky, roicols, roirows));
dst_roi = dst(Rect(dstx, dsty, roicols, roirows));
//gdst_whole = dst;
//gdst = gdst_whole(Rect(dstx,dsty,roicols,roirows));
//gmat = mat_roi;
//gmask = mask_roi;
}
};
struct CopyTo : CopyToTestBase {};
// Benchmark of Mat::copyTo against oclMat::copyTo without a mask. Nothing is
// asserted.
//
// Without PRINT_KERNEL_RUN_TIME: for each ROI mode k in [LOOPROISTART, LOOPROIEND)
// it times the CPU copy (t0), the whole device path of upload + copyTo +
// download (t1) and the device copyTo call alone (t2), then prints the averages
// in milliseconds.
// With PRINT_KERNEL_RUN_TIME: it only runs the device copy once per ROI mode and
// prints a label; no timing is done in this function.
TEST_P(CopyTo, Without_mask)
{
#ifndef PRINT_KERNEL_RUN_TIME
double totalcputick = 0;
double totalgputick = 0;
double totalgputick_kernel = 0;
double t0 = 0;
double t1 = 0;
double t2 = 0;
for(int k = LOOPROISTART; k < LOOPROIEND; k++)
{
// totals are per ROI mode
totalcputick = 0;
totalgputick = 0;
totalgputick_kernel = 0;
// LOOP_TIMES measured passes plus one leading warm-up pass
for(int j = 0; j < LOOP_TIMES + 1; j ++)
{
Has_roi(k);
t0 = (double)cvGetTickCount();//cpu start
mat_roi.copyTo(dst_roi);
t0 = (double)cvGetTickCount() - t0;//cpu end
t1 = (double)cvGetTickCount();//gpu start1
// assigning host matrices to oclMat objects; counted as upload time in t1
gdst_whole = dst;
gdst = gdst_whole(Rect(dstx, dsty, roicols, roirows));
gmat = mat_roi;
t2 = (double)cvGetTickCount(); //kernel
gmat.copyTo(gdst);
t2 = (double)cvGetTickCount() - t2;//kernel
cv::Mat cpu_dst;
gdst_whole.download (cpu_dst);//download
t1 = (double)cvGetTickCount() - t1;//gpu end1
// pass 0 is measured but excluded from the totals
if(j == 0)
continue;
totalgputick = t1 + totalgputick;
totalcputick = t0 + totalcputick;
totalgputick_kernel = t2 + totalgputick_kernel;
}
if(k == 0)
{
cout << "no roi\n";
}
else
{
cout << "with roi\n";
};
// tick totals over LOOP_TIMES passes converted to an average in ms
cout << "average cpu runtime is " << totalcputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl;
cout << "average gpu runtime is " << totalgputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl;
cout << "average gpu runtime without data transfer is " << totalgputick_kernel / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl;
}
#else
for(int j = LOOPROISTART; j < LOOPROIEND; j ++)
{
Has_roi(j);
gdst_whole = dst;
gdst = gdst_whole(Rect(dstx, dsty, roicols, roirows));
gmat = mat_roi;
if(j == 0)
{
cout << "no roi:";
}
else
{
cout << "\nwith roi:";
};
gmat.copyTo(gdst);
};
#endif
}
TEST_P(CopyTo, With_mask)
///////////// setTo////////////////////////
TEST(setTo)
{
#ifndef PRINT_KERNEL_RUN_TIME
double totalcputick = 0;
double totalgputick = 0;
double totalgputick_kernel = 0;
double t0 = 0;
double t1 = 0;
double t2 = 0;
for(int k = LOOPROISTART; k < LOOPROIEND; k++)
Mat src, dst;
Scalar val(1, 2, 3, 4);
ocl::oclMat d_src, d_dst;
int all_type[] = {CV_8UC1, CV_8UC4};
std::string type_name[] = {"CV_8UC1", "CV_8UC4"};
for (int size = Min_Size; size <= Max_Size; size *= Multiple)
{
totalcputick = 0;
totalgputick = 0;
totalgputick_kernel = 0;
for(int j = 0; j < LOOP_TIMES + 1; j ++)
for (size_t j = 0; j < sizeof(all_type) / sizeof(int); j++)
{
Has_roi(k);
SUBTEST << size << 'x' << size << "; " << type_name[j] ;
t0 = (double)cvGetTickCount();//cpu start
mat_roi.copyTo(dst_roi, mask_roi);
t0 = (double)cvGetTickCount() - t0;//cpu end
gen(src, size, size, all_type[j], 0, 256);
t1 = (double)cvGetTickCount();//gpu start1
gdst_whole = dst;
gdst = gdst_whole(Rect(dstx, dsty, roicols, roirows));
src.setTo(val);
gmat = mat_roi;
gmask = mask_roi;
t2 = (double)cvGetTickCount(); //kernel
gmat.copyTo(gdst, gmask);
t2 = (double)cvGetTickCount() - t2;//kernel
cv::Mat cpu_dst;
gdst_whole.download (cpu_dst);//download
t1 = (double)cvGetTickCount() - t1;//gpu end1
if(j == 0)
continue;
totalgputick = t1 + totalgputick;
totalcputick = t0 + totalcputick;
totalgputick_kernel = t2 + totalgputick_kernel;
CPU_ON;
src.setTo(val);
CPU_OFF;
d_src.upload(src);
WARMUP_ON;
d_src.setTo(val);
WARMUP_OFF;
GPU_ON;
d_src.setTo(val);
;
GPU_OFF;
GPU_FULL_ON;
d_src.upload(src);
d_src.setTo(val);
GPU_FULL_OFF;
}
if(k == 0)
{
cout << "no roi\n";
}
else
{
cout << "with roi\n";
};
cout << "average cpu runtime is " << totalcputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl;
cout << "average gpu runtime is " << totalgputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl;
cout << "average gpu runtime without data transfer is " << totalgputick_kernel / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl;
}
#else
for(int j = LOOPROISTART; j < LOOPROIEND; j ++)
{
Has_roi(j);
gdst_whole = dst;
gdst = gdst_whole(Rect(dstx, dsty, roicols, roirows));
gmat = mat_roi;
gmask = mask_roi;
if(j == 0)
{
cout << "no roi:";
}
else
{
cout << "\nwith roi:";
};
gmat.copyTo(gdst, gmask);
};
#endif
}
///////////////////////////////////////////setto/////////////////////////////////////////////////////////////
// Fixture for the setTo benchmarks. SetUp() creates a random matrix of the
// parameterized type, a binary 8-bit mask and a random fill value. Has_roi()
// selects the region of interest and builds the host-side ROI views. Only the
// first test parameter (the matrix type) is read; the bool is not used here.
PARAM_TEST_CASE(SetToTestBase, MatType, bool)
{
    int type;
    cv::Scalar val;

    cv::Mat mat;
    cv::Mat mask;

    // ROI geometry, filled in by Has_roi()
    int roicols;
    int roirows;
    int srcx;
    int srcy;
    int maskx;
    int masky;

    // host-side ROI views into mat / mask
    cv::Mat mat_roi;
    cv::Mat mask_roi;

    // device-side whole matrix, set up by the individual tests
    cv::ocl::oclMat gmat_whole;

    // device-side ROI matrices, set up by the individual tests
    cv::ocl::oclMat gmat;
    cv::ocl::oclMat gmask;

    virtual void SetUp()
    {
        type = GET_PARAM(0);

        cv::RNG &rng = TS::ptr()->get_rng();
        cv::Size size(MWIDTH, MHEIGHT);

        mat = randomMat(rng, size, type, 5, 16, false);
        mask = randomMat(rng, size, CV_8UC1, 0, 2, false);

        // The last argument of cv::threshold is the thresholding type, not a
        // matrix type. The old code passed CV_8UC1 there, which only worked
        // because it has the same numeric value (0) as THRESH_BINARY.
        cv::threshold(mask, mask, 0.5, 255., cv::THRESH_BINARY);

        val = cv::Scalar(rng.uniform(-10.0, 10.0), rng.uniform(-10.0, 10.0), rng.uniform(-10.0, 10.0), rng.uniform(-10.0, 10.0));
    }

    // A non-zero b selects a ROI that drops the first row and column (origin
    // (1, 1), one row and one column smaller); zero selects the full matrices.
    void Has_roi(int b)
    {
        if (b)
        {
            roicols = mat.cols - 1;
            roirows = mat.rows - 1;
            srcx = 1;
            srcy = 1;
            maskx = 1;
            masky = 1;
        }
        else
        {
            roicols = mat.cols;
            roirows = mat.rows;
            srcx = 0;
            srcy = 0;
            maskx = 0;
            masky = 0;
        }

        mat_roi = mat(Rect(srcx, srcy, roicols, roirows));
        mask_roi = mask(Rect(maskx, masky, roicols, roirows));
    }
};
struct SetTo : SetToTestBase {};
// Benchmark of Mat::setTo against oclMat::setTo without a mask. Nothing is
// asserted.
//
// Without PRINT_KERNEL_RUN_TIME: for each ROI mode k in [LOOPROISTART, LOOPROIEND)
// it times the CPU call (t0), the whole device path of upload + setTo + download
// (t1) and the device setTo call alone (t2), then prints the averages in
// milliseconds.
// With PRINT_KERNEL_RUN_TIME: it only runs the device setTo once per ROI mode and
// prints a label; no timing is done in this function.
TEST_P(SetTo, Without_mask)
{
#ifndef PRINT_KERNEL_RUN_TIME
double totalcputick = 0;
double totalgputick = 0;
double totalgputick_kernel = 0;
double t0 = 0;
double t1 = 0;
double t2 = 0;
for(int k = LOOPROISTART; k < LOOPROIEND; k++)
{
// totals are per ROI mode
totalcputick = 0;
totalgputick = 0;
totalgputick_kernel = 0;
// LOOP_TIMES measured passes plus one leading warm-up pass
for(int j = 0; j < LOOP_TIMES + 1; j ++)
{
Has_roi(k);
t0 = (double)cvGetTickCount();//cpu start
mat_roi.setTo(val);
t0 = (double)cvGetTickCount() - t0;//cpu end
t1 = (double)cvGetTickCount();//gpu start1
// assigning the host matrix to an oclMat; counted as upload time in t1
gmat_whole = mat;
gmat = gmat_whole(Rect(srcx, srcy, roicols, roirows));
t2 = (double)cvGetTickCount(); //kernel
gmat.setTo(val);
t2 = (double)cvGetTickCount() - t2;//kernel
cv::Mat cpu_dst;
gmat_whole.download(cpu_dst);//download
t1 = (double)cvGetTickCount() - t1;//gpu end1
// pass 0 is measured but excluded from the totals
if(j == 0)
continue;
totalgputick = t1 + totalgputick;
totalcputick = t0 + totalcputick;
totalgputick_kernel = t2 + totalgputick_kernel;
}
if(k == 0)
{
cout << "no roi\n";
}
else
{
cout << "with roi\n";
};
// tick totals over LOOP_TIMES passes converted to an average in ms
cout << "average cpu runtime is " << totalcputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl;
cout << "average gpu runtime is " << totalgputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl;
cout << "average gpu runtime without data transfer is " << totalgputick_kernel / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl;
}
#else
for(int j = LOOPROISTART; j < LOOPROIEND; j ++)
{
Has_roi(j);
gmat_whole = mat;
gmat = gmat_whole(Rect(srcx, srcy, roicols, roirows));
if(j == 0)
{
cout << "no roi:";
}
else
{
cout << "\nwith roi:";
};
gmat.setTo(val);
};
#endif
}
// Benchmark of masked Mat::setTo against masked oclMat::setTo. Nothing is
// asserted.
//
// Without PRINT_KERNEL_RUN_TIME: for each ROI mode k in [LOOPROISTART, LOOPROIEND)
// it times the CPU call (t0), the whole device path of upload (matrix and mask)
// + setTo + download (t1) and the device setTo call alone (t2), then prints the
// averages in milliseconds.
// With PRINT_KERNEL_RUN_TIME: it only runs the device setTo once per ROI mode and
// prints a label; no timing is done in this function.
TEST_P(SetTo, With_mask)
{
#ifndef PRINT_KERNEL_RUN_TIME
double totalcputick = 0;
double totalgputick = 0;
double totalgputick_kernel = 0;
double t0 = 0;
double t1 = 0;
double t2 = 0;
for(int k = LOOPROISTART; k < LOOPROIEND; k++)
{
// totals are per ROI mode
totalcputick = 0;
totalgputick = 0;
totalgputick_kernel = 0;
// LOOP_TIMES measured passes plus one leading warm-up pass
for(int j = 0; j < LOOP_TIMES + 1; j ++)
{
Has_roi(k);
t0 = (double)cvGetTickCount();//cpu start
mat_roi.setTo(val, mask_roi);
t0 = (double)cvGetTickCount() - t0;//cpu end
t1 = (double)cvGetTickCount();//gpu start1
// assigning host matrices to oclMat objects; counted as upload time in t1
gmat_whole = mat;
gmat = gmat_whole(Rect(srcx, srcy, roicols, roirows));
gmask = mask_roi;
t2 = (double)cvGetTickCount(); //kernel
gmat.setTo(val, gmask);
t2 = (double)cvGetTickCount() - t2;//kernel
cv::Mat cpu_dst;
gmat_whole.download(cpu_dst);//download
t1 = (double)cvGetTickCount() - t1;//gpu end1
// pass 0 is measured but excluded from the totals
if(j == 0)
continue;
totalgputick = t1 + totalgputick;
totalcputick = t0 + totalcputick;
totalgputick_kernel = t2 + totalgputick_kernel;
}
if(k == 0)
{
cout << "no roi\n";
}
else
{
cout << "with roi\n";
};
// tick totals over LOOP_TIMES passes converted to an average in ms
cout << "average cpu runtime is " << totalcputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl;
cout << "average gpu runtime is " << totalgputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl;
cout << "average gpu runtime without data transfer is " << totalgputick_kernel / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl;
}
#else
for(int j = LOOPROISTART; j < LOOPROIEND; j ++)
{
Has_roi(j);
gmat_whole = mat;
gmat = gmat_whole(Rect(srcx, srcy, roicols, roirows));
gmask = mask_roi;
if(j == 0)
{
cout << "no roi:";
}
else
{
cout << "\nwith roi:";
};
gmat.setTo(val, gmask);
};
#endif
}
// Fixture for the upload/download benchmark: holds one random host matrix of
// the parameterized type and the device matrix it is transferred to and from.
// Only the first test parameter (the matrix type) is read.
PARAM_TEST_CASE(DataTransfer, MatType, bool)
{
    int type;                    // matrix type taken from the test parameters
    cv::Mat mat;                 // host-side data to transfer
    cv::ocl::oclMat gmat_whole;  // device-side counterpart

    virtual void SetUp()
    {
        type = GET_PARAM(0);

        const cv::Size matSize(MWIDTH, MHEIGHT);
        cv::RNG &rng = TS::ptr()->get_rng();
        mat = randomMat(rng, matSize, type, 5, 16, false);
    }
};
// Measures host-to-device upload and device-to-host download of the fixture's
// matrix separately, then prints the average upload, download and combined
// transfer times in milliseconds. Nothing is asserted.
TEST_P(DataTransfer, perf)
{
    double uploadTicks = 0;
    double downloadTicks = 0;
    cv::Mat cpu_dst;

    for (int pass = 0; pass < LOOP_TIMES + 1; ++pass)
    {
        const double uploadStart = static_cast<double>(cvGetTickCount());
        gmat_whole.upload(mat); // upload
        const double uploadElapsed = static_cast<double>(cvGetTickCount()) - uploadStart;

        const double downloadStart = static_cast<double>(cvGetTickCount());
        gmat_whole.download(cpu_dst); // download
        const double downloadElapsed = static_cast<double>(cvGetTickCount()) - downloadStart;

        // Pass 0 is a warm-up: it is measured but left out of the totals.
        if (pass != 0)
        {
            uploadTicks += uploadElapsed;
            downloadTicks += downloadElapsed;
        }
    }

    const double transferTicks = uploadTicks + downloadTicks;

    // Turns a tick total over LOOP_TIMES passes into an average in milliseconds.
    const double msDivisor = (double)cvGetTickFrequency() * LOOP_TIMES * 1000.;
    cout << "average upload time is " << uploadTicks / msDivisor << "ms" << endl;
    cout << "average download time is " << downloadTicks / msDivisor << "ms" << endl;
    cout << "average data transfer time is " << transferTicks / msDivisor << "ms" << endl;
}
//**********test************
// ConvertTo: every pairing of a value from the first list (first test parameter)
// with a value from the second list (second test parameter).
INSTANTIATE_TEST_CASE_P(MatrixOperation, ConvertTo, Combine(
Values(CV_8UC1, CV_8UC4, CV_32FC1, CV_32FC4),
Values(CV_8UC1, CV_8UC4, CV_32FC1, CV_32FC4)));
// CopyTo: one instance per listed matrix type.
INSTANTIATE_TEST_CASE_P(MatrixOperation, CopyTo, Combine(
Values(CV_8UC1, CV_8UC4, CV_32FC1, CV_32FC4),
Values(false))); // Values(false) is the reserved parameter
// SetTo: one instance per listed matrix type.
INSTANTIATE_TEST_CASE_P(MatrixOperation, SetTo, Combine(
Values(CV_8UC1, CV_8UC4, CV_32FC1, CV_32FC4),
Values(false))); // Values(false) is the reserved parameter
// DataTransfer: one instance per listed matrix type, including 3-channel types.
INSTANTIATE_TEST_CASE_P(MatrixOperation, DataTransfer, Combine(
Values(CV_8UC1, CV_8UC3, CV_8UC4, CV_32FC1, CV_32FC3, CV_32FC4),
Values(false))); // Values(false) is the reserved parameter
#endif
}

View File

@@ -0,0 +1,84 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved.
// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// @Authors
// Fangfang Bai, fangfang@multicorewareinc.com
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors as is and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#include "precomp.hpp"
///////////// norm////////////////////////
// Benchmark of the two-array NORM_INF norm on CPU (cv::norm) and on the OpenCL
// device (ocl::norm) for square CV_8UC1 matrices, with the side length growing
// from Min_Size to Max_Size by a factor of Multiple.
//
// ocl::norm(d_src, d_buf, ...) is called with two input matrices, so the CPU
// side must time the matching two-array overload. The previous version timed
// the one-array norm(src, NORM_INF) on the CPU, so the two timings were not
// measuring the same operation.
TEST(norm)
{
    Mat src, buf;
    ocl::oclMat d_src, d_buf;

    for (int size = Min_Size; size <= Max_Size; size *= Multiple)
    {
        SUBTEST << size << 'x' << size << "; CV_8UC1; NORM_INF";

        gen(src, size, size, CV_8UC1, Scalar::all(0), Scalar::all(1));
        gen(buf, size, size, CV_8UC1, Scalar::all(0), Scalar::all(1));

        // Untimed call before the measured CPU section.
        norm(src, buf, NORM_INF);

        CPU_ON;
        norm(src, buf, NORM_INF);
        CPU_OFF;

        d_src.upload(src);
        d_buf.upload(buf);

        WARMUP_ON;
        ocl::norm(d_src, d_buf, NORM_INF);
        WARMUP_OFF;

        GPU_ON;
        ocl::norm(d_src, d_buf, NORM_INF);
        GPU_OFF;

        // Full measurement includes the transfer of both inputs.
        GPU_FULL_ON;
        d_src.upload(src);
        d_buf.upload(buf);
        ocl::norm(d_src, d_buf, NORM_INF);
        GPU_FULL_OFF;
    }
}

View File

@@ -1,4 +1,4 @@
///////////////////////////////////////////////////////////////////////////////////////
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
@@ -15,7 +15,7 @@
// Third party copyrights are property of their respective owners.
//
// @Authors
// fangfang bai, fangfang@multicorewareinc.com
// Fangfang Bai, fangfang@multicorewareinc.com
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
@@ -30,7 +30,7 @@
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// This software is provided by the copyright holders and contributors as is and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
@@ -42,96 +42,46 @@
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#include "precomp.hpp"
#include <iomanip>
#ifdef HAVE_OPENCL
using namespace cv;
using namespace cv::ocl;
using namespace cvtest;
using namespace testing;
using namespace std;
PARAM_TEST_CASE(PyrDown, MatType, int)
///////////// pyrDown //////////////////////
TEST(pyrDown)
{
int type;
int channels;
//src mat
cv::Mat mat1;
cv::Mat dst;
Mat src, dst;
int all_type[] = {CV_8UC1, CV_8UC4};
std::string type_name[] = {"CV_8UC1", "CV_8UC4"};
//std::vector<cv::ocl::Info> oclinfo;
//ocl dst mat for testing
cv::ocl::oclMat gmat1;
cv::ocl::oclMat gdst;
virtual void SetUp()
for (int size = Min_Size; size <= Max_Size; size *= Multiple)
{
type = GET_PARAM(0);
channels = GET_PARAM(1);
//int devnums = getDevice(oclinfo);
//CV_Assert(devnums > 0);
}
};
#define VARNAME(A) string(#A);
////////////////////////////////PyrDown/////////////////////////////////////////////////
TEST_P(PyrDown, Mat)
{
cv::Size size(MWIDTH, MHEIGHT);
cv::RNG &rng = TS::ptr()->get_rng();
mat1 = randomMat(rng, size, CV_MAKETYPE(type, channels), 5, 16, false);
cv::ocl::oclMat gdst;
double totalgputick = 0;
double totalgputick_kernel = 0;
double t1 = 0;
double t2 = 0;
for (int j = 0; j < LOOP_TIMES + 1; j ++)
{
t1 = (double)cvGetTickCount();//gpu start1
cv::ocl::oclMat gmat1(mat1);
t2 = (double)cvGetTickCount(); //kernel
cv::ocl::pyrDown(gmat1, gdst);
t2 = (double)cvGetTickCount() - t2;//kernel
cv::Mat cpu_dst;
gdst.download(cpu_dst);
t1 = (double)cvGetTickCount() - t1;//gpu end1
if (j == 0)
for (size_t j = 0; j < sizeof(all_type) / sizeof(int); j++)
{
continue;
SUBTEST << size << 'x' << size << "; " << type_name[j] ;
gen(src, size, size, all_type[j], 0, 256);
pyrDown(src, dst);
CPU_ON;
pyrDown(src, dst);
CPU_OFF;
ocl::oclMat d_src(src);
ocl::oclMat d_dst;
WARMUP_ON;
ocl::pyrDown(d_src, d_dst);
WARMUP_OFF;
GPU_ON;
ocl::pyrDown(d_src, d_dst);
;
GPU_OFF;
GPU_FULL_ON;
d_src.upload(src);
ocl::pyrDown(d_src, d_dst);
d_dst.download(dst);
GPU_FULL_OFF;
}
totalgputick = t1 + totalgputick;
totalgputick_kernel = t2 + totalgputick_kernel;
}
cout << "average gpu runtime is " << totalgputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl;
cout << "average gpu runtime without data transfer is " << totalgputick_kernel / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl;
}
//********test****************
INSTANTIATE_TEST_CASE_P(GPU_ImgProc, PyrDown, Combine(
Values(CV_8U, CV_32F), Values(1, 4)));
#endif // HAVE_OPENCL
}

View File

@@ -0,0 +1,143 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved.
// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// @Authors
// Fangfang Bai, fangfang@multicorewareinc.com
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#include "precomp.hpp"
///////////// PyrLKOpticalFlow ////////////////////////
// Benchmark of sparse pyramidal Lucas-Kanade optical flow: the CPU
// calcOpticalFlowPyrLK against ocl::PyrLKOpticalFlow::sparse, run on two image
// pairs and on several feature-point counts.
// NOTE(review): SUBTEST, CPU_ON/CPU_OFF, WARMUP_ON/WARMUP_OFF, GPU_ON/GPU_OFF and
// GPU_FULL_ON/GPU_FULL_OFF are macros defined outside this file; presumably they
// open/close the timed regions of the harness - confirm against their definitions.
TEST(PyrLKOpticalFlow)
{
// images1[i] / images2[i] form the (previous frame, next frame) pair number i.
std::string images1[] = {"rubberwhale1.png", "aloeL.jpg"};
std::string images2[] = {"rubberwhale2.png", "aloeR.jpg"};
for (size_t i = 0; i < sizeof(images1) / sizeof(std::string); i++)
{
// The first pair (i == 0) is loaded in color, the second one as grayscale.
Mat frame0 = imread(abspath(images1[i]), i == 0 ? IMREAD_COLOR : IMREAD_GRAYSCALE);
if (frame0.empty())
{
// A missing image aborts the whole test with a runtime_error.
std::string errstr = "can't open " + images1[i];
throw runtime_error(errstr);
}
Mat frame1 = imread(abspath(images2[i]), i == 0 ? IMREAD_COLOR : IMREAD_GRAYSCALE);
if (frame1.empty())
{
std::string errstr = "can't open " + images2[i];
throw runtime_error(errstr);
}
// goodFeaturesToTrack below is fed a single-channel image, so the color frame
// gets a grayscale copy; the grayscale pair is used as loaded.
Mat gray_frame;
if (i == 0)
{
cvtColor(frame0, gray_frame, COLOR_BGR2GRAY);
}
// Sweep the requested number of feature points from Min_Size to Max_Size,
// multiplying by Multiple on every step.
for (int points = Min_Size; points <= Max_Size; points *= Multiple)
{
// Subtest description: frame size, color mode and requested point count.
if (i == 0)
SUBTEST << frame0.cols << "x" << frame0.rows << "; color; " << points << " points";
else
SUBTEST << frame0.cols << "x" << frame0.rows << "; gray; " << points << " points";
Mat nextPts_cpu;
Mat status_cpu;
// Detect at most `points` corners in the first frame; they are the points tracked below.
vector<Point2f> pts;
goodFeaturesToTrack(i == 0 ? gray_frame : frame0, pts, points, 0.01, 0.0);
vector<Point2f> nextPts;
vector<unsigned char> status;
vector<float> err;
// One call outside the CPU_ON/CPU_OFF region, then the same call inside it.
calcOpticalFlowPyrLK(frame0, frame1, pts, nextPts, status, err);
CPU_ON;
calcOpticalFlowPyrLK(frame0, frame1, pts, nextPts, status, err);
CPU_OFF;
ocl::PyrLKOpticalFlow d_pyrLK;
ocl::oclMat d_frame0(frame0);
ocl::oclMat d_frame1(frame1);
ocl::oclMat d_pts;
// Mat header of 1 x pts.size() CV_32FC2 elements built on top of the vector's storage.
// NOTE(review): &pts[0] requires pts to be non-empty - confirm that
// goodFeaturesToTrack always finds at least one corner in these images.
Mat pts_mat(1, (int)pts.size(), CV_32FC2, (void *)&pts[0]);
d_pts.upload(pts_mat);
ocl::oclMat d_nextPts;
ocl::oclMat d_status;
ocl::oclMat d_err;
// The same device call is issued in the WARMUP, GPU and GPU_FULL regions.
WARMUP_ON;
d_pyrLK.sparse(d_frame0, d_frame1, d_pts, d_nextPts, d_status, &d_err);
WARMUP_OFF;
GPU_ON;
d_pyrLK.sparse(d_frame0, d_frame1, d_pts, d_nextPts, d_status, &d_err);
;
GPU_OFF;
// The GPU_FULL region additionally contains the uploads of both frames and of
// the points, plus the downloads of the results.
GPU_FULL_ON;
d_frame0.upload(frame0);
d_frame1.upload(frame1);
d_pts.upload(pts_mat);
d_pyrLK.sparse(d_frame0, d_frame1, d_pts, d_nextPts, d_status, &d_err);
// Results are downloaded only when the device call produced them.
if (!d_nextPts.empty())
{
d_nextPts.download(nextPts_cpu);
}
if (!d_status.empty())
{
d_status.download(status_cpu);
}
GPU_FULL_OFF;
}
}
}

View File

@@ -15,7 +15,7 @@
// Third party copyrights are property of their respective owners.
//
// @Authors
// fangfang bai fangfang@multicorewareinc.com
// Fangfang Bai, fangfang@multicorewareinc.com
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
@@ -30,7 +30,7 @@
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// This software is provided by the copyright holders and contributors as is and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
@@ -42,80 +42,46 @@
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#include "precomp.hpp"
#include <iomanip>
#ifdef HAVE_OPENCL
using namespace cv;
using namespace cv::ocl;
using namespace cvtest;
using namespace testing;
using namespace std;
PARAM_TEST_CASE(PyrUp, MatType, int)
///////////// pyrUp ////////////////////////
TEST(pyrUp)
{
int type;
int channels;
//std::vector<cv::ocl::Info> oclinfo;
Mat src, dst;
int all_type[] = {CV_8UC1, CV_8UC4};
std::string type_name[] = {"CV_8UC1", "CV_8UC4"};
virtual void SetUp()
for (int size = 500; size <= 2000; size *= 2)
{
type = GET_PARAM(0);
channels = GET_PARAM(1);
//int devnums = getDevice(oclinfo);
//CV_Assert(devnums > 0);
}
};
TEST_P(PyrUp, Performance)
{
cv::Size size(MWIDTH, MHEIGHT);
cv::Mat src = randomMat(size, CV_MAKETYPE(type, channels));
cv::Mat dst_gold;
cv::ocl::oclMat dst;
double totalgputick = 0;
double totalgputick_kernel = 0;
double t1 = 0;
double t2 = 0;
for (int j = 0; j < LOOP_TIMES + 1; j ++)
{
t1 = (double)cvGetTickCount();//gpu start1
cv::ocl::oclMat srcMat = cv::ocl::oclMat(src);//upload
t2 = (double)cvGetTickCount(); //kernel
cv::ocl::pyrUp(srcMat, dst);
t2 = (double)cvGetTickCount() - t2;//kernel
cv::Mat cpu_dst;
dst.download(cpu_dst); //download
t1 = (double)cvGetTickCount() - t1;//gpu end1
if (j == 0)
for (size_t j = 0; j < sizeof(all_type) / sizeof(int); j++)
{
continue;
SUBTEST << size << 'x' << size << "; " << type_name[j] ;
gen(src, size, size, all_type[j], 0, 256);
pyrUp(src, dst);
CPU_ON;
pyrUp(src, dst);
CPU_OFF;
ocl::oclMat d_src(src);
ocl::oclMat d_dst;
WARMUP_ON;
ocl::pyrUp(d_src, d_dst);
WARMUP_OFF;
GPU_ON;
ocl::pyrUp(d_src, d_dst);
;
GPU_OFF;
GPU_FULL_ON;
d_src.upload(src);
ocl::pyrUp(d_src, d_dst);
d_dst.download(dst);
GPU_FULL_OFF;
}
totalgputick = t1 + totalgputick;
totalgputick_kernel = t2 + totalgputick_kernel;
}
cout << "average gpu runtime is " << totalgputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl;
cout << "average gpu runtime without data transfer is " << totalgputick_kernel / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl;
}
INSTANTIATE_TEST_CASE_P(GPU_ImgProc, PyrUp, Combine(
Values(CV_8U, CV_32F), Values(1, 4)));
#endif // HAVE_OPENCL
}

View File

@@ -10,12 +10,12 @@
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2010-2012, Institute Of Software Chinese Academy Of Science, all rights reserved.
// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved.
// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// @Authors
// Jia Haipeng, jiahaipeng95@gmail.com
// Fangfang Bai, fangfang@multicorewareinc.com
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
@@ -30,7 +30,7 @@
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// This software is provided by the copyright holders and contributors as is and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
@@ -42,446 +42,109 @@
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#include "precomp.hpp"
#ifdef HAVE_OPENCL
using namespace cvtest;
using namespace testing;
using namespace std;
using namespace cv::ocl;
PARAM_TEST_CASE(MergeTestBase, MatType, int)
///////////// Merge////////////////////////
TEST(Merge)
{
int type;
int channels;
Mat dst;
ocl::oclMat d_dst;
//src mat
cv::Mat mat1;
cv::Mat mat2;
cv::Mat mat3;
cv::Mat mat4;
int channels = 4;
int all_type[] = {CV_8UC1, CV_32FC1};
std::string type_name[] = {"CV_8UC1", "CV_32FC1"};
//dst mat
cv::Mat dst;
// set up roi
int roicols;
int roirows;
int src1x;
int src1y;
int src2x;
int src2y;
int src3x;
int src3y;
int src4x;
int src4y;
int dstx;
int dsty;
//src mat with roi
cv::Mat mat1_roi;
cv::Mat mat2_roi;
cv::Mat mat3_roi;
cv::Mat mat4_roi;
//dst mat with roi
cv::Mat dst_roi;
//std::vector<cv::ocl::Info> oclinfo;
//ocl dst mat for testing
cv::ocl::oclMat gdst_whole;
//ocl mat with roi
cv::ocl::oclMat gmat1;
cv::ocl::oclMat gmat2;
cv::ocl::oclMat gmat3;
cv::ocl::oclMat gmat4;
cv::ocl::oclMat gdst;
virtual void SetUp()
for (int size = Min_Size; size <= Max_Size; size *= Multiple)
{
type = GET_PARAM(0);
channels = GET_PARAM(1);
for (size_t j = 0; j < sizeof(all_type) / sizeof(int); j++)
{
SUBTEST << size << 'x' << size << "; " << type_name[j] ;
Size size1 = Size(size, size);
std::vector<Mat> src(channels);
cv::RNG &rng = TS::ptr()->get_rng();
cv::Size size(MWIDTH, MHEIGHT);
for (int i = 0; i < channels; ++i)
{
src[i] = Mat(size1, all_type[j], cv::Scalar::all(i));
}
merge(src, dst);
CPU_ON;
merge(src, dst);
CPU_OFF;
std::vector<ocl::oclMat> d_src(channels);
for (int i = 0; i < channels; ++i)
{
d_src[i] = ocl::oclMat(size1, all_type[j], cv::Scalar::all(i));
}
WARMUP_ON;
ocl::merge(d_src, d_dst);
WARMUP_OFF;
GPU_ON;
ocl::merge(d_src, d_dst);
;
GPU_OFF;
GPU_FULL_ON;
for (int i = 0; i < channels; ++i)
{
d_src[i] = ocl::oclMat(size1, CV_8U, cv::Scalar::all(i));
}
ocl::merge(d_src, d_dst);
d_dst.download(dst);
GPU_FULL_OFF;
}
mat1 = randomMat(rng, size, CV_MAKETYPE(type, 1), 5, 16, false);
mat2 = randomMat(rng, size, CV_MAKETYPE(type, 1), 5, 16, false);
mat3 = randomMat(rng, size, CV_MAKETYPE(type, 1), 5, 16, false);
mat4 = randomMat(rng, size, CV_MAKETYPE(type, 1), 5, 16, false);
dst = randomMat(rng, size, CV_MAKETYPE(type, channels), 5, 16, false);
//int devnums = getDevice(oclinfo);
//CV_Assert(devnums > 0);
////if you want to use undefault device, set it here
////setDevice(oclinfo[0]);
//setBinpath(CLBINPATH);
}
void Has_roi(int b)
{
//cv::RNG& rng = TS::ptr()->get_rng();
if(b)
{
//randomize ROI
roicols = mat1.cols - 1; //start
roirows = mat1.rows - 1;
src1x = 1;
src1y = 1;
src2x = 1;
src2y = 1;
src3x = 1;
src3y = 1;
src4x = 1;
src4y = 1;
dstx = 1;
dsty = 1;
}
else
{
roicols = mat1.cols;
roirows = mat1.rows;
src1x = 0;
src1y = 0;
src2x = 0;
src2y = 0;
src3x = 0;
src3y = 0;
src4x = 0;
src4y = 0;
dstx = 0;
dsty = 0;
};
mat1_roi = mat1(Rect(src1x, src1y, roicols, roirows));
mat2_roi = mat2(Rect(src2x, src2y, roicols, roirows));
mat3_roi = mat3(Rect(src3x, src3y, roicols, roirows));
mat4_roi = mat4(Rect(src4x, src4y, roicols, roirows));
dst_roi = dst(Rect(dstx, dsty, roicols, roirows));
}
};
struct Merge : MergeTestBase {};
TEST_P(Merge, Accuracy)
{
#ifndef PRINT_KERNEL_RUN_TIME
double totalcputick = 0;
double totalgputick = 0;
double totalgputick_kernel = 0;
double t0 = 0;
double t1 = 0;
double t2 = 0;
for(int k = LOOPROISTART; k < LOOPROIEND; k++)
{
totalcputick = 0;
totalgputick = 0;
totalgputick_kernel = 0;
for(int j = 0; j < LOOP_TIMES + 1; j ++)
{
Has_roi(k);
std::vector<cv::Mat> dev_src;
dev_src.push_back(mat1_roi);
dev_src.push_back(mat2_roi);
dev_src.push_back(mat3_roi);
dev_src.push_back(mat4_roi);
t0 = (double)cvGetTickCount();//cpu start
cv::merge(dev_src, dst_roi);
t0 = (double)cvGetTickCount() - t0;//cpu end
t1 = (double)cvGetTickCount();//gpu start1 ]
gmat1 = mat1_roi;
gmat2 = mat2_roi;
gmat3 = mat3_roi;
gmat4 = mat4_roi;
gdst_whole = dst;
gdst = gdst_whole(Rect(dstx, dsty, roicols, roirows));
std::vector<cv::ocl::oclMat> dev_gsrc;
dev_gsrc.push_back(gmat1);
dev_gsrc.push_back(gmat2);
dev_gsrc.push_back(gmat3);
dev_gsrc.push_back(gmat4);
t2 = (double)cvGetTickCount(); //kernel
cv::ocl::merge(dev_gsrc, gdst);
t2 = (double)cvGetTickCount() - t2;//kernel
cv::Mat cpu_dst;
gdst_whole.download (cpu_dst);//download
t1 = (double)cvGetTickCount() - t1;//gpu end1
if(j == 0)
continue;
totalgputick = t1 + totalgputick;
totalcputick = t0 + totalcputick;
totalgputick_kernel = t2 + totalgputick_kernel;
}
if(k == 0)
{
cout << "no roi\n";
}
else
{
cout << "with roi\n";
};
cout << "average cpu runtime is " << totalcputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl;
cout << "average gpu runtime is " << totalgputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl;
cout << "average gpu runtime without data transfer is " << totalgputick_kernel / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl;
}
#else
for(int j = LOOPROISTART; j < LOOPROIEND; j ++)
{
Has_roi(j);
gmat1 = mat1_roi;
gmat2 = mat2_roi;
gmat3 = mat3_roi;
gmat4 = mat4_roi;
gdst_whole = dst;
gdst = gdst_whole(Rect(dstx, dsty, roicols, roirows));
std::vector<cv::ocl::oclMat> dev_gsrc;
dev_gsrc.push_back(gmat1);
dev_gsrc.push_back(gmat2);
dev_gsrc.push_back(gmat3);
dev_gsrc.push_back(gmat4);
if(j == 0)
{
cout << "no roi:";
}
else
{
cout << "\nwith roi:";
};
cv::ocl::merge(dev_gsrc, gdst);
};
#endif
}
PARAM_TEST_CASE(SplitTestBase, MatType, int)
///////////// Split////////////////////////
TEST(Split)
{
int type;
int channels;
//int channels = 4;
int all_type[] = {CV_8UC1, CV_32FC1};
std::string type_name[] = {"CV_8UC1", "CV_32FC1"};
//src mat
cv::Mat mat;
//dstmat
cv::Mat dst1;
cv::Mat dst2;
cv::Mat dst3;
cv::Mat dst4;
// set up roi
int roicols;
int roirows;
int srcx;
int srcy;
int dst1x;
int dst1y;
int dst2x;
int dst2y;
int dst3x;
int dst3y;
int dst4x;
int dst4y;
//src mat with roi
cv::Mat mat_roi;
//dst mat with roi
cv::Mat dst1_roi;
cv::Mat dst2_roi;
cv::Mat dst3_roi;
cv::Mat dst4_roi;
//std::vector<cv::ocl::Info> oclinfo;
//ocl dst mat for testing
cv::ocl::oclMat gdst1_whole;
cv::ocl::oclMat gdst2_whole;
cv::ocl::oclMat gdst3_whole;
cv::ocl::oclMat gdst4_whole;
//ocl mat with roi
cv::ocl::oclMat gmat;
cv::ocl::oclMat gdst1;
cv::ocl::oclMat gdst2;
cv::ocl::oclMat gdst3;
cv::ocl::oclMat gdst4;
virtual void SetUp()
for (int size = Min_Size; size <= Max_Size; size *= Multiple)
{
type = GET_PARAM(0);
channels = GET_PARAM(1);
for (size_t j = 0; j < sizeof(all_type) / sizeof(int); j++)
{
SUBTEST << size << 'x' << size << "; " << type_name[j];
Size size1 = Size(size, size);
cv::RNG &rng = TS::ptr()->get_rng();
cv::Size size(MWIDTH, MHEIGHT);
Mat src(size1, CV_MAKE_TYPE(all_type[j], 4), cv::Scalar(1, 2, 3, 4));
std::vector<cv::Mat> dst;
split(src, dst);
CPU_ON;
split(src, dst);
CPU_OFF;
ocl::oclMat d_src(size1, CV_MAKE_TYPE(all_type[j], 4), cv::Scalar(1, 2, 3, 4));
std::vector<cv::ocl::oclMat> d_dst;
WARMUP_ON;
ocl::split(d_src, d_dst);
WARMUP_OFF;
GPU_ON;
ocl::split(d_src, d_dst);
;
GPU_OFF;
GPU_FULL_ON;
d_src.upload(src);
ocl::split(d_src, d_dst);
GPU_FULL_OFF;
}
mat = randomMat(rng, size, CV_MAKETYPE(type, channels), 5, 16, false);
dst1 = randomMat(rng, size, CV_MAKETYPE(type, 1), 5, 16, false);
dst2 = randomMat(rng, size, CV_MAKETYPE(type, 1), 5, 16, false);
dst3 = randomMat(rng, size, CV_MAKETYPE(type, 1), 5, 16, false);
dst4 = randomMat(rng, size, CV_MAKETYPE(type, 1), 5, 16, false);
//int devnums = getDevice(oclinfo);
//CV_Assert(devnums > 0);
////if you want to use undefault device, set it here
////setDevice(oclinfo[0]);
//setBinpath(CLBINPATH);
}
void Has_roi(int b)
{
//cv::RNG& rng = TS::ptr()->get_rng();
if(b)
{
//randomize ROI
roicols = mat.cols - 1; //start
roirows = mat.rows - 1;
srcx = 1;
srcx = 1;
dst1x = 1;
dst1y = 1;
dst2x = 1;
dst2y = 1;
dst3x = 1;
dst3y = 1;
dst4x = 1;
dst4y = 1;
}
else
{
roicols = mat.cols;
roirows = mat.rows;
srcx = 0;
srcy = 0;
dst1x = 0;
dst1y = 0;
dst2x = 0;
dst2y = 0;
dst3x = 0;
dst3y = 0;
dst4x = 0;
dst4y = 0;
};
mat_roi = mat(Rect(srcx, srcy, roicols, roirows));
dst1_roi = dst1(Rect(dst1x, dst1y, roicols, roirows));
dst2_roi = dst2(Rect(dst2x, dst2y, roicols, roirows));
dst3_roi = dst3(Rect(dst3x, dst3y, roicols, roirows));
dst4_roi = dst4(Rect(dst4x, dst4y, roicols, roirows));
}
};
struct Split : SplitTestBase {};
TEST_P(Split, Accuracy)
{
#ifndef PRINT_KERNEL_RUN_TIME
double totalcputick = 0;
double totalgputick = 0;
double totalgputick_kernel = 0;
double t0 = 0;
double t1 = 0;
double t2 = 0;
for(int k = LOOPROISTART; k < LOOPROIEND; k++)
{
totalcputick = 0;
totalgputick = 0;
totalgputick_kernel = 0;
for(int j = 0; j < LOOP_TIMES + 1; j ++)
{
Has_roi(k);
cv::Mat dev_dst[4] = {dst1_roi, dst2_roi, dst3_roi, dst4_roi};
cv::ocl::oclMat dev_gdst[4] = {gdst1, gdst2, gdst3, gdst4};
t0 = (double)cvGetTickCount();//cpu start
cv::split(mat_roi, dev_dst);
t0 = (double)cvGetTickCount() - t0;//cpu end
t1 = (double)cvGetTickCount();//gpu start1
gdst1_whole = dst1;
gdst1 = gdst1_whole(Rect(dst1x, dst1y, roicols, roirows));
gdst2_whole = dst2;
gdst2 = gdst2_whole(Rect(dst2x, dst2y, roicols, roirows));
gdst3_whole = dst3;
gdst3 = gdst3_whole(Rect(dst3x, dst3y, roicols, roirows));
gdst4_whole = dst4;
gdst4 = gdst4_whole(Rect(dst4x, dst4y, roicols, roirows));
gmat = mat_roi;
t2 = (double)cvGetTickCount(); //kernel
cv::ocl::split(gmat, dev_gdst);
t2 = (double)cvGetTickCount() - t2;//kernel
cv::Mat cpu_dst1;
cv::Mat cpu_dst2;
cv::Mat cpu_dst3;
cv::Mat cpu_dst4;
gdst1_whole.download(cpu_dst1);
gdst2_whole.download(cpu_dst2);
gdst3_whole.download(cpu_dst3);
gdst4_whole.download(cpu_dst4);
t1 = (double)cvGetTickCount() - t1;//gpu end1
if(j == 0)
continue;
totalgputick = t1 + totalgputick;
totalcputick = t0 + totalcputick;
totalgputick_kernel = t2 + totalgputick_kernel;
}
if(k == 0)
{
cout << "no roi\n";
}
else
{
cout << "with roi\n";
};
cout << "average cpu runtime is " << totalcputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl;
cout << "average gpu runtime is " << totalgputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl;
cout << "average gpu runtime without data transfer is " << totalgputick_kernel / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl;
}
#else
for(int j = LOOPROISTART; j < LOOPROIEND; j ++)
{
Has_roi(j);
//cv::Mat dev_dst[4] = {dst1_roi, dst2_roi, dst3_roi, dst4_roi};
cv::ocl::oclMat dev_gdst[4] = {gdst1, gdst2, gdst3, gdst4};
gdst1_whole = dst1;
gdst1 = gdst1_whole(Rect(dst1x, dst1y, roicols, roirows));
gdst2_whole = dst2;
gdst2 = gdst2_whole(Rect(dst2x, dst2y, roicols, roirows));
gdst3_whole = dst3;
gdst3 = gdst3_whole(Rect(dst3x, dst3y, roicols, roirows));
gdst4_whole = dst4;
gdst4 = gdst4_whole(Rect(dst4x, dst4y, roicols, roirows));
gmat = mat_roi;
if(j == 0)
{
cout << "no roi:";
}
else
{
cout << "\nwith roi:";
};
cv::ocl::split(gmat, dev_gdst);
};
#endif
}
//*************test*****************
INSTANTIATE_TEST_CASE_P(SplitMerge, Merge, Combine(
Values(CV_8UC4, CV_32FC4), Values(1, 4)));
INSTANTIATE_TEST_CASE_P(SplitMerge, Split , Combine(
Values(CV_8U, CV_32S, CV_32F), Values(1, 4)));
#endif // HAVE_OPENCL

View File

@@ -7,12 +7,13 @@
// copy or use the software.
//
//
// Intel License Agreement
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000, Intel Corporation, all rights reserved.
// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved.
// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
@@ -21,12 +22,12 @@
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
// and/or other oclMaterials provided with the distribution.
//
// * The name of Intel Corporation may not be used to endorse or promote products
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// This software is provided by the copyright holders and contributors as is and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
@@ -41,4 +42,321 @@
#include "precomp.hpp"
// This program tests most of the functions in the ocl module and generates a data matrix of x-factors in .csv files.
// All images needed in this test are in the samples/gpu folder.
// For the haar template, haarcascade_frontalface_alt.xml should be in the working directory.
void TestSystem::run()
{
if (is_list_mode_)
{
for (vector<Runnable *>::iterator it = tests_.begin(); it != tests_.end(); ++it)
{
cout << (*it)->name() << endl;
}
return;
}
// Run test initializers
for (vector<Runnable *>::iterator it = inits_.begin(); it != inits_.end(); ++it)
{
if ((*it)->name().find(test_filter_, 0) != string::npos)
{
(*it)->run();
}
}
printHeading();
writeHeading();
// Run tests
for (vector<Runnable *>::iterator it = tests_.begin(); it != tests_.end(); ++it)
{
try
{
if ((*it)->name().find(test_filter_, 0) != string::npos)
{
cout << endl << (*it)->name() << ":\n";
setCurrentTest((*it)->name());
//fprintf(record_,"%s\n",(*it)->name().c_str());
(*it)->run();
finishCurrentSubtest();
}
}
catch (const Exception &)
{
// Message is printed via callback
resetCurrentSubtest();
}
catch (const runtime_error &e)
{
printError(e.what());
resetCurrentSubtest();
}
}
printSummary();
writeSummary();
}
// Closes the current subtest: converts the accumulated tick counts to
// milliseconds, updates the speedup totals and the faster/equal/slower
// counters, prints and records the metrics, then resets the subtest state.
// Does nothing when the subtest recorded no measurement.
void TestSystem::finishCurrentSubtest()
{
    if (cur_subtest_is_empty_)
    {
        // There is no need to print subtest statistics
        return;
    }

    const double ticks_per_ms = getTickFrequency() / 1000.0;
    (void)ticks_per_ms; // documented for readers; conversions below keep the original formula

    double cpu_time = cpu_elapsed_ / getTickFrequency() * 1000.0;
    double gpu_time = gpu_elapsed_ / getTickFrequency() * 1000.0;
    double gpu_full_time = gpu_full_elapsed_ / getTickFrequency() * 1000.0;

    // The denominators are clamped to one tick so that a zero GPU time cannot
    // divide by zero.
    double speedup = static_cast<double>(cpu_elapsed_) / std::max(1.0, gpu_elapsed_);
    speedup_total_ += speedup;

    double fullspeedup = static_cast<double>(cpu_elapsed_) / std::max(1.0, gpu_full_elapsed_);
    speedup_full_total_ += fullspeedup;

    // Classify the kernel-only speedup against the [bottom_, top_] band.
    if (speedup > top_)
    {
        speedup_faster_count_++;
    }
    else if (speedup < bottom_)
    {
        speedup_slower_count_++;
    }
    else
    {
        speedup_equal_count_++;
    }

    // Same classification for the speedup that includes data transfers.
    if (fullspeedup > top_)
    {
        speedup_full_faster_count_++;
    }
    else if (fullspeedup < bottom_)
    {
        speedup_full_slower_count_++;
    }
    else
    {
        speedup_full_equal_count_++;
    }

    // Compute min, max and standard deviation of the recorded GPU samples.
    // front()/back() are undefined on an empty vector, so the three values stay
    // at 0 when no GPU sample was recorded for this subtest.
    double gpu_min = 0;
    double gpu_max = 0;
    double deviation = 0;

    if (!gpu_times_.empty())
    {
        std::sort(gpu_times_.begin(), gpu_times_.end());
        gpu_min = gpu_times_.front() / getTickFrequency() * 1000.0;
        gpu_max = gpu_times_.back() / getTickFrequency() * 1000.0;
    }

    if (gpu_times_.size() > 1)
    {
        // Deviation is taken around gpu_elapsed_.
        // NOTE(review): presumably gpu_elapsed_ already holds the mean
        // per-iteration time at this point - confirm against gpuComplete().
        double sum = 0;

        for (size_t i = 0; i < gpu_times_.size(); i++)
        {
            int64 diff = gpu_times_[i] - static_cast<int64>(gpu_elapsed_);
            double diff_time = diff * 1000 / getTickFrequency();
            sum += diff_time * diff_time;
        }

        deviation = std::sqrt(sum / gpu_times_.size());
    }

    printMetrics(cpu_time, gpu_time, gpu_full_time, speedup, fullspeedup);
    writeMetrics(cpu_time, gpu_time, gpu_full_time, speedup, fullspeedup, gpu_min, gpu_max, deviation);

    num_subtests_called_++;
    resetCurrentSubtest();
}
double TestSystem::meanTime(const vector<int64> &samples)
{
double sum = accumulate(samples.begin(), samples.end(), 0.);
return sum / samples.size();
}
// Prints the left-aligned column titles of the console report; the column
// widths (10, 10, 14, 14, 14) are the ones printMetrics uses for its values.
void TestSystem::printHeading()
{
    cout << endl;

    cout.setf(ios_base::left);

    cout << TAB;
    cout << setw(10) << "CPU, ms";
    cout << setw(10) << "GPU, ms";
    cout << setw(14) << "SPEEDUP";
    cout << setw(14) << "GPUTOTAL, ms";
    cout << setw(14) << "TOTALSPEEDUP";
    cout << "DESCRIPTION\n";

    // Restore the alignment flag for whoever writes to cout next.
    cout.unsetf(ios_base::left);
}
void TestSystem::writeHeading()
{
if (!record_)
{
recordname_ += "_OCL.csv";
record_ = fopen(recordname_.c_str(), "w");
}
fprintf(record_, "NAME,DESCRIPTION,CPU (ms),GPU (ms),SPEEDUP,GPUTOTAL (ms),TOTALSPEEDUP,GPU Min (ms),GPU Max (ms), Standard deviation (ms)\n");
fflush(record_);
}
// Prints the final console report: the average speedup plus the
// exceeded/passed/failed counts and percentages, first for the kernel-only
// timing ("GPU"), then for the timing that includes transfers ("GPUTOTAL").
void TestSystem::printSummary()
{
    // At least 1, so an empty run does not divide by zero.
    const int subtests = std::max(1, num_subtests_called_);

    cout.setf(ios_base::fixed);
    // Precision 3 is used for every number below (and stays set on cout afterwards).
    cout.precision(3);

    cout << "\naverage GPU speedup: x" << speedup_total_ / subtests << endl;

    cout << "\nGPU exceeded: " << speedup_faster_count_;
    cout << "\nGPU passed: " << speedup_equal_count_;
    cout << "\nGPU failed: " << speedup_slower_count_ << endl;

    cout << "\nGPU exceeded rate: " << static_cast<float>(speedup_faster_count_) / subtests * 100 << "%";
    cout << "\nGPU passed rate: " << static_cast<float>(speedup_equal_count_) / subtests * 100 << "%";
    cout << "\nGPU failed rate: " << static_cast<float>(speedup_slower_count_) / subtests * 100 << "%" << endl;

    cout << "\naverage GPUTOTAL speedup: x" << speedup_full_total_ / subtests << endl;

    cout << "\nGPUTOTAL exceeded: " << speedup_full_faster_count_;
    cout << "\nGPUTOTAL passed: " << speedup_full_equal_count_;
    cout << "\nGPUTOTAL failed: " << speedup_full_slower_count_ << endl;

    cout << "\nGPUTOTAL exceeded rate: " << static_cast<float>(speedup_full_faster_count_) / subtests * 100 << "%";
    cout << "\nGPUTOTAL passed rate: " << static_cast<float>(speedup_full_equal_count_) / subtests * 100 << "%";
    cout << "\nGPUTOTAL failed rate: " << static_cast<float>(speedup_full_slower_count_) / subtests * 100 << "%" << endl;

    cout.unsetf(ios_base::fixed);
}
void TestSystem::printMetrics(double cpu_time, double gpu_time, double gpu_full_time, double speedup, double fullspeedup)
{
cout << TAB << setiosflags(ios_base::left);
stringstream stream;
stream << cpu_time;
cout << setw(10) << stream.str();
stream.str("");
stream << gpu_time;
cout << setw(10) << stream.str();
stream.str("");
stream << "x" << setprecision(3) << speedup;
cout << setw(14) << stream.str();
stream.str("");
stream << gpu_full_time;
cout << setw(14) << stream.str();
stream.str("");
stream << "x" << setprecision(3) << fullspeedup;
cout << setw(14) << stream.str();
cout << cur_subtest_description_.str();
cout << resetiosflags(ios_base::left) << endl;
}
void TestSystem::writeMetrics(double cpu_time, double gpu_time, double gpu_full_time, double speedup, double fullspeedup, double gpu_min, double gpu_max, double std_dev)
{
if (!record_)
{
recordname_ += ".csv";
record_ = fopen(recordname_.c_str(), "w");
}
fprintf(record_, "%s,%s,%.3f,%.3f,%.3f,%.3f,%.3f,%.3f,%.3f,%.3f\n", itname_changed_ ? itname_.c_str() : "",
cur_subtest_description_.str().c_str(),
cpu_time, gpu_time, speedup, gpu_full_time, fullspeedup,
gpu_min, gpu_max, std_dev);
if (itname_changed_)
{
itname_changed_ = false;
}
fflush(record_);
}
void TestSystem::writeSummary()
{
if (!record_)
{
recordname_ += ".csv";
record_ = fopen(recordname_.c_str(), "w");
}
fprintf(record_, "\nAverage GPU speedup: %.3f\n"
"exceeded: %d (%.3f%%)\n"
"passed: %d (%.3f%%)\n"
"failed: %d (%.3f%%)\n"
"\nAverage GPUTOTAL speedup: %.3f\n"
"exceeded: %d (%.3f%%)\n"
"passed: %d (%.3f%%)\n"
"failed: %d (%.3f%%)\n",
speedup_total_ / std::max(1, num_subtests_called_),
speedup_faster_count_, (float)speedup_faster_count_ / std::max(1, num_subtests_called_) * 100,
speedup_equal_count_, (float)speedup_equal_count_ / std::max(1, num_subtests_called_) * 100,
speedup_slower_count_, (float)speedup_slower_count_ / std::max(1, num_subtests_called_) * 100,
speedup_full_total_ / std::max(1, num_subtests_called_),
speedup_full_faster_count_, (float)speedup_full_faster_count_ / std::max(1, num_subtests_called_) * 100,
speedup_full_equal_count_, (float)speedup_full_equal_count_ / std::max(1, num_subtests_called_) * 100,
speedup_full_slower_count_, (float)speedup_full_slower_count_ / std::max(1, num_subtests_called_) * 100
);
fflush(record_);
}
// Prints an error line followed by the description of the current subtest.
// The message "CL_INVALID_BUFFER_SIZE" is deliberately not reported.
void TestSystem::printError(const std::string &msg)
{
    if (msg == "CL_INVALID_BUFFER_SIZE")
    {
        return;
    }

    cout << TAB << "[error: " << msg << "] " << cur_subtest_description_.str() << endl;
}
// (Re)allocates mat as rows x cols of the given type and fills it with
// uniformly distributed values between low and high. The generator is always
// seeded with 0, so every call produces the same sequence.
void gen(Mat &mat, int rows, int cols, int type, Scalar low, Scalar high)
{
    RNG fixed_seed_rng(0);

    mat.create(rows, cols, type);
    fixed_seed_rng.fill(mat, RNG::UNIFORM, low, high);
}
// Returns relpath prefixed with the working directory of the test system.
// The two strings are concatenated as they are; no separator is inserted.
string abspath(const string &relpath)
{
    const string &base_dir = TestSystem::instance().workingDir();
    return base_dir + relpath;
}
// Error callback with the OpenCV error-handler signature: forwards the error
// message to TestSystem::printError and returns 0. Every argument except
// err_msg is ignored.
int CV_CDECL cvErrorCallback(int /*status*/, const char * /*func_name*/,
                             const char *err_msg, const char * /*file_name*/,
                             int /*line*/, void * /*userdata*/)
{
    TestSystem &harness = TestSystem::instance();
    harness.printError(err_msg);
    return 0;
}

View File

@@ -7,12 +7,13 @@
// copy or use the software.
//
//
// Intel License Agreement
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000, Intel Corporation, all rights reserved.
// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved.
// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
@@ -21,12 +22,12 @@
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
// and/or other oclMaterials provided with the distribution.
//
// * The name of Intel Corporation may not be used to endorse or promote products
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// This software is provided by the copyright holders and contributors as is and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
@@ -39,38 +40,354 @@
//
//M*/
#ifdef __GNUC__
# pragma GCC diagnostic ignored "-Wmissing-declarations"
# if defined __clang__ || defined __APPLE__
# pragma GCC diagnostic ignored "-Wmissing-prototypes"
# pragma GCC diagnostic ignored "-Wextra"
# endif
#endif
#ifndef __OPENCV_TEST_PRECOMP_HPP__
#define __OPENCV_TEST_PRECOMP_HPP__
#include <cmath>
#include <cstdio>
#include <iomanip>
#include <stdexcept>
#include <string>
#include <iostream>
#include <fstream>
#include <sstream>
#include <string>
#include <limits>
#include <algorithm>
#include <iterator>
#include <string>
#include <cstdarg>
#include "opencv2/highgui.hpp"
#include <cstdio>
#include <vector>
#include <numeric>
#include "opencv2/core.hpp"
#include "opencv2/imgproc.hpp"
#include "opencv2/highgui.hpp"
#include "opencv2/video.hpp"
#include "opencv2/ts.hpp"
#include "opencv2/objdetect.hpp"
#include "opencv2/features2d.hpp"
#include "opencv2/ocl.hpp"
#include "utility.hpp"
#include "interpolation.hpp"
#define Min_Size 1000
#define Max_Size 4000
#define Multiple 2
#define TAB " "
#include "opencv2/core/private.hpp"
using namespace std;
using namespace cv;
#endif
// Helper declarations; none of these are defined in this header.

// Fills `mat`; judging by the parameter names it creates a rows x cols matrix
// of `type` with values between `low` and `high` -- TODO confirm against the
// definition.
void gen(Mat &mat, int rows, int cols, int type, Scalar low, Scalar high);
// Maps `relpath` to another path string (presumably an absolute one -- verify).
string abspath(const string &relpath);
// NOTE(review): the parameter list looks like an OpenCV error-callback
// signature; confirm where it is installed.
int CV_CDECL cvErrorCallback(int, const char *, const char *, const char *, int, void *);
// Pair of 16-bit signed coordinates; returned by do_meanShift.
typedef struct
{
short x;
short y;
} COOR;
// Mean-shift helper that returns a COOR for the start point (x0, y0).
// NOTE(review): sp / sr / maxIter / eps presumably are the spatial radius,
// colour radius and termination limits -- confirm against the definition.
COOR do_meanShift(int x0, int y0, uchar *sptr, uchar *dptr, int sstep,
cv::Size size, int sp, int sr, int maxIter, float eps, int *tab);
// Mean-shift routine taking a source ROI and writing dst_roi and dstCoor_roi
// (both passed by non-const reference).
void meanShiftProc_(const Mat &src_roi, Mat &dst_roi, Mat &dstCoor_roi,
int sp, int sr, cv::TermCriteria crit);
// Abstract named unit of work. Concrete subclasses implement run(); the
// name given at construction is stored and exposed read-only via name().
class Runnable
{
public:
    explicit Runnable(const std::string &runname)
        : label_(runname)
    {
    }

    virtual ~Runnable() {}

    // Name supplied at construction time.
    const std::string &name() const { return label_; }

    // Executes the unit of work.
    virtual void run() = 0;

private:
    std::string label_;
};
class TestSystem
{
public:
static TestSystem &instance()
{
static TestSystem me;
return me;
}
void setWorkingDir(const std::string &val)
{
working_dir_ = val;
}
const std::string &workingDir() const
{
return working_dir_;
}
void setTestFilter(const std::string &val)
{
test_filter_ = val;
}
const std::string &testFilter() const
{
return test_filter_;
}
void setNumIters(int num_iters)
{
num_iters_ = num_iters;
}
void setGPUWarmupIters(int num_iters)
{
gpu_warmup_iters_ = num_iters;
}
void setCPUIters(int num_iters)
{
cpu_num_iters_ = num_iters;
}
void setTopThreshold(double top)
{
top_ = top;
}
void setBottomThreshold(double bottom)
{
bottom_ = bottom;
}
void addInit(Runnable *init)
{
inits_.push_back(init);
}
void addTest(Runnable *test)
{
tests_.push_back(test);
}
void run();
// It's public because OpenCV callback uses it
void printError(const std::string &msg);
std::stringstream &startNewSubtest()
{
finishCurrentSubtest();
return cur_subtest_description_;
}
bool stop() const
{
return cur_iter_idx_ >= num_iters_;
}
bool cpu_stop() const
{
return cur_iter_idx_ >= cpu_num_iters_;
}
bool warmupStop()
{
return cur_warmup_idx_++ >= gpu_warmup_iters_;
}
void warmupComplete()
{
cur_warmup_idx_ = 0;
}
void cpuOn()
{
cpu_started_ = cv::getTickCount();
}
void cpuOff()
{
int64 delta = cv::getTickCount() - cpu_started_;
cpu_times_.push_back(delta);
++cur_iter_idx_;
}
void cpuComplete()
{
cpu_elapsed_ += meanTime(cpu_times_);
cur_subtest_is_empty_ = false;
cur_iter_idx_ = 0;
}
void gpuOn()
{
gpu_started_ = cv::getTickCount();
}
void gpuOff()
{
int64 delta = cv::getTickCount() - gpu_started_;
gpu_times_.push_back(delta);
++cur_iter_idx_;
}
void gpuComplete()
{
gpu_elapsed_ += meanTime(gpu_times_);
cur_subtest_is_empty_ = false;
cur_iter_idx_ = 0;
}
void gpufullOn()
{
gpu_full_started_ = cv::getTickCount();
}
void gpufullOff()
{
int64 delta = cv::getTickCount() - gpu_full_started_;
gpu_full_times_.push_back(delta);
++cur_iter_idx_;
}
void gpufullComplete()
{
gpu_full_elapsed_ += meanTime(gpu_full_times_);
cur_subtest_is_empty_ = false;
cur_iter_idx_ = 0;
}
bool isListMode() const
{
return is_list_mode_;
}
void setListMode(bool value)
{
is_list_mode_ = value;
}
void setRecordName(const std::string &name)
{
recordname_ = name;
}
void setCurrentTest(const std::string &name)
{
itname_ = name;
itname_changed_ = true;
}
private:
TestSystem():
cur_subtest_is_empty_(true), cpu_elapsed_(0),
gpu_elapsed_(0), gpu_full_elapsed_(0), speedup_total_(0.0),
num_subtests_called_(0),
speedup_faster_count_(0), speedup_slower_count_(0), speedup_equal_count_(0),
speedup_full_faster_count_(0), speedup_full_slower_count_(0), speedup_full_equal_count_(0), is_list_mode_(false),
num_iters_(10), cpu_num_iters_(2),
gpu_warmup_iters_(1), cur_iter_idx_(0), cur_warmup_idx_(0),
record_(0), recordname_("performance"), itname_changed_(true)
{
cpu_times_.reserve(num_iters_);
gpu_times_.reserve(num_iters_);
gpu_full_times_.reserve(num_iters_);
}
void finishCurrentSubtest();
void resetCurrentSubtest()
{
cpu_elapsed_ = 0;
gpu_elapsed_ = 0;
gpu_full_elapsed_ = 0;
cur_subtest_description_.str("");
cur_subtest_is_empty_ = true;
cur_iter_idx_ = 0;
cpu_times_.clear();
gpu_times_.clear();
gpu_full_times_.clear();
}
double meanTime(const std::vector<int64> &samples);
void printHeading();
void printSummary();
void printMetrics(double cpu_time, double gpu_time = 0.0f, double gpu_full_time = 0.0f, double speedup = 0.0f, double fullspeedup = 0.0f);
void writeHeading();
void writeSummary();
void writeMetrics(double cpu_time, double gpu_time = 0.0f, double gpu_full_time = 0.0f,
double speedup = 0.0f, double fullspeedup = 0.0f,
double gpu_min = 0.0f, double gpu_max = 0.0f, double std_dev = 0.0f);
std::string working_dir_;
std::string test_filter_;
std::vector<Runnable *> inits_;
std::vector<Runnable *> tests_;
std::stringstream cur_subtest_description_;
bool cur_subtest_is_empty_;
int64 cpu_started_;
int64 gpu_started_;
int64 gpu_full_started_;
double cpu_elapsed_;
double gpu_elapsed_;
double gpu_full_elapsed_;
double speedup_total_;
double speedup_full_total_;
int num_subtests_called_;
int speedup_faster_count_;
int speedup_slower_count_;
int speedup_equal_count_;
int speedup_full_faster_count_;
int speedup_full_slower_count_;
int speedup_full_equal_count_;
bool is_list_mode_;
double top_;
double bottom_;
int num_iters_;
int cpu_num_iters_; //there's no need to set cpu running same times with gpu
int gpu_warmup_iters_; //gpu warm up times, default is 1
int cur_iter_idx_;
int cur_warmup_idx_; //current gpu warm up times
std::vector<int64> cpu_times_;
std::vector<int64> gpu_times_;
std::vector<int64> gpu_full_times_;
FILE *record_;
std::string recordname_;
std::string itname_;
bool itname_changed_;
};
// GLOBAL_INIT(name) { ... }
// Defines struct name##_init (a Runnable named #name) whose constructor
// registers itself via TestSystem::instance().addInit(), declares one global
// instance of it, and opens the definition of its run() method; the braces
// written after the macro become the body of run().
#define GLOBAL_INIT(name) \
struct name##_init: Runnable { \
name##_init(): Runnable(#name) { \
TestSystem::instance().addInit(this); \
} \
void run(); \
} name##_init_instance; \
void name##_init::run()
// TEST(name) { ... }
// Same pattern as GLOBAL_INIT, but the struct is name##_test and it registers
// itself with addTest().
#define TEST(name) \
struct name##_test: Runnable { \
name##_test(): Runnable(#name) { \
TestSystem::instance().addTest(this); \
} \
void run(); \
} name##_test_instance; \
void name##_test::run()
// Starts a new subtest and yields the description stream, so it can be used as
// SUBTEST << "text".
#define SUBTEST TestSystem::instance().startNewSubtest()
// CPU_ON ... CPU_OFF bracket the code to be timed on the CPU. CPU_ON opens a
// while loop (note the unbalanced brace) that repeats until cpu_stop() is
// true; CPU_OFF records the sample, closes the loop and calls cpuComplete().
// The two macros must always be used as a pair.
#define CPU_ON \
while (!TestSystem::instance().cpu_stop()) { \
TestSystem::instance().cpuOn()
#define CPU_OFF \
TestSystem::instance().cpuOff(); \
} TestSystem::instance().cpuComplete()
// GPU_ON ... GPU_OFF: same pairing as CPU_ON/CPU_OFF but driven by stop().
// GPU_OFF calls ocl::finish() before the sample is recorded.
#define GPU_ON \
while (!TestSystem::instance().stop()) { \
TestSystem::instance().gpuOn()
#define GPU_OFF \
ocl::finish(); \
TestSystem::instance().gpuOff(); \
} TestSystem::instance().gpuComplete()
// GPU_FULL_ON ... GPU_FULL_OFF: same pairing, recorded through the gpufull*
// methods. Unlike GPU_OFF, GPU_FULL_OFF does not call ocl::finish().
#define GPU_FULL_ON \
while (!TestSystem::instance().stop()) { \
TestSystem::instance().gpufullOn()
#define GPU_FULL_OFF \
TestSystem::instance().gpufullOff(); \
} TestSystem::instance().gpufullComplete()
// WARMUP_ON ... WARMUP_OFF repeat the enclosed code until warmupStop() returns
// true, calling ocl::finish() at the end of every pass; no timing sample is
// recorded. WARMUP_OFF finally resets the warm-up counter.
#define WARMUP_ON \
while (!TestSystem::instance().warmupStop()) {
#define WARMUP_OFF \
ocl::finish(); \
} TestSystem::instance().warmupComplete()

View File

@@ -1,265 +0,0 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// Intel License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000, Intel Corporation, all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of Intel Corporation may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#include "precomp.hpp"
#define VARNAME(A) #A
using namespace std;
using namespace cv;
using namespace cv::gpu;
using namespace cvtest;
//std::string generateVarList(int first,...)
//{
// vector<std::string> varname;
//
// va_list argp;
// string s;
// stringstream ss;
// va_start(argp,first);
// int i=first;
// while(i!=-1)
// {
// ss<<i<<",";
// i=va_arg(argp,int);
// };
// s=ss.str();
// va_end(argp);
// return s;
//};
//std::string generateVarList(int& p1,int& p2)
//{
// stringstream ss;
// ss<<VARNAME(p1)<<":"<<src1x<<","<<VARNAME(p2)<<":"<<src1y;
// return ss.str();
//};
// Draws a uniformly distributed integer from the test system's shared RNG.
// cv::RNG::uniform documents the upper bound maxVal as exclusive.
int randomInt(int minVal, int maxVal)
{
    return TS::ptr()->get_rng().uniform(minVal, maxVal);
}
// Draws a uniformly distributed double from the test system's shared RNG.
// cv::RNG::uniform documents the upper bound maxVal as exclusive.
double randomDouble(double minVal, double maxVal)
{
    return TS::ptr()->get_rng().uniform(minVal, maxVal);
}
// Returns a Size whose width and height are each drawn with
// randomInt(minVal, maxVal).
Size randomSize(int minVal, int maxVal)
{
    // Draw in separate statements: as function arguments the two calls had no
    // defined relative order, so which draw became the width depended on the
    // compiler. Width is now always drawn first.
    const int width = randomInt(minVal, maxVal);
    const int height = randomInt(minVal, maxVal);
    return cv::Size(width, height);
}
// Returns a Scalar whose four components are each drawn with
// randomDouble(minVal, maxVal).
Scalar randomScalar(double minVal, double maxVal)
{
    // Draw in separate statements: as function arguments the four calls had no
    // defined relative order, so the component values for a given RNG state
    // depended on the compiler. Components are now drawn in index order.
    const double v0 = randomDouble(minVal, maxVal);
    const double v1 = randomDouble(minVal, maxVal);
    const double v2 = randomDouble(minVal, maxVal);
    const double v3 = randomDouble(minVal, maxVal);
    return Scalar(v0, v1, v2, v3);
}
// Convenience overload: forwards to the randomMat overload that takes an
// explicit RNG, passing the test system's shared generator.
// NOTE(review): the trailing `false` presumably is that overload's useRoi
// flag -- confirm against its declaration.
Mat randomMat(Size size, int type, double minVal, double maxVal)
{
    RNG &sharedRng = TS::ptr()->get_rng();
    return randomMat(sharedRng, size, type, minVal, maxVal, false);
}
/*
void showDiff(InputArray gold_, InputArray actual_, double eps)
{
Mat gold;
if (gold_.kind() == _InputArray::MAT)
gold = gold_.getMat();
else
gold_.getGpuMat().download(gold);
Mat actual;
if (actual_.kind() == _InputArray::MAT)
actual = actual_.getMat();
else
actual_.getGpuMat().download(actual);
Mat diff;
absdiff(gold, actual, diff);
threshold(diff, diff, eps, 255.0, cv::THRESH_BINARY);
namedWindow("gold", WINDOW_NORMAL);
namedWindow("actual", WINDOW_NORMAL);
namedWindow("diff", WINDOW_NORMAL);
imshow("gold", gold);
imshow("actual", actual);
imshow("diff", diff);
waitKey();
}
*/
/*
bool supportFeature(const DeviceInfo& info, FeatureSet feature)
{
return TargetArchs::builtWith(feature) && info.supports(feature);
}
const vector<DeviceInfo>& devices()
{
static vector<DeviceInfo> devs;
static bool first = true;
if (first)
{
int deviceCount = getCudaEnabledDeviceCount();
devs.reserve(deviceCount);
for (int i = 0; i < deviceCount; ++i)
{
DeviceInfo info(i);
if (info.isCompatible())
devs.push_back(info);
}
first = false;
}
return devs;
}
vector<DeviceInfo> devices(FeatureSet feature)
{
const vector<DeviceInfo>& d = devices();
vector<DeviceInfo> devs_filtered;
if (TargetArchs::builtWith(feature))
{
devs_filtered.reserve(d.size());
for (size_t i = 0, size = d.size(); i < size; ++i)
{
const DeviceInfo& info = d[i];
if (info.supports(feature))
devs_filtered.push_back(info);
}
}
return devs_filtered;
}
*/
// Builds every matrix type CV_MAKETYPE(depth, cn) for depth in
// [depth_start, depth_end] and cn in [cn_start, cn_end] (both inclusive),
// ordered by depth first, then by channel count.
// Returns an empty vector when either range is empty.
vector<MatType> types(int depth_start, int depth_end, int cn_start, int cn_end)
{
    vector<MatType> v;

    // An inverted range used to yield a negative element count which, once
    // converted to size_t, made reserve() request an enormous capacity.
    if (depth_end < depth_start || cn_end < cn_start)
        return v;

    const size_t depthCount = static_cast<size_t>(depth_end - depth_start + 1);
    const size_t cnCount = static_cast<size_t>(cn_end - cn_start + 1);
    v.reserve(depthCount * cnCount);

    for (int depth = depth_start; depth <= depth_end; ++depth)
    {
        for (int cn = cn_start; cn <= cn_end; ++cn)
        {
            v.push_back(CV_MAKETYPE(depth, cn));
        }
    }

    return v;
}
// Returns types(CV_8U, CV_64F, 1, 4); the list is built on the first call and
// kept in a function-local static.
const vector<MatType> &all_types()
{
    static vector<MatType> cachedTypes = types(CV_8U, CV_64F, 1, 4);
    return cachedTypes;
}
// Loads an image with imread(); fileName is appended to the test system's
// data path. `flags` is passed to imread unchanged.
Mat readImage(const string &fileName, int flags)
{
    const string fullPath = string(cvtest::TS::ptr()->get_data_path()) + fileName;
    return imread(fullPath, flags);
}
// Loads a test image and converts it towards the requested matrix `type`:
//  - 1 channel  -> read as grayscale;
//  - otherwise  -> read as colour, and expanded BGR -> BGRA for 4 channels;
//  - the depth is then converted to CV_MAT_DEPTH(type).
// Returns an empty Mat when the image could not be read.
Mat readImageType(const string &fname, int type)
{
    const int channels = CV_MAT_CN(type);

    Mat src = readImage(fname, channels == 1 ? IMREAD_GRAYSCALE : IMREAD_COLOR);

    // A failed read used to be handed to cvtColor/convertTo; report it to the
    // caller as an empty Mat instead, regardless of the requested type.
    if (src.empty())
        return src;

    if (channels == 4)
    {
        Mat bgra;
        cvtColor(src, bgra, cv::COLOR_BGR2BGRA);
        src = bgra;
    }

    src.convertTo(src, CV_MAT_DEPTH(type));
    return src;
}
double checkNorm(const Mat &m)
{
return norm(m, NORM_INF);
}
// Returns norm(m1, m2, NORM_INF), the infinity norm of the difference between
// m1 and m2.
double checkNorm(const Mat &m1, const Mat &m2)
{
    const double infNormDiff = norm(m1, m2, NORM_INF);
    return infNormDiff;
}
double checkSimilarity(const Mat &m1, const Mat &m2)
{
Mat diff;
matchTemplate(m1, m2, diff, CV_TM_CCORR_NORMED);
return std::abs(diff.at<float>(0, 0) - 1.f);
}
/*
void cv::ocl::PrintTo(const DeviceInfo& info, ostream* os)
{
(*os) << info.name();
}
*/
// Writes "inverse" to *os when `inverse` converts to true, "direct" otherwise.
void PrintTo(const Inverse &inverse, std::ostream *os)
{
    const char *label = inverse ? "inverse" : "direct";
    (*os) << label;
}

View File

@@ -1,182 +0,0 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// Intel License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000, Intel Corporation, all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of Intel Corporation may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#ifndef __OPENCV_TEST_UTILITY_HPP__
#define __OPENCV_TEST_UTILITY_HPP__
// Compile-time configuration constants.
// Uncomment the next line to define PRINT_KERNEL_RUN_TIME.
//#define PRINT_KERNEL_RUN_TIME
// LOOP_TIMES is 1 in both branches, so the switch currently has no effect on
// its value.
#ifdef PRINT_KERNEL_RUN_TIME
#define LOOP_TIMES 1
#else
#define LOOP_TIMES 1
#endif
// NOTE(review): presumably the maximum matrix width/height used by the tests
// (1920x1080) -- confirm with the users of these macros.
#define MWIDTH 1920
#define MHEIGHT 1080
// The string ".\" (a Windows-style relative path).
#define CLBINPATH ".\\"
// NOTE(review): presumably the bounds of a loop over ROI settings -- confirm.
#define LOOPROISTART 0
#define LOOPROIEND 1
// Random-value helpers; each takes the value range as (minVal, maxVal).
int randomInt(int minVal, int maxVal);
double randomDouble(double minVal, double maxVal);
//std::string generateVarList(int first,...);
// NOTE(review): the only definition of generateVarList visible next to this
// header is commented out -- confirm one exists before calling it.
std::string generateVarList(int &p1, int &p2);
cv::Size randomSize(int minVal, int maxVal);
cv::Scalar randomScalar(double minVal, double maxVal);
// minVal and maxVal default to 0.0 and 255.0.
cv::Mat randomMat(cv::Size size, int type, double minVal = 0.0, double maxVal = 255.0);
// NOTE(review): the only definition of showDiff visible next to this header is
// commented out -- confirm one exists before calling it.
void showDiff(cv::InputArray gold, cv::InputArray actual, double eps);
//! return true if device supports specified feature and gpu module was built with support the feature.
//bool supportFeature(const cv::gpu::DeviceInfo& info, cv::gpu::FeatureSet feature);
//! return all devices compatible with current gpu module build.
//const std::vector<cv::ocl::DeviceInfo>& devices();
//! return all devices compatible with current gpu module build which support specified feature.
//std::vector<cv::ocl::DeviceInfo> devices(cv::gpu::FeatureSet feature);
//! read image from testdata folder.
cv::Mat readImage(const std::string &fileName, int flags = cv::IMREAD_COLOR);
//! read image from testdata folder for the requested matrix type.
cv::Mat readImageType(const std::string &fname, int type);
//! comparison helpers used by the EXPECT_MAT_* macros below.
double checkNorm(const cv::Mat &m);
double checkNorm(const cv::Mat &m1, const cv::Mat &m2);
double checkSimilarity(const cv::Mat &m1, const cv::Mat &m2);
// Expects checkNorm(cv::Mat(mat)) to be <= eps.
// The EXPECT_LE statement is terminated with ';' like in the sibling
// EXPECT_MAT_* macros; without it the expansion is not a complete statement
// before the closing brace.
#define EXPECT_MAT_NORM(mat, eps) \
{ \
    EXPECT_LE(checkNorm(cv::Mat(mat)), eps); \
}
/*#define EXPECT_MAT_NEAR(mat1, mat2, eps) \
{ \
ASSERT_EQ(mat1.type(), mat2.type()); \
ASSERT_EQ(mat1.size(), mat2.size()); \
EXPECT_LE(checkNorm(cv::Mat(mat1), cv::Mat(mat2)), eps); \
}*/
// Asserts that mat1 and mat2 have the same type and size, then expects
// checkNorm() of the pair to be <= eps; `s` is streamed into the failure
// message.
// mat1/mat2 are parenthesised before member access so that expression
// arguments (e.g. *ptr) expand correctly. `s` is deliberately left as-is so a
// caller may still pass a chain such as  a << b.
#define EXPECT_MAT_NEAR(mat1, mat2, eps,s) \
{ \
    ASSERT_EQ((mat1).type(), (mat2).type()); \
    ASSERT_EQ((mat1).size(), (mat2).size()); \
    EXPECT_LE(checkNorm(cv::Mat(mat1), cv::Mat(mat2)), eps)<<s; \
}
// Asserts that mat1 and mat2 have the same type and size, then expects
// checkSimilarity() of the pair to be <= eps.
// mat1/mat2 are parenthesised before member access so that expression
// arguments (e.g. *ptr) expand correctly.
#define EXPECT_MAT_SIMILAR(mat1, mat2, eps) \
{ \
    ASSERT_EQ((mat1).type(), (mat2).type()); \
    ASSERT_EQ((mat1).size(), (mat2).size()); \
    EXPECT_LE(checkSimilarity(cv::Mat(mat1), cv::Mat(mat2)), eps); \
}
// Placeholder: cv::ocl currently contributes no declarations here (the only
// entry is commented out).
namespace cv
{
namespace ocl
{
// void PrintTo(const DeviceInfo& info, std::ostream* os);
}
}
// Bring the perf:: type wrappers into scope under their short names.
using perf::MatDepth;
using perf::MatType;
//! return vector with types from specified range.
//! Both the depth range and the channel range are inclusive.
std::vector<MatType> types(int depth_start, int depth_end, int cn_start, int cn_end);
//! return vector with all types (depth: CV_8U-CV_64F, channels: 1-4).
const std::vector<MatType> &all_types();
// Thin wrapper around a bool so the value can be used as a distinct test
// parameter type. Converts implicitly from and to bool; defaults to false.
class Inverse
{
public:
    // Non-explicit: a plain bool converts to Inverse implicitly.
    Inverse(bool val = false)
        : enabled_(val)
    {
    }

    // Yields the wrapped flag.
    operator bool() const { return enabled_; }

private:
    bool enabled_;
};
// Printer for Inverse values, given the value and the output stream.
void PrintTo(const Inverse &useRoi, std::ostream *os);
// Named enum / flag wrappers declared through the CV_ENUM / CV_FLAGS macros;
// each line lists the values belonging to that wrapper.
CV_ENUM(CmpCode, cv::CMP_EQ, cv::CMP_GT, cv::CMP_GE, cv::CMP_LT, cv::CMP_LE, cv::CMP_NE)
CV_ENUM(NormCode, cv::NORM_INF, cv::NORM_L1, cv::NORM_L2, cv::NORM_TYPE_MASK, cv::NORM_RELATIVE, cv::NORM_MINMAX)
// Local flip codes: both axes = 0, X = 1, Y = -1.
enum {FLIP_BOTH = 0, FLIP_X = 1, FLIP_Y = -1};
CV_ENUM(FlipCode, FLIP_BOTH, FLIP_X, FLIP_Y)
CV_ENUM(ReduceOp, CV_REDUCE_SUM, CV_REDUCE_AVG, CV_REDUCE_MAX, CV_REDUCE_MIN)
// NOTE(review): this is the only line in the list that ends with ';'.
CV_FLAGS(GemmFlags, cv::GEMM_1_T, cv::GEMM_2_T, cv::GEMM_3_T);
CV_ENUM(MorphOp, cv::MORPH_OPEN, cv::MORPH_CLOSE, cv::MORPH_GRADIENT, cv::MORPH_TOPHAT, cv::MORPH_BLACKHAT)
CV_ENUM(ThreshOp, cv::THRESH_BINARY, cv::THRESH_BINARY_INV, cv::THRESH_TRUNC, cv::THRESH_TOZERO, cv::THRESH_TOZERO_INV)
CV_ENUM(Interpolation, cv::INTER_NEAREST, cv::INTER_LINEAR, cv::INTER_CUBIC)
CV_ENUM(Border, cv::BORDER_REFLECT101, cv::BORDER_REPLICATE, cv::BORDER_CONSTANT, cv::BORDER_REFLECT, cv::BORDER_WRAP)
CV_FLAGS(WarpFlags, cv::INTER_NEAREST, cv::INTER_LINEAR, cv::INTER_CUBIC, cv::WARP_INVERSE_MAP)
CV_ENUM(TemplateMethod, cv::TM_SQDIFF, cv::TM_SQDIFF_NORMED, cv::TM_CCORR, cv::TM_CCORR_NORMED, cv::TM_CCOEFF, cv::TM_CCOEFF_NORMED)
CV_FLAGS(DftFlags, cv::DFT_INVERSE, cv::DFT_SCALE, cv::DFT_ROWS, cv::DFT_COMPLEX_OUTPUT, cv::DFT_REAL_OUTPUT)
// Declared here; the definition is not part of this header.
void run_perf_test();
// Declares a value-parameterised fixture `name` whose parameter is a
// std::tr1::tuple of the listed types; the fixture body follows the macro.
#define PARAM_TEST_CASE(name, ...) struct name : testing::TestWithParam< std::tr1::tuple< __VA_ARGS__ > >
// Fetches the k-th element of the current parameter tuple.
#define GET_PARAM(k) std::tr1::get< k >(GetParam())
// NOTE(review): both device generators call devices(), whose declarations in
// this header are commented out -- they will not compile unless devices() is
// provided elsewhere.
#define ALL_DEVICES testing::ValuesIn(devices())
#define DEVICES(feature) testing::ValuesIn(devices(feature))
// Parameter generators over matrix types (see all_types() / types()).
#define ALL_TYPES testing::ValuesIn(all_types())
#define TYPES(depth_start, depth_end, cn_start, cn_end) testing::ValuesIn(types(depth_start, depth_end, cn_start, cn_end))
// Two fixed sizes: 128x128 and 113x113.
#define DIFFERENT_SIZES testing::Values(cv::Size(128, 128), cv::Size(113, 113))
// Both values of the Inverse parameter.
#define DIRECT_INVERSE testing::Values(Inverse(false), Inverse(true))
#endif // __OPENCV_TEST_UTILITY_HPP__

View File

@@ -205,7 +205,7 @@ cv::ocl::FftPlan::FftPlan(Size _dft_size, int _src_step, int _dst_step, int _fla
clStridesIn[2] = is_row_dft ? clStridesIn[1] : dft_size.width * clStridesIn[1];
clStridesOut[2] = is_row_dft ? clStridesOut[1] : dft_size.width * clStridesOut[1];
openCLSafeCall( clAmdFftCreateDefaultPlan( &plHandle, (cl_context)getoclContext(), dim, clLengthsIn ) );
openCLSafeCall( clAmdFftCreateDefaultPlan( &plHandle, *(cl_context*)getoclContext(), dim, clLengthsIn ) );
openCLSafeCall( clAmdFftSetResultLocation( plHandle, CLFFT_OUTOFPLACE ) );
openCLSafeCall( clAmdFftSetLayout( plHandle, inLayout, outLayout ) );
@@ -219,8 +219,7 @@ cv::ocl::FftPlan::FftPlan(Size _dft_size, int _src_step, int _dst_step, int _fla
openCLSafeCall( clAmdFftSetPlanScale ( plHandle, is_inverse ? CLFFT_BACKWARD : CLFFT_FORWARD, scale_ ) );
//ready to bake
cl_command_queue clq = (cl_command_queue)getoclCommandQueue();
openCLSafeCall( clAmdFftBakePlan( plHandle, 1, &clq, NULL, NULL ) );
openCLSafeCall( clAmdFftBakePlan( plHandle, 1, (cl_command_queue*)getoclCommandQueue(), NULL, NULL ) );
}
cv::ocl::FftPlan::~FftPlan()
{

View File

@@ -351,6 +351,11 @@ namespace cv
return &(Context::getContext()->impl->clCmdQueue);
}
void finish()
{
clFinish(Context::getContext()->impl->clCmdQueue);
}
void queryDeviceInfo(DEVICE_INFO info_type, void* info)
{
static Info::Impl* impl = Context::getContext()->impl;
@@ -709,7 +714,7 @@ namespace cv
clReleaseEvent(event);
#endif
clFinish(clCxt->impl->clCmdQueue);
clFlush(clCxt->impl->clCmdQueue);
openCLSafeCall(clReleaseKernel(kernel));
}
@@ -905,16 +910,18 @@ namespace cv
std::auto_ptr<Context> Context::clCxt;
int Context::val = 0;
static Mutex cs;
Context *Context::getContext()
static volatile int context_tear_down = 0;
Context* Context::getContext()
{
if(*((volatile int*)&val) != 1)
{
AutoLock al(cs);
if(*((volatile int*)&val) != 1)
{
if (context_tear_down)
return clCxt.get();
if( 0 == clCxt.get())
clCxt.reset(new Context);
std::vector<Info> oclinfo;
CV_Assert(getDevice(oclinfo, CVCL_DEVICE_TYPE_ALL) > 0);
oclinfo[0].impl->setDevice(0, 0, 0);
@@ -1042,9 +1049,14 @@ BOOL WINAPI DllMain( HINSTANCE, DWORD fdwReason, LPVOID )
{
// application hangs if call clReleaseCommandQueue here, so release context only
// without context release application hangs as well
cl_context ctx = (cl_context)getoclContext();
if(ctx)
openCLSafeCall(clReleaseContext(ctx));
context_tear_down = 1;
Context* cv_ctx = Context::getContext();
if(cv_ctx)
{
cl_context ctx = (cl_context)&(cv_ctx->impl->oclcontext);
if(ctx)
openCLSafeCall(clReleaseContext(ctx));
}
}
return TRUE;
}

View File

@@ -142,7 +142,7 @@ namespace cv
format.image_channel_data_type = CL_FLOAT;
break;
default:
throw std::exception();
CV_Error(-1, "Image forma is not supported");
break;
}
switch(channels)
@@ -157,7 +157,7 @@ namespace cv
format.image_channel_order = CL_RGBA;
break;
default:
throw std::exception();
CV_Error(-1, "Image forma is not supported");
break;
}
#if CL_VERSION_1_2
@@ -195,7 +195,8 @@ namespace cv
const size_t regin[3] = {mat.cols * mat.elemSize(), mat.rows, 1};
clEnqueueCopyBufferRect((cl_command_queue)mat.clCxt->oclCommandQueue(), (cl_mem)mat.data, devData, origin, origin,
regin, mat.step, 0, mat.cols * mat.elemSize(), 0, 0, NULL, NULL);
}
clFlush((cl_command_queue)mat.clCxt->oclCommandQueue());
}
else
{
devData = (cl_mem)mat.data;
@@ -204,7 +205,7 @@ namespace cv
clEnqueueCopyBufferToImage((cl_command_queue)mat.clCxt->oclCommandQueue(), devData, texture, 0, origin, region, 0, NULL, 0);
if ((mat.cols * mat.elemSize() != mat.step))
{
clFinish((cl_command_queue)mat.clCxt->oclCommandQueue());
clFlush((cl_command_queue)mat.clCxt->oclCommandQueue());
clReleaseMemObject(devData);
}
@@ -229,7 +230,8 @@ namespace cv
try
{
cv::ocl::openCLGetKernelFromSource(clCxt, &_kernel_string, "test_func");
//_support = true;
finish();
_support = true;
}
catch (const cv::Exception& e)
{

View File

@@ -44,7 +44,11 @@
//M*/
#if defined (DOUBLE_SUPPORT)
#ifdef cl_khr_fp64
#pragma OPENCL EXTENSION cl_khr_fp64:enable
#elif defined (cl_amd_fp64)
#pragma OPENCL EXTENSION cl_amd_fp64:enable
#endif
#endif
//////////////////////////////////////////////////////////////////////////////////////////////////////
@@ -63,6 +67,9 @@ __kernel void arithm_absdiff_D0 (__global uchar *src1, int src1_step, int src1_o
{
x = x << 2;
#ifdef dst_align
#undef dst_align
#endif
#define dst_align (dst_offset & 3)
int src1_index = mad24(y, src1_step, x + src1_offset - dst_align);
int src2_index = mad24(y, src2_step, x + src2_offset - dst_align);
@@ -111,7 +118,10 @@ __kernel void arithm_absdiff_D2 (__global ushort *src1, int src1_step, int src1_
{
x = x << 2;
#define dst_align ((dst_offset >> 1) & 3)
#ifdef dst_align
#undef dst_align
#endif
#define dst_align ((dst_offset >> 1) & 3)
int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1));
int src2_index = mad24(y, src2_step, (x << 1) + src2_offset - (dst_align << 1));
@@ -145,7 +155,10 @@ __kernel void arithm_absdiff_D3 (__global short *src1, int src1_step, int src1_o
{
x = x << 2;
#define dst_align ((dst_offset >> 1) & 3)
#ifdef dst_align
#undef dst_align
#endif
#define dst_align ((dst_offset >> 1) & 3)
int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1));
int src2_index = mad24(y, src2_step, (x << 1) + src2_offset - (dst_align << 1));
@@ -249,7 +262,10 @@ __kernel void arithm_s_absdiff_C1_D0 (__global uchar *src1, int src1_step, int
{
x = x << 2;
#define dst_align (dst_offset & 3)
#ifdef dst_align
#undef dst_align
#endif
#define dst_align (dst_offset & 3)
int src1_index = mad24(y, src1_step, x + src1_offset - dst_align);
int dst_start = mad24(y, dst_step, dst_offset);
@@ -288,7 +304,10 @@ __kernel void arithm_s_absdiff_C1_D2 (__global ushort *src1, int src1_step, in
{
x = x << 1;
#define dst_align ((dst_offset >> 1) & 1)
#ifdef dst_align
#undef dst_align
#endif
#define dst_align ((dst_offset >> 1) & 1)
int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1));
int dst_start = mad24(y, dst_step, dst_offset);
@@ -319,7 +338,10 @@ __kernel void arithm_s_absdiff_C1_D3 (__global short *src1, int src1_step, int
{
x = x << 1;
#define dst_align ((dst_offset >> 1) & 1)
#ifdef dst_align
#undef dst_align
#endif
#define dst_align ((dst_offset >> 1) & 1)
int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1));
int dst_start = mad24(y, dst_step, dst_offset);
@@ -387,8 +409,8 @@ __kernel void arithm_s_absdiff_C1_D5 (__global float *src1, int src1_step, int
#if defined (DOUBLE_SUPPORT)
__kernel void arithm_s_absdiff_C1_D6 (__global double *src1, int src1_step, int src1_offset,
__global double *dst, int dst_step, int dst_offset,
double4 src2, int rows, int cols, int dst_step1)
__global double *dst, int dst_step, int dst_offset,
double4 src2, int rows, int cols, int dst_step1)
{
int x = get_global_id(0);
@@ -422,7 +444,10 @@ __kernel void arithm_s_absdiff_C2_D0 (__global uchar *src1, int src1_step, int
{
x = x << 1;
#define dst_align ((dst_offset >> 1) & 1)
#ifdef dst_align
#undef dst_align
#endif
#define dst_align ((dst_offset >> 1) & 1)
int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1));
int dst_start = mad24(y, dst_step, dst_offset);
@@ -465,7 +490,7 @@ __kernel void arithm_s_absdiff_C2_D2 (__global ushort *src1, int src1_step, in
}
__kernel void arithm_s_absdiff_C2_D3 (__global short *src1, int src1_step, int src1_offset,
__global short *dst, int dst_step, int dst_offset,
int4 src2, int rows, int cols, int dst_step1)
int4 src2, int rows, int cols, int dst_step1)
{
int x = get_global_id(0);
@@ -509,7 +534,7 @@ __kernel void arithm_s_absdiff_C2_D4 (__global int *src1, int src1_step, int s
}
__kernel void arithm_s_absdiff_C2_D5 (__global float *src1, int src1_step, int src1_offset,
__global float *dst, int dst_step, int dst_offset,
float4 src2, int rows, int cols, int dst_step1)
float4 src2, int rows, int cols, int dst_step1)
{
int x = get_global_id(0);
@@ -564,7 +589,10 @@ __kernel void arithm_s_absdiff_C3_D0 (__global uchar *src1, int src1_step, int
{
x = x << 2;
#define dst_align (((dst_offset % dst_step) / 3 ) & 3)
#ifdef dst_align
#undef dst_align
#endif
#define dst_align (((dst_offset % dst_step) / 3 ) & 3)
int src1_index = mad24(y, src1_step, (x * 3) + src1_offset - (dst_align * 3));
int dst_start = mad24(y, dst_step, dst_offset);
@@ -618,7 +646,10 @@ __kernel void arithm_s_absdiff_C3_D2 (__global ushort *src1, int src1_step, in
{
x = x << 1;
#define dst_align (((dst_offset % dst_step) / 6 ) & 1)
#ifdef dst_align
#undef dst_align
#endif
#define dst_align (((dst_offset % dst_step) / 6 ) & 1)
int src1_index = mad24(y, src1_step, (x * 6) + src1_offset - (dst_align * 6));
int dst_start = mad24(y, dst_step, dst_offset);
@@ -644,16 +675,16 @@ __kernel void arithm_s_absdiff_C3_D2 (__global ushort *src1, int src1_step, in
data_0.xy = ((dst_index + 0 >= dst_start)) ? tmp_data_0.xy : data_0.xy;
data_1.x = ((dst_index + 0 >= dst_start) && (dst_index + 0 < dst_end))
? tmp_data_1.x : data_1.x;
? tmp_data_1.x : data_1.x;
data_1.y = ((dst_index + 6 >= dst_start) && (dst_index + 6 < dst_end))
? tmp_data_1.y : data_1.y;
? tmp_data_1.y : data_1.y;
data_2.xy = ((dst_index + 6 >= dst_start) && (dst_index + 6 < dst_end))
? tmp_data_2.xy : data_2.xy;
? tmp_data_2.xy : data_2.xy;
*((__global ushort2 *)((__global char *)dst + dst_index + 0))= data_0;
*((__global ushort2 *)((__global char *)dst + dst_index + 4))= data_1;
*((__global ushort2 *)((__global char *)dst + dst_index + 8))= data_2;
*((__global ushort2 *)((__global char *)dst + dst_index + 0))= data_0;
*((__global ushort2 *)((__global char *)dst + dst_index + 4))= data_1;
*((__global ushort2 *)((__global char *)dst + dst_index + 8))= data_2;
}
}
__kernel void arithm_s_absdiff_C3_D3 (__global short *src1, int src1_step, int src1_offset,
@@ -668,7 +699,10 @@ __kernel void arithm_s_absdiff_C3_D3 (__global short *src1, int src1_step, int
{
x = x << 1;
#define dst_align (((dst_offset % dst_step) / 6 ) & 1)
#ifdef dst_align
#undef dst_align
#endif
#define dst_align (((dst_offset % dst_step) / 6 ) & 1)
int src1_index = mad24(y, src1_step, (x * 6) + src1_offset - (dst_align * 6));
int dst_start = mad24(y, dst_step, dst_offset);
@@ -694,16 +728,16 @@ __kernel void arithm_s_absdiff_C3_D3 (__global short *src1, int src1_step, int
data_0.xy = ((dst_index + 0 >= dst_start)) ? tmp_data_0.xy : data_0.xy;
data_1.x = ((dst_index + 0 >= dst_start) && (dst_index + 0 < dst_end))
? tmp_data_1.x : data_1.x;
? tmp_data_1.x : data_1.x;
data_1.y = ((dst_index + 6 >= dst_start) && (dst_index + 6 < dst_end))
? tmp_data_1.y : data_1.y;
? tmp_data_1.y : data_1.y;
data_2.xy = ((dst_index + 6 >= dst_start) && (dst_index + 6 < dst_end))
? tmp_data_2.xy : data_2.xy;
? tmp_data_2.xy : data_2.xy;
*((__global short2 *)((__global char *)dst + dst_index + 0))= data_0;
*((__global short2 *)((__global char *)dst + dst_index + 4))= data_1;
*((__global short2 *)((__global char *)dst + dst_index + 8))= data_2;
*((__global short2 *)((__global char *)dst + dst_index + 0))= data_0;
*((__global short2 *)((__global char *)dst + dst_index + 4))= data_1;
*((__global short2 *)((__global char *)dst + dst_index + 8))= data_2;
}
}
__kernel void arithm_s_absdiff_C3_D4 (__global int *src1, int src1_step, int src1_offset,
@@ -735,9 +769,9 @@ __kernel void arithm_s_absdiff_C3_D4 (__global int *src1, int src1_step, int s
int tmp_data_1 = convert_int_sat(abs_diff(src1_data_1, src2_data_1));
int tmp_data_2 = convert_int_sat(abs_diff(src1_data_2, src2_data_2));
*((__global int *)((__global char *)dst + dst_index + 0))= tmp_data_0;
*((__global int *)((__global char *)dst + dst_index + 4))= tmp_data_1;
*((__global int *)((__global char *)dst + dst_index + 8))= tmp_data_2;
*((__global int *)((__global char *)dst + dst_index + 0))= tmp_data_0;
*((__global int *)((__global char *)dst + dst_index + 4))= tmp_data_1;
*((__global int *)((__global char *)dst + dst_index + 8))= tmp_data_2;
}
}
__kernel void arithm_s_absdiff_C3_D5 (__global float *src1, int src1_step, int src1_offset,
@@ -769,9 +803,9 @@ __kernel void arithm_s_absdiff_C3_D5 (__global float *src1, int src1_step, int
float tmp_data_1 = fabs(src1_data_1 - src2_data_1);
float tmp_data_2 = fabs(src1_data_2 - src2_data_2);
*((__global float *)((__global char *)dst + dst_index + 0))= tmp_data_0;
*((__global float *)((__global char *)dst + dst_index + 4))= tmp_data_1;
*((__global float *)((__global char *)dst + dst_index + 8))= tmp_data_2;
*((__global float *)((__global char *)dst + dst_index + 0))= tmp_data_0;
*((__global float *)((__global char *)dst + dst_index + 4))= tmp_data_1;
*((__global float *)((__global char *)dst + dst_index + 8))= tmp_data_2;
}
}
@@ -805,9 +839,9 @@ __kernel void arithm_s_absdiff_C3_D6 (__global double *src1, int src1_step, in
double tmp_data_1 = fabs(src1_data_1 - src2_data_1);
double tmp_data_2 = fabs(src1_data_2 - src2_data_2);
*((__global double *)((__global char *)dst + dst_index + 0 ))= tmp_data_0;
*((__global double *)((__global char *)dst + dst_index + 8 ))= tmp_data_1;
*((__global double *)((__global char *)dst + dst_index + 16))= tmp_data_2;
*((__global double *)((__global char *)dst + dst_index + 0 ))= tmp_data_0;
*((__global double *)((__global char *)dst + dst_index + 8 ))= tmp_data_1;
*((__global double *)((__global char *)dst + dst_index + 16))= tmp_data_2;
}
}
#endif

View File

@@ -45,7 +45,11 @@
//M*/
// When DOUBLE_SUPPORT is defined, turn on an fp64 extension: the Khronos
// cl_khr_fp64 one if the compiler defines that macro, otherwise AMD's
// cl_amd_fp64 if that one is defined. If neither macro exists no pragma is emitted.
#if defined (DOUBLE_SUPPORT)
#ifdef cl_khr_fp64
#pragma OPENCL EXTENSION cl_khr_fp64:enable
#elif defined (cl_amd_fp64)
#pragma OPENCL EXTENSION cl_amd_fp64:enable
#endif
#endif
//////////////////////////////////////////////////////////////////////////////////////////////////////
@@ -64,7 +68,10 @@ __kernel void arithm_add_D0 (__global uchar *src1, int src1_step, int src1_offse
{
x = x << 2;
#define dst_align (dst_offset & 3)
#ifdef dst_align
#undef dst_align
#endif
#define dst_align (dst_offset & 3)
int src1_index = mad24(y, src1_step, x + src1_offset - dst_align);
int src2_index = mad24(y, src2_step, x + src2_offset - dst_align);
@@ -112,7 +119,10 @@ __kernel void arithm_add_D2 (__global ushort *src1, int src1_step, int src1_offs
{
x = x << 2;
#define dst_align ((dst_offset >> 1) & 3)
#ifdef dst_align
#undef dst_align
#endif
#define dst_align ((dst_offset >> 1) & 3)
int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1));
int src2_index = mad24(y, src2_step, (x << 1) + src2_offset - (dst_align << 1));
@@ -147,7 +157,10 @@ __kernel void arithm_add_D3 (__global short *src1, int src1_step, int src1_offse
{
x = x << 2;
#define dst_align ((dst_offset >> 1) & 3)
#ifdef dst_align
#undef dst_align
#endif
#define dst_align ((dst_offset >> 1) & 3)
int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1));
int src2_index = mad24(y, src2_step, (x << 1) + src2_offset - (dst_align << 1));
@@ -252,7 +265,10 @@ __kernel void arithm_add_with_mask_C1_D0 (__global uchar *src1, int src1_step, i
{
x = x << 2;
#define dst_align (dst_offset & 3)
#ifdef dst_align
#undef dst_align
#endif
#define dst_align (dst_offset & 3)
int src1_index = mad24(y, src1_step, x + src1_offset - dst_align);
int src2_index = mad24(y, src2_step, x + src2_offset - dst_align);
int mask_index = mad24(y, mask_step, x + mask_offset - dst_align);
@@ -311,7 +327,10 @@ __kernel void arithm_add_with_mask_C1_D2 (__global ushort *src1, int src1_step,
{
x = x << 1;
#define dst_align ((dst_offset >> 1) & 1)
#ifdef dst_align
#undef dst_align
#endif
#define dst_align ((dst_offset >> 1) & 1)
int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1));
int src2_index = mad24(y, src2_step, (x << 1) + src2_offset - (dst_align << 1));
int mask_index = mad24(y, mask_step, x + mask_offset - dst_align);
@@ -348,7 +367,10 @@ __kernel void arithm_add_with_mask_C1_D3 (__global short *src1, int src1_step, i
{
x = x << 1;
#define dst_align ((dst_offset >> 1) & 1)
#ifdef dst_align
#undef dst_align
#endif
#define dst_align ((dst_offset >> 1) & 1)
int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1));
int src2_index = mad24(y, src2_step, (x << 1) + src2_offset - (dst_align << 1));
int mask_index = mad24(y, mask_step, x + mask_offset - dst_align);
@@ -477,7 +499,10 @@ __kernel void arithm_add_with_mask_C2_D0 (__global uchar *src1, int src1_step, i
{
x = x << 1;
#define dst_align ((dst_offset >> 1) & 1)
#ifdef dst_align
#undef dst_align
#endif
#define dst_align ((dst_offset >> 1) & 1)
int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1));
int src2_index = mad24(y, src2_step, (x << 1) + src2_offset - (dst_align << 1));
int mask_index = mad24(y, mask_step, x + mask_offset - dst_align);
@@ -664,7 +689,10 @@ __kernel void arithm_add_with_mask_C3_D0 (__global uchar *src1, int src1_step, i
{
x = x << 2;
#define dst_align (((dst_offset % dst_step) / 3 ) & 3)
#ifdef dst_align
#undef dst_align
#endif
#define dst_align (((dst_offset % dst_step) / 3 ) & 3)
int src1_index = mad24(y, src1_step, (x * 3) + src1_offset - (dst_align * 3));
int src2_index = mad24(y, src2_step, (x * 3) + src2_offset - (dst_align * 3));
int mask_index = mad24(y, mask_step, x + mask_offset - dst_align);
@@ -724,7 +752,10 @@ __kernel void arithm_add_with_mask_C3_D2 (__global ushort *src1, int src1_step,
{
x = x << 1;
#define dst_align (((dst_offset % dst_step) / 6 ) & 1)
#ifdef dst_align
#undef dst_align
#endif
#define dst_align (((dst_offset % dst_step) / 6 ) & 1)
int src1_index = mad24(y, src1_step, (x * 6) + src1_offset - (dst_align * 6));
int src2_index = mad24(y, src2_step, (x * 6) + src2_offset - (dst_align * 6));
int mask_index = mad24(y, mask_step, x + mask_offset - dst_align);
@@ -754,16 +785,16 @@ __kernel void arithm_add_with_mask_C3_D2 (__global ushort *src1, int src1_step,
data_0.xy = ((mask_data.x) && (dst_index + 0 >= dst_start)) ? tmp_data_0.xy : data_0.xy;
data_1.x = ((mask_data.x) && (dst_index + 0 >= dst_start) && (dst_index + 0 < dst_end))
? tmp_data_1.x : data_1.x;
? tmp_data_1.x : data_1.x;
data_1.y = ((mask_data.y) && (dst_index + 6 >= dst_start) && (dst_index + 6 < dst_end))
? tmp_data_1.y : data_1.y;
? tmp_data_1.y : data_1.y;
data_2.xy = ((mask_data.y) && (dst_index + 6 >= dst_start) && (dst_index + 6 < dst_end))
? tmp_data_2.xy : data_2.xy;
? tmp_data_2.xy : data_2.xy;
*((__global ushort2 *)((__global char *)dst + dst_index + 0))= data_0;
*((__global ushort2 *)((__global char *)dst + dst_index + 4))= data_1;
*((__global ushort2 *)((__global char *)dst + dst_index + 8))= data_2;
*((__global ushort2 *)((__global char *)dst + dst_index + 0))= data_0;
*((__global ushort2 *)((__global char *)dst + dst_index + 4))= data_1;
*((__global ushort2 *)((__global char *)dst + dst_index + 8))= data_2;
}
}
__kernel void arithm_add_with_mask_C3_D3 (__global short *src1, int src1_step, int src1_offset,
@@ -780,7 +811,10 @@ __kernel void arithm_add_with_mask_C3_D3 (__global short *src1, int src1_step, i
{
x = x << 1;
#define dst_align (((dst_offset % dst_step) / 6 ) & 1)
#ifdef dst_align
#undef dst_align
#endif
#define dst_align (((dst_offset % dst_step) / 6 ) & 1)
int src1_index = mad24(y, src1_step, (x * 6) + src1_offset - (dst_align * 6));
int src2_index = mad24(y, src2_step, (x * 6) + src2_offset - (dst_align * 6));
int mask_index = mad24(y, mask_step, x + mask_offset - dst_align);
@@ -810,16 +844,16 @@ __kernel void arithm_add_with_mask_C3_D3 (__global short *src1, int src1_step, i
data_0.xy = ((mask_data.x) && (dst_index + 0 >= dst_start)) ? tmp_data_0.xy : data_0.xy;
data_1.x = ((mask_data.x) && (dst_index + 0 >= dst_start) && (dst_index + 0 < dst_end))
? tmp_data_1.x : data_1.x;
? tmp_data_1.x : data_1.x;
data_1.y = ((mask_data.y) && (dst_index + 6 >= dst_start) && (dst_index + 6 < dst_end))
? tmp_data_1.y : data_1.y;
? tmp_data_1.y : data_1.y;
data_2.xy = ((mask_data.y) && (dst_index + 6 >= dst_start) && (dst_index + 6 < dst_end))
? tmp_data_2.xy : data_2.xy;
? tmp_data_2.xy : data_2.xy;
*((__global short2 *)((__global char *)dst + dst_index + 0))= data_0;
*((__global short2 *)((__global char *)dst + dst_index + 4))= data_1;
*((__global short2 *)((__global char *)dst + dst_index + 8))= data_2;
*((__global short2 *)((__global char *)dst + dst_index + 0))= data_0;
*((__global short2 *)((__global char *)dst + dst_index + 4))= data_1;
*((__global short2 *)((__global char *)dst + dst_index + 8))= data_2;
}
}
__kernel void arithm_add_with_mask_C3_D4 (__global int *src1, int src1_step, int src1_offset,
@@ -861,9 +895,9 @@ __kernel void arithm_add_with_mask_C3_D4 (__global int *src1, int src1_step, i
data_1 = mask_data ? tmp_data_1 : data_1;
data_2 = mask_data ? tmp_data_2 : data_2;
*((__global int *)((__global char *)dst + dst_index + 0))= data_0;
*((__global int *)((__global char *)dst + dst_index + 4))= data_1;
*((__global int *)((__global char *)dst + dst_index + 8))= data_2;
*((__global int *)((__global char *)dst + dst_index + 0))= data_0;
*((__global int *)((__global char *)dst + dst_index + 4))= data_1;
*((__global int *)((__global char *)dst + dst_index + 8))= data_2;
}
}
__kernel void arithm_add_with_mask_C3_D5 (__global float *src1, int src1_step, int src1_offset,
@@ -905,9 +939,9 @@ __kernel void arithm_add_with_mask_C3_D5 (__global float *src1, int src1_step, i
data_1 = mask_data ? tmp_data_1 : data_1;
data_2 = mask_data ? tmp_data_2 : data_2;
*((__global float *)((__global char *)dst + dst_index + 0))= data_0;
*((__global float *)((__global char *)dst + dst_index + 4))= data_1;
*((__global float *)((__global char *)dst + dst_index + 8))= data_2;
*((__global float *)((__global char *)dst + dst_index + 0))= data_0;
*((__global float *)((__global char *)dst + dst_index + 4))= data_1;
*((__global float *)((__global char *)dst + dst_index + 8))= data_2;
}
}
@@ -951,9 +985,9 @@ __kernel void arithm_add_with_mask_C3_D6 (__global double *src1, int src1_step,
data_1 = mask_data ? tmp_data_1 : data_1;
data_2 = mask_data ? tmp_data_2 : data_2;
*((__global double *)((__global char *)dst + dst_index + 0 ))= data_0;
*((__global double *)((__global char *)dst + dst_index + 8 ))= data_1;
*((__global double *)((__global char *)dst + dst_index + 16))= data_2;
*((__global double *)((__global char *)dst + dst_index + 0 ))= data_0;
*((__global double *)((__global char *)dst + dst_index + 8 ))= data_1;
*((__global double *)((__global char *)dst + dst_index + 16))= data_2;
}
}
#endif

View File

@@ -42,8 +42,12 @@
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#if defined DOUBLE_SUPPORT
#if defined (DOUBLE_SUPPORT)
#ifdef cl_khr_fp64
#pragma OPENCL EXTENSION cl_khr_fp64:enable
#elif defined (cl_amd_fp64)
#pragma OPENCL EXTENSION cl_amd_fp64:enable
#endif
typedef double F;
#else
typedef float F;
@@ -52,10 +56,10 @@ typedef float F;
/////////////////////////////////////////////addWeighted//////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////////////////////////////
__kernel void addWeighted_D0 (__global uchar *src1,int src1_step,int src1_offset,
__global uchar *src2, int src2_step,int src2_offset,
F alpha,F beta,F gama,
__global uchar *dst, int dst_step,int dst_offset,
int rows, int cols,int dst_step1)
__global uchar *src2, int src2_step,int src2_offset,
F alpha,F beta,F gama,
__global uchar *dst, int dst_step,int dst_offset,
int rows, int cols,int dst_step1)
{
int x = get_global_id(0);
int y = get_global_id(1);
@@ -65,7 +69,10 @@ __kernel void addWeighted_D0 (__global uchar *src1,int src1_step,int src1_offset
{
x = x << 2;
#define dst_align (dst_offset & 3)
#ifdef dst_align
#undef dst_align
#endif
#define dst_align (dst_offset & 3)
int src1_index = mad24(y, src1_step, x + src1_offset - dst_align);
int src2_index = mad24(y, src2_step, x + src2_offset - dst_align);
@@ -87,7 +94,7 @@ __kernel void addWeighted_D0 (__global uchar *src1,int src1_step,int src1_offset
uchar4 dst_data = *((__global uchar4 *)(dst + dst_index));
// short4 tmp = convert_short4_sat(src1_data) * alpha + convert_short4_sat(src2_data) * beta + gama;
short4 tmp;
short4 tmp;
tmp.x = src1_data.x * alpha + src2_data.x * beta + gama;
tmp.y = src1_data.y * alpha + src2_data.y * beta + gama;
tmp.z = src1_data.z * alpha + src2_data.z * beta + gama;
@@ -100,7 +107,7 @@ __kernel void addWeighted_D0 (__global uchar *src1,int src1_step,int src1_offset
dst_data.w = ((dst_index + 3 >= dst_start) && (dst_index + 3 < dst_end)) ? tmp_data.w : dst_data.w;
*((__global uchar4 *)(dst + dst_index)) = dst_data;
// dst[x + y * dst_step] = src1[x + y * src1_step] * alpha + src2[x + y * src2_step] * beta + gama;
// dst[x + y * dst_step] = src1[x + y * src1_step] * alpha + src2[x + y * src2_step] * beta + gama;
}
}
@@ -108,10 +115,10 @@ __kernel void addWeighted_D0 (__global uchar *src1,int src1_step,int src1_offset
__kernel void addWeighted_D2 (__global ushort *src1, int src1_step,int src1_offset,
__global ushort *src2, int src2_step,int src2_offset,
F alpha,F beta,F gama,
__global ushort *dst, int dst_step,int dst_offset,
int rows, int cols,int dst_step1)
__global ushort *src2, int src2_step,int src2_offset,
F alpha,F beta,F gama,
__global ushort *dst, int dst_step,int dst_offset,
int rows, int cols,int dst_step1)
{
int x = get_global_id(0);
int y = get_global_id(1);
@@ -122,34 +129,37 @@ __kernel void addWeighted_D2 (__global ushort *src1, int src1_step,int src1_offs
x = x << 2;
#define dst_align ((dst_offset >> 1) & 3)
#ifdef dst_align
#undef dst_align
#endif
#define dst_align ((dst_offset >> 1) & 3)
// Source offsets in bytes for row y: (x << 1) converts the ushort column to
// bytes, and the start is moved back by (dst_align << 1) bytes, so the result
// can be negative.
int src1_index = mad24(y, src1_step, (x << 1) + src1_offset -( dst_align << 1));
int src2_index = mad24(y, src2_step, (x << 1) + src2_offset -( dst_align << 1));
int dst_start = mad24(y, dst_step, dst_offset);
int dst_end = mad24(y, dst_step, dst_offset + dst_step1);
// '+' binds tighter than '&': the mask clears the low three bits of
// (dst_offset + (x << 1)) before mad24 adds y * dst_step.
int dst_index = mad24(y, dst_step, dst_offset +( x<< 1) & (int)0xfffffff8);
// Clamp negative offsets to 0 so the vload4 below starts at the buffer base.
int src1_index_fix = src1_index < 0 ? 0 : src1_index;
int src2_index_fix = src2_index < 0 ? 0 : src2_index;
int src1_index_fix = src1_index < 0 ? 0 : src1_index;
int src2_index_fix = src2_index < 0 ? 0 : src2_index;
ushort4 src1_data = vload4(0, (__global ushort *)((__global char *)src1 + src1_index_fix));
ushort4 src2_data = vload4(0, (__global ushort *)((__global char *)src2 + src2_index_fix));
// If the offset was clamped, re-order the loaded lanes: -1 selects .wxyz,
// -2 selects .zwxy, any other negative value selects .yzwx.
// NOTE(review): src*_index is a byte offset (it is added to a char* above).
// Assuming steps/offsets are multiples of 2, it can only be -2, -4 or -6 here,
// so the -1 case looks unreachable and -2 (one element) picks the two-lane
// rotation -- confirm whether -2/-4 were intended.
if(src1_index < 0)
{
ushort4 tmp;
tmp.xyzw = (src1_index == -2) ? src1_data.zwxy:src1_data.yzwx;
src1_data.xyzw = (src1_index == -1) ? src1_data.wxyz:tmp.xyzw;
}
if(src2_index < 0)
{
ushort4 tmp;
tmp.xyzw = (src2_index == -2) ? src2_data.zwxy:src2_data.yzwx;
src2_data.xyzw = (src2_index == -1) ? src2_data.wxyz:tmp.xyzw;
}
if(src1_index < 0)
{
ushort4 tmp;
tmp.xyzw = (src1_index == -2) ? src1_data.zwxy:src1_data.yzwx;
src1_data.xyzw = (src1_index == -1) ? src1_data.wxyz:tmp.xyzw;
}
if(src2_index < 0)
{
ushort4 tmp;
tmp.xyzw = (src2_index == -2) ? src2_data.zwxy:src2_data.yzwx;
src2_data.xyzw = (src2_index == -1) ? src2_data.wxyz:tmp.xyzw;
}
ushort4 dst_data = *((__global ushort4 *)((__global char *)dst + dst_index));
// int4 tmp = convert_int4_sat(src1_data) * alpha + convert_int4_sat(src2_data) * beta + gama;
int4 tmp;
// int4 tmp = convert_int4_sat(src1_data) * alpha + convert_int4_sat(src2_data) * beta + gama;
int4 tmp;
tmp.x = src1_data.x * alpha + src2_data.x * beta + gama;
tmp.y = src1_data.y * alpha + src2_data.y * beta + gama;
tmp.z = src1_data.z * alpha + src2_data.z * beta + gama;
@@ -182,7 +192,10 @@ __kernel void addWeighted_D3 (__global short *src1, int src1_step,int src1_offse
x = x << 2;
#define dst_align ((dst_offset >> 1) & 3)
#ifdef dst_align
#undef dst_align
#endif
#define dst_align ((dst_offset >> 1) & 3)
int src1_index = mad24(y, src1_step, (x << 1) + src1_offset -( dst_align << 1));
int src2_index = mad24(y, src2_step, (x << 1) + src2_offset -( dst_align << 1));
@@ -190,26 +203,26 @@ __kernel void addWeighted_D3 (__global short *src1, int src1_step,int src1_offse
int dst_end = mad24(y, dst_step, dst_offset + dst_step1);
int dst_index = mad24(y, dst_step, dst_offset +( x<< 1) - (dst_align << 1 ));
// Clamp negative byte offsets to 0 so the vload4 below starts at the buffer base.
int src1_index_fix = src1_index < 0 ? 0 : src1_index;
int src2_index_fix = src2_index < 0 ? 0 : src2_index;
int src1_index_fix = src1_index < 0 ? 0 : src1_index;
int src2_index_fix = src2_index < 0 ? 0 : src2_index;
short4 src1_data = vload4(0, (__global short *)((__global char *)src1 + src1_index_fix));
short4 src2_data = vload4(0, (__global short *)((__global char *)src2 + src2_index_fix));
// If the offset was clamped, re-order the loaded lanes: -1 selects .wxyz,
// -2 selects .zwxy, any other negative value selects .yzwx.
// NOTE(review): src*_index is a byte offset (it is added to a char* above).
// Assuming steps/offsets are multiples of 2, it can only be -2, -4 or -6 here,
// so the -1 case looks unreachable and -2 (one element) picks the two-lane
// rotation -- confirm whether -2/-4 were intended.
if(src1_index < 0)
{
short4 tmp;
tmp.xyzw = (src1_index == -2) ? src1_data.zwxy:src1_data.yzwx;
src1_data.xyzw = (src1_index == -1) ? src1_data.wxyz:tmp.xyzw;
}
if(src2_index < 0)
{
short4 tmp;
tmp.xyzw = (src2_index == -2) ? src2_data.zwxy:src2_data.yzwx;
src2_data.xyzw = (src2_index == -1) ? src2_data.wxyz:tmp.xyzw;
}
if(src1_index < 0)
{
short4 tmp;
tmp.xyzw = (src1_index == -2) ? src1_data.zwxy:src1_data.yzwx;
src1_data.xyzw = (src1_index == -1) ? src1_data.wxyz:tmp.xyzw;
}
if(src2_index < 0)
{
short4 tmp;
tmp.xyzw = (src2_index == -2) ? src2_data.zwxy:src2_data.yzwx;
src2_data.xyzw = (src2_index == -1) ? src2_data.wxyz:tmp.xyzw;
}
short4 dst_data = *((__global short4 *)((__global char *)dst + dst_index));
// int4 tmp = convert_int4_sat(src1_data) * alpha + convert_int4_sat(src2_data) * beta + gama;
int4 tmp;
// int4 tmp = convert_int4_sat(src1_data) * alpha + convert_int4_sat(src2_data) * beta + gama;
int4 tmp;
tmp.x = src1_data.x * alpha + src2_data.x * beta + gama;
tmp.y = src1_data.y * alpha + src2_data.y * beta + gama;
tmp.z = src1_data.z * alpha + src2_data.z * beta + gama;
@@ -228,7 +241,7 @@ __kernel void addWeighted_D3 (__global short *src1, int src1_step,int src1_offse
__kernel void addWeighted_D4 (__global int *src1, int src1_step,int src1_offset,
__global int *src2, int src2_step,int src2_offset,
F alpha,F beta, F gama,
F alpha,F beta, F gama,
__global int *dst, int dst_step,int dst_offset,
int rows, int cols,int dst_step1)
{
@@ -241,9 +254,12 @@ __kernel void addWeighted_D4 (__global int *src1, int src1_step,int src1_offset,
x = x << 2;
#define bitOfInt (sizeof(int)== 4 ? 2: 3)
#define bitOfInt (sizeof(int)== 4 ? 2: 3)
#define dst_align ((dst_offset >> bitOfInt) & 3)
#ifdef dst_align
#undef dst_align
#endif
#define dst_align ((dst_offset >> bitOfInt) & 3)
int src1_index = mad24(y, src1_step, (x << bitOfInt) + src1_offset - (dst_align << bitOfInt));
int src2_index = mad24(y, src2_step, (x << bitOfInt) + src2_offset - (dst_align << bitOfInt));
@@ -252,26 +268,26 @@ __kernel void addWeighted_D4 (__global int *src1, int src1_step,int src1_offset,
int dst_end = mad24(y, dst_step, dst_offset + dst_step1);
int dst_index = mad24(y, dst_step, dst_offset + (x << bitOfInt) -(dst_align << bitOfInt));
// Clamp negative byte offsets to 0 so the vload4 below starts at the buffer base.
int src1_index_fix = src1_index < 0 ? 0 : src1_index;
int src2_index_fix = src2_index < 0 ? 0 : src2_index;
int src1_index_fix = src1_index < 0 ? 0 : src1_index;
int src2_index_fix = src2_index < 0 ? 0 : src2_index;
int4 src1_data = vload4(0, (__global int *)((__global char *)src1 + src1_index_fix));
int4 src2_data = vload4(0, (__global int *)((__global char *)src2 + src2_index_fix));
// If the offset was clamped, re-order the loaded lanes: -1 selects .wxyz,
// -2 selects .zwxy, any other negative value selects .yzwx.
// NOTE(review): src*_index is a byte offset (it is added to a char* above).
// Assuming steps/offsets are multiples of sizeof(int), a negative value can
// only be -4, -8 or -12, so neither test matches and every negative case
// takes .yzwx -- confirm whether -4/-8 were intended.
if(src1_index < 0)
{
int4 tmp;
tmp.xyzw = (src1_index == -2) ? src1_data.zwxy:src1_data.yzwx;
src1_data.xyzw = (src1_index == -1) ? src1_data.wxyz:tmp.xyzw;
}
if(src2_index < 0)
{
int4 tmp;
tmp.xyzw = (src2_index == -2) ? src2_data.zwxy:src2_data.yzwx;
src2_data.xyzw = (src2_index == -1) ? src2_data.wxyz:tmp.xyzw;
}
if(src1_index < 0)
{
int4 tmp;
tmp.xyzw = (src1_index == -2) ? src1_data.zwxy:src1_data.yzwx;
src1_data.xyzw = (src1_index == -1) ? src1_data.wxyz:tmp.xyzw;
}
if(src2_index < 0)
{
int4 tmp;
tmp.xyzw = (src2_index == -2) ? src2_data.zwxy:src2_data.yzwx;
src2_data.xyzw = (src2_index == -1) ? src2_data.wxyz:tmp.xyzw;
}
int4 dst_data = *((__global int4 *)((__global char *)dst + dst_index));
// double4 tmp = convert_double4(src1_data) * alpha + convert_double4(src2_data) * beta + gama ;
float4 tmp;
// double4 tmp = convert_double4(src1_data) * alpha + convert_double4(src2_data) * beta + gama ;
float4 tmp;
tmp.x = src1_data.x * alpha + src2_data.x * beta + gama;
tmp.y = src1_data.y * alpha + src2_data.y * beta + gama;
tmp.z = src1_data.z * alpha + src2_data.z * beta + gama;
@@ -291,7 +307,7 @@ __kernel void addWeighted_D4 (__global int *src1, int src1_step,int src1_offset,
__kernel void addWeighted_D5 (__global float *src1,int src1_step,int src1_offset,
__global float *src2, int src2_step,int src2_offset,
F alpha,F beta, F gama,
F alpha,F beta, F gama,
__global float *dst, int dst_step,int dst_offset,
int rows, int cols,int dst_step1)
{
@@ -304,7 +320,10 @@ __kernel void addWeighted_D5 (__global float *src1,int src1_step,int src1_offset
x = x << 2;
#define dst_align ((dst_offset >> 2) & 3)
#ifdef dst_align
#undef dst_align
#endif
#define dst_align ((dst_offset >> 2) & 3)
int src1_index = mad24(y, src1_step, (x << 2) + src1_offset - (dst_align << 2));
int src2_index = mad24(y, src2_step, (x << 2) + src2_offset - (dst_align << 2));
@@ -313,32 +332,32 @@ __kernel void addWeighted_D5 (__global float *src1,int src1_step,int src1_offset
int dst_end = mad24(y, dst_step, dst_offset + dst_step1);
int dst_index = mad24(y, dst_step, dst_offset + (x << 2) -(dst_align << 2));
// Clamp negative byte offsets to 0 so the vload4 below starts at the buffer base.
int src1_index_fix = src1_index < 0 ? 0 : src1_index;
int src2_index_fix = src2_index < 0 ? 0 : src2_index;
int src1_index_fix = src1_index < 0 ? 0 : src1_index;
int src2_index_fix = src2_index < 0 ? 0 : src2_index;
float4 src1_data = vload4(0, (__global float *)((__global char *)src1 + src1_index_fix));
float4 src2_data = vload4(0, (__global float *)((__global char *)src2 + src2_index_fix));
// Current destination contents, read as one float4 at byte offset dst_index.
float4 dst_data = *((__global float4 *)((__global char *)dst + dst_index));
// If the offset was clamped, re-order the loaded lanes: -1 selects .wxyz,
// -2 selects .zwxy, any other negative value selects .yzwx.
// NOTE(review): src*_index is a byte offset (it is added to a char* above).
// Assuming steps/offsets are multiples of sizeof(float), a negative value can
// only be -4, -8 or -12, so neither test matches and every negative case
// takes .yzwx -- confirm whether -4/-8 were intended.
if(src1_index < 0)
{
float4 tmp;
tmp.xyzw = (src1_index == -2) ? src1_data.zwxy:src1_data.yzwx;
src1_data.xyzw = (src1_index == -1) ? src1_data.wxyz:tmp.xyzw;
}
if(src2_index < 0)
{
float4 tmp;
tmp.xyzw = (src2_index == -2) ? src2_data.zwxy:src2_data.yzwx;
src2_data.xyzw = (src2_index == -1) ? src2_data.wxyz:tmp.xyzw;
}
// double4 tmp = convert_double4(src1_data) * alpha + convert_double4(src2_data) * beta + gama ;
if(src1_index < 0)
{
float4 tmp;
tmp.xyzw = (src1_index == -2) ? src1_data.zwxy:src1_data.yzwx;
src1_data.xyzw = (src1_index == -1) ? src1_data.wxyz:tmp.xyzw;
}
if(src2_index < 0)
{
float4 tmp;
tmp.xyzw = (src2_index == -2) ? src2_data.zwxy:src2_data.yzwx;
src2_data.xyzw = (src2_index == -1) ? src2_data.wxyz:tmp.xyzw;
}
// double4 tmp = convert_double4(src1_data) * alpha + convert_double4(src2_data) * beta + gama ;
// float4 tmp_data =(src1_data) * alpha + (src2_data) * beta + gama ;
float4 tmp_data;
// float4 tmp_data =(src1_data) * alpha + (src2_data) * beta + gama ;
float4 tmp_data;
tmp_data.x = src1_data.x * alpha + src2_data.x * beta + gama;
tmp_data.y = src1_data.y * alpha + src2_data.y * beta + gama;
tmp_data.z = src1_data.z * alpha + src2_data.z * beta + gama;
tmp_data.w = src1_data.w * alpha + src2_data.w * beta + gama;
// float4 tmp_data = convert_float4(tmp);
// float4 tmp_data = convert_float4(tmp);
dst_data.x = ((dst_index + 0 >= dst_start) && (dst_index + 0 < dst_end)) ? tmp_data.x : dst_data.x;
dst_data.y = ((dst_index + 4 >= dst_start) && (dst_index + 4 < dst_end)) ? tmp_data.y : dst_data.y;
@@ -353,7 +372,7 @@ __kernel void addWeighted_D5 (__global float *src1,int src1_step,int src1_offset
#if defined (DOUBLE_SUPPORT)
__kernel void addWeighted_D6 (__global double *src1, int src1_step,int src1_offset,
__global double *src2, int src2_step,int src2_offset,
F alpha,F beta, F gama,
F alpha,F beta, F gama,
__global double *dst, int dst_step,int dst_offset,
int rows, int cols,int dst_step1)
{
@@ -366,7 +385,10 @@ __kernel void addWeighted_D6 (__global double *src1, int src1_step,int src1_offs
x = x << 2;
#define dst_align ((dst_offset >> 3) & 3)
#ifdef dst_align
#undef dst_align
#endif
#define dst_align ((dst_offset >> 3) & 3)
int src1_index = mad24(y, src1_step, (x << 3) + src1_offset - (dst_align << 3));
int src2_index = mad24(y, src2_step, (x << 3) + src2_offset - (dst_align << 3));
@@ -375,25 +397,25 @@ __kernel void addWeighted_D6 (__global double *src1, int src1_step,int src1_offs
int dst_end = mad24(y, dst_step, dst_offset + dst_step1);
int dst_index = mad24(y, dst_step, dst_offset + (x << 3) -(dst_align << 3));
// Clamp negative byte offsets to 0 so the vload4 below starts at the buffer base.
int src1_index_fix = src1_index < 0 ? 0 : src1_index;
int src2_index_fix = src2_index < 0 ? 0 : src2_index;
int src1_index_fix = src1_index < 0 ? 0 : src1_index;
int src2_index_fix = src2_index < 0 ? 0 : src2_index;
double4 src1_data = vload4(0, (__global double *)((__global char *)src1 + src1_index_fix));
double4 src2_data = vload4(0, (__global double *)((__global char *)src2 + src2_index_fix));
// Current destination contents, read as one double4 at byte offset dst_index.
double4 dst_data = *((__global double4 *)((__global char *)dst + dst_index));
// If the offset was clamped, re-order the loaded lanes: -1 selects .wxyz,
// -2 selects .zwxy, any other negative value selects .yzwx.
// NOTE(review): src*_index is a byte offset (it is added to a char* above).
// Assuming steps/offsets are multiples of sizeof(double), a negative value can
// only be -8, -16 or -24, so neither test matches and every negative case
// takes .yzwx -- confirm whether -8/-16 were intended.
if(src1_index < 0)
{
double4 tmp;
tmp.xyzw = (src1_index == -2) ? src1_data.zwxy:src1_data.yzwx;
src1_data.xyzw = (src1_index == -1) ? src1_data.wxyz:tmp.xyzw;
}
if(src2_index < 0)
{
double4 tmp;
tmp.xyzw = (src2_index == -2) ? src2_data.zwxy:src2_data.yzwx;
src2_data.xyzw = (src2_index == -1) ? src2_data.wxyz:tmp.xyzw;
}
// double4 tmp_data = (src1_data) * alpha + (src2_data) * beta + gama ;
double4 tmp_data;
if(src1_index < 0)
{
double4 tmp;
tmp.xyzw = (src1_index == -2) ? src1_data.zwxy:src1_data.yzwx;
src1_data.xyzw = (src1_index == -1) ? src1_data.wxyz:tmp.xyzw;
}
if(src2_index < 0)
{
double4 tmp;
tmp.xyzw = (src2_index == -2) ? src2_data.zwxy:src2_data.yzwx;
src2_data.xyzw = (src2_index == -1) ? src2_data.wxyz:tmp.xyzw;
}
// double4 tmp_data = (src1_data) * alpha + (src2_data) * beta + gama ;
double4 tmp_data;
tmp_data.x = src1_data.x * alpha + src2_data.x * beta + gama;
tmp_data.y = src1_data.y * alpha + src2_data.y * beta + gama;
tmp_data.z = src1_data.z * alpha + src2_data.z * beta + gama;

View File

@@ -44,9 +44,13 @@
//M*/
// When DOUBLE_SUPPORT is defined, turn on an fp64 extension: the Khronos
// cl_khr_fp64 one if the compiler defines that macro, otherwise AMD's
// cl_amd_fp64 if that one is defined. If neither macro exists no pragma is emitted.
#if defined (DOUBLE_SUPPORT)
#ifdef cl_khr_fp64
#pragma OPENCL EXTENSION cl_khr_fp64:enable
#elif defined (cl_amd_fp64)
#pragma OPENCL EXTENSION cl_amd_fp64:enable
#endif
#endif
/**************************************add with scalar without mask**************************************/
__kernel void arithm_s_add_C1_D0 (__global uchar *src1, int src1_step, int src1_offset,
__global uchar *dst, int dst_step, int dst_offset,
@@ -59,7 +63,10 @@ __kernel void arithm_s_add_C1_D0 (__global uchar *src1, int src1_step, int src
{
x = x << 2;
#define dst_align (dst_offset & 3)
#ifdef dst_align
#undef dst_align
#endif
#define dst_align (dst_offset & 3)
int src1_index = mad24(y, src1_step, x + src1_offset - dst_align);
int dst_start = mad24(y, dst_step, dst_offset);
@@ -99,7 +106,10 @@ __kernel void arithm_s_add_C1_D2 (__global ushort *src1, int src1_step, int sr
{
x = x << 1;
#define dst_align ((dst_offset >> 1) & 1)
#ifdef dst_align
#undef dst_align
#endif
#define dst_align ((dst_offset >> 1) & 1)
int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1));
int dst_start = mad24(y, dst_step, dst_offset);
@@ -131,7 +141,10 @@ __kernel void arithm_s_add_C1_D3 (__global short *src1, int src1_step, int src
{
x = x << 1;
#define dst_align ((dst_offset >> 1) & 1)
#ifdef dst_align
#undef dst_align
#endif
#define dst_align ((dst_offset >> 1) & 1)
int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1));
int dst_start = mad24(y, dst_step, dst_offset);
@@ -233,7 +246,10 @@ __kernel void arithm_s_add_C2_D0 (__global uchar *src1, int src1_step, int src
{
x = x << 1;
#define dst_align ((dst_offset >> 1) & 1)
#ifdef dst_align
#undef dst_align
#endif
#define dst_align ((dst_offset >> 1) & 1)
int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1));
int dst_start = mad24(y, dst_step, dst_offset);
@@ -378,7 +394,10 @@ __kernel void arithm_s_add_C3_D0 (__global uchar *src1, int src1_step, int src
{
x = x << 2;
#define dst_align (((dst_offset % dst_step) / 3 ) & 3)
#ifdef dst_align
#undef dst_align
#endif
#define dst_align (((dst_offset % dst_step) / 3 ) & 3)
int src1_index = mad24(y, src1_step, (x * 3) + src1_offset - (dst_align * 3));
int dst_start = mad24(y, dst_step, dst_offset);
@@ -432,7 +451,10 @@ __kernel void arithm_s_add_C3_D2 (__global ushort *src1, int src1_step, int sr
{
x = x << 1;
#define dst_align (((dst_offset % dst_step) / 6 ) & 1)
#ifdef dst_align
#undef dst_align
#endif
#define dst_align (((dst_offset % dst_step) / 6 ) & 1)
int src1_index = mad24(y, src1_step, (x * 6) + src1_offset - (dst_align * 6));
int dst_start = mad24(y, dst_step, dst_offset);
@@ -458,16 +480,16 @@ __kernel void arithm_s_add_C3_D2 (__global ushort *src1, int src1_step, int sr
data_0.xy = ((dst_index + 0 >= dst_start)) ? tmp_data_0.xy : data_0.xy;
data_1.x = ((dst_index + 0 >= dst_start) && (dst_index + 0 < dst_end))
? tmp_data_1.x : data_1.x;
? tmp_data_1.x : data_1.x;
data_1.y = ((dst_index + 6 >= dst_start) && (dst_index + 6 < dst_end))
? tmp_data_1.y : data_1.y;
? tmp_data_1.y : data_1.y;
data_2.xy = ((dst_index + 6 >= dst_start) && (dst_index + 6 < dst_end))
? tmp_data_2.xy : data_2.xy;
? tmp_data_2.xy : data_2.xy;
*((__global ushort2 *)((__global char *)dst + dst_index + 0))= data_0;
*((__global ushort2 *)((__global char *)dst + dst_index + 4))= data_1;
*((__global ushort2 *)((__global char *)dst + dst_index + 8))= data_2;
*((__global ushort2 *)((__global char *)dst + dst_index + 0))= data_0;
*((__global ushort2 *)((__global char *)dst + dst_index + 4))= data_1;
*((__global ushort2 *)((__global char *)dst + dst_index + 8))= data_2;
}
}
__kernel void arithm_s_add_C3_D3 (__global short *src1, int src1_step, int src1_offset,
@@ -482,7 +504,10 @@ __kernel void arithm_s_add_C3_D3 (__global short *src1, int src1_step, int src
{
x = x << 1;
#define dst_align (((dst_offset % dst_step) / 6 ) & 1)
#ifdef dst_align
#undef dst_align
#endif
#define dst_align (((dst_offset % dst_step) / 6 ) & 1)
int src1_index = mad24(y, src1_step, (x * 6) + src1_offset - (dst_align * 6));
int dst_start = mad24(y, dst_step, dst_offset);
@@ -508,16 +533,16 @@ __kernel void arithm_s_add_C3_D3 (__global short *src1, int src1_step, int src
data_0.xy = ((dst_index + 0 >= dst_start)) ? tmp_data_0.xy : data_0.xy;
data_1.x = ((dst_index + 0 >= dst_start) && (dst_index + 0 < dst_end))
? tmp_data_1.x : data_1.x;
? tmp_data_1.x : data_1.x;
data_1.y = ((dst_index + 6 >= dst_start) && (dst_index + 6 < dst_end))
? tmp_data_1.y : data_1.y;
? tmp_data_1.y : data_1.y;
data_2.xy = ((dst_index + 6 >= dst_start) && (dst_index + 6 < dst_end))
? tmp_data_2.xy : data_2.xy;
? tmp_data_2.xy : data_2.xy;
*((__global short2 *)((__global char *)dst + dst_index + 0))= data_0;
*((__global short2 *)((__global char *)dst + dst_index + 4))= data_1;
*((__global short2 *)((__global char *)dst + dst_index + 8))= data_2;
*((__global short2 *)((__global char *)dst + dst_index + 0))= data_0;
*((__global short2 *)((__global char *)dst + dst_index + 4))= data_1;
*((__global short2 *)((__global char *)dst + dst_index + 8))= data_2;
}
}
__kernel void arithm_s_add_C3_D4 (__global int *src1, int src1_step, int src1_offset,
@@ -549,9 +574,9 @@ __kernel void arithm_s_add_C3_D4 (__global int *src1, int src1_step, int src1_
int tmp_data_1 = convert_int_sat((long)src1_data_1 + (long)src2_data_1);
int tmp_data_2 = convert_int_sat((long)src1_data_2 + (long)src2_data_2);
*((__global int *)((__global char *)dst + dst_index + 0))= tmp_data_0;
*((__global int *)((__global char *)dst + dst_index + 4))= tmp_data_1;
*((__global int *)((__global char *)dst + dst_index + 8))= tmp_data_2;
*((__global int *)((__global char *)dst + dst_index + 0))= tmp_data_0;
*((__global int *)((__global char *)dst + dst_index + 4))= tmp_data_1;
*((__global int *)((__global char *)dst + dst_index + 8))= tmp_data_2;
}
}
__kernel void arithm_s_add_C3_D5 (__global float *src1, int src1_step, int src1_offset,
@@ -583,9 +608,9 @@ __kernel void arithm_s_add_C3_D5 (__global float *src1, int src1_step, int src
float tmp_data_1 = src1_data_1 + src2_data_1;
float tmp_data_2 = src1_data_2 + src2_data_2;
*((__global float *)((__global char *)dst + dst_index + 0))= tmp_data_0;
*((__global float *)((__global char *)dst + dst_index + 4))= tmp_data_1;
*((__global float *)((__global char *)dst + dst_index + 8))= tmp_data_2;
*((__global float *)((__global char *)dst + dst_index + 0))= tmp_data_0;
*((__global float *)((__global char *)dst + dst_index + 4))= tmp_data_1;
*((__global float *)((__global char *)dst + dst_index + 8))= tmp_data_2;
}
}
@@ -619,9 +644,9 @@ __kernel void arithm_s_add_C3_D6 (__global double *src1, int src1_step, int sr
double tmp_data_1 = src1_data_1 + src2_data_1;
double tmp_data_2 = src1_data_2 + src2_data_2;
*((__global double *)((__global char *)dst + dst_index + 0 ))= tmp_data_0;
*((__global double *)((__global char *)dst + dst_index + 8 ))= tmp_data_1;
*((__global double *)((__global char *)dst + dst_index + 16))= tmp_data_2;
*((__global double *)((__global char *)dst + dst_index + 0 ))= tmp_data_0;
*((__global double *)((__global char *)dst + dst_index + 8 ))= tmp_data_1;
*((__global double *)((__global char *)dst + dst_index + 16))= tmp_data_2;
}
}
#endif

View File

@@ -44,7 +44,11 @@
//M*/
// Enable double-precision (fp64) support, but only when DOUBLE_SUPPORT is
// defined (presumably supplied by the host as a build option - confirm).
#if defined (DOUBLE_SUPPORT)
// Prefer the Khronos fp64 extension when the compiler advertises it.
#ifdef cl_khr_fp64
#pragma OPENCL EXTENSION cl_khr_fp64:enable
// Otherwise fall back to the AMD vendor extension.
#elif defined (cl_amd_fp64)
#pragma OPENCL EXTENSION cl_amd_fp64:enable
// If neither macro is defined, no extension pragma is emitted.
#endif
#endif
/**************************************add with scalar with mask**************************************/
@@ -61,7 +65,10 @@ __kernel void arithm_s_add_with_mask_C1_D0 (__global uchar *src1, int src1_ste
{
x = x << 2;
#define dst_align (dst_offset & 3)
#ifdef dst_align
#undef dst_align
#endif
#define dst_align (dst_offset & 3)
int src1_index = mad24(y, src1_step, x + src1_offset - dst_align);
int mask_index = mad24(y, mask_step, x + mask_offset - dst_align);
@@ -111,7 +118,10 @@ __kernel void arithm_s_add_with_mask_C1_D2 (__global ushort *src1, int src1_st
{
x = x << 1;
#define dst_align ((dst_offset >> 1) & 1)
#ifdef dst_align
#undef dst_align
#endif
#define dst_align ((dst_offset >> 1) & 1)
int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1));
int mask_index = mad24(y, mask_step, x + mask_offset - dst_align);
@@ -146,7 +156,10 @@ __kernel void arithm_s_add_with_mask_C1_D3 (__global short *src1, int src1_ste
{
x = x << 1;
#define dst_align ((dst_offset >> 1) & 1)
#ifdef dst_align
#undef dst_align
#endif
#define dst_align ((dst_offset >> 1) & 1)
int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1));
int mask_index = mad24(y, mask_step, x + mask_offset - dst_align);
@@ -267,7 +280,10 @@ __kernel void arithm_s_add_with_mask_C2_D0 (__global uchar *src1, int src1_ste
{
x = x << 1;
#define dst_align ((dst_offset >> 1) & 1)
#ifdef dst_align
#undef dst_align
#endif
#define dst_align ((dst_offset >> 1) & 1)
int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1));
int mask_index = mad24(y, mask_step, x + mask_offset - dst_align);
@@ -443,7 +459,10 @@ __kernel void arithm_s_add_with_mask_C3_D0 (__global uchar *src1, int src1_ste
{
x = x << 2;
#define dst_align (((dst_offset % dst_step) / 3 ) & 3)
#ifdef dst_align
#undef dst_align
#endif
#define dst_align (((dst_offset % dst_step) / 3 ) & 3)
int src1_index = mad24(y, src1_step, (x * 3) + src1_offset - (dst_align * 3));
int mask_index = mad24(y, mask_step, x + mask_offset - dst_align);
@@ -501,7 +520,10 @@ __kernel void arithm_s_add_with_mask_C3_D2 (__global ushort *src1, int src1_st
{
x = x << 1;
#define dst_align (((dst_offset % dst_step) / 6 ) & 1)
#ifdef dst_align
#undef dst_align
#endif
#define dst_align (((dst_offset % dst_step) / 6 ) & 1)
int src1_index = mad24(y, src1_step, (x * 6) + src1_offset - (dst_align * 6));
int mask_index = mad24(y, mask_step, x + mask_offset - dst_align);
@@ -530,16 +552,16 @@ __kernel void arithm_s_add_with_mask_C3_D2 (__global ushort *src1, int src1_st
data_0.xy = ((mask_data.x) && (dst_index + 0 >= dst_start)) ? tmp_data_0.xy : data_0.xy;
data_1.x = ((mask_data.x) && (dst_index + 0 >= dst_start) && (dst_index + 0 < dst_end))
? tmp_data_1.x : data_1.x;
? tmp_data_1.x : data_1.x;
data_1.y = ((mask_data.y) && (dst_index + 6 >= dst_start) && (dst_index + 6 < dst_end))
? tmp_data_1.y : data_1.y;
? tmp_data_1.y : data_1.y;
data_2.xy = ((mask_data.y) && (dst_index + 6 >= dst_start) && (dst_index + 6 < dst_end))
? tmp_data_2.xy : data_2.xy;
? tmp_data_2.xy : data_2.xy;
*((__global ushort2 *)((__global char *)dst + dst_index + 0))= data_0;
*((__global ushort2 *)((__global char *)dst + dst_index + 4))= data_1;
*((__global ushort2 *)((__global char *)dst + dst_index + 8))= data_2;
*((__global ushort2 *)((__global char *)dst + dst_index + 0))= data_0;
*((__global ushort2 *)((__global char *)dst + dst_index + 4))= data_1;
*((__global ushort2 *)((__global char *)dst + dst_index + 8))= data_2;
}
}
__kernel void arithm_s_add_with_mask_C3_D3 (__global short *src1, int src1_step, int src1_offset,
@@ -555,7 +577,10 @@ __kernel void arithm_s_add_with_mask_C3_D3 (__global short *src1, int src1_ste
{
x = x << 1;
#define dst_align (((dst_offset % dst_step) / 6 ) & 1)
#ifdef dst_align
#undef dst_align
#endif
#define dst_align (((dst_offset % dst_step) / 6 ) & 1)
int src1_index = mad24(y, src1_step, (x * 6) + src1_offset - (dst_align * 6));
int mask_index = mad24(y, mask_step, x + mask_offset - dst_align);
@@ -584,16 +609,16 @@ __kernel void arithm_s_add_with_mask_C3_D3 (__global short *src1, int src1_ste
data_0.xy = ((mask_data.x) && (dst_index + 0 >= dst_start)) ? tmp_data_0.xy : data_0.xy;
data_1.x = ((mask_data.x) && (dst_index + 0 >= dst_start) && (dst_index + 0 < dst_end))
? tmp_data_1.x : data_1.x;
? tmp_data_1.x : data_1.x;
data_1.y = ((mask_data.y) && (dst_index + 6 >= dst_start) && (dst_index + 6 < dst_end))
? tmp_data_1.y : data_1.y;
? tmp_data_1.y : data_1.y;
data_2.xy = ((mask_data.y) && (dst_index + 6 >= dst_start) && (dst_index + 6 < dst_end))
? tmp_data_2.xy : data_2.xy;
? tmp_data_2.xy : data_2.xy;
*((__global short2 *)((__global char *)dst + dst_index + 0))= data_0;
*((__global short2 *)((__global char *)dst + dst_index + 4))= data_1;
*((__global short2 *)((__global char *)dst + dst_index + 8))= data_2;
*((__global short2 *)((__global char *)dst + dst_index + 0))= data_0;
*((__global short2 *)((__global char *)dst + dst_index + 4))= data_1;
*((__global short2 *)((__global char *)dst + dst_index + 8))= data_2;
}
}
__kernel void arithm_s_add_with_mask_C3_D4 (__global int *src1, int src1_step, int src1_offset,
@@ -633,9 +658,9 @@ __kernel void arithm_s_add_with_mask_C3_D4 (__global int *src1, int src1_step,
data_1 = mask_data ? tmp_data_1 : data_1;
data_2 = mask_data ? tmp_data_2 : data_2;
*((__global int *)((__global char *)dst + dst_index + 0))= data_0;
*((__global int *)((__global char *)dst + dst_index + 4))= data_1;
*((__global int *)((__global char *)dst + dst_index + 8))= data_2;
*((__global int *)((__global char *)dst + dst_index + 0))= data_0;
*((__global int *)((__global char *)dst + dst_index + 4))= data_1;
*((__global int *)((__global char *)dst + dst_index + 8))= data_2;
}
}
__kernel void arithm_s_add_with_mask_C3_D5 (__global float *src1, int src1_step, int src1_offset,
@@ -675,9 +700,9 @@ __kernel void arithm_s_add_with_mask_C3_D5 (__global float *src1, int src1_ste
data_1 = mask_data ? tmp_data_1 : data_1;
data_2 = mask_data ? tmp_data_2 : data_2;
*((__global float *)((__global char *)dst + dst_index + 0))= data_0;
*((__global float *)((__global char *)dst + dst_index + 4))= data_1;
*((__global float *)((__global char *)dst + dst_index + 8))= data_2;
*((__global float *)((__global char *)dst + dst_index + 0))= data_0;
*((__global float *)((__global char *)dst + dst_index + 4))= data_1;
*((__global float *)((__global char *)dst + dst_index + 8))= data_2;
}
}
@@ -719,9 +744,9 @@ __kernel void arithm_s_add_with_mask_C3_D6 (__global double *src1, int src1_st
data_1 = mask_data ? tmp_data_1 : data_1;
data_2 = mask_data ? tmp_data_2 : data_2;
*((__global double *)((__global char *)dst + dst_index + 0 ))= data_0;
*((__global double *)((__global char *)dst + dst_index + 8 ))= data_1;
*((__global double *)((__global char *)dst + dst_index + 16))= data_2;
*((__global double *)((__global char *)dst + dst_index + 0 ))= data_0;
*((__global double *)((__global char *)dst + dst_index + 8 ))= data_1;
*((__global double *)((__global char *)dst + dst_index + 16))= data_2;
}
}
#endif

View File

@@ -43,7 +43,11 @@
//
//M*/
// Enable double-precision (fp64) support, but only when DOUBLE_SUPPORT is
// defined (presumably supplied by the host as a build option - confirm).
#if defined (DOUBLE_SUPPORT)
// Prefer the Khronos fp64 extension when the compiler advertises it.
#ifdef cl_khr_fp64
#pragma OPENCL EXTENSION cl_khr_fp64:enable
// Otherwise fall back to the AMD vendor extension.
#elif defined (cl_amd_fp64)
#pragma OPENCL EXTENSION cl_amd_fp64:enable
// If neither macro is defined, no extension pragma is emitted.
#endif
#endif
//////////////////////////////////////////////////////////////////////////////////////////////////////
@@ -51,9 +55,9 @@
///////////////////////////////////////////////////////////////////////////////////////////////////////
/**************************************bitwise_and without mask**************************************/
__kernel void arithm_bitwise_and_D0 (__global uchar *src1, int src1_step, int src1_offset,
__global uchar *src2, int src2_step, int src2_offset,
__global uchar *dst, int dst_step, int dst_offset,
int rows, int cols, int dst_step1)
__global uchar *src2, int src2_step, int src2_offset,
__global uchar *dst, int dst_step, int dst_offset,
int rows, int cols, int dst_step1)
{
int x = get_global_id(0);
int y = get_global_id(1);
@@ -62,30 +66,33 @@ __kernel void arithm_bitwise_and_D0 (__global uchar *src1, int src1_step, int sr
{
x = x << 2;
#define dst_align (dst_offset & 3)
#ifdef dst_align
#undef dst_align
#endif
#define dst_align (dst_offset & 3)
int src1_index = mad24(y, src1_step, x + src1_offset - dst_align);
int src2_index = mad24(y, src2_step, x + src2_offset - dst_align);
int dst_start = mad24(y, dst_step, dst_offset);
int dst_end = mad24(y, dst_step, dst_offset + dst_step1);
int dst_index = mad24(y, dst_step, dst_offset + x & (int)0xfffffffc);
int src1_index_fix = src1_index < 0 ? 0 : src1_index;
int src2_index_fix = src2_index < 0 ? 0 : src2_index;
uchar4 src1_data = vload4(0, src1 + src1_index_fix);
uchar4 src2_data = vload4(0, src2 + src2_index_fix);
int src1_index_fix = src1_index < 0 ? 0 : src1_index;
int src2_index_fix = src2_index < 0 ? 0 : src2_index;
uchar4 src1_data = vload4(0, src1 + src1_index_fix);
uchar4 src2_data = vload4(0, src2 + src2_index_fix);
if(src1_index < 0)
{
uchar4 tmp;
tmp.xyzw = (src1_index == -2) ? src1_data.zwxy:src1_data.yzwx;
src1_data.xyzw = (src1_index == -1) ? src1_data.wxyz:tmp.xyzw;
}
if(src2_index < 0)
{
uchar4 tmp;
tmp.xyzw = (src2_index == -2) ? src2_data.zwxy:src2_data.yzwx;
src2_data.xyzw = (src2_index == -1) ? src2_data.wxyz:tmp.xyzw;
}
if(src1_index < 0)
{
uchar4 tmp;
tmp.xyzw = (src1_index == -2) ? src1_data.zwxy:src1_data.yzwx;
src1_data.xyzw = (src1_index == -1) ? src1_data.wxyz:tmp.xyzw;
}
if(src2_index < 0)
{
uchar4 tmp;
tmp.xyzw = (src2_index == -2) ? src2_data.zwxy:src2_data.yzwx;
src2_data.xyzw = (src2_index == -1) ? src2_data.wxyz:tmp.xyzw;
}
uchar4 dst_data = *((__global uchar4 *)(dst + dst_index));
uchar4 tmp_data = src1_data & src2_data;
@@ -101,9 +108,9 @@ __kernel void arithm_bitwise_and_D0 (__global uchar *src1, int src1_step, int sr
__kernel void arithm_bitwise_and_D1 (__global char *src1, int src1_step, int src1_offset,
__global char *src2, int src2_step, int src2_offset,
__global char *dst, int dst_step, int dst_offset,
int rows, int cols, int dst_step1)
__global char *src2, int src2_step, int src2_offset,
__global char *dst, int dst_step, int dst_offset,
int rows, int cols, int dst_step1)
{
int x = get_global_id(0);
int y = get_global_id(1);
@@ -112,7 +119,10 @@ __kernel void arithm_bitwise_and_D1 (__global char *src1, int src1_step, int src
{
x = x << 2;
#define dst_align (dst_offset & 3)
#ifdef dst_align
#undef dst_align
#endif
#define dst_align (dst_offset & 3)
int src1_index = mad24(y, src1_step, x + src1_offset - dst_align);
int src2_index = mad24(y, src2_step, x + src2_offset - dst_align);
@@ -120,23 +130,23 @@ __kernel void arithm_bitwise_and_D1 (__global char *src1, int src1_step, int src
int dst_end = mad24(y, dst_step, dst_offset + dst_step1);
int dst_index = mad24(y, dst_step, dst_offset + x & (int)0xfffffffc);
int src1_index_fix = src1_index < 0 ? 0 : src1_index;
int src2_index_fix = src2_index < 0 ? 0 : src2_index;
char4 src1_data = vload4(0, src1 + src1_index_fix);
char4 src2_data = vload4(0, src2 + src2_index_fix);
int src1_index_fix = src1_index < 0 ? 0 : src1_index;
int src2_index_fix = src2_index < 0 ? 0 : src2_index;
char4 src1_data = vload4(0, src1 + src1_index_fix);
char4 src2_data = vload4(0, src2 + src2_index_fix);
if(src1_index < 0)
{
char4 tmp;
tmp.xyzw = (src1_index == -2) ? src1_data.zwxy:src1_data.yzwx;
src1_data.xyzw = (src1_index == -1) ? src1_data.wxyz:tmp.xyzw;
}
if(src2_index < 0)
{
char4 tmp;
tmp.xyzw = (src2_index == -2) ? src2_data.zwxy:src2_data.yzwx;
src2_data.xyzw = (src2_index == -1) ? src2_data.wxyz:tmp.xyzw;
}
if(src1_index < 0)
{
char4 tmp;
tmp.xyzw = (src1_index == -2) ? src1_data.zwxy:src1_data.yzwx;
src1_data.xyzw = (src1_index == -1) ? src1_data.wxyz:tmp.xyzw;
}
if(src2_index < 0)
{
char4 tmp;
tmp.xyzw = (src2_index == -2) ? src2_data.zwxy:src2_data.yzwx;
src2_data.xyzw = (src2_index == -1) ? src2_data.wxyz:tmp.xyzw;
}
char4 dst_data = *((__global char4 *)(dst + dst_index));
char4 tmp_data = src1_data & src2_data;
@@ -151,9 +161,9 @@ __kernel void arithm_bitwise_and_D1 (__global char *src1, int src1_step, int src
__kernel void arithm_bitwise_and_D2 (__global ushort *src1, int src1_step, int src1_offset,
__global ushort *src2, int src2_step, int src2_offset,
__global ushort *dst, int dst_step, int dst_offset,
int rows, int cols, int dst_step1)
__global ushort *src2, int src2_step, int src2_offset,
__global ushort *dst, int dst_step, int dst_offset,
int rows, int cols, int dst_step1)
{
int x = get_global_id(0);
@@ -163,7 +173,10 @@ __kernel void arithm_bitwise_and_D2 (__global ushort *src1, int src1_step, int s
{
x = x << 2;
#define dst_align ((dst_offset >> 1) & 3)
#ifdef dst_align
#undef dst_align
#endif
#define dst_align ((dst_offset >> 1) & 3)
int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1));
int src2_index = mad24(y, src2_step, (x << 1) + src2_offset - (dst_align << 1));
@@ -171,23 +184,23 @@ __kernel void arithm_bitwise_and_D2 (__global ushort *src1, int src1_step, int s
int dst_end = mad24(y, dst_step, dst_offset + dst_step1);
int dst_index = mad24(y, dst_step, dst_offset + (x << 1) & (int)0xfffffff8);
int src1_index_fix = src1_index < 0 ? 0 : src1_index;
int src2_index_fix = src2_index < 0 ? 0 : src2_index;
int src1_index_fix = src1_index < 0 ? 0 : src1_index;
int src2_index_fix = src2_index < 0 ? 0 : src2_index;
ushort4 src1_data = vload4(0, (__global ushort *)((__global char *)src1 + src1_index_fix));
ushort4 src2_data = vload4(0, (__global ushort *)((__global char *)src2 + src2_index_fix));
if(src1_index < 0)
{
ushort4 tmp;
tmp.xyzw = (src1_index == -2) ? src1_data.zwxy:src1_data.yzwx;
src1_data.xyzw = (src1_index == -1) ? src1_data.wxyz:tmp.xyzw;
}
if(src2_index < 0)
{
ushort4 tmp;
tmp.xyzw = (src2_index == -2) ? src2_data.zwxy:src2_data.yzwx;
src2_data.xyzw = (src2_index == -1) ? src2_data.wxyz:tmp.xyzw;
}
if(src1_index < 0)
{
ushort4 tmp;
tmp.xyzw = (src1_index == -2) ? src1_data.zwxy:src1_data.yzwx;
src1_data.xyzw = (src1_index == -1) ? src1_data.wxyz:tmp.xyzw;
}
if(src2_index < 0)
{
ushort4 tmp;
tmp.xyzw = (src2_index == -2) ? src2_data.zwxy:src2_data.yzwx;
src2_data.xyzw = (src2_index == -1) ? src2_data.wxyz:tmp.xyzw;
}
ushort4 dst_data = *((__global ushort4 *)((__global char *)dst + dst_index));
ushort4 tmp_data = src1_data & src2_data;
@@ -203,9 +216,9 @@ __kernel void arithm_bitwise_and_D2 (__global ushort *src1, int src1_step, int s
__kernel void arithm_bitwise_and_D3 (__global short *src1, int src1_step, int src1_offset,
__global short *src2, int src2_step, int src2_offset,
__global short *dst, int dst_step, int dst_offset,
int rows, int cols, int dst_step1)
__global short *src2, int src2_step, int src2_offset,
__global short *dst, int dst_step, int dst_offset,
int rows, int cols, int dst_step1)
{
int x = get_global_id(0);
@@ -215,7 +228,10 @@ __kernel void arithm_bitwise_and_D3 (__global short *src1, int src1_step, int sr
{
x = x << 2;
#define dst_align ((dst_offset >> 1) & 3)
#ifdef dst_align
#undef dst_align
#endif
#define dst_align ((dst_offset >> 1) & 3)
int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1));
int src2_index = mad24(y, src2_step, (x << 1) + src2_offset - (dst_align << 1));
@@ -223,23 +239,23 @@ __kernel void arithm_bitwise_and_D3 (__global short *src1, int src1_step, int sr
int dst_end = mad24(y, dst_step, dst_offset + dst_step1);
int dst_index = mad24(y, dst_step, dst_offset + (x << 1) & (int)0xfffffff8);
int src1_index_fix = src1_index < 0 ? 0 : src1_index;
int src2_index_fix = src2_index < 0 ? 0 : src2_index;
int src1_index_fix = src1_index < 0 ? 0 : src1_index;
int src2_index_fix = src2_index < 0 ? 0 : src2_index;
short4 src1_data = vload4(0, (__global short *)((__global char *)src1 + src1_index_fix));
short4 src2_data = vload4(0, (__global short *)((__global char *)src2 + src2_index_fix));
if(src1_index < 0)
{
short4 tmp;
tmp.xyzw = (src1_index == -2) ? src1_data.zwxy:src1_data.yzwx;
src1_data.xyzw = (src1_index == -1) ? src1_data.wxyz:tmp.xyzw;
}
if(src2_index < 0)
{
short4 tmp;
tmp.xyzw = (src2_index == -2) ? src2_data.zwxy:src2_data.yzwx;
src2_data.xyzw = (src2_index == -1) ? src2_data.wxyz:tmp.xyzw;
}
if(src1_index < 0)
{
short4 tmp;
tmp.xyzw = (src1_index == -2) ? src1_data.zwxy:src1_data.yzwx;
src1_data.xyzw = (src1_index == -1) ? src1_data.wxyz:tmp.xyzw;
}
if(src2_index < 0)
{
short4 tmp;
tmp.xyzw = (src2_index == -2) ? src2_data.zwxy:src2_data.yzwx;
src2_data.xyzw = (src2_index == -1) ? src2_data.wxyz:tmp.xyzw;
}
short4 dst_data = *((__global short4 *)((__global char *)dst + dst_index));
short4 tmp_data = src1_data & src2_data;
@@ -255,9 +271,9 @@ __kernel void arithm_bitwise_and_D3 (__global short *src1, int src1_step, int sr
__kernel void arithm_bitwise_and_D4 (__global int *src1, int src1_step, int src1_offset,
__global int *src2, int src2_step, int src2_offset,
__global int *dst, int dst_step, int dst_offset,
int rows, int cols, int dst_step1)
__global int *src2, int src2_step, int src2_offset,
__global int *dst, int dst_step, int dst_offset,
int rows, int cols, int dst_step1)
{
int x = get_global_id(0);
int y = get_global_id(1);
@@ -277,9 +293,9 @@ __kernel void arithm_bitwise_and_D4 (__global int *src1, int src1_step, int src1
}
__kernel void arithm_bitwise_and_D5 (__global char *src1, int src1_step, int src1_offset,
__global char *src2, int src2_step, int src2_offset,
__global char *dst, int dst_step, int dst_offset,
int rows, int cols, int dst_step1)
__global char *src2, int src2_step, int src2_offset,
__global char *dst, int dst_step, int dst_offset,
int rows, int cols, int dst_step1)
{
int x = get_global_id(0);
int y = get_global_id(1);
@@ -300,9 +316,9 @@ __kernel void arithm_bitwise_and_D5 (__global char *src1, int src1_step, int src
#if defined (DOUBLE_SUPPORT)
__kernel void arithm_bitwise_and_D6 (__global char *src1, int src1_step, int src1_offset,
__global char *src2, int src2_step, int src2_offset,
__global char *dst, int dst_step, int dst_offset,
int rows, int cols, int dst_step1)
__global char *src2, int src2_step, int src2_offset,
__global char *dst, int dst_step, int dst_offset,
int rows, int cols, int dst_step1)
{
int x = get_global_id(0);
int y = get_global_id(1);

View File

@@ -43,18 +43,22 @@
//
//M*/
// Enable double-precision (fp64) support, but only when DOUBLE_SUPPORT is
// defined (presumably supplied by the host as a build option - confirm).
#if defined (DOUBLE_SUPPORT)
// Prefer the Khronos fp64 extension when the compiler advertises it.
#ifdef cl_khr_fp64
#pragma OPENCL EXTENSION cl_khr_fp64:enable
// Otherwise fall back to the AMD vendor extension.
#elif defined (cl_amd_fp64)
#pragma OPENCL EXTENSION cl_amd_fp64:enable
// If neither macro is defined, no extension pragma is emitted.
#endif
#endif
//////////////////////////////////////////////////////////////////////////////////////////////////////
////////////////////////////////////////////BITWISE_AND////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////////////////////////////
/**************************************bitwise_and with mask**************************************/
__kernel void arithm_bitwise_and_with_mask_C1_D0 (__global uchar *src1, int src1_step, int src1_offset,
__global uchar *src2, int src2_step, int src2_offset,
__global uchar *mask, int mask_step, int mask_offset,
__global uchar *dst, int dst_step, int dst_offset,
int rows, int cols, int dst_step1)
__kernel void arithm_bitwise_and_with_mask_C1_D0 (
__global uchar *src1, int src1_step, int src1_offset,
__global uchar *src2, int src2_step, int src2_offset,
__global uchar *mask, int mask_step, int mask_offset,
__global uchar *dst, int dst_step, int dst_offset,
int rows, int cols, int dst_step1)
{
int x = get_global_id(0);
@@ -64,7 +68,10 @@ __kernel void arithm_bitwise_and_with_mask_C1_D0 (__global uchar *src1, int src1
{
x = x << 2;
#define dst_align (dst_offset & 3)
#ifdef dst_align
#undef dst_align
#endif
#define dst_align (dst_offset & 3)
int src1_index = mad24(y, src1_step, x + src1_offset - dst_align);
int src2_index = mad24(y, src2_step, x + src2_offset - dst_align);
int mask_index = mad24(y, mask_step, x + mask_offset - dst_align);
@@ -91,11 +98,12 @@ __kernel void arithm_bitwise_and_with_mask_C1_D0 (__global uchar *src1, int src1
__kernel void arithm_bitwise_and_with_mask_C1_D1 (__global char *src1, int src1_step, int src1_offset,
__global char *src2, int src2_step, int src2_offset,
__global uchar *mask, int mask_step, int mask_offset,
__global char *dst, int dst_step, int dst_offset,
int rows, int cols, int dst_step1)
__kernel void arithm_bitwise_and_with_mask_C1_D1 (
__global char *src1, int src1_step, int src1_offset,
__global char *src2, int src2_step, int src2_offset,
__global uchar *mask, int mask_step, int mask_offset,
__global char *dst, int dst_step, int dst_offset,
int rows, int cols, int dst_step1)
{
int x = get_global_id(0);
@@ -105,7 +113,10 @@ __kernel void arithm_bitwise_and_with_mask_C1_D1 (__global char *src1, int src1_
{
x = x << 2;
#define dst_align (dst_offset & 3)
#ifdef dst_align
#undef dst_align
#endif
#define dst_align (dst_offset & 3)
int src1_index = mad24(y, src1_step, x + src1_offset - dst_align);
int src2_index = mad24(y, src2_step, x + src2_offset - dst_align);
int mask_index = mad24(y, mask_step, x + mask_offset - dst_align);
@@ -132,11 +143,12 @@ __kernel void arithm_bitwise_and_with_mask_C1_D1 (__global char *src1, int src1_
__kernel void arithm_bitwise_and_with_mask_C1_D2 (__global ushort *src1, int src1_step, int src1_offset,
__global ushort *src2, int src2_step, int src2_offset,
__global uchar *mask, int mask_step, int mask_offset,
__global ushort *dst, int dst_step, int dst_offset,
int rows, int cols, int dst_step1)
__kernel void arithm_bitwise_and_with_mask_C1_D2 (
__global ushort *src1, int src1_step, int src1_offset,
__global ushort *src2, int src2_step, int src2_offset,
__global uchar *mask, int mask_step, int mask_offset,
__global ushort *dst, int dst_step, int dst_offset,
int rows, int cols, int dst_step1)
{
int x = get_global_id(0);
@@ -146,7 +158,10 @@ __kernel void arithm_bitwise_and_with_mask_C1_D2 (__global ushort *src1, int src
{
x = x << 1;
#define dst_align ((dst_offset >> 1) & 1)
#ifdef dst_align
#undef dst_align
#endif
#define dst_align ((dst_offset >> 1) & 1)
int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1));
int src2_index = mad24(y, src2_step, (x << 1) + src2_offset - (dst_align << 1));
int mask_index = mad24(y, mask_step, x + mask_offset - dst_align);
@@ -171,11 +186,12 @@ __kernel void arithm_bitwise_and_with_mask_C1_D2 (__global ushort *src1, int src
__kernel void arithm_bitwise_and_with_mask_C1_D3 (__global short *src1, int src1_step, int src1_offset,
__global short *src2, int src2_step, int src2_offset,
__global uchar *mask, int mask_step, int mask_offset,
__global short *dst, int dst_step, int dst_offset,
int rows, int cols, int dst_step1)
__kernel void arithm_bitwise_and_with_mask_C1_D3 (
__global short *src1, int src1_step, int src1_offset,
__global short *src2, int src2_step, int src2_offset,
__global uchar *mask, int mask_step, int mask_offset,
__global short *dst, int dst_step, int dst_offset,
int rows, int cols, int dst_step1)
{
int x = get_global_id(0);
@@ -185,7 +201,10 @@ __kernel void arithm_bitwise_and_with_mask_C1_D3 (__global short *src1, int src1
{
x = x << 1;
#define dst_align ((dst_offset >> 1) & 1)
#ifdef dst_align
#undef dst_align
#endif
#define dst_align ((dst_offset >> 1) & 1)
int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1));
int src2_index = mad24(y, src2_step, (x << 1) + src2_offset - (dst_align << 1));
int mask_index = mad24(y, mask_step, x + mask_offset - dst_align);
@@ -198,8 +217,8 @@ __kernel void arithm_bitwise_and_with_mask_C1_D3 (__global short *src1, int src1
short2 src2_data = vload2(0, (__global short *)((__global char *)src2 + src2_index));
uchar2 mask_data = vload2(0, mask + mask_index);
short2 data = *((__global short2 *)((__global uchar *)dst + dst_index));
short2 tmp_data = src1_data & src2_data;
short2 data = *((__global short2 *)((__global uchar *)dst + dst_index));
short2 tmp_data = src1_data & src2_data;
data.x = convert_short((mask_data.x) && (dst_index + 0 >= dst_start) && (dst_index + 0 < dst_end)) ? tmp_data.x : data.x;
data.y = convert_short((mask_data.y) && (dst_index + 2 >= dst_start) && (dst_index + 2 < dst_end)) ? tmp_data.y : data.y;
@@ -210,11 +229,12 @@ __kernel void arithm_bitwise_and_with_mask_C1_D3 (__global short *src1, int src1
__kernel void arithm_bitwise_and_with_mask_C1_D4 (__global int *src1, int src1_step, int src1_offset,
__global int *src2, int src2_step, int src2_offset,
__global uchar *mask, int mask_step, int mask_offset,
__global int *dst, int dst_step, int dst_offset,
int rows, int cols, int dst_step1)
__kernel void arithm_bitwise_and_with_mask_C1_D4 (
__global int *src1, int src1_step, int src1_offset,
__global int *src2, int src2_step, int src2_offset,
__global uchar *mask, int mask_step, int mask_offset,
__global int *dst, int dst_step, int dst_offset,
int rows, int cols, int dst_step1)
{
int x = get_global_id(0);
@@ -242,11 +262,12 @@ __kernel void arithm_bitwise_and_with_mask_C1_D4 (__global int *src1, int src1
__kernel void arithm_bitwise_and_with_mask_C1_D5 (__global char *src1, int src1_step, int src1_offset,
__global char *src2, int src2_step, int src2_offset,
__global uchar *mask, int mask_step, int mask_offset,
__global char *dst, int dst_step, int dst_offset,
int rows, int cols, int dst_step1)
__kernel void arithm_bitwise_and_with_mask_C1_D5 (
__global char *src1, int src1_step, int src1_offset,
__global char *src2, int src2_step, int src2_offset,
__global uchar *mask, int mask_step, int mask_offset,
__global char *dst, int dst_step, int dst_offset,
int rows, int cols, int dst_step1)
{
int x = get_global_id(0);
@@ -274,12 +295,12 @@ __kernel void arithm_bitwise_and_with_mask_C1_D5 (__global char *src1, int src1_
#if defined (DOUBLE_SUPPORT)
__kernel void arithm_bitwise_and_with_mask_C1_D6 (__global char *src1, int src1_step, int src1_offset,
__global char *src2, int src2_step, int src2_offset,
__global uchar *mask, int mask_step, int mask_offset,
__global char *dst, int dst_step, int dst_offset,
int rows, int cols, int dst_step1)
__kernel void arithm_bitwise_and_with_mask_C1_D6 (
__global char *src1, int src1_step, int src1_offset,
__global char *src2, int src2_step, int src2_offset,
__global uchar *mask, int mask_step, int mask_offset,
__global char *dst, int dst_step, int dst_offset,
int rows, int cols, int dst_step1)
{
int x = get_global_id(0);
@@ -305,15 +326,15 @@ __kernel void arithm_bitwise_and_with_mask_C1_D6 (__global char *src1, int src1_
}
}
#endif
__kernel void arithm_bitwise_and_with_mask_C2_D0 (__global uchar *src1, int src1_step, int src1_offset,
__global uchar *src2, int src2_step, int src2_offset,
__global uchar *mask, int mask_step, int mask_offset,
__global uchar *dst, int dst_step, int dst_offset,
int rows, int cols, int dst_step1)
__kernel void arithm_bitwise_and_with_mask_C2_D0 (
__global uchar *src1, int src1_step, int src1_offset,
__global uchar *src2, int src2_step, int src2_offset,
__global uchar *mask, int mask_step, int mask_offset,
__global uchar *dst, int dst_step, int dst_offset,
int rows, int cols, int dst_step1)
{
int x = get_global_id(0);
@@ -323,7 +344,10 @@ __kernel void arithm_bitwise_and_with_mask_C2_D0 (__global uchar *src1, int src1
{
x = x << 1;
#define dst_align ((dst_offset >> 1) & 1)
#ifdef dst_align
#undef dst_align
#endif
#define dst_align ((dst_offset >> 1) & 1)
int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1));
int src2_index = mad24(y, src2_step, (x << 1) + src2_offset - (dst_align << 1));
int mask_index = mad24(y, mask_step, x + mask_offset - dst_align);
@@ -347,11 +371,12 @@ __kernel void arithm_bitwise_and_with_mask_C2_D0 (__global uchar *src1, int src1
}
__kernel void arithm_bitwise_and_with_mask_C2_D1 (__global char *src1, int src1_step, int src1_offset,
__global char *src2, int src2_step, int src2_offset,
__global uchar *mask, int mask_step, int mask_offset,
__global char *dst, int dst_step, int dst_offset,
int rows, int cols, int dst_step1)
__kernel void arithm_bitwise_and_with_mask_C2_D1 (
__global char *src1, int src1_step, int src1_offset,
__global char *src2, int src2_step, int src2_offset,
__global uchar *mask, int mask_step, int mask_offset,
__global char *dst, int dst_step, int dst_offset,
int rows, int cols, int dst_step1)
{
int x = get_global_id(0);
@@ -361,7 +386,10 @@ __kernel void arithm_bitwise_and_with_mask_C2_D1 (__global char *src1, int src1_
{
x = x << 1;
#define dst_align ((dst_offset >> 1) & 1)
#ifdef dst_align
#undef dst_align
#endif
#define dst_align ((dst_offset >> 1) & 1)
int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1));
int src2_index = mad24(y, src2_step, (x << 1) + src2_offset - (dst_align << 1));
int mask_index = mad24(y, mask_step, x + mask_offset - dst_align);
@@ -384,11 +412,12 @@ __kernel void arithm_bitwise_and_with_mask_C2_D1 (__global char *src1, int src1_
}
}
__kernel void arithm_bitwise_and_with_mask_C2_D2 (__global ushort *src1, int src1_step, int src1_offset,
__global ushort *src2, int src2_step, int src2_offset,
__global uchar *mask, int mask_step, int mask_offset,
__global ushort *dst, int dst_step, int dst_offset,
int rows, int cols, int dst_step1)
__kernel void arithm_bitwise_and_with_mask_C2_D2 (
__global ushort *src1, int src1_step, int src1_offset,
__global ushort *src2, int src2_step, int src2_offset,
__global uchar *mask, int mask_step, int mask_offset,
__global ushort *dst, int dst_step, int dst_offset,
int rows, int cols, int dst_step1)
{
int x = get_global_id(0);
@@ -413,11 +442,12 @@ __kernel void arithm_bitwise_and_with_mask_C2_D2 (__global ushort *src1, int src
*((__global ushort2 *)((__global char *)dst + dst_index)) = data;
}
}
__kernel void arithm_bitwise_and_with_mask_C2_D3 (__global short *src1, int src1_step, int src1_offset,
__global short *src2, int src2_step, int src2_offset,
__global uchar *mask, int mask_step, int mask_offset,
__global short *dst, int dst_step, int dst_offset,
int rows, int cols, int dst_step1)
__kernel void arithm_bitwise_and_with_mask_C2_D3 (
__global short *src1, int src1_step, int src1_offset,
__global short *src2, int src2_step, int src2_offset,
__global uchar *mask, int mask_step, int mask_offset,
__global short *dst, int dst_step, int dst_offset,
int rows, int cols, int dst_step1)
{
int x = get_global_id(0);
@@ -442,11 +472,12 @@ __kernel void arithm_bitwise_and_with_mask_C2_D3 (__global short *src1, int src1
*((__global short2 *)((__global char *)dst + dst_index)) = data;
}
}
__kernel void arithm_bitwise_and_with_mask_C2_D4 (__global int *src1, int src1_step, int src1_offset,
__global int *src2, int src2_step, int src2_offset,
__global uchar *mask, int mask_step, int mask_offset,
__global int *dst, int dst_step, int dst_offset,
int rows, int cols, int dst_step1)
__kernel void arithm_bitwise_and_with_mask_C2_D4 (
__global int *src1, int src1_step, int src1_offset,
__global int *src2, int src2_step, int src2_offset,
__global uchar *mask, int mask_step, int mask_offset,
__global int *dst, int dst_step, int dst_offset,
int rows, int cols, int dst_step1)
{
int x = get_global_id(0);
@@ -471,11 +502,12 @@ __kernel void arithm_bitwise_and_with_mask_C2_D4 (__global int *src1, int src1
*((__global int2 *)((__global char *)dst + dst_index)) = data;
}
}
__kernel void arithm_bitwise_and_with_mask_C2_D5 (__global char *src1, int src1_step, int src1_offset,
__global char *src2, int src2_step, int src2_offset,
__global uchar *mask, int mask_step, int mask_offset,
__global char *dst, int dst_step, int dst_offset,
int rows, int cols, int dst_step1)
__kernel void arithm_bitwise_and_with_mask_C2_D5 (
__global char *src1, int src1_step, int src1_offset,
__global char *src2, int src2_step, int src2_offset,
__global uchar *mask, int mask_step, int mask_offset,
__global char *dst, int dst_step, int dst_offset,
int rows, int cols, int dst_step1)
{
int x = get_global_id(0);
@@ -500,12 +532,13 @@ __kernel void arithm_bitwise_and_with_mask_C2_D5 (__global char *src1, int src1_
*((__global char8 *)((__global char *)dst + dst_index)) = data;
}
}
#if defined (DOUBLE_SUPPORT)
__kernel void arithm_bitwise_and_with_mask_C2_D6 (__global char *src1, int src1_step, int src1_offset,
__global char *src2, int src2_step, int src2_offset,
__global uchar *mask, int mask_step, int mask_offset,
__global char *dst, int dst_step, int dst_offset,
int rows, int cols, int dst_step1)
__kernel void arithm_bitwise_and_with_mask_C2_D6 (
__global char *src1, int src1_step, int src1_offset,
__global char *src2, int src2_step, int src2_offset,
__global uchar *mask, int mask_step, int mask_offset,
__global char *dst, int dst_step, int dst_offset,
int rows, int cols, int dst_step1)
{
int x = get_global_id(0);
@@ -530,15 +563,15 @@ __kernel void arithm_bitwise_and_with_mask_C2_D6 (__global char *src1, int src1_
*((__global char16 *)((__global char *)dst + dst_index)) = data;
}
}
#endif
__kernel void arithm_bitwise_and_with_mask_C3_D0 (__global uchar *src1, int src1_step, int src1_offset,
__global uchar *src2, int src2_step, int src2_offset,
__global uchar *mask, int mask_step, int mask_offset,
__global uchar *dst, int dst_step, int dst_offset,
int rows, int cols, int dst_step1)
__kernel void arithm_bitwise_and_with_mask_C3_D0 (
__global uchar *src1, int src1_step, int src1_offset,
__global uchar *src2, int src2_step, int src2_offset,
__global uchar *mask, int mask_step, int mask_offset,
__global uchar *dst, int dst_step, int dst_offset,
int rows, int cols, int dst_step1)
{
int x = get_global_id(0);
@@ -548,7 +581,10 @@ __kernel void arithm_bitwise_and_with_mask_C3_D0 (__global uchar *src1, int src1
{
x = x << 2;
#define dst_align (((dst_offset % dst_step) / 3 ) & 3)
#ifdef dst_align
#undef dst_align
#endif
#define dst_align (((dst_offset % dst_step) / 3 ) & 3)
int src1_index = mad24(y, src1_step, (x * 3) + src1_offset - (dst_align * 3));
int src2_index = mad24(y, src2_step, (x * 3) + src2_offset - (dst_align * 3));
int mask_index = mad24(y, mask_step, x + mask_offset - dst_align);
@@ -596,11 +632,12 @@ __kernel void arithm_bitwise_and_with_mask_C3_D0 (__global uchar *src1, int src1
}
__kernel void arithm_bitwise_and_with_mask_C3_D1 (__global char *src1, int src1_step, int src1_offset,
__global char *src2, int src2_step, int src2_offset,
__global uchar *mask, int mask_step, int mask_offset,
__global char *dst, int dst_step, int dst_offset,
int rows, int cols, int dst_step1)
__kernel void arithm_bitwise_and_with_mask_C3_D1 (
__global char *src1, int src1_step, int src1_offset,
__global char *src2, int src2_step, int src2_offset,
__global uchar *mask, int mask_step, int mask_offset,
__global char *dst, int dst_step, int dst_offset,
int rows, int cols, int dst_step1)
{
int x = get_global_id(0);
@@ -610,7 +647,10 @@ __kernel void arithm_bitwise_and_with_mask_C3_D1 (__global char *src1, int src1_
{
x = x << 2;
#define dst_align (((dst_offset % dst_step) / 3 ) & 3)
#ifdef dst_align
#undef dst_align
#endif
#define dst_align (((dst_offset % dst_step) / 3 ) & 3)
int src1_index = mad24(y, src1_step, (x * 3) + src1_offset - (dst_align * 3));
int src2_index = mad24(y, src2_step, (x * 3) + src2_offset - (dst_align * 3));
int mask_index = mad24(y, mask_step, x + mask_offset - dst_align);
@@ -657,11 +697,12 @@ __kernel void arithm_bitwise_and_with_mask_C3_D1 (__global char *src1, int src1_
}
}
__kernel void arithm_bitwise_and_with_mask_C3_D2 (__global ushort *src1, int src1_step, int src1_offset,
__global ushort *src2, int src2_step, int src2_offset,
__global uchar *mask, int mask_step, int mask_offset,
__global ushort *dst, int dst_step, int dst_offset,
int rows, int cols, int dst_step1)
__kernel void arithm_bitwise_and_with_mask_C3_D2 (
__global ushort *src1, int src1_step, int src1_offset,
__global ushort *src2, int src2_step, int src2_offset,
__global uchar *mask, int mask_step, int mask_offset,
__global ushort *dst, int dst_step, int dst_offset,
int rows, int cols, int dst_step1)
{
int x = get_global_id(0);
@@ -671,7 +712,10 @@ __kernel void arithm_bitwise_and_with_mask_C3_D2 (__global ushort *src1, int src
{
x = x << 1;
#define dst_align (((dst_offset % dst_step) / 6 ) & 1)
#ifdef dst_align
#undef dst_align
#endif
#define dst_align (((dst_offset % dst_step) / 6 ) & 1)
int src1_index = mad24(y, src1_step, (x * 6) + src1_offset - (dst_align * 6));
int src2_index = mad24(y, src2_step, (x * 6) + src2_offset - (dst_align * 6));
int mask_index = mad24(y, mask_step, x + mask_offset - dst_align);
@@ -701,23 +745,24 @@ __kernel void arithm_bitwise_and_with_mask_C3_D2 (__global ushort *src1, int src
data_0.xy = ((mask_data.x) && (dst_index + 0 >= dst_start)) ? tmp_data_0.xy : data_0.xy;
data_1.x = ((mask_data.x) && (dst_index + 0 >= dst_start) && (dst_index + 0 < dst_end))
? tmp_data_1.x : data_1.x;
? tmp_data_1.x : data_1.x;
data_1.y = ((mask_data.y) && (dst_index + 6 >= dst_start) && (dst_index + 6 < dst_end))
? tmp_data_1.y : data_1.y;
? tmp_data_1.y : data_1.y;
data_2.xy = ((mask_data.y) && (dst_index + 6 >= dst_start) && (dst_index + 6 < dst_end))
? tmp_data_2.xy : data_2.xy;
? tmp_data_2.xy : data_2.xy;
*((__global ushort2 *)((__global char *)dst + dst_index + 0))= data_0;
*((__global ushort2 *)((__global char *)dst + dst_index + 4))= data_1;
*((__global ushort2 *)((__global char *)dst + dst_index + 8))= data_2;
*((__global ushort2 *)((__global char *)dst + dst_index + 0))= data_0;
*((__global ushort2 *)((__global char *)dst + dst_index + 4))= data_1;
*((__global ushort2 *)((__global char *)dst + dst_index + 8))= data_2;
}
}
__kernel void arithm_bitwise_and_with_mask_C3_D3 (__global short *src1, int src1_step, int src1_offset,
__global short *src2, int src2_step, int src2_offset,
__global uchar *mask, int mask_step, int mask_offset,
__global short *dst, int dst_step, int dst_offset,
int rows, int cols, int dst_step1)
__kernel void arithm_bitwise_and_with_mask_C3_D3 (
__global short *src1, int src1_step, int src1_offset,
__global short *src2, int src2_step, int src2_offset,
__global uchar *mask, int mask_step, int mask_offset,
__global short *dst, int dst_step, int dst_offset,
int rows, int cols, int dst_step1)
{
int x = get_global_id(0);
@@ -727,7 +772,10 @@ __kernel void arithm_bitwise_and_with_mask_C3_D3 (__global short *src1, int src1
{
x = x << 1;
#define dst_align (((dst_offset % dst_step) / 6 ) & 1)
#ifdef dst_align
#undef dst_align
#endif
#define dst_align (((dst_offset % dst_step) / 6 ) & 1)
int src1_index = mad24(y, src1_step, (x * 6) + src1_offset - (dst_align * 6));
int src2_index = mad24(y, src2_step, (x * 6) + src2_offset - (dst_align * 6));
int mask_index = mad24(y, mask_step, x + mask_offset - dst_align);
@@ -757,23 +805,24 @@ __kernel void arithm_bitwise_and_with_mask_C3_D3 (__global short *src1, int src1
data_0.xy = ((mask_data.x) && (dst_index + 0 >= dst_start)) ? tmp_data_0.xy : data_0.xy;
data_1.x = ((mask_data.x) && (dst_index + 0 >= dst_start) && (dst_index + 0 < dst_end))
? tmp_data_1.x : data_1.x;
? tmp_data_1.x : data_1.x;
data_1.y = ((mask_data.y) && (dst_index + 6 >= dst_start) && (dst_index + 6 < dst_end))
? tmp_data_1.y : data_1.y;
? tmp_data_1.y : data_1.y;
data_2.xy = ((mask_data.y) && (dst_index + 6 >= dst_start) && (dst_index + 6 < dst_end))
? tmp_data_2.xy : data_2.xy;
? tmp_data_2.xy : data_2.xy;
*((__global short2 *)((__global char *)dst + dst_index + 0))= data_0;
*((__global short2 *)((__global char *)dst + dst_index + 4))= data_1;
*((__global short2 *)((__global char *)dst + dst_index + 8))= data_2;
*((__global short2 *)((__global char *)dst + dst_index + 0))= data_0;
*((__global short2 *)((__global char *)dst + dst_index + 4))= data_1;
*((__global short2 *)((__global char *)dst + dst_index + 8))= data_2;
}
}
__kernel void arithm_bitwise_and_with_mask_C3_D4 (__global int *src1, int src1_step, int src1_offset,
__global int *src2, int src2_step, int src2_offset,
__global uchar *mask, int mask_step, int mask_offset,
__global int *dst, int dst_step, int dst_offset,
int rows, int cols, int dst_step1)
__kernel void arithm_bitwise_and_with_mask_C3_D4 (
__global int *src1, int src1_step, int src1_offset,
__global int *src2, int src2_step, int src2_offset,
__global uchar *mask, int mask_step, int mask_offset,
__global int *dst, int dst_step, int dst_offset,
int rows, int cols, int dst_step1)
{
int x = get_global_id(0);
@@ -808,16 +857,17 @@ __kernel void arithm_bitwise_and_with_mask_C3_D4 (__global int *src1, int src1
data_1 = mask_data ? tmp_data_1 : data_1;
data_2 = mask_data ? tmp_data_2 : data_2;
*((__global int *)((__global char *)dst + dst_index + 0))= data_0;
*((__global int *)((__global char *)dst + dst_index + 4))= data_1;
*((__global int *)((__global char *)dst + dst_index + 8))= data_2;
*((__global int *)((__global char *)dst + dst_index + 0))= data_0;
*((__global int *)((__global char *)dst + dst_index + 4))= data_1;
*((__global int *)((__global char *)dst + dst_index + 8))= data_2;
}
}
__kernel void arithm_bitwise_and_with_mask_C3_D5 (__global char *src1, int src1_step, int src1_offset,
__global char *src2, int src2_step, int src2_offset,
__global uchar *mask, int mask_step, int mask_offset,
__global char *dst, int dst_step, int dst_offset,
int rows, int cols, int dst_step1)
__kernel void arithm_bitwise_and_with_mask_C3_D5 (
__global char *src1, int src1_step, int src1_offset,
__global char *src2, int src2_step, int src2_offset,
__global uchar *mask, int mask_step, int mask_offset,
__global char *dst, int dst_step, int dst_offset,
int rows, int cols, int dst_step1)
{
int x = get_global_id(0);
@@ -852,17 +902,18 @@ __kernel void arithm_bitwise_and_with_mask_C3_D5 (__global char *src1, int src1_
data_1 = mask_data ? tmp_data_1 : data_1;
data_2 = mask_data ? tmp_data_2 : data_2;
*((__global char4 *)((__global char *)dst + dst_index + 0))= data_0;
*((__global char4 *)((__global char *)dst + dst_index + 4))= data_1;
*((__global char4 *)((__global char *)dst + dst_index + 8))= data_2;
*((__global char4 *)((__global char *)dst + dst_index + 0))= data_0;
*((__global char4 *)((__global char *)dst + dst_index + 4))= data_1;
*((__global char4 *)((__global char *)dst + dst_index + 8))= data_2;
}
}
#if defined (DOUBLE_SUPPORT)
__kernel void arithm_bitwise_and_with_mask_C3_D6 (__global char *src1, int src1_step, int src1_offset,
__global char *src2, int src2_step, int src2_offset,
__global uchar *mask, int mask_step, int mask_offset,
__global char *dst, int dst_step, int dst_offset,
int rows, int cols, int dst_step1)
__kernel void arithm_bitwise_and_with_mask_C3_D6 (
__global char *src1, int src1_step, int src1_offset,
__global char *src2, int src2_step, int src2_offset,
__global uchar *mask, int mask_step, int mask_offset,
__global char *dst, int dst_step, int dst_offset,
int rows, int cols, int dst_step1)
{
int x = get_global_id(0);
@@ -897,20 +948,20 @@ __kernel void arithm_bitwise_and_with_mask_C3_D6 (__global char *src1, int src1_
data_1 = mask_data ? tmp_data_1 : data_1;
data_2 = mask_data ? tmp_data_2 : data_2;
*((__global char8 *)((__global char *)dst + dst_index + 0 ))= data_0;
*((__global char8 *)((__global char *)dst + dst_index + 8 ))= data_1;
*((__global char8 *)((__global char *)dst + dst_index + 16))= data_2;
*((__global char8 *)((__global char *)dst + dst_index + 0 ))= data_0;
*((__global char8 *)((__global char *)dst + dst_index + 8 ))= data_1;
*((__global char8 *)((__global char *)dst + dst_index + 16))= data_2;
}
}
#endif
__kernel void arithm_bitwise_and_with_mask_C4_D0 (__global uchar *src1, int src1_step, int src1_offset,
__global uchar *src2, int src2_step, int src2_offset,
__global uchar *mask, int mask_step, int mask_offset,
__global uchar *dst, int dst_step, int dst_offset,
int rows, int cols, int dst_step1)
__kernel void arithm_bitwise_and_with_mask_C4_D0 (
__global uchar *src1, int src1_step, int src1_offset,
__global uchar *src2, int src2_step, int src2_offset,
__global uchar *mask, int mask_step, int mask_offset,
__global uchar *dst, int dst_step, int dst_offset,
int rows, int cols, int dst_step1)
{
int x = get_global_id(0);
@@ -937,11 +988,12 @@ __kernel void arithm_bitwise_and_with_mask_C4_D0 (__global uchar *src1, int src1
}
__kernel void arithm_bitwise_and_with_mask_C4_D1 (__global char *src1, int src1_step, int src1_offset,
__global char *src2, int src2_step, int src2_offset,
__global uchar *mask, int mask_step, int mask_offset,
__global char *dst, int dst_step, int dst_offset,
int rows, int cols, int dst_step1)
__kernel void arithm_bitwise_and_with_mask_C4_D1 (
__global char *src1, int src1_step, int src1_offset,
__global char *src2, int src2_step, int src2_offset,
__global uchar *mask, int mask_step, int mask_offset,
__global char *dst, int dst_step, int dst_offset,
int rows, int cols, int dst_step1)
{
int x = get_global_id(0);
@@ -967,11 +1019,12 @@ __kernel void arithm_bitwise_and_with_mask_C4_D1 (__global char *src1, int src1_
}
}
__kernel void arithm_bitwise_and_with_mask_C4_D2 (__global ushort *src1, int src1_step, int src1_offset,
__global ushort *src2, int src2_step, int src2_offset,
__global uchar *mask, int mask_step, int mask_offset,
__global ushort *dst, int dst_step, int dst_offset,
int rows, int cols, int dst_step1)
__kernel void arithm_bitwise_and_with_mask_C4_D2 (
__global ushort *src1, int src1_step, int src1_offset,
__global ushort *src2, int src2_step, int src2_offset,
__global uchar *mask, int mask_step, int mask_offset,
__global ushort *dst, int dst_step, int dst_offset,
int rows, int cols, int dst_step1)
{
int x = get_global_id(0);
@@ -996,11 +1049,12 @@ __kernel void arithm_bitwise_and_with_mask_C4_D2 (__global ushort *src1, int src
*((__global ushort4 *)((__global char *)dst + dst_index)) = data;
}
}
__kernel void arithm_bitwise_and_with_mask_C4_D3 (__global short *src1, int src1_step, int src1_offset,
__global short *src2, int src2_step, int src2_offset,
__global uchar *mask, int mask_step, int mask_offset,
__global short *dst, int dst_step, int dst_offset,
int rows, int cols, int dst_step1)
__kernel void arithm_bitwise_and_with_mask_C4_D3 (
__global short *src1, int src1_step, int src1_offset,
__global short *src2, int src2_step, int src2_offset,
__global uchar *mask, int mask_step, int mask_offset,
__global short *dst, int dst_step, int dst_offset,
int rows, int cols, int dst_step1)
{
int x = get_global_id(0);
@@ -1025,11 +1079,12 @@ __kernel void arithm_bitwise_and_with_mask_C4_D3 (__global short *src1, int src1
*((__global short4 *)((__global char *)dst + dst_index)) = data;
}
}
__kernel void arithm_bitwise_and_with_mask_C4_D4 (__global int *src1, int src1_step, int src1_offset,
__global int *src2, int src2_step, int src2_offset,
__global uchar *mask, int mask_step, int mask_offset,
__global int *dst, int dst_step, int dst_offset,
int rows, int cols, int dst_step1)
__kernel void arithm_bitwise_and_with_mask_C4_D4 (
__global int *src1, int src1_step, int src1_offset,
__global int *src2, int src2_step, int src2_offset,
__global uchar *mask, int mask_step, int mask_offset,
__global int *dst, int dst_step, int dst_offset,
int rows, int cols, int dst_step1)
{
int x = get_global_id(0);
@@ -1054,11 +1109,12 @@ __kernel void arithm_bitwise_and_with_mask_C4_D4 (__global int *src1, int src1
*((__global int4 *)((__global char *)dst + dst_index)) = data;
}
}
__kernel void arithm_bitwise_and_with_mask_C4_D5 (__global char *src1, int src1_step, int src1_offset,
__global char *src2, int src2_step, int src2_offset,
__global uchar *mask, int mask_step, int mask_offset,
__global char *dst, int dst_step, int dst_offset,
int rows, int cols, int dst_step1)
__kernel void arithm_bitwise_and_with_mask_C4_D5 (
__global char *src1, int src1_step, int src1_offset,
__global char *src2, int src2_step, int src2_offset,
__global uchar *mask, int mask_step, int mask_offset,
__global char *dst, int dst_step, int dst_offset,
int rows, int cols, int dst_step1)
{
int x = get_global_id(0);
@@ -1084,11 +1140,12 @@ __kernel void arithm_bitwise_and_with_mask_C4_D5 (__global char *src1, int src1_
}
}
#if defined (DOUBLE_SUPPORT)
__kernel void arithm_bitwise_and_with_mask_C4_D6 (__global char *src1, int src1_step, int src1_offset,
__global char *src2, int src2_step, int src2_offset,
__global uchar *mask, int mask_step, int mask_offset,
__global char *dst, int dst_step, int dst_offset,
int rows, int cols, int dst_step1)
__kernel void arithm_bitwise_and_with_mask_C4_D6 (
__global char *src1, int src1_step, int src1_offset,
__global char *src2, int src2_step, int src2_offset,
__global uchar *mask, int mask_step, int mask_offset,
__global char *dst, int dst_step, int dst_offset,
int rows, int cols, int dst_step1)
{
int x = get_global_id(0);

View File

@@ -42,19 +42,22 @@
// the use of this software, even if advised of the possibility of such damage.
//
//
#if defined (__ATI__)
#pragma OPENCL EXTENSION cl_amd_fp64:enable
#elif defined (__NVIDIA__)
#if defined (DOUBLE_SUPPORT)
#ifdef cl_khr_fp64
#pragma OPENCL EXTENSION cl_khr_fp64:enable
#elif defined (cl_amd_fp64)
#pragma OPENCL EXTENSION cl_amd_fp64:enable
#endif
#endif
//////////////////////////////////////////////////////////////////////////////////////////////////////
////////////////////////////////////////////BITWISE_AND////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////////////////////////////
/**************************************and with scalar without mask**************************************/
__kernel void arithm_s_bitwise_and_C1_D0 (__global uchar *src1, int src1_step, int src1_offset,
__global uchar *dst, int dst_step, int dst_offset,
uchar4 src2, int rows, int cols, int dst_step1)
__kernel void arithm_s_bitwise_and_C1_D0 (
__global uchar *src1, int src1_step, int src1_offset,
__global uchar *dst, int dst_step, int dst_offset,
uchar4 src2, int rows, int cols, int dst_step1)
{
int x = get_global_id(0);
int y = get_global_id(1);
@@ -63,7 +66,10 @@ __kernel void arithm_s_bitwise_and_C1_D0 (__global uchar *src1, int src1_step,
{
x = x << 2;
#define dst_align (dst_offset & 3)
#ifdef dst_align
#undef dst_align
#endif
#define dst_align (dst_offset & 3)
int src1_index = mad24(y, src1_step, x + src1_offset - dst_align);
int dst_start = mad24(y, dst_step, dst_offset);
@@ -86,9 +92,10 @@ __kernel void arithm_s_bitwise_and_C1_D0 (__global uchar *src1, int src1_step,
}
__kernel void arithm_s_bitwise_and_C1_D1 (__global char *src1, int src1_step, int src1_offset,
__global char *dst, int dst_step, int dst_offset,
char4 src2, int rows, int cols, int dst_step1)
__kernel void arithm_s_bitwise_and_C1_D1 (
__global char *src1, int src1_step, int src1_offset,
__global char *dst, int dst_step, int dst_offset,
char4 src2, int rows, int cols, int dst_step1)
{
int x = get_global_id(0);
int y = get_global_id(1);
@@ -97,7 +104,10 @@ __kernel void arithm_s_bitwise_and_C1_D1 (__global char *src1, int src1_step,
{
x = x << 2;
#define dst_align (dst_offset & 3)
#ifdef dst_align
#undef dst_align
#endif
#define dst_align (dst_offset & 3)
int src1_index = mad24(y, src1_step, x + src1_offset - dst_align);
int dst_start = mad24(y, dst_step, dst_offset);
@@ -119,9 +129,10 @@ __kernel void arithm_s_bitwise_and_C1_D1 (__global char *src1, int src1_step,
}
}
__kernel void arithm_s_bitwise_and_C1_D2 (__global ushort *src1, int src1_step, int src1_offset,
__global ushort *dst, int dst_step, int dst_offset,
ushort4 src2, int rows, int cols, int dst_step1)
__kernel void arithm_s_bitwise_and_C1_D2 (
__global ushort *src1, int src1_step, int src1_offset,
__global ushort *dst, int dst_step, int dst_offset,
ushort4 src2, int rows, int cols, int dst_step1)
{
int x = get_global_id(0);
@@ -131,7 +142,10 @@ __kernel void arithm_s_bitwise_and_C1_D2 (__global ushort *src1, int src1_step
{
x = x << 1;
#define dst_align ((dst_offset >> 1) & 1)
#ifdef dst_align
#undef dst_align
#endif
#define dst_align ((dst_offset >> 1) & 1)
int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1));
int dst_start = mad24(y, dst_step, dst_offset);
@@ -150,9 +164,10 @@ __kernel void arithm_s_bitwise_and_C1_D2 (__global ushort *src1, int src1_step
*((__global ushort2 *)((__global uchar *)dst + dst_index)) = data;
}
}
__kernel void arithm_s_bitwise_and_C1_D3 (__global short *src1, int src1_step, int src1_offset,
__global short *dst, int dst_step, int dst_offset,
short4 src2, int rows, int cols, int dst_step1)
__kernel void arithm_s_bitwise_and_C1_D3 (
__global short *src1, int src1_step, int src1_offset,
__global short *dst, int dst_step, int dst_offset,
short4 src2, int rows, int cols, int dst_step1)
{
int x = get_global_id(0);
@@ -162,7 +177,10 @@ __kernel void arithm_s_bitwise_and_C1_D3 (__global short *src1, int src1_step,
{
x = x << 1;
#define dst_align ((dst_offset >> 1) & 1)
#ifdef dst_align
#undef dst_align
#endif
#define dst_align ((dst_offset >> 1) & 1)
int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1));
int dst_start = mad24(y, dst_step, dst_offset);
@@ -181,9 +199,10 @@ __kernel void arithm_s_bitwise_and_C1_D3 (__global short *src1, int src1_step,
*((__global short2 *)((__global uchar *)dst + dst_index)) = data;
}
}
__kernel void arithm_s_bitwise_and_C1_D4 (__global int *src1, int src1_step, int src1_offset,
__global int *dst, int dst_step, int dst_offset,
int4 src2, int rows, int cols, int dst_step1)
__kernel void arithm_s_bitwise_and_C1_D4 (
__global int *src1, int src1_step, int src1_offset,
__global int *dst, int dst_step, int dst_offset,
int4 src2, int rows, int cols, int dst_step1)
{
int x = get_global_id(0);
@@ -202,9 +221,10 @@ __kernel void arithm_s_bitwise_and_C1_D4 (__global int *src1, int src1_step, i
*((__global int *)((__global char *)dst + dst_index)) = data;
}
}
__kernel void arithm_s_bitwise_and_C1_D5 (__global char *src1, int src1_step, int src1_offset,
__global char *dst, int dst_step, int dst_offset,
char16 src2, int rows, int cols, int dst_step1)
__kernel void arithm_s_bitwise_and_C1_D5 (
__global char *src1, int src1_step, int src1_offset,
__global char *dst, int dst_step, int dst_offset,
char16 src2, int rows, int cols, int dst_step1)
{
int x = get_global_id(0);
@@ -232,11 +252,11 @@ __kernel void arithm_s_bitwise_and_C1_D5 (__global char *src1, int src1_step,
*((__global char4 *)((__global char *)dst + dst_index)) = data;
}
}
#if defined (DOUBLE_SUPPORT)
__kernel void arithm_s_bitwise_and_C1_D6 (__global short *src1, int src1_step, int src1_offset,
__global short *dst, int dst_step, int dst_offset,
short16 src2, int rows, int cols, int dst_step1)
__kernel void arithm_s_bitwise_and_C1_D6 (
__global short *src1, int src1_step, int src1_offset,
__global short *dst, int dst_step, int dst_offset,
short16 src2, int rows, int cols, int dst_step1)
{
int x = get_global_id(0);
@@ -256,9 +276,10 @@ __kernel void arithm_s_bitwise_and_C1_D6 (__global short *src1, int src1_step, i
}
}
#endif
__kernel void arithm_s_bitwise_and_C2_D0 (__global uchar *src1, int src1_step, int src1_offset,
__global uchar *dst, int dst_step, int dst_offset,
uchar4 src2, int rows, int cols, int dst_step1)
__kernel void arithm_s_bitwise_and_C2_D0 (
__global uchar *src1, int src1_step, int src1_offset,
__global uchar *dst, int dst_step, int dst_offset,
uchar4 src2, int rows, int cols, int dst_step1)
{
int x = get_global_id(0);
@@ -268,7 +289,10 @@ __kernel void arithm_s_bitwise_and_C2_D0 (__global uchar *src1, int src1_step,
{
x = x << 1;
#define dst_align ((dst_offset >> 1) & 1)
#ifdef dst_align
#undef dst_align
#endif
#define dst_align ((dst_offset >> 1) & 1)
int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1));
int dst_start = mad24(y, dst_step, dst_offset);
@@ -290,9 +314,10 @@ __kernel void arithm_s_bitwise_and_C2_D0 (__global uchar *src1, int src1_step,
}
__kernel void arithm_s_bitwise_and_C2_D1 (__global char *src1, int src1_step, int src1_offset,
__global char *dst, int dst_step, int dst_offset,
char4 src2, int rows, int cols, int dst_step1)
__kernel void arithm_s_bitwise_and_C2_D1 (
__global char *src1, int src1_step, int src1_offset,
__global char *dst, int dst_step, int dst_offset,
char4 src2, int rows, int cols, int dst_step1)
{
int x = get_global_id(0);
@@ -302,7 +327,10 @@ __kernel void arithm_s_bitwise_and_C2_D1 (__global char *src1, int src1_step,
{
x = x << 1;
#define dst_align ((dst_offset >> 1) & 1)
#ifdef dst_align
#undef dst_align
#endif
#define dst_align ((dst_offset >> 1) & 1)
int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1));
int dst_start = mad24(y, dst_step, dst_offset);
@@ -322,9 +350,10 @@ __kernel void arithm_s_bitwise_and_C2_D1 (__global char *src1, int src1_step,
}
}
__kernel void arithm_s_bitwise_and_C2_D2 (__global ushort *src1, int src1_step, int src1_offset,
__global ushort *dst, int dst_step, int dst_offset,
ushort4 src2, int rows, int cols, int dst_step1)
__kernel void arithm_s_bitwise_and_C2_D2 (
__global ushort *src1, int src1_step, int src1_offset,
__global ushort *dst, int dst_step, int dst_offset,
ushort4 src2, int rows, int cols, int dst_step1)
{
int x = get_global_id(0);
@@ -343,9 +372,10 @@ __kernel void arithm_s_bitwise_and_C2_D2 (__global ushort *src1, int src1_step
*((__global ushort2 *)((__global char *)dst + dst_index)) = data;
}
}
__kernel void arithm_s_bitwise_and_C2_D3 (__global short *src1, int src1_step, int src1_offset,
__global short *dst, int dst_step, int dst_offset,
short4 src2, int rows, int cols, int dst_step1)
__kernel void arithm_s_bitwise_and_C2_D3 (
__global short *src1, int src1_step, int src1_offset,
__global short *dst, int dst_step, int dst_offset,
short4 src2, int rows, int cols, int dst_step1)
{
int x = get_global_id(0);
@@ -364,9 +394,10 @@ __kernel void arithm_s_bitwise_and_C2_D3 (__global short *src1, int src1_step,
*((__global short2 *)((__global char *)dst + dst_index)) = data;
}
}
__kernel void arithm_s_bitwise_and_C2_D4 (__global int *src1, int src1_step, int src1_offset,
__global int *dst, int dst_step, int dst_offset,
int4 src2, int rows, int cols, int dst_step1)
__kernel void arithm_s_bitwise_and_C2_D4 (
__global int *src1, int src1_step, int src1_offset,
__global int *dst, int dst_step, int dst_offset,
int4 src2, int rows, int cols, int dst_step1)
{
int x = get_global_id(0);
@@ -384,9 +415,10 @@ __kernel void arithm_s_bitwise_and_C2_D4 (__global int *src1, int src1_step, i
*((__global int2 *)((__global char *)dst + dst_index)) = data;
}
}
__kernel void arithm_s_bitwise_and_C2_D5 (__global char *src1, int src1_step, int src1_offset,
__global char *dst, int dst_step, int dst_offset,
char16 src2, int rows, int cols, int dst_step1)
__kernel void arithm_s_bitwise_and_C2_D5 (
__global char *src1, int src1_step, int src1_offset,
__global char *dst, int dst_step, int dst_offset,
char16 src2, int rows, int cols, int dst_step1)
{
int x = get_global_id(0);
@@ -403,12 +435,13 @@ __kernel void arithm_s_bitwise_and_C2_D5 (__global char *src1, int src1_step,
char8 tmp_data = src1_data & src2_data;
*((__global char8 *)((__global char *)dst + dst_index)) = tmp_data;
}
}
}
#if defined (DOUBLE_SUPPORT)
__kernel void arithm_s_bitwise_and_C2_D6 (__global short *src1, int src1_step, int src1_offset,
__global short *dst, int dst_step, int dst_offset,
short16 src2, int rows, int cols, int dst_step1)
__kernel void arithm_s_bitwise_and_C2_D6 (
__global short *src1, int src1_step, int src1_offset,
__global short *dst, int dst_step, int dst_offset,
short16 src2, int rows, int cols, int dst_step1)
{
int x = get_global_id(0);
@@ -428,9 +461,10 @@ __kernel void arithm_s_bitwise_and_C2_D6 (__global short *src1, int src1_step, i
}
}
#endif
__kernel void arithm_s_bitwise_and_C3_D0 (__global uchar *src1, int src1_step, int src1_offset,
__global uchar *dst, int dst_step, int dst_offset,
uchar4 src2, int rows, int cols, int dst_step1)
__kernel void arithm_s_bitwise_and_C3_D0 (
__global uchar *src1, int src1_step, int src1_offset,
__global uchar *dst, int dst_step, int dst_offset,
uchar4 src2, int rows, int cols, int dst_step1)
{
int x = get_global_id(0);
@@ -440,7 +474,10 @@ __kernel void arithm_s_bitwise_and_C3_D0 (__global uchar *src1, int src1_step,
{
x = x << 2;
#define dst_align (((dst_offset % dst_step) / 3 ) & 3)
#ifdef dst_align
#undef dst_align
#endif
#define dst_align (((dst_offset % dst_step) / 3 ) & 3)
int src1_index = mad24(y, src1_step, (x * 3) + src1_offset - (dst_align * 3));
int dst_start = mad24(y, dst_step, dst_offset);
@@ -484,9 +521,10 @@ __kernel void arithm_s_bitwise_and_C3_D0 (__global uchar *src1, int src1_step,
}
__kernel void arithm_s_bitwise_and_C3_D1 (__global char *src1, int src1_step, int src1_offset,
__global char *dst, int dst_step, int dst_offset,
char4 src2, int rows, int cols, int dst_step1)
__kernel void arithm_s_bitwise_and_C3_D1 (
__global char *src1, int src1_step, int src1_offset,
__global char *dst, int dst_step, int dst_offset,
char4 src2, int rows, int cols, int dst_step1)
{
int x = get_global_id(0);
@@ -496,7 +534,10 @@ __kernel void arithm_s_bitwise_and_C3_D1 (__global char *src1, int src1_step,
{
x = x << 2;
#define dst_align (((dst_offset % dst_step) / 3 ) & 3)
#ifdef dst_align
#undef dst_align
#endif
#define dst_align (((dst_offset % dst_step) / 3 ) & 3)
int src1_index = mad24(y, src1_step, (x * 3) + src1_offset - (dst_align * 3));
int dst_start = mad24(y, dst_step, dst_offset);
@@ -539,9 +580,10 @@ __kernel void arithm_s_bitwise_and_C3_D1 (__global char *src1, int src1_step,
}
}
__kernel void arithm_s_bitwise_and_C3_D2 (__global ushort *src1, int src1_step, int src1_offset,
__global ushort *dst, int dst_step, int dst_offset,
ushort4 src2, int rows, int cols, int dst_step1)
__kernel void arithm_s_bitwise_and_C3_D2 (
__global ushort *src1, int src1_step, int src1_offset,
__global ushort *dst, int dst_step, int dst_offset,
ushort4 src2, int rows, int cols, int dst_step1)
{
int x = get_global_id(0);
@@ -551,7 +593,10 @@ __kernel void arithm_s_bitwise_and_C3_D2 (__global ushort *src1, int src1_step
{
x = x << 1;
#define dst_align (((dst_offset % dst_step) / 6 ) & 1)
#ifdef dst_align
#undef dst_align
#endif
#define dst_align (((dst_offset % dst_step) / 6 ) & 1)
int src1_index = mad24(y, src1_step, (x * 6) + src1_offset - (dst_align * 6));
int dst_start = mad24(y, dst_step, dst_offset);
@@ -577,21 +622,22 @@ __kernel void arithm_s_bitwise_and_C3_D2 (__global ushort *src1, int src1_step
data_0.xy = ((dst_index + 0 >= dst_start)) ? tmp_data_0.xy : data_0.xy;
data_1.x = ((dst_index + 0 >= dst_start) && (dst_index + 0 < dst_end))
? tmp_data_1.x : data_1.x;
? tmp_data_1.x : data_1.x;
data_1.y = ((dst_index + 6 >= dst_start) && (dst_index + 6 < dst_end))
? tmp_data_1.y : data_1.y;
? tmp_data_1.y : data_1.y;
data_2.xy = ((dst_index + 6 >= dst_start) && (dst_index + 6 < dst_end))
? tmp_data_2.xy : data_2.xy;
? tmp_data_2.xy : data_2.xy;
*((__global ushort2 *)((__global char *)dst + dst_index + 0))= data_0;
*((__global ushort2 *)((__global char *)dst + dst_index + 4))= data_1;
*((__global ushort2 *)((__global char *)dst + dst_index + 8))= data_2;
*((__global ushort2 *)((__global char *)dst + dst_index + 0))= data_0;
*((__global ushort2 *)((__global char *)dst + dst_index + 4))= data_1;
*((__global ushort2 *)((__global char *)dst + dst_index + 8))= data_2;
}
}
__kernel void arithm_s_bitwise_and_C3_D3 (__global short *src1, int src1_step, int src1_offset,
__global short *dst, int dst_step, int dst_offset,
short4 src2, int rows, int cols, int dst_step1)
__kernel void arithm_s_bitwise_and_C3_D3 (
__global short *src1, int src1_step, int src1_offset,
__global short *dst, int dst_step, int dst_offset,
short4 src2, int rows, int cols, int dst_step1)
{
int x = get_global_id(0);
@@ -601,7 +647,10 @@ __kernel void arithm_s_bitwise_and_C3_D3 (__global short *src1, int src1_step,
{
x = x << 1;
#define dst_align (((dst_offset % dst_step) / 6 ) & 1)
#ifdef dst_align
#undef dst_align
#endif
#define dst_align (((dst_offset % dst_step) / 6 ) & 1)
int src1_index = mad24(y, src1_step, (x * 6) + src1_offset - (dst_align * 6));
int dst_start = mad24(y, dst_step, dst_offset);
@@ -627,21 +676,22 @@ __kernel void arithm_s_bitwise_and_C3_D3 (__global short *src1, int src1_step,
data_0.xy = ((dst_index + 0 >= dst_start)) ? tmp_data_0.xy : data_0.xy;
data_1.x = ((dst_index + 0 >= dst_start) && (dst_index + 0 < dst_end))
? tmp_data_1.x : data_1.x;
? tmp_data_1.x : data_1.x;
data_1.y = ((dst_index + 6 >= dst_start) && (dst_index + 6 < dst_end))
? tmp_data_1.y : data_1.y;
? tmp_data_1.y : data_1.y;
data_2.xy = ((dst_index + 6 >= dst_start) && (dst_index + 6 < dst_end))
? tmp_data_2.xy : data_2.xy;
? tmp_data_2.xy : data_2.xy;
*((__global short2 *)((__global char *)dst + dst_index + 0))= data_0;
*((__global short2 *)((__global char *)dst + dst_index + 4))= data_1;
*((__global short2 *)((__global char *)dst + dst_index + 8))= data_2;
*((__global short2 *)((__global char *)dst + dst_index + 0))= data_0;
*((__global short2 *)((__global char *)dst + dst_index + 4))= data_1;
*((__global short2 *)((__global char *)dst + dst_index + 8))= data_2;
}
}
__kernel void arithm_s_bitwise_and_C3_D4 (__global int *src1, int src1_step, int src1_offset,
__global int *dst, int dst_step, int dst_offset,
int4 src2, int rows, int cols, int dst_step1)
__kernel void arithm_s_bitwise_and_C3_D4 (
__global int *src1, int src1_step, int src1_offset,
__global int *dst, int dst_step, int dst_offset,
int4 src2, int rows, int cols, int dst_step1)
{
int x = get_global_id(0);
@@ -668,14 +718,15 @@ __kernel void arithm_s_bitwise_and_C3_D4 (__global int *src1, int src1_step, i
int tmp_data_1 = src1_data_1 & src2_data_1;
int tmp_data_2 = src1_data_2 & src2_data_2;
*((__global int *)((__global char *)dst + dst_index + 0))= tmp_data_0;
*((__global int *)((__global char *)dst + dst_index + 4))= tmp_data_1;
*((__global int *)((__global char *)dst + dst_index + 8))= tmp_data_2;
*((__global int *)((__global char *)dst + dst_index + 0))= tmp_data_0;
*((__global int *)((__global char *)dst + dst_index + 4))= tmp_data_1;
*((__global int *)((__global char *)dst + dst_index + 8))= tmp_data_2;
}
}
__kernel void arithm_s_bitwise_and_C3_D5 (__global char *src1, int src1_step, int src1_offset,
__global char *dst, int dst_step, int dst_offset,
char16 src2, int rows, int cols, int dst_step1)
__kernel void arithm_s_bitwise_and_C3_D5 (
__global char *src1, int src1_step, int src1_offset,
__global char *dst, int dst_step, int dst_offset,
char16 src2, int rows, int cols, int dst_step1)
{
int x = get_global_id(0);
@@ -702,15 +753,16 @@ __kernel void arithm_s_bitwise_and_C3_D5 (__global char *src1, int src1_step,
char4 tmp_data_1 = src1_data_1 & src2_data_1;
char4 tmp_data_2 = src1_data_2 & src2_data_2;
*((__global char4 *)((__global char *)dst + dst_index + 0))= tmp_data_0;
*((__global char4 *)((__global char *)dst + dst_index + 4))= tmp_data_1;
*((__global char4 *)((__global char *)dst + dst_index + 8))= tmp_data_2;
*((__global char4 *)((__global char *)dst + dst_index + 0))= tmp_data_0;
*((__global char4 *)((__global char *)dst + dst_index + 4))= tmp_data_1;
*((__global char4 *)((__global char *)dst + dst_index + 8))= tmp_data_2;
}
}
#if defined (DOUBLE_SUPPORT)
__kernel void arithm_s_bitwise_and_C3_D6 (__global short *src1, int src1_step, int src1_offset,
__global short *dst, int dst_step, int dst_offset,
short16 src2, int rows, int cols, int dst_step1)
__kernel void arithm_s_bitwise_and_C3_D6 (
__global short *src1, int src1_step, int src1_offset,
__global short *dst, int dst_step, int dst_offset,
short16 src2, int rows, int cols, int dst_step1)
{
int x = get_global_id(0);
@@ -737,15 +789,16 @@ __kernel void arithm_s_bitwise_and_C3_D6 (__global short *src1, int src1_step, i
short4 tmp_data_1 = src1_data_1 & src2_data_1;
short4 tmp_data_2 = src1_data_2 & src2_data_2;
*((__global short4 *)((__global char *)dst + dst_index + 0 ))= tmp_data_0;
*((__global short4 *)((__global char *)dst + dst_index + 8 ))= tmp_data_1;
*((__global short4 *)((__global char *)dst + dst_index + 16))= tmp_data_2;
*((__global short4 *)((__global char *)dst + dst_index + 0 ))= tmp_data_0;
*((__global short4 *)((__global char *)dst + dst_index + 8 ))= tmp_data_1;
*((__global short4 *)((__global char *)dst + dst_index + 16))= tmp_data_2;
}
}
#endif
__kernel void arithm_s_bitwise_and_C4_D0 (__global uchar *src1, int src1_step, int src1_offset,
__global uchar *dst, int dst_step, int dst_offset,
uchar4 src2, int rows, int cols, int dst_step1)
__kernel void arithm_s_bitwise_and_C4_D0 (
__global uchar *src1, int src1_step, int src1_offset,
__global uchar *dst, int dst_step, int dst_offset,
uchar4 src2, int rows, int cols, int dst_step1)
{
int x = get_global_id(0);
@@ -765,9 +818,10 @@ __kernel void arithm_s_bitwise_and_C4_D0 (__global uchar *src1, int src1_step,
}
__kernel void arithm_s_bitwise_and_C4_D1 (__global char *src1, int src1_step, int src1_offset,
__global char *dst, int dst_step, int dst_offset,
char4 src2, int rows, int cols, int dst_step1)
__kernel void arithm_s_bitwise_and_C4_D1 (
__global char *src1, int src1_step, int src1_offset,
__global char *dst, int dst_step, int dst_offset,
char4 src2, int rows, int cols, int dst_step1)
{
int x = get_global_id(0);
@@ -786,9 +840,10 @@ __kernel void arithm_s_bitwise_and_C4_D1 (__global char *src1, int src1_step,
}
}
__kernel void arithm_s_bitwise_and_C4_D2 (__global ushort *src1, int src1_step, int src1_offset,
__global ushort *dst, int dst_step, int dst_offset,
ushort4 src2, int rows, int cols, int dst_step1)
__kernel void arithm_s_bitwise_and_C4_D2 (
__global ushort *src1, int src1_step, int src1_offset,
__global ushort *dst, int dst_step, int dst_offset,
ushort4 src2, int rows, int cols, int dst_step1)
{
int x = get_global_id(0);
@@ -806,9 +861,10 @@ __kernel void arithm_s_bitwise_and_C4_D2 (__global ushort *src1, int src1_step
*((__global ushort4 *)((__global char *)dst + dst_index)) = data;
}
}
__kernel void arithm_s_bitwise_and_C4_D3 (__global short *src1, int src1_step, int src1_offset,
__global short *dst, int dst_step, int dst_offset,
short4 src2, int rows, int cols, int dst_step1)
__kernel void arithm_s_bitwise_and_C4_D3 (
__global short *src1, int src1_step, int src1_offset,
__global short *dst, int dst_step, int dst_offset,
short4 src2, int rows, int cols, int dst_step1)
{
int x = get_global_id(0);
@@ -826,9 +882,10 @@ __kernel void arithm_s_bitwise_and_C4_D3 (__global short *src1, int src1_step,
*((__global short4 *)((__global char *)dst + dst_index)) = data;
}
}
__kernel void arithm_s_bitwise_and_C4_D4 (__global int *src1, int src1_step, int src1_offset,
__global int *dst, int dst_step, int dst_offset,
int4 src2, int rows, int cols, int dst_step1)
__kernel void arithm_s_bitwise_and_C4_D4 (
__global int *src1, int src1_step, int src1_offset,
__global int *dst, int dst_step, int dst_offset,
int4 src2, int rows, int cols, int dst_step1)
{
int x = get_global_id(0);
@@ -846,9 +903,10 @@ __kernel void arithm_s_bitwise_and_C4_D4 (__global int *src1, int src1_step, i
*((__global int4 *)((__global char *)dst + dst_index)) = data;
}
}
__kernel void arithm_s_bitwise_and_C4_D5 (__global char *src1, int src1_step, int src1_offset,
__global char *dst, int dst_step, int dst_offset,
char16 src2, int rows, int cols, int dst_step1)
__kernel void arithm_s_bitwise_and_C4_D5 (
__global char *src1, int src1_step, int src1_offset,
__global char *dst, int dst_step, int dst_offset,
char16 src2, int rows, int cols, int dst_step1)
{
int x = get_global_id(0);
@@ -869,9 +927,10 @@ __kernel void arithm_s_bitwise_and_C4_D5 (__global char *src1, int src1_step,
}
}
#if defined (DOUBLE_SUPPORT)
__kernel void arithm_s_bitwise_and_C4_D6 (__global short *src1, int src1_step, int src1_offset,
__global short *dst, int dst_step, int dst_offset,
short16 src2, int rows, int cols, int dst_step1)
__kernel void arithm_s_bitwise_and_C4_D6 (
__global short *src1, int src1_step, int src1_offset,
__global short *dst, int dst_step, int dst_offset,
short16 src2, int rows, int cols, int dst_step1)
{
int x = get_global_id(0);
@@ -897,10 +956,10 @@ __kernel void arithm_s_bitwise_and_C4_D6 (__global short *src1, int src1_step, i
short4 tmp_data_2 = src1_data_2 & src2_data_2;
short4 tmp_data_3 = src1_data_3 & src2_data_3;
*((__global short4 *)((__global char *)dst + dst_index + 0 ))= tmp_data_0;
*((__global short4 *)((__global char *)dst + dst_index + 8 ))= tmp_data_1;
*((__global short4 *)((__global char *)dst + dst_index + 16))= tmp_data_2;
*((__global short4 *)((__global char *)dst + dst_index + 24))= tmp_data_3;
*((__global short4 *)((__global char *)dst + dst_index + 0 ))= tmp_data_0;
*((__global short4 *)((__global char *)dst + dst_index + 8 ))= tmp_data_1;
*((__global short4 *)((__global char *)dst + dst_index + 16))= tmp_data_2;
*((__global short4 *)((__global char *)dst + dst_index + 24))= tmp_data_3;
}
}

View File

@@ -42,20 +42,22 @@
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#if defined (__ATI__)
#pragma OPENCL EXTENSION cl_amd_fp64:enable
#elif defined (__NVIDIA__)
#if defined (DOUBLE_SUPPORT)
#ifdef cl_khr_fp64
#pragma OPENCL EXTENSION cl_khr_fp64:enable
#elif defined (cl_amd_fp64)
#pragma OPENCL EXTENSION cl_amd_fp64:enable
#endif
#endif
//////////////////////////////////////////////////////////////////////////////////////////////////////
////////////////////////////////////////////BITWISE_AND////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////////////////////////////
/**************************************bitwise_and with scalar with mask**************************************/
__kernel void arithm_s_bitwise_and_with_mask_C1_D0 (__global uchar *src1, int src1_step, int src1_offset,
__global uchar *dst, int dst_step, int dst_offset,
__global uchar *mask, int mask_step, int mask_offset,
uchar4 src2, int rows, int cols, int dst_step1)
__kernel void arithm_s_bitwise_and_with_mask_C1_D0 (
__global uchar *src1, int src1_step, int src1_offset,
__global uchar *dst, int dst_step, int dst_offset,
__global uchar *mask, int mask_step, int mask_offset,
uchar4 src2, int rows, int cols, int dst_step1)
{
int x = get_global_id(0);
@@ -65,7 +67,10 @@ __kernel void arithm_s_bitwise_and_with_mask_C1_D0 (__global uchar *src1, int
{
x = x << 2;
#define dst_align (dst_offset & 3)
#ifdef dst_align
#undef dst_align
#endif
#define dst_align (dst_offset & 3)
int src1_index = mad24(y, src1_step, x + src1_offset - dst_align);
int mask_index = mad24(y, mask_step, x + mask_offset - dst_align);
@@ -90,10 +95,11 @@ __kernel void arithm_s_bitwise_and_with_mask_C1_D0 (__global uchar *src1, int
}
__kernel void arithm_s_bitwise_and_with_mask_C1_D1 (__global char *src1, int src1_step, int src1_offset,
__global char *dst, int dst_step, int dst_offset,
__global uchar *mask, int mask_step, int mask_offset,
char4 src2, int rows, int cols, int dst_step1)
__kernel void arithm_s_bitwise_and_with_mask_C1_D1 (
__global char *src1, int src1_step, int src1_offset,
__global char *dst, int dst_step, int dst_offset,
__global uchar *mask, int mask_step, int mask_offset,
char4 src2, int rows, int cols, int dst_step1)
{
int x = get_global_id(0);
@@ -103,7 +109,10 @@ __kernel void arithm_s_bitwise_and_with_mask_C1_D1 (__global char *src1, int s
{
x = x << 2;
#define dst_align (dst_offset & 3)
#ifdef dst_align
#undef dst_align
#endif
#define dst_align (dst_offset & 3)
int src1_index = mad24(y, src1_step, x + src1_offset - dst_align);
int mask_index = mad24(y, mask_step, x + mask_offset - dst_align);
@@ -127,10 +136,11 @@ __kernel void arithm_s_bitwise_and_with_mask_C1_D1 (__global char *src1, int s
}
}
__kernel void arithm_s_bitwise_and_with_mask_C1_D2 (__global ushort *src1, int src1_step, int src1_offset,
__global ushort *dst, int dst_step, int dst_offset,
__global uchar *mask, int mask_step, int mask_offset,
ushort4 src2, int rows, int cols, int dst_step1)
__kernel void arithm_s_bitwise_and_with_mask_C1_D2 (
__global ushort *src1, int src1_step, int src1_offset,
__global ushort *dst, int dst_step, int dst_offset,
__global uchar *mask, int mask_step, int mask_offset,
ushort4 src2, int rows, int cols, int dst_step1)
{
int x = get_global_id(0);
@@ -140,7 +150,10 @@ __kernel void arithm_s_bitwise_and_with_mask_C1_D2 (__global ushort *src1, int
{
x = x << 1;
#define dst_align ((dst_offset >> 1) & 1)
#ifdef dst_align
#undef dst_align
#endif
#define dst_align ((dst_offset >> 1) & 1)
int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1));
int mask_index = mad24(y, mask_step, x + mask_offset - dst_align);
@@ -161,10 +174,11 @@ __kernel void arithm_s_bitwise_and_with_mask_C1_D2 (__global ushort *src1, int
*((__global ushort2 *)((__global uchar *)dst + dst_index)) = data;
}
}
__kernel void arithm_s_bitwise_and_with_mask_C1_D3 (__global short *src1, int src1_step, int src1_offset,
__global short *dst, int dst_step, int dst_offset,
__global uchar *mask, int mask_step, int mask_offset,
short4 src2, int rows, int cols, int dst_step1)
__kernel void arithm_s_bitwise_and_with_mask_C1_D3 (
__global short *src1, int src1_step, int src1_offset,
__global short *dst, int dst_step, int dst_offset,
__global uchar *mask, int mask_step, int mask_offset,
short4 src2, int rows, int cols, int dst_step1)
{
int x = get_global_id(0);
@@ -174,7 +188,10 @@ __kernel void arithm_s_bitwise_and_with_mask_C1_D3 (__global short *src1, int
{
x = x << 1;
#define dst_align ((dst_offset >> 1) & 1)
#ifdef dst_align
#undef dst_align
#endif
#define dst_align ((dst_offset >> 1) & 1)
int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1));
int mask_index = mad24(y, mask_step, x + mask_offset - dst_align);
@@ -195,10 +212,11 @@ __kernel void arithm_s_bitwise_and_with_mask_C1_D3 (__global short *src1, int
*((__global short2 *)((__global uchar *)dst + dst_index)) = data;
}
}
__kernel void arithm_s_bitwise_and_with_mask_C1_D4 (__global int *src1, int src1_step, int src1_offset,
__global int *dst, int dst_step, int dst_offset,
__global uchar *mask, int mask_step, int mask_offset,
int4 src2, int rows, int cols, int dst_step1)
__kernel void arithm_s_bitwise_and_with_mask_C1_D4 (
__global int *src1, int src1_step, int src1_offset,
__global int *dst, int dst_step, int dst_offset,
__global uchar *mask, int mask_step, int mask_offset,
int4 src2, int rows, int cols, int dst_step1)
{
int x = get_global_id(0);
@@ -223,10 +241,11 @@ __kernel void arithm_s_bitwise_and_with_mask_C1_D4 (__global int *src1, int
}
}
__kernel void arithm_s_bitwise_and_with_mask_C1_D5 (__global char *src1, int src1_step, int src1_offset,
__global char *dst, int dst_step, int dst_offset,
__global uchar *mask, int mask_step, int mask_offset,
char16 src2, int rows, int cols, int dst_step1)
__kernel void arithm_s_bitwise_and_with_mask_C1_D5 (
__global char *src1, int src1_step, int src1_offset,
__global char *dst, int dst_step, int dst_offset,
__global uchar *mask, int mask_step, int mask_offset,
char16 src2, int rows, int cols, int dst_step1)
{
int x = get_global_id(0);
@@ -252,10 +271,11 @@ __kernel void arithm_s_bitwise_and_with_mask_C1_D5 (__global char *src1, int src
}
#if defined (DOUBLE_SUPPORT)
__kernel void arithm_s_bitwise_and_with_mask_C1_D6 (__global short *src1, int src1_step, int src1_offset,
__global short *dst, int dst_step, int dst_offset,
__global uchar *mask, int mask_step, int mask_offset,
short16 src2, int rows, int cols, int dst_step1)
__kernel void arithm_s_bitwise_and_with_mask_C1_D6 (
__global short *src1, int src1_step, int src1_offset,
__global short *dst, int dst_step, int dst_offset,
__global uchar *mask, int mask_step, int mask_offset,
short16 src2, int rows, int cols, int dst_step1)
{
int x = get_global_id(0);
@@ -280,10 +300,11 @@ __kernel void arithm_s_bitwise_and_with_mask_C1_D6 (__global short *src1, int sr
}
}
#endif
__kernel void arithm_s_bitwise_and_with_mask_C2_D0 (__global uchar *src1, int src1_step, int src1_offset,
__global uchar *dst, int dst_step, int dst_offset,
__global uchar *mask, int mask_step, int mask_offset,
uchar4 src2, int rows, int cols, int dst_step1)
__kernel void arithm_s_bitwise_and_with_mask_C2_D0 (
__global uchar *src1, int src1_step, int src1_offset,
__global uchar *dst, int dst_step, int dst_offset,
__global uchar *mask, int mask_step, int mask_offset,
uchar4 src2, int rows, int cols, int dst_step1)
{
int x = get_global_id(0);
@@ -293,7 +314,10 @@ __kernel void arithm_s_bitwise_and_with_mask_C2_D0 (__global uchar *src1, int
{
x = x << 1;
#define dst_align ((dst_offset >> 1) & 1)
#ifdef dst_align
#undef dst_align
#endif
#define dst_align ((dst_offset >> 1) & 1)
int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1));
int mask_index = mad24(y, mask_step, x + mask_offset - dst_align);
@@ -316,10 +340,11 @@ __kernel void arithm_s_bitwise_and_with_mask_C2_D0 (__global uchar *src1, int
}
__kernel void arithm_s_bitwise_and_with_mask_C2_D1 (__global char *src1, int src1_step, int src1_offset,
__global char *dst, int dst_step, int dst_offset,
__global uchar *mask, int mask_step, int mask_offset,
char4 src2, int rows, int cols, int dst_step1)
__kernel void arithm_s_bitwise_and_with_mask_C2_D1 (
__global char *src1, int src1_step, int src1_offset,
__global char *dst, int dst_step, int dst_offset,
__global uchar *mask, int mask_step, int mask_offset,
char4 src2, int rows, int cols, int dst_step1)
{
int x = get_global_id(0);
@@ -329,7 +354,10 @@ __kernel void arithm_s_bitwise_and_with_mask_C2_D1 (__global char *src1, int s
{
x = x << 1;
#define dst_align ((dst_offset >> 1) & 1)
#ifdef dst_align
#undef dst_align
#endif
#define dst_align ((dst_offset >> 1) & 1)
int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1));
int mask_index = mad24(y, mask_step, x + mask_offset - dst_align);
@@ -351,10 +379,11 @@ __kernel void arithm_s_bitwise_and_with_mask_C2_D1 (__global char *src1, int s
}
}
__kernel void arithm_s_bitwise_and_with_mask_C2_D2 (__global ushort *src1, int src1_step, int src1_offset,
__global ushort *dst, int dst_step, int dst_offset,
__global uchar *mask, int mask_step, int mask_offset,
ushort4 src2, int rows, int cols, int dst_step1)
__kernel void arithm_s_bitwise_and_with_mask_C2_D2 (
__global ushort *src1, int src1_step, int src1_offset,
__global ushort *dst, int dst_step, int dst_offset,
__global uchar *mask, int mask_step, int mask_offset,
ushort4 src2, int rows, int cols, int dst_step1)
{
int x = get_global_id(0);
@@ -378,10 +407,11 @@ __kernel void arithm_s_bitwise_and_with_mask_C2_D2 (__global ushort *src1, int
*((__global ushort2 *)((__global char *)dst + dst_index)) = data;
}
}
__kernel void arithm_s_bitwise_and_with_mask_C2_D3 (__global short *src1, int src1_step, int src1_offset,
__global short *dst, int dst_step, int dst_offset,
__global uchar *mask, int mask_step, int mask_offset,
short4 src2, int rows, int cols, int dst_step1)
__kernel void arithm_s_bitwise_and_with_mask_C2_D3 (
__global short *src1, int src1_step, int src1_offset,
__global short *dst, int dst_step, int dst_offset,
__global uchar *mask, int mask_step, int mask_offset,
short4 src2, int rows, int cols, int dst_step1)
{
int x = get_global_id(0);
@@ -405,10 +435,11 @@ __kernel void arithm_s_bitwise_and_with_mask_C2_D3 (__global short *src1, int
*((__global short2 *)((__global char *)dst + dst_index)) = data;
}
}
__kernel void arithm_s_bitwise_and_with_mask_C2_D4 (__global int *src1, int src1_step, int src1_offset,
__global int *dst, int dst_step, int dst_offset,
__global uchar *mask, int mask_step, int mask_offset,
int4 src2, int rows, int cols, int dst_step1)
__kernel void arithm_s_bitwise_and_with_mask_C2_D4 (
__global int *src1, int src1_step, int src1_offset,
__global int *dst, int dst_step, int dst_offset,
__global uchar *mask, int mask_step, int mask_offset,
int4 src2, int rows, int cols, int dst_step1)
{
int x = get_global_id(0);
@@ -432,10 +463,11 @@ __kernel void arithm_s_bitwise_and_with_mask_C2_D4 (__global int *src1, int sr
*((__global int2 *)((__global char *)dst + dst_index)) = data;
}
}
__kernel void arithm_s_bitwise_and_with_mask_C2_D5 (__global char *src1, int src1_step, int src1_offset,
__global char *dst, int dst_step, int dst_offset,
__global uchar *mask, int mask_step, int mask_offset,
char16 src2, int rows, int cols, int dst_step1)
__kernel void arithm_s_bitwise_and_with_mask_C2_D5 (
__global char *src1, int src1_step, int src1_offset,
__global char *dst, int dst_step, int dst_offset,
__global uchar *mask, int mask_step, int mask_offset,
char16 src2, int rows, int cols, int dst_step1)
{
int x = get_global_id(0);
@@ -461,10 +493,11 @@ __kernel void arithm_s_bitwise_and_with_mask_C2_D5 (__global char *src1, int s
}
}
#if defined (DOUBLE_SUPPORT)
__kernel void arithm_s_bitwise_and_with_mask_C2_D6 (__global short *src1, int src1_step, int src1_offset,
__global short *dst, int dst_step, int dst_offset,
__global uchar *mask, int mask_step, int mask_offset,
short16 src2, int rows, int cols, int dst_step1)
__kernel void arithm_s_bitwise_and_with_mask_C2_D6 (
__global short *src1, int src1_step, int src1_offset,
__global short *dst, int dst_step, int dst_offset,
__global uchar *mask, int mask_step, int mask_offset,
short16 src2, int rows, int cols, int dst_step1)
{
int x = get_global_id(0);
@@ -489,10 +522,11 @@ __kernel void arithm_s_bitwise_and_with_mask_C2_D6 (__global short *src1, int sr
}
}
#endif
__kernel void arithm_s_bitwise_and_with_mask_C3_D0 (__global uchar *src1, int src1_step, int src1_offset,
__global uchar *dst, int dst_step, int dst_offset,
__global uchar *mask, int mask_step, int mask_offset,
uchar4 src2, int rows, int cols, int dst_step1)
__kernel void arithm_s_bitwise_and_with_mask_C3_D0 (
__global uchar *src1, int src1_step, int src1_offset,
__global uchar *dst, int dst_step, int dst_offset,
__global uchar *mask, int mask_step, int mask_offset,
uchar4 src2, int rows, int cols, int dst_step1)
{
int x = get_global_id(0);
@@ -502,7 +536,10 @@ __kernel void arithm_s_bitwise_and_with_mask_C3_D0 (__global uchar *src1, int
{
x = x << 2;
#define dst_align (((dst_offset % dst_step) / 3 ) & 3)
#ifdef dst_align
#undef dst_align
#endif
#define dst_align (((dst_offset % dst_step) / 3 ) & 3)
int src1_index = mad24(y, src1_step, (x * 3) + src1_offset - (dst_align * 3));
int mask_index = mad24(y, mask_step, x + mask_offset - dst_align);
@@ -549,10 +586,11 @@ __kernel void arithm_s_bitwise_and_with_mask_C3_D0 (__global uchar *src1, int
}
__kernel void arithm_s_bitwise_and_with_mask_C3_D1 (__global char *src1, int src1_step, int src1_offset,
__global char *dst, int dst_step, int dst_offset,
__global uchar *mask, int mask_step, int mask_offset,
char4 src2, int rows, int cols, int dst_step1)
__kernel void arithm_s_bitwise_and_with_mask_C3_D1 (
__global char *src1, int src1_step, int src1_offset,
__global char *dst, int dst_step, int dst_offset,
__global uchar *mask, int mask_step, int mask_offset,
char4 src2, int rows, int cols, int dst_step1)
{
int x = get_global_id(0);
@@ -562,7 +600,10 @@ __kernel void arithm_s_bitwise_and_with_mask_C3_D1 (__global char *src1, int s
{
x = x << 2;
#define dst_align (((dst_offset % dst_step) / 3 ) & 3)
#ifdef dst_align
#undef dst_align
#endif
#define dst_align (((dst_offset % dst_step) / 3 ) & 3)
int src1_index = mad24(y, src1_step, (x * 3) + src1_offset - (dst_align * 3));
int mask_index = mad24(y, mask_step, x + mask_offset - dst_align);
@@ -608,10 +649,11 @@ __kernel void arithm_s_bitwise_and_with_mask_C3_D1 (__global char *src1, int s
}
}
__kernel void arithm_s_bitwise_and_with_mask_C3_D2 (__global ushort *src1, int src1_step, int src1_offset,
__global ushort *dst, int dst_step, int dst_offset,
__global uchar *mask, int mask_step, int mask_offset,
ushort4 src2, int rows, int cols, int dst_step1)
__kernel void arithm_s_bitwise_and_with_mask_C3_D2 (
__global ushort *src1, int src1_step, int src1_offset,
__global ushort *dst, int dst_step, int dst_offset,
__global uchar *mask, int mask_step, int mask_offset,
ushort4 src2, int rows, int cols, int dst_step1)
{
int x = get_global_id(0);
@@ -621,7 +663,10 @@ __kernel void arithm_s_bitwise_and_with_mask_C3_D2 (__global ushort *src1, int
{
x = x << 1;
#define dst_align (((dst_offset % dst_step) / 6 ) & 1)
#ifdef dst_align
#undef dst_align
#endif
#define dst_align (((dst_offset % dst_step) / 6 ) & 1)
int src1_index = mad24(y, src1_step, (x * 6) + src1_offset - (dst_align * 6));
int mask_index = mad24(y, mask_step, x + mask_offset - dst_align);
@@ -650,22 +695,23 @@ __kernel void arithm_s_bitwise_and_with_mask_C3_D2 (__global ushort *src1, int
data_0.xy = ((mask_data.x) && (dst_index + 0 >= dst_start)) ? tmp_data_0.xy : data_0.xy;
data_1.x = ((mask_data.x) && (dst_index + 0 >= dst_start) && (dst_index + 0 < dst_end))
? tmp_data_1.x : data_1.x;
? tmp_data_1.x : data_1.x;
data_1.y = ((mask_data.y) && (dst_index + 6 >= dst_start) && (dst_index + 6 < dst_end))
? tmp_data_1.y : data_1.y;
? tmp_data_1.y : data_1.y;
data_2.xy = ((mask_data.y) && (dst_index + 6 >= dst_start) && (dst_index + 6 < dst_end))
? tmp_data_2.xy : data_2.xy;
? tmp_data_2.xy : data_2.xy;
*((__global ushort2 *)((__global char *)dst + dst_index + 0))= data_0;
*((__global ushort2 *)((__global char *)dst + dst_index + 4))= data_1;
*((__global ushort2 *)((__global char *)dst + dst_index + 8))= data_2;
*((__global ushort2 *)((__global char *)dst + dst_index + 0))= data_0;
*((__global ushort2 *)((__global char *)dst + dst_index + 4))= data_1;
*((__global ushort2 *)((__global char *)dst + dst_index + 8))= data_2;
}
}
__kernel void arithm_s_bitwise_and_with_mask_C3_D3 (__global short *src1, int src1_step, int src1_offset,
__global short *dst, int dst_step, int dst_offset,
__global uchar *mask, int mask_step, int mask_offset,
short4 src2, int rows, int cols, int dst_step1)
__kernel void arithm_s_bitwise_and_with_mask_C3_D3 (
__global short *src1, int src1_step, int src1_offset,
__global short *dst, int dst_step, int dst_offset,
__global uchar *mask, int mask_step, int mask_offset,
short4 src2, int rows, int cols, int dst_step1)
{
int x = get_global_id(0);
@@ -675,7 +721,10 @@ __kernel void arithm_s_bitwise_and_with_mask_C3_D3 (__global short *src1, int
{
x = x << 1;
#define dst_align (((dst_offset % dst_step) / 6 ) & 1)
#ifdef dst_align
#undef dst_align
#endif
#define dst_align (((dst_offset % dst_step) / 6 ) & 1)
int src1_index = mad24(y, src1_step, (x * 6) + src1_offset - (dst_align * 6));
int mask_index = mad24(y, mask_step, x + mask_offset - dst_align);
@@ -704,22 +753,23 @@ __kernel void arithm_s_bitwise_and_with_mask_C3_D3 (__global short *src1, int
data_0.xy = ((mask_data.x) && (dst_index + 0 >= dst_start)) ? tmp_data_0.xy : data_0.xy;
data_1.x = ((mask_data.x) && (dst_index + 0 >= dst_start) && (dst_index + 0 < dst_end))
? tmp_data_1.x : data_1.x;
? tmp_data_1.x : data_1.x;
data_1.y = ((mask_data.y) && (dst_index + 6 >= dst_start) && (dst_index + 6 < dst_end))
? tmp_data_1.y : data_1.y;
? tmp_data_1.y : data_1.y;
data_2.xy = ((mask_data.y) && (dst_index + 6 >= dst_start) && (dst_index + 6 < dst_end))
? tmp_data_2.xy : data_2.xy;
? tmp_data_2.xy : data_2.xy;
*((__global short2 *)((__global char *)dst + dst_index + 0))= data_0;
*((__global short2 *)((__global char *)dst + dst_index + 4))= data_1;
*((__global short2 *)((__global char *)dst + dst_index + 8))= data_2;
*((__global short2 *)((__global char *)dst + dst_index + 0))= data_0;
*((__global short2 *)((__global char *)dst + dst_index + 4))= data_1;
*((__global short2 *)((__global char *)dst + dst_index + 8))= data_2;
}
}
__kernel void arithm_s_bitwise_and_with_mask_C3_D4 (__global int *src1, int src1_step, int src1_offset,
__global int *dst, int dst_step, int dst_offset,
__global uchar *mask, int mask_step, int mask_offset,
int4 src2, int rows, int cols, int dst_step1)
__kernel void arithm_s_bitwise_and_with_mask_C3_D4 (
__global int *src1, int src1_step, int src1_offset,
__global int *dst, int dst_step, int dst_offset,
__global uchar *mask, int mask_step, int mask_offset,
int4 src2, int rows, int cols, int dst_step1)
{
int x = get_global_id(0);
@@ -753,15 +803,16 @@ __kernel void arithm_s_bitwise_and_with_mask_C3_D4 (__global int *src1, int sr
data_1 = mask_data ? tmp_data_1 : data_1;
data_2 = mask_data ? tmp_data_2 : data_2;
*((__global int *)((__global char *)dst + dst_index + 0))= data_0;
*((__global int *)((__global char *)dst + dst_index + 4))= data_1;
*((__global int *)((__global char *)dst + dst_index + 8))= data_2;
*((__global int *)((__global char *)dst + dst_index + 0))= data_0;
*((__global int *)((__global char *)dst + dst_index + 4))= data_1;
*((__global int *)((__global char *)dst + dst_index + 8))= data_2;
}
}
__kernel void arithm_s_bitwise_and_with_mask_C3_D5 (__global char *src1, int src1_step, int src1_offset,
__global char *dst, int dst_step, int dst_offset,
__global uchar *mask, int mask_step, int mask_offset,
char16 src2, int rows, int cols, int dst_step1)
__kernel void arithm_s_bitwise_and_with_mask_C3_D5 (
__global char *src1, int src1_step, int src1_offset,
__global char *dst, int dst_step, int dst_offset,
__global uchar *mask, int mask_step, int mask_offset,
char16 src2, int rows, int cols, int dst_step1)
{
int x = get_global_id(0);
@@ -795,16 +846,17 @@ __kernel void arithm_s_bitwise_and_with_mask_C3_D5 (__global char *src1, int s
data_1 = mask_data ? tmp_data_1 : data_1;
data_2 = mask_data ? tmp_data_2 : data_2;
*((__global char4 *)((__global char *)dst + dst_index + 0))= data_0;
*((__global char4 *)((__global char *)dst + dst_index + 4))= data_1;
*((__global char4 *)((__global char *)dst + dst_index + 8))= data_2;
*((__global char4 *)((__global char *)dst + dst_index + 0))= data_0;
*((__global char4 *)((__global char *)dst + dst_index + 4))= data_1;
*((__global char4 *)((__global char *)dst + dst_index + 8))= data_2;
}
}
#if defined (DOUBLE_SUPPORT)
__kernel void arithm_s_bitwise_and_with_mask_C3_D6 (__global short *src1, int src1_step, int src1_offset,
__global short *dst, int dst_step, int dst_offset,
__global uchar *mask, int mask_step, int mask_offset,
short16 src2, int rows, int cols, int dst_step1)
__kernel void arithm_s_bitwise_and_with_mask_C3_D6 (
__global short *src1, int src1_step, int src1_offset,
__global short *dst, int dst_step, int dst_offset,
__global uchar *mask, int mask_step, int mask_offset,
short16 src2, int rows, int cols, int dst_step1)
{
int x = get_global_id(0);
@@ -838,16 +890,17 @@ __kernel void arithm_s_bitwise_and_with_mask_C3_D6 (__global short *src1, int sr
data_1 = mask_data ? tmp_data_1 : data_1;
data_2 = mask_data ? tmp_data_2 : data_2;
*((__global short4 *)((__global char *)dst + dst_index + 0 ))= data_0;
*((__global short4 *)((__global char *)dst + dst_index + 8 ))= data_1;
*((__global short4 *)((__global char *)dst + dst_index + 16))= data_2;
*((__global short4 *)((__global char *)dst + dst_index + 0 ))= data_0;
*((__global short4 *)((__global char *)dst + dst_index + 8 ))= data_1;
*((__global short4 *)((__global char *)dst + dst_index + 16))= data_2;
}
}
#endif
__kernel void arithm_s_bitwise_and_with_mask_C4_D0 (__global uchar *src1, int src1_step, int src1_offset,
__global uchar *dst, int dst_step, int dst_offset,
__global uchar *mask, int mask_step, int mask_offset,
uchar4 src2, int rows, int cols, int dst_step1)
__kernel void arithm_s_bitwise_and_with_mask_C4_D0 (
__global uchar *src1, int src1_step, int src1_offset,
__global uchar *dst, int dst_step, int dst_offset,
__global uchar *mask, int mask_step, int mask_offset,
uchar4 src2, int rows, int cols, int dst_step1)
{
int x = get_global_id(0);
@@ -872,10 +925,11 @@ __kernel void arithm_s_bitwise_and_with_mask_C4_D0 (__global uchar *src1, int
}
__kernel void arithm_s_bitwise_and_with_mask_C4_D1 (__global char *src1, int src1_step, int src1_offset,
__global char *dst, int dst_step, int dst_offset,
__global uchar *mask, int mask_step, int mask_offset,
char4 src2, int rows, int cols, int dst_step1)
__kernel void arithm_s_bitwise_and_with_mask_C4_D1 (
__global char *src1, int src1_step, int src1_offset,
__global char *dst, int dst_step, int dst_offset,
__global uchar *mask, int mask_step, int mask_offset,
char4 src2, int rows, int cols, int dst_step1)
{
int x = get_global_id(0);
@@ -899,10 +953,11 @@ __kernel void arithm_s_bitwise_and_with_mask_C4_D1 (__global char *src1, int s
}
}
__kernel void arithm_s_bitwise_and_with_mask_C4_D2 (__global ushort *src1, int src1_step, int src1_offset,
__global ushort *dst, int dst_step, int dst_offset,
__global uchar *mask, int mask_step, int mask_offset,
ushort4 src2, int rows, int cols, int dst_step1)
__kernel void arithm_s_bitwise_and_with_mask_C4_D2 (
__global ushort *src1, int src1_step, int src1_offset,
__global ushort *dst, int dst_step, int dst_offset,
__global uchar *mask, int mask_step, int mask_offset,
ushort4 src2, int rows, int cols, int dst_step1)
{
int x = get_global_id(0);
@@ -925,10 +980,11 @@ __kernel void arithm_s_bitwise_and_with_mask_C4_D2 (__global ushort *src1, int
*((__global ushort4 *)((__global char *)dst + dst_index)) = data;
}
}
__kernel void arithm_s_bitwise_and_with_mask_C4_D3 (__global short *src1, int src1_step, int src1_offset,
__global short *dst, int dst_step, int dst_offset,
__global uchar *mask, int mask_step, int mask_offset,
short4 src2, int rows, int cols, int dst_step1)
__kernel void arithm_s_bitwise_and_with_mask_C4_D3 (
__global short *src1, int src1_step, int src1_offset,
__global short *dst, int dst_step, int dst_offset,
__global uchar *mask, int mask_step, int mask_offset,
short4 src2, int rows, int cols, int dst_step1)
{
int x = get_global_id(0);
@@ -951,10 +1007,11 @@ __kernel void arithm_s_bitwise_and_with_mask_C4_D3 (__global short *src1, int
*((__global short4 *)((__global char *)dst + dst_index)) = data;
}
}
__kernel void arithm_s_bitwise_and_with_mask_C4_D4 (__global int *src1, int src1_step, int src1_offset,
__global int *dst, int dst_step, int dst_offset,
__global uchar *mask, int mask_step, int mask_offset,
int4 src2, int rows, int cols, int dst_step1)
__kernel void arithm_s_bitwise_and_with_mask_C4_D4 (
__global int *src1, int src1_step, int src1_offset,
__global int *dst, int dst_step, int dst_offset,
__global uchar *mask, int mask_step, int mask_offset,
int4 src2, int rows, int cols, int dst_step1)
{
int x = get_global_id(0);
@@ -977,10 +1034,11 @@ __kernel void arithm_s_bitwise_and_with_mask_C4_D4 (__global int *src1, int sr
*((__global int4 *)((__global char *)dst + dst_index)) = data;
}
}
__kernel void arithm_s_bitwise_and_with_mask_C4_D5 (__global char *src1, int src1_step, int src1_offset,
__global char *dst, int dst_step, int dst_offset,
__global uchar *mask, int mask_step, int mask_offset,
char16 src2, int rows, int cols, int dst_step1)
__kernel void arithm_s_bitwise_and_with_mask_C4_D5 (
__global char *src1, int src1_step, int src1_offset,
__global char *dst, int dst_step, int dst_offset,
__global uchar *mask, int mask_step, int mask_offset,
char16 src2, int rows, int cols, int dst_step1)
{
int x = get_global_id(0);
@@ -1006,10 +1064,11 @@ __kernel void arithm_s_bitwise_and_with_mask_C4_D5 (__global char *src1, int s
}
}
#if defined (DOUBLE_SUPPORT)
__kernel void arithm_s_bitwise_and_with_mask_C4_D6 (__global short *src1, int src1_step, int src1_offset,
__global short *dst, int dst_step, int dst_offset,
__global uchar *mask, int mask_step, int mask_offset,
short16 src2, int rows, int cols, int dst_step1)
__kernel void arithm_s_bitwise_and_with_mask_C4_D6 (
__global short *src1, int src1_step, int src1_offset,
__global short *dst, int dst_step, int dst_offset,
__global uchar *mask, int mask_step, int mask_offset,
short16 src2, int rows, int cols, int dst_step1)
{
int x = get_global_id(0);

View File

@@ -43,9 +43,12 @@
//
//M*/
#if defined (DOUBLE_SUPPORT)
#ifdef cl_khr_fp64
#pragma OPENCL EXTENSION cl_khr_fp64:enable
#elif defined (cl_amd_fp64)
#pragma OPENCL EXTENSION cl_amd_fp64:enable
#endif
#endif
//////////////////////////////////////////////////////////////////////////////////////////////////////
////////////////////////////////////////////BITWISE_NOT////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////////////////////////////
@@ -61,25 +64,28 @@ __kernel void arithm_bitwise_not_D0 (__global uchar *src1, int src1_step, int sr
{
x = x << 2;
#define dst_align (dst_offset & 3)
#ifdef dst_align
#undef dst_align
#endif
#define dst_align (dst_offset & 3)
int src1_index = mad24(y, src1_step, x + src1_offset - dst_align);
int dst_start = mad24(y, dst_step, dst_offset);
int dst_end = mad24(y, dst_step, dst_offset + dst_step1);
int dst_index = mad24(y, dst_step, dst_offset + x & (int)0xfffffffc);
int src1_index_fix = src1_index < 0 ? 0 : src1_index;
int src1_index_fix = src1_index < 0 ? 0 : src1_index;
uchar4 src1_data = vload4(0, src1 + src1_index_fix);
uchar4 dst_data = *((__global uchar4 *)(dst + dst_index));
uchar4 tmp_data = ~ src1_data;
/* if(src1_index < 0)
{
uchar4 tmp;
tmp.xyzw = (src1_index == -2) ? src1_data.zwxy:src1_data.yzwx;
src1_data.xyzw = (src1_index == -1) ? src1_data.wxyz:tmp.xyzw;
}
*/
/* if(src1_index < 0)
{
uchar4 tmp;
tmp.xyzw = (src1_index == -2) ? src1_data.zwxy:src1_data.yzwx;
src1_data.xyzw = (src1_index == -1) ? src1_data.wxyz:tmp.xyzw;
}
*/
dst_data.x = ((dst_index + 0 >= dst_start) && (dst_index + 0 < dst_end)) ? tmp_data.x : dst_data.x;
dst_data.y = ((dst_index + 1 >= dst_start) && (dst_index + 1 < dst_end)) ? tmp_data.y : dst_data.y;
dst_data.z = ((dst_index + 2 >= dst_start) && (dst_index + 2 < dst_end)) ? tmp_data.z : dst_data.z;
@@ -91,8 +97,8 @@ __kernel void arithm_bitwise_not_D0 (__global uchar *src1, int src1_step, int sr
__kernel void arithm_bitwise_not_D1 (__global char *src1, int src1_step, int src1_offset,
__global char *dst, int dst_step, int dst_offset,
int rows, int cols, int dst_step1)
__global char *dst, int dst_step, int dst_offset,
int rows, int cols, int dst_step1)
{
int x = get_global_id(0);
int y = get_global_id(1);
@@ -101,7 +107,10 @@ __kernel void arithm_bitwise_not_D1 (__global char *src1, int src1_step, int src
{
x = x << 2;
#define dst_align (dst_offset & 3)
#ifdef dst_align
#undef dst_align
#endif
#define dst_align (dst_offset & 3)
int src1_index = mad24(y, src1_step, x + src1_offset - dst_align);
int dst_start = mad24(y, dst_step, dst_offset);
@@ -124,8 +133,8 @@ __kernel void arithm_bitwise_not_D1 (__global char *src1, int src1_step, int src
__kernel void arithm_bitwise_not_D2 (__global ushort *src1, int src1_step, int src1_offset,
__global ushort *dst, int dst_step, int dst_offset,
int rows, int cols, int dst_step1)
__global ushort *dst, int dst_step, int dst_offset,
int rows, int cols, int dst_step1)
{
int x = get_global_id(0);
@@ -135,7 +144,10 @@ __kernel void arithm_bitwise_not_D2 (__global ushort *src1, int src1_step, int s
{
x = x << 2;
#define dst_align ((dst_offset >> 1) & 3)
#ifdef dst_align
#undef dst_align
#endif
#define dst_align ((dst_offset >> 1) & 3)
int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1));
int dst_start = mad24(y, dst_step, dst_offset);
@@ -159,8 +171,8 @@ __kernel void arithm_bitwise_not_D2 (__global ushort *src1, int src1_step, int s
__kernel void arithm_bitwise_not_D3 (__global short *src1, int src1_step, int src1_offset,
__global short *dst, int dst_step, int dst_offset,
int rows, int cols, int dst_step1)
__global short *dst, int dst_step, int dst_offset,
int rows, int cols, int dst_step1)
{
int x = get_global_id(0);
@@ -170,7 +182,10 @@ __kernel void arithm_bitwise_not_D3 (__global short *src1, int src1_step, int sr
{
x = x << 2;
#define dst_align ((dst_offset >> 1) & 3)
#ifdef dst_align
#undef dst_align
#endif
#define dst_align ((dst_offset >> 1) & 3)
int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1));
int dst_start = mad24(y, dst_step, dst_offset);
@@ -194,8 +209,8 @@ __kernel void arithm_bitwise_not_D3 (__global short *src1, int src1_step, int sr
__kernel void arithm_bitwise_not_D4 (__global int *src1, int src1_step, int src1_offset,
__global int *dst, int dst_step, int dst_offset,
int rows, int cols, int dst_step1)
__global int *dst, int dst_step, int dst_offset,
int rows, int cols, int dst_step1)
{
int x = get_global_id(0);
int y = get_global_id(1);

View File

@@ -43,7 +43,11 @@
//
//M*/
#if defined (DOUBLE_SUPPORT)
#ifdef cl_khr_fp64
#pragma OPENCL EXTENSION cl_khr_fp64:enable
#elif defined (cl_amd_fp64)
#pragma OPENCL EXTENSION cl_amd_fp64:enable
#endif
#endif
//////////////////////////////////////////////////////////////////////////////////////////////////////
@@ -51,9 +55,9 @@
///////////////////////////////////////////////////////////////////////////////////////////////////////
/**************************************bitwise_or without mask**************************************/
__kernel void arithm_bitwise_or_D0 (__global uchar *src1, int src1_step, int src1_offset,
__global uchar *src2, int src2_step, int src2_offset,
__global uchar *dst, int dst_step, int dst_offset,
int rows, int cols, int dst_step1)
__global uchar *src2, int src2_step, int src2_offset,
__global uchar *dst, int dst_step, int dst_offset,
int rows, int cols, int dst_step1)
{
int x = get_global_id(0);
int y = get_global_id(1);
@@ -62,29 +66,32 @@ __kernel void arithm_bitwise_or_D0 (__global uchar *src1, int src1_step, int src
{
x = x << 2;
#define dst_align (dst_offset & 3)
#ifdef dst_align
#undef dst_align
#endif
#define dst_align (dst_offset & 3)
int src1_index = mad24(y, src1_step, x + src1_offset - dst_align);
int src2_index = mad24(y, src2_step, x + src2_offset - dst_align);
int dst_start = mad24(y, dst_step, dst_offset);
int dst_end = mad24(y, dst_step, dst_offset + dst_step1);
int dst_index = mad24(y, dst_step, dst_offset + x & (int)0xfffffffc);
int src1_index_fix = src1_index < 0 ? 0 : src1_index;
int src2_index_fix = src2_index < 0 ? 0 : src2_index;
int src1_index_fix = src1_index < 0 ? 0 : src1_index;
int src2_index_fix = src2_index < 0 ? 0 : src2_index;
uchar4 src1_data = vload4(0, src1 + src1_index_fix);
uchar4 src2_data = vload4(0, src2 + src2_index_fix);
if(src1_index < 0)
{
uchar4 tmp;
tmp.xyzw = (src1_index == -2) ? src1_data.zwxy:src1_data.yzwx;
src1_data.xyzw = (src1_index == -1) ? src1_data.wxyz:tmp.xyzw;
}
if(src2_index < 0)
{
uchar4 tmp;
tmp.xyzw = (src2_index == -2) ? src2_data.zwxy:src2_data.yzwx;
src2_data.xyzw = (src2_index == -1) ? src2_data.wxyz:tmp.xyzw;
}
if(src1_index < 0)
{
uchar4 tmp;
tmp.xyzw = (src1_index == -2) ? src1_data.zwxy:src1_data.yzwx;
src1_data.xyzw = (src1_index == -1) ? src1_data.wxyz:tmp.xyzw;
}
if(src2_index < 0)
{
uchar4 tmp;
tmp.xyzw = (src2_index == -2) ? src2_data.zwxy:src2_data.yzwx;
src2_data.xyzw = (src2_index == -1) ? src2_data.wxyz:tmp.xyzw;
}
uchar4 dst_data = *((__global uchar4 *)(dst + dst_index));
uchar4 tmp_data = src1_data | src2_data;
@@ -99,9 +106,9 @@ __kernel void arithm_bitwise_or_D0 (__global uchar *src1, int src1_step, int src
__kernel void arithm_bitwise_or_D1 (__global char *src1, int src1_step, int src1_offset,
__global char *src2, int src2_step, int src2_offset,
__global char *dst, int dst_step, int dst_offset,
int rows, int cols, int dst_step1)
__global char *src2, int src2_step, int src2_offset,
__global char *dst, int dst_step, int dst_offset,
int rows, int cols, int dst_step1)
{
int x = get_global_id(0);
int y = get_global_id(1);
@@ -110,7 +117,10 @@ __kernel void arithm_bitwise_or_D1 (__global char *src1, int src1_step, int src1
{
x = x << 2;
#define dst_align (dst_offset & 3)
#ifdef dst_align
#undef dst_align
#endif
#define dst_align (dst_offset & 3)
int src1_index = mad24(y, src1_step, x + src1_offset - dst_align);
int src2_index = mad24(y, src2_step, x + src2_offset - dst_align);
@@ -135,9 +145,9 @@ __kernel void arithm_bitwise_or_D1 (__global char *src1, int src1_step, int src1
__kernel void arithm_bitwise_or_D2 (__global ushort *src1, int src1_step, int src1_offset,
__global ushort *src2, int src2_step, int src2_offset,
__global ushort *dst, int dst_step, int dst_offset,
int rows, int cols, int dst_step1)
__global ushort *src2, int src2_step, int src2_offset,
__global ushort *dst, int dst_step, int dst_offset,
int rows, int cols, int dst_step1)
{
int x = get_global_id(0);
@@ -147,7 +157,10 @@ __kernel void arithm_bitwise_or_D2 (__global ushort *src1, int src1_step, int sr
{
x = x << 2;
#define dst_align ((dst_offset >> 1) & 3)
#ifdef dst_align
#undef dst_align
#endif
#define dst_align ((dst_offset >> 1) & 3)
int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1));
int src2_index = mad24(y, src2_step, (x << 1) + src2_offset - (dst_align << 1));
@@ -173,9 +186,9 @@ __kernel void arithm_bitwise_or_D2 (__global ushort *src1, int src1_step, int sr
__kernel void arithm_bitwise_or_D3 (__global short *src1, int src1_step, int src1_offset,
__global short *src2, int src2_step, int src2_offset,
__global short *dst, int dst_step, int dst_offset,
int rows, int cols, int dst_step1)
__global short *src2, int src2_step, int src2_offset,
__global short *dst, int dst_step, int dst_offset,
int rows, int cols, int dst_step1)
{
int x = get_global_id(0);
@@ -185,7 +198,10 @@ __kernel void arithm_bitwise_or_D3 (__global short *src1, int src1_step, int src
{
x = x << 2;
#define dst_align ((dst_offset >> 1) & 3)
#ifdef dst_align
#undef dst_align
#endif
#define dst_align ((dst_offset >> 1) & 3)
int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1));
int src2_index = mad24(y, src2_step, (x << 1) + src2_offset - (dst_align << 1));
@@ -211,9 +227,9 @@ __kernel void arithm_bitwise_or_D3 (__global short *src1, int src1_step, int src
__kernel void arithm_bitwise_or_D4 (__global int *src1, int src1_step, int src1_offset,
__global int *src2, int src2_step, int src2_offset,
__global int *dst, int dst_step, int dst_offset,
int rows, int cols, int dst_step1)
__global int *src2, int src2_step, int src2_offset,
__global int *dst, int dst_step, int dst_offset,
int rows, int cols, int dst_step1)
{
int x = get_global_id(0);
int y = get_global_id(1);
@@ -233,9 +249,9 @@ __kernel void arithm_bitwise_or_D4 (__global int *src1, int src1_step, int src1_
}
__kernel void arithm_bitwise_or_D5 (__global char *src1, int src1_step, int src1_offset,
__global char *src2, int src2_step, int src2_offset,
__global char *dst, int dst_step, int dst_offset,
int rows, int cols, int dst_step1)
__global char *src2, int src2_step, int src2_offset,
__global char *dst, int dst_step, int dst_offset,
int rows, int cols, int dst_step1)
{
int x = get_global_id(0);
int y = get_global_id(1);
@@ -256,9 +272,9 @@ __kernel void arithm_bitwise_or_D5 (__global char *src1, int src1_step, int src1
#if defined (DOUBLE_SUPPORT)
__kernel void arithm_bitwise_or_D6 (__global char *src1, int src1_step, int src1_offset,
__global char *src2, int src2_step, int src2_offset,
__global char *dst, int dst_step, int dst_offset,
int rows, int cols, int dst_step1)
__global char *src2, int src2_step, int src2_offset,
__global char *dst, int dst_step, int dst_offset,
int rows, int cols, int dst_step1)
{
int x = get_global_id(0);
int y = get_global_id(1);

View File

@@ -43,18 +43,22 @@
//
//M*/
#if defined (DOUBLE_SUPPORT)
#ifdef cl_khr_fp64
#pragma OPENCL EXTENSION cl_khr_fp64:enable
#elif defined (cl_amd_fp64)
#pragma OPENCL EXTENSION cl_amd_fp64:enable
#endif
#endif
//////////////////////////////////////////////////////////////////////////////////////////////////////
////////////////////////////////////////////BITWISE_OR////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////////////////////////////
/**************************************bitwise_or with mask**************************************/
__kernel void arithm_bitwise_or_with_mask_C1_D0 (__global uchar *src1, int src1_step, int src1_offset,
__global uchar *src2, int src2_step, int src2_offset,
__global uchar *mask, int mask_step, int mask_offset,
__global uchar *dst, int dst_step, int dst_offset,
int rows, int cols, int dst_step1)
__kernel void arithm_bitwise_or_with_mask_C1_D0 (
__global uchar *src1, int src1_step, int src1_offset,
__global uchar *src2, int src2_step, int src2_offset,
__global uchar *mask, int mask_step, int mask_offset,
__global uchar *dst, int dst_step, int dst_offset,
int rows, int cols, int dst_step1)
{
int x = get_global_id(0);
@@ -64,7 +68,10 @@ __kernel void arithm_bitwise_or_with_mask_C1_D0 (__global uchar *src1, int src1_
{
x = x << 2;
#define dst_align (dst_offset & 3)
#ifdef dst_align
#undef dst_align
#endif
#define dst_align (dst_offset & 3)
int src1_index = mad24(y, src1_step, x + src1_offset - dst_align);
int src2_index = mad24(y, src2_step, x + src2_offset - dst_align);
int mask_index = mad24(y, mask_step, x + mask_offset - dst_align);
@@ -91,11 +98,12 @@ __kernel void arithm_bitwise_or_with_mask_C1_D0 (__global uchar *src1, int src1_
__kernel void arithm_bitwise_or_with_mask_C1_D1 (__global char *src1, int src1_step, int src1_offset,
__global char *src2, int src2_step, int src2_offset,
__global uchar *mask, int mask_step, int mask_offset,
__global char *dst, int dst_step, int dst_offset,
int rows, int cols, int dst_step1)
__kernel void arithm_bitwise_or_with_mask_C1_D1 (
__global char *src1, int src1_step, int src1_offset,
__global char *src2, int src2_step, int src2_offset,
__global uchar *mask, int mask_step, int mask_offset,
__global char *dst, int dst_step, int dst_offset,
int rows, int cols, int dst_step1)
{
int x = get_global_id(0);
@@ -105,7 +113,10 @@ __kernel void arithm_bitwise_or_with_mask_C1_D1 (__global char *src1, int src1_s
{
x = x << 2;
#define dst_align (dst_offset & 3)
#ifdef dst_align
#undef dst_align
#endif
#define dst_align (dst_offset & 3)
int src1_index = mad24(y, src1_step, x + src1_offset - dst_align);
int src2_index = mad24(y, src2_step, x + src2_offset - dst_align);
int mask_index = mad24(y, mask_step, x + mask_offset - dst_align);
@@ -132,11 +143,12 @@ __kernel void arithm_bitwise_or_with_mask_C1_D1 (__global char *src1, int src1_s
__kernel void arithm_bitwise_or_with_mask_C1_D2 (__global ushort *src1, int src1_step, int src1_offset,
__global ushort *src2, int src2_step, int src2_offset,
__global uchar *mask, int mask_step, int mask_offset,
__global ushort *dst, int dst_step, int dst_offset,
int rows, int cols, int dst_step1)
__kernel void arithm_bitwise_or_with_mask_C1_D2 (
__global ushort *src1, int src1_step, int src1_offset,
__global ushort *src2, int src2_step, int src2_offset,
__global uchar *mask, int mask_step, int mask_offset,
__global ushort *dst, int dst_step, int dst_offset,
int rows, int cols, int dst_step1)
{
int x = get_global_id(0);
@@ -146,7 +158,10 @@ __kernel void arithm_bitwise_or_with_mask_C1_D2 (__global ushort *src1, int src1
{
x = x << 1;
#define dst_align ((dst_offset >> 1) & 1)
#ifdef dst_align
#undef dst_align
#endif
#define dst_align ((dst_offset >> 1) & 1)
int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1));
int src2_index = mad24(y, src2_step, (x << 1) + src2_offset - (dst_align << 1));
int mask_index = mad24(y, mask_step, x + mask_offset - dst_align);
@@ -171,11 +186,12 @@ __kernel void arithm_bitwise_or_with_mask_C1_D2 (__global ushort *src1, int src1
__kernel void arithm_bitwise_or_with_mask_C1_D3 (__global short *src1, int src1_step, int src1_offset,
__global short *src2, int src2_step, int src2_offset,
__global uchar *mask, int mask_step, int mask_offset,
__global short *dst, int dst_step, int dst_offset,
int rows, int cols, int dst_step1)
__kernel void arithm_bitwise_or_with_mask_C1_D3 (
__global short *src1, int src1_step, int src1_offset,
__global short *src2, int src2_step, int src2_offset,
__global uchar *mask, int mask_step, int mask_offset,
__global short *dst, int dst_step, int dst_offset,
int rows, int cols, int dst_step1)
{
int x = get_global_id(0);
@@ -185,7 +201,10 @@ __kernel void arithm_bitwise_or_with_mask_C1_D3 (__global short *src1, int src1_
{
x = x << 1;
#define dst_align ((dst_offset >> 1) & 1)
#ifdef dst_align
#undef dst_align
#endif
#define dst_align ((dst_offset >> 1) & 1)
int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1));
int src2_index = mad24(y, src2_step, (x << 1) + src2_offset - (dst_align << 1));
int mask_index = mad24(y, mask_step, x + mask_offset - dst_align);
@@ -198,8 +217,8 @@ __kernel void arithm_bitwise_or_with_mask_C1_D3 (__global short *src1, int src1_
short2 src2_data = vload2(0, (__global short *)((__global char *)src2 + src2_index));
uchar2 mask_data = vload2(0, mask + mask_index);
short2 data = *((__global short2 *)((__global uchar *)dst + dst_index));
short2 tmp_data = src1_data | src2_data;
short2 data = *((__global short2 *)((__global uchar *)dst + dst_index));
short2 tmp_data = src1_data | src2_data;
data.x = convert_short((mask_data.x) && (dst_index + 0 >= dst_start) && (dst_index + 0 < dst_end)) ? tmp_data.x : data.x;
data.y = convert_short((mask_data.y) && (dst_index + 2 >= dst_start) && (dst_index + 2 < dst_end)) ? tmp_data.y : data.y;
@@ -210,11 +229,12 @@ __kernel void arithm_bitwise_or_with_mask_C1_D3 (__global short *src1, int src1_
__kernel void arithm_bitwise_or_with_mask_C1_D4 (__global int *src1, int src1_step, int src1_offset,
__global int *src2, int src2_step, int src2_offset,
__global uchar *mask, int mask_step, int mask_offset,
__global int *dst, int dst_step, int dst_offset,
int rows, int cols, int dst_step1)
__kernel void arithm_bitwise_or_with_mask_C1_D4 (
__global int *src1, int src1_step, int src1_offset,
__global int *src2, int src2_step, int src2_offset,
__global uchar *mask, int mask_step, int mask_offset,
__global int *dst, int dst_step, int dst_offset,
int rows, int cols, int dst_step1)
{
int x = get_global_id(0);
@@ -242,11 +262,12 @@ __kernel void arithm_bitwise_or_with_mask_C1_D4 (__global int *src1, int src1_
__kernel void arithm_bitwise_or_with_mask_C1_D5 (__global char *src1, int src1_step, int src1_offset,
__global char *src2, int src2_step, int src2_offset,
__global uchar *mask, int mask_step, int mask_offset,
__global char *dst, int dst_step, int dst_offset,
int rows, int cols, int dst_step1)
__kernel void arithm_bitwise_or_with_mask_C1_D5 (
__global char *src1, int src1_step, int src1_offset,
__global char *src2, int src2_step, int src2_offset,
__global uchar *mask, int mask_step, int mask_offset,
__global char *dst, int dst_step, int dst_offset,
int rows, int cols, int dst_step1)
{
int x = get_global_id(0);
@@ -273,13 +294,13 @@ __kernel void arithm_bitwise_or_with_mask_C1_D5 (__global char *src1, int src1_s
}
#if defined (DOUBLE_SUPPORT)
__kernel void arithm_bitwise_or_with_mask_C1_D6 (__global char *src1, int src1_step, int src1_offset,
__global char *src2, int src2_step, int src2_offset,
__global uchar *mask, int mask_step, int mask_offset,
__global char *dst, int dst_step, int dst_offset,
int rows, int cols, int dst_step1)
__kernel void arithm_bitwise_or_with_mask_C1_D6 (
__global char *src1, int src1_step, int src1_offset,
__global char *src2, int src2_step, int src2_offset,
__global uchar *mask, int mask_step, int mask_offset,
__global char *dst, int dst_step, int dst_offset,
int rows, int cols, int dst_step1)
{
int x = get_global_id(0);
@@ -308,12 +329,12 @@ __kernel void arithm_bitwise_or_with_mask_C1_D6 (__global char *src1, int src1_s
#endif
__kernel void arithm_bitwise_or_with_mask_C2_D0 (__global uchar *src1, int src1_step, int src1_offset,
__global uchar *src2, int src2_step, int src2_offset,
__global uchar *mask, int mask_step, int mask_offset,
__global uchar *dst, int dst_step, int dst_offset,
int rows, int cols, int dst_step1)
__kernel void arithm_bitwise_or_with_mask_C2_D0 (
__global uchar *src1, int src1_step, int src1_offset,
__global uchar *src2, int src2_step, int src2_offset,
__global uchar *mask, int mask_step, int mask_offset,
__global uchar *dst, int dst_step, int dst_offset,
int rows, int cols, int dst_step1)
{
int x = get_global_id(0);
@@ -323,7 +344,10 @@ __kernel void arithm_bitwise_or_with_mask_C2_D0 (__global uchar *src1, int src1_
{
x = x << 1;
#define dst_align ((dst_offset >> 1) & 1)
#ifdef dst_align
#undef dst_align
#endif
#define dst_align ((dst_offset >> 1) & 1)
int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1));
int src2_index = mad24(y, src2_step, (x << 1) + src2_offset - (dst_align << 1));
int mask_index = mad24(y, mask_step, x + mask_offset - dst_align);
@@ -347,11 +371,12 @@ __kernel void arithm_bitwise_or_with_mask_C2_D0 (__global uchar *src1, int src1_
}
__kernel void arithm_bitwise_or_with_mask_C2_D1 (__global char *src1, int src1_step, int src1_offset,
__global char *src2, int src2_step, int src2_offset,
__global uchar *mask, int mask_step, int mask_offset,
__global char *dst, int dst_step, int dst_offset,
int rows, int cols, int dst_step1)
__kernel void arithm_bitwise_or_with_mask_C2_D1 (
__global char *src1, int src1_step, int src1_offset,
__global char *src2, int src2_step, int src2_offset,
__global uchar *mask, int mask_step, int mask_offset,
__global char *dst, int dst_step, int dst_offset,
int rows, int cols, int dst_step1)
{
int x = get_global_id(0);
@@ -361,7 +386,10 @@ __kernel void arithm_bitwise_or_with_mask_C2_D1 (__global char *src1, int src1_s
{
x = x << 1;
#define dst_align ((dst_offset >> 1) & 1)
#ifdef dst_align
#undef dst_align
#endif
#define dst_align ((dst_offset >> 1) & 1)
int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1));
int src2_index = mad24(y, src2_step, (x << 1) + src2_offset - (dst_align << 1));
int mask_index = mad24(y, mask_step, x + mask_offset - dst_align);
@@ -384,11 +412,12 @@ __kernel void arithm_bitwise_or_with_mask_C2_D1 (__global char *src1, int src1_s
}
}
__kernel void arithm_bitwise_or_with_mask_C2_D2 (__global ushort *src1, int src1_step, int src1_offset,
__global ushort *src2, int src2_step, int src2_offset,
__global uchar *mask, int mask_step, int mask_offset,
__global ushort *dst, int dst_step, int dst_offset,
int rows, int cols, int dst_step1)
__kernel void arithm_bitwise_or_with_mask_C2_D2 (
__global ushort *src1, int src1_step, int src1_offset,
__global ushort *src2, int src2_step, int src2_offset,
__global uchar *mask, int mask_step, int mask_offset,
__global ushort *dst, int dst_step, int dst_offset,
int rows, int cols, int dst_step1)
{
int x = get_global_id(0);
@@ -413,11 +442,12 @@ __kernel void arithm_bitwise_or_with_mask_C2_D2 (__global ushort *src1, int src1
*((__global ushort2 *)((__global char *)dst + dst_index)) = data;
}
}
__kernel void arithm_bitwise_or_with_mask_C2_D3 (__global short *src1, int src1_step, int src1_offset,
__global short *src2, int src2_step, int src2_offset,
__global uchar *mask, int mask_step, int mask_offset,
__global short *dst, int dst_step, int dst_offset,
int rows, int cols, int dst_step1)
__kernel void arithm_bitwise_or_with_mask_C2_D3 (
__global short *src1, int src1_step, int src1_offset,
__global short *src2, int src2_step, int src2_offset,
__global uchar *mask, int mask_step, int mask_offset,
__global short *dst, int dst_step, int dst_offset,
int rows, int cols, int dst_step1)
{
int x = get_global_id(0);
@@ -442,11 +472,12 @@ __kernel void arithm_bitwise_or_with_mask_C2_D3 (__global short *src1, int src1_
*((__global short2 *)((__global char *)dst + dst_index)) = data;
}
}
__kernel void arithm_bitwise_or_with_mask_C2_D4 (__global int *src1, int src1_step, int src1_offset,
__global int *src2, int src2_step, int src2_offset,
__global uchar *mask, int mask_step, int mask_offset,
__global int *dst, int dst_step, int dst_offset,
int rows, int cols, int dst_step1)
__kernel void arithm_bitwise_or_with_mask_C2_D4 (
__global int *src1, int src1_step, int src1_offset,
__global int *src2, int src2_step, int src2_offset,
__global uchar *mask, int mask_step, int mask_offset,
__global int *dst, int dst_step, int dst_offset,
int rows, int cols, int dst_step1)
{
int x = get_global_id(0);
@@ -471,11 +502,12 @@ __kernel void arithm_bitwise_or_with_mask_C2_D4 (__global int *src1, int src1_
*((__global int2 *)((__global char *)dst + dst_index)) = data;
}
}
__kernel void arithm_bitwise_or_with_mask_C2_D5 (__global char *src1, int src1_step, int src1_offset,
__global char *src2, int src2_step, int src2_offset,
__global uchar *mask, int mask_step, int mask_offset,
__global char *dst, int dst_step, int dst_offset,
int rows, int cols, int dst_step1)
__kernel void arithm_bitwise_or_with_mask_C2_D5 (
__global char *src1, int src1_step, int src1_offset,
__global char *src2, int src2_step, int src2_offset,
__global uchar *mask, int mask_step, int mask_offset,
__global char *dst, int dst_step, int dst_offset,
int rows, int cols, int dst_step1)
{
int x = get_global_id(0);
@@ -501,11 +533,12 @@ __kernel void arithm_bitwise_or_with_mask_C2_D5 (__global char *src1, int src1_s
}
}
#if defined (DOUBLE_SUPPORT)
__kernel void arithm_bitwise_or_with_mask_C2_D6 (__global char *src1, int src1_step, int src1_offset,
__global char *src2, int src2_step, int src2_offset,
__global uchar *mask, int mask_step, int mask_offset,
__global char *dst, int dst_step, int dst_offset,
int rows, int cols, int dst_step1)
__kernel void arithm_bitwise_or_with_mask_C2_D6 (
__global char *src1, int src1_step, int src1_offset,
__global char *src2, int src2_step, int src2_offset,
__global uchar *mask, int mask_step, int mask_offset,
__global char *dst, int dst_step, int dst_offset,
int rows, int cols, int dst_step1)
{
int x = get_global_id(0);
@@ -533,12 +566,12 @@ __kernel void arithm_bitwise_or_with_mask_C2_D6 (__global char *src1, int src1_s
#endif
__kernel void arithm_bitwise_or_with_mask_C3_D0 (__global uchar *src1, int src1_step, int src1_offset,
__global uchar *src2, int src2_step, int src2_offset,
__global uchar *mask, int mask_step, int mask_offset,
__global uchar *dst, int dst_step, int dst_offset,
int rows, int cols, int dst_step1)
__kernel void arithm_bitwise_or_with_mask_C3_D0 (
__global uchar *src1, int src1_step, int src1_offset,
__global uchar *src2, int src2_step, int src2_offset,
__global uchar *mask, int mask_step, int mask_offset,
__global uchar *dst, int dst_step, int dst_offset,
int rows, int cols, int dst_step1)
{
int x = get_global_id(0);
@@ -548,7 +581,10 @@ __kernel void arithm_bitwise_or_with_mask_C3_D0 (__global uchar *src1, int src1_
{
x = x << 2;
#define dst_align (((dst_offset % dst_step) / 3 ) & 3)
#ifdef dst_align
#undef dst_align
#endif
#define dst_align (((dst_offset % dst_step) / 3 ) & 3)
int src1_index = mad24(y, src1_step, (x * 3) + src1_offset - (dst_align * 3));
int src2_index = mad24(y, src2_step, (x * 3) + src2_offset - (dst_align * 3));
int mask_index = mad24(y, mask_step, x + mask_offset - dst_align);
@@ -596,11 +632,12 @@ __kernel void arithm_bitwise_or_with_mask_C3_D0 (__global uchar *src1, int src1_
}
__kernel void arithm_bitwise_or_with_mask_C3_D1 (__global char *src1, int src1_step, int src1_offset,
__global char *src2, int src2_step, int src2_offset,
__global uchar *mask, int mask_step, int mask_offset,
__global char *dst, int dst_step, int dst_offset,
int rows, int cols, int dst_step1)
__kernel void arithm_bitwise_or_with_mask_C3_D1 (
__global char *src1, int src1_step, int src1_offset,
__global char *src2, int src2_step, int src2_offset,
__global uchar *mask, int mask_step, int mask_offset,
__global char *dst, int dst_step, int dst_offset,
int rows, int cols, int dst_step1)
{
int x = get_global_id(0);
@@ -610,7 +647,10 @@ __kernel void arithm_bitwise_or_with_mask_C3_D1 (__global char *src1, int src1_s
{
x = x << 2;
#define dst_align (((dst_offset % dst_step) / 3 ) & 3)
#ifdef dst_align
#undef dst_align
#endif
#define dst_align (((dst_offset % dst_step) / 3 ) & 3)
int src1_index = mad24(y, src1_step, (x * 3) + src1_offset - (dst_align * 3));
int src2_index = mad24(y, src2_step, (x * 3) + src2_offset - (dst_align * 3));
int mask_index = mad24(y, mask_step, x + mask_offset - dst_align);
@@ -657,11 +697,12 @@ __kernel void arithm_bitwise_or_with_mask_C3_D1 (__global char *src1, int src1_s
}
}
__kernel void arithm_bitwise_or_with_mask_C3_D2 (__global ushort *src1, int src1_step, int src1_offset,
__global ushort *src2, int src2_step, int src2_offset,
__global uchar *mask, int mask_step, int mask_offset,
__global ushort *dst, int dst_step, int dst_offset,
int rows, int cols, int dst_step1)
__kernel void arithm_bitwise_or_with_mask_C3_D2 (
__global ushort *src1, int src1_step, int src1_offset,
__global ushort *src2, int src2_step, int src2_offset,
__global uchar *mask, int mask_step, int mask_offset,
__global ushort *dst, int dst_step, int dst_offset,
int rows, int cols, int dst_step1)
{
int x = get_global_id(0);
@@ -671,7 +712,10 @@ __kernel void arithm_bitwise_or_with_mask_C3_D2 (__global ushort *src1, int src1
{
x = x << 1;
#define dst_align (((dst_offset % dst_step) / 6 ) & 1)
#ifdef dst_align
#undef dst_align
#endif
#define dst_align (((dst_offset % dst_step) / 6 ) & 1)
int src1_index = mad24(y, src1_step, (x * 6) + src1_offset - (dst_align * 6));
int src2_index = mad24(y, src2_step, (x * 6) + src2_offset - (dst_align * 6));
int mask_index = mad24(y, mask_step, x + mask_offset - dst_align);
@@ -701,23 +745,24 @@ __kernel void arithm_bitwise_or_with_mask_C3_D2 (__global ushort *src1, int src1
data_0.xy = ((mask_data.x) && (dst_index + 0 >= dst_start)) ? tmp_data_0.xy : data_0.xy;
data_1.x = ((mask_data.x) && (dst_index + 0 >= dst_start) && (dst_index + 0 < dst_end))
? tmp_data_1.x : data_1.x;
? tmp_data_1.x : data_1.x;
data_1.y = ((mask_data.y) && (dst_index + 6 >= dst_start) && (dst_index + 6 < dst_end))
? tmp_data_1.y : data_1.y;
? tmp_data_1.y : data_1.y;
data_2.xy = ((mask_data.y) && (dst_index + 6 >= dst_start) && (dst_index + 6 < dst_end))
? tmp_data_2.xy : data_2.xy;
? tmp_data_2.xy : data_2.xy;
*((__global ushort2 *)((__global char *)dst + dst_index + 0))= data_0;
*((__global ushort2 *)((__global char *)dst + dst_index + 4))= data_1;
*((__global ushort2 *)((__global char *)dst + dst_index + 8))= data_2;
*((__global ushort2 *)((__global char *)dst + dst_index + 0))= data_0;
*((__global ushort2 *)((__global char *)dst + dst_index + 4))= data_1;
*((__global ushort2 *)((__global char *)dst + dst_index + 8))= data_2;
}
}
__kernel void arithm_bitwise_or_with_mask_C3_D3 (__global short *src1, int src1_step, int src1_offset,
__global short *src2, int src2_step, int src2_offset,
__global uchar *mask, int mask_step, int mask_offset,
__global short *dst, int dst_step, int dst_offset,
int rows, int cols, int dst_step1)
__kernel void arithm_bitwise_or_with_mask_C3_D3 (
__global short *src1, int src1_step, int src1_offset,
__global short *src2, int src2_step, int src2_offset,
__global uchar *mask, int mask_step, int mask_offset,
__global short *dst, int dst_step, int dst_offset,
int rows, int cols, int dst_step1)
{
int x = get_global_id(0);
@@ -727,7 +772,10 @@ __kernel void arithm_bitwise_or_with_mask_C3_D3 (__global short *src1, int src1_
{
x = x << 1;
#define dst_align (((dst_offset % dst_step) / 6 ) & 1)
#ifdef dst_align
#undef dst_align
#endif
#define dst_align (((dst_offset % dst_step) / 6 ) & 1)
int src1_index = mad24(y, src1_step, (x * 6) + src1_offset - (dst_align * 6));
int src2_index = mad24(y, src2_step, (x * 6) + src2_offset - (dst_align * 6));
int mask_index = mad24(y, mask_step, x + mask_offset - dst_align);
@@ -757,23 +805,24 @@ __kernel void arithm_bitwise_or_with_mask_C3_D3 (__global short *src1, int src1_
data_0.xy = ((mask_data.x) && (dst_index + 0 >= dst_start)) ? tmp_data_0.xy : data_0.xy;
data_1.x = ((mask_data.x) && (dst_index + 0 >= dst_start) && (dst_index + 0 < dst_end))
? tmp_data_1.x : data_1.x;
? tmp_data_1.x : data_1.x;
data_1.y = ((mask_data.y) && (dst_index + 6 >= dst_start) && (dst_index + 6 < dst_end))
? tmp_data_1.y : data_1.y;
? tmp_data_1.y : data_1.y;
data_2.xy = ((mask_data.y) && (dst_index + 6 >= dst_start) && (dst_index + 6 < dst_end))
? tmp_data_2.xy : data_2.xy;
? tmp_data_2.xy : data_2.xy;
*((__global short2 *)((__global char *)dst + dst_index + 0))= data_0;
*((__global short2 *)((__global char *)dst + dst_index + 4))= data_1;
*((__global short2 *)((__global char *)dst + dst_index + 8))= data_2;
*((__global short2 *)((__global char *)dst + dst_index + 0))= data_0;
*((__global short2 *)((__global char *)dst + dst_index + 4))= data_1;
*((__global short2 *)((__global char *)dst + dst_index + 8))= data_2;
}
}
__kernel void arithm_bitwise_or_with_mask_C3_D4 (__global int *src1, int src1_step, int src1_offset,
__global int *src2, int src2_step, int src2_offset,
__global uchar *mask, int mask_step, int mask_offset,
__global int *dst, int dst_step, int dst_offset,
int rows, int cols, int dst_step1)
__kernel void arithm_bitwise_or_with_mask_C3_D4 (
__global int *src1, int src1_step, int src1_offset,
__global int *src2, int src2_step, int src2_offset,
__global uchar *mask, int mask_step, int mask_offset,
__global int *dst, int dst_step, int dst_offset,
int rows, int cols, int dst_step1)
{
int x = get_global_id(0);
@@ -808,16 +857,17 @@ __kernel void arithm_bitwise_or_with_mask_C3_D4 (__global int *src1, int src1_
data_1 = mask_data ? tmp_data_1 : data_1;
data_2 = mask_data ? tmp_data_2 : data_2;
*((__global int *)((__global char *)dst + dst_index + 0))= data_0;
*((__global int *)((__global char *)dst + dst_index + 4))= data_1;
*((__global int *)((__global char *)dst + dst_index + 8))= data_2;
*((__global int *)((__global char *)dst + dst_index + 0))= data_0;
*((__global int *)((__global char *)dst + dst_index + 4))= data_1;
*((__global int *)((__global char *)dst + dst_index + 8))= data_2;
}
}
__kernel void arithm_bitwise_or_with_mask_C3_D5 (__global char *src1, int src1_step, int src1_offset,
__global char *src2, int src2_step, int src2_offset,
__global uchar *mask, int mask_step, int mask_offset,
__global char *dst, int dst_step, int dst_offset,
int rows, int cols, int dst_step1)
__kernel void arithm_bitwise_or_with_mask_C3_D5 (
__global char *src1, int src1_step, int src1_offset,
__global char *src2, int src2_step, int src2_offset,
__global uchar *mask, int mask_step, int mask_offset,
__global char *dst, int dst_step, int dst_offset,
int rows, int cols, int dst_step1)
{
int x = get_global_id(0);
@@ -852,17 +902,18 @@ __kernel void arithm_bitwise_or_with_mask_C3_D5 (__global char *src1, int src1_s
data_1 = mask_data ? tmp_data_1 : data_1;
data_2 = mask_data ? tmp_data_2 : data_2;
*((__global char4 *)((__global char *)dst + dst_index + 0))= data_0;
*((__global char4 *)((__global char *)dst + dst_index + 4))= data_1;
*((__global char4 *)((__global char *)dst + dst_index + 8))= data_2;
*((__global char4 *)((__global char *)dst + dst_index + 0))= data_0;
*((__global char4 *)((__global char *)dst + dst_index + 4))= data_1;
*((__global char4 *)((__global char *)dst + dst_index + 8))= data_2;
}
}
#if defined (DOUBLE_SUPPORT)
__kernel void arithm_bitwise_or_with_mask_C3_D6 (__global char *src1, int src1_step, int src1_offset,
__global char *src2, int src2_step, int src2_offset,
__global uchar *mask, int mask_step, int mask_offset,
__global char *dst, int dst_step, int dst_offset,
int rows, int cols, int dst_step1)
__kernel void arithm_bitwise_or_with_mask_C3_D6 (
__global char *src1, int src1_step, int src1_offset,
__global char *src2, int src2_step, int src2_offset,
__global uchar *mask, int mask_step, int mask_offset,
__global char *dst, int dst_step, int dst_offset,
int rows, int cols, int dst_step1)
{
int x = get_global_id(0);
@@ -897,20 +948,20 @@ __kernel void arithm_bitwise_or_with_mask_C3_D6 (__global char *src1, int src1_s
data_1 = mask_data ? tmp_data_1 : data_1;
data_2 = mask_data ? tmp_data_2 : data_2;
*((__global char8 *)((__global char *)dst + dst_index + 0 ))= data_0;
*((__global char8 *)((__global char *)dst + dst_index + 8 ))= data_1;
*((__global char8 *)((__global char *)dst + dst_index + 16))= data_2;
*((__global char8 *)((__global char *)dst + dst_index + 0 ))= data_0;
*((__global char8 *)((__global char *)dst + dst_index + 8 ))= data_1;
*((__global char8 *)((__global char *)dst + dst_index + 16))= data_2;
}
}
#endif
__kernel void arithm_bitwise_or_with_mask_C4_D0 (__global uchar *src1, int src1_step, int src1_offset,
__global uchar *src2, int src2_step, int src2_offset,
__global uchar *mask, int mask_step, int mask_offset,
__global uchar *dst, int dst_step, int dst_offset,
int rows, int cols, int dst_step1)
__kernel void arithm_bitwise_or_with_mask_C4_D0 (
__global uchar *src1, int src1_step, int src1_offset,
__global uchar *src2, int src2_step, int src2_offset,
__global uchar *mask, int mask_step, int mask_offset,
__global uchar *dst, int dst_step, int dst_offset,
int rows, int cols, int dst_step1)
{
int x = get_global_id(0);
@@ -937,11 +988,12 @@ __kernel void arithm_bitwise_or_with_mask_C4_D0 (__global uchar *src1, int src1_
}
__kernel void arithm_bitwise_or_with_mask_C4_D1 (__global char *src1, int src1_step, int src1_offset,
__global char *src2, int src2_step, int src2_offset,
__global uchar *mask, int mask_step, int mask_offset,
__global char *dst, int dst_step, int dst_offset,
int rows, int cols, int dst_step1)
__kernel void arithm_bitwise_or_with_mask_C4_D1 (
__global char *src1, int src1_step, int src1_offset,
__global char *src2, int src2_step, int src2_offset,
__global uchar *mask, int mask_step, int mask_offset,
__global char *dst, int dst_step, int dst_offset,
int rows, int cols, int dst_step1)
{
int x = get_global_id(0);
@@ -967,11 +1019,12 @@ __kernel void arithm_bitwise_or_with_mask_C4_D1 (__global char *src1, int src1_s
}
}
__kernel void arithm_bitwise_or_with_mask_C4_D2 (__global ushort *src1, int src1_step, int src1_offset,
__global ushort *src2, int src2_step, int src2_offset,
__global uchar *mask, int mask_step, int mask_offset,
__global ushort *dst, int dst_step, int dst_offset,
int rows, int cols, int dst_step1)
__kernel void arithm_bitwise_or_with_mask_C4_D2 (
__global ushort *src1, int src1_step, int src1_offset,
__global ushort *src2, int src2_step, int src2_offset,
__global uchar *mask, int mask_step, int mask_offset,
__global ushort *dst, int dst_step, int dst_offset,
int rows, int cols, int dst_step1)
{
int x = get_global_id(0);
@@ -996,11 +1049,12 @@ __kernel void arithm_bitwise_or_with_mask_C4_D2 (__global ushort *src1, int src1
*((__global ushort4 *)((__global char *)dst + dst_index)) = data;
}
}
__kernel void arithm_bitwise_or_with_mask_C4_D3 (__global short *src1, int src1_step, int src1_offset,
__global short *src2, int src2_step, int src2_offset,
__global uchar *mask, int mask_step, int mask_offset,
__global short *dst, int dst_step, int dst_offset,
int rows, int cols, int dst_step1)
__kernel void arithm_bitwise_or_with_mask_C4_D3 (
__global short *src1, int src1_step, int src1_offset,
__global short *src2, int src2_step, int src2_offset,
__global uchar *mask, int mask_step, int mask_offset,
__global short *dst, int dst_step, int dst_offset,
int rows, int cols, int dst_step1)
{
int x = get_global_id(0);
@@ -1025,11 +1079,12 @@ __kernel void arithm_bitwise_or_with_mask_C4_D3 (__global short *src1, int src1_
*((__global short4 *)((__global char *)dst + dst_index)) = data;
}
}
__kernel void arithm_bitwise_or_with_mask_C4_D4 (__global int *src1, int src1_step, int src1_offset,
__global int *src2, int src2_step, int src2_offset,
__global uchar *mask, int mask_step, int mask_offset,
__global int *dst, int dst_step, int dst_offset,
int rows, int cols, int dst_step1)
__kernel void arithm_bitwise_or_with_mask_C4_D4 (
__global int *src1, int src1_step, int src1_offset,
__global int *src2, int src2_step, int src2_offset,
__global uchar *mask, int mask_step, int mask_offset,
__global int *dst, int dst_step, int dst_offset,
int rows, int cols, int dst_step1)
{
int x = get_global_id(0);
@@ -1054,11 +1109,12 @@ __kernel void arithm_bitwise_or_with_mask_C4_D4 (__global int *src1, int src1_
*((__global int4 *)((__global char *)dst + dst_index)) = data;
}
}
__kernel void arithm_bitwise_or_with_mask_C4_D5 (__global char *src1, int src1_step, int src1_offset,
__global char *src2, int src2_step, int src2_offset,
__global uchar *mask, int mask_step, int mask_offset,
__global char *dst, int dst_step, int dst_offset,
int rows, int cols, int dst_step1)
__kernel void arithm_bitwise_or_with_mask_C4_D5 (
__global char *src1, int src1_step, int src1_offset,
__global char *src2, int src2_step, int src2_offset,
__global uchar *mask, int mask_step, int mask_offset,
__global char *dst, int dst_step, int dst_offset,
int rows, int cols, int dst_step1)
{
int x = get_global_id(0);
@@ -1084,11 +1140,12 @@ __kernel void arithm_bitwise_or_with_mask_C4_D5 (__global char *src1, int src1_s
}
}
#if defined (DOUBLE_SUPPORT)
__kernel void arithm_bitwise_or_with_mask_C4_D6 (__global char *src1, int src1_step, int src1_offset,
__global char *src2, int src2_step, int src2_offset,
__global uchar *mask, int mask_step, int mask_offset,
__global char *dst, int dst_step, int dst_offset,
int rows, int cols, int dst_step1)
__kernel void arithm_bitwise_or_with_mask_C4_D6 (
__global char *src1, int src1_step, int src1_offset,
__global char *src2, int src2_step, int src2_offset,
__global uchar *mask, int mask_step, int mask_offset,
__global char *dst, int dst_step, int dst_offset,
int rows, int cols, int dst_step1)
{
int x = get_global_id(0);

View File

@@ -43,16 +43,21 @@
//
//M*/
#if defined (DOUBLE_SUPPORT)
#ifdef cl_khr_fp64
#pragma OPENCL EXTENSION cl_khr_fp64:enable
#elif defined (cl_amd_fp64)
#pragma OPENCL EXTENSION cl_amd_fp64:enable
#endif
#endif
//////////////////////////////////////////////////////////////////////////////////////////////////////
////////////////////////////////////////////BITWISE_OR////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////////////////////////////
/**************************************or with scalar without mask**************************************/
__kernel void arithm_s_bitwise_or_C1_D0 (__global uchar *src1, int src1_step, int src1_offset,
__global uchar *dst, int dst_step, int dst_offset,
uchar4 src2, int rows, int cols, int dst_step1)
__kernel void arithm_s_bitwise_or_C1_D0 (
__global uchar *src1, int src1_step, int src1_offset,
__global uchar *dst, int dst_step, int dst_offset,
uchar4 src2, int rows, int cols, int dst_step1)
{
int x = get_global_id(0);
int y = get_global_id(1);
@@ -61,7 +66,10 @@ __kernel void arithm_s_bitwise_or_C1_D0 (__global uchar *src1, int src1_step,
{
x = x << 2;
#define dst_align (dst_offset & 3)
#ifdef dst_align
#undef dst_align
#endif
#define dst_align (dst_offset & 3)
int src1_index = mad24(y, src1_step, x + src1_offset - dst_align);
int dst_start = mad24(y, dst_step, dst_offset);
@@ -84,9 +92,10 @@ __kernel void arithm_s_bitwise_or_C1_D0 (__global uchar *src1, int src1_step,
}
__kernel void arithm_s_bitwise_or_C1_D1 (__global char *src1, int src1_step, int src1_offset,
__global char *dst, int dst_step, int dst_offset,
char4 src2, int rows, int cols, int dst_step1)
__kernel void arithm_s_bitwise_or_C1_D1 (
__global char *src1, int src1_step, int src1_offset,
__global char *dst, int dst_step, int dst_offset,
char4 src2, int rows, int cols, int dst_step1)
{
int x = get_global_id(0);
int y = get_global_id(1);
@@ -95,7 +104,10 @@ __kernel void arithm_s_bitwise_or_C1_D1 (__global char *src1, int src1_step, i
{
x = x << 2;
#define dst_align (dst_offset & 3)
#ifdef dst_align
#undef dst_align
#endif
#define dst_align (dst_offset & 3)
int src1_index = mad24(y, src1_step, x + src1_offset - dst_align);
int dst_start = mad24(y, dst_step, dst_offset);
@@ -117,9 +129,10 @@ __kernel void arithm_s_bitwise_or_C1_D1 (__global char *src1, int src1_step, i
}
}
__kernel void arithm_s_bitwise_or_C1_D2 (__global ushort *src1, int src1_step, int src1_offset,
__global ushort *dst, int dst_step, int dst_offset,
ushort4 src2, int rows, int cols, int dst_step1)
__kernel void arithm_s_bitwise_or_C1_D2 (
__global ushort *src1, int src1_step, int src1_offset,
__global ushort *dst, int dst_step, int dst_offset,
ushort4 src2, int rows, int cols, int dst_step1)
{
int x = get_global_id(0);
@@ -129,7 +142,10 @@ __kernel void arithm_s_bitwise_or_C1_D2 (__global ushort *src1, int src1_step,
{
x = x << 1;
#define dst_align ((dst_offset >> 1) & 1)
#ifdef dst_align
#undef dst_align
#endif
#define dst_align ((dst_offset >> 1) & 1)
int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1));
int dst_start = mad24(y, dst_step, dst_offset);
@@ -148,9 +164,10 @@ __kernel void arithm_s_bitwise_or_C1_D2 (__global ushort *src1, int src1_step,
*((__global ushort2 *)((__global uchar *)dst + dst_index)) = data;
}
}
__kernel void arithm_s_bitwise_or_C1_D3 (__global short *src1, int src1_step, int src1_offset,
__global short *dst, int dst_step, int dst_offset,
short4 src2, int rows, int cols, int dst_step1)
__kernel void arithm_s_bitwise_or_C1_D3 (
__global short *src1, int src1_step, int src1_offset,
__global short *dst, int dst_step, int dst_offset,
short4 src2, int rows, int cols, int dst_step1)
{
int x = get_global_id(0);
@@ -160,7 +177,10 @@ __kernel void arithm_s_bitwise_or_C1_D3 (__global short *src1, int src1_step,
{
x = x << 1;
#define dst_align ((dst_offset >> 1) & 1)
#ifdef dst_align
#undef dst_align
#endif
#define dst_align ((dst_offset >> 1) & 1)
int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1));
int dst_start = mad24(y, dst_step, dst_offset);
@@ -179,9 +199,10 @@ __kernel void arithm_s_bitwise_or_C1_D3 (__global short *src1, int src1_step,
*((__global short2 *)((__global uchar *)dst + dst_index)) = data;
}
}
__kernel void arithm_s_bitwise_or_C1_D4 (__global int *src1, int src1_step, int src1_offset,
__global int *dst, int dst_step, int dst_offset,
int4 src2, int rows, int cols, int dst_step1)
__kernel void arithm_s_bitwise_or_C1_D4 (
__global int *src1, int src1_step, int src1_offset,
__global int *dst, int dst_step, int dst_offset,
int4 src2, int rows, int cols, int dst_step1)
{
int x = get_global_id(0);
@@ -200,9 +221,10 @@ __kernel void arithm_s_bitwise_or_C1_D4 (__global int *src1, int src1_step, in
*((__global int *)((__global char *)dst + dst_index)) = data;
}
}
__kernel void arithm_s_bitwise_or_C1_D5 (__global char *src1, int src1_step, int src1_offset,
__global char *dst, int dst_step, int dst_offset,
char16 src2, int rows, int cols, int dst_step1)
__kernel void arithm_s_bitwise_or_C1_D5 (
__global char *src1, int src1_step, int src1_offset,
__global char *dst, int dst_step, int dst_offset,
char16 src2, int rows, int cols, int dst_step1)
{
int x = get_global_id(0);
@@ -222,9 +244,10 @@ __kernel void arithm_s_bitwise_or_C1_D5 (__global char *src1, int src1_step, i
}
}
#if defined (DOUBLE_SUPPORT)
__kernel void arithm_s_bitwise_or_C1_D6 (__global short *src1, int src1_step, int src1_offset,
__global short *dst, int dst_step, int dst_offset,
short16 src2, int rows, int cols, int dst_step1)
__kernel void arithm_s_bitwise_or_C1_D6 (
__global short *src1, int src1_step, int src1_offset,
__global short *dst, int dst_step, int dst_offset,
short16 src2, int rows, int cols, int dst_step1)
{
@@ -245,10 +268,10 @@ __kernel void arithm_s_bitwise_or_C1_D6 (__global short *src1, int src1_step, in
}
}
#endif
__kernel void arithm_s_bitwise_or_C2_D0 (__global uchar *src1, int src1_step, int src1_offset,
__global uchar *dst, int dst_step, int dst_offset,
uchar4 src2, int rows, int cols, int dst_step1)
__kernel void arithm_s_bitwise_or_C2_D0 (
__global uchar *src1, int src1_step, int src1_offset,
__global uchar *dst, int dst_step, int dst_offset,
uchar4 src2, int rows, int cols, int dst_step1)
{
@@ -259,7 +282,10 @@ __kernel void arithm_s_bitwise_or_C2_D0 (__global uchar *src1, int src1_step,
{
x = x << 1;
#define dst_align ((dst_offset >> 1) & 1)
#ifdef dst_align
#undef dst_align
#endif
#define dst_align ((dst_offset >> 1) & 1)
int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1));
int dst_start = mad24(y, dst_step, dst_offset);
@@ -280,9 +306,10 @@ __kernel void arithm_s_bitwise_or_C2_D0 (__global uchar *src1, int src1_step,
}
__kernel void arithm_s_bitwise_or_C2_D1 (__global char *src1, int src1_step, int src1_offset,
__global char *dst, int dst_step, int dst_offset,
char4 src2, int rows, int cols, int dst_step1)
__kernel void arithm_s_bitwise_or_C2_D1 (
__global char *src1, int src1_step, int src1_offset,
__global char *dst, int dst_step, int dst_offset,
char4 src2, int rows, int cols, int dst_step1)
{
@@ -293,7 +320,10 @@ __kernel void arithm_s_bitwise_or_C2_D1 (__global char *src1, int src1_step, i
{
x = x << 1;
#define dst_align ((dst_offset >> 1) & 1)
#ifdef dst_align
#undef dst_align
#endif
#define dst_align ((dst_offset >> 1) & 1)
int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1));
int dst_start = mad24(y, dst_step, dst_offset);
@@ -313,9 +343,10 @@ __kernel void arithm_s_bitwise_or_C2_D1 (__global char *src1, int src1_step, i
}
}
__kernel void arithm_s_bitwise_or_C2_D2 (__global ushort *src1, int src1_step, int src1_offset,
__global ushort *dst, int dst_step, int dst_offset,
ushort4 src2, int rows, int cols, int dst_step1)
__kernel void arithm_s_bitwise_or_C2_D2 (
__global ushort *src1, int src1_step, int src1_offset,
__global ushort *dst, int dst_step, int dst_offset,
ushort4 src2, int rows, int cols, int dst_step1)
{
@@ -335,9 +366,10 @@ __kernel void arithm_s_bitwise_or_C2_D2 (__global ushort *src1, int src1_step,
*((__global ushort2 *)((__global char *)dst + dst_index)) = data;
}
}
__kernel void arithm_s_bitwise_or_C2_D3 (__global short *src1, int src1_step, int src1_offset,
__global short *dst, int dst_step, int dst_offset,
short4 src2, int rows, int cols, int dst_step1)
__kernel void arithm_s_bitwise_or_C2_D3 (
__global short *src1, int src1_step, int src1_offset,
__global short *dst, int dst_step, int dst_offset,
short4 src2, int rows, int cols, int dst_step1)
{
@@ -358,8 +390,8 @@ __kernel void arithm_s_bitwise_or_C2_D3 (__global short *src1, int src1_step,
}
}
__kernel void arithm_s_bitwise_or_C2_D4 (__global int *src1, int src1_step, int src1_offset,
__global int *dst, int dst_step, int dst_offset,
int4 src2, int rows, int cols, int dst_step1)
__global int *dst, int dst_step, int dst_offset,
int4 src2, int rows, int cols, int dst_step1)
{
@@ -378,9 +410,10 @@ __kernel void arithm_s_bitwise_or_C2_D4 (__global int *src1, int src1_step, in
*((__global int2 *)((__global char *)dst + dst_index)) = data;
}
}
__kernel void arithm_s_bitwise_or_C2_D5 (__global char *src1, int src1_step, int src1_offset,
__global char *dst, int dst_step, int dst_offset,
char16 src2, int rows, int cols, int dst_step1)
__kernel void arithm_s_bitwise_or_C2_D5 (
__global char *src1, int src1_step, int src1_offset,
__global char *dst, int dst_step, int dst_offset,
char16 src2, int rows, int cols, int dst_step1)
{
@@ -400,9 +433,10 @@ __kernel void arithm_s_bitwise_or_C2_D5 (__global char *src1, int src1_step, i
}
}
#if defined (DOUBLE_SUPPORT)
__kernel void arithm_s_bitwise_or_C2_D6 (__global short *src1, int src1_step, int src1_offset,
__global short *dst, int dst_step, int dst_offset,
short16 src2, int rows, int cols, int dst_step1)
__kernel void arithm_s_bitwise_or_C2_D6 (
__global short *src1, int src1_step, int src1_offset,
__global short *dst, int dst_step, int dst_offset,
short16 src2, int rows, int cols, int dst_step1)
{
@@ -423,9 +457,10 @@ __kernel void arithm_s_bitwise_or_C2_D6 (__global short *src1, int src1_step, in
}
}
#endif
__kernel void arithm_s_bitwise_or_C3_D0 (__global uchar *src1, int src1_step, int src1_offset,
__global uchar *dst, int dst_step, int dst_offset,
uchar4 src2, int rows, int cols, int dst_step1)
__kernel void arithm_s_bitwise_or_C3_D0 (
__global uchar *src1, int src1_step, int src1_offset,
__global uchar *dst, int dst_step, int dst_offset,
uchar4 src2, int rows, int cols, int dst_step1)
{
@@ -436,7 +471,10 @@ __kernel void arithm_s_bitwise_or_C3_D0 (__global uchar *src1, int src1_step,
{
x = x << 2;
#define dst_align (((dst_offset % dst_step) / 3 ) & 3)
#ifdef dst_align
#undef dst_align
#endif
#define dst_align (((dst_offset % dst_step) / 3 ) & 3)
int src1_index = mad24(y, src1_step, (x * 3) + src1_offset - (dst_align * 3));
int dst_start = mad24(y, dst_step, dst_offset);
@@ -480,9 +518,10 @@ __kernel void arithm_s_bitwise_or_C3_D0 (__global uchar *src1, int src1_step,
}
__kernel void arithm_s_bitwise_or_C3_D1 (__global char *src1, int src1_step, int src1_offset,
__global char *dst, int dst_step, int dst_offset,
char4 src2, int rows, int cols, int dst_step1)
__kernel void arithm_s_bitwise_or_C3_D1 (
__global char *src1, int src1_step, int src1_offset,
__global char *dst, int dst_step, int dst_offset,
char4 src2, int rows, int cols, int dst_step1)
{
@@ -493,7 +532,10 @@ __kernel void arithm_s_bitwise_or_C3_D1 (__global char *src1, int src1_step, i
{
x = x << 2;
#define dst_align (((dst_offset % dst_step) / 3 ) & 3)
#ifdef dst_align
#undef dst_align
#endif
#define dst_align (((dst_offset % dst_step) / 3 ) & 3)
int src1_index = mad24(y, src1_step, (x * 3) + src1_offset - (dst_align * 3));
int dst_start = mad24(y, dst_step, dst_offset);
@@ -536,9 +578,10 @@ __kernel void arithm_s_bitwise_or_C3_D1 (__global char *src1, int src1_step, i
}
}
__kernel void arithm_s_bitwise_or_C3_D2 (__global ushort *src1, int src1_step, int src1_offset,
__global ushort *dst, int dst_step, int dst_offset,
ushort4 src2, int rows, int cols, int dst_step1)
__kernel void arithm_s_bitwise_or_C3_D2 (
__global ushort *src1, int src1_step, int src1_offset,
__global ushort *dst, int dst_step, int dst_offset,
ushort4 src2, int rows, int cols, int dst_step1)
{
@@ -549,7 +592,10 @@ __kernel void arithm_s_bitwise_or_C3_D2 (__global ushort *src1, int src1_step,
{
x = x << 1;
#define dst_align (((dst_offset % dst_step) / 6 ) & 1)
#ifdef dst_align
#undef dst_align
#endif
#define dst_align (((dst_offset % dst_step) / 6 ) & 1)
int src1_index = mad24(y, src1_step, (x * 6) + src1_offset - (dst_align * 6));
int dst_start = mad24(y, dst_step, dst_offset);
@@ -575,21 +621,22 @@ __kernel void arithm_s_bitwise_or_C3_D2 (__global ushort *src1, int src1_step,
data_0.xy = ((dst_index + 0 >= dst_start)) ? tmp_data_0.xy : data_0.xy;
data_1.x = ((dst_index + 0 >= dst_start) && (dst_index + 0 < dst_end))
? tmp_data_1.x : data_1.x;
? tmp_data_1.x : data_1.x;
data_1.y = ((dst_index + 6 >= dst_start) && (dst_index + 6 < dst_end))
? tmp_data_1.y : data_1.y;
? tmp_data_1.y : data_1.y;
data_2.xy = ((dst_index + 6 >= dst_start) && (dst_index + 6 < dst_end))
? tmp_data_2.xy : data_2.xy;
? tmp_data_2.xy : data_2.xy;
*((__global ushort2 *)((__global char *)dst + dst_index + 0))= data_0;
*((__global ushort2 *)((__global char *)dst + dst_index + 4))= data_1;
*((__global ushort2 *)((__global char *)dst + dst_index + 8))= data_2;
*((__global ushort2 *)((__global char *)dst + dst_index + 0))= data_0;
*((__global ushort2 *)((__global char *)dst + dst_index + 4))= data_1;
*((__global ushort2 *)((__global char *)dst + dst_index + 8))= data_2;
}
}
__kernel void arithm_s_bitwise_or_C3_D3 (__global short *src1, int src1_step, int src1_offset,
__global short *dst, int dst_step, int dst_offset,
short4 src2, int rows, int cols, int dst_step1)
__kernel void arithm_s_bitwise_or_C3_D3 (
__global short *src1, int src1_step, int src1_offset,
__global short *dst, int dst_step, int dst_offset,
short4 src2, int rows, int cols, int dst_step1)
{
@@ -600,7 +647,10 @@ __kernel void arithm_s_bitwise_or_C3_D3 (__global short *src1, int src1_step,
{
x = x << 1;
#define dst_align (((dst_offset % dst_step) / 6 ) & 1)
#ifdef dst_align
#undef dst_align
#endif
#define dst_align (((dst_offset % dst_step) / 6 ) & 1)
int src1_index = mad24(y, src1_step, (x * 6) + src1_offset - (dst_align * 6));
int dst_start = mad24(y, dst_step, dst_offset);
@@ -626,21 +676,22 @@ __kernel void arithm_s_bitwise_or_C3_D3 (__global short *src1, int src1_step,
data_0.xy = ((dst_index + 0 >= dst_start)) ? tmp_data_0.xy : data_0.xy;
data_1.x = ((dst_index + 0 >= dst_start) && (dst_index + 0 < dst_end))
? tmp_data_1.x : data_1.x;
? tmp_data_1.x : data_1.x;
data_1.y = ((dst_index + 6 >= dst_start) && (dst_index + 6 < dst_end))
? tmp_data_1.y : data_1.y;
? tmp_data_1.y : data_1.y;
data_2.xy = ((dst_index + 6 >= dst_start) && (dst_index + 6 < dst_end))
? tmp_data_2.xy : data_2.xy;
? tmp_data_2.xy : data_2.xy;
*((__global short2 *)((__global char *)dst + dst_index + 0))= data_0;
*((__global short2 *)((__global char *)dst + dst_index + 4))= data_1;
*((__global short2 *)((__global char *)dst + dst_index + 8))= data_2;
*((__global short2 *)((__global char *)dst + dst_index + 0))= data_0;
*((__global short2 *)((__global char *)dst + dst_index + 4))= data_1;
*((__global short2 *)((__global char *)dst + dst_index + 8))= data_2;
}
}
__kernel void arithm_s_bitwise_or_C3_D4 (__global int *src1, int src1_step, int src1_offset,
__global int *dst, int dst_step, int dst_offset,
int4 src2, int rows, int cols, int dst_step1)
__kernel void arithm_s_bitwise_or_C3_D4 (
__global int *src1, int src1_step, int src1_offset,
__global int *dst, int dst_step, int dst_offset,
int4 src2, int rows, int cols, int dst_step1)
{
@@ -668,14 +719,15 @@ __kernel void arithm_s_bitwise_or_C3_D4 (__global int *src1, int src1_step, in
int tmp_data_1 = src1_data_1 | src2_data_1;
int tmp_data_2 = src1_data_2 | src2_data_2;
*((__global int *)((__global char *)dst + dst_index + 0))= tmp_data_0;
*((__global int *)((__global char *)dst + dst_index + 4))= tmp_data_1;
*((__global int *)((__global char *)dst + dst_index + 8))= tmp_data_2;
*((__global int *)((__global char *)dst + dst_index + 0))= tmp_data_0;
*((__global int *)((__global char *)dst + dst_index + 4))= tmp_data_1;
*((__global int *)((__global char *)dst + dst_index + 8))= tmp_data_2;
}
}
__kernel void arithm_s_bitwise_or_C3_D5 (__global char *src1, int src1_step, int src1_offset,
__global char *dst, int dst_step, int dst_offset,
char16 src2, int rows, int cols, int dst_step1)
__kernel void arithm_s_bitwise_or_C3_D5 (
__global char *src1, int src1_step, int src1_offset,
__global char *dst, int dst_step, int dst_offset,
char16 src2, int rows, int cols, int dst_step1)
{
@@ -700,15 +752,16 @@ __kernel void arithm_s_bitwise_or_C3_D5 (__global char *src1, int src1_step, i
char4 tmp_data_1 = src1_data_1 | src2_data_1;
char4 tmp_data_2 = src1_data_2 | src2_data_2;
*((__global char4 *)((__global char *)dst + dst_index + 0))= tmp_data_0;
*((__global char4 *)((__global char *)dst + dst_index + 4))= tmp_data_1;
*((__global char4 *)((__global char *)dst + dst_index + 8))= tmp_data_2;
*((__global char4 *)((__global char *)dst + dst_index + 0))= tmp_data_0;
*((__global char4 *)((__global char *)dst + dst_index + 4))= tmp_data_1;
*((__global char4 *)((__global char *)dst + dst_index + 8))= tmp_data_2;
}
}
#if defined (DOUBLE_SUPPORT)
__kernel void arithm_s_bitwise_or_C3_D6 (__global short *src1, int src1_step, int src1_offset,
__global short *dst, int dst_step, int dst_offset,
short16 src2, int rows, int cols, int dst_step1)
__kernel void arithm_s_bitwise_or_C3_D6 (
__global short *src1, int src1_step, int src1_offset,
__global short *dst, int dst_step, int dst_offset,
short16 src2, int rows, int cols, int dst_step1)
{
@@ -736,15 +789,16 @@ __kernel void arithm_s_bitwise_or_C3_D6 (__global short *src1, int src1_step, in
short4 tmp_data_1 = src1_data_1 | src2_data_1;
short4 tmp_data_2 = src1_data_2 | src2_data_2;
*((__global short4 *)((__global char *)dst + dst_index + 0 ))= tmp_data_0;
*((__global short4 *)((__global char *)dst + dst_index + 8 ))= tmp_data_1;
*((__global short4 *)((__global char *)dst + dst_index + 16))= tmp_data_2;
*((__global short4 *)((__global char *)dst + dst_index + 0 ))= tmp_data_0;
*((__global short4 *)((__global char *)dst + dst_index + 8 ))= tmp_data_1;
*((__global short4 *)((__global char *)dst + dst_index + 16))= tmp_data_2;
}
}
#endif
__kernel void arithm_s_bitwise_or_C4_D0 (__global uchar *src1, int src1_step, int src1_offset,
__global uchar *dst, int dst_step, int dst_offset,
uchar4 src2, int rows, int cols, int dst_step1)
__kernel void arithm_s_bitwise_or_C4_D0 (
__global uchar *src1, int src1_step, int src1_offset,
__global uchar *dst, int dst_step, int dst_offset,
uchar4 src2, int rows, int cols, int dst_step1)
{
@@ -765,9 +819,10 @@ __kernel void arithm_s_bitwise_or_C4_D0 (__global uchar *src1, int src1_step,
}
__kernel void arithm_s_bitwise_or_C4_D1 (__global char *src1, int src1_step, int src1_offset,
__global char *dst, int dst_step, int dst_offset,
char4 src2, int rows, int cols, int dst_step1)
__kernel void arithm_s_bitwise_or_C4_D1 (
__global char *src1, int src1_step, int src1_offset,
__global char *dst, int dst_step, int dst_offset,
char4 src2, int rows, int cols, int dst_step1)
{
@@ -787,9 +842,10 @@ __kernel void arithm_s_bitwise_or_C4_D1 (__global char *src1, int src1_step, i
}
}
__kernel void arithm_s_bitwise_or_C4_D2 (__global ushort *src1, int src1_step, int src1_offset,
__global ushort *dst, int dst_step, int dst_offset,
ushort4 src2, int rows, int cols, int dst_step1)
__kernel void arithm_s_bitwise_or_C4_D2 (
__global ushort *src1, int src1_step, int src1_offset,
__global ushort *dst, int dst_step, int dst_offset,
ushort4 src2, int rows, int cols, int dst_step1)
{
@@ -808,9 +864,10 @@ __kernel void arithm_s_bitwise_or_C4_D2 (__global ushort *src1, int src1_step,
*((__global ushort4 *)((__global char *)dst + dst_index)) = data;
}
}
__kernel void arithm_s_bitwise_or_C4_D3 (__global short *src1, int src1_step, int src1_offset,
__global short *dst, int dst_step, int dst_offset,
short4 src2, int rows, int cols, int dst_step1)
__kernel void arithm_s_bitwise_or_C4_D3 (
__global short *src1, int src1_step, int src1_offset,
__global short *dst, int dst_step, int dst_offset,
short4 src2, int rows, int cols, int dst_step1)
{
@@ -829,9 +886,10 @@ __kernel void arithm_s_bitwise_or_C4_D3 (__global short *src1, int src1_step,
*((__global short4 *)((__global char *)dst + dst_index)) = data;
}
}
__kernel void arithm_s_bitwise_or_C4_D4 (__global int *src1, int src1_step, int src1_offset,
__global int *dst, int dst_step, int dst_offset,
int4 src2, int rows, int cols, int dst_step1)
__kernel void arithm_s_bitwise_or_C4_D4 (
__global int *src1, int src1_step, int src1_offset,
__global int *dst, int dst_step, int dst_offset,
int4 src2, int rows, int cols, int dst_step1)
{
@@ -850,9 +908,10 @@ __kernel void arithm_s_bitwise_or_C4_D4 (__global int *src1, int src1_step, in
*((__global int4 *)((__global char *)dst + dst_index)) = data;
}
}
__kernel void arithm_s_bitwise_or_C4_D5 (__global char *src1, int src1_step, int src1_offset,
__global char *dst, int dst_step, int dst_offset,
char16 src2, int rows, int cols, int dst_step1)
__kernel void arithm_s_bitwise_or_C4_D5 (
__global char *src1, int src1_step, int src1_offset,
__global char *dst, int dst_step, int dst_offset,
char16 src2, int rows, int cols, int dst_step1)
{
@@ -874,9 +933,10 @@ __kernel void arithm_s_bitwise_or_C4_D5 (__global char *src1, int src1_step, i
}
}
#if defined (DOUBLE_SUPPORT)
__kernel void arithm_s_bitwise_or_C4_D6 (__global short *src1, int src1_step, int src1_offset,
__global short *dst, int dst_step, int dst_offset,
short16 src2, int rows, int cols, int dst_step1)
__kernel void arithm_s_bitwise_or_C4_D6 (
__global short *src1, int src1_step, int src1_offset,
__global short *dst, int dst_step, int dst_offset,
short16 src2, int rows, int cols, int dst_step1)
{
@@ -903,10 +963,10 @@ __kernel void arithm_s_bitwise_or_C4_D6 (__global short *src1, int src1_step, in
short4 tmp_data_2 = src1_data_2 | src2_data_2;
short4 tmp_data_3 = src1_data_3 | src2_data_3;
*((__global short4 *)((__global char *)dst + dst_index + 0 ))= tmp_data_0;
*((__global short4 *)((__global char *)dst + dst_index + 8 ))= tmp_data_1;
*((__global short4 *)((__global char *)dst + dst_index + 16))= tmp_data_2;
*((__global short4 *)((__global char *)dst + dst_index + 24))= tmp_data_3;
*((__global short4 *)((__global char *)dst + dst_index + 0 ))= tmp_data_0;
*((__global short4 *)((__global char *)dst + dst_index + 8 ))= tmp_data_1;
*((__global short4 *)((__global char *)dst + dst_index + 16))= tmp_data_2;
*((__global short4 *)((__global char *)dst + dst_index + 24))= tmp_data_3;
}
}

View File

@@ -43,17 +43,21 @@
//
//M*/
#if defined (DOUBLE_SUPPORT)
#ifdef cl_khr_fp64
#pragma OPENCL EXTENSION cl_khr_fp64:enable
#elif defined (cl_amd_fp64)
#pragma OPENCL EXTENSION cl_amd_fp64:enable
#endif
#endif
//////////////////////////////////////////////////////////////////////////////////////////////////////
////////////////////////////////////////////BITWISE_OR////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////////////////////////////
/**************************************bitwise_or with scalar with mask**************************************/
__kernel void arithm_s_bitwise_or_with_mask_C1_D0 (__global uchar *src1, int src1_step, int src1_offset,
__global uchar *dst, int dst_step, int dst_offset,
__global uchar *mask, int mask_step, int mask_offset,
uchar4 src2, int rows, int cols, int dst_step1)
__kernel void arithm_s_bitwise_or_with_mask_C1_D0 (
__global uchar *src1, int src1_step, int src1_offset,
__global uchar *dst, int dst_step, int dst_offset,
__global uchar *mask, int mask_step, int mask_offset,
uchar4 src2, int rows, int cols, int dst_step1)
{
@@ -64,7 +68,10 @@ __kernel void arithm_s_bitwise_or_with_mask_C1_D0 (__global uchar *src1, int s
{
x = x << 2;
#define dst_align (dst_offset & 3)
#ifdef dst_align
#undef dst_align
#endif
#define dst_align (dst_offset & 3)
int src1_index = mad24(y, src1_step, x + src1_offset - dst_align);
int mask_index = mad24(y, mask_step, x + mask_offset - dst_align);
@@ -89,10 +96,11 @@ __kernel void arithm_s_bitwise_or_with_mask_C1_D0 (__global uchar *src1, int s
}
__kernel void arithm_s_bitwise_or_with_mask_C1_D1 (__global char *src1, int src1_step, int src1_offset,
__global char *dst, int dst_step, int dst_offset,
__global uchar *mask, int mask_step, int mask_offset,
char4 src2, int rows, int cols, int dst_step1)
__kernel void arithm_s_bitwise_or_with_mask_C1_D1 (
__global char *src1, int src1_step, int src1_offset,
__global char *dst, int dst_step, int dst_offset,
__global uchar *mask, int mask_step, int mask_offset,
char4 src2, int rows, int cols, int dst_step1)
{
@@ -103,7 +111,10 @@ __kernel void arithm_s_bitwise_or_with_mask_C1_D1 (__global char *src1, int sr
{
x = x << 2;
#define dst_align (dst_offset & 3)
#ifdef dst_align
#undef dst_align
#endif
#define dst_align (dst_offset & 3)
int src1_index = mad24(y, src1_step, x + src1_offset - dst_align);
int mask_index = mad24(y, mask_step, x + mask_offset - dst_align);
@@ -127,10 +138,11 @@ __kernel void arithm_s_bitwise_or_with_mask_C1_D1 (__global char *src1, int sr
}
}
__kernel void arithm_s_bitwise_or_with_mask_C1_D2 (__global ushort *src1, int src1_step, int src1_offset,
__global ushort *dst, int dst_step, int dst_offset,
__global uchar *mask, int mask_step, int mask_offset,
ushort4 src2, int rows, int cols, int dst_step1)
__kernel void arithm_s_bitwise_or_with_mask_C1_D2 (
__global ushort *src1, int src1_step, int src1_offset,
__global ushort *dst, int dst_step, int dst_offset,
__global uchar *mask, int mask_step, int mask_offset,
ushort4 src2, int rows, int cols, int dst_step1)
{
@@ -141,7 +153,10 @@ __kernel void arithm_s_bitwise_or_with_mask_C1_D2 (__global ushort *src1, int
{
x = x << 1;
#define dst_align ((dst_offset >> 1) & 1)
#ifdef dst_align
#undef dst_align
#endif
#define dst_align ((dst_offset >> 1) & 1)
int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1));
int mask_index = mad24(y, mask_step, x + mask_offset - dst_align);
@@ -162,10 +177,11 @@ __kernel void arithm_s_bitwise_or_with_mask_C1_D2 (__global ushort *src1, int
*((__global ushort2 *)((__global uchar *)dst + dst_index)) = data;
}
}
__kernel void arithm_s_bitwise_or_with_mask_C1_D3 (__global short *src1, int src1_step, int src1_offset,
__global short *dst, int dst_step, int dst_offset,
__global uchar *mask, int mask_step, int mask_offset,
short4 src2, int rows, int cols, int dst_step1)
__kernel void arithm_s_bitwise_or_with_mask_C1_D3 (
__global short *src1, int src1_step, int src1_offset,
__global short *dst, int dst_step, int dst_offset,
__global uchar *mask, int mask_step, int mask_offset,
short4 src2, int rows, int cols, int dst_step1)
{
@@ -176,7 +192,10 @@ __kernel void arithm_s_bitwise_or_with_mask_C1_D3 (__global short *src1, int s
{
x = x << 1;
#define dst_align ((dst_offset >> 1) & 1)
#ifdef dst_align
#undef dst_align
#endif
#define dst_align ((dst_offset >> 1) & 1)
int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1));
int mask_index = mad24(y, mask_step, x + mask_offset - dst_align);
@@ -197,10 +216,11 @@ __kernel void arithm_s_bitwise_or_with_mask_C1_D3 (__global short *src1, int s
*((__global short2 *)((__global uchar *)dst + dst_index)) = data;
}
}
__kernel void arithm_s_bitwise_or_with_mask_C1_D4 (__global int *src1, int src1_step, int src1_offset,
__global int *dst, int dst_step, int dst_offset,
__global uchar *mask, int mask_step, int mask_offset,
int4 src2, int rows, int cols, int dst_step1)
__kernel void arithm_s_bitwise_or_with_mask_C1_D4 (
__global int *src1, int src1_step, int src1_offset,
__global int *dst, int dst_step, int dst_offset,
__global uchar *mask, int mask_step, int mask_offset,
int4 src2, int rows, int cols, int dst_step1)
{
@@ -226,10 +246,11 @@ __kernel void arithm_s_bitwise_or_with_mask_C1_D4 (__global int *src1, int s
}
}
__kernel void arithm_s_bitwise_or_with_mask_C1_D5 (__global char *src1, int src1_step, int src1_offset,
__global char *dst, int dst_step, int dst_offset,
__global uchar *mask, int mask_step, int mask_offset,
char16 src2, int rows, int cols, int dst_step1)
__kernel void arithm_s_bitwise_or_with_mask_C1_D5 (
__global char *src1, int src1_step, int src1_offset,
__global char *dst, int dst_step, int dst_offset,
__global uchar *mask, int mask_step, int mask_offset,
char16 src2, int rows, int cols, int dst_step1)
{
@@ -254,12 +275,12 @@ __kernel void arithm_s_bitwise_or_with_mask_C1_D5 (__global char *src1, int
*((__global char4 *)((__global char *)dst + dst_index)) = data;
}
}
#if defined (DOUBLE_SUPPORT)
__kernel void arithm_s_bitwise_or_with_mask_C1_D6 (__global short *src1, int src1_step, int src1_offset,
__global short *dst, int dst_step, int dst_offset,
__global uchar *mask, int mask_step, int mask_offset,
short16 src2, int rows, int cols, int dst_step1)
__kernel void arithm_s_bitwise_or_with_mask_C1_D6 (
__global short *src1, int src1_step, int src1_offset,
__global short *dst, int dst_step, int dst_offset,
__global uchar *mask, int mask_step, int mask_offset,
short16 src2, int rows, int cols, int dst_step1)
{
@@ -285,10 +306,11 @@ __kernel void arithm_s_bitwise_or_with_mask_C1_D6 (__global short *src1, int src
}
}
#endif
__kernel void arithm_s_bitwise_or_with_mask_C2_D0 (__global uchar *src1, int src1_step, int src1_offset,
__global uchar *dst, int dst_step, int dst_offset,
__global uchar *mask, int mask_step, int mask_offset,
uchar4 src2, int rows, int cols, int dst_step1)
__kernel void arithm_s_bitwise_or_with_mask_C2_D0 (
__global uchar *src1, int src1_step, int src1_offset,
__global uchar *dst, int dst_step, int dst_offset,
__global uchar *mask, int mask_step, int mask_offset,
uchar4 src2, int rows, int cols, int dst_step1)
{
@@ -299,7 +321,10 @@ __kernel void arithm_s_bitwise_or_with_mask_C2_D0 (__global uchar *src1, int s
{
x = x << 1;
#define dst_align ((dst_offset >> 1) & 1)
#ifdef dst_align
#undef dst_align
#endif
#define dst_align ((dst_offset >> 1) & 1)
int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1));
int mask_index = mad24(y, mask_step, x + mask_offset - dst_align);
@@ -322,10 +347,11 @@ __kernel void arithm_s_bitwise_or_with_mask_C2_D0 (__global uchar *src1, int s
}
__kernel void arithm_s_bitwise_or_with_mask_C2_D1 (__global char *src1, int src1_step, int src1_offset,
__global char *dst, int dst_step, int dst_offset,
__global uchar *mask, int mask_step, int mask_offset,
char4 src2, int rows, int cols, int dst_step1)
__kernel void arithm_s_bitwise_or_with_mask_C2_D1 (
__global char *src1, int src1_step, int src1_offset,
__global char *dst, int dst_step, int dst_offset,
__global uchar *mask, int mask_step, int mask_offset,
char4 src2, int rows, int cols, int dst_step1)
{
@@ -336,7 +362,10 @@ __kernel void arithm_s_bitwise_or_with_mask_C2_D1 (__global char *src1, int sr
{
x = x << 1;
#define dst_align ((dst_offset >> 1) & 1)
#ifdef dst_align
#undef dst_align
#endif
#define dst_align ((dst_offset >> 1) & 1)
int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1));
int mask_index = mad24(y, mask_step, x + mask_offset - dst_align);
@@ -358,10 +387,11 @@ __kernel void arithm_s_bitwise_or_with_mask_C2_D1 (__global char *src1, int sr
}
}
__kernel void arithm_s_bitwise_or_with_mask_C2_D2 (__global ushort *src1, int src1_step, int src1_offset,
__global ushort *dst, int dst_step, int dst_offset,
__global uchar *mask, int mask_step, int mask_offset,
ushort4 src2, int rows, int cols, int dst_step1)
__kernel void arithm_s_bitwise_or_with_mask_C2_D2 (
__global ushort *src1, int src1_step, int src1_offset,
__global ushort *dst, int dst_step, int dst_offset,
__global uchar *mask, int mask_step, int mask_offset,
ushort4 src2, int rows, int cols, int dst_step1)
{
@@ -386,10 +416,11 @@ __kernel void arithm_s_bitwise_or_with_mask_C2_D2 (__global ushort *src1, int
*((__global ushort2 *)((__global char *)dst + dst_index)) = data;
}
}
__kernel void arithm_s_bitwise_or_with_mask_C2_D3 (__global short *src1, int src1_step, int src1_offset,
__global short *dst, int dst_step, int dst_offset,
__global uchar *mask, int mask_step, int mask_offset,
short4 src2, int rows, int cols, int dst_step1)
__kernel void arithm_s_bitwise_or_with_mask_C2_D3 (
__global short *src1, int src1_step, int src1_offset,
__global short *dst, int dst_step, int dst_offset,
__global uchar *mask, int mask_step, int mask_offset,
short4 src2, int rows, int cols, int dst_step1)
{
@@ -414,10 +445,11 @@ __kernel void arithm_s_bitwise_or_with_mask_C2_D3 (__global short *src1, int s
*((__global short2 *)((__global char *)dst + dst_index)) = data;
}
}
__kernel void arithm_s_bitwise_or_with_mask_C2_D4 (__global int *src1, int src1_step, int src1_offset,
__global int *dst, int dst_step, int dst_offset,
__global uchar *mask, int mask_step, int mask_offset,
int4 src2, int rows, int cols, int dst_step1)
__kernel void arithm_s_bitwise_or_with_mask_C2_D4 (
__global int *src1, int src1_step, int src1_offset,
__global int *dst, int dst_step, int dst_offset,
__global uchar *mask, int mask_step, int mask_offset,
int4 src2, int rows, int cols, int dst_step1)
{
@@ -442,10 +474,11 @@ __kernel void arithm_s_bitwise_or_with_mask_C2_D4 (__global int *src1, int src
*((__global int2 *)((__global char *)dst + dst_index)) = data;
}
}
__kernel void arithm_s_bitwise_or_with_mask_C2_D5 (__global char *src1, int src1_step, int src1_offset,
__global char *dst, int dst_step, int dst_offset,
__global uchar *mask, int mask_step, int mask_offset,
char16 src2, int rows, int cols, int dst_step1)
__kernel void arithm_s_bitwise_or_with_mask_C2_D5 (
__global char *src1, int src1_step, int src1_offset,
__global char *dst, int dst_step, int dst_offset,
__global uchar *mask, int mask_step, int mask_offset,
char16 src2, int rows, int cols, int dst_step1)
{
@@ -463,17 +496,18 @@ __kernel void arithm_s_bitwise_or_with_mask_C2_D5 (__global char *src1, int sr
char8 src_data1 = *((__global char8 *)((__global char *)src1 + src1_index));
char8 src_data2 = (char8)(src2.s0, src2.s1, src2.s2, src2.s3, src2.s4, src2.s5, src2.s6, src2.s7);
char8 dst_data = *((__global char8 *)((__global char *)dst + dst_index));
char8 data = src_data1 | src_data2;
char8 data = src_data1 | src_data2;
data = mask_data ? data : dst_data;
*((__global char8 *)((__global char *)dst + dst_index)) = data;
}
}
}
#if defined (DOUBLE_SUPPORT)
__kernel void arithm_s_bitwise_or_with_mask_C2_D6 (__global char *src1, int src1_step, int src1_offset,
__global char *dst, int dst_step, int dst_offset,
__global uchar *mask, int mask_step, int mask_offset,
short16 src2, int rows, int cols, int dst_step1)
__kernel void arithm_s_bitwise_or_with_mask_C2_D6 (
__global char *src1, int src1_step, int src1_offset,
__global char *dst, int dst_step, int dst_offset,
__global uchar *mask, int mask_step, int mask_offset,
short16 src2, int rows, int cols, int dst_step1)
{
@@ -499,10 +533,11 @@ __kernel void arithm_s_bitwise_or_with_mask_C2_D6 (__global char *src1, int sr
}
}
#endif
__kernel void arithm_s_bitwise_or_with_mask_C3_D0 (__global uchar *src1, int src1_step, int src1_offset,
__global uchar *dst, int dst_step, int dst_offset,
__global uchar *mask, int mask_step, int mask_offset,
uchar4 src2, int rows, int cols, int dst_step1)
__kernel void arithm_s_bitwise_or_with_mask_C3_D0 (
__global uchar *src1, int src1_step, int src1_offset,
__global uchar *dst, int dst_step, int dst_offset,
__global uchar *mask, int mask_step, int mask_offset,
uchar4 src2, int rows, int cols, int dst_step1)
{
@@ -513,7 +548,10 @@ __kernel void arithm_s_bitwise_or_with_mask_C3_D0 (__global uchar *src1, int s
{
x = x << 2;
#define dst_align (((dst_offset % dst_step) / 3 ) & 3)
#ifdef dst_align
#undef dst_align
#endif
#define dst_align (((dst_offset % dst_step) / 3 ) & 3)
int src1_index = mad24(y, src1_step, (x * 3) + src1_offset - (dst_align * 3));
int mask_index = mad24(y, mask_step, x + mask_offset - dst_align);
@@ -560,10 +598,11 @@ __kernel void arithm_s_bitwise_or_with_mask_C3_D0 (__global uchar *src1, int s
}
__kernel void arithm_s_bitwise_or_with_mask_C3_D1 (__global char *src1, int src1_step, int src1_offset,
__global char *dst, int dst_step, int dst_offset,
__global uchar *mask, int mask_step, int mask_offset,
char4 src2, int rows, int cols, int dst_step1)
__kernel void arithm_s_bitwise_or_with_mask_C3_D1 (
__global char *src1, int src1_step, int src1_offset,
__global char *dst, int dst_step, int dst_offset,
__global uchar *mask, int mask_step, int mask_offset,
char4 src2, int rows, int cols, int dst_step1)
{
@@ -574,7 +613,10 @@ __kernel void arithm_s_bitwise_or_with_mask_C3_D1 (__global char *src1, int sr
{
x = x << 2;
#define dst_align (((dst_offset % dst_step) / 3 ) & 3)
#ifdef dst_align
#undef dst_align
#endif
#define dst_align (((dst_offset % dst_step) / 3 ) & 3)
int src1_index = mad24(y, src1_step, (x * 3) + src1_offset - (dst_align * 3));
int mask_index = mad24(y, mask_step, x + mask_offset - dst_align);
@@ -620,10 +662,11 @@ __kernel void arithm_s_bitwise_or_with_mask_C3_D1 (__global char *src1, int sr
}
}
__kernel void arithm_s_bitwise_or_with_mask_C3_D2 (__global ushort *src1, int src1_step, int src1_offset,
__global ushort *dst, int dst_step, int dst_offset,
__global uchar *mask, int mask_step, int mask_offset,
ushort4 src2, int rows, int cols, int dst_step1)
__kernel void arithm_s_bitwise_or_with_mask_C3_D2 (
__global ushort *src1, int src1_step, int src1_offset,
__global ushort *dst, int dst_step, int dst_offset,
__global uchar *mask, int mask_step, int mask_offset,
ushort4 src2, int rows, int cols, int dst_step1)
{
@@ -634,7 +677,10 @@ __kernel void arithm_s_bitwise_or_with_mask_C3_D2 (__global ushort *src1, int
{
x = x << 1;
#define dst_align (((dst_offset % dst_step) / 6 ) & 1)
#ifdef dst_align
#undef dst_align
#endif
#define dst_align (((dst_offset % dst_step) / 6 ) & 1)
int src1_index = mad24(y, src1_step, (x * 6) + src1_offset - (dst_align * 6));
int mask_index = mad24(y, mask_step, x + mask_offset - dst_align);
@@ -663,22 +709,23 @@ __kernel void arithm_s_bitwise_or_with_mask_C3_D2 (__global ushort *src1, int
data_0.xy = ((mask_data.x) && (dst_index + 0 >= dst_start)) ? tmp_data_0.xy : data_0.xy;
data_1.x = ((mask_data.x) && (dst_index + 0 >= dst_start) && (dst_index + 0 < dst_end))
? tmp_data_1.x : data_1.x;
? tmp_data_1.x : data_1.x;
data_1.y = ((mask_data.y) && (dst_index + 6 >= dst_start) && (dst_index + 6 < dst_end))
? tmp_data_1.y : data_1.y;
? tmp_data_1.y : data_1.y;
data_2.xy = ((mask_data.y) && (dst_index + 6 >= dst_start) && (dst_index + 6 < dst_end))
? tmp_data_2.xy : data_2.xy;
? tmp_data_2.xy : data_2.xy;
*((__global ushort2 *)((__global char *)dst + dst_index + 0))= data_0;
*((__global ushort2 *)((__global char *)dst + dst_index + 4))= data_1;
*((__global ushort2 *)((__global char *)dst + dst_index + 8))= data_2;
*((__global ushort2 *)((__global char *)dst + dst_index + 0))= data_0;
*((__global ushort2 *)((__global char *)dst + dst_index + 4))= data_1;
*((__global ushort2 *)((__global char *)dst + dst_index + 8))= data_2;
}
}
__kernel void arithm_s_bitwise_or_with_mask_C3_D3 (__global short *src1, int src1_step, int src1_offset,
__global short *dst, int dst_step, int dst_offset,
__global uchar *mask, int mask_step, int mask_offset,
short4 src2, int rows, int cols, int dst_step1)
__kernel void arithm_s_bitwise_or_with_mask_C3_D3 (
__global short *src1, int src1_step, int src1_offset,
__global short *dst, int dst_step, int dst_offset,
__global uchar *mask, int mask_step, int mask_offset,
short4 src2, int rows, int cols, int dst_step1)
{
@@ -689,7 +736,10 @@ __kernel void arithm_s_bitwise_or_with_mask_C3_D3 (__global short *src1, int s
{
x = x << 1;
#define dst_align (((dst_offset % dst_step) / 6 ) & 1)
#ifdef dst_align
#undef dst_align
#endif
#define dst_align (((dst_offset % dst_step) / 6 ) & 1)
int src1_index = mad24(y, src1_step, (x * 6) + src1_offset - (dst_align * 6));
int mask_index = mad24(y, mask_step, x + mask_offset - dst_align);
@@ -718,22 +768,23 @@ __kernel void arithm_s_bitwise_or_with_mask_C3_D3 (__global short *src1, int s
data_0.xy = ((mask_data.x) && (dst_index + 0 >= dst_start)) ? tmp_data_0.xy : data_0.xy;
data_1.x = ((mask_data.x) && (dst_index + 0 >= dst_start) && (dst_index + 0 < dst_end))
? tmp_data_1.x : data_1.x;
? tmp_data_1.x : data_1.x;
data_1.y = ((mask_data.y) && (dst_index + 6 >= dst_start) && (dst_index + 6 < dst_end))
? tmp_data_1.y : data_1.y;
? tmp_data_1.y : data_1.y;
data_2.xy = ((mask_data.y) && (dst_index + 6 >= dst_start) && (dst_index + 6 < dst_end))
? tmp_data_2.xy : data_2.xy;
? tmp_data_2.xy : data_2.xy;
*((__global short2 *)((__global char *)dst + dst_index + 0))= data_0;
*((__global short2 *)((__global char *)dst + dst_index + 4))= data_1;
*((__global short2 *)((__global char *)dst + dst_index + 8))= data_2;
*((__global short2 *)((__global char *)dst + dst_index + 0))= data_0;
*((__global short2 *)((__global char *)dst + dst_index + 4))= data_1;
*((__global short2 *)((__global char *)dst + dst_index + 8))= data_2;
}
}
__kernel void arithm_s_bitwise_or_with_mask_C3_D4 (__global int *src1, int src1_step, int src1_offset,
__global int *dst, int dst_step, int dst_offset,
__global uchar *mask, int mask_step, int mask_offset,
int4 src2, int rows, int cols, int dst_step1)
__kernel void arithm_s_bitwise_or_with_mask_C3_D4 (
__global int *src1, int src1_step, int src1_offset,
__global int *dst, int dst_step, int dst_offset,
__global uchar *mask, int mask_step, int mask_offset,
int4 src2, int rows, int cols, int dst_step1)
{
@@ -768,15 +819,16 @@ __kernel void arithm_s_bitwise_or_with_mask_C3_D4 (__global int *src1, int src
data_1 = mask_data ? tmp_data_1 : data_1;
data_2 = mask_data ? tmp_data_2 : data_2;
*((__global int *)((__global char *)dst + dst_index + 0))= data_0;
*((__global int *)((__global char *)dst + dst_index + 4))= data_1;
*((__global int *)((__global char *)dst + dst_index + 8))= data_2;
*((__global int *)((__global char *)dst + dst_index + 0))= data_0;
*((__global int *)((__global char *)dst + dst_index + 4))= data_1;
*((__global int *)((__global char *)dst + dst_index + 8))= data_2;
}
}
__kernel void arithm_s_bitwise_or_with_mask_C3_D5 (__global char *src1, int src1_step, int src1_offset,
__global char *dst, int dst_step, int dst_offset,
__global uchar *mask, int mask_step, int mask_offset,
char16 src2, int rows, int cols, int dst_step1)
__kernel void arithm_s_bitwise_or_with_mask_C3_D5 (
__global char *src1, int src1_step, int src1_offset,
__global char *dst, int dst_step, int dst_offset,
__global uchar *mask, int mask_step, int mask_offset,
char16 src2, int rows, int cols, int dst_step1)
{
@@ -811,17 +863,18 @@ __kernel void arithm_s_bitwise_or_with_mask_C3_D5 (__global char *src1, int sr
data_1 = mask_data ? tmp_data_1 : data_1;
data_2 = mask_data ? tmp_data_2 : data_2;
*((__global char4 *)((__global char *)dst + dst_index + 0))= data_0;
*((__global char4 *)((__global char *)dst + dst_index + 4))= data_1;
*((__global char4 *)((__global char *)dst + dst_index + 8))= data_2;
*((__global char4 *)((__global char *)dst + dst_index + 0))= data_0;
*((__global char4 *)((__global char *)dst + dst_index + 4))= data_1;
*((__global char4 *)((__global char *)dst + dst_index + 8))= data_2;
}
}
}
#if defined (DOUBLE_SUPPORT)
__kernel void arithm_s_bitwise_or_with_mask_C3_D6 (__global short *src1, int src1_step, int src1_offset,
__global short *dst, int dst_step, int dst_offset,
__global uchar *mask, int mask_step, int mask_offset,
short16 src2, int rows, int cols, int dst_step1)
__kernel void arithm_s_bitwise_or_with_mask_C3_D6 (
__global short *src1, int src1_step, int src1_offset,
__global short *dst, int dst_step, int dst_offset,
__global uchar *mask, int mask_step, int mask_offset,
short16 src2, int rows, int cols, int dst_step1)
{
int x = get_global_id(0);
@@ -855,16 +908,17 @@ __kernel void arithm_s_bitwise_or_with_mask_C3_D6 (__global short *src1, int src
data_1 = mask_data ? tmp_data_1 : data_1;
data_2 = mask_data ? tmp_data_2 : data_2;
*((__global short4 *)((__global char *)dst + dst_index + 0 ))= data_0;
*((__global short4 *)((__global char *)dst + dst_index + 8 ))= data_1;
*((__global short4 *)((__global char *)dst + dst_index + 16))= data_2;
*((__global short4 *)((__global char *)dst + dst_index + 0 ))= data_0;
*((__global short4 *)((__global char *)dst + dst_index + 8 ))= data_1;
*((__global short4 *)((__global char *)dst + dst_index + 16))= data_2;
}
}
#endif
__kernel void arithm_s_bitwise_or_with_mask_C4_D0 (__global uchar *src1, int src1_step, int src1_offset,
__global uchar *dst, int dst_step, int dst_offset,
__global uchar *mask, int mask_step, int mask_offset,
uchar4 src2, int rows, int cols, int dst_step1)
__kernel void arithm_s_bitwise_or_with_mask_C4_D0 (
__global uchar *src1, int src1_step, int src1_offset,
__global uchar *dst, int dst_step, int dst_offset,
__global uchar *mask, int mask_step, int mask_offset,
uchar4 src2, int rows, int cols, int dst_step1)
{
@@ -890,10 +944,11 @@ __kernel void arithm_s_bitwise_or_with_mask_C4_D0 (__global uchar *src1, int s
}
__kernel void arithm_s_bitwise_or_with_mask_C4_D1 (__global char *src1, int src1_step, int src1_offset,
__global char *dst, int dst_step, int dst_offset,
__global uchar *mask, int mask_step, int mask_offset,
char4 src2, int rows, int cols, int dst_step1)
__kernel void arithm_s_bitwise_or_with_mask_C4_D1 (
__global char *src1, int src1_step, int src1_offset,
__global char *dst, int dst_step, int dst_offset,
__global uchar *mask, int mask_step, int mask_offset,
char4 src2, int rows, int cols, int dst_step1)
{
@@ -918,10 +973,11 @@ __kernel void arithm_s_bitwise_or_with_mask_C4_D1 (__global char *src1, int sr
}
}
__kernel void arithm_s_bitwise_or_with_mask_C4_D2 (__global ushort *src1, int src1_step, int src1_offset,
__global ushort *dst, int dst_step, int dst_offset,
__global uchar *mask, int mask_step, int mask_offset,
ushort4 src2, int rows, int cols, int dst_step1)
__kernel void arithm_s_bitwise_or_with_mask_C4_D2 (
__global ushort *src1, int src1_step, int src1_offset,
__global ushort *dst, int dst_step, int dst_offset,
__global uchar *mask, int mask_step, int mask_offset,
ushort4 src2, int rows, int cols, int dst_step1)
{
@@ -945,10 +1001,11 @@ __kernel void arithm_s_bitwise_or_with_mask_C4_D2 (__global ushort *src1, int
*((__global ushort4 *)((__global char *)dst + dst_index)) = data;
}
}
__kernel void arithm_s_bitwise_or_with_mask_C4_D3 (__global short *src1, int src1_step, int src1_offset,
__global short *dst, int dst_step, int dst_offset,
__global uchar *mask, int mask_step, int mask_offset,
short4 src2, int rows, int cols, int dst_step1)
__kernel void arithm_s_bitwise_or_with_mask_C4_D3 (
__global short *src1, int src1_step, int src1_offset,
__global short *dst, int dst_step, int dst_offset,
__global uchar *mask, int mask_step, int mask_offset,
short4 src2, int rows, int cols, int dst_step1)
{
@@ -972,10 +1029,11 @@ __kernel void arithm_s_bitwise_or_with_mask_C4_D3 (__global short *src1, int s
*((__global short4 *)((__global char *)dst + dst_index)) = data;
}
}
__kernel void arithm_s_bitwise_or_with_mask_C4_D4 (__global int *src1, int src1_step, int src1_offset,
__global int *dst, int dst_step, int dst_offset,
__global uchar *mask, int mask_step, int mask_offset,
int4 src2, int rows, int cols, int dst_step1)
__kernel void arithm_s_bitwise_or_with_mask_C4_D4 (
__global int *src1, int src1_step, int src1_offset,
__global int *dst, int dst_step, int dst_offset,
__global uchar *mask, int mask_step, int mask_offset,
int4 src2, int rows, int cols, int dst_step1)
{
@@ -999,10 +1057,11 @@ __kernel void arithm_s_bitwise_or_with_mask_C4_D4 (__global int *src1, int src
*((__global int4 *)((__global char *)dst + dst_index)) = data;
}
}
__kernel void arithm_s_bitwise_or_with_mask_C4_D5 (__global char *src1, int src1_step, int src1_offset,
__global char *dst, int dst_step, int dst_offset,
__global uchar *mask, int mask_step, int mask_offset,
char16 src2, int rows, int cols, int dst_step1)
__kernel void arithm_s_bitwise_or_with_mask_C4_D5 (
__global char *src1, int src1_step, int src1_offset,
__global char *dst, int dst_step, int dst_offset,
__global uchar *mask, int mask_step, int mask_offset,
char16 src2, int rows, int cols, int dst_step1)
{
@@ -1029,10 +1088,11 @@ __kernel void arithm_s_bitwise_or_with_mask_C4_D5 (__global char *src1, int sr
}
}
#if defined (DOUBLE_SUPPORT)
__kernel void arithm_s_bitwise_or_with_mask_C4_D6 (__global short *src1, int src1_step, int src1_offset,
__global short *dst, int dst_step, int dst_offset,
__global uchar *mask, int mask_step, int mask_offset,
short16 src2, int rows, int cols, int dst_step1)
__kernel void arithm_s_bitwise_or_with_mask_C4_D6 (
__global short *src1, int src1_step, int src1_offset,
__global short *dst, int dst_step, int dst_offset,
__global uchar *mask, int mask_step, int mask_offset,
short16 src2, int rows, int cols, int dst_step1)
{
int x = get_global_id(0);

View File

@@ -43,17 +43,20 @@
//
//M*/
#if defined (DOUBLE_SUPPORT)
#ifdef cl_khr_fp64
#pragma OPENCL EXTENSION cl_khr_fp64:enable
#elif defined (cl_amd_fp64)
#pragma OPENCL EXTENSION cl_amd_fp64:enable
#endif
#endif
//////////////////////////////////////////////////////////////////////////////////////////////////////
////////////////////////////////////////////BITWISE_XOR////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////////////////////////////
/**************************************bitwise_xor without mask**************************************/
__kernel void arithm_bitwise_xor_D0 (__global uchar *src1, int src1_step, int src1_offset,
__global uchar *src2, int src2_step, int src2_offset,
__global uchar *dst, int dst_step, int dst_offset,
int rows, int cols, int dst_step1)
__global uchar *src2, int src2_step, int src2_offset,
__global uchar *dst, int dst_step, int dst_offset,
int rows, int cols, int dst_step1)
{
int x = get_global_id(0);
int y = get_global_id(1);
@@ -62,7 +65,10 @@ __kernel void arithm_bitwise_xor_D0 (__global uchar *src1, int src1_step, int sr
{
x = x << 2;
#define dst_align (dst_offset & 3)
#ifdef dst_align
#undef dst_align
#endif
#define dst_align (dst_offset & 3)
int src1_index = mad24(y, src1_step, x + src1_offset - dst_align);
int src2_index = mad24(y, src2_step, x + src2_offset - dst_align);
@@ -70,23 +76,23 @@ __kernel void arithm_bitwise_xor_D0 (__global uchar *src1, int src1_step, int sr
int dst_end = mad24(y, dst_step, dst_offset + dst_step1);
int dst_index = mad24(y, dst_step, dst_offset + x & (int)0xfffffffc);
int src1_index_fix = src1_index < 0 ? 0 : src1_index;
int src2_index_fix = src2_index < 0 ? 0 : src2_index;
int src1_index_fix = src1_index < 0 ? 0 : src1_index;
int src2_index_fix = src2_index < 0 ? 0 : src2_index;
uchar4 src1_data = vload4(0, src1 + src1_index_fix);
uchar4 src2_data = vload4(0, src2 + src2_index_fix);
if(src1_index < 0)
{
uchar4 tmp;
tmp.xyzw = (src1_index == -2) ? src1_data.zwxy:src1_data.yzwx;
src1_data.xyzw = (src1_index == -1) ? src1_data.wxyz:tmp.xyzw;
}
if(src2_index < 0)
{
uchar4 tmp;
tmp.xyzw = (src2_index == -2) ? src2_data.zwxy:src2_data.yzwx;
src2_data.xyzw = (src2_index == -1) ? src2_data.wxyz:tmp.xyzw;
}
if(src1_index < 0)
{
uchar4 tmp;
tmp.xyzw = (src1_index == -2) ? src1_data.zwxy:src1_data.yzwx;
src1_data.xyzw = (src1_index == -1) ? src1_data.wxyz:tmp.xyzw;
}
if(src2_index < 0)
{
uchar4 tmp;
tmp.xyzw = (src2_index == -2) ? src2_data.zwxy:src2_data.yzwx;
src2_data.xyzw = (src2_index == -1) ? src2_data.wxyz:tmp.xyzw;
}
uchar4 dst_data = *((__global uchar4 *)(dst + dst_index));
uchar4 tmp_data = src1_data ^ src2_data;
@@ -101,9 +107,9 @@ __kernel void arithm_bitwise_xor_D0 (__global uchar *src1, int src1_step, int sr
__kernel void arithm_bitwise_xor_D1 (__global char *src1, int src1_step, int src1_offset,
__global char *src2, int src2_step, int src2_offset,
__global char *dst, int dst_step, int dst_offset,
int rows, int cols, int dst_step1)
__global char *src2, int src2_step, int src2_offset,
__global char *dst, int dst_step, int dst_offset,
int rows, int cols, int dst_step1)
{
int x = get_global_id(0);
int y = get_global_id(1);
@@ -112,7 +118,10 @@ __kernel void arithm_bitwise_xor_D1 (__global char *src1, int src1_step, int src
{
x = x << 2;
#define dst_align (dst_offset & 3)
#ifdef dst_align
#undef dst_align
#endif
#define dst_align (dst_offset & 3)
int src1_index = mad24(y, src1_step, x + src1_offset - dst_align);
int src2_index = mad24(y, src2_step, x + src2_offset - dst_align);
@@ -120,23 +129,23 @@ __kernel void arithm_bitwise_xor_D1 (__global char *src1, int src1_step, int src
int dst_end = mad24(y, dst_step, dst_offset + dst_step1);
int dst_index = mad24(y, dst_step, dst_offset + x & (int)0xfffffffc);
int src1_index_fix = src1_index < 0 ? 0 : src1_index;
int src2_index_fix = src2_index < 0 ? 0 : src2_index;
int src1_index_fix = src1_index < 0 ? 0 : src1_index;
int src2_index_fix = src2_index < 0 ? 0 : src2_index;
char4 src1_data = vload4(0, src1 + src1_index_fix);
char4 src2_data = vload4(0, src2 + src2_index_fix);
if(src1_index < 0)
{
char4 tmp;
tmp.xyzw = (src1_index == -2) ? src1_data.zwxy:src1_data.yzwx;
src1_data.xyzw = (src1_index == -1) ? src1_data.wxyz:tmp.xyzw;
}
if(src2_index < 0)
{
char4 tmp;
tmp.xyzw = (src2_index == -2) ? src2_data.zwxy:src2_data.yzwx;
src2_data.xyzw = (src2_index == -1) ? src2_data.wxyz:tmp.xyzw;
}
if(src1_index < 0)
{
char4 tmp;
tmp.xyzw = (src1_index == -2) ? src1_data.zwxy:src1_data.yzwx;
src1_data.xyzw = (src1_index == -1) ? src1_data.wxyz:tmp.xyzw;
}
if(src2_index < 0)
{
char4 tmp;
tmp.xyzw = (src2_index == -2) ? src2_data.zwxy:src2_data.yzwx;
src2_data.xyzw = (src2_index == -1) ? src2_data.wxyz:tmp.xyzw;
}
char4 dst_data = *((__global char4 *)(dst + dst_index));
char4 tmp_data = src1_data ^ src2_data;
@@ -151,9 +160,9 @@ __kernel void arithm_bitwise_xor_D1 (__global char *src1, int src1_step, int src
__kernel void arithm_bitwise_xor_D2 (__global ushort *src1, int src1_step, int src1_offset,
__global ushort *src2, int src2_step, int src2_offset,
__global ushort *dst, int dst_step, int dst_offset,
int rows, int cols, int dst_step1)
__global ushort *src2, int src2_step, int src2_offset,
__global ushort *dst, int dst_step, int dst_offset,
int rows, int cols, int dst_step1)
{
int x = get_global_id(0);
@@ -163,7 +172,10 @@ __kernel void arithm_bitwise_xor_D2 (__global ushort *src1, int src1_step, int s
{
x = x << 2;
#define dst_align ((dst_offset >> 1) & 3)
#ifdef dst_align
#undef dst_align
#endif
#define dst_align ((dst_offset >> 1) & 3)
int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1));
int src2_index = mad24(y, src2_step, (x << 1) + src2_offset - (dst_align << 1));
@@ -171,23 +183,23 @@ __kernel void arithm_bitwise_xor_D2 (__global ushort *src1, int src1_step, int s
int dst_end = mad24(y, dst_step, dst_offset + dst_step1);
int dst_index = mad24(y, dst_step, dst_offset + (x << 1) & (int)0xfffffff8);
int src1_index_fix = src1_index < 0 ? 0 : src1_index;
int src2_index_fix = src2_index < 0 ? 0 : src2_index;
int src1_index_fix = src1_index < 0 ? 0 : src1_index;
int src2_index_fix = src2_index < 0 ? 0 : src2_index;
ushort4 src1_data = vload4(0, (__global ushort *)((__global char *)src1 + src1_index_fix));
ushort4 src2_data = vload4(0, (__global ushort *)((__global char *)src2 + src2_index_fix));
if(src1_index < 0)
{
ushort4 tmp;
tmp.xyzw = (src1_index == -2) ? src1_data.zwxy:src1_data.yzwx;
src1_data.xyzw = (src1_index == -1) ? src1_data.wxyz:tmp.xyzw;
}
if(src2_index < 0)
{
ushort4 tmp;
tmp.xyzw = (src2_index == -2) ? src2_data.zwxy:src2_data.yzwx;
src2_data.xyzw = (src2_index == -1) ? src2_data.wxyz:tmp.xyzw;
}
if(src1_index < 0)
{
ushort4 tmp;
tmp.xyzw = (src1_index == -2) ? src1_data.zwxy:src1_data.yzwx;
src1_data.xyzw = (src1_index == -1) ? src1_data.wxyz:tmp.xyzw;
}
if(src2_index < 0)
{
ushort4 tmp;
tmp.xyzw = (src2_index == -2) ? src2_data.zwxy:src2_data.yzwx;
src2_data.xyzw = (src2_index == -1) ? src2_data.wxyz:tmp.xyzw;
}
ushort4 dst_data = *((__global ushort4 *)((__global char *)dst + dst_index));
ushort4 tmp_data = src1_data ^ src2_data;
@@ -203,9 +215,9 @@ __kernel void arithm_bitwise_xor_D2 (__global ushort *src1, int src1_step, int s
__kernel void arithm_bitwise_xor_D3 (__global short *src1, int src1_step, int src1_offset,
__global short *src2, int src2_step, int src2_offset,
__global short *dst, int dst_step, int dst_offset,
int rows, int cols, int dst_step1)
__global short *src2, int src2_step, int src2_offset,
__global short *dst, int dst_step, int dst_offset,
int rows, int cols, int dst_step1)
{
int x = get_global_id(0);
@@ -215,7 +227,10 @@ __kernel void arithm_bitwise_xor_D3 (__global short *src1, int src1_step, int sr
{
x = x << 2;
#define dst_align ((dst_offset >> 1) & 3)
#ifdef dst_align
#undef dst_align
#endif
#define dst_align ((dst_offset >> 1) & 3)
int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1));
int src2_index = mad24(y, src2_step, (x << 1) + src2_offset - (dst_align << 1));
@@ -223,25 +238,25 @@ __kernel void arithm_bitwise_xor_D3 (__global short *src1, int src1_step, int sr
int dst_end = mad24(y, dst_step, dst_offset + dst_step1);
int dst_index = mad24(y, dst_step, dst_offset + (x << 1) & (int)0xfffffff8);
int src1_index_fix = src1_index < 0 ? 0 : src1_index;
int src2_index_fix = src2_index < 0 ? 0 : src2_index;
int src1_index_fix = src1_index < 0 ? 0 : src1_index;
int src2_index_fix = src2_index < 0 ? 0 : src2_index;
short4 src1_data = vload4(0, (__global short *)((__global char *)src1 + src1_index_fix));
short4 src2_data = vload4(0, (__global short *)((__global char *)src2 + src2_index_fix));
short4 dst_data = *((__global short4 *)((__global char *)dst + dst_index));
if(src1_index < 0)
{
short4 tmp;
tmp.xyzw = (src1_index == -2) ? src1_data.zwxy:src1_data.yzwx;
src1_data.xyzw = (src1_index == -1) ? src1_data.wxyz:tmp.xyzw;
}
if(src2_index < 0)
{
short4 tmp;
tmp.xyzw = (src2_index == -2) ? src2_data.zwxy:src2_data.yzwx;
src2_data.xyzw = (src2_index == -1) ? src2_data.wxyz:tmp.xyzw;
}
if(src1_index < 0)
{
short4 tmp;
tmp.xyzw = (src1_index == -2) ? src1_data.zwxy:src1_data.yzwx;
src1_data.xyzw = (src1_index == -1) ? src1_data.wxyz:tmp.xyzw;
}
if(src2_index < 0)
{
short4 tmp;
tmp.xyzw = (src2_index == -2) ? src2_data.zwxy:src2_data.yzwx;
src2_data.xyzw = (src2_index == -1) ? src2_data.wxyz:tmp.xyzw;
}
@@ -259,9 +274,9 @@ __kernel void arithm_bitwise_xor_D3 (__global short *src1, int src1_step, int sr
__kernel void arithm_bitwise_xor_D4 (__global int *src1, int src1_step, int src1_offset,
__global int *src2, int src2_step, int src2_offset,
__global int *dst, int dst_step, int dst_offset,
int rows, int cols, int dst_step1)
__global int *src2, int src2_step, int src2_offset,
__global int *dst, int dst_step, int dst_offset,
int rows, int cols, int dst_step1)
{
int x = get_global_id(0);
int y = get_global_id(1);
@@ -281,9 +296,9 @@ __kernel void arithm_bitwise_xor_D4 (__global int *src1, int src1_step, int src1
}
__kernel void arithm_bitwise_xor_D5 (__global char *src1, int src1_step, int src1_offset,
__global char *src2, int src2_step, int src2_offset,
__global char *dst, int dst_step, int dst_offset,
int rows, int cols, int dst_step1)
__global char *src2, int src2_step, int src2_offset,
__global char *dst, int dst_step, int dst_offset,
int rows, int cols, int dst_step1)
{
int x = get_global_id(0);
int y = get_global_id(1);
@@ -301,12 +316,11 @@ __kernel void arithm_bitwise_xor_D5 (__global char *src1, int src1_step, int src
*((__global char4 *)((__global char *)dst + dst_index)) = tmp;
}
}
#if defined (DOUBLE_SUPPORT)
__kernel void arithm_bitwise_xor_D6 (__global char *src1, int src1_step, int src1_offset,
__global char *src2, int src2_step, int src2_offset,
__global char *dst, int dst_step, int dst_offset,
int rows, int cols, int dst_step1)
__global char *src2, int src2_step, int src2_offset,
__global char *dst, int dst_step, int dst_offset,
int rows, int cols, int dst_step1)
{
int x = get_global_id(0);
int y = get_global_id(1);

View File

@@ -43,18 +43,22 @@
//
//M*/
// Optional fp64 extension selection, active only when DOUBLE_SUPPORT is defined.
// (DOUBLE_SUPPORT is presumably passed in through the program build options by
// the host code -- TODO confirm against the caller.)
//   - If the compiler defines cl_khr_fp64, that extension is enabled.
//   - Otherwise, if cl_amd_fp64 is defined, that extension is enabled instead.
//   - If neither macro is defined, no extension pragma is issued at all.
#if defined (DOUBLE_SUPPORT)
#ifdef cl_khr_fp64
#pragma OPENCL EXTENSION cl_khr_fp64:enable
#elif defined (cl_amd_fp64)
#pragma OPENCL EXTENSION cl_amd_fp64:enable
#endif
#endif
//////////////////////////////////////////////////////////////////////////////////////////////////////
////////////////////////////////////////////BITWISE_XOR////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////////////////////////////
/**************************************bitwise_xor with mask**************************************/
__kernel void arithm_bitwise_xor_with_mask_C1_D0 (__global uchar *src1, int src1_step, int src1_offset,
__global uchar *src2, int src2_step, int src2_offset,
__global uchar *mask, int mask_step, int mask_offset,
__global uchar *dst, int dst_step, int dst_offset,
int rows, int cols, int dst_step1)
__kernel void arithm_bitwise_xor_with_mask_C1_D0 (
__global uchar *src1, int src1_step, int src1_offset,
__global uchar *src2, int src2_step, int src2_offset,
__global uchar *mask, int mask_step, int mask_offset,
__global uchar *dst, int dst_step, int dst_offset,
int rows, int cols, int dst_step1)
{
int x = get_global_id(0);
@@ -64,7 +68,10 @@ __kernel void arithm_bitwise_xor_with_mask_C1_D0 (__global uchar *src1, int src1
{
x = x << 2;
#define dst_align (dst_offset & 3)
#ifdef dst_align
#undef dst_align
#endif
#define dst_align (dst_offset & 3)
int src1_index = mad24(y, src1_step, x + src1_offset - dst_align);
int src2_index = mad24(y, src2_step, x + src2_offset - dst_align);
int mask_index = mad24(y, mask_step, x + mask_offset - dst_align);
@@ -91,11 +98,12 @@ __kernel void arithm_bitwise_xor_with_mask_C1_D0 (__global uchar *src1, int src1
__kernel void arithm_bitwise_xor_with_mask_C1_D1 (__global char *src1, int src1_step, int src1_offset,
__global char *src2, int src2_step, int src2_offset,
__global uchar *mask, int mask_step, int mask_offset,
__global char *dst, int dst_step, int dst_offset,
int rows, int cols, int dst_step1)
__kernel void arithm_bitwise_xor_with_mask_C1_D1 (
__global char *src1, int src1_step, int src1_offset,
__global char *src2, int src2_step, int src2_offset,
__global uchar *mask, int mask_step, int mask_offset,
__global char *dst, int dst_step, int dst_offset,
int rows, int cols, int dst_step1)
{
int x = get_global_id(0);
@@ -105,7 +113,10 @@ __kernel void arithm_bitwise_xor_with_mask_C1_D1 (__global char *src1, int src1_
{
x = x << 2;
#define dst_align (dst_offset & 3)
#ifdef dst_align
#undef dst_align
#endif
#define dst_align (dst_offset & 3)
int src1_index = mad24(y, src1_step, x + src1_offset - dst_align);
int src2_index = mad24(y, src2_step, x + src2_offset - dst_align);
int mask_index = mad24(y, mask_step, x + mask_offset - dst_align);
@@ -132,11 +143,12 @@ __kernel void arithm_bitwise_xor_with_mask_C1_D1 (__global char *src1, int src1_
__kernel void arithm_bitwise_xor_with_mask_C1_D2 (__global ushort *src1, int src1_step, int src1_offset,
__global ushort *src2, int src2_step, int src2_offset,
__global uchar *mask, int mask_step, int mask_offset,
__global ushort *dst, int dst_step, int dst_offset,
int rows, int cols, int dst_step1)
__kernel void arithm_bitwise_xor_with_mask_C1_D2 (
__global ushort *src1, int src1_step, int src1_offset,
__global ushort *src2, int src2_step, int src2_offset,
__global uchar *mask, int mask_step, int mask_offset,
__global ushort *dst, int dst_step, int dst_offset,
int rows, int cols, int dst_step1)
{
int x = get_global_id(0);
@@ -146,7 +158,10 @@ __kernel void arithm_bitwise_xor_with_mask_C1_D2 (__global ushort *src1, int src
{
x = x << 1;
#define dst_align ((dst_offset >> 1) & 1)
#ifdef dst_align
#undef dst_align
#endif
#define dst_align ((dst_offset >> 1) & 1)
int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1));
int src2_index = mad24(y, src2_step, (x << 1) + src2_offset - (dst_align << 1));
int mask_index = mad24(y, mask_step, x + mask_offset - dst_align);
@@ -171,11 +186,12 @@ __kernel void arithm_bitwise_xor_with_mask_C1_D2 (__global ushort *src1, int src
__kernel void arithm_bitwise_xor_with_mask_C1_D3 (__global short *src1, int src1_step, int src1_offset,
__global short *src2, int src2_step, int src2_offset,
__global uchar *mask, int mask_step, int mask_offset,
__global short *dst, int dst_step, int dst_offset,
int rows, int cols, int dst_step1)
__kernel void arithm_bitwise_xor_with_mask_C1_D3 (
__global short *src1, int src1_step, int src1_offset,
__global short *src2, int src2_step, int src2_offset,
__global uchar *mask, int mask_step, int mask_offset,
__global short *dst, int dst_step, int dst_offset,
int rows, int cols, int dst_step1)
{
int x = get_global_id(0);
@@ -185,7 +201,10 @@ __kernel void arithm_bitwise_xor_with_mask_C1_D3 (__global short *src1, int src1
{
x = x << 1;
#define dst_align ((dst_offset >> 1) & 1)
#ifdef dst_align
#undef dst_align
#endif
#define dst_align ((dst_offset >> 1) & 1)
int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1));
int src2_index = mad24(y, src2_step, (x << 1) + src2_offset - (dst_align << 1));
int mask_index = mad24(y, mask_step, x + mask_offset - dst_align);
@@ -198,8 +217,8 @@ __kernel void arithm_bitwise_xor_with_mask_C1_D3 (__global short *src1, int src1
short2 src2_data = vload2(0, (__global short *)((__global char *)src2 + src2_index));
uchar2 mask_data = vload2(0, mask + mask_index);
short2 data = *((__global short2 *)((__global uchar *)dst + dst_index));
short2 tmp_data = src1_data ^ src2_data;
short2 data = *((__global short2 *)((__global uchar *)dst + dst_index));
short2 tmp_data = src1_data ^ src2_data;
data.x = convert_short((mask_data.x) && (dst_index + 0 >= dst_start) && (dst_index + 0 < dst_end)) ? tmp_data.x : data.x;
data.y = convert_short((mask_data.y) && (dst_index + 2 >= dst_start) && (dst_index + 2 < dst_end)) ? tmp_data.y : data.y;
@@ -210,11 +229,12 @@ __kernel void arithm_bitwise_xor_with_mask_C1_D3 (__global short *src1, int src1
__kernel void arithm_bitwise_xor_with_mask_C1_D4 (__global int *src1, int src1_step, int src1_offset,
__global int *src2, int src2_step, int src2_offset,
__global uchar *mask, int mask_step, int mask_offset,
__global int *dst, int dst_step, int dst_offset,
int rows, int cols, int dst_step1)
__kernel void arithm_bitwise_xor_with_mask_C1_D4 (
__global int *src1, int src1_step, int src1_offset,
__global int *src2, int src2_step, int src2_offset,
__global uchar *mask, int mask_step, int mask_offset,
__global int *dst, int dst_step, int dst_offset,
int rows, int cols, int dst_step1)
{
int x = get_global_id(0);
@@ -242,11 +262,12 @@ __kernel void arithm_bitwise_xor_with_mask_C1_D4 (__global int *src1, int src1
__kernel void arithm_bitwise_xor_with_mask_C1_D5 (__global char *src1, int src1_step, int src1_offset,
__global char *src2, int src2_step, int src2_offset,
__global uchar *mask, int mask_step, int mask_offset,
__global char *dst, int dst_step, int dst_offset,
int rows, int cols, int dst_step1)
__kernel void arithm_bitwise_xor_with_mask_C1_D5 (
__global char *src1, int src1_step, int src1_offset,
__global char *src2, int src2_step, int src2_offset,
__global uchar *mask, int mask_step, int mask_offset,
__global char *dst, int dst_step, int dst_offset,
int rows, int cols, int dst_step1)
{
int x = get_global_id(0);
@@ -273,13 +294,13 @@ __kernel void arithm_bitwise_xor_with_mask_C1_D5 (__global char *src1, int src1_
}
#if defined (DOUBLE_SUPPORT)
__kernel void arithm_bitwise_xor_with_mask_C1_D6 (__global char *src1, int src1_step, int src1_offset,
__global char *src2, int src2_step, int src2_offset,
__global uchar *mask, int mask_step, int mask_offset,
__global char *dst, int dst_step, int dst_offset,
int rows, int cols, int dst_step1)
__kernel void arithm_bitwise_xor_with_mask_C1_D6 (
__global char *src1, int src1_step, int src1_offset,
__global char *src2, int src2_step, int src2_offset,
__global uchar *mask, int mask_step, int mask_offset,
__global char *dst, int dst_step, int dst_offset,
int rows, int cols, int dst_step1)
{
int x = get_global_id(0);
@@ -308,12 +329,12 @@ __kernel void arithm_bitwise_xor_with_mask_C1_D6 (__global char *src1, int src1_
__kernel void arithm_bitwise_xor_with_mask_C2_D0 (__global uchar *src1, int src1_step, int src1_offset,
__global uchar *src2, int src2_step, int src2_offset,
__global uchar *mask, int mask_step, int mask_offset,
__global uchar *dst, int dst_step, int dst_offset,
int rows, int cols, int dst_step1)
__kernel void arithm_bitwise_xor_with_mask_C2_D0 (
__global uchar *src1, int src1_step, int src1_offset,
__global uchar *src2, int src2_step, int src2_offset,
__global uchar *mask, int mask_step, int mask_offset,
__global uchar *dst, int dst_step, int dst_offset,
int rows, int cols, int dst_step1)
{
int x = get_global_id(0);
@@ -323,7 +344,10 @@ __kernel void arithm_bitwise_xor_with_mask_C2_D0 (__global uchar *src1, int src1
{
x = x << 1;
#define dst_align ((dst_offset >> 1) & 1)
#ifdef dst_align
#undef dst_align
#endif
#define dst_align ((dst_offset >> 1) & 1)
int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1));
int src2_index = mad24(y, src2_step, (x << 1) + src2_offset - (dst_align << 1));
int mask_index = mad24(y, mask_step, x + mask_offset - dst_align);
@@ -347,11 +371,12 @@ __kernel void arithm_bitwise_xor_with_mask_C2_D0 (__global uchar *src1, int src1
}
__kernel void arithm_bitwise_xor_with_mask_C2_D1 (__global char *src1, int src1_step, int src1_offset,
__global char *src2, int src2_step, int src2_offset,
__global uchar *mask, int mask_step, int mask_offset,
__global char *dst, int dst_step, int dst_offset,
int rows, int cols, int dst_step1)
__kernel void arithm_bitwise_xor_with_mask_C2_D1 (
__global char *src1, int src1_step, int src1_offset,
__global char *src2, int src2_step, int src2_offset,
__global uchar *mask, int mask_step, int mask_offset,
__global char *dst, int dst_step, int dst_offset,
int rows, int cols, int dst_step1)
{
int x = get_global_id(0);
@@ -361,7 +386,10 @@ __kernel void arithm_bitwise_xor_with_mask_C2_D1 (__global char *src1, int src1_
{
x = x << 1;
#define dst_align ((dst_offset >> 1) & 1)
#ifdef dst_align
#undef dst_align
#endif
#define dst_align ((dst_offset >> 1) & 1)
int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1));
int src2_index = mad24(y, src2_step, (x << 1) + src2_offset - (dst_align << 1));
int mask_index = mad24(y, mask_step, x + mask_offset - dst_align);
@@ -384,11 +412,12 @@ __kernel void arithm_bitwise_xor_with_mask_C2_D1 (__global char *src1, int src1_
}
}
__kernel void arithm_bitwise_xor_with_mask_C2_D2 (__global ushort *src1, int src1_step, int src1_offset,
__global ushort *src2, int src2_step, int src2_offset,
__global uchar *mask, int mask_step, int mask_offset,
__global ushort *dst, int dst_step, int dst_offset,
int rows, int cols, int dst_step1)
__kernel void arithm_bitwise_xor_with_mask_C2_D2 (
__global ushort *src1, int src1_step, int src1_offset,
__global ushort *src2, int src2_step, int src2_offset,
__global uchar *mask, int mask_step, int mask_offset,
__global ushort *dst, int dst_step, int dst_offset,
int rows, int cols, int dst_step1)
{
int x = get_global_id(0);
@@ -413,11 +442,12 @@ __kernel void arithm_bitwise_xor_with_mask_C2_D2 (__global ushort *src1, int src
*((__global ushort2 *)((__global char *)dst + dst_index)) = data;
}
}
__kernel void arithm_bitwise_xor_with_mask_C2_D3 (__global short *src1, int src1_step, int src1_offset,
__global short *src2, int src2_step, int src2_offset,
__global uchar *mask, int mask_step, int mask_offset,
__global short *dst, int dst_step, int dst_offset,
int rows, int cols, int dst_step1)
__kernel void arithm_bitwise_xor_with_mask_C2_D3 (
__global short *src1, int src1_step, int src1_offset,
__global short *src2, int src2_step, int src2_offset,
__global uchar *mask, int mask_step, int mask_offset,
__global short *dst, int dst_step, int dst_offset,
int rows, int cols, int dst_step1)
{
int x = get_global_id(0);
@@ -442,11 +472,12 @@ __kernel void arithm_bitwise_xor_with_mask_C2_D3 (__global short *src1, int src1
*((__global short2 *)((__global char *)dst + dst_index)) = data;
}
}
__kernel void arithm_bitwise_xor_with_mask_C2_D4 (__global int *src1, int src1_step, int src1_offset,
__global int *src2, int src2_step, int src2_offset,
__global uchar *mask, int mask_step, int mask_offset,
__global int *dst, int dst_step, int dst_offset,
int rows, int cols, int dst_step1)
__kernel void arithm_bitwise_xor_with_mask_C2_D4 (
__global int *src1, int src1_step, int src1_offset,
__global int *src2, int src2_step, int src2_offset,
__global uchar *mask, int mask_step, int mask_offset,
__global int *dst, int dst_step, int dst_offset,
int rows, int cols, int dst_step1)
{
int x = get_global_id(0);
@@ -471,11 +502,12 @@ __kernel void arithm_bitwise_xor_with_mask_C2_D4 (__global int *src1, int src1
*((__global int2 *)((__global char *)dst + dst_index)) = data;
}
}
__kernel void arithm_bitwise_xor_with_mask_C2_D5 (__global char *src1, int src1_step, int src1_offset,
__global char *src2, int src2_step, int src2_offset,
__global uchar *mask, int mask_step, int mask_offset,
__global char *dst, int dst_step, int dst_offset,
int rows, int cols, int dst_step1)
__kernel void arithm_bitwise_xor_with_mask_C2_D5 (
__global char *src1, int src1_step, int src1_offset,
__global char *src2, int src2_step, int src2_offset,
__global uchar *mask, int mask_step, int mask_offset,
__global char *dst, int dst_step, int dst_offset,
int rows, int cols, int dst_step1)
{
int x = get_global_id(0);
@@ -501,11 +533,12 @@ __kernel void arithm_bitwise_xor_with_mask_C2_D5 (__global char *src1, int src1_
}
}
#if defined (DOUBLE_SUPPORT)
__kernel void arithm_bitwise_xor_with_mask_C2_D6 (__global char *src1, int src1_step, int src1_offset,
__global char *src2, int src2_step, int src2_offset,
__global uchar *mask, int mask_step, int mask_offset,
__global char *dst, int dst_step, int dst_offset,
int rows, int cols, int dst_step1)
__kernel void arithm_bitwise_xor_with_mask_C2_D6 (
__global char *src1, int src1_step, int src1_offset,
__global char *src2, int src2_step, int src2_offset,
__global uchar *mask, int mask_step, int mask_offset,
__global char *dst, int dst_step, int dst_offset,
int rows, int cols, int dst_step1)
{
int x = get_global_id(0);
@@ -533,12 +566,12 @@ __kernel void arithm_bitwise_xor_with_mask_C2_D6 (__global char *src1, int src1_
#endif
__kernel void arithm_bitwise_xor_with_mask_C3_D0 (__global uchar *src1, int src1_step, int src1_offset,
__global uchar *src2, int src2_step, int src2_offset,
__global uchar *mask, int mask_step, int mask_offset,
__global uchar *dst, int dst_step, int dst_offset,
int rows, int cols, int dst_step1)
__kernel void arithm_bitwise_xor_with_mask_C3_D0 (
__global uchar *src1, int src1_step, int src1_offset,
__global uchar *src2, int src2_step, int src2_offset,
__global uchar *mask, int mask_step, int mask_offset,
__global uchar *dst, int dst_step, int dst_offset,
int rows, int cols, int dst_step1)
{
int x = get_global_id(0);
@@ -548,7 +581,10 @@ __kernel void arithm_bitwise_xor_with_mask_C3_D0 (__global uchar *src1, int src1
{
x = x << 2;
#define dst_align (((dst_offset % dst_step) / 3 ) & 3)
#ifdef dst_align
#undef dst_align
#endif
#define dst_align (((dst_offset % dst_step) / 3 ) & 3)
int src1_index = mad24(y, src1_step, (x * 3) + src1_offset - (dst_align * 3));
int src2_index = mad24(y, src2_step, (x * 3) + src2_offset - (dst_align * 3));
int mask_index = mad24(y, mask_step, x + mask_offset - dst_align);
@@ -596,11 +632,12 @@ __kernel void arithm_bitwise_xor_with_mask_C3_D0 (__global uchar *src1, int src1
}
__kernel void arithm_bitwise_xor_with_mask_C3_D1 (__global char *src1, int src1_step, int src1_offset,
__global char *src2, int src2_step, int src2_offset,
__global uchar *mask, int mask_step, int mask_offset,
__global char *dst, int dst_step, int dst_offset,
int rows, int cols, int dst_step1)
__kernel void arithm_bitwise_xor_with_mask_C3_D1 (
__global char *src1, int src1_step, int src1_offset,
__global char *src2, int src2_step, int src2_offset,
__global uchar *mask, int mask_step, int mask_offset,
__global char *dst, int dst_step, int dst_offset,
int rows, int cols, int dst_step1)
{
int x = get_global_id(0);
@@ -610,7 +647,10 @@ __kernel void arithm_bitwise_xor_with_mask_C3_D1 (__global char *src1, int src1_
{
x = x << 2;
#define dst_align (((dst_offset % dst_step) / 3 ) & 3)
#ifdef dst_align
#undef dst_align
#endif
#define dst_align (((dst_offset % dst_step) / 3 ) & 3)
int src1_index = mad24(y, src1_step, (x * 3) + src1_offset - (dst_align * 3));
int src2_index = mad24(y, src2_step, (x * 3) + src2_offset - (dst_align * 3));
int mask_index = mad24(y, mask_step, x + mask_offset - dst_align);
@@ -657,11 +697,12 @@ __kernel void arithm_bitwise_xor_with_mask_C3_D1 (__global char *src1, int src1_
}
}
__kernel void arithm_bitwise_xor_with_mask_C3_D2 (__global ushort *src1, int src1_step, int src1_offset,
__global ushort *src2, int src2_step, int src2_offset,
__global uchar *mask, int mask_step, int mask_offset,
__global ushort *dst, int dst_step, int dst_offset,
int rows, int cols, int dst_step1)
__kernel void arithm_bitwise_xor_with_mask_C3_D2 (
__global ushort *src1, int src1_step, int src1_offset,
__global ushort *src2, int src2_step, int src2_offset,
__global uchar *mask, int mask_step, int mask_offset,
__global ushort *dst, int dst_step, int dst_offset,
int rows, int cols, int dst_step1)
{
int x = get_global_id(0);
@@ -671,7 +712,10 @@ __kernel void arithm_bitwise_xor_with_mask_C3_D2 (__global ushort *src1, int src
{
x = x << 1;
#define dst_align (((dst_offset % dst_step) / 6 ) & 1)
#ifdef dst_align
#undef dst_align
#endif
#define dst_align (((dst_offset % dst_step) / 6 ) & 1)
int src1_index = mad24(y, src1_step, (x * 6) + src1_offset - (dst_align * 6));
int src2_index = mad24(y, src2_step, (x * 6) + src2_offset - (dst_align * 6));
int mask_index = mad24(y, mask_step, x + mask_offset - dst_align);
@@ -701,23 +745,24 @@ __kernel void arithm_bitwise_xor_with_mask_C3_D2 (__global ushort *src1, int src
data_0.xy = ((mask_data.x) && (dst_index + 0 >= dst_start)) ? tmp_data_0.xy : data_0.xy;
data_1.x = ((mask_data.x) && (dst_index + 0 >= dst_start) && (dst_index + 0 < dst_end))
? tmp_data_1.x : data_1.x;
? tmp_data_1.x : data_1.x;
data_1.y = ((mask_data.y) && (dst_index + 6 >= dst_start) && (dst_index + 6 < dst_end))
? tmp_data_1.y : data_1.y;
? tmp_data_1.y : data_1.y;
data_2.xy = ((mask_data.y) && (dst_index + 6 >= dst_start) && (dst_index + 6 < dst_end))
? tmp_data_2.xy : data_2.xy;
? tmp_data_2.xy : data_2.xy;
*((__global ushort2 *)((__global char *)dst + dst_index + 0))= data_0;
*((__global ushort2 *)((__global char *)dst + dst_index + 4))= data_1;
*((__global ushort2 *)((__global char *)dst + dst_index + 8))= data_2;
*((__global ushort2 *)((__global char *)dst + dst_index + 0))= data_0;
*((__global ushort2 *)((__global char *)dst + dst_index + 4))= data_1;
*((__global ushort2 *)((__global char *)dst + dst_index + 8))= data_2;
}
}
__kernel void arithm_bitwise_xor_with_mask_C3_D3 (__global short *src1, int src1_step, int src1_offset,
__global short *src2, int src2_step, int src2_offset,
__global uchar *mask, int mask_step, int mask_offset,
__global short *dst, int dst_step, int dst_offset,
int rows, int cols, int dst_step1)
__kernel void arithm_bitwise_xor_with_mask_C3_D3 (
__global short *src1, int src1_step, int src1_offset,
__global short *src2, int src2_step, int src2_offset,
__global uchar *mask, int mask_step, int mask_offset,
__global short *dst, int dst_step, int dst_offset,
int rows, int cols, int dst_step1)
{
int x = get_global_id(0);
@@ -727,7 +772,10 @@ __kernel void arithm_bitwise_xor_with_mask_C3_D3 (__global short *src1, int src1
{
x = x << 1;
#define dst_align (((dst_offset % dst_step) / 6 ) & 1)
#ifdef dst_align
#undef dst_align
#endif
#define dst_align (((dst_offset % dst_step) / 6 ) & 1)
int src1_index = mad24(y, src1_step, (x * 6) + src1_offset - (dst_align * 6));
int src2_index = mad24(y, src2_step, (x * 6) + src2_offset - (dst_align * 6));
int mask_index = mad24(y, mask_step, x + mask_offset - dst_align);
@@ -757,23 +805,24 @@ __kernel void arithm_bitwise_xor_with_mask_C3_D3 (__global short *src1, int src1
data_0.xy = ((mask_data.x) && (dst_index + 0 >= dst_start)) ? tmp_data_0.xy : data_0.xy;
data_1.x = ((mask_data.x) && (dst_index + 0 >= dst_start) && (dst_index + 0 < dst_end))
? tmp_data_1.x : data_1.x;
? tmp_data_1.x : data_1.x;
data_1.y = ((mask_data.y) && (dst_index + 6 >= dst_start) && (dst_index + 6 < dst_end))
? tmp_data_1.y : data_1.y;
? tmp_data_1.y : data_1.y;
data_2.xy = ((mask_data.y) && (dst_index + 6 >= dst_start) && (dst_index + 6 < dst_end))
? tmp_data_2.xy : data_2.xy;
? tmp_data_2.xy : data_2.xy;
*((__global short2 *)((__global char *)dst + dst_index + 0))= data_0;
*((__global short2 *)((__global char *)dst + dst_index + 4))= data_1;
*((__global short2 *)((__global char *)dst + dst_index + 8))= data_2;
*((__global short2 *)((__global char *)dst + dst_index + 0))= data_0;
*((__global short2 *)((__global char *)dst + dst_index + 4))= data_1;
*((__global short2 *)((__global char *)dst + dst_index + 8))= data_2;
}
}
__kernel void arithm_bitwise_xor_with_mask_C3_D4 (__global int *src1, int src1_step, int src1_offset,
__global int *src2, int src2_step, int src2_offset,
__global uchar *mask, int mask_step, int mask_offset,
__global int *dst, int dst_step, int dst_offset,
int rows, int cols, int dst_step1)
__kernel void arithm_bitwise_xor_with_mask_C3_D4 (
__global int *src1, int src1_step, int src1_offset,
__global int *src2, int src2_step, int src2_offset,
__global uchar *mask, int mask_step, int mask_offset,
__global int *dst, int dst_step, int dst_offset,
int rows, int cols, int dst_step1)
{
int x = get_global_id(0);
@@ -808,16 +857,17 @@ __kernel void arithm_bitwise_xor_with_mask_C3_D4 (__global int *src1, int src1
data_1 = mask_data ? tmp_data_1 : data_1;
data_2 = mask_data ? tmp_data_2 : data_2;
*((__global int *)((__global char *)dst + dst_index + 0))= data_0;
*((__global int *)((__global char *)dst + dst_index + 4))= data_1;
*((__global int *)((__global char *)dst + dst_index + 8))= data_2;
*((__global int *)((__global char *)dst + dst_index + 0))= data_0;
*((__global int *)((__global char *)dst + dst_index + 4))= data_1;
*((__global int *)((__global char *)dst + dst_index + 8))= data_2;
}
}
__kernel void arithm_bitwise_xor_with_mask_C3_D5 (__global char *src1, int src1_step, int src1_offset,
__global char *src2, int src2_step, int src2_offset,
__global uchar *mask, int mask_step, int mask_offset,
__global char *dst, int dst_step, int dst_offset,
int rows, int cols, int dst_step1)
__kernel void arithm_bitwise_xor_with_mask_C3_D5 (
__global char *src1, int src1_step, int src1_offset,
__global char *src2, int src2_step, int src2_offset,
__global uchar *mask, int mask_step, int mask_offset,
__global char *dst, int dst_step, int dst_offset,
int rows, int cols, int dst_step1)
{
int x = get_global_id(0);
@@ -852,17 +902,18 @@ __kernel void arithm_bitwise_xor_with_mask_C3_D5 (__global char *src1, int src1_
data_1 = mask_data ? tmp_data_1 : data_1;
data_2 = mask_data ? tmp_data_2 : data_2;
*((__global char4 *)((__global char *)dst + dst_index + 0))= data_0;
*((__global char4 *)((__global char *)dst + dst_index + 4))= data_1;
*((__global char4 *)((__global char *)dst + dst_index + 8))= data_2;
*((__global char4 *)((__global char *)dst + dst_index + 0))= data_0;
*((__global char4 *)((__global char *)dst + dst_index + 4))= data_1;
*((__global char4 *)((__global char *)dst + dst_index + 8))= data_2;
}
}
#if defined (DOUBLE_SUPPORT)
__kernel void arithm_bitwise_xor_with_mask_C3_D6 (__global char *src1, int src1_step, int src1_offset,
__global char *src2, int src2_step, int src2_offset,
__global uchar *mask, int mask_step, int mask_offset,
__global char *dst, int dst_step, int dst_offset,
int rows, int cols, int dst_step1)
__kernel void arithm_bitwise_xor_with_mask_C3_D6 (
__global char *src1, int src1_step, int src1_offset,
__global char *src2, int src2_step, int src2_offset,
__global uchar *mask, int mask_step, int mask_offset,
__global char *dst, int dst_step, int dst_offset,
int rows, int cols, int dst_step1)
{
int x = get_global_id(0);
@@ -897,20 +948,20 @@ __kernel void arithm_bitwise_xor_with_mask_C3_D6 (__global char *src1, int src1_
data_1 = mask_data ? tmp_data_1 : data_1;
data_2 = mask_data ? tmp_data_2 : data_2;
*((__global char8 *)((__global char *)dst + dst_index + 0 ))= data_0;
*((__global char8 *)((__global char *)dst + dst_index + 8 ))= data_1;
*((__global char8 *)((__global char *)dst + dst_index + 16))= data_2;
*((__global char8 *)((__global char *)dst + dst_index + 0 ))= data_0;
*((__global char8 *)((__global char *)dst + dst_index + 8 ))= data_1;
*((__global char8 *)((__global char *)dst + dst_index + 16))= data_2;
}
}
#endif
__kernel void arithm_bitwise_xor_with_mask_C4_D0 (__global uchar *src1, int src1_step, int src1_offset,
__global uchar *src2, int src2_step, int src2_offset,
__global uchar *mask, int mask_step, int mask_offset,
__global uchar *dst, int dst_step, int dst_offset,
int rows, int cols, int dst_step1)
__kernel void arithm_bitwise_xor_with_mask_C4_D0 (
__global uchar *src1, int src1_step, int src1_offset,
__global uchar *src2, int src2_step, int src2_offset,
__global uchar *mask, int mask_step, int mask_offset,
__global uchar *dst, int dst_step, int dst_offset,
int rows, int cols, int dst_step1)
{
int x = get_global_id(0);
@@ -937,11 +988,12 @@ __kernel void arithm_bitwise_xor_with_mask_C4_D0 (__global uchar *src1, int src1
}
__kernel void arithm_bitwise_xor_with_mask_C4_D1 (__global char *src1, int src1_step, int src1_offset,
__global char *src2, int src2_step, int src2_offset,
__global uchar *mask, int mask_step, int mask_offset,
__global char *dst, int dst_step, int dst_offset,
int rows, int cols, int dst_step1)
__kernel void arithm_bitwise_xor_with_mask_C4_D1 (
__global char *src1, int src1_step, int src1_offset,
__global char *src2, int src2_step, int src2_offset,
__global uchar *mask, int mask_step, int mask_offset,
__global char *dst, int dst_step, int dst_offset,
int rows, int cols, int dst_step1)
{
int x = get_global_id(0);
@@ -967,11 +1019,12 @@ __kernel void arithm_bitwise_xor_with_mask_C4_D1 (__global char *src1, int src1_
}
}
__kernel void arithm_bitwise_xor_with_mask_C4_D2 (__global ushort *src1, int src1_step, int src1_offset,
__global ushort *src2, int src2_step, int src2_offset,
__global uchar *mask, int mask_step, int mask_offset,
__global ushort *dst, int dst_step, int dst_offset,
int rows, int cols, int dst_step1)
__kernel void arithm_bitwise_xor_with_mask_C4_D2 (
__global ushort *src1, int src1_step, int src1_offset,
__global ushort *src2, int src2_step, int src2_offset,
__global uchar *mask, int mask_step, int mask_offset,
__global ushort *dst, int dst_step, int dst_offset,
int rows, int cols, int dst_step1)
{
int x = get_global_id(0);
@@ -996,11 +1049,12 @@ __kernel void arithm_bitwise_xor_with_mask_C4_D2 (__global ushort *src1, int src
*((__global ushort4 *)((__global char *)dst + dst_index)) = data;
}
}
__kernel void arithm_bitwise_xor_with_mask_C4_D3 (__global short *src1, int src1_step, int src1_offset,
__global short *src2, int src2_step, int src2_offset,
__global uchar *mask, int mask_step, int mask_offset,
__global short *dst, int dst_step, int dst_offset,
int rows, int cols, int dst_step1)
__kernel void arithm_bitwise_xor_with_mask_C4_D3 (
__global short *src1, int src1_step, int src1_offset,
__global short *src2, int src2_step, int src2_offset,
__global uchar *mask, int mask_step, int mask_offset,
__global short *dst, int dst_step, int dst_offset,
int rows, int cols, int dst_step1)
{
int x = get_global_id(0);
@@ -1025,11 +1079,12 @@ __kernel void arithm_bitwise_xor_with_mask_C4_D3 (__global short *src1, int src1
*((__global short4 *)((__global char *)dst + dst_index)) = data;
}
}
__kernel void arithm_bitwise_xor_with_mask_C4_D4 (__global int *src1, int src1_step, int src1_offset,
__global int *src2, int src2_step, int src2_offset,
__global uchar *mask, int mask_step, int mask_offset,
__global int *dst, int dst_step, int dst_offset,
int rows, int cols, int dst_step1)
__kernel void arithm_bitwise_xor_with_mask_C4_D4 (
__global int *src1, int src1_step, int src1_offset,
__global int *src2, int src2_step, int src2_offset,
__global uchar *mask, int mask_step, int mask_offset,
__global int *dst, int dst_step, int dst_offset,
int rows, int cols, int dst_step1)
{
int x = get_global_id(0);
@@ -1054,11 +1109,12 @@ __kernel void arithm_bitwise_xor_with_mask_C4_D4 (__global int *src1, int src1
*((__global int4 *)((__global char *)dst + dst_index)) = data;
}
}
__kernel void arithm_bitwise_xor_with_mask_C4_D5 (__global char *src1, int src1_step, int src1_offset,
__global char *src2, int src2_step, int src2_offset,
__global uchar *mask, int mask_step, int mask_offset,
__global char *dst, int dst_step, int dst_offset,
int rows, int cols, int dst_step1)
__kernel void arithm_bitwise_xor_with_mask_C4_D5 (
__global char *src1, int src1_step, int src1_offset,
__global char *src2, int src2_step, int src2_offset,
__global uchar *mask, int mask_step, int mask_offset,
__global char *dst, int dst_step, int dst_offset,
int rows, int cols, int dst_step1)
{
int x = get_global_id(0);
@@ -1084,11 +1140,12 @@ __kernel void arithm_bitwise_xor_with_mask_C4_D5 (__global char *src1, int src1_
}
}
#if defined (DOUBLE_SUPPORT)
__kernel void arithm_bitwise_xor_with_mask_C4_D6 (__global char *src1, int src1_step, int src1_offset,
__global char *src2, int src2_step, int src2_offset,
__global uchar *mask, int mask_step, int mask_offset,
__global char *dst, int dst_step, int dst_offset,
int rows, int cols, int dst_step1)
__kernel void arithm_bitwise_xor_with_mask_C4_D6 (
__global char *src1, int src1_step, int src1_offset,
__global char *src2, int src2_step, int src2_offset,
__global uchar *mask, int mask_step, int mask_offset,
__global char *dst, int dst_step, int dst_offset,
int rows, int cols, int dst_step1)
{
int x = get_global_id(0);

View File

@@ -42,19 +42,21 @@
// the use of this software, even if advised of the possibility of such damage.
//
//
#if defined (__ATI__)
#pragma OPENCL EXTENSION cl_amd_fp64:enable
#elif defined (__NVIDIA__)
#if defined (DOUBLE_SUPPORT)
#ifdef cl_khr_fp64
#pragma OPENCL EXTENSION cl_khr_fp64:enable
#elif defined (cl_amd_fp64)
#pragma OPENCL EXTENSION cl_amd_fp64:enable
#endif
#endif
//////////////////////////////////////////////////////////////////////////////////////////////////////
////////////////////////////////////////////BITWISE_XOR////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////////////////////////////
/**************************************xor with scalar without mask**************************************/
__kernel void arithm_s_bitwise_xor_C1_D0 (__global uchar *src1, int src1_step, int src1_offset,
__global uchar *dst, int dst_step, int dst_offset,
uchar4 src2, int rows, int cols, int dst_step1)
__kernel void arithm_s_bitwise_xor_C1_D0 (
__global uchar *src1, int src1_step, int src1_offset,
__global uchar *dst, int dst_step, int dst_offset,
uchar4 src2, int rows, int cols, int dst_step1)
{
int x = get_global_id(0);
int y = get_global_id(1);
@@ -63,7 +65,10 @@ __kernel void arithm_s_bitwise_xor_C1_D0 (__global uchar *src1, int src1_step,
{
x = x << 2;
#define dst_align (dst_offset & 3)
#ifdef dst_align
#undef dst_align
#endif
#define dst_align (dst_offset & 3)
int src1_index = mad24(y, src1_step, x + src1_offset - dst_align);
int dst_start = mad24(y, dst_step, dst_offset);
@@ -86,9 +91,10 @@ __kernel void arithm_s_bitwise_xor_C1_D0 (__global uchar *src1, int src1_step,
}
__kernel void arithm_s_bitwise_xor_C1_D1 (__global char *src1, int src1_step, int src1_offset,
__global char *dst, int dst_step, int dst_offset,
char4 src2, int rows, int cols, int dst_step1)
__kernel void arithm_s_bitwise_xor_C1_D1 (
__global char *src1, int src1_step, int src1_offset,
__global char *dst, int dst_step, int dst_offset,
char4 src2, int rows, int cols, int dst_step1)
{
int x = get_global_id(0);
int y = get_global_id(1);
@@ -97,7 +103,10 @@ __kernel void arithm_s_bitwise_xor_C1_D1 (__global char *src1, int src1_step,
{
x = x << 2;
#define dst_align (dst_offset & 3)
#ifdef dst_align
#undef dst_align
#endif
#define dst_align (dst_offset & 3)
int src1_index = mad24(y, src1_step, x + src1_offset - dst_align);
int dst_start = mad24(y, dst_step, dst_offset);
@@ -119,9 +128,10 @@ __kernel void arithm_s_bitwise_xor_C1_D1 (__global char *src1, int src1_step,
}
}
__kernel void arithm_s_bitwise_xor_C1_D2 (__global ushort *src1, int src1_step, int src1_offset,
__global ushort *dst, int dst_step, int dst_offset,
ushort4 src2, int rows, int cols, int dst_step1)
__kernel void arithm_s_bitwise_xor_C1_D2 (
__global ushort *src1, int src1_step, int src1_offset,
__global ushort *dst, int dst_step, int dst_offset,
ushort4 src2, int rows, int cols, int dst_step1)
{
int x = get_global_id(0);
@@ -131,7 +141,10 @@ __kernel void arithm_s_bitwise_xor_C1_D2 (__global ushort *src1, int src1_step
{
x = x << 1;
#define dst_align ((dst_offset >> 1) & 1)
#ifdef dst_align
#undef dst_align
#endif
#define dst_align ((dst_offset >> 1) & 1)
int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1));
int dst_start = mad24(y, dst_step, dst_offset);
@@ -150,9 +163,10 @@ __kernel void arithm_s_bitwise_xor_C1_D2 (__global ushort *src1, int src1_step
*((__global ushort2 *)((__global uchar *)dst + dst_index)) = data;
}
}
__kernel void arithm_s_bitwise_xor_C1_D3 (__global short *src1, int src1_step, int src1_offset,
__global short *dst, int dst_step, int dst_offset,
short4 src2, int rows, int cols, int dst_step1)
__kernel void arithm_s_bitwise_xor_C1_D3 (
__global short *src1, int src1_step, int src1_offset,
__global short *dst, int dst_step, int dst_offset,
short4 src2, int rows, int cols, int dst_step1)
{
int x = get_global_id(0);
@@ -162,7 +176,10 @@ __kernel void arithm_s_bitwise_xor_C1_D3 (__global short *src1, int src1_step,
{
x = x << 1;
#define dst_align ((dst_offset >> 1) & 1)
#ifdef dst_align
#undef dst_align
#endif
#define dst_align ((dst_offset >> 1) & 1)
int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1));
int dst_start = mad24(y, dst_step, dst_offset);
@@ -181,9 +198,10 @@ __kernel void arithm_s_bitwise_xor_C1_D3 (__global short *src1, int src1_step,
*((__global short2 *)((__global uchar *)dst + dst_index)) = data;
}
}
__kernel void arithm_s_bitwise_xor_C1_D4 (__global int *src1, int src1_step, int src1_offset,
__global int *dst, int dst_step, int dst_offset,
int4 src2, int rows, int cols, int dst_step1)
__kernel void arithm_s_bitwise_xor_C1_D4 (
__global int *src1, int src1_step, int src1_offset,
__global int *dst, int dst_step, int dst_offset,
int4 src2, int rows, int cols, int dst_step1)
{
int x = get_global_id(0);
@@ -202,9 +220,10 @@ __kernel void arithm_s_bitwise_xor_C1_D4 (__global int *src1, int src1_step, i
*((__global int *)((__global char *)dst + dst_index)) = data;
}
}
__kernel void arithm_s_bitwise_xor_C1_D5 (__global char *src1, int src1_step, int src1_offset,
__global char *dst, int dst_step, int dst_offset,
char16 src2, int rows, int cols, int dst_step1)
__kernel void arithm_s_bitwise_xor_C1_D5 (
__global char *src1, int src1_step, int src1_offset,
__global char *dst, int dst_step, int dst_offset,
char16 src2, int rows, int cols, int dst_step1)
{
int x = get_global_id(0);
@@ -234,9 +253,10 @@ __kernel void arithm_s_bitwise_xor_C1_D5 (__global char *src1, int src1_step,
}
#if defined (DOUBLE_SUPPORT)
__kernel void arithm_s_bitwise_xor_C1_D6 (__global short *src1, int src1_step, int src1_offset,
__global short *dst, int dst_step, int dst_offset,
short16 src2, int rows, int cols, int dst_step1)
__kernel void arithm_s_bitwise_xor_C1_D6 (
__global short *src1, int src1_step, int src1_offset,
__global short *dst, int dst_step, int dst_offset,
short16 src2, int rows, int cols, int dst_step1)
{
int x = get_global_id(0);
@@ -256,9 +276,10 @@ __kernel void arithm_s_bitwise_xor_C1_D6 (__global short *src1, int src1_step, i
}
}
#endif
__kernel void arithm_s_bitwise_xor_C2_D0 (__global uchar *src1, int src1_step, int src1_offset,
__global uchar *dst, int dst_step, int dst_offset,
uchar4 src2, int rows, int cols, int dst_step1)
__kernel void arithm_s_bitwise_xor_C2_D0 (
__global uchar *src1, int src1_step, int src1_offset,
__global uchar *dst, int dst_step, int dst_offset,
uchar4 src2, int rows, int cols, int dst_step1)
{
int x = get_global_id(0);
@@ -268,7 +289,10 @@ __kernel void arithm_s_bitwise_xor_C2_D0 (__global uchar *src1, int src1_step,
{
x = x << 1;
#define dst_align ((dst_offset >> 1) & 1)
#ifdef dst_align
#undef dst_align
#endif
#define dst_align ((dst_offset >> 1) & 1)
int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1));
int dst_start = mad24(y, dst_step, dst_offset);
@@ -290,9 +314,10 @@ __kernel void arithm_s_bitwise_xor_C2_D0 (__global uchar *src1, int src1_step,
}
__kernel void arithm_s_bitwise_xor_C2_D1 (__global char *src1, int src1_step, int src1_offset,
__global char *dst, int dst_step, int dst_offset,
char4 src2, int rows, int cols, int dst_step1)
__kernel void arithm_s_bitwise_xor_C2_D1 (
__global char *src1, int src1_step, int src1_offset,
__global char *dst, int dst_step, int dst_offset,
char4 src2, int rows, int cols, int dst_step1)
{
int x = get_global_id(0);
@@ -302,7 +327,10 @@ __kernel void arithm_s_bitwise_xor_C2_D1 (__global char *src1, int src1_step,
{
x = x << 1;
#define dst_align ((dst_offset >> 1) & 1)
#ifdef dst_align
#undef dst_align
#endif
#define dst_align ((dst_offset >> 1) & 1)
int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1));
int dst_start = mad24(y, dst_step, dst_offset);
@@ -322,9 +350,10 @@ __kernel void arithm_s_bitwise_xor_C2_D1 (__global char *src1, int src1_step,
}
}
__kernel void arithm_s_bitwise_xor_C2_D2 (__global ushort *src1, int src1_step, int src1_offset,
__global ushort *dst, int dst_step, int dst_offset,
ushort4 src2, int rows, int cols, int dst_step1)
__kernel void arithm_s_bitwise_xor_C2_D2 (
__global ushort *src1, int src1_step, int src1_offset,
__global ushort *dst, int dst_step, int dst_offset,
ushort4 src2, int rows, int cols, int dst_step1)
{
int x = get_global_id(0);
@@ -343,9 +372,10 @@ __kernel void arithm_s_bitwise_xor_C2_D2 (__global ushort *src1, int src1_step
*((__global ushort2 *)((__global char *)dst + dst_index)) = data;
}
}
__kernel void arithm_s_bitwise_xor_C2_D3 (__global short *src1, int src1_step, int src1_offset,
__global short *dst, int dst_step, int dst_offset,
short4 src2, int rows, int cols, int dst_step1)
__kernel void arithm_s_bitwise_xor_C2_D3 (
__global short *src1, int src1_step, int src1_offset,
__global short *dst, int dst_step, int dst_offset,
short4 src2, int rows, int cols, int dst_step1)
{
int x = get_global_id(0);
@@ -364,9 +394,10 @@ __kernel void arithm_s_bitwise_xor_C2_D3 (__global short *src1, int src1_step,
*((__global short2 *)((__global char *)dst + dst_index)) = data;
}
}
__kernel void arithm_s_bitwise_xor_C2_D4 (__global int *src1, int src1_step, int src1_offset,
__global int *dst, int dst_step, int dst_offset,
int4 src2, int rows, int cols, int dst_step1)
__kernel void arithm_s_bitwise_xor_C2_D4 (
__global int *src1, int src1_step, int src1_offset,
__global int *dst, int dst_step, int dst_offset,
int4 src2, int rows, int cols, int dst_step1)
{
int x = get_global_id(0);
@@ -384,9 +415,10 @@ __kernel void arithm_s_bitwise_xor_C2_D4 (__global int *src1, int src1_step, i
*((__global int2 *)((__global char *)dst + dst_index)) = data;
}
}
__kernel void arithm_s_bitwise_xor_C2_D5 (__global char *src1, int src1_step, int src1_offset,
__global char *dst, int dst_step, int dst_offset,
char16 src2, int rows, int cols, int dst_step1)
__kernel void arithm_s_bitwise_xor_C2_D5 (
__global char *src1, int src1_step, int src1_offset,
__global char *dst, int dst_step, int dst_offset,
char16 src2, int rows, int cols, int dst_step1)
{
int x = get_global_id(0);
@@ -403,12 +435,13 @@ __kernel void arithm_s_bitwise_xor_C2_D5 (__global char *src1, int src1_step,
char8 tmp_data = src1_data ^ src2_data;
*((__global char8 *)((__global char *)dst + dst_index)) = tmp_data;
}
}
}
#if defined (DOUBLE_SUPPORT)
__kernel void arithm_s_bitwise_xor_C2_D6 (__global short *src1, int src1_step, int src1_offset,
__global short *dst, int dst_step, int dst_offset,
short16 src2, int rows, int cols, int dst_step1)
__kernel void arithm_s_bitwise_xor_C2_D6 (
__global short *src1, int src1_step, int src1_offset,
__global short *dst, int dst_step, int dst_offset,
short16 src2, int rows, int cols, int dst_step1)
{
int x = get_global_id(0);
@@ -428,9 +461,10 @@ __kernel void arithm_s_bitwise_xor_C2_D6 (__global short *src1, int src1_step, i
}
}
#endif
__kernel void arithm_s_bitwise_xor_C3_D0 (__global uchar *src1, int src1_step, int src1_offset,
__global uchar *dst, int dst_step, int dst_offset,
uchar4 src2, int rows, int cols, int dst_step1)
__kernel void arithm_s_bitwise_xor_C3_D0 (
__global uchar *src1, int src1_step, int src1_offset,
__global uchar *dst, int dst_step, int dst_offset,
uchar4 src2, int rows, int cols, int dst_step1)
{
int x = get_global_id(0);
@@ -440,7 +474,10 @@ __kernel void arithm_s_bitwise_xor_C3_D0 (__global uchar *src1, int src1_step,
{
x = x << 2;
#define dst_align (((dst_offset % dst_step) / 3 ) & 3)
#ifdef dst_align
#undef dst_align
#endif
#define dst_align (((dst_offset % dst_step) / 3 ) & 3)
int src1_index = mad24(y, src1_step, (x * 3) + src1_offset - (dst_align * 3));
int dst_start = mad24(y, dst_step, dst_offset);
@@ -484,9 +521,10 @@ __kernel void arithm_s_bitwise_xor_C3_D0 (__global uchar *src1, int src1_step,
}
__kernel void arithm_s_bitwise_xor_C3_D1 (__global char *src1, int src1_step, int src1_offset,
__global char *dst, int dst_step, int dst_offset,
char4 src2, int rows, int cols, int dst_step1)
__kernel void arithm_s_bitwise_xor_C3_D1 (
__global char *src1, int src1_step, int src1_offset,
__global char *dst, int dst_step, int dst_offset,
char4 src2, int rows, int cols, int dst_step1)
{
int x = get_global_id(0);
@@ -496,7 +534,10 @@ __kernel void arithm_s_bitwise_xor_C3_D1 (__global char *src1, int src1_step,
{
x = x << 2;
#define dst_align (((dst_offset % dst_step) / 3 ) & 3)
#ifdef dst_align
#undef dst_align
#endif
#define dst_align (((dst_offset % dst_step) / 3 ) & 3)
int src1_index = mad24(y, src1_step, (x * 3) + src1_offset - (dst_align * 3));
int dst_start = mad24(y, dst_step, dst_offset);
@@ -539,9 +580,10 @@ __kernel void arithm_s_bitwise_xor_C3_D1 (__global char *src1, int src1_step,
}
}
__kernel void arithm_s_bitwise_xor_C3_D2 (__global ushort *src1, int src1_step, int src1_offset,
__global ushort *dst, int dst_step, int dst_offset,
ushort4 src2, int rows, int cols, int dst_step1)
__kernel void arithm_s_bitwise_xor_C3_D2 (
__global ushort *src1, int src1_step, int src1_offset,
__global ushort *dst, int dst_step, int dst_offset,
ushort4 src2, int rows, int cols, int dst_step1)
{
int x = get_global_id(0);
@@ -551,7 +593,10 @@ __kernel void arithm_s_bitwise_xor_C3_D2 (__global ushort *src1, int src1_step
{
x = x << 1;
#define dst_align (((dst_offset % dst_step) / 6 ) & 1)
#ifdef dst_align
#undef dst_align
#endif
#define dst_align (((dst_offset % dst_step) / 6 ) & 1)
int src1_index = mad24(y, src1_step, (x * 6) + src1_offset - (dst_align * 6));
int dst_start = mad24(y, dst_step, dst_offset);
@@ -577,21 +622,22 @@ __kernel void arithm_s_bitwise_xor_C3_D2 (__global ushort *src1, int src1_step
data_0.xy = ((dst_index + 0 >= dst_start)) ? tmp_data_0.xy : data_0.xy;
data_1.x = ((dst_index + 0 >= dst_start) && (dst_index + 0 < dst_end))
? tmp_data_1.x : data_1.x;
? tmp_data_1.x : data_1.x;
data_1.y = ((dst_index + 6 >= dst_start) && (dst_index + 6 < dst_end))
? tmp_data_1.y : data_1.y;
? tmp_data_1.y : data_1.y;
data_2.xy = ((dst_index + 6 >= dst_start) && (dst_index + 6 < dst_end))
? tmp_data_2.xy : data_2.xy;
? tmp_data_2.xy : data_2.xy;
*((__global ushort2 *)((__global char *)dst + dst_index + 0))= data_0;
*((__global ushort2 *)((__global char *)dst + dst_index + 4))= data_1;
*((__global ushort2 *)((__global char *)dst + dst_index + 8))= data_2;
*((__global ushort2 *)((__global char *)dst + dst_index + 0))= data_0;
*((__global ushort2 *)((__global char *)dst + dst_index + 4))= data_1;
*((__global ushort2 *)((__global char *)dst + dst_index + 8))= data_2;
}
}
__kernel void arithm_s_bitwise_xor_C3_D3 (__global short *src1, int src1_step, int src1_offset,
__global short *dst, int dst_step, int dst_offset,
short4 src2, int rows, int cols, int dst_step1)
__kernel void arithm_s_bitwise_xor_C3_D3 (
__global short *src1, int src1_step, int src1_offset,
__global short *dst, int dst_step, int dst_offset,
short4 src2, int rows, int cols, int dst_step1)
{
int x = get_global_id(0);
@@ -601,7 +647,10 @@ __kernel void arithm_s_bitwise_xor_C3_D3 (__global short *src1, int src1_step,
{
x = x << 1;
#define dst_align (((dst_offset % dst_step) / 6 ) & 1)
#ifdef dst_align
#undef dst_align
#endif
#define dst_align (((dst_offset % dst_step) / 6 ) & 1)
int src1_index = mad24(y, src1_step, (x * 6) + src1_offset - (dst_align * 6));
int dst_start = mad24(y, dst_step, dst_offset);
@@ -627,21 +676,22 @@ __kernel void arithm_s_bitwise_xor_C3_D3 (__global short *src1, int src1_step,
data_0.xy = ((dst_index + 0 >= dst_start)) ? tmp_data_0.xy : data_0.xy;
data_1.x = ((dst_index + 0 >= dst_start) && (dst_index + 0 < dst_end))
? tmp_data_1.x : data_1.x;
? tmp_data_1.x : data_1.x;
data_1.y = ((dst_index + 6 >= dst_start) && (dst_index + 6 < dst_end))
? tmp_data_1.y : data_1.y;
? tmp_data_1.y : data_1.y;
data_2.xy = ((dst_index + 6 >= dst_start) && (dst_index + 6 < dst_end))
? tmp_data_2.xy : data_2.xy;
? tmp_data_2.xy : data_2.xy;
*((__global short2 *)((__global char *)dst + dst_index + 0))= data_0;
*((__global short2 *)((__global char *)dst + dst_index + 4))= data_1;
*((__global short2 *)((__global char *)dst + dst_index + 8))= data_2;
*((__global short2 *)((__global char *)dst + dst_index + 0))= data_0;
*((__global short2 *)((__global char *)dst + dst_index + 4))= data_1;
*((__global short2 *)((__global char *)dst + dst_index + 8))= data_2;
}
}
__kernel void arithm_s_bitwise_xor_C3_D4 (__global int *src1, int src1_step, int src1_offset,
__global int *dst, int dst_step, int dst_offset,
int4 src2, int rows, int cols, int dst_step1)
__kernel void arithm_s_bitwise_xor_C3_D4 (
__global int *src1, int src1_step, int src1_offset,
__global int *dst, int dst_step, int dst_offset,
int4 src2, int rows, int cols, int dst_step1)
{
int x = get_global_id(0);
@@ -668,14 +718,15 @@ __kernel void arithm_s_bitwise_xor_C3_D4 (__global int *src1, int src1_step, i
int tmp_data_1 = src1_data_1 ^ src2_data_1;
int tmp_data_2 = src1_data_2 ^ src2_data_2;
*((__global int *)((__global char *)dst + dst_index + 0))= tmp_data_0;
*((__global int *)((__global char *)dst + dst_index + 4))= tmp_data_1;
*((__global int *)((__global char *)dst + dst_index + 8))= tmp_data_2;
*((__global int *)((__global char *)dst + dst_index + 0))= tmp_data_0;
*((__global int *)((__global char *)dst + dst_index + 4))= tmp_data_1;
*((__global int *)((__global char *)dst + dst_index + 8))= tmp_data_2;
}
}
__kernel void arithm_s_bitwise_xor_C3_D5 (__global char *src1, int src1_step, int src1_offset,
__global char *dst, int dst_step, int dst_offset,
char16 src2, int rows, int cols, int dst_step1)
__kernel void arithm_s_bitwise_xor_C3_D5 (
__global char *src1, int src1_step, int src1_offset,
__global char *dst, int dst_step, int dst_offset,
char16 src2, int rows, int cols, int dst_step1)
{
int x = get_global_id(0);
@@ -702,15 +753,16 @@ __kernel void arithm_s_bitwise_xor_C3_D5 (__global char *src1, int src1_step,
char4 tmp_data_1 = src1_data_1 ^ src2_data_1;
char4 tmp_data_2 = src1_data_2 ^ src2_data_2;
*((__global char4 *)((__global char *)dst + dst_index + 0))= tmp_data_0;
*((__global char4 *)((__global char *)dst + dst_index + 4))= tmp_data_1;
*((__global char4 *)((__global char *)dst + dst_index + 8))= tmp_data_2;
*((__global char4 *)((__global char *)dst + dst_index + 0))= tmp_data_0;
*((__global char4 *)((__global char *)dst + dst_index + 4))= tmp_data_1;
*((__global char4 *)((__global char *)dst + dst_index + 8))= tmp_data_2;
}
}
#if defined (DOUBLE_SUPPORT)
__kernel void arithm_s_bitwise_xor_C3_D6 (__global short *src1, int src1_step, int src1_offset,
__global short *dst, int dst_step, int dst_offset,
short16 src2, int rows, int cols, int dst_step1)
__kernel void arithm_s_bitwise_xor_C3_D6 (
__global short *src1, int src1_step, int src1_offset,
__global short *dst, int dst_step, int dst_offset,
short16 src2, int rows, int cols, int dst_step1)
{
int x = get_global_id(0);
@@ -737,15 +789,16 @@ __kernel void arithm_s_bitwise_xor_C3_D6 (__global short *src1, int src1_step, i
short4 tmp_data_1 = src1_data_1 ^ src2_data_1;
short4 tmp_data_2 = src1_data_2 ^ src2_data_2;
*((__global short4 *)((__global char *)dst + dst_index + 0 ))= tmp_data_0;
*((__global short4 *)((__global char *)dst + dst_index + 8 ))= tmp_data_1;
*((__global short4 *)((__global char *)dst + dst_index + 16))= tmp_data_2;
*((__global short4 *)((__global char *)dst + dst_index + 0 ))= tmp_data_0;
*((__global short4 *)((__global char *)dst + dst_index + 8 ))= tmp_data_1;
*((__global short4 *)((__global char *)dst + dst_index + 16))= tmp_data_2;
}
}
#endif
__kernel void arithm_s_bitwise_xor_C4_D0 (__global uchar *src1, int src1_step, int src1_offset,
__global uchar *dst, int dst_step, int dst_offset,
uchar4 src2, int rows, int cols, int dst_step1)
__kernel void arithm_s_bitwise_xor_C4_D0 (
__global uchar *src1, int src1_step, int src1_offset,
__global uchar *dst, int dst_step, int dst_offset,
uchar4 src2, int rows, int cols, int dst_step1)
{
int x = get_global_id(0);
@@ -765,9 +818,10 @@ __kernel void arithm_s_bitwise_xor_C4_D0 (__global uchar *src1, int src1_step,
}
__kernel void arithm_s_bitwise_xor_C4_D1 (__global char *src1, int src1_step, int src1_offset,
__global char *dst, int dst_step, int dst_offset,
char4 src2, int rows, int cols, int dst_step1)
__kernel void arithm_s_bitwise_xor_C4_D1 (
__global char *src1, int src1_step, int src1_offset,
__global char *dst, int dst_step, int dst_offset,
char4 src2, int rows, int cols, int dst_step1)
{
int x = get_global_id(0);
@@ -786,9 +840,10 @@ __kernel void arithm_s_bitwise_xor_C4_D1 (__global char *src1, int src1_step,
}
}
__kernel void arithm_s_bitwise_xor_C4_D2 (__global ushort *src1, int src1_step, int src1_offset,
__global ushort *dst, int dst_step, int dst_offset,
ushort4 src2, int rows, int cols, int dst_step1)
__kernel void arithm_s_bitwise_xor_C4_D2 (
__global ushort *src1, int src1_step, int src1_offset,
__global ushort *dst, int dst_step, int dst_offset,
ushort4 src2, int rows, int cols, int dst_step1)
{
int x = get_global_id(0);
@@ -806,9 +861,10 @@ __kernel void arithm_s_bitwise_xor_C4_D2 (__global ushort *src1, int src1_step
*((__global ushort4 *)((__global char *)dst + dst_index)) = data;
}
}
__kernel void arithm_s_bitwise_xor_C4_D3 (__global short *src1, int src1_step, int src1_offset,
__global short *dst, int dst_step, int dst_offset,
short4 src2, int rows, int cols, int dst_step1)
__kernel void arithm_s_bitwise_xor_C4_D3 (
__global short *src1, int src1_step, int src1_offset,
__global short *dst, int dst_step, int dst_offset,
short4 src2, int rows, int cols, int dst_step1)
{
int x = get_global_id(0);
@@ -826,9 +882,10 @@ __kernel void arithm_s_bitwise_xor_C4_D3 (__global short *src1, int src1_step,
*((__global short4 *)((__global char *)dst + dst_index)) = data;
}
}
__kernel void arithm_s_bitwise_xor_C4_D4 (__global int *src1, int src1_step, int src1_offset,
__global int *dst, int dst_step, int dst_offset,
int4 src2, int rows, int cols, int dst_step1)
__kernel void arithm_s_bitwise_xor_C4_D4 (
__global int *src1, int src1_step, int src1_offset,
__global int *dst, int dst_step, int dst_offset,
int4 src2, int rows, int cols, int dst_step1)
{
int x = get_global_id(0);
@@ -846,9 +903,10 @@ __kernel void arithm_s_bitwise_xor_C4_D4 (__global int *src1, int src1_step, i
*((__global int4 *)((__global char *)dst + dst_index)) = data;
}
}
__kernel void arithm_s_bitwise_xor_C4_D5 (__global char *src1, int src1_step, int src1_offset,
__global char *dst, int dst_step, int dst_offset,
char16 src2, int rows, int cols, int dst_step1)
__kernel void arithm_s_bitwise_xor_C4_D5 (
__global char *src1, int src1_step, int src1_offset,
__global char *dst, int dst_step, int dst_offset,
char16 src2, int rows, int cols, int dst_step1)
{
int x = get_global_id(0);
@@ -869,9 +927,10 @@ __kernel void arithm_s_bitwise_xor_C4_D5 (__global char *src1, int src1_step,
}
}
#if defined (DOUBLE_SUPPORT)
__kernel void arithm_s_bitwise_xor_C4_D6 (__global short *src1, int src1_step, int src1_offset,
__global short *dst, int dst_step, int dst_offset,
short16 src2, int rows, int cols, int dst_step1)
__kernel void arithm_s_bitwise_xor_C4_D6 (
__global short *src1, int src1_step, int src1_offset,
__global short *dst, int dst_step, int dst_offset,
short16 src2, int rows, int cols, int dst_step1)
{
int x = get_global_id(0);
@@ -897,11 +956,11 @@ __kernel void arithm_s_bitwise_xor_C4_D6 (__global short *src1, int src1_step, i
short4 tmp_data_2 = src1_data_2 ^ src2_data_2;
short4 tmp_data_3 = src1_data_3 ^ src2_data_3;
*((__global short4 *)((__global char *)dst + dst_index + 0 ))= tmp_data_0;
*((__global short4 *)((__global char *)dst + dst_index + 8 ))= tmp_data_1;
*((__global short4 *)((__global char *)dst + dst_index + 16))= tmp_data_2;
*((__global short4 *)((__global char *)dst + dst_index + 24))= tmp_data_3;
*((__global short4 *)((__global char *)dst + dst_index + 0 ))= tmp_data_0;
*((__global short4 *)((__global char *)dst + dst_index + 8 ))= tmp_data_1;
*((__global short4 *)((__global char *)dst + dst_index + 16))= tmp_data_2;
*((__global short4 *)((__global char *)dst + dst_index + 24))= tmp_data_3;
}
}
#endif
#endif

Some files were not shown because too many files have changed in this diff Show More