Merge remote-tracking branch 'origin/2.4' into merge-2.4
Conflicts: CMakeLists.txt cmake/OpenCVDetectCUDA.cmake doc/tutorials/features2d/feature_flann_matcher/feature_flann_matcher.rst modules/core/src/cmdparser.cpp modules/gpu/CMakeLists.txt modules/gpu/doc/introduction.rst modules/gpu/perf/perf_video.cpp modules/highgui/doc/reading_and_writing_images_and_video.rst modules/ocl/src/cl_context.cpp modules/video/include/opencv2/video/background_segm.hpp samples/cpp/image_sequence.cpp samples/cpp/tutorial_code/ImgTrans/HoughCircle_Demo.cpp samples/python/chessboard.py samples/python/cvutils.py samples/python/demhist.py samples/python/dft.py samples/python/distrans.py samples/python/edge.py samples/python/ffilldemo.py samples/python/fitellipse.py samples/python/houghlines.py samples/python/inpaint.py samples/python/logpolar.py samples/python/morphology.py samples/python/numpy_array.py samples/python/watershed.py
This commit is contained in:
@@ -44,6 +44,8 @@
|
||||
#ifndef __OPENCV_OCL_MATRIX_OPERATIONS_HPP__
|
||||
#define __OPENCV_OCL_MATRIX_OPERATIONS_HPP__
|
||||
|
||||
#include "opencv2/ocl.hpp"
|
||||
|
||||
namespace cv
|
||||
{
|
||||
|
||||
|
@@ -189,11 +189,8 @@ static bool parseOpenCLDeviceConfiguration(const std::string& configurationStr,
|
||||
return true;
|
||||
}
|
||||
|
||||
static bool __deviceSelected = false;
|
||||
static bool selectOpenCLDevice()
|
||||
{
|
||||
__deviceSelected = true;
|
||||
|
||||
std::string platform;
|
||||
std::vector<std::string> deviceTypes;
|
||||
std::string deviceName;
|
||||
@@ -528,26 +525,38 @@ private:
|
||||
|
||||
static ContextImpl* currentContext = NULL;
|
||||
|
||||
static bool __deviceSelected = false;
|
||||
|
||||
Context* Context::getContext()
|
||||
{
|
||||
if (currentContext == NULL)
|
||||
{
|
||||
if (!__initialized || !__deviceSelected)
|
||||
static bool defaultInitiaization = false;
|
||||
if (!defaultInitiaization)
|
||||
{
|
||||
cv::AutoLock lock(getInitializationMutex());
|
||||
if (!__initialized)
|
||||
try
|
||||
{
|
||||
if (initializeOpenCLDevices() == 0)
|
||||
if (!__initialized)
|
||||
{
|
||||
CV_Error(Error::OpenCLInitError, "OpenCL not available");
|
||||
if (initializeOpenCLDevices() == 0)
|
||||
{
|
||||
CV_Error(Error::OpenCLInitError, "OpenCL not available");
|
||||
}
|
||||
}
|
||||
if (!__deviceSelected)
|
||||
{
|
||||
if (!selectOpenCLDevice())
|
||||
{
|
||||
CV_Error(Error::OpenCLInitError, "Can't select OpenCL device");
|
||||
}
|
||||
}
|
||||
defaultInitiaization = true;
|
||||
}
|
||||
if (!__deviceSelected)
|
||||
catch (...)
|
||||
{
|
||||
if (!selectOpenCLDevice())
|
||||
{
|
||||
CV_Error(Error::OpenCLInitError, "Can't select OpenCL device");
|
||||
}
|
||||
defaultInitiaization = true;
|
||||
throw;
|
||||
}
|
||||
}
|
||||
CV_Assert(currentContext != NULL);
|
||||
@@ -744,10 +753,16 @@ int getOpenCLDevices(std::vector<const DeviceInfo*> &devices, int deviceType, co
|
||||
|
||||
void setDevice(const DeviceInfo* info)
|
||||
{
|
||||
if (!__deviceSelected)
|
||||
try
|
||||
{
|
||||
ContextImpl::setContext(info);
|
||||
__deviceSelected = true;
|
||||
|
||||
ContextImpl::setContext(info);
|
||||
}
|
||||
catch (...)
|
||||
{
|
||||
__deviceSelected = true;
|
||||
throw;
|
||||
}
|
||||
}
|
||||
|
||||
bool supportsFeature(FEATURE_TYPE featureType)
|
||||
|
@@ -192,6 +192,7 @@ void openCLMallocPitchEx(Context *ctx, void **dev_ptr, size_t *pitch,
|
||||
clFinish(getClCommandQueue(ctx));
|
||||
#endif
|
||||
CheckBuffers data(mainBuffer, size, widthInBytes, height);
|
||||
cv::AutoLock lock(getInitializationMutex());
|
||||
__check_buffers.insert(std::pair<cl_mem, CheckBuffers>((cl_mem)*dev_ptr, data));
|
||||
}
|
||||
#endif
|
||||
@@ -253,10 +254,17 @@ void openCLFree(void *devPtr)
|
||||
bool failBefore = false, failAfter = false;
|
||||
#endif
|
||||
CheckBuffers data;
|
||||
std::map<cl_mem, CheckBuffers>::iterator i = __check_buffers.find((cl_mem)devPtr);
|
||||
if (i != __check_buffers.end())
|
||||
{
|
||||
data = i->second;
|
||||
cv::AutoLock lock(getInitializationMutex());
|
||||
std::map<cl_mem, CheckBuffers>::iterator i = __check_buffers.find((cl_mem)devPtr);
|
||||
if (i != __check_buffers.end())
|
||||
{
|
||||
data = i->second;
|
||||
__check_buffers.erase(i);
|
||||
}
|
||||
}
|
||||
if (data.mainBuffer != NULL)
|
||||
{
|
||||
#ifdef CHECK_MEMORY_CORRUPTION
|
||||
Context* ctx = Context::getContext();
|
||||
std::vector<uchar> checkBefore(__memory_corruption_guard_bytes);
|
||||
@@ -286,7 +294,6 @@ void openCLFree(void *devPtr)
|
||||
clFinish(getClCommandQueue(ctx));
|
||||
#endif
|
||||
openCLSafeCall(clReleaseMemObject(data.mainBuffer));
|
||||
__check_buffers.erase(i);
|
||||
}
|
||||
#if defined(CHECK_MEMORY_CORRUPTION)
|
||||
if (failBefore)
|
||||
|
@@ -923,7 +923,7 @@ void OclCascadeClassifier::detectMultiScale(oclMat &gimg, CV_OUT std::vector<cv:
|
||||
//use known local data stride to precalculate indexes
|
||||
int DATA_SIZE_X = (localThreads[0]+cascade->orig_window_size.width);
|
||||
// check that maximal value is less than maximal unsigned short
|
||||
assert(DATA_SIZE_X*cascade->orig_window_size.height+cascade->orig_window_size.width < USHRT_MAX);
|
||||
assert(DATA_SIZE_X*cascade->orig_window_size.height+cascade->orig_window_size.width < (int)USHRT_MAX);
|
||||
for(int i = 0;i<nodenum;++i)
|
||||
{//process each node from classifier
|
||||
struct NodePK
|
||||
|
@@ -42,6 +42,10 @@
|
||||
//
|
||||
//M*/
|
||||
|
||||
#if (__GNUC__ == 4) && (__GNUC_MINOR__ == 6)
|
||||
# pragma GCC diagnostic ignored "-Warray-bounds"
|
||||
#endif
|
||||
|
||||
#include "precomp.hpp"
|
||||
#include "opencl_kernels.hpp"
|
||||
|
||||
|
@@ -47,6 +47,13 @@
|
||||
/////////////////////////////////Macro for border type////////////////////////////////////////////
|
||||
/////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
#if defined (DOUBLE_SUPPORT) && defined (cl_khr_fp64)
|
||||
#pragma OPENCL EXTENSION cl_khr_fp64:enable
|
||||
#define FPTYPE double
|
||||
#else
|
||||
#define FPTYPE float
|
||||
#endif
|
||||
|
||||
#ifdef BORDER_CONSTANT
|
||||
#elif defined BORDER_REPLICATE
|
||||
#define EXTRAPOLATE(x, maxV) \
|
||||
@@ -116,7 +123,7 @@ __kernel void calcHarris(__global const float *Dx, __global const float *Dy, __g
|
||||
int dst_startY = (gY << 1) + dst_y_off;
|
||||
|
||||
float dx_data[ksY+1],dy_data[ksY+1], data[3][ksY+1];
|
||||
__local float temp[6][THREADS];
|
||||
__local FPTYPE temp[6][THREADS];
|
||||
|
||||
#ifdef BORDER_CONSTANT
|
||||
for (int i=0; i < ksY+1; i++)
|
||||
@@ -136,7 +143,7 @@ __kernel void calcHarris(__global const float *Dx, __global const float *Dy, __g
|
||||
data[2][i] = dy_data[i] * dy_data[i];
|
||||
}
|
||||
#else
|
||||
int clamped_col = min(dst_cols, col);
|
||||
int clamped_col = min(2*dst_cols, col);
|
||||
for (int i=0; i < ksY+1; i++)
|
||||
{
|
||||
int dx_selected_row = dx_startY+i, dx_selected_col = dx_startX+clamped_col;
|
||||
@@ -154,7 +161,7 @@ __kernel void calcHarris(__global const float *Dx, __global const float *Dy, __g
|
||||
data[2][i] = dy_data[i] * dy_data[i];
|
||||
}
|
||||
#endif
|
||||
float sum0 = 0.0f, sum1 = 0.0f, sum2 = 0.0f;
|
||||
FPTYPE sum0 = 0.0f, sum1 = 0.0f, sum2 = 0.0f;
|
||||
for (int i=1; i < ksY; i++)
|
||||
{
|
||||
sum0 += data[0][i];
|
||||
@@ -162,16 +169,16 @@ __kernel void calcHarris(__global const float *Dx, __global const float *Dy, __g
|
||||
sum2 += data[2][i];
|
||||
}
|
||||
|
||||
float sum01 = sum0 + data[0][0];
|
||||
float sum02 = sum0 + data[0][ksY];
|
||||
FPTYPE sum01 = sum0 + data[0][0];
|
||||
FPTYPE sum02 = sum0 + data[0][ksY];
|
||||
temp[0][col] = sum01;
|
||||
temp[1][col] = sum02;
|
||||
float sum11 = sum1 + data[1][0];
|
||||
float sum12 = sum1 + data[1][ksY];
|
||||
FPTYPE sum11 = sum1 + data[1][0];
|
||||
FPTYPE sum12 = sum1 + data[1][ksY];
|
||||
temp[2][col] = sum11;
|
||||
temp[3][col] = sum12;
|
||||
float sum21 = sum2 + data[2][0];
|
||||
float sum22 = sum2 + data[2][ksY];
|
||||
FPTYPE sum21 = sum2 + data[2][0];
|
||||
FPTYPE sum22 = sum2 + data[2][ksY];
|
||||
temp[4][col] = sum21;
|
||||
temp[5][col] = sum22;
|
||||
barrier(CLK_LOCAL_MEM_FENCE);
|
||||
@@ -184,8 +191,14 @@ __kernel void calcHarris(__global const float *Dx, __global const float *Dy, __g
|
||||
int till = (ksX + 1)%2;
|
||||
float tmp_sum[6] = { 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f };
|
||||
for (int k=0; k<6; k++)
|
||||
{
|
||||
FPTYPE temp_sum = 0;
|
||||
for (int i=-anX; i<=anX - till; i++)
|
||||
tmp_sum[k] += temp[k][col+i];
|
||||
{
|
||||
temp_sum += temp[k][col+i];
|
||||
}
|
||||
tmp_sum[k] = temp_sum;
|
||||
}
|
||||
|
||||
if (posX < dst_cols && (posY) < dst_rows)
|
||||
{
|
||||
|
@@ -254,8 +254,28 @@ OCL_TEST_P(CornerMinEigenVal, Mat)
|
||||
}
|
||||
|
||||
////////////////////////////////cornerHarris//////////////////////////////////////////
|
||||
struct CornerHarris :
|
||||
public ImgprocTestBase
|
||||
{
|
||||
void Near(double threshold = 0.0)
|
||||
{
|
||||
Mat whole, roi;
|
||||
gdst_whole.download(whole);
|
||||
gdst_roi.download(roi);
|
||||
|
||||
typedef CornerTestBase CornerHarris;
|
||||
absdiff(whole, dst_whole, whole);
|
||||
absdiff(roi, dst_roi, roi);
|
||||
|
||||
divide(whole, dst_whole, whole);
|
||||
divide(roi, dst_roi, roi);
|
||||
|
||||
absdiff(dst_whole, dst_whole, dst_whole);
|
||||
absdiff(dst_roi, dst_roi, dst_roi);
|
||||
|
||||
EXPECT_MAT_NEAR(dst_whole, whole, threshold);
|
||||
EXPECT_MAT_NEAR(dst_roi, roi, threshold);
|
||||
}
|
||||
};
|
||||
|
||||
OCL_TEST_P(CornerHarris, Mat)
|
||||
{
|
||||
@@ -269,7 +289,7 @@ OCL_TEST_P(CornerHarris, Mat)
|
||||
cornerHarris(src_roi, dst_roi, blockSize, apertureSize, k, borderType);
|
||||
ocl::cornerHarris(gsrc_roi, gdst_roi, blockSize, apertureSize, k, borderType);
|
||||
|
||||
Near(1e-5, true);
|
||||
Near(1e-5);
|
||||
}
|
||||
}
|
||||
|
||||
|
Reference in New Issue
Block a user