Merge branch '2.4' of https://github.com/Itseez/opencv into 2.4_pyrup_fix
This commit is contained in:
commit
1ecc765903
3
3rdparty/libjasper/CMakeLists.txt
vendored
3
3rdparty/libjasper/CMakeLists.txt
vendored
@ -23,7 +23,8 @@ if(WIN32 AND NOT MINGW)
|
||||
add_definitions(-DJAS_WIN_MSVC_BUILD)
|
||||
endif(WIN32 AND NOT MINGW)
|
||||
|
||||
ocv_warnings_disable(CMAKE_C_FLAGS -Wno-implicit-function-declaration -Wno-uninitialized -Wmissing-prototypes -Wmissing-declarations -Wunused -Wshadow -Wsign-compare)
|
||||
ocv_warnings_disable(CMAKE_C_FLAGS -Wno-implicit-function-declaration -Wno-uninitialized -Wmissing-prototypes
|
||||
-Wno-unused-but-set-parameter -Wmissing-declarations -Wunused -Wshadow -Wsign-compare)
|
||||
ocv_warnings_disable(CMAKE_C_FLAGS -Wunused-parameter) # clang
|
||||
ocv_warnings_disable(CMAKE_C_FLAGS /wd4013 /wd4018 /wd4101 /wd4244 /wd4267 /wd4715) # vs2005
|
||||
|
||||
|
@ -594,12 +594,15 @@ endif()
|
||||
status("")
|
||||
status(" GUI: ")
|
||||
|
||||
if(HAVE_QT)
|
||||
if(HAVE_QT5)
|
||||
status(" QT 5.x:" HAVE_QT THEN "YES (ver ${Qt5Core_VERSION_STRING})" ELSE NO)
|
||||
status(" QT OpenGL support:" HAVE_QT_OPENGL THEN "YES (${Qt5OpenGL_LIBRARIES} ${Qt5OpenGL_VERSION_STRING})" ELSE NO)
|
||||
elseif(HAVE_QT)
|
||||
status(" QT 4.x:" HAVE_QT THEN "YES (ver ${QT_VERSION_MAJOR}.${QT_VERSION_MINOR}.${QT_VERSION_PATCH} ${QT_EDITION})" ELSE NO)
|
||||
status(" QT OpenGL support:" HAVE_QT_OPENGL THEN "YES (${QT_QTOPENGL_LIBRARY})" ELSE NO)
|
||||
else()
|
||||
if(DEFINED WITH_QT)
|
||||
status(" QT 4.x:" NO)
|
||||
status(" QT:" NO)
|
||||
endif()
|
||||
if(DEFINED WITH_WIN32UI)
|
||||
status(" Win32 UI:" HAVE_WIN32UI THEN YES ELSE NO)
|
||||
|
@ -176,7 +176,8 @@ macro(android_get_compatible_target VAR)
|
||||
endmacro()
|
||||
|
||||
unset(__android_project_chain CACHE)
|
||||
#add_android_project(target_name ${path} NATIVE_DEPS opencv_core LIBRARY_DEPS ${OpenCV_BINARY_DIR} SDK_TARGET 11)
|
||||
|
||||
# add_android_project(target_name ${path} NATIVE_DEPS opencv_core LIBRARY_DEPS ${OpenCV_BINARY_DIR} SDK_TARGET 11)
|
||||
macro(add_android_project target path)
|
||||
# parse arguments
|
||||
set(android_proj_arglist NATIVE_DEPS LIBRARY_DEPS SDK_TARGET IGNORE_JAVA IGNORE_MANIFEST)
|
||||
@ -212,6 +213,16 @@ macro(add_android_project target path)
|
||||
ocv_check_dependencies(${android_proj_NATIVE_DEPS} opencv_java)
|
||||
endif()
|
||||
|
||||
if(EXISTS "${path}/jni/Android.mk" )
|
||||
# find if native_app_glue is used
|
||||
file(STRINGS "${path}/jni/Android.mk" NATIVE_APP_GLUE REGEX ".*(call import-module,android/native_app_glue)" )
|
||||
if(NATIVE_APP_GLUE)
|
||||
if(ANDROID_NATIVE_API_LEVEL LESS 9 OR NOT EXISTS "${ANDROID_NDK}/sources/android/native_app_glue")
|
||||
set(OCV_DEPENDENCIES_FOUND FALSE)
|
||||
endif()
|
||||
endif()
|
||||
endif()
|
||||
|
||||
if(OCV_DEPENDENCIES_FOUND AND android_proj_sdk_target AND ANDROID_EXECUTABLE AND ANT_EXECUTABLE AND ANDROID_TOOLS_Pkg_Revision GREATER 13 AND EXISTS "${path}/${ANDROID_MANIFEST_FILE}")
|
||||
|
||||
project(${target})
|
||||
@ -268,9 +279,6 @@ macro(add_android_project target path)
|
||||
file(STRINGS "${path}/jni/Android.mk" JNI_LIB_NAME REGEX "LOCAL_MODULE[ ]*:=[ ]*.*" )
|
||||
string(REGEX REPLACE "LOCAL_MODULE[ ]*:=[ ]*([a-zA-Z_][a-zA-Z_0-9]*)[ ]*" "\\1" JNI_LIB_NAME "${JNI_LIB_NAME}")
|
||||
|
||||
# find using of native app glue to determine native activity
|
||||
file(STRINGS "${path}/jni/Android.mk" NATIVE_APP_GLUE REGEX ".*(call import-module,android/native_app_glue)" )
|
||||
|
||||
if(JNI_LIB_NAME)
|
||||
ocv_include_modules_recurse(${android_proj_NATIVE_DEPS})
|
||||
ocv_include_directories("${path}/jni")
|
||||
@ -291,9 +299,9 @@ macro(add_android_project target path)
|
||||
)
|
||||
|
||||
get_target_property(android_proj_jni_location "${JNI_LIB_NAME}" LOCATION)
|
||||
if (NOT (CMAKE_BUILD_TYPE MATCHES "debug"))
|
||||
add_custom_command(TARGET ${JNI_LIB_NAME} POST_BUILD COMMAND ${CMAKE_STRIP} --strip-unneeded "${android_proj_jni_location}")
|
||||
endif()
|
||||
if (NOT (CMAKE_BUILD_TYPE MATCHES "debug"))
|
||||
add_custom_command(TARGET ${JNI_LIB_NAME} POST_BUILD COMMAND ${CMAKE_STRIP} --strip-unneeded "${android_proj_jni_location}")
|
||||
endif()
|
||||
endif()
|
||||
endif()
|
||||
|
||||
|
@ -101,7 +101,7 @@ endif()
|
||||
|
||||
if(MSVC64 OR MINGW64)
|
||||
set(X86_64 1)
|
||||
elseif(MSVC AND NOT CMAKE_CROSSCOMPILING)
|
||||
elseif(MINGW OR (MSVC AND NOT CMAKE_CROSSCOMPILING))
|
||||
set(X86 1)
|
||||
elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "amd64.*|x86_64.*|AMD64.*")
|
||||
set(X86_64 1)
|
||||
|
@ -13,12 +13,31 @@ if(WITH_WIN32UI)
|
||||
endif(WITH_WIN32UI)
|
||||
|
||||
# --- QT4 ---
|
||||
ocv_clear_vars(HAVE_QT)
|
||||
ocv_clear_vars(HAVE_QT HAVE_QT5)
|
||||
if(WITH_QT)
|
||||
find_package(Qt4)
|
||||
if(QT4_FOUND)
|
||||
set(HAVE_QT TRUE)
|
||||
add_definitions(-DHAVE_QT) # We need to define the macro this way, using cvconfig.h does not work
|
||||
if(NOT CMAKE_VERSION VERSION_LESS 2.8.3 AND NOT WITH_QT EQUAL 4)
|
||||
find_package(Qt5Core)
|
||||
find_package(Qt5Gui)
|
||||
find_package(Qt5Widgets)
|
||||
find_package(Qt5Test)
|
||||
find_package(Qt5Concurrent)
|
||||
if(Qt5Core_FOUND AND Qt5Gui_FOUND AND Qt5Widgets_FOUND AND Qt5Test_FOUND AND Qt5Concurrent_FOUND)
|
||||
set(HAVE_QT5 ON)
|
||||
set(HAVE_QT ON)
|
||||
add_definitions(-DHAVE_QT)
|
||||
find_package(Qt5OpenGL)
|
||||
if(Qt5OpenGL_FOUND)
|
||||
set(QT_QTOPENGL_FOUND ON)
|
||||
endif()
|
||||
endif()
|
||||
endif()
|
||||
|
||||
if(NOT HAVE_QT)
|
||||
find_package(Qt4)
|
||||
if(QT4_FOUND)
|
||||
set(HAVE_QT TRUE)
|
||||
add_definitions(-DHAVE_QT) # We need to define the macro this way, using cvconfig.h does not work
|
||||
endif()
|
||||
endif()
|
||||
endif()
|
||||
|
||||
|
@ -1477,6 +1477,6 @@ The function reconstructs 3-dimensional points (in homogeneous coordinates) by u
|
||||
|
||||
.. [HH08] Hirschmuller, H. Stereo Processing by Semiglobal Matching and Mutual Information, PAMI(30), No. 2, February 2008, pp. 328-341.
|
||||
|
||||
.. [Slabaugh] Slabaugh, G.G. Computing Euler angles from a rotation matrix. http://gregslabaugh.name/publications/euler.pdf
|
||||
.. [Slabaugh] Slabaugh, G.G. Computing Euler angles from a rotation matrix. http://www.soi.city.ac.uk/~sbbh653/publications/euler.pdf (verified: 2013-04-15)
|
||||
|
||||
.. [Zhang2000] Z. Zhang. A Flexible New Technique for Camera Calibration. IEEE Transactions on Pattern Analysis and Machine Intelligence, 22(11):1330-1334, 2000.
|
||||
|
@ -2855,8 +2855,9 @@ PCA& PCA::operator()(InputArray _data, InputArray __mean, int flags, int maxComp
|
||||
|
||||
if( _mean.data )
|
||||
{
|
||||
CV_Assert( _mean.size() == mean_sz );
|
||||
CV_Assert( _mean.size() == mean_sz );
|
||||
_mean.convertTo(mean, ctype);
|
||||
covar_flags |= CV_COVAR_USE_AVG;
|
||||
}
|
||||
|
||||
calcCovarMatrix( data, covar, mean, covar_flags, ctype );
|
||||
|
@ -42,7 +42,6 @@ template <typename Distance>
|
||||
void find_nearest(const Matrix<typename Distance::ElementType>& dataset, typename Distance::ElementType* query, int* matches, int nn,
|
||||
int skip = 0, Distance distance = Distance())
|
||||
{
|
||||
typedef typename Distance::ElementType ElementType;
|
||||
typedef typename Distance::ResultType DistanceType;
|
||||
int n = nn + skip;
|
||||
|
||||
|
@ -70,7 +70,26 @@ set(highgui_srcs
|
||||
|
||||
file(GLOB highgui_ext_hdrs "include/opencv2/${name}/*.hpp" "include/opencv2/${name}/*.h")
|
||||
|
||||
if(HAVE_QT)
|
||||
if(HAVE_QT5)
|
||||
set(CMAKE_AUTOMOC ON)
|
||||
set(CMAKE_INCLUDE_CURRENT_DIR ON)
|
||||
|
||||
QT5_ADD_RESOURCES(_RCC_OUTFILES src/window_QT.qrc)
|
||||
list(APPEND highgui_srcs src/window_QT.cpp src/window_QT.h ${_RCC_OUTFILES})
|
||||
|
||||
foreach(dt5_dep Core Gui Widgets Test Concurrent)
|
||||
add_definitions(${Qt5${dt5_dep}_DEFINITIONS})
|
||||
include_directories(${Qt5${dt5_dep}_INCLUDE_DIRS})
|
||||
list(APPEND HIGHGUI_LIBRARIES ${Qt5${dt5_dep}_LIBRARIES})
|
||||
endforeach()
|
||||
|
||||
if(HAVE_QT_OPENGL)
|
||||
add_definitions(${Qt5OpenGL_DEFINITIONS})
|
||||
include_directories(${Qt5OpenGL_INCLUDE_DIRS})
|
||||
list(APPEND HIGHGUI_LIBRARIES ${Qt5OpenGL_LIBRARIES})
|
||||
endif()
|
||||
|
||||
elseif(HAVE_QT)
|
||||
if (HAVE_QT_OPENGL)
|
||||
set(QT_USE_QTOPENGL TRUE)
|
||||
endif()
|
||||
|
@ -1665,6 +1665,17 @@ static int icvSetPropertyCAM_V4L(CvCaptureCAM_V4L* capture, int property_id, dou
|
||||
width = height = 0;
|
||||
}
|
||||
break;
|
||||
case CV_CAP_PROP_FPS:
|
||||
struct v4l2_streamparm setfps;
|
||||
memset (&setfps, 0, sizeof(struct v4l2_streamparm));
|
||||
setfps.type = V4L2_BUF_TYPE_VIDEO_CAPTURE;
|
||||
setfps.parm.capture.timeperframe.numerator = 1;
|
||||
setfps.parm.capture.timeperframe.denominator = value;
|
||||
if (xioctl (capture->deviceHandle, VIDIOC_S_PARM, &setfps) < 0){
|
||||
fprintf(stderr, "HIGHGUI ERROR: V4L: Unable to set camera FPS\n");
|
||||
retval=0;
|
||||
}
|
||||
break;
|
||||
default:
|
||||
retval = icvSetControl(capture, property_id, value);
|
||||
}
|
||||
|
@ -52,6 +52,11 @@
|
||||
#include <stdio.h>
|
||||
#include <setjmp.h>
|
||||
|
||||
// the following defines are a hack to avoid multiple problems with frame pointer handling and setjmp
|
||||
// see http://gcc.gnu.org/ml/gcc/2011-10/msg00324.html for some details
|
||||
#define mingw_getsp(...) 0
|
||||
#define __builtin_frame_address(...) 0
|
||||
|
||||
#ifdef WIN32
|
||||
|
||||
#define XMD_H // prevent redefinition of INT32
|
||||
|
@ -73,6 +73,11 @@
|
||||
#pragma warning( disable: 4611 )
|
||||
#endif
|
||||
|
||||
// the following defines are a hack to avoid multiple problems with frame pointer handling and setjmp
|
||||
// see http://gcc.gnu.org/ml/gcc/2011-10/msg00324.html for some details
|
||||
#define mingw_getsp(...) 0
|
||||
#define __builtin_frame_address(...) 0
|
||||
|
||||
namespace cv
|
||||
{
|
||||
|
||||
|
@ -48,13 +48,13 @@
|
||||
#endif
|
||||
|
||||
#include <QAbstractEventDispatcher>
|
||||
#include <QtGui/QApplication>
|
||||
#include <QApplication>
|
||||
#include <QFile>
|
||||
#include <QPushButton>
|
||||
#include <QtGui/QGraphicsView>
|
||||
#include <QGraphicsView>
|
||||
#include <QSizePolicy>
|
||||
#include <QInputDialog>
|
||||
#include <QtGui/QBoxLayout>
|
||||
#include <QBoxLayout>
|
||||
#include <QSettings>
|
||||
#include <qtimer.h>
|
||||
#include <QtConcurrentRun>
|
||||
@ -78,7 +78,7 @@
|
||||
#include <QRadioButton>
|
||||
#include <QButtonGroup>
|
||||
#include <QMenu>
|
||||
#include <QtTest/QTest>
|
||||
#include <QTest>
|
||||
|
||||
//start private enum
|
||||
enum { CV_MODE_NORMAL = 0, CV_MODE_OPENGL = 1 };
|
||||
|
@ -406,7 +406,7 @@ Finds the convex hull of a point set.
|
||||
|
||||
:param hull_storage: Output memory storage in the old API (``cvConvexHull2`` returns a sequence containing the convex hull points or their indices).
|
||||
|
||||
:param clockwise: Orientation flag. If it is true, the output convex hull is oriented clockwise. Otherwise, it is oriented counter-clockwise. The usual screen coordinate system is assumed so that the origin is at the top-left corner, x axis is oriented to the right, and y axis is oriented downwards.
|
||||
:param clockwise: Orientation flag. If it is true, the output convex hull is oriented clockwise. Otherwise, it is oriented counter-clockwise. The assumed coordinate system has its X axis pointing to the right, and its Y axis pointing upwards.
|
||||
|
||||
:param orientation: Convex hull orientation parameter in the old API, ``CV_CLOCKWISE`` or ``CV_COUNTERCLOCKWISE``.
|
||||
|
||||
|
@ -93,7 +93,6 @@ icvFloodFill_CnIR( uchar* pImage, int step, CvSize roi, CvPoint seed,
|
||||
_Tp newVal, CvConnectedComp* region, int flags,
|
||||
std::vector<CvFFillSegment>* buffer )
|
||||
{
|
||||
typedef typename cv::DataType<_Tp>::channel_type _CTp;
|
||||
_Tp* img = (_Tp*)(pImage + step * seed.y);
|
||||
int i, L, R;
|
||||
int area = 0;
|
||||
@ -252,7 +251,6 @@ icvFloodFillGrad_CnIR( uchar* pImage, int step, uchar* pMask, int maskStep,
|
||||
CvConnectedComp* region, int flags,
|
||||
std::vector<CvFFillSegment>* buffer )
|
||||
{
|
||||
typedef typename cv::DataType<_Tp>::channel_type _CTp;
|
||||
_Tp* img = (_Tp*)(pImage + step*seed.y);
|
||||
uchar* mask = (pMask += maskStep + 1) + maskStep*seed.y;
|
||||
int i, L, R;
|
||||
|
@ -1219,8 +1219,6 @@ static void resizeGeneric_( const Mat& src, Mat& dst,
|
||||
const int* yofs, const void* _beta,
|
||||
int xmin, int xmax, int ksize )
|
||||
{
|
||||
typedef typename HResize::value_type T;
|
||||
typedef typename HResize::buf_type WT;
|
||||
typedef typename HResize::alpha_type AT;
|
||||
|
||||
const AT* beta = (const AT*)_beta;
|
||||
|
@ -61,7 +61,7 @@ protected:
|
||||
{
|
||||
int ObjNum = m_TrackList.GetBlobNum();
|
||||
int i;
|
||||
char video_name[1024];
|
||||
char video_name[1024+1];
|
||||
char* struct_name = NULL;
|
||||
CvFileStorage* storage = cvOpenFileStorage(m_pFileName,NULL,CV_STORAGE_WRITE_TEXT);
|
||||
|
||||
|
@ -117,10 +117,10 @@ class CvKDTreeWrap : public CvFeatureTree {
|
||||
CvMat* results) {
|
||||
int rn = results->rows * results->cols;
|
||||
std::vector<int> inbounds;
|
||||
dispatch_cvtype(mat, ((__treetype*)data)->
|
||||
find_ortho_range((typename __treetype::scalar_type*)bounds_min->data.ptr,
|
||||
assert(CV_MAT_DEPTH(mat->type) == CV_32F || CV_MAT_DEPTH(mat->type) == CV_64F);
|
||||
((__treetype*)data)->find_ortho_range((typename __treetype::scalar_type*)bounds_min->data.ptr,
|
||||
(typename __treetype::scalar_type*)bounds_max->data.ptr,
|
||||
inbounds));
|
||||
inbounds);
|
||||
std::copy(inbounds.begin(),
|
||||
inbounds.begin() + std::min((int)inbounds.size(), rn),
|
||||
(int*) results->data.ptr);
|
||||
|
@ -802,6 +802,44 @@ namespace cv
|
||||
int minNeighbors, int flags, CvSize minSize = cvSize(0, 0), CvSize maxSize = cvSize(0, 0));
|
||||
};
|
||||
|
||||
// Cascade classifier derived from cv::CascadeClassifier that stores its
// working state (image dimensions, flags, scale list, oclMat images and an
// opaque buffer pointer) as members of the object.
// NOTE(review): presumably this lets device buffers be reused across
// successive detectMultiScale() calls -- confirm against the implementation.
class CV_EXPORTS OclCascadeClassifierBuf : public cv::CascadeClassifier
|
||||
{
|
||||
public:
|
||||
// Default constructor. Only m_flags, initialized, m_scaleFactor and buffers
// are given values here; m_rows, m_cols, m_loopcount, m_nodenum and
// findBiggestObject are not initialized by this constructor.
OclCascadeClassifierBuf() :
|
||||
m_flags(0), initialized(false), m_scaleFactor(0), buffers(NULL) {}
|
||||
|
||||
// The destructor body is empty, so nothing is freed here.
// NOTE(review): callers presumably have to call release() themselves -- confirm.
~OclCascadeClassifierBuf() {}
|
||||
|
||||
// Runs detection on `image` and reports the resulting rectangles through the
// CV_OUT parameter `faces`. Defaults: scaleFactor = 1.1, minNeighbors = 3,
// flags = 0, and default-constructed (empty) minSize / maxSize.
void detectMultiScale(oclMat &image, CV_OUT std::vector<cv::Rect>& faces,
|
||||
double scaleFactor = 1.1, int minNeighbors = 3, int flags = 0,
|
||||
Size minSize = Size(), Size maxSize = Size());
|
||||
// Declared only; defined elsewhere.
// NOTE(review): presumably frees what `buffers` points to -- confirm.
void release();
|
||||
|
||||
private:
|
||||
// Private helpers, declared only; their definitions are not visible here.
// NOTE(review): the names suggest one-time setup (Init), creation of buffers
// that do not depend on the scale factor (CreateBaseBufs) and of buffers that
// do (CreateFactorRelatedBufs) -- confirm against the implementation.
void Init(const int rows, const int cols, double scaleFactor, int flags,
|
||||
const int outputsz, const size_t localThreads[],
|
||||
CvSize minSize, CvSize maxSize);
|
||||
void CreateBaseBufs(const int datasize, const int totalclassifier, const int flags, const int outputsz);
|
||||
void CreateFactorRelatedBufs(const int rows, const int cols, const int flags,
|
||||
const double scaleFactor, const size_t localThreads[],
|
||||
CvSize minSize, CvSize maxSize);
|
||||
// Writes into `faces` (CV_OUT) from a rectangle list and a parallel list of
// integer weights.
void GenResult(CV_OUT std::vector<cv::Rect>& faces, const std::vector<cv::Rect> &rectList, const std::vector<int> &rweights);
|
||||
|
||||
// Member state. NOTE(review): m_rows / m_cols / m_flags / m_scaleFactor /
// m_minSize / m_maxSize look like the parameters of the last call, kept to
// detect when buffers must be rebuilt -- confirm.
int m_rows;
|
||||
int m_cols;
|
||||
int m_flags;
|
||||
int m_loopcount;
|
||||
int m_nodenum;
|
||||
bool findBiggestObject;
|
||||
// Set to false by the constructor.
bool initialized;
|
||||
double m_scaleFactor;
|
||||
Size m_minSize;
|
||||
Size m_maxSize;
|
||||
vector<CvSize> sizev;
|
||||
vector<float> scalev;
|
||||
oclMat gimg1, gsum, gsqsum;
|
||||
// Opaque pointer, NULL after construction; its real type is not visible in
// this header.
void * buffers;
|
||||
};
|
||||
|
||||
|
||||
/////////////////////////////// Pyramid /////////////////////////////////////
|
||||
@ -1731,6 +1769,44 @@ namespace cv
|
||||
std::vector<oclMat> datas;
|
||||
oclMat out;
|
||||
};
|
||||
// Stereo matcher functor: operator() takes a left and a right oclMat image
// and writes to a disparity oclMat.
// NOTE(review): the name indicates the constant-space belief propagation
// algorithm -- confirm against the implementation.
class CV_EXPORTS StereoConstantSpaceBP
|
||||
{
|
||||
public:
|
||||
// Default values used by the first constructor below.
enum { DEFAULT_NDISP = 128 };
|
||||
enum { DEFAULT_ITERS = 8 };
|
||||
enum { DEFAULT_LEVELS = 4 };
|
||||
enum { DEFAULT_NR_PLANE = 4 };
|
||||
// Static helper that takes an image width/height and returns its results
// through the ndisp, iters, levels and nr_plane reference parameters.
static void estimateRecommendedParams(int width, int height, int &ndisp, int &iters, int &levels, int &nr_plane);
|
||||
// Constructor with every argument defaulted (DEFAULT_* enums, CV_32F message
// type); `explicit` prevents implicit conversion from a single int.
explicit StereoConstantSpaceBP(
|
||||
int ndisp = DEFAULT_NDISP,
|
||||
int iters = DEFAULT_ITERS,
|
||||
int levels = DEFAULT_LEVELS,
|
||||
int nr_plane = DEFAULT_NR_PLANE,
|
||||
int msg_type = CV_32F);
|
||||
// Constructor that additionally takes the four float cost parameters
// explicitly; min_disp_th defaults to 0 and msg_type to CV_32F.
StereoConstantSpaceBP(int ndisp, int iters, int levels, int nr_plane,
|
||||
float max_data_term, float data_weight, float max_disc_term, float disc_single_jump,
|
||||
int min_disp_th = 0,
|
||||
int msg_type = CV_32F);
|
||||
// Runs the matcher: `left` and `right` are inputs (const), `disparity` is
// the output.
void operator()(const oclMat &left, const oclMat &right, oclMat &disparity);
|
||||
// Public parameters; the names mirror the constructor arguments.
int ndisp;
|
||||
int iters;
|
||||
int levels;
|
||||
int nr_plane;
|
||||
float max_data_term;
|
||||
float data_weight;
|
||||
float max_disc_term;
|
||||
float disc_single_jump;
|
||||
int min_disp_th;
|
||||
int msg_type;
|
||||
// Not a constructor argument; its initial value is not visible in this
// declaration.
bool use_local_init_data_cost;
|
||||
private:
|
||||
// Internal oclMat storage. NOTE(review): u/d/l/r are presumably the
// up/down/left/right message buffers -- confirm.
oclMat u[2], d[2], l[2], r[2];
|
||||
oclMat disp_selected_pyr[2];
|
||||
oclMat data_cost;
|
||||
oclMat data_cost_selected;
|
||||
oclMat temp;
|
||||
oclMat out;
|
||||
};
|
||||
}
|
||||
}
|
||||
#if defined _MSC_VER && _MSC_VER >= 1200
|
||||
|
@ -65,12 +65,12 @@ namespace cv
|
||||
static const int OPT_SIZE = 100;
|
||||
|
||||
static const char * T_ARR [] = {
|
||||
"uchar",
|
||||
"char",
|
||||
"ushort",
|
||||
"short",
|
||||
"int",
|
||||
"float -D T_FLOAT",
|
||||
"uchar",
|
||||
"char",
|
||||
"ushort",
|
||||
"short",
|
||||
"int",
|
||||
"float -D T_FLOAT",
|
||||
"double"};
|
||||
|
||||
template < int BLOCK_SIZE, int MAX_DESC_LEN/*, typename Mask*/ >
|
||||
@ -86,8 +86,8 @@ void matchUnrolledCached(const oclMat &query, const oclMat &train, const oclMat
|
||||
vector< pair<size_t, const void *> > args;
|
||||
|
||||
char opt [OPT_SIZE] = "";
|
||||
sprintf(opt,
|
||||
"-D T=%s -D DIST_TYPE=%d -D BLOCK_SIZE=%d -D MAX_DESC_LEN=%d",
|
||||
sprintf(opt,
|
||||
"-D T=%s -D DIST_TYPE=%d -D BLOCK_SIZE=%d -D MAX_DESC_LEN=%d",
|
||||
T_ARR[query.depth()], distType, block_size, m_size);
|
||||
|
||||
if(globalSize[0] != 0)
|
||||
@ -128,8 +128,8 @@ void match(const oclMat &query, const oclMat &train, const oclMat &/*mask*/,
|
||||
vector< pair<size_t, const void *> > args;
|
||||
|
||||
char opt [OPT_SIZE] = "";
|
||||
sprintf(opt,
|
||||
"-D T=%s -D DIST_TYPE=%d -D BLOCK_SIZE=%d",
|
||||
sprintf(opt,
|
||||
"-D T=%s -D DIST_TYPE=%d -D BLOCK_SIZE=%d",
|
||||
T_ARR[query.depth()], distType, block_size);
|
||||
if(globalSize[0] != 0)
|
||||
{
|
||||
@ -171,8 +171,8 @@ void matchUnrolledCached(const oclMat &query, const oclMat &train, float maxDist
|
||||
vector< pair<size_t, const void *> > args;
|
||||
|
||||
char opt [OPT_SIZE] = "";
|
||||
sprintf(opt,
|
||||
"-D T=%s -D DIST_TYPE=%d -D BLOCK_SIZE=%d -D MAX_DESC_LEN=%d",
|
||||
sprintf(opt,
|
||||
"-D T=%s -D DIST_TYPE=%d -D BLOCK_SIZE=%d -D MAX_DESC_LEN=%d",
|
||||
T_ARR[query.depth()], distType, block_size, m_size);
|
||||
|
||||
if(globalSize[0] != 0)
|
||||
@ -212,8 +212,8 @@ void radius_match(const oclMat &query, const oclMat &train, float maxDistance, c
|
||||
vector< pair<size_t, const void *> > args;
|
||||
|
||||
char opt [OPT_SIZE] = "";
|
||||
sprintf(opt,
|
||||
"-D T=%s -D DIST_TYPE=%d -D BLOCK_SIZE=%d",
|
||||
sprintf(opt,
|
||||
"-D T=%s -D DIST_TYPE=%d -D BLOCK_SIZE=%d",
|
||||
T_ARR[query.depth()], distType, block_size);
|
||||
|
||||
if(globalSize[0] != 0)
|
||||
@ -312,8 +312,8 @@ void knn_matchUnrolledCached(const oclMat &query, const oclMat &train, const ocl
|
||||
vector< pair<size_t, const void *> > args;
|
||||
|
||||
char opt [OPT_SIZE] = "";
|
||||
sprintf(opt,
|
||||
"-D T=%s -D DIST_TYPE=%d -D BLOCK_SIZE=%d -D MAX_DESC_LEN=%d",
|
||||
sprintf(opt,
|
||||
"-D T=%s -D DIST_TYPE=%d -D BLOCK_SIZE=%d -D MAX_DESC_LEN=%d",
|
||||
T_ARR[query.depth()], distType, block_size, m_size);
|
||||
|
||||
if(globalSize[0] != 0)
|
||||
@ -348,8 +348,8 @@ void knn_match(const oclMat &query, const oclMat &train, const oclMat &/*mask*/,
|
||||
vector< pair<size_t, const void *> > args;
|
||||
|
||||
char opt [OPT_SIZE] = "";
|
||||
sprintf(opt,
|
||||
"-D T=%s -D DIST_TYPE=%d -D BLOCK_SIZE=%d",
|
||||
sprintf(opt,
|
||||
"-D T=%s -D DIST_TYPE=%d -D BLOCK_SIZE=%d",
|
||||
T_ARR[query.depth()], distType, block_size);
|
||||
|
||||
if(globalSize[0] != 0)
|
||||
@ -384,8 +384,8 @@ void calcDistanceUnrolled(const oclMat &query, const oclMat &train, const oclMat
|
||||
vector< pair<size_t, const void *> > args;
|
||||
|
||||
char opt [OPT_SIZE] = "";
|
||||
sprintf(opt,
|
||||
"-D T=%s -D DIST_TYPE=%d -D BLOCK_SIZE=%d -D MAX_DESC_LEN=%d",
|
||||
sprintf(opt,
|
||||
"-D T=%s -D DIST_TYPE=%d -D BLOCK_SIZE=%d -D MAX_DESC_LEN=%d",
|
||||
T_ARR[query.depth()], distType, block_size, m_size);
|
||||
|
||||
if(globalSize[0] != 0)
|
||||
@ -420,8 +420,8 @@ void calcDistance(const oclMat &query, const oclMat &train, const oclMat &/*mask
|
||||
vector< pair<size_t, const void *> > args;
|
||||
|
||||
char opt [OPT_SIZE] = "";
|
||||
sprintf(opt,
|
||||
"-D T=%s -D DIST_TYPE=%d -D BLOCK_SIZE=%d",
|
||||
sprintf(opt,
|
||||
"-D T=%s -D DIST_TYPE=%d -D BLOCK_SIZE=%d",
|
||||
T_ARR[query.depth()], distType, block_size);
|
||||
|
||||
if(globalSize[0] != 0)
|
||||
@ -561,7 +561,7 @@ void cv::ocl::BruteForceMatcher_OCL_base::matchSingle(const oclMat &query, const
|
||||
{
|
||||
if (query.empty() || train.empty())
|
||||
return;
|
||||
|
||||
|
||||
CV_Assert(query.channels() == 1 && query.depth() < CV_64F);
|
||||
CV_Assert(train.cols == query.cols && train.type() == query.type());
|
||||
|
||||
@ -673,7 +673,7 @@ void cv::ocl::BruteForceMatcher_OCL_base::matchCollection(const oclMat &query, c
|
||||
return;
|
||||
|
||||
CV_Assert(query.channels() == 1 && query.depth() < CV_64F);
|
||||
|
||||
|
||||
const int nQuery = query.rows;
|
||||
|
||||
ensureSizeIsEnough(1, nQuery, CV_32S, trainIdx);
|
||||
@ -845,8 +845,8 @@ void cv::ocl::BruteForceMatcher_OCL_base::knnMatch2Collection(const oclMat &quer
|
||||
if (query.empty() || trainCollection.empty())
|
||||
return;
|
||||
|
||||
typedef void (*caller_t)(const oclMat & query, const oclMat & trains, const oclMat & masks,
|
||||
const oclMat & trainIdx, const oclMat & imgIdx, const oclMat & distance);
|
||||
// typedef void (*caller_t)(const oclMat & query, const oclMat & trains, const oclMat & masks,
|
||||
// const oclMat & trainIdx, const oclMat & imgIdx, const oclMat & distance);
|
||||
|
||||
CV_Assert(query.channels() == 1 && query.depth() < CV_64F);
|
||||
|
||||
@ -993,7 +993,7 @@ void cv::ocl::BruteForceMatcher_OCL_base::knnMatch(const oclMat &query, vector<
|
||||
|
||||
// radiusMatchSingle
|
||||
void cv::ocl::BruteForceMatcher_OCL_base::radiusMatchSingle(const oclMat &query, const oclMat &train,
|
||||
oclMat &trainIdx, oclMat &distance, oclMat &nMatches, float maxDistance, const oclMat &mask)
|
||||
oclMat &trainIdx, oclMat &distance, oclMat &nMatches, float maxDistance, const oclMat &mask)
|
||||
{
|
||||
if (query.empty() || train.empty())
|
||||
return;
|
||||
@ -1095,9 +1095,9 @@ void cv::ocl::BruteForceMatcher_OCL_base::radiusMatchCollection(const oclMat &qu
|
||||
if (query.empty() || empty())
|
||||
return;
|
||||
|
||||
#if 0
|
||||
typedef void (*caller_t)(const oclMat & query, const oclMat * trains, int n, float maxDistance, const oclMat * masks,
|
||||
const oclMat & trainIdx, const oclMat & imgIdx, const oclMat & distance, const oclMat & nMatches);
|
||||
#if 0
|
||||
static const caller_t callers[3][6] =
|
||||
{
|
||||
{
|
||||
|
@ -60,7 +60,7 @@ void cv::ocl::gemm(const oclMat &src1, const oclMat &src2, double alpha,
|
||||
const oclMat &src3, double beta, oclMat &dst, int flags)
|
||||
{
|
||||
CV_Assert(src1.cols == src2.rows &&
|
||||
(src3.empty() || src1.rows == src3.rows && src2.cols == src3.cols));
|
||||
(src3.empty() || (src1.rows == src3.rows && src2.cols == src3.cols)));
|
||||
CV_Assert(!(cv::GEMM_3_T & flags)); // cv::GEMM_3_T is not supported
|
||||
if(!src3.empty())
|
||||
{
|
||||
|
@ -20,6 +20,7 @@
|
||||
// Jia Haipeng, jiahaipeng95@gmail.com
|
||||
// Wu Xinglong, wxl370@126.com
|
||||
// Wang Yao, bitwangyaoyao@gmail.com
|
||||
// Sen Liu, swjtuls1987@126.com
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without modification,
|
||||
// are permitted provided that the following conditions are met:
|
||||
@ -145,7 +146,8 @@ typedef struct
|
||||
int imgoff;
|
||||
float factor;
|
||||
} detect_piramid_info;
|
||||
#ifdef WIN32
|
||||
|
||||
#if defined WIN32 && !defined __MINGW__ && !defined __MINGW32__
|
||||
#define _ALIGNED_ON(_ALIGNMENT) __declspec(align(_ALIGNMENT))
|
||||
typedef _ALIGNED_ON(128) struct GpuHidHaarFeature
|
||||
{
|
||||
@ -842,15 +844,13 @@ static void gpuSetHaarClassifierCascade( CvHaarClassifierCascade *_cascade
|
||||
} /* j */
|
||||
}
|
||||
}
|
||||
|
||||
CvSeq *cv::ocl::OclCascadeClassifier::oclHaarDetectObjects( oclMat &gimg, CvMemStorage *storage, double scaleFactor,
|
||||
int minNeighbors, int flags, CvSize minSize, CvSize maxSize)
|
||||
{
|
||||
CvHaarClassifierCascade *cascade = oldCascade;
|
||||
|
||||
//double alltime = (double)cvGetTickCount();
|
||||
//double t = (double)cvGetTickCount();
|
||||
const double GROUP_EPS = 0.2;
|
||||
oclMat gtemp, gsum1, gtilted1, gsqsum1, gnormImg, gsumcanny;
|
||||
CvSeq *result_seq = 0;
|
||||
cv::Ptr<CvMemStorage> temp_storage;
|
||||
|
||||
@ -861,7 +861,6 @@ CvSeq *cv::ocl::OclCascadeClassifier::oclHaarDetectObjects( oclMat &gimg, CvMemS
|
||||
int datasize=0;
|
||||
int totalclassifier=0;
|
||||
|
||||
//void *out;
|
||||
GpuHidHaarClassifierCascade *gcascade;
|
||||
GpuHidHaarStageClassifier *stage;
|
||||
GpuHidHaarClassifier *classifier;
|
||||
@ -870,11 +869,8 @@ CvSeq *cv::ocl::OclCascadeClassifier::oclHaarDetectObjects( oclMat &gimg, CvMemS
|
||||
int *candidate;
|
||||
cl_int status;
|
||||
|
||||
// bool doCannyPruning = (flags & CV_HAAR_DO_CANNY_PRUNING) != 0;
|
||||
bool findBiggestObject = (flags & CV_HAAR_FIND_BIGGEST_OBJECT) != 0;
|
||||
// bool roughSearch = (flags & CV_HAAR_DO_ROUGH_SEARCH) != 0;
|
||||
|
||||
//double t = 0;
|
||||
if( maxSize.height == 0 || maxSize.width == 0 )
|
||||
{
|
||||
maxSize.height = gimg.rows;
|
||||
@ -896,27 +892,20 @@ CvSeq *cv::ocl::OclCascadeClassifier::oclHaarDetectObjects( oclMat &gimg, CvMemS
|
||||
if( findBiggestObject )
|
||||
flags &= ~CV_HAAR_SCALE_IMAGE;
|
||||
|
||||
//gtemp = oclMat( gimg.rows, gimg.cols, CV_8UC1);
|
||||
//gsum1 = oclMat( gimg.rows + 1, gimg.cols + 1, CV_32SC1 );
|
||||
//gsqsum1 = oclMat( gimg.rows + 1, gimg.cols + 1, CV_32FC1 );
|
||||
|
||||
if( !cascade->hid_cascade )
|
||||
/*out = (void *)*/gpuCreateHidHaarClassifierCascade(cascade, &datasize, &totalclassifier);
|
||||
if( cascade->hid_cascade->has_tilted_features )
|
||||
gtilted1 = oclMat( gimg.rows + 1, gimg.cols + 1, CV_32SC1 );
|
||||
gpuCreateHidHaarClassifierCascade(cascade, &datasize, &totalclassifier);
|
||||
|
||||
result_seq = cvCreateSeq( 0, sizeof(CvSeq), sizeof(CvAvgComp), storage );
|
||||
|
||||
if( CV_MAT_CN(gimg.type()) > 1 )
|
||||
{
|
||||
oclMat gtemp;
|
||||
cvtColor( gimg, gtemp, CV_BGR2GRAY );
|
||||
gimg = gtemp;
|
||||
}
|
||||
|
||||
if( findBiggestObject )
|
||||
flags &= ~(CV_HAAR_SCALE_IMAGE | CV_HAAR_DO_CANNY_PRUNING);
|
||||
//t = (double)cvGetTickCount() - t;
|
||||
//printf( "before if time = %g ms\n", t/((double)cvGetTickFrequency()*1000.) );
|
||||
|
||||
if( gimg.cols < minSize.width || gimg.rows < minSize.height )
|
||||
CV_Error(CV_StsError, "Image too small");
|
||||
@ -924,12 +913,9 @@ CvSeq *cv::ocl::OclCascadeClassifier::oclHaarDetectObjects( oclMat &gimg, CvMemS
|
||||
if( (flags & CV_HAAR_SCALE_IMAGE) )
|
||||
{
|
||||
CvSize winSize0 = cascade->orig_window_size;
|
||||
//float scalefactor = 1.1f;
|
||||
//float factor = 1.f;
|
||||
int totalheight = 0;
|
||||
int indexy = 0;
|
||||
CvSize sz;
|
||||
//t = (double)cvGetTickCount();
|
||||
vector<CvSize> sizev;
|
||||
vector<float> scalev;
|
||||
for(factor = 1.f;; factor *= scaleFactor)
|
||||
@ -950,20 +936,15 @@ CvSeq *cv::ocl::OclCascadeClassifier::oclHaarDetectObjects( oclMat &gimg, CvMemS
|
||||
sizev.push_back(sz);
|
||||
scalev.push_back(factor);
|
||||
}
|
||||
//int flag = 0;
|
||||
|
||||
oclMat gimg1(gimg.rows, gimg.cols, CV_8UC1);
|
||||
oclMat gsum(totalheight + 4, gimg.cols + 1, CV_32SC1);
|
||||
oclMat gsqsum(totalheight + 4, gimg.cols + 1, CV_32FC1);
|
||||
|
||||
//cl_mem cascadebuffer;
|
||||
cl_mem stagebuffer;
|
||||
//cl_mem classifierbuffer;
|
||||
cl_mem nodebuffer;
|
||||
cl_mem candidatebuffer;
|
||||
cl_mem scaleinfobuffer;
|
||||
//cl_kernel kernel;
|
||||
//kernel = openCLGetKernelFromSource(gimg.clCxt, &haarobjectdetect, "gpuRunHaarClassifierCascade");
|
||||
cv::Rect roi, roi2;
|
||||
cv::Mat imgroi, imgroisq;
|
||||
cv::ocl::oclMat resizeroi, gimgroi, gimgroisq;
|
||||
@ -971,18 +952,13 @@ CvSeq *cv::ocl::OclCascadeClassifier::oclHaarDetectObjects( oclMat &gimg, CvMemS
|
||||
|
||||
size_t blocksize = 8;
|
||||
size_t localThreads[3] = { blocksize, blocksize , 1 };
|
||||
size_t globalThreads[3] = { grp_per_CU *((gsum.clCxt)->computeUnits()) *localThreads[0],
|
||||
size_t globalThreads[3] = { grp_per_CU * gsum.clCxt->computeUnits() *localThreads[0],
|
||||
localThreads[1], 1
|
||||
};
|
||||
int outputsz = 256 * globalThreads[0] / localThreads[0];
|
||||
int loopcount = sizev.size();
|
||||
detect_piramid_info *scaleinfo = (detect_piramid_info *)malloc(sizeof(detect_piramid_info) * loopcount);
|
||||
|
||||
//t = (double)cvGetTickCount() - t;
|
||||
// printf( "pre time = %g ms\n", t/((double)cvGetTickFrequency()*1000.) );
|
||||
//int *it =scaleinfo;
|
||||
// t = (double)cvGetTickCount();
|
||||
|
||||
for( int i = 0; i < loopcount; i++ )
|
||||
{
|
||||
sz = sizev[i];
|
||||
@ -992,7 +968,6 @@ CvSeq *cv::ocl::OclCascadeClassifier::oclHaarDetectObjects( oclMat &gimg, CvMemS
|
||||
resizeroi = gimg1(roi2);
|
||||
gimgroi = gsum(roi);
|
||||
gimgroisq = gsqsum(roi);
|
||||
//scaleinfo[i].rows = gimgroi.rows;
|
||||
int width = gimgroi.cols - 1 - cascade->orig_window_size.width;
|
||||
int height = gimgroi.rows - 1 - cascade->orig_window_size.height;
|
||||
scaleinfo[i].width_height = (width << 16) | height;
|
||||
@ -1000,76 +975,40 @@ CvSeq *cv::ocl::OclCascadeClassifier::oclHaarDetectObjects( oclMat &gimg, CvMemS
|
||||
|
||||
int grpnumperline = (width + localThreads[0] - 1) / localThreads[0];
|
||||
int totalgrp = ((height + localThreads[1] - 1) / localThreads[1]) * grpnumperline;
|
||||
//outputsz +=width*height;
|
||||
|
||||
scaleinfo[i].grpnumperline_totalgrp = (grpnumperline << 16) | totalgrp;
|
||||
scaleinfo[i].imgoff = gimgroi.offset >> 2;
|
||||
scaleinfo[i].factor = factor;
|
||||
//printf("rows = %d,ystep = %d,width = %d,height = %d,grpnumperline = %d,totalgrp = %d,imgoff = %d,factor = %f\n",
|
||||
// scaleinfo[i].rows,scaleinfo[i].ystep,scaleinfo[i].width,scaleinfo[i].height,scaleinfo[i].grpnumperline,
|
||||
// scaleinfo[i].totalgrp,scaleinfo[i].imgoff,scaleinfo[i].factor);
|
||||
cv::ocl::resize(gimg, resizeroi, Size(sz.width - 1, sz.height - 1), 0, 0, INTER_LINEAR);
|
||||
//cv::imwrite("D:\\1.jpg",gimg1);
|
||||
cv::ocl::integral(resizeroi, gimgroi, gimgroisq);
|
||||
//cv::ocl::oclMat chk(sz.height,sz.width,CV_32SC1),chksq(sz.height,sz.width,CV_32FC1);
|
||||
//cv::ocl::integral(gimg1, chk, chksq);
|
||||
//double r = cv::norm(chk,gimgroi,NORM_INF);
|
||||
//if(r > std::numeric_limits<double>::epsilon())
|
||||
//{
|
||||
// printf("failed");
|
||||
//}
|
||||
indexy += sz.height;
|
||||
}
|
||||
//int ystep = factor > 2 ? 1 : 2;
|
||||
// t = (double)cvGetTickCount() - t;
|
||||
//printf( "resize integral time = %g ms\n", t/((double)cvGetTickFrequency()*1000.) );
|
||||
//t = (double)cvGetTickCount();
|
||||
|
||||
gcascade = (GpuHidHaarClassifierCascade *)cascade->hid_cascade;
|
||||
stage = (GpuHidHaarStageClassifier *)(gcascade + 1);
|
||||
classifier = (GpuHidHaarClassifier *)(stage + gcascade->count);
|
||||
node = (GpuHidHaarTreeNode *)(classifier->node);
|
||||
|
||||
//int m,n;
|
||||
//m = (gsum.cols - 1 - cascade->orig_window_size.width + ystep - 1)/ystep;
|
||||
//n = (gsum.rows - 1 - cascade->orig_window_size.height + ystep - 1)/ystep;
|
||||
//int counter = m*n;
|
||||
|
||||
int nodenum = (datasize - sizeof(GpuHidHaarClassifierCascade) -
|
||||
sizeof(GpuHidHaarStageClassifier) * gcascade->count - sizeof(GpuHidHaarClassifier) * totalclassifier) / sizeof(GpuHidHaarTreeNode);
|
||||
//if(flag == 0){
|
||||
candidate = (int *)malloc(4 * sizeof(int) * outputsz);
|
||||
//memset((char*)candidate,0,4*sizeof(int)*outputsz);
|
||||
gpuSetImagesForHaarClassifierCascade( cascade,/* &sum1, &sqsum1, _tilted,*/ 1., gsum.step / 4 );
|
||||
|
||||
//cascadebuffer = clCreateBuffer(gsum.clCxt->clContext,CL_MEM_READ_ONLY,sizeof(GpuHidHaarClassifierCascade),NULL,&status);
|
||||
//openCLVerifyCall(status);
|
||||
//openCLSafeCall(clEnqueueWriteBuffer(gsum.clCxt->clCmdQueue,cascadebuffer,1,0,sizeof(GpuHidHaarClassifierCascade),gcascade,0,NULL,NULL));
|
||||
candidate = (int *)malloc(4 * sizeof(int) * outputsz);
|
||||
|
||||
gpuSetImagesForHaarClassifierCascade( cascade, 1., gsum.step / 4 );
|
||||
|
||||
stagebuffer = openCLCreateBuffer(gsum.clCxt, CL_MEM_READ_ONLY, sizeof(GpuHidHaarStageClassifier) * gcascade->count);
|
||||
//openCLVerifyCall(status);
|
||||
openCLSafeCall(clEnqueueWriteBuffer((cl_command_queue)gsum.clCxt->oclCommandQueue(), stagebuffer, 1, 0, sizeof(GpuHidHaarStageClassifier)*gcascade->count, stage, 0, NULL, NULL));
|
||||
|
||||
//classifierbuffer = clCreateBuffer(gsum.clCxt->clContext,CL_MEM_READ_ONLY,sizeof(GpuHidHaarClassifier)*totalclassifier,NULL,&status);
|
||||
//status = clEnqueueWriteBuffer(gsum.clCxt->clCmdQueue,classifierbuffer,1,0,sizeof(GpuHidHaarClassifier)*totalclassifier,classifier,0,NULL,NULL);
|
||||
cl_command_queue qu = (cl_command_queue)gsum.clCxt->oclCommandQueue();
|
||||
openCLSafeCall(clEnqueueWriteBuffer(qu, stagebuffer, 1, 0, sizeof(GpuHidHaarStageClassifier)*gcascade->count, stage, 0, NULL, NULL));
|
||||
|
||||
nodebuffer = openCLCreateBuffer(gsum.clCxt, CL_MEM_READ_ONLY, nodenum * sizeof(GpuHidHaarTreeNode));
|
||||
//openCLVerifyCall(status);
|
||||
openCLSafeCall(clEnqueueWriteBuffer((cl_command_queue)gsum.clCxt->oclCommandQueue(), nodebuffer, 1, 0,
|
||||
nodenum * sizeof(GpuHidHaarTreeNode),
|
||||
|
||||
openCLSafeCall(clEnqueueWriteBuffer(qu, nodebuffer, 1, 0, nodenum * sizeof(GpuHidHaarTreeNode),
|
||||
node, 0, NULL, NULL));
|
||||
candidatebuffer = openCLCreateBuffer(gsum.clCxt, CL_MEM_WRITE_ONLY, 4 * sizeof(int) * outputsz);
|
||||
//openCLVerifyCall(status);
|
||||
|
||||
scaleinfobuffer = openCLCreateBuffer(gsum.clCxt, CL_MEM_READ_ONLY, sizeof(detect_piramid_info) * loopcount);
|
||||
//openCLVerifyCall(status);
|
||||
openCLSafeCall(clEnqueueWriteBuffer((cl_command_queue)gsum.clCxt->oclCommandQueue(), scaleinfobuffer, 1, 0, sizeof(detect_piramid_info)*loopcount, scaleinfo, 0, NULL, NULL));
|
||||
//flag = 1;
|
||||
//}
|
||||
openCLSafeCall(clEnqueueWriteBuffer(qu, scaleinfobuffer, 1, 0, sizeof(detect_piramid_info)*loopcount, scaleinfo, 0, NULL, NULL));
|
||||
|
||||
//t = (double)cvGetTickCount() - t;
|
||||
//printf( "update time = %g ms\n", t/((double)cvGetTickFrequency()*1000.) );
|
||||
|
||||
//size_t globalThreads[3] = { counter+blocksize*blocksize-counter%(blocksize*blocksize),1,1};
|
||||
//t = (double)cvGetTickCount();
|
||||
int startstage = 0;
|
||||
int endstage = gcascade->count;
|
||||
int startnode = 0;
|
||||
@ -1087,11 +1026,6 @@ CvSeq *cv::ocl::OclCascadeClassifier::oclHaarDetectObjects( oclMat &gimg, CvMemS
|
||||
pq.s[3] = gcascade->pq3;
|
||||
float correction = gcascade->inv_window_area;
|
||||
|
||||
//int grpnumperline = ((m + localThreads[0] - 1) / localThreads[0]);
|
||||
//int totalgrp = ((n + localThreads[1] - 1) / localThreads[1])*grpnumperline;
|
||||
// openCLVerifyKernel(gsum.clCxt, kernel, &blocksize, globalThreads, localThreads);
|
||||
//openCLSafeCall(clSetKernelArg(kernel,argcount++,sizeof(cl_mem),(void*)&cascadebuffer));
|
||||
|
||||
vector<pair<size_t, const void *> > args;
|
||||
args.push_back ( make_pair(sizeof(cl_mem) , (void *)&stagebuffer ));
|
||||
args.push_back ( make_pair(sizeof(cl_mem) , (void *)&scaleinfobuffer ));
|
||||
@ -1111,28 +1045,20 @@ CvSeq *cv::ocl::OclCascadeClassifier::oclHaarDetectObjects( oclMat &gimg, CvMemS
|
||||
args.push_back ( make_pair(sizeof(cl_float) , (void *)&correction ));
|
||||
|
||||
openCLExecuteKernel(gsum.clCxt, &haarobjectdetect, "gpuRunHaarClassifierCascade", globalThreads, localThreads, args, -1, -1);
|
||||
//t = (double)cvGetTickCount() - t;
|
||||
//printf( "detection time = %g ms\n", t/((double)cvGetTickFrequency()*1000.) );
|
||||
//t = (double)cvGetTickCount();
|
||||
//openCLSafeCall(clEnqueueReadBuffer(gsum.clCxt->impl->clCmdQueue, candidatebuffer, 1, 0, 4 * sizeof(int)*outputsz, candidate, 0, NULL, NULL));
|
||||
|
||||
openCLReadBuffer( gsum.clCxt, candidatebuffer, candidate, 4 * sizeof(int)*outputsz );
|
||||
|
||||
for(int i = 0; i < outputsz; i++)
|
||||
if(candidate[4 * i + 2] != 0)
|
||||
allCandidates.push_back(Rect(candidate[4 * i], candidate[4 * i + 1], candidate[4 * i + 2], candidate[4 * i + 3]));
|
||||
// t = (double)cvGetTickCount() - t;
|
||||
//printf( "post time = %g ms\n", t/((double)cvGetTickFrequency()*1000.) );
|
||||
//t = (double)cvGetTickCount();
|
||||
allCandidates.push_back(Rect(candidate[4 * i], candidate[4 * i + 1],
|
||||
candidate[4 * i + 2], candidate[4 * i + 3]));
|
||||
|
||||
free(scaleinfo);
|
||||
free(candidate);
|
||||
//openCLSafeCall(clReleaseMemObject(cascadebuffer));
|
||||
openCLSafeCall(clReleaseMemObject(stagebuffer));
|
||||
openCLSafeCall(clReleaseMemObject(scaleinfobuffer));
|
||||
openCLSafeCall(clReleaseMemObject(nodebuffer));
|
||||
openCLSafeCall(clReleaseMemObject(candidatebuffer));
|
||||
// openCLSafeCall(clReleaseKernel(kernel));
|
||||
//t = (double)cvGetTickCount() - t;
|
||||
//printf( "release time = %g ms\n", t/((double)cvGetTickFrequency()*1000.) );
|
||||
}
|
||||
else
|
||||
{
|
||||
@ -1150,7 +1076,6 @@ CvSeq *cv::ocl::OclCascadeClassifier::oclHaarDetectObjects( oclMat &gimg, CvMemS
|
||||
classifier = (GpuHidHaarClassifier *)(stage + gcascade->count);
|
||||
node = (GpuHidHaarTreeNode *)(classifier->node);
|
||||
cl_mem stagebuffer;
|
||||
//cl_mem classifierbuffer;
|
||||
cl_mem nodebuffer;
|
||||
cl_mem candidatebuffer;
|
||||
cl_mem scaleinfobuffer;
|
||||
@ -1187,24 +1112,20 @@ CvSeq *cv::ocl::OclCascadeClassifier::oclHaarDetectObjects( oclMat &gimg, CvMemS
|
||||
size_t blocksize = 8;
|
||||
size_t localThreads[3] = { blocksize, blocksize , 1 };
|
||||
size_t globalThreads[3] = { grp_per_CU *gsum.clCxt->computeUnits() *localThreads[0],
|
||||
localThreads[1], 1
|
||||
};
|
||||
localThreads[1], 1 };
|
||||
int outputsz = 256 * globalThreads[0] / localThreads[0];
|
||||
int nodenum = (datasize - sizeof(GpuHidHaarClassifierCascade) -
|
||||
sizeof(GpuHidHaarStageClassifier) * gcascade->count - sizeof(GpuHidHaarClassifier) * totalclassifier) / sizeof(GpuHidHaarTreeNode);
|
||||
nodebuffer = openCLCreateBuffer(gsum.clCxt, CL_MEM_READ_ONLY,
|
||||
nodenum * sizeof(GpuHidHaarTreeNode));
|
||||
//openCLVerifyCall(status);
|
||||
openCLSafeCall(clEnqueueWriteBuffer((cl_command_queue)gsum.clCxt->oclCommandQueue(), nodebuffer, 1, 0,
|
||||
cl_command_queue qu = (cl_command_queue)gsum.clCxt->oclCommandQueue();
|
||||
openCLSafeCall(clEnqueueWriteBuffer(qu, nodebuffer, 1, 0,
|
||||
nodenum * sizeof(GpuHidHaarTreeNode),
|
||||
node, 0, NULL, NULL));
|
||||
cl_mem newnodebuffer = openCLCreateBuffer(gsum.clCxt, CL_MEM_READ_WRITE,
|
||||
loopcount * nodenum * sizeof(GpuHidHaarTreeNode));
|
||||
int startstage = 0;
|
||||
int endstage = gcascade->count;
|
||||
//cl_kernel kernel;
|
||||
//kernel = openCLGetKernelFromSource(gsum.clCxt, &haarobjectdetect_scaled2, "gpuRunHaarClassifierCascade_scaled2");
|
||||
//cl_kernel kernel2 = openCLGetKernelFromSource(gimg.clCxt, &haarobjectdetect_scaled2, "gpuscaleclassifier");
|
||||
for(int i = 0; i < loopcount; i++)
|
||||
{
|
||||
sz = sizev[i];
|
||||
@ -1223,7 +1144,7 @@ CvSeq *cv::ocl::OclCascadeClassifier::oclHaarDetectObjects( oclMat &gimg, CvMemS
|
||||
int height = (gsum.rows - 1 - sz.height + ystep - 1) / ystep;
|
||||
int grpnumperline = (width + localThreads[0] - 1) / localThreads[0];
|
||||
int totalgrp = ((height + localThreads[1] - 1) / localThreads[1]) * grpnumperline;
|
||||
//outputsz +=width*height;
|
||||
|
||||
scaleinfo[i].width_height = (width << 16) | height;
|
||||
scaleinfo[i].grpnumperline_totalgrp = (grpnumperline << 16) | totalgrp;
|
||||
scaleinfo[i].imgoff = 0;
|
||||
@ -1241,28 +1162,20 @@ CvSeq *cv::ocl::OclCascadeClassifier::oclHaarDetectObjects( oclMat &gimg, CvMemS
|
||||
size_t globalThreads2[3] = {nodenum, 1, 1};
|
||||
|
||||
openCLExecuteKernel(gsum.clCxt, &haarobjectdetect_scaled2, "gpuscaleclassifier", globalThreads2, NULL/*localThreads2*/, args1, -1, -1);
|
||||
|
||||
//clEnqueueNDRangeKernel(gsum.clCxt->impl->clCmdQueue, kernel2, 1, NULL, globalThreads2, 0, 0, NULL, NULL);
|
||||
//clFinish(gsum.clCxt->impl->clCmdQueue);
|
||||
}
|
||||
//clReleaseKernel(kernel2);
|
||||
|
||||
int step = gsum.step / 4;
|
||||
int startnode = 0;
|
||||
int splitstage = 3;
|
||||
int splitnode = stage[0].count + stage[1].count + stage[2].count;
|
||||
stagebuffer = openCLCreateBuffer(gsum.clCxt, CL_MEM_READ_ONLY, sizeof(GpuHidHaarStageClassifier) * gcascade->count);
|
||||
//openCLVerifyCall(status);
|
||||
openCLSafeCall(clEnqueueWriteBuffer((cl_command_queue)gsum.clCxt->oclCommandQueue(), stagebuffer, 1, 0, sizeof(GpuHidHaarStageClassifier)*gcascade->count, stage, 0, NULL, NULL));
|
||||
openCLSafeCall(clEnqueueWriteBuffer(qu, stagebuffer, 1, 0, sizeof(GpuHidHaarStageClassifier)*gcascade->count, stage, 0, NULL, NULL));
|
||||
candidatebuffer = openCLCreateBuffer(gsum.clCxt, CL_MEM_WRITE_ONLY | CL_MEM_ALLOC_HOST_PTR, 4 * sizeof(int) * outputsz);
|
||||
//openCLVerifyCall(status);
|
||||
scaleinfobuffer = openCLCreateBuffer(gsum.clCxt, CL_MEM_READ_ONLY, sizeof(detect_piramid_info) * loopcount);
|
||||
//openCLVerifyCall(status);
|
||||
openCLSafeCall(clEnqueueWriteBuffer((cl_command_queue)gsum.clCxt->oclCommandQueue(), scaleinfobuffer, 1, 0, sizeof(detect_piramid_info)*loopcount, scaleinfo, 0, NULL, NULL));
|
||||
openCLSafeCall(clEnqueueWriteBuffer(qu, scaleinfobuffer, 1, 0, sizeof(detect_piramid_info)*loopcount, scaleinfo, 0, NULL, NULL));
|
||||
pbuffer = openCLCreateBuffer(gsum.clCxt, CL_MEM_READ_ONLY, sizeof(cl_int4) * loopcount);
|
||||
openCLSafeCall(clEnqueueWriteBuffer((cl_command_queue)gsum.clCxt->oclCommandQueue(), pbuffer, 1, 0, sizeof(cl_int4)*loopcount, p, 0, NULL, NULL));
|
||||
openCLSafeCall(clEnqueueWriteBuffer(qu, pbuffer, 1, 0, sizeof(cl_int4)*loopcount, p, 0, NULL, NULL));
|
||||
correctionbuffer = openCLCreateBuffer(gsum.clCxt, CL_MEM_READ_ONLY, sizeof(cl_float) * loopcount);
|
||||
openCLSafeCall(clEnqueueWriteBuffer((cl_command_queue)gsum.clCxt->oclCommandQueue(), correctionbuffer, 1, 0, sizeof(cl_float)*loopcount, correction, 0, NULL, NULL));
|
||||
//int argcount = 0;
|
||||
openCLSafeCall(clEnqueueWriteBuffer(qu, correctionbuffer, 1, 0, sizeof(cl_float)*loopcount, correction, 0, NULL, NULL));
|
||||
|
||||
vector<pair<size_t, const void *> > args;
|
||||
args.push_back ( make_pair(sizeof(cl_mem) , (void *)&stagebuffer ));
|
||||
@ -1271,22 +1184,21 @@ CvSeq *cv::ocl::OclCascadeClassifier::oclHaarDetectObjects( oclMat &gimg, CvMemS
|
||||
args.push_back ( make_pair(sizeof(cl_mem) , (void *)&gsum.data ));
|
||||
args.push_back ( make_pair(sizeof(cl_mem) , (void *)&gsqsum.data ));
|
||||
args.push_back ( make_pair(sizeof(cl_mem) , (void *)&candidatebuffer ));
|
||||
args.push_back ( make_pair(sizeof(cl_int) , (void *)&gsum.rows ));
|
||||
args.push_back ( make_pair(sizeof(cl_int) , (void *)&gsum.cols ));
|
||||
args.push_back ( make_pair(sizeof(cl_int) , (void *)&step ));
|
||||
args.push_back ( make_pair(sizeof(cl_int) , (void *)&loopcount ));
|
||||
args.push_back ( make_pair(sizeof(cl_int) , (void *)&startstage ));
|
||||
args.push_back ( make_pair(sizeof(cl_int) , (void *)&splitstage ));
|
||||
args.push_back ( make_pair(sizeof(cl_int) , (void *)&endstage ));
|
||||
args.push_back ( make_pair(sizeof(cl_int) , (void *)&startnode ));
|
||||
args.push_back ( make_pair(sizeof(cl_int) , (void *)&splitnode ));
|
||||
args.push_back ( make_pair(sizeof(cl_mem) , (void *)&pbuffer ));
|
||||
args.push_back ( make_pair(sizeof(cl_mem) , (void *)&correctionbuffer ));
|
||||
args.push_back ( make_pair(sizeof(cl_int) , (void *)&nodenum ));
|
||||
|
||||
|
||||
openCLExecuteKernel(gsum.clCxt, &haarobjectdetect_scaled2, "gpuRunHaarClassifierCascade_scaled2", globalThreads, localThreads, args, -1, -1);
|
||||
|
||||
//openCLSafeCall(clEnqueueReadBuffer(gsum.clCxt->clCmdQueue,candidatebuffer,1,0,4*sizeof(int)*outputsz,candidate,0,NULL,NULL));
|
||||
candidate = (int *)clEnqueueMapBuffer((cl_command_queue)gsum.clCxt->oclCommandQueue(), candidatebuffer, 1, CL_MAP_READ, 0, 4 * sizeof(int), 0, 0, 0, &status);
|
||||
candidate = (int *)clEnqueueMapBuffer(qu, candidatebuffer, 1, CL_MAP_READ, 0, 4 * sizeof(int) * outputsz, 0, 0, 0, &status);
|
||||
|
||||
for(int i = 0; i < outputsz; i++)
|
||||
{
|
||||
@ -1297,7 +1209,7 @@ CvSeq *cv::ocl::OclCascadeClassifier::oclHaarDetectObjects( oclMat &gimg, CvMemS
|
||||
free(scaleinfo);
|
||||
free(p);
|
||||
free(correction);
|
||||
clEnqueueUnmapMemObject((cl_command_queue)gsum.clCxt->oclCommandQueue(), candidatebuffer, candidate, 0, 0, 0);
|
||||
clEnqueueUnmapMemObject(qu, candidatebuffer, candidate, 0, 0, 0);
|
||||
openCLSafeCall(clReleaseMemObject(stagebuffer));
|
||||
openCLSafeCall(clReleaseMemObject(scaleinfobuffer));
|
||||
openCLSafeCall(clReleaseMemObject(nodebuffer));
|
||||
@ -1306,21 +1218,17 @@ CvSeq *cv::ocl::OclCascadeClassifier::oclHaarDetectObjects( oclMat &gimg, CvMemS
|
||||
openCLSafeCall(clReleaseMemObject(pbuffer));
|
||||
openCLSafeCall(clReleaseMemObject(correctionbuffer));
|
||||
}
|
||||
//t = (double)cvGetTickCount() ;
|
||||
|
||||
cvFree(&cascade->hid_cascade);
|
||||
// printf("%d\n",globalcounter);
|
||||
rectList.resize(allCandidates.size());
|
||||
if(!allCandidates.empty())
|
||||
std::copy(allCandidates.begin(), allCandidates.end(), rectList.begin());
|
||||
|
||||
//cout << "count = " << rectList.size()<< endl;
|
||||
|
||||
if( minNeighbors != 0 || findBiggestObject )
|
||||
groupRectangles(rectList, rweights, std::max(minNeighbors, 1), GROUP_EPS);
|
||||
else
|
||||
rweights.resize(rectList.size(), 0);
|
||||
|
||||
|
||||
if( findBiggestObject && rectList.size() )
|
||||
{
|
||||
CvAvgComp result_comp = {{0, 0, 0, 0}, 0};
|
||||
@ -1346,13 +1254,565 @@ CvSeq *cv::ocl::OclCascadeClassifier::oclHaarDetectObjects( oclMat &gimg, CvMemS
|
||||
cvSeqPush( result_seq, &c );
|
||||
}
|
||||
}
|
||||
//t = (double)cvGetTickCount() - t;
|
||||
//printf( "get face time = %g ms\n", t/((double)cvGetTickFrequency()*1000.) );
|
||||
//alltime = (double)cvGetTickCount() - alltime;
|
||||
//printf( "all time = %g ms\n", alltime/((double)cvGetTickFrequency()*1000.) );
|
||||
|
||||
return result_seq;
|
||||
}
|
||||
|
||||
struct OclBuffers
|
||||
{
|
||||
cl_mem stagebuffer;
|
||||
cl_mem nodebuffer;
|
||||
cl_mem candidatebuffer;
|
||||
cl_mem scaleinfobuffer;
|
||||
cl_mem pbuffer;
|
||||
cl_mem correctionbuffer;
|
||||
cl_mem newnodebuffer;
|
||||
};
|
||||
|
||||
struct getRect
|
||||
{
|
||||
Rect operator()(const CvAvgComp &e) const
|
||||
{
|
||||
return e.rect;
|
||||
}
|
||||
};
|
||||
|
||||
void cv::ocl::OclCascadeClassifierBuf::detectMultiScale(oclMat &gimg, CV_OUT std::vector<cv::Rect>& faces,
|
||||
double scaleFactor, int minNeighbors, int flags,
|
||||
Size minSize, Size maxSize)
|
||||
{
|
||||
int blocksize = 8;
|
||||
int grp_per_CU = 12;
|
||||
size_t localThreads[3] = { blocksize, blocksize, 1 };
|
||||
size_t globalThreads[3] = { grp_per_CU * Context::getContext()->computeUnits() * localThreads[0],
|
||||
localThreads[1],
|
||||
1 };
|
||||
int outputsz = 256 * globalThreads[0] / localThreads[0];
|
||||
|
||||
Init(gimg.rows, gimg.cols, scaleFactor, flags, outputsz, localThreads, minSize, maxSize);
|
||||
|
||||
const double GROUP_EPS = 0.2;
|
||||
|
||||
cv::ConcurrentRectVector allCandidates;
|
||||
std::vector<cv::Rect> rectList;
|
||||
std::vector<int> rweights;
|
||||
|
||||
CvHaarClassifierCascade *cascade = oldCascade;
|
||||
GpuHidHaarClassifierCascade *gcascade;
|
||||
GpuHidHaarStageClassifier *stage;
|
||||
GpuHidHaarClassifier *classifier;
|
||||
GpuHidHaarTreeNode *node;
|
||||
|
||||
if( CV_MAT_DEPTH(gimg.type()) != CV_8U )
|
||||
CV_Error( CV_StsUnsupportedFormat, "Only 8-bit images are supported" );
|
||||
|
||||
if( CV_MAT_CN(gimg.type()) > 1 )
|
||||
{
|
||||
oclMat gtemp;
|
||||
cvtColor( gimg, gtemp, CV_BGR2GRAY );
|
||||
gimg = gtemp;
|
||||
}
|
||||
|
||||
int *candidate;
|
||||
|
||||
if( (flags & CV_HAAR_SCALE_IMAGE) )
|
||||
{
|
||||
int indexy = 0;
|
||||
CvSize sz;
|
||||
|
||||
cv::Rect roi, roi2;
|
||||
cv::Mat imgroi, imgroisq;
|
||||
cv::ocl::oclMat resizeroi, gimgroi, gimgroisq;
|
||||
|
||||
for( int i = 0; i < m_loopcount; i++ )
|
||||
{
|
||||
sz = sizev[i];
|
||||
roi = Rect(0, indexy, sz.width, sz.height);
|
||||
roi2 = Rect(0, 0, sz.width - 1, sz.height - 1);
|
||||
resizeroi = gimg1(roi2);
|
||||
gimgroi = gsum(roi);
|
||||
gimgroisq = gsqsum(roi);
|
||||
|
||||
cv::ocl::resize(gimg, resizeroi, Size(sz.width - 1, sz.height - 1), 0, 0, INTER_LINEAR);
|
||||
cv::ocl::integral(resizeroi, gimgroi, gimgroisq);
|
||||
indexy += sz.height;
|
||||
}
|
||||
|
||||
gcascade = (GpuHidHaarClassifierCascade *)(cascade->hid_cascade);
|
||||
stage = (GpuHidHaarStageClassifier *)(gcascade + 1);
|
||||
classifier = (GpuHidHaarClassifier *)(stage + gcascade->count);
|
||||
node = (GpuHidHaarTreeNode *)(classifier->node);
|
||||
|
||||
gpuSetImagesForHaarClassifierCascade( cascade, 1., gsum.step / 4 );
|
||||
|
||||
cl_command_queue qu = (cl_command_queue)gsum.clCxt->oclCommandQueue();
|
||||
openCLSafeCall(clEnqueueWriteBuffer(qu, ((OclBuffers *)buffers)->stagebuffer, 1, 0,
|
||||
sizeof(GpuHidHaarStageClassifier) * gcascade->count,
|
||||
stage, 0, NULL, NULL));
|
||||
|
||||
openCLSafeCall(clEnqueueWriteBuffer(qu, ((OclBuffers *)buffers)->nodebuffer, 1, 0,
|
||||
m_nodenum * sizeof(GpuHidHaarTreeNode),
|
||||
node, 0, NULL, NULL));
|
||||
|
||||
int startstage = 0;
|
||||
int endstage = gcascade->count;
|
||||
int startnode = 0;
|
||||
int pixelstep = gsum.step / 4;
|
||||
int splitstage = 3;
|
||||
int splitnode = stage[0].count + stage[1].count + stage[2].count;
|
||||
cl_int4 p, pq;
|
||||
p.s[0] = gcascade->p0;
|
||||
p.s[1] = gcascade->p1;
|
||||
p.s[2] = gcascade->p2;
|
||||
p.s[3] = gcascade->p3;
|
||||
pq.s[0] = gcascade->pq0;
|
||||
pq.s[1] = gcascade->pq1;
|
||||
pq.s[2] = gcascade->pq2;
|
||||
pq.s[3] = gcascade->pq3;
|
||||
float correction = gcascade->inv_window_area;
|
||||
|
||||
vector<pair<size_t, const void *> > args;
|
||||
args.push_back ( make_pair(sizeof(cl_mem) , (void *)&((OclBuffers *)buffers)->stagebuffer ));
|
||||
args.push_back ( make_pair(sizeof(cl_mem) , (void *)&((OclBuffers *)buffers)->scaleinfobuffer ));
|
||||
args.push_back ( make_pair(sizeof(cl_mem) , (void *)&((OclBuffers *)buffers)->nodebuffer ));
|
||||
args.push_back ( make_pair(sizeof(cl_mem) , (void *)&gsum.data ));
|
||||
args.push_back ( make_pair(sizeof(cl_mem) , (void *)&gsqsum.data ));
|
||||
args.push_back ( make_pair(sizeof(cl_mem) , (void *)&((OclBuffers *)buffers)->candidatebuffer ));
|
||||
args.push_back ( make_pair(sizeof(cl_int) , (void *)&pixelstep ));
|
||||
args.push_back ( make_pair(sizeof(cl_int) , (void *)&m_loopcount ));
|
||||
args.push_back ( make_pair(sizeof(cl_int) , (void *)&startstage ));
|
||||
args.push_back ( make_pair(sizeof(cl_int) , (void *)&splitstage ));
|
||||
args.push_back ( make_pair(sizeof(cl_int) , (void *)&endstage ));
|
||||
args.push_back ( make_pair(sizeof(cl_int) , (void *)&startnode ));
|
||||
args.push_back ( make_pair(sizeof(cl_int) , (void *)&splitnode ));
|
||||
args.push_back ( make_pair(sizeof(cl_int4) , (void *)&p ));
|
||||
args.push_back ( make_pair(sizeof(cl_int4) , (void *)&pq ));
|
||||
args.push_back ( make_pair(sizeof(cl_float) , (void *)&correction ));
|
||||
|
||||
openCLExecuteKernel(gsum.clCxt, &haarobjectdetect, "gpuRunHaarClassifierCascade", globalThreads, localThreads, args, -1, -1);
|
||||
|
||||
candidate = (int *)malloc(4 * sizeof(int) * outputsz);
|
||||
memset(candidate, 0, 4 * sizeof(int) * outputsz);
|
||||
openCLReadBuffer( gsum.clCxt, ((OclBuffers *)buffers)->candidatebuffer, candidate, 4 * sizeof(int)*outputsz );
|
||||
|
||||
for(int i = 0; i < outputsz; i++)
|
||||
if(candidate[4 * i + 2] != 0)
|
||||
allCandidates.push_back(Rect(candidate[4 * i], candidate[4 * i + 1],
|
||||
candidate[4 * i + 2], candidate[4 * i + 3]));
|
||||
|
||||
free((void *)candidate);
|
||||
candidate = NULL;
|
||||
}
|
||||
else
|
||||
{
|
||||
cv::ocl::integral(gimg, gsum, gsqsum);
|
||||
|
||||
gpuSetHaarClassifierCascade(cascade);
|
||||
|
||||
gcascade = (GpuHidHaarClassifierCascade *)cascade->hid_cascade;
|
||||
stage = (GpuHidHaarStageClassifier *)(gcascade + 1);
|
||||
classifier = (GpuHidHaarClassifier *)(stage + gcascade->count);
|
||||
node = (GpuHidHaarTreeNode *)(classifier->node);
|
||||
|
||||
cl_command_queue qu = (cl_command_queue)gsum.clCxt->oclCommandQueue();
|
||||
openCLSafeCall(clEnqueueWriteBuffer(qu, ((OclBuffers *)buffers)->nodebuffer, 1, 0,
|
||||
m_nodenum * sizeof(GpuHidHaarTreeNode),
|
||||
node, 0, NULL, NULL));
|
||||
|
||||
cl_int4 *p = (cl_int4 *)malloc(sizeof(cl_int4) * m_loopcount);
|
||||
float *correction = (float *)malloc(sizeof(float) * m_loopcount);
|
||||
int startstage = 0;
|
||||
int endstage = gcascade->count;
|
||||
double factor;
|
||||
for(int i = 0; i < m_loopcount; i++)
|
||||
{
|
||||
factor = scalev[i];
|
||||
int equRect_x = (int)(factor * gcascade->p0 + 0.5);
|
||||
int equRect_y = (int)(factor * gcascade->p1 + 0.5);
|
||||
int equRect_w = (int)(factor * gcascade->p3 + 0.5);
|
||||
int equRect_h = (int)(factor * gcascade->p2 + 0.5);
|
||||
p[i].s[0] = equRect_x;
|
||||
p[i].s[1] = equRect_y;
|
||||
p[i].s[2] = equRect_x + equRect_w;
|
||||
p[i].s[3] = equRect_y + equRect_h;
|
||||
correction[i] = 1. / (equRect_w * equRect_h);
|
||||
int startnodenum = m_nodenum * i;
|
||||
float factor2 = (float)factor;
|
||||
|
||||
vector<pair<size_t, const void *> > args1;
|
||||
args1.push_back ( make_pair(sizeof(cl_mem) , (void *)&((OclBuffers *)buffers)->nodebuffer ));
|
||||
args1.push_back ( make_pair(sizeof(cl_mem) , (void *)&((OclBuffers *)buffers)->newnodebuffer ));
|
||||
args1.push_back ( make_pair(sizeof(cl_float) , (void *)&factor2 ));
|
||||
args1.push_back ( make_pair(sizeof(cl_float) , (void *)&correction[i] ));
|
||||
args1.push_back ( make_pair(sizeof(cl_int) , (void *)&startnodenum ));
|
||||
|
||||
size_t globalThreads2[3] = {m_nodenum, 1, 1};
|
||||
|
||||
openCLExecuteKernel(gsum.clCxt, &haarobjectdetect_scaled2, "gpuscaleclassifier", globalThreads2, NULL/*localThreads2*/, args1, -1, -1);
|
||||
}
|
||||
|
||||
int step = gsum.step / 4;
|
||||
int startnode = 0;
|
||||
int splitstage = 3;
|
||||
openCLSafeCall(clEnqueueWriteBuffer(qu, ((OclBuffers *)buffers)->stagebuffer, 1, 0, sizeof(GpuHidHaarStageClassifier)*gcascade->count, stage, 0, NULL, NULL));
|
||||
openCLSafeCall(clEnqueueWriteBuffer(qu, ((OclBuffers *)buffers)->pbuffer, 1, 0, sizeof(cl_int4)*m_loopcount, p, 0, NULL, NULL));
|
||||
openCLSafeCall(clEnqueueWriteBuffer(qu, ((OclBuffers *)buffers)->correctionbuffer, 1, 0, sizeof(cl_float)*m_loopcount, correction, 0, NULL, NULL));
|
||||
|
||||
vector<pair<size_t, const void *> > args;
|
||||
args.push_back ( make_pair(sizeof(cl_mem) , (void *)&((OclBuffers *)buffers)->stagebuffer ));
|
||||
args.push_back ( make_pair(sizeof(cl_mem) , (void *)&((OclBuffers *)buffers)->scaleinfobuffer ));
|
||||
args.push_back ( make_pair(sizeof(cl_mem) , (void *)&((OclBuffers *)buffers)->newnodebuffer ));
|
||||
args.push_back ( make_pair(sizeof(cl_mem) , (void *)&gsum.data ));
|
||||
args.push_back ( make_pair(sizeof(cl_mem) , (void *)&gsqsum.data ));
|
||||
args.push_back ( make_pair(sizeof(cl_mem) , (void *)&((OclBuffers *)buffers)->candidatebuffer ));
|
||||
args.push_back ( make_pair(sizeof(cl_int) , (void *)&gsum.rows ));
|
||||
args.push_back ( make_pair(sizeof(cl_int) , (void *)&gsum.cols ));
|
||||
args.push_back ( make_pair(sizeof(cl_int) , (void *)&step ));
|
||||
args.push_back ( make_pair(sizeof(cl_int) , (void *)&m_loopcount ));
|
||||
args.push_back ( make_pair(sizeof(cl_int) , (void *)&startstage ));
|
||||
args.push_back ( make_pair(sizeof(cl_int) , (void *)&splitstage ));
|
||||
args.push_back ( make_pair(sizeof(cl_int) , (void *)&endstage ));
|
||||
args.push_back ( make_pair(sizeof(cl_int) , (void *)&startnode ));
|
||||
args.push_back ( make_pair(sizeof(cl_mem) , (void *)&((OclBuffers *)buffers)->pbuffer ));
|
||||
args.push_back ( make_pair(sizeof(cl_mem) , (void *)&((OclBuffers *)buffers)->correctionbuffer ));
|
||||
args.push_back ( make_pair(sizeof(cl_int) , (void *)&m_nodenum ));
|
||||
|
||||
openCLExecuteKernel(gsum.clCxt, &haarobjectdetect_scaled2, "gpuRunHaarClassifierCascade_scaled2", globalThreads, localThreads, args, -1, -1);
|
||||
|
||||
candidate = (int *)clEnqueueMapBuffer(qu, ((OclBuffers *)buffers)->candidatebuffer, 1, CL_MAP_READ, 0, 4 * sizeof(int) * outputsz, 0, 0, 0, NULL);
|
||||
|
||||
for(int i = 0; i < outputsz; i++)
|
||||
{
|
||||
if(candidate[4 * i + 2] != 0)
|
||||
allCandidates.push_back(Rect(candidate[4 * i], candidate[4 * i + 1],
|
||||
candidate[4 * i + 2], candidate[4 * i + 3]));
|
||||
}
|
||||
|
||||
free(p);
|
||||
free(correction);
|
||||
clEnqueueUnmapMemObject(qu, ((OclBuffers *)buffers)->candidatebuffer, candidate, 0, 0, 0);
|
||||
}
|
||||
|
||||
rectList.resize(allCandidates.size());
|
||||
if(!allCandidates.empty())
|
||||
std::copy(allCandidates.begin(), allCandidates.end(), rectList.begin());
|
||||
|
||||
if( minNeighbors != 0 || findBiggestObject )
|
||||
groupRectangles(rectList, rweights, std::max(minNeighbors, 1), GROUP_EPS);
|
||||
else
|
||||
rweights.resize(rectList.size(), 0);
|
||||
|
||||
GenResult(faces, rectList, rweights);
|
||||
}
|
||||
|
||||
void cv::ocl::OclCascadeClassifierBuf::Init(const int rows, const int cols,
|
||||
double scaleFactor, int flags,
|
||||
const int outputsz, const size_t localThreads[],
|
||||
CvSize minSize, CvSize maxSize)
|
||||
{
|
||||
CvHaarClassifierCascade *cascade = oldCascade;
|
||||
|
||||
if( !CV_IS_HAAR_CLASSIFIER(cascade) )
|
||||
CV_Error( !cascade ? CV_StsNullPtr : CV_StsBadArg, "Invalid classifier cascade" );
|
||||
|
||||
if( scaleFactor <= 1 )
|
||||
CV_Error( CV_StsOutOfRange, "scale factor must be > 1" );
|
||||
|
||||
if( cols < minSize.width || rows < minSize.height )
|
||||
CV_Error(CV_StsError, "Image too small");
|
||||
|
||||
int datasize=0;
|
||||
int totalclassifier=0;
|
||||
|
||||
if( !cascade->hid_cascade )
|
||||
gpuCreateHidHaarClassifierCascade(cascade, &datasize, &totalclassifier);
|
||||
|
||||
if( maxSize.height == 0 || maxSize.width == 0 )
|
||||
{
|
||||
maxSize.height = rows;
|
||||
maxSize.width = cols;
|
||||
}
|
||||
|
||||
findBiggestObject = (flags & CV_HAAR_FIND_BIGGEST_OBJECT) != 0;
|
||||
if( findBiggestObject )
|
||||
flags &= ~(CV_HAAR_SCALE_IMAGE | CV_HAAR_DO_CANNY_PRUNING);
|
||||
|
||||
CreateBaseBufs(datasize, totalclassifier, flags, outputsz);
|
||||
CreateFactorRelatedBufs(rows, cols, flags, scaleFactor, localThreads, minSize, maxSize);
|
||||
|
||||
m_scaleFactor = scaleFactor;
|
||||
m_rows = rows;
|
||||
m_cols = cols;
|
||||
m_flags = flags;
|
||||
m_minSize = minSize;
|
||||
m_maxSize = maxSize;
|
||||
|
||||
initialized = true;
|
||||
}
|
||||
|
||||
void cv::ocl::OclCascadeClassifierBuf::CreateBaseBufs(const int datasize, const int totalclassifier,
|
||||
const int flags, const int outputsz)
|
||||
{
|
||||
if (!initialized)
|
||||
{
|
||||
buffers = malloc(sizeof(OclBuffers));
|
||||
|
||||
size_t tempSize =
|
||||
sizeof(GpuHidHaarStageClassifier) * ((GpuHidHaarClassifierCascade *)oldCascade->hid_cascade)->count;
|
||||
m_nodenum = (datasize - sizeof(GpuHidHaarClassifierCascade) - tempSize - sizeof(GpuHidHaarClassifier) * totalclassifier)
|
||||
/ sizeof(GpuHidHaarTreeNode);
|
||||
|
||||
((OclBuffers *)buffers)->stagebuffer = openCLCreateBuffer(cv::ocl::Context::getContext(), CL_MEM_READ_ONLY, tempSize);
|
||||
((OclBuffers *)buffers)->nodebuffer = openCLCreateBuffer(cv::ocl::Context::getContext(), CL_MEM_READ_ONLY, m_nodenum * sizeof(GpuHidHaarTreeNode));
|
||||
}
|
||||
|
||||
if (initialized
|
||||
&& ((m_flags & CV_HAAR_SCALE_IMAGE) ^ (flags & CV_HAAR_SCALE_IMAGE)))
|
||||
{
|
||||
openCLSafeCall(clReleaseMemObject(((OclBuffers *)buffers)->candidatebuffer));
|
||||
}
|
||||
|
||||
if (flags & CV_HAAR_SCALE_IMAGE)
|
||||
{
|
||||
((OclBuffers *)buffers)->candidatebuffer = openCLCreateBuffer(cv::ocl::Context::getContext(),
|
||||
CL_MEM_WRITE_ONLY,
|
||||
4 * sizeof(int) * outputsz);
|
||||
}
|
||||
else
|
||||
{
|
||||
((OclBuffers *)buffers)->candidatebuffer = openCLCreateBuffer(cv::ocl::Context::getContext(),
|
||||
CL_MEM_WRITE_ONLY | CL_MEM_ALLOC_HOST_PTR,
|
||||
4 * sizeof(int) * outputsz);
|
||||
}
|
||||
}
|
||||
|
||||
void cv::ocl::OclCascadeClassifierBuf::CreateFactorRelatedBufs(
|
||||
const int rows, const int cols, const int flags,
|
||||
const double scaleFactor, const size_t localThreads[],
|
||||
CvSize minSize, CvSize maxSize)
|
||||
{
|
||||
if (initialized)
|
||||
{
|
||||
if ((m_flags & CV_HAAR_SCALE_IMAGE) && !(flags & CV_HAAR_SCALE_IMAGE))
|
||||
{
|
||||
gimg1.release();
|
||||
gsum.release();
|
||||
gsqsum.release();
|
||||
}
|
||||
else if (!(m_flags & CV_HAAR_SCALE_IMAGE) && (flags & CV_HAAR_SCALE_IMAGE))
|
||||
{
|
||||
openCLSafeCall(clReleaseMemObject(((OclBuffers *)buffers)->newnodebuffer));
|
||||
openCLSafeCall(clReleaseMemObject(((OclBuffers *)buffers)->correctionbuffer));
|
||||
openCLSafeCall(clReleaseMemObject(((OclBuffers *)buffers)->pbuffer));
|
||||
}
|
||||
else if ((m_flags & CV_HAAR_SCALE_IMAGE) && (flags & CV_HAAR_SCALE_IMAGE))
|
||||
{
|
||||
if (fabs(m_scaleFactor - scaleFactor) < 1e-6
|
||||
&& (rows == m_rows && cols == m_cols)
|
||||
&& (minSize.width == m_minSize.width)
|
||||
&& (minSize.height == m_minSize.height)
|
||||
&& (maxSize.width == m_maxSize.width)
|
||||
&& (maxSize.height == m_maxSize.height))
|
||||
{
|
||||
return;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
if (fabs(m_scaleFactor - scaleFactor) < 1e-6
|
||||
&& (rows == m_rows && cols == m_cols)
|
||||
&& (minSize.width == m_minSize.width)
|
||||
&& (minSize.height == m_minSize.height)
|
||||
&& (maxSize.width == m_maxSize.width)
|
||||
&& (maxSize.height == m_maxSize.height))
|
||||
{
|
||||
return;
|
||||
}
|
||||
else
|
||||
{
|
||||
openCLSafeCall(clReleaseMemObject(((OclBuffers *)buffers)->newnodebuffer));
|
||||
openCLSafeCall(clReleaseMemObject(((OclBuffers *)buffers)->correctionbuffer));
|
||||
openCLSafeCall(clReleaseMemObject(((OclBuffers *)buffers)->pbuffer));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
int loopcount;
|
||||
int indexy = 0;
|
||||
int totalheight = 0;
|
||||
double factor;
|
||||
Rect roi;
|
||||
CvSize sz;
|
||||
CvSize winSize0 = oldCascade->orig_window_size;
|
||||
detect_piramid_info *scaleinfo;
|
||||
if (flags & CV_HAAR_SCALE_IMAGE)
|
||||
{
|
||||
for(factor = 1.f;; factor *= scaleFactor)
|
||||
{
|
||||
CvSize winSize = { cvRound(winSize0.width * factor), cvRound(winSize0.height * factor) };
|
||||
sz.width = cvRound( cols / factor ) + 1;
|
||||
sz.height = cvRound( rows / factor ) + 1;
|
||||
CvSize sz1 = { sz.width - winSize0.width - 1, sz.height - winSize0.height - 1 };
|
||||
|
||||
if( sz1.width <= 0 || sz1.height <= 0 )
|
||||
break;
|
||||
if( winSize.width > maxSize.width || winSize.height > maxSize.height )
|
||||
break;
|
||||
if( winSize.width < minSize.width || winSize.height < minSize.height )
|
||||
continue;
|
||||
|
||||
totalheight += sz.height;
|
||||
sizev.push_back(sz);
|
||||
scalev.push_back(static_cast<float>(factor));
|
||||
}
|
||||
|
||||
loopcount = sizev.size();
|
||||
gimg1.create(rows, cols, CV_8UC1);
|
||||
gsum.create(totalheight + 4, cols + 1, CV_32SC1);
|
||||
gsqsum.create(totalheight + 4, cols + 1, CV_32FC1);
|
||||
|
||||
scaleinfo = (detect_piramid_info *)malloc(sizeof(detect_piramid_info) * loopcount);
|
||||
for( int i = 0; i < loopcount; i++ )
|
||||
{
|
||||
sz = sizev[i];
|
||||
roi = Rect(0, indexy, sz.width, sz.height);
|
||||
int width = sz.width - 1 - oldCascade->orig_window_size.width;
|
||||
int height = sz.height - 1 - oldCascade->orig_window_size.height;
|
||||
int grpnumperline = (width + localThreads[0] - 1) / localThreads[0];
|
||||
int totalgrp = ((height + localThreads[1] - 1) / localThreads[1]) * grpnumperline;
|
||||
|
||||
((detect_piramid_info *)scaleinfo)[i].width_height = (width << 16) | height;
|
||||
((detect_piramid_info *)scaleinfo)[i].grpnumperline_totalgrp = (grpnumperline << 16) | totalgrp;
|
||||
((detect_piramid_info *)scaleinfo)[i].imgoff = gsum(roi).offset >> 2;
|
||||
((detect_piramid_info *)scaleinfo)[i].factor = scalev[i];
|
||||
|
||||
indexy += sz.height;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
for(factor = 1;
|
||||
cvRound(factor * winSize0.width) < cols - 10 && cvRound(factor * winSize0.height) < rows - 10;
|
||||
factor *= scaleFactor)
|
||||
{
|
||||
CvSize winSize = { cvRound( winSize0.width * factor ), cvRound( winSize0.height * factor ) };
|
||||
if( winSize.width < minSize.width || winSize.height < minSize.height )
|
||||
{
|
||||
continue;
|
||||
}
|
||||
sizev.push_back(winSize);
|
||||
scalev.push_back(factor);
|
||||
}
|
||||
|
||||
loopcount = scalev.size();
|
||||
if(loopcount == 0)
|
||||
{
|
||||
loopcount = 1;
|
||||
sizev.push_back(minSize);
|
||||
scalev.push_back( min(cvRound(minSize.width / winSize0.width), cvRound(minSize.height / winSize0.height)) );
|
||||
}
|
||||
|
||||
((OclBuffers *)buffers)->pbuffer = openCLCreateBuffer(cv::ocl::Context::getContext(), CL_MEM_READ_ONLY,
|
||||
sizeof(cl_int4) * loopcount);
|
||||
((OclBuffers *)buffers)->correctionbuffer = openCLCreateBuffer(cv::ocl::Context::getContext(), CL_MEM_READ_ONLY,
|
||||
sizeof(cl_float) * loopcount);
|
||||
((OclBuffers *)buffers)->newnodebuffer = openCLCreateBuffer(cv::ocl::Context::getContext(), CL_MEM_READ_WRITE,
|
||||
loopcount * m_nodenum * sizeof(GpuHidHaarTreeNode));
|
||||
|
||||
scaleinfo = (detect_piramid_info *)malloc(sizeof(detect_piramid_info) * loopcount);
|
||||
for( int i = 0; i < loopcount; i++ )
|
||||
{
|
||||
sz = sizev[i];
|
||||
factor = scalev[i];
|
||||
int ystep = cvRound(std::max(2., factor));
|
||||
int width = (cols - 1 - sz.width + ystep - 1) / ystep;
|
||||
int height = (rows - 1 - sz.height + ystep - 1) / ystep;
|
||||
int grpnumperline = (width + localThreads[0] - 1) / localThreads[0];
|
||||
int totalgrp = ((height + localThreads[1] - 1) / localThreads[1]) * grpnumperline;
|
||||
|
||||
((detect_piramid_info *)scaleinfo)[i].width_height = (width << 16) | height;
|
||||
((detect_piramid_info *)scaleinfo)[i].grpnumperline_totalgrp = (grpnumperline << 16) | totalgrp;
|
||||
((detect_piramid_info *)scaleinfo)[i].imgoff = 0;
|
||||
((detect_piramid_info *)scaleinfo)[i].factor = factor;
|
||||
}
|
||||
}
|
||||
|
||||
if (loopcount != m_loopcount)
|
||||
{
|
||||
if (initialized)
|
||||
{
|
||||
openCLSafeCall(clReleaseMemObject(((OclBuffers *)buffers)->scaleinfobuffer));
|
||||
}
|
||||
((OclBuffers *)buffers)->scaleinfobuffer = openCLCreateBuffer(cv::ocl::Context::getContext(), CL_MEM_READ_ONLY, sizeof(detect_piramid_info) * loopcount);
|
||||
}
|
||||
|
||||
openCLSafeCall(clEnqueueWriteBuffer((cl_command_queue)cv::ocl::Context::getContext()->oclCommandQueue(), ((OclBuffers *)buffers)->scaleinfobuffer, 1, 0,
|
||||
sizeof(detect_piramid_info)*loopcount,
|
||||
scaleinfo, 0, NULL, NULL));
|
||||
free(scaleinfo);
|
||||
|
||||
m_loopcount = loopcount;
|
||||
}
|
||||
|
||||
// Builds the final detection list in `faces` from the candidate rectangles.
//
// If `findBiggestObject` is set and `rectList` is not empty, exactly one entry is
// produced: the rectangle with the strictly largest area (the first one wins on ties;
// if no rectangle has a positive area the zero-initialised entry is kept).
// Otherwise every rectangle of `rectList` is emitted in order.
//
// faces    - output; resized to the number of emitted detections and overwritten.
// rectList - candidate rectangles.
// rweights - neighbour count of each candidate; read at the same index as rectList,
//            so it must hold at least rectList.size() elements.
void cv::ocl::OclCascadeClassifierBuf::GenResult(CV_OUT std::vector<cv::Rect>& faces,
        const std::vector<cv::Rect> &rectList,
        const std::vector<int> &rweights)
{
    // The sequence lives in a temporary memory storage. Let a smart pointer own it so
    // that it is released when this function returns (also on exceptions); passing the
    // raw result of cvCreateMemStorage() straight to cvCreateSeq() leaked it on every call.
    cv::MemStorage tempStorage(cvCreateMemStorage(0));
    CvSeq *result_seq = cvCreateSeq( 0, sizeof(CvSeq), sizeof(CvAvgComp), tempStorage );

    if( findBiggestObject && rectList.size() )
    {
        CvAvgComp result_comp = {{0, 0, 0, 0}, 0};

        for( size_t i = 0; i < rectList.size(); i++ )
        {
            cv::Rect r = rectList[i];
            // Strict comparison: keep the first of several equally large rectangles.
            if( r.area() > cv::Rect(result_comp.rect).area() )
            {
                result_comp.rect = r;
                result_comp.neighbors = rweights[i];
            }
        }
        cvSeqPush( result_seq, &result_comp );
    }
    else
    {
        for( size_t i = 0; i < rectList.size(); i++ )
        {
            CvAvgComp c;
            c.rect = rectList[i];
            c.neighbors = rweights[i];
            cvSeqPush( result_seq, &c );
        }
    }

    // Copy the sequence out of the temporary storage before it is destroyed, then keep
    // only the rectangle of each component.
    vector<CvAvgComp> vecAvgComp;
    Seq<CvAvgComp>(result_seq).copyTo(vecAvgComp);
    faces.resize(vecAvgComp.size());
    std::transform(vecAvgComp.begin(), vecAvgComp.end(), faces.begin(), getRect());
}
|
||||
|
||||
void cv::ocl::OclCascadeClassifierBuf::release()
|
||||
{
|
||||
openCLSafeCall(clReleaseMemObject(((OclBuffers *)buffers)->stagebuffer));
|
||||
openCLSafeCall(clReleaseMemObject(((OclBuffers *)buffers)->scaleinfobuffer));
|
||||
openCLSafeCall(clReleaseMemObject(((OclBuffers *)buffers)->nodebuffer));
|
||||
openCLSafeCall(clReleaseMemObject(((OclBuffers *)buffers)->candidatebuffer));
|
||||
|
||||
if( (m_flags & CV_HAAR_SCALE_IMAGE) )
|
||||
{
|
||||
cvFree(&oldCascade->hid_cascade);
|
||||
}
|
||||
else
|
||||
{
|
||||
openCLSafeCall(clReleaseMemObject(((OclBuffers *)buffers)->newnodebuffer));
|
||||
openCLSafeCall(clReleaseMemObject(((OclBuffers *)buffers)->correctionbuffer));
|
||||
openCLSafeCall(clReleaseMemObject(((OclBuffers *)buffers)->pbuffer));
|
||||
}
|
||||
|
||||
free(buffers);
|
||||
buffers = NULL;
|
||||
}
|
||||
|
||||
#ifndef _MAX_PATH
|
||||
#define _MAX_PATH 1024
|
||||
|
@ -43,6 +43,7 @@
|
||||
//
|
||||
//M*/
|
||||
|
||||
#define CL_USE_DEPRECATED_OPENCL_1_1_APIS
|
||||
#include "precomp.hpp"
|
||||
|
||||
using namespace std;
|
||||
|
@ -327,7 +327,7 @@ static void ocl_cvMoments( const void* array, CvMoments* mom, int binary )
|
||||
mom->m12 = dstsum[8];
|
||||
mom->m03 = dstsum[9];
|
||||
delete [] dstsum;
|
||||
|
||||
openCLSafeCall(clReleaseMemObject(sum));
|
||||
icvCompleteMomentState( mom );
|
||||
}
|
||||
|
||||
|
@ -79,15 +79,73 @@
|
||||
#define ADDR_B(i, b_edge, addr) ((i) >= (b_edge) ? (i)-(b_edge) : (addr))
|
||||
#endif
|
||||
|
||||
#define THREADS 256
|
||||
#define ELEM(i, l_edge, r_edge, elem1, elem2) (i) >= (l_edge) && (i) < (r_edge) ? (elem1) : (elem2)
|
||||
|
||||
// Finishes one output row of the 8-bit single-channel box filter for this work-item.
//
// dst        - points at the first of the (up to) four destination pixels of this work-item.
// temp       - per-row partial sums in local memory, viewed as scalars; each work-item
//              sums 2*anX+1 four-element loads taken at offsets -anX..anX around its own slot.
// dst_rows, dst_cols    - destination bounds used to mask the stores.
// dst_startX, dst_x_off - used to derive the x position of the first pixel.
// alpha      - divisor; it is converted to uint4 first, so the division is an integer one.
//
// Work-items whose local id is below anX or at/after THREADS-ksX+anX+1 produce nothing.
// NOTE(review): the row index is derived from the group id only, so when a caller passes
// the second row of a pair (dst advanced by one row) the row guard still tests the first
// row - confirm this is safe for an odd dst_rows.
inline void update_dst_C1_D0(__global uchar *dst, __local uint* temp,
                             int dst_rows, int dst_cols,
                             int dst_startX, int dst_x_off,
                             float alpha)
{
    const size_t lid = get_local_id(0);

    // Border work-items only contributed partial sums; they have no pixels to write.
    if(!(lid >= anX && lid < (THREADS-ksX+anX+1)))
    {
        return;
    }

    const int posX = dst_startX - dst_x_off + (lid-anX)*4;
    const int posY = (get_group_id(1) << 1);

    // Horizontal accumulation over the kernel width.
    uint4 acc = 0;
    int shift = -anX;
    while(shift <= anX)
    {
        acc += vload4(lid, temp+shift);
        shift++;
    }

    if(posY >= dst_rows || posX >= dst_cols)
    {
        return;
    }

    acc /= (uint4) alpha;

    // Each of the four pixels is stored only if its own column lies inside the image.
    if(posX >= 0 && posX < dst_cols)
    {
        dst[0] = acc.x;
    }
    if(posX+1 >= 0 && posX+1 < dst_cols)
    {
        dst[1] = acc.y;
    }
    if(posX+2 >= 0 && posX+2 < dst_cols)
    {
        dst[2] = acc.z;
    }
    if(posX+3 >= 0 && posX+3 < dst_cols)
    {
        dst[3] = acc.w;
    }
}
|
||||
|
||||
|
||||
// Finishes one output pixel of the 8-bit four-channel box filter for this work-item.
//
// dst        - points at the destination pixel of this work-item.
// temp       - per-row partial sums in local memory, one uint4 per work-item; the
//              2*anX+1 entries starting at this work-item's own slot are summed.
// dst_rows, dst_cols    - destination bounds used to mask the store.
// dst_startX, dst_x_off - used to derive the x position of the pixel.
// alpha      - divisor applied in floating point before converting back to uchar4.
//
// Work-items whose local id is at/after THREADS-ksX+1 produce nothing.
// NOTE(review): the row index is derived from the group id only, so when a caller passes
// the second row of a pair (dst advanced by one row) the row guard still tests the first
// row - confirm this is safe for an odd dst_rows.
inline void update_dst_C4_D0(__global uchar4 *dst, __local uint4* temp,
                             int dst_rows, int dst_cols,
                             int dst_startX, int dst_x_off,
                             float alpha)
{
    const size_t lid = get_local_id(0);

    // The trailing ksX-1 work-items only supplied partial sums.
    if(lid >= (THREADS-ksX+1))
    {
        return;
    }

    const int posX = dst_startX - dst_x_off + lid;
    const int posY = (get_group_id(1) << 1);

    // Horizontal accumulation over the kernel width.
    uint4 acc = 0;
    int shift = -anX;
    while(shift <= anX)
    {
        acc += temp[lid + anX + shift];
        shift++;
    }

    const bool inside_cols = posX >= 0 && posX < dst_cols;
    const bool inside_rows = posY >= 0 && posY < dst_rows;
    if(inside_cols && inside_rows)
    {
        dst[0] = convert_uchar4(convert_float4(acc)/alpha);
    }
}
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
/////////////////////////////////////////8uC1////////////////////////////////////////////////////////
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
#define THREADS 256
|
||||
#define ELEM(i, l_edge, r_edge, elem1, elem2) (i) >= (l_edge) && (i) < (r_edge) ? (elem1) : (elem2)
|
||||
__kernel void boxFilter_C1_D0(__global const uchar * restrict src, __global uchar *dst, float alpha,
|
||||
int src_offset, int src_whole_rows, int src_whole_cols, int src_step,
|
||||
int dst_offset, int dst_rows, int dst_cols, int dst_step
|
||||
)
|
||||
int src_offset, int src_whole_rows, int src_whole_cols, int src_step,
|
||||
int dst_offset, int dst_rows, int dst_cols, int dst_step
|
||||
)
|
||||
{
|
||||
|
||||
int col = get_local_id(0);
|
||||
@ -105,115 +163,84 @@ __kernel void boxFilter_C1_D0(__global const uchar * restrict src, __global ucha
|
||||
int dst_startY = (gY << 1) + dst_y_off;
|
||||
|
||||
uint4 data[ksY+1];
|
||||
__local uint4 temp[(THREADS<<1)];
|
||||
__local uint4 temp[2][THREADS];
|
||||
|
||||
#ifdef BORDER_CONSTANT
|
||||
|
||||
for(int i=0; i < ksY+1; i++)
|
||||
for(int i=0; i < ksY+1; i++)
|
||||
{
|
||||
if(startY+i >=0 && startY+i < src_whole_rows && startX+col*4 >=0 && startX+col*4+3<src_whole_cols)
|
||||
{
|
||||
if(startY+i >=0 && startY+i < src_whole_rows && startX+col*4 >=0 && startX+col*4+3<src_whole_cols)
|
||||
data[i] = convert_uint4(vload4(col,(__global uchar*)(src+(startY+i)*src_step + startX)));
|
||||
else
|
||||
{
|
||||
data[i]=0;
|
||||
int con = startY+i >=0 && startY+i < src_whole_rows && startX+col*4 >=0 && startX+col*4<src_whole_cols;
|
||||
if(con)data[i].s0 = *(src+(startY+i)*src_step + startX + col*4);
|
||||
con = startY+i >=0 && startY+i < src_whole_rows && startX+col*4+1 >=0 && startX+col*4+1<src_whole_cols;
|
||||
if(con)data[i].s1 = *(src+(startY+i)*src_step + startX + col*4+1) ;
|
||||
con = startY+i >=0 && startY+i < src_whole_rows && startX+col*4+2 >=0 && startX+col*4+2<src_whole_cols;
|
||||
if(con)data[i].s2 = *(src+(startY+i)*src_step + startX + col*4+2);
|
||||
con = startY+i >=0 && startY+i < src_whole_rows && startX+col*4+3 >=0 && startX+col*4+3<src_whole_cols;
|
||||
if(con)data[i].s3 = *(src+(startY+i)*src_step + startX + col*4+3);
|
||||
}
|
||||
data[i].x = *(src+(startY+i)*src_step + startX + col * 4);
|
||||
data[i].y = *(src+(startY+i)*src_step + startX + col * 4 + 1);
|
||||
data[i].z = *(src+(startY+i)*src_step + startX + col * 4 + 2);
|
||||
data[i].w = *(src+(startY+i)*src_step + startX + col * 4 + 3);
|
||||
}
|
||||
else
|
||||
{
|
||||
data[i]=0;
|
||||
int con = startY+i >=0 && startY+i < src_whole_rows && startX+col*4 >=0 && startX+col*4<src_whole_cols;
|
||||
if(con)data[i].s0 = *(src+(startY+i)*src_step + startX + col*4);
|
||||
con = startY+i >=0 && startY+i < src_whole_rows && startX+col*4+1 >=0 && startX+col*4+1<src_whole_cols;
|
||||
if(con)data[i].s1 = *(src+(startY+i)*src_step + startX + col*4+1) ;
|
||||
con = startY+i >=0 && startY+i < src_whole_rows && startX+col*4+2 >=0 && startX+col*4+2<src_whole_cols;
|
||||
if(con)data[i].s2 = *(src+(startY+i)*src_step + startX + col*4+2);
|
||||
con = startY+i >=0 && startY+i < src_whole_rows && startX+col*4+3 >=0 && startX+col*4+3<src_whole_cols;
|
||||
if(con)data[i].s3 = *(src+(startY+i)*src_step + startX + col*4+3);
|
||||
}
|
||||
}
|
||||
|
||||
#else
|
||||
int not_all_in_range;
|
||||
for(int i=0; i < ksY+1; i++)
|
||||
{
|
||||
not_all_in_range = (startX+col*4<0) | (startX+col*4+3>src_whole_cols-1)
|
||||
| (startY+i<0) | (startY+i>src_whole_rows-1);
|
||||
if(not_all_in_range)
|
||||
{
|
||||
int selected_row;
|
||||
int4 selected_col;
|
||||
selected_row = ADDR_H(startY+i, 0, src_whole_rows);
|
||||
selected_row = ADDR_B(startY+i, src_whole_rows, selected_row);
|
||||
int not_all_in_range;
|
||||
for(int i=0; i < ksY+1; i++)
|
||||
{
|
||||
not_all_in_range = (startX+col*4<0) | (startX+col*4+3>src_whole_cols-1)
|
||||
| (startY+i<0) | (startY+i>src_whole_rows-1);
|
||||
if(not_all_in_range)
|
||||
{
|
||||
int selected_row;
|
||||
int4 selected_col;
|
||||
selected_row = ADDR_H(startY+i, 0, src_whole_rows);
|
||||
selected_row = ADDR_B(startY+i, src_whole_rows, selected_row);
|
||||
|
||||
selected_col.x = ADDR_L(startX+col*4, 0, src_whole_cols);
|
||||
selected_col.x = ADDR_R(startX+col*4, src_whole_cols, selected_col.x);
|
||||
selected_col.x = ADDR_L(startX+col*4, 0, src_whole_cols);
|
||||
selected_col.x = ADDR_R(startX+col*4, src_whole_cols, selected_col.x);
|
||||
|
||||
selected_col.y = ADDR_L(startX+col*4+1, 0, src_whole_cols);
|
||||
selected_col.y = ADDR_R(startX+col*4+1, src_whole_cols, selected_col.y);
|
||||
selected_col.y = ADDR_L(startX+col*4+1, 0, src_whole_cols);
|
||||
selected_col.y = ADDR_R(startX+col*4+1, src_whole_cols, selected_col.y);
|
||||
|
||||
selected_col.z = ADDR_L(startX+col*4+2, 0, src_whole_cols);
|
||||
selected_col.z = ADDR_R(startX+col*4+2, src_whole_cols, selected_col.z);
|
||||
selected_col.z = ADDR_L(startX+col*4+2, 0, src_whole_cols);
|
||||
selected_col.z = ADDR_R(startX+col*4+2, src_whole_cols, selected_col.z);
|
||||
|
||||
selected_col.w = ADDR_L(startX+col*4+3, 0, src_whole_cols);
|
||||
selected_col.w = ADDR_R(startX+col*4+3, src_whole_cols, selected_col.w);
|
||||
selected_col.w = ADDR_L(startX+col*4+3, 0, src_whole_cols);
|
||||
selected_col.w = ADDR_R(startX+col*4+3, src_whole_cols, selected_col.w);
|
||||
|
||||
data[i].x = *(src + selected_row * src_step + selected_col.x);
|
||||
data[i].y = *(src + selected_row * src_step + selected_col.y);
|
||||
data[i].z = *(src + selected_row * src_step + selected_col.z);
|
||||
data[i].w = *(src + selected_row * src_step + selected_col.w);
|
||||
}
|
||||
else
|
||||
{
|
||||
data[i] = convert_uint4(vload4(col,(__global uchar*)(src+(startY+i)*src_step + startX)));
|
||||
}
|
||||
}
|
||||
data[i].x = *(src + selected_row * src_step + selected_col.x);
|
||||
data[i].y = *(src + selected_row * src_step + selected_col.y);
|
||||
data[i].z = *(src + selected_row * src_step + selected_col.z);
|
||||
data[i].w = *(src + selected_row * src_step + selected_col.w);
|
||||
}
|
||||
else
|
||||
{
|
||||
data[i] = convert_uint4(vload4(col,(__global uchar*)(src+(startY+i)*src_step + startX)));
|
||||
}
|
||||
}
|
||||
#endif
|
||||
uint4 sum0 = 0, sum1 = 0, sum2 = 0;
|
||||
uint4 tmp_sum = 0;
|
||||
for(int i=1; i < ksY; i++)
|
||||
{
|
||||
sum0 += (data[i]);
|
||||
tmp_sum += (data[i]);
|
||||
}
|
||||
sum1 = sum0 + (data[0]);
|
||||
sum2 = sum0 + (data[ksY]);
|
||||
|
||||
int index = dst_startY * dst_step + dst_startX + (col-anX)*4;
|
||||
|
||||
temp[col] = sum1;
|
||||
temp[col+THREADS] = sum2;
|
||||
temp[0][col] = tmp_sum + (data[0]);
|
||||
temp[1][col] = tmp_sum + (data[ksY]);
|
||||
barrier(CLK_LOCAL_MEM_FENCE);
|
||||
|
||||
if(col >= anX && col < (THREADS-ksX+anX+1))
|
||||
{
|
||||
int posX = dst_startX - dst_x_off + (col-anX)*4;
|
||||
int posY = (gY << 1);
|
||||
uint4 tmp_sum1=0, tmp_sum2=0;
|
||||
for(int i=-anX; i<=anX; i++)
|
||||
{
|
||||
tmp_sum1 += vload4(col, (__local uint*)temp+i);
|
||||
}
|
||||
|
||||
for(int i=-anX; i<=anX; i++)
|
||||
{
|
||||
tmp_sum2 += vload4(col, (__local uint*)(temp+THREADS)+i);
|
||||
}
|
||||
|
||||
if(posY < dst_rows && posX < dst_cols)
|
||||
{
|
||||
if(posX >= 0 && posX < dst_cols)
|
||||
*(dst+dst_startY * dst_step + dst_startX + (col-anX)*4) = tmp_sum1.x/alpha;
|
||||
if(posX+1 >= 0 && posX+1 < dst_cols)
|
||||
*(dst+dst_startY * dst_step + dst_startX+1 + (col-anX)*4) = tmp_sum1.y/alpha;
|
||||
if(posX+2 >= 0 && posX+2 < dst_cols)
|
||||
*(dst+dst_startY * dst_step + dst_startX+2 + (col-anX)*4) = tmp_sum1.z/alpha;
|
||||
if(posX+3 >= 0 && posX+3 < dst_cols)
|
||||
*(dst+dst_startY * dst_step + dst_startX+3 + (col-anX)*4) = tmp_sum1.w/alpha;
|
||||
}
|
||||
if(posY+1 < dst_rows && posX < dst_cols)
|
||||
{
|
||||
dst_startY+=1;
|
||||
if(posX >= 0 && posX < dst_cols)
|
||||
*(dst+dst_startY * dst_step + dst_startX + (col-anX)*4) = tmp_sum2.x/alpha;
|
||||
if(posX+1 >= 0 && posX+1 < dst_cols)
|
||||
*(dst+dst_startY * dst_step + dst_startX+1 + (col-anX)*4) = tmp_sum2.y/alpha;
|
||||
if(posX+2 >= 0 && posX+2 < dst_cols)
|
||||
*(dst+dst_startY * dst_step + dst_startX+2 + (col-anX)*4) = tmp_sum2.z/alpha;
|
||||
if(posX+3 >= 0 && posX+3 < dst_cols)
|
||||
*(dst+dst_startY * dst_step + dst_startX+3 + (col-anX)*4) = tmp_sum2.w/alpha;
|
||||
}
|
||||
}
|
||||
update_dst_C1_D0(dst+index, (__local uint *)(temp[0]),
|
||||
dst_rows, dst_cols, dst_startX, dst_x_off, alpha);
|
||||
update_dst_C1_D0(dst+index+dst_step, (__local uint *)(temp[1]),
|
||||
dst_rows, dst_cols, dst_startX, dst_x_off, alpha);
|
||||
|
||||
}
|
||||
|
||||
@ -221,9 +248,9 @@ __kernel void boxFilter_C1_D0(__global const uchar * restrict src, __global ucha
|
||||
/////////////////////////////////////////8uC4////////////////////////////////////////////////////////
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
__kernel void boxFilter_C4_D0(__global const uchar4 * restrict src, __global uchar4 *dst, float alpha,
|
||||
int src_offset, int src_whole_rows, int src_whole_cols, int src_step,
|
||||
int dst_offset, int dst_rows, int dst_cols, int dst_step
|
||||
)
|
||||
int src_offset, int src_whole_rows, int src_whole_cols, int src_step,
|
||||
int dst_offset, int dst_rows, int dst_cols, int dst_step
|
||||
)
|
||||
{
|
||||
int col = get_local_id(0);
|
||||
const int gX = get_group_id(0);
|
||||
@ -238,81 +265,63 @@ __kernel void boxFilter_C4_D0(__global const uchar4 * restrict src, __global uch
|
||||
int startY = (gY << 1) - anY + src_y_off;
|
||||
int dst_startX = gX * (THREADS-ksX+1) + dst_x_off;
|
||||
int dst_startY = (gY << 1) + dst_y_off;
|
||||
//int end_addr = (src_whole_rows-1)*(src_step>>2) + src_whole_cols-4;
|
||||
|
||||
int end_addr = src_whole_cols-4;
|
||||
uint4 data[ksY+1];
|
||||
__local uint4 temp[2][THREADS];
|
||||
|
||||
#ifdef BORDER_CONSTANT
|
||||
bool con;
|
||||
uint4 ss;
|
||||
for(int i=0; i < ksY+1; i++)
|
||||
{
|
||||
con = startX+col >= 0 && startX+col < src_whole_cols && startY+i >= 0 && startY+i < src_whole_rows;
|
||||
|
||||
//int cur_addr = clamp((startY+i)*(src_step>>2)+(startX+col),0,end_addr);
|
||||
//ss = convert_uint4(src[cur_addr]);
|
||||
|
||||
int cur_col = clamp(startX + col, 0, src_whole_cols);
|
||||
if(con)
|
||||
ss = convert_uint4(src[(startY+i)*(src_step>>2) + cur_col]);
|
||||
|
||||
data[i] = con ? ss : 0;
|
||||
data[i].x = con ? src[(startY+i)*(src_step>>2) + cur_col].x : 0;
|
||||
data[i].y = con ? src[(startY+i)*(src_step>>2) + cur_col].y : 0;
|
||||
data[i].z = con ? src[(startY+i)*(src_step>>2) + cur_col].z : 0;
|
||||
data[i].w = con ? src[(startY+i)*(src_step>>2) + cur_col].w : 0;
|
||||
}
|
||||
#else
|
||||
for(int i=0; i < ksY+1; i++)
|
||||
{
|
||||
int selected_row;
|
||||
int selected_col;
|
||||
selected_row = ADDR_H(startY+i, 0, src_whole_rows);
|
||||
selected_row = ADDR_B(startY+i, src_whole_rows, selected_row);
|
||||
for(int i=0; i < ksY+1; i++)
|
||||
{
|
||||
int selected_row;
|
||||
int selected_col;
|
||||
selected_row = ADDR_H(startY+i, 0, src_whole_rows);
|
||||
selected_row = ADDR_B(startY+i, src_whole_rows, selected_row);
|
||||
|
||||
selected_col = ADDR_L(startX+col, 0, src_whole_cols);
|
||||
selected_col = ADDR_R(startX+col, src_whole_cols, selected_col);
|
||||
selected_col = ADDR_L(startX+col, 0, src_whole_cols);
|
||||
selected_col = ADDR_R(startX+col, src_whole_cols, selected_col);
|
||||
|
||||
|
||||
data[i] = convert_uint4(src[selected_row * (src_step>>2) + selected_col]);
|
||||
}
|
||||
data[i] = convert_uint4(src[selected_row * (src_step>>2) + selected_col]);
|
||||
}
|
||||
|
||||
#endif
|
||||
uint4 sum0 = 0, sum1 = 0, sum2 = 0;
|
||||
uint4 tmp_sum = 0;
|
||||
for(int i=1; i < ksY; i++)
|
||||
{
|
||||
sum0 += (data[i]);
|
||||
tmp_sum += (data[i]);
|
||||
}
|
||||
sum1 = sum0 + (data[0]);
|
||||
sum2 = sum0 + (data[ksY]);
|
||||
temp[0][col] = sum1;
|
||||
temp[1][col] = sum2;
|
||||
|
||||
int index = dst_startY * (dst_step>>2)+ dst_startX + col;
|
||||
|
||||
temp[0][col] = tmp_sum + (data[0]);
|
||||
temp[1][col] = tmp_sum + (data[ksY]);
|
||||
barrier(CLK_LOCAL_MEM_FENCE);
|
||||
if(col < (THREADS-(ksX-1)))
|
||||
{
|
||||
col += anX;
|
||||
int posX = dst_startX - dst_x_off + col - anX;
|
||||
int posY = (gY << 1);
|
||||
update_dst_C4_D0(dst+index, (__local uint4 *)(temp[0]),
|
||||
dst_rows, dst_cols, dst_startX, dst_x_off, alpha);
|
||||
update_dst_C4_D0(dst+index+(dst_step>>2), (__local uint4 *)(temp[1]),
|
||||
dst_rows, dst_cols, dst_startX, dst_x_off, alpha);
|
||||
|
||||
uint4 tmp_sum[2]={(uint4)(0,0,0,0),(uint4)(0,0,0,0)};
|
||||
for(int k=0; k<2; k++)
|
||||
for(int i=-anX; i<=anX; i++)
|
||||
{
|
||||
tmp_sum[k] += temp[k][col+i];
|
||||
}
|
||||
for(int i=0; i<2; i++)
|
||||
{
|
||||
if(posX >= 0 && posX < dst_cols && (posY+i) >= 0 && (posY+i) < dst_rows)
|
||||
dst[(dst_startY+i) * (dst_step>>2)+ dst_startX + col - anX] = convert_uchar4(convert_float4(tmp_sum[i])/alpha);
|
||||
}
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
/////////////////////////////////////////32fC1////////////////////////////////////////////////////////
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
__kernel void boxFilter_C1_D5(__global const float *restrict src, __global float *dst, float alpha,
|
||||
int src_offset, int src_whole_rows, int src_whole_cols, int src_step,
|
||||
int dst_offset, int dst_rows, int dst_cols, int dst_step
|
||||
)
|
||||
int src_offset, int src_whole_rows, int src_whole_cols, int src_step,
|
||||
int dst_offset, int dst_rows, int dst_cols, int dst_step
|
||||
)
|
||||
{
|
||||
int col = get_local_id(0);
|
||||
const int gX = get_group_id(0);
|
||||
@ -327,7 +336,6 @@ __kernel void boxFilter_C1_D5(__global const float *restrict src, __global float
|
||||
int startY = (gY << 1) - anY + src_y_off;
|
||||
int dst_startX = gX * (THREADS-ksX+1) + dst_x_off;
|
||||
int dst_startY = (gY << 1) + dst_y_off;
|
||||
int end_addr = (src_whole_rows-1)*(src_step>>2) + src_whole_cols-4;
|
||||
float data[ksY+1];
|
||||
__local float temp[2][THREADS];
|
||||
#ifdef BORDER_CONSTANT
|
||||
@ -336,28 +344,25 @@ __kernel void boxFilter_C1_D5(__global const float *restrict src, __global float
|
||||
for(int i=0; i < ksY+1; i++)
|
||||
{
|
||||
con = startX+col >= 0 && startX+col < src_whole_cols && startY+i >= 0 && startY+i < src_whole_rows;
|
||||
//int cur_addr = clamp((startY+i)*(src_step>>2)+(startX+col),0,end_addr);
|
||||
//ss = src[cur_addr];
|
||||
|
||||
int cur_col = clamp(startX + col, 0, src_whole_cols);
|
||||
//ss = src[(startY+i)*(src_step>>2) + cur_col];
|
||||
ss = (startY+i)<src_whole_rows&&(startY+i)>=0&&cur_col>=0&&cur_col<src_whole_cols?src[(startY+i)*(src_step>>2) + cur_col]:0;
|
||||
ss = (startY+i)<src_whole_rows&&(startY+i)>=0&&cur_col>=0&&cur_col<src_whole_cols?src[(startY+i)*(src_step>>2) + cur_col]:(float)0;
|
||||
|
||||
data[i] = con ? ss : 0.f;
|
||||
}
|
||||
#else
|
||||
for(int i=0; i < ksY+1; i++)
|
||||
{
|
||||
int selected_row;
|
||||
int selected_col;
|
||||
selected_row = ADDR_H(startY+i, 0, src_whole_rows);
|
||||
selected_row = ADDR_B(startY+i, src_whole_rows, selected_row);
|
||||
for(int i=0; i < ksY+1; i++)
|
||||
{
|
||||
int selected_row;
|
||||
int selected_col;
|
||||
selected_row = ADDR_H(startY+i, 0, src_whole_rows);
|
||||
selected_row = ADDR_B(startY+i, src_whole_rows, selected_row);
|
||||
|
||||
selected_col = ADDR_L(startX+col, 0, src_whole_cols);
|
||||
selected_col = ADDR_R(startX+col, src_whole_cols, selected_col);
|
||||
selected_col = ADDR_L(startX+col, 0, src_whole_cols);
|
||||
selected_col = ADDR_R(startX+col, src_whole_cols, selected_col);
|
||||
|
||||
data[i] = src[selected_row * (src_step>>2) + selected_col];
|
||||
}
|
||||
data[i] = src[selected_row * (src_step>>2) + selected_col];
|
||||
}
|
||||
|
||||
#endif
|
||||
float sum0 = 0.0, sum1 = 0.0, sum2 = 0.0;
|
||||
@ -376,7 +381,7 @@ __kernel void boxFilter_C1_D5(__global const float *restrict src, __global float
|
||||
int posX = dst_startX - dst_x_off + col - anX;
|
||||
int posY = (gY << 1);
|
||||
|
||||
float tmp_sum[2]={0.0, 0.0};
|
||||
float tmp_sum[2]= {0.0, 0.0};
|
||||
for(int k=0; k<2; k++)
|
||||
for(int i=-anX; i<=anX; i++)
|
||||
{
|
||||
@ -395,9 +400,9 @@ __kernel void boxFilter_C1_D5(__global const float *restrict src, __global float
|
||||
/////////////////////////////////////////32fC4////////////////////////////////////////////////////////
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
__kernel void boxFilter_C4_D5(__global const float4 *restrict src, __global float4 *dst, float alpha,
|
||||
int src_offset, int src_whole_rows, int src_whole_cols, int src_step,
|
||||
int dst_offset, int dst_rows, int dst_cols, int dst_step
|
||||
)
|
||||
int src_offset, int src_whole_rows, int src_whole_cols, int src_step,
|
||||
int dst_offset, int dst_rows, int dst_cols, int dst_step
|
||||
)
|
||||
{
|
||||
int col = get_local_id(0);
|
||||
const int gX = get_group_id(0);
|
||||
@ -412,7 +417,6 @@ __kernel void boxFilter_C4_D5(__global const float4 *restrict src, __global floa
|
||||
int startY = (gY << 1) - anY + src_y_off;
|
||||
int dst_startX = gX * (THREADS-ksX+1) + dst_x_off;
|
||||
int dst_startY = (gY << 1) + dst_y_off;
|
||||
int end_addr = (src_whole_rows-1)*(src_step>>4) + src_whole_cols-16;
|
||||
float4 data[ksY+1];
|
||||
__local float4 temp[2][THREADS];
|
||||
#ifdef BORDER_CONSTANT
|
||||
@ -421,28 +425,25 @@ __kernel void boxFilter_C4_D5(__global const float4 *restrict src, __global floa
|
||||
for(int i=0; i < ksY+1; i++)
|
||||
{
|
||||
con = startX+col >= 0 && startX+col < src_whole_cols && startY+i >= 0 && startY+i < src_whole_rows;
|
||||
//int cur_addr = clamp((startY+i)*(src_step>>4)+(startX+col),0,end_addr);
|
||||
//ss = src[cur_addr];
|
||||
|
||||
int cur_col = clamp(startX + col, 0, src_whole_cols);
|
||||
//ss = src[(startY+i)*(src_step>>4) + cur_col];
|
||||
ss = (startY+i)<src_whole_rows&&(startY+i)>=0&&cur_col>=0&&cur_col<src_whole_cols?src[(startY+i)*(src_step>>4) + cur_col]:0;
|
||||
ss = (startY+i)<src_whole_rows&&(startY+i)>=0&&cur_col>=0&&cur_col<src_whole_cols?src[(startY+i)*(src_step>>4) + cur_col]:(float4)0;
|
||||
|
||||
data[i] = con ? ss : (float4)(0.0,0.0,0.0,0.0);
|
||||
}
|
||||
#else
|
||||
for(int i=0; i < ksY+1; i++)
|
||||
{
|
||||
int selected_row;
|
||||
int selected_col;
|
||||
selected_row = ADDR_H(startY+i, 0, src_whole_rows);
|
||||
selected_row = ADDR_B(startY+i, src_whole_rows, selected_row);
|
||||
for(int i=0; i < ksY+1; i++)
|
||||
{
|
||||
int selected_row;
|
||||
int selected_col;
|
||||
selected_row = ADDR_H(startY+i, 0, src_whole_rows);
|
||||
selected_row = ADDR_B(startY+i, src_whole_rows, selected_row);
|
||||
|
||||
selected_col = ADDR_L(startX+col, 0, src_whole_cols);
|
||||
selected_col = ADDR_R(startX+col, src_whole_cols, selected_col);
|
||||
selected_col = ADDR_L(startX+col, 0, src_whole_cols);
|
||||
selected_col = ADDR_R(startX+col, src_whole_cols, selected_col);
|
||||
|
||||
data[i] = src[selected_row * (src_step>>4) + selected_col];
|
||||
}
|
||||
data[i] = src[selected_row * (src_step>>4) + selected_col];
|
||||
}
|
||||
|
||||
#endif
|
||||
float4 sum0 = 0.0, sum1 = 0.0, sum2 = 0.0;
|
||||
@ -461,7 +462,7 @@ __kernel void boxFilter_C4_D5(__global const float4 *restrict src, __global floa
|
||||
int posX = dst_startX - dst_x_off + col - anX;
|
||||
int posY = (gY << 1);
|
||||
|
||||
float4 tmp_sum[2]={(float4)(0.0,0.0,0.0,0.0), (float4)(0.0,0.0,0.0,0.0)};
|
||||
float4 tmp_sum[2]= {(float4)(0.0,0.0,0.0,0.0), (float4)(0.0,0.0,0.0,0.0)};
|
||||
for(int k=0; k<2; k++)
|
||||
for(int i=-anX; i<=anX; i++)
|
||||
{
|
||||
|
@ -112,7 +112,7 @@ typedef struct __attribute__((aligned (64))) GpuHidHaarClassifierCascade
|
||||
} GpuHidHaarClassifierCascade;
|
||||
|
||||
|
||||
__kernel void __attribute__((reqd_work_group_size(8,8,1)))gpuRunHaarClassifierCascade(//constant GpuHidHaarClassifierCascade * cascade,
|
||||
__kernel void __attribute__((reqd_work_group_size(8,8,1)))gpuRunHaarClassifierCascade(
|
||||
global GpuHidHaarStageClassifier * stagecascadeptr,
|
||||
global int4 * info,
|
||||
global GpuHidHaarTreeNode * nodeptr,
|
||||
@ -128,12 +128,7 @@ __kernel void __attribute__((reqd_work_group_size(8,8,1)))gpuRunHaarClassifierCa
|
||||
const int splitnode,
|
||||
const int4 p,
|
||||
const int4 pq,
|
||||
const float correction
|
||||
//const int width,
|
||||
//const int height,
|
||||
//const int grpnumperline,
|
||||
//const int totalgrp
|
||||
)
|
||||
const float correction)
|
||||
{
|
||||
int grpszx = get_local_size(0);
|
||||
int grpszy = get_local_size(1);
|
||||
@ -145,13 +140,8 @@ __kernel void __attribute__((reqd_work_group_size(8,8,1)))gpuRunHaarClassifierCa
|
||||
int lcl_sz = mul24(grpszx,grpszy);
|
||||
int lcl_id = mad24(lclidy,grpszx,lclidx);
|
||||
|
||||
//assume lcl_sz == 256 or 128 or 64
|
||||
//int lcl_sz_shift = (lcl_sz == 256) ? 8 : 7;
|
||||
//lcl_sz_shift = (lcl_sz == 64) ? 6 : lcl_sz_shift;
|
||||
__local int lclshare[1024];
|
||||
|
||||
#define OFF 0
|
||||
__local int* lcldata = lclshare + OFF;//for save win data
|
||||
__local int* lcldata = lclshare;//for save win data
|
||||
__local int* glboutindex = lcldata + 28*28;//for save global out index
|
||||
__local int* lclcount = glboutindex + 1;//for save the numuber of temp pass pixel
|
||||
__local int* lcloutindex = lclcount + 1;//for save info of temp pass pixel
|
||||
@ -181,7 +171,6 @@ __kernel void __attribute__((reqd_work_group_size(8,8,1)))gpuRunHaarClassifierCa
|
||||
int totalgrp = scaleinfo1.y & 0xffff;
|
||||
int imgoff = scaleinfo1.z;
|
||||
float factor = as_float(scaleinfo1.w);
|
||||
//int ystep =1;// factor > 2.0 ? 1 : 2;
|
||||
|
||||
__global const int * sum = sum1 + imgoff;
|
||||
__global const float * sqsum = sqsum1 + imgoff;
|
||||
@ -191,8 +180,6 @@ __kernel void __attribute__((reqd_work_group_size(8,8,1)))gpuRunHaarClassifierCa
|
||||
int grpidx = grploop - mul24(grpidy, grpnumperline);
|
||||
int x = mad24(grpidx,grpszx,lclidx);
|
||||
int y = mad24(grpidy,grpszy,lclidy);
|
||||
//candidate_result.x = convert_int_rtn(x*factor);
|
||||
//candidate_result.y = convert_int_rtn(y*factor);
|
||||
int grpoffx = x-lclidx;
|
||||
int grpoffy = y-lclidy;
|
||||
|
||||
@ -207,18 +194,11 @@ __kernel void __attribute__((reqd_work_group_size(8,8,1)))gpuRunHaarClassifierCa
|
||||
int glb_x = grpoffx + (lcl_x<<2);
|
||||
int glb_y = grpoffy + lcl_y;
|
||||
|
||||
int glb_off = mad24(glb_y,pixelstep,glb_x);
|
||||
int glb_off = mad24(min(glb_y, height - 1),pixelstep,glb_x);
|
||||
int4 data = *(__global int4*)&sum[glb_off];
|
||||
int lcl_off = mad24(lcl_y, readwidth, lcl_x<<2);
|
||||
|
||||
#if OFF
|
||||
lcldata[lcl_off] = data.x;
|
||||
lcldata[lcl_off+1] = data.y;
|
||||
lcldata[lcl_off+2] = data.z;
|
||||
lcldata[lcl_off+3] = data.w;
|
||||
#else
|
||||
vstore4(data, 0, &lcldata[lcl_off]);
|
||||
#endif
|
||||
}
|
||||
|
||||
lcloutindex[lcl_id] = 0;
|
||||
@ -231,184 +211,170 @@ __kernel void __attribute__((reqd_work_group_size(8,8,1)))gpuRunHaarClassifierCa
|
||||
int lcl_off = mad24(lclidy,readwidth,lclidx);
|
||||
int4 cascadeinfo1, cascadeinfo2;
|
||||
cascadeinfo1 = p;
|
||||
cascadeinfo2 = pq;// + mad24(y, pixelstep, x);
|
||||
cascadeinfo2 = pq;
|
||||
|
||||
cascadeinfo1.x +=lcl_off;
|
||||
cascadeinfo1.z +=lcl_off;
|
||||
mean = (lcldata[mad24(cascadeinfo1.y,readwidth,cascadeinfo1.x)] - lcldata[mad24(cascadeinfo1.y,readwidth,cascadeinfo1.z)] -
|
||||
lcldata[mad24(cascadeinfo1.w,readwidth,cascadeinfo1.x)] + lcldata[mad24(cascadeinfo1.w,readwidth,cascadeinfo1.z)])
|
||||
*correction;
|
||||
|
||||
//if((x < width) && (y < height))
|
||||
int p_offset = mad24(y, pixelstep, x);
|
||||
|
||||
cascadeinfo2.x +=p_offset;
|
||||
cascadeinfo2.z +=p_offset;
|
||||
variance_norm_factor =sqsum[mad24(cascadeinfo2.y, pixelstep, cascadeinfo2.x)] - sqsum[mad24(cascadeinfo2.y, pixelstep, cascadeinfo2.z)] -
|
||||
sqsum[mad24(cascadeinfo2.w, pixelstep, cascadeinfo2.x)] + sqsum[mad24(cascadeinfo2.w, pixelstep, cascadeinfo2.z)];
|
||||
|
||||
variance_norm_factor = variance_norm_factor * correction - mean * mean;
|
||||
variance_norm_factor = variance_norm_factor >=0.f ? sqrt(variance_norm_factor) : 1.f;
|
||||
|
||||
for(int stageloop = start_stage; (stageloop < split_stage) && result; stageloop++ )
|
||||
{
|
||||
cascadeinfo1.x +=lcl_off;
|
||||
cascadeinfo1.z +=lcl_off;
|
||||
mean = (lcldata[mad24(cascadeinfo1.y,readwidth,cascadeinfo1.x)] - lcldata[mad24(cascadeinfo1.y,readwidth,cascadeinfo1.z)] -
|
||||
lcldata[mad24(cascadeinfo1.w,readwidth,cascadeinfo1.x)] + lcldata[mad24(cascadeinfo1.w,readwidth,cascadeinfo1.z)])
|
||||
*correction;
|
||||
float stage_sum = 0.f;
|
||||
int2 stageinfo = *(global int2*)(stagecascadeptr+stageloop);
|
||||
float stagethreshold = as_float(stageinfo.y);
|
||||
for(int nodeloop = 0; nodeloop < stageinfo.x; nodeloop++ )
|
||||
{
|
||||
__global GpuHidHaarTreeNode* currentnodeptr = (nodeptr + nodecounter);
|
||||
|
||||
int p_offset = mad24(y, pixelstep, x);
|
||||
int4 info1 = *(__global int4*)(&(currentnodeptr->p[0][0]));
|
||||
int4 info2 = *(__global int4*)(&(currentnodeptr->p[1][0]));
|
||||
int4 info3 = *(__global int4*)(&(currentnodeptr->p[2][0]));
|
||||
float4 w = *(__global float4*)(&(currentnodeptr->weight[0]));
|
||||
float2 alpha2 = *(__global float2*)(&(currentnodeptr->alpha[0]));
|
||||
float nodethreshold = w.w * variance_norm_factor;
|
||||
|
||||
cascadeinfo2.x +=p_offset;
|
||||
cascadeinfo2.z +=p_offset;
|
||||
variance_norm_factor =sqsum[mad24(cascadeinfo2.y, pixelstep, cascadeinfo2.x)] - sqsum[mad24(cascadeinfo2.y, pixelstep, cascadeinfo2.z)] -
|
||||
sqsum[mad24(cascadeinfo2.w, pixelstep, cascadeinfo2.x)] + sqsum[mad24(cascadeinfo2.w, pixelstep, cascadeinfo2.z)];
|
||||
info1.x +=lcl_off;
|
||||
info1.z +=lcl_off;
|
||||
info2.x +=lcl_off;
|
||||
info2.z +=lcl_off;
|
||||
|
||||
variance_norm_factor = variance_norm_factor * correction - mean * mean;
|
||||
variance_norm_factor = variance_norm_factor >=0.f ? sqrt(variance_norm_factor) : 1.f;
|
||||
//if( cascade->is_stump_based )
|
||||
//{
|
||||
for(int stageloop = start_stage; (stageloop < split_stage) && result; stageloop++ )
|
||||
float classsum = (lcldata[mad24(info1.y,readwidth,info1.x)] - lcldata[mad24(info1.y,readwidth,info1.z)] -
|
||||
lcldata[mad24(info1.w,readwidth,info1.x)] + lcldata[mad24(info1.w,readwidth,info1.z)]) * w.x;
|
||||
|
||||
classsum += (lcldata[mad24(info2.y,readwidth,info2.x)] - lcldata[mad24(info2.y,readwidth,info2.z)] -
|
||||
lcldata[mad24(info2.w,readwidth,info2.x)] + lcldata[mad24(info2.w,readwidth,info2.z)]) * w.y;
|
||||
|
||||
info3.x +=lcl_off;
|
||||
info3.z +=lcl_off;
|
||||
classsum += (lcldata[mad24(info3.y,readwidth,info3.x)] - lcldata[mad24(info3.y,readwidth,info3.z)] -
|
||||
lcldata[mad24(info3.w,readwidth,info3.x)] + lcldata[mad24(info3.w,readwidth,info3.z)]) * w.z;
|
||||
|
||||
stage_sum += classsum >= nodethreshold ? alpha2.y : alpha2.x;
|
||||
nodecounter++;
|
||||
}
|
||||
|
||||
result = (stage_sum >= stagethreshold);
|
||||
}
|
||||
|
||||
if(result && (x < width) && (y < height))
|
||||
{
|
||||
int queueindex = atomic_inc(lclcount);
|
||||
lcloutindex[queueindex<<1] = (lclidy << 16) | lclidx;
|
||||
lcloutindex[(queueindex<<1)+1] = as_int(variance_norm_factor);
|
||||
}
|
||||
barrier(CLK_LOCAL_MEM_FENCE);
|
||||
int queuecount = lclcount[0];
|
||||
barrier(CLK_LOCAL_MEM_FENCE);
|
||||
nodecounter = splitnode;
|
||||
for(int stageloop = split_stage; stageloop< end_stage && queuecount>0; stageloop++)
|
||||
{
|
||||
lclcount[0]=0;
|
||||
barrier(CLK_LOCAL_MEM_FENCE);
|
||||
|
||||
int2 stageinfo = *(global int2*)(stagecascadeptr+stageloop);
|
||||
float stagethreshold = as_float(stageinfo.y);
|
||||
|
||||
int perfscale = queuecount > 4 ? 3 : 2;
|
||||
int queuecount_loop = (queuecount + (1<<perfscale)-1) >> perfscale;
|
||||
int lcl_compute_win = lcl_sz >> perfscale;
|
||||
int lcl_compute_win_id = (lcl_id >>(6-perfscale));
|
||||
int lcl_loops = (stageinfo.x + lcl_compute_win -1) >> (6-perfscale);
|
||||
int lcl_compute_id = lcl_id - (lcl_compute_win_id << (6-perfscale));
|
||||
for(int queueloop=0; queueloop<queuecount_loop; queueloop++)
|
||||
{
|
||||
float stage_sum = 0.f;
|
||||
int2 stageinfo = *(global int2*)(stagecascadeptr+stageloop);
|
||||
float stagethreshold = as_float(stageinfo.y);
|
||||
for(int nodeloop = 0; nodeloop < stageinfo.x; nodeloop++ )
|
||||
int temp_coord = lcloutindex[lcl_compute_win_id<<1];
|
||||
float variance_norm_factor = as_float(lcloutindex[(lcl_compute_win_id<<1)+1]);
|
||||
int queue_pixel = mad24(((temp_coord & (int)0xffff0000)>>16),readwidth,temp_coord & 0xffff);
|
||||
|
||||
if(lcl_compute_win_id < queuecount)
|
||||
{
|
||||
__global GpuHidHaarTreeNode* currentnodeptr = (nodeptr + nodecounter);
|
||||
|
||||
int4 info1 = *(__global int4*)(&(currentnodeptr->p[0][0]));
|
||||
int4 info2 = *(__global int4*)(&(currentnodeptr->p[1][0]));
|
||||
int4 info3 = *(__global int4*)(&(currentnodeptr->p[2][0]));
|
||||
float4 w = *(__global float4*)(&(currentnodeptr->weight[0]));
|
||||
float2 alpha2 = *(__global float2*)(&(currentnodeptr->alpha[0]));
|
||||
float nodethreshold = w.w * variance_norm_factor;
|
||||
int tempnodecounter = lcl_compute_id;
|
||||
float part_sum = 0.f;
|
||||
for(int lcl_loop=0; lcl_loop<lcl_loops && tempnodecounter<stageinfo.x; lcl_loop++)
|
||||
{
|
||||
__global GpuHidHaarTreeNode* currentnodeptr = (nodeptr + nodecounter + tempnodecounter);
|
||||
|
||||
info1.x +=lcl_off;
|
||||
info1.z +=lcl_off;
|
||||
info2.x +=lcl_off;
|
||||
info2.z +=lcl_off;
|
||||
int4 info1 = *(__global int4*)(&(currentnodeptr->p[0][0]));
|
||||
int4 info2 = *(__global int4*)(&(currentnodeptr->p[1][0]));
|
||||
int4 info3 = *(__global int4*)(&(currentnodeptr->p[2][0]));
|
||||
float4 w = *(__global float4*)(&(currentnodeptr->weight[0]));
|
||||
float2 alpha2 = *(__global float2*)(&(currentnodeptr->alpha[0]));
|
||||
float nodethreshold = w.w * variance_norm_factor;
|
||||
|
||||
float classsum = (lcldata[mad24(info1.y,readwidth,info1.x)] - lcldata[mad24(info1.y,readwidth,info1.z)] -
|
||||
lcldata[mad24(info1.w,readwidth,info1.x)] + lcldata[mad24(info1.w,readwidth,info1.z)]) * w.x;
|
||||
info1.x +=queue_pixel;
|
||||
info1.z +=queue_pixel;
|
||||
info2.x +=queue_pixel;
|
||||
info2.z +=queue_pixel;
|
||||
|
||||
float classsum = (lcldata[mad24(info1.y,readwidth,info1.x)] - lcldata[mad24(info1.y,readwidth,info1.z)] -
|
||||
lcldata[mad24(info1.w,readwidth,info1.x)] + lcldata[mad24(info1.w,readwidth,info1.z)]) * w.x;
|
||||
|
||||
|
||||
classsum += (lcldata[mad24(info2.y,readwidth,info2.x)] - lcldata[mad24(info2.y,readwidth,info2.z)] -
|
||||
lcldata[mad24(info2.w,readwidth,info2.x)] + lcldata[mad24(info2.w,readwidth,info2.z)]) * w.y;
|
||||
classsum += (lcldata[mad24(info2.y,readwidth,info2.x)] - lcldata[mad24(info2.y,readwidth,info2.z)] -
|
||||
lcldata[mad24(info2.w,readwidth,info2.x)] + lcldata[mad24(info2.w,readwidth,info2.z)]) * w.y;
|
||||
|
||||
info3.x +=queue_pixel;
|
||||
info3.z +=queue_pixel;
|
||||
classsum += (lcldata[mad24(info3.y,readwidth,info3.x)] - lcldata[mad24(info3.y,readwidth,info3.z)] -
|
||||
lcldata[mad24(info3.w,readwidth,info3.x)] + lcldata[mad24(info3.w,readwidth,info3.z)]) * w.z;
|
||||
|
||||
//if((info3.z - info3.x) && (!stageinfo.z))
|
||||
//{
|
||||
info3.x +=lcl_off;
|
||||
info3.z +=lcl_off;
|
||||
classsum += (lcldata[mad24(info3.y,readwidth,info3.x)] - lcldata[mad24(info3.y,readwidth,info3.z)] -
|
||||
lcldata[mad24(info3.w,readwidth,info3.x)] + lcldata[mad24(info3.w,readwidth,info3.z)]) * w.z;
|
||||
//}
|
||||
stage_sum += classsum >= nodethreshold ? alpha2.y : alpha2.x;
|
||||
nodecounter++;
|
||||
part_sum += classsum >= nodethreshold ? alpha2.y : alpha2.x;
|
||||
tempnodecounter +=lcl_compute_win;
|
||||
}//end for(int lcl_loop=0;lcl_loop<lcl_loops;lcl_loop++)
|
||||
partialsum[lcl_id]=part_sum;
|
||||
}
|
||||
|
||||
result = (stage_sum >= stagethreshold);
|
||||
}
|
||||
|
||||
if(result && (x < width) && (y < height))
|
||||
{
|
||||
int queueindex = atomic_inc(lclcount);
|
||||
lcloutindex[queueindex<<1] = (lclidy << 16) | lclidx;
|
||||
lcloutindex[(queueindex<<1)+1] = as_int(variance_norm_factor);
|
||||
}
|
||||
barrier(CLK_LOCAL_MEM_FENCE);
|
||||
int queuecount = lclcount[0];
|
||||
barrier(CLK_LOCAL_MEM_FENCE);
|
||||
nodecounter = splitnode;
|
||||
for(int stageloop = split_stage; stageloop< end_stage && queuecount>0; stageloop++)
|
||||
{
|
||||
//barrier(CLK_LOCAL_MEM_FENCE);
|
||||
//if(lcl_id == 0)
|
||||
lclcount[0]=0;
|
||||
barrier(CLK_LOCAL_MEM_FENCE);
|
||||
|
||||
int2 stageinfo = *(global int2*)(stagecascadeptr+stageloop);
|
||||
float stagethreshold = as_float(stageinfo.y);
|
||||
|
||||
int perfscale = queuecount > 4 ? 3 : 2;
|
||||
int queuecount_loop = (queuecount + (1<<perfscale)-1) >> perfscale;
|
||||
int lcl_compute_win = lcl_sz >> perfscale;
|
||||
int lcl_compute_win_id = (lcl_id >>(6-perfscale));
|
||||
int lcl_loops = (stageinfo.x + lcl_compute_win -1) >> (6-perfscale);
|
||||
int lcl_compute_id = lcl_id - (lcl_compute_win_id << (6-perfscale));
|
||||
for(int queueloop=0; queueloop<queuecount_loop/* && lcl_compute_win_id < queuecount*/; queueloop++)
|
||||
if(lcl_compute_win_id < queuecount)
|
||||
{
|
||||
float stage_sum = 0.f;
|
||||
int temp_coord = lcloutindex[lcl_compute_win_id<<1];
|
||||
float variance_norm_factor = as_float(lcloutindex[(lcl_compute_win_id<<1)+1]);
|
||||
int queue_pixel = mad24(((temp_coord & (int)0xffff0000)>>16),readwidth,temp_coord & 0xffff);
|
||||
|
||||
//barrier(CLK_LOCAL_MEM_FENCE);
|
||||
if(lcl_compute_win_id < queuecount)
|
||||
for(int i=0; i<lcl_compute_win && (lcl_compute_id==0); i++)
|
||||
{
|
||||
|
||||
int tempnodecounter = lcl_compute_id;
|
||||
float part_sum = 0.f;
|
||||
for(int lcl_loop=0; lcl_loop<lcl_loops && tempnodecounter<stageinfo.x; lcl_loop++)
|
||||
{
|
||||
__global GpuHidHaarTreeNode* currentnodeptr = (nodeptr + nodecounter + tempnodecounter);
|
||||
|
||||
int4 info1 = *(__global int4*)(&(currentnodeptr->p[0][0]));
|
||||
int4 info2 = *(__global int4*)(&(currentnodeptr->p[1][0]));
|
||||
int4 info3 = *(__global int4*)(&(currentnodeptr->p[2][0]));
|
||||
float4 w = *(__global float4*)(&(currentnodeptr->weight[0]));
|
||||
float2 alpha2 = *(__global float2*)(&(currentnodeptr->alpha[0]));
|
||||
float nodethreshold = w.w * variance_norm_factor;
|
||||
|
||||
info1.x +=queue_pixel;
|
||||
info1.z +=queue_pixel;
|
||||
info2.x +=queue_pixel;
|
||||
info2.z +=queue_pixel;
|
||||
|
||||
float classsum = (lcldata[mad24(info1.y,readwidth,info1.x)] - lcldata[mad24(info1.y,readwidth,info1.z)] -
|
||||
lcldata[mad24(info1.w,readwidth,info1.x)] + lcldata[mad24(info1.w,readwidth,info1.z)]) * w.x;
|
||||
|
||||
|
||||
classsum += (lcldata[mad24(info2.y,readwidth,info2.x)] - lcldata[mad24(info2.y,readwidth,info2.z)] -
|
||||
lcldata[mad24(info2.w,readwidth,info2.x)] + lcldata[mad24(info2.w,readwidth,info2.z)]) * w.y;
|
||||
//if((info3.z - info3.x) && (!stageinfo.z))
|
||||
//{
|
||||
info3.x +=queue_pixel;
|
||||
info3.z +=queue_pixel;
|
||||
classsum += (lcldata[mad24(info3.y,readwidth,info3.x)] - lcldata[mad24(info3.y,readwidth,info3.z)] -
|
||||
lcldata[mad24(info3.w,readwidth,info3.x)] + lcldata[mad24(info3.w,readwidth,info3.z)]) * w.z;
|
||||
//}
|
||||
part_sum += classsum >= nodethreshold ? alpha2.y : alpha2.x;
|
||||
tempnodecounter +=lcl_compute_win;
|
||||
}//end for(int lcl_loop=0;lcl_loop<lcl_loops;lcl_loop++)
|
||||
partialsum[lcl_id]=part_sum;
|
||||
stage_sum += partialsum[lcl_id+i];
|
||||
}
|
||||
barrier(CLK_LOCAL_MEM_FENCE);
|
||||
if(lcl_compute_win_id < queuecount)
|
||||
if(stage_sum >= stagethreshold && (lcl_compute_id==0))
|
||||
{
|
||||
for(int i=0; i<lcl_compute_win && (lcl_compute_id==0); i++)
|
||||
{
|
||||
stage_sum += partialsum[lcl_id+i];
|
||||
}
|
||||
if(stage_sum >= stagethreshold && (lcl_compute_id==0))
|
||||
{
|
||||
int queueindex = atomic_inc(lclcount);
|
||||
lcloutindex[queueindex<<1] = temp_coord;
|
||||
lcloutindex[(queueindex<<1)+1] = as_int(variance_norm_factor);
|
||||
}
|
||||
lcl_compute_win_id +=(1<<perfscale);
|
||||
int queueindex = atomic_inc(lclcount);
|
||||
lcloutindex[queueindex<<1] = temp_coord;
|
||||
lcloutindex[(queueindex<<1)+1] = as_int(variance_norm_factor);
|
||||
}
|
||||
barrier(CLK_LOCAL_MEM_FENCE);
|
||||
}//end for(int queueloop=0;queueloop<queuecount_loop;queueloop++)
|
||||
//barrier(CLK_LOCAL_MEM_FENCE);
|
||||
queuecount = lclcount[0];
|
||||
lcl_compute_win_id +=(1<<perfscale);
|
||||
}
|
||||
barrier(CLK_LOCAL_MEM_FENCE);
|
||||
nodecounter += stageinfo.x;
|
||||
}//end for(int stageloop = splitstage; stageloop< endstage && queuecount>0;stageloop++)
|
||||
//barrier(CLK_LOCAL_MEM_FENCE);
|
||||
if(lcl_id<queuecount)
|
||||
{
|
||||
int temp = lcloutindex[lcl_id<<1];
|
||||
int x = mad24(grpidx,grpszx,temp & 0xffff);
|
||||
int y = mad24(grpidy,grpszy,((temp & (int)0xffff0000) >> 16));
|
||||
temp = glboutindex[0];
|
||||
int4 candidate_result;
|
||||
candidate_result.zw = (int2)convert_int_rtn(factor*20.f);
|
||||
candidate_result.x = convert_int_rtn(x*factor);
|
||||
candidate_result.y = convert_int_rtn(y*factor);
|
||||
atomic_inc(glboutindex);
|
||||
candidate[outputoff+temp+lcl_id] = candidate_result;
|
||||
}
|
||||
}//end for(int queueloop=0;queueloop<queuecount_loop;queueloop++)
|
||||
|
||||
queuecount = lclcount[0];
|
||||
barrier(CLK_LOCAL_MEM_FENCE);
|
||||
}//end if((x < width) && (y < height))
|
||||
nodecounter += stageinfo.x;
|
||||
}//end for(int stageloop = splitstage; stageloop< endstage && queuecount>0;stageloop++)
|
||||
|
||||
if(lcl_id<queuecount)
|
||||
{
|
||||
int temp = lcloutindex[lcl_id<<1];
|
||||
int x = mad24(grpidx,grpszx,temp & 0xffff);
|
||||
int y = mad24(grpidy,grpszy,((temp & (int)0xffff0000) >> 16));
|
||||
temp = glboutindex[0];
|
||||
int4 candidate_result;
|
||||
candidate_result.zw = (int2)convert_int_rtn(factor*20.f);
|
||||
candidate_result.x = convert_int_rtn(x*factor);
|
||||
candidate_result.y = convert_int_rtn(y*factor);
|
||||
atomic_inc(glboutindex);
|
||||
candidate[outputoff+temp+lcl_id] = candidate_result;
|
||||
}
|
||||
barrier(CLK_LOCAL_MEM_FENCE);
|
||||
}//end for(int grploop=grpidx;grploop<totalgrp;grploop+=grpnumx)
|
||||
//outputoff +=mul24(width,height);
|
||||
}//end for(int scalei = 0; scalei <loopcount; scalei++)
|
||||
}
|
||||
|
||||
|
@ -16,6 +16,7 @@
|
||||
//
|
||||
// @Authors
|
||||
// Wu Xinglong, wxl370@126.com
|
||||
// Sen Liu, swjtuls1987@126.com
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without modification,
|
||||
// are permitted provided that the following conditions are met:
|
||||
@ -52,11 +53,11 @@ typedef struct __attribute__((aligned(128))) GpuHidHaarFeature
|
||||
{
|
||||
struct __attribute__((aligned(32)))
|
||||
{
|
||||
int p0 __attribute__((aligned(4)));
|
||||
int p1 __attribute__((aligned(4)));
|
||||
int p2 __attribute__((aligned(4)));
|
||||
int p3 __attribute__((aligned(4)));
|
||||
float weight __attribute__((aligned(4)));
|
||||
int p0 __attribute__((aligned(4)));
|
||||
int p1 __attribute__((aligned(4)));
|
||||
int p2 __attribute__((aligned(4)));
|
||||
int p3 __attribute__((aligned(4)));
|
||||
float weight __attribute__((aligned(4)));
|
||||
}
|
||||
rect[CV_HAAR_FEATURE_MAX] __attribute__((aligned(32)));
|
||||
}
|
||||
@ -113,173 +114,168 @@ __kernel void gpuRunHaarClassifierCascade_scaled2(
|
||||
global const int *restrict sum,
|
||||
global const float *restrict sqsum,
|
||||
global int4 *candidate,
|
||||
const int rows,
|
||||
const int cols,
|
||||
const int step,
|
||||
const int loopcount,
|
||||
const int start_stage,
|
||||
const int split_stage,
|
||||
const int end_stage,
|
||||
const int startnode,
|
||||
const int splitnode,
|
||||
global int4 *p,
|
||||
//const int4 * pq,
|
||||
global float *correction,
|
||||
const int nodecount)
|
||||
{
|
||||
int grpszx = get_local_size(0);
|
||||
int grpszy = get_local_size(1);
|
||||
int grpnumx = get_num_groups(0);
|
||||
int grpidx = get_group_id(0);
|
||||
int lclidx = get_local_id(0);
|
||||
int lclidy = get_local_id(1);
|
||||
int lcl_sz = mul24(grpszx, grpszy);
|
||||
int lcl_id = mad24(lclidy, grpszx, lclidx);
|
||||
__local int lclshare[1024];
|
||||
__local int *glboutindex = lclshare + 0;
|
||||
__local int *lclcount = glboutindex + 1;
|
||||
__local int *lcloutindex = lclcount + 1;
|
||||
__local float *partialsum = (__local float *)(lcloutindex + (lcl_sz << 1));
|
||||
glboutindex[0] = 0;
|
||||
int outputoff = mul24(grpidx, 256);
|
||||
candidate[outputoff + (lcl_id << 2)] = (int4)0;
|
||||
candidate[outputoff + (lcl_id << 2) + 1] = (int4)0;
|
||||
candidate[outputoff + (lcl_id << 2) + 2] = (int4)0;
|
||||
candidate[outputoff + (lcl_id << 2) + 3] = (int4)0;
|
||||
int grpszx = get_local_size(0);
|
||||
int grpszy = get_local_size(1);
|
||||
int grpnumx = get_num_groups(0);
|
||||
int grpidx = get_group_id(0);
|
||||
int lclidx = get_local_id(0);
|
||||
int lclidy = get_local_id(1);
|
||||
int lcl_sz = mul24(grpszx, grpszy);
|
||||
int lcl_id = mad24(lclidy, grpszx, lclidx);
|
||||
__local int glboutindex[1];
|
||||
__local int lclcount[1];
|
||||
__local int lcloutindex[64];
|
||||
glboutindex[0] = 0;
|
||||
int outputoff = mul24(grpidx, 256);
|
||||
candidate[outputoff + (lcl_id << 2)] = (int4)0;
|
||||
candidate[outputoff + (lcl_id << 2) + 1] = (int4)0;
|
||||
candidate[outputoff + (lcl_id << 2) + 2] = (int4)0;
|
||||
candidate[outputoff + (lcl_id << 2) + 3] = (int4)0;
|
||||
int max_idx = rows * cols - 1;
|
||||
for (int scalei = 0; scalei < loopcount; scalei++)
|
||||
{
|
||||
int4 scaleinfo1;
|
||||
scaleinfo1 = info[scalei];
|
||||
int width = (scaleinfo1.x & 0xffff0000) >> 16;
|
||||
int height = scaleinfo1.x & 0xffff;
|
||||
int grpnumperline = (scaleinfo1.y & 0xffff0000) >> 16;
|
||||
int totalgrp = scaleinfo1.y & 0xffff;
|
||||
float factor = as_float(scaleinfo1.w);
|
||||
float correction_t = correction[scalei];
|
||||
int ystep = (int)(max(2.0f, factor) + 0.5f);
|
||||
|
||||
for (int scalei = 0; scalei < loopcount; scalei++)
|
||||
for (int grploop = get_group_id(0); grploop < totalgrp; grploop += grpnumx)
|
||||
{
|
||||
int4 scaleinfo1;
|
||||
scaleinfo1 = info[scalei];
|
||||
int width = (scaleinfo1.x & 0xffff0000) >> 16;
|
||||
int height = scaleinfo1.x & 0xffff;
|
||||
int grpnumperline = (scaleinfo1.y & 0xffff0000) >> 16;
|
||||
int totalgrp = scaleinfo1.y & 0xffff;
|
||||
float factor = as_float(scaleinfo1.w);
|
||||
float correction_t = correction[scalei];
|
||||
int ystep = (int)(max(2.0f, factor) + 0.5f);
|
||||
int4 cascadeinfo = p[scalei];
|
||||
int grpidy = grploop / grpnumperline;
|
||||
int grpidx = grploop - mul24(grpidy, grpnumperline);
|
||||
int ix = mad24(grpidx, grpszx, lclidx);
|
||||
int iy = mad24(grpidy, grpszy, lclidy);
|
||||
int x = ix * ystep;
|
||||
int y = iy * ystep;
|
||||
lcloutindex[lcl_id] = 0;
|
||||
lclcount[0] = 0;
|
||||
int nodecounter;
|
||||
float mean, variance_norm_factor;
|
||||
//if((ix < width) && (iy < height))
|
||||
{
|
||||
const int p_offset = mad24(y, step, x);
|
||||
cascadeinfo.x += p_offset;
|
||||
cascadeinfo.z += p_offset;
|
||||
mean = (sum[clamp(mad24(cascadeinfo.y, step, cascadeinfo.x), 0, max_idx)] - sum[clamp(mad24(cascadeinfo.y, step, cascadeinfo.z), 0, max_idx)] -
|
||||
sum[clamp(mad24(cascadeinfo.w, step, cascadeinfo.x), 0, max_idx)] + sum[clamp(mad24(cascadeinfo.w, step, cascadeinfo.z), 0, max_idx)])
|
||||
* correction_t;
|
||||
variance_norm_factor = sqsum[clamp(mad24(cascadeinfo.y, step, cascadeinfo.x), 0, max_idx)] - sqsum[clamp(mad24(cascadeinfo.y, step, cascadeinfo.z), 0, max_idx)] -
|
||||
sqsum[clamp(mad24(cascadeinfo.w, step, cascadeinfo.x), 0, max_idx)] + sqsum[clamp(mad24(cascadeinfo.w, step, cascadeinfo.z), 0, max_idx)];
|
||||
variance_norm_factor = variance_norm_factor * correction_t - mean * mean;
|
||||
variance_norm_factor = variance_norm_factor >= 0.f ? sqrt(variance_norm_factor) : 1.f;
|
||||
bool result = true;
|
||||
nodecounter = startnode + nodecount * scalei;
|
||||
|
||||
for (int grploop = get_group_id(0); grploop < totalgrp; grploop += grpnumx)
|
||||
for (int stageloop = start_stage; (stageloop < end_stage) && result; stageloop++)
|
||||
{
|
||||
int4 cascadeinfo = p[scalei];
|
||||
int grpidy = grploop / grpnumperline;
|
||||
int grpidx = grploop - mul24(grpidy, grpnumperline);
|
||||
int ix = mad24(grpidx, grpszx, lclidx);
|
||||
int iy = mad24(grpidy, grpszy, lclidy);
|
||||
int x = ix * ystep;
|
||||
int y = iy * ystep;
|
||||
lcloutindex[lcl_id] = 0;
|
||||
lclcount[0] = 0;
|
||||
int result = 1, nodecounter;
|
||||
float mean, variance_norm_factor;
|
||||
//if((ix < width) && (iy < height))
|
||||
{
|
||||
const int p_offset = mad24(y, step, x);
|
||||
cascadeinfo.x += p_offset;
|
||||
cascadeinfo.z += p_offset;
|
||||
mean = (sum[mad24(cascadeinfo.y, step, cascadeinfo.x)] - sum[mad24(cascadeinfo.y, step, cascadeinfo.z)] -
|
||||
sum[mad24(cascadeinfo.w, step, cascadeinfo.x)] + sum[mad24(cascadeinfo.w, step, cascadeinfo.z)])
|
||||
* correction_t;
|
||||
variance_norm_factor = sqsum[mad24(cascadeinfo.y, step, cascadeinfo.x)] - sqsum[mad24(cascadeinfo.y, step, cascadeinfo.z)] -
|
||||
sqsum[mad24(cascadeinfo.w, step, cascadeinfo.x)] + sqsum[mad24(cascadeinfo.w, step, cascadeinfo.z)];
|
||||
variance_norm_factor = variance_norm_factor * correction_t - mean * mean;
|
||||
variance_norm_factor = variance_norm_factor >= 0.f ? sqrt(variance_norm_factor) : 1.f;
|
||||
result = 1;
|
||||
nodecounter = startnode + nodecount * scalei;
|
||||
|
||||
for (int stageloop = start_stage; stageloop < end_stage && result; stageloop++)
|
||||
{
|
||||
float stage_sum = 0.f;
|
||||
int4 stageinfo = *(global int4 *)(stagecascadeptr + stageloop);
|
||||
float stagethreshold = as_float(stageinfo.y);
|
||||
|
||||
for (int nodeloop = 0; nodeloop < stageinfo.x; nodeloop++)
|
||||
{
|
||||
__global GpuHidHaarTreeNode *currentnodeptr = (nodeptr + nodecounter);
|
||||
int4 info1 = *(__global int4 *)(&(currentnodeptr->p[0][0]));
|
||||
int4 info2 = *(__global int4 *)(&(currentnodeptr->p[1][0]));
|
||||
int4 info3 = *(__global int4 *)(&(currentnodeptr->p[2][0]));
|
||||
float4 w = *(__global float4 *)(&(currentnodeptr->weight[0]));
|
||||
float2 alpha2 = *(__global float2 *)(&(currentnodeptr->alpha[0]));
|
||||
float nodethreshold = w.w * variance_norm_factor;
|
||||
info1.x += p_offset;
|
||||
info1.z += p_offset;
|
||||
info2.x += p_offset;
|
||||
info2.z += p_offset;
|
||||
float classsum = (sum[mad24(info1.y, step, info1.x)] - sum[mad24(info1.y, step, info1.z)] -
|
||||
sum[mad24(info1.w, step, info1.x)] + sum[mad24(info1.w, step, info1.z)]) * w.x;
|
||||
classsum += (sum[mad24(info2.y, step, info2.x)] - sum[mad24(info2.y, step, info2.z)] -
|
||||
sum[mad24(info2.w, step, info2.x)] + sum[mad24(info2.w, step, info2.z)]) * w.y;
|
||||
info3.x += p_offset;
|
||||
info3.z += p_offset;
|
||||
classsum += (sum[mad24(info3.y, step, info3.x)] - sum[mad24(info3.y, step, info3.z)] -
|
||||
sum[mad24(info3.w, step, info3.x)] + sum[mad24(info3.w, step, info3.z)]) * w.z;
|
||||
stage_sum += classsum >= nodethreshold ? alpha2.y : alpha2.x;
|
||||
nodecounter++;
|
||||
}
|
||||
|
||||
result = (stage_sum >= stagethreshold);
|
||||
}
|
||||
|
||||
if (result && (ix < width) && (iy < height))
|
||||
{
|
||||
int queueindex = atomic_inc(lclcount);
|
||||
lcloutindex[queueindex << 1] = (y << 16) | x;
|
||||
lcloutindex[(queueindex << 1) + 1] = as_int(variance_norm_factor);
|
||||
}
|
||||
|
||||
barrier(CLK_LOCAL_MEM_FENCE);
|
||||
int queuecount = lclcount[0];
|
||||
nodecounter = splitnode + nodecount * scalei;
|
||||
|
||||
if (lcl_id < queuecount)
|
||||
{
|
||||
int temp = lcloutindex[lcl_id << 1];
|
||||
int x = temp & 0xffff;
|
||||
int y = (temp & (int)0xffff0000) >> 16;
|
||||
temp = glboutindex[0];
|
||||
int4 candidate_result;
|
||||
candidate_result.zw = (int2)convert_int_rtn(factor * 20.f);
|
||||
candidate_result.x = x;
|
||||
candidate_result.y = y;
|
||||
atomic_inc(glboutindex);
|
||||
candidate[outputoff + temp + lcl_id] = candidate_result;
|
||||
}
|
||||
|
||||
barrier(CLK_LOCAL_MEM_FENCE);
|
||||
}
|
||||
float stage_sum = 0.f;
|
||||
int stagecount = stagecascadeptr[stageloop].count;
|
||||
for (int nodeloop = 0; nodeloop < stagecount; nodeloop++)
|
||||
{
|
||||
__global GpuHidHaarTreeNode *currentnodeptr = (nodeptr + nodecounter);
|
||||
int4 info1 = *(__global int4 *)(&(currentnodeptr->p[0][0]));
|
||||
int4 info2 = *(__global int4 *)(&(currentnodeptr->p[1][0]));
|
||||
int4 info3 = *(__global int4 *)(&(currentnodeptr->p[2][0]));
|
||||
float4 w = *(__global float4 *)(&(currentnodeptr->weight[0]));
|
||||
float2 alpha2 = *(__global float2 *)(&(currentnodeptr->alpha[0]));
|
||||
float nodethreshold = w.w * variance_norm_factor;
|
||||
info1.x += p_offset;
|
||||
info1.z += p_offset;
|
||||
info2.x += p_offset;
|
||||
info2.z += p_offset;
|
||||
float classsum = (sum[clamp(mad24(info1.y, step, info1.x), 0, max_idx)] - sum[clamp(mad24(info1.y, step, info1.z), 0, max_idx)] -
|
||||
sum[clamp(mad24(info1.w, step, info1.x), 0, max_idx)] + sum[clamp(mad24(info1.w, step, info1.z), 0, max_idx)]) * w.x;
|
||||
classsum += (sum[clamp(mad24(info2.y, step, info2.x), 0, max_idx)] - sum[clamp(mad24(info2.y, step, info2.z), 0, max_idx)] -
|
||||
sum[clamp(mad24(info2.w, step, info2.x), 0, max_idx)] + sum[clamp(mad24(info2.w, step, info2.z), 0, max_idx)]) * w.y;
|
||||
info3.x += p_offset;
|
||||
info3.z += p_offset;
|
||||
classsum += (sum[clamp(mad24(info3.y, step, info3.x), 0, max_idx)] - sum[clamp(mad24(info3.y, step, info3.z), 0, max_idx)] -
|
||||
sum[clamp(mad24(info3.w, step, info3.x), 0, max_idx)] + sum[clamp(mad24(info3.w, step, info3.z), 0, max_idx)]) * w.z;
|
||||
stage_sum += classsum >= nodethreshold ? alpha2.y : alpha2.x;
|
||||
nodecounter++;
|
||||
}
|
||||
result = (bool)(stage_sum >= stagecascadeptr[stageloop].threshold);
|
||||
}
|
||||
|
||||
barrier(CLK_LOCAL_MEM_FENCE);
|
||||
|
||||
if (result && (ix < width) && (iy < height))
|
||||
{
|
||||
int queueindex = atomic_inc(lclcount);
|
||||
lcloutindex[queueindex] = (y << 16) | x;
|
||||
}
|
||||
|
||||
barrier(CLK_LOCAL_MEM_FENCE);
|
||||
int queuecount = lclcount[0];
|
||||
|
||||
if (lcl_id < queuecount)
|
||||
{
|
||||
int temp = lcloutindex[lcl_id];
|
||||
int x = temp & 0xffff;
|
||||
int y = (temp & (int)0xffff0000) >> 16;
|
||||
temp = atomic_inc(glboutindex);
|
||||
int4 candidate_result;
|
||||
candidate_result.zw = (int2)convert_int_rtn(factor * 20.f);
|
||||
candidate_result.x = x;
|
||||
candidate_result.y = y;
|
||||
candidate[outputoff + temp + lcl_id] = candidate_result;
|
||||
}
|
||||
|
||||
barrier(CLK_LOCAL_MEM_FENCE);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
__kernel void gpuscaleclassifier(global GpuHidHaarTreeNode *orinode, global GpuHidHaarTreeNode *newnode, float scale, float weight_scale, int nodenum)
|
||||
{
|
||||
int counter = get_global_id(0);
|
||||
int tr_x[3], tr_y[3], tr_h[3], tr_w[3], i = 0;
|
||||
GpuHidHaarTreeNode t1 = *(orinode + counter);
|
||||
int counter = get_global_id(0);
|
||||
int tr_x[3], tr_y[3], tr_h[3], tr_w[3], i = 0;
|
||||
GpuHidHaarTreeNode t1 = *(orinode + counter);
|
||||
#pragma unroll
|
||||
|
||||
for (i = 0; i < 3; i++)
|
||||
{
|
||||
tr_x[i] = (int)(t1.p[i][0] * scale + 0.5f);
|
||||
tr_y[i] = (int)(t1.p[i][1] * scale + 0.5f);
|
||||
tr_w[i] = (int)(t1.p[i][2] * scale + 0.5f);
|
||||
tr_h[i] = (int)(t1.p[i][3] * scale + 0.5f);
|
||||
}
|
||||
for (i = 0; i < 3; i++)
|
||||
{
|
||||
tr_x[i] = (int)(t1.p[i][0] * scale + 0.5f);
|
||||
tr_y[i] = (int)(t1.p[i][1] * scale + 0.5f);
|
||||
tr_w[i] = (int)(t1.p[i][2] * scale + 0.5f);
|
||||
tr_h[i] = (int)(t1.p[i][3] * scale + 0.5f);
|
||||
}
|
||||
|
||||
t1.weight[0] = t1.p[2][0] ? -(t1.weight[1] * tr_h[1] * tr_w[1] + t1.weight[2] * tr_h[2] * tr_w[2]) / (tr_h[0] * tr_w[0]) : -t1.weight[1] * tr_h[1] * tr_w[1] / (tr_h[0] * tr_w[0]);
|
||||
counter += nodenum;
|
||||
t1.weight[0] = t1.p[2][0] ? -(t1.weight[1] * tr_h[1] * tr_w[1] + t1.weight[2] * tr_h[2] * tr_w[2]) / (tr_h[0] * tr_w[0]) : -t1.weight[1] * tr_h[1] * tr_w[1] / (tr_h[0] * tr_w[0]);
|
||||
counter += nodenum;
|
||||
#pragma unroll
|
||||
|
||||
for (i = 0; i < 3; i++)
|
||||
{
|
||||
newnode[counter].p[i][0] = tr_x[i];
|
||||
newnode[counter].p[i][1] = tr_y[i];
|
||||
newnode[counter].p[i][2] = tr_x[i] + tr_w[i];
|
||||
newnode[counter].p[i][3] = tr_y[i] + tr_h[i];
|
||||
newnode[counter].weight[i] = t1.weight[i] * weight_scale;
|
||||
}
|
||||
for (i = 0; i < 3; i++)
|
||||
{
|
||||
newnode[counter].p[i][0] = tr_x[i];
|
||||
newnode[counter].p[i][1] = tr_y[i];
|
||||
newnode[counter].p[i][2] = tr_x[i] + tr_w[i];
|
||||
newnode[counter].p[i][3] = tr_y[i] + tr_h[i];
|
||||
newnode[counter].weight[i] = t1.weight[i] * weight_scale;
|
||||
}
|
||||
|
||||
newnode[counter].left = t1.left;
|
||||
newnode[counter].right = t1.right;
|
||||
newnode[counter].threshold = t1.threshold;
|
||||
newnode[counter].alpha[0] = t1.alpha[0];
|
||||
newnode[counter].alpha[1] = t1.alpha[1];
|
||||
newnode[counter].left = t1.left;
|
||||
newnode[counter].right = t1.right;
|
||||
newnode[counter].threshold = t1.threshold;
|
||||
newnode[counter].alpha[0] = t1.alpha[0];
|
||||
newnode[counter].alpha[1] = t1.alpha[1];
|
||||
}
|
||||
|
||||
|
@ -1,3 +1,48 @@
|
||||
/*M///////////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
|
||||
//
|
||||
// By downloading, copying, installing or using the software you agree to this license.
|
||||
// If you do not agree to this license, do not download, install,
|
||||
// copy or use the software.
|
||||
//
|
||||
//
|
||||
// License Agreement
|
||||
// For Open Source Computer Vision Library
|
||||
//
|
||||
// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved.
|
||||
// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
|
||||
// Third party copyrights are property of their respective owners.
|
||||
//
|
||||
// @Authors
|
||||
// Sen Liu, swjtuls1987@126.com
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without modification,
|
||||
// are permitted provided that the following conditions are met:
|
||||
//
|
||||
// * Redistribution's of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimer.
|
||||
//
|
||||
// * Redistribution's in binary form must reproduce the above copyright notice,
|
||||
// this list of conditions and the following disclaimer in the documentation
|
||||
// and/or other oclMaterials provided with the distribution.
|
||||
//
|
||||
// * The name of the copyright holders may not be used to endorse or promote products
|
||||
// derived from this software without specific prior written permission.
|
||||
//
|
||||
// This software is provided by the copyright holders and contributors as is and
|
||||
// any express or implied warranties, including, but not limited to, the implied
|
||||
// warranties of merchantability and fitness for a particular purpose are disclaimed.
|
||||
// In no event shall the Intel Corporation or contributors be liable for any direct,
|
||||
// indirect, incidental, special, exemplary, or consequential damages
|
||||
// (including, but not limited to, procurement of substitute goods or services;
|
||||
// loss of use, data, or profits; or business interruption) however caused
|
||||
// and on any theory of liability, whether in contract, strict liability,
|
||||
// or tort (including negligence or otherwise) arising in any way out of
|
||||
// the use of this software, even if advised of the possibility of such damage.
|
||||
//
|
||||
//M*/
|
||||
|
||||
#if defined (DOUBLE_SUPPORT)
|
||||
|
||||
#ifdef cl_khr_fp64
|
||||
@ -609,22 +654,33 @@ __kernel void CvMoments_D5( __global float* src_data, int src_rows, int src_cols
|
||||
int y = wgidy*TILE_SIZE; // real Y index of pixel
|
||||
int x = wgidx*TILE_SIZE; // real X index of pixel
|
||||
int kcn = (cn==2)?2:4;
|
||||
int rstep = min(src_step/4, TILE_SIZE);
|
||||
src_step /= sizeof(*src_data);
|
||||
int rstep = min(src_step, TILE_SIZE);
|
||||
tileSize_height = min(TILE_SIZE, src_rows - y);
|
||||
tileSize_width = min(TILE_SIZE, src_cols -x);
|
||||
if(tileSize_width < TILE_SIZE)
|
||||
for(int i = tileSize_width; i < rstep; i++ )
|
||||
*((__global float*)src_data+(y+lidy)*src_step/4+x+i) = 0;
|
||||
int maxIdx = mul24(src_rows, src_cols);
|
||||
int yOff = (y+lidy)*src_step;
|
||||
int index;
|
||||
if(tileSize_width < TILE_SIZE && yOff < src_rows)
|
||||
for(int i = tileSize_width; i < rstep && (yOff+x+i) < maxIdx; i++ )
|
||||
*(src_data+yOff+x+i) = 0;
|
||||
if( coi > 0 )
|
||||
for(int i=0; i < tileSize_width; i+=VLEN_F)
|
||||
{
|
||||
#pragma unroll
|
||||
for(int j=0; j<4; j++)
|
||||
tmp_coi[j] = *(src_data+(y+lidy)*src_step/4+(x+i+j)*kcn+coi-1);
|
||||
{
|
||||
index = yOff+(x+i+j)*kcn+coi-1;
|
||||
if (index < maxIdx)
|
||||
tmp_coi[j] = *(src_data+index);
|
||||
else
|
||||
tmp_coi[j] = 0;
|
||||
}
|
||||
tmp[i/VLEN_F] = (float4)(tmp_coi[0],tmp_coi[1],tmp_coi[2],tmp_coi[3]);
|
||||
}
|
||||
else
|
||||
for(int i=0; i < tileSize_width; i+=VLEN_F)
|
||||
tmp[i/VLEN_F] = (float4)(*(src_data+(y+lidy)*src_step/4+x+i),*(src_data+(y+lidy)*src_step/4+x+i+1),*(src_data+(y+lidy)*src_step/4+x+i+2),*(src_data+(y+lidy)*src_step/4+x+i+3));
|
||||
for(int i=0; i < tileSize_width && (yOff+x+i) < maxIdx; i+=VLEN_F)
|
||||
tmp[i/VLEN_F] = (*(__global float4 *)(src_data+yOff+x+i));
|
||||
float4 zero = (float4)(0);
|
||||
float4 full = (float4)(255);
|
||||
if( binary )
|
||||
@ -714,35 +770,59 @@ __kernel void CvMoments_D5( __global float* src_data, int src_rows, int src_cols
|
||||
// accumulate moments computed in each tile
|
||||
dst_step /= sizeof(F);
|
||||
|
||||
int dst_x_off = mad24(wgidy, dst_cols, wgidx);
|
||||
int dst_off = 0;
|
||||
int max_dst_index = 10 * blocky * get_global_size(1);
|
||||
|
||||
// + m00 ( = m00' )
|
||||
*(dst_m + mad24(DST_ROW_00 * blocky, dst_step, mad24(wgidy, dst_cols, wgidx))) = mom[0];
|
||||
dst_off = mad24(DST_ROW_00 * blocky, dst_step, dst_x_off);
|
||||
if (dst_off < max_dst_index)
|
||||
*(dst_m + dst_off) = mom[0];
|
||||
|
||||
// + m10 ( = m10' + x*m00' )
|
||||
*(dst_m + mad24(DST_ROW_10 * blocky, dst_step, mad24(wgidy, dst_cols, wgidx))) = mom[1] + xm;
|
||||
dst_off = mad24(DST_ROW_10 * blocky, dst_step, dst_x_off);
|
||||
if (dst_off < max_dst_index)
|
||||
*(dst_m + dst_off) = mom[1] + xm;
|
||||
|
||||
// + m01 ( = m01' + y*m00' )
|
||||
*(dst_m + mad24(DST_ROW_01 * blocky, dst_step, mad24(wgidy, dst_cols, wgidx))) = mom[2] + ym;
|
||||
dst_off = mad24(DST_ROW_01 * blocky, dst_step, dst_x_off);
|
||||
if (dst_off < max_dst_index)
|
||||
*(dst_m + dst_off) = mom[2] + ym;
|
||||
|
||||
// + m20 ( = m20' + 2*x*m10' + x*x*m00' )
|
||||
*(dst_m + mad24(DST_ROW_20 * blocky, dst_step, mad24(wgidy, dst_cols, wgidx))) = mom[3] + x * (mom[1] * 2 + xm);
|
||||
dst_off = mad24(DST_ROW_20 * blocky, dst_step, dst_x_off);
|
||||
if (dst_off < max_dst_index)
|
||||
*(dst_m + dst_off) = mom[3] + x * (mom[1] * 2 + xm);
|
||||
|
||||
// + m11 ( = m11' + x*m01' + y*m10' + x*y*m00' )
|
||||
*(dst_m + mad24(DST_ROW_11 * blocky, dst_step, mad24(wgidy, dst_cols, wgidx))) = mom[4] + x * (mom[2] + ym) + y * mom[1];
|
||||
dst_off = mad24(DST_ROW_11 * blocky, dst_step, dst_x_off);
|
||||
if (dst_off < max_dst_index)
|
||||
*(dst_m + dst_off) = mom[4] + x * (mom[2] + ym) + y * mom[1];
|
||||
|
||||
// + m02 ( = m02' + 2*y*m01' + y*y*m00' )
|
||||
*(dst_m + mad24(DST_ROW_02 * blocky, dst_step, mad24(wgidy, dst_cols, wgidx))) = mom[5] + y * (mom[2] * 2 + ym);
|
||||
dst_off = mad24(DST_ROW_02 * blocky, dst_step, dst_x_off);
|
||||
if (dst_off < max_dst_index)
|
||||
*(dst_m + dst_off) = mom[5] + y * (mom[2] * 2 + ym);
|
||||
|
||||
// + m30 ( = m30' + 3*x*m20' + 3*x*x*m10' + x*x*x*m00' )
|
||||
*(dst_m + mad24(DST_ROW_30 * blocky, dst_step, mad24(wgidy, dst_cols, wgidx))) = mom[6] + x * (3. * mom[3] + x * (3. * mom[1] + xm));
|
||||
dst_off = mad24(DST_ROW_30 * blocky, dst_step, dst_x_off);
|
||||
if (dst_off < max_dst_index)
|
||||
*(dst_m + dst_off) = mom[6] + x * (3. * mom[3] + x * (3. * mom[1] + xm));
|
||||
|
||||
// + m21 ( = m21' + x*(2*m11' + 2*y*m10' + x*m01' + x*y*m00') + y*m20')
|
||||
*(dst_m + mad24(DST_ROW_21 * blocky, dst_step, mad24(wgidy, dst_cols, wgidx))) = mom[7] + x * (2 * (mom[4] + y * mom[1]) + x * (mom[2] + ym)) + y * mom[3];
|
||||
dst_off = mad24(DST_ROW_21 * blocky, dst_step, dst_x_off);
|
||||
if (dst_off < max_dst_index)
|
||||
*(dst_m + dst_off) = mom[7] + x * (2 * (mom[4] + y * mom[1]) + x * (mom[2] + ym)) + y * mom[3];
|
||||
|
||||
// + m12 ( = m12' + y*(2*m11' + 2*x*m01' + y*m10' + x*y*m00') + x*m02')
|
||||
*(dst_m + mad24(DST_ROW_12 * blocky, dst_step, mad24(wgidy, dst_cols, wgidx))) = mom[8] + y * (2 * (mom[4] + x * mom[2]) + y * (mom[1] + xm)) + x * mom[5];
|
||||
dst_off = mad24(DST_ROW_12 * blocky, dst_step, dst_x_off);
|
||||
if (dst_off < max_dst_index)
|
||||
*(dst_m + dst_off) = mom[8] + y * (2 * (mom[4] + x * mom[2]) + y * (mom[1] + xm)) + x * mom[5];
|
||||
|
||||
// + m03 ( = m03' + 3*y*m02' + 3*y*y*m01' + y*y*y*m00' )
|
||||
*(dst_m + mad24(DST_ROW_03 * blocky, dst_step, mad24(wgidy, dst_cols, wgidx))) = mom[9] + y * (3. * mom[5] + y * (3. * mom[2] + ym));
|
||||
dst_off = mad24(DST_ROW_03 * blocky, dst_step, dst_x_off);
|
||||
if (dst_off < max_dst_index)
|
||||
*(dst_m + dst_off) = mom[9] + y * (3. * mom[5] + y * (3. * mom[2] + ym));
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -16,6 +16,8 @@
|
||||
//
|
||||
// @Authors
|
||||
// Jia Haipeng, jiahaipeng95@gmail.com
|
||||
// Sen Liu, swjtuls1987@126.com
|
||||
// Peng Xiao, pengxiao@outlook.com
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without modification,
|
||||
// are permitted provided that the following conditions are met:
|
||||
@ -50,59 +52,40 @@
|
||||
#define STEREO_MIND 0 // The minimum d range to check
|
||||
#define STEREO_DISP_STEP N_DISPARITIES // the d step, must be <= 1 to avoid aliasing
|
||||
|
||||
int SQ(int a)
|
||||
{
|
||||
return a * a;
|
||||
}
|
||||
#ifndef radius
|
||||
#define radius 64
|
||||
#endif
|
||||
|
||||
unsigned int CalcSSD(volatile __local unsigned int *col_ssd_cache,
|
||||
volatile __local unsigned int *col_ssd, int radius)
|
||||
unsigned int CalcSSD(__local unsigned int *col_ssd)
|
||||
{
|
||||
unsigned int cache = 0;
|
||||
unsigned int cache2 = 0;
|
||||
unsigned int cache = col_ssd[0];
|
||||
|
||||
for(int i = 1; i <= radius; i++)
|
||||
#pragma unroll
|
||||
for(int i = 1; i <= (radius << 1); i++)
|
||||
cache += col_ssd[i];
|
||||
|
||||
col_ssd_cache[0] = cache;
|
||||
|
||||
barrier(CLK_LOCAL_MEM_FENCE);
|
||||
|
||||
if (get_local_id(0) < BLOCK_W - radius)
|
||||
cache2 = col_ssd_cache[radius];
|
||||
else
|
||||
for(int i = radius + 1; i < (2 * radius + 1); i++)
|
||||
cache2 += col_ssd[i];
|
||||
|
||||
return col_ssd[0] + cache + cache2;
|
||||
return cache;
|
||||
}
|
||||
|
||||
uint2 MinSSD(volatile __local unsigned int *col_ssd_cache,
|
||||
volatile __local unsigned int *col_ssd, int radius)
|
||||
uint2 MinSSD(__local unsigned int *col_ssd)
|
||||
{
|
||||
unsigned int ssd[N_DISPARITIES];
|
||||
const int win_size = (radius << 1);
|
||||
|
||||
//See above: #define COL_SSD_SIZE (BLOCK_W + 2 * radius)
|
||||
ssd[0] = CalcSSD(col_ssd_cache, col_ssd + 0 * (BLOCK_W + 2 * radius), radius);
|
||||
barrier(CLK_LOCAL_MEM_FENCE);
|
||||
ssd[1] = CalcSSD(col_ssd_cache, col_ssd + 1 * (BLOCK_W + 2 * radius), radius);
|
||||
barrier(CLK_LOCAL_MEM_FENCE);
|
||||
ssd[2] = CalcSSD(col_ssd_cache, col_ssd + 2 * (BLOCK_W + 2 * radius), radius);
|
||||
barrier(CLK_LOCAL_MEM_FENCE);
|
||||
ssd[3] = CalcSSD(col_ssd_cache, col_ssd + 3 * (BLOCK_W + 2 * radius), radius);
|
||||
barrier(CLK_LOCAL_MEM_FENCE);
|
||||
ssd[4] = CalcSSD(col_ssd_cache, col_ssd + 4 * (BLOCK_W + 2 * radius), radius);
|
||||
barrier(CLK_LOCAL_MEM_FENCE);
|
||||
ssd[5] = CalcSSD(col_ssd_cache, col_ssd + 5 * (BLOCK_W + 2 * radius), radius);
|
||||
barrier(CLK_LOCAL_MEM_FENCE);
|
||||
ssd[6] = CalcSSD(col_ssd_cache, col_ssd + 6 * (BLOCK_W + 2 * radius), radius);
|
||||
barrier(CLK_LOCAL_MEM_FENCE);
|
||||
ssd[7] = CalcSSD(col_ssd_cache, col_ssd + 7 * (BLOCK_W + 2 * radius), radius);
|
||||
barrier(CLK_LOCAL_MEM_FENCE);
|
||||
//See above: #define COL_SSD_SIZE (BLOCK_W + WIN_SIZE)
|
||||
ssd[0] = CalcSSD(col_ssd + 0 * (BLOCK_W + win_size));
|
||||
ssd[1] = CalcSSD(col_ssd + 1 * (BLOCK_W + win_size));
|
||||
ssd[2] = CalcSSD(col_ssd + 2 * (BLOCK_W + win_size));
|
||||
ssd[3] = CalcSSD(col_ssd + 3 * (BLOCK_W + win_size));
|
||||
ssd[4] = CalcSSD(col_ssd + 4 * (BLOCK_W + win_size));
|
||||
ssd[5] = CalcSSD(col_ssd + 5 * (BLOCK_W + win_size));
|
||||
ssd[6] = CalcSSD(col_ssd + 6 * (BLOCK_W + win_size));
|
||||
ssd[7] = CalcSSD(col_ssd + 7 * (BLOCK_W + win_size));
|
||||
|
||||
unsigned int mssd = min(min(min(ssd[0], ssd[1]), min(ssd[4], ssd[5])), min(min(ssd[2], ssd[3]), min(ssd[6], ssd[7])));
|
||||
|
||||
int bestIdx = 0;
|
||||
|
||||
for (int i = 0; i < N_DISPARITIES; i++)
|
||||
{
|
||||
if (mssd == ssd[i])
|
||||
@ -113,124 +96,66 @@ uint2 MinSSD(volatile __local unsigned int *col_ssd_cache,
|
||||
}
|
||||
|
||||
void StepDown(int idx1, int idx2, __global unsigned char* imageL,
|
||||
__global unsigned char* imageR, int d, volatile __local unsigned int *col_ssd, int radius)
|
||||
__global unsigned char* imageR, int d, __local unsigned int *col_ssd)
|
||||
{
|
||||
unsigned char leftPixel1;
|
||||
unsigned char leftPixel2;
|
||||
unsigned char rightPixel1[8];
|
||||
unsigned char rightPixel2[8];
|
||||
unsigned int diff1, diff2;
|
||||
|
||||
leftPixel1 = imageL[idx1];
|
||||
leftPixel2 = imageL[idx2];
|
||||
|
||||
idx1 = idx1 - d;
|
||||
idx2 = idx2 - d;
|
||||
|
||||
rightPixel1[7] = imageR[idx1 - 7];
|
||||
rightPixel1[0] = imageR[idx1 - 0];
|
||||
rightPixel1[1] = imageR[idx1 - 1];
|
||||
rightPixel1[2] = imageR[idx1 - 2];
|
||||
rightPixel1[3] = imageR[idx1 - 3];
|
||||
rightPixel1[4] = imageR[idx1 - 4];
|
||||
rightPixel1[5] = imageR[idx1 - 5];
|
||||
rightPixel1[6] = imageR[idx1 - 6];
|
||||
|
||||
rightPixel2[7] = imageR[idx2 - 7];
|
||||
rightPixel2[0] = imageR[idx2 - 0];
|
||||
rightPixel2[1] = imageR[idx2 - 1];
|
||||
rightPixel2[2] = imageR[idx2 - 2];
|
||||
rightPixel2[3] = imageR[idx2 - 3];
|
||||
rightPixel2[4] = imageR[idx2 - 4];
|
||||
rightPixel2[5] = imageR[idx2 - 5];
|
||||
rightPixel2[6] = imageR[idx2 - 6];
|
||||
|
||||
//See above: #define COL_SSD_SIZE (BLOCK_W + 2 * radius)
|
||||
diff1 = leftPixel1 - rightPixel1[0];
|
||||
diff2 = leftPixel2 - rightPixel2[0];
|
||||
col_ssd[0 * (BLOCK_W + 2 * radius)] += SQ(diff2) - SQ(diff1);
|
||||
|
||||
diff1 = leftPixel1 - rightPixel1[1];
|
||||
diff2 = leftPixel2 - rightPixel2[1];
|
||||
col_ssd[1 * (BLOCK_W + 2 * radius)] += SQ(diff2) - SQ(diff1);
|
||||
|
||||
diff1 = leftPixel1 - rightPixel1[2];
|
||||
diff2 = leftPixel2 - rightPixel2[2];
|
||||
col_ssd[2 * (BLOCK_W + 2 * radius)] += SQ(diff2) - SQ(diff1);
|
||||
|
||||
diff1 = leftPixel1 - rightPixel1[3];
|
||||
diff2 = leftPixel2 - rightPixel2[3];
|
||||
col_ssd[3 * (BLOCK_W + 2 * radius)] += SQ(diff2) - SQ(diff1);
|
||||
|
||||
diff1 = leftPixel1 - rightPixel1[4];
|
||||
diff2 = leftPixel2 - rightPixel2[4];
|
||||
col_ssd[4 * (BLOCK_W + 2 * radius)] += SQ(diff2) - SQ(diff1);
|
||||
|
||||
diff1 = leftPixel1 - rightPixel1[5];
|
||||
diff2 = leftPixel2 - rightPixel2[5];
|
||||
col_ssd[5 * (BLOCK_W + 2 * radius)] += SQ(diff2) - SQ(diff1);
|
||||
|
||||
diff1 = leftPixel1 - rightPixel1[6];
|
||||
diff2 = leftPixel2 - rightPixel2[6];
|
||||
col_ssd[6 * (BLOCK_W + 2 * radius)] += SQ(diff2) - SQ(diff1);
|
||||
|
||||
diff1 = leftPixel1 - rightPixel1[7];
|
||||
diff2 = leftPixel2 - rightPixel2[7];
|
||||
col_ssd[7 * (BLOCK_W + 2 * radius)] += SQ(diff2) - SQ(diff1);
|
||||
uint8 imgR1 = convert_uint8(vload8(0, imageR + (idx1 - d - 7)));
|
||||
uint8 imgR2 = convert_uint8(vload8(0, imageR + (idx2 - d - 7)));
|
||||
uint8 diff1 = (uint8)(imageL[idx1]) - imgR1;
|
||||
uint8 diff2 = (uint8)(imageL[idx2]) - imgR2;
|
||||
uint8 res = diff2 * diff2 - diff1 * diff1;
|
||||
const int win_size = (radius << 1);
|
||||
col_ssd[0 * (BLOCK_W + win_size)] += res.s7;
|
||||
col_ssd[1 * (BLOCK_W + win_size)] += res.s6;
|
||||
col_ssd[2 * (BLOCK_W + win_size)] += res.s5;
|
||||
col_ssd[3 * (BLOCK_W + win_size)] += res.s4;
|
||||
col_ssd[4 * (BLOCK_W + win_size)] += res.s3;
|
||||
col_ssd[5 * (BLOCK_W + win_size)] += res.s2;
|
||||
col_ssd[6 * (BLOCK_W + win_size)] += res.s1;
|
||||
col_ssd[7 * (BLOCK_W + win_size)] += res.s0;
|
||||
}
|
||||
|
||||
void InitColSSD(int x_tex, int y_tex, int im_pitch, __global unsigned char* imageL,
|
||||
__global unsigned char* imageR, int d,
|
||||
volatile __local unsigned int *col_ssd, int radius)
|
||||
__local unsigned int *col_ssd)
|
||||
{
|
||||
unsigned char leftPixel1;
|
||||
int idx;
|
||||
unsigned int diffa[] = {0, 0, 0, 0, 0, 0, 0, 0};
|
||||
|
||||
for(int i = 0; i < (2 * radius + 1); i++)
|
||||
uint8 leftPixel1;
|
||||
uint8 diffa = 0;
|
||||
int idx = y_tex * im_pitch + x_tex;
|
||||
const int win_size = (radius << 1);
|
||||
for(int i = 0; i < (win_size + 1); i++)
|
||||
{
|
||||
idx = y_tex * im_pitch + x_tex;
|
||||
leftPixel1 = imageL[idx];
|
||||
idx = idx - d;
|
||||
leftPixel1 = (uint8)(imageL[idx]);
|
||||
uint8 imgR = convert_uint8(vload8(0, imageR + (idx - d - 7)));
|
||||
uint8 res = leftPixel1 - imgR;
|
||||
diffa += res * res;
|
||||
|
||||
diffa[0] += SQ(leftPixel1 - imageR[idx - 0]);
|
||||
diffa[1] += SQ(leftPixel1 - imageR[idx - 1]);
|
||||
diffa[2] += SQ(leftPixel1 - imageR[idx - 2]);
|
||||
diffa[3] += SQ(leftPixel1 - imageR[idx - 3]);
|
||||
diffa[4] += SQ(leftPixel1 - imageR[idx - 4]);
|
||||
diffa[5] += SQ(leftPixel1 - imageR[idx - 5]);
|
||||
diffa[6] += SQ(leftPixel1 - imageR[idx - 6]);
|
||||
diffa[7] += SQ(leftPixel1 - imageR[idx - 7]);
|
||||
|
||||
y_tex += 1;
|
||||
idx += im_pitch;
|
||||
}
|
||||
//See above: #define COL_SSD_SIZE (BLOCK_W + 2 * radius)
|
||||
col_ssd[0 * (BLOCK_W + 2 * radius)] = diffa[0];
|
||||
col_ssd[1 * (BLOCK_W + 2 * radius)] = diffa[1];
|
||||
col_ssd[2 * (BLOCK_W + 2 * radius)] = diffa[2];
|
||||
col_ssd[3 * (BLOCK_W + 2 * radius)] = diffa[3];
|
||||
col_ssd[4 * (BLOCK_W + 2 * radius)] = diffa[4];
|
||||
col_ssd[5 * (BLOCK_W + 2 * radius)] = diffa[5];
|
||||
col_ssd[6 * (BLOCK_W + 2 * radius)] = diffa[6];
|
||||
col_ssd[7 * (BLOCK_W + 2 * radius)] = diffa[7];
|
||||
//See above: #define COL_SSD_SIZE (BLOCK_W + WIN_SIZE)
|
||||
col_ssd[0 * (BLOCK_W + win_size)] = diffa.s7;
|
||||
col_ssd[1 * (BLOCK_W + win_size)] = diffa.s6;
|
||||
col_ssd[2 * (BLOCK_W + win_size)] = diffa.s5;
|
||||
col_ssd[3 * (BLOCK_W + win_size)] = diffa.s4;
|
||||
col_ssd[4 * (BLOCK_W + win_size)] = diffa.s3;
|
||||
col_ssd[5 * (BLOCK_W + win_size)] = diffa.s2;
|
||||
col_ssd[6 * (BLOCK_W + win_size)] = diffa.s1;
|
||||
col_ssd[7 * (BLOCK_W + win_size)] = diffa.s0;
|
||||
}
|
||||
|
||||
__kernel void stereoKernel(__global unsigned char *left, __global unsigned char *right,
|
||||
__global unsigned int *cminSSDImage, int cminSSD_step,
|
||||
__global unsigned char *disp, int disp_step,int cwidth, int cheight,
|
||||
int img_step, int maxdisp, int radius,
|
||||
int img_step, int maxdisp,
|
||||
__local unsigned int *col_ssd_cache)
|
||||
{
|
||||
|
||||
volatile __local unsigned int *col_ssd = col_ssd_cache + BLOCK_W + get_local_id(0);
|
||||
volatile __local unsigned int *col_ssd_extra = get_local_id(0) < (2 * radius) ? col_ssd + BLOCK_W : 0;
|
||||
__local unsigned int *col_ssd = col_ssd_cache + get_local_id(0);
|
||||
__local unsigned int *col_ssd_extra = get_local_id(0) < (radius << 1) ? col_ssd + BLOCK_W : 0;
|
||||
|
||||
int X = get_group_id(0) * BLOCK_W + get_local_id(0) + maxdisp + radius;
|
||||
// int Y = get_group_id(1) * ROWSperTHREAD + radius;
|
||||
|
||||
#define Y (get_group_id(1) * ROWSperTHREAD + radius)
|
||||
|
||||
volatile __global unsigned int* minSSDImage = cminSSDImage + X + Y * cminSSD_step;
|
||||
__global unsigned int* minSSDImage = cminSSDImage + X + Y * cminSSD_step;
|
||||
__global unsigned char* disparImage = disp + X + Y * disp_step;
|
||||
|
||||
int end_row = ROWSperTHREAD < (cheight - Y) ? ROWSperTHREAD:(cheight - Y);
|
||||
@ -244,14 +169,14 @@ __kernel void stereoKernel(__global unsigned char *left, __global unsigned char
|
||||
{
|
||||
y_tex = Y - radius;
|
||||
|
||||
InitColSSD(x_tex, y_tex, img_step, left, right, d, col_ssd, radius);
|
||||
InitColSSD(x_tex, y_tex, img_step, left, right, d, col_ssd);
|
||||
if (col_ssd_extra > 0)
|
||||
if (x_tex + BLOCK_W < cwidth)
|
||||
InitColSSD(x_tex + BLOCK_W, y_tex, img_step, left, right, d, col_ssd_extra, radius);
|
||||
InitColSSD(x_tex + BLOCK_W, y_tex, img_step, left, right, d, col_ssd_extra);
|
||||
|
||||
barrier(CLK_LOCAL_MEM_FENCE); //before MinSSD function
|
||||
|
||||
uint2 minSSD = MinSSD(col_ssd_cache + get_local_id(0), col_ssd, radius);
|
||||
uint2 minSSD = MinSSD(col_ssd);
|
||||
if (X < cwidth - radius && Y < cheight - radius)
|
||||
{
|
||||
if (minSSD.x < minSSDImage[0])
|
||||
@ -264,21 +189,18 @@ __kernel void stereoKernel(__global unsigned char *left, __global unsigned char
|
||||
for(int row = 1; row < end_row; row++)
|
||||
{
|
||||
int idx1 = y_tex * img_step + x_tex;
|
||||
int idx2 = min(y_tex + (2 * radius + 1), cheight - 1) * img_step + x_tex;
|
||||
|
||||
barrier(CLK_GLOBAL_MEM_FENCE);
|
||||
int idx2 = min(y_tex + ((radius << 1) + 1), cheight - 1) * img_step + x_tex;
|
||||
|
||||
barrier(CLK_LOCAL_MEM_FENCE);
|
||||
|
||||
StepDown(idx1, idx2, left, right, d, col_ssd, radius);
|
||||
StepDown(idx1, idx2, left, right, d, col_ssd);
|
||||
if (col_ssd_extra > 0)
|
||||
if (x_tex + BLOCK_W < cwidth)
|
||||
StepDown(idx1, idx2, left + BLOCK_W, right + BLOCK_W, d, col_ssd_extra, radius);
|
||||
|
||||
y_tex += 1;
|
||||
StepDown(idx1, idx2, left + BLOCK_W, right + BLOCK_W, d, col_ssd_extra);
|
||||
|
||||
barrier(CLK_LOCAL_MEM_FENCE);
|
||||
|
||||
uint2 minSSD = MinSSD(col_ssd_cache + get_local_id(0), col_ssd, radius);
|
||||
uint2 minSSD = MinSSD(col_ssd);
|
||||
if (X < cwidth - radius && row < cheight - radius - Y)
|
||||
{
|
||||
int idx = row * cminSSD_step;
|
||||
@ -288,10 +210,11 @@ __kernel void stereoKernel(__global unsigned char *left, __global unsigned char
|
||||
minSSDImage[idx] = minSSD.x;
|
||||
}
|
||||
}
|
||||
|
||||
y_tex++;
|
||||
} // for row loop
|
||||
} // for d loop
|
||||
}
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
//////////////////////////// Sobel Prefilter (single channel)//////////////////////////////////////
|
||||
//////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
|
1402
modules/ocl/src/opencl/stereocsbp.cl
Normal file
1402
modules/ocl/src/opencl/stereocsbp.cl
Normal file
File diff suppressed because it is too large
Load Diff
763
modules/ocl/src/stereo_csbp.cpp
Normal file
763
modules/ocl/src/stereo_csbp.cpp
Normal file
@ -0,0 +1,763 @@
|
||||
/*M///////////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
|
||||
//
|
||||
// By downloading, copying, installing or using the software you agree to this license.
|
||||
// If you do not agree to this license, do not download, install,
|
||||
// copy or use the software.
|
||||
//
|
||||
//
|
||||
// License Agreement
|
||||
// For Open Source Computer Vision Library
|
||||
//
|
||||
// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved.
|
||||
// Copyright (C) 2010-2012, Institute Of Software Chinese Academy Of Science, all rights reserved.
|
||||
// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
|
||||
// Third party copyrights are property of their respective owners.
|
||||
//
|
||||
// @Authors
|
||||
// Jia Haipeng, jiahaipeng95@gmail.com
|
||||
// Jin Ma, jin@multicorewareinc.com
|
||||
// Redistribution and use in source and binary forms, with or without modification,
|
||||
// are permitted provided that the following conditions are met:
|
||||
//
|
||||
// * Redistribution's of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimer.
|
||||
//
|
||||
// * Redistribution's in binary form must reproduce the above copyright notice,
|
||||
// this list of conditions and the following disclaimer in the documentation
|
||||
// and/or other materials provided with the distribution.
|
||||
//
|
||||
// * The name of the copyright holders may not be used to endorse or promote products
|
||||
// derived from this software without specific prior written permission.
|
||||
//
|
||||
// This software is provided by the copyright holders and contributors "as is" and
|
||||
// any express or implied warranties, including, but not limited to, the implied
|
||||
// warranties of merchantability and fitness for a particular purpose are disclaimed.
|
||||
// In no event shall the Intel Corporation or contributors be liable for any direct,
|
||||
// indirect, incidental, special, exemplary, or consequential damages
|
||||
// (including, but not limited to, procurement of substitute goods or services;
|
||||
// loss of use, data, or profits; or business interruption) however caused
|
||||
// and on any theory of liability, whether in contract, strict liability,
|
||||
// or tort (including negligence or otherwise) arising in any way out of
|
||||
// the use of this software, even if advised of the possibility of such damage.
|
||||
//
|
||||
//M*/
|
||||
|
||||
#include "precomp.hpp"
|
||||
|
||||
using namespace cv;
|
||||
using namespace cv::ocl;
|
||||
using namespace std;
|
||||
|
||||
#if !defined (HAVE_OPENCL)
|
||||
|
||||
namespace cv
|
||||
{
|
||||
namespace ocl
|
||||
{
|
||||
|
||||
void cv::ocl::StereoConstantSpaceBP::estimateRecommendedParams(int, int, int &, int &, int &, int &)
|
||||
{
|
||||
throw_nogpu();
|
||||
}
|
||||
cv::ocl::StereoConstantSpaceBP::StereoConstantSpaceBP(int, int, int, int, int)
|
||||
{
|
||||
throw_nogpu();
|
||||
}
|
||||
cv::ocl::StereoConstantSpaceBP::StereoConstantSpaceBP(int, int, int, int, float, float,
|
||||
float, float, int, int)
|
||||
{
|
||||
throw_nogpu();
|
||||
}
|
||||
|
||||
void cv::ocl::StereoConstantSpaceBP::operator()(const oclMat &, const oclMat &, oclMat &)
|
||||
{
|
||||
throw_nogpu();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#else /* !defined (HAVE_OPENCL) */
|
||||
|
||||
namespace cv
|
||||
{
|
||||
namespace ocl
|
||||
{
|
||||
|
||||
///////////////////////////OpenCL kernel strings///////////////////////////
|
||||
extern const char *stereocsbp;
|
||||
}
|
||||
|
||||
}
|
||||
namespace cv
|
||||
{
|
||||
namespace ocl
|
||||
{
|
||||
namespace stereoCSBP
|
||||
{
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
//////////////////////////////common////////////////////////////////////
|
||||
////////////////////////////////////////////////////////////////////////
|
||||
// Returns how many chunks of size `grain` are needed to cover `total`,
// i.e. total / grain rounded up (for non-negative `total` and positive `grain`).
static inline int divUp(int total, int grain)
{
    const int padded = total + grain - 1;
    return padded / grain;
}
|
||||
// Builds the full kernel name by appending a one-character type suffix to
// `kernel_name`: "0" when `data_type` is CV_16S, "1" for any other value.
static string get_kernel_name(string kernel_name, int data_type)
{
    const char *suffix = (data_type == CV_16S) ? "0" : "1";
    return kernel_name + suffix;
}
|
||||
using cv::ocl::StereoConstantSpaceBP;
|
||||
//////////////////////////////////////////////////////////////////////////////////
|
||||
/////////////////////////////////init_data_cost//////////////////////////////////
|
||||
//////////////////////////////////////////////////////////////////////////////////
|
||||
static void init_data_cost_caller(const oclMat &left, const oclMat &right, oclMat &temp,
|
||||
StereoConstantSpaceBP &rthis,
|
||||
int msg_step, int h, int w, int level)
|
||||
{
|
||||
Context *clCxt = left.clCxt;
|
||||
int data_type = rthis.msg_type;
|
||||
int channels = left.oclchannels();
|
||||
|
||||
string kernelName = get_kernel_name("init_data_cost_", data_type);
|
||||
|
||||
cl_kernel kernel = openCLGetKernelFromSource(clCxt, &stereocsbp, kernelName);
|
||||
|
||||
//size_t blockSize = 256;
|
||||
size_t localThreads[] = {32, 8 ,1};
|
||||
size_t globalThreads[] = {divUp(w, localThreads[0]) *localThreads[0],
|
||||
divUp(h, localThreads[1]) *localThreads[1],
|
||||
1
|
||||
};
|
||||
|
||||
int cdisp_step1 = msg_step * h;
|
||||
openCLVerifyKernel(clCxt, kernel, localThreads);
|
||||
openCLSafeCall(clSetKernelArg(kernel, 0, sizeof(cl_mem), (void *)&temp.data));
|
||||
openCLSafeCall(clSetKernelArg(kernel, 1, sizeof(cl_mem), (void *)&left.data));
|
||||
openCLSafeCall(clSetKernelArg(kernel, 2, sizeof(cl_mem), (void *)&right.data));
|
||||
openCLSafeCall(clSetKernelArg(kernel, 3, sizeof(cl_int), (void *)&h));
|
||||
openCLSafeCall(clSetKernelArg(kernel, 4, sizeof(cl_int), (void *)&w));
|
||||
openCLSafeCall(clSetKernelArg(kernel, 5, sizeof(cl_int), (void *)&level));
|
||||
openCLSafeCall(clSetKernelArg(kernel, 6, sizeof(cl_int), (void *)&channels));
|
||||
openCLSafeCall(clSetKernelArg(kernel, 7, sizeof(cl_int), (void *)&msg_step));
|
||||
openCLSafeCall(clSetKernelArg(kernel, 8, sizeof(cl_float), (void *)&rthis.data_weight));
|
||||
openCLSafeCall(clSetKernelArg(kernel, 9, sizeof(cl_float), (void *)&rthis.max_data_term));
|
||||
openCLSafeCall(clSetKernelArg(kernel, 10, sizeof(cl_int), (void *)&cdisp_step1));
|
||||
openCLSafeCall(clSetKernelArg(kernel, 11, sizeof(cl_int), (void *)&rthis.min_disp_th));
|
||||
openCLSafeCall(clSetKernelArg(kernel, 12, sizeof(cl_int), (void *)&left.step));
|
||||
openCLSafeCall(clSetKernelArg(kernel, 13, sizeof(cl_int), (void *)&rthis.ndisp));
|
||||
openCLSafeCall(clEnqueueNDRangeKernel(*(cl_command_queue*)getoclCommandQueue(), kernel, 2, NULL,
|
||||
globalThreads, localThreads, 0, NULL, NULL));
|
||||
|
||||
clFinish(*(cl_command_queue*)getoclCommandQueue());
|
||||
openCLSafeCall(clReleaseKernel(kernel));
|
||||
}
|
||||
|
||||
static void init_data_cost_reduce_caller(const oclMat &left, const oclMat &right, oclMat &temp,
|
||||
StereoConstantSpaceBP &rthis,
|
||||
int msg_step, int h, int w, int level)
|
||||
{
|
||||
|
||||
Context *clCxt = left.clCxt;
|
||||
int data_type = rthis.msg_type;
|
||||
int channels = left.oclchannels();
|
||||
int win_size = (int)std::pow(2.f, level);
|
||||
|
||||
string kernelName = get_kernel_name("init_data_cost_reduce_", data_type);
|
||||
|
||||
cl_kernel kernel = openCLGetKernelFromSource(clCxt, &stereocsbp, kernelName);
|
||||
|
||||
const int threadsNum = 256;
|
||||
//size_t blockSize = threadsNum;
|
||||
size_t localThreads[3] = {win_size, 1, threadsNum / win_size};
|
||||
size_t globalThreads[3] = {w *localThreads[0],
|
||||
h * divUp(rthis.ndisp, localThreads[2]) *localThreads[1], 1 * localThreads[2]
|
||||
};
|
||||
|
||||
int local_mem_size = threadsNum * sizeof(float);
|
||||
int cdisp_step1 = msg_step * h;
|
||||
|
||||
openCLVerifyKernel(clCxt, kernel, localThreads);
|
||||
|
||||
openCLSafeCall(clSetKernelArg(kernel, 0, sizeof(cl_mem), (void *)&temp.data));
|
||||
openCLSafeCall(clSetKernelArg(kernel, 1, sizeof(cl_mem), (void *)&left.data));
|
||||
openCLSafeCall(clSetKernelArg(kernel, 2, sizeof(cl_mem), (void *)&right.data));
|
||||
openCLSafeCall(clSetKernelArg(kernel, 3, local_mem_size, (void *)NULL));
|
||||
openCLSafeCall(clSetKernelArg(kernel, 4, sizeof(cl_int), (void *)&level));
|
||||
openCLSafeCall(clSetKernelArg(kernel, 5, sizeof(cl_int), (void *)&left.rows));
|
||||
openCLSafeCall(clSetKernelArg(kernel, 6, sizeof(cl_int), (void *)&left.cols));
|
||||
openCLSafeCall(clSetKernelArg(kernel, 7, sizeof(cl_int), (void *)&h));
|
||||
openCLSafeCall(clSetKernelArg(kernel, 8, sizeof(cl_int), (void *)&win_size));
|
||||
openCLSafeCall(clSetKernelArg(kernel, 9, sizeof(cl_int), (void *)&channels));
|
||||
openCLSafeCall(clSetKernelArg(kernel, 10, sizeof(cl_int), (void *)&rthis.ndisp));
|
||||
openCLSafeCall(clSetKernelArg(kernel, 11, sizeof(cl_int), (void *)&left.step));
|
||||
openCLSafeCall(clSetKernelArg(kernel, 12, sizeof(cl_float), (void *)&rthis.data_weight));
|
||||
openCLSafeCall(clSetKernelArg(kernel, 13, sizeof(cl_float), (void *)&rthis.max_data_term));
|
||||
openCLSafeCall(clSetKernelArg(kernel, 14, sizeof(cl_int), (void *)&rthis.min_disp_th));
|
||||
openCLSafeCall(clSetKernelArg(kernel, 15, sizeof(cl_int), (void *)&cdisp_step1));
|
||||
openCLSafeCall(clSetKernelArg(kernel, 16, sizeof(cl_int), (void *)&msg_step));
|
||||
openCLSafeCall(clEnqueueNDRangeKernel(*(cl_command_queue*)getoclCommandQueue(), kernel, 3, NULL,
|
||||
globalThreads, localThreads, 0, NULL, NULL));
|
||||
clFinish(*(cl_command_queue*)getoclCommandQueue());
|
||||
openCLSafeCall(clReleaseKernel(kernel));
|
||||
}
|
||||
|
||||
// Runs the "get_first_k_initial_local_<suffix>" kernel of the stereocsbp
// program over a w x h grid and blocks until the command queue has drained.
//
//   data_cost_selected, disp_selected_pyr - passed to the kernel as cl_mem arguments 0 and 1
//       NOTE(review): these are typed uchar* but handed over with sizeof(cl_mem);
//       presumably they hold device buffer handles - confirm against the caller.
//   temp      - its device buffer is kernel argument 2
//   rthis     - supplies msg_type (selects the kernel variant) and ndisp
//   h, w      - extent of the launch grid
//   nr_plane  - passed to the kernel as-is
//   msg_step  - passed to the kernel as-is; msg_step * h is passed as disp_step
//
// Every OpenCL status code, including the one returned by clFinish, is
// routed through openCLSafeCall.
static void get_first_initial_local_caller(uchar *data_cost_selected, uchar *disp_selected_pyr,
                                           oclMat &temp, StereoConstantSpaceBP &rthis,
                                           int h, int w, int nr_plane, int msg_step)
{
    Context *clCxt = temp.clCxt;
    int data_type = rthis.msg_type;

    string kernelName = get_kernel_name("get_first_k_initial_local_", data_type);

    cl_kernel kernel = openCLGetKernelFromSource(clCxt, &stereocsbp, kernelName);

    // 32x8 work-groups; the global range is w x h rounded up to whole groups.
    size_t localThreads[] = {32, 8, 1};
    size_t globalThreads[] = {divUp(w, localThreads[0]) *localThreads[0],
                              divUp(h, localThreads[1]) *localThreads[1],
                              1
                             };

    int disp_step = msg_step * h;
    openCLVerifyKernel(clCxt, kernel, localThreads);
    openCLSafeCall(clSetKernelArg(kernel, 0, sizeof(cl_mem), (void *)&data_cost_selected));
    openCLSafeCall(clSetKernelArg(kernel, 1, sizeof(cl_mem), (void *)&disp_selected_pyr));
    openCLSafeCall(clSetKernelArg(kernel, 2, sizeof(cl_mem), (void *)&temp.data));
    openCLSafeCall(clSetKernelArg(kernel, 3, sizeof(cl_int), (void *)&h));
    openCLSafeCall(clSetKernelArg(kernel, 4, sizeof(cl_int), (void *)&w));
    openCLSafeCall(clSetKernelArg(kernel, 5, sizeof(cl_int), (void *)&nr_plane));
    openCLSafeCall(clSetKernelArg(kernel, 6, sizeof(cl_int), (void *)&msg_step));
    openCLSafeCall(clSetKernelArg(kernel, 7, sizeof(cl_int), (void *)&disp_step));
    openCLSafeCall(clSetKernelArg(kernel, 8, sizeof(cl_int), (void *)&rthis.ndisp));
    openCLSafeCall(clEnqueueNDRangeKernel(*(cl_command_queue*)getoclCommandQueue(), kernel, 2, NULL,
                                          globalThreads, localThreads, 0, NULL, NULL));

    // Execution errors surface here, so the status must not be dropped.
    openCLSafeCall(clFinish(*(cl_command_queue*)getoclCommandQueue()));
    openCLSafeCall(clReleaseKernel(kernel));
}
|
||||
// Runs the "get_first_k_initial_global_<suffix>" kernel of the stereocsbp
// program over a w x h grid and blocks until the command queue has drained.
//
//   data_cost_selected, disp_selected_pyr - passed to the kernel as cl_mem arguments 0 and 1
//       NOTE(review): these are typed uchar* but handed over with sizeof(cl_mem);
//       presumably they hold device buffer handles - confirm against the caller.
//   temp      - its device buffer is kernel argument 2
//   rthis     - supplies msg_type (selects the kernel variant) and ndisp
//   h, w      - extent of the launch grid
//   nr_plane  - passed to the kernel as-is
//   msg_step  - passed to the kernel as-is; msg_step * h is passed as disp_step
//
// Every OpenCL status code, including the one returned by clFinish, is
// routed through openCLSafeCall.
static void get_first_initial_global_caller(uchar *data_cost_selected, uchar *disp_selected_pyr,
                                            oclMat &temp, StereoConstantSpaceBP &rthis,
                                            int h, int w, int nr_plane, int msg_step)
{
    Context *clCxt = temp.clCxt;
    int data_type = rthis.msg_type;

    string kernelName = get_kernel_name("get_first_k_initial_global_", data_type);

    cl_kernel kernel = openCLGetKernelFromSource(clCxt, &stereocsbp, kernelName);

    // 32x8 work-groups; the global range is w x h rounded up to whole groups.
    size_t localThreads[] = {32, 8, 1};
    size_t globalThreads[] = {divUp(w, localThreads[0]) *localThreads[0],
                              divUp(h, localThreads[1]) *localThreads[1],
                              1
                             };

    int disp_step = msg_step * h;
    openCLVerifyKernel(clCxt, kernel, localThreads);
    openCLSafeCall(clSetKernelArg(kernel, 0, sizeof(cl_mem), (void *)&data_cost_selected));
    openCLSafeCall(clSetKernelArg(kernel, 1, sizeof(cl_mem), (void *)&disp_selected_pyr));
    openCLSafeCall(clSetKernelArg(kernel, 2, sizeof(cl_mem), (void *)&temp.data));
    openCLSafeCall(clSetKernelArg(kernel, 3, sizeof(cl_int), (void *)&h));
    openCLSafeCall(clSetKernelArg(kernel, 4, sizeof(cl_int), (void *)&w));
    openCLSafeCall(clSetKernelArg(kernel, 5, sizeof(cl_int), (void *)&nr_plane));
    openCLSafeCall(clSetKernelArg(kernel, 6, sizeof(cl_int), (void *)&msg_step));
    openCLSafeCall(clSetKernelArg(kernel, 7, sizeof(cl_int), (void *)&disp_step));
    openCLSafeCall(clSetKernelArg(kernel, 8, sizeof(cl_int), (void *)&rthis.ndisp));
    openCLSafeCall(clEnqueueNDRangeKernel(*(cl_command_queue*)getoclCommandQueue(), kernel, 2, NULL,
                                          globalThreads, localThreads, 0, NULL, NULL));

    // Execution errors surface here, so the status must not be dropped.
    openCLSafeCall(clFinish(*(cl_command_queue*)getoclCommandQueue()));
    openCLSafeCall(clReleaseKernel(kernel));
}
|
||||
|
||||
static void init_data_cost(const oclMat &left, const oclMat &right, oclMat &temp, StereoConstantSpaceBP &rthis,
|
||||
uchar *disp_selected_pyr, uchar *data_cost_selected,
|
||||
size_t msg_step, int h, int w, int level, int nr_plane)
|
||||
{
|
||||
|
||||
if(level <= 1)
|
||||
init_data_cost_caller(left, right, temp, rthis, msg_step, h, w, level);
|
||||
else
|
||||
init_data_cost_reduce_caller(left, right, temp, rthis, msg_step, h, w, level);
|
||||
|
||||
if(rthis.use_local_init_data_cost == true)
|
||||
{
|
||||
get_first_initial_local_caller(data_cost_selected, disp_selected_pyr, temp, rthis, h, w, nr_plane, msg_step);
|
||||
}
|
||||
else
|
||||
{
|
||||
get_first_initial_global_caller(data_cost_selected, disp_selected_pyr, temp, rthis, h, w,
|
||||
nr_plane, msg_step);
|
||||
}
|
||||
}
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
///////////////////////////////////compute_data_cost//////////////////////////////////////////////
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// Launches the "compute_data_cost_" kernel (name completed by get_kernel_name() from
// rthis.msg_type) over a w x h range and blocks until it finishes.
//
// Kernel arguments, in order: disp_selected_pyr, data_cost, left.data, right.data
// (passed with sizeof(cl_mem)), h, w, level, nr_plane, channels, msg_step1, msg_step2,
// disp_step1 (= msg_step1 * h), disp_step2 (= msg_step2 * h2), rthis.data_weight,
// rthis.max_data_term, left.step and rthis.min_disp_th.
static void compute_data_cost_caller(uchar *disp_selected_pyr, uchar *data_cost,
                                     StereoConstantSpaceBP &rthis, int msg_step1,
                                     int msg_step2, const oclMat &left, const oclMat &right, int h,
                                     int w, int h2, int level, int nr_plane)
{
    Context *clCxt = left.clCxt;
    int channels = left.oclchannels();
    int data_type = rthis.msg_type;

    string kernelName = get_kernel_name("compute_data_cost_", data_type);
    cl_kernel kernel = openCLGetKernelFromSource(clCxt, &stereocsbp, kernelName);

    // 32x8 work-groups; each global dimension is rounded up to a whole number of groups.
    size_t localThreads[] = {32, 8, 1};
    size_t globalThreads[] = {divUp(w, localThreads[0]) * localThreads[0],
                              divUp(h, localThreads[1]) * localThreads[1],
                              1
                             };

    int disp_step1 = msg_step1 * h;
    int disp_step2 = msg_step2 * h2;

    // The kernel argument is declared as sizeof(cl_int) bytes, so hand OpenCL an object
    // that really is a cl_int instead of the address of left.step itself. If the member
    // is wider than 32 bits, reading its first four bytes only works on little-endian hosts.
    cl_int left_step = (cl_int)left.step;

    openCLVerifyKernel(clCxt, kernel, localThreads);

    // Argument table: position in the array is the kernel argument index.
    struct KernelArg
    {
        size_t size;
        const void *value;
    };
    const KernelArg kargs[] =
    {
        { sizeof(cl_mem),   &disp_selected_pyr },
        { sizeof(cl_mem),   &data_cost },
        { sizeof(cl_mem),   &left.data },
        { sizeof(cl_mem),   &right.data },
        { sizeof(cl_int),   &h },
        { sizeof(cl_int),   &w },
        { sizeof(cl_int),   &level },
        { sizeof(cl_int),   &nr_plane },
        { sizeof(cl_int),   &channels },
        { sizeof(cl_int),   &msg_step1 },
        { sizeof(cl_int),   &msg_step2 },
        { sizeof(cl_int),   &disp_step1 },
        { sizeof(cl_int),   &disp_step2 },
        { sizeof(cl_float), &rthis.data_weight },
        { sizeof(cl_float), &rthis.max_data_term },
        { sizeof(cl_int),   &left_step },
        { sizeof(cl_int),   &rthis.min_disp_th }
    };
    const cl_uint kargCount = (cl_uint)(sizeof(kargs) / sizeof(kargs[0]));
    for (cl_uint idx = 0; idx < kargCount; ++idx)
        openCLSafeCall(clSetKernelArg(kernel, idx, kargs[idx].size, kargs[idx].value));

    openCLSafeCall(clEnqueueNDRangeKernel(*(cl_command_queue*)getoclCommandQueue(), kernel, 2, NULL,
                                          globalThreads, localThreads, 0, NULL, NULL));

    // Wait for completion before the kernel object is released.
    clFinish(*(cl_command_queue*)getoclCommandQueue());
    openCLSafeCall(clReleaseKernel(kernel));
}
|
||||
// Launches the "compute_data_cost_reduce_" kernel (name completed by get_kernel_name()
// from rthis.msg_type) as a 3-D range and blocks until it finishes.
//
// Work-group shape is {win_size, 1, 256 / win_size} with win_size = 2^level, and
// 256 * sizeof(float) bytes of local memory are reserved as kernel argument 4.
//
// Kernel arguments, in order: disp_selected_pyr, data_cost, left.data, right.data
// (passed with sizeof(cl_mem)), local scratch, level, left.rows, left.cols, h, nr_plane,
// channels, win_size, msg_step1, msg_step2, disp_step1 (= msg_step1 * h),
// disp_step2 (= msg_step2 * h2), rthis.data_weight, rthis.max_data_term, left.step and
// rthis.min_disp_th.
static void compute_data_cost_reduce_caller(uchar *disp_selected_pyr, uchar *data_cost,
        StereoConstantSpaceBP &rthis, int msg_step1,
        int msg_step2, const oclMat &left, const oclMat &right, int h,
        int w, int h2, int level, int nr_plane)
{
    Context *clCxt = left.clCxt;
    int data_type = rthis.msg_type;
    int channels = left.oclchannels();

    // Exact integer power of two; avoids relying on floating-point pow() rounding
    // to land on the integer before truncation.
    int win_size = 1 << level;

    string kernelName = get_kernel_name("compute_data_cost_reduce_", data_type);
    cl_kernel kernel = openCLGetKernelFromSource(clCxt, &stereocsbp, kernelName);

    const size_t threadsNum = 256;
    size_t localThreads[3] = {(size_t)win_size, 1, threadsNum / (size_t)win_size};
    size_t globalThreads[3] = {w * localThreads[0],
                               h * divUp(nr_plane, localThreads[2]) * localThreads[1],
                               1 * localThreads[2]
                              };

    int disp_step1 = msg_step1 * h;
    int disp_step2 = msg_step2 * h2;
    size_t local_mem_size = threadsNum * sizeof(float);

    // The kernel argument is declared as sizeof(cl_int) bytes, so hand OpenCL an object
    // that really is a cl_int instead of the address of left.step itself. If the member
    // is wider than 32 bits, reading its first four bytes only works on little-endian hosts.
    cl_int left_step = (cl_int)left.step;

    openCLVerifyKernel(clCxt, kernel, localThreads);

    // Argument table: position in the array is the kernel argument index.
    struct KernelArg
    {
        size_t size;
        const void *value;
    };
    const KernelArg kargs[] =
    {
        { sizeof(cl_mem),   &disp_selected_pyr },
        { sizeof(cl_mem),   &data_cost },
        { sizeof(cl_mem),   &left.data },
        { sizeof(cl_mem),   &right.data },
        { local_mem_size,   (const void *)NULL },   // local memory: size only, no host pointer
        { sizeof(cl_int),   &level },
        { sizeof(cl_int),   &left.rows },
        { sizeof(cl_int),   &left.cols },
        { sizeof(cl_int),   &h },
        { sizeof(cl_int),   &nr_plane },
        { sizeof(cl_int),   &channels },
        { sizeof(cl_int),   &win_size },
        { sizeof(cl_int),   &msg_step1 },
        { sizeof(cl_int),   &msg_step2 },
        { sizeof(cl_int),   &disp_step1 },
        { sizeof(cl_int),   &disp_step2 },
        { sizeof(cl_float), &rthis.data_weight },
        { sizeof(cl_float), &rthis.max_data_term },
        { sizeof(cl_int),   &left_step },
        { sizeof(cl_int),   &rthis.min_disp_th }
    };
    const cl_uint kargCount = (cl_uint)(sizeof(kargs) / sizeof(kargs[0]));
    for (cl_uint idx = 0; idx < kargCount; ++idx)
        openCLSafeCall(clSetKernelArg(kernel, idx, kargs[idx].size, kargs[idx].value));

    openCLSafeCall(clEnqueueNDRangeKernel(*(cl_command_queue*)getoclCommandQueue(), kernel, 3, NULL,
                                          globalThreads, localThreads, 0, NULL, NULL));

    // Wait for completion before the kernel object is released.
    clFinish(*(cl_command_queue*)getoclCommandQueue());
    openCLSafeCall(clReleaseKernel(kernel));
}
|
||||
// Dispatches the data-cost computation for one pyramid level: levels above 1 go to
// compute_data_cost_reduce_caller, levels 0 and 1 to compute_data_cost_caller.
// All arguments are forwarded unchanged.
static void compute_data_cost(uchar *disp_selected_pyr, uchar *data_cost, StereoConstantSpaceBP &rthis,
                              int msg_step1, int msg_step2, const oclMat &left, const oclMat &right, int h, int w,
                              int h2, int level, int nr_plane)
{
    if (level > 1)
    {
        compute_data_cost_reduce_caller(disp_selected_pyr, data_cost, rthis, msg_step1, msg_step2,
                                        left, right, h, w, h2, level, nr_plane);
        return;
    }

    compute_data_cost_caller(disp_selected_pyr, data_cost, rthis, msg_step1, msg_step2,
                             left, right, h, w, h2, level, nr_plane);
}
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
//////////////////////////////////////init message//////////////////////////////////////////////
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
static void init_message(uchar *u_new, uchar *d_new, uchar *l_new, uchar *r_new,
|
||||
uchar *u_cur, uchar *d_cur, uchar *l_cur, uchar *r_cur,
|
||||
uchar *disp_selected_pyr_new, uchar *disp_selected_pyr_cur,
|
||||
uchar *data_cost_selected, uchar *data_cost, oclMat &temp, StereoConstantSpaceBP rthis,
|
||||
size_t msg_step1, size_t msg_step2, int h, int w, int nr_plane,
|
||||
int h2, int w2, int nr_plane2)
|
||||
{
|
||||
Context *clCxt = temp.clCxt;
|
||||
int data_type = rthis.msg_type;
|
||||
|
||||
string kernelName = get_kernel_name("init_message_", data_type);
|
||||
|
||||
cl_kernel kernel = openCLGetKernelFromSource(clCxt, &stereocsbp, kernelName);
|
||||
|
||||
//size_t blockSize = 256;
|
||||
size_t localThreads[] = {32, 8, 1};
|
||||
size_t globalThreads[] = {divUp(w, localThreads[0]) *localThreads[0],
|
||||
divUp(h, localThreads[1]) *localThreads[1],
|
||||
1
|
||||
};
|
||||
|
||||
int disp_step1 = msg_step1 * h;
|
||||
int disp_step2 = msg_step2 * h2;
|
||||
openCLVerifyKernel(clCxt, kernel, localThreads);
|
||||
openCLSafeCall(clSetKernelArg(kernel, 0, sizeof(cl_mem), (void *)&u_new));
|
||||
openCLSafeCall(clSetKernelArg(kernel, 1, sizeof(cl_mem), (void *)&d_new));
|
||||
openCLSafeCall(clSetKernelArg(kernel, 2, sizeof(cl_mem), (void *)&l_new));
|
||||
openCLSafeCall(clSetKernelArg(kernel, 3, sizeof(cl_mem), (void *)&r_new));
|
||||
openCLSafeCall(clSetKernelArg(kernel, 4, sizeof(cl_mem), (void *)&u_cur));
|
||||
openCLSafeCall(clSetKernelArg(kernel, 5, sizeof(cl_mem), (void *)&d_cur));
|
||||
openCLSafeCall(clSetKernelArg(kernel, 6, sizeof(cl_mem), (void *)&l_cur));
|
||||
openCLSafeCall(clSetKernelArg(kernel, 7, sizeof(cl_mem), (void *)&r_cur));
|
||||
openCLSafeCall(clSetKernelArg(kernel, 8, sizeof(cl_mem), (void *)&temp.data));
|
||||
openCLSafeCall(clSetKernelArg(kernel, 9, sizeof(cl_mem), (void *)&disp_selected_pyr_new));
|
||||
openCLSafeCall(clSetKernelArg(kernel, 10, sizeof(cl_mem), (void *)&disp_selected_pyr_cur));
|
||||
openCLSafeCall(clSetKernelArg(kernel, 11, sizeof(cl_mem), (void *)&data_cost_selected));
|
||||
openCLSafeCall(clSetKernelArg(kernel, 12, sizeof(cl_mem), (void *)&data_cost));
|
||||
openCLSafeCall(clSetKernelArg(kernel, 13, sizeof(cl_int), (void *)&h));
|
||||
openCLSafeCall(clSetKernelArg(kernel, 14, sizeof(cl_int), (void *)&w));
|
||||
openCLSafeCall(clSetKernelArg(kernel, 15, sizeof(cl_int), (void *)&nr_plane));
|
||||
openCLSafeCall(clSetKernelArg(kernel, 16, sizeof(cl_int), (void *)&h2));
|
||||
openCLSafeCall(clSetKernelArg(kernel, 17, sizeof(cl_int), (void *)&w2));
|
||||
openCLSafeCall(clSetKernelArg(kernel, 18, sizeof(cl_int), (void *)&nr_plane2));
|
||||
openCLSafeCall(clSetKernelArg(kernel, 19, sizeof(cl_int), (void *)&disp_step1));
|
||||
openCLSafeCall(clSetKernelArg(kernel, 20, sizeof(cl_int), (void *)&disp_step2));
|
||||
openCLSafeCall(clSetKernelArg(kernel, 21, sizeof(cl_int), (void *)&msg_step1));
|
||||
openCLSafeCall(clSetKernelArg(kernel, 22, sizeof(cl_int), (void *)&msg_step2));
|
||||
openCLSafeCall(clEnqueueNDRangeKernel(*(cl_command_queue*)getoclCommandQueue(), kernel, 2, NULL,
|
||||
globalThreads, localThreads, 0, NULL, NULL));
|
||||
|
||||
clFinish(*(cl_command_queue*)getoclCommandQueue());
|
||||
openCLSafeCall(clReleaseKernel(kernel));
|
||||
}
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
///////////////////////////calc_all_iterations////////////////////////////////////////////////
|
||||
//////////////////////////////////////////////////////////////////////////////////////////////
|
||||
static void calc_all_iterations_caller(uchar *u, uchar *d, uchar *l, uchar *r, uchar *data_cost_selected,
|
||||
uchar *disp_selected_pyr, oclMat &temp, StereoConstantSpaceBP rthis,
|
||||
int msg_step, int h, int w, int nr_plane, int i)
|
||||
{
|
||||
Context *clCxt = temp.clCxt;
|
||||
int data_type = rthis.msg_type;
|
||||
|
||||
string kernelName = get_kernel_name("compute_message_", data_type);
|
||||
|
||||
cl_kernel kernel = openCLGetKernelFromSource(clCxt, &stereocsbp, kernelName);
|
||||
size_t localThreads[] = {32, 8, 1};
|
||||
size_t globalThreads[] = {divUp(w, (localThreads[0]) << 1) *localThreads[0],
|
||||
divUp(h, localThreads[1]) *localThreads[1],
|
||||
1
|
||||
};
|
||||
|
||||
int disp_step = msg_step * h;
|
||||
openCLVerifyKernel(clCxt, kernel, localThreads);
|
||||
openCLSafeCall(clSetKernelArg(kernel, 0, sizeof(cl_mem), (void *)&u));
|
||||
openCLSafeCall(clSetKernelArg(kernel, 1, sizeof(cl_mem), (void *)&d));
|
||||
openCLSafeCall(clSetKernelArg(kernel, 2, sizeof(cl_mem), (void *)&l));
|
||||
openCLSafeCall(clSetKernelArg(kernel, 3, sizeof(cl_mem), (void *)&r));
|
||||
openCLSafeCall(clSetKernelArg(kernel, 4, sizeof(cl_mem), (void *)&data_cost_selected));
|
||||
openCLSafeCall(clSetKernelArg(kernel, 5, sizeof(cl_mem), (void *)&disp_selected_pyr));
|
||||
openCLSafeCall(clSetKernelArg(kernel, 6, sizeof(cl_mem), (void *)&temp.data));
|
||||
openCLSafeCall(clSetKernelArg(kernel, 7, sizeof(cl_int), (void *)&h));
|
||||
openCLSafeCall(clSetKernelArg(kernel, 8, sizeof(cl_int), (void *)&w));
|
||||
openCLSafeCall(clSetKernelArg(kernel, 9, sizeof(cl_int), (void *)&nr_plane));
|
||||
openCLSafeCall(clSetKernelArg(kernel, 10, sizeof(cl_int), (void *)&i));
|
||||
openCLSafeCall(clSetKernelArg(kernel, 11, sizeof(cl_float), (void *)&rthis.max_disc_term));
|
||||
openCLSafeCall(clSetKernelArg(kernel, 12, sizeof(cl_int), (void *)&disp_step));
|
||||
openCLSafeCall(clSetKernelArg(kernel, 13, sizeof(cl_int), (void *)&msg_step));
|
||||
openCLSafeCall(clSetKernelArg(kernel, 14, sizeof(cl_float), (void *)&rthis.disc_single_jump));
|
||||
openCLSafeCall(clEnqueueNDRangeKernel(*(cl_command_queue*)getoclCommandQueue(), kernel, 2, NULL,
|
||||
globalThreads, localThreads, 0, NULL, NULL));
|
||||
|
||||
clFinish(*(cl_command_queue*)getoclCommandQueue());
|
||||
openCLSafeCall(clReleaseKernel(kernel));
|
||||
}
|
||||
static void calc_all_iterations(uchar *u, uchar *d, uchar *l, uchar *r, uchar *data_cost_selected,
|
||||
uchar *disp_selected_pyr, oclMat &temp, StereoConstantSpaceBP rthis,
|
||||
int msg_step, int h, int w, int nr_plane)
|
||||
{
|
||||
for(int t = 0; t < rthis.iters; t++)
|
||||
calc_all_iterations_caller(u, d, l, r, data_cost_selected, disp_selected_pyr, temp, rthis,
|
||||
msg_step, h, w, nr_plane, t & 1);
|
||||
}
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////////////////////
|
||||
//////////////////////////compute_disp////////////////////////////////////////////////////////
|
||||
/////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// Launches the "compute_disp_" kernel (name completed by get_kernel_name() from
// rthis.msg_type) over a disp.cols x disp.rows range and blocks until it finishes.
//
// Kernel arguments, in order: u, d, l, r, data_cost_selected, disp_selected_pyr,
// disp.data (passed with sizeof(cl_mem)), step_size (= disp.step / disp.elemSize()),
// disp.cols, disp.rows, nr_plane, msg_step, disp_step (= disp.rows * msg_step).
static void compute_disp(uchar *u, uchar *d, uchar *l, uchar *r, uchar *data_cost_selected,
                         uchar *disp_selected_pyr, StereoConstantSpaceBP &rthis, size_t msg_step,
                         oclMat &disp, int nr_plane)
{
    Context *clCxt = disp.clCxt;
    int data_type = rthis.msg_type;

    string kernelName = get_kernel_name("compute_disp_", data_type);
    cl_kernel kernel = openCLGetKernelFromSource(clCxt, &stereocsbp, kernelName);

    // 32x8 work-groups; each global dimension is rounded up to a whole number of groups.
    size_t localThreads[] = {32, 8, 1};
    size_t globalThreads[] = {divUp(disp.cols, localThreads[0]) * localThreads[0],
                              divUp(disp.rows, localThreads[1]) * localThreads[1],
                              1
                             };

    int step_size = (int)(disp.step / disp.elemSize());
    int disp_step = (int)(disp.rows * msg_step);

    // msg_step arrives as size_t but the kernel argument is sizeof(cl_int) bytes.
    // Passing the address of a size_t only yields the right value when the low bytes
    // come first, so convert explicitly and pass a real cl_int object.
    cl_int msg_step_arg = (cl_int)msg_step;

    openCLVerifyKernel(clCxt, kernel, localThreads);

    // Argument table: position in the array is the kernel argument index.
    struct KernelArg
    {
        size_t size;
        const void *value;
    };
    const KernelArg kargs[] =
    {
        { sizeof(cl_mem), &u },
        { sizeof(cl_mem), &d },
        { sizeof(cl_mem), &l },
        { sizeof(cl_mem), &r },
        { sizeof(cl_mem), &data_cost_selected },
        { sizeof(cl_mem), &disp_selected_pyr },
        { sizeof(cl_mem), &disp.data },
        { sizeof(cl_int), &step_size },
        { sizeof(cl_int), &disp.cols },
        { sizeof(cl_int), &disp.rows },
        { sizeof(cl_int), &nr_plane },
        { sizeof(cl_int), &msg_step_arg },
        { sizeof(cl_int), &disp_step }
    };
    const cl_uint kargCount = (cl_uint)(sizeof(kargs) / sizeof(kargs[0]));
    for (cl_uint idx = 0; idx < kargCount; ++idx)
        openCLSafeCall(clSetKernelArg(kernel, idx, kargs[idx].size, kargs[idx].value));

    openCLSafeCall(clEnqueueNDRangeKernel(*(cl_command_queue*)getoclCommandQueue(), kernel, 2, NULL,
                                          globalThreads, localThreads, 0, NULL, NULL));

    // Wait for completion before the kernel object is released.
    clFinish(*(cl_command_queue*)getoclCommandQueue());
    openCLSafeCall(clReleaseKernel(kernel));
}
|
||||
}
|
||||
}
|
||||
}
|
||||
// File-local defaults used by the short StereoConstantSpaceBP constructor for the
// members max_data_term, data_weight, max_disc_term and disc_single_jump.
namespace
{
    const float DEFAULT_MAX_DATA_TERM    = 30.0f;   // -> max_data_term
    const float DEFAULT_DATA_WEIGHT      = 1.0f;    // -> data_weight
    const float DEFAULT_MAX_DISC_TERM    = 160.0f;  // -> max_disc_term
    const float DEFAULT_DISC_SINGLE_JUMP = 10.0f;   // -> disc_single_jump
}
|
||||
|
||||
void cv::ocl::StereoConstantSpaceBP::estimateRecommendedParams(int width, int height, int &ndisp, int &iters, int &levels, int &nr_plane)
|
||||
{
|
||||
ndisp = (int) ((float) width / 3.14f);
|
||||
if ((ndisp & 1) != 0)
|
||||
ndisp++;
|
||||
|
||||
int mm = ::max(width, height);
|
||||
iters = mm / 100 + ((mm > 1200) ? - 4 : 4);
|
||||
|
||||
levels = (int)::log(static_cast<double>(mm)) * 2 / 3;
|
||||
if (levels == 0) levels++;
|
||||
|
||||
nr_plane = (int) ((float) ndisp / std::pow(2.0, levels + 1));
|
||||
}
|
||||
|
||||
// Short constructor: the caller supplies ndisp, iters, levels, nr_plane and the
// message type; the cost terms come from the DEFAULT_* constants, min_disp_th is 0 and
// use_local_init_data_cost is enabled. msg_type_ must be CV_32F or CV_16S.
cv::ocl::StereoConstantSpaceBP::StereoConstantSpaceBP(int ndisp_, int iters_, int levels_, int nr_plane_,
        int msg_type_)
    : ndisp(ndisp_),
      iters(iters_),
      levels(levels_),
      nr_plane(nr_plane_),
      max_data_term(DEFAULT_MAX_DATA_TERM),
      data_weight(DEFAULT_DATA_WEIGHT),
      max_disc_term(DEFAULT_MAX_DISC_TERM),
      disc_single_jump(DEFAULT_DISC_SINGLE_JUMP),
      min_disp_th(0),
      msg_type(msg_type_),
      use_local_init_data_cost(true)
{
    CV_Assert(msg_type_ == CV_32F || msg_type_ == CV_16S);
}
|
||||
|
||||
|
||||
// Full constructor: every tunable is supplied by the caller and stored as given;
// use_local_init_data_cost is enabled. msg_type_ must be CV_32F or CV_16S.
cv::ocl::StereoConstantSpaceBP::StereoConstantSpaceBP(int ndisp_, int iters_, int levels_, int nr_plane_,
        float max_data_term_, float data_weight_, float max_disc_term_, float disc_single_jump_,
        int min_disp_th_, int msg_type_)
    : ndisp(ndisp_),
      iters(iters_),
      levels(levels_),
      nr_plane(nr_plane_),
      max_data_term(max_data_term_),
      data_weight(data_weight_),
      max_disc_term(max_disc_term_),
      disc_single_jump(disc_single_jump_),
      min_disp_th(min_disp_th_),
      msg_type(msg_type_),
      use_local_init_data_cost(true)
{
    CV_Assert(msg_type_ == CV_32F || msg_type_ == CV_16S);
}
|
||||
|
||||
template<class T>
|
||||
static void csbp_operator(StereoConstantSpaceBP &rthis, oclMat u[2], oclMat d[2], oclMat l[2], oclMat r[2],
|
||||
oclMat disp_selected_pyr[2], oclMat &data_cost, oclMat &data_cost_selected,
|
||||
oclMat &temp, oclMat &out, const oclMat &left, const oclMat &right, oclMat &disp)
|
||||
{
|
||||
CV_DbgAssert(0 < rthis.ndisp && 0 < rthis.iters && 0 < rthis.levels && 0 < rthis.nr_plane
|
||||
&& left.rows == right.rows && left.cols == right.cols && left.type() == right.type());
|
||||
|
||||
CV_Assert(rthis.levels <= 8 && (left.type() == CV_8UC1 || left.type() == CV_8UC3));
|
||||
|
||||
const Scalar zero = Scalar::all(0);
|
||||
|
||||
////////////////////////////////////Init///////////////////////////////////////////////////
|
||||
int rows = left.rows;
|
||||
int cols = left.cols;
|
||||
|
||||
rthis.levels = min(rthis.levels, int(log((double)rthis.ndisp) / log(2.0)));
|
||||
int levels = rthis.levels;
|
||||
|
||||
AutoBuffer<int> buf(levels * 4);
|
||||
|
||||
int *cols_pyr = buf;
|
||||
int *rows_pyr = cols_pyr + levels;
|
||||
int *nr_plane_pyr = rows_pyr + levels;
|
||||
int *step_pyr = nr_plane_pyr + levels;
|
||||
|
||||
cols_pyr[0] = cols;
|
||||
rows_pyr[0] = rows;
|
||||
nr_plane_pyr[0] = rthis.nr_plane;
|
||||
|
||||
const int n = 64;
|
||||
step_pyr[0] = alignSize(cols * sizeof(T), n) / sizeof(T);
|
||||
for (int i = 1; i < levels; i++)
|
||||
{
|
||||
cols_pyr[i] = cols_pyr[i - 1] / 2;
|
||||
rows_pyr[i] = rows_pyr[i - 1]/ 2;
|
||||
|
||||
nr_plane_pyr[i] = nr_plane_pyr[i - 1] * 2;
|
||||
|
||||
step_pyr[i] = alignSize(cols_pyr[i] * sizeof(T), n) / sizeof(T);
|
||||
}
|
||||
|
||||
Size msg_size(step_pyr[0], rows * nr_plane_pyr[0]);
|
||||
Size data_cost_size(step_pyr[0], rows * nr_plane_pyr[0] * 2);
|
||||
|
||||
u[0].create(msg_size, DataType<T>::type);
|
||||
d[0].create(msg_size, DataType<T>::type);
|
||||
l[0].create(msg_size, DataType<T>::type);
|
||||
r[0].create(msg_size, DataType<T>::type);
|
||||
|
||||
u[1].create(msg_size, DataType<T>::type);
|
||||
d[1].create(msg_size, DataType<T>::type);
|
||||
l[1].create(msg_size, DataType<T>::type);
|
||||
r[1].create(msg_size, DataType<T>::type);
|
||||
|
||||
disp_selected_pyr[0].create(msg_size, DataType<T>::type);
|
||||
disp_selected_pyr[1].create(msg_size, DataType<T>::type);
|
||||
|
||||
data_cost.create(data_cost_size, DataType<T>::type);
|
||||
data_cost_selected.create(msg_size, DataType<T>::type);
|
||||
|
||||
Size temp_size = data_cost_size;
|
||||
if (data_cost_size.width * data_cost_size.height < step_pyr[0] * rows_pyr[levels - 1] * rthis.ndisp)
|
||||
temp_size = Size(step_pyr[0], rows_pyr[levels - 1] * rthis.ndisp);
|
||||
|
||||
temp.create(temp_size, DataType<T>::type);
|
||||
temp = zero;
|
||||
|
||||
///////////////////////////////// Compute////////////////////////////////////////////////
|
||||
|
||||
//csbp::load_constants(rthis.ndisp, rthis.max_data_term, rthis.data_weight,
|
||||
// rthis.max_disc_term, rthis.disc_single_jump, rthis.min_disp_th, left, right, temp);
|
||||
|
||||
l[0] = zero;
|
||||
d[0] = zero;
|
||||
r[0] = zero;
|
||||
u[0] = zero;
|
||||
disp_selected_pyr[0] = zero;
|
||||
|
||||
l[1] = zero;
|
||||
d[1] = zero;
|
||||
r[1] = zero;
|
||||
u[1] = zero;
|
||||
disp_selected_pyr[1] = zero;
|
||||
|
||||
data_cost = zero;
|
||||
|
||||
data_cost_selected = zero;
|
||||
|
||||
int cur_idx = 0;
|
||||
|
||||
for (int i = levels - 1; i >= 0; i--)
|
||||
{
|
||||
if (i == levels - 1)
|
||||
{
|
||||
cv::ocl::stereoCSBP::init_data_cost(left, right, temp, rthis, disp_selected_pyr[cur_idx].data,
|
||||
data_cost_selected.data, step_pyr[0], rows_pyr[i], cols_pyr[i],
|
||||
i, nr_plane_pyr[i]);
|
||||
}
|
||||
else
|
||||
{
|
||||
cv::ocl::stereoCSBP::compute_data_cost(
|
||||
disp_selected_pyr[cur_idx].data, data_cost.data, rthis, step_pyr[0],
|
||||
step_pyr[0], left, right, rows_pyr[i], cols_pyr[i], rows_pyr[i + 1], i,
|
||||
nr_plane_pyr[i + 1]);
|
||||
|
||||
int new_idx = (cur_idx + 1) & 1;
|
||||
|
||||
cv::ocl::stereoCSBP::init_message(u[new_idx].data, d[new_idx].data, l[new_idx].data, r[new_idx].data,
|
||||
u[cur_idx].data, d[cur_idx].data, l[cur_idx].data, r[cur_idx].data,
|
||||
disp_selected_pyr[new_idx].data, disp_selected_pyr[cur_idx].data,
|
||||
data_cost_selected.data, data_cost.data, temp, rthis, step_pyr[0],
|
||||
step_pyr[0], rows_pyr[i], cols_pyr[i], nr_plane_pyr[i], rows_pyr[i + 1],
|
||||
cols_pyr[i + 1], nr_plane_pyr[i + 1]);
|
||||
cur_idx = new_idx;
|
||||
}
|
||||
cv::ocl::stereoCSBP::calc_all_iterations(u[cur_idx].data, d[cur_idx].data, l[cur_idx].data, r[cur_idx].data,
|
||||
data_cost_selected.data, disp_selected_pyr[cur_idx].data, temp,
|
||||
rthis, step_pyr[0], rows_pyr[i], cols_pyr[i], nr_plane_pyr[i]);
|
||||
}
|
||||
|
||||
if (disp.empty())
|
||||
disp.create(rows, cols, CV_16S);
|
||||
|
||||
out = ((disp.type() == CV_16S) ? disp : (out.create(rows, cols, CV_16S), out));
|
||||
out = zero;
|
||||
|
||||
stereoCSBP::compute_disp(u[cur_idx].data, d[cur_idx].data, l[cur_idx].data, r[cur_idx].data,
|
||||
data_cost_selected.data, disp_selected_pyr[cur_idx].data, rthis, step_pyr[0],
|
||||
out, nr_plane_pyr[0]);
|
||||
if (disp.type() != CV_16S)
|
||||
out.convertTo(disp, disp.type());
|
||||
}
|
||||
|
||||
|
||||
typedef void (*csbp_operator_t)(StereoConstantSpaceBP &rthis, oclMat u[2], oclMat d[2], oclMat l[2], oclMat r[2],
|
||||
oclMat disp_selected_pyr[2], oclMat &data_cost, oclMat &data_cost_selected,
|
||||
oclMat &temp, oclMat &out, const oclMat &left, const oclMat &right, oclMat &disp);
|
||||
|
||||
const static csbp_operator_t operators[] = {0, 0, 0, csbp_operator<short>, 0, csbp_operator<float>, 0, 0};
|
||||
|
||||
void cv::ocl::StereoConstantSpaceBP::operator()(const oclMat &left, const oclMat &right, oclMat &disp)
|
||||
{
|
||||
|
||||
CV_Assert(msg_type == CV_32F || msg_type == CV_16S);
|
||||
operators[msg_type](*this, u, d, l, r, disp_selected_pyr, data_cost, data_cost_selected, temp, out,
|
||||
left, right, disp);
|
||||
}
|
||||
|
||||
#endif /* !defined (HAVE_OPENCL) */
|
@ -74,28 +74,21 @@ namespace stereoBM
|
||||
////////////////////////////////////////////////////////////////////////
|
||||
static void prefilter_xsobel(const oclMat &input, oclMat &output, int prefilterCap)
|
||||
{
|
||||
Context *clCxt = input.clCxt;
|
||||
|
||||
string kernelName = "prefilter_xsobel";
|
||||
cl_kernel kernel = openCLGetKernelFromSource(clCxt, &stereobm, kernelName);
|
||||
|
||||
size_t blockSize = 1;
|
||||
size_t globalThreads[3] = { input.cols, input.rows, 1 };
|
||||
size_t localThreads[3] = { blockSize, blockSize, 1 };
|
||||
|
||||
openCLVerifyKernel(clCxt, kernel, localThreads);
|
||||
openCLSafeCall(clSetKernelArg(kernel, 0, sizeof(cl_mem), (void *)&input.data));
|
||||
openCLSafeCall(clSetKernelArg(kernel, 1, sizeof(cl_mem), (void *)&output.data));
|
||||
openCLSafeCall(clSetKernelArg(kernel, 2, sizeof(cl_int), (void *)&input.rows));
|
||||
openCLSafeCall(clSetKernelArg(kernel, 3, sizeof(cl_int), (void *)&input.cols));
|
||||
openCLSafeCall(clSetKernelArg(kernel, 4, sizeof(cl_int), (void *)&prefilterCap));
|
||||
|
||||
openCLSafeCall(clEnqueueNDRangeKernel((cl_command_queue)clCxt->oclCommandQueue(), kernel, 3, NULL,
|
||||
globalThreads, localThreads, 0, NULL, NULL));
|
||||
|
||||
clFinish((cl_command_queue)clCxt->oclCommandQueue());
|
||||
openCLSafeCall(clReleaseKernel(kernel));
|
||||
std::vector< std::pair<size_t, const void *> > args;
|
||||
args.push_back(std::make_pair(sizeof(cl_mem), (void *)&input.data));
|
||||
args.push_back(std::make_pair(sizeof(cl_mem), (void *)&output.data));
|
||||
args.push_back(std::make_pair(sizeof(cl_int), (void *)&input.rows));
|
||||
args.push_back(std::make_pair(sizeof(cl_int), (void *)&input.cols));
|
||||
args.push_back(std::make_pair(sizeof(cl_int), (void *)&prefilterCap));
|
||||
|
||||
openCLExecuteKernel(Context::getContext(), &stereobm, kernelName,
|
||||
globalThreads, localThreads, args, -1, -1);
|
||||
}
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
//////////////////////////////common////////////////////////////////////
|
||||
@ -115,19 +108,13 @@ static void stereo_bm(const oclMat &left, const oclMat &right, oclMat &disp,
|
||||
{
|
||||
int winsz2 = winSize >> 1;
|
||||
|
||||
//if(winsz2 == 0 || winsz2 >= calles_num)
|
||||
//cv::ocl:error("Unsupported window size", __FILE__, __LINE__, __FUNCTION__);
|
||||
|
||||
Context *clCxt = left.clCxt;
|
||||
|
||||
string kernelName = "stereoKernel";
|
||||
cl_kernel kernel = openCLGetKernelFromSource(clCxt, &stereobm, kernelName);
|
||||
|
||||
disp.setTo(Scalar_<unsigned char>::all(0));
|
||||
minSSD_buf.setTo(Scalar_<unsigned int>::all(0xFFFFFFFF));
|
||||
|
||||
size_t minssd_step = minSSD_buf.step / minSSD_buf.elemSize();
|
||||
size_t local_mem_size = (BLOCK_W + N_DISPARITIES * (BLOCK_W + 2 * winsz2)) *
|
||||
size_t local_mem_size = (N_DISPARITIES * (BLOCK_W + 2 * winsz2)) *
|
||||
sizeof(cl_uint);
|
||||
//size_t blockSize = 1;
|
||||
size_t localThreads[] = { BLOCK_W, 1,1};
|
||||
@ -136,26 +123,23 @@ static void stereo_bm(const oclMat &left, const oclMat &right, oclMat &disp,
|
||||
1
|
||||
};
|
||||
|
||||
openCLVerifyKernel(clCxt, kernel, localThreads);
|
||||
openCLSafeCall(clSetKernelArg(kernel, 0, sizeof(cl_mem), (void *)&left.data));
|
||||
openCLSafeCall(clSetKernelArg(kernel, 1, sizeof(cl_mem), (void *)&right.data));
|
||||
openCLSafeCall(clSetKernelArg(kernel, 2, sizeof(cl_mem), (void *)&minSSD_buf.data));
|
||||
openCLSafeCall(clSetKernelArg(kernel, 3, sizeof(cl_int), (void *)&minssd_step));
|
||||
openCLSafeCall(clSetKernelArg(kernel, 4, sizeof(cl_mem), (void *)&disp.data));
|
||||
openCLSafeCall(clSetKernelArg(kernel, 5, sizeof(cl_int), (void *)&disp.step));
|
||||
openCLSafeCall(clSetKernelArg(kernel, 6, sizeof(cl_int), (void *)&left.cols));
|
||||
openCLSafeCall(clSetKernelArg(kernel, 7, sizeof(cl_int), (void *)&left.rows));
|
||||
openCLSafeCall(clSetKernelArg(kernel, 8, sizeof(cl_int), (void *)&left.step));
|
||||
openCLSafeCall(clSetKernelArg(kernel, 9, sizeof(cl_int), (void *)&maxdisp));
|
||||
openCLSafeCall(clSetKernelArg(kernel, 10, sizeof(cl_int), (void *)&winsz2));
|
||||
openCLSafeCall(clSetKernelArg(kernel, 11, local_mem_size, (void *)NULL));
|
||||
std::vector< std::pair<size_t, const void *> > args;
|
||||
args.push_back(std::make_pair(sizeof(cl_mem), (void *)&left.data));
|
||||
args.push_back(std::make_pair(sizeof(cl_mem), (void *)&right.data));
|
||||
args.push_back(std::make_pair(sizeof(cl_mem), (void *)&minSSD_buf.data));
|
||||
args.push_back(std::make_pair(sizeof(cl_int), (void *)&minssd_step));
|
||||
args.push_back(std::make_pair(sizeof(cl_mem), (void *)&disp.data));
|
||||
args.push_back(std::make_pair(sizeof(cl_int), (void *)&disp.step));
|
||||
args.push_back(std::make_pair(sizeof(cl_int), (void *)&left.cols));
|
||||
args.push_back(std::make_pair(sizeof(cl_int), (void *)&left.rows));
|
||||
args.push_back(std::make_pair(sizeof(cl_int), (void *)&left.step));
|
||||
args.push_back(std::make_pair(sizeof(cl_int), (void *)&maxdisp));
|
||||
args.push_back(std::make_pair(local_mem_size, (void *)NULL));
|
||||
|
||||
openCLSafeCall(clEnqueueNDRangeKernel((cl_command_queue)clCxt->oclCommandQueue(), kernel, 2, NULL,
|
||||
globalThreads, localThreads, 0, NULL, NULL));
|
||||
|
||||
|
||||
clFinish((cl_command_queue)clCxt->oclCommandQueue());
|
||||
openCLSafeCall(clReleaseKernel(kernel));
|
||||
char opt [128];
|
||||
sprintf(opt, "-D radius=%d", winsz2);
|
||||
openCLExecuteKernel(Context::getContext(), &stereobm, kernelName,
|
||||
globalThreads, localThreads, args, -1, -1, opt);
|
||||
}
|
||||
////////////////////////////////////////////////////////////////////////////
|
||||
///////////////////////////////postfilter_textureness///////////////////////
|
||||
@ -163,10 +147,7 @@ static void stereo_bm(const oclMat &left, const oclMat &right, oclMat &disp,
|
||||
static void postfilter_textureness(oclMat &left, int winSize,
|
||||
float avergeTexThreshold, oclMat &disparity)
|
||||
{
|
||||
Context *clCxt = left.clCxt;
|
||||
|
||||
string kernelName = "textureness_kernel";
|
||||
cl_kernel kernel = openCLGetKernelFromSource(clCxt, &stereobm, kernelName);
|
||||
|
||||
size_t blockSize = 1;
|
||||
size_t localThreads[] = { BLOCK_W, blockSize ,1};
|
||||
@ -177,22 +158,19 @@ static void postfilter_textureness(oclMat &left, int winSize,
|
||||
|
||||
size_t local_mem_size = (localThreads[0] + localThreads[0] + (winSize / 2) * 2) * sizeof(float);
|
||||
|
||||
openCLVerifyKernel(clCxt, kernel, localThreads);
|
||||
openCLSafeCall(clSetKernelArg(kernel, 0, sizeof(cl_mem), (void *)&disparity.data));
|
||||
openCLSafeCall(clSetKernelArg(kernel, 1, sizeof(cl_int), (void *)&disparity.rows));
|
||||
openCLSafeCall(clSetKernelArg(kernel, 2, sizeof(cl_int), (void *)&disparity.cols));
|
||||
openCLSafeCall(clSetKernelArg(kernel, 3, sizeof(cl_int), (void *)&disparity.step));
|
||||
openCLSafeCall(clSetKernelArg(kernel, 4, sizeof(cl_mem), (void *)&left.data));
|
||||
openCLSafeCall(clSetKernelArg(kernel, 5, sizeof(cl_int), (void *)&left.rows));
|
||||
openCLSafeCall(clSetKernelArg(kernel, 6, sizeof(cl_int), (void *)&left.cols));
|
||||
openCLSafeCall(clSetKernelArg(kernel, 7, sizeof(cl_int), (void *)&winSize));
|
||||
openCLSafeCall(clSetKernelArg(kernel, 8, sizeof(cl_float), (void *)&avergeTexThreshold));
|
||||
openCLSafeCall(clSetKernelArg(kernel, 9, local_mem_size, NULL));
|
||||
openCLSafeCall(clEnqueueNDRangeKernel((cl_command_queue)clCxt->oclCommandQueue(), kernel, 2, NULL,
|
||||
globalThreads, localThreads, 0, NULL, NULL));
|
||||
|
||||
clFinish((cl_command_queue)clCxt->oclCommandQueue());
|
||||
openCLSafeCall(clReleaseKernel(kernel));
|
||||
std::vector< std::pair<size_t, const void *> > args;
|
||||
args.push_back(std::make_pair(sizeof(cl_mem), (void *)&disparity.data));
|
||||
args.push_back(std::make_pair(sizeof(cl_int), (void *)&disparity.rows));
|
||||
args.push_back(std::make_pair(sizeof(cl_int), (void *)&disparity.cols));
|
||||
args.push_back(std::make_pair(sizeof(cl_int), (void *)&disparity.step));
|
||||
args.push_back(std::make_pair(sizeof(cl_mem), (void *)&left.data));
|
||||
args.push_back(std::make_pair(sizeof(cl_int), (void *)&left.rows));
|
||||
args.push_back(std::make_pair(sizeof(cl_int), (void *)&left.cols));
|
||||
args.push_back(std::make_pair(sizeof(cl_int), (void *)&winSize));
|
||||
args.push_back(std::make_pair(sizeof(cl_float), (void *)&avergeTexThreshold));
|
||||
args.push_back(std::make_pair(local_mem_size, (void*)NULL));
|
||||
openCLExecuteKernel(Context::getContext(), &stereobm, kernelName,
|
||||
globalThreads, localThreads, args, -1, -1);
|
||||
}
|
||||
//////////////////////////////////////////////////////////////////////////////
|
||||
/////////////////////////////////////operator/////////////////////////////////
|
||||
|
@ -1,120 +0,0 @@
|
||||
/*M///////////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
|
||||
//
|
||||
// By downloading, copying, installing or using the software you agree to this license.
|
||||
// If you do not agree to this license, do not download, install,
|
||||
// copy or use the software.
|
||||
//
|
||||
//
|
||||
// Intel License Agreement
|
||||
// For Open Source Computer Vision Library
|
||||
//
|
||||
// Copyright (C) 2000, Intel Corporation, all rights reserved.
|
||||
// Third party copyrights are property of their respective owners.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without modification,
|
||||
// are permitted provided that the following conditions are met:
|
||||
//
|
||||
// * Redistribution's of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimer.
|
||||
//
|
||||
// * Redistribution's in binary form must reproduce the above copyright notice,
|
||||
// this list of conditions and the following disclaimer in the documentation
|
||||
// and/or other materials provided with the distribution.
|
||||
//
|
||||
// * The name of Intel Corporation may not be used to endorse or promote products
|
||||
// derived from this software without specific prior written permission.
|
||||
//
|
||||
// This software is provided by the copyright holders and contributors "as is" and
|
||||
// any express or implied warranties, including, but not limited to, the implied
|
||||
// warranties of merchantability and fitness for a particular purpose are disclaimed.
|
||||
// In no event shall the Intel Corporation or contributors be liable for any direct,
|
||||
// indirect, incidental, special, exemplary, or consequential damages
|
||||
// (including, but not limited to, procurement of substitute goods or services;
|
||||
// loss of use, data, or profits; or business interruption) however caused
|
||||
// and on any theory of liability, whether in contract, strict liability,
|
||||
// or tort (including negligence or otherwise) arising in any way out of
|
||||
// the use of this software, even if advised of the possibility of such damage.
|
||||
//
|
||||
//M*/
|
||||
|
||||
#ifndef __OPENCV_TEST_INTERPOLATION_HPP__
|
||||
#define __OPENCV_TEST_INTERPOLATION_HPP__
|
||||
|
||||
template <typename T> T readVal(const cv::Mat &src, int y, int x, int c, int border_type, cv::Scalar borderVal = cv::Scalar())
|
||||
{
|
||||
if (border_type == cv::BORDER_CONSTANT)
|
||||
return (y >= 0 && y < src.rows && x >= 0 && x < src.cols) ? src.at<T>(y, x * src.channels() + c) : cv::saturate_cast<T>(borderVal.val[c]);
|
||||
|
||||
return src.at<T>(cv::borderInterpolate(y, src.rows, border_type), cv::borderInterpolate(x, src.cols, border_type) * src.channels() + c);
|
||||
}
|
||||
|
||||
template <typename T> struct NearestInterpolator
|
||||
{
|
||||
static T getValue(const cv::Mat &src, float y, float x, int c, int border_type, cv::Scalar borderVal = cv::Scalar())
|
||||
{
|
||||
return readVal<T>(src, cvFloor(y), cvFloor(x), c, border_type, borderVal);
|
||||
}
|
||||
};
|
||||
|
||||
template <typename T> struct LinearInterpolator
|
||||
{
|
||||
static T getValue(const cv::Mat &src, float y, float x, int c, int border_type, cv::Scalar borderVal = cv::Scalar())
|
||||
{
|
||||
x -= 0.5f;
|
||||
y -= 0.5f;
|
||||
|
||||
int x1 = cvFloor(x);
|
||||
int y1 = cvFloor(y);
|
||||
int x2 = x1 + 1;
|
||||
int y2 = y1 + 1;
|
||||
|
||||
float res = 0;
|
||||
|
||||
res += readVal<T>(src, y1, x1, c, border_type, borderVal) * ((x2 - x) * (y2 - y));
|
||||
res += readVal<T>(src, y1, x2, c, border_type, borderVal) * ((x - x1) * (y2 - y));
|
||||
res += readVal<T>(src, y2, x1, c, border_type, borderVal) * ((x2 - x) * (y - y1));
|
||||
res += readVal<T>(src, y2, x2, c, border_type, borderVal) * ((x - x1) * (y - y1));
|
||||
|
||||
return cv::saturate_cast<T>(res);
|
||||
}
|
||||
};
|
||||
|
||||
template <typename T> struct CubicInterpolator
|
||||
{
|
||||
static float getValue(float p[4], float x)
|
||||
{
|
||||
return p[1] + 0.5 * x * (p[2] - p[0] + x * (2.0 * p[0] - 5.0 * p[1] + 4.0 * p[2] - p[3] + x * (3.0 * (p[1] - p[2]) + p[3] - p[0])));
|
||||
}
|
||||
|
||||
static float getValue(float p[4][4], float x, float y)
|
||||
{
|
||||
float arr[4];
|
||||
|
||||
arr[0] = getValue(p[0], x);
|
||||
arr[1] = getValue(p[1], x);
|
||||
arr[2] = getValue(p[2], x);
|
||||
arr[3] = getValue(p[3], x);
|
||||
|
||||
return getValue(arr, y);
|
||||
}
|
||||
|
||||
static T getValue(const cv::Mat &src, float y, float x, int c, int border_type, cv::Scalar borderVal = cv::Scalar())
|
||||
{
|
||||
int ix = cvRound(x);
|
||||
int iy = cvRound(y);
|
||||
|
||||
float vals[4][4] =
|
||||
{
|
||||
{readVal<T>(src, iy - 2, ix - 2, c, border_type, borderVal), readVal<T>(src, iy - 2, ix - 1, c, border_type, borderVal), readVal<T>(src, iy - 2, ix, c, border_type, borderVal), readVal<T>(src, iy - 2, ix + 1, c, border_type, borderVal)},
|
||||
{readVal<T>(src, iy - 1, ix - 2, c, border_type, borderVal), readVal<T>(src, iy - 1, ix - 1, c, border_type, borderVal), readVal<T>(src, iy - 1, ix, c, border_type, borderVal), readVal<T>(src, iy - 1, ix + 1, c, border_type, borderVal)},
|
||||
{readVal<T>(src, iy , ix - 2, c, border_type, borderVal), readVal<T>(src, iy , ix - 1, c, border_type, borderVal), readVal<T>(src, iy , ix, c, border_type, borderVal), readVal<T>(src, iy , ix + 1, c, border_type, borderVal)},
|
||||
{readVal<T>(src, iy + 1, ix - 2, c, border_type, borderVal), readVal<T>(src, iy + 1, ix - 1, c, border_type, borderVal), readVal<T>(src, iy + 1, ix, c, border_type, borderVal), readVal<T>(src, iy + 1, ix + 1, c, border_type, borderVal)},
|
||||
};
|
||||
|
||||
return cv::saturate_cast<T>(getValue(vals, (x - ix + 2.0) / 4.0, (y - iy + 2.0) / 4.0));
|
||||
}
|
||||
};
|
||||
|
||||
#endif // __OPENCV_TEST_INTERPOLATION_HPP__
|
@ -71,7 +71,6 @@
|
||||
#include "opencv2/ocl/ocl.hpp"
|
||||
|
||||
#include "utility.hpp"
|
||||
#include "interpolation.hpp"
|
||||
//#include "add_test_info.h"
|
||||
|
||||
#endif
|
||||
|
@ -12,6 +12,7 @@
|
||||
//
|
||||
// Copyright (C) 2010-2012, Institute Of Software Chinese Academy Of Science, all rights reserved.
|
||||
// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
|
||||
// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved.
|
||||
// Third party copyrights are property of their respective owners.
|
||||
//
|
||||
// @Authors
|
||||
@ -21,6 +22,7 @@
|
||||
// Jiang Liyuan,jlyuan001.good@163.com
|
||||
// Rock Li, Rock.Li@amd.com
|
||||
// Zailong Wu, bullet@yeah.net
|
||||
// Yao Wang, bitwangyaoyao@gmail.com
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without modification,
|
||||
// are permitted provided that the following conditions are met:
|
||||
@ -87,14 +89,13 @@ PARAM_TEST_CASE(ArithmTestBase, MatType, bool)
|
||||
int maskx;
|
||||
int masky;
|
||||
|
||||
|
||||
//src mat with roi
|
||||
cv::Mat mat1_roi;
|
||||
cv::Mat mat2_roi;
|
||||
cv::Mat mask_roi;
|
||||
cv::Mat dst_roi;
|
||||
cv::Mat dst1_roi; //bak
|
||||
//std::vector<cv::ocl::Info> oclinfo;
|
||||
|
||||
//ocl dst mat for testing
|
||||
cv::ocl::oclMat gdst_whole;
|
||||
cv::ocl::oclMat gdst1_whole; //bak
|
||||
@ -125,10 +126,6 @@ PARAM_TEST_CASE(ArithmTestBase, MatType, bool)
|
||||
|
||||
val = cv::Scalar(rng.uniform(-10.0, 10.0), rng.uniform(-10.0, 10.0), rng.uniform(-10.0, 10.0), rng.uniform(-10.0, 10.0));
|
||||
|
||||
//int devnums = getDevice(oclinfo, OPENCV_DEFAULT_OPENCL_DEVICE);
|
||||
//CV_Assert(devnums > 0);
|
||||
////if you want to use undefault device, set it here
|
||||
////setDevice(oclinfo[0]);
|
||||
}
|
||||
|
||||
void random_roi()
|
||||
@ -175,14 +172,22 @@ PARAM_TEST_CASE(ArithmTestBase, MatType, bool)
|
||||
gmask = mask_roi; //end
|
||||
}
|
||||
|
||||
void Near(double threshold = 0.)
|
||||
{
|
||||
EXPECT_MAT_NEAR(dst, Mat(gdst_whole), threshold);
|
||||
}
|
||||
|
||||
void Near1(double threshold = 0.)
|
||||
{
|
||||
EXPECT_MAT_NEAR(dst1, Mat(gdst1_whole), threshold);
|
||||
}
|
||||
|
||||
};
|
||||
////////////////////////////////lut/////////////////////////////////////////////////
|
||||
|
||||
struct Lut : ArithmTestBase {};
|
||||
#define VARNAME(A) string(#A);
|
||||
|
||||
|
||||
|
||||
TEST_P(Lut, Mat)
|
||||
{
|
||||
|
||||
@ -203,20 +208,12 @@ TEST_P(Lut, Mat)
|
||||
|
||||
cv::LUT(mat1_roi, mat2_roi, dst_roi);
|
||||
cv::ocl::LUT(gmat1, gmat2, gdst);
|
||||
|
||||
cv::Mat cpu_dst;
|
||||
gdst_whole.download (cpu_dst);
|
||||
char s[1024];
|
||||
sprintf(s, "roicols=%d,roirows=%d,src1x=%d,src1y=%d,dstx=%d,dsty=%d,maskx=%d,masky=%d,src2x=%d,src2y=%d", roicols, roirows, src1x, src1y, dstx, dsty, maskx, masky, src2x, src2y);
|
||||
EXPECT_MAT_NEAR(dst, cpu_dst, 0, s);
|
||||
Near(0);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
////////////////////////////////exp/////////////////////////////////////////////////
|
||||
|
||||
struct Exp : ArithmTestBase {};
|
||||
|
||||
TEST_P(Exp, Mat)
|
||||
@ -227,20 +224,12 @@ TEST_P(Exp, Mat)
|
||||
|
||||
cv::exp(mat1_roi, dst_roi);
|
||||
cv::ocl::exp(gmat1, gdst);
|
||||
|
||||
cv::Mat cpu_dst;
|
||||
gdst_whole.download(cpu_dst);
|
||||
|
||||
char s[1024];
|
||||
sprintf(s, "roicols=%d,roirows=%d,src1x=%d,src1y=%d,dstx=%d,dsty=%d,maskx=%d,masky=%d,src2x=%d,src2y=%d", roicols, roirows, src1x, src1y, dstx, dsty, maskx, masky, src2x, src2y);
|
||||
EXPECT_MAT_NEAR(dst, cpu_dst, 2, s);
|
||||
|
||||
Near(2);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
////////////////////////////////log/////////////////////////////////////////////////
|
||||
|
||||
struct Log : ArithmTestBase {};
|
||||
|
||||
TEST_P(Log, Mat)
|
||||
@ -249,24 +238,14 @@ TEST_P(Log, Mat)
|
||||
{
|
||||
random_roi();
|
||||
|
||||
|
||||
cv::log(mat1_roi, dst_roi);
|
||||
cv::ocl::log(gmat1, gdst);
|
||||
|
||||
cv::Mat cpu_dst;
|
||||
gdst_whole.download(cpu_dst);
|
||||
char s[1024];
|
||||
sprintf(s, "roicols=%d,roirows=%d,src1x=%d,src1y=%d,dstx=%d,dsty=%d,maskx=%d,masky=%d,src2x=%d,src2y=%d", roicols, roirows, src1x, src1y, dstx, dsty, maskx, masky, src2x, src2y);
|
||||
EXPECT_MAT_NEAR(dst, cpu_dst, 1, s);
|
||||
|
||||
Near(1);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
////////////////////////////////add/////////////////////////////////////////////////
|
||||
|
||||
struct Add : ArithmTestBase {};
|
||||
|
||||
TEST_P(Add, Mat)
|
||||
@ -277,12 +256,7 @@ TEST_P(Add, Mat)
|
||||
|
||||
cv::add(mat1_roi, mat2_roi, dst_roi);
|
||||
cv::ocl::add(gmat1, gmat2, gdst);
|
||||
|
||||
cv::Mat cpu_dst;
|
||||
gdst_whole.download(cpu_dst);
|
||||
char s[1024];
|
||||
sprintf(s, "roicols=%d,roirows=%d,src1x=%d,src1y=%d,dstx=%d,dsty=%d,maskx=%d,masky=%d,src2x=%d,src2y=%d", roicols, roirows, src1x, src1y, dstx, dsty, maskx, masky, src2x, src2y);
|
||||
EXPECT_MAT_NEAR(dst, cpu_dst, 0.0, s);
|
||||
Near(0);
|
||||
}
|
||||
}
|
||||
|
||||
@ -294,14 +268,10 @@ TEST_P(Add, Mat_Mask)
|
||||
|
||||
cv::add(mat1_roi, mat2_roi, dst_roi, mask_roi);
|
||||
cv::ocl::add(gmat1, gmat2, gdst, gmask);
|
||||
|
||||
cv::Mat cpu_dst;
|
||||
gdst_whole.download(cpu_dst);
|
||||
char s[1024];
|
||||
sprintf(s, "roicols=%d,roirows=%d,src1x=%d,src1y=%d,dstx=%d,dsty=%d,maskx=%d,masky=%d,src2x=%d,src2y=%d", roicols, roirows, src1x, src1y, dstx, dsty, maskx, masky, src2x, src2y);
|
||||
EXPECT_MAT_NEAR(dst, cpu_dst, 0.0, s);
|
||||
Near(0);
|
||||
}
|
||||
}
|
||||
|
||||
TEST_P(Add, Scalar)
|
||||
{
|
||||
for(int j = 0; j < LOOP_TIMES; j++)
|
||||
@ -310,12 +280,7 @@ TEST_P(Add, Scalar)
|
||||
|
||||
cv::add(mat1_roi, val, dst_roi);
|
||||
cv::ocl::add(gmat1, val, gdst);
|
||||
|
||||
cv::Mat cpu_dst;
|
||||
gdst_whole.download(cpu_dst);
|
||||
char s[1024];
|
||||
sprintf(s, "roicols=%d,roirows=%d,src1x=%d,src1y=%d,dstx=%d,dsty=%d,maskx=%d,masky=%d,src2x=%d,src2y=%d", roicols, roirows, src1x, src1y, dstx, dsty, maskx, masky, src2x, src2y);
|
||||
EXPECT_MAT_NEAR(dst, cpu_dst, 1e-5, s);
|
||||
Near(1e-5);
|
||||
}
|
||||
}
|
||||
|
||||
@ -327,12 +292,7 @@ TEST_P(Add, Scalar_Mask)
|
||||
|
||||
cv::add(mat1_roi, val, dst_roi, mask_roi);
|
||||
cv::ocl::add(gmat1, val, gdst, gmask);
|
||||
|
||||
cv::Mat cpu_dst;
|
||||
gdst_whole.download(cpu_dst);
|
||||
char s[1024];
|
||||
sprintf(s, "roicols=%d,roirows=%d,src1x=%d,src1y=%d,dstx=%d,dsty=%d,maskx=%d,masky=%d,src2x=%d,src2y=%d", roicols, roirows, src1x, src1y, dstx, dsty, maskx, masky, src2x, src2y);
|
||||
EXPECT_MAT_NEAR(dst, cpu_dst, 1e-5, s);
|
||||
Near(1e-5);
|
||||
}
|
||||
}
|
||||
|
||||
@ -349,12 +309,7 @@ TEST_P(Sub, Mat)
|
||||
|
||||
cv::subtract(mat1_roi, mat2_roi, dst_roi);
|
||||
cv::ocl::subtract(gmat1, gmat2, gdst);
|
||||
|
||||
cv::Mat cpu_dst;
|
||||
gdst_whole.download(cpu_dst);
|
||||
char s[1024];
|
||||
sprintf(s, "roicols=%d,roirows=%d,src1x=%d,src1y=%d,dstx=%d,dsty=%d,maskx=%d,masky=%d,src2x=%d,src2y=%d", roicols, roirows, src1x, src1y, dstx, dsty, maskx, masky, src2x, src2y);
|
||||
EXPECT_MAT_NEAR(dst, cpu_dst, 0.0, s);
|
||||
Near(0);
|
||||
}
|
||||
}
|
||||
|
||||
@ -366,14 +321,10 @@ TEST_P(Sub, Mat_Mask)
|
||||
|
||||
cv::subtract(mat1_roi, mat2_roi, dst_roi, mask_roi);
|
||||
cv::ocl::subtract(gmat1, gmat2, gdst, gmask);
|
||||
|
||||
cv::Mat cpu_dst;
|
||||
gdst_whole.download(cpu_dst);
|
||||
char s[1024];
|
||||
sprintf(s, "roicols=%d,roirows=%d,src1x=%d,src1y=%d,dstx=%d,dsty=%d,maskx=%d,masky=%d,src2x=%d,src2y=%d", roicols, roirows, src1x, src1y, dstx, dsty, maskx, masky, src2x, src2y);
|
||||
EXPECT_MAT_NEAR(dst, cpu_dst, 0.0, s);
|
||||
Near(0);
|
||||
}
|
||||
}
|
||||
|
||||
TEST_P(Sub, Scalar)
|
||||
{
|
||||
for(int j = 0; j < LOOP_TIMES; j++)
|
||||
@ -382,12 +333,7 @@ TEST_P(Sub, Scalar)
|
||||
|
||||
cv::subtract(mat1_roi, val, dst_roi);
|
||||
cv::ocl::subtract(gmat1, val, gdst);
|
||||
|
||||
cv::Mat cpu_dst;
|
||||
gdst_whole.download(cpu_dst);
|
||||
char s[1024];
|
||||
sprintf(s, "roicols=%d,roirows=%d,src1x=%d,src1y=%d,dstx=%d,dsty=%d,maskx=%d,masky=%d,src2x=%d,src2y=%d", roicols, roirows, src1x, src1y, dstx, dsty, maskx, masky, src2x, src2y);
|
||||
EXPECT_MAT_NEAR(dst, cpu_dst, 1e-5, s);
|
||||
Near(1e-5);
|
||||
}
|
||||
}
|
||||
|
||||
@ -399,12 +345,7 @@ TEST_P(Sub, Scalar_Mask)
|
||||
|
||||
cv::subtract(mat1_roi, val, dst_roi, mask_roi);
|
||||
cv::ocl::subtract(gmat1, val, gdst, gmask);
|
||||
|
||||
cv::Mat cpu_dst;
|
||||
gdst_whole.download(cpu_dst);
|
||||
char s[1024];
|
||||
sprintf(s, "roicols=%d,roirows=%d,src1x=%d,src1y=%d,dstx=%d,dsty=%d,maskx=%d,masky=%d,src2x=%d,src2y=%d", roicols, roirows, src1x, src1y, dstx, dsty, maskx, masky, src2x, src2y);
|
||||
EXPECT_MAT_NEAR(dst, cpu_dst, 1e-5, s);
|
||||
Near(1e-5);
|
||||
}
|
||||
}
|
||||
|
||||
@ -421,12 +362,7 @@ TEST_P(Mul, Mat)
|
||||
|
||||
cv::multiply(mat1_roi, mat2_roi, dst_roi);
|
||||
cv::ocl::multiply(gmat1, gmat2, gdst);
|
||||
|
||||
cv::Mat cpu_dst;
|
||||
gdst_whole.download(cpu_dst);
|
||||
char s[1024];
|
||||
sprintf(s, "roicols=%d,roirows=%d,src1x=%d,src1y=%d,dstx=%d,dsty=%d,maskx=%d,masky=%d,src2x=%d,src2y=%d", roicols, roirows, src1x, src1y, dstx, dsty, maskx, masky, src2x, src2y);
|
||||
EXPECT_MAT_NEAR(dst, cpu_dst, 0.0, s);
|
||||
Near(0);
|
||||
}
|
||||
}
|
||||
|
||||
@ -441,12 +377,7 @@ TEST_P(Mul, Mat_Scalar)
|
||||
|
||||
cv::multiply(mat1_roi, mat2_roi, dst_roi, s);
|
||||
cv::ocl::multiply(gmat1, gmat2, gdst, s);
|
||||
|
||||
cv::Mat cpu_dst;
|
||||
gdst_whole.download(cpu_dst);
|
||||
char sss[1024];
|
||||
sprintf(sss, "roicols=%d,roirows=%d,src1x=%d,src1y=%d,dstx=%d,dsty=%d,maskx=%d,masky=%d,src2x=%d,src2y=%d", roicols, roirows, src1x, src1y, dstx, dsty, maskx, masky, src2x, src2y);
|
||||
EXPECT_MAT_NEAR(dst, cpu_dst, 0.001, sss);
|
||||
Near(.001);
|
||||
}
|
||||
}
|
||||
|
||||
@ -462,13 +393,7 @@ TEST_P(Div, Mat)
|
||||
|
||||
cv::divide(mat1_roi, mat2_roi, dst_roi);
|
||||
cv::ocl::divide(gmat1, gmat2, gdst);
|
||||
|
||||
cv::Mat cpu_dst;
|
||||
gdst_whole.download(cpu_dst);
|
||||
char sss[1024];
|
||||
sprintf(sss, "roicols=%d,roirows=%d,src1x=%d,src1y=%d,dstx=%d,dsty=%d,maskx=%d,masky=%d,src2x=%d,src2y=%d", roicols, roirows, src1x, src1y, dstx, dsty, maskx, masky, src2x, src2y);
|
||||
|
||||
EXPECT_MAT_NEAR(dst, cpu_dst, 1, sss);
|
||||
Near(1);
|
||||
}
|
||||
}
|
||||
|
||||
@ -483,13 +408,7 @@ TEST_P(Div, Mat_Scalar)
|
||||
|
||||
cv::divide(mat1_roi, mat2_roi, dst_roi, s);
|
||||
cv::ocl::divide(gmat1, gmat2, gdst, s);
|
||||
|
||||
cv::Mat cpu_dst;
|
||||
gdst_whole.download(cpu_dst);
|
||||
char sss[1024];
|
||||
sprintf(sss, "roicols=%d,roirows=%d,src1x=%d,src1y=%d,dstx=%d,dsty=%d,maskx=%d,masky=%d,src2x=%d,src2y=%d", roicols, roirows, src1x, src1y, dstx, dsty, maskx, masky, src2x, src2y);
|
||||
|
||||
EXPECT_MAT_NEAR(dst, cpu_dst, 0.001, sss);
|
||||
Near(.001);
|
||||
}
|
||||
}
|
||||
|
||||
@ -504,13 +423,7 @@ TEST_P(Absdiff, Mat)
|
||||
|
||||
cv::absdiff(mat1_roi, mat2_roi, dst_roi);
|
||||
cv::ocl::absdiff(gmat1, gmat2, gdst);
|
||||
|
||||
cv::Mat cpu_dst;
|
||||
gdst_whole.download(cpu_dst);
|
||||
char sss[1024];
|
||||
sprintf(sss, "roicols=%d,roirows=%d,src1x=%d,src1y=%d,dstx=%d,dsty=%d,maskx=%d,masky=%d,src2x=%d,src2y=%d", roicols, roirows, src1x, src1y, dstx, dsty, maskx, masky, src2x, src2y);
|
||||
|
||||
EXPECT_MAT_NEAR(dst, cpu_dst, 0, sss);
|
||||
Near(0);
|
||||
}
|
||||
}
|
||||
|
||||
@ -522,13 +435,7 @@ TEST_P(Absdiff, Mat_Scalar)
|
||||
|
||||
cv::absdiff(mat1_roi, val, dst_roi);
|
||||
cv::ocl::absdiff(gmat1, val, gdst);
|
||||
|
||||
cv::Mat cpu_dst;
|
||||
gdst_whole.download(cpu_dst);
|
||||
char sss[1024];
|
||||
sprintf(sss, "roicols=%d,roirows=%d,src1x=%d,src1y=%d,dstx=%d,dsty=%d,maskx=%d,masky=%d,src2x=%d,src2y=%d", roicols, roirows, src1x, src1y, dstx, dsty, maskx, masky, src2x, src2y);
|
||||
|
||||
EXPECT_MAT_NEAR(dst, cpu_dst, 1e-5, sss);
|
||||
Near(1e-5);
|
||||
}
|
||||
}
|
||||
|
||||
@ -544,16 +451,8 @@ TEST_P(CartToPolar, angleInDegree)
|
||||
|
||||
cv::cartToPolar(mat1_roi, mat2_roi, dst_roi, dst1_roi, 1);
|
||||
cv::ocl::cartToPolar(gmat1, gmat2, gdst, gdst1, 1);
|
||||
|
||||
cv::Mat cpu_dst;
|
||||
gdst_whole.download(cpu_dst);
|
||||
|
||||
cv::Mat cpu_dst1;
|
||||
gdst1_whole.download(cpu_dst1);
|
||||
char sss[1024];
|
||||
sprintf(sss, "roicols=%d,roirows=%d,src1x=%d,src1y=%d,dstx=%d,dsty=%d,maskx=%d,masky=%d,src2x=%d,src2y=%d", roicols, roirows, src1x, src1y, dstx, dsty, maskx, masky, src2x, src2y);
|
||||
EXPECT_MAT_NEAR(dst, cpu_dst, 0.5, sss);
|
||||
EXPECT_MAT_NEAR(dst1, cpu_dst1, 0.5, sss);
|
||||
Near(.5);
|
||||
Near1(.5);
|
||||
}
|
||||
}
|
||||
|
||||
@ -565,22 +464,12 @@ TEST_P(CartToPolar, angleInRadians)
|
||||
|
||||
cv::cartToPolar(mat1_roi, mat2_roi, dst_roi, dst1_roi, 0);
|
||||
cv::ocl::cartToPolar(gmat1, gmat2, gdst, gdst1, 0);
|
||||
|
||||
cv::Mat cpu_dst;
|
||||
gdst_whole.download(cpu_dst);
|
||||
|
||||
cv::Mat cpu_dst1;
|
||||
gdst1_whole.download(cpu_dst1);
|
||||
char sss[1024];
|
||||
sprintf(sss, "roicols=%d,roirows=%d,src1x=%d,src1y=%d,dstx=%d,dsty=%d,maskx=%d,masky=%d,src2x=%d,src2y=%d", roicols, roirows, src1x, src1y, dstx, dsty, maskx, masky, src2x, src2y);
|
||||
EXPECT_MAT_NEAR(dst, cpu_dst, 0.5, sss);
|
||||
EXPECT_MAT_NEAR(dst1, cpu_dst1, 0.5, sss);
|
||||
Near(.5);
|
||||
Near1(.5);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
struct PolarToCart : ArithmTestBase {};
|
||||
|
||||
TEST_P(PolarToCart, angleInDegree)
|
||||
@ -591,17 +480,8 @@ TEST_P(PolarToCart, angleInDegree)
|
||||
|
||||
cv::polarToCart(mat1_roi, mat2_roi, dst_roi, dst1_roi, 1);
|
||||
cv::ocl::polarToCart(gmat1, gmat2, gdst, gdst1, 1);
|
||||
|
||||
cv::Mat cpu_dst;
|
||||
gdst_whole.download(cpu_dst);
|
||||
|
||||
cv::Mat cpu_dst1;
|
||||
gdst1_whole.download(cpu_dst1);
|
||||
char sss[1024];
|
||||
sprintf(sss, "roicols=%d,roirows=%d,src1x=%d,src1y=%d,dstx=%d,dsty=%d,maskx=%d,masky=%d,src2x=%d,src2y=%d", roicols, roirows, src1x, src1y, dstx, dsty, maskx, masky, src2x, src2y);
|
||||
|
||||
EXPECT_MAT_NEAR(dst, cpu_dst, 0.5, sss);
|
||||
EXPECT_MAT_NEAR(dst1, cpu_dst1, 0.5, sss);
|
||||
Near(.5);
|
||||
Near1(.5);
|
||||
}
|
||||
}
|
||||
|
||||
@ -613,17 +493,8 @@ TEST_P(PolarToCart, angleInRadians)
|
||||
|
||||
cv::polarToCart(mat1_roi, mat2_roi, dst_roi, dst1_roi, 0);
|
||||
cv::ocl::polarToCart(gmat1, gmat2, gdst, gdst1, 0);
|
||||
|
||||
cv::Mat cpu_dst;
|
||||
gdst_whole.download(cpu_dst);
|
||||
|
||||
cv::Mat cpu_dst1;
|
||||
gdst1_whole.download(cpu_dst1);
|
||||
char sss[1024];
|
||||
sprintf(sss, "roicols=%d,roirows=%d,src1x=%d,src1y=%d,dstx=%d,dsty=%d,maskx=%d,masky=%d,src2x=%d,src2y=%d", roicols, roirows, src1x, src1y, dstx, dsty, maskx, masky, src2x, src2y);
|
||||
|
||||
EXPECT_MAT_NEAR(dst, cpu_dst, 0.5, sss);
|
||||
EXPECT_MAT_NEAR(dst1, cpu_dst1, 0.5, sss);
|
||||
Near(.5);
|
||||
Near1(.5);
|
||||
}
|
||||
}
|
||||
|
||||
@ -640,19 +511,11 @@ TEST_P(Magnitude, Mat)
|
||||
|
||||
cv::magnitude(mat1_roi, mat2_roi, dst_roi);
|
||||
cv::ocl::magnitude(gmat1, gmat2, gdst);
|
||||
|
||||
cv::Mat cpu_dst;
|
||||
gdst_whole.download(cpu_dst);
|
||||
char sss[1024];
|
||||
sprintf(sss, "roicols=%d,roirows=%d,src1x=%d,src1y=%d,dstx=%d,dsty=%d,maskx=%d,masky=%d,src2x=%d,src2y=%d", roicols, roirows, src1x, src1y, dstx, dsty, maskx, masky, src2x, src2y);
|
||||
|
||||
EXPECT_MAT_NEAR(dst, cpu_dst, 1e-5, sss);
|
||||
Near(1e-5);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
struct Transpose : ArithmTestBase {};
|
||||
|
||||
TEST_P(Transpose, Mat)
|
||||
@ -663,20 +526,11 @@ TEST_P(Transpose, Mat)
|
||||
|
||||
cv::transpose(mat1_roi, dst_roi);
|
||||
cv::ocl::transpose(gmat1, gdst);
|
||||
|
||||
cv::Mat cpu_dst;
|
||||
gdst_whole.download(cpu_dst);
|
||||
char sss[1024];
|
||||
sprintf(sss, "roicols=%d,roirows=%d,src1x=%d,src1y=%d,dstx=%d,dsty=%d,maskx=%d,masky=%d,src2x=%d,src2y=%d", roicols, roirows, src1x, src1y, dstx, dsty, maskx, masky, src2x, src2y);
|
||||
|
||||
EXPECT_MAT_NEAR(dst, cpu_dst, 1e-5, sss);
|
||||
Near(1e-5);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
struct Flip : ArithmTestBase {};
|
||||
|
||||
TEST_P(Flip, X)
|
||||
@ -687,13 +541,7 @@ TEST_P(Flip, X)
|
||||
|
||||
cv::flip(mat1_roi, dst_roi, 0);
|
||||
cv::ocl::flip(gmat1, gdst, 0);
|
||||
|
||||
cv::Mat cpu_dst;
|
||||
gdst_whole.download(cpu_dst);
|
||||
char sss[1024];
|
||||
sprintf(sss, "roicols=%d,roirows=%d,src1x=%d,src1y=%d,dstx=%d,dsty=%d,maskx=%d,masky=%d,src2x=%d,src2y=%d", roicols, roirows, src1x, src1y, dstx, dsty, maskx, masky, src2x, src2y);
|
||||
|
||||
EXPECT_MAT_NEAR(dst, cpu_dst, 1e-5, sss);
|
||||
Near(1e-5);
|
||||
}
|
||||
}
|
||||
|
||||
@ -705,13 +553,7 @@ TEST_P(Flip, Y)
|
||||
|
||||
cv::flip(mat1_roi, dst_roi, 1);
|
||||
cv::ocl::flip(gmat1, gdst, 1);
|
||||
|
||||
cv::Mat cpu_dst;
|
||||
gdst_whole.download(cpu_dst);
|
||||
char sss[1024];
|
||||
sprintf(sss, "roicols=%d,roirows=%d,src1x=%d,src1y=%d,dstx=%d,dsty=%d,maskx=%d,masky=%d,src2x=%d,src2y=%d", roicols, roirows, src1x, src1y, dstx, dsty, maskx, masky, src2x, src2y);
|
||||
|
||||
EXPECT_MAT_NEAR(dst, cpu_dst, 1e-5, sss);
|
||||
Near(1e-5);
|
||||
}
|
||||
}
|
||||
|
||||
@ -723,18 +565,11 @@ TEST_P(Flip, BOTH)
|
||||
|
||||
cv::flip(mat1_roi, dst_roi, -1);
|
||||
cv::ocl::flip(gmat1, gdst, -1);
|
||||
|
||||
cv::Mat cpu_dst;
|
||||
gdst_whole.download(cpu_dst);
|
||||
char sss[1024];
|
||||
sprintf(sss, "roicols=%d,roirows=%d,src1x=%d,src1y=%d,dstx=%d,dsty=%d,maskx=%d,masky=%d,src2x=%d,src2y=%d", roicols, roirows, src1x, src1y, dstx, dsty, maskx, masky, src2x, src2y);
|
||||
|
||||
EXPECT_MAT_NEAR(dst, cpu_dst, 1e-5, sss);
|
||||
Near(1e-5);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
struct MinMax : ArithmTestBase {};
|
||||
|
||||
TEST_P(MinMax, MAT)
|
||||
@ -765,12 +600,8 @@ TEST_P(MinMax, MAT)
|
||||
double minVal_, maxVal_;
|
||||
cv::ocl::minMax(gmat1, &minVal_, &maxVal_);
|
||||
|
||||
//check results
|
||||
char sss[1024];
|
||||
sprintf(sss, "roicols=%d,roirows=%d,src1x=%d,src1y=%d,dstx=%d,dsty=%d,maskx=%d,masky=%d,src2x=%d,src2y=%d", roicols, roirows, src1x, src1y, dstx, dsty, maskx, masky, src2x, src2y);
|
||||
|
||||
EXPECT_DOUBLE_EQ(minVal_, minVal) << sss;
|
||||
EXPECT_DOUBLE_EQ(maxVal_, maxVal) << sss;
|
||||
EXPECT_DOUBLE_EQ(minVal_, minVal);
|
||||
EXPECT_DOUBLE_EQ(maxVal_, maxVal);
|
||||
}
|
||||
}
|
||||
|
||||
@ -803,12 +634,8 @@ TEST_P(MinMax, MASK)
|
||||
double minVal_, maxVal_;
|
||||
cv::ocl::minMax(gmat1, &minVal_, &maxVal_, gmask);
|
||||
|
||||
//check results
|
||||
char sss[1024];
|
||||
sprintf(sss, "roicols=%d,roirows=%d,src1x=%d,src1y=%d,dstx=%d,dsty=%d,maskx=%d,masky=%d,src2x=%d,src2y=%d", roicols, roirows, src1x, src1y, dstx, dsty, maskx, masky, src2x, src2y);
|
||||
|
||||
EXPECT_DOUBLE_EQ(minVal_, minVal) << sss;
|
||||
EXPECT_DOUBLE_EQ(maxVal_, maxVal) << sss;
|
||||
EXPECT_DOUBLE_EQ(minVal_, minVal);
|
||||
EXPECT_DOUBLE_EQ(maxVal_, maxVal);
|
||||
}
|
||||
}
|
||||
|
||||
@ -919,17 +746,13 @@ TEST_P(MinMaxLoc, MAT)
|
||||
error1 = ::abs(mat1_roi.at<double>(maxLoc_) - mat1_roi.at<double>(maxLoc));
|
||||
}
|
||||
|
||||
//check results
|
||||
char sss[1024];
|
||||
sprintf(sss, "roicols=%d,roirows=%d,src1x=%d,src1y=%d,dstx=%d,dsty=%d,maskx=%d,masky=%d,src2x=%d,src2y=%d", roicols, roirows, src1x, src1y, dstx, dsty, maskx, masky, src2x, src2y);
|
||||
EXPECT_DOUBLE_EQ(minVal_, minVal);
|
||||
EXPECT_DOUBLE_EQ(maxVal_, maxVal);
|
||||
EXPECT_DOUBLE_EQ(minlocVal_, minlocVal);
|
||||
EXPECT_DOUBLE_EQ(maxlocVal_, maxlocVal);
|
||||
|
||||
EXPECT_DOUBLE_EQ(minVal_, minVal) << sss;
|
||||
EXPECT_DOUBLE_EQ(maxVal_, maxVal) << sss;
|
||||
EXPECT_DOUBLE_EQ(minlocVal_, minlocVal) << sss;
|
||||
EXPECT_DOUBLE_EQ(maxlocVal_, maxlocVal) << sss;
|
||||
|
||||
EXPECT_DOUBLE_EQ(error0, 0.0) << sss;
|
||||
EXPECT_DOUBLE_EQ(error1, 0.0) << sss;
|
||||
EXPECT_DOUBLE_EQ(error0, 0.0);
|
||||
EXPECT_DOUBLE_EQ(error1, 0.0);
|
||||
}
|
||||
}
|
||||
|
||||
@ -1040,17 +863,13 @@ TEST_P(MinMaxLoc, MASK)
|
||||
error1 = ::abs(mat1_roi.at<double>(maxLoc_) - mat1_roi.at<double>(maxLoc));
|
||||
}
|
||||
|
||||
//check results
|
||||
char sss[1024];
|
||||
sprintf(sss, "roicols=%d,roirows=%d,src1x=%d,src1y=%d,dstx=%d,dsty=%d,maskx=%d,masky=%d,src2x=%d,src2y=%d", roicols, roirows, src1x, src1y, dstx, dsty, maskx, masky, src2x, src2y);
|
||||
EXPECT_DOUBLE_EQ(minVal_, minVal);
|
||||
EXPECT_DOUBLE_EQ(maxVal_, maxVal);
|
||||
EXPECT_DOUBLE_EQ(minlocVal_, minlocVal);
|
||||
EXPECT_DOUBLE_EQ(maxlocVal_, maxlocVal);
|
||||
|
||||
EXPECT_DOUBLE_EQ(minVal_, minVal) << sss;
|
||||
EXPECT_DOUBLE_EQ(maxVal_, maxVal) << sss;
|
||||
EXPECT_DOUBLE_EQ(minlocVal_, minlocVal) << sss;
|
||||
EXPECT_DOUBLE_EQ(maxlocVal_, maxlocVal) << sss;
|
||||
|
||||
EXPECT_DOUBLE_EQ(error0, 0.0) << sss;
|
||||
EXPECT_DOUBLE_EQ(error1, 0.0) << sss;
|
||||
EXPECT_DOUBLE_EQ(error0, 0.0);
|
||||
EXPECT_DOUBLE_EQ(error1, 0.0);
|
||||
}
|
||||
}
|
||||
|
||||
@ -1064,14 +883,12 @@ TEST_P(Sum, MAT)
|
||||
random_roi();
|
||||
Scalar cpures = cv::sum(mat1_roi);
|
||||
Scalar gpures = cv::ocl::sum(gmat1);
|
||||
char sss[1024];
|
||||
sprintf(sss, "roicols=%d,roirows=%d,src1x=%d,src1y=%d,dstx=%d,dsty=%d,maskx=%d,masky=%d,src2x=%d,src2y=%d", roicols, roirows, src1x, src1y, dstx, dsty, maskx, masky, src2x, src2y);
|
||||
|
||||
//check results
|
||||
EXPECT_NEAR(cpures[0], gpures[0], 0.1) << sss;
|
||||
EXPECT_NEAR(cpures[1], gpures[1], 0.1) << sss;
|
||||
EXPECT_NEAR(cpures[2], gpures[2], 0.1) << sss;
|
||||
EXPECT_NEAR(cpures[3], gpures[3], 0.1) << sss;
|
||||
EXPECT_NEAR(cpures[0], gpures[0], 0.1);
|
||||
EXPECT_NEAR(cpures[1], gpures[1], 0.1);
|
||||
EXPECT_NEAR(cpures[2], gpures[2], 0.1);
|
||||
EXPECT_NEAR(cpures[3], gpures[3], 0.1);
|
||||
}
|
||||
}
|
||||
|
||||
@ -1086,11 +903,7 @@ TEST_P(CountNonZero, MAT)
|
||||
int cpures = cv::countNonZero(mat1_roi);
|
||||
int gpures = cv::ocl::countNonZero(gmat1);
|
||||
|
||||
//check results
|
||||
char sss[1024];
|
||||
sprintf(sss, "roicols=%d,roirows=%d,src1x=%d,src1y=%d,dstx=%d,dsty=%d,maskx=%d,masky=%d,src2x=%d,src2y=%d", roicols, roirows, src1x, src1y, dstx, dsty, maskx, masky, src2x, src2y);
|
||||
|
||||
EXPECT_DOUBLE_EQ((double)cpures, (double)gpures) << sss;
|
||||
EXPECT_DOUBLE_EQ((double)cpures, (double)gpures);
|
||||
}
|
||||
}
|
||||
|
||||
@ -1112,13 +925,7 @@ TEST_P(Phase, Mat)
|
||||
random_roi();
|
||||
cv::phase(mat1_roi, mat2_roi, dst_roi, angelInDegrees ? true : false);
|
||||
cv::ocl::phase(gmat1, gmat2, gdst, angelInDegrees ? true : false);
|
||||
|
||||
cv::Mat cpu_dst;
|
||||
gdst_whole.download(cpu_dst);
|
||||
|
||||
char sss[1024];
|
||||
sprintf(sss, "roicols=%d,roirows=%d,src1x=%d,src1y=%d,dstx=%d,dsty=%d,maskx=%d,masky=%d,src2x=%d,src2y=%d", roicols, roirows, src1x, src1y, dstx, dsty, maskx, masky, src2x, src2y);
|
||||
EXPECT_MAT_NEAR(dst, cpu_dst, 1e-2, sss);
|
||||
Near(1e-2);
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -1135,13 +942,7 @@ TEST_P(Bitwise_and, Mat)
|
||||
|
||||
cv::bitwise_and(mat1_roi, mat2_roi, dst_roi);
|
||||
cv::ocl::bitwise_and(gmat1, gmat2, gdst);
|
||||
|
||||
cv::Mat cpu_dst;
|
||||
gdst_whole.download(cpu_dst);
|
||||
char sss[1024];
|
||||
sprintf(sss, "roicols=%d,roirows=%d,src1x=%d,src1y=%d,dstx=%d,dsty=%d,maskx=%d,masky=%d,src2x=%d,src2y=%d", roicols, roirows, src1x, src1y, dstx, dsty, maskx, masky, src2x, src2y);
|
||||
|
||||
EXPECT_MAT_NEAR(dst, cpu_dst, 0.0, sss);
|
||||
Near(0);
|
||||
}
|
||||
}
|
||||
|
||||
@ -1153,15 +954,10 @@ TEST_P(Bitwise_and, Mat_Mask)
|
||||
|
||||
cv::bitwise_and(mat1_roi, mat2_roi, dst_roi, mask_roi);
|
||||
cv::ocl::bitwise_and(gmat1, gmat2, gdst, gmask);
|
||||
|
||||
cv::Mat cpu_dst;
|
||||
gdst_whole.download(cpu_dst);
|
||||
char sss[1024];
|
||||
sprintf(sss, "roicols=%d,roirows=%d,src1x=%d,src1y=%d,dstx=%d,dsty=%d,maskx=%d,masky=%d,src2x=%d,src2y=%d", roicols, roirows, src1x, src1y, dstx, dsty, maskx, masky, src2x, src2y);
|
||||
|
||||
EXPECT_MAT_NEAR(dst, cpu_dst, 0.0, sss);
|
||||
Near(0);
|
||||
}
|
||||
}
|
||||
|
||||
TEST_P(Bitwise_and, Scalar)
|
||||
{
|
||||
for(int j = 0; j < LOOP_TIMES; j++)
|
||||
@ -1170,14 +966,7 @@ TEST_P(Bitwise_and, Scalar)
|
||||
|
||||
cv::bitwise_and(mat1_roi, val, dst_roi);
|
||||
cv::ocl::bitwise_and(gmat1, val, gdst);
|
||||
|
||||
cv::Mat cpu_dst;
|
||||
gdst_whole.download(cpu_dst);
|
||||
char sss[1024];
|
||||
sprintf(sss, "roicols=%d,roirows=%d,src1x=%d,src1y=%d,dstx=%d,dsty=%d,maskx=%d,masky=%d,src2x=%d,src2y=%d", roicols, roirows, src1x, src1y, dstx, dsty, maskx, masky, src2x, src2y);
|
||||
|
||||
EXPECT_MAT_NEAR(dst, cpu_dst, 1e-5, sss);
|
||||
|
||||
Near(1e-5);
|
||||
}
|
||||
}
|
||||
|
||||
@ -1189,14 +978,7 @@ TEST_P(Bitwise_and, Scalar_Mask)
|
||||
|
||||
cv::bitwise_and(mat1_roi, val, dst_roi, mask_roi);
|
||||
cv::ocl::bitwise_and(gmat1, val, gdst, gmask);
|
||||
|
||||
cv::Mat cpu_dst;
|
||||
gdst_whole.download(cpu_dst);
|
||||
char *sss = new char[1024];
|
||||
sprintf(sss, "roicols=%d,roirows=%d,src1x=%d,src1y=%d,dstx=%d,dsty=%d,maskx=%d,masky=%d,src2x=%d,src2y=%d", roicols, roirows, src1x, src1y, dstx, dsty, maskx, masky, src2x, src2y);
|
||||
|
||||
EXPECT_MAT_NEAR(dst, cpu_dst, 1e-5, sss);
|
||||
delete[] sss;
|
||||
Near(1e-5);
|
||||
}
|
||||
}
|
||||
|
||||
@ -1214,13 +996,7 @@ TEST_P(Bitwise_or, Mat)
|
||||
|
||||
cv::bitwise_or(mat1_roi, mat2_roi, dst_roi);
|
||||
cv::ocl::bitwise_or(gmat1, gmat2, gdst);
|
||||
|
||||
cv::Mat cpu_dst;
|
||||
gdst_whole.download(cpu_dst);
|
||||
char sss[1024];
|
||||
sprintf(sss, "roicols=%d,roirows=%d,src1x=%d,src1y=%d,dstx=%d,dsty=%d,maskx=%d,masky=%d,src2x=%d,src2y=%d", roicols, roirows, src1x, src1y, dstx, dsty, maskx, masky, src2x, src2y);
|
||||
|
||||
EXPECT_MAT_NEAR(dst, cpu_dst, 0.0, sss);
|
||||
Near(0);
|
||||
}
|
||||
}
|
||||
|
||||
@ -1232,15 +1008,10 @@ TEST_P(Bitwise_or, Mat_Mask)
|
||||
|
||||
cv::bitwise_or(mat1_roi, mat2_roi, dst_roi, mask_roi);
|
||||
cv::ocl::bitwise_or(gmat1, gmat2, gdst, gmask);
|
||||
|
||||
cv::Mat cpu_dst;
|
||||
gdst_whole.download(cpu_dst);
|
||||
char sss[1024];
|
||||
sprintf(sss, "roicols=%d,roirows=%d,src1x=%d,src1y=%d,dstx=%d,dsty=%d,maskx=%d,masky=%d,src2x=%d,src2y=%d", roicols, roirows, src1x, src1y, dstx, dsty, maskx, masky, src2x, src2y);
|
||||
|
||||
EXPECT_MAT_NEAR(dst, cpu_dst, 0.0, sss);
|
||||
Near(0);
|
||||
}
|
||||
}
|
||||
|
||||
TEST_P(Bitwise_or, Scalar)
|
||||
{
|
||||
for(int j = 0; j < LOOP_TIMES; j++)
|
||||
@ -1249,13 +1020,7 @@ TEST_P(Bitwise_or, Scalar)
|
||||
|
||||
cv::bitwise_or(mat1_roi, val, dst_roi);
|
||||
cv::ocl::bitwise_or(gmat1, val, gdst);
|
||||
|
||||
cv::Mat cpu_dst;
|
||||
gdst_whole.download(cpu_dst);
|
||||
char sss[1024];
|
||||
sprintf(sss, "roicols=%d,roirows=%d,src1x=%d,src1y=%d,dstx=%d,dsty=%d,maskx=%d,masky=%d,src2x=%d,src2y=%d", roicols, roirows, src1x, src1y, dstx, dsty, maskx, masky, src2x, src2y);
|
||||
|
||||
EXPECT_MAT_NEAR(dst, cpu_dst, 1e-5, sss);
|
||||
Near(1e-5);
|
||||
}
|
||||
}
|
||||
|
||||
@ -1267,13 +1032,7 @@ TEST_P(Bitwise_or, Scalar_Mask)
|
||||
|
||||
cv::bitwise_or(mat1_roi, val, dst_roi, mask_roi);
|
||||
cv::ocl::bitwise_or(gmat1, val, gdst, gmask);
|
||||
|
||||
cv::Mat cpu_dst;
|
||||
gdst_whole.download(cpu_dst);
|
||||
char sss[1024];
|
||||
sprintf(sss, "roicols=%d,roirows=%d,src1x=%d,src1y=%d,dstx=%d,dsty=%d,maskx=%d,masky=%d,src2x=%d,src2y=%d", roicols, roirows, src1x, src1y, dstx, dsty, maskx, masky, src2x, src2y);
|
||||
|
||||
EXPECT_MAT_NEAR(dst, cpu_dst, 1e-5, sss);
|
||||
Near(1e-5);
|
||||
}
|
||||
}
|
||||
|
||||
@ -1291,13 +1050,7 @@ TEST_P(Bitwise_xor, Mat)
|
||||
|
||||
cv::bitwise_xor(mat1_roi, mat2_roi, dst_roi);
|
||||
cv::ocl::bitwise_xor(gmat1, gmat2, gdst);
|
||||
|
||||
cv::Mat cpu_dst;
|
||||
gdst_whole.download(cpu_dst);
|
||||
char sss[1024];
|
||||
sprintf(sss, "roicols=%d,roirows=%d,src1x=%d,src1y=%d,dstx=%d,dsty=%d,maskx=%d,masky=%d,src2x=%d,src2y=%d", roicols, roirows, src1x, src1y, dstx, dsty, maskx, masky, src2x, src2y);
|
||||
|
||||
EXPECT_MAT_NEAR(dst, cpu_dst, 0.0, sss);
|
||||
Near(0);
|
||||
}
|
||||
}
|
||||
|
||||
@ -1309,15 +1062,10 @@ TEST_P(Bitwise_xor, Mat_Mask)
|
||||
|
||||
cv::bitwise_xor(mat1_roi, mat2_roi, dst_roi, mask_roi);
|
||||
cv::ocl::bitwise_xor(gmat1, gmat2, gdst, gmask);
|
||||
|
||||
cv::Mat cpu_dst;
|
||||
gdst_whole.download(cpu_dst);
|
||||
char sss[1024];
|
||||
sprintf(sss, "roicols=%d,roirows=%d,src1x=%d,src1y=%d,dstx=%d,dsty=%d,maskx=%d,masky=%d,src2x=%d,src2y=%d", roicols, roirows, src1x, src1y, dstx, dsty, maskx, masky, src2x, src2y);
|
||||
|
||||
EXPECT_MAT_NEAR(dst, cpu_dst, 0.0, sss);
|
||||
Near(0);
|
||||
}
|
||||
}
|
||||
|
||||
TEST_P(Bitwise_xor, Scalar)
|
||||
{
|
||||
for(int j = 0; j < LOOP_TIMES; j++)
|
||||
@ -1326,13 +1074,7 @@ TEST_P(Bitwise_xor, Scalar)
|
||||
|
||||
cv::bitwise_xor(mat1_roi, val, dst_roi);
|
||||
cv::ocl::bitwise_xor(gmat1, val, gdst);
|
||||
|
||||
cv::Mat cpu_dst;
|
||||
gdst_whole.download(cpu_dst);
|
||||
char sss[1024];
|
||||
sprintf(sss, "roicols=%d,roirows=%d,src1x=%d,src1y=%d,dstx=%d,dsty=%d,maskx=%d,masky=%d,src2x=%d,src2y=%d", roicols, roirows, src1x, src1y, dstx, dsty, maskx, masky, src2x, src2y);
|
||||
|
||||
EXPECT_MAT_NEAR(dst, cpu_dst, 1e-5, sss);
|
||||
Near(1e-5);
|
||||
}
|
||||
}
|
||||
|
||||
@ -1344,13 +1086,7 @@ TEST_P(Bitwise_xor, Scalar_Mask)
|
||||
|
||||
cv::bitwise_xor(mat1_roi, val, dst_roi, mask_roi);
|
||||
cv::ocl::bitwise_xor(gmat1, val, gdst, gmask);
|
||||
|
||||
cv::Mat cpu_dst;
|
||||
gdst_whole.download(cpu_dst);
|
||||
char sss[1024];
|
||||
sprintf(sss, "roicols=%d,roirows=%d,src1x=%d,src1y=%d,dstx=%d,dsty=%d,maskx=%d,masky=%d,src2x=%d,src2y=%d", roicols, roirows, src1x, src1y, dstx, dsty, maskx, masky, src2x, src2y);
|
||||
|
||||
EXPECT_MAT_NEAR(dst, cpu_dst, 1e-5, sss);
|
||||
Near(1e-5);
|
||||
}
|
||||
}
|
||||
|
||||
@ -1367,13 +1103,7 @@ TEST_P(Bitwise_not, Mat)
|
||||
|
||||
cv::bitwise_not(mat1_roi, dst_roi);
|
||||
cv::ocl::bitwise_not(gmat1, gdst);
|
||||
|
||||
cv::Mat cpu_dst;
|
||||
gdst_whole.download(cpu_dst);
|
||||
char sss[1024];
|
||||
sprintf(sss, "roicols=%d,roirows=%d,src1x=%d,src1y=%d,dstx=%d,dsty=%d,maskx=%d,masky=%d,src2x=%d,src2y=%d", roicols, roirows, src1x, src1y, dstx, dsty, maskx, masky, src2x, src2y);
|
||||
|
||||
EXPECT_MAT_NEAR(dst, cpu_dst, 0.0, sss);
|
||||
Near(0);
|
||||
}
|
||||
}
|
||||
|
||||
@ -1390,7 +1120,7 @@ TEST_P(Compare, Mat)
|
||||
}
|
||||
|
||||
int cmp_codes[] = {CMP_EQ, CMP_GT, CMP_GE, CMP_LT, CMP_LE, CMP_NE};
|
||||
const char *cmp_str[] = {"CMP_EQ", "CMP_GT", "CMP_GE", "CMP_LT", "CMP_LE", "CMP_NE"};
|
||||
//const char *cmp_str[] = {"CMP_EQ", "CMP_GT", "CMP_GE", "CMP_LT", "CMP_LE", "CMP_NE"};
|
||||
int cmp_num = sizeof(cmp_codes) / sizeof(int);
|
||||
|
||||
for (int i = 0; i < cmp_num; ++i)
|
||||
@ -1402,13 +1132,7 @@ TEST_P(Compare, Mat)
|
||||
|
||||
cv::compare(mat1_roi, mat2_roi, dst_roi, cmp_codes[i]);
|
||||
cv::ocl::compare(gmat1, gmat2, gdst, cmp_codes[i]);
|
||||
|
||||
cv::Mat cpu_dst;
|
||||
gdst_whole.download(cpu_dst);
|
||||
char sss[1024];
|
||||
sprintf(sss, "cmptype=%s, roicols=%d,roirows=%d,src1x=%d,src1y=%d,dstx=%d,dsty=%d,maskx=%d,masky=%d,src2x=%d,src2y=%d", cmp_str[i], roicols, roirows, src1x, src1y, dstx, dsty, maskx, masky, src2x, src2y);
|
||||
|
||||
EXPECT_MAT_NEAR(dst, cpu_dst, 0.0, sss);
|
||||
Near(0);
|
||||
}
|
||||
}
|
||||
|
||||
@ -1430,14 +1154,7 @@ TEST_P(Pow, Mat)
|
||||
double p = 4.5;
|
||||
cv::pow(mat1_roi, p, dst_roi);
|
||||
cv::ocl::pow(gmat1, p, gdst);
|
||||
|
||||
cv::Mat cpu_dst;
|
||||
gdst_whole.download(cpu_dst);
|
||||
|
||||
char sss[1024];
|
||||
sprintf(sss, "roicols=%d,roirows=%d,src1x=%d,src1y=%d,dstx=%d,dsty=%d,maskx=%d,masky=%d,src2x=%d,src2y=%d", roicols, roirows, src1x, src1y, dstx, dsty, maskx, masky, src2x, src2y);
|
||||
|
||||
EXPECT_MAT_NEAR(dst, cpu_dst, 1, sss);
|
||||
Near(1);
|
||||
}
|
||||
}
|
||||
|
||||
@ -1448,36 +1165,18 @@ TEST_P(MagnitudeSqr, Mat)
|
||||
{
|
||||
for(int j = 0; j < LOOP_TIMES; j++)
|
||||
{
|
||||
// random_roi();
|
||||
// int64 start, end;
|
||||
// start = cv::getTickCount();
|
||||
random_roi();
|
||||
for(int i = 0; i < mat1.rows; ++i)
|
||||
for(int j = 0; j < mat1.cols; ++j)
|
||||
{
|
||||
float val1 = mat1.at<float>(i, j);
|
||||
float val2 = mat2.at<float>(i, j);
|
||||
|
||||
((float *)(dst.data))[i * dst.step / 4 + j] = val1 * val1 + val2 * val2;
|
||||
|
||||
// float val1 =((float *)( mat1.data))[(i*mat1.step/8 +j)*2];
|
||||
//
|
||||
// float val2 =((float *)( mat1.data))[(i*mat1.step/8 +j)*2+ 1 ];
|
||||
|
||||
// ((float *)(dst.data))[i*dst.step/4 +j]= val1 * val1 +val2 * val2;
|
||||
}
|
||||
// end = cv::getTickCount();
|
||||
|
||||
|
||||
|
||||
cv::ocl::oclMat clmat1(mat1), clmat2(mat2), cldst;
|
||||
cv::ocl::magnitudeSqr(clmat1, clmat2, cldst);
|
||||
|
||||
cv::Mat cpu_dst;
|
||||
cldst.download(cpu_dst);
|
||||
char sss[1024];
|
||||
sprintf(sss, "roicols=%d,roirows=%d,src1x=%d,src1y=%d,dstx=%d,dsty=%d,maskx=%d,masky=%d,src2x=%d,src2y=%d", roicols, roirows, src1x, src1y, dstx, dsty, maskx, masky, src2x, src2y);
|
||||
|
||||
EXPECT_MAT_NEAR(dst, cpu_dst, 1, sss);
|
||||
cv::ocl::oclMat clmat1(mat1), clmat2(mat2);
|
||||
cv::ocl::magnitudeSqr(clmat1, clmat2, gdst);
|
||||
Near(1);
|
||||
}
|
||||
}
|
||||
|
||||
@ -1498,21 +1197,13 @@ TEST_P(AddWeighted, Mat)
|
||||
|
||||
cv::ocl::addWeighted(gmat1, alpha, gmat2, beta, gama, gdst);
|
||||
|
||||
|
||||
cv::Mat cpu_dst;
|
||||
gdst_whole.download(cpu_dst);
|
||||
|
||||
char sss[1024];
|
||||
sprintf(sss, "roicols=%d,roirows=%d,src1x=%d,src1y=%d,dstx=%d,dsty=%d,maskx=%d,masky=%d,src2x=%d,src2y=%d", roicols, roirows, src1x, src1y, dstx, dsty, maskx, masky, src2x, src2y);
|
||||
|
||||
EXPECT_MAT_NEAR(dst, cpu_dst, 1e-5, sss);
|
||||
Near(1e-5);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
//********test****************
|
||||
|
||||
INSTANTIATE_TEST_CASE_P(Arithm, Lut, Combine(
|
||||
|
@ -1,3 +1,47 @@
|
||||
/*M///////////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
|
||||
//
|
||||
// By downloading, copying, installing or using the software you agree to this license.
|
||||
// If you do not agree to this license, do not download, install,
|
||||
// copy or use the software.
|
||||
//
|
||||
//
|
||||
// License Agreement
|
||||
// For Open Source Computer Vision Library
|
||||
//
|
||||
// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved.
|
||||
// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
|
||||
// Third party copyrights are property of their respective owners.
|
||||
//
|
||||
// @Authors
|
||||
// Nathan, liujun@multicorewareinc.com
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without modification,
|
||||
// are permitted provided that the following conditions are met:
|
||||
//
|
||||
// * Redistribution's of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimer.
|
||||
//
|
||||
// * Redistribution's in binary form must reproduce the above copyright notice,
|
||||
// this list of conditions and the following disclaimer in the documentation
|
||||
// and/or other materials provided with the distribution.
|
||||
//
|
||||
// * The name of the copyright holders may not be used to endorse or promote products
|
||||
// derived from this software without specific prior written permission.
|
||||
//
|
||||
// This software is provided by the copyright holders and contributors as is and
|
||||
// any express or implied warranties, including, but not limited to, the implied
|
||||
// warranties of merchantability and fitness for a particular purpose are disclaimed.
|
||||
// In no event shall the Intel Corporation or contributors be liable for any direct,
|
||||
// indirect, incidental, special, exemplary, or consequential damages
|
||||
// (including, but not limited to, procurement of substitute goods or services;
|
||||
// loss of use, data, or profits; or business interruption) however caused
|
||||
// and on any theory of liability, whether in contract, strict liability,
|
||||
// or tort (including negligence or otherwise) arising in any way out of
|
||||
// the use of this software, even if advised of the possibility of such damage.
|
||||
//
|
||||
//M*/
|
||||
#include "precomp.hpp"
|
||||
#include <iomanip>
|
||||
|
||||
@ -33,20 +77,14 @@ void blendLinearGold(const cv::Mat &img1, const cv::Mat &img2, const cv::Mat &we
|
||||
|
||||
PARAM_TEST_CASE(Blend, cv::Size, MatType/*, UseRoi*/)
|
||||
{
|
||||
//std::vector<cv::ocl::Info> oclinfo;
|
||||
cv::Size size;
|
||||
int type;
|
||||
bool useRoi;
|
||||
|
||||
virtual void SetUp()
|
||||
{
|
||||
//devInfo = GET_PARAM(0);
|
||||
size = GET_PARAM(0);
|
||||
type = GET_PARAM(1);
|
||||
/*useRoi = GET_PARAM(3);*/
|
||||
|
||||
//int devnums = getDevice(oclinfo, OPENCV_DEFAULT_OPENCL_DEVICE);
|
||||
//CV_Assert(devnums > 0);
|
||||
}
|
||||
};
|
||||
|
||||
@ -59,12 +97,9 @@ TEST_P(Blend, Accuracy)
|
||||
cv::Mat weights1 = randomMat(size, CV_32F, 0, 1);
|
||||
cv::Mat weights2 = randomMat(size, CV_32F, 0, 1);
|
||||
|
||||
cv::ocl::oclMat gimg1(size, type), gimg2(size, type), gweights1(size, CV_32F), gweights2(size, CV_32F);
|
||||
cv::ocl::oclMat dst(size, type);
|
||||
gimg1.upload(img1);
|
||||
gimg2.upload(img2);
|
||||
gweights1.upload(weights1);
|
||||
gweights2.upload(weights2);
|
||||
cv::ocl::oclMat gimg1(img1), gimg2(img2), gweights1(weights1), gweights2(weights2);
|
||||
cv::ocl::oclMat dst;
|
||||
|
||||
cv::ocl::blendLinear(gimg1, gimg2, gweights1, gweights2, dst);
|
||||
cv::Mat result;
|
||||
cv::Mat result_gold;
|
||||
@ -74,10 +109,10 @@ TEST_P(Blend, Accuracy)
|
||||
else
|
||||
blendLinearGold<float>(img1, img2, weights1, weights2, result_gold);
|
||||
|
||||
EXPECT_MAT_NEAR(result_gold, result, CV_MAT_DEPTH(type) == CV_8U ? 1.f : 1e-5f, 0);
|
||||
EXPECT_MAT_NEAR(result_gold, result, CV_MAT_DEPTH(type) == CV_8U ? 1.f : 1e-5f);
|
||||
}
|
||||
|
||||
INSTANTIATE_TEST_CASE_P(GPU_ImgProc, Blend, Combine(
|
||||
INSTANTIATE_TEST_CASE_P(OCL_ImgProc, Blend, Combine(
|
||||
DIFFERENT_SIZES,
|
||||
testing::Values(MatType(CV_8UC1), MatType(CV_8UC3), MatType(CV_8UC4), MatType(CV_32FC1), MatType(CV_32FC4))
|
||||
));
|
||||
|
@ -7,12 +7,16 @@
|
||||
// copy or use the software.
|
||||
//
|
||||
//
|
||||
// Intel License Agreement
|
||||
// License Agreement
|
||||
// For Open Source Computer Vision Library
|
||||
//
|
||||
// Copyright (C) 2010-2012, Multicoreware inc., all rights reserved.
|
||||
// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved.
|
||||
// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
|
||||
// Third party copyrights are property of their respective owners.
|
||||
//
|
||||
// @Authors
|
||||
// Nathan, liujun@multicorewareinc.com
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without modification,
|
||||
// are permitted provided that the following conditions are met:
|
||||
//
|
||||
@ -21,12 +25,12 @@
|
||||
//
|
||||
// * Redistribution's in binary form must reproduce the above copyright notice,
|
||||
// this list of conditions and the following disclaimer in the documentation
|
||||
// and/or other materials provided with the distribution.
|
||||
// and/or other materials provided with the distribution.
|
||||
//
|
||||
// * The name of Intel Corporation may not be used to endorse or promote products
|
||||
// * The name of the copyright holders may not be used to endorse or promote products
|
||||
// derived from this software without specific prior written permission.
|
||||
//
|
||||
// This software is provided by the copyright holders and contributors "as is" and
|
||||
// This software is provided by the copyright holders and contributors as is and
|
||||
// any express or implied warranties, including, but not limited to, the implied
|
||||
// warranties of merchantability and fitness for a particular purpose are disclaimed.
|
||||
// In no event shall the Intel Corporation or contributors be liable for any direct,
|
||||
|
@ -59,7 +59,7 @@ PARAM_TEST_CASE(StereoMatchBM, int, int)
|
||||
virtual void SetUp()
|
||||
{
|
||||
n_disp = GET_PARAM(0);
|
||||
winSize = GET_PARAM(1);
|
||||
winSize = GET_PARAM(1);
|
||||
}
|
||||
};
|
||||
|
||||
@ -69,27 +69,27 @@ TEST_P(StereoMatchBM, Regression)
|
||||
Mat left_image = readImage("stereobm/aloe-L.png", IMREAD_GRAYSCALE);
|
||||
Mat right_image = readImage("stereobm/aloe-R.png", IMREAD_GRAYSCALE);
|
||||
Mat disp_gold = readImage("stereobm/aloe-disp.png", IMREAD_GRAYSCALE);
|
||||
ocl::oclMat d_left, d_right;
|
||||
ocl::oclMat d_disp(left_image.size(), CV_8U);
|
||||
Mat disp;
|
||||
ocl::oclMat d_left, d_right;
|
||||
ocl::oclMat d_disp(left_image.size(), CV_8U);
|
||||
Mat disp;
|
||||
|
||||
ASSERT_FALSE(left_image.empty());
|
||||
ASSERT_FALSE(right_image.empty());
|
||||
ASSERT_FALSE(disp_gold.empty());
|
||||
d_left.upload(left_image);
|
||||
d_right.upload(right_image);
|
||||
d_left.upload(left_image);
|
||||
d_right.upload(right_image);
|
||||
|
||||
ocl::StereoBM_OCL bm(0, n_disp, winSize);
|
||||
|
||||
|
||||
bm(d_left, d_right, d_disp);
|
||||
d_disp.download(disp);
|
||||
d_disp.download(disp);
|
||||
|
||||
EXPECT_MAT_SIMILAR(disp_gold, disp, 1e-3);
|
||||
}
|
||||
|
||||
INSTANTIATE_TEST_CASE_P(OCL_Calib3D, StereoMatchBM, testing::Combine(testing::Values(128),
|
||||
testing::Values(19)));
|
||||
testing::Values(19)));
|
||||
|
||||
PARAM_TEST_CASE(StereoMatchBP, int, int, int, float, float, float, float)
|
||||
{
|
||||
@ -129,9 +129,69 @@ TEST_P(StereoMatchBP, Regression)
|
||||
bp(d_left, d_right, d_disp);
|
||||
d_disp.download(disp);
|
||||
disp.convertTo(disp, disp_gold.depth());
|
||||
EXPECT_MAT_NEAR(disp_gold, disp, 0.0, "");
|
||||
EXPECT_MAT_NEAR(disp_gold, disp, 0.0);
|
||||
}
|
||||
INSTANTIATE_TEST_CASE_P(OCL_Calib3D, StereoMatchBP, testing::Combine(testing::Values(64),
|
||||
testing::Values(8),testing::Values(2),testing::Values(25.0f),
|
||||
testing::Values(0.1f),testing::Values(15.0f),testing::Values(1.0f)));
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
// ConstSpaceBeliefPropagation
|
||||
PARAM_TEST_CASE(StereoMatchConstSpaceBP, int, int, int, int, float, float, float, float, int, int)
{
    // One field per test parameter, declared in the same order as the
    // parameter list above (indices 0..9).
    int ndisp_, iters_, levels_, nr_plane_;
    float max_data_term_, data_weight_, max_disc_term_, disc_single_jump_;
    int min_disp_th_, msg_type_;

    // Copies each test parameter into its matching field before a test runs.
    virtual void SetUp()
    {
        // integer parameters 0..3
        ndisp_    = GET_PARAM(0);
        iters_    = GET_PARAM(1);
        levels_   = GET_PARAM(2);
        nr_plane_ = GET_PARAM(3);

        // floating-point parameters 4..7
        max_data_term_    = GET_PARAM(4);
        data_weight_      = GET_PARAM(5);
        max_disc_term_    = GET_PARAM(6);
        disc_single_jump_ = GET_PARAM(7);

        // trailing integer parameters 8..9
        min_disp_th_ = GET_PARAM(8);
        msg_type_    = GET_PARAM(9);
    }
};
|
||||
// Regression test: runs the OpenCL constant-space belief-propagation stereo
// matcher on the "aloe" image pair and compares the resulting disparity map
// with the stored reference image.
TEST_P(StereoMatchConstSpaceBP, Regression)
{
    Mat left_image  = readImage("csstereobp/aloe-L.png");
    Mat right_image = readImage("csstereobp/aloe-R.png");
    Mat disp_gold   = readImage("csstereobp/aloe-disp.png", IMREAD_GRAYSCALE);

    // Stop immediately if any test image could not be loaded; nothing below
    // is meaningful without all three.
    ASSERT_FALSE(left_image.empty());
    ASSERT_FALSE(right_image.empty());
    ASSERT_FALSE(disp_gold.empty());

    ocl::oclMat d_left, d_right;
    ocl::oclMat d_disp;
    d_left.upload(left_image);
    d_right.upload(right_image);

    // Forward every fixture parameter to the matcher. The last two arguments
    // were previously hard-coded to 0 and CV_32F, which silently ignored the
    // min_disp_th_ / msg_type_ values supplied by the test instantiation.
    ocl::StereoConstantSpaceBP bp(ndisp_, iters_, levels_, nr_plane_, max_data_term_, data_weight_,
                                  max_disc_term_, disc_single_jump_, min_disp_th_, msg_type_);
    bp(d_left, d_right, d_disp);

    Mat disp;
    d_disp.download(disp);
    // Bring the result to the reference image's depth before comparing.
    disp.convertTo(disp, disp_gold.depth());

    EXPECT_MAT_SIMILAR(disp_gold, disp, 1e-4);
}
|
||||
// Single parameter combination for StereoMatchConstSpaceBP; each value is
// labelled with the fixture field that receives it in SetUp().
INSTANTIATE_TEST_CASE_P(OCL_Calib3D, StereoMatchConstSpaceBP, testing::Combine(
                            testing::Values(128),      // ndisp_
                            testing::Values(16),       // iters_
                            testing::Values(4),        // levels_
                            testing::Values(4),        // nr_plane_
                            testing::Values(30.0f),    // max_data_term_
                            testing::Values(1.0f),     // data_weight_
                            testing::Values(160.0f),   // max_disc_term_
                            testing::Values(10.0f),    // disc_single_jump_
                            testing::Values(0),        // min_disp_th_
                            testing::Values(CV_32F))); // msg_type_
|
||||
#endif // HAVE_OPENCL
|
||||
|
@ -100,7 +100,7 @@ PARAM_TEST_CASE(CvtColor, cv::Size, MatDepth)
|
||||
cv::cvtColor(src, dst_gold, CVTCODE(name));\
|
||||
cv::Mat dst_mat;\
|
||||
dst.download(dst_mat);\
|
||||
EXPECT_MAT_NEAR(dst_gold, dst_mat, 1e-5, "");\
|
||||
EXPECT_MAT_NEAR(dst_gold, dst_mat, 1e-5);\
|
||||
}
|
||||
|
||||
//add new ones here using macro
|
||||
@ -141,7 +141,7 @@ TEST_P(CvtColor_Gray2RGB, Accuracy)
|
||||
cv::cvtColor(src, dst_gold, code);
|
||||
cv::Mat dst_mat;
|
||||
dst.download(dst_mat);
|
||||
EXPECT_MAT_NEAR(dst_gold, dst_mat, 1e-5, "");
|
||||
EXPECT_MAT_NEAR(dst_gold, dst_mat, 1e-5);
|
||||
}
|
||||
|
||||
|
||||
@ -171,7 +171,7 @@ TEST_P(CvtColor_YUV420, Accuracy)
|
||||
cv::Mat dst_mat;
|
||||
dst.download(dst_mat);
|
||||
MAT_DIFF(dst_mat, dst_gold);
|
||||
EXPECT_MAT_NEAR(dst_gold, dst_mat, 1e-5, "");
|
||||
EXPECT_MAT_NEAR(dst_gold, dst_mat, 1e-5);
|
||||
}
|
||||
|
||||
INSTANTIATE_TEST_CASE_P(OCL_ImgProc, CvtColor, testing::Combine(
|
||||
|
@ -47,27 +47,16 @@
|
||||
#include "precomp.hpp"
|
||||
#include <iomanip>
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
/// ColumnSum
|
||||
|
||||
#ifdef HAVE_OPENCL
|
||||
|
||||
////////////////////////////////////////////////////////////////////////
|
||||
// ColumnSum
|
||||
|
||||
PARAM_TEST_CASE(ColumnSum, cv::Size, bool )
|
||||
PARAM_TEST_CASE(ColumnSum, cv::Size)
|
||||
{
|
||||
cv::Size size;
|
||||
cv::Mat src;
|
||||
bool useRoi;
|
||||
//std::vector<cv::ocl::Info> oclinfo;
|
||||
|
||||
virtual void SetUp()
|
||||
{
|
||||
size = GET_PARAM(0);
|
||||
useRoi = GET_PARAM(1);
|
||||
//int devnums = getDevice(oclinfo, OPENCV_DEFAULT_OPENCL_DEVICE);
|
||||
//CV_Assert(devnums > 0);
|
||||
}
|
||||
};
|
||||
|
||||
@ -99,8 +88,7 @@ TEST_P(ColumnSum, Accuracy)
|
||||
}
|
||||
}
|
||||
|
||||
INSTANTIATE_TEST_CASE_P(GPU_ImgProc, ColumnSum, testing::Combine(
|
||||
DIFFERENT_SIZES, testing::Values(Inverse(false), Inverse(true))));
|
||||
INSTANTIATE_TEST_CASE_P(OCL_ImgProc, ColumnSum, DIFFERENT_SIZES);
|
||||
|
||||
|
||||
#endif
|
||||
|
@ -68,7 +68,7 @@ TEST_P(Dft, C2C)
|
||||
|
||||
cv::dft(a, b_gold, dft_flags);
|
||||
cv::ocl::dft(cv::ocl::oclMat(a), d_b, a.size(), dft_flags);
|
||||
EXPECT_MAT_NEAR(b_gold, cv::Mat(d_b), a.size().area() * 1e-4, "");
|
||||
EXPECT_MAT_NEAR(b_gold, cv::Mat(d_b), a.size().area() * 1e-4);
|
||||
}
|
||||
|
||||
TEST_P(Dft, R2C)
|
||||
@ -81,11 +81,11 @@ TEST_P(Dft, R2C)
|
||||
cv::dft(a, b_gold, cv::DFT_COMPLEX_OUTPUT | dft_flags);
|
||||
|
||||
b_gold_roi = b_gold(cv::Rect(0, 0, d_b.cols, d_b.rows));
|
||||
EXPECT_MAT_NEAR(b_gold_roi, cv::Mat(d_b), a.size().area() * 1e-4, "");
|
||||
EXPECT_MAT_NEAR(b_gold_roi, cv::Mat(d_b), a.size().area() * 1e-4);
|
||||
|
||||
cv::Mat c_gold;
|
||||
cv::dft(b_gold, c_gold, cv::DFT_INVERSE | cv::DFT_REAL_OUTPUT | cv::DFT_SCALE);
|
||||
EXPECT_MAT_NEAR(b_gold_roi, cv::Mat(d_b), a.size().area() * 1e-4, "");
|
||||
EXPECT_MAT_NEAR(b_gold_roi, cv::Mat(d_b), a.size().area() * 1e-4);
|
||||
}
|
||||
|
||||
TEST_P(Dft, R2CthenC2R)
|
||||
@ -95,7 +95,7 @@ TEST_P(Dft, R2CthenC2R)
|
||||
cv::ocl::oclMat d_b, d_c;
|
||||
cv::ocl::dft(cv::ocl::oclMat(a), d_b, a.size(), 0);
|
||||
cv::ocl::dft(d_b, d_c, a.size(), cv::DFT_SCALE | cv::DFT_INVERSE | cv::DFT_REAL_OUTPUT);
|
||||
EXPECT_MAT_NEAR(a, d_c, a.size().area() * 1e-4, "");
|
||||
EXPECT_MAT_NEAR(a, d_c, a.size().area() * 1e-4);
|
||||
}
|
||||
|
||||
|
||||
|
@ -12,6 +12,7 @@
|
||||
//
|
||||
// Copyright (C) 2010-2012, Institute Of Software Chinese Academy Of Science, all rights reserved.
|
||||
// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
|
||||
// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved.
|
||||
// Third party copyrights are property of their respective owners.
|
||||
//
|
||||
// @Authors
|
||||
@ -19,6 +20,7 @@
|
||||
// Jia Haipeng, jiahaipeng95@gmail.com
|
||||
// Zero Lin, Zero.Lin@amd.com
|
||||
// Zhang Ying, zhangying913@gmail.com
|
||||
// Yao Wang, bitwangyaoyao@gmail.com
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without modification,
|
||||
// are permitted provided that the following conditions are met:
|
||||
@ -55,121 +57,13 @@ using namespace testing;
|
||||
using namespace std;
|
||||
|
||||
|
||||
PARAM_TEST_CASE(FilterTestBase, MatType, bool)
|
||||
PARAM_TEST_CASE(FilterTestBase,
|
||||
MatType,
|
||||
cv::Size, // kernel size
|
||||
cv::Size, // dx,dy
|
||||
int // border type, or iteration
|
||||
)
|
||||
{
|
||||
int type;
|
||||
cv::Scalar val;
|
||||
|
||||
//src mat
|
||||
cv::Mat mat1;
|
||||
cv::Mat mat2;
|
||||
cv::Mat mask;
|
||||
cv::Mat dst;
|
||||
cv::Mat dst1; //bak, for two outputs
|
||||
|
||||
// set up roi
|
||||
int roicols;
|
||||
int roirows;
|
||||
int src1x;
|
||||
int src1y;
|
||||
int src2x;
|
||||
int src2y;
|
||||
int dstx;
|
||||
int dsty;
|
||||
int maskx;
|
||||
int masky;
|
||||
|
||||
//src mat with roi
|
||||
cv::Mat mat1_roi;
|
||||
cv::Mat mat2_roi;
|
||||
cv::Mat mask_roi;
|
||||
cv::Mat dst_roi;
|
||||
cv::Mat dst1_roi; //bak
|
||||
//std::vector<cv::ocl::Info> oclinfo;
|
||||
//ocl dst mat for testing
|
||||
cv::ocl::oclMat gdst_whole;
|
||||
cv::ocl::oclMat gdst1_whole; //bak
|
||||
|
||||
//ocl mat with roi
|
||||
cv::ocl::oclMat gmat1;
|
||||
cv::ocl::oclMat gmat2;
|
||||
cv::ocl::oclMat gdst;
|
||||
cv::ocl::oclMat gdst1; //bak
|
||||
cv::ocl::oclMat gmask;
|
||||
|
||||
virtual void SetUp()
|
||||
{
|
||||
type = GET_PARAM(0);
|
||||
|
||||
cv::RNG &rng = TS::ptr()->get_rng();
|
||||
cv::Size size(MWIDTH, MHEIGHT);
|
||||
|
||||
mat1 = randomMat(rng, size, type, 5, 16, false);
|
||||
mat2 = randomMat(rng, size, type, 5, 16, false);
|
||||
dst = randomMat(rng, size, type, 5, 16, false);
|
||||
dst1 = randomMat(rng, size, type, 5, 16, false);
|
||||
mask = randomMat(rng, size, CV_8UC1, 0, 2, false);
|
||||
|
||||
cv::threshold(mask, mask, 0.5, 255., CV_8UC1);
|
||||
|
||||
val = cv::Scalar(rng.uniform(-10.0, 10.0), rng.uniform(-10.0, 10.0), rng.uniform(-10.0, 10.0), rng.uniform(-10.0, 10.0));
|
||||
}
|
||||
|
||||
void random_roi()
|
||||
{
|
||||
#ifdef RANDOMROI
|
||||
//randomize ROI
|
||||
cv::RNG &rng = TS::ptr()->get_rng();
|
||||
roicols = rng.uniform(1, mat1.cols);
|
||||
roirows = rng.uniform(1, mat1.rows);
|
||||
src1x = rng.uniform(0, mat1.cols - roicols);
|
||||
src1y = rng.uniform(0, mat1.rows - roirows);
|
||||
src2x = rng.uniform(0, mat2.cols - roicols);
|
||||
src2y = rng.uniform(0, mat2.rows - roirows);
|
||||
dstx = rng.uniform(0, dst.cols - roicols);
|
||||
dsty = rng.uniform(0, dst.rows - roirows);
|
||||
maskx = rng.uniform(0, mask.cols - roicols);
|
||||
masky = rng.uniform(0, mask.rows - roirows);
|
||||
#else
|
||||
roicols = mat1.cols;
|
||||
roirows = mat1.rows;
|
||||
src1x = 0;
|
||||
src1y = 0;
|
||||
src2x = 0;
|
||||
src2y = 0;
|
||||
dstx = 0;
|
||||
dsty = 0;
|
||||
maskx = 0;
|
||||
masky = 0;
|
||||
#endif
|
||||
mat1_roi = mat1(Rect(src1x, src1y, roicols, roirows));
|
||||
mat2_roi = mat2(Rect(src2x, src2y, roicols, roirows));
|
||||
mask_roi = mask(Rect(maskx, masky, roicols, roirows));
|
||||
dst_roi = dst(Rect(dstx, dsty, roicols, roirows));
|
||||
dst1_roi = dst1(Rect(dstx, dsty, roicols, roirows));
|
||||
|
||||
gdst_whole = dst;
|
||||
gdst = gdst_whole(Rect(dstx, dsty, roicols, roirows));
|
||||
|
||||
gdst1_whole = dst1;
|
||||
gdst1 = gdst1_whole(Rect(dstx, dsty, roicols, roirows));
|
||||
|
||||
gmat1 = mat1_roi;
|
||||
gmat2 = mat2_roi;
|
||||
gmask = mask_roi;
|
||||
}
|
||||
|
||||
};
|
||||
|
||||
/////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// blur
|
||||
|
||||
PARAM_TEST_CASE(Blur, MatType, cv::Size, int)
|
||||
{
|
||||
int type;
|
||||
cv::Size ksize;
|
||||
int bordertype;
|
||||
|
||||
//src mat
|
||||
cv::Mat mat1;
|
||||
cv::Mat dst;
|
||||
@ -185,7 +79,7 @@ PARAM_TEST_CASE(Blur, MatType, cv::Size, int)
|
||||
//src mat with roi
|
||||
cv::Mat mat1_roi;
|
||||
cv::Mat dst_roi;
|
||||
//std::vector<cv::ocl::Info> oclinfo;
|
||||
|
||||
//ocl dst mat for testing
|
||||
cv::ocl::oclMat gdst_whole;
|
||||
|
||||
@ -193,23 +87,6 @@ PARAM_TEST_CASE(Blur, MatType, cv::Size, int)
|
||||
cv::ocl::oclMat gmat1;
|
||||
cv::ocl::oclMat gdst;
|
||||
|
||||
virtual void SetUp()
|
||||
{
|
||||
type = GET_PARAM(0);
|
||||
ksize = GET_PARAM(1);
|
||||
bordertype = GET_PARAM(2);
|
||||
|
||||
cv::RNG &rng = TS::ptr()->get_rng();
|
||||
cv::Size size(MWIDTH, MHEIGHT);
|
||||
|
||||
mat1 = randomMat(rng, size, type, 5, 16, false);
|
||||
dst = randomMat(rng, size, type, 5, 16, false);
|
||||
//int devnums = getDevice(oclinfo, OPENCV_DEFAULT_OPENCL_DEVICE);
|
||||
//CV_Assert(devnums > 0);
|
||||
////if you want to use undefault device, set it here
|
||||
////setDevice(oclinfo[0]);
|
||||
}
|
||||
|
||||
void random_roi()
|
||||
{
|
||||
#ifdef RANDOMROI
|
||||
@ -236,10 +113,37 @@ PARAM_TEST_CASE(Blur, MatType, cv::Size, int)
|
||||
gdst_whole = dst;
|
||||
gdst = gdst_whole(Rect(dstx, dsty, roicols, roirows));
|
||||
|
||||
|
||||
gmat1 = mat1_roi;
|
||||
}
|
||||
|
||||
void Init(int mat_type)
|
||||
{
|
||||
cv::Size size(MWIDTH, MHEIGHT);
|
||||
mat1 = randomMat(size, mat_type, 5, 16);
|
||||
dst = randomMat(size, mat_type, 5, 16);
|
||||
}
|
||||
|
||||
void Near(double threshold)
|
||||
{
|
||||
EXPECT_MAT_NEAR(dst, Mat(gdst_whole), threshold);
|
||||
}
|
||||
};
|
||||
|
||||
/////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// blur
|
||||
struct Blur : FilterTestBase
|
||||
{
|
||||
int type;
|
||||
cv::Size ksize;
|
||||
int bordertype;
|
||||
|
||||
virtual void SetUp()
|
||||
{
|
||||
type = GET_PARAM(0);
|
||||
ksize = GET_PARAM(1);
|
||||
bordertype = GET_PARAM(3);
|
||||
Init(type);
|
||||
}
|
||||
};
|
||||
|
||||
TEST_P(Blur, Mat)
|
||||
@ -247,116 +151,36 @@ TEST_P(Blur, Mat)
|
||||
for(int j = 0; j < LOOP_TIMES; j++)
|
||||
{
|
||||
random_roi();
|
||||
|
||||
cv::blur(mat1_roi, dst_roi, ksize, Point(-1, -1), bordertype);
|
||||
cv::ocl::blur(gmat1, gdst, ksize, Point(-1, -1), bordertype);
|
||||
|
||||
cv::Mat cpu_dst;
|
||||
gdst_whole.download(cpu_dst);
|
||||
char sss[1024];
|
||||
sprintf(sss, "roicols=%d,roirows=%d,src1x=%d,src1y=%d,dstx=%d,dsty=%d", roicols, roirows, src1x, src1y, dstx, dsty);
|
||||
EXPECT_MAT_NEAR(dst, cpu_dst, 1.0, sss);
|
||||
Near(1.0);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
|
||||
|
||||
/////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
//Laplacian
|
||||
|
||||
PARAM_TEST_CASE(LaplacianTestBase, MatType, int)
|
||||
struct Laplacian : FilterTestBase
|
||||
{
|
||||
int type;
|
||||
int ksize;
|
||||
|
||||
//src mat
|
||||
cv::Mat mat;
|
||||
cv::Mat dst;
|
||||
|
||||
// set up roi
|
||||
int roicols;
|
||||
int roirows;
|
||||
int srcx;
|
||||
int srcy;
|
||||
int dstx;
|
||||
int dsty;
|
||||
|
||||
//src mat with roi
|
||||
cv::Mat mat_roi;
|
||||
cv::Mat dst_roi;
|
||||
//std::vector<cv::ocl::Info> oclinfo;
|
||||
//ocl dst mat for testing
|
||||
cv::ocl::oclMat gdst_whole;
|
||||
|
||||
//ocl mat with roi
|
||||
cv::ocl::oclMat gmat;
|
||||
cv::ocl::oclMat gdst;
|
||||
cv::Size ksize;
|
||||
|
||||
virtual void SetUp()
|
||||
{
|
||||
type = GET_PARAM(0);
|
||||
ksize = GET_PARAM(1);
|
||||
|
||||
cv::RNG &rng = TS::ptr()->get_rng();
|
||||
cv::Size size(MWIDTH, MHEIGHT);
|
||||
|
||||
mat = randomMat(rng, size, type, 5, 16, false);
|
||||
dst = randomMat(rng, size, type, 5, 16, false);
|
||||
|
||||
//int devnums = getDevice(oclinfo, OPENCV_DEFAULT_OPENCL_DEVICE);
|
||||
//CV_Assert(devnums > 0);
|
||||
////if you want to use undefault device, set it here
|
||||
////setDevice(oclinfo[0]);
|
||||
}
|
||||
|
||||
void random_roi()
|
||||
{
|
||||
#ifdef RANDOMROI
|
||||
//randomize ROI
|
||||
cv::RNG &rng = TS::ptr()->get_rng();
|
||||
roicols = rng.uniform(2, mat.cols);
|
||||
roirows = rng.uniform(2, mat.rows);
|
||||
srcx = rng.uniform(0, mat.cols - roicols);
|
||||
srcy = rng.uniform(0, mat.rows - roirows);
|
||||
dstx = rng.uniform(0, dst.cols - roicols);
|
||||
dsty = rng.uniform(0, dst.rows - roirows);
|
||||
#else
|
||||
roicols = mat.cols;
|
||||
roirows = mat.rows;
|
||||
srcx = 0;
|
||||
srcy = 0;
|
||||
dstx = 0;
|
||||
dsty = 0;
|
||||
#endif
|
||||
|
||||
mat_roi = mat(Rect(srcx, srcy, roicols, roirows));
|
||||
dst_roi = dst(Rect(dstx, dsty, roicols, roirows));
|
||||
|
||||
gdst_whole = dst;
|
||||
gdst = gdst_whole(Rect(dstx, dsty, roicols, roirows));
|
||||
|
||||
gmat = mat_roi;
|
||||
Init(type);
|
||||
}
|
||||
};
|
||||
|
||||
struct Laplacian : LaplacianTestBase {};
|
||||
|
||||
TEST_P(Laplacian, Accuracy)
|
||||
{
|
||||
for(int j = 0; j < LOOP_TIMES; j++)
|
||||
{
|
||||
random_roi();
|
||||
|
||||
cv::Laplacian(mat_roi, dst_roi, -1, ksize, 1);
|
||||
cv::ocl::Laplacian(gmat, gdst, -1, ksize, 1);
|
||||
|
||||
cv::Mat cpu_dst;
|
||||
gdst_whole.download(cpu_dst);
|
||||
char sss[1024];
|
||||
sprintf(sss, "roicols=%d,roirows=%d,src1x=%d,src1y=%d,dstx=%d,dsty=%d", roicols, roirows, srcx, srcy, dstx, dsty);
|
||||
|
||||
EXPECT_MAT_NEAR(dst, cpu_dst, 1e-5, sss);
|
||||
cv::Laplacian(mat1_roi, dst_roi, -1, ksize.width, 1);
|
||||
cv::ocl::Laplacian(gmat1, gdst, -1, ksize.width, 1);
|
||||
Near(1e-5);
|
||||
}
|
||||
}
|
||||
|
||||
@ -364,8 +188,7 @@ TEST_P(Laplacian, Accuracy)
|
||||
|
||||
/////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// erode & dilate
|
||||
|
||||
PARAM_TEST_CASE(ErodeDilateBase, MatType, int)
|
||||
struct ErodeDilate : FilterTestBase
|
||||
{
|
||||
int type;
|
||||
int iterations;
|
||||
@ -373,210 +196,54 @@ PARAM_TEST_CASE(ErodeDilateBase, MatType, int)
|
||||
//erode or dilate kernel
|
||||
cv::Mat kernel;
|
||||
|
||||
//src mat
|
||||
cv::Mat mat1;
|
||||
cv::Mat dst;
|
||||
|
||||
// set up roi
|
||||
int roicols;
|
||||
int roirows;
|
||||
int src1x;
|
||||
int src1y;
|
||||
int dstx;
|
||||
int dsty;
|
||||
|
||||
//src mat with roi
|
||||
cv::Mat mat1_roi;
|
||||
cv::Mat dst_roi;
|
||||
//std::vector<cv::ocl::Info> oclinfo;
|
||||
//ocl dst mat for testing
|
||||
cv::ocl::oclMat gdst_whole;
|
||||
|
||||
//ocl mat with roi
|
||||
cv::ocl::oclMat gmat1;
|
||||
cv::ocl::oclMat gdst;
|
||||
|
||||
virtual void SetUp()
|
||||
{
|
||||
type = GET_PARAM(0);
|
||||
iterations = GET_PARAM(1);
|
||||
|
||||
cv::RNG &rng = TS::ptr()->get_rng();
|
||||
cv::Size size(MWIDTH, MHEIGHT);
|
||||
|
||||
mat1 = randomMat(rng, size, type, 5, 16, false);
|
||||
dst = randomMat(rng, size, type, 5, 16, false);
|
||||
iterations = GET_PARAM(3);
|
||||
Init(type);
|
||||
// rng.fill(kernel, cv::RNG::UNIFORM, cv::Scalar::all(0), cv::Scalar::all(3));
|
||||
kernel = randomMat(rng, Size(3, 3), CV_8UC1, 0, 3, false);
|
||||
|
||||
}
|
||||
|
||||
void random_roi()
|
||||
{
|
||||
#ifdef RANDOMROI
|
||||
//randomize ROI
|
||||
cv::RNG &rng = TS::ptr()->get_rng();
|
||||
roicols = rng.uniform(2, mat1.cols);
|
||||
roirows = rng.uniform(2, mat1.rows);
|
||||
src1x = rng.uniform(0, mat1.cols - roicols);
|
||||
src1y = rng.uniform(0, mat1.rows - roirows);
|
||||
dstx = rng.uniform(0, dst.cols - roicols);
|
||||
dsty = rng.uniform(0, dst.rows - roirows);
|
||||
#else
|
||||
roicols = mat1.cols;
|
||||
roirows = mat1.rows;
|
||||
src1x = 0;
|
||||
src1y = 0;
|
||||
dstx = 0;
|
||||
dsty = 0;
|
||||
#endif
|
||||
|
||||
mat1_roi = mat1(Rect(src1x, src1y, roicols, roirows));
|
||||
dst_roi = dst(Rect(dstx, dsty, roicols, roirows));
|
||||
|
||||
gdst_whole = dst;
|
||||
gdst = gdst_whole(Rect(dstx, dsty, roicols, roirows));
|
||||
|
||||
|
||||
gmat1 = mat1_roi;
|
||||
kernel = randomMat(Size(3, 3), CV_8UC1, 0, 3);
|
||||
}
|
||||
|
||||
};
|
||||
|
||||
// erode
|
||||
|
||||
struct Erode : ErodeDilateBase {};
|
||||
|
||||
TEST_P(Erode, Mat)
|
||||
{
|
||||
for(int j = 0; j < LOOP_TIMES; j++)
|
||||
{
|
||||
random_roi();
|
||||
|
||||
cv::erode(mat1_roi, dst_roi, kernel, Point(-1, -1), iterations);
|
||||
cv::ocl::erode(gmat1, gdst, kernel, Point(-1, -1), iterations);
|
||||
|
||||
cv::Mat cpu_dst;
|
||||
gdst_whole.download(cpu_dst);
|
||||
char sss[1024];
|
||||
sprintf(sss, "roicols=%d,roirows=%d,src1x=%d,src1y=%d,dstx=%d,dsty=%d", roicols, roirows, src1x, src1y, dstx, dsty);
|
||||
|
||||
EXPECT_MAT_NEAR(dst, cpu_dst, 1e-5, sss);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
// dilate
|
||||
|
||||
struct Dilate : ErodeDilateBase {};
|
||||
|
||||
TEST_P(Dilate, Mat)
|
||||
TEST_P(ErodeDilate, Mat)
|
||||
{
|
||||
for(int j = 0; j < LOOP_TIMES; j++)
|
||||
{
|
||||
random_roi();
|
||||
cv::erode(mat1_roi, dst_roi, kernel, Point(-1, -1), iterations);
|
||||
cv::ocl::erode(gmat1, gdst, kernel, Point(-1, -1), iterations);
|
||||
|
||||
cv::Mat cpu_dst;
|
||||
gdst_whole.download(cpu_dst);
|
||||
char sss[1024];
|
||||
sprintf(sss, "roicols=%d,roirows=%d,src1x=%d,src1y=%d,dstx=%d,dsty=%d", roicols, roirows, src1x, src1y, dstx, dsty);
|
||||
|
||||
EXPECT_MAT_NEAR(dst, cpu_dst, 1e-5, sss);
|
||||
Near(1e-5);
|
||||
}
|
||||
for(int j = 0; j < LOOP_TIMES; j++)
|
||||
{
|
||||
random_roi();
|
||||
cv::dilate(mat1_roi, dst_roi, kernel, Point(-1, -1), iterations);
|
||||
cv::ocl::dilate(gmat1, gdst, kernel, Point(-1, -1), iterations);
|
||||
Near(1e-5);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
/////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// Sobel
|
||||
|
||||
PARAM_TEST_CASE(Sobel, MatType, int, int, int, int)
|
||||
struct Sobel : FilterTestBase
|
||||
{
|
||||
int type;
|
||||
int dx, dy, ksize, bordertype;
|
||||
|
||||
//src mat
|
||||
cv::Mat mat1;
|
||||
cv::Mat dst;
|
||||
|
||||
// set up roi
|
||||
int roicols;
|
||||
int roirows;
|
||||
int src1x;
|
||||
int src1y;
|
||||
int dstx;
|
||||
int dsty;
|
||||
|
||||
//src mat with roi
|
||||
cv::Mat mat1_roi;
|
||||
cv::Mat dst_roi;
|
||||
//std::vector<cv::ocl::Info> oclinfo;
|
||||
//ocl dst mat for testing
|
||||
cv::ocl::oclMat gdst_whole;
|
||||
|
||||
//ocl mat with roi
|
||||
cv::ocl::oclMat gmat1;
|
||||
cv::ocl::oclMat gdst;
|
||||
|
||||
virtual void SetUp()
|
||||
{
|
||||
type = GET_PARAM(0);
|
||||
dx = GET_PARAM(1);
|
||||
dy = GET_PARAM(2);
|
||||
ksize = GET_PARAM(3);
|
||||
bordertype = GET_PARAM(4);
|
||||
|
||||
|
||||
cv::RNG &rng = TS::ptr()->get_rng();
|
||||
cv::Size size(MWIDTH, MHEIGHT);
|
||||
|
||||
mat1 = randomMat(rng, size, type, 5, 16, false);
|
||||
dst = randomMat(rng, size, type, 5, 16, false);
|
||||
|
||||
//int devnums = getDevice(oclinfo, OPENCV_DEFAULT_OPENCL_DEVICE);
|
||||
//CV_Assert(devnums > 0);
|
||||
////if you want to use undefault device, set it here
|
||||
////setDevice(oclinfo[0]);
|
||||
Size s = GET_PARAM(1);
|
||||
ksize = s.width;
|
||||
s = GET_PARAM(2);
|
||||
dx = s.width;
|
||||
dy = s.height;
|
||||
bordertype = GET_PARAM(3);
|
||||
Init(type);
|
||||
}
|
||||
|
||||
void random_roi()
|
||||
{
|
||||
#ifdef RANDOMROI
|
||||
//randomize ROI
|
||||
cv::RNG &rng = TS::ptr()->get_rng();
|
||||
roicols = rng.uniform(2, mat1.cols);
|
||||
roirows = rng.uniform(2, mat1.rows);
|
||||
src1x = rng.uniform(0, mat1.cols - roicols);
|
||||
src1y = rng.uniform(0, mat1.rows - roirows);
|
||||
dstx = rng.uniform(0, dst.cols - roicols);
|
||||
dsty = rng.uniform(0, dst.rows - roirows);
|
||||
#else
|
||||
roicols = mat1.cols;
|
||||
roirows = mat1.rows;
|
||||
src1x = 0;
|
||||
src1y = 0;
|
||||
dstx = 0;
|
||||
dsty = 0;
|
||||
#endif
|
||||
|
||||
mat1_roi = mat1(Rect(src1x, src1y, roicols, roirows));
|
||||
dst_roi = dst(Rect(dstx, dsty, roicols, roirows));
|
||||
|
||||
gdst_whole = dst;
|
||||
gdst = gdst_whole(Rect(dstx, dsty, roicols, roirows));
|
||||
|
||||
|
||||
gmat1 = mat1_roi;
|
||||
}
|
||||
|
||||
};
|
||||
|
||||
TEST_P(Sobel, Mat)
|
||||
@ -584,103 +251,29 @@ TEST_P(Sobel, Mat)
|
||||
for(int j = 0; j < LOOP_TIMES; j++)
|
||||
{
|
||||
random_roi();
|
||||
|
||||
cv::Sobel(mat1_roi, dst_roi, -1, dx, dy, ksize, /*scale*/0.00001,/*delta*/0, bordertype);
|
||||
cv::ocl::Sobel(gmat1, gdst, -1, dx, dy, ksize,/*scale*/0.00001,/*delta*/0, bordertype);
|
||||
|
||||
cv::Mat cpu_dst;
|
||||
gdst_whole.download(cpu_dst);
|
||||
char sss[1024];
|
||||
sprintf(sss, "roicols=%d,roirows=%d,src1x=%d,src1y=%d,dstx=%d,dsty=%d", roicols, roirows, src1x, src1y, dstx, dsty);
|
||||
|
||||
EXPECT_MAT_NEAR(dst, cpu_dst, 1, sss);
|
||||
Near(1);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
|
||||
/////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// Scharr
|
||||
|
||||
PARAM_TEST_CASE(Scharr, MatType, int, int, int)
|
||||
struct Scharr : FilterTestBase
|
||||
{
|
||||
int type;
|
||||
int dx, dy, bordertype;
|
||||
|
||||
//src mat
|
||||
cv::Mat mat1;
|
||||
cv::Mat dst;
|
||||
|
||||
// set up roi
|
||||
int roicols;
|
||||
int roirows;
|
||||
int src1x;
|
||||
int src1y;
|
||||
int dstx;
|
||||
int dsty;
|
||||
|
||||
//src mat with roi
|
||||
cv::Mat mat1_roi;
|
||||
cv::Mat dst_roi;
|
||||
//std::vector<cv::ocl::Info> oclinfo;
|
||||
//ocl dst mat for testing
|
||||
cv::ocl::oclMat gdst_whole;
|
||||
|
||||
//ocl mat with roi
|
||||
cv::ocl::oclMat gmat1;
|
||||
cv::ocl::oclMat gdst;
|
||||
|
||||
virtual void SetUp()
|
||||
{
|
||||
type = GET_PARAM(0);
|
||||
dx = GET_PARAM(1);
|
||||
dy = GET_PARAM(2);
|
||||
Size s = GET_PARAM(2);
|
||||
dx = s.width;
|
||||
dy = s.height;
|
||||
bordertype = GET_PARAM(3);
|
||||
dx = 1;
|
||||
dy = 0;
|
||||
|
||||
cv::RNG &rng = TS::ptr()->get_rng();
|
||||
cv::Size size(MWIDTH, MHEIGHT);
|
||||
|
||||
mat1 = randomMat(rng, size, type, 5, 16, false);
|
||||
dst = randomMat(rng, size, type, 5, 16, false);
|
||||
|
||||
//int devnums = getDevice(oclinfo, OPENCV_DEFAULT_OPENCL_DEVICE);
|
||||
//CV_Assert(devnums > 0);
|
||||
////if you want to use undefault device, set it here
|
||||
////setDevice(oclinfo[0]);
|
||||
Init(type);
|
||||
}
|
||||
|
||||
void random_roi()
|
||||
{
|
||||
#ifdef RANDOMROI
|
||||
//randomize ROI
|
||||
cv::RNG &rng = TS::ptr()->get_rng();
|
||||
roicols = rng.uniform(2, mat1.cols);
|
||||
roirows = rng.uniform(2, mat1.rows);
|
||||
src1x = rng.uniform(0, mat1.cols - roicols);
|
||||
src1y = rng.uniform(0, mat1.rows - roirows);
|
||||
dstx = rng.uniform(0, dst.cols - roicols);
|
||||
dsty = rng.uniform(0, dst.rows - roirows);
|
||||
#else
|
||||
roicols = mat1.cols;
|
||||
roirows = mat1.rows;
|
||||
src1x = 0;
|
||||
src1y = 0;
|
||||
dstx = 0;
|
||||
dsty = 0;
|
||||
#endif
|
||||
|
||||
mat1_roi = mat1(Rect(src1x, src1y, roicols, roirows));
|
||||
dst_roi = dst(Rect(dstx, dsty, roicols, roirows));
|
||||
|
||||
gdst_whole = dst;
|
||||
gdst = gdst_whole(Rect(dstx, dsty, roicols, roirows));
|
||||
|
||||
|
||||
gmat1 = mat1_roi;
|
||||
}
|
||||
|
||||
};
|
||||
|
||||
TEST_P(Scharr, Mat)
|
||||
@ -688,16 +281,9 @@ TEST_P(Scharr, Mat)
|
||||
for(int j = 0; j < LOOP_TIMES; j++)
|
||||
{
|
||||
random_roi();
|
||||
|
||||
cv::Scharr(mat1_roi, dst_roi, -1, dx, dy, /*scale*/1,/*delta*/0, bordertype);
|
||||
cv::ocl::Scharr(gmat1, gdst, -1, dx, dy,/*scale*/1,/*delta*/0, bordertype);
|
||||
|
||||
cv::Mat cpu_dst;
|
||||
gdst_whole.download(cpu_dst);
|
||||
char sss[1024];
|
||||
sprintf(sss, "roicols=%d,roirows=%d,src1x=%d,src1y=%d,dstx=%d,dsty=%d", roicols, roirows, src1x, src1y, dstx, dsty);
|
||||
|
||||
EXPECT_MAT_NEAR(dst, cpu_dst, 1, sss);
|
||||
Near(1);
|
||||
}
|
||||
|
||||
}
|
||||
@ -705,89 +291,23 @@ TEST_P(Scharr, Mat)
|
||||
|
||||
/////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// GaussianBlur
|
||||
|
||||
PARAM_TEST_CASE(GaussianBlur, MatType, cv::Size, int)
|
||||
struct GaussianBlur : FilterTestBase
|
||||
{
|
||||
int type;
|
||||
cv::Size ksize;
|
||||
int bordertype;
|
||||
|
||||
double sigma1, sigma2;
|
||||
|
||||
//src mat
|
||||
cv::Mat mat1;
|
||||
cv::Mat dst;
|
||||
|
||||
// set up roi
|
||||
int roicols;
|
||||
int roirows;
|
||||
int src1x;
|
||||
int src1y;
|
||||
int dstx;
|
||||
int dsty;
|
||||
|
||||
//src mat with roi
|
||||
cv::Mat mat1_roi;
|
||||
cv::Mat dst_roi;
|
||||
//std::vector<cv::ocl::Info> oclinfo;
|
||||
//ocl dst mat for testing
|
||||
cv::ocl::oclMat gdst_whole;
|
||||
|
||||
//ocl mat with roi
|
||||
cv::ocl::oclMat gmat1;
|
||||
cv::ocl::oclMat gdst;
|
||||
|
||||
virtual void SetUp()
|
||||
{
|
||||
type = GET_PARAM(0);
|
||||
ksize = GET_PARAM(1);
|
||||
bordertype = GET_PARAM(2);
|
||||
|
||||
bordertype = GET_PARAM(3);
|
||||
Init(type);
|
||||
cv::RNG &rng = TS::ptr()->get_rng();
|
||||
cv::Size size(MWIDTH, MHEIGHT);
|
||||
|
||||
sigma1 = rng.uniform(0.1, 1.0);
|
||||
sigma2 = rng.uniform(0.1, 1.0);
|
||||
|
||||
mat1 = randomMat(rng, size, type, 5, 16, false);
|
||||
dst = randomMat(rng, size, type, 5, 16, false);
|
||||
|
||||
//int devnums = getDevice(oclinfo, OPENCV_DEFAULT_OPENCL_DEVICE);
|
||||
//CV_Assert(devnums > 0);
|
||||
////if you want to use undefault device, set it here
|
||||
////setDevice(oclinfo[0]);
|
||||
}
|
||||
|
||||
void random_roi()
|
||||
{
|
||||
#ifdef RANDOMROI
|
||||
//randomize ROI
|
||||
cv::RNG &rng = TS::ptr()->get_rng();
|
||||
roicols = rng.uniform(2, mat1.cols);
|
||||
roirows = rng.uniform(2, mat1.rows);
|
||||
src1x = rng.uniform(0, mat1.cols - roicols);
|
||||
src1y = rng.uniform(0, mat1.rows - roirows);
|
||||
dstx = rng.uniform(0, dst.cols - roicols);
|
||||
dsty = rng.uniform(0, dst.rows - roirows);
|
||||
#else
|
||||
roicols = mat1.cols;
|
||||
roirows = mat1.rows;
|
||||
src1x = 0;
|
||||
src1y = 0;
|
||||
dstx = 0;
|
||||
dsty = 0;
|
||||
#endif
|
||||
|
||||
mat1_roi = mat1(Rect(src1x, src1y, roicols, roirows));
|
||||
dst_roi = dst(Rect(dstx, dsty, roicols, roirows));
|
||||
|
||||
gdst_whole = dst;
|
||||
gdst = gdst_whole(Rect(dstx, dsty, roicols, roirows));
|
||||
|
||||
|
||||
gmat1 = mat1_roi;
|
||||
}
|
||||
|
||||
};
|
||||
|
||||
TEST_P(GaussianBlur, Mat)
|
||||
@ -795,53 +315,53 @@ TEST_P(GaussianBlur, Mat)
|
||||
for(int j = 0; j < LOOP_TIMES; j++)
|
||||
{
|
||||
random_roi();
|
||||
|
||||
cv::GaussianBlur(mat1_roi, dst_roi, ksize, sigma1, sigma2, bordertype);
|
||||
cv::ocl::GaussianBlur(gmat1, gdst, ksize, sigma1, sigma2, bordertype);
|
||||
|
||||
cv::Mat cpu_dst;
|
||||
gdst_whole.download(cpu_dst);
|
||||
char sss[1024];
|
||||
sprintf(sss, "roicols=%d,roirows=%d,src1x=%d,src1y=%d,dstx=%d,dsty=%d", roicols, roirows, src1x, src1y, dstx, dsty);
|
||||
|
||||
EXPECT_MAT_NEAR(dst, cpu_dst, 1.0, sss);
|
||||
Near(1);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
|
||||
|
||||
INSTANTIATE_TEST_CASE_P(Filter, Blur, Combine(Values(CV_8UC1, CV_8UC3, CV_8UC4, CV_32FC1, CV_32FC4),
|
||||
INSTANTIATE_TEST_CASE_P(Filter, Blur, Combine(
|
||||
Values(CV_8UC1, CV_8UC3, CV_8UC4, CV_32FC1, CV_32FC4),
|
||||
Values(cv::Size(3, 3), cv::Size(5, 5), cv::Size(7, 7)),
|
||||
Values(Size(0, 0)), //not use
|
||||
Values((MatType)cv::BORDER_CONSTANT, (MatType)cv::BORDER_REPLICATE, (MatType)cv::BORDER_REFLECT, (MatType)cv::BORDER_REFLECT_101)));
|
||||
|
||||
|
||||
INSTANTIATE_TEST_CASE_P(Filters, Laplacian, Combine(
|
||||
Values(CV_8UC1, CV_8UC3, CV_8UC4, CV_32FC1, CV_32FC3, CV_32FC4),
|
||||
Values(1, 3)));
|
||||
Values(CV_8UC1, CV_8UC3, CV_8UC4, CV_32FC1, CV_32FC3, CV_32FC4),
|
||||
Values(Size(3, 3)),
|
||||
Values(Size(0, 0)), //not use
|
||||
Values(0))); //not use
|
||||
|
||||
INSTANTIATE_TEST_CASE_P(Filter, Erode, Combine(Values(CV_8UC1, CV_8UC4, CV_32FC1, CV_32FC4), Values(1)));
|
||||
|
||||
//INSTANTIATE_TEST_CASE_P(Filter, Erode, Combine(Values(CV_8UC1, CV_8UC1), Values(false)));
|
||||
|
||||
INSTANTIATE_TEST_CASE_P(Filter, Dilate, Combine(Values(CV_8UC1, CV_8UC4, CV_32FC1, CV_32FC4), Values(1)));
|
||||
|
||||
//INSTANTIATE_TEST_CASE_P(Filter, Dilate, Combine(Values(CV_8UC1, CV_8UC1), Values(false)));
|
||||
INSTANTIATE_TEST_CASE_P(Filter, ErodeDilate, Combine(
|
||||
Values(CV_8UC1, CV_8UC4, CV_32FC1, CV_32FC4),
|
||||
Values(Size(0, 0)), //not use
|
||||
Values(Size(0, 0)), //not use
|
||||
Values(1)));
|
||||
|
||||
|
||||
INSTANTIATE_TEST_CASE_P(Filter, Sobel, Combine(Values(CV_8UC1, CV_8UC3, CV_8UC4, CV_32FC1, CV_32FC3, CV_32FC4),
|
||||
Values(1, 2), Values(0, 1), Values(3, 5), Values((MatType)cv::BORDER_CONSTANT,
|
||||
(MatType)cv::BORDER_REPLICATE)));
|
||||
INSTANTIATE_TEST_CASE_P(Filter, Sobel, Combine(
|
||||
Values(CV_8UC1, CV_8UC3, CV_8UC4, CV_32FC1, CV_32FC3, CV_32FC4),
|
||||
Values(Size(3, 3), Size(5, 5)),
|
||||
Values(Size(1, 0), Size(1, 1), Size(2, 0), Size(2, 1)),
|
||||
Values((MatType)cv::BORDER_CONSTANT, (MatType)cv::BORDER_REPLICATE)));
|
||||
|
||||
|
||||
INSTANTIATE_TEST_CASE_P(Filter, Scharr, Combine(
|
||||
Values(CV_8UC1, CV_8UC3, CV_8UC4, CV_32FC1, CV_32FC4), Values(0, 1), Values(0, 1),
|
||||
Values((MatType)cv::BORDER_CONSTANT, (MatType)cv::BORDER_REPLICATE)));
|
||||
Values(CV_8UC1, CV_8UC3, CV_8UC4, CV_32FC1, CV_32FC4),
|
||||
Values(Size(0, 0)), //not use
|
||||
Values(Size(0, 1), Size(1, 0)),
|
||||
Values((MatType)cv::BORDER_CONSTANT, (MatType)cv::BORDER_REPLICATE)));
|
||||
|
||||
INSTANTIATE_TEST_CASE_P(Filter, GaussianBlur, Combine(
|
||||
Values(CV_8UC1, CV_8UC3, CV_8UC4, CV_32FC1, CV_32FC4),
|
||||
Values(cv::Size(3, 3), cv::Size(5, 5)),
|
||||
Values((MatType)cv::BORDER_CONSTANT, (MatType)cv::BORDER_REPLICATE)));
|
||||
Values(CV_8UC1, CV_8UC3, CV_8UC4, CV_32FC1, CV_32FC4),
|
||||
Values(Size(3, 3), Size(5, 5)),
|
||||
Values(Size(0, 0)), //not use
|
||||
Values((MatType)cv::BORDER_CONSTANT, (MatType)cv::BORDER_REPLICATE)));
|
||||
|
||||
|
||||
|
||||
|
@ -53,13 +53,12 @@ PARAM_TEST_CASE(Gemm, int, cv::Size, int)
|
||||
int type;
|
||||
cv::Size mat_size;
|
||||
int flags;
|
||||
//vector<cv::ocl::Info> info;
|
||||
|
||||
virtual void SetUp()
|
||||
{
|
||||
type = GET_PARAM(0);
|
||||
mat_size = GET_PARAM(1);
|
||||
flags = GET_PARAM(2);
|
||||
//cv::ocl::getDevice(info);
|
||||
}
|
||||
};
|
||||
|
||||
|
@ -12,10 +12,12 @@
|
||||
//
|
||||
// Copyright (C) 2010-2012, Institute Of Software Chinese Academy Of Science, all rights reserved.
|
||||
// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
|
||||
// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved.
|
||||
// Third party copyrights are property of their respective owners.
|
||||
//
|
||||
// @Authors
|
||||
// Jia Haipeng, jiahaipeng95@gmail.com
|
||||
// Sen Liu, swjutls1987@126.com
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without modification,
|
||||
// are permitted provided that the following conditions are met:
|
||||
@ -61,40 +63,31 @@ struct getRect
|
||||
}
|
||||
};
|
||||
|
||||
PARAM_TEST_CASE(HaarTestBase, int, int)
|
||||
PARAM_TEST_CASE(Haar, double, int)
|
||||
{
|
||||
//std::vector<cv::ocl::Info> oclinfo;
|
||||
cv::ocl::OclCascadeClassifier cascade, nestedCascade;
|
||||
cv::ocl::OclCascadeClassifierBuf cascadebuf;
|
||||
cv::CascadeClassifier cpucascade, cpunestedCascade;
|
||||
// Mat img;
|
||||
|
||||
double scale;
|
||||
int index;
|
||||
int flags;
|
||||
|
||||
virtual void SetUp()
|
||||
{
|
||||
scale = 1.0;
|
||||
index = 0;
|
||||
scale = GET_PARAM(0);
|
||||
flags = GET_PARAM(1);
|
||||
string cascadeName = workdir + "../../data/haarcascades/haarcascade_frontalface_alt.xml";
|
||||
|
||||
if( (!cascade.load( cascadeName )) || (!cpucascade.load(cascadeName)))
|
||||
if( (!cascade.load( cascadeName )) || (!cpucascade.load(cascadeName)) || (!cascadebuf.load( cascadeName )))
|
||||
{
|
||||
cout << "ERROR: Could not load classifier cascade" << endl;
|
||||
return;
|
||||
}
|
||||
//int devnums = getDevice(oclinfo);
|
||||
//CV_Assert(devnums>0);
|
||||
////if you want to use undefault device, set it here
|
||||
////setDevice(oclinfo[0]);
|
||||
//cv::ocl::setBinpath("E:\\");
|
||||
}
|
||||
};
|
||||
|
||||
////////////////////////////////faceDetect/////////////////////////////////////////////////
|
||||
|
||||
struct Haar : HaarTestBase {};
|
||||
|
||||
TEST_F(Haar, FaceDetect)
|
||||
TEST_P(Haar, FaceDetect)
|
||||
{
|
||||
string imgName = workdir + "lena.jpg";
|
||||
Mat img = imread( imgName, 1 );
|
||||
@ -105,59 +98,65 @@ TEST_F(Haar, FaceDetect)
|
||||
return ;
|
||||
}
|
||||
|
||||
//int i = 0;
|
||||
//double t = 0;
|
||||
vector<Rect> faces, oclfaces;
|
||||
|
||||
// const static Scalar colors[] = { CV_RGB(0, 0, 255),
|
||||
// CV_RGB(0, 128, 255),
|
||||
// CV_RGB(0, 255, 255),
|
||||
// CV_RGB(0, 255, 0),
|
||||
// CV_RGB(255, 128, 0),
|
||||
// CV_RGB(255, 255, 0),
|
||||
// CV_RGB(255, 0, 0),
|
||||
// CV_RGB(255, 0, 255)
|
||||
// } ;
|
||||
|
||||
Mat gray, smallImg(cvRound (img.rows / scale), cvRound(img.cols / scale), CV_8UC1 );
|
||||
MemStorage storage(cvCreateMemStorage(0));
|
||||
cvtColor( img, gray, CV_BGR2GRAY );
|
||||
resize( gray, smallImg, smallImg.size(), 0, 0, INTER_LINEAR );
|
||||
equalizeHist( smallImg, smallImg );
|
||||
|
||||
|
||||
cv::ocl::oclMat image;
|
||||
CvSeq *_objects;
|
||||
image.upload(smallImg);
|
||||
_objects = cascade.oclHaarDetectObjects( image, storage, 1.1,
|
||||
3, 0
|
||||
| CV_HAAR_SCALE_IMAGE
|
||||
, Size(30, 30), Size(0, 0) );
|
||||
3, flags, Size(30, 30), Size(0, 0) );
|
||||
vector<CvAvgComp> vecAvgComp;
|
||||
Seq<CvAvgComp>(_objects).copyTo(vecAvgComp);
|
||||
oclfaces.resize(vecAvgComp.size());
|
||||
std::transform(vecAvgComp.begin(), vecAvgComp.end(), oclfaces.begin(), getRect());
|
||||
|
||||
cpucascade.detectMultiScale( smallImg, faces, 1.1,
|
||||
3, 0
|
||||
| CV_HAAR_SCALE_IMAGE
|
||||
, Size(30, 30), Size(0, 0) );
|
||||
cpucascade.detectMultiScale( smallImg, faces, 1.1, 3,
|
||||
flags,
|
||||
Size(30, 30), Size(0, 0) );
|
||||
EXPECT_EQ(faces.size(), oclfaces.size());
|
||||
/* for( vector<Rect>::const_iterator r = faces.begin(); r != faces.end(); r++, i++ )
|
||||
{
|
||||
Mat smallImgROI;
|
||||
Point center;
|
||||
Scalar color = colors[i%8];
|
||||
int radius;
|
||||
center.x = cvRound((r->x + r->width*0.5)*scale);
|
||||
center.y = cvRound((r->y + r->height*0.5)*scale);
|
||||
radius = cvRound((r->width + r->height)*0.25*scale);
|
||||
circle( img, center, radius, color, 3, 8, 0 );
|
||||
} */
|
||||
//namedWindow("result");
|
||||
//imshow("result",img);
|
||||
//waitKey(0);
|
||||
//destroyAllWindows();
|
||||
|
||||
}
|
||||
|
||||
TEST_P(Haar, FaceDetectUseBuf)
|
||||
{
|
||||
string imgName = workdir + "lena.jpg";
|
||||
Mat img = imread( imgName, 1 );
|
||||
|
||||
if(img.empty())
|
||||
{
|
||||
std::cout << "Couldn't read " << imgName << std::endl;
|
||||
return ;
|
||||
}
|
||||
|
||||
vector<Rect> faces, oclfaces;
|
||||
|
||||
Mat gray, smallImg(cvRound (img.rows / scale), cvRound(img.cols / scale), CV_8UC1 );
|
||||
MemStorage storage(cvCreateMemStorage(0));
|
||||
cvtColor( img, gray, CV_BGR2GRAY );
|
||||
resize( gray, smallImg, smallImg.size(), 0, 0, INTER_LINEAR );
|
||||
equalizeHist( smallImg, smallImg );
|
||||
|
||||
cv::ocl::oclMat image;
|
||||
image.upload(smallImg);
|
||||
|
||||
cascadebuf.detectMultiScale( image, oclfaces, 1.1, 3,
|
||||
flags,
|
||||
Size(30, 30), Size(0, 0) );
|
||||
cascadebuf.release();
|
||||
|
||||
cpucascade.detectMultiScale( smallImg, faces, 1.1, 3,
|
||||
flags,
|
||||
Size(30, 30), Size(0, 0) );
|
||||
EXPECT_EQ(faces.size(), oclfaces.size());
|
||||
}
|
||||
|
||||
INSTANTIATE_TEST_CASE_P(FaceDetect, Haar,
|
||||
Combine(Values(1.0),
|
||||
Values(CV_HAAR_SCALE_IMAGE, 0)));
|
||||
|
||||
#endif // HAVE_OPENCL
|
||||
|
@ -240,12 +240,11 @@ TEST_P(HOG, Detect)
|
||||
}
|
||||
}
|
||||
|
||||
char s[100] = {0};
|
||||
EXPECT_MAT_NEAR(cv::Mat(d_comp), cv::Mat(comp), 3, s);
|
||||
EXPECT_MAT_NEAR(cv::Mat(d_comp), cv::Mat(comp), 3);
|
||||
}
|
||||
|
||||
|
||||
INSTANTIATE_TEST_CASE_P(GPU_ImgProc, HOG, testing::Combine(
|
||||
INSTANTIATE_TEST_CASE_P(OCL_ObjDetect, HOG, testing::Combine(
|
||||
testing::Values(cv::Size(64, 128), cv::Size(48, 96)),
|
||||
testing::Values(MatType(CV_8UC1), MatType(CV_8UC4))));
|
||||
|
||||
|
@ -12,6 +12,7 @@
|
||||
//
|
||||
// Copyright (C) 2010-2012, Institute Of Software Chinese Academy Of Science, all rights reserved.
|
||||
// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
|
||||
// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved.
|
||||
// Third party copyrights are property of their respective owners.
|
||||
//
|
||||
// @Authors
|
||||
@ -327,7 +328,7 @@ PARAM_TEST_CASE(ImgprocTestBase, MatType, MatType, MatType, MatType, MatType, bo
|
||||
cv::Mat mask_roi;
|
||||
cv::Mat dst_roi;
|
||||
cv::Mat dst1_roi; //bak
|
||||
//std::vector<cv::ocl::Info> oclinfo;
|
||||
|
||||
//ocl mat
|
||||
cv::ocl::oclMat clmat1;
|
||||
cv::ocl::oclMat clmat2;
|
||||
@ -352,10 +353,6 @@ PARAM_TEST_CASE(ImgprocTestBase, MatType, MatType, MatType, MatType, MatType, bo
|
||||
cv::RNG &rng = TS::ptr()->get_rng();
|
||||
cv::Size size(MWIDTH, MHEIGHT);
|
||||
double min = 1, max = 20;
|
||||
//int devnums = getDevice(oclinfo, OPENCV_DEFAULT_OPENCL_DEVICE);
|
||||
//CV_Assert(devnums > 0);
|
||||
////if you want to use undefault device, set it here
|
||||
////setDevice(oclinfo[0]);
|
||||
|
||||
if(type1 != nulltype)
|
||||
{
|
||||
@ -445,6 +442,13 @@ PARAM_TEST_CASE(ImgprocTestBase, MatType, MatType, MatType, MatType, MatType, bo
|
||||
clmask_roi = clmask(Rect(maskx, masky, roicols, roirows));
|
||||
}
|
||||
}
|
||||
|
||||
void Near(double threshold)
|
||||
{
|
||||
cv::Mat cpu_cldst;
|
||||
cldst.download(cpu_cldst);
|
||||
EXPECT_MAT_NEAR(dst, cpu_cldst, threshold);
|
||||
}
|
||||
};
|
||||
////////////////////////////////equalizeHist//////////////////////////////////////////
|
||||
|
||||
@ -464,11 +468,7 @@ TEST_P(equalizeHist, Mat)
|
||||
random_roi();
|
||||
cv::equalizeHist(mat1_roi, dst_roi);
|
||||
cv::ocl::equalizeHist(clmat1_roi, cldst_roi);
|
||||
cv::Mat cpu_cldst;
|
||||
cldst.download(cpu_cldst);
|
||||
char sss[1024];
|
||||
sprintf(sss, "roicols=%d,roirows=%d,src1x=%d,src1y=%d,dstx=%d,dsty=%d,dst1x=%d,dst1y=%d,maskx=%d,masky=%d,src2x=%d,src2y=%d", roicols, roirows, src1x, src1y, dstx, dsty, dst1x, dst1y, maskx, masky, src2x, src2y);
|
||||
EXPECT_MAT_NEAR(dst, cpu_cldst, 1.1, sss);
|
||||
Near(1.1);
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -488,7 +488,7 @@ TEST_P(bilateralFilter, Mat)
|
||||
int d = 2 * radius + 1;
|
||||
double sigmaspace = 20.0;
|
||||
int bordertype[] = {cv::BORDER_CONSTANT, cv::BORDER_REPLICATE, cv::BORDER_REFLECT, cv::BORDER_WRAP, cv::BORDER_REFLECT_101};
|
||||
const char *borderstr[] = {"BORDER_CONSTANT", "BORDER_REPLICATE", "BORDER_REFLECT", "BORDER_WRAP", "BORDER_REFLECT_101"};
|
||||
//const char *borderstr[] = {"BORDER_CONSTANT", "BORDER_REPLICATE", "BORDER_REFLECT", "BORDER_WRAP", "BORDER_REFLECT_101"};
|
||||
|
||||
if (mat1.depth() != CV_8U || mat1.type() != dst.type())
|
||||
{
|
||||
@ -517,25 +517,7 @@ TEST_P(bilateralFilter, Mat)
|
||||
|
||||
cv::bilateralFilter(mat1_roi, dst_roi, d, sigmacolor, sigmaspace, bordertype[i] | cv::BORDER_ISOLATED);
|
||||
cv::ocl::bilateralFilter(clmat1_roi, cldst_roi, d, sigmacolor, sigmaspace, bordertype[i] | cv::BORDER_ISOLATED);
|
||||
|
||||
cv::Mat cpu_cldst;
|
||||
cldst.download(cpu_cldst);
|
||||
|
||||
|
||||
char sss[1024];
|
||||
sprintf(sss, "roicols=%d,roirows=%d,src1x=%d,src1y=%d,dstx=%d,dsty=%d,radius=%d,boredertype=%s", roicols, roirows, src1x, src1y, dstx, dsty, radius, borderstr[i]);
|
||||
//for(int i=0;i<dst.rows;i++)
|
||||
//{
|
||||
// for(int j=0;j<dst.cols*dst.channels();j++)
|
||||
// {
|
||||
// if(dst.at<uchar>(i,j)!=cpu_cldst.at<uchar>(i,j))
|
||||
// cout<< i <<" "<< j <<" "<< (int)dst.at<uchar>(i,j)<<" "<< (int)cpu_cldst.at<uchar>(i,j)<<" ";
|
||||
// }
|
||||
// cout<<endl;
|
||||
//}
|
||||
|
||||
EXPECT_MAT_NEAR(dst, cpu_cldst, 1.0, sss);
|
||||
|
||||
Near(1.);
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -549,7 +531,7 @@ struct CopyMakeBorder : ImgprocTestBase {};
|
||||
TEST_P(CopyMakeBorder, Mat)
|
||||
{
|
||||
int bordertype[] = {cv::BORDER_CONSTANT, cv::BORDER_REPLICATE, cv::BORDER_REFLECT, cv::BORDER_WRAP, cv::BORDER_REFLECT_101};
|
||||
const char *borderstr[] = {"BORDER_CONSTANT", "BORDER_REPLICATE", "BORDER_REFLECT", "BORDER_WRAP", "BORDER_REFLECT_101"};
|
||||
//const char *borderstr[] = {"BORDER_CONSTANT", "BORDER_REPLICATE", "BORDER_REFLECT", "BORDER_WRAP", "BORDER_REFLECT_101"};
|
||||
cv::RNG &rng = TS::ptr()->get_rng();
|
||||
int top = rng.uniform(0, 10);
|
||||
int bottom = rng.uniform(0, 10);
|
||||
@ -587,24 +569,12 @@ TEST_P(CopyMakeBorder, Mat)
|
||||
cv::Mat cpu_cldst;
|
||||
#ifndef RANDOMROI
|
||||
cldst_roi.download(cpu_cldst);
|
||||
EXPECT_MAT_NEAR(dst_roi, cpu_cldst, 0.0);
|
||||
#else
|
||||
cldst.download(cpu_cldst);
|
||||
EXPECT_MAT_NEAR(dst, cpu_cldst, 0.0);
|
||||
#endif
|
||||
char sss[1024];
|
||||
sprintf(sss, "roicols=%d,roirows=%d,src1x=%d,src1y=%d,dstx=%d,dsty=%d,dst1x=%d,dst1y=%d,top=%d,bottom=%d,left=%d,right=%d, bordertype=%s", roicols, roirows, src1x, src1y, dstx, dsty, dst1x, dst1y, top, bottom, left, right, borderstr[i]);
|
||||
#ifndef RANDOMROI
|
||||
EXPECT_MAT_NEAR(dst_roi, cpu_cldst, 0.0, sss);
|
||||
#else
|
||||
//for(int i=0;i<dst.rows;i++)
|
||||
//{
|
||||
//for(int j=0;j<dst.cols;j++)
|
||||
//{
|
||||
// cout<< (int)dst.at<uchar>(i,j)<<" ";
|
||||
//}
|
||||
//cout<<endl;
|
||||
//}
|
||||
EXPECT_MAT_NEAR(dst, cpu_cldst, 0.0, sss);
|
||||
#endif
|
||||
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -627,14 +597,7 @@ TEST_P(cornerMinEigenVal, Mat)
|
||||
int borderType = cv::BORDER_REFLECT;
|
||||
cv::cornerMinEigenVal(mat1_roi, dst_roi, blockSize, apertureSize, borderType);
|
||||
cv::ocl::cornerMinEigenVal(clmat1_roi, cldst_roi, blockSize, apertureSize, borderType);
|
||||
|
||||
|
||||
cv::Mat cpu_cldst;
|
||||
cldst.download(cpu_cldst);
|
||||
|
||||
char sss[1024];
|
||||
sprintf(sss, "roicols=%d,roirows=%d,src1x=%d,src1y=%d,dstx=%d,dsty=%d,dst1x=%d,dst1y=%d,maskx=%d,masky=%d,src2x=%d,src2y=%d", roicols, roirows, src1x, src1y, dstx, dsty, dst1x, dst1y, maskx, masky, src2x, src2y);
|
||||
EXPECT_MAT_NEAR(dst, cpu_cldst, 1, sss);
|
||||
Near(1.);
|
||||
}
|
||||
}
|
||||
|
||||
@ -657,13 +620,7 @@ TEST_P(cornerHarris, Mat)
|
||||
int borderType = cv::BORDER_REFLECT;
|
||||
cv::cornerHarris(mat1_roi, dst_roi, blockSize, apertureSize, k, borderType);
|
||||
cv::ocl::cornerHarris(clmat1_roi, cldst_roi, blockSize, apertureSize, k, borderType);
|
||||
cv::Mat cpu_cldst;
|
||||
cldst.download(cpu_cldst);
|
||||
|
||||
char sss[1024];
|
||||
sprintf(sss, "roicols=%d,roirows=%d,src1x=%d,src1y=%d,dstx=%d,dsty=%d,dst1x=%d,dst1y=%d,maskx=%d,masky=%d,src2x=%d,src2y=%d", roicols, roirows, src1x, src1y, dstx, dsty, dst1x, dst1y, maskx, masky, src2x, src2y);
|
||||
|
||||
EXPECT_MAT_NEAR(dst, cpu_cldst, 1, sss);
|
||||
Near(1.);
|
||||
}
|
||||
}
|
||||
|
||||
@ -680,15 +637,11 @@ TEST_P(integral, Mat)
|
||||
|
||||
cv::ocl::integral(clmat1_roi, cldst_roi, cldst1_roi);
|
||||
cv::integral(mat1_roi, dst_roi, dst1_roi);
|
||||
Near(0);
|
||||
|
||||
cv::Mat cpu_cldst, cpu_cldst1;
|
||||
cldst.download(cpu_cldst);
|
||||
cv::Mat cpu_cldst1;
|
||||
cldst1.download(cpu_cldst1);
|
||||
char sss[1024];
|
||||
sprintf(sss, "roicols=%d,roirows=%d,src1x=%d,src1y=%d,dstx=%d,dsty=%d,dst1x=%d,dst1y=%d,maskx=%d,masky=%d,src2x=%d,src2y=%d", roicols, roirows, src1x, src1y, dstx, dsty, dst1x, dst1y, maskx, masky, src2x, src2y);
|
||||
|
||||
EXPECT_MAT_NEAR(dst, cpu_cldst, 0.0, sss);
|
||||
EXPECT_MAT_NEAR(dst1, cpu_cldst1, 0.0, sss);
|
||||
EXPECT_MAT_NEAR(dst1, cpu_cldst1, 0.0);
|
||||
}
|
||||
}
|
||||
|
||||
@ -720,7 +673,7 @@ PARAM_TEST_CASE(WarpTestBase, MatType, int)
|
||||
//src mat with roi
|
||||
cv::Mat mat1_roi;
|
||||
cv::Mat dst_roi;
|
||||
//std::vector<cv::ocl::Info> oclinfo;
|
||||
|
||||
//ocl dst mat for testing
|
||||
cv::ocl::oclMat gdst_whole;
|
||||
|
||||
@ -740,10 +693,6 @@ PARAM_TEST_CASE(WarpTestBase, MatType, int)
|
||||
mat1 = randomMat(rng, size, type, 5, 16, false);
|
||||
dst = randomMat(rng, size, type, 5, 16, false);
|
||||
|
||||
//int devnums = getDevice(oclinfo, OPENCV_DEFAULT_OPENCL_DEVICE);
|
||||
//CV_Assert(devnums > 0);
|
||||
////if you want to use undefault device, set it here
|
||||
////setDevice(oclinfo[0]);
|
||||
}
|
||||
|
||||
void random_roi()
|
||||
@ -805,10 +754,7 @@ TEST_P(WarpAffine, Mat)
|
||||
|
||||
cv::Mat cpu_dst;
|
||||
gdst_whole.download(cpu_dst);
|
||||
char sss[1024];
|
||||
sprintf(sss, "src_roicols=%d,src_roirows=%d,dst_roicols=%d,dst_roirows=%d,src1x =%d,src1y=%d,dstx=%d,dsty=%d", src_roicols, src_roirows, dst_roicols, dst_roirows, src1x, src1y, dstx, dsty);
|
||||
|
||||
EXPECT_MAT_NEAR(dst, cpu_dst, 1.0, sss);
|
||||
EXPECT_MAT_NEAR(dst, cpu_dst, 1.0);
|
||||
}
|
||||
|
||||
}
|
||||
@ -837,10 +783,7 @@ TEST_P(WarpPerspective, Mat)
|
||||
|
||||
cv::Mat cpu_dst;
|
||||
gdst_whole.download(cpu_dst);
|
||||
char sss[1024];
|
||||
sprintf(sss, "src_roicols=%d,src_roirows=%d,dst_roicols=%d,dst_roirows=%d,src1x =%d,src1y=%d,dstx=%d,dsty=%d", src_roicols, src_roirows, dst_roicols, dst_roirows, src1x, src1y, dstx, dsty);
|
||||
|
||||
EXPECT_MAT_NEAR(dst, cpu_dst, 1.0, sss);
|
||||
EXPECT_MAT_NEAR(dst, cpu_dst, 1.0);
|
||||
}
|
||||
|
||||
}
|
||||
@ -905,9 +848,6 @@ PARAM_TEST_CASE(Remap, MatType, MatType, MatType, int, int)
|
||||
interpolation = GET_PARAM(3);
|
||||
bordertype = GET_PARAM(4);
|
||||
|
||||
//int devnums = getDevice(oclinfo, OPENCV_DEFAULT_OPENCL_DEVICE);
|
||||
//CV_Assert(devnums > 0);
|
||||
|
||||
cv::RNG &rng = TS::ptr()->get_rng();
|
||||
cv::Size srcSize = cv::Size(MWIDTH, MHEIGHT);
|
||||
cv::Size map1Size = cv::Size(MWIDTH, MHEIGHT);
|
||||
@ -1004,7 +944,7 @@ TEST_P(Remap, Mat)
|
||||
return;
|
||||
}
|
||||
int bordertype[] = {cv::BORDER_CONSTANT, cv::BORDER_REPLICATE/*,BORDER_REFLECT,BORDER_WRAP,BORDER_REFLECT_101*/};
|
||||
const char *borderstr[] = {"BORDER_CONSTANT", "BORDER_REPLICATE"/*, "BORDER_REFLECT","BORDER_WRAP","BORDER_REFLECT_101"*/};
|
||||
//const char *borderstr[] = {"BORDER_CONSTANT", "BORDER_REPLICATE"/*, "BORDER_REFLECT","BORDER_WRAP","BORDER_REFLECT_101"*/};
|
||||
// for(int i = 0; i < sizeof(bordertype)/sizeof(int); i++)
|
||||
for(int j = 0; j < LOOP_TIMES; j++)
|
||||
{
|
||||
@ -1014,13 +954,9 @@ TEST_P(Remap, Mat)
|
||||
cv::Mat cpu_dst;
|
||||
gdst.download(cpu_dst);
|
||||
|
||||
char sss[1024];
|
||||
sprintf(sss, "src_roicols=%d,src_roirows=%d,dst_roicols=%d,dst_roirows=%d,src1x =%d,src1y=%d,dstx=%d,dsty=%d bordertype=%s", src_roicols, src_roirows, dst_roicols, dst_roirows, srcx, srcy, dstx, dsty, borderstr[0]);
|
||||
|
||||
|
||||
if(interpolation == 0)
|
||||
EXPECT_MAT_NEAR(dst, cpu_dst, 1.0, sss);
|
||||
EXPECT_MAT_NEAR(dst, cpu_dst, 2.0, sss);
|
||||
EXPECT_MAT_NEAR(dst, cpu_dst, 1.0);
|
||||
EXPECT_MAT_NEAR(dst, cpu_dst, 2.0);
|
||||
|
||||
}
|
||||
}
|
||||
@ -1051,7 +987,6 @@ PARAM_TEST_CASE(Resize, MatType, cv::Size, double, double, int)
|
||||
int dstx;
|
||||
int dsty;
|
||||
|
||||
//std::vector<cv::ocl::Info> oclinfo;
|
||||
//src mat with roi
|
||||
cv::Mat mat1_roi;
|
||||
cv::Mat dst_roi;
|
||||
@ -1090,10 +1025,6 @@ PARAM_TEST_CASE(Resize, MatType, cv::Size, double, double, int)
|
||||
mat1 = randomMat(rng, size, type, 5, 16, false);
|
||||
dst = randomMat(rng, dsize, type, 5, 16, false);
|
||||
|
||||
//int devnums = getDevice(oclinfo, OPENCV_DEFAULT_OPENCL_DEVICE);
|
||||
//CV_Assert(devnums > 0);
|
||||
////if you want to use undefault device, set it here
|
||||
////setDevice(oclinfo[0]);
|
||||
}
|
||||
|
||||
void random_roi()
|
||||
@ -1149,10 +1080,7 @@ TEST_P(Resize, Mat)
|
||||
|
||||
cv::Mat cpu_dst;
|
||||
gdst_whole.download(cpu_dst);
|
||||
char sss[1024];
|
||||
sprintf(sss, "src_roicols=%d,src_roirows=%d,dst_roicols=%d,dst_roirows=%d,src1x =%d,src1y=%d,dstx=%d,dsty=%d", src_roicols, src_roirows, dst_roicols, dst_roirows, src1x, src1y, dstx, dsty);
|
||||
|
||||
EXPECT_MAT_NEAR(dst, cpu_dst, 1.0, sss);
|
||||
EXPECT_MAT_NEAR(dst, cpu_dst, 1.0);
|
||||
}
|
||||
|
||||
}
|
||||
@ -1181,7 +1109,7 @@ PARAM_TEST_CASE(Threshold, MatType, ThreshOp)
|
||||
//src mat with roi
|
||||
cv::Mat mat1_roi;
|
||||
cv::Mat dst_roi;
|
||||
//std::vector<cv::ocl::Info> oclinfo;
|
||||
|
||||
//ocl dst mat for testing
|
||||
cv::ocl::oclMat gdst_whole;
|
||||
|
||||
@ -1199,11 +1127,6 @@ PARAM_TEST_CASE(Threshold, MatType, ThreshOp)
|
||||
|
||||
mat1 = randomMat(rng, size, type, 5, 16, false);
|
||||
dst = randomMat(rng, size, type, 5, 16, false);
|
||||
|
||||
//int devnums = getDevice(oclinfo, OPENCV_DEFAULT_OPENCL_DEVICE);
|
||||
//CV_Assert(devnums > 0);
|
||||
////if you want to use undefault device, set it here
|
||||
////setDevice(oclinfo[0]);
|
||||
}
|
||||
|
||||
void random_roi()
|
||||
@ -1251,12 +1174,7 @@ TEST_P(Threshold, Mat)
|
||||
|
||||
cv::Mat cpu_dst;
|
||||
gdst_whole.download(cpu_dst);
|
||||
|
||||
//EXPECT_MAT_NEAR(dst, cpu_dst, 1e-5)
|
||||
char sss[1024];
|
||||
sprintf(sss, "roicols=%d,roirows=%d,src1x =%d,src1y=%d,dstx=%d,dsty=%d", roicols, roirows, src1x , src1y, dstx, dsty);
|
||||
|
||||
EXPECT_MAT_NEAR(dst, cpu_dst, 1, sss);
|
||||
EXPECT_MAT_NEAR(dst, cpu_dst, 1);
|
||||
}
|
||||
|
||||
}
|
||||
@ -1288,7 +1206,6 @@ PARAM_TEST_CASE(meanShiftTestBase, MatType, MatType, int, int, cv::TermCriteria)
|
||||
cv::ocl::oclMat gdst;
|
||||
cv::ocl::oclMat gdstCoor;
|
||||
|
||||
//std::vector<cv::ocl::Info> oclinfo;
|
||||
//ocl mat with roi
|
||||
cv::ocl::oclMat gsrc_roi;
|
||||
cv::ocl::oclMat gdst_roi;
|
||||
@ -1311,10 +1228,6 @@ PARAM_TEST_CASE(meanShiftTestBase, MatType, MatType, int, int, cv::TermCriteria)
|
||||
dst = randomMat(rng, size, type, 5, 16, false);
|
||||
dstCoor = randomMat(rng, size, typeCoor, 5, 16, false);
|
||||
|
||||
//int devnums = getDevice(oclinfo, OPENCV_DEFAULT_OPENCL_DEVICE);
|
||||
//CV_Assert(devnums > 0);
|
||||
////if you want to use undefault device, set it here
|
||||
////setDevice(oclinfo[0]);
|
||||
}
|
||||
|
||||
void random_roi()
|
||||
@ -1367,11 +1280,7 @@ TEST_P(meanShiftFiltering, Mat)
|
||||
cv::ocl::meanShiftFiltering(gsrc_roi, gdst_roi, sp, sr, crit);
|
||||
|
||||
gdst.download(cpu_gdst);
|
||||
|
||||
char sss[1024];
|
||||
sprintf(sss, "roicols=%d,roirows=%d,srcx=%d,srcy=%d,dstx=%d,dsty=%d\n", roicols, roirows, srcx, srcy, dstx, dsty);
|
||||
EXPECT_MAT_NEAR(dst, cpu_gdst, 0.0, sss);
|
||||
|
||||
EXPECT_MAT_NEAR(dst, cpu_gdst, 0.0);
|
||||
}
|
||||
}
|
||||
|
||||
@ -1393,11 +1302,8 @@ TEST_P(meanShiftProc, Mat)
|
||||
|
||||
gdst.download(cpu_gdst);
|
||||
gdstCoor.download(cpu_gdstCoor);
|
||||
|
||||
char sss[1024];
|
||||
sprintf(sss, "roicols=%d,roirows=%d,srcx=%d,srcy=%d,dstx=%d,dsty=%d\n", roicols, roirows, srcx, srcy, dstx, dsty);
|
||||
EXPECT_MAT_NEAR(dst, cpu_gdst, 0.0, sss);
|
||||
EXPECT_MAT_NEAR(dstCoor, cpu_gdstCoor, 0.0, sss);
|
||||
EXPECT_MAT_NEAR(dst, cpu_gdst, 0.0);
|
||||
EXPECT_MAT_NEAR(dstCoor, cpu_gdstCoor, 0.0);
|
||||
}
|
||||
}
|
||||
|
||||
@ -1436,7 +1342,6 @@ PARAM_TEST_CASE(histTestBase, MatType, MatType)
|
||||
cv::ocl::oclMat gdst_hist;
|
||||
//ocl mat with roi
|
||||
cv::ocl::oclMat gsrc_roi;
|
||||
// std::vector<cv::ocl::Info> oclinfo;
|
||||
|
||||
virtual void SetUp()
|
||||
{
|
||||
@ -1447,10 +1352,6 @@ PARAM_TEST_CASE(histTestBase, MatType, MatType)
|
||||
|
||||
src = randomMat(rng, size, type_src, 0, 256, false);
|
||||
|
||||
// int devnums = getDevice(oclinfo);
|
||||
// CV_Assert(devnums > 0);
|
||||
//if you want to use undefault device, set it here
|
||||
//setDevice(oclinfo[0]);
|
||||
}
|
||||
|
||||
void random_roi()
|
||||
@ -1489,10 +1390,7 @@ TEST_P(calcHist, Mat)
|
||||
cv::ocl::calcHist(gsrc_roi, gdst_hist);
|
||||
|
||||
gdst_hist.download(cpu_hist);
|
||||
|
||||
char sss[1024];
|
||||
sprintf(sss, "roicols=%d,roirows=%d,srcx=%d,srcy=%d\n", roicols, roirows, srcx, srcy);
|
||||
EXPECT_MAT_NEAR(dst_hist, cpu_hist, 0.0, sss);
|
||||
EXPECT_MAT_NEAR(dst_hist, cpu_hist, 0.0);
|
||||
}
|
||||
}
|
||||
|
||||
@ -1629,11 +1527,7 @@ TEST_P(Convolve, Mat)
|
||||
|
||||
cv::Mat cpu_dst;
|
||||
gdst_whole.download(cpu_dst);
|
||||
|
||||
char sss[1024];
|
||||
sprintf(sss, "roicols=%d,roirows=%d,src1x=%d,src1y=%d,dstx=%d,dsty=%d,src2x=%d,src2y=%d", roicols, roirows, src1x, src1y, dstx, dsty, src2x, src2y);
|
||||
|
||||
EXPECT_MAT_NEAR(dst, cpu_dst, 1e-1, sss);
|
||||
EXPECT_MAT_NEAR(dst, cpu_dst, .1);
|
||||
|
||||
}
|
||||
}
|
||||
|
@ -62,7 +62,6 @@ PARAM_TEST_CASE(MatchTemplate8U, cv::Size, TemplateSize, Channels, TemplateMetho
|
||||
cv::Size templ_size;
|
||||
int cn;
|
||||
int method;
|
||||
//std::vector<cv::ocl::Info> oclinfo;
|
||||
|
||||
virtual void SetUp()
|
||||
{
|
||||
@ -70,8 +69,6 @@ PARAM_TEST_CASE(MatchTemplate8U, cv::Size, TemplateSize, Channels, TemplateMetho
|
||||
templ_size = GET_PARAM(1);
|
||||
cn = GET_PARAM(2);
|
||||
method = GET_PARAM(3);
|
||||
//int devnums = getDevice(oclinfo, OPENCV_DEFAULT_OPENCL_DEVICE);
|
||||
//CV_Assert(devnums > 0);
|
||||
}
|
||||
};
|
||||
|
||||
@ -92,12 +89,10 @@ TEST_P(MatchTemplate8U, Accuracy)
|
||||
cv::Mat dst_gold;
|
||||
cv::matchTemplate(image, templ, dst_gold, method);
|
||||
|
||||
char sss [100] = "";
|
||||
|
||||
cv::Mat mat_dst;
|
||||
dst.download(mat_dst);
|
||||
|
||||
EXPECT_MAT_NEAR(dst_gold, mat_dst, templ_size.area() * 1e-1, sss);
|
||||
EXPECT_MAT_NEAR(dst_gold, mat_dst, templ_size.area() * 1e-1);
|
||||
}
|
||||
|
||||
PARAM_TEST_CASE(MatchTemplate32F, cv::Size, TemplateSize, Channels, TemplateMethod)
|
||||
@ -114,8 +109,6 @@ PARAM_TEST_CASE(MatchTemplate32F, cv::Size, TemplateSize, Channels, TemplateMeth
|
||||
templ_size = GET_PARAM(1);
|
||||
cn = GET_PARAM(2);
|
||||
method = GET_PARAM(3);
|
||||
//int devnums = getDevice(oclinfo, OPENCV_DEFAULT_OPENCL_DEVICE);
|
||||
//CV_Assert(devnums > 0);
|
||||
}
|
||||
};
|
||||
|
||||
@ -130,12 +123,10 @@ TEST_P(MatchTemplate32F, Accuracy)
|
||||
cv::Mat dst_gold;
|
||||
cv::matchTemplate(image, templ, dst_gold, method);
|
||||
|
||||
char sss [100] = "";
|
||||
|
||||
cv::Mat mat_dst;
|
||||
dst.download(mat_dst);
|
||||
|
||||
EXPECT_MAT_NEAR(dst_gold, mat_dst, templ_size.area() * 1e-1, sss);
|
||||
EXPECT_MAT_NEAR(dst_gold, mat_dst, templ_size.area() * 1e-1);
|
||||
}
|
||||
|
||||
INSTANTIATE_TEST_CASE_P(OCL_ImgProc, MatchTemplate8U,
|
||||
|
@ -12,6 +12,7 @@
|
||||
//
|
||||
// Copyright (C) 2010-2012, Institute Of Software Chinese Academy Of Science, all rights reserved.
|
||||
// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
|
||||
// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved.
|
||||
// Third party copyrights are property of their respective owners.
|
||||
//
|
||||
// @Authors
|
||||
@ -72,7 +73,7 @@ PARAM_TEST_CASE(ConvertToTestBase, MatType, MatType)
|
||||
//src mat with roi
|
||||
cv::Mat mat_roi;
|
||||
cv::Mat dst_roi;
|
||||
//std::vector<cv::ocl::Info> oclinfo;
|
||||
|
||||
//ocl dst mat for testing
|
||||
cv::ocl::oclMat gdst_whole;
|
||||
|
||||
@ -90,11 +91,6 @@ PARAM_TEST_CASE(ConvertToTestBase, MatType, MatType)
|
||||
|
||||
mat = randomMat(rng, size, type, 5, 16, false);
|
||||
dst = randomMat(rng, size, type, 5, 16, false);
|
||||
//std::vector<cv::ocl::Info> oclinfo;
|
||||
//int devnums = getDevice(oclinfo, OPENCV_DEFAULT_OPENCL_DEVICE);
|
||||
//CV_Assert(devnums > 0);
|
||||
////if you want to use undefault device, set it here
|
||||
////setDevice(oclinfo[0]);
|
||||
}
|
||||
|
||||
void random_roi()
|
||||
@ -139,12 +135,7 @@ TEST_P(ConvertTo, Accuracy)
|
||||
mat_roi.convertTo(dst_roi, dst_type);
|
||||
gmat.convertTo(gdst, dst_type);
|
||||
|
||||
cv::Mat cpu_dst;
|
||||
gdst_whole.download(cpu_dst);
|
||||
char sss[1024];
|
||||
sprintf(sss, "roicols=%d,roirows=%d,srcx =%d,srcy=%d,dstx=%d,dsty=%d", roicols, roirows, srcx , srcy, dstx, dsty);
|
||||
|
||||
EXPECT_MAT_NEAR(dst, cpu_dst, 0.0, sss);
|
||||
EXPECT_MAT_NEAR(dst, Mat(gdst_whole), 0.0);
|
||||
}
|
||||
}
|
||||
|
||||
@ -175,7 +166,7 @@ PARAM_TEST_CASE(CopyToTestBase, MatType, bool)
|
||||
cv::Mat mat_roi;
|
||||
cv::Mat mask_roi;
|
||||
cv::Mat dst_roi;
|
||||
//std::vector<cv::ocl::Info> oclinfo;
|
||||
|
||||
//ocl dst mat for testing
|
||||
cv::ocl::oclMat gdst_whole;
|
||||
|
||||
@ -197,10 +188,6 @@ PARAM_TEST_CASE(CopyToTestBase, MatType, bool)
|
||||
|
||||
cv::threshold(mask, mask, 0.5, 255., CV_8UC1);
|
||||
|
||||
//int devnums = getDevice(oclinfo, OPENCV_DEFAULT_OPENCL_DEVICE);
|
||||
//CV_Assert(devnums > 0);
|
||||
////if you want to use undefault device, set it here
|
||||
////setDevice(oclinfo[0]);
|
||||
}
|
||||
|
||||
void random_roi()
|
||||
@ -250,12 +237,7 @@ TEST_P(CopyTo, Without_mask)
|
||||
mat_roi.copyTo(dst_roi);
|
||||
gmat.copyTo(gdst);
|
||||
|
||||
cv::Mat cpu_dst;
|
||||
gdst_whole.download(cpu_dst);
|
||||
char sss[1024];
|
||||
sprintf(sss, "roicols=%d,roirows=%d,srcx =%d,srcy=%d,dstx=%d,dsty=%d,maskx=%d,masky=%d", roicols, roirows, srcx , srcy, dstx, dsty, maskx, masky);
|
||||
|
||||
EXPECT_MAT_NEAR(dst, cpu_dst, 0.0, sss);
|
||||
EXPECT_MAT_NEAR(dst, Mat(gdst_whole), 0.0);
|
||||
}
|
||||
}
|
||||
|
||||
@ -268,12 +250,7 @@ TEST_P(CopyTo, With_mask)
|
||||
mat_roi.copyTo(dst_roi, mask_roi);
|
||||
gmat.copyTo(gdst, gmask);
|
||||
|
||||
cv::Mat cpu_dst;
|
||||
gdst_whole.download(cpu_dst);
|
||||
char sss[1024];
|
||||
sprintf(sss, "roicols=%d,roirows=%d,srcx =%d,srcy=%d,dstx=%d,dsty=%d,maskx=%d,masky=%d", roicols, roirows, srcx , srcy, dstx, dsty, maskx, masky);
|
||||
|
||||
EXPECT_MAT_NEAR(dst, cpu_dst, 0.0, sss);
|
||||
EXPECT_MAT_NEAR(dst, Mat(gdst_whole), 0.0);
|
||||
}
|
||||
}
|
||||
|
||||
@ -301,7 +278,7 @@ PARAM_TEST_CASE(SetToTestBase, MatType, bool)
|
||||
//src mat with roi
|
||||
cv::Mat mat_roi;
|
||||
cv::Mat mask_roi;
|
||||
//std::vector<cv::ocl::Info> oclinfo;
|
||||
|
||||
//ocl dst mat for testing
|
||||
cv::ocl::oclMat gmat_whole;
|
||||
|
||||
@ -322,10 +299,6 @@ PARAM_TEST_CASE(SetToTestBase, MatType, bool)
|
||||
cv::threshold(mask, mask, 0.5, 255., CV_8UC1);
|
||||
val = cv::Scalar(rng.uniform(-10.0, 10.0), rng.uniform(-10.0, 10.0), rng.uniform(-10.0, 10.0), rng.uniform(-10.0, 10.0));
|
||||
|
||||
//int devnums = getDevice(oclinfo, OPENCV_DEFAULT_OPENCL_DEVICE);
|
||||
//CV_Assert(devnums > 0);
|
||||
////if you want to use undefault device, set it here
|
||||
////setDevice(oclinfo[0]);
|
||||
}
|
||||
|
||||
void random_roi()
|
||||
@ -369,12 +342,7 @@ TEST_P(SetTo, Without_mask)
|
||||
mat_roi.setTo(val);
|
||||
gmat.setTo(val);
|
||||
|
||||
cv::Mat cpu_dst;
|
||||
gmat_whole.download(cpu_dst);
|
||||
char sss[1024];
|
||||
sprintf(sss, "roicols=%d,roirows=%d,srcx =%d,srcy=%d,maskx=%d,masky=%d", roicols, roirows, srcx , srcy, maskx, masky);
|
||||
|
||||
EXPECT_MAT_NEAR(mat, cpu_dst, 1., sss);
|
||||
EXPECT_MAT_NEAR(mat, Mat(gmat_whole), 1.);
|
||||
}
|
||||
}
|
||||
|
||||
@ -387,12 +355,7 @@ TEST_P(SetTo, With_mask)
|
||||
mat_roi.setTo(val, mask_roi);
|
||||
gmat.setTo(val, gmask);
|
||||
|
||||
cv::Mat cpu_dst;
|
||||
gmat_whole.download(cpu_dst);
|
||||
char sss[1024];
|
||||
sprintf(sss, "roicols=%d,roirows=%d,srcx =%d,srcy=%d,maskx=%d,masky=%d", roicols, roirows, srcx , srcy, maskx, masky);
|
||||
|
||||
EXPECT_MAT_NEAR(mat, cpu_dst, 1., sss);
|
||||
EXPECT_MAT_NEAR(mat, Mat(gmat_whole), 1.);
|
||||
}
|
||||
}
|
||||
|
||||
@ -417,7 +380,7 @@ PARAM_TEST_CASE(convertC3C4, MatType, cv::Size)
|
||||
//src mat with roi
|
||||
cv::Mat mat1_roi;
|
||||
cv::Mat dst_roi;
|
||||
//std::vector<cv::ocl::Info> oclinfo;
|
||||
|
||||
//ocl dst mat for testing
|
||||
cv::ocl::oclMat gdst_whole;
|
||||
|
||||
@ -430,13 +393,6 @@ PARAM_TEST_CASE(convertC3C4, MatType, cv::Size)
|
||||
type = GET_PARAM(0);
|
||||
ksize = GET_PARAM(1);
|
||||
|
||||
|
||||
|
||||
//dst = randomMat(rng, size, type, 5, 16, false);
|
||||
//int devnums = getDevice(oclinfo);
|
||||
//CV_Assert(devnums > 0);
|
||||
////if you want to use undefault device, set it here
|
||||
////setDevice(oclinfo[1]);
|
||||
}
|
||||
|
||||
void random_roi()
|
||||
@ -483,11 +439,8 @@ TEST_P(convertC3C4, Accuracy)
|
||||
|
||||
mat1 = randomMat(rng, size, type, 0, 40, false);
|
||||
gmat1 = mat1;
|
||||
cv::Mat cpu_dst;
|
||||
gmat1.download(cpu_dst);
|
||||
char sss[1024];
|
||||
sprintf(sss, "cols=%d,rows=%d", mat1.cols, mat1.rows);
|
||||
EXPECT_MAT_NEAR(mat1, cpu_dst, 0.0, sss);
|
||||
|
||||
EXPECT_MAT_NEAR(mat1, Mat(gmat1), 0.0);
|
||||
}
|
||||
|
||||
}
|
||||
|
@ -10,7 +10,7 @@ using namespace cvtest;
|
||||
using namespace testing;
|
||||
using namespace std;
|
||||
extern string workdir;
|
||||
PARAM_TEST_CASE(MomentsTestBase, MatType, bool)
|
||||
PARAM_TEST_CASE(MomentsTest, MatType, bool)
|
||||
{
|
||||
int type;
|
||||
cv::Mat mat1;
|
||||
@ -30,13 +30,13 @@ PARAM_TEST_CASE(MomentsTestBase, MatType, bool)
|
||||
Mat gpu_dst, cpu_dst;
|
||||
HuMoments(cpu, cpu_dst);
|
||||
HuMoments(gpu, gpu_dst);
|
||||
EXPECT_MAT_NEAR(gpu_dst,cpu_dst, .5, "");
|
||||
EXPECT_MAT_NEAR(gpu_dst,cpu_dst, .5);
|
||||
}
|
||||
|
||||
};
|
||||
struct ocl_Moments : MomentsTestBase {};
|
||||
|
||||
TEST_P(ocl_Moments, Mat)
|
||||
|
||||
TEST_P(MomentsTest, Mat)
|
||||
{
|
||||
bool binaryImage = 0;
|
||||
SetUp();
|
||||
@ -67,6 +67,6 @@ TEST_P(ocl_Moments, Mat)
|
||||
|
||||
}
|
||||
}
|
||||
INSTANTIATE_TEST_CASE_P(Moments, ocl_Moments, Combine(
|
||||
INSTANTIATE_TEST_CASE_P(OCL_ImgProc, MomentsTest, Combine(
|
||||
Values(CV_8UC1, CV_16UC1, CV_16SC1, CV_64FC1), Values(true,false)));
|
||||
#endif // HAVE_OPENCL
|
||||
|
@ -65,15 +65,6 @@ PARAM_TEST_CASE(PyrDown, MatType, int)
|
||||
{
|
||||
type = GET_PARAM(0);
|
||||
channels = GET_PARAM(1);
|
||||
|
||||
//int devnums = getDevice(oclinfo);
|
||||
//CV_Assert(devnums > 0);
|
||||
////if you want to use undefault device, set it here
|
||||
////setDevice(oclinfo[0]);
|
||||
}
|
||||
|
||||
void Cleanup()
|
||||
{
|
||||
}
|
||||
|
||||
};
|
||||
@ -92,17 +83,11 @@ TEST_P(PyrDown, Mat)
|
||||
cv::pyrDown(src, dst_cpu);
|
||||
cv::ocl::pyrDown(gsrc, gdst);
|
||||
|
||||
cv::Mat dst;
|
||||
gdst.download(dst);
|
||||
char s[1024] = {0};
|
||||
|
||||
EXPECT_MAT_NEAR(dst, dst_cpu, dst.depth() == CV_32F ? 1e-4f : 1.0f, s);
|
||||
|
||||
Cleanup();
|
||||
EXPECT_MAT_NEAR(dst_cpu, Mat(gdst), type == CV_32F ? 1e-4f : 1.0f);
|
||||
}
|
||||
}
|
||||
|
||||
INSTANTIATE_TEST_CASE_P(GPU_ImgProc, PyrDown, Combine(
|
||||
INSTANTIATE_TEST_CASE_P(OCL_ImgProc, PyrDown, Combine(
|
||||
Values(CV_8U, CV_32F), Values(1, 3, 4)));
|
||||
|
||||
|
||||
|
@ -50,19 +50,7 @@ using namespace cvtest;
|
||||
using namespace testing;
|
||||
using namespace std;
|
||||
|
||||
//#define DUMP
|
||||
|
||||
/////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// BroxOpticalFlow
|
||||
extern string workdir;
|
||||
#define BROX_OPTICAL_FLOW_DUMP_FILE "opticalflow/brox_optical_flow.bin"
|
||||
#define BROX_OPTICAL_FLOW_DUMP_FILE_CC20 "opticalflow/brox_optical_flow_cc20.bin"
|
||||
|
||||
|
||||
/////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// PyrLKOpticalFlow
|
||||
|
||||
//IMPLEMENT_PARAM_CLASS(UseGray, bool)
|
||||
|
||||
PARAM_TEST_CASE(Sparse, bool, bool)
|
||||
{
|
||||
|
@ -58,12 +58,9 @@ PARAM_TEST_CASE(PyrUp, MatType, int)
|
||||
{
|
||||
int type;
|
||||
int channels;
|
||||
//std::vector<cv::ocl::Info> oclinfo;
|
||||
|
||||
virtual void SetUp()
|
||||
{
|
||||
//int devnums = cv::ocl::getDevice(oclinfo, OPENCV_DEFAULT_OPENCL_DEVICE);
|
||||
//CV_Assert(devnums > 0);
|
||||
type = GET_PARAM(0);
|
||||
channels = GET_PARAM(1);
|
||||
}
|
||||
@ -80,17 +77,14 @@ TEST_P(PyrUp, Accuracy)
|
||||
ocl::oclMat dst;
|
||||
ocl::oclMat srcMat(src);
|
||||
ocl::pyrUp(srcMat, dst);
|
||||
Mat cpu_dst;
|
||||
dst.download(cpu_dst);
|
||||
char s[100] = {0};
|
||||
|
||||
EXPECT_MAT_NEAR(dst_gold, cpu_dst, (src.depth() == CV_32F ? 1e-4f : 1.0), s);
|
||||
EXPECT_MAT_NEAR(dst_gold, Mat(dst), (type == CV_32F ? 1e-4f : 1.0));
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
|
||||
INSTANTIATE_TEST_CASE_P(GPU_ImgProc, PyrUp, testing::Combine(
|
||||
INSTANTIATE_TEST_CASE_P(OCL_ImgProc, PyrUp, testing::Combine(
|
||||
Values(CV_8U, CV_32F), Values(1, 3, 4)));
|
||||
|
||||
|
||||
|
@ -12,6 +12,7 @@
|
||||
//
|
||||
// Copyright (C) 2010-2012, Institute Of Software Chinese Academy Of Science, all rights reserved.
|
||||
// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
|
||||
// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved.
|
||||
// Third party copyrights are property of their respective owners.
|
||||
//
|
||||
// @Authors
|
||||
@ -87,7 +88,7 @@ PARAM_TEST_CASE(MergeTestBase, MatType, int)
|
||||
|
||||
//dst mat with roi
|
||||
cv::Mat dst_roi;
|
||||
//std::vector<cv::ocl::Info> oclinfo;
|
||||
|
||||
//ocl dst mat for testing
|
||||
cv::ocl::oclMat gdst_whole;
|
||||
|
||||
@ -112,10 +113,6 @@ PARAM_TEST_CASE(MergeTestBase, MatType, int)
|
||||
mat4 = randomMat(rng, size, CV_MAKETYPE(type, 1), 5, 16, false);
|
||||
dst = randomMat(rng, size, CV_MAKETYPE(type, channels), 5, 16, false);
|
||||
|
||||
//int devnums = getDevice(oclinfo, OPENCV_DEFAULT_OPENCL_DEVICE);
|
||||
//CV_Assert(devnums > 0);
|
||||
////if you want to use undefault device, set it here
|
||||
////setDevice(oclinfo[0]);
|
||||
}
|
||||
|
||||
void random_roi()
|
||||
@ -205,12 +202,7 @@ TEST_P(Merge, Accuracy)
|
||||
cv::merge(dev_src, dst_roi);
|
||||
cv::ocl::merge(dev_gsrc, gdst);
|
||||
|
||||
cv::Mat cpu_dst;
|
||||
gdst_whole.download(cpu_dst);
|
||||
char sss[1024];
|
||||
sprintf(sss, "roicols=%d,roirows=%d,src1x =%d,src1y=%d,src2x =%d,src2y=%d,src3x =%d,src3y=%d,src4x =%d,src4y=%d,dstx=%d,dsty=%d", roicols, roirows, src1x, src1y, src2x , src2y, src3x , src3y, src4x , src4y, dstx, dsty);
|
||||
|
||||
EXPECT_MAT_NEAR(dst, cpu_dst, 0.0, sss);
|
||||
EXPECT_MAT_NEAR(dst, Mat(gdst_whole), 0.0);
|
||||
}
|
||||
}
|
||||
|
||||
@ -252,7 +244,7 @@ PARAM_TEST_CASE(SplitTestBase, MatType, int)
|
||||
cv::Mat dst2_roi;
|
||||
cv::Mat dst3_roi;
|
||||
cv::Mat dst4_roi;
|
||||
//std::vector<cv::ocl::Info> oclinfo;
|
||||
|
||||
//ocl dst mat for testing
|
||||
cv::ocl::oclMat gdst1_whole;
|
||||
cv::ocl::oclMat gdst2_whole;
|
||||
@ -280,10 +272,6 @@ PARAM_TEST_CASE(SplitTestBase, MatType, int)
|
||||
dst3 = randomMat(rng, size, CV_MAKETYPE(type, 1), 5, 16, false);
|
||||
dst4 = randomMat(rng, size, CV_MAKETYPE(type, 1), 5, 16, false);
|
||||
|
||||
//int devnums = getDevice(oclinfo, OPENCV_DEFAULT_OPENCL_DEVICE);
|
||||
//CV_Assert(devnums > 0);
|
||||
////if you want to use undefault device, set it here
|
||||
////setDevice(oclinfo[0]);
|
||||
}
|
||||
|
||||
void random_roi()
|
||||
@ -356,28 +344,17 @@ TEST_P(Split, Accuracy)
|
||||
cv::split(mat_roi, dev_dst);
|
||||
cv::ocl::split(gmat, dev_gdst);
|
||||
|
||||
cv::Mat cpu_dst1;
|
||||
cv::Mat cpu_dst2;
|
||||
cv::Mat cpu_dst3;
|
||||
cv::Mat cpu_dst4;
|
||||
gdst1_whole.download(cpu_dst1);
|
||||
gdst2_whole.download(cpu_dst2);
|
||||
gdst3_whole.download(cpu_dst3);
|
||||
gdst4_whole.download(cpu_dst4);
|
||||
char sss[1024];
|
||||
sprintf(sss, "roicols=%d,roirows=%d,dst1x =%d,dsty=%d,dst2x =%d,dst2y=%d,dst3x =%d,dst3y=%d,dst4x =%d,dst4y=%d,srcx=%d,srcy=%d", roicols, roirows, dst1x , dst1y, dst2x , dst2y, dst3x , dst3y, dst4x , dst4y, srcx, srcy);
|
||||
|
||||
if(channels >= 1)
|
||||
EXPECT_MAT_NEAR(dst1, cpu_dst1, 0.0, sss);
|
||||
EXPECT_MAT_NEAR(dst1, Mat(gdst1_whole), 0.0);
|
||||
|
||||
if(channels >= 2)
|
||||
EXPECT_MAT_NEAR(dst2, cpu_dst2, 0.0, sss);
|
||||
EXPECT_MAT_NEAR(dst2, Mat(gdst2_whole), 0.0);
|
||||
|
||||
if(channels >= 3)
|
||||
EXPECT_MAT_NEAR(dst3, cpu_dst3, 0.0, sss);
|
||||
EXPECT_MAT_NEAR(dst3, Mat(gdst3_whole), 0.0);
|
||||
|
||||
if(channels >= 4)
|
||||
EXPECT_MAT_NEAR(dst4, cpu_dst4, 0.0, sss);
|
||||
EXPECT_MAT_NEAR(dst4, Mat(gdst4_whole), 0.0);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -76,20 +76,20 @@ double checkSimilarity(const cv::Mat &m1, const cv::Mat &m2);
|
||||
EXPECT_LE(checkNorm(cv::Mat(mat)), eps) \
|
||||
}
|
||||
|
||||
/*#define EXPECT_MAT_NEAR(mat1, mat2, eps) \
|
||||
#define EXPECT_MAT_NEAR(mat1, mat2, eps) \
|
||||
{ \
|
||||
ASSERT_EQ(mat1.type(), mat2.type()); \
|
||||
ASSERT_EQ(mat1.size(), mat2.size()); \
|
||||
EXPECT_LE(checkNorm(cv::Mat(mat1), cv::Mat(mat2)), eps); \
|
||||
}*/
|
||||
|
||||
}
|
||||
/*
|
||||
#define EXPECT_MAT_NEAR(mat1, mat2, eps,s) \
|
||||
{ \
|
||||
ASSERT_EQ(mat1.type(), mat2.type()); \
|
||||
ASSERT_EQ(mat1.size(), mat2.size()); \
|
||||
EXPECT_LE(checkNorm(cv::Mat(mat1), cv::Mat(mat2)), eps)<<s; \
|
||||
}
|
||||
|
||||
*/
|
||||
#define EXPECT_MAT_SIMILAR(mat1, mat2, eps) \
|
||||
{ \
|
||||
ASSERT_EQ(mat1.type(), mat2.type()); \
|
||||
|
@ -554,6 +554,7 @@ namespace cvtest
|
||||
CV_EXPORTS void fillGradient(Mat& img, int delta = 5);
|
||||
CV_EXPORTS void smoothBorder(Mat& img, const Scalar& color, int delta = 3);
|
||||
|
||||
CV_EXPORTS void printVersionInfo(bool useStdOut = true);
|
||||
} //namespace cvtest
|
||||
|
||||
// fills c with zeros
|
||||
@ -573,6 +574,7 @@ int main(int argc, char **argv) \
|
||||
{ \
|
||||
cvtest::TS::ptr()->init(resourcesubdir); \
|
||||
::testing::InitGoogleTest(&argc, argv); \
|
||||
cvtest::printVersionInfo();\
|
||||
return RUN_ALL_TESTS(); \
|
||||
}
|
||||
|
||||
|
@ -484,6 +484,7 @@ int main(int argc, char **argv)\
|
||||
::perf::Regression::Init(#testsuitname);\
|
||||
::perf::TestBase::Init(argc, argv);\
|
||||
::testing::InitGoogleTest(&argc, argv);\
|
||||
cvtest::printVersionInfo();\
|
||||
return RUN_ALL_TESTS();\
|
||||
}
|
||||
|
||||
|
@ -2934,8 +2934,34 @@ MatComparator::operator()(const char* expr1, const char* expr2,
|
||||
<< "'" << expr2 << "': " << MatPart(m2part, border > 0 ? &loc : 0) << ".\n";
|
||||
}
|
||||
|
||||
void printVersionInfo(bool useStdOut)
|
||||
{
|
||||
::testing::Test::RecordProperty("CV_VERSION", CV_VERSION);
|
||||
if(useStdOut) std::cout << "OpenCV version: " << CV_VERSION << std::endl;
|
||||
|
||||
std::string buildInfo( cv::getBuildInformation() );
|
||||
|
||||
size_t pos1 = buildInfo.find("Version control");
|
||||
size_t pos2 = buildInfo.find("\n", pos1);\
|
||||
if(pos1 != std::string::npos && pos2 != std::string::npos)
|
||||
{
|
||||
std::string ver( buildInfo.substr(pos1, pos2-pos1) );
|
||||
::testing::Test::RecordProperty("Version_control", ver);
|
||||
if(useStdOut) std::cout << ver << std::endl;
|
||||
}
|
||||
|
||||
pos1 = buildInfo.find("inner version");
|
||||
pos2 = buildInfo.find("\n", pos1);\
|
||||
if(pos1 != std::string::npos && pos2 != std::string::npos)
|
||||
{
|
||||
std::string ver( buildInfo.substr(pos1, pos2-pos1) );
|
||||
::testing::Test::RecordProperty("inner_version", ver);
|
||||
if(useStdOut) std::cout << ver << std::endl;
|
||||
}
|
||||
}
|
||||
|
||||
} //namespace cvtest
|
||||
|
||||
void cvTsConvert( const CvMat* src, CvMat* dst )
|
||||
{
|
||||
Mat _src = cvarrToMat(src), _dst = cvarrToMat(dst);
|
||||
|
@ -10,16 +10,13 @@ add_subdirectory(15-puzzle)
|
||||
add_subdirectory(face-detection)
|
||||
add_subdirectory(image-manipulations)
|
||||
add_subdirectory(color-blob-detection)
|
||||
|
||||
if (ANDROID_NATIVE_API_LEVEL GREATER 8)
|
||||
add_subdirectory(native-activity)
|
||||
endif()
|
||||
|
||||
add_subdirectory(tutorial-1-camerapreview)
|
||||
add_subdirectory(tutorial-2-mixedprocessing)
|
||||
add_subdirectory(tutorial-3-cameracontrol)
|
||||
|
||||
#hello-android sample
|
||||
add_subdirectory(native-activity)
|
||||
|
||||
# hello-android sample
|
||||
if(HAVE_opencv_highgui)
|
||||
ocv_include_modules_recurse(opencv_highgui opencv_core)
|
||||
add_executable(hello-android hello-android/main.cpp)
|
||||
|
Loading…
x
Reference in New Issue
Block a user