Merge pull request #1824 from vpisarev:ocl_experiments5
This commit is contained in:
commit
8d1a8df1a9
@ -499,11 +499,12 @@ macro(ocv_glob_module_sources)
|
||||
source_group("Src" FILES ${lib_srcs} ${lib_int_hdrs})
|
||||
|
||||
file(GLOB cl_kernels "src/opencl/*.cl")
|
||||
if(HAVE_opencv_ocl AND cl_kernels)
|
||||
if(cl_kernels)
|
||||
ocv_include_directories(${OPENCL_INCLUDE_DIRS})
|
||||
string(REGEX REPLACE "opencv_" "" the_module_barename "${the_module}")
|
||||
add_custom_command(
|
||||
OUTPUT "${CMAKE_CURRENT_BINARY_DIR}/opencl_kernels.cpp" "${CMAKE_CURRENT_BINARY_DIR}/opencl_kernels.hpp"
|
||||
COMMAND ${CMAKE_COMMAND} -DCL_DIR="${CMAKE_CURRENT_SOURCE_DIR}/src/opencl" -DOUTPUT="${CMAKE_CURRENT_BINARY_DIR}/opencl_kernels.cpp" -P "${OpenCV_SOURCE_DIR}/cmake/cl2cpp.cmake"
|
||||
COMMAND ${CMAKE_COMMAND} -DMODULE_NAME="${the_module_barename}" -DCL_DIR="${CMAKE_CURRENT_SOURCE_DIR}/src/opencl" -DOUTPUT="${CMAKE_CURRENT_BINARY_DIR}/opencl_kernels.cpp" -P "${OpenCV_SOURCE_DIR}/cmake/cl2cpp.cmake"
|
||||
DEPENDS ${cl_kernels} "${OpenCV_SOURCE_DIR}/cmake/cl2cpp.cmake")
|
||||
source_group("OpenCL" FILES ${cl_kernels} "${CMAKE_CURRENT_BINARY_DIR}/opencl_kernels.cpp" "${CMAKE_CURRENT_BINARY_DIR}/opencl_kernels.hpp")
|
||||
list(APPEND lib_srcs ${cl_kernels} "${CMAKE_CURRENT_BINARY_DIR}/opencl_kernels.cpp" "${CMAKE_CURRENT_BINARY_DIR}/opencl_kernels.hpp")
|
||||
|
@ -4,6 +4,15 @@ list(SORT cl_list)
|
||||
string(REPLACE ".cpp" ".hpp" OUTPUT_HPP "${OUTPUT}")
|
||||
get_filename_component(OUTPUT_HPP_NAME "${OUTPUT_HPP}" NAME)
|
||||
|
||||
if("${MODULE_NAME}" STREQUAL "ocl")
|
||||
set(nested_namespace_start "")
|
||||
set(nested_namespace_end "")
|
||||
else()
|
||||
set(new_mode ON)
|
||||
set(nested_namespace_start "namespace ${MODULE_NAME}\n{")
|
||||
set(nested_namespace_end "}")
|
||||
endif()
|
||||
|
||||
set(STR_CPP "// This file is auto-generated. Do not edit!
|
||||
|
||||
#include \"precomp.hpp\"
|
||||
@ -13,16 +22,19 @@ namespace cv
|
||||
{
|
||||
namespace ocl
|
||||
{
|
||||
${nested_namespace_start}
|
||||
|
||||
")
|
||||
|
||||
set(STR_HPP "// This file is auto-generated. Do not edit!
|
||||
|
||||
#include \"opencv2/ocl/private/util.hpp\"
|
||||
#include \"opencv2/core/ocl_genbase.hpp\"
|
||||
|
||||
namespace cv
|
||||
{
|
||||
namespace ocl
|
||||
{
|
||||
${nested_namespace_start}
|
||||
|
||||
")
|
||||
|
||||
@ -49,12 +61,19 @@ foreach(cl ${cl_list})
|
||||
|
||||
string(MD5 hash "${lines}")
|
||||
|
||||
set(STR_CPP "${STR_CPP}const struct ProgramEntry ${cl_filename}={\"${cl_filename}\",\n\"${lines}, \"${hash}\"};\n")
|
||||
set(STR_HPP "${STR_HPP}extern const struct ProgramEntry ${cl_filename};\n")
|
||||
set(STR_CPP_DECL "const struct ProgramEntry ${cl_filename}={\"${cl_filename}\",\n\"${lines}, \"${hash}\"};\n")
|
||||
set(STR_HPP_DECL "extern const struct ProgramEntry ${cl_filename};\n")
|
||||
if(new_mode)
|
||||
set(STR_CPP_DECL "${STR_CPP_DECL}ProgramSource2 ${cl_filename}_oclsrc(${cl_filename}.programStr);\n")
|
||||
set(STR_HPP_DECL "${STR_HPP_DECL}extern ProgramSource2 ${cl_filename}_oclsrc;\n")
|
||||
endif()
|
||||
|
||||
set(STR_CPP "${STR_CPP}${STR_CPP_DECL}")
|
||||
set(STR_HPP "${STR_HPP}${STR_HPP_DECL}")
|
||||
endforeach()
|
||||
|
||||
set(STR_CPP "${STR_CPP}}\n}\n")
|
||||
set(STR_HPP "${STR_HPP}}\n}\n")
|
||||
set(STR_CPP "${STR_CPP}}\n${nested_namespace_end}}\n")
|
||||
set(STR_HPP "${STR_HPP}}\n${nested_namespace_end}}\n")
|
||||
|
||||
file(WRITE "${OUTPUT}" "${STR_CPP}")
|
||||
|
||||
|
@ -47,6 +47,7 @@
|
||||
#include "opencv2/bioinspired.hpp"
|
||||
#include "opencv2/core/utility.hpp"
|
||||
#include "opencv2/core/private.hpp"
|
||||
#include "opencv2/core/ocl.hpp"
|
||||
|
||||
#include <valarray>
|
||||
|
||||
|
@ -56,6 +56,8 @@
|
||||
|
||||
namespace cv
|
||||
{
|
||||
static ocl::ProgramEntry retina_kernel = ocl::bioinspired::retina_kernel;
|
||||
|
||||
namespace bioinspired
|
||||
{
|
||||
namespace ocl
|
||||
|
@ -347,6 +347,10 @@ CV_EXPORTS_W void max(InputArray src1, InputArray src2, OutputArray dst);
|
||||
CV_EXPORTS void min(const Mat& src1, const Mat& src2, Mat& dst);
|
||||
//! computes per-element maximum of two arrays (dst = max(src1, src2))
|
||||
CV_EXPORTS void max(const Mat& src1, const Mat& src2, Mat& dst);
|
||||
//! computes per-element minimum of two arrays (dst = min(src1, src2))
|
||||
CV_EXPORTS void min(const UMat& src1, const UMat& src2, UMat& dst);
|
||||
//! computes per-element maximum of two arrays (dst = max(src1, src2))
|
||||
CV_EXPORTS void max(const UMat& src1, const UMat& src2, UMat& dst);
|
||||
|
||||
//! computes square root of each matrix element (dst = src**0.5)
|
||||
CV_EXPORTS_W void sqrt(InputArray src, OutputArray dst);
|
||||
|
@ -58,6 +58,8 @@ namespace cv
|
||||
enum { ACCESS_READ=1<<24, ACCESS_WRITE=1<<25,
|
||||
ACCESS_RW=3<<24, ACCESS_MASK=ACCESS_RW, ACCESS_FAST=1<<26 };
|
||||
|
||||
class CV_EXPORTS _OutputArray;
|
||||
|
||||
//////////////////////// Input/Output Array Arguments /////////////////////////////////
|
||||
|
||||
/*!
|
||||
@ -116,12 +118,22 @@ public:
|
||||
void* getObj() const;
|
||||
|
||||
virtual int kind() const;
|
||||
virtual int dims(int i=-1) const;
|
||||
virtual Size size(int i=-1) const;
|
||||
virtual int sizend(int* sz, int i=-1) const;
|
||||
virtual bool sameSize(const _InputArray& arr) const;
|
||||
virtual size_t total(int i=-1) const;
|
||||
virtual int type(int i=-1) const;
|
||||
virtual int depth(int i=-1) const;
|
||||
virtual int channels(int i=-1) const;
|
||||
virtual bool isContinuous(int i=-1) const;
|
||||
virtual bool empty() const;
|
||||
virtual void copyTo(const _OutputArray& arr) const;
|
||||
bool isMat() const;
|
||||
bool isUMat() const;
|
||||
bool isMatVectot() const;
|
||||
bool isUMatVector() const;
|
||||
bool isMatx();
|
||||
|
||||
virtual ~_InputArray();
|
||||
|
||||
@ -197,8 +209,10 @@ public:
|
||||
virtual void create(Size sz, int type, int i=-1, bool allowTransposed=false, int fixedDepthMask=0) const;
|
||||
virtual void create(int rows, int cols, int type, int i=-1, bool allowTransposed=false, int fixedDepthMask=0) const;
|
||||
virtual void create(int dims, const int* size, int type, int i=-1, bool allowTransposed=false, int fixedDepthMask=0) const;
|
||||
virtual void createSameSize(const _InputArray& arr, int mtype) const;
|
||||
virtual void release() const;
|
||||
virtual void clear() const;
|
||||
virtual void setTo(const _InputArray& value) const;
|
||||
};
|
||||
|
||||
|
||||
|
@ -108,6 +108,12 @@ inline _InputArray::_InputArray(const cuda::CudaMem& cuda_mem)
|
||||
|
||||
inline _InputArray::~_InputArray() {}
|
||||
|
||||
inline bool _InputArray::isMat() const { return kind() == _InputArray::MAT; }
|
||||
inline bool _InputArray::isUMat() const { return kind() == _InputArray::UMAT; }
|
||||
inline bool _InputArray::isMatVectot() const { return kind() == _InputArray::STD_VECTOR_MAT; }
|
||||
inline bool _InputArray::isUMatVector() const { return kind() == _InputArray::STD_VECTOR_UMAT; }
|
||||
inline bool _InputArray::isMatx() { return kind() == _InputArray::MATX; }
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
inline _OutputArray::_OutputArray() { init(ACCESS_WRITE, 0); }
|
||||
|
@ -49,13 +49,13 @@ namespace cv { namespace ocl {
|
||||
CV_EXPORTS bool haveOpenCL();
|
||||
CV_EXPORTS bool useOpenCL();
|
||||
CV_EXPORTS void setUseOpenCL(bool flag);
|
||||
CV_EXPORTS void finish();
|
||||
CV_EXPORTS void finish2();
|
||||
|
||||
class CV_EXPORTS Context;
|
||||
class CV_EXPORTS Context2;
|
||||
class CV_EXPORTS Device;
|
||||
class CV_EXPORTS Kernel;
|
||||
class CV_EXPORTS Program;
|
||||
class CV_EXPORTS ProgramSource;
|
||||
class CV_EXPORTS ProgramSource2;
|
||||
class CV_EXPORTS Queue;
|
||||
|
||||
class CV_EXPORTS Device
|
||||
@ -199,22 +199,22 @@ protected:
|
||||
};
|
||||
|
||||
|
||||
class CV_EXPORTS Context
|
||||
class CV_EXPORTS Context2
|
||||
{
|
||||
public:
|
||||
Context();
|
||||
explicit Context(int dtype);
|
||||
~Context();
|
||||
Context(const Context& c);
|
||||
Context& operator = (const Context& c);
|
||||
Context2();
|
||||
explicit Context2(int dtype);
|
||||
~Context2();
|
||||
Context2(const Context2& c);
|
||||
Context2& operator = (const Context2& c);
|
||||
|
||||
bool create(int dtype);
|
||||
size_t ndevices() const;
|
||||
const Device& device(size_t idx) const;
|
||||
Program getProg(const ProgramSource& prog,
|
||||
Program getProg(const ProgramSource2& prog,
|
||||
const String& buildopt, String& errmsg);
|
||||
|
||||
static Context& getDefault();
|
||||
static Context2& getDefault();
|
||||
void* ptr() const;
|
||||
protected:
|
||||
struct Impl;
|
||||
@ -226,12 +226,12 @@ class CV_EXPORTS Queue
|
||||
{
|
||||
public:
|
||||
Queue();
|
||||
explicit Queue(const Context& c, const Device& d=Device());
|
||||
explicit Queue(const Context2& c, const Device& d=Device());
|
||||
~Queue();
|
||||
Queue(const Queue& q);
|
||||
Queue& operator = (const Queue& q);
|
||||
|
||||
bool create(const Context& c=Context(), const Device& d=Device());
|
||||
bool create(const Context2& c=Context2(), const Device& d=Device());
|
||||
void finish();
|
||||
void* ptr() const;
|
||||
static Queue& getDefault();
|
||||
@ -245,41 +245,55 @@ protected:
|
||||
class CV_EXPORTS KernelArg
|
||||
{
|
||||
public:
|
||||
enum { LOCAL=1, READ_ONLY=2, WRITE_ONLY=4, READ_WRITE=6, CONSTANT=8 };
|
||||
KernelArg(int _flags, UMat* _m, void* _obj=0, size_t _sz=0);
|
||||
enum { LOCAL=1, READ_ONLY=2, WRITE_ONLY=4, READ_WRITE=6, CONSTANT=8, NO_SIZE=256 };
|
||||
KernelArg(int _flags, UMat* _m, int wscale=1, const void* _obj=0, size_t _sz=0);
|
||||
KernelArg();
|
||||
|
||||
static KernelArg Local() { return KernelArg(LOCAL, 0); }
|
||||
static KernelArg ReadOnly(const UMat& m) { return KernelArg(READ_ONLY, (UMat*)&m); }
|
||||
static KernelArg WriteOnly(const UMat& m) { return KernelArg(WRITE_ONLY, (UMat*)&m); }
|
||||
static KernelArg ReadWrite(const UMat& m, int wscale=1)
|
||||
{ return KernelArg(READ_WRITE, (UMat*)&m, wscale); }
|
||||
static KernelArg ReadWriteNoSize(const UMat& m, int wscale=1)
|
||||
{ return KernelArg(READ_WRITE+NO_SIZE, (UMat*)&m, wscale); }
|
||||
static KernelArg ReadOnly(const UMat& m, int wscale=1)
|
||||
{ return KernelArg(READ_ONLY, (UMat*)&m, wscale); }
|
||||
static KernelArg WriteOnly(const UMat& m, int wscale=1)
|
||||
{ return KernelArg(WRITE_ONLY, (UMat*)&m, wscale); }
|
||||
static KernelArg ReadOnlyNoSize(const UMat& m, int wscale=1)
|
||||
{ return KernelArg(READ_ONLY+NO_SIZE, (UMat*)&m, wscale); }
|
||||
static KernelArg WriteOnlyNoSize(const UMat& m, int wscale=1)
|
||||
{ return KernelArg(WRITE_ONLY+NO_SIZE, (UMat*)&m, wscale); }
|
||||
static KernelArg Constant(const Mat& m);
|
||||
template<typename _Tp> static KernelArg Constant(const _Tp* arr, size_t n)
|
||||
{ return KernelArg(CONSTANT, 0, (void*)arr, n); }
|
||||
{ return KernelArg(CONSTANT, 0, 1, (void*)arr, n); }
|
||||
|
||||
int flags;
|
||||
UMat* m;
|
||||
void* obj;
|
||||
const void* obj;
|
||||
size_t sz;
|
||||
int wscale;
|
||||
};
|
||||
|
||||
|
||||
class CV_EXPORTS Kernel
|
||||
{
|
||||
public:
|
||||
Kernel();
|
||||
Kernel(const char* kname, const Program& prog);
|
||||
Kernel(const char* kname, const ProgramSource& prog,
|
||||
const String& buildopts, String& errmsg);
|
||||
Kernel(const char* kname, const ProgramSource2& prog,
|
||||
const String& buildopts, String* errmsg=0);
|
||||
~Kernel();
|
||||
Kernel(const Kernel& k);
|
||||
Kernel& operator = (const Kernel& k);
|
||||
|
||||
bool empty() const;
|
||||
bool create(const char* kname, const Program& prog);
|
||||
bool create(const char* kname, const ProgramSource& prog,
|
||||
const String& buildopts, String& errmsg);
|
||||
bool create(const char* kname, const ProgramSource2& prog,
|
||||
const String& buildopts, String* errmsg=0);
|
||||
|
||||
void set(int i, const void* value, size_t sz);
|
||||
void set(int i, const UMat& m);
|
||||
void set(int i, const KernelArg& arg);
|
||||
template<typename _Tp> void set(int i, const _Tp& value)
|
||||
int set(int i, const void* value, size_t sz);
|
||||
int set(int i, const UMat& m);
|
||||
int set(int i, const KernelArg& arg);
|
||||
template<typename _Tp> int set(int i, const _Tp& value)
|
||||
{ return set(i, &value, sizeof(value)); }
|
||||
|
||||
template<typename _Tp0>
|
||||
@ -291,26 +305,27 @@ public:
|
||||
template<typename _Tp0, typename _Tp1>
|
||||
Kernel& args(const _Tp0& a0, const _Tp1& a1)
|
||||
{
|
||||
set(0, a0); set(1, a1); return *this;
|
||||
int i = set(0, a0); set(i, a1); return *this;
|
||||
}
|
||||
|
||||
template<typename _Tp0, typename _Tp1, typename _Tp2>
|
||||
Kernel& args(const _Tp0& a0, const _Tp1& a1, const _Tp2& a2)
|
||||
{
|
||||
set(0, a0); set(1, a1); set(2, a2); return *this;
|
||||
int i = set(0, a0); i = set(i, a1); set(i, a2); return *this;
|
||||
}
|
||||
|
||||
template<typename _Tp0, typename _Tp1, typename _Tp2, typename _Tp3>
|
||||
Kernel& args(const _Tp0& a0, const _Tp1& a1, const _Tp2& a2, const _Tp3& a3)
|
||||
{
|
||||
set(0, a0); set(1, a1); set(2, a2); set(3, a3); return *this;
|
||||
int i = set(0, a0); i = set(i, a1); i = set(i, a2); i = set(i, a3); return *this;
|
||||
}
|
||||
|
||||
template<typename _Tp0, typename _Tp1, typename _Tp2, typename _Tp3, typename _Tp4>
|
||||
Kernel& args(const _Tp0& a0, const _Tp1& a1, const _Tp2& a2,
|
||||
const _Tp3& a3, const _Tp4& a4)
|
||||
{
|
||||
set(0, a0); set(1, a1); set(2, a2); set(3, a3); set(4, a4); return *this;
|
||||
int i = set(0, a0); i = set(i, a1); i = set(i, a2);
|
||||
i = set(i, a3); set(i, a4); return *this;
|
||||
}
|
||||
|
||||
template<typename _Tp0, typename _Tp1, typename _Tp2,
|
||||
@ -318,8 +333,8 @@ public:
|
||||
Kernel& args(const _Tp0& a0, const _Tp1& a1, const _Tp2& a2,
|
||||
const _Tp3& a3, const _Tp4& a4, const _Tp5& a5)
|
||||
{
|
||||
set(0, a0); set(1, a1); set(2, a2);
|
||||
set(3, a3); set(4, a4); set(5, a5); return *this;
|
||||
int i = set(0, a0); i = set(i, a1); i = set(i, a2);
|
||||
i = set(i, a3); i = set(i, a4); set(i, a5); return *this;
|
||||
}
|
||||
|
||||
template<typename _Tp0, typename _Tp1, typename _Tp2, typename _Tp3,
|
||||
@ -327,8 +342,8 @@ public:
|
||||
Kernel& args(const _Tp0& a0, const _Tp1& a1, const _Tp2& a2, const _Tp3& a3,
|
||||
const _Tp4& a4, const _Tp5& a5, const _Tp6& a6)
|
||||
{
|
||||
set(0, a0); set(1, a1); set(2, a2); set(3, a3);
|
||||
set(4, a4); set(5, a5); set(6, a6); return *this;
|
||||
int i = set(0, a0); i = set(i, a1); i = set(i, a2); i = set(i, a3);
|
||||
i = set(i, a4); i = set(i, a5); set(i, a6); return *this;
|
||||
}
|
||||
|
||||
template<typename _Tp0, typename _Tp1, typename _Tp2, typename _Tp3,
|
||||
@ -336,8 +351,8 @@ public:
|
||||
Kernel& args(const _Tp0& a0, const _Tp1& a1, const _Tp2& a2, const _Tp3& a3,
|
||||
const _Tp4& a4, const _Tp5& a5, const _Tp6& a6, const _Tp7& a7)
|
||||
{
|
||||
set(0, a0); set(1, a1); set(2, a2); set(3, a3);
|
||||
set(4, a4); set(5, a5); set(6, a6); set(7, a7); return *this;
|
||||
int i = set(0, a0); i = set(i, a1); i = set(i, a2); i = set(i, a3);
|
||||
i = set(i, a4); i = set(i, a5); i = set(i, a6); set(i, a7); return *this;
|
||||
}
|
||||
|
||||
template<typename _Tp0, typename _Tp1, typename _Tp2, typename _Tp3, typename _Tp4,
|
||||
@ -346,8 +361,8 @@ public:
|
||||
const _Tp4& a4, const _Tp5& a5, const _Tp6& a6, const _Tp7& a7,
|
||||
const _Tp8& a8)
|
||||
{
|
||||
set(0, a0); set(1, a1); set(2, a2); set(3, a3); set(4, a4);
|
||||
set(5, a5); set(6, a6); set(7, a7); set(8, a8); return *this;
|
||||
int i = set(0, a0); i = set(i, a1); i = set(i, a2); i = set(i, a3); i = set(i, a4);
|
||||
i = set(i, a5); i = set(i, a6); i = set(i, a7); set(i, a8); return *this;
|
||||
}
|
||||
|
||||
template<typename _Tp0, typename _Tp1, typename _Tp2, typename _Tp3, typename _Tp4,
|
||||
@ -356,8 +371,8 @@ public:
|
||||
const _Tp4& a4, const _Tp5& a5, const _Tp6& a6, const _Tp7& a7,
|
||||
const _Tp8& a8, const _Tp9& a9)
|
||||
{
|
||||
set(0, a0); set(1, a1); set(2, a2); set(3, a3); set(4, a4); set(5, a5);
|
||||
set(6, a6); set(7, a7); set(8, a8); set(9, a9); return *this;
|
||||
int i = set(0, a0); i = set(i, a1); i = set(i, a2); i = set(i, a3); i = set(i, a4); i = set(i, a5);
|
||||
i = set(i, a6); i = set(i, a7); i = set(i, a8); set(i, a9); return *this;
|
||||
}
|
||||
|
||||
template<typename _Tp0, typename _Tp1, typename _Tp2, typename _Tp3,
|
||||
@ -367,8 +382,8 @@ public:
|
||||
const _Tp4& a4, const _Tp5& a5, const _Tp6& a6, const _Tp7& a7,
|
||||
const _Tp8& a8, const _Tp9& a9, const _Tp10& a10)
|
||||
{
|
||||
set(0, a0); set(1, a1); set(2, a2); set(3, a3); set(4, a4); set(5, a5);
|
||||
set(6, a6); set(7, a7); set(8, a8); set(9, a9); set(10, a10); return *this;
|
||||
int i = set(0, a0); i = set(i, a1); i = set(i, a2); i = set(i, a3); i = set(i, a4); i = set(i, a5);
|
||||
i = set(i, a6); i = set(i, a7); i = set(i, a8); i = set(i, a9); set(i, a10); return *this;
|
||||
}
|
||||
|
||||
template<typename _Tp0, typename _Tp1, typename _Tp2, typename _Tp3,
|
||||
@ -378,13 +393,13 @@ public:
|
||||
const _Tp4& a4, const _Tp5& a5, const _Tp6& a6, const _Tp7& a7,
|
||||
const _Tp8& a8, const _Tp9& a9, const _Tp10& a10, const _Tp11& a11)
|
||||
{
|
||||
set(0, a0); set(1, a1); set(2, a2); set(3, a3); set(4, a4); set(5, a5);
|
||||
set(6, a6); set(7, a7); set(8, a8); set(9, a9); set(10, a10); set(11, a11); return *this;
|
||||
int i = set(0, a0); i = set(i, a1); i = set(i, a2); i = set(i, a3); i = set(i, a4); i = set(i, a5);
|
||||
i = set(i, a6); i = set(i, a7); i = set(i, a8); i = set(i, a9); i = set(i, a10); set(i, a11); return *this;
|
||||
}
|
||||
|
||||
void run(int dims, size_t offset[], size_t globalsize[],
|
||||
bool run(int dims, size_t globalsize[],
|
||||
size_t localsize[], bool sync, const Queue& q=Queue());
|
||||
void runTask(bool sync, const Queue& q=Queue());
|
||||
bool runTask(bool sync, const Queue& q=Queue());
|
||||
|
||||
size_t workGroupSize() const;
|
||||
bool compileWorkGroupSize(size_t wsz[]) const;
|
||||
@ -401,7 +416,7 @@ class CV_EXPORTS Program
|
||||
{
|
||||
public:
|
||||
Program();
|
||||
Program(const ProgramSource& src,
|
||||
Program(const ProgramSource2& src,
|
||||
const String& buildflags, String& errmsg);
|
||||
explicit Program(const String& buf);
|
||||
Program(const Program& prog);
|
||||
@ -409,12 +424,12 @@ public:
|
||||
Program& operator = (const Program& prog);
|
||||
~Program();
|
||||
|
||||
bool create(const ProgramSource& src,
|
||||
bool create(const ProgramSource2& src,
|
||||
const String& buildflags, String& errmsg);
|
||||
bool read(const String& buf, const String& buildflags);
|
||||
bool write(String& buf) const;
|
||||
|
||||
const ProgramSource& source() const;
|
||||
const ProgramSource2& source() const;
|
||||
void* ptr() const;
|
||||
|
||||
String getPrefix() const;
|
||||
@ -426,17 +441,17 @@ protected:
|
||||
};
|
||||
|
||||
|
||||
class CV_EXPORTS ProgramSource
|
||||
class CV_EXPORTS ProgramSource2
|
||||
{
|
||||
public:
|
||||
typedef uint64 hash_t;
|
||||
|
||||
ProgramSource();
|
||||
explicit ProgramSource(const String& prog);
|
||||
explicit ProgramSource(const char* prog);
|
||||
~ProgramSource();
|
||||
ProgramSource(const ProgramSource& prog);
|
||||
ProgramSource& operator = (const ProgramSource& prog);
|
||||
ProgramSource2();
|
||||
explicit ProgramSource2(const String& prog);
|
||||
explicit ProgramSource2(const char* prog);
|
||||
~ProgramSource2();
|
||||
ProgramSource2(const ProgramSource2& prog);
|
||||
ProgramSource2& operator = (const ProgramSource2& prog);
|
||||
|
||||
const String& source() const;
|
||||
hash_t hash() const;
|
||||
@ -446,6 +461,10 @@ protected:
|
||||
Impl* p;
|
||||
};
|
||||
|
||||
CV_EXPORTS const char* convertTypeStr(int sdepth, int ddepth, int cn, char* buf);
|
||||
CV_EXPORTS const char* typeToStr(int t);
|
||||
CV_EXPORTS const char* memopTypeToStr(int t);
|
||||
|
||||
}}
|
||||
|
||||
#endif
|
||||
|
60
modules/core/include/opencv2/core/ocl_genbase.hpp
Normal file
60
modules/core/include/opencv2/core/ocl_genbase.hpp
Normal file
@ -0,0 +1,60 @@
|
||||
/*M///////////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
|
||||
//
|
||||
// By downloading, copying, installing or using the software you agree to this license.
|
||||
// If you do not agree to this license, do not download, install,
|
||||
// copy or use the software.
|
||||
//
|
||||
//
|
||||
// License Agreement
|
||||
// For Open Source Computer Vision Library
|
||||
//
|
||||
// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
|
||||
// Third party copyrights are property of their respective owners.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without modification,
|
||||
// are permitted provided that the following conditions are met:
|
||||
//
|
||||
// * Redistribution's of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimer.
|
||||
//
|
||||
// * Redistribution's in binary form must reproduce the above copyright notice,
|
||||
// this list of conditions and the following disclaimer in the documentation
|
||||
// and/or other materials provided with the distribution.
|
||||
//
|
||||
// * The name of the copyright holders may not be used to endorse or promote products
|
||||
// derived from this software without specific prior written permission.
|
||||
//
|
||||
// This software is provided by the copyright holders and contributors "as is" and
|
||||
// any express or implied warranties, including, but not limited to, the implied
|
||||
// warranties of merchantability and fitness for a particular purpose are disclaimed.
|
||||
// In no event shall the OpenCV Foundation or contributors be liable for any direct,
|
||||
// indirect, incidental, special, exemplary, or consequential damages
|
||||
// (including, but not limited to, procurement of substitute goods or services;
|
||||
// loss of use, data, or profits; or business interruption) however caused
|
||||
// and on any theory of liability, whether in contract, strict liability,
|
||||
// or tort (including negligence or otherwise) arising in any way out of
|
||||
// the use of this software, even if advised of the possibility of such damage.
|
||||
//
|
||||
//M*/
|
||||
|
||||
#ifndef __OPENCV_OPENCL_GENBASE_HPP__
|
||||
#define __OPENCV_OPENCL_GENBASE_HPP__
|
||||
|
||||
namespace cv
|
||||
{
|
||||
namespace ocl
|
||||
{
|
||||
|
||||
struct ProgramEntry
|
||||
{
|
||||
const char* name;
|
||||
const char* programStr;
|
||||
const char* programHash;
|
||||
};
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
#endif
|
@ -47,6 +47,7 @@
|
||||
// */
|
||||
|
||||
#include "precomp.hpp"
|
||||
#include "opencl_kernels.hpp"
|
||||
|
||||
namespace cv
|
||||
{
|
||||
@ -911,33 +912,112 @@ void convertAndUnrollScalar( const Mat& sc, int buftype, uchar* scbuf, size_t bl
|
||||
scbuf[i] = scbuf[i - esz];
|
||||
}
|
||||
|
||||
static void binary_op(InputArray _src1, InputArray _src2, OutputArray _dst,
|
||||
InputArray _mask, const BinaryFunc* tab, bool bitwise)
|
||||
|
||||
enum { OCL_OP_ADD=0, OCL_OP_SUB=1, OCL_OP_RSUB=2, OCL_OP_ABSDIFF=3, OCL_OP_MUL=4,
|
||||
OCL_OP_MUL_SCALE=5, OCL_OP_DIV_SCALE=6, OCL_OP_RECIP_SCALE=7, OCL_OP_ADDW=8,
|
||||
OCL_OP_AND=9, OCL_OP_OR=10, OCL_OP_XOR=11, OCL_OP_NOT=12, OCL_OP_MIN=13, OCL_OP_MAX=14 };
|
||||
|
||||
static const char* oclop2str[] = { "OP_ADD", "OP_SUB", "OP_RSUB", "OP_ABSDIFF",
|
||||
"OP_MUL", "OP_MUL_SCALE", "OP_DIV_SCALE", "OP_RECIP_SCALE",
|
||||
"OP_ADDW", "OP_AND", "OP_OR", "OP_XOR", "OP_NOT", "OP_MIN", "OP_MAX", 0 };
|
||||
|
||||
static bool ocl_binary_op(InputArray _src1, InputArray _src2, OutputArray _dst,
|
||||
InputArray _mask, bool bitwise, int oclop, bool haveScalar )
|
||||
{
|
||||
int kind1 = _src1.kind(), kind2 = _src2.kind();
|
||||
Mat src1 = _src1.getMat(), src2 = _src2.getMat();
|
||||
bool haveMask = !_mask.empty();
|
||||
int srctype = _src1.type();
|
||||
int srcdepth = CV_MAT_DEPTH(srctype);
|
||||
int cn = CV_MAT_CN(srctype);
|
||||
|
||||
if( oclop < 0 || ((haveMask || haveScalar) && cn > 4) )
|
||||
return false;
|
||||
|
||||
UMat src1 = _src1.getUMat(), src2;
|
||||
UMat dst = _dst.getUMat(), mask = _mask.getUMat();
|
||||
|
||||
char opts[1024];
|
||||
int kercn = haveMask || haveScalar ? cn : 1;
|
||||
sprintf(opts, "-D %s%s -D %s -D dstT=%s",
|
||||
(haveMask ? "MASK_" : ""), (haveScalar ? "UNARY_OP" : "BINARY_OP"), oclop2str[oclop],
|
||||
bitwise ? ocl::memopTypeToStr(CV_MAKETYPE(srcdepth, kercn)) :
|
||||
ocl::typeToStr(CV_MAKETYPE(srcdepth, kercn)));
|
||||
|
||||
ocl::Kernel k("KF", ocl::core::arithm_oclsrc, opts);
|
||||
if( k.empty() )
|
||||
return false;
|
||||
|
||||
int cscale = cn/kercn;
|
||||
ocl::KernelArg src1arg = ocl::KernelArg::ReadOnlyNoSize(src1, cscale);
|
||||
ocl::KernelArg dstarg = haveMask ? ocl::KernelArg::ReadWrite(dst, cscale) :
|
||||
ocl::KernelArg::WriteOnly(dst, cscale);
|
||||
ocl::KernelArg maskarg = ocl::KernelArg::ReadOnlyNoSize(mask, 1);
|
||||
|
||||
if( haveScalar )
|
||||
{
|
||||
size_t esz = CV_ELEM_SIZE(srctype);
|
||||
double buf[4] = {0,0,0,0};
|
||||
|
||||
if( oclop != OCL_OP_NOT )
|
||||
{
|
||||
Mat src2sc = _src2.getMat();
|
||||
convertAndUnrollScalar(src2sc, srctype, (uchar*)buf, 1);
|
||||
}
|
||||
|
||||
ocl::KernelArg scalararg = ocl::KernelArg(0, 0, 0, buf, esz);
|
||||
|
||||
if( !haveMask )
|
||||
k.args(src1arg, dstarg, scalararg);
|
||||
else
|
||||
k.args(src1arg, maskarg, dstarg, scalararg);
|
||||
}
|
||||
else
|
||||
{
|
||||
src2 = _src2.getUMat();
|
||||
ocl::KernelArg src2arg = ocl::KernelArg::ReadOnlyNoSize(src2, cscale);
|
||||
|
||||
if( !haveMask )
|
||||
k.args(src1arg, src2arg, dstarg);
|
||||
else
|
||||
k.args(src1arg, src2arg, maskarg, dstarg);
|
||||
}
|
||||
|
||||
size_t globalsize[] = { src1.cols*(cn/kercn), src1.rows };
|
||||
return k.run(2, globalsize, 0, false);
|
||||
}
|
||||
|
||||
|
||||
static void binary_op( InputArray _src1, InputArray _src2, OutputArray _dst,
|
||||
InputArray _mask, const BinaryFunc* tab,
|
||||
bool bitwise, int oclop )
|
||||
{
|
||||
const _InputArray *psrc1 = &_src1, *psrc2 = &_src2;
|
||||
int kind1 = psrc1->kind(), kind2 = psrc2->kind();
|
||||
int type1 = psrc1->type(), depth1 = CV_MAT_DEPTH(type1), cn = CV_MAT_CN(type1);
|
||||
int type2 = psrc2->type(), depth2 = CV_MAT_DEPTH(type2), cn2 = CV_MAT_CN(type2);
|
||||
int dims1 = psrc1->dims(), dims2 = psrc2->dims();
|
||||
Size sz1 = dims1 <= 2 ? psrc1->size() : Size();
|
||||
Size sz2 = dims2 <= 2 ? psrc2->size() : Size();
|
||||
bool use_opencl = (kind1 == _InputArray::UMAT || kind2 == _InputArray::UMAT) &&
|
||||
ocl::useOpenCL() && dims1 <= 2 && dims2 <= 2;
|
||||
bool haveMask = !_mask.empty(), haveScalar = false;
|
||||
BinaryFunc func;
|
||||
int c;
|
||||
|
||||
if( src1.dims <= 2 && src2.dims <= 2 && kind1 == kind2 &&
|
||||
src1.size() == src2.size() && src1.type() == src2.type() && !haveMask )
|
||||
if( dims1 <= 2 && dims2 <= 2 && kind1 == kind2 && sz1 == sz2 && type1 == type2 && !haveMask )
|
||||
{
|
||||
_dst.create(src1.size(), src1.type());
|
||||
Mat dst = _dst.getMat();
|
||||
_dst.create(sz1, type1);
|
||||
if( use_opencl && ocl_binary_op(*psrc1, *psrc2, _dst, _mask, bitwise, oclop, false) )
|
||||
return;
|
||||
if( bitwise )
|
||||
{
|
||||
func = *tab;
|
||||
c = (int)src1.elemSize();
|
||||
cn = (int)CV_ELEM_SIZE(type1);
|
||||
}
|
||||
else
|
||||
{
|
||||
func = tab[src1.depth()];
|
||||
c = src1.channels();
|
||||
}
|
||||
func = tab[depth1];
|
||||
|
||||
Mat src1 = psrc1->getMat(), src2 = psrc2->getMat(), dst = _dst.getMat();
|
||||
Size sz = getContinuousSize(src1, src2, dst);
|
||||
size_t len = sz.width*(size_t)c;
|
||||
size_t len = sz.width*(size_t)cn;
|
||||
if( len == (size_t)(int)len )
|
||||
{
|
||||
sz.width = (int)len;
|
||||
@ -946,56 +1026,67 @@ static void binary_op(InputArray _src1, InputArray _src2, OutputArray _dst,
|
||||
}
|
||||
}
|
||||
|
||||
if( (kind1 == _InputArray::MATX) + (kind2 == _InputArray::MATX) == 1 ||
|
||||
src1.size != src2.size || src1.type() != src2.type() )
|
||||
if( oclop == OCL_OP_NOT )
|
||||
haveScalar = true;
|
||||
else if( (kind1 == _InputArray::MATX) + (kind2 == _InputArray::MATX) == 1 ||
|
||||
!psrc1->sameSize(*psrc2) || type1 != type2 )
|
||||
{
|
||||
if( checkScalar(src1, src2.type(), kind1, kind2) )
|
||||
if( checkScalar(*psrc1, type2, kind1, kind2) )
|
||||
{
|
||||
// src1 is a scalar; swap it with src2
|
||||
swap(src1, src2);
|
||||
else if( !checkScalar(src2, src1.type(), kind2, kind1) )
|
||||
swap(psrc1, psrc2);
|
||||
swap(type1, type2);
|
||||
swap(depth1, depth2);
|
||||
swap(cn, cn2);
|
||||
swap(sz1, sz2);
|
||||
}
|
||||
else if( !checkScalar(*psrc2, type1, kind2, kind1) )
|
||||
CV_Error( CV_StsUnmatchedSizes,
|
||||
"The operation is neither 'array op array' (where arrays have the same size and type), "
|
||||
"nor 'array op scalar', nor 'scalar op array'" );
|
||||
haveScalar = true;
|
||||
}
|
||||
else
|
||||
{
|
||||
CV_Assert( psrc1->sameSize(*psrc2) && type1 == type2 );
|
||||
}
|
||||
|
||||
size_t esz = src1.elemSize();
|
||||
size_t esz = CV_ELEM_SIZE(type1);
|
||||
size_t blocksize0 = (BLOCK_SIZE + esz-1)/esz;
|
||||
int cn = src1.channels();
|
||||
BinaryFunc copymask = 0;
|
||||
Mat mask;
|
||||
bool reallocate = false;
|
||||
|
||||
if( haveMask )
|
||||
{
|
||||
mask = _mask.getMat();
|
||||
CV_Assert( (mask.type() == CV_8UC1 || mask.type() == CV_8SC1) );
|
||||
CV_Assert( mask.size == src1.size );
|
||||
int mtype = _mask.type();
|
||||
CV_Assert( (mtype == CV_8U || mtype == CV_8S) && _mask.sameSize(*psrc1));
|
||||
copymask = getCopyMaskFunc(esz);
|
||||
Mat tdst = _dst.getMat();
|
||||
reallocate = tdst.size != src1.size || tdst.type() != src1.type();
|
||||
reallocate = !_dst.sameSize(*psrc1) || _dst.type() != type1;
|
||||
}
|
||||
|
||||
AutoBuffer<uchar> _buf;
|
||||
uchar *scbuf = 0, *maskbuf = 0;
|
||||
|
||||
_dst.create(src1.dims, src1.size, src1.type());
|
||||
Mat dst = _dst.getMat();
|
||||
|
||||
_dst.createSameSize(*psrc1, type1);
|
||||
// if this is mask operation and dst has been reallocated,
|
||||
// we have to
|
||||
// we have to clear the destination
|
||||
if( haveMask && reallocate )
|
||||
dst = Scalar::all(0);
|
||||
_dst.setTo(0.);
|
||||
|
||||
if( use_opencl && ocl_binary_op(*psrc1, *psrc2, _dst, _mask, bitwise, oclop, haveScalar ))
|
||||
return;
|
||||
|
||||
Mat src1 = psrc1->getMat(), src2 = psrc2->getMat();
|
||||
Mat dst = _dst.getMat(), mask = _mask.getMat();
|
||||
|
||||
if( bitwise )
|
||||
{
|
||||
func = *tab;
|
||||
c = (int)esz;
|
||||
cn = (int)esz;
|
||||
}
|
||||
else
|
||||
{
|
||||
func = tab[src1.depth()];
|
||||
c = cn;
|
||||
func = tab[depth1];
|
||||
}
|
||||
|
||||
if( !haveScalar )
|
||||
@ -1006,8 +1097,8 @@ static void binary_op(InputArray _src1, InputArray _src2, OutputArray _dst,
|
||||
NAryMatIterator it(arrays, ptrs);
|
||||
size_t total = it.size, blocksize = total;
|
||||
|
||||
if( blocksize*c > INT_MAX )
|
||||
blocksize = INT_MAX/c;
|
||||
if( blocksize*cn > INT_MAX )
|
||||
blocksize = INT_MAX/cn;
|
||||
|
||||
if( haveMask )
|
||||
{
|
||||
@ -1022,7 +1113,7 @@ static void binary_op(InputArray _src1, InputArray _src2, OutputArray _dst,
|
||||
{
|
||||
int bsz = (int)MIN(total - j, blocksize);
|
||||
|
||||
func( ptrs[0], 0, ptrs[1], 0, haveMask ? maskbuf : ptrs[2], 0, Size(bsz*c, 1), 0 );
|
||||
func( ptrs[0], 0, ptrs[1], 0, haveMask ? maskbuf : ptrs[2], 0, Size(bsz*cn, 1), 0 );
|
||||
if( haveMask )
|
||||
{
|
||||
copymask( maskbuf, 0, ptrs[3], 0, ptrs[2], 0, Size(bsz, 1), &esz );
|
||||
@ -1054,7 +1145,7 @@ static void binary_op(InputArray _src1, InputArray _src2, OutputArray _dst,
|
||||
{
|
||||
int bsz = (int)MIN(total - j, blocksize);
|
||||
|
||||
func( ptrs[0], 0, scbuf, 0, haveMask ? maskbuf : ptrs[1], 0, Size(bsz*c, 1), 0 );
|
||||
func( ptrs[0], 0, scbuf, 0, haveMask ? maskbuf : ptrs[1], 0, Size(bsz*cn, 1), 0 );
|
||||
if( haveMask )
|
||||
{
|
||||
copymask( maskbuf, 0, ptrs[2], 0, ptrs[1], 0, Size(bsz, 1), &esz );
|
||||
@ -1101,47 +1192,59 @@ static BinaryFunc* getMinTab()
|
||||
void cv::bitwise_and(InputArray a, InputArray b, OutputArray c, InputArray mask)
|
||||
{
|
||||
BinaryFunc f = (BinaryFunc)GET_OPTIMIZED(and8u);
|
||||
binary_op(a, b, c, mask, &f, true);
|
||||
binary_op(a, b, c, mask, &f, true, OCL_OP_AND);
|
||||
}
|
||||
|
||||
void cv::bitwise_or(InputArray a, InputArray b, OutputArray c, InputArray mask)
|
||||
{
|
||||
BinaryFunc f = (BinaryFunc)GET_OPTIMIZED(or8u);
|
||||
binary_op(a, b, c, mask, &f, true);
|
||||
binary_op(a, b, c, mask, &f, true, OCL_OP_OR);
|
||||
}
|
||||
|
||||
void cv::bitwise_xor(InputArray a, InputArray b, OutputArray c, InputArray mask)
|
||||
{
|
||||
BinaryFunc f = (BinaryFunc)GET_OPTIMIZED(xor8u);
|
||||
binary_op(a, b, c, mask, &f, true);
|
||||
binary_op(a, b, c, mask, &f, true, OCL_OP_XOR);
|
||||
}
|
||||
|
||||
void cv::bitwise_not(InputArray a, OutputArray c, InputArray mask)
|
||||
{
|
||||
BinaryFunc f = (BinaryFunc)GET_OPTIMIZED(not8u);
|
||||
binary_op(a, a, c, mask, &f, true);
|
||||
binary_op(a, a, c, mask, &f, true, OCL_OP_NOT);
|
||||
}
|
||||
|
||||
void cv::max( InputArray src1, InputArray src2, OutputArray dst )
|
||||
{
|
||||
binary_op(src1, src2, dst, noArray(), getMaxTab(), false );
|
||||
binary_op(src1, src2, dst, noArray(), getMaxTab(), false, OCL_OP_MAX );
|
||||
}
|
||||
|
||||
void cv::min( InputArray src1, InputArray src2, OutputArray dst )
|
||||
{
|
||||
binary_op(src1, src2, dst, noArray(), getMinTab(), false );
|
||||
binary_op(src1, src2, dst, noArray(), getMinTab(), false, OCL_OP_MIN );
|
||||
}
|
||||
|
||||
void cv::max(const Mat& src1, const Mat& src2, Mat& dst)
|
||||
{
|
||||
OutputArray _dst(dst);
|
||||
binary_op(src1, src2, _dst, noArray(), getMaxTab(), false );
|
||||
binary_op(src1, src2, _dst, noArray(), getMaxTab(), false, OCL_OP_MAX );
|
||||
}
|
||||
|
||||
void cv::min(const Mat& src1, const Mat& src2, Mat& dst)
|
||||
{
|
||||
OutputArray _dst(dst);
|
||||
binary_op(src1, src2, _dst, noArray(), getMinTab(), false );
|
||||
binary_op(src1, src2, _dst, noArray(), getMinTab(), false, OCL_OP_MIN );
|
||||
}
|
||||
|
||||
void cv::max(const UMat& src1, const UMat& src2, UMat& dst)
|
||||
{
|
||||
OutputArray _dst(dst);
|
||||
binary_op(src1, src2, _dst, noArray(), getMaxTab(), false, OCL_OP_MAX );
|
||||
}
|
||||
|
||||
void cv::min(const UMat& src1, const UMat& src2, UMat& dst)
|
||||
{
|
||||
OutputArray _dst(dst);
|
||||
binary_op(src1, src2, _dst, noArray(), getMinTab(), false, OCL_OP_MIN );
|
||||
}
|
||||
|
||||
|
||||
@ -1171,73 +1274,213 @@ static int actualScalarDepth(const double* data, int len)
|
||||
CV_32S;
|
||||
}
|
||||
|
||||
static void arithm_op(InputArray _src1, InputArray _src2, OutputArray _dst,
|
||||
InputArray _mask, int dtype, BinaryFunc* tab, bool muldiv=false, void* usrdata=0)
|
||||
|
||||
static bool ocl_arithm_op(InputArray _src1, InputArray _src2, OutputArray _dst,
|
||||
InputArray _mask, int wtype,
|
||||
void* usrdata, int oclop,
|
||||
bool haveScalar )
|
||||
{
|
||||
int kind1 = _src1.kind(), kind2 = _src2.kind();
|
||||
Mat src1 = _src1.getMat(), src2 = _src2.getMat();
|
||||
int type1 = _src1.type(), depth1 = CV_MAT_DEPTH(type1), cn = CV_MAT_CN(type1);
|
||||
bool haveMask = !_mask.empty();
|
||||
|
||||
if( (haveMask || haveScalar) && cn > 4 )
|
||||
return false;
|
||||
|
||||
int dtype = _dst.type(), ddepth = CV_MAT_DEPTH(dtype), wdepth = CV_MAT_DEPTH(wtype);
|
||||
wtype = CV_MAKETYPE(wdepth, cn);
|
||||
int type2 = haveScalar ? _src2.type() : wtype, depth2 = CV_MAT_DEPTH(type2);
|
||||
|
||||
UMat src1 = _src1.getUMat(), src2;
|
||||
UMat dst = _dst.getUMat(), mask = _mask.getUMat();
|
||||
|
||||
char opts[1024];
|
||||
int kercn = haveMask || haveScalar ? cn : 1;
|
||||
|
||||
if( (depth1 == depth2 || haveScalar) && ddepth == depth1 && wdepth == depth1 )
|
||||
{
|
||||
const char* oclopstr = oclop2str[oclop];
|
||||
if( wdepth <= CV_16S )
|
||||
{
|
||||
oclopstr = oclop == OCL_OP_ADD ? "OCL_OP_ADD_SAT" :
|
||||
oclop == OCL_OP_SUB ? "OCL_OP_SUB_SAT" :
|
||||
oclop == OCL_OP_RSUB ? "OCL_OP_RSUB_SAT" : oclopstr;
|
||||
}
|
||||
sprintf(opts, "-D %s%s -D %s -D dstT=%s",
|
||||
(haveMask ? "MASK_" : ""), (haveScalar ? "UNARY_OP" : "BINARY_OP"),
|
||||
oclop2str[oclop], ocl::typeToStr(CV_MAKETYPE(ddepth, kercn)));
|
||||
}
|
||||
else
|
||||
{
|
||||
char cvtstr[3][32];
|
||||
sprintf(opts, "-D %s%s -D %s -D srcT1=%s -D srcT2=%s "
|
||||
"-D dstT=%s -D workT=%s -D convertToWT1=%s "
|
||||
"-D convertToWT2=%s -D convertToDT=%s",
|
||||
(haveMask ? "MASK_" : ""), (haveScalar ? "UNARY_OP" : "BINARY_OP"),
|
||||
oclop2str[oclop], ocl::typeToStr(CV_MAKETYPE(depth1, kercn)),
|
||||
ocl::typeToStr(CV_MAKETYPE(depth2, kercn)),
|
||||
ocl::typeToStr(CV_MAKETYPE(ddepth, kercn)),
|
||||
ocl::typeToStr(CV_MAKETYPE(wdepth, kercn)),
|
||||
ocl::convertTypeStr(depth1, wdepth, kercn, cvtstr[0]),
|
||||
ocl::convertTypeStr(depth2, wdepth, kercn, cvtstr[1]),
|
||||
ocl::convertTypeStr(wdepth, ddepth, kercn, cvtstr[2]));
|
||||
}
|
||||
|
||||
const uchar* usrdata_p = (const uchar*)usrdata;
|
||||
const double* usrdata_d = (const double*)usrdata;
|
||||
float usrdata_f[3];
|
||||
int i, n = oclop == OCL_OP_MUL_SCALE || oclop == OCL_OP_DIV_SCALE ||
|
||||
oclop == OCL_OP_RECIP_SCALE ? 1 : oclop == OCL_OP_ADDW ? 3 : 0;
|
||||
if( n > 0 && wdepth == CV_32F )
|
||||
{
|
||||
for( i = 0; i < n; i++ )
|
||||
usrdata_f[i] = (float)usrdata_d[i];
|
||||
usrdata_p = (const uchar*)usrdata_f;
|
||||
}
|
||||
size_t usrdata_esz = CV_ELEM_SIZE(wdepth);
|
||||
|
||||
ocl::Kernel k("KF", ocl::core::arithm_oclsrc, opts);
|
||||
if( k.empty() )
|
||||
return false;
|
||||
|
||||
int cscale = cn/kercn;
|
||||
|
||||
ocl::KernelArg src1arg = ocl::KernelArg::ReadOnlyNoSize(src1, cscale);
|
||||
ocl::KernelArg dstarg = haveMask ? ocl::KernelArg::ReadWrite(dst, cscale) :
|
||||
ocl::KernelArg::WriteOnly(dst, cscale);
|
||||
ocl::KernelArg maskarg = ocl::KernelArg::ReadOnlyNoSize(mask, 1);
|
||||
|
||||
if( haveScalar )
|
||||
{
|
||||
size_t esz = CV_ELEM_SIZE(wtype);
|
||||
double buf[4]={0,0,0,0};
|
||||
Mat src2sc = _src2.getMat();
|
||||
|
||||
if( !src2sc.empty() )
|
||||
{
|
||||
convertAndUnrollScalar(src2sc, wtype, (uchar*)buf, 1);
|
||||
}
|
||||
ocl::KernelArg scalararg = ocl::KernelArg(0, 0, 0, buf, esz);
|
||||
|
||||
if( !haveMask )
|
||||
k.args(src1arg, dstarg, scalararg);
|
||||
else
|
||||
k.args(src1arg, maskarg, dstarg, scalararg);
|
||||
}
|
||||
else
|
||||
{
|
||||
src2 = _src2.getUMat();
|
||||
ocl::KernelArg src2arg = ocl::KernelArg::ReadOnlyNoSize(src2, cscale);
|
||||
|
||||
if( !haveMask )
|
||||
{
|
||||
if(n == 0)
|
||||
k.args(src1arg, src2arg, dstarg);
|
||||
else if(n == 1)
|
||||
k.args(src1arg, src2arg, dstarg,
|
||||
ocl::KernelArg(0, 0, 0, usrdata_p, usrdata_esz));
|
||||
else if(n == 3)
|
||||
k.args(src1arg, src2arg, dstarg,
|
||||
ocl::KernelArg(0, 0, 0, usrdata_p, usrdata_esz),
|
||||
ocl::KernelArg(0, 0, 0, usrdata_p + usrdata_esz, usrdata_esz),
|
||||
ocl::KernelArg(0, 0, 0, usrdata_p + usrdata_esz*2, usrdata_esz));
|
||||
else
|
||||
CV_Error(Error::StsNotImplemented, "unsupported number of extra parameters");
|
||||
}
|
||||
else
|
||||
{
|
||||
k.args(src1arg, src2arg, maskarg, dstarg);
|
||||
}
|
||||
}
|
||||
|
||||
size_t globalsize[] = { src1.cols*(cn/kercn), src1.rows };
|
||||
return k.run(2, globalsize, 0, false);
|
||||
}
|
||||
|
||||
|
||||
static void arithm_op(InputArray _src1, InputArray _src2, OutputArray _dst,
|
||||
InputArray _mask, int dtype, BinaryFunc* tab, bool muldiv=false,
|
||||
void* usrdata=0, int oclop=-1 )
|
||||
{
|
||||
const _InputArray *psrc1 = &_src1, *psrc2 = &_src2;
|
||||
int kind1 = psrc1->kind(), kind2 = psrc2->kind();
|
||||
bool haveMask = !_mask.empty();
|
||||
bool reallocate = false;
|
||||
int type1 = psrc1->type(), depth1 = CV_MAT_DEPTH(type1), cn = CV_MAT_CN(type1);
|
||||
int type2 = psrc2->type(), depth2 = CV_MAT_DEPTH(type2), cn2 = CV_MAT_CN(type2);
|
||||
int wtype, dims1 = psrc1->dims(), dims2 = psrc2->dims();
|
||||
Size sz1 = dims1 <= 2 ? psrc1->size() : Size();
|
||||
Size sz2 = dims2 <= 2 ? psrc2->size() : Size();
|
||||
bool use_opencl = (kind1 == _InputArray::UMAT || kind2 == _InputArray::UMAT) &&
|
||||
ocl::useOpenCL() && dims1 <= 2 && dims2 <= 2;
|
||||
bool src1Scalar = checkScalar(*psrc1, type2, kind1, kind2);
|
||||
bool src2Scalar = checkScalar(*psrc2, type1, kind2, kind1);
|
||||
|
||||
bool src1Scalar = checkScalar(src1, src2.type(), kind1, kind2);
|
||||
bool src2Scalar = checkScalar(src2, src1.type(), kind2, kind1);
|
||||
|
||||
if( (kind1 == kind2 || src1.channels() == 1) && src1.dims <= 2 && src2.dims <= 2 &&
|
||||
src1.size() == src2.size() && src1.type() == src2.type() &&
|
||||
!haveMask && ((!_dst.fixedType() && (dtype < 0 || CV_MAT_DEPTH(dtype) == src1.depth())) ||
|
||||
(_dst.fixedType() && _dst.type() == _src1.type())) &&
|
||||
if( (kind1 == kind2 || cn == 1) && sz1 == sz2 && dims1 <= 2 && dims2 <= 2 && type1 == type2 &&
|
||||
!haveMask && ((!_dst.fixedType() && (dtype < 0 || CV_MAT_DEPTH(dtype) == depth1)) ||
|
||||
(_dst.fixedType() && _dst.type() == type1)) &&
|
||||
((src1Scalar && src2Scalar) || (!src1Scalar && !src2Scalar)) )
|
||||
{
|
||||
_dst.create(src1.size(), src1.type());
|
||||
Mat dst = _dst.getMat();
|
||||
_dst.createSameSize(*psrc1, type1);
|
||||
if( use_opencl &&
|
||||
ocl_arithm_op(*psrc1, *psrc2, _dst, _mask,
|
||||
(!usrdata ? type1 : std::max(depth1, CV_32F)),
|
||||
usrdata, oclop, false))
|
||||
return;
|
||||
Mat src1 = psrc1->getMat(), src2 = psrc2->getMat(), dst = _dst.getMat();
|
||||
Size sz = getContinuousSize(src1, src2, dst, src1.channels());
|
||||
tab[src1.depth()](src1.data, src1.step, src2.data, src2.step, dst.data, dst.step, sz, usrdata);
|
||||
tab[depth1](src1.data, src1.step, src2.data, src2.step, dst.data, dst.step, sz, usrdata);
|
||||
return;
|
||||
}
|
||||
|
||||
bool haveScalar = false, swapped12 = false;
|
||||
int depth2 = src2.depth();
|
||||
if( src1.size != src2.size || src1.channels() != src2.channels() ||
|
||||
|
||||
if( dims1 != dims2 || sz1 != sz2 || cn != cn2 ||
|
||||
((kind1 == _InputArray::MATX || kind2 == _InputArray::MATX) &&
|
||||
src1.cols == 1 && src2.rows == 4) )
|
||||
(sz1 == Size(1,4) || sz2 == Size(1,4))) )
|
||||
{
|
||||
if( checkScalar(src1, src2.type(), kind1, kind2) )
|
||||
if( checkScalar(*psrc1, type2, kind1, kind2) )
|
||||
{
|
||||
// src1 is a scalar; swap it with src2
|
||||
swap(src1, src2);
|
||||
swap(psrc1, psrc2);
|
||||
swap(sz1, sz2);
|
||||
swap(type1, type2);
|
||||
swap(depth1, depth2);
|
||||
swap(cn, cn2);
|
||||
swap(dims1, dims2);
|
||||
swapped12 = true;
|
||||
if( oclop == OCL_OP_SUB )
|
||||
oclop = OCL_OP_RSUB;
|
||||
}
|
||||
else if( !checkScalar(src2, src1.type(), kind2, kind1) )
|
||||
else if( !checkScalar(*psrc2, type1, kind2, kind1) )
|
||||
CV_Error( CV_StsUnmatchedSizes,
|
||||
"The operation is neither 'array op array' (where arrays have the same size and the same number of channels), "
|
||||
"The operation is neither 'array op array' "
|
||||
"(where arrays have the same size and the same number of channels), "
|
||||
"nor 'array op scalar', nor 'scalar op array'" );
|
||||
haveScalar = true;
|
||||
CV_Assert(src2.type() == CV_64F && (src2.rows == 4 || src2.rows == 1));
|
||||
CV_Assert(type2 == CV_64F && (sz2.height == 1 || sz2.height == 4));
|
||||
|
||||
if (!muldiv)
|
||||
{
|
||||
depth2 = actualScalarDepth(src2.ptr<double>(), src1.channels());
|
||||
if( depth2 == CV_64F && (src1.depth() < CV_32S || src1.depth() == CV_32F) )
|
||||
Mat sc = psrc2->getMat();
|
||||
depth2 = actualScalarDepth(sc.ptr<double>(), cn);
|
||||
if( depth2 == CV_64F && (depth1 < CV_32S || depth1 == CV_32F) )
|
||||
depth2 = CV_32F;
|
||||
}
|
||||
else
|
||||
depth2 = CV_64F;
|
||||
}
|
||||
|
||||
int cn = src1.channels(), depth1 = src1.depth(), wtype;
|
||||
BinaryFunc cvtsrc1 = 0, cvtsrc2 = 0, cvtdst = 0;
|
||||
|
||||
if( dtype < 0 )
|
||||
{
|
||||
if( _dst.fixedType() )
|
||||
dtype = _dst.type();
|
||||
else
|
||||
{
|
||||
if( !haveScalar && src1.type() != src2.type() )
|
||||
if( !haveScalar && type1 != type2 )
|
||||
CV_Error(CV_StsBadArg,
|
||||
"When the input arrays in add/subtract/multiply/divide functions have different types, "
|
||||
"the output array type must be explicitly specified");
|
||||
dtype = src1.type();
|
||||
dtype = type1;
|
||||
}
|
||||
}
|
||||
dtype = CV_MAT_DEPTH(dtype);
|
||||
@ -1262,39 +1505,41 @@ static void arithm_op(InputArray _src1, InputArray _src2, OutputArray _dst,
|
||||
wtype = std::max(wtype, dtype);
|
||||
}
|
||||
|
||||
cvtsrc1 = depth1 == wtype ? 0 : getConvertFunc(depth1, wtype);
|
||||
cvtsrc2 = depth2 == depth1 ? cvtsrc1 : depth2 == wtype ? 0 : getConvertFunc(depth2, wtype);
|
||||
cvtdst = dtype == wtype ? 0 : getConvertFunc(wtype, dtype);
|
||||
|
||||
dtype = CV_MAKETYPE(dtype, cn);
|
||||
wtype = CV_MAKETYPE(wtype, cn);
|
||||
|
||||
size_t esz1 = src1.elemSize(), esz2 = src2.elemSize();
|
||||
size_t dsz = CV_ELEM_SIZE(dtype), wsz = CV_ELEM_SIZE(wtype);
|
||||
size_t blocksize0 = (size_t)(BLOCK_SIZE + wsz-1)/wsz;
|
||||
BinaryFunc copymask = 0;
|
||||
Mat mask;
|
||||
|
||||
if( haveMask )
|
||||
{
|
||||
mask = _mask.getMat();
|
||||
CV_Assert( (mask.type() == CV_8UC1 || mask.type() == CV_8SC1) );
|
||||
CV_Assert( mask.size == src1.size );
|
||||
copymask = getCopyMaskFunc(dsz);
|
||||
Mat tdst = _dst.getMat();
|
||||
reallocate = tdst.size != src1.size || tdst.type() != dtype;
|
||||
int mtype = _mask.type();
|
||||
CV_Assert( (mtype == CV_8UC1 || mtype == CV_8SC1) && _mask.sameSize(*psrc1) );
|
||||
reallocate = !_dst.sameSize(*psrc1) || _dst.type() != dtype;
|
||||
}
|
||||
|
||||
_dst.createSameSize(*psrc1, dtype);
|
||||
if( reallocate )
|
||||
_dst.setTo(0.);
|
||||
|
||||
if( use_opencl &&
|
||||
ocl_arithm_op(*psrc1, *psrc2, _dst, _mask, wtype,
|
||||
usrdata, oclop, haveScalar))
|
||||
return;
|
||||
|
||||
BinaryFunc cvtsrc1 = type1 == wtype ? 0 : getConvertFunc(type1, wtype);
|
||||
BinaryFunc cvtsrc2 = type2 == type1 ? cvtsrc1 : type2 == wtype ? 0 : getConvertFunc(type2, wtype);
|
||||
BinaryFunc cvtdst = dtype == wtype ? 0 : getConvertFunc(wtype, dtype);
|
||||
|
||||
size_t esz1 = CV_ELEM_SIZE(type1), esz2 = CV_ELEM_SIZE(type2);
|
||||
size_t dsz = CV_ELEM_SIZE(dtype), wsz = CV_ELEM_SIZE(wtype);
|
||||
size_t blocksize0 = (size_t)(BLOCK_SIZE + wsz-1)/wsz;
|
||||
BinaryFunc copymask = getCopyMaskFunc(dsz);
|
||||
Mat src1 = psrc1->getMat(), src2 = psrc2->getMat(), dst = _dst.getMat(), mask = _mask.getMat();
|
||||
|
||||
AutoBuffer<uchar> _buf;
|
||||
uchar *buf, *maskbuf = 0, *buf1 = 0, *buf2 = 0, *wbuf = 0;
|
||||
size_t bufesz = (cvtsrc1 ? wsz : 0) + (cvtsrc2 || haveScalar ? wsz : 0) + (cvtdst ? wsz : 0) + (haveMask ? dsz : 0);
|
||||
|
||||
_dst.create(src1.dims, src1.size, dtype);
|
||||
Mat dst = _dst.getMat();
|
||||
|
||||
if( haveMask && reallocate )
|
||||
dst = Scalar::all(0);
|
||||
|
||||
size_t bufesz = (cvtsrc1 ? wsz : 0) +
|
||||
(cvtsrc2 || haveScalar ? wsz : 0) +
|
||||
(cvtdst ? wsz : 0) +
|
||||
(haveMask ? dsz : 0);
|
||||
BinaryFunc func = tab[CV_MAT_DEPTH(wtype)];
|
||||
|
||||
if( !haveScalar )
|
||||
@ -1476,7 +1721,7 @@ static BinaryFunc* getAbsDiffTab()
|
||||
void cv::add( InputArray src1, InputArray src2, OutputArray dst,
|
||||
InputArray mask, int dtype )
|
||||
{
|
||||
arithm_op(src1, src2, dst, mask, dtype, getAddTab() );
|
||||
arithm_op(src1, src2, dst, mask, dtype, getAddTab(), false, 0, OCL_OP_ADD );
|
||||
}
|
||||
|
||||
void cv::subtract( InputArray src1, InputArray src2, OutputArray dst,
|
||||
@ -1511,12 +1756,12 @@ void cv::subtract( InputArray src1, InputArray src2, OutputArray dst,
|
||||
}
|
||||
}
|
||||
#endif
|
||||
arithm_op(src1, src2, dst, mask, dtype, getSubTab() );
|
||||
arithm_op(src1, src2, dst, mask, dtype, getSubTab(), false, 0, OCL_OP_SUB );
|
||||
}
|
||||
|
||||
void cv::absdiff( InputArray src1, InputArray src2, OutputArray dst )
|
||||
{
|
||||
arithm_op(src1, src2, dst, noArray(), -1, getAbsDiffTab());
|
||||
arithm_op(src1, src2, dst, noArray(), -1, getAbsDiffTab(), false, 0, OCL_OP_ABSDIFF);
|
||||
}
|
||||
|
||||
/****************************************************************************************\
|
||||
@ -1847,19 +2092,20 @@ static BinaryFunc* getRecipTab()
|
||||
void cv::multiply(InputArray src1, InputArray src2,
|
||||
OutputArray dst, double scale, int dtype)
|
||||
{
|
||||
arithm_op(src1, src2, dst, noArray(), dtype, getMulTab(), true, &scale);
|
||||
arithm_op(src1, src2, dst, noArray(), dtype, getMulTab(),
|
||||
true, &scale, scale == 1. ? OCL_OP_MUL : OCL_OP_MUL_SCALE);
|
||||
}
|
||||
|
||||
void cv::divide(InputArray src1, InputArray src2,
|
||||
OutputArray dst, double scale, int dtype)
|
||||
{
|
||||
arithm_op(src1, src2, dst, noArray(), dtype, getDivTab(), true, &scale);
|
||||
arithm_op(src1, src2, dst, noArray(), dtype, getDivTab(), true, &scale, OCL_OP_DIV_SCALE);
|
||||
}
|
||||
|
||||
void cv::divide(double scale, InputArray src2,
|
||||
OutputArray dst, int dtype)
|
||||
{
|
||||
arithm_op(src2, src2, dst, noArray(), dtype, getRecipTab(), true, &scale);
|
||||
arithm_op(src2, src2, dst, noArray(), dtype, getRecipTab(), true, &scale, OCL_OP_RECIP_SCALE);
|
||||
}
|
||||
|
||||
/****************************************************************************************\
|
||||
@ -2020,7 +2266,7 @@ void cv::addWeighted( InputArray src1, double alpha, InputArray src2,
|
||||
double beta, double gamma, OutputArray dst, int dtype )
|
||||
{
|
||||
double scalars[] = {alpha, beta, gamma};
|
||||
arithm_op(src1, src2, dst, noArray(), dtype, getAddWeightedTab(), true, scalars);
|
||||
arithm_op(src1, src2, dst, noArray(), dtype, getAddWeightedTab(), true, scalars, OCL_OP_ADDW);
|
||||
}
|
||||
|
||||
|
||||
|
@ -220,6 +220,21 @@ void Mat::copyTo( OutputArray _dst ) const
|
||||
return;
|
||||
}
|
||||
|
||||
if( _dst.isUMat() )
|
||||
{
|
||||
_dst.create( dims, size.p, type() );
|
||||
UMat dst = _dst.getUMat();
|
||||
|
||||
size_t i, sz[CV_MAX_DIM], dstofs[CV_MAX_DIM], esz = elemSize();
|
||||
for( i = 0; i < (size_t)dims; i++ )
|
||||
sz[i] = size.p[i];
|
||||
sz[dims-1] *= esz;
|
||||
dst.ndoffset(dstofs);
|
||||
dstofs[dims-1] *= esz;
|
||||
dst.u->currAllocator->upload(dst.u, data, dims, sz, dstofs, dst.step.p, step.p);
|
||||
return;
|
||||
}
|
||||
|
||||
if( dims <= 2 )
|
||||
{
|
||||
_dst.create( rows, cols, type() );
|
||||
|
@ -1436,6 +1436,181 @@ Size _InputArray::size(int i) const
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
int _InputArray::sizend(int* arrsz, int i) const
|
||||
{
|
||||
int j, d=0, k = kind();
|
||||
|
||||
if( k == NONE )
|
||||
;
|
||||
else if( k == MAT )
|
||||
{
|
||||
CV_Assert( i < 0 );
|
||||
const Mat& m = *(const Mat*)obj;
|
||||
d = m.dims;
|
||||
if(arrsz)
|
||||
for(j = 0; j < d; j++)
|
||||
arrsz[j] = m.size.p[j];
|
||||
}
|
||||
else if( k == UMAT )
|
||||
{
|
||||
CV_Assert( i < 0 );
|
||||
const UMat& m = *(const UMat*)obj;
|
||||
d = m.dims;
|
||||
if(arrsz)
|
||||
for(j = 0; j < d; j++)
|
||||
arrsz[j] = m.size.p[j];
|
||||
}
|
||||
else if( k == STD_VECTOR_MAT && i >= 0 )
|
||||
{
|
||||
const std::vector<Mat>& vv = *(const std::vector<Mat>*)obj;
|
||||
CV_Assert( i < (int)vv.size() );
|
||||
const Mat& m = vv[i];
|
||||
d = m.dims;
|
||||
if(arrsz)
|
||||
for(j = 0; j < d; j++)
|
||||
arrsz[j] = m.size.p[j];
|
||||
}
|
||||
else if( k == STD_VECTOR_UMAT && i >= 0 )
|
||||
{
|
||||
const std::vector<UMat>& vv = *(const std::vector<UMat>*)obj;
|
||||
CV_Assert( i < (int)vv.size() );
|
||||
const UMat& m = vv[i];
|
||||
d = m.dims;
|
||||
if(arrsz)
|
||||
for(j = 0; j < d; j++)
|
||||
arrsz[j] = m.size.p[j];
|
||||
}
|
||||
else
|
||||
{
|
||||
Size sz2d = size(i);
|
||||
d = 2;
|
||||
if(arrsz)
|
||||
{
|
||||
arrsz[0] = sz2d.height;
|
||||
arrsz[1] = sz2d.width;
|
||||
}
|
||||
}
|
||||
|
||||
return d;
|
||||
}
|
||||
|
||||
|
||||
bool _InputArray::sameSize(const _InputArray& arr) const
|
||||
{
|
||||
int k1 = kind(), k2 = arr.kind();
|
||||
Size sz1;
|
||||
|
||||
if( k1 == MAT )
|
||||
{
|
||||
const Mat* m = ((const Mat*)obj);
|
||||
if( k2 == MAT )
|
||||
return m->size == ((const Mat*)arr.obj)->size;
|
||||
if( k2 == UMAT )
|
||||
return m->size == ((const UMat*)arr.obj)->size;
|
||||
if( m->dims > 2 )
|
||||
return false;
|
||||
sz1 = m->size();
|
||||
}
|
||||
else if( k1 == UMAT )
|
||||
{
|
||||
const UMat* m = ((const UMat*)obj);
|
||||
if( k2 == MAT )
|
||||
return m->size == ((const Mat*)arr.obj)->size;
|
||||
if( k2 == UMAT )
|
||||
return m->size == ((const UMat*)arr.obj)->size;
|
||||
if( m->dims > 2 )
|
||||
return false;
|
||||
sz1 = m->size();
|
||||
}
|
||||
else
|
||||
sz1 = size();
|
||||
if( arr.dims() > 2 )
|
||||
return false;
|
||||
return sz1 == arr.size();
|
||||
}
|
||||
|
||||
int _InputArray::dims(int i) const
|
||||
{
|
||||
int k = kind();
|
||||
|
||||
if( k == MAT )
|
||||
{
|
||||
CV_Assert( i < 0 );
|
||||
return ((const Mat*)obj)->dims;
|
||||
}
|
||||
|
||||
if( k == EXPR )
|
||||
{
|
||||
CV_Assert( i < 0 );
|
||||
return ((const MatExpr*)obj)->a.dims;
|
||||
}
|
||||
|
||||
if( k == UMAT )
|
||||
{
|
||||
CV_Assert( i < 0 );
|
||||
return ((const UMat*)obj)->dims;
|
||||
}
|
||||
|
||||
if( k == MATX )
|
||||
{
|
||||
CV_Assert( i < 0 );
|
||||
return 2;
|
||||
}
|
||||
|
||||
if( k == STD_VECTOR )
|
||||
{
|
||||
CV_Assert( i < 0 );
|
||||
return 2;
|
||||
}
|
||||
|
||||
if( k == NONE )
|
||||
return 0;
|
||||
|
||||
if( k == STD_VECTOR_VECTOR )
|
||||
{
|
||||
const std::vector<std::vector<uchar> >& vv = *(const std::vector<std::vector<uchar> >*)obj;
|
||||
if( i < 0 )
|
||||
return 1;
|
||||
CV_Assert( i < (int)vv.size() );
|
||||
return 2;
|
||||
}
|
||||
|
||||
if( k == STD_VECTOR_MAT )
|
||||
{
|
||||
const std::vector<Mat>& vv = *(const std::vector<Mat>*)obj;
|
||||
if( i < 0 )
|
||||
return 1;
|
||||
CV_Assert( i < (int)vv.size() );
|
||||
|
||||
return vv[i].dims;
|
||||
}
|
||||
|
||||
if( k == OPENGL_BUFFER )
|
||||
{
|
||||
CV_Assert( i < 0 );
|
||||
return 2;
|
||||
}
|
||||
|
||||
if( k == GPU_MAT )
|
||||
{
|
||||
CV_Assert( i < 0 );
|
||||
return 2;
|
||||
}
|
||||
|
||||
if( k == OCL_MAT )
|
||||
{
|
||||
return 2;
|
||||
}
|
||||
|
||||
CV_Assert( k == CUDA_MEM );
|
||||
//if( k == CUDA_MEM )
|
||||
{
|
||||
CV_Assert( i < 0 );
|
||||
return 2;
|
||||
}
|
||||
}
|
||||
|
||||
size_t _InputArray::total(int i) const
|
||||
{
|
||||
int k = kind();
|
||||
@ -1570,6 +1745,61 @@ bool _InputArray::empty() const
|
||||
return ((const cuda::CudaMem*)obj)->empty();
|
||||
}
|
||||
|
||||
bool _InputArray::isContinuous(int i) const
|
||||
{
|
||||
int k = kind();
|
||||
|
||||
if( k == MAT )
|
||||
return i < 0 ? ((const Mat*)obj)->isContinuous() : true;
|
||||
|
||||
if( k == UMAT )
|
||||
return i < 0 ? ((const UMat*)obj)->isContinuous() : true;
|
||||
|
||||
if( k == EXPR || k == MATX || k == STD_VECTOR || k == NONE || k == STD_VECTOR_VECTOR)
|
||||
return true;
|
||||
|
||||
if( k == STD_VECTOR_MAT )
|
||||
{
|
||||
const std::vector<Mat>& vv = *(const std::vector<Mat>*)obj;
|
||||
CV_Assert((size_t)i < vv.size());
|
||||
return vv[i].isContinuous();
|
||||
}
|
||||
|
||||
if( k == STD_VECTOR_UMAT )
|
||||
{
|
||||
const std::vector<UMat>& vv = *(const std::vector<UMat>*)obj;
|
||||
CV_Assert((size_t)i < vv.size());
|
||||
return vv[i].isContinuous();
|
||||
}
|
||||
|
||||
CV_Error(CV_StsNotImplemented, "This method is not implemented for oclMat yet");
|
||||
return false;
|
||||
}
|
||||
|
||||
void _InputArray::copyTo(const _OutputArray& arr) const
|
||||
{
|
||||
int k = kind();
|
||||
|
||||
if( k == NONE )
|
||||
arr.release();
|
||||
else if( k == MAT || k == MATX || k == STD_VECTOR )
|
||||
{
|
||||
Mat m = getMat();
|
||||
m.copyTo(arr);
|
||||
}
|
||||
else if( k == EXPR )
|
||||
{
|
||||
const MatExpr& e = *((MatExpr*)obj);
|
||||
if( arr.kind() == MAT )
|
||||
arr.getMatRef() = e;
|
||||
else
|
||||
Mat(e).copyTo(arr);
|
||||
}
|
||||
else if( k == UMAT )
|
||||
((UMat*)obj)->copyTo(arr);
|
||||
else
|
||||
CV_Error(Error::StsNotImplemented, "");
|
||||
}
|
||||
|
||||
bool _OutputArray::fixedSize() const
|
||||
{
|
||||
@ -1665,7 +1895,7 @@ void _OutputArray::create(int rows, int cols, int mtype, int i, bool allowTransp
|
||||
create(2, sizes, mtype, i, allowTransposed, fixedDepthMask);
|
||||
}
|
||||
|
||||
void _OutputArray::create(int dims, const int* sizes, int mtype, int i,
|
||||
void _OutputArray::create(int d, const int* sizes, int mtype, int i,
|
||||
bool allowTransposed, int fixedDepthMask) const
|
||||
{
|
||||
int k = kind();
|
||||
@ -1683,7 +1913,7 @@ void _OutputArray::create(int dims, const int* sizes, int mtype, int i,
|
||||
m.release();
|
||||
}
|
||||
|
||||
if( dims == 2 && m.dims == 2 && m.data &&
|
||||
if( d == 2 && m.dims == 2 && m.data &&
|
||||
m.type() == mtype && m.rows == sizes[1] && m.cols == sizes[0] )
|
||||
return;
|
||||
}
|
||||
@ -1697,11 +1927,11 @@ void _OutputArray::create(int dims, const int* sizes, int mtype, int i,
|
||||
}
|
||||
if(fixedSize())
|
||||
{
|
||||
CV_Assert(m.dims == dims);
|
||||
for(int j = 0; j < dims; ++j)
|
||||
CV_Assert(m.dims == d);
|
||||
for(int j = 0; j < d; ++j)
|
||||
CV_Assert(m.size[j] == sizes[j]);
|
||||
}
|
||||
m.create(dims, sizes, mtype);
|
||||
m.create(d, sizes, mtype);
|
||||
return;
|
||||
}
|
||||
|
||||
@ -1717,7 +1947,7 @@ void _OutputArray::create(int dims, const int* sizes, int mtype, int i,
|
||||
m.release();
|
||||
}
|
||||
|
||||
if( dims == 2 && m.dims == 2 && !m.empty() &&
|
||||
if( d == 2 && m.dims == 2 && !m.empty() &&
|
||||
m.type() == mtype && m.rows == sizes[1] && m.cols == sizes[0] )
|
||||
return;
|
||||
}
|
||||
@ -1731,11 +1961,11 @@ void _OutputArray::create(int dims, const int* sizes, int mtype, int i,
|
||||
}
|
||||
if(fixedSize())
|
||||
{
|
||||
CV_Assert(m.dims == dims);
|
||||
for(int j = 0; j < dims; ++j)
|
||||
CV_Assert(m.dims == d);
|
||||
for(int j = 0; j < d; ++j)
|
||||
CV_Assert(m.size[j] == sizes[j]);
|
||||
}
|
||||
m.create(dims, sizes, mtype);
|
||||
m.create(d, sizes, mtype);
|
||||
return;
|
||||
}
|
||||
|
||||
@ -1744,14 +1974,14 @@ void _OutputArray::create(int dims, const int* sizes, int mtype, int i,
|
||||
CV_Assert( i < 0 );
|
||||
int type0 = CV_MAT_TYPE(flags);
|
||||
CV_Assert( mtype == type0 || (CV_MAT_CN(mtype) == 1 && ((1 << type0) & fixedDepthMask) != 0) );
|
||||
CV_Assert( dims == 2 && ((sizes[0] == sz.height && sizes[1] == sz.width) ||
|
||||
CV_Assert( d == 2 && ((sizes[0] == sz.height && sizes[1] == sz.width) ||
|
||||
(allowTransposed && sizes[0] == sz.width && sizes[1] == sz.height)));
|
||||
return;
|
||||
}
|
||||
|
||||
if( k == STD_VECTOR || k == STD_VECTOR_VECTOR )
|
||||
{
|
||||
CV_Assert( dims == 2 && (sizes[0] == 1 || sizes[1] == 1 || sizes[0]*sizes[1] == 0) );
|
||||
CV_Assert( d == 2 && (sizes[0] == 1 || sizes[1] == 1 || sizes[0]*sizes[1] == 0) );
|
||||
size_t len = sizes[0]*sizes[1] > 0 ? sizes[0] + sizes[1] - 1 : 0;
|
||||
std::vector<uchar>* v = (std::vector<uchar>*)obj;
|
||||
|
||||
@ -1843,7 +2073,7 @@ void _OutputArray::create(int dims, const int* sizes, int mtype, int i,
|
||||
|
||||
if( i < 0 )
|
||||
{
|
||||
CV_Assert( dims == 2 && (sizes[0] == 1 || sizes[1] == 1 || sizes[0]*sizes[1] == 0) );
|
||||
CV_Assert( d == 2 && (sizes[0] == 1 || sizes[1] == 1 || sizes[0]*sizes[1] == 0) );
|
||||
size_t len = sizes[0]*sizes[1] > 0 ? sizes[0] + sizes[1] - 1 : 0, len0 = v.size();
|
||||
|
||||
CV_Assert(!fixedSize() || len == len0);
|
||||
@ -1873,7 +2103,7 @@ void _OutputArray::create(int dims, const int* sizes, int mtype, int i,
|
||||
m.release();
|
||||
}
|
||||
|
||||
if( dims == 2 && m.dims == 2 && m.data &&
|
||||
if( d == 2 && m.dims == 2 && m.data &&
|
||||
m.type() == mtype && m.rows == sizes[1] && m.cols == sizes[0] )
|
||||
return;
|
||||
}
|
||||
@ -1887,18 +2117,24 @@ void _OutputArray::create(int dims, const int* sizes, int mtype, int i,
|
||||
}
|
||||
if(fixedSize())
|
||||
{
|
||||
CV_Assert(m.dims == dims);
|
||||
for(int j = 0; j < dims; ++j)
|
||||
CV_Assert(m.dims == d);
|
||||
for(int j = 0; j < d; ++j)
|
||||
CV_Assert(m.size[j] == sizes[j]);
|
||||
}
|
||||
|
||||
m.create(dims, sizes, mtype);
|
||||
m.create(d, sizes, mtype);
|
||||
return;
|
||||
}
|
||||
|
||||
CV_Error(Error::StsNotImplemented, "Unknown/unsupported array type");
|
||||
}
|
||||
|
||||
void _OutputArray::createSameSize(const _InputArray& arr, int mtype) const
|
||||
{
|
||||
int arrsz[CV_MAX_DIM], d = arr.sizend(arrsz);
|
||||
create(d, arrsz, mtype);
|
||||
}
|
||||
|
||||
void _OutputArray::release() const
|
||||
{
|
||||
CV_Assert(!fixedSize());
|
||||
@ -2010,6 +2246,23 @@ cuda::CudaMem& _OutputArray::getCudaMemRef() const
|
||||
return *(cuda::CudaMem*)obj;
|
||||
}
|
||||
|
||||
void _OutputArray::setTo(const _InputArray& arr) const
|
||||
{
|
||||
int k = kind();
|
||||
|
||||
if( k == NONE )
|
||||
;
|
||||
else if( k == MAT || k == MATX || k == STD_VECTOR )
|
||||
{
|
||||
Mat m = getMat();
|
||||
m.setTo(arr);
|
||||
}
|
||||
else if( k == UMAT )
|
||||
((UMat*)obj)->setTo(arr);
|
||||
else
|
||||
CV_Error(Error::StsNotImplemented, "");
|
||||
}
|
||||
|
||||
static _InputOutputArray _none;
|
||||
InputOutputArray noArray() { return _none; }
|
||||
|
||||
|
@ -114,8 +114,13 @@ typedef struct _cl_sampler * cl_sampler;
|
||||
|
||||
typedef int cl_int;
|
||||
typedef unsigned cl_uint;
|
||||
typedef long cl_long;
|
||||
typedef unsigned long cl_ulong;
|
||||
#if defined (_WIN32) && defined(_MSC_VER)
|
||||
typedef __int64 cl_long;
|
||||
typedef unsigned __int64 cl_ulong;
|
||||
#else
|
||||
typedef long cl_long;
|
||||
typedef unsigned long cl_ulong;
|
||||
#endif
|
||||
|
||||
typedef cl_uint cl_bool; /* WARNING! Unlike cl_ types in cl_platform.h, cl_bool is not guaranteed to be the same size as the bool in kernels. */
|
||||
typedef cl_ulong cl_bitfield;
|
||||
@ -592,9 +597,16 @@ static void* initOpenCLAndLoad(const char* funcname)
|
||||
{
|
||||
if(!initialized)
|
||||
{
|
||||
handle = dlopen("/System/Library/Frameworks/OpenCL.framework/Versions/Current/OpenCL", RTLD_LAZY);
|
||||
const char* oclpath = getenv("OPENCV_OPENCL_RUNTIME");
|
||||
oclpath = oclpath && strlen(oclpath) > 0 ? oclpath :
|
||||
"/System/Library/Frameworks/OpenCL.framework/Versions/Current/OpenCL";
|
||||
handle = dlopen(oclpath, RTLD_LAZY);
|
||||
initialized = true;
|
||||
g_haveOpenCL = handle != 0 && dlsym(handle, oclFuncToCheck) != 0;
|
||||
if( g_haveOpenCL )
|
||||
fprintf(stderr, "Succesffuly loaded OpenCL v1.1+ runtime from %s\n", oclpath);
|
||||
else
|
||||
fprintf(stderr, "Failed to load OpenCL runtime\n");
|
||||
}
|
||||
if(!handle)
|
||||
return 0;
|
||||
@ -1212,16 +1224,13 @@ namespace cv { namespace ocl {
|
||||
|
||||
struct UMat2D
|
||||
{
|
||||
UMat2D(const UMat& m, int accessFlags)
|
||||
UMat2D(const UMat& m)
|
||||
{
|
||||
CV_Assert(m.dims == 2);
|
||||
data = (cl_mem)m.handle(accessFlags);
|
||||
offset = m.offset;
|
||||
step = m.step;
|
||||
rows = m.rows;
|
||||
cols = m.cols;
|
||||
}
|
||||
cl_mem data;
|
||||
size_t offset;
|
||||
size_t step;
|
||||
int rows;
|
||||
@ -1230,10 +1239,8 @@ struct UMat2D
|
||||
|
||||
struct UMat3D
|
||||
{
|
||||
UMat3D(const UMat& m, int accessFlags)
|
||||
UMat3D(const UMat& m)
|
||||
{
|
||||
CV_Assert(m.dims == 3);
|
||||
data = (cl_mem)m.handle(accessFlags);
|
||||
offset = m.offset;
|
||||
step = m.step.p[1];
|
||||
slicestep = m.step.p[0];
|
||||
@ -1241,7 +1248,6 @@ struct UMat3D
|
||||
rows = m.size.p[1];
|
||||
cols = m.size.p[2];
|
||||
}
|
||||
cl_mem data;
|
||||
size_t offset;
|
||||
size_t slicestep;
|
||||
size_t step;
|
||||
@ -1315,7 +1321,7 @@ void setUseOpenCL(bool flag)
|
||||
}
|
||||
}
|
||||
|
||||
void finish()
|
||||
void finish2()
|
||||
{
|
||||
Queue::getDefault().finish();
|
||||
}
|
||||
@ -1528,7 +1534,7 @@ String Device::OpenCLVersion() const
|
||||
{ return p ? p->getStrProp(CL_DEVICE_EXTENSIONS) : String(); }
|
||||
|
||||
String Device::driverVersion() const
|
||||
{ return p ? p->getStrProp(CL_DEVICE_EXTENSIONS) : String(); }
|
||||
{ return p ? p->getStrProp(CL_DRIVER_VERSION) : String(); }
|
||||
|
||||
int Device::type() const
|
||||
{ return p ? p->getProp<cl_device_type, int>(CL_DEVICE_TYPE) : 0; }
|
||||
@ -1705,14 +1711,14 @@ size_t Device::profilingTimerResolution() const
|
||||
|
||||
const Device& Device::getDefault()
|
||||
{
|
||||
const Context& ctx = Context::getDefault();
|
||||
const Context2& ctx = Context2::getDefault();
|
||||
int idx = TLSData::get()->device;
|
||||
return ctx.device(idx);
|
||||
}
|
||||
|
||||
/////////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
struct Context::Impl
|
||||
struct Context2::Impl
|
||||
{
|
||||
Impl(int dtype0)
|
||||
{
|
||||
@ -1777,7 +1783,7 @@ struct Context::Impl
|
||||
devices.clear();
|
||||
}
|
||||
|
||||
Program getProg(const ProgramSource& src,
|
||||
Program getProg(const ProgramSource2& src,
|
||||
const String& buildflags, String& errmsg)
|
||||
{
|
||||
String prefix = Program::getPrefix(buildflags);
|
||||
@ -1787,7 +1793,8 @@ struct Context::Impl
|
||||
return it->second;
|
||||
//String filename = format("%08x%08x_%08x%08x.clb2",
|
||||
Program prog(src, buildflags, errmsg);
|
||||
phash.insert(std::pair<HashKey,Program>(k, prog));
|
||||
if(prog.ptr())
|
||||
phash.insert(std::pair<HashKey,Program>(k, prog));
|
||||
return prog;
|
||||
}
|
||||
|
||||
@ -1797,7 +1804,7 @@ struct Context::Impl
|
||||
std::vector<Device> devices;
|
||||
bool initialized;
|
||||
|
||||
typedef ProgramSource::hash_t hash_t;
|
||||
typedef ProgramSource2::hash_t hash_t;
|
||||
|
||||
struct HashKey
|
||||
{
|
||||
@ -1812,18 +1819,18 @@ struct Context::Impl
|
||||
};
|
||||
|
||||
|
||||
Context::Context()
|
||||
Context2::Context2()
|
||||
{
|
||||
p = 0;
|
||||
}
|
||||
|
||||
Context::Context(int dtype)
|
||||
Context2::Context2(int dtype)
|
||||
{
|
||||
p = 0;
|
||||
create(dtype);
|
||||
}
|
||||
|
||||
bool Context::create(int dtype0)
|
||||
bool Context2::create(int dtype0)
|
||||
{
|
||||
if( !haveOpenCL() )
|
||||
return false;
|
||||
@ -1838,19 +1845,19 @@ bool Context::create(int dtype0)
|
||||
return p != 0;
|
||||
}
|
||||
|
||||
Context::~Context()
|
||||
Context2::~Context2()
|
||||
{
|
||||
p->release();
|
||||
}
|
||||
|
||||
Context::Context(const Context& c)
|
||||
Context2::Context2(const Context2& c)
|
||||
{
|
||||
p = (Impl*)c.p;
|
||||
if(p)
|
||||
p->addref();
|
||||
}
|
||||
|
||||
Context& Context::operator = (const Context& c)
|
||||
Context2& Context2::operator = (const Context2& c)
|
||||
{
|
||||
Impl* newp = (Impl*)c.p;
|
||||
if(newp)
|
||||
@ -1861,30 +1868,30 @@ Context& Context::operator = (const Context& c)
|
||||
return *this;
|
||||
}
|
||||
|
||||
void* Context::ptr() const
|
||||
void* Context2::ptr() const
|
||||
{
|
||||
return p->handle;
|
||||
}
|
||||
|
||||
size_t Context::ndevices() const
|
||||
size_t Context2::ndevices() const
|
||||
{
|
||||
return p ? p->devices.size() : 0;
|
||||
}
|
||||
|
||||
const Device& Context::device(size_t idx) const
|
||||
const Device& Context2::device(size_t idx) const
|
||||
{
|
||||
static Device dummy;
|
||||
return !p || idx >= p->devices.size() ? dummy : p->devices[idx];
|
||||
}
|
||||
|
||||
Context& Context::getDefault()
|
||||
Context2& Context2::getDefault()
|
||||
{
|
||||
static Context ctx;
|
||||
static Context2 ctx;
|
||||
if( !ctx.p && haveOpenCL() )
|
||||
{
|
||||
// do not create new Context right away.
|
||||
// do not create new Context2 right away.
|
||||
// First, try to retrieve existing context of the same type.
|
||||
// In its turn, Platform::getContext() may call Context::create()
|
||||
// In its turn, Platform::getContext() may call Context2::create()
|
||||
// if there is no such context.
|
||||
ctx.create(Device::TYPE_ACCELERATOR);
|
||||
if(!ctx.p)
|
||||
@ -1898,7 +1905,7 @@ Context& Context::getDefault()
|
||||
return ctx;
|
||||
}
|
||||
|
||||
Program Context::getProg(const ProgramSource& prog,
|
||||
Program Context2::getProg(const ProgramSource2& prog,
|
||||
const String& buildopts, String& errmsg)
|
||||
{
|
||||
return p ? p->getProg(prog, buildopts, errmsg) : Program();
|
||||
@ -1906,14 +1913,14 @@ Program Context::getProg(const ProgramSource& prog,
|
||||
|
||||
struct Queue::Impl
|
||||
{
|
||||
Impl(const Context& c, const Device& d)
|
||||
Impl(const Context2& c, const Device& d)
|
||||
{
|
||||
refcount = 1;
|
||||
const Context* pc = &c;
|
||||
const Context2* pc = &c;
|
||||
cl_context ch = (cl_context)pc->ptr();
|
||||
if( !ch )
|
||||
{
|
||||
pc = &Context::getDefault();
|
||||
pc = &Context2::getDefault();
|
||||
ch = (cl_context)pc->ptr();
|
||||
}
|
||||
cl_device_id dh = (cl_device_id)d.ptr();
|
||||
@ -1943,7 +1950,7 @@ Queue::Queue()
|
||||
p = 0;
|
||||
}
|
||||
|
||||
Queue::Queue(const Context& c, const Device& d)
|
||||
Queue::Queue(const Context2& c, const Device& d)
|
||||
{
|
||||
p = 0;
|
||||
create(c, d);
|
||||
@ -1973,7 +1980,7 @@ Queue::~Queue()
|
||||
p->release();
|
||||
}
|
||||
|
||||
bool Queue::create(const Context& c, const Device& d)
|
||||
bool Queue::create(const Context2& c, const Device& d)
|
||||
{
|
||||
if(p)
|
||||
p->release();
|
||||
@ -1996,7 +2003,7 @@ Queue& Queue::getDefault()
|
||||
{
|
||||
Queue& q = TLSData::get()->oclQueue;
|
||||
if( !q.p )
|
||||
q.create(Context::getDefault());
|
||||
q.create(Context2::getDefault());
|
||||
return q;
|
||||
}
|
||||
|
||||
@ -2008,15 +2015,20 @@ static cl_command_queue getQueue(const Queue& q)
|
||||
return qq;
|
||||
}
|
||||
|
||||
KernelArg::KernelArg(int _flags, UMat* _m, void* _obj, size_t _sz)
|
||||
: flags(_flags), m(_m), obj(_obj), sz(_sz)
|
||||
KernelArg::KernelArg()
|
||||
: flags(0), m(0), obj(0), sz(0), wscale(1)
|
||||
{
|
||||
}
|
||||
|
||||
KernelArg::KernelArg(int _flags, UMat* _m, int _wscale, const void* _obj, size_t _sz)
|
||||
: flags(_flags), m(_m), obj(_obj), sz(_sz), wscale(_wscale)
|
||||
{
|
||||
}
|
||||
|
||||
KernelArg KernelArg::Constant(const Mat& m)
|
||||
{
|
||||
CV_Assert(m.isContinuous());
|
||||
return KernelArg(CONSTANT, 0, m.data, m.total()*m.elemSize());
|
||||
return KernelArg(CONSTANT, 0, 1, m.data, m.total()*m.elemSize());
|
||||
}
|
||||
|
||||
|
||||
@ -2099,8 +2111,8 @@ Kernel::Kernel(const char* kname, const Program& prog)
|
||||
create(kname, prog);
|
||||
}
|
||||
|
||||
Kernel::Kernel(const char* kname, const ProgramSource& src,
|
||||
const String& buildopts, String& errmsg)
|
||||
Kernel::Kernel(const char* kname, const ProgramSource2& src,
|
||||
const String& buildopts, String* errmsg)
|
||||
{
|
||||
p = 0;
|
||||
create(kname, src, buildopts, errmsg);
|
||||
@ -2143,15 +2155,17 @@ bool Kernel::create(const char* kname, const Program& prog)
|
||||
return p != 0;
|
||||
}
|
||||
|
||||
bool Kernel::create(const char* kname, const ProgramSource& src,
|
||||
const String& buildopts, String& errmsg)
|
||||
bool Kernel::create(const char* kname, const ProgramSource2& src,
|
||||
const String& buildopts, String* errmsg)
|
||||
{
|
||||
if(p)
|
||||
{
|
||||
p->release();
|
||||
p = 0;
|
||||
}
|
||||
const Program& prog = Context::getDefault().getProg(src, buildopts, errmsg);
|
||||
String tempmsg;
|
||||
if( !errmsg ) errmsg = &tempmsg;
|
||||
const Program& prog = Context2::getDefault().getProg(src, buildopts, *errmsg);
|
||||
return create(kname, prog);
|
||||
}
|
||||
|
||||
@ -2160,55 +2174,91 @@ void* Kernel::ptr() const
|
||||
return p ? p->handle : 0;
|
||||
}
|
||||
|
||||
void Kernel::set(int i, const void* value, size_t sz)
|
||||
bool Kernel::empty() const
|
||||
{
|
||||
CV_Assert( p && clSetKernelArg(p->handle, (cl_uint)i, sz, value) >= 0 );
|
||||
if( i == 0 )
|
||||
p->cleanupUMats();
|
||||
return ptr() == 0;
|
||||
}
|
||||
|
||||
void Kernel::set(int i, const UMat& m)
|
||||
int Kernel::set(int i, const void* value, size_t sz)
|
||||
{
|
||||
set(i, KernelArg(KernelArg::READ_WRITE, (UMat*)&m, 0, 0));
|
||||
}
|
||||
|
||||
void Kernel::set(int i, const KernelArg& arg)
|
||||
{
|
||||
CV_Assert( p && p->handle );
|
||||
CV_Assert(i >= 0);
|
||||
if( i == 0 )
|
||||
p->cleanupUMats();
|
||||
if( !p || !p->handle || clSetKernelArg(p->handle, (cl_uint)i, sz, value) < 0 )
|
||||
return -1;
|
||||
return i+1;
|
||||
}
|
||||
|
||||
int Kernel::set(int i, const UMat& m)
|
||||
{
|
||||
return set(i, KernelArg(KernelArg::READ_WRITE, (UMat*)&m, 0, 0));
|
||||
}
|
||||
|
||||
int Kernel::set(int i, const KernelArg& arg)
|
||||
{
|
||||
CV_Assert( i >= 0 );
|
||||
if( i == 0 )
|
||||
p->cleanupUMats();
|
||||
if( !p || !p->handle )
|
||||
return -1;
|
||||
if( arg.m )
|
||||
{
|
||||
int accessFlags = ((arg.flags & KernelArg::READ_ONLY) ? ACCESS_READ : 0) +
|
||||
((arg.flags & KernelArg::WRITE_ONLY) ? ACCESS_WRITE : 0);
|
||||
cl_mem h = (cl_mem)arg.m->handle(accessFlags);
|
||||
|
||||
if( arg.m->dims <= 2 )
|
||||
{
|
||||
UMat2D u2d(*arg.m, accessFlags);
|
||||
clSetKernelArg(p->handle, (cl_uint)i, sizeof(u2d), &u2d);
|
||||
UMat2D u2d(*arg.m);
|
||||
clSetKernelArg(p->handle, (cl_uint)i, sizeof(h), &h);
|
||||
clSetKernelArg(p->handle, (cl_uint)(i+1), sizeof(u2d.step), &u2d.step);
|
||||
clSetKernelArg(p->handle, (cl_uint)(i+2), sizeof(u2d.offset), &u2d.offset);
|
||||
i += 3;
|
||||
|
||||
if( !(arg.flags & KernelArg::NO_SIZE) )
|
||||
{
|
||||
int cols = u2d.cols*arg.wscale;
|
||||
clSetKernelArg(p->handle, (cl_uint)i, sizeof(u2d.rows), &u2d.rows);
|
||||
clSetKernelArg(p->handle, (cl_uint)(i+1), sizeof(u2d.cols), &cols);
|
||||
i += 2;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
UMat3D u3d(*arg.m, accessFlags);
|
||||
clSetKernelArg(p->handle, (cl_uint)i, sizeof(u3d), &u3d);
|
||||
UMat3D u3d(*arg.m);
|
||||
clSetKernelArg(p->handle, (cl_uint)i, sizeof(h), &h);
|
||||
clSetKernelArg(p->handle, (cl_uint)(i+1), sizeof(u3d.slicestep), &u3d.slicestep);
|
||||
clSetKernelArg(p->handle, (cl_uint)(i+2), sizeof(u3d.step), &u3d.step);
|
||||
clSetKernelArg(p->handle, (cl_uint)(i+3), sizeof(u3d.offset), &u3d.offset);
|
||||
i += 4;
|
||||
if( !(arg.flags & KernelArg::NO_SIZE) )
|
||||
{
|
||||
int cols = u3d.cols*arg.wscale;
|
||||
clSetKernelArg(p->handle, (cl_uint)i, sizeof(u3d.slices), &u3d.rows);
|
||||
clSetKernelArg(p->handle, (cl_uint)(i+1), sizeof(u3d.rows), &u3d.rows);
|
||||
clSetKernelArg(p->handle, (cl_uint)(i+2), sizeof(u3d.cols), &cols);
|
||||
i += 3;
|
||||
}
|
||||
}
|
||||
p->addUMat(*arg.m);
|
||||
return i;
|
||||
}
|
||||
else
|
||||
{
|
||||
clSetKernelArg(p->handle, (cl_uint)i, arg.sz, arg.obj);
|
||||
}
|
||||
clSetKernelArg(p->handle, (cl_uint)i, arg.sz, arg.obj);
|
||||
return i+1;
|
||||
}
|
||||
|
||||
|
||||
void Kernel::run(int dims, size_t offset[], size_t globalsize[], size_t localsize[],
|
||||
bool Kernel::run(int dims, size_t globalsize[], size_t localsize[],
|
||||
bool sync, const Queue& q)
|
||||
{
|
||||
CV_Assert(p && p->handle && p->e == 0);
|
||||
if(!p || !p->handle || p->e != 0)
|
||||
return false;
|
||||
cl_command_queue qq = getQueue(q);
|
||||
clEnqueueNDRangeKernel(qq, p->handle, (cl_uint)dims,
|
||||
offset, globalsize, localsize, 0, 0,
|
||||
sync ? 0 : &p->e);
|
||||
if( sync )
|
||||
size_t offset[CV_MAX_DIM] = {0};
|
||||
cl_int retval = clEnqueueNDRangeKernel(qq, p->handle, (cl_uint)dims,
|
||||
offset, globalsize, localsize, 0, 0,
|
||||
sync ? 0 : &p->e);
|
||||
if( sync || retval < 0 )
|
||||
{
|
||||
clFinish(qq);
|
||||
p->cleanupUMats();
|
||||
@ -2218,14 +2268,17 @@ void Kernel::run(int dims, size_t offset[], size_t globalsize[], size_t localsiz
|
||||
p->addref();
|
||||
clSetEventCallback(p->e, CL_COMPLETE, oclCleanupCallback, p);
|
||||
}
|
||||
return retval >= 0;
|
||||
}
|
||||
|
||||
void Kernel::runTask(bool sync, const Queue& q)
|
||||
bool Kernel::runTask(bool sync, const Queue& q)
|
||||
{
|
||||
CV_Assert(p && p->handle && p->e == 0);
|
||||
if(!p || !p->handle || p->e != 0)
|
||||
return false;
|
||||
|
||||
cl_command_queue qq = getQueue(q);
|
||||
clEnqueueTask(qq, p->handle, 0, 0, sync ? 0 : &p->e);
|
||||
if( sync )
|
||||
cl_int retval = clEnqueueTask(qq, p->handle, 0, 0, sync ? 0 : &p->e);
|
||||
if( sync || retval < 0 )
|
||||
{
|
||||
clFinish(qq);
|
||||
p->cleanupUMats();
|
||||
@ -2235,6 +2288,7 @@ void Kernel::runTask(bool sync, const Queue& q)
|
||||
p->addref();
|
||||
clSetEventCallback(p->e, CL_COMPLETE, oclCleanupCallback, p);
|
||||
}
|
||||
return retval >= 0;
|
||||
}
|
||||
|
||||
|
||||
@ -2273,11 +2327,11 @@ size_t Kernel::localMemSize() const
|
||||
|
||||
struct Program::Impl
|
||||
{
|
||||
Impl(const ProgramSource& _src,
|
||||
Impl(const ProgramSource2& _src,
|
||||
const String& _buildflags, String& errmsg)
|
||||
{
|
||||
refcount = 1;
|
||||
const Context& ctx = Context::getDefault();
|
||||
const Context2& ctx = Context2::getDefault();
|
||||
src = _src;
|
||||
buildflags = _buildflags;
|
||||
const String& srcstr = src.source();
|
||||
@ -2293,17 +2347,20 @@ struct Program::Impl
|
||||
void** deviceList = deviceListBuf;
|
||||
for( i = 0; i < n; i++ )
|
||||
deviceList[i] = ctx.device(i).ptr();
|
||||
printf("Building the OpenCL program ...\n");
|
||||
retval = clBuildProgram(handle, n,
|
||||
(const cl_device_id*)deviceList,
|
||||
buildflags.c_str(), 0, 0);
|
||||
if( retval == CL_BUILD_PROGRAM_FAILURE )
|
||||
{
|
||||
char buf[1024];
|
||||
char buf[1<<16];
|
||||
size_t retsz = 0;
|
||||
clGetProgramBuildInfo(handle, (cl_device_id)deviceList[0], CL_PROGRAM_BUILD_LOG,
|
||||
sizeof(buf)-16, buf, &retsz);
|
||||
errmsg = String(buf);
|
||||
CV_Error_(Error::StsAssert, ("OpenCL program can not be built: %s", errmsg.c_str()));
|
||||
}
|
||||
CV_Assert(retval >= 0);
|
||||
}
|
||||
}
|
||||
|
||||
@ -2315,7 +2372,7 @@ struct Program::Impl
|
||||
if(_buf.empty())
|
||||
return;
|
||||
String prefix0 = Program::getPrefix(buildflags);
|
||||
const Context& ctx = Context::getDefault();
|
||||
const Context2& ctx = Context2::getDefault();
|
||||
const Device& dev = Device::getDefault();
|
||||
const char* pos0 = _buf.c_str();
|
||||
const char* pos1 = strchr(pos0, '\n');
|
||||
@ -2366,7 +2423,7 @@ struct Program::Impl
|
||||
|
||||
IMPLEMENT_REFCOUNTABLE();
|
||||
|
||||
ProgramSource src;
|
||||
ProgramSource2 src;
|
||||
String buildflags;
|
||||
cl_program handle;
|
||||
};
|
||||
@ -2374,7 +2431,7 @@ struct Program::Impl
|
||||
|
||||
Program::Program() { p = 0; }
|
||||
|
||||
Program::Program(const ProgramSource& src,
|
||||
Program::Program(const ProgramSource2& src,
|
||||
const String& buildflags, String& errmsg)
|
||||
{
|
||||
p = 0;
|
||||
@ -2405,7 +2462,7 @@ Program::~Program()
|
||||
p->release();
|
||||
}
|
||||
|
||||
bool Program::create(const ProgramSource& src,
|
||||
bool Program::create(const ProgramSource2& src,
|
||||
const String& buildflags, String& errmsg)
|
||||
{
|
||||
if(p)
|
||||
@ -2419,9 +2476,9 @@ bool Program::create(const ProgramSource& src,
|
||||
return p != 0;
|
||||
}
|
||||
|
||||
const ProgramSource& Program::source() const
|
||||
const ProgramSource2& Program::source() const
|
||||
{
|
||||
static ProgramSource dummy;
|
||||
static ProgramSource2 dummy;
|
||||
return p ? p->src : dummy;
|
||||
}
|
||||
|
||||
@ -2455,7 +2512,7 @@ String Program::getPrefix() const
|
||||
|
||||
String Program::getPrefix(const String& buildflags)
|
||||
{
|
||||
const Context& ctx = Context::getDefault();
|
||||
const Context2& ctx = Context2::getDefault();
|
||||
const Device& dev = ctx.device(0);
|
||||
return format("name=%s\ndriver=%s\nbuildflags=%s\n",
|
||||
dev.name().c_str(), dev.driverVersion().c_str(), buildflags.c_str());
|
||||
@ -2463,7 +2520,7 @@ String Program::getPrefix(const String& buildflags)
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
struct ProgramSource::Impl
|
||||
struct ProgramSource2::Impl
|
||||
{
|
||||
Impl(const char* _src)
|
||||
{
|
||||
@ -2482,39 +2539,39 @@ struct ProgramSource::Impl
|
||||
|
||||
IMPLEMENT_REFCOUNTABLE();
|
||||
String src;
|
||||
ProgramSource::hash_t h;
|
||||
ProgramSource2::hash_t h;
|
||||
};
|
||||
|
||||
|
||||
ProgramSource::ProgramSource()
|
||||
ProgramSource2::ProgramSource2()
|
||||
{
|
||||
p = 0;
|
||||
}
|
||||
|
||||
ProgramSource::ProgramSource(const char* prog)
|
||||
ProgramSource2::ProgramSource2(const char* prog)
|
||||
{
|
||||
p = new Impl(prog);
|
||||
}
|
||||
|
||||
ProgramSource::ProgramSource(const String& prog)
|
||||
ProgramSource2::ProgramSource2(const String& prog)
|
||||
{
|
||||
p = new Impl(prog);
|
||||
}
|
||||
|
||||
ProgramSource::~ProgramSource()
|
||||
ProgramSource2::~ProgramSource2()
|
||||
{
|
||||
if(p)
|
||||
p->release();
|
||||
}
|
||||
|
||||
ProgramSource::ProgramSource(const ProgramSource& prog)
|
||||
ProgramSource2::ProgramSource2(const ProgramSource2& prog)
|
||||
{
|
||||
p = prog.p;
|
||||
if(p)
|
||||
p->addref();
|
||||
}
|
||||
|
||||
ProgramSource& ProgramSource::operator = (const ProgramSource& prog)
|
||||
ProgramSource2& ProgramSource2::operator = (const ProgramSource2& prog)
|
||||
{
|
||||
Impl* newp = (Impl*)prog.p;
|
||||
if(newp)
|
||||
@ -2525,13 +2582,13 @@ ProgramSource& ProgramSource::operator = (const ProgramSource& prog)
|
||||
return *this;
|
||||
}
|
||||
|
||||
const String& ProgramSource::source() const
|
||||
const String& ProgramSource2::source() const
|
||||
{
|
||||
static String dummy;
|
||||
return p ? p->src : dummy;
|
||||
}
|
||||
|
||||
ProgramSource::hash_t ProgramSource::hash() const
|
||||
ProgramSource2::hash_t ProgramSource2::hash() const
|
||||
{
|
||||
return p ? p->h : 0;
|
||||
}
|
||||
@ -2551,7 +2608,7 @@ public:
|
||||
return u;
|
||||
}
|
||||
|
||||
void getBestFlags(const Context& ctx, int& createFlags, int& flags0) const
|
||||
void getBestFlags(const Context2& ctx, int& createFlags, int& flags0) const
|
||||
{
|
||||
const Device& dev = ctx.device(0);
|
||||
createFlags = CL_MEM_READ_WRITE;
|
||||
@ -2574,7 +2631,7 @@ public:
|
||||
total *= sizes[i];
|
||||
}
|
||||
|
||||
Context& ctx = Context::getDefault();
|
||||
Context2& ctx = Context2::getDefault();
|
||||
int createFlags = 0, flags0 = 0;
|
||||
getBestFlags(ctx, createFlags, flags0);
|
||||
|
||||
@ -2603,7 +2660,7 @@ public:
|
||||
if(u->handle == 0)
|
||||
{
|
||||
CV_Assert(u->origdata != 0);
|
||||
Context& ctx = Context::getDefault();
|
||||
Context2& ctx = Context2::getDefault();
|
||||
int createFlags = 0, flags0 = 0;
|
||||
getBestFlags(ctx, createFlags, flags0);
|
||||
|
||||
@ -2848,7 +2905,6 @@ public:
|
||||
new_srcofs, new_dstofs, new_sz, new_srcstep[0], new_srcstep[1],
|
||||
new_dststep[0], new_dststep[1], dstptr, 0, 0, 0) >= 0 );
|
||||
}
|
||||
clFinish(q);
|
||||
}
|
||||
|
||||
void upload(UMatData* u, const void* srcptr, int dims, const size_t sz[],
|
||||
@ -2890,6 +2946,9 @@ public:
|
||||
|
||||
if( iscontinuous )
|
||||
{
|
||||
int crc = 0;
|
||||
for( size_t i = 0; i < total; i++ )
|
||||
crc ^= ((uchar*)srcptr)[i];
|
||||
CV_Assert( clEnqueueWriteBuffer(q, (cl_mem)u->handle,
|
||||
CL_TRUE, dstrawofs, total, srcptr, 0, 0, 0) >= 0 );
|
||||
}
|
||||
@ -2949,10 +3008,11 @@ public:
|
||||
}
|
||||
else
|
||||
{
|
||||
CV_Assert( clEnqueueCopyBufferRect(q, (cl_mem)src->handle, (cl_mem)dst->handle,
|
||||
cl_int retval;
|
||||
CV_Assert( (retval = clEnqueueCopyBufferRect(q, (cl_mem)src->handle, (cl_mem)dst->handle,
|
||||
new_srcofs, new_dstofs, new_sz,
|
||||
new_srcstep[0], new_srcstep[1], new_dststep[0], new_dststep[1],
|
||||
0, 0, 0) >= 0 );
|
||||
0, 0, 0)) >= 0 );
|
||||
}
|
||||
|
||||
dst->markHostCopyObsolete(true);
|
||||
@ -2969,4 +3029,61 @@ MatAllocator* getOpenCLAllocator()
|
||||
return &allocator;
|
||||
}
|
||||
|
||||
const char* typeToStr(int t)
|
||||
{
|
||||
static const char* tab[]=
|
||||
{
|
||||
"uchar", "uchar2", "uchar3", "uchar4",
|
||||
"char", "char2", "char3", "char4",
|
||||
"ushort", "ushort2", "ushort3", "ushort4",
|
||||
"short", "short2", "short3", "short4",
|
||||
"int", "int2", "int3", "int4",
|
||||
"float", "float2", "float3", "float4",
|
||||
"double", "double2", "double3", "double4",
|
||||
"?", "?", "?", "?"
|
||||
};
|
||||
int cn = CV_MAT_CN(t);
|
||||
return cn > 4 ? "?" : tab[CV_MAT_DEPTH(t)*4 + cn-1];
|
||||
}
|
||||
|
||||
const char* memopTypeToStr(int t)
|
||||
{
|
||||
static const char* tab[]=
|
||||
{
|
||||
"uchar", "uchar2", "uchar3", "uchar4",
|
||||
"uchar", "uchar2", "uchar3", "uchar4",
|
||||
"ushort", "ushort2", "ushort3", "ushort4",
|
||||
"ushort", "ushort2", "ushort3", "ushort4",
|
||||
"int", "int2", "int3", "int4",
|
||||
"int", "int2", "int3", "int4",
|
||||
"long", "long2", "long3", "long4",
|
||||
"?", "?", "?", "?"
|
||||
};
|
||||
int cn = CV_MAT_CN(t);
|
||||
return cn > 4 ? "?" : tab[CV_MAT_DEPTH(t)*4 + cn-1];
|
||||
}
|
||||
|
||||
const char* convertTypeStr(int sdepth, int ddepth, int cn, char* buf)
|
||||
{
|
||||
if( sdepth == ddepth )
|
||||
return "noconvert";
|
||||
const char *typestr = typeToStr(CV_MAKETYPE(ddepth, cn));
|
||||
if( ddepth >= CV_32F ||
|
||||
(ddepth == CV_32S && sdepth < CV_32S) ||
|
||||
(ddepth == CV_16S && sdepth <= CV_8S) ||
|
||||
(ddepth == CV_16U && sdepth == CV_8U))
|
||||
{
|
||||
sprintf(buf, "convert_%s", typestr);
|
||||
}
|
||||
else if( sdepth >= CV_32F )
|
||||
{
|
||||
sprintf(buf, "convert_%s%s_rte", typestr, (ddepth < CV_32S ? "_sat" : ""));
|
||||
}
|
||||
else
|
||||
{
|
||||
sprintf(buf, "convert_%s_sat", typestr);
|
||||
}
|
||||
return buf;
|
||||
}
|
||||
|
||||
}}
|
||||
|
303
modules/core/src/opencl/arithm.cl
Normal file
303
modules/core/src/opencl/arithm.cl
Normal file
@ -0,0 +1,303 @@
|
||||
/*M///////////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
|
||||
//
|
||||
// By downloading, copying, installing or using the software you agree to this license.
|
||||
// If you do not agree to this license, do not download, install,
|
||||
// copy or use the software.
|
||||
//
|
||||
//
|
||||
// License Agreement
|
||||
// For Open Source Computer Vision Library
|
||||
//
|
||||
// Copyright (C) 2010-2012, Institute Of Software Chinese Academy Of Science, all rights reserved.
|
||||
// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
|
||||
// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
|
||||
// Third party copyrights are property of their respective owners.
|
||||
//
|
||||
// @Authors
|
||||
// Jia Haipeng, jiahaipeng95@gmail.com
|
||||
//
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without modification,
|
||||
// are permitted provided that the following conditions are met:
|
||||
//
|
||||
// * Redistribution's of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimer.
|
||||
//
|
||||
// * Redistribution's in binary form must reproduce the above copyright notice,
|
||||
// this list of conditions and the following disclaimer in the documentation
|
||||
// and/or other materials provided with the distribution.
|
||||
//
|
||||
// * The name of the copyright holders may not be used to endorse or promote products
|
||||
// derived from this software without specific prior written permission.
|
||||
//
|
||||
// This software is provided by the copyright holders and contributors as is and
|
||||
// any express or implied warranties, including, but not limited to, the implied
|
||||
// warranties of merchantability and fitness for a particular purpose are disclaimed.
|
||||
// In no event shall the copyright holders or contributors be liable for any direct,
|
||||
// indirect, incidental, special, exemplary, or consequential damages
|
||||
// (including, but not limited to, procurement of substitute goods or services;
|
||||
// loss of use, data, or profits; or business interruption) however caused
|
||||
// and on any theory of liability, whether in contract, strict liability,
|
||||
// or tort (including negligence or otherwise) arising in any way out of
|
||||
// the use of this software, even if advised of the possibility of such damage.
|
||||
//
|
||||
//M*/
|
||||
|
||||
/*
|
||||
Usage:
|
||||
after compiling this program user gets a single kernel called KF.
|
||||
the following flags should be passed:
|
||||
1) one of "-D BINARY_OP", "-D UNARY_OP", "-D MASK_BINARY_OP" or "-D MASK_UNARY_OP"
|
||||
2) the actual operation performed, one of "-D OP_...", see below the list of operations.
|
||||
2a) "-D dstDepth=<destination depth> [-D cn=<num channels]"
|
||||
for some operations, like min/max/and/or/xor it's enough
|
||||
2b) "-D srcDepth1=<source1 depth> -D srcDepth2=<source2 depth> -D dstDepth=<destination depth>
|
||||
-D workDepth=<work depth> [-D cn=<num channels>]" - for mixed-type operations
|
||||
*/
|
||||
|
||||
#if defined (DOUBLE_SUPPORT)
|
||||
#ifdef cl_khr_fp64
|
||||
#pragma OPENCL EXTENSION cl_khr_fp64:enable
|
||||
#elif defined (cl_amd_fp64)
|
||||
#pragma OPENCL EXTENSION cl_amd_fp64:enable
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#define CV_32S 4
|
||||
#define CV_32F 5
|
||||
|
||||
#define dstelem *(dstT*)(dstptr + dst_index)
|
||||
#define noconvert(x) x
|
||||
|
||||
#ifndef workT
|
||||
|
||||
#define srcT1 dstT
|
||||
#define srcT2 dstT
|
||||
#define workT dstT
|
||||
#define srcelem1 *(dstT*)(srcptr1 + src1_index)
|
||||
#define srcelem2 *(dstT*)(srcptr2 + src2_index)
|
||||
#define convertToDT noconvert
|
||||
|
||||
#else
|
||||
|
||||
#define srcelem1 convertToWT1(*(srcT1*)(srcptr1 + src1_index))
|
||||
#define srcelem2 convertToWT2(*(srcT2*)(srcptr2 + src2_index))
|
||||
|
||||
#endif
|
||||
|
||||
#define EXTRA_PARAMS
|
||||
|
||||
#if defined OP_ADD_SAT
|
||||
#define PROCESS_ELEM dstelem = add_sat(srcelem1, srcelem2)
|
||||
|
||||
#elif defined OP_ADD
|
||||
#define PROCESS_ELEM dstelem = convertToDT(srcelem1 + srcelem2)
|
||||
|
||||
#elif defined OP_SUB_SAT
|
||||
#define PROCESS_ELEM dstelem = sub_sat(srcelem1, srcelem2)
|
||||
|
||||
#elif defined OP_SUB
|
||||
#define PROCESS_ELEM dstelem = convertToDT(srcelem1 - srcelem2)
|
||||
|
||||
#elif defined OP_RSUB_SAT
|
||||
#define PROCESS_ELEM dstelem = sub_sat(srcelem2, srcelem1)
|
||||
|
||||
#elif defined OP_RSUB
|
||||
#define PROCESS_ELEM dstelem = convertToDT(srcelem2 - srcelem1)
|
||||
|
||||
#elif defined OP_ABSDIFF
|
||||
#define PROCESS_ELEM dstelem = abs_diff(srcelem1, srcelem2)
|
||||
|
||||
#elif defined OP_AND
|
||||
#define PROCESS_ELEM dstelem = srcelem1 & srcelem2
|
||||
|
||||
#elif defined OP_OR
|
||||
#define PROCESS_ELEM dstelem = srcelem1 | srcelem2
|
||||
|
||||
#elif defined OP_XOR
|
||||
#define PROCESS_ELEM dstelem = srcelem1 ^ srcelem2
|
||||
|
||||
#elif defined OP_NOT
|
||||
#define PROCESS_ELEM dstelem = ~srcelem1
|
||||
|
||||
#elif defined OP_MIN
|
||||
#define PROCESS_ELEM dstelem = min(srcelem1, srcelem2)
|
||||
|
||||
#elif defined OP_MAX
|
||||
#define PROCESS_ELEM dstelem = max(srcelem1, srcelem2)
|
||||
|
||||
#elif defined OP_MUL
|
||||
#define PROCESS_ELEM dstelem = convertToDT(srcelem1 * srcelem2)
|
||||
|
||||
#elif defined OP_MUL_SCALE
|
||||
#undef EXTRA_PARAMS
|
||||
#define EXTRA_PARAMS , workT scale
|
||||
#define PROCESS_ELEM dstelem = convertToDT(srcelem1 * srcelem2 * scale)
|
||||
|
||||
#elif defined OP_DIV
|
||||
#define PROCESS_ELEM \
|
||||
workT e2 = srcelem2, zero = (workT)(0); \
|
||||
dstelem = convertToDT(e2 != zero ? srcelem1 / e2 : zero)
|
||||
|
||||
#elif defined OP_DIV_SCALE
|
||||
#undef EXTRA_PARAMS
|
||||
#define EXTRA_PARAMS , workT scale
|
||||
#define PROCESS_ELEM \
|
||||
workT e2 = srcelem2, zero = (workT)(0); \
|
||||
dstelem = convertToDT(e2 != zero ? srcelem1 * scale / e2 : zero)
|
||||
|
||||
#elif defined OP_RECIP_SCALE
|
||||
#undef EXTRA_PARAMS
|
||||
#define EXTRA_PARAMS , workT scale
|
||||
#define PROCESS_ELEM \
|
||||
workT e1 = srcelem1, zero = (workT)(0); \
|
||||
dstelem = convertToDT(e1 != zero ? scale / e1 : zero)
|
||||
|
||||
#elif defined OP_ADDW
|
||||
#undef EXTRA_PARAMS
|
||||
#define EXTRA_PARAMS , workT alpha, workT beta, workT gamma
|
||||
#define PROCESS_ELEM dstelem = convertToDT(srcelem1*alpha + srcelem2*beta + gamma)
|
||||
|
||||
#elif defined OP_MAG
|
||||
#define PROCESS_ELEM dstelem = hypot(srcelem1, srcelem2)
|
||||
|
||||
#elif defined OP_PHASE_RADIANS
|
||||
#define PROCESS_ELEM \
|
||||
workT tmp = atan2(srcelem2, srcelem1); \
|
||||
if(tmp < 0) tmp += 6.283185307179586232; \
|
||||
dstelem = tmp
|
||||
|
||||
#elif defined OP_PHASE_DEGREES
|
||||
#define PROCESS_ELEM \
|
||||
workT tmp = atan2(srcelem2, srcelem1)*57.29577951308232286465; \
|
||||
if(tmp < 0) tmp += 360; \
|
||||
dstelem = tmp
|
||||
|
||||
#elif defined OP_EXP
|
||||
#define PROCESS_ELEM dstelem = exp(srcelem1)
|
||||
|
||||
#elif defined OP_SQRT
|
||||
#define PROCESS_ELEM dstelem = sqrt(srcelem1)
|
||||
|
||||
#elif defined OP_LOG
|
||||
#define PROCESS_ELEM dstelem = log(abs(srcelem1))
|
||||
|
||||
#elif defined OP_CMP
|
||||
#define PROCESS_ELEM dstelem = convert_uchar(srcelem1 CMP_OPERATOR srcelem2 ? 255 : 0)
|
||||
|
||||
#elif defined OP_CONVERT
|
||||
#define PROCESS_ELEM dstelem = convertToDT(srcelem1)
|
||||
|
||||
#elif defined OP_CONVERT_SCALE
|
||||
#undef EXTRA_PARAMS
|
||||
#define EXTRA_PARAMS , workT alpha, workT beta
|
||||
#define PROCESS_ELEM dstelem = convertToDT(srcelem1*alpha + beta)
|
||||
|
||||
#else
|
||||
#error "unknown op type"
|
||||
#endif
|
||||
|
||||
#if defined UNARY_OP || defined MASK_UNARY_OP
|
||||
#undef srcelem2
|
||||
#if defined OP_AND || defined OP_OR || defined OP_XOR || defined OP_ADD || defined OP_SAT_ADD || \
|
||||
defined OP_SUB || defined OP_SAT_SUB || defined OP_RSUB || defined OP_SAT_RSUB || \
|
||||
defined OP_ABSDIFF || defined OP_CMP || defined OP_MIN || defined OP_MAX
|
||||
#undef EXTRA_PARAMS
|
||||
#define EXTRA_PARAMS , workT srcelem2
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#if defined BINARY_OP
|
||||
|
||||
__kernel void KF(__global const uchar* srcptr1, int srcstep1, int srcoffset1,
|
||||
__global const uchar* srcptr2, int srcstep2, int srcoffset2,
|
||||
__global uchar* dstptr, int dststep, int dstoffset,
|
||||
int rows, int cols EXTRA_PARAMS )
|
||||
{
|
||||
int x = get_global_id(0);
|
||||
int y = get_global_id(1);
|
||||
|
||||
if (x < cols && y < rows)
|
||||
{
|
||||
int src1_index = mad24(y, srcstep1, x*sizeof(srcT1) + srcoffset1);
|
||||
int src2_index = mad24(y, srcstep2, x*sizeof(srcT2) + srcoffset2);
|
||||
int dst_index = mad24(y, dststep, x*sizeof(dstT) + dstoffset);
|
||||
|
||||
PROCESS_ELEM;
|
||||
//printf("(x=%d, y=%d). %d, %d, %d\n", x, y, (int)srcelem1, (int)srcelem2, (int)dstelem);
|
||||
}
|
||||
}
|
||||
|
||||
#elif defined MASK_BINARY_OP
|
||||
|
||||
__kernel void KF(__global const uchar* srcptr1, int srcstep1, int srcoffset1,
|
||||
__global const uchar* srcptr2, int srcstep2, int srcoffset2,
|
||||
__global const uchar* mask, int maskstep, int maskoffset,
|
||||
__global uchar* dstptr, int dststep, int dstoffset,
|
||||
int rows, int cols EXTRA_PARAMS )
|
||||
{
|
||||
int x = get_global_id(0);
|
||||
int y = get_global_id(1);
|
||||
|
||||
if (x < cols && y < rows)
|
||||
{
|
||||
int mask_index = mad24(y, maskstep, x + maskoffset);
|
||||
if( mask[mask_index] )
|
||||
{
|
||||
int src1_index = mad24(y, srcstep1, x*sizeof(srcT1) + srcoffset1);
|
||||
int src2_index = mad24(y, srcstep2, x*sizeof(srcT2) + srcoffset2);
|
||||
int dst_index = mad24(y, dststep, x*sizeof(dstT) + dstoffset);
|
||||
|
||||
PROCESS_ELEM;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#elif defined UNARY_OP
|
||||
|
||||
__kernel void KF(__global const uchar* srcptr1, int srcstep1, int srcoffset1,
|
||||
__global uchar* dstptr, int dststep, int dstoffset,
|
||||
int rows, int cols EXTRA_PARAMS )
|
||||
{
|
||||
int x = get_global_id(0);
|
||||
int y = get_global_id(1);
|
||||
|
||||
if (x < cols && y < rows)
|
||||
{
|
||||
int src1_index = mad24(y, srcstep1, x*sizeof(srcT1) + srcoffset1);
|
||||
int dst_index = mad24(y, dststep, x*sizeof(dstT) + dstoffset);
|
||||
|
||||
PROCESS_ELEM;
|
||||
}
|
||||
}
|
||||
|
||||
#elif defined MASK_UNARY_OP
|
||||
|
||||
__kernel void KF(__global const uchar* srcptr1, int srcstep1, int srcoffset1,
|
||||
__global const uchar* mask, int maskstep, int maskoffset,
|
||||
__global uchar* dstptr, int dststep, int dstoffset,
|
||||
int rows, int cols EXTRA_PARAMS )
|
||||
{
|
||||
int x = get_global_id(0);
|
||||
int y = get_global_id(1);
|
||||
|
||||
if (x < cols && y < rows)
|
||||
{
|
||||
int mask_index = mad24(y, maskstep, x + maskoffset);
|
||||
if( mask[mask_index] )
|
||||
{
|
||||
int src1_index = mad24(y, srcstep1, x*sizeof(srcT1) + srcoffset1);
|
||||
int dst_index = mad24(y, dststep, x*sizeof(dstT) + dstoffset);
|
||||
|
||||
PROCESS_ELEM;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#else
|
||||
|
||||
#error "Unknown operation type"
|
||||
|
||||
#endif
|
73
modules/core/src/opencl/copyset.cl
Normal file
73
modules/core/src/opencl/copyset.cl
Normal file
@ -0,0 +1,73 @@
|
||||
/*M///////////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
|
||||
//
|
||||
// By downloading, copying, installing or using the software you agree to this license.
|
||||
// If you do not agree to this license, do not download, install,
|
||||
// copy or use the software.
|
||||
//
|
||||
//
|
||||
// License Agreement
|
||||
// For Open Source Computer Vision Library
|
||||
//
|
||||
// Copyright (C) 2010-2012, Institute Of Software Chinese Academy Of Science, all rights reserved.
|
||||
// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
|
||||
// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
|
||||
// Third party copyrights are property of their respective owners.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without modification,
|
||||
// are permitted provided that the following conditions are met:
|
||||
//
|
||||
// * Redistribution's of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimer.
|
||||
//
|
||||
// * Redistribution's in binary form must reproduce the above copyright notice,
|
||||
// this list of conditions and the following disclaimer in the documentation
|
||||
// and/or other materials provided with the distribution.
|
||||
//
|
||||
// * The name of the copyright holders may not be used to endorse or promote products
|
||||
// derived from this software without specific prior written permission.
|
||||
//
|
||||
// This software is provided by the copyright holders and contributors as is and
|
||||
// any express or implied warranties, including, but not limited to, the implied
|
||||
// warranties of merchantability and fitness for a particular purpose are disclaimed.
|
||||
// In no event shall the copyright holders or contributors be liable for any direct,
|
||||
// indirect, incidental, special, exemplary, or consequential damages
|
||||
// (including, but not limited to, procurement of substitute goods or services;
|
||||
// loss of use, data, or profits; or business interruption) however caused
|
||||
// and on any theory of liability, whether in contract, strict liability,
|
||||
// or tort (including negligence or otherwise) arising in any way out of
|
||||
// the use of this software, even if advised of the possibility of such damage.
|
||||
//
|
||||
//M*/
|
||||
|
||||
__kernel void setMask(__global const uchar* mask, int maskstep, int maskoffset,
|
||||
__global uchar* dstptr, int dststep, int dstoffset,
|
||||
int rows, int cols, dstT value )
|
||||
{
|
||||
int x = get_global_id(0);
|
||||
int y = get_global_id(1);
|
||||
|
||||
if (x < cols && y < rows)
|
||||
{
|
||||
int mask_index = mad24(y, maskstep, x + maskoffset);
|
||||
if( mask[mask_index] )
|
||||
{
|
||||
int dst_index = mad24(y, dststep, x*sizeof(dstT) + dstoffset);
|
||||
*(dstT*)(dstptr + dst_index) = value;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
__kernel void set(__global uchar* dstptr, int dststep, int dstoffset,
|
||||
int rows, int cols, dstT value )
|
||||
{
|
||||
int x = get_global_id(0);
|
||||
int y = get_global_id(1);
|
||||
|
||||
if (x < cols && y < rows)
|
||||
{
|
||||
int dst_index = mad24(y, dststep, x*sizeof(dstT) + dstoffset);
|
||||
*(dstT*)(dstptr + dst_index) = value;
|
||||
}
|
||||
}
|
@ -205,13 +205,30 @@ enum { BLOCK_SIZE = 1024 };
|
||||
|
||||
inline bool checkScalar(const Mat& sc, int atype, int sckind, int akind)
|
||||
{
|
||||
if( sc.dims > 2 || (sc.cols != 1 && sc.rows != 1) || !sc.isContinuous() )
|
||||
if( sc.dims > 2 || !sc.isContinuous() )
|
||||
return false;
|
||||
Size sz = sc.size();
|
||||
if(sz.width != 1 && sz.height != 1)
|
||||
return false;
|
||||
int cn = CV_MAT_CN(atype);
|
||||
if( akind == _InputArray::MATX && sckind != _InputArray::MATX )
|
||||
return false;
|
||||
return sc.size() == Size(1, 1) || sc.size() == Size(1, cn) || sc.size() == Size(cn, 1) ||
|
||||
(sc.size() == Size(1, 4) && sc.type() == CV_64F && cn <= 4);
|
||||
return sz == Size(1, 1) || sz == Size(1, cn) || sz == Size(cn, 1) ||
|
||||
(sz == Size(1, 4) && sc.type() == CV_64F && cn <= 4);
|
||||
}
|
||||
|
||||
inline bool checkScalar(InputArray sc, int atype, int sckind, int akind)
|
||||
{
|
||||
if( sc.dims() > 2 || !sc.isContinuous() )
|
||||
return false;
|
||||
Size sz = sc.size();
|
||||
if(sz.width != 1 && sz.height != 1)
|
||||
return false;
|
||||
int cn = CV_MAT_CN(atype);
|
||||
if( akind == _InputArray::MATX && sckind != _InputArray::MATX )
|
||||
return false;
|
||||
return sz == Size(1, 1) || sz == Size(1, cn) || sz == Size(cn, 1) ||
|
||||
(sz == Size(1, 4) && sc.type() == CV_64F && cn <= 4);
|
||||
}
|
||||
|
||||
void convertAndUnrollScalar( const Mat& sc, int buftype, uchar* scbuf, size_t blocksize );
|
||||
@ -227,7 +244,10 @@ struct TLSData
|
||||
static TLSData* get();
|
||||
};
|
||||
|
||||
namespace ocl { MatAllocator* getOpenCLAllocator(); }
|
||||
namespace ocl
|
||||
{
|
||||
MatAllocator* getOpenCLAllocator();
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
|
@ -41,6 +41,7 @@
|
||||
//M*/
|
||||
|
||||
#include "precomp.hpp"
|
||||
#include "opencl_kernels.hpp"
|
||||
|
||||
///////////////////////////////// UMat implementation ///////////////////////////////
|
||||
|
||||
@ -174,8 +175,8 @@ static void updateContinuityFlag(UMat& m)
|
||||
break;
|
||||
}
|
||||
|
||||
uint64 t = (uint64)m.step[0]*m.size[0];
|
||||
if( j <= i && t == (size_t)t )
|
||||
uint64 total = (uint64)m.step[0]*m.size[0];
|
||||
if( j <= i && total == (size_t)total )
|
||||
m.flags |= UMat::CONTINUOUS_FLAG;
|
||||
else
|
||||
m.flags &= ~UMat::CONTINUOUS_FLAG;
|
||||
@ -197,6 +198,7 @@ UMat Mat::getUMat(int accessFlags) const
|
||||
if(!u)
|
||||
return hdr;
|
||||
UMat::getStdAllocator()->allocate(u, accessFlags);
|
||||
hdr.flags = flags;
|
||||
setSize(hdr, dims, size.p, step.p);
|
||||
finalizeHdr(hdr);
|
||||
hdr.u = u;
|
||||
@ -548,7 +550,8 @@ Mat UMat::getMat(int accessFlags) const
|
||||
CV_Assert(u->data != 0);
|
||||
Mat hdr(dims, size.p, type(), u->data + offset, step.p);
|
||||
hdr.u = u;
|
||||
hdr.datastart = hdr.data = u->data;
|
||||
hdr.datastart = u->data;
|
||||
hdr.data = hdr.datastart + offset;
|
||||
hdr.datalimit = hdr.dataend = u->data + u->size;
|
||||
CV_XADD(&hdr.u->refcount, 1);
|
||||
return hdr;
|
||||
@ -617,7 +620,7 @@ void UMat::copyTo(OutputArray _dst) const
|
||||
void* dsthandle = dst.handle(ACCESS_WRITE);
|
||||
if( srchandle == dsthandle && dst.offset == offset )
|
||||
return;
|
||||
ndoffset(dstofs);
|
||||
dst.ndoffset(dstofs);
|
||||
CV_Assert(u->currAllocator == dst.u->currAllocator);
|
||||
u->currAllocator->copy(u, dst.u, dims, sz, srcofs, step.p, dstofs, dst.step.p, false);
|
||||
}
|
||||
@ -633,6 +636,50 @@ void UMat::convertTo(OutputArray, int, double, double) const
|
||||
CV_Error(Error::StsNotImplemented, "");
|
||||
}
|
||||
|
||||
UMat& UMat::setTo(InputArray _value, InputArray _mask)
|
||||
{
|
||||
bool haveMask = !_mask.empty();
|
||||
int tp = type(), cn = CV_MAT_CN(tp);
|
||||
if( dims <= 2 && cn <= 4 && ocl::useOpenCL() )
|
||||
{
|
||||
Mat value = _value.getMat();
|
||||
CV_Assert( checkScalar(value, type(), _value.kind(), _InputArray::UMAT) );
|
||||
double buf[4];
|
||||
convertAndUnrollScalar(value, tp, (uchar*)buf, 1);
|
||||
|
||||
char opts[1024];
|
||||
sprintf(opts, "-D dstT=%s", ocl::memopTypeToStr(tp));
|
||||
|
||||
ocl::Kernel setK(haveMask ? "setMask" : "set", ocl::core::copyset_oclsrc, opts);
|
||||
if( !setK.empty() )
|
||||
{
|
||||
ocl::KernelArg scalararg(0, 0, 0, buf, CV_ELEM_SIZE(tp));
|
||||
UMat mask;
|
||||
|
||||
if( haveMask )
|
||||
{
|
||||
mask = _mask.getUMat();
|
||||
CV_Assert( mask.size() == size() && mask.type() == CV_8U );
|
||||
ocl::KernelArg maskarg = ocl::KernelArg::ReadOnlyNoSize(mask);
|
||||
ocl::KernelArg dstarg = ocl::KernelArg::ReadWrite(*this);
|
||||
setK.args(maskarg, dstarg, scalararg);
|
||||
}
|
||||
else
|
||||
{
|
||||
ocl::KernelArg dstarg = ocl::KernelArg::WriteOnly(*this);
|
||||
setK.args(dstarg, scalararg);
|
||||
}
|
||||
|
||||
size_t globalsize[] = { cols, rows };
|
||||
if( setK.run(2, globalsize, 0, false) )
|
||||
return *this;
|
||||
}
|
||||
}
|
||||
Mat m = getMat(haveMask ? ACCESS_RW : ACCESS_WRITE);
|
||||
m.setTo(_value, _mask);
|
||||
return *this;
|
||||
}
|
||||
|
||||
UMat& UMat::operator = (const Scalar&)
|
||||
{
|
||||
CV_Error(Error::StsNotImplemented, "");
|
||||
|
@ -91,11 +91,11 @@ bool CV_UMatTest::TestUMat()
|
||||
{
|
||||
try
|
||||
{
|
||||
Mat a(100, 100, CV_16S), b;
|
||||
Mat a(100, 100, CV_16SC2), b, c;
|
||||
randu(a, Scalar::all(-100), Scalar::all(100));
|
||||
Rect roi(1, 3, 10, 20);
|
||||
Mat ra(a, roi), rb;
|
||||
UMat ua, ura;
|
||||
Rect roi(1, 3, 5, 4);
|
||||
Mat ra(a, roi), rb, rc, rc0;
|
||||
UMat ua, ura, ub, urb, uc, urc;
|
||||
a.copyTo(ua);
|
||||
ua.copyTo(b);
|
||||
CHECK_DIFF(a, b);
|
||||
@ -112,6 +112,71 @@ bool CV_UMatTest::TestUMat()
|
||||
}
|
||||
ra.copyTo(rb);
|
||||
CHECK_DIFF(ra, rb);
|
||||
|
||||
b = a.clone();
|
||||
ra = a(roi);
|
||||
rb = b(roi);
|
||||
randu(b, Scalar::all(-100), Scalar::all(100));
|
||||
b.copyTo(ub);
|
||||
urb = ub(roi);
|
||||
|
||||
/*std::cout << "==============================================\nbefore op (CPU):\n";
|
||||
std::cout << "ra: " << ra << std::endl;
|
||||
std::cout << "rb: " << rb << std::endl;*/
|
||||
|
||||
ra.copyTo(ura);
|
||||
rb.copyTo(urb);
|
||||
ra.release();
|
||||
rb.release();
|
||||
ura.copyTo(ra);
|
||||
urb.copyTo(rb);
|
||||
|
||||
/*std::cout << "==============================================\nbefore op (GPU):\n";
|
||||
std::cout << "ra: " << ra << std::endl;
|
||||
std::cout << "rb: " << rb << std::endl;*/
|
||||
|
||||
cv::max(ra, rb, rc);
|
||||
cv::max(ura, urb, urc);
|
||||
urc.copyTo(rc0);
|
||||
|
||||
/*std::cout << "==============================================\nafter op:\n";
|
||||
std::cout << "rc: " << rc << std::endl;
|
||||
std::cout << "rc0: " << rc0 << std::endl;*/
|
||||
|
||||
CHECK_DIFF(rc0, rc);
|
||||
|
||||
{
|
||||
UMat tmp = rc0.getUMat(ACCESS_WRITE);
|
||||
cv::max(ura, urb, tmp);
|
||||
}
|
||||
CHECK_DIFF(rc0, rc);
|
||||
|
||||
ura.copyTo(urc);
|
||||
cv::max(urc, urb, urc);
|
||||
urc.copyTo(rc0);
|
||||
CHECK_DIFF(rc0, rc);
|
||||
|
||||
rc = ra ^ rb;
|
||||
cv::bitwise_xor(ura, urb, urc);
|
||||
urc.copyTo(rc0);
|
||||
|
||||
/*std::cout << "==============================================\nafter op:\n";
|
||||
std::cout << "ra: " << rc0 << std::endl;
|
||||
std::cout << "rc: " << rc << std::endl;*/
|
||||
|
||||
CHECK_DIFF(rc0, rc);
|
||||
|
||||
rc = ra + rb;
|
||||
cv::add(ura, urb, urc);
|
||||
urc.copyTo(rc0);
|
||||
|
||||
CHECK_DIFF(rc0, rc);
|
||||
|
||||
cv::subtract(ra, Scalar::all(5), rc);
|
||||
cv::subtract(ura, Scalar::all(5), urc);
|
||||
urc.copyTo(rc0);
|
||||
|
||||
CHECK_DIFF(rc0, rc);
|
||||
}
|
||||
catch (const test_excep& e)
|
||||
{
|
||||
|
@ -320,7 +320,7 @@ VideoCapture::retrieve
|
||||
----------------------
|
||||
Decodes and returns the grabbed video frame.
|
||||
|
||||
.. ocv:function:: bool VideoCapture::retrieve( Mat& image, int flag=0 )
|
||||
.. ocv:function:: bool VideoCapture::retrieve( OutputArray image, int flag=0 )
|
||||
|
||||
.. ocv:pyfunction:: cv2.VideoCapture.retrieve([image[, flag]]) -> retval, image
|
||||
|
||||
@ -337,7 +337,9 @@ Grabs, decodes and returns the next video frame.
|
||||
|
||||
.. ocv:function:: VideoCapture& VideoCapture::operator >> (Mat& image)
|
||||
|
||||
.. ocv:function:: bool VideoCapture::read(Mat& image)
|
||||
.. ocv:function:: VideoCapture& VideoCapture::operator >> (UMat& image)
|
||||
|
||||
.. ocv:function:: bool VideoCapture::read(OutputArray image)
|
||||
|
||||
.. ocv:pyfunction:: cv2.VideoCapture.read([image]) -> retval, image
|
||||
|
||||
|
@ -511,9 +511,10 @@ public:
|
||||
CV_WRAP virtual void release();
|
||||
|
||||
CV_WRAP virtual bool grab();
|
||||
CV_WRAP virtual bool retrieve(CV_OUT Mat& image, int flag = 0);
|
||||
CV_WRAP virtual bool retrieve(OutputArray image, int flag = 0);
|
||||
virtual VideoCapture& operator >> (CV_OUT Mat& image);
|
||||
CV_WRAP virtual bool read(CV_OUT Mat& image);
|
||||
virtual VideoCapture& operator >> (CV_OUT UMat& image);
|
||||
CV_WRAP virtual bool read(OutputArray image);
|
||||
|
||||
CV_WRAP virtual bool set(int propId, double value);
|
||||
CV_WRAP virtual double get(int propId);
|
||||
|
@ -515,7 +515,7 @@ bool VideoCapture::grab()
|
||||
return cvGrabFrame(cap) != 0;
|
||||
}
|
||||
|
||||
bool VideoCapture::retrieve(Mat& image, int channel)
|
||||
bool VideoCapture::retrieve(OutputArray image, int channel)
|
||||
{
|
||||
IplImage* _img = cvRetrieveFrame(cap, channel);
|
||||
if( !_img )
|
||||
@ -533,7 +533,7 @@ bool VideoCapture::retrieve(Mat& image, int channel)
|
||||
return true;
|
||||
}
|
||||
|
||||
bool VideoCapture::read(Mat& image)
|
||||
bool VideoCapture::read(OutputArray image)
|
||||
{
|
||||
if(grab())
|
||||
retrieve(image);
|
||||
@ -548,6 +548,12 @@ VideoCapture& VideoCapture::operator >> (Mat& image)
|
||||
return *this;
|
||||
}
|
||||
|
||||
VideoCapture& VideoCapture::operator >> (UMat& image)
|
||||
{
|
||||
read(image);
|
||||
return *this;
|
||||
}
|
||||
|
||||
bool VideoCapture::set(int propId, double value)
|
||||
{
|
||||
return cvSetCaptureProperty(cap, propId, value) != 0;
|
||||
|
@ -90,6 +90,7 @@
|
||||
\**********************************************************************************/
|
||||
|
||||
#include "precomp.hpp"
|
||||
#include "opencl_kernels.hpp"
|
||||
#include <limits>
|
||||
|
||||
#define CV_DESCALE(x,n) (((x) + (1 << ((n)-1))) >> (n))
|
||||
@ -2687,6 +2688,125 @@ struct mRGBA2RGBA
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
static bool ocl_cvtColor( InputArray _src, OutputArray _dst, int code, int dcn )
|
||||
{
|
||||
bool ok = true;
|
||||
UMat src = _src.getUMat(), dst;
|
||||
Size sz = src.size(), dstSz = sz;
|
||||
int scn = src.channels(), depth = src.depth(), bidx;
|
||||
size_t globalsize[] = { src.cols, src.rows };
|
||||
ocl::Kernel k;
|
||||
|
||||
if(depth != CV_8U && depth != CV_16U && depth != CV_32F)
|
||||
return false;
|
||||
|
||||
switch (code)
|
||||
{
|
||||
/*
|
||||
case COLOR_BGR2BGRA: case COLOR_RGB2BGRA: case COLOR_BGRA2BGR:
|
||||
case COLOR_RGBA2BGR: case COLOR_RGB2BGR: case COLOR_BGRA2RGBA:
|
||||
case COLOR_BGR2BGR565: case COLOR_BGR2BGR555: case COLOR_RGB2BGR565: case COLOR_RGB2BGR555:
|
||||
case COLOR_BGRA2BGR565: case COLOR_BGRA2BGR555: case COLOR_RGBA2BGR565: case COLOR_RGBA2BGR555:
|
||||
case COLOR_BGR5652BGR: case COLOR_BGR5552BGR: case COLOR_BGR5652RGB: case COLOR_BGR5552RGB:
|
||||
case COLOR_BGR5652BGRA: case COLOR_BGR5552BGRA: case COLOR_BGR5652RGBA: case COLOR_BGR5552RGBA:
|
||||
*/
|
||||
case COLOR_BGR2GRAY:
|
||||
case COLOR_BGRA2GRAY:
|
||||
case COLOR_RGB2GRAY:
|
||||
case COLOR_RGBA2GRAY:
|
||||
{
|
||||
CV_Assert(scn == 3 || scn == 4);
|
||||
bidx = code == COLOR_BGR2GRAY || code == COLOR_BGRA2GRAY ? 0 : 2;
|
||||
dcn = 1;
|
||||
k.create("RGB2Gray", ocl::imgproc::cvtcolor_oclsrc,
|
||||
format("-D depth=%d -D scn=%d -D dcn=1 -D bidx=%d", depth, scn, bidx));
|
||||
break;
|
||||
}
|
||||
case COLOR_GRAY2BGR:
|
||||
case COLOR_GRAY2BGRA:
|
||||
{
|
||||
CV_Assert(scn == 1);
|
||||
dcn = code == COLOR_GRAY2BGRA ? 4 : 3;
|
||||
k.create("Gray2RGB", ocl::imgproc::cvtcolor_oclsrc,
|
||||
format("-D depth=%d -D scn=1 -D dcn=%d", depth, dcn));
|
||||
break;
|
||||
}
|
||||
case COLOR_BGR2YUV:
|
||||
case COLOR_RGB2YUV:
|
||||
{
|
||||
CV_Assert(scn == 3 || scn == 4);
|
||||
bidx = code == COLOR_RGB2YUV ? 0 : 2;
|
||||
dcn = 3;
|
||||
k.create("RGB2YUV", ocl::imgproc::cvtcolor_oclsrc,
|
||||
format("-D depth=%d -D scn=%d -D dcn=3 -D bidx=%d", depth, scn, bidx));
|
||||
break;
|
||||
}
|
||||
case COLOR_YUV2BGR:
|
||||
case COLOR_YUV2RGB:
|
||||
{
|
||||
if(dcn < 0) dcn = 3;
|
||||
CV_Assert(dcn == 3 || dcn == 4);
|
||||
bidx = code == COLOR_YUV2RGB ? 0 : 2;
|
||||
k.create("YUV2RGB", ocl::imgproc::cvtcolor_oclsrc,
|
||||
format("-D depth=%d -D scn=3 -D dcn=%d -D bidx=%d", depth, dcn, bidx));
|
||||
break;
|
||||
}
|
||||
case COLOR_YUV2RGB_NV12:
|
||||
case COLOR_YUV2BGR_NV12:
|
||||
case COLOR_YUV2RGBA_NV12:
|
||||
case COLOR_YUV2BGRA_NV12:
|
||||
{
|
||||
CV_Assert( scn == 1 );
|
||||
CV_Assert( sz.width % 2 == 0 && sz.height % 3 == 0 && depth == CV_8U );
|
||||
dcn = code == COLOR_YUV2BGRA_NV12 || code == COLOR_YUV2RGBA_NV12 ? 4 : 3;
|
||||
bidx = code == COLOR_YUV2BGRA_NV12 || code == COLOR_YUV2BGR_NV12 ? 0 : 2;
|
||||
|
||||
dstSz = Size(sz.width, sz.height * 2 / 3);
|
||||
globalsize[0] = dstSz.height/2;
|
||||
globalsize[1] = dstSz.width/2;
|
||||
k.create("YUV2RGBA_NV12", ocl::imgproc::cvtcolor_oclsrc,
|
||||
format("-D depth=0 -D scn=1 -D dcn=%d -D bidx=%d", dcn, bidx));
|
||||
break;
|
||||
}
|
||||
case COLOR_BGR2YCrCb:
|
||||
case COLOR_RGB2YCrCb:
|
||||
{
|
||||
CV_Assert(scn == 3 || scn == 4);
|
||||
bidx = code == COLOR_BGR2YCrCb ? 0 : 2;
|
||||
dcn = 3;
|
||||
k.create("RGB2YCrCb", ocl::imgproc::cvtcolor_oclsrc,
|
||||
format("-D depth=%d -D scn=%d -D dcn=3 -D bidx=%d", depth, scn, bidx));
|
||||
break;
|
||||
}
|
||||
case COLOR_YCrCb2BGR:
|
||||
case COLOR_YCrCb2RGB:
|
||||
break;
|
||||
/*
|
||||
case COLOR_BGR5652GRAY: case COLOR_BGR5552GRAY:
|
||||
case COLOR_GRAY2BGR565: case COLOR_GRAY2BGR555:
|
||||
case COLOR_BGR2YCrCb: case COLOR_RGB2YCrCb:
|
||||
case COLOR_BGR2XYZ: case COLOR_RGB2XYZ:
|
||||
case COLOR_XYZ2BGR: case COLOR_XYZ2RGB:
|
||||
case COLOR_BGR2HSV: case COLOR_RGB2HSV: case COLOR_BGR2HSV_FULL: case COLOR_RGB2HSV_FULL:
|
||||
case COLOR_BGR2HLS: case COLOR_RGB2HLS: case COLOR_BGR2HLS_FULL: case COLOR_RGB2HLS_FULL:
|
||||
case COLOR_HSV2BGR: case COLOR_HSV2RGB: case COLOR_HSV2BGR_FULL: case COLOR_HSV2RGB_FULL:
|
||||
case COLOR_HLS2BGR: case COLOR_HLS2RGB: case COLOR_HLS2BGR_FULL: case COLOR_HLS2RGB_FULL:
|
||||
*/
|
||||
default:
|
||||
;
|
||||
}
|
||||
|
||||
if( !k.empty() )
|
||||
{
|
||||
_dst.create(dstSz, CV_MAKETYPE(depth, dcn));
|
||||
dst = _dst.getUMat();
|
||||
k.args(ocl::KernelArg::ReadOnlyNoSize(src), ocl::KernelArg::WriteOnly(dst));
|
||||
ok = k.run(2, globalsize, 0, false);
|
||||
}
|
||||
return ok;
|
||||
}
|
||||
|
||||
}//namespace cv
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////////////////////
|
||||
@ -2695,9 +2815,15 @@ struct mRGBA2RGBA
|
||||
|
||||
void cv::cvtColor( InputArray _src, OutputArray _dst, int code, int dcn )
|
||||
{
|
||||
bool use_opencl = ocl::useOpenCL() && _dst.kind() == _InputArray::UMAT;
|
||||
int stype = _src.type();
|
||||
int scn = CV_MAT_CN(stype), depth = CV_MAT_DEPTH(stype), bidx;
|
||||
|
||||
if( use_opencl && ocl_cvtColor(_src, _dst, code, dcn) )
|
||||
return;
|
||||
|
||||
Mat src = _src.getMat(), dst;
|
||||
Size sz = src.size();
|
||||
int scn = src.channels(), depth = src.depth(), bidx;
|
||||
|
||||
CV_Assert( depth == CV_8U || depth == CV_16U || depth == CV_32F );
|
||||
|
||||
|
@ -47,6 +47,7 @@
|
||||
// */
|
||||
|
||||
#include "precomp.hpp"
|
||||
#include "opencl_kernels.hpp"
|
||||
#include <iostream>
|
||||
#include <vector>
|
||||
|
||||
@ -1901,8 +1902,45 @@ private:
|
||||
};
|
||||
#endif
|
||||
|
||||
static bool ocl_resize( InputArray _src, OutputArray _dst, Size dsize,
|
||||
double fx, double fy, int interpolation)
|
||||
{
|
||||
int type = _src.type(), depth = CV_MAT_DEPTH(type), cn = CV_MAT_CN(type);
|
||||
if( !(cn <= 4 &&
|
||||
(interpolation == INTER_NEAREST ||
|
||||
(interpolation == INTER_LINEAR && (depth == CV_8U || depth == CV_32F)))) )
|
||||
return false;
|
||||
UMat src = _src.getUMat();
|
||||
_dst.create(dsize, type);
|
||||
UMat dst = _dst.getUMat();
|
||||
ocl::Kernel k;
|
||||
|
||||
if (interpolation == INTER_LINEAR)
|
||||
{
|
||||
int wdepth = depth == CV_8U ? CV_32S : CV_32F;
|
||||
int wtype = CV_MAKETYPE(wdepth, cn);
|
||||
char buf[2][32];
|
||||
k.create("resizeLN", ocl::imgproc::resize_oclsrc,
|
||||
format("-D INTER_LINEAR -D depth=%s -D PIXTYPE=%s -D WORKTYPE=%s -D convertToWT=%s -D convertToDT=%s",
|
||||
depth, ocl::typeToStr(type), ocl::typeToStr(wtype),
|
||||
ocl::convertTypeStr(depth, wdepth, cn, buf[0]),
|
||||
ocl::convertTypeStr(wdepth, depth, cn, buf[1])));
|
||||
}
|
||||
else if (interpolation == INTER_NEAREST)
|
||||
{
|
||||
k.create("resizeNN", ocl::imgproc::resize_oclsrc,
|
||||
format("-D INTER_NEAREST -D PIXTYPE=%s", ocl::memopTypeToStr(type) ));
|
||||
}
|
||||
|
||||
if( k.empty() )
|
||||
return false;
|
||||
k.args(ocl::KernelArg::ReadOnly(src), ocl::KernelArg::WriteOnly(dst),
|
||||
(float)(1./fx), (float)(1./fy));
|
||||
size_t globalsize[] = { dst.cols, dst.rows };
|
||||
return k.run(2, globalsize, 0, false);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
@ -2013,26 +2051,30 @@ void cv::resize( InputArray _src, OutputArray _dst, Size dsize,
|
||||
resizeArea_<double, double>, 0
|
||||
};
|
||||
|
||||
Mat src = _src.getMat();
|
||||
Size ssize = src.size();
|
||||
Size ssize = _src.size();
|
||||
|
||||
CV_Assert( ssize.area() > 0 );
|
||||
CV_Assert( dsize.area() || (inv_scale_x > 0 && inv_scale_y > 0) );
|
||||
if( !dsize.area() )
|
||||
CV_Assert( dsize.area() > 0 || (inv_scale_x > 0 && inv_scale_y > 0) );
|
||||
if( dsize.area() == 0 )
|
||||
{
|
||||
dsize = Size(saturate_cast<int>(src.cols*inv_scale_x),
|
||||
saturate_cast<int>(src.rows*inv_scale_y));
|
||||
CV_Assert( dsize.area() );
|
||||
dsize = Size(saturate_cast<int>(ssize.width*inv_scale_x),
|
||||
saturate_cast<int>(ssize.height*inv_scale_y));
|
||||
CV_Assert( dsize.area() > 0 );
|
||||
}
|
||||
else
|
||||
{
|
||||
inv_scale_x = (double)dsize.width/src.cols;
|
||||
inv_scale_y = (double)dsize.height/src.rows;
|
||||
inv_scale_x = (double)dsize.width/ssize.width;
|
||||
inv_scale_y = (double)dsize.height/ssize.height;
|
||||
}
|
||||
|
||||
if( ocl::useOpenCL() && _dst.kind() == _InputArray::UMAT &&
|
||||
ocl_resize(_src, _dst, dsize, inv_scale_x, inv_scale_y, interpolation) )
|
||||
return;
|
||||
|
||||
Mat src = _src.getMat();
|
||||
_dst.create(dsize, src.type());
|
||||
Mat dst = _dst.getMat();
|
||||
|
||||
|
||||
#ifdef HAVE_TEGRA_OPTIMIZATION
|
||||
if (tegra::resize(src, dst, (float)inv_scale_x, (float)inv_scale_y, interpolation))
|
||||
return;
|
||||
|
306
modules/imgproc/src/opencl/cvtcolor.cl
Normal file
306
modules/imgproc/src/opencl/cvtcolor.cl
Normal file
@ -0,0 +1,306 @@
|
||||
/*M///////////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
|
||||
//
|
||||
// By downloading, copying, installing or using the software you agree to this license.
|
||||
// If you do not agree to this license, do not download, install,
|
||||
// copy or use the software.
|
||||
//
|
||||
//
|
||||
// License Agreement
|
||||
// For Open Source Computer Vision Library
|
||||
//
|
||||
// Copyright (C) 2010-2012, Institute Of Software Chinese Academy Of Science, all rights reserved.
|
||||
// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
|
||||
// Third party copyrights are property of their respective owners.
|
||||
//
|
||||
// @Authors
|
||||
// Jia Haipeng, jiahaipeng95@gmail.com
|
||||
// Peng Xiao, pengxiao@multicorewareinc.com
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without modification,
|
||||
// are permitted provided that the following conditions are met:
|
||||
//
|
||||
// * Redistribution's of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimer.
|
||||
//
|
||||
// * Redistribution's in binary form must reproduce the above copyright notice,
|
||||
// this list of conditions and the following disclaimer in the documentation
|
||||
// and/or other materials provided with the distribution.
|
||||
//
|
||||
// * The name of the copyright holders may not be used to endorse or promote products
|
||||
// derived from this software without specific prior written permission.
|
||||
//
|
||||
// This software is provided by the copyright holders and contributors as is and
|
||||
// any express or implied warranties, including, but not limited to, the implied
|
||||
// warranties of merchantability and fitness for a particular purpose are disclaimed.
|
||||
// In no event shall the Intel Corporation or contributors be liable for any direct,
|
||||
// indirect, incidental, special, exemplary, or consequential damages
|
||||
// (including, but not limited to, procurement of substitute goods or services;
|
||||
// loss of use, data, or profits; or business interruption) however caused
|
||||
// and on any theory of liability, whether in contract, strict liability,
|
||||
// or tort (including negligence or otherwise) arising in any way out of
|
||||
// the use of this software, even if advised of the possibility of such damage.
|
||||
//
|
||||
//M*/
|
||||
|
||||
/**************************************PUBLICFUNC*************************************/
|
||||
|
||||
#if defined (DOUBLE_SUPPORT)
|
||||
#pragma OPENCL EXTENSION cl_khr_fp64:enable
|
||||
#endif
|
||||
|
||||
#if depth == 0
|
||||
#define DATA_TYPE uchar
|
||||
#define MAX_NUM 255
|
||||
#define HALF_MAX 128
|
||||
#define SAT_CAST(num) convert_uchar_sat(num)
|
||||
#define DEPTH_0
|
||||
#elif depth == 2
|
||||
#define DATA_TYPE ushort
|
||||
#define MAX_NUM 65535
|
||||
#define HALF_MAX 32768
|
||||
#define SAT_CAST(num) convert_ushort_sat(num)
|
||||
#define DEPTH_2
|
||||
#elif depth == 5
|
||||
#define DATA_TYPE float
|
||||
#define MAX_NUM 1.0f
|
||||
#define HALF_MAX 0.5f
|
||||
#define SAT_CAST(num) (num)
|
||||
#define DEPTH_5
|
||||
#else
|
||||
#error "invalid depth: should be 0 (CV_8U), 2 (CV_16U) or 5 (CV_32F)"
|
||||
#endif
|
||||
|
||||
#define CV_DESCALE(x,n) (((x) + (1 << ((n)-1))) >> (n))
|
||||
|
||||
enum
|
||||
{
|
||||
yuv_shift = 14,
|
||||
xyz_shift = 12,
|
||||
R2Y = 4899,
|
||||
G2Y = 9617,
|
||||
B2Y = 1868,
|
||||
BLOCK_SIZE = 256
|
||||
};
|
||||
|
||||
#define scnbytes ((int)sizeof(DATA_TYPE)*scn)
|
||||
#define dcnbytes ((int)sizeof(DATA_TYPE)*dcn)
|
||||
|
||||
///////////////////////////////////// RGB <-> GRAY //////////////////////////////////////
|
||||
|
||||
__kernel void RGB2Gray(__global const uchar* srcptr, int srcstep, int srcoffset,
|
||||
__global uchar* dstptr, int dststep, int dstoffset,
|
||||
int rows, int cols)
|
||||
{
|
||||
const int x = get_global_id(0);
|
||||
const int y = get_global_id(1);
|
||||
|
||||
if (y < rows && x < cols)
|
||||
{
|
||||
const DATA_TYPE* src = (const DATA_TYPE*)(srcptr + mad24(y, srcstep, srcoffset + x * scnbytes));
|
||||
DATA_TYPE* dst = (DATA_TYPE*)(dstptr + mad24(y, dststep, dstoffset + x * dcnbytes));
|
||||
#if defined (DEPTH_5)
|
||||
dst[0] = src[bidx] * 0.114f + src[1] * 0.587f + src[(bidx^2)] * 0.299f;
|
||||
#else
|
||||
dst[0] = (DATA_TYPE)CV_DESCALE((src[bidx] * B2Y + src[1] * G2Y + src[(bidx^2)] * R2Y), yuv_shift);
|
||||
#endif
|
||||
}
|
||||
}
|
||||
|
||||
__kernel void Gray2RGB(__global const uchar* srcptr, int srcstep, int srcoffset,
|
||||
__global uchar* dstptr, int dststep, int dstoffset,
|
||||
int rows, int cols)
|
||||
{
|
||||
const int x = get_global_id(0);
|
||||
const int y = get_global_id(1);
|
||||
|
||||
if (y < rows && x < cols)
|
||||
{
|
||||
const DATA_TYPE* src = (const DATA_TYPE*)(srcptr + mad24(y, srcstep, srcoffset + x * scnbytes));
|
||||
DATA_TYPE* dst = (DATA_TYPE*)(dstptr + mad24(y, dststep, dstoffset + x * dcnbytes));
|
||||
DATA_TYPE val = src[0];
|
||||
dst[0] = dst[1] = dst[2] = val;
|
||||
#if dcn == 4
|
||||
dst[3] = MAX_NUM;
|
||||
#endif
|
||||
}
|
||||
}
|
||||
|
||||
///////////////////////////////////// RGB <-> YUV //////////////////////////////////////
|
||||
|
||||
__constant float c_RGB2YUVCoeffs_f[5] = { 0.114f, 0.587f, 0.299f, 0.492f, 0.877f };
|
||||
__constant int c_RGB2YUVCoeffs_i[5] = { B2Y, G2Y, R2Y, 8061, 14369 };
|
||||
|
||||
__kernel void RGB2YUV(__global const uchar* srcptr, int srcstep, int srcoffset,
|
||||
__global uchar* dstptr, int dststep, int dstoffset,
|
||||
int rows, int cols)
|
||||
{
|
||||
int x = get_global_id(0);
|
||||
int y = get_global_id(1);
|
||||
|
||||
if (y < rows && x < cols)
|
||||
{
|
||||
const DATA_TYPE* src = (const DATA_TYPE*)(srcptr + mad24(y, srcstep, srcoffset + x * scnbytes));
|
||||
DATA_TYPE* dst = (DATA_TYPE*)(dstptr + mad24(y, dststep, dstoffset + x * dcnbytes));
|
||||
DATA_TYPE b=src[bidx], g=src[1], r=src[bidx^2];
|
||||
|
||||
#if defined (DEPTH_5)
|
||||
__constant float * coeffs = c_RGB2YUVCoeffs_f;
|
||||
const DATA_TYPE Y = b * coeffs[0] + g * coeffs[1] + r * coeffs[2];
|
||||
const DATA_TYPE U = (b - Y) * coeffs[3] + HALF_MAX;
|
||||
const DATA_TYPE V = (r - Y) * coeffs[4] + HALF_MAX;
|
||||
#else
|
||||
__constant int * coeffs = c_RGB2YUVCoeffs_i;
|
||||
const int delta = HALF_MAX * (1 << yuv_shift);
|
||||
const int Y = CV_DESCALE(b * coeffs[0] + g * coeffs[1] + r * coeffs[2], yuv_shift);
|
||||
const int U = CV_DESCALE((b - Y) * coeffs[3] + delta, yuv_shift);
|
||||
const int V = CV_DESCALE((r - Y) * coeffs[4] + delta, yuv_shift);
|
||||
#endif
|
||||
|
||||
dst[0] = SAT_CAST( Y );
|
||||
dst[1] = SAT_CAST( U );
|
||||
dst[2] = SAT_CAST( V );
|
||||
}
|
||||
}
|
||||
|
||||
__constant float c_YUV2RGBCoeffs_f[5] = { 2.032f, -0.395f, -0.581f, 1.140f };
|
||||
__constant int c_YUV2RGBCoeffs_i[5] = { 33292, -6472, -9519, 18678 };
|
||||
|
||||
__kernel void YUV2RGB(__global const uchar* srcptr, int srcstep, int srcoffset,
|
||||
__global uchar* dstptr, int dststep, int dstoffset,
|
||||
int rows, int cols)
|
||||
{
|
||||
int x = get_global_id(0);
|
||||
int y = get_global_id(1);
|
||||
|
||||
if (y < rows && x < cols)
|
||||
{
|
||||
const DATA_TYPE* src = (const DATA_TYPE*)(srcptr + mad24(y, srcstep, srcoffset + x * scnbytes));
|
||||
DATA_TYPE* dst = (DATA_TYPE*)(dstptr + mad24(y, dststep, dstoffset + x * dcnbytes));
|
||||
DATA_TYPE Y = src[0], U = src[1], V = src[2];
|
||||
|
||||
#if defined (DEPTH_5)
|
||||
__constant float * coeffs = c_YUV2RGBCoeffs_f;
|
||||
const float r = Y + (V - HALF_MAX) * coeffs[3];
|
||||
const float g = Y + (V - HALF_MAX) * coeffs[2] + (U - HALF_MAX) * coeffs[1];
|
||||
const float b = Y + (U - HALF_MAX) * coeffs[0];
|
||||
#else
|
||||
__constant int * coeffs = c_YUV2RGBCoeffs_i;
|
||||
const int r = Y + CV_DESCALE((V - HALF_MAX) * coeffs[3], yuv_shift);
|
||||
const int g = Y + CV_DESCALE((V - HALF_MAX) * coeffs[2] + (U - HALF_MAX) * coeffs[1], yuv_shift);
|
||||
const int b = Y + CV_DESCALE((U - HALF_MAX) * coeffs[0], yuv_shift);
|
||||
#endif
|
||||
|
||||
dst[bidx] = SAT_CAST( b );
|
||||
dst[1] = SAT_CAST( g );
|
||||
dst[bidx^2] = SAT_CAST( r );
|
||||
#if dcn == 4
|
||||
dst[3] = MAX_NUM;
|
||||
#endif
|
||||
}
|
||||
}
|
||||
|
||||
__constant int ITUR_BT_601_CY = 1220542;
|
||||
__constant int ITUR_BT_601_CUB = 2116026;
|
||||
__constant int ITUR_BT_601_CUG = 409993;
|
||||
__constant int ITUR_BT_601_CVG = 852492;
|
||||
__constant int ITUR_BT_601_CVR = 1673527;
|
||||
__constant int ITUR_BT_601_SHIFT = 20;
|
||||
|
||||
__kernel void YUV2RGBA_NV12(__global const uchar* srcptr, int srcstep, int srcoffset,
|
||||
__global uchar* dstptr, int dststep, int dstoffset,
|
||||
int rows, int cols)
|
||||
{
|
||||
const int x = get_global_id(0); // max_x = width / 2
|
||||
const int y = get_global_id(1); // max_y = height/ 2
|
||||
|
||||
if (y < rows / 2 && x < cols / 2 )
|
||||
{
|
||||
__global const uchar* ysrc = srcptr + mad24(y << 1, srcstep, (x << 1) + srcoffset);
|
||||
__global const uchar* usrc = srcptr + mad24(rows + y, srcstep, (x << 1) + srcoffset);
|
||||
__global uchar* dst1 = dstptr + mad24(y << 1, dststep, x*(dcn*2) + dstoffset);
|
||||
__global uchar* dst2 = dstptr + mad24((y << 1) + 1, dststep, x*(dcn*2) + dstoffset);
|
||||
|
||||
int Y1 = ysrc[0];
|
||||
int Y2 = ysrc[1];
|
||||
int Y3 = ysrc[srcstep];
|
||||
int Y4 = ysrc[srcstep + 1];
|
||||
|
||||
int U = usrc[0] - 128;
|
||||
int V = usrc[1] - 128;
|
||||
|
||||
int ruv = (1 << (ITUR_BT_601_SHIFT - 1)) + ITUR_BT_601_CVR * V;
|
||||
int guv = (1 << (ITUR_BT_601_SHIFT - 1)) - ITUR_BT_601_CVG * V - ITUR_BT_601_CUG * U;
|
||||
int buv = (1 << (ITUR_BT_601_SHIFT - 1)) + ITUR_BT_601_CUB * U;
|
||||
|
||||
Y1 = max(0, Y1 - 16) * ITUR_BT_601_CY;
|
||||
dst1[2 - bidx] = convert_uchar_sat((Y1 + ruv) >> ITUR_BT_601_SHIFT);
|
||||
dst1[1] = convert_uchar_sat((Y1 + guv) >> ITUR_BT_601_SHIFT);
|
||||
dst1[bidx] = convert_uchar_sat((Y1 + buv) >> ITUR_BT_601_SHIFT);
|
||||
#if dcn == 4
|
||||
dst1[3] = 255;
|
||||
#endif
|
||||
|
||||
Y2 = max(0, Y2 - 16) * ITUR_BT_601_CY;
|
||||
dst1[(dcn + 2) - bidx] = convert_uchar_sat((Y2 + ruv) >> ITUR_BT_601_SHIFT);
|
||||
dst1[dcn + 1] = convert_uchar_sat((Y2 + guv) >> ITUR_BT_601_SHIFT);
|
||||
dst1[dcn + bidx] = convert_uchar_sat((Y2 + buv) >> ITUR_BT_601_SHIFT);
|
||||
#if dcn == 4
|
||||
dst1[7] = 255;
|
||||
#endif
|
||||
|
||||
Y3 = max(0, Y3 - 16) * ITUR_BT_601_CY;
|
||||
dst2[2 - bidx] = convert_uchar_sat((Y3 + ruv) >> ITUR_BT_601_SHIFT);
|
||||
dst2[1] = convert_uchar_sat((Y3 + guv) >> ITUR_BT_601_SHIFT);
|
||||
dst2[bidx] = convert_uchar_sat((Y3 + buv) >> ITUR_BT_601_SHIFT);
|
||||
#if dcn == 4
|
||||
dst2[3] = 255;
|
||||
#endif
|
||||
|
||||
Y4 = max(0, Y4 - 16) * ITUR_BT_601_CY;
|
||||
dst2[(dcn + 2) - bidx] = convert_uchar_sat((Y4 + ruv) >> ITUR_BT_601_SHIFT);
|
||||
dst2[dcn + 1] = convert_uchar_sat((Y4 + guv) >> ITUR_BT_601_SHIFT);
|
||||
dst2[dcn + bidx] = convert_uchar_sat((Y4 + buv) >> ITUR_BT_601_SHIFT);
|
||||
#if dcn == 4
|
||||
dst2[7] = 255;
|
||||
#endif
|
||||
}
|
||||
}
|
||||
|
||||
///////////////////////////////////// RGB <-> YUV //////////////////////////////////////
|
||||
|
||||
__constant float c_RGB2YCrCbCoeffs_f[5] = {0.299f, 0.587f, 0.114f, 0.713f, 0.564f};
|
||||
__constant int c_RGB2YCrCbCoeffs_i[5] = {R2Y, G2Y, B2Y, 11682, 9241};
|
||||
|
||||
__kernel void RGB2YCrCb(__global const uchar* srcptr, int srcstep, int srcoffset,
|
||||
__global uchar* dstptr, int dststep, int dstoffset,
|
||||
int rows, int cols)
|
||||
{
|
||||
int x = get_global_id(0);
|
||||
int y = get_global_id(1);
|
||||
|
||||
if (y < rows && x < cols)
|
||||
{
|
||||
const DATA_TYPE* src = (const DATA_TYPE*)(srcptr + mad24(y, srcstep, srcoffset + x * scnbytes));
|
||||
DATA_TYPE* dst = (DATA_TYPE*)(dstptr + mad24(y, dststep, dstoffset + x * dcnbytes));
|
||||
DATA_TYPE b=src[bidx], g=src[1], r=src[bidx^2];
|
||||
|
||||
#if defined (DEPTH_5)
|
||||
__constant float * coeffs = c_RGB2YCrCbCoeffs_f;
|
||||
const DATA_TYPE Y = b * coeffs[0] + g * coeffs[1] + r * coeffs[2];
|
||||
const DATA_TYPE Cr = (r - Y) * coeffs[3] + HALF_MAX;
|
||||
const DATA_TYPE Cb = (b - Y) * coeffs[4] + HALF_MAX;
|
||||
#else
|
||||
__constant int * coeffs = c_RGB2YCrCbCoeffs_i;
|
||||
const int delta = HALF_MAX * (1 << yuv_shift);
|
||||
const int Y = CV_DESCALE(b * coeffs[0] + g * coeffs[1] + r * coeffs[2], yuv_shift);
|
||||
const int Cr = CV_DESCALE((r - Y) * coeffs[3] + delta, yuv_shift);
|
||||
const int Cb = CV_DESCALE((b - Y) * coeffs[4] + delta, yuv_shift);
|
||||
#endif
|
||||
|
||||
dst[0] = SAT_CAST( Y );
|
||||
dst[1] = SAT_CAST( Cr );
|
||||
dst[2] = SAT_CAST( Cb );
|
||||
}
|
||||
}
|
151
modules/imgproc/src/opencl/resize.cl
Normal file
151
modules/imgproc/src/opencl/resize.cl
Normal file
@ -0,0 +1,151 @@
|
||||
/*M///////////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
|
||||
//
|
||||
// By downloading, copying, installing or using the software you agree to this license.
|
||||
// If you do not agree to this license, do not download, install,
|
||||
// copy or use the software.
|
||||
//
|
||||
//
|
||||
// License Agreement
|
||||
// For Open Source Computer Vision Library
|
||||
//
|
||||
// Copyright (C) 2010-2012, Institute Of Software Chinese Academy Of Science, all rights reserved.
|
||||
// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
|
||||
// Third party copyrights are property of their respective owners.
|
||||
//
|
||||
// @Authors
|
||||
// Zhang Ying, zhangying913@gmail.com
|
||||
// Niko Li, newlife20080214@gmail.com
|
||||
// Redistribution and use in source and binary forms, with or without modification,
|
||||
// are permitted provided that the following conditions are met:
|
||||
//
|
||||
// * Redistribution's of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimer.
|
||||
//
|
||||
// * Redistribution's in binary form must reproduce the above copyright notice,
|
||||
// this list of conditions and the following disclaimer in the documentation
|
||||
// and/or other materials provided with the distribution.
|
||||
//
|
||||
// * The name of the copyright holders may not be used to endorse or promote products
|
||||
// derived from this software without specific prior written permission.
|
||||
//
|
||||
// This software is provided by the copyright holders and contributors as is and
|
||||
// any express or implied warranties, including, but not limited to, the implied
|
||||
// warranties of merchantability and fitness for a particular purpose are disclaimed.
|
||||
// In no event shall the Intel Corporation or contributors be liable for any direct,
|
||||
// indirect, incidental, special, exemplary, or consequential damages
|
||||
// (including, but not limited to, procurement of substitute goods or services;
|
||||
// loss of use, data, or profits; or business interruption) however caused
|
||||
// and on any theory of liability, whether in contract, strict liability,
|
||||
// or tort (including negligence or otherwise) arising in any way out of
|
||||
// the use of this software, even if advised of the possibility of such damage.
|
||||
//
|
||||
//M*/
|
||||
|
||||
|
||||
// resize kernel
|
||||
// Currently, CV_8UC1 CV_8UC4 CV_32FC1 and CV_32FC4are supported.
|
||||
// We shall support other types later if necessary.
|
||||
|
||||
#if defined DOUBLE_SUPPORT
|
||||
#pragma OPENCL EXTENSION cl_khr_fp64:enable
|
||||
#define F double
|
||||
#else
|
||||
#define F float
|
||||
#endif
|
||||
|
||||
#define INTER_RESIZE_COEF_BITS 11
|
||||
#define INTER_RESIZE_COEF_SCALE (1 << INTER_RESIZE_COEF_BITS)
|
||||
#define CAST_BITS (INTER_RESIZE_COEF_BITS << 1)
|
||||
#define CAST_SCALE (1.0f/(1<<CAST_BITS))
|
||||
#define INC(x,l) min(x+1,l-1)
|
||||
|
||||
#define PIXSIZE ((int)sizeof(PIXTYPE))
|
||||
#define noconvert(x) (x)
|
||||
|
||||
#if defined INTER_LINEAR
|
||||
|
||||
__kernel void resizeLN(__global const uchar* srcptr, int srcstep, int srcoffset,
|
||||
int srcrows, int srccols,
|
||||
__global uchar* dstptr, int dststep, int dstoffset,
|
||||
int dstrows, int dstcols,
|
||||
float ifx, float ify)
|
||||
{
|
||||
int dx = get_global_id(0);
|
||||
int dy = get_global_id(1);
|
||||
|
||||
float sx = ((dx+0.5f) * ifx - 0.5f), sy = ((dy+0.5f) * ify - 0.5f);
|
||||
int x = floor(sx), y = floor(sy);
|
||||
|
||||
float u = sx - x, v = sy - y;
|
||||
|
||||
if ( x<0 ) x=0,u=0;
|
||||
if ( x>=srccols ) x=srccols-1,u=0;
|
||||
if ( y<0 ) y=0,v=0;
|
||||
if ( y>=srcrows ) y=srcrows-1,v=0;
|
||||
|
||||
int y_ = INC(y,srcrows);
|
||||
int x_ = INC(x,srccols);
|
||||
const PIXTYPE* src = (const PIXTYPE*)(srcptr + mad24(y, srcstep, srcoffset + x*PIXSIZE));
|
||||
|
||||
#if depth == 0
|
||||
u = u * INTER_RESIZE_COEF_SCALE;
|
||||
v = v * INTER_RESIZE_COEF_SCALE;
|
||||
|
||||
int U = rint(u);
|
||||
int V = rint(v);
|
||||
int U1 = rint(INTER_RESIZE_COEF_SCALE - u);
|
||||
int V1 = rint(INTER_RESIZE_COEF_SCALE - v);
|
||||
|
||||
WORKTYPE data0 = convertToWT(*(const PIXTYPE*)(srcptr + mad24(y, srcstep, srcoffset + x*PIXSIZE)));
|
||||
WORKTYPE data1 = convertToWT(*(const PIXTYPE*)(srcptr + mad24(y, srcstep, srcoffset + x_*PIXSIZE)));
|
||||
WORKTYPE data2 = convertToWT(*(const PIXTYPE*)(srcptr + mad24(y_, srcstep, srcoffset + x*PIXSIZE)));
|
||||
WORKTYPE data3 = convertToWT(*(const PIXTYPE*)(srcptr + mad24(y_, srcstep, srcoffset + x_*PIXSIZE)));
|
||||
WORKTYPE val = mul24((WORKTYPE)mul24(U1, V1), data0) + mul24((WORKTYPE)mul24(U, V1), data1) +
|
||||
mul24((WORKTYPE)mul24(U1, V), data2) + mul24((WORKTYPE)mul24(U, V), data3);
|
||||
|
||||
PIXTYPE uval = convertToDT((val + (1<<(CAST_BITS-1)))>>CAST_BITS);
|
||||
#else
|
||||
float u1 = 1.f-u;
|
||||
float v1 = 1.f-v;
|
||||
WORKTYPE data0 = convertToWT(*(const PIXTYPE*)(srcptr + mad24(y, srcstep, srcoffset + x*PIXSIZE)));
|
||||
WORKTYPE data1 = convertToWT(*(const PIXTYPE*)(srcptr + mad24(y, srcstep, srcoffset + x_*PIXSIZE)));
|
||||
WORKTYPE data2 = convertToWT(*(const PIXTYPE*)(srcptr + mad24(y_, srcstep, srcoffset + x*PIXSIZE)));
|
||||
WORKTYPE data3 = convertToWT(*(const PIXTYPE*)(srcptr + mad24(y_, srcstep, srcoffset + x_*PIXSIZE)));
|
||||
PIXTYPE uval = u1 * v1 * s_data1 + u * v1 * s_data2 + u1 * v *s_data3 + u * v *s_data4;
|
||||
#endif
|
||||
|
||||
if(dx < dstcols && dy < dstrows)
|
||||
{
|
||||
PIXTYPE* dst = (PIXTYPE*)(dstptr + mad24(dy, dststep, dstoffset + dx*PIXSIZE));
|
||||
dst[0] = uval;
|
||||
}
|
||||
}
|
||||
|
||||
#elif defined INTER_NEAREST
|
||||
|
||||
__kernel void resizeNN(__global const uchar* srcptr, int srcstep, int srcoffset,
|
||||
int srcrows, int srccols,
|
||||
__global uchar* dstptr, int dststep, int dstoffset,
|
||||
int dstrows, int dstcols,
|
||||
float ifx, float ify)
|
||||
{
|
||||
int dx = get_global_id(0);
|
||||
int dy = get_global_id(1);
|
||||
|
||||
if( dx < dstcols && dy < dstrows )
|
||||
{
|
||||
F s1 = dx*ifx;
|
||||
F s2 = dy*ify;
|
||||
int sx = min(convert_int_rtz(s1), srccols-1);
|
||||
int sy = min(convert_int_rtz(s2), srcrows-1);
|
||||
PIXTYPE* dst = (PIXTYPE*)(dstptr +
|
||||
mad24(dy, dststep, dstoffset + dx*PIXSIZE));
|
||||
const PIXTYPE* src = (const PIXTYPE*)(srcptr +
|
||||
mad24(sy, srcstep, srcoffset + sx*PIXSIZE));
|
||||
dst[0] = src[0];
|
||||
}
|
||||
}
|
||||
|
||||
#endif
|
@ -48,6 +48,7 @@
|
||||
|
||||
#include "opencv2/imgproc/imgproc_c.h"
|
||||
#include "opencv2/core/private.hpp"
|
||||
#include "opencv2/core/ocl.hpp"
|
||||
|
||||
#include <math.h>
|
||||
#include <assert.h>
|
||||
|
82
modules/imgproc/test/test_imgproc_umat.cpp
Normal file
82
modules/imgproc/test/test_imgproc_umat.cpp
Normal file
@ -0,0 +1,82 @@
|
||||
/*M///////////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
|
||||
//
|
||||
// By downloading, copying, installing or using the software you agree to this license.
|
||||
// If you do not agree to this license, do not download, install,
|
||||
// copy or use the software.
|
||||
//
|
||||
//
|
||||
// License Agreement
|
||||
// For Open Source Computer Vision Library
|
||||
//
|
||||
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
|
||||
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
|
||||
// Third party copyrights are property of their respective owners.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without modification,
|
||||
// are permitted provided that the following conditions are met:
|
||||
//
|
||||
// * Redistribution's of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimer.
|
||||
//
|
||||
// * Redistribution's in binary form must reproduce the above copyright notice,
|
||||
// this list of conditions and the following disclaimer in the documentation
|
||||
// and/or other materials provided with the distribution.
|
||||
//
|
||||
// * The name of the copyright holders may not be used to endorse or promote products
|
||||
// derived from this software without specific prior written permission.
|
||||
//
|
||||
// This software is provided by the copyright holders and contributors "as is" and
|
||||
// any express or implied warranties, including, but not limited to, the implied
|
||||
// warranties of merchantability and fitness for a particular purpose are disclaimed.
|
||||
// In no event shall the Intel Corporation or contributors be liable for any direct,
|
||||
// indirect, incidental, special, exemplary, or consequential damages
|
||||
// (including, but not limited to, procurement of substitute goods or services;
|
||||
// loss of use, data, or profits; or business interruption) however caused
|
||||
// and on any theory of liability, whether in contract, strict liability,
|
||||
// or tort (including negligence or otherwise) arising in any way out of
|
||||
// the use of this software, even if advised of the possibility of such damage.
|
||||
//
|
||||
//M*/
|
||||
|
||||
#include "test_precomp.hpp"
|
||||
#include <string>
|
||||
|
||||
using namespace cv;
|
||||
using namespace std;
|
||||
|
||||
class CV_ImgprocUMatTest : public cvtest::BaseTest
|
||||
{
|
||||
public:
|
||||
CV_ImgprocUMatTest() {}
|
||||
~CV_ImgprocUMatTest() {}
|
||||
protected:
|
||||
void run(int)
|
||||
{
|
||||
string imgpath = string(ts->get_data_path()) + "shared/lena.png";
|
||||
Mat img = imread(imgpath, 1), gray, smallimg, result;
|
||||
UMat uimg = img.getUMat(ACCESS_READ), ugray, usmallimg, uresult;
|
||||
|
||||
cvtColor(img, gray, COLOR_BGR2GRAY);
|
||||
resize(gray, smallimg, Size(), 0.75, 0.75, INTER_LINEAR);
|
||||
equalizeHist(smallimg, result);
|
||||
|
||||
cvtColor(uimg, ugray, COLOR_BGR2GRAY);
|
||||
resize(ugray, usmallimg, Size(), 0.75, 0.75, INTER_LINEAR);
|
||||
equalizeHist(usmallimg, uresult);
|
||||
|
||||
#if 0
|
||||
imshow("orig", uimg);
|
||||
imshow("small", usmallimg);
|
||||
imshow("equalized gray", uresult);
|
||||
waitKey();
|
||||
destroyWindow("orig");
|
||||
destroyWindow("small");
|
||||
destroyWindow("equalized gray");
|
||||
#endif
|
||||
ts->set_failed_test_info(cvtest::TS::OK);
|
||||
}
|
||||
};
|
||||
|
||||
TEST(Imgproc_UMat, regression) { CV_ImgprocUMatTest test; test.safe_run(); }
|
@ -52,6 +52,8 @@
|
||||
#include "opencv2/nonfree/cuda.hpp"
|
||||
#include "opencv2/core/private.cuda.hpp"
|
||||
|
||||
#include "opencv2/core/ocl.hpp"
|
||||
|
||||
#include "opencv2/opencv_modules.hpp"
|
||||
|
||||
#ifdef HAVE_OPENCV_CUDAARITHM
|
||||
|
@ -51,6 +51,8 @@
|
||||
using namespace cv;
|
||||
using namespace cv::ocl;
|
||||
|
||||
static ProgramEntry surfprog = cv::ocl::nonfree::surf;
|
||||
|
||||
namespace cv
|
||||
{
|
||||
namespace ocl
|
||||
@ -499,7 +501,7 @@ void SURF_OCL_Invoker::icvCalcLayerDetAndTrace_gpu(oclMat &det, oclMat &trace, i
|
||||
divUp(max_samples_i, localThreads[1]) *localThreads[1] *(nOctaveLayers + 2),
|
||||
1
|
||||
};
|
||||
openCLExecuteKernelSURF(clCxt, &surf, kernelName, globalThreads, localThreads, args, -1, -1);
|
||||
openCLExecuteKernelSURF(clCxt, &surfprog, kernelName, globalThreads, localThreads, args, -1, -1);
|
||||
}
|
||||
|
||||
void SURF_OCL_Invoker::icvFindMaximaInLayer_gpu(const oclMat &det, const oclMat &trace, oclMat &maxPosBuffer, oclMat &maxCounter, int counterOffset,
|
||||
@ -545,7 +547,7 @@ void SURF_OCL_Invoker::icvFindMaximaInLayer_gpu(const oclMat &det, const oclMat
|
||||
1
|
||||
};
|
||||
|
||||
openCLExecuteKernelSURF(clCxt, &surf, kernelName, globalThreads, localThreads, args, -1, -1);
|
||||
openCLExecuteKernelSURF(clCxt, &surfprog, kernelName, globalThreads, localThreads, args, -1, -1);
|
||||
}
|
||||
|
||||
void SURF_OCL_Invoker::icvInterpolateKeypoint_gpu(const oclMat &det, const oclMat &maxPosBuffer, int maxCounter,
|
||||
@ -570,7 +572,7 @@ void SURF_OCL_Invoker::icvInterpolateKeypoint_gpu(const oclMat &det, const oclMa
|
||||
size_t localThreads[3] = {3, 3, 3};
|
||||
size_t globalThreads[3] = {maxCounter *localThreads[0], localThreads[1], 1};
|
||||
|
||||
openCLExecuteKernelSURF(clCxt, &surf, kernelName, globalThreads, localThreads, args, -1, -1);
|
||||
openCLExecuteKernelSURF(clCxt, &surfprog, kernelName, globalThreads, localThreads, args, -1, -1);
|
||||
}
|
||||
|
||||
void SURF_OCL_Invoker::icvCalcOrientation_gpu(const oclMat &keypoints, int nFeatures)
|
||||
@ -597,7 +599,7 @@ void SURF_OCL_Invoker::icvCalcOrientation_gpu(const oclMat &keypoints, int nFeat
|
||||
size_t localThreads[3] = {32, 4, 1};
|
||||
size_t globalThreads[3] = {nFeatures *localThreads[0], localThreads[1], 1};
|
||||
|
||||
openCLExecuteKernelSURF(clCxt, &surf, kernelName, globalThreads, localThreads, args, -1, -1);
|
||||
openCLExecuteKernelSURF(clCxt, &surfprog, kernelName, globalThreads, localThreads, args, -1, -1);
|
||||
}
|
||||
|
||||
void SURF_OCL_Invoker::icvSetUpright_gpu(const oclMat &keypoints, int nFeatures)
|
||||
@ -614,7 +616,7 @@ void SURF_OCL_Invoker::icvSetUpright_gpu(const oclMat &keypoints, int nFeatures)
|
||||
size_t localThreads[3] = {256, 1, 1};
|
||||
size_t globalThreads[3] = {saturate_cast<size_t>(nFeatures), 1, 1};
|
||||
|
||||
openCLExecuteKernelSURF(clCxt, &surf, kernelName, globalThreads, localThreads, args, -1, -1);
|
||||
openCLExecuteKernelSURF(clCxt, &surfprog, kernelName, globalThreads, localThreads, args, -1, -1);
|
||||
}
|
||||
|
||||
|
||||
@ -654,7 +656,7 @@ void SURF_OCL_Invoker::compute_descriptors_gpu(const oclMat &descriptors, const
|
||||
args.push_back( std::make_pair( sizeof(cl_int), (void *)&_img.cols));
|
||||
args.push_back( std::make_pair( sizeof(cl_int), (void *)&_img.step));
|
||||
|
||||
openCLExecuteKernelSURF(clCxt, &surf, kernelName, globalThreads, localThreads, args, -1, -1);
|
||||
openCLExecuteKernelSURF(clCxt, &surfprog, kernelName, globalThreads, localThreads, args, -1, -1);
|
||||
|
||||
kernelName = "normalize_descriptors64";
|
||||
|
||||
@ -668,7 +670,7 @@ void SURF_OCL_Invoker::compute_descriptors_gpu(const oclMat &descriptors, const
|
||||
args.push_back( std::make_pair( sizeof(cl_mem), (void *)&descriptors.data));
|
||||
args.push_back( std::make_pair( sizeof(cl_int), (void *)&descriptors.step));
|
||||
|
||||
openCLExecuteKernelSURF(clCxt, &surf, kernelName, globalThreads, localThreads, args, -1, -1);
|
||||
openCLExecuteKernelSURF(clCxt, &surfprog, kernelName, globalThreads, localThreads, args, -1, -1);
|
||||
}
|
||||
else
|
||||
{
|
||||
@ -697,7 +699,7 @@ void SURF_OCL_Invoker::compute_descriptors_gpu(const oclMat &descriptors, const
|
||||
args.push_back( std::make_pair( sizeof(cl_int), (void *)&_img.cols));
|
||||
args.push_back( std::make_pair( sizeof(cl_int), (void *)&_img.step));
|
||||
|
||||
openCLExecuteKernelSURF(clCxt, &surf, kernelName, globalThreads, localThreads, args, -1, -1);
|
||||
openCLExecuteKernelSURF(clCxt, &surfprog, kernelName, globalThreads, localThreads, args, -1, -1);
|
||||
|
||||
kernelName = "normalize_descriptors128";
|
||||
|
||||
@ -711,7 +713,7 @@ void SURF_OCL_Invoker::compute_descriptors_gpu(const oclMat &descriptors, const
|
||||
args.push_back( std::make_pair( sizeof(cl_mem), (void *)&descriptors.data));
|
||||
args.push_back( std::make_pair( sizeof(cl_int), (void *)&descriptors.step));
|
||||
|
||||
openCLExecuteKernelSURF(clCxt, &surf, kernelName, globalThreads, localThreads, args, -1, -1);
|
||||
openCLExecuteKernelSURF(clCxt, &surfprog, kernelName, globalThreads, localThreads, args, -1, -1);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -188,8 +188,8 @@ CascadeClassifier::detectMultiScale
|
||||
---------------------------------------
|
||||
Detects objects of different sizes in the input image. The detected objects are returned as a list of rectangles.
|
||||
|
||||
.. ocv:function:: void CascadeClassifier::detectMultiScale( const Mat& image, vector<Rect>& objects, double scaleFactor=1.1, int minNeighbors=3, int flags=0, Size minSize=Size(), Size maxSize=Size())
|
||||
.. ocv:function:: void CascadeClassifier::detectMultiScale( const Mat& image, vector<Rect>& objects, vector<int>& numDetections, double scaleFactor=1.1, int minNeighbors=3, int flags=0, Size minSize=Size(), Size maxSize=Size())
|
||||
.. ocv:function:: void CascadeClassifier::detectMultiScale( InputArray image, vector<Rect>& objects, double scaleFactor=1.1, int minNeighbors=3, int flags=0, Size minSize=Size(), Size maxSize=Size())
|
||||
.. ocv:function:: void CascadeClassifier::detectMultiScale( InputArray image, vector<Rect>& objects, vector<int>& numDetections, double scaleFactor=1.1, int minNeighbors=3, int flags=0, Size minSize=Size(), Size maxSize=Size())
|
||||
|
||||
.. ocv:pyfunction:: cv2.CascadeClassifier.detectMultiScale(image[, scaleFactor[, minNeighbors[, flags[, minSize[, maxSize]]]]]) -> objects
|
||||
.. ocv:pyfunction:: cv2.CascadeClassifier.detectMultiScale(image[, scaleFactor[, minNeighbors[, flags[, minSize[, maxSize[, outputRejectLevels]]]]]]) -> objects, rejectLevels, levelWeights
|
||||
|
@ -159,14 +159,14 @@ public:
|
||||
CV_WRAP virtual bool empty() const;
|
||||
CV_WRAP bool load( const String& filename );
|
||||
virtual bool read( const FileNode& node );
|
||||
CV_WRAP virtual void detectMultiScale( const Mat& image,
|
||||
CV_WRAP virtual void detectMultiScale( InputArray image,
|
||||
CV_OUT std::vector<Rect>& objects,
|
||||
double scaleFactor = 1.1,
|
||||
int minNeighbors = 3, int flags = 0,
|
||||
Size minSize = Size(),
|
||||
Size maxSize = Size() );
|
||||
|
||||
CV_WRAP virtual void detectMultiScale( const Mat& image,
|
||||
CV_WRAP virtual void detectMultiScale( InputArray image,
|
||||
CV_OUT std::vector<Rect>& objects,
|
||||
CV_OUT std::vector<int>& numDetections,
|
||||
double scaleFactor=1.1,
|
||||
@ -174,7 +174,7 @@ public:
|
||||
Size minSize=Size(),
|
||||
Size maxSize=Size() );
|
||||
|
||||
CV_WRAP virtual void detectMultiScale( const Mat& image,
|
||||
CV_WRAP virtual void detectMultiScale( InputArray image,
|
||||
CV_OUT std::vector<Rect>& objects,
|
||||
CV_OUT std::vector<int>& rejectLevels,
|
||||
CV_OUT std::vector<double>& levelWeights,
|
||||
|
@ -1154,13 +1154,14 @@ void CascadeClassifier::detectMultiScaleNoGrouping( const Mat& image, std::vecto
|
||||
}
|
||||
}
|
||||
|
||||
void CascadeClassifier::detectMultiScale( const Mat& image, std::vector<Rect>& objects,
|
||||
void CascadeClassifier::detectMultiScale( InputArray _image, std::vector<Rect>& objects,
|
||||
std::vector<int>& rejectLevels,
|
||||
std::vector<double>& levelWeights,
|
||||
double scaleFactor, int minNeighbors,
|
||||
int flags, Size minObjectSize, Size maxObjectSize,
|
||||
bool outputRejectLevels )
|
||||
{
|
||||
Mat image = _image.getMat();
|
||||
CV_Assert( scaleFactor > 1 && image.depth() == CV_8U );
|
||||
|
||||
if( empty() )
|
||||
@ -1188,21 +1189,23 @@ void CascadeClassifier::detectMultiScale( const Mat& image, std::vector<Rect>& o
|
||||
}
|
||||
}
|
||||
|
||||
void CascadeClassifier::detectMultiScale( const Mat& image, std::vector<Rect>& objects,
|
||||
void CascadeClassifier::detectMultiScale( InputArray _image, std::vector<Rect>& objects,
|
||||
double scaleFactor, int minNeighbors,
|
||||
int flags, Size minObjectSize, Size maxObjectSize)
|
||||
{
|
||||
Mat image = _image.getMat();
|
||||
std::vector<int> fakeLevels;
|
||||
std::vector<double> fakeWeights;
|
||||
detectMultiScale( image, objects, fakeLevels, fakeWeights, scaleFactor,
|
||||
minNeighbors, flags, minObjectSize, maxObjectSize );
|
||||
}
|
||||
|
||||
void CascadeClassifier::detectMultiScale( const Mat& image, std::vector<Rect>& objects,
|
||||
void CascadeClassifier::detectMultiScale( InputArray _image, std::vector<Rect>& objects,
|
||||
std::vector<int>& numDetections, double scaleFactor,
|
||||
int minNeighbors, int flags, Size minObjectSize,
|
||||
Size maxObjectSize )
|
||||
{
|
||||
Mat image = _image.getMat();
|
||||
CV_Assert( scaleFactor > 1 && image.depth() == CV_8U );
|
||||
|
||||
if( empty() )
|
||||
|
@ -49,6 +49,7 @@
|
||||
#include "opencv2/ml.hpp"
|
||||
|
||||
#include "opencv2/core/utility.hpp"
|
||||
#include "opencv2/core/ocl.hpp"
|
||||
|
||||
#include "opencv2/opencv_modules.hpp"
|
||||
#ifdef HAVE_OPENCV_HIGHGUI
|
||||
|
@ -47,6 +47,7 @@
|
||||
#define __OPENCV_OCL_PRIVATE_UTIL__
|
||||
|
||||
#include "opencv2/ocl/cl_runtime/cl_runtime.hpp"
|
||||
#include "opencv2/core/ocl_genbase.hpp"
|
||||
|
||||
#include "opencv2/ocl.hpp"
|
||||
|
||||
@ -55,13 +56,6 @@ namespace cv
|
||||
namespace ocl
|
||||
{
|
||||
|
||||
struct ProgramEntry
|
||||
{
|
||||
const char* name;
|
||||
const char* programStr;
|
||||
const char* programHash;
|
||||
};
|
||||
|
||||
inline cl_device_id getClDeviceID(const Context *ctx)
|
||||
{
|
||||
return *(cl_device_id*)(ctx->getOpenCLDeviceIDPtr());
|
||||
|
@ -50,11 +50,11 @@ void loadImage(string path, Mat &img)
|
||||
ASSERT_FALSE(img.empty()) << "Could not load input image " << path;
|
||||
}
|
||||
|
||||
void checkEqual(Mat img0, Mat img1, double threshold)
|
||||
void checkEqual(Mat img0, Mat img1, double threshold, const string& name)
|
||||
{
|
||||
double max = 1.0;
|
||||
minMaxLoc(abs(img0 - img1), NULL, &max);
|
||||
ASSERT_FALSE(max > threshold) << max;
|
||||
ASSERT_FALSE(max > threshold) << "max=" << max << " threshold=" << threshold << " method=" << name;
|
||||
}
|
||||
|
||||
static vector<float> DEFAULT_VECTOR;
|
||||
@ -98,31 +98,31 @@ TEST(Photo_Tonemap, regression)
|
||||
linear->process(img, result);
|
||||
loadImage(test_path + "linear.png", expected);
|
||||
result.convertTo(result, CV_8UC3, 255);
|
||||
checkEqual(result, expected, 3);
|
||||
checkEqual(result, expected, 3, "Simple");
|
||||
|
||||
Ptr<TonemapDrago> drago = createTonemapDrago(gamma);
|
||||
drago->process(img, result);
|
||||
loadImage(test_path + "drago.png", expected);
|
||||
result.convertTo(result, CV_8UC3, 255);
|
||||
checkEqual(result, expected, 3);
|
||||
checkEqual(result, expected, 3, "Drago");
|
||||
|
||||
Ptr<TonemapDurand> durand = createTonemapDurand(gamma);
|
||||
durand->process(img, result);
|
||||
loadImage(test_path + "durand.png", expected);
|
||||
result.convertTo(result, CV_8UC3, 255);
|
||||
checkEqual(result, expected, 3);
|
||||
checkEqual(result, expected, 3, "Durand");
|
||||
|
||||
Ptr<TonemapReinhard> reinhard = createTonemapReinhard(gamma);
|
||||
reinhard->process(img, result);
|
||||
loadImage(test_path + "reinhard.png", expected);
|
||||
result.convertTo(result, CV_8UC3, 255);
|
||||
checkEqual(result, expected, 3);
|
||||
checkEqual(result, expected, 3, "Reinhard");
|
||||
|
||||
Ptr<TonemapMantiuk> mantiuk = createTonemapMantiuk(gamma);
|
||||
mantiuk->process(img, result);
|
||||
loadImage(test_path + "mantiuk.png", expected);
|
||||
result.convertTo(result, CV_8UC3, 255);
|
||||
checkEqual(result, expected, 3);
|
||||
checkEqual(result, expected, 3, "Mantiuk");
|
||||
}
|
||||
|
||||
TEST(Photo_AlignMTB, regression)
|
||||
@ -165,7 +165,7 @@ TEST(Photo_MergeMertens, regression)
|
||||
loadImage(test_path + "merge/mertens.png", expected);
|
||||
merge->process(images, result);
|
||||
result.convertTo(result, CV_8UC3, 255);
|
||||
checkEqual(expected, result, 3);
|
||||
checkEqual(expected, result, 3, "Mertens");
|
||||
}
|
||||
|
||||
TEST(Photo_MergeDebevec, regression)
|
||||
@ -188,7 +188,7 @@ TEST(Photo_MergeDebevec, regression)
|
||||
map->process(result, result);
|
||||
map->process(expected, expected);
|
||||
|
||||
checkEqual(expected, result, 1e-2f);
|
||||
checkEqual(expected, result, 1e-2f, "Debevec");
|
||||
}
|
||||
|
||||
TEST(Photo_MergeRobertson, regression)
|
||||
@ -208,7 +208,7 @@ TEST(Photo_MergeRobertson, regression)
|
||||
map->process(result, result);
|
||||
map->process(expected, expected);
|
||||
|
||||
checkEqual(expected, result, 1e-2f);
|
||||
checkEqual(expected, result, 1e-2f, "MergeRobertson");
|
||||
}
|
||||
|
||||
TEST(Photo_CalibrateDebevec, regression)
|
||||
@ -242,5 +242,5 @@ TEST(Photo_CalibrateRobertson, regression)
|
||||
|
||||
Ptr<CalibrateRobertson> calibrate = createCalibrateRobertson();
|
||||
calibrate->process(images, response, times);
|
||||
checkEqual(expected, response, 1e-3f);
|
||||
checkEqual(expected, response, 1e-3f, "CalibrateRobertson");
|
||||
}
|
||||
|
@ -64,6 +64,8 @@ using namespace cv::ocl;
|
||||
using namespace cv::superres;
|
||||
using namespace cv::superres::detail;
|
||||
|
||||
static ProgramEntry superres_btvl1 = cv::ocl::superres::superres_btvl1;
|
||||
|
||||
namespace cv
|
||||
{
|
||||
namespace ocl
|
||||
|
@ -56,6 +56,7 @@
|
||||
#include "opencv2/core/private.hpp"
|
||||
|
||||
#include "opencv2/core/private.cuda.hpp"
|
||||
#include "opencv2/core/ocl.hpp"
|
||||
|
||||
#ifdef HAVE_OPENCV_CUDAARITHM
|
||||
# include "opencv2/cudaarithm.hpp"
|
||||
|
276
samples/cpp/ufacedetect.cpp
Normal file
276
samples/cpp/ufacedetect.cpp
Normal file
@ -0,0 +1,276 @@
|
||||
#include "opencv2/objdetect.hpp"
|
||||
#include "opencv2/highgui.hpp"
|
||||
#include "opencv2/imgproc.hpp"
|
||||
#include "opencv2/core/utility.hpp"
|
||||
#include "opencv2/core/ocl.hpp"
|
||||
|
||||
#include <cctype>
|
||||
#include <iostream>
|
||||
#include <iterator>
|
||||
#include <stdio.h>
|
||||
|
||||
using namespace std;
|
||||
using namespace cv;
|
||||
|
||||
static void help()
|
||||
{
|
||||
cout << "\nThis program demonstrates the cascade recognizer. Now you can use Haar or LBP features.\n"
|
||||
"This classifier can recognize many kinds of rigid objects, once the appropriate classifier is trained.\n"
|
||||
"It's most known use is for faces.\n"
|
||||
"Usage:\n"
|
||||
"./facedetect [--cascade=<cascade_path> this is the primary trained classifier such as frontal face]\n"
|
||||
" [--nested-cascade[=nested_cascade_path this an optional secondary classifier such as eyes]]\n"
|
||||
" [--scale=<image scale greater or equal to 1, try 1.3 for example>]\n"
|
||||
" [--try-flip]\n"
|
||||
" [filename|camera_index]\n\n"
|
||||
"see facedetect.cmd for one call:\n"
|
||||
"./facedetect --cascade=\"../../data/haarcascades/haarcascade_frontalface_alt.xml\" --nested-cascade=\"../../data/haarcascades/haarcascade_eye.xml\" --scale=1.3\n\n"
|
||||
"During execution:\n\tHit any key to quit.\n"
|
||||
"\tUsing OpenCV version " << CV_VERSION << "\n" << endl;
|
||||
}
|
||||
|
||||
void detectAndDraw( UMat& img, Mat& canvas, CascadeClassifier& cascade,
|
||||
CascadeClassifier& nestedCascade,
|
||||
double scale, bool tryflip );
|
||||
|
||||
string cascadeName = "../../data/haarcascades/haarcascade_frontalface_alt.xml";
|
||||
string nestedCascadeName = "../../data/haarcascades/haarcascade_eye_tree_eyeglasses.xml";
|
||||
|
||||
int main( int argc, const char** argv )
|
||||
{
|
||||
VideoCapture capture;
|
||||
UMat frame, image;
|
||||
Mat canvas;
|
||||
const string scaleOpt = "--scale=";
|
||||
size_t scaleOptLen = scaleOpt.length();
|
||||
const string cascadeOpt = "--cascade=";
|
||||
size_t cascadeOptLen = cascadeOpt.length();
|
||||
const string nestedCascadeOpt = "--nested-cascade";
|
||||
size_t nestedCascadeOptLen = nestedCascadeOpt.length();
|
||||
const string tryFlipOpt = "--try-flip";
|
||||
size_t tryFlipOptLen = tryFlipOpt.length();
|
||||
String inputName;
|
||||
bool tryflip = false;
|
||||
|
||||
help();
|
||||
|
||||
CascadeClassifier cascade, nestedCascade;
|
||||
double scale = 1;
|
||||
|
||||
for( int i = 1; i < argc; i++ )
|
||||
{
|
||||
cout << "Processing " << i << " " << argv[i] << endl;
|
||||
if( cascadeOpt.compare( 0, cascadeOptLen, argv[i], cascadeOptLen ) == 0 )
|
||||
{
|
||||
cascadeName.assign( argv[i] + cascadeOptLen );
|
||||
cout << " from which we have cascadeName= " << cascadeName << endl;
|
||||
}
|
||||
else if( nestedCascadeOpt.compare( 0, nestedCascadeOptLen, argv[i], nestedCascadeOptLen ) == 0 )
|
||||
{
|
||||
if( argv[i][nestedCascadeOpt.length()] == '=' )
|
||||
nestedCascadeName.assign( argv[i] + nestedCascadeOpt.length() + 1 );
|
||||
if( !nestedCascade.load( nestedCascadeName ) )
|
||||
cerr << "WARNING: Could not load classifier cascade for nested objects" << endl;
|
||||
}
|
||||
else if( scaleOpt.compare( 0, scaleOptLen, argv[i], scaleOptLen ) == 0 )
|
||||
{
|
||||
if( !sscanf( argv[i] + scaleOpt.length(), "%lf", &scale ) || scale > 1 )
|
||||
scale = 1;
|
||||
cout << " from which we read scale = " << scale << endl;
|
||||
}
|
||||
else if( tryFlipOpt.compare( 0, tryFlipOptLen, argv[i], tryFlipOptLen ) == 0 )
|
||||
{
|
||||
tryflip = true;
|
||||
cout << " will try to flip image horizontally to detect assymetric objects\n";
|
||||
}
|
||||
else if( argv[i][0] == '-' )
|
||||
{
|
||||
cerr << "WARNING: Unknown option %s" << argv[i] << endl;
|
||||
}
|
||||
else
|
||||
inputName = argv[i];
|
||||
}
|
||||
|
||||
if( !cascade.load( cascadeName ) )
|
||||
{
|
||||
cerr << "ERROR: Could not load classifier cascade" << endl;
|
||||
help();
|
||||
return -1;
|
||||
}
|
||||
|
||||
if( inputName.empty() || (isdigit(inputName.c_str()[0]) && inputName.c_str()[1] == '\0') )
|
||||
{
|
||||
int c = inputName.empty() ? 0 : inputName.c_str()[0] - '0';
|
||||
if(!capture.open(c))
|
||||
cout << "Capture from camera #" << c << " didn't work" << endl;
|
||||
}
|
||||
else
|
||||
{
|
||||
if( inputName.empty() )
|
||||
inputName = "lena.jpg";
|
||||
image = imread( inputName, 1 ).getUMat(ACCESS_READ);
|
||||
if( image.empty() )
|
||||
{
|
||||
if(!capture.open( inputName ))
|
||||
cout << "Could not read " << inputName << endl;
|
||||
}
|
||||
}
|
||||
|
||||
namedWindow( "result", 1 );
|
||||
|
||||
if( capture.isOpened() )
|
||||
{
|
||||
cout << "Video capturing has been started ..." << endl;
|
||||
for(;;)
|
||||
{
|
||||
capture >> frame;
|
||||
if( frame.empty() )
|
||||
break;
|
||||
|
||||
detectAndDraw( frame, canvas, cascade, nestedCascade, scale, tryflip );
|
||||
|
||||
if( waitKey( 10 ) >= 0 )
|
||||
break;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
cout << "Detecting face(s) in " << inputName << endl;
|
||||
if( !image.empty() )
|
||||
{
|
||||
detectAndDraw( image, canvas, cascade, nestedCascade, scale, tryflip );
|
||||
waitKey(0);
|
||||
}
|
||||
else if( !inputName.empty() )
|
||||
{
|
||||
/* assume it is a text file containing the
|
||||
list of the image filenames to be processed - one per line */
|
||||
FILE* f = fopen( inputName.c_str(), "rt" );
|
||||
if( f )
|
||||
{
|
||||
char buf[1000+1];
|
||||
while( fgets( buf, 1000, f ) )
|
||||
{
|
||||
int len = (int)strlen(buf), c;
|
||||
while( len > 0 && isspace(buf[len-1]) )
|
||||
len--;
|
||||
buf[len] = '\0';
|
||||
cout << "file " << buf << endl;
|
||||
image = imread( buf, 1 ).getUMat(ACCESS_READ);
|
||||
if( !image.empty() )
|
||||
{
|
||||
detectAndDraw( image, canvas, cascade, nestedCascade, scale, tryflip );
|
||||
c = waitKey(0);
|
||||
if( c == 27 || c == 'q' || c == 'Q' )
|
||||
break;
|
||||
}
|
||||
else
|
||||
{
|
||||
cerr << "Aw snap, couldn't read image " << buf << endl;
|
||||
}
|
||||
}
|
||||
fclose(f);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
void detectAndDraw( UMat& img, Mat& canvas, CascadeClassifier& cascade,
|
||||
CascadeClassifier& nestedCascade,
|
||||
double scale0, bool tryflip )
|
||||
{
|
||||
int i = 0;
|
||||
double t = 0, scale=1;
|
||||
vector<Rect> faces, faces2;
|
||||
const static Scalar colors[] =
|
||||
{
|
||||
Scalar(0,0,255),
|
||||
Scalar(0,128,255),
|
||||
Scalar(0,255,255),
|
||||
Scalar(0,255,0),
|
||||
Scalar(255,128,0),
|
||||
Scalar(255,255,0),
|
||||
Scalar(255,0,0),
|
||||
Scalar(255,0,255)
|
||||
};
|
||||
static UMat gray, smallImg;
|
||||
|
||||
t = (double)getTickCount();
|
||||
|
||||
cvtColor( img, gray, COLOR_BGR2GRAY );
|
||||
resize( gray, smallImg, Size(), scale0, scale0, INTER_LINEAR );
|
||||
cvtColor(smallImg, canvas, COLOR_GRAY2BGR);
|
||||
equalizeHist( smallImg, smallImg );
|
||||
|
||||
cascade.detectMultiScale( smallImg, faces,
|
||||
1.1, 2, 0
|
||||
//|CASCADE_FIND_BIGGEST_OBJECT
|
||||
//|CASCADE_DO_ROUGH_SEARCH
|
||||
|CASCADE_SCALE_IMAGE
|
||||
,
|
||||
Size(30, 30) );
|
||||
if( tryflip )
|
||||
{
|
||||
flip(smallImg, smallImg, 1);
|
||||
cascade.detectMultiScale( smallImg, faces2,
|
||||
1.1, 2, 0
|
||||
//|CASCADE_FIND_BIGGEST_OBJECT
|
||||
//|CASCADE_DO_ROUGH_SEARCH
|
||||
|CASCADE_SCALE_IMAGE
|
||||
,
|
||||
Size(30, 30) );
|
||||
for( vector<Rect>::const_iterator r = faces2.begin(); r != faces2.end(); r++ )
|
||||
{
|
||||
faces.push_back(Rect(smallImg.cols - r->x - r->width, r->y, r->width, r->height));
|
||||
}
|
||||
}
|
||||
t = (double)getTickCount() - t;
|
||||
cvtColor(smallImg, canvas, COLOR_GRAY2BGR);
|
||||
|
||||
double fps = getTickFrequency()/t;
|
||||
|
||||
putText(canvas, format("OpenCL: %s, fps: %.1f", ocl::useOpenCL() ? "ON" : "OFF", fps), Point(250, 50),
|
||||
FONT_HERSHEY_SIMPLEX, 1, Scalar(0,255,0), 3);
|
||||
|
||||
for( vector<Rect>::const_iterator r = faces.begin(); r != faces.end(); r++, i++ )
|
||||
{
|
||||
vector<Rect> nestedObjects;
|
||||
Point center;
|
||||
Scalar color = colors[i%8];
|
||||
int radius;
|
||||
|
||||
double aspect_ratio = (double)r->width/r->height;
|
||||
if( 0.75 < aspect_ratio && aspect_ratio < 1.3 )
|
||||
{
|
||||
center.x = cvRound((r->x + r->width*0.5)*scale);
|
||||
center.y = cvRound((r->y + r->height*0.5)*scale);
|
||||
radius = cvRound((r->width + r->height)*0.25*scale);
|
||||
circle( canvas, center, radius, color, 3, 8, 0 );
|
||||
}
|
||||
else
|
||||
rectangle( canvas, Point(cvRound(r->x*scale), cvRound(r->y*scale)),
|
||||
Point(cvRound((r->x + r->width-1)*scale), cvRound((r->y + r->height-1)*scale)),
|
||||
color, 3, 8, 0);
|
||||
if( nestedCascade.empty() )
|
||||
continue;
|
||||
UMat smallImgROI = smallImg(*r);
|
||||
nestedCascade.detectMultiScale( smallImgROI, nestedObjects,
|
||||
1.1, 2, 0
|
||||
//|CASCADE_FIND_BIGGEST_OBJECT
|
||||
//|CASCADE_DO_ROUGH_SEARCH
|
||||
//|CASCADE_DO_CANNY_PRUNING
|
||||
|CASCADE_SCALE_IMAGE
|
||||
,
|
||||
Size(30, 30) );
|
||||
for( vector<Rect>::const_iterator nr = nestedObjects.begin(); nr != nestedObjects.end(); nr++ )
|
||||
{
|
||||
center.x = cvRound((r->x + nr->x + nr->width*0.5)*scale);
|
||||
center.y = cvRound((r->y + nr->y + nr->height*0.5)*scale);
|
||||
radius = cvRound((nr->width + nr->height)*0.25*scale);
|
||||
circle( canvas, center, radius, color, 3, 8, 0 );
|
||||
}
|
||||
}
|
||||
imshow( "result", canvas );
|
||||
}
|
@ -11,7 +11,7 @@
|
||||
|
||||
using namespace std;
|
||||
using namespace cv;
|
||||
#define LOOP_NUM 10
|
||||
#define LOOP_NUM 1
|
||||
|
||||
const static Scalar colors[] = { CV_RGB(0,0,255),
|
||||
CV_RGB(0,128,255),
|
||||
@ -46,12 +46,12 @@ static double getTime()
|
||||
|
||||
static void detect( Mat& img, vector<Rect>& faces,
|
||||
ocl::OclCascadeClassifier& cascade,
|
||||
double scale, bool calTime);
|
||||
double scale);
|
||||
|
||||
|
||||
static void detectCPU( Mat& img, vector<Rect>& faces,
|
||||
CascadeClassifier& cascade,
|
||||
double scale, bool calTime);
|
||||
double scale);
|
||||
|
||||
static void Draw(Mat& img, vector<Rect>& faces, double scale);
|
||||
|
||||
@ -83,7 +83,7 @@ int main( int argc, const char** argv )
|
||||
}
|
||||
|
||||
CvCapture* capture = 0;
|
||||
Mat frame, frameCopy, image;
|
||||
Mat frame, frameCopy0, frameCopy, image;
|
||||
|
||||
bool useCPU = cmd.get<bool>("s");
|
||||
string inputName = cmd.get<string>("i");
|
||||
@ -129,16 +129,21 @@ int main( int argc, const char** argv )
|
||||
if( frame.empty() )
|
||||
break;
|
||||
if( iplImg->origin == IPL_ORIGIN_TL )
|
||||
frame.copyTo( frameCopy );
|
||||
frame.copyTo( frameCopy0 );
|
||||
else
|
||||
flip( frame, frameCopy, 0 );
|
||||
flip( frame, frameCopy0, 0 );
|
||||
if( scale == 1)
|
||||
frameCopy0.copyTo(frameCopy);
|
||||
else
|
||||
resize(frameCopy0, frameCopy, Size(), 1./scale, 1./scale, INTER_LINEAR);
|
||||
|
||||
work_end = 0;
|
||||
if(useCPU)
|
||||
detectCPU(frameCopy, faces, cpu_cascade, scale, false);
|
||||
detectCPU(frameCopy, faces, cpu_cascade, 1);
|
||||
else
|
||||
detect(frameCopy, faces, cascade, scale, false);
|
||||
detect(frameCopy, faces, cascade, 1);
|
||||
|
||||
Draw(frameCopy, faces, scale);
|
||||
Draw(frameCopy, faces, 1);
|
||||
if( waitKey( 10 ) >= 0 )
|
||||
break;
|
||||
}
|
||||
@ -150,17 +155,19 @@ int main( int argc, const char** argv )
|
||||
vector<Rect> faces;
|
||||
vector<Rect> ref_rst;
|
||||
double accuracy = 0.;
|
||||
detectCPU(image, ref_rst, cpu_cascade, scale);
|
||||
work_end = 0;
|
||||
|
||||
for(int i = 0; i <= LOOP_NUM; i ++)
|
||||
{
|
||||
cout << "loop" << i << endl;
|
||||
if(useCPU)
|
||||
detectCPU(image, faces, cpu_cascade, scale, i==0?false:true);
|
||||
detectCPU(image, faces, cpu_cascade, scale);
|
||||
else
|
||||
{
|
||||
detect(image, faces, cascade, scale, i==0?false:true);
|
||||
detect(image, faces, cascade, scale);
|
||||
if(i == 0)
|
||||
{
|
||||
detectCPU(image, ref_rst, cpu_cascade, scale, false);
|
||||
accuracy = checkRectSimilarity(image.size(), ref_rst, faces);
|
||||
}
|
||||
}
|
||||
@ -184,11 +191,11 @@ int main( int argc, const char** argv )
|
||||
|
||||
void detect( Mat& img, vector<Rect>& faces,
|
||||
ocl::OclCascadeClassifier& cascade,
|
||||
double scale, bool calTime)
|
||||
double scale)
|
||||
{
|
||||
ocl::oclMat image(img);
|
||||
ocl::oclMat gray, smallImg( cvRound (img.rows/scale), cvRound(img.cols/scale), CV_8UC1 );
|
||||
if(calTime) workBegin();
|
||||
workBegin();
|
||||
ocl::cvtColor( image, gray, COLOR_BGR2GRAY );
|
||||
ocl::resize( gray, smallImg, smallImg.size(), 0, 0, INTER_LINEAR );
|
||||
ocl::equalizeHist( smallImg, smallImg );
|
||||
@ -197,14 +204,14 @@ void detect( Mat& img, vector<Rect>& faces,
|
||||
3, 0
|
||||
|CASCADE_SCALE_IMAGE
|
||||
, Size(30,30), Size(0, 0) );
|
||||
if(calTime) workEnd();
|
||||
workEnd();
|
||||
}
|
||||
|
||||
void detectCPU( Mat& img, vector<Rect>& faces,
|
||||
CascadeClassifier& cascade,
|
||||
double scale, bool calTime)
|
||||
double scale)
|
||||
{
|
||||
if(calTime) workBegin();
|
||||
workBegin();
|
||||
Mat cpu_gray, cpu_smallImg( cvRound (img.rows/scale), cvRound(img.cols/scale), CV_8UC1 );
|
||||
cvtColor(img, cpu_gray, COLOR_BGR2GRAY);
|
||||
resize(cpu_gray, cpu_smallImg, cpu_smallImg.size(), 0, 0, INTER_LINEAR);
|
||||
@ -212,13 +219,15 @@ void detectCPU( Mat& img, vector<Rect>& faces,
|
||||
cascade.detectMultiScale(cpu_smallImg, faces, 1.1,
|
||||
3, 0 | CASCADE_SCALE_IMAGE,
|
||||
Size(30, 30), Size(0, 0));
|
||||
if(calTime) workEnd();
|
||||
workEnd();
|
||||
}
|
||||
|
||||
|
||||
void Draw(Mat& img, vector<Rect>& faces, double scale)
|
||||
{
|
||||
int i = 0;
|
||||
putText(img, format("fps: %.1f", 1000./getTime()), Point(450, 50),
|
||||
FONT_HERSHEY_SIMPLEX, 1, Scalar(0,255,0), 3);
|
||||
for( vector<Rect>::const_iterator r = faces.begin(); r != faces.end(); r++, i++ )
|
||||
{
|
||||
Point center;
|
||||
@ -229,7 +238,7 @@ void Draw(Mat& img, vector<Rect>& faces, double scale)
|
||||
radius = cvRound((r->width + r->height)*0.25*scale);
|
||||
circle( img, center, radius, color, 3, 8, 0 );
|
||||
}
|
||||
imwrite( outputName, img );
|
||||
//imwrite( outputName, img );
|
||||
if(abs(scale-1.0)>.001)
|
||||
{
|
||||
resize(img, img, Size((int)(img.cols/scale), (int)(img.rows/scale)));
|
||||
|
Loading…
Reference in New Issue
Block a user