diff --git a/CMakeLists.txt b/CMakeLists.txt index 8aa4f87af..efb9e12db 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -415,10 +415,10 @@ if(WITH_OPENCL) if(OPENCL_FOUND) set(HAVE_OPENCL 1) endif() - if(WITH_OPENCLAMDFFT) + if(WITH_OPENCLAMDFFT AND CLAMDFFT_INCLUDE_DIR) set(HAVE_CLAMDFFT 1) endif() - if(WITH_OPENCLAMDBLAS) + if(WITH_OPENCLAMDBLAS AND CLAMDBLAS_INCLUDE_DIR) set(HAVE_CLAMDBLAS 1) endif() endif() diff --git a/android/libinfo/info.c b/android/libinfo/info.c index f0c2dd6a2..225cc10c1 100644 --- a/android/libinfo/info.c +++ b/android/libinfo/info.c @@ -6,8 +6,8 @@ const char* GetRevision(void); const char* GetLibraryList(void); JNIEXPORT jstring JNICALL Java_org_opencv_android_StaticHelper_getLibraryList(JNIEnv *, jclass); -#define PACKAGE_NAME "org.opencv.lib_v" CVAUX_STR(CV_MAJOR_VERSION) CVAUX_STR(CV_MINOR_VERSION) "_" ANDROID_PACKAGE_PLATFORM -#define PACKAGE_REVISION CVAUX_STR(CV_SUBMINOR_VERSION) "." CVAUX_STR(ANDROID_PACKAGE_RELEASE) +#define PACKAGE_NAME "org.opencv.lib_v" CVAUX_STR(CV_VERSION_EPOCH) CVAUX_STR(CV_VERSION_MAJOR) "_" ANDROID_PACKAGE_PLATFORM +#define PACKAGE_REVISION CVAUX_STR(CV_VERSION_MINOR) "." CVAUX_STR(ANDROID_PACKAGE_RELEASE) const char* GetPackageName(void) { diff --git a/android/package/CMakeLists.txt b/android/package/CMakeLists.txt index 0e7848d6e..24ce87661 100644 --- a/android/package/CMakeLists.txt +++ b/android/package/CMakeLists.txt @@ -56,7 +56,7 @@ configure_file("${CMAKE_CURRENT_SOURCE_DIR}/${ANDROID_MANIFEST_FILE}" "${PACKAGE configure_file("${CMAKE_CURRENT_SOURCE_DIR}/res/values/strings.xml" "${PACKAGE_DIR}/res/values/strings.xml" @ONLY) configure_file("${CMAKE_CURRENT_SOURCE_DIR}/res/drawable/icon.png" "${PACKAGE_DIR}/res/drawable/icon.png" COPYONLY) -set(target_name "OpenCV_${OPENCV_VERSION_MAJOR}.${OPENCV_VERSION_MINOR}.${OPENCV_VERSION_PATCH}_binary_pack_${ANDROID_PACKAGE_PLATFORM}") +set(target_name "OpenCV_${OPENCV_VERSION}_binary_pack_${ANDROID_PACKAGE_PLATFORM}") get_target_property(opencv_java_location opencv_java LOCATION) set(android_proj_target_files ${ANDROID_PROJECT_FILES}) diff --git a/android/service/engine/AndroidManifest.xml b/android/service/engine/AndroidManifest.xml index 088d51cae..4af965202 100644 --- a/android/service/engine/AndroidManifest.xml +++ b/android/service/engine/AndroidManifest.xml @@ -1,8 +1,8 @@ + android:versionCode="25@ANDROID_PLATFORM_VERSION_CODE@" + android:versionName="2.5" > diff --git a/android/service/engine/jni/BinderComponent/OpenCVEngine.cpp b/android/service/engine/jni/BinderComponent/OpenCVEngine.cpp index 7cfe73ddf..1a02b8a39 100644 --- a/android/service/engine/jni/BinderComponent/OpenCVEngine.cpp +++ b/android/service/engine/jni/BinderComponent/OpenCVEngine.cpp @@ -15,60 +15,44 @@ using namespace android; const int OpenCVEngine::Platform = DetectKnownPlatforms(); const int OpenCVEngine::CpuID = GetCpuID(); +const int OpenCVEngine::KnownVersions[] = {2040000, 2040100, 2040200, 2040300, 2040301, 2040302}; -std::set OpenCVEngine::InitKnownOpenCVersions() +bool OpenCVEngine::ValidateVersion(int version) { - std::set result; + for (size_t i = 0; i < sizeof(KnownVersions)/sizeof(int); i++) + if (KnownVersions[i] == version) + return true; - result.insert("240"); - result.insert("241"); - result.insert("242"); - result.insert("243"); - - return result; + return false; } -const std::set OpenCVEngine::KnownVersions = InitKnownOpenCVersions(); - -bool OpenCVEngine::ValidateVersionString(const std::string& version) +int OpenCVEngine::NormalizeVersionString(std::string version) { - return (KnownVersions.find(version) != KnownVersions.end()); -} - -std::string OpenCVEngine::NormalizeVersionString(std::string version) -{ - std::string result = ""; - std::string suffix = ""; + int result = 0; if (version.empty()) { return result; } - if (('a' == version[version.size()-1]) || ('b' == version[version.size()-1])) - { - suffix = version[version.size()-1]; - version.erase(version.size()-1); - } - std::vector parts = SplitStringVector(version, '.'); - if (parts.size() >= 2) + // Use only 4 digits of the version, i.e. 1.2.3.4. + // Other digits will be ignored. + if (parts.size() > 4) + parts.erase(parts.begin()+4, parts.end()); + + int multiplyer = 1000000; + for (std::vector::const_iterator it = parts.begin(); it != parts.end(); ++it) { - if (parts.size() >= 3) - { - result = parts[0] + parts[1] + parts[2] + suffix; - if (!ValidateVersionString(result)) - result = ""; - } - else - { - result = parts[0] + parts[1] + "0" + suffix; - if (!ValidateVersionString(result)) - result = ""; - } + int digit = atoi(it->c_str()); + result += multiplyer*digit; + multiplyer /= 100; } + if (!ValidateVersion(result)) + result = 0; + return result; } @@ -86,19 +70,19 @@ int32_t OpenCVEngine::GetVersion() String16 OpenCVEngine::GetLibPathByVersion(android::String16 version) { std::string std_version(String8(version).string()); - std::string norm_version; + int norm_version; std::string path; LOGD("OpenCVEngine::GetLibPathByVersion(%s) impl", String8(version).string()); norm_version = NormalizeVersionString(std_version); - if (!norm_version.empty()) + if (0 != norm_version) { path = PackageManager->GetPackagePathByVersion(norm_version, Platform, CpuID); if (path.empty()) { - LOGI("Package OpenCV of version %s is not installed. Try to install it :)", norm_version.c_str()); + LOGI("Package OpenCV of version \"%s\" (%d) is not installed. Try to install it :)", String8(version).string(), norm_version); } else { @@ -107,7 +91,7 @@ String16 OpenCVEngine::GetLibPathByVersion(android::String16 version) } else { - LOGE("OpenCV version \"%s\" (%s) is not supported", String8(version).string(), norm_version.c_str()); + LOGE("OpenCV version \"%s\" (%d) is not supported", String8(version).string(), norm_version); } return String16(path.c_str()); @@ -116,11 +100,11 @@ String16 OpenCVEngine::GetLibPathByVersion(android::String16 version) android::String16 OpenCVEngine::GetLibraryList(android::String16 version) { std::string std_version = String8(version).string(); - std::string norm_version; + int norm_version; String16 result; norm_version = NormalizeVersionString(std_version); - if (!norm_version.empty()) + if (0 != norm_version) { std::string tmp = PackageManager->GetPackagePathByVersion(norm_version, Platform, CpuID); if (!tmp.empty()) @@ -156,12 +140,12 @@ android::String16 OpenCVEngine::GetLibraryList(android::String16 version) } else { - LOGI("Package OpenCV of version %s is not installed. Try to install it :)", norm_version.c_str()); + LOGI("Package OpenCV of version \"%s\" (%d) is not installed. Try to install it :)", std_version.c_str(), norm_version); } } else { - LOGE("OpenCV version \"%s\" is not supported", norm_version.c_str()); + LOGE("OpenCV version \"%s\" is not supported", std_version.c_str()); } return result; @@ -170,21 +154,21 @@ android::String16 OpenCVEngine::GetLibraryList(android::String16 version) bool OpenCVEngine::InstallVersion(android::String16 version) { std::string std_version = String8(version).string(); - std::string norm_version; + int norm_version; bool result = false; LOGD("OpenCVEngine::InstallVersion() begin"); norm_version = NormalizeVersionString(std_version); - if (!norm_version.empty()) + if (0 != norm_version) { LOGD("PackageManager->InstallVersion call"); result = PackageManager->InstallVersion(norm_version, Platform, CpuID); } else { - LOGE("OpenCV version \"%s\" is not supported", norm_version.c_str()); + LOGE("OpenCV version \"%s\" (%d) is not supported", std_version.c_str(), norm_version); } LOGD("OpenCVEngine::InstallVersion() end"); diff --git a/android/service/engine/jni/BinderComponent/OpenCVEngine.h b/android/service/engine/jni/BinderComponent/OpenCVEngine.h index edb2f8f61..10da157cc 100644 --- a/android/service/engine/jni/BinderComponent/OpenCVEngine.h +++ b/android/service/engine/jni/BinderComponent/OpenCVEngine.h @@ -23,16 +23,15 @@ public: protected: IPackageManager* PackageManager; - static const std::set KnownVersions; + static const int KnownVersions[]; OpenCVEngine(); - static std::set InitKnownOpenCVersions(); - bool ValidateVersionString(const std::string& version); - std::string NormalizeVersionString(std::string version); + bool ValidateVersion(int version); + int NormalizeVersionString(std::string version); bool FixPermissions(const std::string& path); static const int Platform; static const int CpuID; }; -#endif \ No newline at end of file +#endif diff --git a/android/service/engine/jni/NativeService/CommonPackageManager.cpp b/android/service/engine/jni/NativeService/CommonPackageManager.cpp index 164b415a7..9e1ffaa98 100644 --- a/android/service/engine/jni/NativeService/CommonPackageManager.cpp +++ b/android/service/engine/jni/NativeService/CommonPackageManager.cpp @@ -11,22 +11,24 @@ using namespace std; -set CommonPackageManager::GetInstalledVersions() +vector CommonPackageManager::GetInstalledVersions() { - set result; + vector result; vector installed_packages = GetInstalledPackages(); - for (vector::const_iterator it = installed_packages.begin(); it != installed_packages.end(); ++it) + result.resize(installed_packages.size()); + + for (size_t i = 0; i < installed_packages.size(); i++) { - string version = it->GetVersion(); - assert(!version.empty()); - result.insert(version); + int version = installed_packages[i].GetVersion(); + assert(version); + result[i] = version; } return result; } -bool CommonPackageManager::CheckVersionInstalled(const std::string& version, int platform, int cpu_id) +bool CommonPackageManager::CheckVersionInstalled(int version, int platform, int cpu_id) { bool result = false; LOGD("CommonPackageManager::CheckVersionInstalled() begin"); @@ -48,14 +50,14 @@ bool CommonPackageManager::CheckVersionInstalled(const std::string& version, int return result; } -bool CommonPackageManager::InstallVersion(const std::string& version, int platform, int cpu_id) +bool CommonPackageManager::InstallVersion(int version, int platform, int cpu_id) { LOGD("CommonPackageManager::InstallVersion() begin"); PackageInfo package(version, platform, cpu_id); return InstallPackage(package); } -string CommonPackageManager::GetPackagePathByVersion(const std::string& version, int platform, int cpu_id) +string CommonPackageManager::GetPackagePathByVersion(int version, int platform, int cpu_id) { string result; PackageInfo target_package(version, platform, cpu_id); @@ -64,7 +66,7 @@ string CommonPackageManager::GetPackagePathByVersion(const std::string& version, for (vector::iterator it = all_packages.begin(); it != all_packages.end(); ++it) { - LOGD("Check version \"%s\" compatibility with \"%s\"\n", version.c_str(), it->GetVersion().c_str()); + LOGD("Check version \"%d\" compatibility with \"%d\"\n", version, it->GetVersion()); if (IsVersionCompatible(version, it->GetVersion())) { LOGD("Compatible"); @@ -79,7 +81,7 @@ string CommonPackageManager::GetPackagePathByVersion(const std::string& version, if (!packages.empty()) { int OptRating = -1; - std::string OptVersion = ""; + int OptVersion = 0; std::vector >& group = CommonPackageManager::ArmRating; if ((cpu_id & ARCH_X86) || (cpu_id & ARCH_X64)) @@ -124,20 +126,13 @@ string CommonPackageManager::GetPackagePathByVersion(const std::string& version, return result; } -bool CommonPackageManager::IsVersionCompatible(const std::string& target_version, const std::string& package_version) +bool CommonPackageManager::IsVersionCompatible(int target_version, int package_version) { - assert (target_version.size() == 3); - assert (package_version.size() == 3); - - bool result = false; + assert(target_version); + assert(package_version); // major version is the same and minor package version is above or the same as target. - if ((package_version[0] == target_version[0]) && (package_version[1] == target_version[1]) && (package_version[2] >= target_version[2])) - { - result = true; - } - - return result; + return ( (package_version/10000 == target_version/10000) && (package_version%10000 >= target_version%10000) ); } int CommonPackageManager::GetHardwareRating(int platform, int cpu_id, const std::vector >& group) diff --git a/android/service/engine/jni/NativeService/CommonPackageManager.h b/android/service/engine/jni/NativeService/CommonPackageManager.h index 86f31788a..c02eb322e 100644 --- a/android/service/engine/jni/NativeService/CommonPackageManager.h +++ b/android/service/engine/jni/NativeService/CommonPackageManager.h @@ -3,17 +3,16 @@ #include "IPackageManager.h" #include "PackageInfo.h" -#include #include #include class CommonPackageManager: public IPackageManager { public: - std::set GetInstalledVersions(); - bool CheckVersionInstalled(const std::string& version, int platform, int cpu_id); - bool InstallVersion(const std::string& version, int platform, int cpu_id); - std::string GetPackagePathByVersion(const std::string& version, int platform, int cpu_id); + std::vector GetInstalledVersions(); + bool CheckVersionInstalled(int version, int platform, int cpu_id); + bool InstallVersion(int version, int platform, int cpu_id); + std::string GetPackagePathByVersion(int version, int platform, int cpu_id); virtual ~CommonPackageManager(); protected: @@ -23,7 +22,7 @@ protected: static std::vector > InitArmRating(); static std::vector > InitIntelRating(); - bool IsVersionCompatible(const std::string& target_version, const std::string& package_version); + bool IsVersionCompatible(int target_version, int package_version); int GetHardwareRating(int platform, int cpu_id, const std::vector >& group); virtual bool InstallPackage(const PackageInfo& package) = 0; @@ -31,4 +30,4 @@ protected: }; -#endif \ No newline at end of file +#endif diff --git a/android/service/engine/jni/NativeService/PackageInfo.cpp b/android/service/engine/jni/NativeService/PackageInfo.cpp index 396178d5d..7428de01b 100644 --- a/android/service/engine/jni/NativeService/PackageInfo.cpp +++ b/android/service/engine/jni/NativeService/PackageInfo.cpp @@ -124,14 +124,19 @@ inline int SplitIntelFeatures(const vector& features) return result; } -inline string SplitVersion(const vector& features, const string& package_version) +inline int SplitVersion(const vector& features, const string& package_version) { - string result; + int result = 0; if ((features.size() > 1) && ('v' == features[1][0])) { - result = features[1].substr(1); - result += SplitStringVector(package_version, '.')[0]; + // Taking major and minor mart of library version from package name + string tmp1 = features[1].substr(1); + result += atoi(tmp1.substr(0,1).c_str())*1000000 + atoi(tmp1.substr(1,1).c_str())*10000; + + // Taking release and build number from package revision + vector tmp2 = SplitStringVector(package_version, '.'); + result += atoi(tmp2[0].c_str())*100 + atoi(tmp2[1].c_str()); } else { @@ -186,9 +191,9 @@ inline int SplitPlatfrom(const vector& features) * Second part is version. Version starts from "v" symbol. After "v" symbol version nomber without dot symbol added. * If platform is known third part is platform name * If platform is unknown it is defined by hardware capabilities using pattern: __ - * Example: armv7_neon, armv5_vfpv3 + * Example: armv7_neon */ -PackageInfo::PackageInfo(const string& version, int platform, int cpu_id, std::string install_path): +PackageInfo::PackageInfo(int version, int platform, int cpu_id, std::string install_path): Version(version), Platform(platform), CpuID(cpu_id), @@ -198,7 +203,14 @@ InstallPath("") Platform = PLATFORM_UNKNOWN; #endif - FullName = BasePackageName + "_v" + Version.substr(0, Version.size()-1); + int major_version = version/1000000; + int minor_version = version/10000 - major_version*100; + + char tmp[32]; + + sprintf(tmp, "%d%d", major_version, minor_version); + + FullName = BasePackageName + std::string("_v") + std::string(tmp); if (PLATFORM_UNKNOWN != Platform) { FullName += string("_") + JoinPlatform(platform); @@ -296,7 +308,7 @@ InstallPath("") else { LOGD("PackageInfo::PackageInfo: package arch unknown"); - Version.clear(); + Version = 0; CpuID = ARCH_UNKNOWN; Platform = PLATFORM_UNKNOWN; } @@ -304,7 +316,7 @@ InstallPath("") else { LOGD("PackageInfo::PackageInfo: package arch unknown"); - Version.clear(); + Version = 0; CpuID = ARCH_UNKNOWN; Platform = PLATFORM_UNKNOWN; } @@ -371,7 +383,7 @@ InstallPath(install_path) { LOGI("Info library not found in package"); LOGI("OpenCV Manager package does not contain any verison of OpenCV library"); - Version.clear(); + Version = 0; CpuID = ARCH_UNKNOWN; Platform = PLATFORM_UNKNOWN; return; @@ -383,7 +395,7 @@ InstallPath(install_path) if (!features.empty() && (BasePackageName == features[0])) { Version = SplitVersion(features, package_version); - if (Version.empty()) + if (0 == Version) { CpuID = ARCH_UNKNOWN; Platform = PLATFORM_UNKNOWN; @@ -410,7 +422,7 @@ InstallPath(install_path) if (features.size() < 3) { LOGD("It is not OpenCV library package for this platform"); - Version.clear(); + Version = 0; CpuID = ARCH_UNKNOWN; Platform = PLATFORM_UNKNOWN; return; @@ -444,7 +456,7 @@ InstallPath(install_path) else { LOGD("It is not OpenCV library package for this platform"); - Version.clear(); + Version = 0; CpuID = ARCH_UNKNOWN; Platform = PLATFORM_UNKNOWN; return; @@ -454,7 +466,7 @@ InstallPath(install_path) else { LOGD("It is not OpenCV library package for this platform"); - Version.clear(); + Version = 0; CpuID = ARCH_UNKNOWN; Platform = PLATFORM_UNKNOWN; return; @@ -463,7 +475,7 @@ InstallPath(install_path) bool PackageInfo::IsValid() const { - return !(Version.empty() && (PLATFORM_UNKNOWN == Platform) && (ARCH_UNKNOWN == CpuID)); + return !((0 == Version) && (PLATFORM_UNKNOWN == Platform) && (ARCH_UNKNOWN == CpuID)); } int PackageInfo::GetPlatform() const @@ -481,7 +493,7 @@ string PackageInfo::GetFullName() const return FullName; } -string PackageInfo::GetVersion() const +int PackageInfo::GetVersion() const { return Version; } @@ -494,4 +506,4 @@ string PackageInfo::GetInstalationPath() const bool PackageInfo::operator==(const PackageInfo& package) const { return (package.FullName == FullName); -} \ No newline at end of file +} diff --git a/android/service/engine/jni/NativeService/PackageInfo.h b/android/service/engine/jni/NativeService/PackageInfo.h index 05d9098d6..b86ef7a92 100644 --- a/android/service/engine/jni/NativeService/PackageInfo.h +++ b/android/service/engine/jni/NativeService/PackageInfo.h @@ -30,10 +30,10 @@ class PackageInfo { public: - PackageInfo(const std::string& version, int platform, int cpu_id, std::string install_path = "/data/data/"); + PackageInfo(int version, int platform, int cpu_id, std::string install_path = "/data/data/"); PackageInfo(const std::string& fullname, const std::string& install_path, std::string package_version = "0.0"); std::string GetFullName() const; - std::string GetVersion() const; + int GetVersion() const; int GetPlatform() const; int GetCpuID() const; std::string GetInstalationPath() const; @@ -43,7 +43,7 @@ public: protected: static std::map InitPlatformNameMap(); - std::string Version; + int Version; int Platform; int CpuID; std::string FullName; @@ -51,4 +51,4 @@ protected: static const std::string BasePackageName; }; -#endif \ No newline at end of file +#endif diff --git a/android/service/engine/jni/include/IPackageManager.h b/android/service/engine/jni/include/IPackageManager.h index d616d3399..1b009ae75 100644 --- a/android/service/engine/jni/include/IPackageManager.h +++ b/android/service/engine/jni/include/IPackageManager.h @@ -1,17 +1,17 @@ #ifndef __IPACKAGE_MANAGER__ #define __IPACKAGE_MANAGER__ -#include +#include #include class IPackageManager { public: - virtual std::set GetInstalledVersions() = 0; - virtual bool CheckVersionInstalled(const std::string& version, int platform, int cpu_id) = 0; - virtual bool InstallVersion(const std::string&, int platform, int cpu_id) = 0; - virtual std::string GetPackagePathByVersion(const std::string&, int platform, int cpu_id) = 0; + virtual std::vector GetInstalledVersions() = 0; + virtual bool CheckVersionInstalled(int version, int platform, int cpu_id) = 0; + virtual bool InstallVersion(int version, int platform, int cpu_id) = 0; + virtual std::string GetPackagePathByVersion(int version, int platform, int cpu_id) = 0; virtual ~IPackageManager(){}; }; -#endif \ No newline at end of file +#endif diff --git a/android/service/engine/res/layout/info.xml b/android/service/engine/res/layout/info.xml index cd2e874a0..c9bd248cf 100644 --- a/android/service/engine/res/layout/info.xml +++ b/android/service/engine/res/layout/info.xml @@ -26,7 +26,7 @@ android:id="@+id/textView1" android:layout_width="wrap_content" android:layout_height="wrap_content" - android:text="Version: " + android:text="Library version: " android:textAppearance="?android:attr/textAppearanceSmall" /> & keypoints1, // draw matches for( size_t m = 0; m < matches1to2.size(); m++ ) { - int i1 = matches1to2[m].queryIdx; - int i2 = matches1to2[m].trainIdx; if( matchesMask.empty() || matchesMask[m] ) { + int i1 = matches1to2[m].queryIdx; + int i2 = matches1to2[m].trainIdx; + CV_Assert(i1 >= 0 && i1 < static_cast(keypoints1.size())); + CV_Assert(i2 >= 0 && i2 < static_cast(keypoints2.size())); + const KeyPoint &kp1 = keypoints1[i1], &kp2 = keypoints2[i2]; _drawMatch( outImg, outImg1, outImg2, kp1, kp2, matchColor, flags ); } diff --git a/modules/gpu/src/ffmpeg_video_source.cpp b/modules/gpu/src/ffmpeg_video_source.cpp index dbbe780d0..bd3d70058 100644 --- a/modules/gpu/src/ffmpeg_video_source.cpp +++ b/modules/gpu/src/ffmpeg_video_source.cpp @@ -64,7 +64,7 @@ namespace { #if defined WIN32 || defined _WIN32 const char* module_name = "opencv_ffmpeg" - CVAUX_STR(CV_MAJOR_VERSION) CVAUX_STR(CV_MINOR_VERSION) CVAUX_STR(CV_SUBMINOR_VERSION) + CVAUX_STR(CV_VERSION_EPOCH) CVAUX_STR(CV_VERSION_MAJOR) CVAUX_STR(CV_VERSION_MINOR) #if (defined _MSC_VER && defined _M_X64) || (defined __GNUC__ && defined __x86_64__) "_64" #endif diff --git a/modules/gpu/src/video_writer.cpp b/modules/gpu/src/video_writer.cpp index dd4b8fb03..9b866d6d5 100644 --- a/modules/gpu/src/video_writer.cpp +++ b/modules/gpu/src/video_writer.cpp @@ -767,7 +767,7 @@ namespace { #if defined WIN32 || defined _WIN32 const char* module_name = "opencv_ffmpeg" - CVAUX_STR(CV_MAJOR_VERSION) CVAUX_STR(CV_MINOR_VERSION) CVAUX_STR(CV_SUBMINOR_VERSION) + CVAUX_STR(CV_VERSION_EPOCH) CVAUX_STR(CV_VERSION_MAJOR) CVAUX_STR(CV_VERSION_MINOR) #if (defined _MSC_VER && defined _M_X64) || (defined __GNUC__ && defined __x86_64__) "_64" #endif diff --git a/modules/highgui/src/cap_ffmpeg.cpp b/modules/highgui/src/cap_ffmpeg.cpp index f9fc75cc4..cc910a332 100644 --- a/modules/highgui/src/cap_ffmpeg.cpp +++ b/modules/highgui/src/cap_ffmpeg.cpp @@ -86,7 +86,7 @@ private: { #if defined WIN32 || defined _WIN32 const char* module_name = "opencv_ffmpeg" - CVAUX_STR(CV_MAJOR_VERSION) CVAUX_STR(CV_MINOR_VERSION) CVAUX_STR(CV_SUBMINOR_VERSION) + CVAUX_STR(CV_VERSION_EPOCH) CVAUX_STR(CV_VERSION_MAJOR) CVAUX_STR(CV_VERSION_MINOR) #if (defined _MSC_VER && defined _M_X64) || (defined __GNUC__ && defined __x86_64__) "_64" #endif diff --git a/modules/imgproc/perf/perf_resize.cpp b/modules/imgproc/perf/perf_resize.cpp index 98e4bc2f7..12fa13fc9 100644 --- a/modules/imgproc/perf/perf_resize.cpp +++ b/modules/imgproc/perf/perf_resize.cpp @@ -21,16 +21,14 @@ PERF_TEST_P(MatInfo_Size_Size, resizeUpLinear, Size from = get<1>(GetParam()); Size to = get<2>(GetParam()); - cv::Mat src(from, matType); - cv::Mat dst(to, matType); - - declare.in(src, WARMUP_RNG).out(dst); + cv::Mat src(from, matType), dst(to, matType); + cvtest::fillGradient(src); + declare.in(src).out(dst); TEST_CYCLE() resize(src, dst, to); - // Test case temporary disabled for Android Platform #ifdef ANDROID - SANITY_CHECK(dst, 255); // TODO: Reimplement check in future versions + SANITY_CHECK(dst, 5); #else SANITY_CHECK(dst, 1 + 1e-6); #endif @@ -50,16 +48,14 @@ PERF_TEST_P(MatInfo_Size_Size, resizeDownLinear, Size from = get<1>(GetParam()); Size to = get<2>(GetParam()); - cv::Mat src(from, matType); - cv::Mat dst(to, matType); - - declare.in(src, WARMUP_RNG).out(dst); + cv::Mat src(from, matType), dst(to, matType); + cvtest::fillGradient(src); + declare.in(src).out(dst); TEST_CYCLE() resize(src, dst, to); - // Test case temporary disabled for Android Platform #ifdef ANDROID - SANITY_CHECK(dst, 255); // TODO: Reimplement check in future versions + SANITY_CHECK(dst, 5); #else SANITY_CHECK(dst, 1 + 1e-6); #endif diff --git a/modules/imgproc/perf/perf_warp.cpp b/modules/imgproc/perf/perf_warp.cpp index 1f2ffb692..f530df12c 100644 --- a/modules/imgproc/perf/perf_warp.cpp +++ b/modules/imgproc/perf/perf_warp.cpp @@ -28,24 +28,23 @@ PERF_TEST_P( TestWarpAffine, WarpAffine, ) ) { - Size sz; + Size sz, szSrc(512, 512); int borderMode, interType; sz = get<0>(GetParam()); interType = get<1>(GetParam()); borderMode = get<2>(GetParam()); + Scalar borderColor = Scalar::all(150); - Mat src, img = imread(getDataPath("cv/shared/fruits.png")); - cvtColor(img, src, COLOR_BGR2RGBA, 4); + Mat src(szSrc,CV_8UC4), dst(sz, CV_8UC4); + cvtest::fillGradient(src); + if(borderMode == BORDER_CONSTANT) cvtest::smoothBorder(src, borderColor, 1); Mat warpMat = getRotationMatrix2D(Point2f(src.cols/2.f, src.rows/2.f), 30., 2.2); - Mat dst(sz, CV_8UC4); - declare.in(src).out(dst); - TEST_CYCLE() warpAffine( src, dst, warpMat, sz, interType, borderMode, Scalar::all(150) ); + TEST_CYCLE() warpAffine( src, dst, warpMat, sz, interType, borderMode, borderColor ); - // Test case temporary disabled for Android Platform #ifdef ANDROID - SANITY_CHECK(dst, 255); // TODO: Reimplement check in future versions + SANITY_CHECK(dst, interType==INTER_LINEAR? 5 : 10); #else SANITY_CHECK(dst, 1); #endif @@ -59,15 +58,16 @@ PERF_TEST_P( TestWarpPerspective, WarpPerspective, ) ) { - Size sz; + Size sz, szSrc(512, 512); int borderMode, interType; sz = get<0>(GetParam()); interType = get<1>(GetParam()); borderMode = get<2>(GetParam()); + Scalar borderColor = Scalar::all(150); - - Mat src, img = imread(getDataPath("cv/shared/fruits.png")); - cvtColor(img, src, COLOR_BGR2RGBA, 4); + Mat src(szSrc,CV_8UC4), dst(sz, CV_8UC4); + cvtest::fillGradient(src); + if(borderMode == BORDER_CONSTANT) cvtest::smoothBorder(src, borderColor, 1); Mat rotMat = getRotationMatrix2D(Point2f(src.cols/2.f, src.rows/2.f), 30., 2.2); Mat warpMat(3, 3, CV_64FC1); for(int r=0; r<2; r++) @@ -76,13 +76,16 @@ PERF_TEST_P( TestWarpPerspective, WarpPerspective, warpMat.at(2, 0) = .3/sz.width; warpMat.at(2, 1) = .3/sz.height; warpMat.at(2, 2) = 1; - Mat dst(sz, CV_8UC4); declare.in(src).out(dst); - TEST_CYCLE() warpPerspective( src, dst, warpMat, sz, interType, borderMode, Scalar::all(150) ); + TEST_CYCLE() warpPerspective( src, dst, warpMat, sz, interType, borderMode, borderColor ); +#ifdef ANDROID + SANITY_CHECK(dst, interType==INTER_LINEAR? 5 : 10); +#else SANITY_CHECK(dst, 1); +#endif } PERF_TEST_P( TestWarpPerspectiveNear_t, WarpPerspectiveNear, @@ -105,24 +108,11 @@ PERF_TEST_P( TestWarpPerspectiveNear_t, WarpPerspectiveNear, interType = get<1>(GetParam()); borderMode = get<2>(GetParam()); type = get<3>(GetParam()); + Scalar borderColor = Scalar::all(150); - Mat src, img = imread(getDataPath("cv/shared/5MP.png")); - - if( type == CV_8UC1 ) - { - cvtColor(img, src, COLOR_BGR2GRAY, 1); - } - else if( type == CV_8UC4 ) - { - cvtColor(img, src, COLOR_BGR2BGRA, 4); - } - else - { - FAIL(); - } - - resize(src, src, size); - + Mat src(size, type), dst(size, type); + cvtest::fillGradient(src); + if(borderMode == BORDER_CONSTANT) cvtest::smoothBorder(src, borderColor, 1); int shift = static_cast(src.cols*0.04); Mat srcVertices = (Mat_(1, 4) << Vec2f(0, 0), Vec2f(static_cast(size.width-1), 0), @@ -134,19 +124,16 @@ PERF_TEST_P( TestWarpPerspectiveNear_t, WarpPerspectiveNear, Vec2f(static_cast(shift/2), static_cast(size.height-1))); Mat warpMat = getPerspectiveTransform(srcVertices, dstVertices); - Mat dst(size, type); - declare.in(src).out(dst); declare.time(100); TEST_CYCLE() { - warpPerspective( src, dst, warpMat, size, interType, borderMode, Scalar::all(150) ); + warpPerspective( src, dst, warpMat, size, interType, borderMode, borderColor ); } - // Test case temporary disabled for Android Platform #ifdef ANDROID - SANITY_CHECK(dst, 255); // TODO: Reimplement check in future versions + SANITY_CHECK(dst, interType==INTER_LINEAR? 5 : 10); #else SANITY_CHECK(dst, 1); #endif diff --git a/modules/java/CMakeLists.txt b/modules/java/CMakeLists.txt index 372bf84d7..400668817 100644 --- a/modules/java/CMakeLists.txt +++ b/modules/java/CMakeLists.txt @@ -1,15 +1,10 @@ # ---------------------------------------------------------------------------- # CMake file for java support # ---------------------------------------------------------------------------- -if(IOS OR NOT PYTHON_EXECUTABLE OR NOT (JNI_FOUND OR (ANDROID AND ANDROID_NATIVE_API_LEVEL GREATER 7))) +if(IOS OR NOT PYTHON_EXECUTABLE OR NOT ANT_EXECUTABLE OR NOT (JNI_FOUND OR (ANDROID AND ANDROID_NATIVE_API_LEVEL GREATER 7))) ocv_module_disable(java) endif() -if(NOT ANDROID) - # disable java by default because java support on desktop is experimental - set(BUILD_opencv_java_INIT OFF) -endif() - set(the_description "The java bindings") ocv_add_module(java BINDINGS opencv_core opencv_imgproc OPTIONAL opencv_objdetect opencv_features2d opencv_video opencv_highgui opencv_ml opencv_calib3d opencv_photo opencv_nonfree opencv_contrib) ocv_module_include_directories("${CMAKE_CURRENT_SOURCE_DIR}/generator/src/cpp") @@ -18,6 +13,10 @@ if(NOT ANDROID) include_directories(${JNI_INCLUDE_DIRS}) endif() +# output locations +set(JAVA_INSTALL_ROOT "sdk/java") +set(JNI_INSTALL_ROOT "sdk/native") + # get list of modules to wrap string(REPLACE "opencv_" "" OPENCV_JAVA_MODULES "${OPENCV_MODULE_${the_module}_REQ_DEPS};${OPENCV_MODULE_${the_module}_OPT_DEPS}") foreach(module ${OPENCV_JAVA_MODULES}) @@ -26,20 +25,29 @@ foreach(module ${OPENCV_JAVA_MODULES}) endif() endforeach() -set(GEN_JAVA "${CMAKE_CURRENT_SOURCE_DIR}/generator/gen_java.py") -set(HDR_PARSER "${CMAKE_CURRENT_SOURCE_DIR}/../python/src2/hdr_parser.py") -set(GEN_JAVADOC "${CMAKE_CURRENT_SOURCE_DIR}/generator/gen_javadoc.py") -set(RST_PARSER "${CMAKE_CURRENT_SOURCE_DIR}/generator/rst_parser.py") +###################################################################################################################################### -# add dependencies to cmake (we should rerun cmake if any of these scripts is modified) -configure_file("${GEN_JAVA}" "${OpenCV_BINARY_DIR}/junk/gen_java.junk" COPYONLY) -configure_file("${HDR_PARSER}" "${OpenCV_BINARY_DIR}/junk/hdr_parser.junk" COPYONLY) +# scripts +set(scripts_gen_java "${CMAKE_CURRENT_SOURCE_DIR}/generator/gen_java.py") +set(scripts_hdr_parser "${CMAKE_CURRENT_SOURCE_DIR}/../python/src2/hdr_parser.py") +set(scripts_gen_javadoc "${CMAKE_CURRENT_SOURCE_DIR}/generator/gen_javadoc.py") +set(scripts_rst_parser "${CMAKE_CURRENT_SOURCE_DIR}/generator/rst_parser.py") -set(java_hdr_deps "") -set(generated_cpp_sources "") -set(generated_java_sources "") +# handwritten C/C++ and Java sources +file(GLOB handwrittren_h_sources "${CMAKE_CURRENT_SOURCE_DIR}/generator/src/cpp/*.h" "${CMAKE_CURRENT_SOURCE_DIR}/generator/src/cpp/*.hpp") +file(GLOB handwrittren_cpp_sources "${CMAKE_CURRENT_SOURCE_DIR}/generator/src/cpp/*.cpp") +file(GLOB handwrittren_java_sources "${CMAKE_CURRENT_SOURCE_DIR}/generator/src/java/*.java") +file(GLOB handwrittren_aidl_sources "${CMAKE_CURRENT_SOURCE_DIR}/generator/src/java/*.aidl") +if(NOT ANDROID) + ocv_list_filterout(handwrittren_java_sources "/(engine|android)\\\\+") + ocv_list_filterout(handwrittren_aidl_sources "/(engine|android)\\\\+") +else() + file(GLOB_RECURSE handwrittren_lib_project_files_rel RELATIVE "${CMAKE_CURRENT_SOURCE_DIR}/android_lib/" "${CMAKE_CURRENT_SOURCE_DIR}/android_lib/*") + list(REMOVE_ITEM handwrittren_lib_project_files_rel "${ANDROID_MANIFEST_FILE}") +endif() -# setup raw java and cpp files generation (without javadoc and at temporary location) +# headers of OpenCV modules +set(opencv_public_headers "") foreach(module ${OPENCV_JAVA_MODULES}) # get list of module headers if(EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/generator/config/${module}.filelist") @@ -48,121 +56,213 @@ foreach(module ${OPENCV_JAVA_MODULES}) else() set(module_headers "${OPENCV_MODULE_opencv_${module}_HEADERS}") endif() - - # C headers must go first - set(module_headers_cpp ${module_headers}) - ocv_list_filterout(module_headers_cpp "\\\\.h$") - if(module_headers_cpp) - list(REMOVE_ITEM module_headers ${module_headers_cpp}) - list(APPEND module_headers ${module_headers_cpp}) - endif() - unset(module_headers_cpp) - - # add dependencies to cmake (we should rerun cmake if any of these headers is modified) - foreach(header ${module_headers}) - get_filename_component(header_name "${header}" NAME_WE) - configure_file("${header}" "${OpenCV_BINARY_DIR}/junk/${header_name}.junk" COPYONLY) - endforeach() - - # first run (to get list of generated files) if(module_headers) - file(REMOVE_RECURSE "${CMAKE_CURRENT_BINARY_DIR}/gen_java_out/") - file(MAKE_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}/gen_java_out") - execute_process(COMMAND ${PYTHON_EXECUTABLE} "${GEN_JAVA}" "${HDR_PARSER}" ${module} ${module_headers} - WORKING_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}/gen_java_out" - OUTPUT_QUIET ERROR_QUIET) - file(GLOB_RECURSE ${module}_generated_java_sources RELATIVE "${CMAKE_CURRENT_BINARY_DIR}/gen_java_out/" "${CMAKE_CURRENT_BINARY_DIR}/gen_java_out/*.java") - ocv_list_add_prefix(${module}_generated_java_sources "${CMAKE_CURRENT_BINARY_DIR}/") + # C headers must go first + set(module_headers_cpp ${module_headers}) + ocv_list_filterout(module_headers_cpp "\\\\.h$") + if(module_headers_cpp) + list(REMOVE_ITEM module_headers ${module_headers_cpp}) + list(APPEND module_headers ${module_headers_cpp}) + endif() + unset(module_headers_cpp) - # second run (at build time) - add_custom_command(OUTPUT ${${module}_generated_java_sources} "${CMAKE_CURRENT_BINARY_DIR}/${module}.cpp" - COMMAND ${PYTHON_EXECUTABLE} "${GEN_JAVA}" "${HDR_PARSER}" ${module} ${module_headers} - WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR} - DEPENDS "${GEN_JAVA}" "${HDR_PARSER}" ${module_headers}) - - list(APPEND java_hdr_deps ${module_headers}) - list(APPEND generated_cpp_sources "${CMAKE_CURRENT_BINARY_DIR}/${module}.cpp") - list(APPEND generated_java_sources ${${module}_generated_java_sources}) + set(opencv_public_headers_${module} ${module_headers}) + list(APPEND opencv_public_headers ${module_headers}) + else() + list(REMOVE_ITEM OPENCV_JAVA_MODULES ${module}) endif() endforeach() -# get handwritten files used for wrappers generation -file(GLOB handwrittren_h_sources "${CMAKE_CURRENT_SOURCE_DIR}/generator/src/cpp/*.h" "${CMAKE_CURRENT_SOURCE_DIR}/generator/src/cpp/*.hpp") -file(GLOB handwrittren_cpp_sources "${CMAKE_CURRENT_SOURCE_DIR}/generator/src/cpp/*.cpp") -file(GLOB handwrittren_java_sources "${CMAKE_CURRENT_SOURCE_DIR}/generator/src/java/*.java") -file(GLOB handwrittren_aidl_sources "${CMAKE_CURRENT_SOURCE_DIR}/generator/src/java/*.aidl") - -# remove handwritten java files for disabled modules -foreach(jfile ${handwrittren_java_sources}) - string(REGEX REPLACE "^.*/([^+]+)\\+.*\\.java$" "\\1" jmodname "${jfile}") - if(DEFINED HAVE_opencv_${jmodname} AND NOT HAVE_opencv_${jmodname}) - list(REMOVE_ITEM handwrittren_java_sources "${jfile}") - endif() +# rst documentation used for javadoc generation +set(javadoc_rst_sources "") +foreach(module ${OPENCV_JAVA_MODULES}) + file(GLOB_RECURSE refman_rst_headers "${OPENCV_MODULE_opencv_${module}_LOCATION}/*.rst") + list(APPEND javadoc_rst_sources ${refman_rst_headers}) endforeach() -# remove VideoCapture wrapper if highgui is disabled -if(NOT HAVE_opencv_highgui) - list(REMOVE_ITEM handwrittren_cpp_sources "${CMAKE_CURRENT_SOURCE_DIR}/generator/src/cpp/VideoCapture.cpp") -endif() +# generated cpp files +set(generated_cpp_sources "") +foreach(module ${OPENCV_JAVA_MODULES}) + list(APPEND generated_cpp_sources "${CMAKE_CURRENT_BINARY_DIR}/${module}.cpp") +endforeach() -# create list of javadoc documented files -unset(documented_java_files) -foreach(java_file ${handwrittren_java_sources} ${generated_java_sources}) +# IMPORTANT: add dependencies to cmake (we should rerun cmake if any of these files is modified) +configure_file("${scripts_gen_java}" "${OpenCV_BINARY_DIR}/junk/gen_java.junk" COPYONLY) +configure_file("${scripts_hdr_parser}" "${OpenCV_BINARY_DIR}/junk/hdr_parser.junk" COPYONLY) +foreach(header ${opencv_public_headers}) + get_filename_component(header_name "${header}" NAME) + configure_file("${header}" "${OpenCV_BINARY_DIR}/junk/${header_name}.junk" COPYONLY) +endforeach() + +# generated java files +set(generated_java_sources "") +foreach(module ${OPENCV_JAVA_MODULES}) + # first run of gen_java.py (to get list of generated files) + file(REMOVE_RECURSE "${CMAKE_CURRENT_BINARY_DIR}/gen_java_out/") + file(MAKE_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}/gen_java_out") + execute_process(COMMAND ${PYTHON_EXECUTABLE} "${scripts_gen_java}" "${scripts_hdr_parser}" ${module} ${opencv_public_headers_${module}} + WORKING_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}/gen_java_out" + OUTPUT_QUIET ERROR_QUIET) + unset(generated_java_sources_${module}) + file(GLOB_RECURSE generated_java_sources_${module} RELATIVE "${CMAKE_CURRENT_BINARY_DIR}/gen_java_out/" "${CMAKE_CURRENT_BINARY_DIR}/gen_java_out/*.java") + ocv_list_add_prefix(generated_java_sources_${module} "${CMAKE_CURRENT_BINARY_DIR}/") + + list(APPEND generated_java_sources ${generated_java_sources_${module}}) +endforeach() + +# generated java files with javadoc +set(documented_java_files "") +foreach(java_file ${generated_java_sources} ${handwrittren_java_sources}) get_filename_component(java_file_name "${java_file}" NAME_WE) list(APPEND documented_java_files "${CMAKE_CURRENT_BINARY_DIR}/${java_file_name}-jdoc.java") endforeach() -# generate javadoc files -file(GLOB_RECURSE refman_rst_headers "${CMAKE_CURRENT_SOURCE_DIR}/../*.rst") -set(java_documented_headers_deps ${handwrittren_java_sources} ${generated_java_sources} ${java_hdr_deps} ${refman_rst_headers} - "${GEN_JAVADOC}" "${RST_PARSER}" "${GEN_JAVA}" "${HDR_PARSER}") +###################################################################################################################################### +# step 1: generate .cpp/.java from OpenCV headers +set(step1_depends "${scripts_gen_java}" "${scripts_hdr_parser}" ${opencv_public_headers}) +foreach(module ${OPENCV_JAVA_MODULES}) + # second run of gen_java.py (at build time) + add_custom_command(OUTPUT ${generated_java_sources_${module}} "${CMAKE_CURRENT_BINARY_DIR}/${module}.cpp" + COMMAND ${PYTHON_EXECUTABLE} "${scripts_gen_java}" "${scripts_hdr_parser}" ${module} ${opencv_public_headers_${module}} + WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR} + DEPENDS "${scripts_gen_java}" "${scripts_hdr_parser}" ${opencv_public_headers_${module}} + ) +endforeach() + +# step 2: generate javadoc comments +set(step2_depends ${step1_depends} ${scripts_gen_javadoc} ${scripts_rst_parser} ${javadoc_rst_sources} ${generated_java_sources} ${handwrittren_java_sources}) string(REPLACE ";" "," OPENCV_JAVA_MODULES_STR "${OPENCV_JAVA_MODULES}") -add_custom_command( - OUTPUT ${documented_java_files} - COMMAND ${PYTHON_EXECUTABLE} "${GEN_JAVADOC}" --modules ${OPENCV_JAVA_MODULES_STR} "${CMAKE_CURRENT_SOURCE_DIR}/generator/src/java" "${CMAKE_CURRENT_BINARY_DIR}" 2>"${CMAKE_CURRENT_BINARY_DIR}/get_javadoc_errors.log" - WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR} - DEPENDS ${java_documented_headers_deps} -) +add_custom_command(OUTPUT ${documented_java_files} + COMMAND ${PYTHON_EXECUTABLE} "${scripts_gen_javadoc}" --modules ${OPENCV_JAVA_MODULES_STR} "${CMAKE_CURRENT_SOURCE_DIR}/generator/src/java" "${CMAKE_CURRENT_BINARY_DIR}" 2>"${CMAKE_CURRENT_BINARY_DIR}/get_javadoc_errors.log" + WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR} + DEPENDS ${step2_depends} + ) -# copy generated java files to the final location -set(JAVA_OUTPUT_DIR "src/org/opencv") -set(JAVA_INSTALL_ROOT "sdk/java") -set(JNI_INSTALL_ROOT "sdk/native") - -# copy each documented header to the final destination -set(java_files "") -set(source_java_files ${documented_java_files} ${handwrittren_aidl_sources}) -if(NOT ANDROID) - ocv_list_filterout(source_java_files "/(engine|android)\\\\+") -endif() - -foreach(java_file ${source_java_files}) +# step 3: copy files to destination +set(step3_input_files ${documented_java_files} ${handwrittren_aidl_sources}) +set(copied_files "") +foreach(java_file ${step3_input_files}) get_filename_component(java_file_name "${java_file}" NAME) string(REPLACE "-jdoc.java" ".java" java_file_name "${java_file_name}") string(REPLACE "+" "/" java_file_name "${java_file_name}") + set(output_name "${OpenCV_BINARY_DIR}/src/org/opencv/${java_file_name}") - add_custom_command( - OUTPUT "${OpenCV_BINARY_DIR}/${JAVA_OUTPUT_DIR}/${java_file_name}" - COMMAND ${CMAKE_COMMAND} -E copy "${java_file}" "${OpenCV_BINARY_DIR}/${JAVA_OUTPUT_DIR}/${java_file_name}" - MAIN_DEPENDENCY "${java_file}" - DEPENDS ${java_documented_headers_deps} - COMMENT "Generating ${JAVA_OUTPUT_DIR}/${java_file_name}" - ) - list(APPEND java_files "${OpenCV_BINARY_DIR}/${JAVA_OUTPUT_DIR}/${java_file_name}") + add_custom_command(OUTPUT "${output_name}" + COMMAND ${CMAKE_COMMAND} -E copy "${java_file}" "${output_name}" + MAIN_DEPENDENCY "${java_file}" + DEPENDS ${step2_depends} + COMMENT "Generating src/org/opencv/${java_file_name}" + ) + list(APPEND copied_files "${output_name}") if(ANDROID) get_filename_component(install_subdir "${java_file_name}" PATH) - install(FILES "${OpenCV_BINARY_DIR}/${JAVA_OUTPUT_DIR}/${java_file_name}" DESTINATION ${JAVA_INSTALL_ROOT}/${JAVA_OUTPUT_DIR}/${install_subdir} COMPONENT main) + install(FILES "${output_name}" DESTINATION "${JAVA_INSTALL_ROOT}/src/org/opencv/${install_subdir}" COMPONENT main) endif() endforeach() -# custom target for java API -set(api_target ${the_module}_api) -add_custom_target(${api_target} DEPENDS ${java_files} ${documented_java_files} ${java_documented_headers_deps}) +if(ANDROID) + set(android_copied_files "") + set(android_step3_input_files "") + foreach(file ${handwrittren_lib_project_files_rel}) + add_custom_command(OUTPUT "${OpenCV_BINARY_DIR}/${file}" + COMMAND ${CMAKE_COMMAND} -E copy "${CMAKE_CURRENT_SOURCE_DIR}/android_lib/${file}" "${OpenCV_BINARY_DIR}/${file}" + MAIN_DEPENDENCY "${CMAKE_CURRENT_SOURCE_DIR}/android_lib/${file}" + COMMENT "Generating ${file}" + ) + list(APPEND android_copied_files "${OpenCV_BINARY_DIR}/${file}") + list(APPEND android_step3_input_files "${CMAKE_CURRENT_SOURCE_DIR}/android_lib/${file}") -# add opencv_java library -add_library(${the_module} SHARED ${handwrittren_h_sources} ${handwrittren_cpp_sources} ${generated_cpp_sources}) + if(NOT file MATCHES "jni/.+") + get_filename_component(install_subdir "${file}" PATH) + install(FILES "${OpenCV_BINARY_DIR}/${file}" DESTINATION "${JAVA_INSTALL_ROOT}/${install_subdir}" COMPONENT main) + endif() + endforeach() + + # library project jni sources (nothing really depends on them so we will not add them to step3_input_files) + foreach(jni_file ${handwrittren_cpp_sources} ${handwrittren_h_sources} ${generated_cpp_sources}) + get_filename_component(jni_file_name "${jni_file}" NAME) + add_custom_command(OUTPUT "${OpenCV_BINARY_DIR}/jni/${jni_file_name}" + COMMAND ${CMAKE_COMMAND} -E copy "${jni_file}" "${OpenCV_BINARY_DIR}/jni/${jni_file_name}" + DEPENDS "${jni_file}" ${java_hdr_deps} + COMMENT "Generating jni/${jni_file_name}" + ) + list(APPEND android_copied_files "${OpenCV_BINARY_DIR}/jni/${jni_file_name}") + endforeach() +endif(ANDROID) + +# step 3.5: generate Android library project +if(ANDROID AND ANDROID_EXECUTABLE) + set(lib_target_files ${ANDROID_LIB_PROJECT_FILES}) + ocv_list_add_prefix(lib_target_files "${OpenCV_BINARY_DIR}/") + + android_get_compatible_target(lib_target_sdk_target ${ANDROID_NATIVE_API_LEVEL} ${ANDROID_SDK_TARGET} 11) + configure_file("${CMAKE_CURRENT_SOURCE_DIR}/android_lib/${ANDROID_MANIFEST_FILE}" "${CMAKE_CURRENT_BINARY_DIR}/${ANDROID_MANIFEST_FILE}") + + add_custom_command(OUTPUT ${lib_target_files} "${OpenCV_BINARY_DIR}/${ANDROID_MANIFEST_FILE}" + COMMAND ${CMAKE_COMMAND} -E remove ${lib_target_files} + COMMAND ${CMAKE_COMMAND} -E copy "${CMAKE_CURRENT_BINARY_DIR}/${ANDROID_MANIFEST_FILE}" "${OpenCV_BINARY_DIR}/${ANDROID_MANIFEST_FILE}" + COMMAND ${ANDROID_EXECUTABLE} --silent create lib-project --path \"${OpenCV_BINARY_DIR}\" --target \"${lib_target_sdk_target}\" --name OpenCV --package org.opencv 2>\"${CMAKE_CURRENT_BINARY_DIR}/create_lib_project.log\" + COMMAND ${CMAKE_COMMAND} -E copy "${CMAKE_CURRENT_BINARY_DIR}/${ANDROID_MANIFEST_FILE}" "${OpenCV_BINARY_DIR}/${ANDROID_MANIFEST_FILE}" + MAIN_DEPENDENCY "${CMAKE_CURRENT_BINARY_DIR}/${ANDROID_MANIFEST_FILE}" + DEPENDS ${android_step3_input_files} ${android_copied_files} + COMMENT "Generating OpenCV Android library project. SDK target: ${lib_target_sdk_target}" + ) + list(APPEND copied_files ${lib_target_files} "${OpenCV_BINARY_DIR}/${ANDROID_MANIFEST_FILE}") + list(APPEND step3_input_files "${CMAKE_CURRENT_BINARY_DIR}/${ANDROID_MANIFEST_FILE}") + + install(FILES "${OpenCV_BINARY_DIR}/${ANDROID_PROJECT_PROPERTIES_FILE}" DESTINATION ${JAVA_INSTALL_ROOT} COMPONENT main) + install(FILES "${OpenCV_BINARY_DIR}/${ANDROID_MANIFEST_FILE}" DESTINATION ${JAVA_INSTALL_ROOT} COMPONENT main) + # creating empty 'gen' and 'res' folders + install(CODE "MAKE_DIRECTORY(\"\$ENV{DESTDIR}\${CMAKE_INSTALL_PREFIX}/${JAVA_INSTALL_ROOT}/gen\")" COMPONENT main) + install(CODE "MAKE_DIRECTORY(\"\$ENV{DESTDIR}\${CMAKE_INSTALL_PREFIX}/${JAVA_INSTALL_ROOT}/res\")" COMPONENT main) +endif(ANDROID AND ANDROID_EXECUTABLE) + +set(step3_depends ${step2_depends} ${step3_input_files} ${copied_files}) + +# step 4: build jar +if(ANDROID) + set(JAR_FILE "${OpenCV_BINARY_DIR}/bin/classes.jar") + if(ANDROID_TOOLS_Pkg_Revision GREATER 13) + # build the library project + # normally we should do this after a native part, but for a library project we can build the java part first + add_custom_command(OUTPUT "${JAR_FILE}" "${JAR_FILE}.dephelper" + COMMAND ${ANT_EXECUTABLE} -q -noinput -k debug + COMMAND ${CMAKE_COMMAND} -E touch "${JAR_FILE}.dephelper" # can not rely on classes.jar because different versions of SDK update timestamp at different times + WORKING_DIRECTORY "${OpenCV_BINARY_DIR}" + DEPENDS ${step3_depends} + COMMENT "Building OpenCV Android library project" + ) + else() + # ditto + add_custom_command(OUTPUT "${JAR_FILE}" "${JAR_FILE}.dephelper" + COMMAND ${CMAKE_COMMAND} -E touch "${JAR_FILE}" + COMMAND ${CMAKE_COMMAND} -E touch "${JAR_FILE}.dephelper" + WORKING_DIRECTORY "${OpenCV_BINARY_DIR}" + DEPENDS ${step3_depends} + COMMENT "" + ) + endif() +else(ANDROID) + set(JAR_NAME opencv-${OPENCV_VERSION}.jar) + set(JAR_FILE "${OpenCV_BINARY_DIR}/bin/${JAR_NAME}") + configure_file("${CMAKE_CURRENT_SOURCE_DIR}/build.xml.in" "${OpenCV_BINARY_DIR}/build.xml" IMMEDIATE @ONLY) + list(APPEND step3_depends "${OpenCV_BINARY_DIR}/build.xml") + + add_custom_command(OUTPUT "${JAR_FILE}" "${JAR_FILE}.dephelper" + COMMAND ${ANT_EXECUTABLE} -q -noinput -k jar + COMMAND ${CMAKE_COMMAND} -E touch "${JAR_FILE}.dephelper" + WORKING_DIRECTORY "${OpenCV_BINARY_DIR}" + DEPENDS ${step3_depends} + COMMENT "Generating ${JAR_NAME}" + ) +endif(ANDROID) + +# step 5: build native part +add_library(${the_module} SHARED ${handwrittren_h_sources} ${handwrittren_cpp_sources} ${generated_cpp_sources} + ${copied_files} + "${JAR_FILE}" "${JAR_FILE}.dephelper") if(BUILD_FAT_JAVA_LIB) set(__deps ${OPENCV_MODULE_${the_module}_DEPS} ${OPENCV_MODULES_BUILD}) list(REMOVE_ITEM __deps ${the_module} opencv_ts) @@ -176,22 +276,7 @@ if(BUILD_FAT_JAVA_LIB) else() target_link_libraries(${the_module} ${OPENCV_MODULE_${the_module}_DEPS} ${OPENCV_LINKER_LIBS}) endif() -add_dependencies(${the_module} ${api_target}) -# Additional target properties -set_target_properties(${the_module} PROPERTIES - OUTPUT_NAME "${the_module}" - ARCHIVE_OUTPUT_DIRECTORY ${LIBRARY_OUTPUT_PATH} - RUNTIME_OUTPUT_DIRECTORY ${EXECUTABLE_OUTPUT_PATH} - INSTALL_NAME_DIR ${OPENCV_LIB_INSTALL_PATH} - LINK_INTERFACE_LIBRARIES "" - ) - -install(TARGETS ${the_module} - LIBRARY DESTINATION ${OPENCV_LIB_INSTALL_PATH} COMPONENT main - ARCHIVE DESTINATION ${OPENCV_LIB_INSTALL_PATH} COMPONENT main) - -set(lib_target ${the_module}_library) if(ANDROID) target_link_libraries(${the_module} jnigraphics) # for Mat <=> Bitmap converters @@ -202,105 +287,37 @@ if(ANDROID) if ( NOT (CMAKE_BUILD_TYPE MATCHES "Debug")) add_custom_command(TARGET ${the_module} POST_BUILD COMMAND ${CMAKE_STRIP} --strip-unneeded "${__opencv_java_location}") endif() +endif() - set(lib_proj_files "") - - # manifest, jni, Eclipse project - file(GLOB_RECURSE android_lib_project_files RELATIVE "${CMAKE_CURRENT_SOURCE_DIR}/android_lib/" "${CMAKE_CURRENT_SOURCE_DIR}/android_lib/*") - list(REMOVE_ITEM android_lib_project_files "${ANDROID_MANIFEST_FILE}") - foreach(f ${android_lib_project_files}) - if(NOT f MATCHES "\\.svn") - configure_file("${CMAKE_CURRENT_SOURCE_DIR}/android_lib/${f}" "${OpenCV_BINARY_DIR}/${f}") - list(APPEND lib_proj_files "${OpenCV_BINARY_DIR}/${f}") - - if(NOT f MATCHES "jni/.+") - get_filename_component(install_subdir "${f}" PATH) - install(FILES "${OpenCV_BINARY_DIR}/${f}" DESTINATION "${JAVA_INSTALL_ROOT}/${install_subdir}" COMPONENT main) - endif() - endif() - endforeach() - - # library project jni sources - foreach(jni_file ${handwrittren_cpp_sources} ${handwrittren_h_sources} ${generated_cpp_sources}) - get_filename_component(jni_file_name "${jni_file}" NAME) - add_custom_command( - OUTPUT "${OpenCV_BINARY_DIR}/jni/${jni_file_name}" - COMMAND ${CMAKE_COMMAND} -E copy "${jni_file}" "${OpenCV_BINARY_DIR}/jni/${jni_file_name}" - DEPENDS "${jni_file}" ${java_hdr_deps} - COMMENT "Generating jni/${jni_file_name}" - ) - list(APPEND lib_proj_files "${OpenCV_BINARY_DIR}/jni/${jni_file_name}") - endforeach() - - # create Android library project in build folder - if(ANDROID_EXECUTABLE) - set(lib_target_files ${ANDROID_LIB_PROJECT_FILES}) - ocv_list_add_prefix(lib_target_files "${OpenCV_BINARY_DIR}/") - - android_get_compatible_target(lib_target_sdk_target ${ANDROID_NATIVE_API_LEVEL} ${ANDROID_SDK_TARGET} 11) - - configure_file("${CMAKE_CURRENT_SOURCE_DIR}/android_lib/${ANDROID_MANIFEST_FILE}" "${CMAKE_CURRENT_BINARY_DIR}/${ANDROID_MANIFEST_FILE}") - - add_custom_command( - OUTPUT ${lib_target_files} "${OpenCV_BINARY_DIR}/${ANDROID_MANIFEST_FILE}" - COMMAND ${CMAKE_COMMAND} -E remove ${lib_target_files} - COMMAND ${CMAKE_COMMAND} -E copy "${CMAKE_CURRENT_BINARY_DIR}/${ANDROID_MANIFEST_FILE}" "${OpenCV_BINARY_DIR}/${ANDROID_MANIFEST_FILE}" - COMMAND ${ANDROID_EXECUTABLE} --silent create lib-project --path \"${OpenCV_BINARY_DIR}\" --target \"${lib_target_sdk_target}\" --name OpenCV --package org.opencv 2>\"${CMAKE_CURRENT_BINARY_DIR}/create_lib_project.log\" - COMMAND ${CMAKE_COMMAND} -E copy "${CMAKE_CURRENT_BINARY_DIR}/${ANDROID_MANIFEST_FILE}" "${OpenCV_BINARY_DIR}/${ANDROID_MANIFEST_FILE}" - MAIN_DEPENDENCY "${CMAKE_CURRENT_BINARY_DIR}/${ANDROID_MANIFEST_FILE}" - DEPENDS ${lib_proj_files} - COMMENT "Generating OpenCV Android library project. SDK target: ${lib_target_sdk_target}" - ) - - install(FILES "${OpenCV_BINARY_DIR}/${ANDROID_PROJECT_PROPERTIES_FILE}" DESTINATION ${JAVA_INSTALL_ROOT} COMPONENT main) - install(FILES "${OpenCV_BINARY_DIR}/${ANDROID_MANIFEST_FILE}" DESTINATION ${JAVA_INSTALL_ROOT} COMPONENT main) - # creating empty 'gen' and 'res' folders - install(CODE "MAKE_DIRECTORY(\"\$ENV{DESTDIR}\${CMAKE_INSTALL_PREFIX}/sdk/java/gen\")" COMPONENT main) - install(CODE "MAKE_DIRECTORY(\"\$ENV{DESTDIR}\${CMAKE_INSTALL_PREFIX}/sdk/java/res\")" COMPONENT main) - - if(ANT_EXECUTABLE AND ANDROID_TOOLS_Pkg_Revision GREATER 13) - # build the library project - # normally we should do this after a native part, but for a library project we can build the java part first - add_custom_command( - OUTPUT "${OpenCV_BINARY_DIR}/bin/classes.jar" "${OpenCV_BINARY_DIR}/bin/.classes.jar.dephelper" - COMMAND ${ANT_EXECUTABLE} -q -noinput -k debug - COMMAND ${CMAKE_COMMAND} -E touch "${OpenCV_BINARY_DIR}/bin/.classes.jar.dephelper" # can not rely on classes.jar because different versions of SDK update timestamp at different times - WORKING_DIRECTORY "${OpenCV_BINARY_DIR}" - DEPENDS ${lib_proj_files} ${lib_target_files} ${java_files} - COMMENT "Building OpenCV Android library project" - ) - #install(FILES "${OpenCV_BINARY_DIR}/bin/classes.jar" "${OpenCV_BINARY_DIR}/bin/jarlist.cache" "${OpenCV_BINARY_DIR}/bin/build.prop" DESTINATION bin COMPONENT main) - #install(DIRECTORY "${OpenCV_BINARY_DIR}/bin/res" "${OpenCV_BINARY_DIR}/bin/classes" DESTINATION bin COMPONENT main) - list(APPEND lib_target_files "${OpenCV_BINARY_DIR}/bin/classes.jar") - endif() - - add_custom_target(${lib_target} SOURCES ${lib_proj_files} ${lib_target_files} "${OpenCV_BINARY_DIR}/${ANDROID_MANIFEST_FILE}") - endif() -else(ANDROID) - configure_file("${CMAKE_CURRENT_SOURCE_DIR}/jar/build.xml" "${OpenCV_BINARY_DIR}/build.xml" IMMEDIATE @ONLY) - set(JAR_NAME opencv-${OPENCV_VERSION_MAJOR}.${OPENCV_VERSION_MINOR}.${OPENCV_VERSION_PATCH}.jar) - - add_custom_command( - OUTPUT "${OpenCV_BINARY_DIR}/bin/${JAR_NAME}" "${OpenCV_BINARY_DIR}/bin/.${JAR_NAME}.dephelper" - COMMAND ${ANT_EXECUTABLE} -q -noinput -k jar - COMMAND ${CMAKE_COMMAND} -E touch "${OpenCV_BINARY_DIR}/bin/.${JAR_NAME}.dephelper" - WORKING_DIRECTORY "${OpenCV_BINARY_DIR}" - DEPENDS "${OpenCV_BINARY_DIR}/build.xml" ${java_files} - COMMENT "Generating ${JAR_NAME}" +# Additional target properties +set_target_properties(${the_module} PROPERTIES + OUTPUT_NAME "${the_module}" + ARCHIVE_OUTPUT_DIRECTORY ${LIBRARY_OUTPUT_PATH} + RUNTIME_OUTPUT_DIRECTORY ${EXECUTABLE_OUTPUT_PATH} + INSTALL_NAME_DIR ${OPENCV_LIB_INSTALL_PATH} + LINK_INTERFACE_LIBRARIES "" ) - add_custom_target(${lib_target} SOURCES "${OpenCV_BINARY_DIR}/bin/${JAR_NAME}") -endif(ANDROID) - -add_dependencies(${lib_target} ${api_target}) -add_dependencies(${the_module} ${lib_target}) - -# android test project -if(ANDROID AND BUILD_TESTS) - add_subdirectory(android_test) +if(ANDROID) + set_target_properties(${the_module} PROPERTIES LIBRARY_OUTPUT_DIRECTORY ${LIBRARY_OUTPUT_PATH}) +else() + set_target_properties(${the_module} PROPERTIES LIBRARY_OUTPUT_DIRECTORY ${EXECUTABLE_OUTPUT_PATH}) endif() -# Desktop Java test project. -if((NOT ANDROID) AND BUILD_TESTS) - add_subdirectory(java_test) +if(ENABLE_SOLUTION_FOLDERS) + set_target_properties(${the_module} PROPERTIES FOLDER "bindings") +endif() + +install(TARGETS ${the_module} + LIBRARY DESTINATION ${OPENCV_LIB_INSTALL_PATH} COMPONENT main + ARCHIVE DESTINATION ${OPENCV_LIB_INSTALL_PATH} COMPONENT main) + +###################################################################################################################################### + +if(BUILD_TESTS) + if(ANDROID) + add_subdirectory(android_test) + else() + add_subdirectory(test) + endif() endif() diff --git a/modules/java/android_lib/.project b/modules/java/android_lib/.project index 174e529f5..44f6529bd 100644 --- a/modules/java/android_lib/.project +++ b/modules/java/android_lib/.project @@ -1,6 +1,6 @@ - OpenCV Library - @OPENCV_VERSION_MAJOR@.@OPENCV_VERSION_MINOR@.@OPENCV_VERSION_PATCH@ + OpenCV Library - @OPENCV_VERSION@ diff --git a/modules/java/android_lib/AndroidManifest.xml b/modules/java/android_lib/AndroidManifest.xml index e4567ae02..f71b84d03 100644 --- a/modules/java/android_lib/AndroidManifest.xml +++ b/modules/java/android_lib/AndroidManifest.xml @@ -1,8 +1,8 @@ + android:versionCode="@OPENCV_VERSION_MAJOR@@OPENCV_VERSION_MINOR@@OPENCV_VERSION_PATCH@@OPENCV_VERSION_TWEAK@" + android:versionName="@OPENCV_VERSION@"> diff --git a/modules/java/android_test/CMakeLists.txt b/modules/java/android_test/CMakeLists.txt index f6e5a7be9..06ebf4aa1 100644 --- a/modules/java/android_test/CMakeLists.txt +++ b/modules/java/android_test/CMakeLists.txt @@ -47,7 +47,7 @@ add_custom_command( COMMAND ${CMAKE_COMMAND} -E touch "${opencv_test_java_bin_dir}/bin/OpenCVTest-debug.apk" # needed because ant does not update the timestamp of updated apk WORKING_DIRECTORY "${opencv_test_java_bin_dir}" MAIN_DEPENDENCY "${opencv_test_java_bin_dir}/${ANDROID_MANIFEST_FILE}" - DEPENDS "${OpenCV_BINARY_DIR}/bin/.classes.jar.dephelper" opencv_java + DEPENDS "${JAR_FILE}.dephelper" opencv_java DEPENDS ${opencv_test_java_file_deps}) add_custom_target(${PROJECT_NAME} ALL SOURCES "${opencv_test_java_bin_dir}/bin/OpenCVTest-debug.apk" ) diff --git a/modules/java/jar/build.xml b/modules/java/build.xml.in similarity index 84% rename from modules/java/jar/build.xml rename to modules/java/build.xml.in index 71c1b1fef..98ba2e36b 100644 --- a/modules/java/jar/build.xml +++ b/modules/java/build.xml.in @@ -10,6 +10,6 @@ - + \ No newline at end of file diff --git a/modules/java/generator/gen_java.py b/modules/java/generator/gen_java.py index 429360cd2..0f3ba1d33 100755 --- a/modules/java/generator/gen_java.py +++ b/modules/java/generator/gen_java.py @@ -871,22 +871,9 @@ public class %(jc)s { // This file is auto-generated, please don't edit! // -#include - -#include "converters.h" - -#if defined DEBUG && defined ANDROID -# include -# define MODULE_LOG_TAG "OpenCV.%(m)s" -# define LOGD(...) ((void)__android_log_print(ANDROID_LOG_DEBUG, MODULE_LOG_TAG, __VA_ARGS__)) -#else //DEBUG -# define LOGD(...) -#endif //DEBUG - -#ifdef _MSC_VER -# pragma warning(disable:4800 4244) -#endif +#define LOG_TAG "org.opencv.%(m)s" +#include "common.h" #include "opencv2/%(m)s/%(m)s.hpp" using namespace cv; diff --git a/modules/java/generator/src/cpp/Mat.cpp b/modules/java/generator/src/cpp/Mat.cpp index 3831e188a..726dcaf5f 100644 --- a/modules/java/generator/src/cpp/Mat.cpp +++ b/modules/java/generator/src/cpp/Mat.cpp @@ -1,27 +1,6 @@ -#include - -#include "converters.h" - -#ifdef ANDROID - -#include #define LOG_TAG "org.opencv.core.Mat" -#define LOGE(...) ((void)__android_log_print(ANDROID_LOG_ERROR, LOG_TAG, __VA_ARGS__)) -#ifdef DEBUG -#define LOGD(...) ((void)__android_log_print(ANDROID_LOG_DEBUG, LOG_TAG, __VA_ARGS__)) -#else //!DEBUG -#define LOGD(...) -#endif //DEBUG - -#else -#define LOGE(...) -#define LOGD(...) -#endif - -#ifdef _MSC_VER -# pragma warning(disable:4800) -#endif +#include "common.h" #include "opencv2/core/core.hpp" using namespace cv; diff --git a/modules/java/generator/src/cpp/VideoCapture.cpp b/modules/java/generator/src/cpp/VideoCapture.cpp index 48dc151b0..5b9266660 100644 --- a/modules/java/generator/src/cpp/VideoCapture.cpp +++ b/modules/java/generator/src/cpp/VideoCapture.cpp @@ -1,12 +1,5 @@ -#include - -#if defined DEBUG && defined ANDROID -#include -#define MODULE_LOG_TAG "OpenCV.highgui" -#define LOGD(...) ((void)__android_log_print(ANDROID_LOG_DEBUG, MODULE_LOG_TAG, __VA_ARGS__)) -#else -#define LOGD(...) -#endif +#define LOG_TAG "org.opencv.highgui.VideoCapture" +#include "common.h" #include "opencv2/opencv_modules.hpp" #ifdef HAVE_OPENCV_HIGHGUI diff --git a/modules/java/generator/src/cpp/common.h b/modules/java/generator/src/cpp/common.h new file mode 100644 index 000000000..b67f633f7 --- /dev/null +++ b/modules/java/generator/src/cpp/common.h @@ -0,0 +1,33 @@ +#ifndef __JAVA_COMMON_H__ +#define __JAVA_COMMON_H__ + +#if !defined(__ppc__) +// to suppress warning from jni.h on OS X +# define TARGET_RT_MAC_CFM 0 +#endif +#include + +#ifdef __ANDROID__ +# include +# define LOGE(...) ((void)__android_log_print(ANDROID_LOG_ERROR, LOG_TAG, __VA_ARGS__)) +# ifdef DEBUG +# define LOGD(...) ((void)__android_log_print(ANDROID_LOG_DEBUG, LOG_TAG, __VA_ARGS__)) +# else +# define LOGD(...) +# endif +#else +# define LOGE(...) +# define LOGD(...) +#endif + +#include "converters.h" + +#include "core_manual.hpp" +#include "features2d_manual.hpp" + + +#ifdef _MSC_VER +# pragma warning(disable:4800 4244) +#endif + +#endif //__JAVA_COMMON_H__ \ No newline at end of file diff --git a/modules/java/generator/src/cpp/converters.cpp b/modules/java/generator/src/cpp/converters.cpp index 6bf52c00e..9acf318db 100644 --- a/modules/java/generator/src/cpp/converters.cpp +++ b/modules/java/generator/src/cpp/converters.cpp @@ -1,12 +1,5 @@ -#include "converters.h" - -#if defined DEBUG && defined ANDROID -#include -#define MODULE_LOG_TAG "OpenCV.converters" -#define LOGD(...) ((void)__android_log_print(ANDROID_LOG_DEBUG, MODULE_LOG_TAG, __VA_ARGS__)) -#else //DEBUG -#define LOGD(...) -#endif //DEBUG +#define LOG_TAG "org.opencv.utils.Converters" +#include "common.h" using namespace cv; diff --git a/modules/java/generator/src/cpp/converters.h b/modules/java/generator/src/cpp/converters.h index dde73bc6a..bdffe9269 100644 --- a/modules/java/generator/src/cpp/converters.h +++ b/modules/java/generator/src/cpp/converters.h @@ -1,5 +1,3 @@ -#include - #include "opencv2/opencv_modules.hpp" #include "opencv2/core/core.hpp" #include "features2d_manual.hpp" diff --git a/modules/java/generator/src/cpp/core_manual.cpp b/modules/java/generator/src/cpp/core_manual.cpp new file mode 100644 index 000000000..d7ba5b994 --- /dev/null +++ b/modules/java/generator/src/cpp/core_manual.cpp @@ -0,0 +1,15 @@ +#define LOG_TAG "org.opencv.core.Core" +#include "common.h" + +static int quietCallback( int, const char*, const char*, const char*, int, void* ) +{ + return 0; +} + +void cv::setErrorVerbosity(bool verbose) +{ + if(verbose) + cv::redirectError(0); + else + cv::redirectError((cv::ErrorCallback)quietCallback); +} \ No newline at end of file diff --git a/modules/java/generator/src/cpp/core_manual.hpp b/modules/java/generator/src/cpp/core_manual.hpp index 28d29e0a1..a2fc62782 100644 --- a/modules/java/generator/src/cpp/core_manual.hpp +++ b/modules/java/generator/src/cpp/core_manual.hpp @@ -2,6 +2,13 @@ #include "opencv2/core/core.hpp" +namespace cv +{ + +CV_EXPORTS_W void setErrorVerbosity(bool verbose); + +} + #if 0 namespace cv diff --git a/modules/java/generator/src/cpp/jni_part.cpp b/modules/java/generator/src/cpp/jni_part.cpp index b0315a287..df1bd142c 100644 --- a/modules/java/generator/src/cpp/jni_part.cpp +++ b/modules/java/generator/src/cpp/jni_part.cpp @@ -1,4 +1,4 @@ -#include +#include "common.h" #include "opencv2/opencv_modules.hpp" diff --git a/modules/java/generator/src/cpp/utils.cpp b/modules/java/generator/src/cpp/utils.cpp index 03a2bbdc2..9f6b1bfcf 100644 --- a/modules/java/generator/src/cpp/utils.cpp +++ b/modules/java/generator/src/cpp/utils.cpp @@ -1,24 +1,14 @@ -#include +#define LOG_TAG "org.opencv.android.Utils" +#include "common.h" #include "opencv2/core/core.hpp" #include "opencv2/imgproc/imgproc.hpp" -#ifdef ANDROID - +#ifdef __ANDROID__ #include -#include -#define LOG_TAG "org.opencv.android.Utils" -#define LOGE(...) ((void)__android_log_print(ANDROID_LOG_ERROR, LOG_TAG, __VA_ARGS__)) -#ifdef DEBUG -#define LOGD(...) ((void)__android_log_print(ANDROID_LOG_DEBUG, LOG_TAG, __VA_ARGS__)) -#else //!DEBUG -#define LOGD(...) -#endif //DEBUG - using namespace cv; - extern "C" { /* @@ -168,4 +158,4 @@ JNIEXPORT void JNICALL Java_org_opencv_android_Utils_nMatToBitmap } // extern "C" -#endif //ANDROID \ No newline at end of file +#endif //__ANDROID__ \ No newline at end of file diff --git a/modules/java/java_test/CMakeLists.txt b/modules/java/java_test/CMakeLists.txt deleted file mode 100644 index b2c8d10ae..000000000 --- a/modules/java/java_test/CMakeLists.txt +++ /dev/null @@ -1,80 +0,0 @@ -ocv_check_dependencies(opencv_java ${OPENCV_MODULE_opencv_java_OPT_DEPS} ${OPENCV_MODULE_opencv_java_REQ_DEPS}) - -if(NOT OCV_DEPENDENCIES_FOUND OR NOT ANT_EXECUTABLE) - return() -endif() - -# TODO: This has the same name as the Android test project. That project should -# probably be renamed. -project(opencv_test_java) - -set(opencv_test_java_bin_dir "${CMAKE_CURRENT_BINARY_DIR}/.build") - -set(android_source_dir "${CMAKE_CURRENT_SOURCE_DIR}/../android_test") - -set(java_source_dir ${CMAKE_CURRENT_SOURCE_DIR}) - -# get project sources -file(GLOB_RECURSE opencv_test_java_files RELATIVE "${android_source_dir}" "${android_source_dir}/res/*" "${android_source_dir}/src/*") -ocv_list_filterout(opencv_test_java_files ".svn") -ocv_list_filterout(opencv_test_java_files ".*#.*") -# These are the files that need to be updated for pure Java. -ocv_list_filterout(opencv_test_java_files ".*OpenCVTestCase.*") -ocv_list_filterout(opencv_test_java_files ".*OpenCVTestRunner.*") -# These files aren't for desktop Java. -ocv_list_filterout(opencv_test_java_files ".*android.*") - -# These are files updated for pure Java. -file(GLOB_RECURSE modified_files RELATIVE "${java_source_dir}" "${java_source_dir}/src/*") -ocv_list_filterout(modified_files ".svn") -ocv_list_filterout(modified_files ".*#.*") - -# These are extra jars needed to run the tests. -file(GLOB_RECURSE lib_files RELATIVE "${java_source_dir}" "${java_source_dir}/lib/*") -ocv_list_filterout(lib_files ".svn") -ocv_list_filterout(lib_files ".*#.*") - -# copy sources out from the build tree -set(opencv_test_java_file_deps "") -foreach(f ${opencv_test_java_files}) - add_custom_command( - OUTPUT "${opencv_test_java_bin_dir}/${f}" - COMMAND ${CMAKE_COMMAND} -E copy "${android_source_dir}/${f}" "${opencv_test_java_bin_dir}/${f}" - MAIN_DEPENDENCY "${android_source_dir}/${f}" - COMMENT "Copying ${f}") - list(APPEND opencv_test_java_file_deps "${android_source_dir}/${f}" "${opencv_test_java_bin_dir}/${f}") -endforeach() - -# Overwrite select Android sources with Java-specific sources. -# Also, copy over the libs we'll need for testing. -foreach(f ${modified_files} ${lib_files}) - add_custom_command( - OUTPUT "${opencv_test_java_bin_dir}/${f}" - COMMAND ${CMAKE_COMMAND} -E copy "${java_source_dir}/${f}" "${opencv_test_java_bin_dir}/${f}" - MAIN_DEPENDENCY "${java_source_dir}/${f}" - COMMENT "Copying ${f}") - list(APPEND opencv_test_java_file_deps "${java_source_dir}/${f}") -endforeach() - -# Copy the OpenCV jar after it has been generated. -add_custom_command( - OUTPUT "${opencv_test_java_bin_dir}/bin/${JAR_NAME}" - COMMAND ${CMAKE_COMMAND} -E copy "${CMAKE_BINARY_DIR}/bin/${JAR_NAME}" "${opencv_test_java_bin_dir}/bin/${JAR_NAME}" - COMMENT "Copying the OpenCV jar") -add_custom_target(copy_opencv_jar ALL SOURCES "${opencv_test_java_bin_dir}/bin/${JAR_NAME}") -# ${the_module} is the target for the Java jar. -add_dependencies(copy_opencv_jar ${the_module}) - -# Copy the ant build file. -file(COPY "${CMAKE_CURRENT_SOURCE_DIR}/build.xml" DESTINATION "${opencv_test_java_bin_dir}") - -# Create a script for running the Java tests and place it in build/bin. -if(WIN32) - file(WRITE "${CMAKE_BINARY_DIR}/bin/opencv_test_java.cmd" "cd ${opencv_test_java_bin_dir}\nset PATH=${EXECUTABLE_OUTPUT_PATH}/Release;%PATH%\nant -DjavaLibraryPath=${EXECUTABLE_OUTPUT_PATH}/Release buildAndTest") - file(WRITE "${CMAKE_BINARY_DIR}/bin/opencv_test_java_D.cmd" "cd ${opencv_test_java_bin_dir}\nset PATH=${EXECUTABLE_OUTPUT_PATH}/Debug;%PATH%\nant -DjavaLibraryPath=${EXECUTABLE_OUTPUT_PATH}/Debug buildAndTest") -else() - file(WRITE "${CMAKE_BINARY_DIR}/bin/opencv_test_java.sh" "cd ${opencv_test_java_bin_dir};\nant -DjavaLibraryPath=${LIBRARY_OUTPUT_PATH} buildAndTest;\ncd -") -endif() - -add_custom_target(${PROJECT_NAME} ALL SOURCES ${opencv_test_java_file_deps}) -add_dependencies(opencv_tests ${PROJECT_NAME}) diff --git a/modules/java/test/CMakeLists.txt b/modules/java/test/CMakeLists.txt new file mode 100644 index 000000000..8f3021991 --- /dev/null +++ b/modules/java/test/CMakeLists.txt @@ -0,0 +1,77 @@ +ocv_check_dependencies(opencv_java ${OPENCV_MODULE_opencv_java_OPT_DEPS} ${OPENCV_MODULE_opencv_java_REQ_DEPS}) + +if(NOT OCV_DEPENDENCIES_FOUND) + return() +endif() + +project(opencv_test_java) + +set(opencv_test_java_bin_dir "${CMAKE_CURRENT_BINARY_DIR}/.build") +set(android_source_dir "${CMAKE_CURRENT_SOURCE_DIR}/../android_test") +set(java_source_dir ${CMAKE_CURRENT_SOURCE_DIR}) + +# get project sources +file(GLOB_RECURSE opencv_test_java_files RELATIVE "${android_source_dir}" "${android_source_dir}/res/*" "${android_source_dir}/src/*.java") +# These are the files that need to be updated for pure Java. +ocv_list_filterout(opencv_test_java_files "OpenCVTest(Case|Runner).java") +# These files aren't for desktop Java. +ocv_list_filterout(opencv_test_java_files "/android/") + +# These are files updated for pure Java. +file(GLOB_RECURSE modified_files RELATIVE "${java_source_dir}" "${java_source_dir}/src/*") + +# These are extra jars needed to run the tests. +file(GLOB_RECURSE lib_files RELATIVE "${java_source_dir}" "${java_source_dir}/lib/*.jar") + +# copy sources out from the build tree +set(opencv_test_java_file_deps "") +foreach(f ${opencv_test_java_files}) + add_custom_command(OUTPUT "${opencv_test_java_bin_dir}/${f}" + COMMAND ${CMAKE_COMMAND} -E copy "${android_source_dir}/${f}" "${opencv_test_java_bin_dir}/${f}" + DEPENDS "${android_source_dir}/${f}" + COMMENT "Copying ${f}" + ) + list(APPEND opencv_test_java_file_deps "${android_source_dir}/${f}" "${opencv_test_java_bin_dir}/${f}") +endforeach() + +# Overwrite select Android sources with Java-specific sources. +# Also, copy over the libs we'll need for testing. +foreach(f ${modified_files} ${lib_files}) + add_custom_command(OUTPUT "${opencv_test_java_bin_dir}/${f}" + COMMAND ${CMAKE_COMMAND} -E copy "${java_source_dir}/${f}" "${opencv_test_java_bin_dir}/${f}" + DEPENDS "${java_source_dir}/${f}" + COMMENT "Copying ${f}" + ) + list(APPEND opencv_test_java_file_deps "${java_source_dir}/${f}" "${opencv_test_java_bin_dir}/${f}") +endforeach() + +# Copy the OpenCV jar after it has been generated. +add_custom_command(OUTPUT "${opencv_test_java_bin_dir}/bin/${JAR_NAME}" + COMMAND ${CMAKE_COMMAND} -E copy "${JAR_FILE}" "${opencv_test_java_bin_dir}/bin/${JAR_NAME}" + DEPENDS "${JAR_FILE}" + COMMENT "Copying the OpenCV jar" + ) + +add_custom_command(OUTPUT "${opencv_test_java_bin_dir}/build.xml" + COMMAND ${CMAKE_COMMAND} -E copy "${CMAKE_CURRENT_SOURCE_DIR}/build.xml" "${opencv_test_java_bin_dir}/build.xml" + DEPENDS "${CMAKE_CURRENT_SOURCE_DIR}/build.xml" + COMMENT "Copying build.xml" + ) + +# Create a script for running the Java tests and place it in build/bin. +#if(WIN32) + #file(WRITE "${CMAKE_BINARY_DIR}/bin/opencv_test_java.cmd" "cd ${opencv_test_java_bin_dir}\nset PATH=${EXECUTABLE_OUTPUT_PATH}/Release;%PATH%\nant -DjavaLibraryPath=${EXECUTABLE_OUTPUT_PATH}/Release buildAndTest") + #file(WRITE "${CMAKE_BINARY_DIR}/bin/opencv_test_java_D.cmd" "cd ${opencv_test_java_bin_dir}\nset PATH=${EXECUTABLE_OUTPUT_PATH}/Debug;%PATH%\nant -DjavaLibraryPath=${EXECUTABLE_OUTPUT_PATH}/Debug buildAndTest") +#else() + #file(WRITE "${CMAKE_BINARY_DIR}/bin/opencv_test_java.sh" "cd ${opencv_test_java_bin_dir};\nant -DjavaLibraryPath=${LIBRARY_OUTPUT_PATH} buildAndTest;\ncd -") +#endif() + +add_custom_command(OUTPUT "${opencv_test_java_bin_dir}/build/jar/opencv-test.jar" + COMMAND "${ANT_EXECUTABLE}" build + WORKING_DIRECTORY "${opencv_test_java_bin_dir}" + DEPENDS ${opencv_test_java_file_deps} "${opencv_test_java_bin_dir}/build.xml" "${CMAKE_CURRENT_SOURCE_DIR}/build.xml" "${JAR_FILE}" "${opencv_test_java_bin_dir}/bin/${JAR_NAME}" + COMMENT "Build Java tests" + ) + +add_custom_target(${PROJECT_NAME} ALL SOURCES "${opencv_test_java_bin_dir}/build/jar/opencv-test.jar") +add_dependencies(${PROJECT_NAME} ${the_module}) diff --git a/modules/java/java_test/build.xml b/modules/java/test/build.xml similarity index 65% rename from modules/java/java_test/build.xml rename to modules/java/test/build.xml index 5843f7438..d91118f0a 100644 --- a/modules/java/java_test/build.xml +++ b/modules/java/test/build.xml @@ -16,7 +16,7 @@ - + @@ -26,15 +26,16 @@ - + - + + @@ -43,13 +44,18 @@ - - - + + + + + + + + diff --git a/modules/java/java_test/lib/junit-4.11.jar b/modules/java/test/lib/junit-4.11.jar similarity index 100% rename from modules/java/java_test/lib/junit-4.11.jar rename to modules/java/test/lib/junit-4.11.jar diff --git a/modules/java/java_test/src/org/opencv/test/OpenCVTestCase.java b/modules/java/test/src/org/opencv/test/OpenCVTestCase.java similarity index 94% rename from modules/java/java_test/src/org/opencv/test/OpenCVTestCase.java rename to modules/java/test/src/org/opencv/test/OpenCVTestCase.java index 15ff735f8..ac1bf863c 100644 --- a/modules/java/java_test/src/org/opencv/test/OpenCVTestCase.java +++ b/modules/java/test/src/org/opencv/test/OpenCVTestCase.java @@ -96,31 +96,33 @@ public class OpenCVTestCase extends TestCase { protected void setUp() throws Exception { super.setUp(); - try { - System.loadLibrary("opencv_java"); - } catch (SecurityException e) { - System.out.println(e.toString()); - System.exit(-1); - } catch (UnsatisfiedLinkError e) { - System.out.println(e.toString()); - System.exit(-1); - } + try { + System.loadLibrary("opencv_java"); + } catch (SecurityException e) { + System.out.println(e.toString()); + System.exit(-1); + } catch (UnsatisfiedLinkError e) { + System.out.println(e.toString()); + System.exit(-1); + } - String pwd; - try { - pwd = new File(".").getCanonicalPath() + File.separator; - } catch (IOException e) { - System.out.println(e); - return; - } + Core.setErrorVerbosity(false); - OpenCVTestRunner.LENA_PATH = pwd + "res/drawable/lena.jpg"; - OpenCVTestRunner.CHESS_PATH = pwd + "res/drawable/chessboard.jpg"; - OpenCVTestRunner.LBPCASCADE_FRONTALFACE_PATH = pwd + "res/raw/lbpcascade_frontalface.xml"; + String pwd; + try { + pwd = new File(".").getCanonicalPath() + File.separator; + } catch (IOException e) { + System.out.println(e); + return; + } - assert(new File(OpenCVTestRunner.LENA_PATH).exists()); - assert(new File(OpenCVTestRunner.CHESS_PATH).exists()); - assert(new File(OpenCVTestRunner.LBPCASCADE_FRONTALFACE_PATH).exists()); + OpenCVTestRunner.LENA_PATH = pwd + "res/drawable/lena.jpg"; + OpenCVTestRunner.CHESS_PATH = pwd + "res/drawable/chessboard.jpg"; + OpenCVTestRunner.LBPCASCADE_FRONTALFACE_PATH = pwd + "res/raw/lbpcascade_frontalface.xml"; + + assert(new File(OpenCVTestRunner.LENA_PATH).exists()); + assert(new File(OpenCVTestRunner.CHESS_PATH).exists()); + assert(new File(OpenCVTestRunner.LBPCASCADE_FRONTALFACE_PATH).exists()); dst = new Mat(); assertTrue(dst.empty()); diff --git a/modules/java/java_test/src/org/opencv/test/OpenCVTestRunner.java b/modules/java/test/src/org/opencv/test/OpenCVTestRunner.java similarity index 100% rename from modules/java/java_test/src/org/opencv/test/OpenCVTestRunner.java rename to modules/java/test/src/org/opencv/test/OpenCVTestRunner.java diff --git a/modules/nonfree/src/surf.cpp b/modules/nonfree/src/surf.cpp index 9e6ce5464..9a4cd28a6 100644 --- a/modules/nonfree/src/surf.cpp +++ b/modules/nonfree/src/surf.cpp @@ -482,6 +482,8 @@ static void fastHessianDetector( const Mat& sum, const Mat& mask_sum, vector sampleSteps(nTotalLayers); vector middleIndices(nMiddleLayers); + keypoints.clear(); + // Allocate space and calculate properties of each layer int index = 0, middleIndex = 0, step = SAMPLE_STEP0; diff --git a/modules/nonfree/test/test_rotation_and_scale_invariance.cpp b/modules/nonfree/test/test_rotation_and_scale_invariance.cpp index 8d32f50be..3479be72a 100644 --- a/modules/nonfree/test/test_rotation_and_scale_invariance.cpp +++ b/modules/nonfree/test/test_rotation_and_scale_invariance.cpp @@ -668,3 +668,22 @@ TEST(Features2d_ScaleInvariance_Descriptor_SIFT, regression) 0.87f); test.safe_run(); } + + +TEST(Features2d_RotationInvariance2_Detector_SURF, regression) +{ + Mat cross(100, 100, CV_8UC1, Scalar(255)); + line(cross, Point(30, 50), Point(69, 50), Scalar(100), 3); + line(cross, Point(50, 30), Point(50, 69), Scalar(100), 3); + + SURF surf(8000., 3, 4, true, false); + + vector keypoints; + + surf(cross, noArray(), keypoints); + + ASSERT_EQ(keypoints.size(), (vector::size_type) 5); + ASSERT_LT( fabs(keypoints[1].response - keypoints[2].response), 1e-6); + ASSERT_LT( fabs(keypoints[1].response - keypoints[3].response), 1e-6); + ASSERT_LT( fabs(keypoints[1].response - keypoints[4].response), 1e-6); +} diff --git a/modules/objdetect/src/cascadedetect.cpp b/modules/objdetect/src/cascadedetect.cpp index 38c2e0a0b..46a232ed6 100644 --- a/modules/objdetect/src/cascadedetect.cpp +++ b/modules/objdetect/src/cascadedetect.cpp @@ -237,7 +237,7 @@ class MeanshiftGrouping { public: MeanshiftGrouping(const Point3d& densKer, const vector& posV, - const vector& wV, double, int maxIter = 20) + const vector& wV, double eps, int maxIter = 20) { densityKernel = densKer; weightsV = wV; @@ -246,6 +246,7 @@ public: meanshiftV.resize(positionsCount); distanceV.resize(positionsCount); iterMax = maxIter; + modeEps = eps; for (unsigned i = 0; i= 1200 -#pragma warning( push) -#pragma warning( disable: 4267) +#if defined _MSC_VER && _MSC_VER >= 1200 +# pragma warning( push) +# pragma warning( disable: 4267) #endif #include "opencv2/ocl/matrix_operations.hpp" -#if _MSC_VER >= 1200 -#pragma warning( pop) +#if defined _MSC_VER && _MSC_VER >= 1200 +# pragma warning( pop) #endif + #endif /* __OPENCV_OCL_HPP__ */ diff --git a/modules/ocl/perf/main.cpp b/modules/ocl/perf/main.cpp index 663bc197a..f1b688427 100644 --- a/modules/ocl/perf/main.cpp +++ b/modules/ocl/perf/main.cpp @@ -97,12 +97,12 @@ int main(int argc, char **argv) unsigned int pid = cmd.get("p"); int device = cmd.get("d"); print_info(); - int flag = CVCL_DEVICE_TYPE_GPU; + // int flag = CVCL_DEVICE_TYPE_GPU; - if(type == "cpu") - { - flag = CVCL_DEVICE_TYPE_CPU; - } + // if(type == "cpu") + // { + // flag = CVCL_DEVICE_TYPE_CPU; + // } std::vector oclinfo; int devnums = getDevice(oclinfo); if(devnums <= device || device < 0) diff --git a/modules/ocl/perf/perf_arithm.cpp b/modules/ocl/perf/perf_arithm.cpp index 197e8d641..9f1dfa3eb 100644 --- a/modules/ocl/perf/perf_arithm.cpp +++ b/modules/ocl/perf/perf_arithm.cpp @@ -2597,13 +2597,13 @@ TEST_P(Sum, MAT) Has_roi(k); t0 = (double)cvGetTickCount();//cpu start - Scalar cpures = cv::sum(mat1_roi); + cv::sum(mat1_roi); t0 = (double)cvGetTickCount() - t0;//cpu end t1 = (double)cvGetTickCount();//gpu start1 gmat1 = mat1_roi; t2 = (double)cvGetTickCount(); //kernel - Scalar gpures = cv::ocl::sum(gmat1); + cv::ocl::sum(gmat1); t2 = (double)cvGetTickCount() - t2;//kernel t1 = (double)cvGetTickCount() - t1;//gpu end1 if(j == 0) diff --git a/modules/ocl/perf/perf_haar.cpp b/modules/ocl/perf/perf_haar.cpp index a271f44d7..525b8fb49 100644 --- a/modules/ocl/perf/perf_haar.cpp +++ b/modules/ocl/perf/perf_haar.cpp @@ -109,15 +109,15 @@ TEST_F(Haar, FaceDetect) double t = 0; vector faces, oclfaces; - const static Scalar colors[] = { CV_RGB(0, 0, 255), - CV_RGB(0, 128, 255), - CV_RGB(0, 255, 255), - CV_RGB(0, 255, 0), - CV_RGB(255, 128, 0), - CV_RGB(255, 255, 0), - CV_RGB(255, 0, 0), - CV_RGB(255, 0, 255) - } ; + // const static Scalar colors[] = { CV_RGB(0, 0, 255), + // CV_RGB(0, 128, 255), + // CV_RGB(0, 255, 255), + // CV_RGB(0, 255, 0), + // CV_RGB(255, 128, 0), + // CV_RGB(255, 255, 0), + // CV_RGB(255, 0, 0), + // CV_RGB(255, 0, 255) + // } ; Mat gray, smallImg(cvRound (img.rows / scale), cvRound(img.cols / scale), CV_8UC1 ); MemStorage storage(cvCreateMemStorage(0)); diff --git a/modules/ocl/perf/perf_imgproc.cpp b/modules/ocl/perf/perf_imgproc.cpp index 7d544b0e2..bc54cb275 100644 --- a/modules/ocl/perf/perf_imgproc.cpp +++ b/modules/ocl/perf/perf_imgproc.cpp @@ -379,7 +379,7 @@ TEST_P(bilateralFilter, Mat) } else { - for(int i = 0; i < sizeof(bordertype) / sizeof(int); i++) + for(size_t i = 0; i < sizeof(bordertype) / sizeof(int); i++) { cout << borderstr[i] << endl; #ifndef PRINT_KERNEL_RUN_TIME @@ -397,7 +397,7 @@ TEST_P(bilateralFilter, Mat) for(int j = 0; j < LOOP_TIMES + 1; j ++) { Has_roi(k); - if(((bordertype[i] != cv::BORDER_CONSTANT) && (bordertype[i] != cv::BORDER_REPLICATE)) && (mat1_roi.cols <= radius) || (mat1_roi.cols <= radius) || (mat1_roi.rows <= radius) || (mat1_roi.rows <= radius)) + if(((bordertype[i] != cv::BORDER_CONSTANT) && (bordertype[i] != cv::BORDER_REPLICATE) && (mat1_roi.cols <= radius)) || (mat1_roi.cols <= radius) || (mat1_roi.rows <= radius) || (mat1_roi.rows <= radius)) { continue; } @@ -482,7 +482,7 @@ TEST_P(CopyMakeBorder, Mat) } else { - for(int i = 0; i < sizeof(bordertype) / sizeof(int); i++) + for(size_t i = 0; i < sizeof(bordertype) / sizeof(int); i++) { #ifndef PRINT_KERNEL_RUN_TIME double totalcputick = 0; @@ -1133,7 +1133,6 @@ PARAM_TEST_CASE(Remap, MatType, MatType, MatType, int, int) cv::RNG &rng = TS::ptr()->get_rng(); cv::Size srcSize = cv::Size(MWIDTH, MHEIGHT); - cv::Size dstSize = cv::Size(MWIDTH, MHEIGHT); cv::Size map1Size = cv::Size(MWIDTH, MHEIGHT); double min = 5, max = 16; diff --git a/modules/ocl/perf/precomp.hpp b/modules/ocl/perf/precomp.hpp index b3c540d62..34eea555f 100644 --- a/modules/ocl/perf/precomp.hpp +++ b/modules/ocl/perf/precomp.hpp @@ -38,6 +38,15 @@ // the use of this software, even if advised of the possibility of such damage. // //M*/ + +#ifdef __GNUC__ +# pragma GCC diagnostic ignored "-Wmissing-declarations" +# if defined __clang__ || defined __APPLE__ +# pragma GCC diagnostic ignored "-Wmissing-prototypes" +# pragma GCC diagnostic ignored "-Wextra" +# endif +#endif + #ifndef __OPENCV_TEST_PRECOMP_HPP__ #define __OPENCV_TEST_PRECOMP_HPP__ diff --git a/modules/ocl/perf/utility.hpp b/modules/ocl/perf/utility.hpp index 79f0f10c4..7d34b6731 100644 --- a/modules/ocl/perf/utility.hpp +++ b/modules/ocl/perf/utility.hpp @@ -84,12 +84,12 @@ double checkSimilarity(const cv::Mat &m1, const cv::Mat &m2); EXPECT_LE(checkNorm(cv::Mat(mat)), eps) \ } -//#define EXPECT_MAT_NEAR(mat1, mat2, eps) \ -//{ \ -// ASSERT_EQ(mat1.type(), mat2.type()); \ -// ASSERT_EQ(mat1.size(), mat2.size()); \ -// EXPECT_LE(checkNorm(cv::Mat(mat1), cv::Mat(mat2)), eps); \ -//} +/*#define EXPECT_MAT_NEAR(mat1, mat2, eps) \ +{ \ + ASSERT_EQ(mat1.type(), mat2.type()); \ + ASSERT_EQ(mat1.size(), mat2.size()); \ + EXPECT_LE(checkNorm(cv::Mat(mat1), cv::Mat(mat2)), eps); \ +}*/ #define EXPECT_MAT_NEAR(mat1, mat2, eps,s) \ { \ diff --git a/modules/ocl/src/arithm.cpp b/modules/ocl/src/arithm.cpp index 584ca93bb..de8f4343b 100644 --- a/modules/ocl/src/arithm.cpp +++ b/modules/ocl/src/arithm.cpp @@ -362,11 +362,11 @@ void arithmetic_run(const oclMat &src1, const oclMat &src2, oclMat &dst, string openCLExecuteKernel(clCxt, kernelString, kernelName, globalThreads, localThreads, args, -1, depth); } -void arithmetic_run(const oclMat &src1, const oclMat &src2, oclMat &dst, string kernelName, const char **kernelString) +static void arithmetic_run(const oclMat &src1, const oclMat &src2, oclMat &dst, string kernelName, const char **kernelString) { arithmetic_run(src1, src2, dst, kernelName, kernelString, (void *)NULL); } -void arithmetic_run(const oclMat &src1, const oclMat &src2, oclMat &dst, const oclMat &mask, string kernelName, const char **kernelString) +static void arithmetic_run(const oclMat &src1, const oclMat &src2, oclMat &dst, const oclMat &mask, string kernelName, const char **kernelString) { if(src1.clCxt -> impl -> double_support == 0 && src1.type() == CV_64F) { @@ -476,7 +476,9 @@ void arithmetic_scalar_run(const oclMat &src1, const Scalar &src2, oclMat &dst, //CV_Assert(src1.depth() != CV_8S); if(mask.data) + { CV_Assert(mask.type() == CV_8U && src1.rows == mask.rows && src1.cols == mask.cols); + } Context *clCxt = src1.clCxt; int channels = dst.oclchannels(); @@ -530,7 +532,7 @@ void arithmetic_scalar_run(const oclMat &src1, const Scalar &src2, oclMat &dst, openCLExecuteKernel(clCxt, kernelString, kernelName, globalThreads, localThreads, args, channels, depth); } -void arithmetic_scalar_run(const oclMat &src, oclMat &dst, string kernelName, const char **kernelString, double scalar) +static void arithmetic_scalar_run(const oclMat &src, oclMat &dst, string kernelName, const char **kernelString, double scalar) { if(src.clCxt -> impl -> double_support == 0 && src.type() == CV_64F) { @@ -590,7 +592,7 @@ void arithmetic_scalar_run(const oclMat &src, oclMat &dst, string kernelName, co typedef void (*ArithmeticFuncS)(const oclMat &src1, const Scalar &src2, oclMat &dst, const oclMat &mask, string kernelName, const char **kernelString, int isMatSubScalar); -void arithmetic_scalar(const oclMat &src1, const Scalar &src2, oclMat &dst, const oclMat &mask, string kernelName, const char **kernelString, int isMatSubScalar) +static void arithmetic_scalar(const oclMat &src1, const Scalar &src2, oclMat &dst, const oclMat &mask, string kernelName, const char **kernelString, int isMatSubScalar) { static ArithmeticFuncS tab[8] = { @@ -608,7 +610,7 @@ void arithmetic_scalar(const oclMat &src1, const Scalar &src2, oclMat &dst, cons cv::ocl::error("Unsupported arithmetic operation", __FILE__, __LINE__); func(src1, src2, dst, mask, kernelName, kernelString, isMatSubScalar); } -void arithmetic_scalar(const oclMat &src1, const Scalar &src2, oclMat &dst, const oclMat &mask, string kernelName, const char **kernelString) +static void arithmetic_scalar(const oclMat &src1, const Scalar &src2, oclMat &dst, const oclMat &mask, string kernelName, const char **kernelString) { arithmetic_scalar(src1, src2, dst, mask, kernelName, kernelString, 0); } @@ -660,7 +662,7 @@ void cv::ocl::absdiff(const oclMat &src1, const Scalar &src2, oclMat &dst) ////////////////////////////////////////////////////////////////////////////// ///////////////////////////////// compare /////////////////////////////////// ////////////////////////////////////////////////////////////////////////////// -void compare_run(const oclMat &src1, const oclMat &src2, oclMat &dst, string kernelName, const char **kernelString) +static void compare_run(const oclMat &src1, const oclMat &src2, oclMat &dst, string kernelName, const char **kernelString) { dst.create(src1.size(), CV_8UC1); CV_Assert(src1.oclchannels() == 1); @@ -739,7 +741,7 @@ void cv::ocl::compare(const oclMat &src1, const oclMat &src2, oclMat &dst , int ////////////////////////////////////////////////////////////////////////////// //type = 0 sum,type = 1 absSum,type = 2 sqrSum -void arithmetic_sum_buffer_run(const oclMat &src, cl_mem &dst, int vlen , int groupnum, int type = 0) +static void arithmetic_sum_buffer_run(const oclMat &src, cl_mem &dst, int vlen , int groupnum, int type = 0) { vector > args; int all_cols = src.step / (vlen * src.elemSize1()); @@ -872,7 +874,7 @@ void cv::ocl::meanStdDev(const oclMat &src, Scalar &mean, Scalar &stddev) ////////////////////////////////////////////////////////////////////////////// //////////////////////////////////// minMax ///////////////////////////////// ////////////////////////////////////////////////////////////////////////////// -void arithmetic_minMax_run(const oclMat &src, const oclMat &mask, cl_mem &dst, int vlen , int groupnum, string kernelName) +static void arithmetic_minMax_run(const oclMat &src, const oclMat &mask, cl_mem &dst, int vlen , int groupnum, string kernelName) { vector > args; int all_cols = src.step / (vlen * src.elemSize1()); @@ -909,7 +911,7 @@ void arithmetic_minMax_run(const oclMat &src, const oclMat &mask, cl_mem &dst, i } -void arithmetic_minMax_mask_run(const oclMat &src, const oclMat &mask, cl_mem &dst, int vlen, int groupnum, string kernelName) +static void arithmetic_minMax_mask_run(const oclMat &src, const oclMat &mask, cl_mem &dst, int vlen, int groupnum, string kernelName) { vector > args; size_t gt[3] = {groupnum * 256, 1, 1}, lt[3] = {256, 1, 1}; @@ -1063,7 +1065,7 @@ double cv::ocl::norm(const oclMat &src1, const oclMat &src2, int normType) ////////////////////////////////////////////////////////////////////////////// ////////////////////////////////// flip ////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////// -void arithmetic_flip_rows_run(const oclMat &src, oclMat &dst, string kernelName) +static void arithmetic_flip_rows_run(const oclMat &src, oclMat &dst, string kernelName) { if(src.clCxt -> impl -> double_support == 0 && src.type() == CV_64F) { @@ -1112,7 +1114,7 @@ void arithmetic_flip_rows_run(const oclMat &src, oclMat &dst, string kernelName) openCLExecuteKernel(clCxt, &arithm_flip, kernelName, globalThreads, localThreads, args, -1, depth); } -void arithmetic_flip_cols_run(const oclMat &src, oclMat &dst, string kernelName, bool isVertical) +static void arithmetic_flip_cols_run(const oclMat &src, oclMat &dst, string kernelName, bool isVertical) { if(src.clCxt -> impl -> double_support == 0 && src.type() == CV_64F) { @@ -1183,7 +1185,7 @@ void cv::ocl::flip(const oclMat &src, oclMat &dst, int flipCode) ////////////////////////////////////////////////////////////////////////////// ////////////////////////////////// LUT ////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////// -void arithmetic_lut_run(const oclMat &src1, const oclMat &src2, oclMat &dst, string kernelName) +static void arithmetic_lut_run(const oclMat &src1, const oclMat &src2, oclMat &dst, string kernelName) { Context *clCxt = src1.clCxt; int channels = src1.oclchannels(); @@ -1284,7 +1286,7 @@ void cv::ocl::LUT(const oclMat &src, const oclMat &lut, oclMat &dst) ////////////////////////////////////////////////////////////////////////////// //////////////////////////////// exp log ///////////////////////////////////// ////////////////////////////////////////////////////////////////////////////// -void arithmetic_exp_log_run(const oclMat &src, oclMat &dst, string kernelName, const char **kernelString) +static void arithmetic_exp_log_run(const oclMat &src, oclMat &dst, string kernelName, const char **kernelString) { dst.create(src.size(), src.type()); CV_Assert(src.cols == dst.cols && @@ -1333,7 +1335,7 @@ void cv::ocl::log(const oclMat &src, oclMat &dst) ////////////////////////////////////////////////////////////////////////////// ////////////////////////////// magnitude phase /////////////////////////////// ////////////////////////////////////////////////////////////////////////////// -void arithmetic_magnitude_phase_run(const oclMat &src1, const oclMat &src2, oclMat &dst, string kernelName) +static void arithmetic_magnitude_phase_run(const oclMat &src1, const oclMat &src2, oclMat &dst, string kernelName) { if(src1.clCxt -> impl -> double_support == 0 && src1.type() == CV_64F) { @@ -1381,7 +1383,7 @@ void cv::ocl::magnitude(const oclMat &src1, const oclMat &src2, oclMat &dst) arithmetic_magnitude_phase_run(src1, src2, dst, "arithm_magnitude"); } -void arithmetic_phase_run(const oclMat &src1, const oclMat &src2, oclMat &dst, string kernelName, const char **kernelString) +static void arithmetic_phase_run(const oclMat &src1, const oclMat &src2, oclMat &dst, string kernelName, const char **kernelString) { if(src1.clCxt -> impl -> double_support == 0 && src1.type() == CV_64F) { @@ -1444,7 +1446,7 @@ void cv::ocl::phase(const oclMat &x, const oclMat &y, oclMat &Angle , bool angle ////////////////////////////////////////////////////////////////////////////// ////////////////////////////////// cartToPolar /////////////////////////////// ////////////////////////////////////////////////////////////////////////////// -void arithmetic_cartToPolar_run(const oclMat &src1, const oclMat &src2, oclMat &dst_mag, oclMat &dst_cart, +static void arithmetic_cartToPolar_run(const oclMat &src1, const oclMat &src2, oclMat &dst_mag, oclMat &dst_cart, string kernelName, bool angleInDegrees) { if(src1.clCxt -> impl -> double_support == 0 && src1.type() == CV_64F) @@ -1499,7 +1501,7 @@ void cv::ocl::cartToPolar(const oclMat &x, const oclMat &y, oclMat &mag, oclMat ////////////////////////////////////////////////////////////////////////////// ////////////////////////////////// polarToCart /////////////////////////////// ////////////////////////////////////////////////////////////////////////////// -void arithmetic_ptc_run(const oclMat &src1, const oclMat &src2, oclMat &dst1, oclMat &dst2, bool angleInDegrees, +static void arithmetic_ptc_run(const oclMat &src1, const oclMat &src2, oclMat &dst1, oclMat &dst2, bool angleInDegrees, string kernelName) { if(src1.clCxt -> impl -> double_support == 0 && src1.type() == CV_64F) @@ -1564,7 +1566,7 @@ void cv::ocl::polarToCart(const oclMat &magnitude, const oclMat &angle, oclMat & ////////////////////////////////////////////////////////////////////////////// /////////////////////////////////// minMaxLoc //////////////////////////////// ////////////////////////////////////////////////////////////////////////////// -void arithmetic_minMaxLoc_run(const oclMat &src, cl_mem &dst, int vlen , int groupnum) +static void arithmetic_minMaxLoc_run(const oclMat &src, cl_mem &dst, int vlen , int groupnum) { vector > args; int all_cols = src.step / (vlen * src.elemSize1()); @@ -1588,7 +1590,7 @@ void arithmetic_minMaxLoc_run(const oclMat &src, cl_mem &dst, int vlen , int gro openCLExecuteKernel(src.clCxt, &arithm_minMaxLoc, "arithm_op_minMaxLoc", gt, lt, args, -1, -1, build_options); } -void arithmetic_minMaxLoc_mask_run(const oclMat &src, const oclMat &mask, cl_mem &dst, int vlen, int groupnum) +static void arithmetic_minMaxLoc_mask_run(const oclMat &src, const oclMat &mask, cl_mem &dst, int vlen, int groupnum) { vector > args; size_t gt[3] = {groupnum * 256, 1, 1}, lt[3] = {256, 1, 1}; @@ -1702,7 +1704,7 @@ void cv::ocl::minMaxLoc(const oclMat &src, double *minVal, double *maxVal, ////////////////////////////////////////////////////////////////////////////// ///////////////////////////// countNonZero /////////////////////////////////// ////////////////////////////////////////////////////////////////////////////// -void arithmetic_countNonZero_run(const oclMat &src, cl_mem &dst, int vlen , int groupnum, string kernelName) +static void arithmetic_countNonZero_run(const oclMat &src, cl_mem &dst, int vlen , int groupnum, string kernelName) { vector > args; int all_cols = src.step / (vlen * src.elemSize1()); @@ -1759,7 +1761,7 @@ int cv::ocl::countNonZero(const oclMat &src) ////////////////////////////////////////////////////////////////////////////// ////////////////////////////////bitwise_op//////////////////////////////////// ////////////////////////////////////////////////////////////////////////////// -void bitwise_run(const oclMat &src1, oclMat &dst, string kernelName, const char **kernelString) +static void bitwise_run(const oclMat &src1, oclMat &dst, string kernelName, const char **kernelString) { dst.create(src1.size(), src1.type()); @@ -1853,11 +1855,11 @@ void bitwise_run(const oclMat &src1, const oclMat &src2, oclMat &dst, string ker openCLExecuteKernel(clCxt, kernelString, kernelName, globalThreads, localThreads, args, -1, depth); } -void bitwise_run(const oclMat &src1, const oclMat &src2, oclMat &dst, string kernelName, const char **kernelString) +static void bitwise_run(const oclMat &src1, const oclMat &src2, oclMat &dst, string kernelName, const char **kernelString) { bitwise_run(src1, src2, dst, kernelName, kernelString, (void *)NULL); } -void bitwise_run(const oclMat &src1, const oclMat &src2, oclMat &dst, const oclMat &mask, string kernelName, const char **kernelString) +static void bitwise_run(const oclMat &src1, const oclMat &src2, oclMat &dst, const oclMat &mask, string kernelName, const char **kernelString) { dst.create(src1.size(), src1.type()); CV_Assert(src1.cols == src2.cols && src2.cols == dst.cols && @@ -1919,7 +1921,9 @@ void bitwise_scalar_run(const oclMat &src1, const Scalar &src2, oclMat &dst, con if(mask.data) + { CV_Assert(mask.type() == CV_8U && src1.rows == mask.rows && src1.cols == mask.cols); + } Context *clCxt = src1.clCxt; int channels = dst.oclchannels(); @@ -1977,7 +1981,7 @@ void bitwise_scalar_run(const oclMat &src1, const Scalar &src2, oclMat &dst, con typedef void (*BitwiseFuncS)(const oclMat &src1, const Scalar &src2, oclMat &dst, const oclMat &mask, string kernelName, const char **kernelString, int isMatSubScalar); -void bitwise_scalar(const oclMat &src1, const Scalar &src2, oclMat &dst, const oclMat &mask, string kernelName, const char **kernelString, int isMatSubScalar) +static void bitwise_scalar(const oclMat &src1, const Scalar &src2, oclMat &dst, const oclMat &mask, string kernelName, const char **kernelString, int isMatSubScalar) { static BitwiseFuncS tab[8] = { @@ -2007,7 +2011,7 @@ void bitwise_scalar(const oclMat &src1, const Scalar &src2, oclMat &dst, const o cv::ocl::error("Unsupported arithmetic operation", __FILE__, __LINE__); func(src1, src2, dst, mask, kernelName, kernelString, isMatSubScalar); } -void bitwise_scalar(const oclMat &src1, const Scalar &src2, oclMat &dst, const oclMat &mask, string kernelName, const char **kernelString) +static void bitwise_scalar(const oclMat &src1, const Scalar &src2, oclMat &dst, const oclMat &mask, string kernelName, const char **kernelString) { bitwise_scalar(src1, src2, dst, mask, kernelName, kernelString, 0); } @@ -2153,7 +2157,7 @@ cv::ocl::oclMat cv::ocl::operator ^ (const oclMat &src1, const oclMat &src2) ////////////////////////////////////////////////////////////////////////////// #define TILE_DIM (32) #define BLOCK_ROWS (256/TILE_DIM) -void transpose_run(const oclMat &src, oclMat &dst, string kernelName) +static void transpose_run(const oclMat &src, oclMat &dst, string kernelName) { if(src.clCxt -> impl -> double_support == 0 && src.type() == CV_64F) { @@ -2368,7 +2372,7 @@ void cv::ocl::magnitudeSqr(const oclMat &src1, oclMat &dst) openCLExecuteKernel(clCxt, &arithm_magnitudeSqr, "magnitudeSqr", globalThreads, localThreads, args, 2, depth); } -void arithmetic_pow_run(const oclMat &src1, double p, oclMat &dst, string kernelName, const char **kernelString) +static void arithmetic_pow_run(const oclMat &src1, double p, oclMat &dst, string kernelName, const char **kernelString) { CV_Assert(src1.cols == dst.cols && src1.rows == dst.rows); CV_Assert(src1.type() == dst.type()); @@ -2417,7 +2421,7 @@ void cv::ocl::pow(const oclMat &x, double p, oclMat &y) return; } - CV_Assert(x.type() == y.type() && x.size() == y.size() && x.depth() == CV_32F || x.depth() == CV_64F); + CV_Assert((x.type() == y.type() && x.size() == y.size() && x.depth() == CV_32F) || x.depth() == CV_64F); y.create(x.size(), x.type()); string kernelName = "arithm_pow"; diff --git a/modules/ocl/src/brute_force_matcher.cpp b/modules/ocl/src/brute_force_matcher.cpp index 20c9ebf57..9fe14d214 100644 --- a/modules/ocl/src/brute_force_matcher.cpp +++ b/modules/ocl/src/brute_force_matcher.cpp @@ -44,7 +44,7 @@ //M*/ #include "precomp.hpp" - +#include #include #include using namespace cv; @@ -821,7 +821,7 @@ void findKnnMatch(int k, const oclMat &trainIdx, const oclMat &distance, const o } } -void findKnnMatchDispatcher(int k, const oclMat &trainIdx, const oclMat &distance, const oclMat &allDist, int distType) +static void findKnnMatchDispatcher(int k, const oclMat &trainIdx, const oclMat &distance, const oclMat &allDist, int distType) { findKnnMatch<256>(k, trainIdx, distance, allDist, distType); } diff --git a/modules/ocl/src/color.cpp b/modules/ocl/src/color.cpp index 5bc19e11c..ab613e051 100644 --- a/modules/ocl/src/color.cpp +++ b/modules/ocl/src/color.cpp @@ -16,6 +16,7 @@ // // @Authors // Wang Weiyan, wangweiyanster@gmail.com +// Peng Xiao, pengxiao@multicorewareinc.com // // Redistribution and use in source and binary forms, with or without modification, // are permitted provided that the following conditions are met: @@ -70,79 +71,218 @@ void cv::ocl::cvtColor(const oclMat &, oclMat &, int, int, const Stream &) namespace cv { - namespace ocl - { - extern const char *cvt_color; - } +namespace ocl +{ +extern const char *cvt_color; +} } namespace { - void RGB2Gray_caller(const oclMat &src, oclMat &dst, int bidx) - { - vector > args; - int channels = src.oclchannels(); - char build_options[50]; - //printf("depth:%d,channels:%d,bidx:%d\n",src.depth(),src.oclchannels(),bidx); - sprintf(build_options, "-D DEPTH_%d", src.depth()); - args.push_back( make_pair( sizeof(cl_int) , (void *)&src.cols)); - args.push_back( make_pair( sizeof(cl_int) , (void *)&src.rows)); - args.push_back( make_pair( sizeof(cl_int) , (void *)&src.step)); - args.push_back( make_pair( sizeof(cl_int) , (void *)&dst.step)); - args.push_back( make_pair( sizeof(cl_int) , (void *)&channels)); - args.push_back( make_pair( sizeof(cl_int) , (void *)&bidx)); - args.push_back( make_pair( sizeof(cl_mem) , (void *)&src.data)); - args.push_back( make_pair( sizeof(cl_mem) , (void *)&dst.data)); - size_t gt[3] = {src.cols, src.rows, 1}, lt[3] = {16, 16, 1}; - openCLExecuteKernel(src.clCxt, &cvt_color, "RGB2Gray", gt, lt, args, -1, -1, build_options); - } - void cvtColor_caller(const oclMat &src, oclMat &dst, int code, int /*dcn*/) - { - Size sz = src.size(); - int scn = src.oclchannels(), depth = src.depth(), bidx; +void RGB2Gray_caller(const oclMat &src, oclMat &dst, int bidx) +{ + vector > args; + int channels = src.oclchannels(); + char build_options[50]; + sprintf(build_options, "-D DEPTH_%d", src.depth()); + //printf("depth:%d,channels:%d,bidx:%d\n",src.depth(),src.oclchannels(),bidx); + args.push_back( make_pair( sizeof(cl_int) , (void *)&src.cols)); + args.push_back( make_pair( sizeof(cl_int) , (void *)&src.rows)); + args.push_back( make_pair( sizeof(cl_int) , (void *)&src.step)); + args.push_back( make_pair( sizeof(cl_int) , (void *)&dst.step)); + args.push_back( make_pair( sizeof(cl_int) , (void *)&channels)); + args.push_back( make_pair( sizeof(cl_int) , (void *)&bidx)); + args.push_back( make_pair( sizeof(cl_mem) , (void *)&src.data)); + args.push_back( make_pair( sizeof(cl_mem) , (void *)&dst.data)); + size_t gt[3] = {src.cols, src.rows, 1}, lt[3] = {16, 16, 1}; + openCLExecuteKernel(src.clCxt, &cvt_color, "RGB2Gray", gt, lt, args, -1, -1, build_options); +} +void Gray2RGB_caller(const oclMat &src, oclMat &dst) +{ + vector > args; + char build_options[50]; + sprintf(build_options, "-D DEPTH_%d", src.depth()); + //printf("depth:%d,channels:%d,bidx:%d\n",src.depth(),src.oclchannels(),bidx); + args.push_back( make_pair( sizeof(cl_int) , (void *)&src.cols)); + args.push_back( make_pair( sizeof(cl_int) , (void *)&src.rows)); + args.push_back( make_pair( sizeof(cl_int) , (void *)&src.step)); + args.push_back( make_pair( sizeof(cl_int) , (void *)&dst.step)); + args.push_back( make_pair( sizeof(cl_mem) , (void *)&src.data)); + args.push_back( make_pair( sizeof(cl_mem) , (void *)&dst.data)); + size_t gt[3] = {src.cols, src.rows, 1}, lt[3] = {16, 16, 1}; + openCLExecuteKernel(src.clCxt, &cvt_color, "Gray2RGB", gt, lt, args, -1, -1, build_options); +} +void RGB2YUV_caller(const oclMat &src, oclMat &dst, int bidx) +{ + vector > args; + int channels = src.oclchannels(); + char build_options[50]; + sprintf(build_options, "-D DEPTH_%d", src.depth()); + //printf("depth:%d,channels:%d,bidx:%d\n",src.depth(),src.oclchannels(),bidx); + args.push_back( make_pair( sizeof(cl_int) , (void *)&src.cols)); + args.push_back( make_pair( sizeof(cl_int) , (void *)&src.rows)); + args.push_back( make_pair( sizeof(cl_int) , (void *)&src.step)); + args.push_back( make_pair( sizeof(cl_int) , (void *)&dst.step)); + args.push_back( make_pair( sizeof(cl_int) , (void *)&channels)); + args.push_back( make_pair( sizeof(cl_int) , (void *)&bidx)); + args.push_back( make_pair( sizeof(cl_mem) , (void *)&src.data)); + args.push_back( make_pair( sizeof(cl_mem) , (void *)&dst.data)); + size_t gt[3] = {src.cols, src.rows, 1}, lt[3] = {16, 16, 1}; + openCLExecuteKernel(src.clCxt, &cvt_color, "RGB2YUV", gt, lt, args, -1, -1, build_options); +} +void YUV2RGB_caller(const oclMat &src, oclMat &dst, int bidx) +{ + vector > args; + int channels = src.oclchannels(); + char build_options[50]; + sprintf(build_options, "-D DEPTH_%d", src.depth()); + //printf("depth:%d,channels:%d,bidx:%d\n",src.depth(),src.oclchannels(),bidx); + args.push_back( make_pair( sizeof(cl_int) , (void *)&src.cols)); + args.push_back( make_pair( sizeof(cl_int) , (void *)&src.rows)); + args.push_back( make_pair( sizeof(cl_int) , (void *)&src.step)); + args.push_back( make_pair( sizeof(cl_int) , (void *)&dst.step)); + args.push_back( make_pair( sizeof(cl_int) , (void *)&channels)); + args.push_back( make_pair( sizeof(cl_int) , (void *)&bidx)); + args.push_back( make_pair( sizeof(cl_mem) , (void *)&src.data)); + args.push_back( make_pair( sizeof(cl_mem) , (void *)&dst.data)); + size_t gt[3] = {src.cols, src.rows, 1}, lt[3] = {16, 16, 1}; + openCLExecuteKernel(src.clCxt, &cvt_color, "YUV2RGB", gt, lt, args, -1, -1, build_options); +} +void YUV2RGB_NV12_caller(const oclMat &src, oclMat &dst, int bidx) +{ + vector > args; + char build_options[50]; + sprintf(build_options, "-D DEPTH_%d", src.depth()); + //printf("depth:%d,channels:%d,bidx:%d\n",src.depth(),src.oclchannels(),bidx); + args.push_back( make_pair( sizeof(cl_int) , (void *)&src.cols)); + args.push_back( make_pair( sizeof(cl_int) , (void *)&src.rows)); + args.push_back( make_pair( sizeof(cl_int) , (void *)&src.step)); + args.push_back( make_pair( sizeof(cl_int) , (void *)&dst.step)); + args.push_back( make_pair( sizeof(cl_int) , (void *)&bidx)); + args.push_back( make_pair( sizeof(cl_int) , (void *)&dst.cols)); + args.push_back( make_pair( sizeof(cl_int) , (void *)&dst.rows)); + args.push_back( make_pair( sizeof(cl_mem) , (void *)&src.data)); + args.push_back( make_pair( sizeof(cl_mem) , (void *)&dst.data)); + size_t gt[3] = {dst.cols / 2, dst.rows / 2, 1}, lt[3] = {16, 16, 1}; + openCLExecuteKernel(src.clCxt, &cvt_color, "YUV2RGBA_NV12", gt, lt, args, -1, -1, build_options); +} +void RGB2YCrCb_caller(const oclMat &src, oclMat &dst, int bidx) +{ + vector > args; + int channels = src.oclchannels(); + char build_options[50]; + sprintf(build_options, "-D DEPTH_%d", src.depth()); + //printf("depth:%d,channels:%d,bidx:%d\n",src.depth(),src.oclchannels(),bidx); + args.push_back( make_pair( sizeof(cl_int) , (void *)&src.cols)); + args.push_back( make_pair( sizeof(cl_int) , (void *)&src.rows)); + args.push_back( make_pair( sizeof(cl_int) , (void *)&src.step)); + args.push_back( make_pair( sizeof(cl_int) , (void *)&dst.step)); + args.push_back( make_pair( sizeof(cl_int) , (void *)&channels)); + args.push_back( make_pair( sizeof(cl_int) , (void *)&bidx)); + args.push_back( make_pair( sizeof(cl_mem) , (void *)&src.data)); + args.push_back( make_pair( sizeof(cl_mem) , (void *)&dst.data)); + size_t gt[3] = {src.cols, src.rows, 1}, lt[3] = {16, 16, 1}; + openCLExecuteKernel(src.clCxt, &cvt_color, "RGB2YCrCb", gt, lt, args, -1, -1, build_options); +} +void cvtColor_caller(const oclMat &src, oclMat &dst, int code, int dcn) +{ + Size sz = src.size(); + int scn = src.oclchannels(), depth = src.depth(), bidx; - CV_Assert(depth == CV_8U || depth == CV_16U); + CV_Assert(depth == CV_8U || depth == CV_16U || depth == CV_32F); - switch (code) - { - /* - case CV_BGR2BGRA: case CV_RGB2BGRA: case CV_BGRA2BGR: - case CV_RGBA2BGR: case CV_RGB2BGR: case CV_BGRA2RGBA: - case CV_BGR2BGR565: case CV_BGR2BGR555: case CV_RGB2BGR565: case CV_RGB2BGR555: - case CV_BGRA2BGR565: case CV_BGRA2BGR555: case CV_RGBA2BGR565: case CV_RGBA2BGR555: - case CV_BGR5652BGR: case CV_BGR5552BGR: case CV_BGR5652RGB: case CV_BGR5552RGB: - case CV_BGR5652BGRA: case CV_BGR5552BGRA: case CV_BGR5652RGBA: case CV_BGR5552RGBA: - */ - case CV_BGR2GRAY: - case CV_BGRA2GRAY: - case CV_RGB2GRAY: - case CV_RGBA2GRAY: - { - CV_Assert(scn == 3 || scn == 4); - bidx = code == CV_BGR2GRAY || code == CV_BGRA2GRAY ? 0 : 2; - dst.create(sz, CV_MAKETYPE(depth, 1)); - RGB2Gray_caller(src, dst, bidx); - break; - } + switch (code) + { /* - case CV_BGR5652GRAY: case CV_BGR5552GRAY: - case CV_GRAY2BGR: case CV_GRAY2BGRA: - case CV_GRAY2BGR565: case CV_GRAY2BGR555: - case CV_BGR2YCrCb: case CV_RGB2YCrCb: - case CV_BGR2YUV: case CV_RGB2YUV: - case CV_YCrCb2BGR: case CV_YCrCb2RGB: - case CV_YUV2BGR: case CV_YUV2RGB: - case CV_BGR2XYZ: case CV_RGB2XYZ: - case CV_XYZ2BGR: case CV_XYZ2RGB: - case CV_BGR2HSV: case CV_RGB2HSV: case CV_BGR2HSV_FULL: case CV_RGB2HSV_FULL: - case CV_BGR2HLS: case CV_RGB2HLS: case CV_BGR2HLS_FULL: case CV_RGB2HLS_FULL: - case CV_HSV2BGR: case CV_HSV2RGB: case CV_HSV2BGR_FULL: case CV_HSV2RGB_FULL: - case CV_HLS2BGR: case CV_HLS2RGB: case CV_HLS2BGR_FULL: case CV_HLS2RGB_FULL: + case CV_BGR2BGRA: case CV_RGB2BGRA: case CV_BGRA2BGR: + case CV_RGBA2BGR: case CV_RGB2BGR: case CV_BGRA2RGBA: + case CV_BGR2BGR565: case CV_BGR2BGR555: case CV_RGB2BGR565: case CV_RGB2BGR555: + case CV_BGRA2BGR565: case CV_BGRA2BGR555: case CV_RGBA2BGR565: case CV_RGBA2BGR555: + case CV_BGR5652BGR: case CV_BGR5552BGR: case CV_BGR5652RGB: case CV_BGR5552RGB: + case CV_BGR5652BGRA: case CV_BGR5552BGRA: case CV_BGR5652RGBA: case CV_BGR5552RGBA: */ - default: - CV_Error( CV_StsBadFlag, "Unknown/unsupported color conversion code" ); - } + case CV_BGR2GRAY: + case CV_BGRA2GRAY: + case CV_RGB2GRAY: + case CV_RGBA2GRAY: + { + CV_Assert(scn == 3 || scn == 4); + bidx = code == CV_BGR2GRAY || code == CV_BGRA2GRAY ? 0 : 2; + dst.create(sz, CV_MAKETYPE(depth, 1)); + RGB2Gray_caller(src, dst, bidx); + break; } + case CV_GRAY2BGR: + case CV_GRAY2BGRA: + { + CV_Assert(scn == 1); + dcn = code == CV_GRAY2BGRA ? 4 : 3; + dst.create(sz, CV_MAKETYPE(depth, dcn)); + Gray2RGB_caller(src, dst); + break; + } + case CV_BGR2YUV: + case CV_RGB2YUV: + { + CV_Assert(scn == 3 || scn == 4); + bidx = code == CV_BGR2YUV ? 0 : 2; + dst.create(sz, CV_MAKETYPE(depth, 3)); + RGB2YUV_caller(src, dst, bidx); + break; + } + case CV_YUV2BGR: + case CV_YUV2RGB: + { + CV_Assert(scn == 3 || scn == 4); + bidx = code == CV_YUV2BGR ? 0 : 2; + dst.create(sz, CV_MAKETYPE(depth, 3)); + YUV2RGB_caller(src, dst, bidx); + break; + } + case CV_YUV2RGB_NV12: + case CV_YUV2BGR_NV12: + case CV_YUV2RGBA_NV12: + case CV_YUV2BGRA_NV12: + { + CV_Assert(scn == 1); + CV_Assert( sz.width % 2 == 0 && sz.height % 3 == 0 && depth == CV_8U ); + dcn = code == CV_YUV2BGRA_NV12 || code == CV_YUV2RGBA_NV12 ? 4 : 3; + bidx = code == CV_YUV2BGRA_NV12 || code == CV_YUV2BGR_NV12 ? 0 : 2; + + Size dstSz(sz.width, sz.height * 2 / 3); + dst.create(dstSz, CV_MAKETYPE(depth, dcn)); + YUV2RGB_NV12_caller(src, dst, bidx); + break; + } + case CV_BGR2YCrCb: + case CV_RGB2YCrCb: + { + CV_Assert(scn == 3 || scn == 4); + bidx = code == CV_BGR2YCrCb ? 0 : 2; + dst.create(sz, CV_MAKETYPE(depth, 3)); + RGB2YCrCb_caller(src, dst, bidx); + break; + } + case CV_YCrCb2BGR: + case CV_YCrCb2RGB: + { + break; + } + /* + case CV_BGR5652GRAY: case CV_BGR5552GRAY: + case CV_GRAY2BGR565: case CV_GRAY2BGR555: + case CV_BGR2YCrCb: case CV_RGB2YCrCb: + case CV_BGR2XYZ: case CV_RGB2XYZ: + case CV_XYZ2BGR: case CV_XYZ2RGB: + case CV_BGR2HSV: case CV_RGB2HSV: case CV_BGR2HSV_FULL: case CV_RGB2HSV_FULL: + case CV_BGR2HLS: case CV_RGB2HLS: case CV_BGR2HLS_FULL: case CV_RGB2HLS_FULL: + case CV_HSV2BGR: case CV_HSV2RGB: case CV_HSV2BGR_FULL: case CV_HSV2RGB_FULL: + case CV_HLS2BGR: case CV_HLS2RGB: case CV_HLS2BGR_FULL: case CV_HLS2RGB_FULL: + */ + default: + CV_Error( CV_StsBadFlag, "Unknown/unsupported color conversion code" ); + } +} } void cv::ocl::cvtColor(const oclMat &src, oclMat &dst, int code, int dcn) diff --git a/modules/ocl/src/filtering.cpp b/modules/ocl/src/filtering.cpp index 6e60da33e..0eca51032 100644 --- a/modules/ocl/src/filtering.cpp +++ b/modules/ocl/src/filtering.cpp @@ -47,6 +47,7 @@ //M*/ #include "precomp.hpp" +#include "mcwutil.hpp" #include using namespace std; using namespace cv; @@ -109,7 +110,7 @@ Ptr cv::ocl::createLinearFilter_GPU(int, int, const Mat &, con return Ptr(0); } -Ptr cv::ocl::createDerivFilter_GPU( int srcType, int dstType, int dx, int dy, int ksize, int borderType ) +Ptr cv::ocl::createDerivFilter_GPU(int srcType, int dstType, int dx, int dy, int ksize, int borderType) { throw_nogpu(); return Ptr(0); @@ -150,17 +151,17 @@ void cv::ocl::Laplacian(const oclMat &, oclMat &, int, int, double) throw_nogpu(); } -void cv::ocl::erode( const oclMat &, oclMat &, const Mat &, Point, int) +void cv::ocl::erode(const oclMat &, oclMat &, const Mat &, Point, int) { throw_nogpu(); } -void cv::ocl::dilate( const oclMat &, oclMat &, const Mat &, Point, int) +void cv::ocl::dilate(const oclMat &, oclMat &, const Mat &, Point, int) { throw_nogpu(); } -void cv::ocl::morphologyEx( const oclMat &, oclMat &, int, const Mat &, Point, int) +void cv::ocl::morphologyEx(const oclMat &, oclMat &, int, const Mat &, Point, int) { throw_nogpu(); } @@ -170,100 +171,110 @@ void cv::ocl::morphologyEx( const oclMat &, oclMat &, int, const Mat &, Point, i //helper routines namespace cv { - namespace ocl - { - ///////////////////////////OpenCL kernel strings/////////////////////////// - extern const char *filtering_boxFilter; - extern const char *filter_sep_row; - extern const char *filter_sep_col; - extern const char *filtering_laplacian; - extern const char *filtering_morph; - } +namespace ocl +{ +///////////////////////////OpenCL kernel strings/////////////////////////// +extern const char *filtering_boxFilter; +extern const char *filter_sep_row; +extern const char *filter_sep_col; +extern const char *filtering_laplacian; +extern const char *filtering_morph; +} } namespace { - inline int divUp(int total, int grain) - { - return (total + grain - 1) / grain; - } +inline int divUp(int total, int grain) +{ + return (total + grain - 1) / grain; +} } namespace { - inline void normalizeAnchor(int &anchor, int ksize) +inline void normalizeAnchor(int &anchor, int ksize) +{ + if (anchor < 0) { - if (anchor < 0) - anchor = ksize >> 1; - - CV_Assert(0 <= anchor && anchor < ksize); + anchor = ksize >> 1; } - inline void normalizeAnchor(Point &anchor, const Size &ksize) + CV_Assert(0 <= anchor && anchor < ksize); +} + +inline void normalizeAnchor(Point &anchor, const Size &ksize) +{ + normalizeAnchor(anchor.x, ksize.width); + normalizeAnchor(anchor.y, ksize.height); +} + +inline void normalizeROI(Rect &roi, const Size &ksize, const Point &anchor, const Size &src_size) +{ + if (roi == Rect(0, 0, -1, -1)) { - normalizeAnchor(anchor.x, ksize.width); - normalizeAnchor(anchor.y, ksize.height); + roi = Rect(0, 0, src_size.width, src_size.height); } - inline void normalizeROI(Rect &roi, const Size &ksize, const Point &anchor, const Size &src_size) + CV_Assert(ksize.height > 0 && ksize.width > 0 && ((ksize.height & 1) == 1) && ((ksize.width & 1) == 1)); + CV_Assert((anchor.x == -1 && anchor.y == -1) || (anchor.x == ksize.width >> 1 && anchor.y == ksize.height >> 1)); + CV_Assert(roi.x >= 0 && roi.y >= 0 && roi.width <= src_size.width && roi.height <= src_size.height); +} + + +inline void normalizeKernel(const Mat &kernel, oclMat &gpu_krnl, int type = CV_8U, int *nDivisor = 0, bool reverse = false) +{ + int scale = nDivisor && (kernel.depth() == CV_32F || kernel.depth() == CV_64F) ? 256 : 1; + + if (nDivisor) { - if (roi == Rect(0, 0, -1, -1)) - roi = Rect(0, 0, src_size.width, src_size.height); - CV_Assert(ksize.height > 0 && ksize.width > 0 && ((ksize.height & 1) == 1) && ((ksize.width & 1) == 1)); - CV_Assert((anchor.x == -1 && anchor.y == -1) || (anchor.x == ksize.width >> 1 && anchor.y == ksize.height >> 1)); - CV_Assert(roi.x >= 0 && roi.y >= 0 && roi.width <= src_size.width && roi.height <= src_size.height); + *nDivisor = scale; } + Mat temp(kernel.size(), type); + kernel.convertTo(temp, type, scale); + Mat cont_krnl = temp.reshape(1, 1); - inline void normalizeKernel(const Mat &kernel, oclMat &gpu_krnl, int type = CV_8U, int *nDivisor = 0, bool reverse = false) + if (reverse) { - int scale = nDivisor && (kernel.depth() == CV_32F || kernel.depth() == CV_64F) ? 256 : 1; - if (nDivisor) *nDivisor = scale; + int count = cont_krnl.cols >> 1; - Mat temp(kernel.size(), type); - kernel.convertTo(temp, type, scale); - Mat cont_krnl = temp.reshape(1, 1); - - if (reverse) + for (int i = 0; i < count; ++i) { - int count = cont_krnl.cols >> 1; - for (int i = 0; i < count; ++i) - { - std::swap(cont_krnl.at(0, i), cont_krnl.at(0, cont_krnl.cols - 1 - i)); - } + std::swap(cont_krnl.at(0, i), cont_krnl.at(0, cont_krnl.cols - 1 - i)); } - - gpu_krnl.upload(cont_krnl); } + + gpu_krnl.upload(cont_krnl); +} } //////////////////////////////////////////////////////////////////////////////////////////////////// // Filter2D namespace { - class Filter2DEngine_GPU : public FilterEngine_GPU +class Filter2DEngine_GPU : public FilterEngine_GPU +{ +public: + Filter2DEngine_GPU(const Ptr &filter2D_) : filter2D(filter2D_) {} + + virtual void apply(const oclMat &src, oclMat &dst, Rect roi = Rect(0, 0, -1, -1)) { - public: - Filter2DEngine_GPU(const Ptr &filter2D_) : filter2D(filter2D_) {} + Size src_size = src.size(); - virtual void apply(const oclMat &src, oclMat &dst, Rect roi = Rect(0, 0, -1, -1)) - { - Size src_size = src.size(); + // Delete those two clause below which exist before, However, the result is alos correct + // dst.create(src_size, src.type()); + // dst = Scalar(0.0); - // Delete those two clause below which exist before, However, the result is alos correct - // dst.create(src_size, src.type()); - // dst = Scalar(0.0); + normalizeROI(roi, filter2D->ksize, filter2D->anchor, src_size); - normalizeROI(roi, filter2D->ksize, filter2D->anchor, src_size); + oclMat srcROI = src(roi); + oclMat dstROI = dst(roi); - oclMat srcROI = src(roi); - oclMat dstROI = dst(roi); + (*filter2D)(srcROI, dstROI); + } - (*filter2D)(srcROI, dstROI); - } - - Ptr filter2D; - }; + Ptr filter2D; +}; } Ptr cv::ocl::createFilter2D_GPU(const Ptr filter2D) @@ -275,22 +286,22 @@ Ptr cv::ocl::createFilter2D_GPU(const Ptr filt // Box Filter namespace { - typedef void (*FilterBox_t)(const oclMat & , oclMat & , Size &, const Point, const int); +typedef void (*FilterBox_t)(const oclMat & , oclMat & , Size &, const Point, const int); - class GPUBoxFilter : public BaseFilter_GPU +class GPUBoxFilter : public BaseFilter_GPU +{ +public: + GPUBoxFilter(const Size &ksize_, const Point &anchor_, const int borderType_, FilterBox_t func_) : + BaseFilter_GPU(ksize_, anchor_, borderType_), func(func_) {} + + virtual void operator()(const oclMat &src, oclMat &dst) { - public: - GPUBoxFilter(const Size &ksize_, const Point &anchor_, const int borderType_, FilterBox_t func_) : - BaseFilter_GPU(ksize_, anchor_, borderType_), func(func_) {} + func(src, dst, ksize, anchor, borderType); + } - virtual void operator()(const oclMat &src, oclMat &dst) - { - func(src, dst, ksize, anchor, borderType); - } + FilterBox_t func; - FilterBox_t func; - - }; +}; } //////////////////////////////////////////////////////////////////////////////////////////////////// @@ -298,22 +309,22 @@ namespace namespace { - typedef void (*GPUMorfFilter_t)(const oclMat & , oclMat & , oclMat & , Size &, const Point); +typedef void (*GPUMorfFilter_t)(const oclMat & , oclMat & , oclMat & , Size &, const Point); - class MorphFilter_GPU : public BaseFilter_GPU +class MorphFilter_GPU : public BaseFilter_GPU +{ +public: + MorphFilter_GPU(const Size &ksize_, const Point &anchor_, const oclMat &kernel_, GPUMorfFilter_t func_) : + BaseFilter_GPU(ksize_, anchor_, BORDER_CONSTANT), kernel(kernel_), func(func_) {} + + virtual void operator()(const oclMat &src, oclMat &dst) { - public: - MorphFilter_GPU(const Size &ksize_, const Point &anchor_, const oclMat &kernel_, GPUMorfFilter_t func_) : - BaseFilter_GPU(ksize_, anchor_, BORDER_CONSTANT), kernel(kernel_), func(func_) {} + func(src, dst, kernel, ksize, anchor) ; + } - virtual void operator()(const oclMat &src, oclMat &dst) - { - func(src, dst, kernel, ksize, anchor) ; - } - - oclMat kernel; - GPUMorfFilter_t func; - }; + oclMat kernel; + GPUMorfFilter_t func; +}; } /* @@ -321,14 +332,14 @@ namespace **Extend this if necessary later. **Note that the kernel need to be further refined. */ -void GPUErode(const oclMat &src, oclMat &dst, oclMat &mat_kernel, Size &ksize, const Point anchor) +static void GPUErode(const oclMat &src, oclMat &dst, oclMat &mat_kernel, Size &ksize, const Point anchor) { //Normalize the result by default //float alpha = ksize.height * ksize.width; CV_Assert(src.clCxt == dst.clCxt); - CV_Assert( (src.cols == dst.cols) && - (src.rows == dst.rows) ); - CV_Assert( (src.oclchannels() == dst.oclchannels()) ); + CV_Assert((src.cols == dst.cols) && + (src.rows == dst.rows)); + CV_Assert((src.oclchannels() == dst.oclchannels())); int srcStep = src.step1() / src.oclchannels(); int dstStep = dst.step1() / dst.oclchannels(); @@ -342,19 +353,21 @@ void GPUErode(const oclMat &src, oclMat &dst, oclMat &mat_kernel, Size &ksize, c size_t localThreads[3] = {16, 16, 1}; size_t globalThreads[3] = {(src.cols + localThreads[0] - 1) / localThreads[0] *localThreads[0], (src.rows + localThreads[1] - 1) / localThreads[1] *localThreads[1], 1}; - if(src.type() == CV_8UC1) + if (src.type() == CV_8UC1) { kernelName = "morph_C1_D0"; globalThreads[0] = ((src.cols + 3) / 4 + localThreads[0] - 1) / localThreads[0] * localThreads[0]; - CV_Assert( localThreads[0]*localThreads[1] * 8 >= (localThreads[0] * 4 + ksize.width - 1) * (localThreads[1] + ksize.height - 1) ); + CV_Assert(localThreads[0]*localThreads[1] * 8 >= (localThreads[0] * 4 + ksize.width - 1) * (localThreads[1] + ksize.height - 1)); } else { kernelName = "morph"; - CV_Assert( localThreads[0]*localThreads[1] * 2 >= (localThreads[0] + ksize.width - 1) * (localThreads[1] + ksize.height - 1) ); + CV_Assert(localThreads[0]*localThreads[1] * 2 >= (localThreads[0] + ksize.width - 1) * (localThreads[1] + ksize.height - 1)); } + char s[64]; - switch(src.type()) + + switch (src.type()) { case CV_8UC1: sprintf(s, "-D VAL=255"); @@ -373,34 +386,35 @@ void GPUErode(const oclMat &src, oclMat &dst, oclMat &mat_kernel, Size &ksize, c default: CV_Error(CV_StsUnsupportedFormat, "unsupported type"); } + char compile_option[128]; - sprintf(compile_option, "-D RADIUSX=%d -D RADIUSY=%d -D LSIZE0=%d -D LSIZE1=%d -D ERODE %s", anchor.x, anchor.y, localThreads[0], localThreads[1], s); + sprintf(compile_option, "-D RADIUSX=%d -D RADIUSY=%d -D LSIZE0=%d -D LSIZE1=%d -D ERODE %s", anchor.x, anchor.y, (int)localThreads[0], (int)localThreads[1], s); vector< pair > args; - args.push_back( make_pair( sizeof(cl_mem), (void *)&src.data)); - args.push_back( make_pair( sizeof(cl_mem), (void *)&dst.data)); - args.push_back( make_pair( sizeof(cl_int), (void *)&srcOffset_x)); - args.push_back( make_pair( sizeof(cl_int), (void *)&srcOffset_y)); - args.push_back( make_pair( sizeof(cl_int), (void *)&src.cols)); - args.push_back( make_pair( sizeof(cl_int), (void *)&src.rows)); - args.push_back( make_pair( sizeof(cl_int), (void *)&srcStep)); - args.push_back( make_pair( sizeof(cl_int), (void *)&dstStep)); - args.push_back( make_pair( sizeof(cl_mem), (void *)&mat_kernel.data)); - args.push_back( make_pair( sizeof(cl_int), (void *)&src.wholecols)); - args.push_back( make_pair( sizeof(cl_int), (void *)&src.wholerows)); - args.push_back( make_pair( sizeof(cl_int), (void *)&dstOffset)); + args.push_back(make_pair(sizeof(cl_mem), (void *)&src.data)); + args.push_back(make_pair(sizeof(cl_mem), (void *)&dst.data)); + args.push_back(make_pair(sizeof(cl_int), (void *)&srcOffset_x)); + args.push_back(make_pair(sizeof(cl_int), (void *)&srcOffset_y)); + args.push_back(make_pair(sizeof(cl_int), (void *)&src.cols)); + args.push_back(make_pair(sizeof(cl_int), (void *)&src.rows)); + args.push_back(make_pair(sizeof(cl_int), (void *)&srcStep)); + args.push_back(make_pair(sizeof(cl_int), (void *)&dstStep)); + args.push_back(make_pair(sizeof(cl_mem), (void *)&mat_kernel.data)); + args.push_back(make_pair(sizeof(cl_int), (void *)&src.wholecols)); + args.push_back(make_pair(sizeof(cl_int), (void *)&src.wholerows)); + args.push_back(make_pair(sizeof(cl_int), (void *)&dstOffset)); openCLExecuteKernel(clCxt, &filtering_morph, kernelName, globalThreads, localThreads, args, -1, -1, compile_option); } //! data type supported: CV_8UC1, CV_8UC4, CV_32FC1, CV_32FC4 -void GPUDilate(const oclMat &src, oclMat &dst, oclMat &mat_kernel, Size &ksize, const Point anchor) +static void GPUDilate(const oclMat &src, oclMat &dst, oclMat &mat_kernel, Size &ksize, const Point anchor) { //Normalize the result by default //float alpha = ksize.height * ksize.width; CV_Assert(src.clCxt == dst.clCxt); - CV_Assert( (src.cols == dst.cols) && - (src.rows == dst.rows) ); - CV_Assert( (src.oclchannels() == dst.oclchannels()) ); + CV_Assert((src.cols == dst.cols) && + (src.rows == dst.rows)); + CV_Assert((src.oclchannels() == dst.oclchannels())); int srcStep = src.step1() / src.oclchannels(); int dstStep = dst.step1() / dst.oclchannels(); @@ -414,19 +428,21 @@ void GPUDilate(const oclMat &src, oclMat &dst, oclMat &mat_kernel, Size &ksize, size_t localThreads[3] = {16, 16, 1}; size_t globalThreads[3] = {(src.cols + localThreads[0]) / localThreads[0] *localThreads[0], (src.rows + localThreads[1]) / localThreads[1] *localThreads[1], 1}; - if(src.type() == CV_8UC1) + if (src.type() == CV_8UC1) { kernelName = "morph_C1_D0"; globalThreads[0] = ((src.cols + 3) / 4 + localThreads[0]) / localThreads[0] * localThreads[0]; - CV_Assert( localThreads[0]*localThreads[1] * 8 >= (localThreads[0] * 4 + ksize.width - 1) * (localThreads[1] + ksize.height - 1) ); + CV_Assert(localThreads[0]*localThreads[1] * 8 >= (localThreads[0] * 4 + ksize.width - 1) * (localThreads[1] + ksize.height - 1)); } else { kernelName = "morph"; - CV_Assert( localThreads[0]*localThreads[1] * 2 >= (localThreads[0] + ksize.width - 1) * (localThreads[1] + ksize.height - 1) ); + CV_Assert(localThreads[0]*localThreads[1] * 2 >= (localThreads[0] + ksize.width - 1) * (localThreads[1] + ksize.height - 1)); } + char s[64]; - switch(src.type()) + + switch (src.type()) { case CV_8UC1: sprintf(s, "-D VAL=0"); @@ -445,21 +461,22 @@ void GPUDilate(const oclMat &src, oclMat &dst, oclMat &mat_kernel, Size &ksize, default: CV_Error(CV_StsUnsupportedFormat, "unsupported type"); } + char compile_option[128]; - sprintf(compile_option, "-D RADIUSX=%d -D RADIUSY=%d -D LSIZE0=%d -D LSIZE1=%d -D DILATE %s", anchor.x, anchor.y, localThreads[0], localThreads[1], s); + sprintf(compile_option, "-D RADIUSX=%d -D RADIUSY=%d -D LSIZE0=%d -D LSIZE1=%d -D DILATE %s", anchor.x, anchor.y, (int)localThreads[0], (int)localThreads[1], s); vector< pair > args; - args.push_back( make_pair( sizeof(cl_mem), (void *)&src.data)); - args.push_back( make_pair( sizeof(cl_mem), (void *)&dst.data)); - args.push_back( make_pair( sizeof(cl_int), (void *)&srcOffset_x)); - args.push_back( make_pair( sizeof(cl_int), (void *)&srcOffset_y)); - args.push_back( make_pair( sizeof(cl_int), (void *)&src.cols)); - args.push_back( make_pair( sizeof(cl_int), (void *)&src.rows)); - args.push_back( make_pair( sizeof(cl_int), (void *)&srcStep)); - args.push_back( make_pair( sizeof(cl_int), (void *)&dstStep)); - args.push_back( make_pair( sizeof(cl_mem), (void *)&mat_kernel.data)); - args.push_back( make_pair( sizeof(cl_int), (void *)&src.wholecols)); - args.push_back( make_pair( sizeof(cl_int), (void *)&src.wholerows)); - args.push_back( make_pair( sizeof(cl_int), (void *)&dstOffset)); + args.push_back(make_pair(sizeof(cl_mem), (void *)&src.data)); + args.push_back(make_pair(sizeof(cl_mem), (void *)&dst.data)); + args.push_back(make_pair(sizeof(cl_int), (void *)&srcOffset_x)); + args.push_back(make_pair(sizeof(cl_int), (void *)&srcOffset_y)); + args.push_back(make_pair(sizeof(cl_int), (void *)&src.cols)); + args.push_back(make_pair(sizeof(cl_int), (void *)&src.rows)); + args.push_back(make_pair(sizeof(cl_int), (void *)&srcStep)); + args.push_back(make_pair(sizeof(cl_int), (void *)&dstStep)); + args.push_back(make_pair(sizeof(cl_mem), (void *)&mat_kernel.data)); + args.push_back(make_pair(sizeof(cl_int), (void *)&src.wholecols)); + args.push_back(make_pair(sizeof(cl_int), (void *)&src.wholerows)); + args.push_back(make_pair(sizeof(cl_int), (void *)&dstOffset)); openCLExecuteKernel(clCxt, &filtering_morph, kernelName, globalThreads, localThreads, args, -1, -1, compile_option); } @@ -483,47 +500,48 @@ Ptr cv::ocl::getMorphologyFilter_GPU(int op, int type, const Mat namespace { - class MorphologyFilterEngine_GPU : public Filter2DEngine_GPU +class MorphologyFilterEngine_GPU : public Filter2DEngine_GPU +{ +public: + MorphologyFilterEngine_GPU(const Ptr &filter2D_, int iters_) : + Filter2DEngine_GPU(filter2D_), iters(iters_) {} + + virtual void apply(const oclMat &src, oclMat &dst) { - public: - MorphologyFilterEngine_GPU(const Ptr &filter2D_, int iters_) : - Filter2DEngine_GPU(filter2D_), iters(iters_) {} + Filter2DEngine_GPU::apply(src, dst); - virtual void apply(const oclMat &src, oclMat &dst) + //if (iters > 1) + //{ + // Size wholesize; + // Point ofs; + // dst.locateROI(wholesize,ofs); + // int rows = dst.rows, cols = dst.cols; + // dst.adjustROI(ofs.y,-ofs.y-rows+dst.wholerows,ofs.x,-ofs.x-cols+dst.wholecols); + // dst.copyTo(morfBuf); + // dst.adjustROI(-ofs.y,ofs.y+rows-dst.wholerows,-ofs.x,ofs.x+cols-dst.wholecols); + // morfBuf.adjustROI(-ofs.y,ofs.y+rows-dst.wholerows,-ofs.x,ofs.x+cols-dst.wholecols); + // //morfBuf.create(src.size(),src.type()); + // //Filter2DEngine_GPU::apply(dst, morfBuf); + // //morfBuf.copyTo(dst); + //} + for (int i = 1; i < iters; ++i) { - Filter2DEngine_GPU::apply(src, dst); - //if (iters > 1) - //{ - // Size wholesize; - // Point ofs; - // dst.locateROI(wholesize,ofs); - // int rows = dst.rows, cols = dst.cols; - // dst.adjustROI(ofs.y,-ofs.y-rows+dst.wholerows,ofs.x,-ofs.x-cols+dst.wholecols); - // dst.copyTo(morfBuf); - // dst.adjustROI(-ofs.y,ofs.y+rows-dst.wholerows,-ofs.x,ofs.x+cols-dst.wholecols); - // morfBuf.adjustROI(-ofs.y,ofs.y+rows-dst.wholerows,-ofs.x,ofs.x+cols-dst.wholecols); - // //morfBuf.create(src.size(),src.type()); - // //Filter2DEngine_GPU::apply(dst, morfBuf); - // //morfBuf.copyTo(dst); - //} - for(int i = 1; i < iters; ++i) - { - //dst.swap(morfBuf); - Size wholesize; - Point ofs; - dst.locateROI(wholesize, ofs); - int rows = dst.rows, cols = dst.cols; - dst.adjustROI(ofs.y, -ofs.y - rows + dst.wholerows, ofs.x, -ofs.x - cols + dst.wholecols); - dst.copyTo(morfBuf); - dst.adjustROI(-ofs.y, ofs.y + rows - dst.wholerows, -ofs.x, ofs.x + cols - dst.wholecols); - morfBuf.adjustROI(-ofs.y, ofs.y + rows - dst.wholerows, -ofs.x, ofs.x + cols - dst.wholecols); - Filter2DEngine_GPU::apply(morfBuf, dst); - } + //dst.swap(morfBuf); + Size wholesize; + Point ofs; + dst.locateROI(wholesize, ofs); + int rows = dst.rows, cols = dst.cols; + dst.adjustROI(ofs.y, -ofs.y - rows + dst.wholerows, ofs.x, -ofs.x - cols + dst.wholecols); + dst.copyTo(morfBuf); + dst.adjustROI(-ofs.y, ofs.y + rows - dst.wholerows, -ofs.x, ofs.x + cols - dst.wholecols); + morfBuf.adjustROI(-ofs.y, ofs.y + rows - dst.wholerows, -ofs.x, ofs.x + cols - dst.wholecols); + Filter2DEngine_GPU::apply(morfBuf, dst); } + } - int iters; - oclMat morfBuf; - }; + int iters; + oclMat morfBuf; +}; } Ptr cv::ocl::createMorphologyFilter_GPU(int op, int type, const Mat &kernel, const Point &anchor, int iterations) @@ -539,104 +557,113 @@ Ptr cv::ocl::createMorphologyFilter_GPU(int op, int type, cons namespace { - void morphOp(int op, const oclMat &src, oclMat &dst, const Mat &_kernel, Point anchor, int iterations, int borderType, const Scalar &borderValue) +void morphOp(int op, const oclMat &src, oclMat &dst, const Mat &_kernel, Point anchor, int iterations, int borderType, const Scalar &borderValue) +{ + if ((borderType != cv::BORDER_CONSTANT) || (borderValue != morphologyDefaultBorderValue())) { - if((borderType != cv::BORDER_CONSTANT) || (borderValue != morphologyDefaultBorderValue())) - { - CV_Error(CV_StsBadArg, "unsupported border type"); - } - Mat kernel; - Size ksize = _kernel.data ? _kernel.size() : Size(3, 3); - - normalizeAnchor(anchor, ksize); - - if (iterations == 0 || _kernel.rows * _kernel.cols == 1) - { - src.copyTo(dst); - return; - } - - dst.create(src.size(), src.type()); - - if (!_kernel.data) - { - kernel = getStructuringElement(MORPH_RECT, Size(1 + iterations * 2, 1 + iterations * 2)); - anchor = Point(iterations, iterations); - iterations = 1; - } - else if (iterations > 1 && countNonZero(_kernel) == _kernel.rows * _kernel.cols) - { - anchor = Point(anchor.x * iterations, anchor.y * iterations); - kernel = getStructuringElement(MORPH_RECT, Size(ksize.width + iterations * (ksize.width - 1), - ksize.height + iterations * (ksize.height - 1)), anchor); - iterations = 1; - } - else - kernel = _kernel; - - Ptr f = createMorphologyFilter_GPU(op, src.type(), kernel, anchor, iterations); - - f->apply(src, dst); + CV_Error(CV_StsBadArg, "unsupported border type"); } + + Mat kernel; + Size ksize = _kernel.data ? _kernel.size() : Size(3, 3); + + normalizeAnchor(anchor, ksize); + + if (iterations == 0 || _kernel.rows *_kernel.cols == 1) + { + src.copyTo(dst); + return; + } + + dst.create(src.size(), src.type()); + + if (!_kernel.data) + { + kernel = getStructuringElement(MORPH_RECT, Size(1 + iterations * 2, 1 + iterations * 2)); + anchor = Point(iterations, iterations); + iterations = 1; + } + else if (iterations > 1 && countNonZero(_kernel) == _kernel.rows * _kernel.cols) + { + anchor = Point(anchor.x * iterations, anchor.y * iterations); + kernel = getStructuringElement(MORPH_RECT, Size(ksize.width + iterations * (ksize.width - 1), + ksize.height + iterations * (ksize.height - 1)), anchor); + iterations = 1; + } + else + { + kernel = _kernel; + } + + Ptr f = createMorphologyFilter_GPU(op, src.type(), kernel, anchor, iterations); + + f->apply(src, dst); +} } -void cv::ocl::erode( const oclMat &src, oclMat &dst, const Mat &kernel, Point anchor, int iterations, - int borderType, const Scalar &borderValue) +void cv::ocl::erode(const oclMat &src, oclMat &dst, const Mat &kernel, Point anchor, int iterations, + int borderType, const Scalar &borderValue) { bool allZero = true; - for(int i = 0; i < kernel.rows * kernel.cols; ++i) - if(kernel.data[i] != 0) + + for (int i = 0; i < kernel.rows * kernel.cols; ++i) + if (kernel.data[i] != 0) + { allZero = false; - if(allZero) + } + + if (allZero) { kernel.data[0] = 1; } + morphOp(MORPH_ERODE, src, dst, kernel, anchor, iterations, borderType, borderValue); } -void cv::ocl::dilate( const oclMat &src, oclMat &dst, const Mat &kernel, Point anchor, int iterations, - int borderType, const Scalar &borderValue) +void cv::ocl::dilate(const oclMat &src, oclMat &dst, const Mat &kernel, Point anchor, int iterations, + int borderType, const Scalar &borderValue) { morphOp(MORPH_DILATE, src, dst, kernel, anchor, iterations, borderType, borderValue); } -void cv::ocl::morphologyEx( const oclMat &src, oclMat &dst, int op, const Mat &kernel, Point anchor, int iterations, - int borderType, const Scalar &borderValue) +void cv::ocl::morphologyEx(const oclMat &src, oclMat &dst, int op, const Mat &kernel, Point anchor, int iterations, + int borderType, const Scalar &borderValue) { oclMat temp; - switch( op ) + + switch (op) { case MORPH_ERODE: - erode( src, dst, kernel, anchor, iterations, borderType, borderValue); + erode(src, dst, kernel, anchor, iterations, borderType, borderValue); break; case MORPH_DILATE: - dilate( src, dst, kernel, anchor, iterations, borderType, borderValue); + dilate(src, dst, kernel, anchor, iterations, borderType, borderValue); break; case MORPH_OPEN: - erode( src, temp, kernel, anchor, iterations, borderType, borderValue); - dilate( temp, dst, kernel, anchor, iterations, borderType, borderValue); + erode(src, temp, kernel, anchor, iterations, borderType, borderValue); + dilate(temp, dst, kernel, anchor, iterations, borderType, borderValue); break; case CV_MOP_CLOSE: - dilate( src, temp, kernel, anchor, iterations, borderType, borderValue); - erode( temp, dst, kernel, anchor, iterations, borderType, borderValue); + dilate(src, temp, kernel, anchor, iterations, borderType, borderValue); + erode(temp, dst, kernel, anchor, iterations, borderType, borderValue); break; case CV_MOP_GRADIENT: - erode( src, temp, kernel, anchor, iterations, borderType, borderValue); - dilate( src, dst, kernel, anchor, iterations, borderType, borderValue); + erode(src, temp, kernel, anchor, iterations, borderType, borderValue); + dilate(src, dst, kernel, anchor, iterations, borderType, borderValue); subtract(dst, temp, dst); break; case CV_MOP_TOPHAT: - erode( src, dst, kernel, anchor, iterations, borderType, borderValue); - dilate( dst, temp, kernel, anchor, iterations, borderType, borderValue); + erode(src, dst, kernel, anchor, iterations, borderType, borderValue); + dilate(dst, temp, kernel, anchor, iterations, borderType, borderValue); subtract(src, temp, dst); break; case CV_MOP_BLACKHAT: - dilate( src, dst, kernel, anchor, iterations, borderType, borderValue); - erode( dst, temp, kernel, anchor, iterations, borderType, borderValue); + dilate(src, dst, kernel, anchor, iterations, borderType, borderValue); + erode(dst, temp, kernel, anchor, iterations, borderType, borderValue); subtract(temp, src, dst); break; default: - CV_Error( CV_StsBadArg, "unknown morphological operation" ); + CV_Error(CV_StsBadArg, "unknown morphological operation"); } } @@ -645,33 +672,33 @@ void cv::ocl::morphologyEx( const oclMat &src, oclMat &dst, int op, const Mat &k namespace { - typedef void (*GPUFilter2D_t)(const oclMat & , oclMat & , oclMat & , Size &, const Point, const int); +typedef void (*GPUFilter2D_t)(const oclMat & , oclMat & , oclMat & , Size &, const Point, const int); - class LinearFilter_GPU : public BaseFilter_GPU +class LinearFilter_GPU : public BaseFilter_GPU +{ +public: + LinearFilter_GPU(const Size &ksize_, const Point &anchor_, const oclMat &kernel_, GPUFilter2D_t func_, + int borderType_) : + BaseFilter_GPU(ksize_, anchor_, borderType_), kernel(kernel_), func(func_) {} + + virtual void operator()(const oclMat &src, oclMat &dst) { - public: - LinearFilter_GPU(const Size &ksize_, const Point &anchor_, const oclMat &kernel_, GPUFilter2D_t func_, - int borderType_) : - BaseFilter_GPU(ksize_, anchor_, borderType_), kernel(kernel_), func(func_) {} + func(src, dst, kernel, ksize, anchor, borderType) ; + } - virtual void operator()(const oclMat &src, oclMat &dst) - { - func(src, dst, kernel, ksize, anchor, borderType) ; - } - - oclMat kernel; - GPUFilter2D_t func; - }; + oclMat kernel; + GPUFilter2D_t func; +}; } -void GPUFilter2D(const oclMat &src, oclMat &dst, oclMat &mat_kernel, +static void GPUFilter2D(const oclMat &src, oclMat &dst, oclMat &mat_kernel, Size &ksize, const Point anchor, const int borderType) { CV_Assert(src.clCxt == dst.clCxt); - CV_Assert( (src.cols == dst.cols) && - (src.rows == dst.rows) ); - CV_Assert( (src.oclchannels() == dst.oclchannels()) ); - CV_Assert( (borderType != 0) ); + CV_Assert((src.cols == dst.cols) && + (src.rows == dst.rows)); + CV_Assert((src.oclchannels() == dst.oclchannels())); + CV_Assert((borderType != 0)); CV_Assert(ksize.height > 0 && ksize.width > 0 && ((ksize.height & 1) == 1) && ((ksize.width & 1) == 1)); CV_Assert((anchor.x == -1 && anchor.y == -1) || (anchor.x == ksize.width >> 1 && anchor.y == ksize.height >> 1)); Context *clCxt = src.clCxt; @@ -703,20 +730,20 @@ void GPUFilter2D(const oclMat &src, oclMat &dst, oclMat &mat_kernel, }; vector< pair > args; - args.push_back( make_pair( sizeof(cl_mem), (void *)&src.data)); - args.push_back( make_pair( sizeof(cl_int), (void *)&src.step)); - args.push_back( make_pair( sizeof(cl_int), (void *)&src_offset_x)); - args.push_back( make_pair( sizeof(cl_int), (void *)&src_offset_y)); - args.push_back( make_pair( sizeof(cl_mem), (void *)&dst.data)); - args.push_back( make_pair( sizeof(cl_int), (void *)&dst.step)); - args.push_back( make_pair( sizeof(cl_int), (void *)&dst_offset_x)); - args.push_back( make_pair( sizeof(cl_int), (void *)&dst_offset_y)); - args.push_back( make_pair( sizeof(cl_mem), (void *)&mat_kernel.data)); - args.push_back( make_pair( sizeof(cl_int), (void *)&src.cols)); - args.push_back( make_pair( sizeof(cl_int), (void *)&src.rows)); - args.push_back( make_pair( sizeof(cl_int), (void *)&cols)); - args.push_back( make_pair( sizeof(cl_int), (void *)&src.wholecols)); - args.push_back( make_pair( sizeof(cl_int), (void *)&src.wholerows)); + args.push_back(make_pair(sizeof(cl_mem), (void *)&src.data)); + args.push_back(make_pair(sizeof(cl_int), (void *)&src.step)); + args.push_back(make_pair(sizeof(cl_int), (void *)&src_offset_x)); + args.push_back(make_pair(sizeof(cl_int), (void *)&src_offset_y)); + args.push_back(make_pair(sizeof(cl_mem), (void *)&dst.data)); + args.push_back(make_pair(sizeof(cl_int), (void *)&dst.step)); + args.push_back(make_pair(sizeof(cl_int), (void *)&dst_offset_x)); + args.push_back(make_pair(sizeof(cl_int), (void *)&dst_offset_y)); + args.push_back(make_pair(sizeof(cl_mem), (void *)&mat_kernel.data)); + args.push_back(make_pair(sizeof(cl_int), (void *)&src.cols)); + args.push_back(make_pair(sizeof(cl_int), (void *)&src.rows)); + args.push_back(make_pair(sizeof(cl_int), (void *)&cols)); + args.push_back(make_pair(sizeof(cl_int), (void *)&src.wholecols)); + args.push_back(make_pair(sizeof(cl_int), (void *)&src.wholerows)); openCLExecuteKernel(clCxt, &filtering_laplacian, kernelName, globalThreads, localThreads, args, cn, depth); } @@ -750,8 +777,10 @@ Ptr cv::ocl::createLinearFilter_GPU(int srcType, int dstType, void cv::ocl::filter2D(const oclMat &src, oclMat &dst, int ddepth, const Mat &kernel, Point anchor, int borderType) { - if( ddepth < 0 ) + if (ddepth < 0) + { ddepth = src.depth(); + } dst.create(src.size(), CV_MAKETYPE(ddepth, src.channels())); @@ -764,50 +793,50 @@ void cv::ocl::filter2D(const oclMat &src, oclMat &dst, int ddepth, const Mat &ke namespace { - class SeparableFilterEngine_GPU : public FilterEngine_GPU +class SeparableFilterEngine_GPU : public FilterEngine_GPU +{ +public: + SeparableFilterEngine_GPU(const Ptr &rowFilter_, + const Ptr &columnFilter_) : + rowFilter(rowFilter_), columnFilter(columnFilter_) { - public: - SeparableFilterEngine_GPU(const Ptr &rowFilter_, - const Ptr &columnFilter_) : - rowFilter(rowFilter_), columnFilter(columnFilter_) - { - ksize = Size(rowFilter->ksize, columnFilter->ksize); - anchor = Point(rowFilter->anchor, columnFilter->anchor); - } + ksize = Size(rowFilter->ksize, columnFilter->ksize); + anchor = Point(rowFilter->anchor, columnFilter->anchor); + } - virtual void apply(const oclMat &src, oclMat &dst, Rect roi = Rect(0, 0, -1, -1)) - { - Size src_size = src.size(); - //int src_type = src.type(); + virtual void apply(const oclMat &src, oclMat &dst, Rect roi = Rect(0, 0, -1, -1)) + { + Size src_size = src.size(); + //int src_type = src.type(); - int cn = src.oclchannels(); - //dst.create(src_size, src_type); - dst = Scalar(0.0); - //dstBuf.create(src_size, src_type); - dstBuf.create(src_size.height + ksize.height - 1, src_size.width, CV_MAKETYPE(CV_32F, cn)); - dstBuf = Scalar(0.0); + int cn = src.oclchannels(); + //dst.create(src_size, src_type); + //dst = Scalar(0.0); + //dstBuf.create(src_size, src_type); + dstBuf.create(src_size.height + ksize.height - 1, src_size.width, CV_MAKETYPE(CV_32F, cn)); + //dstBuf = Scalar(0.0); - normalizeROI(roi, ksize, anchor, src_size); + normalizeROI(roi, ksize, anchor, src_size); - srcROI = src(roi); - dstROI = dst(roi); - //dstBufROI = dstBuf(roi); + srcROI = src(roi); + dstROI = dst(roi); + //dstBufROI = dstBuf(roi); - (*rowFilter)(srcROI, dstBuf); - //Mat rm(dstBufROI); - //std::cout << "rm " << rm << endl; - (*columnFilter)(dstBuf, dstROI); - } + (*rowFilter)(srcROI, dstBuf); + //Mat rm(dstBufROI); + //std::cout << "rm " << rm << endl; + (*columnFilter)(dstBuf, dstROI); + } - Ptr rowFilter; - Ptr columnFilter; - Size ksize; - Point anchor; - oclMat dstBuf; - oclMat srcROI; - oclMat dstROI; - oclMat dstBufROI; - }; + Ptr rowFilter; + Ptr columnFilter; + Size ksize; + Point anchor; + oclMat dstBuf; + oclMat srcROI; + oclMat dstROI; + oclMat dstBufROI; +}; } Ptr cv::ocl::createSeparableFilter_GPU(const Ptr &rowFilter, @@ -821,7 +850,7 @@ Ptr cv::ocl::createSeparableFilter_GPU(const Ptr struct index_and_sizeof; @@ -1161,7 +1197,8 @@ void linearRowFilter_gpu(const oclMat &src, const oclMat &dst, oclMat mat_kernel string kernelName = "row_filter"; char btype[30]; - switch(bordertype) + + switch (bordertype) { case 0: sprintf(btype, "BORDER_CONSTANT"); @@ -1179,15 +1216,17 @@ void linearRowFilter_gpu(const oclMat &src, const oclMat &dst, oclMat mat_kernel sprintf(btype, "BORDER_REFLECT_101"); break; } + char compile_option[128]; - sprintf(compile_option, "-D RADIUSX=%d -D LSIZE0=%d -D LSIZE1=%d -D CN=%d -D %s", anchor, localThreads[0], localThreads[1], channels, btype); + sprintf(compile_option, "-D RADIUSX=%d -D LSIZE0=%d -D LSIZE1=%d -D CN=%d -D %s", anchor, (int)localThreads[0], (int)localThreads[1], channels, btype); size_t globalThreads[3]; globalThreads[1] = (dst.rows + localThreads[1] - 1) / localThreads[1] * localThreads[1]; globalThreads[2] = (1 + localThreads[2] - 1) / localThreads[2] * localThreads[2]; - if(src.depth() == CV_8U) + + if (src.depth() == CV_8U) { - switch(channels) + switch (channels) { case 1: case 3: @@ -1205,18 +1244,19 @@ void linearRowFilter_gpu(const oclMat &src, const oclMat &dst, oclMat mat_kernel { globalThreads[0] = (dst.cols + localThreads[0] - 1) / localThreads[0] * localThreads[0]; } + //sanity checks CV_Assert(clCxt == dst.clCxt); CV_Assert(src.cols == dst.cols); CV_Assert(src.oclchannels() == dst.oclchannels()); CV_Assert(ksize == (anchor << 1) + 1); int src_pix_per_row, dst_pix_per_row; - int src_offset_x, src_offset_y, dst_offset_in_pixel; + int src_offset_x, src_offset_y;//, dst_offset_in_pixel; src_pix_per_row = src.step / src.elemSize(); src_offset_x = (src.offset % src.step) / src.elemSize(); src_offset_y = src.offset / src.step; dst_pix_per_row = dst.step / dst.elemSize(); - dst_offset_in_pixel = dst.offset / dst.elemSize(); + //dst_offset_in_pixel = dst.offset / dst.elemSize(); int ridusy = (dst.rows - src.rows) >> 1; vector > args; args.push_back(make_pair(sizeof(cl_mem), &src.data)); @@ -1232,7 +1272,7 @@ void linearRowFilter_gpu(const oclMat &src, const oclMat &dst, oclMat mat_kernel args.push_back(make_pair(sizeof(cl_int), (void *)&ridusy)); args.push_back(make_pair(sizeof(cl_mem), (void *)&mat_kernel.data)); - openCLExecuteKernel(clCxt, &filter_sep_row, kernelName, globalThreads, localThreads, args, channels, src.depth(), compile_option); + openCLExecuteKernel2(clCxt, &filter_sep_row, kernelName, globalThreads, localThreads, args, channels, src.depth(), compile_option, CLFLUSH); } Ptr cv::ocl::getLinearRowFilter_GPU(int srcType, int /*bufType*/, const Mat &rowKernel, int anchor, int bordertype) @@ -1263,20 +1303,20 @@ Ptr cv::ocl::getLinearRowFilter_GPU(int srcType, int /*bufTyp namespace { - class GpuLinearColumnFilter : public BaseColumnFilter_GPU +class GpuLinearColumnFilter : public BaseColumnFilter_GPU +{ +public: + GpuLinearColumnFilter(int ksize_, int anchor_, const oclMat &kernel_, gpuFilter1D_t func_, int bordertype_) : + BaseColumnFilter_GPU(ksize_, anchor_, bordertype_), kernel(kernel_), func(func_) {} + + virtual void operator()(const oclMat &src, oclMat &dst) { - public: - GpuLinearColumnFilter(int ksize_, int anchor_, const oclMat &kernel_, gpuFilter1D_t func_, int bordertype_) : - BaseColumnFilter_GPU(ksize_, anchor_, bordertype_), kernel(kernel_), func(func_) {} + func(src, dst, kernel, ksize, anchor, bordertype); + } - virtual void operator()(const oclMat &src, oclMat &dst) - { - func(src, dst, kernel, ksize, anchor, bordertype); - } - - oclMat kernel; - gpuFilter1D_t func; - }; + oclMat kernel; + gpuFilter1D_t func; +}; } template @@ -1289,7 +1329,8 @@ void linearColumnFilter_gpu(const oclMat &src, const oclMat &dst, oclMat mat_ker string kernelName = "col_filter"; char btype[30]; - switch(bordertype) + + switch (bordertype) { case 0: sprintf(btype, "BORDER_CONSTANT"); @@ -1307,56 +1348,59 @@ void linearColumnFilter_gpu(const oclMat &src, const oclMat &dst, oclMat mat_ker sprintf(btype, "BORDER_REFLECT_101"); break; } + char compile_option[256]; size_t globalThreads[3]; globalThreads[1] = (dst.rows + localThreads[1] - 1) / localThreads[1] * localThreads[1]; globalThreads[2] = (1 + localThreads[2] - 1) / localThreads[2] * localThreads[2]; - if(dst.depth() == CV_8U) + + if (dst.depth() == CV_8U) { - switch(channels) + switch (channels) { case 1: globalThreads[0] = (dst.cols + localThreads[0] - 1) / localThreads[0] * localThreads[0]; sprintf(compile_option, "-D RADIUSY=%d -D LSIZE0=%d -D LSIZE1=%d -D CN=%d -D %s -D GENTYPE_SRC=%s -D GENTYPE_DST=%s -D convert_to_DST=%s", - anchor, localThreads[0], localThreads[1], channels, btype, "float", "uchar", "convert_uchar_sat"); + anchor, (int)localThreads[0], (int)localThreads[1], channels, btype, "float", "uchar", "convert_uchar_sat"); break; case 2: globalThreads[0] = ((dst.cols + 1) / 2 + localThreads[0] - 1) / localThreads[0] * localThreads[0]; sprintf(compile_option, "-D RADIUSY=%d -D LSIZE0=%d -D LSIZE1=%d -D CN=%d -D %s -D GENTYPE_SRC=%s -D GENTYPE_DST=%s -D convert_to_DST=%s", - anchor, localThreads[0], localThreads[1], channels, btype, "float2", "uchar2", "convert_uchar2_sat"); + anchor, (int)localThreads[0], (int)localThreads[1], channels, btype, "float2", "uchar2", "convert_uchar2_sat"); break; case 3: case 4: globalThreads[0] = (dst.cols + localThreads[0] - 1) / localThreads[0] * localThreads[0]; sprintf(compile_option, "-D RADIUSY=%d -D LSIZE0=%d -D LSIZE1=%d -D CN=%d -D %s -D GENTYPE_SRC=%s -D GENTYPE_DST=%s -D convert_to_DST=%s", - anchor, localThreads[0], localThreads[1], channels, btype, "float4", "uchar4", "convert_uchar4_sat"); + anchor, (int)localThreads[0], (int)localThreads[1], channels, btype, "float4", "uchar4", "convert_uchar4_sat"); break; } } else { globalThreads[0] = (dst.cols + localThreads[0] - 1) / localThreads[0] * localThreads[0]; - switch(dst.type()) + + switch (dst.type()) { case CV_32SC1: sprintf(compile_option, "-D RADIUSY=%d -D LSIZE0=%d -D LSIZE1=%d -D CN=%d -D %s -D GENTYPE_SRC=%s -D GENTYPE_DST=%s -D convert_to_DST=%s", - anchor, localThreads[0], localThreads[1], channels, btype, "float", "int", "convert_int_sat"); + anchor, (int)localThreads[0], (int)localThreads[1], channels, btype, "float", "int", "convert_int_sat"); break; case CV_32SC3: case CV_32SC4: sprintf(compile_option, "-D RADIUSY=%d -D LSIZE0=%d -D LSIZE1=%d -D CN=%d -D %s -D GENTYPE_SRC=%s -D GENTYPE_DST=%s -D convert_to_DST=%s", - anchor, localThreads[0], localThreads[1], channels, btype, "float4", "int4", "convert_int4_sat"); + anchor, (int)localThreads[0], (int)localThreads[1], channels, btype, "float4", "int4", "convert_int4_sat"); break; case CV_32FC1: sprintf(compile_option, "-D RADIUSY=%d -D LSIZE0=%d -D LSIZE1=%d -D CN=%d -D %s -D GENTYPE_SRC=%s -D GENTYPE_DST=%s -D convert_to_DST=%s", - anchor, localThreads[0], localThreads[1], channels, btype, "float", "float", ""); + anchor, (int)localThreads[0], (int)localThreads[1], channels, btype, "float", "float", ""); break; case CV_32FC3: case CV_32FC4: sprintf(compile_option, "-D RADIUSY=%d -D LSIZE0=%d -D LSIZE1=%d -D CN=%d -D %s -D GENTYPE_SRC=%s -D GENTYPE_DST=%s -D convert_to_DST=%s", - anchor, localThreads[0], localThreads[1], channels, btype, "float4", "float4", ""); + anchor, (int)localThreads[0], (int)localThreads[1], channels, btype, "float4", "float4", ""); break; } } @@ -1367,10 +1411,11 @@ void linearColumnFilter_gpu(const oclMat &src, const oclMat &dst, oclMat mat_ker CV_Assert(src.oclchannels() == dst.oclchannels()); CV_Assert(ksize == (anchor << 1) + 1); int src_pix_per_row, dst_pix_per_row; - int src_offset_x, src_offset_y, dst_offset_in_pixel; + //int src_offset_x, src_offset_y; + int dst_offset_in_pixel; src_pix_per_row = src.step / src.elemSize(); - src_offset_x = (src.offset % src.step) / src.elemSize(); - src_offset_y = src.offset / src.step; + //src_offset_x = (src.offset % src.step) / src.elemSize(); + //src_offset_y = src.offset / src.step; dst_pix_per_row = dst.step / dst.elemSize(); dst_offset_in_pixel = dst.offset / dst.elemSize(); @@ -1441,20 +1486,25 @@ Ptr cv::ocl::createSeparableLinearFilter_GPU(int srcType, int void cv::ocl::sepFilter2D(const oclMat &src, oclMat &dst, int ddepth, const Mat &kernelX, const Mat &kernelY, Point anchor, double delta, int bordertype) { - if((dst.cols != dst.wholecols) || (dst.rows != dst.wholerows)) //has roi + if ((dst.cols != dst.wholecols) || (dst.rows != dst.wholerows)) //has roi { - if((bordertype & cv::BORDER_ISOLATED) != 0) + if ((bordertype & cv::BORDER_ISOLATED) != 0) { bordertype &= ~cv::BORDER_ISOLATED; - if((bordertype != cv::BORDER_CONSTANT) && + + if ((bordertype != cv::BORDER_CONSTANT) && (bordertype != cv::BORDER_REPLICATE)) { CV_Error(CV_StsBadArg, "unsupported border type"); } } } - if( ddepth < 0 ) + + if (ddepth < 0) + { ddepth = src.depth(); + } + //CV_Assert(ddepth == src.depth()); dst.create(src.size(), CV_MAKETYPE(ddepth, src.channels())); @@ -1462,12 +1512,12 @@ void cv::ocl::sepFilter2D(const oclMat &src, oclMat &dst, int ddepth, const Mat f->apply(src, dst); } -Ptr cv::ocl::createDerivFilter_GPU( int srcType, int dstType, int dx, int dy, int ksize, int borderType ) +Ptr cv::ocl::createDerivFilter_GPU(int srcType, int dstType, int dx, int dy, int ksize, int borderType) { Mat kx, ky; - getDerivKernels( kx, ky, dx, dy, ksize, false, CV_32F ); + getDerivKernels(kx, ky, dx, dy, ksize, false, CV_32F); return createSeparableLinearFilter_GPU(srcType, dstType, - kx, ky, Point(-1, -1), 0, borderType ); + kx, ky, Point(-1, -1), 0, borderType); } //////////////////////////////////////////////////////////////////////////////////////////////////// @@ -1482,15 +1532,20 @@ void cv::ocl::Sobel(const oclMat &src, oclMat &dst, int ddepth, int dx, int dy, // usually the smoothing part is the slowest to compute, // so try to scale it instead of the faster differenciating part if (dx == 0) + { kx *= scale; + } else + { ky *= scale; + } } + // Mat kx_, ky_; //ky.convertTo(ky_,CV_32S,1<<8); //kx.convertTo(kx_,CV_32S,1<<8); - sepFilter2D(src, dst, ddepth, kx, ky, Point(-1, -1), delta, borderType ); + sepFilter2D(src, dst, ddepth, kx, ky, Point(-1, -1), delta, borderType); } void cv::ocl::Scharr(const oclMat &src, oclMat &dst, int ddepth, int dx, int dy, double scale, double delta , int bordertype) @@ -1498,14 +1553,18 @@ void cv::ocl::Scharr(const oclMat &src, oclMat &dst, int ddepth, int dx, int dy, Mat kx, ky; getDerivKernels(kx, ky, dx, dy, -1, false, CV_32F); - if( scale != 1 ) + if (scale != 1) { // usually the smoothing part is the slowest to compute, // so try to scale it instead of the faster differenciating part - if( dx == 0 ) + if (dx == 0) + { kx *= scale; + } else + { ky *= scale; + } } // Mat kx_, ky_; @@ -1517,7 +1576,7 @@ void cv::ocl::Scharr(const oclMat &src, oclMat &dst, int ddepth, int dx, int dy, void cv::ocl::Laplacian(const oclMat &src, oclMat &dst, int ddepth, int ksize, double scale) { - if(src.clCxt -> impl -> double_support == 0 && src.type() == CV_64F) + if (src.clCxt -> impl -> double_support == 0 && src.type() == CV_64F) { CV_Error(CV_GpuNotSupported, "Selected device don't support double\r\n"); return; @@ -1531,8 +1590,12 @@ void cv::ocl::Laplacian(const oclMat &src, oclMat &dst, int ddepth, int ksize, d {2, 0, 2, 0, -8, 0, 2, 0, 2} }; Mat kernel(3, 3, CV_32S, (void *)K[ksize == 3]); + if (scale != 1) + { kernel *= scale; + } + filter2D(src, dst, ddepth, kernel, Point(-1, -1)); } @@ -1544,25 +1607,38 @@ Ptr cv::ocl::createGaussianFilter_GPU(int type, Size ksize, do int depth = CV_MAT_DEPTH(type); if (sigma2 <= 0) + { sigma2 = sigma1; + } // automatic detection of kernel size from sigma if (ksize.width <= 0 && sigma1 > 0) + { ksize.width = cvRound(sigma1 * (depth == CV_8U ? 3 : 4) * 2 + 1) | 1; - if (ksize.height <= 0 && sigma2 > 0) - ksize.height = cvRound(sigma2 * (depth == CV_8U ? 3 : 4) * 2 + 1) | 1; + } - CV_Assert( ksize.width > 0 && ksize.width % 2 == 1 && ksize.height > 0 && ksize.height % 2 == 1 ); + if (ksize.height <= 0 && sigma2 > 0) + { + ksize.height = cvRound(sigma2 * (depth == CV_8U ? 3 : 4) * 2 + 1) | 1; + } + + CV_Assert(ksize.width > 0 && ksize.width % 2 == 1 && ksize.height > 0 && ksize.height % 2 == 1); sigma1 = std::max(sigma1, 0.0); sigma2 = std::max(sigma2, 0.0); - Mat kx = getGaussianKernel( ksize.width, sigma1, std::max(depth, CV_32F) ); + Mat kx = getGaussianKernel(ksize.width, sigma1, std::max(depth, CV_32F)); Mat ky; - if( ksize.height == ksize.width && std::abs(sigma1 - sigma2) < DBL_EPSILON ) + + if (ksize.height == ksize.width && std::abs(sigma1 - sigma2) < DBL_EPSILON) + { ky = kx; + } else - ky = getGaussianKernel( ksize.height, sigma2, std::max(depth, CV_32F) ); + { + ky = getGaussianKernel(ksize.height, sigma2, std::max(depth, CV_32F)); + } + //Mat kx_, ky_; //kx.convertTo(kx_,CV_32S,1<<8); //ky.convertTo(ky_,CV_32S,1<<8); @@ -1576,26 +1652,36 @@ void cv::ocl::GaussianBlur(const oclMat &src, oclMat &dst, Size ksize, double si src.copyTo(dst); return; } - if((dst.cols != dst.wholecols) || (dst.rows != dst.wholerows)) //has roi + + if ((dst.cols != dst.wholecols) || (dst.rows != dst.wholerows)) //has roi { - if((bordertype & cv::BORDER_ISOLATED) != 0) + if ((bordertype & cv::BORDER_ISOLATED) != 0) { bordertype &= ~cv::BORDER_ISOLATED; - if((bordertype != cv::BORDER_CONSTANT) && + + if ((bordertype != cv::BORDER_CONSTANT) && (bordertype != cv::BORDER_REPLICATE)) { CV_Error(CV_StsBadArg, "unsupported border type"); } } } + dst.create(src.size(), src.type()); - if( bordertype != BORDER_CONSTANT ) + + if (bordertype != BORDER_CONSTANT) { - if( src.rows == 1 ) + if (src.rows == 1) + { ksize.height = 1; - if( src.cols == 1 ) + } + + if (src.cols == 1) + { ksize.width = 1; + } } + Ptr f = createGaussianFilter_GPU(src.type(), ksize, sigma1, sigma2, bordertype); f->apply(src, dst); } diff --git a/modules/ocl/src/haar.cpp b/modules/ocl/src/haar.cpp index 07489157e..5c9b75bf5 100644 --- a/modules/ocl/src/haar.cpp +++ b/modules/ocl/src/haar.cpp @@ -63,13 +63,13 @@ using namespace std; namespace cv { - namespace ocl - { - ///////////////////////////OpenCL kernel strings/////////////////////////// - extern const char *haarobjectdetect; - extern const char *haarobjectdetectbackup; - extern const char *haarobjectdetect_scaled2; - } +namespace ocl +{ +///////////////////////////OpenCL kernel strings/////////////////////////// +extern const char *haarobjectdetect; +extern const char *haarobjectdetectbackup; +extern const char *haarobjectdetect_scaled2; +} } /* these settings affect the quality of detection: change with care */ @@ -150,7 +150,7 @@ typedef struct int imgoff; float factor; } detect_piramid_info; -#if WIN32 +#ifdef WIN32 #define _ALIGNED_ON(_ALIGNMENT) __declspec(align(_ALIGNMENT)) typedef _ALIGNED_ON(128) struct GpuHidHaarFeature { @@ -300,40 +300,37 @@ const float icv_stage_threshold_bias = 0.0001f; double globaltime = 0; -CvHaarClassifierCascade * -gpuCreateHaarClassifierCascade( int stage_count ) -{ - CvHaarClassifierCascade *cascade = 0; +// static CvHaarClassifierCascade * gpuCreateHaarClassifierCascade( int stage_count ) +// { +// CvHaarClassifierCascade *cascade = 0; - int block_size = sizeof(*cascade) + stage_count * sizeof(*cascade->stage_classifier); +// int block_size = sizeof(*cascade) + stage_count * sizeof(*cascade->stage_classifier); - if( stage_count <= 0 ) - CV_Error( CV_StsOutOfRange, "Number of stages should be positive" ); +// if( stage_count <= 0 ) +// CV_Error( CV_StsOutOfRange, "Number of stages should be positive" ); - cascade = (CvHaarClassifierCascade *)cvAlloc( block_size ); - memset( cascade, 0, block_size ); +// cascade = (CvHaarClassifierCascade *)cvAlloc( block_size ); +// memset( cascade, 0, block_size ); - cascade->stage_classifier = (CvHaarStageClassifier *)(cascade + 1); - cascade->flags = CV_HAAR_MAGIC_VAL; - cascade->count = stage_count; +// cascade->stage_classifier = (CvHaarStageClassifier *)(cascade + 1); +// cascade->flags = CV_HAAR_MAGIC_VAL; +// cascade->count = stage_count; - return cascade; -} +// return cascade; +// } //static int globalcounter = 0; -void -gpuReleaseHidHaarClassifierCascade( GpuHidHaarClassifierCascade **_cascade ) -{ - if( _cascade && *_cascade ) - { - cvFree( _cascade ); - } -} +// static void gpuReleaseHidHaarClassifierCascade( GpuHidHaarClassifierCascade **_cascade ) +// { +// if( _cascade && *_cascade ) +// { +// cvFree( _cascade ); +// } +// } /* create more efficient internal representation of haar classifier cascade */ -GpuHidHaarClassifierCascade * -gpuCreateHidHaarClassifierCascade( CvHaarClassifierCascade *cascade, int *size, int *totalclassifier) +static GpuHidHaarClassifierCascade * gpuCreateHidHaarClassifierCascade( CvHaarClassifierCascade *cascade, int *size, int *totalclassifier) { GpuHidHaarClassifierCascade *out = 0; @@ -522,8 +519,7 @@ gpuCreateHidHaarClassifierCascade( CvHaarClassifierCascade *cascade, int *size, ((rect).p0[offset] - (rect).p1[offset] - (rect).p2[offset] + (rect).p3[offset]) -CV_IMPL void -gpuSetImagesForHaarClassifierCascade( CvHaarClassifierCascade *_cascade, +static void gpuSetImagesForHaarClassifierCascade( CvHaarClassifierCascade *_cascade, /* const CvArr* _sum, const CvArr* _sqsum, const CvArr* _tilted_sum,*/ @@ -767,8 +763,8 @@ gpuSetImagesForHaarClassifierCascade( CvHaarClassifierCascade *_cascade, } /* j */ } } -CV_IMPL void -gpuSetHaarClassifierCascade( CvHaarClassifierCascade *_cascade + +static void gpuSetHaarClassifierCascade( CvHaarClassifierCascade *_cascade /*double scale=0.0,*/ /*int step*/) { @@ -870,7 +866,7 @@ CvSeq *cv::ocl::OclCascadeClassifier::oclHaarDetectObjects( oclMat &gimg, CvMemS int datasize=0; int totalclassifier=0; - void *out; + //void *out; GpuHidHaarClassifierCascade *gcascade; GpuHidHaarStageClassifier *stage; GpuHidHaarClassifier *classifier; @@ -883,13 +879,6 @@ CvSeq *cv::ocl::OclCascadeClassifier::oclHaarDetectObjects( oclMat &gimg, CvMemS bool findBiggestObject = (flags & CV_HAAR_FIND_BIGGEST_OBJECT) != 0; // bool roughSearch = (flags & CV_HAAR_DO_ROUGH_SEARCH) != 0; - //the Intel HD Graphics is unsupported - if (gimg.clCxt->impl->devName.find("Intel(R) HD Graphics") != string::npos) - { - cout << " Intel HD GPU device unsupported " << endl; - return NULL; - } - //double t = 0; if( maxSize.height == 0 || maxSize.width == 0 ) { @@ -917,7 +906,7 @@ CvSeq *cv::ocl::OclCascadeClassifier::oclHaarDetectObjects( oclMat &gimg, CvMemS //gsqsum1 = oclMat( gimg.rows + 1, gimg.cols + 1, CV_32FC1 ); if( !cascade->hid_cascade ) - out = (void *)gpuCreateHidHaarClassifierCascade(cascade, &datasize, &totalclassifier); + /*out = (void *)*/gpuCreateHidHaarClassifierCascade(cascade, &datasize, &totalclassifier); if( cascade->hid_cascade->has_tilted_features ) gtilted1 = oclMat( gimg.rows + 1, gimg.cols + 1, CV_32SC1 ); @@ -937,7 +926,7 @@ CvSeq *cv::ocl::OclCascadeClassifier::oclHaarDetectObjects( oclMat &gimg, CvMemS if( gimg.cols < minSize.width || gimg.rows < minSize.height ) CV_Error(CV_StsError, "Image too small"); - if( flags & CV_HAAR_SCALE_IMAGE ) + if( (flags & CV_HAAR_SCALE_IMAGE) && gimg.clCxt->impl->devName.find("Intel(R) HD Graphics") == string::npos ) { CvSize winSize0 = cascade->orig_window_size; //float scalefactor = 1.1f; @@ -1418,206 +1407,203 @@ CvSeq *cv::ocl::OclCascadeClassifier::oclHaarDetectObjects( oclMat &gimg, CvMemS } -CvHaarClassifierCascade * -gpuLoadCascadeCART( const char **input_cascade, int n, CvSize orig_window_size ) -{ - int i; - CvHaarClassifierCascade *cascade = gpuCreateHaarClassifierCascade(n); - cascade->orig_window_size = orig_window_size; +// static CvHaarClassifierCascade * gpuLoadCascadeCART( const char **input_cascade, int n, CvSize orig_window_size ) +// { +// int i; +// CvHaarClassifierCascade *cascade = gpuCreateHaarClassifierCascade(n); +// cascade->orig_window_size = orig_window_size; - for( i = 0; i < n; i++ ) - { - int j, count, l; - float threshold = 0; - const char *stage = input_cascade[i]; - int dl = 0; +// for( i = 0; i < n; i++ ) +// { +// int j, count, l; +// float threshold = 0; +// const char *stage = input_cascade[i]; +// int dl = 0; - /* tree links */ - int parent = -1; - int next = -1; +// /* tree links */ +// int parent = -1; +// int next = -1; - sscanf( stage, "%d%n", &count, &dl ); - stage += dl; +// sscanf( stage, "%d%n", &count, &dl ); +// stage += dl; - assert( count > 0 ); - cascade->stage_classifier[i].count = count; - cascade->stage_classifier[i].classifier = - (CvHaarClassifier *)cvAlloc( count * sizeof(cascade->stage_classifier[i].classifier[0])); +// assert( count > 0 ); +// cascade->stage_classifier[i].count = count; +// cascade->stage_classifier[i].classifier = +// (CvHaarClassifier *)cvAlloc( count * sizeof(cascade->stage_classifier[i].classifier[0])); - for( j = 0; j < count; j++ ) - { - CvHaarClassifier *classifier = cascade->stage_classifier[i].classifier + j; - int k, rects = 0; - char str[100]; +// for( j = 0; j < count; j++ ) +// { +// CvHaarClassifier *classifier = cascade->stage_classifier[i].classifier + j; +// int k, rects = 0; +// char str[100]; - sscanf( stage, "%d%n", &classifier->count, &dl ); - stage += dl; +// sscanf( stage, "%d%n", &classifier->count, &dl ); +// stage += dl; - classifier->haar_feature = (CvHaarFeature *) cvAlloc( - classifier->count * ( sizeof( *classifier->haar_feature ) + - sizeof( *classifier->threshold ) + - sizeof( *classifier->left ) + - sizeof( *classifier->right ) ) + - (classifier->count + 1) * sizeof( *classifier->alpha ) ); - classifier->threshold = (float *) (classifier->haar_feature + classifier->count); - classifier->left = (int *) (classifier->threshold + classifier->count); - classifier->right = (int *) (classifier->left + classifier->count); - classifier->alpha = (float *) (classifier->right + classifier->count); +// classifier->haar_feature = (CvHaarFeature *) cvAlloc( +// classifier->count * ( sizeof( *classifier->haar_feature ) + +// sizeof( *classifier->threshold ) + +// sizeof( *classifier->left ) + +// sizeof( *classifier->right ) ) + +// (classifier->count + 1) * sizeof( *classifier->alpha ) ); +// classifier->threshold = (float *) (classifier->haar_feature + classifier->count); +// classifier->left = (int *) (classifier->threshold + classifier->count); +// classifier->right = (int *) (classifier->left + classifier->count); +// classifier->alpha = (float *) (classifier->right + classifier->count); - for( l = 0; l < classifier->count; l++ ) - { - sscanf( stage, "%d%n", &rects, &dl ); - stage += dl; +// for( l = 0; l < classifier->count; l++ ) +// { +// sscanf( stage, "%d%n", &rects, &dl ); +// stage += dl; - assert( rects >= 2 && rects <= CV_HAAR_FEATURE_MAX ); +// assert( rects >= 2 && rects <= CV_HAAR_FEATURE_MAX ); - for( k = 0; k < rects; k++ ) - { - CvRect r; - int band = 0; - sscanf( stage, "%d%d%d%d%d%f%n", - &r.x, &r.y, &r.width, &r.height, &band, - &(classifier->haar_feature[l].rect[k].weight), &dl ); - stage += dl; - classifier->haar_feature[l].rect[k].r = r; - } - sscanf( stage, "%s%n", str, &dl ); - stage += dl; +// for( k = 0; k < rects; k++ ) +// { +// CvRect r; +// int band = 0; +// sscanf( stage, "%d%d%d%d%d%f%n", +// &r.x, &r.y, &r.width, &r.height, &band, +// &(classifier->haar_feature[l].rect[k].weight), &dl ); +// stage += dl; +// classifier->haar_feature[l].rect[k].r = r; +// } +// sscanf( stage, "%s%n", str, &dl ); +// stage += dl; - classifier->haar_feature[l].tilted = strncmp( str, "tilted", 6 ) == 0; +// classifier->haar_feature[l].tilted = strncmp( str, "tilted", 6 ) == 0; - for( k = rects; k < CV_HAAR_FEATURE_MAX; k++ ) - { - memset( classifier->haar_feature[l].rect + k, 0, - sizeof(classifier->haar_feature[l].rect[k]) ); - } +// for( k = rects; k < CV_HAAR_FEATURE_MAX; k++ ) +// { +// memset( classifier->haar_feature[l].rect + k, 0, +// sizeof(classifier->haar_feature[l].rect[k]) ); +// } - sscanf( stage, "%f%d%d%n", &(classifier->threshold[l]), - &(classifier->left[l]), - &(classifier->right[l]), &dl ); - stage += dl; - } - for( l = 0; l <= classifier->count; l++ ) - { - sscanf( stage, "%f%n", &(classifier->alpha[l]), &dl ); - stage += dl; - } - } +// sscanf( stage, "%f%d%d%n", &(classifier->threshold[l]), +// &(classifier->left[l]), +// &(classifier->right[l]), &dl ); +// stage += dl; +// } +// for( l = 0; l <= classifier->count; l++ ) +// { +// sscanf( stage, "%f%n", &(classifier->alpha[l]), &dl ); +// stage += dl; +// } +// } - sscanf( stage, "%f%n", &threshold, &dl ); - stage += dl; +// sscanf( stage, "%f%n", &threshold, &dl ); +// stage += dl; - cascade->stage_classifier[i].threshold = threshold; +// cascade->stage_classifier[i].threshold = threshold; - /* load tree links */ - if( sscanf( stage, "%d%d%n", &parent, &next, &dl ) != 2 ) - { - parent = i - 1; - next = -1; - } - stage += dl; +// /* load tree links */ +// if( sscanf( stage, "%d%d%n", &parent, &next, &dl ) != 2 ) +// { +// parent = i - 1; +// next = -1; +// } +// stage += dl; - cascade->stage_classifier[i].parent = parent; - cascade->stage_classifier[i].next = next; - cascade->stage_classifier[i].child = -1; +// cascade->stage_classifier[i].parent = parent; +// cascade->stage_classifier[i].next = next; +// cascade->stage_classifier[i].child = -1; - if( parent != -1 && cascade->stage_classifier[parent].child == -1 ) - { - cascade->stage_classifier[parent].child = i; - } - } +// if( parent != -1 && cascade->stage_classifier[parent].child == -1 ) +// { +// cascade->stage_classifier[parent].child = i; +// } +// } - return cascade; -} +// return cascade; +// } #ifndef _MAX_PATH #define _MAX_PATH 1024 #endif -CV_IMPL CvHaarClassifierCascade * -gpuLoadHaarClassifierCascade( const char *directory, CvSize orig_window_size ) -{ - const char **input_cascade = 0; - CvHaarClassifierCascade *cascade = 0; +// static CvHaarClassifierCascade * gpuLoadHaarClassifierCascade( const char *directory, CvSize orig_window_size ) +// { +// const char **input_cascade = 0; +// CvHaarClassifierCascade *cascade = 0; - int i, n; - const char *slash; - char name[_MAX_PATH]; - int size = 0; - char *ptr = 0; +// int i, n; +// const char *slash; +// char name[_MAX_PATH]; +// int size = 0; +// char *ptr = 0; - if( !directory ) - CV_Error( CV_StsNullPtr, "Null path is passed" ); +// if( !directory ) +// CV_Error( CV_StsNullPtr, "Null path is passed" ); - n = (int)strlen(directory) - 1; - slash = directory[n] == '\\' || directory[n] == '/' ? "" : "/"; +// n = (int)strlen(directory) - 1; +// slash = directory[n] == '\\' || directory[n] == '/' ? "" : "/"; - /* try to read the classifier from directory */ - for( n = 0; ; n++ ) - { - sprintf( name, "%s%s%d/AdaBoostCARTHaarClassifier.txt", directory, slash, n ); - FILE *f = fopen( name, "rb" ); - if( !f ) - break; - fseek( f, 0, SEEK_END ); - size += ftell( f ) + 1; - fclose(f); - } +// /* try to read the classifier from directory */ +// for( n = 0; ; n++ ) +// { +// sprintf( name, "%s%s%d/AdaBoostCARTHaarClassifier.txt", directory, slash, n ); +// FILE *f = fopen( name, "rb" ); +// if( !f ) +// break; +// fseek( f, 0, SEEK_END ); +// size += ftell( f ) + 1; +// fclose(f); +// } - if( n == 0 && slash[0] ) - return (CvHaarClassifierCascade *)cvLoad( directory ); +// if( n == 0 && slash[0] ) +// return (CvHaarClassifierCascade *)cvLoad( directory ); - if( n == 0 ) - CV_Error( CV_StsBadArg, "Invalid path" ); +// if( n == 0 ) +// CV_Error( CV_StsBadArg, "Invalid path" ); - size += (n + 1) * sizeof(char *); - input_cascade = (const char **)cvAlloc( size ); - ptr = (char *)(input_cascade + n + 1); +// size += (n + 1) * sizeof(char *); +// input_cascade = (const char **)cvAlloc( size ); +// ptr = (char *)(input_cascade + n + 1); - for( i = 0; i < n; i++ ) - { - sprintf( name, "%s/%d/AdaBoostCARTHaarClassifier.txt", directory, i ); - FILE *f = fopen( name, "rb" ); - if( !f ) - CV_Error( CV_StsError, "" ); - fseek( f, 0, SEEK_END ); - size = ftell( f ); - fseek( f, 0, SEEK_SET ); - fread( ptr, 1, size, f ); - fclose(f); - input_cascade[i] = ptr; - ptr += size; - *ptr++ = '\0'; - } +// for( i = 0; i < n; i++ ) +// { +// sprintf( name, "%s/%d/AdaBoostCARTHaarClassifier.txt", directory, i ); +// FILE *f = fopen( name, "rb" ); +// if( !f ) +// CV_Error( CV_StsError, "" ); +// fseek( f, 0, SEEK_END ); +// size = ftell( f ); +// fseek( f, 0, SEEK_SET ); +// CV_Assert((size_t)size == fread( ptr, 1, size, f )); +// fclose(f); +// input_cascade[i] = ptr; +// ptr += size; +// *ptr++ = '\0'; +// } - input_cascade[n] = 0; - cascade = gpuLoadCascadeCART( input_cascade, n, orig_window_size ); +// input_cascade[n] = 0; +// cascade = gpuLoadCascadeCART( input_cascade, n, orig_window_size ); - if( input_cascade ) - cvFree( &input_cascade ); +// if( input_cascade ) +// cvFree( &input_cascade ); - return cascade; -} +// return cascade; +// } -CV_IMPL void -gpuReleaseHaarClassifierCascade( CvHaarClassifierCascade **_cascade ) -{ - if( _cascade && *_cascade ) - { - int i, j; - CvHaarClassifierCascade *cascade = *_cascade; +// static void gpuReleaseHaarClassifierCascade( CvHaarClassifierCascade **_cascade ) +// { +// if( _cascade && *_cascade ) +// { +// int i, j; +// CvHaarClassifierCascade *cascade = *_cascade; - for( i = 0; i < cascade->count; i++ ) - { - for( j = 0; j < cascade->stage_classifier[i].count; j++ ) - cvFree( &cascade->stage_classifier[i].classifier[j].haar_feature ); - cvFree( &cascade->stage_classifier[i].classifier ); - } - gpuReleaseHidHaarClassifierCascade( (GpuHidHaarClassifierCascade **)&cascade->hid_cascade ); - cvFree( _cascade ); - } -} +// for( i = 0; i < cascade->count; i++ ) +// { +// for( j = 0; j < cascade->stage_classifier[i].count; j++ ) +// cvFree( &cascade->stage_classifier[i].classifier[j].haar_feature ); +// cvFree( &cascade->stage_classifier[i].classifier ); +// } +// gpuReleaseHidHaarClassifierCascade( (GpuHidHaarClassifierCascade **)&cascade->hid_cascade ); +// cvFree( _cascade ); +// } +// } /****************************************************************************************\ @@ -1641,524 +1627,520 @@ gpuReleaseHaarClassifierCascade( CvHaarClassifierCascade **_cascade ) #define ICV_HAAR_PARENT_NAME "parent" #define ICV_HAAR_NEXT_NAME "next" -int -gpuIsHaarClassifier( const void *struct_ptr ) -{ - return CV_IS_HAAR_CLASSIFIER( struct_ptr ); -} +// static int gpuIsHaarClassifier( const void *struct_ptr ) +// { +// return CV_IS_HAAR_CLASSIFIER( struct_ptr ); +// } -void * -gpuReadHaarClassifier( CvFileStorage *fs, CvFileNode *node ) -{ - CvHaarClassifierCascade *cascade = NULL; +// static void * gpuReadHaarClassifier( CvFileStorage *fs, CvFileNode *node ) +// { +// CvHaarClassifierCascade *cascade = NULL; - char buf[256]; - CvFileNode *seq_fn = NULL; /* sequence */ - CvFileNode *fn = NULL; - CvFileNode *stages_fn = NULL; - CvSeqReader stages_reader; - int n; - int i, j, k, l; - int parent, next; +// char buf[256]; +// CvFileNode *seq_fn = NULL; /* sequence */ +// CvFileNode *fn = NULL; +// CvFileNode *stages_fn = NULL; +// CvSeqReader stages_reader; +// int n; +// int i, j, k, l; +// int parent, next; - stages_fn = cvGetFileNodeByName( fs, node, ICV_HAAR_STAGES_NAME ); - if( !stages_fn || !CV_NODE_IS_SEQ( stages_fn->tag) ) - CV_Error( CV_StsError, "Invalid stages node" ); +// stages_fn = cvGetFileNodeByName( fs, node, ICV_HAAR_STAGES_NAME ); +// if( !stages_fn || !CV_NODE_IS_SEQ( stages_fn->tag) ) +// CV_Error( CV_StsError, "Invalid stages node" ); - n = stages_fn->data.seq->total; - cascade = gpuCreateHaarClassifierCascade(n); +// n = stages_fn->data.seq->total; +// cascade = gpuCreateHaarClassifierCascade(n); - /* read size */ - seq_fn = cvGetFileNodeByName( fs, node, ICV_HAAR_SIZE_NAME ); - if( !seq_fn || !CV_NODE_IS_SEQ( seq_fn->tag ) || seq_fn->data.seq->total != 2 ) - CV_Error( CV_StsError, "size node is not a valid sequence." ); - fn = (CvFileNode *) cvGetSeqElem( seq_fn->data.seq, 0 ); - if( !CV_NODE_IS_INT( fn->tag ) || fn->data.i <= 0 ) - CV_Error( CV_StsError, "Invalid size node: width must be positive integer" ); - cascade->orig_window_size.width = fn->data.i; - fn = (CvFileNode *) cvGetSeqElem( seq_fn->data.seq, 1 ); - if( !CV_NODE_IS_INT( fn->tag ) || fn->data.i <= 0 ) - CV_Error( CV_StsError, "Invalid size node: height must be positive integer" ); - cascade->orig_window_size.height = fn->data.i; +// /* read size */ +// seq_fn = cvGetFileNodeByName( fs, node, ICV_HAAR_SIZE_NAME ); +// if( !seq_fn || !CV_NODE_IS_SEQ( seq_fn->tag ) || seq_fn->data.seq->total != 2 ) +// CV_Error( CV_StsError, "size node is not a valid sequence." ); +// fn = (CvFileNode *) cvGetSeqElem( seq_fn->data.seq, 0 ); +// if( !CV_NODE_IS_INT( fn->tag ) || fn->data.i <= 0 ) +// CV_Error( CV_StsError, "Invalid size node: width must be positive integer" ); +// cascade->orig_window_size.width = fn->data.i; +// fn = (CvFileNode *) cvGetSeqElem( seq_fn->data.seq, 1 ); +// if( !CV_NODE_IS_INT( fn->tag ) || fn->data.i <= 0 ) +// CV_Error( CV_StsError, "Invalid size node: height must be positive integer" ); +// cascade->orig_window_size.height = fn->data.i; - cvStartReadSeq( stages_fn->data.seq, &stages_reader ); - for( i = 0; i < n; ++i ) - { - CvFileNode *stage_fn; - CvFileNode *trees_fn; - CvSeqReader trees_reader; +// cvStartReadSeq( stages_fn->data.seq, &stages_reader ); +// for( i = 0; i < n; ++i ) +// { +// CvFileNode *stage_fn; +// CvFileNode *trees_fn; +// CvSeqReader trees_reader; - stage_fn = (CvFileNode *) stages_reader.ptr; - if( !CV_NODE_IS_MAP( stage_fn->tag ) ) - { - sprintf( buf, "Invalid stage %d", i ); - CV_Error( CV_StsError, buf ); - } +// stage_fn = (CvFileNode *) stages_reader.ptr; +// if( !CV_NODE_IS_MAP( stage_fn->tag ) ) +// { +// sprintf( buf, "Invalid stage %d", i ); +// CV_Error( CV_StsError, buf ); +// } - trees_fn = cvGetFileNodeByName( fs, stage_fn, ICV_HAAR_TREES_NAME ); - if( !trees_fn || !CV_NODE_IS_SEQ( trees_fn->tag ) - || trees_fn->data.seq->total <= 0 ) - { - sprintf( buf, "Trees node is not a valid sequence. (stage %d)", i ); - CV_Error( CV_StsError, buf ); - } +// trees_fn = cvGetFileNodeByName( fs, stage_fn, ICV_HAAR_TREES_NAME ); +// if( !trees_fn || !CV_NODE_IS_SEQ( trees_fn->tag ) +// || trees_fn->data.seq->total <= 0 ) +// { +// sprintf( buf, "Trees node is not a valid sequence. (stage %d)", i ); +// CV_Error( CV_StsError, buf ); +// } - cascade->stage_classifier[i].classifier = - (CvHaarClassifier *) cvAlloc( trees_fn->data.seq->total - * sizeof( cascade->stage_classifier[i].classifier[0] ) ); - for( j = 0; j < trees_fn->data.seq->total; ++j ) - { - cascade->stage_classifier[i].classifier[j].haar_feature = NULL; - } - cascade->stage_classifier[i].count = trees_fn->data.seq->total; +// cascade->stage_classifier[i].classifier = +// (CvHaarClassifier *) cvAlloc( trees_fn->data.seq->total +// * sizeof( cascade->stage_classifier[i].classifier[0] ) ); +// for( j = 0; j < trees_fn->data.seq->total; ++j ) +// { +// cascade->stage_classifier[i].classifier[j].haar_feature = NULL; +// } +// cascade->stage_classifier[i].count = trees_fn->data.seq->total; - cvStartReadSeq( trees_fn->data.seq, &trees_reader ); - for( j = 0; j < trees_fn->data.seq->total; ++j ) - { - CvFileNode *tree_fn; - CvSeqReader tree_reader; - CvHaarClassifier *classifier; - int last_idx; +// cvStartReadSeq( trees_fn->data.seq, &trees_reader ); +// for( j = 0; j < trees_fn->data.seq->total; ++j ) +// { +// CvFileNode *tree_fn; +// CvSeqReader tree_reader; +// CvHaarClassifier *classifier; +// int last_idx; - classifier = &cascade->stage_classifier[i].classifier[j]; - tree_fn = (CvFileNode *) trees_reader.ptr; - if( !CV_NODE_IS_SEQ( tree_fn->tag ) || tree_fn->data.seq->total <= 0 ) - { - sprintf( buf, "Tree node is not a valid sequence." - " (stage %d, tree %d)", i, j ); - CV_Error( CV_StsError, buf ); - } +// classifier = &cascade->stage_classifier[i].classifier[j]; +// tree_fn = (CvFileNode *) trees_reader.ptr; +// if( !CV_NODE_IS_SEQ( tree_fn->tag ) || tree_fn->data.seq->total <= 0 ) +// { +// sprintf( buf, "Tree node is not a valid sequence." +// " (stage %d, tree %d)", i, j ); +// CV_Error( CV_StsError, buf ); +// } - classifier->count = tree_fn->data.seq->total; - classifier->haar_feature = (CvHaarFeature *) cvAlloc( - classifier->count * ( sizeof( *classifier->haar_feature ) + - sizeof( *classifier->threshold ) + - sizeof( *classifier->left ) + - sizeof( *classifier->right ) ) + - (classifier->count + 1) * sizeof( *classifier->alpha ) ); - classifier->threshold = (float *) (classifier->haar_feature + classifier->count); - classifier->left = (int *) (classifier->threshold + classifier->count); - classifier->right = (int *) (classifier->left + classifier->count); - classifier->alpha = (float *) (classifier->right + classifier->count); +// classifier->count = tree_fn->data.seq->total; +// classifier->haar_feature = (CvHaarFeature *) cvAlloc( +// classifier->count * ( sizeof( *classifier->haar_feature ) + +// sizeof( *classifier->threshold ) + +// sizeof( *classifier->left ) + +// sizeof( *classifier->right ) ) + +// (classifier->count + 1) * sizeof( *classifier->alpha ) ); +// classifier->threshold = (float *) (classifier->haar_feature + classifier->count); +// classifier->left = (int *) (classifier->threshold + classifier->count); +// classifier->right = (int *) (classifier->left + classifier->count); +// classifier->alpha = (float *) (classifier->right + classifier->count); - cvStartReadSeq( tree_fn->data.seq, &tree_reader ); - for( k = 0, last_idx = 0; k < tree_fn->data.seq->total; ++k ) - { - CvFileNode *node_fn; - CvFileNode *feature_fn; - CvFileNode *rects_fn; - CvSeqReader rects_reader; +// cvStartReadSeq( tree_fn->data.seq, &tree_reader ); +// for( k = 0, last_idx = 0; k < tree_fn->data.seq->total; ++k ) +// { +// CvFileNode *node_fn; +// CvFileNode *feature_fn; +// CvFileNode *rects_fn; +// CvSeqReader rects_reader; - node_fn = (CvFileNode *) tree_reader.ptr; - if( !CV_NODE_IS_MAP( node_fn->tag ) ) - { - sprintf( buf, "Tree node %d is not a valid map. (stage %d, tree %d)", - k, i, j ); - CV_Error( CV_StsError, buf ); - } - feature_fn = cvGetFileNodeByName( fs, node_fn, ICV_HAAR_FEATURE_NAME ); - if( !feature_fn || !CV_NODE_IS_MAP( feature_fn->tag ) ) - { - sprintf( buf, "Feature node is not a valid map. " - "(stage %d, tree %d, node %d)", i, j, k ); - CV_Error( CV_StsError, buf ); - } - rects_fn = cvGetFileNodeByName( fs, feature_fn, ICV_HAAR_RECTS_NAME ); - if( !rects_fn || !CV_NODE_IS_SEQ( rects_fn->tag ) - || rects_fn->data.seq->total < 1 - || rects_fn->data.seq->total > CV_HAAR_FEATURE_MAX ) - { - sprintf( buf, "Rects node is not a valid sequence. " - "(stage %d, tree %d, node %d)", i, j, k ); - CV_Error( CV_StsError, buf ); - } - cvStartReadSeq( rects_fn->data.seq, &rects_reader ); - for( l = 0; l < rects_fn->data.seq->total; ++l ) - { - CvFileNode *rect_fn; - CvRect r; +// node_fn = (CvFileNode *) tree_reader.ptr; +// if( !CV_NODE_IS_MAP( node_fn->tag ) ) +// { +// sprintf( buf, "Tree node %d is not a valid map. (stage %d, tree %d)", +// k, i, j ); +// CV_Error( CV_StsError, buf ); +// } +// feature_fn = cvGetFileNodeByName( fs, node_fn, ICV_HAAR_FEATURE_NAME ); +// if( !feature_fn || !CV_NODE_IS_MAP( feature_fn->tag ) ) +// { +// sprintf( buf, "Feature node is not a valid map. " +// "(stage %d, tree %d, node %d)", i, j, k ); +// CV_Error( CV_StsError, buf ); +// } +// rects_fn = cvGetFileNodeByName( fs, feature_fn, ICV_HAAR_RECTS_NAME ); +// if( !rects_fn || !CV_NODE_IS_SEQ( rects_fn->tag ) +// || rects_fn->data.seq->total < 1 +// || rects_fn->data.seq->total > CV_HAAR_FEATURE_MAX ) +// { +// sprintf( buf, "Rects node is not a valid sequence. " +// "(stage %d, tree %d, node %d)", i, j, k ); +// CV_Error( CV_StsError, buf ); +// } +// cvStartReadSeq( rects_fn->data.seq, &rects_reader ); +// for( l = 0; l < rects_fn->data.seq->total; ++l ) +// { +// CvFileNode *rect_fn; +// CvRect r; - rect_fn = (CvFileNode *) rects_reader.ptr; - if( !CV_NODE_IS_SEQ( rect_fn->tag ) || rect_fn->data.seq->total != 5 ) - { - sprintf( buf, "Rect %d is not a valid sequence. " - "(stage %d, tree %d, node %d)", l, i, j, k ); - CV_Error( CV_StsError, buf ); - } +// rect_fn = (CvFileNode *) rects_reader.ptr; +// if( !CV_NODE_IS_SEQ( rect_fn->tag ) || rect_fn->data.seq->total != 5 ) +// { +// sprintf( buf, "Rect %d is not a valid sequence. " +// "(stage %d, tree %d, node %d)", l, i, j, k ); +// CV_Error( CV_StsError, buf ); +// } - fn = CV_SEQ_ELEM( rect_fn->data.seq, CvFileNode, 0 ); - if( !CV_NODE_IS_INT( fn->tag ) || fn->data.i < 0 ) - { - sprintf( buf, "x coordinate must be non-negative integer. " - "(stage %d, tree %d, node %d, rect %d)", i, j, k, l ); - CV_Error( CV_StsError, buf ); - } - r.x = fn->data.i; - fn = CV_SEQ_ELEM( rect_fn->data.seq, CvFileNode, 1 ); - if( !CV_NODE_IS_INT( fn->tag ) || fn->data.i < 0 ) - { - sprintf( buf, "y coordinate must be non-negative integer. " - "(stage %d, tree %d, node %d, rect %d)", i, j, k, l ); - CV_Error( CV_StsError, buf ); - } - r.y = fn->data.i; - fn = CV_SEQ_ELEM( rect_fn->data.seq, CvFileNode, 2 ); - if( !CV_NODE_IS_INT( fn->tag ) || fn->data.i <= 0 - || r.x + fn->data.i > cascade->orig_window_size.width ) - { - sprintf( buf, "width must be positive integer and " - "(x + width) must not exceed window width. " - "(stage %d, tree %d, node %d, rect %d)", i, j, k, l ); - CV_Error( CV_StsError, buf ); - } - r.width = fn->data.i; - fn = CV_SEQ_ELEM( rect_fn->data.seq, CvFileNode, 3 ); - if( !CV_NODE_IS_INT( fn->tag ) || fn->data.i <= 0 - || r.y + fn->data.i > cascade->orig_window_size.height ) - { - sprintf( buf, "height must be positive integer and " - "(y + height) must not exceed window height. " - "(stage %d, tree %d, node %d, rect %d)", i, j, k, l ); - CV_Error( CV_StsError, buf ); - } - r.height = fn->data.i; - fn = CV_SEQ_ELEM( rect_fn->data.seq, CvFileNode, 4 ); - if( !CV_NODE_IS_REAL( fn->tag ) ) - { - sprintf( buf, "weight must be real number. " - "(stage %d, tree %d, node %d, rect %d)", i, j, k, l ); - CV_Error( CV_StsError, buf ); - } +// fn = CV_SEQ_ELEM( rect_fn->data.seq, CvFileNode, 0 ); +// if( !CV_NODE_IS_INT( fn->tag ) || fn->data.i < 0 ) +// { +// sprintf( buf, "x coordinate must be non-negative integer. " +// "(stage %d, tree %d, node %d, rect %d)", i, j, k, l ); +// CV_Error( CV_StsError, buf ); +// } +// r.x = fn->data.i; +// fn = CV_SEQ_ELEM( rect_fn->data.seq, CvFileNode, 1 ); +// if( !CV_NODE_IS_INT( fn->tag ) || fn->data.i < 0 ) +// { +// sprintf( buf, "y coordinate must be non-negative integer. " +// "(stage %d, tree %d, node %d, rect %d)", i, j, k, l ); +// CV_Error( CV_StsError, buf ); +// } +// r.y = fn->data.i; +// fn = CV_SEQ_ELEM( rect_fn->data.seq, CvFileNode, 2 ); +// if( !CV_NODE_IS_INT( fn->tag ) || fn->data.i <= 0 +// || r.x + fn->data.i > cascade->orig_window_size.width ) +// { +// sprintf( buf, "width must be positive integer and " +// "(x + width) must not exceed window width. " +// "(stage %d, tree %d, node %d, rect %d)", i, j, k, l ); +// CV_Error( CV_StsError, buf ); +// } +// r.width = fn->data.i; +// fn = CV_SEQ_ELEM( rect_fn->data.seq, CvFileNode, 3 ); +// if( !CV_NODE_IS_INT( fn->tag ) || fn->data.i <= 0 +// || r.y + fn->data.i > cascade->orig_window_size.height ) +// { +// sprintf( buf, "height must be positive integer and " +// "(y + height) must not exceed window height. " +// "(stage %d, tree %d, node %d, rect %d)", i, j, k, l ); +// CV_Error( CV_StsError, buf ); +// } +// r.height = fn->data.i; +// fn = CV_SEQ_ELEM( rect_fn->data.seq, CvFileNode, 4 ); +// if( !CV_NODE_IS_REAL( fn->tag ) ) +// { +// sprintf( buf, "weight must be real number. " +// "(stage %d, tree %d, node %d, rect %d)", i, j, k, l ); +// CV_Error( CV_StsError, buf ); +// } - classifier->haar_feature[k].rect[l].weight = (float) fn->data.f; - classifier->haar_feature[k].rect[l].r = r; +// classifier->haar_feature[k].rect[l].weight = (float) fn->data.f; +// classifier->haar_feature[k].rect[l].r = r; - CV_NEXT_SEQ_ELEM( sizeof( *rect_fn ), rects_reader ); - } /* for each rect */ - for( l = rects_fn->data.seq->total; l < CV_HAAR_FEATURE_MAX; ++l ) - { - classifier->haar_feature[k].rect[l].weight = 0; - classifier->haar_feature[k].rect[l].r = cvRect( 0, 0, 0, 0 ); - } +// CV_NEXT_SEQ_ELEM( sizeof( *rect_fn ), rects_reader ); +// } /* for each rect */ +// for( l = rects_fn->data.seq->total; l < CV_HAAR_FEATURE_MAX; ++l ) +// { +// classifier->haar_feature[k].rect[l].weight = 0; +// classifier->haar_feature[k].rect[l].r = cvRect( 0, 0, 0, 0 ); +// } - fn = cvGetFileNodeByName( fs, feature_fn, ICV_HAAR_TILTED_NAME); - if( !fn || !CV_NODE_IS_INT( fn->tag ) ) - { - sprintf( buf, "tilted must be 0 or 1. " - "(stage %d, tree %d, node %d)", i, j, k ); - CV_Error( CV_StsError, buf ); - } - classifier->haar_feature[k].tilted = ( fn->data.i != 0 ); - fn = cvGetFileNodeByName( fs, node_fn, ICV_HAAR_THRESHOLD_NAME); - if( !fn || !CV_NODE_IS_REAL( fn->tag ) ) - { - sprintf( buf, "threshold must be real number. " - "(stage %d, tree %d, node %d)", i, j, k ); - CV_Error( CV_StsError, buf ); - } - classifier->threshold[k] = (float) fn->data.f; - fn = cvGetFileNodeByName( fs, node_fn, ICV_HAAR_LEFT_NODE_NAME); - if( fn ) - { - if( !CV_NODE_IS_INT( fn->tag ) || fn->data.i <= k - || fn->data.i >= tree_fn->data.seq->total ) - { - sprintf( buf, "left node must be valid node number. " - "(stage %d, tree %d, node %d)", i, j, k ); - CV_Error( CV_StsError, buf ); - } - /* left node */ - classifier->left[k] = fn->data.i; - } - else - { - fn = cvGetFileNodeByName( fs, node_fn, ICV_HAAR_LEFT_VAL_NAME ); - if( !fn ) - { - sprintf( buf, "left node or left value must be specified. " - "(stage %d, tree %d, node %d)", i, j, k ); - CV_Error( CV_StsError, buf ); - } - if( !CV_NODE_IS_REAL( fn->tag ) ) - { - sprintf( buf, "left value must be real number. " - "(stage %d, tree %d, node %d)", i, j, k ); - CV_Error( CV_StsError, buf ); - } - /* left value */ - if( last_idx >= classifier->count + 1 ) - { - sprintf( buf, "Tree structure is broken: too many values. " - "(stage %d, tree %d, node %d)", i, j, k ); - CV_Error( CV_StsError, buf ); - } - classifier->left[k] = -last_idx; - classifier->alpha[last_idx++] = (float) fn->data.f; - } - fn = cvGetFileNodeByName( fs, node_fn, ICV_HAAR_RIGHT_NODE_NAME); - if( fn ) - { - if( !CV_NODE_IS_INT( fn->tag ) || fn->data.i <= k - || fn->data.i >= tree_fn->data.seq->total ) - { - sprintf( buf, "right node must be valid node number. " - "(stage %d, tree %d, node %d)", i, j, k ); - CV_Error( CV_StsError, buf ); - } - /* right node */ - classifier->right[k] = fn->data.i; - } - else - { - fn = cvGetFileNodeByName( fs, node_fn, ICV_HAAR_RIGHT_VAL_NAME ); - if( !fn ) - { - sprintf( buf, "right node or right value must be specified. " - "(stage %d, tree %d, node %d)", i, j, k ); - CV_Error( CV_StsError, buf ); - } - if( !CV_NODE_IS_REAL( fn->tag ) ) - { - sprintf( buf, "right value must be real number. " - "(stage %d, tree %d, node %d)", i, j, k ); - CV_Error( CV_StsError, buf ); - } - /* right value */ - if( last_idx >= classifier->count + 1 ) - { - sprintf( buf, "Tree structure is broken: too many values. " - "(stage %d, tree %d, node %d)", i, j, k ); - CV_Error( CV_StsError, buf ); - } - classifier->right[k] = -last_idx; - classifier->alpha[last_idx++] = (float) fn->data.f; - } +// fn = cvGetFileNodeByName( fs, feature_fn, ICV_HAAR_TILTED_NAME); +// if( !fn || !CV_NODE_IS_INT( fn->tag ) ) +// { +// sprintf( buf, "tilted must be 0 or 1. " +// "(stage %d, tree %d, node %d)", i, j, k ); +// CV_Error( CV_StsError, buf ); +// } +// classifier->haar_feature[k].tilted = ( fn->data.i != 0 ); +// fn = cvGetFileNodeByName( fs, node_fn, ICV_HAAR_THRESHOLD_NAME); +// if( !fn || !CV_NODE_IS_REAL( fn->tag ) ) +// { +// sprintf( buf, "threshold must be real number. " +// "(stage %d, tree %d, node %d)", i, j, k ); +// CV_Error( CV_StsError, buf ); +// } +// classifier->threshold[k] = (float) fn->data.f; +// fn = cvGetFileNodeByName( fs, node_fn, ICV_HAAR_LEFT_NODE_NAME); +// if( fn ) +// { +// if( !CV_NODE_IS_INT( fn->tag ) || fn->data.i <= k +// || fn->data.i >= tree_fn->data.seq->total ) +// { +// sprintf( buf, "left node must be valid node number. " +// "(stage %d, tree %d, node %d)", i, j, k ); +// CV_Error( CV_StsError, buf ); +// } +// /* left node */ +// classifier->left[k] = fn->data.i; +// } +// else +// { +// fn = cvGetFileNodeByName( fs, node_fn, ICV_HAAR_LEFT_VAL_NAME ); +// if( !fn ) +// { +// sprintf( buf, "left node or left value must be specified. " +// "(stage %d, tree %d, node %d)", i, j, k ); +// CV_Error( CV_StsError, buf ); +// } +// if( !CV_NODE_IS_REAL( fn->tag ) ) +// { +// sprintf( buf, "left value must be real number. " +// "(stage %d, tree %d, node %d)", i, j, k ); +// CV_Error( CV_StsError, buf ); +// } +// /* left value */ +// if( last_idx >= classifier->count + 1 ) +// { +// sprintf( buf, "Tree structure is broken: too many values. " +// "(stage %d, tree %d, node %d)", i, j, k ); +// CV_Error( CV_StsError, buf ); +// } +// classifier->left[k] = -last_idx; +// classifier->alpha[last_idx++] = (float) fn->data.f; +// } +// fn = cvGetFileNodeByName( fs, node_fn, ICV_HAAR_RIGHT_NODE_NAME); +// if( fn ) +// { +// if( !CV_NODE_IS_INT( fn->tag ) || fn->data.i <= k +// || fn->data.i >= tree_fn->data.seq->total ) +// { +// sprintf( buf, "right node must be valid node number. " +// "(stage %d, tree %d, node %d)", i, j, k ); +// CV_Error( CV_StsError, buf ); +// } +// /* right node */ +// classifier->right[k] = fn->data.i; +// } +// else +// { +// fn = cvGetFileNodeByName( fs, node_fn, ICV_HAAR_RIGHT_VAL_NAME ); +// if( !fn ) +// { +// sprintf( buf, "right node or right value must be specified. " +// "(stage %d, tree %d, node %d)", i, j, k ); +// CV_Error( CV_StsError, buf ); +// } +// if( !CV_NODE_IS_REAL( fn->tag ) ) +// { +// sprintf( buf, "right value must be real number. " +// "(stage %d, tree %d, node %d)", i, j, k ); +// CV_Error( CV_StsError, buf ); +// } +// /* right value */ +// if( last_idx >= classifier->count + 1 ) +// { +// sprintf( buf, "Tree structure is broken: too many values. " +// "(stage %d, tree %d, node %d)", i, j, k ); +// CV_Error( CV_StsError, buf ); +// } +// classifier->right[k] = -last_idx; +// classifier->alpha[last_idx++] = (float) fn->data.f; +// } - CV_NEXT_SEQ_ELEM( sizeof( *node_fn ), tree_reader ); - } /* for each node */ - if( last_idx != classifier->count + 1 ) - { - sprintf( buf, "Tree structure is broken: too few values. " - "(stage %d, tree %d)", i, j ); - CV_Error( CV_StsError, buf ); - } +// CV_NEXT_SEQ_ELEM( sizeof( *node_fn ), tree_reader ); +// } /* for each node */ +// if( last_idx != classifier->count + 1 ) +// { +// sprintf( buf, "Tree structure is broken: too few values. " +// "(stage %d, tree %d)", i, j ); +// CV_Error( CV_StsError, buf ); +// } - CV_NEXT_SEQ_ELEM( sizeof( *tree_fn ), trees_reader ); - } /* for each tree */ +// CV_NEXT_SEQ_ELEM( sizeof( *tree_fn ), trees_reader ); +// } /* for each tree */ - fn = cvGetFileNodeByName( fs, stage_fn, ICV_HAAR_STAGE_THRESHOLD_NAME); - if( !fn || !CV_NODE_IS_REAL( fn->tag ) ) - { - sprintf( buf, "stage threshold must be real number. (stage %d)", i ); - CV_Error( CV_StsError, buf ); - } - cascade->stage_classifier[i].threshold = (float) fn->data.f; +// fn = cvGetFileNodeByName( fs, stage_fn, ICV_HAAR_STAGE_THRESHOLD_NAME); +// if( !fn || !CV_NODE_IS_REAL( fn->tag ) ) +// { +// sprintf( buf, "stage threshold must be real number. (stage %d)", i ); +// CV_Error( CV_StsError, buf ); +// } +// cascade->stage_classifier[i].threshold = (float) fn->data.f; - parent = i - 1; - next = -1; +// parent = i - 1; +// next = -1; - fn = cvGetFileNodeByName( fs, stage_fn, ICV_HAAR_PARENT_NAME ); - if( !fn || !CV_NODE_IS_INT( fn->tag ) - || fn->data.i < -1 || fn->data.i >= cascade->count ) - { - sprintf( buf, "parent must be integer number. (stage %d)", i ); - CV_Error( CV_StsError, buf ); - } - parent = fn->data.i; - fn = cvGetFileNodeByName( fs, stage_fn, ICV_HAAR_NEXT_NAME ); - if( !fn || !CV_NODE_IS_INT( fn->tag ) - || fn->data.i < -1 || fn->data.i >= cascade->count ) - { - sprintf( buf, "next must be integer number. (stage %d)", i ); - CV_Error( CV_StsError, buf ); - } - next = fn->data.i; +// fn = cvGetFileNodeByName( fs, stage_fn, ICV_HAAR_PARENT_NAME ); +// if( !fn || !CV_NODE_IS_INT( fn->tag ) +// || fn->data.i < -1 || fn->data.i >= cascade->count ) +// { +// sprintf( buf, "parent must be integer number. (stage %d)", i ); +// CV_Error( CV_StsError, buf ); +// } +// parent = fn->data.i; +// fn = cvGetFileNodeByName( fs, stage_fn, ICV_HAAR_NEXT_NAME ); +// if( !fn || !CV_NODE_IS_INT( fn->tag ) +// || fn->data.i < -1 || fn->data.i >= cascade->count ) +// { +// sprintf( buf, "next must be integer number. (stage %d)", i ); +// CV_Error( CV_StsError, buf ); +// } +// next = fn->data.i; - cascade->stage_classifier[i].parent = parent; - cascade->stage_classifier[i].next = next; - cascade->stage_classifier[i].child = -1; +// cascade->stage_classifier[i].parent = parent; +// cascade->stage_classifier[i].next = next; +// cascade->stage_classifier[i].child = -1; - if( parent != -1 && cascade->stage_classifier[parent].child == -1 ) - { - cascade->stage_classifier[parent].child = i; - } +// if( parent != -1 && cascade->stage_classifier[parent].child == -1 ) +// { +// cascade->stage_classifier[parent].child = i; +// } - CV_NEXT_SEQ_ELEM( sizeof( *stage_fn ), stages_reader ); - } /* for each stage */ +// CV_NEXT_SEQ_ELEM( sizeof( *stage_fn ), stages_reader ); +// } /* for each stage */ - return cascade; -} +// return cascade; +// } -void -gpuWriteHaarClassifier( CvFileStorage *fs, const char *name, const void *struct_ptr, - CvAttrList attributes ) -{ - int i, j, k, l; - char buf[256]; - const CvHaarClassifierCascade *cascade = (const CvHaarClassifierCascade *) struct_ptr; +// static void gpuWriteHaarClassifier( CvFileStorage *fs, const char *name, const void *struct_ptr, +// CvAttrList attributes ) +// { +// int i, j, k, l; +// char buf[256]; +// const CvHaarClassifierCascade *cascade = (const CvHaarClassifierCascade *) struct_ptr; - /* TODO: parameters check */ +// /* TODO: parameters check */ - cvStartWriteStruct( fs, name, CV_NODE_MAP, CV_TYPE_NAME_HAAR, attributes ); +// cvStartWriteStruct( fs, name, CV_NODE_MAP, CV_TYPE_NAME_HAAR, attributes ); - cvStartWriteStruct( fs, ICV_HAAR_SIZE_NAME, CV_NODE_SEQ | CV_NODE_FLOW ); - cvWriteInt( fs, NULL, cascade->orig_window_size.width ); - cvWriteInt( fs, NULL, cascade->orig_window_size.height ); - cvEndWriteStruct( fs ); /* size */ +// cvStartWriteStruct( fs, ICV_HAAR_SIZE_NAME, CV_NODE_SEQ | CV_NODE_FLOW ); +// cvWriteInt( fs, NULL, cascade->orig_window_size.width ); +// cvWriteInt( fs, NULL, cascade->orig_window_size.height ); +// cvEndWriteStruct( fs ); /* size */ - cvStartWriteStruct( fs, ICV_HAAR_STAGES_NAME, CV_NODE_SEQ ); - for( i = 0; i < cascade->count; ++i ) - { - cvStartWriteStruct( fs, NULL, CV_NODE_MAP ); - sprintf( buf, "stage %d", i ); - cvWriteComment( fs, buf, 1 ); +// cvStartWriteStruct( fs, ICV_HAAR_STAGES_NAME, CV_NODE_SEQ ); +// for( i = 0; i < cascade->count; ++i ) +// { +// cvStartWriteStruct( fs, NULL, CV_NODE_MAP ); +// sprintf( buf, "stage %d", i ); +// cvWriteComment( fs, buf, 1 ); - cvStartWriteStruct( fs, ICV_HAAR_TREES_NAME, CV_NODE_SEQ ); +// cvStartWriteStruct( fs, ICV_HAAR_TREES_NAME, CV_NODE_SEQ ); - for( j = 0; j < cascade->stage_classifier[i].count; ++j ) - { - CvHaarClassifier *tree = &cascade->stage_classifier[i].classifier[j]; +// for( j = 0; j < cascade->stage_classifier[i].count; ++j ) +// { +// CvHaarClassifier *tree = &cascade->stage_classifier[i].classifier[j]; - cvStartWriteStruct( fs, NULL, CV_NODE_SEQ ); - sprintf( buf, "tree %d", j ); - cvWriteComment( fs, buf, 1 ); +// cvStartWriteStruct( fs, NULL, CV_NODE_SEQ ); +// sprintf( buf, "tree %d", j ); +// cvWriteComment( fs, buf, 1 ); - for( k = 0; k < tree->count; ++k ) - { - CvHaarFeature *feature = &tree->haar_feature[k]; +// for( k = 0; k < tree->count; ++k ) +// { +// CvHaarFeature *feature = &tree->haar_feature[k]; - cvStartWriteStruct( fs, NULL, CV_NODE_MAP ); - if( k ) - { - sprintf( buf, "node %d", k ); - } - else - { - sprintf( buf, "root node" ); - } - cvWriteComment( fs, buf, 1 ); +// cvStartWriteStruct( fs, NULL, CV_NODE_MAP ); +// if( k ) +// { +// sprintf( buf, "node %d", k ); +// } +// else +// { +// sprintf( buf, "root node" ); +// } +// cvWriteComment( fs, buf, 1 ); - cvStartWriteStruct( fs, ICV_HAAR_FEATURE_NAME, CV_NODE_MAP ); +// cvStartWriteStruct( fs, ICV_HAAR_FEATURE_NAME, CV_NODE_MAP ); - cvStartWriteStruct( fs, ICV_HAAR_RECTS_NAME, CV_NODE_SEQ ); - for( l = 0; l < CV_HAAR_FEATURE_MAX && feature->rect[l].r.width != 0; ++l ) - { - cvStartWriteStruct( fs, NULL, CV_NODE_SEQ | CV_NODE_FLOW ); - cvWriteInt( fs, NULL, feature->rect[l].r.x ); - cvWriteInt( fs, NULL, feature->rect[l].r.y ); - cvWriteInt( fs, NULL, feature->rect[l].r.width ); - cvWriteInt( fs, NULL, feature->rect[l].r.height ); - cvWriteReal( fs, NULL, feature->rect[l].weight ); - cvEndWriteStruct( fs ); /* rect */ - } - cvEndWriteStruct( fs ); /* rects */ - cvWriteInt( fs, ICV_HAAR_TILTED_NAME, feature->tilted ); - cvEndWriteStruct( fs ); /* feature */ +// cvStartWriteStruct( fs, ICV_HAAR_RECTS_NAME, CV_NODE_SEQ ); +// for( l = 0; l < CV_HAAR_FEATURE_MAX && feature->rect[l].r.width != 0; ++l ) +// { +// cvStartWriteStruct( fs, NULL, CV_NODE_SEQ | CV_NODE_FLOW ); +// cvWriteInt( fs, NULL, feature->rect[l].r.x ); +// cvWriteInt( fs, NULL, feature->rect[l].r.y ); +// cvWriteInt( fs, NULL, feature->rect[l].r.width ); +// cvWriteInt( fs, NULL, feature->rect[l].r.height ); +// cvWriteReal( fs, NULL, feature->rect[l].weight ); +// cvEndWriteStruct( fs ); /* rect */ +// } +// cvEndWriteStruct( fs ); /* rects */ +// cvWriteInt( fs, ICV_HAAR_TILTED_NAME, feature->tilted ); +// cvEndWriteStruct( fs ); /* feature */ - cvWriteReal( fs, ICV_HAAR_THRESHOLD_NAME, tree->threshold[k]); +// cvWriteReal( fs, ICV_HAAR_THRESHOLD_NAME, tree->threshold[k]); - if( tree->left[k] > 0 ) - { - cvWriteInt( fs, ICV_HAAR_LEFT_NODE_NAME, tree->left[k] ); - } - else - { - cvWriteReal( fs, ICV_HAAR_LEFT_VAL_NAME, - tree->alpha[-tree->left[k]] ); - } +// if( tree->left[k] > 0 ) +// { +// cvWriteInt( fs, ICV_HAAR_LEFT_NODE_NAME, tree->left[k] ); +// } +// else +// { +// cvWriteReal( fs, ICV_HAAR_LEFT_VAL_NAME, +// tree->alpha[-tree->left[k]] ); +// } - if( tree->right[k] > 0 ) - { - cvWriteInt( fs, ICV_HAAR_RIGHT_NODE_NAME, tree->right[k] ); - } - else - { - cvWriteReal( fs, ICV_HAAR_RIGHT_VAL_NAME, - tree->alpha[-tree->right[k]] ); - } +// if( tree->right[k] > 0 ) +// { +// cvWriteInt( fs, ICV_HAAR_RIGHT_NODE_NAME, tree->right[k] ); +// } +// else +// { +// cvWriteReal( fs, ICV_HAAR_RIGHT_VAL_NAME, +// tree->alpha[-tree->right[k]] ); +// } - cvEndWriteStruct( fs ); /* split */ - } +// cvEndWriteStruct( fs ); /* split */ +// } - cvEndWriteStruct( fs ); /* tree */ - } +// cvEndWriteStruct( fs ); /* tree */ +// } - cvEndWriteStruct( fs ); /* trees */ +// cvEndWriteStruct( fs ); /* trees */ - cvWriteReal( fs, ICV_HAAR_STAGE_THRESHOLD_NAME, cascade->stage_classifier[i].threshold); - cvWriteInt( fs, ICV_HAAR_PARENT_NAME, cascade->stage_classifier[i].parent ); - cvWriteInt( fs, ICV_HAAR_NEXT_NAME, cascade->stage_classifier[i].next ); +// cvWriteReal( fs, ICV_HAAR_STAGE_THRESHOLD_NAME, cascade->stage_classifier[i].threshold); +// cvWriteInt( fs, ICV_HAAR_PARENT_NAME, cascade->stage_classifier[i].parent ); +// cvWriteInt( fs, ICV_HAAR_NEXT_NAME, cascade->stage_classifier[i].next ); - cvEndWriteStruct( fs ); /* stage */ - } /* for each stage */ +// cvEndWriteStruct( fs ); /* stage */ +// } /* for each stage */ - cvEndWriteStruct( fs ); /* stages */ - cvEndWriteStruct( fs ); /* root */ -} +// cvEndWriteStruct( fs ); /* stages */ +// cvEndWriteStruct( fs ); /* root */ +// } -void * -gpuCloneHaarClassifier( const void *struct_ptr ) -{ - CvHaarClassifierCascade *cascade = NULL; +// static void * gpuCloneHaarClassifier( const void *struct_ptr ) +// { +// CvHaarClassifierCascade *cascade = NULL; - int i, j, k, n; - const CvHaarClassifierCascade *cascade_src = - (const CvHaarClassifierCascade *) struct_ptr; +// int i, j, k, n; +// const CvHaarClassifierCascade *cascade_src = +// (const CvHaarClassifierCascade *) struct_ptr; - n = cascade_src->count; - cascade = gpuCreateHaarClassifierCascade(n); - cascade->orig_window_size = cascade_src->orig_window_size; +// n = cascade_src->count; +// cascade = gpuCreateHaarClassifierCascade(n); +// cascade->orig_window_size = cascade_src->orig_window_size; - for( i = 0; i < n; ++i ) - { - cascade->stage_classifier[i].parent = cascade_src->stage_classifier[i].parent; - cascade->stage_classifier[i].next = cascade_src->stage_classifier[i].next; - cascade->stage_classifier[i].child = cascade_src->stage_classifier[i].child; - cascade->stage_classifier[i].threshold = cascade_src->stage_classifier[i].threshold; +// for( i = 0; i < n; ++i ) +// { +// cascade->stage_classifier[i].parent = cascade_src->stage_classifier[i].parent; +// cascade->stage_classifier[i].next = cascade_src->stage_classifier[i].next; +// cascade->stage_classifier[i].child = cascade_src->stage_classifier[i].child; +// cascade->stage_classifier[i].threshold = cascade_src->stage_classifier[i].threshold; - cascade->stage_classifier[i].count = 0; - cascade->stage_classifier[i].classifier = - (CvHaarClassifier *) cvAlloc( cascade_src->stage_classifier[i].count - * sizeof( cascade->stage_classifier[i].classifier[0] ) ); +// cascade->stage_classifier[i].count = 0; +// cascade->stage_classifier[i].classifier = +// (CvHaarClassifier *) cvAlloc( cascade_src->stage_classifier[i].count +// * sizeof( cascade->stage_classifier[i].classifier[0] ) ); - cascade->stage_classifier[i].count = cascade_src->stage_classifier[i].count; +// cascade->stage_classifier[i].count = cascade_src->stage_classifier[i].count; - for( j = 0; j < cascade->stage_classifier[i].count; ++j ) - cascade->stage_classifier[i].classifier[j].haar_feature = NULL; +// for( j = 0; j < cascade->stage_classifier[i].count; ++j ) +// cascade->stage_classifier[i].classifier[j].haar_feature = NULL; - for( j = 0; j < cascade->stage_classifier[i].count; ++j ) - { - const CvHaarClassifier *classifier_src = - &cascade_src->stage_classifier[i].classifier[j]; - CvHaarClassifier *classifier = - &cascade->stage_classifier[i].classifier[j]; +// for( j = 0; j < cascade->stage_classifier[i].count; ++j ) +// { +// const CvHaarClassifier *classifier_src = +// &cascade_src->stage_classifier[i].classifier[j]; +// CvHaarClassifier *classifier = +// &cascade->stage_classifier[i].classifier[j]; - classifier->count = classifier_src->count; - classifier->haar_feature = (CvHaarFeature *) cvAlloc( - classifier->count * ( sizeof( *classifier->haar_feature ) + - sizeof( *classifier->threshold ) + - sizeof( *classifier->left ) + - sizeof( *classifier->right ) ) + - (classifier->count + 1) * sizeof( *classifier->alpha ) ); - classifier->threshold = (float *) (classifier->haar_feature + classifier->count); - classifier->left = (int *) (classifier->threshold + classifier->count); - classifier->right = (int *) (classifier->left + classifier->count); - classifier->alpha = (float *) (classifier->right + classifier->count); - for( k = 0; k < classifier->count; ++k ) - { - classifier->haar_feature[k] = classifier_src->haar_feature[k]; - classifier->threshold[k] = classifier_src->threshold[k]; - classifier->left[k] = classifier_src->left[k]; - classifier->right[k] = classifier_src->right[k]; - classifier->alpha[k] = classifier_src->alpha[k]; - } - classifier->alpha[classifier->count] = - classifier_src->alpha[classifier->count]; - } - } +// classifier->count = classifier_src->count; +// classifier->haar_feature = (CvHaarFeature *) cvAlloc( +// classifier->count * ( sizeof( *classifier->haar_feature ) + +// sizeof( *classifier->threshold ) + +// sizeof( *classifier->left ) + +// sizeof( *classifier->right ) ) + +// (classifier->count + 1) * sizeof( *classifier->alpha ) ); +// classifier->threshold = (float *) (classifier->haar_feature + classifier->count); +// classifier->left = (int *) (classifier->threshold + classifier->count); +// classifier->right = (int *) (classifier->left + classifier->count); +// classifier->alpha = (float *) (classifier->right + classifier->count); +// for( k = 0; k < classifier->count; ++k ) +// { +// classifier->haar_feature[k] = classifier_src->haar_feature[k]; +// classifier->threshold[k] = classifier_src->threshold[k]; +// classifier->left[k] = classifier_src->left[k]; +// classifier->right[k] = classifier_src->right[k]; +// classifier->alpha[k] = classifier_src->alpha[k]; +// } +// classifier->alpha[classifier->count] = +// classifier_src->alpha[classifier->count]; +// } +// } - return cascade; -} +// return cascade; +// } #if 0 CvType haar_type( CV_TYPE_NAME_HAAR, gpuIsHaarClassifier, @@ -2170,41 +2152,41 @@ CvType haar_type( CV_TYPE_NAME_HAAR, gpuIsHaarClassifier, namespace cv { - HaarClassifierCascade::HaarClassifierCascade() {} - HaarClassifierCascade::HaarClassifierCascade(const String &filename) - { - load(filename); - } +HaarClassifierCascade::HaarClassifierCascade() {} +HaarClassifierCascade::HaarClassifierCascade(const String &filename) +{ + load(filename); +} - bool HaarClassifierCascade::load(const String &filename) - { - cascade = Ptr((CvHaarClassifierCascade *)cvLoad(filename.c_str(), 0, 0, 0)); - return (CvHaarClassifierCascade *)cascade != 0; - } +bool HaarClassifierCascade::load(const String &filename) +{ + cascade = Ptr((CvHaarClassifierCascade *)cvLoad(filename.c_str(), 0, 0, 0)); + return (CvHaarClassifierCascade *)cascade != 0; +} - void HaarClassifierCascade::detectMultiScale( const Mat &image, - Vector &objects, double scaleFactor, - int minNeighbors, int flags, - Size minSize ) - { - MemStorage storage(cvCreateMemStorage(0)); - CvMat _image = image; - CvSeq *_objects = gpuHaarDetectObjects( &_image, cascade, storage, scaleFactor, - minNeighbors, flags, minSize ); - Seq(_objects).copyTo(objects); - } +void HaarClassifierCascade::detectMultiScale( const Mat &image, + Vector &objects, double scaleFactor, + int minNeighbors, int flags, + Size minSize ) +{ + MemStorage storage(cvCreateMemStorage(0)); + CvMat _image = image; + CvSeq *_objects = gpuHaarDetectObjects( &_image, cascade, storage, scaleFactor, + minNeighbors, flags, minSize ); + Seq(_objects).copyTo(objects); +} - int HaarClassifierCascade::runAt(Point pt, int startStage, int) const - { - return gpuRunHaarClassifierCascade(cascade, pt, startStage); - } +int HaarClassifierCascade::runAt(Point pt, int startStage, int) const +{ + return gpuRunHaarClassifierCascade(cascade, pt, startStage); +} - void HaarClassifierCascade::setImages( const Mat &sum, const Mat &sqsum, - const Mat &tilted, double scale ) - { - CvMat _sum = sum, _sqsum = sqsum, _tilted = tilted; - gpuSetImagesForHaarClassifierCascade( cascade, &_sum, &_sqsum, &_tilted, scale ); - } +void HaarClassifierCascade::setImages( const Mat &sum, const Mat &sqsum, + const Mat &tilted, double scale ) +{ + CvMat _sum = sum, _sqsum = sqsum, _tilted = tilted; + gpuSetImagesForHaarClassifierCascade( cascade, &_sum, &_sqsum, &_tilted, scale ); +} } #endif @@ -2497,8 +2479,7 @@ size_t p_offset ) */ -CV_IMPL int -gpuRunHaarClassifierCascade( /*const CvHaarClassifierCascade *_cascade, +static int gpuRunHaarClassifierCascade( /*const CvHaarClassifierCascade *_cascade, CvPoint pt, int start_stage */) { /* @@ -2579,116 +2560,116 @@ CvPoint pt, int start_stage */) namespace cv { - namespace ocl +namespace ocl +{ + +struct gpuHaarDetectObjects_ScaleImage_Invoker +{ + gpuHaarDetectObjects_ScaleImage_Invoker( const CvHaarClassifierCascade *_cascade, + int _stripSize, double _factor, + const Mat &_sum1, const Mat &_sqsum1, Mat *_norm1, + Mat *_mask1, Rect _equRect, ConcurrentRectVector &_vec ) { + cascade = _cascade; + stripSize = _stripSize; + factor = _factor; + sum1 = _sum1; + sqsum1 = _sqsum1; + norm1 = _norm1; + mask1 = _mask1; + equRect = _equRect; + vec = &_vec; + } - struct gpuHaarDetectObjects_ScaleImage_Invoker + void operator()( const BlockedRange &range ) const + { + Size winSize0 = cascade->orig_window_size; + Size winSize(cvRound(winSize0.width * factor), cvRound(winSize0.height * factor)); + int y1 = range.begin() * stripSize, y2 = min(range.end() * stripSize, sum1.rows - 1 - winSize0.height); + Size ssz(sum1.cols - 1 - winSize0.width, y2 - y1); + int x, y, ystep = factor > 2 ? 1 : 2; + + for( y = y1; y < y2; y += ystep ) + for( x = 0; x < ssz.width; x += ystep ) + { + if( gpuRunHaarClassifierCascade( /*cascade, cvPoint(x, y), 0*/ ) > 0 ) + vec->push_back(Rect(cvRound(x * factor), cvRound(y * factor), + winSize.width, winSize.height)); + } + } + + const CvHaarClassifierCascade *cascade; + int stripSize; + double factor; + Mat sum1, sqsum1, *norm1, *mask1; + Rect equRect; + ConcurrentRectVector *vec; +}; + + +struct gpuHaarDetectObjects_ScaleCascade_Invoker +{ + gpuHaarDetectObjects_ScaleCascade_Invoker( const CvHaarClassifierCascade *_cascade, + Size _winsize, const Range &_xrange, double _ystep, + size_t _sumstep, const int **_p, const int **_pq, + ConcurrentRectVector &_vec ) + { + cascade = _cascade; + winsize = _winsize; + xrange = _xrange; + ystep = _ystep; + sumstep = _sumstep; + p = _p; + pq = _pq; + vec = &_vec; + } + + void operator()( const BlockedRange &range ) const + { + int iy, startY = range.begin(), endY = range.end(); + const int *p0 = p[0], *p1 = p[1], *p2 = p[2], *p3 = p[3]; + const int *pq0 = pq[0], *pq1 = pq[1], *pq2 = pq[2], *pq3 = pq[3]; + bool doCannyPruning = p0 != 0; + int sstep = (int)(sumstep / sizeof(p0[0])); + + for( iy = startY; iy < endY; iy++ ) { - gpuHaarDetectObjects_ScaleImage_Invoker( const CvHaarClassifierCascade *_cascade, - int _stripSize, double _factor, - const Mat &_sum1, const Mat &_sqsum1, Mat *_norm1, - Mat *_mask1, Rect _equRect, ConcurrentRectVector &_vec ) + int ix, y = cvRound(iy * ystep), ixstep = 1; + for( ix = xrange.start; ix < xrange.end; ix += ixstep ) { - cascade = _cascade; - stripSize = _stripSize; - factor = _factor; - sum1 = _sum1; - sqsum1 = _sqsum1; - norm1 = _norm1; - mask1 = _mask1; - equRect = _equRect; - vec = &_vec; - } + int x = cvRound(ix * ystep); // it should really be ystep, not ixstep - void operator()( const BlockedRange &range ) const - { - Size winSize0 = cascade->orig_window_size; - Size winSize(cvRound(winSize0.width * factor), cvRound(winSize0.height * factor)); - int y1 = range.begin() * stripSize, y2 = min(range.end() * stripSize, sum1.rows - 1 - winSize0.height); - Size ssz(sum1.cols - 1 - winSize0.width, y2 - y1); - int x, y, ystep = factor > 2 ? 1 : 2; - - for( y = y1; y < y2; y += ystep ) - for( x = 0; x < ssz.width; x += ystep ) - { - if( gpuRunHaarClassifierCascade( /*cascade, cvPoint(x, y), 0*/ ) > 0 ) - vec->push_back(Rect(cvRound(x * factor), cvRound(y * factor), - winSize.width, winSize.height)); - } - } - - const CvHaarClassifierCascade *cascade; - int stripSize; - double factor; - Mat sum1, sqsum1, *norm1, *mask1; - Rect equRect; - ConcurrentRectVector *vec; - }; - - - struct gpuHaarDetectObjects_ScaleCascade_Invoker - { - gpuHaarDetectObjects_ScaleCascade_Invoker( const CvHaarClassifierCascade *_cascade, - Size _winsize, const Range &_xrange, double _ystep, - size_t _sumstep, const int **_p, const int **_pq, - ConcurrentRectVector &_vec ) - { - cascade = _cascade; - winsize = _winsize; - xrange = _xrange; - ystep = _ystep; - sumstep = _sumstep; - p = _p; - pq = _pq; - vec = &_vec; - } - - void operator()( const BlockedRange &range ) const - { - int iy, startY = range.begin(), endY = range.end(); - const int *p0 = p[0], *p1 = p[1], *p2 = p[2], *p3 = p[3]; - const int *pq0 = pq[0], *pq1 = pq[1], *pq2 = pq[2], *pq3 = pq[3]; - bool doCannyPruning = p0 != 0; - int sstep = (int)(sumstep / sizeof(p0[0])); - - for( iy = startY; iy < endY; iy++ ) + if( doCannyPruning ) { - int ix, y = cvRound(iy * ystep), ixstep = 1; - for( ix = xrange.start; ix < xrange.end; ix += ixstep ) + int offset = y * sstep + x; + int s = p0[offset] - p1[offset] - p2[offset] + p3[offset]; + int sq = pq0[offset] - pq1[offset] - pq2[offset] + pq3[offset]; + if( s < 100 || sq < 20 ) { - int x = cvRound(ix * ystep); // it should really be ystep, not ixstep - - if( doCannyPruning ) - { - int offset = y * sstep + x; - int s = p0[offset] - p1[offset] - p2[offset] + p3[offset]; - int sq = pq0[offset] - pq1[offset] - pq2[offset] + pq3[offset]; - if( s < 100 || sq < 20 ) - { - ixstep = 2; - continue; - } - } - - int result = gpuRunHaarClassifierCascade(/* cascade, cvPoint(x, y), 0 */); - if( result > 0 ) - vec->push_back(Rect(x, y, winsize.width, winsize.height)); - ixstep = result != 0 ? 1 : 2; + ixstep = 2; + continue; } } + + int result = gpuRunHaarClassifierCascade(/* cascade, cvPoint(x, y), 0 */); + if( result > 0 ) + vec->push_back(Rect(x, y, winsize.width, winsize.height)); + ixstep = result != 0 ? 1 : 2; } - - const CvHaarClassifierCascade *cascade; - double ystep; - size_t sumstep; - Size winsize; - Range xrange; - const int **p; - const int **pq; - ConcurrentRectVector *vec; - }; - + } } + + const CvHaarClassifierCascade *cascade; + double ystep; + size_t sumstep; + Size winsize; + Range xrange; + const int **p; + const int **pq; + ConcurrentRectVector *vec; +}; + +} } /* diff --git a/modules/ocl/src/imgproc.cpp b/modules/ocl/src/imgproc.cpp index 27ce211de..a93f86ecb 100644 --- a/modules/ocl/src/imgproc.cpp +++ b/modules/ocl/src/imgproc.cpp @@ -171,7 +171,7 @@ namespace cv typedef void (*gpuThresh_t)(const oclMat &src, oclMat &dst, double thresh, double maxVal, int type); - void threshold_8u(const oclMat &src, oclMat &dst, double thresh, double maxVal, int type) + static void threshold_8u(const oclMat &src, oclMat &dst, double thresh, double maxVal, int type) { CV_Assert( (src.cols == dst.cols) && (src.rows == dst.rows) ); Context *clCxt = src.clCxt; @@ -202,7 +202,7 @@ namespace cv openCLExecuteKernel(clCxt, &imgproc_threshold, kernelName, globalThreads, localThreads, args, src.oclchannels(), src.depth()); } - void threshold_32f(const oclMat &src, oclMat &dst, double thresh, double maxVal, int type) + static void threshold_32f(const oclMat &src, oclMat &dst, double thresh, double maxVal, int type) { CV_Assert( (src.cols == dst.cols) && (src.rows == dst.rows) ); Context *clCxt = src.clCxt; @@ -388,7 +388,7 @@ namespace cv //////////////////////////////////////////////////////////////////////////////////////////// // resize - void resize_gpu( const oclMat &src, oclMat &dst, double fx, double fy, int interpolation) + static void resize_gpu( const oclMat &src, oclMat &dst, double fx, double fy, int interpolation) { CV_Assert( (src.channels() == dst.channels()) ); Context *clCxt = src.clCxt; @@ -593,7 +593,7 @@ namespace cv int dstOffset = dst.offset / dst.elemSize(); int __bordertype[] = {cv::BORDER_CONSTANT, cv::BORDER_REPLICATE, BORDER_REFLECT, BORDER_WRAP, BORDER_REFLECT_101}; const char *borderstr[] = {"BORDER_CONSTANT", "BORDER_REPLICATE", "BORDER_REFLECT", "BORDER_WRAP", "BORDER_REFLECT_101"}; - int bordertype_index; + size_t bordertype_index; for(bordertype_index = 0; bordertype_index < sizeof(__bordertype) / sizeof(int); bordertype_index++) { if(__bordertype[bordertype_index] == bordertype) @@ -826,9 +826,9 @@ namespace cv { #define Sd(y,x) (Sd[y*3+x]) #define Dd(y,x) (Dd[y*3+x]) -#define det3(m) (m(0,0)*(m(1,1)*m(2,2) - m(1,2)*m(2,1)) - \ - m(0,1)*(m(1,0)*m(2,2) - m(1,2)*m(2,0)) + \ - m(0,2)*(m(1,0)*m(2,1) - m(1,1)*m(2,0))) +#define det3(m) (m(0,0)*(m(1,1)*m(2,2) - m(1,2)*m(2,1)) - \ + m(0,1)*(m(1,0)*m(2,2) - m(1,2)*m(2,0)) + \ + m(0,2)*(m(1,0)*m(2,1) - m(1,1)*m(2,0))) double *Sd = M; double *Dd = M; double d = det3(Sd); @@ -1018,12 +1018,19 @@ namespace cv int warpInd = (flags & WARP_INVERSE_MAP) >> 4; F coeffs[2][3]; - Mat coeffsMat(2, 3, CV_64F, (void *)coeffs); + + double coeffsM[2*3]; + Mat coeffsMat(2, 3, CV_64F, (void *)coeffsM); M.convertTo(coeffsMat, coeffsMat.type()); if(!warpInd) { - convert_coeffs((F *)(&coeffs[0][0])); + convert_coeffs(coeffsM); } + + for(int i = 0; i < 2; ++i) + for(int j = 0; j < 3; ++j) + coeffs[i][j] = coeffsM[i*3+j]; + warpAffine_gpu(src, dst, coeffs, interpolation); } @@ -1041,13 +1048,19 @@ namespace cv int warpInd = (flags & WARP_INVERSE_MAP) >> 4; double coeffs[3][3]; - Mat coeffsMat(3, 3, CV_64F, (void *)coeffs); + + double coeffsM[3*3]; + Mat coeffsMat(3, 3, CV_64F, (void *)coeffsM); M.convertTo(coeffsMat, coeffsMat.type()); if(!warpInd) { - invert((double *)(&coeffs[0][0])); + invert(coeffsM); } + for(int i = 0; i < 3; ++i) + for(int j = 0; j < 3; ++j) + coeffs[i][j] = coeffsM[i*3+j]; + warpPerspective_gpu(src, dst, coeffs, interpolation); } @@ -1144,7 +1157,7 @@ namespace cv } /////////////////////// corner ////////////////////////////// - void extractCovData(const oclMat &src, oclMat &Dx, oclMat &Dy, + static void extractCovData(const oclMat &src, oclMat &Dx, oclMat &Dy, int blockSize, int ksize, int borderType) { CV_Assert(src.type() == CV_8UC1 || src.type() == CV_32FC1); @@ -1174,7 +1187,7 @@ namespace cv CV_Assert(Dx.offset == 0 && Dy.offset == 0); } - void corner_ocl(const char *src_str, string kernelName, int block_size, float k, oclMat &Dx, oclMat &Dy, + static void corner_ocl(const char *src_str, string kernelName, int block_size, float k, oclMat &Dx, oclMat &Dy, oclMat &dst, int border_type) { char borderType[30]; @@ -1258,7 +1271,7 @@ namespace cv corner_ocl(imgproc_calcMinEigenVal, "calcMinEigenVal", blockSize, 0, Dx, Dy, dst, borderType); } /////////////////////////////////// MeanShiftfiltering /////////////////////////////////////////////// - void meanShiftFiltering_gpu(const oclMat &src, oclMat dst, int sp, int sr, int maxIter, float eps) + static void meanShiftFiltering_gpu(const oclMat &src, oclMat dst, int sp, int sr, int maxIter, float eps) { CV_Assert( (src.cols == dst.cols) && (src.rows == dst.rows) ); CV_Assert( !(dst.step & 0x3) ); @@ -1321,7 +1334,7 @@ namespace cv } - void meanShiftProc_gpu(const oclMat &src, oclMat dstr, oclMat dstsp, int sp, int sr, int maxIter, float eps) + static void meanShiftProc_gpu(const oclMat &src, oclMat dstr, oclMat dstsp, int sp, int sr, int maxIter, float eps) { //sanity checks CV_Assert( (src.cols == dstr.cols) && (src.rows == dstr.rows) && @@ -1398,7 +1411,7 @@ namespace cv const int HISTOGRAM256_BIN_COUNT = 256; } ///////////////////////////////calcHist///////////////////////////////////////////////////////////////// - void calc_sub_hist(const oclMat &mat_src, const oclMat &mat_sub_hist) + static void calc_sub_hist(const oclMat &mat_src, const oclMat &mat_sub_hist) { using namespace histograms; @@ -1477,7 +1490,7 @@ namespace cv openCLExecuteKernel(clCxt, &imgproc_histogram, kernelName, globalThreads, localThreads, args, -1, depth); } } - void merge_sub_hist(const oclMat &sub_hist, oclMat &mat_hist) + static void merge_sub_hist(const oclMat &sub_hist, oclMat &mat_hist) { using namespace histograms; @@ -1535,7 +1548,6 @@ namespace cv { int cn = src.channels(); int i, j, maxk, radius; - Size size = src.size(); CV_Assert( (src.channels() == 1 || src.channels() == 3) && src.type() == dst.type() && src.size() == dst.size() && @@ -1632,7 +1644,7 @@ inline int divUp(int total, int grain) { return (total + grain - 1) / grain; } -void convolve_run(const oclMat &src, const oclMat &temp1, oclMat &dst, string kernelName, const char **kernelString) +static void convolve_run(const oclMat &src, const oclMat &temp1, oclMat &dst, string kernelName, const char **kernelString) { CV_Assert(src.depth() == CV_32FC1); CV_Assert(temp1.depth() == CV_32F); diff --git a/modules/ocl/src/initialization.cpp b/modules/ocl/src/initialization.cpp index a8814c72d..63916d9a5 100644 --- a/modules/ocl/src/initialization.cpp +++ b/modules/ocl/src/initialization.cpp @@ -237,23 +237,13 @@ namespace cv int getDevice(std::vector &oclinfo, int devicetype) { - cl_device_type _devicetype; switch(devicetype) { case CVCL_DEVICE_TYPE_DEFAULT: - _devicetype = CL_DEVICE_TYPE_DEFAULT; - break; case CVCL_DEVICE_TYPE_CPU: - _devicetype = CL_DEVICE_TYPE_CPU; - break; case CVCL_DEVICE_TYPE_GPU: - _devicetype = CL_DEVICE_TYPE_GPU; - break; case CVCL_DEVICE_TYPE_ACCELERATOR: - _devicetype = CL_DEVICE_TYPE_ACCELERATOR; - break; case CVCL_DEVICE_TYPE_ALL: - _devicetype = CL_DEVICE_TYPE_ALL; break; default: CV_Error(CV_GpuApiCallError, "Unkown device type"); @@ -336,7 +326,7 @@ namespace cv size_t extends_size; openCLSafeCall(clGetDeviceInfo(oclinfo.impl->devices[devnum], CL_DEVICE_EXTENSIONS, EXT_LEN, (void *)extends_set, &extends_size)); - CV_Assert(extends_size < EXT_LEN); + CV_Assert(extends_size < (size_t)EXT_LEN); extends_set[EXT_LEN - 1] = 0; memset(oclinfo.impl->extra_options, 0, 512); oclinfo.impl->double_support = 0; @@ -592,7 +582,7 @@ namespace cv size_t binarySize = ftell(fp); fseek(fp, 0, SEEK_SET); char *binary = new char[binarySize]; - fread(binary, binarySize, 1, fp); + CV_Assert(1 == fread(binary, binarySize, 1, fp)); fclose(fp); cl_int status = 0; program = clCreateProgramWithBinary(clCxt->impl->clContext, diff --git a/modules/ocl/src/interpolate_frames.cpp b/modules/ocl/src/interpolate_frames.cpp index e4f16e0ca..d6b402093 100644 --- a/modules/ocl/src/interpolate_frames.cpp +++ b/modules/ocl/src/interpolate_frames.cpp @@ -282,7 +282,7 @@ void interpolate::bindImgTex(const oclMat &img, cl_mem &texture) openCLFree(texture); } -#if CL_VERSION_1_2 +#ifdef CL_VERSION_1_2 cl_image_desc desc; desc.image_type = CL_MEM_OBJECT_IMAGE2D; desc.image_width = img.step / img.elemSize(); diff --git a/modules/ocl/src/kernels/arithm_mul.cl b/modules/ocl/src/kernels/arithm_mul.cl index e0cfbd80f..f9f3936a4 100644 --- a/modules/ocl/src/kernels/arithm_mul.cl +++ b/modules/ocl/src/kernels/arithm_mul.cl @@ -16,6 +16,7 @@ // // @Authors // Jia Haipeng, jiahaipeng95@gmail.com +// Dachuan Zhao, dachuan@multicorewareinc.com // // Redistribution and use in source and binary forms, with or without modification, // are permitted provided that the following conditions are met: @@ -260,3 +261,22 @@ __kernel void arithm_mul_D6 (__global double *src1, int src1_step, int src1_offs } } #endif + +__kernel void arithm_muls_D5 (__global float *src1, int src1_step, int src1_offset, + __global float *dst, int dst_step, int dst_offset, + int rows, int cols, int dst_step1, float scalar) +{ + int x = get_global_id(0); + int y = get_global_id(1); + + if (x < cols && y < rows) + { + int src1_index = mad24(y, src1_step, (x << 2) + src1_offset); + int dst_index = mad24(y, dst_step, (x << 2) + dst_offset); + + float data1 = *((__global float *)((__global char *)src1 + src1_index)); + float tmp = data1 * scalar; + + *((__global float *)((__global char *)dst + dst_index)) = tmp; + } +} \ No newline at end of file diff --git a/modules/ocl/src/kernels/cvt_color.cl b/modules/ocl/src/kernels/cvt_color.cl index 6c3868056..952193931 100644 --- a/modules/ocl/src/kernels/cvt_color.cl +++ b/modules/ocl/src/kernels/cvt_color.cl @@ -16,6 +16,7 @@ // // @Authors // Jia Haipeng, jiahaipeng95@gmail.com +// Peng Xiao, pengxiao@multicorewareinc.com // // Redistribution and use in source and binary forms, with or without modification, // are permitted provided that the following conditions are met: @@ -48,13 +49,33 @@ #pragma OPENCL EXTENSION cl_khr_fp64:enable #endif +#define DATA_TYPE UNDEFINED + #if defined (DEPTH_0) +#undef DATA_TYPE #define DATA_TYPE uchar +#define MAX_NUM 255 +#define HALF_MAX 128 +#define SAT_CAST(num) convert_uchar_sat(num) #endif + #if defined (DEPTH_2) +#undef DATA_TYPE #define DATA_TYPE ushort +#define MAX_NUM 65535 +#define HALF_MAX 32768 +#define SAT_CAST(num) convert_ushort_sat(num) #endif +#if defined (DEPTH_5) +#undef DATA_TYPE +#define DATA_TYPE float +#define MAX_NUM 1.0f +#define HALF_MAX 0.5f +#define SAT_CAST(num) (num) +#endif + + #define CV_DESCALE(x,n) (((x) + (1 << ((n)-1))) >> (n)) enum { @@ -65,6 +86,7 @@ enum B2Y = 1868, BLOCK_SIZE = 256 }; +///////////////////////////////////// RGB <-> GRAY ////////////////////////////////////// __kernel void RGB2Gray(int cols,int rows,int src_step,int dst_step,int channels, int bidx, __global const DATA_TYPE* src, __global DATA_TYPE* dst) @@ -72,10 +94,203 @@ __kernel void RGB2Gray(int cols,int rows,int src_step,int dst_step,int channels, const int x = get_global_id(0); const int y = get_global_id(1); + src_step /= sizeof(DATA_TYPE); + dst_step /= sizeof(DATA_TYPE); if (y < rows && x < cols) { - int src_idx = y * src_step + x * channels * sizeof(DATA_TYPE); - int dst_idx = y * dst_step + x * sizeof(DATA_TYPE); + int src_idx = y * src_step + x * channels; + int dst_idx = y * dst_step + x; +#if defined (DEPTH_5) + dst[dst_idx] = src[src_idx + bidx] * 0.114f + src[src_idx + 1] * 0.587f + src[src_idx + (bidx^2)] * 0.299f; +#else dst[dst_idx] = (DATA_TYPE)CV_DESCALE((src[src_idx + bidx] * B2Y + src[src_idx + 1] * G2Y + src[src_idx + (bidx^2)] * R2Y), yuv_shift); +#endif + } +} + +__kernel void Gray2RGB(int cols,int rows,int src_step,int dst_step, + __global const DATA_TYPE* src, __global DATA_TYPE* dst) +{ + const int x = get_global_id(0); + const int y = get_global_id(1); + + src_step /= sizeof(DATA_TYPE); + dst_step /= sizeof(DATA_TYPE); + if (y < rows && x < cols) + { + int src_idx = y * src_step + x; + int dst_idx = y * dst_step + x * 4; + DATA_TYPE val = src[src_idx]; + dst[dst_idx++] = val; + dst[dst_idx++] = val; + dst[dst_idx++] = val; + dst[dst_idx] = MAX_NUM; + } +} + +///////////////////////////////////// RGB <-> YUV ////////////////////////////////////// +__constant float c_RGB2YUVCoeffs_f[5] = { 0.114f, 0.587f, 0.299f, 0.492f, 0.877f }; +__constant int c_RGB2YUVCoeffs_i[5] = { B2Y, G2Y, R2Y, 8061, 14369 }; + +__kernel void RGB2YUV(int cols,int rows,int src_step,int dst_step,int channels, + int bidx, __global const DATA_TYPE* src, __global DATA_TYPE* dst) +{ + const int x = get_global_id(0); + const int y = get_global_id(1); + + src_step /= sizeof(DATA_TYPE); + dst_step /= sizeof(DATA_TYPE); + + if (y < rows && x < cols) + { + int src_idx = y * src_step + x * channels; + int dst_idx = y * dst_step + x * channels; + dst += dst_idx; + const DATA_TYPE rgb[] = {src[src_idx], src[src_idx + 1], src[src_idx + 2]}; +#if defined (DEPTH_5) + __constant float * coeffs = c_RGB2YUVCoeffs_f; + const DATA_TYPE Y = rgb[0] * coeffs[bidx] + rgb[1] * coeffs[1] + rgb[2] * coeffs[bidx^2]; + const DATA_TYPE Cr = (rgb[bidx] - Y) * coeffs[3] + HALF_MAX; + const DATA_TYPE Cb = (rgb[bidx^2] - Y) * coeffs[4] + HALF_MAX; +#else + __constant int * coeffs = c_RGB2YUVCoeffs_i; + const int delta = HALF_MAX * (1 << yuv_shift); + const int Y = CV_DESCALE(rgb[0] * coeffs[bidx] + rgb[1] * coeffs[1] + rgb[2] * coeffs[bidx^2], yuv_shift); + const int Cr = CV_DESCALE((rgb[bidx] - Y) * coeffs[3] + delta, yuv_shift); + const int Cb = CV_DESCALE((rgb[bidx^2] - Y) * coeffs[4] + delta, yuv_shift); +#endif + dst[0] = SAT_CAST( Y ); + dst[1] = SAT_CAST( Cr ); + dst[2] = SAT_CAST( Cb ); + } +} + +__constant float c_YUV2RGBCoeffs_f[5] = { 2.032f, -0.395f, -0.581f, 1.140f }; +__constant int c_YUV2RGBCoeffs_i[5] = { 33292, -6472, -9519, 18678 }; + +__kernel void YUV2RGB(int cols,int rows,int src_step,int dst_step,int channels, + int bidx, __global const DATA_TYPE* src, __global DATA_TYPE* dst) +{ + const int x = get_global_id(0); + const int y = get_global_id(1); + + src_step /= sizeof(DATA_TYPE); + dst_step /= sizeof(DATA_TYPE); + + if (y < rows && x < cols) + { + int src_idx = y * src_step + x * channels; + int dst_idx = y * dst_step + x * channels; + dst += dst_idx; + const DATA_TYPE yuv[] = {src[src_idx], src[src_idx + 1], src[src_idx + 2]}; + +#if defined (DEPTH_5) + __constant float * coeffs = c_YUV2RGBCoeffs_f; + const float b = yuv[0] + (yuv[2] - HALF_MAX) * coeffs[3]; + const float g = yuv[0] + (yuv[2] - HALF_MAX) * coeffs[2] + (yuv[1] - HALF_MAX) * coeffs[1]; + const float r = yuv[0] + (yuv[1] - HALF_MAX) * coeffs[0]; +#else + __constant int * coeffs = c_YUV2RGBCoeffs_i; + const int b = yuv[0] + CV_DESCALE((yuv[2] - HALF_MAX) * coeffs[3], yuv_shift); + const int g = yuv[0] + CV_DESCALE((yuv[2] - HALF_MAX) * coeffs[2] + (yuv[1] - HALF_MAX) * coeffs[1], yuv_shift); + const int r = yuv[0] + CV_DESCALE((yuv[1] - HALF_MAX) * coeffs[0], yuv_shift); +#endif + dst[bidx^2] = SAT_CAST( b ); + dst[1] = SAT_CAST( g ); + dst[bidx] = SAT_CAST( r ); + } +} + +__constant int ITUR_BT_601_CY = 1220542; +__constant int ITUR_BT_601_CUB = 2116026; +__constant int ITUR_BT_601_CUG = -409993; +__constant int ITUR_BT_601_CVG = -852492; +__constant int ITUR_BT_601_CVR = 1673527; +__constant int ITUR_BT_601_SHIFT = 20; + +__kernel void YUV2RGBA_NV12(int cols,int rows,int src_step,int dst_step, + int bidx, int width, int height, __global const uchar* src, __global uchar* dst) +{ + const int x = get_global_id(0); // max_x = width / 2 + const int y = get_global_id(1); // max_y = height/ 2 + + if (y < height / 2 && x < width / 2 ) + { + __global const uchar* ysrc = src + (y << 1) * src_step + (x << 1); + __global const uchar* usrc = src + (height + y) * src_step + (x << 1); + __global uchar* dst1 = dst + (y << 1) * dst_step + (x << 3); + __global uchar* dst2 = dst + ((y << 1) + 1) * dst_step + (x << 3); + int Y1 = ysrc[0]; + int Y2 = ysrc[1]; + int Y3 = ysrc[src_step]; + int Y4 = ysrc[src_step + 1]; + + int U = usrc[0] - 128; + int V = usrc[1] - 128; + + int ruv = (1 << (ITUR_BT_601_SHIFT - 1)) + ITUR_BT_601_CVR * V; + int guv = (1 << (ITUR_BT_601_SHIFT - 1)) + ITUR_BT_601_CVG * V + ITUR_BT_601_CUG * U; + int buv = (1 << (ITUR_BT_601_SHIFT - 1)) + ITUR_BT_601_CUB * U; + + Y1 = max(0, Y1 - 16) * ITUR_BT_601_CY; + dst1[2 - bidx] = convert_uchar_sat((Y1 + ruv) >> ITUR_BT_601_SHIFT); + dst1[1] = convert_uchar_sat((Y1 + guv) >> ITUR_BT_601_SHIFT); + dst1[bidx] = convert_uchar_sat((Y1 + buv) >> ITUR_BT_601_SHIFT); + dst1[3] = 255; + + Y2 = max(0, Y2 - 16) * ITUR_BT_601_CY; + dst1[6 - bidx] = convert_uchar_sat((Y2 + ruv) >> ITUR_BT_601_SHIFT); + dst1[5] = convert_uchar_sat((Y2 + guv) >> ITUR_BT_601_SHIFT); + dst1[4 + bidx] = convert_uchar_sat((Y2 + buv) >> ITUR_BT_601_SHIFT); + dst1[7] = 255; + + Y3 = max(0, Y3 - 16) * ITUR_BT_601_CY; + dst2[2 - bidx] = convert_uchar_sat((Y3 + ruv) >> ITUR_BT_601_SHIFT); + dst2[1] = convert_uchar_sat((Y3 + guv) >> ITUR_BT_601_SHIFT); + dst2[bidx] = convert_uchar_sat((Y3 + buv) >> ITUR_BT_601_SHIFT); + dst2[3] = 255; + + Y4 = max(0, Y4 - 16) * ITUR_BT_601_CY; + dst2[6 - bidx] = convert_uchar_sat((Y4 + ruv) >> ITUR_BT_601_SHIFT); + dst2[5] = convert_uchar_sat((Y4 + guv) >> ITUR_BT_601_SHIFT); + dst2[4 + bidx] = convert_uchar_sat((Y4 + buv) >> ITUR_BT_601_SHIFT); + dst2[7] = 255; + } +} + +///////////////////////////////////// RGB <-> YUV ////////////////////////////////////// +__constant float c_RGB2YCrCbCoeffs_f[5] = {0.299f, 0.587f, 0.114f, 0.713f, 0.564f}; +__constant int c_RGB2YCrCbCoeffs_i[5] = {R2Y, G2Y, B2Y, 11682, 9241}; + +__kernel void RGB2YCrCb(int cols,int rows,int src_step,int dst_step,int channels, + int bidx, __global const DATA_TYPE* src, __global DATA_TYPE* dst) +{ + const int x = get_global_id(0); + const int y = get_global_id(1); + + src_step /= sizeof(DATA_TYPE); + dst_step /= sizeof(DATA_TYPE); + + if (y < rows && x < cols) + { + int src_idx = y * src_step + x * channels; + int dst_idx = y * dst_step + x * channels; + dst += dst_idx; + const DATA_TYPE rgb[] = {src[src_idx], src[src_idx + 1], src[src_idx + 2]}; +#if defined (DEPTH_5) + __constant float * coeffs = c_RGB2YCrCbCoeffs_f; + const DATA_TYPE Y = rgb[0] * coeffs[bidx^2] + rgb[1] * coeffs[1] + rgb[2] * coeffs[bidx]; + const DATA_TYPE Cr = (rgb[bidx^2] - Y) * coeffs[3] + HALF_MAX; + const DATA_TYPE Cb = (rgb[bidx] - Y) * coeffs[4] + HALF_MAX; +#else + __constant int * coeffs = c_RGB2YCrCbCoeffs_i; + const int delta = HALF_MAX * (1 << yuv_shift); + const int Y = CV_DESCALE(rgb[0] * coeffs[bidx^2] + rgb[1] * coeffs[1] + rgb[2] * coeffs[bidx], yuv_shift); + const int Cr = CV_DESCALE((rgb[bidx^2] - Y) * coeffs[3] + delta, yuv_shift); + const int Cb = CV_DESCALE((rgb[bidx] - Y) * coeffs[4] + delta, yuv_shift); +#endif + dst[0] = SAT_CAST( Y ); + dst[1] = SAT_CAST( Cr ); + dst[2] = SAT_CAST( Cb ); } } diff --git a/modules/ocl/src/kernels/haarobjectdetect_scaled2.cl b/modules/ocl/src/kernels/haarobjectdetect_scaled2.cl index 14b68ea7a..22d3004e2 100644 --- a/modules/ocl/src/kernels/haarobjectdetect_scaled2.cl +++ b/modules/ocl/src/kernels/haarobjectdetect_scaled2.cl @@ -44,75 +44,75 @@ //M*/ // Enter your kernel in this window -#pragma OPENCL EXTENSION cl_amd_printf:enable +//#pragma OPENCL EXTENSION cl_amd_printf:enable #define CV_HAAR_FEATURE_MAX 3 typedef int sumtype; typedef float sqsumtype; -typedef struct __attribute__((aligned (128))) GpuHidHaarFeature +typedef struct __attribute__((aligned(128))) GpuHidHaarFeature { - struct __attribute__((aligned (32))) - { - int p0 __attribute__((aligned (4))); - int p1 __attribute__((aligned (4))); - int p2 __attribute__((aligned (4))); - int p3 __attribute__((aligned (4))); - float weight __attribute__((aligned (4))); - } - rect[CV_HAAR_FEATURE_MAX] __attribute__((aligned (32))); + struct __attribute__((aligned(32))) +{ + int p0 __attribute__((aligned(4))); + int p1 __attribute__((aligned(4))); + int p2 __attribute__((aligned(4))); + int p3 __attribute__((aligned(4))); + float weight __attribute__((aligned(4))); +} +rect[CV_HAAR_FEATURE_MAX] __attribute__((aligned(32))); } GpuHidHaarFeature; -typedef struct __attribute__((aligned (128) )) GpuHidHaarTreeNode +typedef struct __attribute__((aligned(128))) GpuHidHaarTreeNode { - int p[CV_HAAR_FEATURE_MAX][4] __attribute__((aligned (64))); + int p[CV_HAAR_FEATURE_MAX][4] __attribute__((aligned(64))); float weight[CV_HAAR_FEATURE_MAX] /*__attribute__((aligned (16)))*/; float threshold /*__attribute__((aligned (4)))*/; - float alpha[2] __attribute__((aligned (8))); - int left __attribute__((aligned (4))); - int right __attribute__((aligned (4))); + float alpha[2] __attribute__((aligned(8))); + int left __attribute__((aligned(4))); + int right __attribute__((aligned(4))); } GpuHidHaarTreeNode; -typedef struct __attribute__((aligned (32))) GpuHidHaarClassifier +typedef struct __attribute__((aligned(32))) GpuHidHaarClassifier { - int count __attribute__((aligned (4))); - GpuHidHaarTreeNode* node __attribute__((aligned (8))); - float* alpha __attribute__((aligned (8))); + int count __attribute__((aligned(4))); + GpuHidHaarTreeNode *node __attribute__((aligned(8))); + float *alpha __attribute__((aligned(8))); } GpuHidHaarClassifier; -typedef struct __attribute__((aligned (64))) GpuHidHaarStageClassifier +typedef struct __attribute__((aligned(64))) GpuHidHaarStageClassifier { - int count __attribute__((aligned (4))); - float threshold __attribute__((aligned (4))); - int two_rects __attribute__((aligned (4))); - int reserved0 __attribute__((aligned (8))); - int reserved1 __attribute__((aligned (8))); - int reserved2 __attribute__((aligned (8))); - int reserved3 __attribute__((aligned (8))); + int count __attribute__((aligned(4))); + float threshold __attribute__((aligned(4))); + int two_rects __attribute__((aligned(4))); + int reserved0 __attribute__((aligned(8))); + int reserved1 __attribute__((aligned(8))); + int reserved2 __attribute__((aligned(8))); + int reserved3 __attribute__((aligned(8))); } GpuHidHaarStageClassifier; -typedef struct __attribute__((aligned (64))) GpuHidHaarClassifierCascade +typedef struct __attribute__((aligned(64))) GpuHidHaarClassifierCascade { - int count __attribute__((aligned (4))); - int is_stump_based __attribute__((aligned (4))); - int has_tilted_features __attribute__((aligned (4))); - int is_tree __attribute__((aligned (4))); - int pq0 __attribute__((aligned (4))); - int pq1 __attribute__((aligned (4))); - int pq2 __attribute__((aligned (4))); - int pq3 __attribute__((aligned (4))); - int p0 __attribute__((aligned (4))); - int p1 __attribute__((aligned (4))); - int p2 __attribute__((aligned (4))); - int p3 __attribute__((aligned (4))); - float inv_window_area __attribute__((aligned (4))); -}GpuHidHaarClassifierCascade; + int count __attribute__((aligned(4))); + int is_stump_based __attribute__((aligned(4))); + int has_tilted_features __attribute__((aligned(4))); + int is_tree __attribute__((aligned(4))); + int pq0 __attribute__((aligned(4))); + int pq1 __attribute__((aligned(4))); + int pq2 __attribute__((aligned(4))); + int pq3 __attribute__((aligned(4))); + int p0 __attribute__((aligned(4))); + int p1 __attribute__((aligned(4))); + int p2 __attribute__((aligned(4))); + int p3 __attribute__((aligned(4))); + float inv_window_area __attribute__((aligned(4))); +} GpuHidHaarClassifierCascade; __kernel void gpuRunHaarClassifierCascade_scaled2( - global GpuHidHaarStageClassifier * stagecascadeptr, - global int4 * info, - global GpuHidHaarTreeNode * nodeptr, - global const int * restrict sum, - global const float * restrict sqsum, - global int4 * candidate, + global GpuHidHaarStageClassifier *stagecascadeptr, + global int4 *info, + global GpuHidHaarTreeNode *nodeptr, + global const int *restrict sum, + global const float *restrict sqsum, + global int4 *candidate, const int step, const int loopcount, const int start_stage, @@ -120,215 +120,167 @@ __kernel void gpuRunHaarClassifierCascade_scaled2( const int end_stage, const int startnode, const int splitnode, - global int4 * p, - //const int4 * pq, - global float * correction, - const int nodecount) + global int4 *p, + //const int4 * pq, + global float *correction, + const int nodecount) { - int grpszx = get_local_size(0); - int grpszy = get_local_size(1); - int grpnumx = get_num_groups(0); - int grpidx=get_group_id(0); - int lclidx = get_local_id(0); - int lclidy = get_local_id(1); - int lcl_sz = mul24(grpszx,grpszy); - int lcl_id = mad24(lclidy,grpszx,lclidx); - __local int lclshare[1024]; - __local int* glboutindex=lclshare+0; - __local int* lclcount=glboutindex+1; - __local int* lcloutindex=lclcount+1; - __local float* partialsum=(__local float*)(lcloutindex+(lcl_sz<<1)); - glboutindex[0]=0; - int outputoff = mul24(grpidx,256); - candidate[outputoff+(lcl_id<<2)] = (int4)0; - candidate[outputoff+(lcl_id<<2)+1] = (int4)0; - candidate[outputoff+(lcl_id<<2)+2] = (int4)0; - candidate[outputoff+(lcl_id<<2)+3] = (int4)0; - for(int scalei = 0; scalei > 16; - int height = scaleinfo1.x & 0xffff; - int grpnumperline =(scaleinfo1.y & 0xffff0000) >> 16; - int totalgrp = scaleinfo1.y & 0xffff; - float factor = as_float(scaleinfo1.w); - float correction_t=correction[scalei]; - int ystep=(int)(max(2.0f,factor)+0.5f); - for(int grploop=get_group_id(0);grploop=0.f ? sqrt(variance_norm_factor) : 1.f; - result = 1; - nodecounter = startnode+nodecount*scalei; - for(int stageloop = start_stage; stageloop < split_stage&&result; stageloop++ ) - { - float stage_sum = 0.f; - int4 stageinfo = *(global int4*)(stagecascadeptr+stageloop); - float stagethreshold = as_float(stageinfo.y); - for(int nodeloop = 0; nodeloop < stageinfo.x; nodeloop++ ) - { - __global GpuHidHaarTreeNode* currentnodeptr = (nodeptr + nodecounter); - int4 info1 = *(__global int4*)(&(currentnodeptr->p[0][0])); - int4 info2 = *(__global int4*)(&(currentnodeptr->p[1][0])); - int4 info3 = *(__global int4*)(&(currentnodeptr->p[2][0])); - float4 w = *(__global float4*)(&(currentnodeptr->weight[0])); - float2 alpha2 = *(__global float2*)(&(currentnodeptr->alpha[0])); - float nodethreshold = w.w * variance_norm_factor; - info1.x +=p_offset; - info1.z +=p_offset; - info2.x +=p_offset; - info2.z +=p_offset; - float classsum = (sum[mad24(info1.y,step,info1.x)] - sum[mad24(info1.y,step,info1.z)] - - sum[mad24(info1.w,step,info1.x)] + sum[mad24(info1.w,step,info1.z)]) * w.x; - classsum += (sum[mad24(info2.y,step,info2.x)] - sum[mad24(info2.y,step,info2.z)] - - sum[mad24(info2.w,step,info2.x)] + sum[mad24(info2.w,step,info2.z)]) * w.y; - info3.x +=p_offset; - info3.z +=p_offset; - classsum += (sum[mad24(info3.y,step,info3.x)] - sum[mad24(info3.y,step,info3.z)] - - sum[mad24(info3.w,step,info3.x)] + sum[mad24(info3.w,step,info3.z)]) * w.z; - stage_sum += classsum >= nodethreshold ? alpha2.y : alpha2.x; - nodecounter++; - } - result=(stage_sum>=stagethreshold); - } - if(result&&(ix0;stageloop++) - { - lclcount[0]=0; - barrier(CLK_LOCAL_MEM_FENCE); - int2 stageinfo=*(global int2*)(stagecascadeptr+stageloop); - float stagethreshold=as_float(stageinfo.y); - int perfscale=queuecount>4?3:2; - int queuecount_loop=(queuecount+(1<>perfscale; - int lcl_compute_win=lcl_sz>>perfscale; - int lcl_compute_win_id=(lcl_id>>(6-perfscale)); - int lcl_loops=(stageinfo.x+lcl_compute_win-1)>>(6-perfscale); - int lcl_compute_id=lcl_id-(lcl_compute_win_id<<(6-perfscale)); - for(int queueloop=0;queueloop>16),step,temp_coord&0xffff); - int tempnodecounter=lcl_compute_id; - float part_sum=0.f; - for(int lcl_loop=0;lcl_loopp[0][0])); - int4 info2 = *(__global int4*)(&(currentnodeptr->p[1][0])); - int4 info3 = *(__global int4*)(&(currentnodeptr->p[2][0])); - float4 w = *(__global float4*)(&(currentnodeptr->weight[0])); - float2 alpha2 = *(__global float2*)(&(currentnodeptr->alpha[0])); - float nodethreshold = w.w * variance_norm_factor; - info1.x +=queue_offset; - info1.z +=queue_offset; - info2.x +=queue_offset; - info2.z +=queue_offset; - float classsum = (sum[mad24(info1.y,step,info1.x)] - sum[mad24(info1.y,step,info1.z)] - - sum[mad24(info1.w,step,info1.x)] + sum[mad24(info1.w,step,info1.z)]) * w.x; - classsum += (sum[mad24(info2.y,step,info2.x)] - sum[mad24(info2.y,step,info2.z)] - - sum[mad24(info2.w,step,info2.x)] + sum[mad24(info2.w,step,info2.z)]) * w.y; + int grpszx = get_local_size(0); + int grpszy = get_local_size(1); + int grpnumx = get_num_groups(0); + int grpidx = get_group_id(0); + int lclidx = get_local_id(0); + int lclidy = get_local_id(1); + int lcl_sz = mul24(grpszx, grpszy); + int lcl_id = mad24(lclidy, grpszx, lclidx); + __local int lclshare[1024]; + __local int *glboutindex = lclshare + 0; + __local int *lclcount = glboutindex + 1; + __local int *lcloutindex = lclcount + 1; + __local float *partialsum = (__local float *)(lcloutindex + (lcl_sz << 1)); + glboutindex[0] = 0; + int outputoff = mul24(grpidx, 256); + candidate[outputoff + (lcl_id << 2)] = (int4)0; + candidate[outputoff + (lcl_id << 2) + 1] = (int4)0; + candidate[outputoff + (lcl_id << 2) + 2] = (int4)0; + candidate[outputoff + (lcl_id << 2) + 3] = (int4)0; - info3.x +=queue_offset; - info3.z +=queue_offset; - classsum += (sum[mad24(info3.y,step,info3.x)] - sum[mad24(info3.y,step,info3.z)] - - sum[mad24(info3.w,step,info3.x)] + sum[mad24(info3.w,step,info3.z)]) * w.z; - part_sum += classsum >= nodethreshold ? alpha2.y : alpha2.x; - tempnodecounter+=lcl_compute_win; - } - partialsum[lcl_id]=part_sum; - barrier(CLK_LOCAL_MEM_FENCE); - for(int i=0;i=stagethreshold&&(lcl_compute_id==0)) - { - int queueindex=atomic_inc(lclcount); - lcloutindex[queueindex<<1]=temp_coord; - lcloutindex[(queueindex<<1)+1]=as_int(variance_norm_factor); - } - lcl_compute_win_id+=(1<>16; - temp=glboutindex[0]; - int4 candidate_result; - candidate_result.zw=(int2)convert_int_rtn(factor*20.f); - candidate_result.x=x; - candidate_result.y=y; - atomic_inc(glboutindex); - candidate[outputoff+temp+lcl_id]=candidate_result; - } - barrier(CLK_LOCAL_MEM_FENCE); - } + for (int scalei = 0; scalei < loopcount; scalei++) + { + int4 scaleinfo1; + scaleinfo1 = info[scalei]; + int width = (scaleinfo1.x & 0xffff0000) >> 16; + int height = scaleinfo1.x & 0xffff; + int grpnumperline = (scaleinfo1.y & 0xffff0000) >> 16; + int totalgrp = scaleinfo1.y & 0xffff; + float factor = as_float(scaleinfo1.w); + float correction_t = correction[scalei]; + int ystep = (int)(max(2.0f, factor) + 0.5f); + + for (int grploop = get_group_id(0); grploop < totalgrp; grploop += grpnumx) + { + int4 cascadeinfo = p[scalei]; + int grpidy = grploop / grpnumperline; + int grpidx = grploop - mul24(grpidy, grpnumperline); + int ix = mad24(grpidx, grpszx, lclidx); + int iy = mad24(grpidy, grpszy, lclidy); + int x = ix * ystep; + int y = iy * ystep; + lcloutindex[lcl_id] = 0; + lclcount[0] = 0; + int result = 1, nodecounter; + float mean, variance_norm_factor; + //if((ix < width) && (iy < height)) + { + const int p_offset = mad24(y, step, x); + cascadeinfo.x += p_offset; + cascadeinfo.z += p_offset; + mean = (sum[mad24(cascadeinfo.y, step, cascadeinfo.x)] - sum[mad24(cascadeinfo.y, step, cascadeinfo.z)] - + sum[mad24(cascadeinfo.w, step, cascadeinfo.x)] + sum[mad24(cascadeinfo.w, step, cascadeinfo.z)]) + * correction_t; + variance_norm_factor = sqsum[mad24(cascadeinfo.y, step, cascadeinfo.x)] - sqsum[mad24(cascadeinfo.y, step, cascadeinfo.z)] - + sqsum[mad24(cascadeinfo.w, step, cascadeinfo.x)] + sqsum[mad24(cascadeinfo.w, step, cascadeinfo.z)]; + variance_norm_factor = variance_norm_factor * correction_t - mean * mean; + variance_norm_factor = variance_norm_factor >= 0.f ? sqrt(variance_norm_factor) : 1.f; + result = 1; + nodecounter = startnode + nodecount * scalei; + + for (int stageloop = start_stage; stageloop < end_stage && result; stageloop++) + { + float stage_sum = 0.f; + int4 stageinfo = *(global int4 *)(stagecascadeptr + stageloop); + float stagethreshold = as_float(stageinfo.y); + + for (int nodeloop = 0; nodeloop < stageinfo.x; nodeloop++) + { + __global GpuHidHaarTreeNode *currentnodeptr = (nodeptr + nodecounter); + int4 info1 = *(__global int4 *)(&(currentnodeptr->p[0][0])); + int4 info2 = *(__global int4 *)(&(currentnodeptr->p[1][0])); + int4 info3 = *(__global int4 *)(&(currentnodeptr->p[2][0])); + float4 w = *(__global float4 *)(&(currentnodeptr->weight[0])); + float2 alpha2 = *(__global float2 *)(&(currentnodeptr->alpha[0])); + float nodethreshold = w.w * variance_norm_factor; + info1.x += p_offset; + info1.z += p_offset; + info2.x += p_offset; + info2.z += p_offset; + float classsum = (sum[mad24(info1.y, step, info1.x)] - sum[mad24(info1.y, step, info1.z)] - + sum[mad24(info1.w, step, info1.x)] + sum[mad24(info1.w, step, info1.z)]) * w.x; + classsum += (sum[mad24(info2.y, step, info2.x)] - sum[mad24(info2.y, step, info2.z)] - + sum[mad24(info2.w, step, info2.x)] + sum[mad24(info2.w, step, info2.z)]) * w.y; + info3.x += p_offset; + info3.z += p_offset; + classsum += (sum[mad24(info3.y, step, info3.x)] - sum[mad24(info3.y, step, info3.z)] - + sum[mad24(info3.w, step, info3.x)] + sum[mad24(info3.w, step, info3.z)]) * w.z; + stage_sum += classsum >= nodethreshold ? alpha2.y : alpha2.x; + nodecounter++; + } + + result = (stage_sum >= stagethreshold); + } + + if (result && (ix < width) && (iy < height)) + { + int queueindex = atomic_inc(lclcount); + lcloutindex[queueindex << 1] = (y << 16) | x; + lcloutindex[(queueindex << 1) + 1] = as_int(variance_norm_factor); + } + + barrier(CLK_LOCAL_MEM_FENCE); + int queuecount = lclcount[0]; + nodecounter = splitnode + nodecount * scalei; + + if (lcl_id < queuecount) + { + int temp = lcloutindex[lcl_id << 1]; + int x = temp & 0xffff; + int y = (temp & (int)0xffff0000) >> 16; + temp = glboutindex[0]; + int4 candidate_result; + candidate_result.zw = (int2)convert_int_rtn(factor * 20.f); + candidate_result.x = x; + candidate_result.y = y; + atomic_inc(glboutindex); + candidate[outputoff + temp + lcl_id] = candidate_result; + } + + barrier(CLK_LOCAL_MEM_FENCE); + } + } } - } } -__kernel void gpuscaleclassifier(global GpuHidHaarTreeNode * orinode, global GpuHidHaarTreeNode * newnode,float scale,float weight_scale,int nodenum) +__kernel void gpuscaleclassifier(global GpuHidHaarTreeNode *orinode, global GpuHidHaarTreeNode *newnode, float scale, float weight_scale, int nodenum) { - int counter=get_global_id(0); - int tr_x[3],tr_y[3],tr_h[3],tr_w[3],i=0; - GpuHidHaarTreeNode t1 = *(orinode + counter); - #pragma unroll - for(i=0;i<3;i++){ - tr_x[i]=(int)(t1.p[i][0]*scale+0.5f); - tr_y[i]=(int)(t1.p[i][1]*scale+0.5f); - tr_w[i]=(int)(t1.p[i][2]*scale+0.5f); - tr_h[i]=(int)(t1.p[i][3]*scale+0.5f); - } - t1.weight[0]=t1.p[2][0]?-(t1.weight[1]*tr_h[1]*tr_w[1]+t1.weight[2]*tr_h[2]*tr_w[2])/(tr_h[0]*tr_w[0]):-t1.weight[1]*tr_h[1]*tr_w[1]/(tr_h[0]*tr_w[0]); - counter+=nodenum; - #pragma unroll - for(i=0;i<3;i++) - { - newnode[counter].p[i][0]=tr_x[i]; - newnode[counter].p[i][1]=tr_y[i]; - newnode[counter].p[i][2]=tr_x[i]+tr_w[i]; - newnode[counter].p[i][3]=tr_y[i]+tr_h[i]; - newnode[counter].weight[i]=t1.weight[i]*weight_scale; - } - newnode[counter].left=t1.left; - newnode[counter].right=t1.right; - newnode[counter].threshold=t1.threshold; - newnode[counter].alpha[0]=t1.alpha[0]; - newnode[counter].alpha[1]=t1.alpha[1]; + int counter = get_global_id(0); + int tr_x[3], tr_y[3], tr_h[3], tr_w[3], i = 0; + GpuHidHaarTreeNode t1 = *(orinode + counter); +#pragma unroll + + for (i = 0; i < 3; i++) + { + tr_x[i] = (int)(t1.p[i][0] * scale + 0.5f); + tr_y[i] = (int)(t1.p[i][1] * scale + 0.5f); + tr_w[i] = (int)(t1.p[i][2] * scale + 0.5f); + tr_h[i] = (int)(t1.p[i][3] * scale + 0.5f); + } + + t1.weight[0] = t1.p[2][0] ? -(t1.weight[1] * tr_h[1] * tr_w[1] + t1.weight[2] * tr_h[2] * tr_w[2]) / (tr_h[0] * tr_w[0]) : -t1.weight[1] * tr_h[1] * tr_w[1] / (tr_h[0] * tr_w[0]); + counter += nodenum; +#pragma unroll + + for (i = 0; i < 3; i++) + { + newnode[counter].p[i][0] = tr_x[i]; + newnode[counter].p[i][1] = tr_y[i]; + newnode[counter].p[i][2] = tr_x[i] + tr_w[i]; + newnode[counter].p[i][3] = tr_y[i] + tr_h[i]; + newnode[counter].weight[i] = t1.weight[i] * weight_scale; + } + + newnode[counter].left = t1.left; + newnode[counter].right = t1.right; + newnode[counter].threshold = t1.threshold; + newnode[counter].alpha[0] = t1.alpha[0]; + newnode[counter].alpha[1] = t1.alpha[1]; } diff --git a/modules/ocl/src/kernels/imgproc_canny.cl b/modules/ocl/src/kernels/imgproc_canny.cl index 1022c3653..5ec446523 100644 --- a/modules/ocl/src/kernels/imgproc_canny.cl +++ b/modules/ocl/src/kernels/imgproc_canny.cl @@ -691,7 +691,7 @@ __kernel } barrier(CLK_LOCAL_MEM_FENCE); - int ind = grp_idy * get_num_groups(0) + grp_idx; + int ind = mad24(grp_idy, (int)get_local_size(0), grp_idx); if(ind < count) { @@ -714,10 +714,10 @@ __kernel } barrier(CLK_LOCAL_MEM_FENCE); - while (s_counter > 0 && s_counter <= stack_size - get_num_groups(0)) + while (s_counter > 0 && s_counter <= stack_size - get_local_size(0)) { const int subTaskIdx = lidx >> 3; - const int portion = min(s_counter, get_num_groups(0) >> 3); + const int portion = min(s_counter, get_local_size(0)>> 3); pos.x = pos.y = 0; @@ -757,7 +757,7 @@ __kernel ind = s_ind; - for (int i = lidx; i < s_counter; i += get_num_groups(0)) + for (int i = lidx; i < s_counter; i += get_local_size(0)) { st2[ind + i] = s_st[i]; } diff --git a/modules/ocl/src/kernels/pyrlk.cl b/modules/ocl/src/kernels/pyrlk.cl index ecdacf3b4..c772be78a 100644 --- a/modules/ocl/src/kernels/pyrlk.cl +++ b/modules/ocl/src/kernels/pyrlk.cl @@ -16,6 +16,7 @@ // // @Authors // Dachuan Zhao, dachuan@multicorewareinc.com +// Yao Wang, bitwangyaoyao@gmail.com // // Redistribution and use in source and binary forms, with or without modification, // are permitted provided that the following conditions are met: @@ -45,26 +46,6 @@ //#pragma OPENCL EXTENSION cl_amd_printf : enable -__kernel void arithm_muls_D5 (__global float *src1, int src1_step, int src1_offset, - __global float *dst, int dst_step, int dst_offset, - int rows, int cols, int dst_step1, float scalar) -{ - int x = get_global_id(0); - int y = get_global_id(1); - - if (x < cols && y < rows) - { - int src1_index = mad24(y, src1_step, (x << 2) + src1_offset); - int dst_index = mad24(y, dst_step, (x << 2) + dst_offset); - - float data1 = *((__global float *)((__global char *)src1 + src1_index)); - float tmp = data1 * scalar; - - *((__global float *)((__global char *)dst + dst_index)) = tmp; - } -} - - __kernel void calcSharrDeriv_vertical_C1_D0(__global const uchar* src, int srcStep, int rows, int cols, int cn, __global short* dx_buf, int dx_bufStep, __global short* dy_buf, int dy_bufStep) { const int x = get_global_id(0); @@ -202,6 +183,7 @@ float linearFilter_float(__global const float* src, int srcStep, int cn, float2 return src_row[(int)x] * iw00 + src_row[(int)x + cn] * iw01 + src_row1[(int)x] * iw10 + src_row1[(int)x + cn] * iw11, W_BITS1 - 5; } +#define BUFFER 64 void reduce3(float val1, float val2, float val3, __local float* smem1, __local float* smem2, __local float* smem3, int tid) { smem1[tid] = val1; @@ -209,6 +191,7 @@ void reduce3(float val1, float val2, float val3, __local float* smem1, __local f smem3[tid] = val3; barrier(CLK_LOCAL_MEM_FENCE); +#if BUFFER > 128 if (tid < 128) { smem1[tid] = val1 += smem1[tid + 128]; @@ -216,7 +199,9 @@ void reduce3(float val1, float val2, float val3, __local float* smem1, __local f smem3[tid] = val3 += smem3[tid + 128]; } barrier(CLK_LOCAL_MEM_FENCE); +#endif +#if BUFFER > 64 if (tid < 64) { smem1[tid] = val1 += smem1[tid + 64]; @@ -224,6 +209,7 @@ void reduce3(float val1, float val2, float val3, __local float* smem1, __local f smem3[tid] = val3 += smem3[tid + 64]; } barrier(CLK_LOCAL_MEM_FENCE); +#endif if (tid < 32) { @@ -263,19 +249,23 @@ void reduce2(float val1, float val2, __local float* smem1, __local float* smem2, smem2[tid] = val2; barrier(CLK_LOCAL_MEM_FENCE); +#if BUFFER > 128 if (tid < 128) { smem1[tid] = val1 += smem1[tid + 128]; smem2[tid] = val2 += smem2[tid + 128]; } barrier(CLK_LOCAL_MEM_FENCE); +#endif +#if BUFFER > 64 if (tid < 64) { smem1[tid] = val1 += smem1[tid + 64]; smem2[tid] = val2 += smem2[tid + 64]; } barrier(CLK_LOCAL_MEM_FENCE); +#endif if (tid < 32) { @@ -307,17 +297,21 @@ void reduce1(float val1, __local float* smem1, int tid) smem1[tid] = val1; barrier(CLK_LOCAL_MEM_FENCE); +#if BUFFER > 128 if (tid < 128) { smem1[tid] = val1 += smem1[tid + 128]; } barrier(CLK_LOCAL_MEM_FENCE); +#endif +#if BUFFER > 64 if (tid < 64) { smem1[tid] = val1 += smem1[tid + 64]; } barrier(CLK_LOCAL_MEM_FENCE); +#endif if (tid < 32) { @@ -333,60 +327,17 @@ void reduce1(float val1, __local float* smem1, int tid) } #define SCALE (1.0f / (1 << 20)) +#define THRESHOLD 0.01f +#define DIMENSION 21 // Image read mode __constant sampler_t sampler = CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_CLAMP_TO_EDGE | CLK_FILTER_LINEAR; -__kernel void lkSparse_C1_D5(image2d_t I, image2d_t J, - __global const float2* prevPts, int prevPtsStep, __global float2* nextPts, int nextPtsStep, __global uchar* status/*, __global float* err*/, const int level, const int rows, const int cols, int PATCH_X, int PATCH_Y, int cn, int c_winSize_x, int c_winSize_y, int c_iters, char calcErr, char GET_MIN_EIGENVALS) +void SetPatch(image2d_t I, float x, float y, + float* Pch, float* Dx, float* Dy, + float* A11, float* A12, float* A22) { - __local float smem1[256]; - __local float smem2[256]; - __local float smem3[256]; - - int c_halfWin_x = (c_winSize_x - 1) / 2; - int c_halfWin_y = (c_winSize_y - 1) / 2; - - const int tid = get_local_id(1) * get_local_size(0) + get_local_id(0); - - float2 prevPt = prevPts[get_group_id(0)]; - prevPt.x *= (1.0f / (1 << level)); - prevPt.y *= (1.0f / (1 << level)); - - if (prevPt.x < 0 || prevPt.x >= cols || prevPt.y < 0 || prevPt.y >= rows) - { - if (level == 0 && tid == 0) - { - status[get_group_id(0)] = 0; - - //if (calcErr) - // err[get_group_id(0)] = 0; - } - - return; - } - - prevPt.x -= c_halfWin_x; - prevPt.y -= c_halfWin_y; - - // extract the patch from the first image, compute covariation matrix of derivatives - - float A11 = 0; - float A12 = 0; - float A22 = 0; - - float I_patch[21][21]; - float dIdx_patch[21][21]; - float dIdy_patch[21][21]; - - for (int yBase = get_local_id(1), i = 0; yBase < c_winSize_y; yBase += get_local_size(1), ++i) - { - for (int xBase = get_local_id(0), j = 0; xBase < c_winSize_x; xBase += get_local_size(0), ++j) - { - float x = (prevPt.x + xBase + 0.5f); - float y = (prevPt.y + yBase + 0.5f); - - I_patch[i][j] = read_imagef(I, sampler, (float2)(x, y)).x; + *Pch = read_imagef(I, sampler, (float2)(x, y)).x; float dIdx = 3.0f * read_imagef(I, sampler, (float2)(x + 1, y - 1)).x + 10.0f * read_imagef(I, sampler, (float2)(x + 1, y)).x + 3.0f * read_imagef(I, sampler, (float2)(x + 1, y + 1)).x - (3.0f * read_imagef(I, sampler, (float2)(x - 1, y - 1)).x + 10.0f * read_imagef(I, sampler, (float2)(x - 1, y)).x + 3.0f * read_imagef(I, sampler, (float2)(x - 1, y + 1)).x); @@ -394,158 +345,104 @@ __kernel void lkSparse_C1_D5(image2d_t I, image2d_t J, float dIdy = 3.0f * read_imagef(I, sampler, (float2)(x - 1, y + 1)).x + 10.0f * read_imagef(I, sampler, (float2)(x, y + 1)).x + 3.0f * read_imagef(I, sampler, (float2)(x + 1, y + 1)).x - (3.0f * read_imagef(I, sampler, (float2)(x - 1, y - 1)).x + 10.0f * read_imagef(I, sampler, (float2)(x, y - 1)).x + 3.0f * read_imagef(I, sampler, (float2)(x + 1, y - 1)).x); - dIdx_patch[i][j] = dIdx; - dIdy_patch[i][j] = dIdy; - A11 += dIdx * dIdx; - A12 += dIdx * dIdy; - A22 += dIdy * dIdy; - } - } + *Dx = dIdx; + *Dy = dIdy; - reduce3(A11, A12, A22, smem1, smem2, smem3, tid); - barrier(CLK_LOCAL_MEM_FENCE); - - A11 = smem1[0]; - A12 = smem2[0]; - A22 = smem3[0]; - - float D = A11 * A22 - A12 * A12; - - //if (calcErr && GET_MIN_EIGENVALS && tid == 0) - // err[get_group_id(0)] = minEig; - - if (D < 1.192092896e-07f) - { - if (level == 0 && tid == 0) - status[get_group_id(0)] = 0; - - return; - } - - D = 1.f / D; - - A11 *= D; - A12 *= D; - A22 *= D; - - float2 nextPt = nextPts[get_group_id(0)]; - nextPt.x *= 2.0f; - nextPt.y *= 2.0f; - - nextPt.x -= c_halfWin_x; - nextPt.y -= c_halfWin_y; - - for (int k = 0; k < c_iters; ++k) - { - if (nextPt.x < -c_halfWin_x || nextPt.x >= cols || nextPt.y < -c_halfWin_y || nextPt.y >= rows) - { - if (tid == 0 && level == 0) - status[get_group_id(0)] = 0; - return; - } - - float b1 = 0; - float b2 = 0; - - for (int y = get_local_id(1), i = 0; y < c_winSize_y; y += get_local_size(1), ++i) - { - for (int x = get_local_id(0), j = 0; x < c_winSize_x; x += get_local_size(0), ++j) - { - float a = (nextPt.x + x + 0.5f); - float b = (nextPt.y + y + 0.5f); - - float I_val = I_patch[i][j]; - float J_val = read_imagef(J, sampler, (float2)(a, b)).x; - - float diff = (J_val - I_val) * 32.0f; - - b1 += diff * dIdx_patch[i][j]; - b2 += diff * dIdy_patch[i][j]; - } - } - - reduce2(b1, b2, smem1, smem2, tid); - barrier(CLK_LOCAL_MEM_FENCE); - - b1 = smem1[0]; - b2 = smem2[0]; - - float2 delta; - delta.x = A12 * b2 - A22 * b1; - delta.y = A12 * b1 - A11 * b2; - - nextPt.x += delta.x; - nextPt.y += delta.y; - - if (fabs(delta.x) < 0.01f && fabs(delta.y) < 0.01f) - break; - } - - float errval = 0.0f; - if (calcErr) - { - for (int y = get_local_id(1), i = 0; y < c_winSize_y; y += get_local_size(1), ++i) - { - for (int x = get_local_id(0), j = 0; x < c_winSize_x; x += get_local_size(0), ++j) - { - float a = (nextPt.x + x + 0.5f); - float b = (nextPt.y + y + 0.5f); - - float I_val = I_patch[i][j]; - float J_val = read_imagef(J, sampler, (float2)(a, b)).x; - - float diff = J_val - I_val; - - errval += fabs((float)diff); - } - } - - reduce1(errval, smem1, tid); - } - - if (tid == 0) - { - nextPt.x += c_halfWin_x; - nextPt.y += c_halfWin_y; - - nextPts[get_group_id(0)] = nextPt; - - //if (calcErr && !GET_MIN_EIGENVALS) - // err[get_group_id(0)] = errval; - } + *A11 += dIdx * dIdx; + *A12 += dIdx * dIdy; + *A22 += dIdy * dIdy; } -__kernel void lkSparse_C4_D5(image2d_t I, image2d_t J, - __global const float2* prevPts, int prevPtsStep, __global float2* nextPts, int nextPtsStep, __global uchar* status/*, __global float* err*/, const int level, const int rows, const int cols, int PATCH_X, int PATCH_Y, int cn, int c_winSize_x, int c_winSize_y, int c_iters, char calcErr, char GET_MIN_EIGENVALS) + +void GetPatch(image2d_t J, float x, float y, + float* Pch, float* Dx, float* Dy, + float* b1, float* b2) { - __local float smem1[256]; - __local float smem2[256]; - __local float smem3[256]; + float J_val = read_imagef(J, sampler, (float2)(x, y)).x; + float diff = (J_val - *Pch) * 32.0f; + *b1 += diff**Dx; + *b2 += diff**Dy; +} - int c_halfWin_x = (c_winSize_x - 1) / 2; - int c_halfWin_y = (c_winSize_y - 1) / 2; +void GetError(image2d_t J, const float x, const float y, const float* Pch, float* errval) +{ + float diff = read_imagef(J, sampler, (float2)(x,y)).x-*Pch; + *errval += fabs(diff); +} - const int tid = get_local_id(1) * get_local_size(0) + get_local_id(0); +void SetPatch4(image2d_t I, const float x, const float y, + float4* Pch, float4* Dx, float4* Dy, + float* A11, float* A12, float* A22) +{ + *Pch = read_imagef(I, sampler, (float2)(x, y)); - float2 prevPt = prevPts[get_group_id(0)]; - prevPt.x *= (1.0f / (1 << level)); - prevPt.y *= (1.0f / (1 << level)); + float4 dIdx = 3.0f * read_imagef(I, sampler, (float2)(x + 1, y - 1)) + 10.0f * read_imagef(I, sampler, (float2)(x + 1, y)) + 3.0f * read_imagef(I, sampler, (float2)(x + 1, y + 1)) - + (3.0f * read_imagef(I, sampler, (float2)(x - 1, y - 1)) + 10.0f * read_imagef(I, sampler, (float2)(x - 1, y)) + 3.0f * read_imagef(I, sampler, (float2)(x - 1, y + 1))); + + float4 dIdy = 3.0f * read_imagef(I, sampler, (float2)(x - 1, y + 1)) + 10.0f * read_imagef(I, sampler, (float2)(x, y + 1)) + 3.0f * read_imagef(I, sampler, (float2)(x + 1, y + 1)) - + (3.0f * read_imagef(I, sampler, (float2)(x - 1, y - 1)) + 10.0f * read_imagef(I, sampler, (float2)(x, y - 1)) + 3.0f * read_imagef(I, sampler, (float2)(x + 1, y - 1))); + + + *Dx = dIdx; + *Dy = dIdy; + float4 sqIdx = dIdx * dIdx; + *A11 += sqIdx.x + sqIdx.y + sqIdx.z; + sqIdx = dIdx * dIdy; + *A12 += sqIdx.x + sqIdx.y + sqIdx.z; + sqIdx = dIdy * dIdy; + *A22 += sqIdx.x + sqIdx.y + sqIdx.z; +} + +void GetPatch4(image2d_t J, const float x, const float y, + const float4* Pch, const float4* Dx, const float4* Dy, + float* b1, float* b2) +{ + float4 J_val = read_imagef(J, sampler, (float2)(x, y)); + float4 diff = (J_val - *Pch) * 32.0f; + float4 xdiff = diff* *Dx; + *b1 += xdiff.x + xdiff.y + xdiff.z; + xdiff = diff* *Dy; + *b2 += xdiff.x + xdiff.y + xdiff.z; +} + +void GetError4(image2d_t J, const float x, const float y, const float4* Pch, float* errval) +{ + float4 diff = read_imagef(J, sampler, (float2)(x,y))-*Pch; + *errval += fabs(diff.x) + fabs(diff.y) + fabs(diff.z); +} + + +__kernel void lkSparse_C1_D5(image2d_t I, image2d_t J, + __global const float2* prevPts, int prevPtsStep, __global float2* nextPts, int nextPtsStep, __global uchar* status, __global float* err, + const int level, const int rows, const int cols, int PATCH_X, int PATCH_Y, int cn, int c_winSize_x, int c_winSize_y, int c_iters, char calcErr) +{ + __local float smem1[BUFFER]; + __local float smem2[BUFFER]; + __local float smem3[BUFFER]; + + unsigned int xid=get_local_id(0); + unsigned int yid=get_local_id(1); + unsigned int gid=get_group_id(0); + unsigned int xsize=get_local_size(0); + unsigned int ysize=get_local_size(1); + int xBase, yBase, i, j, k; + + float2 c_halfWin = (float2)((c_winSize_x - 1)>>1, (c_winSize_y - 1)>>1); + + const int tid = mad24(yid, xsize, xid); + + float2 prevPt = prevPts[gid] / (1 << level); if (prevPt.x < 0 || prevPt.x >= cols || prevPt.y < 0 || prevPt.y >= rows) { - if (level == 0 && tid == 0) + if (tid == 0 && level == 0) { - status[get_group_id(0)] = 0; - - //if (calcErr) - // err[get_group_id(0)] = 0; + status[gid] = 0; } return; } - - prevPt.x -= c_halfWin_x; - prevPt.y -= c_halfWin_y; + prevPt -= c_halfWin; // extract the patch from the first image, compute covariation matrix of derivatives @@ -553,34 +450,68 @@ __kernel void lkSparse_C4_D5(image2d_t I, image2d_t J, float A12 = 0; float A22 = 0; - float4 I_patch[21][21]; - float4 dIdx_patch[21][21]; - float4 dIdy_patch[21][21]; + float I_patch[3][3]; + float dIdx_patch[3][3]; + float dIdy_patch[3][3]; - for (int yBase = get_local_id(1), i = 0; yBase < c_winSize_y; yBase += get_local_size(1), ++i) - { - for (int xBase = get_local_id(0), j = 0; xBase < c_winSize_x; xBase += get_local_size(0), ++j) + yBase=yid; { - float x = (prevPt.x + xBase + 0.5f); - float y = (prevPt.y + yBase + 0.5f); + xBase=xid; + SetPatch(I, prevPt.x + xBase + 0.5f, prevPt.y + yBase + 0.5f, + &I_patch[0][0], &dIdx_patch[0][0], &dIdy_patch[0][0], + &A11, &A12, &A22); - I_patch[i][j] = read_imagef(I, sampler, (float2)(x, y)).x; - float4 dIdx = 3.0f * read_imagef(I, sampler, (float2)(x + 1, y - 1)).x + 10.0f * read_imagef(I, sampler, (float2)(x + 1, y)).x + 3.0f * read_imagef(I, sampler, (float2)(x + 1, y + 1)).x - - (3.0f * read_imagef(I, sampler, (float2)(x - 1, y - 1)).x + 10.0f * read_imagef(I, sampler, (float2)(x - 1, y)).x + 3.0f * read_imagef(I, sampler, (float2)(x - 1, y + 1)).x); + xBase+=xsize; + SetPatch(I, prevPt.x + xBase + 0.5f, prevPt.y + yBase + 0.5f, + &I_patch[0][1], &dIdx_patch[0][1], &dIdy_patch[0][1], + &A11, &A12, &A22); - float4 dIdy = 3.0f * read_imagef(I, sampler, (float2)(x - 1, y + 1)).x + 10.0f * read_imagef(I, sampler, (float2)(x, y + 1)).x + 3.0f * read_imagef(I, sampler, (float2)(x + 1, y + 1)).x - - (3.0f * read_imagef(I, sampler, (float2)(x - 1, y - 1)).x + 10.0f * read_imagef(I, sampler, (float2)(x, y - 1)).x + 3.0f * read_imagef(I, sampler, (float2)(x + 1, y - 1)).x); - - dIdx_patch[i][j] = dIdx; - dIdy_patch[i][j] = dIdy; - - A11 += (dIdx * dIdx).x + (dIdx * dIdx).y + (dIdx * dIdx).z; - A12 += (dIdx * dIdy).x + (dIdx * dIdy).y + (dIdx * dIdy).z; - A22 += (dIdy * dIdy).x + (dIdy * dIdy).y + (dIdy * dIdy).z; + xBase+=xsize; + if(xBase= cols || nextPt.y < -c_halfWin_y || nextPt.y >= rows) + if (prevPt.x < -c_halfWin.x || prevPt.x >= cols || prevPt.y < -c_halfWin.y || prevPt.y >= rows) { if (tid == 0 && level == 0) - status[get_group_id(0)] = 0; + status[gid] = 0; return; } float b1 = 0; float b2 = 0; - for (int y = get_local_id(1), i = 0; y < c_winSize_y; y += get_local_size(1), ++i) - { - for (int x = get_local_id(0), j = 0; x < c_winSize_x; x += get_local_size(0), ++j) - { - float a = (nextPt.x + x + 0.5f); - float b = (nextPt.y + y + 0.5f); + yBase=yid; + { + xBase=xid; + GetPatch(J, prevPt.x + xBase + 0.5f, prevPt.y + yBase + 0.5f, + &I_patch[0][0], &dIdx_patch[0][0], &dIdy_patch[0][0], + &b1, &b2); - float4 I_val = I_patch[i][j]; - float4 J_val = read_imagef(J, sampler, (float2)(a, b)).x; - float4 diff = (J_val - I_val) * 32.0f; + xBase+=xsize; + GetPatch(J, prevPt.x + xBase + 0.5f, prevPt.y + yBase + 0.5f, + &I_patch[0][1], &dIdx_patch[0][1], &dIdy_patch[0][1], + &b1, &b2); - b1 += (diff * dIdx_patch[i][j]).x + (diff * dIdx_patch[i][j]).y + (diff * dIdx_patch[i][j]).z; - b2 += (diff * dIdy_patch[i][j]).x + (diff * dIdy_patch[i][j]).y + (diff * dIdy_patch[i][j]).z; - } - } + xBase+=xsize; + if(xBase>1, (c_winSize_y - 1)>>1); + + const int tid = mad24(yid, xsize, xid); + + float2 nextPt = prevPts[gid]/(1<= cols || nextPt.y < 0 || nextPt.y >= rows) + { + if (tid == 0 && level == 0) + { + status[gid] = 0; + } + + return; + } + + nextPt -= c_halfWin; + + // extract the patch from the first image, compute covariation matrix of derivatives + + float A11 = 0; + float A12 = 0; + float A22 = 0; + + float4 I_patch[8]; + float4 dIdx_patch[8]; + float4 dIdy_patch[8]; + float4 I_add,Dx_add,Dy_add; + + yBase=yid; + { + xBase=xid; + SetPatch4(I, nextPt.x + xBase + 0.5f, nextPt.y + yBase + 0.5f, + &I_patch[0], &dIdx_patch[0], &dIdy_patch[0], + &A11, &A12, &A22); + + + xBase+=xsize; + SetPatch4(I, nextPt.x + xBase + 0.5f, nextPt.y + yBase + 0.5f, + &I_patch[1], &dIdx_patch[1], &dIdy_patch[1], + &A11, &A12, &A22); + + xBase+=xsize; + if(xBase= cols || nextPt.y < -c_halfWin.y || nextPt.y >= rows) + { + if (tid == 0 && level == 0) + status[gid] = 0; + return; + } + + float b1 = 0; + float b2 = 0; + + yBase=yid; + { + xBase=xid; + GetPatch4(J, nextPt.x + xBase + 0.5f, nextPt.y + yBase + 0.5f, + &I_patch[0], &dIdx_patch[0], &dIdy_patch[0], + &b1, &b2); + + + xBase+=xsize; + GetPatch4(J, nextPt.x + xBase + 0.5f, nextPt.y + yBase + 0.5f, + &I_patch[1], &dIdx_patch[1], &dIdy_patch[1], + &b1, &b2); + + xBase+=xsize; + if(xBase 128 + + if (tid < 128) + { + smem1[tid] = val1 += smem1[tid + 128]; + smem2[tid] = val2 += smem2[tid + 128]; + smem3[tid] = val3 += smem3[tid + 128]; + } + + barrier(CLK_LOCAL_MEM_FENCE); +#endif + +#if BUFFER > 64 + + if (tid < 64) + { + smem1[tid] = val1 += smem1[tid + 64]; + smem2[tid] = val2 += smem2[tid + 64]; + smem3[tid] = val3 += smem3[tid + 64]; + } + + barrier(CLK_LOCAL_MEM_FENCE); +#endif + + if (tid < 32) + { + smem1[tid] = val1 += smem1[tid + 32]; + smem2[tid] = val2 += smem2[tid + 32]; + smem3[tid] = val3 += smem3[tid + 32]; + } + + barrier(CLK_LOCAL_MEM_FENCE); + + if (tid < 16) + { + smem1[tid] = val1 += smem1[tid + 16]; + smem2[tid] = val2 += smem2[tid + 16]; + smem3[tid] = val3 += smem3[tid + 16]; + } + + barrier(CLK_LOCAL_MEM_FENCE); + + if (tid < 8) + { + volatile __local float *vmem1 = smem1; + volatile __local float *vmem2 = smem2; + volatile __local float *vmem3 = smem3; + + vmem1[tid] = val1 += vmem1[tid + 8]; + vmem2[tid] = val2 += vmem2[tid + 8]; + vmem3[tid] = val3 += vmem3[tid + 8]; + + vmem1[tid] = val1 += vmem1[tid + 4]; + vmem2[tid] = val2 += vmem2[tid + 4]; + vmem3[tid] = val3 += vmem3[tid + 4]; + + vmem1[tid] = val1 += vmem1[tid + 2]; + vmem2[tid] = val2 += vmem2[tid + 2]; + vmem3[tid] = val3 += vmem3[tid + 2]; + + vmem1[tid] = val1 += vmem1[tid + 1]; + vmem2[tid] = val2 += vmem2[tid + 1]; + vmem3[tid] = val3 += vmem3[tid + 1]; + } +} + +void reduce2(float val1, float val2, __local float *smem1, __local float *smem2, int tid) +{ + smem1[tid] = val1; + smem2[tid] = val2; + barrier(CLK_LOCAL_MEM_FENCE); + +#if BUFFER > 128 + + if (tid < 128) + { + smem1[tid] = val1 += smem1[tid + 128]; + smem2[tid] = val2 += smem2[tid + 128]; + } + + barrier(CLK_LOCAL_MEM_FENCE); +#endif + +#if BUFFER > 64 + + if (tid < 64) + { + smem1[tid] = val1 += smem1[tid + 64]; + smem2[tid] = val2 += smem2[tid + 64]; + } + + barrier(CLK_LOCAL_MEM_FENCE); +#endif + + if (tid < 32) + { + smem1[tid] = val1 += smem1[tid + 32]; + smem2[tid] = val2 += smem2[tid + 32]; + } + + barrier(CLK_LOCAL_MEM_FENCE); + + if (tid < 16) + { + smem1[tid] = val1 += smem1[tid + 16]; + smem2[tid] = val2 += smem2[tid + 16]; + } + + barrier(CLK_LOCAL_MEM_FENCE); + + if (tid < 8) + { + volatile __local float *vmem1 = smem1; + volatile __local float *vmem2 = smem2; + + vmem1[tid] = val1 += vmem1[tid + 8]; + vmem2[tid] = val2 += vmem2[tid + 8]; + + vmem1[tid] = val1 += vmem1[tid + 4]; + vmem2[tid] = val2 += vmem2[tid + 4]; + + vmem1[tid] = val1 += vmem1[tid + 2]; + vmem2[tid] = val2 += vmem2[tid + 2]; + + vmem1[tid] = val1 += vmem1[tid + 1]; + vmem2[tid] = val2 += vmem2[tid + 1]; + } +} + +void reduce1(float val1, __local float *smem1, int tid) +{ + smem1[tid] = val1; + barrier(CLK_LOCAL_MEM_FENCE); + +#if BUFFER > 128 + + if (tid < 128) + { + smem1[tid] = val1 += smem1[tid + 128]; + } + + barrier(CLK_LOCAL_MEM_FENCE); +#endif + +#if BUFFER > 64 + + if (tid < 64) + { + smem1[tid] = val1 += smem1[tid + 64]; + } + + barrier(CLK_LOCAL_MEM_FENCE); +#endif + + if (tid < 32) + { + smem1[tid] = val1 += smem1[tid + 32]; + } + + barrier(CLK_LOCAL_MEM_FENCE); + + if (tid < 16) + { + volatile __local float *vmem1 = smem1; + + vmem1[tid] = val1 += vmem1[tid + 16]; + } + + barrier(CLK_LOCAL_MEM_FENCE); + + if (tid < 8) + { + volatile __local float *vmem1 = smem1; + + vmem1[tid] = val1 += vmem1[tid + 8]; + vmem1[tid] = val1 += vmem1[tid + 4]; + vmem1[tid] = val1 += vmem1[tid + 2]; + vmem1[tid] = val1 += vmem1[tid + 1]; + } +} + +#define SCALE (1.0f / (1 << 20)) +#define THRESHOLD 0.01f +#define DIMENSION 21 + +float readImage2Df_C1(__global const float *image, const float x, const float y, const int rows, const int cols, const int elemCntPerRow) +{ + float2 coor = (float2)(x, y); + + int i0 = clamp((int)floor(coor.x), 0, cols - 1); + int j0 = clamp((int)floor(coor.y), 0, rows - 1); + int i1 = clamp((int)floor(coor.x) + 1, 0, cols - 1); + int j1 = clamp((int)floor(coor.y) + 1, 0, rows - 1); + float a = coor.x - floor(coor.x); + float b = coor.y - floor(coor.y); + + return (1 - a) * (1 - b) * image[mad24(j0, elemCntPerRow, i0)] + + a * (1 - b) * image[mad24(j0, elemCntPerRow, i1)] + + (1 - a) * b * image[mad24(j1, elemCntPerRow, i0)] + + a * b * image[mad24(j1, elemCntPerRow, i1)]; +} + +__kernel void lkSparse_C1_D5(__global const float *I, __global const float *J, + __global const float2 *prevPts, int prevPtsStep, __global float2 *nextPts, int nextPtsStep, __global uchar *status, __global float *err, + const int level, const int rows, const int cols, const int elemCntPerRow, + int PATCH_X, int PATCH_Y, int cn, int c_winSize_x, int c_winSize_y, int c_iters, char calcErr) +{ + __local float smem1[BUFFER]; + __local float smem2[BUFFER]; + __local float smem3[BUFFER]; + + float2 c_halfWin = (float2)((c_winSize_x - 1) >> 1, (c_winSize_y - 1) >> 1); + + const int tid = mad24(get_local_id(1), get_local_size(0), get_local_id(0)); + + float2 prevPt = prevPts[get_group_id(0)] * (1.0f / (1 << level)); + + if (prevPt.x < 0 || prevPt.x >= cols || prevPt.y < 0 || prevPt.y >= rows) + { + if (tid == 0 && level == 0) + { + status[get_group_id(0)] = 0; + } + + return; + } + + prevPt -= c_halfWin; + + // extract the patch from the first image, compute covariation matrix of derivatives + + float A11 = 0; + float A12 = 0; + float A22 = 0; + + float I_patch[1][3]; + float dIdx_patch[1][3]; + float dIdy_patch[1][3]; + + for (int yBase = get_local_id(1), i = 0; yBase < c_winSize_y; yBase += get_local_size(1), ++i) + { + for (int xBase = get_local_id(0), j = 0; xBase < c_winSize_x; xBase += get_local_size(0), ++j) + { + float x = (prevPt.x + xBase); + float y = (prevPt.y + yBase); + + I_patch[i][j] = readImage2Df_C1(I, x, y, rows, cols, elemCntPerRow); + float dIdx = 3.0f * readImage2Df_C1(I, x + 1, y - 1, rows, cols, elemCntPerRow) + 10.0f * readImage2Df_C1(I, x + 1, y, rows, cols, elemCntPerRow) + 3.0f * readImage2Df_C1(I, x + 1, y + 1, rows, cols, elemCntPerRow) - + (3.0f * readImage2Df_C1(I, x - 1, y - 1, rows, cols, elemCntPerRow) + 10.0f * readImage2Df_C1(I, x - 1, y, rows, cols, elemCntPerRow) + 3.0f * readImage2Df_C1(I, x - 1, y + 1, rows, cols, elemCntPerRow)); + + float dIdy = 3.0f * readImage2Df_C1(I, x - 1, y + 1, rows, cols, elemCntPerRow) + 10.0f * readImage2Df_C1(I, x, y + 1, rows, cols, elemCntPerRow) + 3.0f * readImage2Df_C1(I, x + 1, y + 1, rows, cols, elemCntPerRow) - + (3.0f * readImage2Df_C1(I, x - 1, y - 1, rows, cols, elemCntPerRow) + 10.0f * readImage2Df_C1(I, x, y - 1, rows, cols, elemCntPerRow) + 3.0f * readImage2Df_C1(I, x + 1, y - 1, rows, cols, elemCntPerRow)); + + dIdx_patch[i][j] = dIdx; + dIdy_patch[i][j] = dIdy; + + A11 += dIdx * dIdx; + A12 += dIdx * dIdy; + A22 += dIdy * dIdy; + } + } + + reduce3(A11, A12, A22, smem1, smem2, smem3, tid); + barrier(CLK_LOCAL_MEM_FENCE); + + A11 = smem1[0]; + A12 = smem2[0]; + A22 = smem3[0]; + + float D = A11 * A22 - A12 * A12; + + if (D < 1.192092896e-07f) + { + if (tid == 0 && level == 0) + { + status[get_group_id(0)] = 0; + } + + return; + } + + D = 1.f / D; + + A11 *= D; + A12 *= D; + A22 *= D; + + float2 nextPt = nextPts[get_group_id(0)]; + nextPt = nextPt * 2.0f - c_halfWin; + + for (int k = 0; k < c_iters; ++k) + { + if (nextPt.x < -c_halfWin.x || nextPt.x >= cols || nextPt.y < -c_halfWin.y || nextPt.y >= rows) + { + if (tid == 0 && level == 0) + { + status[get_group_id(0)] = 0; + } + + return; + } + + float b1 = 0; + float b2 = 0; + + for (int y = get_local_id(1), i = 0; y < c_winSize_y; y += get_local_size(1), ++i) + { + for (int x = get_local_id(0), j = 0; x < c_winSize_x; x += get_local_size(0), ++j) + { + float diff = (readImage2Df_C1(J, nextPt.x + x, nextPt.y + y, rows, cols, elemCntPerRow) - I_patch[i][j]) * 32.0f; + + b1 += diff * dIdx_patch[i][j]; + b2 += diff * dIdy_patch[i][j]; + } + } + + reduce2(b1, b2, smem1, smem2, tid); + barrier(CLK_LOCAL_MEM_FENCE); + + b1 = smem1[0]; + b2 = smem2[0]; + + float2 delta; + delta.x = A12 * b2 - A22 * b1; + delta.y = A12 * b1 - A11 * b2; + + nextPt += delta; + + //if (fabs(delta.x) < THRESHOLD && fabs(delta.y) < THRESHOLD) + // break; + } + + float errval = 0.0f; + + if (calcErr) + { + for (int y = get_local_id(1), i = 0; y < c_winSize_y; y += get_local_size(1), ++i) + { + for (int x = get_local_id(0), j = 0; x < c_winSize_x; x += get_local_size(0), ++j) + { + float diff = readImage2Df_C1(J, nextPt.x + x, nextPt.y + y, rows, cols, elemCntPerRow) - I_patch[i][j]; + + errval += fabs(diff); + } + } + + reduce1(errval, smem1, tid); + } + + if (tid == 0) + { + nextPt += c_halfWin; + + nextPts[get_group_id(0)] = nextPt; + + if (calcErr) + { + err[get_group_id(0)] = smem1[0] / (c_winSize_x * c_winSize_y); + } + } +} + +float4 readImage2Df_C4(__global const float4 *image, const float x, const float y, const int rows, const int cols, const int elemCntPerRow) +{ + float2 coor = (float2)(x, y); + + int i0 = clamp((int)floor(coor.x), 0, cols - 1); + int j0 = clamp((int)floor(coor.y), 0, rows - 1); + int i1 = clamp((int)floor(coor.x) + 1, 0, cols - 1); + int j1 = clamp((int)floor(coor.y) + 1, 0, rows - 1); + float a = coor.x - floor(coor.x); + float b = coor.y - floor(coor.y); + + return (1 - a) * (1 - b) * image[mad24(j0, elemCntPerRow, i0)] + + a * (1 - b) * image[mad24(j0, elemCntPerRow, i1)] + + (1 - a) * b * image[mad24(j1, elemCntPerRow, i0)] + + a * b * image[mad24(j1, elemCntPerRow, i1)]; +} + +__kernel void lkSparse_C4_D5(__global const float *I, __global const float *J, + __global const float2 *prevPts, int prevPtsStep, __global float2 *nextPts, int nextPtsStep, __global uchar *status, __global float *err, + const int level, const int rows, const int cols, const int elemCntPerRow, + int PATCH_X, int PATCH_Y, int cn, int c_winSize_x, int c_winSize_y, int c_iters, char calcErr) +{ + __local float smem1[BUFFER]; + __local float smem2[BUFFER]; + __local float smem3[BUFFER]; + + float2 c_halfWin = (float2)((c_winSize_x - 1) >> 1, (c_winSize_y - 1) >> 1); + + const int tid = mad24(get_local_id(1), get_local_size(0), get_local_id(0)); + + float2 prevPt = prevPts[get_group_id(0)] * (1.0f / (1 << level)); + + if (prevPt.x < 0 || prevPt.x >= cols || prevPt.y < 0 || prevPt.y >= rows) + { + if (tid == 0 && level == 0) + { + status[get_group_id(0)] = 0; + } + + return; + } + + prevPt -= c_halfWin; + + // extract the patch from the first image, compute covariation matrix of derivatives + + float A11 = 0; + float A12 = 0; + float A22 = 0; + + float4 I_patch[1][3]; + float4 dIdx_patch[1][3]; + float4 dIdy_patch[1][3]; + + __global float4 *ptrI = (__global float4 *)I; + + for (int yBase = get_local_id(1), i = 0; yBase < c_winSize_y; yBase += get_local_size(1), ++i) + { + for (int xBase = get_local_id(0), j = 0; xBase < c_winSize_x; xBase += get_local_size(0), ++j) + { + float x = (prevPt.x + xBase); + float y = (prevPt.y + yBase); + + I_patch[i][j] = readImage2Df_C4(ptrI, x, y, rows, cols, elemCntPerRow); + + float4 dIdx = 3.0f * readImage2Df_C4(ptrI, x + 1, y - 1, rows, cols, elemCntPerRow) + 10.0f * readImage2Df_C4(ptrI, x + 1, y, rows, cols, elemCntPerRow) + 3.0f * readImage2Df_C4(ptrI, x + 1, y + 1, rows, cols, elemCntPerRow) - + (3.0f * readImage2Df_C4(ptrI, x - 1, y - 1, rows, cols, elemCntPerRow) + 10.0f * readImage2Df_C4(ptrI, x - 1, y, rows, cols, elemCntPerRow) + 3.0f * readImage2Df_C4(ptrI, x - 1, y + 1, rows, cols, elemCntPerRow)); + + float4 dIdy = 3.0f * readImage2Df_C4(ptrI, x - 1, y + 1, rows, cols, elemCntPerRow) + 10.0f * readImage2Df_C4(ptrI, x, y + 1, rows, cols, elemCntPerRow) + 3.0f * readImage2Df_C4(ptrI, x + 1, y + 1, rows, cols, elemCntPerRow) - + (3.0f * readImage2Df_C4(ptrI, x - 1, y - 1, rows, cols, elemCntPerRow) + 10.0f * readImage2Df_C4(ptrI, x, y - 1, rows, cols, elemCntPerRow) + 3.0f * readImage2Df_C4(ptrI, x + 1, y - 1, rows, cols, elemCntPerRow)); + + dIdx_patch[i][j] = dIdx; + dIdy_patch[i][j] = dIdy; + + A11 += (dIdx * dIdx).x + (dIdx * dIdx).y + (dIdx * dIdx).z; + A12 += (dIdx * dIdy).x + (dIdx * dIdy).y + (dIdx * dIdy).z; + A22 += (dIdy * dIdy).x + (dIdy * dIdy).y + (dIdy * dIdy).z; + } + } + + reduce3(A11, A12, A22, smem1, smem2, smem3, tid); + barrier(CLK_LOCAL_MEM_FENCE); + + A11 = smem1[0]; + A12 = smem2[0]; + A22 = smem3[0]; + + float D = A11 * A22 - A12 * A12; + //pD[get_group_id(0)] = D; + + if (D < 1.192092896e-07f) + { + if (tid == 0 && level == 0) + { + status[get_group_id(0)] = 0; + } + + return; + } + + D = 1.f / D; + + A11 *= D; + A12 *= D; + A22 *= D; + + float2 nextPt = nextPts[get_group_id(0)]; + + nextPt = nextPt * 2.0f - c_halfWin; + + __global float4 *ptrJ = (__global float4 *)J; + + for (int k = 0; k < c_iters; ++k) + { + if (nextPt.x < -c_halfWin.x || nextPt.x >= cols || nextPt.y < -c_halfWin.y || nextPt.y >= rows) + { + if (tid == 0 && level == 0) + { + status[get_group_id(0)] = 0; + } + + return; + } + + float b1 = 0; + float b2 = 0; + + for (int y = get_local_id(1), i = 0; y < c_winSize_y; y += get_local_size(1), ++i) + { + for (int x = get_local_id(0), j = 0; x < c_winSize_x; x += get_local_size(0), ++j) + { + float4 diff = (readImage2Df_C4(ptrJ, nextPt.x + x, nextPt.y + y, rows, cols, elemCntPerRow) - I_patch[i][j]) * 32.0f; + + b1 += (diff * dIdx_patch[i][j]).x + (diff * dIdx_patch[i][j]).y + (diff * dIdx_patch[i][j]).z; + b2 += (diff * dIdy_patch[i][j]).x + (diff * dIdy_patch[i][j]).y + (diff * dIdy_patch[i][j]).z; + } + } + + reduce2(b1, b2, smem1, smem2, tid); + barrier(CLK_LOCAL_MEM_FENCE); + + b1 = smem1[0]; + b2 = smem2[0]; + + float2 delta; + delta.x = A12 * b2 - A22 * b1; + delta.y = A12 * b1 - A11 * b2; + + nextPt += delta; + + //if (fabs(delta.x) < THRESHOLD && fabs(delta.y) < THRESHOLD) + // break; + } + + float errval = 0.0f; + + if (calcErr) + { + for (int y = get_local_id(1), i = 0; y < c_winSize_y; y += get_local_size(1), ++i) + { + for (int x = get_local_id(0), j = 0; x < c_winSize_x; x += get_local_size(0), ++j) + { + float4 diff = readImage2Df_C4(ptrJ, nextPt.x + x, nextPt.y + y, rows, cols, elemCntPerRow) - I_patch[i][j]; + + errval += fabs(diff.x) + fabs(diff.y) + fabs(diff.z); + } + } + + reduce1(errval, smem1, tid); + } + + if (tid == 0) + { + nextPt += c_halfWin; + nextPts[get_group_id(0)] = nextPt; + + if (calcErr) + { + err[get_group_id(0)] = smem1[0] / (3 * c_winSize_x * c_winSize_y); + } + } +} + +int readImage2Di_C1(__global const int *image, float2 coor, int2 size, const int elemCntPerRow) +{ + int i = clamp((int)floor(coor.x), 0, size.x - 1); + int j = clamp((int)floor(coor.y), 0, size.y - 1); + return image[mad24(j, elemCntPerRow, i)]; +} + +__kernel void lkDense_C1_D0(__global const int *I, __global const int *J, __global float *u, int uStep, __global float *v, int vStep, __global const float *prevU, int prevUStep, __global const float *prevV, int prevVStep, + const int rows, const int cols, /*__global float* err, int errStep, int cn,*/ + const int elemCntPerRow, int c_winSize_x, int c_winSize_y, int c_iters, char calcErr) +{ + int c_halfWin_x = (c_winSize_x - 1) / 2; + int c_halfWin_y = (c_winSize_y - 1) / 2; + + const int patchWidth = get_local_size(0) + 2 * c_halfWin_x; + const int patchHeight = get_local_size(1) + 2 * c_halfWin_y; + + __local int smem[8192]; + + __local int *I_patch = smem; + __local int *dIdx_patch = I_patch + patchWidth * patchHeight; + __local int *dIdy_patch = dIdx_patch + patchWidth * patchHeight; + + const int xBase = get_group_id(0) * get_local_size(0); + const int yBase = get_group_id(1) * get_local_size(1); + int2 size = (int2)(cols, rows); + + for (int i = get_local_id(1); i < patchHeight; i += get_local_size(1)) + { + for (int j = get_local_id(0); j < patchWidth; j += get_local_size(0)) + { + float x = xBase - c_halfWin_x + j + 0.5f; + float y = yBase - c_halfWin_y + i + 0.5f; + + I_patch[i * patchWidth + j] = readImage2Di_C1(I, (float2)(x, y), size, elemCntPerRow); + + // Sharr Deriv + + dIdx_patch[i * patchWidth + j] = 3 * readImage2Di_C1(I, (float2)(x + 1, y - 1), size, elemCntPerRow) + 10 * readImage2Di_C1(I, (float2)(x + 1, y), size, elemCntPerRow) + 3 * readImage2Di_C1(I, (float2)(x + 1, y + 1), size, elemCntPerRow) - + (3 * readImage2Di_C1(I, (float2)(x - 1, y - 1), size, elemCntPerRow) + 10 * readImage2Di_C1(I, (float2)(x - 1, y), size, elemCntPerRow) + 3 * readImage2Di_C1(I, (float2)(x - 1, y + 1), size, elemCntPerRow)); + + dIdy_patch[i * patchWidth + j] = 3 * readImage2Di_C1(I, (float2)(x - 1, y + 1), size, elemCntPerRow) + 10 * readImage2Di_C1(I, (float2)(x, y + 1), size, elemCntPerRow) + 3 * readImage2Di_C1(I, (float2)(x + 1, y + 1), size, elemCntPerRow) - + (3 * readImage2Di_C1(I, (float2)(x - 1, y - 1), size, elemCntPerRow) + 10 * readImage2Di_C1(I, (float2)(x, y - 1), size, elemCntPerRow) + 3 * readImage2Di_C1(I, (float2)(x + 1, y - 1), size, elemCntPerRow)); + } + } + + barrier(CLK_LOCAL_MEM_FENCE); + + // extract the patch from the first image, compute covariation matrix of derivatives + + const int x = get_global_id(0); + const int y = get_global_id(1); + + if (x >= cols || y >= rows) + { + return; + } + + int A11i = 0; + int A12i = 0; + int A22i = 0; + + for (int i = 0; i < c_winSize_y; ++i) + { + for (int j = 0; j < c_winSize_x; ++j) + { + int dIdx = dIdx_patch[(get_local_id(1) + i) * patchWidth + (get_local_id(0) + j)]; + int dIdy = dIdy_patch[(get_local_id(1) + i) * patchWidth + (get_local_id(0) + j)]; + + A11i += dIdx * dIdx; + A12i += dIdx * dIdy; + A22i += dIdy * dIdy; + } + } + + float A11 = A11i; + float A12 = A12i; + float A22 = A22i; + + float D = A11 * A22 - A12 * A12; + + //if (calcErr && GET_MIN_EIGENVALS) + // (err + y * errStep)[x] = minEig; + + if (D < 1.192092896e-07f) + { + //if (calcErr) + // err(y, x) = 3.402823466e+38f; + + return; + } + + D = 1.f / D; + + A11 *= D; + A12 *= D; + A22 *= D; + + float2 nextPt; + nextPt.x = x + prevU[y / 2 * prevUStep / 4 + x / 2] * 2.0f; + nextPt.y = y + prevV[y / 2 * prevVStep / 4 + x / 2] * 2.0f; + + for (int k = 0; k < c_iters; ++k) + { + if (nextPt.x < 0 || nextPt.x >= cols || nextPt.y < 0 || nextPt.y >= rows) + { + //if (calcErr) + // err(y, x) = 3.402823466e+38f; + + return; + } + + int b1 = 0; + int b2 = 0; + + for (int i = 0; i < c_winSize_y; ++i) + { + for (int j = 0; j < c_winSize_x; ++j) + { + int iI = I_patch[(get_local_id(1) + i) * patchWidth + get_local_id(0) + j]; + int iJ = readImage2Di_C1(J, (float2)(nextPt.x - c_halfWin_x + j + 0.5f, nextPt.y - c_halfWin_y + i + 0.5f), size, elemCntPerRow); + + int diff = (iJ - iI) * 32; + + int dIdx = dIdx_patch[(get_local_id(1) + i) * patchWidth + (get_local_id(0) + j)]; + int dIdy = dIdy_patch[(get_local_id(1) + i) * patchWidth + (get_local_id(0) + j)]; + + b1 += diff * dIdx; + b2 += diff * dIdy; + } + } + + float2 delta; + delta.x = A12 * b2 - A22 * b1; + delta.y = A12 * b1 - A11 * b2; + + nextPt.x += delta.x; + nextPt.y += delta.y; + + if (fabs(delta.x) < 0.01f && fabs(delta.y) < 0.01f) + { + break; + } + } + + u[y * uStep / 4 + x] = nextPt.x - x; + v[y * vStep / 4 + x] = nextPt.y - y; + + if (calcErr) + { + int errval = 0; + + for (int i = 0; i < c_winSize_y; ++i) + { + for (int j = 0; j < c_winSize_x; ++j) + { + int iI = I_patch[(get_local_id(1) + i) * patchWidth + get_local_id(0) + j]; + int iJ = readImage2Di_C1(J, (float2)(nextPt.x - c_halfWin_x + j + 0.5f, nextPt.y - c_halfWin_y + i + 0.5f), size, elemCntPerRow); + + errval += abs(iJ - iI); + } + } + + //err[y * errStep / 4 + x] = static_cast(errval) / (c_winSize_x * c_winSize_y); + } +} diff --git a/modules/ocl/src/match_template.cpp b/modules/ocl/src/match_template.cpp index b2c08afcf..8a2705646 100644 --- a/modules/ocl/src/match_template.cpp +++ b/modules/ocl/src/match_template.cpp @@ -99,7 +99,7 @@ namespace cv // Evaluates optimal template's area threshold. If // template's area is less than the threshold, we use naive match // template version, otherwise FFT-based (if available) - int getTemplateThreshold(int method, int depth) + static int getTemplateThreshold(int method, int depth) { switch (method) { diff --git a/modules/ocl/src/matrix_operations.cpp b/modules/ocl/src/matrix_operations.cpp index b2baed838..b2b8b5ff2 100644 --- a/modules/ocl/src/matrix_operations.cpp +++ b/modules/ocl/src/matrix_operations.cpp @@ -126,7 +126,7 @@ namespace cv //////////////////////////////////////////////////////////////////////// // convert_C3C4 -void convert_C3C4(const cl_mem &src, oclMat &dst) +static void convert_C3C4(const cl_mem &src, oclMat &dst) { int dstStep_in_pixel = dst.step1() / dst.oclchannels(); int pixel_end = dst.wholecols * dst.wholerows - 1; @@ -174,7 +174,7 @@ void convert_C3C4(const cl_mem &src, oclMat &dst) } //////////////////////////////////////////////////////////////////////// // convert_C4C3 -void convert_C4C3(const oclMat &src, cl_mem &dst) +static void convert_C4C3(const oclMat &src, cl_mem &dst) { int srcStep_in_pixel = src.step1() / src.oclchannels(); int pixel_end = src.wholecols * src.wholerows - 1; @@ -336,7 +336,7 @@ inline int divUp(int total, int grain) /////////////////////////////////////////////////////////////////////////// ////////////////////////////////// CopyTo ///////////////////////////////// /////////////////////////////////////////////////////////////////////////// -void copy_to_with_mask(const oclMat &src, oclMat &dst, const oclMat &mask, string kernelName) +static void copy_to_with_mask(const oclMat &src, oclMat &dst, const oclMat &mask, string kernelName) { CV_DbgAssert( dst.rows == mask.rows && dst.cols == mask.cols && src.rows == dst.rows && src.cols == dst.cols @@ -401,7 +401,7 @@ void cv::ocl::oclMat::copyTo( oclMat &mat, const oclMat &mask) const /////////////////////////////////////////////////////////////////////////// //////////////////////////////// ConvertTo //////////////////////////////// /////////////////////////////////////////////////////////////////////////// -void convert_run(const oclMat &src, oclMat &dst, double alpha, double beta) +static void convert_run(const oclMat &src, oclMat &dst, double alpha, double beta) { string kernelName = "convert_to_S"; stringstream idxStr; @@ -472,7 +472,7 @@ oclMat &cv::ocl::oclMat::operator = (const Scalar &s) setTo(s); return *this; } -void set_to_withoutmask_run(const oclMat &dst, const Scalar &scalar, string kernelName) +static void set_to_withoutmask_run(const oclMat &dst, const Scalar &scalar, string kernelName) { vector > args; @@ -642,7 +642,7 @@ void set_to_withoutmask_run(const oclMat &dst, const Scalar &scalar, string kern default: CV_Error(CV_StsUnsupportedFormat, "unknown depth"); } -#if CL_VERSION_1_2 +#ifdef CL_VERSION_1_2 if(dst.offset == 0 && dst.cols == dst.wholecols) { clEnqueueFillBuffer(dst.clCxt->impl->clCmdQueue, (cl_mem)dst.data, args[0].second, args[0].first, 0, dst.step * dst.rows, 0, NULL, NULL); @@ -668,7 +668,7 @@ void set_to_withoutmask_run(const oclMat &dst, const Scalar &scalar, string kern #endif } -void set_to_withmask_run(const oclMat &dst, const Scalar &scalar, const oclMat &mask, string kernelName) +static void set_to_withmask_run(const oclMat &dst, const Scalar &scalar, const oclMat &mask, string kernelName) { CV_DbgAssert( dst.rows == mask.rows && dst.cols == mask.cols); vector > args; diff --git a/modules/ocl/src/mcwutil.cpp b/modules/ocl/src/mcwutil.cpp index b4dafa5b7..b6c99e150 100644 --- a/modules/ocl/src/mcwutil.cpp +++ b/modules/ocl/src/mcwutil.cpp @@ -62,7 +62,7 @@ namespace cv } // provide additional methods for the user to interact with the command queue after a task is fired - void openCLExecuteKernel_2(Context *clCxt , const char **source, string kernelName, size_t globalThreads[3], + static void openCLExecuteKernel_2(Context *clCxt , const char **source, string kernelName, size_t globalThreads[3], size_t localThreads[3], vector< pair > &args, int channels, int depth, char *build_options, FLUSH_MODE finish_mode) { diff --git a/modules/ocl/src/precomp.hpp b/modules/ocl/src/precomp.hpp index 11257fdcc..317da5991 100644 --- a/modules/ocl/src/precomp.hpp +++ b/modules/ocl/src/precomp.hpp @@ -46,7 +46,7 @@ #ifndef __OPENCV_PRECOMP_H__ #define __OPENCV_PRECOMP_H__ -#if _MSC_VER >= 1200 +#if defined _MSC_VER && _MSC_VER >= 1200 #pragma warning( disable: 4267 4324 4244 4251 4710 4711 4514 4996 ) #endif diff --git a/modules/ocl/src/pyrdown.cpp b/modules/ocl/src/pyrdown.cpp index c05a7ae4b..96be69b45 100644 --- a/modules/ocl/src/pyrdown.cpp +++ b/modules/ocl/src/pyrdown.cpp @@ -66,7 +66,7 @@ namespace cv ////////////////////////////////////////////////////////////////////////////// /////////////////////// add subtract multiply divide ///////////////////////// ////////////////////////////////////////////////////////////////////////////// -void pyrdown_run(const oclMat &src, const oclMat &dst) +static void pyrdown_run(const oclMat &src, const oclMat &dst) { CV_Assert(src.type() == dst.type()); diff --git a/modules/ocl/src/pyrlk.cpp b/modules/ocl/src/pyrlk.cpp index dac303c65..0e871068a 100644 --- a/modules/ocl/src/pyrlk.cpp +++ b/modules/ocl/src/pyrlk.cpp @@ -48,23 +48,24 @@ using namespace cv::ocl; #if !defined (HAVE_OPENCL) -void cv::ocl::PyrLKOpticalFlow::sparse(const oclMat &, const oclMat &, const oclMat &, oclMat &, oclMat &, oclMat *) { } +void cv::ocl::PyrLKOpticalFlow::sparse(const oclMat &, const oclMat &, const oclMat &, oclMat &, oclMat &, oclMat &) { } void cv::ocl::PyrLKOpticalFlow::dense(const oclMat &, const oclMat &, oclMat &, oclMat &, oclMat *) { } #else /* !defined (HAVE_OPENCL) */ namespace cv { - namespace ocl - { - ///////////////////////////OpenCL kernel strings/////////////////////////// - extern const char *pyrlk; - extern const char *operator_setTo; - extern const char *operator_convertTo; - extern const char *operator_copyToM; - extern const char *arithm_mul; - extern const char *pyr_down; - } +namespace ocl +{ +///////////////////////////OpenCL kernel strings/////////////////////////// +extern const char *pyrlk; +extern const char *pyrlk_no_image; +extern const char *operator_setTo; +extern const char *operator_convertTo; +extern const char *operator_copyToM; +extern const char *arithm_mul; +extern const char *pyr_down; +} } struct dim3 @@ -84,26 +85,26 @@ struct int2 namespace { - void calcPatchSize(cv::Size winSize, int cn, dim3 &block, dim3 &patch, bool isDeviceArch11) +void calcPatchSize(cv::Size winSize, int cn, dim3 &block, dim3 &patch, bool isDeviceArch11) +{ + winSize.width *= cn; + + if (winSize.width > 32 && winSize.width > 2 * winSize.height) { - winSize.width *= cn; - - if (winSize.width > 32 && winSize.width > 2 * winSize.height) - { - block.x = isDeviceArch11 ? 16 : 32; - block.y = 8; - } - else - { - block.x = 16; - block.y = isDeviceArch11 ? 8 : 16; - } - - patch.x = (winSize.width + block.x - 1) / block.x; - patch.y = (winSize.height + block.y - 1) / block.y; - - block.z = patch.z = 1; + block.x = isDeviceArch11 ? 16 : 32; + block.y = 8; } + else + { + block.x = 16; + block.y = isDeviceArch11 ? 8 : 16; + } + + patch.x = (winSize.width + block.x - 1) / block.x; + patch.y = (winSize.height + block.y - 1) / block.y; + + block.z = patch.z = 1; +} } inline int divUp(int total, int grain) @@ -114,7 +115,7 @@ inline int divUp(int total, int grain) /////////////////////////////////////////////////////////////////////////// //////////////////////////////// ConvertTo //////////////////////////////// /////////////////////////////////////////////////////////////////////////// -void convert_run_cus(const oclMat &src, oclMat &dst, double alpha, double beta) +static void convert_run_cus(const oclMat &src, oclMat &dst, double alpha, double beta) { string kernelName = "convert_to_S"; stringstream idxStr; @@ -185,7 +186,7 @@ void convertTo( const oclMat &src, oclMat &dst, int rtype, double alpha, double // setTo(s); // return *this; //} -void set_to_withoutmask_run_cus(const oclMat &dst, const Scalar &scalar, string kernelName) +static void set_to_withoutmask_run_cus(const oclMat &dst, const Scalar &scalar, string kernelName) { vector > args; @@ -355,7 +356,7 @@ void set_to_withoutmask_run_cus(const oclMat &dst, const Scalar &scalar, string default: CV_Error(CV_StsUnsupportedFormat, "unknown depth"); } -#if CL_VERSION_1_2 +#ifdef CL_VERSION_1_2 if(dst.offset == 0 && dst.cols == dst.wholecols) { clEnqueueFillBuffer(dst.clCxt->impl->clCmdQueue, (cl_mem)dst.data, args[0].second, args[0].first, 0, dst.step * dst.rows, 0, NULL, NULL); @@ -381,7 +382,7 @@ void set_to_withoutmask_run_cus(const oclMat &dst, const Scalar &scalar, string #endif } -oclMat &setTo(oclMat &src, const Scalar &scalar) +static oclMat &setTo(oclMat &src, const Scalar &scalar) { CV_Assert( src.depth() >= 0 && src.depth() <= 6 ); CV_DbgAssert( !src.empty()); @@ -401,48 +402,48 @@ oclMat &setTo(oclMat &src, const Scalar &scalar) /////////////////////////////////////////////////////////////////////////// ////////////////////////////////// CopyTo ///////////////////////////////// /////////////////////////////////////////////////////////////////////////// -void copy_to_with_mask_cus(const oclMat &src, oclMat &dst, const oclMat &mask, string kernelName) -{ - CV_DbgAssert( dst.rows == mask.rows && dst.cols == mask.cols && - src.rows == dst.rows && src.cols == dst.cols - && mask.type() == CV_8UC1); +// static void copy_to_with_mask_cus(const oclMat &src, oclMat &dst, const oclMat &mask, string kernelName) +// { +// CV_DbgAssert( dst.rows == mask.rows && dst.cols == mask.cols && +// src.rows == dst.rows && src.cols == dst.cols +// && mask.type() == CV_8UC1); - vector > args; +// vector > args; - std::string string_types[4][7] = {{"uchar", "char", "ushort", "short", "int", "float", "double"}, - {"uchar2", "char2", "ushort2", "short2", "int2", "float2", "double2"}, - {"uchar3", "char3", "ushort3", "short3", "int3", "float3", "double3"}, - {"uchar4", "char4", "ushort4", "short4", "int4", "float4", "double4"} - }; - char compile_option[32]; - sprintf(compile_option, "-D GENTYPE=%s", string_types[dst.oclchannels() - 1][dst.depth()].c_str()); - size_t localThreads[3] = {16, 16, 1}; - size_t globalThreads[3]; +// std::string string_types[4][7] = {{"uchar", "char", "ushort", "short", "int", "float", "double"}, +// {"uchar2", "char2", "ushort2", "short2", "int2", "float2", "double2"}, +// {"uchar3", "char3", "ushort3", "short3", "int3", "float3", "double3"}, +// {"uchar4", "char4", "ushort4", "short4", "int4", "float4", "double4"} +// }; +// char compile_option[32]; +// sprintf(compile_option, "-D GENTYPE=%s", string_types[dst.oclchannels() - 1][dst.depth()].c_str()); +// size_t localThreads[3] = {16, 16, 1}; +// size_t globalThreads[3]; - globalThreads[0] = divUp(dst.cols, localThreads[0]) * localThreads[0]; - globalThreads[1] = divUp(dst.rows, localThreads[1]) * localThreads[1]; - globalThreads[2] = 1; +// globalThreads[0] = divUp(dst.cols, localThreads[0]) * localThreads[0]; +// globalThreads[1] = divUp(dst.rows, localThreads[1]) * localThreads[1]; +// globalThreads[2] = 1; - int dststep_in_pixel = dst.step / dst.elemSize(), dstoffset_in_pixel = dst.offset / dst.elemSize(); - int srcstep_in_pixel = src.step / src.elemSize(), srcoffset_in_pixel = src.offset / src.elemSize(); +// int dststep_in_pixel = dst.step / dst.elemSize(), dstoffset_in_pixel = dst.offset / dst.elemSize(); +// int srcstep_in_pixel = src.step / src.elemSize(), srcoffset_in_pixel = src.offset / src.elemSize(); - args.push_back( make_pair( sizeof(cl_mem) , (void *)&src.data )); - args.push_back( make_pair( sizeof(cl_mem) , (void *)&dst.data )); - args.push_back( make_pair( sizeof(cl_mem) , (void *)&mask.data )); - args.push_back( make_pair( sizeof(cl_int) , (void *)&src.cols )); - args.push_back( make_pair( sizeof(cl_int) , (void *)&src.rows )); - args.push_back( make_pair( sizeof(cl_int) , (void *)&srcstep_in_pixel )); - args.push_back( make_pair( sizeof(cl_int) , (void *)&srcoffset_in_pixel )); - args.push_back( make_pair( sizeof(cl_int) , (void *)&dststep_in_pixel )); - args.push_back( make_pair( sizeof(cl_int) , (void *)&dstoffset_in_pixel )); - args.push_back( make_pair( sizeof(cl_int) , (void *)&mask.step )); - args.push_back( make_pair( sizeof(cl_int) , (void *)&mask.offset )); +// args.push_back( make_pair( sizeof(cl_mem) , (void *)&src.data )); +// args.push_back( make_pair( sizeof(cl_mem) , (void *)&dst.data )); +// args.push_back( make_pair( sizeof(cl_mem) , (void *)&mask.data )); +// args.push_back( make_pair( sizeof(cl_int) , (void *)&src.cols )); +// args.push_back( make_pair( sizeof(cl_int) , (void *)&src.rows )); +// args.push_back( make_pair( sizeof(cl_int) , (void *)&srcstep_in_pixel )); +// args.push_back( make_pair( sizeof(cl_int) , (void *)&srcoffset_in_pixel )); +// args.push_back( make_pair( sizeof(cl_int) , (void *)&dststep_in_pixel )); +// args.push_back( make_pair( sizeof(cl_int) , (void *)&dstoffset_in_pixel )); +// args.push_back( make_pair( sizeof(cl_int) , (void *)&mask.step )); +// args.push_back( make_pair( sizeof(cl_int) , (void *)&mask.offset )); - openCLExecuteKernel2(dst.clCxt , &operator_copyToM, kernelName, globalThreads, - localThreads, args, -1, -1, compile_option, CLFLUSH); -} +// openCLExecuteKernel2(dst.clCxt , &operator_copyToM, kernelName, globalThreads, +// localThreads, args, -1, -1, compile_option, CLFLUSH); +// } -void copyTo(const oclMat &src, oclMat &m ) +static void copyTo(const oclMat &src, oclMat &m ) { CV_DbgAssert(!src.empty()); m.create(src.size(), src.type()); @@ -450,20 +451,20 @@ void copyTo(const oclMat &src, oclMat &m ) src.data, src.step, src.cols * src.elemSize(), src.rows, src.offset); } -void copyTo(const oclMat &src, oclMat &mat, const oclMat &mask) -{ - if (mask.empty()) - { - copyTo(src, mat); - } - else - { - mat.create(src.size(), src.type()); - copy_to_with_mask_cus(src, mat, mask, "copy_to_with_mask"); - } -} +// static void copyTo(const oclMat &src, oclMat &mat, const oclMat &mask) +// { +// if (mask.empty()) +// { +// copyTo(src, mat); +// } +// else +// { +// mat.create(src.size(), src.type()); +// copy_to_with_mask_cus(src, mat, mask, "copy_to_with_mask"); +// } +// } -void arithmetic_run(const oclMat &src1, oclMat &dst, string kernelName, const char **kernelString, void *_scalar) +static void arithmetic_run(const oclMat &src1, oclMat &dst, string kernelName, const char **kernelString, void *_scalar) { if(src1.clCxt -> impl -> double_support == 0 && src1.type() == CV_64F) { @@ -528,12 +529,12 @@ void arithmetic_run(const oclMat &src1, oclMat &dst, string kernelName, const ch openCLExecuteKernel2(clCxt, kernelString, kernelName, globalThreads, localThreads, args, -1, src1.depth(), CLFLUSH); } -void multiply_cus(const oclMat &src1, oclMat &dst, float scalar) +static void multiply_cus(const oclMat &src1, oclMat &dst, float scalar) { - arithmetic_run(src1, dst, "arithm_muls", &pyrlk, (void *)(&scalar)); + arithmetic_run(src1, dst, "arithm_muls", &arithm_mul, (void *)(&scalar)); } -void pyrdown_run_cus(const oclMat &src, const oclMat &dst) +static void pyrdown_run_cus(const oclMat &src, const oclMat &dst) { CV_Assert(src.type() == dst.type()); @@ -558,7 +559,7 @@ void pyrdown_run_cus(const oclMat &src, const oclMat &dst) openCLExecuteKernel2(clCxt, &pyr_down, kernelName, globalThreads, localThreads, args, src.oclchannels(), src.depth(), CLFLUSH); } -void pyrDown_cus(const oclMat &src, oclMat &dst) +static void pyrDown_cus(const oclMat &src, oclMat &dst) { CV_Assert(src.depth() <= CV_32F && src.channels() <= 4); @@ -581,26 +582,26 @@ void pyrDown_cus(const oclMat &src, oclMat &dst) // //void callF(const oclMat& src, oclMat& dst, MultiplyScalar op, int mask) //{ -// Mat srcTemp; -// Mat dstTemp; -// src.download(srcTemp); -// dst.download(dstTemp); +// Mat srcTemp; +// Mat dstTemp; +// src.download(srcTemp); +// dst.download(dstTemp); // -// int i; -// int j; -// int k; -// for(i = 0; i < srcTemp.rows; i++) -// { -// for(j = 0; j < srcTemp.cols; j++) -// { -// for(k = 0; k < srcTemp.channels(); k++) -// { -// ((float*)dstTemp.data)[srcTemp.channels() * (i * srcTemp.rows + j) + k] = (float)op(((float*)srcTemp.data)[srcTemp.channels() * (i * srcTemp.rows + j) + k]); -// } -// } -// } +// int i; +// int j; +// int k; +// for(i = 0; i < srcTemp.rows; i++) +// { +// for(j = 0; j < srcTemp.cols; j++) +// { +// for(k = 0; k < srcTemp.channels(); k++) +// { +// ((float*)dstTemp.data)[srcTemp.channels() * (i * srcTemp.rows + j) + k] = (float)op(((float*)srcTemp.data)[srcTemp.channels() * (i * srcTemp.rows + j) + k]); +// } +// } +// } // -// dst = dstTemp; +// dst = dstTemp; //} // //static inline bool isAligned(const unsigned char* ptr, size_t size) @@ -622,57 +623,57 @@ void pyrDown_cus(const oclMat &src, oclMat &dst) // return; // } // -// Mat srcTemp; -// Mat dstTemp; -// src.download(srcTemp); -// dst.download(dstTemp); +// Mat srcTemp; +// Mat dstTemp; +// src.download(srcTemp); +// dst.download(dstTemp); // -// int x_shifted; +// int x_shifted; // -// int i; -// int j; -// for(i = 0; i < srcTemp.rows; i++) -// { -// const double* srcRow = (const double*)srcTemp.data + i * srcTemp.rows; +// int i; +// int j; +// for(i = 0; i < srcTemp.rows; i++) +// { +// const double* srcRow = (const double*)srcTemp.data + i * srcTemp.rows; // double* dstRow = (double*)dstTemp.data + i * dstTemp.rows;; // -// for(j = 0; j < srcTemp.cols; j++) -// { -// x_shifted = j * 4; +// for(j = 0; j < srcTemp.cols; j++) +// { +// x_shifted = j * 4; // -// if(x_shifted + 4 - 1 < srcTemp.cols) -// { -// dstRow[x_shifted ] = op(srcRow[x_shifted ]); -// dstRow[x_shifted + 1] = op(srcRow[x_shifted + 1]); -// dstRow[x_shifted + 2] = op(srcRow[x_shifted + 2]); -// dstRow[x_shifted + 3] = op(srcRow[x_shifted + 3]); -// } -// else -// { -// for (int real_x = x_shifted; real_x < srcTemp.cols; ++real_x) -// { -// ((float*)dstTemp.data)[i * srcTemp.rows + real_x] = op(((float*)srcTemp.data)[i * srcTemp.rows + real_x]); -// } -// } -// } -// } +// if(x_shifted + 4 - 1 < srcTemp.cols) +// { +// dstRow[x_shifted ] = op(srcRow[x_shifted ]); +// dstRow[x_shifted + 1] = op(srcRow[x_shifted + 1]); +// dstRow[x_shifted + 2] = op(srcRow[x_shifted + 2]); +// dstRow[x_shifted + 3] = op(srcRow[x_shifted + 3]); +// } +// else +// { +// for (int real_x = x_shifted; real_x < srcTemp.cols; ++real_x) +// { +// ((float*)dstTemp.data)[i * srcTemp.rows + real_x] = op(((float*)srcTemp.data)[i * srcTemp.rows + real_x]); +// } +// } +// } +// } //} // //void multiply(const oclMat& src1, double val, oclMat& dst, double scale = 1.0f); //void multiply(const oclMat& src1, double val, oclMat& dst, double scale) //{ // MultiplyScalar op(val, scale); -// //if(src1.channels() == 1 && dst.channels() == 1) -// //{ -// // callT(src1, dst, op, 0); -// //} -// //else -// //{ -// callF(src1, dst, op, 0); -// //} +// //if(src1.channels() == 1 && dst.channels() == 1) +// //{ +// // callT(src1, dst, op, 0); +// //} +// //else +// //{ +// callF(src1, dst, op, 0); +// //} //} -cl_mem bindTexture(const oclMat &mat, int depth, int channels) +static cl_mem bindTexture(const oclMat &mat, int depth, int channels) { cl_mem texture; cl_image_format format; @@ -697,7 +698,7 @@ cl_mem bindTexture(const oclMat &mat, int depth, int channels) { format.image_channel_order = CL_RGBA; } -#if CL_VERSION_1_2 +#ifdef CL_VERSION_1_2 cl_image_desc desc; desc.image_type = CL_MEM_OBJECT_IMAGE2D; desc.image_width = mat.step / mat.elemSize(); @@ -729,52 +730,75 @@ cl_mem bindTexture(const oclMat &mat, int depth, int channels) return texture; } -void releaseTexture(cl_mem texture) +static void releaseTexture(cl_mem texture) { openCLFree(texture); } -void lkSparse_run(oclMat &I, oclMat &J, - const oclMat &prevPts, oclMat &nextPts, oclMat &status, oclMat *err, bool GET_MIN_EIGENVALS, int ptcount, +static void lkSparse_run(oclMat &I, oclMat &J, + const oclMat &prevPts, oclMat &nextPts, oclMat &status, oclMat& err, bool /*GET_MIN_EIGENVALS*/, int ptcount, int level, /*dim3 block, */dim3 patch, Size winSize, int iters) { Context *clCxt = I.clCxt; + char platform[256] = {0}; + cl_platform_id pid; + clGetDeviceInfo(*clCxt->impl->devices, CL_DEVICE_PLATFORM, sizeof(pid), &pid, NULL); + clGetPlatformInfo(pid, CL_PLATFORM_NAME, 256, platform, NULL); + std::string namestr = platform; + bool isImageSupported = true; + if(namestr.find("NVIDIA")!=string::npos || namestr.find("Intel")!=string::npos) + isImageSupported = false; + + int elemCntPerRow = I.step / I.elemSize(); string kernelName = "lkSparse"; - size_t localThreads[3] = { 8, 32, 1 }; - size_t globalThreads[3] = { 8 * ptcount, 32, 1}; + + size_t localThreads[3] = { 8, isImageSupported?8:32, 1 }; + size_t globalThreads[3] = { 8 * ptcount, isImageSupported?8:32, 1}; int cn = I.oclchannels(); - bool calcErr; - if (err) + char calcErr; + if (level == 0) { - calcErr = true; + calcErr = 1; } else { - calcErr = false; + calcErr = 0; } - calcErr = true; - - cl_mem ITex = bindTexture(I, I.depth(), cn); - cl_mem JTex = bindTexture(J, J.depth(), cn); vector > args; + cl_mem ITex; + cl_mem JTex; + if (isImageSupported) + { + ITex = bindTexture(I, I.depth(), cn); + JTex = bindTexture(J, J.depth(), cn); + } + else + { + ITex = (cl_mem)I.data; + JTex = (cl_mem)J.data; + } args.push_back( make_pair( sizeof(cl_mem), (void *)&ITex )); args.push_back( make_pair( sizeof(cl_mem), (void *)&JTex )); - + //cl_mem clmD = clCreateBuffer(clCxt, CL_MEM_READ_WRITE, ptcount * sizeof(float), NULL, NULL); args.push_back( make_pair( sizeof(cl_mem), (void *)&prevPts.data )); args.push_back( make_pair( sizeof(cl_int), (void *)&prevPts.step )); args.push_back( make_pair( sizeof(cl_mem), (void *)&nextPts.data )); args.push_back( make_pair( sizeof(cl_int), (void *)&nextPts.step )); args.push_back( make_pair( sizeof(cl_mem), (void *)&status.data )); - //args.push_back( make_pair( sizeof(cl_mem), (void *)&(err->data) )); + args.push_back( make_pair( sizeof(cl_mem), (void *)&err.data )); args.push_back( make_pair( sizeof(cl_int), (void *)&level )); args.push_back( make_pair( sizeof(cl_int), (void *)&I.rows )); args.push_back( make_pair( sizeof(cl_int), (void *)&I.cols )); + if (!isImageSupported) + { + args.push_back( make_pair( sizeof(cl_int), (void *)&elemCntPerRow ) ); + } args.push_back( make_pair( sizeof(cl_int), (void *)&patch.x )); args.push_back( make_pair( sizeof(cl_int), (void *)&patch.y )); args.push_back( make_pair( sizeof(cl_int), (void *)&cn )); @@ -782,27 +806,29 @@ void lkSparse_run(oclMat &I, oclMat &J, args.push_back( make_pair( sizeof(cl_int), (void *)&winSize.height )); args.push_back( make_pair( sizeof(cl_int), (void *)&iters )); args.push_back( make_pair( sizeof(cl_char), (void *)&calcErr )); - args.push_back( make_pair( sizeof(cl_char), (void *)&GET_MIN_EIGENVALS )); + //args.push_back( make_pair( sizeof(cl_char), (void *)&GET_MIN_EIGENVALS )); - openCLExecuteKernel2(clCxt, &pyrlk, kernelName, globalThreads, localThreads, args, I.oclchannels(), I.depth(), CLFLUSH); + if (isImageSupported) + { + openCLExecuteKernel2(clCxt, &pyrlk, kernelName, globalThreads, localThreads, args, I.oclchannels(), I.depth(), CLFLUSH); - releaseTexture(ITex); - releaseTexture(JTex); + releaseTexture(ITex); + releaseTexture(JTex); + } + else + { + //printf("Warning: The image2d_t is not supported by the device. Using alternative method!\n"); + openCLExecuteKernel2(clCxt, &pyrlk_no_image, kernelName, globalThreads, localThreads, args, I.oclchannels(), I.depth(), CLFLUSH); + } } void cv::ocl::PyrLKOpticalFlow::sparse(const oclMat &prevImg, const oclMat &nextImg, const oclMat &prevPts, oclMat &nextPts, oclMat &status, oclMat *err) { - if (prevImg.clCxt->impl->devName.find("Intel(R) HD Graphics") != string::npos) - { - cout << " Intel HD GPU device unsupported " << endl; - return; - } - if (prevPts.empty()) { nextPts.release(); status.release(); - if (err) err->release(); + //if (err) err->release(); return; } @@ -836,8 +862,15 @@ void cv::ocl::PyrLKOpticalFlow::sparse(const oclMat &prevImg, const oclMat &next //status.setTo(Scalar::all(1)); setTo(status, Scalar::all(1)); - //if (err) - // ensureSizeIsEnough(1, prevPts.cols, CV_32FC1, *err); + bool errMat = false; + if (!err) + { + err = new oclMat(1, prevPts.cols, CV_32FC1); + errMat = true; + } + else + ensureSizeIsEnough(1, prevPts.cols, CV_32FC1, *err); + //ensureSizeIsEnough(1, prevPts.cols, CV_32FC1, err); // build the image pyramids. @@ -872,17 +905,22 @@ void cv::ocl::PyrLKOpticalFlow::sparse(const oclMat &prevImg, const oclMat &next for (int level = maxLevel; level >= 0; level--) { lkSparse_run(prevPyr_[level], nextPyr_[level], - prevPts, nextPts, status, level == 0 && err ? err : 0, getMinEigenVals, prevPts.cols, + prevPts, nextPts, status, *err, getMinEigenVals, prevPts.cols, level, /*block, */patch, winSize, iters); } clFinish(prevImg.clCxt->impl->clCmdQueue); + + if(errMat) + delete err; } -void lkDense_run(oclMat &I, oclMat &J, oclMat &u, oclMat &v, +static void lkDense_run(oclMat &I, oclMat &J, oclMat &u, oclMat &v, oclMat &prevU, oclMat &prevV, oclMat *err, Size winSize, int iters) { Context *clCxt = I.clCxt; + bool isImageSupported = clCxt->impl->devName.find("Intel(R) HD Graphics") == string::npos; + int elemCntPerRow = I.step / I.elemSize(); string kernelName = "lkDense"; @@ -901,8 +939,19 @@ void lkDense_run(oclMat &I, oclMat &J, oclMat &u, oclMat &v, calcErr = false; } - cl_mem ITex = bindTexture(I, I.depth(), cn); - cl_mem JTex = bindTexture(J, J.depth(), cn); + cl_mem ITex; + cl_mem JTex; + + if (isImageSupported) + { + ITex = bindTexture(I, I.depth(), cn); + JTex = bindTexture(J, J.depth(), cn); + } + else + { + ITex = (cl_mem)I.data; + JTex = (cl_mem)J.data; + } //int2 halfWin = {(winSize.width - 1) / 2, (winSize.height - 1) / 2}; //const int patchWidth = 16 + 2 * halfWin.x; @@ -926,15 +975,27 @@ void lkDense_run(oclMat &I, oclMat &J, oclMat &u, oclMat &v, args.push_back( make_pair( sizeof(cl_int), (void *)&I.cols )); //args.push_back( make_pair( sizeof(cl_mem), (void *)&(*err).data )); //args.push_back( make_pair( sizeof(cl_int), (void *)&(*err).step )); + if (!isImageSupported) + { + args.push_back( make_pair( sizeof(cl_int), (void *)&elemCntPerRow ) ); + } args.push_back( make_pair( sizeof(cl_int), (void *)&winSize.width )); args.push_back( make_pair( sizeof(cl_int), (void *)&winSize.height )); args.push_back( make_pair( sizeof(cl_int), (void *)&iters )); args.push_back( make_pair( sizeof(cl_char), (void *)&calcErr )); - openCLExecuteKernel2(clCxt, &pyrlk, kernelName, globalThreads, localThreads, args, I.oclchannels(), I.depth(), CLFLUSH); + if (isImageSupported) + { + openCLExecuteKernel2(clCxt, &pyrlk, kernelName, globalThreads, localThreads, args, I.oclchannels(), I.depth(), CLFLUSH); - releaseTexture(ITex); - releaseTexture(JTex); + releaseTexture(ITex); + releaseTexture(JTex); + } + else + { + //printf("Warning: The image2d_t is not supported by the device. Using alternative method!\n"); + openCLExecuteKernel2(clCxt, &pyrlk_no_image, kernelName, globalThreads, localThreads, args, I.oclchannels(), I.depth(), CLFLUSH); + } } void cv::ocl::PyrLKOpticalFlow::dense(const oclMat &prevImg, const oclMat &nextImg, oclMat &u, oclMat &v, oclMat *err) diff --git a/modules/ocl/src/split_merge.cpp b/modules/ocl/src/split_merge.cpp index e15b06ecc..b071ff870 100644 --- a/modules/ocl/src/split_merge.cpp +++ b/modules/ocl/src/split_merge.cpp @@ -111,52 +111,52 @@ namespace cv //////////////////////////////////////////////////////////////////////////// ////////////////////merge////////////////////////////////////////////////// //////////////////////////////////////////////////////////////////////////// - void merge_vector_run_no_roi(const oclMat *mat_src, size_t n, oclMat &mat_dst) - { - Context *clCxt = mat_dst.clCxt; - int channels = mat_dst.oclchannels(); - int depth = mat_dst.depth(); + // static void merge_vector_run_no_roi(const oclMat *mat_src, size_t n, oclMat &mat_dst) + // { + // Context *clCxt = mat_dst.clCxt; + // int channels = mat_dst.oclchannels(); + // int depth = mat_dst.depth(); - string kernelName = "merge_vector"; + // string kernelName = "merge_vector"; - int indexes[4][7] = {{0, 0, 0, 0, 0, 0, 0}, - {4, 4, 2, 2, 1, 1, 1}, - {4, 4, 2, 2 , 1, 1, 1}, - {4, 4, 2, 2, 1, 1, 1} - }; + // int indexes[4][7] = {{0, 0, 0, 0, 0, 0, 0}, + // {4, 4, 2, 2, 1, 1, 1}, + // {4, 4, 2, 2 , 1, 1, 1}, + // {4, 4, 2, 2, 1, 1, 1} + // }; - size_t index = indexes[channels - 1][mat_dst.depth()]; - int cols = divUp(mat_dst.cols, index); - size_t localThreads[3] = { 64, 4, 1 }; - size_t globalThreads[3] = { divUp(cols, localThreads[0]) *localThreads[0], - divUp(mat_dst.rows, localThreads[1]) *localThreads[1], - 1 - }; + // size_t index = indexes[channels - 1][mat_dst.depth()]; + // int cols = divUp(mat_dst.cols, index); + // size_t localThreads[3] = { 64, 4, 1 }; + // size_t globalThreads[3] = { divUp(cols, localThreads[0]) *localThreads[0], + // divUp(mat_dst.rows, localThreads[1]) *localThreads[1], + // 1 + // }; - vector > args; - args.push_back( make_pair( sizeof(cl_int), (void *)&mat_dst.rows)); - args.push_back( make_pair( sizeof(cl_int), (void *)&cols)); - args.push_back( make_pair( sizeof(cl_mem), (void *)&mat_dst.data)); - args.push_back( make_pair( sizeof(cl_int), (void *)&mat_dst.step)); - args.push_back( make_pair( sizeof(cl_mem), (void *)&mat_src[0].data)); - args.push_back( make_pair( sizeof(cl_int), (void *)&mat_src[0].step)); - args.push_back( make_pair( sizeof(cl_mem), (void *)&mat_src[1].data)); - args.push_back( make_pair( sizeof(cl_int), (void *)&mat_src[1].step)); - if(n >= 3) - { - args.push_back( make_pair( sizeof(cl_mem), (void *)&mat_src[2].data)); - args.push_back( make_pair( sizeof(cl_int), (void *)&mat_src[2].step)); - } - if(n >= 4) - { - args.push_back( make_pair( sizeof(cl_mem), (void *)&mat_src[3].data)); - args.push_back( make_pair( sizeof(cl_int), (void *)&mat_src[3].step)); - } + // vector > args; + // args.push_back( make_pair( sizeof(cl_int), (void *)&mat_dst.rows)); + // args.push_back( make_pair( sizeof(cl_int), (void *)&cols)); + // args.push_back( make_pair( sizeof(cl_mem), (void *)&mat_dst.data)); + // args.push_back( make_pair( sizeof(cl_int), (void *)&mat_dst.step)); + // args.push_back( make_pair( sizeof(cl_mem), (void *)&mat_src[0].data)); + // args.push_back( make_pair( sizeof(cl_int), (void *)&mat_src[0].step)); + // args.push_back( make_pair( sizeof(cl_mem), (void *)&mat_src[1].data)); + // args.push_back( make_pair( sizeof(cl_int), (void *)&mat_src[1].step)); + // if(n >= 3) + // { + // args.push_back( make_pair( sizeof(cl_mem), (void *)&mat_src[2].data)); + // args.push_back( make_pair( sizeof(cl_int), (void *)&mat_src[2].step)); + // } + // if(n >= 4) + // { + // args.push_back( make_pair( sizeof(cl_mem), (void *)&mat_src[3].data)); + // args.push_back( make_pair( sizeof(cl_int), (void *)&mat_src[3].step)); + // } - openCLExecuteKernel(clCxt, &merge_mat, kernelName, globalThreads, localThreads, args, channels, depth); - } + // openCLExecuteKernel(clCxt, &merge_mat, kernelName, globalThreads, localThreads, args, channels, depth); + // } - void merge_vector_run(const oclMat *mat_src, size_t n, oclMat &mat_dst) + static void merge_vector_run(const oclMat *mat_src, size_t n, oclMat &mat_dst) { if(mat_dst.clCxt -> impl -> double_support == 0 && mat_dst.type() == CV_64F) { @@ -228,7 +228,7 @@ namespace cv openCLExecuteKernel(clCxt, &merge_mat, kernelName, globalThreads, localThreads, args, channels, depth); } - void merge(const oclMat *mat_src, size_t n, oclMat &mat_dst) + static void merge(const oclMat *mat_src, size_t n, oclMat &mat_dst) { CV_Assert(mat_src); CV_Assert(n > 0); @@ -260,51 +260,51 @@ namespace cv //////////////////////////////////////////////////////////////////////////////////////////////////// //////////////////////////////////////split///////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////////////////////////// - void split_vector_run_no_roi(const oclMat &mat_src, oclMat *mat_dst) - { - Context *clCxt = mat_src.clCxt; - int channels = mat_src.oclchannels(); - int depth = mat_src.depth(); + // static void split_vector_run_no_roi(const oclMat &mat_src, oclMat *mat_dst) + // { + // Context *clCxt = mat_src.clCxt; + // int channels = mat_src.oclchannels(); + // int depth = mat_src.depth(); - string kernelName = "split_vector"; + // string kernelName = "split_vector"; - int indexes[4][7] = {{0, 0, 0, 0, 0, 0, 0}, - {8, 8, 8, 8, 4, 4, 2}, - {8, 8, 8, 8 , 4, 4, 4}, - {4, 4, 2, 2, 1, 1, 1} - }; + // int indexes[4][7] = {{0, 0, 0, 0, 0, 0, 0}, + // {8, 8, 8, 8, 4, 4, 2}, + // {8, 8, 8, 8 , 4, 4, 4}, + // {4, 4, 2, 2, 1, 1, 1} + // }; - size_t index = indexes[channels - 1][mat_dst[0].depth()]; - int cols = divUp(mat_src.cols, index); - size_t localThreads[3] = { 64, 4, 1 }; - size_t globalThreads[3] = { divUp(cols, localThreads[0]) *localThreads[0], - divUp(mat_src.rows, localThreads[1]) *localThreads[1], - 1 - }; + // size_t index = indexes[channels - 1][mat_dst[0].depth()]; + // int cols = divUp(mat_src.cols, index); + // size_t localThreads[3] = { 64, 4, 1 }; + // size_t globalThreads[3] = { divUp(cols, localThreads[0]) *localThreads[0], + // divUp(mat_src.rows, localThreads[1]) *localThreads[1], + // 1 + // }; - vector > args; - args.push_back( make_pair( sizeof(cl_mem), (void *)&mat_src.data)); - args.push_back( make_pair( sizeof(cl_int), (void *)&mat_src.step)); - args.push_back( make_pair( sizeof(cl_int), (void *)&mat_src.rows)); - args.push_back( make_pair( sizeof(cl_int), (void *)&cols)); - args.push_back( make_pair( sizeof(cl_mem), (void *)&mat_dst[0].data)); - args.push_back( make_pair( sizeof(cl_int), (void *)&mat_dst[0].step)); - args.push_back( make_pair( sizeof(cl_mem), (void *)&mat_dst[1].data)); - args.push_back( make_pair( sizeof(cl_int), (void *)&mat_dst[1].step)); - if(channels >= 3) - { - args.push_back( make_pair( sizeof(cl_mem), (void *)&mat_dst[2].data)); - args.push_back( make_pair( sizeof(cl_int), (void *)&mat_dst[2].step)); - } - if(channels >= 4) - { - args.push_back( make_pair( sizeof(cl_mem), (void *)&mat_dst[3].data)); - args.push_back( make_pair( sizeof(cl_int), (void *)&mat_dst[3].step)); - } + // vector > args; + // args.push_back( make_pair( sizeof(cl_mem), (void *)&mat_src.data)); + // args.push_back( make_pair( sizeof(cl_int), (void *)&mat_src.step)); + // args.push_back( make_pair( sizeof(cl_int), (void *)&mat_src.rows)); + // args.push_back( make_pair( sizeof(cl_int), (void *)&cols)); + // args.push_back( make_pair( sizeof(cl_mem), (void *)&mat_dst[0].data)); + // args.push_back( make_pair( sizeof(cl_int), (void *)&mat_dst[0].step)); + // args.push_back( make_pair( sizeof(cl_mem), (void *)&mat_dst[1].data)); + // args.push_back( make_pair( sizeof(cl_int), (void *)&mat_dst[1].step)); + // if(channels >= 3) + // { + // args.push_back( make_pair( sizeof(cl_mem), (void *)&mat_dst[2].data)); + // args.push_back( make_pair( sizeof(cl_int), (void *)&mat_dst[2].step)); + // } + // if(channels >= 4) + // { + // args.push_back( make_pair( sizeof(cl_mem), (void *)&mat_dst[3].data)); + // args.push_back( make_pair( sizeof(cl_int), (void *)&mat_dst[3].step)); + // } - openCLExecuteKernel(clCxt, &split_mat, kernelName, globalThreads, localThreads, args, channels, depth); - } - void split_vector_run(const oclMat &mat_src, oclMat *mat_dst) + // openCLExecuteKernel(clCxt, &split_mat, kernelName, globalThreads, localThreads, args, channels, depth); + // } + static void split_vector_run(const oclMat &mat_src, oclMat *mat_dst) { if(mat_src.clCxt -> impl -> double_support == 0 && mat_src.type() == CV_64F) @@ -374,7 +374,7 @@ namespace cv openCLExecuteKernel(clCxt, &split_mat, kernelName, globalThreads, localThreads, args, channels, depth); } - void split(const oclMat &mat_src, oclMat *mat_dst) + static void split(const oclMat &mat_src, oclMat *mat_dst) { CV_Assert(mat_dst); diff --git a/modules/ocl/src/surf.cpp b/modules/ocl/src/surf.cpp index f42c5ae98..71a7aacd3 100644 --- a/modules/ocl/src/surf.cpp +++ b/modules/ocl/src/surf.cpp @@ -536,7 +536,7 @@ void SURF_OCL_Invoker::bindImgTex(const oclMat &img, cl_mem &texture) openCLFree(texture); } -#if CL_VERSION_1_2 +#ifdef CL_VERSION_1_2 cl_image_desc desc; desc.image_type = CL_MEM_OBJECT_IMAGE2D; desc.image_width = img.step / img.elemSize(); diff --git a/modules/ocl/test/precomp.hpp b/modules/ocl/test/precomp.hpp index 3f6514f91..c63087101 100644 --- a/modules/ocl/test/precomp.hpp +++ b/modules/ocl/test/precomp.hpp @@ -38,6 +38,15 @@ // the use of this software, even if advised of the possibility of such damage. // //M*/ + +#ifdef __GNUC__ +# pragma GCC diagnostic ignored "-Wmissing-declarations" +# if defined __clang__ || defined __APPLE__ +# pragma GCC diagnostic ignored "-Wmissing-prototypes" +# pragma GCC diagnostic ignored "-Wextra" +# endif +#endif + #ifndef __OPENCV_TEST_PRECOMP_HPP__ #define __OPENCV_TEST_PRECOMP_HPP__ diff --git a/modules/ocl/test/test_arithm.cpp b/modules/ocl/test/test_arithm.cpp index bebbc6971..f643864a8 100644 --- a/modules/ocl/test/test_arithm.cpp +++ b/modules/ocl/test/test_arithm.cpp @@ -1110,8 +1110,8 @@ TEST_P(Phase, Mat) for(int j = 0; j < LOOP_TIMES; j++) { random_roi(); - cv::phase(mat1_roi, mat2_roi, dst_roi, angelInDegrees); - cv::ocl::phase(gmat1, gmat2, gdst, angelInDegrees); + cv::phase(mat1_roi, mat2_roi, dst_roi, angelInDegrees ? true : false); + cv::ocl::phase(gmat1, gmat2, gdst, angelInDegrees ? true : false); cv::Mat cpu_dst; gdst_whole.download(cpu_dst); @@ -1449,8 +1449,8 @@ TEST_P(MagnitudeSqr, Mat) for(int j = 0; j < LOOP_TIMES; j++) { // random_roi(); - int64 start, end; - start = cv::getTickCount(); + // int64 start, end; + // start = cv::getTickCount(); for(int i = 0; i < mat1.rows; ++i) for(int j = 0; j < mat1.cols; ++j) { @@ -1465,7 +1465,7 @@ TEST_P(MagnitudeSqr, Mat) // ((float *)(dst.data))[i*dst.step/4 +j]= val1 * val1 +val2 * val2; } - end = cv::getTickCount(); + // end = cv::getTickCount(); diff --git a/modules/ocl/test/test_blend.cpp b/modules/ocl/test/test_blend.cpp index 94014c091..f9c8657d0 100644 --- a/modules/ocl/test/test_blend.cpp +++ b/modules/ocl/test/test_blend.cpp @@ -74,7 +74,7 @@ TEST_P(Blend, Accuracy) else blendLinearGold(img1, img2, weights1, weights2, result_gold); - EXPECT_MAT_NEAR(result_gold, result, CV_MAT_DEPTH(type) == CV_8U ? 1 : 1e-5f, NULL) + EXPECT_MAT_NEAR(result_gold, result, CV_MAT_DEPTH(type) == CV_8U ? 1.f : 1e-5f, 0); } INSTANTIATE_TEST_CASE_P(GPU_ImgProc, Blend, Combine( diff --git a/modules/ocl/test/test_color.cpp b/modules/ocl/test/test_color.cpp new file mode 100644 index 000000000..202967b7a --- /dev/null +++ b/modules/ocl/test/test_color.cpp @@ -0,0 +1,193 @@ +/*M/////////////////////////////////////////////////////////////////////////////////////// +// +// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. +// +// By downloading, copying, installing or using the software you agree to this license. +// If you do not agree to this license, do not download, install, +// copy or use the software. +// +// +// License Agreement +// For Open Source Computer Vision Library +// +// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved. +// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved. +// Third party copyrights are property of their respective owners. +// +// @Authors +// Peng Xiao, pengxiao@multicorewareinc.com +// +// Redistribution and use in source and binary forms, with or without modification, +// are permitted provided that the following conditions are met: +// +// * Redistribution's of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// * Redistribution's in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other oclMaterials provided with the distribution. +// +// * The name of the copyright holders may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// This software is provided by the copyright holders and contributors as is and +// any express or implied warranties, including, but not limited to, the implied +// warranties of merchantability and fitness for a particular purpose are disclaimed. +// In no event shall the Intel Corporation or contributors be liable for any direct, +// indirect, incidental, special, exemplary, or consequential damages +// (including, but not limited to, procurement of substitute goods or services; +// loss of use, data, or profits; or business interruption) however caused +// and on any theory of liability, whether in contract, strict liability, +// or tort (including negligence or otherwise) arising in any way out of +// the use of this software, even if advised of the possibility of such damage. +// +//M*/ + +#include "precomp.hpp" +#ifdef HAVE_OPENCL + +//#define MAT_DEBUG +#ifdef MAT_DEBUG +#define MAT_DIFF(mat, mat2)\ +{\ + for(int i = 0; i < mat.rows; i ++)\ + {\ + for(int j = 0; j < mat.cols; j ++)\ + {\ + cv::Vec4b s = mat.at(i, j);\ + cv::Vec4b s2 = mat2.at(i, j);\ + if(s != s2) printf("*");\ + else printf(".");\ + }\ + puts("\n");\ + }\ +} +#else +#define MAT_DIFF(mat, mat2) +#endif + + +namespace +{ + +/////////////////////////////////////////////////////////////////////////////////////////////////////// +// cvtColor +PARAM_TEST_CASE(CvtColor, cv::Size, MatDepth) +{ + cv::Size size; + int depth; + bool useRoi; + + cv::Mat img; + + virtual void SetUp() + { + size = GET_PARAM(0); + depth = GET_PARAM(1); + + img = randomMat(size, CV_MAKE_TYPE(depth, 3), 0.0, depth == CV_32F ? 1.0 : 255.0); + } +}; + +#define CVTCODE(name) cv::COLOR_ ## name +#define TEST_P_CVTCOLOR(name) TEST_P(CvtColor, name)\ +{\ + cv::Mat src = img;\ + cv::ocl::oclMat ocl_img, dst;\ + ocl_img.upload(img);\ + cv::ocl::cvtColor(ocl_img, dst, CVTCODE(name));\ + cv::Mat dst_gold;\ + cv::cvtColor(src, dst_gold, CVTCODE(name));\ + cv::Mat dst_mat;\ + dst.download(dst_mat);\ + EXPECT_MAT_NEAR(dst_gold, dst_mat, 1e-5, "");\ +} + +//add new ones here using macro +TEST_P_CVTCOLOR(RGB2GRAY) +TEST_P_CVTCOLOR(BGR2GRAY) +TEST_P_CVTCOLOR(RGBA2GRAY) +TEST_P_CVTCOLOR(BGRA2GRAY) + +TEST_P_CVTCOLOR(RGB2YUV) +TEST_P_CVTCOLOR(BGR2YUV) +TEST_P_CVTCOLOR(YUV2RGB) +TEST_P_CVTCOLOR(YUV2BGR) +TEST_P_CVTCOLOR(RGB2YCrCb) +TEST_P_CVTCOLOR(BGR2YCrCb) + +PARAM_TEST_CASE(CvtColor_Gray2RGB, cv::Size, MatDepth, int) +{ + cv::Size size; + int code; + int depth; + cv::Mat img; + + virtual void SetUp() + { + size = GET_PARAM(0); + depth = GET_PARAM(1); + code = GET_PARAM(2); + img = randomMat(size, CV_MAKETYPE(depth, 1), 0.0, depth == CV_32F ? 1.0 : 255.0); + } +}; +TEST_P(CvtColor_Gray2RGB, Accuracy) +{ + cv::Mat src = img; + cv::ocl::oclMat ocl_img, dst; + ocl_img.upload(src); + cv::ocl::cvtColor(ocl_img, dst, code); + cv::Mat dst_gold; + cv::cvtColor(src, dst_gold, code); + cv::Mat dst_mat; + dst.download(dst_mat); + EXPECT_MAT_NEAR(dst_gold, dst_mat, 1e-5, ""); +} + + +PARAM_TEST_CASE(CvtColor_YUV420, cv::Size, int) +{ + cv::Size size; + int code; + + cv::Mat img; + + virtual void SetUp() + { + size = GET_PARAM(0); + code = GET_PARAM(1); + img = randomMat(size, CV_8UC1, 0.0, 255.0); + } +}; + +TEST_P(CvtColor_YUV420, Accuracy) +{ + cv::Mat src = img; + cv::ocl::oclMat ocl_img, dst; + ocl_img.upload(src); + cv::ocl::cvtColor(ocl_img, dst, code); + cv::Mat dst_gold; + cv::cvtColor(src, dst_gold, code); + cv::Mat dst_mat; + dst.download(dst_mat); + MAT_DIFF(dst_mat, dst_gold); + EXPECT_MAT_NEAR(dst_gold, dst_mat, 1e-5, ""); +} + +INSTANTIATE_TEST_CASE_P(OCL_ImgProc, CvtColor, testing::Combine( + DIFFERENT_SIZES, + testing::Values(MatDepth(CV_8U), MatDepth(CV_16U), MatDepth(CV_32F)) + )); + +INSTANTIATE_TEST_CASE_P(OCL_ImgProc, CvtColor_YUV420, testing::Combine( + testing::Values(cv::Size(128, 45), cv::Size(46, 132), cv::Size(1024, 1023)), + testing::Values((int)CV_YUV2RGBA_NV12, (int)CV_YUV2BGRA_NV12, (int)CV_YUV2RGB_NV12, (int)CV_YUV2BGR_NV12) + )); + +INSTANTIATE_TEST_CASE_P(OCL_ImgProc, CvtColor_Gray2RGB, testing::Combine( + DIFFERENT_SIZES, + testing::Values(MatDepth(CV_8U), MatDepth(CV_16U), MatDepth(CV_32F)), + testing::Values((int)CV_GRAY2BGR, (int)CV_GRAY2BGRA, (int)CV_GRAY2RGB, (int)CV_GRAY2RGBA) + )); +} +#endif diff --git a/modules/ocl/test/test_haar.cpp b/modules/ocl/test/test_haar.cpp index 52ce6a3a3..9bff32466 100644 --- a/modules/ocl/test/test_haar.cpp +++ b/modules/ocl/test/test_haar.cpp @@ -109,15 +109,15 @@ TEST_F(Haar, FaceDetect) //double t = 0; vector faces, oclfaces; - const static Scalar colors[] = { CV_RGB(0, 0, 255), - CV_RGB(0, 128, 255), - CV_RGB(0, 255, 255), - CV_RGB(0, 255, 0), - CV_RGB(255, 128, 0), - CV_RGB(255, 255, 0), - CV_RGB(255, 0, 0), - CV_RGB(255, 0, 255) - } ; + // const static Scalar colors[] = { CV_RGB(0, 0, 255), + // CV_RGB(0, 128, 255), + // CV_RGB(0, 255, 255), + // CV_RGB(0, 255, 0), + // CV_RGB(255, 128, 0), + // CV_RGB(255, 255, 0), + // CV_RGB(255, 0, 0), + // CV_RGB(255, 0, 255) + // } ; Mat gray, smallImg(cvRound (img.rows / scale), cvRound(img.cols / scale), CV_8UC1 ); MemStorage storage(cvCreateMemStorage(0)); diff --git a/modules/ocl/test/test_imgproc.cpp b/modules/ocl/test/test_imgproc.cpp index 681c05a91..97174ecbd 100644 --- a/modules/ocl/test/test_imgproc.cpp +++ b/modules/ocl/test/test_imgproc.cpp @@ -498,11 +498,11 @@ TEST_P(bilateralFilter, Mat) } else { - for(int i = 0; i < sizeof(bordertype) / sizeof(int); i++) + for(size_t i = 0; i < sizeof(bordertype) / sizeof(int); i++) for(int j = 0; j < LOOP_TIMES; j++) { random_roi(); - if(((bordertype[i] != cv::BORDER_CONSTANT) && (bordertype[i] != cv::BORDER_REPLICATE)) && (mat1_roi.cols <= radius) || (mat1_roi.cols <= radius) || (mat1_roi.rows <= radius) || (mat1_roi.rows <= radius)) + if(((bordertype[i] != cv::BORDER_CONSTANT) && (bordertype[i] != cv::BORDER_REPLICATE) && (mat1_roi.cols <= radius)) || (mat1_roi.cols <= radius) || (mat1_roi.rows <= radius) || (mat1_roi.rows <= radius)) { continue; } @@ -563,7 +563,7 @@ TEST_P(CopyMakeBorder, Mat) } else { - for(int i = 0; i < sizeof(bordertype) / sizeof(int); i++) + for(size_t i = 0; i < sizeof(bordertype) / sizeof(int); i++) for(int j = 0; j < LOOP_TIMES; j++) { random_roi(); @@ -911,7 +911,6 @@ PARAM_TEST_CASE(Remap, MatType, MatType, MatType, int, int) cv::RNG &rng = TS::ptr()->get_rng(); cv::Size srcSize = cv::Size(MWIDTH, MHEIGHT); - cv::Size dstSize = cv::Size(MWIDTH, MHEIGHT); cv::Size map1Size = cv::Size(MWIDTH, MHEIGHT); double min = 5, max = 16; diff --git a/modules/ocl/test/test_match_template.cpp b/modules/ocl/test/test_match_template.cpp index 673a5f7fd..3892513b4 100644 --- a/modules/ocl/test/test_match_template.cpp +++ b/modules/ocl/test/test_match_template.cpp @@ -100,7 +100,7 @@ TEST_P(MatchTemplate8U, Accuracy) EXPECT_MAT_NEAR(dst_gold, mat_dst, templ_size.area() * 1e-1, sss); -#if PERF_TEST +#ifdef PERF_TEST { P_TEST_FULL( {}, {cv::ocl::matchTemplate(ocl_image, ocl_templ, dst, method);}, {}); P_TEST_FULL( {}, {cv::matchTemplate(image, templ, dst_gold, method);}, {}); @@ -145,7 +145,7 @@ TEST_P(MatchTemplate32F, Accuracy) EXPECT_MAT_NEAR(dst_gold, mat_dst, templ_size.area() * 1e-1, sss); -#if PERF_TEST +#ifdef PERF_TEST { std::cout << "Method: " << TEMPLATE_METHOD_NAMES[method] << std::endl; std::cout << "Image Size: (" << size.width << ", " << size.height << ")" << std::endl; diff --git a/modules/ocl/test/test_pyrlk.cpp b/modules/ocl/test/test_pyrlk.cpp index b594a3483..7c747ee4f 100644 --- a/modules/ocl/test/test_pyrlk.cpp +++ b/modules/ocl/test/test_pyrlk.cpp @@ -118,9 +118,9 @@ TEST_P(Sparse, Mat) cv::Mat status_mat(1, d_status.cols, CV_8UC1, (void *)&status[0]); d_status.download(status_mat); - //std::vector err(d_err.cols); - //cv::Mat err_mat(1, d_err.cols, CV_32FC1, (void*)&err[0]); - //d_err.download(err_mat); + std::vector err(d_err.cols); + cv::Mat err_mat(1, d_err.cols, CV_32FC1, (void*)&err[0]); + d_err.download(err_mat); std::vector nextPts_gold; std::vector status_gold; @@ -153,9 +153,9 @@ TEST_P(Sparse, Mat) } } - double bad_ratio = static_cast(mistmatch) / (nextPts.size() * 2); + double bad_ratio = static_cast(mistmatch) / (nextPts.size()); - ASSERT_LE(bad_ratio, 0.05f); + ASSERT_LE(bad_ratio, 0.02f); } diff --git a/modules/ocl/test/utility.hpp b/modules/ocl/test/utility.hpp index 4ebf12975..99fd3346d 100644 --- a/modules/ocl/test/utility.hpp +++ b/modules/ocl/test/utility.hpp @@ -76,12 +76,12 @@ double checkSimilarity(const cv::Mat &m1, const cv::Mat &m2); EXPECT_LE(checkNorm(cv::Mat(mat)), eps) \ } -//#define EXPECT_MAT_NEAR(mat1, mat2, eps) \ -//{ \ -// ASSERT_EQ(mat1.type(), mat2.type()); \ -// ASSERT_EQ(mat1.size(), mat2.size()); \ -// EXPECT_LE(checkNorm(cv::Mat(mat1), cv::Mat(mat2)), eps); \ -//} +/*#define EXPECT_MAT_NEAR(mat1, mat2, eps) \ +{ \ + ASSERT_EQ(mat1.type(), mat2.type()); \ + ASSERT_EQ(mat1.size(), mat2.size()); \ + EXPECT_LE(checkNorm(cv::Mat(mat1), cv::Mat(mat2)), eps); \ +}*/ #define EXPECT_MAT_NEAR(mat1, mat2, eps,s) \ { \ diff --git a/modules/python/CMakeLists.txt b/modules/python/CMakeLists.txt index 8d4a0ca1a..9db2c8aae 100644 --- a/modules/python/CMakeLists.txt +++ b/modules/python/CMakeLists.txt @@ -80,6 +80,10 @@ set_target_properties(${the_module} PROPERTIES OUTPUT_NAME cv2 SUFFIX ${CVPY_SUFFIX}) +if(ENABLE_SOLUTION_FOLDERS) + set_target_properties(${the_module} PROPERTIES FOLDER "bindings") +endif() + if(CMAKE_COMPILER_IS_GNUCXX AND NOT ENABLE_NOISY_WARNINGS) set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-unused-function") endif() diff --git a/modules/python/src2/gen2.py b/modules/python/src2/gen2.py index 5381579ce..5061b1147 100755 --- a/modules/python/src2/gen2.py +++ b/modules/python/src2/gen2.py @@ -243,9 +243,9 @@ class ClassInfo(object): if decl: self.bases = decl[1].split()[1:] if len(self.bases) > 1: - print "Warning: class %s has more than 1 base class (not supported by Python C extensions)" % (self.name,) - print "Bases: ", " ".join(self.bases) - print "Only the first base class will be used" + print "Note: Class %s has more than 1 base class (not supported by Python C extensions)" % (self.name,) + print " Bases: ", " ".join(self.bases) + print " Only the first base class will be used" self.bases = [self.bases[0].strip(",")] #return sys.exit(-1) if self.bases and self.bases[0].startswith("cv::"): diff --git a/modules/ts/include/opencv2/ts/ts.hpp b/modules/ts/include/opencv2/ts/ts.hpp index 094dad4da..a10251d41 100644 --- a/modules/ts/include/opencv2/ts/ts.hpp +++ b/modules/ts/include/opencv2/ts/ts.hpp @@ -547,6 +547,15 @@ struct CV_EXPORTS DefaultRngAuto } +namespace cvtest +{ + +// test images generation functions +CV_EXPORTS void fillGradient(Mat& img, int delta = 5); +CV_EXPORTS void smoothBorder(Mat& img, const Scalar& color, int delta = 3); + +} //namespace cvtest + // fills c with zeros CV_EXPORTS void cvTsZero( CvMat* c, const CvMat* mask=0 ); diff --git a/modules/ts/misc/perf_tests_timing.py b/modules/ts/misc/perf_tests_timing.py index 781bfdfd2..721c0e3e3 100644 --- a/modules/ts/misc/perf_tests_timing.py +++ b/modules/ts/misc/perf_tests_timing.py @@ -56,33 +56,39 @@ if __name__ == "__main__": test_cases[name] = [None] * setsCount test_cases[name][i] = case - testsuits = [] # testsuit name, time, flag for failed tests + testsuits = [] # testsuit name, time, num, flag for failed tests + overall_time = 0 prevGroupName = None suit_time = 0 - has_failed = False + suit_num = 0 + fails_num = 0 for name in sorted(test_cases.iterkeys(), key=alphanum_keyselector): cases = test_cases[name] groupName = next(c for c in cases if c).shortName() if groupName != prevGroupName: if prevGroupName != None: + suit_time = suit_time/60 #from seconds to minutes testsuits.append({'name': prevGroupName, 'time': suit_time, \ - 'failed': has_failed}) - has_failed = False + 'num': suit_num, 'failed': fails_num}) + overall_time += suit_time suit_time = 0 + suit_num = 0 + fails_num = 0 prevGroupName = groupName for i in range(setsCount): case = cases[i] if not case is None: + suit_num += 1 if case.get('status') == 'run': suit_time += case.get('time') if case.get('status') == 'failed': - has_failed = True + fails_num += 1 testsuits.append({'name': prevGroupName, 'time': suit_time, \ - 'failed': has_failed}) + 'num': suit_num, 'failed': fails_num}) if len(testsuits)==0: print 'No testsuits found' @@ -91,17 +97,19 @@ if __name__ == "__main__": tbl = table() # header - tbl.newColumn('name', 'Name of testsuit', align = 'left', cssclass = 'col_name') - tbl.newColumn('time', 'Time (ms)', align = 'left', cssclass = 'col_name') - tbl.newColumn('failed', 'Failed tests', align = 'center', cssclass = 'col_name') + tbl.newColumn('name', 'Testsuit', align = 'left', cssclass = 'col_name') + tbl.newColumn('time', 'Time (min)', align = 'center', cssclass = 'col_name') + tbl.newColumn('num', 'Num of tests', align = 'center', cssclass = 'col_name') + tbl.newColumn('failed', 'Failed', align = 'center', cssclass = 'col_name') # rows for suit in sorted(testsuits, key = lambda suit: suit['time'], reverse = True): tbl.newRow() tbl.newCell('name', suit['name']) tbl.newCell('time', formatValue(suit['time'], '', ''), suit['time']) - if (suit['failed']): - tbl.newCell('failed', 'Yes') + tbl.newCell('num', suit['num']) + if (suit['failed'] != 0): + tbl.newCell('failed', suit['failed']) else: tbl.newCell('failed', ' ') @@ -116,5 +124,6 @@ if __name__ == "__main__": splitter = 15 * '*' print '\n%s\n %s\n%s\n' % (splitter, module_name, splitter) + print 'Overall time: %.2f min\n' % overall_time tbl.consolePrintTable(sys.stdout) print 4 * '\n' \ No newline at end of file diff --git a/modules/ts/misc/run.py b/modules/ts/misc/run.py index d6e61d327..435171371 100755 --- a/modules/ts/misc/run.py +++ b/modules/ts/misc/run.py @@ -69,6 +69,8 @@ parse_patterns = ( {'name': "ndk_path", 'default': None, 'pattern': re.compile("^(?:ANDROID_NDK|ANDROID_STANDALONE_TOOLCHAIN)?:PATH=(.*)$")}, {'name': "android_abi", 'default': None, 'pattern': re.compile("^ANDROID_ABI:STRING=(.*)$")}, {'name': "android_executable", 'default': None, 'pattern': re.compile("^ANDROID_EXECUTABLE:FILEPATH=(.*android.*)$")}, + {'name': "ant_executable", 'default': None, 'pattern': re.compile("^ANT_EXECUTABLE:FILEPATH=(.*ant.*)$")}, + {'name': "java_test_binary_dir", 'default': None, 'pattern': re.compile("^opencv_test_java_BINARY_DIR:STATIC=(.*)$")}, {'name': "is_x64", 'default': "OFF", 'pattern': re.compile("^CUDA_64_BIT_DEVICE_CODE:BOOL=(ON)$")},#ugly( {'name': "cmake_generator", 'default': None, 'pattern': re.compile("^CMAKE_GENERATOR:INTERNAL=(.+)$")}, {'name': "cxx_compiler", 'default': None, 'pattern': re.compile("^CMAKE_CXX_COMPILER:FILEPATH=(.+)$")}, @@ -431,6 +433,8 @@ class TestSuite(object): if self.tests_dir and os.path.isdir(self.tests_dir): files = glob.glob(os.path.join(self.tests_dir, self.nameprefix + "*")) files = [f for f in files if self.isTest(f)] + if self.ant_executable and self.java_test_binary_dir: + files.append("java") return files return [] @@ -740,6 +744,16 @@ class TestSuite(object): if os.path.isfile(hostlogpath): return hostlogpath return None + elif path == "java": + cmd = [self.ant_executable, "-DjavaLibraryPath=" + self.tests_dir, "buildAndTest"] + + print >> _stderr, "Run command:", " ".join(cmd) + try: + Popen(cmd, stdout=_stdout, stderr=_stderr, cwd = self.java_test_binary_dir + "/.build").wait() + except OSError: + pass + + return None else: cmd = [exe] if self.options.help: diff --git a/modules/ts/src/ts.cpp b/modules/ts/src/ts.cpp index 681f9bf2c..4ae9e7ea4 100644 --- a/modules/ts/src/ts.cpp +++ b/modules/ts/src/ts.cpp @@ -47,6 +47,7 @@ #include #if defined WIN32 || defined _WIN32 || defined WIN64 || defined _WIN64 #include +#define NOMINMAX #include #ifdef _MSC_VER @@ -582,6 +583,79 @@ void TS::printf( int streams, const char* fmt, ... ) TS ts; TS* TS::ptr() { return &ts; } +void fillGradient(Mat& img, int delta) +{ + const int ch = img.channels(); + CV_Assert(!img.empty() && img.depth() == CV_8U && ch <= 4); + + int n = 255 / delta; + int r, c, i; + for(r=0; rtotal; i++ ) { CvObjectDetection detection = *(CvObjectDetection*)cvGetSeqElem( detections, i ); + float score = detection.score; CvRect bounding_box = detection.rect; cvRectangle( image, cvPoint(bounding_box.x, bounding_box.y), cvPoint(bounding_box.x + bounding_box.width, bounding_box.y + bounding_box.height), - CV_RGB(255,0,0), 3 ); + CV_RGB(cvRound(255.0f*score),0,0), 3 ); } cvReleaseMemStorage( &storage ); } diff --git a/samples/ocl/facedetect.cpp b/samples/ocl/facedetect.cpp index e901aa869..16017b928 100644 --- a/samples/ocl/facedetect.cpp +++ b/samples/ocl/facedetect.cpp @@ -180,7 +180,7 @@ _cleanup_: } void detectAndDraw( Mat& img, - cv::ocl::OclCascadeClassifier& cascade, CascadeClassifier& nestedCascade, + cv::ocl::OclCascadeClassifier& cascade, CascadeClassifier&, double scale) { int i = 0; diff --git a/samples/ocl/surf_matcher.cpp b/samples/ocl/surf_matcher.cpp index 7cee6281e..8462300ed 100644 --- a/samples/ocl/surf_matcher.cpp +++ b/samples/ocl/surf_matcher.cpp @@ -217,10 +217,10 @@ int main(int argc, char* argv[]) perspectiveTransform( obj_corners, scene_corners, H); //-- Draw lines between the corners (the mapped object in the scene - image_2 ) - line( img_matches, scene_corners[0] + Point2f( cpu_img1.cols, 0), scene_corners[1] + Point2f( cpu_img1.cols, 0), Scalar( 0, 255, 0), 4 ); - line( img_matches, scene_corners[1] + Point2f( cpu_img1.cols, 0), scene_corners[2] + Point2f( cpu_img1.cols, 0), Scalar( 0, 255, 0), 4 ); - line( img_matches, scene_corners[2] + Point2f( cpu_img1.cols, 0), scene_corners[3] + Point2f( cpu_img1.cols, 0), Scalar( 0, 255, 0), 4 ); - line( img_matches, scene_corners[3] + Point2f( cpu_img1.cols, 0), scene_corners[0] + Point2f( cpu_img1.cols, 0), Scalar( 0, 255, 0), 4 ); + line( img_matches, scene_corners[0] + Point2f( (float)cpu_img1.cols, 0), scene_corners[1] + Point2f( (float)cpu_img1.cols, 0), Scalar( 0, 255, 0), 4 ); + line( img_matches, scene_corners[1] + Point2f( (float)cpu_img1.cols, 0), scene_corners[2] + Point2f( (float)cpu_img1.cols, 0), Scalar( 0, 255, 0), 4 ); + line( img_matches, scene_corners[2] + Point2f( (float)cpu_img1.cols, 0), scene_corners[3] + Point2f( (float)cpu_img1.cols, 0), Scalar( 0, 255, 0), 4 ); + line( img_matches, scene_corners[3] + Point2f( (float)cpu_img1.cols, 0), scene_corners[0] + Point2f( (float)cpu_img1.cols, 0), Scalar( 0, 255, 0), 4 ); //-- Show detected matches namedWindow("ocl surf matches", 0);