Merge remote-tracking branch 'origin/master'

This commit is contained in:
Vadim Pisarevsky 2012-08-07 12:54:22 +04:00
commit 0d5c694ec4
31 changed files with 4866 additions and 60 deletions

1
3rdparty/tbb/.gitignore vendored Normal file
View File

@ -0,0 +1 @@
tbb*.tgz

View File

@ -140,6 +140,9 @@ OCV_OPTION(WITH_XIMEA "Include XIMEA cameras support" OFF
OCV_OPTION(WITH_XINE "Include Xine support (GPL)" OFF IF (UNIX AND NOT APPLE AND NOT ANDROID) )
OCV_OPTION(WITH_CLP "Include Clp support (EPL)" OFF)
OCV_OPTION(WITH_OPENCL "Include OpenCL Runtime support" OFF IF (NOT ANDROID AND NOT IOS) )
OCV_OPTION(WITH_OPENCLAMDFFT "Include AMD OpenCL FFT library support" OFF IF (NOT ANDROID AND NOT IOS) )
OCV_OPTION(WITH_OPENCLAMDBLAS "Include AMD OpenCL BLAS library support" OFF IF (NOT ANDROID AND NOT IOS) )
# OpenCV build components
# ===================================================
@ -396,6 +399,12 @@ if(WITH_OPENCL)
if(OPENCL_FOUND)
set(HAVE_OPENCL 1)
endif()
if(WITH_OPENCLAMDFFT)
set(HAVE_CLAMDFFT 1)
endif()
if(WITH_OPENCLAMDBLAS)
set(HAVE_CLAMDBLAS 1)
endif()
endif()
# ----------------------------------------------------------------------------

View File

@ -4,7 +4,7 @@
# See home page: http://code.google.com/p/android-cmake/
#
# The file is mantained by the OpenCV project. And also can be found at
# http://code.opencv.org/svn/opencv/trunk/opencv/android/android.toolchain.cmake
# http://code.opencv.org/projects/opencv/repository/revisions/master/changes/android/android.toolchain.cmake
#
# Usage Linux:
# $ export ANDROID_NDK=/absolute/path/to/the/android-ndk
@ -182,6 +182,7 @@
# [+] added mips architecture support
# - modified August 2012
# [+] updated for NDK r8b
# [~] all intermediate files generated by toolchain are moved into CMakeFiles
# ------------------------------------------------------------------------------
cmake_minimum_required( VERSION 2.6.3 )
@ -854,45 +855,48 @@ elseif( X86 )
endif()
#linker flags
list( APPEND ANDROID_SYSTEM_LIB_DIRS "${CMAKE_BINARY_DIR}/systemlibs/${ANDROID_NDK_ABI_NAME}" "${CMAKE_INSTALL_PREFIX}/libs/${ANDROID_NDK_ABI_NAME}" )
if( NOT DEFINED __ndklibspath )
set( __ndklibspath "${CMAKE_BINARY_DIR}${CMAKE_FILES_DIRECTORY}/ndklibs/${ANDROID_NDK_ABI_NAME}" )
endif()
list( APPEND ANDROID_SYSTEM_LIB_DIRS "${__ndklibspath}" "${CMAKE_INSTALL_PREFIX}/libs/${ANDROID_NDK_ABI_NAME}" )
set( ANDROID_LINKER_FLAGS "" )
#STL
if( ANDROID_USE_STLPORT )
if( EXISTS "${__stlLibPath}/libstlport_static.a" )
__COPY_IF_DIFFERENT( "${__stlLibPath}/libstlport_static.a" "${CMAKE_BINARY_DIR}/systemlibs/${ANDROID_NDK_ABI_NAME}/libstlport_static.a" )
__COPY_IF_DIFFERENT( "${__stlLibPath}/libstlport_static.a" "${__ndklibspath}/libstlport_static.a" )
endif()
if( EXISTS "${CMAKE_BINARY_DIR}/systemlibs/${ANDROID_NDK_ABI_NAME}/libstlport_static.a" )
if( EXISTS "${__ndklibspath}/libstlport_static.a" )
set( ANDROID_LINKER_FLAGS "${ANDROID_LINKER_FLAGS} -Wl,--start-group -lstlport_static" )
endif()
else( ANDROID_USE_STLPORT )
if( EXISTS "${__stlLibPath}/libgnustl_static.a" )
__COPY_IF_DIFFERENT( "${__stlLibPath}/libgnustl_static.a" "${CMAKE_BINARY_DIR}/systemlibs/${ANDROID_NDK_ABI_NAME}/libstdc++.a" )
__COPY_IF_DIFFERENT( "${__stlLibPath}/libgnustl_static.a" "${__ndklibspath}/libstdc++.a" )
elseif( ANDROID_ARCH_NAME STREQUAL "arm" AND EXISTS "${__stlLibPath}/${CMAKE_SYSTEM_PROCESSOR}/thumb/libstdc++.a" )
__COPY_IF_DIFFERENT( "${__stlLibPath}/${CMAKE_SYSTEM_PROCESSOR}/thumb/libstdc++.a" "${CMAKE_BINARY_DIR}/systemlibs/${ANDROID_NDK_ABI_NAME}/libstdc++.a" )
__COPY_IF_DIFFERENT( "${__stlLibPath}/${CMAKE_SYSTEM_PROCESSOR}/thumb/libstdc++.a" "${__ndklibspath}/libstdc++.a" )
elseif( ANDROID_ARCH_NAME STREQUAL "arm" AND EXISTS "${__stlLibPath}/${CMAKE_SYSTEM_PROCESSOR}/libstdc++.a" )
__COPY_IF_DIFFERENT( "${__stlLibPath}/${CMAKE_SYSTEM_PROCESSOR}/libstdc++.a" "${CMAKE_BINARY_DIR}/systemlibs/${ANDROID_NDK_ABI_NAME}/libstdc++.a" )
__COPY_IF_DIFFERENT( "${__stlLibPath}/${CMAKE_SYSTEM_PROCESSOR}/libstdc++.a" "${__ndklibspath}/libstdc++.a" )
elseif( ANDROID_ARCH_NAME STREQUAL "arm" AND EXISTS "${__stlLibPath}/thumb/libstdc++.a" )
__COPY_IF_DIFFERENT( "${__stlLibPath}/thumb/libstdc++.a" "${CMAKE_BINARY_DIR}/systemlibs/${ANDROID_NDK_ABI_NAME}/libstdc++.a" )
__COPY_IF_DIFFERENT( "${__stlLibPath}/thumb/libstdc++.a" "${__ndklibspath}/libstdc++.a" )
elseif( EXISTS "${__stlLibPath}/libstdc++.a" )
__COPY_IF_DIFFERENT( "${__stlLibPath}/libstdc++.a" "${CMAKE_BINARY_DIR}/systemlibs/${ANDROID_NDK_ABI_NAME}/libstdc++.a" )
__COPY_IF_DIFFERENT( "${__stlLibPath}/libstdc++.a" "${__ndklibspath}/libstdc++.a" )
endif()
if( EXISTS "${CMAKE_BINARY_DIR}/systemlibs/${ANDROID_NDK_ABI_NAME}/libstdc++.a" )
if( EXISTS "${__ndklibspath}/libstdc++.a" )
set( ANDROID_LINKER_FLAGS "${ANDROID_LINKER_FLAGS} -lstdc++" )
endif()
#gcc exception & rtti support
if( EXISTS "${__stlLibPath}/libsupc++.a" )
__COPY_IF_DIFFERENT( "${__stlLibPath}/libsupc++.a" "${CMAKE_BINARY_DIR}/systemlibs/${ANDROID_NDK_ABI_NAME}/libsupc++.a" )
__COPY_IF_DIFFERENT( "${__stlLibPath}/libsupc++.a" "${__ndklibspath}/libsupc++.a" )
elseif( ANDROID_ARCH_NAME STREQUAL "arm" AND EXISTS "${ANDROID_TOOLCHAIN_ROOT}/${ANDROID_TOOLCHAIN_MACHINE_NAME}/lib/${CMAKE_SYSTEM_PROCESSOR}/thumb/libsupc++.a" )
__COPY_IF_DIFFERENT( "${ANDROID_TOOLCHAIN_ROOT}/${ANDROID_TOOLCHAIN_MACHINE_NAME}/lib/${CMAKE_SYSTEM_PROCESSOR}/thumb/libsupc++.a" "${CMAKE_BINARY_DIR}/systemlibs/${ANDROID_NDK_ABI_NAME}/libsupc++.a" )
__COPY_IF_DIFFERENT( "${ANDROID_TOOLCHAIN_ROOT}/${ANDROID_TOOLCHAIN_MACHINE_NAME}/lib/${CMAKE_SYSTEM_PROCESSOR}/thumb/libsupc++.a" "${__ndklibspath}/libsupc++.a" )
elseif( ANDROID_ARCH_NAME STREQUAL "arm" AND EXISTS "${ANDROID_TOOLCHAIN_ROOT}/${ANDROID_TOOLCHAIN_MACHINE_NAME}/lib/${CMAKE_SYSTEM_PROCESSOR}/libsupc++.a" )
__COPY_IF_DIFFERENT( "${ANDROID_TOOLCHAIN_ROOT}/${ANDROID_TOOLCHAIN_MACHINE_NAME}/lib/${CMAKE_SYSTEM_PROCESSOR}/libsupc++.a" "${CMAKE_BINARY_DIR}/systemlibs/${ANDROID_NDK_ABI_NAME}/libsupc++.a" )
__COPY_IF_DIFFERENT( "${ANDROID_TOOLCHAIN_ROOT}/${ANDROID_TOOLCHAIN_MACHINE_NAME}/lib/${CMAKE_SYSTEM_PROCESSOR}/libsupc++.a" "${__ndklibspath}/libsupc++.a" )
elseif( ANDROID_ARCH_NAME STREQUAL "arm" AND EXISTS "${ANDROID_TOOLCHAIN_ROOT}/${ANDROID_TOOLCHAIN_MACHINE_NAME}/lib/thumb/libsupc++.a" )
__COPY_IF_DIFFERENT( "${ANDROID_TOOLCHAIN_ROOT}/${ANDROID_TOOLCHAIN_MACHINE_NAME}/lib/thumb/libsupc++.a" "${CMAKE_BINARY_DIR}/systemlibs/${ANDROID_NDK_ABI_NAME}/libsupc++.a" )
__COPY_IF_DIFFERENT( "${ANDROID_TOOLCHAIN_ROOT}/${ANDROID_TOOLCHAIN_MACHINE_NAME}/lib/thumb/libsupc++.a" "${__ndklibspath}/libsupc++.a" )
elseif( EXISTS "${ANDROID_TOOLCHAIN_ROOT}/${ANDROID_TOOLCHAIN_MACHINE_NAME}/lib/libsupc++.a" )
__COPY_IF_DIFFERENT( "${ANDROID_TOOLCHAIN_ROOT}/${ANDROID_TOOLCHAIN_MACHINE_NAME}/lib/libsupc++.a" "${CMAKE_BINARY_DIR}/systemlibs/${ANDROID_NDK_ABI_NAME}/libsupc++.a" )
__COPY_IF_DIFFERENT( "${ANDROID_TOOLCHAIN_ROOT}/${ANDROID_TOOLCHAIN_MACHINE_NAME}/lib/libsupc++.a" "${__ndklibspath}/libsupc++.a" )
endif()
if( EXISTS "${CMAKE_BINARY_DIR}/systemlibs/${ANDROID_NDK_ABI_NAME}/libsupc++.a" )
if( EXISTS "${__ndklibspath}/libsupc++.a" )
set( ANDROID_LINKER_FLAGS "${ANDROID_LINKER_FLAGS} -lsupc++" )
endif()
endif( ANDROID_USE_STLPORT )
@ -1038,13 +1042,14 @@ endmacro()
# export toolchain settings for the try_compile() command
if( NOT PROJECT_NAME STREQUAL "CMAKE_TRY_COMPILE" )
set( __toolchain_config "")
foreach( __var ANDROID_ABI ANDROID_FORCE_ARM_BUILD ANDROID_NATIVE_API_LEVEL ANDROID_NO_UNDEFINED ANDROID_SO_UNDEFINED ANDROID_SET_OBSOLETE_VARIABLES LIBRARY_OUTPUT_PATH_ROOT ANDROID_USE_STLPORT ANDROID_FORBID_SYGWIN ANDROID_NDK ANDROID_STANDALONE_TOOLCHAIN ANDROID_FUNCTION_LEVEL_LINKING )
foreach( __var ANDROID_ABI ANDROID_FORCE_ARM_BUILD ANDROID_NATIVE_API_LEVEL ANDROID_NO_UNDEFINED ANDROID_SO_UNDEFINED ANDROID_SET_OBSOLETE_VARIABLES LIBRARY_OUTPUT_PATH_ROOT ANDROID_USE_STLPORT ANDROID_FORBID_SYGWIN ANDROID_NDK ANDROID_STANDALONE_TOOLCHAIN ANDROID_FUNCTION_LEVEL_LINKING __ndklibspath )
if( DEFINED ${__var} )
set( __toolchain_config "${__toolchain_config}set( ${__var} \"${${__var}}\" )\n" )
endif()
endforeach()
file( WRITE "${CMAKE_CURRENT_BINARY_DIR}/CMakeFiles/android.toolchain.config.cmake" "${__toolchain_config}" )
file( WRITE "${CMAKE_BINARY_DIR}${CMAKE_FILES_DIRECTORY}/android.toolchain.config.cmake" "${__toolchain_config}" )
unset( __toolchain_config )
unset( __ndklibspath )
endif()
@ -1073,6 +1078,7 @@ endif()
# Can be set only at the first run:
# ANDROID_NDK
# ANDROID_STANDALONE_TOOLCHAIN
# ANDROID_TOOLCHAIN_NAME : "arm-linux-androideabi-4.4.3" or "arm-linux-androideabi-4.6" or "mipsel-linux-android-4.4.3" or "mipsel-linux-android-4.6" or "x86-4.4.3" or "x86-4.6"
# Obsolete:
# ANDROID_API_LEVEL : superseded by ANDROID_NATIVE_API_LEVEL
# ARM_TARGET : superseded by ANDROID_ABI
@ -1105,7 +1111,6 @@ endif()
# ANDROID_COMPILER_VERSION : GCC version used
# ANDROID_CXX_FLAGS : C/C++ compiler flags required by Android platform
# ANDROID_SUPPORTED_ABIS : list of currently allowed values for ANDROID_ABI
# ANDROID_TOOLCHAIN_NAME : "standalone", "arm-linux-androideabi-4.4.3" or "x86-4.4.3" or something similar.
# ANDROID_TOOLCHAIN_MACHINE_NAME : "arm-linux-androideabi", "arm-eabi" or "i686-android-linux"
# ANDROID_TOOLCHAIN_ROOT : path to the top level of toolchain (standalone or placed inside NDK)
# ANDROID_SUPPORTED_NATIVE_API_LEVELS : list of native API levels found inside NDK

View File

@ -2,8 +2,19 @@ if(APPLE)
set(OPENCL_FOUND YES)
set(OPENCL_LIBRARIES "-framework OpenCL")
else()
find_package(OpenCL QUIET)
#find_package(OpenCL QUIET)
if(WITH_OPENCLAMDFFT)
find_path(CLAMDFFT_INCLUDE_DIR
NAMES clAmdFft.h)
find_library(CLAMDFFT_LIBRARIES
NAMES clAmdFft.Runtime)
endif()
if(WITH_OPENCLAMDBLAS)
find_path(CLAMDBLAS_INCLUDE_DIR
NAMES clAmdBlas.h)
find_library(CLAMDBLAS_LIBRARIES
NAMES clAmdBlas)
endif()
# Try AMD/ATI Stream SDK
if (NOT OPENCL_FOUND)
set(ENV_AMDSTREAMSDKROOT $ENV{AMDAPPSDKROOT})

View File

@ -175,6 +175,12 @@
/* OpenCL Support */
#cmakedefine HAVE_OPENCL
/* AMD's OpenCL Fast Fourier Transform Library*/
#cmakedefine HAVE_CLAMDFFT
/* AMD's Basic Linear Algebra Subprograms Library*/
#cmakedefine HAVE_CLAMDBLAS
/* NVidia Cuda Fast Fourier Transform (FFT) API*/
#cmakedefine HAVE_CUFFT

View File

@ -67,11 +67,12 @@ StereoVar::~StereoVar()
static Mat diffX(Mat &src)
{
register int x, y, cols = src.cols - 1;
int cols = src.cols - 1;
Mat dst(src.size(), src.type());
for(y = 0; y < src.rows; y++){
for(int y = 0; y < src.rows; y++){
const float* pSrc = src.ptr<float>(y);
float* pDst = dst.ptr<float>(y);
int x = 0;
#if CV_SSE2
for (x = 0; x <= cols - 8; x += 8) {
__m128 a0 = _mm_loadu_ps(pSrc + x);

View File

@ -9,16 +9,16 @@ Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
*Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
*Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
*Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
*Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
*Neither the name of the University of Cambridge nor the names of
its contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
*Neither the name of the University of Cambridge nor the names of
its contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
@ -350,7 +350,7 @@ int cornerScore<8>(const uchar* ptr, const int pixel[], int threshold)
}
int b0 = -a0;
for( k = 0; k < 12; k += 2 )
for( k = 0; k < 8; k += 2 )
{
int b = std::max((int)d[k+1], (int)d[k+2]);
b = std::max(b, (int)d[k+3]);
@ -375,7 +375,10 @@ template<int patternSize>
void FAST_t(InputArray _img, std::vector<KeyPoint>& keypoints, int threshold, bool nonmax_suppression)
{
Mat img = _img.getMat();
const int K = patternSize/2, N = patternSize + K + 1, quarterPatternSize = patternSize/4;
const int K = patternSize/2, N = patternSize + K + 1;
#if CV_SSE2
const int quarterPatternSize = patternSize/4;
#endif
int i, j, k, pixel[25];
makeOffsets(pixel, (int)img.step, patternSize);
for(k = patternSize; k < 25; k++)
@ -585,7 +588,7 @@ FastFeatureDetector::FastFeatureDetector( int _threshold, bool _nonmaxSuppressio
FastFeatureDetector::FastFeatureDetector( int _threshold, bool _nonmaxSuppression, int _type )
: threshold(_threshold), nonmaxSuppression(_nonmaxSuppression), type(_type)
{}
void FastFeatureDetector::detectImpl( const Mat& image, vector<KeyPoint>& keypoints, const Mat& mask ) const
{
Mat grayImage = image;

View File

@ -1298,17 +1298,17 @@ public:
maxk(_maxk), space_ofs(_space_ofs), space_weight(_space_weight), color_weight(_color_weight)
{
}
virtual void operator() (const Range& range) const
{
int i, j, cn = dest->channels(), k;
Size size = dest->size();
for( i = range.start; i < range.end; i++ )
{
const uchar* sptr = temp->ptr(i+radius) + radius*cn;
uchar* dptr = dest->ptr(i);
if( cn == 1 )
{
for( j = 0; j < size.width; j++ )
@ -1351,10 +1351,10 @@ public:
}
}
}
private:
const Mat *temp;
Mat *dest;
const Mat *temp;
int radius, maxk, *space_ofs;
float *space_weight, *color_weight;
};
@ -1367,40 +1367,40 @@ bilateralFilter_8u( const Mat& src, Mat& dst, int d,
int cn = src.channels();
int i, j, maxk, radius;
Size size = src.size();
CV_Assert( (src.type() == CV_8UC1 || src.type() == CV_8UC3) &&
src.type() == dst.type() && src.size() == dst.size() &&
src.data != dst.data );
if( sigma_color <= 0 )
sigma_color = 1;
if( sigma_space <= 0 )
sigma_space = 1;
double gauss_color_coeff = -0.5/(sigma_color*sigma_color);
double gauss_space_coeff = -0.5/(sigma_space*sigma_space);
if( d <= 0 )
radius = cvRound(sigma_space*1.5);
else
radius = d/2;
radius = MAX(radius, 1);
d = radius*2 + 1;
Mat temp;
copyMakeBorder( src, temp, radius, radius, radius, radius, borderType );
vector<float> _color_weight(cn*256);
vector<float> _space_weight(d*d);
vector<int> _space_ofs(d*d);
float* color_weight = &_color_weight[0];
float* space_weight = &_space_weight[0];
int* space_ofs = &_space_ofs[0];
// initialize color-related bilateral filter coefficients
for( i = 0; i < 256*cn; i++ )
color_weight[i] = (float)std::exp(i*i*gauss_color_coeff);
// initialize space-related bilateral filter coefficients
for( i = -radius, maxk = 0; i <= radius; i++ )
for( j = -radius; j <= radius; j++ )
@ -1411,7 +1411,7 @@ bilateralFilter_8u( const Mat& src, Mat& dst, int d,
space_weight[maxk] = (float)std::exp(r*r*gauss_space_coeff);
space_ofs[maxk++] = (int)(i*temp.step + j*cn);
}
BilateralFilter_8u_Invoker body(dst, temp, radius, maxk, space_ofs, space_weight, color_weight);
parallel_for_(Range(0, size.height), body);
}

View File

@ -29,6 +29,14 @@ if (HAVE_OPENCL)
if(OPENCL_INCLUDE_DIR)
ocv_include_directories(${OPENCL_INCLUDE_DIR})
endif()
if (HAVE_CLAMDFFT)
set(ocl_link_libs ${ocl_link_libs} ${CLAMDFFT_LIBRARIES})
ocv_include_directories(${CLAMDFFT_INCLUDE_DIR})
endif()
if (HAVE_CLAMDBLAS)
set(ocl_link_libs ${ocl_link_libs} ${CLAMDBLAS_LIBRARIES})
ocv_include_directories(${CLAMDBLAS_INCLUDE_DIR})
endif()
endif()
ocv_set_module_sources(

View File

@ -858,7 +858,72 @@ namespace cv
void benchmark_copy_vectorize(const oclMat &src, oclMat &dst);
void benchmark_copy_offset_stride(const oclMat &src, oclMat &dst);
void benchmark_ILP();
//! computes vertical sum, supports only CV_32FC1 images
CV_EXPORTS void columnSum(const oclMat& src, oclMat& sum);
//! performs linear blending of two images
//! to avoid accuracy errors sum of weigths shouldn't be very close to zero
// supports only CV_8UC1 source type
CV_EXPORTS void blendLinear(const oclMat& img1, const oclMat& img2, const oclMat& weights1, const oclMat& weights2, oclMat& result);
/////////////////////////////// Pyramid /////////////////////////////////////
CV_EXPORTS void pyrDown(const oclMat& src, oclMat& dst);
//! upsamples the source image and then smoothes it
CV_EXPORTS void pyrUp(const cv::ocl::oclMat& src,cv::ocl::oclMat& dst);
///////////////////////////////////////// match_template /////////////////////////////////////////////////////////////
struct CV_EXPORTS MatchTemplateBuf
{
Size user_block_size;
oclMat imagef, templf;
std::vector<oclMat> images;
std::vector<oclMat> image_sums;
std::vector<oclMat> image_sqsums;
};
//! computes the proximity map for the raster template and the image where the template is searched for
// Supports TM_SQDIFF, TM_SQDIFF_NORMED, TM_CCORR, TM_CCORR_NORMED, TM_CCOEFF, TM_CCOEFF_NORMED for type 8UC1 and 8UC4
// Supports TM_SQDIFF, TM_CCORR for type 32FC1 and 32FC4
CV_EXPORTS void matchTemplate(const oclMat& image, const oclMat& templ, oclMat& result, int method);
//! computes the proximity map for the raster template and the image where the template is searched for
// Supports TM_SQDIFF, TM_SQDIFF_NORMED, TM_CCORR, TM_CCORR_NORMED, TM_CCOEFF, TM_CCOEFF_NORMED for type 8UC1 and 8UC4
// Supports TM_SQDIFF, TM_CCORR for type 32FC1 and 32FC4
CV_EXPORTS void matchTemplate(const oclMat& image, const oclMat& templ, oclMat& result, int method, MatchTemplateBuf& buf);
#ifdef HAVE_CLAMDFFT
///////////////////////////////////////// clAmdFft related /////////////////////////////////////////
// the two functions must be called before/after run any fft library functions.
CV_EXPORTS void fft_setup(); // this will be implicitly invoked
CV_EXPORTS void fft_teardown(); // you need to teardown fft library manually
/////////////////////////////////////// DFT /////////////////////////////////////////////////////
//! Performs a forward or inverse discrete Fourier transform (1D or 2D) of floating point matrix.
//! Param dft_size is the size of DFT transform.
//!
//! For complex-to-real transform it is assumed that the source matrix is packed in CLFFT's format.
// support src type of CV32FC1, CV32FC2
// support flags: DFT_INVERSE, DFT_REAL_OUTPUT, DFT_COMPLEX_OUTPUT, DFT_ROWS
// dft_size is the size of original input, which is used for transformation from complex to real.
// dft_size must be powers of 2, 3 and 5
// real to complex dft requires at least v1.8 clAmdFft
// real to complex dft output is not the same with cpu version
// real to complex and complex to real does not support DFT_ROWS
CV_EXPORTS void dft(const oclMat& src, oclMat& dst, Size dft_size = Size(0, 0), int flags = 0);
#endif // HAVE_CLAMDFFT
#ifdef HAVE_CLAMDBLAS
//! implements generalized matrix product algorithm GEMM from BLAS
// The functionality requires clAmdBlas library
// only support type CV_32FC1
// flag GEMM_3_T is not supported
CV_EXPORTS void gemm(const oclMat& src1, const oclMat& src2, double alpha,
const oclMat& src3, double beta, oclMat& dst, int flags = 0);
#endif
}
}
#include "opencv2/ocl/matrix_operations.hpp"

98
modules/ocl/src/blend.cpp Normal file
View File

@ -0,0 +1,98 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved.
// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// @Authors
// Nathan, liujun@multicorewareinc.com
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other oclMaterials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#include "precomp.hpp"
#include <iomanip>
using namespace cv;
using namespace cv::ocl;
using namespace std;
#if !defined (HAVE_OPENCL)
void cv::ocl::blendLinear(const oclMat& img1, const oclMat& img2, const oclMat& weights1, const oclMat& weights2,
oclMat& result){throw_nogpu();}
#else
namespace cv
{
namespace ocl
{
////////////////////////////////////OpenCL kernel strings//////////////////////////
extern const char *blend_linear;
}
}
void cv::ocl::blendLinear(const oclMat& img1, const oclMat& img2, const oclMat& weights1, const oclMat& weights2,
oclMat& result)
{
cv::ocl::Context *ctx = img1.clCxt;
assert(ctx == img2.clCxt && ctx == weights1.clCxt && ctx == weights2.clCxt);
int channels = img1.channels();
int depth = img1.depth();
int rows = img1.rows;
int cols = img1.cols;
int istep = img1.step;
int wstep = weights1.step;
size_t globalSize[] = {cols * channels, rows, 1};
size_t localSize[] = {16, 16, 1};
vector< pair<size_t, const void *> > args;
if(globalSize[0]!=0)
{
args.push_back( make_pair( sizeof(cl_mem), (void *)&result.data ));
args.push_back( make_pair( sizeof(cl_mem), (void *)&img1.data ));
args.push_back( make_pair( sizeof(cl_mem), (void *)&img2.data ));
args.push_back( make_pair( sizeof(cl_mem), (void *)&weights1.data ));
args.push_back( make_pair( sizeof(cl_mem), (void *)&weights2.data ));
args.push_back( make_pair( sizeof(cl_int), (void *)&rows ));
args.push_back( make_pair( sizeof(cl_int), (void *)&cols ));
args.push_back( make_pair( sizeof(cl_int), (void *)&istep ));
args.push_back( make_pair( sizeof(cl_int), (void *)&wstep ));
std::string kernelName = "BlendLinear";
openCLExecuteKernel(ctx, &blend_linear, kernelName, globalSize, localSize, args, channels, depth);
}
}
#endif

View File

@ -0,0 +1,91 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved.
// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// @Authors
// Chunpeng Zhang, chunpeng@multicorewareinc.com
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other oclMaterials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors as is and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#include <iomanip>
#include "precomp.hpp"
using namespace cv;
using namespace cv::ocl;
using namespace std;
#if !defined(HAVE_OPENCL)
void cv::ocl::columnSum(const oclMat& src,oclMat& dst){ throw_nogpu(); }
#else /*!HAVE_OPENCL */
namespace cv
{
namespace ocl
{
extern const char* imgproc_columnsum;
}
}
void cv::ocl::columnSum(const oclMat& src,oclMat& dst)
{
CV_Assert(src.type() == CV_32FC1 && dst.type() == CV_32FC1 && src.size() == dst.size());
Context *clCxt = src.clCxt;
const std::string kernelName = "columnSum";
std::vector< pair<size_t, const void *> > args;
args.push_back( make_pair( sizeof(cl_mem), (void *)&src.data));
args.push_back( make_pair( sizeof(cl_mem), (void *)&dst.data));
args.push_back( make_pair( sizeof(cl_int), (void *)&src.cols));
args.push_back( make_pair( sizeof(cl_int), (void *)&src.rows));
args.push_back( make_pair( sizeof(cl_int), (void *)&src.step));
args.push_back( make_pair( sizeof(cl_int), (void *)&dst.step));
size_t globalThreads[3] = {dst.cols, dst.rows, 1};
size_t localThreads[3] = {16, 16, 1};
openCLExecuteKernel(clCxt, &imgproc_columnsum, kernelName, globalThreads, localThreads, args, src.channels(), src.depth());
}
#endif

302
modules/ocl/src/fft.cpp Normal file
View File

@ -0,0 +1,302 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved.
// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// @Authors
// Peng Xiao, pengxiao@multicorewareinc.com
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other oclMaterials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors as is and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#include <iomanip>
#include "precomp.hpp"
#ifdef HAVE_CLAMDFFT
using namespace cv;
using namespace cv::ocl;
using namespace std;
#if !defined (HAVE_OPENCL)
void cv::ocl::dft(const oclMat& src, oclMat& dst, int flags) { throw_nogpu(); }
#else
#include <clAmdFft.h>
namespace cv{ namespace ocl {
enum FftType
{
C2R = 1, // complex to complex
R2C = 2, // real to opencl HERMITIAN_INTERLEAVED
C2C = 3 // opencl HERMITIAN_INTERLEAVED to real
};
struct FftPlan
{
friend void fft_setup();
friend void fft_teardown();
~FftPlan();
protected:
FftPlan(Size _dft_size, int _src_step, int _dst_step, int _flags, FftType _type);
const Size dft_size;
const int src_step, dst_step;
const int flags;
const FftType type;
clAmdFftPlanHandle plHandle;
static vector<FftPlan*> planStore;
static bool started;
static clAmdFftSetupData * setupData;
public:
// return a baked plan->
// if there is one matched plan, return it
// if not, bake a new one, put it into the planStore and return it.
static clAmdFftPlanHandle getPlan(Size _dft_size, int _src_step, int _dst_step, int _flags, FftType _type);
};
}}
bool cv::ocl::FftPlan::started = false;
vector<cv::ocl::FftPlan*> cv::ocl::FftPlan::planStore = vector<cv::ocl::FftPlan*>();
clAmdFftSetupData * cv::ocl::FftPlan::setupData = 0;
void cv::ocl::fft_setup()
{
if(FftPlan::started)
{
return;
}
FftPlan::setupData = new clAmdFftSetupData;
openCLSafeCall(clAmdFftInitSetupData( FftPlan::setupData ));
FftPlan::started = true;
}
void cv::ocl::fft_teardown()
{
if(!FftPlan::started)
{
return;
}
delete FftPlan::setupData;
for(int i = 0; i < FftPlan::planStore.size(); i ++)
{
delete FftPlan::planStore[i];
}
FftPlan::planStore.clear();
openCLSafeCall( clAmdFftTeardown( ) );
FftPlan::started = false;
}
// bake a new plan
cv::ocl::FftPlan::FftPlan(Size _dft_size, int _src_step, int _dst_step, int _flags, FftType _type)
: dft_size(_dft_size), src_step(_src_step), dst_step(_dst_step), flags(_flags), type(_type), plHandle(0)
{
if(!FftPlan::started)
{
// implicitly do fft setup
fft_setup();
}
bool is_1d_input = (_dft_size.height == 1);
int is_row_dft = flags & DFT_ROWS;
int is_scaled_dft = flags & DFT_SCALE;
int is_inverse = flags & DFT_INVERSE;
clAmdFftResultLocation place;
clAmdFftLayout inLayout;
clAmdFftLayout outLayout;
clAmdFftDim dim = is_1d_input||is_row_dft ? CLFFT_1D : CLFFT_2D;
size_t batchSize = is_row_dft?dft_size.height : 1;
size_t clLengthsIn[ 3 ] = {1, 1, 1};
size_t clStridesIn[ 3 ] = {1, 1, 1};
size_t clLengthsOut[ 3 ] = {1, 1, 1};
size_t clStridesOut[ 3 ] = {1, 1, 1};
clLengthsIn[0] = dft_size.width;
clLengthsIn[1] = is_row_dft ? 1 : dft_size.height;
clStridesIn[0] = 1;
clStridesOut[0] = 1;
switch(_type)
{
case C2C:
inLayout = CLFFT_COMPLEX_INTERLEAVED;
outLayout = CLFFT_COMPLEX_INTERLEAVED;
clStridesIn[1] = src_step / sizeof(std::complex<float>);
clStridesOut[1] = clStridesIn[1];
break;
case R2C:
CV_Assert(!is_row_dft); // this is not supported yet
inLayout = CLFFT_REAL;
outLayout = CLFFT_HERMITIAN_INTERLEAVED;
clStridesIn[1] = src_step / sizeof(float);
clStridesOut[1] = dst_step / sizeof(std::complex<float>);
break;
case C2R:
CV_Assert(!is_row_dft); // this is not supported yet
inLayout = CLFFT_HERMITIAN_INTERLEAVED;
outLayout = CLFFT_REAL;
clStridesIn[1] = src_step / sizeof(std::complex<float>);
clStridesOut[1] = dst_step / sizeof(float);
break;
default:
//std::runtime_error("does not support this convertion!");
cout << "Does not support this convertion!" << endl;
throw exception();
break;
}
clStridesIn[2] = is_row_dft ? clStridesIn[1] : dft_size.width * clStridesIn[1];
clStridesOut[2] = is_row_dft ? clStridesOut[1] : dft_size.width * clStridesOut[1];
openCLSafeCall( clAmdFftCreateDefaultPlan( &plHandle, Context::getContext()->impl->clContext, dim, clLengthsIn ) );
openCLSafeCall( clAmdFftSetResultLocation( plHandle, CLFFT_OUTOFPLACE ) );
openCLSafeCall( clAmdFftSetLayout( plHandle, inLayout, outLayout ) );
openCLSafeCall( clAmdFftSetPlanBatchSize( plHandle, batchSize ) );
openCLSafeCall( clAmdFftSetPlanInStride ( plHandle, dim, clStridesIn ) );
openCLSafeCall( clAmdFftSetPlanOutStride ( plHandle, dim, clStridesOut ) );
openCLSafeCall( clAmdFftSetPlanDistance ( plHandle, clStridesIn[ dim ], clStridesIn[ dim ]) );
openCLSafeCall( clAmdFftBakePlan( plHandle, 1, &(Context::getContext()->impl->clCmdQueue), NULL, NULL ) );
}
cv::ocl::FftPlan::~FftPlan()
{
for(int i = 0; i < planStore.size(); i ++)
{
if(planStore[i]->plHandle == plHandle)
{
planStore.erase(planStore.begin()+ i);
}
}
openCLSafeCall( clAmdFftDestroyPlan( &plHandle ) );
}
clAmdFftPlanHandle cv::ocl::FftPlan::getPlan(Size _dft_size, int _src_step, int _dst_step, int _flags, FftType _type)
{
// go through search
for(int i = 0; i < planStore.size(); i ++)
{
FftPlan * plan = planStore[i];
if(
plan->dft_size.width == _dft_size.width &&
plan->dft_size.height == _dft_size.height &&
plan->flags == _flags &&
plan->src_step == _src_step &&
plan->dst_step == _dst_step &&
plan->type == _type
)
{
return plan->plHandle;
}
}
// no baked plan is found
FftPlan *newPlan = new FftPlan(_dft_size, _src_step, _dst_step, _flags, _type);
planStore.push_back(newPlan);
return newPlan->plHandle;
}
void cv::ocl::dft(const oclMat& src, oclMat& dst, Size dft_size, int flags)
{
if(dft_size == Size(0,0))
{
dft_size = src.size();
}
// check if the given dft size is of optimal dft size
CV_Assert(dft_size.area() == getOptimalDFTSize(dft_size.area()));
// similar assertions with cuda module
CV_Assert(src.type() == CV_32F || src.type() == CV_32FC2);
// we don't support DFT_SCALE flag
CV_Assert(!(DFT_SCALE & flags));
bool is_1d_input = (src.rows == 1);
int is_row_dft = flags & DFT_ROWS;
int is_scaled_dft = flags & DFT_SCALE;
int is_inverse = flags & DFT_INVERSE;
bool is_complex_input = src.channels() == 2;
bool is_complex_output = !(flags & DFT_REAL_OUTPUT);
// We don't support real-to-real transform
CV_Assert(is_complex_input || is_complex_output);
FftType type = (FftType)(is_complex_input << 0 | is_complex_output << 1);
switch(type)
{
case C2C:
dst.create(src.rows, src.cols, CV_32FC2);
break;
case R2C:
CV_Assert(!is_row_dft); // this is not supported yet
dst.create(src.rows, src.cols/2 + 1, CV_32FC2);
break;
case C2R:
CV_Assert(dft_size.width / 2 + 1 == src.cols && dft_size.height == src.rows);
CV_Assert(!is_row_dft); // this is not supported yet
dst.create(src.rows, dft_size.width, CV_32FC1);
break;
default:
//std::runtime_error("does not support this convertion!");
cout << "Does not support this convertion!" << endl;
throw exception();
break;
}
clAmdFftPlanHandle plHandle = FftPlan::getPlan(dft_size, src.step, dst.step, flags, type);
//get the buffersize
size_t buffersize=0;
openCLSafeCall( clAmdFftGetTmpBufSize(plHandle, &buffersize ) );
//allocate the intermediate buffer
cl_mem clMedBuffer=NULL;
if (buffersize)
{
cl_int medstatus;
clMedBuffer = clCreateBuffer ( src.clCxt->impl->clContext, CL_MEM_READ_WRITE, buffersize, 0, &medstatus);
openCLSafeCall( medstatus );
}
openCLSafeCall( clAmdFftEnqueueTransform( plHandle,
is_inverse?CLFFT_BACKWARD:CLFFT_FORWARD,
1,
&src.clCxt->impl->clCmdQueue,
0, NULL, NULL,
(cl_mem*)&src.data, (cl_mem*)&dst.data, clMedBuffer ) );
openCLSafeCall( clFinish(src.clCxt->impl->clCmdQueue) );
if(clMedBuffer)
{
openCLFree(clMedBuffer);
}
}
#endif
#endif //HAVE_CLAMDFFT

161
modules/ocl/src/gemm.cpp Normal file
View File

@ -0,0 +1,161 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved.
// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// @Authors
// Peng Xiao, pengxiao@multicorewareinc.com
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other oclMaterials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors as is and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#include <iomanip>
#include "precomp.hpp"
#ifdef HAVE_CLAMDBLAS
#include "clAmdBlas.h"
#if !defined (HAVE_OPENCL)
void cv::ocl::dft(const oclMat& src, oclMat& dst, int flags) { throw_nogpu(); }
#else
using namespace cv;
void cv::ocl::gemm(const oclMat& src1, const oclMat& src2, double alpha,
const oclMat& src3, double beta, oclMat& dst, int flags)
{
CV_Assert(src1.cols == src2.rows &&
(src3.empty() || src1.rows == src3.rows && src2.cols == src3.cols));
CV_Assert(!(cv::GEMM_3_T & flags)); // cv::GEMM_3_T is not supported
if(!src3.empty())
{
src3.copyTo(dst);
}
else
{
dst.create(src1.rows, src2.cols, src1.type());
dst.setTo(Scalar::all(0));
}
openCLSafeCall( clAmdBlasSetup() );
const clAmdBlasTranspose transA = (cv::GEMM_1_T & flags)?clAmdBlasTrans:clAmdBlasNoTrans;
const clAmdBlasTranspose transB = (cv::GEMM_2_T & flags)?clAmdBlasTrans:clAmdBlasNoTrans;
const clAmdBlasOrder order = clAmdBlasRowMajor;
const int M = src1.rows;
const int N = src2.cols;
const int K = src1.cols;
int lda = src1.step;
int ldb = src2.step;
int ldc = dst.step;
int offa = src1.offset;
int offb = src2.offset;
int offc = dst.offset;
switch(src1.type())
{
case CV_32FC1:
lda /= sizeof(float);
ldb /= sizeof(float);
ldc /= sizeof(float);
offa /= sizeof(float);
offb /= sizeof(float);
offc /= sizeof(float);
openCLSafeCall
(
clAmdBlasSgemmEx(order, transA, transB, M, N, K,
alpha, (const cl_mem)src1.data, offa, lda, (const cl_mem)src2.data, offb, ldb,
beta, (cl_mem)dst.data, offc, ldc, 1, &src1.clCxt->impl->clCmdQueue, 0, NULL, NULL)
);
break;
case CV_64FC1:
lda /= sizeof(double);
ldb /= sizeof(double);
ldc /= sizeof(double);
offa /= sizeof(double);
offb /= sizeof(double);
offc /= sizeof(double);
openCLSafeCall
(
clAmdBlasDgemmEx(order, transA, transB, M, N, K,
alpha, (const cl_mem)src1.data, offa, lda, (const cl_mem)src2.data, offb, ldb,
beta, (cl_mem)dst.data, offc, ldc, 1, &src1.clCxt->impl->clCmdQueue, 0, NULL, NULL)
);
break;
case CV_32FC2:
{
lda /= sizeof(std::complex<float>);
ldb /= sizeof(std::complex<float>);
ldc /= sizeof(std::complex<float>);
offa /= sizeof(std::complex<float>);
offb /= sizeof(std::complex<float>);
offc /= sizeof(std::complex<float>);
cl_float2 alpha_2 = {{alpha, 0}};
cl_float2 beta_2 = {{beta, 0}};
openCLSafeCall
(
clAmdBlasCgemmEx(order, transA, transB, M, N, K,
alpha_2, (const cl_mem)src1.data, offa, lda, (const cl_mem)src2.data, offb, ldb,
beta_2, (cl_mem)dst.data, offc, ldc, 1, &src1.clCxt->impl->clCmdQueue, 0, NULL, NULL)
);
}
break;
case CV_64FC2:
{
lda /= sizeof(std::complex<double>);
ldb /= sizeof(std::complex<double>);
ldc /= sizeof(std::complex<double>);
offa /= sizeof(std::complex<double>);
offb /= sizeof(std::complex<double>);
offc /= sizeof(std::complex<double>);
cl_double2 alpha_2 = {{alpha, 0}};
cl_double2 beta_2 = {{beta, 0}};
openCLSafeCall
(
clAmdBlasZgemmEx(order, transA, transB, M, N, K,
alpha_2, (const cl_mem)src1.data, offa, lda, (const cl_mem)src2.data, offb, ldb,
beta_2, (cl_mem)dst.data, offc, ldc, 1, &src1.clCxt->impl->clCmdQueue, 0, NULL, NULL)
);
}
break;
}
clAmdBlasTeardown();
}
#endif
#endif

View File

@ -0,0 +1,196 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2010-2012, MulticoreWare Inc., all rights reserved.
// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// @Authors
// Liu Liujun, liujun@multicorewareinc.com
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other GpuMaterials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors as is and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
__kernel void BlendLinear_C1_D0(
__global uchar *dst,
__global uchar *img1,
__global uchar *img2,
__global float *weight1,
__global float *weight2,
int rows,
int cols,
int istep,
int wstep
)
{
int idx = get_global_id(0);
int idy = get_global_id(1);
if (idx < cols && idy < rows)
{
int pos = idy * istep + idx;
int wpos = idy * (wstep /sizeof(float)) + idx;
float w1 = weight1[wpos];
float w2 = weight2[wpos];
dst[pos] = (img1[pos] * w1 + img2[pos] * w2) / (w1 + w2 + 1e-5f);
}
}
__kernel void BlendLinear_C3_D0(
__global uchar *dst,
__global uchar *img1,
__global uchar *img2,
__global float *weight1,
__global float *weight2,
int rows,
int cols,
int istep,
int wstep
)
{
int idx = get_global_id(0);
int idy = get_global_id(1);
int x = idx / 3;
int y = idy;
if (x < cols && y < rows)
{
int pos = idy * istep + idx;
int wpos = idy * (wstep /sizeof(float)) + x;
float w1 = weight1[wpos];
float w2 = weight2[wpos];
dst[pos] = (img1[pos] * w1 + img2[pos] * w2) / (w1 + w2 + 1e-5f);
}
}
__kernel void BlendLinear_C4_D0(
__global uchar *dst,
__global uchar *img1,
__global uchar *img2,
__global float *weight1,
__global float *weight2,
int rows,
int cols,
int istep,
int wstep
)
{
int idx = get_global_id(0);
int idy = get_global_id(1);
int x = idx / 4;
int y = idy;
if (x < cols && y < rows)
{
int pos = idy * istep + idx;
int wpos = idy * (wstep /sizeof(float)) + x;
float w1 = weight1[wpos];
float w2 = weight2[wpos];
dst[pos] = (img1[pos] * w1 + img2[pos] * w2) / (w1 + w2 + 1e-5f);
}
}
__kernel void BlendLinear_C1_D5(
__global float *dst,
__global float *img1,
__global float *img2,
__global float *weight1,
__global float *weight2,
int rows,
int cols,
int istep,
int wstep
)
{
int idx = get_global_id(0);
int idy = get_global_id(1);
if (idx < cols && idy < rows)
{
int pos = idy * (istep / sizeof(float)) + idx;
int wpos = idy * (wstep /sizeof(float)) + idx;
float w1 = weight1[wpos];
float w2 = weight2[wpos];
dst[pos] = (img1[pos] * w1 + img2[pos] * w2) / (w1 + w2 + 1e-5f);
}
}
__kernel void BlendLinear_C3_D5(
__global float *dst,
__global float *img1,
__global float *img2,
__global float *weight1,
__global float *weight2,
int rows,
int cols,
int istep,
int wstep
)
{
int idx = get_global_id(0);
int idy = get_global_id(1);
int x = idx / 3;
int y = idy;
if (x < cols && y < rows)
{
int pos = idy * (istep / sizeof(float)) + idx;
int wpos = idy * (wstep /sizeof(float)) + x;
float w1 = weight1[wpos];
float w2 = weight2[wpos];
dst[pos] = (img1[pos] * w1 + img2[pos] * w2) / (w1 + w2 + 1e-5f);
}
}
__kernel void BlendLinear_C4_D5(
__global float *dst,
__global float *img1,
__global float *img2,
__global float *weight1,
__global float *weight2,
int rows,
int cols,
int istep,
int wstep
)
{
int idx = get_global_id(0);
int idy = get_global_id(1);
int x = idx / 4;
int y = idy;
if (x < cols && y < rows)
{
int pos = idy * (istep / sizeof(float)) + idx;
int wpos = idy * (wstep /sizeof(float)) + x;
float w1 = weight1[wpos];
float w2 = weight2[wpos];
dst[pos] = (img1[pos] * w1 + img2[pos] * w2) / (w1 + w2 + 1e-5f);
}
}

View File

@ -0,0 +1,80 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved.
// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// @Authors
// Chunpeng Zhang chunpeng@multicorewareinc.com
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other oclMaterials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors as is and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#pragma OPENCL EXTENSION cl_amd_printf : enable
#if defined (__ATI__)
#pragma OPENCL EXTENSION cl_amd_fp64:enable
#elif defined (__NVIDIA__)
#pragma OPENCL EXTENSION cl_khr_fp64:enable
#endif
////////////////////////////////////////////////////////////////////
///////////////////////// columnSum ////////////////////////////////
////////////////////////////////////////////////////////////////////
/// CV_32FC1
__kernel void columnSum_C1_D5(__global float* src,__global float* dst,int srcCols,int srcRows,int srcStep,int dstStep)
{
const int x = get_global_id(0);
srcStep >>= 2;
dstStep >>= 2;
if (x < srcCols)
{
int srcIdx = x ;
int dstIdx = x ;
float sum = 0;
for (int y = 0; y < srcRows; ++y)
{
sum += src[srcIdx];
dst[dstIdx] = sum;
srcIdx += srcStep;
dstIdx += dstStep;
}
}
}

View File

@ -0,0 +1,824 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved.
// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// @Authors
// Peng Xiao, pengxiao@multicorewareinc.com
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other oclMaterials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors as is and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#pragma OPENCL EXTENSION cl_amd_printf : enable
#if defined (__ATI__)
#pragma OPENCL EXTENSION cl_amd_fp64:enable
#elif defined (__NVIDIA__)
#pragma OPENCL EXTENSION cl_khr_fp64:enable
#endif
#if !defined(USE_SQR_INTEGRAL) && (defined (__ATI__) || defined (__NVIDIA__))
#define TYPE_IMAGE_SQSUM double
#else
#define TYPE_IMAGE_SQSUM ulong
#endif
//////////////////////////////////////////////////
// utilities
#define SQSUMS_PTR(ox, oy) mad24(gidy + oy, img_sqsums_step, gidx + img_sqsums_offset + ox)
#define SUMS_PTR(ox, oy) mad24(gidy + oy, img_sums_step, gidx + img_sums_offset + ox)
// normAcc* are accurate normalization routines which make GPU matchTemplate
// consistent with CPU one
float normAcc(float num, float denum)
{
if(fabs(num) < denum)
{
return num / denum;
}
if(fabs(num) < denum * 1.125f)
{
return num > 0 ? 1 : -1;
}
return 0;
}
float normAcc_SQDIFF(float num, float denum)
{
if(fabs(num) < denum)
{
return num / denum;
}
if(fabs(num) < denum * 1.125f)
{
return num > 0 ? 1 : -1;
}
return 1;
}
//////////////////////////////////////////////////////////////////////
// normalize
__kernel
void normalizeKernel_C1_D0
(
__global const TYPE_IMAGE_SQSUM * img_sqsums,
__global float * res,
ulong tpl_sqsum,
int res_rows,
int res_cols,
int tpl_rows,
int tpl_cols,
int img_sqsums_offset,
int img_sqsums_step,
int res_offset,
int res_step
)
{
int gidx = get_global_id(0);
int gidy = get_global_id(1);
res_step /= sizeof(*res);
res_offset /= sizeof(*res);
img_sqsums_step /= sizeof(*img_sqsums);
img_sqsums_offset /= sizeof(*img_sqsums);
int res_idx = mad24(gidy, res_step, res_offset + gidx);
if(gidx < res_cols && gidy < res_rows)
{
float image_sqsum_ = (float)(
(img_sqsums[SQSUMS_PTR(tpl_cols, tpl_rows)] - img_sqsums[SQSUMS_PTR(tpl_cols, 0)]) -
(img_sqsums[SQSUMS_PTR(0, tpl_rows)] - img_sqsums[SQSUMS_PTR(0, 0)]));
res[res_idx] = normAcc(res[res_idx], sqrt(image_sqsum_ * tpl_sqsum));
}
}
__kernel
void matchTemplate_Prepared_SQDIFF_C1_D0
(
__global const TYPE_IMAGE_SQSUM * img_sqsums,
__global float * res,
ulong tpl_sqsum,
int res_rows,
int res_cols,
int tpl_rows,
int tpl_cols,
int img_sqsums_offset,
int img_sqsums_step,
int res_offset,
int res_step
)
{
int gidx = get_global_id(0);
int gidy = get_global_id(1);
res_step /= sizeof(*res);
res_offset /= sizeof(*res);
img_sqsums_step /= sizeof(*img_sqsums);
img_sqsums_offset /= sizeof(*img_sqsums);
int res_idx = mad24(gidy, res_step, res_offset + gidx);
if(gidx < res_cols && gidy < res_rows)
{
float image_sqsum_ = (float)(
(img_sqsums[SQSUMS_PTR(tpl_cols, tpl_rows)] - img_sqsums[SQSUMS_PTR(tpl_cols, 0)]) -
(img_sqsums[SQSUMS_PTR(0, tpl_rows)] - img_sqsums[SQSUMS_PTR(0, 0)]));
res[res_idx] = image_sqsum_ - 2.f * res[res_idx] + tpl_sqsum;
}
}
__kernel
void matchTemplate_Prepared_SQDIFF_NORMED_C1_D0
(
__global const TYPE_IMAGE_SQSUM * img_sqsums,
__global float * res,
ulong tpl_sqsum,
int res_rows,
int res_cols,
int tpl_rows,
int tpl_cols,
int img_sqsums_offset,
int img_sqsums_step,
int res_offset,
int res_step
)
{
int gidx = get_global_id(0);
int gidy = get_global_id(1);
res_step /= sizeof(*res);
res_offset /= sizeof(*res);
img_sqsums_step /= sizeof(*img_sqsums);
img_sqsums_offset /= sizeof(*img_sqsums);
int res_idx = mad24(gidy, res_step, res_offset + gidx);
if(gidx < res_cols && gidy < res_rows)
{
float image_sqsum_ = (float)(
(img_sqsums[SQSUMS_PTR(tpl_cols, tpl_rows)] - img_sqsums[SQSUMS_PTR(tpl_cols, 0)]) -
(img_sqsums[SQSUMS_PTR(0, tpl_rows)] - img_sqsums[SQSUMS_PTR(0, 0)]));
res[res_idx] = normAcc_SQDIFF(image_sqsum_ - 2.f * res[res_idx] + tpl_sqsum,
sqrt(image_sqsum_ * tpl_sqsum));
}
}
//////////////////////////////////////////////////
// SQDIFF
__kernel
void matchTemplate_Naive_SQDIFF_C1_D0
(
__global const uchar * img,
__global const uchar * tpl,
__global float * res,
int img_rows,
int img_cols,
int tpl_rows,
int tpl_cols,
int res_rows,
int res_cols,
int img_offset,
int tpl_offset,
int res_offset,
int img_step,
int tpl_step,
int res_step
)
{
int gidx = get_global_id(0);
int gidy = get_global_id(1);
int i,j;
int delta;
int sum = 0;
res_step /= sizeof(*res);
res_offset /= sizeof(*res);
int res_idx = mad24(gidy, res_step, res_offset + gidx);
if(gidx < res_cols && gidy < res_rows)
{
for(i = 0; i < tpl_rows; i ++)
{
// get specific rows of img data
__global const uchar * img_ptr = img + mad24(gidy + i, img_step, gidx + img_offset);
__global const uchar * tpl_ptr = tpl + mad24(i, tpl_step, tpl_offset);
for(j = 0; j < tpl_cols; j ++)
{
delta = img_ptr[j] - tpl_ptr[j];
sum = mad24(delta, delta, sum);
}
}
res[res_idx] = sum;
}
}
__kernel
void matchTemplate_Naive_SQDIFF_C1_D5
(
__global const float * img,
__global const float * tpl,
__global float * res,
int img_rows,
int img_cols,
int tpl_rows,
int tpl_cols,
int res_rows,
int res_cols,
int img_offset,
int tpl_offset,
int res_offset,
int img_step,
int tpl_step,
int res_step
)
{
int gidx = get_global_id(0);
int gidy = get_global_id(1);
int i,j;
float delta;
float sum = 0;
img_step /= sizeof(*img);
img_offset /= sizeof(*img);
tpl_step /= sizeof(*tpl);
tpl_offset /= sizeof(*tpl);
res_step /= sizeof(*res);
res_offset /= sizeof(*res);
int res_idx = mad24(gidy, res_step, res_offset + gidx);
if(gidx < res_cols && gidy < res_rows)
{
for(i = 0; i < tpl_rows; i ++)
{
// get specific rows of img data
__global const float * img_ptr = img + mad24(gidy + i, img_step, gidx + img_offset);
__global const float * tpl_ptr = tpl + mad24(i, tpl_step, tpl_offset);
for(j = 0; j < tpl_cols; j ++)
{
delta = img_ptr[j] - tpl_ptr[j];
sum = mad(delta, delta, sum);
}
}
res[res_idx] = sum;
}
}
__kernel
void matchTemplate_Naive_SQDIFF_C4_D0
(
__global const uchar4 * img,
__global const uchar4 * tpl,
__global float * res,
int img_rows,
int img_cols,
int tpl_rows,
int tpl_cols,
int res_rows,
int res_cols,
int img_offset,
int tpl_offset,
int res_offset,
int img_step,
int tpl_step,
int res_step
)
{
int gidx = get_global_id(0);
int gidy = get_global_id(1);
int i,j;
int4 delta;
int4 sum = (int4)(0, 0, 0, 0);
img_step /= sizeof(*img);
img_offset /= sizeof(*img);
tpl_step /= sizeof(*tpl);
tpl_offset /= sizeof(*tpl);
res_step /= sizeof(*res);
res_offset /= sizeof(*res);
int res_idx = mad24(gidy, res_step, res_offset + gidx);
if(gidx < res_cols && gidy < res_rows)
{
for(i = 0; i < tpl_rows; i ++)
{
// get specific rows of img data
__global const uchar4 * img_ptr = img + mad24(gidy + i, img_step, gidx + img_offset);
__global const uchar4 * tpl_ptr = tpl + mad24(i, tpl_step, tpl_offset);
for(j = 0; j < tpl_cols; j ++)
{
//delta = convert_int4(img_ptr[j] - tpl_ptr[j]); // this alternative is incorrect
delta.x = img_ptr[j].x - tpl_ptr[j].x;
delta.y = img_ptr[j].y - tpl_ptr[j].y;
delta.z = img_ptr[j].z - tpl_ptr[j].z;
delta.w = img_ptr[j].w - tpl_ptr[j].w;
sum = mad24(delta, delta, sum);
}
}
res[res_idx] = sum.x + sum.y + sum.z + sum.w;
}
}
__kernel
void matchTemplate_Naive_SQDIFF_C4_D5
(
__global const float4 * img,
__global const float4 * tpl,
__global float * res,
int img_rows,
int img_cols,
int tpl_rows,
int tpl_cols,
int res_rows,
int res_cols,
int img_offset,
int tpl_offset,
int res_offset,
int img_step,
int tpl_step,
int res_step
)
{
int gidx = get_global_id(0);
int gidy = get_global_id(1);
int i,j;
float4 delta;
float4 sum = (float4)(0, 0, 0, 0);
img_step /= sizeof(*img);
img_offset /= sizeof(*img);
tpl_step /= sizeof(*tpl);
tpl_offset /= sizeof(*tpl);
res_step /= sizeof(*res);
res_offset /= sizeof(*res);
int res_idx = mad24(gidy, res_step, res_offset + gidx);
if(gidx < res_cols && gidy < res_rows)
{
for(i = 0; i < tpl_rows; i ++)
{
// get specific rows of img data
__global const float4 * img_ptr = img + mad24(gidy + i, img_step, gidx + img_offset);
__global const float4 * tpl_ptr = tpl + mad24(i, tpl_step, tpl_offset);
for(j = 0; j < tpl_cols; j ++)
{
//delta = convert_int4(img_ptr[j] - tpl_ptr[j]); // this alternative is incorrect
delta.x = img_ptr[j].x - tpl_ptr[j].x;
delta.y = img_ptr[j].y - tpl_ptr[j].y;
delta.z = img_ptr[j].z - tpl_ptr[j].z;
delta.w = img_ptr[j].w - tpl_ptr[j].w;
sum = mad(delta, delta, sum);
}
}
res[res_idx] = sum.x + sum.y + sum.z + sum.w;
}
}
//////////////////////////////////////////////////
// CCORR
__kernel
void matchTemplate_Naive_CCORR_C1_D0
(
__global const uchar * img,
__global const uchar * tpl,
__global float * res,
int img_rows,
int img_cols,
int tpl_rows,
int tpl_cols,
int res_rows,
int res_cols,
int img_offset,
int tpl_offset,
int res_offset,
int img_step,
int tpl_step,
int res_step
)
{
int gidx = get_global_id(0);
int gidy = get_global_id(1);
int i,j;
int sum = 0;
res_step /= sizeof(*res);
res_offset /= sizeof(*res);
int res_idx = mad24(gidy, res_step, res_offset + gidx);
if(gidx < res_cols && gidy < res_rows)
{
for(i = 0; i < tpl_rows; i ++)
{
// get specific rows of img data
__global const uchar * img_ptr = img + mad24(gidy + i, img_step, gidx + img_offset);
__global const uchar * tpl_ptr = tpl + mad24(i, tpl_step, tpl_offset);
for(j = 0; j < tpl_cols; j ++)
{
sum = mad24(img_ptr[j], tpl_ptr[j], sum);
}
}
res[res_idx] = sum;
}
}
__kernel
void matchTemplate_Naive_CCORR_C1_D5
(
__global const float * img,
__global const float * tpl,
__global float * res,
int img_rows,
int img_cols,
int tpl_rows,
int tpl_cols,
int res_rows,
int res_cols,
int img_offset,
int tpl_offset,
int res_offset,
int img_step,
int tpl_step,
int res_step
)
{
int gidx = get_global_id(0);
int gidy = get_global_id(1);
int i,j;
float sum = 0;
img_step /= sizeof(*img);
img_offset /= sizeof(*img);
tpl_step /= sizeof(*tpl);
tpl_offset /= sizeof(*tpl);
res_step /= sizeof(*res);
res_offset /= sizeof(*res);
int res_idx = mad24(gidy, res_step, res_offset + gidx);
if(gidx < res_cols && gidy < res_rows)
{
for(i = 0; i < tpl_rows; i ++)
{
// get specific rows of img data
__global const float * img_ptr = img + mad24(gidy + i, img_step, gidx + img_offset);
__global const float * tpl_ptr = tpl + mad24(i, tpl_step, tpl_offset);
for(j = 0; j < tpl_cols; j ++)
{
sum = mad(img_ptr[j], tpl_ptr[j], sum);
}
}
res[res_idx] = sum;
}
}
__kernel
void matchTemplate_Naive_CCORR_C4_D0
(
__global const uchar4 * img,
__global const uchar4 * tpl,
__global float * res,
int img_rows,
int img_cols,
int tpl_rows,
int tpl_cols,
int res_rows,
int res_cols,
int img_offset,
int tpl_offset,
int res_offset,
int img_step,
int tpl_step,
int res_step
)
{
int gidx = get_global_id(0);
int gidy = get_global_id(1);
int i,j;
int4 sum = (int4)(0, 0, 0, 0);
img_step /= sizeof(*img);
img_offset /= sizeof(*img);
tpl_step /= sizeof(*tpl);
tpl_offset /= sizeof(*tpl);
res_step /= sizeof(*res);
res_offset /= sizeof(*res);
int res_idx = mad24(gidy, res_step, res_offset + gidx);
if(gidx < res_cols && gidy < res_rows)
{
for(i = 0; i < tpl_rows; i ++)
{
// get specific rows of img data
__global const uchar4 * img_ptr = img + mad24(gidy + i, img_step, gidx + img_offset);
__global const uchar4 * tpl_ptr = tpl + mad24(i, tpl_step, tpl_offset);
for(j = 0; j < tpl_cols; j ++)
{
sum = mad24(convert_int4(img_ptr[j]), convert_int4(tpl_ptr[j]), sum);
}
}
res[res_idx] = sum.x + sum.y + sum.z + sum.w;
}
}
__kernel
void matchTemplate_Naive_CCORR_C4_D5
(
__global const float4 * img,
__global const float4 * tpl,
__global float * res,
int img_rows,
int img_cols,
int tpl_rows,
int tpl_cols,
int res_rows,
int res_cols,
int img_offset,
int tpl_offset,
int res_offset,
int img_step,
int tpl_step,
int res_step
)
{
int gidx = get_global_id(0);
int gidy = get_global_id(1);
int i,j;
float4 sum = (float4)(0, 0, 0, 0);
img_step /= sizeof(*img);
img_offset /= sizeof(*img);
tpl_step /= sizeof(*tpl);
tpl_offset /= sizeof(*tpl);
res_step /= sizeof(*res);
res_offset /= sizeof(*res);
int res_idx = mad24(gidy, res_step, res_offset + gidx);
if(gidx < res_cols && gidy < res_rows)
{
for(i = 0; i < tpl_rows; i ++)
{
// get specific rows of img data
__global const float4 * img_ptr = img + mad24(gidy + i, img_step, gidx + img_offset);
__global const float4 * tpl_ptr = tpl + mad24(i, tpl_step, tpl_offset);
for(j = 0; j < tpl_cols; j ++)
{
sum = mad(convert_float4(img_ptr[j]), convert_float4(tpl_ptr[j]), sum);
}
}
res[res_idx] = sum.x + sum.y + sum.z + sum.w;
}
}
//////////////////////////////////////////////////
// CCOFF
__kernel
void matchTemplate_Prepared_CCOFF_C1_D0
(
__global float * res,
int img_rows,
int img_cols,
int tpl_rows,
int tpl_cols,
int res_rows,
int res_cols,
int res_offset,
int res_step,
__global const uint * img_sums,
int img_sums_offset,
int img_sums_step,
float tpl_sum
)
{
int gidx = get_global_id(0);
int gidy = get_global_id(1);
img_sums_offset /= sizeof(*img_sums);
img_sums_step /= sizeof(*img_sums);
res_step /= sizeof(*res);
res_offset /= sizeof(*res);
int res_idx = mad24(gidy, res_step, res_offset + gidx);
if(gidx < res_cols && gidy < res_rows)
{
float sum = (float)(
(img_sums[SUMS_PTR(tpl_cols, tpl_rows)] - img_sums[SUMS_PTR(tpl_cols, 0)])
- (img_sums[SUMS_PTR(0, tpl_rows)] - img_sums[SUMS_PTR(0, 0)]));
res[res_idx] -= sum * tpl_sum;
}
}
__kernel
void matchTemplate_Prepared_CCOFF_C4_D0
(
__global float * res,
int img_rows,
int img_cols,
int tpl_rows,
int tpl_cols,
int res_rows,
int res_cols,
int res_offset,
int res_step,
__global const uint * img_sums_c0,
__global const uint * img_sums_c1,
__global const uint * img_sums_c2,
__global const uint * img_sums_c3,
int img_sums_offset,
int img_sums_step,
float tpl_sum_c0,
float tpl_sum_c1,
float tpl_sum_c2,
float tpl_sum_c3
)
{
int gidx = get_global_id(0);
int gidy = get_global_id(1);
img_sums_offset /= sizeof(*img_sums_c0);
img_sums_step /= sizeof(*img_sums_c0);
res_step /= sizeof(*res);
res_offset /= sizeof(*res);
int res_idx = mad24(gidy, res_step, res_offset + gidx);
if(gidx < res_cols && gidy < res_rows)
{
float ccorr = res[res_idx];
ccorr -= tpl_sum_c0*(float)(
(img_sums_c0[SUMS_PTR(tpl_cols, tpl_rows)] - img_sums_c0[SUMS_PTR(tpl_cols, 0)])
- (img_sums_c0[SUMS_PTR(0, tpl_rows)] - img_sums_c0[SUMS_PTR(0, 0)]));
ccorr -= tpl_sum_c1*(float)(
(img_sums_c1[SUMS_PTR(tpl_cols, tpl_rows)] - img_sums_c1[SUMS_PTR(tpl_cols, 0)])
- (img_sums_c1[SUMS_PTR(0, tpl_rows)] - img_sums_c1[SUMS_PTR(0, 0)]));
ccorr -= tpl_sum_c2*(float)(
(img_sums_c2[SUMS_PTR(tpl_cols, tpl_rows)] - img_sums_c2[SUMS_PTR(tpl_cols, 0)])
- (img_sums_c2[SUMS_PTR(0, tpl_rows)] - img_sums_c2[SUMS_PTR(0, 0)]));
ccorr -= tpl_sum_c3*(float)(
(img_sums_c3[SUMS_PTR(tpl_cols, tpl_rows)] - img_sums_c3[SUMS_PTR(tpl_cols, 0)])
- (img_sums_c3[SUMS_PTR(0, tpl_rows)] - img_sums_c3[SUMS_PTR(0, 0)]));
res[res_idx] = ccorr;
}
}
__kernel
void matchTemplate_Prepared_CCOFF_NORMED_C1_D0
(
__global float * res,
int img_rows,
int img_cols,
int tpl_rows,
int tpl_cols,
int res_rows,
int res_cols,
int res_offset,
int res_step,
float weight,
__global const uint * img_sums,
int img_sums_offset,
int img_sums_step,
__global const TYPE_IMAGE_SQSUM * img_sqsums,
int img_sqsums_offset,
int img_sqsums_step,
float tpl_sum,
float tpl_sqsum
)
{
int gidx = get_global_id(0);
int gidy = get_global_id(1);
img_sqsums_step /= sizeof(*img_sqsums);
img_sqsums_offset /= sizeof(*img_sqsums);
img_sums_offset /= sizeof(*img_sums);
img_sums_step /= sizeof(*img_sums);
res_step /= sizeof(*res);
res_offset /= sizeof(*res);
int res_idx = mad24(gidy, res_step, res_offset + gidx);
if(gidx < res_cols && gidy < res_rows)
{
float image_sum_ = (float)(
(img_sums[SUMS_PTR(tpl_cols, tpl_rows)] - img_sums[SUMS_PTR(tpl_cols, 0)])
- (img_sums[SUMS_PTR(0, tpl_rows)] - img_sums[SUMS_PTR(0, 0)]));
float image_sqsum_ = (float)(
(img_sqsums[SQSUMS_PTR(tpl_cols, tpl_rows)] - img_sqsums[SQSUMS_PTR(tpl_cols, 0)]) -
(img_sqsums[SQSUMS_PTR(0, tpl_rows)] - img_sqsums[SQSUMS_PTR(0, 0)]));
res[res_idx] = normAcc(res[res_idx] - image_sum_ * tpl_sum,
sqrt(tpl_sqsum * (image_sqsum_ - weight * image_sum_ * image_sum_)));
}
}
__kernel
void matchTemplate_Prepared_CCOFF_NORMED_C4_D0
(
__global float * res,
int img_rows,
int img_cols,
int tpl_rows,
int tpl_cols,
int res_rows,
int res_cols,
int res_offset,
int res_step,
float weight,
__global const uint * img_sums_c0,
__global const uint * img_sums_c1,
__global const uint * img_sums_c2,
__global const uint * img_sums_c3,
int img_sums_offset,
int img_sums_step,
__global const TYPE_IMAGE_SQSUM * img_sqsums_c0,
__global const TYPE_IMAGE_SQSUM * img_sqsums_c1,
__global const TYPE_IMAGE_SQSUM * img_sqsums_c2,
__global const TYPE_IMAGE_SQSUM * img_sqsums_c3,
int img_sqsums_offset,
int img_sqsums_step,
float tpl_sum_c0,
float tpl_sum_c1,
float tpl_sum_c2,
float tpl_sum_c3,
float tpl_sqsum
)
{
int gidx = get_global_id(0);
int gidy = get_global_id(1);
img_sqsums_step /= sizeof(*img_sqsums_c0);
img_sqsums_offset /= sizeof(*img_sqsums_c0);
img_sums_offset /= sizeof(*img_sums_c0);
img_sums_step /= sizeof(*img_sums_c0);
res_step /= sizeof(*res);
res_offset /= sizeof(*res);
int res_idx = mad24(gidy, res_step, res_offset + gidx);
if(gidx < res_cols && gidy < res_rows)
{
float image_sum_c0 = (float)(
(img_sums_c0[SUMS_PTR(tpl_cols, tpl_rows)] - img_sums_c0[SUMS_PTR(tpl_cols, 0)])
- (img_sums_c0[SUMS_PTR(0, tpl_rows)] - img_sums_c0[SUMS_PTR(0, 0)]));
float image_sum_c1 = (float)(
(img_sums_c1[SUMS_PTR(tpl_cols, tpl_rows)] - img_sums_c1[SUMS_PTR(tpl_cols, 0)])
- (img_sums_c1[SUMS_PTR(0, tpl_rows)] - img_sums_c1[SUMS_PTR(0, 0)]));
float image_sum_c2 = (float)(
(img_sums_c2[SUMS_PTR(tpl_cols, tpl_rows)] - img_sums_c2[SUMS_PTR(tpl_cols, 0)])
- (img_sums_c2[SUMS_PTR(0, tpl_rows)] - img_sums_c2[SUMS_PTR(0, 0)]));
float image_sum_c3 = (float)(
(img_sums_c3[SUMS_PTR(tpl_cols, tpl_rows)] - img_sums_c3[SUMS_PTR(tpl_cols, 0)])
- (img_sums_c3[SUMS_PTR(0, tpl_rows)] - img_sums_c3[SUMS_PTR(0, 0)]));
float image_sqsum_c0 = (float)(
(img_sqsums_c0[SQSUMS_PTR(tpl_cols, tpl_rows)] - img_sqsums_c0[SQSUMS_PTR(tpl_cols, 0)]) -
(img_sqsums_c0[SQSUMS_PTR(0, tpl_rows)] - img_sqsums_c0[SQSUMS_PTR(0, 0)]));
float image_sqsum_c1 = (float)(
(img_sqsums_c1[SQSUMS_PTR(tpl_cols, tpl_rows)] - img_sqsums_c1[SQSUMS_PTR(tpl_cols, 0)]) -
(img_sqsums_c1[SQSUMS_PTR(0, tpl_rows)] - img_sqsums_c1[SQSUMS_PTR(0, 0)]));
float image_sqsum_c2 = (float)(
(img_sqsums_c2[SQSUMS_PTR(tpl_cols, tpl_rows)] - img_sqsums_c2[SQSUMS_PTR(tpl_cols, 0)]) -
(img_sqsums_c2[SQSUMS_PTR(0, tpl_rows)] - img_sqsums_c2[SQSUMS_PTR(0, 0)]));
float image_sqsum_c3 = (float)(
(img_sqsums_c3[SQSUMS_PTR(tpl_cols, tpl_rows)] - img_sqsums_c3[SQSUMS_PTR(tpl_cols, 0)]) -
(img_sqsums_c3[SQSUMS_PTR(0, tpl_rows)] - img_sqsums_c3[SQSUMS_PTR(0, 0)]));
float num = res[res_idx] -
image_sum_c0 * tpl_sum_c0 -
image_sum_c1 * tpl_sum_c1 -
image_sum_c2 * tpl_sum_c2 -
image_sum_c3 * tpl_sum_c3;
float denum = sqrt( tpl_sqsum * (
image_sqsum_c0 - weight * image_sum_c0 * image_sum_c0 +
image_sqsum_c1 - weight * image_sum_c1 * image_sum_c1 +
image_sqsum_c2 - weight * image_sum_c2 * image_sum_c2 +
image_sqsum_c3 - weight * image_sum_c0 * image_sum_c3)
);
res[res_idx] = normAcc(num, denum);
}
}

View File

@ -0,0 +1,500 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved.
// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// @Authors
// Dachuan Zhao, dachuan@multicorewareinc.com
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other oclMaterials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors as is and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#pragma OPENCL EXTENSION cl_amd_printf : enable
uchar round_uchar_uchar(uchar v)
{
return v;
}
uchar round_uchar_int(int v)
{
return (uchar)((uint)v <= 255 ? v : v > 0 ? 255 : 0);
}
uchar round_uchar_float(float v)
{
if(v - convert_int_sat_rte(v) > 1e-6 || v - convert_int_sat_rte(v) < -1e-6)
{
if(((int)v + 1) - (v + 0.5f) < 1e-6 && ((int)v + 1) - (v + 0.5f) > -1e-6)
{
v = (int)v + 0.51f;
}
}
int iv = convert_int_sat_rte(v);
return round_uchar_int(iv);
}
uchar4 round_uchar4_uchar4(uchar4 v)
{
return v;
}
uchar4 round_uchar4_int4(int4 v)
{
uchar4 result;
result.x = (uchar)(v.x <= 255 ? v.x : v.x > 0 ? 255 : 0);
result.y = (uchar)(v.y <= 255 ? v.y : v.y > 0 ? 255 : 0);
result.z = (uchar)(v.z <= 255 ? v.z : v.z > 0 ? 255 : 0);
result.w = (uchar)(v.w <= 255 ? v.w : v.w > 0 ? 255 : 0);
return result;
}
uchar4 round_uchar4_float4(float4 v)
{
if(v.x - convert_int_sat_rte(v.x) > 1e-6 || v.x - convert_int_sat_rte(v.x) < -1e-6)
{
if(((int)(v.x) + 1) - (v.x + 0.5f) < 1e-6 && ((int)(v.x) + 1) - (v.x + 0.5f) > -1e-6)
{
v.x = (int)(v.x) + 0.51f;
}
}
if(v.y - convert_int_sat_rte(v.y) > 1e-6 || v.y - convert_int_sat_rte(v.y) < -1e-6)
{
if(((int)(v.y) + 1) - (v.y + 0.5f) < 1e-6 && ((int)(v.y) + 1) - (v.y + 0.5f) > -1e-6)
{
v.y = (int)(v.y) + 0.51f;
}
}
if(v.z - convert_int_sat_rte(v.z) > 1e-6 || v.z - convert_int_sat_rte(v.z) < -1e-6)
{
if(((int)(v.z) + 1) - (v.z + 0.5f) < 1e-6 && ((int)(v.z) + 1) - (v.z + 0.5f) > -1e-6)
{
v.z = (int)(v.z) + 0.51f;
}
}
if(v.w - convert_int_sat_rte(v.w) > 1e-6 || v.w - convert_int_sat_rte(v.w) < -1e-6)
{
if(((int)(v.w) + 1) - (v.w + 0.5f) < 1e-6 && ((int)(v.w) + 1) - (v.w + 0.5f) > -1e-6)
{
v.w = (int)(v.w) + 0.51f;
}
}
int4 iv = convert_int4_sat_rte(v);
return round_uchar4_int4(iv);
}
int idx_row_low(int y, int last_row)
{
if(y < 0)
{
y = -y;
}
return y % (last_row + 1);
}
int idx_row_high(int y, int last_row)
{
int i;
int j;
if(last_row - y < 0)
{
i = (y - last_row);
}
else
{
i = (last_row - y);
}
if(last_row - i < 0)
{
j = i - last_row;
}
else
{
j = last_row - i;
}
return j % (last_row + 1);
}
int idx_row(int y, int last_row)
{
return idx_row_low(idx_row_high(y, last_row), last_row);
}
int idx_col_low(int x, int last_col)
{
if(x < 0)
{
x = -x;
}
return x % (last_col + 1);
}
int idx_col_high(int x, int last_col)
{
int i;
int j;
if(last_col - x < 0)
{
i = (x - last_col);
}
else
{
i = (last_col - x);
}
if(last_col - i < 0)
{
j = i - last_col;
}
else
{
j = last_col - i;
}
return j % (last_col + 1);
}
int idx_col(int x, int last_col)
{
return idx_col_low(idx_col_high(x, last_col), last_col);
}
__kernel void pyrDown_C1_D0(__global uchar * srcData, int srcStep, int srcOffset, int srcRows, int srcCols, __global uchar *dst, int dstStep, int dstOffset, int dstCols)
{
const int x = get_group_id(0) * get_local_size(0) + get_local_id(0);
const int y = get_group_id(1);
__local float smem[256 + 4];
float sum;
const int src_y = 2*y;
const int last_row = srcRows - 1;
const int last_col = srcCols - 1;
sum = 0;
sum = sum + 0.0625f * round_uchar_uchar(((__global uchar*)((__global char*)srcData + idx_row(src_y - 2, last_row) * srcStep))[idx_col(x, last_col)]);
sum = sum + 0.25f * round_uchar_uchar(((__global uchar*)((__global char*)srcData + idx_row(src_y - 1, last_row) * srcStep))[idx_col(x, last_col)]);
sum = sum + 0.375f * round_uchar_uchar(((__global uchar*)((__global char*)srcData + idx_row(src_y , last_row) * srcStep))[idx_col(x, last_col)]);
sum = sum + 0.25f * round_uchar_uchar(((__global uchar*)((__global char*)srcData + idx_row(src_y + 1, last_row) * srcStep))[idx_col(x, last_col)]);
sum = sum + 0.0625f * round_uchar_uchar(((__global uchar*)((__global char*)srcData + idx_row(src_y + 2, last_row) * srcStep))[idx_col(x, last_col)]);
smem[2 + get_local_id(0)] = sum;
if (get_local_id(0) < 2)
{
const int left_x = x - 2;
sum = 0;
sum = sum + 0.0625f * round_uchar_uchar(((__global uchar*)((__global char*)srcData + idx_row(src_y - 2, last_row) * srcStep))[idx_col(left_x, last_col)]);
sum = sum + 0.25f * round_uchar_uchar(((__global uchar*)((__global char*)srcData + idx_row(src_y - 1, last_row) * srcStep))[idx_col(left_x, last_col)]);
sum = sum + 0.375f * round_uchar_uchar(((__global uchar*)((__global char*)srcData + idx_row(src_y , last_row) * srcStep))[idx_col(left_x, last_col)]);
sum = sum + 0.25f * round_uchar_uchar(((__global uchar*)((__global char*)srcData + idx_row(src_y + 1, last_row) * srcStep))[idx_col(left_x, last_col)]);
sum = sum + 0.0625f * round_uchar_uchar(((__global uchar*)((__global char*)srcData + idx_row(src_y + 2, last_row) * srcStep))[idx_col(left_x, last_col)]);
smem[get_local_id(0)] = sum;
}
if (get_local_id(0) > 253)
{
const int right_x = x + 2;
sum = 0;
sum = sum + 0.0625f * round_uchar_uchar(((__global uchar*)((__global char*)srcData + idx_row(src_y - 2, last_row) * srcStep))[idx_col(right_x, last_col)]);
sum = sum + 0.25f * round_uchar_uchar(((__global uchar*)((__global char*)srcData + idx_row(src_y - 1, last_row) * srcStep))[idx_col(right_x, last_col)]);
sum = sum + 0.375f * round_uchar_uchar(((__global uchar*)((__global char*)srcData + idx_row(src_y , last_row) * srcStep))[idx_col(right_x, last_col)]);
sum = sum + 0.25f * round_uchar_uchar(((__global uchar*)((__global char*)srcData + idx_row(src_y + 1, last_row) * srcStep))[idx_col(right_x, last_col)]);
sum = sum + 0.0625f * round_uchar_uchar(((__global uchar*)((__global char*)srcData + idx_row(src_y + 2, last_row) * srcStep))[idx_col(right_x, last_col)]);
smem[4 + get_local_id(0)] = sum;
}
barrier(CLK_LOCAL_MEM_FENCE);
if (get_local_id(0) < 128)
{
const int tid2 = get_local_id(0) * 2;
sum = 0;
sum = sum + 0.0625f * smem[2 + tid2 - 2];
sum = sum + 0.25f * smem[2 + tid2 - 1];
sum = sum + 0.375f * smem[2 + tid2 ];
sum = sum + 0.25f * smem[2 + tid2 + 1];
sum = sum + 0.0625f * smem[2 + tid2 + 2];
const int dst_x = (get_group_id(0) * get_local_size(0) + tid2) / 2;
if (dst_x < dstCols)
dst[y * dstStep + dst_x] = round_uchar_float(sum);
}
}
__kernel void pyrDown_C4_D0(__global uchar4 * srcData, int srcStep, int srcOffset, int srcRows, int srcCols, __global uchar4 *dst, int dstStep, int dstOffset, int dstCols)
{
const int x = get_group_id(0) * get_local_size(0) + get_local_id(0);
const int y = get_group_id(1);
__local float4 smem[256 + 4];
float4 sum;
const int src_y = 2*y;
const int last_row = srcRows - 1;
const int last_col = srcCols - 1;
float4 co1 = (float4)(0.375f, 0.375f, 0.375f, 0.375f);
float4 co2 = (float4)(0.25f, 0.25f, 0.25f, 0.25f);
float4 co3 = (float4)(0.0625f, 0.0625f, 0.0625f, 0.0625f);
sum = 0;
sum = sum + co3 * convert_float4(round_uchar4_uchar4(((__global uchar4*)((__global char4*)srcData + idx_row(src_y - 2, last_row) * srcStep / 4))[idx_col(x, last_col)]));
sum = sum + co2 * convert_float4(round_uchar4_uchar4(((__global uchar4*)((__global char4*)srcData + idx_row(src_y - 1, last_row) * srcStep / 4))[idx_col(x, last_col)]));
sum = sum + co1 * convert_float4(round_uchar4_uchar4(((__global uchar4*)((__global char4*)srcData + idx_row(src_y , last_row) * srcStep / 4))[idx_col(x, last_col)]));
sum = sum + co2 * convert_float4(round_uchar4_uchar4(((__global uchar4*)((__global char4*)srcData + idx_row(src_y + 1, last_row) * srcStep / 4))[idx_col(x, last_col)]));
sum = sum + co3 * convert_float4(round_uchar4_uchar4(((__global uchar4*)((__global char4*)srcData + idx_row(src_y + 2, last_row) * srcStep / 4))[idx_col(x, last_col)]));
smem[2 + get_local_id(0)] = sum;
if (get_local_id(0) < 2)
{
const int left_x = x - 2;
sum = 0;
sum = sum + co3 * convert_float4(round_uchar4_uchar4(((__global uchar4*)((__global char4*)srcData + idx_row(src_y - 2, last_row) * srcStep / 4))[idx_col(left_x, last_col)]));
sum = sum + co2 * convert_float4(round_uchar4_uchar4(((__global uchar4*)((__global char4*)srcData + idx_row(src_y - 1, last_row) * srcStep / 4))[idx_col(left_x, last_col)]));
sum = sum + co1 * convert_float4(round_uchar4_uchar4(((__global uchar4*)((__global char4*)srcData + idx_row(src_y , last_row) * srcStep / 4))[idx_col(left_x, last_col)]));
sum = sum + co2 * convert_float4(round_uchar4_uchar4(((__global uchar4*)((__global char4*)srcData + idx_row(src_y + 1, last_row) * srcStep / 4))[idx_col(left_x, last_col)]));
sum = sum + co3 * convert_float4(round_uchar4_uchar4(((__global uchar4*)((__global char4*)srcData + idx_row(src_y + 2, last_row) * srcStep / 4))[idx_col(left_x, last_col)]));
smem[get_local_id(0)] = sum;
}
if (get_local_id(0) > 253)
{
const int right_x = x + 2;
sum = 0;
sum = sum + co3 * convert_float4(round_uchar4_uchar4(((__global uchar4*)((__global char4*)srcData + idx_row(src_y - 2, last_row) * srcStep / 4))[idx_col(right_x, last_col)]));
sum = sum + co2 * convert_float4(round_uchar4_uchar4(((__global uchar4*)((__global char4*)srcData + idx_row(src_y - 1, last_row) * srcStep / 4))[idx_col(right_x, last_col)]));
sum = sum + co1 * convert_float4(round_uchar4_uchar4(((__global uchar4*)((__global char4*)srcData + idx_row(src_y , last_row) * srcStep / 4))[idx_col(right_x, last_col)]));
sum = sum + co2 * convert_float4(round_uchar4_uchar4(((__global uchar4*)((__global char4*)srcData + idx_row(src_y + 1, last_row) * srcStep / 4))[idx_col(right_x, last_col)]));
sum = sum + co3 * convert_float4(round_uchar4_uchar4(((__global uchar4*)((__global char4*)srcData + idx_row(src_y + 2, last_row) * srcStep / 4))[idx_col(right_x, last_col)]));
smem[4 + get_local_id(0)] = sum;
}
barrier(CLK_LOCAL_MEM_FENCE);
if (get_local_id(0) < 128)
{
const int tid2 = get_local_id(0) * 2;
sum = 0;
sum = sum + co3 * smem[2 + tid2 - 2];
sum = sum + co2 * smem[2 + tid2 - 1];
sum = sum + co1 * smem[2 + tid2 ];
sum = sum + co2 * smem[2 + tid2 + 1];
sum = sum + co3 * smem[2 + tid2 + 2];
const int dst_x = (get_group_id(0) * get_local_size(0) + tid2) / 2;
if (dst_x < dstCols)
dst[y * dstStep / 4 + dst_x] = round_uchar4_float4(sum);
}
}
__kernel void pyrDown_C1_D5(__global float * srcData, int srcStep, int srcOffset, int srcRows, int srcCols, __global float *dst, int dstStep, int dstOffset, int dstCols)
{
const int x = get_group_id(0) * get_local_size(0) + get_local_id(0);
const int y = get_group_id(1);
__local float smem[256 + 4];
float sum;
const int src_y = 2*y;
const int last_row = srcRows - 1;
const int last_col = srcCols - 1;
sum = 0;
sum = sum + 0.0625f * ((__global float*)((__global char*)srcData + idx_row(src_y - 2, last_row) * srcStep))[idx_col(x, last_col)];
sum = sum + 0.25f * ((__global float*)((__global char*)srcData + idx_row(src_y - 1, last_row) * srcStep))[idx_col(x, last_col)];
sum = sum + 0.375f * ((__global float*)((__global char*)srcData + idx_row(src_y , last_row) * srcStep))[idx_col(x, last_col)];
sum = sum + 0.25f * ((__global float*)((__global char*)srcData + idx_row(src_y + 1, last_row) * srcStep))[idx_col(x, last_col)];
sum = sum + 0.0625f * ((__global float*)((__global char*)srcData + idx_row(src_y + 2, last_row) * srcStep))[idx_col(x, last_col)];
smem[2 + get_local_id(0)] = sum;
if (get_local_id(0) < 2)
{
const int left_x = x - 2;
sum = 0;
sum = sum + 0.0625f * ((__global float*)((__global char*)srcData + idx_row(src_y - 2, last_row) * srcStep))[idx_col(left_x, last_col)];
sum = sum + 0.25f * ((__global float*)((__global char*)srcData + idx_row(src_y - 1, last_row) * srcStep))[idx_col(left_x, last_col)];
sum = sum + 0.375f * ((__global float*)((__global char*)srcData + idx_row(src_y , last_row) * srcStep))[idx_col(left_x, last_col)];
sum = sum + 0.25f * ((__global float*)((__global char*)srcData + idx_row(src_y + 1, last_row) * srcStep))[idx_col(left_x, last_col)];
sum = sum + 0.0625f * ((__global float*)((__global char*)srcData + idx_row(src_y + 2, last_row) * srcStep))[idx_col(left_x, last_col)];
smem[get_local_id(0)] = sum;
}
if (get_local_id(0) > 253)
{
const int right_x = x + 2;
sum = 0;
sum = sum + 0.0625f * ((__global float*)((__global char*)srcData + idx_row(src_y - 2, last_row) * srcStep))[idx_col(right_x, last_col)];
sum = sum + 0.25f * ((__global float*)((__global char*)srcData + idx_row(src_y - 1, last_row) * srcStep))[idx_col(right_x, last_col)];
sum = sum + 0.375f * ((__global float*)((__global char*)srcData + idx_row(src_y , last_row) * srcStep))[idx_col(right_x, last_col)];
sum = sum + 0.25f * ((__global float*)((__global char*)srcData + idx_row(src_y + 1, last_row) * srcStep))[idx_col(right_x, last_col)];
sum = sum + 0.0625f * ((__global float*)((__global char*)srcData + idx_row(src_y + 2, last_row) * srcStep))[idx_col(right_x, last_col)];
smem[4 + get_local_id(0)] = sum;
}
barrier(CLK_LOCAL_MEM_FENCE);
if (get_local_id(0) < 128)
{
const int tid2 = get_local_id(0) * 2;
sum = 0;
sum = sum + 0.0625f * smem[2 + tid2 - 2];
sum = sum + 0.25f * smem[2 + tid2 - 1];
sum = sum + 0.375f * smem[2 + tid2 ];
sum = sum + 0.25f * smem[2 + tid2 + 1];
sum = sum + 0.0625f * smem[2 + tid2 + 2];
const int dst_x = (get_group_id(0) * get_local_size(0) + tid2) / 2;
if (dst_x < dstCols)
dst[y * dstStep / 4 + dst_x] = sum;
}
}
__kernel void pyrDown_C4_D5(__global float4 * srcData, int srcStep, int srcOffset, int srcRows, int srcCols, __global float4 *dst, int dstStep, int dstOffset, int dstCols)
{
const int x = get_group_id(0) * get_local_size(0) + get_local_id(0);
const int y = get_group_id(1);
__local float4 smem[256 + 4];
float4 sum;
const int src_y = 2*y;
const int last_row = srcRows - 1;
const int last_col = srcCols - 1;
float4 co1 = (float4)(0.375f, 0.375f, 0.375f, 0.375f);
float4 co2 = (float4)(0.25f, 0.25f, 0.25f, 0.25f);
float4 co3 = (float4)(0.0625f, 0.0625f, 0.0625f, 0.0625f);
sum = 0;
sum = sum + co3 * ((__global float4*)((__global char4*)srcData + idx_row(src_y - 2, last_row) * srcStep / 4))[idx_col(x, last_col)];
sum = sum + co2 * ((__global float4*)((__global char4*)srcData + idx_row(src_y - 1, last_row) * srcStep / 4))[idx_col(x, last_col)];
sum = sum + co1 * ((__global float4*)((__global char4*)srcData + idx_row(src_y , last_row) * srcStep / 4))[idx_col(x, last_col)];
sum = sum + co2 * ((__global float4*)((__global char4*)srcData + idx_row(src_y + 1, last_row) * srcStep / 4))[idx_col(x, last_col)];
sum = sum + co3 * ((__global float4*)((__global char4*)srcData + idx_row(src_y + 2, last_row) * srcStep / 4))[idx_col(x, last_col)];
smem[2 + get_local_id(0)] = sum;
if (get_local_id(0) < 2)
{
const int left_x = x - 2;
sum = 0;
sum = sum + co3 * ((__global float4*)((__global char4*)srcData + idx_row(src_y - 2, last_row) * srcStep / 4))[idx_col(left_x, last_col)];
sum = sum + co2 * ((__global float4*)((__global char4*)srcData + idx_row(src_y - 1, last_row) * srcStep / 4))[idx_col(left_x, last_col)];
sum = sum + co1 * ((__global float4*)((__global char4*)srcData + idx_row(src_y , last_row) * srcStep / 4))[idx_col(left_x, last_col)];
sum = sum + co2 * ((__global float4*)((__global char4*)srcData + idx_row(src_y + 1, last_row) * srcStep / 4))[idx_col(left_x, last_col)];
sum = sum + co3 * ((__global float4*)((__global char4*)srcData + idx_row(src_y + 2, last_row) * srcStep / 4))[idx_col(left_x, last_col)];
smem[get_local_id(0)] = sum;
}
if (get_local_id(0) > 253)
{
const int right_x = x + 2;
sum = 0;
sum = sum + co3 * ((__global float4*)((__global char4*)srcData + idx_row(src_y - 2, last_row) * srcStep / 4))[idx_col(right_x, last_col)];
sum = sum + co2 * ((__global float4*)((__global char4*)srcData + idx_row(src_y - 1, last_row) * srcStep / 4))[idx_col(right_x, last_col)];
sum = sum + co1 * ((__global float4*)((__global char4*)srcData + idx_row(src_y , last_row) * srcStep / 4))[idx_col(right_x, last_col)];
sum = sum + co2 * ((__global float4*)((__global char4*)srcData + idx_row(src_y + 1, last_row) * srcStep / 4))[idx_col(right_x, last_col)];
sum = sum + co3 * ((__global float4*)((__global char4*)srcData + idx_row(src_y + 2, last_row) * srcStep / 4))[idx_col(right_x, last_col)];
smem[4 + get_local_id(0)] = sum;
}
barrier(CLK_LOCAL_MEM_FENCE);
if (get_local_id(0) < 128)
{
const int tid2 = get_local_id(0) * 2;
sum = 0;
sum = sum + co3 * smem[2 + tid2 - 2];
sum = sum + co2 * smem[2 + tid2 - 1];
sum = sum + co1 * smem[2 + tid2 ];
sum = sum + co2 * smem[2 + tid2 + 1];
sum = sum + co3 * smem[2 + tid2 + 2];
const int dst_x = (get_group_id(0) * get_local_size(0) + tid2) / 2;
if (dst_x < dstCols)
dst[y * dstStep / 16 + dst_x] = sum;
}
}

View File

@ -0,0 +1,750 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved.
// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// @Authors
// Zhang Chunpeng chunpeng@multicorewareinc.com
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other oclMaterials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors as is and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
//#pragma OPENCL EXTENSION cl_amd_printf : enable
uchar get_valid_uchar(uchar data)
{
return (uchar)(data <= 255 ? data : data > 0 ? 255 : 0);
}
///////////////////////////////////////////////////////////////////////
////////////////////////// CV_8UC1 //////////////////////////////////
///////////////////////////////////////////////////////////////////////
__kernel void pyrUp_C1_D0(__global uchar* src,__global uchar* dst,
int srcRows,int dstRows,int srcCols,int dstCols,
int srcOffset,int dstOffset,int srcStep,int dstStep)
{
const int x = get_global_id(0);
const int y = get_global_id(1);
__local float s_srcPatch[10][10];
__local float s_dstPatch[20][16];
if( get_local_id(0) < 10 && get_local_id(1) < 10 )
{
int srcx = (int)(get_group_id(0) * get_local_size(0) / 2 + get_local_id(0)) - 1;
int srcy = (int)(get_group_id(1) * get_local_size(1) / 2 + get_local_id(1)) - 1;
srcx = abs(srcx);
srcx = min(srcCols - 1,srcx);
srcy = abs(srcy);
srcy = min(srcRows -1 ,srcy);
s_srcPatch[get_local_id(1)][get_local_id(0)] = (float)(src[srcx + srcy * srcStep]);
}
barrier(CLK_LOCAL_MEM_FENCE);
float sum = 0;
const int evenFlag = (int)((get_local_id(0) & 1) == 0);
const int oddFlag = (int)((get_local_id(0) & 1) != 0);
const bool eveny = ((get_local_id(1) & 1) == 0);
const int tidx = get_local_id(0);
if(eveny)
{
sum = sum + (evenFlag * 0.0625f) * s_srcPatch[1 + (get_local_id(1) >> 1)][1 + ((tidx - 2) >> 1)];
sum = sum + ( oddFlag * 0.25f ) * s_srcPatch[1 + (get_local_id(1) >> 1)][1 + ((tidx - 1) >> 1)];
sum = sum + (evenFlag * 0.375f ) * s_srcPatch[1 + (get_local_id(1) >> 1)][1 + ((tidx ) >> 1)];
sum = sum + ( oddFlag * 0.25f ) * s_srcPatch[1 + (get_local_id(1) >> 1)][1 + ((tidx + 1) >> 1)];
sum = sum + (evenFlag * 0.0625f) * s_srcPatch[1 + (get_local_id(1) >> 1)][1 + ((tidx + 2) >> 1)];
}
s_dstPatch[2 + get_local_id(1)][get_local_id(0)] = sum;
if (get_local_id(1) < 2)
{
sum = 0;
if (eveny)
{
sum = sum + (evenFlag * 0.0625f) * s_srcPatch[0][1 + ((tidx - 2) >> 1)];
sum = sum + ( oddFlag * 0.25f ) * s_srcPatch[0][1 + ((tidx - 1) >> 1)];
sum = sum + (evenFlag * 0.375f ) * s_srcPatch[0][1 + ((tidx ) >> 1)];
sum = sum + ( oddFlag * 0.25f ) * s_srcPatch[0][1 + ((tidx + 1) >> 1)];
sum = sum + (evenFlag * 0.0625f) * s_srcPatch[0][1 + ((tidx + 2) >> 1)];
}
s_dstPatch[get_local_id(1)][get_local_id(0)] = sum;
}
if (get_local_id(1) > 13)
{
sum = 0;
if (eveny)
{
sum = sum + (evenFlag * 0.0625f) * s_srcPatch[9][1 + ((tidx - 2) >> 1)];
sum = sum + ( oddFlag * 0.25f ) * s_srcPatch[9][1 + ((tidx - 1) >> 1)];
sum = sum + (evenFlag * 0.375f ) * s_srcPatch[9][1 + ((tidx ) >> 1)];
sum = sum + ( oddFlag * 0.25f ) * s_srcPatch[9][1 + ((tidx + 1) >> 1)];
sum = sum + (evenFlag * 0.0625f) * s_srcPatch[9][1 + ((tidx + 2) >> 1)];
}
s_dstPatch[4 + get_local_id(1)][get_local_id(0)] = sum;
}
barrier(CLK_LOCAL_MEM_FENCE);
sum = 0;
const int tidy = get_local_id(1);
sum = sum + 0.0625f * s_dstPatch[2 + tidy - 2][get_local_id(0)];
sum = sum + 0.25f * s_dstPatch[2 + tidy - 1][get_local_id(0)];
sum = sum + 0.375f * s_dstPatch[2 + tidy ][get_local_id(0)];
sum = sum + 0.25f * s_dstPatch[2 + tidy + 1][get_local_id(0)];
sum = sum + 0.0625f * s_dstPatch[2 + tidy + 2][get_local_id(0)];
if ((x < dstCols) && (y < dstRows))
dst[x + y * dstStep] = (float)(4.0f * sum);
}
///////////////////////////////////////////////////////////////////////
////////////////////////// CV_16UC1 /////////////////////////////////
///////////////////////////////////////////////////////////////////////
__kernel void pyrUp_C1_D2(__global ushort* src,__global ushort* dst,
int srcRows,int dstRows,int srcCols,int dstCols,
int srcOffset,int dstOffset,int srcStep,int dstStep)
{
const int x = get_global_id(0);
const int y = get_global_id(1);
__local float s_srcPatch[10][10];
__local float s_dstPatch[20][16];
srcStep = srcStep >> 1;
dstStep = dstStep >> 1;
srcOffset = srcOffset >> 1;
dstOffset = dstOffset >> 1;
if( get_local_id(0) < 10 && get_local_id(1) < 10 )
{
int srcx = (int)(get_group_id(0) * get_local_size(0) / 2 + get_local_id(0)) - 1;
int srcy = (int)(get_group_id(1) * get_local_size(1) / 2 + get_local_id(1)) - 1;
srcx = abs(srcx);
srcx = min(srcCols - 1,srcx);
srcy = abs(srcy);
srcy = min(srcRows -1 ,srcy);
s_srcPatch[get_local_id(1)][get_local_id(0)] = (float)(src[srcx + srcy * srcStep]);
}
barrier(CLK_LOCAL_MEM_FENCE);
float sum = 0;
const int evenFlag = (int)((get_local_id(0) & 1) == 0);
const int oddFlag = (int)((get_local_id(0) & 1) != 0);
const bool eveny = ((get_local_id(1) & 1) == 0);
const int tidx = get_local_id(0);
if(eveny)
{
sum = sum + (evenFlag * 0.0625f) * s_srcPatch[1 + (get_local_id(1) >> 1)][1 + ((tidx - 2) >> 1)];
sum = sum + ( oddFlag * 0.25f ) * s_srcPatch[1 + (get_local_id(1) >> 1)][1 + ((tidx - 1) >> 1)];
sum = sum + (evenFlag * 0.375f ) * s_srcPatch[1 + (get_local_id(1) >> 1)][1 + ((tidx ) >> 1)];
sum = sum + ( oddFlag * 0.25f ) * s_srcPatch[1 + (get_local_id(1) >> 1)][1 + ((tidx + 1) >> 1)];
sum = sum + (evenFlag * 0.0625f) * s_srcPatch[1 + (get_local_id(1) >> 1)][1 + ((tidx + 2) >> 1)];
}
s_dstPatch[2 + get_local_id(1)][get_local_id(0)] = sum;
if (get_local_id(1) < 2)
{
sum = 0;
if (eveny)
{
sum = sum + (evenFlag * 0.0625f) * s_srcPatch[0][1 + ((tidx - 2) >> 1)];
sum = sum + ( oddFlag * 0.25f ) * s_srcPatch[0][1 + ((tidx - 1) >> 1)];
sum = sum + (evenFlag * 0.375f ) * s_srcPatch[0][1 + ((tidx ) >> 1)];
sum = sum + ( oddFlag * 0.25f ) * s_srcPatch[0][1 + ((tidx + 1) >> 1)];
sum = sum + (evenFlag * 0.0625f) * s_srcPatch[0][1 + ((tidx + 2) >> 1)];
}
s_dstPatch[get_local_id(1)][get_local_id(0)] = sum;
}
if (get_local_id(1) > 13)
{
sum = 0;
if (eveny)
{
sum = sum + (evenFlag * 0.0625f) * s_srcPatch[9][1 + ((tidx - 2) >> 1)];
sum = sum + ( oddFlag * 0.25f ) * s_srcPatch[9][1 + ((tidx - 1) >> 1)];
sum = sum + (evenFlag * 0.375f ) * s_srcPatch[9][1 + ((tidx ) >> 1)];
sum = sum + ( oddFlag * 0.25f ) * s_srcPatch[9][1 + ((tidx + 1) >> 1)];
sum = sum + (evenFlag * 0.0625f) * s_srcPatch[9][1 + ((tidx + 2) >> 1)];
}
s_dstPatch[4 + get_local_id(1)][get_local_id(0)] = sum;
}
barrier(CLK_LOCAL_MEM_FENCE);
sum = 0;
const int tidy = get_local_id(1);
sum = sum + 0.0625f * s_dstPatch[2 + tidy - 2][get_local_id(0)];
sum = sum + 0.25f * s_dstPatch[2 + tidy - 1][get_local_id(0)];
sum = sum + 0.375f * s_dstPatch[2 + tidy ][get_local_id(0)];
sum = sum + 0.25f * s_dstPatch[2 + tidy + 1][get_local_id(0)];
sum = sum + 0.0625f * s_dstPatch[2 + tidy + 2][get_local_id(0)];
if ((x < dstCols) && (y < dstRows))
dst[x + y * dstStep] = (float)(4.0f * sum);
}
///////////////////////////////////////////////////////////////////////
////////////////////////// CV_32FC1 /////////////////////////////////
///////////////////////////////////////////////////////////////////////
__kernel void pyrUp_C1_D5(__global float* src,__global float* dst,
int srcRows,int dstRows,int srcCols,int dstCols,
int srcOffset,int dstOffset,int srcStep,int dstStep)
{
const int x = get_global_id(0);
const int y = get_global_id(1);
__local float s_srcPatch[10][10];
__local float s_dstPatch[20][16];
srcOffset = srcOffset >> 2;
dstOffset = dstOffset >> 2;
srcStep = srcStep >> 2;
dstStep = dstStep >> 2;
if( get_local_id(0) < 10 && get_local_id(1) < 10 )
{
int srcx = (int)(get_group_id(0) * get_local_size(0) / 2 + get_local_id(0)) - 1;
int srcy = (int)(get_group_id(1) * get_local_size(1) / 2 + get_local_id(1)) - 1;
srcx = abs(srcx);
srcx = min(srcCols - 1,srcx);
srcy = abs(srcy);
srcy = min(srcRows -1 ,srcy);
s_srcPatch[get_local_id(1)][get_local_id(0)] = (float)(src[srcx + srcy * srcStep]);
}
barrier(CLK_LOCAL_MEM_FENCE);
float sum = 0;
const int evenFlag = (int)((get_local_id(0) & 1) == 0);
const int oddFlag = (int)((get_local_id(0) & 1) != 0);
const bool eveny = ((get_local_id(1) & 1) == 0);
const int tidx = get_local_id(0);
if(eveny)
{
sum = sum + (evenFlag * 0.0625f) * s_srcPatch[1 + (get_local_id(1) >> 1)][1 + ((tidx - 2) >> 1)];
sum = sum + ( oddFlag * 0.25f ) * s_srcPatch[1 + (get_local_id(1) >> 1)][1 + ((tidx - 1) >> 1)];
sum = sum + (evenFlag * 0.375f ) * s_srcPatch[1 + (get_local_id(1) >> 1)][1 + ((tidx ) >> 1)];
sum = sum + ( oddFlag * 0.25f ) * s_srcPatch[1 + (get_local_id(1) >> 1)][1 + ((tidx + 1) >> 1)];
sum = sum + (evenFlag * 0.0625f) * s_srcPatch[1 + (get_local_id(1) >> 1)][1 + ((tidx + 2) >> 1)];
}
s_dstPatch[2 + get_local_id(1)][get_local_id(0)] = sum;
if (get_local_id(1) < 2)
{
sum = 0;
if (eveny)
{
sum = sum + (evenFlag * 0.0625f) * s_srcPatch[0][1 + ((tidx - 2) >> 1)];
sum = sum + ( oddFlag * 0.25f ) * s_srcPatch[0][1 + ((tidx - 1) >> 1)];
sum = sum + (evenFlag * 0.375f ) * s_srcPatch[0][1 + ((tidx ) >> 1)];
sum = sum + ( oddFlag * 0.25f ) * s_srcPatch[0][1 + ((tidx + 1) >> 1)];
sum = sum + (evenFlag * 0.0625f) * s_srcPatch[0][1 + ((tidx + 2) >> 1)];
}
s_dstPatch[get_local_id(1)][get_local_id(0)] = sum;
}
if (get_local_id(1) > 13)
{
sum = 0;
if (eveny)
{
sum = sum + (evenFlag * 0.0625f) * s_srcPatch[9][1 + ((tidx - 2) >> 1)];
sum = sum + ( oddFlag * 0.25f ) * s_srcPatch[9][1 + ((tidx - 1) >> 1)];
sum = sum + (evenFlag * 0.375f ) * s_srcPatch[9][1 + ((tidx ) >> 1)];
sum = sum + ( oddFlag * 0.25f ) * s_srcPatch[9][1 + ((tidx + 1) >> 1)];
sum = sum + (evenFlag * 0.0625f) * s_srcPatch[9][1 + ((tidx + 2) >> 1)];
}
s_dstPatch[4 + get_local_id(1)][get_local_id(0)] = sum;
}
barrier(CLK_LOCAL_MEM_FENCE);
sum = 0;
const int tidy = get_local_id(1);
sum = sum + 0.0625f * s_dstPatch[2 + tidy - 2][get_local_id(0)];
sum = sum + 0.25f * s_dstPatch[2 + tidy - 1][get_local_id(0)];
sum = sum + 0.375f * s_dstPatch[2 + tidy ][get_local_id(0)];
sum = sum + 0.25f * s_dstPatch[2 + tidy + 1][get_local_id(0)];
sum = sum + 0.0625f * s_dstPatch[2 + tidy + 2][get_local_id(0)];
if ((x < dstCols) && (y < dstRows))
dst[x + y * dstStep] = (float)(4.0f * sum);
}
///////////////////////////////////////////////////////////////////////
////////////////////////// CV_8UC4 //////////////////////////////////
///////////////////////////////////////////////////////////////////////
float4 covert_uchar4_to_float4(uchar4 data)
{
float4 f4Data = {0,0,0,0};
f4Data.x = (float)data.x;
f4Data.y = (float)data.y;
f4Data.z = (float)data.z;
f4Data.w = (float)data.w;
return f4Data;
}
uchar4 convert_float4_to_uchar4(float4 data)
{
uchar4 u4Data;
u4Data.x = get_valid_uchar(data.x);
u4Data.y = get_valid_uchar(data.y);
u4Data.z = get_valid_uchar(data.z);
u4Data.w = get_valid_uchar(data.w);
return u4Data;
}
float4 int_x_float4(int leftOpr,float4 rightOpr)
{
float4 result = {0,0,0,0};
result.x = rightOpr.x * leftOpr;
result.y = rightOpr.y * leftOpr;
result.z = rightOpr.z * leftOpr;
result.w = rightOpr.w * leftOpr;
return result;
}
float4 float4_x_float4(float4 leftOpr,float4 rightOpr)
{
float4 result;
result.x = leftOpr.x * rightOpr.x;
result.y = leftOpr.y * rightOpr.y;
result.z = leftOpr.z * rightOpr.z;
result.w = leftOpr.w * rightOpr.w;
return result;
}
__kernel void pyrUp_C4_D0(__global uchar4* src,__global uchar4* dst,
int srcRows,int dstRows,int srcCols,int dstCols,
int srcOffset,int dstOffset,int srcStep,int dstStep)
{
const int x = get_global_id(0);
const int y = get_global_id(1);
__local float4 s_srcPatch[10][10];
__local float4 s_dstPatch[20][16];
srcOffset >>= 2;
dstOffset >>= 2;
srcStep >>= 2;
dstStep >>= 2;
if( get_local_id(0) < 10 && get_local_id(1) < 10 )
{
int srcx = (int)(get_group_id(0) * get_local_size(0) / 2 + get_local_id(0)) - 1;
int srcy = (int)(get_group_id(1) * get_local_size(1) / 2 + get_local_id(1)) - 1;
srcx = abs(srcx);
srcx = min(srcCols - 1,srcx);
srcy = abs(srcy);
srcy = min(srcRows -1 ,srcy);
s_srcPatch[get_local_id(1)][get_local_id(0)] = covert_uchar4_to_float4(src[srcx + srcy * srcStep]);
}
barrier(CLK_LOCAL_MEM_FENCE);
float4 sum = (float4)(0,0,0,0);
const int evenFlag = (int)((get_local_id(0) & 1) == 0);
const int oddFlag = (int)((get_local_id(0) & 1) != 0);
const bool eveny = ((get_local_id(1) & 1) == 0);
const int tidx = get_local_id(0);
float4 co1 = (float4)(0.375f, 0.375f, 0.375f, 0.375f);
float4 co2 = (float4)(0.25f, 0.25f, 0.25f, 0.25f);
float4 co3 = (float4)(0.0625f, 0.0625f, 0.0625f, 0.0625f);
if(eveny)
{
sum = sum + float4_x_float4(int_x_float4( evenFlag, co3 ) , s_srcPatch[1 + (get_local_id(1) >> 1)][1 + ((tidx - 2) >> 1)]);
sum = sum + float4_x_float4(int_x_float4( oddFlag , co2 ) , s_srcPatch[1 + (get_local_id(1) >> 1)][1 + ((tidx - 1) >> 1)]);
sum = sum + float4_x_float4(int_x_float4( evenFlag, co1 ) , s_srcPatch[1 + (get_local_id(1) >> 1)][1 + ((tidx ) >> 1)]);
sum = sum + float4_x_float4(int_x_float4( oddFlag , co2 ) , s_srcPatch[1 + (get_local_id(1) >> 1)][1 + ((tidx + 1) >> 1)]);
sum = sum + float4_x_float4(int_x_float4( evenFlag, co3 ) , s_srcPatch[1 + (get_local_id(1) >> 1)][1 + ((tidx + 2) >> 1)]);
}
s_dstPatch[2 + get_local_id(1)][get_local_id(0)] = sum;
if (get_local_id(1) < 2)
{
sum = 0;
if (eveny)
{
sum = sum + float4_x_float4(int_x_float4(evenFlag , co3) , s_srcPatch[0][1 + ((tidx - 2) >> 1)]);
sum = sum + float4_x_float4(int_x_float4( oddFlag , co2 ) , s_srcPatch[0][1 + ((tidx - 1) >> 1)]);
sum = sum + float4_x_float4(int_x_float4(evenFlag , co1 ) , s_srcPatch[0][1 + ((tidx ) >> 1)]);
sum = sum + float4_x_float4(int_x_float4( oddFlag , co2 ) , s_srcPatch[0][1 + ((tidx + 1) >> 1)]);
sum = sum + float4_x_float4(int_x_float4(evenFlag , co3) , s_srcPatch[0][1 + ((tidx + 2) >> 1)]);
}
s_dstPatch[get_local_id(1)][get_local_id(0)] = sum;
}
if (get_local_id(1) > 13)
{
sum = 0;
if (eveny)
{
sum = sum + float4_x_float4(int_x_float4(evenFlag , co3) , s_srcPatch[9][1 + ((tidx - 2) >> 1)]);
sum = sum + float4_x_float4(int_x_float4( oddFlag , co2) , s_srcPatch[9][1 + ((tidx - 1) >> 1)]);
sum = sum + float4_x_float4(int_x_float4(evenFlag , co1) , s_srcPatch[9][1 + ((tidx ) >> 1)]);
sum = sum + float4_x_float4(int_x_float4( oddFlag , co2) , s_srcPatch[9][1 + ((tidx + 1) >> 1)]);
sum = sum + float4_x_float4(int_x_float4(evenFlag , co3) , s_srcPatch[9][1 + ((tidx + 2) >> 1)]);
}
s_dstPatch[4 + get_local_id(1)][get_local_id(0)] = sum;
}
barrier(CLK_LOCAL_MEM_FENCE);
sum = 0;
const int tidy = get_local_id(1);
sum = sum + float4_x_float4(co3 , s_dstPatch[2 + tidy - 2][get_local_id(0)]);
sum = sum + float4_x_float4(co2 , s_dstPatch[2 + tidy - 1][get_local_id(0)]);
sum = sum + float4_x_float4(co1 , s_dstPatch[2 + tidy ][get_local_id(0)]);
sum = sum + float4_x_float4(co2 , s_dstPatch[2 + tidy + 1][get_local_id(0)]);
sum = sum + float4_x_float4(co3 , s_dstPatch[2 + tidy + 2][get_local_id(0)]);
if ((x < dstCols) && (y < dstRows))
{
dst[x + y * dstStep] = convert_float4_to_uchar4(int_x_float4(4.0f,sum));
}
}
///////////////////////////////////////////////////////////////////////
////////////////////////// CV_16UC4 //////////////////////////////////
///////////////////////////////////////////////////////////////////////
float4 covert_ushort4_to_float4(ushort4 data)
{
float4 f4Data = {0,0,0,0};
f4Data.x = (float)data.x;
f4Data.y = (float)data.y;
f4Data.z = (float)data.z;
f4Data.w = (float)data.w;
return f4Data;
}
ushort4 convert_float4_to_ushort4(float4 data)
{
ushort4 u4Data;
u4Data.x = (float)data.x;
u4Data.y = (float)data.y;
u4Data.z = (float)data.z;
u4Data.w = (float)data.w;
return u4Data;
}
__kernel void pyrUp_C4_D2(__global ushort4* src,__global ushort4* dst,
int srcRows,int dstRows,int srcCols,int dstCols,
int srcOffset,int dstOffset,int srcStep,int dstStep)
{
const int x = get_global_id(0);
const int y = get_global_id(1);
__local float4 s_srcPatch[10][10];
__local float4 s_dstPatch[20][16];
srcOffset >>= 3;
dstOffset >>= 3;
srcStep >>= 3;
dstStep >>= 3;
if( get_local_id(0) < 10 && get_local_id(1) < 10 )
{
int srcx = (int)(get_group_id(0) * get_local_size(0) / 2 + get_local_id(0)) - 1;
int srcy = (int)(get_group_id(1) * get_local_size(1) / 2 + get_local_id(1)) - 1;
srcx = abs(srcx);
srcx = min(srcCols - 1,srcx);
srcy = abs(srcy);
srcy = min(srcRows -1 ,srcy);
s_srcPatch[get_local_id(1)][get_local_id(0)] = covert_ushort4_to_float4(src[srcx + srcy * srcStep]);
}
barrier(CLK_LOCAL_MEM_FENCE);
float4 sum = (float4)(0,0,0,0);
const int evenFlag = (int)((get_local_id(0) & 1) == 0);
const int oddFlag = (int)((get_local_id(0) & 1) != 0);
const bool eveny = ((get_local_id(1) & 1) == 0);
const int tidx = get_local_id(0);
float4 co1 = (float4)(0.375f, 0.375f, 0.375f, 0.375f);
float4 co2 = (float4)(0.25f, 0.25f, 0.25f, 0.25f);
float4 co3 = (float4)(0.0625f, 0.0625f, 0.0625f, 0.0625f);
if(eveny)
{
sum = sum + float4_x_float4(int_x_float4( evenFlag, co3 ) , s_srcPatch[1 + (get_local_id(1) >> 1)][1 + ((tidx - 2) >> 1)]);
sum = sum + float4_x_float4(int_x_float4( oddFlag , co2 ) , s_srcPatch[1 + (get_local_id(1) >> 1)][1 + ((tidx - 1) >> 1)]);
sum = sum + float4_x_float4(int_x_float4( evenFlag, co1 ) , s_srcPatch[1 + (get_local_id(1) >> 1)][1 + ((tidx ) >> 1)]);
sum = sum + float4_x_float4(int_x_float4( oddFlag , co2 ) , s_srcPatch[1 + (get_local_id(1) >> 1)][1 + ((tidx + 1) >> 1)]);
sum = sum + float4_x_float4(int_x_float4( evenFlag, co3 ) , s_srcPatch[1 + (get_local_id(1) >> 1)][1 + ((tidx + 2) >> 1)]);
}
s_dstPatch[2 + get_local_id(1)][get_local_id(0)] = sum;
if (get_local_id(1) < 2)
{
sum = 0;
if (eveny)
{
sum = sum + float4_x_float4(int_x_float4(evenFlag , co3) , s_srcPatch[0][1 + ((tidx - 2) >> 1)]);
sum = sum + float4_x_float4(int_x_float4( oddFlag , co2 ) , s_srcPatch[0][1 + ((tidx - 1) >> 1)]);
sum = sum + float4_x_float4(int_x_float4(evenFlag , co1 ) , s_srcPatch[0][1 + ((tidx ) >> 1)]);
sum = sum + float4_x_float4(int_x_float4( oddFlag , co2 ) , s_srcPatch[0][1 + ((tidx + 1) >> 1)]);
sum = sum + float4_x_float4(int_x_float4(evenFlag , co3) , s_srcPatch[0][1 + ((tidx + 2) >> 1)]);
}
s_dstPatch[get_local_id(1)][get_local_id(0)] = sum;
}
if (get_local_id(1) > 13)
{
sum = 0;
if (eveny)
{
sum = sum + float4_x_float4(int_x_float4(evenFlag , co3) , s_srcPatch[9][1 + ((tidx - 2) >> 1)]);
sum = sum + float4_x_float4(int_x_float4( oddFlag , co2) , s_srcPatch[9][1 + ((tidx - 1) >> 1)]);
sum = sum + float4_x_float4(int_x_float4(evenFlag , co1) , s_srcPatch[9][1 + ((tidx ) >> 1)]);
sum = sum + float4_x_float4(int_x_float4( oddFlag , co2) , s_srcPatch[9][1 + ((tidx + 1) >> 1)]);
sum = sum + float4_x_float4(int_x_float4(evenFlag , co3) , s_srcPatch[9][1 + ((tidx + 2) >> 1)]);
}
s_dstPatch[4 + get_local_id(1)][get_local_id(0)] = sum;
}
barrier(CLK_LOCAL_MEM_FENCE);
sum = 0;
const int tidy = get_local_id(1);
sum = sum + float4_x_float4(co3 , s_dstPatch[2 + tidy - 2][get_local_id(0)]);
sum = sum + float4_x_float4(co2 , s_dstPatch[2 + tidy - 1][get_local_id(0)]);
sum = sum + float4_x_float4(co1 , s_dstPatch[2 + tidy ][get_local_id(0)]);
sum = sum + float4_x_float4(co2 , s_dstPatch[2 + tidy + 1][get_local_id(0)]);
sum = sum + float4_x_float4(co3 , s_dstPatch[2 + tidy + 2][get_local_id(0)]);
if ((x < dstCols) && (y < dstRows))
{
dst[x + y * dstStep] = convert_float4_to_ushort4(int_x_float4(4.0f,sum));
}
}
///////////////////////////////////////////////////////////////////////
////////////////////////// CV_32FC4 //////////////////////////////////
///////////////////////////////////////////////////////////////////////
__kernel void pyrUp_C4_D5(__global float4* src,__global float4* dst,
int srcRows,int dstRows,int srcCols,int dstCols,
int srcOffset,int dstOffset,int srcStep,int dstStep)
{
const int x = get_global_id(0);
const int y = get_global_id(1);
__local float4 s_srcPatch[10][10];
__local float4 s_dstPatch[20][16];
srcOffset >>= 4;
dstOffset >>= 4;
srcStep >>= 4;
dstStep >>= 4;
if( get_local_id(0) < 10 && get_local_id(1) < 10 )
{
int srcx = (int)(get_group_id(0) * get_local_size(0) / 2 + get_local_id(0)) - 1;
int srcy = (int)(get_group_id(1) * get_local_size(1) / 2 + get_local_id(1)) - 1;
srcx = abs(srcx);
srcx = min(srcCols - 1,srcx);
srcy = abs(srcy);
srcy = min(srcRows -1 ,srcy);
s_srcPatch[get_local_id(1)][get_local_id(0)] = (float4)(src[srcx + srcy * srcStep]);
}
barrier(CLK_LOCAL_MEM_FENCE);
float4 sum = (float4)(0,0,0,0);
const int evenFlag = (int)((get_local_id(0) & 1) == 0);
const int oddFlag = (int)((get_local_id(0) & 1) != 0);
const bool eveny = ((get_local_id(1) & 1) == 0);
const int tidx = get_local_id(0);
float4 co1 = (float4)(0.375f, 0.375f, 0.375f, 0.375f);
float4 co2 = (float4)(0.25f, 0.25f, 0.25f, 0.25f);
float4 co3 = (float4)(0.0625f, 0.0625f, 0.0625f, 0.0625f);
if(eveny)
{
sum = sum + float4_x_float4(int_x_float4( evenFlag, co3 ) , s_srcPatch[1 + (get_local_id(1) >> 1)][1 + ((tidx - 2) >> 1)]);
sum = sum + float4_x_float4(int_x_float4( oddFlag , co2 ) , s_srcPatch[1 + (get_local_id(1) >> 1)][1 + ((tidx - 1) >> 1)]);
sum = sum + float4_x_float4(int_x_float4( evenFlag, co1 ) , s_srcPatch[1 + (get_local_id(1) >> 1)][1 + ((tidx ) >> 1)]);
sum = sum + float4_x_float4(int_x_float4( oddFlag , co2 ) , s_srcPatch[1 + (get_local_id(1) >> 1)][1 + ((tidx + 1) >> 1)]);
sum = sum + float4_x_float4(int_x_float4( evenFlag, co3 ) , s_srcPatch[1 + (get_local_id(1) >> 1)][1 + ((tidx + 2) >> 1)]);
}
s_dstPatch[2 + get_local_id(1)][get_local_id(0)] = sum;
if (get_local_id(1) < 2)
{
sum = 0;
if (eveny)
{
sum = sum + float4_x_float4(int_x_float4(evenFlag , co3) , s_srcPatch[0][1 + ((tidx - 2) >> 1)]);
sum = sum + float4_x_float4(int_x_float4( oddFlag , co2 ) , s_srcPatch[0][1 + ((tidx - 1) >> 1)]);
sum = sum + float4_x_float4(int_x_float4(evenFlag , co1 ) , s_srcPatch[0][1 + ((tidx ) >> 1)]);
sum = sum + float4_x_float4(int_x_float4( oddFlag , co2 ) , s_srcPatch[0][1 + ((tidx + 1) >> 1)]);
sum = sum + float4_x_float4(int_x_float4(evenFlag , co3) , s_srcPatch[0][1 + ((tidx + 2) >> 1)]);
}
s_dstPatch[get_local_id(1)][get_local_id(0)] = sum;
}
if (get_local_id(1) > 13)
{
sum = 0;
if (eveny)
{
sum = sum + float4_x_float4(int_x_float4(evenFlag , co3) , s_srcPatch[9][1 + ((tidx - 2) >> 1)]);
sum = sum + float4_x_float4(int_x_float4( oddFlag , co2) , s_srcPatch[9][1 + ((tidx - 1) >> 1)]);
sum = sum + float4_x_float4(int_x_float4(evenFlag , co1) , s_srcPatch[9][1 + ((tidx ) >> 1)]);
sum = sum + float4_x_float4(int_x_float4( oddFlag , co2) , s_srcPatch[9][1 + ((tidx + 1) >> 1)]);
sum = sum + float4_x_float4(int_x_float4(evenFlag , co3) , s_srcPatch[9][1 + ((tidx + 2) >> 1)]);
}
s_dstPatch[4 + get_local_id(1)][get_local_id(0)] = sum;
}
barrier(CLK_LOCAL_MEM_FENCE);
sum = 0;
const int tidy = get_local_id(1);
sum = sum + float4_x_float4(co3 , s_dstPatch[2 + tidy - 2][get_local_id(0)]);
sum = sum + float4_x_float4(co2 , s_dstPatch[2 + tidy - 1][get_local_id(0)]);
sum = sum + float4_x_float4(co1 , s_dstPatch[2 + tidy ][get_local_id(0)]);
sum = sum + float4_x_float4(co2 , s_dstPatch[2 + tidy + 1][get_local_id(0)]);
sum = sum + float4_x_float4(co3 , s_dstPatch[2 + tidy + 2][get_local_id(0)]);
if ((x < dstCols) && (y < dstRows))
{
dst[x + y * dstStep] = 4.0f * sum;
}
}

View File

@ -0,0 +1,560 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved.
// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// @Authors
// Peng Xiao, pengxiao@multicorewareinc.com
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other oclMaterials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors as is and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#include <iomanip>
#include "precomp.hpp"
using namespace cv;
using namespace cv::ocl;
using namespace std;
#define EXT_FP64 0
#if !defined (HAVE_OPENCL)
void cv::ocl::matchTemplate(const oclMat&, const oclMat&, oclMat&) { throw_nogpu(); }
#else
//helper routines
namespace cv
{
namespace ocl
{
///////////////////////////OpenCL kernel strings///////////////////////////
extern const char *match_template;
}
}
namespace cv { namespace ocl
{
void matchTemplate_SQDIFF(
const oclMat& image, const oclMat& templ, oclMat& result, MatchTemplateBuf &buf);
void matchTemplate_SQDIFF_NORMED(
const oclMat& image, const oclMat& templ, oclMat& result, MatchTemplateBuf &buf);
void matchTemplate_CCORR(
const oclMat& image, const oclMat& templ, oclMat& result, MatchTemplateBuf &buf);
void matchTemplate_CCORR_NORMED(
const oclMat& image, const oclMat& templ, oclMat& result, MatchTemplateBuf &buf);
void matchTemplate_CCOFF(
const oclMat& image, const oclMat& templ, oclMat& result, MatchTemplateBuf &buf);
void matchTemplate_CCOFF_NORMED(
const oclMat& image, const oclMat& templ, oclMat& result, MatchTemplateBuf &buf);
void matchTemplateNaive_SQDIFF(
const oclMat& image, const oclMat& templ, oclMat& result, int cn);
void matchTemplateNaive_CCORR(
const oclMat& image, const oclMat& templ, oclMat& result, int cn);
// Evaluates optimal template's area threshold. If
// template's area is less than the threshold, we use naive match
// template version, otherwise FFT-based (if available)
int getTemplateThreshold(int method, int depth)
{
switch (method)
{
case CV_TM_CCORR:
if (depth == CV_32F) return 250;
if (depth == CV_8U) return 300;
break;
case CV_TM_SQDIFF:
if (depth == CV_32F) return MAXSHORT; // do naive SQDIFF for CV_32F
if (depth == CV_8U) return 300;
break;
}
CV_Error(CV_StsBadArg, "getTemplateThreshold: unsupported match template mode");
return 0;
}
//////////////////////////////////////////////////////////////////////
// SQDIFF
void matchTemplate_SQDIFF(
const oclMat& image, const oclMat& templ, oclMat& result, MatchTemplateBuf &buf)
{
result.create(image.rows - templ.rows + 1, image.cols - templ.cols + 1, CV_32F);
if (templ.size().area() < getTemplateThreshold(CV_TM_SQDIFF, image.depth()))
{
matchTemplateNaive_SQDIFF(image, templ, result, image.channels());
return;
}
else
{
// TODO
CV_Error(CV_StsBadArg, "Not supported yet for this size template");
}
}
void matchTemplate_SQDIFF_NORMED(
const oclMat& image, const oclMat& templ, oclMat& result, MatchTemplateBuf &buf)
{
matchTemplate_CCORR(image,templ,result,buf);
buf.image_sums.resize(1);
buf.image_sqsums.resize(1);
integral(image.reshape(1), buf.image_sums[0], buf.image_sqsums[0]);
#if EXT_FP64 && SQRSUM_FIXED
unsigned long long templ_sqsum = (unsigned long long)sqrSum(templ.reshape(1))[0];
#else
Mat sqr_mat = templ.reshape(1);
unsigned long long templ_sqsum = (unsigned long long)sum(sqr_mat.mul(sqr_mat))[0];
#endif
Context *clCxt = image.clCxt;
string kernelName = "matchTemplate_Prepared_SQDIFF_NORMED";
vector< pair<size_t, const void *> > args;
args.push_back( make_pair( sizeof(cl_mem), (void *)&buf.image_sums[0].data));
args.push_back( make_pair( sizeof(cl_mem), (void *)&result.data));
args.push_back( make_pair( sizeof(cl_ulong), (void *)&templ_sqsum));
args.push_back( make_pair( sizeof(cl_int), (void *)&result.rows));
args.push_back( make_pair( sizeof(cl_int), (void *)&result.cols));
args.push_back( make_pair( sizeof(cl_int), (void *)&templ.rows));
args.push_back( make_pair( sizeof(cl_int), (void *)&templ.cols));
args.push_back( make_pair( sizeof(cl_int), (void *)&buf.image_sums[0].offset));
args.push_back( make_pair( sizeof(cl_int), (void *)&buf.image_sums[0].step));
args.push_back( make_pair( sizeof(cl_int), (void *)&result.offset));
args.push_back( make_pair( sizeof(cl_int), (void *)&result.step));
size_t globalThreads[3] = {result.cols, result.rows, 1};
size_t localThreads[3] = {32, 8, 1};
openCLExecuteKernel(clCxt, &match_template, kernelName, globalThreads, localThreads, args, 1, CV_8U);
}
void matchTemplateNaive_SQDIFF(
const oclMat& image, const oclMat& templ, oclMat& result, int cn)
{
CV_Assert((image.depth() == CV_8U && templ.depth() == CV_8U )
|| (image.depth() == CV_32F && templ.depth() == CV_32F) && result.depth() == CV_32F);
CV_Assert(image.channels() == templ.channels() && (image.channels() == 1 || image.channels() == 4) && result.channels() == 1);
CV_Assert(result.rows == image.rows - templ.rows + 1 && result.cols == image.cols - templ.cols + 1);
Context *clCxt = image.clCxt;
string kernelName = "matchTemplate_Naive_SQDIFF";
vector< pair<size_t, const void *> > args;
args.push_back( make_pair( sizeof(cl_mem), (void *)&image.data));
args.push_back( make_pair( sizeof(cl_mem), (void *)&templ.data));
args.push_back( make_pair( sizeof(cl_mem), (void *)&result.data));
args.push_back( make_pair( sizeof(cl_int), (void *)&image.rows));
args.push_back( make_pair( sizeof(cl_int), (void *)&image.cols));
args.push_back( make_pair( sizeof(cl_int), (void *)&templ.rows));
args.push_back( make_pair( sizeof(cl_int), (void *)&templ.cols));
args.push_back( make_pair( sizeof(cl_int), (void *)&result.rows));
args.push_back( make_pair( sizeof(cl_int), (void *)&result.cols));
args.push_back( make_pair( sizeof(cl_int), (void *)&image.offset));
args.push_back( make_pair( sizeof(cl_int), (void *)&templ.offset));
args.push_back( make_pair( sizeof(cl_int), (void *)&result.offset));
args.push_back( make_pair( sizeof(cl_int), (void *)&image.step));
args.push_back( make_pair( sizeof(cl_int), (void *)&templ.step));
args.push_back( make_pair( sizeof(cl_int), (void *)&result.step));
size_t globalThreads[3] = {result.cols, result.rows, 1};
size_t localThreads[3] = {32, 8, 1};
openCLExecuteKernel(clCxt, &match_template, kernelName, globalThreads, localThreads, args, image.channels(), image.depth());
}
//////////////////////////////////////////////////////////////////////
// CCORR
void matchTemplate_CCORR(
const oclMat& image, const oclMat& templ, oclMat& result, MatchTemplateBuf &buf)
{
result.create(image.rows - templ.rows + 1, image.cols - templ.cols + 1, CV_32F);
if (templ.size().area() < getTemplateThreshold(CV_TM_SQDIFF, image.depth()))
{
matchTemplateNaive_CCORR(image, templ, result, image.channels());
return;
}
else
{
CV_Error(CV_StsBadArg, "Not supported yet for this size template");
if(image.depth() == CV_8U && templ.depth() == CV_8U)
{
image.convertTo(buf.imagef, CV_32F);
templ.convertTo(buf.templf, CV_32F);
}
CV_Assert(image.channels() == 1);
oclMat o_result(image.size(), CV_MAKETYPE(CV_32F, image.channels()));
filter2D(buf.imagef,o_result,CV_32F,buf.templf, Point(0,0));
result = o_result(Rect(0,0,image.rows - templ.rows + 1, image.cols - templ.cols + 1));
}
}
void matchTemplate_CCORR_NORMED(
const oclMat& image, const oclMat& templ, oclMat& result, MatchTemplateBuf &buf)
{
matchTemplate_CCORR(image,templ,result,buf);
buf.image_sums.resize(1);
buf.image_sqsums.resize(1);
integral(image.reshape(1), buf.image_sums[0], buf.image_sqsums[0]);
#if EXT_FP64 && SQRSUM_FIXED
unsigned long long templ_sqsum = (unsigned long long)sqrSum(templ.reshape(1))[0];
#elif EXT_FP64
oclMat templ_c1 = templ.reshape(1);
multiply(templ_c1, templ_c1, templ_c1);
unsigned long long templ_sqsum = (unsigned long long)sum(templ_c1)[0];
#else
Mat m_templ_c1 = templ.reshape(1);
multiply(m_templ_c1, m_templ_c1, m_templ_c1);
unsigned long long templ_sqsum = (unsigned long long)sum(m_templ_c1)[0];
#endif
Context *clCxt = image.clCxt;
string kernelName = "normalizeKernel";
vector< pair<size_t, const void *> > args;
args.push_back( make_pair( sizeof(cl_mem), (void *)&buf.image_sqsums[0].data));
args.push_back( make_pair( sizeof(cl_mem), (void *)&result.data));
args.push_back( make_pair( sizeof(cl_ulong), (void *)&templ_sqsum));
args.push_back( make_pair( sizeof(cl_int), (void *)&result.rows));
args.push_back( make_pair( sizeof(cl_int), (void *)&result.cols));
args.push_back( make_pair( sizeof(cl_int), (void *)&templ.rows));
args.push_back( make_pair( sizeof(cl_int), (void *)&templ.cols));
args.push_back( make_pair( sizeof(cl_int), (void *)&buf.image_sqsums[0].offset));
args.push_back( make_pair( sizeof(cl_int), (void *)&buf.image_sqsums[0].step));
args.push_back( make_pair( sizeof(cl_int), (void *)&result.offset));
args.push_back( make_pair( sizeof(cl_int), (void *)&result.step));
size_t globalThreads[3] = {result.cols, result.rows, 1};
size_t localThreads[3] = {32, 8, 1};
openCLExecuteKernel(clCxt, &match_template, kernelName, globalThreads, localThreads, args, 1, CV_8U);
}
void matchTemplateNaive_CCORR(
const oclMat& image, const oclMat& templ, oclMat& result, int cn)
{
CV_Assert((image.depth() == CV_8U && templ.depth() == CV_8U )
|| (image.depth() == CV_32F && templ.depth() == CV_32F) && result.depth() == CV_32F);
CV_Assert(image.channels() == templ.channels() && (image.channels() == 1 || image.channels() == 4) && result.channels() == 1);
CV_Assert(result.rows == image.rows - templ.rows + 1 && result.cols == image.cols - templ.cols + 1);
Context *clCxt = image.clCxt;
string kernelName = "matchTemplate_Naive_CCORR";
vector< pair<size_t, const void *> > args;
args.push_back( make_pair( sizeof(cl_mem), (void *)&image.data));
args.push_back( make_pair( sizeof(cl_mem), (void *)&templ.data));
args.push_back( make_pair( sizeof(cl_mem), (void *)&result.data));
args.push_back( make_pair( sizeof(cl_int), (void *)&image.rows));
args.push_back( make_pair( sizeof(cl_int), (void *)&image.cols));
args.push_back( make_pair( sizeof(cl_int), (void *)&templ.rows));
args.push_back( make_pair( sizeof(cl_int), (void *)&templ.cols));
args.push_back( make_pair( sizeof(cl_int), (void *)&result.rows));
args.push_back( make_pair( sizeof(cl_int), (void *)&result.cols));
args.push_back( make_pair( sizeof(cl_int), (void *)&image.offset));
args.push_back( make_pair( sizeof(cl_int), (void *)&templ.offset));
args.push_back( make_pair( sizeof(cl_int), (void *)&result.offset));
args.push_back( make_pair( sizeof(cl_int), (void *)&image.step));
args.push_back( make_pair( sizeof(cl_int), (void *)&templ.step));
args.push_back( make_pair( sizeof(cl_int), (void *)&result.step));
size_t globalThreads[3] = {result.cols, result.rows, 1};
size_t localThreads[3] = {32, 8, 1};
openCLExecuteKernel(clCxt, &match_template, kernelName, globalThreads, localThreads, args, image.channels(), image.depth());
}
//////////////////////////////////////////////////////////////////////
// CCOFF
void matchTemplate_CCOFF(
const oclMat& image, const oclMat& templ, oclMat& result, MatchTemplateBuf &buf)
{
CV_Assert(image.depth() == CV_8U && templ.depth() == CV_8U);
matchTemplate_CCORR(image,templ,result,buf);
Context *clCxt = image.clCxt;
string kernelName;
kernelName = "matchTemplate_Prepared_CCOFF";
size_t globalThreads[3] = {result.cols, result.rows, 1};
size_t localThreads[3] = {32, 8, 1};
vector< pair<size_t, const void *> > args;
args.push_back( make_pair( sizeof(cl_mem), (void *)&result.data) );
args.push_back( make_pair( sizeof(cl_int), (void *)&image.rows) );
args.push_back( make_pair( sizeof(cl_int), (void *)&image.cols) );
args.push_back( make_pair( sizeof(cl_int), (void *)&templ.rows) );
args.push_back( make_pair( sizeof(cl_int), (void *)&templ.cols) );
args.push_back( make_pair( sizeof(cl_int), (void *)&result.rows) );
args.push_back( make_pair( sizeof(cl_int), (void *)&result.cols) );
args.push_back( make_pair( sizeof(cl_int), (void *)&result.offset));
args.push_back( make_pair( sizeof(cl_int), (void *)&result.step));
// to be continued in the following section
if(image.channels() == 1)
{
buf.image_sums.resize(1);
// FIXME: temp fix for incorrect integral kernel
oclMat tmp_oclmat;
integral(image, buf.image_sums[0], tmp_oclmat);
float templ_sum = 0;
#if EXT_FP64
templ_sum = (float)sum(templ)[0] / templ.size().area();
#else
Mat o_templ = templ;
templ_sum = (float)sum(o_templ)[0] / o_templ.size().area(); // temp fix for non-double supported machine
#endif
args.push_back( make_pair( sizeof(cl_mem), (void *)&buf.image_sums[0].data) );
args.push_back( make_pair( sizeof(cl_int), (void *)&buf.image_sums[0].offset) );
args.push_back( make_pair( sizeof(cl_int), (void *)&buf.image_sums[0].step) );
args.push_back( make_pair( sizeof(cl_float),(void *)&templ_sum) );
}
else
{
Vec4f templ_sum = Vec4f::all(0);
#if EXT_FP64
split(image,buf.images);
templ_sum = sum(templ) / templ.size().area();
#else
// temp fix for non-double supported machine
Mat o_templ = templ, o_image = image;
vector<Mat> o_mat_vector;
o_mat_vector.resize(image.channels());
buf.images.resize(image.channels());
split(o_image, o_mat_vector);
for(int i = 0; i < o_mat_vector.size(); i ++)
{
buf.images[i] = oclMat(o_mat_vector[i]);
}
templ_sum = sum(o_templ) / templ.size().area();
#endif
buf.image_sums.resize(buf.images.size());
for(int i = 0; i < image.channels(); i ++)
{
// FIXME: temp fix for incorrect integral kernel
oclMat omat_temp;
integral(buf.images[i], buf.image_sums[i], omat_temp);
}
switch(image.channels())
{
case 4:
args.push_back( make_pair( sizeof(cl_mem), (void *)&buf.image_sums[0].data) );
args.push_back( make_pair( sizeof(cl_mem), (void *)&buf.image_sums[1].data) );
args.push_back( make_pair( sizeof(cl_mem), (void *)&buf.image_sums[2].data) );
args.push_back( make_pair( sizeof(cl_mem), (void *)&buf.image_sums[3].data) );
args.push_back( make_pair( sizeof(cl_int), (void *)&buf.image_sums[0].offset) );
args.push_back( make_pair( sizeof(cl_int), (void *)&buf.image_sums[0].step) );
args.push_back( make_pair( sizeof(cl_float),(void *)&templ_sum[0]) );
args.push_back( make_pair( sizeof(cl_float),(void *)&templ_sum[1]) );
args.push_back( make_pair( sizeof(cl_float),(void *)&templ_sum[2]) );
args.push_back( make_pair( sizeof(cl_float),(void *)&templ_sum[3]) );
break;
default:
CV_Error(CV_StsBadArg, "matchTemplate: unsupported number of channels");
break;
}
}
openCLExecuteKernel(clCxt, &match_template, kernelName, globalThreads, localThreads, args, image.channels(), image.depth());
}
void matchTemplate_CCOFF_NORMED(
const oclMat& image, const oclMat& templ, oclMat& result, MatchTemplateBuf &buf)
{
image.convertTo(buf.imagef, CV_32F);
templ.convertTo(buf.templf, CV_32F);
matchTemplate_CCORR(buf.imagef, buf.templf, result, buf);
float scale = 1.f/templ.size().area();
Context *clCxt = image.clCxt;
string kernelName;
kernelName = "matchTemplate_Prepared_CCOFF_NORMED";
size_t globalThreads[3] = {result.cols, result.rows, 1};
size_t localThreads[3] = {32, 8, 1};
vector< pair<size_t, const void *> > args;
args.push_back( make_pair( sizeof(cl_mem), (void *)&result.data) );
args.push_back( make_pair( sizeof(cl_int), (void *)&image.rows) );
args.push_back( make_pair( sizeof(cl_int), (void *)&image.cols) );
args.push_back( make_pair( sizeof(cl_int), (void *)&templ.rows) );
args.push_back( make_pair( sizeof(cl_int), (void *)&templ.cols) );
args.push_back( make_pair( sizeof(cl_int), (void *)&result.rows) );
args.push_back( make_pair( sizeof(cl_int), (void *)&result.cols) );
args.push_back( make_pair( sizeof(cl_int), (void *)&result.offset));
args.push_back( make_pair( sizeof(cl_int), (void *)&result.step));
args.push_back( make_pair( sizeof(cl_float),(void *)&scale) );
// to be continued in the following section
if(image.channels() == 1)
{
buf.image_sums.resize(1);
buf.image_sqsums.resize(1);
integral(image, buf.image_sums[0], buf.image_sqsums[0]);
float templ_sum = 0;
float templ_sqsum = 0;
#if EXT_FP64
templ_sum = (float)sum(templ)[0];
#if SQRSUM_FIXED
templ_sqsum = sqrSum(templ);
#else
oclMat templ_sqr = templ;
multiply(templ,templ, templ_sqr);
templ_sqsum = sum(templ_sqr)[0];
#endif //SQRSUM_FIXED
templ_sqsum -= scale * templ_sum * templ_sum;
templ_sum *= scale;
#else
// temp fix for non-double supported machine
Mat o_templ = templ;
templ_sum = (float)sum(o_templ)[0];
templ_sqsum = sum(o_templ.mul(o_templ))[0] - scale * templ_sum * templ_sum;
templ_sum *= scale;
#endif
args.push_back( make_pair( sizeof(cl_mem), (void *)&buf.image_sums[0].data) );
args.push_back( make_pair( sizeof(cl_int), (void *)&buf.image_sums[0].offset) );
args.push_back( make_pair( sizeof(cl_int), (void *)&buf.image_sums[0].step) );
args.push_back( make_pair( sizeof(cl_mem), (void *)&buf.image_sqsums[0].data) );
args.push_back( make_pair( sizeof(cl_int), (void *)&buf.image_sqsums[0].offset) );
args.push_back( make_pair( sizeof(cl_int), (void *)&buf.image_sqsums[0].step) );
args.push_back( make_pair( sizeof(cl_float),(void *)&templ_sum) );
args.push_back( make_pair( sizeof(cl_float),(void *)&templ_sqsum) );
}
else
{
Vec4f templ_sum = Vec4f::all(0);
Vec4f templ_sqsum = Vec4f::all(0);
#if EXT_FP64
split(image,buf.images);
templ_sum = sum(templ);
#if SQRSUM_FIXED
templ_sqsum = sqrSum(templ);
#else
oclMat templ_sqr = templ;
multiply(templ,templ, templ_sqr);
templ_sqsum = sum(templ_sqr);
#endif //SQRSUM_FIXED
templ_sqsum -= scale * templ_sum * templ_sum;
#else
// temp fix for non-double supported machine
Mat o_templ = templ, o_image = image;
vector<Mat> o_mat_vector;
o_mat_vector.resize(image.channels());
buf.images.resize(image.channels());
split(o_image, o_mat_vector);
for(int i = 0; i < o_mat_vector.size(); i ++)
{
buf.images[i] = oclMat(o_mat_vector[i]);
}
templ_sum = sum(o_templ);
templ_sqsum = sum(o_templ.mul(o_templ));
#endif
float templ_sqsum_sum = 0;
for(int i = 0; i < image.channels(); i ++)
{
templ_sqsum_sum += templ_sqsum[i] - scale * templ_sum[i] * templ_sum[i];
}
templ_sum *= scale;
buf.image_sums.resize(buf.images.size());
buf.image_sqsums.resize(buf.images.size());
for(int i = 0; i < image.channels(); i ++)
{
integral(buf.images[i], buf.image_sums[i], buf.image_sqsums[i]);
}
switch(image.channels())
{
case 4:
args.push_back( make_pair( sizeof(cl_mem), (void *)&buf.image_sums[0].data) );
args.push_back( make_pair( sizeof(cl_mem), (void *)&buf.image_sums[1].data) );
args.push_back( make_pair( sizeof(cl_mem), (void *)&buf.image_sums[2].data) );
args.push_back( make_pair( sizeof(cl_mem), (void *)&buf.image_sums[3].data) );
args.push_back( make_pair( sizeof(cl_int), (void *)&buf.image_sums[0].offset) );
args.push_back( make_pair( sizeof(cl_int), (void *)&buf.image_sums[0].step) );
args.push_back( make_pair( sizeof(cl_mem), (void *)&buf.image_sqsums[0].data) );
args.push_back( make_pair( sizeof(cl_mem), (void *)&buf.image_sqsums[1].data) );
args.push_back( make_pair( sizeof(cl_mem), (void *)&buf.image_sqsums[2].data) );
args.push_back( make_pair( sizeof(cl_mem), (void *)&buf.image_sqsums[3].data) );
args.push_back( make_pair( sizeof(cl_int), (void *)&buf.image_sqsums[0].offset) );
args.push_back( make_pair( sizeof(cl_int), (void *)&buf.image_sqsums[0].step) );
args.push_back( make_pair( sizeof(cl_float),(void *)&templ_sum[0]) );
args.push_back( make_pair( sizeof(cl_float),(void *)&templ_sum[1]) );
args.push_back( make_pair( sizeof(cl_float),(void *)&templ_sum[2]) );
args.push_back( make_pair( sizeof(cl_float),(void *)&templ_sum[3]) );
args.push_back( make_pair( sizeof(cl_float),(void *)&templ_sqsum_sum) );
break;
default:
CV_Error(CV_StsBadArg, "matchTemplate: unsupported number of channels");
break;
}
}
openCLExecuteKernel(clCxt, &match_template, kernelName, globalThreads, localThreads, args, image.channels(), image.depth());
}
}/*ocl*/} /*cv*/
void cv::ocl::matchTemplate(const oclMat& image, const oclMat& templ, oclMat& result, int method)
{
MatchTemplateBuf buf;
matchTemplate(image,templ, result, method,buf);
}
void cv::ocl::matchTemplate(const oclMat& image, const oclMat& templ, oclMat& result, int method, MatchTemplateBuf& buf)
{
CV_Assert(image.type() == templ.type());
CV_Assert(image.cols >= templ.cols && image.rows >= templ.rows);
typedef void (*Caller)(const oclMat&, const oclMat&, oclMat&, MatchTemplateBuf&);
const Caller callers[] = {
::matchTemplate_SQDIFF, ::matchTemplate_SQDIFF_NORMED,
::matchTemplate_CCORR, ::matchTemplate_CCORR_NORMED,
::matchTemplate_CCOFF, ::matchTemplate_CCOFF_NORMED
};
Caller caller = callers[method];
CV_Assert(caller);
caller(image, templ, result, buf);
}
#endif //

115
modules/ocl/src/pyrdown.cpp Normal file
View File

@ -0,0 +1,115 @@
#include "precomp.hpp"
using namespace cv;
using namespace cv::ocl;
using namespace std;
using std::cout;
using std::endl;
namespace cv
{
namespace ocl
{
///////////////////////////OpenCL kernel strings///////////////////////////
extern const char *pyr_down;
}
}
//////////////////////////////////////////////////////////////////////////////
/////////////////////// add subtract multiply divide /////////////////////////
//////////////////////////////////////////////////////////////////////////////
template<typename T>
void pyrdown_run(const oclMat &src, const oclMat &dst)
{
CV_Assert(src.cols / 2 == dst.cols && src.rows / 2 == dst.rows);
CV_Assert(src.type() == dst.type());
CV_Assert(src.depth() != CV_8S);
Context *clCxt = src.clCxt;
//int channels = dst.channels();
//int depth = dst.depth();
string kernelName = "pyrDown";
//int vector_lengths[4][7] = {{4, 0, 4, 4, 1, 1, 1},
// {4, 0, 4, 4, 1, 1, 1},
// {4, 0, 4, 4, 1, 1, 1},
// {4, 0, 4, 4, 1, 1, 1}
//};
//size_t vector_length = vector_lengths[channels-1][depth];
//int offset_cols = (dst.offset / dst.elemSize1()) & (vector_length - 1);
size_t localThreads[3] = { 256, 1, 1 };
size_t globalThreads[3] = { src.cols, dst.rows, 1};
//int dst_step1 = dst.cols * dst.elemSize();
vector<pair<size_t , const void *> > args;
args.push_back( make_pair( sizeof(cl_mem), (void *)&src.data ));
args.push_back( make_pair( sizeof(cl_int), (void *)&src.step ));
args.push_back( make_pair( sizeof(cl_int), (void *)&src.offset ));
args.push_back( make_pair( sizeof(cl_int), (void *)&src.rows));
args.push_back( make_pair( sizeof(cl_int), (void *)&src.cols));
args.push_back( make_pair( sizeof(cl_mem), (void *)&dst.data ));
args.push_back( make_pair( sizeof(cl_int), (void *)&dst.step ));
args.push_back( make_pair( sizeof(cl_int), (void *)&dst.offset ));
args.push_back( make_pair( sizeof(cl_int), (void *)&dst.cols));
openCLExecuteKernel(clCxt, &pyr_down, kernelName, globalThreads, localThreads, args, src.channels(), src.depth());
}
void pyrdown_run(const oclMat &src, const oclMat &dst)
{
switch(src.depth())
{
case 0:
pyrdown_run<unsigned char>(src, dst);
break;
case 1:
pyrdown_run<char>(src, dst);
break;
case 2:
pyrdown_run<unsigned short>(src, dst);
break;
case 3:
pyrdown_run<short>(src, dst);
break;
case 4:
pyrdown_run<int>(src, dst);
break;
case 5:
pyrdown_run<float>(src, dst);
break;
case 6:
pyrdown_run<double>(src, dst);
break;
default:
break;
}
}
//////////////////////////////////////////////////////////////////////////////
// pyrDown
void cv::ocl::pyrDown(const oclMat& src, oclMat& dst)
{
CV_Assert(src.depth() <= CV_32F && src.channels() <= 4);
//src.step = src.rows;
dst.create((src.rows + 1) / 2, (src.cols + 1) / 2, src.type());
//dst.step = dst.rows;
pyrdown_run(src, dst);
}

88
modules/ocl/src/pyrup.cpp Normal file
View File

@ -0,0 +1,88 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved.
// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// @Authors
// Zhang Chunpeng chunpeng@multicorewareinc.com
//
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other oclMaterials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
/* Haar features calculation */
//#define EMU
#include "precomp.hpp"
using namespace cv;
using namespace cv::ocl;
using namespace std;
#ifndef HAVE_OPENCL
void cv::ocl::pyrUp(const oclMat&, GpuMat&, oclMat&) { throw_nogpu(); }
#else
namespace cv { namespace ocl
{
extern const char *pyr_up;
void pyrUp(const cv::ocl::oclMat& src,cv::ocl::oclMat& dst)
{
dst.create(src.rows * 2, src.cols * 2, src.type());
Context *clCxt = src.clCxt;
const std::string kernelName = "pyrUp";
std::vector< pair<size_t, const void *> > args;
args.push_back( make_pair( sizeof(cl_mem), (void *)&src.data));
args.push_back( make_pair( sizeof(cl_mem), (void *)&dst.data));
args.push_back( make_pair( sizeof(cl_int), (void *)&src.rows));
args.push_back( make_pair( sizeof(cl_int), (void *)&dst.rows));
args.push_back( make_pair( sizeof(cl_int), (void *)&src.cols));
args.push_back( make_pair( sizeof(cl_int), (void *)&dst.cols));
args.push_back( make_pair( sizeof(cl_int), (void *)&src.offset));
args.push_back( make_pair( sizeof(cl_int), (void *)&dst.offset));
args.push_back( make_pair( sizeof(cl_int), (void *)&src.step));
args.push_back( make_pair( sizeof(cl_int), (void *)&dst.step));
size_t globalThreads[3] = {dst.cols, dst.rows, 1};
size_t localThreads[3] = {16, 16, 1};
openCLExecuteKernel(clCxt, &pyr_up, kernelName, globalThreads, localThreads, args, src.channels(), src.depth());
}
}};
#endif // HAVE_OPENCL

View File

@ -0,0 +1,83 @@
#include "precomp.hpp"
#include <iomanip>
using namespace cv;
using namespace cv::ocl;
using namespace cvtest;
using namespace testing;
using namespace std;
template <typename T>
void blendLinearGold(const cv::Mat& img1, const cv::Mat& img2, const cv::Mat& weights1, const cv::Mat& weights2, cv::Mat& result_gold)
{
result_gold.create(img1.size(), img1.type());
int cn = img1.channels();
for (int y = 0; y < img1.rows; ++y)
{
const float* weights1_row = weights1.ptr<float>(y);
const float* weights2_row = weights2.ptr<float>(y);
const T* img1_row = img1.ptr<T>(y);
const T* img2_row = img2.ptr<T>(y);
T* result_gold_row = result_gold.ptr<T>(y);
for (int x = 0; x < img1.cols * cn; ++x)
{
float w1 = weights1_row[x / cn];
float w2 = weights2_row[x / cn];
result_gold_row[x] = static_cast<T>((img1_row[x] * w1 + img2_row[x] * w2) / (w1 + w2 + 1e-5f));
}
}
}
PARAM_TEST_CASE(Blend, cv::Size, MatType/*, UseRoi*/)
{
std::vector<cv::ocl::Info> oclinfo;
cv::Size size;
int type;
bool useRoi;
virtual void SetUp()
{
//devInfo = GET_PARAM(0);
size = GET_PARAM(0);
type = GET_PARAM(1);
/*useRoi = GET_PARAM(3);*/
int devnums = getDevice(oclinfo, OPENCV_DEFAULT_OPENCL_DEVICE);
CV_Assert(devnums > 0);
}
};
TEST_P(Blend, Accuracy)
{
int depth = CV_MAT_DEPTH(type);
cv::Mat img1 = randomMat(size, type, 0.0, depth == CV_8U ? 255.0 : 1.0);
cv::Mat img2 = randomMat(size, type, 0.0, depth == CV_8U ? 255.0 : 1.0);
cv::Mat weights1 = randomMat(size, CV_32F, 0, 1);
cv::Mat weights2 = randomMat(size, CV_32F, 0, 1);
cv::ocl::oclMat gimg1(size, type), gimg2(size, type), gweights1(size, CV_32F), gweights2(size, CV_32F);
cv::ocl::oclMat dst(size, type);
gimg1.upload(img1);
gimg2.upload(img2);
gweights1.upload(weights1);
gweights2.upload(weights2);
cv::ocl::blendLinear(gimg1, gimg2, gweights1, gweights2, dst);
cv::Mat result;
cv::Mat result_gold;
dst.download(result);
if (depth == CV_8U)
blendLinearGold<uchar>(img1, img2, weights1, weights2, result_gold);
else
blendLinearGold<float>(img1, img2, weights1, weights2, result_gold);
EXPECT_MAT_NEAR(result_gold, result, CV_MAT_DEPTH(type) == CV_8U ? 1 : 1e-5f, NULL)
}
INSTANTIATE_TEST_CASE_P(GPU_ImgProc, Blend, Combine(
DIFFERENT_SIZES,
testing::Values(MatType(CV_8UC1), MatType(CV_8UC4), MatType(CV_32FC1), MatType(CV_32FC4))
));

View File

@ -0,0 +1,108 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved.
// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// @Authors
// Chunpeng Zhang chunpeng@multicorewareinc.com
//
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other oclMaterials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#include "precomp.hpp"
#include <iomanip>
///////////////////////////////////////////////////////////////////////////////
/// ColumnSum
#ifdef HAVE_OPENCL
////////////////////////////////////////////////////////////////////////
// ColumnSum
PARAM_TEST_CASE(ColumnSum, cv::Size, bool )
{
cv::Size size;
cv::Mat src;
bool useRoi;
std::vector<cv::ocl::Info> oclinfo;
virtual void SetUp()
{
size = GET_PARAM(0);
useRoi = GET_PARAM(1);
int devnums = getDevice(oclinfo, OPENCV_DEFAULT_OPENCL_DEVICE);
CV_Assert(devnums > 0);
}
};
TEST_P(ColumnSum, Accuracy)
{
cv::Mat src = randomMat(size, CV_32FC1);
//cv::Mat src(size,CV_32FC1);
//cv::ocl::oclMat d_dst = ::createMat(size,src.type(),useRoi);
cv::ocl::oclMat d_dst = loadMat(src,useRoi);
cv::ocl::columnSum(loadMat(src,useRoi),d_dst);
cv::Mat dst(d_dst);
for (int j = 0; j < src.cols; ++j)
{
float gold = src.at<float>(0, j);
float res = dst.at<float>(0, j);
ASSERT_NEAR(res, gold, 1e-5);
}
for (int i = 1; i < src.rows; ++i)
{
for (int j = 0; j < src.cols; ++j)
{
float gold = src.at<float>(i, j) += src.at<float>(i - 1, j);
float res = dst.at<float>(i, j);
ASSERT_NEAR(res, gold, 1e-5);
}
}
}
INSTANTIATE_TEST_CASE_P(GPU_ImgProc, ColumnSum, testing::Combine(
DIFFERENT_SIZES,testing::Values(Inverse(false),Inverse(true))));
#endif

View File

@ -0,0 +1,97 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved.
// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// @Authors
// Peng Xiao, pengxiao@multicorewareinc.com
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other oclMaterials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors as is and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#include "precomp.hpp"
using namespace std;
#ifdef HAVE_CLAMDFFT
////////////////////////////////////////////////////////////////////////////
// Dft
PARAM_TEST_CASE(Dft, cv::Size, bool)
{
cv::Size dft_size;
bool dft_rows;
std::vector<cv::ocl::Info> oclinfo;
virtual void SetUp()
{
int devnums = getDevice(oclinfo);
CV_Assert(devnums > 0);
dft_size = GET_PARAM(0);
dft_rows = GET_PARAM(1);
}
};
TEST_P(Dft, C2C)
{
cv::Mat a = randomMat(dft_size, CV_32FC2, 0.0, 10.0);
cv::Mat b_gold;
int flags = 0;
flags |= dft_rows ? cv::DFT_ROWS : 0;
cv::ocl::oclMat d_b;
cv::dft(a, b_gold, flags);
cv::ocl::dft(cv::ocl::oclMat(a), d_b, a.size(), flags);
EXPECT_MAT_NEAR(b_gold, cv::Mat(d_b), a.size().area() * 1e-4, "");
}
TEST_P(Dft, R2CthenC2R)
{
cv::Mat a = randomMat(dft_size, CV_32FC1, 0.0, 10.0);
int flags = 0;
//flags |= dft_rows ? cv::DFT_ROWS : 0; // not supported yet
cv::ocl::oclMat d_b, d_c;
cv::ocl::dft(cv::ocl::oclMat(a), d_b, a.size(), flags);
cv::ocl::dft(d_b, d_c, a.size(), flags + cv::DFT_INVERSE + cv::DFT_REAL_OUTPUT);
EXPECT_MAT_NEAR(a, d_c, a.size().area() * 1e-4, "");
}
INSTANTIATE_TEST_CASE_P(ocl_DFT, Dft, testing::Combine(
testing::Values(cv::Size(5, 4), cv::Size(20, 20)),
testing::Values(false, true)));
#endif // HAVE_CLAMDFFT

View File

@ -0,0 +1,85 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved.
// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// @Authors
// Peng Xiao, pengxiao@multicorewareinc.com
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other oclMaterials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors as is and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#include "precomp.hpp"
using namespace std;
#ifdef HAVE_CLAMDBLAS
////////////////////////////////////////////////////////////////////////////
// GEMM
PARAM_TEST_CASE(Gemm, int, cv::Size, int)
{
int type;
cv::Size mat_size;
int flags;
vector<cv::ocl::Info> info;
virtual void SetUp()
{
type = GET_PARAM(0);
mat_size = GET_PARAM(1);
flags = GET_PARAM(2);
cv::ocl::getDevice(info);
}
};
TEST_P(Gemm, Accuracy)
{
cv::Mat a = randomMat(mat_size, type, 0.0, 10.0);
cv::Mat b = randomMat(mat_size, type, 0.0, 10.0);
cv::Mat c = randomMat(mat_size, type, 0.0, 10.0);
cv::Mat dst;
cv::ocl::oclMat ocl_dst;
cv::gemm(a, b, 1.0, c, 1.0, dst, flags);
cv::ocl::gemm(cv::ocl::oclMat(a), cv::ocl::oclMat(b), 1.0, cv::ocl::oclMat(c), 1.0, ocl_dst, flags);
EXPECT_MAT_NEAR(dst, ocl_dst, mat_size.area() * 1e-4, "");
}
INSTANTIATE_TEST_CASE_P(ocl_gemm, Gemm, testing::Combine(
testing::Values(CV_32FC1, CV_32FC2/*, CV_64FC1, CV_64FC2*/),
testing::Values(cv::Size(20, 20), cv::Size(300, 300)),
testing::Values(0, cv::GEMM_1_T, cv::GEMM_2_T, cv::GEMM_1_T + cv::GEMM_2_T)));
#endif

View File

@ -0,0 +1,172 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved.
// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// @Authors
// Peng Xiao, pengxiao@multicorewareinc.com
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other oclMaterials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors as is and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#include "precomp.hpp"
#define PERF_TEST 0
////////////////////////////////////////////////////////////////////////////////
// MatchTemplate
#define ALL_TEMPLATE_METHODS testing::Values(TemplateMethod(cv::TM_SQDIFF), TemplateMethod(cv::TM_CCORR), TemplateMethod(cv::TM_CCOEFF), TemplateMethod(cv::TM_SQDIFF_NORMED), TemplateMethod(cv::TM_CCORR_NORMED), TemplateMethod(cv::TM_CCOEFF_NORMED))
IMPLEMENT_PARAM_CLASS(TemplateSize, cv::Size);
const char* TEMPLATE_METHOD_NAMES[6] = {"TM_SQDIFF", "TM_SQDIFF_NORMED", "TM_CCORR", "TM_CCORR_NORMED", "TM_CCOEFF", "TM_CCOEFF_NORMED"};
PARAM_TEST_CASE(MatchTemplate8U, cv::Size, TemplateSize, Channels, TemplateMethod)
{
cv::Size size;
cv::Size templ_size;
int cn;
int method;
std::vector<cv::ocl::Info> oclinfo;
virtual void SetUp()
{
size = GET_PARAM(0);
templ_size = GET_PARAM(1);
cn = GET_PARAM(2);
method = GET_PARAM(3);
int devnums = getDevice(oclinfo, OPENCV_DEFAULT_OPENCL_DEVICE);
CV_Assert(devnums > 0);
}
};
TEST_P(MatchTemplate8U, Accuracy)
{
std::cout << "Method: " << TEMPLATE_METHOD_NAMES[method] << std::endl;
std::cout << "Image Size: (" << size.width << ", " << size.height << ")"<< std::endl;
std::cout << "Template Size: (" << templ_size.width << ", " << templ_size.height << ")"<< std::endl;
std::cout << "Channels: " << cn << std::endl;
cv::Mat image = randomMat(size, CV_MAKETYPE(CV_8U, cn));
cv::Mat templ = randomMat(templ_size, CV_MAKETYPE(CV_8U, cn));
cv::ocl::oclMat dst, ocl_image(image), ocl_templ(templ);
cv::ocl::matchTemplate(ocl_image, ocl_templ, dst, method);
cv::Mat dst_gold;
cv::matchTemplate(image, templ, dst_gold, method);
char sss [100] = "";
cv::Mat mat_dst;
dst.download(mat_dst);
EXPECT_MAT_NEAR(dst_gold, mat_dst, templ_size.area() * 1e-1, sss);
#if PERF_TEST
{
P_TEST_FULL({}, {cv::ocl::matchTemplate(ocl_image, ocl_templ, dst, method);}, {});
P_TEST_FULL({}, {cv::matchTemplate(image, templ, dst_gold, method);}, {});
}
#endif // PERF_TEST
}
PARAM_TEST_CASE(MatchTemplate32F, cv::Size, TemplateSize, Channels, TemplateMethod)
{
cv::Size size;
cv::Size templ_size;
int cn;
int method;
std::vector<cv::ocl::Info> oclinfo;
virtual void SetUp()
{
size = GET_PARAM(0);
templ_size = GET_PARAM(1);
cn = GET_PARAM(2);
method = GET_PARAM(3);
int devnums = getDevice(oclinfo, OPENCV_DEFAULT_OPENCL_DEVICE);
CV_Assert(devnums > 0);
}
};
TEST_P(MatchTemplate32F, Accuracy)
{
cv::Mat image = randomMat(size, CV_MAKETYPE(CV_32F, cn));
cv::Mat templ = randomMat(templ_size, CV_MAKETYPE(CV_32F, cn));
cv::ocl::oclMat dst, ocl_image(image), ocl_templ(templ);
cv::ocl::matchTemplate(ocl_image, ocl_templ, dst, method);
cv::Mat dst_gold;
cv::matchTemplate(image, templ, dst_gold, method);
char sss [100] = "";
cv::Mat mat_dst;
dst.download(mat_dst);
EXPECT_MAT_NEAR(dst_gold, mat_dst, templ_size.area() * 1e-1, sss);
#if PERF_TEST
{
std::cout << "Method: " << TEMPLATE_METHOD_NAMES[method] << std::endl;
std::cout << "Image Size: (" << size.width << ", " << size.height << ")"<< std::endl;
std::cout << "Template Size: (" << templ_size.width << ", " << templ_size.height << ")"<< std::endl;
std::cout << "Channels: " << cn << std::endl;
P_TEST_FULL({}, {cv::ocl::matchTemplate(ocl_image, ocl_templ, dst, method);}, {});
P_TEST_FULL({}, {cv::matchTemplate(image, templ, dst_gold, method);}, {});
}
#endif // PERF_TEST
}
INSTANTIATE_TEST_CASE_P(GPU_ImgProc, MatchTemplate8U,
testing::Combine(
DIFFERENT_SIZES,
testing::Values(TemplateSize(cv::Size(5, 5)), TemplateSize(cv::Size(16, 16))/*, TemplateSize(cv::Size(30, 30))*/),
testing::Values(Channels(1), Channels(4)),
ALL_TEMPLATE_METHODS
)
);
INSTANTIATE_TEST_CASE_P(GPU_ImgProc, MatchTemplate32F, testing::Combine(
DIFFERENT_SIZES,
testing::Values(TemplateSize(cv::Size(5, 5)), TemplateSize(cv::Size(16, 16))/*, TemplateSize(cv::Size(30, 30))*/),
testing::Values(Channels(1), Channels(4)),
testing::Values(TemplateMethod(cv::TM_SQDIFF), TemplateMethod(cv::TM_CCORR))));

View File

@ -0,0 +1,295 @@
///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2010-2012, Institute Of Software Chinese Academy Of Science, all rights reserved.
// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// @Authors
// Dachuan Zhao, dachuan@multicorewareinc.com
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other oclMaterials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
//#define PRINT_CPU_TIME 1000
//#define PRINT_TIME
#include "precomp.hpp"
#include <iomanip>
#ifdef HAVE_OPENCL
using namespace cv;
using namespace cv::ocl;
using namespace cvtest;
using namespace testing;
using namespace std;
PARAM_TEST_CASE(PyrDown, MatType, bool)
{
int type;
cv::Scalar val;
//src mat
cv::Mat mat1;
cv::Mat mat2;
cv::Mat mask;
cv::Mat dst;
cv::Mat dst1; //bak, for two outputs
// set up roi
int roicols;
int roirows;
int src1x;
int src1y;
int src2x;
int src2y;
int dstx;
int dsty;
int maskx;
int masky;
//src mat with roi
cv::Mat mat1_roi;
cv::Mat mat2_roi;
cv::Mat mask_roi;
cv::Mat dst_roi;
cv::Mat dst1_roi; //bak
std::vector<cv::ocl::Info> oclinfo;
//ocl dst mat for testing
cv::ocl::oclMat gdst_whole;
cv::ocl::oclMat gdst1_whole; //bak
//ocl mat with roi
cv::ocl::oclMat gmat1;
cv::ocl::oclMat gmat2;
cv::ocl::oclMat gdst;
cv::ocl::oclMat gdst1; //bak
cv::ocl::oclMat gmask;
virtual void SetUp()
{
type = GET_PARAM(0);
cv::RNG &rng = TS::ptr()->get_rng();
cv::Size size(MWIDTH, MHEIGHT);
mat1 = randomMat(rng, size, type, 5, 16, false);
mat2 = randomMat(rng, size, type, 5, 16, false);
dst = randomMat(rng, size, type, 5, 16, false);
dst1 = randomMat(rng, size, type, 5, 16, false);
mask = randomMat(rng, size, CV_8UC1, 0, 2, false);
cv::threshold(mask, mask, 0.5, 255., CV_8UC1);
val = cv::Scalar(rng.uniform(-10.0, 10.0), rng.uniform(-10.0, 10.0), rng.uniform(-10.0, 10.0), rng.uniform(-10.0, 10.0));
int devnums = getDevice(oclinfo);
CV_Assert(devnums > 0);
//if you want to use undefault device, set it here
//setDevice(oclinfo[0]);
}
void Cleanup()
{
mat1.release();
mat2.release();
mask.release();
dst.release();
dst1.release();
mat1_roi.release();
mat2_roi.release();
mask_roi.release();
dst_roi.release();
dst1_roi.release();
gdst_whole.release();
gdst1_whole.release();
gmat1.release();
gmat2.release();
gdst.release();
gdst1.release();
gmask.release();
}
void random_roi()
{
cv::RNG &rng = TS::ptr()->get_rng();
#ifdef RANDOMROI
//randomize ROI
roicols = rng.uniform(1, mat1.cols);
roirows = rng.uniform(1, mat1.rows);
src1x = rng.uniform(0, mat1.cols - roicols);
src1y = rng.uniform(0, mat1.rows - roirows);
dstx = rng.uniform(0, dst.cols - roicols);
dsty = rng.uniform(0, dst.rows - roirows);
#else
roicols = mat1.cols;
roirows = mat1.rows;
src1x = 0;
src1y = 0;
dstx = 0;
dsty = 0;
#endif
maskx = rng.uniform(0, mask.cols - roicols);
masky = rng.uniform(0, mask.rows - roirows);
src2x = rng.uniform(0, mat2.cols - roicols);
src2y = rng.uniform(0, mat2.rows - roirows);
mat1_roi = mat1(Rect(src1x, src1y, roicols, roirows));
mat2_roi = mat2(Rect(src2x, src2y, roicols, roirows));
mask_roi = mask(Rect(maskx, masky, roicols, roirows));
dst_roi = dst(Rect(dstx, dsty, roicols, roirows));
dst1_roi = dst1(Rect(dstx, dsty, roicols, roirows));
gdst_whole = dst;
gdst = gdst_whole(Rect(dstx, dsty, roicols, roirows));
gdst1_whole = dst1;
gdst1 = gdst1_whole(Rect(dstx, dsty, roicols, roirows));
gmat1 = mat1_roi;
gmat2 = mat2_roi;
gmask = mask_roi; //end
}
};
#define VARNAME(A) string(#A);
void PrePrint()
{
//for(int i = 0; i < MHEIGHT; i++)
//{
// printf("(%d) ", i);
// for(int k = 0; k < MWIDTH; k++)
// {
// printf("%d ", mat1_roi.data[i * MHEIGHT + k]);
// }
// printf("\n");
//}
}
void PostPrint()
{
//dst_roi.convertTo(dst_roi,CV_32S);
//cpu_dst.convertTo(cpu_dst,CV_32S);
//dst_roi -= cpu_dst;
//cpu_dst -= dst_roi;
//for(int i = 0; i < MHEIGHT / 2; i++)
//{
// printf("(%d) ", i);
// for(int k = 0; k < MWIDTH / 2; k++)
// {
// if(gmat1.depth() == 0)
// {
// if(gmat1.channels() == 1)
// {
// printf("%d ", dst_roi.data[i * MHEIGHT / 2 + k]);
// }
// else
// {
// printf("%d ", ((unsigned*)dst_roi.data)[i * MHEIGHT / 2 + k]);
// }
// }
// else if(gmat1.depth() == 5)
// {
// printf("%.6f ", ((float*)dst_roi.data)[i * MHEIGHT / 2 + k]);
// }
// }
// printf("\n");
//}
//for(int i = 0; i < MHEIGHT / 2; i++)
//{
// printf("(%d) ", i);
// for(int k = 0; k < MWIDTH / 2; k++)
// {
// if(gmat1.depth() == 0)
// {
// if(gmat1.channels() == 1)
// {
// printf("%d ", cpu_dst.data[i * MHEIGHT / 2 + k]);
// }
// else
// {
// printf("%d ", ((unsigned*)cpu_dst.data)[i * MHEIGHT / 2 + k]);
// }
// }
// else if(gmat1.depth() == 5)
// {
// printf("%.6f ", ((float*)cpu_dst.data)[i * MHEIGHT / 2 + k]);
// }
// }
// printf("\n");
//}
}
////////////////////////////////PyrDown/////////////////////////////////////////////////
//struct PyrDown : ArithmTestBase {};
TEST_P(PyrDown, Mat)
{
for(int j = 0; j < LOOP_TIMES; j++)
{
random_roi();
cv::pyrDown(mat1_roi, dst_roi);
cv::ocl::pyrDown(gmat1, gdst);
cv::Mat cpu_dst;
gdst.download(cpu_dst);
char s[1024];
sprintf(s, "roicols=%d,roirows=%d,src1x=%d,src1y=%d,dstx=%d,dsty=%d,maskx=%d,masky=%d,src2x=%d,src2y=%d", roicols, roirows, src1x, src1y, dstx, dsty, maskx, masky, src2x, src2y);
EXPECT_MAT_NEAR(dst_roi, cpu_dst, dst_roi.depth() == CV_32F ? 1e-5f : 1.0f, s);
Cleanup();
}
}
//********test****************
INSTANTIATE_TEST_CASE_P(GPU_ImgProc, PyrDown, Combine(
Values(CV_8UC1, CV_8UC4, CV_32FC1, CV_32FC4),
Values(false))); // Values(false) is the reserved parameter
#endif // HAVE_OPENCL

View File

@ -0,0 +1,91 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved.
// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// @Authors
// Zhang Chunpeng chunpeng@multicorewareinc.com
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other oclMaterials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#include "precomp.hpp"
#include "opencv2/core/core.hpp"
#ifdef HAVE_OPENCL
PARAM_TEST_CASE(PyrUp,cv::Size,int)
{
cv::Size size;
int type;
std::vector<cv::ocl::Info> oclinfo;
virtual void SetUp()
{
int devnums = cv::ocl::getDevice(oclinfo, OPENCV_DEFAULT_OPENCL_DEVICE);
CV_Assert(devnums > 0);
size = GET_PARAM(0);
type = GET_PARAM(1);
}
};
TEST_P(PyrUp,Accuracy)
{
cv::Mat src = randomMat(size,type);
cv::Mat dst_gold;
cv::pyrUp(src,dst_gold);
cv::ocl::oclMat dst;
cv::ocl::oclMat srcMat(src);
cv::ocl::pyrUp(srcMat,dst);
char s[100]={0};
EXPECT_MAT_NEAR(dst_gold, dst, (src.depth() == CV_32F ? 1e-4f : 1.0),s);
}
#if 1
INSTANTIATE_TEST_CASE_P(GPU_ImgProc, PyrUp, testing::Combine(
testing::Values(cv::Size(32, 32)),
testing::Values(MatType(CV_8UC1),MatType(CV_16UC1),MatType(CV_32FC1),MatType(CV_8UC4),
MatType(CV_16UC4),MatType(CV_32FC4))));
#endif
#endif // HAVE_OPENCL

View File

@ -337,10 +337,9 @@ class RunInfo(object):
def getSvnVersion(self, path, name):
if not path:
setattr(self, name, None)
return
if not self.svnversion_path and hostos == 'nt':
self.tryGetSvnVersionWithTortoise(path, name)
val = None
elif not self.svnversion_path and hostos == 'nt':
val = self.tryGetSvnVersionWithTortoise(path, name)
else:
svnversion = self.svnversion_path
if not svnversion:
@ -348,11 +347,14 @@ class RunInfo(object):
try:
output = Popen([svnversion, "-n", path], stdout=PIPE, stderr=PIPE).communicate()
if not output[1]:
setattr(self, name, output[0])
val = output[0]
else:
setattr(self, name, None)
val = None
except OSError:
setattr(self, name, None)
val = None
if val:
val = val.replace(" ", "_")
setattr(self, name, val)
def tryGetSvnVersionWithTortoise(self, path, name):
try:
@ -371,9 +373,9 @@ class RunInfo(object):
tmpfile = open(tmpfilename2, "r")
version = tmpfile.read()
tmpfile.close()
setattr(self, name, version)
return version
except:
setattr(self, name, None)
return None
finally:
if dir:
shutil.rmtree(dir)

View File

@ -440,8 +440,7 @@ bool BackgroundSubtractorGMG::HistogramFeatureGMG::operator ==(HistogramFeatureG
std::vector<size_t>::iterator color_a;
std::vector<size_t>::iterator color_b;
std::vector<size_t>::iterator color_a_end = this->color.end();
std::vector<size_t>::iterator color_b_end = rhs.color.end();
for (color_a = color.begin(),color_b =rhs.color.begin();color_a!=color_a_end;++color_a,++color_b)
for (color_a = color.begin(), color_b = rhs.color.begin(); color_a != color_a_end; ++color_a, ++color_b)
{
if (*color_a != *color_b)
{