ocl: fix compilation warnings, update openCLExecuteKernelInterop
This commit is contained in:
parent
e3b42ed137
commit
d26f62569d
@ -6,6 +6,7 @@ get_filename_component(OUTPUT_HPP_NAME "${OUTPUT_HPP}" NAME)
|
||||
|
||||
set(STR_CPP "// This file is auto-generated. Do not edit!
|
||||
|
||||
#include \"precomp.hpp\"
|
||||
#include \"${OUTPUT_HPP_NAME}\"
|
||||
|
||||
namespace cv
|
||||
@ -16,6 +17,8 @@ namespace ocl
|
||||
|
||||
set(STR_HPP "// This file is auto-generated. Do not edit!
|
||||
|
||||
#include \"opencv2/ocl/private/util.hpp\"
|
||||
|
||||
namespace cv
|
||||
{
|
||||
namespace ocl
|
||||
|
@ -221,6 +221,33 @@ namespace cv
|
||||
//! set where binary cache to be saved to
|
||||
CV_EXPORTS void setBinaryPath(const char *path);
|
||||
|
||||
// Describes an OpenCL program handed to the library by user code:
// a name, the program text and an optional hash mark.
// The constructors store the given pointers as-is (no copy is made),
// so the strings presumably have to stay valid while the object is in use.
struct ProgramSource
|
||||
{
|
||||
// Name of the program; used as the in-memory cache key, so it should be unique.
const char* name;
|
||||
// Program text.
const char* programStr;
|
||||
// Mark used for caching on disk; NULL when the two-argument constructor is used.
const char* programHash;
|
||||
|
||||
// Cache in memory by name (should be unique). Caching on disk disabled.
// programHash is set to NULL.
|
||||
inline ProgramSource(const char* _name, const char* _programStr)
|
||||
: name(_name), programStr(_programStr), programHash(NULL)
|
||||
{
|
||||
}
|
||||
|
||||
// Cache in memory by name (should be unique). Caching on disk uses programHash mark.
// All three pointers are stored unchanged.
|
||||
inline ProgramSource(const char* _name, const char* _programStr, const char* _programHash)
|
||||
: name(_name), programStr(_programStr), programHash(_programHash)
|
||||
{
|
||||
}
|
||||
};
|
||||
|
||||
//! Calls an OpenCL kernel built from a user-supplied program source.
//! globalThreads must not be NULL (the implementation asserts this).
//! localThreads may be NULL; otherwise globalThreads is rounded up in place
//! to a multiple of localThreads in each of the three dimensions.
//! args holds (size, pointer) pairs that are set as kernel arguments in index order.
//! A channels value other than -1 appends "_C<channels>" to kernelName; depth
//! presumably appends "_D<depth>" in the same way (the condition is not visible here).
//! The call waits for the command queue to finish and releases the kernel before returning.
|
||||
//! Deprecated, will be replaced
|
||||
CV_EXPORTS void openCLExecuteKernelInterop(Context *clCxt,
|
||||
const cv::ocl::ProgramSource& source, string kernelName,
|
||||
size_t globalThreads[3], size_t localThreads[3],
|
||||
std::vector< std::pair<size_t, const void *> > &args,
|
||||
int channels, int depth, const char *build_options);
|
||||
|
||||
class CV_EXPORTS oclMatExpr;
|
||||
//////////////////////////////// oclMat ////////////////////////////////
|
||||
class CV_EXPORTS oclMat
|
||||
|
@ -189,24 +189,6 @@ inline size_t roundUp(size_t sz, size_t n)
|
||||
return result;
|
||||
}
|
||||
|
||||
//! Calls a kernel, by string. Pass globalThreads = NULL, and cleanUp = true, to finally clean-up without executing.
|
||||
CV_EXPORTS double openCLExecuteKernelInterop(Context *clCxt,
|
||||
const cv::ocl::ProgramEntry* source, string kernelName,
|
||||
size_t globalThreads[3], size_t localThreads[3],
|
||||
std::vector< std::pair<size_t, const void *> > &args,
|
||||
int channels, int depth, const char *build_options,
|
||||
bool finish = true, bool measureKernelTime = false,
|
||||
bool cleanUp = true);
|
||||
|
||||
//! Calls a kernel, by file. Pass globalThreads = NULL, and cleanUp = true, to finally clean-up without executing.
|
||||
CV_EXPORTS double openCLExecuteKernelInterop(Context *clCxt,
|
||||
const cv::ocl::ProgramEntry* source, const int numFiles, string kernelName,
|
||||
size_t globalThreads[3], size_t localThreads[3],
|
||||
std::vector< std::pair<size_t, const void *> > &args,
|
||||
int channels, int depth, const char *build_options,
|
||||
bool finish = true, bool measureKernelTime = false,
|
||||
bool cleanUp = true);
|
||||
|
||||
}//namespace ocl
|
||||
}//namespace cv
|
||||
|
||||
|
@ -302,28 +302,27 @@ void openCLExecuteKernel(Context *ctx, const cv::ocl::ProgramEntry* source, stri
|
||||
total_kernel_time = 0;
|
||||
cout << "-------------------------------------" << endl;
|
||||
|
||||
cout << setiosflags(ios::left) << setw(15) << "excute time";
|
||||
cout << setiosflags(ios::left) << setw(15) << "lauch time";
|
||||
cout << setiosflags(ios::left) << setw(15) << "execute time";
|
||||
cout << setiosflags(ios::left) << setw(15) << "launch time";
|
||||
cout << setiosflags(ios::left) << setw(15) << "kernel time" << endl;
|
||||
int i = 0;
|
||||
for(i = 0; i < RUN_TIMES; i++)
|
||||
openCLExecuteKernel_(ctx, source, kernelName, globalThreads, localThreads, args, channels, depth,
|
||||
build_options);
|
||||
|
||||
cout << "average kernel excute time: " << total_execute_time / RUN_TIMES << endl; // "ms" << endl;
|
||||
cout << "average kernel execute time: " << total_execute_time / RUN_TIMES << endl; // "ms" << endl;
|
||||
cout << "average kernel total time: " << total_kernel_time / RUN_TIMES << endl; // "ms" << endl;
|
||||
#endif
|
||||
}
|
||||
|
||||
double openCLExecuteKernelInterop(Context *ctx, const cv::ocl::ProgramEntry* source, string kernelName,
|
||||
void openCLExecuteKernelInterop(Context *ctx, const cv::ocl::ProgramSource& source, string kernelName,
|
||||
size_t globalThreads[3], size_t localThreads[3],
|
||||
vector< pair<size_t, const void *> > &args, int channels, int depth, const char *build_options,
|
||||
bool finish, bool measureKernelTime, bool cleanUp)
|
||||
vector< pair<size_t, const void *> > &args, int channels, int depth, const char *build_options)
|
||||
|
||||
{
|
||||
//construct kernel name
|
||||
//The rule is functionName_Cn_Dn, C represent Channels, D Represent DataType Depth, n represent an integer number
|
||||
//for exmaple split_C2_D2, represent the split kernel with channels =2 and dataType Depth = 2(Data type is char)
|
||||
//for example split_C2_D2, represent the split kernel with channels = 2 and dataType Depth = 2 (Data type is char)
|
||||
stringstream idxStr;
|
||||
if(channels != -1)
|
||||
idxStr << "_C" << channels;
|
||||
@ -331,63 +330,27 @@ double openCLExecuteKernelInterop(Context *ctx, const cv::ocl::ProgramEntry* sou
|
||||
idxStr << "_D" << depth;
|
||||
kernelName += idxStr.str();
|
||||
|
||||
cl_kernel kernel;
|
||||
kernel = openCLGetKernelFromSource(ctx, source, kernelName, build_options);
|
||||
std::string name = std::string("custom_") + source.name;
|
||||
ProgramEntry program = { name.c_str(), source.programStr, source.programHash };
|
||||
cl_kernel kernel = openCLGetKernelFromSource(ctx, &program, kernelName, build_options);
|
||||
|
||||
double kernelTime = 0.0;
|
||||
|
||||
if( globalThreads != NULL)
|
||||
CV_Assert(globalThreads != NULL);
|
||||
if ( localThreads != NULL)
|
||||
{
|
||||
if ( localThreads != NULL)
|
||||
{
|
||||
globalThreads[0] = divUp(globalThreads[0], localThreads[0]) * localThreads[0];
|
||||
globalThreads[1] = divUp(globalThreads[1], localThreads[1]) * localThreads[1];
|
||||
globalThreads[2] = divUp(globalThreads[2], localThreads[2]) * localThreads[2];
|
||||
globalThreads[0] = roundUp(globalThreads[0], localThreads[0]);
|
||||
globalThreads[1] = roundUp(globalThreads[1], localThreads[1]);
|
||||
globalThreads[2] = roundUp(globalThreads[2], localThreads[2]);
|
||||
|
||||
//size_t blockSize = localThreads[0] * localThreads[1] * localThreads[2];
|
||||
cv::ocl::openCLVerifyKernel(ctx, kernel, localThreads);
|
||||
}
|
||||
for(size_t i = 0; i < args.size(); i ++)
|
||||
openCLSafeCall(clSetKernelArg(kernel, i, args[i].first, args[i].second));
|
||||
|
||||
if(measureKernelTime == false)
|
||||
{
|
||||
openCLSafeCall(clEnqueueNDRangeKernel(getClCommandQueue(ctx), kernel, 3, NULL, globalThreads,
|
||||
localThreads, 0, NULL, NULL));
|
||||
}
|
||||
else
|
||||
{
|
||||
cl_event event = NULL;
|
||||
openCLSafeCall(clEnqueueNDRangeKernel(getClCommandQueue(ctx), kernel, 3, NULL, globalThreads,
|
||||
localThreads, 0, NULL, &event));
|
||||
|
||||
cl_ulong end_time, queue_time;
|
||||
|
||||
openCLSafeCall(clWaitForEvents(1, &event));
|
||||
|
||||
openCLSafeCall(clGetEventProfilingInfo(event, CL_PROFILING_COMMAND_END,
|
||||
sizeof(cl_ulong), &end_time, 0));
|
||||
|
||||
openCLSafeCall(clGetEventProfilingInfo(event, CL_PROFILING_COMMAND_QUEUED,
|
||||
sizeof(cl_ulong), &queue_time, 0));
|
||||
|
||||
kernelTime = (double)(end_time - queue_time) / (1000 * 1000);
|
||||
|
||||
clReleaseEvent(event);
|
||||
}
|
||||
cv::ocl::openCLVerifyKernel(ctx, kernel, localThreads);
|
||||
}
|
||||
for(size_t i = 0; i < args.size(); i ++)
|
||||
openCLSafeCall(clSetKernelArg(kernel, i, args[i].first, args[i].second));
|
||||
|
||||
if(finish)
|
||||
{
|
||||
clFinish(getClCommandQueue(ctx));
|
||||
}
|
||||
openCLSafeCall(clEnqueueNDRangeKernel(getClCommandQueue(ctx), kernel, 3, NULL, globalThreads,
|
||||
localThreads, 0, NULL, NULL));
|
||||
|
||||
if(cleanUp)
|
||||
{
|
||||
openCLSafeCall(clReleaseKernel(kernel));
|
||||
}
|
||||
|
||||
return kernelTime;
|
||||
clFinish(getClCommandQueue(ctx));
|
||||
openCLSafeCall(clReleaseKernel(kernel));
|
||||
}
|
||||
|
||||
cl_mem load_constant(cl_context context, cl_command_queue command_queue, const void *value,
|
||||
|
@ -67,7 +67,6 @@
|
||||
|
||||
namespace cv { namespace ocl {
|
||||
|
||||
#define MAX_PROG_CACHE_SIZE 1024
|
||||
/*
|
||||
* The binary caching system to eliminate redundant program source compilation.
|
||||
* Strictly, this is not a cache because we do not implement evictions right now.
|
||||
@ -291,7 +290,7 @@ struct ProgramFileCache
|
||||
bool writeConfigurationToFile(const string& options, std::vector<char>& buf)
|
||||
{
|
||||
if (hash_ == NULL)
|
||||
return true; // don't save dynamic kernels
|
||||
return true; // don't save programs without hash
|
||||
|
||||
if (!f.is_open())
|
||||
{
|
||||
@ -469,7 +468,7 @@ cl_program ProgramCache::getProgram(const Context *ctx, const cv::ocl::ProgramEn
|
||||
{
|
||||
stringstream src_sign;
|
||||
|
||||
src_sign << (int64)(source->programStr);
|
||||
src_sign << source->name;
|
||||
src_sign << getClContext(ctx);
|
||||
if (NULL != build_options)
|
||||
{
|
||||
@ -514,15 +513,10 @@ cl_program ProgramCache::getProgram(const Context *ctx, const cv::ocl::ProgramEn
|
||||
cl_program program = programFileCache.getOrBuildProgram(ctx, source, all_build_options);
|
||||
|
||||
//Cache the binary for future use if build_options is null
|
||||
if( (this->cacheSize += 1) < MAX_PROG_CACHE_SIZE)
|
||||
{
|
||||
cv::AutoLock lockCache(mutexCache);
|
||||
this->addProgram(src_sign.str(), program);
|
||||
}
|
||||
else
|
||||
{
|
||||
cout << "Warning: code cache has been full.\n";
|
||||
}
|
||||
return program;
|
||||
}
|
||||
|
||||
|
80
modules/ocl/test/test_api.cpp
Normal file
80
modules/ocl/test/test_api.cpp
Normal file
@ -0,0 +1,80 @@
|
||||
/*M///////////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
|
||||
//
|
||||
// By downloading, copying, installing or using the software you agree to this license.
|
||||
// If you do not agree to this license, do not download, install,
|
||||
// copy or use the software.
|
||||
//
|
||||
//
|
||||
// License Agreement
|
||||
// For Open Source Computer Vision Library
|
||||
//
|
||||
// Copyright (C) 2010-2013, Advanced Micro Devices, Inc., all rights reserved.
|
||||
// Third party copyrights are property of their respective owners.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without modification,
|
||||
// are permitted provided that the following conditions are met:
|
||||
//
|
||||
// * Redistribution's of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimer.
|
||||
//
|
||||
// * Redistribution's in binary form must reproduce the above copyright notice,
|
||||
// this list of conditions and the following disclaimer in the documentation
|
||||
// and/or other materials provided with the distribution.
|
||||
//
|
||||
// * The name of the copyright holders may not be used to endorse or promote products
|
||||
// derived from this software without specific prior written permission.
|
||||
//
|
||||
// This software is provided by the copyright holders and contributors "as is" and
|
||||
// any express or implied warranties, including, but not limited to, the implied
|
||||
// warranties of merchantability and fitness for a particular purpose are disclaimed.
|
||||
// In no event shall contributors be liable for any direct,
|
||||
// indirect, incidental, special, exemplary, or consequential damages
|
||||
// (including, but not limited to, procurement of substitute goods or services;
|
||||
// loss of use, data, or profits; or business interruption) however caused
|
||||
// and on any theory of liability, whether in contract, strict liability,
|
||||
// or tort (including negligence or otherwise) arising in any way out of
|
||||
// the use of this software, even if advised of the possibility of such damage.
|
||||
//
|
||||
//M*/
|
||||
|
||||
#include "test_precomp.hpp"
|
||||
#include "opencv2/ocl/cl_runtime/cl_runtime.hpp" // for OpenCL types: cl_mem
|
||||
|
||||
// Checks cv::ocl::openCLExecuteKernelInterop with a user-supplied kernel:
// a trivial copy kernel is run over a random CV_32FC4 row and the downloaded
// result is compared with the original host data.
TEST(TestAPI, openCLExecuteKernelInterop)
{
    cv::RNG rng;
    Size sz(10000, 1);
    cv::Mat cpuMat = cvtest::randomMat(rng, sz, CV_32FC4, -10, 10, false);

    cv::ocl::oclMat gpuMat(cpuMat);
    cv::ocl::oclMat gpuMatDst(sz, CV_32FC4);

    // One work-item per float4 element: dst[x] = src[x].
    const char* kernelStr =
        "__kernel void test_kernel(__global float4* src, __global float4* dst) {\n"
        "    int x = get_global_id(0);\n"
        "    dst[x] = src[x];\n"
        "}\n";

    // Two-argument constructor: the program is identified by name only (no hash mark).
    cv::ocl::ProgramSource program("test_interop", kernelStr);

    using namespace std;
    // Kernel arguments are passed as (size, pointer-to-value) pairs.
    vector<pair<size_t , const void *> > args;
    args.push_back( make_pair( sizeof(cl_mem), (void *) &gpuMat.data ));
    args.push_back( make_pair( sizeof(cl_mem), (void *) &gpuMatDst.data ));

    // sz.width is an int; convert explicitly so the brace initializer does not
    // contain an implicit (narrowing) int -> size_t conversion.
    size_t globalThreads[3] = { static_cast<size_t>(sz.width), 1, 1 };
    cv::ocl::openCLExecuteKernelInterop(
        gpuMat.clCxt,
        program,
        "test_kernel",
        globalThreads, NULL, args,
        -1, -1, // channels / depth: -1 as in the original call
        "");

    cv::Mat dst;
    gpuMatDst.download(dst);

    EXPECT_LE(checkNorm(cpuMat, dst), 1e-3);
}
|
Loading…
x
Reference in New Issue
Block a user