ocl: Move static oclMat variables into FarnebackOpticalFlow class

Move some static functions into the FarnebackOpticalFlow class as well,
so they can access these new class variables.

oclMat objects must not be given static storage duration, because their
destructor depends on the statically defined __module variable from
cl_context.cpp. Since objects with static storage duration that are defined
in separate compilation units have no guaranteed relative destruction order,
there is always the possibility that __module will be destroyed before an
oclMat object, which results in a segfault.
This commit is contained in:
Tom Stellard 2014-11-13 11:16:10 -05:00
parent eedde323ce
commit 42b1bd56cc
2 changed files with 106 additions and 97 deletions

View File

@ -1473,6 +1473,16 @@ namespace cv
void releaseMemory(); void releaseMemory();
private: private:
void setGaussianBlurKernel(const float *c_gKer, int ksizeHalf);
void gaussianBlurOcl(const oclMat &src, int ksizeHalf, oclMat &dst);
void polynomialExpansionOcl(
const oclMat &src, int polyN, oclMat &dst);
void gaussianBlur5Ocl(
const oclMat &src, int ksizeHalf, oclMat &dst);
void prepareGaussian( void prepareGaussian(
int n, double sigma, float *g, float *xg, float *xxg, int n, double sigma, float *g, float *xg, float *xxg,
double &ig11, double &ig03, double &ig33, double &ig55); double &ig11, double &ig03, double &ig33, double &ig55);
@ -1490,6 +1500,11 @@ namespace cv
oclMat frames_[2]; oclMat frames_[2];
oclMat pyrLevel_[2], M_, bufM_, R_[2], blurredFrame_[2]; oclMat pyrLevel_[2], M_, bufM_, R_[2], blurredFrame_[2];
std::vector<oclMat> pyramid0_, pyramid1_; std::vector<oclMat> pyramid0_, pyramid1_;
float ig[4];
oclMat gMat;
oclMat xgMat;
oclMat xxgMat;
oclMat gKerMat;
}; };
//////////////// build warping maps //////////////////// //////////////// build warping maps ////////////////////

View File

@ -57,77 +57,6 @@ namespace cv {
namespace ocl { namespace ocl {
namespace optflow_farneback namespace optflow_farneback
{ {
// Namespace-scope state shared by the Farneback kernel launchers in this file.
// g, xg and xxg are uploaded by setPolynomialExpansionConsts() and passed to the
// "polynomialExpansion" kernel; gKer is uploaded by setGaussianBlurKernel() and
// passed to the Gaussian blur kernels.
// NOTE(review): per the commit description, oclMat objects with static storage
// can be destroyed after the OpenCL module state they depend on, which crashes
// at program exit.
oclMat g;
oclMat xg;
oclMat xxg;
oclMat gKer;
// Four scalars (ig11, ig03, ig33, ig55) handed to the kernel as one cl_float4.
float ig[4];
inline void setGaussianBlurKernel(const float *c_gKer, int ksizeHalf)
{
cv::Mat t_gKer(1, ksizeHalf + 1, CV_32FC1, const_cast<float *>(c_gKer));
gKer.upload(t_gKer);
}
// Runs the "gaussianBlur" kernel of the optical_flow_farneback program on src,
// writing the result to dst.
//   src       - input image
//   ksizeHalf - half-width of the blur kernel previously uploaded to gKer
//   dst       - output image; must already have the same size as src (asserted)
static void gaussianBlurOcl(const oclMat &src, int ksizeHalf, oclMat &dst)
{
string kernelName("gaussianBlur");
// Work-group width: 128 on Android builds, 256 elsewhere.
#ifdef ANDROID
size_t localThreads[3] = { 128, 1, 1 };
#else
size_t localThreads[3] = { 256, 1, 1 };
#endif
// Global range spans src.cols x src.rows.
size_t globalThreads[3] = { src.cols, src.rows, 1 };
// Bytes for one work-group row of floats plus ksizeHalf extra floats on each side.
int smem_size = (localThreads[0] + 2*ksizeHalf) * sizeof(float);
CV_Assert(dst.size() == src.size());
// Arguments are pushed in the order they are bound to the kernel; keep the
// sequence in sync with the kernel source.
std::vector< std::pair<size_t, const void *> > args;
args.push_back(std::make_pair(sizeof(cl_mem), (void *)&dst.data));
args.push_back(std::make_pair(sizeof(cl_mem), (void *)&src.data));
args.push_back(std::make_pair(sizeof(cl_mem), (void *)&gKer.data));
// Size-only argument with a NULL pointer - presumably the kernel's local-memory
// scratch buffer (TODO confirm against the kernel source).
args.push_back(std::make_pair(smem_size, (void *)NULL));
args.push_back(std::make_pair(sizeof(cl_int), (void *)&dst.rows));
args.push_back(std::make_pair(sizeof(cl_int), (void *)&dst.cols));
args.push_back(std::make_pair(sizeof(cl_int), (void *)&dst.step));
args.push_back(std::make_pair(sizeof(cl_int), (void *)&src.step));
args.push_back(std::make_pair(sizeof(cl_int), (void *)&ksizeHalf));
openCLExecuteKernel(Context::getContext(), &optical_flow_farneback, kernelName,
globalThreads, localThreads, args, -1, -1);
}
// Runs the "polynomialExpansion" kernel of the optical_flow_farneback program.
//   src   - input image
//   polyN - neighbourhood size parameter; baked into the kernel build as -D polyN=<value>
//   dst   - output buffer; its size is not checked here
// Uses the namespace-scope g, xg, xxg buffers and the ig constants.
static void polynomialExpansionOcl(const oclMat &src, int polyN, oclMat &dst)
{
string kernelName("polynomialExpansion");
// Work-group width: 128 on Android builds, 256 elsewhere.
#ifdef ANDROID
size_t localThreads[3] = { 128, 1, 1 };
#else
size_t localThreads[3] = { 256, 1, 1 };
#endif
// Each work-group of localThreads[0] items is counted as covering only
// localThreads[0] - 2*polyN columns - presumably polyN border items on each
// side (TODO confirm against the kernel source).
size_t globalThreads[3] = { divUp(src.cols, localThreads[0] - 2*polyN) * localThreads[0], src.rows, 1 };
// Bytes for three floats per work-item in the group.
int smem_size = 3 * localThreads[0] * sizeof(float);
// Arguments are pushed in the order they are bound to the kernel; keep the
// sequence in sync with the kernel source.
std::vector< std::pair<size_t, const void *> > args;
args.push_back(std::make_pair(sizeof(cl_mem), (void *)&dst.data));
args.push_back(std::make_pair(sizeof(cl_mem), (void *)&src.data));
args.push_back(std::make_pair(sizeof(cl_mem), (void *)&g.data));
args.push_back(std::make_pair(sizeof(cl_mem), (void *)&xg.data));
args.push_back(std::make_pair(sizeof(cl_mem), (void *)&xxg.data));
// Size-only argument with a NULL pointer - presumably the kernel's local-memory
// scratch buffer (TODO confirm against the kernel source).
args.push_back(std::make_pair(smem_size, (void *)NULL));
// The four ig floats are passed together as a single cl_float4.
args.push_back(std::make_pair(sizeof(cl_float4), (void *)&ig));
args.push_back(std::make_pair(sizeof(cl_int), (void *)&src.rows));
args.push_back(std::make_pair(sizeof(cl_int), (void *)&src.cols));
args.push_back(std::make_pair(sizeof(cl_int), (void *)&dst.step));
args.push_back(std::make_pair(sizeof(cl_int), (void *)&src.step));
// polyN is supplied to the kernel as a compile-time define.
char opt [128];
sprintf(opt, "-D polyN=%d", polyN);
openCLExecuteKernel(Context::getContext(), &optical_flow_farneback, kernelName,
globalThreads, localThreads, args, -1, -1, opt);
}
static void updateMatricesOcl(const oclMat &flowx, const oclMat &flowy, const oclMat &R0, const oclMat &R1, oclMat &M) static void updateMatricesOcl(const oclMat &flowx, const oclMat &flowy, const oclMat &R0, const oclMat &R1, oclMat &M)
{ {
@ -207,8 +136,83 @@ static void updateFlowOcl(const oclMat &M, oclMat &flowx, oclMat &flowy)
openCLExecuteKernel(Context::getContext(), &optical_flow_farneback, kernelName, openCLExecuteKernel(Context::getContext(), &optical_flow_farneback, kernelName,
globalThreads, localThreads, args, -1, -1); globalThreads, localThreads, args, -1, -1);
} }
}
}
} // namespace cv { namespace ocl { namespace optflow_farneback
static void gaussianBlur5Ocl(const oclMat &src, int ksizeHalf, oclMat &dst) static oclMat allocMatFromBuf(int rows, int cols, int type, oclMat &mat)
{
if (!mat.empty() && mat.type() == type && mat.rows >= rows && mat.cols >= cols)
return mat(Rect(0, 0, cols, rows));
return mat = oclMat(rows, cols, type);
}
void cv::ocl::FarnebackOpticalFlow::setGaussianBlurKernel(const float *c_gKer, int ksizeHalf)
{
cv::Mat t_gKer(1, ksizeHalf + 1, CV_32FC1, const_cast<float *>(c_gKer));
gKerMat.upload(t_gKer);
}
// Runs the "gaussianBlur" kernel of the optical_flow_farneback program on src,
// writing the result to dst.
//   src       - input image
//   ksizeHalf - half-width of the blur kernel previously uploaded to gKerMat
//   dst       - output image; must already have the same size as src (asserted)
void cv::ocl::FarnebackOpticalFlow::gaussianBlurOcl(const oclMat &src, int ksizeHalf, oclMat &dst)
{
string kernelName("gaussianBlur");
// Work-group width: 128 on Android builds, 256 elsewhere.
#ifdef ANDROID
size_t localThreads[3] = { 128, 1, 1 };
#else
size_t localThreads[3] = { 256, 1, 1 };
#endif
// Global range spans src.cols x src.rows.
size_t globalThreads[3] = { src.cols, src.rows, 1 };
// Bytes for one work-group row of floats plus ksizeHalf extra floats on each side.
int smem_size = (localThreads[0] + 2*ksizeHalf) * sizeof(float);
CV_Assert(dst.size() == src.size());
// Arguments are pushed in the order they are bound to the kernel; keep the
// sequence in sync with the kernel source.
std::vector< std::pair<size_t, const void *> > args;
args.push_back(std::make_pair(sizeof(cl_mem), (void *)&dst.data));
args.push_back(std::make_pair(sizeof(cl_mem), (void *)&src.data));
args.push_back(std::make_pair(sizeof(cl_mem), (void *)&gKerMat.data));
// Size-only argument with a NULL pointer - presumably the kernel's local-memory
// scratch buffer (TODO confirm against the kernel source).
args.push_back(std::make_pair(smem_size, (void *)NULL));
args.push_back(std::make_pair(sizeof(cl_int), (void *)&dst.rows));
args.push_back(std::make_pair(sizeof(cl_int), (void *)&dst.cols));
args.push_back(std::make_pair(sizeof(cl_int), (void *)&dst.step));
args.push_back(std::make_pair(sizeof(cl_int), (void *)&src.step));
args.push_back(std::make_pair(sizeof(cl_int), (void *)&ksizeHalf));
openCLExecuteKernel(Context::getContext(), &optical_flow_farneback, kernelName,
globalThreads, localThreads, args, -1, -1);
}
// Runs the "polynomialExpansion" kernel of the optical_flow_farneback program.
//   src   - input image
//   polyN - neighbourhood size parameter; baked into the kernel build as -D polyN=<value>
//   dst   - output buffer; its size is not checked here
// Uses this object's gMat, xgMat, xxgMat buffers and ig constants, which are
// filled in by setPolynomialExpansionConsts().
void cv::ocl::FarnebackOpticalFlow::polynomialExpansionOcl(const oclMat &src, int polyN, oclMat &dst)
{
string kernelName("polynomialExpansion");
// Work-group width: 128 on Android builds, 256 elsewhere.
#ifdef ANDROID
size_t localThreads[3] = { 128, 1, 1 };
#else
size_t localThreads[3] = { 256, 1, 1 };
#endif
// Each work-group of localThreads[0] items is counted as covering only
// localThreads[0] - 2*polyN columns - presumably polyN border items on each
// side (TODO confirm against the kernel source).
size_t globalThreads[3] = { divUp(src.cols, localThreads[0] - 2*polyN) * localThreads[0], src.rows, 1 };
// Bytes for three floats per work-item in the group.
int smem_size = 3 * localThreads[0] * sizeof(float);
// Arguments are pushed in the order they are bound to the kernel; keep the
// sequence in sync with the kernel source.
std::vector< std::pair<size_t, const void *> > args;
args.push_back(std::make_pair(sizeof(cl_mem), (void *)&dst.data));
args.push_back(std::make_pair(sizeof(cl_mem), (void *)&src.data));
args.push_back(std::make_pair(sizeof(cl_mem), (void *)&gMat.data));
args.push_back(std::make_pair(sizeof(cl_mem), (void *)&xgMat.data));
args.push_back(std::make_pair(sizeof(cl_mem), (void *)&xxgMat.data));
// Size-only argument with a NULL pointer - presumably the kernel's local-memory
// scratch buffer (TODO confirm against the kernel source).
args.push_back(std::make_pair(smem_size, (void *)NULL));
// The four ig floats are passed together as a single cl_float4.
args.push_back(std::make_pair(sizeof(cl_float4), (void *)&ig));
args.push_back(std::make_pair(sizeof(cl_int), (void *)&src.rows));
args.push_back(std::make_pair(sizeof(cl_int), (void *)&src.cols));
args.push_back(std::make_pair(sizeof(cl_int), (void *)&dst.step));
args.push_back(std::make_pair(sizeof(cl_int), (void *)&src.step));
// polyN is supplied to the kernel as a compile-time define.
char opt [128];
sprintf(opt, "-D polyN=%d", polyN);
openCLExecuteKernel(Context::getContext(), &optical_flow_farneback, kernelName,
globalThreads, localThreads, args, -1, -1, opt);
}
void cv::ocl::FarnebackOpticalFlow::gaussianBlur5Ocl(const oclMat &src, int ksizeHalf, oclMat &dst)
{ {
string kernelName("gaussianBlur5"); string kernelName("gaussianBlur5");
int height = src.rows / 5; int height = src.rows / 5;
@ -223,7 +227,7 @@ static void gaussianBlur5Ocl(const oclMat &src, int ksizeHalf, oclMat &dst)
std::vector< std::pair<size_t, const void *> > args; std::vector< std::pair<size_t, const void *> > args;
args.push_back(std::make_pair(sizeof(cl_mem), (void *)&dst.data)); args.push_back(std::make_pair(sizeof(cl_mem), (void *)&dst.data));
args.push_back(std::make_pair(sizeof(cl_mem), (void *)&src.data)); args.push_back(std::make_pair(sizeof(cl_mem), (void *)&src.data));
args.push_back(std::make_pair(sizeof(cl_mem), (void *)&gKer.data)); args.push_back(std::make_pair(sizeof(cl_mem), (void *)&gKerMat.data));
args.push_back(std::make_pair(smem_size, (void *)NULL)); args.push_back(std::make_pair(smem_size, (void *)NULL));
args.push_back(std::make_pair(sizeof(cl_int), (void *)&height)); args.push_back(std::make_pair(sizeof(cl_int), (void *)&height));
args.push_back(std::make_pair(sizeof(cl_int), (void *)&src.cols)); args.push_back(std::make_pair(sizeof(cl_int), (void *)&src.cols));
@ -234,16 +238,6 @@ static void gaussianBlur5Ocl(const oclMat &src, int ksizeHalf, oclMat &dst)
openCLExecuteKernel(Context::getContext(), &optical_flow_farneback, kernelName, openCLExecuteKernel(Context::getContext(), &optical_flow_farneback, kernelName,
globalThreads, localThreads, args, -1, -1); globalThreads, localThreads, args, -1, -1);
} }
}
}
} // namespace cv { namespace ocl { namespace optflow_farneback
// Returns a rows x cols matrix of the given type, reusing mat's storage when possible.
//   rows, cols - requested dimensions
//   type       - requested element type
//   mat        - backing buffer; replaced by a fresh allocation when it is empty,
//                has a different type, or is smaller than requested in either dimension
// When mat is large enough, the result is the top-left rows x cols region of it.
static oclMat allocMatFromBuf(int rows, int cols, int type, oclMat &mat)
{
    const bool reusable = !mat.empty()
                          && mat.type() == type
                          && mat.rows >= rows
                          && mat.cols >= cols;

    if (!reusable)
    {
        // Buffer cannot serve the request: reallocate it and hand back the new matrix.
        mat = oclMat(rows, cols, type);
        return mat;
    }

    return mat(Rect(0, 0, cols, rows));
}
cv::ocl::FarnebackOpticalFlow::FarnebackOpticalFlow() cv::ocl::FarnebackOpticalFlow::FarnebackOpticalFlow()
{ {
@ -343,14 +337,14 @@ void cv::ocl::FarnebackOpticalFlow::setPolynomialExpansionConsts(int n, double s
cv::Mat t_xg(1, n + 1, CV_32FC1, xg); cv::Mat t_xg(1, n + 1, CV_32FC1, xg);
cv::Mat t_xxg(1, n + 1, CV_32FC1, xxg); cv::Mat t_xxg(1, n + 1, CV_32FC1, xxg);
optflow_farneback::g.upload(t_g); gMat.upload(t_g);
optflow_farneback::xg.upload(t_xg); xgMat.upload(t_xg);
optflow_farneback::xxg.upload(t_xxg); xxgMat.upload(t_xxg);
optflow_farneback::ig[0] = static_cast<float>(ig11); ig[0] = static_cast<float>(ig11);
optflow_farneback::ig[1] = static_cast<float>(ig03); ig[1] = static_cast<float>(ig03);
optflow_farneback::ig[2] = static_cast<float>(ig33); ig[2] = static_cast<float>(ig33);
optflow_farneback::ig[3] = static_cast<float>(ig55); ig[3] = static_cast<float>(ig55);
} }
void cv::ocl::FarnebackOpticalFlow::updateFlow_boxFilter( void cv::ocl::FarnebackOpticalFlow::updateFlow_boxFilter(
@ -372,7 +366,7 @@ void cv::ocl::FarnebackOpticalFlow::updateFlow_gaussianBlur(
const oclMat& R0, const oclMat& R1, oclMat& flowx, oclMat& flowy, const oclMat& R0, const oclMat& R1, oclMat& flowx, oclMat& flowy,
oclMat& M, oclMat &bufM, int blockSize, bool updateMatrices) oclMat& M, oclMat &bufM, int blockSize, bool updateMatrices)
{ {
optflow_farneback::gaussianBlur5Ocl(M, blockSize/2, bufM); gaussianBlur5Ocl(M, blockSize/2, bufM);
swap(M, bufM); swap(M, bufM);
@ -491,8 +485,8 @@ void cv::ocl::FarnebackOpticalFlow::operator ()(
if (fastPyramids) if (fastPyramids)
{ {
optflow_farneback::polynomialExpansionOcl(pyramid0_[k], polyN, R[0]); polynomialExpansionOcl(pyramid0_[k], polyN, R[0]);
optflow_farneback::polynomialExpansionOcl(pyramid1_[k], polyN, R[1]); polynomialExpansionOcl(pyramid1_[k], polyN, R[1]);
} }
else else
{ {
@ -508,13 +502,13 @@ void cv::ocl::FarnebackOpticalFlow::operator ()(
}; };
Mat g = getGaussianKernel(smoothSize, sigma, CV_32F); Mat g = getGaussianKernel(smoothSize, sigma, CV_32F);
optflow_farneback::setGaussianBlurKernel(g.ptr<float>(smoothSize/2), smoothSize/2); setGaussianBlurKernel(g.ptr<float>(smoothSize/2), smoothSize/2);
for (int i = 0; i < 2; i++) for (int i = 0; i < 2; i++)
{ {
optflow_farneback::gaussianBlurOcl(frames_[i], smoothSize/2, blurredFrame[i]); gaussianBlurOcl(frames_[i], smoothSize/2, blurredFrame[i]);
resize(blurredFrame[i], pyrLevel[i], Size(width, height), INTER_LINEAR); resize(blurredFrame[i], pyrLevel[i], Size(width, height), INTER_LINEAR);
optflow_farneback::polynomialExpansionOcl(pyrLevel[i], polyN, R[i]); polynomialExpansionOcl(pyrLevel[i], polyN, R[i]);
} }
} }
@ -523,7 +517,7 @@ void cv::ocl::FarnebackOpticalFlow::operator ()(
if (flags & OPTFLOW_FARNEBACK_GAUSSIAN) if (flags & OPTFLOW_FARNEBACK_GAUSSIAN)
{ {
Mat g = getGaussianKernel(winSize, winSize/2*0.3f, CV_32F); Mat g = getGaussianKernel(winSize, winSize/2*0.3f, CV_32F);
optflow_farneback::setGaussianBlurKernel(g.ptr<float>(winSize/2), winSize/2); setGaussianBlurKernel(g.ptr<float>(winSize/2), winSize/2);
} }
for (int i = 0; i < numIters; i++) for (int i = 0; i < numIters; i++)
{ {