Merge pull request #2464 from KonstantinMatskevich:ocl_stereobm_experiment
This commit is contained in:
commit
6e44f05e6f
77
modules/calib3d/perf/opencl/perf_stereobm.cpp
Normal file
77
modules/calib3d/perf/opencl/perf_stereobm.cpp
Normal file
@ -0,0 +1,77 @@
|
|||||||
|
/*M///////////////////////////////////////////////////////////////////////////////////////
|
||||||
|
//
|
||||||
|
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
|
||||||
|
//
|
||||||
|
// By downloading, copying, installing or using the software you agree to this license.
|
||||||
|
// If you do not agree to this license, do not download, install,
|
||||||
|
// copy or use the software.
|
||||||
|
//
|
||||||
|
//
|
||||||
|
// License Agreement
|
||||||
|
// For Open Source Computer Vision Library
|
||||||
|
//
|
||||||
|
// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved.
|
||||||
|
// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
|
||||||
|
// Third party copyrights are property of their respective owners.
|
||||||
|
//
|
||||||
|
// Redistribution and use in source and binary forms, with or without modification,
|
||||||
|
// are permitted provided that the following conditions are met:
|
||||||
|
//
|
||||||
|
// * Redistribution's of source code must retain the above copyright notice,
|
||||||
|
// this list of conditions and the following disclaimer.
|
||||||
|
//
|
||||||
|
// * Redistribution's in binary form must reproduce the above copyright notice,
|
||||||
|
// this list of conditions and the following disclaimer in the documentation
|
||||||
|
// and/or other materials provided with the distribution.
|
||||||
|
//
|
||||||
|
// * The name of the copyright holders may not be used to endorse or promote products
|
||||||
|
// derived from this software without specific prior written permission.
|
||||||
|
//
|
||||||
|
// This software is provided by the copyright holders and contributors as is and
|
||||||
|
// any express or implied warranties, including, but not limited to, the implied
|
||||||
|
// warranties of merchantability and fitness for a particular purpose are disclaimed.
|
||||||
|
// In no event shall the Intel Corporation or contributors be liable for any direct,
|
||||||
|
// indirect, incidental, special, exemplary, or consequential damages
|
||||||
|
// (including, but not limited to, procurement of substitute goods or services;
|
||||||
|
// loss of use, data, or profits; or business interruption) however caused
|
||||||
|
// and on any theory of liability, whether in contract, strict liability,
|
||||||
|
// or tort (including negligence or otherwise) arising in any way out of
|
||||||
|
// the use of this software, even if advised of the possibility of such damage.
|
||||||
|
//
|
||||||
|
//M*/
|
||||||
|
|
||||||
|
#include "perf_precomp.hpp"
|
||||||
|
#include "opencv2/ts/ocl_perf.hpp"
|
||||||
|
|
||||||
|
#ifdef HAVE_OPENCL
|
||||||
|
|
||||||
|
namespace cvtest {
|
||||||
|
namespace ocl {
|
||||||
|
|
||||||
|
typedef std::tr1::tuple<int, int> StereoBMFixture_t;
|
||||||
|
typedef TestBaseWithParam<StereoBMFixture_t> StereoBMFixture;
|
||||||
|
|
||||||
|
// Performance test for StereoBM::compute() on UMat inputs.
// Parameters: <number of disparities, SAD window size>.
OCL_PERF_TEST_P(StereoBMFixture, StereoBM,
                ::testing::Combine(OCL_PERF_ENUM(32, 64, 128), OCL_PERF_ENUM(11,21)))
{
    const int n_disp = get<0>(GetParam());
    const int winSize = get<1>(GetParam());

    // Load the stereo pair as 8-bit grayscale and upload it into UMat storage.
    UMat left, right, disp;
    imread(getDataPath("gpu/stereobm/aloe-L.png"), IMREAD_GRAYSCALE).copyTo(left);
    imread(getDataPath("gpu/stereobm/aloe-R.png"), IMREAD_GRAYSCALE).copyTo(right);
    ASSERT_FALSE(left.empty());
    ASSERT_FALSE(right.empty());

    declare.in(left, right);

    // XSOBEL prefilter with a zero texture threshold.
    Ptr<StereoBM> bm = createStereoBM(n_disp, winSize);
    bm->setPreFilterType(StereoBM::PREFILTER_XSOBEL);
    bm->setTextureThreshold(0);

    OCL_TEST_CYCLE() bm->compute(left, right, disp);

    SANITY_CHECK(disp, 1e-3, ERROR_RELATIVE);
}
|
||||||
|
|
||||||
|
}//ocl
|
||||||
|
}//cvtest
|
||||||
|
#endif
|
297
modules/calib3d/src/opencl/stereobm.cl
Normal file
297
modules/calib3d/src/opencl/stereobm.cl
Normal file
@ -0,0 +1,297 @@
|
|||||||
|
/*M///////////////////////////////////////////////////////////////////////////////////////
|
||||||
|
//
|
||||||
|
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
|
||||||
|
//
|
||||||
|
// By downloading, copying, installing or using the software you agree to this license.
|
||||||
|
// If you do not agree to this license, do not download, install,
|
||||||
|
// copy or use the software.
|
||||||
|
//
|
||||||
|
//
|
||||||
|
// License Agreement
|
||||||
|
// For Open Source Computer Vision Library
|
||||||
|
//
|
||||||
|
// Copyright (C) 2010-2012, Institute Of Software Chinese Academy Of Science, all rights reserved.
|
||||||
|
// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
|
||||||
|
// Third party copyrights are property of their respective owners.
|
||||||
|
//
|
||||||
|
// Redistribution and use in source and binary forms, with or without modification,
|
||||||
|
// are permitted provided that the following conditions are met:
|
||||||
|
//
|
||||||
|
// * Redistribution's of source code must retain the above copyright notice,
|
||||||
|
// this list of conditions and the following disclaimer.
|
||||||
|
//
|
||||||
|
// * Redistribution's in binary form must reproduce the above copyright notice,
|
||||||
|
// this list of conditions and the following disclaimer in the documentation
|
||||||
|
// and/or other materials provided with the distribution.
|
||||||
|
//
|
||||||
|
// * The name of the copyright holders may not be used to endorse or promote products
|
||||||
|
// derived from this software without specific prior written permission.
|
||||||
|
//
|
||||||
|
// This software is provided by the copyright holders and contributors as is and
|
||||||
|
// any express or implied warranties, including, but not limited to, the implied
|
||||||
|
// warranties of merchantability and fitness for a particular purpose are disclaimed.
|
||||||
|
// In no event shall the Intel Corporation or contributors be liable for any direct,
|
||||||
|
// indirect, incidental, special, exemplary, or consequential damages
|
||||||
|
// (including, but not limited to, procurement of substitute goods or services;
|
||||||
|
// loss of use, data, or profits; or business interruption) however caused
|
||||||
|
// and on any theory of liability, whether in contract, strict liability,
|
||||||
|
// or tort (including negligence or otherwise) arising in any way out of
|
||||||
|
// the use of this software, even if advised of the possibility of such damage.
|
||||||
|
//
|
||||||
|
//M*/
|
||||||
|
|
||||||
|
//////////////////////////////////////////////////////////////////////////////////////////////////
|
||||||
|
////////////////////////////////////////// stereoBM //////////////////////////////////////////////
|
||||||
|
//////////////////////////////////////////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
|
#ifdef csize
|
||||||
|
|
||||||
|
#define MAX_VAL 32767
|
||||||
|
|
||||||
|
// Uniqueness test and sub-pixel disparity write for one output pixel.
//
//   cost            - this work-item's cost slot in local memory; cost[-w] and cost[w]
//                     are read as the costs of the two neighbouring disparities
//   disp            - destination of the 16-bit disparity for the pixel
//   uniquenessRatio - percentage margin added to the best cost to form the rejection threshold
//   bestDisp/bestCost - shared best disparity / best cost for the pixel; bestCost is
//                     overwritten with the FILTERED marker when the match is rejected
//   d               - disparity index handled by this work-item
//   x, y, cols, rows, wsz2, mindisp - used only for the "pixel is inside the image" test
//
// The function contains a work-group barrier, so every work-item of the group has to call it.
// NOTE(review): *bestCost is read by all work-items and may be overwritten by some of them
// before the barrier below without an intermediate barrier - confirm this ordering is intended.
void calcDisp(__local short * cost, __global short * disp, int uniquenessRatio, int mindisp, int ndisp, int w,
              __local int * bestDisp, __local int * bestCost, int d, int x, int y, int cols, int rows, int wsz2)
{
    short FILTERED = (mindisp - 1)<<4;
    int best_disp = *bestDisp, best_cost = *bestCost, best_disp_back = ndisp - best_disp - 1;

    short ownCost = cost[0];

    // A cost within the uniqueness margin at a disparity that is not adjacent to the
    // best one makes the match ambiguous.
    int thresh = best_cost + (best_cost * uniquenessRatio/100);
    bool farFromBest = (d < (best_disp_back - 1)) || (d > (best_disp_back + 1));

    if((ownCost <= thresh) && farFromBest)
        *bestCost = FILTERED;
    barrier(CLK_LOCAL_MEM_FENCE);

    bool insideImage = (x < cols-wsz2-mindisp) && (y < rows-wsz2);
    if( *bestCost != FILTERED && insideImage && d == best_disp_back)
    {
        // Costs at the neighbouring disparities; at either end of the range the only
        // existing neighbour is used on both sides.
        int y3 = (best_disp_back > 0) ? cost[-w] : cost[w];
        int y2 = ownCost;
        int y1 = (best_disp_back < ndisp-1) ? cost[w] : cost[-w];

        int d_aprox = y3+y1-2*y2 + abs(y3-y1);
        int subpixel = (d_aprox != 0) ? (y3-y1)*256/d_aprox : 0;

        disp[0] = (short)(((best_disp_back + mindisp)*256 + subpixel + 15) >> 4);
    }
}
|
||||||
|
|
||||||
|
// Maps tile-local coordinates (x, y) and disparity index d to an offset in the
// local cost array: each disparity owns 2*w consecutive slots, and the slot inside
// that range depends only on the difference x - y.
int calcLocalIdx(int x, int y, int d, int w)
{
    const int planeStart = d*2*w;
    const int diagonal = w - 1 - y + x;
    return planeStart + diagonal;
}
|
||||||
|
|
||||||
|
// Advances the tile-local position (*x, *y) of a work-item to its next pixel.
//   nthread == 0: moves one row down; after the position (k, k) jumps to (k + 2, 0).
//   nthread == 1: moves one column right; after the position (k, k) jumps to (0, k + 1).
// Here (k, k) refers to the position after undoing the one-column offset applied to
// nthread == 0. nthread is expected to be 0 or 1 (the calling kernel derives it as
// local id / ndisp).
void calcNewCoordinates(int * x, int * y, int nthread)
{
    const int other = 1 - nthread;
    const int prevX = *x - other;
    const int prevY = *y;

    if (prevX == prevY)
    {
        // Diagonal reached: restart on the next column (nthread 0) or next row (nthread 1).
        *x = (prevX + 2)*other;
        *y = (prevY + 1)*nthread;
    }
    else
    {
        *x = prevX + 1;
        *y = prevY + other;
    }
}
|
||||||
|
|
||||||
|
// Updates a window cost incrementally: sums winsize absolute left/right differences
// along one line, adds that sum to `cost`, subtracts the sum stored in the current
// slot of the private ring buffer `costbuf` (wsz entries), and stores the new sum there.
//   nthread == 0: the line starts at (x + wsz2, y - wsz2) and advances by `cols`.
//   nthread == 1: the line starts at (x - wsz2, y + wsz2) and advances by 1.
// *h is the ring-buffer cursor and is advanced by one slot. Returns the updated cost.
short calcCostBorder(__global const uchar * leftptr, __global const uchar * rightptr, int x, int y, int nthread,
                     int wsz2, short * costbuf, int * h, int cols, int d, short cost, int winsize)
{
    int slot = (*h)%wsz;

    int start = mad24(y+wsz2*(2*nthread-1), cols, x+wsz2*(1-2*nthread));
    __global const uchar * left = leftptr + start;
    __global const uchar * right = rightptr + (start - d);
    int stride = 1*nthread + cols*(1-nthread);

    short lineCost = 0;
    for(int i = 0; i < winsize; i++)
    {
        lineCost += abs( left[0] - right[0] );
        left += stride;
        right += stride;
    }

    // Replace the oldest line sum in the ring buffer by the new one.
    cost += lineCost - costbuf[slot];
    costbuf[slot] = lineCost;
    (*h) = slot + 1;
    return cost;
}
|
||||||
|
|
||||||
|
// Derives the window cost at (x, y) for disparity d from three neighbouring costs
// passed in by the caller (cost_up, cost_left, cost_up_left), corrected by the absolute
// left/right differences at four pixels: (x-wsz2-1, y-wsz2-1) and the pixels `winsize`
// columns and/or `winsize` rows away from it.
short calcCostInside(__global const uchar * leftptr, __global const uchar * rightptr, int x, int y,
                     int wsz2, int cols, int d, short cost_up_left, short cost_up, short cost_left,
                     int winsize)
{
    int origin = mad24(y-wsz2-1, cols, x-wsz2-1);
    __global const uchar * left = leftptr + origin;
    __global const uchar * right = rightptr + (origin - d);
    int rowJump = winsize*cols;

    uchar topLeft     = abs(left[0] - right[0]);
    uchar topRight    = abs(left[winsize] - right[winsize]);
    uchar bottomLeft  = abs(left[rowJump] - right[rowJump]);
    uchar bottomRight = abs(left[rowJump + winsize] - right[rowJump + winsize]);

    return cost_up + cost_left - cost_up_left + topLeft -
        topRight - bottomLeft + bottomRight;
}
|
||||||
|
|
||||||
|
// Block-matching kernel. Global ids 0 and 1 select a sizeX x sizeY tile of the
// disparity ROI; the local id in dimension 2 is split into
//   nthread = id / ndisp  (0 or 1 for a local size of 2*ndisp) and
//   d       = id % ndisp  (disparity index handled by this work-item).
// leftptr/rightptr are indexed as row*cols + column; dispptr is addressed with
// disp_step / disp_offset in bytes and holds 16-bit values.
// Requires the build options -D csize=<local cost array length> and -D wsz=<window size>.
// preFilterCap and textureTreshold are not referenced in the body.
__kernel void stereoBM(__global const uchar * leftptr, __global const uchar * rightptr, __global uchar * dispptr,
                       int disp_step, int disp_offset, int rows, int cols, int mindisp, int ndisp,
                       int preFilterCap, int textureTreshold, int uniquenessRatio, int sizeX, int sizeY, int winsize)
{
    int gx = get_global_id(0)*sizeX;
    int gy = get_global_id(1)*sizeY;
    int lz = get_local_id(2);

    int nthread = lz/ndisp;
    int d = lz%ndisp;
    int wsz2 = wsz/2;

    __local short costFunc[csize];
    __local short * cost;
    __local int best_disp[2];
    __local int best_cost[2];
    best_cost[nthread] = MAX_VAL;

    // Private ring buffer of per-line sums, consumed later by calcCostBorder().
    short costbuf[wsz];
    int head = 0;

    // Offset of the disparity ROI inside the full image.
    int shiftX = wsz2 + ndisp + mindisp - 1;
    int shiftY = wsz2;

    int x = gx + shiftX, y = gy + shiftY, lx = 0, ly = 0;

    int costIdx = calcLocalIdx(lx, ly, d, sizeY);
    cost = costFunc + costIdx;

    // ---- First pixel of the tile: full window sum ---------------------------------
    short tempcost = 0;
    if(x < cols-wsz2-mindisp && y < rows-wsz2)
    {
        // nthread 0 walks down columns (stride = cols), nthread 1 walks along rows (stride = 1).
        int stride = 1*nthread + cols*(1-nthread);
        for(int i = 0; i < winsize; i++)
        {
            int start = mad24(y-wsz2+i*nthread, cols, x-wsz2+i*(1-nthread));
            __global const uchar * left = leftptr + start;
            __global const uchar * right = rightptr + (start - d);

            short lineCost = 0;
            for(int j = 0; j < winsize; j++)
            {
                lineCost += abs( left[0] - right[0] );
                left += stride;
                right += stride;
            }

            if(nthread==1)
            {
                tempcost += lineCost;
            }
            costbuf[head] = lineCost;
            head++;
        }
    }

    if(nthread==1)
    {
        cost[0] = tempcost;
        atomic_min(best_cost+nthread, tempcost);
    }
    barrier(CLK_LOCAL_MEM_FENCE);

    if(best_cost[1] == tempcost)
        best_disp[1] = ndisp - d - 1;
    barrier(CLK_LOCAL_MEM_FENCE);

    int dispIdx = mad24(gy, disp_step, disp_offset + gx*(int)sizeof(short));
    __global short * disp = (__global short *)(dispptr + dispIdx);
    calcDisp(cost, disp, uniquenessRatio, mindisp, ndisp, 2*sizeY,
             best_disp + 1, best_cost+1, d, x, y, cols, rows, wsz2);
    barrier(CLK_LOCAL_MEM_FENCE);

    // ---- Remaining pixels: both halves of the work-group walk the tile ------------
    lx = 1 - nthread;
    ly = nthread;

    for(int i = 0; i < sizeY*sizeX/2; i++)
    {
        // Positions outside the tile are mapped outside the image so the bounds test fails.
        x = (lx < sizeX) ? gx + shiftX + lx : cols;
        y = (ly < sizeY) ? gy + shiftY + ly : rows;

        best_cost[nthread] = MAX_VAL;
        barrier(CLK_LOCAL_MEM_FENCE);

        costIdx = calcLocalIdx(lx, ly, d, sizeY);
        cost = costFunc + costIdx;

        if(x < cols-wsz2-mindisp && y < rows-wsz2 )
        {
            if( ly*(1-nthread) + lx*nthread == 0 )
            {
                // First row (nthread 0) or first column (nthread 1) of the tile.
                tempcost = calcCostBorder(leftptr, rightptr, x, y, nthread, wsz2, costbuf, &head, cols, d,
                                          cost[2*nthread-1], winsize);
            }
            else
            {
                tempcost = calcCostInside(leftptr, rightptr, x, y, wsz2, cols, d,
                                          cost[0], cost[1], cost[-1], winsize);
            }
        }
        cost[0] = tempcost;
        atomic_min(best_cost + nthread, tempcost);
        barrier(CLK_LOCAL_MEM_FENCE);

        if(best_cost[nthread] == tempcost)
            best_disp[nthread] = ndisp - d - 1;
        barrier(CLK_LOCAL_MEM_FENCE);

        int pixelIdx = mad24(gy+ly, disp_step, disp_offset + (gx+lx)*(int)sizeof(short));
        disp = (__global short *)(dispptr + pixelIdx);

        calcDisp(cost, disp, uniquenessRatio, mindisp, ndisp, 2*sizeY,
                 best_disp + nthread, best_cost + nthread, d, x, y, cols, rows, wsz2);
        barrier(CLK_LOCAL_MEM_FENCE);

        calcNewCoordinates(&lx, &ly, nthread);
    }
}
|
||||||
|
|
||||||
|
#endif
|
||||||
|
|
||||||
|
//////////////////////////////////////////////////////////////////////////////////////////////////
|
||||||
|
/////////////////////////////////////// Norm Prefilter ///////////////////////////////////////////
|
||||||
|
//////////////////////////////////////////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
|
// Normalized-response prefilter, one work-item per pixel of a rows x cols image
// indexed as y*cols + x.
// For each pixel: a 5-point cross sum (centre weighted by 4) scaled by scale_g, minus the
// sum over the winsize x winsize neighbourhood scaled by scale_s, shifted right by 10 bits,
// clamped to [-prefilterCap, prefilterCap], offset by prefilterCap and limited to 255.
// Coordinates outside the image are clamped to the nearest valid row/column.
__kernel void prefilter_norm(__global unsigned char *input, __global unsigned char *output,
                             int rows, int cols, int prefilterCap, int winsize, int scale_g, int scale_s)
{
    int x = get_global_id(0);
    int y = get_global_id(1);
    if(x >= cols || y >= rows)
        return;

    int wsz2 = winsize/2;

    int rowCur  = y * cols;
    int rowUp   = max(y-1, 0) * cols;
    int rowDown = min(y+1, rows-1) * cols;

    int cov1 = input[rowUp + x] * 1 +
               input[rowCur + max(x-1,0)] * 1 + input[rowCur + x] * 4 + input[rowCur + min(x+1, cols-1)] * 1 +
               input[rowDown + x] * 1;

    int cov2 = 0;
    for(int dy = -wsz2; dy <= wsz2; dy++)
    {
        int rowBase = clamp(y+dy, 0, rows-1) * cols;
        for(int dx = -wsz2; dx <= wsz2; dx++)
            cov2 += input[rowBase + clamp(x+dx, 0, cols-1)];
    }

    int res = (cov1*scale_g - cov2*scale_s)>>10;
    res = min(clamp(res, -prefilterCap, prefilterCap) + prefilterCap, 255);
    output[rowCur + x] = res & 0xFF;
}
|
||||||
|
|
||||||
|
|
||||||
|
//////////////////////////////////////////////////////////////////////////////////////////////////
|
||||||
|
////////////////////////////////////// Sobel Prefilter ///////////////////////////////////////////
|
||||||
|
//////////////////////////////////////////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
|
// Horizontal Sobel prefilter, one work-item per pixel of a rows x cols image
// indexed as y*cols + x.
// Every pixel is first set to min(prefilterCap, 255). Pixels with x > 0 that are not on
// the last row of an odd-height image are then overwritten with the 3x3 x-Sobel response,
// clamped to [-prefilterCap, prefilterCap], offset by prefilterCap and limited to 255.
// Missing neighbours at the top/bottom/right border are replaced by the opposite neighbour.
__kernel void prefilter_xsobel(__global unsigned char *input, __global unsigned char *output,
                               int rows, int cols, int prefilterCap)
{
    int x = get_global_id(0);
    int y = get_global_id(1);
    if(x >= cols || y >= rows)
        return;

    output[y * cols + x] = min(prefilterCap, 255) & 0xFF;

    bool lastRowOfOddImage = (y == rows-1) && (rows%2 == 1);
    if(x == 0 || lastRowOfOddImage)
        return;

    int rowUp   = ((y > 0) ? y-1 : y+1) * cols;
    int rowCur  = y * cols;
    int rowDown = ((y < rows-1) ? (y+1) : (y-1)) * cols;
    int colL = x-1;
    int colR = (x < cols-1) ? x+1 : x-1;

    int cov = input[rowUp   + colL] * (-1) + input[rowUp   + colR] * (1) +
              input[rowCur  + colL] * (-2) + input[rowCur  + colR] * (2) +
              input[rowDown + colL] * (-1) + input[rowDown + colR] * (1);

    cov = min(clamp(cov, -prefilterCap, prefilterCap) + prefilterCap, 255);
    output[rowCur + x] = cov & 0xFF;
}
|
@ -49,6 +49,8 @@
|
|||||||
|
|
||||||
#include "opencv2/core/private.hpp"
|
#include "opencv2/core/private.hpp"
|
||||||
|
|
||||||
|
#include "opencv2/core/ocl.hpp"
|
||||||
|
|
||||||
#ifdef HAVE_TEGRA_OPTIMIZATION
|
#ifdef HAVE_TEGRA_OPTIMIZATION
|
||||||
#include "opencv2/calib3d/calib3d_tegra.hpp"
|
#include "opencv2/calib3d/calib3d_tegra.hpp"
|
||||||
#else
|
#else
|
||||||
|
@ -48,6 +48,7 @@
|
|||||||
#include "precomp.hpp"
|
#include "precomp.hpp"
|
||||||
#include <stdio.h>
|
#include <stdio.h>
|
||||||
#include <limits>
|
#include <limits>
|
||||||
|
#include "opencl_kernels.hpp"
|
||||||
|
|
||||||
namespace cv
|
namespace cv
|
||||||
{
|
{
|
||||||
@ -85,6 +86,26 @@ struct StereoBMParams
|
|||||||
int dispType;
|
int dispType;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
// Runs the "prefilter_norm" OpenCL kernel on _input and stores the result in _output
// (same size and type as the input).
//
// Returns false when the kernel is unavailable, when the request cannot be handled by
// the kernel, or when the launch fails; the caller is expected to fall back to the CPU
// implementation in that case.
static bool ocl_prefilter_norm(InputArray _input, OutputArray _output, int winsize, int prefilterCap)
{
    ocl::Kernel k("prefilter_norm", ocl::calib3d::stereobm_oclsrc);
    if(k.empty())
        return false;

    // scale_g is used as a divisor below; very small windows would make it zero.
    int scale_g = winsize*winsize/8;
    if(scale_g <= 0)
        return false;
    int scale_s = (1024 + scale_g)/(scale_g*2);
    scale_g *= scale_s;

    UMat input = _input.getUMat(), output;

    // The kernel receives bare buffer pointers and addresses pixels as y*cols + x,
    // so it can only handle dense matrices that start at the beginning of their buffer.
    if(!input.isContinuous() || input.offset != 0)
        return false;

    _output.create(input.size(), input.type());
    output = _output.getUMat();
    if(!output.isContinuous() || output.offset != 0)
        return false;

    // Explicit casts: brace-initialising size_t from int is a narrowing conversion.
    size_t globalThreads[3] = { static_cast<size_t>(input.cols), static_cast<size_t>(input.rows), 1 };

    k.args(ocl::KernelArg::PtrReadOnly(input), ocl::KernelArg::PtrWriteOnly(output), input.rows, input.cols,
        prefilterCap, winsize, scale_g, scale_s);

    return k.run(2, globalThreads, NULL, false);
}
|
||||||
|
|
||||||
static void prefilterNorm( const Mat& src, Mat& dst, int winsize, int ftzero, uchar* buf )
|
static void prefilterNorm( const Mat& src, Mat& dst, int winsize, int ftzero, uchar* buf )
|
||||||
{
|
{
|
||||||
@ -149,6 +170,22 @@ static void prefilterNorm( const Mat& src, Mat& dst, int winsize, int ftzero, uc
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Runs the "prefilter_xsobel" OpenCL kernel on _input and stores the result in _output
// (same size and type as the input).
//
// Returns false when the kernel is unavailable, when the matrices are not laid out the
// way the kernel expects, or when the launch fails; the caller is expected to fall back
// to the CPU implementation in that case.
static bool ocl_prefilter_xsobel(InputArray _input, OutputArray _output, int prefilterCap)
{
    ocl::Kernel k("prefilter_xsobel", ocl::calib3d::stereobm_oclsrc);
    if(k.empty())
        return false;

    UMat input = _input.getUMat(), output;

    // The kernel receives bare buffer pointers and addresses pixels as y*cols + x,
    // so it can only handle dense matrices that start at the beginning of their buffer.
    if(!input.isContinuous() || input.offset != 0)
        return false;

    _output.create(input.size(), input.type());
    output = _output.getUMat();
    if(!output.isContinuous() || output.offset != 0)
        return false;

    // Explicit casts: brace-initialising size_t from int is a narrowing conversion.
    size_t globalThreads[3] = { static_cast<size_t>(input.cols), static_cast<size_t>(input.rows), 1 };

    k.args(ocl::KernelArg::PtrReadOnly(input), ocl::KernelArg::PtrWriteOnly(output), input.rows, input.cols, prefilterCap);

    return k.run(2, globalThreads, NULL, false);
}
|
||||||
|
|
||||||
static void
|
static void
|
||||||
prefilterXSobel( const Mat& src, Mat& dst, int ftzero )
|
prefilterXSobel( const Mat& src, Mat& dst, int ftzero )
|
||||||
@ -534,7 +571,6 @@ findStereoCorrespondenceBM( const Mat& left, const Mat& right,
|
|||||||
hsad = hsad0 - dy0*ndisp; cbuf = cbuf0 + (x + wsz2 + 1)*cstep - dy0*ndisp;
|
hsad = hsad0 - dy0*ndisp; cbuf = cbuf0 + (x + wsz2 + 1)*cstep - dy0*ndisp;
|
||||||
lptr = lptr0 + std::min(std::max(x, -lofs), width-lofs-1) - dy0*sstep;
|
lptr = lptr0 + std::min(std::max(x, -lofs), width-lofs-1) - dy0*sstep;
|
||||||
rptr = rptr0 + std::min(std::max(x, -rofs), width-rofs-1) - dy0*sstep;
|
rptr = rptr0 + std::min(std::max(x, -rofs), width-rofs-1) - dy0*sstep;
|
||||||
|
|
||||||
for( y = -dy0; y < height + dy1; y++, hsad += ndisp, cbuf += ndisp, lptr += sstep, rptr += sstep )
|
for( y = -dy0; y < height + dy1; y++, hsad += ndisp, cbuf += ndisp, lptr += sstep, rptr += sstep )
|
||||||
{
|
{
|
||||||
int lval = lptr[0];
|
int lval = lptr[0];
|
||||||
@ -617,6 +653,7 @@ findStereoCorrespondenceBM( const Mat& left, const Mat& right,
|
|||||||
mind = d;
|
mind = d;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
tsum += htext[y + wsz2] - htext[y - wsz2 - 1];
|
tsum += htext[y + wsz2] - htext[y - wsz2 - 1];
|
||||||
if( tsum < textureThreshold )
|
if( tsum < textureThreshold )
|
||||||
{
|
{
|
||||||
@ -651,6 +688,25 @@ findStereoCorrespondenceBM( const Mat& left, const Mat& right,
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Applies the prefilter selected by state->preFilterType to both images using OpenCL:
// the normalized-response filter for PREFILTER_NORMALIZED_RESPONSE, the x-Sobel filter
// otherwise. The left image is processed first; the right image is only processed if
// the left one succeeded. Returns true only when both images were filtered.
static bool ocl_prefiltering(InputArray left0, InputArray right0, OutputArray left, OutputArray right, StereoBMParams* state)
{
    const int cap = state->preFilterCap;

    if( state->preFilterType == StereoBM::PREFILTER_NORMALIZED_RESPONSE )
    {
        const int size = state->preFilterSize;
        return ocl_prefilter_norm( left0, left, size, cap ) &&
               ocl_prefilter_norm( right0, right, size, cap );
    }

    return ocl_prefilter_xsobel( left0, left, cap ) &&
           ocl_prefilter_xsobel( right0, right, cap );
}
|
||||||
|
|
||||||
struct PrefilterInvoker : public ParallelLoopBody
|
struct PrefilterInvoker : public ParallelLoopBody
|
||||||
{
|
{
|
||||||
PrefilterInvoker(const Mat& left0, const Mat& right0, Mat& left, Mat& right,
|
PrefilterInvoker(const Mat& left0, const Mat& right0, Mat& left, Mat& right,
|
||||||
@ -679,6 +735,51 @@ struct PrefilterInvoker : public ParallelLoopBody
|
|||||||
StereoBMParams* state;
|
StereoBMParams* state;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
// Computes the 16-bit fixed-point disparity map with the "stereoBM" OpenCL kernel.
//
//   _left, _right - prefiltered 8-bit images of equal size
//   _disp         - receives a CV_16S map of the input size; pixels outside the
//                   computable region keep the value (minDisparity - 1) << 4
//   state         - matcher parameters (numDisparities, minDisparity, SADWindowSize,
//                   preFilterCap, textureThreshold, uniquenessRatio are read)
//
// Returns false when the kernel cannot be built or launched, or when the computable
// region would extend outside the image; the caller then falls back to the CPU path.
static bool ocl_stereobm( InputArray _left, InputArray _right,
                       OutputArray _disp, StereoBMParams* state)
{
    const int ndisp = state->numDisparities;
    const int mindisp = state->minDisparity;
    const int wsz = state->SADWindowSize;
    const int wsz2 = wsz/2;

    // Tile size handled by one work-group; one work-group has 2*ndisp work-items.
    const int sizeX = std::max(11, 27 - ocl::Device::getDefault().maxComputeUnits() );
    const int sizeY = sizeX-1;
    const int N = ndisp*2;

    ocl::Kernel k("stereoBM", ocl::calib3d::stereobm_oclsrc, cv::format("-D csize=%d -D wsz=%d", (2*sizeY)*ndisp, wsz) );
    if(k.empty())
        return false;

    UMat left = _left.getUMat(), right = _right.getUMat();
    const int cols = left.cols, rows = left.rows;

    // Corners of the region in which a disparity can be computed.
    const int x0 = wsz2 + mindisp + ndisp - 1, x1 = cols - wsz2 - mindisp;
    const int y0 = wsz2,                       y1 = rows - wsz2;

    // Rect(Point, Point) silently normalises swapped corners, so validate them here:
    // a region reaching outside the image cannot be cut out of the disparity map.
    if( x0 < 0 || x1 > cols )
        return false;

    _disp.create(_left.size(), CV_16S);
    _disp.setTo((mindisp - 1)<<4);

    // Image too small for this disparity range / window: every pixel stays filtered.
    if( x0 >= x1 || y0 >= y1 )
        return true;

    const Rect roi(Point(x0, y0), Point(x1, y1));
    UMat disp = (_disp.getUMat())(roi);

    // Number of tiles needed to cover the ROI (rounded up).
    const int globalX = (disp.cols + sizeX - 1)/sizeX;
    const int globalY = (disp.rows + sizeY - 1)/sizeY;

    // Explicit casts: brace-initialising size_t from int is a narrowing conversion.
    size_t globalThreads[3] = { static_cast<size_t>(globalX), static_cast<size_t>(globalY), static_cast<size_t>(N) };
    size_t localThreads[3]  = { 1, 1, static_cast<size_t>(N) };

    int idx = 0;
    idx = k.set(idx, ocl::KernelArg::PtrReadOnly(left));
    idx = k.set(idx, ocl::KernelArg::PtrReadOnly(right));
    idx = k.set(idx, ocl::KernelArg::WriteOnlyNoSize(disp));
    idx = k.set(idx, rows);
    idx = k.set(idx, cols);
    idx = k.set(idx, mindisp);
    idx = k.set(idx, ndisp);
    idx = k.set(idx, state->preFilterCap);
    idx = k.set(idx, state->textureThreshold);
    idx = k.set(idx, state->uniquenessRatio);
    idx = k.set(idx, sizeX);
    idx = k.set(idx, sizeY);
    idx = k.set(idx, wsz);

    return k.run(3, globalThreads, localThreads, false);
}
|
||||||
|
|
||||||
struct FindStereoCorrespInvoker : public ParallelLoopBody
|
struct FindStereoCorrespInvoker : public ParallelLoopBody
|
||||||
{
|
{
|
||||||
@ -776,21 +877,18 @@ public:
|
|||||||
|
|
||||||
void compute( InputArray leftarr, InputArray rightarr, OutputArray disparr )
|
void compute( InputArray leftarr, InputArray rightarr, OutputArray disparr )
|
||||||
{
|
{
|
||||||
Mat left0 = leftarr.getMat(), right0 = rightarr.getMat();
|
|
||||||
int dtype = disparr.fixedType() ? disparr.type() : params.dispType;
|
int dtype = disparr.fixedType() ? disparr.type() : params.dispType;
|
||||||
|
Size leftsize = leftarr.size();
|
||||||
|
|
||||||
if (left0.size() != right0.size())
|
if (leftarr.size() != rightarr.size())
|
||||||
CV_Error( Error::StsUnmatchedSizes, "All the images must have the same size" );
|
CV_Error( Error::StsUnmatchedSizes, "All the images must have the same size" );
|
||||||
|
|
||||||
if (left0.type() != CV_8UC1 || right0.type() != CV_8UC1)
|
if (leftarr.type() != CV_8UC1 || rightarr.type() != CV_8UC1)
|
||||||
CV_Error( Error::StsUnsupportedFormat, "Both input images must have CV_8UC1" );
|
CV_Error( Error::StsUnsupportedFormat, "Both input images must have CV_8UC1" );
|
||||||
|
|
||||||
if (dtype != CV_16SC1 && dtype != CV_32FC1)
|
if (dtype != CV_16SC1 && dtype != CV_32FC1)
|
||||||
CV_Error( Error::StsUnsupportedFormat, "Disparity image must have CV_16SC1 or CV_32FC1 format" );
|
CV_Error( Error::StsUnsupportedFormat, "Disparity image must have CV_16SC1 or CV_32FC1 format" );
|
||||||
|
|
||||||
disparr.create(left0.size(), dtype);
|
|
||||||
Mat disp0 = disparr.getMat();
|
|
||||||
|
|
||||||
if( params.preFilterType != PREFILTER_NORMALIZED_RESPONSE &&
|
if( params.preFilterType != PREFILTER_NORMALIZED_RESPONSE &&
|
||||||
params.preFilterType != PREFILTER_XSOBEL )
|
params.preFilterType != PREFILTER_XSOBEL )
|
||||||
CV_Error( Error::StsOutOfRange, "preFilterType must be = CV_STEREO_BM_NORMALIZED_RESPONSE" );
|
CV_Error( Error::StsOutOfRange, "preFilterType must be = CV_STEREO_BM_NORMALIZED_RESPONSE" );
|
||||||
@ -802,7 +900,7 @@ public:
|
|||||||
CV_Error( Error::StsOutOfRange, "preFilterCap must be within 1..63" );
|
CV_Error( Error::StsOutOfRange, "preFilterCap must be within 1..63" );
|
||||||
|
|
||||||
if( params.SADWindowSize < 5 || params.SADWindowSize > 255 || params.SADWindowSize % 2 == 0 ||
|
if( params.SADWindowSize < 5 || params.SADWindowSize > 255 || params.SADWindowSize % 2 == 0 ||
|
||||||
params.SADWindowSize >= std::min(left0.cols, left0.rows) )
|
params.SADWindowSize >= std::min(leftsize.width, leftsize.height) )
|
||||||
CV_Error( Error::StsOutOfRange, "SADWindowSize must be odd, be within 5..255 and be not larger than image width or height" );
|
CV_Error( Error::StsOutOfRange, "SADWindowSize must be odd, be within 5..255 and be not larger than image width or height" );
|
||||||
|
|
||||||
if( params.numDisparities <= 0 || params.numDisparities % 16 != 0 )
|
if( params.numDisparities <= 0 || params.numDisparities % 16 != 0 )
|
||||||
@ -814,6 +912,28 @@ public:
|
|||||||
if( params.uniquenessRatio < 0 )
|
if( params.uniquenessRatio < 0 )
|
||||||
CV_Error( Error::StsOutOfRange, "uniqueness ratio must be non-negative" );
|
CV_Error( Error::StsOutOfRange, "uniqueness ratio must be non-negative" );
|
||||||
|
|
||||||
|
int FILTERED = (params.minDisparity - 1) << DISPARITY_SHIFT;
|
||||||
|
|
||||||
|
if(ocl::useOpenCL() && disparr.isUMat() && params.textureThreshold == 0)
|
||||||
|
{
|
||||||
|
UMat left, right;
|
||||||
|
if(ocl_prefiltering(leftarr, rightarr, left, right, &params))
|
||||||
|
{
|
||||||
|
if(ocl_stereobm(left, right, disparr, &params))
|
||||||
|
{
|
||||||
|
if( params.speckleRange >= 0 && params.speckleWindowSize > 0 )
|
||||||
|
filterSpeckles(disparr.getMat(), FILTERED, params.speckleWindowSize, params.speckleRange, slidingSumBuf);
|
||||||
|
if (dtype == CV_32F)
|
||||||
|
disparr.getUMat().convertTo(disparr, CV_32FC1, 1./(1 << DISPARITY_SHIFT), 0);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
Mat left0 = leftarr.getMat(), right0 = rightarr.getMat();
|
||||||
|
disparr.create(left0.size(), dtype);
|
||||||
|
Mat disp0 = disparr.getMat();
|
||||||
|
|
||||||
preFilteredImg0.create( left0.size(), CV_8U );
|
preFilteredImg0.create( left0.size(), CV_8U );
|
||||||
preFilteredImg1.create( left0.size(), CV_8U );
|
preFilteredImg1.create( left0.size(), CV_8U );
|
||||||
cost.create( left0.size(), CV_16S );
|
cost.create( left0.size(), CV_16S );
|
||||||
@ -828,7 +948,6 @@ public:
|
|||||||
int lofs = std::max(ndisp - 1 + mindisp, 0);
|
int lofs = std::max(ndisp - 1 + mindisp, 0);
|
||||||
int rofs = -std::min(ndisp - 1 + mindisp, 0);
|
int rofs = -std::min(ndisp - 1 + mindisp, 0);
|
||||||
int width1 = width - rofs - ndisp + 1;
|
int width1 = width - rofs - ndisp + 1;
|
||||||
int FILTERED = (params.minDisparity - 1) << DISPARITY_SHIFT;
|
|
||||||
|
|
||||||
if( lofs >= width || rofs >= width || width1 < 1 )
|
if( lofs >= width || rofs >= width || width1 < 1 )
|
||||||
{
|
{
|
||||||
@ -870,6 +989,7 @@ public:
|
|||||||
slidingSumBuf.create( 1, bufSize, CV_8U );
|
slidingSumBuf.create( 1, bufSize, CV_8U );
|
||||||
|
|
||||||
uchar *_buf = slidingSumBuf.data;
|
uchar *_buf = slidingSumBuf.data;
|
||||||
|
|
||||||
parallel_for_(Range(0, 2), PrefilterInvoker(left0, right0, left, right, _buf, _buf + bufSize1, &params), 1);
|
parallel_for_(Range(0, 2), PrefilterInvoker(left0, right0, left, right, _buf, _buf + bufSize1, &params), 1);
|
||||||
|
|
||||||
Rect validDisparityRect(0, 0, width, height), R1 = params.roi1, R2 = params.roi2;
|
Rect validDisparityRect(0, 0, width, height), R1 = params.roi1, R2 = params.roi2;
|
||||||
|
97
modules/calib3d/test/opencl/test_stereobm.cpp
Normal file
97
modules/calib3d/test/opencl/test_stereobm.cpp
Normal file
@ -0,0 +1,97 @@
|
|||||||
|
///////////////////////////////////////////////////////////////////////////////////////
|
||||||
|
//
|
||||||
|
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
|
||||||
|
//
|
||||||
|
// By downloading, copying, installing or using the software you agree to this license.
|
||||||
|
// If you do not agree to this license, do not download, install,
|
||||||
|
// copy or use the software.
|
||||||
|
//
|
||||||
|
//
|
||||||
|
// License Agreement
|
||||||
|
// For Open Source Computer Vision Library
|
||||||
|
//
|
||||||
|
// Copyright (C) 2010-2012, Institute Of Software Chinese Academy Of Science, all rights reserved.
|
||||||
|
// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
|
||||||
|
// Third party copyrights are property of their respective owners.
|
||||||
|
//
|
||||||
|
// Redistribution and use in source and binary forms, with or without modification,
|
||||||
|
// are permitted provided that the following conditions are met:
|
||||||
|
//
|
||||||
|
// * Redistribution's of source code must retain the above copyright notice,
|
||||||
|
// this list of conditions and the following disclaimer.
|
||||||
|
//
|
||||||
|
// * Redistribution's in binary form must reproduce the above copyright notice,
|
||||||
|
// this list of conditions and the following disclaimer in the documentation
|
||||||
|
// and/or other materials provided with the distribution.
|
||||||
|
//
|
||||||
|
// * The name of the copyright holders may not be used to endorse or promote products
|
||||||
|
// derived from this software without specific prior written permission.
|
||||||
|
//
|
||||||
|
// This software is provided by the copyright holders and contributors "as is" and
|
||||||
|
// any express or implied warranties, including, but not limited to, the implied
|
||||||
|
// warranties of merchantability and fitness for a particular purpose are disclaimed.
|
||||||
|
// In no event shall the Intel Corporation or contributors be liable for any direct,
|
||||||
|
// indirect, incidental, special, exemplary, or consequential damages
|
||||||
|
// (including, but not limited to, procurement of substitute goods or services;
|
||||||
|
// loss of use, data, or profits; or business interruption) however caused
|
||||||
|
// and on any theory of liability, whether in contract, strict liability,
|
||||||
|
// or tort (including negligence or otherwise) arising in any way out of
|
||||||
|
// the use of this software, even if advised of the possibility of such damage.
|
||||||
|
//
|
||||||
|
//M*/
|
||||||
|
|
||||||
|
#include "test_precomp.hpp"
|
||||||
|
#include "cvconfig.h"
|
||||||
|
#include "opencv2/ts/ocl_test.hpp"
|
||||||
|
|
||||||
|
#ifdef HAVE_OPENCL
|
||||||
|
|
||||||
|
namespace cvtest {
|
||||||
|
namespace ocl {
|
||||||
|
|
||||||
|
// Fixture for comparing StereoBM results computed on Mat and on UMat inputs.
// Test parameters: <number of disparities, SAD window size>.
PARAM_TEST_CASE(StereoBMFixture, int, int)
{
    int n_disp;
    int winSize;
    Mat left, right, disp;       // inputs / result for the Mat run
    UMat uleft, uright, udisp;   // inputs / result for the UMat run

    // Reads the test parameters and loads the grayscale stereo pair into both
    // the Mat and the UMat members.
    virtual void SetUp()
    {
        n_disp = GET_PARAM(0);
        winSize = GET_PARAM(1);

        left = readImage("gpu/stereobm/aloe-L.png", IMREAD_GRAYSCALE);
        ASSERT_FALSE(left.empty());

        right = readImage("gpu/stereobm/aloe-R.png", IMREAD_GRAYSCALE);
        ASSERT_FALSE(right.empty());

        left.copyTo(uleft);
        right.copyTo(uright);
    }

    // Checks that both disparity maps agree within the relative tolerance eps.
    void Near(double eps = 0.0)
    {
        EXPECT_MAT_NEAR_RELATIVE(disp, udisp, eps);
    }
};
|
||||||
|
|
||||||
|
// Computes the disparity map once with OpenCL disabled (Mat inputs) and once with
// OpenCL enabled (UMat inputs) and requires the two results to match.
OCL_TEST_P(StereoBMFixture, StereoBM)
{
    // XSOBEL prefilter with a zero texture threshold.
    Ptr<StereoBM> bm = createStereoBM(n_disp, winSize);
    bm->setPreFilterType(StereoBM::PREFILTER_XSOBEL);
    bm->setTextureThreshold(0);

    OCL_OFF(bm->compute(left, right, disp));
    OCL_ON(bm->compute(uleft, uright, udisp));

    Near(1e-3);
}
|
||||||
|
|
||||||
|
// Instantiate for every combination of disparity count {32, 64, 128} and window size {11, 21}.
OCL_INSTANTIATE_TEST_CASE_P(StereoMatcher, StereoBMFixture,
                            testing::Combine(testing::Values(32, 64, 128), testing::Values(11, 21)));
|
||||||
|
}//ocl
|
||||||
|
}//cvtest
|
||||||
|
|
||||||
|
#endif //HAVE_OPENCL
|
Loading…
x
Reference in New Issue
Block a user