added optimization
This commit is contained in:
parent
b5f717b6b3
commit
bfc843a5f5
@ -68,7 +68,7 @@ OCL_PERF_TEST_P(StereoBMFixture, StereoBM, ::testing::Combine(OCL_PERF_ENUM(32,
|
|||||||
|
|
||||||
OCL_TEST_CYCLE() bm->compute(left, right, disp);
|
OCL_TEST_CYCLE() bm->compute(left, right, disp);
|
||||||
|
|
||||||
SANITY_CHECK(disp, 1e-2, ERROR_RELATIVE);
|
SANITY_CHECK(disp, 0.05, ERROR_RELATIVE);
|
||||||
}
|
}
|
||||||
|
|
||||||
}//ocl
|
}//ocl
|
||||||
|
@ -45,9 +45,102 @@
|
|||||||
////////////////////////////////////////// stereoBM //////////////////////////////////////////////
|
////////////////////////////////////////// stereoBM //////////////////////////////////////////////
|
||||||
//////////////////////////////////////////////////////////////////////////////////////////////////
|
//////////////////////////////////////////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
|
#ifdef csize
|
||||||
|
|
||||||
|
__kernel void stereoBM_opt(__global const uchar * left, __global const uchar * right, __global uchar * dispptr,
|
||||||
|
int disp_step, int disp_offset, int rows, int cols, int mindisp, int ndisp,
|
||||||
|
int preFilterCap, int winsize, int textureTreshold, int uniquenessRatio)
|
||||||
|
{
|
||||||
|
int total_x = get_global_id(0);
|
||||||
|
int gx = get_group_id(0), x = gx*ndisp;
|
||||||
|
int y = get_global_id(1);
|
||||||
|
int d = get_local_id(0) + mindisp;
|
||||||
|
int wsz2 = winsize/2;
|
||||||
|
short FILTERED = (mindisp - 1)<<4;
|
||||||
|
__local int cost[csize];
|
||||||
|
int textsum[tsize];
|
||||||
|
if( total_x<cols && y<rows && d<ndisp)
|
||||||
|
{
|
||||||
|
int dispIdx = mad24(y, disp_step, disp_offset + total_x*(int)sizeof(short) );
|
||||||
|
__global short * disp = (__global short*)(dispptr + dispIdx);
|
||||||
|
disp[0] = FILTERED;
|
||||||
|
|
||||||
|
if( (total_x > ndisp-1) && (y > wsz2-1) && (total_x < cols + ndisp - cols%ndisp) && (y < rows - wsz2))
|
||||||
|
{
|
||||||
|
for(; (x <= ndisp+mindisp+wsz2-2); x++)
|
||||||
|
{
|
||||||
|
cost[(d-mindisp)+ndisp*(x%(gx*ndisp))] = INT_MAX;
|
||||||
|
textsum[x%(gx*ndisp)] = INT_MAX;
|
||||||
|
}
|
||||||
|
cost[(d-mindisp)+ndisp*(x%(gx*ndisp))] = 0;
|
||||||
|
textsum[x%(gx*ndisp)] = 0;
|
||||||
|
for(int i = -wsz2; i < wsz2+1; i++)
|
||||||
|
for(int j = -wsz2; j < wsz2+1; j++)
|
||||||
|
{
|
||||||
|
cost[(d-mindisp)+ndisp*(x%(gx*ndisp))] += abs( left[min( y+i, rows-1 ) * cols + min( x+j, cols-1 )]
|
||||||
|
- right[min( y+i, rows-1 ) * cols + min( x+j-d, cols-1 )] );
|
||||||
|
textsum[x%(gx*ndisp)] += abs( left[min( y+i, rows-1 ) * cols + min( x+j, cols-1 )] - preFilterCap );
|
||||||
|
}
|
||||||
|
x++;
|
||||||
|
for(; (x < gx*ndisp + ndisp) && (x < cols-wsz2-mindisp); x++)
|
||||||
|
{
|
||||||
|
cost[(d-mindisp)+ndisp*(x%(gx*ndisp))] = cost[(d-mindisp)+ndisp*((x-1)%(gx*ndisp))];
|
||||||
|
textsum[x%(gx*ndisp)] = textsum[(x-1)%(gx*ndisp)];
|
||||||
|
for(int i = -wsz2; i < wsz2+1; i++)
|
||||||
|
{
|
||||||
|
cost[(d-mindisp)+ndisp*(x%(gx*ndisp))] += -abs( left[min( y+i, rows-1 ) * cols + min( x-wsz2-1, cols-1 )]
|
||||||
|
- right[min( y+i, rows-1 ) * cols + min( x-wsz2-1-d, cols-1 )] ) +
|
||||||
|
abs( left[min( y+i, rows-1 ) * cols + min( x+wsz2, cols-1 )]
|
||||||
|
- right[min( y+i, rows-1 ) * cols + min( x+wsz2-d, cols-1 )] );
|
||||||
|
textsum[x%(gx*ndisp)] += -abs( left[min( y+i, rows-1 ) * cols + min( x-wsz2-1, cols-1 )] - preFilterCap ) +
|
||||||
|
abs( left[min( y+i, rows-1 ) * cols + min( x+wsz2, cols-1 )] - preFilterCap );
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
for(; (x > cols - (cols-1)%ndisp - 1) && (x < cols + ndisp - (cols-1)%ndisp - 1); x++)
|
||||||
|
{
|
||||||
|
cost[(d-mindisp)+ndisp*(x%(gx*ndisp))] = INT_MAX;
|
||||||
|
textsum[x%(gx*ndisp)] = INT_MAX;
|
||||||
|
}
|
||||||
|
barrier(CLK_LOCAL_MEM_FENCE);
|
||||||
|
|
||||||
|
int best_disp = FILTERED, best_cost = INT_MAX-1;
|
||||||
|
for(int i = 0; (i < ndisp); i++)
|
||||||
|
{
|
||||||
|
best_cost = (cost[i + ndisp*(d-mindisp)] < best_cost) ? cost[i + ndisp*(d-mindisp)] : best_cost;
|
||||||
|
best_disp = (best_cost == cost[i + ndisp*(d-mindisp)]) ? i+mindisp : best_disp;
|
||||||
|
}
|
||||||
|
|
||||||
|
int thresh = best_cost + (best_cost * uniquenessRatio/100);
|
||||||
|
for(int i = 0; (i < ndisp) && (uniquenessRatio > 0); i++)
|
||||||
|
{
|
||||||
|
best_disp = ( (cost[i + ndisp*(d-mindisp)] <= thresh) && (i < best_disp - mindisp - 1 || i > best_disp - mindisp + 1) ) ?
|
||||||
|
FILTERED : best_disp;
|
||||||
|
}
|
||||||
|
|
||||||
|
disp[0] = textsum[d-mindisp] < textureTreshold ? (FILTERED) : (best_disp == FILTERED) ? (short)(best_disp) : (short)(best_disp);
|
||||||
|
|
||||||
|
if( best_disp != FILTERED )
|
||||||
|
{
|
||||||
|
int y1 = (best_disp > mindisp) ? cost[(best_disp-mindisp-1) + ndisp*(d-mindisp)] :
|
||||||
|
cost[(best_disp-mindisp+1) + ndisp*(d-mindisp)],
|
||||||
|
y2 = cost[(best_disp-mindisp) + ndisp*(d-mindisp)],
|
||||||
|
y3 = (best_disp < mindisp+ndisp-1) ? cost[(best_disp-mindisp+1) + ndisp*(d-mindisp)] :
|
||||||
|
cost[(best_disp-mindisp-1) + ndisp*(d-mindisp)];
|
||||||
|
float a = (y3 - ((best_disp+1)*(y2-y1) + best_disp*y1 - (best_disp-1)*y2)/(best_disp - (best_disp-1)) )/
|
||||||
|
((best_disp+1)*((best_disp+1) - (best_disp-1) - best_disp) + (best_disp-1)*best_disp);
|
||||||
|
float b = (y2 - y1)/(best_disp - (best_disp-1)) - a*((best_disp-1)+best_disp);
|
||||||
|
disp[0] = (y1 == y2 || y3 == y2) ? (short)(best_disp*16) :(short)(-b/(2*a)*16);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#endif
|
||||||
|
|
||||||
#ifdef SIZE
|
#ifdef SIZE
|
||||||
|
|
||||||
__kernel void stereoBM(__global const uchar * left, __global const uchar * right, __global uchar * dispptr,
|
__kernel void stereoBM_BF(__global const uchar * left, __global const uchar * right, __global uchar * dispptr,
|
||||||
int disp_step, int disp_offset, int rows, int cols, int mindisp, int ndisp,
|
int disp_step, int disp_offset, int rows, int cols, int mindisp, int ndisp,
|
||||||
int preFilterCap, int winsize, int textureTreshold, int uniquenessRatio)
|
int preFilterCap, int winsize, int textureTreshold, int uniquenessRatio)
|
||||||
{
|
{
|
||||||
|
@ -736,10 +736,39 @@ struct PrefilterInvoker : public ParallelLoopBody
|
|||||||
StereoBMParams* state;
|
StereoBMParams* state;
|
||||||
};
|
};
|
||||||
|
|
||||||
static bool ocl_stereo( InputArray _left, InputArray _right,
|
static bool ocl_stereobm_opt( InputArray _left, InputArray _right,
|
||||||
OutputArray _disp, StereoBMParams* state)
|
OutputArray _disp, StereoBMParams* state)
|
||||||
{
|
{
|
||||||
ocl::Kernel k("stereoBM", ocl::calib3d::stereobm_oclsrc, cv::format("-D SIZE=%d", state->numDisparities ) );
|
int ndisp = state->numDisparities;
|
||||||
|
ocl::Kernel k("stereoBM_opt", ocl::calib3d::stereobm_oclsrc, cv::format("-D csize=%d -D tsize=%d", ndisp*ndisp, ndisp) );
|
||||||
|
if(k.empty())
|
||||||
|
return false;
|
||||||
|
|
||||||
|
UMat left = _left.getUMat(), right = _right.getUMat();
|
||||||
|
_disp.create(_left.size(), CV_16S);
|
||||||
|
UMat disp = _disp.getUMat();
|
||||||
|
|
||||||
|
size_t globalThreads[3] = { left.cols, left.rows, 1 };
|
||||||
|
size_t localThreads[3] = {ndisp, 1, 1};
|
||||||
|
|
||||||
|
int idx = 0;
|
||||||
|
idx = k.set(idx, ocl::KernelArg::PtrReadOnly(left));
|
||||||
|
idx = k.set(idx, ocl::KernelArg::PtrReadOnly(right));
|
||||||
|
idx = k.set(idx, ocl::KernelArg::WriteOnly(disp));
|
||||||
|
idx = k.set(idx, state->minDisparity);
|
||||||
|
idx = k.set(idx, ndisp);
|
||||||
|
idx = k.set(idx, state->preFilterCap);
|
||||||
|
idx = k.set(idx, state->SADWindowSize);
|
||||||
|
idx = k.set(idx, state->textureThreshold);
|
||||||
|
idx = k.set(idx, state->uniquenessRatio);
|
||||||
|
|
||||||
|
return k.run(2, globalThreads, localThreads, false);
|
||||||
|
}
|
||||||
|
|
||||||
|
static bool ocl_stereobm_bf(InputArray _left, InputArray _right,
|
||||||
|
OutputArray _disp, StereoBMParams* state)
|
||||||
|
{
|
||||||
|
ocl::Kernel k("stereoBM_BF", ocl::calib3d::stereobm_oclsrc, cv::format("-D SIZE=%d", state->numDisparities ) );
|
||||||
if(k.empty())
|
if(k.empty())
|
||||||
return false;
|
return false;
|
||||||
|
|
||||||
@ -763,6 +792,15 @@ static bool ocl_stereo( InputArray _left, InputArray _right,
|
|||||||
return k.run(2, globalThreads, NULL, false);
|
return k.run(2, globalThreads, NULL, false);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static bool ocl_stereo(InputArray _left, InputArray _right,
|
||||||
|
OutputArray _disp, StereoBMParams* state)
|
||||||
|
{
|
||||||
|
if(ocl::Device::getDefault().localMemSize() > state->numDisparities * state->numDisparities * sizeof(int) )
|
||||||
|
return ocl_stereobm_opt(_left, _right, _disp, state);
|
||||||
|
else
|
||||||
|
return ocl_stereobm_bf(_left, _right, _disp, state);
|
||||||
|
}
|
||||||
|
|
||||||
struct FindStereoCorrespInvoker : public ParallelLoopBody
|
struct FindStereoCorrespInvoker : public ParallelLoopBody
|
||||||
{
|
{
|
||||||
FindStereoCorrespInvoker( const Mat& _left, const Mat& _right,
|
FindStereoCorrespInvoker( const Mat& _left, const Mat& _right,
|
||||||
|
@ -85,11 +85,11 @@ OCL_TEST_P(StereoBMFixture, StereoBM)
|
|||||||
OCL_OFF(bm->compute(left, right, disp));
|
OCL_OFF(bm->compute(left, right, disp));
|
||||||
OCL_ON(bm->compute(uleft, uright, udisp));
|
OCL_ON(bm->compute(uleft, uright, udisp));
|
||||||
|
|
||||||
Near(1e-2);
|
Near(0.05);
|
||||||
}
|
}
|
||||||
|
|
||||||
OCL_INSTANTIATE_TEST_CASE_P(StereoMatcher, StereoBMFixture, testing::Combine(testing::Values(128),
|
OCL_INSTANTIATE_TEST_CASE_P(StereoMatcher, StereoBMFixture, testing::Combine(testing::Values(32, 64, 128),
|
||||||
testing::Values(15)));
|
testing::Values(11, 21)));
|
||||||
}//ocl
|
}//ocl
|
||||||
}//cvtest
|
}//cvtest
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user