Merge pull request #1731 from perping:2.4_haar
This commit is contained in:
commit
2767be9a5e
@ -1059,11 +1059,11 @@ CvSeq *cv::ocl::OclCascadeClassifier::oclHaarDetectObjects( oclMat &gimg, CvMemS
|
||||
{
|
||||
sz = sizev[i];
|
||||
factor = scalev[i];
|
||||
int ystep = cvRound(std::max(2., factor));
|
||||
int equRect_x = (int)(factor * gcascade->p0 + 0.5);
|
||||
int equRect_y = (int)(factor * gcascade->p1 + 0.5);
|
||||
int equRect_w = (int)(factor * gcascade->p3 + 0.5);
|
||||
int equRect_h = (int)(factor * gcascade->p2 + 0.5);
|
||||
double ystep = std::max(2., factor);
|
||||
int equRect_x = cvRound(factor * gcascade->p0);
|
||||
int equRect_y = cvRound(factor * gcascade->p1);
|
||||
int equRect_w = cvRound(factor * gcascade->p3);
|
||||
int equRect_h = cvRound(factor * gcascade->p2);
|
||||
p[i].s[0] = equRect_x;
|
||||
p[i].s[1] = equRect_y;
|
||||
p[i].s[2] = equRect_x + equRect_w;
|
||||
@ -1676,9 +1676,9 @@ void cv::ocl::OclCascadeClassifierBuf::CreateFactorRelatedBufs(
|
||||
{
|
||||
sz = sizev[i];
|
||||
factor = scalev[i];
|
||||
int ystep = cvRound(std::max(2., factor));
|
||||
int width = (cols - 1 - sz.width + ystep - 1) / ystep;
|
||||
int height = (rows - 1 - sz.height + ystep - 1) / ystep;
|
||||
double ystep = cv::max(2.,factor);
|
||||
int width = cvRound((cols - 1 - sz.width + ystep - 1) / ystep);
|
||||
int height = cvRound((rows - 1 - sz.height + ystep - 1) / ystep);
|
||||
int grpnumperline = (width + localThreads[0] - 1) / localThreads[0];
|
||||
int totalgrp = ((height + localThreads[1] - 1) / localThreads[1]) * grpnumperline;
|
||||
|
||||
|
@ -11,6 +11,7 @@
|
||||
// Jia Haipeng, jiahaipeng95@gmail.com
|
||||
// Nathan, liujun@multicorewareinc.com
|
||||
// Peng Xiao, pengxiao@outlook.com
|
||||
// Erping Pang, erping@multicorewareinc.com
|
||||
// Redistribution and use in source and binary forms, with or without modification,
|
||||
// are permitted provided that the following conditions are met:
|
||||
//
|
||||
@ -320,7 +321,7 @@ __kernel void __attribute__((reqd_work_group_size(8,8,1)))gpuRunHaarClassifierCa
|
||||
int glb_x = grpoffx + (lcl_x<<2);
|
||||
int glb_y = grpoffy + lcl_y;
|
||||
|
||||
int glb_off = mad24(min(glb_y, height - 1),pixelstep,glb_x);
|
||||
int glb_off = mad24(min(glb_y, height + WINDOWSIZE - 1),pixelstep,glb_x);
|
||||
int4 data = *(__global int4*)&sum[glb_off];
|
||||
int lcl_off = mad24(lcl_y, readwidth, lcl_x<<2);
|
||||
|
||||
@ -420,12 +421,23 @@ __kernel void __attribute__((reqd_work_group_size(8,8,1)))gpuRunHaarClassifierCa
|
||||
|
||||
result = (stage_sum >= stagethreshold);
|
||||
}
|
||||
|
||||
if(result && (x < width) && (y < height))
|
||||
if(factor < 2)
|
||||
{
|
||||
int queueindex = atomic_inc(lclcount);
|
||||
lcloutindex[queueindex<<1] = (lclidy << 16) | lclidx;
|
||||
lcloutindex[(queueindex<<1)+1] = as_int(variance_norm_factor);
|
||||
if(result && lclidx %2 ==0 && lclidy %2 ==0 )
|
||||
{
|
||||
int queueindex = atomic_inc(lclcount);
|
||||
lcloutindex[queueindex<<1] = (lclidy << 16) | lclidx;
|
||||
lcloutindex[(queueindex<<1)+1] = as_int((float)variance_norm_factor);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
if(result)
|
||||
{
|
||||
int queueindex = atomic_inc(lclcount);
|
||||
lcloutindex[queueindex<<1] = (lclidy << 16) | lclidx;
|
||||
lcloutindex[(queueindex<<1)+1] = as_int((float)variance_norm_factor);
|
||||
}
|
||||
}
|
||||
barrier(CLK_LOCAL_MEM_FENCE);
|
||||
int queuecount = lclcount[0];
|
||||
@ -548,11 +560,27 @@ __kernel void __attribute__((reqd_work_group_size(8,8,1)))gpuRunHaarClassifierCa
|
||||
int y = mad24(grpidy,grpszy,((temp & (int)0xffff0000) >> 16));
|
||||
temp = glboutindex[0];
|
||||
int4 candidate_result;
|
||||
candidate_result.zw = (int2)convert_int_rtn(factor*20.f);
|
||||
candidate_result.x = convert_int_rtn(x*factor);
|
||||
candidate_result.y = convert_int_rtn(y*factor);
|
||||
candidate_result.zw = (int2)convert_int_rte(factor*20.f);
|
||||
candidate_result.x = convert_int_rte(x*factor);
|
||||
candidate_result.y = convert_int_rte(y*factor);
|
||||
atomic_inc(glboutindex);
|
||||
candidate[outputoff+temp+lcl_id] = candidate_result;
|
||||
|
||||
int i = outputoff+temp+lcl_id;
|
||||
if(candidate[i].z == 0)
|
||||
{
|
||||
candidate[i] = candidate_result;
|
||||
}
|
||||
else
|
||||
{
|
||||
for(i=i+1;;i++)
|
||||
{
|
||||
if(candidate[i].z == 0)
|
||||
{
|
||||
candidate[i] = candidate_result;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
barrier(CLK_LOCAL_MEM_FENCE);
|
||||
}//end for(int grploop=grpidx;grploop<totalgrp;grploop+=grpnumx)
|
||||
|
@ -18,6 +18,7 @@
|
||||
// Wu Xinglong, wxl370@126.com
|
||||
// Sen Liu, swjtuls1987@126.com
|
||||
// Peng Xiao, pengxiao@outlook.com
|
||||
// Erping Pang, erping@multicorewareinc.com
|
||||
// Redistribution and use in source and binary forms, with or without modification,
|
||||
// are permitted provided that the following conditions are met:
|
||||
//
|
||||
@ -141,7 +142,7 @@ __kernel void gpuRunHaarClassifierCascade_scaled2(
|
||||
int totalgrp = scaleinfo1.y & 0xffff;
|
||||
float factor = as_float(scaleinfo1.w);
|
||||
float correction_t = correction[scalei];
|
||||
int ystep = (int)(max(2.0f, factor) + 0.5f);
|
||||
float ystep = max(2.0f, factor);
|
||||
|
||||
for (int grploop = get_group_id(0); grploop < totalgrp; grploop += grpnumx)
|
||||
{
|
||||
@ -150,8 +151,8 @@ __kernel void gpuRunHaarClassifierCascade_scaled2(
|
||||
int grpidx = grploop - mul24(grpidy, grpnumperline);
|
||||
int ix = mad24(grpidx, grpszx, lclidx);
|
||||
int iy = mad24(grpidy, grpszy, lclidy);
|
||||
int x = ix * ystep;
|
||||
int y = iy * ystep;
|
||||
int x = round(ix * ystep);
|
||||
int y = round(iy * ystep);
|
||||
lcloutindex[lcl_id] = 0;
|
||||
lclcount[0] = 0;
|
||||
int nodecounter;
|
||||
@ -242,7 +243,7 @@ __kernel void gpuRunHaarClassifierCascade_scaled2(
|
||||
|
||||
barrier(CLK_LOCAL_MEM_FENCE);
|
||||
|
||||
if (result && (ix < width) && (iy < height))
|
||||
if (result)
|
||||
{
|
||||
int queueindex = atomic_inc(lclcount);
|
||||
lcloutindex[queueindex] = (y << 16) | x;
|
||||
@ -257,10 +258,26 @@ __kernel void gpuRunHaarClassifierCascade_scaled2(
|
||||
int y = (temp & (int)0xffff0000) >> 16;
|
||||
temp = atomic_inc(glboutindex);
|
||||
int4 candidate_result;
|
||||
candidate_result.zw = (int2)convert_int_rtn(factor * 20.f);
|
||||
candidate_result.zw = (int2)convert_int_rte(factor * 20.f);
|
||||
candidate_result.x = x;
|
||||
candidate_result.y = y;
|
||||
candidate[outputoff + temp + lcl_id] = candidate_result;
|
||||
|
||||
int i = outputoff+temp+lcl_id;
|
||||
if(candidate[i].z == 0)
|
||||
{
|
||||
candidate[i] = candidate_result;
|
||||
}
|
||||
else
|
||||
{
|
||||
for(i=i+1;;i++)
|
||||
{
|
||||
if(candidate[i].z == 0)
|
||||
{
|
||||
candidate[i] = candidate_result;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
barrier(CLK_LOCAL_MEM_FENCE);
|
||||
@ -283,7 +300,7 @@ __kernel void gpuscaleclassifier(global GpuHidHaarTreeNode *orinode, global GpuH
|
||||
tr_h[i] = (int)(t1.p[i][3] * scale + 0.5f);
|
||||
}
|
||||
|
||||
t1.weight[0] = t1.p[2][0] ? -(t1.weight[1] * tr_h[1] * tr_w[1] + t1.weight[2] * tr_h[2] * tr_w[2]) / (tr_h[0] * tr_w[0]) : -t1.weight[1] * tr_h[1] * tr_w[1] / (tr_h[0] * tr_w[0]);
|
||||
t1.weight[0] = -(t1.weight[1] * tr_h[1] * tr_w[1] + t1.weight[2] * tr_h[2] * tr_w[2]) / (tr_h[0] * tr_w[0]);
|
||||
counter += nodenum;
|
||||
#pragma unroll
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user