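Fixed failure in buildPointList: fill the per-row shared-memory queues via atomicInc instead of the write-and-verify loop, and skip linesAccum_gpu when no points are found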
This commit is contained in:
parent
c3fa7974e6
commit
34b9bd61a2
@ -54,40 +54,30 @@ namespace cv { namespace gpu { namespace device
|
|||||||
|
|
||||||
__global__ void buildPointList(const DevMem2Db src, unsigned int* list)
|
__global__ void buildPointList(const DevMem2Db src, unsigned int* list)
|
||||||
{
|
{
|
||||||
const int x = blockIdx.x * 32 * PIXELS_PER_THREAD + threadIdx.x;
|
__shared__ unsigned int s_queues[4][32 * PIXELS_PER_THREAD];
|
||||||
const int y = blockIdx.y * 4 + threadIdx.y;
|
__shared__ unsigned int s_qsize[4];
|
||||||
|
__shared__ unsigned int s_start[4];
|
||||||
|
|
||||||
|
const int x = blockIdx.x * blockDim.x * PIXELS_PER_THREAD + threadIdx.x;
|
||||||
|
const int y = blockIdx.y * blockDim.y + threadIdx.y;
|
||||||
|
|
||||||
if (y >= src.rows)
|
if (y >= src.rows)
|
||||||
return;
|
return;
|
||||||
|
|
||||||
volatile int qindex = -1;
|
if (threadIdx.x == 0)
|
||||||
__shared__ volatile int s_qindex[4];
|
s_qsize[threadIdx.y] = 0;
|
||||||
__shared__ volatile int s_qstart[4];
|
|
||||||
s_qindex[threadIdx.y] = -1;
|
|
||||||
|
|
||||||
__shared__ volatile unsigned int s_queue[4][32 * PIXELS_PER_THREAD];
|
__syncthreads();
|
||||||
|
|
||||||
// fill the queue
|
// fill the queue
|
||||||
for (int i = 0; i < PIXELS_PER_THREAD; ++i)
|
for (int i = 0, xx = x; i < PIXELS_PER_THREAD && xx < src.cols; ++i, xx += blockDim.x)
|
||||||
{
|
{
|
||||||
const int xx = i * blockDim.x + x;
|
|
||||||
|
|
||||||
if (xx >= src.cols)
|
|
||||||
break;
|
|
||||||
|
|
||||||
if (src(y, xx))
|
if (src(y, xx))
|
||||||
{
|
{
|
||||||
const unsigned int queue_val = (y << 16) | xx;
|
const unsigned int val = (y << 16) | xx;
|
||||||
|
int qidx = Emulation::smem::atomicInc(&s_qsize[threadIdx.y], (unsigned int)(-1));
|
||||||
do {
|
s_queues[threadIdx.y][qidx] = val;
|
||||||
qindex++;
|
|
||||||
s_qindex[threadIdx.y] = qindex;
|
|
||||||
s_queue[threadIdx.y][qindex] = queue_val;
|
|
||||||
} while (s_queue[threadIdx.y][qindex] != queue_val);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// reload index from smem (last thread to write to smem will have updated it)
|
|
||||||
qindex = s_qindex[threadIdx.y];
|
|
||||||
}
|
}
|
||||||
|
|
||||||
__syncthreads();
|
__syncthreads();
|
||||||
@ -96,31 +86,27 @@ namespace cv { namespace gpu { namespace device
|
|||||||
if (threadIdx.x == 0 && threadIdx.y == 0)
|
if (threadIdx.x == 0 && threadIdx.y == 0)
|
||||||
{
|
{
|
||||||
// find how many items are stored in each list
|
// find how many items are stored in each list
|
||||||
int total_index = 0;
|
unsigned int total_size = 0;
|
||||||
#pragma unroll
|
for (int i = 0; i < blockDim.y; ++i)
|
||||||
for (int i = 0; i < 4; ++i)
|
|
||||||
{
|
{
|
||||||
s_qstart[i] = total_index;
|
s_start[i] = total_size;
|
||||||
total_index += (s_qindex[i] + 1u);
|
total_size += s_qsize[i];
|
||||||
}
|
}
|
||||||
|
|
||||||
//calculate the offset in the global list
|
//calculate the offset in the global list
|
||||||
const unsigned int global_offset = atomicAdd(&g_counter, total_index);
|
const unsigned int global_offset = atomicAdd(&g_counter, total_size);
|
||||||
#pragma unroll
|
for (int i = 0; i < blockDim.y; ++i)
|
||||||
for (int i = 0; i < 4; ++i)
|
s_start[i] += global_offset;
|
||||||
s_qstart[i] += global_offset;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
__syncthreads();
|
__syncthreads();
|
||||||
|
|
||||||
// copy local queues to global queue
|
// copy local queues to global queue
|
||||||
for(int i = 0; i <= qindex; i += 32)
|
const unsigned int qsize = s_qsize[threadIdx.y];
|
||||||
|
for(int i = threadIdx.x; i < qsize; i += blockDim.x)
|
||||||
{
|
{
|
||||||
if(i + threadIdx.x > qindex)
|
unsigned int val = s_queues[threadIdx.y][i];
|
||||||
break;
|
list[s_start[threadIdx.y] + i] = val;
|
||||||
|
|
||||||
unsigned int qvalue = s_queue[threadIdx.y][i + threadIdx.x];
|
|
||||||
list[s_qstart[threadIdx.y] + i + threadIdx.x] = qvalue;
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -61,8 +61,24 @@ void cv::gpu::HoughLinesTransform(const GpuMat& src, GpuMat& accum, GpuMat& buf,
|
|||||||
CV_Assert(src.rows < std::numeric_limits<unsigned short>::max());
|
CV_Assert(src.rows < std::numeric_limits<unsigned short>::max());
|
||||||
|
|
||||||
ensureSizeIsEnough(1, src.size().area(), CV_32SC1, buf);
|
ensureSizeIsEnough(1, src.size().area(), CV_32SC1, buf);
|
||||||
|
|
||||||
unsigned int count = buildPointList_gpu(src, buf.ptr<unsigned int>());
|
unsigned int count = buildPointList_gpu(src, buf.ptr<unsigned int>());
|
||||||
|
// unsigned int count = 0;
|
||||||
|
// {
|
||||||
|
// cv::Mat h_src(src);
|
||||||
|
// cv::Mat h_buf(1, src.size().area(), CV_32SC1);
|
||||||
|
// for (int y = 0; y < h_src.rows; ++y)
|
||||||
|
// {
|
||||||
|
// for (int x = 0; x < h_src.cols; ++x)
|
||||||
|
// {
|
||||||
|
// if (h_src.at<uchar>(y, x))
|
||||||
|
// {
|
||||||
|
// const unsigned int val = (y << 16) | x;
|
||||||
|
// h_buf.ptr<unsigned int>()[count++] = val;
|
||||||
|
// }
|
||||||
|
// }
|
||||||
|
// }
|
||||||
|
// buf.upload(h_buf);
|
||||||
|
// }
|
||||||
|
|
||||||
const int numangle = cvRound(CV_PI / theta);
|
const int numangle = cvRound(CV_PI / theta);
|
||||||
const int numrho = cvRound(((src.cols + src.rows) * 2 + 1) / rho);
|
const int numrho = cvRound(((src.cols + src.rows) * 2 + 1) / rho);
|
||||||
@ -70,6 +86,7 @@ void cv::gpu::HoughLinesTransform(const GpuMat& src, GpuMat& accum, GpuMat& buf,
|
|||||||
ensureSizeIsEnough(numangle + 2, numrho + 2, CV_32SC1, accum);
|
ensureSizeIsEnough(numangle + 2, numrho + 2, CV_32SC1, accum);
|
||||||
accum.setTo(cv::Scalar::all(0));
|
accum.setTo(cv::Scalar::all(0));
|
||||||
|
|
||||||
|
if (count > 0)
|
||||||
linesAccum_gpu(buf.ptr<unsigned int>(), count, accum, rho, theta);
|
linesAccum_gpu(buf.ptr<unsigned int>(), count, accum, rho, theta);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user