integrate pre-Kepler architectures
This commit is contained in:
parent
aa92be34d6
commit
08910e81af
@ -209,6 +209,7 @@ __device void CascadeInvoker<Policy>::detect(Detection* objects, const uint ndet
|
|||||||
const int lShift = (next - 1) * 2 + (int)(sum >= threshold);
|
const int lShift = (next - 1) * 2 + (int)(sum >= threshold);
|
||||||
float impact = leaves[(st + threadIdx.x) * 4 + lShift];
|
float impact = leaves[(st + threadIdx.x) * 4 + lShift];
|
||||||
|
|
||||||
|
#if defined __CUDA_ARCH__ && __CUDA_ARCH__ >= 300
|
||||||
#pragma unroll
|
#pragma unroll
|
||||||
// scan on shuffl functions
|
// scan on shuffl functions
|
||||||
for (int i = 1; i < Policy::WARP; i *= 2)
|
for (int i = 1; i < Policy::WARP; i *= 2)
|
||||||
@ -218,7 +219,21 @@ __device void CascadeInvoker<Policy>::detect(Detection* objects, const uint ndet
|
|||||||
if (threadIdx.x >= i)
|
if (threadIdx.x >= i)
|
||||||
impact += n;
|
impact += n;
|
||||||
}
|
}
|
||||||
|
#else
|
||||||
|
__shared__ volatile float ptr[Policy::STA_X * Policy::STA_Y];
|
||||||
|
|
||||||
|
const int idx = threadIdx.y * Policy::STA_X + threadIdx.x;
|
||||||
|
|
||||||
|
ptr[idx] = impact;
|
||||||
|
|
||||||
|
if ( threadIdx.x >= 1) ptr [idx ] = (ptr [idx - 1] + ptr [idx]);
|
||||||
|
if ( threadIdx.x >= 2) ptr [idx ] = (ptr [idx - 2] + ptr [idx]);
|
||||||
|
if ( threadIdx.x >= 4) ptr [idx ] = (ptr [idx - 4] + ptr [idx]);
|
||||||
|
if ( threadIdx.x >= 8) ptr [idx ] = (ptr [idx - 8] + ptr [idx]);
|
||||||
|
if ( threadIdx.x >= 16) ptr [idx ] = (ptr [idx - 16] + ptr [idx]);
|
||||||
|
|
||||||
|
impact = ptr[idx];
|
||||||
|
#endif
|
||||||
confidence += impact;
|
confidence += impact;
|
||||||
if(__any((confidence <= stages[(st + threadIdx.x)]))) st += 2048;
|
if(__any((confidence <= stages[(st + threadIdx.x)]))) st += 2048;
|
||||||
}
|
}
|
||||||
|
@ -298,14 +298,14 @@ struct cv::gpu::SCascade::Fields
|
|||||||
leaves.upload(hleaves);
|
leaves.upload(hleaves);
|
||||||
levels.upload(hlevels);
|
levels.upload(hlevels);
|
||||||
|
|
||||||
invoker = device::icf::CascadeInvoker<device::icf::GK107PolicyX4>(levels, octaves, stages, nodes, leaves);
|
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
void detect(int scale, const cv::gpu::GpuMat& roi, const cv::gpu::GpuMat& count, cv::gpu::GpuMat& objects, const cudaStream_t& stream) const
|
void detect(int scale, const cv::gpu::GpuMat& roi, const cv::gpu::GpuMat& count, cv::gpu::GpuMat& objects, const cudaStream_t& stream) const
|
||||||
{
|
{
|
||||||
cudaMemset(count.data, 0, sizeof(Detection));
|
cudaMemset(count.data, 0, sizeof(Detection));
|
||||||
cudaSafeCall( cudaGetLastError());
|
cudaSafeCall( cudaGetLastError());
|
||||||
|
device::icf::CascadeInvoker<device::icf::GK107PolicyX4> invoker
|
||||||
|
= device::icf::CascadeInvoker<device::icf::GK107PolicyX4>(levels, octaves, stages, nodes, leaves);
|
||||||
invoker(roi, hogluv, objects, count, downscales, scale, stream);
|
invoker(roi, hogluv, objects, count, downscales, scale, stream);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -407,8 +407,14 @@ private:
|
|||||||
|
|
||||||
GpuMat channels(plane, cv::Rect(0, 0, fw, fh * Fields::HOG_LUV_BINS));
|
GpuMat channels(plane, cv::Rect(0, 0, fw, fh * Fields::HOG_LUV_BINS));
|
||||||
cv::gpu::resize(channels, shrunk, cv::Size(), 0.25, 0.25, CV_INTER_AREA, s);
|
cv::gpu::resize(channels, shrunk, cv::Size(), 0.25, 0.25, CV_INTER_AREA, s);
|
||||||
cudaStream_t stream = StreamAccessor::getStream(s);
|
|
||||||
device::imgproc::shfl_integral_gpu_buffered(shrunk, integralBuffer, hogluv, 12, stream);
|
if (info.majorVersion() < 3)
|
||||||
|
cv::gpu::integralBuffered(shrunk, hogluv, integralBuffer, s);
|
||||||
|
else
|
||||||
|
{
|
||||||
|
cudaStream_t stream = StreamAccessor::getStream(s);
|
||||||
|
device::imgproc::shfl_integral_gpu_buffered(shrunk, integralBuffer, hogluv, 12, stream);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
public:
|
public:
|
||||||
@ -452,7 +458,7 @@ public:
|
|||||||
|
|
||||||
GpuMat sobelBuf;
|
GpuMat sobelBuf;
|
||||||
|
|
||||||
device::icf::CascadeInvoker<device::icf::GK107PolicyX4> invoker;
|
DeviceInfo info;
|
||||||
|
|
||||||
enum { BOOST = 0 };
|
enum { BOOST = 0 };
|
||||||
enum
|
enum
|
||||||
|
Loading…
x
Reference in New Issue
Block a user