From 08910e81af95dd2004930845e5f206c2b9368aac Mon Sep 17 00:00:00 2001 From: "marina.kolpakova" Date: Wed, 14 Nov 2012 12:40:44 +0400 Subject: [PATCH] integrate pre-Kepler architectures --- modules/gpu/src/cuda/isf-sc.cu | 15 +++++++++++++++ modules/gpu/src/softcascade.cpp | 16 +++++++++++----- 2 files changed, 26 insertions(+), 5 deletions(-) diff --git a/modules/gpu/src/cuda/isf-sc.cu b/modules/gpu/src/cuda/isf-sc.cu index ac4b8f0e8..b6c87e17b 100644 --- a/modules/gpu/src/cuda/isf-sc.cu +++ b/modules/gpu/src/cuda/isf-sc.cu @@ -209,6 +209,7 @@ __device void CascadeInvoker::detect(Detection* objects, const uint ndet const int lShift = (next - 1) * 2 + (int)(sum >= threshold); float impact = leaves[(st + threadIdx.x) * 4 + lShift]; +#if defined __CUDA_ARCH__ && __CUDA_ARCH__ >= 300 #pragma unroll // scan on shuffl functions for (int i = 1; i < Policy::WARP; i *= 2) @@ -218,7 +219,21 @@ __device void CascadeInvoker::detect(Detection* objects, const uint ndet if (threadIdx.x >= i) impact += n; } +#else + __shared__ volatile float ptr[Policy::STA_X * Policy::STA_Y]; + const int idx = threadIdx.y * Policy::STA_X + threadIdx.x; + + ptr[idx] = impact; + + if ( threadIdx.x >= 1) ptr [idx ] = (ptr [idx - 1] + ptr [idx]); + if ( threadIdx.x >= 2) ptr [idx ] = (ptr [idx - 2] + ptr [idx]); + if ( threadIdx.x >= 4) ptr [idx ] = (ptr [idx - 4] + ptr [idx]); + if ( threadIdx.x >= 8) ptr [idx ] = (ptr [idx - 8] + ptr [idx]); + if ( threadIdx.x >= 16) ptr [idx ] = (ptr [idx - 16] + ptr [idx]); + + impact = ptr[idx]; +#endif confidence += impact; if(__any((confidence <= stages[(st + threadIdx.x)]))) st += 2048; } diff --git a/modules/gpu/src/softcascade.cpp b/modules/gpu/src/softcascade.cpp index 6133bd1cb..c5bcbedb5 100644 --- a/modules/gpu/src/softcascade.cpp +++ b/modules/gpu/src/softcascade.cpp @@ -298,14 +298,14 @@ struct cv::gpu::SCascade::Fields leaves.upload(hleaves); levels.upload(hlevels); - invoker = device::icf::CascadeInvoker(levels, octaves, stages, nodes, leaves); - } void detect(int scale, const cv::gpu::GpuMat& roi, const cv::gpu::GpuMat& count, cv::gpu::GpuMat& objects, const cudaStream_t& stream) const { cudaMemset(count.data, 0, sizeof(Detection)); cudaSafeCall( cudaGetLastError()); + device::icf::CascadeInvoker invoker + = device::icf::CascadeInvoker(levels, octaves, stages, nodes, leaves); invoker(roi, hogluv, objects, count, downscales, scale, stream); } @@ -407,8 +407,14 @@ private: GpuMat channels(plane, cv::Rect(0, 0, fw, fh * Fields::HOG_LUV_BINS)); cv::gpu::resize(channels, shrunk, cv::Size(), 0.25, 0.25, CV_INTER_AREA, s); - cudaStream_t stream = StreamAccessor::getStream(s); - device::imgproc::shfl_integral_gpu_buffered(shrunk, integralBuffer, hogluv, 12, stream); + + if (info.majorVersion() < 3) + cv::gpu::integralBuffered(shrunk, hogluv, integralBuffer, s); + else + { + cudaStream_t stream = StreamAccessor::getStream(s); + device::imgproc::shfl_integral_gpu_buffered(shrunk, integralBuffer, hogluv, 12, stream); + } } public: @@ -452,7 +458,7 @@ public: GpuMat sobelBuf; - device::icf::CascadeInvoker invoker; + DeviceInfo info; enum { BOOST = 0 }; enum