diff --git a/modules/gpu/src/cuda/isf-sc.cu b/modules/gpu/src/cuda/isf-sc.cu index 3d3536683..a4496bf67 100644 --- a/modules/gpu/src/cuda/isf-sc.cu +++ b/modules/gpu/src/cuda/isf-sc.cu @@ -181,6 +181,7 @@ __device void CascadeInvoker::detect(Detection* objects, const uint ndet int st = level.octave * level.step; const int stEnd = st + level.step; + const int hogluvStep = gridDim.y * Policy::STA_Y; float confidence = 0.f; for(; st < stEnd; st += Policy::WARP) { @@ -189,13 +190,13 @@ __device void CascadeInvoker::detect(Detection* objects, const uint ndet Node node = nodes[nId]; float threshold = rescale(level, node); - int sum = get(x, y + (node.threshold >> 28) * 120, node.rect); + int sum = get(x, y + (node.threshold >> 28) * hogluvStep, node.rect); int next = 1 + (int)(sum >= threshold); node = nodes[nId + next]; threshold = rescale(level, node); - sum = get(x, y + (node.threshold >> 28) * 120, node.rect); + sum = get(x, y + (node.threshold >> 28) * hogluvStep, node.rect); const int lShift = (next - 1) * 2 + (int)(sum >= threshold); float impact = leaves[(st + threadIdx.x) * 4 + lShift]; diff --git a/modules/gpu/src/icf.hpp b/modules/gpu/src/icf.hpp index 2bbbb64d2..454ac30da 100644 --- a/modules/gpu/src/icf.hpp +++ b/modules/gpu/src/icf.hpp @@ -130,7 +130,7 @@ struct CascadeInvoker { CascadeInvoker(): levels(0), stages(0), nodes(0), leaves(0), scales(0) {} - CascadeInvoker(const PtrStepSzb& _levels, const PtrStepSzb& _octaves, const PtrStepSzf& _stages, + CascadeInvoker(const PtrStepSzb& _levels, const PtrStepSzf& _stages, const PtrStepSzb& _nodes, const PtrStepSzf& _leaves) : levels((const Level*)_levels.ptr()), stages((const float*)_stages.ptr()), diff --git a/modules/gpu/src/softcascade.cpp b/modules/gpu/src/softcascade.cpp index 5da3abf53..038654225 100644 --- a/modules/gpu/src/softcascade.cpp +++ b/modules/gpu/src/softcascade.cpp @@ -316,7 +316,7 @@ struct cv::gpu::SCascade::Fields cudaMemset(count.data, 0, sizeof(Detection)); cudaSafeCall( cudaGetLastError()); device::icf::CascadeInvoker invoker - = device::icf::CascadeInvoker(levels, octaves, stages, nodes, leaves); + = device::icf::CascadeInvoker(levels, stages, nodes, leaves); invoker(roi, hogluv, objects, count, downscales, stream); } @@ -414,7 +414,7 @@ private: void integrate(const int fh, const int fw, Stream& s) { GpuMat channels(plane, cv::Rect(0, 0, fw, fh * Fields::HOG_LUV_BINS)); - cv::gpu::resize(channels, shrunk, cv::Size(), 0.25, 0.25, CV_INTER_AREA, s); + cv::gpu::resize(channels, shrunk, cv::Size(), 1.f / shrinkage, 1.f / shrinkage, CV_INTER_AREA, s); if (info.majorVersion() < 3) cv::gpu::integralBuffered(shrunk, hogluv, integralBuffer, s); @@ -518,7 +518,7 @@ void cv::gpu::SCascade::detect(InputArray image, InputArray _rois, OutputArray _ GpuMat tmp = GpuMat(objects, cv::Rect(0, 0, sizeof(Detection), 1)); - objects = GpuMat(objects, cv::Rect( sizeof(Detection), 0, objects.cols - sizeof(Detection), 1)); + objects = GpuMat(objects, cv::Rect( sizeof(Detection), 0, objects.cols - sizeof(Detection), 1)); cudaStream_t stream = StreamAccessor::getStream(s); flds.detect(rois, tmp, objects, stream); @@ -527,13 +527,14 @@ void cv::gpu::SCascade::detect(InputArray image, InputArray _rois, OutputArray _ void cv::gpu::SCascade::genRoi(InputArray _roi, OutputArray _mask, Stream& stream) const { CV_Assert(fields); + int shr = (*fields).shrinkage; const GpuMat roi = _roi.getGpuMat(); - _mask.create( roi.cols / 4, roi.rows / 4, roi.type() ); + _mask.create( roi.cols / shr, roi.rows / shr, roi.type() ); GpuMat mask = _mask.getGpuMat(); cv::gpu::GpuMat tmp; - cv::gpu::resize(roi, tmp, cv::Size(), 0.25, 0.25, CV_INTER_AREA, stream); + cv::gpu::resize(roi, tmp, cv::Size(), 1.f / shr, 1.f / shr, CV_INTER_AREA, stream); cv::gpu::transpose(tmp, mask, stream); }