Fixed some bugs in GPU matrix reductions; moved the <functional> include into precomp.hpp
This commit is contained in:
parent
0da71a01ff
commit
01dafce1a1
@ -328,13 +328,13 @@ namespace cv { namespace gpu { namespace mathfunc
|
|||||||
__shared__ best_type smaxval[nthreads];
|
__shared__ best_type smaxval[nthreads];
|
||||||
|
|
||||||
uint tid = threadIdx.y * blockDim.x + threadIdx.x;
|
uint tid = threadIdx.y * blockDim.x + threadIdx.x;
|
||||||
uint idx = min(tid, gridDim.x * gridDim.y - 1);
|
uint idx = min(tid, size - 1);
|
||||||
|
|
||||||
sminval[tid] = minval[idx];
|
sminval[tid] = minval[idx];
|
||||||
smaxval[tid] = maxval[idx];
|
smaxval[tid] = maxval[idx];
|
||||||
__syncthreads();
|
__syncthreads();
|
||||||
|
|
||||||
findMinMaxInSmem<nthreads, best_type>(sminval, smaxval, tid);
|
findMinMaxInSmem<nthreads, best_type>(sminval, smaxval, tid);
|
||||||
|
|
||||||
if (tid == 0)
|
if (tid == 0)
|
||||||
{
|
{
|
||||||
@ -428,7 +428,7 @@ namespace cv { namespace gpu { namespace mathfunc
|
|||||||
|
|
||||||
// Returns required buffer sizes
|
// Returns required buffer sizes
|
||||||
void getBufSizeRequired(int cols, int rows, int elem_size, int& b1cols,
|
void getBufSizeRequired(int cols, int rows, int elem_size, int& b1cols,
|
||||||
int& b1rows, int& b2cols, int& b2rows)
|
int& b1rows, int& b2cols, int& b2rows)
|
||||||
{
|
{
|
||||||
dim3 threads, grid;
|
dim3 threads, grid;
|
||||||
estimateThreadCfg(cols, rows, threads, grid);
|
estimateThreadCfg(cols, rows, threads, grid);
|
||||||
@ -623,7 +623,7 @@ namespace cv { namespace gpu { namespace mathfunc
|
|||||||
|
|
||||||
template <typename T>
|
template <typename T>
|
||||||
void minMaxLocCaller(const DevMem2D src, double* minval, double* maxval,
|
void minMaxLocCaller(const DevMem2D src, double* minval, double* maxval,
|
||||||
int minloc[2], int maxloc[2], PtrStep valbuf, PtrStep locbuf)
|
int minloc[2], int maxloc[2], PtrStep valbuf, PtrStep locbuf)
|
||||||
{
|
{
|
||||||
dim3 threads, grid;
|
dim3 threads, grid;
|
||||||
estimateThreadCfg(src.cols, src.rows, threads, grid);
|
estimateThreadCfg(src.cols, src.rows, threads, grid);
|
||||||
@ -671,7 +671,7 @@ namespace cv { namespace gpu { namespace mathfunc
|
|||||||
__shared__ uint smaxloc[nthreads];
|
__shared__ uint smaxloc[nthreads];
|
||||||
|
|
||||||
uint tid = threadIdx.y * blockDim.x + threadIdx.x;
|
uint tid = threadIdx.y * blockDim.x + threadIdx.x;
|
||||||
uint idx = min(tid, gridDim.x * gridDim.y - 1);
|
uint idx = min(tid, size - 1);
|
||||||
|
|
||||||
sminval[tid] = minval[idx];
|
sminval[tid] = minval[idx];
|
||||||
smaxval[tid] = maxval[idx];
|
smaxval[tid] = maxval[idx];
|
||||||
@ -679,7 +679,7 @@ namespace cv { namespace gpu { namespace mathfunc
|
|||||||
smaxloc[tid] = maxloc[idx];
|
smaxloc[tid] = maxloc[idx];
|
||||||
__syncthreads();
|
__syncthreads();
|
||||||
|
|
||||||
findMinMaxLocInSmem<nthreads, best_type>(sminval, smaxval, sminloc, smaxloc, tid);
|
findMinMaxLocInSmem<nthreads, best_type>(sminval, smaxval, sminloc, smaxloc, tid);
|
||||||
|
|
||||||
if (tid == 0)
|
if (tid == 0)
|
||||||
{
|
{
|
||||||
@ -1150,7 +1150,7 @@ namespace cv { namespace gpu { namespace mathfunc
|
|||||||
|
|
||||||
const int tid = threadIdx.y * blockDim.x + threadIdx.x;
|
const int tid = threadIdx.y * blockDim.x + threadIdx.x;
|
||||||
|
|
||||||
DstType res = tid < gridDim.x * gridDim.y ? result[tid] : VecTraits<DstType>::all(0);
|
DstType res = tid < size ? result[tid] : VecTraits<DstType>::all(0);
|
||||||
smem[tid] = res.x;
|
smem[tid] = res.x;
|
||||||
smem[tid + nthreads] = res.y;
|
smem[tid + nthreads] = res.y;
|
||||||
__syncthreads();
|
__syncthreads();
|
||||||
@ -1262,7 +1262,7 @@ namespace cv { namespace gpu { namespace mathfunc
|
|||||||
|
|
||||||
const int tid = threadIdx.y * blockDim.x + threadIdx.x;
|
const int tid = threadIdx.y * blockDim.x + threadIdx.x;
|
||||||
|
|
||||||
DstType res = tid < gridDim.x * gridDim.y ? result[tid] : VecTraits<DstType>::all(0);
|
DstType res = tid < size ? result[tid] : VecTraits<DstType>::all(0);
|
||||||
smem[tid] = res.x;
|
smem[tid] = res.x;
|
||||||
smem[tid + nthreads] = res.y;
|
smem[tid + nthreads] = res.y;
|
||||||
smem[tid + 2 * nthreads] = res.z;
|
smem[tid + 2 * nthreads] = res.z;
|
||||||
@ -1384,7 +1384,7 @@ namespace cv { namespace gpu { namespace mathfunc
|
|||||||
|
|
||||||
const int tid = threadIdx.y * blockDim.x + threadIdx.x;
|
const int tid = threadIdx.y * blockDim.x + threadIdx.x;
|
||||||
|
|
||||||
DstType res = tid < gridDim.x * gridDim.y ? result[tid] : VecTraits<DstType>::all(0);
|
DstType res = tid < size ? result[tid] : VecTraits<DstType>::all(0);
|
||||||
smem[tid] = res.x;
|
smem[tid] = res.x;
|
||||||
smem[tid + nthreads] = res.y;
|
smem[tid + nthreads] = res.y;
|
||||||
smem[tid + 2 * nthreads] = res.z;
|
smem[tid + 2 * nthreads] = res.z;
|
||||||
|
@ -41,7 +41,6 @@
|
|||||||
//M*/
|
//M*/
|
||||||
|
|
||||||
#include "precomp.hpp"
|
#include "precomp.hpp"
|
||||||
#include <functional>
|
|
||||||
|
|
||||||
using namespace cv;
|
using namespace cv;
|
||||||
using namespace cv::gpu;
|
using namespace cv::gpu;
|
||||||
|
@ -276,11 +276,11 @@ void cv::gpu::minMax(const GpuMat& src, double* minVal, double* maxVal, const Gp
|
|||||||
minMaxMaskCaller<double> };
|
minMaxMaskCaller<double> };
|
||||||
|
|
||||||
CV_Assert(src.channels() == 1);
|
CV_Assert(src.channels() == 1);
|
||||||
|
|
||||||
CV_Assert(mask.empty() || (mask.type() == CV_8U && src.size() == mask.size()));
|
CV_Assert(mask.empty() || (mask.type() == CV_8U && src.size() == mask.size()));
|
||||||
|
|
||||||
bool double_ok = hasGreaterOrEqualVersion(1, 3) &&
|
CV_Assert(src.type() != CV_64F || (hasGreaterOrEqualVersion(1, 3) &&
|
||||||
hasNativeDoubleSupport(getDevice());
|
hasNativeDoubleSupport(getDevice())));
|
||||||
CV_Assert(src.type() != CV_64F || double_ok);
|
|
||||||
|
|
||||||
double minVal_; if (!minVal) minVal = &minVal_;
|
double minVal_; if (!minVal) minVal = &minVal_;
|
||||||
double maxVal_; if (!maxVal) maxVal = &maxVal_;
|
double maxVal_; if (!maxVal) maxVal = &maxVal_;
|
||||||
@ -375,11 +375,11 @@ void cv::gpu::minMaxLoc(const GpuMat& src, double* minVal, double* maxVal, Point
|
|||||||
minMaxLocMaskCaller<double> };
|
minMaxLocMaskCaller<double> };
|
||||||
|
|
||||||
CV_Assert(src.channels() == 1);
|
CV_Assert(src.channels() == 1);
|
||||||
|
|
||||||
CV_Assert(mask.empty() || (mask.type() == CV_8U && src.size() == mask.size()));
|
CV_Assert(mask.empty() || (mask.type() == CV_8U && src.size() == mask.size()));
|
||||||
|
|
||||||
bool double_ok = hasGreaterOrEqualVersion(1, 3) &&
|
CV_Assert(src.type() != CV_64F || (hasGreaterOrEqualVersion(1, 3) &&
|
||||||
hasNativeDoubleSupport(getDevice());
|
hasNativeDoubleSupport(getDevice())));
|
||||||
CV_Assert(src.type() != CV_64F || double_ok);
|
|
||||||
|
|
||||||
double minVal_; if (!minVal) minVal = &minVal_;
|
double minVal_; if (!minVal) minVal = &minVal_;
|
||||||
double maxVal_; if (!maxVal) maxVal = &maxVal_;
|
double maxVal_; if (!maxVal) maxVal = &maxVal_;
|
||||||
@ -388,7 +388,7 @@ void cv::gpu::minMaxLoc(const GpuMat& src, double* minVal, double* maxVal, Point
|
|||||||
|
|
||||||
Size valbuf_size, locbuf_size;
|
Size valbuf_size, locbuf_size;
|
||||||
getBufSizeRequired(src.cols, src.rows, src.elemSize(), valbuf_size.width,
|
getBufSizeRequired(src.cols, src.rows, src.elemSize(), valbuf_size.width,
|
||||||
valbuf_size.height, locbuf_size.width, locbuf_size.height);
|
valbuf_size.height, locbuf_size.width, locbuf_size.height);
|
||||||
ensureSizeIsEnough(valbuf_size, CV_8U, valBuf);
|
ensureSizeIsEnough(valbuf_size, CV_8U, valBuf);
|
||||||
ensureSizeIsEnough(locbuf_size, CV_8U, locBuf);
|
ensureSizeIsEnough(locbuf_size, CV_8U, locBuf);
|
||||||
|
|
||||||
@ -459,9 +459,8 @@ int cv::gpu::countNonZero(const GpuMat& src, GpuMat& buf)
|
|||||||
|
|
||||||
CV_Assert(src.channels() == 1);
|
CV_Assert(src.channels() == 1);
|
||||||
|
|
||||||
bool double_ok = hasGreaterOrEqualVersion(1, 3) &&
|
CV_Assert(src.type() != CV_64F || (hasGreaterOrEqualVersion(1, 3) &&
|
||||||
hasNativeDoubleSupport(getDevice());
|
hasNativeDoubleSupport(getDevice())));
|
||||||
CV_Assert(src.type() != CV_64F || double_ok);
|
|
||||||
|
|
||||||
Size buf_size;
|
Size buf_size;
|
||||||
getBufSizeRequired(src.cols, src.rows, buf_size.width, buf_size.height);
|
getBufSizeRequired(src.cols, src.rows, buf_size.width, buf_size.height);
|
||||||
|
@ -57,6 +57,7 @@
|
|||||||
#include <sstream>
|
#include <sstream>
|
||||||
#include <exception>
|
#include <exception>
|
||||||
#include <iterator>
|
#include <iterator>
|
||||||
|
#include <functional>
|
||||||
|
|
||||||
#include "opencv2/gpu/gpu.hpp"
|
#include "opencv2/gpu/gpu.hpp"
|
||||||
#include "opencv2/imgproc/imgproc.hpp"
|
#include "opencv2/imgproc/imgproc.hpp"
|
||||||
|
@ -49,7 +49,7 @@ using namespace std;
|
|||||||
using namespace gpu;
|
using namespace gpu;
|
||||||
|
|
||||||
#define CHECK(pred, err) if (!(pred)) { \
|
#define CHECK(pred, err) if (!(pred)) { \
|
||||||
ts->printf(CvTS::LOG, "Fail: \"%s\" at line: %d\n", #pred, __LINE__); \
|
ts->printf(CvTS::CONSOLE, "Fail: \"%s\" at line: %d\n", #pred, __LINE__); \
|
||||||
ts->set_failed_test_info(err); \
|
ts->set_failed_test_info(err); \
|
||||||
return; }
|
return; }
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user