Merge remote-tracking branch 'origin/2.4'
Pull requests:

    #943 from jet47:cuda-5.5-support
    #944 from jet47:cmake-2.8.11-cuda-fix
    #912 from SpecLad:contributing
    #934 from SpecLad:parallel-for
    #931 from jet47:gpu-test-fixes
    #932 from bitwangyaoyao:2.4_fixBFM
    #918 from bitwangyaoyao:2.4_samples
    #924 from pengx17:2.4_arithm_fix
    #925 from pengx17:2.4_canny_tmp_fix
    #927 from bitwangyaoyao:2.4_perf
    #930 from pengx17:2.4_haar_ext
    #928 from apavlenko:bugfix_3027
    #920 from asmorkalov:android_move
    #910 from pengx17:2.4_oclgfft
    #913 from janm399:2.4
    #916 from bitwangyaoyao:2.4_fixPyrLK
    #919 from abidrahmank:2.4
    #923 from pengx17:2.4_macfix

Conflicts:
    modules/calib3d/src/stereobm.cpp
    modules/features2d/src/detectors.cpp
    modules/gpu/src/error.cpp
    modules/gpu/src/precomp.hpp
    modules/imgproc/src/distransform.cpp
    modules/imgproc/src/morph.cpp
    modules/ocl/include/opencv2/ocl/ocl.hpp
    modules/ocl/perf/perf_color.cpp
    modules/ocl/perf/perf_imgproc.cpp
    modules/ocl/perf/perf_match_template.cpp
    modules/ocl/perf/precomp.cpp
    modules/ocl/perf/precomp.hpp
    modules/ocl/src/arithm.cpp
    modules/ocl/src/canny.cpp
    modules/ocl/src/filtering.cpp
    modules/ocl/src/haar.cpp
    modules/ocl/src/hog.cpp
    modules/ocl/src/imgproc.cpp
    modules/ocl/src/opencl/haarobjectdetect.cl
    modules/ocl/src/pyrlk.cpp
    modules/video/src/bgfg_gaussmix2.cpp
    modules/video/src/lkpyramid.cpp
    platforms/linux/scripts/cmake_arm_gnueabi_hardfp.sh
    platforms/linux/scripts/cmake_arm_gnueabi_softfp.sh
    platforms/scripts/ABI_compat_generator.py
    samples/ocl/facedetect.cpp
@@ -150,7 +150,7 @@ namespace
     }

     // Computes rotation, translation pair for small subsets if the input data
-    class TransformHypothesesGenerator
+    class TransformHypothesesGenerator : public ParallelLoopBody
     {
     public:
         TransformHypothesesGenerator(const Mat& object_, const Mat& image_, const Mat& dist_coef_,
@@ -160,7 +160,7 @@ namespace
               num_points(num_points_), subset_size(subset_size_), rot_matrices(rot_matrices_),
               transl_vectors(transl_vectors_) {}

-        void operator()(const BlockedRange& range) const
+        void operator()(const Range& range) const
         {
             // Input data for generation of the current hypothesis
             std::vector<int> subset_indices(subset_size);
@@ -172,7 +172,7 @@ namespace
             Mat rot_mat(3, 3, CV_64F);
             Mat transl_vec(1, 3, CV_64F);

-            for (int iter = range.begin(); iter < range.end(); ++iter)
+            for (int iter = range.start; iter < range.end; ++iter)
             {
                 selectRandom(subset_size, num_points, subset_indices);
                 for (int i = 0; i < subset_size; ++i)
@@ -238,7 +238,7 @@ void cv::gpu::solvePnPRansac(const Mat& object, const Mat& image, const Mat& cam
     // Generate set of hypotheses using small subsets of the input data
     TransformHypothesesGenerator body(object, image_normalized, empty_dist_coef, eye_camera_mat,
                                       num_points, subset_size, rot_matrices, transl_vectors);
-    parallel_for(BlockedRange(0, num_iters), body);
+    parallel_for_(Range(0, num_iters), body);

     // Compute scores (i.e. number of inliers) for each hypothesis
     GpuMat d_object(object);
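Note on the hunks above: they port the RANSAC hypothesis generator from the legacy BlockedRange/parallel_for API to cv::ParallelLoopBody and cv::parallel_for_, so the iteration bounds move from the range.begin()/range.end() methods to the Range::start/Range::end fields. A minimal sketch of the target pattern, assuming OpenCV 2.4.x headers (SquareBody is a hypothetical example body, not part of this commit):

    #include <vector>
    #include <opencv2/core/core.hpp>

    // A body class in the new style: derive from ParallelLoopBody and read
    // the bounds from the Range fields rather than BlockedRange methods.
    struct SquareBody : public cv::ParallelLoopBody
    {
        explicit SquareBody(std::vector<int>& data_) : data(&data_) {}

        void operator()(const cv::Range& range) const
        {
            for (int i = range.start; i < range.end; ++i)
                (*data)[i] *= (*data)[i];
        }

        std::vector<int>* data;
    };

    // usage:
    //   std::vector<int> v(1000, 2);
    //   cv::parallel_for_(cv::Range(0, (int)v.size()), SquareBody(v));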
@@ -67,8 +67,8 @@ namespace cv { namespace gpu { namespace cudev
                                   crot1.x * p.x + crot1.y * p.y + crot1.z * p.z + ctransl.y,
                                   crot2.x * p.x + crot2.y * p.y + crot2.z * p.z + ctransl.z);
            }
-           __device__ __forceinline__ TransformOp() {}
-           __device__ __forceinline__ TransformOp(const TransformOp&) {}
+           __host__ __device__ __forceinline__ TransformOp() {}
+           __host__ __device__ __forceinline__ TransformOp(const TransformOp&) {}
        };

        void call(const PtrStepSz<float3> src, const float* rot,
@@ -106,8 +106,8 @@ namespace cv { namespace gpu { namespace cudev
                                   (cproj0.x * t.x + cproj0.y * t.y) / t.z + cproj0.z,
                                   (cproj1.x * t.x + cproj1.y * t.y) / t.z + cproj1.z);
            }
-           __device__ __forceinline__ ProjectOp() {}
-           __device__ __forceinline__ ProjectOp(const ProjectOp&) {}
+           __host__ __device__ __forceinline__ ProjectOp() {}
+           __host__ __device__ __forceinline__ ProjectOp(const ProjectOp&) {}
        };

        void call(const PtrStepSz<float3> src, const float* rot,
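Note: every functor constructor in this merge gains a __host__ qualifier. A plausible motivation, consistent with the cuda-5.5-support pull request above: these functors are constructed and copied by value on the host before the kernel runs, and CUDA 5.5's nvcc is stricter about host code invoking __device__-only constructors. A compilable sketch under that assumption (PlusOne and apply are illustrative names, not OpenCV code):

    struct PlusOne
    {
        __device__ __forceinline__ int operator()(int x) const { return x + 1; }

        // Host-callable constructors let the host construct the functor and
        // copy it into the kernel's by-value parameter.
        __host__ __device__ __forceinline__ PlusOne() {}
        __host__ __device__ __forceinline__ PlusOne(const PlusOne&) {}
    };

    __global__ void apply(const int* in, int* out, int n, PlusOne op)
    {
        int i = blockIdx.x * blockDim.x + threadIdx.x;
        if (i < n)
            out[i] = op(in[i]);   // the functor arrived as a host-made copy
    }

    // host side: apply<<<blocks, threads>>>(d_in, d_out, n, PlusOne());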
@@ -62,8 +62,8 @@ namespace canny
            return ::abs(x) + ::abs(y);
        }

-       __device__ __forceinline__ L1() {}
-       __device__ __forceinline__ L1(const L1&) {}
+       __host__ __device__ __forceinline__ L1() {}
+       __host__ __device__ __forceinline__ L1(const L1&) {}
    };
    struct L2 : binary_function<int, int, float>
    {
@@ -72,8 +72,8 @@ namespace canny
            return ::sqrtf(x * x + y * y);
        }

-       __device__ __forceinline__ L2() {}
-       __device__ __forceinline__ L2(const L2&) {}
+       __host__ __device__ __forceinline__ L2() {}
+       __host__ __device__ __forceinline__ L2(const L2&) {}
    };
}

@@ -470,8 +470,8 @@ namespace canny
            return (uchar)(-(e >> 1));
        }

-       __device__ __forceinline__ GetEdges() {}
-       __device__ __forceinline__ GetEdges(const GetEdges&) {}
+       __host__ __device__ __forceinline__ GetEdges() {}
+       __host__ __device__ __forceinline__ GetEdges(const GetEdges&) {}
    };
}
@@ -162,8 +162,8 @@ namespace arithm
            return vadd4(a, b);
        }

-       __device__ __forceinline__ VAdd4() {}
-       __device__ __forceinline__ VAdd4(const VAdd4& other) {}
+       __host__ __device__ __forceinline__ VAdd4() {}
+       __host__ __device__ __forceinline__ VAdd4(const VAdd4&) {}
    };

    ////////////////////////////////////
@@ -175,8 +175,8 @@ namespace arithm
            return vadd2(a, b);
        }

-       __device__ __forceinline__ VAdd2() {}
-       __device__ __forceinline__ VAdd2(const VAdd2& other) {}
+       __host__ __device__ __forceinline__ VAdd2() {}
+       __host__ __device__ __forceinline__ VAdd2(const VAdd2&) {}
    };

    ////////////////////////////////////
@@ -188,8 +188,8 @@ namespace arithm
            return saturate_cast<D>(a + b);
        }

-       __device__ __forceinline__ AddMat() {}
-       __device__ __forceinline__ AddMat(const AddMat& other) {}
+       __host__ __device__ __forceinline__ AddMat() {}
+       __host__ __device__ __forceinline__ AddMat(const AddMat&) {}
    };
}

@@ -397,8 +397,8 @@ namespace arithm
            return vsub4(a, b);
        }

-       __device__ __forceinline__ VSub4() {}
-       __device__ __forceinline__ VSub4(const VSub4& other) {}
+       __host__ __device__ __forceinline__ VSub4() {}
+       __host__ __device__ __forceinline__ VSub4(const VSub4&) {}
    };

    ////////////////////////////////////
@@ -410,8 +410,8 @@ namespace arithm
            return vsub2(a, b);
        }

-       __device__ __forceinline__ VSub2() {}
-       __device__ __forceinline__ VSub2(const VSub2& other) {}
+       __host__ __device__ __forceinline__ VSub2() {}
+       __host__ __device__ __forceinline__ VSub2(const VSub2&) {}
    };

    ////////////////////////////////////
@@ -423,8 +423,8 @@ namespace arithm
            return saturate_cast<D>(a - b);
        }

-       __device__ __forceinline__ SubMat() {}
-       __device__ __forceinline__ SubMat(const SubMat& other) {}
+       __host__ __device__ __forceinline__ SubMat() {}
+       __host__ __device__ __forceinline__ SubMat(const SubMat&) {}
    };
}

@@ -617,8 +617,8 @@ namespace arithm
            return res;
        }

-       __device__ __forceinline__ Mul_8uc4_32f() {}
-       __device__ __forceinline__ Mul_8uc4_32f(const Mul_8uc4_32f& other) {}
+       __host__ __device__ __forceinline__ Mul_8uc4_32f() {}
+       __host__ __device__ __forceinline__ Mul_8uc4_32f(const Mul_8uc4_32f&) {}
    };

    struct Mul_16sc4_32f : binary_function<short4, float, short4>
@@ -629,8 +629,8 @@ namespace arithm
                               saturate_cast<short>(a.z * b), saturate_cast<short>(a.w * b));
        }

-       __device__ __forceinline__ Mul_16sc4_32f() {}
-       __device__ __forceinline__ Mul_16sc4_32f(const Mul_16sc4_32f& other) {}
+       __host__ __device__ __forceinline__ Mul_16sc4_32f() {}
+       __host__ __device__ __forceinline__ Mul_16sc4_32f(const Mul_16sc4_32f&) {}
    };

    template <typename T, typename D> struct Mul : binary_function<T, T, D>
@@ -640,8 +640,8 @@ namespace arithm
            return saturate_cast<D>(a * b);
        }

-       __device__ __forceinline__ Mul() {}
-       __device__ __forceinline__ Mul(const Mul& other) {}
+       __host__ __device__ __forceinline__ Mul() {}
+       __host__ __device__ __forceinline__ Mul(const Mul&) {}
    };

    template <typename T, typename S, typename D> struct MulScale : binary_function<T, T, D>
@@ -888,8 +888,8 @@ namespace arithm
            return b != 0 ? saturate_cast<D>(a / b) : 0;
        }

-       __device__ __forceinline__ Div() {}
-       __device__ __forceinline__ Div(const Div& other) {}
+       __host__ __device__ __forceinline__ Div() {}
+       __host__ __device__ __forceinline__ Div(const Div&) {}
    };
    template <typename T> struct Div<T, float> : binary_function<T, T, float>
    {
@@ -898,8 +898,8 @@ namespace arithm
            return b != 0 ? static_cast<float>(a) / b : 0;
        }

-       __device__ __forceinline__ Div() {}
-       __device__ __forceinline__ Div(const Div& other) {}
+       __host__ __device__ __forceinline__ Div() {}
+       __host__ __device__ __forceinline__ Div(const Div&) {}
    };
    template <typename T> struct Div<T, double> : binary_function<T, T, double>
    {
@@ -908,8 +908,8 @@ namespace arithm
            return b != 0 ? static_cast<double>(a) / b : 0;
        }

-       __device__ __forceinline__ Div() {}
-       __device__ __forceinline__ Div(const Div& other) {}
+       __host__ __device__ __forceinline__ Div() {}
+       __host__ __device__ __forceinline__ Div(const Div&) {}
    };

    template <typename T, typename S, typename D> struct DivScale : binary_function<T, T, D>
@@ -1196,8 +1196,8 @@ namespace arithm
            return vabsdiff4(a, b);
        }

-       __device__ __forceinline__ VAbsDiff4() {}
-       __device__ __forceinline__ VAbsDiff4(const VAbsDiff4& other) {}
+       __host__ __device__ __forceinline__ VAbsDiff4() {}
+       __host__ __device__ __forceinline__ VAbsDiff4(const VAbsDiff4&) {}
    };

    ////////////////////////////////////
@@ -1209,8 +1209,8 @@ namespace arithm
            return vabsdiff2(a, b);
        }

-       __device__ __forceinline__ VAbsDiff2() {}
-       __device__ __forceinline__ VAbsDiff2(const VAbsDiff2& other) {}
+       __host__ __device__ __forceinline__ VAbsDiff2() {}
+       __host__ __device__ __forceinline__ VAbsDiff2(const VAbsDiff2&) {}
    };

    ////////////////////////////////////
@@ -1235,8 +1235,8 @@ namespace arithm
            return saturate_cast<T>(_abs(a - b));
        }

-       __device__ __forceinline__ AbsDiffMat() {}
-       __device__ __forceinline__ AbsDiffMat(const AbsDiffMat& other) {}
+       __host__ __device__ __forceinline__ AbsDiffMat() {}
+       __host__ __device__ __forceinline__ AbsDiffMat(const AbsDiffMat&) {}
    };
}

@@ -1370,8 +1370,8 @@ namespace arithm
            return saturate_cast<T>(x * x);
        }

-       __device__ __forceinline__ Sqr() {}
-       __device__ __forceinline__ Sqr(const Sqr& other) {}
+       __host__ __device__ __forceinline__ Sqr() {}
+       __host__ __device__ __forceinline__ Sqr(const Sqr&) {}
    };
}

@@ -1466,8 +1466,8 @@ namespace arithm
            return saturate_cast<T>(f(x));
        }

-       __device__ __forceinline__ Exp() {}
-       __device__ __forceinline__ Exp(const Exp& other) {}
+       __host__ __device__ __forceinline__ Exp() {}
+       __host__ __device__ __forceinline__ Exp(const Exp&) {}
    };
}

@@ -1507,8 +1507,8 @@ namespace arithm
            return vcmpeq4(a, b);
        }

-       __device__ __forceinline__ VCmpEq4() {}
-       __device__ __forceinline__ VCmpEq4(const VCmpEq4& other) {}
+       __host__ __device__ __forceinline__ VCmpEq4() {}
+       __host__ __device__ __forceinline__ VCmpEq4(const VCmpEq4&) {}
    };
    struct VCmpNe4 : binary_function<uint, uint, uint>
    {
@@ -1517,8 +1517,8 @@ namespace arithm
            return vcmpne4(a, b);
        }

-       __device__ __forceinline__ VCmpNe4() {}
-       __device__ __forceinline__ VCmpNe4(const VCmpNe4& other) {}
+       __host__ __device__ __forceinline__ VCmpNe4() {}
+       __host__ __device__ __forceinline__ VCmpNe4(const VCmpNe4&) {}
    };
    struct VCmpLt4 : binary_function<uint, uint, uint>
    {
@@ -1527,8 +1527,8 @@ namespace arithm
            return vcmplt4(a, b);
        }

-       __device__ __forceinline__ VCmpLt4() {}
-       __device__ __forceinline__ VCmpLt4(const VCmpLt4& other) {}
+       __host__ __device__ __forceinline__ VCmpLt4() {}
+       __host__ __device__ __forceinline__ VCmpLt4(const VCmpLt4&) {}
    };
    struct VCmpLe4 : binary_function<uint, uint, uint>
    {
@@ -1537,8 +1537,8 @@ namespace arithm
            return vcmple4(a, b);
        }

-       __device__ __forceinline__ VCmpLe4() {}
-       __device__ __forceinline__ VCmpLe4(const VCmpLe4& other) {}
+       __host__ __device__ __forceinline__ VCmpLe4() {}
+       __host__ __device__ __forceinline__ VCmpLe4(const VCmpLe4&) {}
    };

    ////////////////////////////////////
@@ -2008,8 +2008,8 @@ namespace arithm
            return vmin4(a, b);
        }

-       __device__ __forceinline__ VMin4() {}
-       __device__ __forceinline__ VMin4(const VMin4& other) {}
+       __host__ __device__ __forceinline__ VMin4() {}
+       __host__ __device__ __forceinline__ VMin4(const VMin4&) {}
    };

    ////////////////////////////////////
@@ -2021,8 +2021,8 @@ namespace arithm
            return vmin2(a, b);
        }

-       __device__ __forceinline__ VMin2() {}
-       __device__ __forceinline__ VMin2(const VMin2& other) {}
+       __host__ __device__ __forceinline__ VMin2() {}
+       __host__ __device__ __forceinline__ VMin2(const VMin2&) {}
    };
}

@@ -2100,8 +2100,8 @@ namespace arithm
            return vmax4(a, b);
        }

-       __device__ __forceinline__ VMax4() {}
-       __device__ __forceinline__ VMax4(const VMax4& other) {}
+       __host__ __device__ __forceinline__ VMax4() {}
+       __host__ __device__ __forceinline__ VMax4(const VMax4&) {}
    };

    ////////////////////////////////////
@@ -2113,8 +2113,8 @@ namespace arithm
            return vmax2(a, b);
        }

-       __device__ __forceinline__ VMax2() {}
-       __device__ __forceinline__ VMax2(const VMax2& other) {}
+       __host__ __device__ __forceinline__ VMax2() {}
+       __host__ __device__ __forceinline__ VMax2(const VMax2&) {}
    };
}
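For reference: the VAdd4/VSub4/VAbsDiff4-style functors above wrap SIMD-in-a-word primitives that treat a 32-bit word as four packed bytes. A host-side emulation of the assumed semantics of vadd4 (per-byte addition with carries confined to each lane, mirroring CUDA's __vadd4; vadd4_emulated is an illustration, not the OpenCV implementation):

    #include <cstdint>
    #include <cstdio>

    // Add two packed-byte words lane by lane; each sum wraps within its byte
    // so no carry leaks into the neighbouring lane.
    static uint32_t vadd4_emulated(uint32_t a, uint32_t b)
    {
        uint32_t result = 0;
        for (int lane = 0; lane < 4; ++lane)
        {
            uint32_t shift = (uint32_t)lane * 8;
            uint32_t sum = ((a >> shift) & 0xFFu) + ((b >> shift) & 0xFFu);
            result |= (sum & 0xFFu) << shift;
        }
        return result;
    }

    int main()
    {
        // Low lane: 0xFF + 0x02 wraps to 0x01; the other lanes add normally.
        std::printf("0x%08X\n", vadd4_emulated(0x010203FFu, 0x01010102u)); // prints 0x02030401
        return 0;
    }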
@@ -188,10 +188,20 @@ double cv::gpu::norm(const GpuMat& src1, const GpuMat& src2, int normType)
     CV_Assert(src1.size() == src2.size() && src1.type() == src2.type());
     CV_Assert(normType == NORM_INF || normType == NORM_L1 || normType == NORM_L2);

-    typedef NppStatus (*npp_norm_diff_func_t)(const Npp8u* pSrc1, int nSrcStep1, const Npp8u* pSrc2, int nSrcStep2,
-                                              NppiSize oSizeROI, Npp64f* pRetVal);
+#if CUDA_VERSION < 5050
+    typedef NppStatus (*func_t)(const Npp8u* pSrc1, int nSrcStep1, const Npp8u* pSrc2, int nSrcStep2, NppiSize oSizeROI, Npp64f* pRetVal);

-    static const npp_norm_diff_func_t npp_norm_diff_func[] = {nppiNormDiff_Inf_8u_C1R, nppiNormDiff_L1_8u_C1R, nppiNormDiff_L2_8u_C1R};
+    static const func_t funcs[] = {nppiNormDiff_Inf_8u_C1R, nppiNormDiff_L1_8u_C1R, nppiNormDiff_L2_8u_C1R};
+#else
+    typedef NppStatus (*func_t)(const Npp8u* pSrc1, int nSrcStep1, const Npp8u* pSrc2, int nSrcStep2,
+                                NppiSize oSizeROI, Npp64f* pRetVal, Npp8u * pDeviceBuffer);
+
+    typedef NppStatus (*buf_size_func_t)(NppiSize oSizeROI, int* hpBufferSize);
+
+    static const func_t funcs[] = {nppiNormDiff_Inf_8u_C1R, nppiNormDiff_L1_8u_C1R, nppiNormDiff_L2_8u_C1R};
+
+    static const buf_size_func_t buf_size_funcs[] = {nppiNormDiffInfGetBufferHostSize_8u_C1R, nppiNormDiffL1GetBufferHostSize_8u_C1R, nppiNormDiffL2GetBufferHostSize_8u_C1R};
+#endif

     NppiSize sz;
     sz.width = src1.cols;
@@ -203,7 +213,16 @@ double cv::gpu::norm(const GpuMat& src1, const GpuMat& src2, int normType)

     DeviceBuffer dbuf;

-    nppSafeCall( npp_norm_diff_func[funcIdx](src1.ptr<Npp8u>(), static_cast<int>(src1.step), src2.ptr<Npp8u>(), static_cast<int>(src2.step), sz, dbuf) );
+#if CUDA_VERSION < 5050
+    nppSafeCall( funcs[funcIdx](src1.ptr<Npp8u>(), static_cast<int>(src1.step), src2.ptr<Npp8u>(), static_cast<int>(src2.step), sz, dbuf) );
+#else
+    int bufSize;
+    buf_size_funcs[funcIdx](sz, &bufSize);
+
+    GpuMat buf(1, bufSize, CV_8UC1);
+
+    nppSafeCall( funcs[funcIdx](src1.ptr<Npp8u>(), static_cast<int>(src1.step), src2.ptr<Npp8u>(), static_cast<int>(src2.step), sz, dbuf, buf.data) );
+#endif

     cudaSafeCall( cudaDeviceSynchronize() );
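Note: the #else branch above follows the NPP protocol introduced with CUDA 5.5: query the required scratch-buffer size on the host, allocate device scratch, then pass it to the reduction, which writes its Npp64f result to device memory. A sketch of that call sequence, assuming src1 and src2 are valid same-size 8UC1 GpuMats (error checking omitted for brevity):

    NppiSize sz;
    sz.width  = src1.cols;
    sz.height = src1.rows;

    int bufSize = 0;
    nppiNormDiffL2GetBufferHostSize_8u_C1R(sz, &bufSize);   // host-side size query

    cv::gpu::GpuMat scratch(1, bufSize, CV_8UC1);           // device scratch space

    Npp64f* d_result = 0;
    cudaMalloc(&d_result, sizeof(Npp64f));                  // NPP writes the norm to device memory

    nppiNormDiff_L2_8u_C1R(src1.ptr<Npp8u>(), (int)src1.step,
                           src2.ptr<Npp8u>(), (int)src2.step,
                           sz, d_result, scratch.data);

    Npp64f h_result = 0;
    cudaMemcpy(&h_result, d_result, sizeof(Npp64f), cudaMemcpyDeviceToHost);
    cudaFree(d_result);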