Merge remote-tracking branch 'origin/2.4'
Pull requests:
    #943 from jet47:cuda-5.5-support
    #944 from jet47:cmake-2.8.11-cuda-fix
    #912 from SpecLad:contributing
    #934 from SpecLad:parallel-for
    #931 from jet47:gpu-test-fixes
    #932 from bitwangyaoyao:2.4_fixBFM
    #918 from bitwangyaoyao:2.4_samples
    #924 from pengx17:2.4_arithm_fix
    #925 from pengx17:2.4_canny_tmp_fix
    #927 from bitwangyaoyao:2.4_perf
    #930 from pengx17:2.4_haar_ext
    #928 from apavlenko:bugfix_3027
    #920 from asmorkalov:android_move
    #910 from pengx17:2.4_oclgfft
    #913 from janm399:2.4
    #916 from bitwangyaoyao:2.4_fixPyrLK
    #919 from abidrahmank:2.4
    #923 from pengx17:2.4_macfix

Conflicts:
    modules/calib3d/src/stereobm.cpp
    modules/features2d/src/detectors.cpp
    modules/gpu/src/error.cpp
    modules/gpu/src/precomp.hpp
    modules/imgproc/src/distransform.cpp
    modules/imgproc/src/morph.cpp
    modules/ocl/include/opencv2/ocl/ocl.hpp
    modules/ocl/perf/perf_color.cpp
    modules/ocl/perf/perf_imgproc.cpp
    modules/ocl/perf/perf_match_template.cpp
    modules/ocl/perf/precomp.cpp
    modules/ocl/perf/precomp.hpp
    modules/ocl/src/arithm.cpp
    modules/ocl/src/canny.cpp
    modules/ocl/src/filtering.cpp
    modules/ocl/src/haar.cpp
    modules/ocl/src/hog.cpp
    modules/ocl/src/imgproc.cpp
    modules/ocl/src/opencl/haarobjectdetect.cl
    modules/ocl/src/pyrlk.cpp
    modules/video/src/bgfg_gaussmix2.cpp
    modules/video/src/lkpyramid.cpp
    platforms/linux/scripts/cmake_arm_gnueabi_hardfp.sh
    platforms/linux/scripts/cmake_arm_gnueabi_softfp.sh
    platforms/scripts/ABI_compat_generator.py
    samples/ocl/facedetect.cpp
This commit is contained in:
@@ -40,10 +40,6 @@
|
||||
|
||||
#include "precomp.hpp"
|
||||
|
||||
#ifdef HAVE_TBB
|
||||
#include <tbb/tbb.h>
|
||||
#endif
|
||||
|
||||
CvANN_MLP_TrainParams::CvANN_MLP_TrainParams()
|
||||
{
|
||||
term_crit = cvTermCriteria( CV_TERMCRIT_ITER + CV_TERMCRIT_EPS, 1000, 0.01 );
|
||||
@@ -1022,7 +1018,7 @@ int CvANN_MLP::train_backprop( CvVectors x0, CvVectors u, const double* sw )
|
||||
return iter;
|
||||
}
|
||||
|
||||
struct rprop_loop {
|
||||
struct rprop_loop : cv::ParallelLoopBody {
|
||||
rprop_loop(const CvANN_MLP* _point, double**& _weights, int& _count, int& _ivcount, CvVectors* _x0,
|
||||
int& _l_count, CvMat*& _layer_sizes, int& _ovcount, int& _max_count,
|
||||
CvVectors* _u, const double*& _sw, double& _inv_count, CvMat*& _dEdw, int& _dcount0, double* _E, int _buf_sz)
|
||||
@@ -1063,7 +1059,7 @@ struct rprop_loop {
|
||||
int buf_sz;
|
||||
|
||||
|
||||
void operator()( const cv::BlockedRange& range ) const
|
||||
void operator()( const cv::Range& range ) const
|
||||
{
|
||||
double* buf_ptr;
|
||||
double** x = 0;
|
||||
@@ -1084,7 +1080,7 @@ struct rprop_loop {
|
||||
buf_ptr += (df[i] - x[i])*2;
|
||||
}
|
||||
|
||||
for(int si = range.begin(); si < range.end(); si++ )
|
||||
for(int si = range.start; si < range.end; si++ )
|
||||
{
|
||||
if (si % dcount0 != 0) continue;
|
||||
int n1, n2, k;
|
||||
@@ -1170,36 +1166,33 @@ struct rprop_loop {
|
||||
}
|
||||
|
||||
// backward pass, update dEdw
|
||||
#ifdef HAVE_TBB
|
||||
static tbb::spin_mutex mutex;
|
||||
tbb::spin_mutex::scoped_lock lock;
|
||||
#endif
|
||||
static cv::Mutex mutex;
|
||||
|
||||
for(int i = l_count-1; i > 0; i-- )
|
||||
{
|
||||
n1 = layer_sizes->data.i[i-1]; n2 = layer_sizes->data.i[i];
|
||||
cvInitMatHeader( &_df, dcount, n2, CV_64F, df[i] );
|
||||
cvMul( grad1, &_df, grad1 );
|
||||
#ifdef HAVE_TBB
|
||||
lock.acquire(mutex);
|
||||
#endif
|
||||
cvInitMatHeader( &_dEdw, n1, n2, CV_64F, dEdw->data.db+(weights[i]-weights[0]) );
|
||||
cvInitMatHeader( x1, dcount, n1, CV_64F, x[i-1] );
|
||||
cvGEMM( x1, grad1, 1, &_dEdw, 1, &_dEdw, CV_GEMM_A_T );
|
||||
|
||||
// update bias part of dEdw
|
||||
for( k = 0; k < dcount; k++ )
|
||||
{
|
||||
double* dst = _dEdw.data.db + n1*n2;
|
||||
const double* src = grad1->data.db + k*n2;
|
||||
for(int j = 0; j < n2; j++ )
|
||||
dst[j] += src[j];
|
||||
{
|
||||
cv::AutoLock lock(mutex);
|
||||
cvInitMatHeader( &_dEdw, n1, n2, CV_64F, dEdw->data.db+(weights[i]-weights[0]) );
|
||||
cvInitMatHeader( x1, dcount, n1, CV_64F, x[i-1] );
|
||||
cvGEMM( x1, grad1, 1, &_dEdw, 1, &_dEdw, CV_GEMM_A_T );
|
||||
|
||||
// update bias part of dEdw
|
||||
for( k = 0; k < dcount; k++ )
|
||||
{
|
||||
double* dst = _dEdw.data.db + n1*n2;
|
||||
const double* src = grad1->data.db + k*n2;
|
||||
for(int j = 0; j < n2; j++ )
|
||||
dst[j] += src[j];
|
||||
}
|
||||
|
||||
if (i > 1)
|
||||
cvInitMatHeader( &_w, n1, n2, CV_64F, weights[i] );
|
||||
}
|
||||
|
||||
if (i > 1)
|
||||
cvInitMatHeader( &_w, n1, n2, CV_64F, weights[i] );
|
||||
#ifdef HAVE_TBB
|
||||
lock.release();
|
||||
#endif
|
||||
cvInitMatHeader( grad2, dcount, n1, CV_64F, grad2->data.db );
|
||||
if( i > 1 )
|
||||
cvGEMM( grad1, &_w, 1, 0, 0, grad2, CV_GEMM_B_T );
|
||||
@@ -1297,7 +1290,7 @@ int CvANN_MLP::train_rprop( CvVectors x0, CvVectors u, const double* sw )
|
||||
double E = 0;
|
||||
|
||||
// first, iterate through all the samples and compute dEdw
|
||||
cv::parallel_for(cv::BlockedRange(0, count),
|
||||
cv::parallel_for_(cv::Range(0, count),
|
||||
rprop_loop(this, weights, count, ivcount, &x0, l_count, layer_sizes,
|
||||
ovcount, max_count, &u, sw, inv_count, dEdw, dcount0, &E, buf_sz)
|
||||
);
|
||||
|
@@ -884,7 +884,7 @@ float CvGBTrees::predict_serial( const CvMat* _sample, const CvMat* _missing,
|
||||
}
|
||||
|
||||
|
||||
class Tree_predictor
|
||||
class Tree_predictor : public cv::ParallelLoopBody
|
||||
{
|
||||
private:
|
||||
pCvSeq* weak;
|
||||
@@ -894,9 +894,7 @@ private:
|
||||
const CvMat* missing;
|
||||
const float shrinkage;
|
||||
|
||||
#ifdef HAVE_TBB
|
||||
static tbb::spin_mutex SumMutex;
|
||||
#endif
|
||||
static cv::Mutex SumMutex;
|
||||
|
||||
|
||||
public:
|
||||
@@ -915,14 +913,11 @@ public:
|
||||
Tree_predictor& operator=( const Tree_predictor& )
|
||||
{ return *this; }
|
||||
|
||||
virtual void operator()(const cv::BlockedRange& range) const
|
||||
virtual void operator()(const cv::Range& range) const
|
||||
{
|
||||
#ifdef HAVE_TBB
|
||||
tbb::spin_mutex::scoped_lock lock;
|
||||
#endif
|
||||
CvSeqReader reader;
|
||||
int begin = range.begin();
|
||||
int end = range.end();
|
||||
int begin = range.start;
|
||||
int end = range.end;
|
||||
|
||||
int weak_count = end - begin;
|
||||
CvDTree* tree;
|
||||
@@ -940,13 +935,11 @@ public:
|
||||
tmp_sum += shrinkage*(float)(tree->predict(sample, missing)->value);
|
||||
}
|
||||
}
|
||||
#ifdef HAVE_TBB
|
||||
lock.acquire(SumMutex);
|
||||
sum[i] += tmp_sum;
|
||||
lock.release();
|
||||
#else
|
||||
sum[i] += tmp_sum;
|
||||
#endif
|
||||
|
||||
{
|
||||
cv::AutoLock lock(SumMutex);
|
||||
sum[i] += tmp_sum;
|
||||
}
|
||||
}
|
||||
} // Tree_predictor::operator()
|
||||
|
||||
@@ -954,11 +947,7 @@ public:
|
||||
|
||||
}; // class Tree_predictor
|
||||
|
||||
|
||||
#ifdef HAVE_TBB
|
||||
tbb::spin_mutex Tree_predictor::SumMutex;
|
||||
#endif
|
||||
|
||||
cv::Mutex Tree_predictor::SumMutex;
|
||||
|
||||
|
||||
float CvGBTrees::predict( const CvMat* _sample, const CvMat* _missing,
|
||||
@@ -976,12 +965,7 @@ float CvGBTrees::predict( const CvMat* _sample, const CvMat* _missing,
|
||||
Tree_predictor predictor = Tree_predictor(weak_seq, class_count,
|
||||
params.shrinkage, _sample, _missing, sum);
|
||||
|
||||
//#ifdef HAVE_TBB
|
||||
// tbb::parallel_for(cv::BlockedRange(begin, end), predictor,
|
||||
// tbb::auto_partitioner());
|
||||
//#else
|
||||
cv::parallel_for(cv::BlockedRange(begin, end), predictor);
|
||||
//#endif
|
||||
cv::parallel_for_(cv::Range(begin, end), predictor);
|
||||
|
||||
for (int i=0; i<class_count; ++i)
|
||||
sum[i] = sum[i] /** params.shrinkage*/ + base_value;
|
||||
@@ -1210,7 +1194,7 @@ void CvGBTrees::read( CvFileStorage* fs, CvFileNode* node )
|
||||
|
||||
//===========================================================================
|
||||
|
||||
class Sample_predictor
|
||||
class Sample_predictor : public cv::ParallelLoopBody
|
||||
{
|
||||
private:
|
||||
const CvGBTrees* gbt;
|
||||
@@ -1240,10 +1224,10 @@ public:
|
||||
{}
|
||||
|
||||
|
||||
virtual void operator()(const cv::BlockedRange& range) const
|
||||
virtual void operator()(const cv::Range& range) const
|
||||
{
|
||||
int begin = range.begin();
|
||||
int end = range.end();
|
||||
int begin = range.start;
|
||||
int end = range.end;
|
||||
|
||||
CvMat x;
|
||||
CvMat miss;
|
||||
@@ -1299,11 +1283,7 @@ CvGBTrees::calc_error( CvMLData* _data, int type, std::vector<float> *resp )
|
||||
Sample_predictor predictor = Sample_predictor(this, pred_resp, _data->get_values(),
|
||||
_data->get_missing(), _sample_idx);
|
||||
|
||||
//#ifdef HAVE_TBB
|
||||
// tbb::parallel_for(cv::BlockedRange(0,n), predictor, tbb::auto_partitioner());
|
||||
//#else
|
||||
cv::parallel_for(cv::BlockedRange(0,n), predictor);
|
||||
//#endif
|
||||
cv::parallel_for_(cv::Range(0,n), predictor);
|
||||
|
||||
int* sidx = _sample_idx ? _sample_idx->data.i : 0;
|
||||
int r_step = CV_IS_MAT_CONT(response->type) ?
|
||||
|
@@ -306,7 +306,7 @@ float CvKNearest::write_results( int k, int k1, int start, int end,
|
||||
return result;
|
||||
}
|
||||
|
||||
struct P1 {
|
||||
struct P1 : cv::ParallelLoopBody {
|
||||
P1(const CvKNearest* _pointer, int _buf_sz, int _k, const CvMat* __samples, const float** __neighbors,
|
||||
int _k1, CvMat* __results, CvMat* __neighbor_responses, CvMat* __dist, float* _result)
|
||||
{
|
||||
@@ -333,10 +333,10 @@ struct P1 {
|
||||
float* result;
|
||||
int buf_sz;
|
||||
|
||||
void operator()( const cv::BlockedRange& range ) const
|
||||
void operator()( const cv::Range& range ) const
|
||||
{
|
||||
cv::AutoBuffer<float> buf(buf_sz);
|
||||
for(int i = range.begin(); i < range.end(); i += 1 )
|
||||
for(int i = range.start; i < range.end; i += 1 )
|
||||
{
|
||||
float* neighbor_responses = &buf[0];
|
||||
float* dist = neighbor_responses + 1*k;
|
||||
@@ -410,8 +410,8 @@ float CvKNearest::find_nearest( const CvMat* _samples, int k, CvMat* _results,
|
||||
int k1 = get_sample_count();
|
||||
k1 = MIN( k1, k );
|
||||
|
||||
cv::parallel_for(cv::BlockedRange(0, count), P1(this, buf_sz, k, _samples, _neighbors, k1,
|
||||
_results, _neighbor_responses, _dist, &result)
|
||||
cv::parallel_for_(cv::Range(0, count), P1(this, buf_sz, k, _samples, _neighbors, k1,
|
||||
_results, _neighbor_responses, _dist, &result)
|
||||
);
|
||||
|
||||
return result;
|
||||
|
@@ -277,7 +277,7 @@ bool CvNormalBayesClassifier::train( const CvMat* _train_data, const CvMat* _res
|
||||
return result;
|
||||
}
|
||||
|
||||
struct predict_body {
|
||||
struct predict_body : cv::ParallelLoopBody {
|
||||
predict_body(CvMat* _c, CvMat** _cov_rotate_mats, CvMat** _inv_eigen_values, CvMat** _avg,
|
||||
const CvMat* _samples, const int* _vidx, CvMat* _cls_labels,
|
||||
CvMat* _results, float* _value, int _var_count1
|
||||
@@ -307,7 +307,7 @@ struct predict_body {
|
||||
float* value;
|
||||
int var_count1;
|
||||
|
||||
void operator()( const cv::BlockedRange& range ) const
|
||||
void operator()( const cv::Range& range ) const
|
||||
{
|
||||
|
||||
int cls = -1;
|
||||
@@ -324,7 +324,7 @@ struct predict_body {
|
||||
cv::AutoBuffer<double> buffer(nclasses + var_count1);
|
||||
CvMat diff = cvMat( 1, var_count1, CV_64FC1, &buffer[0] );
|
||||
|
||||
for(int k = range.begin(); k < range.end(); k += 1 )
|
||||
for(int k = range.start; k < range.end; k += 1 )
|
||||
{
|
||||
int ival;
|
||||
double opt = FLT_MAX;
|
||||
@@ -397,9 +397,9 @@ float CvNormalBayesClassifier::predict( const CvMat* samples, CvMat* results ) c
|
||||
|
||||
const int* vidx = var_idx ? var_idx->data.i : 0;
|
||||
|
||||
cv::parallel_for(cv::BlockedRange(0, samples->rows), predict_body(c, cov_rotate_mats, inv_eigen_values, avg, samples,
|
||||
vidx, cls_labels, results, &value, var_count
|
||||
));
|
||||
cv::parallel_for_(cv::Range(0, samples->rows),
|
||||
predict_body(c, cov_rotate_mats, inv_eigen_values, avg, samples,
|
||||
vidx, cls_labels, results, &value, var_count));
|
||||
|
||||
return value;
|
||||
}
|
||||
|
@@ -2192,7 +2192,7 @@ float CvSVM::predict( const CvMat* sample, bool returnDFVal ) const
|
||||
return result;
|
||||
}
|
||||
|
||||
struct predict_body_svm {
|
||||
struct predict_body_svm : ParallelLoopBody {
|
||||
predict_body_svm(const CvSVM* _pointer, float* _result, const CvMat* _samples, CvMat* _results)
|
||||
{
|
||||
pointer = _pointer;
|
||||
@@ -2206,9 +2206,9 @@ struct predict_body_svm {
|
||||
const CvMat* samples;
|
||||
CvMat* results;
|
||||
|
||||
void operator()( const cv::BlockedRange& range ) const
|
||||
void operator()( const cv::Range& range ) const
|
||||
{
|
||||
for(int i = range.begin(); i < range.end(); i++ )
|
||||
for(int i = range.start; i < range.end; i++ )
|
||||
{
|
||||
CvMat sample;
|
||||
cvGetRow( samples, &sample, i );
|
||||
@@ -2224,7 +2224,7 @@ struct predict_body_svm {
|
||||
float CvSVM::predict(const CvMat* samples, CV_OUT CvMat* results) const
|
||||
{
|
||||
float result = 0;
|
||||
cv::parallel_for(cv::BlockedRange(0, samples->rows),
|
||||
cv::parallel_for_(cv::Range(0, samples->rows),
|
||||
predict_body_svm(this, &result, samples, results)
|
||||
);
|
||||
return result;
|
||||
|
Reference in New Issue
Block a user