Merge remote-tracking branch 'origin/2.4'

Pull requests:
	#943 from jet47:cuda-5.5-support
	#944 from jet47:cmake-2.8.11-cuda-fix
	#912 from SpecLad:contributing
	#934 from SpecLad:parallel-for
	#931 from jet47:gpu-test-fixes
	#932 from bitwangyaoyao:2.4_fixBFM
	#918 from bitwangyaoyao:2.4_samples
	#924 from pengx17:2.4_arithm_fix
	#925 from pengx17:2.4_canny_tmp_fix
	#927 from bitwangyaoyao:2.4_perf
	#930 from pengx17:2.4_haar_ext
	#928 from apavlenko:bugfix_3027
	#920 from asmorkalov:android_move
	#910 from pengx17:2.4_oclgfft
	#913 from janm399:2.4
	#916 from bitwangyaoyao:2.4_fixPyrLK
	#919 from abidrahmank:2.4
	#923 from pengx17:2.4_macfix

Conflicts:
	modules/calib3d/src/stereobm.cpp
	modules/features2d/src/detectors.cpp
	modules/gpu/src/error.cpp
	modules/gpu/src/precomp.hpp
	modules/imgproc/src/distransform.cpp
	modules/imgproc/src/morph.cpp
	modules/ocl/include/opencv2/ocl/ocl.hpp
	modules/ocl/perf/perf_color.cpp
	modules/ocl/perf/perf_imgproc.cpp
	modules/ocl/perf/perf_match_template.cpp
	modules/ocl/perf/precomp.cpp
	modules/ocl/perf/precomp.hpp
	modules/ocl/src/arithm.cpp
	modules/ocl/src/canny.cpp
	modules/ocl/src/filtering.cpp
	modules/ocl/src/haar.cpp
	modules/ocl/src/hog.cpp
	modules/ocl/src/imgproc.cpp
	modules/ocl/src/opencl/haarobjectdetect.cl
	modules/ocl/src/pyrlk.cpp
	modules/video/src/bgfg_gaussmix2.cpp
	modules/video/src/lkpyramid.cpp
	platforms/linux/scripts/cmake_arm_gnueabi_hardfp.sh
	platforms/linux/scripts/cmake_arm_gnueabi_softfp.sh
	platforms/scripts/ABI_compat_generator.py
	samples/ocl/facedetect.cpp
This commit is contained in:
Roman Donchenko
2013-06-04 18:31:51 +04:00
236 changed files with 5549 additions and 3276 deletions

View File

@@ -40,10 +40,6 @@
#include "precomp.hpp"
#ifdef HAVE_TBB
#include <tbb/tbb.h>
#endif
CvANN_MLP_TrainParams::CvANN_MLP_TrainParams()
{
term_crit = cvTermCriteria( CV_TERMCRIT_ITER + CV_TERMCRIT_EPS, 1000, 0.01 );
@@ -1022,7 +1018,7 @@ int CvANN_MLP::train_backprop( CvVectors x0, CvVectors u, const double* sw )
return iter;
}
struct rprop_loop {
struct rprop_loop : cv::ParallelLoopBody {
rprop_loop(const CvANN_MLP* _point, double**& _weights, int& _count, int& _ivcount, CvVectors* _x0,
int& _l_count, CvMat*& _layer_sizes, int& _ovcount, int& _max_count,
CvVectors* _u, const double*& _sw, double& _inv_count, CvMat*& _dEdw, int& _dcount0, double* _E, int _buf_sz)
@@ -1063,7 +1059,7 @@ struct rprop_loop {
int buf_sz;
void operator()( const cv::BlockedRange& range ) const
void operator()( const cv::Range& range ) const
{
double* buf_ptr;
double** x = 0;
@@ -1084,7 +1080,7 @@ struct rprop_loop {
buf_ptr += (df[i] - x[i])*2;
}
for(int si = range.begin(); si < range.end(); si++ )
for(int si = range.start; si < range.end; si++ )
{
if (si % dcount0 != 0) continue;
int n1, n2, k;
@@ -1170,36 +1166,33 @@ struct rprop_loop {
}
// backward pass, update dEdw
#ifdef HAVE_TBB
static tbb::spin_mutex mutex;
tbb::spin_mutex::scoped_lock lock;
#endif
static cv::Mutex mutex;
for(int i = l_count-1; i > 0; i-- )
{
n1 = layer_sizes->data.i[i-1]; n2 = layer_sizes->data.i[i];
cvInitMatHeader( &_df, dcount, n2, CV_64F, df[i] );
cvMul( grad1, &_df, grad1 );
#ifdef HAVE_TBB
lock.acquire(mutex);
#endif
cvInitMatHeader( &_dEdw, n1, n2, CV_64F, dEdw->data.db+(weights[i]-weights[0]) );
cvInitMatHeader( x1, dcount, n1, CV_64F, x[i-1] );
cvGEMM( x1, grad1, 1, &_dEdw, 1, &_dEdw, CV_GEMM_A_T );
// update bias part of dEdw
for( k = 0; k < dcount; k++ )
{
double* dst = _dEdw.data.db + n1*n2;
const double* src = grad1->data.db + k*n2;
for(int j = 0; j < n2; j++ )
dst[j] += src[j];
{
cv::AutoLock lock(mutex);
cvInitMatHeader( &_dEdw, n1, n2, CV_64F, dEdw->data.db+(weights[i]-weights[0]) );
cvInitMatHeader( x1, dcount, n1, CV_64F, x[i-1] );
cvGEMM( x1, grad1, 1, &_dEdw, 1, &_dEdw, CV_GEMM_A_T );
// update bias part of dEdw
for( k = 0; k < dcount; k++ )
{
double* dst = _dEdw.data.db + n1*n2;
const double* src = grad1->data.db + k*n2;
for(int j = 0; j < n2; j++ )
dst[j] += src[j];
}
if (i > 1)
cvInitMatHeader( &_w, n1, n2, CV_64F, weights[i] );
}
if (i > 1)
cvInitMatHeader( &_w, n1, n2, CV_64F, weights[i] );
#ifdef HAVE_TBB
lock.release();
#endif
cvInitMatHeader( grad2, dcount, n1, CV_64F, grad2->data.db );
if( i > 1 )
cvGEMM( grad1, &_w, 1, 0, 0, grad2, CV_GEMM_B_T );
@@ -1297,7 +1290,7 @@ int CvANN_MLP::train_rprop( CvVectors x0, CvVectors u, const double* sw )
double E = 0;
// first, iterate through all the samples and compute dEdw
cv::parallel_for(cv::BlockedRange(0, count),
cv::parallel_for_(cv::Range(0, count),
rprop_loop(this, weights, count, ivcount, &x0, l_count, layer_sizes,
ovcount, max_count, &u, sw, inv_count, dEdw, dcount0, &E, buf_sz)
);

View File

@@ -884,7 +884,7 @@ float CvGBTrees::predict_serial( const CvMat* _sample, const CvMat* _missing,
}
class Tree_predictor
class Tree_predictor : public cv::ParallelLoopBody
{
private:
pCvSeq* weak;
@@ -894,9 +894,7 @@ private:
const CvMat* missing;
const float shrinkage;
#ifdef HAVE_TBB
static tbb::spin_mutex SumMutex;
#endif
static cv::Mutex SumMutex;
public:
@@ -915,14 +913,11 @@ public:
Tree_predictor& operator=( const Tree_predictor& )
{ return *this; }
virtual void operator()(const cv::BlockedRange& range) const
virtual void operator()(const cv::Range& range) const
{
#ifdef HAVE_TBB
tbb::spin_mutex::scoped_lock lock;
#endif
CvSeqReader reader;
int begin = range.begin();
int end = range.end();
int begin = range.start;
int end = range.end;
int weak_count = end - begin;
CvDTree* tree;
@@ -940,13 +935,11 @@ public:
tmp_sum += shrinkage*(float)(tree->predict(sample, missing)->value);
}
}
#ifdef HAVE_TBB
lock.acquire(SumMutex);
sum[i] += tmp_sum;
lock.release();
#else
sum[i] += tmp_sum;
#endif
{
cv::AutoLock lock(SumMutex);
sum[i] += tmp_sum;
}
}
} // Tree_predictor::operator()
@@ -954,11 +947,7 @@ public:
}; // class Tree_predictor
#ifdef HAVE_TBB
tbb::spin_mutex Tree_predictor::SumMutex;
#endif
cv::Mutex Tree_predictor::SumMutex;
float CvGBTrees::predict( const CvMat* _sample, const CvMat* _missing,
@@ -976,12 +965,7 @@ float CvGBTrees::predict( const CvMat* _sample, const CvMat* _missing,
Tree_predictor predictor = Tree_predictor(weak_seq, class_count,
params.shrinkage, _sample, _missing, sum);
//#ifdef HAVE_TBB
// tbb::parallel_for(cv::BlockedRange(begin, end), predictor,
// tbb::auto_partitioner());
//#else
cv::parallel_for(cv::BlockedRange(begin, end), predictor);
//#endif
cv::parallel_for_(cv::Range(begin, end), predictor);
for (int i=0; i<class_count; ++i)
sum[i] = sum[i] /** params.shrinkage*/ + base_value;
@@ -1210,7 +1194,7 @@ void CvGBTrees::read( CvFileStorage* fs, CvFileNode* node )
//===========================================================================
class Sample_predictor
class Sample_predictor : public cv::ParallelLoopBody
{
private:
const CvGBTrees* gbt;
@@ -1240,10 +1224,10 @@ public:
{}
virtual void operator()(const cv::BlockedRange& range) const
virtual void operator()(const cv::Range& range) const
{
int begin = range.begin();
int end = range.end();
int begin = range.start;
int end = range.end;
CvMat x;
CvMat miss;
@@ -1299,11 +1283,7 @@ CvGBTrees::calc_error( CvMLData* _data, int type, std::vector<float> *resp )
Sample_predictor predictor = Sample_predictor(this, pred_resp, _data->get_values(),
_data->get_missing(), _sample_idx);
//#ifdef HAVE_TBB
// tbb::parallel_for(cv::BlockedRange(0,n), predictor, tbb::auto_partitioner());
//#else
cv::parallel_for(cv::BlockedRange(0,n), predictor);
//#endif
cv::parallel_for_(cv::Range(0,n), predictor);
int* sidx = _sample_idx ? _sample_idx->data.i : 0;
int r_step = CV_IS_MAT_CONT(response->type) ?

View File

@@ -306,7 +306,7 @@ float CvKNearest::write_results( int k, int k1, int start, int end,
return result;
}
struct P1 {
struct P1 : cv::ParallelLoopBody {
P1(const CvKNearest* _pointer, int _buf_sz, int _k, const CvMat* __samples, const float** __neighbors,
int _k1, CvMat* __results, CvMat* __neighbor_responses, CvMat* __dist, float* _result)
{
@@ -333,10 +333,10 @@ struct P1 {
float* result;
int buf_sz;
void operator()( const cv::BlockedRange& range ) const
void operator()( const cv::Range& range ) const
{
cv::AutoBuffer<float> buf(buf_sz);
for(int i = range.begin(); i < range.end(); i += 1 )
for(int i = range.start; i < range.end; i += 1 )
{
float* neighbor_responses = &buf[0];
float* dist = neighbor_responses + 1*k;
@@ -410,8 +410,8 @@ float CvKNearest::find_nearest( const CvMat* _samples, int k, CvMat* _results,
int k1 = get_sample_count();
k1 = MIN( k1, k );
cv::parallel_for(cv::BlockedRange(0, count), P1(this, buf_sz, k, _samples, _neighbors, k1,
_results, _neighbor_responses, _dist, &result)
cv::parallel_for_(cv::Range(0, count), P1(this, buf_sz, k, _samples, _neighbors, k1,
_results, _neighbor_responses, _dist, &result)
);
return result;

View File

@@ -277,7 +277,7 @@ bool CvNormalBayesClassifier::train( const CvMat* _train_data, const CvMat* _res
return result;
}
struct predict_body {
struct predict_body : cv::ParallelLoopBody {
predict_body(CvMat* _c, CvMat** _cov_rotate_mats, CvMat** _inv_eigen_values, CvMat** _avg,
const CvMat* _samples, const int* _vidx, CvMat* _cls_labels,
CvMat* _results, float* _value, int _var_count1
@@ -307,7 +307,7 @@ struct predict_body {
float* value;
int var_count1;
void operator()( const cv::BlockedRange& range ) const
void operator()( const cv::Range& range ) const
{
int cls = -1;
@@ -324,7 +324,7 @@ struct predict_body {
cv::AutoBuffer<double> buffer(nclasses + var_count1);
CvMat diff = cvMat( 1, var_count1, CV_64FC1, &buffer[0] );
for(int k = range.begin(); k < range.end(); k += 1 )
for(int k = range.start; k < range.end; k += 1 )
{
int ival;
double opt = FLT_MAX;
@@ -397,9 +397,9 @@ float CvNormalBayesClassifier::predict( const CvMat* samples, CvMat* results ) c
const int* vidx = var_idx ? var_idx->data.i : 0;
cv::parallel_for(cv::BlockedRange(0, samples->rows), predict_body(c, cov_rotate_mats, inv_eigen_values, avg, samples,
vidx, cls_labels, results, &value, var_count
));
cv::parallel_for_(cv::Range(0, samples->rows),
predict_body(c, cov_rotate_mats, inv_eigen_values, avg, samples,
vidx, cls_labels, results, &value, var_count));
return value;
}

View File

@@ -2192,7 +2192,7 @@ float CvSVM::predict( const CvMat* sample, bool returnDFVal ) const
return result;
}
struct predict_body_svm {
struct predict_body_svm : ParallelLoopBody {
predict_body_svm(const CvSVM* _pointer, float* _result, const CvMat* _samples, CvMat* _results)
{
pointer = _pointer;
@@ -2206,9 +2206,9 @@ struct predict_body_svm {
const CvMat* samples;
CvMat* results;
void operator()( const cv::BlockedRange& range ) const
void operator()( const cv::Range& range ) const
{
for(int i = range.begin(); i < range.end(); i++ )
for(int i = range.start; i < range.end; i++ )
{
CvMat sample;
cvGetRow( samples, &sample, i );
@@ -2224,7 +2224,7 @@ struct predict_body_svm {
float CvSVM::predict(const CvMat* samples, CV_OUT CvMat* results) const
{
float result = 0;
cv::parallel_for(cv::BlockedRange(0, samples->rows),
cv::parallel_for_(cv::Range(0, samples->rows),
predict_body_svm(this, &result, samples, results)
);
return result;