Replaced most of the usages of parallel_for with that of parallel_for_.

This should allow many algorithms to take advantage of more parallelization
technologies.
This commit is contained in:
Roman Donchenko
2013-05-30 18:44:33 +04:00
parent 37091b086c
commit 29b13ec1de
24 changed files with 232 additions and 352 deletions

View File

@@ -40,10 +40,6 @@
#include "precomp.hpp"
#ifdef HAVE_TBB
#include <tbb/tbb.h>
#endif
CvANN_MLP_TrainParams::CvANN_MLP_TrainParams()
{
term_crit = cvTermCriteria( CV_TERMCRIT_ITER + CV_TERMCRIT_EPS, 1000, 0.01 );
@@ -1022,7 +1018,7 @@ int CvANN_MLP::train_backprop( CvVectors x0, CvVectors u, const double* sw )
return iter;
}
struct rprop_loop {
struct rprop_loop : cv::ParallelLoopBody {
rprop_loop(const CvANN_MLP* _point, double**& _weights, int& _count, int& _ivcount, CvVectors* _x0,
int& _l_count, CvMat*& _layer_sizes, int& _ovcount, int& _max_count,
CvVectors* _u, const double*& _sw, double& _inv_count, CvMat*& _dEdw, int& _dcount0, double* _E, int _buf_sz)
@@ -1063,7 +1059,7 @@ struct rprop_loop {
int buf_sz;
void operator()( const cv::BlockedRange& range ) const
void operator()( const cv::Range& range ) const
{
double* buf_ptr;
double** x = 0;
@@ -1084,7 +1080,7 @@ struct rprop_loop {
buf_ptr += (df[i] - x[i])*2;
}
for(int si = range.begin(); si < range.end(); si++ )
for(int si = range.start; si < range.end; si++ )
{
if (si % dcount0 != 0) continue;
int n1, n2, k;
@@ -1170,36 +1166,33 @@ struct rprop_loop {
}
// backward pass, update dEdw
#ifdef HAVE_TBB
static tbb::spin_mutex mutex;
tbb::spin_mutex::scoped_lock lock;
#endif
static cv::Mutex mutex;
for(int i = l_count-1; i > 0; i-- )
{
n1 = layer_sizes->data.i[i-1]; n2 = layer_sizes->data.i[i];
cvInitMatHeader( &_df, dcount, n2, CV_64F, df[i] );
cvMul( grad1, &_df, grad1 );
#ifdef HAVE_TBB
lock.acquire(mutex);
#endif
cvInitMatHeader( &_dEdw, n1, n2, CV_64F, dEdw->data.db+(weights[i]-weights[0]) );
cvInitMatHeader( x1, dcount, n1, CV_64F, x[i-1] );
cvGEMM( x1, grad1, 1, &_dEdw, 1, &_dEdw, CV_GEMM_A_T );
// update bias part of dEdw
for( k = 0; k < dcount; k++ )
{
double* dst = _dEdw.data.db + n1*n2;
const double* src = grad1->data.db + k*n2;
for(int j = 0; j < n2; j++ )
dst[j] += src[j];
{
cv::AutoLock lock(mutex);
cvInitMatHeader( &_dEdw, n1, n2, CV_64F, dEdw->data.db+(weights[i]-weights[0]) );
cvInitMatHeader( x1, dcount, n1, CV_64F, x[i-1] );
cvGEMM( x1, grad1, 1, &_dEdw, 1, &_dEdw, CV_GEMM_A_T );
// update bias part of dEdw
for( k = 0; k < dcount; k++ )
{
double* dst = _dEdw.data.db + n1*n2;
const double* src = grad1->data.db + k*n2;
for(int j = 0; j < n2; j++ )
dst[j] += src[j];
}
if (i > 1)
cvInitMatHeader( &_w, n1, n2, CV_64F, weights[i] );
}
if (i > 1)
cvInitMatHeader( &_w, n1, n2, CV_64F, weights[i] );
#ifdef HAVE_TBB
lock.release();
#endif
cvInitMatHeader( grad2, dcount, n1, CV_64F, grad2->data.db );
if( i > 1 )
cvGEMM( grad1, &_w, 1, 0, 0, grad2, CV_GEMM_B_T );
@@ -1297,7 +1290,7 @@ int CvANN_MLP::train_rprop( CvVectors x0, CvVectors u, const double* sw )
double E = 0;
// first, iterate through all the samples and compute dEdw
cv::parallel_for(cv::BlockedRange(0, count),
cv::parallel_for_(cv::Range(0, count),
rprop_loop(this, weights, count, ivcount, &x0, l_count, layer_sizes,
ovcount, max_count, &u, sw, inv_count, dEdw, dcount0, &E, buf_sz)
);

View File

@@ -900,7 +900,7 @@ float CvGBTrees::predict_serial( const CvMat* _sample, const CvMat* _missing,
}
class Tree_predictor
class Tree_predictor : public cv::ParallelLoopBody
{
private:
pCvSeq* weak;
@@ -910,9 +910,7 @@ private:
const CvMat* missing;
const float shrinkage;
#ifdef HAVE_TBB
static tbb::spin_mutex SumMutex;
#endif
static cv::Mutex SumMutex;
public:
@@ -931,14 +929,11 @@ public:
Tree_predictor& operator=( const Tree_predictor& )
{ return *this; }
virtual void operator()(const cv::BlockedRange& range) const
virtual void operator()(const cv::Range& range) const
{
#ifdef HAVE_TBB
tbb::spin_mutex::scoped_lock lock;
#endif
CvSeqReader reader;
int begin = range.begin();
int end = range.end();
int begin = range.start;
int end = range.end;
int weak_count = end - begin;
CvDTree* tree;
@@ -956,13 +951,11 @@ public:
tmp_sum += shrinkage*(float)(tree->predict(sample, missing)->value);
}
}
#ifdef HAVE_TBB
lock.acquire(SumMutex);
sum[i] += tmp_sum;
lock.release();
#else
sum[i] += tmp_sum;
#endif
{
cv::AutoLock lock(SumMutex);
sum[i] += tmp_sum;
}
}
} // Tree_predictor::operator()
@@ -970,11 +963,7 @@ public:
}; // class Tree_predictor
#ifdef HAVE_TBB
tbb::spin_mutex Tree_predictor::SumMutex;
#endif
cv::Mutex Tree_predictor::SumMutex;
float CvGBTrees::predict( const CvMat* _sample, const CvMat* _missing,
@@ -992,12 +981,7 @@ float CvGBTrees::predict( const CvMat* _sample, const CvMat* _missing,
Tree_predictor predictor = Tree_predictor(weak_seq, class_count,
params.shrinkage, _sample, _missing, sum);
//#ifdef HAVE_TBB
// tbb::parallel_for(cv::BlockedRange(begin, end), predictor,
// tbb::auto_partitioner());
//#else
cv::parallel_for(cv::BlockedRange(begin, end), predictor);
//#endif
cv::parallel_for_(cv::Range(begin, end), predictor);
for (int i=0; i<class_count; ++i)
sum[i] = sum[i] /** params.shrinkage*/ + base_value;
@@ -1228,7 +1212,7 @@ void CvGBTrees::read( CvFileStorage* fs, CvFileNode* node )
//===========================================================================
class Sample_predictor
class Sample_predictor : public cv::ParallelLoopBody
{
private:
const CvGBTrees* gbt;
@@ -1258,10 +1242,10 @@ public:
{}
virtual void operator()(const cv::BlockedRange& range) const
virtual void operator()(const cv::Range& range) const
{
int begin = range.begin();
int end = range.end();
int begin = range.start;
int end = range.end;
CvMat x;
CvMat miss;
@@ -1317,11 +1301,7 @@ CvGBTrees::calc_error( CvMLData* _data, int type, std::vector<float> *resp )
Sample_predictor predictor = Sample_predictor(this, pred_resp, _data->get_values(),
_data->get_missing(), _sample_idx);
//#ifdef HAVE_TBB
// tbb::parallel_for(cv::BlockedRange(0,n), predictor, tbb::auto_partitioner());
//#else
cv::parallel_for(cv::BlockedRange(0,n), predictor);
//#endif
cv::parallel_for_(cv::Range(0,n), predictor);
int* sidx = _sample_idx ? _sample_idx->data.i : 0;
int r_step = CV_IS_MAT_CONT(response->type) ?

View File

@@ -306,7 +306,7 @@ float CvKNearest::write_results( int k, int k1, int start, int end,
return result;
}
struct P1 {
struct P1 : cv::ParallelLoopBody {
P1(const CvKNearest* _pointer, int _buf_sz, int _k, const CvMat* __samples, const float** __neighbors,
int _k1, CvMat* __results, CvMat* __neighbor_responses, CvMat* __dist, float* _result)
{
@@ -333,10 +333,10 @@ struct P1 {
float* result;
int buf_sz;
void operator()( const cv::BlockedRange& range ) const
void operator()( const cv::Range& range ) const
{
cv::AutoBuffer<float> buf(buf_sz);
for(int i = range.begin(); i < range.end(); i += 1 )
for(int i = range.start; i < range.end; i += 1 )
{
float* neighbor_responses = &buf[0];
float* dist = neighbor_responses + 1*k;
@@ -410,8 +410,8 @@ float CvKNearest::find_nearest( const CvMat* _samples, int k, CvMat* _results,
int k1 = get_sample_count();
k1 = MIN( k1, k );
cv::parallel_for(cv::BlockedRange(0, count), P1(this, buf_sz, k, _samples, _neighbors, k1,
_results, _neighbor_responses, _dist, &result)
cv::parallel_for_(cv::Range(0, count), P1(this, buf_sz, k, _samples, _neighbors, k1,
_results, _neighbor_responses, _dist, &result)
);
return result;

View File

@@ -277,7 +277,7 @@ bool CvNormalBayesClassifier::train( const CvMat* _train_data, const CvMat* _res
return result;
}
struct predict_body {
struct predict_body : cv::ParallelLoopBody {
predict_body(CvMat* _c, CvMat** _cov_rotate_mats, CvMat** _inv_eigen_values, CvMat** _avg,
const CvMat* _samples, const int* _vidx, CvMat* _cls_labels,
CvMat* _results, float* _value, int _var_count1
@@ -307,7 +307,7 @@ struct predict_body {
float* value;
int var_count1;
void operator()( const cv::BlockedRange& range ) const
void operator()( const cv::Range& range ) const
{
int cls = -1;
@@ -324,7 +324,7 @@ struct predict_body {
cv::AutoBuffer<double> buffer(nclasses + var_count1);
CvMat diff = cvMat( 1, var_count1, CV_64FC1, &buffer[0] );
for(int k = range.begin(); k < range.end(); k += 1 )
for(int k = range.start; k < range.end; k += 1 )
{
int ival;
double opt = FLT_MAX;
@@ -397,9 +397,9 @@ float CvNormalBayesClassifier::predict( const CvMat* samples, CvMat* results ) c
const int* vidx = var_idx ? var_idx->data.i : 0;
cv::parallel_for(cv::BlockedRange(0, samples->rows), predict_body(c, cov_rotate_mats, inv_eigen_values, avg, samples,
vidx, cls_labels, results, &value, var_count
));
cv::parallel_for_(cv::Range(0, samples->rows),
predict_body(c, cov_rotate_mats, inv_eigen_values, avg, samples,
vidx, cls_labels, results, &value, var_count));
return value;
}

View File

@@ -2143,7 +2143,7 @@ float CvSVM::predict( const CvMat* sample, bool returnDFVal ) const
return result;
}
struct predict_body_svm {
struct predict_body_svm : ParallelLoopBody {
predict_body_svm(const CvSVM* _pointer, float* _result, const CvMat* _samples, CvMat* _results)
{
pointer = _pointer;
@@ -2157,9 +2157,9 @@ struct predict_body_svm {
const CvMat* samples;
CvMat* results;
void operator()( const cv::BlockedRange& range ) const
void operator()( const cv::Range& range ) const
{
for(int i = range.begin(); i < range.end(); i++ )
for(int i = range.start; i < range.end; i++ )
{
CvMat sample;
cvGetRow( samples, &sample, i );
@@ -2175,7 +2175,7 @@ struct predict_body_svm {
float CvSVM::predict(const CvMat* samples, CV_OUT CvMat* results) const
{
float result = 0;
cv::parallel_for(cv::BlockedRange(0, samples->rows),
cv::parallel_for_(cv::Range(0, samples->rows),
predict_body_svm(this, &result, samples, results)
);
return result;