Normal Bayes classifier has been parallelized using TBB; letter_recog sample updated to demonstrate knearest & bayes classifiers (thanks to Konstantin Krivakin for the patches)
This commit is contained in:
@@ -277,63 +277,74 @@ bool CvNormalBayesClassifier::train( const CvMat* _train_data, const CvMat* _res
|
|||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
struct predict_body {
|
||||||
float CvNormalBayesClassifier::predict( const CvMat* samples, CvMat* results ) const
|
predict_body(CvMat* _c, CvMat** _cov_rotate_mats, CvMat** _inv_eigen_values, CvMat** _avg,
|
||||||
|
const CvMat* _samples, const int* _vidx, CvMat* _cls_labels,
|
||||||
|
CvMat* _results, float* _value, int _var_count1
|
||||||
|
)
|
||||||
{
|
{
|
||||||
float value = 0;
|
c = _c;
|
||||||
|
cov_rotate_mats = _cov_rotate_mats;
|
||||||
|
inv_eigen_values = _inv_eigen_values;
|
||||||
|
avg = _avg;
|
||||||
|
samples = _samples;
|
||||||
|
vidx = _vidx;
|
||||||
|
cls_labels = _cls_labels;
|
||||||
|
results = _results;
|
||||||
|
value = _value;
|
||||||
|
var_count1 = _var_count1;
|
||||||
|
}
|
||||||
|
|
||||||
int i, j, cls = -1;
|
CvMat* c;
|
||||||
double opt = FLT_MAX;
|
CvMat** cov_rotate_mats;
|
||||||
|
CvMat** inv_eigen_values;
|
||||||
|
CvMat** avg;
|
||||||
|
const CvMat* samples;
|
||||||
|
const int* vidx;
|
||||||
|
CvMat* cls_labels;
|
||||||
|
|
||||||
|
CvMat* results;
|
||||||
|
float* value;
|
||||||
|
int var_count1;
|
||||||
|
|
||||||
|
void operator()( const cv::BlockedRange& range ) const
|
||||||
|
{
|
||||||
|
|
||||||
|
int cls = -1;
|
||||||
int rtype = 0, rstep = 0;
|
int rtype = 0, rstep = 0;
|
||||||
|
|
||||||
int nclasses = cls_labels->cols;
|
int nclasses = cls_labels->cols;
|
||||||
int _var_count = avg[0]->cols;
|
int _var_count = avg[0]->cols;
|
||||||
|
|
||||||
if( !CV_IS_MAT(samples) || CV_MAT_TYPE(samples->type) != CV_32FC1 || samples->cols != var_all )
|
|
||||||
CV_Error( CV_StsBadArg,
|
|
||||||
"The input samples must be 32f matrix with the number of columns = var_all" );
|
|
||||||
|
|
||||||
if( samples->rows > 1 && !results )
|
|
||||||
CV_Error( CV_StsNullPtr,
|
|
||||||
"When the number of input samples is >1, the output vector of results must be passed" );
|
|
||||||
|
|
||||||
if (results)
|
if (results)
|
||||||
{
|
{
|
||||||
if( !CV_IS_MAT(results) || (CV_MAT_TYPE(results->type) != CV_32FC1 &&
|
|
||||||
CV_MAT_TYPE(results->type) != CV_32SC1) ||
|
|
||||||
(results->cols != 1 && results->rows != 1) ||
|
|
||||||
results->cols + results->rows - 1 != samples->rows )
|
|
||||||
CV_Error( CV_StsBadArg, "The output array must be integer or floating-point vector "
|
|
||||||
"with the number of elements = number of rows in the input matrix" );
|
|
||||||
|
|
||||||
rtype = CV_MAT_TYPE(results->type);
|
rtype = CV_MAT_TYPE(results->type);
|
||||||
rstep = CV_IS_MAT_CONT(results->type) ? 1 : results->step/CV_ELEM_SIZE(rtype);
|
rstep = CV_IS_MAT_CONT(results->type) ? 1 : results->step/CV_ELEM_SIZE(rtype);
|
||||||
}
|
}
|
||||||
|
|
||||||
const int* vidx = var_idx ? var_idx->data.i : 0;
|
|
||||||
|
|
||||||
// allocate memory and initializing headers for calculating
|
// allocate memory and initializing headers for calculating
|
||||||
cv::AutoBuffer<double> buffer(nclasses + var_count);
|
cv::AutoBuffer<double> buffer(nclasses + var_count1);
|
||||||
CvMat diff = cvMat( 1, var_count, CV_64FC1, &buffer[0] );
|
CvMat diff = cvMat( 1, var_count1, CV_64FC1, &buffer[0] );
|
||||||
|
|
||||||
for( int k = 0; k < samples->rows; k++ )
|
for(int k = range.begin(); k < range.end(); k += 1 )
|
||||||
{
|
{
|
||||||
int ival;
|
int ival;
|
||||||
|
double opt = FLT_MAX;
|
||||||
|
|
||||||
for( i = 0; i < nclasses; i++ )
|
for(int i = 0; i < nclasses; i++ )
|
||||||
{
|
{
|
||||||
|
|
||||||
double cur = c->data.db[i];
|
double cur = c->data.db[i];
|
||||||
CvMat* u = cov_rotate_mats[i];
|
CvMat* u = cov_rotate_mats[i];
|
||||||
CvMat* w = inv_eigen_values[i];
|
CvMat* w = inv_eigen_values[i];
|
||||||
|
|
||||||
const double* avg_data = avg[i]->data.db;
|
const double* avg_data = avg[i]->data.db;
|
||||||
const float* x = (const float*)(samples->data.ptr + samples->step*k);
|
const float* x = (const float*)(samples->data.ptr + samples->step*k);
|
||||||
|
|
||||||
// cov = u w u' --> cov^(-1) = u w^(-1) u'
|
// cov = u w u' --> cov^(-1) = u w^(-1) u'
|
||||||
for( j = 0; j < _var_count; j++ )
|
for(int j = 0; j < _var_count; j++ )
|
||||||
diff.data.db[j] = avg_data[j] - x[vidx ? vidx[j] : j];
|
diff.data.db[j] = avg_data[j] - x[vidx ? vidx[j] : j];
|
||||||
|
|
||||||
cvGEMM( &diff, u, 1, 0, 0, &diff, CV_GEMM_B_T );
|
cvGEMM( &diff, u, 1, 0, 0, &diff, CV_GEMM_B_T );
|
||||||
for( j = 0; j < _var_count; j++ )
|
for(int j = 0; j < _var_count; j++ )
|
||||||
{
|
{
|
||||||
double d = diff.data.db[j];
|
double d = diff.data.db[j];
|
||||||
cur += d*d*w->data.db[j];
|
cur += d*d*w->data.db[j];
|
||||||
@@ -356,17 +367,39 @@ float CvNormalBayesClassifier::predict( const CvMat* samples, CvMat* results ) c
|
|||||||
results->data.fl[k*rstep] = (float)ival;
|
results->data.fl[k*rstep] = (float)ival;
|
||||||
}
|
}
|
||||||
if( k == 0 )
|
if( k == 0 )
|
||||||
value = (float)ival;
|
*value = (float)ival;
|
||||||
|
|
||||||
/*if( _probs )
|
|
||||||
{
|
|
||||||
CV_CALL( cvConvertScale( &expo, &expo, -0.5 ));
|
|
||||||
CV_CALL( cvExp( &expo, &expo ));
|
|
||||||
if( _probs->cols == 1 )
|
|
||||||
CV_CALL( cvReshape( &expo, &expo, 1, nclasses ));
|
|
||||||
CV_CALL( cvConvertScale( &expo, _probs, 1./cvSum( &expo ).val[0] ));
|
|
||||||
}*/
|
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
|
||||||
|
float CvNormalBayesClassifier::predict( const CvMat* samples, CvMat* results ) const
|
||||||
|
{
|
||||||
|
float value = 0;
|
||||||
|
|
||||||
|
if( !CV_IS_MAT(samples) || CV_MAT_TYPE(samples->type) != CV_32FC1 || samples->cols != var_all )
|
||||||
|
CV_Error( CV_StsBadArg,
|
||||||
|
"The input samples must be 32f matrix with the number of columns = var_all" );
|
||||||
|
|
||||||
|
if( samples->rows > 1 && !results )
|
||||||
|
CV_Error( CV_StsNullPtr,
|
||||||
|
"When the number of input samples is >1, the output vector of results must be passed" );
|
||||||
|
|
||||||
|
if( results )
|
||||||
|
{
|
||||||
|
if( !CV_IS_MAT(results) || (CV_MAT_TYPE(results->type) != CV_32FC1 &&
|
||||||
|
CV_MAT_TYPE(results->type) != CV_32SC1) ||
|
||||||
|
(results->cols != 1 && results->rows != 1) ||
|
||||||
|
results->cols + results->rows - 1 != samples->rows )
|
||||||
|
CV_Error( CV_StsBadArg, "The output array must be integer or floating-point vector "
|
||||||
|
"with the number of elements = number of rows in the input matrix" );
|
||||||
|
}
|
||||||
|
|
||||||
|
const int* vidx = var_idx ? var_idx->data.i : 0;
|
||||||
|
|
||||||
|
cv::parallel_for(cv::BlockedRange(0, samples->rows), predict_body(c, cov_rotate_mats, inv_eigen_values, avg, samples,
|
||||||
|
vidx, cls_labels, results, &value, var_count
|
||||||
|
));
|
||||||
|
|
||||||
return value;
|
return value;
|
||||||
}
|
}
|
||||||
|
@@ -9,7 +9,7 @@
|
|||||||
void help()
|
void help()
|
||||||
{
|
{
|
||||||
printf("\nThe sample demonstrates how to train Random Trees classifier\n"
|
printf("\nThe sample demonstrates how to train Random Trees classifier\n"
|
||||||
"(or Boosting classifier, or MLP - see main()) using the provided dataset.\n"
|
"(or Boosting classifier, or MLP, or Knearest, or Nbayes - see main()) using the provided dataset.\n"
|
||||||
"\n"
|
"\n"
|
||||||
"We use the sample database letter-recognition.data\n"
|
"We use the sample database letter-recognition.data\n"
|
||||||
"from UCI Repository, here is the link:\n"
|
"from UCI Repository, here is the link:\n"
|
||||||
@@ -28,7 +28,7 @@ void help()
|
|||||||
"The usage: letter_recog [-data <path to letter-recognition.data>] \\\n"
|
"The usage: letter_recog [-data <path to letter-recognition.data>] \\\n"
|
||||||
" [-save <output XML file for the classifier>] \\\n"
|
" [-save <output XML file for the classifier>] \\\n"
|
||||||
" [-load <XML file with the pre-trained classifier>] \\\n"
|
" [-load <XML file with the pre-trained classifier>] \\\n"
|
||||||
" [-boost|-mlp] # to use boost/mlp classifier instead of default Random Trees\n" );
|
" [-boost|-mlp|-knearest|-nbayes] # to use boost/mlp/knearest classifier instead of default Random Trees\n" );
|
||||||
}
|
}
|
||||||
|
|
||||||
// This function reads data and responses from the file <filename>
|
// This function reads data and responses from the file <filename>
|
||||||
@@ -484,6 +484,147 @@ int build_mlp_classifier( char* data_filename,
|
|||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static
|
||||||
|
int build_knearest_classifier( char* data_filename, int K )
|
||||||
|
{
|
||||||
|
const int var_count = 16;
|
||||||
|
CvMat* data = 0;
|
||||||
|
CvMat train_data;
|
||||||
|
CvMat* responses;
|
||||||
|
|
||||||
|
int ok = read_num_class_data( data_filename, 16, &data, &responses );
|
||||||
|
int nsamples_all = 0, ntrain_samples = 0;
|
||||||
|
int i, j;
|
||||||
|
double train_hr = 0, test_hr = 0;
|
||||||
|
CvANN_MLP mlp;
|
||||||
|
|
||||||
|
if( !ok )
|
||||||
|
{
|
||||||
|
printf( "Could not read the database %s\n", data_filename );
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
|
||||||
|
printf( "The database %s is loaded.\n", data_filename );
|
||||||
|
nsamples_all = data->rows;
|
||||||
|
ntrain_samples = (int)(nsamples_all*0.8);
|
||||||
|
|
||||||
|
// 1. unroll the responses
|
||||||
|
printf( "Unrolling the responses...\n");
|
||||||
|
cvGetRows( data, &train_data, 0, ntrain_samples );
|
||||||
|
|
||||||
|
// 2. train classifier
|
||||||
|
CvMat* train_resp = cvCreateMat( ntrain_samples, 1, CV_32FC1);
|
||||||
|
for (int i = 0; i < ntrain_samples; i++)
|
||||||
|
train_resp->data.fl[i] = responses->data.fl[i];
|
||||||
|
CvKNearest knearest(&train_data, train_resp);
|
||||||
|
|
||||||
|
CvMat* nearests = cvCreateMat( (nsamples_all - ntrain_samples), K, CV_32FC1);
|
||||||
|
float _sample[var_count * (nsamples_all - ntrain_samples)];
|
||||||
|
CvMat sample = cvMat( nsamples_all - ntrain_samples, 16, CV_32FC1, _sample );
|
||||||
|
float true_results[nsamples_all - ntrain_samples];
|
||||||
|
for (int j = ntrain_samples; j < nsamples_all; j++)
|
||||||
|
{
|
||||||
|
float *s = data->data.fl + j * var_count;
|
||||||
|
|
||||||
|
for (int i = 0; i < var_count; i++)
|
||||||
|
{
|
||||||
|
sample.data.fl[(j - ntrain_samples) * var_count + i] = s[i];
|
||||||
|
}
|
||||||
|
true_results[j - ntrain_samples] = responses->data.fl[j];
|
||||||
|
}
|
||||||
|
CvMat *result = cvCreateMat(1, nsamples_all - ntrain_samples, CV_32FC1);
|
||||||
|
knearest.find_nearest(&sample, K, result, 0, nearests, 0);
|
||||||
|
int true_resp = 0;
|
||||||
|
int accuracy = 0;
|
||||||
|
for (int i = 0; i < nsamples_all - ntrain_samples; i++)
|
||||||
|
{
|
||||||
|
if (result->data.fl[i] == true_results[i])
|
||||||
|
true_resp++;
|
||||||
|
for(int k = 0; k < K; k++ )
|
||||||
|
{
|
||||||
|
if( nearests->data.fl[i * K + k] == true_results[i])
|
||||||
|
accuracy++;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
printf("true_resp = %f%%\tavg accuracy = %f%%\n", (float)true_resp / (nsamples_all - ntrain_samples) * 100,
|
||||||
|
(float)accuracy / (nsamples_all - ntrain_samples) / K * 100);
|
||||||
|
|
||||||
|
cvReleaseMat( &train_resp );
|
||||||
|
cvReleaseMat( &nearests );
|
||||||
|
cvReleaseMat( &result );
|
||||||
|
cvReleaseMat( &data );
|
||||||
|
cvReleaseMat( &responses );
|
||||||
|
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
static
|
||||||
|
int build_nbayes_classifier( char* data_filename )
|
||||||
|
{
|
||||||
|
const int var_count = 16;
|
||||||
|
CvMat* data = 0;
|
||||||
|
CvMat train_data;
|
||||||
|
CvMat* responses;
|
||||||
|
|
||||||
|
int ok = read_num_class_data( data_filename, 16, &data, &responses );
|
||||||
|
int nsamples_all = 0, ntrain_samples = 0;
|
||||||
|
int i, j;
|
||||||
|
double train_hr = 0, test_hr = 0;
|
||||||
|
CvANN_MLP mlp;
|
||||||
|
|
||||||
|
if( !ok )
|
||||||
|
{
|
||||||
|
printf( "Could not read the database %s\n", data_filename );
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
|
||||||
|
printf( "The database %s is loaded.\n", data_filename );
|
||||||
|
nsamples_all = data->rows;
|
||||||
|
ntrain_samples = (int)(nsamples_all*0.5);
|
||||||
|
|
||||||
|
// 1. unroll the responses
|
||||||
|
printf( "Unrolling the responses...\n");
|
||||||
|
cvGetRows( data, &train_data, 0, ntrain_samples );
|
||||||
|
|
||||||
|
// 2. train classifier
|
||||||
|
CvMat* train_resp = cvCreateMat( ntrain_samples, 1, CV_32FC1);
|
||||||
|
for (int i = 0; i < ntrain_samples; i++)
|
||||||
|
train_resp->data.fl[i] = responses->data.fl[i];
|
||||||
|
CvNormalBayesClassifier nbayes(&train_data, train_resp);
|
||||||
|
|
||||||
|
float _sample[var_count * (nsamples_all - ntrain_samples)];
|
||||||
|
CvMat sample = cvMat( nsamples_all - ntrain_samples, 16, CV_32FC1, _sample );
|
||||||
|
float true_results[nsamples_all - ntrain_samples];
|
||||||
|
for (int j = ntrain_samples; j < nsamples_all; j++)
|
||||||
|
{
|
||||||
|
float *s = data->data.fl + j * var_count;
|
||||||
|
|
||||||
|
for (int i = 0; i < var_count; i++)
|
||||||
|
{
|
||||||
|
sample.data.fl[(j - ntrain_samples) * var_count + i] = s[i];
|
||||||
|
}
|
||||||
|
true_results[j - ntrain_samples] = responses->data.fl[j];
|
||||||
|
}
|
||||||
|
CvMat *result = cvCreateMat(1, nsamples_all - ntrain_samples, CV_32FC1);
|
||||||
|
(int)nbayes.predict(&sample, result);
|
||||||
|
int true_resp = 0;
|
||||||
|
int accuracy = 0;
|
||||||
|
for (int i = 0; i < nsamples_all - ntrain_samples; i++)
|
||||||
|
{
|
||||||
|
if (result->data.fl[i] == true_results[i])
|
||||||
|
true_resp++;
|
||||||
|
}
|
||||||
|
|
||||||
|
printf("true_resp = %f%%\n", (float)true_resp / (nsamples_all - ntrain_samples) * 100);
|
||||||
|
|
||||||
|
cvReleaseMat( &train_resp );
|
||||||
|
cvReleaseMat( &result );
|
||||||
|
cvReleaseMat( &data );
|
||||||
|
cvReleaseMat( &responses );
|
||||||
|
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
int main( int argc, char *argv[] )
|
int main( int argc, char *argv[] )
|
||||||
{
|
{
|
||||||
@@ -519,6 +660,14 @@ int main( int argc, char *argv[] )
|
|||||||
{
|
{
|
||||||
method = 2;
|
method = 2;
|
||||||
}
|
}
|
||||||
|
else if ( strcmp(argv[i], "-knearest") == 0)
|
||||||
|
{
|
||||||
|
method = 3;
|
||||||
|
}
|
||||||
|
else if ( strcmp(argv[i], "-nbayes") == 0)
|
||||||
|
{
|
||||||
|
method = 4;
|
||||||
|
}
|
||||||
else
|
else
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
@@ -530,6 +679,10 @@ int main( int argc, char *argv[] )
|
|||||||
build_boost_classifier( data_filename, filename_to_save, filename_to_load ) :
|
build_boost_classifier( data_filename, filename_to_save, filename_to_load ) :
|
||||||
method == 2 ?
|
method == 2 ?
|
||||||
build_mlp_classifier( data_filename, filename_to_save, filename_to_load ) :
|
build_mlp_classifier( data_filename, filename_to_save, filename_to_load ) :
|
||||||
|
method == 3 ?
|
||||||
|
build_knearest_classifier( data_filename, 10 ) :
|
||||||
|
method == 4 ?
|
||||||
|
build_nbayes_classifier( data_filename) :
|
||||||
-1) < 0)
|
-1) < 0)
|
||||||
{
|
{
|
||||||
help();
|
help();
|
||||||
|
Reference in New Issue
Block a user