Overloaded PCA constructor and ( ) operator to implement Feature#2287 - PCA that retains a specified amount of variance from the data. A sample was added to samples/cpp to demonstrate the new functionality. Docs and Tests were also updated

This commit is contained in:
Kevin
2012-08-22 23:21:49 -04:00
committed by Vadim Pisarevsky
parent a74a2302aa
commit 93155c6ae0
4 changed files with 335 additions and 4 deletions

View File

@@ -2359,8 +2359,10 @@ public:
PCA();
//! the constructor that performs PCA
PCA(InputArray data, InputArray mean, int flags, int maxComponents=0);
PCA(InputArray data, InputArray mean, int flags, double retainedVariance);
//! operator that performs PCA. The previously stored data, if any, is released
PCA& operator()(InputArray data, InputArray mean, int flags, int maxComponents=0);
PCA& operator()(InputArray data, InputArray mean, int flags, double retainedVariance);
//! projects vector from the original space to the principal components subspace
Mat project(InputArray vec) const;
//! projects vector from the original space to the principal components subspace
@@ -2378,6 +2380,9 @@ public:
CV_EXPORTS_W void PCACompute(InputArray data, CV_OUT InputOutputArray mean,
OutputArray eigenvectors, int maxComponents=0);
CV_EXPORTS_W void PCACompute(InputArray data, CV_OUT InputOutputArray mean,
OutputArray eigenvectors, double retainedVariance);
CV_EXPORTS_W void PCAProject(InputArray data, InputArray mean,
InputArray eigenvectors, OutputArray result);

View File

@@ -2811,6 +2811,11 @@ PCA::PCA(InputArray data, InputArray _mean, int flags, int maxComponents)
operator()(data, _mean, flags, maxComponents);
}
PCA::PCA(InputArray data, InputArray _mean, int flags, double retainedVariance)
{
operator()(data, _mean, flags, retainedVariance);
}
PCA& PCA::operator()(InputArray _data, InputArray __mean, int flags, int maxComponents)
{
Mat data = _data.getMat(), _mean = __mean.getMat();
@@ -2895,6 +2900,109 @@ PCA& PCA::operator()(InputArray _data, InputArray __mean, int flags, int maxComp
return *this;
}
PCA& PCA::operator()(InputArray _data, InputArray __mean, int flags, double retainedVariance)
{
Mat data = _data.getMat(), _mean = __mean.getMat();
int covar_flags = CV_COVAR_SCALE;
int i, len, in_count;
Size mean_sz;
CV_Assert( data.channels() == 1 );
if( flags & CV_PCA_DATA_AS_COL )
{
len = data.rows;
in_count = data.cols;
covar_flags |= CV_COVAR_COLS;
mean_sz = Size(1, len);
}
else
{
len = data.cols;
in_count = data.rows;
covar_flags |= CV_COVAR_ROWS;
mean_sz = Size(len, 1);
}
CV_Assert( retainedVariance > 0 && retainedVariance <= 1 );
int count = std::min(len, in_count);
// "scrambled" way to compute PCA (when cols(A)>rows(A)):
// B = A'A; B*x=b*x; C = AA'; C*y=c*y -> AA'*y=c*y -> A'A*(A'*y)=c*(A'*y) -> c = b, x=A'*y
if( len <= in_count )
covar_flags |= CV_COVAR_NORMAL;
int ctype = std::max(CV_32F, data.depth());
mean.create( mean_sz, ctype );
Mat covar( count, count, ctype );
if( _mean.data )
{
CV_Assert( _mean.size() == mean_sz );
_mean.convertTo(mean, ctype);
}
calcCovarMatrix( data, covar, mean, covar_flags, ctype );
eigen( covar, eigenvalues, eigenvectors );
if( !(covar_flags & CV_COVAR_NORMAL) )
{
// CV_PCA_DATA_AS_ROW: cols(A)>rows(A). x=A'*y -> x'=y'*A
// CV_PCA_DATA_AS_COL: rows(A)>cols(A). x=A''*y -> x'=y'*A'
Mat tmp_data, tmp_mean = repeat(mean, data.rows/mean.rows, data.cols/mean.cols);
if( data.type() != ctype || tmp_mean.data == mean.data )
{
data.convertTo( tmp_data, ctype );
subtract( tmp_data, tmp_mean, tmp_data );
}
else
{
subtract( data, tmp_mean, tmp_mean );
tmp_data = tmp_mean;
}
Mat evects1(count, len, ctype);
gemm( eigenvectors, tmp_data, 1, Mat(), 0, evects1,
(flags & CV_PCA_DATA_AS_COL) ? CV_GEMM_B_T : 0);
eigenvectors = evects1;
// normalize all eigenvectors
for( i = 0; i < eigenvectors.rows; i++ )
{
Mat vec = eigenvectors.row(i);
normalize(vec, vec);
}
}
// compute the cumulative energy content for each eigenvector
Mat g(eigenvalues.size(), ctype);
for(int ig = 0; ig < g.rows; ig++)
{
g.at<float>(ig,0) = 0;
for(int im = 0; im <= ig; im++)
{
g.at<float>(ig,0) += eigenvalues.at<float>(im,0);
}
}
int L;
for(L = 0; L < eigenvalues.rows; L++)
{
double energy = g.at<float>(L, 0) / g.at<float>(g.rows - 1, 0);
if(energy > retainedVariance)
break;
}
L = std::max(2, L);
// use clone() to physically copy the data and thus deallocate the original matrices
eigenvalues = eigenvalues.rowRange(0,L).clone();
eigenvectors = eigenvectors.rowRange(0,L).clone();
return *this;
}
void PCA::project(InputArray _data, OutputArray result) const
{
@@ -2965,6 +3073,15 @@ void cv::PCACompute(InputArray data, InputOutputArray mean,
pca.eigenvectors.copyTo(eigenvectors);
}
void cv::PCACompute(InputArray data, InputOutputArray mean,
OutputArray eigenvectors, double retainedVariance)
{
PCA pca;
pca(data, mean, 0, retainedVariance);
pca.mean.copyTo(mean);
pca.eigenvectors.copyTo(eigenvectors);
}
void cv::PCAProject(InputArray data, InputArray mean,
InputArray eigenvectors, OutputArray result)
{

View File

@@ -297,6 +297,7 @@ protected:
prjEps, backPrjEps,
evalEps, evecEps;
int maxComponents = 100;
double retainedVariance = 0.95;
Mat rPoints(sz, CV_32FC1), rTestPoints(sz, CV_32FC1);
RNG& rng = ts->get_rng();
@@ -423,9 +424,33 @@ protected:
ts->set_failed_test_info( cvtest::TS::FAIL_BAD_ACCURACY );
return;
}
// 3. check C++ PCA w/retainedVariance
cPCA( rPoints.t(), Mat(), CV_PCA_DATA_AS_COL, retainedVariance );
diffPrjEps = 1, diffBackPrjEps = 1;
Mat rvPrjTestPoints = cPCA.project(rTestPoints.t());
if( cPCA.eigenvectors.rows > maxComponents)
err = norm(cv::abs(rvPrjTestPoints.rowRange(0,maxComponents)), cv::abs(rPrjTestPoints.t()), CV_RELATIVE_L2 );
else
err = norm(cv::abs(rvPrjTestPoints), cv::abs(rPrjTestPoints.colRange(0,cPCA.eigenvectors.rows).t()), CV_RELATIVE_L2 );
if( err > diffPrjEps )
{
ts->printf( cvtest::TS::LOG, "bad accuracy of project() (CV_PCA_DATA_AS_COL); retainedVariance=0.95; err = %f\n", err );
ts->set_failed_test_info( cvtest::TS::FAIL_BAD_ACCURACY );
return;
}
err = norm(cPCA.backProject(rvPrjTestPoints), rBackPrjTestPoints.t(), CV_RELATIVE_L2 );
if( err > diffBackPrjEps )
{
ts->printf( cvtest::TS::LOG, "bad accuracy of backProject() (CV_PCA_DATA_AS_COL); retainedVariance=0.95; err = %f\n", err );
ts->set_failed_test_info( cvtest::TS::FAIL_BAD_ACCURACY );
return;
}
#ifdef CHECK_C
// 3. check C PCA & ROW
// 4. check C PCA & ROW
_points = rPoints;
_testPoints = rTestPoints;
_avg = avg;
@@ -455,7 +480,7 @@ protected:
return;
}
// 3. check C PCA & COL
// 5. check C PCA & COL
_points = cPoints;
_testPoints = cTestPoints;
avg = avg.t(); _avg = avg;
@@ -871,4 +896,4 @@ TEST(Core_Mat, reshape_1942)
cn = M.channels();
);
ASSERT_EQ(1, cn);
}
}