a lot of small corrections to bring down the number of undocumented functions, reported by the script; added em.cpp sample

This commit is contained in:
Vadim Pisarevsky
2011-06-09 01:16:45 +00:00
parent 3b9e752be7
commit 20aca7440f
30 changed files with 474 additions and 746 deletions

View File

@@ -849,187 +849,13 @@ void CvEM::init_auto( const CvVectors& train_data )
// Clusters the samples of train_data into (at most) nclusters groups and writes
// the resulting cluster index of every sample into labels.
//
//   train_data - samples; read as train_data.count float rows of
//                train_data.dims elements each (train_data.data.fl[i]).
//   nclusters  - requested number of clusters.
//   labels     - output; labels->data.i[i] receives the cluster of sample i.
//   termcrit   - iteration limit / accuracy for the clustering.
//   centers0   - optional initial centers; when null the initial labels are
//                generated and shuffled instead.
//
// NOTE(review): this listing looks like a diff whose +/- markers were stripped.
// It contains both a hand-written k-means loop and a short cvKMeans2-based
// body (the second "int i, nsamples ..." declaration, temp_mat, the memcpy and
// the cvKMeans2 call), and the two are interleaved. Confirm against the
// repository which lines are actually present before relying on the comments.
void CvEM::kmeans( const CvVectors& train_data, int nclusters, CvMat* labels,
CvTermCriteria termcrit, const CvMat* centers0 )
{
// Work matrices; released unconditionally after __END__.
CvMat* centers = 0;
CvMat* old_centers = 0;
CvMat* counters = 0;
CV_FUNCNAME( "CvEM::kmeans" );
__BEGIN__;
// Fixed seed, so the random choices below are the same on every call.
cv::RNG rng(0xFFFFFFFF);
int i, j, k, nsamples, dims;
int iter = 0;
// Stays at DBL_MAX during the first pass, so the epsilon test cannot stop the
// loop before the centers have been compared with a previous set.
double max_dist = DBL_MAX;
// presumably 1e-6 and 100 are the default epsilon / iteration count applied
// when termcrit does not specify them - verify against cvCheckTermCriteria.
termcrit = cvCheckTermCriteria( termcrit, 1e-6, 100 );
// Squared because the center shift below is measured as a squared distance.
termcrit.epsilon *= termcrit.epsilon;
nsamples = train_data.count;
dims = train_data.dims;
// Never ask for more clusters than there are samples.
nclusters = MIN( nclusters, nsamples );
// centers / old_centers: nclusters x dims doubles; counters: samples per cluster.
CV_CALL( centers = cvCreateMat( nclusters, dims, CV_64FC1 ));
CV_CALL( old_centers = cvCreateMat( nclusters, dims, CV_64FC1 ));
CV_CALL( counters = cvCreateMat( 1, nclusters, CV_32SC1 ));
cvZero( old_centers );
if( centers0 )
{
// Start from the caller-supplied centers (converted to double).
CV_CALL( cvConvert( centers0, centers ));
}
else
{
// No initial centers: spread the labels evenly over 0..nclusters-1 and
// shuffle them to get a random initial partition.
for( i = 0; i < nsamples; i++ )
labels->data.i[i] = i*nclusters/nsamples;
cvRandShuffle( labels, &rng.state );
}
for( ;; )
{
CvMat* temp;
// Assignment step. Skipped on the very first pass when the labels were
// generated above, because no centers have been computed yet.
if( iter > 0 || centers0 )
{
for( i = 0; i < nsamples; i++ )
{
const float* s = train_data.data.fl[i];
int k_best = 0;
double min_dist = DBL_MAX;
for( k = 0; k < nclusters; k++ )
{
const double* c = (double*)(centers->data.ptr + k*centers->step);
double dist = 0;
// Squared Euclidean distance, processed four components at a time ...
for( j = 0; j <= dims - 4; j += 4 )
{
double t0 = c[j] - s[j];
double t1 = c[j+1] - s[j+1];
dist += t0*t0 + t1*t1;
t0 = c[j+2] - s[j+2];
t1 = c[j+3] - s[j+3];
dist += t0*t0 + t1*t1;
}
// ... followed by the remaining (dims % 4) components.
for( ; j < dims; j++ )
{
double t = c[j] - s[j];
dist += t*t;
}
// Strict comparison: on ties the lowest cluster index wins.
if( min_dist > dist )
{
min_dist = dist;
k_best = k;
}
}
labels->data.i[i] = k_best;
}
}
// Stop once the iteration budget is exhausted.
if( ++iter > termcrit.max_iter )
break;
// Keep the previous centers in old_centers and rebuild centers from scratch.
CV_SWAP( centers, old_centers, temp );
cvZero( centers );
cvZero( counters );
// update centers
// First accumulate per-cluster component sums and sample counts.
for( i = 0; i < nsamples; i++ )
{
const float* s = train_data.data.fl[i];
k = labels->data.i[i];
double* c = (double*)(centers->data.ptr + k*centers->step);
for( j = 0; j <= dims - 4; j += 4 )
{
double t0 = c[j] + s[j];
double t1 = c[j+1] + s[j+1];
c[j] = t0;
c[j+1] = t1;
t0 = c[j+2] + s[j+2];
t1 = c[j+3] + s[j+3];
c[j+2] = t0;
c[j+3] = t1;
}
for( ; j < dims; j++ )
c[j] += s[j];
counters->data.i[k]++;
}
// From the second pass on, track the largest squared center shift.
if( iter > 1 )
max_dist = 0;
for( k = 0; k < nclusters; k++ )
{
double* c = (double*)(centers->data.ptr + k*centers->step);
if( counters->data.i[k] != 0 )
{
// Turn the accumulated sum into the mean of the cluster.
double scale = 1./counters->data.i[k];
for( j = 0; j < dims; j++ )
c[j] *= scale;
}
else
{
// Empty cluster: re-seed its center from a random sample. Up to 10 draws
// are made looking for a sample whose cluster has more than one member;
// if none is found the last drawn sample is used anyway.
const float* s;
for( j = 0; j < 10; j++ )
{
i = rng(nsamples);
if( counters->data.i[labels->data.i[i]] > 1 )
break;
}
s = train_data.data.fl[i];
for( j = 0; j < dims; j++ )
c[j] = s[j];
}
if( iter > 1 )
{
// Squared distance between the new and the previous center of cluster k.
double dist = 0;
const double* c_o = (double*)(old_centers->data.ptr + k*old_centers->step);
for( j = 0; j < dims; j++ )
{
double t = c[j] - c_o[j];
dist += t*t;
}
if( max_dist < dist )
max_dist = dist;
}
}
// Converged: no center moved by more than the (squared) epsilon.
if( max_dist < termcrit.epsilon )
break;
}
// Recount the members of every cluster from the final labels.
cvZero( counters );
// NOTE(review): i, nsamples and dims are already declared above in this scope;
// this line and the next appear to belong to the cvKMeans2-based variant of
// the function rather than to the loop-based one - confirm.
int i, nsamples = train_data.count, dims = train_data.dims;
cv::Ptr<CvMat> temp_mat = cvCreateMat(nsamples, dims, CV_32F);
for( i = 0; i < nsamples; i++ )
counters->data.i[labels->data.i[i]]++;
// ensure that we do not have empty clusters
// For every empty cluster, keep drawing random samples until one is found
// whose cluster has more than one member, then move that sample over.
for( k = 0; k < nclusters; k++ )
if( counters->data.i[k] == 0 )
for(;;)
{
i = rng(nsamples);
j = labels->data.i[i];
if( counters->data.i[j] > 1 )
{
labels->data.i[i] = k;
counters->data.i[j]--;
counters->data.i[k]++;
break;
}
}
__END__;
// Release the work matrices.
cvReleaseMat( &centers );
cvReleaseMat( &old_centers );
cvReleaseMat( &counters );
// NOTE(review): the two statements below look like the tail of the
// cvKMeans2-based variant: copy sample row i (dims floats) into row i of
// temp_mat, then cluster temp_mat with cvKMeans2. The last argument (10) is
// presumably the number of attempts - verify against the cvKMeans2 signature.
memcpy( temp_mat->data.ptr + temp_mat->step*i, train_data.data.fl[i], dims*sizeof(float));
cvKMeans2(temp_mat, nclusters, labels, termcrit, 10);
}