opencv/modules/contrib/src/facerec.cpp

717 lines
25 KiB
C++
Raw Normal View History

/*
* Copyright (c) 2011. Philipp Wagner <bytefish[at]gmx[dot]de>.
* Released to public domain under terms of the BSD Simplified license.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of the organization nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* See <http://www.opensource.org/licenses/bsd-license>
*/
#include "precomp.hpp"
#include <set>
namespace cv
{
using std::set;
// Reads a sequence from a FileNode::SEQ with type _Tp into a result vector.
template<typename _Tp>
inline void readFileNodeList(const FileNode& fn, vector<_Tp>& result) {
if (fn.type() == FileNode::SEQ) {
for (FileNodeIterator it = fn.begin(); it != fn.end();) {
_Tp item;
it >> item;
result.push_back(item);
}
}
}
// Writes the a list of given items to a cv::FileStorage.
template<typename _Tp>
inline void writeFileNodeList(FileStorage& fs, const string& name,
const vector<_Tp>& items) {
// typedefs
typedef typename vector<_Tp>::const_iterator constVecIterator;
// write the elements in item to fs
fs << name << "[";
for (constVecIterator it = items.begin(); it != items.end(); ++it) {
fs << *it;
}
fs << "]";
}
static Mat asRowMatrix(InputArrayOfArrays src, int rtype, double alpha=1, double beta=0)
{
// number of samples
int n = (int) src.total();
// return empty matrix if no data given
if(n == 0)
return Mat();
// dimensionality of samples
int d = (int)src.getMat(0).total();
// create data matrix
Mat data(n, d, rtype);
// copy data
for(int i = 0; i < n; i++) {
Mat xi = data.row(i);
src.getMat(i).reshape(1, 1).convertTo(xi, rtype, alpha, beta);
}
return data;
}
// Removes duplicate elements in a given vector.
template<typename _Tp>
inline vector<_Tp> remove_dups(const vector<_Tp>& src) {
typedef typename set<_Tp>::const_iterator constSetIterator;
typedef typename vector<_Tp>::const_iterator constVecIterator;
set<_Tp> set_elems;
for (constVecIterator it = src.begin(); it != src.end(); ++it)
set_elems.insert(*it);
vector<_Tp> elems;
for (constSetIterator it = set_elems.begin(); it != set_elems.end(); ++it)
elems.push_back(*it);
return elems;
}
// Turk, M., and Pentland, A. "Eigenfaces for recognition.". Journal of
// Cognitive Neuroscience 3 (1991), 7186.
class Eigenfaces : public FaceRecognizer
{
private:
int _num_components;
vector<Mat> _projections;
vector<int> _labels;
Mat _eigenvectors;
Mat _eigenvalues;
Mat _mean;
public:
using FaceRecognizer::save;
using FaceRecognizer::load;
// Initializes an empty Eigenfaces model.
Eigenfaces(int num_components = 0) :
_num_components(num_components) { }
// Initializes and computes an Eigenfaces model with images in src and
// corresponding labels in labels. num_components will be kept for
// classification.
Eigenfaces(InputArray src, InputArray labels,
int num_components = 0) :
_num_components(num_components) {
train(src, labels);
}
// Computes an Eigenfaces model with images in src and corresponding labels
// in labels.
void train(InputArray src, InputArray labels);
// Predicts the label of a query image in src.
int predict(const InputArray src) const;
// See FaceRecognizer::load.
void load(const FileStorage& fs);
// See FaceRecognizer::save.
void save(FileStorage& fs) const;
// Returns the eigenvectors of this PCA.
Mat eigenvectors() const { return _eigenvectors; }
// Returns the eigenvalues of this PCA.
Mat eigenvalues() const { return _eigenvalues; }
// Returns the sample mean of this PCA.
Mat mean() const { return _mean; }
// Returns the number of components used in this PCA.
int num_components() const { return _num_components; }
};
// Belhumeur, P. N., Hespanha, J., and Kriegman, D. "Eigenfaces vs. Fisher-
// faces: Recognition using class specific linear projection.". IEEE
// Transactions on Pattern Analysis and Machine Intelligence 19, 7 (1997),
// 711720.
class Fisherfaces: public FaceRecognizer
{
private:
int _num_components;
Mat _eigenvectors;
Mat _eigenvalues;
Mat _mean;
vector<Mat> _projections;
vector<int> _labels;
public:
using FaceRecognizer::save;
using FaceRecognizer::load;
// Initializes an empty Fisherfaces model.
Fisherfaces(int num_components = 0) :
_num_components(num_components) {}
// Initializes and computes a Fisherfaces model with images in src and
// corresponding labels in labels. num_components will be kept for
// classification.
Fisherfaces(InputArray src,
InputArray labels,
int num_components = 0) :
_num_components(num_components) {
train(src, labels);
}
~Fisherfaces() { }
// Computes a Fisherfaces model with images in src and corresponding labels
// in labels.
void train(InputArray src, InputArray labels);
// Predicts the label of a query image in src.
int predict(InputArray src) const;
// See FaceRecognizer::load.
virtual void load(const FileStorage& fs);
// See FaceRecognizer::save.
virtual void save(FileStorage& fs) const;
// Returns the eigenvectors of this Fisherfaces model.
Mat eigenvectors() const { return _eigenvectors; }
// Returns the eigenvalues of this Fisherfaces model.
Mat eigenvalues() const { return _eigenvalues; }
// Returns the sample mean of this Fisherfaces model.
Mat mean() const { return _eigenvalues; }
// Returns the number of components used in this Fisherfaces model.
int num_components() const { return _num_components; }
};
// Face Recognition based on Local Binary Patterns.
//
// TODO Allow to change the distance metric.
// TODO Allow to change LBP computation (Extended LBP used right now).
// TODO Optimize, Optimize, Optimize!
//
// Ahonen T, Hadid A. and Pietikäinen M. "Face description with local binary
// patterns: Application to face recognition." IEEE Transactions on Pattern
// Analysis and Machine Intelligence, 28(12):2037-2041.
//
class LBPH : public FaceRecognizer
{
private:
int _grid_x;
int _grid_y;
int _radius;
int _neighbors;
vector<Mat> _histograms;
vector<int> _labels;
public:
using FaceRecognizer::save;
using FaceRecognizer::load;
// Initializes this LBPH Model. The current implementation is rather fixed
// as it uses the Extended Local Binary Patterns per default.
//
// radius, neighbors are used in the local binary patterns creation.
// grid_x, grid_y control the grid size of the spatial histograms.
LBPH(int radius=1, int neighbors=8, int grid_x=8, int grid_y=8) :
_grid_x(grid_x),
_grid_y(grid_y),
_radius(radius),
_neighbors(neighbors) {}
// Initializes and computes this LBPH Model. The current implementation is
// rather fixed as it uses the Extended Local Binary Patterns per default.
//
// (radius=1), (neighbors=8) are used in the local binary patterns creation.
// (grid_x=8), (grid_y=8) controls the grid size of the spatial histograms.
LBPH(InputArray src,
InputArray labels,
int radius=1, int neighbors=8,
int grid_x=8, int grid_y=8) :
_grid_x(grid_x),
_grid_y(grid_y),
_radius(radius),
_neighbors(neighbors) {
train(src, labels);
}
~LBPH() { }
// Computes a LBPH model with images in src and
// corresponding labels in labels.
void train(InputArray src, InputArray labels);
// Predicts the label of a query image in src.
int predict(InputArray src) const;
// See FaceRecognizer::load.
void load(const FileStorage& fs);
// See FaceRecognizer::save.
void save(FileStorage& fs) const;
// Getter functions.
int neighbors() const { return _neighbors; }
int radius() const { return _radius; }
int grid_x() const { return _grid_x; }
int grid_y() const { return _grid_y; }
};
//------------------------------------------------------------------------------
// FaceRecognizer
//------------------------------------------------------------------------------
void FaceRecognizer::save(const string& filename) const {
FileStorage fs(filename, FileStorage::WRITE);
if (!fs.isOpened())
CV_Error(CV_StsError, "File can't be opened for writing!");
this->save(fs);
fs.release();
}
void FaceRecognizer::load(const string& filename) {
FileStorage fs(filename, FileStorage::READ);
if (!fs.isOpened())
CV_Error(CV_StsError, "File can't be opened for writing!");
this->load(fs);
fs.release();
}
//------------------------------------------------------------------------------
// Eigenfaces
//------------------------------------------------------------------------------
void Eigenfaces::train(InputArray src, InputArray _lbls) {
// assert type
if(_lbls.getMat().type() != CV_32SC1)
CV_Error(CV_StsUnsupportedFormat, "Labels must be given as integer (CV_32SC1).");
// get labels
vector<int> labels = _lbls.getMat();
// observations in row
Mat data = asRowMatrix(src, CV_64FC1);
// number of samples
int n = data.rows;
// dimensionality of data
//int d = data.cols;
// assert there are as much samples as labels
if(n != labels.size())
CV_Error(CV_StsBadArg, "The number of samples must equal the number of labels!");
// clip number of components to be valid
if((_num_components <= 0) || (_num_components > n))
_num_components = n;
// perform the PCA
PCA pca(data, Mat(), CV_PCA_DATA_AS_ROW, _num_components);
// copy the PCA results
_mean = pca.mean.reshape(1,1); // store the mean vector
_eigenvalues = pca.eigenvalues.clone(); // eigenvalues by row
transpose(pca.eigenvectors, _eigenvectors); // eigenvectors by column
_labels = labels; // store labels for prediction
// save projections
for(int sampleIdx = 0; sampleIdx < data.rows; sampleIdx++) {
Mat p = subspaceProject(_eigenvectors, _mean, data.row(sampleIdx));
this->_projections.push_back(p);
}
}
int Eigenfaces::predict(InputArray _src) const {
// get data
Mat src = _src.getMat();
// project into PCA subspace
Mat q = subspaceProject(_eigenvectors, _mean, src.reshape(1,1));
double minDist = DBL_MAX;
int minClass = -1;
for(int sampleIdx = 0; sampleIdx < _projections.size(); sampleIdx++) {
double dist = norm(_projections[sampleIdx], q, NORM_L2);
if(dist < minDist) {
minDist = dist;
minClass = _labels[sampleIdx];
}
}
return minClass;
}
void Eigenfaces::load(const FileStorage& fs) {
//read matrices
fs["num_components"] >> _num_components;
fs["mean"] >> _mean;
fs["eigenvalues"] >> _eigenvalues;
fs["eigenvectors"] >> _eigenvectors;
// read sequences
readFileNodeList(fs["projections"], _projections);
readFileNodeList(fs["labels"], _labels);
}
void Eigenfaces::save(FileStorage& fs) const {
// write matrices
fs << "num_components" << _num_components;
fs << "mean" << _mean;
fs << "eigenvalues" << _eigenvalues;
fs << "eigenvectors" << _eigenvectors;
// write sequences
writeFileNodeList(fs, "projections", _projections);
writeFileNodeList(fs, "labels", _labels);
}
//------------------------------------------------------------------------------
// Fisherfaces
//------------------------------------------------------------------------------
void Fisherfaces::train(InputArray src, InputArray _lbls) {
if(_lbls.getMat().type() != CV_32SC1)
CV_Error(CV_StsUnsupportedFormat, "Labels must be given as integer (CV_32SC1).");
// get data
vector<int> labels = _lbls.getMat();
Mat data = asRowMatrix(src, CV_64FC1);
// dimensionality
int N = data.rows; // number of samples
//int D = data.cols; // dimension of samples
// assert correct data alignment
if(labels.size() != N)
CV_Error(CV_StsUnsupportedFormat, "Labels must be given as integer (CV_32SC1).");
// compute the Fisherfaces
int C = remove_dups(labels).size(); // number of unique classes
// clip number of components to be a valid number
if((_num_components <= 0) || (_num_components > (C-1)))
_num_components = (C-1);
// perform a PCA and keep (N-C) components
PCA pca(data, Mat(), CV_PCA_DATA_AS_ROW, (N-C));
// project the data and perform a LDA on it
LDA lda(pca.project(data),labels, _num_components);
// store the total mean vector
_mean = pca.mean.reshape(1,1);
// store labels
_labels = labels;
// store the eigenvalues of the discriminants
lda.eigenvalues().convertTo(_eigenvalues, CV_64FC1);
// Now calculate the projection matrix as pca.eigenvectors * lda.eigenvectors.
// Note: OpenCV stores the eigenvectors by row, so we need to transpose it!
gemm(pca.eigenvectors, lda.eigenvectors(), 1.0, Mat(), 0.0, _eigenvectors, CV_GEMM_A_T);
// store the projections of the original data
for(int sampleIdx = 0; sampleIdx < data.rows; sampleIdx++) {
Mat p = subspaceProject(_eigenvectors, _mean, data.row(sampleIdx));
_projections.push_back(p);
}
}
int Fisherfaces::predict(InputArray _src) const {
Mat src = _src.getMat();
// project into LDA subspace
Mat q = subspaceProject(_eigenvectors, _mean, src.reshape(1,1));
// find 1-nearest neighbor
double minDist = DBL_MAX;
int minClass = -1;
for(int sampleIdx = 0; sampleIdx < _projections.size(); sampleIdx++) {
double dist = norm(_projections[sampleIdx], q, NORM_L2);
if(dist < minDist) {
minDist = dist;
minClass = _labels[sampleIdx];
}
}
return minClass;
}
// See FaceRecognizer::load.
void Fisherfaces::load(const FileStorage& fs) {
//read matrices
fs["num_components"] >> _num_components;
fs["mean"] >> _mean;
fs["eigenvalues"] >> _eigenvalues;
fs["eigenvectors"] >> _eigenvectors;
// read sequences
readFileNodeList(fs["projections"], _projections);
readFileNodeList(fs["labels"], _labels);
}
// See FaceRecognizer::save.
void Fisherfaces::save(FileStorage& fs) const {
// write matrices
fs << "num_components" << _num_components;
fs << "mean" << _mean;
fs << "eigenvalues" << _eigenvalues;
fs << "eigenvectors" << _eigenvectors;
// write sequences
writeFileNodeList(fs, "projections", _projections);
writeFileNodeList(fs, "labels", _labels);
}
//------------------------------------------------------------------------------
// LBPH
//------------------------------------------------------------------------------
template <typename _Tp> static
void olbp_(InputArray _src, OutputArray _dst) {
// get matrices
Mat src = _src.getMat();
// allocate memory for result
_dst.create(src.rows-2, src.cols-2, CV_8UC1);
Mat dst = _dst.getMat();
// zero the result matrix
dst.setTo(0);
// calculate patterns
for(int i=1;i<src.rows-1;i++) {
for(int j=1;j<src.cols-1;j++) {
_Tp center = src.at<_Tp>(i,j);
unsigned char code = 0;
code |= (src.at<_Tp>(i-1,j-1) >= center) << 7;
code |= (src.at<_Tp>(i-1,j) >= center) << 6;
code |= (src.at<_Tp>(i-1,j+1) >= center) << 5;
code |= (src.at<_Tp>(i,j+1) >= center) << 4;
code |= (src.at<_Tp>(i+1,j+1) >= center) << 3;
code |= (src.at<_Tp>(i+1,j) >= center) << 2;
code |= (src.at<_Tp>(i+1,j-1) >= center) << 1;
code |= (src.at<_Tp>(i,j-1) >= center) << 0;
dst.at<unsigned char>(i-1,j-1) = code;
}
}
}
//------------------------------------------------------------------------------
// cv::elbp
//------------------------------------------------------------------------------
template <typename _Tp> static
inline void elbp_(InputArray _src, OutputArray _dst, int radius, int neighbors) {
//get matrices
Mat src = _src.getMat();
// allocate memory for result
_dst.create(src.rows-2*radius, src.cols-2*radius, CV_32SC1);
Mat dst = _dst.getMat();
// zero
dst.setTo(0);
for(int n=0; n<neighbors; n++) {
// sample points
float x = static_cast<float>(-radius) * sin(2.0*CV_PI*n/static_cast<float>(neighbors));
float y = static_cast<float>(radius) * cos(2.0*CV_PI*n/static_cast<float>(neighbors));
// relative indices
int fx = static_cast<int>(floor(x));
int fy = static_cast<int>(floor(y));
int cx = static_cast<int>(ceil(x));
int cy = static_cast<int>(ceil(y));
// fractional part
float ty = y - fy;
float tx = x - fx;
// set interpolation weights
float w1 = (1 - tx) * (1 - ty);
float w2 = tx * (1 - ty);
float w3 = (1 - tx) * ty;
float w4 = tx * ty;
// iterate through your data
for(int i=radius; i < src.rows-radius;i++) {
for(int j=radius;j < src.cols-radius;j++) {
// calculate interpolated value
float t = w1*src.at<_Tp>(i+fy,j+fx) + w2*src.at<_Tp>(i+fy,j+cx) + w3*src.at<_Tp>(i+cy,j+fx) + w4*src.at<_Tp>(i+cy,j+cx);
// floating point precision, so check some machine-dependent epsilon
dst.at<int>(i-radius,j-radius) += ((t > src.at<_Tp>(i,j)) || (std::abs(t-src.at<_Tp>(i,j)) < std::numeric_limits<float>::epsilon())) << n;
}
}
}
}
static void elbp(InputArray src, OutputArray dst, int radius, int neighbors)
{
switch (src.type()) {
case CV_8SC1: elbp_<char>(src,dst, radius, neighbors); break;
case CV_8UC1: elbp_<unsigned char>(src, dst, radius, neighbors); break;
case CV_16SC1: elbp_<short>(src,dst, radius, neighbors); break;
case CV_16UC1: elbp_<unsigned short>(src,dst, radius, neighbors); break;
case CV_32SC1: elbp_<int>(src,dst, radius, neighbors); break;
case CV_32FC1: elbp_<float>(src,dst, radius, neighbors); break;
case CV_64FC1: elbp_<double>(src,dst, radius, neighbors); break;
default: break;
}
}
static Mat
histc_(const Mat& src, int minVal=0, int maxVal=255, bool normed=false)
{
Mat result;
// Establish the number of bins.
int histSize = maxVal-minVal+1;
// Set the ranges.
float range[] = { minVal, maxVal } ;
const float* histRange = { range };
// calc histogram
calcHist(&src, 1, 0, Mat(), result, 1, &histSize, &histRange, true, false);
// normalize
if(normed) {
result /= src.total();
}
return result.reshape(1,1);
}
static Mat histc(InputArray _src, int minVal, int maxVal, bool normed)
{
Mat src = _src.getMat();
switch (src.type()) {
case CV_8SC1:
return histc_(Mat_<float>(src), minVal, maxVal, normed);
break;
case CV_8UC1:
return histc_(src, minVal, maxVal, normed);
break;
case CV_16SC1:
return histc_(Mat_<float>(src), minVal, maxVal, normed);
break;
case CV_16UC1:
return histc_(src, minVal, maxVal, normed);
break;
case CV_32SC1:
return histc_(Mat_<float>(src), minVal, maxVal, normed);
break;
case CV_32FC1:
return histc_(src, minVal, maxVal, normed);
break;
default:
CV_Error(CV_StsUnmatchedFormats, "This type is not implemented yet."); break;
}
return Mat();
}
static Mat spatial_histogram(InputArray _src, int numPatterns,
int grid_x, int grid_y, bool normed)
{
Mat src = _src.getMat();
// calculate LBP patch size
int width = src.cols/grid_x;
int height = src.rows/grid_y;
// allocate memory for the spatial histogram
Mat result = Mat::zeros(grid_x * grid_y, numPatterns, CV_32FC1);
// return matrix with zeros if no data was given
if(src.empty())
return result.reshape(1,1);
// initial result_row
int resultRowIdx = 0;
// iterate through grid
for(int i = 0; i < grid_y; i++) {
for(int j = 0; j < grid_x; j++) {
Mat src_cell = Mat(src, Range(i*height,(i+1)*height), Range(j*width,(j+1)*width));
Mat cell_hist = histc(src_cell, 0, (numPatterns-1), true);
// copy to the result matrix
Mat result_row = result.row(resultRowIdx);
cell_hist.reshape(1,1).convertTo(result_row, CV_32FC1);
// increase row count in result matrix
resultRowIdx++;
}
}
// return result as reshaped feature vector
return result.reshape(1,1);
}
//------------------------------------------------------------------------------
// cv::elbp, cv::olbp, cv::varlbp wrapper
//------------------------------------------------------------------------------
static Mat elbp(InputArray src, int radius, int neighbors) {
Mat dst;
elbp(src, dst, radius, neighbors);
return dst;
}
void LBPH::load(const FileStorage& fs) {
fs["radius"] >> _radius;
fs["neighbors"] >> _neighbors;
fs["grid_x"] >> _grid_x;
fs["grid_y"] >> _grid_y;
//read matrices
readFileNodeList(fs["histograms"], _histograms);
readFileNodeList(fs["labels"], _labels);
}
// See FaceRecognizer::save.
void LBPH::save(FileStorage& fs) const {
fs << "radius" << _radius;
fs << "neighbors" << _neighbors;
fs << "grid_x" << _grid_x;
fs << "grid_y" << _grid_y;
// write matrices
writeFileNodeList(fs, "histograms", _histograms);
writeFileNodeList(fs, "labels", _labels);
}
void LBPH::train(InputArray _src, InputArray _lbls) {
if(_src.kind() != _InputArray::STD_VECTOR_MAT && _src.kind() != _InputArray::STD_VECTOR_VECTOR)
CV_Error(CV_StsUnsupportedFormat, "LBPH::train expects InputArray::STD_VECTOR_MAT or _InputArray::STD_VECTOR_VECTOR.");
// get the vector of matrices
vector<Mat> src;
_src.getMatVector(src);
// turn the label matrix into a vector
vector<int> labels = _lbls.getMat();
if(labels.size() != src.size())
CV_Error(CV_StsUnsupportedFormat, "The number of labels must equal the number of samples.");
// store given labels
_labels = labels;
// store the spatial histograms of the original data
for(int sampleIdx = 0; sampleIdx < src.size(); sampleIdx++) {
// calculate lbp image
Mat lbp_image = elbp(src[sampleIdx], _radius, _neighbors);
// get spatial histogram from this lbp image
Mat p = spatial_histogram(
lbp_image, /* lbp_image */
static_cast<int>(std::pow(2.0, static_cast<double>(_neighbors))), /* number of possible patterns */
_grid_x, /* grid size x */
_grid_y, /* grid size y */
true);
// add to templates
_histograms.push_back(p);
}
}
int LBPH::predict(InputArray _src) const {
Mat src = _src.getMat();
// get the spatial histogram from input image
Mat lbp_image = elbp(src, _radius, _neighbors);
Mat query = spatial_histogram(
lbp_image, /* lbp_image */
static_cast<int>(std::pow(2.0, static_cast<double>(_neighbors))), /* number of possible patterns */
_grid_x, /* grid size x */
_grid_y, /* grid size y */
true /* normed histograms */);
// find 1-nearest neighbor
double minDist = DBL_MAX;
int minClass = -1;
for(int sampleIdx = 0; sampleIdx < _histograms.size(); sampleIdx++) {
double dist = compareHist(_histograms[sampleIdx], query, CV_COMP_CHISQR);
if(dist < minDist) {
minDist = dist;
minClass = _labels[sampleIdx];
}
}
return minClass;
}
Ptr<FaceRecognizer> createEigenFaceRecognizer(int num_components)
{
return new Eigenfaces(num_components);
}
Ptr<FaceRecognizer> createFisherFaceRecognizer(int num_components)
{
return new Fisherfaces(num_components);
}
Ptr<FaceRecognizer> createLBPHFaceRecognizer(int radius, int neighbors,
int grid_x, int grid_y)
{
return new LBPH(radius, neighbors, grid_x, grid_y);
}
}