removed ERFilter (to be moved to opencv_contrib/modules/text) and lineMOD (to be moved to opencv_contrib/modules/rgbd)
This commit is contained in:
@@ -315,8 +315,6 @@ public:
|
||||
|
||||
}
|
||||
|
||||
#include "opencv2/objdetect/linemod.hpp"
|
||||
#include "opencv2/objdetect/erfilter.hpp"
|
||||
#include "opencv2/objdetect/detection_based_tracker.hpp"
|
||||
|
||||
#endif
|
||||
|
@@ -1,266 +0,0 @@
|
||||
/*M///////////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
|
||||
//
|
||||
// By downloading, copying, installing or using the software you agree to this license.
|
||||
// If you do not agree to this license, do not download, install,
|
||||
// copy or use the software.
|
||||
//
|
||||
//
|
||||
// License Agreement
|
||||
// For Open Source Computer Vision Library
|
||||
//
|
||||
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
|
||||
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
|
||||
// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
|
||||
// Third party copyrights are property of their respective owners.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without modification,
|
||||
// are permitted provided that the following conditions are met:
|
||||
//
|
||||
// * Redistribution's of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimer.
|
||||
//
|
||||
// * Redistribution's in binary form must reproduce the above copyright notice,
|
||||
// this list of conditions and the following disclaimer in the documentation
|
||||
// and/or other materials provided with the distribution.
|
||||
//
|
||||
// * The name of the copyright holders may not be used to endorse or promote products
|
||||
// derived from this software without specific prior written permission.
|
||||
//
|
||||
// This software is provided by the copyright holders and contributors "as is" and
|
||||
// any express or implied warranties, including, but not limited to, the implied
|
||||
// warranties of merchantability and fitness for a particular purpose are disclaimed.
|
||||
// In no event shall the Intel Corporation or contributors be liable for any direct,
|
||||
// indirect, incidental, special, exemplary, or consequential damages
|
||||
// (including, but not limited to, procurement of substitute goods or services;
|
||||
// loss of use, data, or profits; or business interruption) however caused
|
||||
// and on any theory of liability, whether in contract, strict liability,
|
||||
// or tort (including negligence or otherwise) arising in any way out of
|
||||
// the use of this software, even if advised of the possibility of such damage.
|
||||
//
|
||||
//M*/
|
||||
|
||||
#ifndef __OPENCV_OBJDETECT_ERFILTER_HPP__
|
||||
#define __OPENCV_OBJDETECT_ERFILTER_HPP__
|
||||
|
||||
#include "opencv2/core.hpp"
|
||||
#include <vector>
|
||||
#include <deque>
|
||||
#include <string>
|
||||
|
||||
namespace cv
|
||||
{
|
||||
|
||||
/*!
|
||||
Extremal Region Stat structure
|
||||
|
||||
The ERStat structure represents a class-specific Extremal Region (ER).
|
||||
|
||||
An ER is a 4-connected set of pixels with all its grey-level values smaller than the values
|
||||
in its outer boundary. A class-specific ER is selected (using a classifier) from all the ER's
|
||||
in the component tree of the image.
|
||||
*/
|
||||
struct CV_EXPORTS ERStat
|
||||
{
|
||||
public:
|
||||
//! Constructor
|
||||
explicit ERStat(int level = 256, int pixel = 0, int x = 0, int y = 0);
|
||||
//! Destructor
|
||||
~ERStat() { }
|
||||
|
||||
//! seed point and the threshold (max grey-level value)
|
||||
int pixel;
|
||||
int level;
|
||||
|
||||
//! incrementally computable features
|
||||
int area;
|
||||
int perimeter;
|
||||
int euler; //!< euler number
|
||||
Rect rect;
|
||||
double raw_moments[2]; //!< order 1 raw moments to derive the centroid
|
||||
double central_moments[3]; //!< order 2 central moments to construct the covariance matrix
|
||||
std::deque<int> *crossings;//!< horizontal crossings
|
||||
float med_crossings; //!< median of the crossings at three different height levels
|
||||
|
||||
//! 2nd stage features
|
||||
float hole_area_ratio;
|
||||
float convex_hull_ratio;
|
||||
float num_inflexion_points;
|
||||
|
||||
// TODO Other features can be added (average color, standard deviation, and such)
|
||||
|
||||
|
||||
// TODO shall we include the pixel list whenever available (i.e. after 2nd stage) ?
|
||||
std::vector<int> *pixels;
|
||||
|
||||
//! probability that the ER belongs to the class we are looking for
|
||||
double probability;
|
||||
|
||||
//! pointers preserving the tree structure of the component tree
|
||||
ERStat* parent;
|
||||
ERStat* child;
|
||||
ERStat* next;
|
||||
ERStat* prev;
|
||||
|
||||
//! wenever the regions is a local maxima of the probability
|
||||
bool local_maxima;
|
||||
ERStat* max_probability_ancestor;
|
||||
ERStat* min_probability_ancestor;
|
||||
};
|
||||
|
||||
/*!
|
||||
Base class for 1st and 2nd stages of Neumann and Matas scene text detection algorithms
|
||||
Neumann L., Matas J.: Real-Time Scene Text Localization and Recognition, CVPR 2012
|
||||
|
||||
Extracts the component tree (if needed) and filter the extremal regions (ER's) by using a given classifier.
|
||||
*/
|
||||
class CV_EXPORTS ERFilter : public Algorithm
|
||||
{
|
||||
public:
|
||||
|
||||
//! callback with the classifier is made a class. By doing it we hide SVM, Boost etc.
|
||||
class CV_EXPORTS Callback
|
||||
{
|
||||
public:
|
||||
virtual ~Callback() { }
|
||||
//! The classifier must return probability measure for the region.
|
||||
virtual double eval(const ERStat& stat) = 0; //const = 0; //TODO why cannot use const = 0 here?
|
||||
};
|
||||
|
||||
/*!
|
||||
the key method. Takes image on input and returns the selected regions in a vector of ERStat
|
||||
only distinctive ERs which correspond to characters are selected by a sequential classifier
|
||||
\param image is the input image
|
||||
\param regions is output for the first stage, input/output for the second one.
|
||||
*/
|
||||
virtual void run( InputArray image, std::vector<ERStat>& regions ) = 0;
|
||||
|
||||
|
||||
//! set/get methods to set the algorithm properties,
|
||||
virtual void setCallback(const Ptr<ERFilter::Callback>& cb) = 0;
|
||||
virtual void setThresholdDelta(int thresholdDelta) = 0;
|
||||
virtual void setMinArea(float minArea) = 0;
|
||||
virtual void setMaxArea(float maxArea) = 0;
|
||||
virtual void setMinProbability(float minProbability) = 0;
|
||||
virtual void setMinProbabilityDiff(float minProbabilityDiff) = 0;
|
||||
virtual void setNonMaxSuppression(bool nonMaxSuppression) = 0;
|
||||
virtual int getNumRejected() = 0;
|
||||
};
|
||||
|
||||
|
||||
/*!
|
||||
Create an Extremal Region Filter for the 1st stage classifier of N&M algorithm
|
||||
Neumann L., Matas J.: Real-Time Scene Text Localization and Recognition, CVPR 2012
|
||||
|
||||
The component tree of the image is extracted by a threshold increased step by step
|
||||
from 0 to 255, incrementally computable descriptors (aspect_ratio, compactness,
|
||||
number of holes, and number of horizontal crossings) are computed for each ER
|
||||
and used as features for a classifier which estimates the class-conditional
|
||||
probability P(er|character). The value of P(er|character) is tracked using the inclusion
|
||||
relation of ER across all thresholds and only the ERs which correspond to local maximum
|
||||
of the probability P(er|character) are selected (if the local maximum of the
|
||||
probability is above a global limit pmin and the difference between local maximum and
|
||||
local minimum is greater than minProbabilityDiff).
|
||||
|
||||
\param cb Callback with the classifier.
|
||||
default classifier can be implicitly load with function loadClassifierNM1()
|
||||
from file in samples/cpp/trained_classifierNM1.xml
|
||||
\param thresholdDelta Threshold step in subsequent thresholds when extracting the component tree
|
||||
\param minArea The minimum area (% of image size) allowed for retreived ER's
|
||||
\param minArea The maximum area (% of image size) allowed for retreived ER's
|
||||
\param minProbability The minimum probability P(er|character) allowed for retreived ER's
|
||||
\param nonMaxSuppression Whenever non-maximum suppression is done over the branch probabilities
|
||||
\param minProbability The minimum probability difference between local maxima and local minima ERs
|
||||
*/
|
||||
CV_EXPORTS Ptr<ERFilter> createERFilterNM1(const Ptr<ERFilter::Callback>& cb,
|
||||
int thresholdDelta = 1, float minArea = 0.00025,
|
||||
float maxArea = 0.13, float minProbability = 0.4,
|
||||
bool nonMaxSuppression = true,
|
||||
float minProbabilityDiff = 0.1);
|
||||
|
||||
/*!
|
||||
Create an Extremal Region Filter for the 2nd stage classifier of N&M algorithm
|
||||
Neumann L., Matas J.: Real-Time Scene Text Localization and Recognition, CVPR 2012
|
||||
|
||||
In the second stage, the ERs that passed the first stage are classified into character
|
||||
and non-character classes using more informative but also more computationally expensive
|
||||
features. The classifier uses all the features calculated in the first stage and the following
|
||||
additional features: hole area ratio, convex hull ratio, and number of outer inflexion points.
|
||||
|
||||
\param cb Callback with the classifier
|
||||
default classifier can be implicitly load with function loadClassifierNM2()
|
||||
from file in samples/cpp/trained_classifierNM2.xml
|
||||
\param minProbability The minimum probability P(er|character) allowed for retreived ER's
|
||||
*/
|
||||
CV_EXPORTS Ptr<ERFilter> createERFilterNM2(const Ptr<ERFilter::Callback>& cb,
|
||||
float minProbability = 0.3);
|
||||
|
||||
|
||||
/*!
|
||||
Allow to implicitly load the default classifier when creating an ERFilter object.
|
||||
The function takes as parameter the XML or YAML file with the classifier model
|
||||
(e.g. trained_classifierNM1.xml) returns a pointer to ERFilter::Callback.
|
||||
*/
|
||||
|
||||
CV_EXPORTS Ptr<ERFilter::Callback> loadClassifierNM1(const std::string& filename);
|
||||
|
||||
/*!
|
||||
Allow to implicitly load the default classifier when creating an ERFilter object.
|
||||
The function takes as parameter the XML or YAML file with the classifier model
|
||||
(e.g. trained_classifierNM1.xml) returns a pointer to ERFilter::Callback.
|
||||
*/
|
||||
|
||||
CV_EXPORTS Ptr<ERFilter::Callback> loadClassifierNM2(const std::string& filename);
|
||||
|
||||
|
||||
// computeNMChannels operation modes (passed as the _mode argument)
enum
{
    ERFILTER_NM_RGBLGrad = 0, // red, green, blue, lightness and gradient magnitude channels
    ERFILTER_NM_IHSGrad  = 1  // intensity, hue, saturation and gradient magnitude channels
};
|
||||
|
||||
/*!
|
||||
Compute the different channels to be processed independently in the N&M algorithm
|
||||
Neumann L., Matas J.: Real-Time Scene Text Localization and Recognition, CVPR 2012
|
||||
|
||||
In N&M algorithm, the combination of intensity (I), hue (H), saturation (S), and gradient
|
||||
magnitude channels (Grad) are used in order to obtain high localization recall.
|
||||
This implementation also provides an alternative combination of red (R), green (G), blue (B),
|
||||
lightness (L), and gradient magnitude (Grad).
|
||||
|
||||
\param _src Source image. Must be RGB CV_8UC3.
|
||||
\param _channels Output vector<Mat> where computed channels are stored.
|
||||
\param _mode Mode of operation. Currently the only available options are
|
||||
ERFILTER_NM_RGBLGrad (by default) and ERFILTER_NM_IHSGrad.
|
||||
|
||||
*/
|
||||
CV_EXPORTS void computeNMChannels(InputArray _src, OutputArrayOfArrays _channels, int _mode = ERFILTER_NM_RGBLGrad);
|
||||
|
||||
|
||||
/*!
|
||||
Find groups of Extremal Regions that are organized as text blocks. This function implements
|
||||
the grouping algorithm described in:
|
||||
Gomez L. and Karatzas D.: Multi-script Text Extraction from Natural Scenes, ICDAR 2013.
|
||||
Notice that this implementation constrains the results to horizontally-aligned text and
|
||||
latin script (since ERFilter classifiers are trained only for latin script detection).
|
||||
|
||||
The algorithm combines two different clustering techniques in a single parameter-free procedure
|
||||
to detect groups of regions organized as text. The maximally meaningful groups are fist detected
|
||||
in several feature spaces, where each feature space is a combination of proximity information
|
||||
(x,y coordinates) and a similarity measure (intensity, color, size, gradient magnitude, etc.),
|
||||
thus providing a set of hypotheses of text groups. Evidence Accumulation framework is used to
|
||||
combine all these hypotheses to get the final estimate. Each of the resulting groups are finally
|
||||
validated using a classifier in order to assest if they form a valid horizontally-aligned text block.
|
||||
|
||||
\param src Vector of sinle channel images CV_8UC1 from wich the regions were extracted.
|
||||
\param regions Vector of ER's retreived from the ERFilter algorithm from each channel
|
||||
\param filename The XML or YAML file with the classifier model (e.g. trained_classifier_erGrouping.xml)
|
||||
\param minProbability The minimum probability for accepting a group
|
||||
\param groups The output of the algorithm are stored in this parameter as list of rectangles.
|
||||
*/
|
||||
CV_EXPORTS void erGrouping(InputArrayOfArrays src, std::vector<std::vector<ERStat> > ®ions,
|
||||
const std::string& filename, float minProbablity,
|
||||
std::vector<Rect > &groups);
|
||||
|
||||
}
|
||||
#endif // __OPENCV_OBJDETECT_ERFILTER_HPP__
|
@@ -1,455 +0,0 @@
|
||||
/*M///////////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
|
||||
//
|
||||
// By downloading, copying, installing or using the software you agree to this license.
|
||||
// If you do not agree to this license, do not download, install,
|
||||
// copy or use the software.
|
||||
//
|
||||
//
|
||||
// License Agreement
|
||||
// For Open Source Computer Vision Library
|
||||
//
|
||||
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
|
||||
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
|
||||
// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
|
||||
// Third party copyrights are property of their respective owners.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without modification,
|
||||
// are permitted provided that the following conditions are met:
|
||||
//
|
||||
// * Redistribution's of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimer.
|
||||
//
|
||||
// * Redistribution's in binary form must reproduce the above copyright notice,
|
||||
// this list of conditions and the following disclaimer in the documentation
|
||||
// and/or other materials provided with the distribution.
|
||||
//
|
||||
// * The name of the copyright holders may not be used to endorse or promote products
|
||||
// derived from this software without specific prior written permission.
|
||||
//
|
||||
// This software is provided by the copyright holders and contributors "as is" and
|
||||
// any express or implied warranties, including, but not limited to, the implied
|
||||
// warranties of merchantability and fitness for a particular purpose are disclaimed.
|
||||
// In no event shall the Intel Corporation or contributors be liable for any direct,
|
||||
// indirect, incidental, special, exemplary, or consequential damages
|
||||
// (including, but not limited to, procurement of substitute goods or services;
|
||||
// loss of use, data, or profits; or business interruption) however caused
|
||||
// and on any theory of liability, whether in contract, strict liability,
|
||||
// or tort (including negligence or otherwise) arising in any way out of
|
||||
// the use of this software, even if advised of the possibility of such damage.
|
||||
//
|
||||
//M*/
|
||||
|
||||
#ifndef __OPENCV_OBJDETECT_LINEMOD_HPP__
|
||||
#define __OPENCV_OBJDETECT_LINEMOD_HPP__
|
||||
|
||||
#include "opencv2/core.hpp"
|
||||
#include <map>
|
||||
|
||||
/****************************************************************************************\
|
||||
* LINE-MOD *
|
||||
\****************************************************************************************/
|
||||
|
||||
namespace cv {
|
||||
namespace linemod {
|
||||
|
||||
/// @todo Convert doxy comments to rst
|
||||
|
||||
/**
|
||||
* \brief Discriminant feature described by its location and label.
|
||||
*/
|
||||
struct CV_EXPORTS Feature
|
||||
{
|
||||
int x; ///< x offset
|
||||
int y; ///< y offset
|
||||
int label; ///< Quantization
|
||||
|
||||
Feature() : x(0), y(0), label(0) {}
|
||||
Feature(int x, int y, int label);
|
||||
|
||||
void read(const FileNode& fn);
|
||||
void write(FileStorage& fs) const;
|
||||
};
|
||||
|
||||
/// Member-wise initialization of a Feature from its offsets and quantization label.
inline Feature::Feature(int _x, int _y, int _label)
  : x(_x),
    y(_y),
    label(_label)
{
}
|
||||
|
||||
struct CV_EXPORTS Template
|
||||
{
|
||||
int width;
|
||||
int height;
|
||||
int pyramid_level;
|
||||
std::vector<Feature> features;
|
||||
|
||||
void read(const FileNode& fn);
|
||||
void write(FileStorage& fs) const;
|
||||
};
|
||||
|
||||
/**
|
||||
* \brief Represents a modality operating over an image pyramid.
|
||||
*/
|
||||
class QuantizedPyramid
|
||||
{
|
||||
public:
|
||||
// Virtual destructor
|
||||
virtual ~QuantizedPyramid() {}
|
||||
|
||||
/**
|
||||
* \brief Compute quantized image at current pyramid level for online detection.
|
||||
*
|
||||
* \param[out] dst The destination 8-bit image. For each pixel at most one bit is set,
|
||||
* representing its classification.
|
||||
*/
|
||||
virtual void quantize(Mat& dst) const =0;
|
||||
|
||||
/**
|
||||
* \brief Extract most discriminant features at current pyramid level to form a new template.
|
||||
*
|
||||
* \param[out] templ The new template.
|
||||
*/
|
||||
virtual bool extractTemplate(Template& templ) const =0;
|
||||
|
||||
/**
|
||||
* \brief Go to the next pyramid level.
|
||||
*
|
||||
* \todo Allow pyramid scale factor other than 2
|
||||
*/
|
||||
virtual void pyrDown() =0;
|
||||
|
||||
protected:
|
||||
/// Candidate feature with a score
|
||||
struct Candidate
|
||||
{
|
||||
Candidate(int x, int y, int label, float score);
|
||||
|
||||
/// Sort candidates with high score to the front
|
||||
bool operator<(const Candidate& rhs) const
|
||||
{
|
||||
return score > rhs.score;
|
||||
}
|
||||
|
||||
Feature f;
|
||||
float score;
|
||||
};
|
||||
|
||||
/**
|
||||
* \brief Choose candidate features so that they are not bunched together.
|
||||
*
|
||||
* \param[in] candidates Candidate features sorted by score.
|
||||
* \param[out] features Destination vector of selected features.
|
||||
* \param[in] num_features Number of candidates to select.
|
||||
* \param[in] distance Hint for desired distance between features.
|
||||
*/
|
||||
static void selectScatteredFeatures(const std::vector<Candidate>& candidates,
|
||||
std::vector<Feature>& features,
|
||||
size_t num_features, float distance);
|
||||
};
|
||||
|
||||
/// Build a candidate: the feature is constructed from (x, y, label), the score is stored as given.
inline QuantizedPyramid::Candidate::Candidate(int x, int y, int label, float _score)
  : f(x, y, label),
    score(_score)
{
}
|
||||
|
||||
/**
|
||||
* \brief Interface for modalities that plug into the LINE template matching representation.
|
||||
*
|
||||
* \todo Max response, to allow optimization of summing (255/MAX) features as uint8
|
||||
*/
|
||||
class CV_EXPORTS Modality
|
||||
{
|
||||
public:
|
||||
// Virtual destructor
|
||||
virtual ~Modality() {}
|
||||
|
||||
/**
|
||||
* \brief Form a quantized image pyramid from a source image.
|
||||
*
|
||||
* \param[in] src The source image. Type depends on the modality.
|
||||
* \param[in] mask Optional mask. If not empty, unmasked pixels are set to zero
|
||||
* in quantized image and cannot be extracted as features.
|
||||
*/
|
||||
Ptr<QuantizedPyramid> process(const Mat& src,
|
||||
const Mat& mask = Mat()) const
|
||||
{
|
||||
return processImpl(src, mask);
|
||||
}
|
||||
|
||||
virtual String name() const =0;
|
||||
|
||||
virtual void read(const FileNode& fn) =0;
|
||||
virtual void write(FileStorage& fs) const =0;
|
||||
|
||||
/**
|
||||
* \brief Create modality by name.
|
||||
*
|
||||
* The following modality types are supported:
|
||||
* - "ColorGradient"
|
||||
* - "DepthNormal"
|
||||
*/
|
||||
static Ptr<Modality> create(const String& modality_type);
|
||||
|
||||
/**
|
||||
* \brief Load a modality from file.
|
||||
*/
|
||||
static Ptr<Modality> create(const FileNode& fn);
|
||||
|
||||
protected:
|
||||
// Indirection is because process() has a default parameter.
|
||||
virtual Ptr<QuantizedPyramid> processImpl(const Mat& src,
|
||||
const Mat& mask) const =0;
|
||||
};
|
||||
|
||||
/**
|
||||
* \brief Modality that computes quantized gradient orientations from a color image.
|
||||
*/
|
||||
class CV_EXPORTS ColorGradient : public Modality
|
||||
{
|
||||
public:
|
||||
/**
|
||||
* \brief Default constructor. Uses reasonable default parameter values.
|
||||
*/
|
||||
ColorGradient();
|
||||
|
||||
/**
|
||||
* \brief Constructor.
|
||||
*
|
||||
* \param weak_threshold When quantizing, discard gradients with magnitude less than this.
|
||||
* \param num_features How many features a template must contain.
|
||||
* \param strong_threshold Consider as candidate features only gradients whose norms are
|
||||
* larger than this.
|
||||
*/
|
||||
ColorGradient(float weak_threshold, size_t num_features, float strong_threshold);
|
||||
|
||||
virtual String name() const;
|
||||
|
||||
virtual void read(const FileNode& fn);
|
||||
virtual void write(FileStorage& fs) const;
|
||||
|
||||
float weak_threshold;
|
||||
size_t num_features;
|
||||
float strong_threshold;
|
||||
|
||||
protected:
|
||||
virtual Ptr<QuantizedPyramid> processImpl(const Mat& src,
|
||||
const Mat& mask) const;
|
||||
};
|
||||
|
||||
/**
|
||||
* \brief Modality that computes quantized surface normals from a dense depth map.
|
||||
*/
|
||||
class CV_EXPORTS DepthNormal : public Modality
|
||||
{
|
||||
public:
|
||||
/**
|
||||
* \brief Default constructor. Uses reasonable default parameter values.
|
||||
*/
|
||||
DepthNormal();
|
||||
|
||||
/**
|
||||
* \brief Constructor.
|
||||
*
|
||||
* \param distance_threshold Ignore pixels beyond this distance.
|
||||
* \param difference_threshold When computing normals, ignore contributions of pixels whose
|
||||
* depth difference with the central pixel is above this threshold.
|
||||
* \param num_features How many features a template must contain.
|
||||
* \param extract_threshold Consider as candidate feature only if there are no differing
|
||||
* orientations within a distance of extract_threshold.
|
||||
*/
|
||||
DepthNormal(int distance_threshold, int difference_threshold, size_t num_features,
|
||||
int extract_threshold);
|
||||
|
||||
virtual String name() const;
|
||||
|
||||
virtual void read(const FileNode& fn);
|
||||
virtual void write(FileStorage& fs) const;
|
||||
|
||||
int distance_threshold;
|
||||
int difference_threshold;
|
||||
size_t num_features;
|
||||
int extract_threshold;
|
||||
|
||||
protected:
|
||||
virtual Ptr<QuantizedPyramid> processImpl(const Mat& src,
|
||||
const Mat& mask) const;
|
||||
};
|
||||
|
||||
/**
|
||||
* \brief Debug function to colormap a quantized image for viewing.
|
||||
*/
|
||||
void colormap(const Mat& quantized, Mat& dst);
|
||||
|
||||
/**
|
||||
* \brief Represents a successful template match.
|
||||
*/
|
||||
struct CV_EXPORTS Match
|
||||
{
|
||||
Match()
|
||||
{
|
||||
}
|
||||
|
||||
Match(int x, int y, float similarity, const String& class_id, int template_id);
|
||||
|
||||
/// Sort matches with high similarity to the front
|
||||
bool operator<(const Match& rhs) const
|
||||
{
|
||||
// Secondarily sort on template_id for the sake of duplicate removal
|
||||
if (similarity != rhs.similarity)
|
||||
return similarity > rhs.similarity;
|
||||
else
|
||||
return template_id < rhs.template_id;
|
||||
}
|
||||
|
||||
bool operator==(const Match& rhs) const
|
||||
{
|
||||
return x == rhs.x && y == rhs.y && similarity == rhs.similarity && class_id == rhs.class_id;
|
||||
}
|
||||
|
||||
int x;
|
||||
int y;
|
||||
float similarity;
|
||||
String class_id;
|
||||
int template_id;
|
||||
};
|
||||
|
||||
inline
|
||||
Match::Match(int _x, int _y, float _similarity, const String& _class_id, int _template_id)
|
||||
: x(_x), y(_y), similarity(_similarity), class_id(_class_id), template_id(_template_id)
|
||||
{}
|
||||
|
||||
/**
|
||||
* \brief Object detector using the LINE template matching algorithm with any set of
|
||||
* modalities.
|
||||
*/
|
||||
class CV_EXPORTS Detector
|
||||
{
|
||||
public:
|
||||
/**
|
||||
* \brief Empty constructor, initialize with read().
|
||||
*/
|
||||
Detector();
|
||||
|
||||
/**
|
||||
* \brief Constructor.
|
||||
*
|
||||
* \param modalities Modalities to use (color gradients, depth normals, ...).
|
||||
* \param T_pyramid Value of the sampling step T at each pyramid level. The
|
||||
* number of pyramid levels is T_pyramid.size().
|
||||
*/
|
||||
Detector(const std::vector< Ptr<Modality> >& modalities, const std::vector<int>& T_pyramid);
|
||||
|
||||
/**
|
||||
* \brief Detect objects by template matching.
|
||||
*
|
||||
* Matches globally at the lowest pyramid level, then refines locally stepping up the pyramid.
|
||||
*
|
||||
* \param sources Source images, one for each modality.
|
||||
* \param threshold Similarity threshold, a percentage between 0 and 100.
|
||||
* \param[out] matches Template matches, sorted by similarity score.
|
||||
* \param class_ids If non-empty, only search for the desired object classes.
|
||||
* \param[out] quantized_images Optionally return vector<Mat> of quantized images.
|
||||
* \param masks The masks for consideration during matching. The masks should be CV_8UC1
|
||||
* where 255 represents a valid pixel. If non-empty, the vector must be
|
||||
* the same size as sources. Each element must be
|
||||
* empty or the same size as its corresponding source.
|
||||
*/
|
||||
void match(const std::vector<Mat>& sources, float threshold, std::vector<Match>& matches,
|
||||
const std::vector<String>& class_ids = std::vector<String>(),
|
||||
OutputArrayOfArrays quantized_images = noArray(),
|
||||
const std::vector<Mat>& masks = std::vector<Mat>()) const;
|
||||
|
||||
/**
|
||||
* \brief Add new object template.
|
||||
*
|
||||
* \param sources Source images, one for each modality.
|
||||
* \param class_id Object class ID.
|
||||
* \param object_mask Mask separating object from background.
|
||||
* \param[out] bounding_box Optionally return bounding box of the extracted features.
|
||||
*
|
||||
* \return Template ID, or -1 if failed to extract a valid template.
|
||||
*/
|
||||
int addTemplate(const std::vector<Mat>& sources, const String& class_id,
|
||||
const Mat& object_mask, Rect* bounding_box = NULL);
|
||||
|
||||
/**
|
||||
* \brief Add a new object template computed by external means.
|
||||
*/
|
||||
int addSyntheticTemplate(const std::vector<Template>& templates, const String& class_id);
|
||||
|
||||
/**
|
||||
* \brief Get the modalities used by this detector.
|
||||
*
|
||||
* You are not permitted to add/remove modalities, but you may dynamic_cast them to
|
||||
* tweak parameters.
|
||||
*/
|
||||
const std::vector< Ptr<Modality> >& getModalities() const { return modalities; }
|
||||
|
||||
/**
|
||||
* \brief Get sampling step T at pyramid_level.
|
||||
*/
|
||||
int getT(int pyramid_level) const { return T_at_level[pyramid_level]; }
|
||||
|
||||
/**
|
||||
* \brief Get number of pyramid levels used by this detector.
|
||||
*/
|
||||
int pyramidLevels() const { return pyramid_levels; }
|
||||
|
||||
/**
|
||||
* \brief Get the template pyramid identified by template_id.
|
||||
*
|
||||
* For example, with 2 modalities (Gradient, Normal) and two pyramid levels
|
||||
* (L0, L1), the order is (GradientL0, NormalL0, GradientL1, NormalL1).
|
||||
*/
|
||||
const std::vector<Template>& getTemplates(const String& class_id, int template_id) const;
|
||||
|
||||
int numTemplates() const;
|
||||
int numTemplates(const String& class_id) const;
|
||||
int numClasses() const { return static_cast<int>(class_templates.size()); }
|
||||
|
||||
std::vector<String> classIds() const;
|
||||
|
||||
void read(const FileNode& fn);
|
||||
void write(FileStorage& fs) const;
|
||||
|
||||
String readClass(const FileNode& fn, const String &class_id_override = "");
|
||||
void writeClass(const String& class_id, FileStorage& fs) const;
|
||||
|
||||
void readClasses(const std::vector<String>& class_ids,
|
||||
const String& format = "templates_%s.yml.gz");
|
||||
void writeClasses(const String& format = "templates_%s.yml.gz") const;
|
||||
|
||||
protected:
|
||||
std::vector< Ptr<Modality> > modalities;
|
||||
int pyramid_levels;
|
||||
std::vector<int> T_at_level;
|
||||
|
||||
typedef std::vector<Template> TemplatePyramid;
|
||||
typedef std::map<String, std::vector<TemplatePyramid> > TemplatesMap;
|
||||
TemplatesMap class_templates;
|
||||
|
||||
typedef std::vector<Mat> LinearMemories;
|
||||
// Indexed as [pyramid level][modality][quantized label]
|
||||
typedef std::vector< std::vector<LinearMemories> > LinearMemoryPyramid;
|
||||
|
||||
void matchClass(const LinearMemoryPyramid& lm_pyramid,
|
||||
const std::vector<Size>& sizes,
|
||||
float threshold, std::vector<Match>& matches,
|
||||
const String& class_id,
|
||||
const std::vector<TemplatePyramid>& template_pyramids) const;
|
||||
};
|
||||
|
||||
/**
|
||||
* \brief Factory function for detector using LINE algorithm with color gradients.
|
||||
*
|
||||
* Default parameter settings suitable for VGA images.
|
||||
*/
|
||||
CV_EXPORTS Ptr<Detector> getDefaultLINE();
|
||||
|
||||
/**
|
||||
* \brief Factory function for detector using LINE-MOD algorithm with color gradients
|
||||
* and depth normals.
|
||||
*
|
||||
* Default parameter settings suitable for VGA images.
|
||||
*/
|
||||
CV_EXPORTS Ptr<Detector> getDefaultLINEMOD();
|
||||
|
||||
} // namespace linemod
|
||||
} // namespace cv
|
||||
|
||||
#endif // __OPENCV_OBJDETECT_LINEMOD_HPP__
|
Reference in New Issue
Block a user