[DEV] add EOF on regexp ==> not work ...

This commit is contained in:
Edouard DUPIN 2014-08-07 21:22:18 +02:00
parent 13f7a395ba
commit 4c55bca473
2 changed files with 81 additions and 16 deletions

View File

@ -31,6 +31,8 @@ const struct etk::convertionTable etk::regexp::constConvertionTable[] = {
{ true , '*' , '*' , etk::regexpOpcodeError}, { true , '*' , '*' , etk::regexpOpcodeError},
{ false , '.' , 0 , etk::regexpOpcodeDot}, { false , '.' , 0 , etk::regexpOpcodeDot},
{ true , '.' , '.' , etk::regexpOpcodeError}, { true , '.' , '.' , etk::regexpOpcodeError},
{ true , 'e' , 0 , etk::regexpOpcodeEOF},
{ false , 'e' , 'e' , etk::regexpOpcodeError},
{ false , '?' , 0 , etk::regexpOpcodeQuestion}, { false , '?' , 0 , etk::regexpOpcodeQuestion},
{ true , '?' , '?' , etk::regexpOpcodeError}, { true , '?' , '?' , etk::regexpOpcodeError},
{ false , '+' , 0 , etk::regexpOpcodePlus}, { false , '+' , 0 , etk::regexpOpcodePlus},
@ -106,6 +108,7 @@ std::string etk::regexp::createString(const std::vector<char32_t>& _data, int64_
case regexpOpcodeSpaceNot: output += std::string(ETK_BASH_COLOR_MAGENTA) + (char*)"\\S" + ETK_BASH_COLOR_NORMAL; break; case regexpOpcodeSpaceNot: output += std::string(ETK_BASH_COLOR_MAGENTA) + (char*)"\\S" + ETK_BASH_COLOR_NORMAL; break;
case regexpOpcodeWord: output += std::string(ETK_BASH_COLOR_MAGENTA) + (char*)"\\w" + ETK_BASH_COLOR_NORMAL; break; case regexpOpcodeWord: output += std::string(ETK_BASH_COLOR_MAGENTA) + (char*)"\\w" + ETK_BASH_COLOR_NORMAL; break;
case regexpOpcodeWordNot: output += std::string(ETK_BASH_COLOR_MAGENTA) + (char*)"\\W" + ETK_BASH_COLOR_NORMAL; break; case regexpOpcodeWordNot: output += std::string(ETK_BASH_COLOR_MAGENTA) + (char*)"\\W" + ETK_BASH_COLOR_NORMAL; break;
case regexpOpcodeEOF: output += std::string(ETK_BASH_COLOR_MAGENTA) + (char*)"\\e" + ETK_BASH_COLOR_NORMAL; break;
case '\n': output += std::string(ETK_BASH_COLOR_MAGENTA) + (char*)"\\n" + ETK_BASH_COLOR_NORMAL; break; case '\n': output += std::string(ETK_BASH_COLOR_MAGENTA) + (char*)"\\n" + ETK_BASH_COLOR_NORMAL; break;
case '\t': output += std::string(ETK_BASH_COLOR_MAGENTA) + (char*)"\\t" + ETK_BASH_COLOR_NORMAL; break; case '\t': output += std::string(ETK_BASH_COLOR_MAGENTA) + (char*)"\\t" + ETK_BASH_COLOR_NORMAL; break;
default: default:

View File

@ -70,6 +70,7 @@ enum regExpPrivateSection {
regexpOpcodeTo,/* - */ regexpOpcodeTo,/* - */
regexpOpcodeStar,/* * */ regexpOpcodeStar,/* * */
regexpOpcodeDot,/* . */ regexpOpcodeDot,/* . */
regexpOpcodeEOF,/* \e */
regexpOpcodeQuestion,/* ? */ regexpOpcodeQuestion,/* ? */
regexpOpcodePlus,/* + */ regexpOpcodePlus,/* + */
regexpOpcodePipe,/* | */ regexpOpcodePipe,/* | */
@ -88,19 +89,20 @@ enum regExpPrivateSection {
}; };
/* /*
normal mode : normal mode :
(...) sub element is separate with | (...) sub element is separate with |
\d Digits [0-9] \d Digits [0-9]
\D NOT a digit [^0-9] \D NOT a digit [^0-9]
\l Letters [a-zA-Z] \l Letters [a-zA-Z]
\L NOT a Letter [^a-zA-Z] \L NOT a Letter [^a-zA-Z]
\s Whitespace [ \t\n\r\f\v] \s Whitespace [ \t\n\r\f\v]
\S NOT Whitespace [^ \t\n\r\f\v] \S NOT Whitespace [^ \t\n\r\f\v]
\w "Word" character [a-zA-Z0-9_] \w "Word" character [a-zA-Z0-9_]
\W NOT a "Word" character [^a-zA-Z0-9_] \W NOT a "Word" character [^a-zA-Z0-9_]
\@ at the start or the end not in the parsing of element ==> check if \w is not present (other regExp will be <> ...) \@ at the start or the end not in the parsing of element ==> check if \w is not present (other regExp will be <> ...)
[anjdi] or [a-gt-j] range \e end-of-file / end-of-data [\x00] ==> not counted
. dot [^\x00-\x08\x0A-\x1F\x7F] [anjdi] or [a-gt-j] range
$ End / Start of line of line ==> ce sera un truc suplémentaire comme le \@ . dot [^\x00]
$ End / Start of line of line ==> ce sera un truc suplémentaire comme le \@
@ Previous @ Previous
==> TODO : ==> TODO :
^in the [] invertion of the range element ^in the [] invertion of the range element
@ -276,8 +278,10 @@ template<class CLASS_TYPE> class Node {
Node(int32_t _level) : Node(int32_t _level) :
m_regExpData(), m_regExpData(),
m_nodeLevel(_level), m_nodeLevel(_level),
m_canHaveMultiplicity(true),
m_multipleMin(1), m_multipleMin(1),
m_multipleMax(1) { m_multipleMax(1),
m_countOutput(true) {
}; };
/** /**
@ -310,7 +314,29 @@ template<class CLASS_TYPE> class Node {
virtual void display() { virtual void display() {
TK_INFO("Find NODE : " << levelSpace(m_nodeLevel) << "@???@ {" << getMultMin() << "," << getMultMax() << "} subdata=" << createString(m_regExpData) ); TK_INFO("Find NODE : " << levelSpace(m_nodeLevel) << "@???@ {" << getMultMin() << "," << getMultMax() << "} subdata=" << createString(m_regExpData) );
}; };
protected : protected:
bool m_canHaveMultiplicity; //!< minimum repetition (included)
public:
/**
* @brief Set the multiplicity capabilities.
* @paran[in] _newVal new capabilities.
*/
void setMultiplicityAbility(bool _newVal) {
m_canHaveMultiplicity = _newVal;
if (_newVal == false) {
m_multipleMin = 1;
m_multipleMax = 1;
}
};
protected:
/**
* @brief Get the multiplicity capabilities.
* @return Multiplicity availlable.
*/
bool getMultiplicityAbility() const {
return m_canHaveMultiplicity;
};
protected:
uint32_t m_multipleMin; //!< minimum repetition (included) uint32_t m_multipleMin; //!< minimum repetition (included)
uint32_t m_multipleMax; //!< maximum repetition (included) uint32_t m_multipleMax; //!< maximum repetition (included)
public: public:
@ -320,6 +346,10 @@ template<class CLASS_TYPE> class Node {
* @param[in] _max The maximum appear time. * @param[in] _max The maximum appear time.
*/ */
void setMult(uint32_t _min, uint32_t _max) { void setMult(uint32_t _min, uint32_t _max) {
if (m_canHaveMultiplicity == false) {
TK_WARNING("can not set multiplicity ...");
return;
}
m_multipleMin = std::max(_min, (uint32_t)0); m_multipleMin = std::max(_min, (uint32_t)0);
m_multipleMax = std::max(_max, (uint32_t)1); m_multipleMax = std::max(_max, (uint32_t)1);
} }
@ -338,6 +368,24 @@ template<class CLASS_TYPE> class Node {
uint32_t getMultMax() const { uint32_t getMultMax() const {
return m_multipleMax; return m_multipleMax;
}; };
protected:
bool m_countOutput; //!< minimum repetition (included)
public:
/**
* @brief Set the output count availlable in regexp.
* @paran[in] _newVal new capabilities.
*/
void setCountOutput(bool _newVal) {
m_countOutput = _newVal;
};
protected:
/**
* @brief Get the output count availlable in regexp.
* @return count availlable.
*/
bool getCountOutput() const {
return m_countOutput;
};
}; };
#undef __class__ #undef __class__
@ -533,7 +581,11 @@ template<class CLASS_TYPE> class NodeRangeValue : public Node<CLASS_TYPE> {
_property.multiplicityDecrement(); _property.multiplicityDecrement();
_property.setStatus(parseStatusFull); _property.setStatus(parseStatusFull);
} else { } else {
_property.setPositionStop(newPosVal); if (Node<CLASS_TYPE>::getCountOutput() == true) {
_property.setPositionStop(newPosVal);
} else {
_property.setPositionStop(_property.getPositionStart());
}
if (_currentPos>=_lenMax) { if (_currentPos>=_lenMax) {
_property.setStatus(parseStatusFull); _property.setStatus(parseStatusFull);
} else { } else {
@ -784,6 +836,16 @@ template<class CLASS_TYPE> class NodePTheseElem : public Node<CLASS_TYPE> {
case regexpOpcodePipe: case regexpOpcodePipe:
TK_ERROR("Impossible case : '|' " << pos); TK_ERROR("Impossible case : '|' " << pos);
return false; return false;
case regexpOpcodeEOF:
{
NodeRangeValue<CLASS_TYPE>* tmpNode = new NodeRangeValue<CLASS_TYPE>(Node<CLASS_TYPE>::m_nodeLevel+1);
tmpNode->setDescriptiveName("EOF");
tmpNode->addValue('\0');
tmpNode->setCountOutput(false);
tmpNode->setMultiplicityAbility(false);
m_subNode.push_back(tmpNode);
}
break;
case regexpOpcodeDot: case regexpOpcodeDot:
{ {
NodeRangeValue<CLASS_TYPE>* tmpNode = new NodeRangeValue<CLASS_TYPE>(Node<CLASS_TYPE>::m_nodeLevel+1); NodeRangeValue<CLASS_TYPE>* tmpNode = new NodeRangeValue<CLASS_TYPE>(Node<CLASS_TYPE>::m_nodeLevel+1);