[DEBUG] Fix regExp bug triggered when value parsing reaches the end of the input size

This commit is contained in:
Edouard DUPIN 2017-08-14 22:15:39 +02:00
parent 6ae64a8c18
commit 867064d9eb
2 changed files with 60 additions and 34 deletions

View File

@ -13,9 +13,9 @@
#include <vector> #include <vector>
#include <memory> #include <memory>
//#define TK_REG_DEBUG TK_HIDDEN #define TK_REG_DEBUG TK_HIDDEN
//#define TK_REG_DEBUG TK_VERBOSE //#define TK_REG_DEBUG TK_VERBOSE
#define TK_REG_DEBUG TK_DEBUG //#define TK_REG_DEBUG TK_DEBUG
#define TK_REG_DEBUG_3 TK_HIDDEN #define TK_REG_DEBUG_3 TK_HIDDEN
//#define TK_REG_DEBUG_3 TK_VERBOSE //#define TK_REG_DEBUG_3 TK_VERBOSE
@ -103,8 +103,14 @@ normal mode :
$ End / Start of line of line ==> ce sera un truc suplémentaire comme le \@ $ End / Start of line of line ==> ce sera un truc suplémentaire comme le \@
@ Previous @ Previous
==> TODO : ==> TODO :
Sart of line Start of line
force regexp to be the shortest. force regexp to be the shortest.
short: [a-z]{2,4}? in string " abghjkh " ==> find "ab"
greedy: [a-z]{2,4} in string " abghjkh " ==> find "abgh"
short: [a-z]+? ==> at least 1
short: [a-z]+
short: [a-z]*? ==> at least 0
short: [a-z]*
multiplicity: multiplicity:
* ==> {0, 2147483647} (try to have the minimum size) * ==> {0, 2147483647} (try to have the minimum size)
@ -112,6 +118,9 @@ multiplicity:
+ ==> {1, 2147483647} (try to have the minimum size) + ==> {1, 2147483647} (try to have the minimum size)
{x} ==> {x, x} (try to have the minimum size) {x} ==> {x, x} (try to have the minimum size)
{x,y} ==> {x, y} (try to have the minimum size) {x,y} ==> {x, y} (try to have the minimum size)
option of the system parsing:
enable-multiple-lines ...
*/ */
/** /**
* @brief convertion table of every element in a regular expression. * @brief convertion table of every element in a regular expression.
@ -718,18 +727,18 @@ template<class CLASS_TYPE> class NodeSOL : public Node<CLASS_TYPE> {
|| tmpVal == 0x0A /* <lf> */) { || tmpVal == 0x0A /* <lf> */) {
findLen += 1; findLen += 1;
} else { } else {
tmpFind=false; tmpFind = false;
} }
_property.multiplicityIncrement(); _property.multiplicityIncrement();
} }
_property.setPositionStop(_property.getPositionStart() + findLen); _property.setPositionStop(_property.getPositionStart() + findLen);
if( _property.getMultiplicity()>=Node<CLASS_TYPE>::m_multipleMin if( _property.getMultiplicity() >= Node<CLASS_TYPE>::m_multipleMin
&& _property.getMultiplicity()<=Node<CLASS_TYPE>::m_multipleMax && _property.getMultiplicity() <= Node<CLASS_TYPE>::m_multipleMax
&& findLen>0 ) { && findLen > 0 ) {
TK_REG_DEBUG("find " << findLen); TK_REG_DEBUG("find " << findLen);
_property.setStatus(parseStatusFull); _property.setStatus(parseStatusFull);
return; return;
} else if( 0 == Node<CLASS_TYPE>::m_multipleMin ) { } else if(Node<CLASS_TYPE>::m_multipleMin == 0) {
TK_REG_DEBUG("find size=0"); TK_REG_DEBUG("find size=0");
_property.setStatus(parseStatusFull); _property.setStatus(parseStatusFull);
return; return;
@ -792,7 +801,7 @@ template<class CLASS_TYPE> class NodePTheseElem : public Node<CLASS_TYPE> {
switch (Node<CLASS_TYPE>::m_regExpData[pos]) { switch (Node<CLASS_TYPE>::m_regExpData[pos]) {
case regexpOpcodePTheseIn:{ case regexpOpcodePTheseIn:{
elementSize=getLenOfPThese(Node<CLASS_TYPE>::m_regExpData, pos); elementSize=getLenOfPThese(Node<CLASS_TYPE>::m_regExpData, pos);
for (int64_t kkk=pos+1; kkk<pos+elementSize+1; kkk++) { for (int64_t kkk=pos+1; kkk<pos+elementSize+1; ++kkk) {
tmpData.push_back(Node<CLASS_TYPE>::m_regExpData[kkk]); tmpData.push_back(Node<CLASS_TYPE>::m_regExpData[kkk]);
} }
// add to the subnode list : // add to the subnode list :
@ -806,7 +815,7 @@ template<class CLASS_TYPE> class NodePTheseElem : public Node<CLASS_TYPE> {
return false; return false;
case regexpOpcodeBracketIn: { case regexpOpcodeBracketIn: {
elementSize=getLenOfBracket(Node<CLASS_TYPE>::m_regExpData, pos); elementSize=getLenOfBracket(Node<CLASS_TYPE>::m_regExpData, pos);
for (int64_t kkk=pos+1; kkk<pos+elementSize+1; kkk++) { for (int64_t kkk=pos+1; kkk<pos+elementSize+1; ++kkk) {
tmpData.push_back(Node<CLASS_TYPE>::m_regExpData[kkk]); tmpData.push_back(Node<CLASS_TYPE>::m_regExpData[kkk]);
} }
// add to the subnode list : // add to the subnode list :
@ -819,13 +828,13 @@ template<class CLASS_TYPE> class NodePTheseElem : public Node<CLASS_TYPE> {
TK_ERROR("Impossible case : ']' " << pos); TK_ERROR("Impossible case : ']' " << pos);
return false; return false;
case regexpOpcodeBracetIn: { case regexpOpcodeBracetIn: {
elementSize=getLenOfBrace(Node<CLASS_TYPE>::m_regExpData, pos); elementSize = getLenOfBrace(Node<CLASS_TYPE>::m_regExpData, pos);
for (int64_t kkk=pos+1; kkk<pos+elementSize+1; kkk++) { for (int64_t kkk=pos+1; kkk<pos+elementSize+1; ++kkk) {
tmpData.push_back(Node<CLASS_TYPE>::m_regExpData[kkk]); tmpData.push_back(Node<CLASS_TYPE>::m_regExpData[kkk]);
} }
uint32_t min = 0; uint32_t min = 0;
uint32_t max = 0; uint32_t max = 0;
if (false == parseBrace(tmpData, min, max)) { if (parseBrace(tmpData, min, max) == false) {
return false; return false;
} }
setMultiplicityOnLastNode(min, max); setMultiplicityOnLastNode(min, max);
@ -983,7 +992,9 @@ template<class CLASS_TYPE> class NodePTheseElem : public Node<CLASS_TYPE> {
virtual void parse(const CLASS_TYPE& _data, int64_t _currentPos, int64_t _lenMax, FindProperty& _property) { virtual void parse(const CLASS_TYPE& _data, int64_t _currentPos, int64_t _lenMax, FindProperty& _property) {
//TK_REG_DEBUG_2("Parse " << levelSpace(Node<CLASS_TYPE>::m_nodeLevel) << " (elem) data to parse : '" << autoStr(std::string(_data, _currentPos, _lenMax-_currentPos)) << "'"); //TK_REG_DEBUG_2("Parse " << levelSpace(Node<CLASS_TYPE>::m_nodeLevel) << " (elem) data to parse : '" << autoStr(std::string(_data, _currentPos, _lenMax-_currentPos)) << "'");
//TK_REG_DEBUG_2("Parse " << levelSpace(Node<CLASS_TYPE>::m_nodeLevel) << " (elem) m_data='" << autoStr(Node<CLASS_TYPE>::m_data) << "'"); //TK_REG_DEBUG_2("Parse " << levelSpace(Node<CLASS_TYPE>::m_nodeLevel) << " (elem) m_data='" << autoStr(Node<CLASS_TYPE>::m_data) << "'");
TK_REG_DEBUG_3(" " << levelSpace(Node<CLASS_TYPE>::m_nodeLevel) << " (elem) " << _property); TK_REG_DEBUG("Parse " << levelSpace(Node<CLASS_TYPE>::m_nodeLevel) << " (elem) " << _property);
TK_REG_DEBUG(" " << levelSpace(Node<CLASS_TYPE>::m_nodeLevel) << " work on: " << createString(Node<CLASS_TYPE>::m_regExpData));
TK_REG_DEBUG(" " << levelSpace(Node<CLASS_TYPE>::m_nodeLevel) << " pos=" << _currentPos << " ==> " << _lenMax);
int findLen = 0; int findLen = 0;
bool error = false; bool error = false;
size_t iii = 0; size_t iii = 0;
@ -993,17 +1004,20 @@ template<class CLASS_TYPE> class NodePTheseElem : public Node<CLASS_TYPE> {
// rewind the list: // rewind the list:
bool findPartialNode = false; bool findPartialNode = false;
for (int64_t jjj=_property.m_subProperty.size()-1; jjj>=0; --jjj) { for (int64_t jjj=_property.m_subProperty.size()-1; jjj>=0; --jjj) {
if (_property.m_subProperty[jjj].getPositionStart() < _currentPos) {
break;
}
if (_property.m_subProperty[jjj].getStatus() == parseStatusPartial) { if (_property.m_subProperty[jjj].getStatus() == parseStatusPartial) {
findPartialNode = true; findPartialNode = true;
prop = _property.m_subProperty[jjj]; prop = _property.m_subProperty[jjj];
tmpCurrentPos = prop.getPositionStop(); tmpCurrentPos = prop.getPositionStop();
_property.m_subProperty.erase(_property.m_subProperty.begin()+jjj, _property.m_subProperty.end()); _property.m_subProperty.erase(_property.m_subProperty.begin()+jjj, _property.m_subProperty.end());
iii = jjj; iii = jjj;
TK_REG_DEBUG("Parse " << levelSpace(Node<CLASS_TYPE>::m_nodeLevel) << " (elem) rewind=" << iii); TK_REG_DEBUG(" " << levelSpace(Node<CLASS_TYPE>::m_nodeLevel) << " (elem) rewind=" << iii);
break; break;
} }
} }
// We did not find the element : // We did not find the element:
if (findPartialNode == false) { if (findPartialNode == false) {
_property.m_subProperty.clear(); _property.m_subProperty.clear();
_property.reset(); _property.reset();
@ -1154,12 +1168,17 @@ template<class CLASS_TYPE> class NodePThese : public Node<CLASS_TYPE> {
return _data.size(); return _data.size();
}; };
virtual void parse(const CLASS_TYPE& _data, int64_t _currentPos, int64_t _lenMax, FindProperty& _property) { virtual void parse(const CLASS_TYPE& _data, int64_t _currentPos, int64_t _lenMax, FindProperty& _property) {
TK_REG_DEBUG("Parse " << levelSpace(Node<CLASS_TYPE>::m_nodeLevel) << " (...) {" << Node<CLASS_TYPE>::m_multipleMin << "," << Node<CLASS_TYPE>::m_multipleMax << "}"); TK_REG_DEBUG("Parse " << levelSpace(Node<CLASS_TYPE>::m_nodeLevel) << " (...) {" << Node<CLASS_TYPE>::m_multipleMin << "," << Node<CLASS_TYPE>::m_multipleMax << "}");
TK_REG_DEBUG(" " << levelSpace(Node<CLASS_TYPE>::m_nodeLevel) << " " << createString(Node<CLASS_TYPE>::m_regExpData)); TK_REG_DEBUG(" " << levelSpace(Node<CLASS_TYPE>::m_nodeLevel) << " work on: " << createString(Node<CLASS_TYPE>::m_regExpData));
TK_REG_DEBUG(" " << levelSpace(Node<CLASS_TYPE>::m_nodeLevel) << " pos=" << _currentPos << " ==> " << _lenMax);
TK_REG_DEBUG_2(" " << levelSpace(Node<CLASS_TYPE>::m_nodeLevel) << " (...) data='" << autoStr(std::string(_data, _currentPos, _lenMax-_currentPos)) << "'"); TK_REG_DEBUG_2(" " << levelSpace(Node<CLASS_TYPE>::m_nodeLevel) << " (...) data='" << autoStr(std::string(_data, _currentPos, _lenMax-_currentPos)) << "'");
TK_REG_DEBUG_3(" " << levelSpace(Node<CLASS_TYPE>::m_nodeLevel) << " (...) input property=" << _property); TK_REG_DEBUG_3(" " << levelSpace(Node<CLASS_TYPE>::m_nodeLevel) << " (...) input property=" << _property);
if (0 == m_subNode.size()) { if (m_subNode.size() == 0) {
_property.setStatus(parseStatusNone);
return;
}
// prevent overflow
if (_currentPos == _lenMax) {
_property.setStatus(parseStatusNone); _property.setStatus(parseStatusNone);
return; return;
} }
@ -1271,9 +1290,7 @@ template<class CLASS_TYPE> class NodePThese : public Node<CLASS_TYPE> {
_property.setPositionStop(_property.getPositionStart()); _property.setPositionStop(_property.getPositionStart());
} else { } else {
TK_REG_DEBUG(" " << levelSpace(Node<CLASS_TYPE>::m_nodeLevel) << " (...) sub finished ..."); TK_REG_DEBUG(" " << levelSpace(Node<CLASS_TYPE>::m_nodeLevel) << " (...) sub finished ...");
for (auto &it: _property.m_subProperty) { FindProperty::display<CLASS_TYPE>(_property, _data, 2);
FindProperty::display<CLASS_TYPE>(it, _data, 2);
}
_property.setPositionStop(_property.m_subProperty.back().getPositionStop()); _property.setPositionStop(_property.m_subProperty.back().getPositionStop());
} }
if( _property.getMultiplicity() >= Node<CLASS_TYPE>::m_multipleMin if( _property.getMultiplicity() >= Node<CLASS_TYPE>::m_multipleMin
@ -1451,10 +1468,10 @@ template<class CLASS_TYPE> class RegExp {
} }
int64_t jjj; int64_t jjj;
// Find the element in the list... // Find the element in the list...
for (jjj=0; jjj<regexp::constConvertionTableSize; jjj++) { for (jjj=0; jjj<regexp::constConvertionTableSize; ++jjj) {
if ( regexp::constConvertionTable[jjj].haveBackSlash == true if ( regexp::constConvertionTable[jjj].haveBackSlash == true
&& _regexp[iii+1] == (char32_t)regexp::constConvertionTable[jjj].inputValue) { && _regexp[iii+1] == (char32_t)regexp::constConvertionTable[jjj].inputValue) {
if (regexp::constConvertionTable[jjj].newValue==0) { if (regexp::constConvertionTable[jjj].newValue == 0) {
tmpExp.push_back(regexp::constConvertionTable[jjj].specialChar); tmpExp.push_back(regexp::constConvertionTable[jjj].specialChar);
} else { } else {
tmpExp.push_back(regexp::constConvertionTable[jjj].newValue); tmpExp.push_back(regexp::constConvertionTable[jjj].newValue);
@ -1463,7 +1480,7 @@ template<class CLASS_TYPE> class RegExp {
} }
} }
// check error : // check error :
if (jjj==regexp::constConvertionTableSize) { if (jjj == regexp::constConvertionTableSize) {
TK_WARNING(" parse : " << _regexp); TK_WARNING(" parse : " << _regexp);
TK_WARNING(" " << etk::regexp::strTick(iii+1)); TK_WARNING(" " << etk::regexp::strTick(iii+1));
TK_ERROR("Dangerous parse of the \\x with the value : '" << _regexp[iii+1] << "' at element " << iii); TK_ERROR("Dangerous parse of the \\x with the value : '" << _regexp[iii+1] << "' at element " << iii);
@ -1487,11 +1504,11 @@ template<class CLASS_TYPE> class RegExp {
} }
int64_t jjj; int64_t jjj;
// find the element in the list... // find the element in the list...
for (jjj=0; jjj<regexp::constConvertionTableSize; jjj++) { for (jjj=0; jjj<regexp::constConvertionTableSize; ++jjj) {
if( false == regexp::constConvertionTable[jjj].haveBackSlash if( regexp::constConvertionTable[jjj].haveBackSlash == false
&& _regexp[iii] == (char32_t)regexp::constConvertionTable[jjj].inputValue) && _regexp[iii] == (char32_t)regexp::constConvertionTable[jjj].inputValue)
{ {
if (regexp::constConvertionTable[jjj].newValue==0) { if (regexp::constConvertionTable[jjj].newValue == 0) {
tmpExp.push_back(regexp::constConvertionTable[jjj].specialChar); tmpExp.push_back(regexp::constConvertionTable[jjj].specialChar);
} else { } else {
tmpExp.push_back(regexp::constConvertionTable[jjj].newValue); tmpExp.push_back(regexp::constConvertionTable[jjj].newValue);
@ -1500,7 +1517,7 @@ template<class CLASS_TYPE> class RegExp {
} }
} }
// not find : normal element // not find : normal element
if (jjj==regexp::constConvertionTableSize) { if (jjj == regexp::constConvertionTableSize) {
//TK_REG_DEBUG("parse : '" << _regexp[iii] << "'" ); //TK_REG_DEBUG("parse : '" << _regexp[iii] << "'" );
tmpExp.push_back(_regexp[iii]); tmpExp.push_back(_regexp[iii]);
} }
@ -1588,9 +1605,10 @@ template<class CLASS_TYPE> class RegExp {
bool parse(const CLASS_TYPE& _SearchIn, bool parse(const CLASS_TYPE& _SearchIn,
int64_t _startPos, int64_t _startPos,
int64_t _endPos) { int64_t _endPos) {
if (false == m_isOk) { if (m_isOk == false) {
return false; return false;
} }
TK_REG_DEBUG("Request parse : " << _startPos << " --> " << _endPos);
int64_t buflen = _SearchIn.size(); int64_t buflen = _SearchIn.size();
if (_endPos > buflen) { if (_endPos > buflen) {
_endPos = buflen; _endPos = buflen;
@ -1678,7 +1696,7 @@ template<class CLASS_TYPE> class RegExp {
bool processOneElement(const CLASS_TYPE& _SearchIn, bool processOneElement(const CLASS_TYPE& _SearchIn,
int64_t _startPos, int64_t _startPos,
int64_t _endPos) { int64_t _endPos) {
if (false == m_isOk) { if (m_isOk == false) {
return false; return false;
} }
int64_t buflen = _SearchIn.size(); int64_t buflen = _SearchIn.size();
@ -1690,7 +1708,7 @@ template<class CLASS_TYPE> class RegExp {
} }
int64_t findLen=0; int64_t findLen=0;
int64_t maxlen = _endPos-_startPos; int64_t maxlen = _endPos-_startPos;
if (true == m_notBeginWithChar) { if (m_notBeginWithChar == true) {
if (_startPos>0) { if (_startPos>0) {
char32_t tmpVal = _SearchIn[_startPos-1]; char32_t tmpVal = _SearchIn[_startPos-1];
if( ( tmpVal >= 'a' if( ( tmpVal >= 'a'
@ -1918,7 +1936,7 @@ template<class CLASS_TYPE> class RegExp {
|| _tmpExp[pos] == regexpOpcodeBracetIn) || _tmpExp[pos] == regexpOpcodeBracetIn)
{ {
// attention the i position change inside the finction... // attention the i position change inside the finction...
if (false==checkGoodPosition(_tmpExp, pos) ) { if (checkGoodPosition(_tmpExp, pos) == false) {
TK_ERROR("Error at position : " << pos+1 ); TK_ERROR("Error at position : " << pos+1 );
return false; return false;
} else { } else {

View File

@ -80,10 +80,18 @@ TEST(TestEtkRegExp, complexString ) {
EXPECT_EQ(res, std::make_pair(7,31)); EXPECT_EQ(res, std::make_pair(7,31));
} }
// Checks a "\@\w+_\@" expression against input that contains several '_'
// characters, and compares the pair returned by testRegExpSingle() with the
// expected (2,10) pair.
TEST(TestEtkRegExp, multipleUnderscore ) {
	// Pattern under test.
	std::string expression("\\@\\w+_\\@");
	// Input holding more than one underscore-terminated word.
	std::string dataToParse(" aaa_bbb_ plop_ ");
	// Run the single-match helper and keep the pair it returns.
	std::pair<int32_t, int32_t> res = testRegExpSingle(expression, dataToParse);
	EXPECT_EQ(res, std::make_pair(2,10));
}
TEST(TestEtkRegExp, endError ) { TEST(TestEtkRegExp, endError ) {
std::string expression = "\\@((0(x|X)[0-9a-fA-F]*)|(\\d+\\.?\\d*|\\.\\d+)((e|E)(\\+|\\-)?\\d+)?)(LL|L|l|UL|ul|u|U|F|f)?\\@"; std::string expression = "\\@((0(x|X)[0-9a-fA-F]*)|(\\d+\\.?\\d*|\\.\\d+)((e|E)(\\+|\\-)?\\d+)?)(LL|L|l|UL|ul|u|U|F|f)?\\@";
std::string dataToParse = "(9,"; std::string dataToParse = "(95";
std::pair<int32_t, int32_t> res; std::pair<int32_t, int32_t> res;
res = testRegExpSingle(expression, dataToParse); res = testRegExpSingle(expression, dataToParse);
EXPECT_EQ(res, std::make_pair(7,31)); EXPECT_EQ(res, std::make_pair(7,31));