From 99f5487ca6303716e51917d30a8dd7e53226f16a Mon Sep 17 00:00:00 2001 From: Edouard DUPIN Date: Tue, 22 Jul 2014 21:11:52 +0200 Subject: [PATCH] [DEV] change regexp log --- etk/RegExp.h | 113 ++++++++++++++++++++++++++------------------------ test/main.cpp | 29 +++++++++++-- 2 files changed, 83 insertions(+), 59 deletions(-) diff --git a/etk/RegExp.h b/etk/RegExp.h index f96446e..515ef4b 100644 --- a/etk/RegExp.h +++ b/etk/RegExp.h @@ -189,7 +189,7 @@ template class Node { * @return Partial can find more data ... * @return None Find nothing */ - virtual enum parseStatus parse(const CLASS_TYPE& _data, int64_t _currentPos, int64_t _lenMax)=0; + virtual enum parseStatus parse(const CLASS_TYPE& _data, int64_t _currentPos, int64_t _lenMax, int32_t _level)=0; /** * @brief Display the current node properties * @param[in] level of the node @@ -292,8 +292,8 @@ template class NodeValue : public Node { return _data.size(); }; - virtual enum parseStatus parse(const CLASS_TYPE& _data, int64_t _currentPos, int64_t _lenMax) { - TK_REG_EXP_DBG_MODE("Parse node : Value{" << Node::m_multipleMin << "," << Node::m_multipleMax << "} : " << (char)m_data[0]); + virtual enum parseStatus parse(const CLASS_TYPE& _data, int64_t _currentPos, int64_t _lenMax, int32_t _level) { + TK_REG_EXP_DBG_MODE("Parse " << levelSpace(_level) << " Value{" << Node::m_multipleMin << "," << Node::m_multipleMax << "} : " << (char)m_data[0]); if (m_data.size() == 0) { TK_ERROR("No data inside type elemTypeValue"); return parseStatusNone; @@ -305,7 +305,7 @@ template class NodeValue : public Node { uint32_t ofset = 0; int64_t kkk; for (kkk=0; findLen+kkk<_lenMax && kkk < (int64_t)m_data.size(); kkk++) { - TK_REG_EXP_DBG_MODE("check element value : '" << (char)m_data[kkk] << "' ?= '" << (char)_data[_currentPos+findLen+kkk] << "'"); + TK_REG_EXP_DBG_MODE(" " << levelSpace(_level) << " check element value : '" << (char)m_data[kkk] << "' ?= '" << (char)_data[_currentPos+findLen+kkk] << "'"); if (m_data[kkk] != (char32_t)_data[_currentPos+findLen+kkk]) { tmpFind=false; break; @@ -326,10 +326,10 @@ template class NodeValue : public Node { if ( Node::m_multiplicity >= Node::m_multipleMin && Node::m_multiplicity <= Node::m_multipleMax && findLen > 0) { - TK_REG_EXP_DBG_MODE("value find " << Node::m_positionStop - Node::m_positionStart << " [" << Node::m_positionStart << ".." << Node::m_positionStop << "]"); + TK_REG_EXP_DBG_MODE(" " << levelSpace(_level) << " value find " << Node::m_positionStop - Node::m_positionStart << " [" << Node::m_positionStart << ".." << Node::m_positionStop << "]"); return parseStatusFull; } else if (Node::m_multipleMin == 0) { - TK_REG_EXP_DBG_MODE("find size=0"); + TK_REG_EXP_DBG_MODE(" " << levelSpace(_level) << " find size=0"); return parseStatusFull; } return parseStatusNone; @@ -397,14 +397,14 @@ template class NodeBracket : public Node { } return _data.size(); }; - virtual enum parseStatus parse(const CLASS_TYPE& _data, int64_t _currentPos, int64_t _lenMax) { - TK_REG_EXP_DBG_MODE("Parse node : [...]{" << Node::m_multipleMin + virtual enum parseStatus parse(const CLASS_TYPE& _data, int64_t _currentPos, int64_t _lenMax, int32_t _level) { + TK_REG_EXP_DBG_MODE("Parse " << levelSpace(_level) << " [...] {" << Node::m_multipleMin << "," << Node::m_multipleMax << "}"); if (0==m_data.size()) { TK_ERROR("No data inside type elemTypeValue"); return parseStatusNone; } - TK_REG_EXP_DBG_MODE("one of element value List : " << createString(m_data)); + TK_REG_EXP_DBG_MODE(" " << levelSpace(_level) << " one of element value List : " << createString(m_data)); bool tmpFind = true; int32_t tmpLen = 0; while ( Node::m_multiplicity < Node::m_multipleMax @@ -425,10 +425,10 @@ template class NodeBracket : public Node { && Node::m_multiplicity<=Node::m_multipleMax && tmpLen > 0) { - TK_REG_EXP_DBG_MODE("find " << tmpLen << " [" << Node::m_positionStart << " " << Node::m_positionStop << "]"); + TK_REG_EXP_DBG_MODE(" " << levelSpace(_level) << " find " << tmpLen << " [" << Node::m_positionStart << " " << Node::m_positionStop << "]"); return parseStatusFull; } else if( 0 == Node::m_multipleMin ) { - TK_REG_EXP_DBG_MODE("find size=0"); + TK_REG_EXP_DBG_MODE(" " << levelSpace(_level) << " find size=0"); return parseStatusFull; } return parseStatusNone; @@ -456,9 +456,9 @@ template class NodeDigit : public Node { * @brief Destructor */ ~NodeDigit() { }; - virtual enum parseStatus parse(const CLASS_TYPE& _data, int64_t _currentPos, int64_t _lenMax) { + virtual enum parseStatus parse(const CLASS_TYPE& _data, int64_t _currentPos, int64_t _lenMax, int32_t _level) { int32_t findLen = 0; - TK_REG_EXP_DBG_MODE("Parse node : Digit{" << Node::m_multipleMin << "," << Node::m_multipleMax << "} : "<< _data[_currentPos] << " lenMax=" << _lenMax); + TK_REG_EXP_DBG_MODE("Parse " << levelSpace(_level) << " Digit{" << Node::m_multipleMin << "," << Node::m_multipleMax << "} : "<< _data[_currentPos] << " lenMax=" << _lenMax); bool tmpFind = true; while ( Node::m_multiplicity < Node::m_multipleMax && tmpFind == true @@ -510,9 +510,9 @@ template class NodeDigitNot : public Node { * @brief Destructor */ ~NodeDigitNot() { }; - virtual enum parseStatus parse(const CLASS_TYPE& _data, int64_t _currentPos, int64_t _lenMax) { + virtual enum parseStatus parse(const CLASS_TYPE& _data, int64_t _currentPos, int64_t _lenMax, int32_t _level) { int32_t findLen = 0; - TK_REG_EXP_DBG_MODE("Parse node : DigitNot{" << Node::m_multipleMin << "," << Node::m_multipleMax << "}"); + TK_REG_EXP_DBG_MODE("Parse " << levelSpace(_level) << " DigitNot{" << Node::m_multipleMin << "," << Node::m_multipleMax << "}"); bool tmpFind = true; while ( Node::m_multiplicity < Node::m_multipleMax && tmpFind == true @@ -558,9 +558,9 @@ template class NodeLetter : public Node { * @brief Destructor */ ~NodeLetter() { }; - virtual enum parseStatus parse(const CLASS_TYPE& _data, int64_t _currentPos, int64_t _lenMax) { + virtual enum parseStatus parse(const CLASS_TYPE& _data, int64_t _currentPos, int64_t _lenMax, int32_t _level) { int32_t findLen = 0; - TK_REG_EXP_DBG_MODE("Parse node : Letter{" << Node::m_multipleMin << "," << Node::m_multipleMax << "}"); + TK_REG_EXP_DBG_MODE("Parse " << levelSpace(_level) << " Letter{" << Node::m_multipleMin << "," << Node::m_multipleMax << "}"); bool tmpFind = true; while ( Node::m_multiplicity < Node::m_multipleMax && tmpFind == true @@ -611,9 +611,9 @@ template class NodeLetterNot : public Node { * @brief Destructor */ ~NodeLetterNot() { }; - virtual enum parseStatus parse(const CLASS_TYPE& _data, int64_t _currentPos, int64_t _lenMax) { + virtual enum parseStatus parse(const CLASS_TYPE& _data, int64_t _currentPos, int64_t _lenMax, int32_t _level) { int32_t findLen = 0; - TK_REG_EXP_DBG_MODE("Parse node : LetterNot{" << Node::m_multipleMin << "," << Node::m_multipleMax << "}"); + TK_REG_EXP_DBG_MODE("Parse " << levelSpace(_level) << " LetterNot{" << Node::m_multipleMin << "," << Node::m_multipleMax << "}"); bool tmpFind = true; while ( Node::m_multiplicity < Node::m_multipleMax && tmpFind == true @@ -664,9 +664,9 @@ template class NodeWhiteSpace : public Node { * @brief Destructor */ ~NodeWhiteSpace() { }; - virtual enum parseStatus parse(const CLASS_TYPE& _data, int64_t _currentPos, int64_t _lenMax) { + virtual enum parseStatus parse(const CLASS_TYPE& _data, int64_t _currentPos, int64_t _lenMax, int32_t _level) { int32_t findLen = 0; - TK_REG_EXP_DBG_MODE("Parse node : Space{" << Node::m_multipleMin << "," << Node::m_multipleMax << "}"); + TK_REG_EXP_DBG_MODE("Parse " << levelSpace(_level) << " Space{" << Node::m_multipleMin << "," << Node::m_multipleMax << "}"); bool tmpFind = true; while ( Node::m_multiplicity < Node::m_multipleMax && tmpFind == true @@ -719,9 +719,9 @@ template class NodeWhiteSpaceNot : public Node { * @brief Destructor */ ~NodeWhiteSpaceNot() { }; - virtual enum parseStatus parse(const CLASS_TYPE& _data, int64_t _currentPos, int64_t _lenMax) { + virtual enum parseStatus parse(const CLASS_TYPE& _data, int64_t _currentPos, int64_t _lenMax, int32_t _level) { int32_t findLen = 0; - TK_REG_EXP_DBG_MODE("Parse node : SpaceNot{" << Node::m_multipleMin << "," << Node::m_multipleMax << "}"); + TK_REG_EXP_DBG_MODE("Parse " << levelSpace(_level) << " SpaceNot{" << Node::m_multipleMin << "," << Node::m_multipleMax << "}"); bool tmpFind = true; while ( Node::m_multiplicity < Node::m_multipleMax && tmpFind == true @@ -774,9 +774,9 @@ template class NodeWordChar : public Node { * @brief Destructor */ ~NodeWordChar() { }; - virtual enum parseStatus parse(const CLASS_TYPE& _data, int64_t _currentPos, int64_t _lenMax) { + virtual enum parseStatus parse(const CLASS_TYPE& _data, int64_t _currentPos, int64_t _lenMax, int32_t _level) { int32_t findLen = 0; - TK_REG_EXP_DBG_MODE("Parse node : Word{" << Node::m_multipleMin << "," << Node::m_multipleMax << "}"); + TK_REG_EXP_DBG_MODE("Parse " << levelSpace(_level) << " Word{" << Node::m_multipleMin << "," << Node::m_multipleMax << "}"); bool tmpFind = true; while ( Node::m_multiplicity < Node::m_multipleMax && tmpFind == true @@ -828,9 +828,9 @@ template class NodeWordCharNot : public Node { * @brief Destructor */ ~NodeWordCharNot() { }; - virtual enum parseStatus parse(const CLASS_TYPE& _data, int64_t _currentPos, int64_t _lenMax) { + virtual enum parseStatus parse(const CLASS_TYPE& _data, int64_t _currentPos, int64_t _lenMax, int32_t _level) { int32_t findLen = 0; - TK_REG_EXP_DBG_MODE("Parse node : WordNot{" << Node::m_multipleMin << "," << Node::m_multipleMax << "}"); + TK_REG_EXP_DBG_MODE("Parse " << levelSpace(_level) << " WordNot{" << Node::m_multipleMin << "," << Node::m_multipleMax << "}"); bool tmpFind = true; while ( Node::m_multiplicity < Node::m_multipleMax && tmpFind == true @@ -883,18 +883,18 @@ template class NodeDot : public Node { * @brief Destructor */ ~NodeDot() { }; - virtual enum parseStatus parse(const CLASS_TYPE& _data, int64_t _currentPos, int64_t _lenMax) { + virtual enum parseStatus parse(const CLASS_TYPE& _data, int64_t _currentPos, int64_t _lenMax, int32_t _level) { int32_t findLen = 0; - TK_REG_EXP_DBG_MODE("Parse node : '.'{" << Node::m_multipleMin << "," << Node::m_multipleMax << "}"); + TK_REG_EXP_DBG_MODE("Parse " << levelSpace(_level) << " '.'{" << Node::m_multipleMin << "," << Node::m_multipleMax << "}"); // equivalent a : [^\x00-\x08\x0A-\x1F\x7F] char32_t tmpVal = _data[_currentPos]; - if( ( tmpVal > 0x08 + if( /*( tmpVal > 0x08 && tmpVal < 0x0A ) || ( tmpVal > 0x1F && tmpVal < 0x7F ) || ( tmpVal > 0x7F - && tmpVal < 0xFF ) ) { - TK_REG_EXP_DBG_MODE("Parse node : '.' find 1 '" << (char)tmpVal << "'" ); + && tmpVal < 0xFF )*/ /*tmpVal != 0x00*/ true ) { + TK_REG_EXP_DBG_MODE("Parse " << levelSpace(_level) << " '.' find 1 '" << (char)tmpVal << "'" ); Node::m_multiplicity++; int64_t newPosVal = Node::m_positionStop; if (Node::m_positionStop == -1) { @@ -908,11 +908,14 @@ template class NodeDot : public Node { return parseStatusFull; } else { Node::m_positionStop = newPosVal; - return parseStatusPartial; + if (_currentPos>=_lenMax) { + return parseStatusFull; + } else { + return parseStatusPartial; + } } } if (Node::m_positionStop != -1) { - return parseStatusFull; } return parseStatusNone; @@ -941,10 +944,10 @@ template class NodeSOL : public Node { * @brief Destructor */ ~NodeSOL() { }; - virtual enum parseStatus parse(const CLASS_TYPE& _data, int64_t _currentPos, int64_t _lenMax) { + virtual enum parseStatus parse(const CLASS_TYPE& _data, int64_t _currentPos, int64_t _lenMax, int32_t _level) { int32_t findLen = 0; bool tmpFind = false; - TK_REG_EXP_DBG_MODE("Parse node : SOL{" << Node::m_multipleMin << "," << Node::m_multipleMax << "}"); + TK_REG_EXP_DBG_MODE("Parse " << levelSpace(_level) << " SOL{" << Node::m_multipleMin << "," << Node::m_multipleMax << "}"); // TODO : is it really what I want ... (maybe next ellement will be requested... (check if previous element is \r or \n while ( Node::m_multiplicity < Node::m_multipleMax && tmpFind == true @@ -995,9 +998,9 @@ template class NodeEOL : public Node { * @brief Destructor */ ~NodeEOL() { }; - virtual enum parseStatus parse(const CLASS_TYPE& _data, int64_t _currentPos, int64_t _lenMax) { + virtual enum parseStatus parse(const CLASS_TYPE& _data, int64_t _currentPos, int64_t _lenMax, int32_t _level) { int32_t findLen = 0; - TK_REG_EXP_DBG_MODE("Parse node : EOL{" << Node::m_multipleMin << "," << Node::m_multipleMax << "}"); + TK_REG_EXP_DBG_MODE("Parse " << levelSpace(_level) << " EOL{" << Node::m_multipleMin << "," << Node::m_multipleMax << "}"); bool tmpFind = true; while ( Node::m_multiplicity < Node::m_multipleMax && tmpFind == true @@ -1190,9 +1193,9 @@ template class NodePTheseElem : public Node { return _data.size(); }; - virtual enum parseStatus parse(const CLASS_TYPE& _data, int64_t _currentPos, int64_t _lenMax) { + virtual enum parseStatus parse(const CLASS_TYPE& _data, int64_t _currentPos, int64_t _lenMax, int32_t _level) { int32_t findLen = 0; - TK_REG_EXP_DBG_MODE("Parse node : (Elem){" << Node::m_multipleMin << "," << Node::m_multipleMax << "}"); + TK_REG_EXP_DBG_MODE("Parse " << levelSpace(_level) << " (Elem)"); // NOTE 1 : Must done only one time in EVERY case ... // NOTE 2 : All element inside must be OK if (0 == m_subNode.size()) { @@ -1204,12 +1207,12 @@ template class NodePTheseElem : public Node { m_subNode[iii]->setPositionStart(tmpCurrentPos); int32_t offset = 0; do { - status = m_subNode[iii]->parse(_data, tmpCurrentPos, _lenMax); + status = m_subNode[iii]->parse(_data, tmpCurrentPos, _lenMax, _level+1); offset = m_subNode[iii]->getFindLen(); tmpCurrentPos = m_subNode[iii]->getPositionStop(); if ( status == parseStatusPartial && iii+1 class NodePTheseElem : public Node { m_subNode[jjj]->setPositionStart(tmpCurrentPos2); int32_t offset2 = 0; do { - status2 = m_subNode[jjj]->parse(_data, tmpCurrentPos2, _lenMax); + status2 = m_subNode[jjj]->parse(_data, tmpCurrentPos2, _lenMax, _level+2); offset2 = m_subNode[jjj]->getFindLen(); tmpCurrentPos2 = m_subNode[jjj]->getPositionStop(); } while (status2 == parseStatusPartial); @@ -1227,22 +1230,22 @@ template class NodePTheseElem : public Node { error = true; break; } else { - TK_REG_EXP_DBG_MODE("Parse node : (Elem) find : " << m_subNode[jjj]->getFindLen() << " [" << Node::m_positionStart << " " << tmpCurrentPos2 << "]"); + TK_REG_EXP_DBG_MODE("Parse " << levelSpace(_level) << " (Elem) 2 find : " << m_subNode[jjj]->getFindLen() << " [" << Node::m_positionStart << " " << tmpCurrentPos2 << "]"); } } if (error == false) { Node::m_positionStop = tmpCurrentPos2; - TK_REG_EXP_DBG_MODE("Parse node 2: (Elem) return : " << Node::getFindLen()); + TK_REG_EXP_DBG_MODE("Parse " << levelSpace(_level) << " (Elem) 2 return : " << Node::getFindLen()); return parseStatusFull; } - TK_REG_EXP_DBG_MODE("Parse node 2: (Elem) second parse ... (done)"); + TK_REG_EXP_DBG_MODE("Parse " << levelSpace(_level) << " (Elem) 2 second parse ... (done)"); } } while (status == parseStatusPartial); if (status == parseStatusNone) { findLen = 0; return parseStatusNone; } else { - TK_REG_EXP_DBG_MODE("Parse node : (Elem) find : " << m_subNode[iii]->getFindLen() << " [" << Node::m_positionStart << " " << tmpCurrentPos << "]"); + TK_REG_EXP_DBG_MODE("Parse " << levelSpace(_level) << " (Elem) find : " << m_subNode[iii]->getFindLen() << " [" << Node::m_positionStart << " " << tmpCurrentPos << "]"); } } if (tmpCurrentPos<_currentPos) { @@ -1251,7 +1254,7 @@ template class NodePTheseElem : public Node { findLen = tmpCurrentPos - _currentPos; } Node::m_positionStop = tmpCurrentPos; - TK_REG_EXP_DBG_MODE("Parse node : (Elem) return : " << Node::getFindLen()); + TK_REG_EXP_DBG_MODE("Parse " << levelSpace(_level) << " (Elem) return : " << Node::getFindLen()); return parseStatusFull; }; @@ -1343,9 +1346,9 @@ template class NodePThese : public Node { } return _data.size(); }; - virtual enum parseStatus parse(const CLASS_TYPE& _data, int64_t _currentPos, int64_t _lenMax) { + virtual enum parseStatus parse(const CLASS_TYPE& _data, int64_t _currentPos, int64_t _lenMax, int32_t _level) { int32_t findLen = 0; - TK_REG_EXP_DBG_MODE("Parse node : (...){" << Node::m_multipleMin << "," << Node::m_multipleMax << "}"); + TK_REG_EXP_DBG_MODE("Parse " << levelSpace(_level) << " (...){" << Node::m_multipleMin << "," << Node::m_multipleMax << "}"); if (0 == m_subNode.size()) { return parseStatusNone; } @@ -1358,9 +1361,9 @@ template class NodePThese : public Node { it->setPositionStart(_currentPos+findLen); int32_t offset = 0; do { - status = it->parse(_data, _currentPos+findLen+offset, _lenMax); + status = it->parse(_data, _currentPos+findLen+offset, _lenMax, _level+1); offset = it->getFindLen(); - TK_REG_EXP_DBG_MODE("Parse node : (...) mult=" << Node::m_multiplicity << " tmp " << it->getFindLen()); + TK_REG_EXP_DBG_MODE("Parse " << levelSpace(_level) << " (...) mult=" << Node::m_multiplicity << " tmp " << it->getFindLen()); } while (status == parseStatusPartial); if (status == parseStatusFull) { findLen += it->getFindLen(); @@ -1369,7 +1372,7 @@ template class NodePThese : public Node { } } - TK_REG_EXP_DBG_MODE("Parse node : (...) mult=" << Node::m_multiplicity << " find " << findLen); + TK_REG_EXP_DBG_MODE("Parse " << levelSpace(_level) << " (...) mult=" << Node::m_multiplicity << " find " << findLen); Node::m_multiplicity++; } Node::m_positionStop = Node::m_positionStart + findLen; @@ -1717,7 +1720,7 @@ template class RegExp { } m_exprRootNode.reset(); m_exprRootNode.setPositionStart(iii); - if (m_exprRootNode.parse(_SearchIn, iii, maxlen) == regexp::parseStatusFull) { + if (m_exprRootNode.parse(_SearchIn, iii, maxlen, 0) == regexp::parseStatusFull) { findLen = m_exprRootNode.getFindLen(); TK_DEBUG("main search find : " << findLen << " elements"); if ( _escapeChar != 0 @@ -1784,7 +1787,7 @@ template class RegExp { } m_exprRootNode.reset(); m_exprRootNode.setPositionStart(_startPos); - if (m_exprRootNode.parse(_SearchIn, _startPos, maxlen) == regexp::parseStatusFull) { + if (m_exprRootNode.parse(_SearchIn, _startPos, maxlen, 0) == regexp::parseStatusFull) { findLen = m_exprRootNode.getFindLen(); if ( _escapeChar != 0 && _startPos>0) { diff --git a/test/main.cpp b/test/main.cpp index 81dbcba..ca0c64e 100644 --- a/test/main.cpp +++ b/test/main.cpp @@ -15,6 +15,7 @@ #include #include #include +#include #undef __class__ #define __class__ "etktest" @@ -187,20 +188,40 @@ void testColor() { exit(0); } +void testRegExpSingle(const std::string& _expression, const std::string& _search) { + etk::RegExp expression(_expression); + TK_INFO("Parse RegEx : " << expression.getRegExDecorated()); + if (expression.parse(_search, 0, _search.size()) == true) { + TK_INFO(" match [" << expression.start() << ".." << expression.stop() << "] "); + TK_INFO(" ==> '" << std::string(_search, expression.start(), expression.stop() - expression.start()) << "'"); + } +} + +void testRegExp() { + std::string data = "/* plop */ \n int eee = 22; // error value \nint main(void) {\n return 0;\n}\n"; + //std::string data = "alpha /* plop */ test"; + //std::string data = "pp \n // qdfqdfsdf \nde"; + testRegExpSingle("/\\*.*\\*/", data); + testRegExpSingle("//.*$", data); + testRegExpSingle("/\\*.*", data); + testRegExpSingle("[a-z]", data); +} int main(int argc, const char *argv[]) { // the only one init for etk: etk::log::setLevel(etk::log::logLevelDebug); etk::setArgZero(argv[0]); etk::initDefaultFolder("ewolApplNoName"); + //testVector(); //testUChar(); //testUString(); - testHash(); - testFSNode(); + //testHash(); + //testFSNode(); //testDimension(); - testArchive(); - testColor(); + //testArchive(); + //testColor(); + testRegExp(); return 0; }