From ce3243a5f438c6dc3158b5f1c786741132a51a6f Mon Sep 17 00:00:00 2001 From: Edouard DUPIN Date: Wed, 27 Aug 2014 21:25:46 +0200 Subject: [PATCH] [DEBUG] new regexp maximizing of the search --- etk/RegExp.h | 48 +++++++++++++++++++++++++++++++++++++++++------- test/main.cpp | 23 ++++++++++++++++++++--- 2 files changed, 61 insertions(+), 10 deletions(-) diff --git a/etk/RegExp.h b/etk/RegExp.h index f5d2c8b..8145040 100644 --- a/etk/RegExp.h +++ b/etk/RegExp.h @@ -20,7 +20,8 @@ //#define TK_REG_DEBUG TK_VERBOSE //#define TK_REG_DEBUG TK_DEBUG -#define TK_REG_DEBUG_3 TK_VERBOSE +#define TK_REG_DEBUG_3 TK_HIDDEN +//#define TK_REG_DEBUG_3 TK_VERBOSE //#define TK_REG_DEBUG_3 TK_DEBUG #define TK_REG_DEBUG_2 TK_HIDDEN @@ -1044,7 +1045,11 @@ template class NodePTheseElem : public Node { break; } } - _property.setPositionStop( _property.m_subProperty.back().getPositionStop() ); + if (_property.m_subProperty.size()>0) { + _property.setPositionStop(_property.m_subProperty.back().getPositionStop() ); + } else { + TK_WARNING("RegExp ERROR"); + } TK_REG_DEBUG(" " << levelSpace(Node::m_nodeLevel) << " (elem) return=" << _property); } @@ -1186,6 +1191,9 @@ template class NodePThese : public Node { tmpFind = false; if (tmpCurrentPos+offset>=_lenMax) { TK_REG_DEBUG(" " << levelSpace(Node::m_nodeLevel) << " (... ---/" << m_subNode.size() << ") ==> out of range : " << tmpCurrentPos << "+" << offset << " >= " << _lenMax); + prop.setStatus(parseStatusFull); + _property.m_subProperty.push_back(prop); + break; } for (size_t iii=iiiStartPos; iii::m_nodeLevel) << " (... " << iii << "/" << m_subNode.size() << ")"); @@ -1315,6 +1323,7 @@ template class RegExp { bool m_isOk; //!< Known if we can process with this regExp bool m_notBeginWithChar; //!< The regular expression must not have previously a char [a-zA-Z0-9_] bool m_notEndWithChar; //!< The regular expression must not have after the end a char [a-zA-Z0-9_] + bool m_maximize; //!< by default the regexp find the minimum size of a regexp . public: // create the regular expression @@ -1326,7 +1335,8 @@ template class RegExp { m_expressionRequested(U""), m_isOk(false), m_notBeginWithChar(false), - m_notEndWithChar(false) { + m_notEndWithChar(false), + m_maximize(false) { m_areaFind.start=0; m_areaFind.stop=0; if (_exp.size() != 0) { @@ -1340,7 +1350,8 @@ template class RegExp { m_expressionRequested(U""), m_isOk(false), m_notBeginWithChar(false), - m_notEndWithChar(false) { + m_notEndWithChar(false), + m_maximize(false) { m_areaFind.start=0; m_areaFind.stop=0; if (_exp.size() != 0) { @@ -1354,7 +1365,13 @@ template class RegExp { ~RegExp() { m_isOk = false; }; - + /** + * @brief SetMaximizing of the regexp + * @param[in] _value Maximize or not the regExp + */ + void setMaximize(bool _value) { + m_maximize = _value; + } /** * @brief Set a new regular expression matching * @param[in] _exp the new expression to search @@ -1568,16 +1585,25 @@ template class RegExp { regexp::FindProperty prop; prop.setPositionStart(iii); bool needOneMoreCycle = true; + bool oneCycleDone = false; while (needOneMoreCycle == true) { needOneMoreCycle = false; m_exprRootNode.parse(_SearchIn, iii, _endPos, prop); + TK_REG_DEBUG("res=" << prop.getStatus()); + if ( prop.getStatus() == regexp::parseStatusNone + && m_maximize == true + && oneCycleDone == false) { + // TODO : do it better Patch the case of ".*" seach with maximizing + oneCycleDone = true; + needOneMoreCycle = true; + } if ( prop.getStatus() == regexp::parseStatusFull || prop.getStatus() == regexp::parseStatusPartial ) { findLen = prop.getFindLen(); TK_REG_DEBUG_3("main search find : " << findLen << " elements data=" << std::string(_SearchIn, prop.getPositionStart(), prop.getFindLen())); // Check end : if (m_notEndWithChar == true) { - TK_DEBUG("Check end is not a char: '" << (char)_SearchIn[iii+findLen] << "'"); + TK_REG_DEBUG("Check end is not a char: '" << (char)_SearchIn[iii+findLen] << "'"); if (_startPos+findLen < (int64_t)_SearchIn.size() ) { char32_t tmpVal = _SearchIn[iii+findLen]; if( ( tmpVal >= 'a' @@ -1588,11 +1614,15 @@ template class RegExp { && tmpVal <= '9' ) || ( tmpVal == '_' ) ) { // go on the next char ... - TK_DEBUG("Need one more cycle ..."); + TK_REG_DEBUG("Need one more cycle ..."); needOneMoreCycle = true; } } } + if ( m_maximize == true + && prop.getStatus() == regexp::parseStatusPartial) { + needOneMoreCycle = true; + } if (needOneMoreCycle == false) { m_areaFind.start = iii; m_areaFind.stop = iii + findLen; @@ -1665,6 +1695,10 @@ template class RegExp { } } } + if ( m_maximize == true + && prop.getStatus() == regexp::parseStatusPartial) { + needOneMoreCycle = true; + } if (needOneMoreCycle == false) { m_areaFind.start = _startPos; m_areaFind.stop = _startPos + findLen; diff --git a/test/main.cpp b/test/main.cpp index 9e2522c..bd2a66d 100644 --- a/test/main.cpp +++ b/test/main.cpp @@ -136,7 +136,7 @@ void testDimension() { void testColor() { TK_INFO("==> test of COLOR (START)"); - + /* etk::Color colorRGBA8(0x52,0x0F, 0x65, 0x44); etk::Color colorRGBA16(0x52,0x0F, 0x65, 0x44); etk::Color colorRGBA32(0x52,0x0F, 0x65, 0x44); @@ -150,6 +150,7 @@ void testColor() { etk::Color colorMono32(0x52); etk::Color colorMonoF(5200.22); etk::Color colorMonoD(520000.22); + */ /* etk::Color colorRGBA8__("#520F6544"); etk::Color colorRGBA16__("rgba(0x52, 0x0F, 0x65, 0x44)"); @@ -165,6 +166,7 @@ void testColor() { etk::Color colorMonoF__("mono(5200.22)"); etk::Color colorMonoD__("mono(520000.22)"); */ + /* etk::Color colorRGBAf__(colorRGBA8); etk::Color colorXX332__(colorRGBA8); @@ -183,7 +185,7 @@ void testColor() { TK_INFO("Create a color : MONO 32 : " << colorMono32); TK_INFO("Create a color : MONO float : " << colorMonoF); TK_INFO("Create a color : MONO double : " << colorMonoD); - + */ TK_INFO("==> test of Color (STOP)"); exit(0); } @@ -256,7 +258,22 @@ void testRegExp() { data = " 'dfgd\\'fg' \n" " vec2 m_offset; \n"; - testRegExpSingle("'((\\\\[\\\\'])|.)*'", data); + //testRegExpSingle("'((\\\\[\\\\'])|.)*'", data); + + + + data = "ddfgdfgh"; + etk::RegExp reg(".*"); + reg.setMaximize(true); + TK_INFO("Parse RegEx : '" << reg.getRegExDecorated() << "'"); + if (reg.parse(data, 0, data.size()) == true) { + //if (reg.processOneElement(data, 0, data.size()) == true) { + TK_INFO(" match [" << reg.start() << ".." << reg.stop() << "] "); + TK_INFO(" ==> '" << std::string(data, reg.start(), reg.stop()-reg.start()) << "'"); + } + + //TODO : good : "(\\+|[0-9])*" ==> really bad : "(+|[0-9])*" + }