From d162f2a70da7131e2544eae27af3d926414682ae Mon Sep 17 00:00:00 2001 From: Edouard DUPIN Date: Thu, 2 Oct 2014 23:22:51 +0200 Subject: [PATCH] [DEV] correction of the RegExp properties --- etk/RegExp.h | 43 ++++++++++++++++++++++++++++++------------- test/main.cpp | 22 ++++++++++++++++++---- 2 files changed, 48 insertions(+), 17 deletions(-) diff --git a/etk/RegExp.h b/etk/RegExp.h index 09a1b48..d75f201 100644 --- a/etk/RegExp.h +++ b/etk/RegExp.h @@ -208,6 +208,9 @@ class FindProperty { } void setPositionStop(int64_t _newPos) { m_positionStop = _newPos; + if (m_positionStop < m_positionStart) { + TK_CRITICAL("set volontary a stop position before end : " << this); + } } uint32_t getMultiplicity() const { return m_multiplicity; @@ -420,18 +423,20 @@ template class NodeValue : public Node { }; virtual void parse(const CLASS_TYPE& _data, int64_t _currentPos, int64_t _lenMax, FindProperty& _property) { TK_REG_DEBUG("Parse " << levelSpace(Node::m_nodeLevel) << " Value{" << Node::m_multipleMin << "," << Node::m_multipleMax << "} : " << (char)m_data[0]); + TK_REG_DEBUG(" " << levelSpace(Node::m_nodeLevel) << " " << createString(Node::m_regExpData)); TK_REG_DEBUG_3(" " << levelSpace(Node::m_nodeLevel) << " Value " << _property); if (m_data.size() == 0) { TK_ERROR("No data inside type elemTypeValue"); _property.setStatus(parseStatusNone); return; } - if ( _property.getPositionStop() < 0 - && Node::m_multipleMin == 0 - && _property.getMultiplicity() == 0) { - _property.setPositionStop(_property.getPositionStart()); - _property.setStatus(parseStatusPartial); - return; + if (_property.getStatus() != parseStatusPartial) { + if (Node::m_multipleMin == 0) { + _property.setPositionStop(_property.getPositionStart()); + _property.setStatus(parseStatusPartial); + TK_REG_DEBUG("Parse " << levelSpace(Node::m_nodeLevel) << " ==> partial (minSize=0)"); + return; + } } bool tmpFind = true; int32_t findLen = 0; @@ -531,13 +536,15 @@ template class NodeRangeValue : public Node { virtual 
void parse(const CLASS_TYPE& _data, int64_t _currentPos, int64_t _lenMax, FindProperty& _property) { int32_t findLen = 0; TK_REG_DEBUG("Parse " << levelSpace(Node::m_nodeLevel) << " " << getDescriptiveName() << "{" << Node::m_multipleMin << "," << Node::m_multipleMax << "}"); + TK_REG_DEBUG(" " << levelSpace(Node::m_nodeLevel) << " " << createString(Node::m_regExpData)); TK_REG_DEBUG_3(" " << levelSpace(Node::m_nodeLevel) << " " << getDescriptiveName() << " " << _property); - if ( _property.getPositionStop() < 0 - && Node::m_multipleMin == 0 - && _property.getMultiplicity() == 0) { - _property.setPositionStop(_property.getPositionStart()); - _property.setStatus(parseStatusPartial); - return; + if (_property.getStatus() != parseStatusPartial) { + if (Node::m_multipleMin == 0) { + _property.setPositionStop(_property.getPositionStart()); + _property.setStatus(parseStatusPartial); + TK_REG_DEBUG("Parse " << levelSpace(Node::m_nodeLevel) << " ==> partial (minSize=0)"); + return; + } } char32_t tmpVal = _data[_currentPos]; bool find = false; @@ -695,6 +702,7 @@ template class NodeSOL : public Node { int32_t findLen = 0; bool tmpFind = false; TK_REG_DEBUG("Parse " << levelSpace(Node::m_nodeLevel) << " SOL{" << Node::m_multipleMin << "," << Node::m_multipleMax << "}"); + TK_REG_DEBUG(" " << levelSpace(Node::m_nodeLevel) << " " << createString(Node::m_regExpData)); // TODO : is it really what I want ... (maybe next ellement will be requested... (check if previous element is \r or \n while ( _property.getMultiplicity() < Node::m_multipleMax && tmpFind == true @@ -1036,10 +1044,10 @@ template class NodePTheseElem : public Node { continue; } } - tmpCurrentPos = prop.getPositionStop(); if (prop.getPositionStart() > prop.getPositionStop()) { TK_CRITICAL("Very bad case ... 
: " << prop); } + tmpCurrentPos = prop.getPositionStop(); _property.m_subProperty.push_back(prop); TK_REG_DEBUG(" " << levelSpace(Node::m_nodeLevel) << " (elem=" << iii << "/" << m_subNode.size() << ") === OK === find : " << prop); prop.reset(); @@ -1150,12 +1158,21 @@ template class NodePThese : public Node { virtual void parse(const CLASS_TYPE& _data, int64_t _currentPos, int64_t _lenMax, FindProperty& _property) { TK_REG_DEBUG("Parse " << levelSpace(Node::m_nodeLevel) << " (...) {" << Node::m_multipleMin << "," << Node::m_multipleMax << "}"); + TK_REG_DEBUG(" " << levelSpace(Node::m_nodeLevel) << " " << createString(Node::m_regExpData)); TK_REG_DEBUG_2(" " << levelSpace(Node::m_nodeLevel) << " (...) data='" << autoStr(std::string(_data, _currentPos, _lenMax-_currentPos)) << "'"); TK_REG_DEBUG_3(" " << levelSpace(Node::m_nodeLevel) << " (...) input property=" << _property); if (0 == m_subNode.size()) { _property.setStatus(parseStatusNone); return; } + if (_property.getStatus() != parseStatusPartial) { + if (Node::m_multipleMin == 0) { + _property.setStatus(parseStatusPartial); + _property.setPositionStop(_property.getPositionStart()); + TK_REG_DEBUG("Parse " << levelSpace(Node::m_nodeLevel) << " ==> partial (minSize=0)"); + return; + } + } bool haveSubPartial = false; for (int64_t iii=_property.m_subProperty.size()-1; iii>=0; --iii) { if (_property.m_subProperty[iii].getStatus() == parseStatusPartial) { diff --git a/test/main.cpp b/test/main.cpp index 4c38a2a..d7721cb 100644 --- a/test/main.cpp +++ b/test/main.cpp @@ -253,6 +253,7 @@ void testRegExp() { data = "ddfgdfgh"; etk::RegExp reg(".*"); reg.setMaximize(true); + TK_INFO("Parse RegEx : '" << reg.getRegExDecorated() << "'"); if (reg.parse(data, 0, data.size()) == true) { //if (reg.processOneElement(data, 0, data.size()) == true) { @@ -260,19 +261,32 @@ void testRegExp() { TK_INFO(" ==> '" << std::string(data, reg.start(), reg.stop()-reg.start()) << "'"); } - data = "plop \"\" sdfsdf s\"swdfsqd"; + data = 
"plop \"\" sdfsdf s\"swdfsqd sdfgsdfg \" \" sdfsf"; reg = etk::RegExp("\"(\\\\[\\\\\"]|.)*\""); - //reg.setMaximize(true); + reg.setMaximize(false); TK_INFO("Parse RegEx : '" << reg.getRegExDecorated() << "'"); if (reg.parse(data, 0, data.size()) == true) { //if (reg.processOneElement(data, 0, data.size()) == true) { TK_INFO(" match [" << reg.start() << ".." << reg.stop() << "] "); TK_INFO(" ==> '" << std::string(data, reg.start(), reg.stop()-reg.start()) << "'"); } - //TODO : good : "(\\+|[0-9])*" ==> really bad : "(+|[0-9])*" - + data = "void limit(const vec2& _origin, const vec2& _size);\n"; + reg = etk::RegExp("\\@(\\w|_)+[ \\t]*\\("); + reg.setMaximize(false); + TK_INFO("Parse RegEx : '" << reg.getRegExDecorated() << "'"); + if (reg.parse(data, 0, data.size()) == true) { + //if (reg.processOneElement(data, 0, data.size()) == true) { + TK_INFO(" match [" << reg.start() << ".." << reg.stop() << "] "); + TK_INFO(" ==> '" << std::string(data, reg.start(), reg.stop()-reg.start()) << "'"); + } + data = "void limit const vec2& _origin, const vec2& _size);\n"; + if (reg.parse(data, 0, data.size()) == true) { + //if (reg.processOneElement(data, 0, data.size()) == true) { + TK_INFO(" match [" << reg.start() << ".." << reg.stop() << "] "); + TK_INFO(" ==> '" << std::string(data, reg.start(), reg.stop()-reg.start()) << "'"); + } } int main(int argc, const char *argv[]) {