diff --git a/etk/RegExp.cpp b/etk/RegExp.cpp index fbfc385..c2eb258 100644 --- a/etk/RegExp.cpp +++ b/etk/RegExp.cpp @@ -64,6 +64,20 @@ const struct etk::convertionTable etk::regexp::constConvertionTable[] = { }; const int64_t etk::regexp::constConvertionTableSize = sizeof(etk::regexp::constConvertionTable) / sizeof(struct etk::convertionTable) ; +static const char* parseStatusTable[] = { + "parseStatusUnknow", + "parseStatusNone", + "parseStatusPartial", + "parseStatusFull" +}; +std::ostream& etk::regexp::operator <<(std::ostream& _os, enum etk::regexp::parseStatus _obj) { + _os << parseStatusTable[_obj]; + return _os; +} +std::ostream& etk::regexp::operator <<(std::ostream& _os, const etk::regexp::FindProperty& _obj) { + _os << "property([" << _obj.getPositionStart() << "," << _obj.getPositionStop() << "]*" << _obj.getMultiplicity() << " " << _obj.getStatus() << ")"; + return _os; +} std::string etk::regexp::createString(const std::vector& _data, int64_t _start, int64_t _stop) { std::string output(ETK_BASH_COLOR_NORMAL); diff --git a/etk/RegExp.h b/etk/RegExp.h index 1fdf4ed..55b8310 100644 --- a/etk/RegExp.h +++ b/etk/RegExp.h @@ -124,6 +124,8 @@ enum parseStatus { parseStatusFull //!< can not parse more elements }; //! @not-in-doc +std::ostream& operator <<(std::ostream& _os, enum parseStatus _obj); +//! @not-in-doc extern const struct convertionTable constConvertionTable[]; //! @not-in-doc extern const int64_t constConvertionTableSize; @@ -158,28 +160,28 @@ class FindProperty { int64_t m_positionStop; //!< find end position uint32_t m_multiplicity; //!< curent multiplicity of find element std::vector m_subProperty; //!< list of all sub elements - enum parseStatus status; //!< curent status of parsing + enum parseStatus m_status; //!< curent status of parsing public: FindProperty() : m_positionStart(-1), m_positionStop(-1), m_multiplicity(0), - status(parseStatusUnknow) { + m_status(parseStatusUnknow) { // nothing to do ... } - int64_t getPositionStart() { + int64_t getPositionStart() const { return m_positionStart; } void setPositionStart(int64_t _newPos) { m_positionStart = _newPos; } - int64_t getPositionStop() { + int64_t getPositionStop() const { return m_positionStop; } void setPositionStop(int64_t _newPos) { m_positionStop = _newPos; } - uint32_t getMultiplicity() { + uint32_t getMultiplicity() const { return m_multiplicity; } void setMultiplicity(uint32_t _newVal) { @@ -191,12 +193,19 @@ class FindProperty { void multiplicityIncrement() { m_multiplicity++; } - int64_t getFindLen() { + int64_t getFindLen() const { if (m_positionStop < 0) { return 0; } return m_positionStop - m_positionStart; } + void setStatus(enum parseStatus _status) { + m_status = _status; + } + enum parseStatus getStatus() const { + return m_status; + } + void display(const std::string& _data, int32_t _level = 0) { TK_INFO("prop : " << levelSpace(_level) << " [" << m_positionStart << "," @@ -218,6 +227,8 @@ class FindProperty { } }; +std::ostream& operator <<(std::ostream& _os, const FindProperty& _obj); + #undef __class__ #define __class__ "regExp::Node" @@ -263,7 +274,7 @@ template class Node { * @return Partial can find more data ... * @return None Find nothing */ - virtual enum parseStatus parse(const CLASS_TYPE& _data, int64_t _currentPos, int64_t _lenMax, FindProperty& _property)=0; + virtual void parse(const CLASS_TYPE& _data, int64_t _currentPos, int64_t _lenMax, FindProperty& _property)=0; /** * @brief Display the current node properties * @param[in] level of the node @@ -327,11 +338,12 @@ template class NodeValue : public Node { } return _data.size(); }; - virtual enum parseStatus parse(const CLASS_TYPE& _data, int64_t _currentPos, int64_t _lenMax, FindProperty& _property) { + virtual void parse(const CLASS_TYPE& _data, int64_t _currentPos, int64_t _lenMax, FindProperty& _property) { TK_REG_EXP_DBG_MODE("Parse " << levelSpace(Node::m_nodeLevel) << " Value{" << Node::m_multipleMin << "," << Node::m_multipleMax << "} : " << (char)m_data[0]); if (m_data.size() == 0) { TK_ERROR("No data inside type elemTypeValue"); - return parseStatusNone; + _property.setStatus(parseStatusNone); + return; } bool tmpFind = true; int32_t findLen = 0; @@ -361,13 +373,16 @@ template class NodeValue : public Node { if ( _property.getMultiplicity() >= Node::m_multipleMin && _property.getMultiplicity() <= Node::m_multipleMax && findLen > 0) { - TK_REG_EXP_DBG_MODE(" " << levelSpace(Node::m_nodeLevel) << " value find " << _property.getPositionStop() - _property.getPositionStart() << " [" << _property.getPositionStart() << ".." << _property.getPositionStop() << "]"); - return parseStatusFull; + _property.setStatus(parseStatusFull); + TK_REG_EXP_DBG_MODE(" " << levelSpace(Node::m_nodeLevel) << " value find " << _property); + return; } else if (Node::m_multipleMin == 0) { - TK_REG_EXP_DBG_MODE(" " << levelSpace(Node::m_nodeLevel) << " find size=0"); - return parseStatusFull; + _property.setStatus(parseStatusFull); + TK_REG_EXP_DBG_MODE(" " << levelSpace(Node::m_nodeLevel) << " value find " << _property); + return; } - return parseStatusNone; + _property.setStatus(parseStatusNone); + return; }; void display() { @@ -411,7 +426,7 @@ template class NodeRangeValue : public Node { return "auto-range"; } // Truc a faire : multipliciter min, return partiel, et ... - virtual enum parseStatus parse(const CLASS_TYPE& _data, int64_t _currentPos, int64_t _lenMax, FindProperty& _property) { + virtual void parse(const CLASS_TYPE& _data, int64_t _currentPos, int64_t _lenMax, FindProperty& _property) { int32_t findLen = 0; TK_REG_EXP_DBG_MODE("Parse " << levelSpace(Node::m_nodeLevel) << " " << getDescriptiveName() << "{" << Node::m_multipleMin << "," << Node::m_multipleMax << "}"); char32_t tmpVal = _data[_currentPos]; @@ -452,20 +467,25 @@ template class NodeRangeValue : public Node { } if(_property.getMultiplicity() > Node::m_multipleMax) { _property.multiplicityDecrement(); - return parseStatusFull; + _property.setStatus(parseStatusFull); + return; } else { _property.setPositionStop(newPosVal); if (_currentPos>=_lenMax) { - return parseStatusFull; + _property.setStatus(parseStatusFull); + return; } else { - return parseStatusPartial; + _property.setStatus(parseStatusPartial); + return; } } } if (_property.getPositionStop() != -1) { - return parseStatusFull; + _property.setStatus(parseStatusFull); + return; } - return parseStatusNone; + _property.setStatus(parseStatusNone); + return; }; virtual void display() { TK_INFO("Find NODE : " << levelSpace(Node::m_nodeLevel) << " " << getDescriptiveName() << " {" @@ -746,7 +766,7 @@ template class NodeSOL : public Node { * @brief Destructor */ ~NodeSOL() { }; - virtual enum parseStatus parse(const CLASS_TYPE& _data, int64_t _currentPos, int64_t _lenMax, FindProperty& _property) { + virtual void parse(const CLASS_TYPE& _data, int64_t _currentPos, int64_t _lenMax, FindProperty& _property) { int32_t findLen = 0; bool tmpFind = false; TK_REG_EXP_DBG_MODE("Parse " << levelSpace(Node::m_nodeLevel) << " SOL{" << Node::m_multipleMin << "," << Node::m_multipleMax << "}"); @@ -769,12 +789,15 @@ template class NodeSOL : public Node { && _property.getMultiplicity()<=Node::m_multipleMax && findLen>0 ) { TK_REG_EXP_DBG_MODE("find " << findLen); - return parseStatusFull; + _property.setStatus(parseStatusFull); + return; } else if( 0 == Node::m_multipleMin ) { TK_REG_EXP_DBG_MODE("find size=0"); - return parseStatusFull; + _property.setStatus(parseStatusFull); + return; } - return parseStatusNone; + _property.setStatus(parseStatusNone); + return; }; void display() { TK_INFO("Find NODE : " << levelSpace(Node::m_nodeLevel) << "@SOL@ {" @@ -942,61 +965,66 @@ template class NodePTheseElem : public Node { } return _data.size(); }; - - virtual enum parseStatus parse(const CLASS_TYPE& _data, int64_t _currentPos, int64_t _lenMax, FindProperty& _property) { + private: + void parseInternal(const CLASS_TYPE& _data, int64_t _currentPos, int64_t _lenMax, FindProperty& _property, size_t _startListIndex) { + + } + public: + virtual void parse(const CLASS_TYPE& _data, int64_t _currentPos, int64_t _lenMax, FindProperty& _property) { int32_t findLen = 0; TK_REG_EXP_DBG_MODE("Parse " << levelSpace(Node::m_nodeLevel) << " (Elem)"); // NOTE 1 : Must done only one time in EVERY case ... // NOTE 2 : All element inside must be OK if (0 == m_subNode.size()) { - return parseStatusNone; + _property.setStatus(parseStatusNone); + return; } int64_t tmpCurrentPos = _currentPos; for (size_t iii=0; iiiparse(_data, tmpCurrentPos, _lenMax, prop); + m_subNode[iii]->parse(_data, tmpCurrentPos, _lenMax, prop); offset = prop.getFindLen(); tmpCurrentPos = prop.getPositionStop(); - if ( status == parseStatusPartial + if ( prop.getStatus() == parseStatusPartial && iii+1::m_nodeLevel+1) << " (Elem) 2 second parse ..."); int64_t tmpCurrentPos2 = tmpCurrentPos; int findLen2 = 0; bool error = false; for (size_t jjj=iii+1; jjjparse(_data, tmpCurrentPos2, _lenMax, prop2); + m_subNode[jjj]->parse(_data, tmpCurrentPos2, _lenMax, prop2); offset2 = prop2.getFindLen(); - tmpCurrentPos2 = prop.getPositionStop(); - } while (status2 == parseStatusPartial); - if (status2 == parseStatusNone) { + tmpCurrentPos2 = prop2.getPositionStop(); + } while (prop2.getStatus() == parseStatusPartial); + if (prop2.getStatus() == parseStatusNone) { error = true; break; } else { - TK_REG_EXP_DBG_MODE("Parse " << levelSpace(Node::m_nodeLevel) << " (Elem) 2 find : " << prop2.getFindLen() << " [" << _property.getPositionStart() << " " << tmpCurrentPos2 << "]"); + TK_REG_EXP_DBG_MODE("Parse " << levelSpace(Node::m_nodeLevel) << " (Elem) 2 find : " << prop2); } } if (error == false) { _property.setPositionStop(tmpCurrentPos2); - TK_REG_EXP_DBG_MODE("Parse " << levelSpace(Node::m_nodeLevel) << " (Elem) 2 return : " << _property.getFindLen()); - return parseStatusFull; + _property.setStatus(parseStatusFull); + TK_REG_EXP_DBG_MODE("Parse " << levelSpace(Node::m_nodeLevel) << " (Elem) 2 return : " << _property); + return; } TK_REG_EXP_DBG_MODE("Parse " << levelSpace(Node::m_nodeLevel) << " (Elem) 2 second parse ... (done)"); } - } while (status == parseStatusPartial); - if (status == parseStatusNone) { + } while (prop.getStatus() == parseStatusPartial); + if (prop.getStatus() == parseStatusNone) { findLen = 0; - return parseStatusNone; + _property.setStatus(parseStatusNone); + return; } else { - TK_REG_EXP_DBG_MODE("Parse " << levelSpace(Node::m_nodeLevel) << " (Elem) find : " << prop.getFindLen() << " [" << _property.getPositionStart() << " " << tmpCurrentPos << "]"); + TK_REG_EXP_DBG_MODE("Parse " << levelSpace(Node::m_nodeLevel) << " (Elem) find : " << prop); } } if (tmpCurrentPos<_currentPos) { @@ -1005,8 +1033,9 @@ template class NodePTheseElem : public Node { findLen = tmpCurrentPos - _currentPos; } _property.setPositionStop(tmpCurrentPos); - TK_REG_EXP_DBG_MODE("Parse " << levelSpace(Node::m_nodeLevel) << " (Elem) return : " << _property.getFindLen()); - return parseStatusFull; + _property.setStatus(parseStatusFull); + TK_REG_EXP_DBG_MODE("Parse " << levelSpace(Node::m_nodeLevel) << " (Elem) return : " << _property); + return; }; void display() { @@ -1091,30 +1120,30 @@ template class NodePThese : public Node { } return _data.size(); }; - virtual enum parseStatus parse(const CLASS_TYPE& _data, int64_t _currentPos, int64_t _lenMax, FindProperty& _property) { + virtual void parse(const CLASS_TYPE& _data, int64_t _currentPos, int64_t _lenMax, FindProperty& _property) { int32_t findLen = 0; TK_REG_EXP_DBG_MODE("Parse " << levelSpace(Node::m_nodeLevel) << " (...){" << Node::m_multipleMin << "," << Node::m_multipleMax << "}"); if (0 == m_subNode.size()) { - return parseStatusNone; + _property.setStatus(parseStatusNone); + return; } bool tmpFind = true; while ( _property.getMultiplicity() < Node::m_multipleMax && tmpFind == true) { tmpFind = false; for (auto &it : m_subNode) { - enum parseStatus status; FindProperty prop; prop.setPositionStart(_currentPos+findLen); int32_t offset = 0; do { - status = it->parse(_data, _currentPos+findLen+offset, _lenMax, prop); + it->parse(_data, _currentPos+findLen+offset, _lenMax, prop); offset = prop.getFindLen(); TK_REG_EXP_DBG_MODE("Parse " << levelSpace(Node::m_nodeLevel) << " (...) mult=" << _property.getMultiplicity() << " tmp " << prop.getFindLen()); - } while (status == parseStatusPartial); - if (status == parseStatusFull) { + } while (prop.getStatus() == parseStatusPartial); + if (prop.getStatus() == parseStatusFull) { findLen += prop.getFindLen(); tmpFind = true; - } else if (status == parseStatusPartial) { + } else if (prop.getStatus() == parseStatusPartial) { } } @@ -1126,12 +1155,15 @@ template class NodePThese : public Node { && _property.getMultiplicity()<=Node::m_multipleMax && findLen>0 ) { TK_REG_EXP_DBG_MODE("find " << findLen); - return parseStatusFull; + _property.setStatus(parseStatusFull); + return; } else if( 0 == Node::m_multipleMin ) { TK_REG_EXP_DBG_MODE("find size=0"); - return parseStatusFull; + _property.setStatus(parseStatusFull); + return; } - return parseStatusNone; + _property.setStatus(parseStatusNone); + return; }; void display() { @@ -1460,7 +1492,8 @@ template class RegExp { } regexp::FindProperty prop; prop.setPositionStart(iii); - if (m_exprRootNode.parse(_SearchIn, iii, maxlen, prop) == regexp::parseStatusFull) { + m_exprRootNode.parse(_SearchIn, iii, maxlen, prop); + if (prop.getStatus() == regexp::parseStatusFull) { findLen = prop.getFindLen(); TK_DEBUG("main search find : " << findLen << " elements"); if ( _escapeChar != 0 @@ -1528,7 +1561,8 @@ template class RegExp { } m_exprRootNode.setPositionStart(_startPos); regexp::FindProperty prop; - if (m_exprRootNode.parse(_SearchIn, _startPos, maxlen, prop) == regexp::parseStatusFull) { + m_exprRootNode.parse(_SearchIn, _startPos, maxlen, prop); + if (prop.getStatus() == regexp::parseStatusFull) { findLen = m_exprRootNode.getFindLen(); if ( _escapeChar != 0 && _startPos>0) { diff --git a/test/main.cpp b/test/main.cpp index ca0c64e..c6ac175 100644 --- a/test/main.cpp +++ b/test/main.cpp @@ -198,13 +198,13 @@ void testRegExpSingle(const std::string& _expression, const std::string& _search } void testRegExp() { - std::string data = "/* plop */ \n int eee = 22; // error value \nint main(void) {\n return 0;\n}\n"; + std::string data = " a /* plop */ \n int eee = 22; // error value \nint main(void) {\n return 0;\n}\n"; //std::string data = "alpha /* plop */ test"; //std::string data = "pp \n // qdfqdfsdf \nde"; testRegExpSingle("/\\*.*\\*/", data); - testRegExpSingle("//.*$", data); - testRegExpSingle("/\\*.*", data); - testRegExpSingle("[a-z]", data); + //testRegExpSingle("//.*$", data); + //testRegExpSingle("/\\*.*", data); + //testRegExpSingle("[a-z]", data); } int main(int argc, const char *argv[]) {