/** ******************************************************************************* * @file EdnRegExp.h * @brief Editeur De N'ours : Regular expression annalyser (header) * @author Edouard DUPIN * @date 04/04/2011 * @par Project * Edn * * @par Copyright * Copyright 2010 Edouard DUPIN, all right reserved * * This software is distributed in the hope that it will be useful, but WITHOUT * ANY WARRANTY. * * Licence summary : * You can modify and redistribute the sources code and binaries. * You can send me the bug-fix * You can not earn money with this Software (if the source extract from Edn * represent less than 50% of original Sources) * Term of the licence in in the file licence.txt. * ******************************************************************************* */ #ifndef __EDN_REG_EXP_H__ #define __EDN_REG_EXP_H__ class EdnRegExp; #include "EdnTree.h" #include "EdnBuf.h" #include "EdnVectorBin.h" #include "EdnVectorBuf.h" /* normal mode : (...) sub element is separate with | \d Digits [0-9] \D NOT a digit [^0-9] \l Letters [a-zA-Z] \L NOT a Letter [^a-zA-Z] \s Whitespace [ \t\n\r\f\v] \S NOT Whitespace [^ \t\n\r\f\v] \w "Word" character [a-zA-Z0-9_] \W NOT a "Word" character [^a-zA-Z0-9_] \@ at the start or the end not in the parsing of element ==> check if \w is not present (other regExp will be <> ...) [anjdi] or [a-gt-j] range . dot [^\x00-\x08\x0A-\x1F\x7F] ==> TODO : $ End / Start of line of line ==> ce sera un truc suplémentaire comme le \@ ^in the [] invertion of the range element multiplicity : * ==> {0, 2147483647} ? ==> {0, 1} + ==> {1, 2147483647} {x} ==> {x, x} {x,y} ==> {x, y} */ class RegExpNode; /** * @brief Node Elements for every-one */ class RegExpNode{ public : RegExpNode(void); virtual ~RegExpNode(void) { }; virtual int32_t Generate(EdnVectorBin &data, int32_t startPos, int32_t nbElement); virtual bool Parse(EdnVectorBuf &data, int32_t currentPos, int32_t lenMax, int32_t &findLen); virtual void Display(int32_t level); void SetMult(int32_t min, int32_t max); protected: int32_t GetMultMin(void) { return m_multipleMin; }; int32_t GetMultMax(void) { return m_multipleMax; }; protected : int32_t m_multipleMin; //!< minimum repetition (included) int32_t m_multipleMax; //!< maximum repetition (included) // Data Section ... (can have no data...) EdnVectorBin m_RegExpData; //!< data to parse and compare in some case ... }; class RegExpNodePThese : public RegExpNode { public : RegExpNodePThese(void) { }; ~RegExpNodePThese(void); int32_t Generate(EdnVectorBin &data); bool Parse(EdnVectorBuf &data, int32_t currentPos, int32_t lenMax, int32_t &findLen); void Display(int32_t level); protected : // SubNodes : EdnVectorBin m_subNode; //int32_t m_posPthese; //!< position of the element is detected in the output element }; class RegExpNodePTheseElem : public RegExpNode { public : RegExpNodePTheseElem(void) { }; ~RegExpNodePTheseElem(void); int32_t Generate(EdnVectorBin &data); bool Parse(EdnVectorBuf &data, int32_t currentPos, int32_t lenMax, int32_t &findLen); void Display(int32_t level); protected : // SubNodes : EdnVectorBin m_subNode; private : bool SetMultiplicityOnLastNode(int32_t min, int32_t max); }; class RegExpNodeValue : public RegExpNode { public : RegExpNodeValue(void) { }; ~RegExpNodeValue(void) { }; int32_t Generate(EdnVectorBin &data); bool Parse(EdnVectorBuf &data, int32_t currentPos, int32_t lenMax, int32_t &findLen); void Display(int32_t level); protected : // SubNodes : EdnVectorBin m_data; }; class RegExpNodeBracket : public RegExpNode { public : RegExpNodeBracket(void) { }; ~RegExpNodeBracket(void) { }; int32_t Generate(EdnVectorBin &data); bool Parse(EdnVectorBuf &data, int32_t currentPos, int32_t lenMax, int32_t &findLen); void Display(int32_t level); protected : // SubNodes : EdnVectorBin m_data; }; class RegExpNodeDigit : public RegExpNode { public : RegExpNodeDigit(void) { }; ~RegExpNodeDigit(void) { }; bool Parse(EdnVectorBuf &data, int32_t currentPos, int32_t lenMax, int32_t &findLen); void Display(int32_t level); }; class RegExpNodeDigitNot : public RegExpNode { public : RegExpNodeDigitNot(void) { }; ~RegExpNodeDigitNot(void) { }; bool Parse(EdnVectorBuf &data, int32_t currentPos, int32_t lenMax, int32_t &findLen); void Display(int32_t level); }; class RegExpNodeLetter : public RegExpNode { public : RegExpNodeLetter(void) { }; ~RegExpNodeLetter(void) { }; bool Parse(EdnVectorBuf &data, int32_t currentPos, int32_t lenMax, int32_t &findLen); void Display(int32_t level); }; class RegExpNodeLetterNot : public RegExpNode { public : RegExpNodeLetterNot(void) { }; ~RegExpNodeLetterNot(void) { }; bool Parse(EdnVectorBuf &data, int32_t currentPos, int32_t lenMax, int32_t &findLen); void Display(int32_t level); }; class RegExpNodeWhiteSpace : public RegExpNode { public : RegExpNodeWhiteSpace(void) { }; ~RegExpNodeWhiteSpace(void) { }; bool Parse(EdnVectorBuf &data, int32_t currentPos, int32_t lenMax, int32_t &findLen); void Display(int32_t level); }; class RegExpNodeWhiteSpaceNot : public RegExpNode { public : RegExpNodeWhiteSpaceNot(void) { }; ~RegExpNodeWhiteSpaceNot(void) { }; bool Parse(EdnVectorBuf &data, int32_t currentPos, int32_t lenMax, int32_t &findLen); void Display(int32_t level); }; class RegExpNodeWordChar : public RegExpNode { public : RegExpNodeWordChar(void) { }; ~RegExpNodeWordChar(void) { }; bool Parse(EdnVectorBuf &data, int32_t currentPos, int32_t lenMax, int32_t &findLen); void Display(int32_t level); }; class RegExpNodeWordCharNot : public RegExpNode { public : RegExpNodeWordCharNot(void) { }; ~RegExpNodeWordCharNot(void) { }; bool Parse(EdnVectorBuf &data, int32_t currentPos, int32_t lenMax, int32_t &findLen); void Display(int32_t level); }; class RegExpNodeDot : public RegExpNode { public : RegExpNodeDot(void) { }; ~RegExpNodeDot(void) { }; bool Parse(EdnVectorBuf &data, int32_t currentPos, int32_t lenMax, int32_t &findLen); void Display(int32_t level); }; class RegExpNodeSOL : public RegExpNode { public : RegExpNodeSOL(void) { }; ~RegExpNodeSOL(void) { }; bool Parse(EdnVectorBuf &data, int32_t currentPos, int32_t lenMax, int32_t &findLen); void Display(int32_t level); }; class RegExpNodeEOL : public RegExpNode { public : RegExpNodeEOL(void) { }; ~RegExpNodeEOL(void) { }; bool Parse(EdnVectorBuf &data, int32_t currentPos, int32_t lenMax, int32_t &findLen); void Display(int32_t level); }; typedef struct { int32_t start; int32_t stop; }elementPos_ts; // Regular expression manager class EdnRegExp { // public API : public: // create the regular expression EdnRegExp(const char *exp); EdnRegExp(Edn::String &exp); EdnRegExp(void); ~EdnRegExp(void); void SetRegExp(const char *exp); void SetRegExp(Edn::String &exp); Edn::String GetRegExp(void) { return m_expressionRequested;}; bool GetStatus(void) { return m_isOk;}; // process the regular expression bool Process( EdnVectorBuf &SearchIn, int32_t startPos, int32_t endPos, char escapeChar=0); int32_t Start(void) { return m_areaFind.start; }; int32_t Stop(void) { return m_areaFind.stop; }; void Display(void); // internal parameters private: Edn::String m_expressionRequested; // TODO : Remove ... elementPos_ts m_areaFind; //!< position around selection RegExpNodePThese m_exprRootNode; //!< The tree where data is set bool m_isOk; //!< Known if we can process with this regExp bool m_notBeginWithChar; //!< The regular expression must not have previously a char [a-zA-Z0-9_] bool m_notEndWithChar; //!< The regular expression must not have after the end a char [a-zA-Z0-9_] // internal access private: bool CheckGoodPosition(EdnVectorBin tmpExp, int32_t &pos); bool CheckGoodPosition(EdnVectorBin tmpExp); }; #endif