From 0b42adbec8175e6df91229a577cd3d68266cdd04 Mon Sep 17 00:00:00 2001 From: Edouard DUPIN <yui.heero@gmail.com> Date: Wed, 8 Oct 2014 21:03:32 +0200 Subject: [PATCH] [DEV] remove etkRegExp ==> now use generic std::regex --- CMakeLists.txt | 2 - etk/RegExp.cpp | 468 ----------- etk/RegExp.h | 1947 ---------------------------------------------- etk/stdTools.cpp | 70 ++ etk/stdTools.h | 286 +++++++ lutin_etk.py | 1 - test/main.cpp | 540 +------------ 7 files changed, 361 insertions(+), 2953 deletions(-) delete mode 100644 etk/RegExp.cpp delete mode 100644 etk/RegExp.h diff --git a/CMakeLists.txt b/CMakeLists.txt index 02a48bb..cca473a 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -38,8 +38,6 @@ set(src_files etk/stdTools.h etk/Stream.cpp etk/Stream.h - etk/RegExp.cpp - etk/RegExp.h etk/tool.cpp etk/tool.h etk/Noise.cpp diff --git a/etk/RegExp.cpp b/etk/RegExp.cpp deleted file mode 100644 index ebc2383..0000000 --- a/etk/RegExp.cpp +++ /dev/null @@ -1,468 +0,0 @@ -/** - * @author Edouard DUPIN - * - * @copyright 2011, Edouard DUPIN, all right reserved - * - * @license APACHE v2.0 (see license file) - */ - - -#include <etk/RegExp.h> - - - -const struct etk::convertionTable etk::regexp::constConvertionTable[] = { - // haveBackSlash, inputValue, newValue - { false , '(' , 0 , etk::regexpOpcodePTheseIn}, - { true , '(' , '(' , etk::regexpOpcodeError}, - { false , ')' , 0 , etk::regexpOpcodePTheseOut}, - { true , ')' , ')' , etk::regexpOpcodeError}, - { false , '[' , 0 , etk::regexpOpcodeBracketIn}, - { true , '[' , '[' , etk::regexpOpcodeError}, - { false , ']' , 0 , etk::regexpOpcodeBracketOut}, - { true , ']' , ']' , etk::regexpOpcodeError}, - { false , '{' , 0 , etk::regexpOpcodeBracetIn}, - { true , '{' , '{' , etk::regexpOpcodeError}, - { false , '}' , 0 , etk::regexpOpcodeBracetOut}, - { true , '}' , '}' , etk::regexpOpcodeError}, - { false , '-' , 0 , etk::regexpOpcodeTo}, - { true , '-' , '-' , etk::regexpOpcodeError}, - { false , '*' , 0 , etk::regexpOpcodeStar}, - { true , '*' , '*' , etk::regexpOpcodeError}, - { false , '.' , 0 , etk::regexpOpcodeDot}, - { true , '.' , '.' , etk::regexpOpcodeError}, - { true , 'e' , 0 , etk::regexpOpcodeEOF}, - { false , 'e' , 'e' , etk::regexpOpcodeError}, - { false , '?' , 0 , etk::regexpOpcodeQuestion}, - { true , '?' , '?' , etk::regexpOpcodeError}, - { false , '+' , 0 , etk::regexpOpcodePlus}, - { true , '+' , '+' , etk::regexpOpcodeError}, - { false , '|' , 0 , etk::regexpOpcodePipe}, - { true , '|' , '|' , etk::regexpOpcodeError}, - { false , '^' , 0 , etk::regexpOpcodeStartOfLine}, - { true , '^' , '^' , etk::regexpOpcodeError}, - { false , '$' , 0 , etk::regexpOpcodeEndOfLine}, - { true , '$' , '$' , etk::regexpOpcodeError}, - { true , 'd' , 0 , etk::regexpOpcodeDigit}, - { true , 'D' , 0 , etk::regexpOpcodeDigitNot}, - { true , 'l' , 0 , etk::regexpOpcodeLetter}, - { true , 'L' , 0 , etk::regexpOpcodeLetterNot}, - { true , 's' , 0 , etk::regexpOpcodeSpace}, - { true , 'S' , 0 , etk::regexpOpcodeSpaceNot}, - { true , 'w' , 0 , etk::regexpOpcodeWord}, - { true , 'W' , 0 , etk::regexpOpcodeWordNot}, - { true , 'a' , '\a', etk::regexpOpcodeError}, - { true , 'b' , '\b', etk::regexpOpcodeError}, - { true , 'e' , 0x1B, etk::regexpOpcodeError}, // Escape character <Esc> - { true , 'f' , '\f', etk::regexpOpcodeError}, - { true , 'n' , '\n', etk::regexpOpcodeError}, - { true , 'r' , '\r', etk::regexpOpcodeError}, - { true , 't' , '\t', etk::regexpOpcodeError}, - { true , 'v' , '\v', etk::regexpOpcodeError}, - { true , '\\' , '\\', etk::regexpOpcodeError}, - { true , '&' , '&' , etk::regexpOpcodeError}, - { true , '0' , '\0', etk::regexpOpcodeError}, - { true , '@' , 0 , etk::regexpOpcodeNoChar}, -}; -const int64_t etk::regexp::constConvertionTableSize = sizeof(etk::regexp::constConvertionTable) / sizeof(struct etk::convertionTable) ; - -static const char* parseStatusTable[] = { - "parseStatusUnknow", - "parseStatusNone", - "parseStatusPartial", - "parseStatusFull" -}; -std::ostream& etk::regexp::operator <<(std::ostream& _os, enum etk::regexp::parseStatus _obj) { - _os << parseStatusTable[_obj]; - return _os; -} -std::ostream& etk::regexp::operator <<(std::ostream& _os, const etk::regexp::FindProperty& _obj) { - _os << "property([" << _obj.getPositionStart() << "," << _obj.getPositionStop() << "]*" << _obj.getMultiplicity() << " " << _obj.getStatus() << ")"; - return _os; -} - -std::string etk::regexp::createString(const std::vector<char32_t>& _data, int64_t _start, int64_t _stop) { - std::string output(ETK_BASH_COLOR_NORMAL); - for (int64_t iii=_start; iii<(int64_t)_data.size() && iii<_stop ; iii++) { - switch(_data[iii]) { - case regexpOpcodePTheseIn: output += std::string(ETK_BASH_COLOR_RED) + (char*)"(" + ETK_BASH_COLOR_NORMAL; break; - case regexpOpcodePTheseOut: output += std::string(ETK_BASH_COLOR_RED) + (char*)")" + ETK_BASH_COLOR_NORMAL; break; - case regexpOpcodeBracketIn: output += std::string(ETK_BASH_COLOR_YELLOW) + (char*)"[" + ETK_BASH_COLOR_NORMAL; break; - case regexpOpcodeBracketOut: output += std::string(ETK_BASH_COLOR_YELLOW) + (char*)"]" + ETK_BASH_COLOR_NORMAL; break; - case regexpOpcodeTo: output += std::string(ETK_BASH_COLOR_YELLOW) + (char*)"-" + ETK_BASH_COLOR_NORMAL; break; - case regexpOpcodeBracetIn: output += std::string(ETK_BASH_COLOR_GREEN) + (char*)"{" + ETK_BASH_COLOR_NORMAL; break; - case regexpOpcodeBracetOut: output += std::string(ETK_BASH_COLOR_GREEN) + (char*)"}" + ETK_BASH_COLOR_NORMAL; break; - case regexpOpcodeStar: output += std::string(ETK_BASH_COLOR_BLUE) + (char*)"*" + ETK_BASH_COLOR_NORMAL; break; - case regexpOpcodeDot: output += std::string(ETK_BASH_COLOR_BLUE) + (char*)"." + ETK_BASH_COLOR_NORMAL; break; - case regexpOpcodeQuestion: output += std::string(ETK_BASH_COLOR_BLUE) + (char*)"?" + ETK_BASH_COLOR_NORMAL; break; - case regexpOpcodePlus: output += std::string(ETK_BASH_COLOR_BLUE) + (char*)"+" + ETK_BASH_COLOR_NORMAL; break; - case regexpOpcodePipe: output += std::string(ETK_BASH_COLOR_BLUE) + (char*)"|" + ETK_BASH_COLOR_NORMAL; break; - case regexpOpcodeNoChar: output += std::string(ETK_BASH_COLOR_MAGENTA) + (char*)"@" + ETK_BASH_COLOR_NORMAL; break; - case regexpOpcodeStartOfLine: output += std::string(ETK_BASH_COLOR_MAGENTA) + (char*)"^" + ETK_BASH_COLOR_NORMAL; break; - case regexpOpcodeEndOfLine: output += std::string(ETK_BASH_COLOR_MAGENTA) + (char*)"$" + ETK_BASH_COLOR_NORMAL; break; - case regexpOpcodeDigit: output += std::string(ETK_BASH_COLOR_MAGENTA) + (char*)"\\d" + ETK_BASH_COLOR_NORMAL; break; - case regexpOpcodeDigitNot: output += std::string(ETK_BASH_COLOR_MAGENTA) + (char*)"\\D" + ETK_BASH_COLOR_NORMAL; break; - case regexpOpcodeLetter: output += std::string(ETK_BASH_COLOR_MAGENTA) + (char*)"\\l" + ETK_BASH_COLOR_NORMAL; break; - case regexpOpcodeLetterNot: output += std::string(ETK_BASH_COLOR_MAGENTA) + (char*)"\\L" + ETK_BASH_COLOR_NORMAL; break; - case regexpOpcodeSpace: output += std::string(ETK_BASH_COLOR_MAGENTA) + (char*)"\\s" + ETK_BASH_COLOR_NORMAL; break; - case regexpOpcodeSpaceNot: output += std::string(ETK_BASH_COLOR_MAGENTA) + (char*)"\\S" + ETK_BASH_COLOR_NORMAL; break; - case regexpOpcodeWord: output += std::string(ETK_BASH_COLOR_MAGENTA) + (char*)"\\w" + ETK_BASH_COLOR_NORMAL; break; - case regexpOpcodeWordNot: output += std::string(ETK_BASH_COLOR_MAGENTA) + (char*)"\\W" + ETK_BASH_COLOR_NORMAL; break; - case regexpOpcodeEOF: output += std::string(ETK_BASH_COLOR_MAGENTA) + (char*)"\\e" + ETK_BASH_COLOR_NORMAL; break; - case '\n': output += std::string(ETK_BASH_COLOR_MAGENTA) + (char*)"\\n" + ETK_BASH_COLOR_NORMAL; break; - case '\t': output += std::string(ETK_BASH_COLOR_MAGENTA) + (char*)"\\t" + ETK_BASH_COLOR_NORMAL; break; - default: - char plop[10]; - int8_t nb = u32char::convertUtf8(_data[iii], plop); - plop[nb] = '\0'; - output += plop; - break; - } - } - return output; -} - -char* etk::regexp::levelSpace(uint32_t _level) { - static char* tmpSpace = " "; - if (_level>30) { - return tmpSpace; - } - return tmpSpace + 60 - 2*_level; -} - - -int64_t etk::regexp::getLenOfPTheseElem(const std::vector<char32_t>& _data, int64_t _startPos) { - if (_startPos>=(int64_t)_data.size()){ - return 0; - } - int64_t pos = _startPos; - int32_t nbOpen = 0; - // special case of the (...) or | ==> we search '|' or ')' - if( _data[pos] == regexpOpcodePTheseOut - || _data[pos] == regexpOpcodePipe) { - return 0; - } - // find size ... - while (pos < (int64_t)_data.size() ) { - if(_data[pos] == regexpOpcodePTheseIn) { - // find a sub section : - nbOpen++; - } else if(0 < nbOpen) { - if (_data[pos] == regexpOpcodePTheseOut) { - nbOpen--; - if (0 > nbOpen) { - TK_ERROR("Error in the (...) find element at "<< pos); - return -1; - } - } - } else if( _data[pos] == regexpOpcodePTheseOut - || _data[pos] == regexpOpcodePipe) { - // Find the end of the (...) - // just return the size inside - int32_t sizeInside = pos - _startPos; - if (0 >= sizeInside) { - TK_ERROR("Error in the (...) no data at "<< pos-1); - return 0; - } - return sizeInside; - } - pos++; - } - return pos - _startPos; -} - -int64_t etk::regexp::getLenOfPThese(const std::vector<char32_t>& _data, int64_t _startPos) { - int64_t pos = _startPos; - int32_t nbOpen = 0; - // special case of the (...) or | ==> we search '|' or ')' - if(_data[pos]==regexpOpcodePTheseOut) { - return 0; - } - if(_data[pos]!=regexpOpcodePTheseIn) { - TK_ERROR(" find error in PThese"); - return 0; - } - pos++; - // find size ... - while (pos < (int64_t)_data.size() ) { - if(_data[pos]==regexpOpcodePTheseIn) { - // find a sub section : - nbOpen++; - } else if(0 < nbOpen) { - if (_data[pos]==regexpOpcodePTheseOut) { - nbOpen--; - if (0 > nbOpen) { - TK_ERROR("Error in the (...) find element at "<< pos); - return 0; - } - } - } else if(_data[pos]==regexpOpcodePTheseOut) { - // Find the end of the (...) - // just return the size inside - int32_t sizeInside = pos - _startPos-1; - if (0 >= sizeInside) { - TK_ERROR("Error in the (...) no data at "<< pos-1); - return 0; - } - return sizeInside; - } - pos++; - } - return 0; -} - - -int64_t etk::regexp::getLenOfBracket(const std::vector<char32_t>& _data, int64_t _startPos) { - int64_t pos = _startPos; - // special case of the (...) or | ==> we search '|' or ')' - if(_data[pos]==regexpOpcodeBracketOut) { - return 0; - } - if(_data[pos] != regexpOpcodeBracketIn) { - TK_ERROR("find no {..."); - return 0; - } - pos++; - // find size ... - while (pos < (int64_t)_data.size() ) { - if(_data[pos]==regexpOpcodeBracketOut) { - // Find the end of the [...] - // just return the size inside - int32_t sizeInside = pos - _startPos -1 ; - if (0 >= sizeInside) { - TK_ERROR("Error in the [...] no data at "<< pos-1); - return 0; - } - return sizeInside; - } else if( _data[pos] != regexpOpcodeTo - && _data[pos] > 0xFF ) { - TK_ERROR("Error in the [...] not permited element at "<< pos << " '" << (char)_data[pos] << "'"); - return 0; - } - pos++; - } - return 0; -} - - -int64_t etk::regexp::getLenOfBrace(const std::vector<char32_t>& _data, int64_t _startPos) { - int32_t pos = _startPos; - // special case of the (...) or | ==> we search '|' or ')' - if(_data[pos]==regexpOpcodeBracetOut) { - return 0; - } - if(_data[pos]!=regexpOpcodeBracetIn) { - TK_ERROR(" did not find brace IN { "); - return 0; - } - pos++; - // find size ... - while (pos < (int64_t)_data.size() ) { - if(_data[pos]==regexpOpcodeBracetOut) { - // Find the end of the [...] - // just return the size inside - int32_t sizeInside = pos - _startPos -1 ; - if (0 >= sizeInside) { - TK_ERROR("Error in the {...} no data at "<< pos-1); - return 0; - } - return sizeInside; - } else if( _data[pos] != ',' - && ( _data[pos] < '0' - || _data[pos] > '9') ) { - TK_ERROR("Error in the {...} not permited element at "<< pos << " '" << _data[pos] << "'"); - return 0; - } - pos++; - } - return 0; -} - - -int64_t etk::regexp::getLenOfNormal(const std::vector<char32_t>& _data, int64_t _startPos) { - int64_t pos = _startPos; - // find size ... - while (pos < (int64_t)_data.size() ) { - switch(_data[pos]) { - case regexpOpcodePTheseIn: - case regexpOpcodePTheseOut: - case regexpOpcodeBracketIn: - case regexpOpcodeBracketOut: - case regexpOpcodeBracetIn: - case regexpOpcodeBracetOut: - case regexpOpcodeTo: - case regexpOpcodeStar: - case regexpOpcodeDot: - case regexpOpcodeQuestion: - case regexpOpcodePlus: - case regexpOpcodePipe: - case regexpOpcodeStartOfLine: - case regexpOpcodeEndOfLine: - case regexpOpcodeDigit: - case regexpOpcodeDigitNot: - case regexpOpcodeLetter: - case regexpOpcodeLetterNot: - case regexpOpcodeSpace: - case regexpOpcodeSpaceNot: - case regexpOpcodeWord: - case regexpOpcodeWordNot: - { - // just return the size inside - int32_t sizeInside = pos - _startPos; - if (0 >= sizeInside) { - TK_ERROR("Error in the normal data : no data ..."); - } - return sizeInside; - } - break; - default : - // nothing to do ... - break; - } - pos++; - } - if ((int64_t)pos - (int64_t)_startPos < 0) { - return 0; - } - return pos - _startPos ; -} - - -bool etk::regexp::parseBrace(const std::vector<char32_t>& _data, uint32_t& _min, uint32_t& _max) { - //TK_INFO("parse {...} in "; DisplayElem(data); ); - int64_t k=0; - - int32_t firstElement = 0; - int32_t SecondElement = 0; - - while(k < (int64_t)_data.size()) { - if (_data[k] == ',') { - k++; - break; - } if (_data[k] == '}' ) { - SecondElement = firstElement; - goto allIsSet; - } else if(u32char::isInteger(_data[k]) == true) { - firstElement *= 10; - firstElement += u32char::toInt(_data[k]); - } else { - TK_ERROR("Can not parse this element " << (char)_data[k] << " at pos " << k); - return false; - } - k++; - } - if (k == (int64_t)_data.size()) { - SecondElement = firstElement; - } - while(k < (int64_t)_data.size()) { - if (_data[k] == ',') { - TK_ERROR("Can not find a second , in {} at pos " << k); - return false; - } if (_data[k] == '}') { - goto allIsSet; - } else if (true == u32char::isInteger(_data[k])) { - SecondElement *= 10; - SecondElement += u32char::toInt(_data[k]); - } else { - TK_ERROR("Can not parse this element " << _data[k] << " at pos " << k); - return false; - } - k++; - } - -allIsSet: - if (SecondElement == 0 && firstElement != 0) { - _min = 0; - _max = firstElement; - } else { - _min = firstElement; - _max = SecondElement; - } - if (_min > _max) { - TK_ERROR("Minimum=" << _min << " can not be < maximum=" << _max ); - return false; - } - return true; -} - -std::string etk::regexp::autoStr(const std::string& _data) { - std::string out; - for (auto &it : _data) { - if (it == '\n') { - out += "\\n"; - } else if (it == '\t') { - out += "\\t"; - } else if (it == '\r') { - out += "\\r"; - } else if (it == '\0') { - out += "\\0"; - } else if (it <= 0x20) { - out += std::to_string((int32_t)it); - } else { - out += it; - } - } - return out; -} - - -std::string etk::regexp::autoStr(char _data) { - std::string out; - if (_data == '\n') { - out += "\\n"; - } else if (_data == '\t') { - out += "\\t"; - } else if (_data == '\r') { - out += "\\r"; - } else if (_data == '\0') { - out += "\\0"; - } else if (_data <= 0x20) { - out += std::to_string((int32_t)_data); - } else { - out += _data; - } - return out; -} - -std::string etk::regexp::strTick(int32_t _pos) { - std::string out; - for (int32_t iii=0; iii<_pos; ++iii) { - out += " "; - } - out += "^"; - return out; -} - - - -namespace etk { - template<> std::string to_string<etk::RegExp<std::string>>(const etk::RegExp<std::string>& _val) { - return _val.getRegExp(); - } - template<> std::string to_string<etk::RegExp<std::u32string>>(const etk::RegExp<std::u32string>& _val) { - return _val.getRegExp(); - } - template<> std::u32string to_u32string<etk::RegExp<std::string>>(const etk::RegExp<std::string>& _val) { - return _val.getURegExp(); - } - template<> std::u32string to_u32string<etk::RegExp<std::u32string>>(const etk::RegExp<std::u32string>& _val) { - return _val.getURegExp(); - } - - template<> bool from_string<etk::RegExp<std::string>>(etk::RegExp<std::string>& _variableRet, const std::u32string& _value) { - _variableRet.compile(_value); - return true; - } - template<> bool from_string<etk::RegExp<std::u32string>>(etk::RegExp<std::u32string>& _variableRet, const std::u32string& _value) { - _variableRet.compile(_value); - return true; - } - template<> bool from_string<etk::RegExp<std::string>>(etk::RegExp<std::string>& _variableRet, const std::string& _value) { - _variableRet.compile(_value); - return true; - } - template<> bool from_string<etk::RegExp<std::u32string>>(etk::RegExp<std::u32string>& _variableRet, const std::string& _value) { - _variableRet.compile(_value); - return true; - } -}; \ No newline at end of file diff --git a/etk/RegExp.h b/etk/RegExp.h deleted file mode 100644 index d75f201..0000000 --- a/etk/RegExp.h +++ /dev/null @@ -1,1947 +0,0 @@ -/** - * @author Edouard DUPIN - * - * @copyright 2011, Edouard DUPIN, all right reserved - * - * @license APACHE v2.0 (see license file) - */ - -#include <etk/types.h> - -#ifndef __TK_REG_EXP_H__ -#define __TK_REG_EXP_H__ - -#include <etk/debug.h> -#include <etk/stdTools.h> -#include <vector> -#include <memory> - -#define TK_REG_DEBUG TK_HIDDEN -//#define TK_REG_DEBUG TK_VERBOSE -//#define TK_REG_DEBUG TK_DEBUG - -#define TK_REG_DEBUG_3 TK_HIDDEN -//#define TK_REG_DEBUG_3 TK_VERBOSE -//#define TK_REG_DEBUG_3 TK_DEBUG - -#define TK_REG_DEBUG_2 TK_HIDDEN -//#define TK_REG_DEBUG_2 TK_VERBOSE - -//regular colors -#define ETK_BASH_COLOR_BLACK "\e[0;30m" -#define ETK_BASH_COLOR_RED "\e[0;31m" -#define ETK_BASH_COLOR_GREEN "\e[0;32m" -#define ETK_BASH_COLOR_YELLOW "\e[0;33m" -#define ETK_BASH_COLOR_BLUE "\e[0;34m" -#define ETK_BASH_COLOR_MAGENTA "\e[0;35m" -#define ETK_BASH_COLOR_CYAN "\e[0;36m" -#define ETK_BASH_COLOR_WHITE "\e[0;37m" -//emphasized (bolded) colors -#define ETK_BASH_COLOR_BOLD_BLACK "\e[1;30m" -#define ETK_BASH_COLOR_BOLD_RED "\e[1;31m" -#define ETK_BASH_COLOR_BOLD_GREEN "\e[1;32m" -#define ETK_BASH_COLOR_BOLD_YELLOW "\e[1;33m" -#define ETK_BASH_COLOR_BOLD_BLUE "\e[1;34m" -#define ETK_BASH_COLOR_BOLD_MAGENTA "\e[1;35m" -#define ETK_BASH_COLOR_BOLD_CYAN "\e[1;36m" -#define ETK_BASH_COLOR_BOLD_WHITE "\e[1;37m" -//background colors -#define ETK_BASH_COLOR_BG_BLACK "\e[40m" -#define ETK_BASH_COLOR_BG_RED "\e[41m" -#define ETK_BASH_COLOR_BG_GREEN "\e[42m" -#define ETK_BASH_COLOR_BG_YELLOW "\e[43m" -#define ETK_BASH_COLOR_BG_BLUE "\e[44m" -#define ETK_BASH_COLOR_BG_MAGENTA "\e[45m" -#define ETK_BASH_COLOR_BG_CYAN "\e[46m" -#define ETK_BASH_COLOR_BG_WHITE "\e[47m" -// Return to the normal color setings -#define ETK_BASH_COLOR_NORMAL "\e[0m" - - -namespace etk { -//in the unicode section we have : [E000..F8FF] private area ==> we will store element in this area: -// internal define to permit to have all needed system -enum regExpPrivateSection { - regexpOpcodePTheseIn=0xE000, /* ( */ - regexpOpcodePTheseOut,/* ) */ - regexpOpcodeBracketIn,/* [ */ - regexpOpcodeBracketOut,/* ] */ - regexpOpcodeBracetIn,/* { */ - regexpOpcodeBracetOut,/* } */ - regexpOpcodeTo,/* - */ - regexpOpcodeStar,/* * */ - regexpOpcodeDot,/* . */ - regexpOpcodeEOF,/* \e */ - regexpOpcodeQuestion,/* ? */ - regexpOpcodePlus,/* + */ - regexpOpcodePipe,/* | */ - regexpOpcodeStartOfLine,/* ^ this is also NOT, but not manage */ - regexpOpcodeEndOfLine,/* $ */ - regexpOpcodeDigit,/* \d */ - regexpOpcodeDigitNot,/* \D */ - regexpOpcodeLetter,/* \l */ - regexpOpcodeLetterNot,/* \L */ - regexpOpcodeSpace,/* \s */ - regexpOpcodeSpaceNot,/* \S */ - regexpOpcodeWord,/* \w */ - regexpOpcodeWordNot,/* \W */ - regexpOpcodeNoChar,/* \@ */ - regexpOpcodeError, // not used -}; -/* -normal mode : - (...) sub element is separate with | - \d Digits [0-9] - \D NOT a digit [^0-9] - \l Letters [a-zA-Z] - \L NOT a Letter [^a-zA-Z] - \s Whitespace [ \t\n\r\f\v] - \S NOT Whitespace [^ \t\n\r\f\v] - \w "Word" character [a-zA-Z0-9_] - \W NOT a "Word" character [^a-zA-Z0-9_] - \@ at the start or the end not in the parsing of element ==> check if \w is not present (other regExp will be <> ...) - \e end-of-file / end-of-data [\x00] ==> not counted - [anjdi] or [a-gt-j] range - . dot [^\x00] - $ End / Start of line of line ==> ce sera un truc supl�mentaire comme le \@ - @ Previous -==> TODO : - ^in the [] invertion of the range element - Sart of line - force regexp to be the shortest. - -multiplicity : - * ==> {0, 2147483647} (try to have the minimum size) - ? ==> {0, 1} - + ==> {1, 2147483647} (try to have the minimum size) - {x} ==> {x, x} (try to have the minimum size) - {x,y} ==> {x, y} (try to have the minimum size) -*/ -/** - * @brief convertion table of every element in a regular expression. - * @not-in-doc - */ -struct convertionTable { - bool haveBackSlash; - char inputValue; - char newValue; - enum etk::regExpPrivateSection specialChar; -}; -namespace regexp { -enum parseStatus { - parseStatusUnknow, //!< No status set - parseStatusNone, //!< parse have no data - parseStatusPartial, //!< parse is done partially, and can have more data - parseStatusFull //!< can not parse more elements -}; -//! @not-in-doc -std::ostream& operator <<(std::ostream& _os, enum parseStatus _obj); -//! @not-in-doc -extern const struct convertionTable constConvertionTable[]; -//! @not-in-doc -extern const int64_t constConvertionTableSize; -//! @not-in-doc -std::string createString(const std::vector<char32_t>& _data, int64_t _start=0, int64_t _stop=0x7FFFFFFF); -//! @not-in-doc -char * levelSpace(uint32_t _level); -//! @not-in-doc -int64_t getLenOfPTheseElem(const std::vector<char32_t>& _data, int64_t _startPos); -//! @not-in-doc -int64_t getLenOfPThese(const std::vector<char32_t>& _data, int64_t _startPos); -//! @not-in-doc -int64_t getLenOfBracket(const std::vector<char32_t>& _data, int64_t _startPos); -//! @not-in-doc -int64_t getLenOfBrace(const std::vector<char32_t>& _data, int64_t _startPos); -//! @not-in-doc -int64_t getLenOfNormal(const std::vector<char32_t>& _data, int64_t _startPos); -//! @not-in-doc -bool parseBrace(const std::vector<char32_t>& _data, uint32_t& _min, uint32_t& _max); -//! @not-in-doc -std::string autoStr(const std::string& _data); -std::string autoStr(char _data); -std::string strTick(int32_t _pos); - - -#undef __class__ -#define __class__ "regExp::FindProperty" - -/** - * @brief Node Elements for every-one - * @not-in-doc - */ -class FindProperty { - public: - int64_t m_positionStart; //!< find start position - int64_t m_positionStop; //!< find end position - uint32_t m_multiplicity; //!< curent multiplicity of find element - enum parseStatus m_status; //!< curent status of parsing - int32_t m_subIndex; //!< dubindex int the upper list ... for (...) - public: - std::vector<FindProperty> m_subProperty; //!< list of all sub elements - public: - FindProperty() : - m_positionStart(-1), - m_positionStop(-1), - m_multiplicity(0), - m_status(parseStatusUnknow), - m_subIndex(-1) { - // nothing to do ... - } - void reset() { - m_positionStart = -1; - m_positionStop = -1; - m_multiplicity = 0; - m_status = parseStatusUnknow; - m_subIndex = -1; - } - int64_t getPositionStart() const { - return m_positionStart; - } - void setPositionStart(int64_t _newPos) { - m_positionStart = _newPos; - if (m_positionStop < m_positionStart) { - m_positionStop = m_positionStart; - } - } - int64_t getPositionStop() const { - return m_positionStop; - } - void setPositionStop(int64_t _newPos) { - m_positionStop = _newPos; - if (m_positionStop < m_positionStart) { - TK_CRITICAL("set volontary a stop position before end : " << this); - } - } - uint32_t getMultiplicity() const { - return m_multiplicity; - } - void setMultiplicity(uint32_t _newVal) { - m_multiplicity = _newVal; - } - void multiplicityDecrement() { - m_multiplicity--; - } - void multiplicityIncrement() { - m_multiplicity++; - } - int64_t getFindLen() const { - if (m_positionStop < 0) { - return 0; - } - return m_positionStop - m_positionStart; - } - void setStatus(enum parseStatus _status) { - m_status = _status; - } - enum parseStatus getStatus() const { - return m_status; - } - int32_t getSubIndex() const { - return m_subIndex; - } - void setSubIndex(int32_t _newIndex) { - m_subIndex = _newIndex; - } - - void display(const std::string& _data, int32_t _level = 0) { - TK_INFO("prop : " << levelSpace(_level) << " [" - << m_positionStart << "," - << m_positionStop << "]*" - << m_multiplicity << " data='" - << std::string(_data, m_positionStart, m_positionStop-m_positionStart) << "'"); - for (auto &it : m_subProperty) { - it.display(_data, _level+1); - } - } - void display(int32_t _level = 0) { - TK_INFO("prop : " << levelSpace(_level) << " [" - << m_positionStart << "," - << m_positionStop << "]*" - << m_multiplicity); - for (auto &it : m_subProperty) { - it.display(_level+1); - } - } -}; - -std::ostream& operator <<(std::ostream& _os, const FindProperty& _obj); - -#undef __class__ -#define __class__ "regExp::Node" - -/** - * @brief Node Elements for every-one - * @not-in-doc - */ -template<class CLASS_TYPE> class Node { - protected : - // Data Section ... (can have no data...) - std::vector<char32_t> m_regExpData; //!< data to parse and compare in some case ... - int32_t m_nodeLevel; - public : - /** - * @brief Constructor - */ - Node(int32_t _level) : - m_regExpData(), - m_nodeLevel(_level), - m_canHaveMultiplicity(true), - m_multipleMin(1), - m_multipleMax(1), - m_countOutput(true) { - - }; - /** - * @brief Destructor - */ - virtual ~Node() { }; - /** - * @brief Generate the regular expression with the current "converted string" - * @param[in] _data Property of the regexp - * @param[in] _level Node level in the tree - * @return the number of element used - */ - virtual int32_t generate(const std::vector<char32_t>& _data) { - return 0; - }; - /** - * @brief Parse the current node - * @param[in] _data Data to parse (start pointer / or class that have access with operator[] ) - * @param[in] _currentPos Current parsing position. - * @param[in] _lenMax Maximum position to parse the data (can be not hte end of the data due to the fact sometime we want to parse sub section). - * @return Full Find something (can not find more...) - * @return Partial can find more data ... - * @return None Find nothing - */ - virtual void parse(const CLASS_TYPE& _data, int64_t _currentPos, int64_t _lenMax, FindProperty& _property)=0; - /** - * @brief Display the current node properties - * @param[in] level of the node - */ - virtual void display() { - TK_INFO("Find NODE : " << levelSpace(m_nodeLevel) << "@???@ {" << getMultMin() << "," << getMultMax() << "} subdata=" << createString(m_regExpData) ); - }; - protected: - bool m_canHaveMultiplicity; //!< minimum repetition (included) - public: - /** - * @brief Set the multiplicity capabilities. - * @paran[in] _newVal new capabilities. - */ - void setMultiplicityAbility(bool _newVal) { - m_canHaveMultiplicity = _newVal; - if (_newVal == false) { - m_multipleMin = 1; - m_multipleMax = 1; - } - }; - protected: - /** - * @brief Get the multiplicity capabilities. - * @return Multiplicity availlable. - */ - bool getMultiplicityAbility() const { - return m_canHaveMultiplicity; - }; - protected: - uint32_t m_multipleMin; //!< minimum repetition (included) - uint32_t m_multipleMax; //!< maximum repetition (included) - public: - /** - * @brief Set the multiplicity of this Node. - * @param[in] _min The minimum appear time. - * @param[in] _max The maximum appear time. - */ - void setMult(uint32_t _min, uint32_t _max) { - if (m_canHaveMultiplicity == false) { - TK_WARNING("can not set multiplicity ..."); - return; - } - m_multipleMin = std::max(_min, (uint32_t)0); - m_multipleMax = std::max(_max, (uint32_t)1); - } - protected: - /** - * @brief Get the minimum multiplicity. - * @return The minimum appear availlable. - */ - uint32_t getMultMin() const { - return m_multipleMin; - }; - /** - * @brief Get the maximum multiplicity. - * @return The maximum appear availlable. - */ - uint32_t getMultMax() const { - return m_multipleMax; - }; - protected: - bool m_countOutput; //!< minimum repetition (included) - public: - /** - * @brief Set the output count availlable in regexp. - * @paran[in] _newVal new capabilities. - */ - void setCountOutput(bool _newVal) { - m_countOutput = _newVal; - }; - protected: - /** - * @brief Get the output count availlable in regexp. - * @return count availlable. - */ - bool getCountOutput() const { - return m_countOutput; - }; -}; - -#undef __class__ -#define __class__ "regExp::NodeValue" - -template<class CLASS_TYPE> class NodeValue : public Node<CLASS_TYPE> { - protected : - // SubNodes : - std::vector<char32_t> m_data; - public : - - /** - * @brief Constructor - */ - NodeValue(int32_t _level) : Node<CLASS_TYPE>::Node(_level) { }; - NodeValue(const std::vector<char32_t>& _data, int32_t _level) : Node<CLASS_TYPE>::Node(_level) { - generate(_data); - }; - - int32_t generate(const std::vector<char32_t>& _data) { - Node<CLASS_TYPE>::m_regExpData = _data; - TK_REG_DEBUG("Request Parse \"Value\" data=" << createString(Node<CLASS_TYPE>::m_regExpData) ); - m_data.clear(); - for (int32_t i=0; i<(int64_t)Node<CLASS_TYPE>::m_regExpData.size(); i++) { - m_data.push_back(Node<CLASS_TYPE>::m_regExpData[i]); - } - return _data.size(); - }; - virtual void parse(const CLASS_TYPE& _data, int64_t _currentPos, int64_t _lenMax, FindProperty& _property) { - TK_REG_DEBUG("Parse " << levelSpace(Node<CLASS_TYPE>::m_nodeLevel) << " Value{" << Node<CLASS_TYPE>::m_multipleMin << "," << Node<CLASS_TYPE>::m_multipleMax << "} : " << (char)m_data[0]); - TK_REG_DEBUG(" " << levelSpace(Node<CLASS_TYPE>::m_nodeLevel) << " " << createString(Node<CLASS_TYPE>::m_regExpData)); - TK_REG_DEBUG_3(" " << levelSpace(Node<CLASS_TYPE>::m_nodeLevel) << " Value " << _property); - if (m_data.size() == 0) { - TK_ERROR("No data inside type elemTypeValue"); - _property.setStatus(parseStatusNone); - return; - } - if (_property.getStatus() != parseStatusPartial) { - if (Node<CLASS_TYPE>::m_multipleMin == 0) { - _property.setPositionStop(_property.getPositionStart()); - _property.setStatus(parseStatusPartial); - TK_REG_DEBUG("Parse " << levelSpace(Node<CLASS_TYPE>::m_nodeLevel) << " ==> partial (minSize=0)"); - return; - } - } - bool tmpFind = true; - int32_t findLen = 0; - while( _property.getMultiplicity() < Node<CLASS_TYPE>::m_multipleMax - && tmpFind == true) { - uint32_t ofset = 0; - int64_t kkk; - for (kkk=0; findLen+kkk<_lenMax && kkk < (int64_t)m_data.size(); kkk++) { - TK_REG_DEBUG(" " << levelSpace(Node<CLASS_TYPE>::m_nodeLevel) - << " check element value : '" - << etk::regexp::autoStr((char)m_data[kkk]) - << "' ?= '" - << etk::regexp::autoStr((char)_data[_currentPos+findLen+kkk]) - << "'"); - if (m_data[kkk] != (char32_t)_data[_currentPos+findLen+kkk]) { - tmpFind=false; - break; - } - ofset++; - } - if (kkk != (int64_t)m_data.size()) { - // parsing not ended ... - tmpFind = false; - } - // Update local ofset of data - if (tmpFind == true) { - findLen += ofset; - } - _property.multiplicityIncrement(); - } - _property.setPositionStop(_property.getPositionStart() + findLen); - if ( _property.getMultiplicity() >= Node<CLASS_TYPE>::m_multipleMin - && _property.getMultiplicity() <= Node<CLASS_TYPE>::m_multipleMax - && findLen > 0) { - _property.setStatus(parseStatusFull); - TK_REG_DEBUG(" " << levelSpace(Node<CLASS_TYPE>::m_nodeLevel) << " value find " << _property); - return; - } else if (Node<CLASS_TYPE>::m_multipleMin == 0) { - _property.setStatus(parseStatusFull); - TK_REG_DEBUG(" " << levelSpace(Node<CLASS_TYPE>::m_nodeLevel) << " value find " << _property); - return; - } - _property.setStatus(parseStatusNone); - return; - }; - - void display() { - TK_INFO("Find NODE : " << levelSpace(Node<CLASS_TYPE>::m_nodeLevel) << "@Value@ {" - << Node<CLASS_TYPE>::m_multipleMin << "," - << Node<CLASS_TYPE>::m_multipleMax << "} subdata=" - << createString(Node<CLASS_TYPE>::m_regExpData) - << " data: " << createString(m_data) ); - }; -}; -#undef __class__ -#define __class__ "regExp::NodeRangeValue" - -/** - * @not-in-doc - */ -template<class CLASS_TYPE> class NodeRangeValue : public Node<CLASS_TYPE> { - private: - std::vector<std::pair<char32_t, char32_t>> m_rangeList; - std::vector<char32_t> m_dataList; - bool m_invert; - const char *m_typeName; - public : - /** - * @brief Constructor - */ - NodeRangeValue(int32_t _level) : - Node<CLASS_TYPE>::Node(_level), - m_invert(false), - m_typeName("auto-range") { - - }; - /** - * @brief Destructor - */ - virtual ~NodeRangeValue() { }; - void addRange(char32_t _start, char32_t _stop) { - m_rangeList.push_back(std::make_pair(_start, _stop)); - } - void addValue(char32_t _value) { - m_dataList.push_back(_value); - } - void setInvertion(bool _newVal) { - m_invert = _newVal; - } - const char* getDescriptiveName() const { - return m_typeName; - } - void setDescriptiveName(const char* _name) { - m_typeName = _name; - } - // Truc a faire : multipliciter min, return partiel, et ... - virtual void parse(const CLASS_TYPE& _data, int64_t _currentPos, int64_t _lenMax, FindProperty& _property) { - int32_t findLen = 0; - TK_REG_DEBUG("Parse " << levelSpace(Node<CLASS_TYPE>::m_nodeLevel) << " " << getDescriptiveName() << "{" << Node<CLASS_TYPE>::m_multipleMin << "," << Node<CLASS_TYPE>::m_multipleMax << "}"); - TK_REG_DEBUG(" " << levelSpace(Node<CLASS_TYPE>::m_nodeLevel) << " " << createString(Node<CLASS_TYPE>::m_regExpData)); - TK_REG_DEBUG_3(" " << levelSpace(Node<CLASS_TYPE>::m_nodeLevel) << " " << getDescriptiveName() << " " << _property); - if (_property.getStatus() != parseStatusPartial) { - if (Node<CLASS_TYPE>::m_multipleMin == 0) { - _property.setPositionStop(_property.getPositionStart()); - _property.setStatus(parseStatusPartial); - TK_REG_DEBUG("Parse " << levelSpace(Node<CLASS_TYPE>::m_nodeLevel) << " ==> partial (minSize=0)"); - return; - } - } - char32_t tmpVal = _data[_currentPos]; - bool find = false; - // Check range - for (auto &it : m_rangeList) { - TK_REG_DEBUG_3(" " << levelSpace(Node<CLASS_TYPE>::m_nodeLevel) << " " << getDescriptiveName() << " range : " << autoStr(it.first) << " < " << autoStr(tmpVal) << " < " << autoStr(it.second)); - if ( tmpVal >= it.first - && tmpVal <= it.second) { - TK_REG_DEBUG_3(" " << levelSpace(Node<CLASS_TYPE>::m_nodeLevel) << " " << getDescriptiveName() << " OK"); - find = true; - break; - } - } - // Check Value - if (find == false) { - for (auto &it : m_dataList) { - TK_REG_DEBUG_3(" " << levelSpace(Node<CLASS_TYPE>::m_nodeLevel) << " " << getDescriptiveName() << " value : '" << autoStr(tmpVal) << "'=?='" << autoStr(it) << "'"); - if (tmpVal == it) { - TK_REG_DEBUG_3(" " << levelSpace(Node<CLASS_TYPE>::m_nodeLevel) << " " << getDescriptiveName() << " OK"); - find = true; - break; - } - } - } - // check inverse request: - if ( ( find == true - && m_invert == false) - || ( find == false - && m_invert == true) ) { - find = true; - TK_REG_DEBUG(" " << levelSpace(Node<CLASS_TYPE>::m_nodeLevel) << " " << getDescriptiveName() << " : Find (invert=" << m_invert << ")"); - } else { - TK_REG_DEBUG(" " << levelSpace(Node<CLASS_TYPE>::m_nodeLevel) << " " << getDescriptiveName() << " : Not find (invert=" << m_invert << ")"); - find = false; - } - if (find == true) { - _property.multiplicityIncrement(); - int64_t newPosVal = _property.getPositionStop(); - if (newPosVal == -1) { - newPosVal = _property.getPositionStart() + 1; - } else { - newPosVal++; - } - if(_property.getMultiplicity() > Node<CLASS_TYPE>::m_multipleMax) { - _property.multiplicityDecrement(); - _property.setStatus(parseStatusFull); - } else { - if (Node<CLASS_TYPE>::getCountOutput() == true) { - _property.setPositionStop(newPosVal); - } else { - _property.setPositionStop(_property.getPositionStart()); - } - if (_currentPos>=_lenMax) { - _property.setStatus(parseStatusFull); - } else { - if(_property.getMultiplicity() == Node<CLASS_TYPE>::m_multipleMax) { - _property.setStatus(parseStatusFull); - } else { - _property.setStatus(parseStatusPartial); - } - } - } - }else { - if (_property.getPositionStop() != -1) { - if (_property.getMultiplicity() == 0) { - // simple optimisation ==> permit to remove parsing 1 cycle - _property.setStatus(parseStatusNone); - } else { - _property.setStatus(parseStatusFull); - } - } else if (_property.getMultiplicity() == Node<CLASS_TYPE>::m_multipleMin) { - _property.setPositionStop(_property.getPositionStart()); - _property.setStatus(parseStatusFull); - } else { - _property.setStatus(parseStatusNone); - } - } - TK_REG_DEBUG(" " << levelSpace(Node<CLASS_TYPE>::m_nodeLevel) << " " << getDescriptiveName() << " : out=" << _property); - return; - }; - virtual void display() { - TK_INFO("Find NODE : " << levelSpace(Node<CLASS_TYPE>::m_nodeLevel) << " " << getDescriptiveName() << " {" - << Node<CLASS_TYPE>::m_multipleMin << "," - << Node<CLASS_TYPE>::m_multipleMax << - "} subdata=" << createString(Node<CLASS_TYPE>::m_regExpData)); - }; -}; - -#undef __class__ -#define __class__ "regExp::NodeBracket" - -/** - * @not-in-doc - */ -template<class CLASS_TYPE> class NodeBracket : public NodeRangeValue<CLASS_TYPE> { - public: - /** - * @brief Constructor - */ - NodeBracket(int32_t _level) : NodeRangeValue<CLASS_TYPE>::NodeRangeValue(_level) { - NodeRangeValue<CLASS_TYPE>::setDescriptiveName("[...]"); - }; - NodeBracket(const std::vector<char32_t>& _data, int32_t _level) : NodeRangeValue<CLASS_TYPE>::NodeRangeValue(_level) { - generate(_data); - }; - int32_t generate(const std::vector<char32_t>& _data) { - Node<CLASS_TYPE>::m_regExpData = _data; - TK_REG_DEBUG("Request Parse [...] data=" << createString(Node<CLASS_TYPE>::m_regExpData) ); - - char32_t lastElement = 0; - bool multipleElement = false; - // - for (int32_t kkk=0; kkk<(int64_t)Node<CLASS_TYPE>::m_regExpData.size(); kkk++) { - if ( Node<CLASS_TYPE>::m_regExpData[kkk] == regexpOpcodeTo - && multipleElement == true) { - TK_ERROR("Can not have 2 consecutive - in [...]"); - return 0; - } else if (multipleElement == true) { - NodeRangeValue<CLASS_TYPE>::addRange(lastElement, Node<CLASS_TYPE>::m_regExpData[kkk]); - multipleElement = false; - lastElement = 0; - } else if(Node<CLASS_TYPE>::m_regExpData[kkk] == regexpOpcodeTo) { - multipleElement = true; - } else { - if (lastElement != 0) { - NodeRangeValue<CLASS_TYPE>::addValue(lastElement); - } - lastElement = Node<CLASS_TYPE>::m_regExpData[kkk]; - } - } - if (lastElement != 0) { - NodeRangeValue<CLASS_TYPE>::addValue(lastElement); - } - return _data.size(); - }; -}; - -#undef __class__ -#define __class__ "regExp::NodeSOL" - -/** - * @not-in-doc - */ -template<class CLASS_TYPE> class NodeSOL : public Node<CLASS_TYPE> { - public : - /** - * @brief Constructor - */ - NodeSOL(int32_t _level) : Node<CLASS_TYPE>::Node(_level) { }; - /** - * @brief Destructor - */ - ~NodeSOL() { }; - virtual void parse(const CLASS_TYPE& _data, int64_t _currentPos, int64_t _lenMax, FindProperty& _property) { - int32_t findLen = 0; - bool tmpFind = false; - TK_REG_DEBUG("Parse " << levelSpace(Node<CLASS_TYPE>::m_nodeLevel) << " SOL{" << Node<CLASS_TYPE>::m_multipleMin << "," << Node<CLASS_TYPE>::m_multipleMax << "}"); - TK_REG_DEBUG(" " << levelSpace(Node<CLASS_TYPE>::m_nodeLevel) << " " << createString(Node<CLASS_TYPE>::m_regExpData)); - // TODO : is it really what I want ... (maybe next ellement will be requested... (check if previous element is \r or \n - while ( _property.getMultiplicity() < Node<CLASS_TYPE>::m_multipleMax - && tmpFind == true - && _property.getMultiplicity() <_lenMax) { - char32_t tmpVal = _data[_currentPos+_property.getMultiplicity()]; - // TODO : check if the file is a \r\n file ... - if ( tmpVal == 0x0d /* <cr> */ - || tmpVal == 0x0A /* <lf> */) { - findLen += 1; - } else { - tmpFind=false; - } - _property.multiplicityIncrement(); - } - _property.setPositionStop(_property.getPositionStart() + findLen); - if( _property.getMultiplicity()>=Node<CLASS_TYPE>::m_multipleMin - && _property.getMultiplicity()<=Node<CLASS_TYPE>::m_multipleMax - && findLen>0 ) { - TK_REG_DEBUG("find " << findLen); - _property.setStatus(parseStatusFull); - return; - } else if( 0 == Node<CLASS_TYPE>::m_multipleMin ) { - TK_REG_DEBUG("find size=0"); - _property.setStatus(parseStatusFull); - return; - } - _property.setStatus(parseStatusNone); - return; - }; - void display() { - TK_INFO("Find NODE : " << levelSpace(Node<CLASS_TYPE>::m_nodeLevel) << "@SOL@ {" - << Node<CLASS_TYPE>::m_multipleMin << "," - << Node<CLASS_TYPE>::m_multipleMax << "} subdata=" - << createString(Node<CLASS_TYPE>::m_regExpData) ); - }; -}; - - -class elementPos_ts { - public: - int64_t start; - int64_t stop; -}; - -#undef __class__ -#define __class__ "regExp::NodePTheseElem" - -template<class CLASS_TYPE> class NodePThese; - -/** - * @not-in-doc - */ -template<class CLASS_TYPE> class NodePTheseElem : public Node<CLASS_TYPE> { - protected : - // SubNodes : - std::vector<Node<CLASS_TYPE>*> m_subNode; - public : - /** - * @brief Constructor - */ - NodePTheseElem(int32_t _level) : Node<CLASS_TYPE>::Node(_level) { }; - NodePTheseElem(const std::vector<char32_t>& _data, int32_t _level) : Node<CLASS_TYPE>::Node(_level) { - generate(_data); - }; - /** - * @brief Destructor - */ - ~NodePTheseElem() { - /* - for (auto it : m_subNode) { - delete *it; - *it = nullptr; - } - */ - m_subNode.clear(); - }; - int32_t generate(const std::vector<char32_t>& _data) { - Node<CLASS_TYPE>::m_regExpData = _data; - TK_REG_DEBUG("Request Parse (elem) data=" << createString(Node<CLASS_TYPE>::m_regExpData) ); - int64_t pos = 0; - int64_t elementSize = 0; - std::vector<char32_t> tmpData; - while (pos < (int64_t)Node<CLASS_TYPE>::m_regExpData.size()) { - tmpData.clear(); - switch (Node<CLASS_TYPE>::m_regExpData[pos]) { - case regexpOpcodePTheseIn:{ - elementSize=getLenOfPThese(Node<CLASS_TYPE>::m_regExpData, pos); - for (int64_t kkk=pos+1; kkk<pos+elementSize+1; kkk++) { - tmpData.push_back(Node<CLASS_TYPE>::m_regExpData[kkk]); - } - // add to the subnode list : - m_subNode.push_back(new NodePThese<CLASS_TYPE>(tmpData, Node<CLASS_TYPE>::m_nodeLevel+1)); - // move current position ... - pos += elementSize+1; - } - break; - case regexpOpcodePTheseOut: - TK_ERROR("Impossible case : ')' " << pos); - return false; - case regexpOpcodeBracketIn: { - elementSize=getLenOfBracket(Node<CLASS_TYPE>::m_regExpData, pos); - for (int64_t kkk=pos+1; kkk<pos+elementSize+1; kkk++) { - tmpData.push_back(Node<CLASS_TYPE>::m_regExpData[kkk]); - } - // add to the subnode list : - m_subNode.push_back(new NodeBracket<CLASS_TYPE>(tmpData, Node<CLASS_TYPE>::m_nodeLevel+1)); - // move current position ... - pos += elementSize+1; - } - break; - case regexpOpcodeBracketOut: - TK_ERROR("Impossible case : ']' " << pos); - return false; - case regexpOpcodeBracetIn: { - elementSize=getLenOfBrace(Node<CLASS_TYPE>::m_regExpData, pos); - for (int64_t kkk=pos+1; kkk<pos+elementSize+1; kkk++) { - tmpData.push_back(Node<CLASS_TYPE>::m_regExpData[kkk]); - } - uint32_t min = 0; - uint32_t max = 0; - if (false == parseBrace(tmpData, min, max)) { - return false; - } - setMultiplicityOnLastNode(min, max); - pos += elementSize+1; - } - break; - case regexpOpcodeBracetOut: - TK_ERROR("Impossible case : '}' " << pos); - return false; - case regexpOpcodeTo: - TK_ERROR("Impossible case : '-' " << pos); - return false; - case regexpOpcodeStar: - setMultiplicityOnLastNode(0, 0x7FFFFFFF); - break; - case regexpOpcodeQuestion: - setMultiplicityOnLastNode(0, 1); - break; - case regexpOpcodePlus: - setMultiplicityOnLastNode(1, 0x7FFFFFFF); - break; - case regexpOpcodePipe: - TK_ERROR("Impossible case : '|' " << pos); - return false; - case regexpOpcodeEOF: - { - NodeRangeValue<CLASS_TYPE>* tmpNode = new NodeRangeValue<CLASS_TYPE>(Node<CLASS_TYPE>::m_nodeLevel+1); - tmpNode->setDescriptiveName("EOF"); - tmpNode->addValue('\0'); - tmpNode->setCountOutput(false); - tmpNode->setMultiplicityAbility(false); - m_subNode.push_back(tmpNode); - } - break; - case regexpOpcodeDot: - { - NodeRangeValue<CLASS_TYPE>* tmpNode = new NodeRangeValue<CLASS_TYPE>(Node<CLASS_TYPE>::m_nodeLevel+1); - tmpNode->setDescriptiveName("dot"); - tmpNode->addValue('\0'); - tmpNode->setInvertion(true); - m_subNode.push_back(tmpNode); - } - break; - case regexpOpcodeStartOfLine: - m_subNode.push_back(new NodeSOL<CLASS_TYPE>(Node<CLASS_TYPE>::m_nodeLevel+1)); - break; - case regexpOpcodeEndOfLine: - { - NodeRangeValue<CLASS_TYPE>* tmpNode = new NodeRangeValue<CLASS_TYPE>(Node<CLASS_TYPE>::m_nodeLevel+1); - tmpNode->setDescriptiveName("EOL"); - tmpNode->addValue('\n'); - m_subNode.push_back(tmpNode); - } - break; - case regexpOpcodeDigit: - { - NodeRangeValue<CLASS_TYPE>* tmpNode = new NodeRangeValue<CLASS_TYPE>(Node<CLASS_TYPE>::m_nodeLevel+1); - tmpNode->setDescriptiveName("digit"); - tmpNode->addRange('0', '9'); - m_subNode.push_back(tmpNode); - } - break; - case regexpOpcodeDigitNot: - { - NodeRangeValue<CLASS_TYPE>* tmpNode = new NodeRangeValue<CLASS_TYPE>(Node<CLASS_TYPE>::m_nodeLevel+1); - tmpNode->setDescriptiveName("digit-not"); - tmpNode->addRange('0', '9'); - tmpNode->setInvertion(true); - m_subNode.push_back(tmpNode); - } - break; - case regexpOpcodeLetter: - { - NodeRangeValue<CLASS_TYPE>* tmpNode = new NodeRangeValue<CLASS_TYPE>(Node<CLASS_TYPE>::m_nodeLevel+1); - tmpNode->setDescriptiveName("letter"); - tmpNode->addRange('a', 'z'); - tmpNode->addRange('A', 'Z'); - m_subNode.push_back(tmpNode); - } - break; - case regexpOpcodeLetterNot: - { - NodeRangeValue<CLASS_TYPE>* tmpNode = new NodeRangeValue<CLASS_TYPE>(Node<CLASS_TYPE>::m_nodeLevel+1); - tmpNode->setDescriptiveName("letter-not"); - tmpNode->addRange('a', 'z'); - tmpNode->addRange('A', 'Z'); - tmpNode->setInvertion(true); - m_subNode.push_back(tmpNode); - } - break; - case regexpOpcodeSpace: - { - NodeRangeValue<CLASS_TYPE>* tmpNode = new NodeRangeValue<CLASS_TYPE>(Node<CLASS_TYPE>::m_nodeLevel+1); - tmpNode->setDescriptiveName("space"); - tmpNode->addValue(' '); - tmpNode->addValue('\t'); - tmpNode->addValue('\n'); - tmpNode->addValue('\r'); - tmpNode->addValue('\f'); - tmpNode->addValue('\v'); - m_subNode.push_back(tmpNode); - } - break; - case regexpOpcodeSpaceNot: - { - NodeRangeValue<CLASS_TYPE>* tmpNode = new NodeRangeValue<CLASS_TYPE>(Node<CLASS_TYPE>::m_nodeLevel+1); - tmpNode->setDescriptiveName("space-not"); - tmpNode->addValue(' '); - tmpNode->addValue('\t'); - tmpNode->addValue('\n'); - tmpNode->addValue('\r'); - tmpNode->addValue('\f'); - tmpNode->addValue('\v'); - tmpNode->setInvertion(true); - m_subNode.push_back(tmpNode); - } - break; - case regexpOpcodeWord: - { - NodeRangeValue<CLASS_TYPE>* tmpNode = new NodeRangeValue<CLASS_TYPE>(Node<CLASS_TYPE>::m_nodeLevel+1); - tmpNode->setDescriptiveName("word"); - tmpNode->addRange('a', 'z'); - tmpNode->addRange('A', 'Z'); - tmpNode->addRange('0', '9'); - m_subNode.push_back(tmpNode); - } - break; - case regexpOpcodeWordNot: - { - NodeRangeValue<CLASS_TYPE>* tmpNode = new NodeRangeValue<CLASS_TYPE>(Node<CLASS_TYPE>::m_nodeLevel+1); - tmpNode->setDescriptiveName("word-not"); - tmpNode->addRange('a', 'z'); - tmpNode->addRange('A', 'Z'); - tmpNode->addRange('0', '9'); - tmpNode->setInvertion(true); - m_subNode.push_back(tmpNode); - } - break; - - default: { - elementSize = getLenOfNormal(Node<CLASS_TYPE>::m_regExpData, pos); - for (int64_t kkk=pos; kkk<pos+elementSize; kkk++) { - tmpData.push_back(Node<CLASS_TYPE>::m_regExpData[kkk]); - } - // add to the subnode list : - m_subNode.push_back(new NodeValue<CLASS_TYPE>(tmpData, Node<CLASS_TYPE>::m_nodeLevel+1)); - // move current position ... - pos += elementSize-1; - } - break; - } - pos++; - } - return _data.size(); - }; - virtual void parse(const CLASS_TYPE& _data, int64_t _currentPos, int64_t _lenMax, FindProperty& _property) { - //TK_REG_DEBUG_2("Parse " << levelSpace(Node<CLASS_TYPE>::m_nodeLevel) << " (elem) data to parse : '" << autoStr(std::string(_data, _currentPos, _lenMax-_currentPos)) << "'"); - //TK_REG_DEBUG_2("Parse " << levelSpace(Node<CLASS_TYPE>::m_nodeLevel) << " (elem) m_data='" << autoStr(Node<CLASS_TYPE>::m_data) << "'"); - TK_REG_DEBUG_3(" " << levelSpace(Node<CLASS_TYPE>::m_nodeLevel) << " (elem) " << _property); - int findLen = 0; - bool error = false; - size_t iii = 0; - int64_t tmpCurrentPos = _currentPos; - FindProperty prop; - if (_property.m_subProperty.size() != 0) { - // rewind the list: - bool findPartialNode = false; - for (int64_t jjj=_property.m_subProperty.size()-1; jjj>=0; --jjj) { - if (_property.m_subProperty[jjj].getStatus() == parseStatusPartial) { - findPartialNode = true; - prop = _property.m_subProperty[jjj]; - tmpCurrentPos = prop.getPositionStop(); - _property.m_subProperty.erase(_property.m_subProperty.begin()+jjj, _property.m_subProperty.end()); - iii = jjj; - TK_REG_DEBUG("Parse " << levelSpace(Node<CLASS_TYPE>::m_nodeLevel) << " (elem) rewind=" << iii); - break; - } - } - // We did not find the element : - if (findPartialNode == false) { - _property.m_subProperty.clear(); - _property.reset(); - prop.setPositionStart(tmpCurrentPos); - } - } else { - prop.setPositionStart(tmpCurrentPos); - } - while (iii < m_subNode.size()) { - //TK_REG_DEBUG_2(" " << levelSpace(Node<CLASS_TYPE>::m_nodeLevel) << " (elem=" << iii << "/" << m_subNode.size() << ") data='" << autoStr(std::string(_data, tmpCurrentPos, _lenMax-tmpCurrentPos)) << "'"); - m_subNode[iii]->parse(_data, tmpCurrentPos, _lenMax, prop); - if (prop.getStatus() == parseStatusNone) { - TK_REG_DEBUG(" " << levelSpace(Node<CLASS_TYPE>::m_nodeLevel) << " (elem=" << iii << "/" << m_subNode.size() << ") ===None=== : " << prop); - // rewind the list: - bool findPartialNode = false; - for (int64_t jjj=_property.m_subProperty.size()-1; jjj>=0; --jjj) { - if (_property.m_subProperty[jjj].getStatus() == parseStatusPartial) { - findPartialNode = true; - prop = _property.m_subProperty[jjj]; - tmpCurrentPos = prop.getPositionStop(); - _property.m_subProperty.erase(_property.m_subProperty.begin()+jjj, _property.m_subProperty.end()); - iii = jjj; - TK_REG_DEBUG(" " << levelSpace(Node<CLASS_TYPE>::m_nodeLevel) << " (elem=?/" << m_subNode.size() << ") == rewind at " << iii << ""); - break; - } - } - // We did not find the element : - if (findPartialNode == false) { - _property.setStatus(parseStatusNone); - TK_REG_DEBUG(" " << levelSpace(Node<CLASS_TYPE>::m_nodeLevel) << " (elem) return=" << _property); - return; - } else { - if (tmpCurrentPos >= (int64_t)_data.size()) { - TK_REG_DEBUG(" " << levelSpace(Node<CLASS_TYPE>::m_nodeLevel) << " (elem=?/" << m_subNode.size() << ") Reach end of buffer"); - _property.setStatus(parseStatusNone); - return; - } - //prop.setPositionStart(tmpCurrentPos); - continue; - } - } - if (prop.getPositionStart() > prop.getPositionStop()) { - TK_CRITICAL("Very bad case ... : " << prop); - } - tmpCurrentPos = prop.getPositionStop(); - _property.m_subProperty.push_back(prop); - TK_REG_DEBUG(" " << levelSpace(Node<CLASS_TYPE>::m_nodeLevel) << " (elem=" << iii << "/" << m_subNode.size() << ") === OK === find : " << prop); - prop.reset(); - prop.setPositionStart(tmpCurrentPos); - iii++; - } - _property.setStatus(parseStatusFull); - // Display sub List : - for (auto &it : _property.m_subProperty) { - TK_REG_DEBUG(" " << levelSpace(Node<CLASS_TYPE>::m_nodeLevel) << " (elem) sub=" << it); - } - for (int64_t iii=_property.m_subProperty.size()-1; iii>=0; --iii) { - if (_property.m_subProperty[iii].getStatus() == parseStatusPartial) { - _property.setStatus(parseStatusPartial); - break; - } - } - if (_property.m_subProperty.size()>0) { - _property.setPositionStop(_property.m_subProperty.back().getPositionStop() ); - } else { - TK_WARNING("RegExp ERROR"); - } - TK_REG_DEBUG(" " << levelSpace(Node<CLASS_TYPE>::m_nodeLevel) << " (elem) return=" << _property); - } - - void display() { - TK_INFO("Find NODE : " << levelSpace(Node<CLASS_TYPE>::m_nodeLevel) << "@(Elem)@ {" - << Node<CLASS_TYPE>::m_multipleMin << "," - << Node<CLASS_TYPE>::m_multipleMax << "} subdata=" - << createString(Node<CLASS_TYPE>::m_regExpData) ); - for(auto &it : m_subNode) { - it->display(); - } - }; - private : - /** - * @brief Set the number of repeate time on a the last node in the list ... - * @param[in] _min Minimum of the multiplicity - * @param[in] _max Maximum of the multiplicity - * @return true if we find the node, false otherwise - */ - bool setMultiplicityOnLastNode(uint32_t _min, uint32_t _max) { - if (m_subNode.size() == 0) { - TK_ERROR("Set multiplicity on an inexistant element ...."); - return false; - } - m_subNode.back()->setMult(_min, _max); - return true; - } -}; - -#undef __class__ -#define __class__ "regExp::NodePThese" - -/** - * @not-in-doc - */ -template<class CLASS_TYPE> class NodePThese : public Node<CLASS_TYPE> { - protected : - std::vector<Node<CLASS_TYPE>*> m_subNode; //!< Subnode list - public : - /** - * @brief Constructor - */ - NodePThese(int32_t _level=0) : Node<CLASS_TYPE>::Node(_level) { }; - NodePThese(const std::vector<char32_t>& _data, int32_t _level) : Node<CLASS_TYPE>::Node(_level) { - generate(_data); - }; - /** - * @brief Destructor - */ - ~NodePThese() { - /* - for (auto it : m_subNode) { - delete *it; - *it = nullptr; - } - */ - m_subNode.clear(); - } - int32_t generate(const std::vector<char32_t>& _data) { - Node<CLASS_TYPE>::m_regExpData = _data; - TK_REG_DEBUG("Request Parse (...) data=" << createString(Node<CLASS_TYPE>::m_regExpData) ); - //Find all the '|' in the string (and at the good level ...) - int64_t pos = 0; - int32_t elementSize = getLenOfPTheseElem(Node<CLASS_TYPE>::m_regExpData, pos); - // generate all the "elemTypePTheseElem" of the Node - while (elementSize>0) { - // geerate output deta ... - std::vector<char32_t> tmpData; - for (int64_t kkk=pos; kkk<pos+elementSize; kkk++) { - tmpData.push_back(Node<CLASS_TYPE>::m_regExpData[kkk]); - } - // add to the subnode list : - m_subNode.push_back(new NodePTheseElem<CLASS_TYPE>(tmpData, Node<CLASS_TYPE>::m_nodeLevel+1)); - pos += elementSize+1; - TK_REG_DEBUG("plop=" << createString(Node<CLASS_TYPE>::m_regExpData, pos, pos+1) ); - elementSize = getLenOfPTheseElem(Node<CLASS_TYPE>::m_regExpData, pos); - TK_REG_DEBUG("find " << elementSize << " elements"); - } - if ( pos == 0 - && elementSize == 0) { - TK_ERROR("No data in the (...) element at " << pos); - return false; - } - return _data.size(); - }; - virtual void parse(const CLASS_TYPE& _data, int64_t _currentPos, int64_t _lenMax, FindProperty& _property) { - - TK_REG_DEBUG("Parse " << levelSpace(Node<CLASS_TYPE>::m_nodeLevel) << " (...) {" << Node<CLASS_TYPE>::m_multipleMin << "," << Node<CLASS_TYPE>::m_multipleMax << "}"); - TK_REG_DEBUG(" " << levelSpace(Node<CLASS_TYPE>::m_nodeLevel) << " " << createString(Node<CLASS_TYPE>::m_regExpData)); - TK_REG_DEBUG_2(" " << levelSpace(Node<CLASS_TYPE>::m_nodeLevel) << " (...) data='" << autoStr(std::string(_data, _currentPos, _lenMax-_currentPos)) << "'"); - TK_REG_DEBUG_3(" " << levelSpace(Node<CLASS_TYPE>::m_nodeLevel) << " (...) input property=" << _property); - if (0 == m_subNode.size()) { - _property.setStatus(parseStatusNone); - return; - } - if (_property.getStatus() != parseStatusPartial) { - if (Node<CLASS_TYPE>::m_multipleMin == 0) { - _property.setStatus(parseStatusPartial); - _property.setPositionStop(_property.getPositionStart()); - TK_REG_DEBUG("Parse " << levelSpace(Node<CLASS_TYPE>::m_nodeLevel) << " ==> partial (minSize=0)"); - return; - } - } - bool haveSubPartial = false; - for (int64_t iii=_property.m_subProperty.size()-1; iii>=0; --iii) { - if (_property.m_subProperty[iii].getStatus() == parseStatusPartial) { - TK_REG_DEBUG(" " << levelSpace(Node<CLASS_TYPE>::m_nodeLevel) << " (...) Have partial"); - haveSubPartial = true; - break; - } - } - if ( haveSubPartial == false - && _property.getMultiplicity() >= Node<CLASS_TYPE>::m_multipleMax) { - _property.setStatus(parseStatusFull); - return; - } - int64_t tmpCurrentPos = _currentPos; - FindProperty prop; - size_t iiiStartPos = 0; - if (haveSubPartial == true) { - for (int64_t jjj=_property.m_subProperty.size()-1; jjj>=0; --jjj) { - if (_property.m_subProperty[jjj].getStatus() == parseStatusPartial) { - prop = _property.m_subProperty[jjj]; - tmpCurrentPos = prop.getPositionStop(); - _property.m_subProperty.erase(_property.m_subProperty.begin()+jjj, _property.m_subProperty.end()); - _property.setPositionStop(tmpCurrentPos); - iiiStartPos = prop.getSubIndex(); - TK_REG_DEBUG(" " << levelSpace(Node<CLASS_TYPE>::m_nodeLevel) << " (...) Rewind to " << iiiStartPos << " last elem=" << prop); - break; - } - } - } else { - if ( _property.getPositionStop() < 0 - && Node<CLASS_TYPE>::m_multipleMin == 0 - && _property.getMultiplicity() == 0) { - _property.setPositionStop(_property.getPositionStart()); - _property.setStatus(parseStatusPartial); - return; - } - prop.setPositionStart(tmpCurrentPos); - } - int32_t findLen = _property.getFindLen(); - int32_t offset = 0; - _property.setStatus(parseStatusFull); - bool tmpFind = true; - while ( _property.getMultiplicity() <= Node<CLASS_TYPE>::m_multipleMax - && tmpFind == true) { - tmpFind = false; - if (tmpCurrentPos+offset>=_lenMax) { - TK_REG_DEBUG(" " << levelSpace(Node<CLASS_TYPE>::m_nodeLevel) << " (... ---/" << m_subNode.size() << ") ==> out of range : " << tmpCurrentPos << "+" << offset << " >= " << _lenMax); - prop.setStatus(parseStatusFull); - if (prop.getPositionStart() > prop.getPositionStop()) { - TK_CRITICAL("Very bad case ... : " << prop); - } - _property.m_subProperty.push_back(prop); - break; - } - for (size_t iii=iiiStartPos; iii<m_subNode.size() && tmpCurrentPos+offset<_lenMax; ++iii) { - TK_REG_DEBUG(" " << levelSpace(Node<CLASS_TYPE>::m_nodeLevel) << " (... " << iii << "/" << m_subNode.size() << ")"); - m_subNode[iii]->parse(_data, tmpCurrentPos+offset, _lenMax, prop); - //offset = prop.getFindLen(); - if ( prop.getStatus() == parseStatusFull - || prop.getStatus() == parseStatusPartial) { - TK_REG_DEBUG(" " << levelSpace(Node<CLASS_TYPE>::m_nodeLevel) << " (... " << iii << "/" << m_subNode.size() << ") --- OK --- prop=" << prop); - findLen += prop.getFindLen(); - offset += prop.getFindLen(); - prop.setSubIndex(iii); - if (prop.getPositionStart() > prop.getPositionStop()) { - TK_CRITICAL("Very bad case ... : " << prop); - } - _property.m_subProperty.push_back(prop); - tmpFind = true; - prop.reset(); - prop.setPositionStart(tmpCurrentPos+offset); - break; - } - TK_REG_DEBUG(" " << levelSpace(Node<CLASS_TYPE>::m_nodeLevel) << " (... " << iii << "/" << m_subNode.size() << ") ---NONE---"); - prop.reset(); - prop.setPositionStart(tmpCurrentPos+offset); - } - iiiStartPos = 0; - if (tmpFind == true) { - _property.setMultiplicity(_property.m_subProperty.size()); - TK_REG_DEBUG(" " << levelSpace(Node<CLASS_TYPE>::m_nodeLevel) << " (...) mult=" << _property.getMultiplicity() << " find " << findLen); - if (_property.getMultiplicity() >= Node<CLASS_TYPE>::m_multipleMin) { - _property.setStatus(parseStatusPartial); - break; - } - } - } - for (int64_t iii=_property.m_subProperty.size()-1; iii>=0; --iii) { - TK_REG_DEBUG(" " << levelSpace(Node<CLASS_TYPE>::m_nodeLevel) << " (...) sub=" << _property.m_subProperty[iii]); - if (_property.m_subProperty[iii].getStatus() == parseStatusPartial) { - _property.setStatus(parseStatusPartial); - break; - } - } - if (_property.m_subProperty.size() == 0) { - _property.setPositionStop(_property.getPositionStart()); - } else { - _property.setPositionStop(_property.m_subProperty.back().getPositionStop()); - } - if( _property.getMultiplicity() >= Node<CLASS_TYPE>::m_multipleMin - && _property.getMultiplicity() <= Node<CLASS_TYPE>::m_multipleMax - && findLen> 0 ) { - TK_REG_DEBUG(" " << levelSpace(Node<CLASS_TYPE>::m_nodeLevel) << " (...) return=" << _property); - return; - } else if( 0 == Node<CLASS_TYPE>::m_multipleMin ) { - TK_REG_DEBUG(" " << levelSpace(Node<CLASS_TYPE>::m_nodeLevel) << " (...) return=" << _property); - return; - } - _property.setStatus(parseStatusNone); - return; - }; - - void display() { - if (9999 <= Node<CLASS_TYPE>::m_nodeLevel) { - TK_INFO("regExp :" << createString(Node<CLASS_TYPE>::m_regExpData) ); - } else { - TK_INFO("Find NODE : " << levelSpace(Node<CLASS_TYPE>::m_nodeLevel) << "@(...)@ {" - << Node<CLASS_TYPE>::m_multipleMin << "," - << Node<CLASS_TYPE>::m_multipleMax << "} subdata=" - << createString(Node<CLASS_TYPE>::m_regExpData) ); - for(auto &it : m_subNode) { - it->display(); - } - } - }; - /** - * @brief Just display the regExp in color ... - */ - void drawColoredRegEx() { - TK_INFO("regExp :" << createString(Node<CLASS_TYPE>::m_regExpData) ); - } - /** - * @brief get the string represented the regexp (colored) - * @return Regexp string - */ - std::string getColoredRegEx() { - return createString(Node<CLASS_TYPE>::m_regExpData); - } -}; -} -#undef __class__ -#define __class__ "RegExp" - -/** - * @brief Regular expression interface template. - * - * List of elment that can be displayed : - * - * [pre] - * (...) sub element is separate with | - * \d Digits [0-9] - * \D NOT a digit [^0-9] - * \l Letters [a-zA-Z] - * \L NOT a Letter [^a-zA-Z] - * \s Whitespace [ \t\n\r\f\v] - * \S NOT Whitespace [^ \t\n\r\f\v] - * \w "Word" character [a-zA-Z0-9_] - * \W NOT a "Word" character [^a-zA-Z0-9_] - * \@ at the start or the end not in the parsing of element ==> check if \w is not present (other regExp will be <> ...) - * [anjdi] or [a-gt-j] range - * . dot [^\x00-\x08\x0A-\x1F\x7F] - * ==> TODO : - * $ End / Start of line of line ==> ce sera un truc supl� comme le \@ - * ^in the [] invertion of the range element - * - * multiplicity : - * * ==> {0, 2147483647} - * ? ==> {0, 1} - * + ==> {1, 2147483647} - * {x} ==> {x, x} - * {x,y} ==> {x, y} - * [/pre] - * - * @param[in] CLASS_TYPE Type of theclass that might be parsed. This class might have a interface : operator[] that return a char or a char32_t. - * - * Regular is easy to use: - */ -template<class CLASS_TYPE> class RegExp { - private: - std::u32string m_expressionRequested; //!< Regular expression parsed ... - regexp::elementPos_ts m_areaFind; //!< position around selection - regexp::NodePThese<CLASS_TYPE> m_exprRootNode; //!< The tree where data is set - bool m_isOk; //!< Known if we can process with this regExp - bool m_notBeginWithChar; //!< The regular expression must not have previously a char [a-zA-Z0-9_] - bool m_notEndWithChar; //!< The regular expression must not have after the end a char [a-zA-Z0-9_] - bool m_maximize; //!< by default the regexp find the minimum size of a regexp . - public: - // create the regular expression - - /** - * @brief Constructor - * @param[in,out] _exp Regular expression to parse - */ - RegExp(const std::u32string &_exp=U"") : - m_expressionRequested(U""), - m_isOk(false), - m_notBeginWithChar(false), - m_notEndWithChar(false), - m_maximize(false) { - m_areaFind.start=0; - m_areaFind.stop=0; - if (_exp.size() != 0) { - compile(_exp); - } - } - /** - * @previous - */ - RegExp(const std::string &_exp) : - m_expressionRequested(U""), - m_isOk(false), - m_notBeginWithChar(false), - m_notEndWithChar(false), - m_maximize(false) { - m_areaFind.start=0; - m_areaFind.stop=0; - if (_exp.size() != 0) { - compile(etk::to_u32string(_exp)); - } - }; - - /** - * @brief Destructor - */ - ~RegExp() { - m_isOk = false; - }; - /** - * @brief SetMaximizing of the regexp - * @param[in] _value Maximize or not the regExp - */ - void setMaximize(bool _value) { - m_maximize = _value; - } - /** - * @brief Set a new regular expression matching - * @param[in] _exp the new expression to search - */ - void compile(const std::string &_exp) { - if (_exp.size() != 0) { - TK_REG_DEBUG("normal string parse : '" << _exp << "'"); - compile(etk::to_u32string(_exp)); - } - } - /** - * @previous - */ - void compile(const std::u32string &_regexp) { - m_expressionRequested = _regexp; - std::vector<char32_t> tmpExp; - - TK_REG_DEBUG("---------------------------------------------------------------------"); - TK_REG_DEBUG("Parse RegExp : (" << m_expressionRequested << ")" ); - m_isOk = false; - m_areaFind.start=0; - m_areaFind.stop=0; - m_notBeginWithChar = false; - m_notEndWithChar = false; - - // change in the regular Opcode ==> replace \x with the corect element ... x if needed - int32_t countBraceIn = 0; - int32_t countBraceOut = 0; - int32_t countPTheseIn = 0; - int32_t countPTheseOut = 0; - int32_t countBracketIn = 0; - int32_t countBracketOut = 0; - for (int64_t iii=0; iii<(int64_t)_regexp.size(); iii++) { - if (_regexp[iii] == '\\') { - if(iii+1>=(int64_t)_regexp.size()) { - TK_ERROR("Dangerous parse of the element pos " << iii << " \\ with nothing after"); - // TODO : Generate Exeption ... - return; - } - int64_t jjj; - // Find the element in the list... - for (jjj=0; jjj<regexp::constConvertionTableSize; jjj++) { - if ( regexp::constConvertionTable[jjj].haveBackSlash == true - && _regexp[iii+1] == (char32_t)regexp::constConvertionTable[jjj].inputValue) { - if (regexp::constConvertionTable[jjj].newValue==0) { - tmpExp.push_back(regexp::constConvertionTable[jjj].specialChar); - } else { - tmpExp.push_back(regexp::constConvertionTable[jjj].newValue); - } - break; - } - } - // check error : - if (jjj==regexp::constConvertionTableSize) { - TK_WARNING(" parse : " << _regexp); - TK_WARNING(" " << etk::regexp::strTick(iii+1)); - TK_ERROR("Dangerous parse of the \\x with the value : '" << _regexp[iii+1] << "' at element " << iii); - return; - } - // less one char in the regular expression ... - iii++; - } else { - if (_regexp[iii] == '(') { - countPTheseIn++; - } else if (_regexp[iii] == ')') { - countPTheseOut++; - } else if (_regexp[iii] == '[') { - countBracketIn++; - } else if (_regexp[iii] == ']') { - countBracketOut++; - } else if (_regexp[iii] == '{') { - countBraceIn++; - } else if (_regexp[iii] == '}') { - countBraceOut++; - } - int64_t jjj; - // find the element in the list... - for (jjj=0; jjj<regexp::constConvertionTableSize; jjj++) { - if( false == regexp::constConvertionTable[jjj].haveBackSlash - && _regexp[iii] == (char32_t)regexp::constConvertionTable[jjj].inputValue) - { - if (regexp::constConvertionTable[jjj].newValue==0) { - tmpExp.push_back(regexp::constConvertionTable[jjj].specialChar); - } else { - tmpExp.push_back(regexp::constConvertionTable[jjj].newValue); - } - break; - } - } - // not find : normal element - if (jjj==regexp::constConvertionTableSize) { - //TK_REG_DEBUG("parse : '" << _regexp[iii] << "'" ); - tmpExp.push_back(_regexp[iii]); - } - } - } - - // count the number of '(' and ')' - if (countPTheseIn != countPTheseOut ) { - TK_ERROR("Error in the number of '('=" << countPTheseIn << " and ')'=" << countPTheseOut << " elements"); - return; - } - // count the number of '{' and '}' - if (countBraceIn != countBraceOut ) { - TK_ERROR("Error in the number of '{'=" << countBraceIn << " and '}'=" << countBraceOut << " elements"); - return; - } - // count the number of '[' and ']' - if (countBracketIn != countBracketOut ) { - TK_ERROR("Error in the number of '['=" << countBracketIn << " and ']'=" << countBracketOut << " elements"); - return; - } - // need to check if all () [] and {} is well set ... - if (false == checkGoodPosition(tmpExp) ) { - return; - } - - //TK_REG_DEBUG("Main element :" << createString(tmpExp) ); - if ( tmpExp.size()>0 - && tmpExp[0] == regexpOpcodeNoChar) - { - //TK_DEBUG("=> must not begin with char"); - m_notBeginWithChar = true; - // remove element - tmpExp.erase(tmpExp.begin()); - } - if ( tmpExp.size()>0 - && tmpExp[tmpExp.size()-1] == regexpOpcodeNoChar) - { - //TK_DEBUG("=> must not end with char"); - m_notEndWithChar = true; - // remove element - tmpExp.erase(tmpExp.end()-1); - } - - if ((int64_t)tmpExp.size() != (int64_t)m_exprRootNode.generate(tmpExp) ) { - return; - } - // TODO : optimize node here ... - //drawColoredRegEx(); - //display(); - - // all OK ... play again - m_isOk = true; - }; - - /** - * @brief Get the regular expression string - * @return the string representing the RegExp - */ - std::string getRegExp() const { - return etk::to_string(m_expressionRequested); - }; - /** - * @previous - */ - const std::u32string& getURegExp() const { - return m_expressionRequested; - }; - - /** - * @brief Get the status if the regular expression parsing - * @return true : the regExp is correctly parsed - * @return false : an error occcured (check log ...) - */ - bool getStatus() { - return m_isOk; - }; - // process the regular expression - - /** - * @brief Parse the defined data with the compiled regular expression. - * @param[in] _SearchIn Data where to search the regular expression. - * @param[in] _startPos start position to search - * @param[in] _endPos end position to search - * @return true : find something, false otherwise - */ - bool parse(const CLASS_TYPE& _SearchIn, - int64_t _startPos, - int64_t _endPos) { - if (false == m_isOk) { - return false; - } - int64_t buflen = _SearchIn.size(); - if (_endPos > buflen) { - _endPos = buflen; - } - if (_startPos > _endPos) { - return false; - } - for (int64_t iii=_startPos; iii<_endPos; iii++) { - int64_t findLen=0; - int64_t maxlen = _endPos-iii; - TK_REG_DEBUG("----------------------------------------------"); - TK_REG_DEBUG("parse element : " << iii << " : '" << _SearchIn[iii] << "'"); - if (true == m_notBeginWithChar) { - if (iii>0) { - char32_t tmpVal = _SearchIn[iii-1]; - if( ( tmpVal >= 'a' - && tmpVal <= 'z' ) - || ( tmpVal >= 'A' - && tmpVal <= 'Z' ) - || ( tmpVal >= '0' - && tmpVal <= '9' ) - || ( tmpVal == '_' ) ) { - // go on the next char ... - continue; - } - } - } - regexp::FindProperty prop; - prop.setPositionStart(iii); - bool needOneMoreCycle = true; - bool oneCycleDone = false; - while (needOneMoreCycle == true) { - needOneMoreCycle = false; - m_exprRootNode.parse(_SearchIn, iii, _endPos, prop); - TK_REG_DEBUG("res=" << prop.getStatus()); - if ( prop.getStatus() == regexp::parseStatusNone - && m_maximize == true - && oneCycleDone == false) { - // TODO : do it better Patch the case of ".*" seach with maximizing - oneCycleDone = true; - needOneMoreCycle = true; - } - if ( prop.getStatus() == regexp::parseStatusFull - || prop.getStatus() == regexp::parseStatusPartial ) { - findLen = prop.getFindLen(); - TK_REG_DEBUG_3("main search find : " << findLen << " elements data=" << std::string(_SearchIn, prop.getPositionStart(), prop.getFindLen())); - // Check end : - if (m_notEndWithChar == true) { - TK_REG_DEBUG("Check end is not a char: '" << (char)_SearchIn[iii+findLen] << "'"); - if (_startPos+findLen < (int64_t)_SearchIn.size() ) { - char32_t tmpVal = _SearchIn[iii+findLen]; - if( ( tmpVal >= 'a' - && tmpVal <= 'z' ) - || ( tmpVal >= 'A' - && tmpVal <= 'Z' ) - || ( tmpVal >= '0' - && tmpVal <= '9' ) - || ( tmpVal == '_' ) ) { - // go on the next char ... - TK_REG_DEBUG("Need one more cycle ..."); - needOneMoreCycle = true; - } - } - } - if ( m_maximize == true - && prop.getStatus() == regexp::parseStatusPartial) { - needOneMoreCycle = true; - } - if (needOneMoreCycle == false) { - m_areaFind.start = iii; - m_areaFind.stop = iii + findLen; - return true; - } - if (prop.getStatus() == regexp::parseStatusFull) { - // We really not find the elemnent ==> stop ... - break; - } - } - } - } - return false; - }; - - - bool processOneElement(const CLASS_TYPE& _SearchIn, - int64_t _startPos, - int64_t _endPos) { - if (false == m_isOk) { - return false; - } - int64_t buflen = _SearchIn.size(); - if (_endPos > buflen) { - _endPos = buflen; - } - if (_startPos > _endPos) { - return false; - } - int64_t findLen=0; - int64_t maxlen = _endPos-_startPos; - if (true == m_notBeginWithChar) { - if (_startPos>0) { - char32_t tmpVal = _SearchIn[_startPos-1]; - if( ( tmpVal >= 'a' - && tmpVal <= 'z' ) - || ( tmpVal >= 'A' - && tmpVal <= 'Z' ) - || ( tmpVal >= '0' - && tmpVal <= '9' ) - || ( tmpVal == '_' ) ) { - // go on the next char ... - return false; - } - } - } - regexp::FindProperty prop; - prop.setPositionStart(_startPos); - bool needOneMoreCycle = true; - while (needOneMoreCycle == true) { - needOneMoreCycle = false; - m_exprRootNode.parse(_SearchIn, _startPos, _endPos, prop); - if ( prop.getStatus() == regexp::parseStatusFull - || prop.getStatus() == regexp::parseStatusPartial ) { - findLen = prop.getFindLen(); - TK_REG_DEBUG_3("main search find : " << findLen << " elements"); - // Check end : - if (m_notEndWithChar == true) { - if (_startPos+findLen < (int64_t)_SearchIn.size() ) { - char32_t tmpVal = _SearchIn[_startPos+findLen]; - if( ( tmpVal >= 'a' - && tmpVal <= 'z' ) - || ( tmpVal >= 'A' - && tmpVal <= 'Z' ) - || ( tmpVal >= '0' - && tmpVal <= '9' ) - || ( tmpVal == '_' ) ) { - // go on the next char ... - needOneMoreCycle = true; - } - } - } - if ( m_maximize == true - && prop.getStatus() == regexp::parseStatusPartial) { - needOneMoreCycle = true; - } - if (needOneMoreCycle == false) { - m_areaFind.start = _startPos; - m_areaFind.stop = _startPos + findLen; - return true; - } - if (prop.getStatus() == regexp::parseStatusFull) { - // We really not find the elemnent ==> stop ... - return false; - } - } - } - return false; - }; - - - /** - * @brief Get the expression start position detected - * @return position of the start regExp - */ - int64_t start() { - return m_areaFind.start; - }; - - /** - * @brief Get the expression stop position detected - * @return position of the stop regExp - */ - int64_t stop() { - return m_areaFind.stop; - }; - - /** - * @brief Display the reg Exp - */ - void display() { - m_exprRootNode.display(); - }; - /** - * @brief Just display the regExp in color ... - */ - void drawColoredRegEx() { - m_exprRootNode.drawColoredRegEx(); - } - /** - * @brief Get decorated regular expression. This generate a [class[ewol::compositing::Text]] decoration text. Note that can be use in [class[ewol::widget::Label]]. - * @return The decorated string - */ - std::string getRegExDecorated() { - return m_exprRootNode.getColoredRegEx(); - } - private: - /** - * @brief Check forbidden element in a regular expression element. - * @param[in] _tmpExp The regular expression to check. - * @param[in] _pos Position to start the check. - * @return true The current node is correct. - * @return false An error in parsing has appeared. - */ - bool checkGoodPosition(const std::vector<char32_t>& _tmpExp, int64_t& _pos) { - char32_t curentCode = _tmpExp[_pos]; - char32_t endCode = regexpOpcodePTheseOut; - const char *input = "(...)"; - if (curentCode == regexpOpcodeBracketIn) { - endCode = regexpOpcodeBracketOut; - input = "[...]"; - } else if (curentCode == regexpOpcodeBracetIn){ - endCode = regexpOpcodeBracetOut; - input = "{x,x}"; - } - _pos++; - if (_pos >= (int64_t)_tmpExp.size()) { - TK_ERROR("ended with: ( or { or [ ... not permited"); - return false; - } - //TK_DEBUG(" ==> Find ELEMENT : ([{"); - // case dependent : - if ( curentCode == regexpOpcodeBracketIn - || curentCode == regexpOpcodeBracetIn) { - while(_pos<(int64_t)_tmpExp.size()) { - //TK_DEBUG("check : " << tmpExp[pos]); - // if we find the end : - if (endCode == _tmpExp[_pos]) { - return true; - } else { - // otherwise, we check the error in the element ... - char *find = NULL; - switch (_tmpExp[_pos]) { - case regexpOpcodePTheseIn: find = (char*)"("; break; - case regexpOpcodeBracketIn: find = (char*)"["; break; - case regexpOpcodeBracetIn: find = (char*)"{"; break; - case regexpOpcodePTheseOut: find = (char*)")"; break; - case regexpOpcodeBracketOut: find = (char*)"]"; break; - case regexpOpcodeBracetOut: find = (char*)"}"; break; - case regexpOpcodeStar: find = (char*)"*"; break; - case regexpOpcodeDot: find = (char*)"."; break; - case regexpOpcodeQuestion: find = (char*)"?"; break; - case regexpOpcodePlus: find = (char*)"+"; break; - case regexpOpcodePipe: find = (char*)"|"; break; - case regexpOpcodeStartOfLine: find = (char*)"^"; break; - case regexpOpcodeEndOfLine: find = (char*)"$"; break; - case regexpOpcodeDigit: find = (char*)"\\d"; break; - case regexpOpcodeDigitNot: find = (char*)"\\D"; break; - case regexpOpcodeLetter: find = (char*)"\\l"; break; - case regexpOpcodeLetterNot: find = (char*)"\\L"; break; - case regexpOpcodeSpace: find = (char*)"\\s"; break; - case regexpOpcodeSpaceNot: find = (char*)"\\S"; break; - case regexpOpcodeWord: find = (char*)"\\w"; break; - case regexpOpcodeWordNot: find = (char*)"\\W"; break; - case regexpOpcodeNoChar: find = (char*)"\\@"; break; - default: break; - } - if (NULL != find) { - (void)input; - TK_ERROR("can not have : '" << find << "' inside " << input << " element"); - return false; - } - } - _pos++; - } - } else { - while(_pos< (int64_t)_tmpExp.size()) { - if (endCode == _tmpExp[_pos]) { - // find the last element - return true; - } else if ( _tmpExp[_pos] == regexpOpcodeBracetOut) { - TK_ERROR("find } inside a (...) without start {"); - return false; - } else if ( _tmpExp[_pos] == regexpOpcodeBracketOut) { - TK_ERROR("find ] inside a (...) without start ["); - return false; - } else { - if( _tmpExp[_pos] == regexpOpcodePTheseIn - || _tmpExp[_pos] == regexpOpcodeBracketIn - || _tmpExp[_pos] == regexpOpcodeBracetIn ) { - if (false==checkGoodPosition(_tmpExp, _pos) ) { - return false; - } - } - } - _pos++; - } - } - - // we did not find the cloder . ... - if (endCode == regexpOpcodeBracketOut) { - TK_ERROR("Missing ']' at the end"); - } - if (endCode == regexpOpcodeBracetOut) { - TK_ERROR("Missing '}' at the end"); - } - if (endCode == regexpOpcodePTheseOut) { - TK_ERROR("Missing ')' at the end"); - } - return false; - }; - - /** - * @brief Check all the element in a regular expression ( count [],{},(),...) - * @param[in] _tmpExp Regular expression to check. - * @return true The regular expression is correct. - * @return false an error occured in the regular expression. - */ - bool checkGoodPosition(const std::vector<char32_t>& _tmpExp) { - int64_t pos = 0; - while (pos < (int64_t)_tmpExp.size()) { - //TK_DEBUG("check : " << tmpExp[pos]); - if( _tmpExp[pos] == regexpOpcodePTheseIn - || _tmpExp[pos] == regexpOpcodeBracketIn - || _tmpExp[pos] == regexpOpcodeBracetIn) - { - // attention the i position change inside the finction... - if (false==checkGoodPosition(_tmpExp, pos) ) { - TK_ERROR("Error at position : " << pos+1 ); - return false; - } else { - //TK_DEBUG(" <== Find ELEMENT : ]})"); - } - } else if(_tmpExp[pos] == regexpOpcodePTheseOut) { - TK_ERROR("can find ')' with no start : ')'"); - return false; - } else if(_tmpExp[pos] == regexpOpcodeBracketOut) { - TK_ERROR("can find ']' with no start : '['"); - return false; - } else if(_tmpExp[pos] == regexpOpcodeBracetOut) { - TK_ERROR("can find '}' with no start : '{'"); - return false; - } - pos++; - } - return true; - }; - public: - /* **************************************************** - * == operator - *****************************************************/ - bool operator== (const RegExp<CLASS_TYPE>& _obj) const { - return _obj.m_expressionRequested == m_expressionRequested; - } - bool operator!= (const RegExp<CLASS_TYPE>& _obj) const { - return _obj.m_expressionRequested != m_expressionRequested; - } - - -}; - -}; // end of etk namespace - -#undef __class__ -#define __class__ (NULL) - -#endif diff --git a/etk/stdTools.cpp b/etk/stdTools.cpp index b78a5a6..19be112 100644 --- a/etk/stdTools.cpp +++ b/etk/stdTools.cpp @@ -237,6 +237,76 @@ std::u32string utf8::convertUnicode(const std::string& _input) { return U"TODO ... std::u32string utf8::convertUnicode(const std::string& _input)"; } +utf8::iterator& utf8::iterator::operator++ () { + m_value = u32char::Null; + if (m_current <= 0) { + m_current = 0; + return *this; + } + if (m_data != nullptr) { + if (m_current < (int64_t)m_data->size() ) { + int8_t nbChar = utf8::theoricLen((*m_data)[m_current]); + if (nbChar != 0) { + m_current+=nbChar; + } else { + m_current++; + } + } + if (m_current >= (int64_t)m_data->size()) { + m_current = m_data->size(); + } + } + return *this; +} + +utf8::iterator& utf8::iterator::operator-- () { + m_value = u32char::Null; + if (m_data != nullptr) { + if (m_current > 0) { + int32_t iii = -1; + while( utf8::theoricFirst((*m_data)[m_current+iii]) == false + && iii >= -6 + && m_current-iii>0) { + --iii; + }; + m_current += iii; + } else { + m_current = 0; + } + } else { + m_current = 0; + } + if (m_current < 0) { + m_current = 0; + } + return *this; +} + +char32_t utf8::iterator::operator* () { + if (m_value != u32char::Null) { + return m_value; + } + if (m_data == nullptr) { + TK_ERROR("request an element that iterator not link"); + return m_value; + } + if ( m_current < 0 + || m_current >= (int64_t)m_data->size()) { + TK_ERROR("request an element out of bounding !!! 0 <= " << m_current << " < " << m_data->size()); + return m_value; + } + char tmpVal[5]; + memset(tmpVal, 0, sizeof(tmpVal)); + tmpVal[0] = (*m_data)[m_current]; + int8_t nbChar = utf8::theoricLen(tmpVal[0]); + for (int32_t iii=1; iii<nbChar && m_current+iii<(int64_t)m_data->size(); ++iii) { + tmpVal[iii] = (*m_data)[m_current+iii]; + } + // transform ... + m_value = utf8::convertChar32(tmpVal); + return m_value; +} + #undef __class__ #define __class__ "etk" diff --git a/etk/stdTools.h b/etk/stdTools.h index 684c1bf..a44d54b 100644 --- a/etk/stdTools.h +++ b/etk/stdTools.h @@ -60,6 +60,292 @@ namespace utf8 { char32_t convertChar32(const char* _input); std::u32string convertUnicode(const std::string& _input); + + class iterator { + private: + char32_t m_value; //!< store vlue to prevent multiple calcule of getting the data + std::string* m_data; //!< Pointer on the current Buffer + int64_t m_current; //!< curent Id in the Buffer + public: + iterator(): + m_value(u32char::Null), + m_data(nullptr), + m_current(0) { + // nothing to do ... + }; + iterator(std::string& _str) : + m_value(u32char::Null), + m_data(&_str), + m_current(0) { + // nothing to do ... + }; + iterator(std::string& _str, const std::string::iterator& _pos) : + m_value(u32char::Null), + m_data(&_str), + m_current(0) { + if (m_data != nullptr) { + m_current = std::distance(m_data->begin(), _pos); + } + }; + iterator(std::string& _str, size_t _pos) : + m_value(u32char::Null), + m_data(&_str), + m_current(0) { + if (m_data != nullptr) { + if (_pos > m_data->size()) { + m_current = m_data->size(); + } else { + m_current = _pos; + } + } + }; + iterator(std::string* _str, const std::string::iterator& _pos) : + m_value(u32char::Null), + m_data(_str), + m_current(0) { + if (m_data != nullptr) { + m_current = std::distance(m_data->begin(), _pos); + } + }; + iterator(std::string* _str, size_t _pos) : + m_value(u32char::Null), + m_data(_str), + m_current(0) { + if (m_data != nullptr) { + if (_pos > m_data->size()) { + m_current = m_data->size(); + } else { + m_current = _pos; + } + } + }; + /** + * @brief Recopy constructor. + * @param[in] _obj The Iterator that might be copy + */ + iterator(const iterator& _obj): + m_value(u32char::Null), + m_data(_obj.m_data), + m_current(_obj.m_current) { + // nothing to do ... + }; + /** + * @brief Asignation operator. + * @param[in] _otherIterator The Iterator that might be copy + * @return reference on the curent Iterator + */ + iterator& operator=(const iterator & _obj) { + m_current = _obj.m_current; + m_data = _obj.m_data; + m_value = u32char::Null; + return *this; + }; + /** + * @brief Basic destructor + */ + virtual ~iterator() { + m_current = 0; + m_data = nullptr; + m_value = u32char::Null; + }; + /** + * @brief basic boolean cast + * @return true if the element is present in buffer + */ + operator size_t () const { + if (m_data == nullptr) { + return 0; + } + if (m_current < 0) { + return 0; + } + if (m_current > (int64_t)m_data->size()) { + return m_data->size(); + } + return (size_t)m_current; + }; + /** + * @brief Incremental operator + * @return Reference on the current iterator incremented + */ + iterator& operator++ (); + /** + * @brief Decremental operator + * @return Reference on the current iterator decremented + */ + iterator& operator-- (); + /** + * @brief Incremental operator + * @return Reference on a new iterator and increment the other one + */ + iterator operator++ (int32_t) { + iterator it(*this); + ++(*this); + return it; + }; + /** + * @brief Decremental operator + * @return Reference on a new iterator and decrement the other one + */ + iterator operator-- (int32_t) { + iterator it(*this); + --(*this); + return it; + }; + /** + * @brief egality iterator + * @return true if the iterator is identical pos + */ + bool operator== (const iterator& _obj) const { + if ( m_current == _obj.m_current + && m_data == _obj.m_data) { + return true; + } + return false; + }; + /** + * @brief egality iterator + * @return true if the iterator is identical pos + */ + bool operator!= (const iterator& _obj) const { + if ( m_current != _obj.m_current + || m_data != _obj.m_data) { + return true; + } + return false; + }; + /** + * @brief <= iterator + * @return true if the iterator is identical pos + */ + bool operator<= (const iterator& _obj) const { + if (m_data != _obj.m_data) { + return false; + } + if (m_current <= _obj.m_current) { + return true; + } + return false; + }; + /** + * @brief >= iterator + * @return true if the iterator is identical pos + */ + bool operator>= (const iterator& _obj) const { + if (m_data != _obj.m_data) { + return false; + } + if (m_current >= _obj.m_current) { + return true; + } + return false; + }; + /** + * @brief < iterator + * @return true if the iterator is identical pos + */ + bool operator< (const iterator& _obj) const { + if (m_data != _obj.m_data) { + return false; + } + if (m_current < _obj.m_current) { + return true; + } + return false; + }; + /** + * @brief > iterator + * @return true if the iterator is identical pos + */ + bool operator> (const iterator& _obj) const { + if (m_data != _obj.m_data) { + return false; + } + if (m_current > _obj.m_current) { + return true; + } + return false; + }; + /** + * @brief Get the value on the current element + * @return The request element value + */ + char32_t operator* (); + /** + * @brief Get the position in the buffer + * @return The requested position. + */ + size_t getPos() const { + if (m_data == nullptr) { + return 0; + } + if (m_current < 0) { + return 0; + } + if (m_current >= (int64_t)m_data->size()) { + return m_data->size()-1; + } + return (size_t)m_current; + }; + /** + * @brief move the element position + * @return a new iterator. + */ + iterator operator+ (const int64_t _val) const { + iterator tmpp(*this); + for (int64_t iii=0; iii<_val; ++iii) { + ++tmpp; + } + return tmpp; + }; + iterator operator+ (const int32_t _val) const { + iterator tmpp(*this); + for (int64_t iii=0; iii<_val; ++iii) { + ++tmpp; + } + return tmpp; + }; + iterator operator+ (const size_t _val) const { + iterator tmpp(*this); + for (int64_t iii=0; iii<(int64_t)_val; ++iii) { + ++tmpp; + } + return tmpp; + }; + /** + * @brief move the element position + * @return a new iterator. + */ + iterator operator- (const int64_t _val) const { + iterator tmpp(*this); + for (int64_t iii=0; iii<_val; ++iii) { + --tmpp; + } + return tmpp; + }; + iterator operator- (const int32_t _val) const { + iterator tmpp(*this); + for (int64_t iii=0; iii<_val; ++iii) { + --tmpp; + } + return tmpp; + }; + iterator operator- (const size_t _val) const { + iterator tmpp(*this); + for (int64_t iii=0; iii<(int64_t)_val; ++iii) { + --tmpp; + } + return tmpp; + }; + /* + iterator begin() const { + return iterator(m_data); + } + iterator end() const { + return --iterator(m_data, m_data.end()); + } + */ + }; }; namespace std { diff --git a/lutin_etk.py b/lutin_etk.py index e0e3fd5..d2d88b7 100644 --- a/lutin_etk.py +++ b/lutin_etk.py @@ -16,7 +16,6 @@ def create(target): 'etk/debug.cpp', 'etk/stdTools.cpp', 'etk/log.cpp', - 'etk/RegExp.cpp', 'etk/tool.cpp', 'etk/Noise.cpp', 'etk/Color.cpp', diff --git a/test/main.cpp b/test/main.cpp index a2de6ab..7dc7a0c 100644 --- a/test/main.cpp +++ b/test/main.cpp @@ -15,10 +15,8 @@ #include <etk/archive/Archive.h> #include <etk/log.h> #include <etk/Color.h> -#include <etk/RegExp.h> #include <etk/stdTools.h> #include <string> -#include <regex> #undef __class__ #define __class__ "etktest" @@ -191,545 +189,17 @@ void testRegExpSingle(const std::string& _expression, const std::string& _search } } - -void testRegExp() { - std::string data; - //std::string data = " a /* plop */ \n int eee = 22; // error value \nint main(void) {\n return 0;\n}\n"; - //std::string data = "alpha /* plop */ test"; - //std::string data = "pp \n // qdfqdfsdf \nde"; - //testRegExpSingle("/\\*.*\\*/", data); - //testRegExpSingle("//.*$", data); - //testRegExpSingle("/\\*.*", data); - //testRegExpSingle("[a-z]", data); - //std::string data = " eesd a lzzml plophzzzzzhhhhhrlkmlkml"; - //testRegExpSingle("a.*plop(z{2,3}|h+)+r", data); - - //std::string data = "pp \n# plop // qdfqdfsdf \nde"; - //std::string data = "pp \n# plop //\\\n qdfqdfsdf \nde"; - //std::string data = "p#\ne"; - //testRegExpSingle("#(\\\\\\\\|\\\\\\n|.)*$", data); - //testRegExpSingle("#.*$", data); - - //std::string data = "p//TODO:\ndse"; - //std::string data = "p// TODO:\ndse"; - //std::string data = "p// TODO :\ndse"; - //std::string data = "p// TODO : sdfgsdfsd \ndse"; - //testRegExpSingle("//[ \\t]*TODO[ \\t]*:.*$", data); - - data = "abc m_def ghi"; - data = " protected:\n" - " vec2 m_offset; \n"; - //testRegExpSingle("\\@m_[A-Za-z_0-9]*\\@", data); - - - data = " * @param[in] _mode Configuring mode.\n" - " * @param[in] _time Time in second of the annimation display\n" - " */\n" - " void setAnnimationTime(enum "; - data = "virtual vec2 relativePosition(const vec2& _pos);"; - - //testRegExpSingle("\\@(\\w|_)+[ \\t]*\\(", data); - - data = "include <ewol/Dimensio2n.h>\n" - "#include <ewol/Dimension.h>\n" - "'dfgd\'fg'\n" - "\"dqf\\\"gsdfg\" // \"\n" - "// TODO : sqdkfjsdldkqfj\n" - "\n" - "namespace ewol {\n" - " class Widget;\n" - " namespace widget {\n" - " class Manager;\n" - " class Windows;\n" - " };\n" - "};\n" - "#include <etk/types.h>\n"; - //testRegExpSingle("#(\\\\[\\\\\\n]|.)*$", data); - - - data = " 'dfgd\\'fg' \n" - " vec2 m_offset; \n"; - //testRegExpSingle("'((\\\\[\\\\'])|.)*'", data); - - - /* - data = "ddfgdfgh"; - etk::RegExp<std::string> reg(".*"); - reg.setMaximize(true); - - TK_INFO("Parse RegEx : '" << reg.getRegExDecorated() << "'"); - if (reg.parse(data, 0, data.size()) == true) { - //if (reg.processOneElement(data, 0, data.size()) == true) { - TK_INFO(" match [" << reg.start() << ".." << reg.stop() << "] "); - TK_INFO(" ==> '" << std::string(data, reg.start(), reg.stop()-reg.start()) << "'"); - } - - data = "plop \"\" sdfsdf s\"swdfsqd sdfgsdfg \" \" sdfsf"; - reg = etk::RegExp<std::string>("\"(\\\\[\\\\\"]|.)*\""); - reg.setMaximize(false); - TK_INFO("Parse RegEx : '" << reg.getRegExDecorated() << "'"); - if (reg.parse(data, 0, data.size()) == true) { - //if (reg.processOneElement(data, 0, data.size()) == true) { - TK_INFO(" match [" << reg.start() << ".." << reg.stop() << "] "); - TK_INFO(" ==> '" << std::string(data, reg.start(), reg.stop()-reg.start()) << "'"); - } - //TODO : good : "(\\+|[0-9])*" ==> really bad : "(+|[0-9])*" - - data = "void limit(const vec2& _origin, const vec2& _size);\n"; - reg = etk::RegExp<std::string>("\\@(\\w|_)+[ \\t]*\\("); - reg.setMaximize(false); - TK_INFO("Parse RegEx : '" << reg.getRegExDecorated() << "'"); - if (reg.parse(data, 0, data.size()) == true) { - //if (reg.processOneElement(data, 0, data.size()) == true) { - TK_INFO(" match [" << reg.start() << ".." << reg.stop() << "] "); - TK_INFO(" ==> '" << std::string(data, reg.start(), reg.stop()-reg.start()) << "'"); - } - data = "void limit const vec2& _origin, const vec2& _size);\n"; - if (reg.parse(data, 0, data.size()) == true) { - //if (reg.processOneElement(data, 0, data.size()) == true) { - TK_INFO(" match [" << reg.start() << ".." << reg.stop() << "] "); - TK_INFO(" ==> '" << std::string(data, reg.start(), reg.stop()-reg.start()) << "'"); - } - */ - /* - std::tr1::cmatch res; - str = "<h2>Egg prices</h2>"; - std::tr1::regex rx("<h(.)>([^<]+)"); - std::tr1::regex_search(str.c_str(), res, rx); - std::cout << res[1] << ". " << res[2] << "\n"; - */ - { - std::string lines[] = {"Roses are #ff0000", - "violets are #0000ff", - "all of my base are belong to you"}; - - std::regex myRegex("#([a-f0-9]{6})"); - /* - for (const auto &line : lines) { - std::cout << line << ": " << std::regex_search(line, color_regex) << '\n'; - } - */ - - std::smatch resultMatch; - for (const auto &line : lines) { - TK_DEBUG("in line : '" << line << "'"); - std::regex_search(line, resultMatch, myRegex); - TK_DEBUG(" Find " << resultMatch.size() << " elements"); - for (size_t iii=0; iii<resultMatch.size(); ++iii) { - int32_t posStart = std::distance(line.begin(), resultMatch[iii].first); - int32_t posStop = std::distance(line.begin(), resultMatch[iii].second); - TK_DEBUG(" [" << iii << "] " << *resultMatch[iii].first); - TK_DEBUG(" [" << iii << "] " << *resultMatch[iii].second); - TK_DEBUG(" [" << iii << "] " << std::string(line, posStart, posStop-posStart)); - /* - std::ssub_match sub_match = color_match[i]; - std::string sub_match_str = sub_match.str(); - */ - } - } - } - - { - const std::string myData = "void limit(const vec2& _origin, const vec2& _size);\n plop(sf)"; - std::regex myRegex("\\b(\\w|_)+[ \\t]*\\("); - - std::smatch resultMatch; - TK_DEBUG("in line : '" << myData << "'"); - std::regex_search(myData, resultMatch, myRegex); - TK_DEBUG(" Find " << resultMatch.size() << " elements"); - for (size_t iii=0; iii<resultMatch.size(); ++iii) { - int32_t posStart = std::distance(myData.begin(), resultMatch[iii].first); - int32_t posStop = std::distance(myData.begin(), resultMatch[iii].second); - TK_DEBUG(" [" << iii << "] " << *resultMatch[iii].first); - TK_DEBUG(" [" << iii << "] " << *resultMatch[iii].second); - TK_DEBUG(" [" << iii << "] " << std::string(myData, posStart, posStop-posStart)); - - } - - } - - { - const std::u32string data = utf8::convertUnicode("kjhkjhk"); - const std::u32string data2(U"kjhkjhk"); - const std::string data3("kjhkjhk"); - const char32_t data5[] = U"kjhkjhk"; - //std::basic_regex<char32_t, std::u32string> regexp(data2); - //std::basic_regex<char32_t> regexp((const char32_t*)data2.c_str()); - std::basic_regex<char32_t> regexp2(data5); - } -} -#if 0 -// http://en.cppreference.com/w/cpp/regex/regex_traits/lookup_classname -namespace std { - // specify char32_t traits - /** - * @brief Describes aspects of a regular expression. - * - * A regular expression traits class that satisfies the requirements of - * section [28.7]. - * - * The class %regex is paramete rized around a set of related types and - * functions used to complete the definition of its semantics. This class - * satisfies the requirements of such a traits class. - */ - template<> struct regex_traits<char32_t> { - public: - typedef _Ch_type char32_t; - typedef std::basic_string<char32_t> string_type; - typedef std::locale locale_type; - private: - struct _RegexMask { - typedef typename std::ctype<char32_t>::mask _BaseType; - _BaseType _M_base; - unsigned char _M_extended; - static constexpr unsigned char _S_under = 1 << 0; - // FIXME: _S_blank should be removed in the future, - // when locale's complete. - static constexpr unsigned char _S_blank = 1 << 1; - static constexpr unsigned char _S_valid_mask = 0x3; - constexpr _RegexMask(_BaseType __base = 0, unsigned char __extended = 0) : - _M_base(__base), _M_extended(__extended) { - - } - constexpr _RegexMask operator&(_RegexMask __other) const { - return _RegexMask(_M_base & __other._M_base, _M_extended & __other._M_extended); - } - constexpr _RegexMask operator|(_RegexMask __other) const { - return _RegexMask(_M_base | __other._M_base, _M_extended | __other._M_extended); - } - constexpr _RegexMask operator^(_RegexMask __other) const { - return _RegexMask(_M_base ^ __other._M_base, _M_extended ^ __other._M_extended); - } - constexpr _RegexMask operator~() const { - return _RegexMask(~_M_base, ~_M_extended); - } - _RegexMask& operator&=(_RegexMask __other) { - return *this = (*this) & __other; - } - _RegexMask& operator|=(_RegexMask __other) { - return *this = (*this) | __other; - } - _RegexMask& operator^=(_RegexMask __other) { - return *this = (*this) ^ __other; - } - constexpr bool operator==(_RegexMask __other) const { - return (_M_extended & _S_valid_mask) == (__other._M_extended & _S_valid_mask) - && _M_base == __other._M_base; - } - constexpr bool operator!=(_RegexMask __other) const { - return !((*this) == __other); - } - }; - public: - typedef _RegexMask char_class_type; - public: - /** - * @brief Constructs a default traits object. - */ - regex_traits() { - - } - /** - * @brief Gives the length of a C-style string starting at @p __p. - * - * @param __p a pointer to the start of a character sequence. - * - * @returns the number of characters between @p *__p and the first - * default-initialized value of type @p char32_t. In other words, uses - * the C-string algorithm for determining the length of a sequence of - * characters. - */ - static std::size_t length(const char32_t* __p) { - return string_type::traits_type::length(__p); - } - - /** - * @brief Performs the identity translation. - * - * @param __c A character to the locale-specific character set. - * - * @returns __c. - */ - char32_t translate(char32_t __c) const { - return __c; - } - - /** - * @brief Translates a character into a case-insensitive equivalent. - * - * @param __c A character to the locale-specific character set. - * - * @returns the locale-specific lower-case equivalent of __c. - * @throws std::bad_cast if the imbued locale does not support the ctype - * facet. - */ - char32_t translate_nocase(char32_t __c) const { - typedef std::ctype<char32_t> __ctype_type; - const __ctype_type& __fctyp(use_facet<__ctype_type>(_M_locale)); - return __fctyp.tolower(__c); - } - - /** - * @brief Gets a sort key for a character sequence. - * - * @param __first beginning of the character sequence. - * @param __last one-past-the-end of the character sequence. - * - * Returns a sort key for the character sequence designated by the - * iterator range [F1, F2) such that if the character sequence [G1, G2) - * sorts before the character sequence [H1, H2) then - * v.transform(G1, G2) < v.transform(H1, H2). - * - * What this really does is provide a more efficient way to compare a - * string to multiple other strings in locales with fancy collation - * rules and equivalence classes. - * - * @returns a locale-specific sort key equivalent to the input range. - * - * @throws std::bad_cast if the current locale does not have a collate - * facet. - */ - template<typename _Fwd_iter> string_type transform(_Fwd_iter __first, _Fwd_iter __last) const { - typedef std::collate<char32_t> __collate_type; - const __collate_type& __fclt(use_facet<__collate_type>(_M_locale)); - string_type __s(__first, __last); - return __fclt.transform(__s.data(), __s.data() + __s.size()); - } - - /** - * @brief Gets a sort key for a character sequence, independent of case. - * - * @param __first beginning of the character sequence. - * @param __last one-past-the-end of the character sequence. - * - * Effects: if typeid(use_facet<collate<_Ch_type> >) == - * typeid(collate_byname<_Ch_type>) and the form of the sort key - * returned by collate_byname<_Ch_type>::transform(__first, __last) - * is known and can be converted into a primary sort key - * then returns that key, otherwise returns an empty string. - * - * @todo Implement this function correctly. - */ - template<typename _Fwd_iter> string_type transform_primary(_Fwd_iter __first, _Fwd_iter __last) const { - // TODO : this is not entirely correct. - // This function requires extra support from the platform. - // - // Read http://gcc.gnu.org/ml/libstdc++/2013-09/msg00117.html and - // http://www.open-std.org/Jtc1/sc22/wg21/docs/papers/2003/n1429.htm - // for details. - typedef std::ctype<char32_t> __ctype_type; - const __ctype_type& __fctyp(use_facet<__ctype_type>(_M_locale)); - std::vector<char32_t> __s(__first, __last); - __fctyp.tolower(__s.data(), __s.data() + __s.size()); - return this->transform(__s.data(), __s.data() + __s.size()); - } - - /** - * @brief Gets a collation element by name. - * - * @param __first beginning of the collation element name. - * @param __last one-past-the-end of the collation element name. - * - * @returns a sequence of one or more characters that represents the - * collating element consisting of the character sequence designated by - * the iterator range [__first, __last). Returns an empty string if the - * character sequence is not a valid collating element. - */ - template<typename _Fwd_iter> string_type lookup_collatename(_Fwd_iter __first, _Fwd_iter __last) const; - - /** - * @brief Maps one or more characters to a named character - * classification. - * - * @param __first beginning of the character sequence. - * @param __last one-past-the-end of the character sequence. - * @param __icase ignores the case of the classification name. - * - * @returns an unspecified value that represents the character - * classification named by the character sequence designated by - * the iterator range [__first, __last). If @p icase is true, - * the returned mask identifies the classification regardless of - * the case of the characters to be matched (for example, - * [[:lower:]] is the same as [[:alpha:]]), otherwise a - * case-dependent classification is returned. The value - * returned shall be independent of the case of the characters - * in the character sequence. If the name is not recognized then - * returns a value that compares equal to 0. - * - * At least the following names (or their wide-character equivalent) are - * supported. - * - d - * - w - * - s - * - alnum - * - alpha - * - blank - * - cntrl - * - digit - * - graph - * - lower - * - print - * - punct - * - space - * - upper - * - xdigit - */ - template<typename _Fwd_iter> char_class_type lookup_classname(_Fwd_iter __first, _Fwd_iter __last, bool __icase = false) const; - - /** - * @brief Determines if @p c is a member of an identified class. - * - * @param __c a character. - * @param __f a class type (as returned from lookup_classname). - * - * @returns true if the character @p __c is a member of the classification - * represented by @p __f, false otherwise. - * - * @throws std::bad_cast if the current locale does not have a ctype - * facet. - */ - bool isctype(_Ch_type __c, char_class_type __f) const; - - /** - * @brief Converts a digit to an int. - * - * @param __ch a character representing a digit. - * @param __radix the radix if the numeric conversion (limited to 8, 10, - * or 16). - * - * @returns the value represented by the digit __ch in base radix if the - * character __ch is a valid digit in base radix; otherwise returns -1. - */ - int value(_Ch_type __ch, int __radix) const; - - /** - * @brief Imbues the regex_traits object with a copy of a new locale. - * - * @param __loc A locale. - * - * @returns a copy of the previous locale in use by the regex_traits - * object. - * - * @note Calling imbue with a different locale than the one currently in - * use invalidates all cached data held by *this. - */ - locale_type imbue(locale_type __loc) { - std::swap(_M_locale, __loc); - return __loc; - } - - /** - * @brief Gets a copy of the current locale in use by the regex_traits - * object. - */ - locale_type getloc() const { - return _M_locale; - } - protected: - locale_type _M_locale; - }; -}; -#endif - -struct unicode_traits : std::regex_traits<char32_t> { - static std::map<char32_t, int> data; - int value(char32_t ch, int radix ) const { - char32_t up = std::toupper(ch, getloc()); - return data.count(up) ? data[up] : regex_traits::value(ch, radix); - } - bool isctype(char32_t __c, char_class_type __f) const { - TK_ERROR("plop 10"); - bool plop = std::regex_traits<char32_t>::isctype(__c, __f); - TK_ERROR("plop 11"); - return plop; - } - char32_t translate_nocase(char32_t __c) const { - TK_ERROR("plop 20"); - typedef std::ctype<char32_t> __ctype_type; - TK_ERROR("plop 21"); - const __ctype_type& __fctyp(std::use_facet<__ctype_type>(_M_locale)); - TK_ERROR("plop 22"); - char32_t plop = __fctyp.tolower(__c); - TK_ERROR("plop 23"); - return plop; - } - - template<typename _Fwd_iter> std::u32string transform(_Fwd_iter __first, _Fwd_iter __last) const { - TK_ERROR("plop 30"); - typedef std::collate<char32_t> __collate_type; - TK_ERROR("plop 31"); - const __collate_type& __fclt(std::use_facet<__collate_type>(_M_locale)); - TK_ERROR("plop 32"); - std::u32string __s(__first, __last); - TK_ERROR("plop 33"); - std::u32string plop = __fclt.transform(__s.data(), __s.data() + __s.size()); - TK_ERROR("plop 34"); - return plop; - } -}; -std::map<char32_t, int> unicode_traits::data = {{U'〇',0}, {U'一',1}, {U'二',2}, - {U'三',3}, {U'四',4}, {U'五',5}, - {U'六',6}, {U'七',7}, {U'八',8}, - {U'九',9}, {U'A',10}, {U'B',11}, - {U'C',12}, {U'D',13}, {U'E',14}, - {U'F',15}}; - -/* -int main() { - std::locale::global(std::locale("ja_JP.utf8")); - std::wcout.sync_with_stdio(false); - std::wcout.imbue(std::locale()); - - std::wstring in = L"�"; - - if(std::regex_match(in, std::wregex(L"\\u98a8"))) - std::wcout << "\\u98a8 matched " << in << '\n'; - - if(std::regex_match(in, std::basic_regex<wchar_t, jnum_traits>(L"\\u]kAk"))) - std::wcout << L"\\u]kAk with custom traits matched " << in << '\n'; -} -*/ - -void testRegExp2() { - std::u32string lines[] = {U"Roses are #ff0000", - U"violets are #0000ff", - U"all of my base are belong to you"}; - //std::locale::global(std::locale("fr_FR.utf8")); - //std::basic_regex<char32_t, unicode_traits> color_regex(U"a");//([a-f0-9]{2})([a-f0-9]{2})([a-f0-9]{2})"); - #if 0 - for (const auto &line : lines) { - std::cout << "search : " << std::regex_search(line, color_regex) << '\n'; - } - std::match_results<std::u32string::const_iterator> color_match; - for (const auto &line : lines) { - std::regex_search(line, color_match, color_regex); - std::cout << "matches for \n"; - /* - for (size_t i = 0; i < color_match.size(); ++i) { - std::ssub_match sub_match = color_match[i]; - std::string sub_match_str = sub_match.str(); - std::cout << i << ": " << sub_match_str << '\n'; - } - */ - } - #endif -}; - - - int main(int argc, const char *argv[]) { // the only one init for etk: etk::log::setLevel(etk::log::logLevelVerbose); etk::setArgZero(argv[0]); etk::initDefaultFolder("ewolApplNoName"); - //testHash(); - ////testFSNode(); - ////testDimension(); - ////testArchive(); - //testColor(); - //testRegExp(); - testRegExp2(); + testHash(); + //testFSNode(); + //testDimension(); + //testArchive(); + testColor(); return 0; }