[DEV] remove etkRegExp ==> now use generic std::regex
This commit is contained in:
parent
6c596f8f4c
commit
0b42adbec8
@ -38,8 +38,6 @@ set(src_files
|
||||
etk/stdTools.h
|
||||
etk/Stream.cpp
|
||||
etk/Stream.h
|
||||
etk/RegExp.cpp
|
||||
etk/RegExp.h
|
||||
etk/tool.cpp
|
||||
etk/tool.h
|
||||
etk/Noise.cpp
|
||||
|
468
etk/RegExp.cpp
468
etk/RegExp.cpp
@ -1,468 +0,0 @@
|
||||
/**
|
||||
* @author Edouard DUPIN
|
||||
*
|
||||
* @copyright 2011, Edouard DUPIN, all right reserved
|
||||
*
|
||||
* @license APACHE v2.0 (see license file)
|
||||
*/
|
||||
|
||||
|
||||
#include <etk/RegExp.h>
|
||||
|
||||
|
||||
|
||||
/**
 * @brief Conversion table used to translate one character of a regular
 *        expression (with or without a preceding back-slash) into either a
 *        plain character or an internal opcode.
 *
 * Column order: haveBackSlash, inputValue, newValue, opcode.
 * In this table every row that carries a character in the third column uses
 * etk::regexpOpcodeError in the fourth one, and every row that carries a real
 * opcode has 0 in the third one.
 *
 * NOTE(review): the key (true, 'e') is present twice (EOF opcode, then the
 * 0x1B escape character). Which row is effective depends on the lookup code,
 * which is not visible here; the order of the rows is kept untouched.
 */
const struct etk::convertionTable etk::regexp::constConvertionTable[] = {
	// ---- grouping, sets and repetition delimiters ----
	{ false, '(',  0,    etk::regexpOpcodePTheseIn },
	{ true,  '(',  '(',  etk::regexpOpcodeError },
	{ false, ')',  0,    etk::regexpOpcodePTheseOut },
	{ true,  ')',  ')',  etk::regexpOpcodeError },
	{ false, '[',  0,    etk::regexpOpcodeBracketIn },
	{ true,  '[',  '[',  etk::regexpOpcodeError },
	{ false, ']',  0,    etk::regexpOpcodeBracketOut },
	{ true,  ']',  ']',  etk::regexpOpcodeError },
	{ false, '{',  0,    etk::regexpOpcodeBracetIn },
	{ true,  '{',  '{',  etk::regexpOpcodeError },
	{ false, '}',  0,    etk::regexpOpcodeBracetOut },
	{ true,  '}',  '}',  etk::regexpOpcodeError },
	// ---- operators ----
	{ false, '-',  0,    etk::regexpOpcodeTo },
	{ true,  '-',  '-',  etk::regexpOpcodeError },
	{ false, '*',  0,    etk::regexpOpcodeStar },
	{ true,  '*',  '*',  etk::regexpOpcodeError },
	{ false, '.',  0,    etk::regexpOpcodeDot },
	{ true,  '.',  '.',  etk::regexpOpcodeError },
	{ true,  'e',  0,    etk::regexpOpcodeEOF },
	{ false, 'e',  'e',  etk::regexpOpcodeError },
	{ false, '?',  0,    etk::regexpOpcodeQuestion },
	{ true,  '?',  '?',  etk::regexpOpcodeError },
	{ false, '+',  0,    etk::regexpOpcodePlus },
	{ true,  '+',  '+',  etk::regexpOpcodeError },
	{ false, '|',  0,    etk::regexpOpcodePipe },
	{ true,  '|',  '|',  etk::regexpOpcodeError },
	{ false, '^',  0,    etk::regexpOpcodeStartOfLine },
	{ true,  '^',  '^',  etk::regexpOpcodeError },
	{ false, '$',  0,    etk::regexpOpcodeEndOfLine },
	{ true,  '$',  '$',  etk::regexpOpcodeError },
	// ---- back-slashed character classes ----
	{ true,  'd',  0,    etk::regexpOpcodeDigit },
	{ true,  'D',  0,    etk::regexpOpcodeDigitNot },
	{ true,  'l',  0,    etk::regexpOpcodeLetter },
	{ true,  'L',  0,    etk::regexpOpcodeLetterNot },
	{ true,  's',  0,    etk::regexpOpcodeSpace },
	{ true,  'S',  0,    etk::regexpOpcodeSpaceNot },
	{ true,  'w',  0,    etk::regexpOpcodeWord },
	{ true,  'W',  0,    etk::regexpOpcodeWordNot },
	// ---- back-slashed plain characters ----
	{ true,  'a',  '\a', etk::regexpOpcodeError },
	{ true,  'b',  '\b', etk::regexpOpcodeError },
	{ true,  'e',  0x1B, etk::regexpOpcodeError }, // Escape character <Esc>
	{ true,  'f',  '\f', etk::regexpOpcodeError },
	{ true,  'n',  '\n', etk::regexpOpcodeError },
	{ true,  'r',  '\r', etk::regexpOpcodeError },
	{ true,  't',  '\t', etk::regexpOpcodeError },
	{ true,  'v',  '\v', etk::regexpOpcodeError },
	{ true,  '\\', '\\', etk::regexpOpcodeError },
	{ true,  '&',  '&',  etk::regexpOpcodeError },
	{ true,  '0',  '\0', etk::regexpOpcodeError },
	{ true,  '@',  0,    etk::regexpOpcodeNoChar },
};
//! Number of rows available in etk::regexp::constConvertionTable.
const int64_t etk::regexp::constConvertionTableSize = sizeof(etk::regexp::constConvertionTable) / sizeof(etk::regexp::constConvertionTable[0]);
|
||||
|
||||
static const char* parseStatusTable[] = {
|
||||
"parseStatusUnknow",
|
||||
"parseStatusNone",
|
||||
"parseStatusPartial",
|
||||
"parseStatusFull"
|
||||
};
|
||||
std::ostream& etk::regexp::operator <<(std::ostream& _os, enum etk::regexp::parseStatus _obj) {
|
||||
_os << parseStatusTable[_obj];
|
||||
return _os;
|
||||
}
|
||||
/**
 * @brief Stream a FindProperty in the form
 *        "property([<start>,<stop>]*<multiplicity> <status>)".
 * @param[in,out] _os Stream to write in.
 * @param[in] _obj Property to display.
 * @return The stream given in parameter.
 */
std::ostream& etk::regexp::operator <<(std::ostream& _os, const etk::regexp::FindProperty& _obj) {
	_os << "property([";
	_os << _obj.getPositionStart();
	_os << ",";
	_os << _obj.getPositionStop();
	_os << "]*";
	_os << _obj.getMultiplicity();
	_os << " ";
	_os << _obj.getStatus();
	_os << ")";
	return _os;
}
|
||||
|
||||
std::string etk::regexp::createString(const std::vector<char32_t>& _data, int64_t _start, int64_t _stop) {
|
||||
std::string output(ETK_BASH_COLOR_NORMAL);
|
||||
for (int64_t iii=_start; iii<(int64_t)_data.size() && iii<_stop ; iii++) {
|
||||
switch(_data[iii]) {
|
||||
case regexpOpcodePTheseIn: output += std::string(ETK_BASH_COLOR_RED) + (char*)"(" + ETK_BASH_COLOR_NORMAL; break;
|
||||
case regexpOpcodePTheseOut: output += std::string(ETK_BASH_COLOR_RED) + (char*)")" + ETK_BASH_COLOR_NORMAL; break;
|
||||
case regexpOpcodeBracketIn: output += std::string(ETK_BASH_COLOR_YELLOW) + (char*)"[" + ETK_BASH_COLOR_NORMAL; break;
|
||||
case regexpOpcodeBracketOut: output += std::string(ETK_BASH_COLOR_YELLOW) + (char*)"]" + ETK_BASH_COLOR_NORMAL; break;
|
||||
case regexpOpcodeTo: output += std::string(ETK_BASH_COLOR_YELLOW) + (char*)"-" + ETK_BASH_COLOR_NORMAL; break;
|
||||
case regexpOpcodeBracetIn: output += std::string(ETK_BASH_COLOR_GREEN) + (char*)"{" + ETK_BASH_COLOR_NORMAL; break;
|
||||
case regexpOpcodeBracetOut: output += std::string(ETK_BASH_COLOR_GREEN) + (char*)"}" + ETK_BASH_COLOR_NORMAL; break;
|
||||
case regexpOpcodeStar: output += std::string(ETK_BASH_COLOR_BLUE) + (char*)"*" + ETK_BASH_COLOR_NORMAL; break;
|
||||
case regexpOpcodeDot: output += std::string(ETK_BASH_COLOR_BLUE) + (char*)"." + ETK_BASH_COLOR_NORMAL; break;
|
||||
case regexpOpcodeQuestion: output += std::string(ETK_BASH_COLOR_BLUE) + (char*)"?" + ETK_BASH_COLOR_NORMAL; break;
|
||||
case regexpOpcodePlus: output += std::string(ETK_BASH_COLOR_BLUE) + (char*)"+" + ETK_BASH_COLOR_NORMAL; break;
|
||||
case regexpOpcodePipe: output += std::string(ETK_BASH_COLOR_BLUE) + (char*)"|" + ETK_BASH_COLOR_NORMAL; break;
|
||||
case regexpOpcodeNoChar: output += std::string(ETK_BASH_COLOR_MAGENTA) + (char*)"@" + ETK_BASH_COLOR_NORMAL; break;
|
||||
case regexpOpcodeStartOfLine: output += std::string(ETK_BASH_COLOR_MAGENTA) + (char*)"^" + ETK_BASH_COLOR_NORMAL; break;
|
||||
case regexpOpcodeEndOfLine: output += std::string(ETK_BASH_COLOR_MAGENTA) + (char*)"$" + ETK_BASH_COLOR_NORMAL; break;
|
||||
case regexpOpcodeDigit: output += std::string(ETK_BASH_COLOR_MAGENTA) + (char*)"\\d" + ETK_BASH_COLOR_NORMAL; break;
|
||||
case regexpOpcodeDigitNot: output += std::string(ETK_BASH_COLOR_MAGENTA) + (char*)"\\D" + ETK_BASH_COLOR_NORMAL; break;
|
||||
case regexpOpcodeLetter: output += std::string(ETK_BASH_COLOR_MAGENTA) + (char*)"\\l" + ETK_BASH_COLOR_NORMAL; break;
|
||||
case regexpOpcodeLetterNot: output += std::string(ETK_BASH_COLOR_MAGENTA) + (char*)"\\L" + ETK_BASH_COLOR_NORMAL; break;
|
||||
case regexpOpcodeSpace: output += std::string(ETK_BASH_COLOR_MAGENTA) + (char*)"\\s" + ETK_BASH_COLOR_NORMAL; break;
|
||||
case regexpOpcodeSpaceNot: output += std::string(ETK_BASH_COLOR_MAGENTA) + (char*)"\\S" + ETK_BASH_COLOR_NORMAL; break;
|
||||
case regexpOpcodeWord: output += std::string(ETK_BASH_COLOR_MAGENTA) + (char*)"\\w" + ETK_BASH_COLOR_NORMAL; break;
|
||||
case regexpOpcodeWordNot: output += std::string(ETK_BASH_COLOR_MAGENTA) + (char*)"\\W" + ETK_BASH_COLOR_NORMAL; break;
|
||||
case regexpOpcodeEOF: output += std::string(ETK_BASH_COLOR_MAGENTA) + (char*)"\\e" + ETK_BASH_COLOR_NORMAL; break;
|
||||
case '\n': output += std::string(ETK_BASH_COLOR_MAGENTA) + (char*)"\\n" + ETK_BASH_COLOR_NORMAL; break;
|
||||
case '\t': output += std::string(ETK_BASH_COLOR_MAGENTA) + (char*)"\\t" + ETK_BASH_COLOR_NORMAL; break;
|
||||
default:
|
||||
char plop[10];
|
||||
int8_t nb = u32char::convertUtf8(_data[iii], plop);
|
||||
plop[nb] = '\0';
|
||||
output += plop;
|
||||
break;
|
||||
}
|
||||
}
|
||||
return output;
|
||||
}
|
||||
|
||||
/**
 * @brief Get an indentation string (2 spaces per level) used by the debug displays.
 * @param[in] _level Requested indentation level; every level over 30 is displayed like level 30.
 * @return Pointer on a null-terminated string of 2*_level spaces (60 at the maximum).
 *         The pointer references a static buffer shared by all the callers: it must
 *         not be freed, and should not be modified.
 */
char* etk::regexp::levelSpace(uint32_t _level) {
	static const uint32_t maxLevel = 30;
	// The buffer is built from its size instead of a literal: the pointer
	// arithmetic below is then always inside the buffer, and no string literal
	// is converted to a non-const "char*".
	static std::string indentation(2*maxLevel, ' ');
	const uint32_t level = (_level > maxLevel) ? maxLevel : _level;
	// skip the unneeded spaces at the beginning: the end of the buffer is the null terminator
	return &indentation[0] + (2*maxLevel - 2*level);
}
|
||||
|
||||
|
||||
int64_t etk::regexp::getLenOfPTheseElem(const std::vector<char32_t>& _data, int64_t _startPos) {
|
||||
if (_startPos>=(int64_t)_data.size()){
|
||||
return 0;
|
||||
}
|
||||
int64_t pos = _startPos;
|
||||
int32_t nbOpen = 0;
|
||||
// special case of the (...) or | ==> we search '|' or ')'
|
||||
if( _data[pos] == regexpOpcodePTheseOut
|
||||
|| _data[pos] == regexpOpcodePipe) {
|
||||
return 0;
|
||||
}
|
||||
// find size ...
|
||||
while (pos < (int64_t)_data.size() ) {
|
||||
if(_data[pos] == regexpOpcodePTheseIn) {
|
||||
// find a sub section :
|
||||
nbOpen++;
|
||||
} else if(0 < nbOpen) {
|
||||
if (_data[pos] == regexpOpcodePTheseOut) {
|
||||
nbOpen--;
|
||||
if (0 > nbOpen) {
|
||||
TK_ERROR("Error in the (...) find element at "<< pos);
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
} else if( _data[pos] == regexpOpcodePTheseOut
|
||||
|| _data[pos] == regexpOpcodePipe) {
|
||||
// Find the end of the (...)
|
||||
// just return the size inside
|
||||
int32_t sizeInside = pos - _startPos;
|
||||
if (0 >= sizeInside) {
|
||||
TK_ERROR("Error in the (...) no data at "<< pos-1);
|
||||
return 0;
|
||||
}
|
||||
return sizeInside;
|
||||
}
|
||||
pos++;
|
||||
}
|
||||
return pos - _startPos;
|
||||
}
|
||||
|
||||
int64_t etk::regexp::getLenOfPThese(const std::vector<char32_t>& _data, int64_t _startPos) {
|
||||
int64_t pos = _startPos;
|
||||
int32_t nbOpen = 0;
|
||||
// special case of the (...) or | ==> we search '|' or ')'
|
||||
if(_data[pos]==regexpOpcodePTheseOut) {
|
||||
return 0;
|
||||
}
|
||||
if(_data[pos]!=regexpOpcodePTheseIn) {
|
||||
TK_ERROR(" find error in PThese");
|
||||
return 0;
|
||||
}
|
||||
pos++;
|
||||
// find size ...
|
||||
while (pos < (int64_t)_data.size() ) {
|
||||
if(_data[pos]==regexpOpcodePTheseIn) {
|
||||
// find a sub section :
|
||||
nbOpen++;
|
||||
} else if(0 < nbOpen) {
|
||||
if (_data[pos]==regexpOpcodePTheseOut) {
|
||||
nbOpen--;
|
||||
if (0 > nbOpen) {
|
||||
TK_ERROR("Error in the (...) find element at "<< pos);
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
} else if(_data[pos]==regexpOpcodePTheseOut) {
|
||||
// Find the end of the (...)
|
||||
// just return the size inside
|
||||
int32_t sizeInside = pos - _startPos-1;
|
||||
if (0 >= sizeInside) {
|
||||
TK_ERROR("Error in the (...) no data at "<< pos-1);
|
||||
return 0;
|
||||
}
|
||||
return sizeInside;
|
||||
}
|
||||
pos++;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
int64_t etk::regexp::getLenOfBracket(const std::vector<char32_t>& _data, int64_t _startPos) {
|
||||
int64_t pos = _startPos;
|
||||
// special case of the (...) or | ==> we search '|' or ')'
|
||||
if(_data[pos]==regexpOpcodeBracketOut) {
|
||||
return 0;
|
||||
}
|
||||
if(_data[pos] != regexpOpcodeBracketIn) {
|
||||
TK_ERROR("find no {...");
|
||||
return 0;
|
||||
}
|
||||
pos++;
|
||||
// find size ...
|
||||
while (pos < (int64_t)_data.size() ) {
|
||||
if(_data[pos]==regexpOpcodeBracketOut) {
|
||||
// Find the end of the [...]
|
||||
// just return the size inside
|
||||
int32_t sizeInside = pos - _startPos -1 ;
|
||||
if (0 >= sizeInside) {
|
||||
TK_ERROR("Error in the [...] no data at "<< pos-1);
|
||||
return 0;
|
||||
}
|
||||
return sizeInside;
|
||||
} else if( _data[pos] != regexpOpcodeTo
|
||||
&& _data[pos] > 0xFF ) {
|
||||
TK_ERROR("Error in the [...] not permited element at "<< pos << " '" << (char)_data[pos] << "'");
|
||||
return 0;
|
||||
}
|
||||
pos++;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
int64_t etk::regexp::getLenOfBrace(const std::vector<char32_t>& _data, int64_t _startPos) {
|
||||
int32_t pos = _startPos;
|
||||
// special case of the (...) or | ==> we search '|' or ')'
|
||||
if(_data[pos]==regexpOpcodeBracetOut) {
|
||||
return 0;
|
||||
}
|
||||
if(_data[pos]!=regexpOpcodeBracetIn) {
|
||||
TK_ERROR(" did not find brace IN { ");
|
||||
return 0;
|
||||
}
|
||||
pos++;
|
||||
// find size ...
|
||||
while (pos < (int64_t)_data.size() ) {
|
||||
if(_data[pos]==regexpOpcodeBracetOut) {
|
||||
// Find the end of the [...]
|
||||
// just return the size inside
|
||||
int32_t sizeInside = pos - _startPos -1 ;
|
||||
if (0 >= sizeInside) {
|
||||
TK_ERROR("Error in the {...} no data at "<< pos-1);
|
||||
return 0;
|
||||
}
|
||||
return sizeInside;
|
||||
} else if( _data[pos] != ','
|
||||
&& ( _data[pos] < '0'
|
||||
|| _data[pos] > '9') ) {
|
||||
TK_ERROR("Error in the {...} not permited element at "<< pos << " '" << _data[pos] << "'");
|
||||
return 0;
|
||||
}
|
||||
pos++;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
int64_t etk::regexp::getLenOfNormal(const std::vector<char32_t>& _data, int64_t _startPos) {
|
||||
int64_t pos = _startPos;
|
||||
// find size ...
|
||||
while (pos < (int64_t)_data.size() ) {
|
||||
switch(_data[pos]) {
|
||||
case regexpOpcodePTheseIn:
|
||||
case regexpOpcodePTheseOut:
|
||||
case regexpOpcodeBracketIn:
|
||||
case regexpOpcodeBracketOut:
|
||||
case regexpOpcodeBracetIn:
|
||||
case regexpOpcodeBracetOut:
|
||||
case regexpOpcodeTo:
|
||||
case regexpOpcodeStar:
|
||||
case regexpOpcodeDot:
|
||||
case regexpOpcodeQuestion:
|
||||
case regexpOpcodePlus:
|
||||
case regexpOpcodePipe:
|
||||
case regexpOpcodeStartOfLine:
|
||||
case regexpOpcodeEndOfLine:
|
||||
case regexpOpcodeDigit:
|
||||
case regexpOpcodeDigitNot:
|
||||
case regexpOpcodeLetter:
|
||||
case regexpOpcodeLetterNot:
|
||||
case regexpOpcodeSpace:
|
||||
case regexpOpcodeSpaceNot:
|
||||
case regexpOpcodeWord:
|
||||
case regexpOpcodeWordNot:
|
||||
{
|
||||
// just return the size inside
|
||||
int32_t sizeInside = pos - _startPos;
|
||||
if (0 >= sizeInside) {
|
||||
TK_ERROR("Error in the normal data : no data ...");
|
||||
}
|
||||
return sizeInside;
|
||||
}
|
||||
break;
|
||||
default :
|
||||
// nothing to do ...
|
||||
break;
|
||||
}
|
||||
pos++;
|
||||
}
|
||||
if ((int64_t)pos - (int64_t)_startPos < 0) {
|
||||
return 0;
|
||||
}
|
||||
return pos - _startPos ;
|
||||
}
|
||||
|
||||
|
||||
bool etk::regexp::parseBrace(const std::vector<char32_t>& _data, uint32_t& _min, uint32_t& _max) {
|
||||
//TK_INFO("parse {...} in "; DisplayElem(data); );
|
||||
int64_t k=0;
|
||||
|
||||
int32_t firstElement = 0;
|
||||
int32_t SecondElement = 0;
|
||||
|
||||
while(k < (int64_t)_data.size()) {
|
||||
if (_data[k] == ',') {
|
||||
k++;
|
||||
break;
|
||||
} if (_data[k] == '}' ) {
|
||||
SecondElement = firstElement;
|
||||
goto allIsSet;
|
||||
} else if(u32char::isInteger(_data[k]) == true) {
|
||||
firstElement *= 10;
|
||||
firstElement += u32char::toInt(_data[k]);
|
||||
} else {
|
||||
TK_ERROR("Can not parse this element " << (char)_data[k] << " at pos " << k);
|
||||
return false;
|
||||
}
|
||||
k++;
|
||||
}
|
||||
if (k == (int64_t)_data.size()) {
|
||||
SecondElement = firstElement;
|
||||
}
|
||||
while(k < (int64_t)_data.size()) {
|
||||
if (_data[k] == ',') {
|
||||
TK_ERROR("Can not find a second , in {} at pos " << k);
|
||||
return false;
|
||||
} if (_data[k] == '}') {
|
||||
goto allIsSet;
|
||||
} else if (true == u32char::isInteger(_data[k])) {
|
||||
SecondElement *= 10;
|
||||
SecondElement += u32char::toInt(_data[k]);
|
||||
} else {
|
||||
TK_ERROR("Can not parse this element " << _data[k] << " at pos " << k);
|
||||
return false;
|
||||
}
|
||||
k++;
|
||||
}
|
||||
|
||||
allIsSet:
|
||||
if (SecondElement == 0 && firstElement != 0) {
|
||||
_min = 0;
|
||||
_max = firstElement;
|
||||
} else {
|
||||
_min = firstElement;
|
||||
_max = SecondElement;
|
||||
}
|
||||
if (_min > _max) {
|
||||
TK_ERROR("Minimum=" << _min << " can not be < maximum=" << _max );
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
/**
 * @brief Generate a displayable version of a string: the special characters are
 *        replaced by a readable form.
 * @param[in] _data String to convert.
 * @return The converted string: '\n', '\t', '\r' and '\0' are written as their
 *         escape sequence, every other character with a value <= 0x20 is
 *         written as its decimal value, and the other ones are copied as-is.
 */
std::string etk::regexp::autoStr(const std::string& _data) {
	std::string out;
	for (auto &it : _data) {
		switch (it) {
			case '\n':
				out += "\\n";
				break;
			case '\t':
				out += "\\t";
				break;
			case '\r':
				out += "\\r";
				break;
			case '\0':
				out += "\\0";
				break;
			default:
				if (it <= 0x20) {
					// control character (or space): display its numeric value
					out += std::to_string((int32_t)it);
				} else {
					out += it;
				}
				break;
		}
	}
	return out;
}
|
||||
|
||||
|
||||
/**
 * @brief Generate a displayable version of one character.
 * @param[in] _data Character to convert.
 * @return "\\n", "\\t", "\\r" or "\\0" for the corresponding special character,
 *         the decimal value for every other character with a value <= 0x20,
 *         and the character itself otherwise.
 */
std::string etk::regexp::autoStr(char _data) {
	switch (_data) {
		case '\n':
			return "\\n";
		case '\t':
			return "\\t";
		case '\r':
			return "\\r";
		case '\0':
			return "\\0";
		default:
			break;
	}
	if (_data <= 0x20) {
		// control character (or space): display its numeric value
		return std::to_string((int32_t)_data);
	}
	return std::string(1, _data);
}
|
||||
|
||||
/**
 * @brief Generate a marker line: _pos spaces followed by a '^'.
 * @param[in] _pos Number of spaces written before the '^' (none when <= 0).
 * @return The generated string.
 */
std::string etk::regexp::strTick(int32_t _pos) {
	std::string out;
	if (_pos > 0) {
		out.append(static_cast<std::string::size_type>(_pos), ' ');
	}
	out.push_back('^');
	return out;
}
|
||||
|
||||
|
||||
|
||||
namespace etk {
|
||||
template<> std::string to_string<etk::RegExp<std::string>>(const etk::RegExp<std::string>& _val) {
|
||||
return _val.getRegExp();
|
||||
}
|
||||
template<> std::string to_string<etk::RegExp<std::u32string>>(const etk::RegExp<std::u32string>& _val) {
|
||||
return _val.getRegExp();
|
||||
}
|
||||
template<> std::u32string to_u32string<etk::RegExp<std::string>>(const etk::RegExp<std::string>& _val) {
|
||||
return _val.getURegExp();
|
||||
}
|
||||
template<> std::u32string to_u32string<etk::RegExp<std::u32string>>(const etk::RegExp<std::u32string>& _val) {
|
||||
return _val.getURegExp();
|
||||
}
|
||||
|
||||
template<> bool from_string<etk::RegExp<std::string>>(etk::RegExp<std::string>& _variableRet, const std::u32string& _value) {
|
||||
_variableRet.compile(_value);
|
||||
return true;
|
||||
}
|
||||
template<> bool from_string<etk::RegExp<std::u32string>>(etk::RegExp<std::u32string>& _variableRet, const std::u32string& _value) {
|
||||
_variableRet.compile(_value);
|
||||
return true;
|
||||
}
|
||||
template<> bool from_string<etk::RegExp<std::string>>(etk::RegExp<std::string>& _variableRet, const std::string& _value) {
|
||||
_variableRet.compile(_value);
|
||||
return true;
|
||||
}
|
||||
template<> bool from_string<etk::RegExp<std::u32string>>(etk::RegExp<std::u32string>& _variableRet, const std::string& _value) {
|
||||
_variableRet.compile(_value);
|
||||
return true;
|
||||
}
|
||||
};
|
1947
etk/RegExp.h
1947
etk/RegExp.h
File diff suppressed because it is too large
Load Diff
@ -237,6 +237,76 @@ std::u32string utf8::convertUnicode(const std::string& _input) {
|
||||
return U"TODO ... std::u32string utf8::convertUnicode(const std::string& _input)";
|
||||
}
|
||||
|
||||
/**
 * @brief Move the iterator on the next UTF-8 element.
 * The number of bytes to skip is given by utf8::theoricLen() on the current
 * byte (1 byte is skipped when it returns 0). The position is limited to the
 * size of the string (end position).
 * @return Reference on the current iterator incremented.
 */
utf8::iterator& utf8::iterator::operator++ () {
	m_value = u32char::Null; // the cached value is not valid anymore
	if (m_current < 0) {
		// invalid (negative) position ==> go back on the first element.
		// Only the negative values are filtered: an iterator on the index 0
		// (state set by the constructors) must be able to move forward.
		m_current = 0;
		return *this;
	}
	if (m_data != nullptr) {
		const int64_t dataSize = (int64_t)m_data->size();
		if (m_current < dataSize) {
			const int8_t nbChar = utf8::theoricLen((*m_data)[m_current]);
			if (nbChar != 0) {
				m_current += nbChar;
			} else {
				m_current++;
			}
		}
		if (m_current >= dataSize) {
			m_current = dataSize;
		}
	}
	return *this;
}
|
||||
|
||||
/**
 * @brief Move the iterator on the previous UTF-8 element.
 * The bytes are read backward until utf8::theoricFirst() accepts one of them,
 * the beginning of the string is reached, or 7 bytes have been skipped.
 * @return Reference on the current iterator decremented.
 */
utf8::iterator& utf8::iterator::operator-- () {
	m_value = u32char::Null; // the cached value is not valid anymore
	if (    m_data == nullptr
	     || m_current <= 0) {
		m_current = 0;
		return *this;
	}
	const int64_t dataSize = (int64_t)m_data->size();
	if (m_current > dataSize) {
		// position after the end ==> restart from the end position
		m_current = dataSize;
	}
	if (m_current == 0) {
		// empty string
		return *this;
	}
	// The bound is checked before every read: the index given to the string
	// is always in [0 .. size-1] (no read before the beginning of the buffer).
	int64_t pos = m_current - 1;
	int32_t nbStep = 1;
	while (    pos > 0
	        && nbStep <= 6
	        && utf8::theoricFirst((*m_data)[pos]) == false) {
		--pos;
		++nbStep;
	}
	m_current = pos;
	return *this;
}
|
||||
|
||||
/**
 * @brief Get the unicode value of the element under the iterator.
 * The value is decoded with utf8::convertChar32() and stored in m_value, which
 * is returned directly by the next calls while it is different of u32char::Null.
 * @return The decoded value, or the current m_value (u32char::Null) when the
 *         iterator is not linked on a string or is out of the string (an error
 *         is logged).
 */
char32_t utf8::iterator::operator* () {
	if (m_value != u32char::Null) {
		return m_value;
	}
	if (m_data == nullptr) {
		TK_ERROR("request an element that iterator not link");
		return m_value;
	}
	const int64_t dataSize = (int64_t)m_data->size();
	if (    m_current < 0
	     || m_current >= dataSize) {
		TK_ERROR("request an element out of bounding !!! 0 <= " << m_current << " < " << m_data->size());
		return m_value;
	}
	// The maximum value of utf8::theoricLen() is not visible here: the buffer
	// is sized (and the copy limited) so that it is never overrun and always
	// null-terminated, whatever the returned length is.
	char tmpVal[8] = {0};
	const int32_t maxByte = (int32_t)sizeof(tmpVal) - 1;
	tmpVal[0] = (*m_data)[m_current];
	const int8_t nbChar = utf8::theoricLen(tmpVal[0]);
	for (int32_t iii=1;
	        iii < nbChar
	     && iii < maxByte
	     && m_current+iii < dataSize;
	     ++iii) {
		tmpVal[iii] = (*m_data)[m_current+iii];
	}
	// transform ...
	m_value = utf8::convertChar32(tmpVal);
	return m_value;
}
|
||||
|
||||
|
||||
#undef __class__
|
||||
#define __class__ "etk"
|
||||
|
286
etk/stdTools.h
286
etk/stdTools.h
@ -60,6 +60,292 @@ namespace utf8 {
|
||||
|
||||
char32_t convertChar32(const char* _input);
|
||||
std::u32string convertUnicode(const std::string& _input);
|
||||
|
||||
class iterator {
|
||||
private:
|
||||
char32_t m_value; //!< store vlue to prevent multiple calcule of getting the data
|
||||
std::string* m_data; //!< Pointer on the current Buffer
|
||||
int64_t m_current; //!< curent Id in the Buffer
|
||||
public:
|
||||
iterator():
|
||||
m_value(u32char::Null),
|
||||
m_data(nullptr),
|
||||
m_current(0) {
|
||||
// nothing to do ...
|
||||
};
|
||||
iterator(std::string& _str) :
|
||||
m_value(u32char::Null),
|
||||
m_data(&_str),
|
||||
m_current(0) {
|
||||
// nothing to do ...
|
||||
};
|
||||
iterator(std::string& _str, const std::string::iterator& _pos) :
|
||||
m_value(u32char::Null),
|
||||
m_data(&_str),
|
||||
m_current(0) {
|
||||
if (m_data != nullptr) {
|
||||
m_current = std::distance(m_data->begin(), _pos);
|
||||
}
|
||||
};
|
||||
iterator(std::string& _str, size_t _pos) :
|
||||
m_value(u32char::Null),
|
||||
m_data(&_str),
|
||||
m_current(0) {
|
||||
if (m_data != nullptr) {
|
||||
if (_pos > m_data->size()) {
|
||||
m_current = m_data->size();
|
||||
} else {
|
||||
m_current = _pos;
|
||||
}
|
||||
}
|
||||
};
|
||||
iterator(std::string* _str, const std::string::iterator& _pos) :
|
||||
m_value(u32char::Null),
|
||||
m_data(_str),
|
||||
m_current(0) {
|
||||
if (m_data != nullptr) {
|
||||
m_current = std::distance(m_data->begin(), _pos);
|
||||
}
|
||||
};
|
||||
iterator(std::string* _str, size_t _pos) :
|
||||
m_value(u32char::Null),
|
||||
m_data(_str),
|
||||
m_current(0) {
|
||||
if (m_data != nullptr) {
|
||||
if (_pos > m_data->size()) {
|
||||
m_current = m_data->size();
|
||||
} else {
|
||||
m_current = _pos;
|
||||
}
|
||||
}
|
||||
};
|
||||
/**
|
||||
* @brief Recopy constructor.
|
||||
* @param[in] _obj The Iterator that might be copy
|
||||
*/
|
||||
iterator(const iterator& _obj):
|
||||
m_value(u32char::Null),
|
||||
m_data(_obj.m_data),
|
||||
m_current(_obj.m_current) {
|
||||
// nothing to do ...
|
||||
};
|
||||
/**
|
||||
* @brief Asignation operator.
|
||||
* @param[in] _otherIterator The Iterator that might be copy
|
||||
* @return reference on the curent Iterator
|
||||
*/
|
||||
iterator& operator=(const iterator & _obj) {
|
||||
m_current = _obj.m_current;
|
||||
m_data = _obj.m_data;
|
||||
m_value = u32char::Null;
|
||||
return *this;
|
||||
};
|
||||
/**
|
||||
* @brief Basic destructor
|
||||
*/
|
||||
virtual ~iterator() {
|
||||
m_current = 0;
|
||||
m_data = nullptr;
|
||||
m_value = u32char::Null;
|
||||
};
|
||||
/**
|
||||
* @brief basic boolean cast
|
||||
* @return true if the element is present in buffer
|
||||
*/
|
||||
operator size_t () const {
|
||||
if (m_data == nullptr) {
|
||||
return 0;
|
||||
}
|
||||
if (m_current < 0) {
|
||||
return 0;
|
||||
}
|
||||
if (m_current > (int64_t)m_data->size()) {
|
||||
return m_data->size();
|
||||
}
|
||||
return (size_t)m_current;
|
||||
};
|
||||
/**
|
||||
* @brief Incremental operator
|
||||
* @return Reference on the current iterator incremented
|
||||
*/
|
||||
iterator& operator++ ();
|
||||
/**
|
||||
* @brief Decremental operator
|
||||
* @return Reference on the current iterator decremented
|
||||
*/
|
||||
iterator& operator-- ();
|
||||
/**
|
||||
* @brief Incremental operator
|
||||
* @return Reference on a new iterator and increment the other one
|
||||
*/
|
||||
iterator operator++ (int32_t) {
|
||||
iterator it(*this);
|
||||
++(*this);
|
||||
return it;
|
||||
};
|
||||
/**
|
||||
* @brief Decremental operator
|
||||
* @return Reference on a new iterator and decrement the other one
|
||||
*/
|
||||
iterator operator-- (int32_t) {
|
||||
iterator it(*this);
|
||||
--(*this);
|
||||
return it;
|
||||
};
|
||||
/**
|
||||
* @brief egality iterator
|
||||
* @return true if the iterator is identical pos
|
||||
*/
|
||||
bool operator== (const iterator& _obj) const {
|
||||
if ( m_current == _obj.m_current
|
||||
&& m_data == _obj.m_data) {
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
};
|
||||
/**
|
||||
* @brief egality iterator
|
||||
* @return true if the iterator is identical pos
|
||||
*/
|
||||
bool operator!= (const iterator& _obj) const {
|
||||
if ( m_current != _obj.m_current
|
||||
|| m_data != _obj.m_data) {
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
};
|
||||
/**
|
||||
* @brief <= iterator
|
||||
* @return true if the iterator is identical pos
|
||||
*/
|
||||
bool operator<= (const iterator& _obj) const {
|
||||
if (m_data != _obj.m_data) {
|
||||
return false;
|
||||
}
|
||||
if (m_current <= _obj.m_current) {
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
};
|
||||
/**
|
||||
* @brief >= iterator
|
||||
* @return true if the iterator is identical pos
|
||||
*/
|
||||
bool operator>= (const iterator& _obj) const {
|
||||
if (m_data != _obj.m_data) {
|
||||
return false;
|
||||
}
|
||||
if (m_current >= _obj.m_current) {
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
};
|
||||
/**
|
||||
* @brief < iterator
|
||||
* @return true if the iterator is identical pos
|
||||
*/
|
||||
bool operator< (const iterator& _obj) const {
|
||||
if (m_data != _obj.m_data) {
|
||||
return false;
|
||||
}
|
||||
if (m_current < _obj.m_current) {
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
};
|
||||
/**
|
||||
* @brief > iterator
|
||||
* @return true if the iterator is identical pos
|
||||
*/
|
||||
bool operator> (const iterator& _obj) const {
|
||||
if (m_data != _obj.m_data) {
|
||||
return false;
|
||||
}
|
||||
if (m_current > _obj.m_current) {
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
};
|
||||
/**
|
||||
* @brief Get the value on the current element
|
||||
* @return The request element value
|
||||
*/
|
||||
char32_t operator* ();
|
||||
/**
|
||||
* @brief Get the position in the buffer
|
||||
* @return The requested position.
|
||||
*/
|
||||
size_t getPos() const {
|
||||
if (m_data == nullptr) {
|
||||
return 0;
|
||||
}
|
||||
if (m_current < 0) {
|
||||
return 0;
|
||||
}
|
||||
if (m_current >= (int64_t)m_data->size()) {
|
||||
return m_data->size()-1;
|
||||
}
|
||||
return (size_t)m_current;
|
||||
};
|
||||
/**
|
||||
* @brief move the element position
|
||||
* @return a new iterator.
|
||||
*/
|
||||
iterator operator+ (const int64_t _val) const {
|
||||
iterator tmpp(*this);
|
||||
for (int64_t iii=0; iii<_val; ++iii) {
|
||||
++tmpp;
|
||||
}
|
||||
return tmpp;
|
||||
};
|
||||
iterator operator+ (const int32_t _val) const {
|
||||
iterator tmpp(*this);
|
||||
for (int64_t iii=0; iii<_val; ++iii) {
|
||||
++tmpp;
|
||||
}
|
||||
return tmpp;
|
||||
};
|
||||
iterator operator+ (const size_t _val) const {
|
||||
iterator tmpp(*this);
|
||||
for (int64_t iii=0; iii<(int64_t)_val; ++iii) {
|
||||
++tmpp;
|
||||
}
|
||||
return tmpp;
|
||||
};
|
||||
/**
|
||||
* @brief move the element position
|
||||
* @return a new iterator.
|
||||
*/
|
||||
iterator operator- (const int64_t _val) const {
|
||||
iterator tmpp(*this);
|
||||
for (int64_t iii=0; iii<_val; ++iii) {
|
||||
--tmpp;
|
||||
}
|
||||
return tmpp;
|
||||
};
|
||||
iterator operator- (const int32_t _val) const {
|
||||
iterator tmpp(*this);
|
||||
for (int64_t iii=0; iii<_val; ++iii) {
|
||||
--tmpp;
|
||||
}
|
||||
return tmpp;
|
||||
};
|
||||
iterator operator- (const size_t _val) const {
|
||||
iterator tmpp(*this);
|
||||
for (int64_t iii=0; iii<(int64_t)_val; ++iii) {
|
||||
--tmpp;
|
||||
}
|
||||
return tmpp;
|
||||
};
|
||||
/*
|
||||
iterator begin() const {
|
||||
return iterator(m_data);
|
||||
}
|
||||
iterator end() const {
|
||||
return --iterator(m_data, m_data.end());
|
||||
}
|
||||
*/
|
||||
};
|
||||
};
|
||||
|
||||
namespace std {
|
||||
|
@ -16,7 +16,6 @@ def create(target):
|
||||
'etk/debug.cpp',
|
||||
'etk/stdTools.cpp',
|
||||
'etk/log.cpp',
|
||||
'etk/RegExp.cpp',
|
||||
'etk/tool.cpp',
|
||||
'etk/Noise.cpp',
|
||||
'etk/Color.cpp',
|
||||
|
540
test/main.cpp
540
test/main.cpp
@ -15,10 +15,8 @@
|
||||
#include <etk/archive/Archive.h>
|
||||
#include <etk/log.h>
|
||||
#include <etk/Color.h>
|
||||
#include <etk/RegExp.h>
|
||||
#include <etk/stdTools.h>
|
||||
#include <string>
|
||||
#include <regex>
|
||||
|
||||
#undef __class__
|
||||
#define __class__ "etktest"
|
||||
@ -191,545 +189,17 @@ void testRegExpSingle(const std::string& _expression, const std::string& _search
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
void testRegExp() {
|
||||
std::string data;
|
||||
//std::string data = " a /* plop */ \n int eee = 22; // error value \nint main(void) {\n return 0;\n}\n";
|
||||
//std::string data = "alpha /* plop */ test";
|
||||
//std::string data = "pp \n // qdfqdfsdf \nde";
|
||||
//testRegExpSingle("/\\*.*\\*/", data);
|
||||
//testRegExpSingle("//.*$", data);
|
||||
//testRegExpSingle("/\\*.*", data);
|
||||
//testRegExpSingle("[a-z]", data);
|
||||
//std::string data = " eesd a lzzml plophzzzzzhhhhhrlkmlkml";
|
||||
//testRegExpSingle("a.*plop(z{2,3}|h+)+r", data);
|
||||
|
||||
//std::string data = "pp \n# plop // qdfqdfsdf \nde";
|
||||
//std::string data = "pp \n# plop //\\\n qdfqdfsdf \nde";
|
||||
//std::string data = "p#\ne";
|
||||
//testRegExpSingle("#(\\\\\\\\|\\\\\\n|.)*$", data);
|
||||
//testRegExpSingle("#.*$", data);
|
||||
|
||||
//std::string data = "p//TODO:\ndse";
|
||||
//std::string data = "p// TODO:\ndse";
|
||||
//std::string data = "p// TODO :\ndse";
|
||||
//std::string data = "p// TODO : sdfgsdfsd \ndse";
|
||||
//testRegExpSingle("//[ \\t]*TODO[ \\t]*:.*$", data);
|
||||
|
||||
data = "abc m_def ghi";
|
||||
data = " protected:\n"
|
||||
" vec2 m_offset; \n";
|
||||
//testRegExpSingle("\\@m_[A-Za-z_0-9]*\\@", data);
|
||||
|
||||
|
||||
data = " * @param[in] _mode Configuring mode.\n"
|
||||
" * @param[in] _time Time in second of the annimation display\n"
|
||||
" */\n"
|
||||
" void setAnnimationTime(enum ";
|
||||
data = "virtual vec2 relativePosition(const vec2& _pos);";
|
||||
|
||||
//testRegExpSingle("\\@(\\w|_)+[ \\t]*\\(", data);
|
||||
|
||||
data = "include <ewol/Dimensio2n.h>\n"
|
||||
"#include <ewol/Dimension.h>\n"
|
||||
"'dfgd\'fg'\n"
|
||||
"\"dqf\\\"gsdfg\" // \"\n"
|
||||
"// TODO : sqdkfjsdldkqfj\n"
|
||||
"\n"
|
||||
"namespace ewol {\n"
|
||||
" class Widget;\n"
|
||||
" namespace widget {\n"
|
||||
" class Manager;\n"
|
||||
" class Windows;\n"
|
||||
" };\n"
|
||||
"};\n"
|
||||
"#include <etk/types.h>\n";
|
||||
//testRegExpSingle("#(\\\\[\\\\\\n]|.)*$", data);
|
||||
|
||||
|
||||
data = " 'dfgd\\'fg' \n"
|
||||
" vec2 m_offset; \n";
|
||||
//testRegExpSingle("'((\\\\[\\\\'])|.)*'", data);
|
||||
|
||||
|
||||
/*
|
||||
data = "ddfgdfgh";
|
||||
etk::RegExp<std::string> reg(".*");
|
||||
reg.setMaximize(true);
|
||||
|
||||
TK_INFO("Parse RegEx : '" << reg.getRegExDecorated() << "'");
|
||||
if (reg.parse(data, 0, data.size()) == true) {
|
||||
//if (reg.processOneElement(data, 0, data.size()) == true) {
|
||||
TK_INFO(" match [" << reg.start() << ".." << reg.stop() << "] ");
|
||||
TK_INFO(" ==> '" << std::string(data, reg.start(), reg.stop()-reg.start()) << "'");
|
||||
}
|
||||
|
||||
data = "plop \"\" sdfsdf s\"swdfsqd sdfgsdfg \" \" sdfsf";
|
||||
reg = etk::RegExp<std::string>("\"(\\\\[\\\\\"]|.)*\"");
|
||||
reg.setMaximize(false);
|
||||
TK_INFO("Parse RegEx : '" << reg.getRegExDecorated() << "'");
|
||||
if (reg.parse(data, 0, data.size()) == true) {
|
||||
//if (reg.processOneElement(data, 0, data.size()) == true) {
|
||||
TK_INFO(" match [" << reg.start() << ".." << reg.stop() << "] ");
|
||||
TK_INFO(" ==> '" << std::string(data, reg.start(), reg.stop()-reg.start()) << "'");
|
||||
}
|
||||
//TODO : good : "(\\+|[0-9])*" ==> really bad : "(+|[0-9])*"
|
||||
|
||||
data = "void limit(const vec2& _origin, const vec2& _size);\n";
|
||||
reg = etk::RegExp<std::string>("\\@(\\w|_)+[ \\t]*\\(");
|
||||
reg.setMaximize(false);
|
||||
TK_INFO("Parse RegEx : '" << reg.getRegExDecorated() << "'");
|
||||
if (reg.parse(data, 0, data.size()) == true) {
|
||||
//if (reg.processOneElement(data, 0, data.size()) == true) {
|
||||
TK_INFO(" match [" << reg.start() << ".." << reg.stop() << "] ");
|
||||
TK_INFO(" ==> '" << std::string(data, reg.start(), reg.stop()-reg.start()) << "'");
|
||||
}
|
||||
data = "void limit const vec2& _origin, const vec2& _size);\n";
|
||||
if (reg.parse(data, 0, data.size()) == true) {
|
||||
//if (reg.processOneElement(data, 0, data.size()) == true) {
|
||||
TK_INFO(" match [" << reg.start() << ".." << reg.stop() << "] ");
|
||||
TK_INFO(" ==> '" << std::string(data, reg.start(), reg.stop()-reg.start()) << "'");
|
||||
}
|
||||
*/
|
||||
/*
|
||||
std::tr1::cmatch res;
|
||||
str = "<h2>Egg prices</h2>";
|
||||
std::tr1::regex rx("<h(.)>([^<]+)");
|
||||
std::tr1::regex_search(str.c_str(), res, rx);
|
||||
std::cout << res[1] << ". " << res[2] << "\n";
|
||||
*/
|
||||
{
|
||||
std::string lines[] = {"Roses are #ff0000",
|
||||
"violets are #0000ff",
|
||||
"all of my base are belong to you"};
|
||||
|
||||
std::regex myRegex("#([a-f0-9]{6})");
|
||||
/*
|
||||
for (const auto &line : lines) {
|
||||
std::cout << line << ": " << std::regex_search(line, color_regex) << '\n';
|
||||
}
|
||||
*/
|
||||
|
||||
std::smatch resultMatch;
|
||||
for (const auto &line : lines) {
|
||||
TK_DEBUG("in line : '" << line << "'");
|
||||
std::regex_search(line, resultMatch, myRegex);
|
||||
TK_DEBUG(" Find " << resultMatch.size() << " elements");
|
||||
for (size_t iii=0; iii<resultMatch.size(); ++iii) {
|
||||
int32_t posStart = std::distance(line.begin(), resultMatch[iii].first);
|
||||
int32_t posStop = std::distance(line.begin(), resultMatch[iii].second);
|
||||
TK_DEBUG(" [" << iii << "] " << *resultMatch[iii].first);
|
||||
TK_DEBUG(" [" << iii << "] " << *resultMatch[iii].second);
|
||||
TK_DEBUG(" [" << iii << "] " << std::string(line, posStart, posStop-posStart));
|
||||
/*
|
||||
std::ssub_match sub_match = color_match[i];
|
||||
std::string sub_match_str = sub_match.str();
|
||||
*/
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
{
|
||||
const std::string myData = "void limit(const vec2& _origin, const vec2& _size);\n plop(sf)";
|
||||
std::regex myRegex("\\b(\\w|_)+[ \\t]*\\(");
|
||||
|
||||
std::smatch resultMatch;
|
||||
TK_DEBUG("in line : '" << myData << "'");
|
||||
std::regex_search(myData, resultMatch, myRegex);
|
||||
TK_DEBUG(" Find " << resultMatch.size() << " elements");
|
||||
for (size_t iii=0; iii<resultMatch.size(); ++iii) {
|
||||
int32_t posStart = std::distance(myData.begin(), resultMatch[iii].first);
|
||||
int32_t posStop = std::distance(myData.begin(), resultMatch[iii].second);
|
||||
TK_DEBUG(" [" << iii << "] " << *resultMatch[iii].first);
|
||||
TK_DEBUG(" [" << iii << "] " << *resultMatch[iii].second);
|
||||
TK_DEBUG(" [" << iii << "] " << std::string(myData, posStart, posStop-posStart));
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
{
|
||||
const std::u32string data = utf8::convertUnicode("kjhkjhk");
|
||||
const std::u32string data2(U"kjhkjhk");
|
||||
const std::string data3("kjhkjhk");
|
||||
const char32_t data5[] = U"kjhkjhk";
|
||||
//std::basic_regex<char32_t, std::u32string> regexp(data2);
|
||||
//std::basic_regex<char32_t> regexp((const char32_t*)data2.c_str());
|
||||
std::basic_regex<char32_t> regexp2(data5);
|
||||
}
|
||||
}
|
||||
#if 0
|
||||
// http://en.cppreference.com/w/cpp/regex/regex_traits/lookup_classname
|
||||
namespace std {
	/**
	 * @brief Describes aspects of a regular expression for char32_t input.
	 *
	 * A regular expression traits class that satisfies the requirements of
	 * section [28.7].
	 *
	 * The class %regex is parameterized around a set of related types and
	 * functions used to complete the definition of its semantics. This class
	 * satisfies the requirements of such a traits class.
	 *
	 * @note lookup_collatename(), lookup_classname(), isctype() and value()
	 *       are only declared here; no definition is part of this block.
	 * @note NOTE(review): specializing a std:: template for a built-in
	 *       character type is not sanctioned by the C++ standard - confirm
	 *       before enabling this code.
	 */
	template<> struct regex_traits<char32_t> {
		public:
			// Type names required from a regex traits class.
			typedef char32_t char_type;
			typedef std::basic_string<char32_t> string_type;
			typedef std::locale locale_type;
		private:
			/**
			 * @brief Character-class bitmask: the ctype mask plus two extra
			 *        flag bits (underscore and blank).
			 */
			struct _RegexMask {
				typedef typename std::ctype<char32_t>::mask _BaseType;
				_BaseType _M_base;
				unsigned char _M_extended;
				static constexpr unsigned char _S_under = 1 << 0;
				// FIXME: _S_blank should be removed in the future,
				// when locale's complete.
				static constexpr unsigned char _S_blank = 1 << 1;
				static constexpr unsigned char _S_valid_mask = 0x3;

				constexpr _RegexMask(_BaseType __base = 0, unsigned char __extended = 0) :
				  _M_base(__base),
				  _M_extended(__extended) {
					
				}
				constexpr _RegexMask operator&(_RegexMask __other) const {
					return _RegexMask(_M_base & __other._M_base, _M_extended & __other._M_extended);
				}
				constexpr _RegexMask operator|(_RegexMask __other) const {
					return _RegexMask(_M_base | __other._M_base, _M_extended | __other._M_extended);
				}
				constexpr _RegexMask operator^(_RegexMask __other) const {
					return _RegexMask(_M_base ^ __other._M_base, _M_extended ^ __other._M_extended);
				}
				constexpr _RegexMask operator~() const {
					return _RegexMask(~_M_base, ~_M_extended);
				}
				_RegexMask& operator&=(_RegexMask __other) {
					return *this = (*this) & __other;
				}
				_RegexMask& operator|=(_RegexMask __other) {
					return *this = (*this) | __other;
				}
				_RegexMask& operator^=(_RegexMask __other) {
					return *this = (*this) ^ __other;
				}
				// Only the two valid extended bits take part in the comparison.
				constexpr bool operator==(_RegexMask __other) const {
					return (_M_extended & _S_valid_mask) == (__other._M_extended & _S_valid_mask)
					       && _M_base == __other._M_base;
				}
				constexpr bool operator!=(_RegexMask __other) const {
					return !((*this) == __other);
				}
			};
		public:
			typedef _RegexMask char_class_type;
		public:
			/**
			 * @brief Constructs a default traits object.
			 */
			regex_traits() {
				
			}
			/**
			 * @brief Gives the length of a C-style string starting at @p __p.
			 *
			 * @param __p a pointer to the start of a character sequence.
			 *
			 * @returns the number of characters between @p *__p and the first
			 * default-initialized value of type @p char32_t. In other words, uses
			 * the C-string algorithm for determining the length of a sequence of
			 * characters.
			 */
			static std::size_t length(const char32_t* __p) {
				return string_type::traits_type::length(__p);
			}
			
			/**
			 * @brief Performs the identity translation.
			 *
			 * @param __c A character to the locale-specific character set.
			 *
			 * @returns __c.
			 */
			char32_t translate(char32_t __c) const {
				return __c;
			}
			
			/**
			 * @brief Translates a character into a case-insensitive equivalent.
			 *
			 * @param __c A character to the locale-specific character set.
			 *
			 * @returns the locale-specific lower-case equivalent of __c.
			 * @throws std::bad_cast if the imbued locale does not support the ctype
			 * facet.
			 */
			char32_t translate_nocase(char32_t __c) const {
				typedef std::ctype<char32_t> __ctype_type;
				const __ctype_type& __fctyp(std::use_facet<__ctype_type>(_M_locale));
				return __fctyp.tolower(__c);
			}
			
			/**
			 * @brief Gets a sort key for a character sequence.
			 *
			 * @param __first beginning of the character sequence.
			 * @param __last one-past-the-end of the character sequence.
			 *
			 * Returns a sort key for the character sequence designated by the
			 * iterator range [F1, F2) such that if the character sequence [G1, G2)
			 * sorts before the character sequence [H1, H2) then
			 * v.transform(G1, G2) < v.transform(H1, H2).
			 *
			 * What this really does is provide a more efficient way to compare a
			 * string to multiple other strings in locales with fancy collation
			 * rules and equivalence classes.
			 *
			 * @returns a locale-specific sort key equivalent to the input range.
			 *
			 * @throws std::bad_cast if the current locale does not have a collate
			 * facet.
			 */
			template<typename _Fwd_iter> string_type transform(_Fwd_iter __first, _Fwd_iter __last) const {
				typedef std::collate<char32_t> __collate_type;
				const __collate_type& __fclt(std::use_facet<__collate_type>(_M_locale));
				string_type __s(__first, __last);
				return __fclt.transform(__s.data(), __s.data() + __s.size());
			}
			
			/**
			 * @brief Gets a sort key for a character sequence, independent of case.
			 *
			 * @param __first beginning of the character sequence.
			 * @param __last one-past-the-end of the character sequence.
			 *
			 * Effects: if typeid(use_facet<collate<char32_t> >) ==
			 * typeid(collate_byname<char32_t>) and the form of the sort key
			 * returned by collate_byname<char32_t>::transform(__first, __last)
			 * is known and can be converted into a primary sort key
			 * then returns that key, otherwise returns an empty string.
			 *
			 * @todo Implement this function correctly.
			 */
			template<typename _Fwd_iter> string_type transform_primary(_Fwd_iter __first, _Fwd_iter __last) const {
				// TODO : this is not entirely correct.
				// This function requires extra support from the platform.
				//
				// Read http://gcc.gnu.org/ml/libstdc++/2013-09/msg00117.html and
				// http://www.open-std.org/Jtc1/sc22/wg21/docs/papers/2003/n1429.htm
				// for details.
				typedef std::ctype<char32_t> __ctype_type;
				const __ctype_type& __fctyp(std::use_facet<__ctype_type>(_M_locale));
				// Lower-case a private copy, then reuse the case-sensitive key.
				std::vector<char32_t> __s(__first, __last);
				__fctyp.tolower(__s.data(), __s.data() + __s.size());
				return this->transform(__s.data(), __s.data() + __s.size());
			}
			
			/**
			 * @brief Gets a collation element by name.
			 *
			 * @param __first beginning of the collation element name.
			 * @param __last one-past-the-end of the collation element name.
			 *
			 * @returns a sequence of one or more characters that represents the
			 * collating element consisting of the character sequence designated by
			 * the iterator range [__first, __last). Returns an empty string if the
			 * character sequence is not a valid collating element.
			 */
			template<typename _Fwd_iter> string_type lookup_collatename(_Fwd_iter __first, _Fwd_iter __last) const;
			
			/**
			 * @brief Maps one or more characters to a named character
			 *        classification.
			 *
			 * @param __first beginning of the character sequence.
			 * @param __last one-past-the-end of the character sequence.
			 * @param __icase ignores the case of the classification name.
			 *
			 * @returns an unspecified value that represents the character
			 * classification named by the character sequence designated by
			 * the iterator range [__first, __last). If @p icase is true,
			 * the returned mask identifies the classification regardless of
			 * the case of the characters to be matched (for example,
			 * [[:lower:]] is the same as [[:alpha:]]), otherwise a
			 * case-dependent classification is returned. The value
			 * returned shall be independent of the case of the characters
			 * in the character sequence. If the name is not recognized then
			 * returns a value that compares equal to 0.
			 *
			 * At least the following names (or their wide-character equivalent) are
			 * supported.
			 * - d
			 * - w
			 * - s
			 * - alnum
			 * - alpha
			 * - blank
			 * - cntrl
			 * - digit
			 * - graph
			 * - lower
			 * - print
			 * - punct
			 * - space
			 * - upper
			 * - xdigit
			 */
			template<typename _Fwd_iter> char_class_type lookup_classname(_Fwd_iter __first, _Fwd_iter __last, bool __icase = false) const;
			
			/**
			 * @brief Determines if @p c is a member of an identified class.
			 *
			 * @param __c a character.
			 * @param __f a class type (as returned from lookup_classname).
			 *
			 * @returns true if the character @p __c is a member of the classification
			 * represented by @p __f, false otherwise.
			 *
			 * @throws std::bad_cast if the current locale does not have a ctype
			 * facet.
			 */
			bool isctype(char_type __c, char_class_type __f) const;
			
			/**
			 * @brief Converts a digit to an int.
			 *
			 * @param __ch a character representing a digit.
			 * @param __radix the radix if the numeric conversion (limited to 8, 10,
			 * or 16).
			 *
			 * @returns the value represented by the digit __ch in base radix if the
			 * character __ch is a valid digit in base radix; otherwise returns -1.
			 */
			int value(char_type __ch, int __radix) const;
			
			/**
			 * @brief Imbues the regex_traits object with a copy of a new locale.
			 *
			 * @param __loc A locale.
			 *
			 * @returns a copy of the previous locale in use by the regex_traits
			 * object.
			 *
			 * @note Calling imbue with a different locale than the one currently in
			 * use invalidates all cached data held by *this.
			 */
			locale_type imbue(locale_type __loc) {
				// After the swap __loc holds the previous locale.
				std::swap(_M_locale, __loc);
				return __loc;
			}
			
			/**
			 * @brief Gets a copy of the current locale in use by the regex_traits
			 * object.
			 */
			locale_type getloc() const {
				return _M_locale;
			}
		protected:
			locale_type _M_locale;
	};
}
|
||||
#endif
|
||||
|
||||
struct unicode_traits : std::regex_traits<char32_t> {
|
||||
static std::map<char32_t, int> data;
|
||||
int value(char32_t ch, int radix ) const {
|
||||
char32_t up = std::toupper(ch, getloc());
|
||||
return data.count(up) ? data[up] : regex_traits::value(ch, radix);
|
||||
}
|
||||
bool isctype(char32_t __c, char_class_type __f) const {
|
||||
TK_ERROR("plop 10");
|
||||
bool plop = std::regex_traits<char32_t>::isctype(__c, __f);
|
||||
TK_ERROR("plop 11");
|
||||
return plop;
|
||||
}
|
||||
char32_t translate_nocase(char32_t __c) const {
|
||||
TK_ERROR("plop 20");
|
||||
typedef std::ctype<char32_t> __ctype_type;
|
||||
TK_ERROR("plop 21");
|
||||
const __ctype_type& __fctyp(std::use_facet<__ctype_type>(_M_locale));
|
||||
TK_ERROR("plop 22");
|
||||
char32_t plop = __fctyp.tolower(__c);
|
||||
TK_ERROR("plop 23");
|
||||
return plop;
|
||||
}
|
||||
|
||||
template<typename _Fwd_iter> std::u32string transform(_Fwd_iter __first, _Fwd_iter __last) const {
|
||||
TK_ERROR("plop 30");
|
||||
typedef std::collate<char32_t> __collate_type;
|
||||
TK_ERROR("plop 31");
|
||||
const __collate_type& __fclt(std::use_facet<__collate_type>(_M_locale));
|
||||
TK_ERROR("plop 32");
|
||||
std::u32string __s(__first, __last);
|
||||
TK_ERROR("plop 33");
|
||||
std::u32string plop = __fclt.transform(__s.data(), __s.data() + __s.size());
|
||||
TK_ERROR("plop 34");
|
||||
return plop;
|
||||
}
|
||||
};
|
||||
// Digit table consulted by unicode_traits::value().
// The non-ASCII keys are written as universal-character-names so the values
// do not depend on the encoding the source file is stored or displayed in.
// NOTE(review): only the entry for 11 is a fullwidth letter (U+FF22); the
// neighbouring A and C..F entries are plain ASCII in this file - confirm
// whether fullwidth forms (U+FF21, U+FF23..U+FF26) were intended.
std::map<char32_t, int> unicode_traits::data = {
	{U'\u3007', 0},  // ideographic number zero
	{U'\u4E00', 1},  // CJK numeral one
	{U'\u4E8C', 2},  // CJK numeral two
	{U'\u4E09', 3},  // CJK numeral three
	{U'\u56DB', 4},  // CJK numeral four
	{U'\u4E94', 5},  // CJK numeral five
	{U'\u516D', 6},  // CJK numeral six
	{U'\u4E03', 7},  // CJK numeral seven
	{U'\u516B', 8},  // CJK numeral eight
	{U'\u4E5D', 9},  // CJK numeral nine
	{U'A', 10},
	{U'\uFF22', 11}, // fullwidth Latin capital letter B
	{U'C', 12},
	{U'D', 13},
	{U'E', 14},
	{U'F', 15}
};
|
||||
|
||||
/*
|
||||
int main() {
|
||||
std::locale::global(std::locale("ja_JP.utf8"));
|
||||
std::wcout.sync_with_stdio(false);
|
||||
std::wcout.imbue(std::locale());
|
||||
|
||||
std::wstring in = L"風";
|
||||
|
||||
if(std::regex_match(in, std::wregex(L"\\u98a8")))
|
||||
std::wcout << "\\u98a8 matched " << in << '\n';
|
||||
|
||||
if(std::regex_match(in, std::basic_regex<wchar_t, jnum_traits>(L"\\u九八A八")))
|
||||
std::wcout << L"\\u九八A八 with custom traits matched " << in << '\n';
|
||||
}
|
||||
*/
|
||||
|
||||
/**
 * @brief Scratch test for regular expressions over char32_t strings.
 *
 * Only the sample input is built at the moment: the regex construction is
 * commented out and the search loops are compiled out with #if 0.
 */
void testRegExp2() {
	std::u32string lines[] = {U"Roses are #ff0000",
	                          U"violets are #0000ff",
	                          U"all of my base are belong to you"};
	// The sample lines are only consumed by the disabled code below.
	(void)lines;
	//std::locale::global(std::locale("fr_FR.utf8"));
	//std::basic_regex<char32_t, unicode_traits> color_regex(U"a");//([a-f0-9]{2})([a-f0-9]{2})([a-f0-9]{2})");
	#if 0
	for (const auto &line : lines) {
		std::cout << "search : " << std::regex_search(line, color_regex) << '\n';
	}
	std::match_results<std::u32string::const_iterator> color_match;
	for (const auto &line : lines) {
		std::regex_search(line, color_match, color_regex);
		std::cout << "matches for \n";
		/*
		for (size_t i = 0; i < color_match.size(); ++i) {
			std::ssub_match sub_match = color_match[i];
			std::string sub_match_str = sub_match.str();
			std::cout << i << ": " << sub_match_str << '\n';
		}
		*/
	}
	#endif
}
|
||||
|
||||
|
||||
|
||||
/**
 * @brief Entry point of the etk test program.
 *
 * Initialises etk (log level, argv[0], default folder) and then runs the
 * currently enabled test functions; the other tests are left commented out.
 *
 * @param argc Number of command-line arguments (not used).
 * @param argv Command-line arguments; argv[0] is handed to etk::setArgZero().
 * @return Always 0.
 */
int main(int argc, const char *argv[]) {
	// One-time initialisation of etk:
	etk::log::setLevel(etk::log::logLevelVerbose);
	etk::setArgZero(argv[0]);
	etk::initDefaultFolder("ewolApplNoName");
	
	//testHash();
	////testFSNode();
	////testDimension();
	////testArchive();
	//testColor();
	//testRegExp();
	testRegExp2();
	testHash();
	//testFSNode();
	//testDimension();
	//testArchive();
	testColor();
	return 0;
}
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user