[DEV] set back the etk::RegExp better than common for EDN

This commit is contained in:
Edouard DUPIN 2017-03-11 22:10:25 +01:00
parent 27bb43e322
commit f6850c591d
6 changed files with 2566 additions and 70 deletions

470
etk/RegExp.cpp Normal file
View File

@ -0,0 +1,470 @@
/**
* @author Edouard DUPIN
*
* @copyright 2011, Edouard DUPIN, all rights reserved
*
* @license MPL v2.0 (see license file)
*/
#include <etk/RegExp.hpp>
// Conversion table used to translate one character of a textual regular expression.
// Each row is keyed by (haveBackSlash, inputValue): whether the character was preceded
// by a backslash, and the character itself. The row then provides a replacement
// character (newValue) and an opcode.
// In every row below, either the opcode is a real regexpOpcodeXxx and newValue is 0,
// or the opcode is regexpOpcodeError and newValue carries a literal character.
// NOTE(review): presumably regexpOpcodeError means "no opcode, emit newValue" — confirm
// against the lookup code in etk/RegExp.hpp, which is not visible here.
const struct etk::convertionTable etk::regexp::constConvertionTable[] = {
// haveBackSlash, inputValue, newValue
{ false , '(' , 0 , etk::regexpOpcodePTheseIn},
{ true , '(' , '(' , etk::regexpOpcodeError},
{ false , ')' , 0 , etk::regexpOpcodePTheseOut},
{ true , ')' , ')' , etk::regexpOpcodeError},
{ false , '[' , 0 , etk::regexpOpcodeBracketIn},
{ true , '[' , '[' , etk::regexpOpcodeError},
{ false , ']' , 0 , etk::regexpOpcodeBracketOut},
{ true , ']' , ']' , etk::regexpOpcodeError},
{ false , '{' , 0 , etk::regexpOpcodeBracetIn},
{ true , '{' , '{' , etk::regexpOpcodeError},
{ false , '}' , 0 , etk::regexpOpcodeBracetOut},
{ true , '}' , '}' , etk::regexpOpcodeError},
{ false , '-' , 0 , etk::regexpOpcodeTo},
{ true , '-' , '-' , etk::regexpOpcodeError},
{ false , '*' , 0 , etk::regexpOpcodeStar},
{ true , '*' , '*' , etk::regexpOpcodeError},
{ false , '.' , 0 , etk::regexpOpcodeDot},
{ true , '.' , '.' , etk::regexpOpcodeError},
{ true , 'e' , 0 , etk::regexpOpcodeEOF},
{ false , 'e' , 'e' , etk::regexpOpcodeError},
{ false , '?' , 0 , etk::regexpOpcodeQuestion},
{ true , '?' , '?' , etk::regexpOpcodeError},
{ false , '+' , 0 , etk::regexpOpcodePlus},
{ true , '+' , '+' , etk::regexpOpcodeError},
{ false , '|' , 0 , etk::regexpOpcodePipe},
{ true , '|' , '|' , etk::regexpOpcodeError},
{ false , '^' , 0 , etk::regexpOpcodeStartOfLine},
{ true , '^' , '^' , etk::regexpOpcodeError},
{ false , '$' , 0 , etk::regexpOpcodeEndOfLine},
{ true , '$' , '$' , etk::regexpOpcodeError},
// Backslash-prefixed character classes.
{ true , 'd' , 0 , etk::regexpOpcodeDigit},
{ true , 'D' , 0 , etk::regexpOpcodeDigitNot},
{ true , 'l' , 0 , etk::regexpOpcodeLetter},
{ true , 'L' , 0 , etk::regexpOpcodeLetterNot},
{ true , 's' , 0 , etk::regexpOpcodeSpace},
{ true , 'S' , 0 , etk::regexpOpcodeSpaceNot},
{ true , 'w' , 0 , etk::regexpOpcodeWord},
{ true , 'W' , 0 , etk::regexpOpcodeWordNot},
// Backslash-prefixed control characters, mapped to their literal value.
{ true , 'a' , '\a', etk::regexpOpcodeError},
{ true , 'b' , '\b', etk::regexpOpcodeError},
// NOTE(review): the key (true, 'e') already appears above with regexpOpcodeEOF; which
// of the two rows takes effect depends on the lookup order — confirm this one is intended.
{ true , 'e' , 0x1B, etk::regexpOpcodeError}, // Escape character <Esc>
{ true , 'f' , '\f', etk::regexpOpcodeError},
{ true , 'n' , '\n', etk::regexpOpcodeError},
{ true , 'r' , '\r', etk::regexpOpcodeError},
{ true , 't' , '\t', etk::regexpOpcodeError},
{ true , 'v' , '\v', etk::regexpOpcodeError},
{ true , '\\' , '\\', etk::regexpOpcodeError},
{ true , '&' , '&' , etk::regexpOpcodeError},
{ true , '0' , '\0', etk::regexpOpcodeError},
{ true , '@' , 0 , etk::regexpOpcodeNoChar},
};
// Number of rows in constConvertionTable.
const int64_t etk::regexp::constConvertionTableSize = sizeof(etk::regexp::constConvertionTable) / sizeof(struct etk::convertionTable) ;
// Printable names of etk::regexp::parseStatus, indexed by the enumerator value.
// NOTE(review): the order must match the enum declaration in etk/RegExp.hpp — confirm.
static const char* parseStatusTable[] = {
	"parseStatusUnknow",
	"parseStatusNone",
	"parseStatusPartial",
	"parseStatusFull"
};
/**
 * @brief Stream a parse status as its symbolic name.
 * @param[in,out] _os Stream to write into.
 * @param[in] _obj Status to display.
 * @return The stream given in parameter.
 * @note A value outside the name table is printed as "parseStatus(<number>)" instead of
 *       reading past the end of the table.
 */
std::ostream& etk::regexp::operator <<(std::ostream& _os, enum etk::regexp::parseStatus _obj) {
	const int64_t index = static_cast<int64_t>(_obj);
	const int64_t tableSize = static_cast<int64_t>(sizeof(parseStatusTable) / sizeof(parseStatusTable[0]));
	if (    index < 0
	     || index >= tableSize) {
		_os << "parseStatus(" << index << ")";
		return _os;
	}
	_os << parseStatusTable[index];
	return _os;
}
/**
 * @brief Stream a FindProperty as "property([start,stop]*multiplicity status)".
 * @param[in,out] _os Stream to write into.
 * @param[in] _obj Property to display.
 * @return The stream given in parameter.
 */
std::ostream& etk::regexp::operator <<(std::ostream& _os, const etk::regexp::FindProperty& _obj) {
	// Matched range.
	_os << "property([";
	_os << _obj.getPositionStart();
	_os << ",";
	_os << _obj.getPositionStop();
	// Repetition count, then status.
	_os << "]*";
	_os << _obj.getMultiplicity();
	_os << " ";
	_os << _obj.getStatus();
	_os << ")";
	return _os;
}
/**
 * @brief Build a printable, bash-colored representation of a compiled regular expression.
 *
 * Each opcode in the range is written back as its textual symbol wrapped in a color
 * sequence; every other value is converted to UTF-8 and appended without color.
 *
 * @param[in] _data Compiled expression (opcodes and plain characters).
 * @param[in] _start First index to display (a negative value is treated as 0).
 * @param[in] _stop Index just after the last element to display (clamped to the data size).
 * @return The decorated string; it always starts with ETK_BASH_COLOR_NORMAL.
 */
std::string etk::regexp::createString(const std::vector<char32_t>& _data, int64_t _start, int64_t _stop) {
	std::string output(ETK_BASH_COLOR_NORMAL);
	const std::string normal(ETK_BASH_COLOR_NORMAL);
	// Append "<color><text><normal>" to the output.
	auto appendColored = [&output, &normal](const std::string& _color, const char* _text) {
		output += _color;
		output += _text;
		output += normal;
	};
	// A negative start would index before the beginning of the vector.
	const int64_t first = (_start < 0) ? 0 : _start;
	int64_t last = (int64_t)_data.size();
	if (_stop < last) {
		last = _stop;
	}
	for (int64_t iii=first; iii<last; ++iii) {
		switch(_data[iii]) {
			case regexpOpcodePTheseIn:    appendColored(ETK_BASH_COLOR_RED,     "(");   break;
			case regexpOpcodePTheseOut:   appendColored(ETK_BASH_COLOR_RED,     ")");   break;
			case regexpOpcodeBracketIn:   appendColored(ETK_BASH_COLOR_YELLOW,  "[");   break;
			case regexpOpcodeBracketOut:  appendColored(ETK_BASH_COLOR_YELLOW,  "]");   break;
			case regexpOpcodeTo:          appendColored(ETK_BASH_COLOR_YELLOW,  "-");   break;
			case regexpOpcodeBracetIn:    appendColored(ETK_BASH_COLOR_GREEN,   "{");   break;
			case regexpOpcodeBracetOut:   appendColored(ETK_BASH_COLOR_GREEN,   "}");   break;
			case regexpOpcodeStar:        appendColored(ETK_BASH_COLOR_BLUE,    "*");   break;
			case regexpOpcodeDot:         appendColored(ETK_BASH_COLOR_BLUE,    ".");   break;
			case regexpOpcodeQuestion:    appendColored(ETK_BASH_COLOR_BLUE,    "?");   break;
			case regexpOpcodePlus:        appendColored(ETK_BASH_COLOR_BLUE,    "+");   break;
			case regexpOpcodePipe:        appendColored(ETK_BASH_COLOR_BLUE,    "|");   break;
			case regexpOpcodeNoChar:      appendColored(ETK_BASH_COLOR_MAGENTA, "@");   break;
			case regexpOpcodeStartOfLine: appendColored(ETK_BASH_COLOR_MAGENTA, "^");   break;
			case regexpOpcodeEndOfLine:   appendColored(ETK_BASH_COLOR_MAGENTA, "$");   break;
			case regexpOpcodeDigit:       appendColored(ETK_BASH_COLOR_MAGENTA, "\\d"); break;
			case regexpOpcodeDigitNot:    appendColored(ETK_BASH_COLOR_MAGENTA, "\\D"); break;
			case regexpOpcodeLetter:      appendColored(ETK_BASH_COLOR_MAGENTA, "\\l"); break;
			case regexpOpcodeLetterNot:   appendColored(ETK_BASH_COLOR_MAGENTA, "\\L"); break;
			case regexpOpcodeSpace:       appendColored(ETK_BASH_COLOR_MAGENTA, "\\s"); break;
			case regexpOpcodeSpaceNot:    appendColored(ETK_BASH_COLOR_MAGENTA, "\\S"); break;
			case regexpOpcodeWord:        appendColored(ETK_BASH_COLOR_MAGENTA, "\\w"); break;
			case regexpOpcodeWordNot:     appendColored(ETK_BASH_COLOR_MAGENTA, "\\W"); break;
			case regexpOpcodeEOF:         appendColored(ETK_BASH_COLOR_MAGENTA, "\\e"); break;
			case '\n':                    appendColored(ETK_BASH_COLOR_MAGENTA, "\\n"); break;
			case '\t':                    appendColored(ETK_BASH_COLOR_MAGENTA, "\\t"); break;
			default: {
				// Plain character: append its UTF-8 encoding.
				char plop[10];
				int8_t nb = u32char::convertUtf8(_data[iii], plop);
				// Keep the terminator inside the buffer whatever length is reported.
				if (nb < 0) {
					nb = 0;
				} else if (nb > (int8_t)(sizeof(plop) - 1)) {
					nb = (int8_t)(sizeof(plop) - 1);
				}
				plop[nb] = '\0';
				output += plop;
				break;
			}
		}
	}
	return output;
}
/**
 * @brief Get an indentation string of two spaces per level, for debug display.
 * @param[in] _level Nesting level requested.
 * @return A NUL-terminated string of 2*_level spaces; any level above 30 returns the
 *         full 60 spaces. The storage is a function-local static buffer shared by all
 *         callers, so it must not be modified.
 */
char* etk::regexp::levelSpace(uint32_t _level) {
	static const uint32_t maxLevel = 30;
	// The buffer is built once and sized from maxLevel, so the pointer arithmetic below
	// can never leave it (the previous code offset into a string literal and relied on
	// the literal being exactly 60 characters long).
	static char* const tmpSpace = []() -> char* {
		static char buffer[2*30 + 1];
		for (uint32_t iii=0; iii<2*30; ++iii) {
			buffer[iii] = ' ';
		}
		buffer[2*30] = '\0';
		return buffer;
	}();
	if (_level > maxLevel) {
		return tmpSpace;
	}
	// Skip the leading spaces that are not needed for this level.
	return tmpSpace + 2*maxLevel - 2*_level;
}
/**
 * @brief Get the length of one alternative of a (...) group: the elements from _startPos
 *        up to the next '|' or ')' opcode found at the same nesting depth.
 * @param[in] _data Compiled expression.
 * @param[in] _startPos Index of the first element of the alternative.
 * @return Number of elements of the alternative. 0 when _startPos is out of range or
 *         directly on a ')' or '|' opcode. When no terminator is found, the length up to
 *         the end of the data is returned.
 */
int64_t etk::regexp::getLenOfPTheseElem(const std::vector<char32_t>& _data, int64_t _startPos) {
	const int64_t dataSize = (int64_t)_data.size();
	if (    _startPos < 0
	     || _startPos >= dataSize) {
		return 0;
	}
	// special case of the (...) or | ==> we search '|' or ')'
	if (    _data[_startPos] == regexpOpcodePTheseOut
	     || _data[_startPos] == regexpOpcodePipe) {
		return 0;
	}
	// Depth of the nested (...) currently open inside the alternative.
	int32_t nbOpen = 0;
	for (int64_t pos=_startPos; pos<dataSize; ++pos) {
		if (_data[pos] == regexpOpcodePTheseIn) {
			// find a sub section :
			++nbOpen;
			continue;
		}
		if (nbOpen > 0) {
			// Inside a sub section: only its closing ')' matters.
			if (_data[pos] == regexpOpcodePTheseOut) {
				--nbOpen;
			}
			continue;
		}
		if (    _data[pos] == regexpOpcodePTheseOut
		     || _data[pos] == regexpOpcodePipe) {
			// End of the alternative. pos is always > _startPos here because the first
			// element was checked above, so the length is strictly positive.
			return pos - _startPos;
		}
	}
	return dataSize - _startPos;
}
/**
 * @brief Get the number of elements enclosed by a (...) group.
 * @param[in] _data Compiled expression.
 * @param[in] _startPos Index of the opening '(' opcode.
 * @return Number of elements between the '(' and its matching ')'. 0 when _startPos is
 *         out of range, is on a ')' opcode, is not on a '(' opcode, the group is empty,
 *         or no matching ')' exists.
 */
int64_t etk::regexp::getLenOfPThese(const std::vector<char32_t>& _data, int64_t _startPos) {
	const int64_t dataSize = (int64_t)_data.size();
	// Same out-of-range policy as getLenOfPTheseElem.
	if (    _startPos < 0
	     || _startPos >= dataSize) {
		return 0;
	}
	// special case of the (...) or | ==> we search '|' or ')'
	if (_data[_startPos] == regexpOpcodePTheseOut) {
		return 0;
	}
	if (_data[_startPos] != regexpOpcodePTheseIn) {
		TK_ERROR(" find error in PThese");
		return 0;
	}
	// Depth of the nested (...) currently open inside the group.
	int32_t nbOpen = 0;
	for (int64_t pos=_startPos+1; pos<dataSize; ++pos) {
		if (_data[pos] == regexpOpcodePTheseIn) {
			// find a sub section :
			++nbOpen;
		} else if (nbOpen > 0) {
			if (_data[pos] == regexpOpcodePTheseOut) {
				--nbOpen;
			}
		} else if (_data[pos] == regexpOpcodePTheseOut) {
			// Find the end of the (...)
			// just return the size inside
			const int64_t sizeInside = pos - _startPos - 1;
			if (sizeInside <= 0) {
				TK_ERROR("Error in the (...) no data at "<< pos-1);
				return 0;
			}
			return sizeInside;
		}
	}
	return 0;
}
int64_t etk::regexp::getLenOfBracket(const std::vector<char32_t>& _data, int64_t _startPos) {
int64_t pos = _startPos;
// special case of the (...) or | ==> we search '|' or ')'
if(_data[pos]==regexpOpcodeBracketOut) {
return 0;
}
if(_data[pos] != regexpOpcodeBracketIn) {
TK_ERROR("find no {...");
return 0;
}
pos++;
// find size ...
while (pos < (int64_t)_data.size() ) {
if(_data[pos]==regexpOpcodeBracketOut) {
// Find the end of the [...]
// just return the size inside
int32_t sizeInside = pos - _startPos -1 ;
if (0 >= sizeInside) {
TK_ERROR("Error in the [...] no data at "<< pos-1);
return 0;
}
return sizeInside;
} else if( _data[pos] != regexpOpcodeTo
&& _data[pos] > 0xFF ) {
TK_ERROR("Error in the [...] not permited element at "<< pos << " '" << (char)_data[pos] << "'");
return 0;
}
pos++;
}
return 0;
}
int64_t etk::regexp::getLenOfBrace(const std::vector<char32_t>& _data, int64_t _startPos) {
int32_t pos = _startPos;
// special case of the (...) or | ==> we search '|' or ')'
if(_data[pos]==regexpOpcodeBracetOut) {
return 0;
}
if(_data[pos]!=regexpOpcodeBracetIn) {
TK_ERROR(" did not find brace IN { ");
return 0;
}
pos++;
// find size ...
while (pos < (int64_t)_data.size() ) {
if(_data[pos]==regexpOpcodeBracetOut) {
// Find the end of the [...]
// just return the size inside
int32_t sizeInside = pos - _startPos -1 ;
if (0 >= sizeInside) {
TK_ERROR("Error in the {...} no data at "<< pos-1);
return 0;
}
return sizeInside;
} else if( _data[pos] != ','
&& ( _data[pos] < '0'
|| _data[pos] > '9') ) {
TK_ERROR("Error in the {...} not permited element at "<< pos << " '" << _data[pos] << "'");
return 0;
}
pos++;
}
return 0;
}
/**
 * @brief Get the length of a run of plain elements: everything from _startPos up to the
 *        first opcode listed in the switch below, or the end of the data.
 * @param[in] _data Compiled expression.
 * @param[in] _startPos Index of the first element of the run.
 * @return Number of plain elements. 0 when _startPos is negative, beyond the data, or
 *         directly on one of the listed opcodes (an error is logged in that last case).
 */
int64_t etk::regexp::getLenOfNormal(const std::vector<char32_t>& _data, int64_t _startPos) {
	const int64_t dataSize = (int64_t)_data.size();
	if (_startPos < 0) {
		// A negative index would read before the beginning of the vector.
		return 0;
	}
	int64_t pos = _startPos;
	// find size ...
	while (pos < dataSize) {
		switch(_data[pos]) {
			case regexpOpcodePTheseIn:
			case regexpOpcodePTheseOut:
			case regexpOpcodeBracketIn:
			case regexpOpcodeBracketOut:
			case regexpOpcodeBracetIn:
			case regexpOpcodeBracetOut:
			case regexpOpcodeTo:
			case regexpOpcodeStar:
			case regexpOpcodeDot:
			case regexpOpcodeQuestion:
			case regexpOpcodePlus:
			case regexpOpcodePipe:
			case regexpOpcodeStartOfLine:
			case regexpOpcodeEndOfLine:
			case regexpOpcodeDigit:
			case regexpOpcodeDigitNot:
			case regexpOpcodeLetter:
			case regexpOpcodeLetterNot:
			case regexpOpcodeSpace:
			case regexpOpcodeSpaceNot:
			case regexpOpcodeWord:
			case regexpOpcodeWordNot:
				{
					// An opcode ends the run: return the number of elements before it.
					// Kept on 64 bits to match the return type.
					const int64_t sizeInside = pos - _startPos;
					if (sizeInside <= 0) {
						TK_ERROR("Error in the normal data : no data ...");
					}
					return sizeInside;
				}
			default :
				// plain element: part of the run
				break;
		}
		pos++;
	}
	// _startPos beyond the end of the data gives a negative difference.
	if (pos - _startPos < 0) {
		return 0;
	}
	return pos - _startPos;
}
/**
 * @brief Parse the content of a {...} repetition into a minimum and a maximum count.
 *
 * The content is read as "<first>[,<second>]", optionally ended by '}':
 *   - no ',' (or a ',' that is the very last element): second = first;
 *   - if second is 0 and first is not 0, the result is min=0, max=first;
 *   - otherwise min=first, max=second.
 *
 * @param[in] _data Elements found between the braces.
 * @param[out] _min Minimum repetition count.
 * @param[out] _max Maximum repetition count.
 * @return true when the content is valid; false on an unexpected character, a second
 *         ',', or a minimum greater than the maximum (an error is logged).
 * @note _min and _max are already written when the min/max consistency check fails.
 */
bool etk::regexp::parseBrace(const std::vector<char32_t>& _data, uint32_t& _min, uint32_t& _max) {
	const int64_t dataSize = (int64_t)_data.size();
	int64_t pos = 0;
	int32_t firstElement = 0;
	int32_t secondElement = 0;
	// Set when a '}' ends the parsing before the end of the data.
	bool closed = false;
	// First number: up to the ',' or the '}'.
	while (pos < dataSize) {
		if (_data[pos] == ',') {
			++pos;
			break;
		}
		if (_data[pos] == '}') {
			secondElement = firstElement;
			closed = true;
			break;
		}
		if (u32char::isInteger(_data[pos]) == true) {
			firstElement *= 10;
			firstElement += u32char::toInt(_data[pos]);
		} else {
			TK_ERROR("Can not parse this element " << (char)_data[pos] << " at pos " << pos);
			return false;
		}
		++pos;
	}
	if (closed == false) {
		// Nothing left after the first number (or after the ','): single value.
		if (pos == dataSize) {
			secondElement = firstElement;
		}
		// Second number: up to the '}' or the end of the data.
		while (pos < dataSize) {
			if (_data[pos] == ',') {
				TK_ERROR("Can not find a second , in {} at pos " << pos);
				return false;
			}
			if (_data[pos] == '}') {
				break;
			}
			if (u32char::isInteger(_data[pos]) == true) {
				secondElement *= 10;
				secondElement += u32char::toInt(_data[pos]);
			} else {
				// Displayed as a char, like the message of the first number.
				TK_ERROR("Can not parse this element " << (char)_data[pos] << " at pos " << pos);
				return false;
			}
			++pos;
		}
	}
	if (    secondElement == 0
	     && firstElement != 0) {
		_min = 0;
		_max = firstElement;
	} else {
		_min = firstElement;
		_max = secondElement;
	}
	if (_min > _max) {
		// The previous message said "<", the opposite of the condition tested.
		TK_ERROR("Minimum=" << _min << " can not be > maximum=" << _max );
		return false;
	}
	return true;
}
/**
 * @brief Get a printable form of one character, for debug display.
 * @param[in] _data Character to convert.
 * @return "\\n", "\\t", "\\r" and "\\0" for the matching control characters (backslash
 *         followed by a letter or digit), the decimal code for any other byte in
 *         [0x01..0x1F], and the character itself otherwise.
 * @note The range test is done on the unsigned value: where char is signed, bytes above
 *       0x7F (UTF-8 sequences) were previously seen as "<= 0x20" and printed as negative
 *       numbers.
 */
std::string etk::regexp::autoStr(char _data) {
	switch (_data) {
		case '\n':
			return "\\n";
		case '\t':
			return "\\t";
		case '\r':
			return "\\r";
		case '\0':
			return "\\0";
		case ' ':
			return " ";
		default:
			break;
	}
	if (static_cast<unsigned char>(_data) <= 0x20) {
		return std::to_string((int32_t)_data);
	}
	return std::string(1, _data);
}
/**
 * @brief Get a printable form of a string, for debug display.
 * @param[in] _data String to convert.
 * @return Concatenation of the printable form of each character (see the char overload).
 */
std::string etk::regexp::autoStr(const std::string& _data) {
	std::string out;
	out.reserve(_data.size());
	for (const char it : _data) {
		out += autoStr(it);
	}
	return out;
}
/**
 * @brief Build a marker line: _pos spaces followed by a '^'.
 * @param[in] _pos Column of the marker (a value <= 0 gives no leading space).
 * @return The marker string.
 */
std::string etk::regexp::strTick(int32_t _pos) {
	const std::string::size_type padding = (_pos > 0) ? static_cast<std::string::size_type>(_pos) : 0;
	std::string out(padding, ' ');
	out.push_back('^');
	return out;
}
// String conversion specializations for etk::RegExp, for both the std::string and the
// std::u32string flavours of the class.
namespace etk {
// to_string: forwards to RegExp::getRegExp().
template<> std::string to_string<etk::RegExp<std::string>>(const etk::RegExp<std::string>& _val) {
return _val.getRegExp();
}
template<> std::string to_string<etk::RegExp<std::u32string>>(const etk::RegExp<std::u32string>& _val) {
return _val.getRegExp();
}
// to_u32string: forwards to RegExp::getURegExp().
template<> std::u32string to_u32string<etk::RegExp<std::string>>(const etk::RegExp<std::string>& _val) {
return _val.getURegExp();
}
template<> std::u32string to_u32string<etk::RegExp<std::u32string>>(const etk::RegExp<std::u32string>& _val) {
return _val.getURegExp();
}
// from_string: compiles _value into _variableRet with RegExp::compile().
// These always return true: whatever compile() reports is not looked at here.
// NOTE(review): if compile() can signal an invalid expression, consider propagating it.
template<> bool from_string<etk::RegExp<std::string>>(etk::RegExp<std::string>& _variableRet, const std::u32string& _value) {
_variableRet.compile(_value);
return true;
}
template<> bool from_string<etk::RegExp<std::u32string>>(etk::RegExp<std::u32string>& _variableRet, const std::u32string& _value) {
_variableRet.compile(_value);
return true;
}
template<> bool from_string<etk::RegExp<std::string>>(etk::RegExp<std::string>& _variableRet, const std::string& _value) {
_variableRet.compile(_value);
return true;
}
template<> bool from_string<etk::RegExp<std::u32string>>(etk::RegExp<std::u32string>& _variableRet, const std::string& _value) {
_variableRet.compile(_value);
return true;
}
};

1913
etk/RegExp.hpp Normal file

File diff suppressed because it is too large Load Diff

View File

@ -230,76 +230,6 @@ bool utf8::theoricFirst(const char _input) {
}
#endif
/**
 * @brief Move the iterator to the first byte of the next UTF-8 character.
 *
 * The cached decoded value is reset. A negative position is brought back to 0. Otherwise
 * the position advances by the length announced by the current lead byte (1 when that
 * length is 0) and is clamped to the size of the data.
 *
 * @return This iterator.
 * @note The early return previously tested "m_current <= 0", so an iterator located on
 *       the first byte (index 0) could never advance.
 */
utf8::iterator& utf8::iterator::operator++ () {
	m_value = u32char::Null;
	if (m_current < 0) {
		m_current = 0;
		return *this;
	}
	if (m_data == nullptr) {
		return *this;
	}
	const int64_t dataSize = (int64_t)m_data->size();
	if (m_current < dataSize) {
		const int8_t nbChar = utf8::theoricLen((*m_data)[m_current]);
		// Always progress by at least one byte.
		m_current += (nbChar != 0) ? nbChar : 1;
	}
	if (m_current >= dataSize) {
		m_current = dataSize;
	}
	return *this;
}
/**
 * @brief Move the iterator back to the first byte of the previous UTF-8 character.
 *
 * The cached decoded value is reset. Starting one byte before the current position, the
 * search walks backwards while the byte is not reported as a first byte by
 * utf8::theoricFirst(), and stops at index 0 at the latest.
 *
 * @return This iterator.
 * @note The lower bound was previously tested as "m_current-iii>0" with a negative iii,
 *       which is always true for a positive position; the search could then read before
 *       the beginning of the data. The bound is now checked before each read.
 */
utf8::iterator& utf8::iterator::operator-- () {
	m_value = u32char::Null;
	if (    m_data == nullptr
	     || m_current <= 0) {
		m_current = 0;
		return *this;
	}
	int32_t offset = -1;
	while (    m_current+offset > 0
	        && offset >= -6
	        && utf8::theoricFirst((*m_data)[m_current+offset]) == false) {
		--offset;
	}
	m_current += offset;
	if (m_current < 0) {
		m_current = 0;
	}
	return *this;
}
/**
 * @brief Get the unicode value of the character under the iterator.
 *
 * The decoded value is cached in m_value and reused until the iterator moves.
 *
 * @return The decoded character, or the current m_value (after logging an error) when
 *         the iterator is not linked to data or is out of range.
 */
char32_t utf8::iterator::operator* () {
	// Already decoded since the last move.
	if (m_value != u32char::Null) {
		return m_value;
	}
	if (m_data == nullptr) {
		TK_ERROR("request an element that iterator not link");
		return m_value;
	}
	const int64_t dataSize = (int64_t)m_data->size();
	const bool inRange =    m_current >= 0
	                     && m_current < dataSize;
	if (inRange == false) {
		TK_ERROR("request an element out of bounding !!! 0 <= " << m_current << " < " << m_data->size());
		return m_value;
	}
	// Zero-filled so the copied bytes are always NUL-terminated.
	// NOTE(review): assumes utf8::theoricLen() never announces more than 4 bytes — confirm.
	char tmpVal[5] = {0};
	tmpVal[0] = (*m_data)[m_current];
	const int8_t nbChar = utf8::theoricLen(tmpVal[0]);
	int32_t iii = 1;
	while (    iii < nbChar
	        && m_current+iii < dataSize) {
		tmpVal[iii] = (*m_data)[m_current+iii];
		++iii;
	}
	// transform ...
	m_value = utf8::convertChar32(tmpVal);
	return m_value;
}
namespace etk {
#if __CPP_VERSION__ >= 2011

View File

@ -34,6 +34,7 @@ def configure(target, my_module):
'etk/tool.cpp',
'etk/Noise.cpp',
'etk/Color.cpp',
'etk/RegExp.cpp',
'etk/math/Matrix2.cpp',
'etk/math/Matrix4.cpp',
'etk/math/Vector2D.cpp',
@ -46,11 +47,13 @@ def configure(target, my_module):
my_module.add_header_file([
'etk/etk.hpp',
'etk/debug.hpp',
'etk/types.hpp',
'etk/stdTools.hpp',
'etk/tool.hpp',
'etk/Noise.hpp',
'etk/Color.hpp',
'etk/RegExp.hpp',
'etk/Hash.hpp',
'etk/math/Matrix2.hpp',
'etk/math/Matrix4.hpp',

View File

@ -22,6 +22,7 @@
#include "testColor.hpp"
#include "testFSNode.hpp"
#include "testHash.hpp"
#include "testRegExp.hpp"
#include "testStdShared.hpp"
int main(int argc, const char *argv[]) {
@ -30,6 +31,7 @@ int main(int argc, const char *argv[]) {
// the only one init for etk:
etk::init(argc, argv);
etk::initDefaultFolder("ewolApplNoName");
//testRegExp();
return RUN_ALL_TESTS();
}

178
test/testRegExp.hpp Normal file
View File

@ -0,0 +1,178 @@
/**
* @author Edouard DUPIN
*
* @copyright 2011, Edouard DUPIN, all rights reserved
*
* @license MPL v2.0 (see license file)
*/
#include <gtest/gtest.h>
#include <etk/RegExp.hpp>
// Redefine the NAME macro for this test file.
// NOTE(review): "Hash" looks copied from the hash test file; this file tests RegExp —
// confirm what NAME is used for before renaming it.
#undef NAME
#define NAME "Hash"
/**
 * @brief Run one regular expression on one text and log what happens.
 * @param[in] _expression Regular expression to compile.
 * @param[in] _search Text to parse, from its first to its last character.
 * @return The (start, stop) positions of the match, or (0,0) when nothing matches.
 */
std::pair<int32_t, int32_t> testRegExpSingle(const std::string& _expression, const std::string& _search) {
	etk::RegExp<std::string> expression(_expression);
	TK_INFO("Parse RegEx: \"" << expression.getRegExDecorated() << "\"");
	TK_INFO(" IN: \"" << etk::regexp::autoStr(_search) << "\"");
	const bool matched = (expression.parse(_search, 0, _search.size()) == true);
	if (matched == false) {
		TK_INFO(" ==> ---------------");
		return std::make_pair(0,0);
	}
	TK_INFO(" match [" << expression.start() << ".." << expression.stop() << "] ");
	TK_INFO(" ==> '" << etk::regexp::autoStr(std::string(_search, expression.start(), expression.stop() - expression.start())) << "'");
	return std::make_pair(expression.start(), expression.stop());
}
// Texts shared by the test cases below. The expected positions in the tests are byte
// offsets into these exact strings, so any change here requires updating the tests.
static std::string data1 = " a /* plop */ \n int eee = 22; // error value \nint main(void) {\n return 0;\n}\n";
static std::string data2 = "alpha /* plop */ test";
static std::string data3 = "pp \n // qdfqdfsdf \nde";
// A "/* ... */" comment must be located in data1 and data2; data3 holds none.
TEST(TestEtkRegExp, MultipleLineComment ) {
	const std::string expression = "/\\*.*\\*/";
	EXPECT_EQ(testRegExpSingle(expression, data1), std::make_pair(3,13));
	EXPECT_EQ(testRegExpSingle(expression, data2), std::make_pair(6,16));
	EXPECT_EQ(testRegExpSingle(expression, data3), std::make_pair(0,0));
}
// A "// ..." comment running to the end of its line must be located in data1 and data3;
// data2 holds none.
TEST(TestEtkRegExp, MultipleEndDollar ) {
	const std::string expression = "//.*$";
	EXPECT_EQ(testRegExpSingle(expression, data1), std::make_pair(30,46));
	EXPECT_EQ(testRegExpSingle(expression, data2), std::make_pair(0,0));
	EXPECT_EQ(testRegExpSingle(expression, data3), std::make_pair(5,19));
}
// With a trailing ".*" and nothing after it, only the "/*" opener is expected in the match.
TEST(TestEtkRegExp, MultipleNoEnd ) {
	const std::string expression = "/\\*.*";
	EXPECT_EQ(testRegExpSingle(expression, data1), std::make_pair(3,5));
	EXPECT_EQ(testRegExpSingle(expression, data2), std::make_pair(6,8));
	EXPECT_EQ(testRegExpSingle(expression, data3), std::make_pair(0,0));
}
// The set [a-z] must match the first lower-case letter of each text.
TEST(TestEtkRegExp, aToZ ) {
	const std::string expression = "[a-z]";
	EXPECT_EQ(testRegExpSingle(expression, data1), std::make_pair(1,2));
	EXPECT_EQ(testRegExpSingle(expression, data2), std::make_pair(0,1));
	EXPECT_EQ(testRegExpSingle(expression, data3), std::make_pair(0,1));
}
// Combination of ".*", a group with alternatives, a {min,max} repetition and '+'.
TEST(TestEtkRegExp, complexString ) {
	const std::string expression = "a.*plop(z{2,3}|h+)+r";
	const std::string dataToParse = " eesd a lzzml plophzzzzzhhhhhrlkmlkml";
	EXPECT_EQ(testRegExpSingle(expression, dataToParse), std::make_pair(7,31));
}
// Manual experimentation function (its call in main() is commented out).
// As written it has no observable effect: every testRegExpSingle() call is commented
// out, so the body only assigns successive sample texts to the local `data`.
// Each assignment is the input intended for the commented-out call that follows it.
void testRegExp() {
std::string data;
//std::string data = "pp \n# plop // qdfqdfsdf \nde";
//std::string data = "pp \n# plop //\\\n qdfqdfsdf \nde";
//std::string data = "p#\ne";
//testRegExpSingle("#(\\\\\\\\|\\\\\\n|.)*$", data);
//testRegExpSingle("#.*$", data);
//std::string data = "p//TODO:\ndse";
//std::string data = "p// TODO:\ndse";
//std::string data = "p// TODO :\ndse";
//std::string data = "p// TODO : sdfgsdfsd \ndse";
//testRegExpSingle("//[ \\t]*TODO[ \\t]*:.*$", data);
// This first value is overwritten by the next statement before any use.
data = "abc m_def ghi";
data = " protected:\n"
" vec2 m_offset; \n";
//testRegExpSingle("\\@m_[A-Za-z_0-9]*\\@", data);
// This value is also overwritten before any use.
data = " * @param[in] _mode Configuring mode.\n"
" * @param[in] _time Time in second of the annimation display\n"
" */\n"
" void setAnnimationTime(enum ";
data = "virtual vec2 relativePosition(const vec2& _pos);";
//testRegExpSingle("\\@(\\w|_)+[ \\t]*\\(", data);
data = "include <ewol/Dimensio2n.h>\n"
"#include <ewol/Dimension.h>\n"
"'dfgd\'fg'\n"
"\"dqf\\\"gsdfg\" // \"\n"
"// TODO : sqdkfjsdldkqfj\n"
"\n"
"namespace ewol {\n"
" class Widget;\n"
" namespace widget {\n"
" class Manager;\n"
" class Windows;\n"
" };\n"
"};\n"
"#include <etk/types.h>\n";
//testRegExpSingle("#(\\\\[\\\\\\n]|.)*$", data);
data = " 'dfgd\\'fg' \n"
" vec2 m_offset; \n";
//testRegExpSingle("'((\\\\[\\\\'])|.)*'", data);
// Disabled experiments using the RegExp object directly (setMaximize on/off).
/*
data = "ddfgdfgh";
etk::RegExp<std::string> reg(".*");
reg.setMaximize(true);
TK_INFO("Parse RegEx : '" << reg.getRegExDecorated() << "'");
if (reg.parse(data, 0, data.size()) == true) {
//if (reg.processOneElement(data, 0, data.size()) == true) {
TK_INFO(" match [" << reg.start() << ".." << reg.stop() << "] ");
TK_INFO(" ==> '" << std::string(data, reg.start(), reg.stop()-reg.start()) << "'");
}
data = "plop \"\" sdfsdf s\"swdfsqd sdfgsdfg \" \" sdfsf";
reg = etk::RegExp<std::string>("\"(\\\\[\\\\\"]|.)*\"");
reg.setMaximize(false);
TK_INFO("Parse RegEx : '" << reg.getRegExDecorated() << "'");
if (reg.parse(data, 0, data.size()) == true) {
//if (reg.processOneElement(data, 0, data.size()) == true) {
TK_INFO(" match [" << reg.start() << ".." << reg.stop() << "] ");
TK_INFO(" ==> '" << std::string(data, reg.start(), reg.stop()-reg.start()) << "'");
}
//TODO : good : "(\\+|[0-9])*" ==> really bad : "(+|[0-9])*"
data = "void limit(const vec2& _origin, const vec2& _size);\n";
reg = etk::RegExp<std::string>("\\@(\\w|_)+[ \\t]*\\(");
reg.setMaximize(false);
TK_INFO("Parse RegEx : '" << reg.getRegExDecorated() << "'");
if (reg.parse(data, 0, data.size()) == true) {
//if (reg.processOneElement(data, 0, data.size()) == true) {
TK_INFO(" match [" << reg.start() << ".." << reg.stop() << "] ");
TK_INFO(" ==> '" << std::string(data, reg.start(), reg.stop()-reg.start()) << "'");
}
data = "void limit const vec2& _origin, const vec2& _size);\n";
if (reg.parse(data, 0, data.size()) == true) {
//if (reg.processOneElement(data, 0, data.size()) == true) {
TK_INFO(" match [" << reg.start() << ".." << reg.stop() << "] ");
TK_INFO(" ==> '" << std::string(data, reg.start(), reg.stop()-reg.start()) << "'");
}
*/
}