485 lines
14 KiB
C++
485 lines
14 KiB
C++
/**
|
|
*******************************************************************************
|
|
* @file etkExp.cpp
|
|
* @brief Ewol Tool Kit : Regular expression annalyser (sources)
|
|
* @author Edouard DUPIN
|
|
* @date 04/04/2011
|
|
* @par Project
|
|
* Ewol TK
|
|
*
|
|
* @par Copyright
|
|
* Copyright 2011 Edouard DUPIN, all right reserved
|
|
*
|
|
* This software is distributed in the hope that it will be useful, but WITHOUT
|
|
* ANY WARRANTY.
|
|
*
|
|
* Licence summary :
|
|
* You can modify and redistribute the sources code and binaries.
|
|
* You can send me the bug-fix
|
|
*
|
|
* Term of the licence in in the file licence.txt.
|
|
*
|
|
*******************************************************************************
|
|
*/
|
|
|
|
#include <etkRegExp.h>
|
|
|
|
|
|
|
|
const etk::convertionTable_ts etk::constConvertionTable[] = {
|
|
// haveBackSlash, inputValue, newValue
|
|
{ false , '(' , REGEXP_OPCODE_PTHESE_IN},
|
|
{ true , '(' , (int16_t)'('},
|
|
{ false , ')' , REGEXP_OPCODE_PTHESE_OUT},
|
|
{ true , ')' , (int16_t)')'},
|
|
{ false , '[' , REGEXP_OPCODE_BRACKET_IN},
|
|
{ true , '[' , (int16_t)'['},
|
|
{ false , ']' , REGEXP_OPCODE_BRACKET_OUT},
|
|
{ true , ']' , (int16_t)']'},
|
|
{ false , '{' , REGEXP_OPCODE_BRACE_IN},
|
|
{ true , '{' , (int16_t)'{'},
|
|
{ false , '}' , REGEXP_OPCODE_BRACE_OUT},
|
|
{ true , '}' , (int16_t)'}'},
|
|
{ false , '-' , REGEXP_OPCODE_TO},
|
|
{ true , '-' , (int16_t)'-'},
|
|
{ false , '*' , REGEXP_OPCODE_STAR},
|
|
{ true , '*' , (int16_t)'*'},
|
|
{ false , '.' , REGEXP_OPCODE_DOT},
|
|
{ true , '.' , (int16_t)'.'},
|
|
{ false , '?' , REGEXP_OPCODE_QUESTION},
|
|
{ true , '?' , (int16_t)'?'},
|
|
{ false , '+' , REGEXP_OPCODE_PLUS},
|
|
{ true , '+' , (int16_t)'+'},
|
|
{ false , '|' , REGEXP_OPCODE_PIPE},
|
|
{ true , '|' , (int16_t)'|'},
|
|
{ false , '^' , REGEXP_OPCODE_START_OF_LINE},
|
|
{ true , '^' , (int16_t)'^'},
|
|
{ false , '$' , REGEXP_OPCODE_END_OF_LINE},
|
|
{ true , '$' , (int16_t)'$'},
|
|
{ true , 'd' , REGEXP_OPCODE_DIGIT},
|
|
{ true , 'D' , REGEXP_OPCODE_DIGIT_NOT},
|
|
{ true , 'l' , REGEXP_OPCODE_LETTER},
|
|
{ true , 'L' , REGEXP_OPCODE_LETTER_NOT},
|
|
{ true , 's' , REGEXP_OPCODE_SPACE},
|
|
{ true , 'S' , REGEXP_OPCODE_SPACE_NOT},
|
|
{ true , 'w' , REGEXP_OPCODE_WORD},
|
|
{ true , 'W' , REGEXP_OPCODE_WORD_NOT},
|
|
{ true , 'a' , (int16_t)'\a'},
|
|
{ true , 'b' , (int16_t)'\b'},
|
|
{ true , 'e' , 0x001B}, // Escape character <Esc>
|
|
{ true , 'f' , (int16_t)'\f'},
|
|
{ true , 'n' , (int16_t)'\n'},
|
|
{ true , 'r' , (int16_t)'\r'},
|
|
{ true , 't' , (int16_t)'\t'},
|
|
{ true , 'v' , (int16_t)'\v'},
|
|
{ true , '\\' , (int16_t)'\\'},
|
|
{ true , '&' , (int16_t)'&'},
|
|
{ true , '0' , (int16_t)'\0'},
|
|
{ true , '@' , REGEXP_OPCODE_NO_CHAR},
|
|
};
|
|
const int32_t etk::constConvertionTableSize = sizeof(etk::constConvertionTable) / sizeof(etk::convertionTable_ts) ;
|
|
|
|
|
|
/**
|
|
* @brief Display the internal data of a node
|
|
*
|
|
* @param[in] data element do display in the console
|
|
*
|
|
* @return ---
|
|
*
|
|
*/
|
|
void etk::DisplayData(etk::VectorType<char> &data)
|
|
{
|
|
int32_t i;
|
|
for (i=0; i<(int32_t)data.Size() ; i++) {
|
|
std::cout<< (char)(data[i]&0x00FF );
|
|
}
|
|
}
|
|
|
|
/**
|
|
* @brief Display of a part of the Regexp element
|
|
*
|
|
* @param[in] data Vector where data is constain
|
|
* @param[in] start Position where the display might start
|
|
* @param[in] stop Position where the display might stop
|
|
*
|
|
* @return ---
|
|
*
|
|
*/
|
|
void etk::DisplayElem(etk::VectorType<int16_t> &data, int32_t start, int32_t stop)
|
|
{
|
|
int32_t i;
|
|
std::cout<< ETK_BASH_COLOR_NORMAL;
|
|
for (i=start; i<(int32_t)data.Size() && i<stop ; i++) {
|
|
switch(data[i])
|
|
{
|
|
case REGEXP_OPCODE_PTHESE_IN: std::cout<<ETK_BASH_COLOR_RED << (char*)"(" << ETK_BASH_COLOR_NORMAL; break;
|
|
case REGEXP_OPCODE_PTHESE_OUT: std::cout<<ETK_BASH_COLOR_RED << (char*)")" << ETK_BASH_COLOR_NORMAL; break;
|
|
case REGEXP_OPCODE_BRACKET_IN: std::cout<<ETK_BASH_COLOR_YELLOW << (char*)"[" << ETK_BASH_COLOR_NORMAL; break;
|
|
case REGEXP_OPCODE_BRACKET_OUT: std::cout<<ETK_BASH_COLOR_YELLOW << (char*)"]" << ETK_BASH_COLOR_NORMAL; break;
|
|
case REGEXP_OPCODE_TO: std::cout<<ETK_BASH_COLOR_YELLOW << (char*)"-" << ETK_BASH_COLOR_NORMAL; break;
|
|
case REGEXP_OPCODE_BRACE_IN: std::cout<<ETK_BASH_COLOR_GREEN << (char*)"{" << ETK_BASH_COLOR_NORMAL; break;
|
|
case REGEXP_OPCODE_BRACE_OUT: std::cout<<ETK_BASH_COLOR_GREEN << (char*)"}" << ETK_BASH_COLOR_NORMAL; break;
|
|
case REGEXP_OPCODE_STAR: std::cout<<ETK_BASH_COLOR_BLUE << (char*)"*" << ETK_BASH_COLOR_NORMAL; break;
|
|
case REGEXP_OPCODE_DOT: std::cout<<ETK_BASH_COLOR_BLUE << (char*)"." << ETK_BASH_COLOR_NORMAL; break;
|
|
case REGEXP_OPCODE_QUESTION: std::cout<<ETK_BASH_COLOR_BLUE << (char*)"?" << ETK_BASH_COLOR_NORMAL; break;
|
|
case REGEXP_OPCODE_PLUS: std::cout<<ETK_BASH_COLOR_BLUE << (char*)"+" << ETK_BASH_COLOR_NORMAL; break;
|
|
case REGEXP_OPCODE_PIPE: std::cout<<ETK_BASH_COLOR_BLUE << (char*)"|" << ETK_BASH_COLOR_NORMAL; break;
|
|
case REGEXP_OPCODE_NO_CHAR: std::cout<<ETK_BASH_COLOR_MAGENTA << (char*)"@" << ETK_BASH_COLOR_NORMAL; break;
|
|
case REGEXP_OPCODE_START_OF_LINE: std::cout<<ETK_BASH_COLOR_MAGENTA << (char*)"^" << ETK_BASH_COLOR_NORMAL; break;
|
|
case REGEXP_OPCODE_END_OF_LINE: std::cout<<ETK_BASH_COLOR_MAGENTA << (char*)"$" << ETK_BASH_COLOR_NORMAL; break;
|
|
case REGEXP_OPCODE_DIGIT: std::cout<<ETK_BASH_COLOR_MAGENTA << (char*)"\\d" << ETK_BASH_COLOR_NORMAL; break;
|
|
case REGEXP_OPCODE_DIGIT_NOT: std::cout<<ETK_BASH_COLOR_MAGENTA << (char*)"\\D" << ETK_BASH_COLOR_NORMAL; break;
|
|
case REGEXP_OPCODE_LETTER: std::cout<<ETK_BASH_COLOR_MAGENTA << (char*)"\\l" << ETK_BASH_COLOR_NORMAL; break;
|
|
case REGEXP_OPCODE_LETTER_NOT: std::cout<<ETK_BASH_COLOR_MAGENTA << (char*)"\\L" << ETK_BASH_COLOR_NORMAL; break;
|
|
case REGEXP_OPCODE_SPACE: std::cout<<ETK_BASH_COLOR_MAGENTA << (char*)"\\s" << ETK_BASH_COLOR_NORMAL; break;
|
|
case REGEXP_OPCODE_SPACE_NOT: std::cout<<ETK_BASH_COLOR_MAGENTA << (char*)"\\S" << ETK_BASH_COLOR_NORMAL; break;
|
|
case REGEXP_OPCODE_WORD: std::cout<<ETK_BASH_COLOR_MAGENTA << (char*)"\\w" << ETK_BASH_COLOR_NORMAL; break;
|
|
case REGEXP_OPCODE_WORD_NOT: std::cout<<ETK_BASH_COLOR_MAGENTA << (char*)"\\W" << ETK_BASH_COLOR_NORMAL; break;
|
|
case '\n': std::cout<<ETK_BASH_COLOR_MAGENTA << (char*)"\\n" << ETK_BASH_COLOR_NORMAL; break;
|
|
case '\t': std::cout<<ETK_BASH_COLOR_MAGENTA << (char*)"\\t" << ETK_BASH_COLOR_NORMAL; break;
|
|
default: std::cout<< (char)(data[i]&0x00FF ); break;
|
|
}
|
|
}
|
|
}
|
|
char * etk::levelSpace(int32_t level)
|
|
{
|
|
switch(level)
|
|
{
|
|
case 0: return (char*)"";
|
|
case 1: return (char*)" ";
|
|
case 2: return (char*)" ";
|
|
case 3: return (char*)" ";
|
|
case 4: return (char*)" ";
|
|
case 5: return (char*)" ";
|
|
case 6: return (char*)" ";
|
|
case 7: return (char*)" ";
|
|
case 8: return (char*)" ";
|
|
case 9: return (char*)" ";
|
|
case 10: return (char*)" ";
|
|
case 11: return (char*)" ";
|
|
case 12: return (char*)" ";
|
|
case 13: return (char*)" ";
|
|
case 14: return (char*)" ";
|
|
case 15: return (char*)" ";
|
|
case 16: return (char*)" ";
|
|
default: return (char*)" ";
|
|
}
|
|
}
|
|
|
|
|
|
/**
|
|
* @brief
|
|
*
|
|
* @param[in,out] ---
|
|
*
|
|
* @return ---
|
|
*
|
|
*/
|
|
int32_t etk::GetLenOfPTheseElem(etk::VectorType<int16_t> &data, int32_t startPos)
|
|
{
|
|
int32_t pos = startPos;
|
|
int32_t nbOpen = 0;
|
|
// special case of the (...) or | ==> we search '|' or ')'
|
|
if( REGEXP_OPCODE_PTHESE_OUT == data[pos]
|
|
|| REGEXP_OPCODE_PIPE == data[pos]) {
|
|
return 0;
|
|
}
|
|
// find size ...
|
|
while (pos < (int32_t)data.Size() ) {
|
|
if(REGEXP_OPCODE_PTHESE_IN == data[pos]) {
|
|
// find a sub section :
|
|
nbOpen++;
|
|
} else if(0 < nbOpen) {
|
|
if (REGEXP_OPCODE_PTHESE_OUT == data[pos])
|
|
{
|
|
nbOpen--;
|
|
if (0 > nbOpen) {
|
|
TK_ERROR("Error in the (...) find element at "<< pos);
|
|
return -1;
|
|
}
|
|
}
|
|
} else if( REGEXP_OPCODE_PTHESE_OUT == data[pos]
|
|
|| REGEXP_OPCODE_PIPE == data[pos])
|
|
{
|
|
// Find the end of the (...)
|
|
// just return the size inside
|
|
int32_t sizeInside = pos - startPos;
|
|
if (0 >= sizeInside) {
|
|
TK_ERROR("Error in the (...) no data at "<< pos-1);
|
|
return -1;
|
|
} else {
|
|
return sizeInside;
|
|
}
|
|
}
|
|
pos++;
|
|
}
|
|
return pos - startPos;
|
|
}
|
|
|
|
/**
|
|
* @brief
|
|
*
|
|
* @param[in,out] ---
|
|
*
|
|
* @return ---
|
|
*
|
|
*/
|
|
int32_t etk::GetLenOfPThese(etk::VectorType<int16_t> &data, int32_t startPos)
|
|
{
|
|
int32_t pos = startPos;
|
|
int32_t nbOpen = 0;
|
|
// special case of the (...) or | ==> we search '|' or ')'
|
|
if( REGEXP_OPCODE_PTHESE_OUT == data[pos]) {
|
|
return 0;
|
|
} else if( REGEXP_OPCODE_PTHESE_IN == data[pos])
|
|
{
|
|
pos++;
|
|
// find size ...
|
|
while (pos < (int32_t)data.Size() ) {
|
|
if(REGEXP_OPCODE_PTHESE_IN == data[pos]) {
|
|
// find a sub section :
|
|
nbOpen++;
|
|
} else if(0 < nbOpen) {
|
|
if (REGEXP_OPCODE_PTHESE_OUT == data[pos])
|
|
{
|
|
nbOpen--;
|
|
if (0 > nbOpen) {
|
|
TK_ERROR("Error in the (...) find element at "<< pos);
|
|
return -1;
|
|
}
|
|
}
|
|
} else if( REGEXP_OPCODE_PTHESE_OUT == data[pos])
|
|
{
|
|
// Find the end of the (...)
|
|
// just return the size inside
|
|
int32_t sizeInside = pos - startPos-1;
|
|
if (0 >= sizeInside) {
|
|
TK_ERROR("Error in the (...) no data at "<< pos-1);
|
|
return -1;
|
|
} else {
|
|
return sizeInside;
|
|
}
|
|
}
|
|
pos++;
|
|
}
|
|
} else {
|
|
return -1;
|
|
}
|
|
return 0;
|
|
}
|
|
|
|
|
|
/**
|
|
* @brief
|
|
*
|
|
* @param[in,out] ---
|
|
*
|
|
* @return ---
|
|
*
|
|
*/
|
|
int32_t etk::GetLenOfBracket(etk::VectorType<int16_t> &data, int32_t startPos)
|
|
{
|
|
int32_t pos = startPos;
|
|
// special case of the (...) or | ==> we search '|' or ')'
|
|
if( REGEXP_OPCODE_BRACKET_OUT == data[pos]) {
|
|
return 0;
|
|
} else if( REGEXP_OPCODE_BRACKET_IN == data[pos]) {
|
|
pos++;
|
|
// find size ...
|
|
while (pos < (int32_t)data.Size() ) {
|
|
if(REGEXP_OPCODE_BRACKET_OUT == data[pos]) {
|
|
// Find the end of the [...]
|
|
// just return the size inside
|
|
int32_t sizeInside = pos - startPos -1 ;
|
|
if (0 >= sizeInside) {
|
|
TK_ERROR("Error in the [...] no data at "<< pos-1);
|
|
return sizeInside;
|
|
} else {
|
|
return sizeInside;
|
|
}
|
|
} else if( REGEXP_OPCODE_TO != data[pos]
|
|
&& ( 0 > data[pos]
|
|
|| 0xFF < data[pos]) )
|
|
{
|
|
TK_ERROR("Error in the [...] not permited element at "<< pos << " '" << (char)data[pos] << "'");
|
|
return false;
|
|
}
|
|
pos++;
|
|
}
|
|
} else {
|
|
return -1;
|
|
}
|
|
return 0;
|
|
}
|
|
|
|
|
|
/**
|
|
* @brief
|
|
*
|
|
* @param[in,out] ---
|
|
*
|
|
* @return ---
|
|
*
|
|
*/
|
|
int32_t etk::GetLenOfBrace(etk::VectorType<int16_t> &data, int32_t startPos)
|
|
{
|
|
int32_t pos = startPos;
|
|
// special case of the (...) or | ==> we search '|' or ')'
|
|
if( REGEXP_OPCODE_BRACE_OUT == data[pos]) {
|
|
return 0;
|
|
} else if( REGEXP_OPCODE_BRACE_IN == data[pos]) {
|
|
pos++;
|
|
// find size ...
|
|
while (pos < (int32_t)data.Size() ) {
|
|
if(REGEXP_OPCODE_BRACE_OUT == data[pos]) {
|
|
// Find the end of the [...]
|
|
// just return the size inside
|
|
int32_t sizeInside = pos - startPos -1 ;
|
|
if (0 >= sizeInside) {
|
|
TK_ERROR("Error in the {...} no data at "<< pos-1);
|
|
return sizeInside;
|
|
} else {
|
|
return sizeInside;
|
|
}
|
|
} else if( ',' != data[pos]
|
|
&& ( '0' > data[pos]
|
|
|| '9' < data[pos]) )
|
|
{
|
|
TK_ERROR("Error in the {...} not permited element at "<< pos << " '" << (char)data[pos] << "'");
|
|
return false;
|
|
}
|
|
pos++;
|
|
}
|
|
} else {
|
|
return -1;
|
|
}
|
|
return 0;
|
|
}
|
|
|
|
|
|
/**
|
|
* @brief
|
|
*
|
|
* @param[in,out] ---
|
|
*
|
|
* @return ---
|
|
*
|
|
*/
|
|
int32_t etk::GetLenOfNormal(etk::VectorType<int16_t> &data, int32_t startPos)
|
|
{
|
|
int32_t pos = startPos;
|
|
|
|
// find size ...
|
|
while (pos < (int32_t)data.Size() ) {
|
|
switch(data[pos])
|
|
{
|
|
case REGEXP_OPCODE_PTHESE_IN:
|
|
case REGEXP_OPCODE_PTHESE_OUT:
|
|
case REGEXP_OPCODE_BRACKET_IN:
|
|
case REGEXP_OPCODE_BRACKET_OUT:
|
|
case REGEXP_OPCODE_BRACE_IN:
|
|
case REGEXP_OPCODE_BRACE_OUT:
|
|
case REGEXP_OPCODE_TO:
|
|
case REGEXP_OPCODE_STAR:
|
|
case REGEXP_OPCODE_DOT:
|
|
case REGEXP_OPCODE_QUESTION:
|
|
case REGEXP_OPCODE_PLUS:
|
|
case REGEXP_OPCODE_PIPE:
|
|
case REGEXP_OPCODE_START_OF_LINE:
|
|
case REGEXP_OPCODE_END_OF_LINE:
|
|
case REGEXP_OPCODE_DIGIT:
|
|
case REGEXP_OPCODE_DIGIT_NOT:
|
|
case REGEXP_OPCODE_LETTER:
|
|
case REGEXP_OPCODE_LETTER_NOT:
|
|
case REGEXP_OPCODE_SPACE:
|
|
case REGEXP_OPCODE_SPACE_NOT:
|
|
case REGEXP_OPCODE_WORD:
|
|
case REGEXP_OPCODE_WORD_NOT:
|
|
{
|
|
// just return the size inside
|
|
int32_t sizeInside = pos - startPos;
|
|
if (0 >= sizeInside) {
|
|
TK_ERROR("Error in the normal data : no data ...");
|
|
}
|
|
return sizeInside;
|
|
}
|
|
break;
|
|
default :
|
|
// nothing to do ...
|
|
break;
|
|
}
|
|
pos++;
|
|
}
|
|
return pos - startPos ;
|
|
}
|
|
|
|
|
|
/**
|
|
* @brief
|
|
*
|
|
* @param[in,out] ---
|
|
*
|
|
* @return ---
|
|
*
|
|
*/
|
|
bool etk::ParseBrace(etk::VectorType<int16_t> &data, int32_t &min, int32_t &max)
|
|
{
|
|
//TK_INFO("parse {...} in "; DisplayElem(data); );
|
|
int32_t k=0;
|
|
|
|
int32_t firstElement = 0;
|
|
int32_t SecondElement = 0;
|
|
|
|
while(k<data.Size()) {
|
|
if (',' == (char)data[k]) {
|
|
k++;
|
|
break;
|
|
} if ('}' == (char)data[k]) {
|
|
SecondElement = firstElement;
|
|
goto allIsSet;
|
|
} else if ('0' <= (char)data[k] && '9' >= (char)data[k]) {
|
|
firstElement *=10;
|
|
firstElement += (char)data[k] - '0';
|
|
} else {
|
|
TK_ERROR("Can not parse this element " << (char)data[k] << " at pos " << k);
|
|
return false;
|
|
}
|
|
k++;
|
|
}
|
|
if (k==data.Size()) {
|
|
SecondElement = firstElement;
|
|
}
|
|
while(k<data.Size()) {
|
|
if (',' == (char)data[k]) {
|
|
TK_ERROR("Can not find a second , in {} at pos " << k);
|
|
return false;
|
|
} if ('}' == (char)data[k]) {
|
|
goto allIsSet;
|
|
} else if ('0' <= (char)data[k] && '9' >= (char)data[k]) {
|
|
SecondElement *=10;
|
|
SecondElement += (char)data[k] - '0';
|
|
} else {
|
|
TK_ERROR("Can not parse this element " << (char)data[k] << " at pos " << k);
|
|
return false;
|
|
}
|
|
k++;
|
|
}
|
|
|
|
allIsSet:
|
|
if (SecondElement == 0 && firstElement != 0) {
|
|
min = 0;
|
|
max = firstElement;
|
|
} else {
|
|
min = firstElement;
|
|
max = SecondElement;
|
|
}
|
|
if (min > max) {
|
|
TK_ERROR("Minimum=" << min << " can not be < maximum=" << max );
|
|
return false;
|
|
}
|
|
return true;
|
|
}
|
|
|
|
|