/** ******************************************************************************* * @file etk/RegExp.cpp * @brief Ewol Tool Kit : Regular expression annalyser (sources) * @author Edouard DUPIN * @date 04/04/2011 * @par Project * Ewol TK * * @par Copyright * Copyright 2011 Edouard DUPIN, all right reserved * * This software is distributed in the hope that it will be useful, but WITHOUT * ANY WARRANTY. * * Licence summary : * You can modify and redistribute the sources code and binaries. * You can send me the bug-fix * * Term of the licence in in the file licence.txt. * ******************************************************************************* */ #include const etk::convertionTable_ts etk::constConvertionTable[] = { // haveBackSlash, inputValue, newValue { false , '(' , REGEXP_OPCODE_PTHESE_IN}, { true , '(' , (int16_t)'('}, { false , ')' , REGEXP_OPCODE_PTHESE_OUT}, { true , ')' , (int16_t)')'}, { false , '[' , REGEXP_OPCODE_BRACKET_IN}, { true , '[' , (int16_t)'['}, { false , ']' , REGEXP_OPCODE_BRACKET_OUT}, { true , ']' , (int16_t)']'}, { false , '{' , REGEXP_OPCODE_BRACE_IN}, { true , '{' , (int16_t)'{'}, { false , '}' , REGEXP_OPCODE_BRACE_OUT}, { true , '}' , (int16_t)'}'}, { false , '-' , REGEXP_OPCODE_TO}, { true , '-' , (int16_t)'-'}, { false , '*' , REGEXP_OPCODE_STAR}, { true , '*' , (int16_t)'*'}, { false , '.' , REGEXP_OPCODE_DOT}, { true , '.' , (int16_t)'.'}, { false , '?' , REGEXP_OPCODE_QUESTION}, { true , '?' , (int16_t)'?'}, { false , '+' , REGEXP_OPCODE_PLUS}, { true , '+' , (int16_t)'+'}, { false , '|' , REGEXP_OPCODE_PIPE}, { true , '|' , (int16_t)'|'}, { false , '^' , REGEXP_OPCODE_START_OF_LINE}, { true , '^' , (int16_t)'^'}, { false , '$' , REGEXP_OPCODE_END_OF_LINE}, { true , '$' , (int16_t)'$'}, { true , 'd' , REGEXP_OPCODE_DIGIT}, { true , 'D' , REGEXP_OPCODE_DIGIT_NOT}, { true , 'l' , REGEXP_OPCODE_LETTER}, { true , 'L' , REGEXP_OPCODE_LETTER_NOT}, { true , 's' , REGEXP_OPCODE_SPACE}, { true , 'S' , REGEXP_OPCODE_SPACE_NOT}, { true , 'w' , REGEXP_OPCODE_WORD}, { true , 'W' , REGEXP_OPCODE_WORD_NOT}, { true , 'a' , (int16_t)'\a'}, { true , 'b' , (int16_t)'\b'}, { true , 'e' , 0x001B}, // Escape character { true , 'f' , (int16_t)'\f'}, { true , 'n' , (int16_t)'\n'}, { true , 'r' , (int16_t)'\r'}, { true , 't' , (int16_t)'\t'}, { true , 'v' , (int16_t)'\v'}, { true , '\\' , (int16_t)'\\'}, { true , '&' , (int16_t)'&'}, { true , '0' , (int16_t)'\0'}, { true , '@' , REGEXP_OPCODE_NO_CHAR}, }; const int32_t etk::constConvertionTableSize = sizeof(etk::constConvertionTable) / sizeof(etk::convertionTable_ts) ; /** * @brief Display the internal data of a node * * @param[in] data element do display in the console * * @return --- * */ void etk::DisplayData(etk::VectorType &data) { int32_t i; for (i=0; i<(int32_t)data.Size() ; i++) { etk::cout<< (char)(data[i]&0x00FF ); } } /** * @brief Display of a part of the Regexp element * * @param[in] data Vector where data is constain * @param[in] start Position where the display might start * @param[in] stop Position where the display might stop * * @return --- * */ void etk::DisplayElem(etk::VectorType &data, int32_t start, int32_t stop) { int32_t i; etk::cout<< ETK_BASH_COLOR_NORMAL; for (i=start; i<(int32_t)data.Size() && i &data, int32_t startPos) { int32_t pos = startPos; int32_t nbOpen = 0; // special case of the (...) or | ==> we search '|' or ')' if( REGEXP_OPCODE_PTHESE_OUT == data[pos] || REGEXP_OPCODE_PIPE == data[pos]) { return 0; } // find size ... while (pos < (int32_t)data.Size() ) { if(REGEXP_OPCODE_PTHESE_IN == data[pos]) { // find a sub section : nbOpen++; } else if(0 < nbOpen) { if (REGEXP_OPCODE_PTHESE_OUT == data[pos]) { nbOpen--; if (0 > nbOpen) { TK_ERROR("Error in the (...) find element at "<< pos); return -1; } } } else if( REGEXP_OPCODE_PTHESE_OUT == data[pos] || REGEXP_OPCODE_PIPE == data[pos]) { // Find the end of the (...) // just return the size inside int32_t sizeInside = pos - startPos; if (0 >= sizeInside) { TK_ERROR("Error in the (...) no data at "<< pos-1); return -1; } else { return sizeInside; } } pos++; } return pos - startPos; } /** * @brief * * @param[in,out] --- * * @return --- * */ int32_t etk::GetLenOfPThese(etk::VectorType &data, int32_t startPos) { int32_t pos = startPos; int32_t nbOpen = 0; // special case of the (...) or | ==> we search '|' or ')' if( REGEXP_OPCODE_PTHESE_OUT == data[pos]) { return 0; } else if( REGEXP_OPCODE_PTHESE_IN == data[pos]) { pos++; // find size ... while (pos < (int32_t)data.Size() ) { if(REGEXP_OPCODE_PTHESE_IN == data[pos]) { // find a sub section : nbOpen++; } else if(0 < nbOpen) { if (REGEXP_OPCODE_PTHESE_OUT == data[pos]) { nbOpen--; if (0 > nbOpen) { TK_ERROR("Error in the (...) find element at "<< pos); return -1; } } } else if( REGEXP_OPCODE_PTHESE_OUT == data[pos]) { // Find the end of the (...) // just return the size inside int32_t sizeInside = pos - startPos-1; if (0 >= sizeInside) { TK_ERROR("Error in the (...) no data at "<< pos-1); return -1; } else { return sizeInside; } } pos++; } } else { return -1; } return 0; } /** * @brief * * @param[in,out] --- * * @return --- * */ int32_t etk::GetLenOfBracket(etk::VectorType &data, int32_t startPos) { int32_t pos = startPos; // special case of the (...) or | ==> we search '|' or ')' if( REGEXP_OPCODE_BRACKET_OUT == data[pos]) { return 0; } else if( REGEXP_OPCODE_BRACKET_IN == data[pos]) { pos++; // find size ... while (pos < (int32_t)data.Size() ) { if(REGEXP_OPCODE_BRACKET_OUT == data[pos]) { // Find the end of the [...] // just return the size inside int32_t sizeInside = pos - startPos -1 ; if (0 >= sizeInside) { TK_ERROR("Error in the [...] no data at "<< pos-1); return sizeInside; } else { return sizeInside; } } else if( REGEXP_OPCODE_TO != data[pos] && ( 0 > data[pos] || 0xFF < data[pos]) ) { TK_ERROR("Error in the [...] not permited element at "<< pos << " '" << (char)data[pos] << "'"); return false; } pos++; } } else { return -1; } return 0; } /** * @brief * * @param[in,out] --- * * @return --- * */ int32_t etk::GetLenOfBrace(etk::VectorType &data, int32_t startPos) { int32_t pos = startPos; // special case of the (...) or | ==> we search '|' or ')' if( REGEXP_OPCODE_BRACE_OUT == data[pos]) { return 0; } else if( REGEXP_OPCODE_BRACE_IN == data[pos]) { pos++; // find size ... while (pos < (int32_t)data.Size() ) { if(REGEXP_OPCODE_BRACE_OUT == data[pos]) { // Find the end of the [...] // just return the size inside int32_t sizeInside = pos - startPos -1 ; if (0 >= sizeInside) { TK_ERROR("Error in the {...} no data at "<< pos-1); return sizeInside; } else { return sizeInside; } } else if( ',' != data[pos] && ( '0' > data[pos] || '9' < data[pos]) ) { TK_ERROR("Error in the {...} not permited element at "<< pos << " '" << (char)data[pos] << "'"); return false; } pos++; } } else { return -1; } return 0; } /** * @brief * * @param[in,out] --- * * @return --- * */ int32_t etk::GetLenOfNormal(etk::VectorType &data, int32_t startPos) { int32_t pos = startPos; // find size ... while (pos < (int32_t)data.Size() ) { switch(data[pos]) { case REGEXP_OPCODE_PTHESE_IN: case REGEXP_OPCODE_PTHESE_OUT: case REGEXP_OPCODE_BRACKET_IN: case REGEXP_OPCODE_BRACKET_OUT: case REGEXP_OPCODE_BRACE_IN: case REGEXP_OPCODE_BRACE_OUT: case REGEXP_OPCODE_TO: case REGEXP_OPCODE_STAR: case REGEXP_OPCODE_DOT: case REGEXP_OPCODE_QUESTION: case REGEXP_OPCODE_PLUS: case REGEXP_OPCODE_PIPE: case REGEXP_OPCODE_START_OF_LINE: case REGEXP_OPCODE_END_OF_LINE: case REGEXP_OPCODE_DIGIT: case REGEXP_OPCODE_DIGIT_NOT: case REGEXP_OPCODE_LETTER: case REGEXP_OPCODE_LETTER_NOT: case REGEXP_OPCODE_SPACE: case REGEXP_OPCODE_SPACE_NOT: case REGEXP_OPCODE_WORD: case REGEXP_OPCODE_WORD_NOT: { // just return the size inside int32_t sizeInside = pos - startPos; if (0 >= sizeInside) { TK_ERROR("Error in the normal data : no data ..."); } return sizeInside; } break; default : // nothing to do ... break; } pos++; } return pos - startPos ; } /** * @brief * * @param[in,out] --- * * @return --- * */ bool etk::ParseBrace(etk::VectorType &data, int32_t &min, int32_t &max) { //TK_INFO("parse {...} in "; DisplayElem(data); ); int32_t k=0; int32_t firstElement = 0; int32_t SecondElement = 0; while(k= (char)data[k]) { firstElement *=10; firstElement += (char)data[k] - '0'; } else { TK_ERROR("Can not parse this element " << (char)data[k] << " at pos " << k); return false; } k++; } if (k==data.Size()) { SecondElement = firstElement; } while(k= (char)data[k]) { SecondElement *=10; SecondElement += (char)data[k] - '0'; } else { TK_ERROR("Can not parse this element " << (char)data[k] << " at pos " << k); return false; } k++; } allIsSet: if (SecondElement == 0 && firstElement != 0) { min = 0; max = firstElement; } else { min = firstElement; max = SecondElement; } if (min > max) { TK_ERROR("Minimum=" << min << " can not be < maximum=" << max ); return false; } return true; }