754 lines
24 KiB
C++
754 lines
24 KiB
C++
/**
|
|
* @author Edouard DUPIN
|
|
* @copyright 2017, Edouard DUPIN, all right reserved
|
|
* @license MPL-2 (see license file)
|
|
*/
|
|
#include <estyle/lexer/Lexer.hpp>
|
|
#include <estyle/debug.hpp>
|
|
|
|
|
|
/**
 * @brief Check if a character is an in-line blank (space or tabulation).
 * @param[in] _char Character to test.
 * @return true for ' ' and '\t', false for everything else (new lines included).
 */
static bool isWhitespace(char _char) {
	switch (_char) {
		case ' ':
		case '\t':
			return true;
		default:
			return false;
	}
}
|
|
|
|
/**
 * @brief Check if a character is a decimal digit.
 * @param[in] _char Character to test.
 * @return true when the character is in the range '0'..'9'.
 */
static bool isNumeric(char _char) {
	if (_char < '0') {
		return false;
	}
	return _char <= '9';
}
|
|
/**
 * @brief Check that a string is only composed of decimal digits.
 * @param[in] _string String to test.
 * @return false as soon as one character is not in '0'..'9', true otherwise
 *         (an empty string therefore returns true).
 */
static bool isNumber(const etk::String& _string) {
	size_t index = 0;
	while (index < _string.size()) {
		if (isNumeric(_string[index]) == false) {
			return false;
		}
		++index;
	}
	return true;
}
|
|
/**
 * @brief Check if a character is a hexadecimal digit.
 * @param[in] _char Character to test.
 * @return true for '0'..'9', 'a'..'f' and 'A'..'F'.
 */
static bool isHexadecimal(char _char) {
	const bool decimalDigit = ('0' <= _char) && (_char <= '9');
	const bool lowerCaseDigit = ('a' <= _char) && (_char <= 'f');
	const bool upperCaseDigit = ('A' <= _char) && (_char <= 'F');
	return decimalDigit || lowerCaseDigit || upperCaseDigit;
}
|
|
/**
 * @brief Check if a character can be part of an identifier (digits excluded).
 * @param[in] _char Character to test.
 * @return true for 'a'..'z', 'A'..'Z' and '_'.
 */
static bool isAlpha(char _char) {
	if (_char == '_') {
		return true;
	}
	if (    'a' <= _char
	     && _char <= 'z') {
		return true;
	}
	return 'A' <= _char && _char <= 'Z';
}
|
|
|
|
|
|
/**
 * @brief Read the character at a given offset of m_stream and the one following it.
 * @param[in] _iii Offset in m_stream.
 * @param[out] _currentChar Character at _iii, or 0 when _iii is out of the stream.
 * @param[out] _nextChar Character at _iii+1, or 0 when that offset is out of the stream.
 */
void estyle::Lexer::getChar(size_t _iii, char& _currentChar, char& _nextChar) {
	const size_t streamSize = m_stream.size();
	if (_iii >= streamSize) {
		// Out of the stream: both characters are reported as 0.
		_currentChar = 0;
		_nextChar = 0;
		return;
	}
	_currentChar = m_stream[_iii];
	if (_iii+1 >= streamSize) {
		// Last character of the stream: there is no look-ahead.
		_nextChar = 0;
		return;
	}
	_nextChar = m_stream[_iii+1];
}
|
|
|
|
/**
 * @brief Cut m_stream into basic lexer elements and append them to m_list.
 *
 * The stream is read one character at a time with a one character look-ahead
 * (see getChar). Spaces and tabulations are skipped; every other input
 * produces one element holding the token kind and two offsets in m_stream.
 * Recognized, in this order: new lines, one-line comments, block comments,
 * identifiers/keywords, numbers, quoted strings, then operators and
 * punctuation. A character that matches nothing is reported with
 * ESTYLE_ERROR and skipped.
 *
 * For comments and quoted strings the stored range excludes the delimiters.
 * Operators and punctuation are stored as (offset, offset + length).
 */
void estyle::Lexer::parse() {
	// Spelling of every identifier that has a dedicated token.
	// NOTE: the entries containing "::" or '#' can not match for now, because an
	// identifier is only built from letters, digits and '_'. They are kept so the
	// mapping stays complete if the identifier scanner is extended.
	static const struct {
		const char* name;
		enum estyle::lexer::tocken value;
	} keywords[] = {
		{"if", estyle::lexer::RESERVED_IF},
		{"else", estyle::lexer::RESERVED_ELSE},
		{"do", estyle::lexer::RESERVED_DO},
		{"while", estyle::lexer::RESERVED_WHILE},
		{"for", estyle::lexer::RESERVED_FOR},
		{"break", estyle::lexer::RESERVED_BREAK},
		{"continue", estyle::lexer::RESERVED_CONTINUE},
		{"return", estyle::lexer::RESERVED_RETURN},
		{"switch", estyle::lexer::RESERVED_SWITCH},
		{"case", estyle::lexer::RESERVED_CASE},
		{"delete", estyle::lexer::RESERVED_DELETE},
		{"new", estyle::lexer::RESERVED_NEW},
		{"private", estyle::lexer::RESERVED_PRIVATE},
		{"protected", estyle::lexer::RESERVED_PROTECTED},
		{"public", estyle::lexer::RESERVED_PUBLIC},
		{"int8_t", estyle::lexer::BASIC_TYPE_INTEGER_008},
		{"uint8_t", estyle::lexer::BASIC_TYPE_INTEGER_008_UNSIGNED},
		{"int16_t", estyle::lexer::BASIC_TYPE_INTEGER_016},
		{"uint16_t", estyle::lexer::BASIC_TYPE_INTEGER_016_UNSIGNED},
		{"int32_t", estyle::lexer::BASIC_TYPE_INTEGER_032},
		{"uint32_t", estyle::lexer::BASIC_TYPE_INTEGER_032_UNSIGNED},
		{"int64_t", estyle::lexer::BASIC_TYPE_INTEGER_064},
		{"uint64_t", estyle::lexer::BASIC_TYPE_INTEGER_064_UNSIGNED},
		{"int128_t", estyle::lexer::BASIC_TYPE_INTEGER_128},
		{"uint128_t", estyle::lexer::BASIC_TYPE_INTEGER_128_UNSIGNED},
		{"float", estyle::lexer::BASIC_TYPE_FLOAT_32},
		{"double", estyle::lexer::BASIC_TYPE_FLOAT_64},
		{"triple", estyle::lexer::BASIC_TYPE_FLOAT_96},
		{"bool", estyle::lexer::BASIC_TYPE_BOOLEAN},
		{"std::string", estyle::lexer::BASIC_TYPE_STD_STRING},
		{"std::null", estyle::lexer::BASIC_TYPE_STD_NULLPTR},
		{"null", estyle::lexer::BASIC_TYPE_STD_NULLPTR},
		{"void", estyle::lexer::BASIC_TYPE_VOID},
		{"const", estyle::lexer::RESERVED_CONST},
		{"static", estyle::lexer::RESERVED_STATIC},
		{"mutable", estyle::lexer::RESERVED_MUTABLE},
		{"virtual", estyle::lexer::RESERVED_VIRTUAL},
		{"inline", estyle::lexer::RESERVED_INLINE},
		{"final", estyle::lexer::RESERVED_FINAL},
		{"default", estyle::lexer::RESERVED_DEFAULT},
		{"int", estyle::lexer::RESERVED_INT},
		{"long", estyle::lexer::RESERVED_LONG},
		{"short", estyle::lexer::RESERVED_SHORT},
		{"signed", estyle::lexer::RESERVED_SIGNED},
		{"unsigned", estyle::lexer::RESERVED_UNSIGNED},
		{"namespace", estyle::lexer::RESERVED_NAMESPACE},
		{"class", estyle::lexer::RESERVED_CLASS},
		{"struct", estyle::lexer::RESERVED_STRUCT},
		{"true", estyle::lexer::BOOLEAN},
		{"false", estyle::lexer::BOOLEAN},
		{"throw", estyle::lexer::RESERVED_THROW},
		{"try", estyle::lexer::RESERVED_TRY},
		{"catch", estyle::lexer::RESERVED_CATCH},
		{"#if", estyle::lexer::RESERVED_SHARP_IF},
		{"#elif", estyle::lexer::RESERVED_SHARP_ELSE_IF},
		{"#ifdef", estyle::lexer::RESERVED_SHARP_IF_DEFINE},
		{"#ifndef", estyle::lexer::RESERVED_SHARP_IF_NOT_DEFINE},
		{"#endif", estyle::lexer::RESERVED_SHARP_END_IF},
		{"#else", estyle::lexer::RESERVED_SHARP_ELSE},
	};
	// Operators made of 2 characters (tested before the 1 character ones).
	// "//" and "/*" are not listed: comments are handled before the operators.
	static const struct {
		char first;
		char second;
		enum estyle::lexer::tocken value;
	} doubleOperators[] = {
		{'=', '=', estyle::lexer::EQUAL_EQUAL},
		{'!', '=', estyle::lexer::NOT_EQUAL},
		{'<', '=', estyle::lexer::LESS_EQUAL},
		{'<', '<', estyle::lexer::LESS_LESS},
		{'>', '=', estyle::lexer::GREATER_EQUAL},
		{'>', '>', estyle::lexer::GREATER_GREATER},
		{'+', '=', estyle::lexer::PLUS_EQUAL},
		{'+', '+', estyle::lexer::PLUS_PLUS},
		{'-', '=', estyle::lexer::MINUS_EQUAL},
		{'-', '-', estyle::lexer::MINUS_MINUS},
		{'&', '=', estyle::lexer::AND_EQUAL},
		{'&', '&', estyle::lexer::AND_AND},
		{'|', '=', estyle::lexer::OR_EQUAL},
		{'|', '|', estyle::lexer::OR_OR},
		{'^', '=', estyle::lexer::XOR_EQUAL},
		{'/', '=', estyle::lexer::DEVIDE_EQUAL},
		{'*', '=', estyle::lexer::MULTIPLY_EQUAL},
		{':', ':', estyle::lexer::NAMESPACE},
	};
	// Operators and punctuation made of 1 character.
	static const struct {
		char value;
		enum estyle::lexer::tocken token;
	} singleOperators[] = {
		{'=', estyle::lexer::EQUAL},
		{'!', estyle::lexer::NOT},
		{'<', estyle::lexer::LESS},
		{'>', estyle::lexer::GREATER},
		{'+', estyle::lexer::PLUS},
		{'-', estyle::lexer::MINUS},
		{'&', estyle::lexer::AND},
		{'|', estyle::lexer::OR},
		{'^', estyle::lexer::XOR},
		{'/', estyle::lexer::DEVIDE},
		{'*', estyle::lexer::MULTIPLY},
		{':', estyle::lexer::COLON},
		{';', estyle::lexer::SEMICOLON},
		{'(', estyle::lexer::PARENTHESE_IN},
		{')', estyle::lexer::PARENTHESE_OUT},
		{'[', estyle::lexer::BRACKET_IN},
		{']', estyle::lexer::BRACKET_OUT},
		{'{', estyle::lexer::BRACE_IN},
		{'}', estyle::lexer::BRACE_OUT},
		{'.', estyle::lexer::DOT},
		{',', estyle::lexer::COMA},
		{'%', estyle::lexer::POUCENT},
	};
	char currentChar = 0;
	char nextChar = 0;
	// Every branch must leave iii on the LAST character it consumed: the "++iii"
	// of the loop is what moves to the first character of the next element.
	for (size_t iii=0; iii<m_stream.size(); ++iii) {
		getChar(iii, currentChar, nextChar);
		// Skip in-line blanks (getChar gives 0 at the end of the stream, which stops the loop).
		while (isWhitespace(currentChar) == true) {
			++iii;
			getChar(iii, currentChar, nextChar);
		}
		// New line made of 2 characters: "\n\r" or "\r\n"
		if (    (    currentChar == '\n'
		          && nextChar == '\r')
		     || (    currentChar == '\r'
		          && nextChar == '\n') ) {
			const size_t tokenStart = iii;
			++iii;
			m_list.pushBack(estyle::LexerElement(estyle::lexer::RESERVED_NEW_LINE, tokenStart, iii));
			continue;
		}
		// New line made of 1 character. The test is done on the CURRENT character:
		// testing the next one would replace the character placed before a '\r' by a new line.
		if (    currentChar == '\n'
		     || currentChar == '\r' ) {
			// NOTE(review): start == stop here, where the other 1-character elements use
			// (iii, iii+1); kept as it was — confirm what the users of the element expect.
			m_list.pushBack(estyle::LexerElement(estyle::lexer::RESERVED_NEW_LINE, iii, iii));
			continue;
		}
		// One line comments
		if (    currentChar == '/'
		     && nextChar == '/') {
			enum estyle::lexer::tocken tockenElement = estyle::lexer::RESERVED_COMMENT_1_LINE;
			// remove "//"
			iii += 2;
			getChar(iii, currentChar, nextChar);
			// Check if we have "//!" or "///" ==> doxygen comment
			if (    currentChar == '!'
			     || currentChar == '/') {
				iii++;
				getChar(iii, currentChar, nextChar);
				tockenElement = estyle::lexer::RESERVED_DOCUMENTATION_1_LINE;
			}
			const size_t tokenStart = iii;
			while (    currentChar != 0
			        && currentChar != '\n') {
				iii++;
				getChar(iii, currentChar, nextChar);
			}
			m_list.pushBack(estyle::LexerElement(tockenElement, tokenStart, iii));
			// iii is on the ending '\n' (or at the end of the stream): the loop increment
			// steps over it. No extra increment, otherwise the first character of the
			// following line is lost.
			continue;
		}
		// block comments
		if (    currentChar == '/'
		     && nextChar == '*') {
			enum estyle::lexer::tocken tockenElement = estyle::lexer::RESERVED_COMMENT_N_LINE;
			// remove "/*"
			iii += 2;
			getChar(iii, currentChar, nextChar);
			// Check if we have "/**" or "/*!" ==> doxygen comment
			// ("/**/" is an empty comment: its '*' belongs to the closing "*/")
			if (    (    currentChar == '*'
			          && nextChar != '/')
			     || currentChar == '!') {
				iii++;
				getChar(iii, currentChar, nextChar);
				tockenElement = estyle::lexer::RESERVED_DOCUMENTATION_N_LINE;
			}
			const size_t tokenStart = iii;
			while (    currentChar != 0
			        && (    currentChar != '*'
			             || nextChar != '/' ) ) {
				iii++;
				getChar(iii, currentChar, nextChar);
			}
			m_list.pushBack(estyle::LexerElement(tockenElement, tokenStart, iii));
			// iii is on the '*' of "*/": move on the '/', the loop increment does the rest.
			iii++;
			continue;
		}
		// Identifiers and reserved words
		if (isAlpha(currentChar)) {
			const size_t tokenStart = iii;
			etk::String idData;
			while (    isAlpha(currentChar)
			        || isNumeric(currentChar)) {
				idData += currentChar;
				iii++;
				getChar(iii, currentChar, nextChar);
			}
			enum estyle::lexer::tocken tockenElement = estyle::lexer::ID;
			for (const auto& keyword : keywords) {
				if (idData == keyword.name) {
					tockenElement = keyword.value;
					break;
				}
			}
			m_list.pushBack(estyle::LexerElement(tockenElement, tokenStart, iii));
			// iii is one character too far: go back on the last character of the identifier.
			iii--;
			continue;
		}
		// Numbers: decimal, "0x" hexadecimal, and floating point with fraction and/or exponent
		if (isNumeric(currentChar)) {
			const size_t tokenStart = iii;
			bool isHex = false;
			if (currentChar == '0') {
				iii++;
				getChar(iii, currentChar, nextChar);
				if (currentChar == 'x') {
					isHex = true;
					iii++;
					getChar(iii, currentChar, nextChar);
				}
			}
			enum estyle::lexer::tocken tockenElement = estyle::lexer::INTEGER;
			while (    isNumeric(currentChar)
			        || (    isHex == true
			             && isHexadecimal(currentChar) ) ) {
				iii++;
				getChar(iii, currentChar, nextChar);
			}
			if (    isHex == false
			     && currentChar == '.') {
				tockenElement = estyle::lexer::FLOAT;
				iii++;
				getChar(iii, currentChar, nextChar);
				while (isNumeric(currentChar)) {
					iii++;
					getChar(iii, currentChar, nextChar);
				}
			}
			// do fancy e-style floating point
			if (    isHex == false
			     && (    currentChar == 'e'
			          || currentChar == 'E')) {
				tockenElement = estyle::lexer::FLOAT;
				iii++;
				getChar(iii, currentChar, nextChar);
				if (currentChar == '-') {
					iii++;
					getChar(iii, currentChar, nextChar);
				}
				while (isNumeric(currentChar) == true) {
					iii++;
					getChar(iii, currentChar, nextChar);
				}
			}
			m_list.pushBack(estyle::LexerElement(tockenElement, tokenStart, iii));
			// iii is one character too far: go back on the last character of the number.
			iii--;
			continue;
		}
		// Quoted elements: "..." ==> STRING and '...' ==> SIMPLE_QUOTE_STRING
		if (    currentChar == '"'
		     || currentChar == '\'') {
			const char quote = currentChar;
			// remove the opening quote
			iii++;
			getChar(iii, currentChar, nextChar);
			const size_t tokenStart = iii;
			while (    currentChar != 0
			        && currentChar != quote) {
				// An escaped back-slash or an escaped quote does not close the element.
				if (    currentChar == '\\'
				     && (    nextChar == '\\'
				          || nextChar == quote) ) {
					iii++;
					getChar(iii, currentChar, nextChar);
				}
				iii++;
				getChar(iii, currentChar, nextChar);
			}
			if (currentChar == 0) {
				ESTYLE_ERROR("Arrive at the end of file without '" << char(quote) << "' element in string parsing");
			}
			if (quote == '"') {
				m_list.pushBack(estyle::LexerElement(estyle::lexer::STRING, tokenStart, iii));
			} else {
				m_list.pushBack(estyle::LexerElement(estyle::lexer::SIMPLE_QUOTE_STRING, tokenStart, iii));
			}
			// iii is on the closing quote: the loop increment steps over it.
			continue;
		}
		// Operators and punctuation: the longest form is tested first.
		bool operatorFound = false;
		for (const auto& element : doubleOperators) {
			if (    currentChar == element.first
			     && nextChar == element.second) {
				m_list.pushBack(estyle::LexerElement(element.value, iii, iii+2));
				iii++;
				operatorFound = true;
				break;
			}
		}
		if (operatorFound == false) {
			for (const auto& element : singleOperators) {
				if (currentChar == element.value) {
					m_list.pushBack(estyle::LexerElement(element.token, iii, iii+1));
					operatorFound = true;
					break;
				}
			}
		}
		if (operatorFound == true) {
			continue;
		}
		if (currentChar == estyle::lexer::END_OF_FILE) {
			// remove error
			continue;
		}
		ESTYLE_ERROR(" UNKNOW tocken : '" << char(currentChar) << "'");
	}
}
|
|
|
|
/**
 * @brief Constructor: nothing is set explicitly, the members keep their default state.
 */
estyle::Lexer::Lexer() = default;
|
|
|
|
|
|
/**
 * @brief Run the lexer on a new input.
 *
 * The input is copied in m_stream, the previous element list is dropped, then
 * parse() and the post-analysis passes are run in sequence. The resulting list
 * is finally dumped with ESTYLE_DEBUG.
 * @param[in] _input Source text to analyse.
 */
void estyle::Lexer::lexify(const etk::String& _input) {
	m_stream = _input;
	m_list.clear();
	ESTYLE_DEBUG("Parse stream");
	// Raw elements first, then the passes that regroup them.
	parse();
	postAnnalyse_namespace();
	postAnnalyse_function();
	postAnnalyse_function_typeExtern();
	postAnnalyse_function_typeInternal();
	ESTYLE_DEBUG("find:");
	for (auto& element : m_list) {
		const bool isComposite =    element.getTocken() == estyle::lexer::ELEMENT_COMPLEX
		                         || element.getTocken() == estyle::lexer::ELEMENT_COMPLEX_TYPE;
		if (isComposite == false) {
			ESTYLE_DEBUG(" '" << element.getTocken() << "' ==> '" << m_stream.extract(element.getStart(),element.getStop()) << "'");
			continue;
		}
		// Composite element: display its kind, then every element it contains.
		ESTYLE_DEBUG(" '" << element.getTocken() << "'");
		for (auto& child : element.getList()) {
			ESTYLE_DEBUG(" '" << child.getTocken() << "' ==> '" << m_stream.extract(child.getStart(),child.getStop()) << "'");
		}
	}
}
|
|
|
|
/**
 * @brief Get a part of the source stream.
 * @param[in] _start First offset handed to m_stream.extract().
 * @param[in] _stop Second offset handed to m_stream.extract().
 * @return The text extracted from m_stream for these 2 offsets.
 */
etk::String estyle::Lexer::getDataSource(int32_t _start, int32_t _stop) const {
	etk::String extracted = m_stream.extract(_start, _stop);
	return extracted;
}
|
|
|
|
/**
 * @brief Get the source text covered by an element of the list.
 * @param[in] _position Index of the element in m_list (not checked here).
 * @return The text of m_stream between the start and the stop of the element.
 */
etk::String estyle::Lexer::getData(int32_t _position) const {
	const auto& element = m_list[_position];
	return getDataSource(element.getStart(), element.getStop());
}
|
|
|
|
/**
 * @brief Get the value of an element as a string (same extraction as getData()).
 * @param[in] _position Index of the element in m_list (not checked here).
 * @return The text of m_stream between the start and the stop of the element.
 */
etk::String estyle::Lexer::getValueString(int32_t _position) const {
	const auto& element = m_list[_position];
	return getDataSource(element.getStart(), element.getStop());
}
|
|
|
|
/**
 * @brief Get the position in the source (column and line) of an element.
 *
 * The stream is scanned from its beginning to the start offset of the
 * element, counting the '\n' met on the way. Line and column both start at 1,
 * on every line.
 * @param[in] _position Index of the element in m_list (not checked here).
 * @return ivec2(column, line) of the first character of the element.
 */
ivec2 estyle::Lexer::getFilePosition(int32_t _position) const {
	const int32_t offset = m_list[_position].getStart();
	int32_t line = 1;
	int32_t colomn = 1;
	for (int32_t iii=0; iii<offset; ++iii) {
		if (m_stream[iii] == '\n') {
			line++;
			// The character after a '\n' is the first of its line: column 1, like
			// the first character of the stream (a reset to 0 made every line but
			// the first one 0-based).
			colomn = 1;
		} else {
			colomn++;
		}
	}
	return ivec2(colomn, line);
}
|
|
|
|
/**
 * @brief Get the full line of source that contains the start of an element.
 *
 * The line goes from the character that follows the previous '\n' (or the
 * beginning of the stream) up to the next '\n' (or the end of the stream).
 * Neither of these 2 '\n' is part of the returned text. When the element is
 * itself a '\n', the returned line is the one this '\n' terminates.
 * @param[in] _position Index of the element in m_list (not checked here).
 * @return The text of the line.
 */
etk::String estyle::Lexer::getFileLine(int32_t _position) const {
	const int32_t streamSize = int32_t(m_stream.size());
	int32_t tokenStart = m_list[_position].getStart();
	// Keep the offset inside the stream before using it as an index.
	if (tokenStart < 0) {
		tokenStart = 0;
	}
	if (tokenStart > streamSize) {
		tokenStart = streamSize;
	}
	// End of the line: first '\n' at or after the element.
	int32_t positionStop = tokenStart;
	while (    positionStop < streamSize
	        && m_stream[positionStop] != '\n') {
		++positionStop;
	}
	// Beginning of the line: go back while the PREVIOUS character is not a '\n',
	// so the '\n' of the previous line is never included in the result.
	int32_t positionStart = tokenStart;
	while (    positionStart > 0
	        && m_stream[positionStart-1] != '\n') {
		--positionStart;
	}
	ESTYLE_WARNING("extract " << positionStart << " " << positionStop);
	return m_stream.extract(positionStart, positionStop);
}
|
|
|
|
// squash element name space like "::lklkmlk" and "lmkmlk::lmkmlk::mlklk" in 1 element
|
|
void estyle::Lexer::postAnnalyse_namespace() {
|
|
auto it = m_list.begin();
|
|
while (it != m_list.end()) {
|
|
if (it->getTocken() == estyle::lexer::NAMESPACE) {
|
|
// Remove next first ...
|
|
{
|
|
auto itTmp = it;
|
|
++itTmp;
|
|
if (it != m_list.end()) {
|
|
if (itTmp->getTocken() == estyle::lexer::ID) {
|
|
it->setStop(itTmp->getStop());
|
|
it->setTocken(estyle::lexer::ID);
|
|
// This work because I use etk::Vector ...
|
|
m_list.erase(itTmp);
|
|
}
|
|
}
|
|
}
|
|
if (it != m_list.begin()) {
|
|
auto itTmp = it;
|
|
--itTmp;
|
|
if (itTmp->getTocken() == estyle::lexer::ID) {
|
|
itTmp->setStop(it->getStop());
|
|
it = m_list.erase(it);
|
|
ESTYLE_WARNING("collapse '" << m_stream.extract(itTmp->getStart(), itTmp->getStop()) << "'");
|
|
continue;
|
|
}
|
|
}
|
|
ESTYLE_WARNING("collapse '" << m_stream.extract(it->getStart(), it->getStop()) << "'");
|
|
}
|
|
++it;
|
|
}
|
|
}
|
|
|
|
void estyle::Lexer::postAnnalyse_function() {
|
|
auto it = m_list.begin();
|
|
while (it != m_list.end()) {
|
|
if (it->getTocken() == estyle::lexer::PARENTHESE_IN) {
|
|
if (it != m_list.begin()) {
|
|
auto itTmp = it;
|
|
--itTmp;
|
|
if (itTmp->getTocken() == estyle::lexer::ID) {
|
|
itTmp->setTocken(estyle::lexer::ELEMENT_FUNCTION);
|
|
}
|
|
}
|
|
}
|
|
++it;
|
|
}
|
|
}
|
|
|
|
void estyle::Lexer::postAnnalyse_function_typeExtern() {
|
|
for (int64_t iii=0; iii<m_list.size(); ++iii) {
|
|
if (m_list[iii].getTocken() == estyle::lexer::ELEMENT_FUNCTION) {
|
|
ESTYLE_WARNING("Find function ==> check if we have a type : '" << getData(iii) << "'");
|
|
// search backward the first element availlable like const/ID/*/&/
|
|
int64_t end = iii;
|
|
for (int64_t jjj=iii-1; jjj>=0; --jjj) {
|
|
auto elem = m_list[jjj].getTocken();
|
|
if ( elem == estyle::lexer::BASIC_TYPE_INTEGER_SIZE_T
|
|
|| elem == estyle::lexer::BASIC_TYPE_INTEGER_008
|
|
|| elem == estyle::lexer::BASIC_TYPE_INTEGER_008_UNSIGNED
|
|
|| elem == estyle::lexer::BASIC_TYPE_INTEGER_016
|
|
|| elem == estyle::lexer::BASIC_TYPE_INTEGER_016_UNSIGNED
|
|
|| elem == estyle::lexer::BASIC_TYPE_INTEGER_032
|
|
|| elem == estyle::lexer::BASIC_TYPE_INTEGER_032_UNSIGNED
|
|
|| elem == estyle::lexer::BASIC_TYPE_INTEGER_064
|
|
|| elem == estyle::lexer::BASIC_TYPE_INTEGER_064_UNSIGNED
|
|
|| elem == estyle::lexer::BASIC_TYPE_INTEGER_128
|
|
|| elem == estyle::lexer::BASIC_TYPE_INTEGER_128_UNSIGNED
|
|
|| elem == estyle::lexer::BASIC_TYPE_FLOAT_32
|
|
|| elem == estyle::lexer::BASIC_TYPE_FLOAT_64
|
|
|| elem == estyle::lexer::BASIC_TYPE_FLOAT_96
|
|
|| elem == estyle::lexer::BASIC_TYPE_BOOLEAN
|
|
|| elem == estyle::lexer::BASIC_TYPE_VOID
|
|
|| elem == estyle::lexer::RESERVED_INT
|
|
|| elem == estyle::lexer::RESERVED_LONG
|
|
|| elem == estyle::lexer::RESERVED_SHORT
|
|
|| elem == estyle::lexer::RESERVED_SIGNED
|
|
|| elem == estyle::lexer::RESERVED_UNSIGNED
|
|
|| elem == estyle::lexer::RESERVED_CONST
|
|
|| elem == estyle::lexer::AND
|
|
|| elem == estyle::lexer::MULTIPLY
|
|
|| elem == estyle::lexer::GREATER
|
|
|| elem == estyle::lexer::GREATER_GREATER
|
|
|| elem == estyle::lexer::LESS
|
|
|| elem == estyle::lexer::RESERVED_NEW_LINE) {
|
|
continue;
|
|
}
|
|
// find the end
|
|
end = jjj+1;
|
|
break;
|
|
}
|
|
if (end == iii) {
|
|
// Function call ...
|
|
} else {
|
|
estyle::LexerElement tmp = estyle::LexerElement(estyle::lexer::ELEMENT_COMPLEX_TYPE, m_list[end].getStart(), m_list[iii-1].getStop());
|
|
for (int64_t jjj=end; jjj<iii; ++jjj) {
|
|
tmp.pushElement(m_list[jjj]);
|
|
}
|
|
m_list[end] = tmp;
|
|
m_list.eraseLen(end+1, iii-end-1);
|
|
iii = end+1;
|
|
m_list[iii].setTocken(estyle::lexer::ELEMENT_FUNCTION_DECLARATION);
|
|
ESTYLE_WARNING(" ==> type: '" << getData(end) << "'");
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
/**
 * @brief Post-analysis pass called by lexify() after postAnnalyse_function_typeExtern().
 * @note The body is empty: this pass does nothing for now.
 */
void estyle::Lexer::postAnnalyse_function_typeInternal() {
	// Nothing to do.
}
|
|
|