diff --git a/langkit/langkit_lexer.cpp b/langkit/langkit_lexer.cpp
index fc2719f..ee686f4 100644
--- a/langkit/langkit_lexer.cpp
+++ b/langkit/langkit_lexer.cpp
@@ -18,8 +18,23 @@ std::vector<TokenPtr> Lexer::lex(const std::string &input, const char *filename)
     int current_col = 0;
     int current_line = 0;
 
+    std::string master_lex_pattern;
+
+    unsigned int i = 0;
+    for (iter = lex_patterns.begin(), end = lex_patterns.end(); iter != end; ++iter) {
+        if (i > 0) {
+            master_lex_pattern += "|";
+        }
+        master_lex_pattern += "(" + iter->regex_string + ")";
+        ++i;
+    }
+
+    boost::regex lex_regex(master_lex_pattern);
+
     while (input_iter != input.end()) {
         found = false;
+
+        /*
         for (iter = lex_patterns.begin(), end = lex_patterns.end(); iter != end; ++iter) {
             boost::match_results<std::string::const_iterator> what;
             if (regex_search(input_iter, input.end(), what, iter->regex, boost::match_continuous)) {
@@ -35,6 +50,27 @@ std::vector<TokenPtr> Lexer::lex(const std::string &input, const char *filename)
                 break;
             }
         }
+        */
+
+        boost::match_results<std::string::const_iterator> what;
+        if (regex_search(input_iter, input.end(), what, lex_regex, boost::match_continuous)) {
+            for (i = 0; i < lex_patterns.size(); ++i) {
+                if (!(std::string(what[i+1])).empty()) {
+                    TokenPtr t(new Token(what[i+1], lex_patterns[i].identifier, filename));
+                    t->start.column = current_col;
+                    t->start.line = current_line;
+                    current_col += t->text.size();
+                    t->end.column = current_col;
+                    t->end.line = current_line;
+                    retval.push_back(t);
+                    input_iter += t->text.size();
+                    found = true;
+                    break;
+                }
+            }
+        }
+
+
         if (!found) {
             for (iter = skip_patterns.begin(), end = skip_patterns.end(); iter != end; ++iter) {
                 boost::match_results<std::string::const_iterator> what;
diff --git a/langkit/langkit_lexer.hpp b/langkit/langkit_lexer.hpp
index 13c1f0f..98d5bda 100644
--- a/langkit/langkit_lexer.hpp
+++ b/langkit/langkit_lexer.hpp
@@ -20,9 +20,10 @@ struct File_Position {
 
 struct Pattern {
     boost::regex regex;
+    std::string regex_string;
     int identifier;
 
-    Pattern(const std::string &regexp, int id) : regex(regexp), identifier(id) { }
+    Pattern(const std::string &regexp, int id) : regex(regexp), regex_string(regexp), identifier(id) { }
 };
 
 typedef std::tr1::shared_ptr<Token> TokenPtr;
diff --git a/langkit/main.cpp b/langkit/main.cpp
index 671f185..b4ea3a7 100644
--- a/langkit/main.cpp
+++ b/langkit/main.cpp
@@ -12,7 +12,7 @@ class TokenType {
 public:
     enum Type { Whitespace, Identifier, Number, Operator, Parens_Open, Parens_Close, Square_Open, Square_Close,
         Curly_Open, Curly_Close, Comma, Quoted_String, Single_Quoted_String, Carriage_Return, Semicolon,
-        Function_Def, Scoped_Block, Statement, Equation, Return}; };
+        Function_Def, Scoped_Block, Statement, Equation, Return, Add}; };
 
 void debug_print(TokenPtr token, std::string prepend) {
     std::cout << prepend << "Token: " << token->text << "(" << token->identifier << ") @ " << token->filename << ": (" << token->start.column
@@ -64,21 +64,25 @@ void parse(std::vector<TokenPtr> &tokens) {
 
     */
 
-    //Example: "def add(x,y)"
-    /*
+    //Example: "def add(x,y) { return x+y }"
+
     Rule params;
     Rule block(TokenType::Scoped_Block);
     Rule rule(TokenType::Function_Def);
     Rule statement(TokenType::Statement);
     Rule return_statement(TokenType::Return);
+    Rule add(TokenType::Add);
 
     rule = Ign(Str("def")) << Id(TokenType::Identifier) << ~(Ign(Str("(")) << ~params << Ign(Str(")"))) << block;
     params = Id(TokenType::Identifier) << *(Ign(Str(",")) << Id(TokenType::Identifier));
     block = Ign(Str("{")) << ~return_statement << Ign(Str("}"));
-    return_statement = Ign(Str("return")) << Id(TokenType::Identifier) << Str("+") << Id(TokenType::Identifier);
-    */
+    return_statement = Ign(Str("return")) << add;
+    add = Id(TokenType::Identifier) << Ign(Str("+")) << Id(TokenType::Identifier);
+
+    /*
     Rule rule = Str("x") << Id(TokenType::Semicolon);
+    */
 
     /*
     Rule rule;
@@ -107,6 +111,13 @@ void parse(std::vector<TokenPtr> &tokens) {
 
     std::pair results = rule(iter, end, parent);
 
+    /*
+    while (results.second) {
+        results = rule(results.first + 1, end, parent);
+        //debug_print(parent, "");
+    }
+    */
+
     if (results.second) {
         std::cout << "Parse successful: " << std::endl;
         debug_print(parent, "");
@@ -115,6 +126,7 @@ void parse(std::vector<TokenPtr> &tokens) {
         std::cout << "Parse failed: " << std::endl;
         debug_print(parent, "");
     }
+
 }
 
 
@@ -144,7 +156,7 @@ int main(int argc, char *argv[]) {
         std::getline(std::cin, input);
         while (input != "quit") {
             std::vector<TokenPtr> tokens = lexer.lex(input, "INPUT");
-            debug_print(tokens);
+            //debug_print(tokens);
             parse(tokens);
             std::cout << "Expression> ";
 