// This file is distributed under the BSD License. // See "license.txt" for details. // Copyright 2009-2010, Jonathan Turner (jonathan@emptycrate.com) // and Jason Turner (jason@emptycrate.com) // http://www.chaiscript.com #ifndef CHAISCRIPT_PARSER_HPP_ #define CHAISCRIPT_PARSER_HPP_ #include #include #include #include #include "chaiscript_prelude.hpp" #include "chaiscript_common.hpp" namespace chaiscript { class ChaiScript_Parser { std::string::const_iterator input_pos, input_end; int line, col; std::string multiline_comment_begin, multiline_comment_end; std::string singleline_comment; const char *filename; std::vector match_stack; std::vector > operator_matches; std::vector operators; public: ChaiScript_Parser() { multiline_comment_begin = "/*"; multiline_comment_end = "*/"; singleline_comment = "//"; setup_operators(); } ChaiScript_Parser(const ChaiScript_Parser &); // explicitly unimplemented copy constructor ChaiScript_Parser &operator=(const ChaiScript_Parser &); // explicitly unimplemented assignment operator void setup_operators() { using namespace boost::assign; operators.push_back(AST_Node_Type::Logical_Or); std::vector logical_or; logical_or += "||"; operator_matches.push_back(logical_or); operators.push_back(AST_Node_Type::Logical_And); std::vector logical_and; logical_and += "&&"; operator_matches.push_back(logical_and); operators.push_back(AST_Node_Type::Bitwise_Or); std::vector bitwise_or; bitwise_or += "|"; operator_matches.push_back(bitwise_or); operators.push_back(AST_Node_Type::Bitwise_Xor); std::vector bitwise_xor; bitwise_xor += "^"; operator_matches.push_back(bitwise_xor); operators.push_back(AST_Node_Type::Bitwise_And); std::vector bitwise_and; bitwise_and += "&"; operator_matches.push_back(bitwise_and); operators.push_back(AST_Node_Type::Equality); std::vector equality; equality += "==", "!="; operator_matches.push_back(equality); operators.push_back(AST_Node_Type::Comparison); std::vector comparison; comparison += "<", "<=", ">", ">="; 
operator_matches.push_back(comparison); operators.push_back(AST_Node_Type::Shift); std::vector shift; shift += "<<", ">>"; operator_matches.push_back(shift); operators.push_back(AST_Node_Type::Additive); std::vector additive; additive += "+", "-"; operator_matches.push_back(additive); operators.push_back(AST_Node_Type::Multiplicative); std::vector multiplicative; multiplicative += "*", "/", "%"; operator_matches.push_back(multiplicative); operators.push_back(AST_Node_Type::Dot_Access); std::vector dot_access; dot_access += "."; operator_matches.push_back(dot_access); } /** * Prints the parsed ast_nodes as a tree */ /* void debug_print(AST_NodePtr t, std::string prepend = "") { std::cout << prepend << "(" << ast_node_type_to_string(t->identifier) << ") " << t->text << " : " << t->start.line << ", " << t->start.column << std::endl; for (unsigned int j = 0; j < t->children.size(); ++j) { debug_print(t->children[j], prepend + " "); } } */ /** * Shows the current stack of matched ast_nodes */ void show_match_stack() { for (unsigned int i = 0; i < match_stack.size(); ++i) { //debug_print(match_stack[i]); std::cout << match_stack[i]->to_string(); } } /** * Clears the stack of matched ast_nodes */ void clear_match_stack() { match_stack.clear(); } /** * Returns the front-most AST node */ AST_NodePtr ast() { return match_stack.front(); } /** * Helper function that collects ast_nodes from a starting position to the top of the stack into a new AST node */ void build_match(AST_NodePtr t, int match_start) { int pos_line_start, pos_col_start, pos_line_stop, pos_col_stop; int is_deep = false; //so we want to take everything to the right of this and make them children if (match_start != int(match_stack.size())) { pos_line_start = match_stack[match_start]->start.line; pos_col_start = match_stack[match_start]->start.column; pos_line_stop = line; pos_col_stop = col; is_deep = true; } else { pos_line_start = line; pos_col_start = col; pos_line_stop = line; pos_col_stop = col; } 
t->filename = filename; t->start.line = pos_line_start; t->start.column = pos_col_start; t->end.line = pos_line_stop; t->end.column = pos_col_stop; if (is_deep) { t->children.assign(match_stack.begin() + (match_start), match_stack.end()); match_stack.erase(match_stack.begin() + (match_start), match_stack.end()); match_stack.push_back(t); } else { //todo: fix the fact that a successful match that captured no ast_nodes doesn't have any real start position match_stack.push_back(t); } } /** * Does ranged char check */ inline bool char_between(char start, char end) { if ((*input_pos >= start) && (*input_pos <= end)) { return true; } else { return false; } } /** * Check to see if there is more text parse */ inline bool has_more_input() { return (input_pos != input_end); } /** * Skips any multi-line or single-line comment */ bool SkipComment() { bool retval = false; if (Symbol_(multiline_comment_begin.c_str())) { while (input_pos != input_end) { if (Symbol_(multiline_comment_end.c_str())) { break; } else if (!Eol_()) { ++col; ++input_pos; } } retval = true; } else if (Symbol_(singleline_comment.c_str())) { while (input_pos != input_end) { if (Symbol_("\r\n")) { input_pos -= 2; break; } else if (Char_('\n')) { --input_pos; break; } else { ++col; ++input_pos; } } retval = true; } return retval; } /** * Skips ChaiScript whitespace, which means space and tab, but not cr/lf */ bool SkipWS() { bool retval = false; while (has_more_input()) { if ((*input_pos == ' ') || (*input_pos == '\t')) { ++input_pos; ++col; retval = true; } else if (SkipComment()) { retval = true; } else { break; } } return retval; } /** * Reads a floating point value from input, without skipping initial whitespace */ bool Float_() { bool retval = false; std::string::const_iterator start = input_pos; if (has_more_input() && (char_between('0', '9') || (*input_pos == '.'))) { while (has_more_input() && char_between('0', '9')) { ++input_pos; ++col; } if (has_more_input() && (*input_pos == '.')) { ++input_pos; 
++col; if (has_more_input() && char_between('0', '9')) { retval = true; while (has_more_input() && char_between('0', '9')) { ++input_pos; ++col; } } else { --input_pos; --col; } } } return retval; } /** * Reads a floating point value from input, without skipping initial whitespace */ bool Hex_() { bool retval = false; if (has_more_input() && (*input_pos == '0')) { ++input_pos; ++col; if (has_more_input() && ((*input_pos == 'x') || (*input_pos == 'X'))) { ++input_pos; ++col; if (has_more_input() && (char_between('0', '9') || char_between('a', 'f') || char_between('A', 'F'))) { retval = true; while (has_more_input() && (char_between('0', '9') || char_between('a', 'f') || char_between('A', 'F'))) { ++input_pos; ++col; } } else { --input_pos; --col; } } else { --input_pos; --col; } } return retval; } /** * Reads a floating point value from input, without skipping initial whitespace */ bool Binary_() { bool retval = false; if (has_more_input() && (*input_pos == '0')) { ++input_pos; ++col; if (has_more_input() && ((*input_pos == 'b') || (*input_pos == 'B'))) { ++input_pos; ++col; if (has_more_input() && char_between('0', '1')) { retval = true; while (has_more_input() && char_between('0', '1')) { ++input_pos; ++col; } } else { --input_pos; --col; } } else { --input_pos; --col; } } return retval; } /** * Reads a number from the input, detecting if it's an integer or floating point */ bool Num(bool capture = false) { SkipWS(); if (!capture) { return Hex_() || Float_(); } else { std::string::const_iterator start = input_pos; int prev_col = col; int prev_line = line; if (has_more_input() && (char_between('0', '9') || (*input_pos == '.')) ) { if (Hex_()) { std::string match(start, input_pos); std::stringstream ss(match); unsigned int temp_int; ss >> std::hex >> temp_int; std::ostringstream out_int; out_int << int(temp_int); AST_NodePtr t(new Int_AST_Node(out_int.str(), AST_Node_Type::Int, filename, prev_line, prev_col, line, col)); match_stack.push_back(t); return true; } if 
(Binary_()) { std::string match(start, input_pos); int temp_int = 0; unsigned int pos = 0, end = match.length(); while ((pos < end) && (pos < (2 + sizeof(int) * 8))) { temp_int <<= 1; if (match[pos] == '1') { temp_int += 1; } ++pos; } std::ostringstream out_int; out_int << temp_int; AST_NodePtr t(new Int_AST_Node(out_int.str(), AST_Node_Type::Int, filename, prev_line, prev_col, line, col)); match_stack.push_back(t); return true; } if (Float_()) { std::string match(start, input_pos); AST_NodePtr t(new Float_AST_Node(match, AST_Node_Type::Float, filename, prev_line, prev_col, line, col)); match_stack.push_back(t); return true; } else { std::string match(start, input_pos); if ((match.size() > 0) && (match[0] == '0')) { std::stringstream ss(match); unsigned int temp_int; ss >> std::oct >> temp_int; std::ostringstream out_int; out_int << int(temp_int); AST_NodePtr t(new Int_AST_Node(out_int.str(), AST_Node_Type::Int, filename, prev_line, prev_col, line, col)); match_stack.push_back(t); } else { AST_NodePtr t(new Int_AST_Node(match, AST_Node_Type::Int, filename, prev_line, prev_col, line, col)); match_stack.push_back(t); } return true; } } else { return false; } } } /** * Reads an identifier from input which conforms to C's identifier naming conventions, without skipping initial whitespace */ bool Id_() { bool retval = false; if (has_more_input() && (char_between('A', 'Z') || (*input_pos == '_') || char_between('a', 'z'))) { retval = true; while (has_more_input() && (char_between('A', 'Z') || (*input_pos == '_') || char_between('a', 'z') || char_between('0', '9'))) { ++input_pos; ++col; } } else if (has_more_input() && (*input_pos == '`')) { retval = true; ++col; ++input_pos; std::string::const_iterator start = input_pos; while (has_more_input() && (*input_pos != '`')) { if (Eol()) { throw Eval_Error("Carriage return in identifier literal", File_Position(line, col), filename); } else { ++input_pos; ++col; } } if (start == input_pos) { throw Eval_Error("Missing contents 
of identifier literal", File_Position(line, col), filename); } else if (input_pos == input_end) { throw Eval_Error("Incomplete identifier literal", File_Position(line, col), filename); } ++col; ++input_pos; } return retval; } /** * Reads (and potentially captures) an identifier from input */ bool Id(bool capture = false) { SkipWS(); if (!capture) { return Id_(); } else { std::string::const_iterator start = input_pos; int prev_col = col; int prev_line = line; if (Id_()) { if (*start == '`') { //Id Literal std::string match(start+1, input_pos-1); AST_NodePtr t(new Id_AST_Node(match, AST_Node_Type::Id, filename, prev_line, prev_col, line, col)); match_stack.push_back(t); return true; } else { std::string match(start, input_pos); AST_NodePtr t(new Id_AST_Node(match, AST_Node_Type::Id, filename, prev_line, prev_col, line, col)); match_stack.push_back(t); return true; } } else { return false; } } } /** * Checks for a node annotation of the form "#" */ bool Annotation() { SkipWS(); std::string::const_iterator start = input_pos; int prev_col = col; int prev_line = line; if (Symbol_("#")) { do { while (input_pos != input_end) { if (Eol_()) { break; } else { ++col; ++input_pos; } } } while (Symbol("#")); std::string match(start, input_pos); AST_NodePtr t(new Annotation_AST_Node(match, AST_Node_Type::Annotation, filename, prev_line, prev_col, line, col)); match_stack.push_back(t); return true; } else { return false; } } /** * Reads a quoted string from input, without skipping initial whitespace */ bool Quoted_String_() { bool retval = false; char prev_char = 0; if (has_more_input() && (*input_pos == '\"')) { retval = true; prev_char = *input_pos; ++input_pos; ++col; while (has_more_input() && ((*input_pos != '\"') || ((*input_pos == '\"') && (prev_char == '\\')))) { if (!Eol_()) { if (prev_char == '\\') { prev_char = 0; } else { prev_char = *input_pos; } ++input_pos; ++col; } } if (has_more_input()) { ++input_pos; ++col; } else { throw Eval_Error("Unclosed quoted string", 
File_Position(line, col), filename); } } return retval; } /** * Reads (and potentially captures) a quoted string from input. Translates escaped sequences. */ bool Quoted_String(bool capture = false) { SkipWS(); if (!capture) { return Quoted_String_(); } else { std::string::const_iterator start = input_pos; int prev_col = col; int prev_line = line; if (Quoted_String_()) { std::string match; bool is_escaped = false; bool is_interpolated = false; bool saw_interpolation_marker = false; int prev_stack_top = match_stack.size(); //for (std::string::iterator s = start + 1, end = input_pos - 1; s != end; ++s) { std::string::const_iterator s = start + 1, end = input_pos - 1; while (s != end) { if (saw_interpolation_marker) { if (*s == '{') { //We've found an interpolation point if (is_interpolated) { //If we've seen previous interpolation, add on instead of making a new one AST_NodePtr plus(new Str_AST_Node("+", AST_Node_Type::Str, filename, prev_line, prev_col, line, col)); match_stack.push_back(plus); AST_NodePtr t(new Quoted_String_AST_Node(match, AST_Node_Type::Quoted_String, filename, prev_line, prev_col, line, col)); match_stack.push_back(t); build_match(AST_NodePtr(new Additive_AST_Node()), prev_stack_top); } else { AST_NodePtr t(new Quoted_String_AST_Node(match, AST_Node_Type::Quoted_String, filename, prev_line, prev_col, line, col)); match_stack.push_back(t); } //We've finished with the part of the string up to this point, so clear it match = ""; AST_NodePtr plus(new Str_AST_Node("+", AST_Node_Type::Str, filename, prev_line, prev_col, line, col)); match_stack.push_back(plus); std::string eval_match; ++s; while ((*s != '}') && (s != end)) { eval_match.push_back(*s); ++s; } if (*s == '}') { is_interpolated = true; ++s; int tostr_stack_top = match_stack.size(); AST_NodePtr tostr(new Id_AST_Node("to_string", AST_Node_Type::Id, filename, prev_line, prev_col, line, col)); match_stack.push_back(tostr); int ev_stack_top = match_stack.size(); AST_NodePtr ev(new 
Id_AST_Node("eval", AST_Node_Type::Id, filename, prev_line, prev_col, line, col)); match_stack.push_back(ev); int arg_stack_top = match_stack.size(); AST_NodePtr t(new Quoted_String_AST_Node(eval_match, AST_Node_Type::Quoted_String, filename, prev_line, prev_col, line, col)); match_stack.push_back(t); build_match(AST_NodePtr(new Arg_List_AST_Node()), arg_stack_top); build_match(AST_NodePtr(new Inplace_Fun_Call_AST_Node()), ev_stack_top); build_match(AST_NodePtr(new Arg_List_AST_Node()), ev_stack_top); build_match(AST_NodePtr(new Fun_Call_AST_Node()), tostr_stack_top); build_match(AST_NodePtr(new Additive_AST_Node()), prev_stack_top); } else { throw Eval_Error("Unclosed in-string eval", File_Position(prev_line, prev_col), filename); } } else { match.push_back('$'); } saw_interpolation_marker = false; } else { if (*s == '\\') { if (is_escaped) { match.push_back('\\'); is_escaped = false; } else { is_escaped = true; } } else { if (is_escaped) { switch (*s) { case ('b') : match.push_back('\b'); break; case ('f') : match.push_back('\f'); break; case ('n') : match.push_back('\n'); break; case ('r') : match.push_back('\r'); break; case ('t') : match.push_back('\t'); break; case ('\'') : match.push_back('\''); break; case ('\"') : match.push_back('\"'); break; case ('$') : match.push_back('$'); break; default: throw Eval_Error("Unknown escaped sequence in string", File_Position(prev_line, prev_col), filename); } } else if (*s == '$') { saw_interpolation_marker = true; } else { match.push_back(*s); } is_escaped = false; } ++s; } } if (is_interpolated) { AST_NodePtr plus(new Str_AST_Node("+", AST_Node_Type::Str, filename, prev_line, prev_col, line, col)); match_stack.push_back(plus); AST_NodePtr t(new Quoted_String_AST_Node(match, AST_Node_Type::Quoted_String, filename, prev_line, prev_col, line, col)); match_stack.push_back(t); build_match(AST_NodePtr(new Additive_AST_Node()), prev_stack_top); } else { AST_NodePtr t(new Quoted_String_AST_Node(match, 
AST_Node_Type::Quoted_String, filename, prev_line, prev_col, line, col)); match_stack.push_back(t); } return true; } else { return false; } } } /** * Reads a character group from input, without skipping initial whitespace */ bool Single_Quoted_String_() { bool retval = false; char prev_char = 0; if (has_more_input() && (*input_pos == '\'')) { retval = true; prev_char = *input_pos; ++input_pos; ++col; while (has_more_input() && ((*input_pos != '\'') || ((*input_pos == '\'') && (prev_char == '\\')))) { if (!Eol_()) { if (prev_char == '\\') { prev_char = 0; } else { prev_char = *input_pos; } ++input_pos; ++col; } } if (input_pos != input_end) { ++input_pos; ++col; } else { throw Eval_Error("Unclosed single-quoted string", File_Position(line, col), filename); } } return retval; } /** * Reads (and potentially captures) a char group from input. Translates escaped sequences. */ bool Single_Quoted_String(bool capture = false) { SkipWS(); if (!capture) { return Single_Quoted_String_(); } else { std::string::const_iterator start = input_pos; int prev_col = col; int prev_line = line; if (Single_Quoted_String_()) { std::string match; bool is_escaped = false; for (std::string::const_iterator s = start + 1, end = input_pos - 1; s != end; ++s) { if (*s == '\\') { if (is_escaped) { match.push_back('\\'); is_escaped = false; } else { is_escaped = true; } } else { if (is_escaped) { switch (*s) { case ('b') : match.push_back('\b'); break; case ('f') : match.push_back('\f'); break; case ('n') : match.push_back('\n'); break; case ('r') : match.push_back('\r'); break; case ('t') : match.push_back('\t'); break; case ('\'') : match.push_back('\''); break; case ('\"') : match.push_back('\"'); break; default: throw Eval_Error("Unknown escaped sequence in string", File_Position(prev_line, prev_col), filename); } } else { match.push_back(*s); } is_escaped = false; } } AST_NodePtr t(new Single_Quoted_String_AST_Node(match, AST_Node_Type::Single_Quoted_String, filename, prev_line, prev_col, 
line, col)); match_stack.push_back(t); return true; } else { return false; } } } /** * Reads a char from input if it matches the parameter, without skipping initial whitespace */ bool Char_(char c) { bool retval = false; if (has_more_input() && (*input_pos == c)) { ++input_pos; ++col; retval = true; } return retval; } /** * Reads (and potentially captures) a char from input if it matches the parameter */ bool Char(char c, bool capture = false) { SkipWS(); if (!capture) { return Char_(c); } else { std::string::const_iterator start = input_pos; int prev_col = col; int prev_line = line; if (Char_(c)) { std::string match(start, input_pos); AST_NodePtr t(new Char_AST_Node(match, AST_Node_Type::Char, filename, prev_line, prev_col, line, col)); match_stack.push_back(t); return true; } else { return false; } } } /** * Reads a string from input if it matches the parameter, without skipping initial whitespace */ bool Keyword_(const char *s) { bool retval = false; int len = strlen(s); if ((input_end - input_pos) >= len) { std::string::const_iterator tmp = input_pos; for (int i = 0; i < len; ++i) { if (*tmp != s[i]) { return false; } ++tmp; } retval = true; input_pos = tmp; col += len; } return retval; } /** * Reads (and potentially captures) a string from input if it matches the parameter */ bool Keyword(const char *s, bool capture = false) { SkipWS(); if (!capture) { std::string::const_iterator start = input_pos; int prev_col = col; int prev_line = line; bool retval = Keyword_(s); if (retval) { //todo: fix this. Hacky workaround for preventing substring matches if (has_more_input() && (char_between('A', 'Z') || (*input_pos == '_') || char_between('a', 'z') || char_between('0', '9'))) { input_pos = start; col = prev_col; line = prev_line; return false; } return true; } else { return retval; } } else { std::string::const_iterator start = input_pos; int prev_col = col; int prev_line = line; if (Keyword_(s)) { //todo: fix this. 
Hacky workaround for preventing substring matches if (has_more_input() && (char_between('A', 'Z') || (*input_pos == '_') || char_between('a', 'z') || char_between('0', '9'))) { input_pos = start; col = prev_col; line = prev_line; return false; } std::string match(start, input_pos); AST_NodePtr t(new Str_AST_Node(match, AST_Node_Type::Str, filename, prev_line, prev_col, line, col)); match_stack.push_back(t); return true; } else { return false; } } } /** * Reads a symbol group from input if it matches the parameter, without skipping initial whitespace */ bool Symbol_(const char *s) { bool retval = false; int len = strlen(s); if ((input_end - input_pos) >= len) { std::string::const_iterator tmp = input_pos; for (int i = 0; i < len; ++i) { if (*tmp != s[i]) { return false; } ++tmp; } retval = true; input_pos = tmp; col += len; } return retval; } /** * Reads (and potentially captures) a symbol group from input if it matches the parameter */ bool Symbol(const char *s, bool capture = false, bool disallow_prevention=false) { SkipWS(); if (!capture) { std::string::const_iterator start = input_pos; int prev_col = col; int prev_line = line; bool retval = Symbol_(s); if (retval) { //todo: fix this. Hacky workaround for preventing substring matches if (has_more_input() && (disallow_prevention == false) && ((*input_pos == '+') || (*input_pos == '-') || (*input_pos == '*') || (*input_pos == '/') || (*input_pos == '|') || (*input_pos == '&') || (*input_pos == '^') || (*input_pos == '=') || (*input_pos == '.') || (*input_pos == '<') || (*input_pos == '>'))) { input_pos = start; col = prev_col; line = prev_line; return false; } return true; } else { return retval; } } else { std::string::const_iterator start = input_pos; int prev_col = col; int prev_line = line; if (Symbol_(s)) { //todo: fix this. 
Hacky workaround for preventing substring matches if (has_more_input() && (disallow_prevention == false) && ((*input_pos == '+') || (*input_pos == '-') || (*input_pos == '*') || (*input_pos == '/') || (*input_pos == '|') || (*input_pos == '&') || (*input_pos == '^') || (*input_pos == '=') || (*input_pos == '.') || (*input_pos == '<') || (*input_pos == '>'))) { input_pos = start; col = prev_col; line = prev_line; return false; } else { std::string match(start, input_pos); AST_NodePtr t(new Str_AST_Node(match, AST_Node_Type::Str, filename, prev_line, prev_col, line, col)); match_stack.push_back(t); return true; } } else { return false; } } } /** * Reads an end-of-line group from input, without skipping initial whitespace */ bool Eol_() { bool retval = false; if (has_more_input() && (Symbol_("\r\n") || Char_('\n'))) { retval = true; ++line; col = 1; } else if (has_more_input() && Char_(';')) { retval = true; } return retval; } /** * Reads (and potentially captures) an end-of-line group from input */ bool Eol(bool capture = false) { SkipWS(); if (!capture) { return Eol_(); } else { std::string::const_iterator start = input_pos; int prev_col = col; int prev_line = line; if (Eol_()) { std::string match(start, input_pos); AST_NodePtr t(new Eol_AST_Node(match, AST_Node_Type::Eol, filename, prev_line, prev_col, line, col)); match_stack.push_back(t); return true; } else { return false; } } } /** * Reads a comma-separated list of values from input */ bool Arg_List() { bool retval = false; int prev_stack_top = match_stack.size(); if (Equation()) { retval = true; while (Eol()) {} if (Char(',')) { do { while (Eol()) {} if (!Equation()) { throw Eval_Error("Unexpected value in parameter list", File_Position(line, col), filename); } } while (retval && Char(',')); } build_match(AST_NodePtr(new Arg_List_AST_Node()), prev_stack_top); } return retval; } /** * Reads possible special container values, including ranges and map_pairs */ bool Container_Arg_List() { bool retval = false; int 
prev_stack_top = match_stack.size(); if (Value_Range()) { retval = true; build_match(AST_NodePtr(new Arg_List_AST_Node()), prev_stack_top); } else if (Map_Pair()) { retval = true; while (Eol()) {} if (Char(',')) { do { while (Eol()) {} if (!Map_Pair()) { throw Eval_Error("Unexpected value in container", File_Position(line, col), filename); } } while (retval && Char(',')); } build_match(AST_NodePtr(new Arg_List_AST_Node()), prev_stack_top); } return retval; } /** * Reads a lambda (anonymous function) from input */ bool Lambda() { bool retval = false; int prev_stack_top = match_stack.size(); if (Keyword("fun")) { retval = true; if (Char('(')) { Arg_List(); if (!Char(')')) { throw Eval_Error("Incomplete anonymous function", File_Position(line, col), filename); } } while (Eol()) {} if (!Block()) { throw Eval_Error("Incomplete anonymous function", File_Position(line, col), filename); } build_match(AST_NodePtr(new Lambda_AST_Node()), prev_stack_top); } return retval; } /** * Reads a function definition from input */ bool Def() { bool retval = false; bool is_annotated = false; bool is_method = false; AST_NodePtr annotation; if (Annotation()) { while (Eol_()) {} annotation = match_stack.back(); match_stack.pop_back(); is_annotated = true; } int prev_stack_top = match_stack.size(); if (Keyword("def")) { retval = true; if (!Id(true)) { throw Eval_Error("Missing function name in definition", File_Position(line, col), filename); } if (Symbol("::", false)) { //We're now a method is_method = true; if (!Id(true)) { throw Eval_Error("Missing method name in definition", File_Position(line, col), filename); } } if (Char('(')) { Arg_List(); if (!Char(')')) { throw Eval_Error("Incomplete function definition", File_Position(line, col), filename); } } while (Eol()) {} if (Char(':')) { if (!Operator()) { throw Eval_Error("Missing guard expression for function", File_Position(line, col), filename); } } while (Eol()) {} if (!Block()) { throw Eval_Error("Incomplete function definition", 
File_Position(line, col), filename);
        }

        // the presence of "::" decides which node type wraps the collected children
        if (is_method) {
          build_match(AST_NodePtr(new Method_AST_Node()), prev_stack_top);
        }
        else {
          build_match(AST_NodePtr(new Def_AST_Node()), prev_stack_top);
        }

        if (is_annotated) {
          match_stack.back()->annotation = annotation;
        }
      }

      return retval;
    }

    /**
     * Reads a try/catch/finally construct from input
     *
     * Form: "try" and a block, then any number of "catch" clauses (each with an optional
     * "(id)", an optional ": guard" after it, and a block), then an optional "finally" and block.
     * Throws Eval_Error when a required block or expression is missing.
     */
    bool Try() {
      bool retval = false;

      int prev_stack_top = match_stack.size();

      if (Keyword("try")) {
        retval = true;

        while (Eol()) {}

        if (!Block()) {
          throw Eval_Error("Incomplete 'try' block", File_Position(line, col), filename);
        }

        // keep reading catch clauses until one fails to match
        bool has_matches = true;
        while (has_matches) {
          while (Eol()) {}
          has_matches = false;
          if (Keyword("catch", false)) {
            int catch_stack_top = match_stack.size();
            if (Char('(')) {
              if (!(Id(true) && Char(')'))) {
                throw Eval_Error("Incomplete 'catch' expression", File_Position(line, col), filename);
              }
              if (Char(':')) {
                if (!Operator()) {
                  throw Eval_Error("Missing guard expression for catch", File_Position(line, col), filename);
                }
              }
            }

            while (Eol()) {}

            if (!Block()) {
              throw Eval_Error("Incomplete 'catch' block", File_Position(line, col), filename);
            }
            build_match(AST_NodePtr(new Catch_AST_Node()), catch_stack_top);
            has_matches = true;
          }
        }
        while (Eol()) {}
        if (Keyword("finally", false)) {
          int finally_stack_top = match_stack.size();

          while (Eol()) {}

          if (!Block()) {
            throw Eval_Error("Incomplete 'finally' block", File_Position(line, col), filename);
          }
          build_match(AST_NodePtr(new Finally_AST_Node()), finally_stack_top);
        }

        build_match(AST_NodePtr(new Try_AST_Node()), prev_stack_top);
      }

      return retval;
    }

    /**
     * Reads an if/elseif/else block from input
     *
     * Throws Eval_Error when a condition, its parentheses, or a block is missing.
     */
    bool If() {
      bool retval = false;

      int prev_stack_top = match_stack.size();

      if (Keyword("if")) {
        retval = true;

        if (!Char('(')) {
          throw Eval_Error("Incomplete 'if' expression", File_Position(line, col), filename);
        }

        if (!(Operator() && Char(')'))) {
          throw Eval_Error("Incomplete 'if' expression", File_Position(line, col), filename);
        }

        while (Eol()) {}

        if (!Block()) {
          throw
Eval_Error("Incomplete 'if' block", File_Position(line, col), filename); } bool has_matches = true; while (has_matches) { while (Eol()) {} has_matches = false; if (Keyword("else", true)) { if (Keyword("if")) { match_stack.back()->text = "else if"; if (!Char('(')) { throw Eval_Error("Incomplete 'else if' expression", File_Position(line, col), filename); } if (!(Operator() && Char(')'))) { throw Eval_Error("Incomplete 'else if' expression", File_Position(line, col), filename); } while (Eol()) {} if (!Block()) { throw Eval_Error("Incomplete 'else if' block", File_Position(line, col), filename); } has_matches = true; } else { while (Eol()) {} if (!Block()) { throw Eval_Error("Incomplete 'else' block", File_Position(line, col), filename); } has_matches = true; } } } build_match(AST_NodePtr(new If_AST_Node()), prev_stack_top); } return retval; } /** * Reads a while block from input */ bool While() { bool retval = false; int prev_stack_top = match_stack.size(); if (Keyword("while")) { retval = true; if (!Char('(')) { throw Eval_Error("Incomplete 'while' expression", File_Position(line, col), filename); } if (!(Operator() && Char(')'))) { throw Eval_Error("Incomplete 'while' expression", File_Position(line, col), filename); } while (Eol()) {} if (!Block()) { throw Eval_Error("Incomplete 'while' block", File_Position(line, col), filename); } build_match(AST_NodePtr(new While_AST_Node()), prev_stack_top); } return retval; } /** * Reads the C-style for conditions from input */ bool For_Guards() { Equation(); if (Char(';') && Operator() && Char(';') && Equation()) { return true; } else { throw Eval_Error("Incomplete conditions in 'for' loop", File_Position(line, col), filename); } } /** * Reads a for block from input */ bool For() { bool retval = false; int prev_stack_top = match_stack.size(); if (Keyword("for")) { retval = true; if (!Char('(')) { throw Eval_Error("Incomplete 'for' expression", File_Position(line, col), filename); } if (!(For_Guards() && Char(')'))) { throw 
Eval_Error("Incomplete 'for' expression", File_Position(line, col), filename);
        }

        while (Eol()) {}

        if (!Block()) {
          throw Eval_Error("Incomplete 'for' block", File_Position(line, col), filename);
        }

        build_match(AST_NodePtr(new For_AST_Node()), prev_stack_top);
      }

      return retval;
    }

    /**
     * Reads a curly-brace C-style block from input
     *
     * Throws Eval_Error if the closing brace is missing.
     */
    bool Block() {
      bool retval = false;

      int prev_stack_top = match_stack.size();

      if (Char('{')) {
        retval = true;

        Statements();
        if (!Char('}')) {
          throw Eval_Error("Incomplete block", File_Position(line, col), filename);
        }

        build_match(AST_NodePtr(new Block_AST_Node()), prev_stack_top);
      }

      return retval;
    }

    /**
     * Reads a return statement from input
     *
     * The returned expression is optional: the result of Operator() is not checked.
     */
    bool Return() {
      bool retval = false;

      int prev_stack_top = match_stack.size();

      if (Keyword("return")) {
        retval = true;

        Operator();
        build_match(AST_NodePtr(new Return_AST_Node()), prev_stack_top);
      }

      return retval;
    }

    /**
     * Reads a break statement from input
     */
    bool Break() {
      bool retval = false;

      int prev_stack_top = match_stack.size();

      if (Keyword("break")) {
        retval = true;

        build_match(AST_NodePtr(new Break_AST_Node()), prev_stack_top);
      }

      return retval;
    }

    /**
     * Reads an identifier, then proceeds to check if it's a function or array call
     *
     * Any number of "(args)" and "[index]" suffixes may follow; each one wraps everything matched
     * so far into a Fun_Call or Array_Call node. Throws Eval_Error on an unclosed call or index.
     */
    bool Id_Fun_Array() {
      bool retval = false;
      // NOTE(review): prev_pos is never read in this function
      std::string::const_iterator prev_pos = input_pos;

      unsigned int prev_stack_top = match_stack.size();
      if (Id(true)) {
        retval = true;
        bool has_more = true;

        while (has_more) {
          has_more = false;

          if (Char('(')) {
            has_more = true;

            Arg_List();
            if (!Char(')')) {
              throw Eval_Error("Incomplete function call", File_Position(line, col), filename);
            }

            build_match(AST_NodePtr(new Fun_Call_AST_Node()), prev_stack_top);
          }
          else if (Char('[')) {
            has_more = true;

            if (!(Operator() && Char(']'))) {
              throw Eval_Error("Incomplete array access", File_Position(line, col), filename);
            }

            build_match(AST_NodePtr(new Array_Call_AST_Node()), prev_stack_top);
          }
        }
      }

      return retval;
    }

    /**
     * Reads a variable declaration from input
     *
     * Accepts "var name" and "attr name::attribute".
     */
    bool Var_Decl() {
      bool retval =
false;

      int prev_stack_top = match_stack.size();

      if (Keyword("var")) {
        retval = true;

        if (!Id(true)) {
          throw Eval_Error("Incomplete variable declaration", File_Position(line, col), filename);
        }

        build_match(AST_NodePtr(new Var_Decl_AST_Node()), prev_stack_top);
      }
      else if (Keyword("attr")) {
        retval = true;

        // an attribute declaration needs both names, separated by "::"
        if (!Id(true)) {
          throw Eval_Error("Incomplete attribute declaration", File_Position(line, col), filename);
        }
        if (!Symbol("::", false)) {
          throw Eval_Error("Incomplete attribute declaration", File_Position(line, col), filename);
        }
        if (!Id(true)) {
          throw Eval_Error("Missing attribute name in definition", File_Position(line, col), filename);
        }

        build_match(AST_NodePtr(new Attr_Decl_AST_Node()), prev_stack_top);
      }

      return retval;
    }

    /**
     * Reads an expression surrounded by parentheses from input
     *
     * No wrapper node is built here; whatever Operator() pushed stays on match_stack.
     * Throws Eval_Error if the expression or the closing parenthesis is missing.
     */
    bool Paren_Expression() {
      bool retval = false;

      if (Char('(')) {
        retval = true;
        if (!Operator()) {
          throw Eval_Error("Incomplete expression", File_Position(line, col), filename);
        }
        if (!Char(')')) {
          throw Eval_Error("Missing closing parenthesis", File_Position(line, col), filename);
        }
      }
      return retval;
    }

    /**
     * Reads, and identifies, a short-form container initialization from input
     *
     * The type of the first child of the parsed argument list selects the node: Value_Range gives
     * an Inline_Range, Map_Pair an Inline_Map, anything else (or an empty "[]") an Inline_Array.
     * Throws Eval_Error if the closing bracket is missing.
     */
    bool Inline_Container() {
      bool retval = false;

      unsigned int prev_stack_top = match_stack.size();

      if (Char('[')) {
        retval = true;
        Container_Arg_List();
        if (!Char(']')) {
          throw Eval_Error("Missing closing square bracket", File_Position(line, col), filename);
        }
        if ((prev_stack_top != match_stack.size()) && (match_stack.back()->children.size() > 0)) {
          if (match_stack.back()->children[0]->identifier == AST_Node_Type::Value_Range) {
            build_match(AST_NodePtr(new Inline_Range_AST_Node()), prev_stack_top);
          }
          else if (match_stack.back()->children[0]->identifier == AST_Node_Type::Map_Pair) {
            build_match(AST_NodePtr(new Inline_Map_AST_Node()), prev_stack_top);
          }
          else {
            build_match(AST_NodePtr(new Inline_Array_AST_Node()), prev_stack_top);
          }
        }
        else {
          build_match(AST_NodePtr(new Inline_Array_AST_Node()),
prev_stack_top); } } return retval; } /** * Reads a unary prefixed expression from input */ bool Prefix() { bool retval = false; int prev_stack_top = match_stack.size(); if (Symbol("++", true)) { retval = true; if (!Operator(operators.size()-1)) { throw Eval_Error("Incomplete '++' expression", File_Position(line, col), filename); } build_match(AST_NodePtr(new Prefix_AST_Node()), prev_stack_top); } else if (Symbol("--", true)) { retval = true; if (!Operator(operators.size()-1)) { throw Eval_Error("Incomplete '--' expression", File_Position(line, col), filename); } build_match(AST_NodePtr(new Prefix_AST_Node()), prev_stack_top); } else if (Char('-', true)) { retval = true; if (!Operator(operators.size()-1)) { throw Eval_Error("Incomplete unary '-' expression", File_Position(line, col), filename); } build_match(AST_NodePtr(new Prefix_AST_Node()), prev_stack_top); } else if (Char('+', true)) { retval = true; if (!Operator(operators.size()-1)) { throw Eval_Error("Incomplete unary '+' expression", File_Position(line, col), filename); } build_match(AST_NodePtr(new Prefix_AST_Node()), prev_stack_top); } else if (Char('!', true)) { retval = true; if (!Operator(operators.size()-1)) { throw Eval_Error("Incomplete '!' 
expression", File_Position(line, col), filename); } build_match(AST_NodePtr(new Prefix_AST_Node()), prev_stack_top); } else if (Char('~', true)) { retval = true; if (!Operator(operators.size()-1)) { throw Eval_Error("Incomplete '~' expression", File_Position(line, col), filename); } build_match(AST_NodePtr(new Prefix_AST_Node()), prev_stack_top); } return retval; } /** * Parses any of a group of 'value' style ast_node groups from input */ bool Value() { if (Var_Decl() || Lambda() || Id_Fun_Array() || Num(true) || Prefix() || Quoted_String(true) || Single_Quoted_String(true) || Paren_Expression() || Inline_Container()) { return true; } else { return false; } } bool Operator_Helper(int precedence) { for (unsigned int i = 0; i < operator_matches[precedence].size(); ++i) { if (Symbol(operator_matches[precedence][i].c_str(), true)) { return true; } } return false; } bool Operator(unsigned int precedence = 0) { bool retval = false; int prev_stack_top = match_stack.size(); if (precedence < operators.size()) { if (Operator(precedence+1)) { retval = true; if (Operator_Helper(precedence)) { do { if (!Operator(precedence+1)) { throw Eval_Error("Incomplete " + std::string(ast_node_type_to_string(operators[precedence])) + " expression", File_Position(line, col), filename); } } while (Operator_Helper(precedence)); switch (operators[precedence]) { case(AST_Node_Type::Comparison) : build_match(AST_NodePtr(new Comparison_AST_Node()), prev_stack_top); break; case(AST_Node_Type::Dot_Access) : build_match(AST_NodePtr(new Dot_Access_AST_Node()), prev_stack_top); break; case(AST_Node_Type::Additive) : build_match(AST_NodePtr(new Additive_AST_Node()), prev_stack_top); break; case(AST_Node_Type::Multiplicative) : build_match(AST_NodePtr(new Multiplicative_AST_Node()), prev_stack_top); break; case(AST_Node_Type::Shift) : build_match(AST_NodePtr(new Shift_AST_Node()), prev_stack_top); break; case(AST_Node_Type::Equality) : build_match(AST_NodePtr(new Equality_AST_Node()), prev_stack_top); 
break; case(AST_Node_Type::Bitwise_And) : build_match(AST_NodePtr(new Bitwise_And_AST_Node()), prev_stack_top); break; case(AST_Node_Type::Bitwise_Xor) : build_match(AST_NodePtr(new Bitwise_Xor_AST_Node()), prev_stack_top); break; case(AST_Node_Type::Bitwise_Or) : build_match(AST_NodePtr(new Bitwise_Or_AST_Node()), prev_stack_top); break; case(AST_Node_Type::Logical_And) : build_match(AST_NodePtr(new Logical_And_AST_Node()), prev_stack_top); break; case(AST_Node_Type::Logical_Or) : build_match(AST_NodePtr(new Logical_Or_AST_Node()), prev_stack_top); break; default: throw Eval_Error("Internal error: unhandled ast_node", File_Position(line, col), filename); } } } } else { return Value(); } return retval; } /** * Reads a pair of values used to create a map initialization from input */ bool Map_Pair() { bool retval = false; int prev_stack_top = match_stack.size(); if (Operator()) { retval = true; if (Symbol(":")) { do { if (!Operator()) { throw Eval_Error("Incomplete map pair", File_Position(line, col), filename); } } while (retval && Symbol(":")); build_match(AST_NodePtr(new Map_Pair_AST_Node()), prev_stack_top); } } return retval; } /** * Reads a pair of values used to create a range initialization from input */ bool Value_Range() { bool retval = false; unsigned int prev_stack_top = match_stack.size(); std::string::const_iterator prev_pos = input_pos; int prev_col = col; if (Operator()) { if (Symbol("..")) { retval = true; if (!Operator()) { throw Eval_Error("Incomplete value range", File_Position(line, col), filename); } build_match(AST_NodePtr(new Value_Range_AST_Node()), prev_stack_top); } else { input_pos = prev_pos; col = prev_col; while (prev_stack_top != match_stack.size()) { match_stack.pop_back(); } } } return retval; } /** * Parses a string of binary equation operators */ bool Equation() { bool retval = false; int prev_stack_top = match_stack.size(); if (Operator()) { retval = true; if (Symbol("=", true, true) || Symbol(":=", true, true) || Symbol("+=", 
true, true) || Symbol("-=", true, true) || Symbol("*=", true, true) || Symbol("/=", true, true) || Symbol("%=", true, true) || Symbol("<<=", true, true) || Symbol(">>=", true, true) || Symbol("&=", true, true) || Symbol("^=", true, true) || Symbol("|=", true, true)) { if (!Equation()) { throw Eval_Error("Incomplete equation", File_Position(line, col), filename); } build_match(AST_NodePtr(new Equation_AST_Node()), prev_stack_top); } } return retval; } /** * Top level parser, starts parsing of all known parses */ bool Statements() { bool retval = false; bool has_more = true; bool saw_eol = true; while (has_more) { has_more = false; int prev_line = line; int prev_col = col; if (Def()) { if (!saw_eol) { throw Eval_Error("Two function definitions missing line separator", File_Position(prev_line, prev_col), filename); } has_more = true; retval = true; saw_eol = true; } else if (Try()) { if (!saw_eol) { throw Eval_Error("Two function definitions missing line separator", File_Position(prev_line, prev_col), filename); } has_more = true; retval = true; saw_eol = true; } else if (If()) { if (!saw_eol) { throw Eval_Error("Two function definitions missing line separator", File_Position(prev_line, prev_col), filename); } has_more = true; retval = true; saw_eol = true; } else if (While()) { if (!saw_eol) { throw Eval_Error("Two function definitions missing line separator", File_Position(prev_line, prev_col), filename); } has_more = true; retval = true; saw_eol = true; } else if (For()) { if (!saw_eol) { throw Eval_Error("Two function definitions missing line separator", File_Position(prev_line, prev_col), filename); } has_more = true; retval = true; saw_eol = true; } else if (Return()) { if (!saw_eol) { throw Eval_Error("Two expressions missing line separator", File_Position(prev_line, prev_col), filename); } has_more = true; retval = true; saw_eol = false; } else if (Break()) { if (!saw_eol) { throw Eval_Error("Two expressions missing line separator", File_Position(prev_line, 
prev_col), filename); } has_more = true; retval = true; saw_eol = false; } else if (Equation()) { if (!saw_eol) { throw Eval_Error("Two expressions missing line separator", File_Position(prev_line, prev_col), filename); } has_more = true; retval = true; saw_eol = false; } else if (Eol()) { has_more = true; retval = true; saw_eol = true; } else if (Block()) { has_more = true; retval = true; saw_eol = true; } else { has_more = false; } } return retval; } /** * Parses the given input string, tagging parsed ast_nodes with the given filename. */ bool parse(const std::string &input, const char *fname) { input_pos = input.begin(); input_end = input.end(); line = 1; col = 1; filename = fname; if ((input.size() > 1) && (input[0] == '#') && (input[1] == '!')) { while ((input_pos != input_end) && (!Eol())) { ++input_pos; } } if (Statements()) { if (input_pos != input_end) { throw Eval_Error("Unparsed input", File_Position(line, col), fname); } else { build_match(AST_NodePtr(new File_AST_Node()), 0); return true; } } else { return false; } } }; } #endif /* CHAISCRIPT_PARSER_HPP_ */