Add langkit namespace

This commit is contained in:
Jason Turner 2009-06-13 22:19:30 +00:00
parent ce52cae45c
commit 416242286a
5 changed files with 509 additions and 495 deletions

View File

@ -10,8 +10,8 @@
template <typename Eval_Engine> template <typename Eval_Engine>
class ChaiScript_System { class ChaiScript_System {
Lexer lexer; langkit::Lexer lexer;
Rule parser; langkit::Rule parser;
Eval_Engine engine; Eval_Engine engine;
public: public:
@ -30,10 +30,10 @@ public:
val = Cast_Helper<std::string &>()(vals[0]); val = Cast_Helper<std::string &>()(vals[0]);
} }
catch (std::exception &e) { catch (std::exception &e) {
throw EvalError("Can not evaluate string: " + val, TokenPtr()); throw EvalError("Can not evaluate string: " + val, langkit::TokenPtr());
} }
catch (EvalError &ee) { catch (EvalError &ee) {
throw EvalError("Can not evaluate string: " + val + " reason: " + ee.reason, TokenPtr()); throw EvalError("Can not evaluate string: " + val + " reason: " + ee.reason, langkit::TokenPtr());
} }
return evaluate_string(val); return evaluate_string(val);
} }
@ -57,7 +57,8 @@ public:
return ret_val; return ret_val;
} }
void debug_print(TokenPtr token, std::string prepend) { void debug_print(langkit::TokenPtr token, std::string prepend) {
using namespace langkit;
std::cout << prepend << "Token: " << token->text << "(" << tokentype_to_string(token->identifier) << ") @ " << token->filename std::cout << prepend << "Token: " << token->text << "(" << tokentype_to_string(token->identifier) << ") @ " << token->filename
<< ": (" << token->start.line << ", " << token->start.column << ") to (" << ": (" << token->start.line << ", " << token->start.column << ") to ("
<< token->end.line << ", " << token->end.column << ") " << std::endl; << token->end.line << ", " << token->end.column << ") " << std::endl;
@ -67,13 +68,15 @@ public:
} }
} }
void debug_print(std::vector<TokenPtr> &tokens) { void debug_print(std::vector<langkit::TokenPtr> &tokens) {
using namespace langkit;
for (unsigned int i = 0; i < tokens.size(); ++i) { for (unsigned int i = 0; i < tokens.size(); ++i) {
debug_print(tokens[i], ""); debug_print(tokens[i], "");
} }
} }
Lexer build_lexer() { langkit::Lexer build_lexer() {
using namespace langkit;
Lexer lexer; Lexer lexer;
lexer.set_skip(Pattern("[ \\t]+", TokenType::Whitespace)); lexer.set_skip(Pattern("[ \\t]+", TokenType::Whitespace));
lexer.set_line_sep(Pattern("\\n|\\r\\n", TokenType::Carriage_Return)); lexer.set_line_sep(Pattern("\\n|\\r\\n", TokenType::Carriage_Return));
@ -98,7 +101,8 @@ public:
return lexer; return lexer;
} }
Rule build_parser_rules() { langkit::Rule build_parser_rules() {
using namespace langkit;
Rule params; Rule params;
Rule block(TokenType::Scoped_Block); Rule block(TokenType::Scoped_Block);
Rule fundef(TokenType::Function_Def); Rule fundef(TokenType::Function_Def);
@ -184,7 +188,8 @@ public:
} }
Eval_Engine build_eval_system(Lexer &lexer, Rule &parser) { Eval_Engine build_eval_system(langkit::Lexer &lexer, langkit::Rule &parser) {
using namespace langkit;
Eval_Engine ss; Eval_Engine ss;
Bootstrap::bootstrap(ss); Bootstrap::bootstrap(ss);
bootstrap_vector<std::vector<Boxed_Value> >(ss, "Vector"); bootstrap_vector<std::vector<Boxed_Value> >(ss, "Vector");
@ -197,7 +202,8 @@ public:
return ss; return ss;
} }
TokenPtr parse(Rule &rule, std::vector<TokenPtr> &tokens, const char *filename) { langkit::TokenPtr parse(langkit::Rule &rule, std::vector<langkit::TokenPtr> &tokens, const char *filename) {
using namespace langkit;
Token_Iterator iter = tokens.begin(), end = tokens.end(); Token_Iterator iter = tokens.begin(), end = tokens.end();
TokenPtr parent(new Token("Root", TokenType::File, filename)); TokenPtr parent(new Token("Root", TokenType::File, filename));
@ -215,6 +221,7 @@ public:
} }
Boxed_Value evaluate_string(const std::string &input, const char *filename = "__EVAL__") { Boxed_Value evaluate_string(const std::string &input, const char *filename = "__EVAL__") {
using namespace langkit;
std::vector<TokenPtr> tokens = lexer.lex(input, filename); std::vector<TokenPtr> tokens = lexer.lex(input, filename);
Boxed_Value value; Boxed_Value value;

View File

@ -8,33 +8,33 @@
struct ParserError { struct ParserError {
std::string reason; std::string reason;
TokenPtr location; langkit::TokenPtr location;
ParserError(const std::string &why, const TokenPtr where) : reason(why), location(where){ } ParserError(const std::string &why, const langkit::TokenPtr where) : reason(why), location(where){ }
}; };
struct EvalError { struct EvalError {
std::string reason; std::string reason;
TokenPtr location; langkit::TokenPtr location;
EvalError(const std::string &why, const TokenPtr where) : reason(why), location(where) { } EvalError(const std::string &why, const langkit::TokenPtr where) : reason(why), location(where) { }
}; };
struct ReturnValue { struct ReturnValue {
Boxed_Value retval; Boxed_Value retval;
TokenPtr location; langkit::TokenPtr location;
ReturnValue(const Boxed_Value &return_value, const TokenPtr where) : retval(return_value), location(where) { } ReturnValue(const Boxed_Value &return_value, const langkit::TokenPtr where) : retval(return_value), location(where) { }
}; };
struct BreakLoop { struct BreakLoop {
TokenPtr location; langkit::TokenPtr location;
BreakLoop(const TokenPtr where) : location(where) { } BreakLoop(const langkit::TokenPtr where) : location(where) { }
}; };
template <typename Eval_System> template <typename Eval_System>
const Boxed_Value eval_function (Eval_System &ss, TokenPtr node, const std::vector<std::string> &param_names, const std::vector<Boxed_Value> &vals) { const Boxed_Value eval_function (Eval_System &ss, langkit::TokenPtr node, const std::vector<std::string> &param_names, const std::vector<Boxed_Value> &vals) {
ss.new_scope(); ss.new_scope();
for (unsigned int i = 0; i < param_names.size(); ++i) { for (unsigned int i = 0; i < param_names.size(); ++i) {
@ -47,7 +47,7 @@ const Boxed_Value eval_function (Eval_System &ss, TokenPtr node, const std::vect
} }
template <typename Eval_System> template <typename Eval_System>
Boxed_Value eval_token(Eval_System &ss, TokenPtr node) { Boxed_Value eval_token(Eval_System &ss, langkit::TokenPtr node) {
Boxed_Value retval; Boxed_Value retval;
unsigned int i, j; unsigned int i, j;

View File

@ -8,197 +8,199 @@
#include <tr1/memory> #include <tr1/memory>
#include <string> #include <string>
struct File_Position { namespace langkit
int line; {
int column; struct File_Position {
int line;
int column;
File_Position(int file_line, int file_column) File_Position(int file_line, int file_column)
: line(file_line), column(file_column) { } : line(file_line), column(file_column) { }
File_Position() : line(0), column(0) { } File_Position() : line(0), column(0) { }
}; };
struct Pattern { struct Pattern {
boost::regex regex; boost::regex regex;
int identifier; int identifier;
Pattern() { } Pattern() { }
Pattern(const std::string &regexp, int id) : regex(regexp), identifier(id) { } Pattern(const std::string &regexp, int id) : regex(regexp), identifier(id) { }
}; };
typedef std::tr1::shared_ptr<struct Token> TokenPtr; typedef std::tr1::shared_ptr<struct Token> TokenPtr;
struct Token { struct Token {
std::string text; std::string text;
int identifier; int identifier;
const char *filename; const char *filename;
File_Position start, end; File_Position start, end;
std::vector<TokenPtr> children; std::vector<TokenPtr> children;
Token(const std::string &token_text, int id, const char *fname) : text(token_text), identifier(id), filename(fname) { } Token(const std::string &token_text, int id, const char *fname) : text(token_text), identifier(id), filename(fname) { }
}; };
struct Lexer { struct Lexer {
std::vector<Pattern> lex_patterns; std::vector<Pattern> lex_patterns;
Pattern skip_pattern; Pattern skip_pattern;
Pattern command_sep_pattern; Pattern command_sep_pattern;
Pattern line_sep_pattern; Pattern line_sep_pattern;
Pattern multiline_comment_start_pattern; Pattern multiline_comment_start_pattern;
Pattern multiline_comment_end_pattern; Pattern multiline_comment_end_pattern;
Pattern singleline_comment_pattern; Pattern singleline_comment_pattern;
Lexer operator<<(const Pattern &p) { Lexer operator<<(const Pattern &p) {
lex_patterns.push_back(p); lex_patterns.push_back(p);
return *this; return *this;
} }
std::vector<TokenPtr> lex(const std::string &input, const char *filename) { std::vector<TokenPtr> lex(const std::string &input, const char *filename) {
std::vector<Pattern>::iterator iter, end, iter2, end2; std::vector<Pattern>::iterator iter, end, iter2, end2;
std::vector<TokenPtr> retval; std::vector<TokenPtr> retval;
bool found; bool found;
std::string::const_iterator input_iter = input.begin(), input_end = input.end(); std::string::const_iterator input_iter = input.begin(), input_end = input.end();
int current_col = 0; int current_col = 0;
int current_line = 0; int current_line = 0;
boost::match_results<std::string::const_iterator> what; boost::match_results<std::string::const_iterator> what;
while (input_iter != input_end) { while (input_iter != input_end) {
found = false; found = false;
if (regex_search(input_iter, input_end, what, singleline_comment_pattern.regex, boost::match_continuous)) { if (regex_search(input_iter, input_end, what, singleline_comment_pattern.regex, boost::match_continuous)) {
std::string comment_start(what[0]); std::string comment_start(what[0]);
input_iter += comment_start.size(); input_iter += comment_start.size();
bool found_eol = false; bool found_eol = false;
while ((!found_eol) && (input_iter != input_end)) { while ((!found_eol) && (input_iter != input_end)) {
boost::match_results<std::string::const_iterator> eol_delim; boost::match_results<std::string::const_iterator> eol_delim;
if (regex_search(input_iter, input_end, eol_delim, line_sep_pattern.regex, boost::match_continuous)) { if (regex_search(input_iter, input_end, eol_delim, line_sep_pattern.regex, boost::match_continuous)) {
std::string comment_end(eol_delim[0]); std::string comment_end(eol_delim[0]);
input_iter += comment_end.size(); input_iter += comment_end.size();
++current_line; ++current_line;
current_col = 0; current_col = 0;
found_eol = true; found_eol = true;
break; break;
} }
if ((!found_eol) && (input_iter != input_end)) { if ((!found_eol) && (input_iter != input_end)) {
++input_iter; ++input_iter;
}
} }
} }
} else if (regex_search(input_iter, input_end, what, multiline_comment_start_pattern.regex, boost::match_continuous)) {
else if (regex_search(input_iter, input_end, what, multiline_comment_start_pattern.regex, boost::match_continuous)) { std::string comment_start(what[0]);
std::string comment_start(what[0]); input_iter += comment_start.size();
input_iter += comment_start.size();
bool found_eoc = false; bool found_eoc = false;
while ((!found_eoc) && (input_iter != input_end)) { while ((!found_eoc) && (input_iter != input_end)) {
boost::match_results<std::string::const_iterator> eol_delim; boost::match_results<std::string::const_iterator> eol_delim;
if (regex_search(input_iter, input_end, eol_delim, line_sep_pattern.regex, boost::match_continuous)) { if (regex_search(input_iter, input_end, eol_delim, line_sep_pattern.regex, boost::match_continuous)) {
std::string comment_end(eol_delim[0]); std::string comment_end(eol_delim[0]);
input_iter += comment_end.size(); input_iter += comment_end.size();
++current_line; ++current_line;
current_col = 0; current_col = 0;
}
boost::match_results<std::string::const_iterator> eoc_delim;
if (regex_search(input_iter, input_end, eoc_delim, multiline_comment_end_pattern.regex, boost::match_continuous)) {
std::string comment_end(eoc_delim[0]);
input_iter += comment_end.size();
current_col += comment_end.size();
found_eoc = true;
break;
}
if ((!found_eoc) && (input_iter != input_end)) {
++input_iter;
++current_col;
}
} }
boost::match_results<std::string::const_iterator> eoc_delim;
if (regex_search(input_iter, input_end, eoc_delim, multiline_comment_end_pattern.regex, boost::match_continuous)) { if (!found_eoc) {
std::string comment_end(eoc_delim[0]); std::cout << "Incomplete comment block! Add exceptions!" << std::endl;
input_iter += comment_end.size(); return retval;
current_col += comment_end.size();
found_eoc = true;
break;
}
if ((!found_eoc) && (input_iter != input_end)) {
++input_iter;
++current_col;
} }
} }
else if (regex_search(input_iter, input_end, what, skip_pattern.regex, boost::match_continuous)) {
if (!found_eoc) { std::string whitespace(what[0]);
std::cout << "Incomplete comment block! Add exceptions!" << std::endl; input_iter += whitespace.size();
return retval; current_col += whitespace.size();
found = true;
} }
} else if (regex_search(input_iter, input_end, what, line_sep_pattern.regex, boost::match_continuous)) {
else if (regex_search(input_iter, input_end, what, skip_pattern.regex, boost::match_continuous)) { const std::string cr(what[0]);
std::string whitespace(what[0]);
input_iter += whitespace.size();
current_col += whitespace.size();
found = true;
}
else if (regex_search(input_iter, input_end, what, line_sep_pattern.regex, boost::match_continuous)) {
const std::string cr(what[0]);
boost::match_results<std::string::const_iterator> if_delim; boost::match_results<std::string::const_iterator> if_delim;
if (regex_search(cr.begin(), cr.end(), if_delim, command_sep_pattern.regex, boost::match_continuous)) { if (regex_search(cr.begin(), cr.end(), if_delim, command_sep_pattern.regex, boost::match_continuous)) {
TokenPtr t(new Token(if_delim[0], command_sep_pattern.identifier, filename)); TokenPtr t(new Token(if_delim[0], command_sep_pattern.identifier, filename));
t->start.column = current_col;
t->start.line = current_line;
current_col += t->text.size();
t->end.column = current_col;
t->end.line = current_line;
retval.push_back(t);
}
input_iter += cr.size();
++current_line;
current_col = 0;
found = true;
}
else if (regex_search(input_iter, input_end, what, command_sep_pattern.regex, boost::match_continuous)) {
TokenPtr t(new Token(what[0], command_sep_pattern.identifier, filename));
t->start.column = current_col;
t->start.line = current_line;
current_col += t->text.size();
t->end.column = current_col;
t->end.line = current_line;
retval.push_back(t);
input_iter += t->text.size();
found = true;
}
else {
for (iter = lex_patterns.begin(), end = lex_patterns.end(); iter != end; ++iter) {
if (regex_search(input_iter, input_end, what, iter->regex, boost::match_continuous)) {
TokenPtr t(new Token(what[0], iter->identifier, filename));
t->start.column = current_col; t->start.column = current_col;
t->start.line = current_line; t->start.line = current_line;
current_col += t->text.size(); current_col += t->text.size();
t->end.column = current_col; t->end.column = current_col;
t->end.line = current_line; t->end.line = current_line;
retval.push_back(t); retval.push_back(t);
input_iter += t->text.size(); }
found = true;
break; input_iter += cr.size();
++current_line;
current_col = 0;
found = true;
}
else if (regex_search(input_iter, input_end, what, command_sep_pattern.regex, boost::match_continuous)) {
TokenPtr t(new Token(what[0], command_sep_pattern.identifier, filename));
t->start.column = current_col;
t->start.line = current_line;
current_col += t->text.size();
t->end.column = current_col;
t->end.line = current_line;
retval.push_back(t);
input_iter += t->text.size();
found = true;
}
else {
for (iter = lex_patterns.begin(), end = lex_patterns.end(); iter != end; ++iter) {
if (regex_search(input_iter, input_end, what, iter->regex, boost::match_continuous)) {
TokenPtr t(new Token(what[0], iter->identifier, filename));
t->start.column = current_col;
t->start.line = current_line;
current_col += t->text.size();
t->end.column = current_col;
t->end.line = current_line;
retval.push_back(t);
input_iter += t->text.size();
found = true;
break;
}
}
if (!found) {
const std::string err(input_iter, input_end);
std::cout << "Unknown string at: " << err << std::endl;
return retval;
} }
} }
if (!found) {
const std::string err(input_iter, input_end);
std::cout << "Unknown string at: " << err << std::endl;
return retval;
}
} }
return retval;
} }
return retval;
}
void set_skip(const Pattern &p) {
skip_pattern = p;
}
void set_line_sep(const Pattern &p) {
line_sep_pattern = p;
}
void set_command_sep(const Pattern &p) {
command_sep_pattern = p;
}
void set_multiline_comment(const Pattern &start, const Pattern &end) {
multiline_comment_start_pattern = start;
multiline_comment_end_pattern = end;
}
void set_singleline_comment(const Pattern &p) {
singleline_comment_pattern = p;
}
};
void set_skip(const Pattern &p) {
skip_pattern = p;
}
void set_line_sep(const Pattern &p) {
line_sep_pattern = p;
}
void set_command_sep(const Pattern &p) {
command_sep_pattern = p;
}
void set_multiline_comment(const Pattern &start, const Pattern &end) {
multiline_comment_start_pattern = start;
multiline_comment_end_pattern = end;
}
void set_singleline_comment(const Pattern &p) {
singleline_comment_pattern = p;
}
};
}
#endif /* LANGKIT_LEXER_HPP_ */ #endif /* LANGKIT_LEXER_HPP_ */

View File

@ -8,96 +8,76 @@
#include "langkit_lexer.hpp" #include "langkit_lexer.hpp"
struct RuleImpl; namespace langkit
{
struct RuleImpl;
typedef std::vector<TokenPtr>::iterator Token_Iterator; typedef std::vector<TokenPtr>::iterator Token_Iterator;
typedef boost::function<std::pair<Token_Iterator, bool>(Token_Iterator, Token_Iterator, TokenPtr, bool, int)> RuleFun; typedef boost::function<std::pair<Token_Iterator, bool>(Token_Iterator, Token_Iterator, TokenPtr, bool, int)> RuleFun;
typedef std::tr1::shared_ptr<RuleImpl> RuleImplPtr; typedef std::tr1::shared_ptr<RuleImpl> RuleImplPtr;
struct RuleImpl { struct RuleImpl {
RuleFun rule; RuleFun rule;
bool keep; bool keep;
int new_id; int new_id;
RuleImpl() : keep(true), new_id(-1) {} RuleImpl() : keep(true), new_id(-1) {}
RuleImpl(int id) : keep(true), new_id(id) {} RuleImpl(int id) : keep(true), new_id(id) {}
RuleImpl(RuleFun fun) : rule(fun), keep(true), new_id(-1) {} RuleImpl(RuleFun fun) : rule(fun), keep(true), new_id(-1) {}
RuleImpl(RuleFun fun, bool keep_match) : rule(fun), keep(keep_match), new_id(-1) {} RuleImpl(RuleFun fun, bool keep_match) : rule(fun), keep(keep_match), new_id(-1) {}
std::pair<Token_Iterator, bool> operator()(Token_Iterator iter, Token_Iterator end, TokenPtr parent) { std::pair<Token_Iterator, bool> operator()(Token_Iterator iter, Token_Iterator end, TokenPtr parent) {
return rule(iter, end, parent, keep, new_id); return rule(iter, end, parent, keep, new_id);
}
};
//struct Rule;
template <typename T_Iter>
std::pair<T_Iter, bool> String_Rule(T_Iter iter, T_Iter end, TokenPtr parent, bool keep, int new_id, const std::string &val) {
if (iter != end) {
if ((*iter)->text == val) {
if (keep) {
parent->children.push_back(*iter);
}
return std::pair<T_Iter, bool>(++iter, true);
} }
} };
return std::pair<T_Iter, bool>(iter, false); //struct Rule;
}
template <typename T_Iter> template <typename T_Iter>
std::pair<T_Iter, bool> Id_Rule(T_Iter iter, T_Iter end, TokenPtr parent, bool keep, int new_id, const int val) { std::pair<T_Iter, bool> String_Rule(T_Iter iter, T_Iter end, TokenPtr parent, bool keep, int new_id, const std::string &val) {
if (iter != end) { if (iter != end) {
if ((*iter)->identifier == val) { if ((*iter)->text == val) {
if (keep) { if (keep) {
parent->children.push_back(*iter); parent->children.push_back(*iter);
}
return std::pair<T_Iter, bool>(++iter, true);
}
}
return std::pair<T_Iter, bool>(iter, false);
}
template <typename T_Iter, typename R_Type>
std::pair<T_Iter, bool> Or_Rule(T_Iter iter, T_Iter end, TokenPtr parent, bool keep, int new_id, R_Type lhs, R_Type rhs) {
T_Iter new_iter;
unsigned int prev_size;
TokenPtr prev_parent = parent;
if (new_id != -1) {
parent = TokenPtr(new Token("", new_id, parent->filename));
}
prev_size = parent->children.size();
if (iter != end) {
std::pair<T_Iter, bool> result = lhs(iter, end, parent);
if (result.second) {
if (new_id != -1) {
parent->filename = (*iter)->filename;
parent->start = (*iter)->start;
if (result.first == iter) {
parent->end = (*iter)->start;
} }
else { return std::pair<T_Iter, bool>(++iter, true);
parent->end = (*(result.first - 1))->end;
}
prev_parent->children.push_back(parent);
} }
return std::pair<T_Iter, bool>(result.first, true);
} }
else {
if (parent->children.size() != prev_size) {
//Clear out the partial matches
parent->children.erase(parent->children.begin() + prev_size, parent->children.end());
}
result = rhs(iter, end, parent); return std::pair<T_Iter, bool>(iter, false);
}
template <typename T_Iter>
std::pair<T_Iter, bool> Id_Rule(T_Iter iter, T_Iter end, TokenPtr parent, bool keep, int new_id, const int val) {
if (iter != end) {
if ((*iter)->identifier == val) {
if (keep) {
parent->children.push_back(*iter);
}
return std::pair<T_Iter, bool>(++iter, true);
}
}
return std::pair<T_Iter, bool>(iter, false);
}
template <typename T_Iter, typename R_Type>
std::pair<T_Iter, bool> Or_Rule(T_Iter iter, T_Iter end, TokenPtr parent, bool keep, int new_id, R_Type lhs, R_Type rhs) {
T_Iter new_iter;
unsigned int prev_size;
TokenPtr prev_parent = parent;
if (new_id != -1) {
parent = TokenPtr(new Token("", new_id, parent->filename));
}
prev_size = parent->children.size();
if (iter != end) {
std::pair<T_Iter, bool> result = lhs(iter, end, parent);
if (result.second) { if (result.second) {
if (new_id != -1) { if (new_id != -1) {
parent->filename = (*iter)->filename; parent->filename = (*iter)->filename;
parent->start = (*iter)->start; parent->start = (*iter)->start;
if (result.first == iter) { if (result.first == iter) {
@ -106,129 +86,105 @@ std::pair<T_Iter, bool> Or_Rule(T_Iter iter, T_Iter end, TokenPtr parent, bool k
else { else {
parent->end = (*(result.first - 1))->end; parent->end = (*(result.first - 1))->end;
} }
prev_parent->children.push_back(parent); prev_parent->children.push_back(parent);
} }
return std::pair<T_Iter, bool>(result.first, true); return std::pair<T_Iter, bool>(result.first, true);
} }
} else {
} if (parent->children.size() != prev_size) {
//Clear out the partial matches
if (parent->children.size() != prev_size) { parent->children.erase(parent->children.begin() + prev_size, parent->children.end());
//Clear out the partial matches }
parent->children.erase(parent->children.begin() + prev_size, parent->children.end());
} result = rhs(iter, end, parent);
if (result.second) {
return std::pair<T_Iter, bool>(iter, false); if (new_id != -1) {
}
parent->filename = (*iter)->filename;
template <typename T_Iter, typename R_Type> parent->start = (*iter)->start;
std::pair<Token_Iterator, bool> And_Rule(T_Iter iter, T_Iter end, TokenPtr parent, bool keep, int new_id, R_Type lhs, R_Type rhs) { if (result.first == iter) {
T_Iter lhs_iter, rhs_iter; parent->end = (*iter)->start;
unsigned int prev_size; }
TokenPtr prev_parent = parent; else {
parent->end = (*(result.first - 1))->end;
if (new_id != -1) { }
parent = TokenPtr(new Token("", new_id, parent->filename));
} prev_parent->children.push_back(parent);
}
prev_size = parent->children.size(); return std::pair<T_Iter, bool>(result.first, true);
if (iter != end) {
std::pair<T_Iter, bool> result = lhs(iter, end, parent);
if (result.second) {
result = rhs(result.first, end, parent);
if (result.second) {
if (new_id != -1) {
parent->filename = (*iter)->filename;
parent->start = (*iter)->start;
if (result.first == iter) {
parent->end = (*iter)->start;
}
else {
parent->end = (*(result.first - 1))->end;
}
prev_parent->children.push_back(parent);
} }
return std::pair<Token_Iterator, bool>(result.first, true);
} }
} }
if (parent->children.size() != prev_size) {
//Clear out the partial matches
parent->children.erase(parent->children.begin() + prev_size, parent->children.end());
}
return std::pair<T_Iter, bool>(iter, false);
} }
if (parent->children.size() != prev_size) { template <typename T_Iter, typename R_Type>
//Clear out the partial matches std::pair<Token_Iterator, bool> And_Rule(T_Iter iter, T_Iter end, TokenPtr parent, bool keep, int new_id, R_Type lhs, R_Type rhs) {
parent->children.erase(parent->children.begin() + prev_size, parent->children.end()); T_Iter lhs_iter, rhs_iter;
} unsigned int prev_size;
TokenPtr prev_parent = parent;
return std::pair<T_Iter, bool>(iter, false);
}
template <typename T_Iter, typename R_Type>
std::pair<T_Iter, bool> Kleene_Rule
(T_Iter iter, T_Iter end, TokenPtr parent, bool keep, int new_id, R_Type rule) {
TokenPtr prev_parent = parent;
std::pair<T_Iter, bool> result;
T_Iter new_iter = iter;
if (iter != end) {
if (new_id != -1) { if (new_id != -1) {
parent = TokenPtr(new Token("", new_id, parent->filename)); parent = TokenPtr(new Token("", new_id, parent->filename));
} }
result.second = true; prev_size = parent->children.size();
while (result.second == true) {
result = rule(new_iter, end, parent); if (iter != end) {
new_iter = result.first; std::pair<T_Iter, bool> result = lhs(iter, end, parent);
if (result.second) {
result = rhs(result.first, end, parent);
if (result.second) {
if (new_id != -1) {
parent->filename = (*iter)->filename;
parent->start = (*iter)->start;
if (result.first == iter) {
parent->end = (*iter)->start;
}
else {
parent->end = (*(result.first - 1))->end;
}
prev_parent->children.push_back(parent);
}
return std::pair<Token_Iterator, bool>(result.first, true);
}
}
} }
if (new_id != -1) { if (parent->children.size() != prev_size) {
//Clear out the partial matches
parent->filename = (*iter)->filename; parent->children.erase(parent->children.begin() + prev_size, parent->children.end());
parent->start = (*iter)->start;
if (result.first == iter) {
parent->end = (*iter)->start;
}
else {
parent->end = (*(result.first - 1))->end;
}
prev_parent->children.push_back(parent);
} }
return std::pair<T_Iter, bool>(result.first, true);
}
else {
return std::pair<T_Iter, bool>(iter, true);
}
}
template <typename T_Iter, typename R_Type> return std::pair<T_Iter, bool>(iter, false);
std::pair<T_Iter, bool> Plus_Rule
(T_Iter iter, T_Iter end, TokenPtr parent, bool keep, int new_id, R_Type rule) {
unsigned int prev_size;
TokenPtr prev_parent = parent;
T_Iter loop_iter = iter;
if (new_id != -1) {
parent = TokenPtr(new Token("", new_id, parent->filename));
} }
prev_size = parent->children.size(); template <typename T_Iter, typename R_Type>
std::pair<T_Iter, bool> Kleene_Rule
(T_Iter iter, T_Iter end, TokenPtr parent, bool keep, int new_id, R_Type rule) {
if (iter != end) { TokenPtr prev_parent = parent;
std::pair<T_Iter, bool> result; std::pair<T_Iter, bool> result;
result = rule(loop_iter, end, parent); T_Iter new_iter = iter;
if (iter != end) {
if (new_id != -1) {
parent = TokenPtr(new Token("", new_id, parent->filename));
}
if (result.second == true) {
loop_iter = result.first;
result.second = true; result.second = true;
while ((loop_iter != end) && (result.second == true)) { while (result.second == true) {
result = rule(loop_iter, end, parent); result = rule(new_iter, end, parent);
loop_iter = result.first; new_iter = result.first;
} }
if (new_id != -1) { if (new_id != -1) {
@ -244,40 +200,122 @@ std::pair<T_Iter, bool> Plus_Rule
prev_parent->children.push_back(parent); prev_parent->children.push_back(parent);
} }
return std::pair<T_Iter, bool>(result.first, true); return std::pair<T_Iter, bool>(result.first, true);
} }
else {
return std::pair<T_Iter, bool>(iter, true);
}
} }
if (parent->children.size() != prev_size) { template <typename T_Iter, typename R_Type>
//Clear out the partial matches std::pair<T_Iter, bool> Plus_Rule
parent->children.erase(parent->children.begin() + prev_size, parent->children.end()); (T_Iter iter, T_Iter end, TokenPtr parent, bool keep, int new_id, R_Type rule) {
unsigned int prev_size;
TokenPtr prev_parent = parent;
T_Iter loop_iter = iter;
if (new_id != -1) {
parent = TokenPtr(new Token("", new_id, parent->filename));
}
prev_size = parent->children.size();
if (iter != end) {
std::pair<T_Iter, bool> result;
result = rule(loop_iter, end, parent);
if (result.second == true) {
loop_iter = result.first;
result.second = true;
while ((loop_iter != end) && (result.second == true)) {
result = rule(loop_iter, end, parent);
loop_iter = result.first;
}
if (new_id != -1) {
parent->filename = (*iter)->filename;
parent->start = (*iter)->start;
if (result.first == iter) {
parent->end = (*iter)->start;
}
else {
parent->end = (*(result.first - 1))->end;
}
prev_parent->children.push_back(parent);
}
return std::pair<T_Iter, bool>(result.first, true);
}
}
if (parent->children.size() != prev_size) {
//Clear out the partial matches
parent->children.erase(parent->children.begin() + prev_size, parent->children.end());
}
return std::pair<T_Iter, bool>(iter, false);
} }
return std::pair<T_Iter, bool>(iter, false); template <typename T_Iter, typename R_Type>
} std::pair<T_Iter, bool> Optional_Rule
(T_Iter iter, T_Iter end, TokenPtr parent, bool keep, int new_id, R_Type rule) {
template <typename T_Iter, typename R_Type> TokenPtr prev_parent = parent;
std::pair<T_Iter, bool> Optional_Rule T_Iter new_iter = iter;
(T_Iter iter, T_Iter end, TokenPtr parent, bool keep, int new_id, R_Type rule) {
TokenPtr prev_parent = parent; if (iter != end) {
T_Iter new_iter = iter; if (new_id != -1) {
parent = TokenPtr(new Token("", new_id, parent->filename));
}
std::pair<T_Iter, bool> result;
result.second = true;
if ((new_iter != end) && (result.second == true)) {
result = rule(new_iter, end, parent);
new_iter = result.first;
}
if (new_id != -1) {
parent->filename = (*iter)->filename;
parent->start = (*iter)->start;
if (result.first == iter) {
parent->end = (*iter)->start;
}
else {
parent->end = (*(result.first - 1))->end;
}
prev_parent->children.push_back(parent);
}
return std::pair<T_Iter, bool>(result.first, true);
}
else {
return std::pair<T_Iter, bool>(iter, true);
}
}
template <typename T_Iter, typename R_Type>
std::pair<T_Iter, bool> Epsilon_Rule
(T_Iter iter, T_Iter end, TokenPtr parent, bool keep, int new_id, R_Type rule) {
TokenPtr prev_parent = parent;
T_Iter new_iter = iter;
if (iter != end) {
if (new_id != -1) { if (new_id != -1) {
parent = TokenPtr(new Token("", new_id, parent->filename)); parent = TokenPtr(new Token("", new_id, parent->filename));
} }
std::pair<T_Iter, bool> result; std::pair<T_Iter, bool> result;
result.second = true; if ((new_iter != end)) {
if ((new_iter != end) && (result.second == true)) {
result = rule(new_iter, end, parent); result = rule(new_iter, end, parent);
new_iter = result.first; new_iter = result.first;
} }
if (new_id != -1) { if (new_id != -1) {
parent->filename = (*iter)->filename; parent->filename = (*iter)->filename;
parent->start = (*iter)->start; parent->start = (*iter)->start;
if (result.first == iter) { if (result.first == iter) {
@ -289,158 +327,123 @@ std::pair<T_Iter, bool> Optional_Rule
prev_parent->children.push_back(parent); prev_parent->children.push_back(parent);
} }
return std::pair<T_Iter, bool>(result.first, true);
}
else {
return std::pair<T_Iter, bool>(iter, true);
}
}
template <typename T_Iter, typename R_Type> return std::pair<T_Iter, bool>(iter, result.second);
std::pair<T_Iter, bool> Epsilon_Rule
(T_Iter iter, T_Iter end, TokenPtr parent, bool keep, int new_id, R_Type rule) {
TokenPtr prev_parent = parent;
T_Iter new_iter = iter;
if (new_id != -1) {
parent = TokenPtr(new Token("", new_id, parent->filename));
} }
std::pair<T_Iter, bool> result; template <typename T_Iter, typename R_Type>
if ((new_iter != end)) { std::pair<T_Iter, bool> Wrap_Rule
result = rule(new_iter, end, parent); (T_Iter iter, T_Iter end, TokenPtr parent, bool keep, int new_id, R_Type rule) {
new_iter = result.first;
}
if (new_id != -1) { TokenPtr prev_parent = parent;
parent->filename = (*iter)->filename; T_Iter new_iter = iter;
parent->start = (*iter)->start;
if (result.first == iter) { if (new_id != -1) {
parent->end = (*iter)->start; parent = TokenPtr(new Token("", new_id, parent->filename));
}
else {
parent->end = (*(result.first - 1))->end;
} }
prev_parent->children.push_back(parent); std::pair<T_Iter, bool> result;
} if ((new_iter != end)) {
result = rule(new_iter, end, parent);
return std::pair<T_Iter, bool>(iter, result.second); new_iter = result.first;
}
template <typename T_Iter, typename R_Type>
std::pair<T_Iter, bool> Wrap_Rule
(T_Iter iter, T_Iter end, TokenPtr parent, bool keep, int new_id, R_Type rule) {
TokenPtr prev_parent = parent;
T_Iter new_iter = iter;
if (new_id != -1) {
parent = TokenPtr(new Token("", new_id, parent->filename));
}
std::pair<T_Iter, bool> result;
if ((new_iter != end)) {
result = rule(new_iter, end, parent);
new_iter = result.first;
}
if (new_id != -1) {
parent->filename = (*iter)->filename;
parent->start = (*iter)->start;
if (result.first == iter) {
parent->end = (*iter)->start;
}
else {
parent->end = (*(result.first - 1))->end;
} }
prev_parent->children.push_back(parent); if (new_id != -1) {
parent->filename = (*iter)->filename;
parent->start = (*iter)->start;
if (result.first == iter) {
parent->end = (*iter)->start;
}
else {
parent->end = (*(result.first - 1))->end;
}
prev_parent->children.push_back(parent);
}
return std::pair<T_Iter, bool>(result.first, result.second);
} }
return std::pair<T_Iter, bool>(result.first, result.second); template <typename T_Iter, typename R_Type>
} std::pair<T_Iter, bool> Ignore_Rule
(T_Iter iter, T_Iter end, TokenPtr parent, bool keep, int new_id, R_Type rule) {
template <typename T_Iter, typename R_Type> rule.impl->keep = false;
std::pair<T_Iter, bool> Ignore_Rule
(T_Iter iter, T_Iter end, TokenPtr parent, bool keep, int new_id, R_Type rule) {
rule.impl->keep = false; return rule(iter, end, parent);
return rule(iter, end, parent);
}
struct Rule {
RuleImplPtr impl;
Rule() : impl(new RuleImpl()) {}
Rule(int id) : impl(new RuleImpl(id)) {}
Rule(RuleFun fun) : impl(new RuleImpl(fun)) {}
Rule(RuleFun fun, bool keep) : impl(new RuleImpl(fun, keep)) {}
std::pair<Token_Iterator, bool> operator()(Token_Iterator iter, Token_Iterator end, TokenPtr parent) {
return (*impl)(iter, end, parent);
} }
Rule &operator=(const Rule &rule) { struct Rule {
int prev_id = impl->new_id; RuleImplPtr impl;
*impl = *(rule.impl);
impl->new_id = prev_id;
return *this; Rule() : impl(new RuleImpl()) {}
Rule(int id) : impl(new RuleImpl(id)) {}
Rule(RuleFun fun) : impl(new RuleImpl(fun)) {}
Rule(RuleFun fun, bool keep) : impl(new RuleImpl(fun, keep)) {}
std::pair<Token_Iterator, bool> operator()(Token_Iterator iter, Token_Iterator end, TokenPtr parent) {
return (*impl)(iter, end, parent);
}
Rule &operator=(const Rule &rule) {
int prev_id = impl->new_id;
*impl = *(rule.impl);
impl->new_id = prev_id;
return *this;
}
};
inline Rule operator>>(const Rule &lhs, const Rule &rhs) {
return Rule(boost::bind(And_Rule<Token_Iterator, Rule>, _1, _2, _3, _4, _5, lhs, rhs));
} }
}; inline Rule operator|(const Rule &lhs, const Rule &rhs) {
return Rule(boost::bind(Or_Rule<Token_Iterator, Rule>, _1, _2, _3, _4, _5, lhs, rhs));
}
inline Rule operator>>(const Rule &lhs, const Rule &rhs) { inline Rule operator*(const Rule &operand) {
return Rule(boost::bind(And_Rule<Token_Iterator, Rule>, _1, _2, _3, _4, _5, lhs, rhs)); return Rule(boost::bind(Kleene_Rule<Token_Iterator, Rule>, _1, _2, _3, _4, _5, operand));
} }
inline Rule operator|(const Rule &lhs, const Rule &rhs) { inline Rule operator+(const Rule &operand) {
return Rule(boost::bind(Or_Rule<Token_Iterator, Rule>, _1, _2, _3, _4, _5, lhs, rhs)); return Rule(boost::bind(Plus_Rule<Token_Iterator, Rule>, _1, _2, _3, _4, _5, operand));
} }
inline Rule operator*(const Rule &operand) { inline Rule operator~(const Rule &operand) {
return Rule(boost::bind(Kleene_Rule<Token_Iterator, Rule>, _1, _2, _3, _4, _5, operand)); return Rule(boost::bind(Optional_Rule<Token_Iterator, Rule>, _1, _2, _3, _4, _5, operand));
} }
inline Rule operator+(const Rule &operand) {
return Rule(boost::bind(Plus_Rule<Token_Iterator, Rule>, _1, _2, _3, _4, _5, operand));
}
inline Rule operator~(const Rule &operand) {
return Rule(boost::bind(Optional_Rule<Token_Iterator, Rule>, _1, _2, _3, _4, _5, operand));
}
template<typename ItrType, typename ParamType, template<typename ItrType, typename ParamType,
std::pair<ItrType,bool> (*Function)(ItrType, ItrType, TokenPtr, bool, int, ParamType)> std::pair<ItrType,bool> (*Function)(ItrType, ItrType, TokenPtr, bool, int, ParamType)>
struct Rule_Builder struct Rule_Builder
{
Rule_Builder(ParamType p, bool t_keep = true)
: m_p(p), m_keep(t_keep)
{ {
Rule_Builder(ParamType p, bool t_keep = true)
: m_p(p), m_keep(t_keep)
{
} }
// Auto conversion operator is the glue here. // Auto conversion operator is the glue here.
// In one sense this option cleans up the impl quite a bit, with much fewer code // In one sense this option cleans up the impl quite a bit, with much fewer code
// repeats in all the rule builders. // repeats in all the rule builders.
// In another sense, it might take a couple of tries to get it right. // In another sense, it might take a couple of tries to get it right.
operator Rule() { operator Rule() {
return Rule(boost::bind(Function, _1, _2, _3, _4, _5, m_p), m_keep); return Rule(boost::bind(Function, _1, _2, _3, _4, _5, m_p), m_keep);
} }
ParamType m_p; ParamType m_p;
bool m_keep; bool m_keep;
}; };
typedef Rule_Builder<Token_Iterator, Rule, &Epsilon_Rule<Token_Iterator, Rule> > Epsilon; typedef Rule_Builder<Token_Iterator, Rule, &Epsilon_Rule<Token_Iterator, Rule> > Epsilon;
typedef Rule_Builder<Token_Iterator, Rule, &Wrap_Rule<Token_Iterator, Rule> > Wrap; typedef Rule_Builder<Token_Iterator, Rule, &Wrap_Rule<Token_Iterator, Rule> > Wrap;
typedef Rule_Builder<Token_Iterator, Rule, &Ignore_Rule<Token_Iterator, Rule> > Ign; typedef Rule_Builder<Token_Iterator, Rule, &Ignore_Rule<Token_Iterator, Rule> > Ign;
typedef Rule_Builder<Token_Iterator, int, &Id_Rule<Token_Iterator> > Id; typedef Rule_Builder<Token_Iterator, int, &Id_Rule<Token_Iterator> > Id;
typedef Rule_Builder<Token_Iterator, const std::string&, &String_Rule<Token_Iterator> > Str; typedef Rule_Builder<Token_Iterator, const std::string&, &String_Rule<Token_Iterator> > Str;
}
#endif /* LANGKIT_PARSER_HPP_ */ #endif /* LANGKIT_PARSER_HPP_ */

View File

@ -24,7 +24,7 @@ const char *tokentype_to_string(int tokentype) {
return token_types[tokentype]; return token_types[tokentype];
} }
void debug_print(TokenPtr token, std::string prepend) { void debug_print(langkit::TokenPtr token, std::string prepend) {
std::cout << prepend << "Token: " << token->text << "(" << tokentype_to_string(token->identifier) << ") @ " << token->filename std::cout << prepend << "Token: " << token->text << "(" << tokentype_to_string(token->identifier) << ") @ " << token->filename
<< ": (" << token->start.line << ", " << token->start.column << ") to (" << ": (" << token->start.line << ", " << token->start.column << ") to ("
<< token->end.line << ", " << token->end.column << ") " << std::endl; << token->end.line << ", " << token->end.column << ") " << std::endl;
@ -34,7 +34,7 @@ void debug_print(TokenPtr token, std::string prepend) {
} }
} }
void debug_print(std::vector<TokenPtr> &tokens) { void debug_print(std::vector<langkit::TokenPtr> &tokens) {
for (unsigned int i = 0; i < tokens.size(); ++i) { for (unsigned int i = 0; i < tokens.size(); ++i) {
debug_print(tokens[i], ""); debug_print(tokens[i], "");
} }
@ -59,7 +59,8 @@ std::string load_file(const char *filename) {
return ret_val; return ret_val;
} }
void parse(std::vector<TokenPtr> &tokens, const char *filename) { void parse(std::vector<langkit::TokenPtr> &tokens, const char *filename) {
using namespace langkit;
/* /*
Rule lhs; Rule lhs;
@ -161,6 +162,7 @@ void parse(std::vector<TokenPtr> &tokens, const char *filename) {
int main(int argc, char *argv[]) { int main(int argc, char *argv[]) {
using namespace langkit;
std::string input; std::string input;
Lexer lexer; Lexer lexer;