diff --git a/langkit/langkit_parser.cpp b/langkit/langkit_parser.cpp
index d63aed5..7418685 100644
--- a/langkit/langkit_parser.cpp
+++ b/langkit/langkit_parser.cpp
@@ -10,7 +10,7 @@
 #include "langkit_parser.hpp"
 
 std::pair<Token_Iterator, bool> String_Rule(Token_Iterator iter, Token_Iterator end, TokenPtr parent, bool keep, int new_id, const std::string &val) {
-    if (*iter != *end) {
+    if (iter != end) {
         if ((*iter)->text == val) {
             if (keep) {
                 parent->children.push_back(*iter);
@@ -23,7 +23,7 @@ std::pair<Token_Iterator, bool> String_Rule(Token_Iterator iter, Token_Iterator
 }
 
 std::pair<Token_Iterator, bool> Type_Rule(Token_Iterator iter, Token_Iterator end, TokenPtr parent, bool keep, int new_id, const int val) {
-    if (*iter != *end) {
+    if (iter != end) {
         if ((*iter)->identifier == val) {
             if (keep) {
                 parent->children.push_back(*iter);
@@ -44,7 +44,7 @@ std::pair<Token_Iterator, bool> Or_Rule(Token_Iterator iter, Token_Iterator end,
         parent = TokenPtr(new Token("", new_id, parent->filename));
     }
 
-    if (*iter != *end) {
+    if (iter != end) {
         std::pair<Token_Iterator, bool> result = lhs(iter, end, parent);
 
         if (result.second) {
@@ -83,7 +83,7 @@ std::pair<Token_Iterator, bool> And_Rule(Token_Iterator iter, Token_Iterator end
 
     prev_size = parent->children.size();
 
-    if (*iter != *end) {
+    if (iter != end) {
         std::pair<Token_Iterator, bool> result = lhs(iter, end, parent);
 
         if (result.second) {
@@ -105,6 +105,140 @@ std::pair<Token_Iterator, bool> And_Rule(Token_Iterator iter, Token_Iterator end
     return std::pair<Token_Iterator, bool>(iter, false);
 }
 
+/**
+ * Kleene star: applies `rule` repeatedly, consuming tokens for as long as it
+ * keeps matching. Zero matches is acceptable, so the result is always a
+ * success.
+ *
+ * When new_id is not -1, `rule` runs against a fresh Token carrying that id,
+ * which is then appended to the incoming parent's children; otherwise `rule`
+ * is handed the incoming parent directly.
+ *
+ * Returns the position just past the last successful match (the original
+ * `iter` if nothing matched) paired with true. `keep` is not consulted here.
+ */
+std::pair<Token_Iterator, bool> Kleene_Rule
+        (Token_Iterator iter, Token_Iterator end, TokenPtr parent, bool keep, int new_id, struct Rule rule) {
+
+    TokenPtr prev_parent = parent;
+
+    if (new_id != -1) {
+        parent = TokenPtr(new Token("", new_id, parent->filename));
+    }
+
+    while (iter != end) {
+        std::pair<Token_Iterator, bool> result = rule(iter, end, parent);
+
+        //Stop on the first failure. Also stop when the rule succeeded without
+        //consuming anything: retrying at the same position could repeat forever.
+        if (!result.second || (result.first == iter)) {
+            break;
+        }
+        iter = result.first;
+    }
+
+    if (new_id != -1) {
+        prev_parent->children.push_back(parent);
+    }
+
+    //Return iter rather than the last rule result: when the loop never ran
+    //(iter == end on entry) there is no rule result to take a position from.
+    return std::pair<Token_Iterator, bool>(iter, true);
+}
+
+/**
+ * One-or-more repetition: `rule` must match at least once, after which it is
+ * re-applied for as long as it keeps matching.
+ *
+ * When new_id is not -1, `rule` runs against a fresh Token carrying that id,
+ * and that Token is appended to the incoming parent's children only if the
+ * overall match succeeds.
+ *
+ * On success returns the position just past the last match paired with true.
+ * On failure any children added along the way are removed again and the
+ * original `iter` is returned paired with false. `keep` is not consulted here.
+ */
+std::pair<Token_Iterator, bool> Plus_Rule
+        (Token_Iterator iter, Token_Iterator end, TokenPtr parent, bool keep, int new_id, struct Rule rule) {
+
+    TokenPtr prev_parent = parent;
+
+    if (new_id != -1) {
+        parent = TokenPtr(new Token("", new_id, parent->filename));
+    }
+
+    unsigned int prev_size = parent->children.size();
+
+    if (iter != end) {
+        std::pair<Token_Iterator, bool> result = rule(iter, end, parent);
+
+        if (result.second) {
+            Token_Iterator loop_iter = result.first;
+
+            //Bound the repetition by the advancing position (loop_iter), not by
+            //the fixed starting position, so the rule is never run at end.
+            while (loop_iter != end) {
+                result = rule(loop_iter, end, parent);
+
+                //A failure, or a match that consumed nothing, ends the repetition
+                if (!result.second || (result.first == loop_iter)) {
+                    break;
+                }
+                loop_iter = result.first;
+            }
+
+            if (new_id != -1) {
+                prev_parent->children.push_back(parent);
+            }
+
+            return std::pair<Token_Iterator, bool>(loop_iter, true);
+        }
+    }
+
+    if (parent->children.size() != prev_size) {
+        //Clear out the partial matches
+        parent->children.erase(parent->children.begin() + prev_size, parent->children.end());
+    }
+
+    return std::pair<Token_Iterator, bool>(iter, false);
+}
+
+/**
+ * Optional match: tries `rule` once and succeeds whether or not it matched.
+ *
+ * When new_id is not -1, `rule` runs against a fresh Token carrying that id,
+ * which is then appended to the incoming parent's children.
+ *
+ * Returns the position just past the match (the original `iter` if `rule` did
+ * not match or there was no input left) paired with true. `keep` is not
+ * consulted here.
+ */
+std::pair<Token_Iterator, bool> Optional_Rule
+        (Token_Iterator iter, Token_Iterator end, TokenPtr parent, bool keep, int new_id, struct Rule rule) {
+
+    TokenPtr prev_parent = parent;
+
+    if (new_id != -1) {
+        parent = TokenPtr(new Token("", new_id, parent->filename));
+    }
+
+    if (iter != end) {
+        std::pair<Token_Iterator, bool> result = rule(iter, end, parent);
+
+        //Only a successful match moves the position forward
+        if (result.second) {
+            iter = result.first;
+        }
+    }
+
+    if (new_id != -1) {
+        prev_parent->children.push_back(parent);
+    }
+
+    return std::pair<Token_Iterator, bool>(iter, true);
+}
+
+
 Rule Str(const std::string &text, bool keep) {
     return Rule(boost::bind(String_Rule, _1, _2, _3, _4, _5, text), keep);
 }
diff --git a/langkit/langkit_parser.hpp b/langkit/langkit_parser.hpp
index f1beb5e..f600071 100644
--- a/langkit/langkit_parser.hpp
+++ b/langkit/langkit_parser.hpp
@@ -36,6 +36,18 @@ std::pair<Token_Iterator, bool> Or_Rule
 std::pair<Token_Iterator, bool> And_Rule
         (Token_Iterator iter, Token_Iterator end, TokenPtr parent, bool keep, int new_id, struct Rule lhs, struct Rule rhs);
 
+//Zero or more repetitions of rule; always reports success
+std::pair<Token_Iterator, bool> Kleene_Rule
+        (Token_Iterator iter, Token_Iterator end, TokenPtr parent, bool keep, int new_id, struct Rule rule);
+
+//One or more repetitions of rule; fails if rule does not match at least once
+std::pair<Token_Iterator, bool> Plus_Rule
+        (Token_Iterator iter, Token_Iterator end, TokenPtr parent, bool keep, int new_id, struct Rule rule);
+
+//Zero or one occurrence of rule; always reports success
+std::pair<Token_Iterator, bool> Optional_Rule
+        (Token_Iterator iter, Token_Iterator end, TokenPtr parent, bool keep, int new_id, struct Rule rule);
+
 struct Rule {
     RuleImplPtr impl;
 
@@ -64,6 +76,21 @@ struct Rule {
         return Rule(boost::bind(And_Rule, _1, _2, _3, _4, _5, *this, rhs));
     }
 
+    //Prefix '*': wraps this rule in Kleene_Rule (zero or more repetitions)
+    Rule operator*() {
+        return Rule(boost::bind(Kleene_Rule, _1, _2, _3, _4, _5, *this));
+    }
+
+    //Prefix '+': wraps this rule in Plus_Rule (one or more repetitions)
+    Rule operator+() {
+        return Rule(boost::bind(Plus_Rule, _1, _2, _3, _4, _5, *this));
+    }
+
+    //Prefix '~': wraps this rule in Optional_Rule (zero or one occurrence)
+    Rule operator~() {
+        return Rule(boost::bind(Optional_Rule, _1, _2, _3, _4, _5, *this));
+    }
+
     //const RuleImplPtr get_impl() const { return impl; }
 
     //private:
diff --git a/langkit/main.cpp b/langkit/main.cpp
index 0b07eab..15f3c9e 100644
--- a/langkit/main.cpp
+++ b/langkit/main.cpp
@@ -65,11 +65,11 @@ void parse(std::vector<TokenPtr> &tokens) {
 
 
     //Example: "def add(x,y)"
-    //To do the params for real we need kleene star and other looping parsers
+
     Rule params;
     Rule rule(TokenType::Function_Def);
-    rule = Ign(Str("def")) << Id(TokenType::Identifier) << Ign(Str("(")) << params << Ign(Str(")"));
-    params = Id(TokenType::Identifier) << Ign(Str(",")) << Id(TokenType::Identifier);
+    rule = Ign(Str("def")) << Id(TokenType::Identifier) << ~(Ign(Str("(")) << ~params << Ign(Str(")")));
+    params = Id(TokenType::Identifier) << *(Ign(Str(",")) << Id(TokenType::Identifier));
 
     Token_Iterator iter = tokens.begin(), end = tokens.end();
     TokenPtr parent(new Token("Root", 0, "test"));