// This file is distributed under the BSD License. // See LICENSE.TXT for details. #ifndef LANGKIT_PARSER_HPP_ #define LANGKIT_PARSER_HPP_ #include #include "langkit_lexer.hpp" namespace langkit { struct RuleImpl; typedef std::vector::iterator Token_Iterator; typedef boost::function(Token_Iterator, Token_Iterator, TokenPtr, bool, int)> RuleFun; typedef std::tr1::shared_ptr RuleImplPtr; struct RuleImpl { RuleFun rule; bool keep; int new_id; RuleImpl() : keep(true), new_id(-1) {} RuleImpl(int id) : keep(true), new_id(id) {} RuleImpl(RuleFun fun) : rule(fun), keep(true), new_id(-1) {} RuleImpl(RuleFun fun, bool keep_match) : rule(fun), keep(keep_match), new_id(-1) {} std::pair operator()(Token_Iterator iter, Token_Iterator end, TokenPtr parent) { return rule(iter, end, parent, keep, new_id); } }; //struct Rule; template std::pair String_Rule(T_Iter iter, T_Iter end, TokenPtr parent, bool keep, int new_id, const std::string &val) { if (iter != end) { if ((*iter)->text == val) { if (keep) { parent->children.push_back(*iter); } return std::pair(++iter, true); } } return std::pair(iter, false); } template std::pair Id_Rule(T_Iter iter, T_Iter end, TokenPtr parent, bool keep, int new_id, const int val) { if (iter != end) { if ((*iter)->identifier == val) { if (keep) { parent->children.push_back(*iter); } return std::pair(++iter, true); } } return std::pair(iter, false); } template std::pair Or_Rule(T_Iter iter, T_Iter end, TokenPtr parent, bool keep, int new_id, R_Type lhs, R_Type rhs) { T_Iter new_iter; unsigned int prev_size; TokenPtr prev_parent = parent; if (new_id != -1) { parent = TokenPtr(new Token("", new_id, parent->filename)); } prev_size = parent->children.size(); if (iter != end) { std::pair result = lhs(iter, end, parent); if (result.second) { if (new_id != -1) { parent->filename = (*iter)->filename; parent->start = (*iter)->start; if (result.first == iter) { parent->end = (*iter)->start; } else { parent->end = (*(result.first - 1))->end; } prev_parent->children.push_back(parent); } return std::pair(result.first, true); } else { if (parent->children.size() != prev_size) { //Clear out the partial matches parent->children.erase(parent->children.begin() + prev_size, parent->children.end()); } result = rhs(iter, end, parent); if (result.second) { if (new_id != -1) { parent->filename = (*iter)->filename; parent->start = (*iter)->start; if (result.first == iter) { parent->end = (*iter)->start; } else { parent->end = (*(result.first - 1))->end; } prev_parent->children.push_back(parent); } return std::pair(result.first, true); } } } if (parent->children.size() != prev_size) { //Clear out the partial matches parent->children.erase(parent->children.begin() + prev_size, parent->children.end()); } return std::pair(iter, false); } template std::pair And_Rule(T_Iter iter, T_Iter end, TokenPtr parent, bool keep, int new_id, R_Type lhs, R_Type rhs) { T_Iter lhs_iter, rhs_iter; unsigned int prev_size; TokenPtr prev_parent = parent; if (new_id != -1) { parent = TokenPtr(new Token("", new_id, parent->filename)); } prev_size = parent->children.size(); if (iter != end) { std::pair result = lhs(iter, end, parent); if (result.second) { result = rhs(result.first, end, parent); if (result.second) { if (new_id != -1) { parent->filename = (*iter)->filename; parent->start = (*iter)->start; if (result.first == iter) { parent->end = (*iter)->start; } else { parent->end = (*(result.first - 1))->end; } prev_parent->children.push_back(parent); } return std::pair(result.first, true); } } } if (parent->children.size() != prev_size) { //Clear out the partial matches parent->children.erase(parent->children.begin() + prev_size, parent->children.end()); } return std::pair(iter, false); } template std::pair Kleene_Rule (T_Iter iter, T_Iter end, TokenPtr parent, bool keep, int new_id, R_Type rule) { TokenPtr prev_parent = parent; std::pair result; T_Iter new_iter = iter; if (iter != end) { if (new_id != -1) { parent = TokenPtr(new Token("", new_id, parent->filename)); } result.second = true; while (result.second == true) { result = rule(new_iter, end, parent); new_iter = result.first; } if (new_id != -1) { parent->filename = (*iter)->filename; parent->start = (*iter)->start; if (result.first == iter) { parent->end = (*iter)->start; } else { parent->end = (*(result.first - 1))->end; } prev_parent->children.push_back(parent); } return std::pair(result.first, true); } else { return std::pair(iter, true); } } template std::pair Plus_Rule (T_Iter iter, T_Iter end, TokenPtr parent, bool keep, int new_id, R_Type rule) { unsigned int prev_size; TokenPtr prev_parent = parent; T_Iter loop_iter = iter; if (new_id != -1) { parent = TokenPtr(new Token("", new_id, parent->filename)); } prev_size = parent->children.size(); if (iter != end) { std::pair result; result = rule(loop_iter, end, parent); if (result.second == true) { loop_iter = result.first; result.second = true; while ((loop_iter != end) && (result.second == true)) { result = rule(loop_iter, end, parent); loop_iter = result.first; } if (new_id != -1) { parent->filename = (*iter)->filename; parent->start = (*iter)->start; if (result.first == iter) { parent->end = (*iter)->start; } else { parent->end = (*(result.first - 1))->end; } prev_parent->children.push_back(parent); } return std::pair(result.first, true); } } if (parent->children.size() != prev_size) { //Clear out the partial matches parent->children.erase(parent->children.begin() + prev_size, parent->children.end()); } return std::pair(iter, false); } template std::pair Optional_Rule (T_Iter iter, T_Iter end, TokenPtr parent, bool keep, int new_id, R_Type rule) { TokenPtr prev_parent = parent; T_Iter new_iter = iter; if (iter != end) { if (new_id != -1) { parent = TokenPtr(new Token("", new_id, parent->filename)); } std::pair result; result.second = true; if ((new_iter != end) && (result.second == true)) { result = rule(new_iter, end, parent); new_iter = result.first; } if (new_id != -1) { parent->filename = (*iter)->filename; parent->start = (*iter)->start; if (result.first == iter) { parent->end = (*iter)->start; } else { parent->end = (*(result.first - 1))->end; } prev_parent->children.push_back(parent); } return std::pair(result.first, true); } else { return std::pair(iter, true); } } template std::pair Epsilon_Rule (T_Iter iter, T_Iter end, TokenPtr parent, bool keep, int new_id, R_Type rule) { TokenPtr prev_parent = parent; T_Iter new_iter = iter; if (new_id != -1) { parent = TokenPtr(new Token("", new_id, parent->filename)); } std::pair result; if ((new_iter != end)) { result = rule(new_iter, end, parent); new_iter = result.first; } if (new_id != -1) { parent->filename = (*iter)->filename; parent->start = (*iter)->start; if (result.first == iter) { parent->end = (*iter)->start; } else { parent->end = (*(result.first - 1))->end; } prev_parent->children.push_back(parent); } return std::pair(iter, result.second); } template std::pair Wrap_Rule (T_Iter iter, T_Iter end, TokenPtr parent, bool keep, int new_id, R_Type rule) { TokenPtr prev_parent = parent; T_Iter new_iter = iter; if (new_id != -1) { parent = TokenPtr(new Token("", new_id, parent->filename)); } std::pair result; if ((new_iter != end)) { result = rule(new_iter, end, parent); new_iter = result.first; } if (new_id != -1) { parent->filename = (*iter)->filename; parent->start = (*iter)->start; if (result.first == iter) { parent->end = (*iter)->start; } else { parent->end = (*(result.first - 1))->end; } prev_parent->children.push_back(parent); } return std::pair(result.first, result.second); } template std::pair Ignore_Rule (T_Iter iter, T_Iter end, TokenPtr parent, bool keep, int new_id, R_Type rule) { rule.impl->keep = false; return rule(iter, end, parent); } struct Rule { RuleImplPtr impl; Rule() : impl(new RuleImpl()) {} Rule(int id) : impl(new RuleImpl(id)) {} Rule(RuleFun fun) : impl(new RuleImpl(fun)) {} Rule(RuleFun fun, bool keep) : impl(new RuleImpl(fun, keep)) {} std::pair operator()(Token_Iterator iter, Token_Iterator end, TokenPtr parent) { return (*impl)(iter, end, parent); } Rule &operator=(const Rule &rule) { int prev_id = impl->new_id; *impl = *(rule.impl); impl->new_id = prev_id; return *this; } }; inline Rule operator>>(const Rule &lhs, const Rule &rhs) { return Rule(boost::bind(And_Rule, _1, _2, _3, _4, _5, lhs, rhs)); } inline Rule operator|(const Rule &lhs, const Rule &rhs) { return Rule(boost::bind(Or_Rule, _1, _2, _3, _4, _5, lhs, rhs)); } inline Rule operator*(const Rule &operand) { return Rule(boost::bind(Kleene_Rule, _1, _2, _3, _4, _5, operand)); } inline Rule operator+(const Rule &operand) { return Rule(boost::bind(Plus_Rule, _1, _2, _3, _4, _5, operand)); } inline Rule operator~(const Rule &operand) { return Rule(boost::bind(Optional_Rule, _1, _2, _3, _4, _5, operand)); } template (*Function)(ItrType, ItrType, TokenPtr, bool, int, ParamType)> struct Rule_Builder { Rule_Builder(ParamType p, bool t_keep = true) : m_p(p), m_keep(t_keep) { } // Auto conversion operator is the glue here. // In one sense this option cleans up the impl quite a bit, with much fewer code // repeats in all the rule builders. // In another sense, it might take a couple of tries to get it right. operator Rule() { return Rule(boost::bind(Function, _1, _2, _3, _4, _5, m_p), m_keep); } ParamType m_p; bool m_keep; }; typedef Rule_Builder > Epsilon; typedef Rule_Builder > Wrap; typedef Rule_Builder > Ign; typedef Rule_Builder > Id; typedef Rule_Builder > Str; } #endif /* LANGKIT_PARSER_HPP_ */