163 lines
5.3 KiB
Plaintext
163 lines
5.3 KiB
Plaintext
|
// Copyright (c) 2001-2009 Hartmut Kaiser
|
||
|
//
|
||
|
// Distributed under the Boost Software License, Version 1.0. (See accompanying
|
||
|
// file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
|
||
|
|
||
|
// This example is equivalent to the following lex program:
|
||
|
//
|
||
|
// %{
|
||
|
// /* INITIAL is the default start state. COMMENT is our new */
|
||
|
// /* state where we remove comments. */
|
||
|
// %}
|
||
|
//
|
||
|
// %s COMMENT
|
||
|
// %%
|
||
|
// <INITIAL>"//".* ;
|
||
|
// <INITIAL>"/*" BEGIN COMMENT;
|
||
|
// <INITIAL>. ECHO;
|
||
|
// <INITIAL>[\n] ECHO;
|
||
|
// <COMMENT>"*/" BEGIN INITIAL;
|
||
|
// <COMMENT>. ;
|
||
|
// <COMMENT>[\n] ;
|
||
|
// %%
|
||
|
//
|
||
|
// main()
|
||
|
// {
|
||
|
// yylex();
|
||
|
// }
|
||
|
//
|
||
|
// Its purpose is to strip comments out of C code.
|
||
|
//
|
||
|
// Additionally this example demonstrates the use of lexer states to structure
|
||
|
// the lexer definition.
|
||
|
|
||
|
// #define BOOST_SPIRIT_LEXERTL_DEBUG
|
||
|
|
||
|
#include <boost/config/warning_disable.hpp>
|
||
|
#include <boost/spirit/include/qi.hpp>
|
||
|
#include <boost/spirit/include/lex_lexer_lexertl.hpp>
|
||
|
#include <boost/spirit/include/phoenix_operator.hpp>
|
||
|
#include <boost/spirit/include/phoenix_container.hpp>
|
||
|
|
||
|
#include <iostream>
|
||
|
#include <string>
|
||
|
|
||
|
#include "example.hpp"
|
||
|
|
||
|
using namespace boost::spirit;
|
||
|
using namespace boost::spirit::qi;
|
||
|
using namespace boost::spirit::lex;
|
||
|
|
||
|
///////////////////////////////////////////////////////////////////////////////
|
||
|
// Token definition: We use the lexertl based lexer engine as the underlying
|
||
|
// lexer type.
|
||
|
///////////////////////////////////////////////////////////////////////////////
|
||
|
enum tokenids
|
||
|
{
|
||
|
IDANY = lex::min_token_id + 10
|
||
|
};
|
||
|
|
||
|
template <typename Lexer>
|
||
|
struct strip_comments_tokens : lexer<Lexer>
|
||
|
{
|
||
|
strip_comments_tokens()
|
||
|
{
|
||
|
// define tokens and associate them with the lexer
|
||
|
cppcomment = "//[^\n]*";
|
||
|
ccomment = "/\\*";
|
||
|
endcomment = "\\*/";
|
||
|
|
||
|
// The following tokens are associated with the default lexer state
|
||
|
// (the "INITIAL" state). Specifying 'INITIAL' as a lexer state is
|
||
|
// strictly optional.
|
||
|
this->self.add
|
||
|
(cppcomment) // no explicit token id is associated
|
||
|
(ccomment)
|
||
|
(".", IDANY) // IDANY is the token id associated with this token
|
||
|
// definition
|
||
|
;
|
||
|
|
||
|
// The following tokens are associated with the lexer state "COMMENT".
|
||
|
// We switch lexer states from inside the parsing process using the
|
||
|
// in_state("COMMENT")[] parser component as shown below.
|
||
|
this->self("COMMENT").add
|
||
|
(endcomment)
|
||
|
(".", IDANY)
|
||
|
;
|
||
|
}
|
||
|
|
||
|
token_def<> cppcomment, ccomment, endcomment;
|
||
|
};
|
||
|
|
||
|
///////////////////////////////////////////////////////////////////////////////
|
||
|
// Grammar definition
|
||
|
///////////////////////////////////////////////////////////////////////////////
|
||
|
template <typename Iterator>
|
||
|
struct strip_comments_grammar : grammar<Iterator>
|
||
|
{
|
||
|
template <typename TokenDef>
|
||
|
strip_comments_grammar(TokenDef const& tok)
|
||
|
: strip_comments_grammar::base_type(start)
|
||
|
{
|
||
|
// The in_state("COMMENT")[...] parser component switches the lexer
|
||
|
// state to be 'COMMENT' during the matching of the embedded parser.
|
||
|
start = *( tok.ccomment
|
||
|
>> in_state("COMMENT")
|
||
|
[
|
||
|
// the lexer is in the 'COMMENT' state during
|
||
|
// matching of the following parser components
|
||
|
*token(IDANY) >> tok.endcomment
|
||
|
]
|
||
|
| tok.cppcomment
|
||
|
| token(IDANY) [ std::cout << _1 ]
|
||
|
)
|
||
|
;
|
||
|
}
|
||
|
|
||
|
rule<Iterator> start;
|
||
|
};
|
||
|
|
||
|
///////////////////////////////////////////////////////////////////////////////
|
||
|
int main(int argc, char* argv[])
|
||
|
{
|
||
|
// iterator type used to expose the underlying input stream
|
||
|
typedef std::string::iterator base_iterator_type;
|
||
|
|
||
|
// lexer type
|
||
|
typedef lexertl::lexer<lexertl::token<base_iterator_type> > lexer_type;
|
||
|
|
||
|
// iterator type exposed by the lexer
|
||
|
typedef strip_comments_tokens<lexer_type>::iterator_type iterator_type;
|
||
|
|
||
|
// now we use the types defined above to create the lexer and grammar
|
||
|
// object instances needed to invoke the parsing process
|
||
|
strip_comments_tokens<lexer_type> strip_comments; // Our lexer
|
||
|
strip_comments_grammar<iterator_type> g (strip_comments); // Our grammar
|
||
|
|
||
|
// Parsing is done based on the token stream, not the character
|
||
|
// stream read from the input.
|
||
|
std::string str (read_from_file(1 == argc ? "strip_comments.input" : argv[1]));
|
||
|
base_iterator_type first = str.begin();
|
||
|
|
||
|
bool r = tokenize_and_parse(first, str.end(), strip_comments, g);
|
||
|
|
||
|
if (r) {
|
||
|
std::cout << "-------------------------\n";
|
||
|
std::cout << "Parsing succeeded\n";
|
||
|
std::cout << "-------------------------\n";
|
||
|
}
|
||
|
else {
|
||
|
std::string rest(first, str.end());
|
||
|
std::cout << "-------------------------\n";
|
||
|
std::cout << "Parsing failed\n";
|
||
|
std::cout << "stopped at: \"" << rest << "\"\n";
|
||
|
std::cout << "-------------------------\n";
|
||
|
}
|
||
|
|
||
|
std::cout << "Bye... :-) \n\n";
|
||
|
return 0;
|
||
|
}
|
||
|
|
||
|
|
||
|
|