commit 23fc75ca52afe51fd735fe4079eba42d27fed973
Author: Jonathan Turner
Date:   Mon May 25 18:52:01 2009 +0000

    Initial check-in of langkit

diff --git a/CMakeLists.txt b/CMakeLists.txt
new file mode 100644
index 0000000..08c0bd2
--- /dev/null
+++ b/CMakeLists.txt
@@ -0,0 +1,15 @@
+cmake_minimum_required(VERSION 2.6)
+
+project(langkit)
+
+SET (CMAKE_BUILD_TYPE gdb)
+SET (CMAKE_C_FLAGS_GDB " -Wall -ggdb")
+SET (CMAKE_CXX_FLAGS_GDB " -Wall -ggdb")
+
+find_package( Boost 1.36.0 COMPONENTS regex)
+if(Boost_FOUND)
+  include_directories(${Boost_INCLUDE_DIRS})
+
+  add_executable(langkit langkit/main.cpp langkit/langkit_lexer.cpp)
+  target_link_libraries(langkit ${Boost_LIBRARIES})
+endif()
diff --git a/langkit/langkit_lexer.cpp b/langkit/langkit_lexer.cpp
new file mode 100644
index 0000000..f694a2a
--- /dev/null
+++ b/langkit/langkit_lexer.cpp
@@ -0,0 +1,69 @@
+// This file is distributed under the BSD License.
+// See LICENSE.TXT for details.
+
+#include <iostream>
+#include "langkit_lexer.hpp"
+
+// Appends p to the token patterns. Returns this lexer by reference (not a
+// copy), so chained calls such as `lexer << a << b` all modify this object.
+Lexer &Lexer::operator<<(const Pattern &p) {
+    lex_patterns.push_back(p);
+    return *this;
+}
+
+// Splits input into tokens. At each position the token patterns are tried in
+// insertion order and the first one matching at exactly that position wins; if
+// none matches, the skip patterns are tried the same way and the matched text
+// is consumed without producing a token. If nothing matches at all, the
+// unconsumed remainder is reported on std::cout and the tokens collected so far
+// are returned. start.column is the 0-based offset of the token in input and
+// end.column is one past its last character; row and filename are not set here.
+std::vector<Token> Lexer::lex(const std::string &input) {
+    // match_continuous anchors every match at input_iter. match_not_null rejects
+    // empty matches, which would never advance input_iter and would make the
+    // loop below spin forever.
+    const boost::match_flag_type flags = boost::match_continuous | boost::match_not_null;
+    std::vector<Pattern>::iterator iter, end;
+    std::vector<Token> retval;
+    bool found;
+    std::string::const_iterator input_iter = input.begin();
+
+    while (input_iter != input.end()) {
+        found = false;
+        for (iter = lex_patterns.begin(), end = lex_patterns.end(); iter != end; ++iter) {
+            boost::match_results<std::string::const_iterator> what;
+            if (boost::regex_search(input_iter, input.end(), what, iter->regex, flags)) {
+                Token t(what[0], iter->identifier);
+                t.start.column = input_iter - input.begin();
+                t.end.column = t.start.column + t.text.size();
+                retval.push_back(t);
+                input_iter += t.text.size();
+                found = true;
+                break;
+            }
+        }
+        if (!found) {
+            for (iter = skip_patterns.begin(), end = skip_patterns.end(); iter != end; ++iter) {
+                boost::match_results<std::string::const_iterator> what;
+                if (boost::regex_search(input_iter, input.end(), what, iter->regex, flags)) {
+                    input_iter += what.length(0);
+                    found = true;
+                    break;
+                }
+            }
+
+            if (!found) {
+                const std::string err(input_iter, input.end());
+                std::cout << "Unknown string at: " << err << std::endl;
+                return retval;
+            }
+        }
+    }
+    return retval;
+}
+
+// Appends p to the skip patterns, whose matches are consumed by lex() without
+// producing a token.
+void Lexer::set_skip(const Pattern &p) {
+    skip_patterns.push_back(p);
+}
diff --git a/langkit/langkit_lexer.hpp b/langkit/langkit_lexer.hpp
new file mode 100644
index 0000000..49e97d2
--- /dev/null
+++ b/langkit/langkit_lexer.hpp
@@ -0,0 +1,55 @@
+// This file is distributed under the BSD License.
+// See LICENSE.TXT for details.
+
+#ifndef LANGKIT_LEXER_HPP_
+#define LANGKIT_LEXER_HPP_
+
+#include <boost/regex.hpp>
+#include <string>
+#include <vector>
+
+// A position in a source file. The default constructor zeroes row and column
+// and sets filename to NULL.
+struct File_Position {
+    int row;
+    int column;
+    char *filename;    // stored as given; this struct never frees it
+
+    File_Position(int file_row, int file_column, char *fname)
+        : row(file_row), column(file_column), filename(fname) { }
+
+    File_Position() : row(0), column(0), filename(NULL) { }
+};
+
+// A regular expression paired with the identifier given to the tokens it matches.
+struct Pattern {
+    boost::regex regex;
+    int identifier;
+
+    Pattern(const std::string &regexp, int id) : regex(regexp), identifier(id) { }
+};
+
+// One lexed token: the matched text, the identifier of the pattern that
+// matched it, and the positions where it starts and ends.
+struct Token {
+    std::string text;
+    int identifier;
+    File_Position start, end;
+
+    Token(const std::string &token_text, int id) : text(token_text), identifier(id) { }
+};
+
+// Regex-driven tokenizer built from an ordered list of token and skip patterns.
+struct Lexer {
+    std::vector<Pattern> lex_patterns;     // matches become tokens
+    std::vector<Pattern> skip_patterns;    // matches are consumed and dropped
+
+    // Returns *this by reference so that `lexer << a << b` adds both patterns.
+    Lexer &operator<<(const Pattern &p);
+    std::vector<Token> lex(const std::string &input);
+
+    void set_skip(const Pattern &p);
+};
+
+
+#endif /* LANGKIT_LEXER_HPP_ */
diff --git a/langkit/main.cpp b/langkit/main.cpp
new file mode 100644
index 0000000..6a57461
--- /dev/null
+++ b/langkit/main.cpp
@@ -0,0 +1,87 @@
+// This file is distributed under the BSD License.
+// See LICENSE.TXT for details.
+
+#include <iostream>
+#include <map>
+
+#include "langkit_lexer.hpp"
+//#include "lexer.hpp"
+//#include "parser.hpp"
+//#include "eval.hpp"
+
+// Token identifiers passed to Pattern and stored in Token::identifier.
+class TokenType { public: enum Type { Whitespace, Identifier, Number, Operator, Parens_Open, Parens_Close,
+    Square_Open, Square_Close, Curly_Open, Curly_Close }; };
+
+// Prints the text, identifier and start/end column of every token to std::cout.
+void debug_print(std::vector<Token> &tokens) {
+    for (unsigned int i = 0; i < tokens.size(); ++i) {
+        std::cout << "Token: " << tokens[i].text << "(" << tokens[i].identifier << ") @ " << tokens[i].start.column
+            << " to " << tokens[i].end.column << std::endl;
+    }
+}
+
+// Reads lines from std::cin, lexes and prints each one, until "quit" or end of input.
+int main(int argc, char *argv[]) {
+    Lexer lexer;
+    lexer.set_skip(Pattern("\\s+", TokenType::Whitespace));
+    lexer << Pattern("[A-Za-z]+", TokenType::Identifier);
+    lexer << Pattern("[0-9]+(\\.[0-9]+)?", TokenType::Number);
+    lexer << Pattern("[!@#$%^&*\\-+=/:]+", TokenType::Operator);
+    lexer << Pattern("\\(", TokenType::Parens_Open);
+    lexer << Pattern("\\)", TokenType::Parens_Close);
+    lexer << Pattern("\\[", TokenType::Square_Open);
+    lexer << Pattern("\\]", TokenType::Square_Close);
+    lexer << Pattern("\\{", TokenType::Curly_Open);
+    lexer << Pattern("\\}", TokenType::Curly_Close);
+    lexer << Pattern("[!@#$%^&*\\-+=/<>]+", TokenType::Operator);
+
+    std::string input;
+    std::cout << "Expression> ";
+    while (std::getline(std::cin, input) && input != "quit") {  // a failed read (EOF) ends the loop
+        std::vector<Token> tokens = lexer.lex(input);
+        debug_print(tokens);
+        std::cout << "Expression> ";
+    }
+}
+
+/*
+int main(int argc, char *argv[]) {
+    std::string input;
+    std::map symbols;
+
+    std::cout << "Expression> ";
+    std::getline(std::cin, input);
+    while (input != "quit") {
+        if (input == "vars") {
+            //debug_print(symbols);
+        }
+        else {
+            try {
+                NodePtr lex_nodes = lex(input);
+                //clean_whitespace(lex_nodes);
+                //NodePtr parse_nodes = parse(lex_nodes);
+                //Result result = eval(parse_nodes, symbols);
+
+                //debug_print(lex_nodes);
+                //std::cout << std::endl;
+                //debug_print(parse_nodes, "");
+                //if (result.type == ResultType::NUMBER) {
+                //    std::cout << result.value << std::endl;
+                //}
+            }
+            catch (LexerError &le) {
+                std::cerr << "Lexer error: " << le.reason << std::endl;
+            }
+            //catch (ParserError &pe) {
+            //    std::cerr << "Parser error: " << pe.reason << std::endl;
+            //}
+            //catch (EvalError &ee) {
+            //    std::cerr << "Eval error: " << ee.reason << std::endl;
+            //}
+        }
+        std::cout << "Expression> ";
+        std::getline(std::cin, input);
+    }
+}
+*/