Initial check-in of langkit

CMakeLists.txt (new file, 15 lines)
@@ -0,0 +1,15 @@
cmake_minimum_required(VERSION 2.6)

project(langkit)

SET (CMAKE_BUILD_TYPE gdb)
SET (CMAKE_C_FLAGS_GDB " -Wall -ggdb")
SET (CMAKE_CXX_FLAGS_GDB " -Wall -ggdb")

find_package( Boost 1.36.0 COMPONENTS regex)
if(Boost_FOUND)
    include_directories(${Boost_INCLUDE_DIRS})

    add_executable(langkit langkit/main.cpp langkit/langkit_lexer.cpp)
    target_link_libraries(langkit ${Boost_LIBRARIES})
endif()
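
The only external dependency is Boost.Regex (1.36.0 or newer); the langkit target is built only when find_package sets Boost_FOUND. A quick way to confirm the dependency is usable before configuring, sketched here as a hypothetical stand-alone check rather than anything in the commit, is to compile and link a one-file program against the Boost.Regex library:

    // check_boost_regex.cpp -- hypothetical sanity check, not part of this commit
    #include <iostream>
    #include <string>
    #include <boost/regex.hpp>

    int main() {
        // Builds and runs only if the Boost.Regex headers and library are installed.
        boost::regex number("[0-9]+(\\.[0-9]+)?");
        std::cout << std::boolalpha
                  << boost::regex_match(std::string("3.14"), number) << std::endl;
        return 0;
    }

Compiled with the flags the gdb build type adds (-Wall -ggdb) and linked against boost_regex, this prints "true" when the library is wired up correctly.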

langkit/langkit_lexer.cpp (new file, 56 lines)
@@ -0,0 +1,56 @@
// This file is distributed under the BSD License.
// See LICENSE.TXT for details.

#include <iostream>
#include "langkit_lexer.hpp"

Lexer Lexer::operator<<(const Pattern &p) {
    lex_patterns.push_back(p);
    return *this;
}

std::vector<Token> Lexer::lex(const std::string &input) {
    std::vector<Pattern>::iterator iter, end;
    //std::string::const_iterator str_end = input.end();
    std::vector<Token> retval;
    bool found;
    std::string::const_iterator input_iter = input.begin();

    while (input_iter != input.end()) {
        found = false;
        for (iter = lex_patterns.begin(), end = lex_patterns.end(); iter != end; ++iter) {
            boost::match_results<std::string::const_iterator> what;
            if (regex_search(input_iter, input.end(), what, iter->regex, boost::match_continuous)) {
                Token t(what[0], iter->identifier);
                t.start.column = input_iter - input.begin();
                t.end.column = t.start.column + t.text.size();
                retval.push_back(t);
                input_iter += t.text.size();
                found = true;
                break;
            }
        }
        if (!found) {
            for (iter = skip_patterns.begin(), end = skip_patterns.end(); iter != end; ++iter) {
                boost::match_results<std::string::const_iterator> what;
                if (regex_search(input_iter, input.end(), what, iter->regex, boost::match_continuous)) {
                    std::string whitespace(what[0]);
                    input_iter += whitespace.size();
                    found = true;
                    break;
                }
            }

            if (!found) {
                const std::string err(input_iter, input.end());
                std::cout << "Unknown string at: " << err << std::endl;
                return retval;
            }
        }
    }
    return retval;
}

void Lexer::set_skip(const Pattern &p) {
    skip_patterns.push_back(p);
}
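
The core of Lexer::lex is the anchored regex search: boost::regex_search called with boost::match_continuous only accepts a match that begins exactly at input_iter, so each pattern is tried as a prefix of the remaining input, the first pattern that matches wins, and the cursor advances by the length of the matched text (skip patterns get the same anchored treatment when no token pattern matches). A minimal stand-alone sketch of that anchored step, using hypothetical names that are not part of the commit:

    #include <iostream>
    #include <string>
    #include <boost/regex.hpp>

    int main() {
        const std::string input = "count 42";
        std::string::const_iterator cursor = input.begin();
        boost::regex word("[A-Za-z]+");
        boost::match_results<std::string::const_iterator> what;

        // match_continuous anchors the search at 'cursor': "count" matches here,
        // but the same call started at the space would fail instead of skipping ahead.
        if (boost::regex_search(cursor, input.end(), what, word, boost::match_continuous)) {
            std::cout << "matched '" << what[0] << "', advance "
                      << what[0].length() << " characters" << std::endl;
            cursor += what[0].length();
        }
        std::cout << "remaining: \"" << std::string(cursor, input.end()) << "\"" << std::endl;
        return 0;
    }

This prints matched 'count', advance 5 characters and then remaining: " 42", since the whitespace is left for the caller (here, the skip patterns) to consume.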

langkit/langkit_lexer.hpp (new file, 47 lines)
@@ -0,0 +1,47 @@
// This file is distributed under the BSD License.
// See LICENSE.TXT for details.

#ifndef LANGKIT_LEXER_HPP_
#define LANGKIT_LEXER_HPP_

#include <string>
#include <boost/regex.hpp>

struct File_Position {
    int row;
    int column;
    char *filename;

    File_Position(int file_row, int file_column, char *fname)
        : row(file_row), column(file_column), filename(fname) { }

    File_Position() : row(0), column(0), filename(NULL) { }
};

struct Pattern {
    boost::regex regex;
    int identifier;

    Pattern(const std::string &regexp, int id) : regex(regexp), identifier(id) { }
};

struct Token {
    std::string text;
    int identifier;
    File_Position start, end;

    Token(const std::string &token_text, int id) : text(token_text), identifier(id) { }
};

struct Lexer {
    std::vector<Pattern> lex_patterns;
    std::vector<Pattern> skip_patterns;

    Lexer operator<<(const Pattern &p);
    std::vector<Token> lex(const std::string &input);

    void set_skip(const Pattern &p);
};


#endif /* LANGKIT_LEXER_HPP_ */
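
One thing worth noting about this interface: operator<< returns the Lexer by value, so every call yields a copy. Registering patterns one statement at a time (as main.cpp below does) works as expected, but a chained expression such as lexer << a << b would store a in lexer and b only in a temporary copy that is immediately discarded. A short usage sketch under that constraint, with arbitrary identifier values chosen only for illustration:

    #include <iostream>
    #include <vector>
    #include "langkit_lexer.hpp"

    int main() {
        Lexer lexer;
        lexer.set_skip(Pattern("\\s+", 0));   // whitespace is consumed, never emitted
        lexer << Pattern("[A-Za-z]+", 1);     // one statement per pattern, so each
        lexer << Pattern("[0-9]+", 2);        // push lands in 'lexer' itself

        std::vector<Token> tokens = lexer.lex("abc 123");
        for (unsigned int i = 0; i < tokens.size(); ++i)
            std::cout << tokens[i].text << " -> " << tokens[i].identifier << std::endl;
        return 0;
    }

This prints abc -> 1 followed by 123 -> 2. Returning Lexer& would make chaining safe, but the header as committed returns by value.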

langkit/main.cpp (new file, 87 lines)
@@ -0,0 +1,87 @@
// This file is distributed under the BSD License.
// See LICENSE.TXT for details.

#include <iostream>
#include <map>

#include "langkit_lexer.hpp"
//#include "lexer.hpp"
//#include "parser.hpp"
//#include "eval.hpp"

class TokenType { public: enum Type { Whitespace, Identifier, Number, Operator, Parens_Open, Parens_Close,
    Square_Open, Square_Close, Curly_Open, Curly_Close }; };

void debug_print(std::vector<Token> &tokens) {
    for (unsigned int i = 0; i < tokens.size(); ++i) {
        std::cout << "Token: " << tokens[i].text << "(" << tokens[i].identifier << ") @ " << tokens[i].start.column
            << " to " << tokens[i].end.column << std::endl;
    }
}

int main(int argc, char *argv[]) {
    std::string input;

    Lexer lexer;
    lexer.set_skip(Pattern("\\s+", TokenType::Whitespace));
    lexer << Pattern("[A-Za-z]+", TokenType::Identifier);
    lexer << Pattern("[0-9]+(\\.[0-9]+)?", TokenType::Number);
    lexer << Pattern("[!@#$%^&*\\-+=/:]+", TokenType::Operator);
    lexer << Pattern("\\(", TokenType::Parens_Open);
    lexer << Pattern("\\)", TokenType::Parens_Close);
    lexer << Pattern("\\[", TokenType::Square_Open);
    lexer << Pattern("\\]", TokenType::Square_Close);
    lexer << Pattern("\\{", TokenType::Curly_Open);
    lexer << Pattern("\\}", TokenType::Curly_Close);
    lexer << Pattern("[!@#$%^&*\\-+=/<>]+", TokenType::Operator);

    std::cout << "Expression> ";
    std::getline(std::cin, input);
    while (input != "quit") {
        std::vector<Token> tokens = lexer.lex(input);
        debug_print(tokens);
        std::cout << "Expression> ";
        std::getline(std::cin, input);
    }
}

/*
int main(int argc, char *argv[]) {
    std::string input;
    std::map<std::string, NodePtr> symbols;

    std::cout << "Expression> ";
    std::getline(std::cin, input);
    while (input != "quit") {
        if (input == "vars") {
            //debug_print(symbols);
        }
        else {
            try {
                NodePtr lex_nodes = lex(input);
                //clean_whitespace(lex_nodes);
                //NodePtr parse_nodes = parse(lex_nodes);
                //Result result = eval(parse_nodes, symbols);

                //debug_print(lex_nodes);
                //std::cout << std::endl;
                //debug_print(parse_nodes, "");
                //if (result.type == ResultType::NUMBER) {
                // std::cout << result.value << std::endl;
                //}
            }
            catch (LexerError &le) {
                std::cerr << "Lexer error: " << le.reason << std::endl;
            }
            //catch (ParserError &pe) {
            // std::cerr << "Parser error: " << pe.reason << std::endl;
            //}
            //catch (EvalError &ee) {
            // std::cerr << "Eval error: " << ee.reason << std::endl;
            //}
        }
        std::cout << "Expression> ";
        std::getline(std::cin, input);
    }
}
*/
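
Given the patterns registered above, the read loop prints one line per token; identifier numbers follow the order of the TokenType enum (Whitespace = 0, Identifier = 1, Number = 2, Operator = 3, Parens_Open = 4, Parens_Close = 5, and so on), and columns are zero-based offsets into the input line. A session would look roughly like this:

    Expression> 3.5 * (x + 2)
    Token: 3.5(2) @ 0 to 3
    Token: *(3) @ 4 to 5
    Token: ((4) @ 6 to 7
    Token: x(1) @ 7 to 8
    Token: +(3) @ 9 to 10
    Token: 2(2) @ 11 to 12
    Token: )(5) @ 12 to 13
    Expression> quit

The row field of File_Position is never set by this version of the lexer, so only the column values carry useful data here.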