From 19a730b78b8aa3a38a468f927567a4d94d4f7f2c Mon Sep 17 00:00:00 2001
From: ELynx <esurovin@yandex.ru>
Date: Mon, 27 Jun 2016 11:52:22 +0300
Subject: [PATCH] Basic support for Unicode 'slash'uABCD escape notation -
 parser understands escape sequence and following data

---
 .../chaiscript/language/chaiscript_parser.hpp | 64 +++++++++++++++++--
 1 file changed, 60 insertions(+), 4 deletions(-)
diff --git a/include/chaiscript/language/chaiscript_parser.hpp b/include/chaiscript/language/chaiscript_parser.hpp
index 1ea7371..4104278 100644
--- a/include/chaiscript/language/chaiscript_parser.hpp
+++ b/include/chaiscript/language/chaiscript_parser.hpp
@@ -928,6 +928,29 @@ namespace chaiscript
         return false;
       }
 
+      // Builds the string_type holding the code point parsed from a 'slash'uABCD escape.
+      // Wide string types (u16, u32 and probably wchar) take it as a single code unit;
+      // char strings get its UTF-8 bytes instead of the raw high/low byte pair.
+      // The sizeof test stands in for an explicit specialization, which cannot be
+      // declared static (and is rejected at class scope by some compilers).
+      template<typename string_type>
+      static string_type str_from_ll(long long val)
+      {
+        typedef typename string_type::value_type char_type;
+        if (sizeof(char_type) > 1 || val < 0x80) {
+          return string_type(1, static_cast<char_type>(val)); //size, character
+        }
+        string_type utf8; // val needs two (below 0x800) or three UTF-8 bytes
+        if (val < 0x800) {
+          utf8.push_back(static_cast<char_type>(0xC0 | (val >> 6)));
+        } else {
+          utf8.push_back(static_cast<char_type>(0xE0 | ((val >> 12) & 0x0F)));
+          utf8.push_back(static_cast<char_type>(0x80 | ((val >> 6) & 0x3F)));
+        }
+        utf8.push_back(static_cast<char_type>(0x80 | (val & 0x3F)));
+        return utf8;
+      }
+
       template<typename string_type>
       struct Char_Parser
       {
@@ -938,6 +961,7 @@ namespace chaiscript
         bool saw_interpolation_marker;
         bool is_octal;
         bool is_hex;
+        bool is_unicode;
         const bool interpolation_allowed;
 
         string_type octal_matches;
@@ -950,6 +974,7 @@ namespace chaiscript
             saw_interpolation_marker(false),
             is_octal(false),
             is_hex(false),
+            is_unicode(false),
             interpolation_allowed(t_interpolation_allowed)
         {
         }
@@ -964,6 +989,10 @@ namespace chaiscript
           if (is_hex) {
             process_hex();
           }
+
+          if (is_unicode) {
+            process_unicode();
+          }
         }
 
         void process_hex()
@@ -985,9 +1014,23 @@ namespace chaiscript
           is_octal = false;
         }
 
+        // Ends a 'slash'uABCD escape: converts the collected hex digits and appends them to match.
+        void process_unicode()
+        {
+          // parse() may get here before any digit arrived, and stoll throws on an empty string
+          if (!hex_matches.empty()) { match += str_from_ll<string_type>(stoll(hex_matches, 0, 16)); }
+          hex_matches.clear();
+          is_escaped = false;
+          is_unicode = false;
+        }
+
         void parse(const char_type t_char, const int line, const int col, const std::string &filename) {
           const bool is_octal_char = t_char >= '0' && t_char <= '7';
 
+          const bool is_hex_char  = (t_char >= '0' && t_char <= '9')
+                                 || (t_char >= 'a' && t_char <= 'f')
+                                 || (t_char >= 'A' && t_char <= 'F');
+
           if (is_octal) {
             if (is_octal_char) {
               octal_matches.push_back(t_char);
@@ -1000,10 +1043,6 @@ namespace chaiscript
               process_octal();
             }
           } else if (is_hex) {
-            const bool is_hex_char = (t_char >= '0' && t_char <= '9')
-                                  || (t_char >= 'a' && t_char <= 'f')
-                                  || (t_char >= 'A' && t_char <= 'F');
-
             if (is_hex_char) {
               hex_matches.push_back(t_char);
 
@@ -1018,6 +1057,21 @@ namespace chaiscript
             } else {
               process_hex();
             }
+          } else if (is_unicode) {
+            if (!is_hex_char) {
+              // Escape ended early (someone may have written 'slash'uAA only):
+              // convert what was collected; t_char is then handled below
+              process_unicode();
+            } else {
+              hex_matches.push_back(t_char);
+
+              if (hex_matches.size() == 4) {
+                // Format is specified to be 'slash'uABCD:
+                // all four digits from A to D are in, so convert them
+                process_unicode();
+              }
+              return;
+            }
           }
 
           if (t_char == '\\') {
@@ -1034,6 +1088,8 @@ namespace chaiscript
                 octal_matches.push_back(t_char);
               } else if (t_char == 'x') {
                 is_hex = true;
+              } else if (t_char == 'u') {
+                is_unicode = true;
               } else {
                 switch (t_char) {
                   case ('\'') : match.push_back('\''); break;