From 34c6b1721569ac23024ec648253b1352cda41aac Mon Sep 17 00:00:00 2001
From: Christian Kaeser <christian@khs-net.de>
Date: Sun, 8 Nov 2015 11:39:48 +0100
Subject: [PATCH 1/2] Fix broken escape sequence parsing after octal/hex escape

The parser code simply appended the first character after an octal/hex
sequence as raw text, producing erroneous data whenever another escape
sequence followed directly after it.
---
 .../chaiscript/language/chaiscript_parser.hpp | 52 ++++++++++---------
 unittests/hex_escapes.chai                    |  1 +
 unittests/octal_escapes.chai                  |  1 +
 3 files changed, 29 insertions(+), 25 deletions(-)

diff --git a/include/chaiscript/language/chaiscript_parser.hpp b/include/chaiscript/language/chaiscript_parser.hpp
index 37b717f..e50046d 100644
--- a/include/chaiscript/language/chaiscript_parser.hpp
+++ b/include/chaiscript/language/chaiscript_parser.hpp
@@ -961,6 +961,32 @@ namespace chaiscript
         }
 
         void parse(const char_type t_char, const int line, const int col, const std::string &filename) {
+          const bool is_octal_char = t_char >= '0' && t_char <= '7';
+
+          if (is_octal) {
+            if (is_octal_char) {
+              octal_matches.push_back(t_char);
+
+              if (octal_matches.size() == 3) {
+                process_octal();
+              }
+              return;
+            } else {
+              process_octal();
+            }
+          } else if (is_hex) {
+            const bool is_hex_char = (t_char >= '0' && t_char <= '9')
+                                  || (t_char >= 'a' && t_char <= 'f')
+                                  || (t_char >= 'A' && t_char <= 'F');
+
+            if (is_hex_char) {
+              hex_matches.push_back(t_char);
+              return;
+            } else {
+              process_hex();
+            }
+          }
+
           if (t_char == '\\') {
             if (is_escaped) {
               match.push_back('\\');
@@ -970,31 +996,7 @@ namespace chaiscript
             }
           } else {
             if (is_escaped) {
-              const bool is_octal_char = t_char >= '0' && t_char <= '7';
-
-              if (is_octal) {
-                if (is_octal_char) {
-                  octal_matches.push_back(t_char);
-
-                  if (octal_matches.size() == 3) {
-                    process_octal();
-                  }
-                } else {
-                  process_octal();
-                  match.push_back(t_char);
-                }
-              } else if (is_hex) {
-                const bool is_hex_char = (t_char >= '0' && t_char <= '9')
-                                      || (t_char >= 'a' && t_char <= 'f')
-                                      || (t_char >= 'A' && t_char <= 'F');
-
-                if (is_hex_char) {
-                  hex_matches.push_back(t_char);
-                } else {
-                  process_hex();
-                  match.push_back(t_char);
-                }
-              } else if (is_octal_char) {
+              if (is_octal_char) {
                 is_octal = true;
                 octal_matches.push_back(t_char);
               } else if (t_char == 'x') {
diff --git a/unittests/hex_escapes.chai b/unittests/hex_escapes.chai
index 283ef87..14ec62e 100644
--- a/unittests/hex_escapes.chai
+++ b/unittests/hex_escapes.chai
@@ -3,4 +3,5 @@ assert_equal("\x39", "9")
 assert_equal("\x039", "9")
 assert_equal("\x39g", "9g")
 assert_equal("b\x39g", "b9g")
+assert_equal("\x39\x38g", "98g")
 
diff --git a/unittests/octal_escapes.chai b/unittests/octal_escapes.chai
index 83b5392..a220d57 100644
--- a/unittests/octal_escapes.chai
+++ b/unittests/octal_escapes.chai
@@ -3,4 +3,5 @@ assert_equal("\71", "9")
 assert_equal("\071", "9")
 assert_equal("\71a", "9a")
 assert_equal("b\71a", "b9a")
+assert_equal("\71\70a", "98a")
 

From 202204a82ac2160780e12a51ffa6b1357b3dea35 Mon Sep 17 00:00:00 2001
From: Christian Kaeser <christian@khs-net.de>
Date: Sun, 8 Nov 2015 18:36:16 +0100
Subject: [PATCH 2/2] Limit hexadecimal escape sequence length

Helps with cases like "\xFFecho" by limiting the number of hex digits
that will be parsed to the maximum suitable for the char type.
This rule differs from the C/C++ standard, but ChaiScript does not offer
the same workaround options.
Furthermore, without this limit, a hexadecimal sequence longer than can
fit into the char type is undefined behavior anyway.
---
 include/chaiscript/language/chaiscript_parser.hpp | 8 ++++++++
 unittests/hex_escapes.chai                        | 2 +-
 2 files changed, 9 insertions(+), 1 deletion(-)

diff --git a/include/chaiscript/language/chaiscript_parser.hpp b/include/chaiscript/language/chaiscript_parser.hpp
index e50046d..6548521 100644
--- a/include/chaiscript/language/chaiscript_parser.hpp
+++ b/include/chaiscript/language/chaiscript_parser.hpp
@@ -981,6 +981,14 @@ namespace chaiscript
 
             if (is_hex_char) {
               hex_matches.push_back(t_char);
+
+              if (hex_matches.size() == 2*sizeof(char_type)) {
+                // This rule differs from the C/C++ standard, but ChaiScript
+                // does not offer the same workaround options, and having
+                // hexadecimal sequences longer than can fit into the char
+                // type is undefined behavior anyway.
+                process_hex();
+              }
               return;
             } else {
               process_hex();
diff --git a/unittests/hex_escapes.chai b/unittests/hex_escapes.chai
index 14ec62e..fdd0a8a 100644
--- a/unittests/hex_escapes.chai
+++ b/unittests/hex_escapes.chai
@@ -1,6 +1,6 @@
 
 assert_equal("\x39", "9")
-assert_equal("\x039", "9")
+assert_equal("\x39ec", "9ec")
 assert_equal("\x39g", "9g")
 assert_equal("b\x39g", "b9g")
 assert_equal("\x39\x38g", "98g")