From 34c6b1721569ac23024ec648253b1352cda41aac Mon Sep 17 00:00:00 2001 From: Christian Kaeser Date: Sun, 8 Nov 2015 11:39:48 +0100 Subject: [PATCH 1/2] Fix broken escape sequence parsing after octal/hex escape The parser code just added the first character after an octal/hex sequence as raw text, resulting in erroneous data whenever another escape sequence follows directly after. --- .../chaiscript/language/chaiscript_parser.hpp | 52 ++++++++++--------- unittests/hex_escapes.chai | 1 + unittests/octal_escapes.chai | 1 + 3 files changed, 29 insertions(+), 25 deletions(-) diff --git a/include/chaiscript/language/chaiscript_parser.hpp b/include/chaiscript/language/chaiscript_parser.hpp index 37b717f..e50046d 100644 --- a/include/chaiscript/language/chaiscript_parser.hpp +++ b/include/chaiscript/language/chaiscript_parser.hpp @@ -961,6 +961,32 @@ namespace chaiscript } void parse(const char_type t_char, const int line, const int col, const std::string &filename) { + const bool is_octal_char = t_char >= '0' && t_char <= '7'; + + if (is_octal) { + if (is_octal_char) { + octal_matches.push_back(t_char); + + if (octal_matches.size() == 3) { + process_octal(); + } + return; + } else { + process_octal(); + } + } else if (is_hex) { + const bool is_hex_char = (t_char >= '0' && t_char <= '9') + || (t_char >= 'a' && t_char <= 'f') + || (t_char >= 'A' && t_char <= 'F'); + + if (is_hex_char) { + hex_matches.push_back(t_char); + return; + } else { + process_hex(); + } + } + if (t_char == '\\') { if (is_escaped) { match.push_back('\\'); @@ -970,31 +996,7 @@ namespace chaiscript } } else { if (is_escaped) { - const bool is_octal_char = t_char >= '0' && t_char <= '7'; - - if (is_octal) { - if (is_octal_char) { - octal_matches.push_back(t_char); - - if (octal_matches.size() == 3) { - process_octal(); - } - } else { - process_octal(); - match.push_back(t_char); - } - } else if (is_hex) { - const bool is_hex_char = (t_char >= '0' && t_char <= '9') - || (t_char >= 'a' && t_char <= 'f') - 
|| (t_char >= 'A' && t_char <= 'F'); - - if (is_hex_char) { - hex_matches.push_back(t_char); - } else { - process_hex(); - match.push_back(t_char); - } - } else if (is_octal_char) { + if (is_octal_char) { is_octal = true; octal_matches.push_back(t_char); } else if (t_char == 'x') { diff --git a/unittests/hex_escapes.chai b/unittests/hex_escapes.chai index 283ef87..14ec62e 100644 --- a/unittests/hex_escapes.chai +++ b/unittests/hex_escapes.chai @@ -3,4 +3,5 @@ assert_equal("\x39", "9") assert_equal("\x039", "9") assert_equal("\x39g", "9g") assert_equal("b\x39g", "b9g") +assert_equal("\x39\x38g", "98g") diff --git a/unittests/octal_escapes.chai b/unittests/octal_escapes.chai index 83b5392..a220d57 100644 --- a/unittests/octal_escapes.chai +++ b/unittests/octal_escapes.chai @@ -3,4 +3,5 @@ assert_equal("\71", "9") assert_equal("\071", "9") assert_equal("\71a", "9a") assert_equal("b\71a", "b9a") +assert_equal("\71\70a", "98a") From 202204a82ac2160780e12a51ffa6b1357b3dea35 Mon Sep 17 00:00:00 2001 From: Christian Kaeser Date: Sun, 8 Nov 2015 18:36:16 +0100 Subject: [PATCH 2/2] Limit hexadecimal escape sequence length Helps with cases like "\xFFecho" by limiting the number of hex digits that will be parsed to the maximum suitable for the char type. This rule differs from the C/C++ standard, but ChaiScript does not offer the same workaround options. Furthermore, without this limit, hexadecimal sequences longer than can fit into the char type are undefined behavior anyway. 
--- include/chaiscript/language/chaiscript_parser.hpp | 8 ++++++++ unittests/hex_escapes.chai | 2 +- 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/include/chaiscript/language/chaiscript_parser.hpp b/include/chaiscript/language/chaiscript_parser.hpp index e50046d..6548521 100644 --- a/include/chaiscript/language/chaiscript_parser.hpp +++ b/include/chaiscript/language/chaiscript_parser.hpp @@ -981,6 +981,14 @@ namespace chaiscript if (is_hex_char) { hex_matches.push_back(t_char); + + if (hex_matches.size() == 2*sizeof(char_type)) { + // This rule differs from the C/C++ standard, but ChaiScript + // does not offer the same workaround options, and having + // hexadecimal sequences longer than can fit into the char + // type is undefined behavior anyway. + process_hex(); + } return; } else { process_hex(); diff --git a/unittests/hex_escapes.chai b/unittests/hex_escapes.chai index 14ec62e..fdd0a8a 100644 --- a/unittests/hex_escapes.chai +++ b/unittests/hex_escapes.chai @@ -1,6 +1,6 @@ assert_equal("\x39", "9") -assert_equal("\x039", "9") +assert_equal("\x39ec", "9ec") assert_equal("\x39g", "9g") assert_equal("b\x39g", "b9g") assert_equal("\x39\x38g", "98g")