From 29de485a466327433ec7fe2ab9db67a1820534df Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kacper=20Piwi=C5=84ski?= Date: Mon, 16 Dec 2019 22:43:40 +0100 Subject: [PATCH] unescape Backslash char in UTF8 unescape method --- CONTRIBUTORS | 1 + Foundation/src/UTF8String.cpp | 79 +++++++++++++-------- Foundation/testsuite/src/UTF8StringTest.cpp | 2 + 3 files changed, 54 insertions(+), 28 deletions(-) diff --git a/CONTRIBUTORS b/CONTRIBUTORS index 7a74a9cc0..102a4a299 100644 --- a/CONTRIBUTORS +++ b/CONTRIBUTORS @@ -50,3 +50,4 @@ Jeff Adams Martin Osborne Björn Schramke Francis Andre +Kacper Piwiński diff --git a/Foundation/src/UTF8String.cpp b/Foundation/src/UTF8String.cpp index e5020695a..d9336f3b2 100644 --- a/Foundation/src/UTF8String.cpp +++ b/Foundation/src/UTF8String.cpp @@ -32,11 +32,11 @@ namespace int UTF8::icompare(const std::string& str, std::string::size_type pos, std::string::size_type n, std::string::const_iterator it2, std::string::const_iterator end2) -{ +{ std::string::size_type sz = str.size(); if (pos > sz) pos = sz; if (pos + n > sz) n = sz - pos; - TextIterator uit1(str.begin() + pos, str.begin() + pos + n, utf8); + TextIterator uit1(str.begin() + pos, str.begin() + pos + n, utf8); TextIterator uend1(str.begin() + pos + n); TextIterator uit2(it2, end2, utf8); TextIterator uend2(end2); @@ -50,7 +50,7 @@ int UTF8::icompare(const std::string& str, std::string::size_type pos, std::stri return 1; ++uit1; ++uit2; } - + if (uit1 == uend1) return uit2 == uend2 ? 0 : -1; else @@ -162,9 +162,9 @@ std::string& UTF8::toLowerInPlace(std::string& str) void UTF8::removeBOM(std::string& str) { - if (str.size() >= 3 - && static_cast<unsigned char>(str[0]) == 0xEF - && static_cast<unsigned char>(str[1]) == 0xBB + if (str.size() >= 3 + && static_cast<unsigned char>(str[0]) == 0xEF + && static_cast<unsigned char>(str[1]) == 0xBB && static_cast<unsigned char>(str[2]) == 0xBF) { str.erase(0, 3); @@ -264,42 +264,74 @@ std::string UTF8::unescape(const std::string::const_iterator& begin, const std:: //Invalid sequence! 
} - if (*it == 'n') + switch (*it) + { + case 'U': + { + char digs[9]; + std::memset(digs, 0, 9); + unsigned int dno = 0; + + it++; + while (it != end && Ascii::isHexDigit(*it) && dno < 8) + { + digs[dno++] = *it++; + } + if (dno > 0) + { + ch = std::strtol(digs, NULL, 16); + } + break; + } + case '\\': + { + ch = '\\'; + it++; + break; + } + case 'n': { ch = '\n'; it++; + break; } - else if (*it == 't') + case 't': { ch = '\t'; it++; + break; } - else if (*it == 'r') + case 'r': { ch = '\r'; it++; + break; } - else if (*it == 'b') + case 'b': { ch = '\b'; it++; + break; } - else if (*it == 'f') + case 'f': { ch = '\f'; it++; + break; } - else if (*it == 'v') + case 'v': { ch = '\v'; it++; + break; } - else if (*it == 'a') + case 'a': { ch = '\a'; it++; + break; } - else if (*it == 'u') + case 'u': { char digs[5]; std::memset(digs, 0, 5); @@ -345,23 +377,14 @@ std::string UTF8::unescape(const std::string::const_iterator& begin, const std:: } } } + break; } - else if (*it == 'U') + default: { - char digs[9]; - std::memset(digs, 0, 9); - unsigned int dno = 0; - - it++; - while (it != end && Ascii::isHexDigit(*it) && dno < 8) - { - digs[dno++] = *it++; - } - if (dno > 0) - { - ch = std::strtol(digs, NULL, 16); - } + //Invalid sequence! 
+ break; } + }//end switch } unsigned char utf8[4]; diff --git a/Foundation/testsuite/src/UTF8StringTest.cpp b/Foundation/testsuite/src/UTF8StringTest.cpp index 42a48fbcf..6799ff35d 100644 --- a/Foundation/testsuite/src/UTF8StringTest.cpp +++ b/Foundation/testsuite/src/UTF8StringTest.cpp @@ -96,9 +96,11 @@ void UTF8StringTest::testUnescape() { std::string s1("A \\t, a \\u000B, and an \\u0007 walk into a |, and the barman says \\u0402"); std::string s2("A \\t, a \\v, and an \\a walk into a |, and the barman says \\u0402"); + std::string s3("\\\\"); assertTrue (UTF8::unescape(s1) == "A \t, a \v, and an \a walk into a |, and the barman says \xD0\x82"); assertTrue (UTF8::unescape(s2) == "A \t, a \v, and an \a walk into a |, and the barman says \xD0\x82"); + assertTrue (UTF8::unescape(s3) == "\\"); }