mirror of
https://github.com/open-source-parsers/jsoncpp.git
synced 2025-04-05 18:41:10 +02:00
Serialize UTF-8 string with Unicode escapes (#687)
Squashed and merged.
This commit is contained in:
parent
a3a4059367
commit
42a161fc80
@ -81,24 +81,6 @@ typedef std::unique_ptr<StreamWriter> StreamWriterPtr;
|
|||||||
typedef std::auto_ptr<StreamWriter> StreamWriterPtr;
|
typedef std::auto_ptr<StreamWriter> StreamWriterPtr;
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
static bool containsControlCharacter(const char* str) {
|
|
||||||
while (*str) {
|
|
||||||
if (isControlCharacter(*(str++)))
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
static bool containsControlCharacter0(const char* str, unsigned len) {
|
|
||||||
char const* end = str + len;
|
|
||||||
while (end != str) {
|
|
||||||
if (isControlCharacter(*str) || 0==*str)
|
|
||||||
return true;
|
|
||||||
++str;
|
|
||||||
}
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
JSONCPP_STRING valueToString(LargestInt value) {
|
JSONCPP_STRING valueToString(LargestInt value) {
|
||||||
UIntToStringBuffer buffer;
|
UIntToStringBuffer buffer;
|
||||||
char* current = buffer + sizeof(buffer);
|
char* current = buffer + sizeof(buffer);
|
||||||
@ -176,89 +158,103 @@ JSONCPP_STRING valueToString(double value) { return valueToString(value, false,
|
|||||||
|
|
||||||
JSONCPP_STRING valueToString(bool value) { return value ? "true" : "false"; }
|
JSONCPP_STRING valueToString(bool value) { return value ? "true" : "false"; }
|
||||||
|
|
||||||
JSONCPP_STRING valueToQuotedString(const char* value) {
|
static bool isAnyCharRequiredQuoting(char const* s, size_t n) {
|
||||||
if (value == NULL)
|
assert(s || !n);
|
||||||
return "";
|
|
||||||
// Not sure how to handle unicode...
|
|
||||||
if (strpbrk(value, "\"\\\b\f\n\r\t") == NULL &&
|
|
||||||
!containsControlCharacter(value))
|
|
||||||
return JSONCPP_STRING("\"") + value + "\"";
|
|
||||||
// We have to walk value and escape any special characters.
|
|
||||||
// Appending to JSONCPP_STRING is not efficient, but this should be rare.
|
|
||||||
// (Note: forward slashes are *not* rare, but I am not escaping them.)
|
|
||||||
JSONCPP_STRING::size_type maxsize =
|
|
||||||
strlen(value) * 2 + 3; // allescaped+quotes+NULL
|
|
||||||
JSONCPP_STRING result;
|
|
||||||
result.reserve(maxsize); // to avoid lots of mallocs
|
|
||||||
result += "\"";
|
|
||||||
for (const char* c = value; *c != 0; ++c) {
|
|
||||||
switch (*c) {
|
|
||||||
case '\"':
|
|
||||||
result += "\\\"";
|
|
||||||
break;
|
|
||||||
case '\\':
|
|
||||||
result += "\\\\";
|
|
||||||
break;
|
|
||||||
case '\b':
|
|
||||||
result += "\\b";
|
|
||||||
break;
|
|
||||||
case '\f':
|
|
||||||
result += "\\f";
|
|
||||||
break;
|
|
||||||
case '\n':
|
|
||||||
result += "\\n";
|
|
||||||
break;
|
|
||||||
case '\r':
|
|
||||||
result += "\\r";
|
|
||||||
break;
|
|
||||||
case '\t':
|
|
||||||
result += "\\t";
|
|
||||||
break;
|
|
||||||
// case '/':
|
|
||||||
// Even though \/ is considered a legal escape in JSON, a bare
|
|
||||||
// slash is also legal, so I see no reason to escape it.
|
|
||||||
// (I hope I am not misunderstanding something.
|
|
||||||
// blep notes: actually escaping \/ may be useful in javascript to avoid </
|
|
||||||
// sequence.
|
|
||||||
// Should add a flag to allow this compatibility mode and prevent this
|
|
||||||
// sequence from occurring.
|
|
||||||
default:
|
|
||||||
if (isControlCharacter(*c)) {
|
|
||||||
JSONCPP_OSTRINGSTREAM oss;
|
|
||||||
oss << "\\u" << std::hex << std::uppercase << std::setfill('0')
|
|
||||||
<< std::setw(4) << static_cast<int>(*c);
|
|
||||||
result += oss.str();
|
|
||||||
} else {
|
|
||||||
result += *c;
|
|
||||||
}
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
result += "\"";
|
|
||||||
return result;
|
|
||||||
}
|
|
||||||
|
|
||||||
// https://github.com/upcaste/upcaste/blob/master/src/upcore/src/cstring/strnpbrk.cpp
|
|
||||||
static char const* strnpbrk(char const* s, char const* accept, size_t n) {
|
|
||||||
assert((s || !n) && accept);
|
|
||||||
|
|
||||||
char const* const end = s + n;
|
char const* const end = s + n;
|
||||||
for (char const* cur = s; cur < end; ++cur) {
|
for (char const* cur = s; cur < end; ++cur) {
|
||||||
int const c = *cur;
|
if (*cur == '\\' || *cur == '\"' || *cur < ' '
|
||||||
for (char const* a = accept; *a; ++a) {
|
|| static_cast<unsigned char>(*cur) < 0x80)
|
||||||
if (*a == c) {
|
return true;
|
||||||
return cur;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
return NULL;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static unsigned int utf8ToCodepoint(const char*& s, const char* e) {
|
||||||
|
const unsigned int REPLACEMENT_CHARACTER = 0xFFFD;
|
||||||
|
|
||||||
|
unsigned int firstByte = static_cast<unsigned char>(*s);
|
||||||
|
|
||||||
|
if (firstByte < 0x80)
|
||||||
|
return firstByte;
|
||||||
|
|
||||||
|
if (firstByte < 0xE0) {
|
||||||
|
if (e - s < 2)
|
||||||
|
return REPLACEMENT_CHARACTER;
|
||||||
|
|
||||||
|
unsigned int calculated = ((firstByte & 0x1F) << 6)
|
||||||
|
| (static_cast<unsigned int>(s[1]) & 0x3F);
|
||||||
|
s += 1;
|
||||||
|
// oversized encoded characters are invalid
|
||||||
|
return calculated < 0x80 ? REPLACEMENT_CHARACTER : calculated;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (firstByte < 0xF0) {
|
||||||
|
if (e - s < 3)
|
||||||
|
return REPLACEMENT_CHARACTER;
|
||||||
|
|
||||||
|
unsigned int calculated = ((firstByte & 0x0F) << 12)
|
||||||
|
| ((static_cast<unsigned int>(s[1]) & 0x3F) << 6)
|
||||||
|
| (static_cast<unsigned int>(s[2]) & 0x3F);
|
||||||
|
s += 2;
|
||||||
|
// surrogates aren't valid codepoints itself
|
||||||
|
// shouldn't be UTF-8 encoded
|
||||||
|
if (calculated >= 0xD800 && calculated >= 0xDFFF)
|
||||||
|
return REPLACEMENT_CHARACTER;
|
||||||
|
// oversized encoded characters are invalid
|
||||||
|
return calculated < 0x800 ? REPLACEMENT_CHARACTER : calculated;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (firstByte < 0xF8) {
|
||||||
|
if (e - s < 4)
|
||||||
|
return REPLACEMENT_CHARACTER;
|
||||||
|
|
||||||
|
unsigned int calculated = ((firstByte & 0x07) << 24)
|
||||||
|
| ((static_cast<unsigned int>(s[1]) & 0x3F) << 12)
|
||||||
|
| ((static_cast<unsigned int>(s[2]) & 0x3F) << 6)
|
||||||
|
| (static_cast<unsigned int>(s[3]) & 0x3F);
|
||||||
|
s += 3;
|
||||||
|
// oversized encoded characters are invalid
|
||||||
|
return calculated < 0x10000 ? REPLACEMENT_CHARACTER : calculated;
|
||||||
|
}
|
||||||
|
|
||||||
|
return REPLACEMENT_CHARACTER;
|
||||||
|
}
|
||||||
|
|
||||||
|
static const char hex2[] =
|
||||||
|
"000102030405060708090a0b0c0d0e0f"
|
||||||
|
"101112131415161718191a1b1c1d1e1f"
|
||||||
|
"202122232425262728292a2b2c2d2e2f"
|
||||||
|
"303132333435363738393a3b3c3d3e3f"
|
||||||
|
"404142434445464748494a4b4c4d4e4f"
|
||||||
|
"505152535455565758595a5b5c5d5e5f"
|
||||||
|
"606162636465666768696a6b6c6d6e6f"
|
||||||
|
"707172737475767778797a7b7c7d7e7f"
|
||||||
|
"808182838485868788898a8b8c8d8e8f"
|
||||||
|
"909192939495969798999a9b9c9d9e9f"
|
||||||
|
"a0a1a2a3a4a5a6a7a8a9aaabacadaeaf"
|
||||||
|
"b0b1b2b3b4b5b6b7b8b9babbbcbdbebf"
|
||||||
|
"c0c1c2c3c4c5c6c7c8c9cacbcccdcecf"
|
||||||
|
"d0d1d2d3d4d5d6d7d8d9dadbdcdddedf"
|
||||||
|
"e0e1e2e3e4e5e6e7e8e9eaebecedeeef"
|
||||||
|
"f0f1f2f3f4f5f6f7f8f9fafbfcfdfeff";
|
||||||
|
|
||||||
|
static JSONCPP_STRING toHex16Bit(unsigned int x) {
|
||||||
|
const unsigned int hi = (x >> 8) & 0xff;
|
||||||
|
const unsigned int lo = x & 0xff;
|
||||||
|
JSONCPP_STRING result(4, ' ');
|
||||||
|
result[0] = hex2[2 * hi];
|
||||||
|
result[1] = hex2[2 * hi + 1];
|
||||||
|
result[2] = hex2[2 * lo];
|
||||||
|
result[3] = hex2[2 * lo + 1];
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
|
||||||
static JSONCPP_STRING valueToQuotedStringN(const char* value, unsigned length) {
|
static JSONCPP_STRING valueToQuotedStringN(const char* value, unsigned length) {
|
||||||
if (value == NULL)
|
if (value == NULL)
|
||||||
return "";
|
return "";
|
||||||
// Not sure how to handle unicode...
|
|
||||||
if (strnpbrk(value, "\"\\\b\f\n\r\t", length) == NULL &&
|
if (!isAnyCharRequiredQuoting(value, length))
|
||||||
!containsControlCharacter0(value, length))
|
|
||||||
return JSONCPP_STRING("\"") + value + "\"";
|
return JSONCPP_STRING("\"") + value + "\"";
|
||||||
// We have to walk value and escape any special characters.
|
// We have to walk value and escape any special characters.
|
||||||
// Appending to JSONCPP_STRING is not efficient, but this should be rare.
|
// Appending to JSONCPP_STRING is not efficient, but this should be rare.
|
||||||
@ -300,14 +296,24 @@ static JSONCPP_STRING valueToQuotedStringN(const char* value, unsigned length) {
|
|||||||
// sequence.
|
// sequence.
|
||||||
// Should add a flag to allow this compatibility mode and prevent this
|
// Should add a flag to allow this compatibility mode and prevent this
|
||||||
// sequence from occurring.
|
// sequence from occurring.
|
||||||
default:
|
default: {
|
||||||
if ((isControlCharacter(*c)) || (*c == 0)) {
|
unsigned int cp = utf8ToCodepoint(c, end);
|
||||||
JSONCPP_OSTRINGSTREAM oss;
|
// don't escape non-control characters
|
||||||
oss << "\\u" << std::hex << std::uppercase << std::setfill('0')
|
// (short escape sequence are applied above)
|
||||||
<< std::setw(4) << static_cast<int>(*c);
|
if (cp < 0x80 && cp >= 0x20)
|
||||||
result += oss.str();
|
result += static_cast<char>(cp);
|
||||||
} else {
|
else if (cp < 0x10000) { // codepoint is in Basic Multilingual Plane
|
||||||
result += *c;
|
result += "\\u";
|
||||||
|
result += toHex16Bit(cp);
|
||||||
|
}
|
||||||
|
else { // codepoint is not in Basic Multilingual Plane
|
||||||
|
// convert to surrogate pair first
|
||||||
|
cp -= 0x10000;
|
||||||
|
result += "\\u";
|
||||||
|
result += toHex16Bit((cp >> 10) + 0xD800);
|
||||||
|
result += "\\u";
|
||||||
|
result += toHex16Bit((cp & 0x3FF) + 0xDC00);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
@ -316,6 +322,10 @@ static JSONCPP_STRING valueToQuotedStringN(const char* value, unsigned length) {
|
|||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
JSONCPP_STRING valueToQuotedString(const char* value) {
|
||||||
|
return valueToQuotedStringN(value, static_cast<unsigned int>(strlen(value)));
|
||||||
|
}
|
||||||
|
|
||||||
// Class Writer
|
// Class Writer
|
||||||
// //////////////////////////////////////////////////////////////////
|
// //////////////////////////////////////////////////////////////////
|
||||||
Writer::~Writer() {}
|
Writer::~Writer() {}
|
||||||
|
Loading…
x
Reference in New Issue
Block a user