mirror of
https://github.com/open-source-parsers/jsoncpp.git
synced 2025-05-03 07:52:28 +02:00
Added emitUTF8 setting. (#1045)
* Added emitUTF8 setting to emit UTF-8 format JSON.
* Added a test for emitUTF8, with it in its default, on, and off states.
* Review comments addressed.
* Merged master into my branch & resolved conflicts.
* Fix clang-format errors.
* Fix clang-format errors.
* Fixed clang-format errors.
* Fixed clang-format errors.
This commit is contained in:
parent
f59ac2a1d7
commit
a955529e47
@ -264,7 +264,8 @@ static String toHex16Bit(unsigned int x) {
|
|||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
|
|
||||||
static String valueToQuotedStringN(const char* value, unsigned length) {
|
static String valueToQuotedStringN(const char* value, unsigned length,
|
||||||
|
bool emitUTF8 = false) {
|
||||||
if (value == nullptr)
|
if (value == nullptr)
|
||||||
return "";
|
return "";
|
||||||
|
|
||||||
@ -310,21 +311,31 @@ static String valueToQuotedStringN(const char* value, unsigned length) {
|
|||||||
// Should add a flag to allow this compatibility mode and prevent this
|
// Should add a flag to allow this compatibility mode and prevent this
|
||||||
// sequence from occurring.
|
// sequence from occurring.
|
||||||
default: {
|
default: {
|
||||||
unsigned int cp = utf8ToCodepoint(c, end);
|
if (emitUTF8) {
|
||||||
// don't escape non-control characters
|
result += *c;
|
||||||
// (short escape sequence are applied above)
|
} else {
|
||||||
if (cp < 0x80 && cp >= 0x20)
|
unsigned int codepoint = utf8ToCodepoint(c, end);
|
||||||
result += static_cast<char>(cp);
|
const unsigned int FIRST_NON_CONTROL_CODEPOINT = 0x20;
|
||||||
else if (cp < 0x10000) { // codepoint is in Basic Multilingual Plane
|
const unsigned int LAST_NON_CONTROL_CODEPOINT = 0x7F;
|
||||||
result += "\\u";
|
const unsigned int FIRST_SURROGATE_PAIR_CODEPOINT = 0x10000;
|
||||||
result += toHex16Bit(cp);
|
// don't escape non-control characters
|
||||||
} else { // codepoint is not in Basic Multilingual Plane
|
// (short escape sequence are applied above)
|
||||||
// convert to surrogate pair first
|
if (FIRST_NON_CONTROL_CODEPOINT <= codepoint &&
|
||||||
cp -= 0x10000;
|
codepoint <= LAST_NON_CONTROL_CODEPOINT) {
|
||||||
result += "\\u";
|
result += static_cast<char>(codepoint);
|
||||||
result += toHex16Bit((cp >> 10) + 0xD800);
|
} else if (codepoint <
|
||||||
result += "\\u";
|
FIRST_SURROGATE_PAIR_CODEPOINT) { // codepoint is in Basic
|
||||||
result += toHex16Bit((cp & 0x3FF) + 0xDC00);
|
// Multilingual Plane
|
||||||
|
result += "\\u";
|
||||||
|
result += toHex16Bit(codepoint);
|
||||||
|
} else { // codepoint is not in Basic Multilingual Plane
|
||||||
|
// convert to surrogate pair first
|
||||||
|
codepoint -= FIRST_SURROGATE_PAIR_CODEPOINT;
|
||||||
|
result += "\\u";
|
||||||
|
result += toHex16Bit((codepoint >> 10) + 0xD800);
|
||||||
|
result += "\\u";
|
||||||
|
result += toHex16Bit((codepoint & 0x3FF) + 0xDC00);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
} break;
|
} break;
|
||||||
}
|
}
|
||||||
@ -864,7 +875,8 @@ struct BuiltStyledStreamWriter : public StreamWriter {
|
|||||||
BuiltStyledStreamWriter(String indentation, CommentStyle::Enum cs,
|
BuiltStyledStreamWriter(String indentation, CommentStyle::Enum cs,
|
||||||
String colonSymbol, String nullSymbol,
|
String colonSymbol, String nullSymbol,
|
||||||
String endingLineFeedSymbol, bool useSpecialFloats,
|
String endingLineFeedSymbol, bool useSpecialFloats,
|
||||||
unsigned int precision, PrecisionType precisionType);
|
bool emitUTF8, unsigned int precision,
|
||||||
|
PrecisionType precisionType);
|
||||||
int write(Value const& root, OStream* sout) override;
|
int write(Value const& root, OStream* sout) override;
|
||||||
|
|
||||||
private:
|
private:
|
||||||
@ -893,19 +905,20 @@ private:
|
|||||||
bool addChildValues_ : 1;
|
bool addChildValues_ : 1;
|
||||||
bool indented_ : 1;
|
bool indented_ : 1;
|
||||||
bool useSpecialFloats_ : 1;
|
bool useSpecialFloats_ : 1;
|
||||||
|
bool emitUTF8_ : 1;
|
||||||
unsigned int precision_;
|
unsigned int precision_;
|
||||||
PrecisionType precisionType_;
|
PrecisionType precisionType_;
|
||||||
};
|
};
|
||||||
BuiltStyledStreamWriter::BuiltStyledStreamWriter(
|
BuiltStyledStreamWriter::BuiltStyledStreamWriter(
|
||||||
String indentation, CommentStyle::Enum cs, String colonSymbol,
|
String indentation, CommentStyle::Enum cs, String colonSymbol,
|
||||||
String nullSymbol, String endingLineFeedSymbol, bool useSpecialFloats,
|
String nullSymbol, String endingLineFeedSymbol, bool useSpecialFloats,
|
||||||
unsigned int precision, PrecisionType precisionType)
|
bool emitUTF8, unsigned int precision, PrecisionType precisionType)
|
||||||
: rightMargin_(74), indentation_(std::move(indentation)), cs_(cs),
|
: rightMargin_(74), indentation_(std::move(indentation)), cs_(cs),
|
||||||
colonSymbol_(std::move(colonSymbol)), nullSymbol_(std::move(nullSymbol)),
|
colonSymbol_(std::move(colonSymbol)), nullSymbol_(std::move(nullSymbol)),
|
||||||
endingLineFeedSymbol_(std::move(endingLineFeedSymbol)),
|
endingLineFeedSymbol_(std::move(endingLineFeedSymbol)),
|
||||||
addChildValues_(false), indented_(false),
|
addChildValues_(false), indented_(false),
|
||||||
useSpecialFloats_(useSpecialFloats), precision_(precision),
|
useSpecialFloats_(useSpecialFloats), emitUTF8_(emitUTF8),
|
||||||
precisionType_(precisionType) {}
|
precision_(precision), precisionType_(precisionType) {}
|
||||||
int BuiltStyledStreamWriter::write(Value const& root, OStream* sout) {
|
int BuiltStyledStreamWriter::write(Value const& root, OStream* sout) {
|
||||||
sout_ = sout;
|
sout_ = sout;
|
||||||
addChildValues_ = false;
|
addChildValues_ = false;
|
||||||
@ -942,7 +955,8 @@ void BuiltStyledStreamWriter::writeValue(Value const& value) {
|
|||||||
char const* end;
|
char const* end;
|
||||||
bool ok = value.getString(&str, &end);
|
bool ok = value.getString(&str, &end);
|
||||||
if (ok)
|
if (ok)
|
||||||
pushValue(valueToQuotedStringN(str, static_cast<unsigned>(end - str)));
|
pushValue(valueToQuotedStringN(str, static_cast<unsigned>(end - str),
|
||||||
|
emitUTF8_));
|
||||||
else
|
else
|
||||||
pushValue("");
|
pushValue("");
|
||||||
break;
|
break;
|
||||||
@ -966,7 +980,7 @@ void BuiltStyledStreamWriter::writeValue(Value const& value) {
|
|||||||
Value const& childValue = value[name];
|
Value const& childValue = value[name];
|
||||||
writeCommentBeforeValue(childValue);
|
writeCommentBeforeValue(childValue);
|
||||||
writeWithIndent(valueToQuotedStringN(
|
writeWithIndent(valueToQuotedStringN(
|
||||||
name.data(), static_cast<unsigned>(name.length())));
|
name.data(), static_cast<unsigned>(name.length()), emitUTF8_));
|
||||||
*sout_ << colonSymbol_;
|
*sout_ << colonSymbol_;
|
||||||
writeValue(childValue);
|
writeValue(childValue);
|
||||||
if (++it == members.end()) {
|
if (++it == members.end()) {
|
||||||
@ -1142,12 +1156,13 @@ StreamWriter::Factory::~Factory() = default;
|
|||||||
StreamWriterBuilder::StreamWriterBuilder() { setDefaults(&settings_); }
|
StreamWriterBuilder::StreamWriterBuilder() { setDefaults(&settings_); }
|
||||||
StreamWriterBuilder::~StreamWriterBuilder() = default;
|
StreamWriterBuilder::~StreamWriterBuilder() = default;
|
||||||
StreamWriter* StreamWriterBuilder::newStreamWriter() const {
|
StreamWriter* StreamWriterBuilder::newStreamWriter() const {
|
||||||
String indentation = settings_["indentation"].asString();
|
const String indentation = settings_["indentation"].asString();
|
||||||
String cs_str = settings_["commentStyle"].asString();
|
const String cs_str = settings_["commentStyle"].asString();
|
||||||
String pt_str = settings_["precisionType"].asString();
|
const String pt_str = settings_["precisionType"].asString();
|
||||||
bool eyc = settings_["enableYAMLCompatibility"].asBool();
|
const bool eyc = settings_["enableYAMLCompatibility"].asBool();
|
||||||
bool dnp = settings_["dropNullPlaceholders"].asBool();
|
const bool dnp = settings_["dropNullPlaceholders"].asBool();
|
||||||
bool usf = settings_["useSpecialFloats"].asBool();
|
const bool usf = settings_["useSpecialFloats"].asBool();
|
||||||
|
const bool emitUTF8 = settings_["emitUTF8"].asBool();
|
||||||
unsigned int pre = settings_["precision"].asUInt();
|
unsigned int pre = settings_["precision"].asUInt();
|
||||||
CommentStyle::Enum cs = CommentStyle::All;
|
CommentStyle::Enum cs = CommentStyle::All;
|
||||||
if (cs_str == "All") {
|
if (cs_str == "All") {
|
||||||
@ -1179,7 +1194,7 @@ StreamWriter* StreamWriterBuilder::newStreamWriter() const {
|
|||||||
pre = 17;
|
pre = 17;
|
||||||
String endingLineFeedSymbol;
|
String endingLineFeedSymbol;
|
||||||
return new BuiltStyledStreamWriter(indentation, cs, colonSymbol, nullSymbol,
|
return new BuiltStyledStreamWriter(indentation, cs, colonSymbol, nullSymbol,
|
||||||
endingLineFeedSymbol, usf, pre,
|
endingLineFeedSymbol, usf, emitUTF8, pre,
|
||||||
precisionType);
|
precisionType);
|
||||||
}
|
}
|
||||||
static void getValidWriterKeys(std::set<String>* valid_keys) {
|
static void getValidWriterKeys(std::set<String>* valid_keys) {
|
||||||
@ -1189,6 +1204,7 @@ static void getValidWriterKeys(std::set<String>* valid_keys) {
|
|||||||
valid_keys->insert("enableYAMLCompatibility");
|
valid_keys->insert("enableYAMLCompatibility");
|
||||||
valid_keys->insert("dropNullPlaceholders");
|
valid_keys->insert("dropNullPlaceholders");
|
||||||
valid_keys->insert("useSpecialFloats");
|
valid_keys->insert("useSpecialFloats");
|
||||||
|
valid_keys->insert("emitUTF8");
|
||||||
valid_keys->insert("precision");
|
valid_keys->insert("precision");
|
||||||
valid_keys->insert("precisionType");
|
valid_keys->insert("precisionType");
|
||||||
}
|
}
|
||||||
@ -1220,6 +1236,7 @@ void StreamWriterBuilder::setDefaults(Json::Value* settings) {
|
|||||||
(*settings)["enableYAMLCompatibility"] = false;
|
(*settings)["enableYAMLCompatibility"] = false;
|
||||||
(*settings)["dropNullPlaceholders"] = false;
|
(*settings)["dropNullPlaceholders"] = false;
|
||||||
(*settings)["useSpecialFloats"] = false;
|
(*settings)["useSpecialFloats"] = false;
|
||||||
|
(*settings)["emitUTF8"] = false;
|
||||||
(*settings)["precision"] = 17;
|
(*settings)["precision"] = 17;
|
||||||
(*settings)["precisionType"] = "significant";
|
(*settings)["precisionType"] = "significant";
|
||||||
//! [StreamWriterBuilderDefaults]
|
//! [StreamWriterBuilderDefaults]
|
||||||
|
@ -2481,6 +2481,35 @@ JSONTEST_FIXTURE_LOCAL(StreamWriterTest, writeZeroes) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
JSONTEST_FIXTURE_LOCAL(StreamWriterTest, unicode) {
|
||||||
|
// Create a Json value containing UTF-8 string with some chars that need
|
||||||
|
// escape (tab,newline).
|
||||||
|
Json::Value root;
|
||||||
|
root["test"] = "\t\n\xF0\x91\xA2\xA1\x3D\xC4\xB3\xF0\x9B\x84\x9B\xEF\xBD\xA7";
|
||||||
|
|
||||||
|
Json::StreamWriterBuilder b;
|
||||||
|
|
||||||
|
// Default settings - should be unicode escaped.
|
||||||
|
JSONTEST_ASSERT(Json::writeString(b, root) ==
|
||||||
|
"{\n\t\"test\" : "
|
||||||
|
"\"\\t\\n\\ud806\\udca1=\\u0133\\ud82c\\udd1b\\uff67\"\n}");
|
||||||
|
|
||||||
|
b.settings_["emitUTF8"] = true;
|
||||||
|
|
||||||
|
// Should not be unicode escaped.
|
||||||
|
JSONTEST_ASSERT(
|
||||||
|
Json::writeString(b, root) ==
|
||||||
|
"{\n\t\"test\" : "
|
||||||
|
"\"\\t\\n\xF0\x91\xA2\xA1=\xC4\xB3\xF0\x9B\x84\x9B\xEF\xBD\xA7\"\n}");
|
||||||
|
|
||||||
|
b.settings_["emitUTF8"] = false;
|
||||||
|
|
||||||
|
// Should be unicode escaped.
|
||||||
|
JSONTEST_ASSERT(Json::writeString(b, root) ==
|
||||||
|
"{\n\t\"test\" : "
|
||||||
|
"\"\\t\\n\\ud806\\udca1=\\u0133\\ud82c\\udd1b\\uff67\"\n}");
|
||||||
|
}
|
||||||
|
|
||||||
struct ReaderTest : JsonTest::TestCase {};
|
struct ReaderTest : JsonTest::TestCase {};
|
||||||
|
|
||||||
JSONTEST_FIXTURE_LOCAL(ReaderTest, parseWithNoErrors) {
|
JSONTEST_FIXTURE_LOCAL(ReaderTest, parseWithNoErrors) {
|
||||||
|
Loading…
x
Reference in New Issue
Block a user