mirror of
https://github.com/Tencent/rapidjson.git
synced 2025-03-10 03:29:59 +01:00
Merge branch 'issue69writeescapeunicode'
This commit is contained in:
commit
54a5d10349
@ -6,6 +6,7 @@
|
||||
#ifdef _MSC_VER
|
||||
RAPIDJSON_DIAG_PUSH
|
||||
RAPIDJSON_DIAG_OFF(4244) // conversion from 'type1' to 'type2', possible loss of data
|
||||
RAPIDJSON_DIAG_OFF(4702) // unreachable code
|
||||
#elif defined(__GNUC__)
|
||||
RAPIDJSON_DIAG_PUSH
|
||||
RAPIDJSON_DIAG_OFF(effc++)
|
||||
@ -23,6 +24,8 @@ namespace rapidjson {
|
||||
concept Encoding {
|
||||
typename Ch; //! Type of character. A "character" is actually a code unit in unicode's definition.
|
||||
|
||||
enum { supportUnicode = 1 }; // or 0 if not supporting unicode
|
||||
|
||||
//! \brief Encode a Unicode codepoint to an output stream.
|
||||
//! \param os Output stream.
|
||||
//! \param codepoint An unicode codepoint, ranging from 0x0 to 0x10FFFF inclusively.
|
||||
@ -78,6 +81,8 @@ template<typename CharType = char>
|
||||
struct UTF8 {
|
||||
typedef CharType Ch;
|
||||
|
||||
enum { supportUnicode = 1 };
|
||||
|
||||
template<typename OutputStream>
|
||||
static void Encode(OutputStream& os, unsigned codepoint) {
|
||||
if (codepoint <= 0x7F)
|
||||
@ -222,6 +227,8 @@ struct UTF16 {
|
||||
typedef CharType Ch;
|
||||
RAPIDJSON_STATIC_ASSERT(sizeof(Ch) >= 2);
|
||||
|
||||
enum { supportUnicode = 1 };
|
||||
|
||||
template<typename OutputStream>
|
||||
static void Encode(OutputStream& os, unsigned codepoint) {
|
||||
RAPIDJSON_STATIC_ASSERT(sizeof(typename OutputStream::Ch) >= 2);
|
||||
@ -351,6 +358,8 @@ struct UTF32 {
|
||||
typedef CharType Ch;
|
||||
RAPIDJSON_STATIC_ASSERT(sizeof(Ch) >= 4);
|
||||
|
||||
enum { supportUnicode = 1 };
|
||||
|
||||
template<typename OutputStream>
|
||||
static void Encode(OutputStream& os, unsigned codepoint) {
|
||||
RAPIDJSON_STATIC_ASSERT(sizeof(typename OutputStream::Ch) >= 4);
|
||||
@ -447,6 +456,66 @@ struct UTF32BE : UTF32<CharType> {
|
||||
}
|
||||
};
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
// ASCII
|
||||
|
||||
//! ASCII encoding.
|
||||
/*! http://en.wikipedia.org/wiki/ASCII
|
||||
\tparam CharType Code unit for storing 7-bit ASCII data. Default is char.
|
||||
\note implements Encoding concept
|
||||
*/
|
||||
template<typename CharType = char>
|
||||
struct ASCII {
|
||||
typedef CharType Ch;
|
||||
|
||||
enum { supportUnicode = 0 };
|
||||
|
||||
template<typename OutputStream>
|
||||
static void Encode(OutputStream& os, unsigned codepoint) {
|
||||
RAPIDJSON_ASSERT(codepoint <= 0x7F);
|
||||
os.Put(static_cast<Ch>(codepoint & 0xFF));
|
||||
}
|
||||
|
||||
template <typename InputStream>
|
||||
static bool Decode(InputStream& is, unsigned* codepoint) {
|
||||
unsigned char c = static_cast<unsigned char>(is.Take());
|
||||
*codepoint = c;
|
||||
return c <= 0X7F;
|
||||
}
|
||||
|
||||
template <typename InputStream, typename OutputStream>
|
||||
static bool Validate(InputStream& is, OutputStream& os) {
|
||||
unsigned char c = is.Take();
|
||||
os.Put(c);
|
||||
return c <= 0x7F;
|
||||
}
|
||||
|
||||
template <typename InputByteStream>
|
||||
static CharType TakeBOM(InputByteStream& is) {
|
||||
RAPIDJSON_STATIC_ASSERT(sizeof(typename InputByteStream::Ch) == 1);
|
||||
Ch c = Take(is);
|
||||
return c;
|
||||
}
|
||||
|
||||
template <typename InputByteStream>
|
||||
static Ch Take(InputByteStream& is) {
|
||||
RAPIDJSON_STATIC_ASSERT(sizeof(typename InputByteStream::Ch) == 1);
|
||||
return is.Take();
|
||||
}
|
||||
|
||||
template <typename OutputByteStream>
|
||||
static void PutBOM(OutputByteStream& os) {
|
||||
RAPIDJSON_STATIC_ASSERT(sizeof(typename OutputByteStream::Ch) == 1);
|
||||
(void)os;
|
||||
}
|
||||
|
||||
template <typename OutputByteStream>
|
||||
static void Put(OutputByteStream& os, Ch c) {
|
||||
RAPIDJSON_STATIC_ASSERT(sizeof(typename OutputByteStream::Ch) == 1);
|
||||
os.Put(static_cast<typename OutputByteStream::Ch>(c));
|
||||
}
|
||||
};
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
// AutoUTF
|
||||
|
||||
@ -466,6 +535,8 @@ template<typename CharType>
|
||||
struct AutoUTF {
|
||||
typedef CharType Ch;
|
||||
|
||||
enum { supportUnicode = 1 };
|
||||
|
||||
#define RAPIDJSON_ENCODINGS_FUNC(x) UTF8<Ch>::x, UTF16LE<Ch>::x, UTF16BE<Ch>::x, UTF32LE<Ch>::x, UTF32BE<Ch>::x
|
||||
|
||||
template<typename OutputStream>
|
||||
|
@ -22,6 +22,7 @@
|
||||
#ifdef _MSC_VER
|
||||
RAPIDJSON_DIAG_PUSH
|
||||
RAPIDJSON_DIAG_OFF(4127) // conditional expression is constant
|
||||
RAPIDJSON_DIAG_OFF(4702) // unreachable code
|
||||
#endif
|
||||
|
||||
#define RAPIDJSON_NOTHING /* deliberately empty */
|
||||
|
@ -289,7 +289,39 @@ protected:
|
||||
GenericStringStream<SourceEncoding> is(str);
|
||||
while (is.Tell() < length) {
|
||||
const Ch c = is.Peek();
|
||||
if ((sizeof(Ch) == 1 || (unsigned)c < 256) && escape[(unsigned char)c]) {
|
||||
if (!TargetEncoding::supportUnicode && (unsigned)c >= 0x80) {
|
||||
// Unicode escaping
|
||||
unsigned codepoint;
|
||||
if (!SourceEncoding::Decode(is, &codepoint))
|
||||
return false;
|
||||
os_->Put('\\');
|
||||
os_->Put('u');
|
||||
if (codepoint <= 0xD7FF || (codepoint >= 0xE000 && codepoint <= 0xFFFF)) {
|
||||
os_->Put(hexDigits[(codepoint >> 12) & 15]);
|
||||
os_->Put(hexDigits[(codepoint >> 8) & 15]);
|
||||
os_->Put(hexDigits[(codepoint >> 4) & 15]);
|
||||
os_->Put(hexDigits[(codepoint ) & 15]);
|
||||
}
|
||||
else if (codepoint >= 0x010000 && codepoint <= 0x10FFFF) {
|
||||
// Surrogate pair
|
||||
unsigned s = codepoint - 0x010000;
|
||||
unsigned lead = (s >> 10) + 0xD800;
|
||||
unsigned trail = (s & 0x3FF) + 0xDC00;
|
||||
os_->Put(hexDigits[(lead >> 12) & 15]);
|
||||
os_->Put(hexDigits[(lead >> 8) & 15]);
|
||||
os_->Put(hexDigits[(lead >> 4) & 15]);
|
||||
os_->Put(hexDigits[(lead ) & 15]);
|
||||
os_->Put('\\');
|
||||
os_->Put('u');
|
||||
os_->Put(hexDigits[(trail >> 12) & 15]);
|
||||
os_->Put(hexDigits[(trail >> 8) & 15]);
|
||||
os_->Put(hexDigits[(trail >> 4) & 15]);
|
||||
os_->Put(hexDigits[(trail ) & 15]);
|
||||
}
|
||||
else
|
||||
return false; // invalid code point
|
||||
}
|
||||
else if ((sizeof(Ch) == 1 || (unsigned)c < 256) && escape[(unsigned char)c]) {
|
||||
is.Take();
|
||||
os_->Put('\\');
|
||||
os_->Put(escape[(unsigned char)c]);
|
||||
|
@ -117,14 +117,34 @@ TEST(Writer,DoublePrecision) {
|
||||
}
|
||||
|
||||
TEST(Writer, Transcode) {
|
||||
const char json[] = "{\"hello\":\"world\",\"t\":true,\"f\":false,\"n\":null,\"i\":123,\"pi\":3.1416,\"a\":[1,2,3],\"dollar\":\"\x24\",\"cents\":\"\xC2\xA2\",\"euro\":\"\xE2\x82\xAC\",\"gclef\":\"\xF0\x9D\x84\x9E\"}";
|
||||
|
||||
// UTF8 -> UTF16 -> UTF8
|
||||
StringStream s("{ \"hello\" : \"world\", \"t\" : true , \"f\" : false, \"n\": null, \"i\":123, \"pi\": 3.1416, \"a\":[1, 2, 3], \"dollar\":\"\x24\", \"cents\":\"\xC2\xA2\", \"euro\":\"\xE2\x82\xAC\", \"gclef\":\"\xF0\x9D\x84\x9E\" } ");
|
||||
StringBuffer buffer;
|
||||
Writer<StringBuffer, UTF16<>, UTF8<> > writer(buffer);
|
||||
GenericReader<UTF8<>, UTF16<> > reader;
|
||||
reader.Parse<0>(s, writer);
|
||||
EXPECT_TRUE(writer.IsComplete());
|
||||
EXPECT_STREQ("{\"hello\":\"world\",\"t\":true,\"f\":false,\"n\":null,\"i\":123,\"pi\":3.1416,\"a\":[1,2,3],\"dollar\":\"\x24\",\"cents\":\"\xC2\xA2\",\"euro\":\"\xE2\x82\xAC\",\"gclef\":\"\xF0\x9D\x84\x9E\"}", buffer.GetString());
|
||||
{
|
||||
StringStream s(json);
|
||||
StringBuffer buffer;
|
||||
Writer<StringBuffer, UTF16<>, UTF8<> > writer(buffer);
|
||||
GenericReader<UTF8<>, UTF16<> > reader;
|
||||
reader.Parse(s, writer);
|
||||
EXPECT_STREQ(json, buffer.GetString());
|
||||
}
|
||||
|
||||
// UTF8 -> UTF8 -> ASCII -> UTF8 -> UTF8
|
||||
{
|
||||
StringStream s(json);
|
||||
StringBuffer buffer;
|
||||
Writer<StringBuffer, UTF8<>, ASCII<> > writer(buffer);
|
||||
Reader reader;
|
||||
reader.Parse(s, writer);
|
||||
|
||||
StringBuffer buffer2;
|
||||
Writer<StringBuffer> writer2(buffer2);
|
||||
GenericReader<ASCII<>, UTF8<> > reader2;
|
||||
StringStream s2(buffer.GetString());
|
||||
reader2.Parse(s2, writer2);
|
||||
|
||||
EXPECT_STREQ(json, buffer2.GetString());
|
||||
}
|
||||
}
|
||||
|
||||
#include <sstream>
|
||||
@ -270,4 +290,4 @@ TEST(Writer, RootArrayIsComplete) {
|
||||
EXPECT_FALSE(writer.IsComplete());
|
||||
writer.EndArray();
|
||||
EXPECT_TRUE(writer.IsComplete());
|
||||
}
|
||||
}
|
||||
|
Loading…
x
Reference in New Issue
Block a user