mirror of
https://github.com/Tencent/rapidjson.git
synced 2025-03-06 13:41:35 +01:00
Added encoding validation feature
git-svn-id: https://rapidjson.googlecode.com/svn/trunk@30 c5894555-1306-4e8d-425f-1f6f381ee07c
This commit is contained in:
parent
78492f9962
commit
f198c486ee
@ -16,8 +16,10 @@
|
||||
#ifdef _MSC_VER
|
||||
typedef __int64 int64_t;
|
||||
typedef unsigned __int64 uint64_t;
|
||||
#define RAPIDJSON_FORCEINLINE __forceinline
|
||||
#else
|
||||
#include <inttypes.h>
|
||||
#define RAPIDJSON_FORCEINLINE
|
||||
#endif
|
||||
#endif // RAPIDJSON_NO_INT64TYPEDEF
|
||||
|
||||
@ -343,6 +345,47 @@ struct UTF8 {
|
||||
}
|
||||
return buffer;
|
||||
}
|
||||
|
||||
template <typename Stream>
|
||||
RAPIDJSON_FORCEINLINE static Ch* Validate(Ch *buffer, Stream& s) {
|
||||
#define X1 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1
|
||||
#define X5 5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5
|
||||
static const char utf8[256] = {
|
||||
X1,X1,X1,X1,X1,X1,X1,X1, // 00-7F 1 byte
|
||||
X5,X5,X5,X5, // 80-BF Continuation
|
||||
0,0,2,2,2,2,2,2,2,2,2,2,2,2,2,2, // C0-C1: invalid, C2-CF: 2 bytes
|
||||
2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, // D0-DF: 2 bytes
|
||||
3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3, // E0-EF: 3 bytes
|
||||
4,4,4,4,4,0,0,0,0,0,0,0,0,0,0,0, // F0-F4: 4 bytes
|
||||
};
|
||||
#undef X1
|
||||
#undef X5
|
||||
|
||||
#define TAIL() c = *buffer++ = s.Take(); if ((c & 0xC0) != 0x80) return NULL;
|
||||
|
||||
Ch c = *buffer++ = s.Take();
|
||||
if ((unsigned char)c < 0x80u)
|
||||
return buffer;
|
||||
|
||||
switch(utf8[(unsigned char)c]) {
|
||||
case 2:
|
||||
TAIL();
|
||||
return buffer;
|
||||
|
||||
case 3:
|
||||
TAIL();
|
||||
TAIL();
|
||||
return buffer;
|
||||
|
||||
case 4:
|
||||
TAIL();
|
||||
TAIL();
|
||||
TAIL();
|
||||
return buffer;
|
||||
}
|
||||
return NULL;
|
||||
#undef TAIL
|
||||
}
|
||||
};
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
@ -370,6 +413,21 @@ struct UTF16 {
|
||||
}
|
||||
return buffer;
|
||||
}
|
||||
|
||||
template <typename Stream>
|
||||
static Ch* Validate(Ch *buffer, Stream& s) {
|
||||
Ch c = *buffer++ = s.Take();
|
||||
if (c < 0xD800 || c > 0xDFFF)
|
||||
;
|
||||
else if (c < 0xDBFF) {
|
||||
Ch c = *buffer++ = s.Take();
|
||||
if (c < 0xDC00 || c > 0xDFFF)
|
||||
return NULL;
|
||||
}
|
||||
else
|
||||
return NULL;
|
||||
return buffer;
|
||||
}
|
||||
};
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
@ -389,6 +447,12 @@ struct UTF32 {
|
||||
*buffer++ = codepoint;
|
||||
return buffer;
|
||||
}
|
||||
|
||||
template <typename Stream>
|
||||
static Ch* Validate(Ch *buffer, Stream& s) {
|
||||
Ch c = *buffer++ = s.Take();
|
||||
return c <= 0x10FFFF ? buffer : 0;
|
||||
}
|
||||
};
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
@ -27,7 +27,8 @@ namespace rapidjson {
|
||||
//! Combination of parseFlags
/*! The enumerators are bit values and may be combined with bitwise OR. */
enum ParseFlag {
    kParseDefaultFlags = 0,         //!< Default parse flags. Non-destructive parsing. Text strings are decoded into allocated buffer.
    kParseInsituFlag = 1,           //!< In-situ(destructive) parsing.
    kParseValidateEncodingFlag = 2  //!< Validate encoding of JSON strings.
};
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
@ -401,8 +402,9 @@ private:
|
||||
} while(false)
|
||||
|
||||
for (;;) {
|
||||
Ch c = s.Take();
|
||||
Ch c = s.Peek();
|
||||
if (c == '\\') { // Escape
|
||||
s.Take();
|
||||
Ch e = s.Take();
|
||||
if ((sizeof(Ch) == 1 || e < 256) && escape[(unsigned char)e])
|
||||
RAPIDJSON_PUT(escape[(unsigned char)e]);
|
||||
@ -438,6 +440,7 @@ private:
|
||||
}
|
||||
}
|
||||
else if (c == '"') { // Closing double quote
|
||||
s.Take();
|
||||
if (parseFlags & kParseInsituFlag) {
|
||||
size_t length = s.PutEnd(head);
|
||||
RAPIDJSON_ASSERT(length <= 0xFFFFFFFF);
|
||||
@ -459,8 +462,29 @@ private:
|
||||
RAPIDJSON_PARSE_ERROR("Incorrect unescaped character in string", stream.Tell() - 1);
|
||||
return;
|
||||
}
|
||||
else
|
||||
RAPIDJSON_PUT(c); // Normal character, just copy
|
||||
else if (parseFlags & kParseValidateEncodingFlag) {
|
||||
Ch buffer[4];
|
||||
Ch* end = Encoding::Validate(&buffer[0], s);
|
||||
if (end == NULL) {
|
||||
RAPIDJSON_PARSE_ERROR("Invalid encoding", s.Tell());
|
||||
return;
|
||||
}
|
||||
|
||||
if (parseFlags & kParseInsituFlag)
|
||||
for (Ch* p = &buffer[0]; p != end; ++p)
|
||||
s.Put(*p);
|
||||
else {
|
||||
SizeType l = SizeType(end - &buffer[0]);
|
||||
Ch* q = stack_.template Push<Ch>(l);
|
||||
for (Ch* p = &buffer[0]; p != end; ++p)
|
||||
*q++ = *p;
|
||||
len += l;
|
||||
}
|
||||
|
||||
}
|
||||
else {
|
||||
RAPIDJSON_PUT(s.Take()); // Normal character, just copy
|
||||
}
|
||||
}
|
||||
#undef RAPIDJSON_PUT
|
||||
}
|
||||
|
@ -2,14 +2,14 @@
|
||||
#define PERFTEST_H_
|
||||
|
||||
#define TEST_RAPIDJSON 1
|
||||
#define TEST_JSONCPP 1
|
||||
#define TEST_YAJL 1
|
||||
#define TEST_ULTRAJSON 1
|
||||
#define TEST_PLATFORM 1
|
||||
#define TEST_JSONCPP 0
|
||||
#define TEST_YAJL 0
|
||||
#define TEST_ULTRAJSON 0
|
||||
#define TEST_PLATFORM 0
|
||||
|
||||
#if TEST_RAPIDJSON
|
||||
//#define RAPIDJSON_SSE2
|
||||
//#define RAPIDJSON_SSE42
|
||||
#define RAPIDJSON_SSE42
|
||||
#endif
|
||||
|
||||
#if TEST_YAJL
|
||||
|
@ -41,22 +41,41 @@ protected:
|
||||
Document doc_;
|
||||
};
|
||||
|
||||
TEST_F(RapidJson, SIMD_SUFFIX(ReaderParseInsitu_NullHandler)) {
|
||||
TEST_F(RapidJson, SIMD_SUFFIX(ReaderParseInsitu_DummyHandler)) {
|
||||
for (int i = 0; i < kTrialCount; i++) {
|
||||
memcpy(temp_, json_, length_ + 1);
|
||||
InsituStringStream s(temp_);
|
||||
BaseReaderHandler<> h;
|
||||
Reader reader;
|
||||
reader.Parse<kParseInsituFlag>(s, h);
|
||||
EXPECT_TRUE(reader.Parse<kParseInsituFlag>(s, h));
|
||||
}
|
||||
}
|
||||
|
||||
TEST_F(RapidJson, SIMD_SUFFIX(ReaderParse_NullHandler)) {
|
||||
// In-situ parse of the fixture JSON with encoding validation enabled.
// The text is copied into temp_ on every trial because the in-situ flag is
// documented as destructive parsing.
TEST_F(RapidJson, SIMD_SUFFIX(ReaderParseInsitu_DummyHandler_ValidateEncoding)) {
    int trial = 0;
    while (trial < kTrialCount) {
        memcpy(temp_, json_, length_ + 1);
        InsituStringStream stream(temp_);
        BaseReaderHandler<> handler;
        Reader reader;
        EXPECT_TRUE(reader.Parse<kParseInsituFlag | kParseValidateEncodingFlag>(stream, handler));
        trial++;
    }
}
|
||||
|
||||
TEST_F(RapidJson, SIMD_SUFFIX(ReaderParse_DummyHandler)) {
|
||||
for (int i = 0; i < kTrialCount; i++) {
|
||||
StringStream s(json_);
|
||||
BaseReaderHandler<> h;
|
||||
Reader reader;
|
||||
reader.Parse<0>(s, h);
|
||||
EXPECT_TRUE(reader.Parse<0>(s, h));
|
||||
}
|
||||
}
|
||||
|
||||
// Non-in-situ parse of the fixture JSON with encoding validation enabled,
// repeated kTrialCount times.
TEST_F(RapidJson, SIMD_SUFFIX(ReaderParse_DummyHandler_ValidateEncoding)) {
    int trial = 0;
    while (trial < kTrialCount) {
        StringStream stream(json_);
        BaseReaderHandler<> handler;
        Reader reader;
        EXPECT_TRUE(reader.Parse<kParseValidateEncodingFlag>(stream, handler));
        trial++;
    }
}
|
||||
|
||||
@ -235,7 +254,7 @@ TEST_F(RapidJson, FileReadStream) {
|
||||
}
|
||||
}
|
||||
|
||||
TEST_F(RapidJson, SIMD_SUFFIX(ReaderParse_NullHandler_FileReadStream)) {
|
||||
TEST_F(RapidJson, SIMD_SUFFIX(ReaderParse_DummyHandler_FileReadStream)) {
|
||||
for (int i = 0; i < kTrialCount; i++) {
|
||||
FILE *fp = fopen(filename_, "rb");
|
||||
char buffer[65536];
|
||||
|
@ -204,7 +204,7 @@ TEST(Reader, ParseString) {
|
||||
GenericInsituStringStream<Encoding> is(buffer); \
|
||||
ParseStringHandler<Encoding> h; \
|
||||
GenericReader<Encoding> reader; \
|
||||
reader.ParseString<kParseInsituFlag>(is, h); \
|
||||
reader.ParseString<kParseInsituFlag | kParseValidateEncodingFlag>(is, h); \
|
||||
EXPECT_EQ(0, StrCmp<Encoding::Ch>(e, h.str_)); \
|
||||
EXPECT_EQ(StrLen(e), h.length_); \
|
||||
free(buffer); \
|
||||
@ -286,7 +286,6 @@ TEST(Reader, ParseString_NonDestructive) {
|
||||
EXPECT_EQ(11, h.length_);
|
||||
}
|
||||
|
||||
#ifdef RAPIDJSON_USE_EXCEPTION
|
||||
TEST(Reader, ParseString_Error) {
|
||||
#define TEST_STRING_ERROR(str) \
|
||||
{ \
|
||||
@ -295,18 +294,28 @@ TEST(Reader, ParseString_Error) {
|
||||
InsituStringStream s(buffer); \
|
||||
BaseReaderHandler<> h; \
|
||||
Reader reader; \
|
||||
EXPECT_ERROR(reader.ParseString<0>(s, h), ParseException); \
|
||||
EXPECT_FALSE(reader.Parse<kParseValidateEncodingFlag>(s, h)); \
|
||||
}
|
||||
|
||||
TEST_STRING_ERROR("\"\\a\""); // Unknown escape character
|
||||
TEST_STRING_ERROR("\"\\uABCG\""); // Incorrect hex digit after \\u escape
|
||||
TEST_STRING_ERROR("\"\\uD800X\""); // Missing the second \\u in surrogate pair
|
||||
TEST_STRING_ERROR("\"\\uD800\\uFFFF\""); // The second \\u in surrogate pair is invalid
|
||||
TEST_STRING_ERROR("\"Test"); // lacks ending quotation before the end of string
|
||||
#define ARRAY(...) { __VA_ARGS__ }
|
||||
#define TEST_STRINGARRAY_ERROR(Encoding, array) \
|
||||
{ \
|
||||
static const Encoding::Ch e[] = array; \
|
||||
TEST_STRING_ERROR(e); \
|
||||
}
|
||||
|
||||
TEST_STRING_ERROR("[\"\\a\"]"); // Unknown escape character
|
||||
TEST_STRING_ERROR("[\"\\uABCG\"]"); // Incorrect hex digit after \\u escape
|
||||
TEST_STRING_ERROR("[\"\\uD800X\"]"); // Missing the second \\u in surrogate pair
|
||||
TEST_STRING_ERROR("[\"\\uD800\\uFFFF\"]"); // The second \\u in surrogate pair is invalid
|
||||
TEST_STRING_ERROR("[\"Test]"); // lacks ending quotation before the end of string
|
||||
TEST_STRINGARRAY_ERROR(UTF8<>, ARRAY('[', 0x80u, ']')); // Incorrect UTF8 sequence
|
||||
TEST_STRINGARRAY_ERROR(UTF8<>, ARRAY('[', 0xC0u, 0x40, ']')); // Incorrect UTF8 sequence
|
||||
|
||||
#undef ARRAY
|
||||
#undef TEST_STRINGARRAY_ERROR
|
||||
#undef TEST_STRING_ERROR
|
||||
}
|
||||
#endif // RAPIDJSON_USE_EXCEPTION
|
||||
|
||||
template <unsigned count>
|
||||
struct ParseArrayHandler : BaseReaderHandler<> {
|
||||
@ -340,7 +349,6 @@ TEST(Reader, ParseArray) {
|
||||
free(json);
|
||||
}
|
||||
|
||||
#ifdef RAPIDJSON_USE_EXCEPTION
|
||||
TEST(Reader, ParseArray_Error) {
|
||||
#define TEST_ARRAY_ERROR(str) \
|
||||
{ \
|
||||
@ -348,8 +356,8 @@ TEST(Reader, ParseArray_Error) {
|
||||
strncpy(buffer, str, 1000); \
|
||||
InsituStringStream s(buffer); \
|
||||
BaseReaderHandler<> h; \
|
||||
Reader<UTF8<>, CrtAllocator> reader; \
|
||||
EXPECT_ERROR(reader.ParseArray<0>(s, h), ParseException); \
|
||||
GenericReader<UTF8<>, CrtAllocator> reader; \
|
||||
EXPECT_FALSE(reader.Parse<0>(s, h)); \
|
||||
}
|
||||
|
||||
// Must be a comma or ']' after an array element.
|
||||
@ -359,7 +367,6 @@ TEST(Reader, ParseArray_Error) {
|
||||
|
||||
#undef TEST_ARRAY_ERROR
|
||||
}
|
||||
#endif // RAPIDJSON_USE_EXCEPTION
|
||||
|
||||
struct ParseObjectHandler : BaseReaderHandler<> {
|
||||
ParseObjectHandler() : step_(0) {}
|
||||
@ -446,7 +453,6 @@ TEST(Reader, Parse_EmptyObject) {
|
||||
EXPECT_EQ(2, h.step_);
|
||||
}
|
||||
|
||||
#ifdef RAPIDJSON_USE_EXCEPTION
|
||||
TEST(Reader, ParseObject_Error) {
|
||||
#define TEST_OBJECT_ERROR(str) \
|
||||
{ \
|
||||
@ -454,8 +460,8 @@ TEST(Reader, ParseObject_Error) {
|
||||
strncpy(buffer, str, 1000); \
|
||||
InsituStringStream s(buffer); \
|
||||
BaseReaderHandler<> h; \
|
||||
Reader<UTF8<>, CrtAllocator> reader; \
|
||||
EXPECT_ERROR(reader.ParseObject<0>(s, h), ParseException); \
|
||||
GenericReader<UTF8<>, CrtAllocator> reader; \
|
||||
EXPECT_FALSE(reader.Parse<0>(s, h)); \
|
||||
}
|
||||
|
||||
// Name of an object member must be a string
|
||||
@ -477,9 +483,7 @@ TEST(Reader, ParseObject_Error) {
|
||||
|
||||
#undef TEST_OBJECT_ERROR
|
||||
}
|
||||
#endif // RAPIDJSON_USE_EXCEPTION
|
||||
|
||||
#ifdef RAPIDJSON_USE_EXCEPTION
|
||||
TEST(Reader, Parse_Error) {
|
||||
#define TEST_ERROR(str) \
|
||||
{ \
|
||||
@ -488,7 +492,7 @@ TEST(Reader, Parse_Error) {
|
||||
InsituStringStream s(buffer); \
|
||||
BaseReaderHandler<> h; \
|
||||
Reader reader; \
|
||||
EXPECT_ERROR(reader.Parse<0>(s, h), ParseException); \
|
||||
EXPECT_FALSE(reader.Parse<0>(s, h)); \
|
||||
}
|
||||
|
||||
// Text only contains white space(s)
|
||||
@ -514,4 +518,3 @@ TEST(Reader, Parse_Error) {
|
||||
|
||||
#undef TEST_ERROR
|
||||
}
|
||||
#endif // RAPIDJSON_USE_EXCEPTION
|
||||
|
Loading…
x
Reference in New Issue
Block a user