From 3e21bb429d492206c9ce2f3fd44264a5220913c4 Mon Sep 17 00:00:00 2001 From: Nicholas Fraser Date: Sun, 20 Mar 2016 01:10:33 -0400 Subject: [PATCH 1/3] Added optional support for trailing commas This adds kParseTrailingCommasFlag to allow a trailing comma at the end of maps and arrays. This is part of issue #36, adding optional support for relaxed JSON syntax. --- doc/dom.md | 1 + include/rapidjson/reader.h | 19 ++++++++++++++++++ test/unittest/readertest.cpp | 37 ++++++++++++++++++++++++++++++++++++ 3 files changed, 57 insertions(+) diff --git a/doc/dom.md b/doc/dom.md index cb25fc4f..79b68175 100644 --- a/doc/dom.md +++ b/doc/dom.md @@ -116,6 +116,7 @@ Parse flags | Meaning `kParseStopWhenDoneFlag` | After parsing a complete JSON root from stream, stop further processing the rest of stream. When this flag is used, parser will not generate `kParseErrorDocumentRootNotSingular` error. Using this flag for parsing multiple JSONs in the same stream. `kParseFullPrecisionFlag` | Parse number in full precision (slower). If this flag is not set, the normal precision (faster) is used. Normal precision has maximum 3 [ULP](http://en.wikipedia.org/wiki/Unit_in_the_last_place) error. `kParseCommentsFlag` | Allow one-line `// ...` and multi-line `/* ... */` comments (relaxed JSON syntax). +`kParseTrailingCommasFlag` | Allow trailing commas at the end of objects and arrays (relaxed JSON syntax). By using a non-type template parameter, instead of a function parameter, C++ compiler can generate code which is optimized for specified combinations, improving speed, and reducing code size (if only using a single specialization). The downside is the flags needed to be determined in compile-time. diff --git a/include/rapidjson/reader.h b/include/rapidjson/reader.h index 3d7bb634..4f090185 100644 --- a/include/rapidjson/reader.h +++ b/include/rapidjson/reader.h @@ -149,6 +149,7 @@ enum ParseFlag { kParseFullPrecisionFlag = 16, //!< Parse number in full precision (but slower). kParseCommentsFlag = 32, //!< Allow one-line (//) and multi-line (/**/) comments. kParseNumbersAsStringsFlag = 64, //!< Parse all numbers (ints/doubles) as strings. + kParseTrailingCommasFlag = 128, //!< Allow trailing commas at the end of objects and arrays. kParseDefaultFlags = RAPIDJSON_PARSE_DEFAULT_FLAGS //!< Default parse flags. Can be customized by defining RAPIDJSON_PARSE_DEFAULT_FLAGS }; @@ -636,6 +637,15 @@ private: RAPIDJSON_PARSE_ERROR(kParseErrorObjectMissCommaOrCurlyBracket, is.Tell()); break; } + + if (parseFlags & kParseTrailingCommasFlag) { + if (is.Peek() == '}') { + is.Take(); + if (RAPIDJSON_UNLIKELY(!handler.EndObject(memberCount))) + RAPIDJSON_PARSE_ERROR(kParseErrorTermination, is.Tell()); + return; + } + } } } @@ -676,6 +686,15 @@ private: } else RAPIDJSON_PARSE_ERROR(kParseErrorArrayMissCommaOrSquareBracket, is.Tell()); + + if (parseFlags & kParseTrailingCommasFlag) { + if (is.Peek() == ']') { + is.Take(); + if (RAPIDJSON_UNLIKELY(!handler.EndArray(elementCount))) + RAPIDJSON_PARSE_ERROR(kParseErrorTermination, is.Tell()); + return; + } + } } } diff --git a/test/unittest/readertest.cpp b/test/unittest/readertest.cpp index 32af8a86..df3b4039 100644 --- a/test/unittest/readertest.cpp +++ b/test/unittest/readertest.cpp @@ -778,6 +778,10 @@ TEST(Reader, ParseArray_Error) { TEST_ARRAY_ERROR(kParseErrorArrayMissCommaOrSquareBracket, "[1}", 2); TEST_ARRAY_ERROR(kParseErrorArrayMissCommaOrSquareBracket, "[1 2]", 3); + // Array cannot have a trailing comma (without kParseTrailingCommasFlag); + // a value must follow a comma + TEST_ARRAY_ERROR(kParseErrorValueInvalid, "[1,]", 3); + #undef TEST_ARRAY_ERROR } @@ -978,6 +982,10 @@ TEST(Reader, ParseObject_Error) { // Must be a comma or '}' after an object member TEST_ERROR(kParseErrorObjectMissCommaOrCurlyBracket, "{\"a\":1]", 6); + // Object cannot have a trailing comma (without kParseTrailingCommasFlag); + // an object member name must follow a comma + TEST_ERROR(kParseErrorObjectMissName, "{\"a\":1,}", 7); + // This tests that MemoryStream is checking the length in Peek(). { MemoryStream ms("{\"a\"", 1); @@ -1552,6 +1560,35 @@ TEST(Reader, NumbersAsStrings) { } } +TEST(Reader, TrailingCommas) { + { + // trailing array comma + StringStream s("[1,2,3,]"); + ParseArrayHandler<3> h; + Reader reader; + EXPECT_TRUE(reader.Parse(s, h)); + EXPECT_EQ(5u, h.step_); + } + { + // trailing object comma + const char* json = "{ \"hello\" : \"world\", \"t\" : true , \"f\" : false, \"n\": null, \"i\":123, \"pi\": 3.1416, \"a\":[1, 2, 3],}"; + StringStream s(json); + ParseObjectHandler h; + Reader reader; + EXPECT_TRUE(reader.Parse(s, h)); + EXPECT_EQ(20u, h.step_); + } + { + // trailing object and array commas with whitespace + const char* json = "{ \"hello\" : \"world\", \"t\" : true , \"f\" : false, \"n\": null, \"i\":123, \"pi\": 3.1416, \"a\":[1, 2, 3\n,\n]\n,\n } "; + StringStream s(json); + ParseObjectHandler h; + Reader reader; + EXPECT_TRUE(reader.Parse(s, h)); + EXPECT_EQ(20u, h.step_); + } +} + #ifdef __GNUC__ RAPIDJSON_DIAG_POP #endif From 7c0e9d941d657c0e155b4018dc2a2867d2a1ff6e Mon Sep 17 00:00:00 2001 From: Nicholas Fraser Date: Sun, 20 Mar 2016 11:39:00 -0400 Subject: [PATCH 2/3] Added additional tests for trailing commas --- test/unittest/readertest.cpp | 88 ++++++++++++++++++++++++++++++++++-- 1 file changed, 83 insertions(+), 5 deletions(-) diff --git a/test/unittest/readertest.cpp b/test/unittest/readertest.cpp index df3b4039..7c72f68f 100644 --- a/test/unittest/readertest.cpp +++ b/test/unittest/readertest.cpp @@ -1562,7 +1562,6 @@ TEST(Reader, NumbersAsStrings) { TEST(Reader, TrailingCommas) { { - // trailing array comma StringStream s("[1,2,3,]"); ParseArrayHandler<3> h; Reader reader; @@ -1570,8 +1569,8 @@ TEST(Reader, TrailingCommas) { EXPECT_EQ(5u, h.step_); } { - // trailing object comma - const char* json = "{ \"hello\" : \"world\", \"t\" : true , \"f\" : false, \"n\": null, \"i\":123, \"pi\": 3.1416, \"a\":[1, 2, 3],}"; + const char* json = "{ \"hello\" : \"world\", \"t\" : true , \"f\" : false," + "\"n\": null, \"i\":123, \"pi\": 3.1416, \"a\":[1, 2, 3],}"; StringStream s(json); ParseObjectHandler h; Reader reader; @@ -1579,14 +1578,93 @@ TEST(Reader, TrailingCommas) { EXPECT_EQ(20u, h.step_); } { - // trailing object and array commas with whitespace - const char* json = "{ \"hello\" : \"world\", \"t\" : true , \"f\" : false, \"n\": null, \"i\":123, \"pi\": 3.1416, \"a\":[1, 2, 3\n,\n]\n,\n } "; + // whitespace around trailing commas + const char* json = "{ \"hello\" : \"world\", \"t\" : true , \"f\" : false," + "\"n\": null, \"i\":123, \"pi\": 3.1416, \"a\":[1, 2, 3\n,\n]\n,\n} "; StringStream s(json); ParseObjectHandler h; Reader reader; EXPECT_TRUE(reader.Parse(s, h)); EXPECT_EQ(20u, h.step_); } + { + // comments around trailing commas + const char* json = "{ \"hello\" : \"world\", \"t\" : true , \"f\" : false, \"n\": null," + "\"i\":123, \"pi\": 3.1416, \"a\":[1, 2, 3/*test*/,/*test*/]/*test*/,/*test*/}"; + StringStream s(json); + ParseObjectHandler h; + Reader reader; + EXPECT_TRUE(reader.Parse(s, h)); + EXPECT_EQ(20u, h.step_); + } +} + +TEST(Reader, MultipleTrailingCommaErrors) { + // only a single trailing comma is allowed. + { + StringStream s("[1,2,3,,]"); + ParseArrayHandler<3> h; + Reader reader; + ParseResult r = reader.Parse(s, h); + EXPECT_TRUE(reader.HasParseError()); + EXPECT_EQ(kParseErrorValueInvalid, r.Code()); + EXPECT_EQ(7u, r.Offset()); + } + { + const char* json = "{ \"hello\" : \"world\", \"t\" : true , \"f\" : false," + "\"n\": null, \"i\":123, \"pi\": 3.1416, \"a\":[1, 2, 3,],,}"; + StringStream s(json); + ParseObjectHandler h; + Reader reader; + ParseResult r = reader.Parse(s, h); + EXPECT_TRUE(reader.HasParseError()); + EXPECT_EQ(kParseErrorObjectMissName, r.Code()); + EXPECT_EQ(95u, r.Offset()); + } +} + +TEST(Reader, EmptyExceptForCommaErrors) { + // not allowed even with trailing commas enabled; the + // trailing comma must follow a value. + { + StringStream s("[,]"); + ParseArrayHandler<3> h; + Reader reader; + ParseResult r = reader.Parse(s, h); + EXPECT_TRUE(reader.HasParseError()); + EXPECT_EQ(kParseErrorValueInvalid, r.Code()); + EXPECT_EQ(1u, r.Offset()); + } + { + StringStream s("{,}"); + ParseObjectHandler h; + Reader reader; + ParseResult r = reader.Parse(s, h); + EXPECT_TRUE(reader.HasParseError()); + EXPECT_EQ(kParseErrorObjectMissName, r.Code()); + EXPECT_EQ(1u, r.Offset()); + } +} + +TEST(Reader, TrailingCommaHandlerTermination) { + { + HandlerTerminateAtEndArray h; + Reader reader; + StringStream s("[1,2,3,]"); + ParseResult r = reader.Parse(s, h); + EXPECT_TRUE(reader.HasParseError()); + EXPECT_EQ(kParseErrorTermination, r.Code()); + EXPECT_EQ(8u, r.Offset()); + } + { + HandlerTerminateAtEndObject h; + Reader reader; + StringStream s("{\"t\": true, \"f\": false,}"); + ParseResult r = reader.Parse(s, h); + EXPECT_TRUE(reader.HasParseError()); + EXPECT_EQ(kParseErrorTermination, r.Code()); + EXPECT_EQ(24u, r.Offset()); + } } #ifdef __GNUC__ From 68217548f338af3bd38a2f51cb683b0bab26298d Mon Sep 17 00:00:00 2001 From: Nicholas Fraser Date: Sun, 20 Mar 2016 12:52:48 -0400 Subject: [PATCH 3/3] Added trailing comma support to iterative parser This also fixes cases where the iterative parser should have produced kParseErrorValueInvalid rather than kParseErrorUnspecifiedSyntaxError when expecting a value (after a colon in an object, after a comma in an array, and at the start of an array.) --- include/rapidjson/reader.h | 21 ++++++++-- test/unittest/readertest.cpp | 78 ++++++++++++++++++++++++++++-------- 2 files changed, 79 insertions(+), 20 deletions(-) diff --git a/include/rapidjson/reader.h b/include/rapidjson/reader.h index 4f090185..30251faf 100644 --- a/include/rapidjson/reader.h +++ b/include/rapidjson/reader.h @@ -640,9 +640,9 @@ private: if (parseFlags & kParseTrailingCommasFlag) { if (is.Peek() == '}') { - is.Take(); if (RAPIDJSON_UNLIKELY(!handler.EndObject(memberCount))) RAPIDJSON_PARSE_ERROR(kParseErrorTermination, is.Tell()); + is.Take(); return; } } @@ -689,9 +689,9 @@ private: if (parseFlags & kParseTrailingCommasFlag) { if (is.Peek() == ']') { - is.Take(); if (RAPIDJSON_UNLIKELY(!handler.EndArray(elementCount))) RAPIDJSON_PARSE_ERROR(kParseErrorTermination, is.Tell()); + is.Take(); return; } } @@ -1541,7 +1541,7 @@ private: IterativeParsingErrorState, // Left bracket IterativeParsingErrorState, // Right bracket IterativeParsingErrorState, // Left curly bracket - IterativeParsingErrorState, // Right curly bracket + IterativeParsingObjectFinishState, // Right curly bracket IterativeParsingErrorState, // Comma IterativeParsingErrorState, // Colon IterativeParsingMemberKeyState, // String @@ -1587,7 +1587,7 @@ private: // ElementDelimiter { IterativeParsingArrayInitialState, // Left bracket(push Element state) - IterativeParsingErrorState, // Right bracket + IterativeParsingArrayFinishState, // Right bracket IterativeParsingObjectInitialState, // Left curly bracket(push Element state) IterativeParsingErrorState, // Right curly bracket IterativeParsingErrorState, // Comma @@ -1689,6 +1689,11 @@ private: case IterativeParsingObjectFinishState: { + // Transit from delimiter is only allowed when trailing commas are enabled + if (!(parseFlags & kParseTrailingCommasFlag) && src == IterativeParsingMemberDelimiterState) { + RAPIDJSON_PARSE_ERROR_NORETURN(kParseErrorObjectMissName, is.Tell()); + return IterativeParsingErrorState; + } // Get member count. SizeType c = *stack_.template Pop(1); // If the object is not empty, count the last member. @@ -1714,6 +1719,11 @@ private: case IterativeParsingArrayFinishState: { + // Transit from delimiter is only allowed when trailing commas are enabled + if (!(parseFlags & kParseTrailingCommasFlag) && src == IterativeParsingElementDelimiterState) { + RAPIDJSON_PARSE_ERROR_NORETURN(kParseErrorValueInvalid, is.Tell()); + return IterativeParsingErrorState; + } // Get element count. SizeType c = *stack_.template Pop(1); // If the array is not empty, count the last element. @@ -1773,6 +1783,9 @@ private: case IterativeParsingMemberDelimiterState: RAPIDJSON_PARSE_ERROR(kParseErrorObjectMissName, is.Tell()); return; case IterativeParsingMemberKeyState: RAPIDJSON_PARSE_ERROR(kParseErrorObjectMissColon, is.Tell()); return; case IterativeParsingMemberValueState: RAPIDJSON_PARSE_ERROR(kParseErrorObjectMissCommaOrCurlyBracket, is.Tell()); return; + case IterativeParsingKeyValueDelimiterState: + case IterativeParsingArrayInitialState: + case IterativeParsingElementDelimiterState: RAPIDJSON_PARSE_ERROR(kParseErrorValueInvalid, is.Tell()); return; case IterativeParsingElementState: RAPIDJSON_PARSE_ERROR(kParseErrorArrayMissCommaOrSquareBracket, is.Tell()); return; default: RAPIDJSON_PARSE_ERROR(kParseErrorUnspecificSyntaxError, is.Tell()); return; } diff --git a/test/unittest/readertest.cpp b/test/unittest/readertest.cpp index 7c72f68f..83c1802f 100644 --- a/test/unittest/readertest.cpp +++ b/test/unittest/readertest.cpp @@ -1127,6 +1127,16 @@ TEST(Reader, IterativeParsing_ErrorHandling) { TESTERRORHANDLING("{\"a\": 1", kParseErrorObjectMissCommaOrCurlyBracket, 7u); TESTERRORHANDLING("[1 2 3]", kParseErrorArrayMissCommaOrSquareBracket, 3u); TESTERRORHANDLING("{\"a: 1", kParseErrorStringMissQuotationMark, 6u); + TESTERRORHANDLING("{\"a\":}", kParseErrorValueInvalid, 5u); + TESTERRORHANDLING("{\"a\":]", kParseErrorValueInvalid, 5u); + TESTERRORHANDLING("[1,2,}", kParseErrorValueInvalid, 5u); + TESTERRORHANDLING("[}]", kParseErrorValueInvalid, 1u); + TESTERRORHANDLING("[,]", kParseErrorValueInvalid, 1u); + TESTERRORHANDLING("[1,,]", kParseErrorValueInvalid, 3u); + + // Trailing commas are not allowed without kParseTrailingCommasFlag + TESTERRORHANDLING("{\"a\": 1,}", kParseErrorObjectMissName, 8u); + TESTERRORHANDLING("[1,2,3,]", kParseErrorValueInvalid, 7u); // Any JSON value can be a valid root element in RFC7159. TESTERRORHANDLING("\"ab", kParseErrorStringMissQuotationMark, 3u); @@ -1560,12 +1570,13 @@ TEST(Reader, NumbersAsStrings) { } } -TEST(Reader, TrailingCommas) { +template +void TestTrailingCommas() { { StringStream s("[1,2,3,]"); ParseArrayHandler<3> h; Reader reader; - EXPECT_TRUE(reader.Parse(s, h)); + EXPECT_TRUE(reader.Parse(s, h)); EXPECT_EQ(5u, h.step_); } { @@ -1574,7 +1585,7 @@ TEST(Reader, TrailingCommas) { StringStream s(json); ParseObjectHandler h; Reader reader; - EXPECT_TRUE(reader.Parse(s, h)); + EXPECT_TRUE(reader.Parse(s, h)); EXPECT_EQ(20u, h.step_); } { @@ -1584,7 +1595,7 @@ TEST(Reader, TrailingCommas) { StringStream s(json); ParseObjectHandler h; Reader reader; - EXPECT_TRUE(reader.Parse(s, h)); + EXPECT_TRUE(reader.Parse(s, h)); EXPECT_EQ(20u, h.step_); } { @@ -1594,18 +1605,27 @@ TEST(Reader, TrailingCommas) { StringStream s(json); ParseObjectHandler h; Reader reader; - EXPECT_TRUE(reader.Parse(s, h)); + EXPECT_TRUE(reader.Parse(s, h)); EXPECT_EQ(20u, h.step_); } } -TEST(Reader, MultipleTrailingCommaErrors) { +TEST(Reader, TrailingCommas) { + TestTrailingCommas(); +} + +TEST(Reader, TrailingCommasIterative) { + TestTrailingCommas(); +} + +template +void TestMultipleTrailingCommaErrors() { // only a single trailing comma is allowed. { StringStream s("[1,2,3,,]"); ParseArrayHandler<3> h; Reader reader; - ParseResult r = reader.Parse(s, h); + ParseResult r = reader.Parse(s, h); EXPECT_TRUE(reader.HasParseError()); EXPECT_EQ(kParseErrorValueInvalid, r.Code()); EXPECT_EQ(7u, r.Offset()); @@ -1616,21 +1636,30 @@ TEST(Reader, MultipleTrailingCommaErrors) { StringStream s(json); ParseObjectHandler h; Reader reader; - ParseResult r = reader.Parse(s, h); + ParseResult r = reader.Parse(s, h); EXPECT_TRUE(reader.HasParseError()); EXPECT_EQ(kParseErrorObjectMissName, r.Code()); EXPECT_EQ(95u, r.Offset()); } } -TEST(Reader, EmptyExceptForCommaErrors) { +TEST(Reader, MultipleTrailingCommaErrors) { + TestMultipleTrailingCommaErrors(); +} + +TEST(Reader, MultipleTrailingCommaErrorsIterative) { + TestMultipleTrailingCommaErrors(); +} + +template +void TestEmptyExceptForCommaErrors() { // not allowed even with trailing commas enabled; the // trailing comma must follow a value. { StringStream s("[,]"); ParseArrayHandler<3> h; Reader reader; - ParseResult r = reader.Parse(s, h); + ParseResult r = reader.Parse(s, h); EXPECT_TRUE(reader.HasParseError()); EXPECT_EQ(kParseErrorValueInvalid, r.Code()); EXPECT_EQ(1u, r.Offset()); @@ -1639,34 +1668,51 @@ TEST(Reader, EmptyExceptForCommaErrors) { StringStream s("{,}"); ParseObjectHandler h; Reader reader; - ParseResult r = reader.Parse(s, h); + ParseResult r = reader.Parse(s, h); EXPECT_TRUE(reader.HasParseError()); EXPECT_EQ(kParseErrorObjectMissName, r.Code()); EXPECT_EQ(1u, r.Offset()); } } -TEST(Reader, TrailingCommaHandlerTermination) { +TEST(Reader, EmptyExceptForCommaErrors) { + TestEmptyExceptForCommaErrors(); +} + +TEST(Reader, EmptyExceptForCommaErrorsIterative) { + TestEmptyExceptForCommaErrors(); +} + +template +void TestTrailingCommaHandlerTermination() { { HandlerTerminateAtEndArray h; Reader reader; StringStream s("[1,2,3,]"); - ParseResult r = reader.Parse(s, h); + ParseResult r = reader.Parse(s, h); EXPECT_TRUE(reader.HasParseError()); EXPECT_EQ(kParseErrorTermination, r.Code()); - EXPECT_EQ(8u, r.Offset()); + EXPECT_EQ(7u, r.Offset()); } { HandlerTerminateAtEndObject h; Reader reader; StringStream s("{\"t\": true, \"f\": false,}"); - ParseResult r = reader.Parse(s, h); + ParseResult r = reader.Parse(s, h); EXPECT_TRUE(reader.HasParseError()); EXPECT_EQ(kParseErrorTermination, r.Code()); - EXPECT_EQ(24u, r.Offset()); + EXPECT_EQ(23u, r.Offset()); } } +TEST(Reader, TrailingCommaHandlerTermination) { + TestTrailingCommaHandlerTermination(); +} + +TEST(Reader, TrailingCommaHandlerTerminationIterative) { + TestTrailingCommaHandlerTermination(); +} + #ifdef __GNUC__ RAPIDJSON_DIAG_POP #endif