Mirror of https://github.com/open-source-parsers/jsoncpp.git, synced 2024-12-12 10:03:51 +01:00
Fix a parser bug where tokens are misidentified as commas. (#1502)
* Fix a parser bug where tokens are misidentified as commas.

  In the old and new readers, when parsing an object, a comment followed by any non-`}` token is treated as a comma. The new unit test required changing the runjsontests.py flag regime so that failure tests could be run with default settings.

* Honor allowComments==false mode.

  Much of the comment handling in the parsers is bespoke and does not honor this flag. By unifying it under a common API, the parser is simplified, and strict mode is now more correctly strict. Note that allowComments mode does not allow comments in arbitrary locations; they are allowed only in certain positions. Rectifying this is a bigger effort, since collectComments mode requires storing the comments somewhere, and it is not immediately clear where in the DOM all such comments should live.

---------

Co-authored-by: Jordan Bayles <bayles.jordan@gmail.com>
This commit is contained in:
parent c3a986600f
commit 0a9b9d9c6e
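
To illustrate the bug being fixed, here is a minimal repro sketch against jsoncpp's public API. It is illustrative only: the payload is copied from the new test/data/fail_test_object_02.json below, and it uses the deprecated Json::Reader path that this diff patches (the CharReader/OurReader path had the same flaw).

    #include <json/json.h>
    #include <iostream>
    #include <string>

    int main() {
      // From test/data/fail_test_object_02.json: after the value 1, a comment
      // followed by a stray '{' used to be accepted in place of the ',' separator.
      const std::string doc = "{\"one\": 1 /* } */ { \"two\" : 2 }";

      Json::Reader reader;
      Json::Value root;
      const bool ok = reader.parse(doc, root, /*collectComments=*/true);

      // Before this change: ok == true, and root ends up holding both "one" and "two".
      // After this change: ok == false, and the error reports the missing ',' or '}'.
      std::cout << (ok ? "accepted" : reader.getFormattedErrorMessages()) << std::endl;
      return ok ? 1 : 0;
    }
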
@@ -190,6 +190,7 @@ private:
   using Errors = std::deque<ErrorInfo>;

   bool readToken(Token& token);
+  bool readTokenSkippingComments(Token& token);
   void skipSpaces();
   bool match(const Char* pattern, int patternLength);
   bool readComment();

@@ -221,7 +222,6 @@ private:
                                 int& column) const;
   String getLocationLineAndColumn(Location location) const;
   void addComment(Location begin, Location end, CommentPlacement placement);
-  void skipCommentTokens(Token& token);

   static bool containsNewLine(Location begin, Location end);
   static String normalizeEOL(Location begin, Location end);

@@ -240,11 +240,14 @@ static int parseCommandLine(int argc, const char* argv[], Options* opts) {
     return printUsage(argv);
   }
   int index = 1;
-  if (Json::String(argv[index]) == "--json-checker") {
-    opts->features = Json::Features::strictMode();
+  if (Json::String(argv[index]) == "--parse-only") {
     opts->parseOnly = true;
     ++index;
   }
+  if (Json::String(argv[index]) == "--strict") {
+    opts->features = Json::Features::strictMode();
+    ++index;
+  }
   if (Json::String(argv[index]) == "--json-config") {
     printConfig();
     return 3;

@@ -129,7 +129,7 @@ bool Reader::parse(const char* beginDoc, const char* endDoc, Value& root,

   bool successful = readValue();
   Token token;
-  skipCommentTokens(token);
+  readTokenSkippingComments(token);
   if (collectComments_ && !commentsBefore_.empty())
     root.setComment(commentsBefore_, commentAfter);
   if (features_.strictRoot_) {

@@ -157,7 +157,7 @@ bool Reader::readValue() {
     throwRuntimeError("Exceeded stackLimit in readValue().");

   Token token;
-  skipCommentTokens(token);
+  readTokenSkippingComments(token);
   bool successful = true;

   if (collectComments_ && !commentsBefore_.empty()) {

@@ -225,14 +225,14 @@ bool Reader::readValue() {
   return successful;
 }

-void Reader::skipCommentTokens(Token& token) {
+bool Reader::readTokenSkippingComments(Token& token) {
+  bool success = readToken(token);
   if (features_.allowComments_) {
-    do {
-      readToken(token);
-    } while (token.type_ == tokenComment);
-  } else {
-    readToken(token);
+    while (success && token.type_ == tokenComment) {
+      success = readToken(token);
+    }
   }
+  return success;
 }

 bool Reader::readToken(Token& token) {

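For orientation, the following is a simplified standalone model of the contract the new helper establishes (Token, TokenType, and MiniReader here are illustrative stand-ins, not jsoncpp's internals): read one token, then skip further tokens only while they are comments and only when comments are allowed, propagating any read failure. When comments are disallowed, the comment token is handed back to the caller, whose token-type checks (for example the ','/'}' check in readObject below) then reject it.

    #include <cstddef>
    #include <iostream>
    #include <vector>

    // Illustrative stand-ins; jsoncpp's real token machinery lives in json_reader.cpp.
    enum TokenType { tokenComment, tokenString, tokenArraySeparator, tokenObjectEnd };
    struct Token { TokenType type_; };

    struct MiniReader {
      std::vector<Token> stream;  // pretend pre-lexed input
      std::size_t pos;
      bool allowComments;

      bool readToken(Token& token) {
        if (pos >= stream.size()) return false;
        token = stream[pos++];
        return true;
      }

      // Same shape as the new readTokenSkippingComments above.
      bool readTokenSkippingComments(Token& token) {
        bool success = readToken(token);
        if (allowComments) {
          while (success && token.type_ == tokenComment)
            success = readToken(token);
        }
        return success;
      }
    };

    int main() {
      MiniReader strict{{{tokenComment}, {tokenArraySeparator}}, 0, /*allowComments=*/false};
      Token t;
      const bool ok = strict.readTokenSkippingComments(t);
      // With comments disallowed the comment token is not skipped, so a caller that
      // expects ',' or '}' now reports an error instead of silently moving on.
      std::cout << ok << ' ' << (t.type_ == tokenComment) << '\n';  // prints "1 1"
      return 0;
    }
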
@@ -446,12 +446,7 @@ bool Reader::readObject(Token& token) {
   Value init(objectValue);
   currentValue().swapPayload(init);
   currentValue().setOffsetStart(token.start_ - begin_);
-  while (readToken(tokenName)) {
-    bool initialTokenOk = true;
-    while (tokenName.type_ == tokenComment && initialTokenOk)
-      initialTokenOk = readToken(tokenName);
-    if (!initialTokenOk)
-      break;
+  while (readTokenSkippingComments(tokenName)) {
     if (tokenName.type_ == tokenObjectEnd && name.empty()) // empty object
       return true;
     name.clear();

@@ -480,15 +475,11 @@ bool Reader::readObject(Token& token) {
       return recoverFromError(tokenObjectEnd);

     Token comma;
-    if (!readToken(comma) ||
-        (comma.type_ != tokenObjectEnd && comma.type_ != tokenArraySeparator &&
-         comma.type_ != tokenComment)) {
+    if (!readTokenSkippingComments(comma) ||
+        (comma.type_ != tokenObjectEnd && comma.type_ != tokenArraySeparator)) {
       return addErrorAndRecover("Missing ',' or '}' in object declaration",
                                 comma, tokenObjectEnd);
     }
-    bool finalizeTokenOk = true;
-    while (comma.type_ == tokenComment && finalizeTokenOk)
-      finalizeTokenOk = readToken(comma);
     if (comma.type_ == tokenObjectEnd)
       return true;
   }

@@ -518,10 +509,7 @@ bool Reader::readArray(Token& token) {

     Token currentToken;
     // Accept Comment after last item in the array.
-    ok = readToken(currentToken);
-    while (currentToken.type_ == tokenComment && ok) {
-      ok = readToken(currentToken);
-    }
+    ok = readTokenSkippingComments(currentToken);
     bool badTokenType = (currentToken.type_ != tokenArraySeparator &&
                          currentToken.type_ != tokenArrayEnd);
     if (!ok || badTokenType) {

@@ -943,6 +931,7 @@ private:
   using Errors = std::deque<ErrorInfo>;

   bool readToken(Token& token);
+  bool readTokenSkippingComments(Token& token);
   void skipSpaces();
   void skipBom(bool skipBom);
   bool match(const Char* pattern, int patternLength);

@@ -976,7 +965,6 @@ private:
                                 int& column) const;
   String getLocationLineAndColumn(Location location) const;
   void addComment(Location begin, Location end, CommentPlacement placement);
-  void skipCommentTokens(Token& token);

   static String normalizeEOL(Location begin, Location end);
   static bool containsNewLine(Location begin, Location end);

@@ -1030,7 +1018,7 @@ bool OurReader::parse(const char* beginDoc, const char* endDoc, Value& root,
   bool successful = readValue();
   nodes_.pop();
   Token token;
-  skipCommentTokens(token);
+  readTokenSkippingComments(token);
   if (features_.failIfExtra_ && (token.type_ != tokenEndOfStream)) {
     addError("Extra non-whitespace after JSON value.", token);
     return false;

@@ -1058,7 +1046,7 @@ bool OurReader::readValue() {
   if (nodes_.size() > features_.stackLimit_)
     throwRuntimeError("Exceeded stackLimit in readValue().");
   Token token;
-  skipCommentTokens(token);
+  readTokenSkippingComments(token);
   bool successful = true;

   if (collectComments_ && !commentsBefore_.empty()) {

@@ -1145,14 +1133,14 @@ bool OurReader::readValue() {
   return successful;
 }

-void OurReader::skipCommentTokens(Token& token) {
+bool OurReader::readTokenSkippingComments(Token& token) {
+  bool success = readToken(token);
   if (features_.allowComments_) {
-    do {
-      readToken(token);
-    } while (token.type_ == tokenComment);
-  } else {
-    readToken(token);
+    while (success && token.type_ == tokenComment) {
+      success = readToken(token);
+    }
   }
+  return success;
 }

 bool OurReader::readToken(Token& token) {

@@ -1449,12 +1437,7 @@ bool OurReader::readObject(Token& token) {
   Value init(objectValue);
   currentValue().swapPayload(init);
   currentValue().setOffsetStart(token.start_ - begin_);
-  while (readToken(tokenName)) {
-    bool initialTokenOk = true;
-    while (tokenName.type_ == tokenComment && initialTokenOk)
-      initialTokenOk = readToken(tokenName);
-    if (!initialTokenOk)
-      break;
+  while (readTokenSkippingComments(tokenName)) {
     if (tokenName.type_ == tokenObjectEnd &&
         (name.empty() ||
          features_.allowTrailingCommas_)) // empty object or trailing comma

@@ -1491,15 +1474,11 @@ bool OurReader::readObject(Token& token) {
       return recoverFromError(tokenObjectEnd);

     Token comma;
-    if (!readToken(comma) ||
-        (comma.type_ != tokenObjectEnd && comma.type_ != tokenArraySeparator &&
-         comma.type_ != tokenComment)) {
+    if (!readTokenSkippingComments(comma) ||
+        (comma.type_ != tokenObjectEnd && comma.type_ != tokenArraySeparator)) {
       return addErrorAndRecover("Missing ',' or '}' in object declaration",
                                 comma, tokenObjectEnd);
     }
-    bool finalizeTokenOk = true;
-    while (comma.type_ == tokenComment && finalizeTokenOk)
-      finalizeTokenOk = readToken(comma);
     if (comma.type_ == tokenObjectEnd)
       return true;
   }

@@ -1533,10 +1512,7 @@ bool OurReader::readArray(Token& token) {

     Token currentToken;
     // Accept Comment after last item in the array.
-    ok = readToken(currentToken);
-    while (currentToken.type_ == tokenComment && ok) {
-      ok = readToken(currentToken);
-    }
+    ok = readTokenSkippingComments(currentToken);
     bool badTokenType = (currentToken.type_ != tokenArraySeparator &&
                          currentToken.type_ != tokenArrayEnd);
     if (!ok || badTokenType) {

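The readArray change keeps the documented behavior that a comment after the last array item is accepted when comments are allowed, as the retained "Accept Comment after last item in the array" note says. A quick illustrative check (not part of this diff) using the deprecated Json::Reader with default features:

    #include <json/json.h>
    #include <cassert>

    int main() {
      Json::Reader reader;  // default Features: comments are allowed
      Json::Value root;
      // Comment after the last array element still parses under default settings.
      const bool ok = reader.parse("[1, 2, 3 /* trailing comment */]", root);
      assert(ok && root.size() == 3);
      return 0;
    }
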
test/data/fail_strict_comment_01.json (new file, 4 lines)
@@ -0,0 +1,4 @@
+{
+  "a": "aaa",
+  "b": "bbb" // comments not allowed in strict mode
+}

test/data/fail_strict_comment_02.json (new file, 4 lines)
@@ -0,0 +1,4 @@
+{
+  "a": "aaa", // comments not allowed in strict mode
+  "b": "bbb"
+}

test/data/fail_strict_comment_03.json (new file, 3 lines)
@@ -0,0 +1,3 @@
+{
+  "array" : [1, 2, 3 /* comments not allowed in strict mode */]
+}

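These three new failure cases exercise the allowComments==false path that this change makes genuinely strict. As a sketch of how that mode can be requested through jsoncpp's public CharReaderBuilder settings (the snippet is illustrative and not part of this diff):

    #include <json/json.h>
    #include <iostream>
    #include <sstream>

    int main() {
      Json::CharReaderBuilder builder;
      builder["allowComments"] = false;  // the mode the fail_strict_comment_* cases target

      // Same shape as test/data/fail_strict_comment_01.json.
      std::istringstream doc("{ \"a\": \"aaa\", \"b\": \"bbb\" // comment\n}");
      Json::Value root;
      Json::String errs;
      const bool ok = Json::parseFromStream(builder, doc, &root, &errs);

      // With comments disallowed, parsing is now expected to fail here.
      std::cout << (ok ? "accepted" : "rejected: " + errs) << std::endl;
      return ok ? 1 : 0;
    }
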
test/data/fail_test_object_02.json (new file, 1 line)
@@ -0,0 +1 @@
+{"one": 1 /* } */ { "two" : 2 }

@@ -97,14 +97,17 @@ def runAllTests(jsontest_executable_path, input_dir = None,
     valgrind_path = use_valgrind and VALGRIND_CMD or ''
     for input_path in tests + test_jsonchecker:
         expect_failure = os.path.basename(input_path).startswith('fail')
-        is_json_checker_test = (input_path in test_jsonchecker) or expect_failure
+        is_json_checker_test = input_path in test_jsonchecker
+        is_parse_only = is_json_checker_test or expect_failure
+        is_strict_test = ('_strict_' in os.path.basename(input_path)) or is_json_checker_test
         print('TESTING:', input_path, end=' ')
-        options = is_json_checker_test and '--json-checker' or ''
+        options = is_parse_only and '--parse-only' or ''
+        options += is_strict_test and ' --strict' or ''
         options += ' --json-writer %s'%writerClass
         cmd = '%s%s %s "%s"' % ( valgrind_path, jsontest_executable_path, options,
                                  input_path)
         status, process_output = getStatusOutput(cmd)
-        if is_json_checker_test:
+        if is_parse_only:
             if expect_failure:
                 if not status:
                     print('FAILED')