Improve performance for comment parsing (#1052)

* Improve performance for comment parsing

* Fix weird main.cpp issue

* Re-add newline

* Remove carriage return feed char

* Remove unnecessary checks
This commit is contained in:
Jordan Bayles 2019-10-17 10:43:25 -07:00 committed by GitHub
parent aebc7faa4f
commit a07b37e4ec
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 48 additions and 25 deletions

View File

@@ -12,6 +12,7 @@
#endif // if !defined(JSON_IS_AMALGAMATION) #endif // if !defined(JSON_IS_AMALGAMATION)
#include <cassert> #include <cassert>
#include <cstring> #include <cstring>
#include <iostream>
#include <istream> #include <istream>
#include <limits> #include <limits>
#include <memory> #include <memory>
@@ -942,7 +943,7 @@ private:
void skipSpaces(); void skipSpaces();
bool match(const Char* pattern, int patternLength); bool match(const Char* pattern, int patternLength);
bool readComment(); bool readComment();
bool readCStyleComment(); bool readCStyleComment(bool* containsNewLineResult);
bool readCppStyleComment(); bool readCppStyleComment();
bool readString(); bool readString();
bool readStringSingleQuote(); bool readStringSingleQuote();
@@ -977,18 +978,20 @@ private:
static bool containsNewLine(Location begin, Location end); static bool containsNewLine(Location begin, Location end);
using Nodes = std::stack<Value*>; using Nodes = std::stack<Value*>;
Nodes nodes_;
Errors errors_; Nodes nodes_{};
String document_; Errors errors_{};
Location begin_; String document_{};
Location end_; Location begin_ = nullptr;
Location current_; Location end_ = nullptr;
Location lastValueEnd_; Location current_ = nullptr;
Value* lastValue_; Location lastValueEnd_ = nullptr;
String commentsBefore_; Value* lastValue_ = nullptr;
bool lastValueHasAComment_ = false;
String commentsBefore_{};
OurFeatures const features_; OurFeatures const features_;
bool collectComments_; bool collectComments_ = false;
}; // OurReader }; // OurReader
// complete copy of Read impl, for OurReader // complete copy of Read impl, for OurReader
@@ -1001,9 +1004,7 @@ bool OurReader::containsNewLine(OurReader::Location begin,
return false; return false;
} }
OurReader::OurReader(OurFeatures const& features) OurReader::OurReader(OurFeatures const& features) : features_(features) {}
: begin_(), end_(), current_(), lastValueEnd_(), lastValue_(),
features_(features), collectComments_() {}
bool OurReader::parse(const char* beginDoc, const char* endDoc, Value& root, bool OurReader::parse(const char* beginDoc, const char* endDoc, Value& root,
bool collectComments) { bool collectComments) {
@@ -1134,6 +1135,7 @@ bool OurReader::readValue() {
if (collectComments_) { if (collectComments_) {
lastValueEnd_ = current_; lastValueEnd_ = current_;
lastValueHasAComment_ = false;
lastValue_ = &currentValue(); lastValue_ = &currentValue();
} }
@@ -1280,21 +1282,32 @@ bool OurReader::match(const Char* pattern, int patternLength) {
} }
bool OurReader::readComment() { bool OurReader::readComment() {
Location commentBegin = current_ - 1; const Location commentBegin = current_ - 1;
Char c = getNextChar(); const Char c = getNextChar();
bool successful = false; bool successful = false;
if (c == '*') bool cStyleWithEmbeddedNewline = false;
successful = readCStyleComment();
else if (c == '/') const bool isCStyleComment = (c == '*');
const bool isCppStyleComment = (c == '/');
if (isCStyleComment) {
successful = readCStyleComment(&cStyleWithEmbeddedNewline);
} else if (isCppStyleComment) {
successful = readCppStyleComment(); successful = readCppStyleComment();
}
if (!successful) if (!successful)
return false; return false;
if (collectComments_) { if (collectComments_) {
CommentPlacement placement = commentBefore; CommentPlacement placement = commentBefore;
if (lastValueEnd_ && !containsNewLine(lastValueEnd_, commentBegin)) {
if (c != '*' || !containsNewLine(commentBegin, current_)) if (!lastValueHasAComment_) {
placement = commentAfterOnSameLine; if (lastValueEnd_ && !containsNewLine(lastValueEnd_, commentBegin)) {
if (isCppStyleComment || !cStyleWithEmbeddedNewline) {
placement = commentAfterOnSameLine;
lastValueHasAComment_ = true;
}
}
} }
addComment(commentBegin, current_, placement); addComment(commentBegin, current_, placement);
@@ -1334,12 +1347,18 @@ void OurReader::addComment(Location begin, Location end,
} }
} }
bool OurReader::readCStyleComment() { bool OurReader::readCStyleComment(bool* containsNewLineResult) {
*containsNewLineResult = false;
while ((current_ + 1) < end_) { while ((current_ + 1) < end_) {
Char c = getNextChar(); Char c = getNextChar();
if (c == '*' && *current_ == '/') if (c == '*' && *current_ == '/') {
break; break;
} else if (c == '\n') {
*containsNewLineResult = true;
}
} }
return getNextChar() == '/'; return getNextChar() == '/';
} }

View File

@@ -23,7 +23,10 @@ extern "C" int LLVMFuzzerTestOneInput(const uint8_t* data, size_t size) {
return 0; return 0;
} }
uint32_t hash_settings = *(const uint32_t*)data; const uint32_t hash_settings = static_cast<uint32_t>(data[0]) |
(static_cast<uint32_t>(data[1]) << 8) |
(static_cast<uint32_t>(data[2]) << 16) |
(static_cast<uint32_t>(data[3]) << 24);
data += sizeof(uint32_t); data += sizeof(uint32_t);
size -= sizeof(uint32_t); size -= sizeof(uint32_t);
@@ -36,6 +39,7 @@ extern "C" int LLVMFuzzerTestOneInput(const uint8_t* data, size_t size) {
builder.settings_["failIfExtra_"] = hash_settings & (1 << 6); builder.settings_["failIfExtra_"] = hash_settings & (1 << 6);
builder.settings_["rejectDupKeys_"] = hash_settings & (1 << 7); builder.settings_["rejectDupKeys_"] = hash_settings & (1 << 7);
builder.settings_["allowSpecialFloats_"] = hash_settings & (1 << 8); builder.settings_["allowSpecialFloats_"] = hash_settings & (1 << 8);
builder.settings_["collectComments"] = hash_settings & (1 << 9);
std::unique_ptr<Json::CharReader> reader(builder.newCharReader()); std::unique_ptr<Json::CharReader> reader(builder.newCharReader());