From 5e3e68af2e456a3acaf9fb13a85e0173123ab9bb Mon Sep 17 00:00:00 2001 From: Christopher Dunn Date: Tue, 10 Feb 2015 16:41:19 -0600 Subject: [PATCH 1/8] OldReader copied from Reader --- src/lib_json/json_reader.cpp | 220 ++++++++++++++++++++++++++++++++++- 1 file changed, 217 insertions(+), 3 deletions(-) diff --git a/src/lib_json/json_reader.cpp b/src/lib_json/json_reader.cpp index 703a5c2..cd1b9cc 100644 --- a/src/lib_json/json_reader.cpp +++ b/src/lib_json/json_reader.cpp @@ -892,11 +892,225 @@ bool Reader::good() const { return !errors_.size(); } -class OldReader : public CharReader { +// exact copy of Reader, renamed to OldReader +class OldReader { +public: + typedef char Char; + typedef const Char* Location; + + /** \brief An error tagged with where in the JSON text it was encountered. + * + * The offsets give the [start, limit) range of bytes within the text. Note + * that this is bytes, not codepoints. + * + */ + struct StructuredError { + size_t offset_start; + size_t offset_limit; + std::string message; + }; + + /** \brief Constructs a Reader allowing all features + * for parsing. + */ + Reader(); + + /** \brief Constructs a Reader allowing the specified feature set + * for parsing. + */ + Reader(const Features& features); + + /** \brief Read a Value from a JSON + * document. + * \param document UTF-8 encoded string containing the document to read. + * \param root [out] Contains the root value of the document if it was + * successfully parsed. + * \param collectComments \c true to collect comment and allow writing them + * back during + * serialization, \c false to discard comments. + * This parameter is ignored if + * Features::allowComments_ + * is \c false. + * \return \c true if the document was successfully parsed, \c false if an + * error occurred. + */ + bool + parse(const std::string& document, Value& root, bool collectComments = true); + + /** \brief Read a Value from a JSON + document. + * \param beginDoc Pointer on the beginning of the UTF-8 encoded string of the + document to read. + * \param endDoc Pointer on the end of the UTF-8 encoded string of the + document to read. + * Must be >= beginDoc. + * \param root [out] Contains the root value of the document if it was + * successfully parsed. + * \param collectComments \c true to collect comment and allow writing them + back during + * serialization, \c false to discard comments. + * This parameter is ignored if + Features::allowComments_ + * is \c false. + * \return \c true if the document was successfully parsed, \c false if an + error occurred. + */ + bool parse(const char* beginDoc, + const char* endDoc, + Value& root, + bool collectComments = true); + + /// \brief Parse from input stream. + /// \see Json::operator>>(std::istream&, Json::Value&). + bool parse(std::istream& is, Value& root, bool collectComments = true); + + /** \brief Returns a user friendly string that list errors in the parsed + * document. + * \return Formatted error message with the list of errors with their location + * in + * the parsed document. An empty string is returned if no error + * occurred + * during parsing. + * \deprecated Use getFormattedErrorMessages() instead (typo fix). + */ + JSONCPP_DEPRECATED("Use getFormattedErrorMessages instead") + std::string getFormatedErrorMessages() const; + + /** \brief Returns a user friendly string that list errors in the parsed + * document. + * \return Formatted error message with the list of errors with their location + * in + * the parsed document. An empty string is returned if no error + * occurred + * during parsing. + */ + std::string getFormattedErrorMessages() const; + + /** \brief Returns a vector of structured erros encounted while parsing. + * \return A (possibly empty) vector of StructuredError objects. Currently + * only one error can be returned, but the caller should tolerate + * multiple + * errors. This can occur if the parser recovers from a non-fatal + * parse error and then encounters additional errors. + */ + std::vector getStructuredErrors() const; + + /** \brief Add a semantic error message. + * \param value JSON Value location associated with the error + * \param message The error message. + * \return \c true if the error was successfully added, \c false if the + * Value offset exceeds the document size. + */ + bool pushError(const Value& value, const std::string& message); + + /** \brief Add a semantic error message with extra context. + * \param value JSON Value location associated with the error + * \param message The error message. + * \param extra Additional JSON Value location to contextualize the error + * \return \c true if the error was successfully added, \c false if either + * Value offset exceeds the document size. + */ + bool pushError(const Value& value, const std::string& message, const Value& extra); + + /** \brief Return whether there are any errors. + * \return \c true if there are no errors to report \c false if + * errors have occurred. + */ + bool good() const; + +private: + enum TokenType { + tokenEndOfStream = 0, + tokenObjectBegin, + tokenObjectEnd, + tokenArrayBegin, + tokenArrayEnd, + tokenString, + tokenNumber, + tokenTrue, + tokenFalse, + tokenNull, + tokenArraySeparator, + tokenMemberSeparator, + tokenComment, + tokenError + }; + + class Token { + public: + TokenType type_; + Location start_; + Location end_; + }; + + class ErrorInfo { + public: + Token token_; + std::string message_; + Location extra_; + }; + + typedef std::deque Errors; + + bool readToken(Token& token); + void skipSpaces(); + bool match(Location pattern, int patternLength); + bool readComment(); + bool readCStyleComment(); + bool readCppStyleComment(); + bool readString(); + void readNumber(); + bool readValue(); + bool readObject(Token& token); + bool readArray(Token& token); + bool decodeNumber(Token& token); + bool decodeNumber(Token& token, Value& decoded); + bool decodeString(Token& token); + bool decodeString(Token& token, std::string& decoded); + bool decodeDouble(Token& token); + bool decodeDouble(Token& token, Value& decoded); + bool decodeUnicodeCodePoint(Token& token, + Location& current, + Location end, + unsigned int& unicode); + bool decodeUnicodeEscapeSequence(Token& token, + Location& current, + Location end, + unsigned int& unicode); + bool addError(const std::string& message, Token& token, Location extra = 0); + bool recoverFromError(TokenType skipUntilToken); + bool addErrorAndRecover(const std::string& message, + Token& token, + TokenType skipUntilToken); + void skipUntilSpace(); + Value& currentValue(); + Char getNextChar(); + void + getLocationLineAndColumn(Location location, int& line, int& column) const; + std::string getLocationLineAndColumn(Location location) const; + void addComment(Location begin, Location end, CommentPlacement placement); + void skipCommentTokens(Token& token); + + typedef std::stack Nodes; + Nodes nodes_; + Errors errors_; + std::string document_; + Location begin_; + Location end_; + Location current_; + Location lastValueEnd_; + Value* lastValue_; + std::string commentsBefore_; + Features features_; + bool collectComments_; +}; // Reader + + +class OldCharReader : public CharReader { bool const collectComments_; Reader reader_; public: - OldReader( + OldCharReader( bool collectComments, Features const& features) : collectComments_(collectComments) @@ -930,7 +1144,7 @@ CharReader* CharReaderBuilder::newCharReader() const features.strictRoot_ = settings_["strictRoot"].asBool(); features.allowDroppedNullPlaceholders_ = settings_["allowDroppedNullPlaceholders"].asBool(); features.allowNumericKeys_ = settings_["allowNumericKeys"].asBool(); - return new OldReader(collectComments, features); + return new OldCharReader(collectComments, features); } static void getValidReaderKeys(std::set* valid_keys) { From 7477bcfa3a4836e8cd3837f022c91e7d3fdb5a14 Mon Sep 17 00:00:00 2001 From: Christopher Dunn Date: Tue, 10 Feb 2015 16:44:14 -0600 Subject: [PATCH 2/8] renames for OldReader --- src/lib_json/json_reader.cpp | 116 ++--------------------------------- 1 file changed, 6 insertions(+), 110 deletions(-) diff --git a/src/lib_json/json_reader.cpp b/src/lib_json/json_reader.cpp index cd1b9cc..42fd5ae 100644 --- a/src/lib_json/json_reader.cpp +++ b/src/lib_json/json_reader.cpp @@ -897,125 +897,21 @@ class OldReader { public: typedef char Char; typedef const Char* Location; - - /** \brief An error tagged with where in the JSON text it was encountered. - * - * The offsets give the [start, limit) range of bytes within the text. Note - * that this is bytes, not codepoints. - * - */ struct StructuredError { size_t offset_start; size_t offset_limit; std::string message; }; - /** \brief Constructs a Reader allowing all features - * for parsing. - */ - Reader(); - - /** \brief Constructs a Reader allowing the specified feature set - * for parsing. - */ - Reader(const Features& features); - - /** \brief Read a Value from a JSON - * document. - * \param document UTF-8 encoded string containing the document to read. - * \param root [out] Contains the root value of the document if it was - * successfully parsed. - * \param collectComments \c true to collect comment and allow writing them - * back during - * serialization, \c false to discard comments. - * This parameter is ignored if - * Features::allowComments_ - * is \c false. - * \return \c true if the document was successfully parsed, \c false if an - * error occurred. - */ - bool - parse(const std::string& document, Value& root, bool collectComments = true); - - /** \brief Read a Value from a JSON - document. - * \param beginDoc Pointer on the beginning of the UTF-8 encoded string of the - document to read. - * \param endDoc Pointer on the end of the UTF-8 encoded string of the - document to read. - * Must be >= beginDoc. - * \param root [out] Contains the root value of the document if it was - * successfully parsed. - * \param collectComments \c true to collect comment and allow writing them - back during - * serialization, \c false to discard comments. - * This parameter is ignored if - Features::allowComments_ - * is \c false. - * \return \c true if the document was successfully parsed, \c false if an - error occurred. - */ + OldReader(OldFeatures const* features); bool parse(const char* beginDoc, const char* endDoc, Value& root, bool collectComments = true); - - /// \brief Parse from input stream. - /// \see Json::operator>>(std::istream&, Json::Value&). - bool parse(std::istream& is, Value& root, bool collectComments = true); - - /** \brief Returns a user friendly string that list errors in the parsed - * document. - * \return Formatted error message with the list of errors with their location - * in - * the parsed document. An empty string is returned if no error - * occurred - * during parsing. - * \deprecated Use getFormattedErrorMessages() instead (typo fix). - */ - JSONCPP_DEPRECATED("Use getFormattedErrorMessages instead") - std::string getFormatedErrorMessages() const; - - /** \brief Returns a user friendly string that list errors in the parsed - * document. - * \return Formatted error message with the list of errors with their location - * in - * the parsed document. An empty string is returned if no error - * occurred - * during parsing. - */ std::string getFormattedErrorMessages() const; - - /** \brief Returns a vector of structured erros encounted while parsing. - * \return A (possibly empty) vector of StructuredError objects. Currently - * only one error can be returned, but the caller should tolerate - * multiple - * errors. This can occur if the parser recovers from a non-fatal - * parse error and then encounters additional errors. - */ std::vector getStructuredErrors() const; - - /** \brief Add a semantic error message. - * \param value JSON Value location associated with the error - * \param message The error message. - * \return \c true if the error was successfully added, \c false if the - * Value offset exceeds the document size. - */ bool pushError(const Value& value, const std::string& message); - - /** \brief Add a semantic error message with extra context. - * \param value JSON Value location associated with the error - * \param message The error message. - * \param extra Additional JSON Value location to contextualize the error - * \return \c true if the error was successfully added, \c false if either - * Value offset exceeds the document size. - */ bool pushError(const Value& value, const std::string& message, const Value& extra); - - /** \brief Return whether there are any errors. - * \return \c true if there are no errors to report \c false if - * errors have occurred. - */ bool good() const; private: @@ -1101,20 +997,20 @@ private: Location lastValueEnd_; Value* lastValue_; std::string commentsBefore_; - Features features_; + OldFeatures features_; bool collectComments_; -}; // Reader +}; // OldReader class OldCharReader : public CharReader { bool const collectComments_; - Reader reader_; + OldReader reader_; public: OldCharReader( bool collectComments, - Features const& features) + Features const* features) : collectComments_(collectComments) - , reader_(features) + , reader_(&features) {} virtual bool parse( char const* beginDoc, char const* endDoc, From 6123bd1505e279184f633aa5fedfbacf5e4e8622 Mon Sep 17 00:00:00 2001 From: Christopher Dunn Date: Tue, 10 Feb 2015 16:50:52 -0600 Subject: [PATCH 3/8] copy Reader impl to OldReader --- src/lib_json/json_reader.cpp | 785 +++++++++++++++++++++++++++++++++++ 1 file changed, 785 insertions(+) diff --git a/src/lib_json/json_reader.cpp b/src/lib_json/json_reader.cpp index 42fd5ae..8aab434 100644 --- a/src/lib_json/json_reader.cpp +++ b/src/lib_json/json_reader.cpp @@ -915,6 +915,9 @@ public: bool good() const; private: + OldReader(OldReader const&); // no impl + void operator=(OldReader const&); // no impl + enum TokenType { tokenEndOfStream = 0, tokenObjectBegin, @@ -1001,6 +1004,788 @@ private: bool collectComments_; }; // OldReader +// complete copy of Read impl, for OldReader + +OldReader::Reader(const Features& features) + : errors_(), document_(), begin_(), end_(), current_(), lastValueEnd_(), + lastValue_(), commentsBefore_(), features_(features), collectComments_() { +} + +bool OldReader::parse(const char* beginDoc, + const char* endDoc, + Value& root, + bool collectComments) { + if (!features_.allowComments_) { + collectComments = false; + } + + begin_ = beginDoc; + end_ = endDoc; + collectComments_ = collectComments; + current_ = begin_; + lastValueEnd_ = 0; + lastValue_ = 0; + commentsBefore_ = ""; + errors_.clear(); + while (!nodes_.empty()) + nodes_.pop(); + nodes_.push(&root); + + bool successful = readValue(); + Token token; + skipCommentTokens(token); + if (collectComments_ && !commentsBefore_.empty()) + root.setComment(commentsBefore_, commentAfter); + if (features_.strictRoot_) { + if (!root.isArray() && !root.isObject()) { + // Set error location to start of doc, ideally should be first token found + // in doc + token.type_ = tokenError; + token.start_ = beginDoc; + token.end_ = endDoc; + addError( + "A valid JSON document must be either an array or an object value.", + token); + return false; + } + } + return successful; +} + +bool OldReader::readValue() { + Token token; + skipCommentTokens(token); + bool successful = true; + + if (collectComments_ && !commentsBefore_.empty()) { + currentValue().setComment(commentsBefore_, commentBefore); + commentsBefore_ = ""; + } + + switch (token.type_) { + case tokenObjectBegin: + successful = readObject(token); + currentValue().setOffsetLimit(current_ - begin_); + break; + case tokenArrayBegin: + successful = readArray(token); + currentValue().setOffsetLimit(current_ - begin_); + break; + case tokenNumber: + successful = decodeNumber(token); + break; + case tokenString: + successful = decodeString(token); + break; + case tokenTrue: + { + Value v(true); + currentValue().swapPayload(v); + currentValue().setOffsetStart(token.start_ - begin_); + currentValue().setOffsetLimit(token.end_ - begin_); + } + break; + case tokenFalse: + { + Value v(false); + currentValue().swapPayload(v); + currentValue().setOffsetStart(token.start_ - begin_); + currentValue().setOffsetLimit(token.end_ - begin_); + } + break; + case tokenNull: + { + Value v; + currentValue().swapPayload(v); + currentValue().setOffsetStart(token.start_ - begin_); + currentValue().setOffsetLimit(token.end_ - begin_); + } + break; + case tokenArraySeparator: + if (features_.allowDroppedNullPlaceholders_) { + // "Un-read" the current token and mark the current value as a null + // token. + current_--; + Value v; + currentValue().swapPayload(v); + currentValue().setOffsetStart(current_ - begin_ - 1); + currentValue().setOffsetLimit(current_ - begin_); + break; + } + // Else, fall through... + default: + currentValue().setOffsetStart(token.start_ - begin_); + currentValue().setOffsetLimit(token.end_ - begin_); + return addError("Syntax error: value, object or array expected.", token); + } + + if (collectComments_) { + lastValueEnd_ = current_; + lastValue_ = ¤tValue(); + } + + return successful; +} + +void OldReader::skipCommentTokens(Token& token) { + if (features_.allowComments_) { + do { + readToken(token); + } while (token.type_ == tokenComment); + } else { + readToken(token); + } +} + +bool OldReader::readToken(Token& token) { + skipSpaces(); + token.start_ = current_; + Char c = getNextChar(); + bool ok = true; + switch (c) { + case '{': + token.type_ = tokenObjectBegin; + break; + case '}': + token.type_ = tokenObjectEnd; + break; + case '[': + token.type_ = tokenArrayBegin; + break; + case ']': + token.type_ = tokenArrayEnd; + break; + case '"': + token.type_ = tokenString; + ok = readString(); + break; + case '/': + token.type_ = tokenComment; + ok = readComment(); + break; + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + case '8': + case '9': + case '-': + token.type_ = tokenNumber; + readNumber(); + break; + case 't': + token.type_ = tokenTrue; + ok = match("rue", 3); + break; + case 'f': + token.type_ = tokenFalse; + ok = match("alse", 4); + break; + case 'n': + token.type_ = tokenNull; + ok = match("ull", 3); + break; + case ',': + token.type_ = tokenArraySeparator; + break; + case ':': + token.type_ = tokenMemberSeparator; + break; + case 0: + token.type_ = tokenEndOfStream; + break; + default: + ok = false; + break; + } + if (!ok) + token.type_ = tokenError; + token.end_ = current_; + return true; +} + +void OldReader::skipSpaces() { + while (current_ != end_) { + Char c = *current_; + if (c == ' ' || c == '\t' || c == '\r' || c == '\n') + ++current_; + else + break; + } +} + +bool OldReader::match(Location pattern, int patternLength) { + if (end_ - current_ < patternLength) + return false; + int index = patternLength; + while (index--) + if (current_[index] != pattern[index]) + return false; + current_ += patternLength; + return true; +} + +bool OldReader::readComment() { + Location commentBegin = current_ - 1; + Char c = getNextChar(); + bool successful = false; + if (c == '*') + successful = readCStyleComment(); + else if (c == '/') + successful = readCppStyleComment(); + if (!successful) + return false; + + if (collectComments_) { + CommentPlacement placement = commentBefore; + if (lastValueEnd_ && !containsNewLine(lastValueEnd_, commentBegin)) { + if (c != '*' || !containsNewLine(commentBegin, current_)) + placement = commentAfterOnSameLine; + } + + addComment(commentBegin, current_, placement); + } + return true; +} + +void +OldReader::addComment(Location begin, Location end, CommentPlacement placement) { + assert(collectComments_); + const std::string& normalized = normalizeEOL(begin, end); + if (placement == commentAfterOnSameLine) { + assert(lastValue_ != 0); + lastValue_->setComment(normalized, placement); + } else { + commentsBefore_ += normalized; + } +} + +bool OldReader::readCStyleComment() { + while (current_ != end_) { + Char c = getNextChar(); + if (c == '*' && *current_ == '/') + break; + } + return getNextChar() == '/'; +} + +bool OldReader::readCppStyleComment() { + while (current_ != end_) { + Char c = getNextChar(); + if (c == '\n') + break; + if (c == '\r') { + // Consume DOS EOL. It will be normalized in addComment. + if (current_ != end_ && *current_ == '\n') + getNextChar(); + // Break on Moc OS 9 EOL. + break; + } + } + return true; +} + +void OldReader::readNumber() { + const char *p = current_; + char c = '0'; // stopgap for already consumed character + // integral part + while (c >= '0' && c <= '9') + c = (current_ = p) < end_ ? *p++ : 0; + // fractional part + if (c == '.') { + c = (current_ = p) < end_ ? *p++ : 0; + while (c >= '0' && c <= '9') + c = (current_ = p) < end_ ? *p++ : 0; + } + // exponential part + if (c == 'e' || c == 'E') { + c = (current_ = p) < end_ ? *p++ : 0; + if (c == '+' || c == '-') + c = (current_ = p) < end_ ? *p++ : 0; + while (c >= '0' && c <= '9') + c = (current_ = p) < end_ ? *p++ : 0; + } +} + +bool OldReader::readString() { + Char c = 0; + while (current_ != end_) { + c = getNextChar(); + if (c == '\\') + getNextChar(); + else if (c == '"') + break; + } + return c == '"'; +} + +bool OldReader::readObject(Token& tokenStart) { + Token tokenName; + std::string name; + Value init(objectValue); + currentValue().swapPayload(init); + currentValue().setOffsetStart(tokenStart.start_ - begin_); + while (readToken(tokenName)) { + bool initialTokenOk = true; + while (tokenName.type_ == tokenComment && initialTokenOk) + initialTokenOk = readToken(tokenName); + if (!initialTokenOk) + break; + if (tokenName.type_ == tokenObjectEnd && name.empty()) // empty object + return true; + name = ""; + if (tokenName.type_ == tokenString) { + if (!decodeString(tokenName, name)) + return recoverFromError(tokenObjectEnd); + } else if (tokenName.type_ == tokenNumber && features_.allowNumericKeys_) { + Value numberName; + if (!decodeNumber(tokenName, numberName)) + return recoverFromError(tokenObjectEnd); + name = numberName.asString(); + } else { + break; + } + + Token colon; + if (!readToken(colon) || colon.type_ != tokenMemberSeparator) { + return addErrorAndRecover( + "Missing ':' after object member name", colon, tokenObjectEnd); + } + Value& value = currentValue()[name]; + nodes_.push(&value); + bool ok = readValue(); + nodes_.pop(); + if (!ok) // error already set + return recoverFromError(tokenObjectEnd); + + Token comma; + if (!readToken(comma) || + (comma.type_ != tokenObjectEnd && comma.type_ != tokenArraySeparator && + comma.type_ != tokenComment)) { + return addErrorAndRecover( + "Missing ',' or '}' in object declaration", comma, tokenObjectEnd); + } + bool finalizeTokenOk = true; + while (comma.type_ == tokenComment && finalizeTokenOk) + finalizeTokenOk = readToken(comma); + if (comma.type_ == tokenObjectEnd) + return true; + } + return addErrorAndRecover( + "Missing '}' or object member name", tokenName, tokenObjectEnd); +} + +bool OldReader::readArray(Token& tokenStart) { + Value init(arrayValue); + currentValue().swapPayload(init); + currentValue().setOffsetStart(tokenStart.start_ - begin_); + skipSpaces(); + if (*current_ == ']') // empty array + { + Token endArray; + readToken(endArray); + return true; + } + int index = 0; + for (;;) { + Value& value = currentValue()[index++]; + nodes_.push(&value); + bool ok = readValue(); + nodes_.pop(); + if (!ok) // error already set + return recoverFromError(tokenArrayEnd); + + Token token; + // Accept Comment after last item in the array. + ok = readToken(token); + while (token.type_ == tokenComment && ok) { + ok = readToken(token); + } + bool badTokenType = + (token.type_ != tokenArraySeparator && token.type_ != tokenArrayEnd); + if (!ok || badTokenType) { + return addErrorAndRecover( + "Missing ',' or ']' in array declaration", token, tokenArrayEnd); + } + if (token.type_ == tokenArrayEnd) + break; + } + return true; +} + +bool OldReader::decodeNumber(Token& token) { + Value decoded; + if (!decodeNumber(token, decoded)) + return false; + currentValue().swapPayload(decoded); + currentValue().setOffsetStart(token.start_ - begin_); + currentValue().setOffsetLimit(token.end_ - begin_); + return true; +} + +bool OldReader::decodeNumber(Token& token, Value& decoded) { + // Attempts to parse the number as an integer. If the number is + // larger than the maximum supported value of an integer then + // we decode the number as a double. + Location current = token.start_; + bool isNegative = *current == '-'; + if (isNegative) + ++current; + // TODO: Help the compiler do the div and mod at compile time or get rid of them. + Value::LargestUInt maxIntegerValue = + isNegative ? Value::LargestUInt(-Value::minLargestInt) + : Value::maxLargestUInt; + Value::LargestUInt threshold = maxIntegerValue / 10; + Value::LargestUInt value = 0; + while (current < token.end_) { + Char c = *current++; + if (c < '0' || c > '9') + return decodeDouble(token, decoded); + Value::UInt digit(c - '0'); + if (value >= threshold) { + // We've hit or exceeded the max value divided by 10 (rounded down). If + // a) we've only just touched the limit, b) this is the last digit, and + // c) it's small enough to fit in that rounding delta, we're okay. + // Otherwise treat this number as a double to avoid overflow. + if (value > threshold || current != token.end_ || + digit > maxIntegerValue % 10) { + return decodeDouble(token, decoded); + } + } + value = value * 10 + digit; + } + if (isNegative) + decoded = -Value::LargestInt(value); + else if (value <= Value::LargestUInt(Value::maxInt)) + decoded = Value::LargestInt(value); + else + decoded = value; + return true; +} + +bool OldReader::decodeDouble(Token& token) { + Value decoded; + if (!decodeDouble(token, decoded)) + return false; + currentValue().swapPayload(decoded); + currentValue().setOffsetStart(token.start_ - begin_); + currentValue().setOffsetLimit(token.end_ - begin_); + return true; +} + +bool OldReader::decodeDouble(Token& token, Value& decoded) { + double value = 0; + const int bufferSize = 32; + int count; + int length = int(token.end_ - token.start_); + + // Sanity check to avoid buffer overflow exploits. + if (length < 0) { + return addError("Unable to parse token length", token); + } + + // Avoid using a string constant for the format control string given to + // sscanf, as this can cause hard to debug crashes on OS X. See here for more + // info: + // + // http://developer.apple.com/library/mac/#DOCUMENTATION/DeveloperTools/gcc-4.0.1/gcc/Incompatibilities.html + char format[] = "%lf"; + + if (length <= bufferSize) { + Char buffer[bufferSize + 1]; + memcpy(buffer, token.start_, length); + buffer[length] = 0; + count = sscanf(buffer, format, &value); + } else { + std::string buffer(token.start_, token.end_); + count = sscanf(buffer.c_str(), format, &value); + } + + if (count != 1) + return addError("'" + std::string(token.start_, token.end_) + + "' is not a number.", + token); + decoded = value; + return true; +} + +bool OldReader::decodeString(Token& token) { + std::string decoded_string; + if (!decodeString(token, decoded_string)) + return false; + Value decoded(decoded_string); + currentValue().swapPayload(decoded); + currentValue().setOffsetStart(token.start_ - begin_); + currentValue().setOffsetLimit(token.end_ - begin_); + return true; +} + +bool OldReader::decodeString(Token& token, std::string& decoded) { + decoded.reserve(token.end_ - token.start_ - 2); + Location current = token.start_ + 1; // skip '"' + Location end = token.end_ - 1; // do not include '"' + while (current != end) { + Char c = *current++; + if (c == '"') + break; + else if (c == '\\') { + if (current == end) + return addError("Empty escape sequence in string", token, current); + Char escape = *current++; + switch (escape) { + case '"': + decoded += '"'; + break; + case '/': + decoded += '/'; + break; + case '\\': + decoded += '\\'; + break; + case 'b': + decoded += '\b'; + break; + case 'f': + decoded += '\f'; + break; + case 'n': + decoded += '\n'; + break; + case 'r': + decoded += '\r'; + break; + case 't': + decoded += '\t'; + break; + case 'u': { + unsigned int unicode; + if (!decodeUnicodeCodePoint(token, current, end, unicode)) + return false; + decoded += codePointToUTF8(unicode); + } break; + default: + return addError("Bad escape sequence in string", token, current); + } + } else { + decoded += c; + } + } + return true; +} + +bool OldReader::decodeUnicodeCodePoint(Token& token, + Location& current, + Location end, + unsigned int& unicode) { + + if (!decodeUnicodeEscapeSequence(token, current, end, unicode)) + return false; + if (unicode >= 0xD800 && unicode <= 0xDBFF) { + // surrogate pairs + if (end - current < 6) + return addError( + "additional six characters expected to parse unicode surrogate pair.", + token, + current); + unsigned int surrogatePair; + if (*(current++) == '\\' && *(current++) == 'u') { + if (decodeUnicodeEscapeSequence(token, current, end, surrogatePair)) { + unicode = 0x10000 + ((unicode & 0x3FF) << 10) + (surrogatePair & 0x3FF); + } else + return false; + } else + return addError("expecting another \\u token to begin the second half of " + "a unicode surrogate pair", + token, + current); + } + return true; +} + +bool OldReader::decodeUnicodeEscapeSequence(Token& token, + Location& current, + Location end, + unsigned int& unicode) { + if (end - current < 4) + return addError( + "Bad unicode escape sequence in string: four digits expected.", + token, + current); + unicode = 0; + for (int index = 0; index < 4; ++index) { + Char c = *current++; + unicode *= 16; + if (c >= '0' && c <= '9') + unicode += c - '0'; + else if (c >= 'a' && c <= 'f') + unicode += c - 'a' + 10; + else if (c >= 'A' && c <= 'F') + unicode += c - 'A' + 10; + else + return addError( + "Bad unicode escape sequence in string: hexadecimal digit expected.", + token, + current); + } + return true; +} + +bool +OldReader::addError(const std::string& message, Token& token, Location extra) { + ErrorInfo info; + info.token_ = token; + info.message_ = message; + info.extra_ = extra; + errors_.push_back(info); + return false; +} + +bool OldReader::recoverFromError(TokenType skipUntilToken) { + int errorCount = int(errors_.size()); + Token skip; + for (;;) { + if (!readToken(skip)) + errors_.resize(errorCount); // discard errors caused by recovery + if (skip.type_ == skipUntilToken || skip.type_ == tokenEndOfStream) + break; + } + errors_.resize(errorCount); + return false; +} + +bool OldReader::addErrorAndRecover(const std::string& message, + Token& token, + TokenType skipUntilToken) { + addError(message, token); + return recoverFromError(skipUntilToken); +} + +Value& OldReader::currentValue() { return *(nodes_.top()); } + +OldReader::Char OldReader::getNextChar() { + if (current_ == end_) + return 0; + return *current_++; +} + +void OldReader::getLocationLineAndColumn(Location location, + int& line, + int& column) const { + Location current = begin_; + Location lastLineStart = current; + line = 0; + while (current < location && current != end_) { + Char c = *current++; + if (c == '\r') { + if (*current == '\n') + ++current; + lastLineStart = current; + ++line; + } else if (c == '\n') { + lastLineStart = current; + ++line; + } + } + // column & line start at 1 + column = int(location - lastLineStart) + 1; + ++line; +} + +std::string OldReader::getLocationLineAndColumn(Location location) const { + int line, column; + getLocationLineAndColumn(location, line, column); + char buffer[18 + 16 + 16 + 1]; +#if defined(_MSC_VER) && defined(__STDC_SECURE_LIB__) +#if defined(WINCE) + _snprintf(buffer, sizeof(buffer), "Line %d, Column %d", line, column); +#else + sprintf_s(buffer, sizeof(buffer), "Line %d, Column %d", line, column); +#endif +#else + snprintf(buffer, sizeof(buffer), "Line %d, Column %d", line, column); +#endif + return buffer; +} + +// Deprecated. Preserved for backward compatibility +std::string OldReader::getFormatedErrorMessages() const { + return getFormattedErrorMessages(); +} + +std::string OldReader::getFormattedErrorMessages() const { + std::string formattedMessage; + for (Errors::const_iterator itError = errors_.begin(); + itError != errors_.end(); + ++itError) { + const ErrorInfo& error = *itError; + formattedMessage += + "* " + getLocationLineAndColumn(error.token_.start_) + "\n"; + formattedMessage += " " + error.message_ + "\n"; + if (error.extra_) + formattedMessage += + "See " + getLocationLineAndColumn(error.extra_) + " for detail.\n"; + } + return formattedMessage; +} + +std::vector OldReader::getStructuredErrors() const { + std::vector allErrors; + for (Errors::const_iterator itError = errors_.begin(); + itError != errors_.end(); + ++itError) { + const ErrorInfo& error = *itError; + OldReader::StructuredError structured; + structured.offset_start = error.token_.start_ - begin_; + structured.offset_limit = error.token_.end_ - begin_; + structured.message = error.message_; + allErrors.push_back(structured); + } + return allErrors; +} + +bool OldReader::pushError(const Value& value, const std::string& message) { + size_t length = end_ - begin_; + if(value.getOffsetStart() > length + || value.getOffsetLimit() > length) + return false; + Token token; + token.type_ = tokenError; + token.start_ = begin_ + value.getOffsetStart(); + token.end_ = end_ + value.getOffsetLimit(); + ErrorInfo info; + info.token_ = token; + info.message_ = message; + info.extra_ = 0; + errors_.push_back(info); + return true; +} + +bool OldReader::pushError(const Value& value, const std::string& message, const Value& extra) { + size_t length = end_ - begin_; + if(value.getOffsetStart() > length + || value.getOffsetLimit() > length + || extra.getOffsetLimit() > length) + return false; + Token token; + token.type_ = tokenError; + token.start_ = begin_ + value.getOffsetStart(); + token.end_ = begin_ + value.getOffsetLimit(); + ErrorInfo info; + info.token_ = token; + info.message_ = message; + info.extra_ = begin_ + extra.getOffsetStart(); + errors_.push_back(info); + return true; +} + +bool OldReader::good() const { + return !errors_.size(); +} + class OldCharReader : public CharReader { bool const collectComments_; From 435d2a2f8d7160089536f4e5c7d7f1a836b1cde3 Mon Sep 17 00:00:00 2001 From: Christopher Dunn Date: Tue, 10 Feb 2015 16:56:49 -0600 Subject: [PATCH 4/8] passes --- src/lib_json/json_reader.cpp | 15 +++++---------- 1 file changed, 5 insertions(+), 10 deletions(-) diff --git a/src/lib_json/json_reader.cpp b/src/lib_json/json_reader.cpp index 8aab434..6490822 100644 --- a/src/lib_json/json_reader.cpp +++ b/src/lib_json/json_reader.cpp @@ -903,7 +903,7 @@ public: std::string message; }; - OldReader(OldFeatures const* features); + OldReader(Features const& features); bool parse(const char* beginDoc, const char* endDoc, Value& root, @@ -1000,13 +1000,13 @@ private: Location lastValueEnd_; Value* lastValue_; std::string commentsBefore_; - OldFeatures features_; + Features features_; bool collectComments_; }; // OldReader // complete copy of Read impl, for OldReader -OldReader::Reader(const Features& features) +OldReader::OldReader(Features const& features) : errors_(), document_(), begin_(), end_(), current_(), lastValueEnd_(), lastValue_(), commentsBefore_(), features_(features), collectComments_() { } @@ -1711,11 +1711,6 @@ std::string OldReader::getLocationLineAndColumn(Location location) const { return buffer; } -// Deprecated. Preserved for backward compatibility -std::string OldReader::getFormatedErrorMessages() const { - return getFormattedErrorMessages(); -} - std::string OldReader::getFormattedErrorMessages() const { std::string formattedMessage; for (Errors::const_iterator itError = errors_.begin(); @@ -1793,9 +1788,9 @@ class OldCharReader : public CharReader { public: OldCharReader( bool collectComments, - Features const* features) + Features const& features) : collectComments_(collectComments) - , reader_(&features) + , reader_(features) {} virtual bool parse( char const* beginDoc, char const* endDoc, From 052050df079c31b7374c27907c763f0633e84767 Mon Sep 17 00:00:00 2001 From: Christopher Dunn Date: Tue, 10 Feb 2015 17:00:34 -0600 Subject: [PATCH 5/8] copy Features to OldFeatures --- src/lib_json/json_reader.cpp | 43 +++++++++++++++++++++++++++++++----- 1 file changed, 38 insertions(+), 5 deletions(-) diff --git a/src/lib_json/json_reader.cpp b/src/lib_json/json_reader.cpp index 6490822..51d0feb 100644 --- a/src/lib_json/json_reader.cpp +++ b/src/lib_json/json_reader.cpp @@ -892,6 +892,39 @@ bool Reader::good() const { return !errors_.size(); } +// exact copy of Features +class OldFeatures { +public: + static OldFeatures all(); + static OldFeatures strictMode(); + OldFeatures(); + bool allowComments_; + bool strictRoot_; + bool allowDroppedNullPlaceholders_; + bool allowNumericKeys_; +}; // OldFeatures + +// exact copy of Implementation of class Features +// //////////////////////////////// + +OldFeatures::OldFeatures() + : allowComments_(true), strictRoot_(false), + allowDroppedNullPlaceholders_(false), allowNumericKeys_(false) {} + +OldFeatures OldFeatures::all() { return OldFeatures(); } + +OldFeatures OldFeatures::strictMode() { + OldFeatures features; + features.allowComments_ = false; + features.strictRoot_ = true; + features.allowDroppedNullPlaceholders_ = false; + features.allowNumericKeys_ = false; + return features; +} + +// Implementation of class Reader +// //////////////////////////////// + // exact copy of Reader, renamed to OldReader class OldReader { public: @@ -903,7 +936,7 @@ public: std::string message; }; - OldReader(Features const& features); + OldReader(OldFeatures const& features); bool parse(const char* beginDoc, const char* endDoc, Value& root, @@ -1000,13 +1033,13 @@ private: Location lastValueEnd_; Value* lastValue_; std::string commentsBefore_; - Features features_; + OldFeatures features_; bool collectComments_; }; // OldReader // complete copy of Read impl, for OldReader -OldReader::OldReader(Features const& features) +OldReader::OldReader(OldFeatures const& features) : errors_(), document_(), begin_(), end_(), current_(), lastValueEnd_(), lastValue_(), commentsBefore_(), features_(features), collectComments_() { } @@ -1788,7 +1821,7 @@ class OldCharReader : public CharReader { public: OldCharReader( bool collectComments, - Features const& features) + OldFeatures const& features) : collectComments_(collectComments) , reader_(features) {} @@ -1815,7 +1848,7 @@ CharReader* CharReaderBuilder::newCharReader() const // TODO: Maybe serialize the invalid settings into the exception. bool collectComments = settings_["collectComments"].asBool(); - Features features = Features::all(); + OldFeatures features = OldFeatures::all(); features.allowComments_ = settings_["allowComments"].asBool(); features.strictRoot_ = settings_["strictRoot"].asBool(); features.allowDroppedNullPlaceholders_ = settings_["allowDroppedNullPlaceholders"].asBool(); From 07f0e9308de56f52caa76eae0ab517b6fd47699c Mon Sep 17 00:00:00 2001 From: Christopher Dunn Date: Tue, 10 Feb 2015 20:45:42 -0600 Subject: [PATCH 6/8] nullRef, since we had to add that kludge to 0.8.0 --- include/json/value.h | 3 ++- src/lib_json/json_value.cpp | 1 + 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/include/json/value.h b/include/json/value.h index efc34ac..0c507a3 100644 --- a/include/json/value.h +++ b/include/json/value.h @@ -133,7 +133,8 @@ public: typedef Json::LargestUInt LargestUInt; typedef Json::ArrayIndex ArrayIndex; - static const Value& null; + static const Value& null; ///! We regret this reference to a global instance; prefer the simpler Value(). + static const Value& nullRef; ///! just a kludge for binary-compatibility; same as null /// Minimum signed integer value that can be stored in a Json::Value. static const LargestInt minLargestInt; /// Maximum signed integer value that can be stored in a Json::Value. diff --git a/src/lib_json/json_value.cpp b/src/lib_json/json_value.cpp index ed5aafe..f9139c7 100644 --- a/src/lib_json/json_value.cpp +++ b/src/lib_json/json_value.cpp @@ -36,6 +36,7 @@ namespace Json { static const unsigned char ALIGNAS(8) kNull[sizeof(Value)] = { 0 }; const unsigned char& kNullRef = kNull[0]; const Value& Value::null = reinterpret_cast(kNullRef); +const Value& Value::nullRef = null; const Int Value::minInt = Int(~(UInt(-1) / 2)); const Int Value::maxInt = Int(UInt(-1) / 2); From 5a744708fc4348ab00e16ccb48a0277d10d37ac8 Mon Sep 17 00:00:00 2001 From: Christopher Dunn Date: Tue, 10 Feb 2015 21:15:43 -0600 Subject: [PATCH 7/8] enableYAMLCompatibility and dropNullPlaceholders for StreamWriterBuilder --- include/json/writer.h | 9 ++++++++- src/lib_json/json_writer.cpp | 13 ++++++++++++- 2 files changed, 20 insertions(+), 2 deletions(-) diff --git a/include/json/writer.h b/include/json/writer.h index 2b081a7..4951ff4 100644 --- a/include/json/writer.h +++ b/include/json/writer.h @@ -90,8 +90,15 @@ public: // without a major version bump. /** Configuration of this builder. Available settings (case-sensitive): - - "commentStyle": "None", "Some", or "All" + - "commentStyle": "None" or "All" - "indentation": "" + - "enableYAMLCompatibility": False or True + - slightly change the whitespace around colons + - "dropNullPlaceholders": False or True + - Drop the "null" string from the writer's output for nullValues. + Strictly speaking, this is not valid JSON. But when the output is being + fed to a browser's Javascript, it makes for smaller output and the + browser can handle the output just fine. You can examine 'settings_` yourself to see the defaults. You can also write and read them just like any diff --git a/src/lib_json/json_writer.cpp b/src/lib_json/json_writer.cpp index 27d5f56..372e945 100644 --- a/src/lib_json/json_writer.cpp +++ b/src/lib_json/json_writer.cpp @@ -973,6 +973,8 @@ StreamWriter* StreamWriterBuilder::newStreamWriter() const std::string indentation = settings_["indentation"].asString(); std::string cs_str = settings_["commentStyle"].asString(); + bool eyc = settings_["enableYAMLCompatibility"].asBool(); + bool dnp = settings_["dropNullPlaceholders"].asBool(); CommentStyle::Enum cs = CommentStyle::All; if (cs_str == "All") { cs = CommentStyle::All; @@ -982,10 +984,15 @@ StreamWriter* StreamWriterBuilder::newStreamWriter() const return NULL; } std::string colonSymbol = " : "; - if (indentation.empty()) { + if (eyc) { + colonSymbol = ": "; + } else if (indentation.empty()) { colonSymbol = ":"; } std::string nullSymbol = "null"; + if (dnp) { + nullSymbol = ""; + } std::string endingLineFeedSymbol = ""; return new BuiltStyledStreamWriter( indentation, cs, @@ -996,6 +1003,8 @@ static void getValidWriterKeys(std::set* valid_keys) valid_keys->clear(); valid_keys->insert("indentation"); valid_keys->insert("commentStyle"); + valid_keys->insert("enableYAMLCompatibility"); + valid_keys->insert("dropNullPlaceholders"); } bool StreamWriterBuilder::validate(Json::Value* invalid) const { @@ -1021,6 +1030,8 @@ void StreamWriterBuilder::setDefaults(Json::Value* settings) //! [StreamWriterBuilderDefaults] (*settings)["commentStyle"] = "All"; (*settings)["indentation"] = "\t"; + (*settings)["enableYAMLCompatibility"] = false; + (*settings)["dropNullPlaceholders"] = false; //! [StreamWriterBuilderDefaults] } From 20d09676c26ae1a669f04a6f1c764b9dadb8fb82 Mon Sep 17 00:00:00 2001 From: Christopher Dunn Date: Tue, 10 Feb 2015 21:29:35 -0600 Subject: [PATCH 8/8] drop experimental OldCompressingStreamWriterBuilder --- include/json/writer.h | 50 +----------------------------------- src/lib_json/json_writer.cpp | 21 --------------- 2 files changed, 1 insertion(+), 70 deletions(-) diff --git a/include/json/writer.h b/include/json/writer.h index 4951ff4..9f51d07 100644 --- a/include/json/writer.h +++ b/include/json/writer.h @@ -127,54 +127,6 @@ public: static void setDefaults(Json::Value* settings); }; -/** \brief Build a StreamWriter implementation. - * Comments are not written, and most whitespace is omitted. - * In addition, there are some special settings to allow compatibility - * with the old FastWriter. - * Usage: - * \code - * OldCompressingStreamWriterBuilder b; - * b.dropNullPlaceHolders_ = true; // etc. - * StreamWriter* w = b.newStreamWriter(); - * w->write(value, &std::cout); - * delete w; - * \endcode - * - * \deprecated Use StreamWriterBuilder - */ -class JSON_API OldCompressingStreamWriterBuilder : public StreamWriter::Factory -{ -public: - // Note: We cannot add data-members to this class without a major version bump. - // So these might as well be completely exposed. - - /** \brief Drop the "null" string from the writer's output for nullValues. - * Strictly speaking, this is not valid JSON. But when the output is being - * fed to a browser's Javascript, it makes for smaller output and the - * browser can handle the output just fine. - */ - bool dropNullPlaceholders_; - /** \brief Do not add \n at end of document. - * Normally, we add an extra newline, just because. - */ - bool omitEndingLineFeed_; - /** \brief Add a space after ':'. - * If indentation is non-empty, we surround colon with whitespace, - * e.g. " : " - * This will add back the trailing space when there is no indentation. - * This seems dubious when the entire document is on a single line, - * but we leave this here to repduce the behavior of the old `FastWriter`. - */ - bool enableYAMLCompatibility_; - - OldCompressingStreamWriterBuilder() - : dropNullPlaceholders_(false) - , omitEndingLineFeed_(false) - , enableYAMLCompatibility_(false) - {} - virtual StreamWriter* newStreamWriter() const; -}; - /** \brief Abstract class for writers. * \deprecated Use StreamWriter. */ @@ -192,7 +144,7 @@ public: *consumption, * but may be usefull to support feature such as RPC where bandwith is limited. * \sa Reader, Value - * \deprecated Use OldCompressingStreamWriterBuilder. + * \deprecated Use StreamWriterBuilder. */ class JSON_API FastWriter : public Writer { public: diff --git a/src/lib_json/json_writer.cpp b/src/lib_json/json_writer.cpp index 372e945..7cf46f4 100644 --- a/src/lib_json/json_writer.cpp +++ b/src/lib_json/json_writer.cpp @@ -1035,27 +1035,6 @@ void StreamWriterBuilder::setDefaults(Json::Value* settings) //! [StreamWriterBuilderDefaults] } -StreamWriter* OldCompressingStreamWriterBuilder::newStreamWriter() const -{ - std::string colonSymbol = " : "; - if (enableYAMLCompatibility_) { - colonSymbol = ": "; - } else { - colonSymbol = ":"; - } - std::string nullSymbol = "null"; - if (dropNullPlaceholders_) { - nullSymbol = ""; - } - std::string endingLineFeedSymbol = "\n"; - if (omitEndingLineFeed_) { - endingLineFeedSymbol = ""; - } - return new BuiltStyledStreamWriter( - "", CommentStyle::None, - colonSymbol, nullSymbol, endingLineFeedSymbol); -} - std::string writeString(StreamWriter::Factory const& builder, Value const& root) { std::ostringstream sout; StreamWriterPtr const writer(builder.newStreamWriter());