From c28610fb5d1c0fa7b2e47d888a4e262edbd59c05 Mon Sep 17 00:00:00 2001 From: Christopher Dunn Date: Sat, 21 Feb 2015 11:44:16 -0600 Subject: [PATCH] fix StaticString test * support zeroes in string_ * support zeroes in writer; provide getString(char**, unsigned*) * valueToQuotedStringN(), isCC0(), etc * allow zeroes for cpptl ConstString * allocated => non-static --- include/json/value.h | 68 ++++++---- src/lib_json/json_value.cpp | 185 ++++++++++++++++++++-------- src/lib_json/json_valueiterator.inl | 22 +++- src/lib_json/json_writer.cpp | 110 ++++++++++++++++- 4 files changed, 304 insertions(+), 81 deletions(-) diff --git a/include/json/value.h b/include/json/value.h index 229aa9e..04dc87d 100644 --- a/include/json/value.h +++ b/include/json/value.h @@ -99,22 +99,21 @@ private: * The type of the held value is represented by a #ValueType and * can be obtained using type(). * - * values of an #objectValue or #arrayValue can be accessed using operator[]() - *methods. - * Non const methods will automatically create the a #nullValue element + * Values of an #objectValue or #arrayValue can be accessed using operator[]() + * methods. + * Non-const methods will automatically create the a #nullValue element * if it does not exist. - * The sequence of an #arrayValue will be automatically resize and initialized + * The sequence of an #arrayValue will be automatically resized and initialized * with #nullValue. resize() can be used to enlarge or truncate an #arrayValue. * - * The get() methods can be used to obtanis default value in the case the - *required element - * does not exist. + * The get() methods can be used to obtain default value in the case the + * required element does not exist. * * It is possible to iterate over the list of a #objectValue values using * the getMemberNames() method. * - * \note Value string-length fit in size_t, but keys must fit in unsigned. - * (The reason is an implementation detail.) The readers will raise an + * \note #Value string-length fit in size_t, but keys must be < 2^30. + * (The reason is an implementation detail.) A #CharReader will raise an * exception if a bound is exceeded to avoid security holes in your app, * but the Value API does *not* check bounds. That is the responsibility * of the caller. @@ -170,24 +169,27 @@ private: duplicateOnCopy }; CZString(ArrayIndex index); - CZString(char const* cstr, unsigned length, DuplicationPolicy allocate); - CZString(const CZString& other); + CZString(char const* str, unsigned length, DuplicationPolicy allocate); + CZString(CZString const& other); ~CZString(); CZString& operator=(CZString other); - bool operator<(const CZString& other) const; - bool operator==(const CZString& other) const; + bool operator<(CZString const& other) const; + bool operator==(CZString const& other) const; ArrayIndex index() const; - const char* c_str() const; + //const char* c_str() const; ///< \deprecated + char const* data() const; + unsigned length() const; bool isStaticString() const; private: void swap(CZString& other); + struct StringStorage { DuplicationPolicy policy_: 2; unsigned length_: 30; // 1GB max }; - const char* cstr_; + char const* cstr_; // actually, a prefixed string, unless policy is noDup union { ArrayIndex index_; StringStorage storage_; @@ -233,9 +235,14 @@ Json::Value obj_value(Json::objectValue); // {} * Like other value string constructor but do not duplicate the string for * internal storage. The given string must remain alive after the call to this * constructor. + * \note This works only for null-terminated strings. (We cannot change the + * size of this class, so we have nowhere to store the length, + * which might be computed later for various operations.) + * * Example of usage: * \code - * Json::Value aValue( StaticString("some text") ); + * static StaticString foo("some text"); + * Json::Value aValue(foo); * \endcode */ Value(const StaticString& value); @@ -268,6 +275,11 @@ Json::Value obj_value(Json::objectValue); // {} const char* asCString() const; ///! Embedded zeroes could cause you trouble! std::string asString() const; ///! Embedded zeroes are possible. + /** Get raw char* of string-value. + * \return false if !string. (Seg-fault if str or end are NULL.) + */ + bool getString( + char const** str, char const** end) const; #ifdef JSON_USE_CPPTL CppTL::ConstString asConstString() const; #endif @@ -374,8 +386,6 @@ Json::Value obj_value(Json::objectValue); // {} /** \brief Access an object value by name, create a null member if it does not exist. - * \param key may contain embedded nulls. - * * If the object has no entry for that name, then the member name used to store * the new entry is not duplicated. * Example of use: @@ -409,6 +419,10 @@ Json::Value obj_value(Json::objectValue); // {} /// and operator[]const /// \note As stated elsewhere, behavior is undefined if (end-key) >= 2^30 Value const* find(char const* key, char const* end) const; + /// Most general and efficient version of object-mutators. + /// \note As stated elsewhere, behavior is undefined if (end-key) >= 2^30 + /// \return non-zero, but JSON_ASSERT if this is neither object nor nullValue. + Value const* demand(char const* key, char const* end); /// \brief Remove and return the named member. /// /// Do nothing if it did not exist. @@ -442,6 +456,7 @@ Json::Value obj_value(Json::objectValue); // {} bool removeIndex(ArrayIndex i, Value* removed); /// Return true if the object has a member named key. + /// \note 'key' must be null-terminated. bool isMember(const char* key) const; /// Return true if the object has a member named key. /// \param key may contain embedded nulls. @@ -493,7 +508,8 @@ Json::Value obj_value(Json::objectValue); // {} private: void initBasic(ValueType type, bool allocated = false); - Value& resolveReference(const char* key, bool isStatic); + Value& resolveReference(const char* key); + Value& resolveReference(const char* key, const char* end); struct CommentInfo { CommentInfo(); @@ -518,11 +534,12 @@ private: LargestUInt uint_; double real_; bool bool_; - char* string_; // actually ptr to unsigned, followed by str + char* string_; // actually ptr to unsigned, followed by str, unless !allocated_ ObjectValues* map_; } value_; ValueType type_ : 8; unsigned int allocated_ : 1; // Notes: if declared as bool, bitfield is useless. + // If not allocated_, string_ must be null-terminated. CommentInfo* comments_; // [start, limit) byte offsets in the source JSON text from which this Value @@ -624,7 +641,12 @@ public: /// Return the member name of the referenced Value. "" if it is not an /// objectValue. - const char* memberName() const; + /// \deprecated This cannot be used for UTF-8 strings, since there can be embedded nulls. + char const* memberName() const; + /// Return the member name of the referenced Value, or NULL if it is not an + /// objectValue. + /// Better version than memberName(). Allows embedded nulls. + char const* memberName(char const** end) const; protected: Value& deref() const; @@ -653,8 +675,8 @@ class JSON_API ValueConstIterator : public ValueIteratorBase { public: typedef const Value value_type; - typedef unsigned int size_t; - typedef int difference_type; + //typedef unsigned int size_t; + //typedef int difference_type; typedef const Value& reference; typedef const Value* pointer; typedef ValueConstIterator SelfType; diff --git a/src/lib_json/json_value.cpp b/src/lib_json/json_value.cpp index eae87db..3452ba4 100644 --- a/src/lib_json/json_value.cpp +++ b/src/lib_json/json_value.cpp @@ -102,27 +102,38 @@ static inline char* duplicateStringValue(const char* value, /* Record the length as a prefix. */ -static inline char* duplicatePrefixedStringValue( +static inline char* duplicateAndPrefixStringValue( const char* value, - unsigned int length = unknown) + unsigned int length) { - if (length == unknown) - length = (unsigned int)strlen(value); - // Avoid an integer overflow in the call to malloc below by limiting length // to a sane value. - if (length >= (unsigned)Value::maxInt) - length = Value::maxInt - 1; - - char* newString = static_cast(malloc(length + 1)); + JSON_ASSERT_MESSAGE(length <= (unsigned)Value::maxInt - sizeof(unsigned) - 1U, + "in Json::Value::duplicateAndPrefixStringValue(): " + "length too big for prefixing"); + unsigned actualLength = length + sizeof(unsigned) + 1U; + char* newString = static_cast(malloc(actualLength)); JSON_ASSERT_MESSAGE(newString != 0, - "in Json::Value::duplicateStringValue(): " + "in Json::Value::duplicateAndPrefixStringValue(): " "Failed to allocate string value buffer"); - memcpy(newString, value, length); - newString[length] = 0; + *reinterpret_cast(newString) = length; + memcpy(newString + sizeof(unsigned), value, length); + newString[actualLength - 1U] = 0; // to avoid buffer over-run accidents by users later return newString; } -/** Free the string duplicated by duplicateStringValue(). +inline static void decodePrefixedString( + bool isPrefixed, char const* prefixed, + unsigned* length, char const** value) +{ + if (!isPrefixed) { + *length = strlen(prefixed); + *value = prefixed; + } else { + *length = *reinterpret_cast(prefixed); + *value = prefixed + sizeof(unsigned); + } +} +/** Free the string duplicated by duplicateStringValue()/duplicateAndPrefixStringValue(). */ static inline void releaseStringValue(char* value) { free(value); } @@ -241,8 +252,9 @@ bool Value::CZString::operator==(const CZString& other) const { ArrayIndex Value::CZString::index() const { return index_; } -const char* Value::CZString::c_str() const { return cstr_; } - +//const char* Value::CZString::c_str() const { return cstr_; } +const char* Value::CZString::data() const { return cstr_; } +unsigned Value::CZString::length() const { return storage_.length_; } bool Value::CZString::isStaticString() const { return storage_.policy_ == noDuplication; } // ////////////////////////////////////////////////////////////////// @@ -311,19 +323,19 @@ Value::Value(double value) { Value::Value(const char* value) { initBasic(stringValue, true); - value_.string_ = duplicateStringValue(value); + value_.string_ = duplicateAndPrefixStringValue(value, strlen(value)); } Value::Value(const char* beginValue, const char* endValue) { initBasic(stringValue, true); value_.string_ = - duplicateStringValue(beginValue, (unsigned int)(endValue - beginValue)); + duplicateAndPrefixStringValue(beginValue, (unsigned int)(endValue - beginValue)); } Value::Value(const std::string& value) { initBasic(stringValue, true); value_.string_ = - duplicateStringValue(value.c_str(), (unsigned int)value.length()); + duplicateAndPrefixStringValue(value.data(), (unsigned int)value.length()); } Value::Value(const StaticString& value) { @@ -334,7 +346,7 @@ Value::Value(const StaticString& value) { #ifdef JSON_USE_CPPTL Value::Value(const CppTL::ConstString& value) { initBasic(stringValue, true); - value_.string_ = duplicateStringValue(value, value.length()); + value_.string_ = duplicateAndPrefixStringValue(value, value.length()); } #endif @@ -357,7 +369,11 @@ Value::Value(const Value& other) break; case stringValue: if (other.value_.string_) { - value_.string_ = duplicateStringValue(other.value_.string_); + unsigned len; + char const* str; + decodePrefixedString(other.allocated_, other.value_.string_, + &len, &str); + value_.string_ = duplicateAndPrefixStringValue(str, len); allocated_ = true; } else { value_.string_ = 0; @@ -454,9 +470,23 @@ bool Value::operator<(const Value& other) const { case booleanValue: return value_.bool_ < other.value_.bool_; case stringValue: - return (value_.string_ == 0 && other.value_.string_) || - (other.value_.string_ && value_.string_ && - strcmp(value_.string_, other.value_.string_) < 0); + { + if ((value_.string_ == 0) || (other.value_.string_ == 0)) { + if (other.value_.string_) return true; + else return false; + } + unsigned this_len; + unsigned other_len; + char const* this_str; + char const* other_str; + decodePrefixedString(this->allocated_, this->value_.string_, &this_len, &this_str); + decodePrefixedString(other.allocated_, other.value_.string_, &other_len, &other_str); + unsigned min_len = std::min(this_len, other_len); + int comp = memcmp(this_str, other_str, min_len); + if (comp < 0) return true; + if (comp > 0) return false; + return (this_len < other_len); + } case arrayValue: case objectValue: { int delta = int(value_.map_->size() - other.value_.map_->size()); @@ -496,9 +526,20 @@ bool Value::operator==(const Value& other) const { case booleanValue: return value_.bool_ == other.value_.bool_; case stringValue: - return (value_.string_ == other.value_.string_) || - (other.value_.string_ && value_.string_ && - strcmp(value_.string_, other.value_.string_) == 0); + { + if ((value_.string_ == 0) || (other.value_.string_ == 0)) { + return (value_.string_ == other.value_.string_); + } + unsigned this_len; + unsigned other_len; + char const* this_str; + char const* other_str; + decodePrefixedString(this->allocated_, this->value_.string_, &this_len, &this_str); + decodePrefixedString(other.allocated_, other.value_.string_, &other_len, &other_str); + if (this_len != other_len) return false; + int comp = memcmp(this_str, other_str, this_len); + return comp == 0; + } case arrayValue: case objectValue: return value_.map_->size() == other.value_.map_->size() && @@ -514,7 +555,20 @@ bool Value::operator!=(const Value& other) const { return !(*this == other); } const char* Value::asCString() const { JSON_ASSERT_MESSAGE(type_ == stringValue, "in Json::Value::asCString(): requires stringValue"); - return value_.string_; + if (value_.string_ == 0) return 0; + unsigned this_len; + char const* this_str; + decodePrefixedString(this->allocated_, this->value_.string_, &this_len, &this_str); + return this_str; +} + +bool Value::getString(char const** str, char const** end) const { + if (type_ != stringValue) return false; + if (value_.string_ == 0) return false; + unsigned length; + decodePrefixedString(this->allocated_, this->value_.string_, &length, str); + *end = *str + length; + return true; } std::string Value::asString() const { @@ -522,7 +576,13 @@ std::string Value::asString() const { case nullValue: return ""; case stringValue: - return value_.string_ ? value_.string_ : ""; + { + if (value_.string_ == 0) return ""; + unsigned this_len; + char const* this_str; + decodePrefixedString(this->allocated_, this->value_.string_, &this_len, &this_str); + return std::string(this_str, this_len); + } case booleanValue: return value_.bool_ ? "true" : "false"; case intValue: @@ -538,7 +598,11 @@ std::string Value::asString() const { #ifdef JSON_USE_CPPTL CppTL::ConstString Value::asConstString() const { - return CppTL::ConstString(asString().c_str()); + unsigned len; + char const* str; + decodePrefixedString(allocated_, value_.string_, + &len, &str); + return CppTL::ConstString(str, len); } #endif @@ -852,10 +916,6 @@ const Value& Value::operator[](int index) const { return (*this)[ArrayIndex(index)]; } -Value& Value::operator[](const char* key) { - return resolveReference(key, false); -} - void Value::initBasic(ValueType type, bool allocated) { type_ = type; allocated_ = allocated; @@ -864,14 +924,37 @@ void Value::initBasic(ValueType type, bool allocated) { limit_ = 0; } -Value& Value::resolveReference(const char* key, bool isStatic) { +// Access an object value by name, create a null member if it does not exist. +// @pre Type of '*this' is object or null. +// @param key is null-terminated. +Value& Value::resolveReference(const char* key) { JSON_ASSERT_MESSAGE( type_ == nullValue || type_ == objectValue, "in Json::Value::resolveReference(): requires objectValue"); if (type_ == nullValue) *this = Value(objectValue); CZString actualKey( - key, strlen(key), isStatic ? CZString::noDuplication : CZString::duplicateOnCopy); + key, strlen(key), CZString::noDuplication); // NOTE! + ObjectValues::iterator it = value_.map_->lower_bound(actualKey); + if (it != value_.map_->end() && (*it).first == actualKey) + return (*it).second; + + ObjectValues::value_type defaultValue(actualKey, null); + it = value_.map_->insert(it, defaultValue); + Value& value = (*it).second; + return value; +} + +// @param key is not null-terminated. +Value& Value::resolveReference(char const* key, char const* end) +{ + JSON_ASSERT_MESSAGE( + type_ == nullValue || type_ == objectValue, + "in Json::Value::resolveReference(key, end): requires objectValue"); + if (type_ == nullValue) + *this = Value(objectValue); + CZString actualKey( + key, (end-key), CZString::duplicateOnCopy); ObjectValues::iterator it = value_.map_->lower_bound(actualKey); if (it != value_.map_->end() && (*it).first == actualKey) return (*it).second; @@ -906,24 +989,28 @@ const Value& Value::operator[](const char* key) const if (!found) return null; return *found; } -Value& Value::operator[](const std::string& key) -{ - return (*this)[key.c_str()]; -} Value const& Value::operator[](std::string const& key) const { Value const* found = find(key.data(), key.data() + key.length()); if (!found) return null; return *found; } -Value& Value::operator[](const StaticString& key) -{ - return resolveReference(key, true); + +Value& Value::operator[](const char* key) { + return resolveReference(key, key + strlen(key)); +} + +Value& Value::operator[](const std::string& key) { + return resolveReference(key.data(), key.data() + key.length()); +} + +Value& Value::operator[](const StaticString& key) { + return resolveReference(key.c_str()); } #ifdef JSON_USE_CPPTL Value& Value::operator[](const CppTL::ConstString& key) { - return (*this)[key.c_str()]; + return resolveReference(key.c_str(), key.end_c_str()); } Value const& Value::operator[](CppTL::ConstString const& key) const { @@ -1022,18 +1109,18 @@ bool Value::isMember(char const* key, char const* end) const Value const* value = find(key, end); return NULL != value; } -bool Value::isMember(const char* key) const +bool Value::isMember(char const* key) const { return isMember(key, key + strlen(key)); } -bool Value::isMember(const std::string& key) const +bool Value::isMember(std::string const& key) const { return isMember(key.data(), key.data() + key.length()); } #ifdef JSON_USE_CPPTL bool Value::isMember(const CppTL::ConstString& key) const { - return isMember(key.c_str()); + return isMember(key.c_str(), key.end_c_str()); } #endif @@ -1047,8 +1134,10 @@ Value::Members Value::getMemberNames() const { members.reserve(value_.map_->size()); ObjectValues::const_iterator it = value_.map_->begin(); ObjectValues::const_iterator itEnd = value_.map_->end(); - for (; it != itEnd; ++it) - members.push_back(std::string((*it).first.c_str())); + for (; it != itEnd; ++it) { + members.push_back(std::string((*it).first.data(), + (*it).first.length())); + } return members; } // diff --git a/src/lib_json/json_valueiterator.inl b/src/lib_json/json_valueiterator.inl index 3cb9e06..1e9a247 100644 --- a/src/lib_json/json_valueiterator.inl +++ b/src/lib_json/json_valueiterator.inl @@ -77,26 +77,36 @@ void ValueIteratorBase::copy(const SelfType& other) { Value ValueIteratorBase::key() const { const Value::CZString czstring = (*current_).first; - if (czstring.c_str()) { + if (czstring.data()) { if (czstring.isStaticString()) - return Value(StaticString(czstring.c_str())); - return Value(czstring.c_str()); + return Value(StaticString(czstring.data())); + return Value(czstring.data(), czstring.data() + czstring.length()); } return Value(czstring.index()); } UInt ValueIteratorBase::index() const { const Value::CZString czstring = (*current_).first; - if (!czstring.c_str()) + if (!czstring.data()) return czstring.index(); return Value::UInt(-1); } -const char* ValueIteratorBase::memberName() const { - const char* name = (*current_).first.c_str(); +char const* ValueIteratorBase::memberName() const { + const char* name = (*current_).first.data(); return name ? name : ""; } +char const* ValueIteratorBase::memberName(char const** end) const { + const char* name = (*current_).first.data(); + if (!name) { + *end = NULL; + return NULL; + } + *end = name + (*current_).first.length(); + return name; +} + // ////////////////////////////////////////////////////////////////// // ////////////////////////////////////////////////////////////////// // ////////////////////////////////////////////////////////////////// diff --git a/src/lib_json/json_writer.cpp b/src/lib_json/json_writer.cpp index 29f6619..f7c630d 100644 --- a/src/lib_json/json_writer.cpp +++ b/src/lib_json/json_writer.cpp @@ -50,6 +50,16 @@ static bool containsControlCharacter(const char* str) { return false; } +static bool containsControlCharacter0(const char* str, unsigned len) { + char const* end = str + len; + while (end != str) { + if (isControlCharacter(*str) || 0==*str) + return true; + ++str; + } + return false; +} + std::string valueToString(LargestInt value) { UIntToStringBuffer buffer; char* current = buffer + sizeof(buffer); @@ -184,6 +194,84 @@ std::string valueToQuotedString(const char* value) { return result; } +// https://github.com/upcaste/upcaste/blob/master/src/upcore/src/cstring/strnpbrk.cpp +static char const* strnpbrk(char const* s, char const* accept, size_t n) { + assert((s || !n) && accept); + + char const* const end = s + n; + for (char const* cur = s; cur < end; ++cur) { + int const c = *cur; + for (char const* a = accept; *a; ++a) { + if (*a == c) { + return cur; + } + } + } + return NULL; +} +static std::string valueToQuotedStringN(const char* value, unsigned length) { + if (value == NULL) + return ""; + // Not sure how to handle unicode... + if (strnpbrk(value, "\"\\\b\f\n\r\t", length) == NULL && + !containsControlCharacter0(value, length)) + return std::string("\"") + value + "\""; + // We have to walk value and escape any special characters. + // Appending to std::string is not efficient, but this should be rare. + // (Note: forward slashes are *not* rare, but I am not escaping them.) + std::string::size_type maxsize = + length * 2 + 3; // allescaped+quotes+NULL + std::string result; + result.reserve(maxsize); // to avoid lots of mallocs + result += "\""; + char const* end = value + length; + for (const char* c = value; c != end; ++c) { + switch (*c) { + case '\"': + result += "\\\""; + break; + case '\\': + result += "\\\\"; + break; + case '\b': + result += "\\b"; + break; + case '\f': + result += "\\f"; + break; + case '\n': + result += "\\n"; + break; + case '\r': + result += "\\r"; + break; + case '\t': + result += "\\t"; + break; + // case '/': + // Even though \/ is considered a legal escape in JSON, a bare + // slash is also legal, so I see no reason to escape it. + // (I hope I am not misunderstanding something.) + // blep notes: actually escaping \/ may be useful in javascript to avoid (*c); + result += oss.str(); + } else { + result += *c; + } + break; + } + } + result += "\""; + return result; +} + // Class Writer // ////////////////////////////////////////////////////////////////// Writer::~Writer() {} @@ -248,7 +336,7 @@ void FastWriter::writeValue(const Value& value) { const std::string& name = *it; if (it != members.begin()) document_ += ','; - document_ += valueToQuotedString(name.c_str()); + document_ += valueToQuotedStringN(name.data(), name.length()); document_ += yamlCompatiblityEnabled_ ? ": " : ":"; writeValue(value[name]); } @@ -289,8 +377,15 @@ void StyledWriter::writeValue(const Value& value) { pushValue(valueToString(value.asDouble())); break; case stringValue: - pushValue(valueToQuotedString(value.asCString())); + { + // Is NULL is possible for value.string_? + char const* str; + char const* end; + bool ok = value.getString(&str, &end); + if (ok) pushValue(valueToQuotedStringN(str, static_cast(end-str))); + else pushValue(""); break; + } case booleanValue: pushValue(valueToString(value.asBool())); break; @@ -767,8 +862,15 @@ void BuiltStyledStreamWriter::writeValue(Value const& value) { pushValue(valueToString(value.asDouble())); break; case stringValue: - pushValue(valueToQuotedString(value.asCString())); + { + // Is NULL is possible for value.string_? + char const* str; + char const* end; + bool ok = value.getString(&str, &end); + if (ok) pushValue(valueToQuotedStringN(str, static_cast(end-str))); + else pushValue(""); break; + } case booleanValue: pushValue(valueToString(value.asBool())); break; @@ -787,7 +889,7 @@ void BuiltStyledStreamWriter::writeValue(Value const& value) { std::string const& name = *it; Value const& childValue = value[name]; writeCommentBeforeValue(childValue); - writeWithIndent(valueToQuotedString(name.c_str())); + writeWithIndent(valueToQuotedStringN(name.data(), name.length())); *sout_ << colonSymbol_; writeValue(childValue); if (++it == members.end()) {