diff --git a/include/rapidjson/internal/regex.h b/include/rapidjson/internal/regex.h index 2e5ca583..8ef5766b 100644 --- a/include/rapidjson/internal/regex.h +++ b/include/rapidjson/internal/regex.h @@ -71,13 +71,17 @@ class GenericRegex { public: typedef typename Encoding::Ch Ch; - GenericRegex(const Ch* source, Allocator* allocator = 0) : states_(allocator, 256), ranges_(allocator, 256), root_(kRegexInvalidState), stateCount_(), rangeCount_(), anchorBegin_(), anchorEnd_() { + GenericRegex(const Ch* source, Allocator* allocator = 0) : + states_(allocator, 256), ranges_(allocator, 256), root_(kRegexInvalidState), stateCount_(), rangeCount_(), + stateSet_(), state0_(allocator, 0), state1_(allocator, 0), anchorBegin_(), anchorEnd_() + { GenericStringStream ss(source); DecodedStream > ds(ss); Parse(ds); } ~GenericRegex() { + Allocator::Free(stateSet_); } bool IsValid() const { @@ -308,6 +312,14 @@ private: printf("\n"); #endif } + + // Preallocate buffer for SearchWithAnchoring() + RAPIDJSON_ASSERT(stateSet_ == 0); + if (stateCount_ > 0) { + stateSet_ = static_cast(states_.GetAllocator().Malloc(GetStateSetSize())); + state0_.Reserve(stateCount_); + state1_.Reserve(stateCount_); + } } SizeType NewState(SizeType out, SizeType out1, unsigned codepoint) { @@ -568,21 +580,15 @@ private: RAPIDJSON_ASSERT(IsValid()); DecodedStream ds(is); - Allocator allocator; - Stack state0(&allocator, stateCount_ * sizeof(SizeType)); - Stack state1(&allocator, stateCount_ * sizeof(SizeType)); - Stack *current = &state0, *next = &state1; - - const size_t stateSetSize = (stateCount_ + 31) / 32 * 4; - unsigned* stateSet = static_cast(allocator.Malloc(stateSetSize)); - std::memset(stateSet, 0, stateSetSize); - - bool matched = false; - matched = AddState(stateSet, *current, root_); + state0_.Clear(); + Stack *current = &state0_, *next = &state1_; + const size_t stateSetSize = GetStateSetSize(); + std::memset(stateSet_, 0, stateSetSize); + bool matched = AddState(*current, root_); unsigned 
codepoint; while (!current->Empty() && (codepoint = ds.Take()) != 0) { - std::memset(stateSet, 0, stateSetSize); + std::memset(stateSet_, 0, stateSetSize); next->Clear(); matched = false; for (const SizeType* s = current->template Bottom(); s != current->template End(); ++s) { @@ -591,39 +597,38 @@ private: sr.codepoint == kAnyCharacterClass || (sr.codepoint == kRangeCharacterClass && MatchRange(sr.rangeStart, codepoint))) { - matched = AddState(stateSet, *next, sr.out) || matched; + matched = AddState(*next, sr.out) || matched; if (!anchorEnd && matched) - goto exit; + return true; } if (!anchorBegin) - AddState(stateSet, *next, root_); + AddState(*next, root_); } - Stack* temp = current; - current = next; - next = temp; + internal::Swap(current, next); } - exit: - Allocator::Free(stateSet); return matched; } + size_t GetStateSetSize() const { + return (stateCount_ + 31) / 32 * 4; + } + // Return whether the added states is a match state - bool AddState(unsigned* stateSet, Stack& l, SizeType index) const { + bool AddState(Stack& l, SizeType index) const { if (index == kRegexInvalidState) return true; const State& s = GetState(index); if (s.out1 != kRegexInvalidState) { // Split - bool matched = AddState(stateSet, l, s.out); - matched = AddState(stateSet, l, s.out1) || matched; - return matched; + bool matched = AddState(l, s.out); + return AddState(l, s.out1) || matched; } - else if (!(stateSet[index >> 5] & (1 << (index & 31)))) { - stateSet[index >> 5] |= (1 << (index & 31)); - *l.template Push() = index; + else if (!(stateSet_[index >> 5] & (1 << (index & 31)))) { + stateSet_[index >> 5] |= (1 << (index & 31)); + *l.template PushUnsafe() = index; } - return GetState(index).out == kRegexInvalidState; + return s.out == kRegexInvalidState; // by using PushUnsafe() above, we can ensure s is not invalidated due to reallocation. 
} bool MatchRange(SizeType rangeIndex, unsigned codepoint) const { @@ -642,6 +647,11 @@ private: SizeType root_; SizeType stateCount_; SizeType rangeCount_; + + // For SearchWithAnchoring() + uint32_t* stateSet_; // allocated by states_.GetAllocator() + mutable Stack state0_; + mutable Stack state1_; bool anchorBegin_; bool anchorEnd_; }; diff --git a/include/rapidjson/internal/stack.h b/include/rapidjson/internal/stack.h index dc2efea5..6615c461 100644 --- a/include/rapidjson/internal/stack.h +++ b/include/rapidjson/internal/stack.h @@ -38,7 +38,6 @@ public: // Optimization note: Do not allocate memory for stack_ in constructor. // Do it lazily when first Push() -> Expand() -> Resize(). Stack(Allocator* allocator, size_t stackCapacity) : allocator_(allocator), ownAllocator_(0), stack_(0), stackTop_(0), stackEnd_(0), initialCapacity_(stackCapacity) { - RAPIDJSON_ASSERT(stackCapacity > 0); } #if RAPIDJSON_HAS_CXX11_RVALUE_REFS diff --git a/include/rapidjson/schema.h b/include/rapidjson/schema.h index b968c052..7ff55d12 100644 --- a/include/rapidjson/schema.h +++ b/include/rapidjson/schema.h @@ -300,15 +300,17 @@ struct SchemaValidationContext { factory.DestroySchemaValidator(patternPropertiesValidators[i]); factory.FreeState(patternPropertiesValidators); } - factory.FreeState(patternPropertiesSchemas); - factory.FreeState(objectDependencies); + if (patternPropertiesSchemas) + factory.FreeState(patternPropertiesSchemas); + if (objectDependencies) + factory.FreeState(objectDependencies); } SchemaValidatorFactoryType& factory; const SchemaType* schema; const SchemaType* valueSchema; const Ch* invalidKeyword; - void* hasher; // Only calidator access + void* hasher; // Only validator access void* arrayElementHashCodes; // Only validator access this ISchemaValidator** validators; SizeType validatorCount; @@ -613,7 +615,7 @@ public: return true; } - bool EndValue(Context& context) const { + RAPIDJSON_FORCEINLINE bool EndValue(Context& context) const { if 
(context.patternPropertiesValidatorCount > 0) { bool otherValid = false; SizeType count = context.patternPropertiesValidatorCount; @@ -1080,8 +1082,12 @@ private: // O(n) template bool FindPropertyIndex(const ValueType& name, SizeType* outIndex) const { + SizeType len = name.GetStringLength(); + const Ch* str = name.GetString(); for (SizeType index = 0; index < propertyCount_; index++) - if (properties_[index].name == name) { + if (properties_[index].name.GetStringLength() == len && + (std::memcmp(properties_[index].name.GetString(), str, sizeof(Ch) * len) == 0)) + { *outIndex = index; return true; } @@ -1703,7 +1709,7 @@ private: PushSchema(root_); else { if (CurrentContext().inArray) - AppendToken(CurrentContext().arrayElementIndex); + AppendToken(CurrentContext().arrayElementIndex); if (!CurrentSchema().BeginValue(CurrentContext())) return false; @@ -1767,21 +1773,23 @@ private: } void AppendToken(const Ch* str, SizeType len) { - *documentStack_.template Push() = '/'; + documentStack_.template Reserve(1 + len * 2); // worst case all characters are escaped as two characters + *documentStack_.template PushUnsafe() = '/'; for (SizeType i = 0; i < len; i++) { if (str[i] == '~') { - *documentStack_.template Push() = '~'; - *documentStack_.template Push() = '0'; + *documentStack_.template PushUnsafe() = '~'; + *documentStack_.template PushUnsafe() = '0'; } else if (str[i] == '/') { - *documentStack_.template Push() = '~'; - *documentStack_.template Push() = '1'; + *documentStack_.template PushUnsafe() = '~'; + *documentStack_.template PushUnsafe() = '1'; } else - *documentStack_.template Push() = str[i]; + *documentStack_.template PushUnsafe() = str[i]; } } + template void AppendToken(SizeType index) { *documentStack_.template Push() = '/'; char buffer[21]; @@ -1790,9 +1798,27 @@ private: *documentStack_.template Push() = buffer[i]; } - void PushSchema(const SchemaType& schema) { new (schemaStack_.template Push()) Context(*this, &schema); } + // Specialized version 
for char to prevent buffer copying. + template <> + void AppendToken(SizeType index) { + if (sizeof(SizeType) == 4) { + char *buffer = documentStack_.template Push(1 + 10); // '/' + uint + *buffer++ = '/'; + const char* end = internal::u32toa(index, buffer); + documentStack_.template Pop(static_cast(10 - (end - buffer))); + } + else { + char *buffer = documentStack_.template Push(1 + 20); // '/' + uint64 + *buffer++ = '/'; + const char* end = internal::u64toa(index, buffer); + documentStack_.template Pop(static_cast(20 - (end - buffer))); + } + } + + + RAPIDJSON_FORCEINLINE void PushSchema(const SchemaType& schema) { new (schemaStack_.template Push()) Context(*this, &schema); } - void PopSchema() { + RAPIDJSON_FORCEINLINE void PopSchema() { Context* c = schemaStack_.template Pop(1); if (HashCodeArray* a = static_cast(c->arrayElementHashCodes)) { a->~HashCodeArray();