mirror of
				https://github.com/Tencent/rapidjson.git
				synced 2025-10-20 14:02:44 +02:00 
			
		
		
		
	Optimization for Regex and Schema
This commit is contained in:
		| @@ -71,13 +71,17 @@ class GenericRegex { | ||||
| public: | ||||
|     typedef typename Encoding::Ch Ch; | ||||
|  | ||||
|     GenericRegex(const Ch* source, Allocator* allocator = 0) : states_(allocator, 256), ranges_(allocator, 256), root_(kRegexInvalidState), stateCount_(), rangeCount_(), anchorBegin_(), anchorEnd_() { | ||||
|     GenericRegex(const Ch* source, Allocator* allocator = 0) :  | ||||
|         states_(allocator, 256), ranges_(allocator, 256), root_(kRegexInvalidState), stateCount_(), rangeCount_(),  | ||||
|         stateSet_(), state0_(allocator, 0), state1_(allocator, 0), anchorBegin_(), anchorEnd_() | ||||
|     { | ||||
|         GenericStringStream<Encoding> ss(source); | ||||
|         DecodedStream<GenericStringStream<Encoding> > ds(ss); | ||||
|         Parse(ds); | ||||
|     } | ||||
|  | ||||
|     ~GenericRegex() { | ||||
|         Allocator::Free(stateSet_); | ||||
|     } | ||||
|  | ||||
|     bool IsValid() const { | ||||
| @@ -308,6 +312,14 @@ private: | ||||
|             printf("\n"); | ||||
| #endif | ||||
|         } | ||||
|  | ||||
|         // Preallocate buffer for SearchWithAnchoring() | ||||
|         RAPIDJSON_ASSERT(stateSet_ == 0); | ||||
|         if (stateCount_ > 0) { | ||||
|             stateSet_ = static_cast<unsigned*>(states_.GetAllocator().Malloc(GetStateSetSize())); | ||||
|             state0_.Reserve<SizeType>(stateCount_); | ||||
|             state1_.Reserve<SizeType>(stateCount_); | ||||
|         } | ||||
|     } | ||||
|  | ||||
|     SizeType NewState(SizeType out, SizeType out1, unsigned codepoint) { | ||||
| @@ -568,21 +580,15 @@ private: | ||||
|         RAPIDJSON_ASSERT(IsValid()); | ||||
|         DecodedStream<InputStream> ds(is); | ||||
|  | ||||
|         Allocator allocator; | ||||
|         Stack<Allocator> state0(&allocator, stateCount_ * sizeof(SizeType)); | ||||
|         Stack<Allocator> state1(&allocator, stateCount_ * sizeof(SizeType)); | ||||
|         Stack<Allocator> *current = &state0, *next = &state1; | ||||
|  | ||||
|         const size_t stateSetSize = (stateCount_ + 31) / 32 * 4; | ||||
|         unsigned* stateSet = static_cast<unsigned*>(allocator.Malloc(stateSetSize)); | ||||
|         std::memset(stateSet, 0, stateSetSize); | ||||
|  | ||||
|         bool matched = false; | ||||
|         matched = AddState(stateSet, *current, root_); | ||||
|         state0_.Clear(); | ||||
|         Stack<Allocator> *current = &state0_, *next = &state1_; | ||||
|         const size_t stateSetSize = GetStateSetSize(); | ||||
|         std::memset(stateSet_, 0, stateSetSize); | ||||
|  | ||||
|         bool matched = AddState(*current, root_); | ||||
|         unsigned codepoint; | ||||
|         while (!current->Empty() && (codepoint = ds.Take()) != 0) { | ||||
|             std::memset(stateSet, 0, stateSetSize); | ||||
|             std::memset(stateSet_, 0, stateSetSize); | ||||
|             next->Clear(); | ||||
|             matched = false; | ||||
|             for (const SizeType* s = current->template Bottom<SizeType>(); s != current->template End<SizeType>(); ++s) { | ||||
| @@ -591,39 +597,38 @@ private: | ||||
|                     sr.codepoint == kAnyCharacterClass ||  | ||||
|                     (sr.codepoint == kRangeCharacterClass && MatchRange(sr.rangeStart, codepoint))) | ||||
|                 { | ||||
|                     matched = AddState(stateSet, *next, sr.out) || matched; | ||||
|                     matched = AddState(*next, sr.out) || matched; | ||||
|                     if (!anchorEnd && matched) | ||||
|                         goto exit; | ||||
|                         return true; | ||||
|                 } | ||||
|                 if (!anchorBegin) | ||||
|                     AddState(stateSet, *next, root_); | ||||
|                     AddState(*next, root_); | ||||
|             } | ||||
|             Stack<Allocator>* temp = current; | ||||
|             current = next; | ||||
|             next = temp; | ||||
|             internal::Swap(current, next); | ||||
|         } | ||||
|  | ||||
|     exit: | ||||
|         Allocator::Free(stateSet); | ||||
|         return matched; | ||||
|     } | ||||
|  | ||||
|     size_t GetStateSetSize() const { | ||||
|         return (stateCount_ + 31) / 32 * 4; | ||||
|     } | ||||
|  | ||||
|     // Return whether the added state is a match state | ||||
|     bool AddState(unsigned* stateSet, Stack<Allocator>& l, SizeType index) const { | ||||
|     bool AddState(Stack<Allocator>& l, SizeType index) const { | ||||
|         if (index == kRegexInvalidState) | ||||
|             return true; | ||||
|  | ||||
|         const State& s = GetState(index); | ||||
|         if (s.out1 != kRegexInvalidState) { // Split | ||||
|             bool matched = AddState(stateSet, l, s.out); | ||||
|             matched = AddState(stateSet, l, s.out1) || matched; | ||||
|             return matched; | ||||
|             bool matched = AddState(l, s.out); | ||||
|             return AddState(l, s.out1) || matched; | ||||
|         } | ||||
|         else if (!(stateSet[index >> 5] & (1 << (index & 31)))) { | ||||
|             stateSet[index >> 5] |= (1 << (index & 31)); | ||||
|             *l.template Push<SizeType>() = index; | ||||
|         else if (!(stateSet_[index >> 5] & (1 << (index & 31)))) { | ||||
|             stateSet_[index >> 5] |= (1 << (index & 31)); | ||||
|             *l.template PushUnsafe<SizeType>() = index; | ||||
|         } | ||||
|         return GetState(index).out == kRegexInvalidState; | ||||
|         return s.out == kRegexInvalidState; // by using PushUnsafe() above, we can ensure s is not invalidated due to reallocation. | ||||
|     } | ||||
|  | ||||
|     bool MatchRange(SizeType rangeIndex, unsigned codepoint) const { | ||||
| @@ -642,6 +647,11 @@ private: | ||||
|     SizeType root_; | ||||
|     SizeType stateCount_; | ||||
|     SizeType rangeCount_; | ||||
|  | ||||
|     // For SearchWithAnchoring() | ||||
|     uint32_t* stateSet_;        // allocated by states_.GetAllocator() | ||||
|     mutable Stack<Allocator> state0_; | ||||
|     mutable Stack<Allocator> state1_; | ||||
|     bool anchorBegin_; | ||||
|     bool anchorEnd_; | ||||
| }; | ||||
|   | ||||
| @@ -38,7 +38,6 @@ public: | ||||
|     // Optimization note: Do not allocate memory for stack_ in constructor. | ||||
|     // Do it lazily when first Push() -> Expand() -> Resize(). | ||||
|     Stack(Allocator* allocator, size_t stackCapacity) : allocator_(allocator), ownAllocator_(0), stack_(0), stackTop_(0), stackEnd_(0), initialCapacity_(stackCapacity) { | ||||
|         RAPIDJSON_ASSERT(stackCapacity > 0); | ||||
|     } | ||||
|  | ||||
| #if RAPIDJSON_HAS_CXX11_RVALUE_REFS | ||||
|   | ||||
| @@ -300,15 +300,17 @@ struct SchemaValidationContext { | ||||
|                 factory.DestroySchemaValidator(patternPropertiesValidators[i]); | ||||
|             factory.FreeState(patternPropertiesValidators); | ||||
|         } | ||||
|         factory.FreeState(patternPropertiesSchemas); | ||||
|         factory.FreeState(objectDependencies); | ||||
|         if (patternPropertiesSchemas) | ||||
|             factory.FreeState(patternPropertiesSchemas); | ||||
|         if (objectDependencies) | ||||
|             factory.FreeState(objectDependencies); | ||||
|     } | ||||
|  | ||||
|     SchemaValidatorFactoryType& factory; | ||||
|     const SchemaType* schema; | ||||
|     const SchemaType* valueSchema; | ||||
|     const Ch* invalidKeyword; | ||||
|     void* hasher; // Only calidator access | ||||
|     void* hasher; // Only validator access | ||||
|     void* arrayElementHashCodes; // Only validator access this | ||||
|     ISchemaValidator** validators; | ||||
|     SizeType validatorCount; | ||||
| @@ -613,7 +615,7 @@ public: | ||||
|         return true; | ||||
|     } | ||||
|  | ||||
|     bool EndValue(Context& context) const { | ||||
|     RAPIDJSON_FORCEINLINE bool EndValue(Context& context) const { | ||||
|         if (context.patternPropertiesValidatorCount > 0) { | ||||
|             bool otherValid = false; | ||||
|             SizeType count = context.patternPropertiesValidatorCount; | ||||
| @@ -1080,8 +1082,12 @@ private: | ||||
|     // O(n) | ||||
|     template <typename ValueType> | ||||
|     bool FindPropertyIndex(const ValueType& name, SizeType* outIndex) const { | ||||
|         SizeType len = name.GetStringLength(); | ||||
|         const Ch* str = name.GetString(); | ||||
|         for (SizeType index = 0; index < propertyCount_; index++) | ||||
|             if (properties_[index].name == name) { | ||||
|             if (properties_[index].name.GetStringLength() == len &&  | ||||
|                 (std::memcmp(properties_[index].name.GetString(), str, sizeof(Ch) * len) == 0)) | ||||
|             { | ||||
|                 *outIndex = index; | ||||
|                 return true; | ||||
|             } | ||||
| @@ -1703,7 +1709,7 @@ private: | ||||
|             PushSchema(root_); | ||||
|         else { | ||||
|             if (CurrentContext().inArray) | ||||
|                 AppendToken(CurrentContext().arrayElementIndex); | ||||
|                 AppendToken<Ch>(CurrentContext().arrayElementIndex); | ||||
|  | ||||
|             if (!CurrentSchema().BeginValue(CurrentContext())) | ||||
|                 return false; | ||||
| @@ -1767,21 +1773,23 @@ private: | ||||
|     } | ||||
|  | ||||
|     void AppendToken(const Ch* str, SizeType len) { | ||||
|         *documentStack_.template Push<Ch>() = '/'; | ||||
|         documentStack_.template Reserve<Ch>(1 + len * 2); // worst case all characters are escaped as two characters | ||||
|         *documentStack_.template PushUnsafe<Ch>() = '/'; | ||||
|         for (SizeType i = 0; i < len; i++) { | ||||
|             if (str[i] == '~') { | ||||
|                 *documentStack_.template Push<Ch>() = '~'; | ||||
|                 *documentStack_.template Push<Ch>() = '0'; | ||||
|                 *documentStack_.template PushUnsafe<Ch>() = '~'; | ||||
|                 *documentStack_.template PushUnsafe<Ch>() = '0'; | ||||
|             } | ||||
|             else if (str[i] == '/') { | ||||
|                 *documentStack_.template Push<Ch>() = '~'; | ||||
|                 *documentStack_.template Push<Ch>() = '1'; | ||||
|                 *documentStack_.template PushUnsafe<Ch>() = '~'; | ||||
|                 *documentStack_.template PushUnsafe<Ch>() = '1'; | ||||
|             } | ||||
|             else | ||||
|                 *documentStack_.template Push<Ch>() = str[i]; | ||||
|                 *documentStack_.template PushUnsafe<Ch>() = str[i]; | ||||
|         } | ||||
|     } | ||||
|  | ||||
|     template<typename Ch> | ||||
|     void AppendToken(SizeType index) { | ||||
|         *documentStack_.template Push<Ch>() = '/'; | ||||
|         char buffer[21]; | ||||
| @@ -1790,9 +1798,27 @@ private: | ||||
|             *documentStack_.template Push<Ch>() = buffer[i]; | ||||
|     } | ||||
|  | ||||
|     void PushSchema(const SchemaType& schema) { new (schemaStack_.template Push<Context>()) Context(*this, &schema); } | ||||
|     // Specialized version for char to prevent buffer copying. | ||||
|     template <> | ||||
|     void AppendToken<char>(SizeType index) { | ||||
|         if (sizeof(SizeType) == 4) { | ||||
|             char *buffer = documentStack_.template Push<Ch>(1 + 10); // '/' + uint | ||||
|             *buffer++ = '/'; | ||||
|             const char* end = internal::u32toa(index, buffer); | ||||
|              documentStack_.template Pop<Ch>(static_cast<size_t>(10 - (end - buffer))); | ||||
|         } | ||||
|         else { | ||||
|             char *buffer = documentStack_.template Push<Ch>(1 + 20); // '/' + uint64 | ||||
|             *buffer++ = '/'; | ||||
|             const char* end = internal::u64toa(index, buffer); | ||||
|             documentStack_.template Pop<Ch>(static_cast<size_t>(20 - (end - buffer))); | ||||
|         } | ||||
|     } | ||||
|  | ||||
|  | ||||
|     RAPIDJSON_FORCEINLINE void PushSchema(const SchemaType& schema) { new (schemaStack_.template Push<Context>()) Context(*this, &schema); } | ||||
|      | ||||
|     void PopSchema() { | ||||
|     RAPIDJSON_FORCEINLINE void PopSchema() { | ||||
|         Context* c = schemaStack_.template Pop<Context>(1); | ||||
|         if (HashCodeArray* a = static_cast<HashCodeArray*>(c->arrayElementHashCodes)) { | ||||
|             a->~HashCodeArray(); | ||||
|   | ||||
		Reference in New Issue
	
	Block a user
	 Milo Yip
					Milo Yip