mirror of
https://github.com/Tencent/rapidjson.git
synced 2025-03-09 19:24:23 +01:00
Optimization for Regex and Schema
This commit is contained in:
parent
a006648398
commit
a33af83ee4
@ -71,13 +71,17 @@ class GenericRegex {
|
||||
public:
|
||||
typedef typename Encoding::Ch Ch;
|
||||
|
||||
GenericRegex(const Ch* source, Allocator* allocator = 0) : states_(allocator, 256), ranges_(allocator, 256), root_(kRegexInvalidState), stateCount_(), rangeCount_(), anchorBegin_(), anchorEnd_() {
|
||||
GenericRegex(const Ch* source, Allocator* allocator = 0) :
|
||||
states_(allocator, 256), ranges_(allocator, 256), root_(kRegexInvalidState), stateCount_(), rangeCount_(),
|
||||
stateSet_(), state0_(allocator, 0), state1_(allocator, 0), anchorBegin_(), anchorEnd_()
|
||||
{
|
||||
GenericStringStream<Encoding> ss(source);
|
||||
DecodedStream<GenericStringStream<Encoding> > ds(ss);
|
||||
Parse(ds);
|
||||
}
|
||||
|
||||
~GenericRegex() {
|
||||
Allocator::Free(stateSet_);
|
||||
}
|
||||
|
||||
bool IsValid() const {
|
||||
@ -308,6 +312,14 @@ private:
|
||||
printf("\n");
|
||||
#endif
|
||||
}
|
||||
|
||||
// Preallocate buffer for SearchWithAnchoring()
|
||||
RAPIDJSON_ASSERT(stateSet_ == 0);
|
||||
if (stateCount_ > 0) {
|
||||
stateSet_ = static_cast<unsigned*>(states_.GetAllocator().Malloc(GetStateSetSize()));
|
||||
state0_.Reserve<SizeType>(stateCount_);
|
||||
state1_.Reserve<SizeType>(stateCount_);
|
||||
}
|
||||
}
|
||||
|
||||
SizeType NewState(SizeType out, SizeType out1, unsigned codepoint) {
|
||||
@ -568,21 +580,15 @@ private:
|
||||
RAPIDJSON_ASSERT(IsValid());
|
||||
DecodedStream<InputStream> ds(is);
|
||||
|
||||
Allocator allocator;
|
||||
Stack<Allocator> state0(&allocator, stateCount_ * sizeof(SizeType));
|
||||
Stack<Allocator> state1(&allocator, stateCount_ * sizeof(SizeType));
|
||||
Stack<Allocator> *current = &state0, *next = &state1;
|
||||
|
||||
const size_t stateSetSize = (stateCount_ + 31) / 32 * 4;
|
||||
unsigned* stateSet = static_cast<unsigned*>(allocator.Malloc(stateSetSize));
|
||||
std::memset(stateSet, 0, stateSetSize);
|
||||
|
||||
bool matched = false;
|
||||
matched = AddState(stateSet, *current, root_);
|
||||
state0_.Clear();
|
||||
Stack<Allocator> *current = &state0_, *next = &state1_;
|
||||
const size_t stateSetSize = GetStateSetSize();
|
||||
std::memset(stateSet_, 0, stateSetSize);
|
||||
|
||||
bool matched = AddState(*current, root_);
|
||||
unsigned codepoint;
|
||||
while (!current->Empty() && (codepoint = ds.Take()) != 0) {
|
||||
std::memset(stateSet, 0, stateSetSize);
|
||||
std::memset(stateSet_, 0, stateSetSize);
|
||||
next->Clear();
|
||||
matched = false;
|
||||
for (const SizeType* s = current->template Bottom<SizeType>(); s != current->template End<SizeType>(); ++s) {
|
||||
@ -591,39 +597,38 @@ private:
|
||||
sr.codepoint == kAnyCharacterClass ||
|
||||
(sr.codepoint == kRangeCharacterClass && MatchRange(sr.rangeStart, codepoint)))
|
||||
{
|
||||
matched = AddState(stateSet, *next, sr.out) || matched;
|
||||
matched = AddState(*next, sr.out) || matched;
|
||||
if (!anchorEnd && matched)
|
||||
goto exit;
|
||||
return true;
|
||||
}
|
||||
if (!anchorBegin)
|
||||
AddState(stateSet, *next, root_);
|
||||
AddState(*next, root_);
|
||||
}
|
||||
Stack<Allocator>* temp = current;
|
||||
current = next;
|
||||
next = temp;
|
||||
internal::Swap(current, next);
|
||||
}
|
||||
|
||||
exit:
|
||||
Allocator::Free(stateSet);
|
||||
return matched;
|
||||
}
|
||||
|
||||
size_t GetStateSetSize() const {
|
||||
return (stateCount_ + 31) / 32 * 4;
|
||||
}
|
||||
|
||||
// Return whether the added state is a match state
|
||||
bool AddState(unsigned* stateSet, Stack<Allocator>& l, SizeType index) const {
|
||||
bool AddState(Stack<Allocator>& l, SizeType index) const {
|
||||
if (index == kRegexInvalidState)
|
||||
return true;
|
||||
|
||||
const State& s = GetState(index);
|
||||
if (s.out1 != kRegexInvalidState) { // Split
|
||||
bool matched = AddState(stateSet, l, s.out);
|
||||
matched = AddState(stateSet, l, s.out1) || matched;
|
||||
return matched;
|
||||
bool matched = AddState(l, s.out);
|
||||
return AddState(l, s.out1) || matched;
|
||||
}
|
||||
else if (!(stateSet[index >> 5] & (1 << (index & 31)))) {
|
||||
stateSet[index >> 5] |= (1 << (index & 31));
|
||||
*l.template Push<SizeType>() = index;
|
||||
else if (!(stateSet_[index >> 5] & (1 << (index & 31)))) {
|
||||
stateSet_[index >> 5] |= (1 << (index & 31));
|
||||
*l.template PushUnsafe<SizeType>() = index;
|
||||
}
|
||||
return GetState(index).out == kRegexInvalidState;
|
||||
return s.out == kRegexInvalidState; // by using PushUnsafe() above, we can ensure s is not invalidated due to reallocation.
|
||||
}
|
||||
|
||||
bool MatchRange(SizeType rangeIndex, unsigned codepoint) const {
|
||||
@ -642,6 +647,11 @@ private:
|
||||
SizeType root_;
|
||||
SizeType stateCount_;
|
||||
SizeType rangeCount_;
|
||||
|
||||
// For SearchWithAnchoring()
|
||||
uint32_t* stateSet_; // allocated by states_.GetAllocator()
|
||||
mutable Stack<Allocator> state0_;
|
||||
mutable Stack<Allocator> state1_;
|
||||
bool anchorBegin_;
|
||||
bool anchorEnd_;
|
||||
};
|
||||
|
@ -38,7 +38,6 @@ public:
|
||||
// Optimization note: Do not allocate memory for stack_ in constructor.
|
||||
// Do it lazily on the first Push() -> Expand() -> Resize().
|
||||
Stack(Allocator* allocator, size_t stackCapacity) : allocator_(allocator), ownAllocator_(0), stack_(0), stackTop_(0), stackEnd_(0), initialCapacity_(stackCapacity) {
|
||||
RAPIDJSON_ASSERT(stackCapacity > 0);
|
||||
}
|
||||
|
||||
#if RAPIDJSON_HAS_CXX11_RVALUE_REFS
|
||||
|
@ -300,15 +300,17 @@ struct SchemaValidationContext {
|
||||
factory.DestroySchemaValidator(patternPropertiesValidators[i]);
|
||||
factory.FreeState(patternPropertiesValidators);
|
||||
}
|
||||
factory.FreeState(patternPropertiesSchemas);
|
||||
factory.FreeState(objectDependencies);
|
||||
if (patternPropertiesSchemas)
|
||||
factory.FreeState(patternPropertiesSchemas);
|
||||
if (objectDependencies)
|
||||
factory.FreeState(objectDependencies);
|
||||
}
|
||||
|
||||
SchemaValidatorFactoryType& factory;
|
||||
const SchemaType* schema;
|
||||
const SchemaType* valueSchema;
|
||||
const Ch* invalidKeyword;
|
||||
void* hasher; // Only calidator access
|
||||
void* hasher; // Only validator access
|
||||
void* arrayElementHashCodes; // Only the validator accesses this
|
||||
ISchemaValidator** validators;
|
||||
SizeType validatorCount;
|
||||
@ -613,7 +615,7 @@ public:
|
||||
return true;
|
||||
}
|
||||
|
||||
bool EndValue(Context& context) const {
|
||||
RAPIDJSON_FORCEINLINE bool EndValue(Context& context) const {
|
||||
if (context.patternPropertiesValidatorCount > 0) {
|
||||
bool otherValid = false;
|
||||
SizeType count = context.patternPropertiesValidatorCount;
|
||||
@ -1080,8 +1082,12 @@ private:
|
||||
// O(n)
|
||||
template <typename ValueType>
|
||||
bool FindPropertyIndex(const ValueType& name, SizeType* outIndex) const {
|
||||
SizeType len = name.GetStringLength();
|
||||
const Ch* str = name.GetString();
|
||||
for (SizeType index = 0; index < propertyCount_; index++)
|
||||
if (properties_[index].name == name) {
|
||||
if (properties_[index].name.GetStringLength() == len &&
|
||||
(std::memcmp(properties_[index].name.GetString(), str, sizeof(Ch) * len) == 0))
|
||||
{
|
||||
*outIndex = index;
|
||||
return true;
|
||||
}
|
||||
@ -1703,7 +1709,7 @@ private:
|
||||
PushSchema(root_);
|
||||
else {
|
||||
if (CurrentContext().inArray)
|
||||
AppendToken(CurrentContext().arrayElementIndex);
|
||||
AppendToken<Ch>(CurrentContext().arrayElementIndex);
|
||||
|
||||
if (!CurrentSchema().BeginValue(CurrentContext()))
|
||||
return false;
|
||||
@ -1767,21 +1773,23 @@ private:
|
||||
}
|
||||
|
||||
void AppendToken(const Ch* str, SizeType len) {
|
||||
*documentStack_.template Push<Ch>() = '/';
|
||||
documentStack_.template Reserve<Ch>(1 + len * 2); // worst case all characters are escaped as two characters
|
||||
*documentStack_.template PushUnsafe<Ch>() = '/';
|
||||
for (SizeType i = 0; i < len; i++) {
|
||||
if (str[i] == '~') {
|
||||
*documentStack_.template Push<Ch>() = '~';
|
||||
*documentStack_.template Push<Ch>() = '0';
|
||||
*documentStack_.template PushUnsafe<Ch>() = '~';
|
||||
*documentStack_.template PushUnsafe<Ch>() = '0';
|
||||
}
|
||||
else if (str[i] == '/') {
|
||||
*documentStack_.template Push<Ch>() = '~';
|
||||
*documentStack_.template Push<Ch>() = '1';
|
||||
*documentStack_.template PushUnsafe<Ch>() = '~';
|
||||
*documentStack_.template PushUnsafe<Ch>() = '1';
|
||||
}
|
||||
else
|
||||
*documentStack_.template Push<Ch>() = str[i];
|
||||
*documentStack_.template PushUnsafe<Ch>() = str[i];
|
||||
}
|
||||
}
|
||||
|
||||
template<typename Ch>
|
||||
void AppendToken(SizeType index) {
|
||||
*documentStack_.template Push<Ch>() = '/';
|
||||
char buffer[21];
|
||||
@ -1790,9 +1798,27 @@ private:
|
||||
*documentStack_.template Push<Ch>() = buffer[i];
|
||||
}
|
||||
|
||||
void PushSchema(const SchemaType& schema) { new (schemaStack_.template Push<Context>()) Context(*this, &schema); }
|
||||
// Specialized version for char to prevent buffer copying.
|
||||
template <>
|
||||
void AppendToken<char>(SizeType index) {
|
||||
if (sizeof(SizeType) == 4) {
|
||||
char *buffer = documentStack_.template Push<Ch>(1 + 10); // '/' + uint
|
||||
*buffer++ = '/';
|
||||
const char* end = internal::u32toa(index, buffer);
|
||||
documentStack_.template Pop<Ch>(static_cast<size_t>(10 - (end - buffer)));
|
||||
}
|
||||
else {
|
||||
char *buffer = documentStack_.template Push<Ch>(1 + 20); // '/' + uint64
|
||||
*buffer++ = '/';
|
||||
const char* end = internal::u64toa(index, buffer);
|
||||
documentStack_.template Pop<Ch>(static_cast<size_t>(20 - (end - buffer)));
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
RAPIDJSON_FORCEINLINE void PushSchema(const SchemaType& schema) { new (schemaStack_.template Push<Context>()) Context(*this, &schema); }
|
||||
|
||||
void PopSchema() {
|
||||
RAPIDJSON_FORCEINLINE void PopSchema() {
|
||||
Context* c = schemaStack_.template Pop<Context>(1);
|
||||
if (HashCodeArray* a = static_cast<HashCodeArray*>(c->arrayElementHashCodes)) {
|
||||
a->~HashCodeArray();
|
||||
|
Loading…
x
Reference in New Issue
Block a user