Ability to customize regular expression engine

This commit is contained in:
sandwoodK 2024-04-30 11:35:45 +02:00 committed by sandwooK
parent c1dde4e270
commit 2431bdeaa6
5 changed files with 205 additions and 25 deletions

View File

@ -124,6 +124,7 @@ if(valijson_BUILD_TESTS)
tests/test_poly_constraint.cpp tests/test_poly_constraint.cpp
tests/test_validation_errors.cpp tests/test_validation_errors.cpp
tests/test_validator.cpp tests/test_validator.cpp
tests/test_validator_with_custom_regular_expression_engine.cpp
tests/test_yaml_cpp_adapter.cpp tests/test_yaml_cpp_adapter.cpp
) )

View File

@ -87,6 +87,34 @@ Validator validator(Validator::kWeakTypes);
This will create a validator that will attempt to cast values to satisfy a schema. The original motivation for this was to support the Boost Property Tree library, which can parse JSON, but stores values as strings. This will create a validator that will attempt to cast values to satisfy a schema. The original motivation for this was to support the Boost Property Tree library, which can parse JSON, but stores values as strings.
## Regular Expression Engine
When enforcing a 'pattern' property, a regular expression engine is used. By default, the DefaultRegexEngine uses std::regex.
std::regex has no protection against catastrophic backtracking, and the implementation shipped with gcc is so suboptimal that it can easily lead to a segmentation fault.
One can customise the regular expression engine by implementing a wrapper around it and using a ValidatorT with the custom type.
The regular expression engine wrapper must implement the following interface:
```cpp
// Wrapper type handed to ValidatorT as its regular expression engine.
struct MyRegexpEngine
{
// Built from the text of a schema 'pattern' constraint. The validator keeps
// the constructed object in a cache keyed by the pattern string and reuses it.
MyRegexpEngine(const std::string& pattern)
{
//implementation specific
}
// Must return true when the string s satisfies the expression held by r.
// The default engine uses search (not full-match) semantics here.
static bool search(const std::string& s, const MyRegexpEngine& r)
{
//implementation specific
}
};
```
Then, to use it:
```cpp
using MyValidator = ValidatorT<MyRegexpEngine>;
```
## Memory Management ## Memory Management
Valijson has been designed to safely manage, and eventually free, the memory that is allocated while parsing a schema or validating a document. When working with an externally loaded schema (i.e. one that is populated using the `SchemaParser` class) you can rely on RAII semantics. Valijson has been designed to safely manage, and eventually free, the memory that is allocated while parsing a schema or validating a document. When working with an externally loaded schema (i.e. one that is populated using the `SchemaParser` class) you can rely on RAII semantics.

View File

@ -28,7 +28,7 @@ class ValidationResults;
* *
* @tparam AdapterType Adapter type for the target document. * @tparam AdapterType Adapter type for the target document.
*/ */
template<typename AdapterType> template<typename AdapterType, typename RegexEngine>
class ValidationVisitor: public constraints::ConstraintVisitor class ValidationVisitor: public constraints::ConstraintVisitor
{ {
public: public:
@ -44,14 +44,14 @@ public:
* recording error descriptions. If this pointer is set * recording error descriptions. If this pointer is set
* to nullptr, validation errors will caused validation to * to nullptr, validation errors will caused validation to
* stop immediately. * stop immediately.
* @param regexesCache Cache of already created std::regex objects for pattern * @param regexesCache Cache of already created RegexEngine objects for pattern
* constraints. * constraints.
*/ */
ValidationVisitor(const AdapterType &target, ValidationVisitor(const AdapterType &target,
std::vector<std::string> context, std::vector<std::string> context,
const bool strictTypes, const bool strictTypes,
ValidationResults *results, ValidationResults *results,
std::unordered_map<std::string, std::regex>& regexesCache) std::unordered_map<std::string, RegexEngine>& regexesCache)
: m_target(target), : m_target(target),
m_context(std::move(context)), m_context(std::move(context)),
m_results(results), m_results(results),
@ -155,7 +155,7 @@ public:
ValidationResults newResults; ValidationResults newResults;
ValidationResults *childResults = (m_results) ? &newResults : nullptr; ValidationResults *childResults = (m_results) ? &newResults : nullptr;
ValidationVisitor<AdapterType> v(m_target, m_context, m_strictTypes, childResults, m_regexesCache); ValidationVisitor<AdapterType, RegexEngine> v(m_target, m_context, m_strictTypes, childResults, m_regexesCache);
constraint.applyToSubschemas( constraint.applyToSubschemas(
ValidateSubschemas(m_target, m_context, false, true, v, childResults, &numValidated, nullptr)); ValidateSubschemas(m_target, m_context, false, true, v, childResults, &numValidated, nullptr));
@ -498,7 +498,7 @@ public:
std::vector<std::string> newContext = m_context; std::vector<std::string> newContext = m_context;
newContext.push_back("[" + std::to_string(index) + "]"); newContext.push_back("[" + std::to_string(index) + "]");
ValidationVisitor<AdapterType> validator(*itr, newContext, m_strictTypes, m_results, m_regexesCache); ValidationVisitor<AdapterType, RegexEngine> validator(*itr, newContext, m_strictTypes, m_results, m_regexesCache);
if (!validator.validateSchema(*additionalItemsSubschema)) { if (!validator.validateSchema(*additionalItemsSubschema)) {
if (m_results) { if (m_results) {
@ -874,7 +874,7 @@ public:
return false; return false;
} }
ValidationVisitor<AdapterType> v(m_target, m_context, m_strictTypes, nullptr, m_regexesCache); ValidationVisitor<AdapterType, RegexEngine> v(m_target, m_context, m_strictTypes, nullptr, m_regexesCache);
if (v.validateSchema(*subschema)) { if (v.validateSchema(*subschema)) {
if (m_results) { if (m_results) {
m_results->pushError(m_context, m_results->pushError(m_context,
@ -901,7 +901,7 @@ public:
ValidationResults newResults; ValidationResults newResults;
ValidationResults *childResults = (m_results) ? &newResults : nullptr; ValidationResults *childResults = (m_results) ? &newResults : nullptr;
ValidationVisitor<AdapterType> v(m_target, m_context, m_strictTypes, childResults, m_regexesCache); ValidationVisitor<AdapterType, RegexEngine> v(m_target, m_context, m_strictTypes, childResults, m_regexesCache);
constraint.applyToSubschemas( constraint.applyToSubschemas(
ValidateSubschemas(m_target, m_context, true, true, v, childResults, &numValidated, nullptr)); ValidateSubschemas(m_target, m_context, true, true, v, childResults, &numValidated, nullptr));
@ -943,10 +943,10 @@ public:
std::string pattern(constraint.getPattern<std::string::allocator_type>()); std::string pattern(constraint.getPattern<std::string::allocator_type>());
auto it = m_regexesCache.find(pattern); auto it = m_regexesCache.find(pattern);
if (it == m_regexesCache.end()) { if (it == m_regexesCache.end()) {
it = m_regexesCache.emplace(pattern, std::regex(pattern)).first; it = m_regexesCache.emplace(pattern, RegexEngine(pattern)).first;
} }
if (!std::regex_search(m_target.asString(), it->second)) { if (!RegexEngine::search(m_target.asString(), it->second)) {
if (m_results) { if (m_results) {
m_results->pushError(m_context, "Failed to match regex specified by 'pattern' constraint."); m_results->pushError(m_context, "Failed to match regex specified by 'pattern' constraint.");
} }
@ -1086,7 +1086,7 @@ public:
for (const typename AdapterType::ObjectMember m : m_target.asObject()) { for (const typename AdapterType::ObjectMember m : m_target.asObject()) {
adapters::StdStringAdapter stringAdapter(m.first); adapters::StdStringAdapter stringAdapter(m.first);
ValidationVisitor<adapters::StdStringAdapter> validator(stringAdapter, m_context, m_strictTypes, nullptr, m_regexesCache); ValidationVisitor<adapters::StdStringAdapter, RegexEngine> validator(stringAdapter, m_context, m_strictTypes, nullptr, m_regexesCache);
if (!validator.validateSchema(*constraint.getSubschema())) { if (!validator.validateSchema(*constraint.getSubschema())) {
return false; return false;
} }
@ -1155,7 +1155,7 @@ public:
newContext.push_back("[" + std::to_string(index) + "]"); newContext.push_back("[" + std::to_string(index) + "]");
// Create a validator for the current array item // Create a validator for the current array item
ValidationVisitor<AdapterType> validationVisitor(item, newContext, m_strictTypes, m_results, m_regexesCache); ValidationVisitor<AdapterType, RegexEngine> validationVisitor(item, newContext, m_strictTypes, m_results, m_regexesCache);
// Perform validation // Perform validation
if (!validationVisitor.validateSchema(*itemsSubschema)) { if (!validationVisitor.validateSchema(*itemsSubschema)) {
@ -1420,7 +1420,7 @@ private:
ValidationResults *results, ValidationResults *results,
unsigned int *numValidated, unsigned int *numValidated,
bool *validated, bool *validated,
std::unordered_map<std::string, std::regex>& regexesCache) std::unordered_map<std::string, RegexEngine>& regexesCache)
: m_arr(arr), : m_arr(arr),
m_context(context), m_context(context),
m_continueOnSuccess(continueOnSuccess), m_continueOnSuccess(continueOnSuccess),
@ -1477,7 +1477,7 @@ private:
ValidationResults * const m_results; ValidationResults * const m_results;
unsigned int * const m_numValidated; unsigned int * const m_numValidated;
bool * const m_validated; bool * const m_validated;
std::unordered_map<std::string, std::regex>& m_regexesCache; std::unordered_map<std::string, RegexEngine>& m_regexesCache;
}; };
/** /**
@ -1563,7 +1563,7 @@ private:
ValidationResults *results, ValidationResults *results,
std::set<std::string> *propertiesMatched, std::set<std::string> *propertiesMatched,
bool *validated, bool *validated,
std::unordered_map<std::string, std::regex>& regexesCache) std::unordered_map<std::string, RegexEngine>& regexesCache)
: m_object(object), : m_object(object),
m_context(context), m_context(context),
m_continueOnSuccess(continueOnSuccess), m_continueOnSuccess(continueOnSuccess),
@ -1640,7 +1640,7 @@ private:
ValidationResults * const m_results; ValidationResults * const m_results;
std::set<std::string> * const m_propertiesMatched; std::set<std::string> * const m_propertiesMatched;
bool * const m_validated; bool * const m_validated;
std::unordered_map<std::string, std::regex>& m_regexesCache; std::unordered_map<std::string, RegexEngine>& m_regexesCache;
}; };
/** /**
@ -1659,7 +1659,7 @@ private:
ValidationResults *results, ValidationResults *results,
std::set<std::string> *propertiesMatched, std::set<std::string> *propertiesMatched,
bool *validated, bool *validated,
std::unordered_map<std::string, std::regex>& regexesCache) std::unordered_map<std::string, RegexEngine>& regexesCache)
: m_object(object), : m_object(object),
m_context(context), m_context(context),
m_continueOnSuccess(continueOnSuccess), m_continueOnSuccess(continueOnSuccess),
@ -1716,7 +1716,7 @@ private:
ValidationResults * const m_results; ValidationResults * const m_results;
std::set<std::string> * const m_propertiesMatched; std::set<std::string> * const m_propertiesMatched;
bool * const m_validated; bool * const m_validated;
std::unordered_map<std::string, std::regex>& m_regexesCache; std::unordered_map<std::string, RegexEngine>& m_regexesCache;
}; };
/** /**
@ -1842,7 +1842,7 @@ private:
* *
* @return true if the visitor returns successfully, false otherwise. * @return true if the visitor returns successfully, false otherwise.
*/ */
static bool validationCallback(const constraints::Constraint &constraint, ValidationVisitor<AdapterType> &visitor) static bool validationCallback(const constraints::Constraint &constraint, ValidationVisitor<AdapterType, RegexEngine> &visitor)
{ {
return constraint.accept(visitor); return constraint.accept(visitor);
} }
@ -1901,7 +1901,7 @@ private:
bool m_strictTypes; bool m_strictTypes;
/// Cached regex objects for pattern constraint /// Cached regex objects for pattern constraint
std::unordered_map<std::string, std::regex>& m_regexesCache; std::unordered_map<std::string, RegexEngine>& m_regexesCache;
}; };
} // namespace valijson } // namespace valijson

View File

@ -8,10 +8,15 @@ namespace valijson {
class Schema; class Schema;
class ValidationResults; class ValidationResults;
/** /**
* @brief Class that provides validation functionality. * @brief Class that provides validation functionality.
*
* @tparam RegexEngine regular expression engine used for pattern constraint validation.
*/ */
class Validator template <typename RegexEngine>
class ValidatorT
{ {
public: public:
enum TypeCheckingMode enum TypeCheckingMode
@ -23,7 +28,7 @@ public:
/** /**
* @brief Construct a Validator that uses strong type checking by default * @brief Construct a Validator that uses strong type checking by default
*/ */
Validator() ValidatorT()
: strictTypes(true) { } : strictTypes(true) { }
/** /**
@ -31,7 +36,7 @@ public:
* *
* @param typeCheckingMode choice of strong or weak type checking * @param typeCheckingMode choice of strong or weak type checking
*/ */
Validator(TypeCheckingMode typeCheckingMode) ValidatorT(TypeCheckingMode typeCheckingMode)
: strictTypes(typeCheckingMode == kStrongTypes) { } : strictTypes(typeCheckingMode == kStrongTypes) { }
/** /**
@ -58,7 +63,7 @@ public:
ValidationResults *results) ValidationResults *results)
{ {
// Construct a ValidationVisitor to perform validation at the root level // Construct a ValidationVisitor to perform validation at the root level
ValidationVisitor<AdapterType> v(target, ValidationVisitor<AdapterType, RegexEngine> v(target,
std::vector<std::string>(1, "<root>"), strictTypes, results, regexesCache); std::vector<std::string>(1, "<root>"), strictTypes, results, regexesCache);
return v.validateSchema(schema); return v.validateSchema(schema);
@ -70,7 +75,25 @@ private:
bool strictTypes; bool strictTypes;
/// Cached regex objects for pattern constraint. Key - pattern. /// Cached regex objects for pattern constraint. Key - pattern.
std::unordered_map<std::string, std::regex> regexesCache; std::unordered_map<std::string, RegexEngine> regexesCache;
}; };
/**
 * @brief Default regular expression engine, implemented on top of std::regex.
 *
 * Holds a compiled std::regex and exposes the constructor / static search()
 * pair expected from a regular expression engine type.
 */
struct DefaultRegexEngine
{
    /// Compiled form of the pattern supplied at construction time.
    std::regex regex;

    /**
     * @brief Compile the given pattern.
     *
     * @param pattern  Source text of the regular expression.
     */
    DefaultRegexEngine(const std::string& pattern)
      : regex(pattern)
    {
    }

    /**
     * @brief Look for a match of an engine's expression inside a string.
     *
     * @param s  String to inspect.
     * @param r  Engine holding the compiled expression.
     *
     * @return true if some part of @p s matches the expression, false otherwise.
     */
    static bool search(const std::string& s, const DefaultRegexEngine& r)
    {
        const bool found = std::regex_search(s.begin(), s.end(), r.regex);
        return found;
    }
};
using Validator = ValidatorT<DefaultRegexEngine>;
} // namespace valijson } // namespace valijson

View File

@ -0,0 +1,128 @@
#ifdef _MSC_VER
#pragma warning(disable: 4706)
#include <picojson.h>
#pragma warning(default: 4706)
#else
#include <picojson.h>
#endif
#include <fstream>
#include <iostream>
#include <string>
#include <gtest/gtest.h>
#include <valijson/adapters/json11_adapter.hpp>
#include <valijson/adapters/jsoncpp_adapter.hpp>
#include <valijson/adapters/rapidjson_adapter.hpp>
#include <valijson/adapters/picojson_adapter.hpp>
#include <valijson/adapters/nlohmann_json_adapter.hpp>
#include <valijson/utils/json11_utils.hpp>
#include <valijson/utils/jsoncpp_utils.hpp>
#include <valijson/utils/picojson_utils.hpp>
#include <valijson/utils/rapidjson_utils.hpp>
#include <valijson/utils/nlohmann_json_utils.hpp>
#include <valijson/schema.hpp>
#include <valijson/schema_parser.hpp>
#include <valijson/validation_results.hpp>
#include <valijson/validator.hpp>
#include <valijson/exceptions.hpp>
#ifdef VALIJSON_BUILD_POCO_ADAPTER
#include <valijson/adapters/poco_json_adapter.hpp>
#include <valijson/utils/poco_json_utils.hpp>
#endif
using valijson::adapters::AdapterTraits;
using valijson::adapters::RapidJsonAdapter;
using valijson::Schema;
using valijson::SchemaParser;
using valijson::Validator;
namespace
{
/**
 * @brief Write @p content, followed by a newline, to the file @p filename.
 *
 * An existing file of the same name is truncated first. Failures to open or
 * write the file are not reported to the caller.
 *
 * @param filename  Path of the file to create or overwrite.
 * @param content   Text written to the file ahead of the trailing newline.
 */
void createFileFromContent(const std::string& filename, const std::string& content)
{
    std::ofstream outfile(filename, std::ofstream::out | std::ofstream::trunc);
    // The stream destructor flushes and closes the file, so neither std::endl
    // nor an explicit close() is needed.
    outfile << content << '\n';
}
}
// Potentially:
// define a struct CustomRegexEngine that handles both problems and use it as a replacement for Validator.
// using CustomValidator = ValidatorT<CustomRegexEngine>;
/**
 * Validates a 100000-character string against the pattern "^[\s\S]+$" and
 * expects validation to succeed.
 *
 * The test is currently skipped: per the note below, it crashes (segfault)
 * when run with the default Validator.
 */
TEST(valijson, valijson_be_robust_against_bad_regular_expression)
{
    // GTEST_SKIP() leaves the test here; the rest documents the scenario and
    // only runs once the skip is removed.
    GTEST_SKIP() << "Skipping because it causes a segmentation fault with the default Validator";

    const std::string schema = R"(
{
"properties": {
"text": {
"pattern": "^[\\s\\S]+$",
"type": "string"
}
}
}
)";
    createFileFromContent("schema.json", schema);

    rapidjson::Document mySchemaDoc;
    ASSERT_TRUE(valijson::utils::loadDocument("schema.json", mySchemaDoc));

    Schema mySchema;
    SchemaParser parser;
    RapidJsonAdapter mySchemaAdapter(mySchemaDoc);
    parser.populateSchema(mySchemaAdapter, mySchema);

    // Build {"text": "AAAA...A"} with 100000 'A' characters in one append.
    rapidjson::Document myTargetDoc;
    std::string payload = "{ \"text\" : \"";
    payload.append(100000, 'A');
    payload += "\"}";
    createFileFromContent("payload.json", payload);
    ASSERT_TRUE(valijson::utils::loadDocument("payload.json", myTargetDoc));

    // This test crashes (segfault) if the validator is not customized with a
    // custom regular expression engine.
    Validator validator;
    RapidJsonAdapter myTargetAdapter(myTargetDoc);
    ASSERT_TRUE(validator.validate(mySchema, myTargetAdapter, nullptr));
}
/**
 * Validates a string of repeated 'A' characters ending in 'C' against the
 * pattern "((A+)*)+$", which is prone to catastrophic backtracking.
 *
 * The test is currently skipped: per the note below, it takes endless time
 * when run with the default Validator.
 */
TEST(valijson, valijson_be_robust_against_catastrophic_backtracking_regular_expression)
{
    // GTEST_SKIP() leaves the test here; the rest documents the scenario and
    // only runs once the skip is removed.
    GTEST_SKIP() << "Skipping because it hangs: catastrophic backtracking is not handled by the default Validator";

    const std::string schema = R"(
{
"properties": {
"text": {
"pattern": "((A+)*)+$",
"type": "string"
}
}
}
)";
    createFileFromContent("schema.json", schema);

    rapidjson::Document mySchemaDoc;
    ASSERT_TRUE(valijson::utils::loadDocument("schema.json", mySchemaDoc));

    Schema mySchema;
    SchemaParser parser;
    RapidJsonAdapter mySchemaAdapter(mySchemaDoc);
    parser.populateSchema(mySchemaAdapter, mySchema);

    rapidjson::Document myTargetDoc;
    const std::string payload = "{ \"text\" : \"AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAC\"}";
    createFileFromContent("payload.json", payload);
    ASSERT_TRUE(valijson::utils::loadDocument("payload.json", myTargetDoc));

    // This test takes endless time if the validator is not customized with a
    // custom regular expression engine.
    Validator validator;
    RapidJsonAdapter myTargetAdapter(myTargetDoc);

    // The payload is correct with respect to the regexp, but evaluation is
    // impossible due to catastrophic regexp backtracking, so we return false.
    ASSERT_FALSE(validator.validate(mySchema, myTargetAdapter, nullptr));
}