mirror of
https://github.com/tristanpenman/valijson.git
synced 2024-12-12 10:13:51 +01:00
Ability to customize regular expression engine
This commit is contained in:
parent
c1dde4e270
commit
2431bdeaa6
@ -124,6 +124,7 @@ if(valijson_BUILD_TESTS)
|
||||
tests/test_poly_constraint.cpp
|
||||
tests/test_validation_errors.cpp
|
||||
tests/test_validator.cpp
|
||||
tests/test_validator_with_custom_regular_expression_engine.cpp
|
||||
tests/test_yaml_cpp_adapter.cpp
|
||||
)
|
||||
|
||||
|
28
README.md
28
README.md
@ -87,6 +87,34 @@ Validator validator(Validator::kWeakTypes);
|
||||
|
||||
This will create a validator that will attempt to cast values to satisfy a schema. The original motivation for this was to support the Boost Property Tree library, which can parse JSON, but stores values as strings.
|
||||
|
||||
## Regular Expression Engine
|
||||
|
||||
When enforcing a 'pattern' property, a regular expression engine is used. By default, the DefaultRegexEngine uses std::regex.
|
||||
std::regex has no protection against catastrophic backtracking, and its implementation in gcc is so suboptimal that it can easily lead to a segmentation fault.
|
||||
One can customise the regular expression engine by implementing one's own wrapper around it and using a ValidatorT with the custom type.
|
||||
|
||||
The regular expression engine wrapper must implement the following interface
|
||||
```cpp
|
||||
struct MyRegexpEngine
|
||||
{
|
||||
MyRegexpEngine(const std::string& pattern)
|
||||
{
|
||||
//implementation specific
|
||||
}
|
||||
|
||||
static bool search(const std::string& s, const MyRegexpEngine& r)
|
||||
{
|
||||
//implementation specific
|
||||
}
|
||||
};
|
||||
|
||||
```
|
||||
|
||||
Then to use it
|
||||
```cpp
|
||||
using MyValidator = ValidatorT<MyRegexpEngine>;
|
||||
```
|
||||
|
||||
## Memory Management
|
||||
|
||||
Valijson has been designed to safely manage, and eventually free, the memory that is allocated while parsing a schema or validating a document. When working with an externally loaded schema (i.e. one that is populated using the `SchemaParser` class) you can rely on RAII semantics.
|
||||
|
@ -28,7 +28,7 @@ class ValidationResults;
|
||||
*
|
||||
* @tparam AdapterType Adapter type for the target document.
|
||||
*/
|
||||
template<typename AdapterType>
|
||||
template<typename AdapterType, typename RegexEngine>
|
||||
class ValidationVisitor: public constraints::ConstraintVisitor
|
||||
{
|
||||
public:
|
||||
@ -44,14 +44,14 @@ public:
|
||||
* recording error descriptions. If this pointer is set
|
||||
* to nullptr, validation errors will cause validation to
|
||||
* stop immediately.
|
||||
* @param regexesCache Cache of already created std::regex objects for pattern
|
||||
* @param regexesCache Cache of already created RegexEngine objects for pattern
|
||||
* constraints.
|
||||
*/
|
||||
ValidationVisitor(const AdapterType &target,
|
||||
std::vector<std::string> context,
|
||||
const bool strictTypes,
|
||||
ValidationResults *results,
|
||||
std::unordered_map<std::string, std::regex>& regexesCache)
|
||||
std::unordered_map<std::string, RegexEngine>& regexesCache)
|
||||
: m_target(target),
|
||||
m_context(std::move(context)),
|
||||
m_results(results),
|
||||
@ -155,7 +155,7 @@ public:
|
||||
ValidationResults newResults;
|
||||
ValidationResults *childResults = (m_results) ? &newResults : nullptr;
|
||||
|
||||
ValidationVisitor<AdapterType> v(m_target, m_context, m_strictTypes, childResults, m_regexesCache);
|
||||
ValidationVisitor<AdapterType, RegexEngine> v(m_target, m_context, m_strictTypes, childResults, m_regexesCache);
|
||||
constraint.applyToSubschemas(
|
||||
ValidateSubschemas(m_target, m_context, false, true, v, childResults, &numValidated, nullptr));
|
||||
|
||||
@ -498,7 +498,7 @@ public:
|
||||
std::vector<std::string> newContext = m_context;
|
||||
newContext.push_back("[" + std::to_string(index) + "]");
|
||||
|
||||
ValidationVisitor<AdapterType> validator(*itr, newContext, m_strictTypes, m_results, m_regexesCache);
|
||||
ValidationVisitor<AdapterType, RegexEngine> validator(*itr, newContext, m_strictTypes, m_results, m_regexesCache);
|
||||
|
||||
if (!validator.validateSchema(*additionalItemsSubschema)) {
|
||||
if (m_results) {
|
||||
@ -874,7 +874,7 @@ public:
|
||||
return false;
|
||||
}
|
||||
|
||||
ValidationVisitor<AdapterType> v(m_target, m_context, m_strictTypes, nullptr, m_regexesCache);
|
||||
ValidationVisitor<AdapterType, RegexEngine> v(m_target, m_context, m_strictTypes, nullptr, m_regexesCache);
|
||||
if (v.validateSchema(*subschema)) {
|
||||
if (m_results) {
|
||||
m_results->pushError(m_context,
|
||||
@ -901,7 +901,7 @@ public:
|
||||
ValidationResults newResults;
|
||||
ValidationResults *childResults = (m_results) ? &newResults : nullptr;
|
||||
|
||||
ValidationVisitor<AdapterType> v(m_target, m_context, m_strictTypes, childResults, m_regexesCache);
|
||||
ValidationVisitor<AdapterType, RegexEngine> v(m_target, m_context, m_strictTypes, childResults, m_regexesCache);
|
||||
constraint.applyToSubschemas(
|
||||
ValidateSubschemas(m_target, m_context, true, true, v, childResults, &numValidated, nullptr));
|
||||
|
||||
@ -943,10 +943,10 @@ public:
|
||||
std::string pattern(constraint.getPattern<std::string::allocator_type>());
|
||||
auto it = m_regexesCache.find(pattern);
|
||||
if (it == m_regexesCache.end()) {
|
||||
it = m_regexesCache.emplace(pattern, std::regex(pattern)).first;
|
||||
it = m_regexesCache.emplace(pattern, RegexEngine(pattern)).first;
|
||||
}
|
||||
|
||||
if (!std::regex_search(m_target.asString(), it->second)) {
|
||||
if (!RegexEngine::search(m_target.asString(), it->second)) {
|
||||
if (m_results) {
|
||||
m_results->pushError(m_context, "Failed to match regex specified by 'pattern' constraint.");
|
||||
}
|
||||
@ -1086,7 +1086,7 @@ public:
|
||||
|
||||
for (const typename AdapterType::ObjectMember m : m_target.asObject()) {
|
||||
adapters::StdStringAdapter stringAdapter(m.first);
|
||||
ValidationVisitor<adapters::StdStringAdapter> validator(stringAdapter, m_context, m_strictTypes, nullptr, m_regexesCache);
|
||||
ValidationVisitor<adapters::StdStringAdapter, RegexEngine> validator(stringAdapter, m_context, m_strictTypes, nullptr, m_regexesCache);
|
||||
if (!validator.validateSchema(*constraint.getSubschema())) {
|
||||
return false;
|
||||
}
|
||||
@ -1155,7 +1155,7 @@ public:
|
||||
newContext.push_back("[" + std::to_string(index) + "]");
|
||||
|
||||
// Create a validator for the current array item
|
||||
ValidationVisitor<AdapterType> validationVisitor(item, newContext, m_strictTypes, m_results, m_regexesCache);
|
||||
ValidationVisitor<AdapterType, RegexEngine> validationVisitor(item, newContext, m_strictTypes, m_results, m_regexesCache);
|
||||
|
||||
// Perform validation
|
||||
if (!validationVisitor.validateSchema(*itemsSubschema)) {
|
||||
@ -1420,7 +1420,7 @@ private:
|
||||
ValidationResults *results,
|
||||
unsigned int *numValidated,
|
||||
bool *validated,
|
||||
std::unordered_map<std::string, std::regex>& regexesCache)
|
||||
std::unordered_map<std::string, RegexEngine>& regexesCache)
|
||||
: m_arr(arr),
|
||||
m_context(context),
|
||||
m_continueOnSuccess(continueOnSuccess),
|
||||
@ -1477,7 +1477,7 @@ private:
|
||||
ValidationResults * const m_results;
|
||||
unsigned int * const m_numValidated;
|
||||
bool * const m_validated;
|
||||
std::unordered_map<std::string, std::regex>& m_regexesCache;
|
||||
std::unordered_map<std::string, RegexEngine>& m_regexesCache;
|
||||
};
|
||||
|
||||
/**
|
||||
@ -1563,7 +1563,7 @@ private:
|
||||
ValidationResults *results,
|
||||
std::set<std::string> *propertiesMatched,
|
||||
bool *validated,
|
||||
std::unordered_map<std::string, std::regex>& regexesCache)
|
||||
std::unordered_map<std::string, RegexEngine>& regexesCache)
|
||||
: m_object(object),
|
||||
m_context(context),
|
||||
m_continueOnSuccess(continueOnSuccess),
|
||||
@ -1640,7 +1640,7 @@ private:
|
||||
ValidationResults * const m_results;
|
||||
std::set<std::string> * const m_propertiesMatched;
|
||||
bool * const m_validated;
|
||||
std::unordered_map<std::string, std::regex>& m_regexesCache;
|
||||
std::unordered_map<std::string, RegexEngine>& m_regexesCache;
|
||||
};
|
||||
|
||||
/**
|
||||
@ -1659,7 +1659,7 @@ private:
|
||||
ValidationResults *results,
|
||||
std::set<std::string> *propertiesMatched,
|
||||
bool *validated,
|
||||
std::unordered_map<std::string, std::regex>& regexesCache)
|
||||
std::unordered_map<std::string, RegexEngine>& regexesCache)
|
||||
: m_object(object),
|
||||
m_context(context),
|
||||
m_continueOnSuccess(continueOnSuccess),
|
||||
@ -1716,7 +1716,7 @@ private:
|
||||
ValidationResults * const m_results;
|
||||
std::set<std::string> * const m_propertiesMatched;
|
||||
bool * const m_validated;
|
||||
std::unordered_map<std::string, std::regex>& m_regexesCache;
|
||||
std::unordered_map<std::string, RegexEngine>& m_regexesCache;
|
||||
};
|
||||
|
||||
/**
|
||||
@ -1842,7 +1842,7 @@ private:
|
||||
*
|
||||
* @return true if the visitor returns successfully, false otherwise.
|
||||
*/
|
||||
static bool validationCallback(const constraints::Constraint &constraint, ValidationVisitor<AdapterType> &visitor)
|
||||
static bool validationCallback(const constraints::Constraint &constraint, ValidationVisitor<AdapterType, RegexEngine> &visitor)
|
||||
{
|
||||
return constraint.accept(visitor);
|
||||
}
|
||||
@ -1901,7 +1901,7 @@ private:
|
||||
bool m_strictTypes;
|
||||
|
||||
/// Cached regex objects for pattern constraint
|
||||
std::unordered_map<std::string, std::regex>& m_regexesCache;
|
||||
std::unordered_map<std::string, RegexEngine>& m_regexesCache;
|
||||
};
|
||||
|
||||
} // namespace valijson
|
||||
|
@ -8,10 +8,15 @@ namespace valijson {
|
||||
class Schema;
|
||||
class ValidationResults;
|
||||
|
||||
|
||||
/**
|
||||
* @brief Class that provides validation functionality.
|
||||
* @brief Class that provides validation functionality.
|
||||
*
|
||||
* @tparam RegexEngine regular expression engine used for pattern constraint validation.
|
||||
|
||||
*/
|
||||
class Validator
|
||||
template <typename RegexEngine>
|
||||
class ValidatorT
|
||||
{
|
||||
public:
|
||||
enum TypeCheckingMode
|
||||
@ -23,7 +28,7 @@ public:
|
||||
/**
|
||||
* @brief Construct a Validator that uses strong type checking by default
|
||||
*/
|
||||
Validator()
|
||||
ValidatorT()
|
||||
: strictTypes(true) { }
|
||||
|
||||
/**
|
||||
@ -31,7 +36,7 @@ public:
|
||||
*
|
||||
* @param typeCheckingMode choice of strong or weak type checking
|
||||
*/
|
||||
Validator(TypeCheckingMode typeCheckingMode)
|
||||
ValidatorT(TypeCheckingMode typeCheckingMode)
|
||||
: strictTypes(typeCheckingMode == kStrongTypes) { }
|
||||
|
||||
/**
|
||||
@ -58,7 +63,7 @@ public:
|
||||
ValidationResults *results)
|
||||
{
|
||||
// Construct a ValidationVisitor to perform validation at the root level
|
||||
ValidationVisitor<AdapterType> v(target,
|
||||
ValidationVisitor<AdapterType, RegexEngine> v(target,
|
||||
std::vector<std::string>(1, "<root>"), strictTypes, results, regexesCache);
|
||||
|
||||
return v.validateSchema(schema);
|
||||
@ -70,7 +75,25 @@ private:
|
||||
bool strictTypes;
|
||||
|
||||
/// Cached regex objects for pattern constraint. Key - pattern.
|
||||
std::unordered_map<std::string, std::regex> regexesCache;
|
||||
std::unordered_map<std::string, RegexEngine> regexesCache;
|
||||
};
|
||||
|
||||
/**
 * @brief  Default regular expression engine, implemented on top of std::regex
 *
 * Provides the two operations that ValidatorT requires of its RegexEngine
 * template parameter: construction from a pattern string, and a static
 * search() function taking the subject string and a compiled engine.
 */
struct DefaultRegexEngine
{
    /**
     * @brief  Compile a pattern into a std::regex
     *
     * The constructor is explicit so that a plain string is never silently
     * converted into a compiled regular expression.
     *
     * @param  pattern  regular expression source text
     *
     * @throws std::regex_error if std::regex rejects the pattern
     */
    explicit DefaultRegexEngine(const std::string& pattern)
      : regex(pattern) { }

    /**
     * @brief  Check whether any part of a string matches a compiled pattern
     *
     * @param  s  string to search in
     * @param  r  engine holding the compiled pattern
     *
     * @return true if std::regex_search finds a match in s, false otherwise
     */
    static bool search(const std::string& s, const DefaultRegexEngine& r)
    {
        return std::regex_search(s, r.regex);
    }

    /// Compiled form of the pattern passed to the constructor
    std::regex regex;
};
|
||||
|
||||
using Validator = ValidatorT<DefaultRegexEngine>;
|
||||
|
||||
} // namespace valijson
|
||||
|
128
tests/test_validator_with_custom_regular_expression_engine.cpp
Normal file
128
tests/test_validator_with_custom_regular_expression_engine.cpp
Normal file
@ -0,0 +1,128 @@
|
||||
#ifdef _MSC_VER
|
||||
#pragma warning(disable: 4706)
|
||||
#include <picojson.h>
|
||||
#pragma warning(default: 4706)
|
||||
#else
|
||||
#include <picojson.h>
|
||||
#endif
|
||||
|
||||
#include <iostream>
|
||||
|
||||
#include <gtest/gtest.h>
|
||||
|
||||
#include <valijson/adapters/json11_adapter.hpp>
|
||||
#include <valijson/adapters/jsoncpp_adapter.hpp>
|
||||
#include <valijson/adapters/rapidjson_adapter.hpp>
|
||||
#include <valijson/adapters/picojson_adapter.hpp>
|
||||
#include <valijson/adapters/nlohmann_json_adapter.hpp>
|
||||
#include <valijson/utils/json11_utils.hpp>
|
||||
#include <valijson/utils/jsoncpp_utils.hpp>
|
||||
#include <valijson/utils/picojson_utils.hpp>
|
||||
#include <valijson/utils/rapidjson_utils.hpp>
|
||||
#include <valijson/utils/nlohmann_json_utils.hpp>
|
||||
#include <valijson/schema.hpp>
|
||||
#include <valijson/schema_parser.hpp>
|
||||
#include <valijson/validation_results.hpp>
|
||||
#include <valijson/validator.hpp>
|
||||
#include <valijson/exceptions.hpp>
|
||||
#ifdef VALIJSON_BUILD_POCO_ADAPTER
|
||||
#include <valijson/adapters/poco_json_adapter.hpp>
|
||||
#include <valijson/utils/poco_json_utils.hpp>
|
||||
#endif
|
||||
|
||||
using valijson::adapters::AdapterTraits;
|
||||
using valijson::adapters::RapidJsonAdapter;
|
||||
using valijson::Schema;
|
||||
using valijson::SchemaParser;
|
||||
using valijson::Validator;
|
||||
|
||||
namespace
|
||||
{
|
||||
/**
 * @brief  Write a string, followed by a newline, to a file
 *
 * Any existing content of the file is discarded. Failure to open or write the
 * file is not reported by this function.
 *
 * @param  filename  path of the file to create or overwrite
 * @param  content   text to write before the trailing newline
 */
void createFileFromContent(const std::string& filename, const std::string& content)
{
    std::ofstream outfile(filename, std::ofstream::out | std::ofstream::trunc);

    // The stream's destructor flushes and closes the file when it goes out of
    // scope, so neither std::endl nor an explicit close() is needed
    outfile << content << '\n';
}
|
||||
|
||||
}
|
||||
|
||||
//Potentially :
|
||||
// Define a struct CustomRegexEngine that handles both problems and use it as a replacement for Validator.
|
||||
//using CustomValidator = ValidatorT<CustomRegexEngine>;
|
||||
|
||||
/**
 * Validates a very long string (100000 characters) against a simple
 * 'pattern' constraint.
 *
 * The test is skipped unconditionally: according to the skip message below,
 * running it with the default Validator causes a segmentation fault.
 */
TEST(valijson, valijson_be_robust_against_bad_regular_expression)
{
    GTEST_SKIP() << "Skipping begin it cause segmentation fault with default Validator";

    const std::string schema = R"(
{
"properties": {
"text": {
"pattern": "^[\\s\\S]+$",
"type": "string"
}
}
}
)";

    // Round-trip the schema through a file so that it is loaded the same way
    // as any other document
    createFileFromContent("schema.json", schema);
    rapidjson::Document mySchemaDoc;
    ASSERT_TRUE(valijson::utils::loadDocument("schema.json", mySchemaDoc));

    Schema mySchema;
    SchemaParser parser;
    RapidJsonAdapter mySchemaAdapter(mySchemaDoc);
    parser.populateSchema(mySchemaAdapter, mySchema);

    // Build a document whose "text" property is 100000 'A' characters long
    rapidjson::Document myTargetDoc;
    std::string payload = "{ \"text\" : \"";
    payload.append(100000, 'A');
    payload += "\"}";

    createFileFromContent("payload.json", payload);

    ASSERT_TRUE(valijson::utils::loadDocument("payload.json", myTargetDoc));

    // This test crashes (segfault) if the validator is not customized with a
    // custom regular expression engine
    Validator validator;
    RapidJsonAdapter myTargetAdapter(myTargetDoc);
    ASSERT_TRUE(validator.validate(mySchema, myTargetAdapter, nullptr));
}
|
||||
|
||||
/**
 * Validates a short string against a 'pattern' constraint built from nested
 * quantifiers.
 *
 * The test is skipped unconditionally: according to the skip message below,
 * it hangs with the default Validator because catastrophic backtracking is
 * not managed.
 */
TEST(valijson, valijson_be_robust_against_catastrophic_backtracking_regular_expression)
{
    GTEST_SKIP() << "Skipping begin it hangs due to non management of catastrophic backtracking with default Validator";

    const std::string schema = R"(
{
"properties": {
"text": {
"pattern": "((A+)*)+$",
"type": "string"
}
}
}
)";

    // Persist the schema, then read it back as a RapidJSON document
    createFileFromContent("schema.json", schema);
    rapidjson::Document schemaDocument;
    ASSERT_TRUE(valijson::utils::loadDocument("schema.json", schemaDocument));

    // Turn the loaded document into a valijson schema
    Schema parsedSchema;
    SchemaParser schemaParser;
    RapidJsonAdapter schemaAdapter(schemaDocument);
    schemaParser.populateSchema(schemaAdapter, parsedSchema);

    // Persist the target document, then read it back
    const std::string payload = "{ \"text\" : \"AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAC\"}";
    createFileFromContent("payload.json", payload);
    rapidjson::Document targetDocument;
    ASSERT_TRUE(valijson::utils::loadDocument("payload.json", targetDocument));

    // This test takes endless time if the validator is not customized with a
    // custom regular expression engine
    Validator validator;
    RapidJsonAdapter targetAdapter(targetDocument);

    // The payload is correct with regard to the regular expression, but
    // evaluating it is impossible because of catastrophic backtracking, so
    // validation is expected to return false
    ASSERT_FALSE(validator.validate(parsedSchema, targetAdapter, nullptr));
}
|
Loading…
Reference in New Issue
Block a user