Tidy up regex changes

This commit is contained in:
Tristan Penman 2024-05-17 08:52:22 +10:00
parent 31ce4a5948
commit a0e05907ae
3 changed files with 49 additions and 45 deletions

View File

@ -89,32 +89,36 @@ This will create a validator that will attempt to cast values to satisfy a schem
## Regular Expression Engine
When enforcing a 'pattern' property, a regular expression engine is used. By default, the DefaultRegexEngine uses std::regex.
std::regex has no protection against catastrophic backtracking, and the gcc implementation is so suboptimal that it can easily lead to segmentation faults.
One can customise the regular expression engine by implementing one's own wrapper for it and using a ValidatorT with the custom type.
When enforcing a 'pattern' property, a regular expression engine is used. By default, the regular expression engine (`DefaultRegexEngine`) uses `std::regex`.
Unfortunately, `std::regex` has no protection against catastrophic backtracking and the implementation in gcc is so suboptimal that it can easily lead to segmentation faults.
This behaviour can be customised by implementing a wrapper for an alternative regular expression engine.
To do this, you must implement the following interface:
The regular expression engine wrapper must implement the following interface
```cpp
struct MyRegexpEngine
{
MyRegexpEngine(const std::string& pattern)
{
//implementation specific
// implementation specific
}
static bool search(const std::string& s, const MyRegexpEngine& r)
{
//implementation specific
// implementation specific
}
};
```
Then to use it
Then to use it, you must define a custom validator type:
```cpp
using MyValidator = ValidatorT<MyRegexpEngine>;
```
Once you've done this, `MyValidator` can be used in place of the default `valijson::Validator` type.
## Memory Management
Valijson has been designed to safely manage, and eventually free, the memory that is allocated while parsing a schema or validating a document. When working with an externally loaded schema (i.e. one that is populated using the `SchemaParser` class) you can rely on RAII semantics.

View File

@ -80,20 +80,21 @@ private:
/**
* @brief Struct that provides a default Regular Expression Engine using std::regex
*
* The constructor builds a std::regex from the supplied pattern string, and the
* static search() function forwards to std::regex_search. Custom engines that
* are meant to be used in its place are expected to expose the same two
* members: a constructor taking a pattern string, and a static search().
*/
struct DefaultRegexEngine
{
// Compile the pattern once, at construction time, into the wrapped std::regex.
DefaultRegexEngine(const std::string& pattern)
: regex(pattern) { }
: regex(pattern) { }
// Returns the result of std::regex_search, i.e. whether the expression held
// by r matches some part of s (a search, not a whole-string match).
static bool search(const std::string& s, const DefaultRegexEngine& r)
{
return std::regex_search(s, r.regex);
return std::regex_search(s, r.regex);
}
private:
// Compiled expression; only reachable from outside through search().
std::regex regex;
};
using Validator = ValidatorT<DefaultRegexEngine>;
} // namespace valijson

View File

@ -36,54 +36,53 @@ using valijson::Schema;
using valijson::SchemaParser;
using valijson::Validator;
namespace
{
// Test helper: writes `content` to the file named `filename`.
// The stream is opened with out | trunc, so any previous contents of the file
// are discarded. std::endl appends a trailing newline and flushes before the
// stream is closed explicitly. No check is made that the file opened or that
// the write succeeded.
void createFileFromContent(const std::string& filename, const std::string& content)
static void createFileFromContent(const std::string& filename, const std::string& content)
{
std::ofstream outfile(filename, std::ofstream::out | std::ofstream::trunc);
outfile << content << std::endl;
outfile.close();
};
}
//Potentially :
// Define a struct CustomRegexEngine that handle both problem and use it as replacement of Validator..
// Define a struct CustomRegexEngine that handles both problems and use it as a replacement for Validator.
//using CustomValidator = ValidatorT<CustomRegexEngine>;
TEST(valijson, valijson_be_robust_against_bad_regular_expression)
{
GTEST_SKIP() << "Skipping begin it cause segmentation fault with default Validator";
const std::string schema = R"(
GTEST_SKIP() << "Skipping: causes segmentation fault with default Validator";
static const std::string schema = R"(
{
"properties": {
"text": {
"pattern": "^[\\s\\S]+$",
"type": "string"
}
"text": {
"pattern": "^[\\s\\S]+$",
"type": "string"
}
}
}
}
)";
createFileFromContent("schema.json", schema);
rapidjson::Document mySchemaDoc;
ASSERT_TRUE(valijson::utils::loadDocument("schema.json", mySchemaDoc));
Schema mySchema;
SchemaParser parser;
RapidJsonAdapter mySchemaAdapter(mySchemaDoc);
parser.populateSchema(mySchemaAdapter, mySchema);
rapidjson::Document myTargetDoc;
std::string payload = "{ \"text\" : \"";
for (int i = 0; i< 100000; ++i)
payload += 'A';
for (int i = 0; i< 100000; ++i) {
payload += 'A';
}
payload += "\"}";
createFileFromContent("payload.json", payload);
ASSERT_TRUE(valijson::utils::loadDocument("payload.json", myTargetDoc));
//This test crash (segfault) is validator is not customized with custom RegexpEngine
// This test crash (segfault) is validator is not customized with custom RegexpEngine
Validator validator;
RapidJsonAdapter myTargetAdapter(myTargetDoc);
ASSERT_TRUE(validator.validate(mySchema, myTargetAdapter, nullptr));
@ -91,32 +90,32 @@ TEST(valijson, valijson_be_robust_against_bad_regular_expression)
TEST(valijson, valijson_be_robust_against_catastrophic_backtracking_regular_expression)
{
GTEST_SKIP() << "Skipping begin it hangs due to non management of catastrophic backtracking with default Validator";
const std::string schema = R"(
GTEST_SKIP() << "Skipping: hangs due to non management of catastrophic backtracking with default Validator";
static const std::string schema = R"(
{
"properties": {
"text": {
"pattern": "((A+)*)+$",
"type": "string"
}
"text": {
"pattern": "((A+)*)+$",
"type": "string"
}
}
}
}
)";
createFileFromContent("schema.json", schema);
rapidjson::Document mySchemaDoc;
ASSERT_TRUE(valijson::utils::loadDocument("schema.json", mySchemaDoc));
Schema mySchema;
SchemaParser parser;
RapidJsonAdapter mySchemaAdapter(mySchemaDoc);
parser.populateSchema(mySchemaAdapter, mySchema);
rapidjson::Document myTargetDoc;
std::string payload = "{ \"text\" : \"AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAC\"}";
createFileFromContent("payload.json", payload);
ASSERT_TRUE(valijson::utils::loadDocument("payload.json", myTargetDoc));
//This test takes endless time if validator is not customized with custom RegexpEngine