Tidy up regex changes

This commit is contained in:
Tristan Penman 2024-05-17 08:52:22 +10:00
parent 31ce4a5948
commit a0e05907ae
3 changed files with 49 additions and 45 deletions

View File

@ -89,32 +89,36 @@ This will create a validator that will attempt to cast values to satisfy a schem
## Regular Expression Engine ## Regular Expression Engine
When enforcing a 'pattern' property, a regular expression engine is in used. By default, the DefaultRegexEngine use std::regex. When enforcing a 'pattern' property, a regular expression engine is used. By default, the default regular expression engine (`DefaultRegexEngine`) uses `std::regex`.
std::regex has no protection against catastrophic backtracking and implementation with gcc is so suboptimal that it can easily leads to segmentation fault. Unfortunately, `std::regex` has no protection against catastrophic backtracking and the implementation in gcc is so suboptimal that it can easily lead to segmentation faults.
One can customise the regular expression engine by implementing it's own wrapper to it and using a ValidatorT with the custom type.
This behaviour can be customised by implementing a wrapper for an alternative regular expression engine.
To do this, you must implement the following interface:
The regular expression engine wrapper must implement the following interface
```cpp ```cpp
struct MyRegexpEngine struct MyRegexpEngine
{ {
MyRegexpEngine(const std::string& pattern) MyRegexpEngine(const std::string& pattern)
{ {
//implementation specific // implementation specific
} }
static bool search(const std::string& s, const MyRegexpEngine& r) static bool search(const std::string& s, const MyRegexpEngine& r)
{ {
//implementation specific // implementation specific
} }
}; };
``` ```
Then to use it Then to use it, you must define a custom validator type:
```cpp ```cpp
using MyValidator = ValidatorT<MyRegexpEngine>; using MyValidator = ValidatorT<MyRegexpEngine>;
``` ```
Once you've done this, `MyValidator` can be used in place of the default `valijson::Validator` type.
## Memory Management ## Memory Management
Valijson has been designed to safely manage, and eventually free, the memory that is allocated while parsing a schema or validating a document. When working with an externally loaded schema (i.e. one that is populated using the `SchemaParser` class) you can rely on RAII semantics. Valijson has been designed to safely manage, and eventually free, the memory that is allocated while parsing a schema or validating a document. When working with an externally loaded schema (i.e. one that is populated using the `SchemaParser` class) you can rely on RAII semantics.

View File

@ -80,17 +80,18 @@ private:
/** /**
* @brief Struct that provides a default Regular Expression Engine using std::regex * @brief Struct that provides a default Regular Expression Engine using std::regex
*
*/ */
struct DefaultRegexEngine struct DefaultRegexEngine
{ {
DefaultRegexEngine(const std::string& pattern) DefaultRegexEngine(const std::string& pattern)
: regex(pattern) { } : regex(pattern) { }
static bool search(const std::string& s, const DefaultRegexEngine& r) static bool search(const std::string& s, const DefaultRegexEngine& r)
{ {
return std::regex_search(s, r.regex); return std::regex_search(s, r.regex);
} }
private:
std::regex regex; std::regex regex;
}; };

View File

@ -36,33 +36,30 @@ using valijson::Schema;
using valijson::SchemaParser; using valijson::SchemaParser;
using valijson::Validator; using valijson::Validator;
namespace static void createFileFromContent(const std::string& filename, const std::string& content)
{
void createFileFromContent(const std::string& filename, const std::string& content)
{ {
std::ofstream outfile(filename, std::ofstream::out | std::ofstream::trunc); std::ofstream outfile(filename, std::ofstream::out | std::ofstream::trunc);
outfile << content << std::endl; outfile << content << std::endl;
outfile.close(); outfile.close();
}; };
}
//Potentially : //Potentially :
// Define a struct CustomRegexEngine that handle both problem and use it as replacement of Validator.. // Define a struct CustomRegexEngine that handles both problems and use it as a replacement for Validator.
//using CustomValidator = ValidatorT<CustomRegexEngine>; //using CustomValidator = ValidatorT<CustomRegexEngine>;
TEST(valijson, valijson_be_robust_against_bad_regular_expression) TEST(valijson, valijson_be_robust_against_bad_regular_expression)
{ {
GTEST_SKIP() << "Skipping begin it cause segmentation fault with default Validator"; GTEST_SKIP() << "Skipping: causes segmentation fault with default Validator";
const std::string schema = R"(
static const std::string schema = R"(
{ {
"properties": { "properties": {
"text": { "text": {
"pattern": "^[\\s\\S]+$", "pattern": "^[\\s\\S]+$",
"type": "string" "type": "string"
} }
} }
} }
)"; )";
createFileFromContent("schema.json", schema); createFileFromContent("schema.json", schema);
@ -74,16 +71,18 @@ TEST(valijson, valijson_be_robust_against_bad_regular_expression)
RapidJsonAdapter mySchemaAdapter(mySchemaDoc); RapidJsonAdapter mySchemaAdapter(mySchemaDoc);
parser.populateSchema(mySchemaAdapter, mySchema); parser.populateSchema(mySchemaAdapter, mySchema);
rapidjson::Document myTargetDoc; rapidjson::Document myTargetDoc;
std::string payload = "{ \"text\" : \""; std::string payload = "{ \"text\" : \"";
for (int i = 0; i< 100000; ++i) for (int i = 0; i< 100000; ++i) {
payload += 'A'; payload += 'A';
}
payload += "\"}"; payload += "\"}";
createFileFromContent("payload.json", payload); createFileFromContent("payload.json", payload);
ASSERT_TRUE(valijson::utils::loadDocument("payload.json", myTargetDoc)); ASSERT_TRUE(valijson::utils::loadDocument("payload.json", myTargetDoc));
//This test crash (segfault) is validator is not customized with custom RegexpEngine // This test crashes (segfault) if the validator is not customized with a custom RegexpEngine
Validator validator; Validator validator;
RapidJsonAdapter myTargetAdapter(myTargetDoc); RapidJsonAdapter myTargetAdapter(myTargetDoc);
ASSERT_TRUE(validator.validate(mySchema, myTargetAdapter, nullptr)); ASSERT_TRUE(validator.validate(mySchema, myTargetAdapter, nullptr));
@ -91,17 +90,17 @@ TEST(valijson, valijson_be_robust_against_bad_regular_expression)
TEST(valijson, valijson_be_robust_against_catastrophic_backtracking_regular_expression) TEST(valijson, valijson_be_robust_against_catastrophic_backtracking_regular_expression)
{ {
GTEST_SKIP() << "Skipping begin it hangs due to non management of catastrophic backtracking with default Validator"; GTEST_SKIP() << "Skipping: hangs due to non management of catastrophic backtracking with default Validator";
const std::string schema = R"( static const std::string schema = R"(
{ {
"properties": { "properties": {
"text": { "text": {
"pattern": "((A+)*)+$", "pattern": "((A+)*)+$",
"type": "string" "type": "string"
} }
} }
} }
)"; )";
createFileFromContent("schema.json", schema); createFileFromContent("schema.json", schema);
@ -113,8 +112,8 @@ TEST(valijson, valijson_be_robust_against_catastrophic_backtracking_regular_expr
RapidJsonAdapter mySchemaAdapter(mySchemaDoc); RapidJsonAdapter mySchemaAdapter(mySchemaDoc);
parser.populateSchema(mySchemaAdapter, mySchema); parser.populateSchema(mySchemaAdapter, mySchema);
rapidjson::Document myTargetDoc; rapidjson::Document myTargetDoc;
std::string payload = "{ \"text\" : \"AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAC\"}";
std::string payload = "{ \"text\" : \"AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAC\"}";
createFileFromContent("payload.json", payload); createFileFromContent("payload.json", payload);
ASSERT_TRUE(valijson::utils::loadDocument("payload.json", myTargetDoc)); ASSERT_TRUE(valijson::utils::loadDocument("payload.json", myTargetDoc));