From a0e05907ae3f54df2d0bd7d83e3c7b73abe3aae9 Mon Sep 17 00:00:00 2001 From: Tristan Penman Date: Fri, 17 May 2024 08:52:22 +1000 Subject: [PATCH] Tidy up regex changes --- README.md | 20 +++--- include/valijson/validator.hpp | 11 ++-- ..._with_custom_regular_expression_engine.cpp | 63 +++++++++---------- 3 files changed, 49 insertions(+), 45 deletions(-) diff --git a/README.md b/README.md index 88e845f..539c9b1 100644 --- a/README.md +++ b/README.md @@ -89,32 +89,36 @@ This will create a validator that will attempt to cast values to satisfy a schem ## Regular Expression Engine -When enforcing a 'pattern' property, a regular expression engine is in used. By default, the DefaultRegexEngine use std::regex. -std::regex has no protection against catastrophic backtracking and implementation with gcc is so suboptimal that it can easily leads to segmentation fault. -One can customise the regular expression engine by implementing it's own wrapper to it and using a ValidatorT with the custom type. +When enforcing a 'pattern' property, a regular expression engine is used. By default, the default regular expression engine (`DefaultRegexEngine`) uses `std::regex`. +Unfortunately, `std::regex` has no protection against catastrophic backtracking and the implementation in gcc is so suboptimal that it can easily lead to segmentation faults. + +This behaviour can be customised by implementing a wrapper for an alternative regular expression engine. 
+ +To do this, you must implement the following interface: -The regular expression engine wrapper must implement the following interface ```cpp struct MyRegexpEngine { MyRegexpEngine(const std::string& pattern) { - //implementation specific + // implementation specific } static bool search(const std::string& s, const MyRegexpEngine& r) { - //implementation specific + // implementation specific } }; - ``` -Then to use it +Then to use it, you must define a custom validator type: + ```cpp using MyValidator = ValidatorT<MyRegexpEngine>; ``` +Once you've done this, `MyValidator` can be used in place of the default `valijson::Validator` type. + ## Memory Management Valijson has been designed to safely manage, and eventually free, the memory that is allocated while parsing a schema or validating a document. When working with an externally loaded schema (i.e. one that is populated using the `SchemaParser` class) you can rely on RAII semantics. diff --git a/include/valijson/validator.hpp b/include/valijson/validator.hpp index 305f44b..14028ce 100644 --- a/include/valijson/validator.hpp +++ b/include/valijson/validator.hpp @@ -80,20 +80,21 @@ private: /** * @brief Struct that provides a default Regular Expression Engine using std::regex - * */ struct DefaultRegexEngine { DefaultRegexEngine(const std::string& pattern) - : regex(pattern) { } + : regex(pattern) { } static bool search(const std::string& s, const DefaultRegexEngine& r) { - return std::regex_search(s, r.regex); + return std::regex_search(s, r.regex); } + +private: std::regex regex; }; - + using Validator = ValidatorT<DefaultRegexEngine>; - + } // namespace valijson diff --git a/tests/test_validator_with_custom_regular_expression_engine.cpp b/tests/test_validator_with_custom_regular_expression_engine.cpp index 1fd5e0a..57f09d1 100644 --- a/tests/test_validator_with_custom_regular_expression_engine.cpp +++ b/tests/test_validator_with_custom_regular_expression_engine.cpp @@ -36,54 +36,53 @@ using valijson::Schema; using valijson::SchemaParser; using 
valijson::Validator; -namespace -{ -void createFileFromContent(const std::string& filename, const std::string& content) +static void createFileFromContent(const std::string& filename, const std::string& content) { std::ofstream outfile(filename, std::ofstream::out | std::ofstream::trunc); outfile << content << std::endl; outfile.close(); }; - -} //Potentially : -// Define a struct CustomRegexEngine that handle both problem and use it as replacement of Validator.. +// Define a struct CustomRegexEngine that handles both problems and use it as a replacement for Validator. //using CustomValidator = ValidatorT<CustomRegexEngine>; TEST(valijson, valijson_be_robust_against_bad_regular_expression) { - GTEST_SKIP() << "Skipping begin it cause segmentation fault with default Validator"; - const std::string schema = R"( + GTEST_SKIP() << "Skipping: causes segmentation fault with default Validator"; + + static const std::string schema = R"( { "properties": { - "text": { - "pattern": "^[\\s\\S]+$", - "type": "string" - } + "text": { + "pattern": "^[\\s\\S]+$", + "type": "string" + } } - } + } )"; - + createFileFromContent("schema.json", schema); rapidjson::Document mySchemaDoc; ASSERT_TRUE(valijson::utils::loadDocument("schema.json", mySchemaDoc)); - + Schema mySchema; SchemaParser parser; RapidJsonAdapter mySchemaAdapter(mySchemaDoc); parser.populateSchema(mySchemaAdapter, mySchema); rapidjson::Document myTargetDoc; + std::string payload = "{ \"text\" : \""; - for (int i = 0; i< 100000; ++i) - payload += 'A'; + for (int i = 0; i< 100000; ++i) { + payload += 'A'; + } payload += "\"}"; - + createFileFromContent("payload.json", payload); - + ASSERT_TRUE(valijson::utils::loadDocument("payload.json", myTargetDoc)); - //This test crash (segfault) is validator is not customized with custom RegexpEngine + // This test crashes (segfault) if the validator is not customized with a custom RegexpEngine Validator validator; RapidJsonAdapter myTargetAdapter(myTargetDoc); ASSERT_TRUE(validator.validate(mySchema, 
myTargetAdapter, nullptr)); @@ -91,32 +90,32 @@ TEST(valijson, valijson_be_robust_against_bad_regular_expression) TEST(valijson, valijson_be_robust_against_catastrophic_backtracking_regular_expression) { - GTEST_SKIP() << "Skipping begin it hangs due to non management of catastrophic backtracking with default Validator"; - - const std::string schema = R"( + GTEST_SKIP() << "Skipping: hangs due to non management of catastrophic backtracking with default Validator"; + + static const std::string schema = R"( { "properties": { - "text": { - "pattern": "((A+)*)+$", - "type": "string" - } + "text": { + "pattern": "((A+)*)+$", + "type": "string" + } } - } + } )"; - + createFileFromContent("schema.json", schema); rapidjson::Document mySchemaDoc; ASSERT_TRUE(valijson::utils::loadDocument("schema.json", mySchemaDoc)); - + Schema mySchema; SchemaParser parser; RapidJsonAdapter mySchemaAdapter(mySchemaDoc); parser.populateSchema(mySchemaAdapter, mySchema); rapidjson::Document myTargetDoc; + std::string payload = "{ \"text\" : \"AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAC\"}"; - createFileFromContent("payload.json", payload); - + ASSERT_TRUE(valijson::utils::loadDocument("payload.json", myTargetDoc)); //This test takes endless time if validator is not customized with custom RegexpEngine