From a82b766628f969312177cf219e55fe4969fe2ce0 Mon Sep 17 00:00:00 2001 From: Andrew Auclair Date: Sat, 29 Jun 2024 16:13:51 -0400 Subject: [PATCH] DateTimeParser Validation and Performance Improvements (#4593) --- Foundation/include/Poco/DateTimeFormat.h | 3 - Foundation/src/DateTimeFormat.cpp | 34 ++- Foundation/src/DateTimeParser.cpp | 246 +++++++++++++----- .../testsuite/src/DateTimeParserTest.cpp | 10 + 4 files changed, 210 insertions(+), 83 deletions(-) diff --git a/Foundation/include/Poco/DateTimeFormat.h b/Foundation/include/Poco/DateTimeFormat.h index 291bf18b2..069eebf11 100644 --- a/Foundation/include/Poco/DateTimeFormat.h +++ b/Foundation/include/Poco/DateTimeFormat.h @@ -128,9 +128,6 @@ public: static bool isValid(const std::string& dateTime); /// Returns true if dateTime validates against at least one supported format. - typedef std::unordered_set<const std::string*> RegexList; - static RegexList REGEX_LIST; - private: typedef std::unordered_set<std::string> Formatlist; static Formatlist FORMAT_LIST; diff --git a/Foundation/src/DateTimeFormat.cpp b/Foundation/src/DateTimeFormat.cpp index 27057143a..0e296323f 100644 --- a/Foundation/src/DateTimeFormat.cpp +++ b/Foundation/src/DateTimeFormat.cpp @@ -129,19 +129,6 @@ const std::string DateTimeFormat::MONTH_NAMES[] = }; -DateTimeFormat::RegexList DateTimeFormat::REGEX_LIST = -{ - &DateTimeFormat::ISO8601_REGEX, - &DateTimeFormat::RFC822_REGEX, - &DateTimeFormat::RFC1123_REGEX, - &DateTimeFormat::HTTP_REGEX, - &DateTimeFormat::RFC850_REGEX, - &DateTimeFormat::RFC1036_REGEX, - &DateTimeFormat::ASCTIME_REGEX, - &DateTimeFormat::SORTABLE_REGEX -}; - - bool DateTimeFormat::hasFormat(const std::string& fmt) { return FORMAT_LIST.find(fmt) != FORMAT_LIST.end(); @@ -150,11 +137,22 @@ bool DateTimeFormat::hasFormat(const std::string& fmt) bool DateTimeFormat::isValid(const std::string& dateTime) { - for (const auto& f : REGEX_LIST) - { - if (RegularExpression(*f).match(dateTime)) return true; - } - return false; + static const RegularExpression 
regexList[] = { + RegularExpression(DateTimeFormat::ISO8601_REGEX), + RegularExpression(DateTimeFormat::RFC822_REGEX), + RegularExpression(DateTimeFormat::RFC1123_REGEX), + RegularExpression(DateTimeFormat::HTTP_REGEX), + RegularExpression(DateTimeFormat::RFC850_REGEX), + RegularExpression(DateTimeFormat::RFC1036_REGEX), + RegularExpression(DateTimeFormat::ASCTIME_REGEX), + RegularExpression(DateTimeFormat::SORTABLE_REGEX) + }; + + for (const auto& f : regexList) + { + if (f.match(dateTime)) return true; + } + return false; } diff --git a/Foundation/src/DateTimeParser.cpp b/Foundation/src/DateTimeParser.cpp index 9eb85f8c2..25afa54b4 100644 --- a/Foundation/src/DateTimeParser.cpp +++ b/Foundation/src/DateTimeParser.cpp @@ -18,42 +18,140 @@ #include "Poco/Exception.h" #include "Poco/Ascii.h" #include "Poco/String.h" +#include <string> +namespace { + using ParseIter = std::string::const_iterator; + + [[nodiscard]] ParseIter skipNonDigits(ParseIter it, ParseIter end) + { + while (it != end && !Poco::Ascii::isDigit(*it)) + { + ++it; + } + return it; + } + + + [[nodiscard]] ParseIter skipDigits(ParseIter it, ParseIter end) + { + while (it != end && Poco::Ascii::isDigit(*it)) + { + ++it; + } + return it; + } + + + int parseNumberN(const std::string& dtStr, ParseIter& it, ParseIter end, int n) /* Parses at most n consecutive digits starting at it, advances it past them and returns their value; throws Poco::SyntaxException when no digit is found or std::stoi rejects the digits. dtStr is used only in the exception text. */ + { + ParseIter numStart = end; + int i = 0; + + for (; it != end && i < n && Poco::Ascii::isDigit(*it); ++it, ++i) + { + if (numStart == end) + { + numStart = it; + } + } + + if (numStart == end) + { + throw Poco::SyntaxException("Invalid DateTimeString: " + dtStr + ", No number found to parse"); + } + + std::string number(numStart, it); + try + { + return std::stoi(number); + } + catch(const std::exception&) + { + throw Poco::SyntaxException("Invalid DateTimeString: " + dtStr + ", invalid number: " + number); + } + } +} namespace Poco { -#define SKIP_JUNK() \ - while (it != end && !Ascii::isDigit(*it)) ++it - - -#define SKIP_DIGITS() \ - while (it != end && Ascii::isDigit(*it)) ++it - - 
-#define PARSE_NUMBER(var) \ - while (it != end && Ascii::isDigit(*it)) var = var*10 + ((*it++) - '0') - - -#define PARSE_NUMBER_N(var, n) \ - { int i = 0; while (i++ < n && it != end && Ascii::isDigit(*it)) var = var*10 + ((*it++) - '0'); } - - -#define PARSE_FRACTIONAL_N(var, n) \ - { int i = 0; while (i < n && it != end && Ascii::isDigit(*it)) { var = var*10 + ((*it++) - '0'); i++; } while (i++ < n) var *= 10; } - - -inline std::string cleanedInputString(const std::string& str) -{ - return Poco::trim(str); -} - void DateTimeParser::parse(const std::string& fmt, const std::string& dtStr, DateTime& dateTime, int& timeZoneDifferential) { - const auto str = cleanedInputString(dtStr); + const auto str = Poco::trim(dtStr); - if (fmt.empty() || str.empty() || (DateTimeFormat::hasFormat(fmt) && !DateTimeFormat::isValid(str))) - throw SyntaxException("Invalid DateTimeString:" + dtStr); + if (fmt.empty() || str.empty()) + { + throw SyntaxException("Invalid DateTimeString: " + dtStr); + } + else if (DateTimeFormat::hasFormat(fmt) && !DateTimeFormat::isValid(str)) + { + throw SyntaxException("Invalid DateTimeString: " + dtStr); + } + + const auto parseNumber = [&dtStr](ParseIter& it, ParseIter end) /* Parses every consecutive digit at it (no length limit) and advances it; throws SyntaxException when no digit is present or std::stoi rejects the digits. */ + { + ParseIter numStart = end; + + for (; it != end && Poco::Ascii::isDigit(*it); ++it) + { + if (numStart == end) + { + numStart = it; + } + } + + if (numStart == end) + { + throw Poco::SyntaxException("Invalid DateTimeString: " + dtStr + ", No number found to parse"); + } + + std::string number(numStart, it); + try + { + return std::stoi(number); + } + catch(const std::exception&) + { + throw SyntaxException("Invalid DateTimeString: " + dtStr + ", invalid number: " + number); + } + }; + + + + const auto parseFractionalN = [&dtStr](ParseIter& it, ParseIter end, int n) /* Parses up to n digits as a fraction scaled to n places (e.g. "5" with n == 3 gives 500); returns 0 without throwing when no digit is present. dtStr is captured by reference to avoid copying the input on every parse() call. */ + { + ParseIter numStart = end; + int i = 0; + + for (; it != end && i < n && Poco::Ascii::isDigit(*it); ++it, ++i) + { + if (numStart == end) + { + numStart = it; + } + } + + if (numStart == end) + { + return 
0; + } + + std::string number(numStart, it); + int result = 0; + try + { + result = std::stoi(number); + } + catch(const std::exception&) + { + throw SyntaxException("Invalid DateTimeString: " + dtStr + ", invalid number: " + number); + } + + while (i++ < n) result *= 10; + + return result; + }; int year = 0; int month = 0; @@ -81,8 +179,8 @@ void DateTimeParser::parse(const std::string& fmt, const std::string& dtStr, Dat { switch (*itf) { - case 'w': - case 'W': + case 'w': // Weekday, abbreviated + case 'W': // Weekday while (it != end && Ascii::isSpace(*it)) ++it; while (it != end && Ascii::isAlpha(*it)) ++it; break; @@ -94,32 +192,33 @@ void DateTimeParser::parse(const std::string& fmt, const std::string& dtStr, Dat case 'd': case 'e': case 'f': - SKIP_JUNK(); - PARSE_NUMBER_N(day, 2); + it = skipNonDigits(it, end); + day = parseNumberN(dtStr, it, end, 2); dayParsed = true; break; case 'm': case 'n': case 'o': - SKIP_JUNK(); - PARSE_NUMBER_N(month, 2); + it = skipNonDigits(it, end); + month = parseNumberN(dtStr, it, end, 2); monthParsed = true; break; case 'y': - SKIP_JUNK(); - PARSE_NUMBER_N(year, 2); + it = skipNonDigits(it, end); + year = parseNumberN(dtStr, it, end, 2); if (year >= 69) year += 1900; else year += 2000; break; case 'Y': - SKIP_JUNK(); - PARSE_NUMBER_N(year, 4); + it = skipNonDigits(it, end); + year = parseNumberN(dtStr, it, end, 4); break; case 'r': - SKIP_JUNK(); - PARSE_NUMBER(year); + it = skipNonDigits(it, end); + year = parseNumber(it, end); + if (year < 1000) { if (year >= 69) @@ -130,46 +229,53 @@ void DateTimeParser::parse(const std::string& fmt, const std::string& dtStr, Dat break; case 'H': case 'h': - SKIP_JUNK(); - PARSE_NUMBER_N(hour, 2); + it = skipNonDigits(it, end); + hour = parseNumberN(dtStr, it, end, 2); break; case 'a': case 'A': hour = parseAMPM(it, end, hour); break; case 'M': - SKIP_JUNK(); - PARSE_NUMBER_N(minute, 2); + it = skipNonDigits(it, end); + minute = parseNumberN(dtStr, it, end, 2); break; case 'S': - 
SKIP_JUNK(); - PARSE_NUMBER_N(second, 2); + it = skipNonDigits(it, end); + second = parseNumberN(dtStr, it, end, 2); break; case 's': - SKIP_JUNK(); - PARSE_NUMBER_N(second, 2); + it = skipNonDigits(it, end); + second = parseNumberN(dtStr, it, end, 2); + if (it != end && (*it == '.' || *it == ',')) { ++it; - PARSE_FRACTIONAL_N(millis, 3); - PARSE_FRACTIONAL_N(micros, 3); - SKIP_DIGITS(); + + if (it != end && !Ascii::isDigit(*it)) + { + throw SyntaxException("Invalid DateTimeString: " + dtStr + ", missing millisecond"); + } + + millis = parseFractionalN(it, end, 3); + micros = parseFractionalN(it, end, 3); + it = skipDigits(it, end); } break; case 'i': - SKIP_JUNK(); - PARSE_NUMBER_N(millis, 3); + it = skipNonDigits(it, end); + millis = parseNumberN(dtStr, it, end, 3); break; case 'c': - SKIP_JUNK(); - PARSE_NUMBER_N(millis, 1); + it = skipNonDigits(it, end); + millis = parseNumberN(dtStr, it, end, 1); millis *= 100; break; case 'F': - SKIP_JUNK(); - PARSE_FRACTIONAL_N(millis, 3); - PARSE_FRACTIONAL_N(micros, 3); - SKIP_DIGITS(); + it = skipNonDigits(it, end); + millis = parseFractionalN(it, end, 3); /* fractional parse, as in the removed PARSE_FRACTIONAL_N: short fractions are scaled and absent digits yield 0 instead of throwing */ + micros = parseFractionalN(it, end, 3); + it = skipDigits(it, end); break; case 'z': case 'Z': @@ -233,7 +339,7 @@ DateTime DateTimeParser::parse(const std::string& str, int& timeZoneDifferential bool DateTimeParser::tryParse(const std::string& dtStr, DateTime& dateTime, int& timeZoneDifferential) { - const auto str = cleanedInputString(dtStr); + const auto str = Poco::trim(dtStr); if (str.length() < 4) return false; @@ -338,12 +444,28 @@ int DateTimeParser::parseTZD(std::string::const_iterator& it, const std::string: int sign = *it == '+' ? 
1 : -1; ++it; int hours = 0; - PARSE_NUMBER_N(hours, 2); + try + { + hours = parseNumberN("", it, end, 2); /* the empty dtStr only feeds the message of an exception that is replaced just below */ + } + catch(const SyntaxException&) + { + throw SyntaxException("Timezone invalid number: hours"); + } + if (hours < 0 || hours > 23) throw SyntaxException("Timezone difference hours out of range"); if (it != end && *it == ':') ++it; int minutes = 0; - PARSE_NUMBER_N(minutes, 2); + try + { + minutes = parseNumberN("", it, end, 2); + } + catch(const SyntaxException&) + { + throw SyntaxException("Timezone invalid number: minutes"); + } + if (minutes < 0 || minutes > 59) throw SyntaxException("Timezone difference minutes out of range"); tzd += sign*(hours*3600 + minutes*60); diff --git a/Foundation/testsuite/src/DateTimeParserTest.cpp b/Foundation/testsuite/src/DateTimeParserTest.cpp index 8bae2bd74..e0afc67fd 100644 --- a/Foundation/testsuite/src/DateTimeParserTest.cpp +++ b/Foundation/testsuite/src/DateTimeParserTest.cpp @@ -177,6 +177,7 @@ void DateTimeParserTest::testISO8601Frac() assertTrue (dt.microsecond() == 0); assertTrue (tzd == 0); testBad(DateTimeFormat::ISO8601_FRAC_FORMAT, "2005-01-08T12:30:00.1J", tzd); + testBad(DateTimeFormat::ISO8601_FRAC_FORMAT, "2005-01-08T12:30:00.Z", tzd); dt = DateTimeParser::parse(DateTimeFormat::ISO8601_FRAC_FORMAT, "2005-01-08T12:30:00.123+01:00", tzd); assertTrue (dt.year() == 2005); @@ -616,6 +617,15 @@ void DateTimeParserTest::testCustom() catch (SyntaxException&) { } + + // bad year (not a number) + testBad("%y", "YY", tzd); + + // bad year (number too big) + testBad("%r", "123456789101112131415", tzd); + + // check that an invalid millisecond is detected with a custom format + testBad("T%H:%M:%s %z", "T12:30:00.Z", tzd); }