DateTimeParser Validation and Performance Improvements (#4593)

This commit is contained in:
Andrew Auclair 2024-06-29 16:13:51 -04:00 committed by GitHub
parent 02b59b4d25
commit a82b766628
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
4 changed files with 210 additions and 83 deletions

View File

@ -128,9 +128,6 @@ public:
static bool isValid(const std::string& dateTime);
/// Returns true if dateTime validates against at least one supported format.
typedef std::unordered_set<const std::string*> RegexList;
static RegexList REGEX_LIST;
private:
typedef std::unordered_set<std::string> Formatlist;
static Formatlist FORMAT_LIST;

View File

@ -129,19 +129,6 @@ const std::string DateTimeFormat::MONTH_NAMES[] =
};
// Definition of the static REGEX_LIST member: a set holding the addresses of
// the class's *_REGEX pattern strings (ISO8601, RFC822, RFC1123, HTTP, RFC850,
// RFC1036, ASCTIME, SORTABLE). The elements are pointers, not copies, so the
// set refers to the pattern strings defined elsewhere in DateTimeFormat.
DateTimeFormat::RegexList DateTimeFormat::REGEX_LIST =
{
&DateTimeFormat::ISO8601_REGEX,
&DateTimeFormat::RFC822_REGEX,
&DateTimeFormat::RFC1123_REGEX,
&DateTimeFormat::HTTP_REGEX,
&DateTimeFormat::RFC850_REGEX,
&DateTimeFormat::RFC1036_REGEX,
&DateTimeFormat::ASCTIME_REGEX,
&DateTimeFormat::SORTABLE_REGEX
};
bool DateTimeFormat::hasFormat(const std::string& fmt)
{
return FORMAT_LIST.find(fmt) != FORMAT_LIST.end();
@ -150,11 +137,22 @@ bool DateTimeFormat::hasFormat(const std::string& fmt)
/// Returns true if dateTime matches at least one of the supported
/// date/time format regular expressions, false otherwise.
bool DateTimeFormat::isValid(const std::string& dateTime)
{
	// Function-local static: every pattern is compiled exactly once, on the
	// first call, instead of constructing a RegularExpression per pattern on
	// every validation.
	static const RegularExpression regexList[] = {
		RegularExpression(DateTimeFormat::ISO8601_REGEX),
		RegularExpression(DateTimeFormat::RFC822_REGEX),
		RegularExpression(DateTimeFormat::RFC1123_REGEX),
		RegularExpression(DateTimeFormat::HTTP_REGEX),
		RegularExpression(DateTimeFormat::RFC850_REGEX),
		RegularExpression(DateTimeFormat::RFC1036_REGEX),
		RegularExpression(DateTimeFormat::ASCTIME_REGEX),
		RegularExpression(DateTimeFormat::SORTABLE_REGEX)
	};

	for (const auto& regex : regexList)
	{
		if (regex.match(dateTime)) return true;
	}
	return false;
}

View File

@ -18,42 +18,140 @@
#include "Poco/Exception.h"
#include "Poco/Ascii.h"
#include "Poco/String.h"
#include <iostream>
namespace {
	/// Iterator type used by all parsing helpers in this file.
	using ParseIter = std::string::const_iterator;

	/// Returns the first position in [it, end) that holds an ASCII digit,
	/// or end if the range contains none.
	[[nodiscard]] ParseIter skipNonDigits(ParseIter it, ParseIter end)
	{
		while (it != end && !Poco::Ascii::isDigit(*it))
		{
			++it;
		}
		return it;
	}

	/// Returns the first position in [it, end) that does NOT hold an ASCII
	/// digit, or end if the whole range consists of digits.
	[[nodiscard]] ParseIter skipDigits(ParseIter it, ParseIter end)
	{
		while (it != end && Poco::Ascii::isDigit(*it))
		{
			++it;
		}
		return it;
	}

	/// Parses an unsigned decimal number of at most n digits starting
	/// exactly at it (no characters are skipped first).
	///
	/// dtStr is only used to build the exception message.
	/// On return, it points just past the last digit consumed.
	///
	/// Throws Poco::SyntaxException if there is no digit at it (this
	/// includes it == end and n <= 0), or if the digit run cannot be
	/// converted by std::stoi (e.g. the value does not fit into an int).
	[[nodiscard]] int parseNumberN(const std::string& dtStr, ParseIter& it, ParseIter end, int n)
	{
		// The number, if present, starts right here; remember the position
		// so that "nothing consumed" can be detected after the scan.
		const ParseIter numStart = it;
		for (int i = 0; it != end && i < n && Poco::Ascii::isDigit(*it); ++it, ++i)
		{
		}

		if (it == numStart)
		{
			throw Poco::SyntaxException("Invalid DateTimeString: " + dtStr + ", No number found to parse");
		}

		const std::string number(numStart, it);
		try
		{
			return std::stoi(number);
		}
		catch (const std::exception&)
		{
			// Translate std::invalid_argument / std::out_of_range into the
			// exception type callers of the parser expect.
			throw Poco::SyntaxException("Invalid DateTimeString: " + dtStr + ", invalid number: " + number);
		}
	}
}
namespace Poco {
// Parsing helper macros. All of them operate on iterators named `it` and
// `end` that must exist in the scope where the macro is expanded, and they
// advance `it` as a side effect.

// Advances `it` until it points at a digit or reaches `end`.
#define SKIP_JUNK() \
while (it != end && !Ascii::isDigit(*it)) ++it
// Advances `it` past a run of consecutive digits.
#define SKIP_DIGITS() \
while (it != end && Ascii::isDigit(*it)) ++it
// Consumes all consecutive digits and accumulates them into `var`
// (var = var*10 + digit). `var` is not reset first, so the caller must
// initialise it; no digit limit or overflow check is applied.
#define PARSE_NUMBER(var) \
while (it != end && Ascii::isDigit(*it)) var = var*10 + ((*it++) - '0')
// Like PARSE_NUMBER, but consumes at most `n` digits.
#define PARSE_NUMBER_N(var, n) \
{ int i = 0; while (i++ < n && it != end && Ascii::isDigit(*it)) var = var*10 + ((*it++) - '0'); }
// Consumes at most `n` digits into `var`, then multiplies `var` by 10 once
// for every digit missing to reach `n`, i.e. the digits read are treated as
// the leading digits of an n-digit field.
#define PARSE_FRACTIONAL_N(var, n) \
{ int i = 0; while (i < n && it != end && Ascii::isDigit(*it)) { var = var*10 + ((*it++) - '0'); i++; } while (i++ < n) var *= 10; }
/// Returns the result of applying Poco::trim() to str; the argument itself
/// is left untouched.
inline std::string cleanedInputString(const std::string& str)
{
	std::string cleaned = Poco::trim(str);
	return cleaned;
}
void DateTimeParser::parse(const std::string& fmt, const std::string& dtStr, DateTime& dateTime, int& timeZoneDifferential)
{
const auto str = cleanedInputString(dtStr);
const auto str = Poco::trim(dtStr);
if (fmt.empty() || str.empty() || (DateTimeFormat::hasFormat(fmt) && !DateTimeFormat::isValid(str)))
throw SyntaxException("Invalid DateTimeString:" + dtStr);
if (fmt.empty() || str.empty())
{
throw SyntaxException("Invalid DateTimeString: " + dtStr);
}
else if (DateTimeFormat::hasFormat(fmt) && !DateTimeFormat::isValid(str))
{
throw SyntaxException("Invalid DateTimeString: " + dtStr);
}
const auto parse_number = [&dtStr](ParseIter& it, ParseIter end)
{
ParseIter numStart = end;
for (; it != end && Poco::Ascii::isDigit(*it); ++it)
{
if (numStart == end)
{
numStart = it;
}
}
if (numStart == end)
{
throw Poco::SyntaxException("Invalid DateTimeString: " + dtStr + ", No number found to parse");
}
std::string number(numStart, it);
try
{
return std::stoi(number);
}
catch(const std::exception& e)
{
throw SyntaxException("Invalid DateTimeString: " + dtStr + ", invalid number: " + number);
}
};
const auto parseFractionalN = [dtStr](ParseIter& it, ParseIter end, int n)
{
ParseIter numStart = end;
int i = 0;
for (; it != end && i < n && Poco::Ascii::isDigit(*it); ++it, ++i)
{
if (numStart == end)
{
numStart = it;
}
}
if (numStart == end)
{
return 0;
}
std::string number(numStart, it);
int result = 0;
try
{
result = std::stoi(number);
}
catch(const std::exception& e)
{
throw SyntaxException("Invalid DateTimeString: " + dtStr + ", invalid number: " + number);
}
while (i++ < n) result *= 10;
return result;
};
int year = 0;
int month = 0;
@ -81,8 +179,8 @@ void DateTimeParser::parse(const std::string& fmt, const std::string& dtStr, Dat
{
switch (*itf)
{
case 'w':
case 'W':
case 'w': // Weekday, abbreviated
case 'W': // Weekday
while (it != end && Ascii::isSpace(*it)) ++it;
while (it != end && Ascii::isAlpha(*it)) ++it;
break;
@ -94,32 +192,33 @@ void DateTimeParser::parse(const std::string& fmt, const std::string& dtStr, Dat
case 'd':
case 'e':
case 'f':
SKIP_JUNK();
PARSE_NUMBER_N(day, 2);
it = skipNonDigits(it, end);
day = parseNumberN(dtStr, it, end, 2);
dayParsed = true;
break;
case 'm':
case 'n':
case 'o':
SKIP_JUNK();
PARSE_NUMBER_N(month, 2);
it = skipNonDigits(it, end);
month = parseNumberN(dtStr, it, end, 2);
monthParsed = true;
break;
case 'y':
SKIP_JUNK();
PARSE_NUMBER_N(year, 2);
it = skipNonDigits(it, end);
year = parseNumberN(dtStr, it, end, 2);
if (year >= 69)
year += 1900;
else
year += 2000;
break;
case 'Y':
SKIP_JUNK();
PARSE_NUMBER_N(year, 4);
it = skipNonDigits(it, end);
year = parseNumberN(dtStr, it, end, 4);
break;
case 'r':
SKIP_JUNK();
PARSE_NUMBER(year);
it = skipNonDigits(it, end);
year = parse_number(it, end);
if (year < 1000)
{
if (year >= 69)
@ -130,46 +229,53 @@ void DateTimeParser::parse(const std::string& fmt, const std::string& dtStr, Dat
break;
case 'H':
case 'h':
SKIP_JUNK();
PARSE_NUMBER_N(hour, 2);
it = skipNonDigits(it, end);
hour = parseNumberN(dtStr, it, end, 2);
break;
case 'a':
case 'A':
hour = parseAMPM(it, end, hour);
break;
case 'M':
SKIP_JUNK();
PARSE_NUMBER_N(minute, 2);
it = skipNonDigits(it, end);
minute = parseNumberN(dtStr, it, end, 2);
break;
case 'S':
SKIP_JUNK();
PARSE_NUMBER_N(second, 2);
it = skipNonDigits(it, end);
second = parseNumberN(dtStr, it, end, 2);
break;
case 's':
SKIP_JUNK();
PARSE_NUMBER_N(second, 2);
it = skipNonDigits(it, end);
second = parseNumberN(dtStr, it, end, 2);
if (it != end && (*it == '.' || *it == ','))
{
++it;
PARSE_FRACTIONAL_N(millis, 3);
PARSE_FRACTIONAL_N(micros, 3);
SKIP_DIGITS();
if (it != end && !Ascii::isDigit(*it))
{
throw SyntaxException("Invalid DateTimeString: " + dtStr + ", missing millisecond");
}
millis = parseFractionalN(it, end, 3);
micros = parseFractionalN(it, end, 3);
it = skipDigits(it, end);
}
break;
case 'i':
SKIP_JUNK();
PARSE_NUMBER_N(millis, 3);
it = skipNonDigits(it, end);
millis = parseNumberN(dtStr, it, end, 3);
break;
case 'c':
SKIP_JUNK();
PARSE_NUMBER_N(millis, 1);
it = skipNonDigits(it, end);
millis = parseNumberN(dtStr, it, end, 1);
millis *= 100;
break;
case 'F':
SKIP_JUNK();
PARSE_FRACTIONAL_N(millis, 3);
PARSE_FRACTIONAL_N(micros, 3);
SKIP_DIGITS();
it = skipNonDigits(it, end);
millis = parseNumberN(dtStr, it, end, 3);
micros = parseNumberN(dtStr, it, end, 3);
it = skipDigits(it, end);
break;
case 'z':
case 'Z':
@ -233,7 +339,7 @@ DateTime DateTimeParser::parse(const std::string& str, int& timeZoneDifferential
bool DateTimeParser::tryParse(const std::string& dtStr, DateTime& dateTime, int& timeZoneDifferential)
{
const auto str = cleanedInputString(dtStr);
const auto str = Poco::trim(dtStr);
if (str.length() < 4) return false;
@ -338,12 +444,28 @@ int DateTimeParser::parseTZD(std::string::const_iterator& it, const std::string:
int sign = *it == '+' ? 1 : -1;
++it;
int hours = 0;
PARSE_NUMBER_N(hours, 2);
try
{
hours = parseNumberN("", it, end, 2);
}
catch(const SyntaxException& e)
{
throw SyntaxException("Timezone invalid number: hours");
}
if (hours < 0 || hours > 23)
throw SyntaxException("Timezone difference hours out of range");
if (it != end && *it == ':') ++it;
int minutes = 0;
PARSE_NUMBER_N(minutes, 2);
try
{
minutes = parseNumberN("", it, end, 2);
}
catch(const SyntaxException& e)
{
throw SyntaxException("Timezone invalid number: minutes");
}
if (minutes < 0 || minutes > 59)
throw SyntaxException("Timezone difference minutes out of range");
tzd += sign*(hours*3600 + minutes*60);

View File

@ -177,6 +177,7 @@ void DateTimeParserTest::testISO8601Frac()
assertTrue (dt.microsecond() == 0);
assertTrue (tzd == 0);
testBad(DateTimeFormat::ISO8601_FRAC_FORMAT, "2005-01-08T12:30:00.1J", tzd);
testBad(DateTimeFormat::ISO8601_FRAC_FORMAT, "2005-01-08T12:30:00.Z", tzd);
dt = DateTimeParser::parse(DateTimeFormat::ISO8601_FRAC_FORMAT, "2005-01-08T12:30:00.123+01:00", tzd);
assertTrue (dt.year() == 2005);
@ -616,6 +617,15 @@ void DateTimeParserTest::testCustom()
catch (SyntaxException&)
{
}
// bad year (not a number)
testBad("%y", "YY", tzd);
// bad year (number too big)
testBad("%r", "123456789101112131415", tzd);
// check that an invalid millisecond is detected with a custom format
testBad("T%H:%M:%s %z", "T12:30:00.Z", tzd);
}