Implements a simple regex matcher (to be used by death tests on Windows).

This commit is contained in:
zhanyong.wan 2009-01-29 01:28:52 +00:00
parent a32fc79c9a
commit c946ae6019
6 changed files with 854 additions and 27 deletions

View File

@ -86,6 +86,57 @@ GTEST_DECLARE_string_(death_test_style);
//
// ASSERT_EXIT(client.HangUpServer(), KilledBySIGHUP, "Hanging up!");
//
// On the regular expressions used in death tests:
//
// On POSIX-compliant systems (*nix), we use the <regex.h> library,
// which uses the POSIX extended regex syntax.
//
// On other platforms (e.g. Windows), we only support a simple regex
// syntax implemented as part of Google Test. This limited
// implementation should be enough most of the time when writing
// death tests; though it lacks many features you can find in PCRE
// or POSIX extended regex syntax. For example, we don't support
// union ("x|y"), grouping ("(xy)"), brackets ("[xy]"), and
// repetition count ("x{5,7}"), among others.
//
// Below is the syntax that we do support. We chose it to be a
// subset of both PCRE and POSIX extended regex, so it's easy to
// learn wherever you come from. In the following: 'A' denotes a
// literal character, period (.), or a single \\ escape sequence;
// 'x' and 'y' denote regular expressions; 'm' and 'n' are for
// natural numbers.
//
// c matches any literal character c
// \\d matches any decimal digit
// \\D matches any character that's not a decimal digit
// \\f matches \f
// \\n matches \n
// \\r matches \r
// \\s matches any ASCII whitespace, including \n
// \\S matches any character that's not a whitespace
// \\t matches \t
// \\v matches \v
// \\w matches any letter, _, or decimal digit
// \\W matches any character that \\w doesn't match
// \\c matches any literal character c, which must be a punctuation
// . matches any single character except \n
// A? matches 0 or 1 occurrences of A
// A* matches 0 or many occurrences of A
// A+ matches 1 or many occurrences of A
// ^ matches the beginning of a string (not that of each line)
// $ matches the end of a string (not that of each line)
// xy matches x followed by y
//
// If you accidentally use PCRE or POSIX extended regex features
// not implemented by us, you will get a run-time failure. In that
// case, please try to rewrite your regular expression within the
// above syntax.
//
// This implementation is *not* meant to be as highly tuned or robust
// as a compiled regex library, but should perform well enough for a
// death test, which already incurs significant overhead by launching
// a child process.
//
// Known caveats:
//
// A "threadsafe" style death test obtains the path to the test

View File

@ -97,6 +97,9 @@
// GTEST_HAS_TYPED_TEST - defined iff typed tests are supported.
// GTEST_HAS_TYPED_TEST_P - defined iff type-parameterized tests are
// supported.
// GTEST_USES_POSIX_RE - defined iff enhanced POSIX regex is used.
// GTEST_USES_SIMPLE_RE - defined iff our own simple regex is used;
// the above two are mutually exclusive.
//
// Macros for basic C++ coding:
// GTEST_AMBIGUOUS_ELSE_BLOCKER_ - for disabling a gcc warning.
@ -187,6 +190,23 @@
#define GTEST_OS_SOLARIS
#endif // _MSC_VER
#if defined(GTEST_OS_LINUX)
// On some platforms, <regex.h> needs someone to define size_t, and
// won't compile otherwise. We can #include it here as we already
// included <stdlib.h>, which is guaranteed to define size_t through
// <stddef.h>.
#include <regex.h> // NOLINT
#define GTEST_USES_POSIX_RE 1
#else
// We are not on Linux, so <regex.h> may not be available. Use our
// own simple regex implementation instead.
#define GTEST_USES_SIMPLE_RE 1
#endif // GTEST_OS_LINUX
// Determines whether ::std::string and ::string are available.
#ifndef GTEST_HAS_STD_STRING
@ -352,11 +372,6 @@
// Determines whether to support death tests.
#if GTEST_HAS_STD_STRING && GTEST_HAS_CLONE
#define GTEST_HAS_DEATH_TEST
// On some platforms, <regex.h> needs someone to define size_t, and
// won't compile otherwise. We can #include it here as we already
// included <stdlib.h>, which is guaranteed to define size_t through
// <stddef.h>.
#include <regex.h>
#include <vector>
#include <fcntl.h>
#include <sys/mman.h>
@ -375,8 +390,8 @@
// Typed tests need <typeinfo> and variadic macros, which gcc and VC
// 8.0+ support.
#if defined(__GNUC__) || (_MSC_VER >= 1400)
#define GTEST_HAS_TYPED_TEST
#define GTEST_HAS_TYPED_TEST_P
#define GTEST_HAS_TYPED_TEST 1
#define GTEST_HAS_TYPED_TEST_P 1
#endif // defined(__GNUC__) || (_MSC_VER >= 1400)
// Determines whether to support Combine(). This only makes sense when
@ -490,8 +505,6 @@ class scoped_ptr {
GTEST_DISALLOW_COPY_AND_ASSIGN_(scoped_ptr);
};
#ifdef GTEST_HAS_DEATH_TEST
// Defines RE.
// A simple C++ wrapper for <regex.h>. It uses the POSIX Enxtended
@ -549,12 +562,16 @@ class RE {
// String type here, in order to simplify dependencies between the
// files.
const char* pattern_;
bool is_valid_;
#if GTEST_USES_POSIX_RE
regex_t full_regex_; // For FullMatch().
regex_t partial_regex_; // For PartialMatch().
bool is_valid_;
};
#else // GTEST_USES_SIMPLE_RE
const char* full_pattern_; // For FullMatch();
#endif
#endif // GTEST_HAS_DEATH_TEST
GTEST_DISALLOW_COPY_AND_ASSIGN_(RE);
};
// Defines logging utilities:
// GTEST_LOG_() - logs messages at the specified severity level.

View File

@ -215,7 +215,6 @@ bool FilePath::DirectoryExists() const {
// root directory per disk drive.)
bool FilePath::IsRootDirectory() const {
#ifdef GTEST_OS_WINDOWS
const char* const name = pathname_.c_str();
// TODO(wan@google.com): on Windows a network share like
// \\server\share can be a root directory, although it cannot be the
// current directory. Handle this properly.

View File

@ -1266,6 +1266,22 @@ inline UnitTestImpl* GetUnitTestImpl() {
return UnitTest::GetInstance()->impl();
}
// Internal helper functions for implementing the simple regular
// expression matcher.
bool IsInSet(char ch, const char* str);
bool IsDigit(char ch);
bool IsPunct(char ch);
bool IsRepeat(char ch);
bool IsWhiteSpace(char ch);
bool IsWordChar(char ch);
bool IsValidEscape(char ch);
bool AtomMatchesChar(bool escaped, char pattern, char ch);
bool ValidateRegex(const char* regex);
bool MatchRegexAtHead(const char* regex, const char* str);
bool MatchRepetitionAndRegexAtHead(
bool escaped, char ch, char repeat, const char* regex, const char* str);
bool MatchRegexAnywhere(const char* regex, const char* str);
// Parses the command line for Google Test flags, without initializing
// other parts of Google Test.
void ParseGoogleTestFlagsOnly(int* argc, char** argv);

View File

@ -39,6 +39,10 @@
#include <regex.h>
#endif // GTEST_HAS_DEATH_TEST
#if GTEST_USES_SIMPLE_RE
#include <string.h>
#endif
#ifdef _WIN32_WCE
#include <windows.h> // For TerminateProcess()
#endif // _WIN32_WCE
@ -47,11 +51,19 @@
#include <gtest/gtest-message.h>
#include <gtest/internal/gtest-string.h>
// Indicates that this translation unit is part of Google Test's
// implementation. It must come before gtest-internal-inl.h is
// included, or there will be a compiler error. This trick is to
// prevent a user from accidentally including gtest-internal-inl.h in
// his code.
#define GTEST_IMPLEMENTATION
#include "src/gtest-internal-inl.h"
#undef GTEST_IMPLEMENTATION
namespace testing {
namespace internal {
#ifdef GTEST_HAS_DEATH_TEST
#if GTEST_USES_POSIX_RE
// Implements RE. Currently only needed for death tests.
@ -101,7 +113,262 @@ void RE::Init(const char* regex) {
delete[] full_pattern;
}
#endif // GTEST_HAS_DEATH_TEST
#elif GTEST_USES_SIMPLE_RE
// Returns true iff ch appears anywhere in str (excluding the
// terminating '\0' character).
bool IsInSet(char ch, const char* str) {
return ch != '\0' && strchr(str, ch) != NULL;
}
// Returns true iff ch belongs to the given classification. Unlike
// similar functions in <ctype.h>, these aren't affected by the
// current locale.
bool IsDigit(char ch) { return '0' <= ch && ch <= '9'; }
bool IsPunct(char ch) {
return IsInSet(ch, "^-!\"#$%&'()*+,./:;<=>?@[\\]_`{|}~");
}
bool IsRepeat(char ch) { return IsInSet(ch, "?*+"); }
bool IsWhiteSpace(char ch) { return IsInSet(ch, " \f\n\r\t\v"); }
bool IsWordChar(char ch) {
return ('a' <= ch && ch <= 'z') || ('A' <= ch && ch <= 'Z') ||
('0' <= ch && ch <= '9') || ch == '_';
}
// Returns true iff "\\c" is a supported escape sequence.
bool IsValidEscape(char c) {
return (IsPunct(c) || IsInSet(c, "dDfnrsStvwW"));
}
// Returns true iff the given atom (specified by escaped and pattern)
// matches ch. The result is undefined if the atom is invalid.
bool AtomMatchesChar(bool escaped, char pattern_char, char ch) {
if (escaped) { // "\\p" where p is pattern_char.
switch (pattern_char) {
case 'd': return IsDigit(ch);
case 'D': return !IsDigit(ch);
case 'f': return ch == '\f';
case 'n': return ch == '\n';
case 'r': return ch == '\r';
case 's': return IsWhiteSpace(ch);
case 'S': return !IsWhiteSpace(ch);
case 't': return ch == '\t';
case 'v': return ch == '\v';
case 'w': return IsWordChar(ch);
case 'W': return !IsWordChar(ch);
}
return IsPunct(pattern_char) && pattern_char == ch;
}
return (pattern_char == '.' && ch != '\n') || pattern_char == ch;
}
// Helper function used by ValidateRegex() to format error messages.
String FormatRegexSyntaxError(const char* regex, int index) {
return (Message() << "Syntax error at index " << index
<< " in simple regular expression \"" << regex << "\": ").GetString();
}
// Generates non-fatal failures and returns false if regex is invalid;
// otherwise returns true.
bool ValidateRegex(const char* regex) {
if (regex == NULL) {
// TODO(wan@google.com): fix the source file location in the
// assertion failures to match where the regex is used in user
// code.
ADD_FAILURE() << "NULL is not a valid simple regular expression.";
return false;
}
bool is_valid = true;
// True iff ?, *, or + can follow the previous atom.
bool prev_repeatable = false;
for (int i = 0; regex[i]; i++) {
if (regex[i] == '\\') { // An escape sequence
i++;
if (regex[i] == '\0') {
ADD_FAILURE() << FormatRegexSyntaxError(regex, i - 1)
<< "'\\' cannot appear at the end.";
return false;
}
if (!IsValidEscape(regex[i])) {
ADD_FAILURE() << FormatRegexSyntaxError(regex, i - 1)
<< "invalid escape sequence \"\\" << regex[i] << "\".";
is_valid = false;
}
prev_repeatable = true;
} else { // Not an escape sequence.
const char ch = regex[i];
if (ch == '^' && i > 0) {
ADD_FAILURE() << FormatRegexSyntaxError(regex, i)
<< "'^' can only appear at the beginning.";
is_valid = false;
} else if (ch == '$' && regex[i + 1] != '\0') {
ADD_FAILURE() << FormatRegexSyntaxError(regex, i)
<< "'$' can only appear at the end.";
is_valid = false;
} else if (IsInSet(ch, "()[]{}|")) {
ADD_FAILURE() << FormatRegexSyntaxError(regex, i)
<< "'" << ch << "' is unsupported.";
is_valid = false;
} else if (IsRepeat(ch) && !prev_repeatable) {
ADD_FAILURE() << FormatRegexSyntaxError(regex, i)
<< "'" << ch << "' can only follow a repeatable token.";
is_valid = false;
}
prev_repeatable = !IsInSet(ch, "^$?*+");
}
}
return is_valid;
}
// Matches a repeated regex atom followed by a valid simple regular
// expression. The regex atom is defined as c if escaped is false,
// or \c otherwise. repeat is the repetition meta character (?, *,
// or +). The behavior is undefined if str contains too many
// characters to be indexable by size_t, in which case the test will
// probably time out anyway. We are fine with this limitation as
// std::string has it too.
bool MatchRepetitionAndRegexAtHead(
bool escaped, char c, char repeat, const char* regex,
const char* str) {
const size_t min_count = (repeat == '+') ? 1 : 0;
const size_t max_count = (repeat == '?') ? 1 :
static_cast<size_t>(-1) - 1;
// We cannot call numeric_limits::max() as it conflicts with the
// max() macro on Windows.
for (size_t i = 0; i <= max_count; ++i) {
// We know that the atom matches each of the first i characters in str.
if (i >= min_count && MatchRegexAtHead(regex, str + i)) {
// We have enough matches at the head, and the tail matches too.
// Since we only care about *whether* the pattern matches str
// (as opposed to *how* it matches), there is no need to find a
// greedy match.
return true;
}
if (str[i] == '\0' || !AtomMatchesChar(escaped, c, str[i]))
return false;
}
return false;
}
// Returns true iff regex matches a prefix of str. regex must be a
// valid simple regular expression and not start with "^", or the
// result is undefined.
bool MatchRegexAtHead(const char* regex, const char* str) {
if (*regex == '\0') // An empty regex matches a prefix of anything.
return true;
// "$" only matches the end of a string. Note that regex being
// valid guarantees that there's nothing after "$" in it.
if (*regex == '$')
return *str == '\0';
// Is the first thing in regex an escape sequence?
const bool escaped = *regex == '\\';
if (escaped)
++regex;
if (IsRepeat(regex[1])) {
// MatchRepetitionAndRegexAtHead() calls MatchRegexAtHead(), so
// here's an indirect recursion. It terminates as the regex gets
// shorter in each recursion.
return MatchRepetitionAndRegexAtHead(
escaped, regex[0], regex[1], regex + 2, str);
} else {
// regex isn't empty, isn't "$", and doesn't start with a
// repetition. We match the first atom of regex with the first
// character of str and recurse.
return (*str != '\0') && AtomMatchesChar(escaped, *regex, *str) &&
MatchRegexAtHead(regex + 1, str + 1);
}
}
// Returns true iff regex matches any substring of str. regex must be
// a valid simple regular expression, or the result is undefined.
//
// The algorithm is recursive, but the recursion depth doesn't exceed
// the regex length, so we won't need to worry about running out of
// stack space normally. In rare cases the time complexity can be
// exponential with respect to the regex length + the string length,
// but usually it's must faster (often close to linear).
bool MatchRegexAnywhere(const char* regex, const char* str) {
if (regex == NULL || str == NULL)
return false;
if (*regex == '^')
return MatchRegexAtHead(regex + 1, str);
// A successful match can be anywhere in str.
do {
if (MatchRegexAtHead(regex, str))
return true;
} while (*str++ != '\0');
return false;
}
// Implements the RE class.
RE::~RE() {
free(const_cast<char*>(pattern_));
free(const_cast<char*>(full_pattern_));
}
// Returns true iff regular expression re matches the entire str.
bool RE::FullMatch(const char* str, const RE& re) {
return re.is_valid_ && MatchRegexAnywhere(re.full_pattern_, str);
}
// Returns true iff regular expression re matches a substring of str
// (including str itself).
bool RE::PartialMatch(const char* str, const RE& re) {
return re.is_valid_ && MatchRegexAnywhere(re.pattern_, str);
}
// Initializes an RE from its string representation.
void RE::Init(const char* regex) {
pattern_ = full_pattern_ = NULL;
if (regex != NULL) {
#ifdef GTEST_OS_WINDOWS
pattern_ = _strdup(regex);
#else
pattern_ = strdup(regex);
#endif
}
is_valid_ = ValidateRegex(regex);
if (!is_valid_) {
// No need to calculate the full pattern when the regex is invalid.
return;
}
const size_t len = strlen(regex);
// Reserves enough bytes to hold the regular expression used for a
// full match: we need space to prepend a '^', append a '$', and
// terminate the string with '\0'.
char* buffer = static_cast<char*>(malloc(len + 3));
full_pattern_ = buffer;
if (*regex != '^')
*buffer++ = '^'; // Makes sure full_pattern_ starts with '^'.
// We don't use snprintf or strncpy, as they trigger a warning when
// compiled with VC++ 8.0.
memcpy(buffer, regex, len);
buffer += len;
if (len == 0 || regex[len - 1] != '$')
*buffer++ = '$'; // Makes sure full_pattern_ ends with '$'.
*buffer = '\0';
}
#endif // GTEST_USES_POSIX_RE
// Logs a message at the given severity level.
void GTestLog(GTestLogSeverity severity, const char* file,

View File

@ -27,7 +27,7 @@
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Author: vladl@google.com (Vlad Losev)
// Authors: vladl@google.com (Vlad Losev), wan@google.com (Zhanyong Wan)
//
// This file tests the internal cross-platform support utilities.
@ -35,6 +35,18 @@
#include <gtest/gtest.h>
#include <gtest/gtest-spi.h>
// Indicates that this translation unit is part of Google Test's
// implementation. It must come before gtest-internal-inl.h is
// included, or there will be a compiler error. This trick is to
// prevent a user from accidentally including gtest-internal-inl.h in
// his code.
#define GTEST_IMPLEMENTATION
#include "src/gtest-internal-inl.h"
#undef GTEST_IMPLEMENTATION
namespace testing {
namespace internal {
TEST(GtestCheckSyntaxTest, BehavesLikeASingleStatement) {
if (false)
GTEST_CHECK_(false) << "This should never be executed; "
@ -87,9 +99,7 @@ TEST(GtestCheckDeathTest, LivesSilentlyOnSuccess) {
#endif // GTEST_HAS_DEATH_TEST
#ifdef GTEST_USES_POSIX_RE
using ::testing::internal::RE;
#if GTEST_USES_POSIX_RE
template <typename Str>
class RETest : public ::testing::Test {};
@ -109,30 +119,30 @@ TYPED_TEST_CASE(RETest, StringTypes);
// Tests RE's implicit constructors.
TYPED_TEST(RETest, ImplicitConstructorWorks) {
const RE empty = TypeParam("");
const RE empty(TypeParam(""));
EXPECT_STREQ("", empty.pattern());
const RE simple = TypeParam("hello");
const RE simple(TypeParam("hello"));
EXPECT_STREQ("hello", simple.pattern());
const RE normal = TypeParam(".*(\\w+)");
const RE normal(TypeParam(".*(\\w+)"));
EXPECT_STREQ(".*(\\w+)", normal.pattern());
}
// Tests that RE's constructors reject invalid regular expressions.
TYPED_TEST(RETest, RejectsInvalidRegex) {
EXPECT_NONFATAL_FAILURE({
const RE invalid = TypeParam("?");
const RE invalid(TypeParam("?"));
}, "\"?\" is not a valid POSIX Extended regular expression.");
}
// Tests RE::FullMatch().
TYPED_TEST(RETest, FullMatchWorks) {
const RE empty = TypeParam("");
const RE empty(TypeParam(""));
EXPECT_TRUE(RE::FullMatch(TypeParam(""), empty));
EXPECT_FALSE(RE::FullMatch(TypeParam("a"), empty));
const RE re = TypeParam("a.*z");
const RE re(TypeParam("a.*z"));
EXPECT_TRUE(RE::FullMatch(TypeParam("az"), re));
EXPECT_TRUE(RE::FullMatch(TypeParam("axyz"), re));
EXPECT_FALSE(RE::FullMatch(TypeParam("baz"), re));
@ -141,11 +151,11 @@ TYPED_TEST(RETest, FullMatchWorks) {
// Tests RE::PartialMatch().
TYPED_TEST(RETest, PartialMatchWorks) {
const RE empty = TypeParam("");
const RE empty(TypeParam(""));
EXPECT_TRUE(RE::PartialMatch(TypeParam(""), empty));
EXPECT_TRUE(RE::PartialMatch(TypeParam("a"), empty));
const RE re = TypeParam("a.*z");
const RE re(TypeParam("a.*z"));
EXPECT_TRUE(RE::PartialMatch(TypeParam("az"), re));
EXPECT_TRUE(RE::PartialMatch(TypeParam("axyz"), re));
EXPECT_TRUE(RE::PartialMatch(TypeParam("baz"), re));
@ -153,4 +163,471 @@ TYPED_TEST(RETest, PartialMatchWorks) {
EXPECT_FALSE(RE::PartialMatch(TypeParam("zza"), re));
}
#elif GTEST_USES_SIMPLE_RE
TEST(IsInSetTest, NulCharIsNotInAnySet) {
EXPECT_FALSE(IsInSet('\0', ""));
EXPECT_FALSE(IsInSet('\0', "\0"));
EXPECT_FALSE(IsInSet('\0', "a"));
}
TEST(IsInSetTest, WorksForNonNulChars) {
EXPECT_FALSE(IsInSet('a', "Ab"));
EXPECT_FALSE(IsInSet('c', ""));
EXPECT_TRUE(IsInSet('b', "bcd"));
EXPECT_TRUE(IsInSet('b', "ab"));
}
TEST(IsDigitTest, IsFalseForNonDigit) {
EXPECT_FALSE(IsDigit('\0'));
EXPECT_FALSE(IsDigit(' '));
EXPECT_FALSE(IsDigit('+'));
EXPECT_FALSE(IsDigit('-'));
EXPECT_FALSE(IsDigit('.'));
EXPECT_FALSE(IsDigit('a'));
}
TEST(IsDigitTest, IsTrueForDigit) {
EXPECT_TRUE(IsDigit('0'));
EXPECT_TRUE(IsDigit('1'));
EXPECT_TRUE(IsDigit('5'));
EXPECT_TRUE(IsDigit('9'));
}
TEST(IsPunctTest, IsFalseForNonPunct) {
EXPECT_FALSE(IsPunct('\0'));
EXPECT_FALSE(IsPunct(' '));
EXPECT_FALSE(IsPunct('\n'));
EXPECT_FALSE(IsPunct('a'));
EXPECT_FALSE(IsPunct('0'));
}
TEST(IsPunctTest, IsTrueForPunct) {
for (const char* p = "^-!\"#$%&'()*+,./:;<=>?@[\\]_`{|}~"; *p; p++) {
EXPECT_PRED1(IsPunct, *p);
}
}
TEST(IsRepeatTest, IsFalseForNonRepeatChar) {
EXPECT_FALSE(IsRepeat('\0'));
EXPECT_FALSE(IsRepeat(' '));
EXPECT_FALSE(IsRepeat('a'));
EXPECT_FALSE(IsRepeat('1'));
EXPECT_FALSE(IsRepeat('-'));
}
TEST(IsRepeatTest, IsTrueForRepeatChar) {
EXPECT_TRUE(IsRepeat('?'));
EXPECT_TRUE(IsRepeat('*'));
EXPECT_TRUE(IsRepeat('+'));
}
TEST(IsWhiteSpaceTest, IsFalseForNonWhiteSpace) {
EXPECT_FALSE(IsWhiteSpace('\0'));
EXPECT_FALSE(IsWhiteSpace('a'));
EXPECT_FALSE(IsWhiteSpace('1'));
EXPECT_FALSE(IsWhiteSpace('+'));
EXPECT_FALSE(IsWhiteSpace('_'));
}
TEST(IsWhiteSpaceTest, IsTrueForWhiteSpace) {
EXPECT_TRUE(IsWhiteSpace(' '));
EXPECT_TRUE(IsWhiteSpace('\n'));
EXPECT_TRUE(IsWhiteSpace('\r'));
EXPECT_TRUE(IsWhiteSpace('\t'));
EXPECT_TRUE(IsWhiteSpace('\v'));
EXPECT_TRUE(IsWhiteSpace('\f'));
}
TEST(IsWordCharTest, IsFalseForNonWordChar) {
EXPECT_FALSE(IsWordChar('\0'));
EXPECT_FALSE(IsWordChar('+'));
EXPECT_FALSE(IsWordChar('.'));
EXPECT_FALSE(IsWordChar(' '));
EXPECT_FALSE(IsWordChar('\n'));
}
TEST(IsWordCharTest, IsTrueForLetter) {
EXPECT_TRUE(IsWordChar('a'));
EXPECT_TRUE(IsWordChar('b'));
EXPECT_TRUE(IsWordChar('A'));
EXPECT_TRUE(IsWordChar('Z'));
}
TEST(IsWordCharTest, IsTrueForDigit) {
EXPECT_TRUE(IsWordChar('0'));
EXPECT_TRUE(IsWordChar('1'));
EXPECT_TRUE(IsWordChar('7'));
EXPECT_TRUE(IsWordChar('9'));
}
TEST(IsWordCharTest, IsTrueForUnderscore) {
EXPECT_TRUE(IsWordChar('_'));
}
TEST(IsValidEscapeTest, IsFalseForNonPrintable) {
EXPECT_FALSE(IsValidEscape('\0'));
EXPECT_FALSE(IsValidEscape('\007'));
}
TEST(IsValidEscapeTest, IsFalseForDigit) {
EXPECT_FALSE(IsValidEscape('0'));
EXPECT_FALSE(IsValidEscape('9'));
}
TEST(IsValidEscapeTest, IsFalseForWhiteSpace) {
EXPECT_FALSE(IsValidEscape(' '));
EXPECT_FALSE(IsValidEscape('\n'));
}
TEST(IsValidEscapeTest, IsFalseForSomeLetter) {
EXPECT_FALSE(IsValidEscape('a'));
EXPECT_FALSE(IsValidEscape('Z'));
}
TEST(IsValidEscapeTest, IsTrueForPunct) {
EXPECT_TRUE(IsValidEscape('.'));
EXPECT_TRUE(IsValidEscape('-'));
EXPECT_TRUE(IsValidEscape('^'));
EXPECT_TRUE(IsValidEscape('$'));
EXPECT_TRUE(IsValidEscape('('));
EXPECT_TRUE(IsValidEscape(']'));
EXPECT_TRUE(IsValidEscape('{'));
EXPECT_TRUE(IsValidEscape('|'));
}
TEST(IsValidEscapeTest, IsTrueForSomeLetter) {
EXPECT_TRUE(IsValidEscape('d'));
EXPECT_TRUE(IsValidEscape('D'));
EXPECT_TRUE(IsValidEscape('s'));
EXPECT_TRUE(IsValidEscape('S'));
EXPECT_TRUE(IsValidEscape('w'));
EXPECT_TRUE(IsValidEscape('W'));
}
TEST(AtomMatchesCharTest, EscapedPunct) {
EXPECT_FALSE(AtomMatchesChar(true, '\\', '\0'));
EXPECT_FALSE(AtomMatchesChar(true, '\\', ' '));
EXPECT_FALSE(AtomMatchesChar(true, '_', '.'));
EXPECT_FALSE(AtomMatchesChar(true, '.', 'a'));
EXPECT_TRUE(AtomMatchesChar(true, '\\', '\\'));
EXPECT_TRUE(AtomMatchesChar(true, '_', '_'));
EXPECT_TRUE(AtomMatchesChar(true, '+', '+'));
EXPECT_TRUE(AtomMatchesChar(true, '.', '.'));
}
TEST(AtomMatchesCharTest, Escaped_d) {
EXPECT_FALSE(AtomMatchesChar(true, 'd', '\0'));
EXPECT_FALSE(AtomMatchesChar(true, 'd', 'a'));
EXPECT_FALSE(AtomMatchesChar(true, 'd', '.'));
EXPECT_TRUE(AtomMatchesChar(true, 'd', '0'));
EXPECT_TRUE(AtomMatchesChar(true, 'd', '9'));
}
TEST(AtomMatchesCharTest, Escaped_D) {
EXPECT_FALSE(AtomMatchesChar(true, 'D', '0'));
EXPECT_FALSE(AtomMatchesChar(true, 'D', '9'));
EXPECT_TRUE(AtomMatchesChar(true, 'D', '\0'));
EXPECT_TRUE(AtomMatchesChar(true, 'D', 'a'));
EXPECT_TRUE(AtomMatchesChar(true, 'D', '-'));
}
TEST(AtomMatchesCharTest, Escaped_s) {
EXPECT_FALSE(AtomMatchesChar(true, 's', '\0'));
EXPECT_FALSE(AtomMatchesChar(true, 's', 'a'));
EXPECT_FALSE(AtomMatchesChar(true, 's', '.'));
EXPECT_FALSE(AtomMatchesChar(true, 's', '9'));
EXPECT_TRUE(AtomMatchesChar(true, 's', ' '));
EXPECT_TRUE(AtomMatchesChar(true, 's', '\n'));
EXPECT_TRUE(AtomMatchesChar(true, 's', '\t'));
}
TEST(AtomMatchesCharTest, Escaped_S) {
EXPECT_FALSE(AtomMatchesChar(true, 'S', ' '));
EXPECT_FALSE(AtomMatchesChar(true, 'S', '\r'));
EXPECT_TRUE(AtomMatchesChar(true, 'S', '\0'));
EXPECT_TRUE(AtomMatchesChar(true, 'S', 'a'));
EXPECT_TRUE(AtomMatchesChar(true, 'S', '9'));
}
TEST(AtomMatchesCharTest, Escaped_w) {
EXPECT_FALSE(AtomMatchesChar(true, 'w', '\0'));
EXPECT_FALSE(AtomMatchesChar(true, 'w', '+'));
EXPECT_FALSE(AtomMatchesChar(true, 'w', ' '));
EXPECT_FALSE(AtomMatchesChar(true, 'w', '\n'));
EXPECT_TRUE(AtomMatchesChar(true, 'w', '0'));
EXPECT_TRUE(AtomMatchesChar(true, 'w', 'b'));
EXPECT_TRUE(AtomMatchesChar(true, 'w', 'C'));
EXPECT_TRUE(AtomMatchesChar(true, 'w', '_'));
}
TEST(AtomMatchesCharTest, Escaped_W) {
EXPECT_FALSE(AtomMatchesChar(true, 'W', 'A'));
EXPECT_FALSE(AtomMatchesChar(true, 'W', 'b'));
EXPECT_FALSE(AtomMatchesChar(true, 'W', '9'));
EXPECT_FALSE(AtomMatchesChar(true, 'W', '_'));
EXPECT_TRUE(AtomMatchesChar(true, 'W', '\0'));
EXPECT_TRUE(AtomMatchesChar(true, 'W', '*'));
EXPECT_TRUE(AtomMatchesChar(true, 'W', '\n'));
}
TEST(AtomMatchesCharTest, EscapedWhiteSpace) {
EXPECT_FALSE(AtomMatchesChar(true, 'f', '\0'));
EXPECT_FALSE(AtomMatchesChar(true, 'f', '\n'));
EXPECT_FALSE(AtomMatchesChar(true, 'n', '\0'));
EXPECT_FALSE(AtomMatchesChar(true, 'n', '\r'));
EXPECT_FALSE(AtomMatchesChar(true, 'r', '\0'));
EXPECT_FALSE(AtomMatchesChar(true, 'r', 'a'));
EXPECT_FALSE(AtomMatchesChar(true, 't', '\0'));
EXPECT_FALSE(AtomMatchesChar(true, 't', 't'));
EXPECT_FALSE(AtomMatchesChar(true, 'v', '\0'));
EXPECT_FALSE(AtomMatchesChar(true, 'v', '\f'));
EXPECT_TRUE(AtomMatchesChar(true, 'f', '\f'));
EXPECT_TRUE(AtomMatchesChar(true, 'n', '\n'));
EXPECT_TRUE(AtomMatchesChar(true, 'r', '\r'));
EXPECT_TRUE(AtomMatchesChar(true, 't', '\t'));
EXPECT_TRUE(AtomMatchesChar(true, 'v', '\v'));
}
TEST(AtomMatchesCharTest, UnescapedDot) {
EXPECT_FALSE(AtomMatchesChar(false, '.', '\n'));
EXPECT_TRUE(AtomMatchesChar(false, '.', '\0'));
EXPECT_TRUE(AtomMatchesChar(false, '.', '.'));
EXPECT_TRUE(AtomMatchesChar(false, '.', 'a'));
EXPECT_TRUE(AtomMatchesChar(false, '.', ' '));
}
TEST(AtomMatchesCharTest, UnescapedChar) {
EXPECT_FALSE(AtomMatchesChar(false, 'a', '\0'));
EXPECT_FALSE(AtomMatchesChar(false, 'a', 'b'));
EXPECT_FALSE(AtomMatchesChar(false, '$', 'a'));
EXPECT_TRUE(AtomMatchesChar(false, '$', '$'));
EXPECT_TRUE(AtomMatchesChar(false, '5', '5'));
EXPECT_TRUE(AtomMatchesChar(false, 'Z', 'Z'));
}
TEST(ValidateRegexTest, GeneratesFailureAndReturnsFalseForInvalid) {
EXPECT_NONFATAL_FAILURE(ASSERT_FALSE(ValidateRegex(NULL)),
"NULL is not a valid simple regular expression");
EXPECT_NONFATAL_FAILURE(
ASSERT_FALSE(ValidateRegex("a\\")),
"Syntax error at index 1 in simple regular expression \"a\\\": ");
EXPECT_NONFATAL_FAILURE(ASSERT_FALSE(ValidateRegex("a\\")),
"'\\' cannot appear at the end");
EXPECT_NONFATAL_FAILURE(ASSERT_FALSE(ValidateRegex("\\n\\")),
"'\\' cannot appear at the end");
EXPECT_NONFATAL_FAILURE(ASSERT_FALSE(ValidateRegex("\\s\\hb")),
"invalid escape sequence \"\\h\"");
EXPECT_NONFATAL_FAILURE(ASSERT_FALSE(ValidateRegex("^^")),
"'^' can only appear at the beginning");
EXPECT_NONFATAL_FAILURE(ASSERT_FALSE(ValidateRegex(".*^b")),
"'^' can only appear at the beginning");
EXPECT_NONFATAL_FAILURE(ASSERT_FALSE(ValidateRegex("$$")),
"'$' can only appear at the end");
EXPECT_NONFATAL_FAILURE(ASSERT_FALSE(ValidateRegex("^$a")),
"'$' can only appear at the end");
EXPECT_NONFATAL_FAILURE(ASSERT_FALSE(ValidateRegex("a(b")),
"'(' is unsupported");
EXPECT_NONFATAL_FAILURE(ASSERT_FALSE(ValidateRegex("ab)")),
"')' is unsupported");
EXPECT_NONFATAL_FAILURE(ASSERT_FALSE(ValidateRegex("[ab")),
"'[' is unsupported");
EXPECT_NONFATAL_FAILURE(ASSERT_FALSE(ValidateRegex("a{2")),
"'{' is unsupported");
EXPECT_NONFATAL_FAILURE(ASSERT_FALSE(ValidateRegex("?")),
"'?' can only follow a repeatable token");
EXPECT_NONFATAL_FAILURE(ASSERT_FALSE(ValidateRegex("^*")),
"'*' can only follow a repeatable token");
EXPECT_NONFATAL_FAILURE(ASSERT_FALSE(ValidateRegex("5*+")),
"'+' can only follow a repeatable token");
}
TEST(ValidateRegexTest, ReturnsTrueForValid) {
EXPECT_TRUE(ValidateRegex(""));
EXPECT_TRUE(ValidateRegex("a"));
EXPECT_TRUE(ValidateRegex(".*"));
EXPECT_TRUE(ValidateRegex("^a_+"));
EXPECT_TRUE(ValidateRegex("^a\\t\\&?"));
EXPECT_TRUE(ValidateRegex("09*$"));
EXPECT_TRUE(ValidateRegex("^Z$"));
EXPECT_TRUE(ValidateRegex("a\\^Z\\$\\(\\)\\|\\[\\]\\{\\}"));
}
TEST(MatchRepetitionAndRegexAtHeadTest, WorksForZeroOrOne) {
EXPECT_FALSE(MatchRepetitionAndRegexAtHead(false, 'a', '?', "a", "ba"));
// Repeating more than once.
EXPECT_FALSE(MatchRepetitionAndRegexAtHead(false, 'a', '?', "b", "aab"));
// Repeating zero times.
EXPECT_TRUE(MatchRepetitionAndRegexAtHead(false, 'a', '?', "b", "ba"));
// Repeating once.
EXPECT_TRUE(MatchRepetitionAndRegexAtHead(false, 'a', '?', "b", "ab"));
EXPECT_TRUE(MatchRepetitionAndRegexAtHead(false, '#', '?', ".", "##"));
}
TEST(MatchRepetitionAndRegexAtHeadTest, WorksForZeroOrMany) {
EXPECT_FALSE(MatchRepetitionAndRegexAtHead(false, '.', '*', "a$", "baab"));
// Repeating zero times.
EXPECT_TRUE(MatchRepetitionAndRegexAtHead(false, '.', '*', "b", "bc"));
// Repeating once.
EXPECT_TRUE(MatchRepetitionAndRegexAtHead(false, '.', '*', "b", "abc"));
// Repeating more than once.
EXPECT_TRUE(MatchRepetitionAndRegexAtHead(true, 'w', '*', "-", "ab_1-g"));
}
TEST(MatchRepetitionAndRegexAtHeadTest, WorksForOneOrMany) {
EXPECT_FALSE(MatchRepetitionAndRegexAtHead(false, '.', '+', "a$", "baab"));
// Repeating zero times.
EXPECT_FALSE(MatchRepetitionAndRegexAtHead(false, '.', '+', "b", "bc"));
// Repeating once.
EXPECT_TRUE(MatchRepetitionAndRegexAtHead(false, '.', '+', "b", "abc"));
// Repeating more than once.
EXPECT_TRUE(MatchRepetitionAndRegexAtHead(true, 'w', '+', "-", "ab_1-g"));
}
TEST(MatchRegexAtHeadTest, ReturnsTrueForEmptyRegex) {
EXPECT_TRUE(MatchRegexAtHead("", ""));
EXPECT_TRUE(MatchRegexAtHead("", "ab"));
}
TEST(MatchRegexAtHeadTest, WorksWhenDollarIsInRegex) {
EXPECT_FALSE(MatchRegexAtHead("$", "a"));
EXPECT_TRUE(MatchRegexAtHead("$", ""));
EXPECT_TRUE(MatchRegexAtHead("a$", "a"));
}
TEST(MatchRegexAtHeadTest, WorksWhenRegexStartsWithEscapeSequence) {
EXPECT_FALSE(MatchRegexAtHead("\\w", "+"));
EXPECT_FALSE(MatchRegexAtHead("\\W", "ab"));
EXPECT_TRUE(MatchRegexAtHead("\\sa", "\nab"));
EXPECT_TRUE(MatchRegexAtHead("\\d", "1a"));
}
TEST(MatchRegexAtHeadTest, WorksWhenRegexStartsWithRepetition) {
EXPECT_FALSE(MatchRegexAtHead(".+a", "abc"));
EXPECT_FALSE(MatchRegexAtHead("a?b", "aab"));
EXPECT_TRUE(MatchRegexAtHead(".*a", "bc12-ab"));
EXPECT_TRUE(MatchRegexAtHead("a?b", "b"));
EXPECT_TRUE(MatchRegexAtHead("a?b", "ab"));
}
TEST(MatchRegexAtHeadTest,
WorksWhenRegexStartsWithRepetionOfEscapeSequence) {
EXPECT_FALSE(MatchRegexAtHead("\\.+a", "abc"));
EXPECT_FALSE(MatchRegexAtHead("\\s?b", " b"));
EXPECT_TRUE(MatchRegexAtHead("\\(*a", "((((ab"));
EXPECT_TRUE(MatchRegexAtHead("\\^?b", "^b"));
EXPECT_TRUE(MatchRegexAtHead("\\\\?b", "b"));
EXPECT_TRUE(MatchRegexAtHead("\\\\?b", "\\b"));
}
TEST(MatchRegexAtHeadTest, MatchesSequentially) {
EXPECT_FALSE(MatchRegexAtHead("ab.*c", "acabc"));
EXPECT_TRUE(MatchRegexAtHead("ab.*c", "ab-fsc"));
}
TEST(MatchRegexAnywhereTest, ReturnsFalseWhenStringIsNull) {
EXPECT_FALSE(MatchRegexAnywhere("", NULL));
}
TEST(MatchRegexAnywhereTest, WorksWhenRegexStartsWithCaret) {
EXPECT_FALSE(MatchRegexAnywhere("^a", "ba"));
EXPECT_FALSE(MatchRegexAnywhere("^$", "a"));
EXPECT_TRUE(MatchRegexAnywhere("^a", "ab"));
EXPECT_TRUE(MatchRegexAnywhere("^", "ab"));
EXPECT_TRUE(MatchRegexAnywhere("^$", ""));
}
TEST(MatchRegexAnywhereTest, ReturnsFalseWhenNoMatch) {
EXPECT_FALSE(MatchRegexAnywhere("a", "bcde123"));
EXPECT_FALSE(MatchRegexAnywhere("a.+a", "--aa88888888"));
}
TEST(MatchRegexAnywhereTest, ReturnsTrueWhenMatchingPrefix) {
EXPECT_TRUE(MatchRegexAnywhere("\\w+", "ab1_ - 5"));
EXPECT_TRUE(MatchRegexAnywhere(".*=", "="));
EXPECT_TRUE(MatchRegexAnywhere("x.*ab?.*bc", "xaaabc"));
}
TEST(MatchRegexAnywhereTest, ReturnsTrueWhenMatchingNonPrefix) {
EXPECT_TRUE(MatchRegexAnywhere("\\w+", "$$$ ab1_ - 5"));
EXPECT_TRUE(MatchRegexAnywhere("\\.+=", "= ...="));
}
// Tests RE's implicit constructors.
TEST(RETest, ImplicitConstructorWorks) {
const RE empty = "";
EXPECT_STREQ("", empty.pattern());
const RE simple = "hello";
EXPECT_STREQ("hello", simple.pattern());
}
// Tests that RE's constructors reject invalid regular expressions.
TEST(RETest, RejectsInvalidRegex) {
EXPECT_NONFATAL_FAILURE({
const RE normal = NULL;
}, "NULL is not a valid simple regular expression");
EXPECT_NONFATAL_FAILURE({
const RE normal = ".*(\\w+";
}, "'(' is unsupported");
EXPECT_NONFATAL_FAILURE({
const RE invalid = "^?";
}, "'?' can only follow a repeatable token");
}
// Tests RE::FullMatch().
TEST(RETest, FullMatchWorks) {
const RE empty("");
EXPECT_TRUE(RE::FullMatch("", empty));
EXPECT_FALSE(RE::FullMatch("a", empty));
const RE re1 = "a";
EXPECT_TRUE(RE::FullMatch("a", re1));
const RE re = "a.*z";
EXPECT_TRUE(RE::FullMatch("az", re));
EXPECT_TRUE(RE::FullMatch("axyz", re));
EXPECT_FALSE(RE::FullMatch("baz", re));
EXPECT_FALSE(RE::FullMatch("azy", re));
}
// Tests RE::PartialMatch().
TEST(RETest, PartialMatchWorks) {
const RE empty = "";
EXPECT_TRUE(RE::PartialMatch("", empty));
EXPECT_TRUE(RE::PartialMatch("a", empty));
const RE re = "a.*z";
EXPECT_TRUE(RE::PartialMatch("az", re));
EXPECT_TRUE(RE::PartialMatch("axyz", re));
EXPECT_TRUE(RE::PartialMatch("baz", re));
EXPECT_TRUE(RE::PartialMatch("azy", re));
EXPECT_FALSE(RE::PartialMatch("zza", re));
}
#endif // GTEST_USES_POSIX_RE
} // namespace internal
} // namespace testing