Implements a simple regex matcher (to be used by death tests on Windows).

2009-01-29 01:28:52 +00:00 · 2009-01-29 01:28:52 +00:00 · c946ae6019
commit c946ae6019
parent a32fc79c9a
6 changed files with 854 additions and 27 deletions
--- a/include/gtest/gtest-death-test.h
+++ b/include/gtest/gtest-death-test.h
@ -86,6 +86,57 @@ GTEST_DECLARE_string_(death_test_style);
 //
 //   ASSERT_EXIT(client.HangUpServer(), KilledBySIGHUP, "Hanging up!");
 //
+// On the regular expressions used in death tests:
+//
+//   On POSIX-compliant systems (*nix), we use the <regex.h> library,
+//   which uses the POSIX extended regex syntax.
+//
+//   On other platforms (e.g. Windows), we only support a simple regex
+//   syntax implemented as part of Google Test.  This limited
+//   implementation should be enough most of the time when writing
+//   death tests; though it lacks many features you can find in PCRE
+//   or POSIX extended regex syntax.  For example, we don't support
+//   union ("x|y"), grouping ("(xy)"), brackets ("[xy]"), and
+//   repetition count ("x{5,7}"), among others.
+//
+//   Below is the syntax that we do support.  We chose it to be a
+//   subset of both PCRE and POSIX extended regex, so it's easy to
+//   learn wherever you come from.  In the following: 'A' denotes a
+//   literal character, period (.), or a single \\ escape sequence;
+//   'x' and 'y' denote regular expressions; 'm' and 'n' are for
+//   natural numbers.
+//
+//     c     matches any literal character c
+//     \\d   matches any decimal digit
+//     \\D   matches any character that's not a decimal digit
+//     \\f   matches \f
+//     \\n   matches \n
+//     \\r   matches \r
+//     \\s   matches any ASCII whitespace, including \n
+//     \\S   matches any character that's not a whitespace
+//     \\t   matches \t
+//     \\v   matches \v
+//     \\w   matches any letter, _, or decimal digit
+//     \\W   matches any character that \\w doesn't match
+//     \\c   matches any literal character c, which must be a punctuation
+//     .     matches any single character except \n
+//     A?    matches 0 or 1 occurrences of A
+//     A*    matches 0 or many occurrences of A
+//     A+    matches 1 or many occurrences of A
+//     ^     matches the beginning of a string (not that of each line)
+//     $     matches the end of a string (not that of each line)
+//     xy    matches x followed by y
+//
+//   If you accidentally use PCRE or POSIX extended regex features
+//   not implemented by us, you will get a run-time failure.  In that
+//   case, please try to rewrite your regular expression within the
+//   above syntax.
+//
+//   This implementation is *not* meant to be as highly tuned or robust
+//   as a compiled regex library, but should perform well enough for a
+//   death test, which already incurs significant overhead by launching
+//   a child process.
+//
 // Known caveats:
 //
 //   A "threadsafe" style death test obtains the path to the test
--- a/include/gtest/internal/gtest-port.h
+++ b/include/gtest/internal/gtest-port.h
@ -97,6 +97,9 @@
 //   GTEST_HAS_TYPED_TEST   - defined iff typed tests are supported.
 //   GTEST_HAS_TYPED_TEST_P - defined iff type-parameterized tests are
 //                            supported.
+//   GTEST_USES_POSIX_RE    - defined iff enhanced POSIX regex is used.
+//   GTEST_USES_SIMPLE_RE   - defined iff our own simple regex is used;
+//                            the above two are mutually exclusive.
 //
 // Macros for basic C++ coding:
 //   GTEST_AMBIGUOUS_ELSE_BLOCKER_ - for disabling a gcc warning.
@ -187,6 +190,23 @@
 #define GTEST_OS_SOLARIS
 #endif  // _MSC_VER

+#if defined(GTEST_OS_LINUX)
+
+// On some platforms, <regex.h> needs someone to define size_t, and
+// won't compile otherwise.  We can #include it here as we already
+// included <stdlib.h>, which is guaranteed to define size_t through
+// <stddef.h>.
+#include <regex.h>  // NOLINT
+#define GTEST_USES_POSIX_RE 1
+
+#else
+
+// We are not on Linux, so <regex.h> may not be available.  Use our
+// own simple regex implementation instead.
+#define GTEST_USES_SIMPLE_RE 1
+
+#endif  // GTEST_OS_LINUX
+
 // Determines whether ::std::string and ::string are available.

 #ifndef GTEST_HAS_STD_STRING
@ -352,11 +372,6 @@
 // Determines whether to support death tests.
 #if GTEST_HAS_STD_STRING && GTEST_HAS_CLONE
 #define GTEST_HAS_DEATH_TEST
-// On some platforms, <regex.h> needs someone to define size_t, and
-// won't compile otherwise.  We can #include it here as we already
-// included <stdlib.h>, which is guaranteed to define size_t through
-// <stddef.h>.
-#include <regex.h>
 #include <vector>
 #include <fcntl.h>
 #include <sys/mman.h>
@ -375,8 +390,8 @@
 // Typed tests need <typeinfo> and variadic macros, which gcc and VC
 // 8.0+ support.
 #if defined(__GNUC__) || (_MSC_VER >= 1400)
-#define GTEST_HAS_TYPED_TEST
-#define GTEST_HAS_TYPED_TEST_P
+#define GTEST_HAS_TYPED_TEST 1
+#define GTEST_HAS_TYPED_TEST_P 1
 #endif  // defined(__GNUC__) || (_MSC_VER >= 1400)

 // Determines whether to support Combine(). This only makes sense when
@ -490,8 +505,6 @@ class scoped_ptr {
  GTEST_DISALLOW_COPY_AND_ASSIGN_(scoped_ptr);
 };

-#ifdef GTEST_HAS_DEATH_TEST
-
 // Defines RE.

 // A simple C++ wrapper for <regex.h>.  It uses the POSIX Enxtended
@ -549,12 +562,16 @@ class RE {
  // String type here, in order to simplify dependencies between the
  // files.
  const char* pattern_;
+  bool is_valid_;
+#if GTEST_USES_POSIX_RE
  regex_t full_regex_;     // For FullMatch().
  regex_t partial_regex_;  // For PartialMatch().
-  bool is_valid_;
-};
+#else  // GTEST_USES_SIMPLE_RE
+  const char* full_pattern_;  // For FullMatch();
+#endif

-#endif  // GTEST_HAS_DEATH_TEST
+  GTEST_DISALLOW_COPY_AND_ASSIGN_(RE);
+};

 // Defines logging utilities:
 //   GTEST_LOG_()   - logs messages at the specified severity level.
--- a/src/gtest-filepath.cc
+++ b/src/gtest-filepath.cc
@ -215,7 +215,6 @@ bool FilePath::DirectoryExists() const {
 // root directory per disk drive.)
 bool FilePath::IsRootDirectory() const {
 #ifdef GTEST_OS_WINDOWS
-  const char* const name = pathname_.c_str();
  // TODO(wan@google.com): on Windows a network share like
  // \\server\share can be a root directory, although it cannot be the
  // current directory.  Handle this properly.
--- a/src/gtest-internal-inl.h
+++ b/src/gtest-internal-inl.h
@ -1266,6 +1266,22 @@ inline UnitTestImpl* GetUnitTestImpl() {
  return UnitTest::GetInstance()->impl();
 }

+// Internal helper functions for implementing the simple regular
+// expression matcher.
+bool IsInSet(char ch, const char* str);
+bool IsDigit(char ch);
+bool IsPunct(char ch);
+bool IsRepeat(char ch);
+bool IsWhiteSpace(char ch);
+bool IsWordChar(char ch);
+bool IsValidEscape(char ch);
+bool AtomMatchesChar(bool escaped, char pattern, char ch);
+bool ValidateRegex(const char* regex);
+bool MatchRegexAtHead(const char* regex, const char* str);
+bool MatchRepetitionAndRegexAtHead(
+    bool escaped, char ch, char repeat, const char* regex, const char* str);
+bool MatchRegexAnywhere(const char* regex, const char* str);
+
 // Parses the command line for Google Test flags, without initializing
 // other parts of Google Test.
 void ParseGoogleTestFlagsOnly(int* argc, char** argv);
--- a/src/gtest-port.cc
+++ b/src/gtest-port.cc
@ -39,6 +39,10 @@
 #include <regex.h>
 #endif  // GTEST_HAS_DEATH_TEST

+#if GTEST_USES_SIMPLE_RE
+#include <string.h>
+#endif
+
 #ifdef _WIN32_WCE
 #include <windows.h>  // For TerminateProcess()
 #endif  // _WIN32_WCE
@ -47,11 +51,19 @@
 #include <gtest/gtest-message.h>
 #include <gtest/internal/gtest-string.h>

+// Indicates that this translation unit is part of Google Test's
+// implementation.  It must come before gtest-internal-inl.h is
+// included, or there will be a compiler error.  This trick is to
+// prevent a user from accidentally including gtest-internal-inl.h in
+// his code.
+#define GTEST_IMPLEMENTATION
+#include "src/gtest-internal-inl.h"
+#undef GTEST_IMPLEMENTATION

 namespace testing {
 namespace internal {

-#ifdef GTEST_HAS_DEATH_TEST
+#if GTEST_USES_POSIX_RE

 // Implements RE.  Currently only needed for death tests.

@ -101,7 +113,262 @@ void RE::Init(const char* regex) {
  delete[] full_pattern;
 }

-#endif  // GTEST_HAS_DEATH_TEST
+#elif GTEST_USES_SIMPLE_RE
+
+// Returns true iff ch appears anywhere in str (excluding the
+// terminating '\0' character).
+bool IsInSet(char ch, const char* str) {
+  return ch != '\0' && strchr(str, ch) != NULL;
+}
+
+// Returns true iff ch belongs to the given classification.  Unlike
+// similar functions in <ctype.h>, these aren't affected by the
+// current locale.
+bool IsDigit(char ch) { return '0' <= ch && ch <= '9'; }
+bool IsPunct(char ch) {
+  return IsInSet(ch, "^-!\"#$%&'()*+,./:;<=>?@[\\]_`{|}~");
+}
+bool IsRepeat(char ch) { return IsInSet(ch, "?*+"); }
+bool IsWhiteSpace(char ch) { return IsInSet(ch, " \f\n\r\t\v"); }
+bool IsWordChar(char ch) {
+  return ('a' <= ch && ch <= 'z') || ('A' <= ch && ch <= 'Z') ||
+      ('0' <= ch && ch <= '9') || ch == '_';
+}
+
+// Returns true iff "\\c" is a supported escape sequence.
+bool IsValidEscape(char c) {
+  return (IsPunct(c) || IsInSet(c, "dDfnrsStvwW"));
+}
+
+// Returns true iff the given atom (specified by escaped and pattern)
+// matches ch.  The result is undefined if the atom is invalid.
+bool AtomMatchesChar(bool escaped, char pattern_char, char ch) {
+  if (escaped) {  // "\\p" where p is pattern_char.
+    switch (pattern_char) {
+      case 'd': return IsDigit(ch);
+      case 'D': return !IsDigit(ch);
+      case 'f': return ch == '\f';
+      case 'n': return ch == '\n';
+      case 'r': return ch == '\r';
+      case 's': return IsWhiteSpace(ch);
+      case 'S': return !IsWhiteSpace(ch);
+      case 't': return ch == '\t';
+      case 'v': return ch == '\v';
+      case 'w': return IsWordChar(ch);
+      case 'W': return !IsWordChar(ch);
+    }
+    return IsPunct(pattern_char) && pattern_char == ch;
+  }
+
+  return (pattern_char == '.' && ch != '\n') || pattern_char == ch;
+}
+
+// Helper function used by ValidateRegex() to format error messages.
+String FormatRegexSyntaxError(const char* regex, int index) {
+  return (Message() << "Syntax error at index " << index
+          << " in simple regular expression \"" << regex << "\": ").GetString();
+}
+
+// Generates non-fatal failures and returns false if regex is invalid;
+// otherwise returns true.
+bool ValidateRegex(const char* regex) {
+  if (regex == NULL) {
+    // TODO(wan@google.com): fix the source file location in the
+    // assertion failures to match where the regex is used in user
+    // code.
+    ADD_FAILURE() << "NULL is not a valid simple regular expression.";
+    return false;
+  }
+
+  bool is_valid = true;
+
+  // True iff ?, *, or + can follow the previous atom.
+  bool prev_repeatable = false;
+  for (int i = 0; regex[i]; i++) {
+    if (regex[i] == '\\') {  // An escape sequence
+      i++;
+      if (regex[i] == '\0') {
+        ADD_FAILURE() << FormatRegexSyntaxError(regex, i - 1)
+                      << "'\\' cannot appear at the end.";
+        return false;
+      }
+
+      if (!IsValidEscape(regex[i])) {
+        ADD_FAILURE() << FormatRegexSyntaxError(regex, i - 1)
+                      << "invalid escape sequence \"\\" << regex[i] << "\".";
+        is_valid = false;
+      }
+      prev_repeatable = true;
+    } else {  // Not an escape sequence.
+      const char ch = regex[i];
+
+      if (ch == '^' && i > 0) {
+        ADD_FAILURE() << FormatRegexSyntaxError(regex, i)
+                      << "'^' can only appear at the beginning.";
+        is_valid = false;
+      } else if (ch == '$' && regex[i + 1] != '\0') {
+        ADD_FAILURE() << FormatRegexSyntaxError(regex, i)
+                      << "'$' can only appear at the end.";
+        is_valid = false;
+      } else if (IsInSet(ch, "()[]{}|")) {
+        ADD_FAILURE() << FormatRegexSyntaxError(regex, i)
+                      << "'" << ch << "' is unsupported.";
+        is_valid = false;
+      } else if (IsRepeat(ch) && !prev_repeatable) {
+        ADD_FAILURE() << FormatRegexSyntaxError(regex, i)
+                      << "'" << ch << "' can only follow a repeatable token.";
+        is_valid = false;
+      }
+
+      prev_repeatable = !IsInSet(ch, "^$?*+");
+    }
+  }
+
+  return is_valid;
+}
+
+// Matches a repeated regex atom followed by a valid simple regular
+// expression.  The regex atom is defined as c if escaped is false,
+// or \c otherwise.  repeat is the repetition meta character (?, *,
+// or +).  The behavior is undefined if str contains too many
+// characters to be indexable by size_t, in which case the test will
+// probably time out anyway.  We are fine with this limitation as
+// std::string has it too.
+bool MatchRepetitionAndRegexAtHead(
+    bool escaped, char c, char repeat, const char* regex,
+    const char* str) {
+  const size_t min_count = (repeat == '+') ? 1 : 0;
+  const size_t max_count = (repeat == '?') ? 1 :
+      static_cast<size_t>(-1) - 1;
+  // We cannot call numeric_limits::max() as it conflicts with the
+  // max() macro on Windows.
+
+  for (size_t i = 0; i <= max_count; ++i) {
+    // We know that the atom matches each of the first i characters in str.
+    if (i >= min_count && MatchRegexAtHead(regex, str + i)) {
+      // We have enough matches at the head, and the tail matches too.
+      // Since we only care about *whether* the pattern matches str
+      // (as opposed to *how* it matches), there is no need to find a
+      // greedy match.
+      return true;
+    }
+    if (str[i] == '\0' || !AtomMatchesChar(escaped, c, str[i]))
+      return false;
+  }
+  return false;
+}
+
+// Returns true iff regex matches a prefix of str.  regex must be a
+// valid simple regular expression and not start with "^", or the
+// result is undefined.
+bool MatchRegexAtHead(const char* regex, const char* str) {
+  if (*regex == '\0')  // An empty regex matches a prefix of anything.
+    return true;
+
+  // "$" only matches the end of a string.  Note that regex being
+  // valid guarantees that there's nothing after "$" in it.
+  if (*regex == '$')
+    return *str == '\0';
+
+  // Is the first thing in regex an escape sequence?
+  const bool escaped = *regex == '\\';
+  if (escaped)
+    ++regex;
+  if (IsRepeat(regex[1])) {
+    // MatchRepetitionAndRegexAtHead() calls MatchRegexAtHead(), so
+    // here's an indirect recursion.  It terminates as the regex gets
+    // shorter in each recursion.
+    return MatchRepetitionAndRegexAtHead(
+        escaped, regex[0], regex[1], regex + 2, str);
+  } else {
+    // regex isn't empty, isn't "$", and doesn't start with a
+    // repetition.  We match the first atom of regex with the first
+    // character of str and recurse.
+    return (*str != '\0') && AtomMatchesChar(escaped, *regex, *str) &&
+        MatchRegexAtHead(regex + 1, str + 1);
+  }
+}
+
+// Returns true iff regex matches any substring of str.  regex must be
+// a valid simple regular expression, or the result is undefined.
+//
+// The algorithm is recursive, but the recursion depth doesn't exceed
+// the regex length, so we won't need to worry about running out of
+// stack space normally.  In rare cases the time complexity can be
+// exponential with respect to the regex length + the string length,
+// but usually it's must faster (often close to linear).
+bool MatchRegexAnywhere(const char* regex, const char* str) {
+  if (regex == NULL || str == NULL)
+    return false;
+
+  if (*regex == '^')
+    return MatchRegexAtHead(regex + 1, str);
+
+  // A successful match can be anywhere in str.
+  do {
+    if (MatchRegexAtHead(regex, str))
+      return true;
+  } while (*str++ != '\0');
+  return false;
+}
+
+// Implements the RE class.
+
+RE::~RE() {
+  free(const_cast<char*>(pattern_));
+  free(const_cast<char*>(full_pattern_));
+}
+
+// Returns true iff regular expression re matches the entire str.
+bool RE::FullMatch(const char* str, const RE& re) {
+  return re.is_valid_ && MatchRegexAnywhere(re.full_pattern_, str);
+}
+
+// Returns true iff regular expression re matches a substring of str
+// (including str itself).
+bool RE::PartialMatch(const char* str, const RE& re) {
+  return re.is_valid_ && MatchRegexAnywhere(re.pattern_, str);
+}
+
+// Initializes an RE from its string representation.
+void RE::Init(const char* regex) {
+  pattern_ = full_pattern_ = NULL;
+  if (regex != NULL) {
+#ifdef GTEST_OS_WINDOWS
+    pattern_ = _strdup(regex);
+#else
+    pattern_ = strdup(regex);
+#endif
+  }
+
+  is_valid_ = ValidateRegex(regex);
+  if (!is_valid_) {
+    // No need to calculate the full pattern when the regex is invalid.
+    return;
+  }
+
+  const size_t len = strlen(regex);
+  // Reserves enough bytes to hold the regular expression used for a
+  // full match: we need space to prepend a '^', append a '$', and
+  // terminate the string with '\0'.
+  char* buffer = static_cast<char*>(malloc(len + 3));
+  full_pattern_ = buffer;
+
+  if (*regex != '^')
+    *buffer++ = '^';  // Makes sure full_pattern_ starts with '^'.
+
+  // We don't use snprintf or strncpy, as they trigger a warning when
+  // compiled with VC++ 8.0.
+  memcpy(buffer, regex, len);
+  buffer += len;
+
+  if (len == 0 || regex[len - 1] != '$')
+    *buffer++ = '$';  // Makes sure full_pattern_ ends with '$'.
+
+  *buffer = '\0';
+}
+
+#endif  // GTEST_USES_POSIX_RE

 // Logs a message at the given severity level.
 void GTestLog(GTestLogSeverity severity, const char* file,
--- a/test/gtest-port_test.cc
+++ b/test/gtest-port_test.cc
@ -27,7 +27,7 @@
 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 //
-// Author: vladl@google.com (Vlad Losev)
+// Authors: vladl@google.com (Vlad Losev), wan@google.com (Zhanyong Wan)
 //
 // This file tests the internal cross-platform support utilities.

@ -35,6 +35,18 @@
 #include <gtest/gtest.h>
 #include <gtest/gtest-spi.h>

+// Indicates that this translation unit is part of Google Test's
+// implementation.  It must come before gtest-internal-inl.h is
+// included, or there will be a compiler error.  This trick is to
+// prevent a user from accidentally including gtest-internal-inl.h in
+// his code.
+#define GTEST_IMPLEMENTATION
+#include "src/gtest-internal-inl.h"
+#undef GTEST_IMPLEMENTATION
+
+namespace testing {
+namespace internal {
+
 TEST(GtestCheckSyntaxTest, BehavesLikeASingleStatement) {
  if (false)
    GTEST_CHECK_(false) << "This should never be executed; "
@ -87,9 +99,7 @@ TEST(GtestCheckDeathTest, LivesSilentlyOnSuccess) {

 #endif  // GTEST_HAS_DEATH_TEST

-#ifdef GTEST_USES_POSIX_RE
-
-using ::testing::internal::RE;
+#if GTEST_USES_POSIX_RE

 template <typename Str>
 class RETest : public ::testing::Test {};
@ -109,30 +119,30 @@ TYPED_TEST_CASE(RETest, StringTypes);

 // Tests RE's implicit constructors.
 TYPED_TEST(RETest, ImplicitConstructorWorks) {
-  const RE empty = TypeParam("");
+  const RE empty(TypeParam(""));
  EXPECT_STREQ("", empty.pattern());

-  const RE simple = TypeParam("hello");
+  const RE simple(TypeParam("hello"));
  EXPECT_STREQ("hello", simple.pattern());

-  const RE normal = TypeParam(".*(\\w+)");
+  const RE normal(TypeParam(".*(\\w+)"));
  EXPECT_STREQ(".*(\\w+)", normal.pattern());
 }

 // Tests that RE's constructors reject invalid regular expressions.
 TYPED_TEST(RETest, RejectsInvalidRegex) {
  EXPECT_NONFATAL_FAILURE({
-    const RE invalid = TypeParam("?");
+    const RE invalid(TypeParam("?"));
  }, "\"?\" is not a valid POSIX Extended regular expression.");
 }

 // Tests RE::FullMatch().
 TYPED_TEST(RETest, FullMatchWorks) {
-  const RE empty = TypeParam("");
+  const RE empty(TypeParam(""));
  EXPECT_TRUE(RE::FullMatch(TypeParam(""), empty));
  EXPECT_FALSE(RE::FullMatch(TypeParam("a"), empty));

-  const RE re = TypeParam("a.*z");
+  const RE re(TypeParam("a.*z"));
  EXPECT_TRUE(RE::FullMatch(TypeParam("az"), re));
  EXPECT_TRUE(RE::FullMatch(TypeParam("axyz"), re));
  EXPECT_FALSE(RE::FullMatch(TypeParam("baz"), re));
@ -141,11 +151,11 @@ TYPED_TEST(RETest, FullMatchWorks) {

 // Tests RE::PartialMatch().
 TYPED_TEST(RETest, PartialMatchWorks) {
-  const RE empty = TypeParam("");
+  const RE empty(TypeParam(""));
  EXPECT_TRUE(RE::PartialMatch(TypeParam(""), empty));
  EXPECT_TRUE(RE::PartialMatch(TypeParam("a"), empty));

-  const RE re = TypeParam("a.*z");
+  const RE re(TypeParam("a.*z"));
  EXPECT_TRUE(RE::PartialMatch(TypeParam("az"), re));
  EXPECT_TRUE(RE::PartialMatch(TypeParam("axyz"), re));
  EXPECT_TRUE(RE::PartialMatch(TypeParam("baz"), re));
@ -153,4 +163,471 @@ TYPED_TEST(RETest, PartialMatchWorks) {
  EXPECT_FALSE(RE::PartialMatch(TypeParam("zza"), re));
 }

+#elif GTEST_USES_SIMPLE_RE
+
+TEST(IsInSetTest, NulCharIsNotInAnySet) {
+  EXPECT_FALSE(IsInSet('\0', ""));
+  EXPECT_FALSE(IsInSet('\0', "\0"));
+  EXPECT_FALSE(IsInSet('\0', "a"));
+}
+
+TEST(IsInSetTest, WorksForNonNulChars) {
+  EXPECT_FALSE(IsInSet('a', "Ab"));
+  EXPECT_FALSE(IsInSet('c', ""));
+
+  EXPECT_TRUE(IsInSet('b', "bcd"));
+  EXPECT_TRUE(IsInSet('b', "ab"));
+}
+
+TEST(IsDigitTest, IsFalseForNonDigit) {
+  EXPECT_FALSE(IsDigit('\0'));
+  EXPECT_FALSE(IsDigit(' '));
+  EXPECT_FALSE(IsDigit('+'));
+  EXPECT_FALSE(IsDigit('-'));
+  EXPECT_FALSE(IsDigit('.'));
+  EXPECT_FALSE(IsDigit('a'));
+}
+
+TEST(IsDigitTest, IsTrueForDigit) {
+  EXPECT_TRUE(IsDigit('0'));
+  EXPECT_TRUE(IsDigit('1'));
+  EXPECT_TRUE(IsDigit('5'));
+  EXPECT_TRUE(IsDigit('9'));
+}
+
+TEST(IsPunctTest, IsFalseForNonPunct) {
+  EXPECT_FALSE(IsPunct('\0'));
+  EXPECT_FALSE(IsPunct(' '));
+  EXPECT_FALSE(IsPunct('\n'));
+  EXPECT_FALSE(IsPunct('a'));
+  EXPECT_FALSE(IsPunct('0'));
+}
+
+TEST(IsPunctTest, IsTrueForPunct) {
+  for (const char* p = "^-!\"#$%&'()*+,./:;<=>?@[\\]_`{|}~"; *p; p++) {
+    EXPECT_PRED1(IsPunct, *p);
+  }
+}
+
+TEST(IsRepeatTest, IsFalseForNonRepeatChar) {
+  EXPECT_FALSE(IsRepeat('\0'));
+  EXPECT_FALSE(IsRepeat(' '));
+  EXPECT_FALSE(IsRepeat('a'));
+  EXPECT_FALSE(IsRepeat('1'));
+  EXPECT_FALSE(IsRepeat('-'));
+}
+
+TEST(IsRepeatTest, IsTrueForRepeatChar) {
+  EXPECT_TRUE(IsRepeat('?'));
+  EXPECT_TRUE(IsRepeat('*'));
+  EXPECT_TRUE(IsRepeat('+'));
+}
+
+TEST(IsWhiteSpaceTest, IsFalseForNonWhiteSpace) {
+  EXPECT_FALSE(IsWhiteSpace('\0'));
+  EXPECT_FALSE(IsWhiteSpace('a'));
+  EXPECT_FALSE(IsWhiteSpace('1'));
+  EXPECT_FALSE(IsWhiteSpace('+'));
+  EXPECT_FALSE(IsWhiteSpace('_'));
+}
+
+TEST(IsWhiteSpaceTest, IsTrueForWhiteSpace) {
+  EXPECT_TRUE(IsWhiteSpace(' '));
+  EXPECT_TRUE(IsWhiteSpace('\n'));
+  EXPECT_TRUE(IsWhiteSpace('\r'));
+  EXPECT_TRUE(IsWhiteSpace('\t'));
+  EXPECT_TRUE(IsWhiteSpace('\v'));
+  EXPECT_TRUE(IsWhiteSpace('\f'));
+}
+
+TEST(IsWordCharTest, IsFalseForNonWordChar) {
+  EXPECT_FALSE(IsWordChar('\0'));
+  EXPECT_FALSE(IsWordChar('+'));
+  EXPECT_FALSE(IsWordChar('.'));
+  EXPECT_FALSE(IsWordChar(' '));
+  EXPECT_FALSE(IsWordChar('\n'));
+}
+
+TEST(IsWordCharTest, IsTrueForLetter) {
+  EXPECT_TRUE(IsWordChar('a'));
+  EXPECT_TRUE(IsWordChar('b'));
+  EXPECT_TRUE(IsWordChar('A'));
+  EXPECT_TRUE(IsWordChar('Z'));
+}
+
+TEST(IsWordCharTest, IsTrueForDigit) {
+  EXPECT_TRUE(IsWordChar('0'));
+  EXPECT_TRUE(IsWordChar('1'));
+  EXPECT_TRUE(IsWordChar('7'));
+  EXPECT_TRUE(IsWordChar('9'));
+}
+
+TEST(IsWordCharTest, IsTrueForUnderscore) {
+  EXPECT_TRUE(IsWordChar('_'));
+}
+
+TEST(IsValidEscapeTest, IsFalseForNonPrintable) {
+  EXPECT_FALSE(IsValidEscape('\0'));
+  EXPECT_FALSE(IsValidEscape('\007'));
+}
+
+TEST(IsValidEscapeTest, IsFalseForDigit) {
+  EXPECT_FALSE(IsValidEscape('0'));
+  EXPECT_FALSE(IsValidEscape('9'));
+}
+
+TEST(IsValidEscapeTest, IsFalseForWhiteSpace) {
+  EXPECT_FALSE(IsValidEscape(' '));
+  EXPECT_FALSE(IsValidEscape('\n'));
+}
+
+TEST(IsValidEscapeTest, IsFalseForSomeLetter) {
+  EXPECT_FALSE(IsValidEscape('a'));
+  EXPECT_FALSE(IsValidEscape('Z'));
+}
+
+TEST(IsValidEscapeTest, IsTrueForPunct) {
+  EXPECT_TRUE(IsValidEscape('.'));
+  EXPECT_TRUE(IsValidEscape('-'));
+  EXPECT_TRUE(IsValidEscape('^'));
+  EXPECT_TRUE(IsValidEscape('$'));
+  EXPECT_TRUE(IsValidEscape('('));
+  EXPECT_TRUE(IsValidEscape(']'));
+  EXPECT_TRUE(IsValidEscape('{'));
+  EXPECT_TRUE(IsValidEscape('|'));
+}
+
+TEST(IsValidEscapeTest, IsTrueForSomeLetter) {
+  EXPECT_TRUE(IsValidEscape('d'));
+  EXPECT_TRUE(IsValidEscape('D'));
+  EXPECT_TRUE(IsValidEscape('s'));
+  EXPECT_TRUE(IsValidEscape('S'));
+  EXPECT_TRUE(IsValidEscape('w'));
+  EXPECT_TRUE(IsValidEscape('W'));
+}
+
+TEST(AtomMatchesCharTest, EscapedPunct) {
+  EXPECT_FALSE(AtomMatchesChar(true, '\\', '\0'));
+  EXPECT_FALSE(AtomMatchesChar(true, '\\', ' '));
+  EXPECT_FALSE(AtomMatchesChar(true, '_', '.'));
+  EXPECT_FALSE(AtomMatchesChar(true, '.', 'a'));
+
+  EXPECT_TRUE(AtomMatchesChar(true, '\\', '\\'));
+  EXPECT_TRUE(AtomMatchesChar(true, '_', '_'));
+  EXPECT_TRUE(AtomMatchesChar(true, '+', '+'));
+  EXPECT_TRUE(AtomMatchesChar(true, '.', '.'));
+}
+
+TEST(AtomMatchesCharTest, Escaped_d) {
+  EXPECT_FALSE(AtomMatchesChar(true, 'd', '\0'));
+  EXPECT_FALSE(AtomMatchesChar(true, 'd', 'a'));
+  EXPECT_FALSE(AtomMatchesChar(true, 'd', '.'));
+
+  EXPECT_TRUE(AtomMatchesChar(true, 'd', '0'));
+  EXPECT_TRUE(AtomMatchesChar(true, 'd', '9'));
+}
+
+TEST(AtomMatchesCharTest, Escaped_D) {
+  EXPECT_FALSE(AtomMatchesChar(true, 'D', '0'));
+  EXPECT_FALSE(AtomMatchesChar(true, 'D', '9'));
+
+  EXPECT_TRUE(AtomMatchesChar(true, 'D', '\0'));
+  EXPECT_TRUE(AtomMatchesChar(true, 'D', 'a'));
+  EXPECT_TRUE(AtomMatchesChar(true, 'D', '-'));
+}
+
+TEST(AtomMatchesCharTest, Escaped_s) {
+  EXPECT_FALSE(AtomMatchesChar(true, 's', '\0'));
+  EXPECT_FALSE(AtomMatchesChar(true, 's', 'a'));
+  EXPECT_FALSE(AtomMatchesChar(true, 's', '.'));
+  EXPECT_FALSE(AtomMatchesChar(true, 's', '9'));
+
+  EXPECT_TRUE(AtomMatchesChar(true, 's', ' '));
+  EXPECT_TRUE(AtomMatchesChar(true, 's', '\n'));
+  EXPECT_TRUE(AtomMatchesChar(true, 's', '\t'));
+}
+
+TEST(AtomMatchesCharTest, Escaped_S) {
+  EXPECT_FALSE(AtomMatchesChar(true, 'S', ' '));
+  EXPECT_FALSE(AtomMatchesChar(true, 'S', '\r'));
+
+  EXPECT_TRUE(AtomMatchesChar(true, 'S', '\0'));
+  EXPECT_TRUE(AtomMatchesChar(true, 'S', 'a'));
+  EXPECT_TRUE(AtomMatchesChar(true, 'S', '9'));
+}
+
+TEST(AtomMatchesCharTest, Escaped_w) {
+  EXPECT_FALSE(AtomMatchesChar(true, 'w', '\0'));
+  EXPECT_FALSE(AtomMatchesChar(true, 'w', '+'));
+  EXPECT_FALSE(AtomMatchesChar(true, 'w', ' '));
+  EXPECT_FALSE(AtomMatchesChar(true, 'w', '\n'));
+
+  EXPECT_TRUE(AtomMatchesChar(true, 'w', '0'));
+  EXPECT_TRUE(AtomMatchesChar(true, 'w', 'b'));
+  EXPECT_TRUE(AtomMatchesChar(true, 'w', 'C'));
+  EXPECT_TRUE(AtomMatchesChar(true, 'w', '_'));
+}
+
+TEST(AtomMatchesCharTest, Escaped_W) {
+  EXPECT_FALSE(AtomMatchesChar(true, 'W', 'A'));
+  EXPECT_FALSE(AtomMatchesChar(true, 'W', 'b'));
+  EXPECT_FALSE(AtomMatchesChar(true, 'W', '9'));
+  EXPECT_FALSE(AtomMatchesChar(true, 'W', '_'));
+
+  EXPECT_TRUE(AtomMatchesChar(true, 'W', '\0'));
+  EXPECT_TRUE(AtomMatchesChar(true, 'W', '*'));
+  EXPECT_TRUE(AtomMatchesChar(true, 'W', '\n'));
+}
+
+TEST(AtomMatchesCharTest, EscapedWhiteSpace) {
+  EXPECT_FALSE(AtomMatchesChar(true, 'f', '\0'));
+  EXPECT_FALSE(AtomMatchesChar(true, 'f', '\n'));
+  EXPECT_FALSE(AtomMatchesChar(true, 'n', '\0'));
+  EXPECT_FALSE(AtomMatchesChar(true, 'n', '\r'));
+  EXPECT_FALSE(AtomMatchesChar(true, 'r', '\0'));
+  EXPECT_FALSE(AtomMatchesChar(true, 'r', 'a'));
+  EXPECT_FALSE(AtomMatchesChar(true, 't', '\0'));
+  EXPECT_FALSE(AtomMatchesChar(true, 't', 't'));
+  EXPECT_FALSE(AtomMatchesChar(true, 'v', '\0'));
+  EXPECT_FALSE(AtomMatchesChar(true, 'v', '\f'));
+
+  EXPECT_TRUE(AtomMatchesChar(true, 'f', '\f'));
+  EXPECT_TRUE(AtomMatchesChar(true, 'n', '\n'));
+  EXPECT_TRUE(AtomMatchesChar(true, 'r', '\r'));
+  EXPECT_TRUE(AtomMatchesChar(true, 't', '\t'));
+  EXPECT_TRUE(AtomMatchesChar(true, 'v', '\v'));
+}
+
+TEST(AtomMatchesCharTest, UnescapedDot) {
+  EXPECT_FALSE(AtomMatchesChar(false, '.', '\n'));
+
+  EXPECT_TRUE(AtomMatchesChar(false, '.', '\0'));
+  EXPECT_TRUE(AtomMatchesChar(false, '.', '.'));
+  EXPECT_TRUE(AtomMatchesChar(false, '.', 'a'));
+  EXPECT_TRUE(AtomMatchesChar(false, '.', ' '));
+}
+
+TEST(AtomMatchesCharTest, UnescapedChar) {
+  EXPECT_FALSE(AtomMatchesChar(false, 'a', '\0'));
+  EXPECT_FALSE(AtomMatchesChar(false, 'a', 'b'));
+  EXPECT_FALSE(AtomMatchesChar(false, '$', 'a'));
+
+  EXPECT_TRUE(AtomMatchesChar(false, '$', '$'));
+  EXPECT_TRUE(AtomMatchesChar(false, '5', '5'));
+  EXPECT_TRUE(AtomMatchesChar(false, 'Z', 'Z'));
+}
+
+TEST(ValidateRegexTest, GeneratesFailureAndReturnsFalseForInvalid) {
+  EXPECT_NONFATAL_FAILURE(ASSERT_FALSE(ValidateRegex(NULL)),
+                          "NULL is not a valid simple regular expression");
+  EXPECT_NONFATAL_FAILURE(
+      ASSERT_FALSE(ValidateRegex("a\\")),
+      "Syntax error at index 1 in simple regular expression \"a\\\": ");
+  EXPECT_NONFATAL_FAILURE(ASSERT_FALSE(ValidateRegex("a\\")),
+                          "'\\' cannot appear at the end");
+  EXPECT_NONFATAL_FAILURE(ASSERT_FALSE(ValidateRegex("\\n\\")),
+                          "'\\' cannot appear at the end");
+  EXPECT_NONFATAL_FAILURE(ASSERT_FALSE(ValidateRegex("\\s\\hb")),
+                          "invalid escape sequence \"\\h\"");
+  EXPECT_NONFATAL_FAILURE(ASSERT_FALSE(ValidateRegex("^^")),
+                          "'^' can only appear at the beginning");
+  EXPECT_NONFATAL_FAILURE(ASSERT_FALSE(ValidateRegex(".*^b")),
+                          "'^' can only appear at the beginning");
+  EXPECT_NONFATAL_FAILURE(ASSERT_FALSE(ValidateRegex("$$")),
+                          "'$' can only appear at the end");
+  EXPECT_NONFATAL_FAILURE(ASSERT_FALSE(ValidateRegex("^$a")),
+                          "'$' can only appear at the end");
+  EXPECT_NONFATAL_FAILURE(ASSERT_FALSE(ValidateRegex("a(b")),
+                          "'(' is unsupported");
+  EXPECT_NONFATAL_FAILURE(ASSERT_FALSE(ValidateRegex("ab)")),
+                          "')' is unsupported");
+  EXPECT_NONFATAL_FAILURE(ASSERT_FALSE(ValidateRegex("[ab")),
+                          "'[' is unsupported");
+  EXPECT_NONFATAL_FAILURE(ASSERT_FALSE(ValidateRegex("a{2")),
+                          "'{' is unsupported");
+  EXPECT_NONFATAL_FAILURE(ASSERT_FALSE(ValidateRegex("?")),
+                          "'?' can only follow a repeatable token");
+  EXPECT_NONFATAL_FAILURE(ASSERT_FALSE(ValidateRegex("^*")),
+                          "'*' can only follow a repeatable token");
+  EXPECT_NONFATAL_FAILURE(ASSERT_FALSE(ValidateRegex("5*+")),
+                          "'+' can only follow a repeatable token");
+}
+
+TEST(ValidateRegexTest, ReturnsTrueForValid) {
+  EXPECT_TRUE(ValidateRegex(""));
+  EXPECT_TRUE(ValidateRegex("a"));
+  EXPECT_TRUE(ValidateRegex(".*"));
+  EXPECT_TRUE(ValidateRegex("^a_+"));
+  EXPECT_TRUE(ValidateRegex("^a\\t\\&?"));
+  EXPECT_TRUE(ValidateRegex("09*$"));
+  EXPECT_TRUE(ValidateRegex("^Z$"));
+  EXPECT_TRUE(ValidateRegex("a\\^Z\\$\\(\\)\\|\\[\\]\\{\\}"));
+}
+
+TEST(MatchRepetitionAndRegexAtHeadTest, WorksForZeroOrOne) {
+  EXPECT_FALSE(MatchRepetitionAndRegexAtHead(false, 'a', '?', "a", "ba"));
+  // Repeating more than once.
+  EXPECT_FALSE(MatchRepetitionAndRegexAtHead(false, 'a', '?', "b", "aab"));
+
+  // Repeating zero times.
+  EXPECT_TRUE(MatchRepetitionAndRegexAtHead(false, 'a', '?', "b", "ba"));
+  // Repeating once.
+  EXPECT_TRUE(MatchRepetitionAndRegexAtHead(false, 'a', '?', "b", "ab"));
+  EXPECT_TRUE(MatchRepetitionAndRegexAtHead(false, '#', '?', ".", "##"));
+}
+
+TEST(MatchRepetitionAndRegexAtHeadTest, WorksForZeroOrMany) {
+  EXPECT_FALSE(MatchRepetitionAndRegexAtHead(false, '.', '*', "a$", "baab"));
+
+  // Repeating zero times.
+  EXPECT_TRUE(MatchRepetitionAndRegexAtHead(false, '.', '*', "b", "bc"));
+  // Repeating once.
+  EXPECT_TRUE(MatchRepetitionAndRegexAtHead(false, '.', '*', "b", "abc"));
+  // Repeating more than once.
+  EXPECT_TRUE(MatchRepetitionAndRegexAtHead(true, 'w', '*', "-", "ab_1-g"));
+}
+
+TEST(MatchRepetitionAndRegexAtHeadTest, WorksForOneOrMany) {
+  EXPECT_FALSE(MatchRepetitionAndRegexAtHead(false, '.', '+', "a$", "baab"));
+  // Repeating zero times.
+  EXPECT_FALSE(MatchRepetitionAndRegexAtHead(false, '.', '+', "b", "bc"));
+
+  // Repeating once.
+  EXPECT_TRUE(MatchRepetitionAndRegexAtHead(false, '.', '+', "b", "abc"));
+  // Repeating more than once.
+  EXPECT_TRUE(MatchRepetitionAndRegexAtHead(true, 'w', '+', "-", "ab_1-g"));
+}
+
+TEST(MatchRegexAtHeadTest, ReturnsTrueForEmptyRegex) {
+  EXPECT_TRUE(MatchRegexAtHead("", ""));
+  EXPECT_TRUE(MatchRegexAtHead("", "ab"));
+}
+
+TEST(MatchRegexAtHeadTest, WorksWhenDollarIsInRegex) {
+  EXPECT_FALSE(MatchRegexAtHead("$", "a"));
+
+  EXPECT_TRUE(MatchRegexAtHead("$", ""));
+  EXPECT_TRUE(MatchRegexAtHead("a$", "a"));
+}
+
+TEST(MatchRegexAtHeadTest, WorksWhenRegexStartsWithEscapeSequence) {
+  EXPECT_FALSE(MatchRegexAtHead("\\w", "+"));
+  EXPECT_FALSE(MatchRegexAtHead("\\W", "ab"));
+
+  EXPECT_TRUE(MatchRegexAtHead("\\sa", "\nab"));
+  EXPECT_TRUE(MatchRegexAtHead("\\d", "1a"));
+}
+
+TEST(MatchRegexAtHeadTest, WorksWhenRegexStartsWithRepetition) {
+  EXPECT_FALSE(MatchRegexAtHead(".+a", "abc"));
+  EXPECT_FALSE(MatchRegexAtHead("a?b", "aab"));
+
+  EXPECT_TRUE(MatchRegexAtHead(".*a", "bc12-ab"));
+  EXPECT_TRUE(MatchRegexAtHead("a?b", "b"));
+  EXPECT_TRUE(MatchRegexAtHead("a?b", "ab"));
+}
+
+TEST(MatchRegexAtHeadTest,
+     WorksWhenRegexStartsWithRepetionOfEscapeSequence) {
+  EXPECT_FALSE(MatchRegexAtHead("\\.+a", "abc"));
+  EXPECT_FALSE(MatchRegexAtHead("\\s?b", "  b"));
+
+  EXPECT_TRUE(MatchRegexAtHead("\\(*a", "((((ab"));
+  EXPECT_TRUE(MatchRegexAtHead("\\^?b", "^b"));
+  EXPECT_TRUE(MatchRegexAtHead("\\\\?b", "b"));
+  EXPECT_TRUE(MatchRegexAtHead("\\\\?b", "\\b"));
+}
+
+TEST(MatchRegexAtHeadTest, MatchesSequentially) {
+  EXPECT_FALSE(MatchRegexAtHead("ab.*c", "acabc"));
+
+  EXPECT_TRUE(MatchRegexAtHead("ab.*c", "ab-fsc"));
+}
+
+TEST(MatchRegexAnywhereTest, ReturnsFalseWhenStringIsNull) {
+  EXPECT_FALSE(MatchRegexAnywhere("", NULL));
+}
+
+TEST(MatchRegexAnywhereTest, WorksWhenRegexStartsWithCaret) {
+  EXPECT_FALSE(MatchRegexAnywhere("^a", "ba"));
+  EXPECT_FALSE(MatchRegexAnywhere("^$", "a"));
+
+  EXPECT_TRUE(MatchRegexAnywhere("^a", "ab"));
+  EXPECT_TRUE(MatchRegexAnywhere("^", "ab"));
+  EXPECT_TRUE(MatchRegexAnywhere("^$", ""));
+}
+
+TEST(MatchRegexAnywhereTest, ReturnsFalseWhenNoMatch) {
+  EXPECT_FALSE(MatchRegexAnywhere("a", "bcde123"));
+  EXPECT_FALSE(MatchRegexAnywhere("a.+a", "--aa88888888"));
+}
+
+TEST(MatchRegexAnywhereTest, ReturnsTrueWhenMatchingPrefix) {
+  EXPECT_TRUE(MatchRegexAnywhere("\\w+", "ab1_ - 5"));
+  EXPECT_TRUE(MatchRegexAnywhere(".*=", "="));
+  EXPECT_TRUE(MatchRegexAnywhere("x.*ab?.*bc", "xaaabc"));
+}
+
+TEST(MatchRegexAnywhereTest, ReturnsTrueWhenMatchingNonPrefix) {
+  EXPECT_TRUE(MatchRegexAnywhere("\\w+", "$$$ ab1_ - 5"));
+  EXPECT_TRUE(MatchRegexAnywhere("\\.+=", "=  ...="));
+}
+
+// Tests RE's implicit constructors.
+TEST(RETest, ImplicitConstructorWorks) {
+  const RE empty = "";
+  EXPECT_STREQ("", empty.pattern());
+
+  const RE simple = "hello";
+  EXPECT_STREQ("hello", simple.pattern());
+}
+
+// Tests that RE's constructors reject invalid regular expressions.
+TEST(RETest, RejectsInvalidRegex) {
+  EXPECT_NONFATAL_FAILURE({
+    const RE normal = NULL;
+  }, "NULL is not a valid simple regular expression");
+
+  EXPECT_NONFATAL_FAILURE({
+    const RE normal = ".*(\\w+";
+  }, "'(' is unsupported");
+
+  EXPECT_NONFATAL_FAILURE({
+    const RE invalid = "^?";
+  }, "'?' can only follow a repeatable token");
+}
+
+// Tests RE::FullMatch().
+TEST(RETest, FullMatchWorks) {
+  const RE empty("");
+  EXPECT_TRUE(RE::FullMatch("", empty));
+  EXPECT_FALSE(RE::FullMatch("a", empty));
+
+  const RE re1 = "a";
+  EXPECT_TRUE(RE::FullMatch("a", re1));
+
+  const RE re = "a.*z";
+  EXPECT_TRUE(RE::FullMatch("az", re));
+  EXPECT_TRUE(RE::FullMatch("axyz", re));
+  EXPECT_FALSE(RE::FullMatch("baz", re));
+  EXPECT_FALSE(RE::FullMatch("azy", re));
+}
+
+// Tests RE::PartialMatch().
+TEST(RETest, PartialMatchWorks) {
+  const RE empty = "";
+  EXPECT_TRUE(RE::PartialMatch("", empty));
+  EXPECT_TRUE(RE::PartialMatch("a", empty));
+
+  const RE re = "a.*z";
+  EXPECT_TRUE(RE::PartialMatch("az", re));
+  EXPECT_TRUE(RE::PartialMatch("axyz", re));
+  EXPECT_TRUE(RE::PartialMatch("baz", re));
+  EXPECT_TRUE(RE::PartialMatch("azy", re));
+  EXPECT_FALSE(RE::PartialMatch("zza", re));
+}
+
 #endif  // GTEST_USES_POSIX_RE
+
+}  // namespace internal
+}  // namespace testing