diff --git a/include/regex b/include/regex index e48f0722..02777741 100644 --- a/include/regex +++ b/include/regex @@ -720,6 +720,7 @@ typedef regex_token_iterator wsregex_token_iterator; #include <__config> #include #include <__locale> +#include #pragma GCC system_header @@ -895,7 +896,8 @@ enum error_type error_space, error_badrepeat, error_complexity, - error_stack + error_stack, + error_temp }; } // regex_constants @@ -1193,7 +1195,7 @@ regex_traits<_CharT>::__value(unsigned char __ch, int __radix) { __ch |= 0x20; // tolower if ('a' <= __ch && __ch <= 'f') - return __ch - 'a' + 10; + return __ch - ('a' - 10); } } return -1; @@ -1207,6 +1209,510 @@ regex_traits<_CharT>::__value(wchar_t __ch, int __radix) const return __value(static_cast(__ct_->narrow(__ch, char_type())), __radix); } +template > +class basic_regex +{ +public: + // types: + typedef _CharT value_type; + typedef regex_constants::syntax_option_type flag_type; + typedef typename _Traits::locale_type locale_type; + +private: + _Traits __traits_; + flag_type __flags_; + unsigned __marked_count_; + +public: + // constants: + static const/*expr*/ regex_constants::syntax_option_type icase = regex_constants::icase; + static const/*expr*/ regex_constants::syntax_option_type nosubs = regex_constants::nosubs; + static const/*expr*/ regex_constants::syntax_option_type optimize = regex_constants::optimize; + static const/*expr*/ regex_constants::syntax_option_type collate = regex_constants::collate; + static const/*expr*/ regex_constants::syntax_option_type ECMAScript = regex_constants::ECMAScript; + static const/*expr*/ regex_constants::syntax_option_type basic = regex_constants::basic; + static const/*expr*/ regex_constants::syntax_option_type extended = regex_constants::extended; + static const/*expr*/ regex_constants::syntax_option_type awk = regex_constants::awk; + static const/*expr*/ regex_constants::syntax_option_type grep = regex_constants::grep; + static const/*expr*/ regex_constants::syntax_option_type egrep = regex_constants::egrep; + + // construct/copy/destroy: + basic_regex(); + explicit basic_regex(const value_type* __p, flag_type __f = regex_constants::ECMAScript) + : __flags_(__f), __marked_count_(0) + {__parse(__p, __p + __traits_.length(__p));} + basic_regex(const value_type* __p, size_t __len, flag_type __f) + : __flags_(__f), __marked_count_(0) + {__parse(__p, __p + __len);} + basic_regex(const basic_regex&); +#ifdef _LIBCPP_MOVE + basic_regex(basic_regex&&); +#endif + template + explicit basic_regex(const basic_string& __p, + flag_type __f = regex_constants::ECMAScript) + : __flags_(__f), __marked_count_(0) + {__parse(__p.begin(), __p.end());} + template + basic_regex(_ForwardIterator __first, _ForwardIterator __last, + flag_type __f = regex_constants::ECMAScript) + : __flags_(__f), __marked_count_(0) + {__parse(__first, __last);} + basic_regex(initializer_list __il, + flag_type __f = regex_constants::ECMAScript) + : __flags_(__f), __marked_count_(0) + {__parse(__il.begin(), __il.end());} + + ~basic_regex(); + + basic_regex& operator=(const basic_regex&); +#ifdef _LIBCPP_MOVE + basic_regex& operator=(basic_regex&&); +#endif + basic_regex& operator=(const value_type* __p); + basic_regex& operator=(initializer_list __il); + template + basic_regex& operator=(const basic_string& __p); + + // assign: + basic_regex& assign(const basic_regex& __that); +#ifdef _LIBCPP_MOVE + basic_regex& assign(basic_regex&& __that); +#endif + basic_regex& assign(const value_type* __p, flag_type __f = regex_constants::ECMAScript); + basic_regex& assign(const value_type* __p, size_t __len, flag_type __f); + template + basic_regex& assign(const basic_string& __s, + flag_type __f = regex_constants::ECMAScript); + template + basic_regex& assign(_InputIterator __first, _InputIterator __last, + flag_type __f = regex_constants::ECMAScript); + basic_regex& assign(initializer_list __il, + flag_type = regex_constants::ECMAScript); + + // const operations: + unsigned mark_count() const {return __marked_count_;} + flag_type flags() const {return __flags_;} + + // locale: + locale_type imbue(locale_type __loc) {return __traits_.imbue(__loc);} + locale_type getloc() const {return __traits_.getloc();} + + // swap: + void swap(basic_regex&); + +private: + template + void __parse(_ForwardIterator __first, _ForwardIterator __last); + template + _ForwardIterator + __parse_basic_reg_exp(_ForwardIterator __first, _ForwardIterator __last); + template + _ForwardIterator + __parse_RE_expression(_ForwardIterator __first, _ForwardIterator __last); + template + _ForwardIterator + __parse_simple_RE(_ForwardIterator __first, _ForwardIterator __last); + template + _ForwardIterator + __parse_nondupl_RE(_ForwardIterator __first, _ForwardIterator __last); + template + _ForwardIterator + __parse_one_char_or_coll_elem_RE(_ForwardIterator __first, _ForwardIterator __last); + template + _ForwardIterator + __parse_Back_open_paren(_ForwardIterator __first, _ForwardIterator __last); + template + _ForwardIterator + __parse_Back_close_paren(_ForwardIterator __first, _ForwardIterator __last); + template + _ForwardIterator + __parse_Back_open_brace(_ForwardIterator __first, _ForwardIterator __last); + template + _ForwardIterator + __parse_Back_close_brace(_ForwardIterator __first, _ForwardIterator __last); + template + _ForwardIterator + __parse_BACKREF(_ForwardIterator __first, _ForwardIterator __last); + template + _ForwardIterator + __parse_ORD_CHAR(_ForwardIterator __first, _ForwardIterator __last); + template + _ForwardIterator + __parse_QUOTED_CHAR(_ForwardIterator __first, _ForwardIterator __last); + template + _ForwardIterator + __parse_RE_dupl_symbol(_ForwardIterator __first, _ForwardIterator __last); + + void __push_l_anchor(); + void __push_r_anchor(); + void __push_match_any(); + void __push_greedy_inf_repeat(int __min); + void __push_exact_repeat(int __count); + void __push_repeat(int __min, int __max); +}; + +template +inline +basic_regex<_CharT, _Traits>::basic_regex() + : __traits_(), __flags_(), __marked_count_(0) +{ +} + +template +basic_regex<_CharT, _Traits>::~basic_regex() +{ +} + +template +template +void +basic_regex<_CharT, _Traits>::__parse(_ForwardIterator __first, + _ForwardIterator __last) +{ + switch (__flags_ & 0x3F0) + { + case ECMAScript: + break; + case basic: + __parse_basic_reg_exp(__first, __last); + break; + case extended: + break; + case awk: + break; + case grep: + break; + case egrep: + break; + default: + throw regex_error(regex_constants::error_temp); + } +} + +template +template +_ForwardIterator +basic_regex<_CharT, _Traits>::__parse_basic_reg_exp(_ForwardIterator __first, + _ForwardIterator __last) +{ + if (__first != __last) + { + if (*__first == '^') + { + __push_l_anchor(); + ++__first; + } + if (__first != __last) + { + __first = __parse_RE_expression(__first, __last); + if (__first != __last) + { + _ForwardIterator __temp = next(__first); + if (__temp == __last && *__first == '$') + { + __push_r_anchor(); + ++__first; + } + } + } + if (__first != __last) + throw regex_error(regex_constants::error_temp); + } + return __first; +} + +template +template +_ForwardIterator +basic_regex<_CharT, _Traits>::__parse_RE_expression(_ForwardIterator __first, + _ForwardIterator __last) +{ + while (true) + { + _ForwardIterator __temp = __parse_simple_RE(__first, __last); + if (__temp == __first) + break; + __first = __temp; + } + return __first; +} + +template +template +_ForwardIterator +basic_regex<_CharT, _Traits>::__parse_simple_RE(_ForwardIterator __first, + _ForwardIterator __last) +{ + if (__first != __last) + { + _ForwardIterator __temp = __parse_nondupl_RE(__first, __last); + if (__temp != __first) + { + __first = __temp; + __first = __parse_RE_dupl_symbol(__first, __last); + } + } + return __first; +} + +template +template +_ForwardIterator +basic_regex<_CharT, _Traits>::__parse_nondupl_RE(_ForwardIterator __first, + _ForwardIterator __last) +{ + _ForwardIterator __temp = __first; + __first = __parse_one_char_or_coll_elem_RE(__first, __last); + if (__temp == __first) + { + __temp = __parse_Back_open_paren(__first, __last); + if (__temp != __first) + { + __first = __parse_RE_expression(__temp, __last); + __temp = __parse_Back_close_paren(__first, __last); + if (__temp == __first) + throw regex_error(regex_constants::error_paren); + __first = __temp; + ++__marked_count_; + } + else + __first = __parse_BACKREF(__first, __last); + } + return __first; +} + +template +template +_ForwardIterator +basic_regex<_CharT, _Traits>::__parse_one_char_or_coll_elem_RE( + _ForwardIterator __first, + _ForwardIterator __last) +{ + _ForwardIterator __temp = __first; + __first = __parse_ORD_CHAR(__first, __last); + if (__temp == __first) + { + __first = __parse_QUOTED_CHAR(__first, __last); + if (__temp == __first) + { + if (__first != __last && *__first == '.') + { + __push_match_any(); + ++__first; + } + else + __first = __parse_bracket_expression(__first, __last); + } + } + return __first; +} + +template +template +_ForwardIterator +basic_regex<_CharT, _Traits>::__parse_Back_open_paren(_ForwardIterator __first, + _ForwardIterator __last) +{ + if (__first != __last) + { + _ForwardIterator __temp = next(__first); + if (__temp != __last) + { + if (*__first == '\\' && *__temp == '(') + __first = ++__temp; + } + } + return __first; +} + +template +template +_ForwardIterator +basic_regex<_CharT, _Traits>::__parse_Back_close_paren(_ForwardIterator __first, + _ForwardIterator __last) +{ + if (__first != __last) + { + _ForwardIterator __temp = next(__first); + if (__temp != __last) + { + if (*__first == '\\' && *__temp == ')') + __first = ++__temp; + } + } + return __first; +} + +template +template +_ForwardIterator +basic_regex<_CharT, _Traits>::__parse_Back_open_brace(_ForwardIterator __first, + _ForwardIterator __last) +{ + if (__first != __last) + { + _ForwardIterator __temp = next(__first); + if (__temp != __last) + { + if (*__first == '\\' && *__temp == '{') + __first = ++__temp; + } + } + return __first; +} + +template +template +_ForwardIterator +basic_regex<_CharT, _Traits>::__parse_Back_close_brace(_ForwardIterator __first, + _ForwardIterator __last) +{ + if (__first != __last) + { + _ForwardIterator __temp = next(__first); + if (__temp != __last) + { + if (*__first == '\\' && *__temp == '}') + __first = ++__temp; + } + } + return __first; +} + +template +template +_ForwardIterator +basic_regex<_CharT, _Traits>::__parse_BACKREF(_ForwardIterator __first, + _ForwardIterator __last) +{ + if (__first != __last) + { + _ForwardIterator __temp = next(__first); + if (__temp != __last) + { + if (*__first == '\\' && '1' <= *__temp && *__temp <= '9') + { + __push_back_ref(*__temp - '0'); + __first = ++__temp; + } + } + } + return __first; +} + +template +template +_ForwardIterator +basic_regex<_CharT, _Traits>::__parse_ORD_CHAR(_ForwardIterator __first, + _ForwardIterator __last) +{ + if (__first != __last) + { + _ForwardIterator __temp = next(__first); + if (__temp == __last && *__first == '$') + return __first; + // Not called inside a bracket + if (*__first == '.' || *__first == '\\' || *__first == '[') + return __first; + __push_ord_char(*__first); + ++__first; + } + return __first; +} + +template +template +_ForwardIterator +basic_regex<_CharT, _Traits>::__parse_QUOTED_CHAR(_ForwardIterator __first, + _ForwardIterator __last) +{ + if (__first != __last) + { + _ForwardIterator __temp = next(__first); + if (__temp != __last) + { + if (*__first == '\\') + { + switch (*__temp) + { + case '^': + case '.': + case '*': + case '[': + case '$': + case '\\': + __push_ord_char(*__temp); + __first = ++__temp; + break; + } + } + } + } + return __first; +} + +template +template +_ForwardIterator +basic_regex<_CharT, _Traits>::__parse_RE_dupl_symbol(_ForwardIterator __first, + _ForwardIterator __last) +{ + if (__first != __last) + { + if (__first == '*') + { + __push_greedy_inf_repeat(0); + ++__first; + } + else + { + _ForwardIterator __temp = __parse_Back_open_brace(__first, __last); + if (__temp != __first) + { + int __min = 0; + __first = __temp; + __temp = __parse_DUP_COUNT(__first, __last, __min); + if (__temp == __first) + throw regex_error(regex_constants::error_badbrace); + __first = __temp; + if (__first == __last) + throw regex_error(regex_constants::error_brace); + if (*__first != ',') + { + __temp = __parse_Back_close_brace(__first, __last); + if (__temp == __first) + throw regex_error(regex_constants::error_brace); + __push_exact_repeat(__min); + __first = __temp; + } + else + { + ++__first; // consume ',' + int __max = -1; + __first = __parse_DUP_COUNT(__first, __last, __max); + __temp = __parse_Back_close_brace(__first, __last); + if (__temp == __first) + throw regex_error(regex_constants::error_brace); + if (__max == -1) + __push_greedy_inf_repeat(__min); + else + { + if (__max < __min) + throw regex_error(regex_constants::error_badbrace); + __push_repeat(__min, __max); + } + __first = __temp; + } + } + } + } + return __first; +} + +typedef basic_regex regex; +typedef basic_regex wregex; + _LIBCPP_END_NAMESPACE_STD #endif // _LIBCPP_REGEX diff --git a/src/regex.cpp b/src/regex.cpp index abb1b668..9cc6eb54 100644 --- a/src/regex.cpp +++ b/src/regex.cpp @@ -20,33 +20,40 @@ make_error_type_string(regex_constants::error_type ecode) switch (ecode) { case regex_constants::error_collate: - return "error_collate"; + return "The expression contained an invalid collating element name."; case regex_constants::error_ctype: - return "error_ctype"; + return "The expression contained an invalid character class name."; case regex_constants::error_escape: - return "error_escape"; + return "The expression contained an invalid escaped character, or a " + "trailing escape."; case regex_constants::error_backref: - return "error_backref"; + return "The expression contained an invalid back reference."; case regex_constants::error_brack: - return "error_brack"; + return "The expression contained mismatched [ and ]."; case regex_constants::error_paren: - return "error_paren"; + return "The expression contained mismatched ( and )."; case regex_constants::error_brace: - return "error_brace"; + return "The expression contained mismatched { and }."; case regex_constants::error_badbrace: - return "error_badbrace"; + return "The expression contained an invalid range in a {} expression."; case regex_constants::error_range: - return "error_range"; + return "The expression contained an invalid character range, " + "such as [b-a] in most encodings."; case regex_constants::error_space: - return "error_space"; + return "There was insufficient memory to convert the expression into " + "a finite state machine."; case regex_constants::error_badrepeat: - return "error_badrepeat"; + return "One of *?+{ was not preceded by a valid regular expression."; case regex_constants::error_complexity: - return "error_complexity"; + return "The complexity of an attempted match against a regular " + "expression exceeded a pre-set level."; case regex_constants::error_stack: - return "error_stack"; + return "There was insufficient memory to determine whether the regular " + "expression could match the specified character sequence."; + default: + break; } - return "unknown error_type"; + return "Unknown error type"; } regex_error::regex_error(regex_constants::error_type ecode) diff --git a/test/re/re.badexp/regex_error.pass.cpp b/test/re/re.badexp/regex_error.pass.cpp index 6145b8ea..849e372e 100644 --- a/test/re/re.badexp/regex_error.pass.cpp +++ b/test/re/re.badexp/regex_error.pass.cpp @@ -26,66 +26,71 @@ int main() { std::regex_error e(std::regex_constants::error_collate); assert(e.code() == std::regex_constants::error_collate); - assert(e.what() == std::string("error_collate")); + assert(e.what() == std::string("The expression contained an invalid collating element name.")); } { std::regex_error e(std::regex_constants::error_ctype); assert(e.code() == std::regex_constants::error_ctype); - assert(e.what() == std::string("error_ctype")); + assert(e.what() == std::string("The expression contained an invalid character class name.")); } { std::regex_error e(std::regex_constants::error_escape); assert(e.code() == std::regex_constants::error_escape); - assert(e.what() == std::string("error_escape")); + assert(e.what() == std::string("The expression contained an invalid escaped character, or a " + "trailing escape.")); } { std::regex_error e(std::regex_constants::error_backref); assert(e.code() == std::regex_constants::error_backref); - assert(e.what() == std::string("error_backref")); + assert(e.what() == std::string("The expression contained an invalid back reference.")); } { std::regex_error e(std::regex_constants::error_brack); assert(e.code() == std::regex_constants::error_brack); - assert(e.what() == std::string("error_brack")); + assert(e.what() == std::string("The expression contained mismatched [ and ].")); } { std::regex_error e(std::regex_constants::error_paren); assert(e.code() == std::regex_constants::error_paren); - assert(e.what() == std::string("error_paren")); + assert(e.what() == std::string("The expression contained mismatched ( and ).")); } { std::regex_error e(std::regex_constants::error_brace); assert(e.code() == std::regex_constants::error_brace); - assert(e.what() == std::string("error_brace")); + assert(e.what() == std::string("The expression contained mismatched { and }.")); } { std::regex_error e(std::regex_constants::error_badbrace); assert(e.code() == std::regex_constants::error_badbrace); - assert(e.what() == std::string("error_badbrace")); + assert(e.what() == std::string("The expression contained an invalid range in a {} expression.")); } { std::regex_error e(std::regex_constants::error_range); assert(e.code() == std::regex_constants::error_range); - assert(e.what() == std::string("error_range")); + assert(e.what() == std::string("The expression contained an invalid character range, " + "such as [b-a] in most encodings.")); } { std::regex_error e(std::regex_constants::error_space); assert(e.code() == std::regex_constants::error_space); - assert(e.what() == std::string("error_space")); + assert(e.what() == std::string("There was insufficient memory to convert the expression into " + "a finite state machine.")); } { std::regex_error e(std::regex_constants::error_badrepeat); assert(e.code() == std::regex_constants::error_badrepeat); - assert(e.what() == std::string("error_badrepeat")); + assert(e.what() == std::string("One of *?+{ was not preceded by a valid regular expression.")); } { std::regex_error e(std::regex_constants::error_complexity); assert(e.code() == std::regex_constants::error_complexity); - assert(e.what() == std::string("error_complexity")); + assert(e.what() == std::string("The complexity of an attempted match against a regular " + "expression exceeded a pre-set level.")); } { std::regex_error e(std::regex_constants::error_stack); assert(e.code() == std::regex_constants::error_stack); - assert(e.what() == std::string("error_stack")); + assert(e.what() == std::string("There was insufficient memory to determine whether the regular " + "expression could match the specified character sequence.")); } } diff --git a/test/re/re.regex/re.regex.const/constants.pass.cpp b/test/re/re.regex/re.regex.const/constants.pass.cpp new file mode 100644 index 00000000..634cb2ec --- /dev/null +++ b/test/re/re.regex/re.regex.const/constants.pass.cpp @@ -0,0 +1,52 @@ +//===----------------------------------------------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +// + +// template > +// class basic_regex +// { +// public: +// // constants: +// static constexpr regex_constants::syntax_option_type icase = regex_constants::icase; +// static constexpr regex_constants::syntax_option_type nosubs = regex_constants::nosubs; +// static constexpr regex_constants::syntax_option_type optimize = regex_constants::optimize; +// static constexpr regex_constants::syntax_option_type collate = regex_constants::collate; +// static constexpr regex_constants::syntax_option_type ECMAScript = regex_constants::ECMAScript; +// static constexpr regex_constants::syntax_option_type basic = regex_constants::basic; +// static constexpr regex_constants::syntax_option_type extended = regex_constants::extended; +// static constexpr regex_constants::syntax_option_type awk = regex_constants::awk; +// static constexpr regex_constants::syntax_option_type grep = regex_constants::grep; +// static constexpr regex_constants::syntax_option_type egrep = regex_constants::egrep; + +#include +#include + +template +void +test() +{ + typedef std::basic_regex BR; + static_assert((BR::icase == std::regex_constants::icase), ""); + static_assert((BR::nosubs == std::regex_constants::nosubs), ""); + static_assert((BR::optimize == std::regex_constants::optimize), ""); + static_assert((BR::collate == std::regex_constants::collate), ""); + static_assert((BR::ECMAScript == std::regex_constants::ECMAScript), ""); + static_assert((BR::basic == std::regex_constants::basic), ""); + static_assert((BR::extended == std::regex_constants::extended), ""); + static_assert((BR::awk == std::regex_constants::awk), ""); + static_assert((BR::grep == std::regex_constants::grep), ""); + static_assert((BR::egrep == std::regex_constants::egrep), ""); +} + +int main() +{ + test(); + test(); +} diff --git a/test/re/re.regex/re.regex.construct/default.pass.cpp b/test/re/re.regex/re.regex.construct/default.pass.cpp new file mode 100644 index 00000000..44930f6d --- /dev/null +++ b/test/re/re.regex/re.regex.construct/default.pass.cpp @@ -0,0 +1,32 @@ +//===----------------------------------------------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +// + +// template > class basic_regex; + +// basic_regex(); + +#include +#include + +template +void +test() +{ + std::basic_regex r; + assert(r.flags() == 0); + assert(r.mark_count() == 0); +} + +int main() +{ + test(); + test(); +} diff --git a/test/re/re.regex/types.pass.cpp b/test/re/re.regex/types.pass.cpp new file mode 100644 index 00000000..4d90737e --- /dev/null +++ b/test/re/re.regex/types.pass.cpp @@ -0,0 +1,35 @@ +//===----------------------------------------------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +// + +// template > +// class basic_regex +// { +// public: +// // types: +// typedef charT value_type; +// typedef regex_constants::syntax_option_type flag_type; +// typedef typename traits::locale_type locale_type; + +#include +#include + +int main() +{ + static_assert((std::is_same::value_type, char>::value), ""); + static_assert((std::is_same::flag_type, + std::regex_constants::syntax_option_type>::value), ""); + static_assert((std::is_same::locale_type, std::locale>::value), ""); + + static_assert((std::is_same::value_type, wchar_t>::value), ""); + static_assert((std::is_same::flag_type, + std::regex_constants::syntax_option_type>::value), ""); + static_assert((std::is_same::locale_type, std::locale>::value), ""); +} diff --git a/www/libcxx_by_chapter.pdf b/www/libcxx_by_chapter.pdf index 1ba10edc..2bc5956e 100644 Binary files a/www/libcxx_by_chapter.pdf and b/www/libcxx_by_chapter.pdf differ