diff --git a/include/regex b/include/regex index c1bacffa..7641a4a3 100644 --- a/include/regex +++ b/include/regex @@ -717,6 +717,9 @@ typedef regex_token_iterator wsregex_token_iterator; } // std */ +#include +#include + #include <__config> #include #include <__locale> @@ -1224,10 +1227,9 @@ struct __command { __end_state = -1000, __consume_input, // -999 -// __try_state, // -998 __begin_marked_expr, // -998 __end_marked_expr, // -997 - __go_back, // -996 + __pop_state, // -996 __accept_and_consume, // -995 __accept_but_not_consume, // -994 __reject, // -993 @@ -1239,7 +1241,6 @@ struct __command typedef __state<_CharT> __state; int __do_; - int __data_; const __state* first; const __state* second; @@ -1252,6 +1253,18 @@ struct __command : __do_(0), first(__s1), second(__s2) {} }; +template +ostream& +operator<<(ostream& os, const __command<_CharT>& c) +{ + os << c.__do_; + if (c.first) + os << ", " << c.first->speak(); + if (c.second) + os << ", " << c.second->speak(); + return os; +} + template class sub_match; // __state @@ -1272,6 +1285,8 @@ public: vector& __lc, sub_match* __m, regex_constants::match_flag_type __flags) const = 0; + + virtual string speak() const = 0; }; // __end_state @@ -1290,6 +1305,8 @@ public: vector&, sub_match*, regex_constants::match_flag_type) const; + + virtual string speak() const {return "end state";} }; template @@ -1359,6 +1376,8 @@ public: vector&, sub_match*, regex_constants::match_flag_type) const; + + virtual string speak() const {return "empty state";} }; template @@ -1390,6 +1409,8 @@ public: vector&, sub_match*, regex_constants::match_flag_type) const; + + virtual string speak() const {return "empty non-owning state";} }; template @@ -1457,6 +1478,16 @@ public: vector&, sub_match*, regex_constants::match_flag_type __flags) const; + + virtual string speak() const + { + ostringstream os; + os << "loop {" << __min_ << ',' << __max_ << "}"; + if (!__greedy_) + os << " not"; + os << " greedy"; + return os.str(); + } }; template @@ -1503,6 +1534,13 @@ public: vector& __lc, sub_match*, regex_constants::match_flag_type) const; + + virtual string speak() const + { + ostringstream os; + os << "zero loop " << __loop_id_; + return os.str(); + } }; template @@ -1537,6 +1575,13 @@ public: vector& __lc, sub_match*, regex_constants::match_flag_type) const; + + virtual string speak() const + { + ostringstream os; + os << "increment loop " << __loop_id_; + return os.str(); + } }; template @@ -1572,6 +1617,13 @@ public: vector&, sub_match* __sm, regex_constants::match_flag_type) const; + + virtual string speak() const + { + ostringstream os; + os << "zero marked exprs [" << __begin_ << ',' << __end_ << ')'; + return os.str(); + } }; template @@ -1599,29 +1651,36 @@ class __begin_marked_subexpression { typedef __owns_one_state<_CharT> base; - __begin_marked_subexpression(const __begin_marked_subexpression&); - __begin_marked_subexpression& operator=(const __begin_marked_subexpression&); + unsigned __mexp_; public: typedef __command<_CharT> __command; - explicit __begin_marked_subexpression(__state<_CharT>* __s) - : base(__s) {} + explicit __begin_marked_subexpression(unsigned __mexp, __state<_CharT>* __s) + : base(__s), __mexp_(__mexp) {} virtual __command __test(const _CharT*, const _CharT*, const _CharT*, vector&, sub_match*, regex_constants::match_flag_type) const; + + virtual string speak() const + { + ostringstream os; + os << "begin marked expr " << __mexp_; + return os.str(); + } }; template __command<_CharT> __begin_marked_subexpression<_CharT>::__test(const _CharT*, const _CharT* __c, const _CharT*, vector&, - sub_match*, + sub_match* __s, regex_constants::match_flag_type) const { - return __command(__command::__begin_marked_expr, this->first()); + __s[__mexp_].first = __c; + return __command(__command::__accept_but_not_consume, this->first()); } // __end_marked_subexpression @@ -1632,29 +1691,37 @@ class __end_marked_subexpression { typedef __owns_one_state<_CharT> base; - __end_marked_subexpression(const __end_marked_subexpression&); - __end_marked_subexpression& operator=(const __end_marked_subexpression&); + unsigned __mexp_; public: typedef __command<_CharT> __command; - explicit __end_marked_subexpression(__state<_CharT>* __s) - : base(__s) {} + explicit __end_marked_subexpression(unsigned __mexp, __state<_CharT>* __s) + : base(__s), __mexp_(__mexp) {} virtual __command __test(const _CharT*, const _CharT*, const _CharT*, vector&, sub_match*, regex_constants::match_flag_type) const; + + virtual string speak() const + { + ostringstream os; + os << "end marked expr " << __mexp_; + return os.str(); + } }; template __command<_CharT> __end_marked_subexpression<_CharT>::__test(const _CharT*, const _CharT* __c, const _CharT*, vector&, - sub_match*, + sub_match* __s, regex_constants::match_flag_type) const { - return __command(__command::__end_marked_expr, this->first()); + __s[__mexp_].second = __c; + __s[__mexp_].matched = true; + return __command(__command::__accept_but_not_consume, this->first()); } // __state_arg @@ -1680,6 +1747,13 @@ public: vector&, sub_match*, regex_constants::match_flag_type) const; + + virtual string speak() const + { + ostringstream os; + os << "state arg " << __arg_; + return os.str(); + } }; template @@ -1715,6 +1789,13 @@ public: vector&, sub_match*, regex_constants::match_flag_type) const; + + virtual string speak() const + { + ostringstream os; + os << "match char " << __c_; + return os.str(); + } }; template @@ -1876,7 +1957,8 @@ private: template _ForwardIterator __parse_RE_dupl_symbol(_ForwardIterator __first, _ForwardIterator __last, - __owns_one_state<_CharT>* __s); + __owns_one_state<_CharT>* __s, + unsigned __mexp_begin, unsigned __mexp_end); template _ForwardIterator __parse_ERE_dupl_symbol(_ForwardIterator __first, _ForwardIterator __last); @@ -1923,8 +2005,10 @@ private: void __push_l_anchor() {} void __push_r_anchor() {} void __push_match_any() {} - void __push_greedy_inf_repeat(size_t __min, __owns_one_state<_CharT>* __s) - {__push_loop(__min, numeric_limits::max(), __s);} + void __push_greedy_inf_repeat(size_t __min, __owns_one_state<_CharT>* __s, + unsigned __mexp_begin = 0, unsigned __mexp_end = 0) + {__push_loop(__min, numeric_limits::max(), __s, + __mexp_begin, __mexp_end);} void __push_exact_repeat(int __count) {} void __push_loop(size_t __min, size_t __max, __owns_one_state<_CharT>* __s, size_t __mexp_begin = 0, size_t __mexp_end = 0, @@ -1969,6 +2053,7 @@ private: bool __match_at_start_posix_subs(_BidirectionalIterator __first, _BidirectionalIterator __last, match_results<_BidirectionalIterator, _Allocator>& __m, + vector& __lc, regex_constants::match_flag_type __flags) const; template @@ -2151,9 +2236,11 @@ basic_regex<_CharT, _Traits>::__parse_simple_RE(_ForwardIterator __first, if (__first != __last) { __owns_one_state<_CharT>* __e = __end_; + unsigned __mexp_begin = __marked_count_; _ForwardIterator __temp = __parse_nondupl_RE(__first, __last); if (__temp != __first) - __first = __parse_RE_dupl_symbol(__temp, __last, __e); + __first = __parse_RE_dupl_symbol(__temp, __last, __e, + __mexp_begin+1, __marked_count_+1); } return __first; } @@ -2462,13 +2549,15 @@ template _ForwardIterator basic_regex<_CharT, _Traits>::__parse_RE_dupl_symbol(_ForwardIterator __first, _ForwardIterator __last, - __owns_one_state<_CharT>* __s) + __owns_one_state<_CharT>* __s, + unsigned __mexp_begin, + unsigned __mexp_end) { if (__first != __last) { if (*__first == '*') { - __push_greedy_inf_repeat(0, __s); + __push_greedy_inf_repeat(0, __s, __mexp_begin, __mexp_end); ++__first; } else @@ -2501,7 +2590,7 @@ basic_regex<_CharT, _Traits>::__parse_RE_dupl_symbol(_ForwardIterator __first, if (__temp == __first) throw regex_error(regex_constants::error_brace); if (__max == -1) - __push_greedy_inf_repeat(__min, __s); + __push_greedy_inf_repeat(__min, __s, __mexp_end, __mexp_end); else { if (__max < __min) @@ -2834,37 +2923,26 @@ template void basic_regex<_CharT, _Traits>::__push_char(value_type __c) { - __match_char<_CharT>* __s = new __match_char<_CharT>(__c, __end_->first()); - __end_->first() = __s; - __end_ = __s; + __end_->first() = new __match_char<_CharT>(__c, __end_->first()); + __end_ = static_cast<__owns_one_state<_CharT>*>(__end_->first()); } template void basic_regex<_CharT, _Traits>::__push_begin_marked_subexpression() { - __begin_marked_subexpression<_CharT>* __s = - new __begin_marked_subexpression<_CharT>(__end_->first()); - __end_->first() = __s; - __end_ = __s; - __state_arg<_CharT>* __a = new __state_arg<_CharT>(++__marked_count_, + __end_->first() = new __begin_marked_subexpression<_CharT>(++__marked_count_, __end_->first()); - __end_->first() = __a; - __end_ = __a; + __end_ = static_cast<__owns_one_state<_CharT>*>(__end_->first()); } template void basic_regex<_CharT, _Traits>::__push_end_marked_subexpression(unsigned __sub) { - __end_marked_subexpression<_CharT>* __s = - new __end_marked_subexpression<_CharT>(__end_->first()); - __end_->first() = __s; - __end_ = __s; - __state_arg<_CharT>* __a = new __state_arg<_CharT>(++__marked_count_, - __end_->first()); - __end_->first() = __a; - __end_ = __a; + __end_->first() = new __end_marked_subexpression<_CharT>(__sub, + __end_->first()); + __end_ = static_cast<__owns_one_state<_CharT>*>(__end_->first()); } typedef basic_regex regex; @@ -3402,9 +3480,7 @@ match_results<_BidirectionalIterator, _Allocator>::__init(unsigned __s, __prefix_.first = __f; __prefix_.second = __f; __prefix_.matched = false; - __suffix_.first = __l; - __suffix_.second = __l; - __suffix_.matched = false; + __suffix_ = __unmatched_; } typedef match_results cmatch; @@ -3449,16 +3525,6 @@ basic_regex<_CharT, _Traits>::__match_at_start_posix_nosubs( vector& __lc, regex_constants::match_flag_type __flags) const { -/* - How do you set __m.__matches[i].first and second? - With const _CharT* [__first, __last), we need a reference - _BidirectionalIterator to bounce off of. Something like: - __m.__matches_[0].second = next(__m.__matches_[0].first, __current - __first_); - - Pre: __m.__matches_[0].first <-> __first ? or - __m.__prefix_.first <-> first and - __m.__suffix_.second <-> last ? -*/ typedef typename iterator_traits<_BidirectionalIterator>::difference_type difference_type; __split_buffer<__command> __commands; difference_type __j = 0; @@ -3491,8 +3557,6 @@ basic_regex<_CharT, _Traits>::__match_at_start_posix_nosubs( break; case __command::__accept_and_consume: __commands.push_front(__command(__cmd.first)); - if (__cmd.second != nullptr) - __commands.push_front(__command(__cmd.second)); break; case __command::__accept_but_not_consume: __commands.push_back(__command(__cmd.first)); @@ -3523,8 +3587,90 @@ bool basic_regex<_CharT, _Traits>::__match_at_start_posix_subs( _BidirectionalIterator __first, _BidirectionalIterator __last, match_results<_BidirectionalIterator, _Allocator>& __m, + vector& __lc, regex_constants::match_flag_type __flags) const { + typedef typename iterator_traits<_BidirectionalIterator>::difference_type difference_type; + vector<__command> __commands; + vector<_BidirectionalIterator> __current_stack; + vector > __saved_matches; + vector > __best_matches; + difference_type __j = 0; + difference_type __highest_j = 0; + difference_type _N = _STD::distance(__first, __last); + __state* __st = __start_.get(); + if (__st) + { + __commands.push_back(__command(__st)); + _BidirectionalIterator __current = __first; + do + { + __command __cmd = __commands.back(); + __commands.pop_back(); + if (__cmd.first != nullptr) + __cmd = __cmd.first->__test(__first, __current, __last, __lc, + __m.__matches_.data(), __flags); + switch (__cmd.__do_) + { + case __command::__end_state: + if (__highest_j < __j) + { + __highest_j = __j; + for (unsigned __i = 1; __i < __m.__matches_.size(); ++__i) + __best_matches.push_back(__m.__matches_[__i]); + } + break; + case __command::__pop_state: + for (unsigned __i = __m.__matches_.size(); __i > 1;) + { + assert(!__saved_matches.empty()); + __m.__matches_[--__i] = __saved_matches.back(); + __saved_matches.pop_back(); + } + assert(!__current_stack.empty()); + __current = __current_stack.back(); + __current_stack.pop_back(); + break; + case __command::__accept_and_consume: + __commands.push_back(__command(__cmd.first)); + if (__current != __last) + { + ++__current; + ++__j; + } + break; + case __command::__accept_but_not_consume: + if (__cmd.second != nullptr) + { + __commands.push_back(__command(__cmd.second)); + __commands.push_back(__command(__command::__pop_state)); + __current_stack.push_back(__current); + for (unsigned __i = 1; __i < __m.__matches_.size(); ++__i) + __saved_matches.push_back(__m.__matches_[__i]); + } + __commands.push_back(__command(__cmd.first)); + break; + case __command::__reject: + break; + default: + throw regex_error(regex_constants::error_temp); + break; + } + } while (!__commands.empty()); + if (__highest_j != 0) + { + __m.__matches_[0].first = __first; + __m.__matches_[0].second = _STD::next(__first, __highest_j); + __m.__matches_[0].matched = true; + for (unsigned __i = __m.__matches_.size(); __i > 1;) + { + assert(!__best_matches.empty()); + __m.__matches_[--__i] = __best_matches.back(); + __best_matches.pop_back(); + } + return true; + } + } return false; } @@ -3541,7 +3687,7 @@ basic_regex<_CharT, _Traits>::__match_at_start( return __match_at_start_ecma(__first, __last, __m, __flags); if (mark_count() == 0) return __match_at_start_posix_nosubs(__first, __last, __m, __lc, __flags); - return __match_at_start_posix_subs(__first, __last, __m, __flags); + return __match_at_start_posix_subs(__first, __last, __m, __lc, __flags); } template diff --git a/test/re/re.alg/re.alg.search/basic.pass.cpp b/test/re/re.alg/re.alg.search/basic.pass.cpp index 206ea352..56730136 100644 --- a/test/re/re.alg/re.alg.search/basic.pass.cpp +++ b/test/re/re.alg/re.alg.search/basic.pass.cpp @@ -117,26 +117,44 @@ int main() assert(m.position(0) == 0); assert(m.str(0) == s); } -// { -// std::cmatch m; -// const char s[] = "abcdefghijk"; -// assert(std::regex_search(s, m, std::regex("cd\\(\\(e\\)fg\\)hi", -// std::regex_constants::basic))); -// assert(m.size() == 3); -// assert(m.prefix().matched); -// assert(m.prefix().first == s); -// assert(m.prefix().second == m[0].first); -// assert(m.suffix().matched); -// assert(m.suffix().first == m[0].second); -// assert(m.suffix().second == s+std::regex_traits::length(s)); -// assert(m.length(0) == 7); -// assert(m.position(0) == 2); -// assert(m.str(0) == "cdefghi"); -// assert(m.length(1) == 3); -// assert(m.position(1) == 4); -// assert(m.str(1) == "efg"); -// assert(m.length(2) == 1); -// assert(m.position(2) == 4); -// assert(m.str(2) == "e"); -// } + { + std::cmatch m; + const char s[] = "ababc"; + assert(std::regex_search(s, m, std::regex("\\(ab\\)*c", std::regex_constants::basic))); + assert(m.size() == 2); + assert(!m.prefix().matched); + assert(m.prefix().first == s); + assert(m.prefix().second == m[0].first); + assert(!m.suffix().matched); + assert(m.suffix().first == m[0].second); + assert(m.suffix().second == s+5); + assert(m.length(0) == 5); + assert(m.position(0) == 0); + assert(m.str(0) == s); + assert(m.length(1) == 2); + assert(m.position(1) == 2); + assert(m.str(1) == "ab"); + } + { + std::cmatch m; + const char s[] = "abcdefghijk"; + assert(std::regex_search(s, m, std::regex("cd\\(\\(e\\)fg\\)hi", + std::regex_constants::basic))); + assert(m.size() == 3); + assert(m.prefix().matched); + assert(m.prefix().first == s); + assert(m.prefix().second == m[0].first); + assert(m.suffix().matched); + assert(m.suffix().first == m[0].second); + assert(m.suffix().second == s+std::regex_traits::length(s)); + assert(m.length(0) == 7); + assert(m.position(0) == 2); + assert(m.str(0) == "cdefghi"); + assert(m.length(1) == 3); + assert(m.position(1) == 4); + assert(m.str(1) == "efg"); + assert(m.length(2) == 1); + assert(m.position(2) == 4); + assert(m.str(2) == "e"); + } }