From e9de5ff443e7f5ff297f867442c0a9352063007f Mon Sep 17 00:00:00 2001 From: Howard Hinnant Date: Tue, 27 Jul 2010 22:20:32 +0000 Subject: [PATCH] lookahead for ecma git-svn-id: https://llvm.org/svn/llvm-project/libcxx/trunk@109548 91177308-0d34-0410-b5e6-96231b3b80d8 --- include/regex | 178 ++++++++++++--------- test/re/re.alg/re.alg.search/ecma.pass.cpp | 62 +++++++ 2 files changed, 163 insertions(+), 77 deletions(-) diff --git a/include/regex b/include/regex index 4d132a12..14a0e4bc 100644 --- a/include/regex +++ b/include/regex @@ -2416,57 +2416,7 @@ __exit: } } -// __lookahead - -template -class __lookahead - : public __owns_one_state<_CharT> -{ - typedef __owns_one_state<_CharT> base; - - _Traits __traits_; - bool __invert_; - - __lookahead(const __lookahead&); - __lookahead& operator=(const __lookahead&); -public: - typedef _STD::__state<_CharT> __state; - - __lookahead(const _Traits& __traits, bool __invert, __node<_CharT>* __s) - : base(__s), __traits_(__traits), __invert_(__invert) {} - - virtual void __exec(__state&) const; - - virtual string speak() const - { - ostringstream os; - if (__invert_) - os << "lookahead"; - else - os << "not lookahead"; - return os.str(); - } -}; - -template -void -__lookahead<_CharT, _Traits>::__exec(__state& __s) const -{ -// match_results __m; -// __m.__init(1 + mark_count(), __s.__current_, __s.__last_); -// bool __matched = __exp_.__match_at_start_ecma(__s.__current_, __s.__last_, -// __m, __s.__flags_); -// if (__matched != __invert_) -// { -// __s.__do_ = __state::__accept_but_not_consume; -// __s.__node_ = this->first(); -// } -// else -// { -// __s.__do_ = __state::__reject; -// __s.__node_ = nullptr; -// } -} +template class __lookahead; template > class basic_regex @@ -2516,10 +2466,8 @@ public: : __flags_(__f), __marked_count_(0), __loop_count_(0), __open_count_(0), __end_(0), __left_anchor_(false) {__parse(__p, __p + __len);} - basic_regex(const basic_regex&); -#ifdef _LIBCPP_MOVE - basic_regex(basic_regex&&); -#endif +// basic_regex(const basic_regex&) = default; +// basic_regex(basic_regex&&) = default; template explicit basic_regex(const basic_string& __p, flag_type __f = regex_constants::ECMAScript) @@ -2540,10 +2488,8 @@ public: ~basic_regex(); - basic_regex& operator=(const basic_regex&); -#ifdef _LIBCPP_MOVE - basic_regex& operator=(basic_regex&&); -#endif +// basic_regex& operator=(const basic_regex&) = default; +// basic_regex& operator=(basic_regex&&) = default; basic_regex& operator=(const value_type* __p); basic_regex& operator=(initializer_list __il); template @@ -2580,7 +2526,8 @@ private: unsigned __loop_count() const {return __loop_count_;} template - void __parse(_ForwardIterator __first, _ForwardIterator __last); + _ForwardIterator + __parse(_ForwardIterator __first, _ForwardIterator __last); template _ForwardIterator __parse_basic_reg_exp(_ForwardIterator __first, _ForwardIterator __last); @@ -2732,7 +2679,7 @@ private: void __push_end_marked_subexpression(unsigned); void __push_empty(); void __push_word_boundary(bool); - void __push_lookahead(bool) {} + void __push_lookahead(const basic_regex&, bool); template bool @@ -2805,6 +2752,8 @@ private: match_results::const_iterator, _A>&, const basic_regex<_C, _T>& __e, regex_constants::match_flag_type __flags); + + template friend class __lookahead; }; template @@ -2812,9 +2761,61 @@ basic_regex<_CharT, _Traits>::~basic_regex() { } +// __lookahead + +template +class __lookahead + : public __owns_one_state<_CharT> +{ + typedef __owns_one_state<_CharT> base; + + basic_regex<_CharT, _Traits> __exp_; + bool __invert_; + + __lookahead(const __lookahead&); + __lookahead& operator=(const __lookahead&); +public: + typedef _STD::__state<_CharT> __state; + + __lookahead(const basic_regex<_CharT, _Traits>& __exp, bool __invert, __node<_CharT>* __s) + : base(__s), __exp_(__exp), __invert_(__invert) {} + + virtual void __exec(__state&) const; + + virtual string speak() const + { + ostringstream os; + if (__invert_) + os << "not lookahead"; + else + os << "lookahead"; + return os.str(); + } +}; + +template +void +__lookahead<_CharT, _Traits>::__exec(__state& __s) const +{ + match_results __m; + __m.__init(1 + __exp_.mark_count(), __s.__current_, __s.__last_); + bool __matched = __exp_.__match_at_start_ecma(__s.__current_, __s.__last_, + __m, __s.__flags_); + if (__matched != __invert_) + { + __s.__do_ = __state::__accept_but_not_consume; + __s.__node_ = this->first(); + } + else + { + __s.__do_ = __state::__reject; + __s.__node_ = nullptr; + } +} + template template -void +_ForwardIterator basic_regex<_CharT, _Traits>::__parse(_ForwardIterator __first, _ForwardIterator __last) { @@ -2827,25 +2828,26 @@ basic_regex<_CharT, _Traits>::__parse(_ForwardIterator __first, switch (__flags_ & 0x1F0) { case ECMAScript: - __parse_ecma_exp(__first, __last); + __first = __parse_ecma_exp(__first, __last); break; case basic: - __parse_basic_reg_exp(__first, __last); + __first = __parse_basic_reg_exp(__first, __last); break; case extended: - __parse_extended_reg_exp(__first, __last); + __first = __parse_extended_reg_exp(__first, __last); break; case awk: break; case grep: - __parse_grep(__first, __last); + __first = __parse_grep(__first, __last); break; case egrep: - __parse_egrep(__first, __last); + __first = __parse_egrep(__first, __last); break; default: throw regex_error(regex_constants::__re_err_grammar); } + return __first; } template @@ -3816,18 +3818,28 @@ basic_regex<_CharT, _Traits>::__parse_assertion(_ForwardIterator __first, switch (*__temp) { case '=': - __push_lookahead(false); - __temp = __parse_ecma_exp(++__temp, __last); - if (__temp == __last || *__temp != ')') - throw regex_error(regex_constants::error_paren); - __first = ++__temp; + { + basic_regex __exp; + __exp.__flags_ = __flags_; + __temp = __exp.__parse(++__temp, __last); + __exp.__push_l_anchor(); + __push_lookahead(_STD::move(__exp), false); + if (__temp == __last || *__temp != ')') + throw regex_error(regex_constants::error_paren); + __first = ++__temp; + } break; case '!': - __push_lookahead(true); - __temp = __parse_ecma_exp(++__temp, __last); - if (__temp == __last || *__temp != ')') - throw regex_error(regex_constants::error_paren); - __first = ++__temp; + { + basic_regex __exp; + __exp.__flags_ = __flags_; + __temp = __exp.__parse(++__temp, __last); + __exp.__push_l_anchor(); + __push_lookahead(_STD::move(__exp), true); + if (__temp == __last || *__temp != ')') + throw regex_error(regex_constants::error_paren); + __first = ++__temp; + } break; } } @@ -4321,6 +4333,16 @@ basic_regex<_CharT, _Traits>::__start_matching_list(bool __negate) return __r; } +template +void +basic_regex<_CharT, _Traits>::__push_lookahead(const basic_regex& __exp, + bool __invert) +{ + __end_->first() = new __lookahead<_CharT, _Traits>(__exp, __invert, + __end_->first()); + __end_ = static_cast<__owns_one_state<_CharT>*>(__end_->first()); +} + typedef basic_regex regex; typedef basic_regex wregex; @@ -4854,6 +4876,8 @@ private: bool regex_match(_B, _B, match_results<_B, _A>&, const basic_regex<_C, _T>&, regex_constants::match_flag_type); + + template friend class __lookahead; }; template diff --git a/test/re/re.alg/re.alg.search/ecma.pass.cpp b/test/re/re.alg/re.alg.search/ecma.pass.cpp index 03025699..ed8f54a9 100644 --- a/test/re/re.alg/re.alg.search/ecma.pass.cpp +++ b/test/re/re.alg/re.alg.search/ecma.pass.cpp @@ -16,6 +16,8 @@ // const basic_regex& e, // regex_constants::match_flag_type flags = regex_constants::match_default); +#include + #include #include @@ -756,6 +758,36 @@ int main() assert(m.position(0) == 0); assert(m.str(0) == s); } + { + std::cmatch m; + const char s[] = "Jeff Jeffs "; + assert(std::regex_search(s, m, std::regex("Jeff(?=s\\b)"))); + assert(m.size() == 1); + assert(m.prefix().matched); + assert(m.prefix().first == s); + assert(m.prefix().second == m[0].first); + assert(m.suffix().matched); + assert(m.suffix().first == m[0].second); + assert(m.suffix().second == s + std::char_traits::length(s)); + assert(m.length(0) == 4); + assert(m.position(0) == 5); + assert(m.str(0) == "Jeff"); + } + { + std::cmatch m; + const char s[] = "Jeffs Jeff"; + assert(std::regex_search(s, m, std::regex("Jeff(?!s\\b)"))); + assert(m.size() == 1); + assert(m.prefix().matched); + assert(m.prefix().first == s); + assert(m.prefix().second == m[0].first); + assert(!m.suffix().matched); + assert(m.suffix().first == m[0].second); + assert(m.suffix().second == s + std::char_traits::length(s)); + assert(m.length(0) == 4); + assert(m.position(0) == 6); + assert(m.str(0) == "Jeff"); + } { std::wcmatch m; @@ -1490,4 +1522,34 @@ int main() assert(m.position(0) == 0); assert(m.str(0) == s); } + { + std::wcmatch m; + const wchar_t s[] = L"Jeff Jeffs "; + assert(std::regex_search(s, m, std::wregex(L"Jeff(?=s\\b)"))); + assert(m.size() == 1); + assert(m.prefix().matched); + assert(m.prefix().first == s); + assert(m.prefix().second == m[0].first); + assert(m.suffix().matched); + assert(m.suffix().first == m[0].second); + assert(m.suffix().second == s + std::char_traits::length(s)); + assert(m.length(0) == 4); + assert(m.position(0) == 5); + assert(m.str(0) == L"Jeff"); + } + { + std::wcmatch m; + const wchar_t s[] = L"Jeffs Jeff"; + assert(std::regex_search(s, m, std::wregex(L"Jeff(?!s\\b)"))); + assert(m.size() == 1); + assert(m.prefix().matched); + assert(m.prefix().first == s); + assert(m.prefix().second == m[0].first); + assert(!m.suffix().matched); + assert(m.suffix().first == m[0].second); + assert(m.suffix().second == s + std::char_traits::length(s)); + assert(m.length(0) == 4); + assert(m.position(0) == 6); + assert(m.str(0) == L"Jeff"); + } }