Fixed some bugs in the ecma bracket expression regarding escaped characters, and got the awk grammar going.

git-svn-id: https://llvm.org/svn/llvm-project/libcxx/trunk@109599 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Howard Hinnant 2010-07-28 17:35:27 +00:00
parent e9de5ff443
commit 15476f345d
3 changed files with 1845 additions and 24 deletions

View File

@ -2194,10 +2194,12 @@ class __bracket_expression
_Traits __traits_; _Traits __traits_;
vector<_CharT> __chars_; vector<_CharT> __chars_;
vector<_CharT> __neg_chars_;
vector<pair<string_type, string_type> > __ranges_; vector<pair<string_type, string_type> > __ranges_;
vector<pair<_CharT, _CharT> > __digraphs_; vector<pair<_CharT, _CharT> > __digraphs_;
vector<string_type> __equivalences_; vector<string_type> __equivalences_;
ctype_base::mask __mask_; ctype_base::mask __mask_;
ctype_base::mask __neg_mask_;
bool __negate_; bool __negate_;
bool __icase_; bool __icase_;
bool __collate_; bool __collate_;
@ -2210,12 +2212,14 @@ public:
__bracket_expression(const _Traits& __traits, __node<_CharT>* __s, __bracket_expression(const _Traits& __traits, __node<_CharT>* __s,
bool __negate, bool __icase, bool __collate) bool __negate, bool __icase, bool __collate)
: base(__s), __traits_(__traits), __mask_(), __negate_(__negate), : base(__s), __traits_(__traits), __mask_(), __neg_mask_(),
__icase_(__icase), __collate_(__collate), __negate_(__negate), __icase_(__icase), __collate_(__collate),
__might_have_digraph_(__traits_.getloc().name() != "C") {} __might_have_digraph_(__traits_.getloc().name() != "C") {}
virtual void __exec(__state&) const; virtual void __exec(__state&) const;
bool __negated() const {return __negate_;}
void __add_char(_CharT __c) void __add_char(_CharT __c)
{ {
if (__icase_) if (__icase_)
@ -2225,6 +2229,15 @@ public:
else else
__chars_.push_back(__c); __chars_.push_back(__c);
} }
// Records __c in __neg_chars_, the list of characters contributed by a
// negated escape. The character is stored in the same normalized form the
// flags select: case-folded when __icase_ is set, otherwise
// collation-translated when __collate_ is set, otherwise unchanged.
void __add_neg_char(_CharT __c)
{
    _CharT __stored = __c;
    if (__icase_)
        __stored = __traits_.translate_nocase(__c);
    else if (__collate_)
        __stored = __traits_.translate(__c);
    __neg_chars_.push_back(__stored);
}
void __add_range(string_type __b, string_type __e) void __add_range(string_type __b, string_type __e)
{ {
if (__collate_) if (__collate_)
@ -2274,6 +2287,8 @@ public:
{__equivalences_.push_back(__s);} {__equivalences_.push_back(__s);}
void __add_class(ctype_base::mask __mask) void __add_class(ctype_base::mask __mask)
{__mask_ |= __mask;} {__mask_ |= __mask;}
// Merges __mask into __neg_mask_, the accumulated set of character classes
// added through negated class escapes.
void __add_neg_class(ctype_base::mask __mask)
{
    __neg_mask_ = __neg_mask_ | __mask;
}
virtual string speak() const virtual string speak() const
{ {
@ -2353,6 +2368,12 @@ __bracket_expression<_CharT, _Traits>::__exec(__state& __s) const
__found = true; __found = true;
goto __exit; goto __exit;
} }
if (!__traits_.isctype(__ch2.first, __neg_mask_) &&
!__traits_.isctype(__ch2.second, __neg_mask_))
{
__found = true;
goto __exit;
}
goto __exit; goto __exit;
} }
} }
@ -2371,6 +2392,17 @@ __bracket_expression<_CharT, _Traits>::__exec(__state& __s) const
goto __exit; goto __exit;
} }
} }
if (!__neg_chars_.empty())
{
for (size_t __i = 0; __i < __neg_chars_.size(); ++__i)
{
if (__ch == __neg_chars_[__i])
goto __is_neg_char;
}
__found = true;
goto __exit;
}
__is_neg_char:
if (!__ranges_.empty()) if (!__ranges_.empty())
{ {
string_type __s2 = __collate_ ? string_type __s2 = __collate_ ?
@ -2398,7 +2430,15 @@ __bracket_expression<_CharT, _Traits>::__exec(__state& __s) const
} }
} }
if (__traits_.isctype(__ch, __mask_)) if (__traits_.isctype(__ch, __mask_))
{
__found = true; __found = true;
goto __exit;
}
if (__neg_mask_ && !__traits_.isctype(__ch, __neg_mask_))
{
__found = true;
goto __exit;
}
} }
else else
__found = __negate_; // force reject __found = __negate_; // force reject
@ -2644,7 +2684,8 @@ private:
__parse_character_class_escape(_ForwardIterator __first, _ForwardIterator __last); __parse_character_class_escape(_ForwardIterator __first, _ForwardIterator __last);
template <class _ForwardIterator> template <class _ForwardIterator>
_ForwardIterator _ForwardIterator
__parse_character_escape(_ForwardIterator __first, _ForwardIterator __last); __parse_character_escape(_ForwardIterator __first, _ForwardIterator __last,
basic_string<_CharT>* __str = nullptr);
template <class _ForwardIterator> template <class _ForwardIterator>
_ForwardIterator _ForwardIterator
__parse_pattern_character(_ForwardIterator __first, _ForwardIterator __last); __parse_pattern_character(_ForwardIterator __first, _ForwardIterator __last);
@ -2654,6 +2695,15 @@ private:
template <class _ForwardIterator> template <class _ForwardIterator>
_ForwardIterator _ForwardIterator
__parse_egrep(_ForwardIterator __first, _ForwardIterator __last); __parse_egrep(_ForwardIterator __first, _ForwardIterator __last);
template <class _ForwardIterator>
_ForwardIterator
__parse_class_escape(_ForwardIterator __first, _ForwardIterator __last,
basic_string<_CharT>& __str,
__bracket_expression<_CharT, _Traits>* __ml);
template <class _ForwardIterator>
_ForwardIterator
__parse_awk_escape(_ForwardIterator __first, _ForwardIterator __last,
basic_string<_CharT>* __str = nullptr);
void __push_l_anchor() {__left_anchor_ = true;} void __push_l_anchor() {__left_anchor_ = true;}
void __push_r_anchor(); void __push_r_anchor();
@ -2834,9 +2884,8 @@ basic_regex<_CharT, _Traits>::__parse(_ForwardIterator __first,
__first = __parse_basic_reg_exp(__first, __last); __first = __parse_basic_reg_exp(__first, __last);
break; break;
case extended: case extended:
__first = __parse_extended_reg_exp(__first, __last);
break;
case awk: case awk:
__first = __parse_extended_reg_exp(__first, __last);
break; break;
case grep: case grep:
__first = __parse_grep(__first, __last); __first = __parse_grep(__first, __last);
@ -3289,6 +3338,10 @@ basic_regex<_CharT, _Traits>::__parse_QUOTED_CHAR_ERE(_ForwardIterator __first,
__push_char(*__temp); __push_char(*__temp);
__first = ++__temp; __first = ++__temp;
break; break;
default:
if ((__flags_ & 0x1F0) == awk)
__first = __parse_awk_escape(++__first, __last);
break;
} }
} }
} }
@ -3488,7 +3541,7 @@ basic_regex<_CharT, _Traits>::__parse_bracket_expression(_ForwardIterator __firs
// __ml owned by *this // __ml owned by *this
if (__first == __last) if (__first == __last)
throw regex_error(regex_constants::error_brack); throw regex_error(regex_constants::error_brack);
if (*__first == ']') if ((__flags_ & 0x1F0) != ECMAScript && *__first == ']')
{ {
__ml->__add_char(']'); __ml->__add_char(']');
++__first; ++__first;
@ -3538,7 +3591,6 @@ basic_regex<_CharT, _Traits>::__parse_expression_term(_ForwardIterator __first,
{ {
if (__first != __last && *__first != ']') if (__first != __last && *__first != ']')
{ {
bool __parsed_one = false;
_ForwardIterator __temp = next(__first); _ForwardIterator __temp = next(__first);
basic_string<_CharT> __start_range; basic_string<_CharT> __start_range;
if (__temp != __last && *__first == '[') if (__temp != __last && *__first == '[')
@ -3548,15 +3600,23 @@ basic_regex<_CharT, _Traits>::__parse_expression_term(_ForwardIterator __first,
else if (*__temp == ':') else if (*__temp == ':')
return __parse_character_class(++__temp, __last, __ml); return __parse_character_class(++__temp, __last, __ml);
else if (*__temp == '.') else if (*__temp == '.')
{
__first = __parse_collating_symbol(++__temp, __last, __start_range); __first = __parse_collating_symbol(++__temp, __last, __start_range);
__parsed_one = true;
}
} }
if (!__parsed_one) unsigned __grammar = __flags_ & 0x1F0;
if (__start_range.empty())
{ {
__start_range = *__first; if ((__grammar == ECMAScript || __grammar == awk) && *__first == '\\')
++__first; {
if (__grammar == ECMAScript)
__first = __parse_class_escape(++__first, __last, __start_range, __ml);
else
__first = __parse_awk_escape(++__first, __last, &__start_range);
}
else
{
__start_range = *__first;
++__first;
}
} }
if (__first != __last && *__first != ']') if (__first != __last && *__first != ']')
{ {
@ -3571,8 +3631,20 @@ basic_regex<_CharT, _Traits>::__parse_expression_term(_ForwardIterator __first,
__first = __parse_collating_symbol(++__temp, __last, __end_range); __first = __parse_collating_symbol(++__temp, __last, __end_range);
else else
{ {
__end_range = *__first; if ((__grammar == ECMAScript || __grammar == awk) && *__first == '\\')
++__first; {
if (__grammar == ECMAScript)
__first = __parse_class_escape(++__first, __last,
__end_range, __ml);
else
__first = __parse_awk_escape(++__first, __last,
&__end_range);
}
else
{
__end_range = *__first;
++__first;
}
} }
__ml->__add_range(_STD::move(__start_range), _STD::move(__end_range)); __ml->__add_range(_STD::move(__start_range), _STD::move(__end_range));
} }
@ -3595,6 +3667,130 @@ basic_regex<_CharT, _Traits>::__parse_expression_term(_ForwardIterator __first,
return __first; return __first;
} }
// Parses one escape found inside a bracket expression; __first points at the
// character that follows the backslash.
//
//   - a NUL character or 'b' yields a single character, stored in __str
//     ('b' maps to character code 8);
//   - d/s/w add the digit/space/alnum class to __ml (w also adds '_');
//   - D/S/W add the same classes through __ml's negated-class interface
//     (W also adds '_' as a negated character);
//   - anything else is delegated to __parse_character_escape, which stores
//     its result in __str.
//
// Returns an iterator past the consumed escape.
// Throws regex_error(error_escape) if the input ends right after the
// backslash.
template <class _CharT, class _Traits>
template <class _ForwardIterator>
_ForwardIterator
basic_regex<_CharT, _Traits>::__parse_class_escape(_ForwardIterator __first,
                                                   _ForwardIterator __last,
                                                   basic_string<_CharT>& __str,
                                                   __bracket_expression<_CharT, _Traits>* __ml)
{
    if (__first == __last)
        throw regex_error(regex_constants::error_escape);
    switch (*__first)
    {
    case 0:
        __str = *__first;
        break;
    case 'b':
        __str = _CharT(8);
        break;
    case 'd':
        __ml->__add_class(ctype_base::digit);
        break;
    case 'D':
        __ml->__add_neg_class(ctype_base::digit);
        break;
    case 's':
        __ml->__add_class(ctype_base::space);
        break;
    case 'S':
        __ml->__add_neg_class(ctype_base::space);
        break;
    case 'w':
        __ml->__add_class(ctype_base::alnum);
        __ml->__add_char('_');
        break;
    case 'W':
        __ml->__add_neg_class(ctype_base::alnum);
        __ml->__add_neg_char('_');
        break;
    default:
        // Not a class escape: treat it as an ordinary character escape.
        return __parse_character_escape(__first, __last, &__str);
    }
    // Every case handled above consumes exactly one character.
    return ++__first;
}
// Parses one awk-grammar escape sequence; __first points at the character
// that follows the backslash.
//
// Forms handled:
//   \\  \"  \/              -> the character itself
//   \a \b \f \n \r \t \v    -> character codes 7, 8, 0xC, 0xA, 0xD, 0x9, 0xB
//   one to three digits 0-7 -> the character with that octal value
//
// If __str is non-null the resulting character is assigned to *__str;
// otherwise it is emitted through __push_char.
//
// Returns an iterator just past the last character of the escape.
// Throws regex_error(error_escape) if the input ends right after the
// backslash or the escape is none of the forms above.
template <class _CharT, class _Traits>
template <class _ForwardIterator>
_ForwardIterator
basic_regex<_CharT, _Traits>::__parse_awk_escape(_ForwardIterator __first,
                                                 _ForwardIterator __last,
                                                 basic_string<_CharT>* __str)
{
    if (__first == __last)
        throw regex_error(regex_constants::error_escape);
    _CharT __c;
    switch (*__first)
    {
    case '\\':
    case '"':
    case '/':
        __c = *__first;
        ++__first;
        break;
    case 'a':
        __c = _CharT(7);
        ++__first;
        break;
    case 'b':
        __c = _CharT(8);
        ++__first;
        break;
    case 'f':
        __c = _CharT(0xC);
        ++__first;
        break;
    case 'n':
        __c = _CharT(0xA);
        ++__first;
        break;
    case 'r':
        __c = _CharT(0xD);
        ++__first;
        break;
    case 't':
        __c = _CharT(0x9);
        ++__first;
        break;
    case 'v':
        __c = _CharT(0xB);
        ++__first;
        break;
    default:
        {
            if (!('0' <= *__first && *__first <= '7'))
                throw regex_error(regex_constants::error_escape);
            // Octal escape: accumulate up to three digits.
            unsigned __val = *__first - '0';
            if (++__first != __last && ('0' <= *__first && *__first <= '7'))
            {
                __val = 8 * __val + *__first - '0';
                if (++__first != __last && ('0' <= *__first && *__first <= '7'))
                {
                    __val = 8 * __val + *__first - '0';
                    // Consume the third digit too; without this the returned
                    // iterator still points at it and the caller would parse
                    // it a second time as an ordinary character.
                    ++__first;
                }
            }
            __c = _CharT(__val);
        }
        break;
    }
    if (__str)
        *__str = __c;
    else
        __push_char(__c);
    return __first;
}
template <class _CharT, class _Traits> template <class _CharT, class _Traits>
template <class _ForwardIterator> template <class _ForwardIterator>
_ForwardIterator _ForwardIterator
@ -4013,7 +4209,8 @@ template <class _CharT, class _Traits>
template <class _ForwardIterator> template <class _ForwardIterator>
_ForwardIterator _ForwardIterator
basic_regex<_CharT, _Traits>::__parse_character_escape(_ForwardIterator __first, basic_regex<_CharT, _Traits>::__parse_character_escape(_ForwardIterator __first,
_ForwardIterator __last) _ForwardIterator __last,
basic_string<_CharT>* __str)
{ {
if (__first != __last) if (__first != __last)
{ {
@ -4023,23 +4220,38 @@ basic_regex<_CharT, _Traits>::__parse_character_escape(_ForwardIterator __first,
switch (*__first) switch (*__first)
{ {
case 'f': case 'f':
__push_char(_CharT(0xC)); if (__str)
*__str = _CharT(0xC);
else
__push_char(_CharT(0xC));
++__first; ++__first;
break; break;
case 'n': case 'n':
__push_char(_CharT(0xA)); if (__str)
*__str = _CharT(0xA);
else
__push_char(_CharT(0xA));
++__first; ++__first;
break; break;
case 'r': case 'r':
__push_char(_CharT(0xD)); if (__str)
*__str = _CharT(0xD);
else
__push_char(_CharT(0xD));
++__first; ++__first;
break; break;
case 't': case 't':
__push_char(_CharT(0x9)); if (__str)
*__str = _CharT(0x9);
else
__push_char(_CharT(0x9));
++__first; ++__first;
break; break;
case 'v': case 'v':
__push_char(_CharT(0xB)); if (__str)
*__str = _CharT(0xB);
else
__push_char(_CharT(0xB));
++__first; ++__first;
break; break;
case 'c': case 'c':
@ -4047,7 +4259,10 @@ basic_regex<_CharT, _Traits>::__parse_character_escape(_ForwardIterator __first,
{ {
if ('A' <= *__t <= 'Z' || 'a' <= *__t <= 'z') if ('A' <= *__t <= 'Z' || 'a' <= *__t <= 'z')
{ {
__push_char(_CharT(*__t % 32)); if (__str)
*__str = _CharT(*__t % 32);
else
__push_char(_CharT(*__t % 32));
__first = ++__t; __first = ++__t;
} }
} }
@ -4079,15 +4294,23 @@ basic_regex<_CharT, _Traits>::__parse_character_escape(_ForwardIterator __first,
if (__hd == -1) if (__hd == -1)
throw regex_error(regex_constants::error_escape); throw regex_error(regex_constants::error_escape);
__sum = 16 * __sum + __hd; __sum = 16 * __sum + __hd;
__push_char(_CharT(__sum)); if (__str)
*__str = _CharT(__sum);
else
__push_char(_CharT(__sum));
++__first; ++__first;
break; break;
default: default:
if (*__first != '_' && !__traits_.isctype(*__first, ctype_base::alnum)) if (*__first != '_' && !__traits_.isctype(*__first, ctype_base::alnum))
{ {
__push_char(*__first); if (__str)
*__str = *__first;
else
__push_char(*__first);
++__first; ++__first;
} }
else if (__str)
throw regex_error(regex_constants::error_escape);
break; break;
} }
} }

File diff suppressed because it is too large Load Diff

View File

@ -788,6 +788,21 @@ int main()
assert(m.position(0) == 6); assert(m.position(0) == 6);
assert(m.str(0) == "Jeff"); assert(m.str(0) == "Jeff");
} }
{
// A negated class escape (\W) inside a bracket expression: "%" is not a
// word character, so the pattern \d[\W]k is expected to match the whole
// subject "5%k" starting at position 0, leaving empty prefix and suffix.
std::cmatch m;
const char s[] = "5%k";
assert(std::regex_search(s, m, std::regex("\\d[\\W]k")));
assert(m.size() == 1);
assert(!m.prefix().matched);
assert(m.prefix().first == s);
assert(m.prefix().second == m[0].first);
assert(!m.suffix().matched);
assert(m.suffix().first == m[0].second);
assert(m.suffix().second == s + std::char_traits<char>::length(s));
assert(m.length(0) == std::char_traits<char>::length(s));
assert(m.position(0) == 0);
assert(m.str(0) == s);
}
{ {
std::wcmatch m; std::wcmatch m;
@ -1552,4 +1567,19 @@ int main()
assert(m.position(0) == 6); assert(m.position(0) == 6);
assert(m.str(0) == L"Jeff"); assert(m.str(0) == L"Jeff");
} }
{
// Wide-character case of a negated class escape (\W) inside a bracket
// expression: the pattern \d[\W]k is expected to match the whole subject
// L"5%k" starting at position 0, leaving empty prefix and suffix.
std::wcmatch m;
const wchar_t s[] = L"5%k";
assert(std::regex_search(s, m, std::wregex(L"\\d[\\W]k")));
assert(m.size() == 1);
assert(!m.prefix().matched);
assert(m.prefix().first == s);
assert(m.prefix().second == m[0].first);
assert(!m.suffix().matched);
assert(m.suffix().first == m[0].second);
assert(m.suffix().second == s + std::char_traits<wchar_t>::length(s));
assert(m.length(0) == std::char_traits<wchar_t>::length(s));
assert(m.position(0) == 0);
assert(m.str(0) == s);
}
} }