mirror of
https://github.com/Tencent/rapidjson.git
synced 2025-03-09 19:24:23 +01:00
Add character class escapes
This commit is contained in:
parent
92285bed44
commit
0dffe87551
@ -46,6 +46,7 @@ static const SizeType kRegexInvalidRange = ~SizeType(0);
|
||||
- \c [a-z0-9_] Character class combination
|
||||
- \c [^abc] Negated character classes
|
||||
- \c [^a-c] Negated character class range
|
||||
- \c [\b] Backspace (U+0008)
|
||||
- \c \\| \\\\ ... Escape characters
|
||||
- \c \\f Form feed (U+000C)
|
||||
- \c \\n Line feed (U+000A)
|
||||
@ -265,26 +266,8 @@ private:
|
||||
case '\\': // Escape character
|
||||
if (!Encoding::Decode(is, &codepoint) || codepoint == 0)
|
||||
return; // Expect an escape character
|
||||
switch (codepoint) {
|
||||
case '|':
|
||||
case '(':
|
||||
case ')':
|
||||
case '?':
|
||||
case '*':
|
||||
case '+':
|
||||
case '.':
|
||||
case '[':
|
||||
case ']':
|
||||
case '\\':
|
||||
break; // use the codepoint as is
|
||||
case 'f': codepoint = 0x000C; break;
|
||||
case 'n': codepoint = 0x000A; break;
|
||||
case 'r': codepoint = 0x000D; break;
|
||||
case 't': codepoint = 0x0009; break;
|
||||
case 'v': codepoint = 0x000B; break;
|
||||
default:
|
||||
return; // Unsupported escape character
|
||||
}
|
||||
if (!CharacterEscape(codepoint, &codepoint))
|
||||
return; // Unsupported escape character
|
||||
// fall through to default
|
||||
|
||||
default: // Pattern character
|
||||
@ -414,9 +397,16 @@ private:
|
||||
SizeType current = kRegexInvalidRange;
|
||||
unsigned codepoint;
|
||||
while (Encoding::Decode(is, &codepoint) && codepoint != 0) {
|
||||
if (isBegin && codepoint == '^')
|
||||
negate = true;
|
||||
else if (codepoint == ']') {
|
||||
if (isBegin) {
|
||||
isBegin = false;
|
||||
if (codepoint == '^') {
|
||||
negate = true;
|
||||
continue;
|
||||
}
|
||||
}
|
||||
|
||||
switch (codepoint) {
|
||||
case ']':
|
||||
if (step == 2) { // Add trailing '-'
|
||||
SizeType r = NewRange('-');
|
||||
RAPIDJSON_ASSERT(current != kRegexInvalidRange);
|
||||
@ -426,8 +416,17 @@ private:
|
||||
GetRange(start).start |= kRangeNegationFlag;
|
||||
*range = start;
|
||||
return true;
|
||||
}
|
||||
else {
|
||||
|
||||
case '\\':
|
||||
if (!Encoding::Decode(is, &codepoint) || codepoint == 0)
|
||||
return false; // Expect an escape character
|
||||
if (codepoint == 'b')
|
||||
codepoint = 0x0008; // Escape backspace character
|
||||
else if (!CharacterEscape(codepoint, &codepoint))
|
||||
return false;
|
||||
// fall through to default
|
||||
|
||||
default:
|
||||
switch (step) {
|
||||
case 1:
|
||||
if (codepoint == '-') {
|
||||
@ -454,7 +453,6 @@ private:
|
||||
step = 0;
|
||||
}
|
||||
}
|
||||
isBegin = false;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
@ -466,6 +464,29 @@ private:
|
||||
return rangeCount_++;
|
||||
}
|
||||
|
||||
//! Resolves the character that follows a backslash to the code point it denotes.
/*!
    \param codepoint The code point read immediately after the backslash.
    \param escapedCodepoint Receives the resolved code point; written only on success.
    \return true if \c codepoint is a supported escape, false otherwise.
    \note \c codepoint is taken by value, so a caller may pass the address of the
        same variable as \c escapedCodepoint.
*/
bool CharacterEscape(unsigned codepoint, unsigned* escapedCodepoint) {
|
||||
switch (codepoint) {
|
||||
case '|':
|
||||
case '(':
|
||||
case ')':
|
||||
case '?':
|
||||
case '*':
|
||||
case '+':
|
||||
case '.':
|
||||
case '[':
|
||||
case ']':
|
||||
case '\\':
|
||||
*escapedCodepoint = codepoint; return true; // These characters escape to themselves
|
||||
case 'f': *escapedCodepoint = 0x000C; return true; // Form feed
|
||||
case 'n': *escapedCodepoint = 0x000A; return true; // Line feed
|
||||
case 'r': *escapedCodepoint = 0x000D; return true; // Carriage return
|
||||
case 't': *escapedCodepoint = 0x0009; return true; // Horizontal tab
|
||||
case 'v': *escapedCodepoint = 0x000B; return true; // Vertical tab
|
||||
default:
|
||||
return false; // Unsupported escape character
|
||||
}
|
||||
}
|
||||
|
||||
Stack<Allocator> states_;
|
||||
Stack<Allocator> ranges_;
|
||||
SizeType root_;
|
||||
|
@ -328,10 +328,10 @@ TEST(Regex, CharacterRange8) {
|
||||
}
|
||||
|
||||
// A pattern built purely from escape sequences must be accepted as valid and must
// match the literal characters those escapes denote. The pattern's own source text
// (backslashes included) must not match, which shows the escapes were interpreted.
TEST(Regex, Escape) {
|
||||
const char* s = "\\|\\(\\)\\?\\*\\+\\.\\[\\]\\\\\\f\\n\\r\\t\\v";
|
||||
const char* s = "\\|\\(\\)\\?\\*\\+\\.\\[\\]\\\\\\f\\n\\r\\t\\v[\\b][\\[][\\]]";
|
||||
Regex re(s);
|
||||
ASSERT_TRUE(re.IsValid());
|
||||
EXPECT_TRUE(re.Match("|()?*+.[]\\\x0C\n\r\t\x0B"));
|
||||
EXPECT_TRUE(re.Match("|()?*+.[]\\\x0C\n\r\t\x0B\b[]"));
|
||||
EXPECT_FALSE(re.Match(s)); // Not escaping
|
||||
}
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user