mirror of
https://github.com/Tencent/rapidjson.git
synced 2025-03-10 03:29:59 +01:00
Add character class escapes
This commit is contained in:
parent
92285bed44
commit
0dffe87551
@ -46,6 +46,7 @@ static const SizeType kRegexInvalidRange = ~SizeType(0);
|
|||||||
- \c [a-z0-9_] Character class combination
|
- \c [a-z0-9_] Character class combination
|
||||||
- \c [^abc] Negated character classes
|
- \c [^abc] Negated character classes
|
||||||
- \c [^a-c] Negated character class range
|
- \c [^a-c] Negated character class range
|
||||||
|
- \c [\b] Backspace (U+0008)
|
||||||
- \c \\| \\\\ ... Escape characters
|
- \c \\| \\\\ ... Escape characters
|
||||||
- \c \\f Form feed (U+000C)
|
- \c \\f Form feed (U+000C)
|
||||||
- \c \\n Line feed (U+000A)
|
- \c \\n Line feed (U+000A)
|
||||||
@ -265,26 +266,8 @@ private:
|
|||||||
case '\\': // Escape character
|
case '\\': // Escape character
|
||||||
if (!Encoding::Decode(is, &codepoint) || codepoint == 0)
|
if (!Encoding::Decode(is, &codepoint) || codepoint == 0)
|
||||||
return; // Expect an escape character
|
return; // Expect an escape character
|
||||||
switch (codepoint) {
|
if (!CharacterEscape(codepoint, &codepoint))
|
||||||
case '|':
|
return; // Unsupported escape character
|
||||||
case '(':
|
|
||||||
case ')':
|
|
||||||
case '?':
|
|
||||||
case '*':
|
|
||||||
case '+':
|
|
||||||
case '.':
|
|
||||||
case '[':
|
|
||||||
case ']':
|
|
||||||
case '\\':
|
|
||||||
break; // use the codepoint as is
|
|
||||||
case 'f': codepoint = 0x000C; break;
|
|
||||||
case 'n': codepoint = 0x000A; break;
|
|
||||||
case 'r': codepoint = 0x000D; break;
|
|
||||||
case 't': codepoint = 0x0009; break;
|
|
||||||
case 'v': codepoint = 0x000B; break;
|
|
||||||
default:
|
|
||||||
return; // Unsupported escape character
|
|
||||||
}
|
|
||||||
// fall through to default
|
// fall through to default
|
||||||
|
|
||||||
default: // Pattern character
|
default: // Pattern character
|
||||||
@ -414,9 +397,16 @@ private:
|
|||||||
SizeType current = kRegexInvalidRange;
|
SizeType current = kRegexInvalidRange;
|
||||||
unsigned codepoint;
|
unsigned codepoint;
|
||||||
while (Encoding::Decode(is, &codepoint) && codepoint != 0) {
|
while (Encoding::Decode(is, &codepoint) && codepoint != 0) {
|
||||||
if (isBegin && codepoint == '^')
|
if (isBegin) {
|
||||||
negate = true;
|
isBegin = false;
|
||||||
else if (codepoint == ']') {
|
if (codepoint == '^') {
|
||||||
|
negate = true;
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
switch (codepoint) {
|
||||||
|
case ']':
|
||||||
if (step == 2) { // Add trailing '-'
|
if (step == 2) { // Add trailing '-'
|
||||||
SizeType r = NewRange('-');
|
SizeType r = NewRange('-');
|
||||||
RAPIDJSON_ASSERT(current != kRegexInvalidRange);
|
RAPIDJSON_ASSERT(current != kRegexInvalidRange);
|
||||||
@ -426,8 +416,17 @@ private:
|
|||||||
GetRange(start).start |= kRangeNegationFlag;
|
GetRange(start).start |= kRangeNegationFlag;
|
||||||
*range = start;
|
*range = start;
|
||||||
return true;
|
return true;
|
||||||
}
|
|
||||||
else {
|
case '\\':
|
||||||
|
if (!Encoding::Decode(is, &codepoint) || codepoint == 0)
|
||||||
|
return false; // Expect an escape character
|
||||||
|
if (codepoint == 'b')
|
||||||
|
codepoint = 0x0008; // Escape backspace character
|
||||||
|
else if (!CharacterEscape(codepoint, &codepoint))
|
||||||
|
return false;
|
||||||
|
// fall through to default
|
||||||
|
|
||||||
|
default:
|
||||||
switch (step) {
|
switch (step) {
|
||||||
case 1:
|
case 1:
|
||||||
if (codepoint == '-') {
|
if (codepoint == '-') {
|
||||||
@ -454,7 +453,6 @@ private:
|
|||||||
step = 0;
|
step = 0;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
isBegin = false;
|
|
||||||
}
|
}
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
@ -466,6 +464,29 @@ private:
|
|||||||
return rangeCount_++;
|
return rangeCount_++;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
//! Resolves the character that follows a backslash in a pattern.
/*!
    \param codepoint        Code point read immediately after the '\\'.
    \param escapedCodepoint Receives the code point the escape stands for.
                            It is written only when the escape is supported.
    \return true if the escape is supported, false otherwise.
*/
bool CharacterEscape(unsigned codepoint, unsigned* escapedCodepoint) {
    switch (codepoint) {
        // Syntax characters: escaping yields the character itself.
        case '|':
        case '(':
        case ')':
        case '?':
        case '*':
        case '+':
        case '.':
        case '[':
        case ']':
        case '\\':
            *escapedCodepoint = codepoint;
            return true;

        // Control-character escapes.
        case 'f': *escapedCodepoint = 0x000C; return true; // Form feed
        case 'n': *escapedCodepoint = 0x000A; return true; // Line feed
        case 'r': *escapedCodepoint = 0x000D; return true; // Carriage return
        case 't': *escapedCodepoint = 0x0009; return true; // Horizontal tab
        case 'v': *escapedCodepoint = 0x000B; return true; // Vertical tab

        default:
            return false; // Unsupported escape character
    }
}
|
||||||
|
|
||||||
Stack<Allocator> states_;
|
Stack<Allocator> states_;
|
||||||
Stack<Allocator> ranges_;
|
Stack<Allocator> ranges_;
|
||||||
SizeType root_;
|
SizeType root_;
|
||||||
|
@ -328,10 +328,10 @@ TEST(Regex, CharacterRange8) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Verifies that each supported escape matches the character it denotes, both
// at pattern level (\| \( \) \? \* \+ \. \[ \] \\ \f \n \r \t \v) and inside a
// character class ([\b], [\[], [\]]).
TEST(Regex, Escape) {
    const char* s = "\\|\\(\\)\\?\\*\\+\\.\\[\\]\\\\\\f\\n\\r\\t\\v[\\b][\\[][\\]]";
    Regex re(s);
    ASSERT_TRUE(re.IsValid());
    EXPECT_TRUE(re.Match("|()?*+.[]\\\x0C\n\r\t\x0B\b[]"));
    EXPECT_FALSE(re.Match(s)); // Not escaping
}
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user