mirror of
https://github.com/Tencent/rapidjson.git
synced 2025-03-09 19:24:23 +01:00
Add parenthesis support in regex
This commit is contained in:
parent
0bef29a5f6
commit
05c79891d1
@ -90,6 +90,12 @@ public:
|
||||
}
|
||||
|
||||
private:
|
||||
//! Operators stored on the parser's operator stack.
/*!
    The enumerator order is significant: the parser compares values with
    `<` (it evaluates every stacked operator that is `< kAlternation` before
    pushing an alternation), so kConcatenation must remain the smallest value
    and kLeftParenthesis must not compare below kAlternation.
*/
enum Operator {
    kConcatenation,     //!< Joins two adjacent fragments in sequence
    kAlternation,       //!< '|' : choice between two fragments
    kLeftParenthesis    //!< '(' : marker removed when the matching ')' is read
};
|
||||
|
||||
struct State {
|
||||
SizeType out; //!< Equals to kInvalid for match
|
||||
SizeType out1; //!< Equals to non-kInvalid for split
|
||||
@ -155,52 +161,96 @@ private:
|
||||
//! Parses the pattern read from \c is and builds the state graph.
/*!
    Codepoints are decoded from \c is until decoding fails or a 0 codepoint
    is read. Three stacks drive the parse:
    - operandStack   holds Frag values (partially built fragments),
    - operatorStack  holds pending Operator values,
    - atomCountStack holds one unsigned counter per open parenthesis level
      (plus one for the top level); it counts the atoms seen since the last
      '(' or '|' and decides when an implicit concatenation is needed.

    On success root_ is set to the start state of the single remaining
    fragment. On any error (unbalanced ')', an operator without enough
    operands, or anything other than exactly one fragment left at the end)
    the function returns without assigning root_.

    \param is Input stream providing the encoded pattern.
*/
void Parse(InputStream& is) {
    Allocator allocator;
    Stack<Allocator> operandStack(&allocator, 256);     // Frag
    Stack<Allocator> operatorStack(&allocator, 256);    // Operator
    Stack<Allocator> atomCountStack(&allocator, 256);   // unsigned (Atom per parenthesis)

    // Counter for the top level (outside any parenthesis).
    *atomCountStack.template Push<unsigned>() = 0;

    unsigned codepoint;
    while (Encoding::Decode(is, &codepoint) && codepoint != 0) {
        switch (codepoint) {
            case '|':
                // Evaluate pending operators ordered below alternation first.
                while (!operatorStack.Empty() && *operatorStack.template Top<Operator>() < kAlternation)
                    if (!Eval(operandStack, operatorStack))
                        return;
                *operatorStack.template Push<Operator>() = kAlternation;
                // A new alternative starts: no atom seen yet on this level.
                *atomCountStack.template Top<unsigned>() = 0;
                break;

            case '(':
                *operatorStack.template Push<Operator>() = kLeftParenthesis;
                *atomCountStack.template Push<unsigned>() = 0;
                break;

            case ')':
                // Evaluate everything back to the matching '('.
                while (!operatorStack.Empty() && *operatorStack.template Top<Operator>() != kLeftParenthesis)
                    if (!Eval(operandStack, operatorStack))
                        return;
                if (operatorStack.Empty())  // ')' without a matching '('
                    return;
                operatorStack.template Pop<Operator>(1);
                atomCountStack.template Pop<unsigned>(1);
                // The closed group counts as one atom of the enclosing level.
                ImplicitConcatenation(atomCountStack, operatorStack);
                break;

            default:
                {
                    // Any other codepoint is a literal: one new state, one new fragment.
                    SizeType s = NewState(kRegexInvalidState, kRegexInvalidState, codepoint);
                    *operandStack.template Push<Frag>() = Frag(s, s);
                    ImplicitConcatenation(atomCountStack, operatorStack);
                }
                break;
        }
    }

    // Evaluate whatever operators are still pending.
    while (!operatorStack.Empty())
        if (!Eval(operandStack, operatorStack))
            return;

    // Link the operand to matching state.
    if (operandStack.GetSize() == sizeof(Frag)) {
        Frag* e = operandStack.template Pop<Frag>(1);
        Patch(e->out, NewState(kRegexInvalidState, kRegexInvalidState, 0));
        root_ = e->start;
        // printf("root: %d\n", root_);
        // for (SizeType i = 0; i < stateCount_ ; i++) {
        //     State& s = GetState(i);
        //     printf("[%2d] out: %2d out1: %2d c: '%c'\n", i, s.out, s.out1, (char)s.codepoint);
        // }
        // printf("\n");
    }
}
|
||||
|
||||
//! Pops one operator and applies it to the two topmost fragments.
/*!
    The operator is removed from \c operatorStack unconditionally, i.e. also
    when the function fails.

    \param operandStack  Stack of Frag values; two are consumed and one is
                         pushed on success.
    \param operatorStack Stack of Operator values; one is consumed.
    \return true if the operator was kConcatenation or kAlternation and at
            least two fragments were available; false otherwise (including
            when the popped operator is kLeftParenthesis).
*/
bool Eval(Stack<Allocator>& operandStack, Stack<Allocator>& operatorStack) {
    switch (*operatorStack.template Pop<Operator>(1)) {
        case kConcatenation:
            if (operandStack.GetSize() >= sizeof(Frag) * 2) {
                // e2 was pushed last, so it is the right-hand operand.
                Frag e2 = *operandStack.template Pop<Frag>(1);
                Frag e1 = *operandStack.template Pop<Frag>(1);
                Patch(e1.out, e2.start);
                *operandStack.template Push<Frag>() = Frag(e1.start, e2.out);
                return true;
            }
            return false;

        case kAlternation:
            if (operandStack.GetSize() >= sizeof(Frag) * 2) {
                Frag e2 = *operandStack.template Pop<Frag>(1);
                Frag e1 = *operandStack.template Pop<Frag>(1);
                // New state with both alternatives as its two outputs.
                SizeType s = NewState(e1.start, e2.start, 0);
                *operandStack.template Push<Frag>() = Frag(s, Append(e1.out, e2.out));
                return true;
            }
            return false;

        default:
            return false;
    }
}
|
||||
|
||||
//! Registers one more atom on the current parenthesis level.
/*!
    If the level's counter is already non-zero, a kConcatenation operator is
    pushed first; the counter on top of \c atomCountStack is then incremented.

    \param atomCountStack Stack of unsigned counters; only its top is touched.
    \param operatorStack  Stack of Operator values that may receive
                          kConcatenation.
*/
void ImplicitConcatenation(Stack<Allocator>& atomCountStack, Stack<Allocator>& operatorStack) {
    if (*atomCountStack.template Top<unsigned>())
        *operatorStack.template Push<Operator>() = kConcatenation;
    (*atomCountStack.template Top<unsigned>())++;
}
|
||||
|
||||
Stack<Allocator> states_;
|
||||
SizeType root_;
|
||||
SizeType stateCount_;
|
||||
|
@ -19,6 +19,7 @@ using namespace rapidjson::internal;
|
||||
|
||||
TEST(Regex, concatenation) {
|
||||
Regex re("abc");
|
||||
ASSERT_TRUE(re.IsValid());
|
||||
EXPECT_TRUE(re.Match("abc"));
|
||||
EXPECT_FALSE(re.Match(""));
|
||||
EXPECT_FALSE(re.Match("a"));
|
||||
@ -27,24 +28,59 @@ TEST(Regex, concatenation) {
|
||||
EXPECT_FALSE(re.Match("abcd"));
|
||||
}
|
||||
|
||||
// Alternation between two multi-character branches.
TEST(Regex, split1) {
    Regex re("abab|abbb");
    ASSERT_TRUE(re.IsValid());
    EXPECT_TRUE(re.Match("abab"));
    EXPECT_TRUE(re.Match("abbb"));
    EXPECT_FALSE(re.Match(""));
    EXPECT_FALSE(re.Match("ab"));
    EXPECT_FALSE(re.Match("ababa"));
    EXPECT_FALSE(re.Match("abb"));
    EXPECT_FALSE(re.Match("abbbb"));
}
|
||||
|
||||
// Chained alternation of three single-character branches.
TEST(Regex, split2) {
    Regex re("a|b|c");
    ASSERT_TRUE(re.IsValid());
    EXPECT_TRUE(re.Match("a"));
    EXPECT_TRUE(re.Match("b"));
    EXPECT_TRUE(re.Match("c"));
    EXPECT_FALSE(re.Match(""));
    EXPECT_FALSE(re.Match("aa"));
    EXPECT_FALSE(re.Match("ab"));
}
|
||||
|
||||
// Group at the start of the pattern, followed by a literal.
TEST(Regex, parenthesis1) {
    Regex re("(ab)c");
    ASSERT_TRUE(re.IsValid());
    EXPECT_TRUE(re.Match("abc"));
    EXPECT_FALSE(re.Match(""));
    EXPECT_FALSE(re.Match("a"));
    EXPECT_FALSE(re.Match("b"));
    EXPECT_FALSE(re.Match("ab"));
    EXPECT_FALSE(re.Match("abcd"));
}
|
||||
|
||||
// Literal followed by a group at the end of the pattern.
TEST(Regex, parenthesis2) {
    Regex re("a(bc)");
    ASSERT_TRUE(re.IsValid());
    EXPECT_TRUE(re.Match("abc"));
    EXPECT_FALSE(re.Match(""));
    EXPECT_FALSE(re.Match("a"));
    EXPECT_FALSE(re.Match("b"));
    EXPECT_FALSE(re.Match("ab"));
    EXPECT_FALSE(re.Match("abcd"));
}
|
||||
|
||||
// Two adjacent groups, each containing an alternation.
TEST(Regex, parenthesis3) {
    Regex re("(a|b)(c|d)");
    ASSERT_TRUE(re.IsValid());
    EXPECT_TRUE(re.Match("ac"));
    EXPECT_TRUE(re.Match("ad"));
    EXPECT_TRUE(re.Match("bc"));
    EXPECT_TRUE(re.Match("bd"));
    EXPECT_FALSE(re.Match(""));
    EXPECT_FALSE(re.Match("ab"));
    EXPECT_FALSE(re.Match("cd"));
}
|
||||
|
Loading…
x
Reference in New Issue
Block a user