mirror of
https://github.com/Tencent/rapidjson.git
synced 2025-03-09 19:24:23 +01:00
Add ?*+ to regex
This commit is contained in:
parent
05c79891d1
commit
a386934288
@ -54,11 +54,12 @@ public:
|
||||
const size_t stateSetSize = (stateCount_ + 31) / 32 * 4;
|
||||
unsigned* stateSet = static_cast<unsigned*>(allocator.Malloc(stateSetSize));
|
||||
std::memset(stateSet, 0, stateSetSize);
|
||||
|
||||
AddState(stateSet, *current, root_);
|
||||
|
||||
unsigned codepoint;
|
||||
while (!current->Empty() && Encoding::Decode(is, &codepoint) && codepoint != 0) {
|
||||
std::memset(stateSet, 0, stateSetSize);
|
||||
next->Clear();
|
||||
for (const SizeType* s = current->template Bottom<SizeType>(); s != current->template End<SizeType>(); ++s) {
|
||||
const State& sr = GetState(*s);
|
||||
// if (sr.out != kRegexInvalidState)
|
||||
@ -70,8 +71,6 @@ public:
|
||||
Stack<Allocator>* temp = current;
|
||||
current = next;
|
||||
next = temp;
|
||||
std::memset(stateSet, 0, stateSetSize);
|
||||
next->Clear();
|
||||
// printf("\n");
|
||||
}
|
||||
|
||||
@ -91,9 +90,12 @@ public:
|
||||
|
||||
private:
|
||||
// Operator tags stored on the parser's operator stack.
// The first three are the quantifiers: the parser pushes one when it reads
// '?', '*' or '+' and then immediately calls Eval() on it.
// NOTE(review): kLeftParenthesis is listed twice below, with and without the
// trailing comma — presumably the before/after lines of the diff; confirm.
enum Operator {
|
||||
kZeroOrOne,     // pushed for '?'
|
||||
kZeroOrMore,    // pushed for '*'
|
||||
kOneOrMore,     // pushed for '+'
|
||||
kConcatenation,
|
||||
kAlternation,
|
||||
kLeftParenthesis,
|
||||
kLeftParenthesis
|
||||
};
|
||||
|
||||
struct State {
|
||||
@ -193,6 +195,24 @@ private:
|
||||
ImplicitConcatenation(atomCountStack, operatorStack);
|
||||
break;
|
||||
|
||||
case '?':
|
||||
*operatorStack.template Push<Operator>() = kZeroOrOne;
|
||||
if (!Eval(operandStack, operatorStack))
|
||||
return;
|
||||
break;
|
||||
|
||||
case '*':
|
||||
*operatorStack.template Push<Operator>() = kZeroOrMore;
|
||||
if (!Eval(operandStack, operatorStack))
|
||||
return;
|
||||
break;
|
||||
|
||||
case '+':
|
||||
*operatorStack.template Push<Operator>() = kOneOrMore;
|
||||
if (!Eval(operandStack, operatorStack))
|
||||
return;
|
||||
break;
|
||||
|
||||
default:
|
||||
SizeType s = NewState(kRegexInvalidState, kRegexInvalidState, codepoint);
|
||||
*operandStack.template Push<Frag>() = Frag(s, s);
|
||||
@ -209,16 +229,19 @@ private:
|
||||
Frag* e = operandStack.template Pop<Frag>(1);
|
||||
Patch(e->out, NewState(kRegexInvalidState, kRegexInvalidState, 0));
|
||||
root_ = e->start;
|
||||
// printf("root: %d\n", root_);
|
||||
// for (SizeType i = 0; i < stateCount_ ; i++) {
|
||||
// State& s = GetState(i);
|
||||
// printf("[%2d] out: %2d out1: %2d c: '%c'\n", i, s.out, s.out1, (char)s.codepoint);
|
||||
// }
|
||||
// printf("\n");
|
||||
#if 0
|
||||
printf("root: %d\n", root_);
|
||||
for (SizeType i = 0; i < stateCount_ ; i++) {
|
||||
State& s = GetState(i);
|
||||
printf("[%2d] out: %2d out1: %2d c: '%c'\n", i, s.out, s.out1, (char)s.codepoint);
|
||||
}
|
||||
printf("\n");
|
||||
#endif
|
||||
}
|
||||
}
|
||||
|
||||
bool Eval(Stack<Allocator>& operandStack, Stack<Allocator>& operatorStack) {
|
||||
// printf("Eval %c\n", "?*+.|("[*operatorStack.template Top<Operator>()]);
|
||||
switch (*operatorStack.template Pop<Operator>(1)) {
|
||||
case kConcatenation:
|
||||
if (operandStack.GetSize() >= sizeof(Frag) * 2) {
|
||||
@ -240,6 +263,35 @@ private:
|
||||
}
|
||||
return false;
|
||||
|
||||
case kZeroOrOne:
|
||||
if (operandStack.GetSize() >= sizeof(Frag)) {
|
||||
Frag e = *operandStack.template Pop<Frag>(1);
|
||||
SizeType s = NewState(kRegexInvalidState, e.start, 0);
|
||||
*operandStack.template Push<Frag>() = Frag(s, Append(e.out, s));
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
|
||||
case kZeroOrMore:
|
||||
if (operandStack.GetSize() >= sizeof(Frag)) {
|
||||
Frag e = *operandStack.template Pop<Frag>(1);
|
||||
SizeType s = NewState(kRegexInvalidState, e.start, 0);
|
||||
Patch(e.out, s);
|
||||
*operandStack.template Push<Frag>() = Frag(s, s);
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
|
||||
case kOneOrMore:
|
||||
if (operandStack.GetSize() >= sizeof(Frag)) {
|
||||
Frag e = *operandStack.template Pop<Frag>(1);
|
||||
SizeType s = NewState(kRegexInvalidState, e.start, 0);
|
||||
Patch(e.out, s);
|
||||
*operandStack.template Push<Frag>() = Frag(e.start, s);
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
|
||||
default:
|
||||
return false;
|
||||
}
|
||||
|
@ -17,7 +17,7 @@
|
||||
|
||||
using namespace rapidjson::internal;
|
||||
|
||||
TEST(Regex, concatenation) {
|
||||
TEST(Regex, Concatenation) {
|
||||
Regex re("abc");
|
||||
ASSERT_TRUE(re.IsValid());
|
||||
EXPECT_TRUE(re.Match("abc"));
|
||||
@ -28,7 +28,7 @@ TEST(Regex, concatenation) {
|
||||
EXPECT_FALSE(re.Match("abcd"));
|
||||
}
|
||||
|
||||
TEST(Regex, split1) {
|
||||
TEST(Regex, Alternation1) {
|
||||
Regex re("abab|abbb");
|
||||
ASSERT_TRUE(re.IsValid());
|
||||
EXPECT_TRUE(re.Match("abab"));
|
||||
@ -40,7 +40,7 @@ TEST(Regex, split1) {
|
||||
EXPECT_FALSE(re.Match("abbbb"));
|
||||
}
|
||||
|
||||
TEST(Regex, split2) {
|
||||
TEST(Regex, Alternation2) {
|
||||
Regex re("a|b|c");
|
||||
ASSERT_TRUE(re.IsValid());
|
||||
EXPECT_TRUE(re.Match("a"));
|
||||
@ -51,7 +51,7 @@ TEST(Regex, split2) {
|
||||
EXPECT_FALSE(re.Match("ab"));
|
||||
}
|
||||
|
||||
TEST(Regex, parenthesis1) {
|
||||
TEST(Regex, Parenthesis1) {
|
||||
Regex re("(ab)c");
|
||||
ASSERT_TRUE(re.IsValid());
|
||||
EXPECT_TRUE(re.Match("abc"));
|
||||
@ -62,7 +62,7 @@ TEST(Regex, parenthesis1) {
|
||||
EXPECT_FALSE(re.Match("abcd"));
|
||||
}
|
||||
|
||||
TEST(Regex, parenthesis2) {
|
||||
TEST(Regex, Parenthesis2) {
|
||||
Regex re("a(bc)");
|
||||
ASSERT_TRUE(re.IsValid());
|
||||
EXPECT_TRUE(re.Match("abc"));
|
||||
@ -73,7 +73,7 @@ TEST(Regex, parenthesis2) {
|
||||
EXPECT_FALSE(re.Match("abcd"));
|
||||
}
|
||||
|
||||
TEST(Regex, parenthesis3) {
|
||||
TEST(Regex, Parenthesis3) {
|
||||
Regex re("(a|b)(c|d)");
|
||||
ASSERT_TRUE(re.IsValid());
|
||||
EXPECT_TRUE(re.Match("ac"));
|
||||
@ -84,3 +84,138 @@ TEST(Regex, parenthesis3) {
|
||||
EXPECT_FALSE(re.Match("ab"));
|
||||
EXPECT_FALSE(re.Match("cd"));
|
||||
}
|
||||
|
||||
// "a?" on its own: expects a match for the empty string and for a single "a",
// and no match for "aa".
TEST(Regex, ZeroOrOne1) {
|
||||
Regex re("a?");
|
||||
ASSERT_TRUE(re.IsValid());
|
||||
EXPECT_TRUE(re.Match(""));
|
||||
EXPECT_TRUE(re.Match("a"));
|
||||
EXPECT_FALSE(re.Match("aa"));
|
||||
}
|
||||
|
||||
// Optional atom followed by a mandatory one ("a?b"): "b" and "ab" must match;
// inputs missing the 'b', repeating a character, or in the wrong order must not.
TEST(Regex, ZeroOrOne2) {
|
||||
Regex re("a?b");
|
||||
ASSERT_TRUE(re.IsValid());
|
||||
EXPECT_TRUE(re.Match("b"));
|
||||
EXPECT_TRUE(re.Match("ab"));
|
||||
EXPECT_FALSE(re.Match("a"));
|
||||
EXPECT_FALSE(re.Match("aa"));
|
||||
EXPECT_FALSE(re.Match("bb"));
|
||||
EXPECT_FALSE(re.Match("ba"));
|
||||
}
|
||||
|
||||
// Mandatory atom followed by an optional one ("ab?"): "a" and "ab" must match;
// inputs missing the leading 'a', repeating a character, or reversed must not.
TEST(Regex, ZeroOrOne3) {
|
||||
Regex re("ab?");
|
||||
ASSERT_TRUE(re.IsValid());
|
||||
EXPECT_TRUE(re.Match("a"));
|
||||
EXPECT_TRUE(re.Match("ab"));
|
||||
EXPECT_FALSE(re.Match("b"));
|
||||
EXPECT_FALSE(re.Match("aa"));
|
||||
EXPECT_FALSE(re.Match("bb"));
|
||||
EXPECT_FALSE(re.Match("ba"));
|
||||
}
|
||||
|
||||
// Two consecutive optional atoms ("a?b?"): "", "a", "b" and "ab" must match;
// repeated characters, reversed order, or a trailing extra character must not.
TEST(Regex, ZeroOrOne4) {
|
||||
Regex re("a?b?");
|
||||
ASSERT_TRUE(re.IsValid());
|
||||
EXPECT_TRUE(re.Match(""));
|
||||
EXPECT_TRUE(re.Match("a"));
|
||||
EXPECT_TRUE(re.Match("b"));
|
||||
EXPECT_TRUE(re.Match("ab"));
|
||||
EXPECT_FALSE(re.Match("aa"));
|
||||
EXPECT_FALSE(re.Match("bb"));
|
||||
EXPECT_FALSE(re.Match("ba"));
|
||||
EXPECT_FALSE(re.Match("abc"));
|
||||
}
|
||||
|
||||
// '?' applied to a parenthesized group ("a(ab)?b"): the group is either absent
// ("ab") or present once ("aabb"); partial occurrences of the group must not match.
TEST(Regex, ZeroOrOne5) {
|
||||
Regex re("a(ab)?b");
|
||||
ASSERT_TRUE(re.IsValid());
|
||||
EXPECT_TRUE(re.Match("ab"));
|
||||
EXPECT_TRUE(re.Match("aabb"));
|
||||
EXPECT_FALSE(re.Match("aab"));
|
||||
EXPECT_FALSE(re.Match("abb"));
|
||||
}
|
||||
|
||||
// "a*" on its own: expects a match for "", "a" and "aa", and no match for any
// input containing a character other than 'a'.
TEST(Regex, ZeroOrMore1) {
|
||||
Regex re("a*");
|
||||
ASSERT_TRUE(re.IsValid());
|
||||
EXPECT_TRUE(re.Match(""));
|
||||
EXPECT_TRUE(re.Match("a"));
|
||||
EXPECT_TRUE(re.Match("aa"));
|
||||
EXPECT_FALSE(re.Match("b"));
|
||||
EXPECT_FALSE(re.Match("ab"));
|
||||
}
|
||||
|
||||
// Starred atom followed by a mandatory one ("a*b"): any number of 'a' then
// exactly one 'b' must match; the empty string and "bb" must not.
TEST(Regex, ZeroOrMore2) {
|
||||
Regex re("a*b");
|
||||
ASSERT_TRUE(re.IsValid());
|
||||
EXPECT_TRUE(re.Match("b"));
|
||||
EXPECT_TRUE(re.Match("ab"));
|
||||
EXPECT_TRUE(re.Match("aab"));
|
||||
EXPECT_FALSE(re.Match(""));
|
||||
EXPECT_FALSE(re.Match("bb"));
|
||||
}
|
||||
|
||||
// Two consecutive starred atoms ("a*b*"): any run of 'a' followed by any run of
// 'b' (including both empty) must match; a 'b' before an 'a' must not.
TEST(Regex, ZeroOrMore3) {
|
||||
Regex re("a*b*");
|
||||
ASSERT_TRUE(re.IsValid());
|
||||
EXPECT_TRUE(re.Match(""));
|
||||
EXPECT_TRUE(re.Match("a"));
|
||||
EXPECT_TRUE(re.Match("aa"));
|
||||
EXPECT_TRUE(re.Match("b"));
|
||||
EXPECT_TRUE(re.Match("bb"));
|
||||
EXPECT_TRUE(re.Match("ab"));
|
||||
EXPECT_TRUE(re.Match("aabb"));
|
||||
EXPECT_FALSE(re.Match("ba"));
|
||||
}
|
||||
|
||||
// '*' applied to a parenthesized group ("a(ab)*b"): zero, one, or two
// repetitions of "ab" between 'a' and 'b' must match; "" and "aa" must not.
TEST(Regex, ZeroOrMore4) {
|
||||
Regex re("a(ab)*b");
|
||||
ASSERT_TRUE(re.IsValid());
|
||||
EXPECT_TRUE(re.Match("ab"));
|
||||
EXPECT_TRUE(re.Match("aabb"));
|
||||
EXPECT_TRUE(re.Match("aababb"));
|
||||
EXPECT_FALSE(re.Match(""));
|
||||
EXPECT_FALSE(re.Match("aa"));
|
||||
}
|
||||
|
||||
// "a+" on its own: expects a match for "a" and "aa"; the empty string and any
// input containing a character other than 'a' must not match.
TEST(Regex, OneOrMore1) {
|
||||
Regex re("a+");
|
||||
ASSERT_TRUE(re.IsValid());
|
||||
EXPECT_TRUE(re.Match("a"));
|
||||
EXPECT_TRUE(re.Match("aa"));
|
||||
EXPECT_FALSE(re.Match(""));
|
||||
EXPECT_FALSE(re.Match("b"));
|
||||
EXPECT_FALSE(re.Match("ab"));
|
||||
}
|
||||
|
||||
// Plus-quantified atom followed by a mandatory one ("a+b"): at least one 'a'
// is required before the 'b', so "" and a bare "b" must not match.
TEST(Regex, OneOrMore2) {
|
||||
Regex re("a+b");
|
||||
ASSERT_TRUE(re.IsValid());
|
||||
EXPECT_TRUE(re.Match("ab"));
|
||||
EXPECT_TRUE(re.Match("aab"));
|
||||
EXPECT_FALSE(re.Match(""));
|
||||
EXPECT_FALSE(re.Match("b"));
|
||||
}
|
||||
|
||||
// Two consecutive plus-quantified atoms ("a+b+"): one or more 'a' followed by
// one or more 'b' must match; "", a bare "b", and reversed order must not.
TEST(Regex, OneOrMore3) {
|
||||
Regex re("a+b+");
|
||||
ASSERT_TRUE(re.IsValid());
|
||||
EXPECT_TRUE(re.Match("ab"));
|
||||
EXPECT_TRUE(re.Match("aab"));
|
||||
EXPECT_TRUE(re.Match("abb"));
|
||||
EXPECT_TRUE(re.Match("aabb"));
|
||||
EXPECT_FALSE(re.Match(""));
|
||||
EXPECT_FALSE(re.Match("b"));
|
||||
EXPECT_FALSE(re.Match("ba"));
|
||||
}
|
||||
|
||||
// '+' applied to a parenthesized group ("a(ab)+b"): the group must occur at
// least once, so "ab" (zero repetitions) and "" must not match.
TEST(Regex, OneOrMore4) {
|
||||
Regex re("a(ab)+b");
|
||||
ASSERT_TRUE(re.IsValid());
|
||||
EXPECT_TRUE(re.Match("aabb"));
|
||||
EXPECT_TRUE(re.Match("aababb"));
|
||||
EXPECT_FALSE(re.Match(""));
|
||||
EXPECT_FALSE(re.Match("ab"));
|
||||
}
|
||||
|
Loading…
x
Reference in New Issue
Block a user