mirror of
https://github.com/Tencent/rapidjson.git
synced 2025-03-06 13:41:35 +01:00
Rewrite UTF8::Validate() to obtain better performance.
git-svn-id: https://rapidjson.googlecode.com/svn/trunk@35 c5894555-1306-4e8d-425f-1f6f381ee07c
This commit is contained in:
parent
827de60fb8
commit
a45bcbba7b
@ -357,46 +357,41 @@ struct UTF8 {
|
||||
|
||||
template <typename InputStream, typename OutputStream>
|
||||
RAPIDJSON_FORCEINLINE static bool Validate(InputStream& is, OutputStream& os) {
|
||||
// http://bjoern.hoehrmann.de/utf-8/decoder/dfa/
|
||||
static const unsigned char utf8d[] = {
|
||||
//! \todo optimization
|
||||
// The first part of the table maps bytes to character classes that
|
||||
// to reduce the size of the transition table and create bitmasks.
|
||||
// Referring to DFA of http://bjoern.hoehrmann.de/utf-8/decoder/dfa/
|
||||
// With new mapping 1 -> 0x10, 7 -> 0x20, 9 -> 0x40, such that AND operation can test multiple types.
|
||||
static const unsigned char type[] = {
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,
|
||||
7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7, 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,
|
||||
0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,
|
||||
0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,
|
||||
0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,
|
||||
0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,
|
||||
8,8,2,2,2,2,2,2,2,2,2,2,2,2,2,2, 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
|
||||
10,3,3,3,3,3,3,3,3,3,3,3,3,4,3,3, 11,6,6,6,5,8,8,8,8,8,8,8,8,8,8,8,
|
||||
|
||||
// The second part is a transition table that maps a combination
|
||||
// of a state of the automaton and a character class to a state.
|
||||
0,12,24,36,60,96,84,12,12,12,48,72, 12,12,12,12,12,12,12,12,12,12,12,12,
|
||||
12, 0,12,12,12,12,12, 0,12, 0,12,12, 12,24,12,12,12,12,12,24,12,24,12,12,
|
||||
12,12,12,12,12,12,12,24,12,12,12,12, 12,24,12,12,12,12,12,12,12,24,12,12,
|
||||
12,12,12,12,12,12,12,36,12,36,12,12, 12,36,12,12,12,12,12,36,12,36,12,12,
|
||||
12,36,12,12,12,12,12,12,12,12,12,12,
|
||||
};
|
||||
#define COPY() os.Put(c = is.Take())
|
||||
#define TRANS(mask) if (!(type[(unsigned char)c] & mask)) return false
|
||||
#define TAIL() COPY(); TRANS(0x70)
|
||||
Ch c;
|
||||
os.Put(c = is.Take());
|
||||
if ((unsigned char) c < 0x80)
|
||||
COPY();
|
||||
if (!(c & 0x80))
|
||||
return true;
|
||||
|
||||
unsigned type = utf8d[(unsigned char)c];
|
||||
unsigned state = utf8d[256 + type];
|
||||
if (state == 12)
|
||||
return false;
|
||||
|
||||
while (state) {
|
||||
os.Put(c = is.Take());
|
||||
unsigned type = utf8d[(unsigned char)c];
|
||||
state = utf8d[256 + state + type];
|
||||
if (state == 12)
|
||||
return false;
|
||||
};
|
||||
return true;
|
||||
switch (type[(unsigned char)c]) {
|
||||
case 2: TAIL(); return true;
|
||||
case 3: TAIL(); TAIL(); return true;
|
||||
case 4: COPY(); TRANS(0x50); TAIL(); return true;
|
||||
case 5: COPY(); TRANS(0x10); COPY(); TAIL(); return true;
|
||||
case 6: TAIL(); TAIL(); TAIL(); return true;
|
||||
case 10: COPY(); TRANS(0x20); TAIL(); return true;
|
||||
case 11: COPY(); TRANS(0x60); TAIL(); return true;
|
||||
default: return false;
|
||||
}
|
||||
#undef COPY
|
||||
#undef TRANS
|
||||
#undef TAIL
|
||||
}
|
||||
};
|
||||
|
||||
|
@ -31,7 +31,7 @@ struct GenericStringBuffer {
|
||||
return stack_.template Bottom<Ch>();
|
||||
}
|
||||
|
||||
size_t Size() const { return stack_.Size(); }
|
||||
size_t GetSize() const { return stack_.GetSize(); }
|
||||
|
||||
static const size_t kDefaultCapacity = 256;
|
||||
mutable internal::Stack<Allocator> stack_;
|
||||
|
@ -232,6 +232,18 @@ TEST_F(RapidJson, SIMD_SUFFIX(Whitespace)) {
|
||||
}
|
||||
}
|
||||
|
||||
// Performance test for UTF8<>::Validate(): on every trial the fixture's JSON
// text (json_) is streamed through the validator into a string buffer until the
// terminating '\0' is reached, then the number of characters written is compared
// against the fixture's length_.
TEST_F(RapidJson, UTF8_Validate) {
    // Output buffer, created once and emptied at the start of each trial.
    StringBuffer os(0, length_ + 1);

    for (int trial = 0; trial < kTrialCount; trial++) {
        StringStream is(json_);
        os.Clear();

        // Each call handles one encoded character; the result is intentionally not inspected here.
        while (is.Peek() != '\0') {
            UTF8<>::Validate(is, os);
        }

        EXPECT_EQ(length_, os.GetSize());
    }
}
|
||||
|
||||
// Deprecated.
|
||||
//TEST_F(RapidJson, FileStream_Read) {
|
||||
// for (int i = 0; i < kTrialCount; i++) {
|
||||
|
Loading…
x
Reference in New Issue
Block a user