From 66754fc5b062dfd98a82c6feb7a4f4c5bf91e77d Mon Sep 17 00:00:00 2001 From: "miloyip@gmail.com" Date: Thu, 24 Nov 2011 07:50:59 +0000 Subject: [PATCH] Added referenced performance of UTF-8 validation git-svn-id: https://rapidjson.googlecode.com/svn/trunk@36 c5894555-1306-4e8d-425f-1f6f381ee07c --- test/perftest/misctest.cpp | 61 +++++++++++++++++++++++++++++++++ test/perftest/perftest.h | 1 + test/perftest/rapidjsontest.cpp | 8 ++--- 3 files changed, 66 insertions(+), 4 deletions(-) create mode 100644 test/perftest/misctest.cpp diff --git a/test/perftest/misctest.cpp b/test/perftest/misctest.cpp new file mode 100644 index 00000000..bf47e6eb --- /dev/null +++ b/test/perftest/misctest.cpp @@ -0,0 +1,61 @@ +#include "perftest.h" + +#if TEST_MISC + +class Misc : public PerfTest { +}; + +// Copyright (c) 2008-2010 Bjoern Hoehrmann +// See http://bjoern.hoehrmann.de/utf-8/decoder/dfa/ for details. + +#define UTF8_ACCEPT 0 +#define UTF8_REJECT 12 + +static const unsigned char utf8d[] = { + // The first part of the table (256 entries, indexed by byte value) maps bytes to character classes + // to reduce the size of the transition table and create bitmasks. + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9, + 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7, 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7, + 8,8,2,2,2,2,2,2,2,2,2,2,2,2,2,2, 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, + 10,3,3,3,3,3,3,3,3,3,3,3,3,4,3,3, 11,6,6,6,5,8,8,8,8,8,8,8,8,8,8,8, + + // The second part is a transition table that maps a combination + // of a state of the automaton and a character class to a state (decode() reads it as utf8d[256 + state + class]).
+ 0,12,24,36,60,96,84,12,12,12,48,72, 12,12,12,12,12,12,12,12,12,12,12,12, + 12, 0,12,12,12,12,12, 0,12, 0,12,12, 12,24,12,12,12,12,12,24,12,24,12,12, + 12,12,12,12,12,12,12,24,12,12,12,12, 12,24,12,12,12,12,12,12,12,24,12,12, + 12,12,12,12,12,12,12,36,12,36,12,12, 12,36,12,12,12,12,12,36,12,36,12,12, + 12,36,12,12,12,12,12,12,12,12,12,12, +}; + +static unsigned inline decode(unsigned* state, unsigned* codep, unsigned byte) { + unsigned type = utf8d[byte]; + + *codep = (*state != UTF8_ACCEPT) ? + (byte & 0x3fu) | (*codep << 6) : + (0xff >> type) & (byte); + + *state = utf8d[256 + *state + type]; + return *state; +} + +static bool IsUTF8(unsigned char* s) { + unsigned codepoint, state = 0; + + while (*s) + decode(&state, &codepoint, *s++); + + return state == UTF8_ACCEPT; +} + +TEST_F(Misc, Hoehrmann_IsUTF8) { + for (int i = 0; i < kTrialCount; i++) { + EXPECT_TRUE(IsUTF8((unsigned char*)json_)); + } +} + +#endif // TEST_MISC diff --git a/test/perftest/perftest.h b/test/perftest/perftest.h index 1f762a13..c206c8c7 100644 --- a/test/perftest/perftest.h +++ b/test/perftest/perftest.h @@ -6,6 +6,7 @@ #define TEST_YAJL 0 #define TEST_ULTRAJSON 0 #define TEST_PLATFORM 0 +#define TEST_MISC 1 #if TEST_RAPIDJSON //#define RAPIDJSON_SSE2 diff --git a/test/perftest/rapidjsontest.cpp b/test/perftest/rapidjsontest.cpp index ae2af33d..bffc1f2e 100644 --- a/test/perftest/rapidjsontest.cpp +++ b/test/perftest/rapidjsontest.cpp @@ -233,14 +233,14 @@ TEST_F(RapidJson, SIMD_SUFFIX(Whitespace)) { } TEST_F(RapidJson, UTF8_Validate) { - StringBuffer os(0, length_ + 1); + NullStream os; for (int i = 0; i < kTrialCount; i++) { StringStream is(json_); - os.Clear(); + bool result = true; while (is.Peek() != '\0') - UTF8<>::Validate(is, os); - EXPECT_EQ(length_, os.GetSize()); + result &= UTF8<>::Validate(is, os); + EXPECT_TRUE(result); } }