mirror of
https://github.com/Tencent/rapidjson.git
synced 2025-03-06 13:41:35 +01:00
Merge pull request #553 from miloyip/issue158_parsestdstring
Issue158 parsestdstring
This commit is contained in:
commit
dd25c9651a
@ -20,6 +20,8 @@
|
||||
#include "reader.h"
|
||||
#include "internal/meta.h"
|
||||
#include "internal/strfunc.h"
|
||||
#include "memorystream.h"
|
||||
#include "encodedstream.h"
|
||||
#include <new> // placement new
|
||||
|
||||
#ifdef _MSC_VER
|
||||
@ -2224,6 +2226,42 @@ public:
|
||||
//! Parse JSON text from a read-only string using the default parse flags.
/*! \param str Input string; no length is passed, so it is presumably zero-terminated (confirm against the templated overload).
    \return Whatever the flag-templated overload returns (a reference to a GenericDocument).
*/
GenericDocument& Parse(const Ch* str) {
    // Delegate to the flag-templated overload with kParseDefaultFlags.
    return Parse<kParseDefaultFlags>(str);
}
|
||||
|
||||
template <unsigned parseFlags, typename SourceEncoding>
|
||||
GenericDocument& Parse(const typename SourceEncoding::Ch* str, size_t length) {
|
||||
RAPIDJSON_ASSERT(!(parseFlags & kParseInsituFlag));
|
||||
MemoryStream ms(static_cast<const char*>(str), length * sizeof(typename SourceEncoding::Ch));
|
||||
EncodedInputStream<SourceEncoding, MemoryStream> is(ms);
|
||||
ParseStream<parseFlags, SourceEncoding>(is);
|
||||
return *this;
|
||||
}
|
||||
|
||||
template <unsigned parseFlags>
|
||||
GenericDocument& Parse(const Ch* str, size_t length) {
|
||||
return Parse<parseFlags, Encoding>(str, length);
|
||||
}
|
||||
|
||||
//! Parse JSON text from a length-delimited buffer using the default parse flags.
/*! \param str Pointer to the first character of the input.
    \param length Number of characters in \c str.
*/
GenericDocument& Parse(const Ch* str, size_t length) {
    // Delegate to the flag-templated overload with kParseDefaultFlags.
    return Parse<kParseDefaultFlags>(str, length);
}
|
||||
|
||||
#if RAPIDJSON_HAS_STDSTRING
|
||||
template <unsigned parseFlags, typename SourceEncoding>
|
||||
GenericDocument& Parse(const std::basic_string<typename SourceEncoding::Ch>& str) {
|
||||
// c_str() is constant complexity according to standard. Should be faster than Parse(const char*, size_t)
|
||||
return Parse<parseFlags, SourceEncoding>(str.c_str());
|
||||
}
|
||||
|
||||
template <unsigned parseFlags>
|
||||
GenericDocument& Parse(const std::basic_string<Ch>& str) {
|
||||
return Parse<parseFlags, Encoding>(str);
|
||||
}
|
||||
|
||||
//! Parse JSON text held in a std::basic_string using the default parse flags.
/*! \param str String to parse.
*/
GenericDocument& Parse(const std::basic_string<Ch>& str) {
    // Delegate to the flag-templated overload with kParseDefaultFlags.
    return Parse<kParseDefaultFlags>(str);
}
|
||||
#endif // RAPIDJSON_HAS_STDSTRING
|
||||
|
||||
//!@}
|
||||
|
||||
//!@name Handling parse errors
|
||||
|
@ -16,6 +16,7 @@
|
||||
#define RAPIDJSON_ENCODEDSTREAM_H_
|
||||
|
||||
#include "stream.h"
|
||||
#include "memorystream.h"
|
||||
|
||||
#ifdef __GNUC__
|
||||
RAPIDJSON_DIAG_PUSH
|
||||
@ -62,6 +63,30 @@ private:
|
||||
Ch current_;
|
||||
};
|
||||
|
||||
//! Specialized for UTF8 MemoryStream.
|
||||
template <>
|
||||
class EncodedInputStream<UTF8<>, MemoryStream> {
|
||||
public:
|
||||
typedef UTF8<>::Ch Ch;
|
||||
|
||||
EncodedInputStream(MemoryStream& is) : is_(is) {
|
||||
if (static_cast<unsigned char>(is_.Peek()) == 0xEFu) is_.Take();
|
||||
if (static_cast<unsigned char>(is_.Peek()) == 0xBBu) is_.Take();
|
||||
if (static_cast<unsigned char>(is_.Peek()) == 0xBFu) is_.Take();
|
||||
}
|
||||
Ch Peek() const { return is_.Peek(); }
|
||||
Ch Take() { return is_.Take(); }
|
||||
size_t Tell() const { return is_.Tell(); }
|
||||
|
||||
// Not implemented
|
||||
void Put(Ch) {}
|
||||
void Flush() {}
|
||||
Ch* PutBegin() { return 0; }
|
||||
size_t PutEnd(Ch*) { return 0; }
|
||||
|
||||
MemoryStream& is_;
|
||||
};
|
||||
|
||||
//! Output byte stream wrapper with statically bound encoding.
|
||||
/*!
|
||||
\tparam Encoding The interpretation of encoding of the stream. Either UTF8, UTF16LE, UTF16BE, UTF32LE, UTF32BE.
|
||||
|
@ -42,8 +42,8 @@ struct MemoryStream {
|
||||
|
||||
MemoryStream(const Ch *src, size_t size) : src_(src), begin_(src), end_(src + size), size_(size) {}
|
||||
|
||||
Ch Peek() const { return (src_ == end_) ? '\0' : *src_; }
|
||||
Ch Take() { return (src_ == end_) ? '\0' : *src_++; }
|
||||
Ch Peek() const { return RAPIDJSON_UNLIKELY(src_ == end_) ? '\0' : *src_; }
|
||||
Ch Take() { return RAPIDJSON_UNLIKELY(src_ == end_) ? '\0' : *src_++; }
|
||||
size_t Tell() const { return static_cast<size_t>(src_ - begin_); }
|
||||
|
||||
Ch* PutBegin() { RAPIDJSON_ASSERT(false); return 0; }
|
||||
|
@ -19,6 +19,7 @@
|
||||
|
||||
#include "allocators.h"
|
||||
#include "stream.h"
|
||||
#include "encodedstream.h"
|
||||
#include "internal/meta.h"
|
||||
#include "internal/stack.h"
|
||||
#include "internal/strtod.h"
|
||||
@ -259,6 +260,12 @@ void SkipWhitespace(InputStream& is) {
|
||||
s.Take();
|
||||
}
|
||||
|
||||
//! Skip JSON whitespace (space, LF, CR, tab) in the range [p, end).
/*! \param p Current read position.
    \param end One past the last readable character.
    \return Pointer to the first non-whitespace character, or \c end if the range is exhausted.
*/
inline const char* SkipWhitespace(const char* p, const char* end) {
    for (; p != end; ++p) {
        const char c = *p;
        const bool isWhitespace = (c == ' ') || (c == '\n') || (c == '\r') || (c == '\t');
        if (!isWhitespace)
            break;
    }
    return p;
}
|
||||
|
||||
#ifdef RAPIDJSON_SSE42
|
||||
//! Skip whitespace with SSE 4.2 pcmpistrm instruction, testing 16 8-bit characters at once.
|
||||
inline const char *SkipWhitespace_SIMD(const char* p) {
|
||||
@ -295,6 +302,34 @@ inline const char *SkipWhitespace_SIMD(const char* p) {
|
||||
}
|
||||
}
|
||||
|
||||
inline const char *SkipWhitespace_SIMD(const char* p, const char* end) {
|
||||
// Fast return for single non-whitespace
|
||||
if (p != end && (*p == ' ' || *p == '\n' || *p == '\r' || *p == '\t'))
|
||||
++p;
|
||||
else
|
||||
return p;
|
||||
|
||||
// The middle of string using SIMD
|
||||
static const char whitespace[16] = " \n\r\t";
|
||||
const __m128i w = _mm_loadu_si128(reinterpret_cast<const __m128i *>(&whitespace[0]));
|
||||
|
||||
for (; p <= end - 16; p += 16) {
|
||||
const __m128i s = _mm_loadu_si128(reinterpret_cast<const __m128i *>(p));
|
||||
const int r = _mm_cvtsi128_si32(_mm_cmpistrm(w, s, _SIDD_UBYTE_OPS | _SIDD_CMP_EQUAL_ANY | _SIDD_BIT_MASK | _SIDD_NEGATIVE_POLARITY));
|
||||
if (r != 0) { // some of characters is non-whitespace
|
||||
#ifdef _MSC_VER // Find the index of first non-whitespace
|
||||
unsigned long offset;
|
||||
_BitScanForward(&offset, r);
|
||||
return p + offset;
|
||||
#else
|
||||
return p + __builtin_ffs(r) - 1;
|
||||
#endif
|
||||
}
|
||||
}
|
||||
|
||||
return SkipWhitespace(p, end);
|
||||
}
|
||||
|
||||
#elif defined(RAPIDJSON_SSE2)
|
||||
|
||||
//! Skip whitespace with SSE2 instructions, testing 16 8-bit characters at once.
|
||||
@ -342,6 +377,44 @@ inline const char *SkipWhitespace_SIMD(const char* p) {
|
||||
}
|
||||
}
|
||||
|
||||
inline const char *SkipWhitespace_SIMD(const char* p, const char* end) {
|
||||
// Fast return for single non-whitespace
|
||||
if (p != end && (*p == ' ' || *p == '\n' || *p == '\r' || *p == '\t'))
|
||||
++p;
|
||||
else
|
||||
return p;
|
||||
|
||||
// The rest of string
|
||||
#define C16(c) { c, c, c, c, c, c, c, c, c, c, c, c, c, c, c, c }
|
||||
static const char whitespaces[4][16] = { C16(' '), C16('\n'), C16('\r'), C16('\t') };
|
||||
#undef C16
|
||||
|
||||
const __m128i w0 = _mm_loadu_si128(reinterpret_cast<const __m128i *>(&whitespaces[0][0]));
|
||||
const __m128i w1 = _mm_loadu_si128(reinterpret_cast<const __m128i *>(&whitespaces[1][0]));
|
||||
const __m128i w2 = _mm_loadu_si128(reinterpret_cast<const __m128i *>(&whitespaces[2][0]));
|
||||
const __m128i w3 = _mm_loadu_si128(reinterpret_cast<const __m128i *>(&whitespaces[3][0]));
|
||||
|
||||
for (; p <= end - 16; p += 16) {
|
||||
const __m128i s = _mm_loadu_si128(reinterpret_cast<const __m128i *>(p));
|
||||
__m128i x = _mm_cmpeq_epi8(s, w0);
|
||||
x = _mm_or_si128(x, _mm_cmpeq_epi8(s, w1));
|
||||
x = _mm_or_si128(x, _mm_cmpeq_epi8(s, w2));
|
||||
x = _mm_or_si128(x, _mm_cmpeq_epi8(s, w3));
|
||||
unsigned short r = static_cast<unsigned short>(~_mm_movemask_epi8(x));
|
||||
if (r != 0) { // some of characters may be non-whitespace
|
||||
#ifdef _MSC_VER // Find the index of first non-whitespace
|
||||
unsigned long offset;
|
||||
_BitScanForward(&offset, r);
|
||||
return p + offset;
|
||||
#else
|
||||
return p + __builtin_ffs(r) - 1;
|
||||
#endif
|
||||
}
|
||||
}
|
||||
|
||||
return SkipWhitespace(p, end);
|
||||
}
|
||||
|
||||
#endif // RAPIDJSON_SSE2
|
||||
|
||||
#ifdef RAPIDJSON_SIMD
|
||||
@ -354,6 +427,10 @@ template<> inline void SkipWhitespace(InsituStringStream& is) {
|
||||
//! StringStream specialization: advance the stream cursor with the SIMD whitespace skipper.
template<> inline void SkipWhitespace(StringStream& is) {
    const char* const firstNonSpace = SkipWhitespace_SIMD(is.src_);
    is.src_ = firstNonSpace;
}
|
||||
|
||||
//! UTF-8 encoded MemoryStream specialization: advance the wrapped stream's cursor with
//! the bounded SIMD whitespace skipper, which stops at the stream's end pointer.
template<> inline void SkipWhitespace(EncodedInputStream<UTF8<>, MemoryStream>& is) {
    MemoryStream& bytes = is.is_;
    bytes.src_ = SkipWhitespace_SIMD(bytes.src_, bytes.end_);
}
|
||||
#endif // RAPIDJSON_SIMD
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
@ -30,6 +30,8 @@
|
||||
# define RAPIDJSON_SSE2
|
||||
#endif
|
||||
|
||||
#define RAPIDJSON_HAS_STDSTRING 1
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
// Google Test
|
||||
|
||||
|
@ -187,6 +187,25 @@ TEST_F(RapidJson, SIMD_SUFFIX(DocumentParse_MemoryPoolAllocator)) {
|
||||
}
|
||||
}
|
||||
|
||||
// Performance test: repeatedly parse the fixture JSON through Parse(const Ch*, size_t).
TEST_F(RapidJson, SIMD_SUFFIX(DocumentParseLength_MemoryPoolAllocator)) {
    size_t trial = 0;
    while (trial < kTrialCount) {
        Document doc;   // fresh document for every trial
        doc.Parse(json_, length_);
        ASSERT_TRUE(doc.IsObject());
        ++trial;
    }
}
|
||||
|
||||
#if RAPIDJSON_HAS_STDSTRING
|
||||
// Performance test: repeatedly parse the fixture JSON through Parse(const std::string&).
TEST_F(RapidJson, SIMD_SUFFIX(DocumentParseStdString_MemoryPoolAllocator)) {
    // Build the string once, outside the timed loop.
    const std::string s(json_, length_);
    size_t trial = 0;
    while (trial < kTrialCount) {
        Document doc;   // fresh document for every trial
        doc.Parse(s);
        ASSERT_TRUE(doc.IsObject());
        ++trial;
    }
}
|
||||
#endif
|
||||
|
||||
TEST_F(RapidJson, SIMD_SUFFIX(DocumentParseIterative_MemoryPoolAllocator)) {
|
||||
for (size_t i = 0; i < kTrialCount; i++) {
|
||||
Document doc;
|
||||
|
@ -34,6 +34,8 @@ void ParseCheck(DocumentType& doc) {
|
||||
typedef typename DocumentType::ValueType ValueType;
|
||||
|
||||
EXPECT_FALSE(doc.HasParseError());
|
||||
if (doc.HasParseError())
|
||||
printf("Error: %d at %zu\n", static_cast<int>(doc.GetParseError()), doc.GetErrorOffset());
|
||||
EXPECT_TRUE(static_cast<ParseResult>(doc));
|
||||
|
||||
EXPECT_TRUE(doc.IsObject());
|
||||
@ -93,6 +95,26 @@ void ParseTest() {
|
||||
doc.ParseInsitu(buffer);
|
||||
ParseCheck(doc);
|
||||
free(buffer);
|
||||
|
||||
// Parse(const Ch*, size_t)
|
||||
size_t length = strlen(json);
|
||||
buffer = reinterpret_cast<char*>(malloc(length * 2));
|
||||
memcpy(buffer, json, length);
|
||||
memset(buffer + length, 'X', length);
|
||||
#if RAPIDJSON_HAS_STDSTRING
|
||||
std::string s2(buffer, length); // backup buffer
|
||||
#endif
|
||||
doc.SetNull();
|
||||
doc.Parse(buffer, length);
|
||||
free(buffer);
|
||||
ParseCheck(doc);
|
||||
|
||||
#if RAPIDJSON_HAS_STDSTRING
|
||||
// Parse(std::string)
|
||||
doc.SetNull();
|
||||
doc.Parse(s2);
|
||||
ParseCheck(doc);
|
||||
#endif
|
||||
}
|
||||
|
||||
TEST(Document, Parse) {
|
||||
@ -140,6 +162,42 @@ static FILE* OpenEncodedFile(const char* filename) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
// Parses UTF-8 input into a UTF-16 document through the SourceEncoding-templated
// Parse overloads (length-delimited buffer and std::string).
TEST(Document, Parse_Encoding) {
    const char* json = " { \"hello\" : \"world\", \"t\" : true , \"f\" : false, \"n\": null, \"i\":123, \"pi\": 3.1416, \"a\":[1, 2, 3, 4] } ";

    typedef GenericDocument<UTF16<> > DocumentType;
    DocumentType doc;

    // Parse<unsigned, SourceEncoding>(const SourceEncoding::Ch*)
    // doc.Parse<kParseDefaultFlags, UTF8<> >(json);
    // EXPECT_FALSE(doc.HasParseError());
    // EXPECT_EQ(0, StrCmp(doc[L"hello"].GetString(), L"world"));

    // Parse<unsigned, SourceEncoding>(const SourceEncoding::Ch*, size_t)
    // The buffer is twice the JSON length; the second half is filled with 'X' and
    // there is no terminator, so the parser must honour the explicit length.
    const size_t jsonLength = strlen(json);
    char* const padded = reinterpret_cast<char*>(malloc(jsonLength * 2));
    memcpy(padded, json, jsonLength);
    memset(padded + jsonLength, 'X', jsonLength);
#if RAPIDJSON_HAS_STDSTRING
    std::string s2(padded, jsonLength); // backup buffer
#endif
    doc.SetNull();
    doc.Parse<kParseDefaultFlags, UTF8<> >(padded, jsonLength);
    free(padded);
    EXPECT_FALSE(doc.HasParseError());
    if (doc.HasParseError()) {
        printf("Error: %d at %zu\n", static_cast<int>(doc.GetParseError()), doc.GetErrorOffset());
    }
    EXPECT_EQ(0, StrCmp(doc[L"hello"].GetString(), L"world"));

#if RAPIDJSON_HAS_STDSTRING
    // Parse<unsigned, SourceEncoding>(std::string)
    doc.SetNull();
    doc.Parse<kParseDefaultFlags, UTF8<> >(s2);
    EXPECT_FALSE(doc.HasParseError());
    EXPECT_EQ(0, StrCmp(doc[L"hello"].GetString(), L"world"));
#endif
}
|
||||
|
||||
TEST(Document, ParseStream_EncodedInputStream) {
|
||||
// UTF8 -> UTF16
|
||||
FILE* fp = OpenEncodedFile("utf8.json");
|
||||
|
@ -73,6 +73,28 @@ TEST(SIMD, SIMD_SUFFIX(SkipWhitespace)) {
|
||||
TestSkipWhitespace<InsituStringStream>();
|
||||
}
|
||||
|
||||
// Exercises SkipWhitespace on a UTF-8 EncodedInputStream over a MemoryStream: the buffer
// is whitespace with an 'X' every `step` bytes, for every step from 1 to 31.
TEST(SIMD, SIMD_SUFFIX(SkipWhitespace_EncodedMemoryStream)) {
    for (size_t step = 1; step < 32; step++) {
        char buffer[1024];
        // Fill with a repeating " \t\r\n" pattern, then plant the markers.
        for (size_t pos = 0; pos < 1024; pos++)
            buffer[pos] = " \t\r\n"[pos % 4];
        for (size_t pos = 0; pos < 1024; pos += step)
            buffer[pos] = 'X';

        MemoryStream ms(buffer, 1024);
        EncodedInputStream<UTF8<>, MemoryStream> s(ms);
        size_t i = 0;   // offset of the next expected marker (only used by the disabled check below)
        while (true) {
            SkipWhitespace(s);
            // Peek() yields '\0' once the stream is exhausted.
            if (s.Peek() == '\0')
                break;
            //EXPECT_EQ(i, s.Tell());
            EXPECT_EQ('X', s.Take());
            i += step;
        }
    }
}
|
||||
|
||||
struct ScanCopyUnescapedStringHandler : BaseReaderHandler<UTF8<>, ScanCopyUnescapedStringHandler> {
|
||||
bool String(const char* str, size_t length, bool) {
|
||||
memcpy(buffer, str, length + 1);
|
||||
|
Loading…
x
Reference in New Issue
Block a user