mirror of
https://github.com/Tencent/rapidjson.git
synced 2025-03-09 19:24:23 +01:00
Merge pull request #76 from thebusytypist/TransitionTable
Iterative Parsing (for issue #35)
This commit is contained in:
commit
19a2279a85
@ -1221,12 +1221,13 @@ public:
|
||||
\tparam SourceEncoding Encoding of input stream
|
||||
\tparam InputStream Type of input stream, implementing Stream concept
|
||||
\param is Input stream to be parsed.
|
||||
\param limit Parsing stack size limit(in bytes). Pass 0 means no limit.
|
||||
\return The document itself for fluent API.
|
||||
*/
|
||||
template <unsigned parseFlags, typename SourceEncoding, typename InputStream>
|
||||
GenericDocument& ParseStream(InputStream& is) {
|
||||
GenericDocument& ParseStream(InputStream& is, size_t limit = 0) {
|
||||
ValueType::SetNull(); // Remove existing root if exist
|
||||
GenericReader<SourceEncoding, Encoding, Allocator> reader(&GetAllocator());
|
||||
GenericReader<SourceEncoding, Encoding, Allocator> reader(limit, &GetAllocator());
|
||||
ClearStackOnExit scope(*this);
|
||||
parseResult_ = reader.template Parse<parseFlags>(is, *this);
|
||||
if (parseResult_) {
|
||||
@ -1240,21 +1241,23 @@ public:
|
||||
/*! \tparam parseFlags Combination of \ref ParseFlag.
|
||||
\tparam InputStream Type of input stream, implementing Stream concept
|
||||
\param is Input stream to be parsed.
|
||||
\param limit Parsing stack size limit(in bytes). Pass 0 means no limit.
|
||||
\return The document itself for fluent API.
|
||||
*/
|
||||
template <unsigned parseFlags, typename InputStream>
|
||||
GenericDocument& ParseStream(InputStream& is) {
|
||||
return ParseStream<parseFlags,Encoding,InputStream>(is);
|
||||
GenericDocument& ParseStream(InputStream& is, size_t limit = 0) {
|
||||
return ParseStream<parseFlags,Encoding,InputStream>(is, limit);
|
||||
}
|
||||
|
||||
//! Parse JSON text from an input stream (with \ref kParseDefaultFlags)
|
||||
/*! \tparam InputStream Type of input stream, implementing Stream concept
|
||||
\param is Input stream to be parsed.
|
||||
\param limit Parsing stack size limit(in bytes). Pass 0 means no limit.
|
||||
\return The document itself for fluent API.
|
||||
*/
|
||||
template <typename InputStream>
|
||||
GenericDocument& ParseStream(InputStream& is) {
|
||||
return ParseStream<kParseDefaultFlags, Encoding, InputStream>(is);
|
||||
GenericDocument& ParseStream(InputStream& is, size_t limit = 0) {
|
||||
return ParseStream<kParseDefaultFlags, Encoding, InputStream>(is, limit);
|
||||
}
|
||||
//!@}
|
||||
|
||||
@ -1265,30 +1268,33 @@ public:
|
||||
/*! \tparam parseFlags Combination of \ref ParseFlag.
|
||||
\tparam SourceEncoding Transcoding from input Encoding
|
||||
\param str Mutable zero-terminated string to be parsed.
|
||||
\param limit Parsing stack size limit(in bytes). Pass 0 means no limit.
|
||||
\return The document itself for fluent API.
|
||||
*/
|
||||
template <unsigned parseFlags, typename SourceEncoding>
|
||||
GenericDocument& ParseInsitu(Ch* str) {
|
||||
GenericDocument& ParseInsitu(Ch* str, size_t limit = 0) {
|
||||
GenericInsituStringStream<Encoding> s(str);
|
||||
return ParseStream<parseFlags | kParseInsituFlag, SourceEncoding>(s);
|
||||
return ParseStream<parseFlags | kParseInsituFlag, SourceEncoding>(s, limit);
|
||||
}
|
||||
|
||||
//! Parse JSON text from a mutable string
|
||||
/*! \tparam parseFlags Combination of \ref ParseFlag.
|
||||
\param str Mutable zero-terminated string to be parsed.
|
||||
\param limit Parsing stack size limit(in bytes). Pass 0 means no limit.
|
||||
\return The document itself for fluent API.
|
||||
*/
|
||||
template <unsigned parseFlags>
|
||||
GenericDocument& ParseInsitu(Ch* str) {
|
||||
return ParseInsitu<parseFlags, Encoding>(str);
|
||||
GenericDocument& ParseInsitu(Ch* str, size_t limit = 0) {
|
||||
return ParseInsitu<parseFlags, Encoding>(str, limit);
|
||||
}
|
||||
|
||||
//! Parse JSON text from a mutable string (with \ref kParseDefaultFlags)
|
||||
/*! \param str Mutable zero-terminated string to be parsed.
|
||||
\param limit Parsing stack size limit(in bytes). Pass 0 means no limit.
|
||||
\return The document itself for fluent API.
|
||||
*/
|
||||
GenericDocument& ParseInsitu(Ch* str) {
|
||||
return ParseInsitu<kParseDefaultFlags, Encoding>(str);
|
||||
GenericDocument& ParseInsitu(Ch* str, size_t limit = 0) {
|
||||
return ParseInsitu<kParseDefaultFlags, Encoding>(str, limit);
|
||||
}
|
||||
//!@}
|
||||
|
||||
@ -1299,28 +1305,31 @@ public:
|
||||
/*! \tparam parseFlags Combination of \ref ParseFlag (must not contain \ref kParseInsituFlag).
|
||||
\tparam SourceEncoding Transcoding from input Encoding
|
||||
\param str Read-only zero-terminated string to be parsed.
|
||||
\param limit Parsing stack size limit(in bytes). Pass 0 means no limit.
|
||||
*/
|
||||
template <unsigned parseFlags, typename SourceEncoding>
|
||||
GenericDocument& Parse(const Ch* str) {
|
||||
GenericDocument& Parse(const Ch* str, size_t limit = 0) {
|
||||
RAPIDJSON_ASSERT(!(parseFlags & kParseInsituFlag));
|
||||
GenericStringStream<SourceEncoding> s(str);
|
||||
return ParseStream<parseFlags, SourceEncoding>(s);
|
||||
return ParseStream<parseFlags, SourceEncoding>(s, limit);
|
||||
}
|
||||
|
||||
//! Parse JSON text from a read-only string
|
||||
/*! \tparam parseFlags Combination of \ref ParseFlag (must not contain \ref kParseInsituFlag).
|
||||
\param str Read-only zero-terminated string to be parsed.
|
||||
\param limit Parsing stack size limit(in bytes). Pass 0 means no limit.
|
||||
*/
|
||||
template <unsigned parseFlags>
|
||||
GenericDocument& Parse(const Ch* str) {
|
||||
return Parse<parseFlags, Encoding>(str);
|
||||
GenericDocument& Parse(const Ch* str, size_t limit = 0) {
|
||||
return Parse<parseFlags, Encoding>(str, limit);
|
||||
}
|
||||
|
||||
//! Parse JSON text from a read-only string (with \ref kParseDefaultFlags)
|
||||
/*! \param str Read-only zero-terminated string to be parsed.
|
||||
\param limit Parsing stack size limit(in bytes). Pass 0 means no limit.
|
||||
*/
|
||||
GenericDocument& Parse(const Ch* str) {
|
||||
return Parse<kParseDefaultFlags>(str);
|
||||
GenericDocument& Parse(const Ch* str, size_t limit = 0) {
|
||||
return Parse<kParseDefaultFlags>(str, limit);
|
||||
}
|
||||
//!@}
|
||||
|
||||
|
@ -39,6 +39,8 @@ inline const RAPIDJSON_ERROR_CHARTYPE* GetParseError_En(ParseErrorCode parseErro
|
||||
case kParseErrorNumberMissExponent: return RAPIDJSON_ERROR_STRING("Miss exponent in number.");
|
||||
|
||||
case kParseErrorTermination: return RAPIDJSON_ERROR_STRING("Terminate parsing due to Handler error.");
|
||||
case kParseErrorUnspecificSyntaxError: return RAPIDJSON_ERROR_STRING("Unspecific syntax error.");
|
||||
case kParseErrorStackSizeLimitExceeded: return RAPIDJSON_ERROR_STRING("Parsing stack size limit is exceeded.");
|
||||
|
||||
default:
|
||||
return RAPIDJSON_ERROR_STRING("Unknown error.");
|
||||
|
@ -58,7 +58,9 @@ enum ParseErrorCode {
|
||||
kParseErrorNumberMissFraction, //!< Miss fraction part in number.
|
||||
kParseErrorNumberMissExponent, //!< Miss exponent in number.
|
||||
|
||||
kParseErrorTermination //!< Parsing was terminated.
|
||||
kParseErrorTermination, //!< Parsing was terminated.
|
||||
kParseErrorUnspecificSyntaxError, //!< Unspecific syntax error.
|
||||
kParseErrorStackSizeLimitExceeded //!< Parsing stack size limit is exceeded.
|
||||
};
|
||||
|
||||
//! Result of parsing (wraps ParseErrorCode)
|
||||
|
@ -64,7 +64,8 @@ namespace rapidjson {
|
||||
enum ParseFlag {
|
||||
kParseDefaultFlags = 0, //!< Default parse flags. Non-destructive parsing. Text strings are decoded into allocated buffer.
|
||||
kParseInsituFlag = 1, //!< In-situ(destructive) parsing.
|
||||
kParseValidateEncodingFlag = 2 //!< Validate encoding of JSON strings.
|
||||
kParseValidateEncodingFlag = 2, //!< Validate encoding of JSON strings.
|
||||
kParseIterativeFlag = 4 //!< Iterative(constant complexity in terms of function call stack size) parsing.
|
||||
};
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
@ -127,7 +128,7 @@ namespace internal {
|
||||
template<typename Stream, int = StreamTraits<Stream>::copyOptimization>
|
||||
class StreamLocalCopy;
|
||||
|
||||
//! Do copy optimziation.
|
||||
//! Do copy optimization.
|
||||
template<typename Stream>
|
||||
class StreamLocalCopy<Stream, 1> {
|
||||
public:
|
||||
@ -272,10 +273,11 @@ public:
|
||||
typedef typename SourceEncoding::Ch Ch; //!< SourceEncoding character type
|
||||
|
||||
//! Constructor.
|
||||
/*! \param allocator Optional allocator for allocating stack memory. (Only use for non-destructive parsing)
|
||||
/*! \param limit Parsing stack size limit(in bytes). Pass 0 means no limit.
|
||||
\param allocator Optional allocator for allocating stack memory. (Only use for non-destructive parsing)
|
||||
\param stackCapacity stack capacity in bytes for storing a single decoded string. (Only use for non-destructive parsing)
|
||||
*/
|
||||
GenericReader(Allocator* allocator = 0, size_t stackCapacity = kDefaultStackCapacity) : stack_(allocator, stackCapacity), parseResult_() {}
|
||||
GenericReader(size_t limit = 0, Allocator* allocator = 0, size_t stackCapacity = kDefaultStackCapacity) : stack_(allocator, stackCapacity), kStackSizeLimit_(limit), parseResult_() {}
|
||||
|
||||
//! Parse JSON text.
|
||||
/*! \tparam parseFlags Combination of \ref ParseFlag.
|
||||
@ -287,9 +289,13 @@ public:
|
||||
*/
|
||||
template <unsigned parseFlags, typename InputStream, typename Handler>
|
||||
ParseResult Parse(InputStream& is, Handler& handler) {
|
||||
if (parseFlags & kParseIterativeFlag)
|
||||
return IterativeParse<parseFlags>(is, handler);
|
||||
|
||||
parseResult_.Clear();
|
||||
|
||||
ClearStackOnExit scope(*this);
|
||||
|
||||
SkipWhitespace(is);
|
||||
|
||||
if (is.Peek() == '\0') {
|
||||
@ -565,8 +571,14 @@ private:
|
||||
if (c == '\\') { // Escape
|
||||
is.Take();
|
||||
Ch e = is.Take();
|
||||
if ((sizeof(Ch) == 1 || unsigned(e) < 256) && escape[(unsigned char)e])
|
||||
if ((sizeof(Ch) == 1 || unsigned(e) < 256) && escape[(unsigned char)e]) {
|
||||
if (!(parseFlags & kParseInsituFlag)) {
|
||||
if (!CheckStackSpaceQuota(sizeof(Ch))) {
|
||||
RAPIDJSON_PARSE_ERROR(kParseErrorStackSizeLimitExceeded, is.Tell() - 1);
|
||||
}
|
||||
}
|
||||
os.Put(escape[(unsigned char)e]);
|
||||
}
|
||||
else if (e == 'u') { // Unicode
|
||||
unsigned codepoint = ParseHex4(is);
|
||||
if (codepoint >= 0xD800 && codepoint <= 0xDBFF) {
|
||||
@ -585,6 +597,11 @@ private:
|
||||
}
|
||||
else if (c == '"') { // Closing double quote
|
||||
is.Take();
|
||||
if (!(parseFlags & kParseInsituFlag)) {
|
||||
if (!CheckStackSpaceQuota(sizeof(Ch))) {
|
||||
RAPIDJSON_PARSE_ERROR(kParseErrorStackSizeLimitExceeded, is.Tell() - 1);
|
||||
}
|
||||
}
|
||||
os.Put('\0'); // null-terminate the string
|
||||
return;
|
||||
}
|
||||
@ -786,8 +803,434 @@ private:
|
||||
}
|
||||
}
|
||||
|
||||
static const size_t kDefaultStackCapacity = 256; //!< Default stack capacity in bytes for storing a single decoded string.
|
||||
// Iterative Parsing
|
||||
|
||||
// States
|
||||
//! States of the iterative (table-driven) parser.
/*! The enumerator values index the rows of the transition table in Predict(),
    so the order and the trailing count must stay in sync with that table.
*/
enum IterativeParsingState {
    IterativeParsingStartState = 0,             //!< Initial state of IterativeParse().
    IterativeParsingFinishState,                //!< Entered when the topmost object/array has been closed.
    IterativeParsingErrorState,                 //!< Signals that no valid transition exists or that parsing failed.

    // Object states
    IterativeParsingObjectInitialState,         //!< Entered on '{'.
    IterativeParsingMemberKeyState,             //!< Entered on a member name string.
    IterativeParsingKeyValueDelimiterState,     //!< Entered on the ':' following a member name.
    IterativeParsingMemberValueState,           //!< Entered on a non-compound member value, or restored after a nested value closes.
    IterativeParsingMemberDelimiterState,       //!< Entered on the ',' between members.
    IterativeParsingObjectFinishState,          //!< Entered on '}'.

    // Array states
    IterativeParsingArrayInitialState,          //!< Entered on '['.
    IterativeParsingElementState,               //!< Entered on a non-compound element, or restored after a nested value closes.
    IterativeParsingElementDelimiterState,      //!< Entered on the ',' between elements.
    IterativeParsingArrayFinishState,           //!< Entered on ']'.

    cIterativeParsingStateCount                 //!< Number of states (row count of the transition table).
};
|
||||
|
||||
// Tokens
|
||||
//! One-character lookahead classes used by the iterative parser.
/*! The enumerator values index the columns of the transition table in Predict(),
    so the order and the trailing count must stay in sync with that table.
*/
enum IterativeParsingToken {
    IterativeParsingLeftBracketToken = 0,       //!< '['
    IterativeParsingRightBracketToken,          //!< ']'

    IterativeParsingLeftCurlyBracketToken,      //!< '{'
    IterativeParsingRightCurlyBracketToken,     //!< '}'

    IterativeParsingCommaToken,                 //!< ','
    IterativeParsingColonToken,                 //!< ':'

    IterativeParsingStringToken,                //!< '"' (start of a string)
    IterativeParsingFalseToken,                 //!< 'f'
    IterativeParsingTrueToken,                  //!< 't'
    IterativeParsingNullToken,                  //!< 'n'
    IterativeParsingNumberToken,                //!< Any other character (see Tokenize()).

    cIterativeParsingTokenCount                 //!< Number of token classes (column count of the transition table).
};
|
||||
|
||||
//! Classify a lookahead character into a token class.
/*! \param c The character peeked from the input stream.
    \return The token class for structural characters, '"', 'f', 't' and 'n';
            every other character is classified as IterativeParsingNumberToken.
*/
IterativeParsingToken Tokenize(Ch c) {
    // Structural characters.
    if (c == '[') return IterativeParsingLeftBracketToken;
    if (c == ']') return IterativeParsingRightBracketToken;
    if (c == '{') return IterativeParsingLeftCurlyBracketToken;
    if (c == '}') return IterativeParsingRightCurlyBracketToken;
    if (c == ',') return IterativeParsingCommaToken;
    if (c == ':') return IterativeParsingColonToken;

    // First character of a string or of a literal.
    if (c == '"') return IterativeParsingStringToken;
    if (c == 'f') return IterativeParsingFalseToken;
    if (c == 't') return IterativeParsingTrueToken;
    if (c == 'n') return IterativeParsingNullToken;

    // Anything else is handed on as a number candidate.
    return IterativeParsingNumberToken;
}
|
||||
|
||||
//! Look up the candidate destination state for a (state, lookahead token) pair.
/*! \param state Current parser state (row of the table).
    \param token Class of the lookahead character (column of the table).
    \return The candidate destination state; IterativeParsingErrorState when the
            token is not acceptable in the given state.
*/
IterativeParsingState Predict(IterativeParsingState state, IterativeParsingToken token) {
    // Short aliases so that each table row fits on one line.
    enum {
        kErr    = IterativeParsingErrorState,
        kObjIn  = IterativeParsingObjectInitialState,
        kKey    = IterativeParsingMemberKeyState,
        kKvSep  = IterativeParsingKeyValueDelimiterState,
        kMVal   = IterativeParsingMemberValueState,
        kMSep   = IterativeParsingMemberDelimiterState,
        kObjEnd = IterativeParsingObjectFinishState,
        kArrIn  = IterativeParsingArrayInitialState,
        kElem   = IterativeParsingElementState,
        kESep   = IterativeParsingElementDelimiterState,
        kArrEnd = IterativeParsingArrayFinishState
    };

    // current state x one lookahead token -> new state
    // Where a row maps '[' or '{' to an *Initial state from inside a container,
    // Transit() pushes the state to resume (Element or MemberValue) before descending.
    static const char G[cIterativeParsingStateCount][cIterativeParsingTokenCount] = {
        //                        [        ]        {        }        ,      :       string  false   true    null    number
        /* Start             */ { kArrIn,  kErr,    kObjIn,  kErr,    kErr,  kErr,   kErr,   kErr,   kErr,   kErr,   kErr  },
        /* Finish (sink)     */ { kErr,    kErr,    kErr,    kErr,    kErr,  kErr,   kErr,   kErr,   kErr,   kErr,   kErr  },
        /* Error (sink)      */ { kErr,    kErr,    kErr,    kErr,    kErr,  kErr,   kErr,   kErr,   kErr,   kErr,   kErr  },
        /* ObjectInitial     */ { kErr,    kErr,    kErr,    kObjEnd, kErr,  kErr,   kKey,   kErr,   kErr,   kErr,   kErr  },
        /* MemberKey         */ { kErr,    kErr,    kErr,    kErr,    kErr,  kKvSep, kErr,   kErr,   kErr,   kErr,   kErr  },
        /* KeyValueDelimiter */ { kArrIn,  kErr,    kObjIn,  kErr,    kErr,  kErr,   kMVal,  kMVal,  kMVal,  kMVal,  kMVal },
        /* MemberValue       */ { kErr,    kErr,    kErr,    kObjEnd, kMSep, kErr,   kErr,   kErr,   kErr,   kErr,   kErr  },
        /* MemberDelimiter   */ { kErr,    kErr,    kErr,    kErr,    kErr,  kErr,   kKey,   kErr,   kErr,   kErr,   kErr  },
        /* ObjectFinish(sink)*/ { kErr,    kErr,    kErr,    kErr,    kErr,  kErr,   kErr,   kErr,   kErr,   kErr,   kErr  },
        /* ArrayInitial      */ { kArrIn,  kArrEnd, kObjIn,  kErr,    kErr,  kErr,   kElem,  kElem,  kElem,  kElem,  kElem },
        /* Element           */ { kErr,    kArrEnd, kErr,    kErr,    kESep, kErr,   kErr,   kErr,   kErr,   kErr,   kErr  },
        /* ElementDelimiter  */ { kArrIn,  kErr,    kObjIn,  kErr,    kErr,  kErr,   kElem,  kElem,  kElem,  kElem,  kElem },
        /* ArrayFinish (sink)*/ { kErr,    kErr,    kErr,    kErr,    kErr,  kErr,   kErr,   kErr,   kErr,   kErr,   kErr  }
    }; // End of G

    return static_cast<IterativeParsingState>(G[state][token]);
}
|
||||
|
||||
// Make an advance in the token stream and state based on the candidate destination state which was returned by Transit().
|
||||
// May return a new state on state pop.
|
||||
template <unsigned parseFlags, typename InputStream, typename Handler>
|
||||
IterativeParsingState Transit(IterativeParsingState src, IterativeParsingToken token, IterativeParsingState dst, InputStream& is, Handler& handler) {
|
||||
int c = 0;
|
||||
IterativeParsingState n;
|
||||
bool hr;
|
||||
|
||||
switch (dst) {
|
||||
case IterativeParsingStartState:
|
||||
RAPIDJSON_ASSERT(false);
|
||||
return IterativeParsingErrorState;
|
||||
|
||||
case IterativeParsingFinishState:
|
||||
return dst;
|
||||
|
||||
case IterativeParsingErrorState:
|
||||
return dst;
|
||||
|
||||
case IterativeParsingObjectInitialState:
|
||||
case IterativeParsingArrayInitialState:
|
||||
// Push the state(Element or MemeberValue) if we are nested in another array or value of member.
|
||||
// In this way we can get the correct state on ObjectFinish or ArrayFinish by frame pop.
|
||||
n = src;
|
||||
if (src == IterativeParsingArrayInitialState || src == IterativeParsingElementDelimiterState)
|
||||
n = IterativeParsingElementState;
|
||||
else if (src == IterativeParsingKeyValueDelimiterState)
|
||||
n = IterativeParsingMemberValueState;
|
||||
// Check stack space limit.
|
||||
if (!CheckStackSpaceQuota(sizeof(IterativeParsingState) + sizeof(int))) {
|
||||
RAPIDJSON_PARSE_ERROR_NORETURN(kParseErrorStackSizeLimitExceeded, is.Tell());
|
||||
return IterativeParsingErrorState;
|
||||
}
|
||||
// Push current state.
|
||||
*stack_.template Push<IterativeParsingState>(1) = n;
|
||||
// Initialize and push the member/element count.
|
||||
*stack_.template Push<int>(1) = 0;
|
||||
// Call handler
|
||||
if (dst == IterativeParsingObjectInitialState)
|
||||
hr = handler.StartObject();
|
||||
else
|
||||
hr = handler.StartArray();
|
||||
// On handler short circuits the parsing.
|
||||
if (!hr) {
|
||||
RAPIDJSON_PARSE_ERROR_NORETURN(kParseErrorTermination, is.Tell());
|
||||
return IterativeParsingErrorState;
|
||||
}
|
||||
else {
|
||||
is.Take();
|
||||
return dst;
|
||||
}
|
||||
|
||||
case IterativeParsingMemberKeyState:
|
||||
ParseString<parseFlags>(is, handler);
|
||||
if (HasParseError())
|
||||
return IterativeParsingErrorState;
|
||||
else
|
||||
return dst;
|
||||
|
||||
case IterativeParsingKeyValueDelimiterState:
|
||||
if (token == IterativeParsingColonToken) {
|
||||
is.Take();
|
||||
return dst;
|
||||
}
|
||||
else
|
||||
return IterativeParsingErrorState;
|
||||
|
||||
case IterativeParsingMemberValueState:
|
||||
// Must be non-compound value. Or it would be ObjectInitial or ArrayInitial state.
|
||||
ParseValue<parseFlags>(is, handler);
|
||||
if (HasParseError()) {
|
||||
return IterativeParsingErrorState;
|
||||
}
|
||||
return dst;
|
||||
|
||||
case IterativeParsingElementState:
|
||||
// Must be non-compound value. Or it would be ObjectInitial or ArrayInitial state.
|
||||
ParseValue<parseFlags>(is, handler);
|
||||
if (HasParseError()) {
|
||||
return IterativeParsingErrorState;
|
||||
}
|
||||
return dst;
|
||||
|
||||
case IterativeParsingMemberDelimiterState:
|
||||
case IterativeParsingElementDelimiterState:
|
||||
is.Take();
|
||||
// Update member/element count.
|
||||
*stack_.template Top<int>() = *stack_.template Top<int>() + 1;
|
||||
return dst;
|
||||
|
||||
case IterativeParsingObjectFinishState:
|
||||
// Get member count.
|
||||
c = *stack_.template Pop<int>(1);
|
||||
// If the object is not empty, count the last member.
|
||||
if (src == IterativeParsingMemberValueState)
|
||||
++c;
|
||||
// Restore the state.
|
||||
n = *stack_.template Pop<IterativeParsingState>(1);
|
||||
// Transit to Finish state if this is the topmost scope.
|
||||
if (n == IterativeParsingStartState)
|
||||
n = IterativeParsingFinishState;
|
||||
// Call handler
|
||||
hr = handler.EndObject(c);
|
||||
// On handler short circuits the parsing.
|
||||
if (!hr) {
|
||||
RAPIDJSON_PARSE_ERROR_NORETURN(kParseErrorTermination, is.Tell());
|
||||
return IterativeParsingErrorState;
|
||||
}
|
||||
else {
|
||||
is.Take();
|
||||
return n;
|
||||
}
|
||||
|
||||
case IterativeParsingArrayFinishState:
|
||||
// Get element count.
|
||||
c = *stack_.template Pop<int>(1);
|
||||
// If the array is not empty, count the last element.
|
||||
if (src == IterativeParsingElementState)
|
||||
++c;
|
||||
// Restore the state.
|
||||
n = *stack_.template Pop<IterativeParsingState>(1);
|
||||
// Transit to Finish state if this is the topmost scope.
|
||||
if (n == IterativeParsingStartState)
|
||||
n = IterativeParsingFinishState;
|
||||
// Call handler
|
||||
hr = handler.EndArray(c);
|
||||
// On handler short circuits the parsing.
|
||||
if (!hr) {
|
||||
RAPIDJSON_PARSE_ERROR_NORETURN(kParseErrorTermination, is.Tell());
|
||||
return IterativeParsingErrorState;
|
||||
}
|
||||
else {
|
||||
is.Take();
|
||||
return n;
|
||||
}
|
||||
|
||||
default:
|
||||
RAPIDJSON_ASSERT(false);
|
||||
return IterativeParsingErrorState;
|
||||
}
|
||||
}
|
||||
|
||||
template <typename InputStream>
|
||||
void HandleError(IterativeParsingState src, InputStream& is) {
|
||||
if (HasParseError()) {
|
||||
// Error flag has been set.
|
||||
return;
|
||||
}
|
||||
|
||||
if (src == IterativeParsingStartState && is.Peek() == '\0')
|
||||
RAPIDJSON_PARSE_ERROR(kParseErrorDocumentEmpty, is.Tell());
|
||||
|
||||
else if (src == IterativeParsingStartState)
|
||||
RAPIDJSON_PARSE_ERROR(kParseErrorDocumentRootNotObjectOrArray, is.Tell());
|
||||
|
||||
else if (src == IterativeParsingFinishState)
|
||||
RAPIDJSON_PARSE_ERROR(kParseErrorDocumentRootNotSingular, is.Tell());
|
||||
|
||||
else if (src == IterativeParsingObjectInitialState || src == IterativeParsingMemberDelimiterState)
|
||||
RAPIDJSON_PARSE_ERROR(kParseErrorObjectMissName, is.Tell());
|
||||
|
||||
else if (src == IterativeParsingMemberKeyState)
|
||||
RAPIDJSON_PARSE_ERROR(kParseErrorObjectMissColon, is.Tell());
|
||||
|
||||
else if (src == IterativeParsingMemberValueState)
|
||||
RAPIDJSON_PARSE_ERROR(kParseErrorObjectMissCommaOrCurlyBracket, is.Tell());
|
||||
|
||||
else if (src == IterativeParsingElementState)
|
||||
RAPIDJSON_PARSE_ERROR(kParseErrorArrayMissCommaOrSquareBracket, is.Tell());
|
||||
|
||||
else
|
||||
RAPIDJSON_PARSE_ERROR(kParseErrorUnspecificSyntaxError, is.Tell());
|
||||
}
|
||||
|
||||
template <unsigned parseFlags, typename InputStream, typename Handler>
|
||||
ParseResult IterativeParse(InputStream& is, Handler& handler) {
|
||||
parseResult_.Clear();
|
||||
ClearStackOnExit scope(*this);
|
||||
IterativeParsingState state = IterativeParsingStartState;
|
||||
|
||||
SkipWhitespace(is);
|
||||
while (is.Peek() != '\0') {
|
||||
IterativeParsingToken t = Tokenize(is.Peek());
|
||||
IterativeParsingState n = Predict(state, t);
|
||||
IterativeParsingState d = Transit<parseFlags>(state, t, n, is, handler);
|
||||
|
||||
if (d == IterativeParsingErrorState) {
|
||||
HandleError(state, is);
|
||||
break;
|
||||
}
|
||||
|
||||
state = d;
|
||||
SkipWhitespace(is);
|
||||
}
|
||||
|
||||
// Handle the end of file.
|
||||
if (state != IterativeParsingFinishState)
|
||||
HandleError(state, is);
|
||||
|
||||
return parseResult_;
|
||||
}
|
||||
|
||||
//! Check whether pushing \c size more bytes keeps the stack within the configured limit.
/*! \param size Number of bytes about to be pushed onto stack_.
    \return true if no limit is configured (kStackSizeLimit_ == 0) or the stack would still fit.
*/
bool CheckStackSpaceQuota(size_t size) const {
    // A limit of 0 means unlimited.
    if (kStackSizeLimit_ == 0)
        return true;
    return stack_.GetSize() + size <= kStackSizeLimit_;
}
|
||||
|
||||
static const size_t kDefaultStackCapacity = 256; //!< Default stack capacity in bytes for storing a single decoded string.
|
||||
internal::Stack<Allocator> stack_; //!< A stack for storing decoded string temporarily during non-destructive parsing.
|
||||
const size_t kStackSizeLimit_; //!< Stack size limit(in bytes). A value of 0 means no limit.
|
||||
ParseResult parseResult_;
|
||||
}; // class GenericReader
|
||||
|
||||
|
@ -76,6 +76,24 @@ TEST_F(RapidJson, SIMD_SUFFIX(ReaderParse_DummyHandler)) {
|
||||
}
|
||||
}
|
||||
|
||||
// Performance test: run the iterative parser over json_ kTrialCount times with a no-op handler.
TEST_F(RapidJson, SIMD_SUFFIX(ReaderParseIterative_DummyHandler)) {
    for (size_t trial = 0; trial < kTrialCount; ++trial) {
        StringStream input(json_);
        BaseReaderHandler<> dummyHandler;
        Reader reader;
        EXPECT_TRUE(reader.Parse<kParseIterativeFlag>(input, dummyHandler));
    }
}
|
||||
|
||||
// Performance test: iterative parsing combined with the in-situ flag, using a no-op handler.
// NOTE(review): kParseInsituFlag is the destructive parsing mode, yet this test parses from a
// StringStream over json_ rather than from a mutable copy wrapped in an in-situ stream.
// Confirm whether that is intended.
TEST_F(RapidJson, SIMD_SUFFIX(ReaderParseIterativeInsitu_DummyHandler)) {
    for (size_t trial = 0; trial < kTrialCount; ++trial) {
        StringStream input(json_);
        BaseReaderHandler<> dummyHandler;
        Reader reader;
        EXPECT_TRUE(reader.Parse<kParseIterativeFlag|kParseInsituFlag>(input, dummyHandler));
    }
}
|
||||
|
||||
TEST_F(RapidJson, SIMD_SUFFIX(ReaderParse_DummyHandler_ValidateEncoding)) {
|
||||
for (size_t i = 0; i < kTrialCount; i++) {
|
||||
StringStream s(json_);
|
||||
|
@ -651,7 +651,7 @@ struct StreamTraits<CustomStringStream<Encoding> > {
|
||||
enum { copyOptimization = 1 };
|
||||
};
|
||||
|
||||
} // namespace rapdijson
|
||||
} // namespace rapidjson
|
||||
#endif
|
||||
|
||||
TEST(Reader, CustomStringStream) {
|
||||
@ -707,6 +707,243 @@ TEST(Reader, Parse_IStreamWrapper_StringStream) {
|
||||
EXPECT_FALSE(reader.HasParseError());
|
||||
}
|
||||
|
||||
// Test iterative parsing.
|
||||
|
||||
// Parse `text` with the iterative parser and expect failure with `errorCode` at `offset`.
// Expands to a braced block, so the locals below do not leak into the calling test.
#define TESTERRORHANDLING(text, errorCode, offset)\
{\
    StringStream inputStream(text); \
    BaseReaderHandler<> nullHandler; \
    Reader iterativeReader; \
    iterativeReader.IterativeParse<kParseDefaultFlags>(inputStream, nullHandler); \
    EXPECT_TRUE(iterativeReader.HasParseError()); \
    EXPECT_EQ(errorCode, iterativeReader.GetParseErrorCode()); \
    EXPECT_EQ(offset, iterativeReader.GetErrorOffset()); \
}
|
||||
|
||||
// Checks the error code and offset reported by the iterative parser for malformed documents.
TEST(Reader, IterativeParsing_ErrorHandling) {
    // Error raised while parsing a value (not by the state machine itself).
    TESTERRORHANDLING("{\"a\": a}", kParseErrorValueInvalid, 6u);

    // Document-level errors.
    TESTERRORHANDLING("", kParseErrorDocumentEmpty, 0u);
    TESTERRORHANDLING("1", kParseErrorDocumentRootNotObjectOrArray, 0u);
    TESTERRORHANDLING("{}{}", kParseErrorDocumentRootNotSingular, 2u);

    // Object errors.
    TESTERRORHANDLING("{1}", kParseErrorObjectMissName, 1u);
    TESTERRORHANDLING("{\"a\", 1}", kParseErrorObjectMissColon, 4u);
    TESTERRORHANDLING("{\"a\"}", kParseErrorObjectMissColon, 4u);
    TESTERRORHANDLING("{\"a\": 1", kParseErrorObjectMissCommaOrCurlyBracket, 7u);

    // Array errors.
    TESTERRORHANDLING("[1 2 3]", kParseErrorArrayMissCommaOrSquareBracket, 3u);
}
|
||||
|
||||
template<typename Encoding = UTF8<> >
|
||||
struct IterativeParsingReaderHandler {
|
||||
typedef typename Encoding::Ch Ch;
|
||||
|
||||
const static int LOG_NULL = -1;
|
||||
const static int LOG_BOOL = -2;
|
||||
const static int LOG_INT = -3;
|
||||
const static int LOG_UINT = -4;
|
||||
const static int LOG_INT64 = -5;
|
||||
const static int LOG_UINT64 = -6;
|
||||
const static int LOG_DOUBLE = -7;
|
||||
const static int LOG_STRING = -8;
|
||||
const static int LOG_STARTOBJECT = -9;
|
||||
const static int LOG_ENDOBJECT = -10;
|
||||
const static int LOG_STARTARRAY = -11;
|
||||
const static int LOG_ENDARRAY = -12;
|
||||
|
||||
const static size_t LogCapacity = 256;
|
||||
int Logs[LogCapacity];
|
||||
size_t LogCount;
|
||||
|
||||
IterativeParsingReaderHandler() : LogCount(0) {
|
||||
}
|
||||
|
||||
bool Null() { RAPIDJSON_ASSERT(LogCount < LogCapacity); Logs[LogCount++] = LOG_NULL; return true; }
|
||||
|
||||
bool Bool(bool) { RAPIDJSON_ASSERT(LogCount < LogCapacity); Logs[LogCount++] = LOG_BOOL; return true; }
|
||||
|
||||
bool Int(int) { RAPIDJSON_ASSERT(LogCount < LogCapacity); Logs[LogCount++] = LOG_INT; return true; }
|
||||
|
||||
bool Uint(unsigned) { RAPIDJSON_ASSERT(LogCount < LogCapacity); Logs[LogCount++] = LOG_INT; return true; }
|
||||
|
||||
bool Int64(int64_t) { RAPIDJSON_ASSERT(LogCount < LogCapacity); Logs[LogCount++] = LOG_INT64; return true; }
|
||||
|
||||
bool Uint64(uint64_t) { RAPIDJSON_ASSERT(LogCount < LogCapacity); Logs[LogCount++] = LOG_UINT64; return true; }
|
||||
|
||||
bool Double(double) { RAPIDJSON_ASSERT(LogCount < LogCapacity); Logs[LogCount++] = LOG_DOUBLE; return true; }
|
||||
|
||||
bool String(const Ch*, SizeType, bool) { RAPIDJSON_ASSERT(LogCount < LogCapacity); Logs[LogCount++] = LOG_STRING; return true; }
|
||||
|
||||
bool StartObject() { RAPIDJSON_ASSERT(LogCount < LogCapacity); Logs[LogCount++] = LOG_STARTOBJECT; return true; }
|
||||
|
||||
bool EndObject(SizeType c) {
|
||||
RAPIDJSON_ASSERT(LogCount < LogCapacity);
|
||||
Logs[LogCount++] = LOG_ENDOBJECT;
|
||||
Logs[LogCount++] = (int)c;
|
||||
return true;
|
||||
}
|
||||
|
||||
bool StartArray() { RAPIDJSON_ASSERT(LogCount < LogCapacity); Logs[LogCount++] = LOG_STARTARRAY; return true; }
|
||||
|
||||
bool EndArray(SizeType c) {
|
||||
RAPIDJSON_ASSERT(LogCount < LogCapacity);
|
||||
Logs[LogCount++] = LOG_ENDARRAY;
|
||||
Logs[LogCount++] = (int)c;
|
||||
return true;
|
||||
}
|
||||
};
|
||||
|
||||
// Parses a document containing nested containers and scalar values, then
// checks the sequence of handler events recorded by
// IterativeParsingReaderHandler.
TEST(Reader, IterativeParsing_General) {
    StringStream is("[1, {\"k\": [1, 2]}, null, false, true, \"string\", 1.2]");
    Reader reader;
    IterativeParsingReaderHandler<> handler;

    ParseResult r = reader.IterativeParse<kParseIterativeFlag>(is, handler);

    EXPECT_FALSE(r.IsError());
    EXPECT_FALSE(reader.HasParseError());

    // Expected log. Every LOG_ENDARRAY / LOG_ENDOBJECT tag is followed by the
    // count the handler was called with.
    int e[] = {
        handler.LOG_STARTARRAY,
        handler.LOG_INT,
        handler.LOG_STARTOBJECT,
        handler.LOG_STRING,
        handler.LOG_STARTARRAY,
        handler.LOG_INT,
        handler.LOG_INT,
        handler.LOG_ENDARRAY, 2,
        handler.LOG_ENDOBJECT, 1,
        handler.LOG_NULL,
        handler.LOG_BOOL,
        handler.LOG_BOOL,
        handler.LOG_STRING,
        handler.LOG_DOUBLE,
        handler.LOG_ENDARRAY, 7
    };
    const size_t expectedCount = sizeof(e) / sizeof(e[0]);

    EXPECT_EQ(expectedCount, handler.LogCount);

    // EXPECT_EQ above is non-fatal, so the comparison must be bounded by both
    // sizes: if the handler logged more entries than expected, iterating up to
    // handler.LogCount alone would read past the end of e[].
    for (size_t i = 0; i < handler.LogCount && i < expectedCount; ++i) {
        EXPECT_EQ(e[i], handler.Logs[i]) << "i = " << i;
    }
}
|
||||
|
||||
// Checks the counts passed to EndObject()/EndArray() for empty and non-empty
// containers, as recorded by IterativeParsingReaderHandler.
TEST(Reader, IterativeParsing_Count) {
    StringStream is("[{}, {\"k\": 1}, [1], []]");
    Reader reader;
    IterativeParsingReaderHandler<> handler;

    ParseResult r = reader.IterativeParse<kParseIterativeFlag>(is, handler);

    EXPECT_FALSE(r.IsError());
    EXPECT_FALSE(reader.HasParseError());

    // Expected log. Every LOG_ENDARRAY / LOG_ENDOBJECT tag is followed by the
    // count the handler was called with.
    int e[] = {
        handler.LOG_STARTARRAY,
        handler.LOG_STARTOBJECT,
        handler.LOG_ENDOBJECT, 0,
        handler.LOG_STARTOBJECT,
        handler.LOG_STRING,
        handler.LOG_INT,
        handler.LOG_ENDOBJECT, 1,
        handler.LOG_STARTARRAY,
        handler.LOG_INT,
        handler.LOG_ENDARRAY, 1,
        handler.LOG_STARTARRAY,
        handler.LOG_ENDARRAY, 0,
        handler.LOG_ENDARRAY, 4
    };
    const size_t expectedCount = sizeof(e) / sizeof(e[0]);

    EXPECT_EQ(expectedCount, handler.LogCount);

    // EXPECT_EQ above is non-fatal, so the comparison must be bounded by both
    // sizes: if the handler logged more entries than expected, iterating up to
    // handler.LogCount alone would read past the end of e[].
    for (size_t i = 0; i < handler.LogCount && i < expectedCount; ++i) {
        EXPECT_EQ(e[i], handler.Logs[i]) << "i = " << i;
    }
}
|
||||
|
||||
// Test iterative parsing on kParseErrorTermination.
|
||||
// Logging handler whose StartObject() hides the base version and always
// returns false.
struct HandlerTerminateAtStartObject : IterativeParsingReaderHandler<> {
    bool StartObject() {
        return false;
    }
};
|
||||
|
||||
// Logging handler whose StartArray() hides the base version and always
// returns false.
struct HandlerTerminateAtStartArray : IterativeParsingReaderHandler<> {
    bool StartArray() {
        return false;
    }
};
|
||||
|
||||
// Logging handler whose EndObject() hides the base version and always
// returns false, whatever count it is given.
struct HandlerTerminateAtEndObject : IterativeParsingReaderHandler<> {
    bool EndObject(SizeType) {
        return false;
    }
};
|
||||
|
||||
// Logging handler whose EndArray() hides the base version and always
// returns false, whatever count it is given.
struct HandlerTerminateAtEndArray : IterativeParsingReaderHandler<> {
    bool EndArray(SizeType) {
        return false;
    }
};
|
||||
|
||||
// For each handler that returns false from one callback, checks that parsing
// with kParseIterativeFlag reports kParseErrorTermination at the expected
// offset.
TEST(Reader, IterativeParsing_ShortCircuit) {
    // StartObject() returns false.
    {
        StringStream is("[1, {}]");
        Reader reader;
        HandlerTerminateAtStartObject handler;

        ParseResult result = reader.Parse<kParseIterativeFlag>(is, handler);

        EXPECT_TRUE(reader.HasParseError());
        EXPECT_EQ(kParseErrorTermination, result.Code());
        EXPECT_EQ(4u, result.Offset());
    }

    // StartArray() returns false.
    {
        StringStream is("{\"a\": []}");
        Reader reader;
        HandlerTerminateAtStartArray handler;

        ParseResult result = reader.Parse<kParseIterativeFlag>(is, handler);

        EXPECT_TRUE(reader.HasParseError());
        EXPECT_EQ(kParseErrorTermination, result.Code());
        EXPECT_EQ(6u, result.Offset());
    }

    // EndObject() returns false.
    {
        StringStream is("[1, {}]");
        Reader reader;
        HandlerTerminateAtEndObject handler;

        ParseResult result = reader.Parse<kParseIterativeFlag>(is, handler);

        EXPECT_TRUE(reader.HasParseError());
        EXPECT_EQ(kParseErrorTermination, result.Code());
        EXPECT_EQ(5u, result.Offset());
    }

    // EndArray() returns false.
    {
        StringStream is("{\"a\": []}");
        Reader reader;
        HandlerTerminateAtEndArray handler;

        ParseResult result = reader.Parse<kParseIterativeFlag>(is, handler);

        EXPECT_TRUE(reader.HasParseError());
        EXPECT_EQ(kParseErrorTermination, result.Code());
        EXPECT_EQ(7u, result.Offset());
    }
}
|
||||
|
||||
// Constructs a Reader with a limit argument of 20 and checks that parsing
// three nested arrays with kParseIterativeFlag fails with
// kParseErrorStackSizeLimitExceeded at offset 2.
TEST(Reader, IterativeParsing_LimitStackSize) {
    StringStream is("[[[]]]");
    BaseReaderHandler<> handler;
    Reader reader(20);

    ParseResult result = reader.Parse<kParseIterativeFlag>(is, handler);

    EXPECT_TRUE(reader.HasParseError());
    EXPECT_EQ(kParseErrorStackSizeLimitExceeded, result.Code());
    EXPECT_EQ(2u, result.Offset());
}
|
||||
|
||||
#ifdef __GNUC__
|
||||
RAPIDJSON_DIAG_POP
|
||||
#endif
|
||||
|
Loading…
x
Reference in New Issue
Block a user