poco/JSON/src/Parser.cpp

882 lines
21 KiB
C++
Raw Normal View History

2012-11-11 09:57:01 +01:00
//
// Parser.cpp
//
// $Id$
//
// Library: JSON
// Package: JSON
// Module: Parser
//
// Copyright (c) 2012, Applied Informatics Software Engineering GmbH.
// and Contributors.
//
// Permission is hereby granted, free of charge, to any person or organization
// obtaining a copy of the software and accompanying documentation covered by
// this license (the "Software") to use, reproduce, display, distribute,
// execute, and transmit the Software, and to prepare derivative works of the
// Software, and to permit third-parties to whom the Software is furnished to
// do so, all subject to the following:
//
// The copyright notices in the Software and this entire statement, including
// the above license grant, this restriction and the following disclaimer,
// must be included in all copies of the Software, in whole or in part, and
// all derivative works of the Software, unless such copies or derivative
// works are solely in the form of machine-executable object code generated by
// a source language processor.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE, TITLE AND NON-INFRINGEMENT. IN NO EVENT
// SHALL THE COPYRIGHT HOLDERS OR ANYONE DISTRIBUTING THE SOFTWARE BE LIABLE
// FOR ANY DAMAGES OR OTHER LIABILITY, WHETHER IN CONTRACT, TORT OR OTHERWISE,
// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
// DEALINGS IN THE SOFTWARE.
//
#include "Poco/JSON/Parser.h"
#include "Poco/JSON/JSONException.h"
#include "Poco/Ascii.h"
#include "Poco/Token.h"
#include "Poco/UTF8Encoding.h"
2013-05-20 20:37:13 +02:00
2012-11-11 09:57:01 +01:00
#undef min
#undef max
#include <limits>
2013-05-20 20:37:13 +02:00
#include <clocale>
2012-11-11 09:57:01 +01:00
namespace Poco {
namespace JSON {
2013-05-20 20:37:13 +02:00
// True when the 16-bit code unit `uc` lies in 0xD800..0xDBFF (top six bits
// equal those of 0xD800), i.e. it is the first half of a UTF-16 surrogate pair.
#ifndef IS_HIGH_SURROGATE
#define IS_HIGH_SURROGATE(uc) (((uc) & 0xFC00) == 0xD800)
#endif
// True when the 16-bit code unit `uc` lies in 0xDC00..0xDFFF (top six bits
// equal those of 0xDC00), i.e. it is the second half of a UTF-16 surrogate pair.
#ifndef IS_LOW_SURROGATE
#define IS_LOW_SURROGATE(uc) (((uc) & 0xFC00) == 0xDC00)
#endif
// Combines the low ten bits of `hi` and `lo` and adds 0x10000, yielding the
// code point encoded by the surrogate pair.
#ifndef DECODE_SURROGATE_PAIR
#define DECODE_SURROGATE_PAIR(hi,lo) ((((hi) & 0x3FF) << 10) + ((lo) & 0x3FF) + 0x10000)
#endif
// Number of elements in a statically sized array.
#define COUNTOF(x) (sizeof(x)/sizeof(x[0]))
// Marker bits OR-ed into the first byte of a UTF-8 sequence, indexed by the
// number of trailing bytes (0..3); used by Parser::decodeUnicodeChar().
static unsigned char utf8_lead_bits[4] = { 0x00, 0xC0, 0xE0, 0xF0 };
// Maps each 7-bit ASCII code (0..127, eight entries per row) to a character
// class. Parser::parseChar() uses the class as the column index into
// _stateTransitionTable; characters mapped to `xx` are rejected outright.
const int Parser::_asciiClass[] = {
xx, xx, xx, xx, xx, xx, xx, xx,
xx, C_WHITE, C_WHITE, xx, xx, C_WHITE, xx, xx,
xx, xx, xx, xx, xx, xx, xx, xx,
xx, xx, xx, xx, xx, xx, xx, xx,
C_SPACE, C_ETC, C_QUOTE, C_ETC, C_ETC, C_ETC, C_ETC, C_ETC,
C_ETC, C_ETC, C_STAR, C_PLUS, C_COMMA, C_MINUS, C_POINT, C_SLASH,
C_ZERO, C_DIGIT, C_DIGIT, C_DIGIT, C_DIGIT, C_DIGIT, C_DIGIT, C_DIGIT,
C_DIGIT, C_DIGIT, C_COLON, C_ETC, C_ETC, C_ETC, C_ETC, C_ETC,
C_ETC, C_ABCDF, C_ABCDF, C_ABCDF, C_ABCDF, C_E, C_ABCDF, C_ETC,
C_ETC, C_ETC, C_ETC, C_ETC, C_ETC, C_ETC, C_ETC, C_ETC,
C_ETC, C_ETC, C_ETC, C_ETC, C_ETC, C_ETC, C_ETC, C_ETC,
C_ETC, C_ETC, C_ETC, C_LSQRB, C_BACKS, C_RSQRB, C_ETC, C_ETC,
C_ETC, C_LOW_A, C_LOW_B, C_LOW_C, C_LOW_D, C_LOW_E, C_LOW_F, C_ETC,
C_ETC, C_ETC, C_ETC, C_ETC, C_LOW_L, C_ETC, C_LOW_N, C_ETC,
C_ETC, C_ETC, C_LOW_R, C_LOW_S, C_LOW_T, C_LOW_U, C_ETC, C_ETC,
C_ETC, C_ETC, C_ETC, C_LCURB, C_ETC, C_RCURB, C_ETC, C_ETC
};
// State machine driving Parser::parseChar(): the row is the current state
// (_state), the column is the class of the incoming character. A non-negative
// entry is simply the next state. A negative entry is an action code that
// parseChar() dispatches on in its switch (the named codes such as SB, EX, CB
// and the literals -2..-9); any negative entry without a matching case,
// including `xx`, makes parseChar() report a syntax error.
const int Parser::_stateTransitionTable[NR_STATES][NR_CLASSES] = {
/*
white 1-9 ABCDF etc
space | { } [ ] : , " \ / + - . 0 | a b c d e f l n r s t u | E | * */
/*start GO*/ {GO,GO,-6,xx,-5,xx,xx,xx,xx,xx,CB,xx,xx,xx,xx,xx,xx,xx,xx,xx,xx,xx,xx,xx,xx,xx,xx,xx,xx,xx,xx,xx},
/*ok OK*/ {OK,OK,xx,-8,xx,-7,xx,-3,xx,xx,CB,xx,xx,xx,xx,xx,xx,xx,xx,xx,xx,xx,xx,xx,xx,xx,xx,xx,xx,xx,xx,xx},
/*object OB*/ {OB,OB,xx,-9,xx,xx,xx,xx,SB,xx,CB,xx,xx,xx,xx,xx,xx,xx,xx,xx,xx,xx,xx,xx,xx,xx,xx,xx,xx,xx,xx,xx},
/*key KE*/ {KE,KE,xx,xx,xx,xx,xx,xx,SB,xx,CB,xx,xx,xx,xx,xx,xx,xx,xx,xx,xx,xx,xx,xx,xx,xx,xx,xx,xx,xx,xx,xx},
/*colon CO*/ {CO,CO,xx,xx,xx,xx,-2,xx,xx,xx,CB,xx,xx,xx,xx,xx,xx,xx,xx,xx,xx,xx,xx,xx,xx,xx,xx,xx,xx,xx,xx,xx},
/*value VA*/ {VA,VA,-6,xx,-5,xx,xx,xx,SB,xx,CB,xx,MX,xx,ZX,IX,xx,xx,xx,xx,xx,FA,xx,NU,xx,xx,TR,xx,xx,xx,xx,xx},
/*array AR*/ {AR,AR,-6,xx,-5,-7,xx,xx,SB,xx,CB,xx,MX,xx,ZX,IX,xx,xx,xx,xx,xx,FA,xx,NU,xx,xx,TR,xx,xx,xx,xx,xx},
/*string ST*/ {ST,xx,ST,ST,ST,ST,ST,ST,-4,EX,ST,ST,ST,ST,ST,ST,ST,ST,ST,ST,ST,ST,ST,ST,ST,ST,ST,ST,ST,ST,ST,ST},
/*escape ES*/ {xx,xx,xx,xx,xx,xx,xx,xx,ST,ST,ST,xx,xx,xx,xx,xx,xx,ST,xx,xx,xx,ST,xx,ST,ST,xx,ST,U1,xx,xx,xx,xx},
/*u1 U1*/ {xx,xx,xx,xx,xx,xx,xx,xx,xx,xx,xx,xx,xx,xx,U2,U2,U2,U2,U2,U2,U2,U2,xx,xx,xx,xx,xx,xx,U2,U2,xx,xx},
/*u2 U2*/ {xx,xx,xx,xx,xx,xx,xx,xx,xx,xx,xx,xx,xx,xx,U3,U3,U3,U3,U3,U3,U3,U3,xx,xx,xx,xx,xx,xx,U3,U3,xx,xx},
/*u3 U3*/ {xx,xx,xx,xx,xx,xx,xx,xx,xx,xx,xx,xx,xx,xx,U4,U4,U4,U4,U4,U4,U4,U4,xx,xx,xx,xx,xx,xx,U4,U4,xx,xx},
/*u4 U4*/ {xx,xx,xx,xx,xx,xx,xx,xx,xx,xx,xx,xx,xx,xx,UC,UC,UC,UC,UC,UC,UC,UC,xx,xx,xx,xx,xx,xx,UC,UC,xx,xx},
/*minus MI*/ {xx,xx,xx,xx,xx,xx,xx,xx,xx,xx,xx,xx,xx,xx,ZE,IT,xx,xx,xx,xx,xx,xx,xx,xx,xx,xx,xx,xx,xx,xx,xx,xx},
/*zero ZE*/ {OK,OK,xx,-8,xx,-7,xx,-3,xx,xx,CB,xx,xx,DF,xx,xx,xx,xx,xx,xx,xx,xx,xx,xx,xx,xx,xx,xx,xx,xx,xx,xx},
/*int IT*/ {OK,OK,xx,-8,xx,-7,xx,-3,xx,xx,CB,xx,xx,DF,IT,IT,xx,xx,xx,xx,DE,xx,xx,xx,xx,xx,xx,xx,xx,DE,xx,xx},
/*frac FR*/ {OK,OK,xx,-8,xx,-7,xx,-3,xx,xx,CB,xx,xx,xx,FR,FR,xx,xx,xx,xx,E1,xx,xx,xx,xx,xx,xx,xx,xx,E1,xx,xx},
/*e E1*/ {xx,xx,xx,xx,xx,xx,xx,xx,xx,xx,xx,E2,E2,xx,E3,E3,xx,xx,xx,xx,xx,xx,xx,xx,xx,xx,xx,xx,xx,xx,xx,xx},
/*ex E2*/ {xx,xx,xx,xx,xx,xx,xx,xx,xx,xx,xx,xx,xx,xx,E3,E3,xx,xx,xx,xx,xx,xx,xx,xx,xx,xx,xx,xx,xx,xx,xx,xx},
/*exp E3*/ {OK,OK,xx,-8,xx,-7,xx,-3,xx,xx,xx,xx,xx,xx,E3,E3,xx,xx,xx,xx,xx,xx,xx,xx,xx,xx,xx,xx,xx,xx,xx,xx},
/*tr T1*/ {xx,xx,xx,xx,xx,xx,xx,xx,xx,xx,xx,xx,xx,xx,xx,xx,xx,xx,xx,xx,xx,xx,xx,xx,T2,xx,xx,xx,xx,xx,xx,xx},
/*tru T2*/ {xx,xx,xx,xx,xx,xx,xx,xx,xx,xx,xx,xx,xx,xx,xx,xx,xx,xx,xx,xx,xx,xx,xx,xx,xx,xx,xx,T3,xx,xx,xx,xx},
/*1 T3*/ {xx,xx,xx,xx,xx,xx,xx,xx,xx,xx,CB,xx,xx,xx,xx,xx,xx,xx,xx,xx,OK,xx,xx,xx,xx,xx,xx,xx,xx,xx,xx,xx},
/*fa F1*/ {xx,xx,xx,xx,xx,xx,xx,xx,xx,xx,xx,xx,xx,xx,xx,xx,F2,xx,xx,xx,xx,xx,xx,xx,xx,xx,xx,xx,xx,xx,xx,xx},
/*fal F2*/ {xx,xx,xx,xx,xx,xx,xx,xx,xx,xx,xx,xx,xx,xx,xx,xx,xx,xx,xx,xx,xx,xx,F3,xx,xx,xx,xx,xx,xx,xx,xx,xx},
/*fals F3*/ {xx,xx,xx,xx,xx,xx,xx,xx,xx,xx,xx,xx,xx,xx,xx,xx,xx,xx,xx,xx,xx,xx,xx,xx,xx,F4,xx,xx,xx,xx,xx,xx},
/*0 F4*/ {xx,xx,xx,xx,xx,xx,xx,xx,xx,xx,CB,xx,xx,xx,xx,xx,xx,xx,xx,xx,OK,xx,xx,xx,xx,xx,xx,xx,xx,xx,xx,xx},
/*nu N1*/ {xx,xx,xx,xx,xx,xx,xx,xx,xx,xx,xx,xx,xx,xx,xx,xx,xx,xx,xx,xx,xx,xx,xx,xx,xx,xx,xx,N2,xx,xx,xx,xx},
/*nul N2*/ {xx,xx,xx,xx,xx,xx,xx,xx,xx,xx,xx,xx,xx,xx,xx,xx,xx,xx,xx,xx,xx,xx,N3,xx,xx,xx,xx,xx,xx,xx,xx,xx},
/*null N3*/ {xx,xx,xx,xx,xx,xx,xx,xx,xx,xx,CB,xx,xx,xx,xx,xx,xx,xx,xx,xx,xx,xx,OK,xx,xx,xx,xx,xx,xx,xx,xx,xx},
/*/ C1*/ {xx,xx,xx,xx,xx,xx,xx,xx,xx,xx,xx,xx,xx,xx,xx,xx,xx,xx,xx,xx,xx,xx,xx,xx,xx,xx,xx,xx,xx,xx,xx,C2},
/*/* C2*/ {C2,C2,C2,C2,C2,C2,C2,C2,C2,C2,C2,C2,C2,C2,C2,C2,C2,C2,C2,C2,C2,C2,C2,C2,C2,C2,C2,C2,C2,C2,C2,C3},
/** C3*/ {C2,C2,C2,C2,C2,C2,C2,C2,C2,C2,CE,C2,C2,C2,C2,C2,C2,C2,C2,C2,C2,C2,C2,C2,C2,C2,C2,C2,C2,C2,C2,C3},
/*_. FX*/ {OK,OK,xx,-8,xx,-7,xx,-3,xx,xx,xx,xx,xx,xx,FR,FR,xx,xx,xx,xx,E1,xx,xx,xx,xx,xx,xx,xx,xx,E1,xx,xx},
/*\ D1*/ {xx,xx,xx,xx,xx,xx,xx,xx,xx,D2,xx,xx,xx,xx,xx,xx,xx,xx,xx,xx,xx,xx,xx,xx,xx,xx,xx,xx,xx,xx,xx,xx},
/*\ D2*/ {xx,xx,xx,xx,xx,xx,xx,xx,xx,xx,xx,xx,xx,xx,xx,xx,xx,xx,xx,xx,xx,xx,xx,xx,xx,xx,xx,U1,xx,xx,xx,xx},
};
2013-05-20 23:36:58 -05:00
// Source
/// Abstract supplier of input bytes for the parser.
class Source
{
public:
	Source()
	{
	}

	virtual ~Source()
	{
	}

	/// Stores the next input byte in `c` and returns true, or returns
	/// false when no further byte is available.
	virtual bool nextChar(int& c) = 0;
};


/// Source that pulls bytes from a std::istream.
class StreamSource : public Source
{
public:
	explicit StreamSource(std::istream& in) : _in(in)
	{
	}

	virtual ~StreamSource()
	{
	}

	/// Reads one byte from the stream. Returns false if the stream was
	/// already in a failed/EOF state or the read itself failed; in the
	/// latter case `c` holds whatever get() returned.
	bool nextChar(int& c)
	{
		if (!_in.good()) return false;

		c = _in.get();
		return _in.good();
	}

private:
	std::istream& _in;
};


/// Source that walks a [begin, end) range of std::string characters.
class StringSource : public Source
{
public:
	StringSource(std::string::const_iterator begin, std::string::const_iterator end) : _it(begin), _end(end)
	{
	}

	virtual ~StringSource()
	{
	}

	/// Yields the next byte as a value in 0..255, matching what
	/// StreamSource delivers via istream::get().
	bool nextChar(int& c)
	{
		if (_it == _end) return false;

		// `char` may be signed: without the cast, bytes >= 0x80 (every byte
		// of a multi-byte UTF-8 sequence) would arrive as negative values
		// and be rejected by the parser.
		c = static_cast<unsigned char>(*_it);
		++_it;
		return true;
	}

private:
	std::string::const_iterator _it;
	std::string::const_iterator _end;
};
// Parser
// Creates a parser that reports parse events to pHandler (the code elsewhere
// checks it for null before each call, so it may be empty) and whose parse
// buffer starts out with bufSize elements. The state machine begins in GO with
// no pending value type, an unlimited nesting depth, null bytes allowed and
// comments disallowed.
Parser::Parser(const Handler::Ptr& pHandler, std::size_t bufSize) :
2013-05-20 20:37:13 +02:00
_pHandler(pHandler),
_state(GO),
_beforeCommentState(0),
_type(JSON_T_NONE),
_escaped(0),
_comment(0),
_utf16HighSurrogate(0),
2013-05-20 23:36:58 -05:00
_depth(UNLIMITED_DEPTH),
2013-05-20 20:37:13 +02:00
_top(-1),
_stack(PARSER_STACK_SIZE),
2013-05-20 23:36:58 -05:00
_parseBuffer(bufSize),
2013-05-20 20:37:13 +02:00
_parseBufferCount(0),
2013-05-20 23:36:58 -05:00
_decimalPoint('.'),
2013-05-20 20:37:13 +02:00
_allowNullByte(true),
_allowComments(false)
2012-11-11 09:57:01 +01:00
{
2013-05-20 20:37:13 +02:00
// The bottom stack entry is MODE_DONE; it is pushed here and again by reset().
push(MODE_DONE);
}
2012-11-11 09:57:01 +01:00
2013-05-20 20:37:13 +02:00
// Nothing to release explicitly; the body is intentionally empty.
Parser::~Parser()
{
2013-05-20 23:36:58 -05:00
}
void Parser::reset()
{
_state = GO;
_beforeCommentState = 0;
_type = JSON_T_NONE;
_escaped = 0;
_comment = 0;
_utf16HighSurrogate = 0;
_top = -1;
_parseBufferCount = 0;
2013-05-20 20:37:13 +02:00
2013-05-20 23:36:58 -05:00
_stack.clear();
_parseBuffer.clear();
push(MODE_DONE);
clearBuffer();
2013-05-20 20:37:13 +02:00
}
2013-05-20 23:36:58 -05:00
2013-05-20 20:37:13 +02:00
/// Parses a complete JSON document held in a string and returns result().
///
/// Throws SyntaxException when a character is rejected by parseChar(), and
/// JSONException when the input ends before the document is complete.
/// NOTE(review): the std::istream overload throws JSONException in the first
/// case; the types are kept as-is because callers may rely on them.
Dynamic::Var Parser::parse(const std::string& json)
{
	StringSource source(json.begin(), json.end());

	for (int c = 0; source.nextChar(c); )
	{
		if (!parseChar(c, source)) throw SyntaxException("JSON syntax error");
	}

	if (done()) return result();

	throw JSONException("JSON syntax error");
}
2012-11-11 09:57:01 +01:00
2013-05-20 20:37:13 +02:00
/// Parses a complete JSON document read from a stream and returns result().
///
/// Throws JSONException when a character is rejected by parseChar() or when
/// the stream ends before the document is complete.
Dynamic::Var Parser::parse(std::istream& in)
{
	StreamSource source(in);

	for (int c = 0; source.nextChar(c); )
	{
		if (!parseChar(c, source)) throw JSONException("JSON syntax error");
	}

	if (done()) return result();

	throw JSONException("JSON syntax error");
}
2012-11-11 09:57:01 +01:00
2013-05-20 20:37:13 +02:00
bool Parser::push(int mode)
2012-11-11 09:57:01 +01:00
{
2013-05-20 20:37:13 +02:00
_top += 1;
if (_depth < 0)
2012-11-11 09:57:01 +01:00
{
2013-05-20 20:37:13 +02:00
if (_top >= _stack.size())
_stack.resize(_stack.size() * 2, true);
2012-11-11 09:57:01 +01:00
}
2013-05-20 20:37:13 +02:00
else
2012-11-11 09:57:01 +01:00
{
2013-05-20 20:37:13 +02:00
if (_top >= _depth) return false;
2012-11-11 09:57:01 +01:00
}
2013-05-20 20:37:13 +02:00
_stack[_top] = mode;
return true;
}
2012-11-11 09:57:01 +01:00
2013-05-20 20:37:13 +02:00
/// Pops the top of the mode stack, but only if it equals `mode`.
/// Returns false (stack unchanged) when the stack is empty or the top
/// entry is a different mode.
bool Parser::pop(int mode)
{
	const bool topMatches = (_top >= 0) && (_stack[_top] == mode);
	if (topMatches) _top -= 1;
	return topMatches;
}
2012-11-11 09:57:01 +01:00
2013-05-20 23:36:58 -05:00
2013-05-20 20:37:13 +02:00
/// Empties the parse buffer: writes a terminating zero into the first
/// slot and sets the character count back to zero.
void Parser::clearBuffer()
{
	_parseBuffer[0] = 0;
	_parseBufferCount = 0;
}
2013-05-20 20:37:13 +02:00
/// Drops the last character of the parse buffer, overwriting it with the
/// zero terminator. The buffer must hold at least one character.
void Parser::parseBufferPopBackChar()
{
	poco_assert(_parseBufferCount >= 1);
	_parseBuffer[--_parseBufferCount] = 0;
}
2012-11-11 09:57:01 +01:00
2013-05-20 20:37:13 +02:00
/// Appends `c` to the parse buffer and keeps it zero-terminated, calling
/// growBuffer() first when the character plus terminator would not fit.
void Parser::parseBufferPushBackChar(char c)
{
	const bool needsRoom = (_parseBufferCount + 1 >= _parseBuffer.size());
	if (needsRoom) growBuffer();

	_parseBuffer[_parseBufferCount] = c;
	++_parseBufferCount;
	_parseBuffer[_parseBufferCount] = 0;
}
2012-11-11 09:57:01 +01:00
2013-05-20 20:37:13 +02:00
void Parser::addEscapedCharToParseBuffer(int nextChar)
{
_escaped = 0;
// remove the backslash
parseBufferPopBackChar();
switch(nextChar)
2012-11-11 09:57:01 +01:00
{
2013-05-20 20:37:13 +02:00
case 'b':
parseBufferPushBackChar('\b');
break;
case 'f':
parseBufferPushBackChar('\f');
break;
case 'n':
parseBufferPushBackChar('\n');
break;
case 'r':
parseBufferPushBackChar('\r');
break;
case 't':
parseBufferPushBackChar('\t');
break;
case '"':
parseBufferPushBackChar('"');
break;
case '\\':
parseBufferPushBackChar('\\');
break;
case '/':
parseBufferPushBackChar('/');
break;
case 'u':
parseBufferPushBackChar('\\');
parseBufferPushBackChar('u');
break;
default:
break;
}
}
2012-11-11 09:57:01 +01:00
2013-05-20 20:37:13 +02:00
void Parser::addCharToParseBuffer(int nextChar, int nextClass)
{
if (_escaped)
{
addEscapedCharToParseBuffer(nextChar);
return;
2012-11-11 09:57:01 +01:00
}
2013-05-20 20:37:13 +02:00
else if (!_comment)
{
2013-05-20 20:37:13 +02:00
if ((_type != JSON_T_NONE) ||
!((nextClass == C_SPACE) || (nextClass == C_WHITE)))
{
2013-05-20 20:37:13 +02:00
parseBufferPushBackChar((char) nextChar);
}
}
2013-05-20 20:37:13 +02:00
}
2012-11-11 09:57:01 +01:00
2013-05-20 23:36:58 -05:00
bool Parser::parseChar(int nextChar, Source& source)
2012-11-11 09:57:01 +01:00
{
2013-05-20 20:37:13 +02:00
int nextClass, nextState;
2012-11-11 09:57:01 +01:00
2013-05-20 20:37:13 +02:00
// Determine the character's class.
if (nextChar < 0 || (!_allowNullByte && nextChar == 0)) return false;
if (0x80 <= nextChar && nextChar <= 0xFF)
2012-11-11 09:57:01 +01:00
{
2013-05-20 20:37:13 +02:00
nextClass = C_ETC;
int count = utf8_check_first(nextChar);
if (!count)
{
throw JSONException(format("Unable to decode byte 0x%x", (unsigned int) nextChar));
}
2012-11-11 09:57:01 +01:00
2013-05-20 20:37:13 +02:00
char buffer[4];
buffer[0] = nextChar;
for(int i = 1; i < count; ++i)
2012-11-11 09:57:01 +01:00
{
2013-05-20 20:37:13 +02:00
int c = 0;
2013-05-20 23:36:58 -05:00
if (!source.nextChar(c)) throw JSONException("Invalid UTF8 character found");
2013-05-20 20:37:13 +02:00
buffer[i] = c;
}
if ( !UTF8Encoding::isLegal((unsigned char*) buffer, count) )
{
throw JSONException("No legal UTF8 found");
2012-11-11 09:57:01 +01:00
}
2013-05-20 20:37:13 +02:00
for(int i = 0; i < count; ++i)
2012-11-11 09:57:01 +01:00
{
2013-05-20 20:37:13 +02:00
parseBufferPushBackChar(buffer[i]);
2012-11-11 09:57:01 +01:00
}
2013-05-20 20:37:13 +02:00
return true;
2012-11-11 09:57:01 +01:00
}
2013-05-20 20:37:13 +02:00
else
2012-11-11 09:57:01 +01:00
{
2013-05-20 20:37:13 +02:00
nextClass = _asciiClass[nextChar];
if (nextClass <= xx) return false;
2012-11-11 09:57:01 +01:00
}
2013-05-20 20:37:13 +02:00
addCharToParseBuffer(nextChar, nextClass);
2012-11-11 09:57:01 +01:00
2013-05-20 20:37:13 +02:00
// Get the next _state from the _state transition table.
nextState = _stateTransitionTable[_state][nextClass];
if (nextState >= 0)
2012-11-11 09:57:01 +01:00
{
2013-05-20 20:37:13 +02:00
_state = nextState;
2012-11-11 09:57:01 +01:00
}
2013-05-20 20:37:13 +02:00
else
2012-11-11 09:57:01 +01:00
{
2013-05-20 20:37:13 +02:00
// Or perform one of the actions.
switch (nextState)
2012-11-11 09:57:01 +01:00
{
2013-05-20 20:37:13 +02:00
// Unicode character
case UC:
if(!decodeUnicodeChar()) return false;
// check if we need to read a second UTF-16 char
if (_utf16HighSurrogate) _state = D1;
else _state = ST;
break;
// _escaped char
case EX:
_escaped = 1;
_state = ES;
break;
// integer detected by minus
case MX:
_type = JSON_T_INTEGER;
_state = MI;
break;
// integer detected by zero
case ZX:
_type = JSON_T_INTEGER;
_state = ZE;
break;
// integer detected by 1-9
case IX:
_type = JSON_T_INTEGER;
_state = IT;
break;
// floating point number detected by exponent
case DE:
assertNotStringNullBool();
_type = JSON_T_FLOAT;
_state = E1;
break;
// floating point number detected by fraction
case DF:
assertNotStringNullBool();
_type = JSON_T_FLOAT;
_state = FX;
break;
// string begin "
case SB:
clearBuffer();
poco_assert(_type == JSON_T_NONE);
_type = JSON_T_STRING;
_state = ST;
break;
2012-11-11 09:57:01 +01:00
2013-05-20 20:37:13 +02:00
// n
case NU:
poco_assert(_type == JSON_T_NONE);
_type = JSON_T_NULL;
_state = N1;
break;
// f
case FA:
poco_assert(_type == JSON_T_NONE);
_type = JSON_T_FALSE;
_state = F1;
break;
// t
case TR:
poco_assert(_type == JSON_T_NONE);
_type = JSON_T_TRUE;
_state = T1;
break;
2012-11-11 09:57:01 +01:00
2013-05-20 20:37:13 +02:00
// closing comment
case CE:
_comment = 0;
poco_assert(_parseBufferCount == 0);
poco_assert(_type == JSON_T_NONE);
_state = _beforeCommentState;
break;
2012-11-11 09:57:01 +01:00
2013-05-20 20:37:13 +02:00
// opening comment
case CB:
if (!_allowComments) return false;
parseBufferPopBackChar();
parseBuffer();
poco_assert(_parseBufferCount == 0);
poco_assert(_type != JSON_T_STRING);
switch (_stack[_top])
{
case MODE_ARRAY:
case MODE_OBJECT:
switch(_state)
2012-11-11 09:57:01 +01:00
{
2013-05-20 20:37:13 +02:00
case VA:
case AR:
_beforeCommentState = _state;
break;
default:
_beforeCommentState = OK;
break;
2012-11-11 09:57:01 +01:00
}
2013-05-20 20:37:13 +02:00
break;
default:
_beforeCommentState = _state;
break;
}
_type = JSON_T_NONE;
_state = C1;
_comment = 1;
break;
// empty }
case -9:
{
clearBuffer();
if (_pHandler) _pHandler->endObject();
2012-11-11 09:57:01 +01:00
2013-05-20 20:37:13 +02:00
if (!pop(MODE_KEY)) return false;
_state = OK;
break;
}
// }
case -8:
2012-11-11 09:57:01 +01:00
{
2013-05-20 20:37:13 +02:00
parseBufferPopBackChar();
parseBuffer();
if (_pHandler) _pHandler->endObject();
if (!pop(MODE_OBJECT)) return false;
_type = JSON_T_NONE;
_state = OK;
break;
2012-11-11 09:57:01 +01:00
}
2013-05-20 20:37:13 +02:00
// ]
case -7:
2012-11-11 09:57:01 +01:00
{
2013-05-20 20:37:13 +02:00
parseBufferPopBackChar();
parseBuffer();
if (_pHandler) _pHandler->endArray();
if (!pop(MODE_ARRAY)) return false;
_type = JSON_T_NONE;
_state = OK;
break;
}
// {
case -6:
{
parseBufferPopBackChar();
if (_pHandler) _pHandler->startObject();
if (!push(MODE_KEY)) return false;
poco_assert(_type == JSON_T_NONE);
_state = OB;
break;
}
// [
case -5:
{
parseBufferPopBackChar();
if (_pHandler) _pHandler->startArray();
if (!push(MODE_ARRAY)) return false;
poco_assert(_type == JSON_T_NONE);
_state = AR;
break;
}
// string end "
case -4:
parseBufferPopBackChar();
switch (_stack[_top])
2012-11-11 09:57:01 +01:00
{
2013-05-20 20:37:13 +02:00
case MODE_KEY:
2012-11-11 09:57:01 +01:00
{
2013-05-20 20:37:13 +02:00
poco_assert(_type == JSON_T_STRING);
_type = JSON_T_NONE;
_state = CO;
2012-11-11 09:57:01 +01:00
2013-05-20 20:37:13 +02:00
if (_pHandler)
2012-11-11 09:57:01 +01:00
{
2013-05-20 20:37:13 +02:00
std::string value(_parseBuffer.begin(), _parseBufferCount);
_pHandler->key(value);
2012-11-11 09:57:01 +01:00
}
2013-05-20 20:37:13 +02:00
clearBuffer();
2012-11-11 09:57:01 +01:00
break;
}
2013-05-20 20:37:13 +02:00
case MODE_ARRAY:
case MODE_OBJECT:
poco_assert(_type == JSON_T_STRING);
parseBuffer();
_type = JSON_T_NONE;
_state = OK;
2012-11-11 09:57:01 +01:00
break;
default:
2013-05-20 20:37:13 +02:00
return false;
2012-11-11 09:57:01 +01:00
}
2013-05-20 20:37:13 +02:00
break;
2012-11-11 09:57:01 +01:00
2013-05-20 20:37:13 +02:00
// ,
case -3:
{
parseBufferPopBackChar();
parseBuffer();
switch (_stack[_top])
{
case MODE_OBJECT:
//A comma causes a flip from object mode to key mode.
if (!pop(MODE_OBJECT) || !push(MODE_KEY)) return false;
poco_assert(_type != JSON_T_STRING);
_type = JSON_T_NONE;
_state = KE;
break;
case MODE_ARRAY:
poco_assert(_type != JSON_T_STRING);
_type = JSON_T_NONE;
_state = VA;
break;
default:
return false;
}
break;
}
// :
case -2:
// A colon causes a flip from key mode to object mode.
parseBufferPopBackChar();
if (!pop(MODE_KEY) || !push(MODE_OBJECT)) return false;
poco_assert(_type == JSON_T_NONE);
_state = VA;
break;
//Bad action.
default:
return false;
2012-11-11 09:57:01 +01:00
}
}
2013-05-20 20:37:13 +02:00
return true;
}
2012-11-11 09:57:01 +01:00
2013-05-20 20:37:13 +02:00
int Parser::decodeUnicodeChar()
2012-11-11 09:57:01 +01:00
{
2013-05-20 20:37:13 +02:00
int i;
unsigned uc = 0;
char* p;
int trail_bytes;
2012-11-11 09:57:01 +01:00
2013-05-20 20:37:13 +02:00
poco_assert(_parseBufferCount >= 6);
2012-11-11 09:57:01 +01:00
2013-05-20 20:37:13 +02:00
p = &_parseBuffer[_parseBufferCount - 4];
2012-11-11 09:57:01 +01:00
2013-05-20 20:37:13 +02:00
for (i = 12; i >= 0; i -= 4, ++p) {
unsigned x = *p;
if (x >= 'a') {
x -= ('a' - 10);
} else if (x >= 'A') {
x -= ('A' - 10);
} else {
x &= ~0x30u;
}
2012-11-11 09:57:01 +01:00
2013-05-20 20:37:13 +02:00
poco_assert(x < 16);
2012-11-11 09:57:01 +01:00
2013-05-20 20:37:13 +02:00
uc |= x << i;
2012-11-11 09:57:01 +01:00
}
2013-05-20 20:37:13 +02:00
if ( !_allowNullByte && uc == 0 ) return 0; // Null byte not allowed
2012-11-11 09:57:01 +01:00
2013-05-20 20:37:13 +02:00
// clear UTF-16 char from buffer
_parseBufferCount -= 6;
_parseBuffer[_parseBufferCount] = 0;
2012-11-11 09:57:01 +01:00
2013-05-20 20:37:13 +02:00
// attempt decoding
if (_utf16HighSurrogate) {
if (IS_LOW_SURROGATE(uc)) {
uc = DECODE_SURROGATE_PAIR(_utf16HighSurrogate, uc);
trail_bytes = 3;
_utf16HighSurrogate = 0;
} else {
// high surrogate without a following low surrogate
return 0;
2012-11-11 09:57:01 +01:00
}
2013-05-20 20:37:13 +02:00
} else {
if (uc < 0x80) {
trail_bytes = 0;
} else if (uc < 0x800) {
trail_bytes = 1;
} else if (IS_HIGH_SURROGATE(uc)) {
// save the high surrogate and wait for the low surrogate
_utf16HighSurrogate = uc;
return 1;
} else if (IS_LOW_SURROGATE(uc)) {
// low surrogate without a preceding high surrogate
return 0;
} else {
trail_bytes = 2;
2012-11-11 09:57:01 +01:00
}
}
2013-05-20 20:37:13 +02:00
_parseBuffer[_parseBufferCount++] = (char) ((uc >> (trail_bytes * 6)) | utf8_lead_bits[trail_bytes]);
2012-11-11 09:57:01 +01:00
2013-05-20 20:37:13 +02:00
for (i = trail_bytes * 6 - 6; i >= 0; i -= 6) {
_parseBuffer[_parseBufferCount++] = (char) (((uc >> i) & 0x3F) | 0x80);
2012-11-11 09:57:01 +01:00
}
2013-05-20 20:37:13 +02:00
_parseBuffer[_parseBufferCount] = 0;
2012-11-11 09:57:01 +01:00
2013-05-20 20:37:13 +02:00
return 1;
2012-11-11 09:57:01 +01:00
}
2013-05-20 20:37:13 +02:00
void Parser::parseBuffer()
2012-11-11 09:57:01 +01:00
{
2013-05-20 20:37:13 +02:00
if (_pHandler)
2012-11-11 09:57:01 +01:00
{
2013-05-20 20:37:13 +02:00
int type = _type; // just to silence g++
2012-11-11 09:57:01 +01:00
2013-05-20 20:37:13 +02:00
if (type != JSON_T_NONE)
2012-11-11 09:57:01 +01:00
{
2013-05-20 20:37:13 +02:00
assertNonContainer();
2012-11-11 09:57:01 +01:00
2013-05-20 20:37:13 +02:00
switch(type)
2012-11-11 09:57:01 +01:00
{
2013-05-20 20:37:13 +02:00
case JSON_T_TRUE:
2012-11-11 09:57:01 +01:00
{
2013-05-20 20:37:13 +02:00
_pHandler->value(true);
break;
2012-11-11 09:57:01 +01:00
}
2013-05-20 20:37:13 +02:00
case JSON_T_FALSE:
2012-11-11 09:57:01 +01:00
{
2013-05-20 20:37:13 +02:00
_pHandler->value(false);
break;
2012-11-11 09:57:01 +01:00
}
2013-05-20 20:37:13 +02:00
case JSON_T_NULL:
2012-11-11 09:57:01 +01:00
{
2013-05-20 20:37:13 +02:00
_pHandler->null();
break;
2012-11-11 09:57:01 +01:00
}
2013-05-20 20:37:13 +02:00
case JSON_T_FLOAT:
{
// Float can't end with a dot
if ( _parseBuffer[_parseBufferCount-1] == '.' ) throw SyntaxException("JSON syntax error");
2012-11-11 09:57:01 +01:00
2013-05-20 20:37:13 +02:00
double float_value = NumberParser::parseFloat(_parseBuffer.begin());
_pHandler->value(float_value);
break;
}
2013-05-20 20:37:13 +02:00
case JSON_T_INTEGER:
{
2013-05-20 20:37:13 +02:00
#if defined(POCO_HAVE_INT64)
try
{
Int64 value = NumberParser::parse64(_parseBuffer.begin());
// if number is 32-bit, then handle as such
2013-05-20 23:36:58 -05:00
if (value > std::numeric_limits<int>::max()
|| value < std::numeric_limits<int>::min() )
2013-05-20 20:37:13 +02:00
{
_pHandler->value(value);
}
else
{
_pHandler->value(static_cast<int>(value));
}
}
// try to handle error as unsigned in case of overflow
catch ( const SyntaxException& )
{
UInt64 value = NumberParser::parseUnsigned64(_parseBuffer.begin());
// if number is 32-bit, then handle as such
if ( value > std::numeric_limits<unsigned>::max() )
{
_pHandler->value(value);
}
else
{
_pHandler->value(static_cast<unsigned>(value));
}
}
#else
try
{
int value = NumberParser::parse(_parseBuffer.begin());
_pHandler->value(value);
}
// try to handle error as unsigned in case of overflow
catch ( const SyntaxException& )
{
unsigned value = NumberParser::parseUnsigned(_parseBuffer.begin());
_pHandler->value(value);
}
#endif
}
2013-05-20 20:37:13 +02:00
break;
case JSON_T_STRING:
{
2013-05-20 20:37:13 +02:00
std::string str(_parseBuffer.begin(), _parseBufferCount);
_pHandler->value(str);
break;
}
}
2012-11-11 09:57:01 +01:00
}
}
2013-05-20 20:37:13 +02:00
clearBuffer();
}
2012-11-11 09:57:01 +01:00
2013-05-20 20:37:13 +02:00
/// Classifies the first byte of a UTF-8 sequence.
///
/// Returns the total length of the sequence the byte introduces (1 to 4),
/// or 0 if the byte cannot start a sequence.
int Parser::utf8_check_first(char byte)
{
	const unsigned char lead = static_cast<unsigned char>(byte);

	if (lead < 0x80) return 1; // plain ASCII
	if (lead < 0xC2) return 0; // 0x80-0xBF: continuation byte; 0xC0/0xC1: overlong ASCII
	if (lead < 0xE0) return 2; // 0xC2-0xDF: 2-byte sequence
	if (lead < 0xF0) return 3; // 0xE0-0xEF: 3-byte sequence
	if (lead < 0xF5) return 4; // 0xF0-0xF4: 4-byte sequence

	// 0xF5 and above: restricted (start of 4-, 5- or 6-byte sequence) or invalid UTF-8
	return 0;
}
} } // namespace Poco::JSON