mirror of
https://github.com/pocoproject/poco.git
synced 2025-01-02 06:35:05 +01:00
42ff341cb9
- fixed GH #119: JSON::Object holds values in ordered map - added PrintHandler - renamed DefaultHandler to ParseHandler - redefined DefaultHandler as typedef to ParseHandler
795 lines
15 KiB
C++
795 lines
15 KiB
C++
//
|
|
// Parser.cpp
|
|
//
|
|
// $Id$
|
|
//
|
|
// Library: JSON
|
|
// Package: JSON
|
|
// Module: Parser
|
|
//
|
|
// Copyright (c) 2012, Applied Informatics Software Engineering GmbH.
|
|
// and Contributors.
|
|
//
|
|
// Permission is hereby granted, free of charge, to any person or organization
|
|
// obtaining a copy of the software and accompanying documentation covered by
|
|
// this license (the "Software") to use, reproduce, display, distribute,
|
|
// execute, and transmit the Software, and to prepare derivative works of the
|
|
// Software, and to permit third-parties to whom the Software is furnished to
|
|
// do so, all subject to the following:
|
|
//
|
|
// The copyright notices in the Software and this entire statement, including
|
|
// the above license grant, this restriction and the following disclaimer,
|
|
// must be included in all copies of the Software, in whole or in part, and
|
|
// all derivative works of the Software, unless such copies or derivative
|
|
// works are solely in the form of machine-executable object code generated by
|
|
// a source language processor.
|
|
//
|
|
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
// FITNESS FOR A PARTICULAR PURPOSE, TITLE AND NON-INFRINGEMENT. IN NO EVENT
|
|
// SHALL THE COPYRIGHT HOLDERS OR ANYONE DISTRIBUTING THE SOFTWARE BE LIABLE
|
|
// FOR ANY DAMAGES OR OTHER LIABILITY, WHETHER IN CONTRACT, TORT OR OTHERWISE,
|
|
// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
|
// DEALINGS IN THE SOFTWARE.
|
|
//
|
|
|
|
|
|
#include "Poco/JSON/Parser.h"
|
|
#include "Poco/JSON/JSONException.h"
|
|
#include "Poco/Ascii.h"
|
|
#include "Poco/Token.h"
|
|
#include "Poco/UTF8Encoding.h"
|
|
#undef min
|
|
#undef max
|
|
#include <limits>
|
|
|
|
|
|
namespace Poco {
|
|
namespace JSON {
|
|
|
|
|
|
class SeparatorToken: public Token
|
|
{
|
|
public:
|
|
SeparatorToken()
|
|
{
|
|
}
|
|
|
|
virtual ~SeparatorToken()
|
|
{
|
|
}
|
|
|
|
Class tokenClass() const
|
|
{
|
|
return Token::SEPARATOR_TOKEN;
|
|
}
|
|
|
|
bool start(char c, std::istream& istr)
|
|
{
|
|
if (c == '{'
|
|
|| c == '}'
|
|
|| c == ']'
|
|
|| c == '['
|
|
|| c == ','
|
|
|| c == ':')
|
|
{
|
|
_value = c;
|
|
return true;
|
|
}
|
|
|
|
if ( c == '\'' )
|
|
{
|
|
throw JSONException("Invalid quote found");
|
|
}
|
|
|
|
else return false;
|
|
}
|
|
|
|
void finish(std::istream& istr)
|
|
{
|
|
}
|
|
};
|
|
|
|
|
|
class StringToken: public Token
|
|
{
|
|
public:
|
|
StringToken()
|
|
{
|
|
}
|
|
|
|
virtual ~StringToken()
|
|
{
|
|
}
|
|
|
|
Class tokenClass() const
|
|
{
|
|
return Token::STRING_LITERAL_TOKEN;
|
|
}
|
|
|
|
bool start(char c, std::istream& istr)
|
|
{
|
|
if (c == '"')
|
|
{
|
|
_value = ""; // We don't need the quote!
|
|
return true;
|
|
}
|
|
else return false;
|
|
}
|
|
|
|
void finish(std::istream& istr)
|
|
{
|
|
int c = 0;
|
|
while ((c = istr.get()) != -1)
|
|
{
|
|
if (c == 0)
|
|
{
|
|
throw JSONException("Null byte not allowed");
|
|
}
|
|
|
|
if ( 0 < c && c <= 0x1F )
|
|
{
|
|
throw JSONException(format("Control character 0x%x not allowed", (unsigned int) c));
|
|
}
|
|
|
|
if (c == '"')
|
|
break;
|
|
|
|
if(0x80 <= c && c <= 0xFF)
|
|
{
|
|
int count = utf8_check_first(c);
|
|
if (!count)
|
|
{
|
|
throw JSONException(format("Unable to decode byte 0x%x", (unsigned int) c));
|
|
}
|
|
|
|
char buffer[5];
|
|
buffer[0] = c;
|
|
for(int i = 1; i < count; ++i)
|
|
{
|
|
buffer[i] = istr.get();
|
|
}
|
|
|
|
if ( !UTF8Encoding::isLegal((unsigned char*) buffer, count) )
|
|
{
|
|
throw JSONException("No legal UTF8 found");
|
|
}
|
|
buffer[count] = '\0';
|
|
_value += buffer;
|
|
|
|
continue;
|
|
}
|
|
|
|
if (c == '\\') // Escaped String
|
|
{
|
|
c = istr.get();
|
|
switch(c)
|
|
{
|
|
case '"' : c = '"'; break;
|
|
case '\\' : c = '\\'; break;
|
|
case '/' : c = '/'; break;
|
|
case 'b' : c = '\b'; break;
|
|
case 'f' : c = '\f'; break;
|
|
case 'n' : c = '\n'; break;
|
|
case 'r' : c = '\r'; break;
|
|
case 't' : c = '\t'; break;
|
|
case 'u' : // Unicode
|
|
{
|
|
Poco::Int32 unicode = decodeUnicode(istr);
|
|
if ( unicode == 0 )
|
|
{
|
|
throw JSONException("\\u0000 is not allowed");
|
|
}
|
|
if ( unicode >= 0xD800 && unicode <= 0xDBFF )
|
|
{
|
|
c = istr.get();
|
|
if ( c != '\\' )
|
|
{
|
|
throw JSONException("Invalid unicode surrogate pair");
|
|
}
|
|
c = istr.get();
|
|
if ( c != 'u' )
|
|
{
|
|
throw JSONException("Invalid unicode surrogate pair");
|
|
}
|
|
Poco::Int32 surrogatePair = decodeUnicode(istr);
|
|
if ( 0xDC00 <= surrogatePair && surrogatePair <= 0xDFFF )
|
|
{
|
|
unicode = 0x10000 + ((unicode & 0x3FF) << 10) + (surrogatePair & 0x3FF);
|
|
}
|
|
else
|
|
{
|
|
throw JSONException("Invalid unicode surrogate pair");
|
|
}
|
|
}
|
|
else if ( 0xDC00 <= unicode && unicode <= 0xDFFF )
|
|
{
|
|
throw JSONException("Invalid unicode");
|
|
}
|
|
|
|
Poco::UTF8Encoding utf8encoding;
|
|
int length = utf8encoding.convert(unicode, NULL, 0);
|
|
std::vector<unsigned char> convert(length);
|
|
utf8encoding.convert(unicode, &convert[0], length);
|
|
for(int i = 0; i < length; ++i)
|
|
{
|
|
_value += (char) convert[i];
|
|
}
|
|
continue;
|
|
}
|
|
default:
|
|
{
|
|
throw JSONException(format("Invalid escape '%c' character used", (char) c));
|
|
}
|
|
}
|
|
}
|
|
_value += c;
|
|
}
|
|
|
|
if ( c == -1 )
|
|
{
|
|
throw JSONException("Unterminated string found");
|
|
}
|
|
}
|
|
|
|
Poco::Int32 decodeUnicode(std::istream& istr)
|
|
{
|
|
Poco::Int32 value = 0;
|
|
|
|
for(int i = 0; i < 4; i++)
|
|
{
|
|
value <<= 4;
|
|
int nc = istr.peek();
|
|
if ( nc == -1 )
|
|
{
|
|
throw JSONException("Invalid unicode sequence");
|
|
}
|
|
istr.get(); // No EOF, so read the character
|
|
|
|
if (nc >= '0' && nc <= '9')
|
|
value += nc - '0';
|
|
else if (nc >= 'A' && nc <= 'F')
|
|
value += 10 + nc - 'A';
|
|
else if (nc >= 'a' && nc <= 'f')
|
|
value += 10 + nc - 'a';
|
|
else
|
|
throw JSONException("Invalid unicode sequence. Hexadecimal digit expected");
|
|
}
|
|
|
|
return value;
|
|
}
|
|
|
|
private:
|
|
int utf8_check_first(char byte)
|
|
{
|
|
unsigned char u = (unsigned char) byte;
|
|
|
|
if(u < 0x80)
|
|
return 1;
|
|
|
|
if (0x80 <= u && u <= 0xBF)
|
|
{
|
|
/* second, third or fourth byte of a multi-byte
|
|
sequence, i.e. a "continuation byte" */
|
|
return 0;
|
|
}
|
|
else if(u == 0xC0 || u == 0xC1)
|
|
{
|
|
/* overlong encoding of an ASCII byte */
|
|
return 0;
|
|
}
|
|
else if(0xC2 <= u && u <= 0xDF)
|
|
{
|
|
/* 2-byte sequence */
|
|
return 2;
|
|
}
|
|
else if(0xE0 <= u && u <= 0xEF)
|
|
{
|
|
/* 3-byte sequence */
|
|
return 3;
|
|
}
|
|
else if(0xF0 <= u && u <= 0xF4)
|
|
{
|
|
/* 4-byte sequence */
|
|
return 4;
|
|
}
|
|
else
|
|
{
|
|
/* u >= 0xF5 */
|
|
/* Restricted (start of 4-, 5- or 6-byte sequence) or invalid
|
|
UTF-8 */
|
|
return 0;
|
|
}
|
|
}
|
|
};
|
|
|
|
|
|
class KeywordToken : public Token
|
|
{
|
|
public:
|
|
KeywordToken()
|
|
{
|
|
}
|
|
|
|
virtual ~KeywordToken()
|
|
{
|
|
}
|
|
|
|
Class tokenClass() const
|
|
{
|
|
return Token::KEYWORD_TOKEN;
|
|
}
|
|
|
|
bool start(char c, std::istream& istr)
|
|
{
|
|
if ( Ascii::isAlpha(c) )
|
|
{
|
|
_value = c;
|
|
return true;
|
|
}
|
|
return false;
|
|
}
|
|
|
|
void finish(std::istream& istr)
|
|
{
|
|
int c = istr.peek();
|
|
while (c != -1 && Ascii::isAlpha(c) )
|
|
{
|
|
istr.get();
|
|
_value += c;
|
|
c = istr.peek();
|
|
}
|
|
}
|
|
};
|
|
|
|
|
|
class NumberToken: public Token
|
|
{
|
|
public:
|
|
NumberToken() : _activeClass(INTEGER_LITERAL_TOKEN)
|
|
{
|
|
}
|
|
|
|
virtual ~NumberToken()
|
|
{
|
|
}
|
|
|
|
Class tokenClass() const
|
|
{
|
|
return _activeClass;
|
|
}
|
|
|
|
bool start(char c, std::istream& istr)
|
|
{
|
|
// Reset the active class to integer
|
|
_activeClass = INTEGER_LITERAL_TOKEN;
|
|
|
|
if ( c == -1 )
|
|
return false;
|
|
|
|
if (Ascii::isDigit(c))
|
|
{
|
|
if ( c == '0' )
|
|
{
|
|
int nc = istr.peek();
|
|
if ( Ascii::isDigit(nc) ) // A digit after a zero is not allowed
|
|
{
|
|
throw JSONException("Number can't start with a zero");
|
|
}
|
|
}
|
|
_value = c;
|
|
return true;
|
|
}
|
|
|
|
if (c == '-')
|
|
{
|
|
_value = c;
|
|
|
|
int nc = istr.peek();
|
|
if (Ascii::isDigit(nc))
|
|
{
|
|
if (nc == '0')
|
|
{
|
|
_value += '0';
|
|
istr.get();
|
|
|
|
nc = istr.peek();
|
|
if ( Ascii::isDigit(nc) ) // A digit after -0 is not allowed
|
|
{
|
|
throw JSONException("Number can't start with a zero");
|
|
}
|
|
}
|
|
return true;
|
|
}
|
|
}
|
|
|
|
return false;
|
|
}
|
|
|
|
void finish(std::istream& istr)
|
|
{
|
|
int c;
|
|
while( (c = istr.peek()) != -1)
|
|
{
|
|
if (Ascii::isDigit(c))
|
|
{
|
|
_value += c;
|
|
istr.get();
|
|
}
|
|
else
|
|
{
|
|
switch(c)
|
|
{
|
|
case '.': // Float
|
|
{
|
|
if (_activeClass == Token::FLOAT_LITERAL_TOKEN)
|
|
{
|
|
throw JSONException("Invalid float value");
|
|
}
|
|
_activeClass = Token::FLOAT_LITERAL_TOKEN;
|
|
|
|
_value += c;
|
|
istr.get();
|
|
|
|
// After a . we need a digit
|
|
c = istr.peek();
|
|
if ( ! Ascii::isDigit(c) )
|
|
{
|
|
throw JSONException("Invalid float value");
|
|
}
|
|
|
|
break;
|
|
}
|
|
case 'E':
|
|
case 'e':
|
|
{
|
|
if (_activeClass == Token::DOUBLE_LITERAL_TOKEN)
|
|
{
|
|
throw JSONException("Invalid double value");
|
|
}
|
|
_activeClass = Token::DOUBLE_LITERAL_TOKEN;
|
|
|
|
// Add the e or E
|
|
_value += c;
|
|
istr.get();
|
|
|
|
// When the next char is - or + then read the next char
|
|
c = istr.peek();
|
|
if (c == '-' || c == '+')
|
|
{
|
|
_value += c;
|
|
istr.get();
|
|
c = istr.peek();
|
|
}
|
|
|
|
if (! Ascii::isDigit(c))
|
|
{
|
|
throw JSONException("Invalid double value");
|
|
}
|
|
|
|
break;
|
|
}
|
|
default:
|
|
return; // End of number token
|
|
}
|
|
|
|
istr.get(); // If we get here we have a valid character for a number
|
|
_value += c;
|
|
}
|
|
}
|
|
}
|
|
|
|
private:
|
|
Class _activeClass;
|
|
};
|
|
|
|
|
|
// Creates a parser without a handler and registers the token types used
// for JSON input with the tokenizer.
Parser::Parser():
	_tokenizer(),
	_handler(NULL)
{
	// NOTE(review): the registration order is kept as is; it presumably
	// determines which token gets the first chance at a character.
	// Confirm against StreamTokenizer before reordering.
	_tokenizer.addToken(new WhitespaceToken());
	_tokenizer.addToken(new InvalidToken());
	_tokenizer.addToken(new SeparatorToken());
	_tokenizer.addToken(new StringToken());
	_tokenizer.addToken(new NumberToken());
	_tokenizer.addToken(new KeywordToken());
}
|
|
|
|
|
|
// Nothing to release explicitly here.
Parser::~Parser()
{
}
|
|
|
|
|
|
// Returns the next token from the tokenizer. Reaching the end of the
// input here is an error and raises a JSONException.
const Token* Parser::nextToken()
{
	const Token* next = _tokenizer.next();
	if (!next->is(Token::EOF_TOKEN))
		return next;

	throw JSONException("Unexpected EOF found");
}
|
|
|
|
|
|
void Parser::parse(std::istream& in)
|
|
{
|
|
_tokenizer.attachToStream(in);
|
|
const Token* token = nextToken();
|
|
|
|
if (token->is(Token::SEPARATOR_TOKEN))
|
|
{
|
|
// This must be a { or a [
|
|
if (token->asChar() == '{')
|
|
{
|
|
readObject();
|
|
}
|
|
else if (token->asChar() == '[')
|
|
{
|
|
readArray();
|
|
}
|
|
else
|
|
{
|
|
throw JSONException(format("Invalid separator '%c' found. Expecting { or [", token->asChar()));
|
|
}
|
|
token = _tokenizer.next();
|
|
if (! token->is(Token::EOF_TOKEN))
|
|
{
|
|
throw JSONException(format("EOF expected but found '%s'", token->asString()));
|
|
}
|
|
}
|
|
else
|
|
{
|
|
throw JSONException(format("Invalid token '%s' found. Expecting { or [", token->asString()));
|
|
}
|
|
}
|
|
|
|
|
|
void Parser::readObject()
|
|
{
|
|
if (_handler != NULL)
|
|
{
|
|
_handler->startObject();
|
|
}
|
|
|
|
if ( readRow(true) ) // First call is special: check for empty object
|
|
{
|
|
while(readRow());
|
|
}
|
|
|
|
if (_handler != NULL)
|
|
{
|
|
_handler->endObject();
|
|
}
|
|
}
|
|
|
|
|
|
bool Parser::readRow(bool firstCall)
|
|
{
|
|
const Token* token = nextToken();
|
|
|
|
if (firstCall && token->tokenClass() == Token::SEPARATOR_TOKEN && token->asChar() == '}')
|
|
{
|
|
return false; // End of object is possible for an empty object
|
|
}
|
|
|
|
if (token->tokenClass() == Token::STRING_LITERAL_TOKEN)
|
|
{
|
|
std::string propertyName = token->tokenString();
|
|
if ( _handler != NULL )
|
|
{
|
|
_handler->key(propertyName);
|
|
}
|
|
|
|
token = nextToken();
|
|
|
|
if (token->is(Token::SEPARATOR_TOKEN)
|
|
&& token->asChar() == ':')
|
|
{
|
|
readValue(nextToken());
|
|
|
|
token = nextToken();
|
|
|
|
if (token->is(Token::SEPARATOR_TOKEN))
|
|
{
|
|
if (token->asChar() == ',')
|
|
{
|
|
_handler->comma();
|
|
return true; // Read next row
|
|
}
|
|
else if (token->asChar() == '}')
|
|
{
|
|
return false; // End of object
|
|
}
|
|
else
|
|
{
|
|
throw JSONException(format("Invalid separator '%c' found. Expecting , or }", token->asChar()));
|
|
}
|
|
}
|
|
else
|
|
{
|
|
throw JSONException(format("Invalid token '%s' found. Expecting , or }", token->asString()));
|
|
}
|
|
}
|
|
else
|
|
{
|
|
throw JSONException(format("Invalid token '%s' found. Expecting :", token->asString()));
|
|
}
|
|
}
|
|
else
|
|
{
|
|
throw JSONException(format("Invalid token '%s' found. Expecting key", token->asString()));
|
|
}
|
|
}
|
|
|
|
|
|
void Parser::readValue(const Token* token)
|
|
{
|
|
switch(token->tokenClass())
|
|
{
|
|
default:
|
|
case Token::IDENTIFIER_TOKEN:
|
|
case Token::OPERATOR_TOKEN:
|
|
case Token::CHAR_LITERAL_TOKEN:
|
|
break;
|
|
|
|
case Token::INTEGER_LITERAL_TOKEN:
|
|
if (_handler != NULL)
|
|
{
|
|
#if defined(POCO_HAVE_INT64)
|
|
try
|
|
{
|
|
Int64 value = token->asInteger64();
|
|
// if number is 32-bit, then handle as such
|
|
if ( value > std::numeric_limits<int>::max()
|
|
|| value < std::numeric_limits<int>::min() )
|
|
{
|
|
_handler->value(value);
|
|
}
|
|
else
|
|
{
|
|
_handler->value(static_cast<int>(value));
|
|
}
|
|
}
|
|
// try to handle error as unsigned in case of overflow
|
|
catch ( const SyntaxException& )
|
|
{
|
|
UInt64 value = token->asUnsignedInteger64();
|
|
// if number is 32-bit, then handle as such
|
|
if ( value > std::numeric_limits<unsigned>::max() )
|
|
{
|
|
_handler->value(value);
|
|
}
|
|
else
|
|
{
|
|
_handler->value(static_cast<unsigned>(value));
|
|
}
|
|
}
|
|
#else
|
|
try
|
|
{
|
|
int value = token->asInteger();
|
|
_handle->value(value);
|
|
}
|
|
// try to handle error as unsigned in case of overflow
|
|
catch ( const SyntaxException& )
|
|
{
|
|
unsigned value = token->asUnsignedInteger();
|
|
_handle->value(value);
|
|
}
|
|
#endif
|
|
}
|
|
break;
|
|
case Token::KEYWORD_TOKEN:
|
|
{
|
|
if (token->tokenString().compare("null") == 0)
|
|
{
|
|
if (_handler != NULL)
|
|
{
|
|
_handler->null();
|
|
}
|
|
}
|
|
else if (token->tokenString().compare("true") == 0)
|
|
{
|
|
if (_handler != NULL)
|
|
{
|
|
_handler->value(true);
|
|
}
|
|
}
|
|
else if (token->tokenString().compare("false") == 0)
|
|
{
|
|
if (_handler != NULL)
|
|
{
|
|
_handler->value(false);
|
|
}
|
|
}
|
|
else
|
|
{
|
|
throw JSONException(format("Invalid keyword '%s' found", token->asString()));
|
|
}
|
|
break;
|
|
}
|
|
case Token::FLOAT_LITERAL_TOKEN:
|
|
// Fall through
|
|
case Token::DOUBLE_LITERAL_TOKEN:
|
|
if (_handler != NULL)
|
|
{
|
|
_handler->value(token->asFloat());
|
|
}
|
|
break;
|
|
case Token::STRING_LITERAL_TOKEN:
|
|
if (_handler != NULL)
|
|
{
|
|
_handler->value(token->tokenString());
|
|
}
|
|
break;
|
|
case Token::SEPARATOR_TOKEN:
|
|
{
|
|
if (token->asChar() == '{')
|
|
{
|
|
readObject();
|
|
}
|
|
else if (token->asChar() == '[')
|
|
{
|
|
readArray();
|
|
}
|
|
break;
|
|
}
|
|
case Token::INVALID_TOKEN:
|
|
throw JSONException(format("Invalid token '%s' found", token->asString()));
|
|
}
|
|
}
|
|
|
|
|
|
void Parser::readArray()
|
|
{
|
|
if (_handler != NULL)
|
|
{
|
|
_handler->startArray();
|
|
}
|
|
|
|
if (readElements(true)) // First call is special: check for empty array
|
|
{
|
|
while(readElements());
|
|
}
|
|
|
|
if (_handler != NULL)
|
|
{
|
|
_handler->endArray();
|
|
}
|
|
}
|
|
|
|
|
|
bool Parser::readElements(bool firstCall)
|
|
{
|
|
const Token* token = nextToken();
|
|
|
|
if (firstCall && token->is(Token::SEPARATOR_TOKEN) && token->asChar() == ']')
|
|
{
|
|
// End of array is possible for an empty array
|
|
return false;
|
|
}
|
|
|
|
readValue(token);
|
|
|
|
token = nextToken();
|
|
|
|
if (token->is(Token::SEPARATOR_TOKEN))
|
|
{
|
|
if (token->asChar() == ']')
|
|
return false; // End of array
|
|
|
|
if (token->asChar() == ',')
|
|
{
|
|
_handler->comma();
|
|
return true;
|
|
}
|
|
|
|
throw JSONException(format("Invalid separator '%c' found. Expecting , or ]", token->asChar()));
|
|
}
|
|
|
|
throw JSONException(format("Invalid token '%s' found.", token->asString()));
|
|
}
|
|
|
|
} } // namespace Poco::JSON
|