poco/JSON/src/Parser.cpp

709 lines
13 KiB
C++
Raw Normal View History

2012-11-11 09:57:01 +01:00
//
// Parser.cpp
//
// $Id$
//
// Library: JSON
// Package: JSON
// Module: Parser
//
// Copyright (c) 2012, Applied Informatics Software Engineering GmbH.
// and Contributors.
//
// Permission is hereby granted, free of charge, to any person or organization
// obtaining a copy of the software and accompanying documentation covered by
// this license (the "Software") to use, reproduce, display, distribute,
// execute, and transmit the Software, and to prepare derivative works of the
// Software, and to permit third-parties to whom the Software is furnished to
// do so, all subject to the following:
//
// The copyright notices in the Software and this entire statement, including
// the above license grant, this restriction and the following disclaimer,
// must be included in all copies of the Software, in whole or in part, and
// all derivative works of the Software, unless such copies or derivative
// works are solely in the form of machine-executable object code generated by
// a source language processor.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE, TITLE AND NON-INFRINGEMENT. IN NO EVENT
// SHALL THE COPYRIGHT HOLDERS OR ANYONE DISTRIBUTING THE SOFTWARE BE LIABLE
// FOR ANY DAMAGES OR OTHER LIABILITY, WHETHER IN CONTRACT, TORT OR OTHERWISE,
// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
// DEALINGS IN THE SOFTWARE.
//
#include "Poco/JSON/Parser.h"
#include "Poco/JSON/JSONException.h"
#include "Poco/Ascii.h"
#include "Poco/Token.h"
#include "Poco/UnicodeConverter.h"
2012-11-11 09:57:01 +01:00
#undef min
#undef max
#include <limits>
namespace Poco {
namespace JSON {
class SeparatorToken: public Token
{
public:
SeparatorToken()
{
}
virtual ~SeparatorToken()
{
}
Class tokenClass() const
{
return Token::SEPARATOR_TOKEN;
}
bool start(char c, std::istream& istr)
{
if ( c == '{'
|| c == '}'
|| c == ']'
|| c == '['
|| c == ','
|| c == ':' )
{
_value = c;
return true;
}
if ( c == '\'' )
{
throw JSONException("Invalid quote found");
}
else return false;
}
void finish(std::istream& istr)
{
}
};
class StringToken: public Token
{
public:
StringToken()
{
}
virtual ~StringToken()
{
}
Class tokenClass() const
{
return Token::STRING_LITERAL_TOKEN;
}
bool start(char c, std::istream& istr)
{
if ( c == '"')
{
_value = ""; // We don't need the quote!
return true;
}
else return false;
}
void finish(std::istream& istr)
{
int c = istr.get();
while (c != -1)
{
if ( c == 0 )
{
throw JSONException("Null byte not allowed");
}
if ( 0 < c && c <= 0x1F )
{
throw JSONException(format("Control character 0x%x not allowed", (unsigned int) c));
}
if ( c == '"' )
break;
if ( c == '\\' ) // Escaped String
{
c = istr.get();
switch(c)
{
case '"' :
_value += '"';
2012-11-11 09:57:01 +01:00
break;
case '\\' :
_value += '\\';
2012-11-11 09:57:01 +01:00
break;
case '/' :
_value += '/';
2012-11-11 09:57:01 +01:00
break;
case 'b' :
_value += '\b';
2012-11-11 09:57:01 +01:00
break;
case 'f' :
_value += '\f';
2012-11-11 09:57:01 +01:00
break;
case 'n' :
_value += '\n';
2012-11-11 09:57:01 +01:00
break;
case 'r' :
_value += '\r';
2012-11-11 09:57:01 +01:00
break;
case 't' :
_value += '\t';
2012-11-11 09:57:01 +01:00
break;
case 'u' : // Unicode
{
Poco::Int32 unicode = decodeUnicode(istr);
if ( unicode == 0 )
{
throw JSONException("\\u0000 is not allowed");
}
if ( unicode >= 0xD800 && unicode <= 0xDBFF )
{
c = istr.get();
if ( c != '\\' )
{
throw JSONException("Invalid unicode surrogate pair");
}
c = istr.get();
if ( c != 'u' )
{
throw JSONException("Invalid unicode surrogate pair");
}
Poco::Int32 surrogatePair = decodeUnicode(istr);
if ( 0xDC00 <= surrogatePair && surrogatePair <= 0xDFFF )
{
unicode = 0x10000 + ((unicode & 0x3FF) << 10) + (surrogatePair & 0x3FF);
}
else
{
throw JSONException("Invalid unicode surrogate pair");
}
}
else if ( 0xDC00 <= unicode && unicode <= 0xDFFF )
{
throw JSONException("Invalid unicode");
}
//unicode to utf8
std::string utf8;
UnicodeConverter::toUTF8((const UTF32Char*)&unicode,1,utf8);
_value += utf8;
2012-11-11 09:57:01 +01:00
break;
}
default:
{
throw JSONException(format("Invalid escape '%c' character used", (char) c));
}
}
}else{
_value += c;
2012-11-11 09:57:01 +01:00
}
c = istr.get();
}
if ( c == -1 )
{
throw JSONException("Unterminated string found");
}
}
Poco::Int32 decodeUnicode(std::istream& istr)
{
Poco::Int32 value = 0;
for(int i = 0; i < 4; i++)
{
value <<= 4;
int nc = istr.peek();
if ( nc == -1 )
{
throw JSONException("Invalid unicode sequence");
}
istr.get(); // No EOF, so read the character
if (nc >= '0' && nc <= '9')
value += nc - '0';
else if (nc >= 'A' && nc <= 'F')
value += 10 + nc - 'A';
else if (nc >= 'a' && nc <= 'f')
value += 10 + nc - 'a';
else
throw JSONException("Invalid unicode sequence. Hexadecimal digit expected");
}
return value;
}
};
class KeywordToken : public Token
{
public:
KeywordToken()
{
}
virtual ~KeywordToken()
{
}
Class tokenClass() const
{
return Token::KEYWORD_TOKEN;
}
bool start(char c, std::istream& istr)
{
if ( Ascii::isAlpha(c) )
{
_value = c;
return true;
}
return false;
}
void finish(std::istream& istr)
{
int c = istr.peek();
while (c != -1 && Ascii::isAlpha(c) )
{
istr.get();
_value += c;
c = istr.peek();
}
}
};
class NumberToken: public Token
{
public:
NumberToken() : _activeClass(INTEGER_LITERAL_TOKEN)
{
}
virtual ~NumberToken()
{
}
Class tokenClass() const
{
return _activeClass;
}
bool start(char c, std::istream& istr)
{
// Reset the active class to integer
_activeClass = INTEGER_LITERAL_TOKEN;
if ( c == -1 )
return false;
if ( Ascii::isDigit(c) )
{
if ( c == '0' )
{
int nc = istr.peek();
if ( Ascii::isDigit(nc) ) // A digit after a zero is not allowed
{
throw JSONException("Number can't start with a zero");
}
}
_value = c;
return true;
}
if ( c == '-' )
{
_value = c;
int nc = istr.peek();
if ( Ascii::isDigit(nc) )
{
if ( nc == '0' )
{
_value += '0';
istr.get();
nc = istr.peek();
if ( Ascii::isDigit(nc) ) // A digit after -0 is not allowed
{
throw JSONException("Number can't start with a zero");
}
}
return true;
}
}
return false;
}
void finish(std::istream& istr)
{
int c;
while( (c = istr.peek()) != -1)
{
if ( Ascii::isDigit(c) )
{
_value += c;
istr.get();
}
else
{
switch(c)
{
case '.': // Float
{
if ( _activeClass == Token::FLOAT_LITERAL_TOKEN )
{
throw JSONException("Invalid float value");
}
_activeClass = Token::FLOAT_LITERAL_TOKEN;
_value += c;
istr.get();
// After a . we need a digit
c = istr.peek();
if ( ! Ascii::isDigit(c) )
{
throw JSONException("Invalid float value");
}
break;
}
case 'E':
case 'e':
{
if ( _activeClass == Token::DOUBLE_LITERAL_TOKEN )
{
throw JSONException("Invalid double value");
}
_activeClass = Token::DOUBLE_LITERAL_TOKEN;
// Add the e or E
_value += c;
istr.get();
// When the next char is - or + then read the next char
c = istr.peek();
if ( c == '-' || c == '+' )
{
_value += c;
istr.get();
c = istr.peek();
}
if ( ! Ascii::isDigit(c) )
{
throw JSONException("Invalid double value");
}
break;
}
default:
return; // End of number token
}
istr.get(); // If we get here we have a valid character for a number
_value += c;
}
}
}
private:
Class _activeClass;
};
Parser::Parser() : _tokenizer(), _handler(NULL)
{
_tokenizer.addToken(new WhitespaceToken());
_tokenizer.addToken(new InvalidToken());
_tokenizer.addToken(new SeparatorToken());
_tokenizer.addToken(new StringToken());
_tokenizer.addToken(new NumberToken());
_tokenizer.addToken(new KeywordToken());
}
Parser::~Parser()
{
}
const Token* Parser::nextToken()
{
const Token* token = _tokenizer.next();
if ( token->is(Token::EOF_TOKEN) )
{
throw JSONException("Unexpected EOF found");
}
return token;
}
void Parser::parse(std::istream& in)
{
_tokenizer.attachToStream(in);
const Token* token = nextToken();
if ( token->is(Token::SEPARATOR_TOKEN) )
{
// This must be a { or a [
if ( token->asChar() == '{' )
{
readObject();
}
else if ( token->asChar() == '[' )
{
readArray();
}
else
{
throw JSONException(format("Invalid separator '%c' found. Expecting { or [", token->asChar()));
}
token = _tokenizer.next();
if ( ! token->is(Token::EOF_TOKEN) )
{
throw JSONException(format("EOF expected but found '%s'", token->asString()));
}
}
else
{
throw JSONException(format("Invalid token '%s' found. Expecting { or [", token->asString()));
}
}
void Parser::readObject()
{
if ( _handler != NULL )
{
_handler->startObject();
}
if ( readRow(true) ) // First call is special: check for empty object
{
while(readRow());
}
if ( _handler != NULL )
{
_handler->endObject();
}
}
bool Parser::readRow(bool firstCall)
{
const Token* token = nextToken();
if ( firstCall && token->tokenClass() == Token::SEPARATOR_TOKEN && token->asChar() == '}' )
{
return false; // End of object is possible for an empty object
}
if ( token->tokenClass() == Token::STRING_LITERAL_TOKEN )
{
std::string propertyName = token->tokenString();
if ( _handler != NULL )
{
_handler->key(propertyName);
}
token = nextToken();
if ( token->is(Token::SEPARATOR_TOKEN)
&& token->asChar() == ':' )
{
readValue(nextToken());
token = nextToken();
if ( token->is(Token::SEPARATOR_TOKEN) )
{
if ( token->asChar() == ',' )
{
return true; // Read next row
}
else if ( token->asChar() == '}' )
{
return false; // End of object
}
else
{
throw JSONException(format("Invalid separator '%c' found. Expecting , or }", token->asChar()));
}
}
else
{
throw JSONException(format("Invalid token '%s' found. Expecting , or }", token->asString()));
}
}
else
{
throw JSONException(format("Invalid token '%s' found. Expecting :", token->asString()));
}
}
else
{
throw JSONException(format("Invalid token '%s' found. Expecting key", token->asString()));
}
}
void Parser::readValue(const Token* token)
{
switch(token->tokenClass())
{
default:
case Token::IDENTIFIER_TOKEN:
case Token::OPERATOR_TOKEN:
case Token::CHAR_LITERAL_TOKEN:
break;
case Token::INTEGER_LITERAL_TOKEN:
if ( _handler != NULL )
{
#if defined(POCO_HAVE_INT64)
Int64 value = token->asInteger64();
// if number is 32-bit, then handle as such
if ( value > std::numeric_limits<int>::max()
|| value < std::numeric_limits<int>::min() )
{
_handler->value(value);
}
else
{
_handler->value(static_cast<int>(value));
}
2012-11-11 09:57:01 +01:00
#else
int value = token->asInteger();
2012-11-11 09:57:01 +01:00
_handle->value(value);
#endif
}
break;
case Token::KEYWORD_TOKEN:
{
if ( token->tokenString().compare("null") == 0 )
{
if ( _handler != NULL )
{
_handler->null();
}
}
else if ( token->tokenString().compare("true") == 0 )
{
if ( _handler != NULL )
{
_handler->value(true);
}
}
else if ( token->tokenString().compare("false") == 0 )
{
if ( _handler != NULL )
{
_handler->value(false);
}
}
else
{
throw JSONException(format("Invalid keyword '%s' found", token->asString()));
}
break;
}
case Token::FLOAT_LITERAL_TOKEN:
// Fall through
case Token::DOUBLE_LITERAL_TOKEN:
if ( _handler != NULL )
{
_handler->value(token->asFloat());
}
break;
case Token::STRING_LITERAL_TOKEN:
if ( _handler != NULL )
{
_handler->value(token->tokenString());
}
break;
case Token::SEPARATOR_TOKEN:
{
if ( token->asChar() == '{' )
{
readObject();
}
else if ( token->asChar() == '[' )
{
readArray();
}
break;
}
}
}
void Parser::readArray()
{
if ( _handler != NULL )
{
_handler->startArray();
}
if ( readElements(true) ) // First call is special: check for empty array
{
while(readElements());
}
if ( _handler != NULL )
{
_handler->endArray();
}
}
bool Parser::readElements(bool firstCall)
{
const Token* token = nextToken();
if ( firstCall && token->is(Token::SEPARATOR_TOKEN) && token->asChar() == ']' )
{
// End of array is possible for an empty array
return false;
}
readValue(token);
token = nextToken();
if ( token->is(Token::SEPARATOR_TOKEN) )
{
if ( token->asChar() == ']' )
return false; // End of array
if ( token->asChar() == ',' )
return true;
throw JSONException(format("Invalid separator '%c' found. Expecting , or ]", token->asChar()));
}
throw JSONException(format("Invalid token '%s' found.", token->asString()));
}
} } // namespace Poco::JSON