// // Parser.cpp // // $Id$ // // Library: JSON // Package: JSON // Module: Parser // // Copyright (c) 2012, Applied Informatics Software Engineering GmbH. // and Contributors. // // Permission is hereby granted, free of charge, to any person or organization // obtaining a copy of the software and accompanying documentation covered by // this license (the "Software") to use, reproduce, display, distribute, // execute, and transmit the Software, and to prepare derivative works of the // Software, and to permit third-parties to whom the Software is furnished to // do so, all subject to the following: // // The copyright notices in the Software and this entire statement, including // the above license grant, this restriction and the following disclaimer, // must be included in all copies of the Software, in whole or in part, and // all derivative works of the Software, unless such copies or derivative // works are solely in the form of machine-executable object code generated by // a source language processor. // // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, // FITNESS FOR A PARTICULAR PURPOSE, TITLE AND NON-INFRINGEMENT. IN NO EVENT // SHALL THE COPYRIGHT HOLDERS OR ANYONE DISTRIBUTING THE SOFTWARE BE LIABLE // FOR ANY DAMAGES OR OTHER LIABILITY, WHETHER IN CONTRACT, TORT OR OTHERWISE, // ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER // DEALINGS IN THE SOFTWARE. // #include "Poco/JSON/Parser.h" #include "Poco/JSON/JSONException.h" #include "Poco/Ascii.h" #include "Poco/Token.h" #include "Poco/UTF8Encoding.h" #undef min #undef max #include namespace Poco { namespace JSON { class SeparatorToken: public Token { public: SeparatorToken() { } virtual ~SeparatorToken() { } Class tokenClass() const { return Token::SEPARATOR_TOKEN; } bool start(char c, std::istream& istr) { if (c == '{' || c == '}' || c == ']' || c == '[' || c == ',' || c == ':') { _value = c; return true; } if ( c == '\'' ) { throw JSONException("Invalid quote found"); } else return false; } void finish(std::istream& istr) { } }; class StringToken: public Token { public: StringToken() { } virtual ~StringToken() { } Class tokenClass() const { return Token::STRING_LITERAL_TOKEN; } bool start(char c, std::istream& istr) { if (c == '"') { _value = ""; // We don't need the quote! return true; } else return false; } void finish(std::istream& istr) { int c = 0; while ((c = istr.get()) != -1) { if (c == 0) { throw JSONException("Null byte not allowed"); } if ( 0 < c && c <= 0x1F ) { throw JSONException(format("Control character 0x%x not allowed", (unsigned int) c)); } if (c == '"') break; if(0x80 <= c && c <= 0xFF) { int count = utf8_check_first(c); if (!count) { throw JSONException(format("Unable to decode byte 0x%x", (unsigned int) c)); } char buffer[5]; buffer[0] = c; for(int i = 1; i < count; ++i) { buffer[i] = istr.get(); } if ( !UTF8Encoding::isLegal((unsigned char*) buffer, count) ) { throw JSONException("No legal UTF8 found"); } buffer[count] = '\0'; _value += buffer; continue; } if (c == '\\') // Escaped String { c = istr.get(); switch(c) { case '"' : c = '"'; break; case '\\' : c = '\\'; break; case '/' : c = '/'; break; case 'b' : c = '\b'; break; case 'f' : c = '\f'; break; case 'n' : c = '\n'; break; case 'r' : c = '\r'; break; case 't' : c = '\t'; break; case 'u' : // Unicode { Poco::Int32 unicode = decodeUnicode(istr); if ( unicode == 0 ) { throw JSONException("\\u0000 is not allowed"); } if ( unicode >= 0xD800 && unicode <= 0xDBFF ) { c = istr.get(); if ( c != '\\' ) { throw JSONException("Invalid unicode surrogate pair"); } c = istr.get(); if ( c != 'u' ) { throw JSONException("Invalid unicode surrogate pair"); } Poco::Int32 surrogatePair = decodeUnicode(istr); if ( 0xDC00 <= surrogatePair && surrogatePair <= 0xDFFF ) { unicode = 0x10000 + ((unicode & 0x3FF) << 10) + (surrogatePair & 0x3FF); } else { throw JSONException("Invalid unicode surrogate pair"); } } else if ( 0xDC00 <= unicode && unicode <= 0xDFFF ) { throw JSONException("Invalid unicode"); } Poco::UTF8Encoding utf8encoding; int length = utf8encoding.convert(unicode, NULL, 0); std::vector convert(length); utf8encoding.convert(unicode, &convert[0], length); for(int i = 0; i < length; ++i) { _value += (char) convert[i]; } continue; } default: { throw JSONException(format("Invalid escape '%c' character used", (char) c)); } } } _value += c; } if ( c == -1 ) { throw JSONException("Unterminated string found"); } } Poco::Int32 decodeUnicode(std::istream& istr) { Poco::Int32 value = 0; for(int i = 0; i < 4; i++) { value <<= 4; int nc = istr.peek(); if ( nc == -1 ) { throw JSONException("Invalid unicode sequence"); } istr.get(); // No EOF, so read the character if (nc >= '0' && nc <= '9') value += nc - '0'; else if (nc >= 'A' && nc <= 'F') value += 10 + nc - 'A'; else if (nc >= 'a' && nc <= 'f') value += 10 + nc - 'a'; else throw JSONException("Invalid unicode sequence. Hexadecimal digit expected"); } return value; } private: int utf8_check_first(char byte) { unsigned char u = (unsigned char) byte; if(u < 0x80) return 1; if (0x80 <= u && u <= 0xBF) { /* second, third or fourth byte of a multi-byte sequence, i.e. a "continuation byte" */ return 0; } else if(u == 0xC0 || u == 0xC1) { /* overlong encoding of an ASCII byte */ return 0; } else if(0xC2 <= u && u <= 0xDF) { /* 2-byte sequence */ return 2; } else if(0xE0 <= u && u <= 0xEF) { /* 3-byte sequence */ return 3; } else if(0xF0 <= u && u <= 0xF4) { /* 4-byte sequence */ return 4; } else { /* u >= 0xF5 */ /* Restricted (start of 4-, 5- or 6-byte sequence) or invalid UTF-8 */ return 0; } } }; class KeywordToken : public Token { public: KeywordToken() { } virtual ~KeywordToken() { } Class tokenClass() const { return Token::KEYWORD_TOKEN; } bool start(char c, std::istream& istr) { if ( Ascii::isAlpha(c) ) { _value = c; return true; } return false; } void finish(std::istream& istr) { int c = istr.peek(); while (c != -1 && Ascii::isAlpha(c) ) { istr.get(); _value += c; c = istr.peek(); } } }; class NumberToken: public Token { public: NumberToken() : _activeClass(INTEGER_LITERAL_TOKEN) { } virtual ~NumberToken() { } Class tokenClass() const { return _activeClass; } bool start(char c, std::istream& istr) { // Reset the active class to integer _activeClass = INTEGER_LITERAL_TOKEN; if ( c == -1 ) return false; if (Ascii::isDigit(c)) { if ( c == '0' ) { int nc = istr.peek(); if ( Ascii::isDigit(nc) ) // A digit after a zero is not allowed { throw JSONException("Number can't start with a zero"); } } _value = c; return true; } if (c == '-') { _value = c; int nc = istr.peek(); if (Ascii::isDigit(nc)) { if (nc == '0') { _value += '0'; istr.get(); nc = istr.peek(); if ( Ascii::isDigit(nc) ) // A digit after -0 is not allowed { throw JSONException("Number can't start with a zero"); } } return true; } } return false; } void finish(std::istream& istr) { int c; while( (c = istr.peek()) != -1) { if (Ascii::isDigit(c)) { _value += c; istr.get(); } else { switch(c) { case '.': // Float { if (_activeClass == Token::FLOAT_LITERAL_TOKEN) { throw JSONException("Invalid float value"); } _activeClass = Token::FLOAT_LITERAL_TOKEN; _value += c; istr.get(); // After a . we need a digit c = istr.peek(); if ( ! Ascii::isDigit(c) ) { throw JSONException("Invalid float value"); } break; } case 'E': case 'e': { if (_activeClass == Token::DOUBLE_LITERAL_TOKEN) { throw JSONException("Invalid double value"); } _activeClass = Token::DOUBLE_LITERAL_TOKEN; // Add the e or E _value += c; istr.get(); // When the next char is - or + then read the next char c = istr.peek(); if (c == '-' || c == '+') { _value += c; istr.get(); c = istr.peek(); } if (! Ascii::isDigit(c)) { throw JSONException("Invalid double value"); } break; } default: return; // End of number token } istr.get(); // If we get here we have a valid character for a number _value += c; } } } private: Class _activeClass; }; Parser::Parser() : _tokenizer(), _handler(NULL) { _tokenizer.addToken(new WhitespaceToken()); _tokenizer.addToken(new InvalidToken()); _tokenizer.addToken(new SeparatorToken()); _tokenizer.addToken(new StringToken()); _tokenizer.addToken(new NumberToken()); _tokenizer.addToken(new KeywordToken()); } Parser::~Parser() { } const Token* Parser::nextToken() { const Token* token = _tokenizer.next(); if (token->is(Token::EOF_TOKEN)) { throw JSONException("Unexpected EOF found"); } return token; } void Parser::parse(std::istream& in) { _tokenizer.attachToStream(in); const Token* token = nextToken(); if (token->is(Token::SEPARATOR_TOKEN)) { // This must be a { or a [ if (token->asChar() == '{') { readObject(); } else if (token->asChar() == '[') { readArray(); } else { throw JSONException(format("Invalid separator '%c' found. Expecting { or [", token->asChar())); } token = _tokenizer.next(); if (! token->is(Token::EOF_TOKEN)) { throw JSONException(format("EOF expected but found '%s'", token->asString())); } } else { throw JSONException(format("Invalid token '%s' found. Expecting { or [", token->asString())); } } void Parser::readObject() { if (_handler != NULL) { _handler->startObject(); } if ( readRow(true) ) // First call is special: check for empty object { while(readRow()); } if (_handler != NULL) { _handler->endObject(); } } bool Parser::readRow(bool firstCall) { const Token* token = nextToken(); if (firstCall && token->tokenClass() == Token::SEPARATOR_TOKEN && token->asChar() == '}') { return false; // End of object is possible for an empty object } if (token->tokenClass() == Token::STRING_LITERAL_TOKEN) { std::string propertyName = token->tokenString(); if ( _handler != NULL ) { _handler->key(propertyName); } token = nextToken(); if (token->is(Token::SEPARATOR_TOKEN) && token->asChar() == ':') { readValue(nextToken()); token = nextToken(); if (token->is(Token::SEPARATOR_TOKEN)) { if (token->asChar() == ',') { return true; // Read next row } else if (token->asChar() == '}') { return false; // End of object } else { throw JSONException(format("Invalid separator '%c' found. Expecting , or }", token->asChar())); } } else { throw JSONException(format("Invalid token '%s' found. Expecting , or }", token->asString())); } } else { throw JSONException(format("Invalid token '%s' found. Expecting :", token->asString())); } } else { throw JSONException(format("Invalid token '%s' found. Expecting key", token->asString())); } } void Parser::readValue(const Token* token) { switch(token->tokenClass()) { default: case Token::IDENTIFIER_TOKEN: case Token::OPERATOR_TOKEN: case Token::CHAR_LITERAL_TOKEN: break; case Token::INTEGER_LITERAL_TOKEN: if (_handler != NULL) { #if defined(POCO_HAVE_INT64) try { Int64 value = token->asInteger64(); // if number is 32-bit, then handle as such if ( value > std::numeric_limits::max() || value < std::numeric_limits::min() ) { _handler->value(value); } else { _handler->value(static_cast(value)); } } // try to handle error as unsigned in case of overflow catch ( const SyntaxException& ) { UInt64 value = token->asUnsignedInteger64(); // if number is 32-bit, then handle as such if ( value > std::numeric_limits::max() ) { _handler->value(value); } else { _handler->value(static_cast(value)); } } #else try { int value = token->asInteger(); _handle->value(value); } // try to handle error as unsigned in case of overflow catch ( const SyntaxException& ) { unsigned value = token->asUnsignedInteger(); _handle->value(value); } #endif } break; case Token::KEYWORD_TOKEN: { if (token->tokenString().compare("null") == 0) { if (_handler != NULL) { _handler->null(); } } else if (token->tokenString().compare("true") == 0) { if (_handler != NULL) { _handler->value(true); } } else if (token->tokenString().compare("false") == 0) { if (_handler != NULL) { _handler->value(false); } } else { throw JSONException(format("Invalid keyword '%s' found", token->asString())); } break; } case Token::FLOAT_LITERAL_TOKEN: // Fall through case Token::DOUBLE_LITERAL_TOKEN: if (_handler != NULL) { _handler->value(token->asFloat()); } break; case Token::STRING_LITERAL_TOKEN: if (_handler != NULL) { _handler->value(token->tokenString()); } break; case Token::SEPARATOR_TOKEN: { if (token->asChar() == '{') { readObject(); } else if (token->asChar() == '[') { readArray(); } break; } case Token::INVALID_TOKEN: throw JSONException(format("Invalid token '%s' found", token->asString())); } } void Parser::readArray() { if (_handler != NULL) { _handler->startArray(); } if (readElements(true)) // First call is special: check for empty array { while(readElements()); } if (_handler != NULL) { _handler->endArray(); } } bool Parser::readElements(bool firstCall) { const Token* token = nextToken(); if (firstCall && token->is(Token::SEPARATOR_TOKEN) && token->asChar() == ']') { // End of array is possible for an empty array return false; } readValue(token); token = nextToken(); if ( token->is(Token::SEPARATOR_TOKEN) ) { if (token->asChar() == ']') return false; // End of array if (token->asChar() == ',') return true; throw JSONException(format("Invalid separator '%c' found. Expecting , or ]", token->asChar())); } throw JSONException(format("Invalid token '%s' found.", token->asString())); } } } // namespace Poco::JSON