Merge pull request #53 from fbraem/develop

JSON unicode fixes and running tests on invalid unicode JSON
This commit is contained in:
Aleksandar Fabijanic 2013-01-04 08:29:08 -08:00
commit 80cf789dcd
3 changed files with 199 additions and 50 deletions

View File

@ -38,6 +38,7 @@
#include "Poco/JSON/JSONException.h"
#include "Poco/Ascii.h"
#include "Poco/Token.h"
#include "Poco/UTF8Encoding.h"
#undef min
#undef max
#include <limits>
@ -118,8 +119,8 @@ public:
void finish(std::istream& istr)
{
int c = istr.get();
while (c != -1)
int c = 0;
while ((c = istr.get()) != -1)
{
if ( c == 0 )
{
@ -134,6 +135,31 @@ public:
if ( c == '"' )
break;
if(0x80 <= c && c <= 0xFF)
{
int count = utf8_check_first(c);
if (!count)
{
throw JSONException(format("Unable to decode byte 0x%x", (unsigned int) c));
}
char buffer[5];
buffer[0] = c;
for(int i = 1; i < count; ++i)
{
buffer[i] = istr.get();
}
if ( !UTF8Encoding::isLegal((unsigned char*) buffer, count) )
{
throw JSONException("No legal UTF8 found");
}
buffer[count] = '\0';
_value += buffer;
continue;
}
if ( c == '\\' ) // Escaped String
{
c = istr.get();
@ -196,8 +222,16 @@ public:
{
throw JSONException("Invalid unicode");
}
c = unicode;
break;
Poco::UTF8Encoding utf8encoding;
int length = utf8encoding.convert(unicode, NULL, 0);
std::vector<unsigned char> convert(length);
utf8encoding.convert(unicode, &convert[0], length);
for(int i = 0; i < length; ++i)
{
_value += (char) convert[i];
}
continue;
}
default:
{
@ -206,7 +240,6 @@ public:
}
}
_value += c;
c = istr.get();
}
if ( c == -1 )
@ -241,6 +274,49 @@ public:
return value;
}
private:
/// Classifies a byte as the potential first byte of a UTF-8 sequence.
///
/// Returns the total number of bytes (1 to 4) in the sequence that
/// the given byte introduces, or 0 when the byte cannot start a
/// sequence: continuation bytes (0x80-0xBF), the overlong lead
/// bytes 0xC0/0xC1, and everything from 0xF5 upwards.
int utf8_check_first(char byte)
{
	const unsigned char lead = static_cast<unsigned char>(byte);

	// The ranges are tested in ascending order, so each test only
	// needs to check the upper bound of its range.

	// 0x00-0x7F: plain ASCII, a complete single-byte sequence.
	if (lead <= 0x7F) return 1;

	// 0x80-0xBF: second, third or fourth byte of a multi-byte
	// sequence, i.e. a "continuation byte".
	// 0xC0-0xC1: overlong encoding of an ASCII byte.
	if (lead <= 0xC1) return 0;

	// 0xC2-0xDF: 2-byte sequence.
	if (lead <= 0xDF) return 2;

	// 0xE0-0xEF: 3-byte sequence.
	if (lead <= 0xEF) return 3;

	// 0xF0-0xF4: 4-byte sequence.
	if (lead <= 0xF4) return 4;

	// 0xF5-0xFF: restricted (start of 4-, 5- or 6-byte sequence)
	// or invalid UTF-8.
	return 0;
}
};
@ -648,6 +724,8 @@ void Parser::readValue(const Token* token)
}
break;
}
case Token::INVALID_TOKEN:
throw JSONException(format("Invalid token '%s' found", token->asString()));
}
}

View File

@ -33,6 +33,7 @@
#include "JSONTest.h"
#include "CppUnit/TestCaller.h"
#include "CppUnit/TestSuite.h"
#include "Poco/JSON/Object.h"
#include "Poco/JSON/Parser.h"
#include "Poco/JSON/Query.h"
@ -40,13 +41,17 @@
#include "Poco/JSON/Stringifier.h"
#include "Poco/JSON/DefaultHandler.h"
#include "Poco/JSON/Template.h"
#include "Poco/Path.h"
#include "Poco/Environment.h"
#include "Poco/File.h"
#include "Poco/FileStream.h"
#include "Poco/Glob.h"
#include <set>
#include "Poco/UTF8Encoding.h"
#include "Poco/Latin1Encoding.h"
#include "Poco/TextConverter.h"
#include <set>
using namespace Poco::JSON;
using namespace Poco::Dynamic;
@ -74,22 +79,6 @@ void JSONTest::tearDown()
}
void JSONTest::testStringifier()
{
Object obj;
Array arr;
Object obj2;
obj.set("array", arr);
obj.set("obj2", obj2);
std::ostringstream ostr;
obj.stringify(ostr);
assert (ostr.str() == "{\"array\":[],\"obj2\":{}}");
}
void JSONTest::testNullProperty()
{
std::string json = "{ \"test\" : null }";
@ -845,6 +834,50 @@ void JSONTest::testInvalidJanssonFiles()
}
void JSONTest::testInvalidUnicodeJanssonFiles()
{
Poco::Path pathPattern(getTestFilesPath("invalid-unicode"));
std::set<std::string> paths;
Poco::Glob::glob(pathPattern, paths);
for(std::set<std::string>::iterator it = paths.begin(); it != paths.end(); ++it)
{
Poco::Path filePath(*it, "input");
if ( filePath.isFile() )
{
Poco::File inputFile(filePath);
if ( inputFile.exists() )
{
Poco::FileInputStream fis(filePath.toString());
std::cout << filePath.toString() << std::endl;
Parser parser;
Var result;
try
{
DefaultHandler handler;
parser.setHandler(&handler);
parser.parse(fis);
result = handler.result();
// We shouldn't get here.
std::cout << "We didn't get an exception. This is the result: " << result.convert<std::string>() << std::endl;
fail(result.convert<std::string>());
}
catch(JSONException&)
{
continue;
}
catch(Poco::SyntaxException&)
{ }
}
}
}
}
void JSONTest::testTemplate()
{
Template tpl;
@ -858,6 +891,40 @@ void JSONTest::testTemplate()
tpl.render(data, std::cout);
}
void JSONTest::testUnicode()
{
const unsigned char supp[] = {0x61, 0xE1, 0xE9, 0x78, 0xED, 0xF3, 0xFA, 0x0};
std::string text((const char*) supp);
std::string json = "{ \"test\" : \"a\\u00E1\\u00E9x\\u00ED\\u00F3\\u00FA\" }";
Parser parser;
Var result;
try
{
DefaultHandler handler;
parser.setHandler(&handler);
parser.parse(json);
result = handler.result();
}
catch(JSONException& jsone)
{
std::cout << jsone.message() << std::endl;
assert(false);
}
assert(result.type() == typeid(Object::Ptr));
Object::Ptr object = result.extract<Object::Ptr>();
Var test = object->get("test");
Poco::Latin1Encoding latin1;
Poco::UTF8Encoding utf8;
Poco::TextConverter converter(latin1, utf8);
std::string original;
converter.convert(text, original);
assert(test.convert<std::string>() == original);
}
std::string JSONTest::getTestFilesPath(const std::string& type)
{
@ -879,8 +946,10 @@ std::string JSONTest::getTestFilesPath(const std::string& type)
if (Poco::File(pathPattern).exists())
validDir += '*';
else
{
std::cout << "Can't find " << validDir << std::endl;
throw Poco::NotFoundException("cannot locate directory containing valid JSON test files");
}
return validDir;
}
@ -889,7 +958,6 @@ CppUnit::Test* JSONTest::suite()
{
CppUnit::TestSuite* pSuite = new CppUnit::TestSuite("JSONTest");
CppUnit_addTest(pSuite, JSONTest, testStringifier);
CppUnit_addTest(pSuite, JSONTest, testNullProperty);
CppUnit_addTest(pSuite, JSONTest, testTrueProperty);
CppUnit_addTest(pSuite, JSONTest, testFalseProperty);
@ -917,7 +985,9 @@ CppUnit::Test* JSONTest::suite()
CppUnit_addTest(pSuite, JSONTest, testQuery);
CppUnit_addTest(pSuite, JSONTest, testValidJanssonFiles);
CppUnit_addTest(pSuite, JSONTest, testInvalidJanssonFiles);
CppUnit_addTest(pSuite, JSONTest, testInvalidUnicodeJanssonFiles);
CppUnit_addTest(pSuite, JSONTest, testTemplate);
CppUnit_addTest(pSuite, JSONTest, testUnicode);
return pSuite;
}

View File

@ -46,7 +46,6 @@ public:
JSONTest(const std::string& name);
~JSONTest();
void testStringifier();
void testNullProperty();
void testTrueProperty();
void testFalseProperty();
@ -76,6 +75,8 @@ public:
void testInvalidJanssonFiles();
void testTemplate();
void testItunes();
void testUnicode();
void testInvalidUnicodeJanssonFiles();
void setUp();
void tearDown();