poco/JSON/src/pdjson.c
Aleksandar Fabijanic ff879f5905
Fix/tsan (#3617)
* fix(Foundation): tsan warnings fixes

* fix(Thread_POSIX): tsan warnings fixes; add tsan.suppress

* fix(Util): tsan fixes

* fix(netSSL_OpenSSL): tsan fixes

* fix(Data): tsan warnings fixes

* feat(ci): add tsan job

* feat(ci): add tsan job, another attempt

* feat(ci): add tsan job, 3rd attempt

* fix(Foundation): tsan warnings fixes

* fix(Thread_POSIX): tsan warnings fixes; add tsan.suppress

* fix(Util): tsan fixes

* fix(netSSL_OpenSSL): tsan fixes

* fix(Data): tsan warnings fixes

* feat(ci): add tsan job

* feat(ci): add tsan job, another attempt

* feat(ci): add tsan job, 3rd attempt

* fix(ResultMetadata): memory leak #3474

* feat(ci): disable ActiveDispatcher tests for tsan runs

* feat(ci): try to fix tsan options file detection (again)

* chore(TestLibrary: correct spelling

* fix(ci): fix tsan run; add -y to apt; disable samples build for some jobs

* fix(ci): add mysql ports

* feat(ci): add VS asan

* feat(double-conversion): Upgrade double-conversion to v3.2.0 #3624

* chore(asan): disable msvc asan build (dll not found)

* chore(double-conversion): move NumericString.h before double-conversion includes to prevent min/max collision; reinstate lost loongarch64

* chore(JSON): sync pdjson with upstream

* fix(Statement): Poco::Data::Statement becomes unusable after exception #2287
2022-06-01 23:47:26 -05:00

966 lines
25 KiB
C

#ifndef _POSIX_C_SOURCE
# define _POSIX_C_SOURCE 200112L
#elif _POSIX_C_SOURCE < 200112L
# error incompatible _POSIX_C_SOURCE level
#endif
#include <stdlib.h>
#include <string.h>
#include <ctype.h>
#ifndef PDJSON_H
# include "pdjson.h"
#endif
#define JSON_FLAG_ERROR (1u << 0)
#define JSON_FLAG_STREAMING (1u << 1)
#if defined(_MSC_VER) && (_MSC_VER < 1900)
#define json_error(json, format, ...) \
if (!(json->flags & JSON_FLAG_ERROR)) { \
json->flags |= JSON_FLAG_ERROR; \
_snprintf_s(json->errmsg, sizeof(json->errmsg), \
_TRUNCATE, \
format, \
__VA_ARGS__); \
} \
#else
#define json_error(json, format, ...) \
if (!(json->flags & JSON_FLAG_ERROR)) { \
json->flags |= JSON_FLAG_ERROR; \
snprintf(json->errmsg, sizeof(json->errmsg), \
format, \
__VA_ARGS__); \
} \
#endif /* _MSC_VER */
/* See also PDJSON_STACK_MAX below. */
#ifndef PDJSON_STACK_INC
# define PDJSON_STACK_INC 4
#endif
struct json_stack {
enum json_type type;
long count;
};
static enum json_type
push(json_stream *json, enum json_type type)
{
json->stack_top++;
#ifdef PDJSON_STACK_MAX
if (json->stack_top > PDJSON_STACK_MAX) {
json_error(json, "%s", "maximum depth of nesting reached");
return JSON_ERROR;
}
#endif
if (json->stack_top >= json->stack_size) {
struct json_stack *stack;
size_t size = (json->stack_size + PDJSON_STACK_INC) * sizeof(*json->stack);
stack = (struct json_stack *)json->alloc.realloc(json->stack, size);
if (stack == NULL) {
json_error(json, "%s", "out of memory");
return JSON_ERROR;
}
json->stack_size += PDJSON_STACK_INC;
json->stack = stack;
}
json->stack[json->stack_top].type = type;
json->stack[json->stack_top].count = 0;
return type;
}
static enum json_type
pop(json_stream *json, int c, enum json_type expected)
{
if (json->stack == NULL || json->stack[json->stack_top].type != expected) {
json_error(json, "unexpected byte '%c'", c);
return JSON_ERROR;
}
json->stack_top--;
return expected == JSON_ARRAY ? JSON_ARRAY_END : JSON_OBJECT_END;
}
static int buffer_peek(struct json_source *source)
{
if (source->position < source->source.buffer.length)
return source->source.buffer.buffer[source->position];
else
return EOF;
}
static int buffer_get(struct json_source *source)
{
int c = source->peek(source);
source->position++;
return c;
}
static int stream_get(struct json_source *source)
{
source->position++;
return fgetc(source->source.stream.stream);
}
static int stream_peek(struct json_source *source)
{
int c = fgetc(source->source.stream.stream);
ungetc(c, source->source.stream.stream);
return c;
}
static void init(json_stream *json)
{
json->lineno = 1;
json->flags = JSON_FLAG_STREAMING;
json->errmsg[0] = '\0';
json->ntokens = 0;
json->next = (enum json_type)0;
json->stack = NULL;
json->stack_top = -1;
json->stack_size = 0;
json->data.string = NULL;
json->data.string_size = 0;
json->data.string_fill = 0;
json->source.position = 0;
json->alloc.malloc = malloc;
json->alloc.realloc = realloc;
json->alloc.free = free;
}
static enum json_type
is_match(json_stream *json, const char *pattern, enum json_type type)
{
int c;
for (const char *p = pattern; *p; p++) {
if (*p != (c = json->source.get(&json->source))) {
json_error(json, "expected '%c' instead of byte '%c'", *p, c);
return JSON_ERROR;
}
}
return type;
}
static int pushchar(json_stream *json, int c)
{
if (json->data.string_fill == json->data.string_size) {
size_t size = json->data.string_size * 2;
char *buffer = (char *)json->alloc.realloc(json->data.string, size);
if (buffer == NULL) {
json_error(json, "%s", "out of memory");
return -1;
} else {
json->data.string_size = size;
json->data.string = buffer;
}
}
json->data.string[json->data.string_fill++] = c;
return 0;
}
static int init_string(json_stream *json)
{
json->data.string_fill = 0;
if (json->data.string == NULL) {
json->data.string_size = 1024;
json->data.string = (char *)json->alloc.malloc(json->data.string_size);
if (json->data.string == NULL) {
json_error(json, "%s", "out of memory");
return -1;
}
}
json->data.string[0] = '\0';
return 0;
}
static int encode_utf8(json_stream *json, unsigned long c)
{
if (c < 0x80UL) {
return pushchar(json, c);
} else if (c < 0x0800UL) {
return !((pushchar(json, (c >> 6 & 0x1F) | 0xC0) == 0) &&
(pushchar(json, (c >> 0 & 0x3F) | 0x80) == 0));
} else if (c < 0x010000UL) {
if (c >= 0xd800 && c <= 0xdfff) {
json_error(json, "invalid codepoint %06lx", c);
return -1;
}
return !((pushchar(json, (c >> 12 & 0x0F) | 0xE0) == 0) &&
(pushchar(json, (c >> 6 & 0x3F) | 0x80) == 0) &&
(pushchar(json, (c >> 0 & 0x3F) | 0x80) == 0));
} else if (c < 0x110000UL) {
return !((pushchar(json, (c >> 18 & 0x07) | 0xF0) == 0) &&
(pushchar(json, (c >> 12 & 0x3F) | 0x80) == 0) &&
(pushchar(json, (c >> 6 & 0x3F) | 0x80) == 0) &&
(pushchar(json, (c >> 0 & 0x3F) | 0x80) == 0));
} else {
json_error(json, "unable to encode %06lx as UTF-8", c);
return -1;
}
}
static int hexchar(int c)
{
switch (c) {
case '0': return 0;
case '1': return 1;
case '2': return 2;
case '3': return 3;
case '4': return 4;
case '5': return 5;
case '6': return 6;
case '7': return 7;
case '8': return 8;
case '9': return 9;
case 'a':
case 'A': return 10;
case 'b':
case 'B': return 11;
case 'c':
case 'C': return 12;
case 'd':
case 'D': return 13;
case 'e':
case 'E': return 14;
case 'f':
case 'F': return 15;
default:
return -1;
}
}
static long
read_unicode_cp(json_stream *json)
{
long cp = 0;
int shift = 12;
for (size_t i = 0; i < 4; i++) {
int c = json->source.get(&json->source);
int hc;
if (c == EOF) {
json_error(json, "%s", "unterminated string literal in Unicode");
return -1;
} else if ((hc = hexchar(c)) == -1) {
json_error(json, "invalid escape Unicode byte '%c'", c);
return -1;
}
cp += hc * (1 << shift);
shift -= 4;
}
return cp;
}
static int read_unicode(json_stream *json)
{
long cp, h, l;
if ((cp = read_unicode_cp(json)) == -1) {
return -1;
}
if (cp >= 0xd800 && cp <= 0xdbff) {
/* This is the high portion of a surrogate pair; we need to read the
* lower portion to get the codepoint
*/
h = cp;
int c = json->source.get(&json->source);
if (c == EOF) {
json_error(json, "%s", "unterminated string literal in Unicode");
return -1;
} else if (c != '\\') {
json_error(json, "invalid continuation for surrogate pair '%c', "
"expected '\\'", c);
return -1;
}
c = json->source.get(&json->source);
if (c == EOF) {
json_error(json, "%s", "unterminated string literal in Unicode");
return -1;
} else if (c != 'u') {
json_error(json, "invalid continuation for surrogate pair '%c', "
"expected 'u'", c);
return -1;
}
if ((l = read_unicode_cp(json)) == -1) {
return -1;
}
if (l < 0xdc00 || l > 0xdfff) {
json_error(json, "surrogate pair continuation \\u%04lx out "
"of range (dc00-dfff)", l);
return -1;
}
cp = ((h - 0xd800) * 0x400) + ((l - 0xdc00) + 0x10000);
} else if (cp >= 0xdc00 && cp <= 0xdfff) {
json_error(json, "dangling surrogate \\u%04lx", cp);
return -1;
}
return encode_utf8(json, cp);
}
static int
read_escaped(json_stream *json)
{
int c = json->source.get(&json->source);
if (c == EOF) {
json_error(json, "%s", "unterminated string literal in escape");
return -1;
} else if (c == 'u') {
if (read_unicode(json) != 0)
return -1;
} else {
switch (c) {
case '\\':
case 'b':
case 'f':
case 'n':
case 'r':
case 't':
case '/':
case '"':
{
const char *codes = "\\bfnrt/\"";
const char *p = strchr(codes, c);
if (pushchar(json, "\\\b\f\n\r\t/\""[p - codes]) != 0)
return -1;
}
break;
default:
json_error(json, "invalid escaped byte '%c'", c);
return -1;
}
}
return 0;
}
static int
char_needs_escaping(int c)
{
if ((c >= 0) && (c < 0x20 || c == 0x22 || c == 0x5c)) {
return 1;
}
return 0;
}
static int
utf8_seq_length(char byte)
{
unsigned char u = (unsigned char) byte;
if (u < 0x80) return 1;
if (0x80 <= u && u <= 0xBF)
{
// second, third or fourth byte of a multi-byte
// sequence, i.e. a "continuation byte"
return 0;
}
else if (u == 0xC0 || u == 0xC1)
{
// overlong encoding of an ASCII byte
return 0;
}
else if (0xC2 <= u && u <= 0xDF)
{
// 2-byte sequence
return 2;
}
else if (0xE0 <= u && u <= 0xEF)
{
// 3-byte sequence
return 3;
}
else if (0xF0 <= u && u <= 0xF4)
{
// 4-byte sequence
return 4;
}
else
{
// u >= 0xF5
// Restricted (start of 4-, 5- or 6-byte sequence) or invalid UTF-8
return 0;
}
}
static int
is_legal_utf8(const unsigned char *bytes, int length)
{
if (0 == bytes || 0 == length) return 0;
unsigned char a;
const unsigned char* srcptr = bytes + length;
switch (length)
{
default:
return 0;
// Everything else falls through when true.
case 4:
if ((a = (*--srcptr)) < 0x80 || a > 0xBF) return 0;
/* FALLTHRU */
case 3:
if ((a = (*--srcptr)) < 0x80 || a > 0xBF) return 0;
/* FALLTHRU */
case 2:
a = (*--srcptr);
switch (*bytes)
{
case 0xE0:
if (a < 0xA0 || a > 0xBF) return 0;
break;
case 0xED:
if (a < 0x80 || a > 0x9F) return 0;
break;
case 0xF0:
if (a < 0x90 || a > 0xBF) return 0;
break;
case 0xF4:
if (a < 0x80 || a > 0x8F) return 0;
break;
default:
if (a < 0x80 || a > 0xBF) return 0;
break;
}
/* FALLTHRU */
case 1:
if (*bytes >= 0x80 && *bytes < 0xC2) return 0;
}
return *bytes <= 0xF4;
}
static int
read_utf8(json_stream* json, int next_char)
{
int count = utf8_seq_length(next_char);
if (!count)
{
json_error(json, "%s", "invalid UTF-8 character");
return -1;
}
char buffer[4];
buffer[0] = next_char;
int i;
for (i = 1; i < count; ++i)
{
buffer[i] = json->source.get(&json->source);;
}
if (!is_legal_utf8((unsigned char*) buffer, count))
{
json_error(json, "%s", "invalid UTF-8 text");
return -1;
}
for (i = 0; i < count; ++i)
{
if (pushchar(json, buffer[i]) != 0)
return -1;
}
return 0;
}
static enum json_type
read_string(json_stream *json)
{
if (init_string(json) != 0)
return JSON_ERROR;
while (1) {
int c = json->source.get(&json->source);
if (c == EOF) {
json_error(json, "%s", "unterminated string literal");
return JSON_ERROR;
} else if (c == '"') {
if (pushchar(json, '\0') == 0)
return JSON_STRING;
else
return JSON_ERROR;
} else if (c == '\\') {
if (read_escaped(json) != 0)
return JSON_ERROR;
} else if ((unsigned) c >= 0x80) {
if (read_utf8(json, c) != 0)
return JSON_ERROR;
} else {
if (char_needs_escaping(c)) {
json_error(json, "%s", "unescaped control character in string");
return JSON_ERROR;
}
if (pushchar(json, c) != 0)
return JSON_ERROR;
}
}
return JSON_ERROR;
}
static int
is_digit(int c)
{
return c >= 48 /*0*/ && c <= 57 /*9*/;
}
static int
read_digits(json_stream *json)
{
int c;
unsigned nread = 0;
while (is_digit(c = json->source.peek(&json->source))) {
if (pushchar(json, json->source.get(&json->source)) != 0)
return -1;
nread++;
}
if (nread == 0) {
json_error(json, "expected digit instead of byte '%c'", c);
return -1;
}
return 0;
}
static enum json_type
read_number(json_stream *json, int c)
{
if (pushchar(json, c) != 0)
return JSON_ERROR;
if (c == '-') {
c = json->source.get(&json->source);
if (is_digit(c)) {
return read_number(json, c);
} else {
json_error(json, "unexpected byte '%c' in number", c);
return JSON_ERROR;
}
} else if (c >= '1' && c <= '9') {
c = json->source.peek(&json->source);
if (is_digit(c)) {
if (read_digits(json) != 0)
return JSON_ERROR;
}
}
/* Up to decimal or exponent has been read. */
c = json->source.peek(&json->source);
if (strchr(".eE", c) == NULL) {
if (pushchar(json, '\0') != 0)
return JSON_ERROR;
else
return JSON_NUMBER;
}
if (c == '.') {
json->source.get(&json->source); // consume .
if (pushchar(json, c) != 0)
return JSON_ERROR;
if (read_digits(json) != 0)
return JSON_ERROR;
}
/* Check for exponent. */
c = json->source.peek(&json->source);
if (c == 'e' || c == 'E') {
json->source.get(&json->source); // consume e/E
if (pushchar(json, c) != 0)
return JSON_ERROR;
c = json->source.peek(&json->source);
if (c == '+' || c == '-') {
json->source.get(&json->source); // consume
if (pushchar(json, c) != 0)
return JSON_ERROR;
if (read_digits(json) != 0)
return JSON_ERROR;
} else if (is_digit(c)) {
if (read_digits(json) != 0)
return JSON_ERROR;
} else {
json_error(json, "unexpected byte '%c' in number", c);
return JSON_ERROR;
}
}
if (pushchar(json, '\0') != 0)
return JSON_ERROR;
else
return JSON_NUMBER;
}
bool
json_isspace(int c)
{
switch (c) {
case 0x09:
case 0x0a:
case 0x0d:
case 0x20:
return true;
}
return false;
}
/* Returns the next non-whitespace character in the stream. */
static int next(json_stream *json)
{
int c;
while (json_isspace(c = json->source.get(&json->source)))
if (c == '\n')
json->lineno++;
return c;
}
static enum json_type
read_value(json_stream *json, int c)
{
json->ntokens++;
switch (c) {
case EOF:
json_error(json, "%s", "unexpected end of text");
return JSON_ERROR;
case '{':
return push(json, JSON_OBJECT);
case '[':
return push(json, JSON_ARRAY);
case '"':
return read_string(json);
case 'n':
return is_match(json, "ull", JSON_NULL);
case 'f':
return is_match(json, "alse", JSON_FALSE);
case 't':
return is_match(json, "rue", JSON_TRUE);
case '0':
case '1':
case '2':
case '3':
case '4':
case '5':
case '6':
case '7':
case '8':
case '9':
case '-':
if (init_string(json) != 0)
return JSON_ERROR;
return read_number(json, c);
default:
json_error(json, "unexpected byte '%c' in value", c);
return JSON_ERROR;
}
}
enum json_type json_peek(json_stream *json)
{
enum json_type next;
if (json->next)
next = json->next;
else
next = json->next = json_next(json);
return next;
}
enum json_type json_next(json_stream *json)
{
if (json->flags & JSON_FLAG_ERROR)
return JSON_ERROR;
if (json->next != 0) {
enum json_type next = json->next;
json->next = (enum json_type)0;
return next;
}
if (json->ntokens > 0 && json->stack_top == (size_t)-1) {
/* In the streaming mode leave any trailing whitespaces in the stream.
* This allows the user to validate any desired separation between
* values (such as newlines) using json_source_get/peek() with any
* remaining whitespaces ignored as leading when we parse the next
* value. */
if (!(json->flags & JSON_FLAG_STREAMING)) {
int c;
do {
c = json->source.peek(&json->source);
if (json_isspace(c)) {
c = json->source.get(&json->source);
}
} while (json_isspace(c));
if (c != EOF) {
json_error(json, "expected end of text instead of byte '%c'", c);
return JSON_ERROR;
}
}
return JSON_DONE;
}
int c = next(json);
if (json->stack_top == (size_t)-1) {
if (c == EOF && (json->flags & JSON_FLAG_STREAMING))
return JSON_DONE;
return read_value(json, c);
}
if (json->stack[json->stack_top].type == JSON_ARRAY) {
if (json->stack[json->stack_top].count == 0) {
if (c == ']') {
return pop(json, c, JSON_ARRAY);
}
json->stack[json->stack_top].count++;
return read_value(json, c);
} else if (c == ',') {
json->stack[json->stack_top].count++;
return read_value(json, next(json));
} else if (c == ']') {
return pop(json, c, JSON_ARRAY);
} else {
json_error(json, "unexpected byte '%c'", c);
return JSON_ERROR;
}
} else if (json->stack[json->stack_top].type == JSON_OBJECT) {
if (json->stack[json->stack_top].count == 0) {
if (c == '}') {
return pop(json, c, JSON_OBJECT);
}
/* No member name/value pairs yet. */
enum json_type value = read_value(json, c);
if (value != JSON_STRING) {
if (value != JSON_ERROR)
json_error(json, "%s", "expected member name or '}'");
return JSON_ERROR;
} else {
json->stack[json->stack_top].count++;
return value;
}
} else if ((json->stack[json->stack_top].count % 2) == 0) {
/* Expecting comma followed by member name. */
if (c != ',' && c != '}') {
json_error(json, "%s", "expected ',' or '}' after member value");
return JSON_ERROR;
} else if (c == '}') {
return pop(json, c, JSON_OBJECT);
} else {
enum json_type value = read_value(json, next(json));
if (value != JSON_STRING) {
if (value != JSON_ERROR)
json_error(json, "%s", "expected member name");
return JSON_ERROR;
} else {
json->stack[json->stack_top].count++;
return value;
}
}
} else if ((json->stack[json->stack_top].count % 2) == 1) {
/* Expecting colon followed by value. */
if (c != ':') {
json_error(json, "%s", "expected ':' after member name");
return JSON_ERROR;
} else {
json->stack[json->stack_top].count++;
return read_value(json, next(json));
}
}
}
json_error(json, "%s", "invalid parser state");
return JSON_ERROR;
}
void json_reset(json_stream *json)
{
json->stack_top = -1;
json->ntokens = 0;
json->flags &= ~JSON_FLAG_ERROR;
json->errmsg[0] = '\0';
}
enum json_type json_skip(json_stream *json)
{
enum json_type type = json_next(json);
size_t cnt_arr = 0;
size_t cnt_obj = 0;
for (enum json_type skip = type; ; skip = json_next(json)) {
if (skip == JSON_ERROR || skip == JSON_DONE)
return skip;
if (skip == JSON_ARRAY) {
++cnt_arr;
} else if (skip == JSON_ARRAY_END && cnt_arr > 0) {
--cnt_arr;
} else if (skip == JSON_OBJECT) {
++cnt_obj;
} else if (skip == JSON_OBJECT_END && cnt_obj > 0) {
--cnt_obj;
}
if (!cnt_arr && !cnt_obj)
break;
}
return type;
}
enum json_type json_skip_until(json_stream *json, enum json_type type)
{
while (1) {
enum json_type skip = json_skip(json);
if (skip == JSON_ERROR || skip == JSON_DONE)
return skip;
if (skip == type)
break;
}
return type;
}
const char *json_get_string(json_stream *json, size_t *length)
{
if (length != NULL)
*length = json->data.string_fill;
if (json->data.string == NULL)
return "";
else
return json->data.string;
}
double json_get_number(json_stream *json)
{
char *p = json->data.string;
return p == NULL ? 0 : strtod(p, NULL);
}
const char *json_get_error(json_stream *json)
{
return json->flags & JSON_FLAG_ERROR ? json->errmsg : NULL;
}
size_t json_get_lineno(json_stream *json)
{
return json->lineno;
}
size_t json_get_position(json_stream *json)
{
return json->source.position;
}
size_t json_get_depth(json_stream *json)
{
return json->stack_top + 1;
}
/* Return the current parsing context, that is, JSON_OBJECT if we are inside
an object, JSON_ARRAY if we are inside an array, and JSON_DONE if we are
not yet/anymore in either.
Additionally, for the first two cases, also return the number of parsing
events that have already been observed at this level with json_next/peek().
In particular, inside an object, an odd number would indicate that the just
observed JSON_STRING event is a member name.
*/
enum json_type json_get_context(json_stream *json, size_t *count)
{
if (json->stack_top == (size_t)-1)
return JSON_DONE;
if (count != NULL)
*count = json->stack[json->stack_top].count;
return json->stack[json->stack_top].type;
}
int json_source_get(json_stream *json)
{
int c = json->source.get(&json->source);
if (c == '\n')
json->lineno++;
return c;
}
int json_source_peek(json_stream *json)
{
return json->source.peek(&json->source);
}
void json_open_buffer(json_stream *json, const void *buffer, size_t size)
{
init(json);
json->source.get = buffer_get;
json->source.peek = buffer_peek;
json->source.source.buffer.buffer = (const char *)buffer;
json->source.source.buffer.length = size;
}
void json_open_string(json_stream *json, const char *string)
{
json_open_buffer(json, string, strlen(string));
}
void json_open_stream(json_stream *json, FILE * stream)
{
init(json);
json->source.get = stream_get;
json->source.peek = stream_peek;
json->source.source.stream.stream = stream;
}
static int user_get(struct json_source *json)
{
return json->source.user.get(json->source.user.ptr);
}
static int user_peek(struct json_source *json)
{
return json->source.user.peek(json->source.user.ptr);
}
void json_open_user(json_stream *json, json_user_io get, json_user_io peek, void *user)
{
init(json);
json->source.get = user_get;
json->source.peek = user_peek;
json->source.source.user.ptr = user;
json->source.source.user.get = get;
json->source.source.user.peek = peek;
}
void json_set_allocator(json_stream *json, json_allocator *a)
{
json->alloc = *a;
}
void json_set_streaming(json_stream *json, bool streaming)
{
if (streaming)
json->flags |= JSON_FLAG_STREAMING;
else
json->flags &= ~JSON_FLAG_STREAMING;
}
void json_close(json_stream *json)
{
json->alloc.free(json->stack);
json->alloc.free(json->data.string);
}