updated bundled expat to 2.2.0

This commit is contained in:
Guenter Obiltschnig
2016-07-03 10:28:11 +02:00
parent 1662bc87a1
commit b42bb014e1
8 changed files with 515 additions and 332 deletions

View File

@@ -342,7 +342,7 @@ XML_SetEntityDeclHandler(XML_Parser parser,
XML_EntityDeclHandler handler); XML_EntityDeclHandler handler);
/* OBSOLETE -- OBSOLETE -- OBSOLETE /* OBSOLETE -- OBSOLETE -- OBSOLETE
This handler has been superceded by the EntityDeclHandler above. This handler has been superseded by the EntityDeclHandler above.
It is provided here for backward compatibility. It is provided here for backward compatibility.
This is called for a declaration of an unparsed (NDATA) entity. This is called for a declaration of an unparsed (NDATA) entity.
@@ -973,9 +973,12 @@ XML_FreeContentModel(XML_Parser parser, XML_Content *model);
/* Exposing the memory handling functions used in Expat */ /* Exposing the memory handling functions used in Expat */
XMLPARSEAPI(void *) XMLPARSEAPI(void *)
XML_ATTR_MALLOC
XML_ATTR_ALLOC_SIZE(2)
XML_MemMalloc(XML_Parser parser, size_t size); XML_MemMalloc(XML_Parser parser, size_t size);
XMLPARSEAPI(void *) XMLPARSEAPI(void *)
XML_ATTR_ALLOC_SIZE(3)
XML_MemRealloc(XML_Parser parser, void *ptr, size_t size); XML_MemRealloc(XML_Parser parser, void *ptr, size_t size);
XMLPARSEAPI(void) XMLPARSEAPI(void)
@@ -1031,14 +1034,12 @@ XMLPARSEAPI(const XML_Feature *)
XML_GetFeatureList(void); XML_GetFeatureList(void);
/* Expat follows the GNU/Linux convention of odd number minor version for /* Expat follows the semantic versioning convention.
beta/development releases and even number minor version for stable See http://semver.org.
releases. Micro is bumped with each release, and set to 0 with each
change to major or minor version.
*/ */
#define XML_MAJOR_VERSION 2 #define XML_MAJOR_VERSION 2
#define XML_MINOR_VERSION 1 #define XML_MINOR_VERSION 2
#define XML_MICRO_VERSION 1 #define XML_MICRO_VERSION 0
#ifdef __cplusplus #ifdef __cplusplus
} }

View File

@@ -65,12 +65,26 @@
#endif #endif
#endif /* not defined XML_STATIC */ #endif /* not defined XML_STATIC */
#if !defined(XMLIMPORT) && defined(__GNUC__) && (__GNUC__ >= 4)
#define XMLIMPORT __attribute__ ((visibility ("default")))
#endif
/* If we didn't define it above, define it away: */ /* If we didn't define it above, define it away: */
#ifndef XMLIMPORT #ifndef XMLIMPORT
#define XMLIMPORT #define XMLIMPORT
#endif #endif
#if defined(__GNUC__) && (__GNUC__ > 2 || (__GNUC__ == 2 && __GNUC_MINOR__ >= 96))
#define XML_ATTR_MALLOC __attribute__((__malloc__))
#else
#define XML_ATTR_MALLOC
#endif
#if defined(__GNUC__) && ((__GNUC__ > 4) || (__GNUC__ == 4 && __GNUC_MINOR__ >= 3))
#define XML_ATTR_ALLOC_SIZE(x) __attribute__((__alloc_size__(x)))
#else
#define XML_ATTR_ALLOC_SIZE(x)
#endif
#define XMLPARSEAPI(type) XMLIMPORT type XMLCALL #define XMLPARSEAPI(type) XMLIMPORT type XMLCALL

View File

@@ -71,3 +71,25 @@
#define inline #define inline
#endif #endif
#endif #endif
#ifndef UNUSED_P
# ifdef __GNUC__
# define UNUSED_P(p) UNUSED_ ## p __attribute__((__unused__))
# else
# define UNUSED_P(p) UNUSED_ ## p
# endif
#endif
#ifdef __cplusplus
extern "C" {
#endif
void
align_limit_to_full_utf8_characters(const char * from, const char ** fromLimRef);
#ifdef __cplusplus
}
#endif

View File

@@ -6,11 +6,18 @@
#include <string.h> /* memset(), memcpy() */ #include <string.h> /* memset(), memcpy() */
#include <assert.h> #include <assert.h>
#include <limits.h> /* UINT_MAX */ #include <limits.h> /* UINT_MAX */
#include <time.h> /* time() */
#ifdef WIN32
#define getpid GetCurrentProcessId
#else
#include <sys/time.h> /* gettimeofday() */
#include <sys/types.h> /* getpid() */
#include <unistd.h> /* getpid() */
#endif
#define XML_BUILDING_EXPAT 1 #define XML_BUILDING_EXPAT 1
#ifdef COMPILED_FROM_DSP #ifdef EXPAT_WIN32
#include "winconfig.h" #include "winconfig.h"
#elif defined(MACOS_CLASSIC) #elif defined(MACOS_CLASSIC)
#include "macconfig.h" #include "macconfig.h"
@@ -20,7 +27,7 @@
#include "watcomconfig.h" #include "watcomconfig.h"
#elif defined(HAVE_EXPAT_CONFIG_H) #elif defined(HAVE_EXPAT_CONFIG_H)
#include "expat_config.h" #include "expat_config.h"
#endif /* ndef COMPILED_FROM_DSP */ #endif /* ndef EXPAT_WIN32 */
#include "ascii.h" #include "ascii.h"
#include "Poco/XML/expat.h" #include "Poco/XML/expat.h"
@@ -435,7 +442,7 @@ static ELEMENT_TYPE *
getElementType(XML_Parser parser, const ENCODING *enc, getElementType(XML_Parser parser, const ENCODING *enc,
const char *ptr, const char *end); const char *ptr, const char *end);
static unsigned long generate_hash_secret_salt(void); static unsigned long generate_hash_secret_salt(XML_Parser parser);
static XML_Bool startParsing(XML_Parser parser); static XML_Bool startParsing(XML_Parser parser);
static XML_Parser static XML_Parser
@@ -694,15 +701,38 @@ static const XML_Char implicitContext[] = {
}; };
static unsigned long static unsigned long
generate_hash_secret_salt(void) gather_time_entropy(void)
{ {
#if defined(_WIN32_WCE) #ifdef WIN32
unsigned int seed = GetTickCount(); FILETIME ft;
GetSystemTimeAsFileTime(&ft); /* never fails */
return ft.dwHighDateTime ^ ft.dwLowDateTime;
#else #else
unsigned int seed = time(NULL) % UINT_MAX; struct timeval tv;
int gettimeofday_res;
gettimeofday_res = gettimeofday(&tv, NULL);
assert (gettimeofday_res == 0);
/* Microseconds time is <20 bits entropy */
return tv.tv_usec;
#endif #endif
srand(seed); }
return rand();
static unsigned long
generate_hash_secret_salt(XML_Parser parser)
{
/* Process ID is 0 bits entropy if attacker has local access
* XML_Parser address is few bits of entropy if attacker has local access */
const unsigned long entropy =
gather_time_entropy() ^ getpid() ^ (unsigned long)parser;
/* Factors are 2^31-1 and 2^61-1 (Mersenne primes M31 and M61) */
if (sizeof(unsigned long) == 4) {
return entropy * 2147483647;
} else {
return entropy * (unsigned long)2305843009213693951;
}
} }
static XML_Bool /* only valid for root parser */ static XML_Bool /* only valid for root parser */
@@ -710,7 +740,7 @@ startParsing(XML_Parser parser)
{ {
/* hash functions must be initialized before setContext() is called */ /* hash functions must be initialized before setContext() is called */
if (hash_secret_salt == 0) if (hash_secret_salt == 0)
hash_secret_salt = generate_hash_secret_salt(); hash_secret_salt = generate_hash_secret_salt(parser);
if (ns) { if (ns) {
/* implicit context only set for root parser, since child /* implicit context only set for root parser, since child
parsers (i.e. external entity parsers) will inherit it parsers (i.e. external entity parsers) will inherit it
@@ -1700,14 +1730,17 @@ XML_GetBuffer(XML_Parser parser, int len)
} }
if (len > bufferLim - bufferEnd) { if (len > bufferLim - bufferEnd) {
int neededSize = len + (int)(bufferEnd - bufferPtr); #ifdef XML_CONTEXT_BYTES
int keep;
#endif /* defined XML_CONTEXT_BYTES */
/* Do not invoke signed arithmetic overflow: */
int neededSize = (int) ((unsigned)len + (unsigned)(bufferEnd - bufferPtr));
if (neededSize < 0) { if (neededSize < 0) {
errorCode = XML_ERROR_NO_MEMORY; errorCode = XML_ERROR_NO_MEMORY;
return NULL; return NULL;
} }
#ifdef XML_CONTEXT_BYTES #ifdef XML_CONTEXT_BYTES
int keep = (int)(bufferPtr - buffer); keep = (int)(bufferPtr - buffer);
if (keep > XML_CONTEXT_BYTES) if (keep > XML_CONTEXT_BYTES)
keep = XML_CONTEXT_BYTES; keep = XML_CONTEXT_BYTES;
neededSize += keep; neededSize += keep;
@@ -1732,7 +1765,8 @@ XML_GetBuffer(XML_Parser parser, int len)
if (bufferSize == 0) if (bufferSize == 0)
bufferSize = INIT_BUFFER_SIZE; bufferSize = INIT_BUFFER_SIZE;
do { do {
bufferSize *= 2; /* Do not invoke signed arithmetic overflow: */
bufferSize = (int) (2U * (unsigned) bufferSize);
} while (bufferSize < neededSize && bufferSize > 0); } while (bufferSize < neededSize && bufferSize > 0);
if (bufferSize <= 0) { if (bufferSize <= 0) {
errorCode = XML_ERROR_NO_MEMORY; errorCode = XML_ERROR_NO_MEMORY;
@@ -1859,7 +1893,7 @@ XML_Index XMLCALL
XML_GetCurrentByteIndex(XML_Parser parser) XML_GetCurrentByteIndex(XML_Parser parser)
{ {
if (eventPtr) if (eventPtr)
return parseEndByteIndex - (parseEndPtr - eventPtr); return (XML_Index)(parseEndByteIndex - (parseEndPtr - eventPtr));
return -1; return -1;
} }
@@ -2433,11 +2467,11 @@ doContent(XML_Parser parser,
for (;;) { for (;;) {
int bufSize; int bufSize;
int convLen; int convLen;
XmlConvert(enc, const enum XML_Convert_Result convert_res = XmlConvert(enc,
&fromPtr, rawNameEnd, &fromPtr, rawNameEnd,
(ICHAR **)&toPtr, (ICHAR *)tag->bufEnd - 1); (ICHAR **)&toPtr, (ICHAR *)tag->bufEnd - 1);
convLen = (int)(toPtr - (XML_Char *)tag->buf); convLen = (int)(toPtr - (XML_Char *)tag->buf);
if (fromPtr == rawNameEnd) { if ((convert_res == XML_CONVERT_COMPLETED) || (convert_res == XML_CONVERT_INPUT_INCOMPLETE)) {
tag->name.strLen = convLen; tag->name.strLen = convLen;
break; break;
} }
@@ -2658,11 +2692,11 @@ doContent(XML_Parser parser,
if (MUST_CONVERT(enc, s)) { if (MUST_CONVERT(enc, s)) {
for (;;) { for (;;) {
ICHAR *dataPtr = (ICHAR *)dataBuf; ICHAR *dataPtr = (ICHAR *)dataBuf;
XmlConvert(enc, &s, next, &dataPtr, (ICHAR *)dataBufEnd); const enum XML_Convert_Result convert_res = XmlConvert(enc, &s, next, &dataPtr, (ICHAR *)dataBufEnd);
*eventEndPP = s; *eventEndPP = s;
charDataHandler(handlerArg, dataBuf, charDataHandler(handlerArg, dataBuf,
(int)(dataPtr - (ICHAR *)dataBuf)); (int)(dataPtr - (ICHAR *)dataBuf));
if (s == next) if ((convert_res == XML_CONVERT_COMPLETED) || (convert_res == XML_CONVERT_INPUT_INCOMPLETE))
break; break;
*eventPP = s; *eventPP = s;
} }
@@ -3268,11 +3302,11 @@ doCdataSection(XML_Parser parser,
if (MUST_CONVERT(enc, s)) { if (MUST_CONVERT(enc, s)) {
for (;;) { for (;;) {
ICHAR *dataPtr = (ICHAR *)dataBuf; ICHAR *dataPtr = (ICHAR *)dataBuf;
XmlConvert(enc, &s, next, &dataPtr, (ICHAR *)dataBufEnd); const enum XML_Convert_Result convert_res = XmlConvert(enc, &s, next, &dataPtr, (ICHAR *)dataBufEnd);
*eventEndPP = next; *eventEndPP = next;
charDataHandler(handlerArg, dataBuf, charDataHandler(handlerArg, dataBuf,
(int)(dataPtr - (ICHAR *)dataBuf)); (int)(dataPtr - (ICHAR *)dataBuf));
if (s == next) if ((convert_res == XML_CONVERT_COMPLETED) || (convert_res == XML_CONVERT_INPUT_INCOMPLETE))
break; break;
*eventPP = s; *eventPP = s;
} }
@@ -4931,9 +4965,9 @@ internalEntityProcessor(XML_Parser parser,
static enum XML_Error PTRCALL static enum XML_Error PTRCALL
errorProcessor(XML_Parser parser, errorProcessor(XML_Parser parser,
const char *s, const char *UNUSED_P(s),
const char *end, const char *UNUSED_P(end),
const char **nextPtr) const char **UNUSED_P(nextPtr))
{ {
return errorCode; return errorCode;
} }
@@ -5349,6 +5383,7 @@ reportDefault(XML_Parser parser, const ENCODING *enc,
const char *s, const char *end) const char *s, const char *end)
{ {
if (MUST_CONVERT(enc, s)) { if (MUST_CONVERT(enc, s)) {
enum XML_Convert_Result convert_res;
const char **eventPP; const char **eventPP;
const char **eventEndPP; const char **eventEndPP;
if (enc == encoding) { if (enc == encoding) {
@@ -5361,11 +5396,11 @@ reportDefault(XML_Parser parser, const ENCODING *enc,
} }
do { do {
ICHAR *dataPtr = (ICHAR *)dataBuf; ICHAR *dataPtr = (ICHAR *)dataBuf;
XmlConvert(enc, &s, end, &dataPtr, (ICHAR *)dataBufEnd); convert_res = XmlConvert(enc, &s, end, &dataPtr, (ICHAR *)dataBufEnd);
*eventEndPP = s; *eventEndPP = s;
defaultHandler(handlerArg, dataBuf, (int)(dataPtr - (ICHAR *)dataBuf)); defaultHandler(handlerArg, dataBuf, (int)(dataPtr - (ICHAR *)dataBuf));
*eventPP = s; *eventPP = s;
} while (s != end); } while ((convert_res != XML_CONVERT_COMPLETED) && (convert_res != XML_CONVERT_INPUT_INCOMPLETE));
} }
else else
defaultHandler(handlerArg, (XML_Char *)s, (int)((XML_Char *)end - (XML_Char *)s)); defaultHandler(handlerArg, (XML_Char *)s, (int)((XML_Char *)end - (XML_Char *)s));
@@ -6170,8 +6205,8 @@ poolAppend(STRING_POOL *pool, const ENCODING *enc,
if (!pool->ptr && !poolGrow(pool)) if (!pool->ptr && !poolGrow(pool))
return NULL; return NULL;
for (;;) { for (;;) {
XmlConvert(enc, &ptr, end, (ICHAR **)&(pool->ptr), (ICHAR *)pool->end); const enum XML_Convert_Result convert_res = XmlConvert(enc, &ptr, end, (ICHAR **)&(pool->ptr), (ICHAR *)pool->end);
if (ptr == end) if ((convert_res == XML_CONVERT_COMPLETED) || (convert_res == XML_CONVERT_INPUT_INCOMPLETE))
break; break;
if (!poolGrow(pool)) if (!poolGrow(pool))
return NULL; return NULL;
@@ -6255,8 +6290,13 @@ poolGrow(STRING_POOL *pool)
} }
} }
if (pool->blocks && pool->start == pool->blocks->s) { if (pool->blocks && pool->start == pool->blocks->s) {
int blockSize = (int)(pool->end - pool->start)*2; BLOCK *temp;
BLOCK *temp = (BLOCK *) int blockSize = (int)((unsigned)(pool->end - pool->start)*2U);
if (blockSize < 0)
return XML_FALSE;
temp = (BLOCK *)
pool->mem->realloc_fcn(pool->blocks, pool->mem->realloc_fcn(pool->blocks,
(offsetof(BLOCK, s) (offsetof(BLOCK, s)
+ blockSize * sizeof(XML_Char))); + blockSize * sizeof(XML_Char)));
@@ -6271,6 +6311,10 @@ poolGrow(STRING_POOL *pool)
else { else {
BLOCK *tem; BLOCK *tem;
int blockSize = (int)(pool->end - pool->start); int blockSize = (int)(pool->end - pool->start);
if (blockSize < 0)
return XML_FALSE;
if (blockSize < INIT_BLOCK_SIZE) if (blockSize < INIT_BLOCK_SIZE)
blockSize = INIT_BLOCK_SIZE; blockSize = INIT_BLOCK_SIZE;
else else

View File

@@ -4,7 +4,7 @@
#include <stddef.h> #include <stddef.h>
#ifdef COMPILED_FROM_DSP #ifdef EXPAT_WIN32
#include "winconfig.h" #include "winconfig.h"
#elif defined(MACOS_CLASSIC) #elif defined(MACOS_CLASSIC)
#include "macconfig.h" #include "macconfig.h"
@@ -16,7 +16,7 @@
#ifdef HAVE_EXPAT_CONFIG_H #ifdef HAVE_EXPAT_CONFIG_H
#include "expat_config.h" #include "expat_config.h"
#endif #endif
#endif /* ndef COMPILED_FROM_DSP */ #endif /* ndef EXPAT_WIN32 */
#include "Poco/XML/expat_external.h" #include "Poco/XML/expat_external.h"
#include "internal.h" #include "internal.h"
@@ -195,9 +195,9 @@ prolog1(PROLOG_STATE *state,
static int PTRCALL static int PTRCALL
prolog2(PROLOG_STATE *state, prolog2(PROLOG_STATE *state,
int tok, int tok,
const char *ptr, const char *UNUSED_P(ptr),
const char *end, const char *UNUSED_P(end),
const ENCODING *enc) const ENCODING *UNUSED_P(enc))
{ {
switch (tok) { switch (tok) {
case XML_TOK_PROLOG_S: case XML_TOK_PROLOG_S:
@@ -216,9 +216,9 @@ prolog2(PROLOG_STATE *state,
static int PTRCALL static int PTRCALL
doctype0(PROLOG_STATE *state, doctype0(PROLOG_STATE *state,
int tok, int tok,
const char *ptr, const char *UNUSED_P(ptr),
const char *end, const char *UNUSED_P(end),
const ENCODING *enc) const ENCODING *UNUSED_P(enc))
{ {
switch (tok) { switch (tok) {
case XML_TOK_PROLOG_S: case XML_TOK_PROLOG_S:
@@ -264,9 +264,9 @@ doctype1(PROLOG_STATE *state,
static int PTRCALL static int PTRCALL
doctype2(PROLOG_STATE *state, doctype2(PROLOG_STATE *state,
int tok, int tok,
const char *ptr, const char *UNUSED_P(ptr),
const char *end, const char *UNUSED_P(end),
const ENCODING *enc) const ENCODING *UNUSED_P(enc))
{ {
switch (tok) { switch (tok) {
case XML_TOK_PROLOG_S: case XML_TOK_PROLOG_S:
@@ -281,9 +281,9 @@ doctype2(PROLOG_STATE *state,
static int PTRCALL static int PTRCALL
doctype3(PROLOG_STATE *state, doctype3(PROLOG_STATE *state,
int tok, int tok,
const char *ptr, const char *UNUSED_P(ptr),
const char *end, const char *UNUSED_P(end),
const ENCODING *enc) const ENCODING *UNUSED_P(enc))
{ {
switch (tok) { switch (tok) {
case XML_TOK_PROLOG_S: case XML_TOK_PROLOG_S:
@@ -298,9 +298,9 @@ doctype3(PROLOG_STATE *state,
static int PTRCALL static int PTRCALL
doctype4(PROLOG_STATE *state, doctype4(PROLOG_STATE *state,
int tok, int tok,
const char *ptr, const char *UNUSED_P(ptr),
const char *end, const char *UNUSED_P(end),
const ENCODING *enc) const ENCODING *UNUSED_P(enc))
{ {
switch (tok) { switch (tok) {
case XML_TOK_PROLOG_S: case XML_TOK_PROLOG_S:
@@ -318,9 +318,9 @@ doctype4(PROLOG_STATE *state,
static int PTRCALL static int PTRCALL
doctype5(PROLOG_STATE *state, doctype5(PROLOG_STATE *state,
int tok, int tok,
const char *ptr, const char *UNUSED_P(ptr),
const char *end, const char *UNUSED_P(end),
const ENCODING *enc) const ENCODING *UNUSED_P(enc))
{ {
switch (tok) { switch (tok) {
case XML_TOK_PROLOG_S: case XML_TOK_PROLOG_S:
@@ -437,9 +437,9 @@ externalSubset1(PROLOG_STATE *state,
static int PTRCALL static int PTRCALL
entity0(PROLOG_STATE *state, entity0(PROLOG_STATE *state,
int tok, int tok,
const char *ptr, const char *UNUSED_P(ptr),
const char *end, const char *UNUSED_P(end),
const ENCODING *enc) const ENCODING *UNUSED_P(enc))
{ {
switch (tok) { switch (tok) {
case XML_TOK_PROLOG_S: case XML_TOK_PROLOG_S:
@@ -457,9 +457,9 @@ entity0(PROLOG_STATE *state,
static int PTRCALL static int PTRCALL
entity1(PROLOG_STATE *state, entity1(PROLOG_STATE *state,
int tok, int tok,
const char *ptr, const char *UNUSED_P(ptr),
const char *end, const char *UNUSED_P(end),
const ENCODING *enc) const ENCODING *UNUSED_P(enc))
{ {
switch (tok) { switch (tok) {
case XML_TOK_PROLOG_S: case XML_TOK_PROLOG_S:
@@ -502,9 +502,9 @@ entity2(PROLOG_STATE *state,
static int PTRCALL static int PTRCALL
entity3(PROLOG_STATE *state, entity3(PROLOG_STATE *state,
int tok, int tok,
const char *ptr, const char *UNUSED_P(ptr),
const char *end, const char *UNUSED_P(end),
const ENCODING *enc) const ENCODING *UNUSED_P(enc))
{ {
switch (tok) { switch (tok) {
case XML_TOK_PROLOG_S: case XML_TOK_PROLOG_S:
@@ -519,9 +519,9 @@ entity3(PROLOG_STATE *state,
static int PTRCALL static int PTRCALL
entity4(PROLOG_STATE *state, entity4(PROLOG_STATE *state,
int tok, int tok,
const char *ptr, const char *UNUSED_P(ptr),
const char *end, const char *UNUSED_P(end),
const ENCODING *enc) const ENCODING *UNUSED_P(enc))
{ {
switch (tok) { switch (tok) {
case XML_TOK_PROLOG_S: case XML_TOK_PROLOG_S:
@@ -559,9 +559,9 @@ entity5(PROLOG_STATE *state,
static int PTRCALL static int PTRCALL
entity6(PROLOG_STATE *state, entity6(PROLOG_STATE *state,
int tok, int tok,
const char *ptr, const char *UNUSED_P(ptr),
const char *end, const char *UNUSED_P(end),
const ENCODING *enc) const ENCODING *UNUSED_P(enc))
{ {
switch (tok) { switch (tok) {
case XML_TOK_PROLOG_S: case XML_TOK_PROLOG_S:
@@ -605,9 +605,9 @@ entity7(PROLOG_STATE *state,
static int PTRCALL static int PTRCALL
entity8(PROLOG_STATE *state, entity8(PROLOG_STATE *state,
int tok, int tok,
const char *ptr, const char *UNUSED_P(ptr),
const char *end, const char *UNUSED_P(end),
const ENCODING *enc) const ENCODING *UNUSED_P(enc))
{ {
switch (tok) { switch (tok) {
case XML_TOK_PROLOG_S: case XML_TOK_PROLOG_S:
@@ -622,9 +622,9 @@ entity8(PROLOG_STATE *state,
static int PTRCALL static int PTRCALL
entity9(PROLOG_STATE *state, entity9(PROLOG_STATE *state,
int tok, int tok,
const char *ptr, const char *UNUSED_P(ptr),
const char *end, const char *UNUSED_P(end),
const ENCODING *enc) const ENCODING *UNUSED_P(enc))
{ {
switch (tok) { switch (tok) {
case XML_TOK_PROLOG_S: case XML_TOK_PROLOG_S:
@@ -639,9 +639,9 @@ entity9(PROLOG_STATE *state,
static int PTRCALL static int PTRCALL
entity10(PROLOG_STATE *state, entity10(PROLOG_STATE *state,
int tok, int tok,
const char *ptr, const char *UNUSED_P(ptr),
const char *end, const char *UNUSED_P(end),
const ENCODING *enc) const ENCODING *UNUSED_P(enc))
{ {
switch (tok) { switch (tok) {
case XML_TOK_PROLOG_S: case XML_TOK_PROLOG_S:
@@ -656,9 +656,9 @@ entity10(PROLOG_STATE *state,
static int PTRCALL static int PTRCALL
notation0(PROLOG_STATE *state, notation0(PROLOG_STATE *state,
int tok, int tok,
const char *ptr, const char *UNUSED_P(ptr),
const char *end, const char *UNUSED_P(end),
const ENCODING *enc) const ENCODING *UNUSED_P(enc))
{ {
switch (tok) { switch (tok) {
case XML_TOK_PROLOG_S: case XML_TOK_PROLOG_S:
@@ -697,9 +697,9 @@ notation1(PROLOG_STATE *state,
static int PTRCALL static int PTRCALL
notation2(PROLOG_STATE *state, notation2(PROLOG_STATE *state,
int tok, int tok,
const char *ptr, const char *UNUSED_P(ptr),
const char *end, const char *UNUSED_P(end),
const ENCODING *enc) const ENCODING *UNUSED_P(enc))
{ {
switch (tok) { switch (tok) {
case XML_TOK_PROLOG_S: case XML_TOK_PROLOG_S:
@@ -714,9 +714,9 @@ notation2(PROLOG_STATE *state,
static int PTRCALL static int PTRCALL
notation3(PROLOG_STATE *state, notation3(PROLOG_STATE *state,
int tok, int tok,
const char *ptr, const char *UNUSED_P(ptr),
const char *end, const char *UNUSED_P(end),
const ENCODING *enc) const ENCODING *UNUSED_P(enc))
{ {
switch (tok) { switch (tok) {
case XML_TOK_PROLOG_S: case XML_TOK_PROLOG_S:
@@ -732,9 +732,9 @@ notation3(PROLOG_STATE *state,
static int PTRCALL static int PTRCALL
notation4(PROLOG_STATE *state, notation4(PROLOG_STATE *state,
int tok, int tok,
const char *ptr, const char *UNUSED_P(ptr),
const char *end, const char *UNUSED_P(end),
const ENCODING *enc) const ENCODING *UNUSED_P(enc))
{ {
switch (tok) { switch (tok) {
case XML_TOK_PROLOG_S: case XML_TOK_PROLOG_S:
@@ -753,9 +753,9 @@ notation4(PROLOG_STATE *state,
static int PTRCALL static int PTRCALL
attlist0(PROLOG_STATE *state, attlist0(PROLOG_STATE *state,
int tok, int tok,
const char *ptr, const char *UNUSED_P(ptr),
const char *end, const char *UNUSED_P(end),
const ENCODING *enc) const ENCODING *UNUSED_P(enc))
{ {
switch (tok) { switch (tok) {
case XML_TOK_PROLOG_S: case XML_TOK_PROLOG_S:
@@ -771,9 +771,9 @@ attlist0(PROLOG_STATE *state,
static int PTRCALL static int PTRCALL
attlist1(PROLOG_STATE *state, attlist1(PROLOG_STATE *state,
int tok, int tok,
const char *ptr, const char *UNUSED_P(ptr),
const char *end, const char *UNUSED_P(end),
const ENCODING *enc) const ENCODING *UNUSED_P(enc))
{ {
switch (tok) { switch (tok) {
case XML_TOK_PROLOG_S: case XML_TOK_PROLOG_S:
@@ -833,9 +833,9 @@ attlist2(PROLOG_STATE *state,
static int PTRCALL static int PTRCALL
attlist3(PROLOG_STATE *state, attlist3(PROLOG_STATE *state,
int tok, int tok,
const char *ptr, const char *UNUSED_P(ptr),
const char *end, const char *UNUSED_P(end),
const ENCODING *enc) const ENCODING *UNUSED_P(enc))
{ {
switch (tok) { switch (tok) {
case XML_TOK_PROLOG_S: case XML_TOK_PROLOG_S:
@@ -852,9 +852,9 @@ attlist3(PROLOG_STATE *state,
static int PTRCALL static int PTRCALL
attlist4(PROLOG_STATE *state, attlist4(PROLOG_STATE *state,
int tok, int tok,
const char *ptr, const char *UNUSED_P(ptr),
const char *end, const char *UNUSED_P(end),
const ENCODING *enc) const ENCODING *UNUSED_P(enc))
{ {
switch (tok) { switch (tok) {
case XML_TOK_PROLOG_S: case XML_TOK_PROLOG_S:
@@ -872,9 +872,9 @@ attlist4(PROLOG_STATE *state,
static int PTRCALL static int PTRCALL
attlist5(PROLOG_STATE *state, attlist5(PROLOG_STATE *state,
int tok, int tok,
const char *ptr, const char *UNUSED_P(ptr),
const char *end, const char *UNUSED_P(end),
const ENCODING *enc) const ENCODING *UNUSED_P(enc))
{ {
switch (tok) { switch (tok) {
case XML_TOK_PROLOG_S: case XML_TOK_PROLOG_S:
@@ -889,9 +889,9 @@ attlist5(PROLOG_STATE *state,
static int PTRCALL static int PTRCALL
attlist6(PROLOG_STATE *state, attlist6(PROLOG_STATE *state,
int tok, int tok,
const char *ptr, const char *UNUSED_P(ptr),
const char *end, const char *UNUSED_P(end),
const ENCODING *enc) const ENCODING *UNUSED_P(enc))
{ {
switch (tok) { switch (tok) {
case XML_TOK_PROLOG_S: case XML_TOK_PROLOG_S:
@@ -906,9 +906,9 @@ attlist6(PROLOG_STATE *state,
static int PTRCALL static int PTRCALL
attlist7(PROLOG_STATE *state, attlist7(PROLOG_STATE *state,
int tok, int tok,
const char *ptr, const char *UNUSED_P(ptr),
const char *end, const char *UNUSED_P(end),
const ENCODING *enc) const ENCODING *UNUSED_P(enc))
{ {
switch (tok) { switch (tok) {
case XML_TOK_PROLOG_S: case XML_TOK_PROLOG_S:
@@ -967,9 +967,9 @@ attlist8(PROLOG_STATE *state,
static int PTRCALL static int PTRCALL
attlist9(PROLOG_STATE *state, attlist9(PROLOG_STATE *state,
int tok, int tok,
const char *ptr, const char *UNUSED_P(ptr),
const char *end, const char *UNUSED_P(end),
const ENCODING *enc) const ENCODING *UNUSED_P(enc))
{ {
switch (tok) { switch (tok) {
case XML_TOK_PROLOG_S: case XML_TOK_PROLOG_S:
@@ -984,9 +984,9 @@ attlist9(PROLOG_STATE *state,
static int PTRCALL static int PTRCALL
element0(PROLOG_STATE *state, element0(PROLOG_STATE *state,
int tok, int tok,
const char *ptr, const char *UNUSED_P(ptr),
const char *end, const char *UNUSED_P(end),
const ENCODING *enc) const ENCODING *UNUSED_P(enc))
{ {
switch (tok) { switch (tok) {
case XML_TOK_PROLOG_S: case XML_TOK_PROLOG_S:
@@ -1072,9 +1072,9 @@ element2(PROLOG_STATE *state,
static int PTRCALL static int PTRCALL
element3(PROLOG_STATE *state, element3(PROLOG_STATE *state,
int tok, int tok,
const char *ptr, const char *UNUSED_P(ptr),
const char *end, const char *UNUSED_P(end),
const ENCODING *enc) const ENCODING *UNUSED_P(enc))
{ {
switch (tok) { switch (tok) {
case XML_TOK_PROLOG_S: case XML_TOK_PROLOG_S:
@@ -1097,9 +1097,9 @@ element3(PROLOG_STATE *state,
static int PTRCALL static int PTRCALL
element4(PROLOG_STATE *state, element4(PROLOG_STATE *state,
int tok, int tok,
const char *ptr, const char *UNUSED_P(ptr),
const char *end, const char *UNUSED_P(end),
const ENCODING *enc) const ENCODING *UNUSED_P(enc))
{ {
switch (tok) { switch (tok) {
case XML_TOK_PROLOG_S: case XML_TOK_PROLOG_S:
@@ -1115,9 +1115,9 @@ element4(PROLOG_STATE *state,
static int PTRCALL static int PTRCALL
element5(PROLOG_STATE *state, element5(PROLOG_STATE *state,
int tok, int tok,
const char *ptr, const char *UNUSED_P(ptr),
const char *end, const char *UNUSED_P(end),
const ENCODING *enc) const ENCODING *UNUSED_P(enc))
{ {
switch (tok) { switch (tok) {
case XML_TOK_PROLOG_S: case XML_TOK_PROLOG_S:
@@ -1136,9 +1136,9 @@ element5(PROLOG_STATE *state,
static int PTRCALL static int PTRCALL
element6(PROLOG_STATE *state, element6(PROLOG_STATE *state,
int tok, int tok,
const char *ptr, const char *UNUSED_P(ptr),
const char *end, const char *UNUSED_P(end),
const ENCODING *enc) const ENCODING *UNUSED_P(enc))
{ {
switch (tok) { switch (tok) {
case XML_TOK_PROLOG_S: case XML_TOK_PROLOG_S:
@@ -1166,9 +1166,9 @@ element6(PROLOG_STATE *state,
static int PTRCALL static int PTRCALL
element7(PROLOG_STATE *state, element7(PROLOG_STATE *state,
int tok, int tok,
const char *ptr, const char *UNUSED_P(ptr),
const char *end, const char *UNUSED_P(end),
const ENCODING *enc) const ENCODING *UNUSED_P(enc))
{ {
switch (tok) { switch (tok) {
case XML_TOK_PROLOG_S: case XML_TOK_PROLOG_S:
@@ -1240,9 +1240,9 @@ condSect0(PROLOG_STATE *state,
static int PTRCALL static int PTRCALL
condSect1(PROLOG_STATE *state, condSect1(PROLOG_STATE *state,
int tok, int tok,
const char *ptr, const char *UNUSED_P(ptr),
const char *end, const char *UNUSED_P(end),
const ENCODING *enc) const ENCODING *UNUSED_P(enc))
{ {
switch (tok) { switch (tok) {
case XML_TOK_PROLOG_S: case XML_TOK_PROLOG_S:
@@ -1258,9 +1258,9 @@ condSect1(PROLOG_STATE *state,
static int PTRCALL static int PTRCALL
condSect2(PROLOG_STATE *state, condSect2(PROLOG_STATE *state,
int tok, int tok,
const char *ptr, const char *UNUSED_P(ptr),
const char *end, const char *UNUSED_P(end),
const ENCODING *enc) const ENCODING *UNUSED_P(enc))
{ {
switch (tok) { switch (tok) {
case XML_TOK_PROLOG_S: case XML_TOK_PROLOG_S:
@@ -1277,9 +1277,9 @@ condSect2(PROLOG_STATE *state,
static int PTRCALL static int PTRCALL
declClose(PROLOG_STATE *state, declClose(PROLOG_STATE *state,
int tok, int tok,
const char *ptr, const char *UNUSED_P(ptr),
const char *end, const char *UNUSED_P(end),
const ENCODING *enc) const ENCODING *UNUSED_P(enc))
{ {
switch (tok) { switch (tok) {
case XML_TOK_PROLOG_S: case XML_TOK_PROLOG_S:
@@ -1292,11 +1292,11 @@ declClose(PROLOG_STATE *state,
} }
static int PTRCALL static int PTRCALL
error(PROLOG_STATE *state, error(PROLOG_STATE *UNUSED_P(state),
int tok, int UNUSED_P(tok),
const char *ptr, const char *UNUSED_P(ptr),
const char *end, const char *UNUSED_P(end),
const ENCODING *enc) const ENCODING *UNUSED_P(enc))
{ {
return XML_ROLE_NONE; return XML_ROLE_NONE;
} }

View File

@@ -4,7 +4,7 @@
#include <stddef.h> #include <stddef.h>
#ifdef COMPILED_FROM_DSP #ifdef EXPAT_WIN32
#include "winconfig.h" #include "winconfig.h"
#elif defined(MACOS_CLASSIC) #elif defined(MACOS_CLASSIC)
#include "macconfig.h" #include "macconfig.h"
@@ -16,7 +16,7 @@
#ifdef HAVE_EXPAT_CONFIG_H #ifdef HAVE_EXPAT_CONFIG_H
#include "expat_config.h" #include "expat_config.h"
#endif #endif
#endif /* ndef COMPILED_FROM_DSP */ #endif /* ndef EXPAT_WIN32 */
#include "Poco/XML/expat_external.h" #include "Poco/XML/expat_external.h"
#include "internal.h" #include "internal.h"
@@ -46,7 +46,7 @@
#define VTABLE VTABLE1, PREFIX(toUtf8), PREFIX(toUtf16) #define VTABLE VTABLE1, PREFIX(toUtf8), PREFIX(toUtf16)
#define UCS2_GET_NAMING(pages, hi, lo) \ #define UCS2_GET_NAMING(pages, hi, lo) \
(namingBitmap[(pages[hi] << 3) + ((lo) >> 5)] & (1 << ((lo) & 0x1F))) (namingBitmap[(pages[hi] << 3) + ((lo) >> 5)] & (1u << ((lo) & 0x1F)))
/* A 2 byte UTF-8 representation splits the characters 11 bits between /* A 2 byte UTF-8 representation splits the characters 11 bits between
the bottom 5 and 6 bits of the bytes. We need 8 bits to index into the bottom 5 and 6 bits of the bytes. We need 8 bits to index into
@@ -56,7 +56,7 @@
(namingBitmap[((pages)[(((byte)[0]) >> 2) & 7] << 3) \ (namingBitmap[((pages)[(((byte)[0]) >> 2) & 7] << 3) \
+ ((((byte)[0]) & 3) << 1) \ + ((((byte)[0]) & 3) << 1) \
+ ((((byte)[1]) >> 5) & 1)] \ + ((((byte)[1]) >> 5) & 1)] \
& (1 << (((byte)[1]) & 0x1F))) & (1u << (((byte)[1]) & 0x1F)))
/* A 3 byte UTF-8 representation splits the characters 16 bits between /* A 3 byte UTF-8 representation splits the characters 16 bits between
the bottom 4, 6 and 6 bits of the bytes. We need 8 bits to index the bottom 4, 6 and 6 bits of the bytes. We need 8 bits to index
@@ -69,7 +69,7 @@
<< 3) \ << 3) \
+ ((((byte)[1]) & 3) << 1) \ + ((((byte)[1]) & 3) << 1) \
+ ((((byte)[2]) >> 5) & 1)] \ + ((((byte)[2]) >> 5) & 1)] \
& (1 << (((byte)[2]) & 0x1F))) & (1u << (((byte)[2]) & 0x1F)))
#define UTF8_GET_NAMING(pages, p, n) \ #define UTF8_GET_NAMING(pages, p, n) \
((n) == 2 \ ((n) == 2 \
@@ -122,19 +122,19 @@
((*p) == 0xF4 ? (p)[1] > 0x8F : ((p)[1] & 0xC0) == 0xC0))) ((*p) == 0xF4 ? (p)[1] > 0x8F : ((p)[1] & 0xC0) == 0xC0)))
static int PTRFASTCALL static int PTRFASTCALL
isNever(const ENCODING *enc, const char *p) isNever(const ENCODING *UNUSED_P(enc), const char *UNUSED_P(p))
{ {
return 0; return 0;
} }
static int PTRFASTCALL static int PTRFASTCALL
utf8_isName2(const ENCODING *enc, const char *p) utf8_isName2(const ENCODING *UNUSED_P(enc), const char *p)
{ {
return UTF8_GET_NAMING2(namePages, (const unsigned char *)p); return UTF8_GET_NAMING2(namePages, (const unsigned char *)p);
} }
static int PTRFASTCALL static int PTRFASTCALL
utf8_isName3(const ENCODING *enc, const char *p) utf8_isName3(const ENCODING *UNUSED_P(enc), const char *p)
{ {
return UTF8_GET_NAMING3(namePages, (const unsigned char *)p); return UTF8_GET_NAMING3(namePages, (const unsigned char *)p);
} }
@@ -142,13 +142,13 @@ utf8_isName3(const ENCODING *enc, const char *p)
#define utf8_isName4 isNever #define utf8_isName4 isNever
static int PTRFASTCALL static int PTRFASTCALL
utf8_isNmstrt2(const ENCODING *enc, const char *p) utf8_isNmstrt2(const ENCODING *UNUSED_P(enc), const char *p)
{ {
return UTF8_GET_NAMING2(nmstrtPages, (const unsigned char *)p); return UTF8_GET_NAMING2(nmstrtPages, (const unsigned char *)p);
} }
static int PTRFASTCALL static int PTRFASTCALL
utf8_isNmstrt3(const ENCODING *enc, const char *p) utf8_isNmstrt3(const ENCODING *UNUSED_P(enc), const char *p)
{ {
return UTF8_GET_NAMING3(nmstrtPages, (const unsigned char *)p); return UTF8_GET_NAMING3(nmstrtPages, (const unsigned char *)p);
} }
@@ -156,19 +156,19 @@ utf8_isNmstrt3(const ENCODING *enc, const char *p)
#define utf8_isNmstrt4 isNever #define utf8_isNmstrt4 isNever
static int PTRFASTCALL static int PTRFASTCALL
utf8_isInvalid2(const ENCODING *enc, const char *p) utf8_isInvalid2(const ENCODING *UNUSED_P(enc), const char *p)
{ {
return UTF8_INVALID2((const unsigned char *)p); return UTF8_INVALID2((const unsigned char *)p);
} }
/* Function wrapper around the UTF8_INVALID3 macro: evaluates it on the bytes
   at p (viewed as unsigned char) and returns the result; enc is not
   consulted. */
static int PTRFASTCALL static int PTRFASTCALL
utf8_isInvalid3(const ENCODING *enc, const char *p) utf8_isInvalid3(const ENCODING *UNUSED_P(enc), const char *p)
{ {
return UTF8_INVALID3((const unsigned char *)p); return UTF8_INVALID3((const unsigned char *)p);
} }
/* Function wrapper around the UTF8_INVALID4 macro: evaluates it on the bytes
   at p (viewed as unsigned char) and returns the result; enc is not
   consulted. */
static int PTRFASTCALL static int PTRFASTCALL
utf8_isInvalid4(const ENCODING *enc, const char *p) utf8_isInvalid4(const ENCODING *UNUSED_P(enc), const char *p)
{ {
return UTF8_INVALID4((const unsigned char *)p); return UTF8_INVALID4((const unsigned char *)p);
} }
@@ -222,6 +222,17 @@ struct normal_encoding {
E ## isInvalid3, \ E ## isInvalid3, \
E ## isInvalid4 E ## isInvalid4
/* Nine NULL initialisers, one per isName / isNmstrt / isInvalid slot, for
   encoding tables that supply no such callbacks.  Written after
   STANDARD_VTABLE(...) in a struct normal_encoding initialiser. */
#define NULL_VTABLE \
  /* isName2, isName3, isName4 */          NULL, NULL, NULL, \
  /* isNmstrt2, isNmstrt3, isNmstrt4 */    NULL, NULL, NULL, \
  /* isInvalid2, isInvalid3, isInvalid4 */ NULL, NULL, NULL
static int FASTCALL checkCharRefNumber(int); static int FASTCALL checkCharRefNumber(int);
#include "xmltok_impl.h" #include "xmltok_impl.h"
@@ -318,39 +329,89 @@ enum { /* UTF8_cvalN is value of masked first byte of N byte sequence */
UTF8_cval4 = 0xf0 UTF8_cval4 = 0xf0
}; };
/* Pulls *fromLimRef back so that the range [from, *fromLimRef) does not stop
   in the middle of a multi-byte UTF-8 character.

   from        start of the byte range; the scan never goes below it.
   fromLimRef  in: proposed end of the range; out: adjusted end.

   The loop walks backwards from the limit one byte at a time.  `walked`
   counts the bytes stepped over since the last reset.  When a lead byte is
   found, the character is kept (limit restored to just past it) if enough
   bytes were walked for its full length; otherwise the partial character is
   dropped and the scan continues.  A byte with the top bit clear ends the
   scan immediately. */
static void PTRCALL void
utf8_toUtf8(const ENCODING *enc, align_limit_to_full_utf8_characters(const char * from, const char ** fromLimRef)
{
const char * fromLim = *fromLimRef;
size_t walked = 0;
for (; fromLim > from; fromLim--, walked++) {
const unsigned char prev = (unsigned char)fromLim[-1];
if ((prev & 0xf8u) == 0xf0u) { /* 4-byte character, led by 0b11110xxx byte */
if (walked + 1 >= 4) {
fromLim += 4 - 1;
break;
} else {
walked = 0;
}
} else if ((prev & 0xf0u) == 0xe0u) { /* 3-byte character, led by 0b1110xxxx byte */
if (walked + 1 >= 3) {
fromLim += 3 - 1;
break;
} else {
walked = 0;
}
} else if ((prev & 0xe0u) == 0xc0u) { /* 2-byte character, led by 0b110xxxxx byte */
if (walked + 1 >= 2) {
fromLim += 2 - 1;
break;
} else {
walked = 0;
}
} else if ((prev & 0x80u) == 0x00u) { /* 1-byte character, matching 0b0xxxxxxx */
break;
}
}
*fromLimRef = fromLim;
}
/* UTF-8 to UTF-8 "conversion": a bounded byte copy from *fromP to *toP.

   When the input is longer than the remaining output space, the copy limit
   is first reduced to the output capacity and then handed to
   align_limit_to_full_utf8_characters() so that no partial character is
   copied; the result is XML_CONVERT_OUTPUT_EXHAUSTED in that case.
   Otherwise the result is XML_CONVERT_COMPLETED.  On return *fromP and *toP
   point just past the bytes consumed and written.  enc is not used. */
static enum XML_Convert_Result PTRCALL
utf8_toUtf8(const ENCODING *UNUSED_P(enc),
const char **fromP, const char *fromLim, const char **fromP, const char *fromLim,
char **toP, const char *toLim) char **toP, const char *toLim)
{ {
enum XML_Convert_Result res = XML_CONVERT_COMPLETED;
char *to; char *to;
const char *from; const char *from;
if (fromLim - *fromP > toLim - *toP) { if (fromLim - *fromP > toLim - *toP) {
/* Avoid copying partial characters. */ /* Avoid copying partial characters. */
for (fromLim = *fromP + (toLim - *toP); fromLim > *fromP; fromLim--) res = XML_CONVERT_OUTPUT_EXHAUSTED;
if (((unsigned char)fromLim[-1] & 0xc0) != 0x80) fromLim = *fromP + (toLim - *toP);
break; align_limit_to_full_utf8_characters(*fromP, &fromLim);
} }
for (to = *toP, from = *fromP; from != fromLim; from++, to++) for (to = *toP, from = *fromP; (from < fromLim) && (to < toLim); from++, to++)
*to = *from; *to = *from;
*fromP = from; *fromP = from;
*toP = to; *toP = to;
if ((to == toLim) && (from < fromLim))
return XML_CONVERT_OUTPUT_EXHAUSTED;
else
return res;
} }
static void PTRCALL static enum XML_Convert_Result PTRCALL
utf8_toUtf16(const ENCODING *enc, utf8_toUtf16(const ENCODING *enc,
const char **fromP, const char *fromLim, const char **fromP, const char *fromLim,
unsigned short **toP, const unsigned short *toLim) unsigned short **toP, const unsigned short *toLim)
{ {
enum XML_Convert_Result res = XML_CONVERT_COMPLETED;
unsigned short *to = *toP; unsigned short *to = *toP;
const char *from = *fromP; const char *from = *fromP;
while (from != fromLim && to != toLim) { while (from < fromLim && to < toLim) {
switch (((struct normal_encoding *)enc)->type[(unsigned char)*from]) { switch (((struct normal_encoding *)enc)->type[(unsigned char)*from]) {
case BT_LEAD2: case BT_LEAD2:
if (fromLim - from < 2) {
res = XML_CONVERT_INPUT_INCOMPLETE;
break;
}
*to++ = (unsigned short)(((from[0] & 0x1f) << 6) | (from[1] & 0x3f)); *to++ = (unsigned short)(((from[0] & 0x1f) << 6) | (from[1] & 0x3f));
from += 2; from += 2;
break; break;
case BT_LEAD3: case BT_LEAD3:
if (fromLim - from < 3) {
res = XML_CONVERT_INPUT_INCOMPLETE;
break;
}
*to++ = (unsigned short)(((from[0] & 0xf) << 12) *to++ = (unsigned short)(((from[0] & 0xf) << 12)
| ((from[1] & 0x3f) << 6) | (from[2] & 0x3f)); | ((from[1] & 0x3f) << 6) | (from[2] & 0x3f));
from += 3; from += 3;
@@ -358,8 +419,14 @@ utf8_toUtf16(const ENCODING *enc,
case BT_LEAD4: case BT_LEAD4:
{ {
unsigned long n; unsigned long n;
if (to + 1 == toLim) if (toLim - to < 2) {
res = XML_CONVERT_OUTPUT_EXHAUSTED;
goto after; goto after;
}
if (fromLim - from < 4) {
res = XML_CONVERT_INPUT_INCOMPLETE;
goto after;
}
n = ((from[0] & 0x7) << 18) | ((from[1] & 0x3f) << 12) n = ((from[0] & 0x7) << 18) | ((from[1] & 0x3f) << 12)
| ((from[2] & 0x3f) << 6) | (from[3] & 0x3f); | ((from[2] & 0x3f) << 6) | (from[3] & 0x3f);
n -= 0x10000; n -= 0x10000;
@@ -377,6 +444,7 @@ utf8_toUtf16(const ENCODING *enc,
after: after:
*fromP = from; *fromP = from;
*toP = to; *toP = to;
return res;
} }
#ifdef XML_NS #ifdef XML_NS
@@ -425,38 +493,43 @@ static const struct normal_encoding internal_utf8_encoding = {
STANDARD_VTABLE(sb_) NORMAL_VTABLE(utf8_) STANDARD_VTABLE(sb_) NORMAL_VTABLE(utf8_)
}; };
/* Converts single-byte input to UTF-8, one input byte per iteration.

   A byte with the high bit set is written as two bytes: (c >> 6) combined
   with UTF8_cval2, then (c & 0x3f) | 0x80.  Any other byte is copied
   unchanged.  *fromP and *toP are advanced in place.

   Returns XML_CONVERT_COMPLETED once *fromP reaches fromLim, or
   XML_CONVERT_OUTPUT_EXHAUSTED when the next character does not fit before
   toLim (the earlier revision just broke out of the loop and returned
   nothing).  enc is not used. */
static void PTRCALL static enum XML_Convert_Result PTRCALL
latin1_toUtf8(const ENCODING *enc, latin1_toUtf8(const ENCODING *UNUSED_P(enc),
const char **fromP, const char *fromLim, const char **fromP, const char *fromLim,
char **toP, const char *toLim) char **toP, const char *toLim)
{ {
for (;;) { for (;;) {
unsigned char c; unsigned char c;
if (*fromP == fromLim) if (*fromP == fromLim)
break; return XML_CONVERT_COMPLETED;
c = (unsigned char)**fromP; c = (unsigned char)**fromP;
if (c & 0x80) { if (c & 0x80) {
if (toLim - *toP < 2) if (toLim - *toP < 2)
break; return XML_CONVERT_OUTPUT_EXHAUSTED;
*(*toP)++ = (char)((c >> 6) | UTF8_cval2); *(*toP)++ = (char)((c >> 6) | UTF8_cval2);
*(*toP)++ = (char)((c & 0x3f) | 0x80); *(*toP)++ = (char)((c & 0x3f) | 0x80);
(*fromP)++; (*fromP)++;
} }
else { else {
if (*toP == toLim) if (*toP == toLim)
break; return XML_CONVERT_OUTPUT_EXHAUSTED;
*(*toP)++ = *(*fromP)++; *(*toP)++ = *(*fromP)++;
} }
} }
} }
/* Converts single-byte input to 16-bit units: each input byte, read as
   unsigned char, becomes one unsigned short.  Copies until either the input
   or the output limit is reached, advancing *fromP and *toP in place.

   Returns XML_CONVERT_OUTPUT_EXHAUSTED when the output filled up with input
   still pending, otherwise XML_CONVERT_COMPLETED.  enc is not used. */
static void PTRCALL static enum XML_Convert_Result PTRCALL
latin1_toUtf16(const ENCODING *enc, latin1_toUtf16(const ENCODING *UNUSED_P(enc),
const char **fromP, const char *fromLim, const char **fromP, const char *fromLim,
unsigned short **toP, const unsigned short *toLim) unsigned short **toP, const unsigned short *toLim)
{ {
while (*fromP != fromLim && *toP != toLim) while (*fromP < fromLim && *toP < toLim)
*(*toP)++ = (unsigned char)*(*fromP)++; *(*toP)++ = (unsigned char)*(*fromP)++;
if ((*toP == toLim) && (*fromP < fromLim))
return XML_CONVERT_OUTPUT_EXHAUSTED;
else
return XML_CONVERT_COMPLETED;
} }
#ifdef XML_NS #ifdef XML_NS
@@ -467,7 +540,7 @@ static const struct normal_encoding latin1_encoding_ns = {
#include "asciitab.h" #include "asciitab.h"
#include "latin1tab.h" #include "latin1tab.h"
}, },
STANDARD_VTABLE(sb_) STANDARD_VTABLE(sb_) NULL_VTABLE
}; };
#endif #endif
@@ -480,16 +553,21 @@ static const struct normal_encoding latin1_encoding = {
#undef BT_COLON #undef BT_COLON
#include "latin1tab.h" #include "latin1tab.h"
}, },
STANDARD_VTABLE(sb_) STANDARD_VTABLE(sb_) NULL_VTABLE
}; };
/* Byte-for-byte copy from *fromP to *toP, stopping at whichever of fromLim
   or toLim is reached first; *fromP and *toP are advanced in place.

   Returns XML_CONVERT_OUTPUT_EXHAUSTED when the output filled up with input
   still pending, otherwise XML_CONVERT_COMPLETED.  enc is not used. */
static void PTRCALL static enum XML_Convert_Result PTRCALL
ascii_toUtf8(const ENCODING *enc, ascii_toUtf8(const ENCODING *UNUSED_P(enc),
const char **fromP, const char *fromLim, const char **fromP, const char *fromLim,
char **toP, const char *toLim) char **toP, const char *toLim)
{ {
while (*fromP != fromLim && *toP != toLim) while (*fromP < fromLim && *toP < toLim)
*(*toP)++ = *(*fromP)++; *(*toP)++ = *(*fromP)++;
if ((*toP == toLim) && (*fromP < fromLim))
return XML_CONVERT_OUTPUT_EXHAUSTED;
else
return XML_CONVERT_COMPLETED;
} }
#ifdef XML_NS #ifdef XML_NS
@@ -500,7 +578,7 @@ static const struct normal_encoding ascii_encoding_ns = {
#include "asciitab.h" #include "asciitab.h"
/* BT_NONXML == 0 */ /* BT_NONXML == 0 */
}, },
STANDARD_VTABLE(sb_) STANDARD_VTABLE(sb_) NULL_VTABLE
}; };
#endif #endif
@@ -513,7 +591,7 @@ static const struct normal_encoding ascii_encoding = {
#undef BT_COLON #undef BT_COLON
/* BT_NONXML == 0 */ /* BT_NONXML == 0 */
}, },
STANDARD_VTABLE(sb_) STANDARD_VTABLE(sb_) NULL_VTABLE
}; };
static int PTRFASTCALL static int PTRFASTCALL
@@ -536,13 +614,14 @@ unicode_byte_type(char hi, char lo)
} }
#define DEFINE_UTF16_TO_UTF8(E) \ #define DEFINE_UTF16_TO_UTF8(E) \
static void PTRCALL \ static enum XML_Convert_Result PTRCALL \
E ## toUtf8(const ENCODING *enc, \ E ## toUtf8(const ENCODING *UNUSED_P(enc), \
const char **fromP, const char *fromLim, \ const char **fromP, const char *fromLim, \
char **toP, const char *toLim) \ char **toP, const char *toLim) \
{ \ { \
const char *from; \ const char *from = *fromP; \
for (from = *fromP; from != fromLim; from += 2) { \ fromLim = from + (((fromLim - from) >> 1) << 1); /* shrink to even */ \
for (; from < fromLim; from += 2) { \
int plane; \ int plane; \
unsigned char lo2; \ unsigned char lo2; \
unsigned char lo = GET_LO(from); \ unsigned char lo = GET_LO(from); \
@@ -552,7 +631,7 @@ E ## toUtf8(const ENCODING *enc, \
if (lo < 0x80) { \ if (lo < 0x80) { \
if (*toP == toLim) { \ if (*toP == toLim) { \
*fromP = from; \ *fromP = from; \
return; \ return XML_CONVERT_OUTPUT_EXHAUSTED; \
} \ } \
*(*toP)++ = lo; \ *(*toP)++ = lo; \
break; \ break; \
@@ -562,7 +641,7 @@ E ## toUtf8(const ENCODING *enc, \
case 0x4: case 0x5: case 0x6: case 0x7: \ case 0x4: case 0x5: case 0x6: case 0x7: \
if (toLim - *toP < 2) { \ if (toLim - *toP < 2) { \
*fromP = from; \ *fromP = from; \
return; \ return XML_CONVERT_OUTPUT_EXHAUSTED; \
} \ } \
*(*toP)++ = ((lo >> 6) | (hi << 2) | UTF8_cval2); \ *(*toP)++ = ((lo >> 6) | (hi << 2) | UTF8_cval2); \
*(*toP)++ = ((lo & 0x3f) | 0x80); \ *(*toP)++ = ((lo & 0x3f) | 0x80); \
@@ -570,7 +649,7 @@ E ## toUtf8(const ENCODING *enc, \
default: \ default: \
if (toLim - *toP < 3) { \ if (toLim - *toP < 3) { \
*fromP = from; \ *fromP = from; \
return; \ return XML_CONVERT_OUTPUT_EXHAUSTED; \
} \ } \
/* 16 bits divided 4, 6, 6 amongst 3 bytes */ \ /* 16 bits divided 4, 6, 6 amongst 3 bytes */ \
*(*toP)++ = ((hi >> 4) | UTF8_cval3); \ *(*toP)++ = ((hi >> 4) | UTF8_cval3); \
@@ -580,7 +659,11 @@ E ## toUtf8(const ENCODING *enc, \
case 0xD8: case 0xD9: case 0xDA: case 0xDB: \ case 0xD8: case 0xD9: case 0xDA: case 0xDB: \
if (toLim - *toP < 4) { \ if (toLim - *toP < 4) { \
*fromP = from; \ *fromP = from; \
return; \ return XML_CONVERT_OUTPUT_EXHAUSTED; \
} \
if (fromLim - from < 4) { \
*fromP = from; \
return XML_CONVERT_INPUT_INCOMPLETE; \
} \ } \
plane = (((hi & 0x3) << 2) | ((lo >> 6) & 0x3)) + 1; \ plane = (((hi & 0x3) << 2) | ((lo >> 6) & 0x3)) + 1; \
*(*toP)++ = ((plane >> 2) | UTF8_cval4); \ *(*toP)++ = ((plane >> 2) | UTF8_cval4); \
@@ -596,20 +679,32 @@ E ## toUtf8(const ENCODING *enc, \
} \ } \
} \ } \
*fromP = from; \ *fromP = from; \
if (from < fromLim) \
return XML_CONVERT_INPUT_INCOMPLETE; \
else \
return XML_CONVERT_COMPLETED; \
} }
/* Generates E##toUtf16, which repacks 2-byte input units into unsigned
   shorts using GET_HI / GET_LO (supplied by the instantiating code).

   Steps taken by the generated function:
   - the input length is rounded down to an even number of bytes;
   - if the input is longer than the output can hold and the final input
     unit has a high byte matching 0xD8 under mask 0xF8, that unit is left
     unconverted and the pending result becomes XML_CONVERT_INPUT_INCOMPLETE;
   - units are copied until the input or output limit is reached.

   Returns XML_CONVERT_OUTPUT_EXHAUSTED when the output filled up with input
   still pending, otherwise the pending result (COMPLETED by default). */
#define DEFINE_UTF16_TO_UTF16(E) \ #define DEFINE_UTF16_TO_UTF16(E) \
static void PTRCALL \ static enum XML_Convert_Result PTRCALL \
E ## toUtf16(const ENCODING *enc, \ E ## toUtf16(const ENCODING *UNUSED_P(enc), \
const char **fromP, const char *fromLim, \ const char **fromP, const char *fromLim, \
unsigned short **toP, const unsigned short *toLim) \ unsigned short **toP, const unsigned short *toLim) \
{ \ { \
enum XML_Convert_Result res = XML_CONVERT_COMPLETED; \
fromLim = *fromP + (((fromLim - *fromP) >> 1) << 1); /* shrink to even */ \
/* Avoid copying first half only of surrogate */ \ /* Avoid copying first half only of surrogate */ \
if (fromLim - *fromP > ((toLim - *toP) << 1) \ if (fromLim - *fromP > ((toLim - *toP) << 1) \
&& (GET_HI(fromLim - 2) & 0xF8) == 0xD8) \ && (GET_HI(fromLim - 2) & 0xF8) == 0xD8) { \
fromLim -= 2; \ fromLim -= 2; \
for (; *fromP != fromLim && *toP != toLim; *fromP += 2) \ res = XML_CONVERT_INPUT_INCOMPLETE; \
} \
for (; *fromP < fromLim && *toP < toLim; *fromP += 2) \
*(*toP)++ = (GET_HI(*fromP) << 8) | GET_LO(*fromP); \ *(*toP)++ = (GET_HI(*fromP) << 8) | GET_LO(*fromP); \
if ((*toP == toLim) && (*fromP < fromLim)) \
return XML_CONVERT_OUTPUT_EXHAUSTED; \
else \
return res; \
} }
#define SET2(ptr, ch) \ #define SET2(ptr, ch) \
@@ -726,7 +821,7 @@ static const struct normal_encoding little2_encoding_ns = {
#include "asciitab.h" #include "asciitab.h"
#include "latin1tab.h" #include "latin1tab.h"
}, },
STANDARD_VTABLE(little2_) STANDARD_VTABLE(little2_) NULL_VTABLE
}; };
#endif #endif
@@ -745,7 +840,7 @@ static const struct normal_encoding little2_encoding = {
#undef BT_COLON #undef BT_COLON
#include "latin1tab.h" #include "latin1tab.h"
}, },
STANDARD_VTABLE(little2_) STANDARD_VTABLE(little2_) NULL_VTABLE
}; };
#if BYTEORDER != 4321 #if BYTEORDER != 4321
@@ -758,7 +853,7 @@ static const struct normal_encoding internal_little2_encoding_ns = {
#include "iasciitab.h" #include "iasciitab.h"
#include "latin1tab.h" #include "latin1tab.h"
}, },
STANDARD_VTABLE(little2_) STANDARD_VTABLE(little2_) NULL_VTABLE
}; };
#endif #endif
@@ -771,7 +866,7 @@ static const struct normal_encoding internal_little2_encoding = {
#undef BT_COLON #undef BT_COLON
#include "latin1tab.h" #include "latin1tab.h"
}, },
STANDARD_VTABLE(little2_) STANDARD_VTABLE(little2_) NULL_VTABLE
}; };
#endif #endif
@@ -867,7 +962,7 @@ static const struct normal_encoding big2_encoding_ns = {
#include "asciitab.h" #include "asciitab.h"
#include "latin1tab.h" #include "latin1tab.h"
}, },
STANDARD_VTABLE(big2_) STANDARD_VTABLE(big2_) NULL_VTABLE
}; };
#endif #endif
@@ -886,7 +981,7 @@ static const struct normal_encoding big2_encoding = {
#undef BT_COLON #undef BT_COLON
#include "latin1tab.h" #include "latin1tab.h"
}, },
STANDARD_VTABLE(big2_) STANDARD_VTABLE(big2_) NULL_VTABLE
}; };
#if BYTEORDER != 1234 #if BYTEORDER != 1234
@@ -899,7 +994,7 @@ static const struct normal_encoding internal_big2_encoding_ns = {
#include "iasciitab.h" #include "iasciitab.h"
#include "latin1tab.h" #include "latin1tab.h"
}, },
STANDARD_VTABLE(big2_) STANDARD_VTABLE(big2_) NULL_VTABLE
}; };
#endif #endif
@@ -912,7 +1007,7 @@ static const struct normal_encoding internal_big2_encoding = {
#undef BT_COLON #undef BT_COLON
#include "latin1tab.h" #include "latin1tab.h"
}, },
STANDARD_VTABLE(big2_) STANDARD_VTABLE(big2_) NULL_VTABLE
}; };
#endif #endif
@@ -938,7 +1033,7 @@ streqci(const char *s1, const char *s2)
} }
static void PTRCALL static void PTRCALL
initUpdatePosition(const ENCODING *enc, const char *ptr, initUpdatePosition(const ENCODING *UNUSED_P(enc), const char *ptr,
const char *end, POSITION *pos) const char *end, POSITION *pos)
{ {
normal_updatePosition(&utf8_encoding.enc, ptr, end, pos); normal_updatePosition(&utf8_encoding.enc, ptr, end, pos);
@@ -1288,7 +1383,7 @@ unknown_isInvalid(const ENCODING *enc, const char *p)
return (c & ~0xFFFF) || checkCharRefNumber(c) < 0; return (c & ~0xFFFF) || checkCharRefNumber(c) < 0;
} }
static void PTRCALL static enum XML_Convert_Result PTRCALL
unknown_toUtf8(const ENCODING *enc, unknown_toUtf8(const ENCODING *enc,
const char **fromP, const char *fromLim, const char **fromP, const char *fromLim,
char **toP, const char *toLim) char **toP, const char *toLim)
@@ -1299,21 +1394,21 @@ unknown_toUtf8(const ENCODING *enc,
const char *utf8; const char *utf8;
int n; int n;
if (*fromP == fromLim) if (*fromP == fromLim)
break; return XML_CONVERT_COMPLETED;
utf8 = uenc->utf8[(unsigned char)**fromP]; utf8 = uenc->utf8[(unsigned char)**fromP];
n = *utf8++; n = *utf8++;
if (n == 0) { if (n == 0) {
int c = uenc->convert(uenc->userData, *fromP); int c = uenc->convert(uenc->userData, *fromP);
n = XmlUtf8Encode(c, buf); n = XmlUtf8Encode(c, buf);
if (n > toLim - *toP) if (n > toLim - *toP)
break; return XML_CONVERT_OUTPUT_EXHAUSTED;
utf8 = buf; utf8 = buf;
*fromP += (AS_NORMAL_ENCODING(enc)->type[(unsigned char)**fromP] *fromP += (AS_NORMAL_ENCODING(enc)->type[(unsigned char)**fromP]
- (BT_LEAD2 - 2)); - (BT_LEAD2 - 2));
} }
else { else {
if (n > toLim - *toP) if (n > toLim - *toP)
break; return XML_CONVERT_OUTPUT_EXHAUSTED;
(*fromP)++; (*fromP)++;
} }
do { do {
@@ -1322,13 +1417,13 @@ unknown_toUtf8(const ENCODING *enc,
} }
} }
static void PTRCALL static enum XML_Convert_Result PTRCALL
unknown_toUtf16(const ENCODING *enc, unknown_toUtf16(const ENCODING *enc,
const char **fromP, const char *fromLim, const char **fromP, const char *fromLim,
unsigned short **toP, const unsigned short *toLim) unsigned short **toP, const unsigned short *toLim)
{ {
const struct unknown_encoding *uenc = AS_UNKNOWN_ENCODING(enc); const struct unknown_encoding *uenc = AS_UNKNOWN_ENCODING(enc);
while (*fromP != fromLim && *toP != toLim) { while (*fromP < fromLim && *toP < toLim) {
unsigned short c = uenc->utf16[(unsigned char)**fromP]; unsigned short c = uenc->utf16[(unsigned char)**fromP];
if (c == 0) { if (c == 0) {
c = (unsigned short) c = (unsigned short)
@@ -1340,6 +1435,11 @@ unknown_toUtf16(const ENCODING *enc,
(*fromP)++; (*fromP)++;
*(*toP)++ = c; *(*toP)++ = c;
} }
if ((*toP == toLim) && (*fromP < fromLim))
return XML_CONVERT_OUTPUT_EXHAUSTED;
else
return XML_CONVERT_COMPLETED;
} }
ENCODING * ENCODING *
@@ -1503,7 +1603,7 @@ initScan(const ENCODING * const *encodingTable,
{ {
const ENCODING **encPtr; const ENCODING **encPtr;
if (ptr == end) if (ptr >= end)
return XML_TOK_NONE; return XML_TOK_NONE;
encPtr = enc->encPtr; encPtr = enc->encPtr;
if (ptr + 1 == end) { if (ptr + 1 == end) {

View File

@@ -130,6 +130,12 @@ typedef int (PTRCALL *SCANNER)(const ENCODING *,
const char *, const char *,
const char **); const char **);
/* Status returned by the utf8Convert / utf16Convert encoding callbacks. */
enum XML_Convert_Result {
  /* All input up to fromLim was converted. */
  XML_CONVERT_COMPLETED = 0,

  /* The input stops part-way through a character. */
  XML_CONVERT_INPUT_INCOMPLETE = 1,

  /* The output buffer is full; unconverted input may remain as well. */
  XML_CONVERT_OUTPUT_EXHAUSTED = 2
};
struct encoding { struct encoding {
SCANNER scanners[XML_N_STATES]; SCANNER scanners[XML_N_STATES];
SCANNER literalScanners[XML_N_LITERAL_TYPES]; SCANNER literalScanners[XML_N_LITERAL_TYPES];
@@ -158,12 +164,12 @@ struct encoding {
const char *ptr, const char *ptr,
const char *end, const char *end,
const char **badPtr); const char **badPtr);
void (PTRCALL *utf8Convert)(const ENCODING *enc, enum XML_Convert_Result (PTRCALL *utf8Convert)(const ENCODING *enc,
const char **fromP, const char **fromP,
const char *fromLim, const char *fromLim,
char **toP, char **toP,
const char *toLim); const char *toLim);
void (PTRCALL *utf16Convert)(const ENCODING *enc, enum XML_Convert_Result (PTRCALL *utf16Convert)(const ENCODING *enc,
const char **fromP, const char **fromP,
const char *fromLim, const char *fromLim,
unsigned short **toP, unsigned short **toP,

View File

@@ -87,27 +87,45 @@
#define PREFIX(ident) ident #define PREFIX(ident) ident
#endif #endif
/* Bounds-check helpers for the scanner functions.

   HAS_CHARS(enc, ptr, end, count)
     True when at least `count` minimum-size characters, i.e.
     count * MINBPC(enc) bytes, lie between ptr and end.  Also false when
     ptr is already past end, which a plain `ptr != end` test would miss.
   HAS_CHAR(enc, ptr, end)
     HAS_CHARS for a single character.
   REQUIRE_CHARS(enc, ptr, end, count)
     Makes the *calling function* return XML_TOK_PARTIAL when fewer than
     `count` characters are available.
   REQUIRE_CHAR(enc, ptr, end)
     REQUIRE_CHARS for a single character.

   Every argument is parenthesised so expression arguments such as `n + 1`
   or `cond ? a : b` keep their meaning after expansion.  REQUIRE_CHARS is
   wrapped in do { } while (0) so it acts as one statement; write it with a
   trailing semicolon, which also makes it safe in an unbraced if/else. */
#define HAS_CHARS(enc, ptr, end, count) \
    ((end) - (ptr) >= ((count) * (MINBPC(enc))))

#define HAS_CHAR(enc, ptr, end) \
    HAS_CHARS(enc, ptr, end, 1)

#define REQUIRE_CHARS(enc, ptr, end, count) \
    do { \
      if (! HAS_CHARS(enc, ptr, end, count)) { \
        return XML_TOK_PARTIAL; \
      } \
    } while (0)

#define REQUIRE_CHAR(enc, ptr, end) \
    REQUIRE_CHARS(enc, ptr, end, 1)
/* ptr points to character following "<!-" */ /* ptr points to character following "<!-" */
static int PTRCALL static int PTRCALL
PREFIX(scanComment)(const ENCODING *enc, const char *ptr, PREFIX(scanComment)(const ENCODING *enc, const char *ptr,
const char *end, const char **nextTokPtr) const char *end, const char **nextTokPtr)
{ {
if (ptr != end) { if (HAS_CHAR(enc, ptr, end)) {
if (!CHAR_MATCHES(enc, ptr, ASCII_MINUS)) { if (!CHAR_MATCHES(enc, ptr, ASCII_MINUS)) {
*nextTokPtr = ptr; *nextTokPtr = ptr;
return XML_TOK_INVALID; return XML_TOK_INVALID;
} }
ptr += MINBPC(enc); ptr += MINBPC(enc);
while (ptr != end) { while (HAS_CHAR(enc, ptr, end)) {
switch (BYTE_TYPE(enc, ptr)) { switch (BYTE_TYPE(enc, ptr)) {
INVALID_CASES(ptr, nextTokPtr) INVALID_CASES(ptr, nextTokPtr)
case BT_MINUS: case BT_MINUS:
if ((ptr += MINBPC(enc)) == end) ptr += MINBPC(enc);
return XML_TOK_PARTIAL; REQUIRE_CHAR(enc, ptr, end);
if (CHAR_MATCHES(enc, ptr, ASCII_MINUS)) { if (CHAR_MATCHES(enc, ptr, ASCII_MINUS)) {
if ((ptr += MINBPC(enc)) == end) ptr += MINBPC(enc);
return XML_TOK_PARTIAL; REQUIRE_CHAR(enc, ptr, end);
if (!CHAR_MATCHES(enc, ptr, ASCII_GT)) { if (!CHAR_MATCHES(enc, ptr, ASCII_GT)) {
*nextTokPtr = ptr; *nextTokPtr = ptr;
return XML_TOK_INVALID; return XML_TOK_INVALID;
@@ -131,8 +149,7 @@ static int PTRCALL
PREFIX(scanDecl)(const ENCODING *enc, const char *ptr, PREFIX(scanDecl)(const ENCODING *enc, const char *ptr,
const char *end, const char **nextTokPtr) const char *end, const char **nextTokPtr)
{ {
if (ptr == end) REQUIRE_CHAR(enc, ptr, end);
return XML_TOK_PARTIAL;
switch (BYTE_TYPE(enc, ptr)) { switch (BYTE_TYPE(enc, ptr)) {
case BT_MINUS: case BT_MINUS:
return PREFIX(scanComment)(enc, ptr + MINBPC(enc), end, nextTokPtr); return PREFIX(scanComment)(enc, ptr + MINBPC(enc), end, nextTokPtr);
@@ -147,11 +164,10 @@ PREFIX(scanDecl)(const ENCODING *enc, const char *ptr,
*nextTokPtr = ptr; *nextTokPtr = ptr;
return XML_TOK_INVALID; return XML_TOK_INVALID;
} }
while (ptr != end) { while (HAS_CHAR(enc, ptr, end)) {
switch (BYTE_TYPE(enc, ptr)) { switch (BYTE_TYPE(enc, ptr)) {
case BT_PERCNT: case BT_PERCNT:
if (ptr + MINBPC(enc) == end) REQUIRE_CHARS(enc, ptr, end, 2);
return XML_TOK_PARTIAL;
/* don't allow <!ENTITY% foo "whatever"> */ /* don't allow <!ENTITY% foo "whatever"> */
switch (BYTE_TYPE(enc, ptr + MINBPC(enc))) { switch (BYTE_TYPE(enc, ptr + MINBPC(enc))) {
case BT_S: case BT_CR: case BT_LF: case BT_PERCNT: case BT_S: case BT_CR: case BT_LF: case BT_PERCNT:
@@ -175,7 +191,7 @@ PREFIX(scanDecl)(const ENCODING *enc, const char *ptr,
} }
static int PTRCALL static int PTRCALL
PREFIX(checkPiTarget)(const ENCODING *enc, const char *ptr, PREFIX(checkPiTarget)(const ENCODING *UNUSED_P(enc), const char *ptr,
const char *end, int *tokPtr) const char *end, int *tokPtr)
{ {
int upper = 0; int upper = 0;
@@ -225,15 +241,14 @@ PREFIX(scanPi)(const ENCODING *enc, const char *ptr,
{ {
int tok; int tok;
const char *target = ptr; const char *target = ptr;
if (ptr == end) REQUIRE_CHAR(enc, ptr, end);
return XML_TOK_PARTIAL;
switch (BYTE_TYPE(enc, ptr)) { switch (BYTE_TYPE(enc, ptr)) {
CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr) CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
default: default:
*nextTokPtr = ptr; *nextTokPtr = ptr;
return XML_TOK_INVALID; return XML_TOK_INVALID;
} }
while (ptr != end) { while (HAS_CHAR(enc, ptr, end)) {
switch (BYTE_TYPE(enc, ptr)) { switch (BYTE_TYPE(enc, ptr)) {
CHECK_NAME_CASES(enc, ptr, end, nextTokPtr) CHECK_NAME_CASES(enc, ptr, end, nextTokPtr)
case BT_S: case BT_CR: case BT_LF: case BT_S: case BT_CR: case BT_LF:
@@ -242,13 +257,12 @@ PREFIX(scanPi)(const ENCODING *enc, const char *ptr,
return XML_TOK_INVALID; return XML_TOK_INVALID;
} }
ptr += MINBPC(enc); ptr += MINBPC(enc);
while (ptr != end) { while (HAS_CHAR(enc, ptr, end)) {
switch (BYTE_TYPE(enc, ptr)) { switch (BYTE_TYPE(enc, ptr)) {
INVALID_CASES(ptr, nextTokPtr) INVALID_CASES(ptr, nextTokPtr)
case BT_QUEST: case BT_QUEST:
ptr += MINBPC(enc); ptr += MINBPC(enc);
if (ptr == end) REQUIRE_CHAR(enc, ptr, end);
return XML_TOK_PARTIAL;
if (CHAR_MATCHES(enc, ptr, ASCII_GT)) { if (CHAR_MATCHES(enc, ptr, ASCII_GT)) {
*nextTokPtr = ptr + MINBPC(enc); *nextTokPtr = ptr + MINBPC(enc);
return tok; return tok;
@@ -266,8 +280,7 @@ PREFIX(scanPi)(const ENCODING *enc, const char *ptr,
return XML_TOK_INVALID; return XML_TOK_INVALID;
} }
ptr += MINBPC(enc); ptr += MINBPC(enc);
if (ptr == end) REQUIRE_CHAR(enc, ptr, end);
return XML_TOK_PARTIAL;
if (CHAR_MATCHES(enc, ptr, ASCII_GT)) { if (CHAR_MATCHES(enc, ptr, ASCII_GT)) {
*nextTokPtr = ptr + MINBPC(enc); *nextTokPtr = ptr + MINBPC(enc);
return tok; return tok;
@@ -282,15 +295,14 @@ PREFIX(scanPi)(const ENCODING *enc, const char *ptr,
} }
static int PTRCALL static int PTRCALL
PREFIX(scanCdataSection)(const ENCODING *enc, const char *ptr, PREFIX(scanCdataSection)(const ENCODING *UNUSED_P(enc), const char *ptr,
const char *end, const char **nextTokPtr) const char *end, const char **nextTokPtr)
{ {
static const char CDATA_LSQB[] = { ASCII_C, ASCII_D, ASCII_A, static const char CDATA_LSQB[] = { ASCII_C, ASCII_D, ASCII_A,
ASCII_T, ASCII_A, ASCII_LSQB }; ASCII_T, ASCII_A, ASCII_LSQB };
int i; int i;
/* CDATA[ */ /* CDATA[ */
if (end - ptr < 6 * MINBPC(enc)) REQUIRE_CHARS(enc, ptr, end, 6);
return XML_TOK_PARTIAL;
for (i = 0; i < 6; i++, ptr += MINBPC(enc)) { for (i = 0; i < 6; i++, ptr += MINBPC(enc)) {
if (!CHAR_MATCHES(enc, ptr, CDATA_LSQB[i])) { if (!CHAR_MATCHES(enc, ptr, CDATA_LSQB[i])) {
*nextTokPtr = ptr; *nextTokPtr = ptr;
@@ -305,7 +317,7 @@ static int PTRCALL
PREFIX(cdataSectionTok)(const ENCODING *enc, const char *ptr, PREFIX(cdataSectionTok)(const ENCODING *enc, const char *ptr,
const char *end, const char **nextTokPtr) const char *end, const char **nextTokPtr)
{ {
if (ptr == end) if (ptr >= end)
return XML_TOK_NONE; return XML_TOK_NONE;
if (MINBPC(enc) > 1) { if (MINBPC(enc) > 1) {
size_t n = end - ptr; size_t n = end - ptr;
@@ -319,13 +331,11 @@ PREFIX(cdataSectionTok)(const ENCODING *enc, const char *ptr,
switch (BYTE_TYPE(enc, ptr)) { switch (BYTE_TYPE(enc, ptr)) {
case BT_RSQB: case BT_RSQB:
ptr += MINBPC(enc); ptr += MINBPC(enc);
if (ptr == end) REQUIRE_CHAR(enc, ptr, end);
return XML_TOK_PARTIAL;
if (!CHAR_MATCHES(enc, ptr, ASCII_RSQB)) if (!CHAR_MATCHES(enc, ptr, ASCII_RSQB))
break; break;
ptr += MINBPC(enc); ptr += MINBPC(enc);
if (ptr == end) REQUIRE_CHAR(enc, ptr, end);
return XML_TOK_PARTIAL;
if (!CHAR_MATCHES(enc, ptr, ASCII_GT)) { if (!CHAR_MATCHES(enc, ptr, ASCII_GT)) {
ptr -= MINBPC(enc); ptr -= MINBPC(enc);
break; break;
@@ -334,8 +344,7 @@ PREFIX(cdataSectionTok)(const ENCODING *enc, const char *ptr,
return XML_TOK_CDATA_SECT_CLOSE; return XML_TOK_CDATA_SECT_CLOSE;
case BT_CR: case BT_CR:
ptr += MINBPC(enc); ptr += MINBPC(enc);
if (ptr == end) REQUIRE_CHAR(enc, ptr, end);
return XML_TOK_PARTIAL;
if (BYTE_TYPE(enc, ptr) == BT_LF) if (BYTE_TYPE(enc, ptr) == BT_LF)
ptr += MINBPC(enc); ptr += MINBPC(enc);
*nextTokPtr = ptr; *nextTokPtr = ptr;
@@ -348,7 +357,7 @@ PREFIX(cdataSectionTok)(const ENCODING *enc, const char *ptr,
ptr += MINBPC(enc); ptr += MINBPC(enc);
break; break;
} }
while (ptr != end) { while (HAS_CHAR(enc, ptr, end)) {
switch (BYTE_TYPE(enc, ptr)) { switch (BYTE_TYPE(enc, ptr)) {
#define LEAD_CASE(n) \ #define LEAD_CASE(n) \
case BT_LEAD ## n: \ case BT_LEAD ## n: \
@@ -383,19 +392,18 @@ static int PTRCALL
PREFIX(scanEndTag)(const ENCODING *enc, const char *ptr, PREFIX(scanEndTag)(const ENCODING *enc, const char *ptr,
const char *end, const char **nextTokPtr) const char *end, const char **nextTokPtr)
{ {
if (ptr == end) REQUIRE_CHAR(enc, ptr, end);
return XML_TOK_PARTIAL;
switch (BYTE_TYPE(enc, ptr)) { switch (BYTE_TYPE(enc, ptr)) {
CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr) CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
default: default:
*nextTokPtr = ptr; *nextTokPtr = ptr;
return XML_TOK_INVALID; return XML_TOK_INVALID;
} }
while (ptr != end) { while (HAS_CHAR(enc, ptr, end)) {
switch (BYTE_TYPE(enc, ptr)) { switch (BYTE_TYPE(enc, ptr)) {
CHECK_NAME_CASES(enc, ptr, end, nextTokPtr) CHECK_NAME_CASES(enc, ptr, end, nextTokPtr)
case BT_S: case BT_CR: case BT_LF: case BT_S: case BT_CR: case BT_LF:
for (ptr += MINBPC(enc); ptr != end; ptr += MINBPC(enc)) { for (ptr += MINBPC(enc); HAS_CHAR(enc, ptr, end); ptr += MINBPC(enc)) {
switch (BYTE_TYPE(enc, ptr)) { switch (BYTE_TYPE(enc, ptr)) {
case BT_S: case BT_CR: case BT_LF: case BT_S: case BT_CR: case BT_LF:
break; break;
@@ -432,7 +440,7 @@ static int PTRCALL
PREFIX(scanHexCharRef)(const ENCODING *enc, const char *ptr, PREFIX(scanHexCharRef)(const ENCODING *enc, const char *ptr,
const char *end, const char **nextTokPtr) const char *end, const char **nextTokPtr)
{ {
if (ptr != end) { if (HAS_CHAR(enc, ptr, end)) {
switch (BYTE_TYPE(enc, ptr)) { switch (BYTE_TYPE(enc, ptr)) {
case BT_DIGIT: case BT_DIGIT:
case BT_HEX: case BT_HEX:
@@ -441,7 +449,7 @@ PREFIX(scanHexCharRef)(const ENCODING *enc, const char *ptr,
*nextTokPtr = ptr; *nextTokPtr = ptr;
return XML_TOK_INVALID; return XML_TOK_INVALID;
} }
for (ptr += MINBPC(enc); ptr != end; ptr += MINBPC(enc)) { for (ptr += MINBPC(enc); HAS_CHAR(enc, ptr, end); ptr += MINBPC(enc)) {
switch (BYTE_TYPE(enc, ptr)) { switch (BYTE_TYPE(enc, ptr)) {
case BT_DIGIT: case BT_DIGIT:
case BT_HEX: case BT_HEX:
@@ -464,7 +472,7 @@ static int PTRCALL
PREFIX(scanCharRef)(const ENCODING *enc, const char *ptr, PREFIX(scanCharRef)(const ENCODING *enc, const char *ptr,
const char *end, const char **nextTokPtr) const char *end, const char **nextTokPtr)
{ {
if (ptr != end) { if (HAS_CHAR(enc, ptr, end)) {
if (CHAR_MATCHES(enc, ptr, ASCII_x)) if (CHAR_MATCHES(enc, ptr, ASCII_x))
return PREFIX(scanHexCharRef)(enc, ptr + MINBPC(enc), end, nextTokPtr); return PREFIX(scanHexCharRef)(enc, ptr + MINBPC(enc), end, nextTokPtr);
switch (BYTE_TYPE(enc, ptr)) { switch (BYTE_TYPE(enc, ptr)) {
@@ -474,7 +482,7 @@ PREFIX(scanCharRef)(const ENCODING *enc, const char *ptr,
*nextTokPtr = ptr; *nextTokPtr = ptr;
return XML_TOK_INVALID; return XML_TOK_INVALID;
} }
for (ptr += MINBPC(enc); ptr != end; ptr += MINBPC(enc)) { for (ptr += MINBPC(enc); HAS_CHAR(enc, ptr, end); ptr += MINBPC(enc)) {
switch (BYTE_TYPE(enc, ptr)) { switch (BYTE_TYPE(enc, ptr)) {
case BT_DIGIT: case BT_DIGIT:
break; break;
@@ -496,8 +504,7 @@ static int PTRCALL
PREFIX(scanRef)(const ENCODING *enc, const char *ptr, const char *end, PREFIX(scanRef)(const ENCODING *enc, const char *ptr, const char *end,
const char **nextTokPtr) const char **nextTokPtr)
{ {
if (ptr == end) REQUIRE_CHAR(enc, ptr, end);
return XML_TOK_PARTIAL;
switch (BYTE_TYPE(enc, ptr)) { switch (BYTE_TYPE(enc, ptr)) {
CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr) CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
case BT_NUM: case BT_NUM:
@@ -506,7 +513,7 @@ PREFIX(scanRef)(const ENCODING *enc, const char *ptr, const char *end,
*nextTokPtr = ptr; *nextTokPtr = ptr;
return XML_TOK_INVALID; return XML_TOK_INVALID;
} }
while (ptr != end) { while (HAS_CHAR(enc, ptr, end)) {
switch (BYTE_TYPE(enc, ptr)) { switch (BYTE_TYPE(enc, ptr)) {
CHECK_NAME_CASES(enc, ptr, end, nextTokPtr) CHECK_NAME_CASES(enc, ptr, end, nextTokPtr)
case BT_SEMI: case BT_SEMI:
@@ -529,7 +536,7 @@ PREFIX(scanAtts)(const ENCODING *enc, const char *ptr, const char *end,
#ifdef XML_NS #ifdef XML_NS
int hadColon = 0; int hadColon = 0;
#endif #endif
while (ptr != end) { while (HAS_CHAR(enc, ptr, end)) {
switch (BYTE_TYPE(enc, ptr)) { switch (BYTE_TYPE(enc, ptr)) {
CHECK_NAME_CASES(enc, ptr, end, nextTokPtr) CHECK_NAME_CASES(enc, ptr, end, nextTokPtr)
#ifdef XML_NS #ifdef XML_NS
@@ -540,8 +547,7 @@ PREFIX(scanAtts)(const ENCODING *enc, const char *ptr, const char *end,
} }
hadColon = 1; hadColon = 1;
ptr += MINBPC(enc); ptr += MINBPC(enc);
if (ptr == end) REQUIRE_CHAR(enc, ptr, end);
return XML_TOK_PARTIAL;
switch (BYTE_TYPE(enc, ptr)) { switch (BYTE_TYPE(enc, ptr)) {
CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr) CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
default: default:
@@ -555,8 +561,7 @@ PREFIX(scanAtts)(const ENCODING *enc, const char *ptr, const char *end,
int t; int t;
ptr += MINBPC(enc); ptr += MINBPC(enc);
if (ptr == end) REQUIRE_CHAR(enc, ptr, end);
return XML_TOK_PARTIAL;
t = BYTE_TYPE(enc, ptr); t = BYTE_TYPE(enc, ptr);
if (t == BT_EQUALS) if (t == BT_EQUALS)
break; break;
@@ -579,8 +584,7 @@ PREFIX(scanAtts)(const ENCODING *enc, const char *ptr, const char *end,
#endif #endif
for (;;) { for (;;) {
ptr += MINBPC(enc); ptr += MINBPC(enc);
if (ptr == end) REQUIRE_CHAR(enc, ptr, end);
return XML_TOK_PARTIAL;
open = BYTE_TYPE(enc, ptr); open = BYTE_TYPE(enc, ptr);
if (open == BT_QUOT || open == BT_APOS) if (open == BT_QUOT || open == BT_APOS)
break; break;
@@ -598,8 +602,7 @@ PREFIX(scanAtts)(const ENCODING *enc, const char *ptr, const char *end,
/* in attribute value */ /* in attribute value */
for (;;) { for (;;) {
int t; int t;
if (ptr == end) REQUIRE_CHAR(enc, ptr, end);
return XML_TOK_PARTIAL;
t = BYTE_TYPE(enc, ptr); t = BYTE_TYPE(enc, ptr);
if (t == open) if (t == open)
break; break;
@@ -624,8 +627,7 @@ PREFIX(scanAtts)(const ENCODING *enc, const char *ptr, const char *end,
} }
} }
ptr += MINBPC(enc); ptr += MINBPC(enc);
if (ptr == end) REQUIRE_CHAR(enc, ptr, end);
return XML_TOK_PARTIAL;
switch (BYTE_TYPE(enc, ptr)) { switch (BYTE_TYPE(enc, ptr)) {
case BT_S: case BT_S:
case BT_CR: case BT_CR:
@@ -642,8 +644,7 @@ PREFIX(scanAtts)(const ENCODING *enc, const char *ptr, const char *end,
/* ptr points to closing quote */ /* ptr points to closing quote */
for (;;) { for (;;) {
ptr += MINBPC(enc); ptr += MINBPC(enc);
if (ptr == end) REQUIRE_CHAR(enc, ptr, end);
return XML_TOK_PARTIAL;
switch (BYTE_TYPE(enc, ptr)) { switch (BYTE_TYPE(enc, ptr)) {
CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr) CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
case BT_S: case BT_CR: case BT_LF: case BT_S: case BT_CR: case BT_LF:
@@ -655,8 +656,7 @@ PREFIX(scanAtts)(const ENCODING *enc, const char *ptr, const char *end,
case BT_SOL: case BT_SOL:
sol: sol:
ptr += MINBPC(enc); ptr += MINBPC(enc);
if (ptr == end) REQUIRE_CHAR(enc, ptr, end);
return XML_TOK_PARTIAL;
if (!CHAR_MATCHES(enc, ptr, ASCII_GT)) { if (!CHAR_MATCHES(enc, ptr, ASCII_GT)) {
*nextTokPtr = ptr; *nextTokPtr = ptr;
return XML_TOK_INVALID; return XML_TOK_INVALID;
@@ -688,13 +688,12 @@ PREFIX(scanLt)(const ENCODING *enc, const char *ptr, const char *end,
#ifdef XML_NS #ifdef XML_NS
int hadColon; int hadColon;
#endif #endif
if (ptr == end) REQUIRE_CHAR(enc, ptr, end);
return XML_TOK_PARTIAL;
switch (BYTE_TYPE(enc, ptr)) { switch (BYTE_TYPE(enc, ptr)) {
CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr) CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
case BT_EXCL: case BT_EXCL:
if ((ptr += MINBPC(enc)) == end) ptr += MINBPC(enc);
return XML_TOK_PARTIAL; REQUIRE_CHAR(enc, ptr, end);
switch (BYTE_TYPE(enc, ptr)) { switch (BYTE_TYPE(enc, ptr)) {
case BT_MINUS: case BT_MINUS:
return PREFIX(scanComment)(enc, ptr + MINBPC(enc), end, nextTokPtr); return PREFIX(scanComment)(enc, ptr + MINBPC(enc), end, nextTokPtr);
@@ -716,7 +715,7 @@ PREFIX(scanLt)(const ENCODING *enc, const char *ptr, const char *end,
hadColon = 0; hadColon = 0;
#endif #endif
/* we have a start-tag */ /* we have a start-tag */
while (ptr != end) { while (HAS_CHAR(enc, ptr, end)) {
switch (BYTE_TYPE(enc, ptr)) { switch (BYTE_TYPE(enc, ptr)) {
CHECK_NAME_CASES(enc, ptr, end, nextTokPtr) CHECK_NAME_CASES(enc, ptr, end, nextTokPtr)
#ifdef XML_NS #ifdef XML_NS
@@ -727,8 +726,7 @@ PREFIX(scanLt)(const ENCODING *enc, const char *ptr, const char *end,
} }
hadColon = 1; hadColon = 1;
ptr += MINBPC(enc); ptr += MINBPC(enc);
if (ptr == end) REQUIRE_CHAR(enc, ptr, end);
return XML_TOK_PARTIAL;
switch (BYTE_TYPE(enc, ptr)) { switch (BYTE_TYPE(enc, ptr)) {
CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr) CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
default: default:
@@ -740,7 +738,7 @@ PREFIX(scanLt)(const ENCODING *enc, const char *ptr, const char *end,
case BT_S: case BT_CR: case BT_LF: case BT_S: case BT_CR: case BT_LF:
{ {
ptr += MINBPC(enc); ptr += MINBPC(enc);
while (ptr != end) { while (HAS_CHAR(enc, ptr, end)) {
switch (BYTE_TYPE(enc, ptr)) { switch (BYTE_TYPE(enc, ptr)) {
CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr) CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
case BT_GT: case BT_GT:
@@ -765,8 +763,7 @@ PREFIX(scanLt)(const ENCODING *enc, const char *ptr, const char *end,
case BT_SOL: case BT_SOL:
sol: sol:
ptr += MINBPC(enc); ptr += MINBPC(enc);
if (ptr == end) REQUIRE_CHAR(enc, ptr, end);
return XML_TOK_PARTIAL;
if (!CHAR_MATCHES(enc, ptr, ASCII_GT)) { if (!CHAR_MATCHES(enc, ptr, ASCII_GT)) {
*nextTokPtr = ptr; *nextTokPtr = ptr;
return XML_TOK_INVALID; return XML_TOK_INVALID;
@@ -785,7 +782,7 @@ static int PTRCALL
PREFIX(contentTok)(const ENCODING *enc, const char *ptr, const char *end, PREFIX(contentTok)(const ENCODING *enc, const char *ptr, const char *end,
const char **nextTokPtr) const char **nextTokPtr)
{ {
if (ptr == end) if (ptr >= end)
return XML_TOK_NONE; return XML_TOK_NONE;
if (MINBPC(enc) > 1) { if (MINBPC(enc) > 1) {
size_t n = end - ptr; size_t n = end - ptr;
@@ -803,7 +800,7 @@ PREFIX(contentTok)(const ENCODING *enc, const char *ptr, const char *end,
return PREFIX(scanRef)(enc, ptr + MINBPC(enc), end, nextTokPtr); return PREFIX(scanRef)(enc, ptr + MINBPC(enc), end, nextTokPtr);
case BT_CR: case BT_CR:
ptr += MINBPC(enc); ptr += MINBPC(enc);
if (ptr == end) if (! HAS_CHAR(enc, ptr, end))
return XML_TOK_TRAILING_CR; return XML_TOK_TRAILING_CR;
if (BYTE_TYPE(enc, ptr) == BT_LF) if (BYTE_TYPE(enc, ptr) == BT_LF)
ptr += MINBPC(enc); ptr += MINBPC(enc);
@@ -814,12 +811,12 @@ PREFIX(contentTok)(const ENCODING *enc, const char *ptr, const char *end,
return XML_TOK_DATA_NEWLINE; return XML_TOK_DATA_NEWLINE;
case BT_RSQB: case BT_RSQB:
ptr += MINBPC(enc); ptr += MINBPC(enc);
if (ptr == end) if (! HAS_CHAR(enc, ptr, end))
return XML_TOK_TRAILING_RSQB; return XML_TOK_TRAILING_RSQB;
if (!CHAR_MATCHES(enc, ptr, ASCII_RSQB)) if (!CHAR_MATCHES(enc, ptr, ASCII_RSQB))
break; break;
ptr += MINBPC(enc); ptr += MINBPC(enc);
if (ptr == end) if (! HAS_CHAR(enc, ptr, end))
return XML_TOK_TRAILING_RSQB; return XML_TOK_TRAILING_RSQB;
if (!CHAR_MATCHES(enc, ptr, ASCII_GT)) { if (!CHAR_MATCHES(enc, ptr, ASCII_GT)) {
ptr -= MINBPC(enc); ptr -= MINBPC(enc);
@@ -832,7 +829,7 @@ PREFIX(contentTok)(const ENCODING *enc, const char *ptr, const char *end,
ptr += MINBPC(enc); ptr += MINBPC(enc);
break; break;
} }
while (ptr != end) { while (HAS_CHAR(enc, ptr, end)) {
switch (BYTE_TYPE(enc, ptr)) { switch (BYTE_TYPE(enc, ptr)) {
#define LEAD_CASE(n) \ #define LEAD_CASE(n) \
case BT_LEAD ## n: \ case BT_LEAD ## n: \
@@ -845,12 +842,12 @@ PREFIX(contentTok)(const ENCODING *enc, const char *ptr, const char *end,
LEAD_CASE(2) LEAD_CASE(3) LEAD_CASE(4) LEAD_CASE(2) LEAD_CASE(3) LEAD_CASE(4)
#undef LEAD_CASE #undef LEAD_CASE
case BT_RSQB: case BT_RSQB:
if (ptr + MINBPC(enc) != end) { if (HAS_CHARS(enc, ptr, end, 2)) {
if (!CHAR_MATCHES(enc, ptr + MINBPC(enc), ASCII_RSQB)) { if (!CHAR_MATCHES(enc, ptr + MINBPC(enc), ASCII_RSQB)) {
ptr += MINBPC(enc); ptr += MINBPC(enc);
break; break;
} }
if (ptr + 2*MINBPC(enc) != end) { if (HAS_CHARS(enc, ptr, end, 3)) {
if (!CHAR_MATCHES(enc, ptr + 2*MINBPC(enc), ASCII_GT)) { if (!CHAR_MATCHES(enc, ptr + 2*MINBPC(enc), ASCII_GT)) {
ptr += MINBPC(enc); ptr += MINBPC(enc);
break; break;
@@ -884,8 +881,7 @@ static int PTRCALL
PREFIX(scanPercent)(const ENCODING *enc, const char *ptr, const char *end, PREFIX(scanPercent)(const ENCODING *enc, const char *ptr, const char *end,
const char **nextTokPtr) const char **nextTokPtr)
{ {
if (ptr == end) REQUIRE_CHAR(enc, ptr, end);
return XML_TOK_PARTIAL;
switch (BYTE_TYPE(enc, ptr)) { switch (BYTE_TYPE(enc, ptr)) {
CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr) CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
case BT_S: case BT_LF: case BT_CR: case BT_PERCNT: case BT_S: case BT_LF: case BT_CR: case BT_PERCNT:
@@ -895,7 +891,7 @@ PREFIX(scanPercent)(const ENCODING *enc, const char *ptr, const char *end,
*nextTokPtr = ptr; *nextTokPtr = ptr;
return XML_TOK_INVALID; return XML_TOK_INVALID;
} }
while (ptr != end) { while (HAS_CHAR(enc, ptr, end)) {
switch (BYTE_TYPE(enc, ptr)) { switch (BYTE_TYPE(enc, ptr)) {
CHECK_NAME_CASES(enc, ptr, end, nextTokPtr) CHECK_NAME_CASES(enc, ptr, end, nextTokPtr)
case BT_SEMI: case BT_SEMI:
@@ -913,15 +909,14 @@ static int PTRCALL
PREFIX(scanPoundName)(const ENCODING *enc, const char *ptr, const char *end, PREFIX(scanPoundName)(const ENCODING *enc, const char *ptr, const char *end,
const char **nextTokPtr) const char **nextTokPtr)
{ {
if (ptr == end) REQUIRE_CHAR(enc, ptr, end);
return XML_TOK_PARTIAL;
switch (BYTE_TYPE(enc, ptr)) { switch (BYTE_TYPE(enc, ptr)) {
CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr) CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
default: default:
*nextTokPtr = ptr; *nextTokPtr = ptr;
return XML_TOK_INVALID; return XML_TOK_INVALID;
} }
while (ptr != end) { while (HAS_CHAR(enc, ptr, end)) {
switch (BYTE_TYPE(enc, ptr)) { switch (BYTE_TYPE(enc, ptr)) {
CHECK_NAME_CASES(enc, ptr, end, nextTokPtr) CHECK_NAME_CASES(enc, ptr, end, nextTokPtr)
case BT_CR: case BT_LF: case BT_S: case BT_CR: case BT_LF: case BT_S:
@@ -941,7 +936,7 @@ PREFIX(scanLit)(int open, const ENCODING *enc,
const char *ptr, const char *end, const char *ptr, const char *end,
const char **nextTokPtr) const char **nextTokPtr)
{ {
while (ptr != end) { while (HAS_CHAR(enc, ptr, end)) {
int t = BYTE_TYPE(enc, ptr); int t = BYTE_TYPE(enc, ptr);
switch (t) { switch (t) {
INVALID_CASES(ptr, nextTokPtr) INVALID_CASES(ptr, nextTokPtr)
@@ -950,7 +945,7 @@ PREFIX(scanLit)(int open, const ENCODING *enc,
ptr += MINBPC(enc); ptr += MINBPC(enc);
if (t != open) if (t != open)
break; break;
if (ptr == end) if (! HAS_CHAR(enc, ptr, end))
return -XML_TOK_LITERAL; return -XML_TOK_LITERAL;
*nextTokPtr = ptr; *nextTokPtr = ptr;
switch (BYTE_TYPE(enc, ptr)) { switch (BYTE_TYPE(enc, ptr)) {
@@ -973,7 +968,7 @@ PREFIX(prologTok)(const ENCODING *enc, const char *ptr, const char *end,
const char **nextTokPtr) const char **nextTokPtr)
{ {
int tok; int tok;
if (ptr == end) if (ptr >= end)
return XML_TOK_NONE; return XML_TOK_NONE;
if (MINBPC(enc) > 1) { if (MINBPC(enc) > 1) {
size_t n = end - ptr; size_t n = end - ptr;
@@ -992,8 +987,7 @@ PREFIX(prologTok)(const ENCODING *enc, const char *ptr, const char *end,
case BT_LT: case BT_LT:
{ {
ptr += MINBPC(enc); ptr += MINBPC(enc);
if (ptr == end) REQUIRE_CHAR(enc, ptr, end);
return XML_TOK_PARTIAL;
switch (BYTE_TYPE(enc, ptr)) { switch (BYTE_TYPE(enc, ptr)) {
case BT_EXCL: case BT_EXCL:
return PREFIX(scanDecl)(enc, ptr + MINBPC(enc), end, nextTokPtr); return PREFIX(scanDecl)(enc, ptr + MINBPC(enc), end, nextTokPtr);
@@ -1021,7 +1015,7 @@ PREFIX(prologTok)(const ENCODING *enc, const char *ptr, const char *end,
case BT_S: case BT_LF: case BT_S: case BT_LF:
for (;;) { for (;;) {
ptr += MINBPC(enc); ptr += MINBPC(enc);
if (ptr == end) if (! HAS_CHAR(enc, ptr, end))
break; break;
switch (BYTE_TYPE(enc, ptr)) { switch (BYTE_TYPE(enc, ptr)) {
case BT_S: case BT_LF: case BT_S: case BT_LF:
@@ -1048,11 +1042,10 @@ PREFIX(prologTok)(const ENCODING *enc, const char *ptr, const char *end,
return XML_TOK_OPEN_BRACKET; return XML_TOK_OPEN_BRACKET;
case BT_RSQB: case BT_RSQB:
ptr += MINBPC(enc); ptr += MINBPC(enc);
if (ptr == end) if (! HAS_CHAR(enc, ptr, end))
return -XML_TOK_CLOSE_BRACKET; return -XML_TOK_CLOSE_BRACKET;
if (CHAR_MATCHES(enc, ptr, ASCII_RSQB)) { if (CHAR_MATCHES(enc, ptr, ASCII_RSQB)) {
if (ptr + MINBPC(enc) == end) REQUIRE_CHARS(enc, ptr, end, 2);
return XML_TOK_PARTIAL;
if (CHAR_MATCHES(enc, ptr + MINBPC(enc), ASCII_GT)) { if (CHAR_MATCHES(enc, ptr + MINBPC(enc), ASCII_GT)) {
*nextTokPtr = ptr + 2*MINBPC(enc); *nextTokPtr = ptr + 2*MINBPC(enc);
return XML_TOK_COND_SECT_CLOSE; return XML_TOK_COND_SECT_CLOSE;
@@ -1065,7 +1058,7 @@ PREFIX(prologTok)(const ENCODING *enc, const char *ptr, const char *end,
return XML_TOK_OPEN_PAREN; return XML_TOK_OPEN_PAREN;
case BT_RPAR: case BT_RPAR:
ptr += MINBPC(enc); ptr += MINBPC(enc);
if (ptr == end) if (! HAS_CHAR(enc, ptr, end))
return -XML_TOK_CLOSE_PAREN; return -XML_TOK_CLOSE_PAREN;
switch (BYTE_TYPE(enc, ptr)) { switch (BYTE_TYPE(enc, ptr)) {
case BT_AST: case BT_AST:
@@ -1141,7 +1134,7 @@ PREFIX(prologTok)(const ENCODING *enc, const char *ptr, const char *end,
*nextTokPtr = ptr; *nextTokPtr = ptr;
return XML_TOK_INVALID; return XML_TOK_INVALID;
} }
while (ptr != end) { while (HAS_CHAR(enc, ptr, end)) {
switch (BYTE_TYPE(enc, ptr)) { switch (BYTE_TYPE(enc, ptr)) {
CHECK_NAME_CASES(enc, ptr, end, nextTokPtr) CHECK_NAME_CASES(enc, ptr, end, nextTokPtr)
case BT_GT: case BT_RPAR: case BT_COMMA: case BT_GT: case BT_RPAR: case BT_COMMA:
@@ -1154,8 +1147,7 @@ PREFIX(prologTok)(const ENCODING *enc, const char *ptr, const char *end,
ptr += MINBPC(enc); ptr += MINBPC(enc);
switch (tok) { switch (tok) {
case XML_TOK_NAME: case XML_TOK_NAME:
if (ptr == end) REQUIRE_CHAR(enc, ptr, end);
return XML_TOK_PARTIAL;
tok = XML_TOK_PREFIXED_NAME; tok = XML_TOK_PREFIXED_NAME;
switch (BYTE_TYPE(enc, ptr)) { switch (BYTE_TYPE(enc, ptr)) {
CHECK_NAME_CASES(enc, ptr, end, nextTokPtr) CHECK_NAME_CASES(enc, ptr, end, nextTokPtr)
@@ -1204,10 +1196,12 @@ PREFIX(attributeValueTok)(const ENCODING *enc, const char *ptr,
const char *end, const char **nextTokPtr) const char *end, const char **nextTokPtr)
{ {
const char *start; const char *start;
if (ptr == end) if (ptr >= end)
return XML_TOK_NONE; return XML_TOK_NONE;
else if (! HAS_CHAR(enc, ptr, end))
return XML_TOK_PARTIAL;
start = ptr; start = ptr;
while (ptr != end) { while (HAS_CHAR(enc, ptr, end)) {
switch (BYTE_TYPE(enc, ptr)) { switch (BYTE_TYPE(enc, ptr)) {
#define LEAD_CASE(n) \ #define LEAD_CASE(n) \
case BT_LEAD ## n: ptr += n; break; case BT_LEAD ## n: ptr += n; break;
@@ -1232,7 +1226,7 @@ PREFIX(attributeValueTok)(const ENCODING *enc, const char *ptr,
case BT_CR: case BT_CR:
if (ptr == start) { if (ptr == start) {
ptr += MINBPC(enc); ptr += MINBPC(enc);
if (ptr == end) if (! HAS_CHAR(enc, ptr, end))
return XML_TOK_TRAILING_CR; return XML_TOK_TRAILING_CR;
if (BYTE_TYPE(enc, ptr) == BT_LF) if (BYTE_TYPE(enc, ptr) == BT_LF)
ptr += MINBPC(enc); ptr += MINBPC(enc);
@@ -1262,10 +1256,12 @@ PREFIX(entityValueTok)(const ENCODING *enc, const char *ptr,
const char *end, const char **nextTokPtr) const char *end, const char **nextTokPtr)
{ {
const char *start; const char *start;
if (ptr == end) if (ptr >= end)
return XML_TOK_NONE; return XML_TOK_NONE;
else if (! HAS_CHAR(enc, ptr, end))
return XML_TOK_PARTIAL;
start = ptr; start = ptr;
while (ptr != end) { while (HAS_CHAR(enc, ptr, end)) {
switch (BYTE_TYPE(enc, ptr)) { switch (BYTE_TYPE(enc, ptr)) {
#define LEAD_CASE(n) \ #define LEAD_CASE(n) \
case BT_LEAD ## n: ptr += n; break; case BT_LEAD ## n: ptr += n; break;
@@ -1294,7 +1290,7 @@ PREFIX(entityValueTok)(const ENCODING *enc, const char *ptr,
case BT_CR: case BT_CR:
if (ptr == start) { if (ptr == start) {
ptr += MINBPC(enc); ptr += MINBPC(enc);
if (ptr == end) if (! HAS_CHAR(enc, ptr, end))
return XML_TOK_TRAILING_CR; return XML_TOK_TRAILING_CR;
if (BYTE_TYPE(enc, ptr) == BT_LF) if (BYTE_TYPE(enc, ptr) == BT_LF)
ptr += MINBPC(enc); ptr += MINBPC(enc);
@@ -1326,15 +1322,15 @@ PREFIX(ignoreSectionTok)(const ENCODING *enc, const char *ptr,
end = ptr + n; end = ptr + n;
} }
} }
while (ptr != end) { while (HAS_CHAR(enc, ptr, end)) {
switch (BYTE_TYPE(enc, ptr)) { switch (BYTE_TYPE(enc, ptr)) {
INVALID_CASES(ptr, nextTokPtr) INVALID_CASES(ptr, nextTokPtr)
case BT_LT: case BT_LT:
if ((ptr += MINBPC(enc)) == end) ptr += MINBPC(enc);
return XML_TOK_PARTIAL; REQUIRE_CHAR(enc, ptr, end);
if (CHAR_MATCHES(enc, ptr, ASCII_EXCL)) { if (CHAR_MATCHES(enc, ptr, ASCII_EXCL)) {
if ((ptr += MINBPC(enc)) == end) ptr += MINBPC(enc);
return XML_TOK_PARTIAL; REQUIRE_CHAR(enc, ptr, end);
if (CHAR_MATCHES(enc, ptr, ASCII_LSQB)) { if (CHAR_MATCHES(enc, ptr, ASCII_LSQB)) {
++level; ++level;
ptr += MINBPC(enc); ptr += MINBPC(enc);
@@ -1342,11 +1338,11 @@ PREFIX(ignoreSectionTok)(const ENCODING *enc, const char *ptr,
} }
break; break;
case BT_RSQB: case BT_RSQB:
if ((ptr += MINBPC(enc)) == end) ptr += MINBPC(enc);
return XML_TOK_PARTIAL; REQUIRE_CHAR(enc, ptr, end);
if (CHAR_MATCHES(enc, ptr, ASCII_RSQB)) { if (CHAR_MATCHES(enc, ptr, ASCII_RSQB)) {
if ((ptr += MINBPC(enc)) == end) ptr += MINBPC(enc);
return XML_TOK_PARTIAL; REQUIRE_CHAR(enc, ptr, end);
if (CHAR_MATCHES(enc, ptr, ASCII_GT)) { if (CHAR_MATCHES(enc, ptr, ASCII_GT)) {
ptr += MINBPC(enc); ptr += MINBPC(enc);
if (level == 0) { if (level == 0) {
@@ -1373,7 +1369,7 @@ PREFIX(isPublicId)(const ENCODING *enc, const char *ptr, const char *end,
{ {
ptr += MINBPC(enc); ptr += MINBPC(enc);
end -= MINBPC(enc); end -= MINBPC(enc);
for (; ptr != end; ptr += MINBPC(enc)) { for (; HAS_CHAR(enc, ptr, end); ptr += MINBPC(enc)) {
switch (BYTE_TYPE(enc, ptr)) { switch (BYTE_TYPE(enc, ptr)) {
case BT_DIGIT: case BT_DIGIT:
case BT_HEX: case BT_HEX:
@@ -1521,7 +1517,7 @@ PREFIX(getAtts)(const ENCODING *enc, const char *ptr,
} }
static int PTRFASTCALL static int PTRFASTCALL
PREFIX(charRefNumber)(const ENCODING *enc, const char *ptr) PREFIX(charRefNumber)(const ENCODING *UNUSED_P(enc), const char *ptr)
{ {
int result = 0; int result = 0;
/* skip &# */ /* skip &# */
@@ -1565,7 +1561,7 @@ PREFIX(charRefNumber)(const ENCODING *enc, const char *ptr)
} }
static int PTRCALL static int PTRCALL
PREFIX(predefinedEntityName)(const ENCODING *enc, const char *ptr, PREFIX(predefinedEntityName)(const ENCODING *UNUSED_P(enc), const char *ptr,
const char *end) const char *end)
{ {
switch ((end - ptr)/MINBPC(enc)) { switch ((end - ptr)/MINBPC(enc)) {
@@ -1683,11 +1679,11 @@ PREFIX(sameName)(const ENCODING *enc, const char *ptr1, const char *ptr2)
} }
static int PTRCALL static int PTRCALL
PREFIX(nameMatchesAscii)(const ENCODING *enc, const char *ptr1, PREFIX(nameMatchesAscii)(const ENCODING *UNUSED_P(enc), const char *ptr1,
const char *end1, const char *ptr2) const char *end1, const char *ptr2)
{ {
for (; *ptr2; ptr1 += MINBPC(enc), ptr2++) { for (; *ptr2; ptr1 += MINBPC(enc), ptr2++) {
if (ptr1 == end1) if (end1 - ptr1 < MINBPC(enc))
return 0; return 0;
if (!CHAR_MATCHES(enc, ptr1, *ptr2)) if (!CHAR_MATCHES(enc, ptr1, *ptr2))
return 0; return 0;
@@ -1744,7 +1740,7 @@ PREFIX(updatePosition)(const ENCODING *enc,
const char *end, const char *end,
POSITION *pos) POSITION *pos)
{ {
while (ptr < end) { while (HAS_CHAR(enc, ptr, end)) {
switch (BYTE_TYPE(enc, ptr)) { switch (BYTE_TYPE(enc, ptr)) {
#define LEAD_CASE(n) \ #define LEAD_CASE(n) \
case BT_LEAD ## n: \ case BT_LEAD ## n: \
@@ -1760,7 +1756,7 @@ PREFIX(updatePosition)(const ENCODING *enc,
case BT_CR: case BT_CR:
pos->lineNumber++; pos->lineNumber++;
ptr += MINBPC(enc); ptr += MINBPC(enc);
if (ptr != end && BYTE_TYPE(enc, ptr) == BT_LF) if (HAS_CHAR(enc, ptr, end) && BYTE_TYPE(enc, ptr) == BT_LF)
ptr += MINBPC(enc); ptr += MINBPC(enc);
pos->columnNumber = (XML_Size)-1; pos->columnNumber = (XML_Size)-1;
break; break;