From 737ea798a8bf873bc04c715ffb6701fec0edb7f8 Mon Sep 17 00:00:00 2001
From: albertodemichelis
Date: Sat, 5 Mar 2016 19:29:14 +0800
Subject: [PATCH] added \u and \U escape sequence for UTF8,UTF16 or UCS4
 characters the escape character \x is based on sizeof(SQChar)

---
 HISTORY               |  2 +
 sqstdlib/sqstdrex.cpp |  6 +--
 squirrel/sqlexer.cpp  | 98 ++++++++++++++++++++++++++++++++++++-------
 squirrel/sqlexer.h    |  8 ++++
 4 files changed, 97 insertions(+), 17 deletions(-)

diff --git a/HISTORY b/HISTORY
index d1219b4..38b1e12 100644
--- a/HISTORY
+++ b/HISTORY
@@ -5,6 +5,8 @@
 -added sq_getreleasehook
 -added thread.wakeupthrow()
 -added sq_pushthread
+-added \u and \U escape sequence for UTF8,UTF16 or UCS4 characters
+-the escape character \x is based on sizeof(SQChar)
 -fixed optimizer bug in compound arith oprators(+=,-= etc...)
 -fixed sq_getrefvmcount() (thx Gerrit)
 -fixed sq_getrefcount() when no references were added with sq_addref() (thx Gerrit)
diff --git a/sqstdlib/sqstdrex.cpp b/sqstdlib/sqstdrex.cpp
index 34c0cb1..22c1317 100644
--- a/sqstdlib/sqstdrex.cpp
+++ b/sqstdlib/sqstdrex.cpp
@@ -523,10 +523,10 @@ static const SQChar *sqstd_rex_matchnode(SQRex* exp,SQRexNode *node,const SQChar
         return NULL;
     case OP_MB:
         {
-            int cb = node->left; //char that opens a balanced expression
+            SQInteger cb = node->left; //char that opens a balanced expression
             if(*str != cb) return NULL; // string doesnt start with open char
-            int ce = node->right; //char that closes a balanced expression
-            int cont = 1;
+            SQInteger ce = node->right; //char that closes a balanced expression
+            SQInteger cont = 1;
             const SQChar *streol = exp->_eol;
             while (++str < streol) {
                 if (*str == ce) {
diff --git a/squirrel/sqlexer.cpp b/squirrel/sqlexer.cpp
index 7fdf496..29b0039 100644
--- a/squirrel/sqlexer.cpp
+++ b/squirrel/sqlexer.cpp
@@ -287,6 +287,65 @@ SQInteger SQLexer::GetIDType(const SQChar *s,SQInteger len)
     return TK_IDENTIFIER;
 }
 
+#ifdef SQUNICODE
+#if WCHAR_SIZE == 2
+SQInteger SQLexer::AddUTF16(SQUnsignedInteger ch) // emits ch through APPEND_CHAR as UTF-16; returns the number of SQChars emitted (1 or 2)
+{
+    if (ch >= 0x10000) // does not fit in one 16-bit unit: emit a surrogate pair
+    {
+        SQUnsignedInteger code = (ch - 0x10000);
+        APPEND_CHAR((SQChar)(0xD800 | (code >> 10))); // high surrogate: bits above the low 10
+        APPEND_CHAR((SQChar)(0xDC00 | (code & 0x3FF))); // low surrogate: low 10 bits
+        return 2;
+    }
+    else {
+        APPEND_CHAR((SQChar)ch);
+        return 1;
+    }
+}
+#endif
+#else
+SQInteger SQLexer::AddUTF8(SQUnsignedInteger ch) // emits ch through APPEND_CHAR as 1-4 UTF-8 bytes; returns the byte count, or 0 (nothing emitted) when ch >= 0x110000
+{
+    if (ch < 0x80) {
+        APPEND_CHAR((char)ch);
+        return 1;
+    }
+    if (ch < 0x800) {
+        APPEND_CHAR((SQChar)((ch >> 6) | 0xC0));
+        APPEND_CHAR((SQChar)((ch & 0x3F) | 0x80));
+        return 2;
+    }
+    if (ch < 0x10000) {
+        APPEND_CHAR((SQChar)((ch >> 12) | 0xE0));
+        APPEND_CHAR((SQChar)(((ch >> 6) & 0x3F) | 0x80));
+        APPEND_CHAR((SQChar)((ch & 0x3F) | 0x80));
+        return 3;
+    }
+    if (ch < 0x110000) {
+        APPEND_CHAR((SQChar)((ch >> 18) | 0xF0));
+        APPEND_CHAR((SQChar)(((ch >> 12) & 0x3F) | 0x80));
+        APPEND_CHAR((SQChar)(((ch >> 6) & 0x3F) | 0x80));
+        APPEND_CHAR((SQChar)((ch & 0x3F) | 0x80));
+        return 4;
+    }
+    return 0;
+}
+#endif
+
+SQInteger SQLexer::ProcessStringHexEscape(SQChar *dest, SQInteger maxdigits) // copies up to maxdigits hex digits into dest and zero-terminates it; dest must hold maxdigits+1 SQChars; returns the digit count
+{
+    NEXT(); // step past the escape letter the caller is positioned on
+    if (!isxdigit(CUR_CHAR)) Error(_SC("hexadecimal number expected"));
+    SQInteger n = 0;
+    while (isxdigit(CUR_CHAR) && n < maxdigits) {
+        dest[n] = CUR_CHAR;
+        n++;
+        NEXT();
+    }
+    dest[n] = 0;
+    return n;
+}
 
 SQInteger SQLexer::ReadString(SQInteger ndelim,bool verbatim)
 {
@@ -295,7 +354,8 @@ SQInteger SQLexer::ReadString(SQInteger ndelim,bool verbatim)
     if(IS_EOB()) return -1;
     for(;;) {
         while(CUR_CHAR != ndelim) {
-            switch(CUR_CHAR) {
+            SQInteger x = CUR_CHAR;
+            switch (x) {
             case SQUIRREL_EOB:
                 Error(_SC("unfinished string"));
                 return -1;
@@ -311,19 +371,29 @@ SQInteger SQLexer::ReadString(SQInteger ndelim,bool verbatim)
                 else {
                     NEXT();
                     switch(CUR_CHAR) {
-                    case _SC('x'): NEXT(); {
-                        if(!isxdigit(CUR_CHAR)) Error(_SC("hexadecimal number expected"));
-                        const SQInteger maxdigits = 4;
-                        SQChar temp[maxdigits+1];
-                        SQInteger n = 0;
-                        while(isxdigit(CUR_CHAR) && n < maxdigits) {
-                            temp[n] = CUR_CHAR;
-                            n++;
-                            NEXT();
-                        }
-                        temp[n] = 0;
-                        SQChar *sTemp;
-                        APPEND_CHAR((SQChar)scstrtoul(temp,&sTemp,16));
+                    case _SC('x'): {
+                        const SQInteger maxdigits = sizeof(SQChar) * 2; // two hex digits per byte of SQChar
+                        SQChar temp[maxdigits + 1];
+                        ProcessStringHexEscape(temp, maxdigits);
+                        SQChar *stemp;
+                        APPEND_CHAR((SQChar)scstrtoul(temp, &stemp, 16));
+                    }
+                    break;
+                    case _SC('U'):
+                    case _SC('u'): {
+                        const SQInteger maxdigits = CUR_CHAR == _SC('u') ? 4 : 8; // test CUR_CHAR: x still holds the character seen by the outer switch, not the escape letter
+                        SQChar temp[8 + 1];
+                        ProcessStringHexEscape(temp, maxdigits);
+                        SQChar *stemp;
+#ifdef SQUNICODE
+#if WCHAR_SIZE == 2
+                        AddUTF16(scstrtoul(temp, &stemp, 16));
+#else
+                        APPEND_CHAR((SQChar)scstrtoul(temp, &stemp, 16)); // SQChar is not 16-bit here: store the parsed value as a single SQChar
+#endif
+#else
+                        AddUTF8(scstrtoul(temp, &stemp, 16));
+#endif
                     }
                     break;
                     case _SC('t'): APPEND_CHAR(_SC('\t')); NEXT(); break;
diff --git a/squirrel/sqlexer.h b/squirrel/sqlexer.h
index 937ae25..4f02631 100644
--- a/squirrel/sqlexer.h
+++ b/squirrel/sqlexer.h
@@ -24,6 +24,14 @@ private:
     void LexLineComment();
     SQInteger ReadID();
     void Next();
+#ifdef SQUNICODE
+#if WCHAR_SIZE == 2
+    SQInteger AddUTF16(SQUnsignedInteger ch);
+#endif
+#else
+    SQInteger AddUTF8(SQUnsignedInteger ch);
+#endif
+    SQInteger ProcessStringHexEscape(SQChar *dest, SQInteger maxdigits);
     SQInteger _curtoken;
     SQTable *_keywords;
     SQBool _reached_eof;