Merge pull request #743 from tjanc/tjanc/fix-utf8-codepoint

Incorrect bit shift when decoding 4-byte UTF-8 sequences into codepoints
This commit is contained in:
Christopher Dunn 2018-02-14 10:33:35 -06:00 committed by GitHub
commit 313a0e4c34
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

View File

@@ -209,7 +209,7 @@ static unsigned int utf8ToCodepoint(const char*& s, const char* e) {
if (e - s < 4)
return REPLACEMENT_CHARACTER;
- unsigned int calculated = ((firstByte & 0x07) << 24)
+ unsigned int calculated = ((firstByte & 0x07) << 18)
| ((static_cast<unsigned int>(s[1]) & 0x3F) << 12)
| ((static_cast<unsigned int>(s[2]) & 0x3F) << 6)
| (static_cast<unsigned int>(s[3]) & 0x3F);