From ababb64e0c792ad2a314245233db0833ba12036b Mon Sep 17 00:00:00 2001 From: Moritz Bunkus Date: Tue, 20 Oct 2015 11:27:52 +0200 Subject: [PATCH] EbmlUnicodeString: don't read beyond end of string The conversion from an UTF-8 encoded string into a wchar_t one was reading from beyond the end of the source buffer if the length indicated by a UTF-8 character's first byte exceeds the number of bytes actually present afterwards. Fixes the issue reported as Cisco TALOS-CAN-0036. --- ChangeLog | 9 ++++++ src/EbmlUnicodeString.cpp | 65 ++++++++++++++++++++++++--------------- 2 files changed, 49 insertions(+), 25 deletions(-) diff --git a/ChangeLog b/ChangeLog index a1c95dc..a5139c4 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,12 @@ +2015-10-20 Moritz Bunkus + + * EbmlUnicodeString::UpdateFromUTF8(): Fixed an invalid memory + access. When reading from a UTF-8 string in which the length + indicated by a UTF-8 character's first byte exceeds the string's + actual number of bytes the parser would access beyond the end of + the string resulting in a heap information leak. Fixes the issue + reported as Cisco TALOS-CAN-0036. + 2015-10-17 Moritz Bunkus * Released v1.3.2. diff --git a/src/EbmlUnicodeString.cpp b/src/EbmlUnicodeString.cpp index 3255add..7b7cd8e 100644 --- a/src/EbmlUnicodeString.cpp +++ b/src/EbmlUnicodeString.cpp @@ -47,6 +47,21 @@ START_LIBEBML_NAMESPACE // ===================== UTFstring class =================== +static unsigned int UTFCharLength(uint8 lead) +{ + if (lead < 0x80) + return 1; + else if ((lead >> 5) == 0x6) + return 2; + else if ((lead >> 4) == 0xe) + return 3; + else if ((lead >> 3) == 0x1e) + return 4; + else + // Invalid size? 
+ return 0; +} + UTFstring::UTFstring() :_Length(0) ,_Data(NULL) @@ -143,39 +158,39 @@ void UTFstring::UpdateFromUTF8() delete [] _Data; // find the size of the final UCS-2 string size_t i; - for (_Length=0, i=0; i<UTF8string.length(); _Length++) { - uint8 lead = static_cast<uint8>(UTF8string[i]); - if (lead < 0x80) - i++; - else if ((lead >> 5) == 0x6) - i += 2; - else if ((lead >> 4) == 0xe) - i += 3; - else if ((lead >> 3) == 0x1e) - i += 4; + const size_t SrcLength = UTF8string.length(); + for (_Length=0, i=0; i<SrcLength; _Length++) { + const unsigned int CharLength = UTFCharLength(static_cast<uint8>(UTF8string[i])); + if ((CharLength >= 1) && (CharLength <= 4)) + i += CharLength; else // Invalid size? break; } _Data = new wchar_t[_Length+1]; size_t j; - for (j=0, i=0; i<UTF8string.length(); j++) { - uint8 lead = static_cast<uint8>(UTF8string[i]); - if (lead < 0x80) { - _Data[j] = lead; - i++; - } else if ((lead >> 5) == 0x6) { - _Data[j] = ((lead & 0x1F) << 6) + (UTF8string[i+1] & 0x3F); - i += 2; - } else if ((lead >> 4) == 0xe) { - _Data[j] = ((lead & 0x0F) << 12) + ((UTF8string[i+1] & 0x3F) << 6) + (UTF8string[i+2] & 0x3F); - i += 3; - } else if ((lead >> 3) == 0x1e) { - _Data[j] = ((lead & 0x07) << 18) + ((UTF8string[i+1] & 0x3F) << 12) + ((UTF8string[i+2] & 0x3F) << 6) + (UTF8string[i+3] & 0x3F); - i += 4; - } else + for (j=0, i=0; i<SrcLength; j++) { + const uint8 lead = static_cast<uint8>(UTF8string[i]); + const unsigned int CharLength = UTFCharLength(lead); + if ((CharLength < 1) || (CharLength > 4)) // Invalid char? break; + + if ((i + CharLength) > SrcLength) + // Guard against invalid memory access beyond the end of the + // source buffer. + break; + + if (CharLength == 1) + _Data[j] = lead; + else if (CharLength == 2) + _Data[j] = ((lead & 0x1F) << 6) + (UTF8string[i+1] & 0x3F); + else if (CharLength == 3) + _Data[j] = ((lead & 0x0F) << 12) + ((UTF8string[i+1] & 0x3F) << 6) + (UTF8string[i+2] & 0x3F); + else if (CharLength == 4) + _Data[j] = ((lead & 0x07) << 18) + ((UTF8string[i+1] & 0x3F) << 12) + ((UTF8string[i+2] & 0x3F) << 6) + (UTF8string[i+3] & 0x3F); + + i += CharLength; } _Data[j] = 0; }