Merge pull request #197 from tyler92/fix-buffer-overflow

Fix buffer overflow in u8_strlen
This commit is contained in:
Tristan Penman 2024-10-22 10:03:24 +11:00 committed by GitHub
commit cc6ca369d3
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
3 changed files with 72 additions and 31 deletions

View File

@ -126,6 +126,7 @@ if(valijson_BUILD_TESTS)
tests/test_validator.cpp tests/test_validator.cpp
tests/test_validator_with_custom_regular_expression_engine.cpp tests/test_validator_with_custom_regular_expression_engine.cpp
tests/test_yaml_cpp_adapter.cpp tests/test_yaml_cpp_adapter.cpp
tests/test_utf8_utils.cpp
) )
set(TEST_LIBS gtest gtest_main jsoncpp json11 yamlcpp) set(TEST_LIBS gtest gtest_main jsoncpp json11 yamlcpp)

View File

@ -14,45 +14,34 @@
namespace valijson { namespace valijson {
namespace utils { namespace utils {
static const uint32_t offsetsFromUTF8[6] = {
0x00000000UL, 0x00003080UL, 0x000E2080UL,
0x03C82080UL, 0xFA082080UL, 0x82082080UL
};
/* is c the start of a utf8 sequence? */ /* is c the start of a utf8 sequence? */
inline bool isutf(char c) { inline bool isutf(char c)
return ((c & 0xC0) != 0x80);
}
/* reads the next utf-8 sequence out of a string, updating an index */
inline uint64_t u8_nextchar(const char *s, uint64_t *i)
{ {
uint64_t ch = 0; return ((c & 0xC0) != 0x80);
int sz = 0;
do {
ch <<= 6;
ch += static_cast<unsigned char>(s[(*i)++]);
sz++;
} while (s[*i] && !isutf(s[*i]));
ch -= offsetsFromUTF8[sz-1];
return ch;
} }
/* number of characters */ /* number of characters */
inline uint64_t u8_strlen(const char *s) inline uint64_t u8_strlen(const char *s)
{ {
constexpr auto maxLength = std::numeric_limits<uint64_t>::max();
uint64_t count = 0; uint64_t count = 0;
uint64_t i = 0;
while (s[i] != 0 && u8_nextchar(s, &i) != 0) { while (*s) {
if (i == maxLength) { unsigned char p = static_cast<unsigned char>(*s);
throwRuntimeError(
"String exceeded maximum size of " + size_t seqLen = p < 0x80 ? 1 // 0xxxxxxx: 1-byte (ASCII)
std::to_string(maxLength) + " bytes."); : p < 0xE0 ? 2 // 110xxxxx: 2-byte sequence
: p < 0xF0 ? 3 // 1110xxxx: 3-byte sequence
: p < 0xF8 ? 4 // 11110xxx: 4-byte sequence
: 1; // treat as a single character
for (size_t i = 1; i < seqLen; ++i) {
if (s[i] == 0 || isutf(s[i])) {
seqLen = i;
break;
} }
}
s += seqLen;
count++; count++;
} }

51
tests/test_utf8_utils.cpp Normal file
View File

@ -0,0 +1,51 @@
#include <gtest/gtest.h>
#include <valijson/utils/utf8_utils.hpp>
// Stateless fixture: it adds no set-up or members and only provides the
// suite name under which the UTF-8 utility tests are registered.
class TestUtf8Utils : public testing::Test {};
// Checks the character count reported by u8_strlen for well-formed UTF-8 as
// well as for truncated, overlong and out-of-range byte sequences.
TEST_F(TestUtf8Utils, Utf8StringLength)
{
    using valijson::utils::u8_strlen;

    // Expected values are unsigned literals: u8_strlen returns an unsigned
    // 64-bit count, and comparing it against a plain int inside EXPECT_EQ
    // is a signed/unsigned mismatch.

    // Empty and plain ASCII input
    EXPECT_EQ(u8_strlen(""), 0u);
    EXPECT_EQ(u8_strlen("a"), 1u);
    EXPECT_EQ(u8_strlen("abc"), 3u);

    // U+0416 (2-byte sequence)
    EXPECT_EQ(u8_strlen("\xD0\x96"), 1u);

    // U+0915 (3-byte sequence)
    EXPECT_EQ(u8_strlen("\xE0\xA4\x95"), 1u);

    // U+10348 (4-byte sequence)
    EXPECT_EQ(u8_strlen("\xF0\x90\x8D\x88"), 1u);

    // U+0915 + U+0416
    EXPECT_EQ(u8_strlen("\xE0\xA4\x95\xD0\x96"), 2u);

    // incomplete U+0416 at the end
    EXPECT_EQ(u8_strlen("\xD0"), 1u);

    // incomplete U+0416 in the middle; the octal escape keeps "abc" from
    // being parsed as part of the escape sequence
    EXPECT_EQ(u8_strlen("\320abc"), 4u);

    // incomplete U+0915 at the end
    EXPECT_EQ(u8_strlen("\xE0\xA4"), 1u);

    // incomplete U+0915 in the middle; \244 is written in octal because
    // "\xA4abc" would absorb 'a', 'b' and 'c' as hex digits
    EXPECT_EQ(u8_strlen("\xE0\244abc"), 4u);

    // U+DFFF (surrogate code point)
    EXPECT_EQ(u8_strlen("\xED\xBF\xBF"), 1u);

    // Overlong encoding for U+0000
    EXPECT_EQ(u8_strlen("\xC0\x80"), 1u);

    // U+110000 (out of Unicode range)
    EXPECT_EQ(u8_strlen("\xF5\x80\x80\x80"), 1u);

    // 0xE0 followed by 0xA4 repeated 9 times: a run of stray continuation
    // bytes after one 3-byte sequence
    EXPECT_EQ(u8_strlen("\340\244\244\244\244\244\244\244\244\244"), 5u);
}