mirror of
https://github.com/tristanpenman/valijson.git
synced 2024-12-12 10:13:51 +01:00
Fix buffer overflow in u8_strlen
This commit is contained in:
parent
fc9ddf14db
commit
b7c051fbc1
@ -126,6 +126,7 @@ if(valijson_BUILD_TESTS)
|
||||
tests/test_validator.cpp
|
||||
tests/test_validator_with_custom_regular_expression_engine.cpp
|
||||
tests/test_yaml_cpp_adapter.cpp
|
||||
tests/test_utf8_utils.cpp
|
||||
)
|
||||
|
||||
set(TEST_LIBS gtest gtest_main jsoncpp json11 yamlcpp)
|
||||
|
@ -14,50 +14,39 @@
|
||||
namespace valijson {
|
||||
namespace utils {
|
||||
|
||||
/*
 * Magic offsets subtracted by u8_nextchar() once it has accumulated the raw
 * bytes of a sequence; indexed by (sequence length - 1). Each entry is the sum
 * of the UTF-8 marker bits (the lead-byte prefix plus the 0x80 of every
 * continuation byte) at their shifted positions, so that the subtraction
 * leaves only the code point bits. The last entry is that sum truncated to
 * 32 bits.
 */
static const uint32_t offsetsFromUTF8[6] = {
    0x00000000UL, 0x00003080UL, 0x000E2080UL,
    0x03C82080UL, 0xFA082080UL, 0x82082080UL
};

/*
 * Is c the start of a utf8 sequence?
 *
 * Returns true for every byte that is not a continuation byte (10xxxxxx),
 * which includes ASCII characters and the terminating NUL.
 */
inline bool isutf(char c)
{
    return ((c & 0xC0) != 0x80);
}

/*
 * Reads the next utf-8 sequence out of a string, updating an index.
 *
 * s: string to read from; s[*i] must not be the terminating NUL, because the
 *    byte at *i is consumed unconditionally and the byte after it is inspected.
 * i: in/out byte index; on return it points just past the consumed sequence.
 *
 * Returns the accumulated value minus the offset for the number of bytes
 * consumed, which is the code point for well-formed input.
 *
 * At most 6 bytes (the size of offsetsFromUTF8) are consumed per call. Without
 * this cap, a lead byte followed by a long run of continuation bytes would
 * index past the end of the offsets table.
 */
inline uint64_t u8_nextchar(const char *s, uint64_t *i)
{
    constexpr int maxSeqLen = static_cast<int>(
            sizeof(offsetsFromUTF8) / sizeof(offsetsFromUTF8[0]));

    uint64_t ch = 0;
    int sz = 0;

    do {
        ch <<= 6;
        ch += static_cast<unsigned char>(s[(*i)++]);
        sz++;
    } while (sz < maxSeqLen && s[*i] && !isutf(s[*i]));

    // sz is now in [1, maxSeqLen], so the lookup stays inside the table.
    ch -= offsetsFromUTF8[sz - 1];

    return ch;
}
|
||||
|
||||
/*
 * Number of characters in a NUL-terminated string that is nominally UTF-8.
 *
 * The string is walked one sequence at a time. The lead byte determines the
 * expected sequence length, which is then shortened if the string ends or a
 * non-continuation byte appears before the sequence is complete. Every
 * sequence, complete or truncated, counts as one character, so malformed
 * input never causes a read past the terminating NUL.
 *
 * No validation is performed: overlong encodings, surrogates and values
 * beyond U+10FFFF are each counted as a single character.
 */
inline uint64_t u8_strlen(const char *s)
{
    uint64_t count = 0;

    while (*s) {
        const unsigned char lead = static_cast<unsigned char>(*s);

        // Expected length from the lead byte. A stray continuation byte
        // (10xxxxxx) in lead position falls into the 2-byte bucket.
        size_t seqLen = lead < 0x80 ? 1   // 0xxxxxxx: 1-byte (ASCII)
                      : lead < 0xE0 ? 2   // 10xxxxxx / 110xxxxx: 2-byte sequence
                      : lead < 0xF0 ? 3   // 1110xxxx: 3-byte sequence
                      : lead < 0xF8 ? 4   // 11110xxx: 4-byte sequence
                      : 1;                // anything else: single character

        // Stop at the first byte that is not a continuation byte. The
        // terminating NUL is not a continuation byte either, so this one
        // check also keeps the scan inside the string.
        for (size_t i = 1; i < seqLen; ++i) {
            const unsigned char next = static_cast<unsigned char>(s[i]);
            if ((next & 0xC0) != 0x80) {
                seqLen = i;
                break;
            }
        }

        s += seqLen;
        count++;
    }

    return count;
}
|
||||
|
||||
} // namespace utils
|
||||
} // namespace valijson
|
||||
} // namespace utils
|
||||
} // namespace valijson
|
||||
|
51
tests/test_utf8_utils.cpp
Normal file
51
tests/test_utf8_utils.cpp
Normal file
@ -0,0 +1,51 @@
|
||||
#include <gtest/gtest.h>
|
||||
#include <valijson/utils/utf8_utils.hpp>
|
||||
|
||||
// Empty fixture: it adds no state or set-up and only provides the suite name
// used by the TEST_F cases for the UTF-8 helpers.
class TestUtf8Utils : public testing::Test {};
|
||||
|
||||
// Checks u8_strlen on well-formed, truncated and otherwise malformed input.
//
// Expected values are unsigned literals so that the comparison with the
// uint64_t result does not mix signed and unsigned operands.
//
// Some byte strings use octal escapes ("\320" is 0xD0, "\244" is 0xA4). A hex
// escape has no length limit, so "\xD0abc" would absorb the following
// hex-digit characters 'a', 'b' and 'c' into the escape.
TEST_F(TestUtf8Utils, Utf8StringLength)
{
    using valijson::utils::u8_strlen;

    EXPECT_EQ(u8_strlen(""), 0u);
    EXPECT_EQ(u8_strlen("a"), 1u);
    EXPECT_EQ(u8_strlen("abc"), 3u);

    // U+0416
    EXPECT_EQ(u8_strlen("\xD0\x96"), 1u);

    // U+0915
    EXPECT_EQ(u8_strlen("\xE0\xA4\x95"), 1u);

    // U+10348
    EXPECT_EQ(u8_strlen("\xF0\x90\x8D\x88"), 1u);

    // U+0915 + U+0416
    EXPECT_EQ(u8_strlen("\xE0\xA4\x95\xD0\x96"), 2u);

    // incomplete U+0416 at the end
    EXPECT_EQ(u8_strlen("\xD0"), 1u);

    // incomplete U+0416 in the middle
    EXPECT_EQ(u8_strlen("\320abc"), 4u);

    // incomplete U+0915 at the end
    EXPECT_EQ(u8_strlen("\xE0\xA4"), 1u);

    // incomplete U+0915 in the middle
    EXPECT_EQ(u8_strlen("\xE0\244abc"), 4u);

    // U+DFFF
    EXPECT_EQ(u8_strlen("\xED\xBF\xBF"), 1u);

    // Overlong encoding for U+0000
    EXPECT_EQ(u8_strlen("\xC0\x80"), 1u);

    // U+110000 (out of Unicode range)
    EXPECT_EQ(u8_strlen("\xF5\x80\x80\x80"), 1u);

    // 0xE0 + 0xA4 repeating 9 times: one truncated 3-byte sequence, then the
    // remaining seven continuation bytes are consumed as 2 + 2 + 2 + 1
    EXPECT_EQ(u8_strlen("\340\244\244\244\244\244\244\244\244\244"), 5u);
}
|
Loading…
Reference in New Issue
Block a user