mirror of
https://github.com/tristanpenman/valijson.git
synced 2024-12-13 10:32:58 +01:00
Fix buffer overflow in u8_strlen
This commit is contained in:
parent
fc9ddf14db
commit
b7c051fbc1
@ -126,6 +126,7 @@ if(valijson_BUILD_TESTS)
|
|||||||
tests/test_validator.cpp
|
tests/test_validator.cpp
|
||||||
tests/test_validator_with_custom_regular_expression_engine.cpp
|
tests/test_validator_with_custom_regular_expression_engine.cpp
|
||||||
tests/test_yaml_cpp_adapter.cpp
|
tests/test_yaml_cpp_adapter.cpp
|
||||||
|
tests/test_utf8_utils.cpp
|
||||||
)
|
)
|
||||||
|
|
||||||
set(TEST_LIBS gtest gtest_main jsoncpp json11 yamlcpp)
|
set(TEST_LIBS gtest gtest_main jsoncpp json11 yamlcpp)
|
||||||
|
@ -14,50 +14,39 @@
|
|||||||
namespace valijson {
|
namespace valijson {
|
||||||
namespace utils {
|
namespace utils {
|
||||||
|
|
||||||
static const uint32_t offsetsFromUTF8[6] = {
|
|
||||||
0x00000000UL, 0x00003080UL, 0x000E2080UL,
|
|
||||||
0x03C82080UL, 0xFA082080UL, 0x82082080UL
|
|
||||||
};
|
|
||||||
|
|
||||||
/* is c the start of a utf8 sequence? */
|
/* is c the start of a utf8 sequence? */
|
||||||
inline bool isutf(char c) {
|
inline bool isutf(char c)
|
||||||
return ((c & 0xC0) != 0x80);
|
|
||||||
}
|
|
||||||
|
|
||||||
/* reads the next utf-8 sequence out of a string, updating an index */
|
|
||||||
inline uint64_t u8_nextchar(const char *s, uint64_t *i)
|
|
||||||
{
|
{
|
||||||
uint64_t ch = 0;
|
return ((c & 0xC0) != 0x80);
|
||||||
int sz = 0;
|
|
||||||
|
|
||||||
do {
|
|
||||||
ch <<= 6;
|
|
||||||
ch += static_cast<unsigned char>(s[(*i)++]);
|
|
||||||
sz++;
|
|
||||||
} while (s[*i] && !isutf(s[*i]));
|
|
||||||
ch -= offsetsFromUTF8[sz-1];
|
|
||||||
|
|
||||||
return ch;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/* number of characters */
|
/* number of characters */
|
||||||
inline uint64_t u8_strlen(const char *s)
|
inline uint64_t u8_strlen(const char *s)
|
||||||
{
|
{
|
||||||
constexpr auto maxLength = std::numeric_limits<uint64_t>::max();
|
|
||||||
uint64_t count = 0;
|
uint64_t count = 0;
|
||||||
uint64_t i = 0;
|
|
||||||
|
|
||||||
while (s[i] != 0 && u8_nextchar(s, &i) != 0) {
|
while (*s) {
|
||||||
if (i == maxLength) {
|
unsigned char p = static_cast<unsigned char>(*s);
|
||||||
throwRuntimeError(
|
|
||||||
"String exceeded maximum size of " +
|
size_t seqLen = p < 0x80 ? 1 // 0xxxxxxx: 1-byte (ASCII)
|
||||||
std::to_string(maxLength) + " bytes.");
|
: p < 0xE0 ? 2 // 110xxxxx: 2-byte sequence
|
||||||
|
: p < 0xF0 ? 3 // 1110xxxx: 3-byte sequence
|
||||||
|
: p < 0xF8 ? 4 // 11110xxx: 4-byte sequence
|
||||||
|
: 1; // treat as a single character
|
||||||
|
|
||||||
|
for (size_t i = 1; i < seqLen; ++i) {
|
||||||
|
if (s[i] == 0 || isutf(s[i])) {
|
||||||
|
seqLen = i;
|
||||||
|
break;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
s += seqLen;
|
||||||
count++;
|
count++;
|
||||||
}
|
}
|
||||||
|
|
||||||
return count;
|
return count;
|
||||||
}
|
}
|
||||||
|
|
||||||
} // namespace utils
|
} // namespace utils
|
||||||
} // namespace valijson
|
} // namespace valijson
|
||||||
|
51
tests/test_utf8_utils.cpp
Normal file
51
tests/test_utf8_utils.cpp
Normal file
@ -0,0 +1,51 @@
|
|||||||
|
#include <gtest/gtest.h>
|
||||||
|
#include <valijson/utils/utf8_utils.hpp>
|
||||||
|
|
||||||
|
class TestUtf8Utils : public testing::Test
|
||||||
|
{
|
||||||
|
};
|
||||||
|
|
||||||
|
TEST_F(TestUtf8Utils, Utf8StringLength)
|
||||||
|
{
|
||||||
|
using valijson::utils::u8_strlen;
|
||||||
|
|
||||||
|
EXPECT_EQ(u8_strlen(""), 0);
|
||||||
|
EXPECT_EQ(u8_strlen("a"), 1);
|
||||||
|
EXPECT_EQ(u8_strlen("abc"), 3);
|
||||||
|
|
||||||
|
// U+0416
|
||||||
|
EXPECT_EQ(u8_strlen("\xD0\x96"), 1);
|
||||||
|
|
||||||
|
// U+0915
|
||||||
|
EXPECT_EQ(u8_strlen("\xE0\xA4\x95"), 1);
|
||||||
|
|
||||||
|
// U+10348
|
||||||
|
EXPECT_EQ(u8_strlen("\xF0\x90\x8D\x88"), 1);
|
||||||
|
|
||||||
|
// U+0915 + U+0416
|
||||||
|
EXPECT_EQ(u8_strlen("\xE0\xA4\x95\xD0\x96"), 2);
|
||||||
|
|
||||||
|
// incomplete U+0416 at the end
|
||||||
|
EXPECT_EQ(u8_strlen("\xD0"), 1);
|
||||||
|
|
||||||
|
// incomplete U+0416 in the middle
|
||||||
|
EXPECT_EQ(u8_strlen("\320abc"), 4);
|
||||||
|
|
||||||
|
// incomplete U+0915 at the end
|
||||||
|
EXPECT_EQ(u8_strlen("\xE0\xA4"), 1);
|
||||||
|
|
||||||
|
// incomplete U+0915 in the middle
|
||||||
|
EXPECT_EQ(u8_strlen("\xE0\244abc"), 4);
|
||||||
|
|
||||||
|
// U+DFFF
|
||||||
|
EXPECT_EQ(u8_strlen("\xED\xBF\xBF"), 1);
|
||||||
|
|
||||||
|
// Overlong encoding for U+0000
|
||||||
|
EXPECT_EQ(u8_strlen("\xC0\x80"), 1);
|
||||||
|
|
||||||
|
// U+110000 (out of Unicode range)
|
||||||
|
EXPECT_EQ(u8_strlen("\xF5\x80\x80\x80"), 1);
|
||||||
|
|
||||||
|
// 0xE0 + 0xA4 repeating 9 times
|
||||||
|
EXPECT_EQ(u8_strlen("\340\244\244\244\244\244\244\244\244\244"), 5);
|
||||||
|
}
|
Loading…
Reference in New Issue
Block a user