boost/libs/nowide/test/test_sets.hpp
2021-10-05 21:37:46 +02:00

170 lines
5.6 KiB
C++

//
// Copyright (c) 2012 Artyom Beilis (Tonkikh)
//
// Distributed under the Boost Software License, Version 1.0. (See
// accompanying file LICENSE or copy at
// http://www.boost.org/LICENSE_1_0.txt)
//
#ifndef BOOST_NOWIDE_TEST_SETS_HPP_INCLUDED
#define BOOST_NOWIDE_TEST_SETS_HPP_INCLUDED
#include <boost/nowide/config.hpp>
#include <iostream>
#include <string>
// A narrow string paired with its wide counterpart, narrow string first.
// Member order is significant: the test tables initialise entries positionally.
struct utf8_to_wide
{
    char const* utf8;    // narrow (UTF-8 encoded) text
    wchar_t const* wide; // corresponding wide text
};
// A wide string paired with its narrow counterpart, wide string first.
// Member order is significant: the test tables initialise entries positionally.
struct wide_to_utf8
{
    wchar_t const* wide; // wide text
    char const* utf8;    // corresponding narrow (UTF-8 encoded) text
};
#if defined(BOOST_MSVC) && BOOST_MSVC < 1700
#pragma warning(disable : 4428) // universal-character-name encountered in source
#endif
// Wide string made of exactly one character: BOOST_NOWIDE_REPLACEMENT_CHARACTER converted to wchar_t.
const std::wstring wreplacement_str(1, static_cast<wchar_t>(BOOST_NOWIDE_REPLACEMENT_CHARACTER));
// clang-format off
// Valid UTF-8 strings paired with the equivalent wide strings.
// run_all() checks every entry in both directions:
// to_narrow(wide) must equal utf8, and to_wide(utf8) must equal wide.
const utf8_to_wide roundtrip_tests[] = {
// Empty string
{"", L""},
// ASCII (1 octet)
{"a", L"a"},
// 2 octets
{"\xc3\xb1", L"\u00F1"},
// 3 octets
{"\xe2\x82\xa1", L"\u20A1"},
// 4 octets
{"\xf0\x90\x8c\xbc", L"\U0001033C"},
// Last valid code point (U+10FFFF)
{"\xf4\x8f\xbf\xbf", L"\U0010FFFF"},
// Misc: mixed-length sequences combined with ASCII text
{"\xf0\x9d\x92\x9e-\xD0\xBF\xD1\x80\xD0\xB8\xD0\xB2\xD0\xB5\xD1\x82-\xE3\x82\x84\xE3\x81\x82.txt",
L"\U0001D49E-\u043F\u0440\u0438\u0432\u0435\u0442-\u3084\u3042.txt"},
{"\xd7\xa9-\xd0\xbc-\xce\xbd.txt",
L"\u05e9-\u043c-\u03bd.txt"},
{"\xd7\xa9\xd7\x9c\xd7\x95\xd7\x9d",
L"\u05e9\u05dc\u05d5\u05dd"},
};
// Malformed UTF-8 inputs paired with the wide string the conversion is expected to produce.
// Invalid parts of the input are expected to show up as U+FFFD (REPLACEMENT CHARACTER);
// valid parts are expected to be converted normally.
// run_all() checks that to_wide(utf8) equals wide for every entry (one direction only).
const utf8_to_wide invalid_utf8_tests[] = {
// 2 octets: lead byte followed by a non-continuation byte; two stray continuation bytes
{"\xc3\x28", L"\ufffd"},
{"\xa0\xa1", L"\ufffd\ufffd"},
// 3 octets: 2nd or 3rd byte is not a continuation byte
{"\xe2\x28\xa1", L"\ufffd\ufffd"},
{"\xe2\x82\x28", L"\ufffd"},
// 4 octets: 2nd, 3rd or 4th byte is not a continuation byte
{"\xf0\x28\x8c\xbc", L"\ufffd\ufffd\ufffd"},
{"\xf0\x90\x28\xbc", L"\ufffd\ufffd"},
{"\xf0\x90\x8c\x28", L"\ufffd"},
// 5- and 6-byte sequences: structurally possible, but not valid UTF-8
{"\xf8\xa1\xa1\xa1\xa1", L"\ufffd\ufffd\ufffd\ufffd\ufffd"},
{"\xfc\xa1\xa1\xa1\xa1\xa1", L"\ufffd\ufffd\ufffd\ufffd\ufffd\ufffd"},
// First code point past the valid range (would decode to U+110000)
{"\xf4\x90\x80\x80", L"\ufffd\ufffd\ufffd\ufffd"},
// Overlong encodings of ASCII 0x2F ('/') using 2 to 6 bytes
{"\xc0\xaf", L"\ufffd\ufffd"},
{"\xe0\x80\xaf", L"\ufffd\ufffd\ufffd"},
{"\xf0\x80\x80\xaf", L"\ufffd\ufffd\ufffd\ufffd"},
{"\xf8\x80\x80\x80\xaf", L"\ufffd\ufffd\ufffd\ufffd\ufffd"},
{"\xfc\x80\x80\x80\x80\xaf", L"\ufffd\ufffd\ufffd\ufffd\ufffd\ufffd"},
// Misc: invalid bytes (0xFF, truncated sequences) before, after and between valid text
{"\xFF\xFF", L"\ufffd\ufffd"},
{"\xd7\xa9\xFF", L"\u05e9\ufffd"},
{"\xd7", L"\ufffd"},
{"\xFF\xd7\xa9", L"\ufffd\u05e9"},
{"\xFF\xD0\xBF\xD1\x80\xD0\xB8\xD0\xB2\xD0\xB5\xD1\x82", L"\uFFFD\u043F\u0440\u0438\u0432\u0435\u0442"},
{"\xD0\xBF\xD1\x80\xD0\xB8\xD0\xB2\xD0\xB5\xD1\x82\xFF", L"\u043F\u0440\u0438\u0432\u0435\u0442\uFFFD"},
{"\xE3\x82\xFF\xE3\x81\x82", L"\ufffd\u3042"},
{"\xE3\xFF\x84\xE3\x81\x82", L"\ufffd\ufffd\u3042"},
};
// Wide inputs containing surrogate code units (0xDC01, 0xD800, 0xDC00) that do not form a valid pair,
// together with the expected narrow output. "\xEF\xBF\xBD" is the UTF-8 encoding of U+FFFD.
// run_all() checks that to_narrow(wide) equals utf8 for every entry, regardless of sizeof(wchar_t).
const wide_to_utf8 invalid_wide_tests[] = {
// Surrogate at the start / at the end of the string
{L"\xDC01\x05e9", "\xEF\xBF\xBD\xd7\xa9"},
{L"\x05e9\xD800", "\xd7\xa9\xEF\xBF\xBD"},
// 0xDC00 followed by a space, at the start / in the middle of longer text
{L"\xDC00\x20\u043F\u0440\u0438\u0432\u0435\u0442-\u3084\u3042",
"\xEF\xBF\xBD \xD0\xBF\xD1\x80\xD0\xB8\xD0\xB2\xD0\xB5\xD1\x82-\xE3\x82\x84\xE3\x81\x82"},
{L"\u3084\u3042\xDC00\x20\u043F\u0440\u0438\u0432\u0435\u0442-\u3084\u3042",
"\xE3\x82\x84\xE3\x81\x82\xEF\xBF\xBD \xD0\xBF\xD1\x80\xD0\xB8\xD0\xB2\xD0\xB5\xD1\x82-\xE3\x82\x84\xE3\x81\x82"},
};
// Wide-to-narrow cases that run_all() uses only when sizeof(wchar_t) == 2.
// The inputs are the same as in invalid_utf32_tests (0xD800 followed by a space), but the expected
// output here has no space after the replacement bytes "\xEF\xBF\xBD": the \x20 that follows
// 0xD800 is expected to be dropped together with it.
const wide_to_utf8 invalid_utf16_tests[] = {
{L"\xD800\x20\u043F\u0440\u0438\u0432\u0435\u0442-\u3084\u3042",
"\xEF\xBF\xBD\xD0\xBF\xD1\x80\xD0\xB8\xD0\xB2\xD0\xB5\xD1\x82-\xE3\x82\x84\xE3\x81\x82"},
{L"\u3084\u3042\xD800\x20\u043F\u0440\u0438\u0432\u0435\u0442-\u3084\u3042",
"\xE3\x82\x84\xE3\x81\x82\xEF\xBF\xBD\xD0\xBF\xD1\x80\xD0\xB8\xD0\xB2\xD0\xB5\xD1\x82-\xE3\x82\x84\xE3\x81\x82"},
};
// Wide-to-narrow cases that run_all() uses when sizeof(wchar_t) is not 2.
// The inputs are the same as in invalid_utf16_tests (0xD800 followed by a space), but here the
// expected output keeps the space after the replacement bytes "\xEF\xBF\xBD".
const wide_to_utf8 invalid_utf32_tests[] = {
{L"\xD800\x20\u043F\u0440\u0438\u0432\u0435\u0442-\u3084\u3042",
"\xEF\xBF\xBD \xD0\xBF\xD1\x80\xD0\xB8\xD0\xB2\xD0\xB5\xD1\x82-\xE3\x82\x84\xE3\x81\x82"},
{L"\u3084\u3042\xD800\x20\u043F\u0440\u0438\u0432\u0435\u0442-\u3084\u3042",
"\xE3\x82\x84\xE3\x81\x82\xEF\xBF\xBD \xD0\xBF\xD1\x80\xD0\xB8\xD0\xB2\xD0\xB5\xD1\x82-\xE3\x82\x84\xE3\x81\x82"},
};
// clang-format on
#ifdef BOOST_MSVC
#pragma warning(push)
#pragma warning(disable : 4127) // Constant expression detected
#endif
// Yields the element count of a built-in array.
// The count is deduced from the array's type; the array contents are never read.
template<typename Element, size_t Count>
size_t array_size(const Element (&)[Count])
{
    const size_t element_count = Count;
    return element_count;
}
// Runs every test table of this header against the given pair of converters.
//
// to_wide:   converts a narrow std::string into a std::wstring
// to_narrow: converts a std::wstring into a narrow std::string
//
// Tables exercised, in order:
//   - roundtrip_tests:     checked in both directions
//   - invalid_utf8_tests:  checked with to_wide only
//   - invalid_wide_tests:  checked with to_narrow only
//   - invalid_utf16_tests or invalid_utf32_tests (chosen by sizeof(wchar_t)): checked with to_narrow only
//
// Every check goes through the TEST macro, which is not defined in this header
// (presumably supplied by the including test file). A label with the case index is
// written to std::cout before each case.
void run_all(std::wstring (*to_wide)(const std::string&), std::string (*to_narrow)(const std::wstring&))
{
    for(size_t i = 0; i < array_size(roundtrip_tests); i++)
    {
        std::cout << " Roundtrip " << i << std::endl;
        TEST(roundtrip_tests[i].utf8 == to_narrow(roundtrip_tests[i].wide));
        TEST(to_wide(roundtrip_tests[i].utf8) == roundtrip_tests[i].wide);
    }

    for(size_t i = 0; i < array_size(invalid_utf8_tests); i++)
    {
        std::cout << " Invalid UTF8 " << i << std::endl;
        TEST(to_wide(invalid_utf8_tests[i].utf8) == invalid_utf8_tests[i].wide);
    }

    for(size_t i = 0; i < array_size(invalid_wide_tests); i++)
    {
        std::cout << " Invalid Wide " << i << std::endl;
        TEST(to_narrow(invalid_wide_tests[i].wide) == invalid_wide_tests[i].utf8);
    }

    // Pick the table matching the width of wchar_t: the two tables contain the same
    // inputs but differ in the expected output.
    size_t total = 0;
    const wide_to_utf8* ptr = 0;
    if(sizeof(wchar_t) == 2)
    {
        ptr = invalid_utf16_tests;
        total = array_size(invalid_utf16_tests);
    } else
    {
        ptr = invalid_utf32_tests;
        total = array_size(invalid_utf32_tests);
    }
    for(size_t i = 0; i < total; i++)
    {
        std::cout << " Invalid UTF16/32 " << i << std::endl;
        TEST(to_narrow(ptr[i].wide) == ptr[i].utf8);
    }
}
// Restore the MSVC warning state saved by the matching `#pragma warning(push)` earlier in this header.
// This must stay inside the include guard: the push is guarded, so an unguarded pop would run
// without a matching push whenever the header is included a second time.
#ifdef BOOST_MSVC
#pragma warning(pop)
#endif
#endif // BOOST_NOWIDE_TEST_SETS_HPP_INCLUDED