mirror of
https://github.com/pocoproject/poco.git
synced 2025-11-17 01:15:59 +01:00
UnicodeConverter: added support for native wchar_t via UTF16 or UTF32
This commit is contained in:
@@ -48,13 +48,31 @@ namespace Poco {
|
||||
|
||||
class Foundation_API UnicodeConverter
|
||||
/// A convenience class that converts strings from
|
||||
/// UTF-8 encoded std::strings to UTF-16 encoded std::wstrings
|
||||
/// UTF-8 encoded std::strings to UTF-16 or UTF-32 encoded std::wstrings
|
||||
/// and vice-versa.
|
||||
///
|
||||
/// This class is mainly used for working with the Unicode Windows APIs
|
||||
/// and probably won't be of much use anywhere else.
|
||||
/// and for converting between UTF-8 and the native wchar_t encoding on other platforms.
|
||||
{
|
||||
public:
|
||||
static void toWideUTF(const std::string& utf8String, std::wstring& utf32String);
|
||||
/// Converts the given UTF-8 encoded string into a native encoded wstring.
|
||||
|
||||
static void toWideUTF(const char* utf8String, int length, std::wstring& utf32String);
|
||||
/// Converts the given UTF-8 encoded character sequence into a native encoded wstring.
|
||||
|
||||
static void toWideUTF(const char* utf8String, std::wstring& utf32String);
|
||||
/// Converts the given zero-terminated UTF-8 encoded character sequence into a native encoded wstring.
|
||||
|
||||
static void toUTF32(const std::string& utf8String, std::wstring& utf32String);
|
||||
/// Converts the given UTF-8 encoded string into an UTF-32 encoded wstring.
|
||||
|
||||
static void toUTF32(const char* utf8String, int length, std::wstring& utf32String);
|
||||
/// Converts the given UTF-8 encoded character sequence into an UTF-32 encoded wstring.
|
||||
|
||||
static void toUTF32(const char* utf8String, std::wstring& utf32String);
|
||||
/// Converts the given zero-terminated UTF-8 encoded character sequence into an UTF-32 encoded wstring.
|
||||
|
||||
static void toUTF16(const std::string& utf8String, std::wstring& utf16String);
|
||||
/// Converts the given UTF-8 encoded string into an UTF-16 encoded wstring.
|
||||
|
||||
|
||||
@@ -42,6 +42,7 @@
|
||||
#include "Poco/TextIterator.h"
|
||||
#include "Poco/UTF8Encoding.h"
|
||||
#include "Poco/UTF16Encoding.h"
|
||||
#include "Poco/UTF32Encoding.h"
|
||||
#include <cstring>
|
||||
#include <wchar.h>
|
||||
|
||||
@@ -49,6 +50,112 @@
|
||||
namespace Poco {
|
||||
|
||||
|
||||
void UnicodeConverter::toWideUTF(const std::string& utf8String, std::wstring& utfWideString)
|
||||
{
|
||||
if (sizeof(wchar_t) == 2)
|
||||
{
|
||||
toUTF16(utf8String, utfWideString);
|
||||
}
|
||||
else
|
||||
{
|
||||
toUTF32(utf8String, utfWideString);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
void UnicodeConverter::toWideUTF(const char* utf8String, int length, std::wstring& utfWideString)
|
||||
{
|
||||
if (sizeof(wchar_t) == 2)
|
||||
{
|
||||
toUTF16(utf8String, utfWideString);
|
||||
}
|
||||
else
|
||||
{
|
||||
toUTF32(utf8String, utfWideString);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
void UnicodeConverter::toWideUTF(const char* utf8String, std::wstring& utfWideString)
|
||||
{
|
||||
if (sizeof(wchar_t) == 2)
|
||||
{
|
||||
toUTF16(utf8String, utfWideString);
|
||||
}
|
||||
else
|
||||
{
|
||||
toUTF32(utf8String, utfWideString);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
void UnicodeConverter::toUTF32(const std::string& utf8String, std::wstring& utf32String)
|
||||
{
|
||||
utf32String.clear();
|
||||
UTF8Encoding utf8Encoding;
|
||||
TextIterator it(utf8String, utf8Encoding);
|
||||
TextIterator end(utf8String);
|
||||
|
||||
while (it != end)
|
||||
{
|
||||
int cc = *it++;
|
||||
utf32String += (wchar_t) cc;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
void UnicodeConverter::toUTF32(const char* utf8String, int length, std::wstring& utf32String)
|
||||
{
|
||||
poco_check_ptr (utf8String);
|
||||
|
||||
utf32String.clear();
|
||||
|
||||
UTF8Encoding utf8Encoding;
|
||||
UTF32Encoding utf32Encoding;
|
||||
const unsigned char* it = (const unsigned char*) utf8String;
|
||||
const unsigned char* end = (const unsigned char*) utf8String + length;
|
||||
|
||||
while (it < end)
|
||||
{
|
||||
int n = utf8Encoding.queryConvert(it, 1);
|
||||
int uc;
|
||||
int read = 1;
|
||||
|
||||
while (-1 > n && (end - it) >= -n)
|
||||
{
|
||||
read = -n;
|
||||
n = utf8Encoding.queryConvert(it, read);
|
||||
}
|
||||
|
||||
if (-1 > n)
|
||||
{
|
||||
it = end;
|
||||
}
|
||||
else
|
||||
{
|
||||
it += read;
|
||||
}
|
||||
|
||||
if (-1 >= n)
|
||||
{
|
||||
uc = 0xfffd; // Replacement Character (instead of '?')
|
||||
}
|
||||
else
|
||||
{
|
||||
uc = n;
|
||||
}
|
||||
|
||||
utf32String += (wchar_t) uc;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
void UnicodeConverter::toUTF32(const char* utf8String, std::wstring& utf32String)
|
||||
{
|
||||
toUTF32(utf8String, (int) std::strlen(utf8String), utf32String);
|
||||
}
|
||||
|
||||
|
||||
void UnicodeConverter::toUTF16(const std::string& utf8String, std::wstring& utf16String)
|
||||
{
|
||||
utf16String.clear();
|
||||
@@ -137,9 +244,18 @@ void UnicodeConverter::toUTF8(const std::wstring& utf16String, std::string& utf8
|
||||
{
|
||||
utf8String.clear();
|
||||
UTF8Encoding utf8Encoding;
|
||||
UTF16Encoding utf16Encoding;
|
||||
TextConverter converter(utf16Encoding, utf8Encoding);
|
||||
converter.convert(utf16String.data(), (int) utf16String.length()*sizeof(wchar_t), utf8String);
|
||||
if (sizeof(wchar_t) == 2)
|
||||
{
|
||||
UTF16Encoding utf16Encoding;
|
||||
TextConverter converter(utf16Encoding, utf8Encoding);
|
||||
converter.convert(utf16String.data(), (int) utf16String.length() * sizeof(wchar_t), utf8String);
|
||||
}
|
||||
else
|
||||
{
|
||||
UTF32Encoding utf32Encoding;
|
||||
TextConverter converter(utf32Encoding, utf8Encoding);
|
||||
converter.convert(utf16String.data(), (int) utf16String.length() * sizeof(wchar_t), utf8String);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -147,9 +263,18 @@ void UnicodeConverter::toUTF8(const wchar_t* utf16String, int length, std::strin
|
||||
{
|
||||
utf8String.clear();
|
||||
UTF8Encoding utf8Encoding;
|
||||
UTF16Encoding utf16Encoding;
|
||||
TextConverter converter(utf16Encoding, utf8Encoding);
|
||||
converter.convert(utf16String, (int) length*sizeof(wchar_t), utf8String);
|
||||
if (sizeof(wchar_t) == 2)
|
||||
{
|
||||
UTF16Encoding utf16Encoding;
|
||||
TextConverter converter(utf16Encoding, utf8Encoding);
|
||||
converter.convert(utf16String, (int) length * sizeof(wchar_t), utf8String);
|
||||
}
|
||||
else
|
||||
{
|
||||
UTF32Encoding utf32Encoding;
|
||||
TextConverter converter(utf32Encoding, utf8Encoding);
|
||||
converter.convert(utf16String, (int) length * sizeof(wchar_t), utf8String);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
@@ -37,7 +37,7 @@ objects = ActiveMethodTest ActivityTest ActiveDispatcherTest \
|
||||
LRUCacheTest ExpireCacheTest ExpireLRUCacheTest CacheTestSuite AnyTest FormatTest \
|
||||
HashingTestSuite HashTableTest SimpleHashTableTest LinearHashTableTest \
|
||||
HashSetTest HashMapTest SharedMemoryTest \
|
||||
UniqueExpireCacheTest UniqueExpireLRUCacheTest \
|
||||
UniqueExpireCacheTest UniqueExpireLRUCacheTest UnicodeConverterTest \
|
||||
TuplesTest NamedTuplesTest TypeListTest VarTest DynamicTestSuite FileStreamTest \
|
||||
MemoryStreamTest ObjectPoolTest
|
||||
|
||||
|
||||
@@ -37,9 +37,7 @@
|
||||
#include "StreamConverterTest.h"
|
||||
#include "TextEncodingTest.h"
|
||||
#include "UTF8StringTest.h"
|
||||
#ifdef _WINDOWS
|
||||
#include "UnicodeConverterTest.h"
|
||||
#endif
|
||||
|
||||
|
||||
CppUnit::Test* TextTestSuite::suite()
|
||||
@@ -52,9 +50,7 @@ CppUnit::Test* TextTestSuite::suite()
|
||||
pSuite->addTest(StreamConverterTest::suite());
|
||||
pSuite->addTest(TextEncodingTest::suite());
|
||||
pSuite->addTest(UTF8StringTest::suite());
|
||||
#ifdef _WINDOWS
|
||||
pSuite->addTest(UnicodeConverterTest::suite());
|
||||
#endif
|
||||
|
||||
return pSuite;
|
||||
}
|
||||
|
||||
@@ -35,7 +35,6 @@
|
||||
#include "CppUnit/TestSuite.h"
|
||||
#include "Poco/UnicodeConverter.h"
|
||||
|
||||
|
||||
using Poco::UnicodeConverter;
|
||||
|
||||
|
||||
@@ -48,7 +47,10 @@ UnicodeConverterTest::~UnicodeConverterTest()
|
||||
{
|
||||
}
|
||||
|
||||
void UnicodeConverterTest::testString()
|
||||
|
||||
#ifdef _WINDOWS
|
||||
|
||||
void UnicodeConverterTest::testString16()
|
||||
{
|
||||
const unsigned char supp[] = {0x41, 0x42, 0xf0, 0x90, 0x82, 0xa4, 0xf0, 0xaf, 0xa6, 0xa0, 0xf0, 0xaf, 0xa8, 0x9d, 0x00};
|
||||
std::string text((const char*) supp);
|
||||
@@ -64,7 +66,7 @@ void UnicodeConverterTest::testString()
|
||||
assert (text == text2);
|
||||
}
|
||||
|
||||
void UnicodeConverterTest::testCharPtrLength()
|
||||
void UnicodeConverterTest::testCharPtrLength16()
|
||||
{
|
||||
const unsigned char supp[] = {0x41, 0x42, 0xf0, 0x90, 0x82, 0xa4, 0xf0, 0xaf, 0xa6, 0xa0, 0xf0, 0xaf, 0xa8, 0x9d, 0x00};
|
||||
std::string text((const char*) supp);
|
||||
@@ -78,7 +80,7 @@ void UnicodeConverterTest::testCharPtrLength()
|
||||
assert (text == text2);
|
||||
}
|
||||
|
||||
void UnicodeConverterTest::testCharPtr()
|
||||
void UnicodeConverterTest::testCharPtr16()
|
||||
{
|
||||
const unsigned char supp[] = {0x41, 0x42, 0xf0, 0x90, 0x82, 0xa4, 0xf0, 0xaf, 0xa6, 0xa0, 0xf0, 0xaf, 0xa8, 0x9d, 0x00};
|
||||
std::string text((const char*) supp);
|
||||
@@ -92,6 +94,95 @@ void UnicodeConverterTest::testCharPtr()
|
||||
assert (text == text2);
|
||||
}
|
||||
|
||||
#else
|
||||
|
||||
void UnicodeConverterTest::testString32()
|
||||
{
|
||||
const unsigned char utf8Chars[] = "ľščťžýáíéúäô ĽŠČŤŽÝÁÍÉÚÄÔ";
|
||||
std::string text((const char*) utf8Chars);
|
||||
|
||||
std::wstring wtext;
|
||||
|
||||
UnicodeConverter::toUTF32 (text, wtext);
|
||||
|
||||
std::string text2;
|
||||
|
||||
UnicodeConverter::toUTF8 (wtext, text2);
|
||||
assert (text == text2);
|
||||
}
|
||||
|
||||
void UnicodeConverterTest::testCharPtrLength32()
|
||||
{
|
||||
const unsigned char supp[] = {0x41, 0x42, 0xf0, 0x90, 0x82, 0xa4, 0xf0, 0xaf, 0xa6, 0xa0, 0xf0, 0xaf, 0xa8, 0x9d, 0x00};
|
||||
std::string text((const char*) supp);
|
||||
|
||||
std::wstring wtext;
|
||||
std::string text2;
|
||||
|
||||
UnicodeConverter::toUTF32 ((const char*)supp, 14, wtext);
|
||||
UnicodeConverter::toUTF8 (wtext.c_str (), (int) wtext.size (), text2);
|
||||
|
||||
assert (text == text2);
|
||||
}
|
||||
|
||||
void UnicodeConverterTest::testCharPtr32()
|
||||
{
|
||||
const unsigned char supp[] = {0x41, 0x42, 0xf0, 0x90, 0x82, 0xa4, 0xf0, 0xaf, 0xa6, 0xa0, 0xf0, 0xaf, 0xa8, 0x9d, 0x00};
|
||||
std::string text((const char*) supp);
|
||||
|
||||
std::wstring wtext;
|
||||
std::string text2;
|
||||
|
||||
UnicodeConverter::toUTF32 ((const char*)supp, wtext);
|
||||
UnicodeConverter::toUTF8 (wtext.c_str (), text2);
|
||||
|
||||
assert (text == text2);
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
void UnicodeConverterTest::testString()
|
||||
{
|
||||
const unsigned char utf8Chars[] = "ľščťžýáíéúäô ĽŠČŤŽÝÁÍÉÚÄÔ";
|
||||
std::string text((const char*) utf8Chars);
|
||||
|
||||
std::wstring wtext;
|
||||
|
||||
UnicodeConverter::toWideUTF (text, wtext);
|
||||
|
||||
std::string text2;
|
||||
|
||||
UnicodeConverter::toUTF8 (wtext, text2);
|
||||
assert (text == text2);
|
||||
}
|
||||
|
||||
void UnicodeConverterTest::testCharPtrLength()
|
||||
{
|
||||
const unsigned char supp[] = {0x41, 0x42, 0xf0, 0x90, 0x82, 0xa4, 0xf0, 0xaf, 0xa6, 0xa0, 0xf0, 0xaf, 0xa8, 0x9d, 0x00};
|
||||
std::string text((const char*) supp);
|
||||
|
||||
std::wstring wtext;
|
||||
std::string text2;
|
||||
|
||||
UnicodeConverter::toWideUTF ((const char*)supp, 14, wtext);
|
||||
UnicodeConverter::toUTF8 (wtext.c_str (), (int) wtext.size (), text2);
|
||||
|
||||
assert (text == text2);
|
||||
}
|
||||
|
||||
void UnicodeConverterTest::testCharPtr()
|
||||
{
|
||||
const unsigned char supp[] = {0x41, 0x42, 0xf0, 0x90, 0x82, 0xa4, 0xf0, 0xaf, 0xa6, 0xa0, 0xf0, 0xaf, 0xa8, 0x9d, 0x00};
|
||||
std::string text((const char*) supp);
|
||||
|
||||
std::wstring wtext;
|
||||
std::string text2;
|
||||
|
||||
UnicodeConverter::toWideUTF ((const char*)supp, wtext);
|
||||
UnicodeConverter::toUTF8 (wtext.c_str (), text2);
|
||||
|
||||
assert (text == text2);
|
||||
}
|
||||
|
||||
void UnicodeConverterTest::setUp()
|
||||
{
|
||||
@@ -107,6 +198,15 @@ CppUnit::Test* UnicodeConverterTest::suite()
|
||||
{
|
||||
CppUnit::TestSuite* pSuite = new CppUnit::TestSuite("UnicodeConverterTest");
|
||||
|
||||
#ifdef _WINDOWS
|
||||
CppUnit_addTest(pSuite, UnicodeConverterTest, testString16);
|
||||
CppUnit_addTest(pSuite, UnicodeConverterTest, testCharPtrLength16);
|
||||
CppUnit_addTest(pSuite, UnicodeConverterTest, testCharPtr16);
|
||||
#else
|
||||
CppUnit_addTest(pSuite, UnicodeConverterTest, testString32);
|
||||
CppUnit_addTest(pSuite, UnicodeConverterTest, testCharPtrLength32);
|
||||
CppUnit_addTest(pSuite, UnicodeConverterTest, testCharPtr32);
|
||||
#endif
|
||||
CppUnit_addTest(pSuite, UnicodeConverterTest, testString);
|
||||
CppUnit_addTest(pSuite, UnicodeConverterTest, testCharPtrLength);
|
||||
CppUnit_addTest(pSuite, UnicodeConverterTest, testCharPtr);
|
||||
|
||||
@@ -46,6 +46,15 @@ public:
|
||||
UnicodeConverterTest(const std::string& name);
|
||||
~UnicodeConverterTest();
|
||||
|
||||
#ifdef _WINDOWS
|
||||
void testString16();
|
||||
void testCharPtrLength16();
|
||||
void testCharPtr16();
|
||||
#else
|
||||
void testString32();
|
||||
void testCharPtrLength32();
|
||||
void testCharPtr32();
|
||||
#endif
|
||||
void testString();
|
||||
void testCharPtrLength();
|
||||
void testCharPtr();
|
||||
|
||||
Reference in New Issue
Block a user