UnicodeConverter: added support for native wchar_t via UTF16 or UTF32

This commit is contained in:
Marian Krivos
2012-07-31 18:37:04 +00:00
parent 24225d39ec
commit bd401297e7
6 changed files with 265 additions and 17 deletions

View File

@@ -48,13 +48,31 @@ namespace Poco {
class Foundation_API UnicodeConverter
/// A convenience class that converts strings from
/// UTF-8 encoded std::strings to UTF-16 encoded std::wstrings
/// UTF-8 encoded std::strings to UTF-16 or UTF-32 encoded std::wstrings
/// and vice-versa.
///
/// This class is mainly used for working with the Unicode Windows APIs
/// and probably won't be of much use anywhere else.
/// but it also supports platforms whose native wchar_t holds UTF-32.
{
public:
static void toWideUTF(const std::string& utf8String, std::wstring& utf32String);
/// Converts the given UTF-8 encoded string into a native encoded wstring.
static void toWideUTF(const char* utf8String, int length, std::wstring& utf32String);
/// Converts the given UTF-8 encoded character sequence into a native encoded wstring.
static void toWideUTF(const char* utf8String, std::wstring& utf32String);
/// Converts the given zero-terminated UTF-8 encoded character sequence into a native encoded wstring.
static void toUTF32(const std::string& utf8String, std::wstring& utf32String);
/// Converts the given UTF-8 encoded string into an UTF-32 encoded wstring.
static void toUTF32(const char* utf8String, int length, std::wstring& utf32String);
/// Converts the given UTF-8 encoded character sequence into a UTF-32 encoded wstring.
static void toUTF32(const char* utf8String, std::wstring& utf32String);
/// Converts the given zero-terminated UTF-8 encoded character sequence into an UTF-32 encoded wstring.
static void toUTF16(const std::string& utf8String, std::wstring& utf16String);
/// Converts the given UTF-8 encoded string into an UTF-16 encoded wstring.

View File

@@ -42,6 +42,7 @@
#include "Poco/TextIterator.h"
#include "Poco/UTF8Encoding.h"
#include "Poco/UTF16Encoding.h"
#include "Poco/UTF32Encoding.h"
#include <cstring>
#include <wchar.h>
@@ -49,6 +50,112 @@
namespace Poco {
// Converts a UTF-8 encoded std::string into a wide string, dispatching on the
// width of the platform's wchar_t: toUTF16() when wchar_t is two bytes wide,
// toUTF32() otherwise.
void UnicodeConverter::toWideUTF(const std::string& utf8String, std::wstring& utfWideString)
{
	// sizeof(wchar_t) is a compile-time constant, so only one of the two
	// conversions is ever reachable on a given platform.
	const bool wideCharIsTwoBytes = (sizeof(wchar_t) == 2);
	if (!wideCharIsTwoBytes)
	{
		toUTF32(utf8String, utfWideString);
		return;
	}
	toUTF16(utf8String, utfWideString);
}
// Converts the first `length` bytes of a UTF-8 encoded character sequence into
// a wide string, dispatching on the width of the platform's wchar_t:
// toUTF16() when wchar_t is two bytes wide, toUTF32() otherwise.
//
// utf8String    - start of the UTF-8 byte sequence (need not be zero-terminated).
// length        - number of bytes to convert.
// utfWideString - receives the converted text.
void UnicodeConverter::toWideUTF(const char* utf8String, int length, std::wstring& utfWideString)
{
	// The explicit length must be forwarded: without it the call resolves to
	// the zero-terminated overloads, which ignore `length` and scan for a NUL
	// byte instead (reading past the end of a counted, unterminated buffer).
	if (sizeof(wchar_t) == 2)
	{
		toUTF16(utf8String, length, utfWideString);
	}
	else
	{
		toUTF32(utf8String, length, utfWideString);
	}
}
// Converts a zero-terminated UTF-8 encoded character sequence into a wide
// string, dispatching on the width of the platform's wchar_t: toUTF16() when
// wchar_t is two bytes wide, toUTF32() for any other width.
void UnicodeConverter::toWideUTF(const char* utf8String, std::wstring& utfWideString)
{
	// The switch operand is a compile-time constant; the switch merely selects
	// the conversion matching the platform's wchar_t.
	switch (sizeof(wchar_t))
	{
	case 2:
		toUTF16(utf8String, utfWideString);
		break;
	default:
		toUTF32(utf8String, utfWideString);
		break;
	}
}
// Converts a UTF-8 encoded std::string into a wstring holding one code point
// per wchar_t.
//
// utf8String  - the UTF-8 encoded input.
// utf32String - cleared, then filled with the decoded code points.
//
// A negative value from the iterator (no valid character at the current
// position) is stored as U+FFFD REPLACEMENT CHARACTER, the same substitution
// the pointer/length overload of toUTF32() performs, instead of being cast to
// an out-of-range wchar_t value.
void UnicodeConverter::toUTF32(const std::string& utf8String, std::wstring& utf32String)
{
	utf32String.clear();
	UTF8Encoding utf8Encoding;
	TextIterator it(utf8String, utf8Encoding);
	TextIterator end(utf8String);

	while (it != end)
	{
		int cc = *it++;
		if (cc < 0)
		{
			cc = 0xfffd; // Replacement Character
		}
		utf32String += static_cast<wchar_t>(cc);
	}
}
// Converts the first `length` bytes of a UTF-8 encoded character sequence into
// a wstring holding one code point per wchar_t.
//
// utf8String  - start of the UTF-8 byte sequence; checked with poco_check_ptr.
// length      - number of bytes to convert; a value <= 0 yields an empty result.
// utf32String - cleared, then filled with the decoded code points.
//
// Bytes that cannot be decoded are replaced by U+FFFD REPLACEMENT CHARACTER.
void UnicodeConverter::toUTF32(const char* utf8String, int length, std::wstring& utf32String)
{
	poco_check_ptr (utf8String);

	utf32String.clear();
	if (length <= 0)
	{
		// Nothing to convert; also avoids forming a pointer in front of the buffer.
		return;
	}

	UTF8Encoding utf8Encoding;

	const unsigned char* it = reinterpret_cast<const unsigned char*>(utf8String);
	const unsigned char* const end = it + length;

	while (it < end)
	{
		// Probe with a single byte first. A result below -1 is treated as a
		// request for -n bytes: retry with that many while the input still
		// holds enough of them.
		int n = utf8Encoding.queryConvert(it, 1);
		int read = 1;
		while (-1 > n && (end - it) >= -n)
		{
			read = -n;
			n = utf8Encoding.queryConvert(it, read);
		}

		int uc;
		if (n < -1)
		{
			// More bytes were requested than remain: emit a single
			// replacement character and stop consuming input.
			it = end;
			uc = 0xfffd; // Replacement Character (instead of '?')
		}
		else if (n == -1)
		{
			// Undecodable sequence: skip the bytes examined and substitute.
			it += read;
			uc = 0xfffd; // Replacement Character (instead of '?')
		}
		else
		{
			it += read;
			uc = n;
		}
		utf32String += static_cast<wchar_t>(uc);
	}
}
void UnicodeConverter::toUTF32(const char* utf8String, std::wstring& utf32String)
{
toUTF32(utf8String, (int) std::strlen(utf8String), utf32String);
}
void UnicodeConverter::toUTF16(const std::string& utf8String, std::wstring& utf16String)
{
utf16String.clear();
@@ -137,9 +244,18 @@ void UnicodeConverter::toUTF8(const std::wstring& utf16String, std::string& utf8
{
utf8String.clear();
UTF8Encoding utf8Encoding;
UTF16Encoding utf16Encoding;
TextConverter converter(utf16Encoding, utf8Encoding);
converter.convert(utf16String.data(), (int) utf16String.length()*sizeof(wchar_t), utf8String);
if (sizeof(wchar_t) == 2)
{
UTF16Encoding utf16Encoding;
TextConverter converter(utf16Encoding, utf8Encoding);
converter.convert(utf16String.data(), (int) utf16String.length() * sizeof(wchar_t), utf8String);
}
else
{
UTF32Encoding utf32Encoding;
TextConverter converter(utf32Encoding, utf8Encoding);
converter.convert(utf16String.data(), (int) utf16String.length() * sizeof(wchar_t), utf8String);
}
}
@@ -147,9 +263,18 @@ void UnicodeConverter::toUTF8(const wchar_t* utf16String, int length, std::strin
{
utf8String.clear();
UTF8Encoding utf8Encoding;
UTF16Encoding utf16Encoding;
TextConverter converter(utf16Encoding, utf8Encoding);
converter.convert(utf16String, (int) length*sizeof(wchar_t), utf8String);
if (sizeof(wchar_t) == 2)
{
UTF16Encoding utf16Encoding;
TextConverter converter(utf16Encoding, utf8Encoding);
converter.convert(utf16String, (int) length * sizeof(wchar_t), utf8String);
}
else
{
UTF32Encoding utf32Encoding;
TextConverter converter(utf32Encoding, utf8Encoding);
converter.convert(utf16String, (int) length * sizeof(wchar_t), utf8String);
}
}

View File

@@ -37,7 +37,7 @@ objects = ActiveMethodTest ActivityTest ActiveDispatcherTest \
LRUCacheTest ExpireCacheTest ExpireLRUCacheTest CacheTestSuite AnyTest FormatTest \
HashingTestSuite HashTableTest SimpleHashTableTest LinearHashTableTest \
HashSetTest HashMapTest SharedMemoryTest \
UniqueExpireCacheTest UniqueExpireLRUCacheTest \
UniqueExpireCacheTest UniqueExpireLRUCacheTest UnicodeConverterTest \
TuplesTest NamedTuplesTest TypeListTest VarTest DynamicTestSuite FileStreamTest \
MemoryStreamTest ObjectPoolTest

View File

@@ -37,9 +37,7 @@
#include "StreamConverterTest.h"
#include "TextEncodingTest.h"
#include "UTF8StringTest.h"
#ifdef _WINDOWS
#include "UnicodeConverterTest.h"
#endif
CppUnit::Test* TextTestSuite::suite()
@@ -52,9 +50,7 @@ CppUnit::Test* TextTestSuite::suite()
pSuite->addTest(StreamConverterTest::suite());
pSuite->addTest(TextEncodingTest::suite());
pSuite->addTest(UTF8StringTest::suite());
#ifdef _WINDOWS
pSuite->addTest(UnicodeConverterTest::suite());
#endif
return pSuite;
}

View File

@@ -35,7 +35,6 @@
#include "CppUnit/TestSuite.h"
#include "Poco/UnicodeConverter.h"
using Poco::UnicodeConverter;
@@ -48,7 +47,10 @@ UnicodeConverterTest::~UnicodeConverterTest()
{
}
void UnicodeConverterTest::testString()
#ifdef _WINDOWS
void UnicodeConverterTest::testString16()
{
const unsigned char supp[] = {0x41, 0x42, 0xf0, 0x90, 0x82, 0xa4, 0xf0, 0xaf, 0xa6, 0xa0, 0xf0, 0xaf, 0xa8, 0x9d, 0x00};
std::string text((const char*) supp);
@@ -64,7 +66,7 @@ void UnicodeConverterTest::testString()
assert (text == text2);
}
void UnicodeConverterTest::testCharPtrLength()
void UnicodeConverterTest::testCharPtrLength16()
{
const unsigned char supp[] = {0x41, 0x42, 0xf0, 0x90, 0x82, 0xa4, 0xf0, 0xaf, 0xa6, 0xa0, 0xf0, 0xaf, 0xa8, 0x9d, 0x00};
std::string text((const char*) supp);
@@ -78,7 +80,7 @@ void UnicodeConverterTest::testCharPtrLength()
assert (text == text2);
}
void UnicodeConverterTest::testCharPtr()
void UnicodeConverterTest::testCharPtr16()
{
const unsigned char supp[] = {0x41, 0x42, 0xf0, 0x90, 0x82, 0xa4, 0xf0, 0xaf, 0xa6, 0xa0, 0xf0, 0xaf, 0xa8, 0x9d, 0x00};
std::string text((const char*) supp);
@@ -92,6 +94,95 @@ void UnicodeConverterTest::testCharPtr()
assert (text == text2);
}
#else
// Round-trips a std::string of accented Latin letters through toUTF32() and
// toUTF8() and expects to get the original text back.
void UnicodeConverterTest::testString32()
{
	const unsigned char utf8Chars[] = "ľščťžýáíéúäô ĽŠČŤŽÝÁÍÉÚÄÔ";
	const std::string text(reinterpret_cast<const char*>(utf8Chars));

	// UTF-8 -> wide
	std::wstring wide;
	UnicodeConverter::toUTF32(text, wide);

	// wide -> UTF-8
	std::string text2;
	UnicodeConverter::toUTF8(wide, text2);

	assert (text == text2);
}
// Round-trips a counted byte sequence ("AB" followed by three 4-byte UTF-8
// sequences) through the pointer/length overloads of toUTF32() and toUTF8().
void UnicodeConverterTest::testCharPtrLength32()
{
	const unsigned char supp[] = {0x41, 0x42, 0xf0, 0x90, 0x82, 0xa4, 0xf0, 0xaf, 0xa6, 0xa0, 0xf0, 0xaf, 0xa8, 0x9d, 0x00};
	const char* const bytes = reinterpret_cast<const char*>(supp);
	// All bytes except the trailing terminator: 14.
	const int byteCount = static_cast<int>(sizeof(supp) - 1);

	const std::string text(bytes);
	std::string text2;
	std::wstring wide;

	UnicodeConverter::toUTF32(bytes, byteCount, wide);
	UnicodeConverter::toUTF8(wide.c_str(), static_cast<int>(wide.size()), text2);

	assert (text == text2);
}
// Round-trips a zero-terminated byte sequence ("AB" followed by three 4-byte
// UTF-8 sequences) through the zero-terminated overloads of toUTF32() and
// toUTF8().
void UnicodeConverterTest::testCharPtr32()
{
	const unsigned char supp[] = {0x41, 0x42, 0xf0, 0x90, 0x82, 0xa4, 0xf0, 0xaf, 0xa6, 0xa0, 0xf0, 0xaf, 0xa8, 0x9d, 0x00};
	const char* const bytes = reinterpret_cast<const char*>(supp);

	const std::string text(bytes);
	std::string text2;
	std::wstring wide;

	UnicodeConverter::toUTF32(bytes, wide);
	UnicodeConverter::toUTF8(wide.c_str(), text2);

	assert (text == text2);
}
#endif
// Round-trips a std::string of accented Latin letters through toWideUTF() and
// toUTF8() and expects to get the original text back.
void UnicodeConverterTest::testString()
{
	const unsigned char utf8Chars[] = "ľščťžýáíéúäô ĽŠČŤŽÝÁÍÉÚÄÔ";
	const std::string text(reinterpret_cast<const char*>(utf8Chars));

	// UTF-8 -> wide
	std::wstring wide;
	UnicodeConverter::toWideUTF(text, wide);

	// wide -> UTF-8
	std::string text2;
	UnicodeConverter::toUTF8(wide, text2);

	assert (text == text2);
}
// Round-trips a counted byte sequence ("AB" followed by three 4-byte UTF-8
// sequences) through the pointer/length overloads of toWideUTF() and toUTF8().
void UnicodeConverterTest::testCharPtrLength()
{
	const unsigned char supp[] = {0x41, 0x42, 0xf0, 0x90, 0x82, 0xa4, 0xf0, 0xaf, 0xa6, 0xa0, 0xf0, 0xaf, 0xa8, 0x9d, 0x00};
	const char* const bytes = reinterpret_cast<const char*>(supp);
	// All bytes except the trailing terminator: 14.
	const int byteCount = static_cast<int>(sizeof(supp) - 1);

	const std::string text(bytes);
	std::string text2;
	std::wstring wide;

	UnicodeConverter::toWideUTF(bytes, byteCount, wide);
	UnicodeConverter::toUTF8(wide.c_str(), static_cast<int>(wide.size()), text2);

	assert (text == text2);
}
// Round-trips a zero-terminated byte sequence ("AB" followed by three 4-byte
// UTF-8 sequences) through the zero-terminated overloads of toWideUTF() and
// toUTF8().
void UnicodeConverterTest::testCharPtr()
{
	const unsigned char supp[] = {0x41, 0x42, 0xf0, 0x90, 0x82, 0xa4, 0xf0, 0xaf, 0xa6, 0xa0, 0xf0, 0xaf, 0xa8, 0x9d, 0x00};
	const char* const bytes = reinterpret_cast<const char*>(supp);

	const std::string text(bytes);
	std::string text2;
	std::wstring wide;

	UnicodeConverter::toWideUTF(bytes, wide);
	UnicodeConverter::toUTF8(wide.c_str(), text2);

	assert (text == text2);
}
void UnicodeConverterTest::setUp()
{
@@ -107,6 +198,15 @@ CppUnit::Test* UnicodeConverterTest::suite()
{
CppUnit::TestSuite* pSuite = new CppUnit::TestSuite("UnicodeConverterTest");
#ifdef _WINDOWS
CppUnit_addTest(pSuite, UnicodeConverterTest, testString16);
CppUnit_addTest(pSuite, UnicodeConverterTest, testCharPtrLength16);
CppUnit_addTest(pSuite, UnicodeConverterTest, testCharPtr16);
#else
CppUnit_addTest(pSuite, UnicodeConverterTest, testString32);
CppUnit_addTest(pSuite, UnicodeConverterTest, testCharPtrLength32);
CppUnit_addTest(pSuite, UnicodeConverterTest, testCharPtr32);
#endif
CppUnit_addTest(pSuite, UnicodeConverterTest, testString);
CppUnit_addTest(pSuite, UnicodeConverterTest, testCharPtrLength);
CppUnit_addTest(pSuite, UnicodeConverterTest, testCharPtr);

View File

@@ -46,6 +46,15 @@ public:
UnicodeConverterTest(const std::string& name);
~UnicodeConverterTest();
#ifdef _WINDOWS
void testString16();
void testCharPtrLength16();
void testCharPtr16();
#else
void testString32();
void testCharPtrLength32();
void testCharPtr32();
#endif
void testString();
void testCharPtrLength();
void testCharPtr();