diff --git a/Foundation/Makefile b/Foundation/Makefile
index 28e83776e..61caad18a 100644
--- a/Foundation/Makefile
+++ b/Foundation/Makefile
@@ -28,7 +28,7 @@ objects = ArchiveStrategy Ascii ASCIIEncoding AsyncChannel Base64Decoder Base64E
 	Task TaskManager TaskNotification TeeStream Hash HashStatistic \
 	TemporaryFile TextConverter TextEncoding TextIterator TextBufferIterator Thread ThreadLocal \
 	ThreadPool ThreadTarget ActiveDispatcher Timer Timespan Timestamp Timezone Token URI \
-	FileStreamFactory URIStreamFactory URIStreamOpener UTF16Encoding UTF8Encoding UTF8String \
+	FileStreamFactory URIStreamFactory URIStreamOpener UTF32Encoding UTF16Encoding UTF8Encoding UTF8String \
 	Unicode UnicodeConverter Windows1250Encoding Windows1251Encoding Windows1252Encoding \
 	UUID UUIDGenerator Void Var VarHolder Format Pipe PipeImpl PipeStream SharedMemory \
 	MemoryStream FileStream AtomicCounter
diff --git a/Foundation/include/Poco/UTF32Encoding.h b/Foundation/include/Poco/UTF32Encoding.h
new file mode 100644
index 000000000..40e5bebe9
--- /dev/null
+++ b/Foundation/include/Poco/UTF32Encoding.h
@@ -0,0 +1,133 @@
+//
+// UTF32Encoding.h
+//
+// $Id: //poco/1.4/Foundation/include/Poco/UTF32Encoding.h#1 $
+//
+// Library: Foundation
+// Package: Text
+// Module:  UTF32Encoding
+//
+// Definition of the UTF32Encoding class.
+//
+// Copyright (c) 2004-2007, Applied Informatics Software Engineering GmbH.
+// and Contributors.
+//
+// Permission is hereby granted, free of charge, to any person or organization
+// obtaining a copy of the software and accompanying documentation covered by
+// this license (the "Software") to use, reproduce, display, distribute,
+// execute, and transmit the Software, and to prepare derivative works of the
+// Software, and to permit third-parties to whom the Software is furnished to
+// do so, all subject to the following:
+//
+// The copyright notices in the Software and this entire statement, including
+// the above license grant, this restriction and the following disclaimer,
+// must be included in all copies of the Software, in whole or in part, and
+// all derivative works of the Software, unless such copies or derivative
+// works are solely in the form of machine-executable object code generated by
+// a source language processor.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE, TITLE AND NON-INFRINGEMENT. IN NO EVENT
+// SHALL THE COPYRIGHT HOLDERS OR ANYONE DISTRIBUTING THE SOFTWARE BE LIABLE
+// FOR ANY DAMAGES OR OTHER LIABILITY, WHETHER IN CONTRACT, TORT OR OTHERWISE,
+// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+// DEALINGS IN THE SOFTWARE.
+//
+
+
+#ifndef Foundation_UTF32Encoding_INCLUDED
+#define Foundation_UTF32Encoding_INCLUDED
+
+
+#include "Poco/Foundation.h"
+#include "Poco/TextEncoding.h"
+
+
+namespace Poco {
+
+
+class Foundation_API UTF32Encoding: public TextEncoding
+	/// UTF-32 text encoding.
+	///
+	/// Every character occupies exactly one 32-bit code unit,
+	/// i.e. four bytes, so no surrogate pairs are involved.
+	/// The byte order of the encoded form is chosen at
+	/// construction time or later with setByteOrder().
+{
+public:
+	enum ByteOrderType
+	{
+		BIG_ENDIAN_BYTE_ORDER,
+		LITTLE_ENDIAN_BYTE_ORDER,
+		NATIVE_BYTE_ORDER
+	};
+
+	UTF32Encoding(ByteOrderType byteOrder = NATIVE_BYTE_ORDER);
+		/// Creates and initializes the encoding for the given byte order.
+
+	UTF32Encoding(int byteOrderMark);
+		/// Creates and initializes the encoding for the byte-order
+		/// indicated by the given byte-order mark, which is the Unicode
+		/// character 0xFEFF.
+
+	~UTF32Encoding();
+		/// Destroys the UTF32Encoding.
+
+	ByteOrderType getByteOrder() const;
+		/// Returns the byte-order currently in use. The result is
+		/// either BIG_ENDIAN_BYTE_ORDER or LITTLE_ENDIAN_BYTE_ORDER,
+		/// never NATIVE_BYTE_ORDER.
+
+	void setByteOrder(ByteOrderType byteOrder);
+		/// Sets the byte order.
+
+	void setByteOrder(int byteOrderMark);
+		/// Sets the byte order according to the given
+		/// byte order mark, which is the Unicode
+		/// character 0xFEFF. Passing 0xFEFF selects the
+		/// native byte order; any other value selects
+		/// the opposite one.
+
+	const char* canonicalName() const;
+		/// Returns "UTF-32".
+
+	bool isA(const std::string& encodingName) const;
+		/// Returns true if encodingName matches one of the
+		/// names of this encoding ("UTF-32" or "UTF32"),
+		/// as decided by Poco::icompare().
+
+	const CharacterMap& characterMap() const;
+		/// Returns the character map of this encoding.
+
+	int convert(const unsigned char* bytes) const;
+		/// Reads four bytes starting at bytes and returns
+		/// them as one 32-bit code unit. The caller must
+		/// make sure that four bytes are readable.
+
+	int convert(int ch, unsigned char* bytes, int length) const;
+		/// Stores ch as four bytes in bytes, provided bytes
+		/// is not null and length is at least four; otherwise
+		/// nothing is written. Always returns 4.
+
+	int queryConvert(const unsigned char* bytes, int length) const;
+		/// Returns the 32-bit code unit found at bytes if
+		/// length is at least four, or a negative value if
+		/// fewer than four bytes are available.
+
+	int sequenceLength(const unsigned char* bytes, int length) const;
+		/// Always returns 4, regardless of the arguments.
+
+private:
+	bool _flipBytes;
+		/// True if the selected byte order is the
+		/// opposite of the native byte order.
+	static const char* _names[];
+	static const CharacterMap _charMap;
+};
+
+
+} // namespace Poco
+
+
+#endif // Foundation_UTF32Encoding_INCLUDED
diff --git a/Foundation/src/TextEncoding.cpp b/Foundation/src/TextEncoding.cpp
index 684ba302e..195e12ea0 100644
--- a/Foundation/src/TextEncoding.cpp
+++ b/Foundation/src/TextEncoding.cpp
@@ -41,6 +41,7 @@
 #include "Poco/Latin1Encoding.h"
 #include "Poco/Latin2Encoding.h"
 #include "Poco/Latin9Encoding.h"
+#include "Poco/UTF32Encoding.h"
 #include "Poco/UTF16Encoding.h"
 #include "Poco/UTF8Encoding.h"
 #include "Poco/Windows1250Encoding.h"
@@ -73,6 +74,7 @@ public:
 		add(new Latin9Encoding);
 		add(pUtf8Encoding);
 		add(new UTF16Encoding);
+		add(new UTF32Encoding);
 		add(new Windows1250Encoding);
 		add(new Windows1251Encoding);
 		add(new Windows1252Encoding);
diff 
--git a/Foundation/src/UTF32Encoding.cpp b/Foundation/src/UTF32Encoding.cpp
new file mode 100644
index 000000000..7ed341a3a
--- /dev/null
+++ b/Foundation/src/UTF32Encoding.cpp
@@ -0,0 +1,221 @@
+//
+// UTF32Encoding.cpp
+//
+// $Id: //poco/1.4/Foundation/src/UTF32Encoding.cpp#1 $
+//
+// Library: Foundation
+// Package: Text
+// Module:  UTF32Encoding
+//
+// Copyright (c) 2004-2007, Applied Informatics Software Engineering GmbH.
+// and Contributors.
+//
+// Permission is hereby granted, free of charge, to any person or organization
+// obtaining a copy of the software and accompanying documentation covered by
+// this license (the "Software") to use, reproduce, display, distribute,
+// execute, and transmit the Software, and to prepare derivative works of the
+// Software, and to permit third-parties to whom the Software is furnished to
+// do so, all subject to the following:
+//
+// The copyright notices in the Software and this entire statement, including
+// the above license grant, this restriction and the following disclaimer,
+// must be included in all copies of the Software, in whole or in part, and
+// all derivative works of the Software, unless such copies or derivative
+// works are solely in the form of machine-executable object code generated by
+// a source language processor.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE, TITLE AND NON-INFRINGEMENT. IN NO EVENT
+// SHALL THE COPYRIGHT HOLDERS OR ANYONE DISTRIBUTING THE SOFTWARE BE LIABLE
+// FOR ANY DAMAGES OR OTHER LIABILITY, WHETHER IN CONTRACT, TORT OR OTHERWISE,
+// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+// DEALINGS IN THE SOFTWARE.
+//
+
+
+#include "Poco/UTF32Encoding.h"
+#include "Poco/ByteOrder.h"
+#include "Poco/String.h"
+
+
+namespace Poco {
+
+
+// Names accepted by isA(). The first entry is the canonical name
+// returned by canonicalName(); NULL terminates the list.
+const char* UTF32Encoding::_names[] =
+{
+	"UTF-32",
+	"UTF32",
+	NULL
+};
+
+
+// Every byte value starts a four-byte sequence, hence -4 in all 256
+// slots. The previous value of -2 contradicted sequenceLength() and the
+// four bytes consumed by convert().
+// NOTE(review): assumes the TextEncoding convention that an entry of -n
+// marks the first byte of an n-byte sequence - confirm in TextEncoding.h.
+const TextEncoding::CharacterMap UTF32Encoding::_charMap =
+{
+	/* 00 */	-4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4,
+	/* 10 */	-4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4,
+	/* 20 */	-4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4,
+	/* 30 */	-4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4,
+	/* 40 */	-4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4,
+	/* 50 */	-4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4,
+	/* 60 */	-4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4,
+	/* 70 */	-4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4,
+	/* 80 */	-4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4,
+	/* 90 */	-4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4,
+	/* a0 */	-4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4,
+	/* b0 */	-4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4,
+	/* c0 */	-4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4,
+	/* d0 */	-4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4,
+	/* e0 */	-4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4,
+	/* f0 */	-4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4,
+};
+
+
+// Creates the encoding for the given byte order.
+UTF32Encoding::UTF32Encoding(ByteOrderType byteOrder)
+{
+	setByteOrder(byteOrder);
+}
+
+
+// Creates the encoding for the byte order indicated by byteOrderMark.
+UTF32Encoding::UTF32Encoding(int byteOrderMark)
+{
+	setByteOrder(byteOrderMark);
+}
+
+
+UTF32Encoding::~UTF32Encoding()
+{
+}
+
+
+// Reports the effective byte order: the native one unless _flipBytes
+// is set. NATIVE_BYTE_ORDER itself is never returned.
+UTF32Encoding::ByteOrderType UTF32Encoding::getByteOrder() const
+{
+#if defined(POCO_ARCH_BIG_ENDIAN)
+	return _flipBytes ? LITTLE_ENDIAN_BYTE_ORDER : BIG_ENDIAN_BYTE_ORDER;
+#else
+	return _flipBytes ? BIG_ENDIAN_BYTE_ORDER : LITTLE_ENDIAN_BYTE_ORDER;
+#endif
+}
+
+
+// Swapping is needed only when the requested order is the opposite of
+// the native one; NATIVE_BYTE_ORDER therefore never swaps.
+void UTF32Encoding::setByteOrder(ByteOrderType byteOrder)
+{
+#if defined(POCO_ARCH_BIG_ENDIAN)
+	_flipBytes = byteOrder == LITTLE_ENDIAN_BYTE_ORDER;
+#else
+	_flipBytes = byteOrder == BIG_ENDIAN_BYTE_ORDER;
+#endif
+}
+
+
+// A mark of 0xFEFF leaves the bytes in native order; any other value
+// enables byte swapping.
+void UTF32Encoding::setByteOrder(int byteOrderMark)
+{
+	_flipBytes = byteOrderMark != 0xFEFF;
+}
+
+
+// Returns the canonical name, "UTF-32".
+const char* UTF32Encoding::canonicalName() const
+{
+	return _names[0];
+}
+
+
+// Returns true if encodingName matches an entry of _names according to
+// Poco::icompare().
+bool UTF32Encoding::isA(const std::string& encodingName) const
+{
+	for (const char** name = _names; *name; ++name)
+	{
+		if (Poco::icompare(encodingName, *name) == 0)
+			return true;
+	}
+	return false;
+}
+
+
+// Returns the character map of this encoding.
+const TextEncoding::CharacterMap& UTF32Encoding::characterMap() const
+{
+	return _charMap;
+}
+
+
+// Decodes the four bytes at bytes into one 32-bit code unit. The caller
+// must guarantee that four bytes are readable.
+int UTF32Encoding::convert(const unsigned char* bytes) const
+{
+	UInt32 uc;
+	unsigned char* p = (unsigned char*) &uc;
+	*p++ = *bytes++;
+	*p++ = *bytes++;
+	*p++ = *bytes++;
+	*p++ = *bytes++;
+
+	if (_flipBytes)
+	{
+		// flipBytes() returns the swapped value and leaves its argument
+		// untouched, so the result must be assigned back.
+		uc = ByteOrder::flipBytes(uc);
+	}
+
+	return uc;
+}
+
+
+// Encodes ch as four bytes in bytes, provided a buffer of at least four
+// bytes was supplied. Always returns 4, the number of bytes needed.
+int UTF32Encoding::convert(int ch, unsigned char* bytes, int length) const
+{
+	if (bytes && length >= 4)
+	{
+		UInt32 ch1 = _flipBytes ? ByteOrder::flipBytes((UInt32) ch) : (UInt32) ch;
+		unsigned char* p = (unsigned char*) &ch1;
+		*bytes++ = *p++;
+		*bytes++ = *p++;
+		*bytes++ = *p++;
+		*bytes++ = *p++;
+	}
+	return 4;
+}
+
+
+// Length-checked variant of convert(bytes): returns the decoded code
+// unit if at least four bytes are available, otherwise -4.
+// NOTE(review): -4 (previously -2) assumes the TextEncoding convention
+// that -n asks the caller for an n-byte sequence - confirm in
+// TextEncoding.h.
+int UTF32Encoding::queryConvert(const unsigned char* bytes, int length) const
+{
+	if (length < 4)
+		return -4;
+
+	// Qualified call: reuse the decoding above without virtual dispatch.
+	return UTF32Encoding::convert(bytes);
+}
+
+
+// A UTF-32 sequence is always four bytes long; the arguments are not
+// inspected.
+int UTF32Encoding::sequenceLength(const unsigned char* bytes, int length) const
+{
+	return 4;
+}
+
+
+} // namespace Poco