diff --git a/include/__locale b/include/__locale index 395cb268..c2146d9b 100644 --- a/include/__locale +++ b/include/__locale @@ -1047,92 +1047,6 @@ protected: virtual int do_max_length() const throw(); }; -// template <> class codecvt - -template <> -class codecvt - : public locale::facet, - public codecvt_base -{ -public: - typedef char32_t intern_type; - typedef char16_t extern_type; - typedef mbstate_t state_type; - - _LIBCPP_ALWAYS_INLINE - explicit codecvt(size_t __refs = 0) - : locale::facet(__refs) {} - - _LIBCPP_ALWAYS_INLINE - result out(state_type& __st, - const intern_type* __frm, const intern_type* __frm_end, const intern_type*& __frm_nxt, - extern_type* __to, extern_type* __to_end, extern_type*& __to_nxt) const - { - return do_out(__st, __frm, __frm_end, __frm_nxt, __to, __to_end, __to_nxt); - } - - _LIBCPP_ALWAYS_INLINE - result unshift(state_type& __st, - extern_type* __to, extern_type* __to_end, extern_type*& __to_nxt) const - { - return do_unshift(__st, __to, __to_end, __to_nxt); - } - - _LIBCPP_ALWAYS_INLINE - result in(state_type& __st, - const extern_type* __frm, const extern_type* __frm_end, const extern_type*& __frm_nxt, - intern_type* __to, intern_type* __to_end, intern_type*& __to_nxt) const - { - return do_in(__st, __frm, __frm_end, __frm_nxt, __to, __to_end, __to_nxt); - } - - _LIBCPP_ALWAYS_INLINE - int encoding() const throw() - { - return do_encoding(); - } - - _LIBCPP_ALWAYS_INLINE - bool always_noconv() const throw() - { - return do_always_noconv(); - } - - _LIBCPP_ALWAYS_INLINE - int length(state_type& __st, const extern_type* __frm, const extern_type* __end, size_t __mx) const - { - return do_length(__st, __frm, __end, __mx); - } - - _LIBCPP_ALWAYS_INLINE - int max_length() const throw() - { - return do_max_length(); - } - - static locale::id id; - -protected: - _LIBCPP_ALWAYS_INLINE - explicit codecvt(const char*, size_t __refs = 0) - : locale::facet(__refs) {} - - ~codecvt(); - - virtual result do_out(state_type& __st, - const 
intern_type* __frm, const intern_type* __frm_end, const intern_type*& __frm_nxt, - extern_type* __to, extern_type* __to_end, extern_type*& __to_nxt) const; - virtual result do_in(state_type& __st, - const extern_type* __frm, const extern_type* __frm_end, const extern_type*& __frm_nxt, - intern_type* __to, intern_type* __to_end, intern_type*& __to_nxt) const; - virtual result do_unshift(state_type& __st, - extern_type* __to, extern_type* __to_end, extern_type*& __to_nxt) const; - virtual int do_encoding() const throw(); - virtual bool do_always_noconv() const throw(); - virtual int do_length(state_type&, const extern_type* __frm, const extern_type* __end, size_t __mx) const; - virtual int do_max_length() const throw(); -}; - // template class codecvt_byname template diff --git a/include/codecvt b/include/codecvt new file mode 100644 index 00000000..ad0862db --- /dev/null +++ b/include/codecvt @@ -0,0 +1,542 @@ +// -*- C++ -*- +//===-------------------------- codecvt -----------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +#ifndef _LIBCPP_CODECVT +#define _LIBCPP_CODECVT + +/* + codecvt synopsis + +namespace std +{ + +enum codecvt_mode +{ + consume_header = 4, + generate_header = 2, + little_endian = 1 +}; + +template +class codecvt_utf8 + : public codecvt +{ + // unspecified +}; + +template +class codecvt_utf16 + : public codecvt +{ + // unspecified +}; + +template +class codecvt_utf8_utf16 + : public codecvt +{ + // unspecified +}; + +} // std + +*/ + +#include <__config> +#include <__locale> + +#pragma GCC system_header + +_LIBCPP_BEGIN_NAMESPACE_STD + +enum codecvt_mode +{ + consume_header = 4, + generate_header = 2, + little_endian = 1 +}; + +// codecvt_utf8 + +template class __codecvt_utf8; + +template <> +class __codecvt_utf8 + : public codecvt +{ + unsigned long _Maxcode_; + codecvt_mode _Mode_; +public: + typedef wchar_t intern_type; + typedef char extern_type; + typedef mbstate_t state_type; + + _LIBCPP_ALWAYS_INLINE + explicit __codecvt_utf8(size_t __refs, unsigned long _Maxcode, + codecvt_mode _Mode) + : codecvt(__refs), _Maxcode_(_Maxcode), + _Mode_(_Mode) {} +protected: + virtual result + do_out(state_type& __st, + const intern_type* __frm, const intern_type* __frm_end, const intern_type*& __frm_nxt, + extern_type* __to, extern_type* __to_end, extern_type*& __to_nxt) const; + virtual result + do_in(state_type& __st, + const extern_type* __frm, const extern_type* __frm_end, const extern_type*& __frm_nxt, + intern_type* __to, intern_type* __to_end, intern_type*& __to_nxt) const; + virtual result + do_unshift(state_type& __st, + extern_type* __to, extern_type* __to_end, extern_type*& __to_nxt) const; + virtual int do_encoding() const throw(); + virtual bool do_always_noconv() const throw(); + virtual int do_length(state_type&, const extern_type* __frm, const extern_type* __end, + size_t __mx) const; + virtual int do_max_length() const throw(); +}; + +template <> +class __codecvt_utf8 + 
: public codecvt +{ + unsigned long _Maxcode_; + codecvt_mode _Mode_; +public: + typedef char16_t intern_type; + typedef char extern_type; + typedef mbstate_t state_type; + + _LIBCPP_ALWAYS_INLINE + explicit __codecvt_utf8(size_t __refs, unsigned long _Maxcode, + codecvt_mode _Mode) + : codecvt(__refs), _Maxcode_(_Maxcode), + _Mode_(_Mode) {} +protected: + virtual result + do_out(state_type& __st, + const intern_type* __frm, const intern_type* __frm_end, const intern_type*& __frm_nxt, + extern_type* __to, extern_type* __to_end, extern_type*& __to_nxt) const; + virtual result + do_in(state_type& __st, + const extern_type* __frm, const extern_type* __frm_end, const extern_type*& __frm_nxt, + intern_type* __to, intern_type* __to_end, intern_type*& __to_nxt) const; + virtual result + do_unshift(state_type& __st, + extern_type* __to, extern_type* __to_end, extern_type*& __to_nxt) const; + virtual int do_encoding() const throw(); + virtual bool do_always_noconv() const throw(); + virtual int do_length(state_type&, const extern_type* __frm, const extern_type* __end, + size_t __mx) const; + virtual int do_max_length() const throw(); +}; + +template <> +class __codecvt_utf8 + : public codecvt +{ + unsigned long _Maxcode_; + codecvt_mode _Mode_; +public: + typedef char32_t intern_type; + typedef char extern_type; + typedef mbstate_t state_type; + + _LIBCPP_ALWAYS_INLINE + explicit __codecvt_utf8(size_t __refs, unsigned long _Maxcode, + codecvt_mode _Mode) + : codecvt(__refs), _Maxcode_(_Maxcode), + _Mode_(_Mode) {} +protected: + virtual result + do_out(state_type& __st, + const intern_type* __frm, const intern_type* __frm_end, const intern_type*& __frm_nxt, + extern_type* __to, extern_type* __to_end, extern_type*& __to_nxt) const; + virtual result + do_in(state_type& __st, + const extern_type* __frm, const extern_type* __frm_end, const extern_type*& __frm_nxt, + intern_type* __to, intern_type* __to_end, intern_type*& __to_nxt) const; + virtual result + do_unshift(state_type& 
__st, + extern_type* __to, extern_type* __to_end, extern_type*& __to_nxt) const; + virtual int do_encoding() const throw(); + virtual bool do_always_noconv() const throw(); + virtual int do_length(state_type&, const extern_type* __frm, const extern_type* __end, + size_t __mx) const; + virtual int do_max_length() const throw(); +}; + +template +class codecvt_utf8 + : public __codecvt_utf8<_Elem> +{ +public: + _LIBCPP_ALWAYS_INLINE + explicit codecvt_utf8(size_t __refs = 0) + : __codecvt_utf8<_Elem>(__refs, _Maxcode, _Mode) {} + + ~codecvt_utf8() {} +}; + +// codecvt_utf16 + +template class __codecvt_utf16; + +template <> +class __codecvt_utf16 + : public codecvt +{ + unsigned long _Maxcode_; + codecvt_mode _Mode_; +public: + typedef wchar_t intern_type; + typedef char extern_type; + typedef mbstate_t state_type; + + _LIBCPP_ALWAYS_INLINE + explicit __codecvt_utf16(size_t __refs, unsigned long _Maxcode, + codecvt_mode _Mode) + : codecvt(__refs), _Maxcode_(_Maxcode), + _Mode_(_Mode) {} +protected: + virtual result + do_out(state_type& __st, + const intern_type* __frm, const intern_type* __frm_end, const intern_type*& __frm_nxt, + extern_type* __to, extern_type* __to_end, extern_type*& __to_nxt) const; + virtual result + do_in(state_type& __st, + const extern_type* __frm, const extern_type* __frm_end, const extern_type*& __frm_nxt, + intern_type* __to, intern_type* __to_end, intern_type*& __to_nxt) const; + virtual result + do_unshift(state_type& __st, + extern_type* __to, extern_type* __to_end, extern_type*& __to_nxt) const; + virtual int do_encoding() const throw(); + virtual bool do_always_noconv() const throw(); + virtual int do_length(state_type&, const extern_type* __frm, const extern_type* __end, + size_t __mx) const; + virtual int do_max_length() const throw(); +}; + +template <> +class __codecvt_utf16 + : public codecvt +{ + unsigned long _Maxcode_; + codecvt_mode _Mode_; +public: + typedef wchar_t intern_type; + typedef char extern_type; + typedef mbstate_t 
state_type; + + _LIBCPP_ALWAYS_INLINE + explicit __codecvt_utf16(size_t __refs, unsigned long _Maxcode, + codecvt_mode _Mode) + : codecvt(__refs), _Maxcode_(_Maxcode), + _Mode_(_Mode) {} +protected: + virtual result + do_out(state_type& __st, + const intern_type* __frm, const intern_type* __frm_end, const intern_type*& __frm_nxt, + extern_type* __to, extern_type* __to_end, extern_type*& __to_nxt) const; + virtual result + do_in(state_type& __st, + const extern_type* __frm, const extern_type* __frm_end, const extern_type*& __frm_nxt, + intern_type* __to, intern_type* __to_end, intern_type*& __to_nxt) const; + virtual result + do_unshift(state_type& __st, + extern_type* __to, extern_type* __to_end, extern_type*& __to_nxt) const; + virtual int do_encoding() const throw(); + virtual bool do_always_noconv() const throw(); + virtual int do_length(state_type&, const extern_type* __frm, const extern_type* __end, + size_t __mx) const; + virtual int do_max_length() const throw(); +}; + +template <> +class __codecvt_utf16 + : public codecvt +{ + unsigned long _Maxcode_; + codecvt_mode _Mode_; +public: + typedef char16_t intern_type; + typedef char extern_type; + typedef mbstate_t state_type; + + _LIBCPP_ALWAYS_INLINE + explicit __codecvt_utf16(size_t __refs, unsigned long _Maxcode, + codecvt_mode _Mode) + : codecvt(__refs), _Maxcode_(_Maxcode), + _Mode_(_Mode) {} +protected: + virtual result + do_out(state_type& __st, + const intern_type* __frm, const intern_type* __frm_end, const intern_type*& __frm_nxt, + extern_type* __to, extern_type* __to_end, extern_type*& __to_nxt) const; + virtual result + do_in(state_type& __st, + const extern_type* __frm, const extern_type* __frm_end, const extern_type*& __frm_nxt, + intern_type* __to, intern_type* __to_end, intern_type*& __to_nxt) const; + virtual result + do_unshift(state_type& __st, + extern_type* __to, extern_type* __to_end, extern_type*& __to_nxt) const; + virtual int do_encoding() const throw(); + virtual bool 
do_always_noconv() const throw(); + virtual int do_length(state_type&, const extern_type* __frm, const extern_type* __end, + size_t __mx) const; + virtual int do_max_length() const throw(); +}; + +template <> +class __codecvt_utf16 + : public codecvt +{ + unsigned long _Maxcode_; + codecvt_mode _Mode_; +public: + typedef char16_t intern_type; + typedef char extern_type; + typedef mbstate_t state_type; + + _LIBCPP_ALWAYS_INLINE + explicit __codecvt_utf16(size_t __refs, unsigned long _Maxcode, + codecvt_mode _Mode) + : codecvt(__refs), _Maxcode_(_Maxcode), + _Mode_(_Mode) {} +protected: + virtual result + do_out(state_type& __st, + const intern_type* __frm, const intern_type* __frm_end, const intern_type*& __frm_nxt, + extern_type* __to, extern_type* __to_end, extern_type*& __to_nxt) const; + virtual result + do_in(state_type& __st, + const extern_type* __frm, const extern_type* __frm_end, const extern_type*& __frm_nxt, + intern_type* __to, intern_type* __to_end, intern_type*& __to_nxt) const; + virtual result + do_unshift(state_type& __st, + extern_type* __to, extern_type* __to_end, extern_type*& __to_nxt) const; + virtual int do_encoding() const throw(); + virtual bool do_always_noconv() const throw(); + virtual int do_length(state_type&, const extern_type* __frm, const extern_type* __end, + size_t __mx) const; + virtual int do_max_length() const throw(); +}; + +template <> +class __codecvt_utf16 + : public codecvt +{ + unsigned long _Maxcode_; + codecvt_mode _Mode_; +public: + typedef char32_t intern_type; + typedef char extern_type; + typedef mbstate_t state_type; + + _LIBCPP_ALWAYS_INLINE + explicit __codecvt_utf16(size_t __refs, unsigned long _Maxcode, + codecvt_mode _Mode) + : codecvt(__refs), _Maxcode_(_Maxcode), + _Mode_(_Mode) {} +protected: + virtual result + do_out(state_type& __st, + const intern_type* __frm, const intern_type* __frm_end, const intern_type*& __frm_nxt, + extern_type* __to, extern_type* __to_end, extern_type*& __to_nxt) const; + virtual 
result + do_in(state_type& __st, + const extern_type* __frm, const extern_type* __frm_end, const extern_type*& __frm_nxt, + intern_type* __to, intern_type* __to_end, intern_type*& __to_nxt) const; + virtual result + do_unshift(state_type& __st, + extern_type* __to, extern_type* __to_end, extern_type*& __to_nxt) const; + virtual int do_encoding() const throw(); + virtual bool do_always_noconv() const throw(); + virtual int do_length(state_type&, const extern_type* __frm, const extern_type* __end, + size_t __mx) const; + virtual int do_max_length() const throw(); +}; + +template <> +class __codecvt_utf16 + : public codecvt +{ + unsigned long _Maxcode_; + codecvt_mode _Mode_; +public: + typedef char32_t intern_type; + typedef char extern_type; + typedef mbstate_t state_type; + + _LIBCPP_ALWAYS_INLINE + explicit __codecvt_utf16(size_t __refs, unsigned long _Maxcode, + codecvt_mode _Mode) + : codecvt(__refs), _Maxcode_(_Maxcode), + _Mode_(_Mode) {} +protected: + virtual result + do_out(state_type& __st, + const intern_type* __frm, const intern_type* __frm_end, const intern_type*& __frm_nxt, + extern_type* __to, extern_type* __to_end, extern_type*& __to_nxt) const; + virtual result + do_in(state_type& __st, + const extern_type* __frm, const extern_type* __frm_end, const extern_type*& __frm_nxt, + intern_type* __to, intern_type* __to_end, intern_type*& __to_nxt) const; + virtual result + do_unshift(state_type& __st, + extern_type* __to, extern_type* __to_end, extern_type*& __to_nxt) const; + virtual int do_encoding() const throw(); + virtual bool do_always_noconv() const throw(); + virtual int do_length(state_type&, const extern_type* __frm, const extern_type* __end, + size_t __mx) const; + virtual int do_max_length() const throw(); +}; + +template +class codecvt_utf16 + : public __codecvt_utf16<_Elem, _Mode & little_endian> +{ +public: + _LIBCPP_ALWAYS_INLINE + explicit codecvt_utf16(size_t __refs = 0) + : __codecvt_utf16<_Elem, _Mode & little_endian>(__refs, _Maxcode, 
_Mode) {} + + ~codecvt_utf16() {} +}; + +// codecvt_utf8_utf16 + +template class __codecvt_utf8_utf16; + +template <> +class __codecvt_utf8_utf16 + : public codecvt +{ + unsigned long _Maxcode_; + codecvt_mode _Mode_; +public: + typedef wchar_t intern_type; + typedef char extern_type; + typedef mbstate_t state_type; + + _LIBCPP_ALWAYS_INLINE + explicit __codecvt_utf8_utf16(size_t __refs, unsigned long _Maxcode, + codecvt_mode _Mode) + : codecvt(__refs), _Maxcode_(_Maxcode), + _Mode_(_Mode) {} +protected: + virtual result + do_out(state_type& __st, + const intern_type* __frm, const intern_type* __frm_end, const intern_type*& __frm_nxt, + extern_type* __to, extern_type* __to_end, extern_type*& __to_nxt) const; + virtual result + do_in(state_type& __st, + const extern_type* __frm, const extern_type* __frm_end, const extern_type*& __frm_nxt, + intern_type* __to, intern_type* __to_end, intern_type*& __to_nxt) const; + virtual result + do_unshift(state_type& __st, + extern_type* __to, extern_type* __to_end, extern_type*& __to_nxt) const; + virtual int do_encoding() const throw(); + virtual bool do_always_noconv() const throw(); + virtual int do_length(state_type&, const extern_type* __frm, const extern_type* __end, + size_t __mx) const; + virtual int do_max_length() const throw(); +}; + +template <> +class __codecvt_utf8_utf16 + : public codecvt +{ + unsigned long _Maxcode_; + codecvt_mode _Mode_; +public: + typedef char32_t intern_type; + typedef char extern_type; + typedef mbstate_t state_type; + + _LIBCPP_ALWAYS_INLINE + explicit __codecvt_utf8_utf16(size_t __refs, unsigned long _Maxcode, + codecvt_mode _Mode) + : codecvt(__refs), _Maxcode_(_Maxcode), + _Mode_(_Mode) {} +protected: + virtual result + do_out(state_type& __st, + const intern_type* __frm, const intern_type* __frm_end, const intern_type*& __frm_nxt, + extern_type* __to, extern_type* __to_end, extern_type*& __to_nxt) const; + virtual result + do_in(state_type& __st, + const extern_type* __frm, const 
extern_type* __frm_end, const extern_type*& __frm_nxt, + intern_type* __to, intern_type* __to_end, intern_type*& __to_nxt) const; + virtual result + do_unshift(state_type& __st, + extern_type* __to, extern_type* __to_end, extern_type*& __to_nxt) const; + virtual int do_encoding() const throw(); + virtual bool do_always_noconv() const throw(); + virtual int do_length(state_type&, const extern_type* __frm, const extern_type* __end, + size_t __mx) const; + virtual int do_max_length() const throw(); +}; + +template <> +class __codecvt_utf8_utf16 + : public codecvt +{ + unsigned long _Maxcode_; + codecvt_mode _Mode_; +public: + typedef char16_t intern_type; + typedef char extern_type; + typedef mbstate_t state_type; + + _LIBCPP_ALWAYS_INLINE + explicit __codecvt_utf8_utf16(size_t __refs, unsigned long _Maxcode, + codecvt_mode _Mode) + : codecvt(__refs), _Maxcode_(_Maxcode), + _Mode_(_Mode) {} +protected: + virtual result + do_out(state_type& __st, + const intern_type* __frm, const intern_type* __frm_end, const intern_type*& __frm_nxt, + extern_type* __to, extern_type* __to_end, extern_type*& __to_nxt) const; + virtual result + do_in(state_type& __st, + const extern_type* __frm, const extern_type* __frm_end, const extern_type*& __frm_nxt, + intern_type* __to, intern_type* __to_end, intern_type*& __to_nxt) const; + virtual result + do_unshift(state_type& __st, + extern_type* __to, extern_type* __to_end, extern_type*& __to_nxt) const; + virtual int do_encoding() const throw(); + virtual bool do_always_noconv() const throw(); + virtual int do_length(state_type&, const extern_type* __frm, const extern_type* __end, + size_t __mx) const; + virtual int do_max_length() const throw(); +}; + +template +class codecvt_utf8_utf16 + : public __codecvt_utf8_utf16<_Elem> +{ +public: + _LIBCPP_ALWAYS_INLINE + explicit codecvt_utf8_utf16(size_t __refs = 0) + : __codecvt_utf8_utf16<_Elem>(__refs, _Maxcode, _Mode) {} + + ~codecvt_utf8_utf16() {} +}; + +_LIBCPP_END_NAMESPACE_STD + +#endif // 
_LIBCPP_CODECVT diff --git a/src/locale.cpp b/src/locale.cpp index a65d6ed3..6c41abfb 100644 --- a/src/locale.cpp +++ b/src/locale.cpp @@ -9,6 +9,7 @@ #include "string" #include "locale" +#include "codecvt" #include "vector" #include "algorithm" #include "algorithm" @@ -1304,6 +1305,1389 @@ codecvt::do_max_length() const throw() // 040000 - 0FFFFF D8C0 - DBBF, DC00 - DFFF F1 - F3, 80 - BF, 80 - BF, 80 - BF 786432 // 100000 - 10FFFF DBC0 - DBFF, DC00 - DFFF F4 - F4, 80 - 8F, 80 - BF, 80 - BF 65536 +static +codecvt_base::result +utf16_to_utf8(const uint16_t* frm, const uint16_t* frm_end, const uint16_t*& frm_nxt, + uint8_t* to, uint8_t* to_end, uint8_t*& to_nxt, + unsigned long Maxcode = 0x10FFFF, codecvt_mode mode = codecvt_mode(0)) +{ + frm_nxt = frm; + to_nxt = to; + if (mode & generate_header) + { + if (to_end-to_nxt < 3) + return codecvt_base::partial; + *to_nxt++ = static_cast(0xEF); + *to_nxt++ = static_cast(0xBB); + *to_nxt++ = static_cast(0xBF); + } + for (; frm_nxt < frm_end; ++frm_nxt) + { + uint16_t wc1 = *frm_nxt; + if (wc1 > Maxcode) + return codecvt_base::error; + if (wc1 < 0x0080) + { + if (to_end-to_nxt < 1) + return codecvt_base::partial; + *to_nxt++ = static_cast(wc1); + } + else if (wc1 < 0x0800) + { + if (to_end-to_nxt < 2) + return codecvt_base::partial; + *to_nxt++ = static_cast(0xC0 | (wc1 >> 6)); + *to_nxt++ = static_cast(0x80 | (wc1 & 0x03F)); + } + else if (wc1 < 0xD800) + { + if (to_end-to_nxt < 3) + return codecvt_base::partial; + *to_nxt++ = static_cast(0xE0 | (wc1 >> 12)); + *to_nxt++ = static_cast(0x80 | ((wc1 & 0x0FC0) >> 6)); + *to_nxt++ = static_cast(0x80 | (wc1 & 0x003F)); + } + else if (wc1 < 0xDC00) + { + if (frm_end-frm_nxt < 2) + return codecvt_base::partial; + uint16_t wc2 = frm_nxt[1]; + if ((wc2 & 0xFC00) != 0xDC00) + return codecvt_base::error; + if (to_end-to_nxt < 4) + return codecvt_base::partial; + if ((((((unsigned long)wc1 & 0x03C0) >> 6) + 1) << 16) + + (((unsigned long)wc1 & 0x003F) << 10) + (wc2 & 0x03FF) > 
Maxcode) + return codecvt_base::error; + ++frm_nxt; + uint8_t z = ((wc1 & 0x03C0) >> 6) + 1; + *to_nxt++ = static_cast(0xF0 | (z >> 2)); + *to_nxt++ = static_cast(0x80 | ((z & 0x03) << 4) | ((wc1 & 0x003C) >> 2)); + *to_nxt++ = static_cast(0x80 | ((wc1 & 0x0003) << 4) | ((wc2 & 0x03C0) >> 6)); + *to_nxt++ = static_cast(0x80 | (wc2 & 0x003F)); + } + else if (wc1 < 0xE000) + { + return codecvt_base::error; + } + else + { + if (to_end-to_nxt < 3) + return codecvt_base::partial; + *to_nxt++ = static_cast(0xE0 | (wc1 >> 12)); + *to_nxt++ = static_cast(0x80 | ((wc1 & 0x0FC0) >> 6)); + *to_nxt++ = static_cast(0x80 | (wc1 & 0x003F)); + } + } + return codecvt_base::ok; +} + +static +codecvt_base::result +utf16_to_utf8(const uint32_t* frm, const uint32_t* frm_end, const uint32_t*& frm_nxt, + uint8_t* to, uint8_t* to_end, uint8_t*& to_nxt, + unsigned long Maxcode = 0x10FFFF, codecvt_mode mode = codecvt_mode(0)) +{ + frm_nxt = frm; + to_nxt = to; + if (mode & generate_header) + { + if (to_end-to_nxt < 3) + return codecvt_base::partial; + *to_nxt++ = static_cast(0xEF); + *to_nxt++ = static_cast(0xBB); + *to_nxt++ = static_cast(0xBF); + } + for (; frm_nxt < frm_end; ++frm_nxt) + { + uint16_t wc1 = static_cast(*frm_nxt); + if (wc1 > Maxcode) + return codecvt_base::error; + if (wc1 < 0x0080) + { + if (to_end-to_nxt < 1) + return codecvt_base::partial; + *to_nxt++ = static_cast(wc1); + } + else if (wc1 < 0x0800) + { + if (to_end-to_nxt < 2) + return codecvt_base::partial; + *to_nxt++ = static_cast(0xC0 | (wc1 >> 6)); + *to_nxt++ = static_cast(0x80 | (wc1 & 0x03F)); + } + else if (wc1 < 0xD800) + { + if (to_end-to_nxt < 3) + return codecvt_base::partial; + *to_nxt++ = static_cast(0xE0 | (wc1 >> 12)); + *to_nxt++ = static_cast(0x80 | ((wc1 & 0x0FC0) >> 6)); + *to_nxt++ = static_cast(0x80 | (wc1 & 0x003F)); + } + else if (wc1 < 0xDC00) + { + if (frm_end-frm_nxt < 2) + return codecvt_base::partial; + uint16_t wc2 = static_cast(frm_nxt[1]); + if ((wc2 & 0xFC00) != 0xDC00) + return 
codecvt_base::error; + if (to_end-to_nxt < 4) + return codecvt_base::partial; + if ((((((unsigned long)wc1 & 0x03C0) >> 6) + 1) << 16) + + (((unsigned long)wc1 & 0x003F) << 10) + (wc2 & 0x03FF) > Maxcode) + return codecvt_base::error; + ++frm_nxt; + uint8_t z = ((wc1 & 0x03C0) >> 6) + 1; + *to_nxt++ = static_cast(0xF0 | (z >> 2)); + *to_nxt++ = static_cast(0x80 | ((z & 0x03) << 4) | ((wc1 & 0x003C) >> 2)); + *to_nxt++ = static_cast(0x80 | ((wc1 & 0x0003) << 4) | ((wc2 & 0x03C0) >> 6)); + *to_nxt++ = static_cast(0x80 | (wc2 & 0x003F)); + } + else if (wc1 < 0xE000) + { + return codecvt_base::error; + } + else + { + if (to_end-to_nxt < 3) + return codecvt_base::partial; + *to_nxt++ = static_cast(0xE0 | (wc1 >> 12)); + *to_nxt++ = static_cast(0x80 | ((wc1 & 0x0FC0) >> 6)); + *to_nxt++ = static_cast(0x80 | (wc1 & 0x003F)); + } + } + return codecvt_base::ok; +} + +static +codecvt_base::result +utf8_to_utf16(const uint8_t* frm, const uint8_t* frm_end, const uint8_t*& frm_nxt, + uint16_t* to, uint16_t* to_end, uint16_t*& to_nxt, + unsigned long Maxcode = 0x10FFFF, codecvt_mode mode = codecvt_mode(0)) +{ + frm_nxt = frm; + to_nxt = to; + if (mode & consume_header) + { + if (frm_end-frm_nxt >= 3 && frm_nxt[0] == 0xEF && frm_nxt[1] == 0xBB && + frm_nxt[2] == 0xBF) + frm_nxt += 3; + } + for (; frm_nxt < frm_end && to_nxt < to_end; ++to_nxt) + { + uint8_t c1 = *frm_nxt; + if (c1 > Maxcode) + return codecvt_base::error; + if (c1 < 0x80) + { + *to_nxt = static_cast(c1); + ++frm_nxt; + } + else if (c1 < 0xC2) + { + return codecvt_base::error; + } + else if (c1 < 0xE0) + { + if (frm_end-frm_nxt < 2) + return codecvt_base::partial; + uint8_t c2 = frm_nxt[1]; + if ((c2 & 0xC0) != 0x80) + return codecvt_base::error; + uint16_t t = static_cast(((c1 & 0x1F) << 6) | (c2 & 0x3F)); + if (t > Maxcode) + return codecvt_base::error; + *to_nxt = t; + frm_nxt += 2; + } + else if (c1 < 0xF0) + { + if (frm_end-frm_nxt < 3) + return codecvt_base::partial; + uint8_t c2 = frm_nxt[1]; + uint8_t c3 = 
frm_nxt[2]; + switch (c1) + { + case 0xE0: + if ((c2 & 0xE0) != 0xA0) + return codecvt_base::error; + break; + case 0xED: + if ((c2 & 0xE0) != 0x80) + return codecvt_base::error; + break; + default: + if ((c2 & 0xC0) != 0x80) + return codecvt_base::error; + break; + } + if ((c3 & 0xC0) != 0x80) + return codecvt_base::error; + uint16_t t = static_cast(((c1 & 0x0F) << 12) + | ((c2 & 0x3F) << 6) + | (c3 & 0x3F)); + if (t > Maxcode) + return codecvt_base::error; + *to_nxt = t; + frm_nxt += 3; + } + else if (c1 < 0xF5) + { + if (frm_end-frm_nxt < 4) + return codecvt_base::partial; + uint8_t c2 = frm_nxt[1]; + uint8_t c3 = frm_nxt[2]; + uint8_t c4 = frm_nxt[3]; + switch (c1) + { + case 0xF0: + if (!(0x90 <= c2 && c2 <= 0xBF)) + return codecvt_base::error; + break; + case 0xF4: + if ((c2 & 0xF0) != 0x80) + return codecvt_base::error; + break; + default: + if ((c2 & 0xC0) != 0x80) + return codecvt_base::error; + break; + } + if ((c3 & 0xC0) != 0x80 || (c4 & 0xC0) != 0x80) + return codecvt_base::error; + if (to_end-to_nxt < 2) + return codecvt_base::partial; + if (((((unsigned long)c1 & 7) << 18) + + (((unsigned long)c2 & 0x3F) << 12) + + (((unsigned long)c3 & 0x3F) << 6) + (c4 & 0x3F)) > Maxcode) + return codecvt_base::error; + *to_nxt = static_cast( + 0xD800 + | (((((c1 & 0x07) << 2) | ((c2 & 0x30) >> 4)) - 1) << 6) + | ((c2 & 0x0F) << 2) + | ((c3 & 0x30) >> 4)); + *++to_nxt = static_cast( + 0xDC00 + | ((c3 & 0x0F) << 6) + | (c4 & 0x3F)); + frm_nxt += 4; + } + else + { + return codecvt_base::error; + } + } + return frm_nxt < frm_end ? 
codecvt_base::partial : codecvt_base::ok; +} + +static +codecvt_base::result +utf8_to_utf16(const uint8_t* frm, const uint8_t* frm_end, const uint8_t*& frm_nxt, + uint32_t* to, uint32_t* to_end, uint32_t*& to_nxt, + unsigned long Maxcode = 0x10FFFF, codecvt_mode mode = codecvt_mode(0)) +{ + frm_nxt = frm; + to_nxt = to; + if (mode & consume_header) + { + if (frm_end-frm_nxt >= 3 && frm_nxt[0] == 0xEF && frm_nxt[1] == 0xBB && + frm_nxt[2] == 0xBF) + frm_nxt += 3; + } + for (; frm_nxt < frm_end && to_nxt < to_end; ++to_nxt) + { + uint8_t c1 = *frm_nxt; + if (c1 > Maxcode) + return codecvt_base::error; + if (c1 < 0x80) + { + *to_nxt = static_cast(c1); + ++frm_nxt; + } + else if (c1 < 0xC2) + { + return codecvt_base::error; + } + else if (c1 < 0xE0) + { + if (frm_end-frm_nxt < 2) + return codecvt_base::partial; + uint8_t c2 = frm_nxt[1]; + if ((c2 & 0xC0) != 0x80) + return codecvt_base::error; + uint16_t t = static_cast(((c1 & 0x1F) << 6) | (c2 & 0x3F)); + if (t > Maxcode) + return codecvt_base::error; + *to_nxt = static_cast(t); + frm_nxt += 2; + } + else if (c1 < 0xF0) + { + if (frm_end-frm_nxt < 3) + return codecvt_base::partial; + uint8_t c2 = frm_nxt[1]; + uint8_t c3 = frm_nxt[2]; + switch (c1) + { + case 0xE0: + if ((c2 & 0xE0) != 0xA0) + return codecvt_base::error; + break; + case 0xED: + if ((c2 & 0xE0) != 0x80) + return codecvt_base::error; + break; + default: + if ((c2 & 0xC0) != 0x80) + return codecvt_base::error; + break; + } + if ((c3 & 0xC0) != 0x80) + return codecvt_base::error; + uint16_t t = static_cast(((c1 & 0x0F) << 12) + | ((c2 & 0x3F) << 6) + | (c3 & 0x3F)); + if (t > Maxcode) + return codecvt_base::error; + *to_nxt = static_cast(t); + frm_nxt += 3; + } + else if (c1 < 0xF5) + { + if (frm_end-frm_nxt < 4) + return codecvt_base::partial; + uint8_t c2 = frm_nxt[1]; + uint8_t c3 = frm_nxt[2]; + uint8_t c4 = frm_nxt[3]; + switch (c1) + { + case 0xF0: + if (!(0x90 <= c2 && c2 <= 0xBF)) + return codecvt_base::error; + break; + case 0xF4: + if ((c2 & 
0xF0) != 0x80) + return codecvt_base::error; + break; + default: + if ((c2 & 0xC0) != 0x80) + return codecvt_base::error; + break; + } + if ((c3 & 0xC0) != 0x80 || (c4 & 0xC0) != 0x80) + return codecvt_base::error; + if (to_end-to_nxt < 2) + return codecvt_base::partial; + if (((((unsigned long)c1 & 7) << 18) + + (((unsigned long)c2 & 0x3F) << 12) + + (((unsigned long)c3 & 0x3F) << 6) + (c4 & 0x3F)) > Maxcode) + return codecvt_base::error; + *to_nxt = static_cast( + 0xD800 + | (((((c1 & 0x07) << 2) | ((c2 & 0x30) >> 4)) - 1) << 6) + | ((c2 & 0x0F) << 2) + | ((c3 & 0x30) >> 4)); + *++to_nxt = static_cast( + 0xDC00 + | ((c3 & 0x0F) << 6) + | (c4 & 0x3F)); + frm_nxt += 4; + } + else + { + return codecvt_base::error; + } + } + return frm_nxt < frm_end ? codecvt_base::partial : codecvt_base::ok; +} + +static +int +utf8_to_utf16_length(const uint8_t* frm, const uint8_t* frm_end, + size_t mx, unsigned long Maxcode = 0x10FFFF, + codecvt_mode mode = codecvt_mode(0)) +{ + const uint8_t* frm_nxt = frm; + if (mode & consume_header) + { + if (frm_end-frm_nxt >= 3 && frm_nxt[0] == 0xEF && frm_nxt[1] == 0xBB && + frm_nxt[2] == 0xBF) + frm_nxt += 3; + } + for (size_t nchar16_t = 0; frm_nxt < frm_end && nchar16_t < mx; ++nchar16_t) + { + uint8_t c1 = *frm_nxt; + if (c1 > Maxcode) + break; + if (c1 < 0x80) + { + ++frm_nxt; + } + else if (c1 < 0xC2) + { + break; + } + else if (c1 < 0xE0) + { + if ((frm_end-frm_nxt < 2) || (frm_nxt[1] & 0xC0) != 0x80) + break; + uint16_t t = static_cast(((c1 & 0x1F) << 6) | (frm_nxt[1] & 0x3F)); + if (t > Maxcode) + break; + frm_nxt += 2; + } + else if (c1 < 0xF0) + { + if (frm_end-frm_nxt < 3) + break; + uint8_t c2 = frm_nxt[1]; + uint8_t c3 = frm_nxt[2]; + uint16_t t = static_cast(((c1 & 0x0F) << 12) + | ((c2 & 0x3F) << 6) + | (c3 & 0x3F)); + switch (c1) + { + case 0xE0: + if ((c2 & 0xE0) != 0xA0) + return static_cast(frm_nxt - frm); + break; + case 0xED: + if ((c2 & 0xE0) != 0x80) + return static_cast(frm_nxt - frm); + break; + default: + if ((c2 & 
0xC0) != 0x80) + return static_cast(frm_nxt - frm); + break; + } + if ((c3 & 0xC0) != 0x80) + break; + if ((((c1 & 0x0F) << 12) | ((c2 & 0x3F) << 6) | (c3 & 0x3F)) > Maxcode) + break; + frm_nxt += 3; + } + else if (c1 < 0xF5) + { + if (frm_end-frm_nxt < 4 || mx-nchar16_t < 2) + break; + uint8_t c2 = frm_nxt[1]; + uint8_t c3 = frm_nxt[2]; + uint8_t c4 = frm_nxt[3]; + switch (c1) + { + case 0xF0: + if (!(0x90 <= c2 && c2 <= 0xBF)) + return static_cast(frm_nxt - frm); + break; + case 0xF4: + if ((c2 & 0xF0) != 0x80) + return static_cast(frm_nxt - frm); + break; + default: + if ((c2 & 0xC0) != 0x80) + return static_cast(frm_nxt - frm); + break; + } + if ((c3 & 0xC0) != 0x80 || (c4 & 0xC0) != 0x80) + break; + if (((((unsigned long)c1 & 7) << 18) + + (((unsigned long)c2 & 0x3F) << 12) + + (((unsigned long)c3 & 0x3F) << 6) + (c4 & 0x3F)) > Maxcode) + break; + ++nchar16_t; + frm_nxt += 4; + } + else + { + break; + } + } + return static_cast(frm_nxt - frm); +} + +static +codecvt_base::result +ucs4_to_utf8(const uint32_t* frm, const uint32_t* frm_end, const uint32_t*& frm_nxt, + uint8_t* to, uint8_t* to_end, uint8_t*& to_nxt, + unsigned long Maxcode = 0x10FFFF, codecvt_mode mode = codecvt_mode(0)) +{ + frm_nxt = frm; + to_nxt = to; + if (mode & generate_header) + { + if (to_end-to_nxt < 3) + return codecvt_base::partial; + *to_nxt++ = static_cast(0xEF); + *to_nxt++ = static_cast(0xBB); + *to_nxt++ = static_cast(0xBF); + } + for (; frm_nxt < frm_end; ++frm_nxt) + { + uint32_t wc = *frm_nxt; + if ((wc & 0xFFFFF800) == 0x00D800 || wc > Maxcode) + return codecvt_base::error; + if (wc < 0x000080) + { + if (to_end-to_nxt < 1) + return codecvt_base::partial; + *to_nxt++ = static_cast(wc); + } + else if (wc < 0x000800) + { + if (to_end-to_nxt < 2) + return codecvt_base::partial; + *to_nxt++ = static_cast(0xC0 | (wc >> 6)); + *to_nxt++ = static_cast(0x80 | (wc & 0x03F)); + } + else if (wc < 0x010000) + { + if (to_end-to_nxt < 3) + return codecvt_base::partial; + *to_nxt++ = 
static_cast(0xE0 | (wc >> 12)); + *to_nxt++ = static_cast(0x80 | ((wc & 0x0FC0) >> 6)); + *to_nxt++ = static_cast(0x80 | (wc & 0x003F)); + } + else // if (wc < 0x110000) + { + if (to_end-to_nxt < 4) + return codecvt_base::partial; + *to_nxt++ = static_cast(0xF0 | (wc >> 18)); + *to_nxt++ = static_cast(0x80 | ((wc & 0x03F000) >> 12)); + *to_nxt++ = static_cast(0x80 | ((wc & 0x000FC0) >> 6)); + *to_nxt++ = static_cast(0x80 | (wc & 0x00003F)); + } + } + return codecvt_base::ok; +} + +static +codecvt_base::result +utf8_to_ucs4(const uint8_t* frm, const uint8_t* frm_end, const uint8_t*& frm_nxt, + uint32_t* to, uint32_t* to_end, uint32_t*& to_nxt, + unsigned long Maxcode = 0x10FFFF, codecvt_mode mode = codecvt_mode(0)) +{ + frm_nxt = frm; + to_nxt = to; + if (mode & consume_header) + { + if (frm_end-frm_nxt >= 3 && frm_nxt[0] == 0xEF && frm_nxt[1] == 0xBB && + frm_nxt[2] == 0xBF) + frm_nxt += 3; + } + for (; frm_nxt < frm_end && to_nxt < to_end; ++to_nxt) + { + uint8_t c1 = static_cast(*frm_nxt); + if (c1 < 0x80) + { + if (c1 > Maxcode) + return codecvt_base::error; + *to_nxt = static_cast(c1); + ++frm_nxt; + } + else if (c1 < 0xC2) + { + return codecvt_base::error; + } + else if (c1 < 0xE0) + { + if (frm_end-frm_nxt < 2) + return codecvt_base::partial; + uint8_t c2 = frm_nxt[1]; + if ((c2 & 0xC0) != 0x80) + return codecvt_base::error; + uint32_t t = static_cast(((c1 & 0x1F) << 6) + | (c2 & 0x3F)); + if (t > Maxcode) + return codecvt_base::error; + *to_nxt = t; + frm_nxt += 2; + } + else if (c1 < 0xF0) + { + if (frm_end-frm_nxt < 3) + return codecvt_base::partial; + uint8_t c2 = frm_nxt[1]; + uint8_t c3 = frm_nxt[2]; + switch (c1) + { + case 0xE0: + if ((c2 & 0xE0) != 0xA0) + return codecvt_base::error; + break; + case 0xED: + if ((c2 & 0xE0) != 0x80) + return codecvt_base::error; + break; + default: + if ((c2 & 0xC0) != 0x80) + return codecvt_base::error; + break; + } + if ((c3 & 0xC0) != 0x80) + return codecvt_base::error; + uint32_t t = static_cast(((c1 & 0x0F) << 
12) + | ((c2 & 0x3F) << 6) + | (c3 & 0x3F)); + if (t > Maxcode) + return codecvt_base::error; + *to_nxt = t; + frm_nxt += 3; + } + else if (c1 < 0xF5) + { + if (frm_end-frm_nxt < 4) + return codecvt_base::partial; + uint8_t c2 = frm_nxt[1]; + uint8_t c3 = frm_nxt[2]; + uint8_t c4 = frm_nxt[3]; + switch (c1) + { + case 0xF0: + if (!(0x90 <= c2 && c2 <= 0xBF)) + return codecvt_base::error; + break; + case 0xF4: + if ((c2 & 0xF0) != 0x80) + return codecvt_base::error; + break; + default: + if ((c2 & 0xC0) != 0x80) + return codecvt_base::error; + break; + } + if ((c3 & 0xC0) != 0x80 || (c4 & 0xC0) != 0x80) + return codecvt_base::error; + uint32_t t = static_cast(((c1 & 0x07) << 18) + | ((c2 & 0x3F) << 12) + | ((c3 & 0x3F) << 6) + | (c4 & 0x3F)); + if (t > Maxcode) + return codecvt_base::error; + *to_nxt = t; + frm_nxt += 4; + } + else + { + return codecvt_base::error; + } + } + return frm_nxt < frm_end ? codecvt_base::partial : codecvt_base::ok; +} + +static +int +utf8_to_ucs4_length(const uint8_t* frm, const uint8_t* frm_end, + size_t mx, unsigned long Maxcode = 0x10FFFF, + codecvt_mode mode = codecvt_mode(0)) +{ + const uint8_t* frm_nxt = frm; + if (mode & consume_header) + { + if (frm_end-frm_nxt >= 3 && frm_nxt[0] == 0xEF && frm_nxt[1] == 0xBB && + frm_nxt[2] == 0xBF) + frm_nxt += 3; + } + for (size_t nchar32_t = 0; frm_nxt < frm_end && nchar32_t < mx; ++nchar32_t) + { + uint8_t c1 = static_cast(*frm_nxt); + if (c1 < 0x80) + { + if (c1 > Maxcode) + break; + ++frm_nxt; + } + else if (c1 < 0xC2) + { + break; + } + else if (c1 < 0xE0) + { + if ((frm_end-frm_nxt < 2) || ((frm_nxt[1] & 0xC0) != 0x80)) + break; + if ((((c1 & 0x1F) << 6) | (frm_nxt[1] & 0x3F)) > Maxcode) + break; + frm_nxt += 2; + } + else if (c1 < 0xF0) + { + if (frm_end-frm_nxt < 3) + break; + uint8_t c2 = frm_nxt[1]; + uint8_t c3 = frm_nxt[2]; + switch (c1) + { + case 0xE0: + if ((c2 & 0xE0) != 0xA0) + return static_cast(frm_nxt - frm); + break; + case 0xED: + if ((c2 & 0xE0) != 0x80) + return 
static_cast(frm_nxt - frm); + break; + default: + if ((c2 & 0xC0) != 0x80) + return static_cast(frm_nxt - frm); + break; + } + if ((c3 & 0xC0) != 0x80) + break; + if ((((c1 & 0x0F) << 12) | ((c2 & 0x3F) << 6) | (c3 & 0x3F)) > Maxcode) + break; + frm_nxt += 3; + } + else if (c1 < 0xF5) + { + if (frm_end-frm_nxt < 4) + break; + uint8_t c2 = frm_nxt[1]; + uint8_t c3 = frm_nxt[2]; + uint8_t c4 = frm_nxt[3]; + switch (c1) + { + case 0xF0: + if (!(0x90 <= c2 && c2 <= 0xBF)) + return static_cast(frm_nxt - frm); + break; + case 0xF4: + if ((c2 & 0xF0) != 0x80) + return static_cast(frm_nxt - frm); + break; + default: + if ((c2 & 0xC0) != 0x80) + return static_cast(frm_nxt - frm); + break; + } + if ((c3 & 0xC0) != 0x80 || (c4 & 0xC0) != 0x80) + break; + uint32_t t = static_cast(((c1 & 0x07) << 18) + | ((c2 & 0x3F) << 12) + | ((c3 & 0x3F) << 6) + | (c4 & 0x3F)); + if ((((c1 & 0x07) << 18) | ((c2 & 0x3F) << 12) | + ((c3 & 0x3F) << 6) | (c4 & 0x3F)) > Maxcode) + break; + frm_nxt += 4; + } + else + { + break; + } + } + return static_cast(frm_nxt - frm); +} + +static +codecvt_base::result +ucs2_to_utf8(const uint16_t* frm, const uint16_t* frm_end, const uint16_t*& frm_nxt, + uint8_t* to, uint8_t* to_end, uint8_t*& to_nxt, + unsigned long Maxcode = 0x10FFFF, codecvt_mode mode = codecvt_mode(0)) +{ + frm_nxt = frm; + to_nxt = to; + if (mode & generate_header) + { + if (to_end-to_nxt < 3) + return codecvt_base::partial; + *to_nxt++ = static_cast(0xEF); + *to_nxt++ = static_cast(0xBB); + *to_nxt++ = static_cast(0xBF); + } + for (; frm_nxt < frm_end; ++frm_nxt) + { + uint16_t wc = *frm_nxt; + if ((wc & 0xF800) == 0xD800 || wc > Maxcode) + return codecvt_base::error; + if (wc < 0x0080) + { + if (to_end-to_nxt < 1) + return codecvt_base::partial; + *to_nxt++ = static_cast(wc); + } + else if (wc < 0x0800) + { + if (to_end-to_nxt < 2) + return codecvt_base::partial; + *to_nxt++ = static_cast(0xC0 | (wc >> 6)); + *to_nxt++ = static_cast(0x80 | (wc & 0x03F)); + } + else // if (wc <= 0xFFFF) 
+ { + if (to_end-to_nxt < 3) + return codecvt_base::partial; + *to_nxt++ = static_cast(0xE0 | (wc >> 12)); + *to_nxt++ = static_cast(0x80 | ((wc & 0x0FC0) >> 6)); + *to_nxt++ = static_cast(0x80 | (wc & 0x003F)); + } + } + return codecvt_base::ok; +} + +static +codecvt_base::result +utf8_to_ucs2(const uint8_t* frm, const uint8_t* frm_end, const uint8_t*& frm_nxt, + uint16_t* to, uint16_t* to_end, uint16_t*& to_nxt, + unsigned long Maxcode = 0x10FFFF, codecvt_mode mode = codecvt_mode(0)) +{ + frm_nxt = frm; + to_nxt = to; + if (mode & consume_header) + { + if (frm_end-frm_nxt >= 3 && frm_nxt[0] == 0xEF && frm_nxt[1] == 0xBB && + frm_nxt[2] == 0xBF) + frm_nxt += 3; + } + for (; frm_nxt < frm_end && to_nxt < to_end; ++to_nxt) + { + uint8_t c1 = static_cast(*frm_nxt); + if (c1 < 0x80) + { + if (c1 > Maxcode) + return codecvt_base::error; + *to_nxt = static_cast(c1); + ++frm_nxt; + } + else if (c1 < 0xC2) + { + return codecvt_base::error; + } + else if (c1 < 0xE0) + { + if (frm_end-frm_nxt < 2) + return codecvt_base::partial; + uint8_t c2 = frm_nxt[1]; + if ((c2 & 0xC0) != 0x80) + return codecvt_base::error; + uint16_t t = static_cast(((c1 & 0x1F) << 6) + | (c2 & 0x3F)); + if (t > Maxcode) + return codecvt_base::error; + *to_nxt = t; + frm_nxt += 2; + } + else if (c1 < 0xF0) + { + if (frm_end-frm_nxt < 3) + return codecvt_base::partial; + uint8_t c2 = frm_nxt[1]; + uint8_t c3 = frm_nxt[2]; + switch (c1) + { + case 0xE0: + if ((c2 & 0xE0) != 0xA0) + return codecvt_base::error; + break; + case 0xED: + if ((c2 & 0xE0) != 0x80) + return codecvt_base::error; + break; + default: + if ((c2 & 0xC0) != 0x80) + return codecvt_base::error; + break; + } + if ((c3 & 0xC0) != 0x80) + return codecvt_base::error; + uint16_t t = static_cast(((c1 & 0x0F) << 12) + | ((c2 & 0x3F) << 6) + | (c3 & 0x3F)); + if (t > Maxcode) + return codecvt_base::error; + *to_nxt = t; + frm_nxt += 3; + } + else + { + return codecvt_base::error; + } + } + return frm_nxt < frm_end ? 
codecvt_base::partial : codecvt_base::ok; +} + +static +int +utf8_to_ucs2_length(const uint8_t* frm, const uint8_t* frm_end, + size_t mx, unsigned long Maxcode = 0x10FFFF, + codecvt_mode mode = codecvt_mode(0)) +{ + const uint8_t* frm_nxt = frm; + if (mode & consume_header) + { + if (frm_end-frm_nxt >= 3 && frm_nxt[0] == 0xEF && frm_nxt[1] == 0xBB && + frm_nxt[2] == 0xBF) + frm_nxt += 3; + } + for (size_t nchar32_t = 0; frm_nxt < frm_end && nchar32_t < mx; ++nchar32_t) + { + uint8_t c1 = static_cast(*frm_nxt); + if (c1 < 0x80) + { + if (c1 > Maxcode) + break; + ++frm_nxt; + } + else if (c1 < 0xC2) + { + break; + } + else if (c1 < 0xE0) + { + if ((frm_end-frm_nxt < 2) || ((frm_nxt[1] & 0xC0) != 0x80)) + break; + if ((((c1 & 0x1F) << 6) | (frm_nxt[1] & 0x3F)) > Maxcode) + break; + frm_nxt += 2; + } + else if (c1 < 0xF0) + { + if (frm_end-frm_nxt < 3) + break; + uint8_t c2 = frm_nxt[1]; + uint8_t c3 = frm_nxt[2]; + switch (c1) + { + case 0xE0: + if ((c2 & 0xE0) != 0xA0) + return static_cast(frm_nxt - frm); + break; + case 0xED: + if ((c2 & 0xE0) != 0x80) + return static_cast(frm_nxt - frm); + break; + default: + if ((c2 & 0xC0) != 0x80) + return static_cast(frm_nxt - frm); + break; + } + if ((c3 & 0xC0) != 0x80) + break; + if ((((c1 & 0x0F) << 12) | ((c2 & 0x3F) << 6) | (c3 & 0x3F)) > Maxcode) + break; + frm_nxt += 3; + } + else + { + break; + } + } + return static_cast(frm_nxt - frm); +} + +static +codecvt_base::result +ucs4_to_utf16be(const uint32_t* frm, const uint32_t* frm_end, const uint32_t*& frm_nxt, + uint8_t* to, uint8_t* to_end, uint8_t*& to_nxt, + unsigned long Maxcode = 0x10FFFF, codecvt_mode mode = codecvt_mode(0)) +{ + frm_nxt = frm; + to_nxt = to; + if (mode & generate_header) + { + if (to_end-to_nxt < 2) + return codecvt_base::partial; + *to_nxt++ = static_cast(0xFE); + *to_nxt++ = static_cast(0xFF); + } + for (; frm_nxt < frm_end; ++frm_nxt) + { + uint32_t wc = *frm_nxt; + if ((wc & 0xFFFFF800) == 0x00D800 || wc > Maxcode) + return 
codecvt_base::error; + if (wc < 0x010000) + { + if (to_end-to_nxt < 2) + return codecvt_base::partial; + *to_nxt++ = static_cast(wc >> 8); + *to_nxt++ = static_cast(wc); + } + else + { + if (to_end-to_nxt < 4) + return codecvt_base::partial; + uint16_t t = static_cast( + 0xD800 + | ((((wc & 0x1F0000) >> 16) - 1) << 6) + | ((wc & 0x00FC00) >> 10)); + *to_nxt++ = static_cast(t >> 8); + *to_nxt++ = static_cast(t); + t = static_cast(0xDC00 | (wc & 0x03FF)); + *to_nxt++ = static_cast(t >> 8); + *to_nxt++ = static_cast(t); + } + } + return codecvt_base::ok; +} + +static +codecvt_base::result +utf16be_to_ucs4(const uint8_t* frm, const uint8_t* frm_end, const uint8_t*& frm_nxt, + uint32_t* to, uint32_t* to_end, uint32_t*& to_nxt, + unsigned long Maxcode = 0x10FFFF, codecvt_mode mode = codecvt_mode(0)) +{ + frm_nxt = frm; + to_nxt = to; + if (mode & consume_header) + { + if (frm_end-frm_nxt >= 2 && frm_nxt[0] == 0xFE && frm_nxt[1] == 0xFF) + frm_nxt += 2; + } + for (; frm_nxt < frm_end - 1 && to_nxt < to_end; ++to_nxt) + { + uint16_t c1 = frm_nxt[0] << 8 | frm_nxt[1]; + if ((c1 & 0xFC00) == 0xDC00) + return codecvt_base::error; + if ((c1 & 0xFC00) != 0xD800) + { + if (c1 > Maxcode) + return codecvt_base::error; + *to_nxt = static_cast(c1); + frm_nxt += 2; + } + else + { + if (frm_end-frm_nxt < 4) + return codecvt_base::partial; + uint16_t c2 = frm_nxt[2] << 8 | frm_nxt[3]; + if ((c2 & 0xFC00) != 0xDC00) + return codecvt_base::error; + uint32_t t = static_cast( + ((((c1 & 0x03C0) >> 6) + 1) << 16) + | ((c1 & 0x003F) << 10) + | (c2 & 0x03FF)); + if (t > Maxcode) + return codecvt_base::error; + *to_nxt = t; + frm_nxt += 4; + } + } + return frm_nxt < frm_end ? 
codecvt_base::partial : codecvt_base::ok; +} + +static +int +utf16be_to_ucs4_length(const uint8_t* frm, const uint8_t* frm_end, + size_t mx, unsigned long Maxcode = 0x10FFFF, + codecvt_mode mode = codecvt_mode(0)) +{ + const uint8_t* frm_nxt = frm; + frm_nxt = frm; + if (mode & consume_header) + { + if (frm_end-frm_nxt >= 2 && frm_nxt[0] == 0xFE && frm_nxt[1] == 0xFF) + frm_nxt += 2; + } + for (size_t nchar32_t = 0; frm_nxt < frm_end - 1 && nchar32_t < mx; ++nchar32_t) + { + uint16_t c1 = frm_nxt[0] << 8 | frm_nxt[1]; + if ((c1 & 0xFC00) == 0xDC00) + break; + if ((c1 & 0xFC00) != 0xD800) + { + if (c1 > Maxcode) + break; + frm_nxt += 2; + } + else + { + if (frm_end-frm_nxt < 4) + break; + uint16_t c2 = frm_nxt[2] << 8 | frm_nxt[3]; + if ((c2 & 0xFC00) != 0xDC00) + break; + uint32_t t = static_cast( + ((((c1 & 0x03C0) >> 6) + 1) << 16) + | ((c1 & 0x003F) << 10) + | (c2 & 0x03FF)); + if (t > Maxcode) + break; + frm_nxt += 4; + } + } + return static_cast(frm_nxt - frm); +} + +static +codecvt_base::result +ucs4_to_utf16le(const uint32_t* frm, const uint32_t* frm_end, const uint32_t*& frm_nxt, + uint8_t* to, uint8_t* to_end, uint8_t*& to_nxt, + unsigned long Maxcode = 0x10FFFF, codecvt_mode mode = codecvt_mode(0)) +{ + frm_nxt = frm; + to_nxt = to; + if (mode & generate_header) + { + if (to_end-to_nxt < 2) + return codecvt_base::partial; + *to_nxt++ = static_cast(0xFF); + *to_nxt++ = static_cast(0xFE); + } + for (; frm_nxt < frm_end; ++frm_nxt) + { + uint32_t wc = *frm_nxt; + if ((wc & 0xFFFFF800) == 0x00D800 || wc > Maxcode) + return codecvt_base::error; + if (wc < 0x010000) + { + if (to_end-to_nxt < 2) + return codecvt_base::partial; + *to_nxt++ = static_cast(wc); + *to_nxt++ = static_cast(wc >> 8); + } + else + { + if (to_end-to_nxt < 4) + return codecvt_base::partial; + uint16_t t = static_cast( + 0xD800 + | ((((wc & 0x1F0000) >> 16) - 1) << 6) + | ((wc & 0x00FC00) >> 10)); + *to_nxt++ = static_cast(t); + *to_nxt++ = static_cast(t >> 8); + t = static_cast(0xDC00 | 
(wc & 0x03FF)); + *to_nxt++ = static_cast(t); + *to_nxt++ = static_cast(t >> 8); + } + } + return codecvt_base::ok; +} + +static +codecvt_base::result +utf16le_to_ucs4(const uint8_t* frm, const uint8_t* frm_end, const uint8_t*& frm_nxt, + uint32_t* to, uint32_t* to_end, uint32_t*& to_nxt, + unsigned long Maxcode = 0x10FFFF, codecvt_mode mode = codecvt_mode(0)) +{ + frm_nxt = frm; + to_nxt = to; + if (mode & consume_header) + { + if (frm_end-frm_nxt >= 2 && frm_nxt[0] == 0xFF && frm_nxt[1] == 0xFE) + frm_nxt += 2; + } + for (; frm_nxt < frm_end - 1 && to_nxt < to_end; ++to_nxt) + { + uint16_t c1 = frm_nxt[1] << 8 | frm_nxt[0]; + if ((c1 & 0xFC00) == 0xDC00) + return codecvt_base::error; + if ((c1 & 0xFC00) != 0xD800) + { + if (c1 > Maxcode) + return codecvt_base::error; + *to_nxt = static_cast(c1); + frm_nxt += 2; + } + else + { + if (frm_end-frm_nxt < 4) + return codecvt_base::partial; + uint16_t c2 = frm_nxt[3] << 8 | frm_nxt[2]; + if ((c2 & 0xFC00) != 0xDC00) + return codecvt_base::error; + uint32_t t = static_cast( + ((((c1 & 0x03C0) >> 6) + 1) << 16) + | ((c1 & 0x003F) << 10) + | (c2 & 0x03FF)); + if (t > Maxcode) + return codecvt_base::error; + *to_nxt = t; + frm_nxt += 4; + } + } + return frm_nxt < frm_end ? 
codecvt_base::partial : codecvt_base::ok; +} + +static +int +utf16le_to_ucs4_length(const uint8_t* frm, const uint8_t* frm_end, + size_t mx, unsigned long Maxcode = 0x10FFFF, + codecvt_mode mode = codecvt_mode(0)) +{ + const uint8_t* frm_nxt = frm; + frm_nxt = frm; + if (mode & consume_header) + { + if (frm_end-frm_nxt >= 2 && frm_nxt[0] == 0xFF && frm_nxt[1] == 0xFE) + frm_nxt += 2; + } + for (size_t nchar32_t = 0; frm_nxt < frm_end - 1 && nchar32_t < mx; ++nchar32_t) + { + uint16_t c1 = frm_nxt[1] << 8 | frm_nxt[0]; + if ((c1 & 0xFC00) == 0xDC00) + break; + if ((c1 & 0xFC00) != 0xD800) + { + if (c1 > Maxcode) + break; + frm_nxt += 2; + } + else + { + if (frm_end-frm_nxt < 4) + break; + uint16_t c2 = frm_nxt[3] << 8 | frm_nxt[2]; + if ((c2 & 0xFC00) != 0xDC00) + break; + uint32_t t = static_cast( + ((((c1 & 0x03C0) >> 6) + 1) << 16) + | ((c1 & 0x003F) << 10) + | (c2 & 0x03FF)); + if (t > Maxcode) + break; + frm_nxt += 4; + } + } + return static_cast(frm_nxt - frm); +} + +static +codecvt_base::result +ucs2_to_utf16be(const uint16_t* frm, const uint16_t* frm_end, const uint16_t*& frm_nxt, + uint8_t* to, uint8_t* to_end, uint8_t*& to_nxt, + unsigned long Maxcode = 0x10FFFF, codecvt_mode mode = codecvt_mode(0)) +{ + frm_nxt = frm; + to_nxt = to; + if (mode & generate_header) + { + if (to_end-to_nxt < 2) + return codecvt_base::partial; + *to_nxt++ = static_cast(0xFE); + *to_nxt++ = static_cast(0xFF); + } + for (; frm_nxt < frm_end; ++frm_nxt) + { + uint16_t wc = *frm_nxt; + if ((wc & 0xF800) == 0xD800 || wc > Maxcode) + return codecvt_base::error; + if (to_end-to_nxt < 2) + return codecvt_base::partial; + *to_nxt++ = static_cast(wc >> 8); + *to_nxt++ = static_cast(wc); + } + return codecvt_base::ok; +} + +static +codecvt_base::result +utf16be_to_ucs2(const uint8_t* frm, const uint8_t* frm_end, const uint8_t*& frm_nxt, + uint16_t* to, uint16_t* to_end, uint16_t*& to_nxt, + unsigned long Maxcode = 0x10FFFF, codecvt_mode mode = codecvt_mode(0)) +{ + frm_nxt = frm; + 
to_nxt = to; + if (mode & consume_header) + { + if (frm_end-frm_nxt >= 2 && frm_nxt[0] == 0xFE && frm_nxt[1] == 0xFF) + frm_nxt += 2; + } + for (; frm_nxt < frm_end - 1 && to_nxt < to_end; ++to_nxt) + { + uint16_t c1 = frm_nxt[0] << 8 | frm_nxt[1]; + if ((c1 & 0xF800) == 0xD800 || c1 > Maxcode) + return codecvt_base::error; + *to_nxt = c1; + frm_nxt += 2; + } + return frm_nxt < frm_end ? codecvt_base::partial : codecvt_base::ok; +} + +static +int +utf16be_to_ucs2_length(const uint8_t* frm, const uint8_t* frm_end, + size_t mx, unsigned long Maxcode = 0x10FFFF, + codecvt_mode mode = codecvt_mode(0)) +{ + const uint8_t* frm_nxt = frm; + frm_nxt = frm; + if (mode & consume_header) + { + if (frm_end-frm_nxt >= 2 && frm_nxt[0] == 0xFE && frm_nxt[1] == 0xFF) + frm_nxt += 2; + } + for (size_t nchar16_t = 0; frm_nxt < frm_end - 1 && nchar16_t < mx; ++nchar16_t) + { + uint16_t c1 = frm_nxt[0] << 8 | frm_nxt[1]; + if ((c1 & 0xF800) == 0xD800 || c1 > Maxcode) + break; + frm_nxt += 2; + } + return static_cast(frm_nxt - frm); +} + +static +codecvt_base::result +ucs2_to_utf16le(const uint16_t* frm, const uint16_t* frm_end, const uint16_t*& frm_nxt, + uint8_t* to, uint8_t* to_end, uint8_t*& to_nxt, + unsigned long Maxcode = 0x10FFFF, codecvt_mode mode = codecvt_mode(0)) +{ + frm_nxt = frm; + to_nxt = to; + if (mode & generate_header) + { + if (to_end-to_nxt < 2) + return codecvt_base::partial; + *to_nxt++ = static_cast(0xFF); + *to_nxt++ = static_cast(0xFE); + } + for (; frm_nxt < frm_end; ++frm_nxt) + { + uint16_t wc = *frm_nxt; + if ((wc & 0xF800) == 0xD800 || wc > Maxcode) + return codecvt_base::error; + if (to_end-to_nxt < 2) + return codecvt_base::partial; + *to_nxt++ = static_cast(wc); + *to_nxt++ = static_cast(wc >> 8); + } + return codecvt_base::ok; +} + +static +codecvt_base::result +utf16le_to_ucs2(const uint8_t* frm, const uint8_t* frm_end, const uint8_t*& frm_nxt, + uint16_t* to, uint16_t* to_end, uint16_t*& to_nxt, + unsigned long Maxcode = 0x10FFFF, codecvt_mode mode 
= codecvt_mode(0)) +{ + frm_nxt = frm; + to_nxt = to; + if (mode & consume_header) + { + if (frm_end-frm_nxt >= 2 && frm_nxt[0] == 0xFF && frm_nxt[1] == 0xFE) + frm_nxt += 2; + } + for (; frm_nxt < frm_end - 1 && to_nxt < to_end; ++to_nxt) + { + uint16_t c1 = frm_nxt[1] << 8 | frm_nxt[0]; + if ((c1 & 0xF800) == 0xD800 || c1 > Maxcode) + return codecvt_base::error; + *to_nxt = c1; + frm_nxt += 2; + } + return frm_nxt < frm_end ? codecvt_base::partial : codecvt_base::ok; +} + +static +int +utf16le_to_ucs2_length(const uint8_t* frm, const uint8_t* frm_end, + size_t mx, unsigned long Maxcode = 0x10FFFF, + codecvt_mode mode = codecvt_mode(0)) +{ + const uint8_t* frm_nxt = frm; + frm_nxt = frm; + if (mode & consume_header) + { + if (frm_end-frm_nxt >= 2 && frm_nxt[0] == 0xFF && frm_nxt[1] == 0xFE) + frm_nxt += 2; + } + for (size_t nchar16_t = 0; frm_nxt < frm_end - 1 && nchar16_t < mx; ++nchar16_t) + { + uint16_t c1 = frm_nxt[1] << 8 | frm_nxt[0]; + if ((c1 & 0xF800) == 0xD800 || c1 > Maxcode) + break; + frm_nxt += 2; + } + return static_cast(frm_nxt - frm); +} + // template <> class codecvt locale::id codecvt::id; @@ -1317,60 +2701,16 @@ codecvt::do_out(state_type&, const intern_type* frm, const intern_type* frm_end, const intern_type*& frm_nxt, extern_type* to, extern_type* to_end, extern_type*& to_nxt) const { - for (frm_nxt = frm, to_nxt = to; frm_nxt < frm_end; ++frm_nxt) - { - intern_type wc1 = *frm_nxt; - if (wc1 < 0x0080) - { - if (to_end-to_nxt < 1) - return partial; - *to_nxt++ = static_cast(wc1); - } - else if (wc1 < 0x0800) - { - if (to_end-to_nxt < 2) - return partial; - *to_nxt++ = static_cast(0xC0 | (wc1 >> 6)); - *to_nxt++ = static_cast(0x80 | (wc1 & 0x03F)); - } - else if (wc1 < 0xD800) - { - if (to_end-to_nxt < 3) - return partial; - *to_nxt++ = static_cast(0xE0 | (wc1 >> 12)); - *to_nxt++ = static_cast(0x80 | ((wc1 & 0x0FC0) >> 6)); - *to_nxt++ = static_cast(0x80 | (wc1 & 0x003F)); - } - else if (wc1 < 0xDC00) - { - if (frm_end-frm_nxt < 2) - return 
partial; - intern_type wc2 = frm_nxt[1]; - if ((wc2 & 0xFC00) != 0xDC00) - return error; - if (to_end-to_nxt < 4) - return partial; - ++frm_nxt; - unsigned char z = ((wc1 & 0x03C0) >> 6) + 1; - *to_nxt++ = static_cast(0xF0 | (z >> 2)); - *to_nxt++ = static_cast(0x80 | ((z & 0x03) << 4) | ((wc1 & 0x003C) >> 2)); - *to_nxt++ = static_cast(0x80 | ((wc1 & 0x0003) << 4) | ((wc2 & 0x03C0) >> 6)); - *to_nxt++ = static_cast(0x80 | (wc2 & 0x003F)); - } - else if (wc1 < 0xE000) - { - return error; - } - else - { - if (to_end-to_nxt < 3) - return partial; - *to_nxt++ = static_cast(0xE0 | (wc1 >> 12)); - *to_nxt++ = static_cast(0x80 | ((wc1 & 0x0FC0) >> 6)); - *to_nxt++ = static_cast(0x80 | (wc1 & 0x003F)); - } - } - return ok; + const uint16_t* _frm = reinterpret_cast(frm); + const uint16_t* _frm_end = reinterpret_cast(frm_end); + const uint16_t* _frm_nxt = _frm; + uint8_t* _to = reinterpret_cast(to); + uint8_t* _to_end = reinterpret_cast(to_end); + uint8_t* _to_nxt = _to; + result r = utf16_to_utf8(_frm, _frm_end, _frm_nxt, _to, _to_end, _to_nxt); + frm_nxt = frm + (_frm_nxt - _frm); + to_nxt = to + (_to_nxt - _to); + return r; } codecvt::result @@ -1378,100 +2718,16 @@ codecvt::do_in(state_type&, const extern_type* frm, const extern_type* frm_end, const extern_type*& frm_nxt, intern_type* to, intern_type* to_end, intern_type*& to_nxt) const { - for (frm_nxt = frm, to_nxt = to; frm_nxt < frm_end && to_nxt < to_end; ++to_nxt) - { - unsigned char c1 = static_cast(*frm_nxt); - if (c1 < 0x80) - { - *to_nxt = static_cast(c1); - ++frm_nxt; - } - else if (c1 < 0xC2) - { - return error; - } - else if (c1 < 0xE0) - { - if (frm_end-frm_nxt < 2) - return partial; - unsigned char c2 = frm_nxt[1]; - if ((c2 & 0xC0) != 0x80) - return error; - *to_nxt = static_cast(((c1 & 0x1F) << 6) - | (c2 & 0x3F)); - frm_nxt += 2; - } - else if (c1 < 0xF0) - { - if (frm_end-frm_nxt < 3) - return partial; - unsigned char c2 = frm_nxt[1]; - unsigned char c3 = frm_nxt[2]; - switch (c1) - { - case 0xE0: - 
if ((c2 & 0xE0) != 0xA0) - return error; - break; - case 0xED: - if ((c2 & 0xE0) != 0x80) - return error; - break; - default: - if ((c2 & 0xC0) != 0x80) - return error; - break; - } - if ((c3 & 0xC0) != 0x80) - return error; - *to_nxt = static_cast(((c1 & 0x0F) << 12) - | ((c2 & 0x3F) << 6) - | (c3 & 0x3F)); - frm_nxt += 3; - } - else if (c1 < 0xF5) - { - if (frm_end-frm_nxt < 4) - return partial; - unsigned char c2 = frm_nxt[1]; - unsigned char c3 = frm_nxt[2]; - unsigned char c4 = frm_nxt[3]; - switch (c1) - { - case 0xF0: - if (!(0x90 <= c2 && c2 <= 0xBF)) - return error; - break; - case 0xF4: - if ((c2 & 0xF0) != 0x80) - return error; - break; - default: - if ((c2 & 0xC0) != 0x80) - return error; - break; - } - if ((c3 & 0xC0) != 0x80 || (c4 & 0xC0) != 0x80) - return error; - if (to_end-to_nxt < 2) - return partial; - *to_nxt = static_cast( - 0xD800 - | (((((c1 & 0x07) << 2) | ((c2 & 0x30) >> 4)) - 1) << 6) - | ((c2 & 0x0F) << 2) - | ((c3 & 0x30) >> 4)); - *++to_nxt = static_cast( - 0xDC00 - | ((c3 & 0x0F) << 6) - | (c4 & 0x3F)); - frm_nxt += 4; - } - else - { - return error; - } - } - return frm_nxt < frm_end ? 
partial : ok; + const uint8_t* _frm = reinterpret_cast(frm); + const uint8_t* _frm_end = reinterpret_cast(frm_end); + const uint8_t* _frm_nxt = _frm; + uint16_t* _to = reinterpret_cast(to); + uint16_t* _to_end = reinterpret_cast(to_end); + uint16_t* _to_nxt = _to; + result r = utf8_to_utf16(_frm, _frm_end, _frm_nxt, _to, _to_end, _to_nxt); + frm_nxt = frm + (_frm_nxt - _frm); + to_nxt = to + (_to_nxt - _to); + return r; } codecvt::result @@ -1498,82 +2754,9 @@ int codecvt::do_length(state_type&, const extern_type* frm, const extern_type* frm_end, size_t mx) const { - const extern_type* frm_nxt = frm; - for (size_t nchar16_t = 0; frm_nxt < frm_end && nchar16_t < mx; ++nchar16_t) - { - unsigned char c1 = static_cast(*frm_nxt); - if (c1 < 0x80) - { - ++frm_nxt; - } - else if (c1 < 0xC2) - { - break; - } - else if (c1 < 0xE0) - { - if ((frm_end-frm_nxt < 2) || (frm_nxt[1] & 0xC0) != 0x80) - break; - frm_nxt += 2; - } - else if (c1 < 0xF0) - { - if (frm_end-frm_nxt < 3) - break; - unsigned char c2 = frm_nxt[1]; - unsigned char c3 = frm_nxt[2]; - switch (c1) - { - case 0xE0: - if ((c2 & 0xE0) != 0xA0) - return static_cast(frm_nxt - frm); - break; - case 0xED: - if ((c2 & 0xE0) != 0x80) - return static_cast(frm_nxt - frm); - break; - default: - if ((c2 & 0xC0) != 0x80) - return static_cast(frm_nxt - frm); - break; - } - if ((c3 & 0xC0) != 0x80) - break; - frm_nxt += 3; - } - else if (c1 < 0xF5) - { - if (frm_end-frm_nxt < 4 || mx-nchar16_t < 2) - break; - unsigned char c2 = frm_nxt[1]; - unsigned char c3 = frm_nxt[2]; - unsigned char c4 = frm_nxt[3]; - switch (c1) - { - case 0xF0: - if (!(0x90 <= c2 && c2 <= 0xBF)) - return static_cast(frm_nxt - frm); - break; - case 0xF4: - if ((c2 & 0xF0) != 0x80) - return static_cast(frm_nxt - frm); - break; - default: - if ((c2 & 0xC0) != 0x80) - return static_cast(frm_nxt - frm); - break; - } - if ((c3 & 0xC0) != 0x80 || (c4 & 0xC0) != 0x80) - break; - ++nchar16_t; - frm_nxt += 4; - } - else - { - break; - } - } - return 
static_cast(frm_nxt - frm); + const uint8_t* _frm = reinterpret_cast(frm); + const uint8_t* _frm_end = reinterpret_cast(frm_end); + return utf8_to_utf16_length(_frm, _frm_end, mx); } int @@ -1595,43 +2778,16 @@ codecvt::do_out(state_type&, const intern_type* frm, const intern_type* frm_end, const intern_type*& frm_nxt, extern_type* to, extern_type* to_end, extern_type*& to_nxt) const { - for (frm_nxt = frm, to_nxt = to; frm_nxt < frm_end; ++frm_nxt) - { - intern_type wc = *frm_nxt; - if ((wc & 0xFFFFF800) == 0x00D800 || wc >= 0x110000) - return error; - if (wc < 0x000080) - { - if (to_end-to_nxt < 1) - return partial; - *to_nxt++ = static_cast(wc); - } - else if (wc < 0x000800) - { - if (to_end-to_nxt < 2) - return partial; - *to_nxt++ = static_cast(0xC0 | (wc >> 6)); - *to_nxt++ = static_cast(0x80 | (wc & 0x03F)); - } - else if (wc < 0x010000) - { - if (to_end-to_nxt < 3) - return partial; - *to_nxt++ = static_cast(0xE0 | (wc >> 12)); - *to_nxt++ = static_cast(0x80 | ((wc & 0x0FC0) >> 6)); - *to_nxt++ = static_cast(0x80 | (wc & 0x003F)); - } - else // if (wc < 0x110000) - { - if (to_end-to_nxt < 4) - return partial; - *to_nxt++ = static_cast(0xF0 | (wc >> 18)); - *to_nxt++ = static_cast(0x80 | ((wc & 0x03F000) >> 12)); - *to_nxt++ = static_cast(0x80 | ((wc & 0x000FC0) >> 6)); - *to_nxt++ = static_cast(0x80 | (wc & 0x00003F)); - } - } - return ok; + const uint32_t* _frm = reinterpret_cast(frm); + const uint32_t* _frm_end = reinterpret_cast(frm_end); + const uint32_t* _frm_nxt = _frm; + uint8_t* _to = reinterpret_cast(to); + uint8_t* _to_end = reinterpret_cast(to_end); + uint8_t* _to_nxt = _to; + result r = ucs4_to_utf8(_frm, _frm_end, _frm_nxt, _to, _to_end, _to_nxt); + frm_nxt = frm + (_frm_nxt - _frm); + to_nxt = to + (_to_nxt - _to); + return r; } codecvt::result @@ -1639,93 +2795,16 @@ codecvt::do_in(state_type&, const extern_type* frm, const extern_type* frm_end, const extern_type*& frm_nxt, intern_type* to, intern_type* to_end, intern_type*& to_nxt) const { - 
for (frm_nxt = frm, to_nxt = to; frm_nxt < frm_end && to_nxt < to_end; ++to_nxt) - { - unsigned char c1 = static_cast(*frm_nxt); - if (c1 < 0x80) - { - *to_nxt = static_cast(c1); - ++frm_nxt; - } - else if (c1 < 0xC2) - { - return error; - } - else if (c1 < 0xE0) - { - if (frm_end-frm_nxt < 2) - return partial; - unsigned char c2 = frm_nxt[1]; - if ((c2 & 0xC0) != 0x80) - return error; - *to_nxt = static_cast(((c1 & 0x1F) << 6) - | (c2 & 0x3F)); - frm_nxt += 2; - } - else if (c1 < 0xF0) - { - if (frm_end-frm_nxt < 3) - return partial; - unsigned char c2 = frm_nxt[1]; - unsigned char c3 = frm_nxt[2]; - switch (c1) - { - case 0xE0: - if ((c2 & 0xE0) != 0xA0) - return error; - break; - case 0xED: - if ((c2 & 0xE0) != 0x80) - return error; - break; - default: - if ((c2 & 0xC0) != 0x80) - return error; - break; - } - if ((c3 & 0xC0) != 0x80) - return error; - *to_nxt = static_cast(((c1 & 0x0F) << 12) - | ((c2 & 0x3F) << 6) - | (c3 & 0x3F)); - frm_nxt += 3; - } - else if (c1 < 0xF5) - { - if (frm_end-frm_nxt < 4) - return partial; - unsigned char c2 = frm_nxt[1]; - unsigned char c3 = frm_nxt[2]; - unsigned char c4 = frm_nxt[3]; - switch (c1) - { - case 0xF0: - if (!(0x90 <= c2 && c2 <= 0xBF)) - return error; - break; - case 0xF4: - if ((c2 & 0xF0) != 0x80) - return error; - break; - default: - if ((c2 & 0xC0) != 0x80) - return error; - break; - } - if ((c3 & 0xC0) != 0x80 || (c4 & 0xC0) != 0x80) - return error; - *to_nxt = static_cast(((c1 & 0x07) << 18) - | ((c2 & 0x3F) << 12) - | ((c3 & 0x3F) << 6) - | (c4 & 0x3F)); - frm_nxt += 4; - } - else - { - return error; - } - } - return frm_nxt < frm_end ? 
partial : ok; + const uint8_t* _frm = reinterpret_cast(frm); + const uint8_t* _frm_end = reinterpret_cast(frm_end); + const uint8_t* _frm_nxt = _frm; + uint32_t* _to = reinterpret_cast(to); + uint32_t* _to_end = reinterpret_cast(to_end); + uint32_t* _to_nxt = _to; + result r = utf8_to_ucs4(_frm, _frm_end, _frm_nxt, _to, _to_end, _to_nxt); + frm_nxt = frm + (_frm_nxt - _frm); + to_nxt = to + (_to_nxt - _to); + return r; } codecvt::result @@ -1752,81 +2831,9 @@ int codecvt::do_length(state_type&, const extern_type* frm, const extern_type* frm_end, size_t mx) const { - const extern_type* frm_nxt = frm; - for (size_t nchar32_t = 0; frm_nxt < frm_end && nchar32_t < mx; ++nchar32_t) - { - unsigned char c1 = static_cast(*frm_nxt); - if (c1 < 0x80) - { - ++frm_nxt; - } - else if (c1 < 0xC2) - { - break; - } - else if (c1 < 0xE0) - { - if ((frm_end-frm_nxt < 2) || ((frm_nxt[1] & 0xC0) != 0x80)) - break; - frm_nxt += 2; - } - else if (c1 < 0xF0) - { - if (frm_end-frm_nxt < 3) - break; - unsigned char c2 = frm_nxt[1]; - unsigned char c3 = frm_nxt[2]; - switch (c1) - { - case 0xE0: - if ((c2 & 0xE0) != 0xA0) - return static_cast(frm_nxt - frm); - break; - case 0xED: - if ((c2 & 0xE0) != 0x80) - return static_cast(frm_nxt - frm); - break; - default: - if ((c2 & 0xC0) != 0x80) - return static_cast(frm_nxt - frm); - break; - } - if ((c3 & 0xC0) != 0x80) - break; - frm_nxt += 3; - } - else if (c1 < 0xF5) - { - if (frm_end-frm_nxt < 4) - break; - unsigned char c2 = frm_nxt[1]; - unsigned char c3 = frm_nxt[2]; - unsigned char c4 = frm_nxt[3]; - switch (c1) - { - case 0xF0: - if (!(0x90 <= c2 && c2 <= 0xBF)) - return static_cast(frm_nxt - frm); - break; - case 0xF4: - if ((c2 & 0xF0) != 0x80) - return static_cast(frm_nxt - frm); - break; - default: - if ((c2 & 0xC0) != 0x80) - return static_cast(frm_nxt - frm); - break; - } - if ((c3 & 0xC0) != 0x80 || (c4 & 0xC0) != 0x80) - break; - frm_nxt += 4; - } - else - { - break; - } - } - return static_cast(frm_nxt - frm); + const uint8_t* 
_frm = reinterpret_cast(frm); + const uint8_t* _frm_end = reinterpret_cast(frm_end); + return utf8_to_ucs4_length(_frm, _frm_end, mx); } int @@ -1835,80 +2842,46 @@ codecvt::do_max_length() const throw() return 4; } -// template <> class codecvt +// __codecvt_utf8 -locale::id codecvt::id; - -codecvt::~codecvt() -{ -} - -codecvt::result -codecvt::do_out(state_type&, - const intern_type* frm, const intern_type* frm_end, const intern_type*& frm_nxt, +__codecvt_utf8::result +__codecvt_utf8::do_out(state_type&, + const intern_type* frm, const intern_type* frm_end, const intern_type*& frm_nxt, extern_type* to, extern_type* to_end, extern_type*& to_nxt) const { - for (frm_nxt = frm, to_nxt = to; frm_nxt < frm_end; ++frm_nxt) - { - intern_type wc = *frm_nxt; - if ((wc & 0xFFFFF800) == 0x00D800 || wc >= 0x110000) - return error; - if (wc < 0x010000) - { - if (to_end-to_nxt < 1) - return partial; - *to_nxt++ = static_cast(wc); - } - else - { - if (to_end-to_nxt < 2) - return partial; - *to_nxt++ = static_cast( - 0xD800 - | ((((wc & 0x1F0000) >> 16) - 1) << 6) - | ((wc & 0x00FC00) >> 10)); - *to_nxt++ = static_cast( - 0xDC00 - | (wc & 0x03FF)); - } - } - return ok; + const uint32_t* _frm = reinterpret_cast(frm); + const uint32_t* _frm_end = reinterpret_cast(frm_end); + const uint32_t* _frm_nxt = _frm; + uint8_t* _to = reinterpret_cast(to); + uint8_t* _to_end = reinterpret_cast(to_end); + uint8_t* _to_nxt = _to; + result r = ucs4_to_utf8(_frm, _frm_end, _frm_nxt, _to, _to_end, _to_nxt, + _Maxcode_, _Mode_); + frm_nxt = frm + (_frm_nxt - _frm); + to_nxt = to + (_to_nxt - _to); + return r; } -codecvt::result -codecvt::do_in(state_type&, - const extern_type* frm, const extern_type* frm_end, const extern_type*& frm_nxt, +__codecvt_utf8::result +__codecvt_utf8::do_in(state_type&, + const extern_type* frm, const extern_type* frm_end, const extern_type*& frm_nxt, intern_type* to, intern_type* to_end, intern_type*& to_nxt) const { - for (frm_nxt = frm, to_nxt = to; frm_nxt < frm_end 
&& to_nxt < to_end; ++to_nxt) - { - extern_type c1 = *frm_nxt; - if ((c1 & 0xFC00) == 0xDC00) - return error; - if ((c1 & 0xFC00) != 0xD800) - { - *to_nxt = static_cast(c1); - ++frm_nxt; - } - else - { - if (frm_end-frm_nxt < 2) - return partial; - extern_type c2 = frm_nxt[1]; - if ((c2 & 0xFC00) != 0xDC00) - return error; - *to_nxt = static_cast( - ((((c1 & 0x03C0) >> 6) + 1) << 16) - | ((c1 & 0x003F) << 10) - | (c2 & 0x03FF)); - frm_nxt += 2; - } - } - return frm_nxt < frm_end ? partial : ok; + const uint8_t* _frm = reinterpret_cast(frm); + const uint8_t* _frm_end = reinterpret_cast(frm_end); + const uint8_t* _frm_nxt = _frm; + uint32_t* _to = reinterpret_cast(to); + uint32_t* _to_end = reinterpret_cast(to_end); + uint32_t* _to_nxt = _to; + result r = utf8_to_ucs4(_frm, _frm_end, _frm_nxt, _to, _to_end, _to_nxt, + _Maxcode_, _Mode_); + frm_nxt = frm + (_frm_nxt - _frm); + to_nxt = to + (_to_nxt - _to); + return r; } -codecvt::result -codecvt::do_unshift(state_type&, +__codecvt_utf8::result +__codecvt_utf8::do_unshift(state_type&, extern_type* to, extern_type*, extern_type*& to_nxt) const { to_nxt = to; @@ -1916,47 +2889,859 @@ codecvt::do_unshift(state_type&, } int -codecvt::do_encoding() const throw() +__codecvt_utf8::do_encoding() const throw() { return 0; } bool -codecvt::do_always_noconv() const throw() +__codecvt_utf8::do_always_noconv() const throw() { return false; } int -codecvt::do_length(state_type&, +__codecvt_utf8::do_length(state_type&, const extern_type* frm, const extern_type* frm_end, size_t mx) const { - const extern_type* frm_nxt = frm; - for (size_t nchar32_t = 0; frm_nxt < frm_end && nchar32_t < mx; ++nchar32_t) - { - extern_type c1 = *frm_nxt; - if ((c1 & 0xFC00) == 0xDC00) - break; - if ((c1 & 0xFC00) != 0xD800) - { - ++frm_nxt; - } - else - { - if ((frm_end-frm_nxt < 2) || (frm_nxt[1] & 0xFC00) != 0xDC00) - break; - frm_nxt += 2; - } - } - return static_cast(frm_nxt - frm); + const uint8_t* _frm = reinterpret_cast(frm); + const uint8_t* 
_frm_end = reinterpret_cast(frm_end); + return utf8_to_ucs4_length(_frm, _frm_end, mx, _Maxcode_, _Mode_); } int -codecvt::do_max_length() const throw() +__codecvt_utf8::do_max_length() const throw() { + if (_Mode_ & consume_header) + return 7; + return 4; +} + +// __codecvt_utf8 + +__codecvt_utf8::result +__codecvt_utf8::do_out(state_type&, + const intern_type* frm, const intern_type* frm_end, const intern_type*& frm_nxt, + extern_type* to, extern_type* to_end, extern_type*& to_nxt) const +{ + const uint16_t* _frm = reinterpret_cast(frm); + const uint16_t* _frm_end = reinterpret_cast(frm_end); + const uint16_t* _frm_nxt = _frm; + uint8_t* _to = reinterpret_cast(to); + uint8_t* _to_end = reinterpret_cast(to_end); + uint8_t* _to_nxt = _to; + result r = ucs2_to_utf8(_frm, _frm_end, _frm_nxt, _to, _to_end, _to_nxt, + _Maxcode_, _Mode_); + frm_nxt = frm + (_frm_nxt - _frm); + to_nxt = to + (_to_nxt - _to); + return r; +} + +__codecvt_utf8::result +__codecvt_utf8::do_in(state_type&, + const extern_type* frm, const extern_type* frm_end, const extern_type*& frm_nxt, + intern_type* to, intern_type* to_end, intern_type*& to_nxt) const +{ + const uint8_t* _frm = reinterpret_cast(frm); + const uint8_t* _frm_end = reinterpret_cast(frm_end); + const uint8_t* _frm_nxt = _frm; + uint16_t* _to = reinterpret_cast(to); + uint16_t* _to_end = reinterpret_cast(to_end); + uint16_t* _to_nxt = _to; + result r = utf8_to_ucs2(_frm, _frm_end, _frm_nxt, _to, _to_end, _to_nxt, + _Maxcode_, _Mode_); + frm_nxt = frm + (_frm_nxt - _frm); + to_nxt = to + (_to_nxt - _to); + return r; +} + +__codecvt_utf8::result +__codecvt_utf8::do_unshift(state_type&, + extern_type* to, extern_type*, extern_type*& to_nxt) const +{ + to_nxt = to; + return noconv; +} + +int +__codecvt_utf8::do_encoding() const throw() +{ + return 0; +} + +bool +__codecvt_utf8::do_always_noconv() const throw() +{ + return false; +} + +int +__codecvt_utf8::do_length(state_type&, + const extern_type* frm, const extern_type* frm_end, 
size_t mx) const +{ + const uint8_t* _frm = reinterpret_cast(frm); + const uint8_t* _frm_end = reinterpret_cast(frm_end); + return utf8_to_ucs2_length(_frm, _frm_end, mx, _Maxcode_, _Mode_); +} + +int +__codecvt_utf8::do_max_length() const throw() +{ + if (_Mode_ & consume_header) + return 6; + return 3; +} + +// __codecvt_utf8 + +__codecvt_utf8::result +__codecvt_utf8::do_out(state_type&, + const intern_type* frm, const intern_type* frm_end, const intern_type*& frm_nxt, + extern_type* to, extern_type* to_end, extern_type*& to_nxt) const +{ + const uint32_t* _frm = reinterpret_cast(frm); + const uint32_t* _frm_end = reinterpret_cast(frm_end); + const uint32_t* _frm_nxt = _frm; + uint8_t* _to = reinterpret_cast(to); + uint8_t* _to_end = reinterpret_cast(to_end); + uint8_t* _to_nxt = _to; + result r = ucs4_to_utf8(_frm, _frm_end, _frm_nxt, _to, _to_end, _to_nxt, + _Maxcode_, _Mode_); + frm_nxt = frm + (_frm_nxt - _frm); + to_nxt = to + (_to_nxt - _to); + return r; +} + +__codecvt_utf8::result +__codecvt_utf8::do_in(state_type&, + const extern_type* frm, const extern_type* frm_end, const extern_type*& frm_nxt, + intern_type* to, intern_type* to_end, intern_type*& to_nxt) const +{ + const uint8_t* _frm = reinterpret_cast(frm); + const uint8_t* _frm_end = reinterpret_cast(frm_end); + const uint8_t* _frm_nxt = _frm; + uint32_t* _to = reinterpret_cast(to); + uint32_t* _to_end = reinterpret_cast(to_end); + uint32_t* _to_nxt = _to; + result r = utf8_to_ucs4(_frm, _frm_end, _frm_nxt, _to, _to_end, _to_nxt, + _Maxcode_, _Mode_); + frm_nxt = frm + (_frm_nxt - _frm); + to_nxt = to + (_to_nxt - _to); + return r; +} + +__codecvt_utf8::result +__codecvt_utf8::do_unshift(state_type&, + extern_type* to, extern_type*, extern_type*& to_nxt) const +{ + to_nxt = to; + return noconv; +} + +int +__codecvt_utf8::do_encoding() const throw() +{ + return 0; +} + +bool +__codecvt_utf8::do_always_noconv() const throw() +{ + return false; +} + +int +__codecvt_utf8::do_length(state_type&, + const 
extern_type* frm, const extern_type* frm_end, size_t mx) const +{ + const uint8_t* _frm = reinterpret_cast(frm); + const uint8_t* _frm_end = reinterpret_cast(frm_end); + return utf8_to_ucs4_length(_frm, _frm_end, mx, _Maxcode_, _Mode_); +} + +int +__codecvt_utf8::do_max_length() const throw() +{ + if (_Mode_ & consume_header) + return 7; + return 4; +} + +// __codecvt_utf16 + +__codecvt_utf16::result +__codecvt_utf16::do_out(state_type&, + const intern_type* frm, const intern_type* frm_end, const intern_type*& frm_nxt, + extern_type* to, extern_type* to_end, extern_type*& to_nxt) const +{ + const uint32_t* _frm = reinterpret_cast(frm); + const uint32_t* _frm_end = reinterpret_cast(frm_end); + const uint32_t* _frm_nxt = _frm; + uint8_t* _to = reinterpret_cast(to); + uint8_t* _to_end = reinterpret_cast(to_end); + uint8_t* _to_nxt = _to; + result r = ucs4_to_utf16be(_frm, _frm_end, _frm_nxt, _to, _to_end, _to_nxt, + _Maxcode_, _Mode_); + frm_nxt = frm + (_frm_nxt - _frm); + to_nxt = to + (_to_nxt - _to); + return r; +} + +__codecvt_utf16::result +__codecvt_utf16::do_in(state_type&, + const extern_type* frm, const extern_type* frm_end, const extern_type*& frm_nxt, + intern_type* to, intern_type* to_end, intern_type*& to_nxt) const +{ + const uint8_t* _frm = reinterpret_cast(frm); + const uint8_t* _frm_end = reinterpret_cast(frm_end); + const uint8_t* _frm_nxt = _frm; + uint32_t* _to = reinterpret_cast(to); + uint32_t* _to_end = reinterpret_cast(to_end); + uint32_t* _to_nxt = _to; + result r = utf16be_to_ucs4(_frm, _frm_end, _frm_nxt, _to, _to_end, _to_nxt, + _Maxcode_, _Mode_); + frm_nxt = frm + (_frm_nxt - _frm); + to_nxt = to + (_to_nxt - _to); + return r; +} + +__codecvt_utf16::result +__codecvt_utf16::do_unshift(state_type&, + extern_type* to, extern_type*, extern_type*& to_nxt) const +{ + to_nxt = to; + return noconv; +} + +int +__codecvt_utf16::do_encoding() const throw() +{ + return 0; +} + +bool +__codecvt_utf16::do_always_noconv() const throw() +{ + return 
false; +} + +int +__codecvt_utf16::do_length(state_type&, + const extern_type* frm, const extern_type* frm_end, size_t mx) const +{ + const uint8_t* _frm = reinterpret_cast(frm); + const uint8_t* _frm_end = reinterpret_cast(frm_end); + return utf16be_to_ucs4_length(_frm, _frm_end, mx, _Maxcode_, _Mode_); +} + +int +__codecvt_utf16::do_max_length() const throw() +{ + if (_Mode_ & consume_header) + return 6; + return 4; +} + +// __codecvt_utf16 + +__codecvt_utf16::result +__codecvt_utf16::do_out(state_type&, + const intern_type* frm, const intern_type* frm_end, const intern_type*& frm_nxt, + extern_type* to, extern_type* to_end, extern_type*& to_nxt) const +{ + const uint32_t* _frm = reinterpret_cast(frm); + const uint32_t* _frm_end = reinterpret_cast(frm_end); + const uint32_t* _frm_nxt = _frm; + uint8_t* _to = reinterpret_cast(to); + uint8_t* _to_end = reinterpret_cast(to_end); + uint8_t* _to_nxt = _to; + result r = ucs4_to_utf16le(_frm, _frm_end, _frm_nxt, _to, _to_end, _to_nxt, + _Maxcode_, _Mode_); + frm_nxt = frm + (_frm_nxt - _frm); + to_nxt = to + (_to_nxt - _to); + return r; +} + +__codecvt_utf16::result +__codecvt_utf16::do_in(state_type&, + const extern_type* frm, const extern_type* frm_end, const extern_type*& frm_nxt, + intern_type* to, intern_type* to_end, intern_type*& to_nxt) const +{ + const uint8_t* _frm = reinterpret_cast(frm); + const uint8_t* _frm_end = reinterpret_cast(frm_end); + const uint8_t* _frm_nxt = _frm; + uint32_t* _to = reinterpret_cast(to); + uint32_t* _to_end = reinterpret_cast(to_end); + uint32_t* _to_nxt = _to; + result r = utf16le_to_ucs4(_frm, _frm_end, _frm_nxt, _to, _to_end, _to_nxt, + _Maxcode_, _Mode_); + frm_nxt = frm + (_frm_nxt - _frm); + to_nxt = to + (_to_nxt - _to); + return r; +} + +__codecvt_utf16::result +__codecvt_utf16::do_unshift(state_type&, + extern_type* to, extern_type*, extern_type*& to_nxt) const +{ + to_nxt = to; + return noconv; +} + +int +__codecvt_utf16::do_encoding() const throw() +{ + return 0; +} + 
+bool +__codecvt_utf16::do_always_noconv() const throw() +{ + return false; /* this facet's do_in/do_out transcode UTF-16LE bytes and never return noconv; keep in agreement with the big-endian specialization */ +} + +int +__codecvt_utf16::do_length(state_type&, + const extern_type* frm, const extern_type* frm_end, size_t mx) const +{ + const uint8_t* _frm = reinterpret_cast(frm); + const uint8_t* _frm_end = reinterpret_cast(frm_end); + return utf16le_to_ucs4_length(_frm, _frm_end, mx, _Maxcode_, _Mode_); +} + +int +__codecvt_utf16::do_max_length() const throw() +{ + if (_Mode_ & consume_header) + return 6; + return 4; +} + +// __codecvt_utf16 + +__codecvt_utf16::result +__codecvt_utf16::do_out(state_type&, + const intern_type* frm, const intern_type* frm_end, const intern_type*& frm_nxt, + extern_type* to, extern_type* to_end, extern_type*& to_nxt) const +{ + const uint16_t* _frm = reinterpret_cast(frm); + const uint16_t* _frm_end = reinterpret_cast(frm_end); + const uint16_t* _frm_nxt = _frm; + uint8_t* _to = reinterpret_cast(to); + uint8_t* _to_end = reinterpret_cast(to_end); + uint8_t* _to_nxt = _to; + result r = ucs2_to_utf16be(_frm, _frm_end, _frm_nxt, _to, _to_end, _to_nxt, + _Maxcode_, _Mode_); + frm_nxt = frm + (_frm_nxt - _frm); + to_nxt = to + (_to_nxt - _to); + return r; +} + +__codecvt_utf16::result +__codecvt_utf16::do_in(state_type&, + const extern_type* frm, const extern_type* frm_end, const extern_type*& frm_nxt, + intern_type* to, intern_type* to_end, intern_type*& to_nxt) const +{ + const uint8_t* _frm = reinterpret_cast(frm); + const uint8_t* _frm_end = reinterpret_cast(frm_end); + const uint8_t* _frm_nxt = _frm; + uint16_t* _to = reinterpret_cast(to); + uint16_t* _to_end = reinterpret_cast(to_end); + uint16_t* _to_nxt = _to; + result r = utf16be_to_ucs2(_frm, _frm_end, _frm_nxt, _to, _to_end, _to_nxt, + _Maxcode_, _Mode_); + frm_nxt = frm + (_frm_nxt - _frm); + to_nxt = to + (_to_nxt - _to); + return r; +} + +__codecvt_utf16::result +__codecvt_utf16::do_unshift(state_type&, + extern_type* to, extern_type*, extern_type*& to_nxt) const +{ + to_nxt = to; + return noconv; +} + +int 
+__codecvt_utf16::do_encoding() const throw() +{ + return 0; +} + +bool +__codecvt_utf16::do_always_noconv() const throw() +{ + return false; +} + +int +__codecvt_utf16::do_length(state_type&, + const extern_type* frm, const extern_type* frm_end, size_t mx) const +{ + const uint8_t* _frm = reinterpret_cast(frm); + const uint8_t* _frm_end = reinterpret_cast(frm_end); + return utf16be_to_ucs2_length(_frm, _frm_end, mx, _Maxcode_, _Mode_); +} + +int +__codecvt_utf16::do_max_length() const throw() +{ + if (_Mode_ & consume_header) + return 4; return 2; } +// __codecvt_utf16 + +__codecvt_utf16::result +__codecvt_utf16::do_out(state_type&, + const intern_type* frm, const intern_type* frm_end, const intern_type*& frm_nxt, + extern_type* to, extern_type* to_end, extern_type*& to_nxt) const +{ + const uint16_t* _frm = reinterpret_cast(frm); + const uint16_t* _frm_end = reinterpret_cast(frm_end); + const uint16_t* _frm_nxt = _frm; + uint8_t* _to = reinterpret_cast(to); + uint8_t* _to_end = reinterpret_cast(to_end); + uint8_t* _to_nxt = _to; + result r = ucs2_to_utf16le(_frm, _frm_end, _frm_nxt, _to, _to_end, _to_nxt, + _Maxcode_, _Mode_); + frm_nxt = frm + (_frm_nxt - _frm); + to_nxt = to + (_to_nxt - _to); + return r; +} + +__codecvt_utf16::result +__codecvt_utf16::do_in(state_type&, + const extern_type* frm, const extern_type* frm_end, const extern_type*& frm_nxt, + intern_type* to, intern_type* to_end, intern_type*& to_nxt) const +{ + const uint8_t* _frm = reinterpret_cast(frm); + const uint8_t* _frm_end = reinterpret_cast(frm_end); + const uint8_t* _frm_nxt = _frm; + uint16_t* _to = reinterpret_cast(to); + uint16_t* _to_end = reinterpret_cast(to_end); + uint16_t* _to_nxt = _to; + result r = utf16le_to_ucs2(_frm, _frm_end, _frm_nxt, _to, _to_end, _to_nxt, + _Maxcode_, _Mode_); + frm_nxt = frm + (_frm_nxt - _frm); + to_nxt = to + (_to_nxt - _to); + return r; +} + +__codecvt_utf16::result +__codecvt_utf16::do_unshift(state_type&, + extern_type* to, extern_type*, 
extern_type*& to_nxt) const +{ + to_nxt = to; + return noconv; +} + +int +__codecvt_utf16::do_encoding() const throw() +{ + return 0; +} + +bool +__codecvt_utf16::do_always_noconv() const throw() +{ + return false; /* this facet's do_in/do_out transcode UTF-16LE bytes and never return noconv; keep in agreement with the big-endian specialization */ +} + +int +__codecvt_utf16::do_length(state_type&, + const extern_type* frm, const extern_type* frm_end, size_t mx) const +{ + const uint8_t* _frm = reinterpret_cast(frm); + const uint8_t* _frm_end = reinterpret_cast(frm_end); + return utf16le_to_ucs2_length(_frm, _frm_end, mx, _Maxcode_, _Mode_); +} + +int +__codecvt_utf16::do_max_length() const throw() +{ + if (_Mode_ & consume_header) + return 4; + return 2; +} + +// __codecvt_utf16 + +__codecvt_utf16::result +__codecvt_utf16::do_out(state_type&, + const intern_type* frm, const intern_type* frm_end, const intern_type*& frm_nxt, + extern_type* to, extern_type* to_end, extern_type*& to_nxt) const +{ + const uint32_t* _frm = reinterpret_cast(frm); + const uint32_t* _frm_end = reinterpret_cast(frm_end); + const uint32_t* _frm_nxt = _frm; + uint8_t* _to = reinterpret_cast(to); + uint8_t* _to_end = reinterpret_cast(to_end); + uint8_t* _to_nxt = _to; + result r = ucs4_to_utf16be(_frm, _frm_end, _frm_nxt, _to, _to_end, _to_nxt, + _Maxcode_, _Mode_); + frm_nxt = frm + (_frm_nxt - _frm); + to_nxt = to + (_to_nxt - _to); + return r; +} + +__codecvt_utf16::result +__codecvt_utf16::do_in(state_type&, + const extern_type* frm, const extern_type* frm_end, const extern_type*& frm_nxt, + intern_type* to, intern_type* to_end, intern_type*& to_nxt) const +{ + const uint8_t* _frm = reinterpret_cast(frm); + const uint8_t* _frm_end = reinterpret_cast(frm_end); + const uint8_t* _frm_nxt = _frm; + uint32_t* _to = reinterpret_cast(to); + uint32_t* _to_end = reinterpret_cast(to_end); + uint32_t* _to_nxt = _to; + result r = utf16be_to_ucs4(_frm, _frm_end, _frm_nxt, _to, _to_end, _to_nxt, + _Maxcode_, _Mode_); + frm_nxt = frm + (_frm_nxt - _frm); + to_nxt = to + (_to_nxt - _to); + return r; +} + +__codecvt_utf16::result 
+__codecvt_utf16::do_unshift(state_type&, + extern_type* to, extern_type*, extern_type*& to_nxt) const +{ + to_nxt = to; + return noconv; +} + +int +__codecvt_utf16::do_encoding() const throw() +{ + return 0; +} + +bool +__codecvt_utf16::do_always_noconv() const throw() +{ + return false; +} + +int +__codecvt_utf16::do_length(state_type&, + const extern_type* frm, const extern_type* frm_end, size_t mx) const +{ + const uint8_t* _frm = reinterpret_cast(frm); + const uint8_t* _frm_end = reinterpret_cast(frm_end); + return utf16be_to_ucs4_length(_frm, _frm_end, mx, _Maxcode_, _Mode_); +} + +int +__codecvt_utf16::do_max_length() const throw() +{ + if (_Mode_ & consume_header) + return 6; + return 4; +} + +// __codecvt_utf16 + +__codecvt_utf16::result +__codecvt_utf16::do_out(state_type&, + const intern_type* frm, const intern_type* frm_end, const intern_type*& frm_nxt, + extern_type* to, extern_type* to_end, extern_type*& to_nxt) const +{ + const uint32_t* _frm = reinterpret_cast(frm); + const uint32_t* _frm_end = reinterpret_cast(frm_end); + const uint32_t* _frm_nxt = _frm; + uint8_t* _to = reinterpret_cast(to); + uint8_t* _to_end = reinterpret_cast(to_end); + uint8_t* _to_nxt = _to; + result r = ucs4_to_utf16le(_frm, _frm_end, _frm_nxt, _to, _to_end, _to_nxt, + _Maxcode_, _Mode_); + frm_nxt = frm + (_frm_nxt - _frm); + to_nxt = to + (_to_nxt - _to); + return r; +} + +__codecvt_utf16::result +__codecvt_utf16::do_in(state_type&, + const extern_type* frm, const extern_type* frm_end, const extern_type*& frm_nxt, + intern_type* to, intern_type* to_end, intern_type*& to_nxt) const +{ + const uint8_t* _frm = reinterpret_cast(frm); + const uint8_t* _frm_end = reinterpret_cast(frm_end); + const uint8_t* _frm_nxt = _frm; + uint32_t* _to = reinterpret_cast(to); + uint32_t* _to_end = reinterpret_cast(to_end); + uint32_t* _to_nxt = _to; + result r = utf16le_to_ucs4(_frm, _frm_end, _frm_nxt, _to, _to_end, _to_nxt, + _Maxcode_, _Mode_); + frm_nxt = frm + (_frm_nxt - _frm); + to_nxt 
= to + (_to_nxt - _to); + return r; +} + +__codecvt_utf16::result +__codecvt_utf16::do_unshift(state_type&, + extern_type* to, extern_type*, extern_type*& to_nxt) const +{ + to_nxt = to; + return noconv; +} + +int +__codecvt_utf16::do_encoding() const throw() +{ + return 0; +} + +bool +__codecvt_utf16::do_always_noconv() const throw() +{ + return false; /* this facet's do_in/do_out transcode UTF-16LE bytes and never return noconv; keep in agreement with the big-endian specialization */ +} + +int +__codecvt_utf16::do_length(state_type&, + const extern_type* frm, const extern_type* frm_end, size_t mx) const +{ + const uint8_t* _frm = reinterpret_cast(frm); + const uint8_t* _frm_end = reinterpret_cast(frm_end); + return utf16le_to_ucs4_length(_frm, _frm_end, mx, _Maxcode_, _Mode_); +} + +int +__codecvt_utf16::do_max_length() const throw() +{ + if (_Mode_ & consume_header) + return 6; + return 4; +} + +// __codecvt_utf8_utf16 + +__codecvt_utf8_utf16::result +__codecvt_utf8_utf16::do_out(state_type&, + const intern_type* frm, const intern_type* frm_end, const intern_type*& frm_nxt, + extern_type* to, extern_type* to_end, extern_type*& to_nxt) const +{ + const uint32_t* _frm = reinterpret_cast(frm); + const uint32_t* _frm_end = reinterpret_cast(frm_end); + const uint32_t* _frm_nxt = _frm; + uint8_t* _to = reinterpret_cast(to); + uint8_t* _to_end = reinterpret_cast(to_end); + uint8_t* _to_nxt = _to; + result r = utf16_to_utf8(_frm, _frm_end, _frm_nxt, _to, _to_end, _to_nxt, + _Maxcode_, _Mode_); + frm_nxt = frm + (_frm_nxt - _frm); + to_nxt = to + (_to_nxt - _to); + return r; +} + +__codecvt_utf8_utf16::result +__codecvt_utf8_utf16::do_in(state_type&, + const extern_type* frm, const extern_type* frm_end, const extern_type*& frm_nxt, + intern_type* to, intern_type* to_end, intern_type*& to_nxt) const +{ + const uint8_t* _frm = reinterpret_cast(frm); + const uint8_t* _frm_end = reinterpret_cast(frm_end); + const uint8_t* _frm_nxt = _frm; + uint32_t* _to = reinterpret_cast(to); + uint32_t* _to_end = reinterpret_cast(to_end); + uint32_t* _to_nxt = _to; + result r = utf8_to_utf16(_frm, _frm_end, _frm_nxt, _to, 
_to_end, _to_nxt, + _Maxcode_, _Mode_); + frm_nxt = frm + (_frm_nxt - _frm); + to_nxt = to + (_to_nxt - _to); + return r; +} + +__codecvt_utf8_utf16::result +__codecvt_utf8_utf16::do_unshift(state_type&, + extern_type* to, extern_type*, extern_type*& to_nxt) const +{ + to_nxt = to; + return noconv; +} + +int +__codecvt_utf8_utf16::do_encoding() const throw() +{ + return 0; +} + +bool +__codecvt_utf8_utf16::do_always_noconv() const throw() +{ + return false; +} + +int +__codecvt_utf8_utf16::do_length(state_type&, + const extern_type* frm, const extern_type* frm_end, size_t mx) const +{ + const uint8_t* _frm = reinterpret_cast(frm); + const uint8_t* _frm_end = reinterpret_cast(frm_end); + return utf8_to_utf16_length(_frm, _frm_end, mx, _Maxcode_, _Mode_); +} + +int +__codecvt_utf8_utf16::do_max_length() const throw() +{ + if (_Mode_ & consume_header) + return 7; + return 4; +} + +// __codecvt_utf8_utf16 + +__codecvt_utf8_utf16::result +__codecvt_utf8_utf16::do_out(state_type&, + const intern_type* frm, const intern_type* frm_end, const intern_type*& frm_nxt, + extern_type* to, extern_type* to_end, extern_type*& to_nxt) const +{ + const uint16_t* _frm = reinterpret_cast(frm); + const uint16_t* _frm_end = reinterpret_cast(frm_end); + const uint16_t* _frm_nxt = _frm; + uint8_t* _to = reinterpret_cast(to); + uint8_t* _to_end = reinterpret_cast(to_end); + uint8_t* _to_nxt = _to; + result r = utf16_to_utf8(_frm, _frm_end, _frm_nxt, _to, _to_end, _to_nxt, + _Maxcode_, _Mode_); + frm_nxt = frm + (_frm_nxt - _frm); + to_nxt = to + (_to_nxt - _to); + return r; +} + +__codecvt_utf8_utf16::result +__codecvt_utf8_utf16::do_in(state_type&, + const extern_type* frm, const extern_type* frm_end, const extern_type*& frm_nxt, + intern_type* to, intern_type* to_end, intern_type*& to_nxt) const +{ + const uint8_t* _frm = reinterpret_cast(frm); + const uint8_t* _frm_end = reinterpret_cast(frm_end); + const uint8_t* _frm_nxt = _frm; + uint16_t* _to = reinterpret_cast(to); + uint16_t* 
_to_end = reinterpret_cast(to_end); + uint16_t* _to_nxt = _to; + result r = utf8_to_utf16(_frm, _frm_end, _frm_nxt, _to, _to_end, _to_nxt, + _Maxcode_, _Mode_); + frm_nxt = frm + (_frm_nxt - _frm); + to_nxt = to + (_to_nxt - _to); + return r; +} + +__codecvt_utf8_utf16::result +__codecvt_utf8_utf16::do_unshift(state_type&, + extern_type* to, extern_type*, extern_type*& to_nxt) const +{ + to_nxt = to; + return noconv; +} + +int +__codecvt_utf8_utf16::do_encoding() const throw() +{ + return 0; +} + +bool +__codecvt_utf8_utf16::do_always_noconv() const throw() +{ + return false; +} + +int +__codecvt_utf8_utf16::do_length(state_type&, + const extern_type* frm, const extern_type* frm_end, size_t mx) const +{ + const uint8_t* _frm = reinterpret_cast(frm); + const uint8_t* _frm_end = reinterpret_cast(frm_end); + return utf8_to_utf16_length(_frm, _frm_end, mx, _Maxcode_, _Mode_); +} + +int +__codecvt_utf8_utf16::do_max_length() const throw() +{ + if (_Mode_ & consume_header) + return 7; + return 4; +} + +// __codecvt_utf8_utf16 + +__codecvt_utf8_utf16::result +__codecvt_utf8_utf16::do_out(state_type&, + const intern_type* frm, const intern_type* frm_end, const intern_type*& frm_nxt, + extern_type* to, extern_type* to_end, extern_type*& to_nxt) const +{ + const uint32_t* _frm = reinterpret_cast(frm); + const uint32_t* _frm_end = reinterpret_cast(frm_end); + const uint32_t* _frm_nxt = _frm; + uint8_t* _to = reinterpret_cast(to); + uint8_t* _to_end = reinterpret_cast(to_end); + uint8_t* _to_nxt = _to; + result r = utf16_to_utf8(_frm, _frm_end, _frm_nxt, _to, _to_end, _to_nxt, + _Maxcode_, _Mode_); + frm_nxt = frm + (_frm_nxt - _frm); + to_nxt = to + (_to_nxt - _to); + return r; +} + +__codecvt_utf8_utf16::result +__codecvt_utf8_utf16::do_in(state_type&, + const extern_type* frm, const extern_type* frm_end, const extern_type*& frm_nxt, + intern_type* to, intern_type* to_end, intern_type*& to_nxt) const +{ + const uint8_t* _frm = reinterpret_cast(frm); + const uint8_t* _frm_end 
= reinterpret_cast(frm_end); + const uint8_t* _frm_nxt = _frm; + uint32_t* _to = reinterpret_cast(to); + uint32_t* _to_end = reinterpret_cast(to_end); + uint32_t* _to_nxt = _to; + result r = utf8_to_utf16(_frm, _frm_end, _frm_nxt, _to, _to_end, _to_nxt, + _Maxcode_, _Mode_); + frm_nxt = frm + (_frm_nxt - _frm); + to_nxt = to + (_to_nxt - _to); + return r; +} + +__codecvt_utf8_utf16::result +__codecvt_utf8_utf16::do_unshift(state_type&, + extern_type* to, extern_type*, extern_type*& to_nxt) const +{ + to_nxt = to; + return noconv; +} + +int +__codecvt_utf8_utf16::do_encoding() const throw() +{ + return 0; +} + +bool +__codecvt_utf8_utf16::do_always_noconv() const throw() +{ + return false; +} + +int +__codecvt_utf8_utf16::do_length(state_type&, + const extern_type* frm, const extern_type* frm_end, size_t mx) const +{ + const uint8_t* _frm = reinterpret_cast(frm); + const uint8_t* _frm_end = reinterpret_cast(frm_end); + return utf8_to_utf16_length(_frm, _frm_end, mx, _Maxcode_, _Mode_); +} + +int +__codecvt_utf8_utf16::do_max_length() const throw() +{ + if (_Mode_ & consume_header) + return 7; + return 4; +} + // __narrow_to_utf8<16> __narrow_to_utf8<16>::~__narrow_to_utf8() diff --git a/test/localization/locale.categories/category.ctype/locale.codecvt/locale.codecvt.members/utf_sanity_check.pass.cpp b/test/localization/locale.categories/category.ctype/locale.codecvt/locale.codecvt.members/utf_sanity_check.pass.cpp index 33fd85b0..3e98cf55 100644 --- a/test/localization/locale.categories/category.ctype/locale.codecvt/locale.codecvt.members/utf_sanity_check.pass.cpp +++ b/test/localization/locale.categories/category.ctype/locale.codecvt/locale.codecvt.members/utf_sanity_check.pass.cpp @@ -16,6 +16,7 @@ // sanity check #include +#include #include #include @@ -24,11 +25,11 @@ int main() { typedef std::codecvt F32_8; typedef std::codecvt F16_8; - typedef std::codecvt F32_16; + typedef std::codecvt_utf16 F32_16; std::locale l = std::locale(std::locale::classic(), new 
F32_16); - const F32_8& f32_8 = std::use_facet(l); + const F32_8& f32_8 = std::use_facet(std::locale::classic()); const F32_16& f32_16 = std::use_facet(l); - const F16_8& f16_8 = std::use_facet(l); + const F16_8& f16_8 = std::use_facet(std::locale::classic()); std::mbstate_t mbs = {0}; F32_8::intern_type* c32p; F16_8::intern_type* c16p; @@ -38,23 +39,27 @@ int main() const F32_8::extern_type* c_c8p; F32_8::intern_type c32; F16_8::intern_type c16[2]; + char c16c[4]; + char* c16cp; F32_8::extern_type c8[4]; for (F32_8::intern_type c32x = 0; c32x < 0x110003; ++c32x) { if (0xD800 <= c32x && c32x < 0xE000 || c32x >= 0x110000) { - assert(f32_16.out(mbs, &c32x, &c32x+1, c_c32p, c16+0, c16+2, c16p) == F32_8::error); + assert(f32_16.out(mbs, &c32x, &c32x+1, c_c32p, c16c+0, c16c+4, c16cp) == F32_8::error); assert(f32_8.out(mbs, &c32x, &c32x+1, c_c32p, c8, c8+4, c8p) == F32_8::error); } else { - assert(f32_16.out(mbs, &c32x, &c32x+1, c_c32p, c16, c16+2, c16p) == F32_8::ok); + assert(f32_16.out(mbs, &c32x, &c32x+1, c_c32p, c16c, c16c+4, c16cp) == F32_8::ok); assert(c_c32p-&c32x == 1); if (c32x < 0x10000) - assert(c16p-c16 == 1); + assert(c16cp-c16c == 2); else - assert(c16p-c16 == 2); - c_c16p = c16p; + assert(c16cp-c16c == 4); + for (int i = 0; i < (c16cp - c16c) / 2; ++i) + c16[i] = (unsigned char)c16c[2*i] << 8 | (unsigned char)c16c[2*i+1]; + c_c16p = c16 + (c16cp - c16c) / 2; assert(f16_8.out(mbs, c16, c_c16p, c_c16p, c8, c8+4, c8p) == F32_8::ok); if (c32x < 0x10000) assert(c_c16p-c16 == 1); @@ -104,12 +109,17 @@ int main() assert(c16p-c16 == 1); else assert(c16p-c16 == 2); - c_c16p = c16p; - assert(f32_16.in(mbs, c16, c_c16p, c_c16p, &c32, &c32+1, c32p) == F32_8::ok); + for (int i = 0; i < c16p-c16; ++i) + { + c16c[2*i] = static_cast(c16[i] >> 8); + c16c[2*i+1] = static_cast(c16[i]); + } + const char* c_c16cp = c16c + (c16p-c16)*2; + assert(f32_16.in(mbs, c16c, c_c16cp, c_c16cp, &c32, &c32+1, c32p) == F32_8::ok); if (c32x < 0x10000) - assert(c_c16p-c16 == 1); + 
assert(c_c16cp-c16c == 2); else - assert(c_c16p-c16 == 2); + assert(c_c16cp-c16c == 4); assert(c32p-&c32 == 1); assert(c32 == c32x); } diff --git a/test/localization/locale.categories/category.time/locale.time.put/locale.time.put.members/put2.pass.cpp b/test/localization/locale.categories/category.time/locale.time.put/locale.time.put.members/put2.pass.cpp index 8aa99b00..101bceb2 100644 --- a/test/localization/locale.categories/category.time/locale.time.put/locale.time.put.members/put2.pass.cpp +++ b/test/localization/locale.categories/category.time/locale.time.put/locale.time.put.members/put2.pass.cpp @@ -33,7 +33,7 @@ int main() const my_facet f(1); char str[200]; output_iterator iter; - tm t; + tm t = {0}; t.tm_sec = 6; t.tm_min = 3; t.tm_hour = 13; diff --git a/test/localization/locale.stdcvt/codecvt_mode.pass.cpp b/test/localization/locale.stdcvt/codecvt_mode.pass.cpp new file mode 100644 index 00000000..92043a8a --- /dev/null +++ b/test/localization/locale.stdcvt/codecvt_mode.pass.cpp @@ -0,0 +1,29 @@ +//===----------------------------------------------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +// + +// enum codecvt_mode +// { +// consume_header = 4, +// generate_header = 2, +// little_endian = 1 +// }; + +#include +#include + +int main() +{ + assert(std::consume_header == 4); + assert(std::generate_header == 2); + assert(std::little_endian == 1); + std::codecvt_mode e = std::consume_header; + assert(e == 4); +} diff --git a/test/localization/locale.stdcvt/codecvt_utf16.pass.cpp b/test/localization/locale.stdcvt/codecvt_utf16.pass.cpp new file mode 100644 index 00000000..25998952 --- /dev/null +++ b/test/localization/locale.stdcvt/codecvt_utf16.pass.cpp @@ -0,0 +1,56 @@ +//===----------------------------------------------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +// + +// template +// class codecvt_utf16 +// : public codecvt +// { +// // unspecified +// }; + +// Not a portable test + +#include +#include + +int outstanding_news = 0; + +void* operator new(std::size_t s) throw(std::bad_alloc) +{ + ++outstanding_news; + return std::malloc(s); +} + +void operator delete(void* p) throw() +{ + if (p) + { + --outstanding_news; + std::free(p); + } +} + +int main() +{ + assert(outstanding_news == 0); + { + typedef std::codecvt_utf16 C; + C c; + assert(outstanding_news == 0); + } + { + typedef std::codecvt_utf16 C; + std::locale loc(std::locale::classic(), new C); + assert(outstanding_news != 0); + } + assert(outstanding_news == 0); +} diff --git a/test/localization/locale.stdcvt/codecvt_utf16_always_noconv.pass.cpp b/test/localization/locale.stdcvt/codecvt_utf16_always_noconv.pass.cpp new file mode 100644 index 00000000..8d9c197e --- /dev/null +++ b/test/localization/locale.stdcvt/codecvt_utf16_always_noconv.pass.cpp @@ -0,0 +1,45 @@ 
+//===----------------------------------------------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +// + +// template +// class codecvt_utf16 +// : public codecvt +// { +// // unspecified +// }; + +// bool always_noconv() const throw(); + +#include +#include + +int main() +{ + { + typedef std::codecvt_utf16 C; + C c; + bool r = c.always_noconv(); + assert(r == false); + } + { + typedef std::codecvt_utf16 C; + C c; + bool r = c.always_noconv(); + assert(r == false); + } + { + typedef std::codecvt_utf16 C; + C c; + bool r = c.always_noconv(); + assert(r == false); + } +} diff --git a/test/localization/locale.stdcvt/codecvt_utf16_encoding.pass.cpp b/test/localization/locale.stdcvt/codecvt_utf16_encoding.pass.cpp new file mode 100644 index 00000000..a95f78fe --- /dev/null +++ b/test/localization/locale.stdcvt/codecvt_utf16_encoding.pass.cpp @@ -0,0 +1,45 @@ +//===----------------------------------------------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +// + +// template +// class codecvt_utf16 +// : public codecvt +// { +// // unspecified +// }; + +// int encoding() const throw(); + +#include +#include + +int main() +{ + { + typedef std::codecvt_utf16 C; + C c; + int r = c.encoding(); + assert(r == 0); + } + { + typedef std::codecvt_utf16 C; + C c; + int r = c.encoding(); + assert(r == 0); + } + { + typedef std::codecvt_utf16 C; + C c; + int r = c.encoding(); + assert(r == 0); + } +} diff --git a/test/localization/locale.stdcvt/codecvt_utf16_in.pass.cpp b/test/localization/locale.stdcvt/codecvt_utf16_in.pass.cpp new file mode 100644 index 00000000..0572b80e --- /dev/null +++ b/test/localization/locale.stdcvt/codecvt_utf16_in.pass.cpp @@ -0,0 +1,739 @@ +//===----------------------------------------------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +// + +// template +// class codecvt_utf16 +// : public codecvt +// { +// // unspecified +// }; + +// result +// in(stateT& state, +// const externT* from, const externT* from_end, const externT*& from_next, +// internT* to, internT* to_end, internT*& to_next) const; + +#include +#include + +int main() +{ + { + typedef std::codecvt_utf16 C; + C c; + wchar_t w = 0; + char n[4] = {0xD8, 0xC0, 0xDC, 0x03}; + wchar_t* wp = nullptr; + std::mbstate_t m; + const char* np = nullptr; + std::codecvt_base::result r = c.in(m, n, n+4, np, &w, &w+1, wp); + assert(r == std::codecvt_base::ok); + assert(wp == &w+1); + assert(np == n+4); + assert(w == 0x40003); + + n[0] = char(0x10); + n[1] = char(0x05); + r = c.in(m, n, n+2, np, &w, &w+1, wp); + assert(r == std::codecvt_base::ok); + assert(wp == &w+1); + assert(np == n+2); + assert(w == 0x1005); + + n[0] = char(0x04); + n[1] = char(0x53); + r = c.in(m, n, n+2, np, &w, &w+1, wp); + assert(r == std::codecvt_base::ok); + assert(wp == &w+1); + assert(np == n+2); + assert(w == 0x453); + + w = 0x56; + n[0] = char(0x00); + n[1] = char(0x56); + r = c.in(m, n, n+2, np, &w, &w+1, wp); + assert(r == std::codecvt_base::ok); + assert(wp == &w+1); + assert(np == n+2); + assert(w == 0x56); + } + { + typedef std::codecvt_utf16 C; + C c; + wchar_t w = 0; + char n[4] = {0xD8, 0xC0, 0xDC, 0x03}; + wchar_t* wp = nullptr; + std::mbstate_t m; + const char* np = nullptr; + std::codecvt_base::result r = c.in(m, n, n+4, np, &w, &w+1, wp); + assert(r == std::codecvt_base::error); + assert(wp == &w); + assert(np == n); + assert(w == 0); + + n[0] = char(0x10); + n[1] = char(0x05); + r = c.in(m, n, n+2, np, &w, &w+1, wp); + assert(r == std::codecvt_base::error); + assert(wp == &w); + assert(np == n); + assert(w == 0); + + n[0] = char(0x04); + n[1] = char(0x53); + r = c.in(m, n, n+2, np, &w, &w+1, wp); + assert(r == std::codecvt_base::ok); + assert(wp == &w+1); + assert(np 
== n+2); + assert(w == 0x453); + + w = 0x56; + n[0] = char(0x00); + n[1] = char(0x56); + r = c.in(m, n, n+2, np, &w, &w+1, wp); + assert(r == std::codecvt_base::ok); + assert(wp == &w+1); + assert(np == n+2); + assert(w == 0x56); + } + { + typedef std::codecvt_utf16 C; + C c; + wchar_t w = 0; + char n[6] = {0xFE, 0xFF, 0xD8, 0xC0, 0xDC, 0x03}; + wchar_t* wp = nullptr; + std::mbstate_t m; + const char* np = nullptr; + std::codecvt_base::result r = c.in(m, n, n+6, np, &w, &w+1, wp); + assert(r == std::codecvt_base::ok); + assert(wp == &w+1); + assert(np == n+6); + assert(w == 0x40003); + + n[0] = char(0x10); + n[1] = char(0x05); + r = c.in(m, n, n+2, np, &w, &w+1, wp); + assert(r == std::codecvt_base::ok); + assert(wp == &w+1); + assert(np == n+2); + assert(w == 0x1005); + + n[0] = char(0x04); + n[1] = char(0x53); + r = c.in(m, n, n+2, np, &w, &w+1, wp); + assert(r == std::codecvt_base::ok); + assert(wp == &w+1); + assert(np == n+2); + assert(w == 0x453); + + w = 0x56; + n[0] = char(0x00); + n[1] = char(0x56); + r = c.in(m, n, n+2, np, &w, &w+1, wp); + assert(r == std::codecvt_base::ok); + assert(wp == &w+1); + assert(np == n+2); + assert(w == 0x56); + } + { + typedef std::codecvt_utf16 C; + C c; + wchar_t w = 0; + char n[4] = {0xC0, 0xD8, 0x03, 0xDC}; + wchar_t* wp = nullptr; + std::mbstate_t m; + const char* np = nullptr; + std::codecvt_base::result r = c.in(m, n, n+4, np, &w, &w+1, wp); + assert(r == std::codecvt_base::ok); + assert(wp == &w+1); + assert(np == n+4); + assert(w == 0x40003); + + n[1] = char(0x10); + n[0] = char(0x05); + r = c.in(m, n, n+2, np, &w, &w+1, wp); + assert(r == std::codecvt_base::ok); + assert(wp == &w+1); + assert(np == n+2); + assert(w == 0x1005); + + n[1] = char(0x04); + n[0] = char(0x53); + r = c.in(m, n, n+2, np, &w, &w+1, wp); + assert(r == std::codecvt_base::ok); + assert(wp == &w+1); + assert(np == n+2); + assert(w == 0x453); + + w = 0x56; + n[1] = char(0x00); + n[0] = char(0x56); + r = c.in(m, n, n+2, np, &w, &w+1, wp); + 
assert(r == std::codecvt_base::ok); + assert(wp == &w+1); + assert(np == n+2); + assert(w == 0x56); + } + { + typedef std::codecvt_utf16 C; + C c; + wchar_t w = 0; + char n[4] = {0xC0, 0xD8, 0x03, 0xDC}; + wchar_t* wp = nullptr; + std::mbstate_t m; + const char* np = nullptr; + std::codecvt_base::result r = c.in(m, n, n+4, np, &w, &w+1, wp); + assert(r == std::codecvt_base::error); + assert(wp == &w); + assert(np == n); + assert(w == 0); + + n[1] = char(0x10); + n[0] = char(0x05); + r = c.in(m, n, n+2, np, &w, &w+1, wp); + assert(r == std::codecvt_base::error); + assert(wp == &w); + assert(np == n); + assert(w == 0); + + n[1] = char(0x04); + n[0] = char(0x53); + r = c.in(m, n, n+2, np, &w, &w+1, wp); + assert(r == std::codecvt_base::ok); + assert(wp == &w+1); + assert(np == n+2); + assert(w == 0x453); + + w = 0x56; + n[1] = char(0x00); + n[0] = char(0x56); + r = c.in(m, n, n+2, np, &w, &w+1, wp); + assert(r == std::codecvt_base::ok); + assert(wp == &w+1); + assert(np == n+2); + assert(w == 0x56); + } + { + typedef std::codecvt_utf16 C; + C c; + wchar_t w = 0; + char n[6] = {0xFF, 0xFE, 0xC0, 0xD8, 0x03, 0xDC}; + wchar_t* wp = nullptr; + std::mbstate_t m; + const char* np = nullptr; + std::codecvt_base::result r = c.in(m, n, n+6, np, &w, &w+1, wp); + assert(r == std::codecvt_base::ok); + assert(wp == &w+1); + assert(np == n+6); + assert(w == 0x40003); + + n[1] = char(0x10); + n[0] = char(0x05); + r = c.in(m, n, n+2, np, &w, &w+1, wp); + assert(r == std::codecvt_base::ok); + assert(wp == &w+1); + assert(np == n+2); + assert(w == 0x1005); + + n[1] = char(0x04); + n[0] = char(0x53); + r = c.in(m, n, n+2, np, &w, &w+1, wp); + assert(r == std::codecvt_base::ok); + assert(wp == &w+1); + assert(np == n+2); + assert(w == 0x453); + + w = 0x56; + n[1] = char(0x00); + n[0] = char(0x56); + r = c.in(m, n, n+2, np, &w, &w+1, wp); + assert(r == std::codecvt_base::ok); + assert(wp == &w+1); + assert(np == n+2); + assert(w == 0x56); + } + { + typedef std::codecvt_utf16 C; + C c; + 
char32_t w = 0; + char n[4] = {0xD8, 0xC0, 0xDC, 0x03}; + char32_t* wp = nullptr; + std::mbstate_t m; + const char* np = nullptr; + std::codecvt_base::result r = c.in(m, n, n+4, np, &w, &w+1, wp); + assert(r == std::codecvt_base::ok); + assert(wp == &w+1); + assert(np == n+4); + assert(w == 0x40003); + + n[0] = char(0x10); + n[1] = char(0x05); + r = c.in(m, n, n+2, np, &w, &w+1, wp); + assert(r == std::codecvt_base::ok); + assert(wp == &w+1); + assert(np == n+2); + assert(w == 0x1005); + + n[0] = char(0x04); + n[1] = char(0x53); + r = c.in(m, n, n+2, np, &w, &w+1, wp); + assert(r == std::codecvt_base::ok); + assert(wp == &w+1); + assert(np == n+2); + assert(w == 0x453); + + w = 0x56; + n[0] = char(0x00); + n[1] = char(0x56); + r = c.in(m, n, n+2, np, &w, &w+1, wp); + assert(r == std::codecvt_base::ok); + assert(wp == &w+1); + assert(np == n+2); + assert(w == 0x56); + } + { + typedef std::codecvt_utf16 C; + C c; + char32_t w = 0; + char n[4] = {0xD8, 0xC0, 0xDC, 0x03}; + char32_t* wp = nullptr; + std::mbstate_t m; + const char* np = nullptr; + std::codecvt_base::result r = c.in(m, n, n+4, np, &w, &w+1, wp); + assert(r == std::codecvt_base::error); + assert(wp == &w); + assert(np == n); + assert(w == 0); + + n[0] = char(0x10); + n[1] = char(0x05); + r = c.in(m, n, n+2, np, &w, &w+1, wp); + assert(r == std::codecvt_base::error); + assert(wp == &w); + assert(np == n); + assert(w == 0); + + n[0] = char(0x04); + n[1] = char(0x53); + r = c.in(m, n, n+2, np, &w, &w+1, wp); + assert(r == std::codecvt_base::ok); + assert(wp == &w+1); + assert(np == n+2); + assert(w == 0x453); + + w = 0x56; + n[0] = char(0x00); + n[1] = char(0x56); + r = c.in(m, n, n+2, np, &w, &w+1, wp); + assert(r == std::codecvt_base::ok); + assert(wp == &w+1); + assert(np == n+2); + assert(w == 0x56); + } + { + typedef std::codecvt_utf16 C; + C c; + char32_t w = 0; + char n[6] = {0xFE, 0xFF, 0xD8, 0xC0, 0xDC, 0x03}; + char32_t* wp = nullptr; + std::mbstate_t m; + const char* np = nullptr; + 
std::codecvt_base::result r = c.in(m, n, n+6, np, &w, &w+1, wp); + assert(r == std::codecvt_base::ok); + assert(wp == &w+1); + assert(np == n+6); + assert(w == 0x40003); + + n[0] = char(0x10); + n[1] = char(0x05); + r = c.in(m, n, n+2, np, &w, &w+1, wp); + assert(r == std::codecvt_base::ok); + assert(wp == &w+1); + assert(np == n+2); + assert(w == 0x1005); + + n[0] = char(0x04); + n[1] = char(0x53); + r = c.in(m, n, n+2, np, &w, &w+1, wp); + assert(r == std::codecvt_base::ok); + assert(wp == &w+1); + assert(np == n+2); + assert(w == 0x453); + + w = 0x56; + n[0] = char(0x00); + n[1] = char(0x56); + r = c.in(m, n, n+2, np, &w, &w+1, wp); + assert(r == std::codecvt_base::ok); + assert(wp == &w+1); + assert(np == n+2); + assert(w == 0x56); + } + { + typedef std::codecvt_utf16 C; + C c; + char32_t w = 0; + char n[4] = {0xC0, 0xD8, 0x03, 0xDC}; + char32_t* wp = nullptr; + std::mbstate_t m; + const char* np = nullptr; + std::codecvt_base::result r = c.in(m, n, n+4, np, &w, &w+1, wp); + assert(r == std::codecvt_base::ok); + assert(wp == &w+1); + assert(np == n+4); + assert(w == 0x40003); + + n[1] = char(0x10); + n[0] = char(0x05); + r = c.in(m, n, n+2, np, &w, &w+1, wp); + assert(r == std::codecvt_base::ok); + assert(wp == &w+1); + assert(np == n+2); + assert(w == 0x1005); + + n[1] = char(0x04); + n[0] = char(0x53); + r = c.in(m, n, n+2, np, &w, &w+1, wp); + assert(r == std::codecvt_base::ok); + assert(wp == &w+1); + assert(np == n+2); + assert(w == 0x453); + + w = 0x56; + n[1] = char(0x00); + n[0] = char(0x56); + r = c.in(m, n, n+2, np, &w, &w+1, wp); + assert(r == std::codecvt_base::ok); + assert(wp == &w+1); + assert(np == n+2); + assert(w == 0x56); + } + { + typedef std::codecvt_utf16 C; + C c; + char32_t w = 0; + char n[4] = {0xC0, 0xD8, 0x03, 0xDC}; + char32_t* wp = nullptr; + std::mbstate_t m; + const char* np = nullptr; + std::codecvt_base::result r = c.in(m, n, n+4, np, &w, &w+1, wp); + assert(r == std::codecvt_base::error); + assert(wp == &w); + assert(np == n); 
+ assert(w == 0); + + n[1] = char(0x10); + n[0] = char(0x05); + r = c.in(m, n, n+2, np, &w, &w+1, wp); + assert(r == std::codecvt_base::error); + assert(wp == &w); + assert(np == n); + assert(w == 0); + + n[1] = char(0x04); + n[0] = char(0x53); + r = c.in(m, n, n+2, np, &w, &w+1, wp); + assert(r == std::codecvt_base::ok); + assert(wp == &w+1); + assert(np == n+2); + assert(w == 0x453); + + w = 0x56; + n[1] = char(0x00); + n[0] = char(0x56); + r = c.in(m, n, n+2, np, &w, &w+1, wp); + assert(r == std::codecvt_base::ok); + assert(wp == &w+1); + assert(np == n+2); + assert(w == 0x56); + } + { + typedef std::codecvt_utf16 C; + C c; + char32_t w = 0; + char n[6] = {0xFF, 0xFE, 0xC0, 0xD8, 0x03, 0xDC}; + char32_t* wp = nullptr; + std::mbstate_t m; + const char* np = nullptr; + std::codecvt_base::result r = c.in(m, n, n+6, np, &w, &w+1, wp); + assert(r == std::codecvt_base::ok); + assert(wp == &w+1); + assert(np == n+6); + assert(w == 0x40003); + + n[1] = char(0x10); + n[0] = char(0x05); + r = c.in(m, n, n+2, np, &w, &w+1, wp); + assert(r == std::codecvt_base::ok); + assert(wp == &w+1); + assert(np == n+2); + assert(w == 0x1005); + + n[1] = char(0x04); + n[0] = char(0x53); + r = c.in(m, n, n+2, np, &w, &w+1, wp); + assert(r == std::codecvt_base::ok); + assert(wp == &w+1); + assert(np == n+2); + assert(w == 0x453); + + w = 0x56; + n[1] = char(0x00); + n[0] = char(0x56); + r = c.in(m, n, n+2, np, &w, &w+1, wp); + assert(r == std::codecvt_base::ok); + assert(wp == &w+1); + assert(np == n+2); + assert(w == 0x56); + } + + { + typedef std::codecvt_utf16 C; + C c; + char16_t w = 0; + char n[4] = {0xD8, 0xC0, 0xDC, 0x03}; + char16_t* wp = nullptr; + std::mbstate_t m; + const char* np = nullptr; + std::codecvt_base::result r = c.in(m, n, n+4, np, &w, &w+1, wp); + assert(r == std::codecvt_base::error); + assert(wp == &w); + assert(np == n); + assert(w == 0); + + n[0] = char(0x10); + n[1] = char(0x05); + r = c.in(m, n, n+2, np, &w, &w+1, wp); + assert(r == std::codecvt_base::ok); + 
assert(wp == &w+1); + assert(np == n+2); + assert(w == 0x1005); + + n[0] = char(0x04); + n[1] = char(0x53); + r = c.in(m, n, n+2, np, &w, &w+1, wp); + assert(r == std::codecvt_base::ok); + assert(wp == &w+1); + assert(np == n+2); + assert(w == 0x453); + + w = 0x56; + n[0] = char(0x00); + n[1] = char(0x56); + r = c.in(m, n, n+2, np, &w, &w+1, wp); + assert(r == std::codecvt_base::ok); + assert(wp == &w+1); + assert(np == n+2); + assert(w == 0x56); + } + { + typedef std::codecvt_utf16 C; + C c; + char16_t w = 0; + char n[4] = {0xD8, 0xC0, 0xDC, 0x03}; + char16_t* wp = nullptr; + std::mbstate_t m; + const char* np = nullptr; + std::codecvt_base::result r = c.in(m, n, n+4, np, &w, &w+1, wp); + assert(r == std::codecvt_base::error); + assert(wp == &w); + assert(np == n); + assert(w == 0); + + n[0] = char(0x10); + n[1] = char(0x05); + r = c.in(m, n, n+2, np, &w, &w+1, wp); + assert(r == std::codecvt_base::error); + assert(wp == &w); + assert(np == n); + assert(w == 0); + + n[0] = char(0x04); + n[1] = char(0x53); + r = c.in(m, n, n+2, np, &w, &w+1, wp); + assert(r == std::codecvt_base::ok); + assert(wp == &w+1); + assert(np == n+2); + assert(w == 0x453); + + w = 0x56; + n[0] = char(0x00); + n[1] = char(0x56); + r = c.in(m, n, n+2, np, &w, &w+1, wp); + assert(r == std::codecvt_base::ok); + assert(wp == &w+1); + assert(np == n+2); + assert(w == 0x56); + } + { + typedef std::codecvt_utf16 C; + C c; + char16_t w = 0; + char n[6] = {0xFE, 0xFF, 0xD8, 0xC0, 0xDC, 0x03}; + char16_t* wp = nullptr; + std::mbstate_t m; + const char* np = nullptr; + std::codecvt_base::result r = c.in(m, n, n+6, np, &w, &w+1, wp); + assert(r == std::codecvt_base::error); + assert(wp == &w); + assert(np == n+2); + assert(w == 0); + + n[0] = char(0x10); + n[1] = char(0x05); + r = c.in(m, n, n+2, np, &w, &w+1, wp); + assert(r == std::codecvt_base::ok); + assert(wp == &w+1); + assert(np == n+2); + assert(w == 0x1005); + + n[0] = char(0x04); + n[1] = char(0x53); + r = c.in(m, n, n+2, np, &w, &w+1, wp); + 
assert(r == std::codecvt_base::ok); + assert(wp == &w+1); + assert(np == n+2); + assert(w == 0x453); + + w = 0x56; + n[0] = char(0x00); + n[1] = char(0x56); + r = c.in(m, n, n+2, np, &w, &w+1, wp); + assert(r == std::codecvt_base::ok); + assert(wp == &w+1); + assert(np == n+2); + assert(w == 0x56); + } + { + typedef std::codecvt_utf16 C; + C c; + char16_t w = 0; + char n[4] = {0xC0, 0xD8, 0x03, 0xDC}; + char16_t* wp = nullptr; + std::mbstate_t m; + const char* np = nullptr; + std::codecvt_base::result r = c.in(m, n, n+4, np, &w, &w+1, wp); + assert(r == std::codecvt_base::error); + assert(wp == &w); + assert(np == n); + assert(w == 0); + + n[1] = char(0x10); + n[0] = char(0x05); + r = c.in(m, n, n+2, np, &w, &w+1, wp); + assert(r == std::codecvt_base::ok); + assert(wp == &w+1); + assert(np == n+2); + assert(w == 0x1005); + + n[1] = char(0x04); + n[0] = char(0x53); + r = c.in(m, n, n+2, np, &w, &w+1, wp); + assert(r == std::codecvt_base::ok); + assert(wp == &w+1); + assert(np == n+2); + assert(w == 0x453); + + w = 0x56; + n[1] = char(0x00); + n[0] = char(0x56); + r = c.in(m, n, n+2, np, &w, &w+1, wp); + assert(r == std::codecvt_base::ok); + assert(wp == &w+1); + assert(np == n+2); + assert(w == 0x56); + } + { + typedef std::codecvt_utf16 C; + C c; + char16_t w = 0; + char n[4] = {0xC0, 0xD8, 0x03, 0xDC}; + char16_t* wp = nullptr; + std::mbstate_t m; + const char* np = nullptr; + std::codecvt_base::result r = c.in(m, n, n+4, np, &w, &w+1, wp); + assert(r == std::codecvt_base::error); + assert(wp == &w); + assert(np == n); + assert(w == 0); + + n[1] = char(0x10); + n[0] = char(0x05); + r = c.in(m, n, n+2, np, &w, &w+1, wp); + assert(r == std::codecvt_base::error); + assert(wp == &w); + assert(np == n); + assert(w == 0); + + n[1] = char(0x04); + n[0] = char(0x53); + r = c.in(m, n, n+2, np, &w, &w+1, wp); + assert(r == std::codecvt_base::ok); + assert(wp == &w+1); + assert(np == n+2); + assert(w == 0x453); + + w = 0x56; + n[1] = char(0x00); + n[0] = char(0x56); + r = 
c.in(m, n, n+2, np, &w, &w+1, wp); + assert(r == std::codecvt_base::ok); + assert(wp == &w+1); + assert(np == n+2); + assert(w == 0x56); + } + { + typedef std::codecvt_utf16 C; + C c; + char16_t w = 0; + char n[6] = {0xFF, 0xFE, 0xC0, 0xD8, 0x03, 0xDC}; + char16_t* wp = nullptr; + std::mbstate_t m; + const char* np = nullptr; + std::codecvt_base::result r = c.in(m, n, n+6, np, &w, &w+1, wp); + assert(r == std::codecvt_base::error); + assert(wp == &w); + assert(np == n+2); + assert(w == 0); + + n[1] = char(0x10); + n[0] = char(0x05); + r = c.in(m, n, n+2, np, &w, &w+1, wp); + assert(r == std::codecvt_base::ok); + assert(wp == &w+1); + assert(np == n+2); + assert(w == 0x1005); + + n[1] = char(0x04); + n[0] = char(0x53); + r = c.in(m, n, n+2, np, &w, &w+1, wp); + assert(r == std::codecvt_base::ok); + assert(wp == &w+1); + assert(np == n+2); + assert(w == 0x453); + + w = 0x56; + n[1] = char(0x00); + n[0] = char(0x56); + r = c.in(m, n, n+2, np, &w, &w+1, wp); + assert(r == std::codecvt_base::ok); + assert(wp == &w+1); + assert(np == n+2); + assert(w == 0x56); + } +} diff --git a/test/localization/locale.stdcvt/codecvt_utf16_length.pass.cpp b/test/localization/locale.stdcvt/codecvt_utf16_length.pass.cpp new file mode 100644 index 00000000..0154ec71 --- /dev/null +++ b/test/localization/locale.stdcvt/codecvt_utf16_length.pass.cpp @@ -0,0 +1,449 @@ +//===----------------------------------------------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +// + +// template +// class codecvt_utf16 +// : public codecvt +// { +// // unspecified +// }; + +// int length(stateT& state, const externT* from, const externT* from_end, +// size_t max) const; + +#include +#include + +int main() +{ + { + typedef std::codecvt_utf16 C; + C c; + char n[4] = {0xD8, 0xC0, 0xDC, 0x03}; + std::mbstate_t m; + int r = c.length(m, n, n+4, 2); + assert(r == 4); + + n[0] = char(0x10); + n[1] = char(0x05); + r = c.length(m, n, n+2, 2); + assert(r == 2); + + n[0] = char(0x04); + n[1] = char(0x53); + r = c.length(m, n, n+2, 2); + assert(r == 2); + + n[0] = char(0x00); + n[1] = char(0x56); + r = c.length(m, n, n+2, 2); + assert(r == 2); + } + { + typedef std::codecvt_utf16 C; + C c; + char n[4] = {0xD8, 0xC0, 0xDC, 0x03}; + std::mbstate_t m; + int r = c.length(m, n, n+4, 2); + assert(r == 0); + + n[0] = char(0x10); + n[1] = char(0x05); + r = c.length(m, n, n+2, 2); + assert(r == 0); + + n[0] = char(0x04); + n[1] = char(0x53); + r = c.length(m, n, n+2, 2); + assert(r == 2); + + n[0] = char(0x00); + n[1] = char(0x56); + r = c.length(m, n, n+2, 2); + assert(r == 2); + } + { + typedef std::codecvt_utf16 C; + C c; + char n[6] = {0xFE, 0xFF, 0xD8, 0xC0, 0xDC, 0x03}; + std::mbstate_t m; + int r = c.length(m, n, n+6, 2); + assert(r == 6); + + n[0] = char(0x10); + n[1] = char(0x05); + r = c.length(m, n, n+2, 2); + assert(r == 2); + + n[0] = char(0x04); + n[1] = char(0x53); + r = c.length(m, n, n+2, 2); + assert(r == 2); + + n[0] = char(0x00); + n[1] = char(0x56); + r = c.length(m, n, n+2, 2); + assert(r == 2); + } + { + typedef std::codecvt_utf16 C; + C c; + char n[4] = {0xC0, 0xD8, 0x03, 0xDC}; + std::mbstate_t m; + int r = c.length(m, n, n+4, 2); + assert(r == 4); + + n[1] = char(0x10); + n[0] = char(0x05); + r = c.length(m, n, n+2, 2); + assert(r == 2); + + n[1] = char(0x04); + n[0] = char(0x53); + r = c.length(m, n, n+2, 2); + assert(r == 2); + + n[1] = 
char(0x00); + n[0] = char(0x56); + r = c.length(m, n, n+2, 2); + assert(r == 2); + } + { + typedef std::codecvt_utf16 C; + C c; + char n[4] = {0xC0, 0xD8, 0x03, 0xDC}; + std::mbstate_t m; + int r = c.length(m, n, n+4, 2); + assert(r == 0); + + n[1] = char(0x10); + n[0] = char(0x05); + r = c.length(m, n, n+2, 2); + assert(r == 0); + + n[1] = char(0x04); + n[0] = char(0x53); + r = c.length(m, n, n+2, 2); + assert(r == 2); + + n[1] = char(0x00); + n[0] = char(0x56); + r = c.length(m, n, n+2, 2); + assert(r == 2); + } + { + typedef std::codecvt_utf16 C; + C c; + char n[6] = {0xFF, 0xFE, 0xC0, 0xD8, 0x03, 0xDC}; + std::mbstate_t m; + int r = c.length(m, n, n+6, 2); + assert(r == 6); + + n[1] = char(0x10); + n[0] = char(0x05); + r = c.length(m, n, n+2, 2); + assert(r == 2); + + n[1] = char(0x04); + n[0] = char(0x53); + r = c.length(m, n, n+2, 2); + assert(r == 2); + + n[1] = char(0x00); + n[0] = char(0x56); + r = c.length(m, n, n+2, 2); + assert(r == 2); + } + { + typedef std::codecvt_utf16 C; + C c; + char n[4] = {0xD8, 0xC0, 0xDC, 0x03}; + std::mbstate_t m; + int r = c.length(m, n, n+4, 2); + assert(r == 4); + + n[0] = char(0x10); + n[1] = char(0x05); + r = c.length(m, n, n+2, 2); + assert(r == 2); + + n[0] = char(0x04); + n[1] = char(0x53); + r = c.length(m, n, n+2, 2); + assert(r == 2); + + n[0] = char(0x00); + n[1] = char(0x56); + r = c.length(m, n, n+2, 2); + assert(r == 2); + } + { + typedef std::codecvt_utf16 C; + C c; + char n[4] = {0xD8, 0xC0, 0xDC, 0x03}; + std::mbstate_t m; + int r = c.length(m, n, n+4, 2); + assert(r == 0); + + n[0] = char(0x10); + n[1] = char(0x05); + r = c.length(m, n, n+2, 2); + assert(r == 0); + + n[0] = char(0x04); + n[1] = char(0x53); + r = c.length(m, n, n+2, 2); + assert(r == 2); + + n[0] = char(0x00); + n[1] = char(0x56); + r = c.length(m, n, n+2, 2); + assert(r == 2); + } + { + typedef std::codecvt_utf16 C; + C c; + char n[6] = {0xFE, 0xFF, 0xD8, 0xC0, 0xDC, 0x03}; + std::mbstate_t m; + int r = c.length(m, n, n+6, 2); + assert(r == 
6); + + n[0] = char(0x10); + n[1] = char(0x05); + r = c.length(m, n, n+2, 2); + assert(r == 2); + + n[0] = char(0x04); + n[1] = char(0x53); + r = c.length(m, n, n+2, 2); + assert(r == 2); + + n[0] = char(0x00); + n[1] = char(0x56); + r = c.length(m, n, n+2, 2); + assert(r == 2); + } + { + typedef std::codecvt_utf16 C; + C c; + char n[4] = {0xC0, 0xD8, 0x03, 0xDC}; + std::mbstate_t m; + int r = c.length(m, n, n+4, 2); + assert(r == 4); + + n[1] = char(0x10); + n[0] = char(0x05); + r = c.length(m, n, n+2, 2); + assert(r == 2); + + n[1] = char(0x04); + n[0] = char(0x53); + r = c.length(m, n, n+2, 2); + assert(r == 2); + + n[1] = char(0x00); + n[0] = char(0x56); + r = c.length(m, n, n+2, 2); + assert(r == 2); + } + { + typedef std::codecvt_utf16 C; + C c; + char n[4] = {0xC0, 0xD8, 0x03, 0xDC}; + std::mbstate_t m; + int r = c.length(m, n, n+4, 2); + assert(r == 0); + + n[1] = char(0x10); + n[0] = char(0x05); + r = c.length(m, n, n+2, 2); + assert(r == 0); + + n[1] = char(0x04); + n[0] = char(0x53); + r = c.length(m, n, n+2, 2); + assert(r == 2); + + n[1] = char(0x00); + n[0] = char(0x56); + r = c.length(m, n, n+2, 2); + assert(r == 2); + } + { + typedef std::codecvt_utf16 C; + C c; + char n[6] = {0xFF, 0xFE, 0xC0, 0xD8, 0x03, 0xDC}; + std::mbstate_t m; + int r = c.length(m, n, n+6, 2); + assert(r == 6); + + n[1] = char(0x10); + n[0] = char(0x05); + r = c.length(m, n, n+2, 2); + assert(r == 2); + + n[1] = char(0x04); + n[0] = char(0x53); + r = c.length(m, n, n+2, 2); + assert(r == 2); + + n[1] = char(0x00); + n[0] = char(0x56); + r = c.length(m, n, n+2, 2); + assert(r == 2); + } + + { + typedef std::codecvt_utf16 C; + C c; + char n[4] = {0xD8, 0xC0, 0xDC, 0x03}; + std::mbstate_t m; + int r = c.length(m, n, n+4, 2); + assert(r == 0); + + n[0] = char(0x10); + n[1] = char(0x05); + r = c.length(m, n, n+2, 2); + assert(r == 2); + + n[0] = char(0x04); + n[1] = char(0x53); + r = c.length(m, n, n+2, 2); + assert(r == 2); + + n[0] = char(0x00); + n[1] = char(0x56); + r = 
c.length(m, n, n+2, 2); + assert(r == 2); + } + { + typedef std::codecvt_utf16 C; + C c; + char n[4] = {0xD8, 0xC0, 0xDC, 0x03}; + std::mbstate_t m; + int r = c.length(m, n, n+4, 2); + assert(r == 0); + + n[0] = char(0x10); + n[1] = char(0x05); + r = c.length(m, n, n+2, 2); + assert(r == 0); + + n[0] = char(0x04); + n[1] = char(0x53); + r = c.length(m, n, n+2, 2); + assert(r == 2); + + n[0] = char(0x00); + n[1] = char(0x56); + r = c.length(m, n, n+2, 2); + assert(r == 2); + } + { + typedef std::codecvt_utf16 C; + C c; + char n[6] = {0xFE, 0xFF, 0xD8, 0xC0, 0xDC, 0x03}; + std::mbstate_t m; + int r = c.length(m, n, n+6, 2); + assert(r == 2); + + n[0] = char(0x10); + n[1] = char(0x05); + r = c.length(m, n, n+2, 2); + assert(r == 2); + + n[0] = char(0x04); + n[1] = char(0x53); + r = c.length(m, n, n+2, 2); + assert(r == 2); + + n[0] = char(0x00); + n[1] = char(0x56); + r = c.length(m, n, n+2, 2); + assert(r == 2); + } + { + typedef std::codecvt_utf16 C; + C c; + char n[4] = {0xC0, 0xD8, 0x03, 0xDC}; + std::mbstate_t m; + int r = c.length(m, n, n+4, 2); + assert(r == 0); + + n[1] = char(0x10); + n[0] = char(0x05); + r = c.length(m, n, n+2, 2); + assert(r == 2); + + n[1] = char(0x04); + n[0] = char(0x53); + r = c.length(m, n, n+2, 2); + assert(r == 2); + + n[1] = char(0x00); + n[0] = char(0x56); + r = c.length(m, n, n+2, 2); + assert(r == 2); + } + { + typedef std::codecvt_utf16 C; + C c; + char n[4] = {0xC0, 0xD8, 0x03, 0xDC}; + std::mbstate_t m; + int r = c.length(m, n, n+4, 2); + assert(r == 0); + + n[1] = char(0x10); + n[0] = char(0x05); + r = c.length(m, n, n+2, 2); + assert(r == 0); + + n[1] = char(0x04); + n[0] = char(0x53); + r = c.length(m, n, n+2, 2); + assert(r == 2); + + n[1] = char(0x00); + n[0] = char(0x56); + r = c.length(m, n, n+2, 2); + assert(r == 2); + } + { + typedef std::codecvt_utf16 C; + C c; + char n[6] = {0xFF, 0xFE, 0xC0, 0xD8, 0x03, 0xDC}; + std::mbstate_t m; + int r = c.length(m, n, n+6, 2); + assert(r == 2); + + n[1] = char(0x10); + n[0] = 
char(0x05); + r = c.length(m, n, n+2, 2); + assert(r == 2); + + n[1] = char(0x04); + n[0] = char(0x53); + r = c.length(m, n, n+2, 2); + assert(r == 2); + + n[1] = char(0x00); + n[0] = char(0x56); + r = c.length(m, n, n+2, 2); + assert(r == 2); + } +} diff --git a/test/localization/locale.stdcvt/codecvt_utf16_max_length.pass.cpp b/test/localization/locale.stdcvt/codecvt_utf16_max_length.pass.cpp new file mode 100644 index 00000000..35fccc70 --- /dev/null +++ b/test/localization/locale.stdcvt/codecvt_utf16_max_length.pass.cpp @@ -0,0 +1,63 @@ +//===----------------------------------------------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +// + +// template +// class codecvt_utf16 +// : public codecvt +// { +// // unspecified +// }; + +// int max_length() const throw(); + +#include +#include + +int main() +{ + { + typedef std::codecvt_utf16 C; + C c; + int r = c.max_length(); + assert(r == 4); + } + { + typedef std::codecvt_utf16 C; + C c; + int r = c.max_length(); + assert(r == 6); + } + { + typedef std::codecvt_utf16 C; + C c; + int r = c.max_length(); + assert(r == 2); + } + { + typedef std::codecvt_utf16 C; + C c; + int r = c.max_length(); + assert(r == 4); + } + { + typedef std::codecvt_utf16 C; + C c; + int r = c.max_length(); + assert(r == 4); + } + { + typedef std::codecvt_utf16 C; + C c; + int r = c.max_length(); + assert(r == 6); + } +} diff --git a/test/localization/locale.stdcvt/codecvt_utf16_out.pass.cpp b/test/localization/locale.stdcvt/codecvt_utf16_out.pass.cpp new file mode 100644 index 00000000..e745b5ff --- /dev/null +++ b/test/localization/locale.stdcvt/codecvt_utf16_out.pass.cpp @@ -0,0 +1,331 @@ +//===----------------------------------------------------------------------===// +// +// The LLVM Compiler Infrastructure 
+// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +// + +// template +// class codecvt_utf16 +// : public codecvt +// { +// // unspecified +// }; + +// result +// out(stateT& state, +// const internT* from, const internT* from_end, const internT*& from_next, +// externT* to, externT* to_end, externT*& to_next) const; + +#include +#include + +int main() +{ + { + typedef std::codecvt_utf16 C; + C c; + wchar_t w = 0x40003; + char n[4] = {0}; + const wchar_t* wp = nullptr; + std::mbstate_t m; + char* np = nullptr; + std::codecvt_base::result r = c.out(m, &w, &w+1, wp, n, n+4, np); + assert(r == std::codecvt_base::ok); + assert(wp == &w+1); + assert(np == n+4); + assert(n[0] == char(0xD8)); + assert(n[1] == char(0xC0)); + assert(n[2] == char(0xDC)); + assert(n[3] == char(0x03)); + + w = 0x1005; + r = c.out(m, &w, &w+1, wp, n, n+4, np); + assert(r == std::codecvt_base::ok); + assert(wp == &w+1); + assert(np == n+2); + assert(n[0] == char(0x10)); + assert(n[1] == char(0x05)); + assert(n[2] == char(0xDC)); + assert(n[3] == char(0x03)); + + w = 0x453; + r = c.out(m, &w, &w+1, wp, n, n+4, np); + assert(r == std::codecvt_base::ok); + assert(wp == &w+1); + assert(np == n+2); + assert(n[0] == char(0x04)); + assert(n[1] == char(0x53)); + assert(n[2] == char(0xDC)); + assert(n[3] == char(0x03)); + + w = 0x56; + r = c.out(m, &w, &w+1, wp, n, n+4, np); + assert(r == std::codecvt_base::ok); + assert(wp == &w+1); + assert(np == n+2); + assert(n[0] == char(0x00)); + assert(n[1] == char(0x56)); + assert(n[2] == char(0xDC)); + assert(n[3] == char(0x03)); + } + { + typedef std::codecvt_utf16 C; + C c; + wchar_t w = 0x40003; + char n[4] = {0}; + const wchar_t* wp = nullptr; + std::mbstate_t m; + char* np = nullptr; + std::codecvt_base::result r = c.out(m, &w, &w+1, wp, n, n+4, np); + assert(r == std::codecvt_base::error); + assert(wp 
== &w); + assert(np == n); + assert(n[0] == char(0)); + assert(n[1] == char(0)); + assert(n[2] == char(0)); + assert(n[3] == char(0)); + + w = 0x1005; + r = c.out(m, &w, &w+1, wp, n, n+4, np); + assert(r == std::codecvt_base::error); + assert(wp == &w); + assert(np == n); + assert(n[0] == char(0)); + assert(n[1] == char(0)); + assert(n[2] == char(0)); + assert(n[3] == char(0)); + + w = 0x453; + r = c.out(m, &w, &w+1, wp, n, n+4, np); + assert(r == std::codecvt_base::ok); + assert(wp == &w+1); + assert(np == n+2); + assert(n[0] == char(0x04)); + assert(n[1] == char(0x53)); + assert(n[2] == char(0)); + assert(n[3] == char(0)); + + w = 0x56; + r = c.out(m, &w, &w+1, wp, n, n+4, np); + assert(r == std::codecvt_base::ok); + assert(wp == &w+1); + assert(np == n+2); + assert(n[0] == char(0x00)); + assert(n[1] == char(0x56)); + assert(n[2] == char(0)); + assert(n[3] == char(0)); + } + { + typedef std::codecvt_utf16 C; + C c; + wchar_t w = 0x40003; + char n[6] = {0}; + const wchar_t* wp = nullptr; + std::mbstate_t m; + char* np = nullptr; + std::codecvt_base::result r = c.out(m, &w, &w+1, wp, n, n+6, np); + assert(r == std::codecvt_base::ok); + assert(wp == &w+1); + assert(np == n+6); + assert(n[0] == char(0xFE)); + assert(n[1] == char(0xFF)); + assert(n[2] == char(0xD8)); + assert(n[3] == char(0xC0)); + assert(n[4] == char(0xDC)); + assert(n[5] == char(0x03)); + + w = 0x1005; + r = c.out(m, &w, &w+1, wp, n, n+6, np); + assert(r == std::codecvt_base::ok); + assert(wp == &w+1); + assert(np == n+4); + assert(n[0] == char(0xFE)); + assert(n[1] == char(0xFF)); + assert(n[2] == char(0x10)); + assert(n[3] == char(0x05)); + assert(n[4] == char(0xDC)); + assert(n[5] == char(0x03)); + + w = 0x453; + r = c.out(m, &w, &w+1, wp, n, n+6, np); + assert(r == std::codecvt_base::ok); + assert(wp == &w+1); + assert(np == n+4); + assert(n[0] == char(0xFE)); + assert(n[1] == char(0xFF)); + assert(n[2] == char(0x04)); + assert(n[3] == char(0x53)); + assert(n[4] == char(0xDC)); + assert(n[5] == 
char(0x03)); + + w = 0x56; + r = c.out(m, &w, &w+1, wp, n, n+6, np); + assert(r == std::codecvt_base::ok); + assert(wp == &w+1); + assert(np == n+4); + assert(n[0] == char(0xFE)); + assert(n[1] == char(0xFF)); + assert(n[2] == char(0x00)); + assert(n[3] == char(0x56)); + assert(n[4] == char(0xDC)); + assert(n[5] == char(0x03)); + } + + { + typedef std::codecvt_utf16 C; + C c; + wchar_t w = 0x40003; + char n[4] = {0}; + const wchar_t* wp = nullptr; + std::mbstate_t m; + char* np = nullptr; + std::codecvt_base::result r = c.out(m, &w, &w+1, wp, n, n+4, np); + assert(r == std::codecvt_base::ok); + assert(wp == &w+1); + assert(np == n+4); + assert(n[1] == char(0xD8)); + assert(n[0] == char(0xC0)); + assert(n[3] == char(0xDC)); + assert(n[2] == char(0x03)); + + w = 0x1005; + r = c.out(m, &w, &w+1, wp, n, n+4, np); + assert(r == std::codecvt_base::ok); + assert(wp == &w+1); + assert(np == n+2); + assert(n[1] == char(0x10)); + assert(n[0] == char(0x05)); + assert(n[3] == char(0xDC)); + assert(n[2] == char(0x03)); + + w = 0x453; + r = c.out(m, &w, &w+1, wp, n, n+4, np); + assert(r == std::codecvt_base::ok); + assert(wp == &w+1); + assert(np == n+2); + assert(n[1] == char(0x04)); + assert(n[0] == char(0x53)); + assert(n[3] == char(0xDC)); + assert(n[2] == char(0x03)); + + w = 0x56; + r = c.out(m, &w, &w+1, wp, n, n+4, np); + assert(r == std::codecvt_base::ok); + assert(wp == &w+1); + assert(np == n+2); + assert(n[1] == char(0x00)); + assert(n[0] == char(0x56)); + assert(n[3] == char(0xDC)); + assert(n[2] == char(0x03)); + } + { + typedef std::codecvt_utf16 C; + C c; + wchar_t w = 0x40003; + char n[4] = {0}; + const wchar_t* wp = nullptr; + std::mbstate_t m; + char* np = nullptr; + std::codecvt_base::result r = c.out(m, &w, &w+1, wp, n, n+4, np); + assert(r == std::codecvt_base::error); + assert(wp == &w); + assert(np == n); + assert(n[1] == char(0)); + assert(n[0] == char(0)); + assert(n[3] == char(0)); + assert(n[2] == char(0)); + + w = 0x1005; + r = c.out(m, &w, &w+1, wp, 
n, n+4, np); + assert(r == std::codecvt_base::error); + assert(wp == &w); + assert(np == n); + assert(n[1] == char(0)); + assert(n[0] == char(0)); + assert(n[3] == char(0)); + assert(n[2] == char(0)); + + w = 0x453; + r = c.out(m, &w, &w+1, wp, n, n+4, np); + assert(r == std::codecvt_base::ok); + assert(wp == &w+1); + assert(np == n+2); + assert(n[1] == char(0x04)); + assert(n[0] == char(0x53)); + assert(n[3] == char(0)); + assert(n[2] == char(0)); + + w = 0x56; + r = c.out(m, &w, &w+1, wp, n, n+4, np); + assert(r == std::codecvt_base::ok); + assert(wp == &w+1); + assert(np == n+2); + assert(n[1] == char(0x00)); + assert(n[0] == char(0x56)); + assert(n[3] == char(0)); + assert(n[2] == char(0)); + } + { + typedef std::codecvt_utf16 C; + C c; + wchar_t w = 0x40003; + char n[6] = {0}; + const wchar_t* wp = nullptr; + std::mbstate_t m; + char* np = nullptr; + std::codecvt_base::result r = c.out(m, &w, &w+1, wp, n, n+6, np); + assert(r == std::codecvt_base::ok); + assert(wp == &w+1); + assert(np == n+6); + assert(n[1] == char(0xFE)); + assert(n[0] == char(0xFF)); + assert(n[3] == char(0xD8)); + assert(n[2] == char(0xC0)); + assert(n[5] == char(0xDC)); + assert(n[4] == char(0x03)); + + w = 0x1005; + r = c.out(m, &w, &w+1, wp, n, n+6, np); + assert(r == std::codecvt_base::ok); + assert(wp == &w+1); + assert(np == n+4); + assert(n[1] == char(0xFE)); + assert(n[0] == char(0xFF)); + assert(n[3] == char(0x10)); + assert(n[2] == char(0x05)); + assert(n[5] == char(0xDC)); + assert(n[4] == char(0x03)); + + w = 0x453; + r = c.out(m, &w, &w+1, wp, n, n+6, np); + assert(r == std::codecvt_base::ok); + assert(wp == &w+1); + assert(np == n+4); + assert(n[1] == char(0xFE)); + assert(n[0] == char(0xFF)); + assert(n[3] == char(0x04)); + assert(n[2] == char(0x53)); + assert(n[5] == char(0xDC)); + assert(n[4] == char(0x03)); + + w = 0x56; + r = c.out(m, &w, &w+1, wp, n, n+6, np); + assert(r == std::codecvt_base::ok); + assert(wp == &w+1); + assert(np == n+4); + assert(n[1] == char(0xFE)); 
+ assert(n[0] == char(0xFF)); + assert(n[3] == char(0x00)); + assert(n[2] == char(0x56)); + assert(n[5] == char(0xDC)); + assert(n[4] == char(0x03)); + } +} diff --git a/test/localization/locale.stdcvt/codecvt_utf16_unshift.pass.cpp b/test/localization/locale.stdcvt/codecvt_utf16_unshift.pass.cpp new file mode 100644 index 00000000..61ab4648 --- /dev/null +++ b/test/localization/locale.stdcvt/codecvt_utf16_unshift.pass.cpp @@ -0,0 +1,56 @@ +//===----------------------------------------------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +// <codecvt> + +// template <class Elem, unsigned long Maxcode = 0x10ffff, +// codecvt_mode Mode = (codecvt_mode)0> +// class codecvt_utf16 +// : public codecvt<Elem, char, mbstate_t> +// { +// // unspecified +// }; + +// result +// unshift(stateT& state, +// externT* to, externT* to_end, externT*& to_next) const; + +#include <codecvt> +#include <cassert> + +int main() +{ + { + typedef std::codecvt_utf16<wchar_t> C; // NOTE(review): Elem arguments in this file were restored as wchar_t/char16_t/char32_t - confirm + C c; + char n[4] = {0}; + std::mbstate_t m; + char* np = nullptr; + std::codecvt_base::result r = c.unshift(m, n, n+4, np); + assert(r == std::codecvt_base::noconv); // nothing to unshift: noconv is expected for every Elem tested + } + { + typedef std::codecvt_utf16<char16_t> C; + C c; + char n[4] = {0}; + std::mbstate_t m; + char* np = nullptr; + std::codecvt_base::result r = c.unshift(m, n, n+4, np); + assert(r == std::codecvt_base::noconv); + } + { + typedef std::codecvt_utf16<char32_t> C; + C c; + char n[4] = {0}; + std::mbstate_t m; + char* np = nullptr; + std::codecvt_base::result r = c.unshift(m, n, n+4, np); + assert(r == std::codecvt_base::noconv); + } +} diff --git a/test/localization/locale.stdcvt/codecvt_utf8.pass.cpp b/test/localization/locale.stdcvt/codecvt_utf8.pass.cpp new file mode 100644 index 00000000..d4f960aa --- /dev/null +++ b/test/localization/locale.stdcvt/codecvt_utf8.pass.cpp @@ -0,0 +1,56 @@ +//===----------------------------------------------------------------------===// +// +// The LLVM
Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +// <codecvt> + +// template <class Elem, unsigned long Maxcode = 0x10ffff, +// codecvt_mode Mode = (codecvt_mode)0> +// class codecvt_utf8 +// : public codecvt<Elem, char, mbstate_t> +// { +// // unspecified +// }; + +// Not a portable test + +#include <codecvt> +#include <cassert> + +int outstanding_news = 0; // number of live allocations made through the replaced operator new + +void* operator new(std::size_t s) // no throw(std::bad_alloc): dynamic exception specifications are ill-formed since C++17 +{ + ++outstanding_news; + return std::malloc(s); +} + +void operator delete(void* p) noexcept +{ + if (p) + { + --outstanding_news; + std::free(p); + } +} + +int main() +{ + assert(outstanding_news == 0); + { + typedef std::codecvt_utf8<wchar_t> C; + C c; + assert(outstanding_news == 0); // constructing the facet on the stack must not allocate + } + { + typedef std::codecvt_utf8<wchar_t> C; + std::locale loc(std::locale::classic(), new C); + assert(outstanding_news != 0); + } + assert(outstanding_news == 0); // once loc is gone every allocation, including the facet, has been released +} diff --git a/test/localization/locale.stdcvt/codecvt_utf8_always_noconv.pass.cpp b/test/localization/locale.stdcvt/codecvt_utf8_always_noconv.pass.cpp new file mode 100644 index 00000000..94d2e8f3 --- /dev/null +++ b/test/localization/locale.stdcvt/codecvt_utf8_always_noconv.pass.cpp @@ -0,0 +1,45 @@ +//===----------------------------------------------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details.
+// +//===----------------------------------------------------------------------===// + +// <codecvt> + +// template <class Elem, unsigned long Maxcode = 0x10ffff, +// codecvt_mode Mode = (codecvt_mode)0> +// class codecvt_utf8 +// : public codecvt<Elem, char, mbstate_t> +// { +// // unspecified +// }; + +// bool always_noconv() const throw(); + +#include <codecvt> +#include <cassert> + +int main() +{ + { + typedef std::codecvt_utf8<wchar_t> C; // NOTE(review): Elem arguments in this file were restored as wchar_t/char16_t/char32_t - confirm + C c; + bool r = c.always_noconv(); + assert(r == false); // false is expected for every Elem tested + } + { + typedef std::codecvt_utf8<char16_t> C; + C c; + bool r = c.always_noconv(); + assert(r == false); + } + { + typedef std::codecvt_utf8<char32_t> C; + C c; + bool r = c.always_noconv(); + assert(r == false); + } +} diff --git a/test/localization/locale.stdcvt/codecvt_utf8_encoding.pass.cpp b/test/localization/locale.stdcvt/codecvt_utf8_encoding.pass.cpp new file mode 100644 index 00000000..5ee22e3d --- /dev/null +++ b/test/localization/locale.stdcvt/codecvt_utf8_encoding.pass.cpp @@ -0,0 +1,45 @@ +//===----------------------------------------------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details.
+// +//===----------------------------------------------------------------------===// + +// <codecvt> + +// template <class Elem, unsigned long Maxcode = 0x10ffff, +// codecvt_mode Mode = (codecvt_mode)0> +// class codecvt_utf8 +// : public codecvt<Elem, char, mbstate_t> +// { +// // unspecified +// }; + +// int encoding() const throw(); + +#include <codecvt> +#include <cassert> + +int main() +{ + { + typedef std::codecvt_utf8<wchar_t> C; // NOTE(review): Elem arguments in this file were restored as wchar_t/char16_t/char32_t - confirm + C c; + int r = c.encoding(); + assert(r == 0); // 0 is expected for every Elem tested + } + { + typedef std::codecvt_utf8<char16_t> C; + C c; + int r = c.encoding(); + assert(r == 0); + } + { + typedef std::codecvt_utf8<char32_t> C; + C c; + int r = c.encoding(); + assert(r == 0); + } +} diff --git a/test/localization/locale.stdcvt/codecvt_utf8_in.pass.cpp b/test/localization/locale.stdcvt/codecvt_utf8_in.pass.cpp new file mode 100644 index 00000000..8dd453ac --- /dev/null +++ b/test/localization/locale.stdcvt/codecvt_utf8_in.pass.cpp @@ -0,0 +1,360 @@ +//===----------------------------------------------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details.
+// +//===----------------------------------------------------------------------===// + +// <codecvt> + +// template <class Elem, unsigned long Maxcode = 0x10ffff, +// codecvt_mode Mode = (codecvt_mode)0> +// class codecvt_utf8 +// : public codecvt<Elem, char, mbstate_t> +// { +// // unspecified +// }; + +// result +// in(stateT& state, +// const externT* from, const externT* from_end, const externT*& from_next, +// internT* to, internT* to_end, internT*& to_next) const; + +#include <codecvt> +#include <cassert> + +int main() +{ + { + typedef std::codecvt_utf8<wchar_t> C; + C c; + wchar_t w = 0; + char n[4] = {char(0xF1), char(0x80), char(0x80), char(0x83)}; // char() casts: bytes >= 0x80 are a narrowing conversion in a braced init where char is signed + wchar_t* wp = nullptr; + std::mbstate_t m; + const char* np = nullptr; + std::codecvt_base::result r = c.in(m, n, n+4, np, &w, &w+1, wp); + assert(r == std::codecvt_base::ok); + assert(wp == &w+1); + assert(np == n+4); + assert(w == 0x40003); + + n[0] = char(0xE1); + n[1] = char(0x80); + n[2] = char(0x85); + r = c.in(m, n, n+3, np, &w, &w+1, wp); + assert(r == std::codecvt_base::ok); + assert(wp == &w+1); + assert(np == n+3); + assert(w == 0x1005); + + n[0] = char(0xD1); + n[1] = char(0x93); + r = c.in(m, n, n+2, np, &w, &w+1, wp); + assert(r == std::codecvt_base::ok); + assert(wp == &w+1); + assert(np == n+2); + assert(w == 0x453); + + w = 0; // reset (was preset to 0x56) so the w == 0x56 check proves in() stored the value + n[0] = char(0x56); + r = c.in(m, n, n+1, np, &w, &w+1, wp); + assert(r == std::codecvt_base::ok); + assert(wp == &w+1); + assert(np == n+1); + assert(w == 0x56); + } + { + typedef std::codecvt_utf8<wchar_t, 0x1000> C; // Maxcode 0x1000: 0x40003 and 0x1005 must be rejected, 0x453 and 0x56 accepted + C c; + wchar_t w = 0; + char n[4] = {char(0xF1), char(0x80), char(0x80), char(0x83)}; + wchar_t* wp = nullptr; + std::mbstate_t m; + const char* np = nullptr; + std::codecvt_base::result r = c.in(m, n, n+4, np, &w, &w+1, wp); + assert(r == std::codecvt_base::error); + assert(wp == &w); + assert(np == n); + assert(w == 0); + + n[0] = char(0xE1); + n[1] = char(0x80); + n[2] = char(0x85); + r = c.in(m, n, n+3, np, &w, &w+1, wp); + assert(r == std::codecvt_base::error); + assert(wp == &w); + assert(np == n); + assert(w == 0); + + n[0] = char(0xD1); + n[1] = char(0x93); + r = c.in(m, n, n+2, np, &w, &w+1, wp); + assert(r == std::codecvt_base::ok); + assert(wp ==
&w+1); + assert(np == n+2); + assert(w == 0x453); + + w = 0; + n[0] = char(0x56); + r = c.in(m, n, n+1, np, &w, &w+1, wp); + assert(r == std::codecvt_base::ok); + assert(wp == &w+1); + assert(np == n+1); + assert(w == 0x56); + } + { + typedef std::codecvt_utf8<wchar_t, 0xFFFFFFFF, std::consume_header> C; // consume_header: a leading EF BB BF is skipped and counted in np + C c; + wchar_t w = 0; + char n[7] = {char(0xEF), char(0xBB), char(0xBF), char(0xF1), char(0x80), char(0x80), char(0x83)}; + wchar_t* wp = nullptr; + std::mbstate_t m; + const char* np = nullptr; + std::codecvt_base::result r = c.in(m, n, n+7, np, &w, &w+1, wp); + assert(r == std::codecvt_base::ok); + assert(wp == &w+1); + assert(np == n+7); + assert(w == 0x40003); + + n[0] = char(0xE1); + n[1] = char(0x80); + n[2] = char(0x85); + r = c.in(m, n, n+3, np, &w, &w+1, wp); + assert(r == std::codecvt_base::ok); + assert(wp == &w+1); + assert(np == n+3); + assert(w == 0x1005); + + n[0] = char(0xEF); + n[1] = char(0xBB); + n[2] = char(0xBF); + n[3] = char(0xD1); + n[4] = char(0x93); + r = c.in(m, n, n+5, np, &w, &w+1, wp); + assert(r == std::codecvt_base::ok); + assert(wp == &w+1); + assert(np == n+5); + assert(w == 0x453); + + w = 0; + n[0] = char(0x56); + r = c.in(m, n, n+1, np, &w, &w+1, wp); + assert(r == std::codecvt_base::ok); + assert(wp == &w+1); + assert(np == n+1); + assert(w == 0x56); + } + { + typedef std::codecvt_utf8<char32_t> C; + C c; + char32_t w = 0; + char n[4] = {char(0xF1), char(0x80), char(0x80), char(0x83)}; + char32_t* wp = nullptr; + std::mbstate_t m; + const char* np = nullptr; + std::codecvt_base::result r = c.in(m, n, n+4, np, &w, &w+1, wp); + assert(r == std::codecvt_base::ok); + assert(wp == &w+1); + assert(np == n+4); + assert(w == 0x40003); + + n[0] = char(0xE1); + n[1] = char(0x80); + n[2] = char(0x85); + r = c.in(m, n, n+3, np, &w, &w+1, wp); + assert(r == std::codecvt_base::ok); + assert(wp == &w+1); + assert(np == n+3); + assert(w == 0x1005); + + n[0] = char(0xD1); + n[1] = char(0x93); + r = c.in(m, n, n+2, np, &w, &w+1, wp); + assert(r == std::codecvt_base::ok); + assert(wp == &w+1); + assert(np == n+2); + assert(w == 0x453); + + w = 0; +
n[0] = char(0x56); + r = c.in(m, n, n+1, np, &w, &w+1, wp); + assert(r == std::codecvt_base::ok); + assert(wp == &w+1); + assert(np == n+1); + assert(w == 0x56); + } + { + typedef std::codecvt_utf8<char32_t, 0x1000> C; + C c; + char32_t w = 0; + char n[4] = {char(0xF1), char(0x80), char(0x80), char(0x83)}; + char32_t* wp = nullptr; + std::mbstate_t m; + const char* np = nullptr; + std::codecvt_base::result r = c.in(m, n, n+4, np, &w, &w+1, wp); + assert(r == std::codecvt_base::error); + assert(wp == &w); + assert(np == n); + assert(w == 0); + + n[0] = char(0xE1); + n[1] = char(0x80); + n[2] = char(0x85); + r = c.in(m, n, n+3, np, &w, &w+1, wp); + assert(r == std::codecvt_base::error); + assert(wp == &w); + assert(np == n); + assert(w == 0); + + n[0] = char(0xD1); + n[1] = char(0x93); + r = c.in(m, n, n+2, np, &w, &w+1, wp); + assert(r == std::codecvt_base::ok); + assert(wp == &w+1); + assert(np == n+2); + assert(w == 0x453); + + w = 0; + n[0] = char(0x56); + r = c.in(m, n, n+1, np, &w, &w+1, wp); + assert(r == std::codecvt_base::ok); + assert(wp == &w+1); + assert(np == n+1); + assert(w == 0x56); + } + { + typedef std::codecvt_utf8<char32_t, 0xFFFFFFFF, std::consume_header> C; + C c; + char32_t w = 0; + char n[7] = {char(0xEF), char(0xBB), char(0xBF), char(0xF1), char(0x80), char(0x80), char(0x83)}; + char32_t* wp = nullptr; + std::mbstate_t m; + const char* np = nullptr; + std::codecvt_base::result r = c.in(m, n, n+7, np, &w, &w+1, wp); + assert(r == std::codecvt_base::ok); + assert(wp == &w+1); + assert(np == n+7); + assert(w == 0x40003); + + n[0] = char(0xE1); + n[1] = char(0x80); + n[2] = char(0x85); + r = c.in(m, n, n+3, np, &w, &w+1, wp); + assert(r == std::codecvt_base::ok); + assert(wp == &w+1); + assert(np == n+3); + assert(w == 0x1005); + + n[0] = char(0xEF); + n[1] = char(0xBB); + n[2] = char(0xBF); + n[3] = char(0xD1); + n[4] = char(0x93); + r = c.in(m, n, n+5, np, &w, &w+1, wp); + assert(r == std::codecvt_base::ok); + assert(wp == &w+1); + assert(np == n+5); + assert(w == 0x453); + + w = 0; + n[0] = char(0x56); + r = c.in(m, n, n+1, np, &w, &w+1, wp); + assert(r ==
std::codecvt_base::ok); + assert(wp == &w+1); + assert(np == n+1); + assert(w == 0x56); + } + { + typedef std::codecvt_utf8<char16_t> C; + C c; + char16_t w = 0; + char n[3] = {char(0xE1), char(0x80), char(0x85)}; + char16_t* wp = nullptr; + std::mbstate_t m; + const char* np = nullptr; + std::codecvt_base::result r = c.in(m, n, n+3, np, &w, &w+1, wp); + assert(r == std::codecvt_base::ok); + assert(wp == &w+1); + assert(np == n+3); + assert(w == 0x1005); + + n[0] = char(0xD1); + n[1] = char(0x93); + r = c.in(m, n, n+2, np, &w, &w+1, wp); + assert(r == std::codecvt_base::ok); + assert(wp == &w+1); + assert(np == n+2); + assert(w == 0x453); + + w = 0; + n[0] = char(0x56); + r = c.in(m, n, n+1, np, &w, &w+1, wp); + assert(r == std::codecvt_base::ok); + assert(wp == &w+1); + assert(np == n+1); + assert(w == 0x56); + } + { + typedef std::codecvt_utf8<char16_t, 0x1000> C; + C c; + char16_t w = 0; + char n[3] = {char(0xE1), char(0x80), char(0x85)}; + char16_t* wp = nullptr; + std::mbstate_t m; + const char* np = nullptr; + std::codecvt_base::result r = c.in(m, n, n+3, np, &w, &w+1, wp); + assert(r == std::codecvt_base::error); + assert(wp == &w); + assert(np == n); + assert(w == 0); + + n[0] = char(0xD1); + n[1] = char(0x93); + r = c.in(m, n, n+2, np, &w, &w+1, wp); + assert(r == std::codecvt_base::ok); + assert(wp == &w+1); + assert(np == n+2); + assert(w == 0x453); + + w = 0; + n[0] = char(0x56); + r = c.in(m, n, n+1, np, &w, &w+1, wp); + assert(r == std::codecvt_base::ok); + assert(wp == &w+1); + assert(np == n+1); + assert(w == 0x56); + } + { + typedef std::codecvt_utf8<char16_t, 0xFFFFFFFF, std::consume_header> C; + C c; + char16_t w = 0; + char n[6] = {char(0xEF), char(0xBB), char(0xBF), char(0xE1), char(0x80), char(0x85)}; + char16_t* wp = nullptr; + std::mbstate_t m; + const char* np = nullptr; + std::codecvt_base::result r = c.in(m, n, n+6, np, &w, &w+1, wp); + assert(r == std::codecvt_base::ok); + assert(wp == &w+1); + assert(np == n+6); + assert(w == 0x1005); + + n[0] = char(0xD1); + n[1] = char(0x93); + r = c.in(m, n, n+2, np, &w, &w+1, wp); + assert(r == std::codecvt_base::ok); + assert(wp ==
&w+1); + assert(np == n+2); + assert(w == 0x453); + + w = 0; + n[0] = char(0x56); + r = c.in(m, n, n+1, np, &w, &w+1, wp); + assert(r == std::codecvt_base::ok); + assert(wp == &w+1); + assert(np == n+1); + assert(w == 0x56); + } +} diff --git a/test/localization/locale.stdcvt/codecvt_utf8_length.pass.cpp b/test/localization/locale.stdcvt/codecvt_utf8_length.pass.cpp new file mode 100644 index 00000000..f06caed2 --- /dev/null +++ b/test/localization/locale.stdcvt/codecvt_utf8_length.pass.cpp @@ -0,0 +1,244 @@ +//===----------------------------------------------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +// <codecvt> + +// template <class Elem, unsigned long Maxcode = 0x10ffff, +// codecvt_mode Mode = (codecvt_mode)0> +// class codecvt_utf8 +// : public codecvt<Elem, char, mbstate_t> +// { +// // unspecified +// }; + +// int length(stateT& state, const externT* from, const externT* from_end, +// size_t max) const; + +#include <codecvt> +#include <cassert> + +int main() +{ + { + typedef std::codecvt_utf8<wchar_t> C; + C c; + char n[4] = {char(0xF1), char(0x80), char(0x80), char(0x83)}; // char() casts: bytes >= 0x80 are a narrowing conversion in a braced init where char is signed + std::mbstate_t m; + int r = c.length(m, n, n+4, 1); + assert(r == 4); + + n[0] = char(0xE1); + n[1] = char(0x80); + n[2] = char(0x85); + r = c.length(m, n, n+3, 2); + assert(r == 3); + + n[0] = char(0xD1); + n[1] = char(0x93); + r = c.length(m, n, n+2, 3); + assert(r == 2); + + n[0] = char(0x56); + r = c.length(m, n, n+1, 3); + assert(r == 1); + } + { + typedef std::codecvt_utf8<wchar_t, 0x1000> C; // Maxcode 0x1000: sequences decoding above it contribute length 0 + C c; + char n[4] = {char(0xF1), char(0x80), char(0x80), char(0x83)}; + std::mbstate_t m; + int r = c.length(m, n, n+4, 1); + assert(r == 0); + + n[0] = char(0xE1); + n[1] = char(0x80); + n[2] = char(0x85); + r = c.length(m, n, n+3, 2); + assert(r == 0); + + n[0] = char(0xD1); + n[1] = char(0x93); + r = c.length(m, n, n+2, 3); + assert(r == 2); + + n[0] = char(0x56); + r = c.length(m, n, n+1, 3); + assert(r == 1); + } + { + typedef std::codecvt_utf8<wchar_t, 0xFFFFFFFF, std::consume_header> C; + C c; + char
n[7] = {char(0xEF), char(0xBB), char(0xBF), char(0xF1), char(0x80), char(0x80), char(0x83)}; + std::mbstate_t m; + int r = c.length(m, n, n+7, 1); + assert(r == 7); + + n[0] = char(0xE1); + n[1] = char(0x80); + n[2] = char(0x85); + r = c.length(m, n, n+3, 2); + assert(r == 3); + + n[0] = char(0xEF); + n[1] = char(0xBB); + n[2] = char(0xBF); + n[3] = char(0xD1); + n[4] = char(0x93); + r = c.length(m, n, n+5, 3); + assert(r == 5); + + n[0] = char(0x56); + r = c.length(m, n, n+1, 3); + assert(r == 1); + } + { + typedef std::codecvt_utf8<char32_t> C; + C c; + char n[4] = {char(0xF1), char(0x80), char(0x80), char(0x83)}; + std::mbstate_t m; + int r = c.length(m, n, n+4, 1); + assert(r == 4); + + n[0] = char(0xE1); + n[1] = char(0x80); + n[2] = char(0x85); + r = c.length(m, n, n+3, 2); + assert(r == 3); + + n[0] = char(0xD1); + n[1] = char(0x93); + r = c.length(m, n, n+2, 3); + assert(r == 2); + + n[0] = char(0x56); + r = c.length(m, n, n+1, 3); + assert(r == 1); + } + { + typedef std::codecvt_utf8<char32_t, 0x1000> C; + C c; + char n[4] = {char(0xF1), char(0x80), char(0x80), char(0x83)}; + std::mbstate_t m; + int r = c.length(m, n, n+4, 1); + assert(r == 0); + + n[0] = char(0xE1); + n[1] = char(0x80); + n[2] = char(0x85); + r = c.length(m, n, n+3, 2); + assert(r == 0); + + n[0] = char(0xD1); + n[1] = char(0x93); + r = c.length(m, n, n+2, 3); + assert(r == 2); + + n[0] = char(0x56); + r = c.length(m, n, n+1, 3); + assert(r == 1); + } + { + typedef std::codecvt_utf8<char32_t, 0xFFFFFFFF, std::consume_header> C; + C c; + char n[7] = {char(0xEF), char(0xBB), char(0xBF), char(0xF1), char(0x80), char(0x80), char(0x83)}; + std::mbstate_t m; + int r = c.length(m, n, n+7, 1); + assert(r == 7); + + n[0] = char(0xE1); + n[1] = char(0x80); + n[2] = char(0x85); + r = c.length(m, n, n+3, 2); + assert(r == 3); + + n[0] = char(0xEF); + n[1] = char(0xBB); + n[2] = char(0xBF); + n[3] = char(0xD1); + n[4] = char(0x93); + r = c.length(m, n, n+5, 3); + assert(r == 5); + + n[0] = char(0x56); + r = c.length(m, n, n+1, 3); + assert(r == 1); + } + { + typedef std::codecvt_utf8<char16_t> C; + C c; + char n[4] = {char(0xF1), char(0x80), char(0x80), char(0x83)}; + std::mbstate_t m; + int r = c.length(m, n, n+4, 1); + assert(r ==
0); + + n[0] = char(0xE1); + n[1] = char(0x80); + n[2] = char(0x85); + r = c.length(m, n, n+3, 2); + assert(r == 3); + + n[0] = char(0xD1); + n[1] = char(0x93); + r = c.length(m, n, n+2, 3); + assert(r == 2); + + n[0] = char(0x56); + r = c.length(m, n, n+1, 3); + assert(r == 1); + } + { + typedef std::codecvt_utf8<char16_t, 0x1000> C; + C c; + char n[4] = {char(0xF1), char(0x80), char(0x80), char(0x83)}; + std::mbstate_t m; + int r = c.length(m, n, n+4, 1); + assert(r == 0); + + n[0] = char(0xE1); + n[1] = char(0x80); + n[2] = char(0x85); + r = c.length(m, n, n+3, 2); + assert(r == 0); + + n[0] = char(0xD1); + n[1] = char(0x93); + r = c.length(m, n, n+2, 3); + assert(r == 2); + + n[0] = char(0x56); + r = c.length(m, n, n+1, 3); + assert(r == 1); + } + { + typedef std::codecvt_utf8<char16_t, 0xFFFFFFFF, std::consume_header> C; + C c; + char n[7] = {char(0xEF), char(0xBB), char(0xBF), char(0xF1), char(0x80), char(0x80), char(0x83)}; + std::mbstate_t m; + int r = c.length(m, n, n+7, 1); + assert(r == 3); // only the 3-byte BOM is counted; the 4-byte sequence after it is not + + n[0] = char(0xE1); + n[1] = char(0x80); + n[2] = char(0x85); + r = c.length(m, n, n+3, 2); + assert(r == 3); + + n[0] = char(0xEF); + n[1] = char(0xBB); + n[2] = char(0xBF); + n[3] = char(0xD1); + n[4] = char(0x93); + r = c.length(m, n, n+5, 3); + assert(r == 5); + + n[0] = char(0x56); + r = c.length(m, n, n+1, 3); + assert(r == 1); + } +} diff --git a/test/localization/locale.stdcvt/codecvt_utf8_max_length.pass.cpp b/test/localization/locale.stdcvt/codecvt_utf8_max_length.pass.cpp new file mode 100644 index 00000000..a7a47d08 --- /dev/null +++ b/test/localization/locale.stdcvt/codecvt_utf8_max_length.pass.cpp @@ -0,0 +1,63 @@ +//===----------------------------------------------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details.
+// +//===----------------------------------------------------------------------===// + +// <codecvt> + +// template <class Elem, unsigned long Maxcode = 0x10ffff, +// codecvt_mode Mode = (codecvt_mode)0> +// class codecvt_utf8 +// : public codecvt<Elem, char, mbstate_t> +// { +// // unspecified +// }; + +// int max_length() const throw(); + +#include <codecvt> +#include <cassert> + +int main() +{ + { + typedef std::codecvt_utf8<wchar_t> C; // NOTE(review): expects 4, i.e. presumably a 32-bit wchar_t - confirm + C c; + int r = c.max_length(); + assert(r == 4); + } + { + typedef std::codecvt_utf8<wchar_t, 0xFFFFFFFF, std::consume_header> C; // consume_header: expected value grows by 3 (4 -> 7) + C c; + int r = c.max_length(); + assert(r == 7); + } + { + typedef std::codecvt_utf8<char16_t> C; + C c; + int r = c.max_length(); + assert(r == 3); + } + { + typedef std::codecvt_utf8<char16_t, 0xFFFFFFFF, std::consume_header> C; + C c; + int r = c.max_length(); + assert(r == 6); + } + { + typedef std::codecvt_utf8<char32_t> C; + C c; + int r = c.max_length(); + assert(r == 4); + } + { + typedef std::codecvt_utf8<char32_t, 0xFFFFFFFF, std::consume_header> C; + C c; + int r = c.max_length(); + assert(r == 7); + } +} diff --git a/test/localization/locale.stdcvt/codecvt_utf8_out.pass.cpp b/test/localization/locale.stdcvt/codecvt_utf8_out.pass.cpp new file mode 100644 index 00000000..5ed05747 --- /dev/null +++ b/test/localization/locale.stdcvt/codecvt_utf8_out.pass.cpp @@ -0,0 +1,456 @@ +//===----------------------------------------------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details.
+// +//===----------------------------------------------------------------------===// + +// + +// template +// class codecvt_utf8 +// : public codecvt +// { +// // unspecified +// }; + +// result +// out(stateT& state, +// const internT* from, const internT* from_end, const internT*& from_next, +// externT* to, externT* to_end, externT*& to_next) const; + +#include +#include + +int main() +{ + { + typedef std::codecvt_utf8 C; + C c; + wchar_t w = 0x40003; + char n[4] = {0}; + const wchar_t* wp = nullptr; + std::mbstate_t m; + char* np = nullptr; + std::codecvt_base::result r = c.out(m, &w, &w+1, wp, n, n+4, np); + assert(r == std::codecvt_base::ok); + assert(wp == &w+1); + assert(np == n+4); + assert(n[0] == char(0xF1)); + assert(n[1] == char(0x80)); + assert(n[2] == char(0x80)); + assert(n[3] == char(0x83)); + + w = 0x1005; + r = c.out(m, &w, &w+1, wp, n, n+4, np); + assert(r == std::codecvt_base::ok); + assert(wp == &w+1); + assert(np == n+3); + assert(n[0] == char(0xE1)); + assert(n[1] == char(0x80)); + assert(n[2] == char(0x85)); + assert(n[3] == char(0x83)); + + w = 0x453; + r = c.out(m, &w, &w+1, wp, n, n+4, np); + assert(r == std::codecvt_base::ok); + assert(wp == &w+1); + assert(np == n+2); + assert(n[0] == char(0xD1)); + assert(n[1] == char(0x93)); + assert(n[2] == char(0x85)); + assert(n[3] == char(0x83)); + + w = 0x56; + r = c.out(m, &w, &w+1, wp, n, n+4, np); + assert(r == std::codecvt_base::ok); + assert(wp == &w+1); + assert(np == n+1); + assert(n[0] == char(0x56)); + assert(n[1] == char(0x93)); + assert(n[2] == char(0x85)); + assert(n[3] == char(0x83)); + } + { + typedef std::codecvt_utf8 C; + C c; + wchar_t w = 0x40003; + char n[4] = {0}; + const wchar_t* wp = nullptr; + std::mbstate_t m; + char* np = nullptr; + std::codecvt_base::result r = c.out(m, &w, &w+1, wp, n, n+4, np); + assert(r == std::codecvt_base::error); + assert(wp == &w); + assert(np == n); + assert(n[0] == char(0)); + assert(n[1] == char(0)); + assert(n[2] == char(0)); + assert(n[3] 
== char(0)); + + w = 0x1005; + r = c.out(m, &w, &w+1, wp, n, n+4, np); + assert(r == std::codecvt_base::error); + assert(wp == &w); + assert(np == n); + assert(n[0] == char(0)); + assert(n[1] == char(0)); + assert(n[2] == char(0)); + assert(n[3] == char(0)); + + w = 0x453; + r = c.out(m, &w, &w+1, wp, n, n+4, np); + assert(r == std::codecvt_base::ok); + assert(wp == &w+1); + assert(np == n+2); + assert(n[0] == char(0xD1)); + assert(n[1] == char(0x93)); + assert(n[2] == char(0)); + assert(n[3] == char(0)); + + w = 0x56; + r = c.out(m, &w, &w+1, wp, n, n+4, np); + assert(r == std::codecvt_base::ok); + assert(wp == &w+1); + assert(np == n+1); + assert(n[0] == char(0x56)); + assert(n[1] == char(0x93)); + assert(n[2] == char(0)); + assert(n[3] == char(0)); + } + { + typedef std::codecvt_utf8 C; + C c; + wchar_t w = 0x40003; + char n[7] = {0}; + const wchar_t* wp = nullptr; + std::mbstate_t m; + char* np = nullptr; + std::codecvt_base::result r = c.out(m, &w, &w+1, wp, n, n+7, np); + assert(r == std::codecvt_base::ok); + assert(wp == &w+1); + assert(np == n+7); + assert(n[0] == char(0xEF)); + assert(n[1] == char(0xBB)); + assert(n[2] == char(0xBF)); + assert(n[3] == char(0xF1)); + assert(n[4] == char(0x80)); + assert(n[5] == char(0x80)); + assert(n[6] == char(0x83)); + + w = 0x1005; + r = c.out(m, &w, &w+1, wp, n, n+7, np); + assert(r == std::codecvt_base::ok); + assert(wp == &w+1); + assert(np == n+6); + assert(n[0] == char(0xEF)); + assert(n[1] == char(0xBB)); + assert(n[2] == char(0xBF)); + assert(n[3] == char(0xE1)); + assert(n[4] == char(0x80)); + assert(n[5] == char(0x85)); + assert(n[6] == char(0x83)); + + w = 0x453; + r = c.out(m, &w, &w+1, wp, n, n+7, np); + assert(r == std::codecvt_base::ok); + assert(wp == &w+1); + assert(np == n+5); + assert(n[0] == char(0xEF)); + assert(n[1] == char(0xBB)); + assert(n[2] == char(0xBF)); + assert(n[3] == char(0xD1)); + assert(n[4] == char(0x93)); + assert(n[5] == char(0x85)); + assert(n[6] == char(0x83)); + + w = 0x56; + r = 
c.out(m, &w, &w+1, wp, n, n+7, np); + assert(r == std::codecvt_base::ok); + assert(wp == &w+1); + assert(np == n+4); + assert(n[0] == char(0xEF)); + assert(n[1] == char(0xBB)); + assert(n[2] == char(0xBF)); + assert(n[3] == char(0x56)); + assert(n[4] == char(0x93)); + assert(n[5] == char(0x85)); + assert(n[6] == char(0x83)); + } + { + typedef std::codecvt_utf8 C; + C c; + char32_t w = 0x40003; + char n[4] = {0}; + const char32_t* wp = nullptr; + std::mbstate_t m; + char* np = nullptr; + std::codecvt_base::result r = c.out(m, &w, &w+1, wp, n, n+4, np); + assert(r == std::codecvt_base::ok); + assert(wp == &w+1); + assert(np == n+4); + assert(n[0] == char(0xF1)); + assert(n[1] == char(0x80)); + assert(n[2] == char(0x80)); + assert(n[3] == char(0x83)); + + w = 0x1005; + r = c.out(m, &w, &w+1, wp, n, n+4, np); + assert(r == std::codecvt_base::ok); + assert(wp == &w+1); + assert(np == n+3); + assert(n[0] == char(0xE1)); + assert(n[1] == char(0x80)); + assert(n[2] == char(0x85)); + assert(n[3] == char(0x83)); + + w = 0x453; + r = c.out(m, &w, &w+1, wp, n, n+4, np); + assert(r == std::codecvt_base::ok); + assert(wp == &w+1); + assert(np == n+2); + assert(n[0] == char(0xD1)); + assert(n[1] == char(0x93)); + assert(n[2] == char(0x85)); + assert(n[3] == char(0x83)); + + w = 0x56; + r = c.out(m, &w, &w+1, wp, n, n+4, np); + assert(r == std::codecvt_base::ok); + assert(wp == &w+1); + assert(np == n+1); + assert(n[0] == char(0x56)); + assert(n[1] == char(0x93)); + assert(n[2] == char(0x85)); + assert(n[3] == char(0x83)); + } + { + typedef std::codecvt_utf8 C; + C c; + char32_t w = 0x40003; + char n[4] = {0}; + const char32_t* wp = nullptr; + std::mbstate_t m; + char* np = nullptr; + std::codecvt_base::result r = c.out(m, &w, &w+1, wp, n, n+4, np); + assert(r == std::codecvt_base::error); + assert(wp == &w); + assert(np == n); + assert(n[0] == char(0)); + assert(n[1] == char(0)); + assert(n[2] == char(0)); + assert(n[3] == char(0)); + + w = 0x1005; + r = c.out(m, &w, &w+1, wp, n, 
n+4, np); + assert(r == std::codecvt_base::error); + assert(wp == &w); + assert(np == n); + assert(n[0] == char(0)); + assert(n[1] == char(0)); + assert(n[2] == char(0)); + assert(n[3] == char(0)); + + w = 0x453; + r = c.out(m, &w, &w+1, wp, n, n+4, np); + assert(r == std::codecvt_base::ok); + assert(wp == &w+1); + assert(np == n+2); + assert(n[0] == char(0xD1)); + assert(n[1] == char(0x93)); + assert(n[2] == char(0)); + assert(n[3] == char(0)); + + w = 0x56; + r = c.out(m, &w, &w+1, wp, n, n+4, np); + assert(r == std::codecvt_base::ok); + assert(wp == &w+1); + assert(np == n+1); + assert(n[0] == char(0x56)); + assert(n[1] == char(0x93)); + assert(n[2] == char(0)); + assert(n[3] == char(0)); + } + { + typedef std::codecvt_utf8 C; + C c; + char32_t w = 0x40003; + char n[7] = {0}; + const char32_t* wp = nullptr; + std::mbstate_t m; + char* np = nullptr; + std::codecvt_base::result r = c.out(m, &w, &w+1, wp, n, n+7, np); + assert(r == std::codecvt_base::ok); + assert(wp == &w+1); + assert(np == n+7); + assert(n[0] == char(0xEF)); + assert(n[1] == char(0xBB)); + assert(n[2] == char(0xBF)); + assert(n[3] == char(0xF1)); + assert(n[4] == char(0x80)); + assert(n[5] == char(0x80)); + assert(n[6] == char(0x83)); + + w = 0x1005; + r = c.out(m, &w, &w+1, wp, n, n+7, np); + assert(r == std::codecvt_base::ok); + assert(wp == &w+1); + assert(np == n+6); + assert(n[0] == char(0xEF)); + assert(n[1] == char(0xBB)); + assert(n[2] == char(0xBF)); + assert(n[3] == char(0xE1)); + assert(n[4] == char(0x80)); + assert(n[5] == char(0x85)); + assert(n[6] == char(0x83)); + + w = 0x453; + r = c.out(m, &w, &w+1, wp, n, n+7, np); + assert(r == std::codecvt_base::ok); + assert(wp == &w+1); + assert(np == n+5); + assert(n[0] == char(0xEF)); + assert(n[1] == char(0xBB)); + assert(n[2] == char(0xBF)); + assert(n[3] == char(0xD1)); + assert(n[4] == char(0x93)); + assert(n[5] == char(0x85)); + assert(n[6] == char(0x83)); + + w = 0x56; + r = c.out(m, &w, &w+1, wp, n, n+7, np); + assert(r == 
std::codecvt_base::ok); + assert(wp == &w+1); + assert(np == n+4); + assert(n[0] == char(0xEF)); + assert(n[1] == char(0xBB)); + assert(n[2] == char(0xBF)); + assert(n[3] == char(0x56)); + assert(n[4] == char(0x93)); + assert(n[5] == char(0x85)); + assert(n[6] == char(0x83)); + } + { + typedef std::codecvt_utf8 C; + C c; + char16_t w = 0x1005; + char n[4] = {0}; + const char16_t* wp = nullptr; + std::mbstate_t m; + char* np = nullptr; + std::codecvt_base::result r = c.out(m, &w, &w+1, wp, n, n+4, np); + assert(r == std::codecvt_base::ok); + assert(wp == &w+1); + assert(np == n+3); + assert(n[0] == char(0xE1)); + assert(n[1] == char(0x80)); + assert(n[2] == char(0x85)); + assert(n[3] == char(0)); + + w = 0x453; + r = c.out(m, &w, &w+1, wp, n, n+4, np); + assert(r == std::codecvt_base::ok); + assert(wp == &w+1); + assert(np == n+2); + assert(n[0] == char(0xD1)); + assert(n[1] == char(0x93)); + assert(n[2] == char(0x85)); + assert(n[3] == char(0)); + + w = 0x56; + r = c.out(m, &w, &w+1, wp, n, n+4, np); + assert(r == std::codecvt_base::ok); + assert(wp == &w+1); + assert(np == n+1); + assert(n[0] == char(0x56)); + assert(n[1] == char(0x93)); + assert(n[2] == char(0x85)); + assert(n[3] == char(0)); + } + { + typedef std::codecvt_utf8 C; + C c; + char16_t w = 0x1005; + char n[4] = {0}; + const char16_t* wp = nullptr; + std::mbstate_t m; + char* np = nullptr; + std::codecvt_base::result r = c.out(m, &w, &w+1, wp, n, n+4, np); + assert(r == std::codecvt_base::error); + assert(wp == &w); + assert(np == n); + assert(n[0] == char(0)); + assert(n[1] == char(0)); + assert(n[2] == char(0)); + assert(n[3] == char(0)); + + w = 0x453; + r = c.out(m, &w, &w+1, wp, n, n+4, np); + assert(r == std::codecvt_base::ok); + assert(wp == &w+1); + assert(np == n+2); + assert(n[0] == char(0xD1)); + assert(n[1] == char(0x93)); + assert(n[2] == char(0)); + assert(n[3] == char(0)); + + w = 0x56; + r = c.out(m, &w, &w+1, wp, n, n+4, np); + assert(r == std::codecvt_base::ok); + assert(wp == &w+1); 
+ assert(np == n+1); + assert(n[0] == char(0x56)); + assert(n[1] == char(0x93)); + assert(n[2] == char(0)); + assert(n[3] == char(0)); + } + { + typedef std::codecvt_utf8 C; + C c; + char16_t w = 0x1005; + char n[7] = {0}; + const char16_t* wp = nullptr; + std::mbstate_t m; + char* np = nullptr; + std::codecvt_base::result r = c.out(m, &w, &w+1, wp, n, n+7, np); + assert(r == std::codecvt_base::ok); + assert(wp == &w+1); + assert(np == n+6); + assert(n[0] == char(0xEF)); + assert(n[1] == char(0xBB)); + assert(n[2] == char(0xBF)); + assert(n[3] == char(0xE1)); + assert(n[4] == char(0x80)); + assert(n[5] == char(0x85)); + assert(n[6] == char(0)); + + w = 0x453; + r = c.out(m, &w, &w+1, wp, n, n+7, np); + assert(r == std::codecvt_base::ok); + assert(wp == &w+1); + assert(np == n+5); + assert(n[0] == char(0xEF)); + assert(n[1] == char(0xBB)); + assert(n[2] == char(0xBF)); + assert(n[3] == char(0xD1)); + assert(n[4] == char(0x93)); + assert(n[5] == char(0x85)); + assert(n[6] == char(0)); + + w = 0x56; + r = c.out(m, &w, &w+1, wp, n, n+7, np); + assert(r == std::codecvt_base::ok); + assert(wp == &w+1); + assert(np == n+4); + assert(n[0] == char(0xEF)); + assert(n[1] == char(0xBB)); + assert(n[2] == char(0xBF)); + assert(n[3] == char(0x56)); + assert(n[4] == char(0x93)); + assert(n[5] == char(0x85)); + assert(n[6] == char(0)); + } +} diff --git a/test/localization/locale.stdcvt/codecvt_utf8_unshift.pass.cpp b/test/localization/locale.stdcvt/codecvt_utf8_unshift.pass.cpp new file mode 100644 index 00000000..054c1a71 --- /dev/null +++ b/test/localization/locale.stdcvt/codecvt_utf8_unshift.pass.cpp @@ -0,0 +1,56 @@ +//===----------------------------------------------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +// + +// template +// class codecvt_utf8 +// : public codecvt +// { +// // unspecified +// }; + +// result +// unshift(stateT& state, +// externT* to, externT* to_end, externT*& to_next) const; + +#include +#include + +int main() +{ + { + typedef std::codecvt_utf8 C; + C c; + char n[4] = {0}; + std::mbstate_t m; + char* np = nullptr; + std::codecvt_base::result r = c.unshift(m, n, n+4, np); + assert(r == std::codecvt_base::noconv); + } + { + typedef std::codecvt_utf8 C; + C c; + char n[4] = {0}; + std::mbstate_t m; + char* np = nullptr; + std::codecvt_base::result r = c.unshift(m, n, n+4, np); + assert(r == std::codecvt_base::noconv); + } + { + typedef std::codecvt_utf8 C; + C c; + char n[4] = {0}; + std::mbstate_t m; + char* np = nullptr; + std::codecvt_base::result r = c.unshift(m, n, n+4, np); + assert(r == std::codecvt_base::noconv); + } +} diff --git a/test/localization/locale.stdcvt/codecvt_utf8_utf16_always_noconv.pass.cpp b/test/localization/locale.stdcvt/codecvt_utf8_utf16_always_noconv.pass.cpp new file mode 100644 index 00000000..dfb9923f --- /dev/null +++ b/test/localization/locale.stdcvt/codecvt_utf8_utf16_always_noconv.pass.cpp @@ -0,0 +1,45 @@ +//===----------------------------------------------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +// + +// template +// class codecvt_utf8_utf16 +// : public codecvt +// { +// // unspecified +// }; + +// bool always_noconv() const throw(); + +#include +#include + +int main() +{ + { + typedef std::codecvt_utf8_utf16 C; + C c; + bool r = c.always_noconv(); + assert(r == false); + } + { + typedef std::codecvt_utf8_utf16 C; + C c; + bool r = c.always_noconv(); + assert(r == false); + } + { + typedef std::codecvt_utf8_utf16 C; + C c; + bool r = c.always_noconv(); + assert(r == false); + } +} diff --git a/test/localization/locale.stdcvt/codecvt_utf8_utf16_encoding.pass.cpp b/test/localization/locale.stdcvt/codecvt_utf8_utf16_encoding.pass.cpp new file mode 100644 index 00000000..f16e30e5 --- /dev/null +++ b/test/localization/locale.stdcvt/codecvt_utf8_utf16_encoding.pass.cpp @@ -0,0 +1,45 @@ +//===----------------------------------------------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +// + +// template +// class codecvt_utf8_utf16 +// : public codecvt +// { +// // unspecified +// }; + +// int encoding() const throw(); + +#include +#include + +int main() +{ + { + typedef std::codecvt_utf8_utf16 C; + C c; + int r = c.encoding(); + assert(r == 0); + } + { + typedef std::codecvt_utf8_utf16 C; + C c; + int r = c.encoding(); + assert(r == 0); + } + { + typedef std::codecvt_utf8_utf16 C; + C c; + int r = c.encoding(); + assert(r == 0); + } +} diff --git a/test/localization/locale.stdcvt/codecvt_utf8_utf16_in.pass.cpp b/test/localization/locale.stdcvt/codecvt_utf8_utf16_in.pass.cpp new file mode 100644 index 00000000..4427ece2 --- /dev/null +++ b/test/localization/locale.stdcvt/codecvt_utf8_utf16_in.pass.cpp @@ -0,0 +1,372 @@ +//===----------------------------------------------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +// + +// template +// class codecvt_utf8_utf16 +// : public codecvt +// { +// // unspecified +// }; + +// result +// in(stateT& state, +// const externT* from, const externT* from_end, const externT*& from_next, +// internT* to, internT* to_end, internT*& to_next) const; + +#include +#include + +int main() +{ + { + typedef std::codecvt_utf8_utf16 C; + C c; + wchar_t w[2] = {0}; + char n[4] = {0xF1, 0x80, 0x80, 0x83}; + wchar_t* wp = nullptr; + std::mbstate_t m; + const char* np = nullptr; + std::codecvt_base::result r = c.in(m, n, n+4, np, w, w+2, wp); + assert(r == std::codecvt_base::ok); + assert(wp == w+2); + assert(np == n+4); + assert(w[0] == 0xD8C0); + assert(w[1] == 0xDC03); + + n[0] = char(0xE1); + n[1] = char(0x80); + n[2] = char(0x85); + r = c.in(m, n, n+3, np, w, w+2, wp); + assert(r == std::codecvt_base::ok); + assert(wp == w+1); + assert(np == n+3); + assert(w[0] == 0x1005); + + n[0] = char(0xD1); + n[1] = char(0x93); + r = c.in(m, n, n+2, np, w, w+2, wp); + assert(r == std::codecvt_base::ok); + assert(wp == w+1); + assert(np == n+2); + assert(w[0] == 0x0453); + + n[0] = char(0x56); + r = c.in(m, n, n+1, np, w, w+2, wp); + assert(r == std::codecvt_base::ok); + assert(wp == w+1); + assert(np == n+1); + assert(w[0] == 0x0056); + } + { + typedef std::codecvt_utf8_utf16 C; + C c; + wchar_t w[2] = {0}; + char n[4] = {0xF1, 0x80, 0x80, 0x83}; + wchar_t* wp = nullptr; + std::mbstate_t m; + const char* np = nullptr; + std::codecvt_base::result r = c.in(m, n, n+4, np, w, w+2, wp); + assert(r == std::codecvt_base::error); + assert(wp == w); + assert(np == n); + + n[0] = char(0xE1); + n[1] = char(0x80); + n[2] = char(0x85); + r = c.in(m, n, n+3, np, w, w+2, wp); + assert(r == std::codecvt_base::error); + assert(wp == w); + assert(np == n); + + n[0] = char(0xD1); + n[1] = char(0x93); + r = c.in(m, n, n+2, np, w, w+2, wp); + assert(r == std::codecvt_base::ok); + assert(wp == 
w+1); + assert(np == n+2); + assert(w[0] == 0x0453); + + n[0] = char(0x56); + r = c.in(m, n, n+1, np, w, w+2, wp); + assert(r == std::codecvt_base::ok); + assert(wp == w+1); + assert(np == n+1); + assert(w[0] == 0x0056); + } + { + typedef std::codecvt_utf8_utf16 C; + C c; + wchar_t w[2] = {0}; + char n[7] = {0xEF, 0xBB, 0xBF, 0xF1, 0x80, 0x80, 0x83}; + wchar_t* wp = nullptr; + std::mbstate_t m; + const char* np = nullptr; + std::codecvt_base::result r = c.in(m, n, n+7, np, w, w+2, wp); + assert(r == std::codecvt_base::ok); + assert(wp == w+2); + assert(np == n+7); + assert(w[0] == 0xD8C0); + assert(w[1] == 0xDC03); + + n[0] = char(0xE1); + n[1] = char(0x80); + n[2] = char(0x85); + r = c.in(m, n, n+3, np, w, w+2, wp); + assert(r == std::codecvt_base::ok); + assert(wp == w+1); + assert(np == n+3); + assert(w[0] == 0x1005); + + n[0] = char(0xD1); + n[1] = char(0x93); + r = c.in(m, n, n+2, np, w, w+2, wp); + assert(r == std::codecvt_base::ok); + assert(wp == w+1); + assert(np == n+2); + assert(w[0] == 0x0453); + + n[0] = char(0x56); + r = c.in(m, n, n+1, np, w, w+2, wp); + assert(r == std::codecvt_base::ok); + assert(wp == w+1); + assert(np == n+1); + assert(w[0] == 0x0056); + } + { + typedef std::codecvt_utf8_utf16 C; + C c; + char32_t w[2] = {0}; + char n[4] = {0xF1, 0x80, 0x80, 0x83}; + char32_t* wp = nullptr; + std::mbstate_t m; + const char* np = nullptr; + std::codecvt_base::result r = c.in(m, n, n+4, np, w, w+2, wp); + assert(r == std::codecvt_base::ok); + assert(wp == w+2); + assert(np == n+4); + assert(w[0] == 0xD8C0); + assert(w[1] == 0xDC03); + + n[0] = char(0xE1); + n[1] = char(0x80); + n[2] = char(0x85); + r = c.in(m, n, n+3, np, w, w+2, wp); + assert(r == std::codecvt_base::ok); + assert(wp == w+1); + assert(np == n+3); + assert(w[0] == 0x1005); + + n[0] = char(0xD1); + n[1] = char(0x93); + r = c.in(m, n, n+2, np, w, w+2, wp); + assert(r == std::codecvt_base::ok); + assert(wp == w+1); + assert(np == n+2); + assert(w[0] == 0x0453); + + n[0] = char(0x56); + 
r = c.in(m, n, n+1, np, w, w+2, wp); + assert(r == std::codecvt_base::ok); + assert(wp == w+1); + assert(np == n+1); + assert(w[0] == 0x0056); + } + { + typedef std::codecvt_utf8_utf16 C; + C c; + char32_t w[2] = {0}; + char n[4] = {0xF1, 0x80, 0x80, 0x83}; + char32_t* wp = nullptr; + std::mbstate_t m; + const char* np = nullptr; + std::codecvt_base::result r = c.in(m, n, n+4, np, w, w+2, wp); + assert(r == std::codecvt_base::error); + assert(wp == w); + assert(np == n); + + n[0] = char(0xE1); + n[1] = char(0x80); + n[2] = char(0x85); + r = c.in(m, n, n+3, np, w, w+2, wp); + assert(r == std::codecvt_base::error); + assert(wp == w); + assert(np == n); + + n[0] = char(0xD1); + n[1] = char(0x93); + r = c.in(m, n, n+2, np, w, w+2, wp); + assert(r == std::codecvt_base::ok); + assert(wp == w+1); + assert(np == n+2); + assert(w[0] == 0x0453); + + n[0] = char(0x56); + r = c.in(m, n, n+1, np, w, w+2, wp); + assert(r == std::codecvt_base::ok); + assert(wp == w+1); + assert(np == n+1); + assert(w[0] == 0x0056); + } + { + typedef std::codecvt_utf8_utf16 C; + C c; + char32_t w[2] = {0}; + char n[7] = {0xEF, 0xBB, 0xBF, 0xF1, 0x80, 0x80, 0x83}; + char32_t* wp = nullptr; + std::mbstate_t m; + const char* np = nullptr; + std::codecvt_base::result r = c.in(m, n, n+7, np, w, w+2, wp); + assert(r == std::codecvt_base::ok); + assert(wp == w+2); + assert(np == n+7); + assert(w[0] == 0xD8C0); + assert(w[1] == 0xDC03); + + n[0] = char(0xE1); + n[1] = char(0x80); + n[2] = char(0x85); + r = c.in(m, n, n+3, np, w, w+2, wp); + assert(r == std::codecvt_base::ok); + assert(wp == w+1); + assert(np == n+3); + assert(w[0] == 0x1005); + + n[0] = char(0xD1); + n[1] = char(0x93); + r = c.in(m, n, n+2, np, w, w+2, wp); + assert(r == std::codecvt_base::ok); + assert(wp == w+1); + assert(np == n+2); + assert(w[0] == 0x0453); + + n[0] = char(0x56); + r = c.in(m, n, n+1, np, w, w+2, wp); + assert(r == std::codecvt_base::ok); + assert(wp == w+1); + assert(np == n+1); + assert(w[0] == 0x0056); + } + { + 
typedef std::codecvt_utf8_utf16 C; + C c; + char16_t w[2] = {0}; + char n[4] = {0xF1, 0x80, 0x80, 0x83}; + char16_t* wp = nullptr; + std::mbstate_t m; + const char* np = nullptr; + std::codecvt_base::result r = c.in(m, n, n+4, np, w, w+2, wp); + assert(r == std::codecvt_base::ok); + assert(wp == w+2); + assert(np == n+4); + assert(w[0] == 0xD8C0); + assert(w[1] == 0xDC03); + + n[0] = char(0xE1); + n[1] = char(0x80); + n[2] = char(0x85); + r = c.in(m, n, n+3, np, w, w+2, wp); + assert(r == std::codecvt_base::ok); + assert(wp == w+1); + assert(np == n+3); + assert(w[0] == 0x1005); + + n[0] = char(0xD1); + n[1] = char(0x93); + r = c.in(m, n, n+2, np, w, w+2, wp); + assert(r == std::codecvt_base::ok); + assert(wp == w+1); + assert(np == n+2); + assert(w[0] == 0x0453); + + n[0] = char(0x56); + r = c.in(m, n, n+1, np, w, w+2, wp); + assert(r == std::codecvt_base::ok); + assert(wp == w+1); + assert(np == n+1); + assert(w[0] == 0x0056); + } + { + typedef std::codecvt_utf8_utf16 C; + C c; + char16_t w[2] = {0}; + char n[4] = {0xF1, 0x80, 0x80, 0x83}; + char16_t* wp = nullptr; + std::mbstate_t m; + const char* np = nullptr; + std::codecvt_base::result r = c.in(m, n, n+4, np, w, w+2, wp); + assert(r == std::codecvt_base::error); + assert(wp == w); + assert(np == n); + + n[0] = char(0xE1); + n[1] = char(0x80); + n[2] = char(0x85); + r = c.in(m, n, n+3, np, w, w+2, wp); + assert(r == std::codecvt_base::error); + assert(wp == w); + assert(np == n); + + n[0] = char(0xD1); + n[1] = char(0x93); + r = c.in(m, n, n+2, np, w, w+2, wp); + assert(r == std::codecvt_base::ok); + assert(wp == w+1); + assert(np == n+2); + assert(w[0] == 0x0453); + + n[0] = char(0x56); + r = c.in(m, n, n+1, np, w, w+2, wp); + assert(r == std::codecvt_base::ok); + assert(wp == w+1); + assert(np == n+1); + assert(w[0] == 0x0056); + } + { + typedef std::codecvt_utf8_utf16 C; + C c; + char16_t w[2] = {0}; + char n[7] = {0xEF, 0xBB, 0xBF, 0xF1, 0x80, 0x80, 0x83}; + char16_t* wp = nullptr; + std::mbstate_t m; + 
const char* np = nullptr; + std::codecvt_base::result r = c.in(m, n, n+7, np, w, w+2, wp); + assert(r == std::codecvt_base::ok); + assert(wp == w+2); + assert(np == n+7); + assert(w[0] == 0xD8C0); + assert(w[1] == 0xDC03); + + n[0] = char(0xE1); + n[1] = char(0x80); + n[2] = char(0x85); + r = c.in(m, n, n+3, np, w, w+2, wp); + assert(r == std::codecvt_base::ok); + assert(wp == w+1); + assert(np == n+3); + assert(w[0] == 0x1005); + + n[0] = char(0xD1); + n[1] = char(0x93); + r = c.in(m, n, n+2, np, w, w+2, wp); + assert(r == std::codecvt_base::ok); + assert(wp == w+1); + assert(np == n+2); + assert(w[0] == 0x0453); + + n[0] = char(0x56); + r = c.in(m, n, n+1, np, w, w+2, wp); + assert(r == std::codecvt_base::ok); + assert(wp == w+1); + assert(np == n+1); + assert(w[0] == 0x0056); + } +} diff --git a/test/localization/locale.stdcvt/codecvt_utf8_utf16_length.pass.cpp b/test/localization/locale.stdcvt/codecvt_utf8_utf16_length.pass.cpp new file mode 100644 index 00000000..2d93657f --- /dev/null +++ b/test/localization/locale.stdcvt/codecvt_utf8_utf16_length.pass.cpp @@ -0,0 +1,235 @@ +//===----------------------------------------------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +// + +// template +// class codecvt_utf8_utf16 +// : public codecvt +// { +// // unspecified +// }; + +// int length(stateT& state, const externT* from, const externT* from_end, +// size_t max) const; + +#include +#include + +int main() +{ + { + typedef std::codecvt_utf8_utf16 C; + C c; + char n[4] = {0xF1, 0x80, 0x80, 0x83}; + std::mbstate_t m; + int r = c.length(m, n, n+4, 2); + assert(r == 4); + + n[0] = char(0xE1); + n[1] = char(0x80); + n[2] = char(0x85); + r = c.length(m, n, n+3, 2); + assert(r == 3); + + n[0] = char(0xD1); + n[1] = char(0x93); + r = c.length(m, n, n+2, 2); + assert(r == 2); + + n[0] = char(0x56); + r = c.length(m, n, n+1, 2); + assert(r == 1); + } + { + typedef std::codecvt_utf8_utf16 C; + C c; + char n[4] = {0xF1, 0x80, 0x80, 0x83}; + std::mbstate_t m; + int r = c.length(m, n, n+4, 2); + assert(r == 0); + + n[0] = char(0xE1); + n[1] = char(0x80); + n[2] = char(0x85); + r = c.length(m, n, n+3, 2); + assert(r == 0); + + n[0] = char(0xD1); + n[1] = char(0x93); + r = c.length(m, n, n+2, 2); + assert(r == 2); + + n[0] = char(0x56); + r = c.length(m, n, n+1, 2); + assert(r == 1); + } + { + typedef std::codecvt_utf8_utf16 C; + C c; + char n[7] = {0xEF, 0xBB, 0xBF, 0xF1, 0x80, 0x80, 0x83}; + std::mbstate_t m; + int r = c.length(m, n, n+7, 2); + assert(r == 7); + + n[0] = char(0xE1); + n[1] = char(0x80); + n[2] = char(0x85); + r = c.length(m, n, n+3, 2); + assert(r == 3); + + n[0] = char(0xD1); + n[1] = char(0x93); + r = c.length(m, n, n+2, 2); + assert(r == 2); + + n[0] = char(0x56); + r = c.length(m, n, n+1, 2); + assert(r == 1); + } + { + typedef std::codecvt_utf8_utf16 C; + C c; + char n[4] = {0xF1, 0x80, 0x80, 0x83}; + std::mbstate_t m; + int r = c.length(m, n, n+4, 2); + assert(r == 4); + + n[0] = char(0xE1); + n[1] = char(0x80); + n[2] = char(0x85); + r = c.length(m, n, n+3, 2); + assert(r == 3); + + n[0] = char(0xD1); + n[1] = char(0x93); + r = 
c.length(m, n, n+2, 2); + assert(r == 2); + + n[0] = char(0x56); + r = c.length(m, n, n+1, 2); + assert(r == 1); + } + { + typedef std::codecvt_utf8_utf16 C; + C c; + char n[4] = {0xF1, 0x80, 0x80, 0x83}; + std::mbstate_t m; + int r = c.length(m, n, n+4, 2); + assert(r == 0); + + n[0] = char(0xE1); + n[1] = char(0x80); + n[2] = char(0x85); + r = c.length(m, n, n+3, 2); + assert(r == 0); + + n[0] = char(0xD1); + n[1] = char(0x93); + r = c.length(m, n, n+2, 2); + assert(r == 2); + + n[0] = char(0x56); + r = c.length(m, n, n+1, 2); + assert(r == 1); + } + { + typedef std::codecvt_utf8_utf16 C; + C c; + char n[7] = {0xEF, 0xBB, 0xBF, 0xF1, 0x80, 0x80, 0x83}; + std::mbstate_t m; + int r = c.length(m, n, n+7, 2); + assert(r == 7); + + n[0] = char(0xE1); + n[1] = char(0x80); + n[2] = char(0x85); + r = c.length(m, n, n+3, 2); + assert(r == 3); + + n[0] = char(0xD1); + n[1] = char(0x93); + r = c.length(m, n, n+2, 2); + assert(r == 2); + + n[0] = char(0x56); + r = c.length(m, n, n+1, 2); + assert(r == 1); + } + { + typedef std::codecvt_utf8_utf16 C; + C c; + char n[4] = {0xF1, 0x80, 0x80, 0x83}; + std::mbstate_t m; + int r = c.length(m, n, n+4, 2); + assert(r == 4); + + n[0] = char(0xE1); + n[1] = char(0x80); + n[2] = char(0x85); + r = c.length(m, n, n+3, 2); + assert(r == 3); + + n[0] = char(0xD1); + n[1] = char(0x93); + r = c.length(m, n, n+2, 2); + assert(r == 2); + + n[0] = char(0x56); + r = c.length(m, n, n+1, 2); + assert(r == 1); + } + { + typedef std::codecvt_utf8_utf16 C; + C c; + char n[4] = {0xF1, 0x80, 0x80, 0x83}; + std::mbstate_t m; + int r = c.length(m, n, n+4, 2); + assert(r == 0); + + n[0] = char(0xE1); + n[1] = char(0x80); + n[2] = char(0x85); + r = c.length(m, n, n+3, 2); + assert(r == 0); + + n[0] = char(0xD1); + n[1] = char(0x93); + r = c.length(m, n, n+2, 2); + assert(r == 2); + + n[0] = char(0x56); + r = c.length(m, n, n+1, 2); + assert(r == 1); + } + { + typedef std::codecvt_utf8_utf16 C; + C c; + char n[7] = {0xEF, 0xBB, 0xBF, 0xF1, 0x80, 0x80, 
0x83}; + std::mbstate_t m; + int r = c.length(m, n, n+7, 2); + assert(r == 7); + + n[0] = char(0xE1); + n[1] = char(0x80); + n[2] = char(0x85); + r = c.length(m, n, n+3, 2); + assert(r == 3); + + n[0] = char(0xD1); + n[1] = char(0x93); + r = c.length(m, n, n+2, 2); + assert(r == 2); + + n[0] = char(0x56); + r = c.length(m, n, n+1, 2); + assert(r == 1); + } +} diff --git a/test/localization/locale.stdcvt/codecvt_utf8_utf16_max_length.pass.cpp b/test/localization/locale.stdcvt/codecvt_utf8_utf16_max_length.pass.cpp new file mode 100644 index 00000000..b869a285 --- /dev/null +++ b/test/localization/locale.stdcvt/codecvt_utf8_utf16_max_length.pass.cpp @@ -0,0 +1,63 @@ +//===----------------------------------------------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +// + +// template +// class codecvt_utf8_utf16 +// : public codecvt +// { +// // unspecified +// }; + +// int max_length() const throw(); + +#include +#include + +int main() +{ + { + typedef std::codecvt_utf8_utf16 C; + C c; + int r = c.max_length(); + assert(r == 4); + } + { + typedef std::codecvt_utf8_utf16 C; + C c; + int r = c.max_length(); + assert(r == 7); + } + { + typedef std::codecvt_utf8_utf16 C; + C c; + int r = c.max_length(); + assert(r == 4); + } + { + typedef std::codecvt_utf8_utf16 C; + C c; + int r = c.max_length(); + assert(r == 7); + } + { + typedef std::codecvt_utf8_utf16 C; + C c; + int r = c.max_length(); + assert(r == 4); + } + { + typedef std::codecvt_utf8_utf16 C; + C c; + int r = c.max_length(); + assert(r == 7); + } +} diff --git a/test/localization/locale.stdcvt/codecvt_utf8_utf16_out.pass.cpp b/test/localization/locale.stdcvt/codecvt_utf8_utf16_out.pass.cpp new file mode 100644 index 00000000..cb221410 --- /dev/null +++ 
b/test/localization/locale.stdcvt/codecvt_utf8_utf16_out.pass.cpp @@ -0,0 +1,415 @@ +//===----------------------------------------------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +// + +// template +// class codecvt_utf8_utf16 +// : public codecvt +// { +// // unspecified +// }; + +// result +// out(stateT& state, +// const internT* from, const internT* from_end, const internT*& from_next, +// externT* to, externT* to_end, externT*& to_next) const; + +#include +#include + +int main() +{ + { + typedef std::codecvt_utf8_utf16 C; + C c; + wchar_t w[2] = {0xD8C0, 0xDC03}; + char n[4] = {0}; + const wchar_t* wp = nullptr; + std::mbstate_t m; + char* np = nullptr; + std::codecvt_base::result r = c.out(m, w, w+2, wp, n, n+4, np); + assert(r == std::codecvt_base::ok); + assert(wp == w+2); + assert(np == n+4); + assert(n[0] == char(0xF1)); + assert(n[1] == char(0x80)); + assert(n[2] == char(0x80)); + assert(n[3] == char(0x83)); + + w[0] = 0x1005; + r = c.out(m, w, w+1, wp, n, n+4, np); + assert(r == std::codecvt_base::ok); + assert(wp == w+1); + assert(np == n+3); + assert(n[0] == char(0xE1)); + assert(n[1] == char(0x80)); + assert(n[2] == char(0x85)); + + w[0] = 0x453; + r = c.out(m, w, w+1, wp, n, n+4, np); + assert(r == std::codecvt_base::ok); + assert(wp == w+1); + assert(np == n+2); + assert(n[0] == char(0xD1)); + assert(n[1] == char(0x93)); + + w[0] = 0x56; + r = c.out(m, w, w+1, wp, n, n+4, np); + assert(r == std::codecvt_base::ok); + assert(wp == w+1); + assert(np == n+1); + assert(n[0] == char(0x56)); + } + { + typedef std::codecvt_utf8_utf16 C; + C c; + wchar_t w[2] = {0xD8C0, 0xDC03}; + char n[4] = {0}; + const wchar_t* wp = nullptr; + std::mbstate_t m; + char* np = nullptr; + std::codecvt_base::result r = c.out(m, w, w+2, wp, n, 
n+4, np); + assert(r == std::codecvt_base::error); + assert(wp == w); + assert(np == n); + + w[0] = 0x1005; + r = c.out(m, w, w+1, wp, n, n+4, np); + assert(r == std::codecvt_base::error); + assert(wp == w); + assert(np == n); + + w[0] = 0x453; + r = c.out(m, w, w+1, wp, n, n+4, np); + assert(r == std::codecvt_base::ok); + assert(wp == w+1); + assert(np == n+2); + assert(n[0] == char(0xD1)); + assert(n[1] == char(0x93)); + + w[0] = 0x56; + r = c.out(m, w, w+1, wp, n, n+4, np); + assert(r == std::codecvt_base::ok); + assert(wp == w+1); + assert(np == n+1); + assert(n[0] == char(0x56)); + } + { + typedef std::codecvt_utf8_utf16 C; + C c; + wchar_t w[2] = {0xD8C0, 0xDC03}; + char n[7] = {0}; + const wchar_t* wp = nullptr; + std::mbstate_t m; + char* np = nullptr; + std::codecvt_base::result r = c.out(m, w, w+2, wp, n, n+7, np); + assert(r == std::codecvt_base::ok); + assert(wp == w+2); + assert(np == n+7); + assert(n[0] == char(0xEF)); + assert(n[1] == char(0xBB)); + assert(n[2] == char(0xBF)); + assert(n[3] == char(0xF1)); + assert(n[4] == char(0x80)); + assert(n[5] == char(0x80)); + assert(n[6] == char(0x83)); + + w[0] = 0x1005; + r = c.out(m, w, w+1, wp, n, n+7, np); + assert(r == std::codecvt_base::ok); + assert(wp == w+1); + assert(np == n+6); + assert(n[0] == char(0xEF)); + assert(n[1] == char(0xBB)); + assert(n[2] == char(0xBF)); + assert(n[3] == char(0xE1)); + assert(n[4] == char(0x80)); + assert(n[5] == char(0x85)); + + w[0] = 0x453; + r = c.out(m, w, w+1, wp, n, n+7, np); + assert(r == std::codecvt_base::ok); + assert(wp == w+1); + assert(np == n+5); + assert(n[0] == char(0xEF)); + assert(n[1] == char(0xBB)); + assert(n[2] == char(0xBF)); + assert(n[3] == char(0xD1)); + assert(n[4] == char(0x93)); + + w[0] = 0x56; + r = c.out(m, w, w+1, wp, n, n+7, np); + assert(r == std::codecvt_base::ok); + assert(wp == w+1); + assert(np == n+4); + assert(n[0] == char(0xEF)); + assert(n[1] == char(0xBB)); + assert(n[2] == char(0xBF)); + assert(n[3] == char(0x56)); + } + { 
+ typedef std::codecvt_utf8_utf16 C; + C c; + char32_t w[2] = {0xD8C0, 0xDC03}; + char n[4] = {0}; + const char32_t* wp = nullptr; + std::mbstate_t m; + char* np = nullptr; + std::codecvt_base::result r = c.out(m, w, w+2, wp, n, n+4, np); + assert(r == std::codecvt_base::ok); + assert(wp == w+2); + assert(np == n+4); + assert(n[0] == char(0xF1)); + assert(n[1] == char(0x80)); + assert(n[2] == char(0x80)); + assert(n[3] == char(0x83)); + + w[0] = 0x1005; + r = c.out(m, w, w+1, wp, n, n+4, np); + assert(r == std::codecvt_base::ok); + assert(wp == w+1); + assert(np == n+3); + assert(n[0] == char(0xE1)); + assert(n[1] == char(0x80)); + assert(n[2] == char(0x85)); + + w[0] = 0x453; + r = c.out(m, w, w+1, wp, n, n+4, np); + assert(r == std::codecvt_base::ok); + assert(wp == w+1); + assert(np == n+2); + assert(n[0] == char(0xD1)); + assert(n[1] == char(0x93)); + + w[0] = 0x56; + r = c.out(m, w, w+1, wp, n, n+4, np); + assert(r == std::codecvt_base::ok); + assert(wp == w+1); + assert(np == n+1); + assert(n[0] == char(0x56)); + } + { + typedef std::codecvt_utf8_utf16 C; + C c; + char32_t w[2] = {0xD8C0, 0xDC03}; + char n[4] = {0}; + const char32_t* wp = nullptr; + std::mbstate_t m; + char* np = nullptr; + std::codecvt_base::result r = c.out(m, w, w+2, wp, n, n+4, np); + assert(r == std::codecvt_base::error); + assert(wp == w); + assert(np == n); + + w[0] = 0x1005; + r = c.out(m, w, w+1, wp, n, n+4, np); + assert(r == std::codecvt_base::error); + assert(wp == w); + assert(np == n); + + w[0] = 0x453; + r = c.out(m, w, w+1, wp, n, n+4, np); + assert(r == std::codecvt_base::ok); + assert(wp == w+1); + assert(np == n+2); + assert(n[0] == char(0xD1)); + assert(n[1] == char(0x93)); + + w[0] = 0x56; + r = c.out(m, w, w+1, wp, n, n+4, np); + assert(r == std::codecvt_base::ok); + assert(wp == w+1); + assert(np == n+1); + assert(n[0] == char(0x56)); + } + { + typedef std::codecvt_utf8_utf16 C; + C c; + char32_t w[2] = {0xD8C0, 0xDC03}; + char n[7] = {0}; + const char32_t* wp = 
nullptr; + std::mbstate_t m; + char* np = nullptr; + std::codecvt_base::result r = c.out(m, w, w+2, wp, n, n+7, np); + assert(r == std::codecvt_base::ok); + assert(wp == w+2); + assert(np == n+7); + assert(n[0] == char(0xEF)); + assert(n[1] == char(0xBB)); + assert(n[2] == char(0xBF)); + assert(n[3] == char(0xF1)); + assert(n[4] == char(0x80)); + assert(n[5] == char(0x80)); + assert(n[6] == char(0x83)); + + w[0] = 0x1005; + r = c.out(m, w, w+1, wp, n, n+7, np); + assert(r == std::codecvt_base::ok); + assert(wp == w+1); + assert(np == n+6); + assert(n[0] == char(0xEF)); + assert(n[1] == char(0xBB)); + assert(n[2] == char(0xBF)); + assert(n[3] == char(0xE1)); + assert(n[4] == char(0x80)); + assert(n[5] == char(0x85)); + + w[0] = 0x453; + r = c.out(m, w, w+1, wp, n, n+7, np); + assert(r == std::codecvt_base::ok); + assert(wp == w+1); + assert(np == n+5); + assert(n[0] == char(0xEF)); + assert(n[1] == char(0xBB)); + assert(n[2] == char(0xBF)); + assert(n[3] == char(0xD1)); + assert(n[4] == char(0x93)); + + w[0] = 0x56; + r = c.out(m, w, w+1, wp, n, n+7, np); + assert(r == std::codecvt_base::ok); + assert(wp == w+1); + assert(np == n+4); + assert(n[0] == char(0xEF)); + assert(n[1] == char(0xBB)); + assert(n[2] == char(0xBF)); + assert(n[3] == char(0x56)); + } + + { + typedef std::codecvt_utf8_utf16 C; + C c; + char16_t w[2] = {0xD8C0, 0xDC03}; + char n[4] = {0}; + const char16_t* wp = nullptr; + std::mbstate_t m; + char* np = nullptr; + std::codecvt_base::result r = c.out(m, w, w+2, wp, n, n+4, np); + assert(r == std::codecvt_base::ok); + assert(wp == w+2); + assert(np == n+4); + assert(n[0] == char(0xF1)); + assert(n[1] == char(0x80)); + assert(n[2] == char(0x80)); + assert(n[3] == char(0x83)); + + w[0] = 0x1005; + r = c.out(m, w, w+1, wp, n, n+4, np); + assert(r == std::codecvt_base::ok); + assert(wp == w+1); + assert(np == n+3); + assert(n[0] == char(0xE1)); + assert(n[1] == char(0x80)); + assert(n[2] == char(0x85)); + + w[0] = 0x453; + r = c.out(m, w, w+1, wp, n, 
n+4, np); + assert(r == std::codecvt_base::ok); + assert(wp == w+1); + assert(np == n+2); + assert(n[0] == char(0xD1)); + assert(n[1] == char(0x93)); + + w[0] = 0x56; + r = c.out(m, w, w+1, wp, n, n+4, np); + assert(r == std::codecvt_base::ok); + assert(wp == w+1); + assert(np == n+1); + assert(n[0] == char(0x56)); + } + { + typedef std::codecvt_utf8_utf16 C; + C c; + char16_t w[2] = {0xD8C0, 0xDC03}; + char n[4] = {0}; + const char16_t* wp = nullptr; + std::mbstate_t m; + char* np = nullptr; + std::codecvt_base::result r = c.out(m, w, w+2, wp, n, n+4, np); + assert(r == std::codecvt_base::error); + assert(wp == w); + assert(np == n); + + w[0] = 0x1005; + r = c.out(m, w, w+1, wp, n, n+4, np); + assert(r == std::codecvt_base::error); + assert(wp == w); + assert(np == n); + + w[0] = 0x453; + r = c.out(m, w, w+1, wp, n, n+4, np); + assert(r == std::codecvt_base::ok); + assert(wp == w+1); + assert(np == n+2); + assert(n[0] == char(0xD1)); + assert(n[1] == char(0x93)); + + w[0] = 0x56; + r = c.out(m, w, w+1, wp, n, n+4, np); + assert(r == std::codecvt_base::ok); + assert(wp == w+1); + assert(np == n+1); + assert(n[0] == char(0x56)); + } + { + typedef std::codecvt_utf8_utf16 C; + C c; + char16_t w[2] = {0xD8C0, 0xDC03}; + char n[7] = {0}; + const char16_t* wp = nullptr; + std::mbstate_t m; + char* np = nullptr; + std::codecvt_base::result r = c.out(m, w, w+2, wp, n, n+7, np); + assert(r == std::codecvt_base::ok); + assert(wp == w+2); + assert(np == n+7); + assert(n[0] == char(0xEF)); + assert(n[1] == char(0xBB)); + assert(n[2] == char(0xBF)); + assert(n[3] == char(0xF1)); + assert(n[4] == char(0x80)); + assert(n[5] == char(0x80)); + assert(n[6] == char(0x83)); + + w[0] = 0x1005; + r = c.out(m, w, w+1, wp, n, n+7, np); + assert(r == std::codecvt_base::ok); + assert(wp == w+1); + assert(np == n+6); + assert(n[0] == char(0xEF)); + assert(n[1] == char(0xBB)); + assert(n[2] == char(0xBF)); + assert(n[3] == char(0xE1)); + assert(n[4] == char(0x80)); + assert(n[5] == 
char(0x85)); + + w[0] = 0x453; + r = c.out(m, w, w+1, wp, n, n+7, np); + assert(r == std::codecvt_base::ok); + assert(wp == w+1); + assert(np == n+5); + assert(n[0] == char(0xEF)); + assert(n[1] == char(0xBB)); + assert(n[2] == char(0xBF)); + assert(n[3] == char(0xD1)); + assert(n[4] == char(0x93)); + + w[0] = 0x56; + r = c.out(m, w, w+1, wp, n, n+7, np); + assert(r == std::codecvt_base::ok); + assert(wp == w+1); + assert(np == n+4); + assert(n[0] == char(0xEF)); + assert(n[1] == char(0xBB)); + assert(n[2] == char(0xBF)); + assert(n[3] == char(0x56)); + } +} diff --git a/test/localization/locale.stdcvt/codecvt_utf8_utf16_unshift.pass.cpp b/test/localization/locale.stdcvt/codecvt_utf8_utf16_unshift.pass.cpp new file mode 100644 index 00000000..dc9e6557 --- /dev/null +++ b/test/localization/locale.stdcvt/codecvt_utf8_utf16_unshift.pass.cpp @@ -0,0 +1,56 @@ +//===----------------------------------------------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +// + +// template +// class codecvt_utf8_utf16 +// : public codecvt +// { +// // unspecified +// }; + +// result +// unshift(stateT& state, +// externT* to, externT* to_end, externT*& to_next) const; + +#include +#include + +int main() /* Calls unshift() through three codecvt_utf8_utf16 typedefs and asserts that every call returns codecvt_base::noconv. */ +{ + { + typedef std::codecvt_utf8_utf16 C; /* NOTE(review): the three scopes below read identically here; template arguments on this typedef may be missing - verify against the upstream test. */ + C c; + char n[4] = {0}; + std::mbstate_t m; /* NOTE(review): m has no initializer when it is handed to unshift - confirm the facet never reads the state. */ + char* np = nullptr; /* passed as the to_next argument; its value is not asserted afterwards */ + std::codecvt_base::result r = c.unshift(m, n, n+4, np); + assert(r == std::codecvt_base::noconv); + } + { + typedef std::codecvt_utf8_utf16 C; + C c; + char n[4] = {0}; + std::mbstate_t m; + char* np = nullptr; + std::codecvt_base::result r = c.unshift(m, n, n+4, np); + assert(r == std::codecvt_base::noconv); + } + { + typedef std::codecvt_utf8_utf16 C; + C c; + char n[4] = {0}; + std::mbstate_t m; + char* np = nullptr; + std::codecvt_base::result r = c.unshift(m, n, n+4, np); + assert(r == std::codecvt_base::noconv); + } +} diff --git a/test/localization/locale.stdcvt/version.pass.cpp b/test/localization/locale.stdcvt/version.pass.cpp new file mode 100644 index 00000000..916d9780 --- /dev/null +++ b/test/localization/locale.stdcvt/version.pass.cpp @@ -0,0 +1,20 @@ +//===----------------------------------------------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +// + +#include + +#ifndef _LIBCPP_VERSION /* compile-time check: stop with #error unless the macro is defined at this point */ +#error _LIBCPP_VERSION not defined +#endif + +int main() /* empty body: this file's only check is the preprocessor test above */ +{ +}