[conversions.string]

git-svn-id: https://llvm.org/svn/llvm-project/libcxx/trunk@105254 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Howard Hinnant 2010-05-31 20:58:54 +00:00
parent 87d1a8a4d8
commit d23b464e21
10 changed files with 627 additions and 5 deletions

View File

@ -81,9 +81,53 @@ template <class charT> bool isalnum (charT c, const locale& loc);
template <class charT> bool isgraph (charT c, const locale& loc);
template <class charT> charT toupper(charT c, const locale& loc);
template <class charT> charT tolower(charT c, const locale& loc);
template <class Codecvt, class Elem = wchar_t> class wstring_convert;
// Synopsis of wstring_convert: converts between a byte string and a wide
// string of Elem by driving a Codecvt facet that is passed in by pointer.
template<class Codecvt, class Elem = wchar_t,
class Wide_alloc = allocator<Elem>,
class Byte_alloc = allocator<char>>
class wstring_convert
{
public:
// String types produced by to_bytes() and from_bytes() respectively.
typedef basic_string<char, char_traits<char>, Byte_alloc> byte_string;
typedef basic_string<Elem, char_traits<Elem>, Wide_alloc> wide_string;
typedef typename Codecvt::state_type state_type;
typedef typename wide_string::traits_type::int_type int_type;
// The facet pointer is deleted by the destructor, so it must come from new.
wstring_convert(Codecvt* pcvt = new Codecvt);
wstring_convert(Codecvt* pcvt, state_type state);
// byte_err / wide_err are returned instead of throwing range_error when
// to_bytes() / from_bytes() fail; an empty error string means "throw".
wstring_convert(const byte_string& byte_err,
const wide_string& wide_err = wide_string());
~wstring_convert();
// Byte sequence -> wide string.  The single-char, C-string and byte_string
// overloads forward to the (first, last) overload.
wide_string from_bytes(char byte);
wide_string from_bytes(const char* ptr);
wide_string from_bytes(const byte_string& str);
wide_string from_bytes(const char* first, const char* last);
// Wide sequence -> byte string.  Same forwarding scheme as from_bytes().
byte_string to_bytes(Elem wchar);
byte_string to_bytes(const Elem* wptr);
byte_string to_bytes(const wide_string& wstr);
byte_string to_bytes(const Elem* first, const Elem* last);
// Number of source elements consumed by the most recent conversion call.
size_t converted() const;
// The conversion state stored in the object.
state_type state() const;
};
// Synopsis of wbuffer_convert: a wide stream buffer layered over a byte
// stream buffer, with a Codecvt facet passed in by pointer.
template <class Codecvt, class Elem = wchar_t, class Tr = char_traits<Elem>>
class wbuffer_convert;
// NOTE(review): the template header above is consumed by the forward
// declaration, leaving the class head below without one; the forward
// declaration looks like a leftover -- confirm which form is intended.
class wbuffer_convert
: public basic_streambuf<Elem, Tr>
{
public:
// NOTE(review): the implementation takes state_type from the Codecvt
// parameter, not from Tr -- confirm which one the synopsis should show.
typedef typename Tr::state_type state_type;
// The facet pointer is deleted by the destructor; bytebuf is only stored.
wbuffer_convert(streambuf* bytebuf = 0, Codecvt* pcvt = new Codecvt,
state_type state = state_type());
// Returns the byte stream buffer currently in use.
streambuf* rdbuf() const;
// Installs bytebuf as the byte stream buffer and returns the previous one.
streambuf* rdbuf(streambuf* bytebuf);
// The conversion state stored in the object.
state_type state() const;
};
// 22.4.1 and 22.4.1.3, ctype:
class ctype_base;
@ -3496,6 +3540,292 @@ protected:
extern template class messages_byname<char>;
extern template class messages_byname<wchar_t>;
// wstring_convert: converts between byte strings and wide strings of _Elem
// by driving a _Codecvt facet.  The facet is held through a raw pointer and
// deleted by the destructor, so copy construction and copy assignment are
// declared private to keep two objects from owning the same facet.
template<class _Codecvt, class _Elem = wchar_t,
         class _Wide_alloc = allocator<_Elem>,
         class _Byte_alloc = allocator<char> >
class wstring_convert
{
public:
    typedef basic_string<char, char_traits<char>, _Byte_alloc>   byte_string;
    typedef basic_string<_Elem, char_traits<_Elem>, _Wide_alloc> wide_string;
    typedef typename _Codecvt::state_type                        state_type;
    typedef typename wide_string::traits_type::int_type          int_type;

private:
    byte_string __byte_err_string_;  // returned by to_bytes() on failure when non-empty
    wide_string __wide_err_string_;  // returned by from_bytes() on failure when non-empty
    _Codecvt*   __cvtptr_;           // owned conversion facet
    state_type  __cvtstate_;         // state each conversion starts from
    size_t      __cvtcount_;         // source elements consumed by the last conversion

    // Not copyable.
    wstring_convert(const wstring_convert& __wc);
    wstring_convert& operator=(const wstring_convert& __wc);

public:
    wstring_convert(_Codecvt* __pcvt = new _Codecvt);
    wstring_convert(_Codecvt* __pcvt, state_type __state);
    wstring_convert(const byte_string& __byte_err,
                    const wide_string& __wide_err = wide_string());
#ifdef _LIBCPP_MOVE
    wstring_convert(wstring_convert&& __wc);
#endif
    ~wstring_convert();

    // from_bytes: every convenience overload funnels into the
    // (first, last) overload, which does the actual work.
    wide_string from_bytes(char __byte)
    {
        const char* __first = &__byte;
        return from_bytes(__first, __first + 1);
    }
    wide_string from_bytes(const char* __ptr)
    {
        const char* __last = __ptr + char_traits<char>::length(__ptr);
        return from_bytes(__ptr, __last);
    }
    wide_string from_bytes(const byte_string& __str)
    {
        const char* __first = __str.data();
        return from_bytes(__first, __first + __str.size());
    }
    wide_string from_bytes(const char* __first, const char* __last);

    // to_bytes: same forwarding scheme in the other direction.
    byte_string to_bytes(_Elem __wchar)
    {
        const _Elem* __first = &__wchar;
        return to_bytes(__first, __first + 1);
    }
    byte_string to_bytes(const _Elem* __wptr)
    {
        const _Elem* __last = __wptr + char_traits<_Elem>::length(__wptr);
        return to_bytes(__wptr, __last);
    }
    byte_string to_bytes(const wide_string& __wstr)
    {
        const _Elem* __first = __wstr.data();
        return to_bytes(__first, __first + __wstr.size());
    }
    byte_string to_bytes(const _Elem* __first, const _Elem* __last);

    // Observers.
    size_t converted() const
    {
        return __cvtcount_;
    }
    state_type state() const
    {
        return __cvtstate_;
    }
};
// Takes ownership of __pcvt (it is deleted by the destructor).  Both error
// strings start out empty, the conversion state is value-initialized and
// the converted count is zero.
template<class _Codecvt, class _Elem, class _Wide_alloc, class _Byte_alloc>
inline
wstring_convert<_Codecvt, _Elem, _Wide_alloc, _Byte_alloc>::
    wstring_convert(_Codecvt* __pcvt)
        : __byte_err_string_(),
          __wide_err_string_(),
          __cvtptr_(__pcvt),
          __cvtstate_(),
          __cvtcount_(0)
{
}
// Takes ownership of __pcvt (it is deleted by the destructor) and stores
// __state as the conversion state.  Both error strings start out empty and
// the converted count is zero.
template<class _Codecvt, class _Elem, class _Wide_alloc, class _Byte_alloc>
inline
wstring_convert<_Codecvt, _Elem, _Wide_alloc, _Byte_alloc>::
    wstring_convert(_Codecvt* __pcvt, state_type __state)
        : __byte_err_string_(),
          __wide_err_string_(),
          __cvtptr_(__pcvt),
          __cvtstate_(__state),
          __cvtcount_(0)
{
}
// Stores the strings that to_bytes() / from_bytes() hand back on failure
// instead of throwing, and allocates a fresh _Codecvt facet that the
// destructor later deletes.
template<class _Codecvt, class _Elem, class _Wide_alloc, class _Byte_alloc>
wstring_convert<_Codecvt, _Elem, _Wide_alloc, _Byte_alloc>::
    wstring_convert(const byte_string& __byte_err, const wide_string& __wide_err)
        : __byte_err_string_(__byte_err),
          __wide_err_string_(__wide_err),
          __cvtstate_(),
          __cvtcount_(0)
{
    // The facet is created last, after every other member has been set up.
    __cvtptr_ = new _Codecvt;
}
#ifdef _LIBCPP_MOVE
// Move constructor: steals the error strings and the facet pointer from
// __wc and copies its conversion state and converted count.  __wc is left
// with a null facet pointer so that its destructor does not delete the
// facet now owned by *this.
template<class _Codecvt, class _Elem, class _Wide_alloc, class _Byte_alloc>
inline
wstring_convert<_Codecvt, _Elem, _Wide_alloc, _Byte_alloc>::
    wstring_convert(wstring_convert&& __wc)
        : __byte_err_string_(_STD::move(__wc.__byte_err_string_)),
          __wide_err_string_(_STD::move(__wc.__wide_err_string_)),
          __cvtptr_(__wc.__cvtptr_),
          __cvtstate_(__wc.__cvtstate_),
          // The count must come from the source's count, not from its
          // conversion state.
          __cvtcount_(__wc.__cvtcount_)
{
    __wc.__cvtptr_ = nullptr;
}
#endif
// Destroys the conversion facet owned by this object.
template<class _Codecvt, class _Elem, class _Wide_alloc, class _Byte_alloc>
wstring_convert<_Codecvt, _Elem, _Wide_alloc, _Byte_alloc>::
    ~wstring_convert()
{
    // A moved-from object holds a null pointer here; deleting it is a no-op.
    delete __cvtptr_;
}
// Converts the byte sequence [__frm, __frm_end) to a wide string using the
// facet's in() member.
//
// Returns the converted string on success.  On failure (no facet, a
// conversion error, or a call to in() that makes no progress) returns the
// wide error string supplied at construction, or throws range_error when
// that string is empty.
//
// converted() afterwards reports how many bytes of the input were consumed.
template<class _Codecvt, class _Elem, class _Wide_alloc, class _Byte_alloc>
typename wstring_convert<_Codecvt, _Elem, _Wide_alloc, _Byte_alloc>::wide_string
wstring_convert<_Codecvt, _Elem, _Wide_alloc, _Byte_alloc>::
    from_bytes(const char* __frm, const char* __frm_end)
{
    __cvtcount_ = 0;
    if (__cvtptr_ != nullptr)
    {
        wide_string __ws(2*(__frm_end - __frm), _Elem());
        codecvt_base::result __r = codecvt_base::ok;
        state_type __st = __cvtstate_;
        if (__frm != __frm_end)
        {
            // Use everything the string already allocated as scratch space.
            // This is done only for non-empty input: an empty input must
            // yield an empty result, not a buffer full of _Elem().
            __ws.resize(__ws.capacity());
            _Elem* __to = &__ws[0];
            _Elem* __to_end = __to + __ws.size();
            const char* __frm_nxt;
            do
            {
                _Elem* __to_nxt;
                __r = __cvtptr_->in(__st, __frm, __frm_end, __frm_nxt,
                                    __to, __to_end, __to_nxt);
                if (__r == codecvt_base::noconv)
                {
                    // This only gets executed if _Elem is char.  It is
                    // tested before the no-progress check below because a
                    // facet reporting noconv leaves __frm_nxt == __frm.
                    __ws.resize(__to - &__ws[0]);
                    __ws.append((const _Elem*)__frm, (const _Elem*)__frm_end);
                    __cvtcount_ += __frm_end - __frm;
                    __frm = __frm_end;
                    __r = codecvt_base::ok;
                }
                else
                {
                    __cvtcount_ += __frm_nxt - __frm;
                    if (__frm_nxt == __frm)
                    {
                        // Nothing was consumed: treat as an error rather
                        // than looping forever.
                        __r = codecvt_base::error;
                    }
                    else if (__r == codecvt_base::ok)
                    {
                        // Trim the scratch space down to what was written.
                        __ws.resize(__to_nxt - &__ws[0]);
                        __frm = __frm_nxt;
                    }
                    else if (__r == codecvt_base::partial)
                    {
                        // Keep what has been produced so far, resize the
                        // buffer and continue after the consumed input.
                        ptrdiff_t __s = __to_nxt - &__ws[0];
                        __ws.resize(2 * __s);
                        __to = &__ws[0] + __s;
                        __to_end = &__ws[0] + __ws.size();
                        __frm = __frm_nxt;
                    }
                }
            } while (__r == codecvt_base::partial && __frm_nxt < __frm_end);
        }
        if (__r == codecvt_base::ok)
            return __ws;
    }
    if (__wide_err_string_.empty())
        throw range_error("wstring_convert: from_bytes error");
    return __wide_err_string_;
}
// Converts the wide sequence [__frm, __frm_end) to a byte string using the
// facet's out() member, then appends whatever unshift() produces to return
// the output to the initial shift state.
//
// Returns the converted string on success.  On failure (no facet, a
// conversion error, or a call to out() that makes no progress) returns the
// byte error string supplied at construction, or throws range_error when
// that string is empty.
//
// converted() afterwards reports how many wide elements were consumed.
template<class _Codecvt, class _Elem, class _Wide_alloc, class _Byte_alloc>
typename wstring_convert<_Codecvt, _Elem, _Wide_alloc, _Byte_alloc>::byte_string
wstring_convert<_Codecvt, _Elem, _Wide_alloc, _Byte_alloc>::
    to_bytes(const _Elem* __frm, const _Elem* __frm_end)
{
    __cvtcount_ = 0;
    if (__cvtptr_ != nullptr)
    {
        byte_string __bs(2*(__frm_end - __frm), char());
        codecvt_base::result __r = codecvt_base::ok;
        state_type __st = __cvtstate_;
        if (__frm != __frm_end)
        {
            // Use everything the string already allocated as scratch space.
            // Done only for non-empty input so that an empty input cannot
            // leave padding bytes in the result.
            __bs.resize(__bs.capacity());
            char* __to = &__bs[0];
            char* __to_end = __to + __bs.size();
            const _Elem* __frm_nxt;
            do
            {
                char* __to_nxt;
                __r = __cvtptr_->out(__st, __frm, __frm_end, __frm_nxt,
                                     __to, __to_end, __to_nxt);
                if (__r == codecvt_base::noconv)
                {
                    // This only gets executed if _Elem is char.  It is
                    // tested before the no-progress check below because a
                    // facet reporting noconv leaves __frm_nxt == __frm.
                    __bs.resize(__to - &__bs[0]);
                    __bs.append((const char*)__frm, (const char*)__frm_end);
                    __cvtcount_ += __frm_end - __frm;
                    __frm = __frm_end;
                    __r = codecvt_base::ok;
                }
                else
                {
                    __cvtcount_ += __frm_nxt - __frm;
                    if (__frm_nxt == __frm)
                    {
                        // Nothing was consumed: treat as an error rather
                        // than looping forever.
                        __r = codecvt_base::error;
                    }
                    else if (__r == codecvt_base::ok)
                    {
                        // Trim the scratch space down to what was written.
                        __bs.resize(__to_nxt - &__bs[0]);
                        __frm = __frm_nxt;
                    }
                    else if (__r == codecvt_base::partial)
                    {
                        // Keep what has been produced so far, resize the
                        // buffer and continue after the consumed input.
                        ptrdiff_t __s = __to_nxt - &__bs[0];
                        __bs.resize(2 * __s);
                        __to = &__bs[0] + __s;
                        __to_end = &__bs[0] + __bs.size();
                        __frm = __frm_nxt;
                    }
                }
            } while (__r == codecvt_base::partial && __frm_nxt < __frm_end);
        }
        if (__r == codecvt_base::ok)
        {
            // Append the unshift sequence after the bytes already produced.
            const size_t __used = __bs.size();
            __bs.resize(__bs.capacity());
            char* __to = &__bs[0] + __used;
            // The writable range ends at the end of the buffer, not
            // size() bytes beyond the current write position.
            char* __to_end = &__bs[0] + __bs.size();
            do
            {
                char* __to_nxt;
                __r = __cvtptr_->unshift(__st, __to, __to_end, __to_nxt);
                if (__r == codecvt_base::noconv)
                {
                    // Nothing to append: drop the scratch space.
                    __bs.resize(__to - &__bs[0]);
                    __r = codecvt_base::ok;
                }
                else if (__r == codecvt_base::ok)
                {
                    __bs.resize(__to_nxt - &__bs[0]);
                }
                else if (__r == codecvt_base::partial)
                {
                    // Out of room.  Grow from the full buffer size so the
                    // retry always has more space, even when nothing has
                    // been written yet.
                    const ptrdiff_t __sp = __to_nxt - &__bs[0];
                    __bs.resize(2 * __bs.size() + 1);
                    __to = &__bs[0] + __sp;
                    __to_end = &__bs[0] + __bs.size();
                }
            } while (__r == codecvt_base::partial);
            if (__r == codecvt_base::ok)
                return __bs;
        }
    }
    if (__byte_err_string_.empty())
        throw range_error("wstring_convert: to_bytes error");
    return __byte_err_string_;
}
// wbuffer_convert: a stream buffer of _Elem layered over a byte stream
// buffer.  It stores a pointer to the byte stream buffer (never deleted
// here) and a pointer to a _Codecvt facet, which the destructor deletes.
template <class _Codecvt, class _Elem = wchar_t, class _Tr = char_traits<_Elem> >
class wbuffer_convert
    : public basic_streambuf<_Elem, _Tr>
{
public:
    // types:
    typedef _Elem                          char_type;
    typedef _Tr                            traits_type;
    typedef typename traits_type::int_type int_type;
    typedef typename traits_type::pos_type pos_type;
    typedef typename traits_type::off_type off_type;
    typedef typename _Codecvt::state_type  state_type;

private:
    streambuf* __bufptr_;    // underlying byte stream buffer, not owned
    _Codecvt*  __cvtptr_;    // conversion facet, deleted by the destructor
    state_type __cvtstate_;  // conversion state given at construction

    // Not copyable: a copy would hold the same facet pointer and both
    // destructors would delete it.  Declared private, like the copy
    // operations of wstring_convert.
    wbuffer_convert(const wbuffer_convert&);
    wbuffer_convert& operator=(const wbuffer_convert&);

public:
    // __pcvt must point to a facet allocated with new (or be null); it is
    // deleted when this object is destroyed.
    wbuffer_convert(streambuf* __bytebuf = 0, _Codecvt* __pcvt = new _Codecvt,
                    state_type __state = state_type())
        : __bufptr_(__bytebuf), __cvtptr_(__pcvt), __cvtstate_(__state) {}

    ~wbuffer_convert() {delete __cvtptr_;}

    // Returns the byte stream buffer currently in use.
    streambuf* rdbuf() const {return __bufptr_;}

    // Installs __bytebuf as the byte stream buffer and returns the one it
    // replaces.
    streambuf* rdbuf(streambuf* __bytebuf)
    {
        streambuf* __r = __bufptr_;
        __bufptr_ = __bytebuf;
        return __r;
    }

    state_type state() const {return __cvtstate_;}

protected:
    virtual int_type overflow (int_type __c = traits_type::eof());
};
// Output is not implemented yet, so every call reports failure by
// returning traits_type::eof().  The function must return a value: running
// off the end of a function with a non-void return type is undefined
// behaviour.
// TODO: convert through the facet and write to the byte stream buffer.
template <class _Codecvt, class _Elem, class _Tr>
typename wbuffer_convert<_Codecvt, _Elem, _Tr>::int_type
wbuffer_convert<_Codecvt, _Elem, _Tr>::overflow(int_type __c)
{
    static_cast<void>(__c);  // unused until output is implemented
    return traits_type::eof();
}
_LIBCPP_END_NAMESPACE_STD
#endif // _LIBCPP_LOCALE

View File

@ -3197,7 +3197,7 @@ __codecvt_utf16<wchar_t, true>::do_encoding() const throw()
// Returns false: this facet does not act as a pass-through, so callers
// must go through in()/out().  This is the function's only return
// statement.
bool
__codecvt_utf16<wchar_t, true>::do_always_noconv() const throw()
{
    return false;
}
int
@ -3347,7 +3347,7 @@ __codecvt_utf16<char16_t, true>::do_encoding() const throw()
// Returns false: this facet does not act as a pass-through, so callers
// must go through in()/out().  This is the function's only return
// statement.
bool
__codecvt_utf16<char16_t, true>::do_always_noconv() const throw()
{
    return false;
}
int
@ -3497,7 +3497,7 @@ __codecvt_utf16<char32_t, true>::do_encoding() const throw()
// Returns false: this facet does not act as a pass-through, so callers
// must go through in()/out().  This is the function's only return
// statement.
bool
__codecvt_utf16<char32_t, true>::do_always_noconv() const throw()
{
    return false;
}
int

View File

@ -0,0 +1,32 @@
//===----------------------------------------------------------------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
// <locale>
// wstring_convert<Codecvt, Elem, Wide_alloc, Byte_alloc>
// size_t converted() const;
#include <locale>
#include <codecvt>
#include <cassert>
// Checks that converted() reports the number of source elements consumed
// by the most recent to_bytes() / from_bytes() call.
int main()
{
    typedef std::codecvt_utf8<wchar_t> Utf8Facet;
    typedef std::wstring_convert<Utf8Facet> Converter;

    Converter conv;
    assert(conv.converted() == 0);

    // One wide character in.
    std::string narrow = conv.to_bytes(L"\x40003");
    assert(conv.converted() == 1);

    // Two wide characters in.
    narrow = conv.to_bytes(L"\x40003\x65");
    assert(conv.converted() == 2);

    // Four bytes in.
    std::wstring wide = conv.from_bytes("\xF1\x80\x80\x83");
    assert(conv.converted() == 4);
}

View File

@ -0,0 +1,34 @@
//===----------------------------------------------------------------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
// <locale>
// wstring_convert<Codecvt, Elem, Wide_alloc, Byte_alloc>
// wstring_convert(Codecvt* pcvt = new Codecvt);
#include <locale>
#include <codecvt>
#include <cassert>
// Exercises wstring_convert(Codecvt* pcvt = new Codecvt) with and without
// an explicit facet argument.
int main()
{
    typedef std::codecvt_utf8<wchar_t> Utf8Facet;
    typedef std::wstring_convert<Utf8Facet> Converter;

    // Default argument: the converter creates its own facet.
    {
        Converter conv;
        assert(conv.converted() == 0);
    }

    // Facet supplied by the caller.
    {
        Converter conv(new Utf8Facet);
        assert(conv.converted() == 0);
    }
}

View File

@ -0,0 +1,28 @@
//===----------------------------------------------------------------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
// <locale>
// wstring_convert<Codecvt, Elem, Wide_alloc, Byte_alloc>
// wstring_convert(Codecvt* pcvt, state_type state);
#include <locale>
#include <codecvt>
#include <cassert>
// Exercises wstring_convert(Codecvt* pcvt, state_type state).
int main()
{
    typedef std::codecvt_utf8<wchar_t> Utf8Facet;
    typedef std::wstring_convert<Utf8Facet> Converter;

    Converter conv(new Utf8Facet, std::mbstate_t());
    assert(conv.converted() == 0);
}

View File

@ -0,0 +1,64 @@
//===----------------------------------------------------------------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
// <locale>
// wstring_convert<Codecvt, Elem, Wide_alloc, Byte_alloc>
// wstring_convert(const byte_string& byte_err,
// const wide_string& wide_err = wide_string());
#include <locale>
#include <codecvt>
#include <cassert>
// Exercises wstring_convert(byte_err, wide_err): a failed conversion throws
// range_error when the matching error string is empty and returns that
// string otherwise.
int main()
{
    typedef std::codecvt_utf8<wchar_t> Utf8Facet;
    typedef std::wstring_convert<Utf8Facet> Converter;

    // No error strings: both directions throw.
    {
        Converter conv;
        try
        {
            conv.to_bytes(L"\xDA83");
            assert(false);
        }
        catch (const std::range_error&)
        {
        }
        try
        {
            conv.from_bytes('\xA5');
            assert(false);
        }
        catch (const std::range_error&)
        {
        }
    }

    // Byte error string only: to_bytes returns it, from_bytes still throws.
    {
        Converter conv("byte error");
        std::string narrow = conv.to_bytes(L"\xDA83");
        assert(narrow == "byte error");
        try
        {
            conv.from_bytes('\xA5');
            assert(false);
        }
        catch (const std::range_error&)
        {
        }
    }

    // Both error strings: neither direction throws.
    {
        Converter conv("byte error", L"wide error");
        std::string narrow = conv.to_bytes(L"\xDA83");
        assert(narrow == "byte error");
        std::wstring wide = conv.from_bytes('\xA5');
        assert(wide == L"wide error");
    }
}

View File

@ -0,0 +1,37 @@
//===----------------------------------------------------------------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
// <locale>
// wstring_convert<Codecvt, Elem, Wide_alloc, Byte_alloc>
// wide_string from_bytes(char byte);
// wide_string from_bytes(const char* ptr);
// wide_string from_bytes(const byte_string& str);
// wide_string from_bytes(const char* first, const char* last);
#include <locale>
#include <codecvt>
#include <cassert>
// Exercises all four from_bytes() overloads with UTF-8 input.
int main()
{
    std::wstring_convert<std::codecvt_utf8<wchar_t> > conv;
    std::string utf8("\xF1\x80\x80\x83");

    // Single char.
    std::wstring wide = conv.from_bytes('a');
    assert(wide == L"a");

    // Null-terminated C string.
    wide = conv.from_bytes(utf8.c_str());
    assert(wide == L"\x40003");

    // byte_string.
    wide = conv.from_bytes(utf8);
    assert(wide == L"\x40003");

    // Pointer range.
    wide = conv.from_bytes(utf8.data(), utf8.data() + utf8.size());
    assert(wide == L"\x40003");
}

View File

@ -0,0 +1,25 @@
//===----------------------------------------------------------------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
// <locale>
// wstring_convert<Codecvt, Elem, Wide_alloc, Byte_alloc>
// state_type state() const;
#include <locale>
#include <codecvt>
// Compile-level check that state() yields something usable as mbstate_t.
int main()
{
    typedef std::wstring_convert<std::codecvt_utf8<wchar_t> > Converter;

    Converter conv;
    std::mbstate_t st = conv.state();
    static_cast<void>(st);  // the value itself is not inspected
}

View File

@ -0,0 +1,37 @@
//===----------------------------------------------------------------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
// <locale>
// wstring_convert<Codecvt, Elem, Wide_alloc, Byte_alloc>
// byte_string to_bytes(Elem wchar);
// byte_string to_bytes(const Elem* wptr);
// byte_string to_bytes(const wide_string& wstr);
// byte_string to_bytes(const Elem* first, const Elem* last);
#include <locale>
#include <codecvt>
#include <cassert>
// Exercises all four to_bytes() overloads; each must produce the same
// UTF-8 encoding of the one-character wide string.
int main()
{
    std::wstring_convert<std::codecvt_utf8<wchar_t> > conv;
    std::wstring wide(1, L'\x40003');

    // Single wide character.
    std::string narrow = conv.to_bytes(wide[0]);
    assert(narrow == "\xF1\x80\x80\x83");

    // Null-terminated wide C string.
    narrow = conv.to_bytes(wide.c_str());
    assert(narrow == "\xF1\x80\x80\x83");

    // wide_string.
    narrow = conv.to_bytes(wide);
    assert(narrow == "\xF1\x80\x80\x83");

    // Pointer range.
    narrow = conv.to_bytes(wide.data(), wide.data() + wide.size());
    assert(narrow == "\xF1\x80\x80\x83");
}

View File

@ -0,0 +1,35 @@
//===----------------------------------------------------------------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
// <locale>
// template<class Codecvt, class Elem = wchar_t,
// class Wide_alloc = allocator<Elem>,
// class Byte_alloc = allocator<char>>
// class wstring_convert
// {
// public:
// typedef basic_string<char, char_traits<char>, Byte_alloc> byte_string;
// typedef basic_string<Elem, char_traits<Elem>, Wide_alloc> wide_string;
// typedef typename Codecvt::state_type state_type;
// typedef typename wide_string::traits_type::int_type int_type;
#include <locale>
#include <codecvt>
// Compile-time check of the member typedefs of wstring_convert when it is
// instantiated with codecvt_utf8<wchar_t> and default allocators.
int main()
{
    typedef std::wstring_convert<std::codecvt_utf8<wchar_t> > Converter;

    static_assert((std::is_same<Converter::byte_string, std::string>::value), "");
    static_assert((std::is_same<Converter::wide_string, std::wstring>::value), "");
    static_assert((std::is_same<Converter::state_type, std::mbstate_t>::value), "");
    static_assert((std::is_same<Converter::int_type,
                                std::char_traits<wchar_t>::int_type>::value), "");
}