9 #ifndef BOOST_NOWIDE_UTF_HPP_INCLUDED 10 #define BOOST_NOWIDE_UTF_HPP_INCLUDED 47 if(0xD800 <= v && v <= 0xDFFF)
52 #ifdef BOOST_NOWIDE_DOXYGEN 53 template<
typename CharType,
int size = sizeof(CharType)>
77 template<
typename Iterator>
121 template<
typename Iterator>
128 template<
typename Iterator>
134 template<
typename CharType,
int size = sizeof(CharType)>
137 template<
typename CharType>
144 unsigned char c = ci;
147 if(BOOST_UNLIKELY(c < 194))
153 if(BOOST_LIKELY(c <= 244))
165 }
else if(value <= 0x7FF)
168 }
else if(BOOST_LIKELY(value <= 0xFFFF))
179 unsigned char c = ci;
180 return (c & 0xC0) == 0x80;
188 template<
typename Iterator>
191 if(BOOST_UNLIKELY(p == e))
194 unsigned char lead = *p++;
199 if(BOOST_UNLIKELY(trail_size < 0))
207 code_point c = lead & ((1 << (6 - trail_size)) - 1);
214 if(BOOST_UNLIKELY(p == e))
219 c = (c << 6) | (tmp & 0x3F);
220 BOOST_NOWIDE_FALLTHROUGH;
222 if(BOOST_UNLIKELY(p == e))
227 c = (c << 6) | (tmp & 0x3F);
228 BOOST_NOWIDE_FALLTHROUGH;
230 if(BOOST_UNLIKELY(p == e))
235 c = (c << 6) | (tmp & 0x3F);
250 template<
typename Iterator>
253 unsigned char lead = *p++;
261 else if(BOOST_LIKELY(lead < 240))
266 code_point c = lead & ((1 << (6 - trail_size)) - 1);
270 case 3: c = (c << 6) | (static_cast<unsigned char>(*p++) & 0x3F); BOOST_NOWIDE_FALLTHROUGH;
271 case 2: c = (c << 6) | (static_cast<unsigned char>(*p++) & 0x3F); BOOST_NOWIDE_FALLTHROUGH;
272 case 1: c = (c << 6) | (static_cast<unsigned char>(*p++) & 0x3F);
278 template<
typename Iterator>
283 *out++ = static_cast<char_type>(value);
284 }
else if(value <= 0x7FF)
286 *out++ = static_cast<char_type>((value >> 6) | 0xC0);
287 *out++ = static_cast<char_type>((value & 0x3F) | 0x80);
288 }
else if(BOOST_LIKELY(value <= 0xFFFF))
290 *out++ = static_cast<char_type>((value >> 12) | 0xE0);
291 *out++ = static_cast<char_type>(((value >> 6) & 0x3F) | 0x80);
292 *out++ = static_cast<char_type>((value & 0x3F) | 0x80);
295 *out++ = static_cast<char_type>((value >> 18) | 0xF0);
296 *out++ = static_cast<char_type>(((value >> 12) & 0x3F) | 0x80);
297 *out++ = static_cast<char_type>(((value >> 6) & 0x3F) | 0x80);
298 *out++ = static_cast<char_type>((value & 0x3F) | 0x80);
304 template<
typename CharType>
305 struct utf_traits<CharType, 2>
310 static bool is_first_surrogate(uint16_t x)
312 return 0xD800 <= x && x <= 0xDBFF;
314 static bool is_second_surrogate(uint16_t x)
316 return 0xDC00 <= x && x <= 0xDFFF;
318 static code_point combine_surrogate(uint16_t w1, uint16_t w2)
320 return ((
code_point(w1 & 0x3FF) << 10) | (w2 & 0x3FF)) + 0x10000;
324 if(is_first_surrogate(c))
326 if(is_second_surrogate(c))
335 return is_second_surrogate(c);
342 return !is_second_surrogate(c);
345 template<
typename It>
348 if(BOOST_UNLIKELY(current == last))
350 uint16_t w1 = *current++;
351 if(BOOST_LIKELY(w1 < 0xD800 || 0xDFFF < w1))
359 uint16_t w2 = *current++;
360 if(w2 < 0xDC00 || 0xDFFF < w2)
362 return combine_surrogate(w1, w2);
364 template<
typename It>
367 uint16_t w1 = *current++;
368 if(BOOST_LIKELY(w1 < 0xD800 || 0xDFFF < w1))
372 uint16_t w2 = *current++;
373 return combine_surrogate(w1, w2);
379 return u >= 0x10000 ? 2 : 1;
381 template<
typename It>
384 if(BOOST_LIKELY(u <= 0xFFFF))
386 *out++ = static_cast<char_type>(u);
390 *out++ = static_cast<char_type>(0xD800 | (u >> 10));
391 *out++ = static_cast<char_type>(0xDC00 | (u & 0x3FF));
397 template<
typename CharType>
398 struct utf_traits<CharType, 4>
416 template<
typename It>
422 template<
typename It>
425 if(BOOST_UNLIKELY(current == last))
437 template<
typename It>
440 *out++ = static_cast<char_type>(u);
static const int max_width
Definition: utf.hpp:87
UTF Traits class - functions to convert UTF sequences to and from Unicode code points.
Definition: utf.hpp:57
static bool is_trail(char_type c)
Namespace that holds basic operations on UTF encoded sequences.
Definition: convert.hpp:20
static Iterator encode(code_point value, Iterator out)
static bool is_lead(char_type c)
static const code_point illegal
Special constant that defines illegal code point.
Definition: utf.hpp:33
uint32_t code_point
The integral type that can hold a Unicode code point.
Definition: utf.hpp:28
CharType char_type
Definition: utf.hpp:62
static code_point decode_valid(Iterator &p)
static int trail_length(char_type c)
static const code_point incomplete
Special constant that defines incomplete code point.
Definition: utf.hpp:38
static int width(code_point value)
bool is_valid_codepoint(code_point v)
the function checks if v is a valid code point
Definition: utf.hpp:43
static code_point decode(Iterator &p, Iterator e)