etk/etk/UniChar.cpp

296 lines
6.8 KiB
C++

/**
* @author Edouard DUPIN
*
* @copyright 2011, Edouard DUPIN, all right reserved
*
* @license BSD v3 (see license file)
*/
#include <etk/types.h>
#include <etk/UniChar.h>
#include <etk/unicode.h>
#include <etk/DebugInternal.h>
#include <etk/Stream.h>
#include <etk/Vector.h>
#include <etk/Char.h>
// Definitions of the constant characters declared as static members of etk::UniChar.
// Each one is constructed from a single ASCII code.

// ASCII 0 (NUL).
const etk::UniChar etk::UniChar::Null('\0');
// ASCII line feed.
const etk::UniChar etk::UniChar::Return('\n');
// ASCII carriage return.
const etk::UniChar etk::UniChar::CarrierReturn('\r');
// ASCII horizontal tabulation.
const etk::UniChar etk::UniChar::Tabulation('\t');
// ASCII 127, i.e. the DEL control code.
const etk::UniChar etk::UniChar::Suppress((const char)127);
// ASCII 8, i.e. the BS (backspace) control code.
const etk::UniChar etk::UniChar::Delete((const char)8);
// ASCII space.
const etk::UniChar etk::UniChar::Space(' ');
// ASCII 27, i.e. the ESC control code.
const etk::UniChar etk::UniChar::Escape((const char)27);
/**
 * @brief Convert this character to lower case, in place.
 * @note Only the ASCII letters 'A'..'Z' are modified; any other value is left untouched.
 */
void etk::UniChar::Lower(void)
{
	const bool isUpperCase =    m_value >= static_cast<uint32_t>('A')
	                         && m_value <= static_cast<uint32_t>('Z');
	if (!isUpperCase) {
		return;
	}
	// offset between an upper-case ASCII letter and its lower-case form
	const uint32_t caseOffset = static_cast<uint32_t>('a') - static_cast<uint32_t>('A');
	m_value += caseOffset;
}
/**
 * @brief Get the lower-case form of this character without modifying it.
 * @return The lower-case letter when the value is in 'A'..'Z', the unchanged value otherwise.
 */
etk::UniChar etk::UniChar::ToLower(void) const
{
	const bool isUpperCase =    m_value >= static_cast<uint32_t>('A')
	                         && m_value <= static_cast<uint32_t>('Z');
	if (!isUpperCase) {
		// nothing to convert: not an ASCII upper-case letter
		return m_value;
	}
	return m_value + static_cast<uint32_t>('a') - static_cast<uint32_t>('A');
}
/**
 * @brief Convert this character to upper case, in place.
 * @note Only the ASCII letters 'a'..'z' are modified; any other value is left untouched.
 */
void etk::UniChar::Upper(void)
{
	const bool isLowerCase =    m_value >= static_cast<uint32_t>('a')
	                         && m_value <= static_cast<uint32_t>('z');
	if (!isLowerCase) {
		return;
	}
	// 'A' - 'a' wraps around in unsigned arithmetic, so adding it moves the value down to the upper-case letter
	const uint32_t caseOffset = static_cast<uint32_t>('A') - static_cast<uint32_t>('a');
	m_value += caseOffset;
}
/**
 * @brief Get the upper-case form of this character without modifying it.
 * @return The upper-case letter when the value is in 'a'..'z', the unchanged value otherwise.
 */
etk::UniChar etk::UniChar::ToUpper(void) const
{
	const bool isLowerCase =    m_value >= static_cast<uint32_t>('a')
	                         && m_value <= static_cast<uint32_t>('z');
	if (!isLowerCase) {
		// nothing to convert: not an ASCII lower-case letter
		return m_value;
	}
	return m_value + static_cast<uint32_t>('A') - static_cast<uint32_t>('a');
}
/**
 * @brief Compare two characters without taking the case into account.
 * @param[in] _obj Character to compare with this one.
 * @return true when both characters are equal once converted with ToUpper().
 */
bool etk::UniChar::CompareNoCase(const etk::UniChar& _obj) const
{
	etk::UniChar upperThis = ToUpper();
	etk::UniChar upperOther = _obj.ToUpper();
	return upperThis == upperOther;
}
/**
 * @brief Get the key used to sort this character in a case-interleaved alphabetical order.
 *
 * The 65 values from ':' (58) to 'z' (122) are permuted one-to-one inside that same range:
 *   - 58..109  : the letters, interleaved as A a B b ... Z z
 *   - 110..116 : the 7 characters ':' to '@' (shifted by 52, the number of letters)
 *   - 117..122 : the 6 characters '[' to '`' (shifted by 26)
 * Any value outside ':'..'z' is returned unchanged.
 *
 * @note The letters are based at ':' rather than 'A': with an 'A' base they would occupy
 *       65..116 and collide with the shifted ':'..'@' group (for example ':' and 'w' would
 *       both produce 110), which makes distinct characters compare as equal.
 * @return The sort key of the character.
 */
etk::UniChar etk::UniChar::ChangeOrder(void) const
{
	if (m_value >= 'A' && m_value <= 'Z') {
		// upper-case letters take the even slots of the letter area
		return (m_value - (uint32_t)'A')*2 + (uint32_t)':';
	}
	if (m_value >= 'a' && m_value <= 'z') {
		// lower-case letters take the slot just after their upper-case form
		return (m_value - (uint32_t)'a')*2 + (uint32_t)':' + 1;
	}
	if (m_value >= ':' && m_value <= '@') {
		// moved after the 52 letters
		return m_value + 52;
	}
	if (m_value >= '[' && m_value <= '`') {
		// the 26 upper-case letters that preceded this group are already accounted for by the
		// interleaving, only the 26 lower-case ones that followed it must be skipped
		return m_value + 26;
	}
	return m_value;
}
/**
 * @brief Check if the character is a white character.
 * @return true for space, tabulation, line feed and carriage return, false for anything else.
 */
bool etk::UniChar::IsWhiteChar(void) const
{
	switch (m_value) {
		case ' ':
		case '\t':
		case '\n':
		case '\r':
			return true;
		default:
			return false;
	}
}
/**
 * @brief Check if the character is a decimal digit.
 * @return true when the value is in '0'..'9', false otherwise.
 */
bool etk::UniChar::IsInteger(void) const
{
	const uint32_t firstDigit = static_cast<uint32_t>('0');
	const uint32_t lastDigit = static_cast<uint32_t>('9');
	return m_value >= firstDigit && m_value <= lastDigit;
}
/**
 * @brief Get the numeric value of a digit character.
 * @note No range check is done here: the result is simply the distance to '0', so it is only
 *       meaningful when IsInteger() is true.
 * @return The value of the character minus the code of '0'.
 */
int32_t etk::UniChar::ToInt32(void) const
{
	const uint32_t zeroCode = static_cast<uint32_t>('0');
	return m_value - zeroCode;
}
/*
etk::CCout& etk::operator <<(etk::CCout& _os, const etk::UniChar& _obj)
{
char output_UTF8[8];
unicode::convertUnicodeToUtf8(_obj, output_UTF8);
_os << &output_UTF8[0];
return _os;
}
*/
/**
 * @brief Get the UTF-8 encoding of this character packed in one 32-bit word.
 *
 * The encoded bytes are right-aligned in the word: the last byte of the sequence is in the
 * least significant byte and the lead byte is in the most significant byte that is used.
 *
 * @return The packed UTF-8 sequence, or 0 (after logging an error) when the value is above 1114111.
 */
uint32_t etk::UniChar::GetUtf8(void) const
{
	// 1 byte : 0xxxxxxx
	if (m_value <= 0x7F) {
		return m_value;
	}
	// 2 bytes : 110xxxxx 10xxxxxx
	if (m_value <= 0x7FF) {
		return   0x0000C080
		       | ((m_value & 0x000007C0) << 2)
		       |  (m_value & 0x0000003F);
	}
	// 3 bytes : 1110xxxx 10xxxxxx 10xxxxxx
	if (m_value <= 0xFFFF) {
		return   0x00E08080
		       | ((m_value & 0x0000F000) << 4)
		       | ((m_value & 0x00000FC0) << 2)
		       |  (m_value & 0x0000003F);
	}
	// 4 bytes : 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
	if (m_value <= 0x10FFFF) {
		return   0xF0808080
		       | ((m_value & 0x001C0000) << 6)
		       | ((m_value & 0x0003F000) << 4)
		       | ((m_value & 0x00000FC0) << 2)
		       |  (m_value & 0x0000003F);
	}
	// the value can not be encoded
	TK_ERROR("NON UTF8 caracter input...");
	return 0;
}
/**
 * @brief Write the UTF-8 encoding of this character in a buffer, followed by a '\0'.
 * @param[out] _output Destination buffer; up to 5 chars are written (4 bytes of data + '\0').
 * @note A packed value of 0 (as returned by GetUtf8(void) for the null character and for a
 *       value that can not be encoded) is written as one '\0' byte and reported with a size of 1.
 * @return The number of bytes written, the final '\0' not included (1 to 4).
 */
int8_t etk::UniChar::GetUtf8(char _output[5]) const
{
	const uint32_t packed = GetUtf8();
	// the lead byte is never 0 in a multi-byte sequence, so the magnitude of the packed word
	// gives the number of bytes of the sequence
	int32_t nbByte = 4;
	if (packed <= 0xFF) {
		nbByte = 1;
	} else if (packed <= 0xFFFF) {
		nbByte = 2;
	} else if (packed <= 0xFFFFFF) {
		nbByte = 3;
	}
	// unpack from the most significant used byte to the least significant one
	for (int32_t iii = 0; iii < nbByte; ++iii) {
		const int32_t shift = 8 * (nbByte - 1 - iii);
		_output[iii] = static_cast<char>((packed >> shift) & 0x000000FF);
	}
	_output[nbByte] = '\0';
	return static_cast<int8_t>(nbByte);
}
/*
etk::Vector<int8_t> etk::UniChar::GetUtf8(void) const
{
etk::Vector<int8_t> ret;
uint32_t value = GetUtf8();
if (0xFF >= value) {
ret.PushBack((char)value);
} else if (0xFFFF >= value) {
ret.PushBack((char)((value>>8) & 0x000000FF));
ret.PushBack((char)value);
} else if (0xFFFFFF >= value) {
ret.PushBack((char)((value>>16) & 0x000000FF));
ret.PushBack((char)((value>>8) & 0x000000FF));
ret.PushBack((char)value);
} else {
ret.PushBack((char)((value>>24) & 0x000000FF));
ret.PushBack((char)((value>>16) & 0x000000FF));
ret.PushBack((char)((value>>8) & 0x000000FF));
ret.PushBack((char)value);
}
return ret;
}
*/
/**
 * @brief Get the number of bytes of the UTF-8 element that starts at the beginning of a buffer.
 * @param[in] _data Buffer to inspect; it is only read when _lenMax is at least 1.
 * @param[in] _lenMax Number of bytes that can be read in _data (must not be negative).
 * @return 1 to 4 when a well-formed lead byte is followed by the expected continuation bytes
 *         inside the _lenMax limit, 0 in any other case.
 */
uint8_t SizeElement(const char* _data, int32_t _lenMax)
{
	TK_ASSERT(0 <= _lenMax, "size can not be < 0 ...");
	if (_lenMax < 1) {
		return 0;
	}
	// the lead byte gives the expected size of the element
	const char lead = _data[0];
	uint8_t expected = 0;
	if ((lead & 0x80) == 0x00) {
		expected = 1;
	} else if ((lead & 0xE0) == 0xC0) {
		expected = 2;
	} else if ((lead & 0xF0) == 0xE0) {
		expected = 3;
	} else if ((lead & 0xF8) == 0xF0) {
		expected = 4;
	} else {
		// not a valid lead byte
		return 0;
	}
	if (_lenMax < expected) {
		// the element is truncated
		return 0;
	}
	// every byte after the lead one must look like 10xxxxxx
	for (uint8_t iii = 1; iii < expected; ++iii) {
		if ((_data[iii] & 0xC0) != 0x80) {
			return 0;
		}
	}
	return expected;
}
/**
 * @brief Set the value of this character from the first UTF-8 element of a C string.
 * @param[in] _input '\0'-terminated UTF-8 string (can be NULL).
 * @return The number of bytes that have been decoded (1 to 4), or 0 when the input is NULL,
 *         empty, or does not start with a well-formed UTF-8 element.
 * @note When 0 is returned for a non-NULL input, the value is set to the first byte of the
 *       input, taken as an unsigned value in 0..255.
 */
int8_t etk::UniChar::SetUtf8(const char* _input)
{
	m_value = 0;
	if (NULL == _input) {
		return 0;
	}
	// An UTF-8 element is at most 4 bytes long: only measure that far instead of walking the
	// whole string, so decoding a long string one element at a time stays linear.
	int32_t len = 0;
	while (    len < 4
	        && _input[len] != '\0') {
		++len;
	}
	len = SizeElement(_input, len);
	switch (len) {
		default:
			// case 0 : An error occurred...
			// go through uint8_t like every other case, to avoid the sign extension of a
			// byte >= 0x80 when char is signed
			m_value = (uint8_t)_input[0];
			return 0;
		case 1:
			m_value = (uint8_t)(_input[0]) & 0x7F;
			return 1;
		case 2:
			m_value  = (((uint8_t)_input[0]) & 0x1F)<< 6;
			m_value +=  ((uint8_t)_input[1]) & 0x3F;
			return 2;
		case 3:
			m_value  = (((uint8_t)_input[0]) & 0x0F)<< 12;
			m_value += (((uint8_t)_input[1]) & 0x3F)<< 6;
			m_value +=  ((uint8_t)_input[2]) & 0x3F;
			return 3;
		case 4:
			m_value  = (((uint8_t)_input[0]) & 0x07)<< 18;
			m_value += (((uint8_t)_input[1]) & 0x3F)<< 12;
			m_value += (((uint8_t)_input[2]) & 0x3F)<< 6;
			m_value +=  ((uint8_t)_input[3]) & 0x3F;
			return 4;
	}
}
/**
 * @brief Get the size that an UTF-8 element should have, looking only at its first byte.
 * @param[in] _input First byte of the element.
 * @return 2, 3 or 4 for the lead bytes 110xxxxx, 1110xxxx and 11110xxx, and 1 for every other
 *         byte (0xxxxxxx, but also any byte that is not a valid lead byte).
 */
int8_t etk::UniChar::TheoricUTF8Len(const char _input)
{
	const uint8_t lead = static_cast<uint8_t>(_input);
	// 11110xxx
	if (lead >= 0xF0 && lead <= 0xF7) {
		return 4;
	}
	// 1110xxxx
	if (lead >= 0xE0 && lead <= 0xEF) {
		return 3;
	}
	// 110xxxxx
	if (lead >= 0xC0 && lead <= 0xDF) {
		return 2;
	}
	// 0xxxxxxx, or a byte that can not start an element
	return 1;
}
/**
 * @brief Check if a byte can be the first byte of an UTF-8 element.
 * @param[in] _input Byte to check.
 * @return false when the byte is a continuation byte (10xxxxxx), true otherwise.
 * @note Only the 10xxxxxx pattern is rejected: the lead bytes of the multi-byte elements
 *       (110xxxxx, 1110xxxx, 11110xxx) also have their high bit set but they do start an
 *       element, as TheoricUTF8Len() reports for them.
 */
bool etk::UniChar::TheoricUTF8First(const char _input)
{
	if((_input&0xC0) == 0x80 ) {
		return false;
	}
	return true;
}