etk/etk/unicode.cpp

675 lines
20 KiB
C++

/**
* @author Edouard DUPIN
*
* @copyright 2011, Edouard DUPIN, all right reserved
*
* @license BSD v3 (see license file)
*/
// see : http://unicode.org/fr/charts/symbols.html#CombiningDiacriticalMarks
#include <etk/types.h>
#include <etk/Debug.h>
#include <etk/unicodeTable.h>
#include <etk/unicode.h>
void unicode::convertIsoToUnicode(charset_te _inputCharset, const char _input_ISO, etk::UChar & _output_Unicode)
{
switch(_inputCharset)
{
case EDN_CHARSET_ISO_8859_1: _output_Unicode.set(tableIso8859_1[(uint32_t)_input_ISO&0xFF]); break;
case EDN_CHARSET_ISO_8859_2: _output_Unicode.set(tableIso8859_2[(uint32_t)_input_ISO&0xFF]); break;
case EDN_CHARSET_ISO_8859_3: _output_Unicode.set(tableIso8859_3[(uint32_t)_input_ISO&0xFF]); break;
case EDN_CHARSET_ISO_8859_4: _output_Unicode.set(tableIso8859_4[(uint32_t)_input_ISO&0xFF]); break;
case EDN_CHARSET_ISO_8859_5: _output_Unicode.set(tableIso8859_5[(uint32_t)_input_ISO&0xFF]); break;
case EDN_CHARSET_ISO_8859_6: _output_Unicode.set(tableIso8859_6[(uint32_t)_input_ISO&0xFF]); break;
case EDN_CHARSET_ISO_8859_7: _output_Unicode.set(tableIso8859_7[(uint32_t)_input_ISO&0xFF]); break;
case EDN_CHARSET_ISO_8859_8: _output_Unicode.set(tableIso8859_8[(uint32_t)_input_ISO&0xFF]); break;
case EDN_CHARSET_ISO_8859_9: _output_Unicode.set(tableIso8859_9[(uint32_t)_input_ISO&0xFF]); break;
case EDN_CHARSET_ISO_8859_10: _output_Unicode.set(tableIso8859_10[(uint32_t)_input_ISO&0xFF]); break;
case EDN_CHARSET_ISO_8859_11: _output_Unicode.set(tableIso8859_11[(uint32_t)_input_ISO&0xFF]); break;
case EDN_CHARSET_ISO_8859_13: _output_Unicode.set(tableIso8859_13[(uint32_t)_input_ISO&0xFF]); break;
case EDN_CHARSET_ISO_8859_14: _output_Unicode.set(tableIso8859_14[(uint32_t)_input_ISO&0xFF]); break;
case EDN_CHARSET_ISO_8859_15: _output_Unicode.set(tableIso8859_15[(uint32_t)_input_ISO&0xFF]); break;
default :
TK_WARNING("Unknow charset ... " << _inputCharset);
_output_Unicode = '?';
break;
}
}
void unicode::convertUnicodeToIso(charset_te _inputCharset, const etk::UChar _input_Unicode, char & _output_ISO)
{
const uint32_t *tmpTable = NULL;
switch(_inputCharset)
{
case EDN_CHARSET_ISO_8859_1: tmpTable = tableIso8859_1; break;
case EDN_CHARSET_ISO_8859_2: tmpTable = tableIso8859_2; break;
case EDN_CHARSET_ISO_8859_3: tmpTable = tableIso8859_3; break;
case EDN_CHARSET_ISO_8859_4: tmpTable = tableIso8859_4; break;
case EDN_CHARSET_ISO_8859_5: tmpTable = tableIso8859_5; break;
case EDN_CHARSET_ISO_8859_6: tmpTable = tableIso8859_6; break;
case EDN_CHARSET_ISO_8859_7: tmpTable = tableIso8859_7; break;
case EDN_CHARSET_ISO_8859_8: tmpTable = tableIso8859_8; break;
case EDN_CHARSET_ISO_8859_9: tmpTable = tableIso8859_9; break;
case EDN_CHARSET_ISO_8859_10: tmpTable = tableIso8859_10; break;
case EDN_CHARSET_ISO_8859_11: tmpTable = tableIso8859_11; break;
case EDN_CHARSET_ISO_8859_13: tmpTable = tableIso8859_13; break;
case EDN_CHARSET_ISO_8859_14: tmpTable = tableIso8859_14; break;
case EDN_CHARSET_ISO_8859_15: tmpTable = tableIso8859_15; break;
default :
TK_WARNING("Unknow charset ... " << _inputCharset);
_output_ISO = '?';
return;
}
int32_t i;
for (i=0; i<256; i++) {
if (tmpTable[i] == _input_Unicode.get()) {
_output_ISO = (char)i;
return;
}
}
}
int32_t unicode::convertIsoToUnicode(charset_te _inputCharset, const etk::Vector<char>& _input_ISO, etk::Vector<etk::UChar>& _output_Unicode)
{
_output_Unicode.clear();
etk::UChar output;
for(int32_t iii=0; iii<_input_ISO.size(); iii++) {
convertIsoToUnicode(_inputCharset, (char)_input_ISO[iii], output);
_output_Unicode.pushBack(output);
}
if (_output_Unicode.size() == 0) {
_output_Unicode.pushBack(0);
} else if (_output_Unicode[_output_Unicode.size()-1] != 0) {
_output_Unicode.pushBack(0);
}
return _output_Unicode.size();
}
int32_t unicode::convertIsoToUnicode(charset_te _inputCharset, const etk::Vector<int8_t>& _input_ISO, etk::Vector<etk::UChar>& _output_Unicode)
{
_output_Unicode.clear();
etk::UChar output;
for(int32_t iii=0; iii<_input_ISO.size(); iii++) {
convertIsoToUnicode(_inputCharset, (char)_input_ISO[iii], output);
_output_Unicode.pushBack(output);
}
if (_output_Unicode.size() == 0) {
_output_Unicode.pushBack(0);
} else if (_output_Unicode[_output_Unicode.size()-1] != 0) {
_output_Unicode.pushBack(0);
}
return _output_Unicode.size();
}
int32_t unicode::convertUnicodeToIso(charset_te _inputCharset, const etk::Vector<etk::UChar>& _input_Unicode, etk::Vector<char>& _output_ISO)
{
_output_ISO.clear();
char output[10];
for(int32_t iii=0; iii<_input_Unicode.size(); iii++) {
_input_Unicode[iii].getUtf8(output);
char * tmp = output;
while(*tmp != '\0') {
_output_ISO.pushBack(*tmp);
tmp++;
}
}
_output_ISO.pushBack(0);
return _output_ISO.size();
}
int32_t unicode::convertUnicodeToIso(charset_te _inputCharset, const etk::Vector<etk::UChar>& _input_Unicode, etk::Vector<int8_t>& _output_ISO)
{
_output_ISO.clear();
char output[10];
for(int32_t iii=0; iii<_input_Unicode.size(); iii++) {
_input_Unicode[iii].getUtf8(output);
char * tmp = output;
while(*tmp != '\0') {
_output_ISO.pushBack(*tmp);
tmp++;
}
}
_output_ISO.pushBack(0);
return _output_ISO.size();
}
int32_t unicode::convertUnicodeToUtf8(const etk::Vector<etk::UChar>& _input_Unicode, etk::Vector<char>& _output_UTF8)
{
char output[10];
for (int32_t iii=0; iii<_input_Unicode.size(); iii++) {
_input_Unicode[iii].getUtf8(output);
char * tmp = output ;
while (*tmp != '\0') {
_output_UTF8.pushBack(*tmp);
tmp++;
}
}
_output_UTF8.pushBack('\0');
return _output_UTF8.size()-1;
}
int32_t unicode::convertUnicodeToUtf8(const etk::Vector<etk::UChar>& _input_Unicode, etk::Vector<int8_t>& _output_UTF8)
{
char output[10];
for (int32_t iii=0; iii<_input_Unicode.size(); iii++) {
_input_Unicode[iii].getUtf8(output);
char * tmp = output ;
while (*tmp != '\0') {
_output_UTF8.pushBack((int8_t)*tmp);
tmp++;
}
}
_output_UTF8.pushBack('\0');
return _output_UTF8.size()-1;
}
int32_t unicode::convertUtf8ToUnicode(const etk::Vector<char>& _input_UTF8, etk::Vector<etk::UChar>& _output_Unicode)
{
char tmpData[20];
int32_t pos = 0;
while (pos < _input_UTF8.size()) {
int32_t lenMax = _input_UTF8.size() - pos;
//4 case
if ( 1<=lenMax
&& 0x00 == (_input_UTF8[pos+0] & 0x80) )
{
tmpData[0] = _input_UTF8[pos+0];
tmpData[1] = '\0';
pos += 1;
} else if ( 2<=lenMax
&& 0xC0 == (_input_UTF8[pos+0] & 0xE0)
&& 0x80 == (_input_UTF8[pos+1] & 0xC0) ) {
tmpData[0] = _input_UTF8[pos+0];
tmpData[1] = _input_UTF8[pos+1];
tmpData[2] = '\0';
pos += 2;
} else if ( 3<=lenMax
&& 0xE0 == (_input_UTF8[pos+0] & 0xF0)
&& 0x80 == (_input_UTF8[pos+1] & 0xC0)
&& 0x80 == (_input_UTF8[pos+2] & 0xC0)) {
tmpData[0] = _input_UTF8[pos+0];
tmpData[1] = _input_UTF8[pos+1];
tmpData[2] = _input_UTF8[pos+2];
tmpData[3] = '\0';
pos += 3;
} else if ( 4<=lenMax
&& 0xF0 == (_input_UTF8[pos+0] & 0xF8)
&& 0x80 == (_input_UTF8[pos+1] & 0xC0)
&& 0x80 == (_input_UTF8[pos+2] & 0xC0)
&& 0x80 == (_input_UTF8[pos+3] & 0xC0)) {
tmpData[0] = _input_UTF8[pos+0];
tmpData[1] = _input_UTF8[pos+1];
tmpData[2] = _input_UTF8[pos+2];
tmpData[3] = _input_UTF8[pos+3];
tmpData[4] = '\0';
pos += 4;
} else {
tmpData[0] = '\0';
pos += 1;
}
etk::UChar tmpUnicode;
tmpUnicode.setUtf8(tmpData);
_output_Unicode.pushBack(tmpUnicode);
}
return 0;
}
int32_t unicode::convertUtf8ToUnicode(const etk::Vector<int8_t>& _input_UTF8, etk::Vector<etk::UChar>& _output_Unicode)
{
char tmpData[20];
int32_t pos = 0;
while (pos < _input_UTF8.size()) {
int32_t lenMax = _input_UTF8.size() - pos;
//4 case
if ( 1<=lenMax
&& 0x00 == (_input_UTF8[pos+0] & 0x80) )
{
tmpData[0] = _input_UTF8[pos+0];
tmpData[1] = '\0';
pos += 1;
} else if ( 2<=lenMax
&& 0xC0 == (_input_UTF8[pos+0] & 0xE0)
&& 0x80 == (_input_UTF8[pos+1] & 0xC0) ) {
tmpData[0] = _input_UTF8[pos+0];
tmpData[1] = _input_UTF8[pos+1];
tmpData[2] = '\0';
pos += 2;
} else if ( 3<=lenMax
&& 0xE0 == (_input_UTF8[pos+0] & 0xF0)
&& 0x80 == (_input_UTF8[pos+1] & 0xC0)
&& 0x80 == (_input_UTF8[pos+2] & 0xC0)) {
tmpData[0] = _input_UTF8[pos+0];
tmpData[1] = _input_UTF8[pos+1];
tmpData[2] = _input_UTF8[pos+2];
tmpData[3] = '\0';
pos += 3;
} else if ( 4<=lenMax
&& 0xF0 == (_input_UTF8[pos+0] & 0xF8)
&& 0x80 == (_input_UTF8[pos+1] & 0xC0)
&& 0x80 == (_input_UTF8[pos+2] & 0xC0)
&& 0x80 == (_input_UTF8[pos+3] & 0xC0)) {
tmpData[0] = _input_UTF8[pos+0];
tmpData[1] = _input_UTF8[pos+1];
tmpData[2] = _input_UTF8[pos+2];
tmpData[3] = _input_UTF8[pos+3];
tmpData[4] = '\0';
pos += 4;
} else {
tmpData[0] = '\0';
pos += 1;
}
etk::UChar tmpUnicode;
tmpUnicode.setUtf8(tmpData);
_output_Unicode.pushBack(tmpUnicode);
}
return 0;
}
int32_t unicode::convertUtf8ToUnicode(const char * _input_UTF8, etk::Vector<etk::UChar>& _output_Unicode)
{
char tmpData[20];
int32_t pos = 0;
if (NULL == _input_UTF8) {
return 0;
}
int32_t len = strlen(_input_UTF8);
while (pos < len) {
int32_t lenMax = len - pos;
//4 case
if ( 1<=lenMax
&& 0x00 == (_input_UTF8[pos+0] & 0x80) )
{
tmpData[0] = _input_UTF8[pos+0];
tmpData[1] = '\0';
pos += 1;
} else if ( 2<=lenMax
&& 0xC0 == (_input_UTF8[pos+0] & 0xE0)
&& 0x80 == (_input_UTF8[pos+1] & 0xC0) ) {
tmpData[0] = _input_UTF8[pos+0];
tmpData[1] = _input_UTF8[pos+1];
tmpData[2] = '\0';
pos += 2;
} else if ( 3<=lenMax
&& 0xE0 == (_input_UTF8[pos+0] & 0xF0)
&& 0x80 == (_input_UTF8[pos+1] & 0xC0)
&& 0x80 == (_input_UTF8[pos+2] & 0xC0)) {
tmpData[0] = _input_UTF8[pos+0];
tmpData[1] = _input_UTF8[pos+1];
tmpData[2] = _input_UTF8[pos+2];
tmpData[3] = '\0';
pos += 3;
} else if ( 4<=lenMax
&& 0xF0 == (_input_UTF8[pos+0] & 0xF8)
&& 0x80 == (_input_UTF8[pos+1] & 0xC0)
&& 0x80 == (_input_UTF8[pos+2] & 0xC0)
&& 0x80 == (_input_UTF8[pos+3] & 0xC0)) {
tmpData[0] = _input_UTF8[pos+0];
tmpData[1] = _input_UTF8[pos+1];
tmpData[2] = _input_UTF8[pos+2];
tmpData[3] = _input_UTF8[pos+3];
tmpData[4] = '\0';
pos += 4;
} else {
tmpData[0] = '\0';
pos += 1;
}
etk::UChar tmpUnicode;
tmpUnicode.setUtf8(tmpData);
_output_Unicode.pushBack(tmpUnicode);
}
return 0;
}
// Transform ISO <==> UTF-8
void unicode::convertIsoToUtf8(charset_te _inputCharset, const char _input_ISO, char * _output_UTF8)
{
etk::UChar tmpUnicode;
// concert Iso in UniCode
convertIsoToUnicode(_inputCharset, _input_ISO, tmpUnicode );
// convert UniCode in Utf-8
tmpUnicode.getUtf8(_output_UTF8);
}
void unicode::convertUtf8ToIso(charset_te _inputCharset, const char * _input_UTF8, char & _output_ISO)
{
etk::UChar tmpUnicode;
// convert Utf-8 in UniCode
tmpUnicode.setUtf8(_input_UTF8);
// concert UniCode in Iso
convertUnicodeToIso(_inputCharset, tmpUnicode, _output_ISO);
}
int32_t unicode::convertIsoToUtf8(charset_te _inputCharset, const etk::Vector<char>& _input_ISO, etk::Vector<char>& _output_UTF8)
{
TK_WARNING("TODO : not coded...");
return 0;
}
int32_t unicode::convertUtf8ToIso(charset_te _inputCharset, const etk::Vector<char>& _input_UTF8, etk::Vector<char>& _output_ISO)
{
TK_WARNING("TODO : not coded...");
return 0;
}
void unicode::utf8_SizeElement(const char * _data, int32_t _lenMax , uint8_t &_size, bool &_baseValid)
{
TK_ASSERT(0 <= _lenMax, "size can not be < 0 ...");
if (0 > _lenMax) {
_size = 0;
_baseValid = false;
return;
}
//4 case
if ( 1<=_lenMax
&& 0x00 == (_data[0] & 0x80) ) {
// One Char Element
_size = 1;
_baseValid = true;
} else if( 2<=_lenMax
&& 0xC0 == (_data[0] & 0xE0)
&& 0x80 == (_data[1] & 0xC0) ) {
_size = 2;
_baseValid = true;
} else if( 3<=_lenMax
&& 0xE0 == (_data[0] & 0xF0)
&& 0x80 == (_data[1] & 0xC0)
&& 0x80 == (_data[2] & 0xC0)) {
_size = 3;
_baseValid = true;
} else if( 4<=_lenMax
&& 0xF0 == (_data[0] & 0xF8)
&& 0x80 == (_data[1] & 0xC0)
&& 0x80 == (_data[2] & 0xC0)
&& 0x80 == (_data[3] & 0xC0)) {
_size = 4;
_baseValid = true;
} else {
// return only one error Caracter ...
_baseValid = false;
_size = 1;
}
}
#if 0 // Remove for the moment ...
/**
* @brief Get the number of element of the previous UTF8 char (in the curent Buffer)
*
* @param[in] data pointer on the curent CHAR string (pointer on the allocated buffer) (the curent char is not check)
* @param[out] size Nb of char use in this UTF8 [0..4]
* @param[out] baseValid true : the ase format of the UTF8 is CORRECT
*
* @return ---
*
*/
static void Utf8_SizePreviousElement(const char * data, int32_t lenMax, uint8_t &size, bool &baseValid)
{
EDN_ASSERT(0 <= lenMax, "size can not be < 0 ...");
if (0 > lenMax) {
size = 0;
baseValid = false;
return;
}
//4 case
if ( 1<=lenMax
&& 0x00 == (data[-1] & 0x80) ) {
// One Char Element
size = 1;
baseValid = true;
} else if( 2<=lenMax
&& 0xC0 == (data[-2] & 0xE0)
&& 0x80 == (data[-1] & 0xC0) ) {
size = 2;
baseValid = true;
} else if( 3<=lenMax
&& 0xE0 == (data[-3] & 0xF0)
&& 0x80 == (data[-2] & 0xC0)
&& 0x80 == (data[-1] & 0xC0)) {
size = 3;
baseValid = true;
} else if( 4<=lenMax
&& 0xF0 == (data[-4] & 0xF8)
&& 0x80 == (data[-3] & 0xC0)
&& 0x80 == (data[-2] & 0xC0)
&& 0x80 == (data[-1] & 0xC0)) {
size = 4;
baseValid = true;
} else {
// return only one error Caracter ...
baseValid = false;
size = 1;
}
}
#endif
/**
* @brief
*
* @param[in,out]
*
* @return
*
*/
/*
static uint32_t Utf8_GetValue(UTF8Element_ts &Element)
{
uint32_t value = 0;
const char * data = m_data + Element.CharPosition;
//4 case
switch(Element.CharSize)
{
case 1:
value = data[0] & 0x7F;
break;
case 2:
value = (data[0] & 0x1F)<< 6;
value += data[1] & 0x3F;
break;
case 3:
value = (data[0] & 0x0F)<< 12;
value += (data[1] & 0x3F)<< 6;
value += data[2] & 0x3F;
break;
case 4:
value = (data[0] & 0x07)<< 18;
value += (data[1] & 0x3F)<< 12;
value += (data[2] & 0x3F)<< 6;
value += data[3] & 0x3F;
break;
default:
// return only one error Caracter ...
EDN_ASSERT(false, "impossible case....");
break;
}
// check the validity of the UTF8 ...
if( ( 0xD800 <= value
&& 0xDFFF >= value )
|| ( 0xFDD0 <= value
&& 0xFDEF >= value )
|| ( 0xFFFE <= value
&& 0xFFFF >= value )
|| ( 0x1FFFE <= value
&& 0x1FFFF >= value )
|| ( 0x2FFFE <= value
&& 0xDFFFF >= value )
|| ( 0xEFFFE <= value
&& 0xEFFFF >= value )
|| ( 0xFFFFE <= value
&& 0xFFFFF >= value )
|| ( 0x10FFFE <= value
&& 0x10FFFF >= value ) )
{
// overwrite the UTF8 validity ==> this is not a diaplayable element
Element.ValidUTF8 = false;
return value;
}
return value;
}
*/
int32_t unicode::strUtf8Len(const char *input_UTF8)
{
int32_t count = 0;
int32_t size = strlen(input_UTF8);
uint8_t tmpSize;
bool baseValid;
while (size > 0) {
utf8_SizeElement(input_UTF8, size , tmpSize, baseValid);
input_UTF8 += tmpSize;
size -= tmpSize;
count++;
}
return count;
}
// **************************************************************************************************************
// simple convertion optention
// **************************************************************************************************************
#if 0
Procedure de recuperation des charset sans ce casser les ...
// generate the basic file
FILE * mfile = NULL;
mfile = fopen("fichierIsoBase", "wb");
if (NULL == mfile) {
EDN_ERROR("Error to create file");
return false;
}
char newline = '\n';
for(int32_t i=0x20; i<0x100; i++) {
char plop = i;
fwrite(&plop, sizeof(char), 1, mfile);
fwrite(&newline, sizeof(char), 1, mfile);
}
fclose(mfile);
// console script to convert files :
iconv -c --from-code=ISO-8859-1 --to-code=UTF-8 -o fichierUTF8_iso-1 fichierIsoBase
iconv -c --from-code=ISO-8859-2 --to-code=UTF-8 -o fichierUTF8_iso-2 fichierIsoBase
iconv -c --from-code=ISO-8859-3 --to-code=UTF-8 -o fichierUTF8_iso-3 fichierIsoBase
iconv -c --from-code=ISO-8859-4 --to-code=UTF-8 -o fichierUTF8_iso-4 fichierIsoBase
iconv -c --from-code=ISO-8859-5 --to-code=UTF-8 -o fichierUTF8_iso-5 fichierIsoBase
iconv -c --from-code=ISO-8859-6 --to-code=UTF-8 -o fichierUTF8_iso-6 fichierIsoBase
iconv -c --from-code=ISO-8859-7 --to-code=UTF-8 -o fichierUTF8_iso-7 fichierIsoBase
iconv -c --from-code=ISO-8859-8 --to-code=UTF-8 -o fichierUTF8_iso-8 fichierIsoBase
iconv -c --from-code=ISO-8859-9 --to-code=UTF-8 -o fichierUTF8_iso-9 fichierIsoBase
iconv -c --from-code=ISO-8859-10 --to-code=UTF-8 -o fichierUTF8_iso-10 fichierIsoBase
iconv -c --from-code=ISO-8859-11 --to-code=UTF-8 -o fichierUTF8_iso-11 fichierIsoBase
iconv -c --from-code=ISO-8859-12 --to-code=UTF-8 -o fichierUTF8_iso-12 fichierIsoBase
iconv -c --from-code=ISO-8859-13 --to-code=UTF-8 -o fichierUTF8_iso-13 fichierIsoBase
iconv -c --from-code=ISO-8859-14 --to-code=UTF-8 -o fichierUTF8_iso-14 fichierIsoBase
iconv -c --from-code=ISO-8859-15 --to-code=UTF-8 -o fichierUTF8_iso-15 fichierIsoBase
// NOTE : Le format 12 n'existe pas ...
FILE * mfileout = NULL;
mfileout = fopen("outputGeneration.c", "wb");
if (NULL == mfileout) {
EDN_ERROR("Error to create file");
return false;
}
char * inputFileData[] = {
"fichierUTF8_iso-1",
"fichierUTF8_iso-2",
// "fichierUTF8_iso-3",
"fichierUTF8_iso-4",
"fichierUTF8_iso-5",
/* "fichierUTF8_iso-6",
"fichierUTF8_iso-7",
"fichierUTF8_iso-8",
"fichierUTF8_iso-9",
"fichierUTF8_iso-10",
"fichierUTF8_iso-11",
"fichierUTF8_iso-13",
"fichierUTF8_iso-14",
*/
"fichierUTF8_iso-15"
};
for (int32_t k=0; k<5; k++) {
FILE * mfile = NULL;
mfile = fopen(inputFileData[k], "rb");
if (NULL == mfile) {
EDN_ERROR("Error to open file");
return false;
}
char data[255] ;
fprintf(mfileout, "\tTYPESTRUCT_TS %s[] = {\n\t\t", inputFileData[k]);
for(int32_t i=0x0; i<0x10; i++) {
fprintf(mfileout, "0x%08X, ", i);
}
fprintf(mfileout, "\n\t\t");
for(int32_t i=0x10; i<0x20; i++) {
fprintf(mfileout, "0x%08X, ", i);
}
for(int32_t i=0x20; i<0x100; i++) {
if (0==i%16) {
fprintf(mfileout, "\n\t\t");
}
fgets(data, 25, mfile );
data[strlen(data)-1] = '\0';
EDN_INFO("sizeofLine=" << strlen(data) << " data=\"" << data << "\"");
// convert in int :
int32_t valUTF8 = 0;
int32_t valUnicode = 0;
switch (strlen(data)) {
case 1:
valUTF8 = (uint8_t) (data[0]);
valUnicode = (uint8_t)(data[0]) & 0x7F;
break;
case 2:
valUTF8 = (uint8_t) (data[0]) << 8;
valUTF8 += (uint8_t) (data[1]);
valUnicode = (((uint8_t)data[0]) & 0x1F)<< 6;
valUnicode += ((uint8_t)data[1]) & 0x3F;
break;
case 3:
valUTF8 = (uint8_t) (data[0]) << 16;
valUTF8 += (uint8_t) (data[1]) << 8;
valUTF8 += (uint8_t) (data[2]);
valUnicode = (((uint8_t)data[0]) & 0x0F)<< 12;
valUnicode += (((uint8_t)data[1]) & 0x3F)<< 6;
valUnicode += ((uint8_t)data[2]) & 0x3F;
break;
default:
valUTF8 = (uint8_t) (data[0]) <<24;
valUTF8 += (uint8_t) (data[1]) << 16;
valUTF8 += (uint8_t) (data[2]) << 8;
valUTF8 += (uint8_t) (data[3]);
valUnicode = (((uint8_t)data[0]) & 0x07)<< 18;
valUnicode += (((uint8_t)data[1]) & 0x3F)<< 12;
valUnicode += (((uint8_t)data[2]) & 0x3F)<< 6;
valUnicode += ((uint8_t)data[3]) & 0x3F;
break;
}
fprintf(mfileout, "0x%08X, ", valUnicode);
}
fprintf(mfileout, "\n\t};\n\n");
fclose(mfile);
}
fclose(mfileout);
#endif