etk/etk/UChar.h

191 lines
5.7 KiB
C++

/**
* @author Edouard DUPIN
*
* @copyright 2011, Edouard DUPIN, all right reserved
*
* @license BSD v3 (see license file)
*/
#ifndef __ETK_UNI_CHAR_H__
#define __ETK_UNI_CHAR_H__
namespace etk {
//in the unicode section we have : [E000..F8FF] private area ==> we will store element in this area:
// internal define to permit to have all needed system
enum regExpPrivateSection {
REGEXP_OPCODE_PTHESE_IN=0xE000, /* ( */
REGEXP_OPCODE_PTHESE_OUT,/* ) */
REGEXP_OPCODE_BRACKET_IN,/* [ */
REGEXP_OPCODE_BRACKET_OUT,/* ] */
REGEXP_OPCODE_BRACE_IN,/* { */
REGEXP_OPCODE_BRACE_OUT,/* } */
REGEXP_OPCODE_TO,/* - */
REGEXP_OPCODE_STAR,/* * */
REGEXP_OPCODE_DOT,/* . */
REGEXP_OPCODE_QUESTION,/* ? */
REGEXP_OPCODE_PLUS,/* + */
REGEXP_OPCODE_PIPE,/* | */
REGEXP_OPCODE_START_OF_LINE,/* ^ this is also NOT, but not manage */
REGEXP_OPCODE_END_OF_LINE,/* $ */
REGEXP_OPCODE_DIGIT,/* \d */
REGEXP_OPCODE_DIGIT_NOT,/* \D */
REGEXP_OPCODE_LETTER,/* \l */
REGEXP_OPCODE_LETTER_NOT,/* \L */
REGEXP_OPCODE_SPACE,/* \s */
REGEXP_OPCODE_SPACE_NOT,/* \S */
REGEXP_OPCODE_WORD,/* \w */
REGEXP_OPCODE_WORD_NOT,/* \W */
REGEXP_OPCODE_NO_CHAR,/* \@ */
REGEXP_OPCODE_ERROR, // not used
};
class UChar {
public: // classic unicar code :
static const UChar Null; //!< '\0'
static const UChar Return; //!< '\n'
static const UChar CarrierReturn; //!< '\r' CR
static const UChar Tabulation; //!< '\t' TAB
static const UChar Suppress; //!< BS (SUPPRESS)
static const UChar Delete; //!< DEL
static const UChar Space; //!< ' ' SPACE
static const UChar Escape; //!< ESC Escape
private:
uint32_t m_value;
public:
// note : No preset at this element to prevent unneded set
UChar(void) {
};
UChar(const etk::UChar& _obj) :
m_value(_obj.m_value) {
};
UChar(const char _obj) :
m_value((uint32_t)_obj){
};
UChar(const enum regExpPrivateSection _obj) :
m_value((uint32_t)_obj) {
};
~UChar(void) {}
/*****************************************************
* = assigment
*****************************************************/
const etk::UChar& operator= (const etk::UChar& _obj ) {
m_value = _obj.m_value;
return *this;
};
/*****************************************************
* == operator
*****************************************************/
bool operator== (const etk::UChar& _obj) const {
return m_value == _obj.m_value;
};
bool compareNoCase(const etk::UChar& _obj) const;
/*****************************************************
* != operator
*****************************************************/
bool operator!= (const etk::UChar& _obj) const {
return m_value != _obj.m_value;
};
/*****************************************************
* > < >= <= operator
*****************************************************/
bool operator> (const etk::UChar& _obj) const {
return m_value > _obj.m_value;
};
bool operator>= (const etk::UChar& _obj) const {
return m_value >= _obj.m_value;
};
bool operator< (const etk::UChar& _obj) const {
return m_value < _obj.m_value;
};
bool operator<= (const etk::UChar& _obj) const {
return m_value <= _obj.m_value;
};
/*****************************************************
* += operator
*****************************************************/
const etk::UChar& operator+= (const etk::UChar& _obj) {
m_value += _obj.m_value;
return *this;
};
/*****************************************************
* + operator
*****************************************************/
etk::UChar operator+ (const etk::UChar& _obj) const {
etk::UChar tmp = *this;
tmp += _obj;
return tmp;
};
/*****************************************************
* -= operator
*****************************************************/
const etk::UChar& operator-= (const etk::UChar& _obj) {
if (_obj.m_value >= m_value) {
m_value = 0;
} else {
m_value -= _obj.m_value;
}
return *this;
};
/*****************************************************
* - operator
*****************************************************/
etk::UChar operator- (const etk::UChar& _obj) const {
etk::UChar tmp = *this;
tmp -= _obj;
return tmp;
};
/*****************************************************
* () operator
*****************************************************/
//operator uint32_t() const { return m_value; };
/**
* @brief check if the curent element is white or not : '\t' '\n' '\r' ' '
* @return tue if it is white char
*/
bool isWhiteChar(void) const;
bool isSpecialChar(void) const;
/**
* @brief check if the curent element is number or not
* @return tue if it is a number char
*/
bool isInteger(void) const;
int32_t toInt32(void) const;
void lower(void);
UChar toLower(void) const;
void upper(void);
UChar toUpper(void) const;
UChar changeOrder(void) const;
uint32_t get(void) const { return m_value; };
void set(uint32_t _val) { m_value = _val; };
uint32_t getUtf8(void) const;
int8_t getUtf8(char _output[5]) const;
//etk::Vector<int8_t> GetUtf8(void) const;
int8_t setUtf8(const char* _input);
public:
/**
* @brief Get the size of an utf8 char with his first char.
* @param[in] _input Char to parse
* @return number of char needed
*/
static int8_t theoricUTF8Len(const char _input);
/**
* @brief When parsing a string in a reverse mode, we need to know if we get the first char
* @param[in] _input Char to parse.
* @return true if it was the first char.
*/
static bool theoricUTF8First(const char _input);
};
};
#endif