2008-09-18 13:08:06 +02:00
|
|
|
/*************************************************
|
|
|
|
* Unicode Property Table handler *
|
|
|
|
*************************************************/
|
|
|
|
|
|
|
|
#ifndef _UCP_H
|
|
|
|
#define _UCP_H
|
|
|
|
|
|
|
|
/* This file contains definitions of the property values that are returned by
|
|
|
|
the function _pcre_ucp_findprop(). New values that are added for new releases
|
|
|
|
of Unicode should always be at the end of each enum, for backwards
|
|
|
|
compatibility. */
|
|
|
|
|
|
|
|
/* These are the general character categories. */
|
|
|
|
|
|
|
|
enum {
|
|
|
|
ucp_C, /* Other */
|
|
|
|
ucp_L, /* Letter */
|
|
|
|
ucp_M, /* Mark */
|
|
|
|
ucp_N, /* Number */
|
|
|
|
ucp_P, /* Punctuation */
|
|
|
|
ucp_S, /* Symbol */
|
|
|
|
ucp_Z /* Separator */
|
|
|
|
};
|
|
|
|
|
|
|
|
/* These are the particular character types. */
|
|
|
|
|
|
|
|
enum {
|
|
|
|
ucp_Cc, /* Control */
|
|
|
|
ucp_Cf, /* Format */
|
|
|
|
ucp_Cn, /* Unassigned */
|
|
|
|
ucp_Co, /* Private use */
|
|
|
|
ucp_Cs, /* Surrogate */
|
|
|
|
ucp_Ll, /* Lower case letter */
|
|
|
|
ucp_Lm, /* Modifier letter */
|
|
|
|
ucp_Lo, /* Other letter */
|
|
|
|
ucp_Lt, /* Title case letter */
|
|
|
|
ucp_Lu, /* Upper case letter */
|
|
|
|
ucp_Mc, /* Spacing mark */
|
|
|
|
ucp_Me, /* Enclosing mark */
|
|
|
|
ucp_Mn, /* Non-spacing mark */
|
|
|
|
ucp_Nd, /* Decimal number */
|
|
|
|
ucp_Nl, /* Letter number */
|
|
|
|
ucp_No, /* Other number */
|
|
|
|
ucp_Pc, /* Connector punctuation */
|
|
|
|
ucp_Pd, /* Dash punctuation */
|
|
|
|
ucp_Pe, /* Close punctuation */
|
|
|
|
ucp_Pf, /* Final punctuation */
|
|
|
|
ucp_Pi, /* Initial punctuation */
|
|
|
|
ucp_Po, /* Other punctuation */
|
|
|
|
ucp_Ps, /* Open punctuation */
|
|
|
|
ucp_Sc, /* Currency symbol */
|
|
|
|
ucp_Sk, /* Modifier symbol */
|
|
|
|
ucp_Sm, /* Mathematical symbol */
|
|
|
|
ucp_So, /* Other symbol */
|
|
|
|
ucp_Zl, /* Line separator */
|
|
|
|
ucp_Zp, /* Paragraph separator */
|
|
|
|
ucp_Zs /* Space separator */
|
|
|
|
};
|
|
|
|
|
|
|
|
/* These are the script identifications. */
|
|
|
|
|
|
|
|
enum {
|
|
|
|
ucp_Arabic,
|
|
|
|
ucp_Armenian,
|
|
|
|
ucp_Bengali,
|
|
|
|
ucp_Bopomofo,
|
|
|
|
ucp_Braille,
|
|
|
|
ucp_Buginese,
|
|
|
|
ucp_Buhid,
|
|
|
|
ucp_Canadian_Aboriginal,
|
|
|
|
ucp_Cherokee,
|
|
|
|
ucp_Common,
|
|
|
|
ucp_Coptic,
|
|
|
|
ucp_Cypriot,
|
|
|
|
ucp_Cyrillic,
|
|
|
|
ucp_Deseret,
|
|
|
|
ucp_Devanagari,
|
|
|
|
ucp_Ethiopic,
|
|
|
|
ucp_Georgian,
|
|
|
|
ucp_Glagolitic,
|
|
|
|
ucp_Gothic,
|
|
|
|
ucp_Greek,
|
|
|
|
ucp_Gujarati,
|
|
|
|
ucp_Gurmukhi,
|
|
|
|
ucp_Han,
|
|
|
|
ucp_Hangul,
|
|
|
|
ucp_Hanunoo,
|
|
|
|
ucp_Hebrew,
|
|
|
|
ucp_Hiragana,
|
|
|
|
ucp_Inherited,
|
|
|
|
ucp_Kannada,
|
|
|
|
ucp_Katakana,
|
|
|
|
ucp_Kharoshthi,
|
|
|
|
ucp_Khmer,
|
|
|
|
ucp_Lao,
|
|
|
|
ucp_Latin,
|
|
|
|
ucp_Limbu,
|
|
|
|
ucp_Linear_B,
|
|
|
|
ucp_Malayalam,
|
|
|
|
ucp_Mongolian,
|
|
|
|
ucp_Myanmar,
|
|
|
|
ucp_New_Tai_Lue,
|
|
|
|
ucp_Ogham,
|
|
|
|
ucp_Old_Italic,
|
|
|
|
ucp_Old_Persian,
|
|
|
|
ucp_Oriya,
|
|
|
|
ucp_Osmanya,
|
|
|
|
ucp_Runic,
|
|
|
|
ucp_Shavian,
|
|
|
|
ucp_Sinhala,
|
|
|
|
ucp_Syloti_Nagri,
|
|
|
|
ucp_Syriac,
|
|
|
|
ucp_Tagalog,
|
|
|
|
ucp_Tagbanwa,
|
|
|
|
ucp_Tai_Le,
|
|
|
|
ucp_Tamil,
|
|
|
|
ucp_Telugu,
|
|
|
|
ucp_Thaana,
|
|
|
|
ucp_Thai,
|
|
|
|
ucp_Tibetan,
|
|
|
|
ucp_Tifinagh,
|
|
|
|
ucp_Ugaritic,
|
|
|
|
ucp_Yi,
|
|
|
|
/* New for Unicode 5.0: */
|
|
|
|
ucp_Balinese,
|
|
|
|
ucp_Cuneiform,
|
|
|
|
ucp_Nko,
|
|
|
|
ucp_Phags_Pa,
|
|
|
|
ucp_Phoenician,
|
|
|
|
/* New for Unicode 5.1: */
|
|
|
|
ucp_Carian,
|
|
|
|
ucp_Cham,
|
|
|
|
ucp_Kayah_Li,
|
|
|
|
ucp_Lepcha,
|
|
|
|
ucp_Lycian,
|
|
|
|
ucp_Lydian,
|
|
|
|
ucp_Ol_Chiki,
|
|
|
|
ucp_Rejang,
|
|
|
|
ucp_Saurashtra,
|
|
|
|
ucp_Sundanese,
|
|
|
|
ucp_Vai
|
|
|
|
};
|
|
|
|
|
|
|
|
#endif
|
|
|
|
|
|
|
|
/* End of ucp.h */
|