mirror of
https://github.com/pocoproject/poco.git
synced 2025-10-16 18:56:52 +02:00
GH #1586: Upgrade bundled PCRE to 8.40
This commit is contained in:
@@ -40,7 +40,7 @@ public:
|
|||||||
// Implementation note: the following definitions must be kept
|
// Implementation note: the following definitions must be kept
|
||||||
// in sync with those from ucp.h (PCRE).
|
// in sync with those from ucp.h (PCRE).
|
||||||
enum CharacterCategory
|
enum CharacterCategory
|
||||||
/// Unicode 5.0 character categories.
|
/// Unicode character categories.
|
||||||
{
|
{
|
||||||
UCP_OTHER,
|
UCP_OTHER,
|
||||||
UCP_LETTER,
|
UCP_LETTER,
|
||||||
@@ -52,7 +52,7 @@ public:
|
|||||||
};
|
};
|
||||||
|
|
||||||
enum CharacterType
|
enum CharacterType
|
||||||
/// Unicode 5.0 character types.
|
/// Unicode character types.
|
||||||
{
|
{
|
||||||
UCP_CONTROL,
|
UCP_CONTROL,
|
||||||
UCP_FORMAT,
|
UCP_FORMAT,
|
||||||
@@ -87,7 +87,7 @@ public:
|
|||||||
};
|
};
|
||||||
|
|
||||||
enum Script
|
enum Script
|
||||||
/// Unicode 5.0 scripts.
|
/// Unicode 7.0 script identifiers.
|
||||||
{
|
{
|
||||||
UCP_ARABIC,
|
UCP_ARABIC,
|
||||||
UCP_ARMENIAN,
|
UCP_ARMENIAN,
|
||||||
@@ -150,11 +150,13 @@ public:
|
|||||||
UCP_TIFINAGH,
|
UCP_TIFINAGH,
|
||||||
UCP_UGARITIC,
|
UCP_UGARITIC,
|
||||||
UCP_YI,
|
UCP_YI,
|
||||||
|
// Unicode 5.0
|
||||||
UCP_BALINESE,
|
UCP_BALINESE,
|
||||||
UCP_CUNEIFORM,
|
UCP_CUNEIFORM,
|
||||||
UCP_NKO,
|
UCP_NKO,
|
||||||
UCP_PHAGS_PA,
|
UCP_PHAGS_PA,
|
||||||
UCP_PHOENICIAN,
|
UCP_PHOENICIAN,
|
||||||
|
// Unicode 5.1
|
||||||
UCP_CARIAN,
|
UCP_CARIAN,
|
||||||
UCP_CHAM,
|
UCP_CHAM,
|
||||||
UCP_KAYAH_LI,
|
UCP_KAYAH_LI,
|
||||||
@@ -165,7 +167,59 @@ public:
|
|||||||
UCP_REJANG,
|
UCP_REJANG,
|
||||||
UCP_SAURASHTRA,
|
UCP_SAURASHTRA,
|
||||||
UCP_SUNDANESE,
|
UCP_SUNDANESE,
|
||||||
UCP_VAI
|
UCP_VAI,
|
||||||
|
// Unicode 5.2
|
||||||
|
UCP_AVESTAN,
|
||||||
|
UCP_BAMUM,
|
||||||
|
UCP_EGYPTIAN_HIEROGLYPHS,
|
||||||
|
UCP_IMPERIAL_ARAMAIC,
|
||||||
|
UCP_INSCRIPTIONAL_PAHLAVI,
|
||||||
|
UCP_INSCRIPTIONAL_PARTHIAN,
|
||||||
|
UCP_JAVANESE,
|
||||||
|
UCP_KAITHI,
|
||||||
|
UCP_LISU,
|
||||||
|
UCP_MEETEI_MAYEK,
|
||||||
|
UCP_OLD_SOUTH_ARABIAN,
|
||||||
|
UCP_OLD_TURKIC,
|
||||||
|
UCP_SAMARITAN,
|
||||||
|
UCP_TAI_THAM,
|
||||||
|
UCP_TAI_VIET,
|
||||||
|
// Unicode 6.0
|
||||||
|
UCP_BATAK,
|
||||||
|
UCP_BRAHMI,
|
||||||
|
UCP_MANDAIC,
|
||||||
|
// Unicode 6.1
|
||||||
|
UCP_CHAKMA,
|
||||||
|
UCP_MEROITIC_CURSIVE,
|
||||||
|
UCP_MEROITIC_HIEROGLYPHS,
|
||||||
|
UCP_MIAO,
|
||||||
|
UCP_SHARADA,
|
||||||
|
UCP_SORA_SOMPENG,
|
||||||
|
UCP_TAKRI,
|
||||||
|
// Unicode 7.0
|
||||||
|
UCP_BASSA_VAH,
|
||||||
|
UCP_CAUCASIAN_ALBANIAN,
|
||||||
|
UCP_DUPLOYAN,
|
||||||
|
UCP_ELBASAN,
|
||||||
|
UCP_GRANTHA,
|
||||||
|
UCP_KHOJKI,
|
||||||
|
UCP_KHUDAWADI,
|
||||||
|
UCP_LINEAR_A,
|
||||||
|
UCP_MAHAJANI,
|
||||||
|
UCP_MANICHAEAN,
|
||||||
|
UCP_MENDE_KIKAKUI,
|
||||||
|
UCP_MODI,
|
||||||
|
UCP_MRO,
|
||||||
|
UCP_NABATAEAN,
|
||||||
|
UCP_OLD_NORTH_ARABIAN,
|
||||||
|
UCP_OLD_PERMIC,
|
||||||
|
UCP_PAHAWH_HMONG,
|
||||||
|
UCP_PALMYRENE,
|
||||||
|
UCP_PSALTER_PAHLAVI,
|
||||||
|
UCP_PAU_CIN_HAU,
|
||||||
|
UCP_SIDDHAM,
|
||||||
|
UCP_TIRHUTA,
|
||||||
|
UCP_WARANG_CITI
|
||||||
};
|
};
|
||||||
|
|
||||||
enum
|
enum
|
||||||
|
@@ -42,9 +42,9 @@ POSSIBILITY OF SUCH DAMAGE.
|
|||||||
/* The current PCRE version information. */
|
/* The current PCRE version information. */
|
||||||
|
|
||||||
#define PCRE_MAJOR 8
|
#define PCRE_MAJOR 8
|
||||||
#define PCRE_MINOR 35
|
#define PCRE_MINOR 40
|
||||||
#define PCRE_PRERELEASE
|
#define PCRE_PRERELEASE
|
||||||
#define PCRE_DATE 2014-04-04
|
#define PCRE_DATE 2017-01-11
|
||||||
|
|
||||||
/* When an application links to a PCRE DLL in Windows, the symbols that are
|
/* When an application links to a PCRE DLL in Windows, the symbols that are
|
||||||
imported have to be identified as such. When building PCRE, the appropriate
|
imported have to be identified as such. When building PCRE, the appropriate
|
||||||
|
File diff suppressed because it is too large
Load Diff
@@ -283,7 +283,7 @@ sure both macros are undefined; an emulation function will then be used. */
|
|||||||
#define PACKAGE_NAME "PCRE"
|
#define PACKAGE_NAME "PCRE"
|
||||||
|
|
||||||
/* Define to the full name and version of this package. */
|
/* Define to the full name and version of this package. */
|
||||||
#define PACKAGE_STRING "PCRE 8.35"
|
#define PACKAGE_STRING "PCRE 8.40"
|
||||||
|
|
||||||
/* Define to the one symbol short name of this package. */
|
/* Define to the one symbol short name of this package. */
|
||||||
#define PACKAGE_TARNAME "pcre"
|
#define PACKAGE_TARNAME "pcre"
|
||||||
@@ -292,7 +292,7 @@ sure both macros are undefined; an emulation function will then be used. */
|
|||||||
#define PACKAGE_URL ""
|
#define PACKAGE_URL ""
|
||||||
|
|
||||||
/* Define to the version of this package. */
|
/* Define to the version of this package. */
|
||||||
#define PACKAGE_VERSION "8.35"
|
#define PACKAGE_VERSION "8.40"
|
||||||
|
|
||||||
/* The value of PARENS_NEST_LIMIT specifies the maximum depth of nested
|
/* The value of PARENS_NEST_LIMIT specifies the maximum depth of nested
|
||||||
parentheses (of any kind) in a pattern. This limits the amount of system
|
parentheses (of any kind) in a pattern. This limits the amount of system
|
||||||
@@ -394,7 +394,7 @@ sure both macros are undefined; an emulation function will then be used. */
|
|||||||
/* #undef SUPPORT_VALGRIND */
|
/* #undef SUPPORT_VALGRIND */
|
||||||
|
|
||||||
/* Version number of package */
|
/* Version number of package */
|
||||||
#define VERSION "8.35"
|
#define VERSION "8.40"
|
||||||
|
|
||||||
/* Define to empty if `const' does not conform to ANSI C. */
|
/* Define to empty if `const' does not conform to ANSI C. */
|
||||||
/* #undef const */
|
/* #undef const */
|
||||||
|
@@ -2735,9 +2735,10 @@ for (;;)
|
|||||||
condcode == OP_DNRREF)
|
condcode == OP_DNRREF)
|
||||||
return PCRE_ERROR_DFA_UCOND;
|
return PCRE_ERROR_DFA_UCOND;
|
||||||
|
|
||||||
/* The DEFINE condition is always false */
|
/* The DEFINE condition is always false, and the assertion (?!) is
|
||||||
|
converted to OP_FAIL. */
|
||||||
|
|
||||||
if (condcode == OP_DEF)
|
if (condcode == OP_DEF || condcode == OP_FAIL)
|
||||||
{ ADD_ACTIVE(state_offset + codelink + LINK_SIZE + 1, 0); }
|
{ ADD_ACTIVE(state_offset + codelink + LINK_SIZE + 1, 0); }
|
||||||
|
|
||||||
/* The only supported version of OP_RREF is for the value RREF_ANY,
|
/* The only supported version of OP_RREF is for the value RREF_ANY,
|
||||||
@@ -3241,7 +3242,7 @@ md->callout_data = NULL;
|
|||||||
|
|
||||||
if (extra_data != NULL)
|
if (extra_data != NULL)
|
||||||
{
|
{
|
||||||
unsigned int flags = extra_data->flags;
|
unsigned long int flags = extra_data->flags;
|
||||||
if ((flags & PCRE_EXTRA_STUDY_DATA) != 0)
|
if ((flags & PCRE_EXTRA_STUDY_DATA) != 0)
|
||||||
study = (const pcre_study_data *)extra_data->study_data;
|
study = (const pcre_study_data *)extra_data->study_data;
|
||||||
if ((flags & PCRE_EXTRA_MATCH_LIMIT) != 0) return PCRE_ERROR_DFA_UMLIMIT;
|
if ((flags & PCRE_EXTRA_MATCH_LIMIT) != 0) return PCRE_ERROR_DFA_UMLIMIT;
|
||||||
|
@@ -1137,88 +1137,81 @@ for (;;)
|
|||||||
printf("\n");
|
printf("\n");
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
if (offset < md->offset_max)
|
if (offset >= md->offset_max) goto POSSESSIVE_NON_CAPTURE;
|
||||||
|
|
||||||
|
matched_once = FALSE;
|
||||||
|
code_offset = (int)(ecode - md->start_code);
|
||||||
|
|
||||||
|
save_offset1 = md->offset_vector[offset];
|
||||||
|
save_offset2 = md->offset_vector[offset+1];
|
||||||
|
save_offset3 = md->offset_vector[md->offset_end - number];
|
||||||
|
save_capture_last = md->capture_last;
|
||||||
|
|
||||||
|
DPRINTF(("saving %d %d %d\n", save_offset1, save_offset2, save_offset3));
|
||||||
|
|
||||||
|
/* Each time round the loop, save the current subject position for use
|
||||||
|
when the group matches. For MATCH_MATCH, the group has matched, so we
|
||||||
|
restart it with a new subject starting position, remembering that we had
|
||||||
|
at least one match. For MATCH_NOMATCH, carry on with the alternatives, as
|
||||||
|
usual. If we haven't matched any alternatives in any iteration, check to
|
||||||
|
see if a previous iteration matched. If so, the group has matched;
|
||||||
|
continue from afterwards. Otherwise it has failed; restore the previous
|
||||||
|
capture values before returning NOMATCH. */
|
||||||
|
|
||||||
|
for (;;)
|
||||||
{
|
{
|
||||||
matched_once = FALSE;
|
md->offset_vector[md->offset_end - number] =
|
||||||
code_offset = (int)(ecode - md->start_code);
|
(int)(eptr - md->start_subject);
|
||||||
|
if (op >= OP_SBRA) md->match_function_type = MATCH_CBEGROUP;
|
||||||
save_offset1 = md->offset_vector[offset];
|
RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode], offset_top, md,
|
||||||
save_offset2 = md->offset_vector[offset+1];
|
eptrb, RM63);
|
||||||
save_offset3 = md->offset_vector[md->offset_end - number];
|
if (rrc == MATCH_KETRPOS)
|
||||||
save_capture_last = md->capture_last;
|
|
||||||
|
|
||||||
DPRINTF(("saving %d %d %d\n", save_offset1, save_offset2, save_offset3));
|
|
||||||
|
|
||||||
/* Each time round the loop, save the current subject position for use
|
|
||||||
when the group matches. For MATCH_MATCH, the group has matched, so we
|
|
||||||
restart it with a new subject starting position, remembering that we had
|
|
||||||
at least one match. For MATCH_NOMATCH, carry on with the alternatives, as
|
|
||||||
usual. If we haven't matched any alternatives in any iteration, check to
|
|
||||||
see if a previous iteration matched. If so, the group has matched;
|
|
||||||
continue from afterwards. Otherwise it has failed; restore the previous
|
|
||||||
capture values before returning NOMATCH. */
|
|
||||||
|
|
||||||
for (;;)
|
|
||||||
{
|
{
|
||||||
md->offset_vector[md->offset_end - number] =
|
offset_top = md->end_offset_top;
|
||||||
(int)(eptr - md->start_subject);
|
ecode = md->start_code + code_offset;
|
||||||
if (op >= OP_SBRA) md->match_function_type = MATCH_CBEGROUP;
|
save_capture_last = md->capture_last;
|
||||||
RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode], offset_top, md,
|
matched_once = TRUE;
|
||||||
eptrb, RM63);
|
mstart = md->start_match_ptr; /* In case \K changed it */
|
||||||
if (rrc == MATCH_KETRPOS)
|
if (eptr == md->end_match_ptr) /* Matched an empty string */
|
||||||
{
|
{
|
||||||
offset_top = md->end_offset_top;
|
do ecode += GET(ecode, 1); while (*ecode == OP_ALT);
|
||||||
eptr = md->end_match_ptr;
|
break;
|
||||||
ecode = md->start_code + code_offset;
|
|
||||||
save_capture_last = md->capture_last;
|
|
||||||
matched_once = TRUE;
|
|
||||||
mstart = md->start_match_ptr; /* In case \K changed it */
|
|
||||||
continue;
|
|
||||||
}
|
}
|
||||||
|
eptr = md->end_match_ptr;
|
||||||
/* See comment in the code for capturing groups above about handling
|
continue;
|
||||||
THEN. */
|
|
||||||
|
|
||||||
if (rrc == MATCH_THEN)
|
|
||||||
{
|
|
||||||
next = ecode + GET(ecode,1);
|
|
||||||
if (md->start_match_ptr < next &&
|
|
||||||
(*ecode == OP_ALT || *next == OP_ALT))
|
|
||||||
rrc = MATCH_NOMATCH;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (rrc != MATCH_NOMATCH) RRETURN(rrc);
|
|
||||||
md->capture_last = save_capture_last;
|
|
||||||
ecode += GET(ecode, 1);
|
|
||||||
if (*ecode != OP_ALT) break;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
if (!matched_once)
|
/* See comment in the code for capturing groups above about handling
|
||||||
|
THEN. */
|
||||||
|
|
||||||
|
if (rrc == MATCH_THEN)
|
||||||
{
|
{
|
||||||
md->offset_vector[offset] = save_offset1;
|
next = ecode + GET(ecode,1);
|
||||||
md->offset_vector[offset+1] = save_offset2;
|
if (md->start_match_ptr < next &&
|
||||||
md->offset_vector[md->offset_end - number] = save_offset3;
|
(*ecode == OP_ALT || *next == OP_ALT))
|
||||||
|
rrc = MATCH_NOMATCH;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (allow_zero || matched_once)
|
if (rrc != MATCH_NOMATCH) RRETURN(rrc);
|
||||||
{
|
md->capture_last = save_capture_last;
|
||||||
ecode += 1 + LINK_SIZE;
|
ecode += GET(ecode, 1);
|
||||||
break;
|
if (*ecode != OP_ALT) break;
|
||||||
}
|
|
||||||
|
|
||||||
RRETURN(MATCH_NOMATCH);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/* FALL THROUGH ... Insufficient room for saving captured contents. Treat
|
if (!matched_once)
|
||||||
as a non-capturing bracket. */
|
{
|
||||||
|
md->offset_vector[offset] = save_offset1;
|
||||||
|
md->offset_vector[offset+1] = save_offset2;
|
||||||
|
md->offset_vector[md->offset_end - number] = save_offset3;
|
||||||
|
}
|
||||||
|
|
||||||
/* VVVVVVVVVVVVVVVVVVVVVVVVV */
|
if (allow_zero || matched_once)
|
||||||
/* VVVVVVVVVVVVVVVVVVVVVVVVV */
|
{
|
||||||
|
ecode += 1 + LINK_SIZE;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
DPRINTF(("insufficient capture room: treat as non-capturing\n"));
|
RRETURN(MATCH_NOMATCH);
|
||||||
|
|
||||||
/* VVVVVVVVVVVVVVVVVVVVVVVVV */
|
|
||||||
/* VVVVVVVVVVVVVVVVVVVVVVVVV */
|
|
||||||
|
|
||||||
/* Non-capturing possessive bracket with unlimited repeat. We come here
|
/* Non-capturing possessive bracket with unlimited repeat. We come here
|
||||||
from BRAZERO with allow_zero = TRUE. The code is similar to the above,
|
from BRAZERO with allow_zero = TRUE. The code is similar to the above,
|
||||||
@@ -1242,10 +1235,15 @@ for (;;)
|
|||||||
if (rrc == MATCH_KETRPOS)
|
if (rrc == MATCH_KETRPOS)
|
||||||
{
|
{
|
||||||
offset_top = md->end_offset_top;
|
offset_top = md->end_offset_top;
|
||||||
eptr = md->end_match_ptr;
|
|
||||||
ecode = md->start_code + code_offset;
|
ecode = md->start_code + code_offset;
|
||||||
matched_once = TRUE;
|
matched_once = TRUE;
|
||||||
mstart = md->start_match_ptr; /* In case \K reset it */
|
mstart = md->start_match_ptr; /* In case \K reset it */
|
||||||
|
if (eptr == md->end_match_ptr) /* Matched an empty string */
|
||||||
|
{
|
||||||
|
do ecode += GET(ecode, 1); while (*ecode == OP_ALT);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
eptr = md->end_match_ptr;
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -1379,6 +1377,7 @@ for (;;)
|
|||||||
break;
|
break;
|
||||||
|
|
||||||
case OP_DEF: /* DEFINE - always false */
|
case OP_DEF: /* DEFINE - always false */
|
||||||
|
case OP_FAIL: /* From optimized (?!) condition */
|
||||||
break;
|
break;
|
||||||
|
|
||||||
/* The condition is an assertion. Call match() to evaluate it - setting
|
/* The condition is an assertion. Call match() to evaluate it - setting
|
||||||
@@ -1395,8 +1394,11 @@ for (;;)
|
|||||||
condition = TRUE;
|
condition = TRUE;
|
||||||
|
|
||||||
/* Advance ecode past the assertion to the start of the first branch,
|
/* Advance ecode past the assertion to the start of the first branch,
|
||||||
but adjust it so that the general choosing code below works. */
|
but adjust it so that the general choosing code below works. If the
|
||||||
|
assertion has a quantifier that allows zero repeats we must skip over
|
||||||
|
the BRAZERO. This is a lunatic thing to do, but somebody did! */
|
||||||
|
|
||||||
|
if (*ecode == OP_BRAZERO) ecode++;
|
||||||
ecode += GET(ecode, 1);
|
ecode += GET(ecode, 1);
|
||||||
while (*ecode == OP_ALT) ecode += GET(ecode, 1);
|
while (*ecode == OP_ALT) ecode += GET(ecode, 1);
|
||||||
ecode += 1 + LINK_SIZE - PRIV(OP_lengths)[condcode];
|
ecode += 1 + LINK_SIZE - PRIV(OP_lengths)[condcode];
|
||||||
@@ -1465,7 +1467,18 @@ for (;;)
|
|||||||
md->offset_vector[offset] =
|
md->offset_vector[offset] =
|
||||||
md->offset_vector[md->offset_end - number];
|
md->offset_vector[md->offset_end - number];
|
||||||
md->offset_vector[offset+1] = (int)(eptr - md->start_subject);
|
md->offset_vector[offset+1] = (int)(eptr - md->start_subject);
|
||||||
if (offset_top <= offset) offset_top = offset + 2;
|
|
||||||
|
/* If this group is at or above the current highwater mark, ensure that
|
||||||
|
any groups between the current high water mark and this group are marked
|
||||||
|
unset and then update the high water mark. */
|
||||||
|
|
||||||
|
if (offset >= offset_top)
|
||||||
|
{
|
||||||
|
register int *iptr = md->offset_vector + offset_top;
|
||||||
|
register int *iend = md->offset_vector + offset;
|
||||||
|
while (iptr < iend) *iptr++ = -1;
|
||||||
|
offset_top = offset + 2;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
ecode += 1 + IMM2_SIZE;
|
ecode += 1 + IMM2_SIZE;
|
||||||
break;
|
break;
|
||||||
@@ -1817,7 +1830,11 @@ for (;;)
|
|||||||
are defined in a range that can be tested for. */
|
are defined in a range that can be tested for. */
|
||||||
|
|
||||||
if (rrc >= MATCH_BACKTRACK_MIN && rrc <= MATCH_BACKTRACK_MAX)
|
if (rrc >= MATCH_BACKTRACK_MIN && rrc <= MATCH_BACKTRACK_MAX)
|
||||||
|
{
|
||||||
|
if (new_recursive.offset_save != stacksave)
|
||||||
|
(PUBL(free))(new_recursive.offset_save);
|
||||||
RRETURN(MATCH_NOMATCH);
|
RRETURN(MATCH_NOMATCH);
|
||||||
|
}
|
||||||
|
|
||||||
/* Any return code other than NOMATCH is an error. */
|
/* Any return code other than NOMATCH is an error. */
|
||||||
|
|
||||||
@@ -1980,6 +1997,19 @@ for (;;)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* OP_KETRPOS is a possessive repeating ket. Remember the current position,
|
||||||
|
and return the MATCH_KETRPOS. This makes it possible to do the repeats one
|
||||||
|
at a time from the outer level, thus saving stack. This must precede the
|
||||||
|
empty string test - in this case that test is done at the outer level. */
|
||||||
|
|
||||||
|
if (*ecode == OP_KETRPOS)
|
||||||
|
{
|
||||||
|
md->start_match_ptr = mstart; /* In case \K reset it */
|
||||||
|
md->end_match_ptr = eptr;
|
||||||
|
md->end_offset_top = offset_top;
|
||||||
|
RRETURN(MATCH_KETRPOS);
|
||||||
|
}
|
||||||
|
|
||||||
/* For an ordinary non-repeating ket, just continue at this level. This
|
/* For an ordinary non-repeating ket, just continue at this level. This
|
||||||
also happens for a repeating ket if no characters were matched in the
|
also happens for a repeating ket if no characters were matched in the
|
||||||
group. This is the forcible breaking of infinite loops as implemented in
|
group. This is the forcible breaking of infinite loops as implemented in
|
||||||
@@ -2002,18 +2032,6 @@ for (;;)
|
|||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* OP_KETRPOS is a possessive repeating ket. Remember the current position,
|
|
||||||
and return the MATCH_KETRPOS. This makes it possible to do the repeats one
|
|
||||||
at a time from the outer level, thus saving stack. */
|
|
||||||
|
|
||||||
if (*ecode == OP_KETRPOS)
|
|
||||||
{
|
|
||||||
md->start_match_ptr = mstart; /* In case \K reset it */
|
|
||||||
md->end_match_ptr = eptr;
|
|
||||||
md->end_offset_top = offset_top;
|
|
||||||
RRETURN(MATCH_KETRPOS);
|
|
||||||
}
|
|
||||||
|
|
||||||
/* The normal repeating kets try the rest of the pattern or restart from
|
/* The normal repeating kets try the rest of the pattern or restart from
|
||||||
the preceding bracket, in the appropriate order. In the second case, we can
|
the preceding bracket, in the appropriate order. In the second case, we can
|
||||||
use tail recursion to avoid using another stack frame, unless we have an
|
use tail recursion to avoid using another stack frame, unless we have an
|
||||||
@@ -3466,7 +3484,7 @@ for (;;)
|
|||||||
if (possessive) continue; /* No backtracking */
|
if (possessive) continue; /* No backtracking */
|
||||||
for(;;)
|
for(;;)
|
||||||
{
|
{
|
||||||
if (eptr == pp) goto TAIL_RECURSE;
|
if (eptr <= pp) goto TAIL_RECURSE;
|
||||||
RMATCH(eptr, ecode, offset_top, md, eptrb, RM23);
|
RMATCH(eptr, ecode, offset_top, md, eptrb, RM23);
|
||||||
if (rrc != MATCH_NOMATCH) RRETURN(rrc);
|
if (rrc != MATCH_NOMATCH) RRETURN(rrc);
|
||||||
#ifdef SUPPORT_UCP
|
#ifdef SUPPORT_UCP
|
||||||
@@ -3887,7 +3905,7 @@ for (;;)
|
|||||||
if (possessive) continue; /* No backtracking */
|
if (possessive) continue; /* No backtracking */
|
||||||
for(;;)
|
for(;;)
|
||||||
{
|
{
|
||||||
if (eptr == pp) goto TAIL_RECURSE;
|
if (eptr <= pp) goto TAIL_RECURSE;
|
||||||
RMATCH(eptr, ecode, offset_top, md, eptrb, RM30);
|
RMATCH(eptr, ecode, offset_top, md, eptrb, RM30);
|
||||||
if (rrc != MATCH_NOMATCH) RRETURN(rrc);
|
if (rrc != MATCH_NOMATCH) RRETURN(rrc);
|
||||||
eptr--;
|
eptr--;
|
||||||
@@ -4022,7 +4040,7 @@ for (;;)
|
|||||||
if (possessive) continue; /* No backtracking */
|
if (possessive) continue; /* No backtracking */
|
||||||
for(;;)
|
for(;;)
|
||||||
{
|
{
|
||||||
if (eptr == pp) goto TAIL_RECURSE;
|
if (eptr <= pp) goto TAIL_RECURSE;
|
||||||
RMATCH(eptr, ecode, offset_top, md, eptrb, RM34);
|
RMATCH(eptr, ecode, offset_top, md, eptrb, RM34);
|
||||||
if (rrc != MATCH_NOMATCH) RRETURN(rrc);
|
if (rrc != MATCH_NOMATCH) RRETURN(rrc);
|
||||||
eptr--;
|
eptr--;
|
||||||
@@ -5593,7 +5611,7 @@ for (;;)
|
|||||||
if (possessive) continue; /* No backtracking */
|
if (possessive) continue; /* No backtracking */
|
||||||
for(;;)
|
for(;;)
|
||||||
{
|
{
|
||||||
if (eptr == pp) goto TAIL_RECURSE;
|
if (eptr <= pp) goto TAIL_RECURSE;
|
||||||
RMATCH(eptr, ecode, offset_top, md, eptrb, RM44);
|
RMATCH(eptr, ecode, offset_top, md, eptrb, RM44);
|
||||||
if (rrc != MATCH_NOMATCH) RRETURN(rrc);
|
if (rrc != MATCH_NOMATCH) RRETURN(rrc);
|
||||||
eptr--;
|
eptr--;
|
||||||
@@ -5635,12 +5653,17 @@ for (;;)
|
|||||||
|
|
||||||
if (possessive) continue; /* No backtracking */
|
if (possessive) continue; /* No backtracking */
|
||||||
|
|
||||||
|
/* We use <= pp rather than == pp to detect the start of the run while
|
||||||
|
backtracking because the use of \C in UTF mode can cause BACKCHAR to
|
||||||
|
move back past pp. This is just palliative; the use of \C in UTF mode
|
||||||
|
is fraught with danger. */
|
||||||
|
|
||||||
for(;;)
|
for(;;)
|
||||||
{
|
{
|
||||||
int lgb, rgb;
|
int lgb, rgb;
|
||||||
PCRE_PUCHAR fptr;
|
PCRE_PUCHAR fptr;
|
||||||
|
|
||||||
if (eptr == pp) goto TAIL_RECURSE; /* At start of char run */
|
if (eptr <= pp) goto TAIL_RECURSE; /* At start of char run */
|
||||||
RMATCH(eptr, ecode, offset_top, md, eptrb, RM45);
|
RMATCH(eptr, ecode, offset_top, md, eptrb, RM45);
|
||||||
if (rrc != MATCH_NOMATCH) RRETURN(rrc);
|
if (rrc != MATCH_NOMATCH) RRETURN(rrc);
|
||||||
|
|
||||||
@@ -5658,7 +5681,7 @@ for (;;)
|
|||||||
|
|
||||||
for (;;)
|
for (;;)
|
||||||
{
|
{
|
||||||
if (eptr == pp) goto TAIL_RECURSE; /* At start of char run */
|
if (eptr <= pp) goto TAIL_RECURSE; /* At start of char run */
|
||||||
fptr = eptr - 1;
|
fptr = eptr - 1;
|
||||||
if (!utf) c = *fptr; else
|
if (!utf) c = *fptr; else
|
||||||
{
|
{
|
||||||
@@ -5682,54 +5705,25 @@ for (;;)
|
|||||||
switch(ctype)
|
switch(ctype)
|
||||||
{
|
{
|
||||||
case OP_ANY:
|
case OP_ANY:
|
||||||
if (max < INT_MAX)
|
for (i = min; i < max; i++)
|
||||||
{
|
{
|
||||||
for (i = min; i < max; i++)
|
if (eptr >= md->end_subject)
|
||||||
{
|
{
|
||||||
if (eptr >= md->end_subject)
|
SCHECK_PARTIAL();
|
||||||
{
|
break;
|
||||||
SCHECK_PARTIAL();
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
if (IS_NEWLINE(eptr)) break;
|
|
||||||
if (md->partial != 0 && /* Take care with CRLF partial */
|
|
||||||
eptr + 1 >= md->end_subject &&
|
|
||||||
NLBLOCK->nltype == NLTYPE_FIXED &&
|
|
||||||
NLBLOCK->nllen == 2 &&
|
|
||||||
UCHAR21(eptr) == NLBLOCK->nl[0])
|
|
||||||
{
|
|
||||||
md->hitend = TRUE;
|
|
||||||
if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL);
|
|
||||||
}
|
|
||||||
eptr++;
|
|
||||||
ACROSSCHAR(eptr < md->end_subject, *eptr, eptr++);
|
|
||||||
}
|
}
|
||||||
}
|
if (IS_NEWLINE(eptr)) break;
|
||||||
|
if (md->partial != 0 && /* Take care with CRLF partial */
|
||||||
/* Handle unlimited UTF-8 repeat */
|
eptr + 1 >= md->end_subject &&
|
||||||
|
NLBLOCK->nltype == NLTYPE_FIXED &&
|
||||||
else
|
NLBLOCK->nllen == 2 &&
|
||||||
{
|
UCHAR21(eptr) == NLBLOCK->nl[0])
|
||||||
for (i = min; i < max; i++)
|
|
||||||
{
|
{
|
||||||
if (eptr >= md->end_subject)
|
md->hitend = TRUE;
|
||||||
{
|
if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL);
|
||||||
SCHECK_PARTIAL();
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
if (IS_NEWLINE(eptr)) break;
|
|
||||||
if (md->partial != 0 && /* Take care with CRLF partial */
|
|
||||||
eptr + 1 >= md->end_subject &&
|
|
||||||
NLBLOCK->nltype == NLTYPE_FIXED &&
|
|
||||||
NLBLOCK->nllen == 2 &&
|
|
||||||
UCHAR21(eptr) == NLBLOCK->nl[0])
|
|
||||||
{
|
|
||||||
md->hitend = TRUE;
|
|
||||||
if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL);
|
|
||||||
}
|
|
||||||
eptr++;
|
|
||||||
ACROSSCHAR(eptr < md->end_subject, *eptr, eptr++);
|
|
||||||
}
|
}
|
||||||
|
eptr++;
|
||||||
|
ACROSSCHAR(eptr < md->end_subject, *eptr, eptr++);
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
|
|
||||||
@@ -5937,7 +5931,7 @@ for (;;)
|
|||||||
if (possessive) continue; /* No backtracking */
|
if (possessive) continue; /* No backtracking */
|
||||||
for(;;)
|
for(;;)
|
||||||
{
|
{
|
||||||
if (eptr == pp) goto TAIL_RECURSE;
|
if (eptr <= pp) goto TAIL_RECURSE;
|
||||||
RMATCH(eptr, ecode, offset_top, md, eptrb, RM46);
|
RMATCH(eptr, ecode, offset_top, md, eptrb, RM46);
|
||||||
if (rrc != MATCH_NOMATCH) RRETURN(rrc);
|
if (rrc != MATCH_NOMATCH) RRETURN(rrc);
|
||||||
eptr--;
|
eptr--;
|
||||||
@@ -6520,7 +6514,7 @@ tables = re->tables;
|
|||||||
|
|
||||||
if (extra_data != NULL)
|
if (extra_data != NULL)
|
||||||
{
|
{
|
||||||
register unsigned int flags = extra_data->flags;
|
unsigned long int flags = extra_data->flags;
|
||||||
if ((flags & PCRE_EXTRA_STUDY_DATA) != 0)
|
if ((flags & PCRE_EXTRA_STUDY_DATA) != 0)
|
||||||
study = (const pcre_study_data *)extra_data->study_data;
|
study = (const pcre_study_data *)extra_data->study_data;
|
||||||
if ((flags & PCRE_EXTRA_MATCH_LIMIT) != 0)
|
if ((flags & PCRE_EXTRA_MATCH_LIMIT) != 0)
|
||||||
@@ -6692,7 +6686,8 @@ if (md->offset_vector != NULL)
|
|||||||
register int *iend = iptr - re->top_bracket;
|
register int *iend = iptr - re->top_bracket;
|
||||||
if (iend < md->offset_vector + 2) iend = md->offset_vector + 2;
|
if (iend < md->offset_vector + 2) iend = md->offset_vector + 2;
|
||||||
while (--iptr >= iend) *iptr = -1;
|
while (--iptr >= iend) *iptr = -1;
|
||||||
md->offset_vector[0] = md->offset_vector[1] = -1;
|
if (offsetcount > 0) md->offset_vector[0] = -1;
|
||||||
|
if (offsetcount > 1) md->offset_vector[1] = -1;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Set up the first character to match, if available. The first_char value is
|
/* Set up the first character to match, if available. The first_char value is
|
||||||
|
@@ -247,6 +247,7 @@ Arguments:
|
|||||||
code the compiled regex
|
code the compiled regex
|
||||||
stringname the name of the capturing substring
|
stringname the name of the capturing substring
|
||||||
ovector the vector of matched substrings
|
ovector the vector of matched substrings
|
||||||
|
stringcount number of captured substrings
|
||||||
|
|
||||||
Returns: the number of the first that is set,
|
Returns: the number of the first that is set,
|
||||||
or the number of the last one if none are set,
|
or the number of the last one if none are set,
|
||||||
@@ -255,13 +256,16 @@ Returns: the number of the first that is set,
|
|||||||
|
|
||||||
#if defined COMPILE_PCRE8
|
#if defined COMPILE_PCRE8
|
||||||
static int
|
static int
|
||||||
get_first_set(const pcre *code, const char *stringname, int *ovector)
|
get_first_set(const pcre *code, const char *stringname, int *ovector,
|
||||||
|
int stringcount)
|
||||||
#elif defined COMPILE_PCRE16
|
#elif defined COMPILE_PCRE16
|
||||||
static int
|
static int
|
||||||
get_first_set(const pcre16 *code, PCRE_SPTR16 stringname, int *ovector)
|
get_first_set(const pcre16 *code, PCRE_SPTR16 stringname, int *ovector,
|
||||||
|
int stringcount)
|
||||||
#elif defined COMPILE_PCRE32
|
#elif defined COMPILE_PCRE32
|
||||||
static int
|
static int
|
||||||
get_first_set(const pcre32 *code, PCRE_SPTR32 stringname, int *ovector)
|
get_first_set(const pcre32 *code, PCRE_SPTR32 stringname, int *ovector,
|
||||||
|
int stringcount)
|
||||||
#endif
|
#endif
|
||||||
{
|
{
|
||||||
const REAL_PCRE *re = (const REAL_PCRE *)code;
|
const REAL_PCRE *re = (const REAL_PCRE *)code;
|
||||||
@@ -292,7 +296,7 @@ if (entrysize <= 0) return entrysize;
|
|||||||
for (entry = (pcre_uchar *)first; entry <= (pcre_uchar *)last; entry += entrysize)
|
for (entry = (pcre_uchar *)first; entry <= (pcre_uchar *)last; entry += entrysize)
|
||||||
{
|
{
|
||||||
int n = GET2(entry, 0);
|
int n = GET2(entry, 0);
|
||||||
if (ovector[n*2] >= 0) return n;
|
if (n < stringcount && ovector[n*2] >= 0) return n;
|
||||||
}
|
}
|
||||||
return GET2(entry, 0);
|
return GET2(entry, 0);
|
||||||
}
|
}
|
||||||
@@ -399,7 +403,7 @@ pcre32_copy_named_substring(const pcre32 *code, PCRE_SPTR32 subject,
|
|||||||
PCRE_UCHAR32 *buffer, int size)
|
PCRE_UCHAR32 *buffer, int size)
|
||||||
#endif
|
#endif
|
||||||
{
|
{
|
||||||
int n = get_first_set(code, stringname, ovector);
|
int n = get_first_set(code, stringname, ovector, stringcount);
|
||||||
if (n <= 0) return n;
|
if (n <= 0) return n;
|
||||||
#if defined COMPILE_PCRE8
|
#if defined COMPILE_PCRE8
|
||||||
return pcre_copy_substring(subject, ovector, stringcount, n, buffer, size);
|
return pcre_copy_substring(subject, ovector, stringcount, n, buffer, size);
|
||||||
@@ -454,7 +458,10 @@ pcre_uchar **stringlist;
|
|||||||
pcre_uchar *p;
|
pcre_uchar *p;
|
||||||
|
|
||||||
for (i = 0; i < double_count; i += 2)
|
for (i = 0; i < double_count; i += 2)
|
||||||
size += sizeof(pcre_uchar *) + IN_UCHARS(ovector[i+1] - ovector[i] + 1);
|
{
|
||||||
|
size += sizeof(pcre_uchar *) + IN_UCHARS(1);
|
||||||
|
if (ovector[i+1] > ovector[i]) size += IN_UCHARS(ovector[i+1] - ovector[i]);
|
||||||
|
}
|
||||||
|
|
||||||
stringlist = (pcre_uchar **)(PUBL(malloc))(size);
|
stringlist = (pcre_uchar **)(PUBL(malloc))(size);
|
||||||
if (stringlist == NULL) return PCRE_ERROR_NOMEMORY;
|
if (stringlist == NULL) return PCRE_ERROR_NOMEMORY;
|
||||||
@@ -470,7 +477,7 @@ p = (pcre_uchar *)(stringlist + stringcount + 1);
|
|||||||
|
|
||||||
for (i = 0; i < double_count; i += 2)
|
for (i = 0; i < double_count; i += 2)
|
||||||
{
|
{
|
||||||
int len = ovector[i+1] - ovector[i];
|
int len = (ovector[i+1] > ovector[i])? (ovector[i+1] - ovector[i]) : 0;
|
||||||
memcpy(p, subject + ovector[i], IN_UCHARS(len));
|
memcpy(p, subject + ovector[i], IN_UCHARS(len));
|
||||||
*stringlist++ = p;
|
*stringlist++ = p;
|
||||||
p += len;
|
p += len;
|
||||||
@@ -616,7 +623,7 @@ pcre32_get_named_substring(const pcre32 *code, PCRE_SPTR32 subject,
|
|||||||
PCRE_SPTR32 *stringptr)
|
PCRE_SPTR32 *stringptr)
|
||||||
#endif
|
#endif
|
||||||
{
|
{
|
||||||
int n = get_first_set(code, stringname, ovector);
|
int n = get_first_set(code, stringname, ovector, stringcount);
|
||||||
if (n <= 0) return n;
|
if (n <= 0) return n;
|
||||||
#if defined COMPILE_PCRE8
|
#if defined COMPILE_PCRE8
|
||||||
return pcre_get_substring(subject, ovector, stringcount, n, stringptr);
|
return pcre_get_substring(subject, ovector, stringcount, n, stringptr);
|
||||||
|
@@ -7,7 +7,7 @@
|
|||||||
and semantics are as close as possible to those of the Perl 5 language.
|
and semantics are as close as possible to those of the Perl 5 language.
|
||||||
|
|
||||||
Written by Philip Hazel
|
Written by Philip Hazel
|
||||||
Copyright (c) 1997-2014 University of Cambridge
|
Copyright (c) 1997-2016 University of Cambridge
|
||||||
|
|
||||||
-----------------------------------------------------------------------------
|
-----------------------------------------------------------------------------
|
||||||
Redistribution and use in source and binary forms, with or without
|
Redistribution and use in source and binary forms, with or without
|
||||||
@@ -229,9 +229,9 @@ stdint.h is available, include it; it may define INT64_MAX. Systems that do not
|
|||||||
have stdint.h (e.g. Solaris) may have inttypes.h. The macro int64_t may be set
|
have stdint.h (e.g. Solaris) may have inttypes.h. The macro int64_t may be set
|
||||||
by "configure". */
|
by "configure". */
|
||||||
|
|
||||||
#if HAVE_STDINT_H
|
#if defined HAVE_STDINT_H
|
||||||
#include <stdint.h>
|
#include <stdint.h>
|
||||||
#elif HAVE_INTTYPES_H
|
#elif defined HAVE_INTTYPES_H
|
||||||
#include <inttypes.h>
|
#include <inttypes.h>
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
@@ -275,7 +275,7 @@ pcre.h(.in) and disable (comment out) this message. */
|
|||||||
|
|
||||||
typedef pcre_uint16 pcre_uchar;
|
typedef pcre_uint16 pcre_uchar;
|
||||||
#define UCHAR_SHIFT (1)
|
#define UCHAR_SHIFT (1)
|
||||||
#define IN_UCHARS(x) ((x) << UCHAR_SHIFT)
|
#define IN_UCHARS(x) ((x) * 2)
|
||||||
#define MAX_255(c) ((c) <= 255u)
|
#define MAX_255(c) ((c) <= 255u)
|
||||||
#define TABLE_GET(c, table, default) (MAX_255(c)? ((table)[c]):(default))
|
#define TABLE_GET(c, table, default) (MAX_255(c)? ((table)[c]):(default))
|
||||||
|
|
||||||
@@ -283,7 +283,7 @@ typedef pcre_uint16 pcre_uchar;
|
|||||||
|
|
||||||
typedef pcre_uint32 pcre_uchar;
|
typedef pcre_uint32 pcre_uchar;
|
||||||
#define UCHAR_SHIFT (2)
|
#define UCHAR_SHIFT (2)
|
||||||
#define IN_UCHARS(x) ((x) << UCHAR_SHIFT)
|
#define IN_UCHARS(x) ((x) * 4)
|
||||||
#define MAX_255(c) ((c) <= 255u)
|
#define MAX_255(c) ((c) <= 255u)
|
||||||
#define TABLE_GET(c, table, default) (MAX_255(c)? ((table)[c]):(default))
|
#define TABLE_GET(c, table, default) (MAX_255(c)? ((table)[c]):(default))
|
||||||
|
|
||||||
@@ -984,7 +984,7 @@ other. NOTE: The values also appear in pcre_jit_compile.c. */
|
|||||||
#ifndef EBCDIC
|
#ifndef EBCDIC
|
||||||
|
|
||||||
#define HSPACE_LIST \
|
#define HSPACE_LIST \
|
||||||
CHAR_HT, CHAR_SPACE, 0xa0, \
|
CHAR_HT, CHAR_SPACE, CHAR_NBSP, \
|
||||||
0x1680, 0x180e, 0x2000, 0x2001, 0x2002, 0x2003, 0x2004, 0x2005, \
|
0x1680, 0x180e, 0x2000, 0x2001, 0x2002, 0x2003, 0x2004, 0x2005, \
|
||||||
0x2006, 0x2007, 0x2008, 0x2009, 0x200A, 0x202f, 0x205f, 0x3000, \
|
0x2006, 0x2007, 0x2008, 0x2009, 0x200A, 0x202f, 0x205f, 0x3000, \
|
||||||
NOTACHAR
|
NOTACHAR
|
||||||
@@ -1010,7 +1010,7 @@ other. NOTE: The values also appear in pcre_jit_compile.c. */
|
|||||||
#define HSPACE_BYTE_CASES \
|
#define HSPACE_BYTE_CASES \
|
||||||
case CHAR_HT: \
|
case CHAR_HT: \
|
||||||
case CHAR_SPACE: \
|
case CHAR_SPACE: \
|
||||||
case 0xa0 /* NBSP */
|
case CHAR_NBSP
|
||||||
|
|
||||||
#define HSPACE_CASES \
|
#define HSPACE_CASES \
|
||||||
HSPACE_BYTE_CASES: \
|
HSPACE_BYTE_CASES: \
|
||||||
@@ -1037,11 +1037,12 @@ other. NOTE: The values also appear in pcre_jit_compile.c. */
|
|||||||
/* ------ EBCDIC environments ------ */
|
/* ------ EBCDIC environments ------ */
|
||||||
|
|
||||||
#else
|
#else
|
||||||
#define HSPACE_LIST CHAR_HT, CHAR_SPACE
|
#define HSPACE_LIST CHAR_HT, CHAR_SPACE, CHAR_NBSP, NOTACHAR
|
||||||
|
|
||||||
#define HSPACE_BYTE_CASES \
|
#define HSPACE_BYTE_CASES \
|
||||||
case CHAR_HT: \
|
case CHAR_HT: \
|
||||||
case CHAR_SPACE
|
case CHAR_SPACE: \
|
||||||
|
case CHAR_NBSP
|
||||||
|
|
||||||
#define HSPACE_CASES HSPACE_BYTE_CASES
|
#define HSPACE_CASES HSPACE_BYTE_CASES
|
||||||
|
|
||||||
@@ -1215,6 +1216,7 @@ same code point. */
|
|||||||
|
|
||||||
#define CHAR_ESC '\047'
|
#define CHAR_ESC '\047'
|
||||||
#define CHAR_DEL '\007'
|
#define CHAR_DEL '\007'
|
||||||
|
#define CHAR_NBSP '\x41'
|
||||||
#define STR_ESC "\047"
|
#define STR_ESC "\047"
|
||||||
#define STR_DEL "\007"
|
#define STR_DEL "\007"
|
||||||
|
|
||||||
@@ -1229,6 +1231,7 @@ a positive value. */
|
|||||||
#define CHAR_NEL ((unsigned char)'\x85')
|
#define CHAR_NEL ((unsigned char)'\x85')
|
||||||
#define CHAR_ESC '\033'
|
#define CHAR_ESC '\033'
|
||||||
#define CHAR_DEL '\177'
|
#define CHAR_DEL '\177'
|
||||||
|
#define CHAR_NBSP ((unsigned char)'\xa0')
|
||||||
|
|
||||||
#define STR_LF "\n"
|
#define STR_LF "\n"
|
||||||
#define STR_NL STR_LF
|
#define STR_NL STR_LF
|
||||||
@@ -1606,6 +1609,7 @@ only. */
|
|||||||
#define CHAR_VERTICAL_LINE '\174'
|
#define CHAR_VERTICAL_LINE '\174'
|
||||||
#define CHAR_RIGHT_CURLY_BRACKET '\175'
|
#define CHAR_RIGHT_CURLY_BRACKET '\175'
|
||||||
#define CHAR_TILDE '\176'
|
#define CHAR_TILDE '\176'
|
||||||
|
#define CHAR_NBSP ((unsigned char)'\xa0')
|
||||||
|
|
||||||
#define STR_HT "\011"
|
#define STR_HT "\011"
|
||||||
#define STR_VT "\013"
|
#define STR_VT "\013"
|
||||||
@@ -1762,6 +1766,10 @@ only. */
|
|||||||
|
|
||||||
/* Escape items that are just an encoding of a particular data value. */
|
/* Escape items that are just an encoding of a particular data value. */
|
||||||
|
|
||||||
|
#ifndef ESC_a
|
||||||
|
#define ESC_a CHAR_BEL
|
||||||
|
#endif
|
||||||
|
|
||||||
#ifndef ESC_e
|
#ifndef ESC_e
|
||||||
#define ESC_e CHAR_ESC
|
#define ESC_e CHAR_ESC
|
||||||
#endif
|
#endif
|
||||||
@@ -2281,7 +2289,7 @@ enum { ERR0, ERR1, ERR2, ERR3, ERR4, ERR5, ERR6, ERR7, ERR8, ERR9,
|
|||||||
ERR50, ERR51, ERR52, ERR53, ERR54, ERR55, ERR56, ERR57, ERR58, ERR59,
|
ERR50, ERR51, ERR52, ERR53, ERR54, ERR55, ERR56, ERR57, ERR58, ERR59,
|
||||||
ERR60, ERR61, ERR62, ERR63, ERR64, ERR65, ERR66, ERR67, ERR68, ERR69,
|
ERR60, ERR61, ERR62, ERR63, ERR64, ERR65, ERR66, ERR67, ERR68, ERR69,
|
||||||
ERR70, ERR71, ERR72, ERR73, ERR74, ERR75, ERR76, ERR77, ERR78, ERR79,
|
ERR70, ERR71, ERR72, ERR73, ERR74, ERR75, ERR76, ERR77, ERR78, ERR79,
|
||||||
ERR80, ERR81, ERR82, ERR83, ERR84, ERR85, ERRCOUNT };
|
ERR80, ERR81, ERR82, ERR83, ERR84, ERR85, ERR86, ERR87, ERRCOUNT };
|
||||||
|
|
||||||
/* JIT compiling modes. The function list is indexed by them. */
|
/* JIT compiling modes. The function list is indexed by them. */
|
||||||
|
|
||||||
@@ -2446,6 +2454,8 @@ typedef struct compile_data {
|
|||||||
BOOL had_pruneorskip; /* (*PRUNE) or (*SKIP) encountered */
|
BOOL had_pruneorskip; /* (*PRUNE) or (*SKIP) encountered */
|
||||||
BOOL check_lookbehind; /* Lookbehinds need later checking */
|
BOOL check_lookbehind; /* Lookbehinds need later checking */
|
||||||
BOOL dupnames; /* Duplicate names exist */
|
BOOL dupnames; /* Duplicate names exist */
|
||||||
|
BOOL dupgroups; /* Duplicate groups exist: (?| found */
|
||||||
|
BOOL iscondassert; /* Next assert is a condition */
|
||||||
int nltype; /* Newline type */
|
int nltype; /* Newline type */
|
||||||
int nllen; /* Newline string length */
|
int nllen; /* Newline string length */
|
||||||
pcre_uchar nl[4]; /* Newline string when fixed length */
|
pcre_uchar nl[4]; /* Newline string when fixed length */
|
||||||
@@ -2459,6 +2469,13 @@ typedef struct branch_chain {
|
|||||||
pcre_uchar *current_branch;
|
pcre_uchar *current_branch;
|
||||||
} branch_chain;
|
} branch_chain;
|
||||||
|
|
||||||
|
/* Structure for mutual recursion detection. */
|
||||||
|
|
||||||
|
typedef struct recurse_check {
|
||||||
|
struct recurse_check *prev;
|
||||||
|
const pcre_uchar *group;
|
||||||
|
} recurse_check;
|
||||||
|
|
||||||
/* Structure for items in a linked list that represents an explicit recursive
|
/* Structure for items in a linked list that represents an explicit recursive
|
||||||
call within the pattern; used by pcre_exec(). */
|
call within the pattern; used by pcre_exec(). */
|
||||||
|
|
||||||
|
File diff suppressed because it is too large
Load Diff
@@ -67,7 +67,8 @@ Arguments:
|
|||||||
code pointer to start of group (the bracket)
|
code pointer to start of group (the bracket)
|
||||||
startcode pointer to start of the whole pattern's code
|
startcode pointer to start of the whole pattern's code
|
||||||
options the compiling options
|
options the compiling options
|
||||||
int RECURSE depth
|
recurses chain of recurse_check to catch mutual recursion
|
||||||
|
countptr pointer to call count (to catch over complexity)
|
||||||
|
|
||||||
Returns: the minimum length
|
Returns: the minimum length
|
||||||
-1 if \C in UTF-8 mode or (*ACCEPT) was encountered
|
-1 if \C in UTF-8 mode or (*ACCEPT) was encountered
|
||||||
@@ -77,15 +78,19 @@ Returns: the minimum length
|
|||||||
|
|
||||||
static int
|
static int
|
||||||
find_minlength(const REAL_PCRE *re, const pcre_uchar *code,
|
find_minlength(const REAL_PCRE *re, const pcre_uchar *code,
|
||||||
const pcre_uchar *startcode, int options, int recurse_depth)
|
const pcre_uchar *startcode, int options, recurse_check *recurses,
|
||||||
|
int *countptr)
|
||||||
{
|
{
|
||||||
int length = -1;
|
int length = -1;
|
||||||
/* PCRE_UTF16 has the same value as PCRE_UTF8. */
|
/* PCRE_UTF16 has the same value as PCRE_UTF8. */
|
||||||
BOOL utf = (options & PCRE_UTF8) != 0;
|
BOOL utf = (options & PCRE_UTF8) != 0;
|
||||||
BOOL had_recurse = FALSE;
|
BOOL had_recurse = FALSE;
|
||||||
|
recurse_check this_recurse;
|
||||||
register int branchlength = 0;
|
register int branchlength = 0;
|
||||||
register pcre_uchar *cc = (pcre_uchar *)code + 1 + LINK_SIZE;
|
register pcre_uchar *cc = (pcre_uchar *)code + 1 + LINK_SIZE;
|
||||||
|
|
||||||
|
if ((*countptr)++ > 1000) return -1; /* too complex */
|
||||||
|
|
||||||
if (*code == OP_CBRA || *code == OP_SCBRA ||
|
if (*code == OP_CBRA || *code == OP_SCBRA ||
|
||||||
*code == OP_CBRAPOS || *code == OP_SCBRAPOS) cc += IMM2_SIZE;
|
*code == OP_CBRAPOS || *code == OP_SCBRAPOS) cc += IMM2_SIZE;
|
||||||
|
|
||||||
@@ -127,7 +132,7 @@ for (;;)
|
|||||||
case OP_SBRAPOS:
|
case OP_SBRAPOS:
|
||||||
case OP_ONCE:
|
case OP_ONCE:
|
||||||
case OP_ONCE_NC:
|
case OP_ONCE_NC:
|
||||||
d = find_minlength(re, cc, startcode, options, recurse_depth);
|
d = find_minlength(re, cc, startcode, options, recurses, countptr);
|
||||||
if (d < 0) return d;
|
if (d < 0) return d;
|
||||||
branchlength += d;
|
branchlength += d;
|
||||||
do cc += GET(cc, 1); while (*cc == OP_ALT);
|
do cc += GET(cc, 1); while (*cc == OP_ALT);
|
||||||
@@ -390,7 +395,7 @@ for (;;)
|
|||||||
ce = cs = (pcre_uchar *)PRIV(find_bracket)(startcode, utf, GET2(slot, 0));
|
ce = cs = (pcre_uchar *)PRIV(find_bracket)(startcode, utf, GET2(slot, 0));
|
||||||
if (cs == NULL) return -2;
|
if (cs == NULL) return -2;
|
||||||
do ce += GET(ce, 1); while (*ce == OP_ALT);
|
do ce += GET(ce, 1); while (*ce == OP_ALT);
|
||||||
if (cc > cs && cc < ce)
|
if (cc > cs && cc < ce) /* Simple recursion */
|
||||||
{
|
{
|
||||||
d = 0;
|
d = 0;
|
||||||
had_recurse = TRUE;
|
had_recurse = TRUE;
|
||||||
@@ -398,8 +403,23 @@ for (;;)
|
|||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
int dd = find_minlength(re, cs, startcode, options, recurse_depth);
|
recurse_check *r = recurses;
|
||||||
if (dd < d) d = dd;
|
for (r = recurses; r != NULL; r = r->prev) if (r->group == cs) break;
|
||||||
|
if (r != NULL) /* Mutual recursion */
|
||||||
|
{
|
||||||
|
d = 0;
|
||||||
|
had_recurse = TRUE;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
int dd;
|
||||||
|
this_recurse.prev = recurses;
|
||||||
|
this_recurse.group = cs;
|
||||||
|
dd = find_minlength(re, cs, startcode, options, &this_recurse,
|
||||||
|
countptr);
|
||||||
|
if (dd < d) d = dd;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
slot += re->name_entry_size;
|
slot += re->name_entry_size;
|
||||||
}
|
}
|
||||||
@@ -415,14 +435,27 @@ for (;;)
|
|||||||
ce = cs = (pcre_uchar *)PRIV(find_bracket)(startcode, utf, GET2(cc, 1));
|
ce = cs = (pcre_uchar *)PRIV(find_bracket)(startcode, utf, GET2(cc, 1));
|
||||||
if (cs == NULL) return -2;
|
if (cs == NULL) return -2;
|
||||||
do ce += GET(ce, 1); while (*ce == OP_ALT);
|
do ce += GET(ce, 1); while (*ce == OP_ALT);
|
||||||
if (cc > cs && cc < ce)
|
if (cc > cs && cc < ce) /* Simple recursion */
|
||||||
{
|
{
|
||||||
d = 0;
|
d = 0;
|
||||||
had_recurse = TRUE;
|
had_recurse = TRUE;
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
d = find_minlength(re, cs, startcode, options, recurse_depth);
|
recurse_check *r = recurses;
|
||||||
|
for (r = recurses; r != NULL; r = r->prev) if (r->group == cs) break;
|
||||||
|
if (r != NULL) /* Mutual recursion */
|
||||||
|
{
|
||||||
|
d = 0;
|
||||||
|
had_recurse = TRUE;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
this_recurse.prev = recurses;
|
||||||
|
this_recurse.group = cs;
|
||||||
|
d = find_minlength(re, cs, startcode, options, &this_recurse,
|
||||||
|
countptr);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
else d = 0;
|
else d = 0;
|
||||||
@@ -471,12 +504,21 @@ for (;;)
|
|||||||
case OP_RECURSE:
|
case OP_RECURSE:
|
||||||
cs = ce = (pcre_uchar *)startcode + GET(cc, 1);
|
cs = ce = (pcre_uchar *)startcode + GET(cc, 1);
|
||||||
do ce += GET(ce, 1); while (*ce == OP_ALT);
|
do ce += GET(ce, 1); while (*ce == OP_ALT);
|
||||||
if ((cc > cs && cc < ce) || recurse_depth > 10)
|
if (cc > cs && cc < ce) /* Simple recursion */
|
||||||
had_recurse = TRUE;
|
had_recurse = TRUE;
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
branchlength += find_minlength(re, cs, startcode, options,
|
recurse_check *r = recurses;
|
||||||
recurse_depth + 1);
|
for (r = recurses; r != NULL; r = r->prev) if (r->group == cs) break;
|
||||||
|
if (r != NULL) /* Mutual recursion */
|
||||||
|
had_recurse = TRUE;
|
||||||
|
else
|
||||||
|
{
|
||||||
|
this_recurse.prev = recurses;
|
||||||
|
this_recurse.group = cs;
|
||||||
|
branchlength += find_minlength(re, cs, startcode, options,
|
||||||
|
&this_recurse, countptr);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
cc += 1 + LINK_SIZE;
|
cc += 1 + LINK_SIZE;
|
||||||
break;
|
break;
|
||||||
@@ -860,7 +902,6 @@ do
|
|||||||
case OP_NOTUPTOI:
|
case OP_NOTUPTOI:
|
||||||
case OP_NOT_HSPACE:
|
case OP_NOT_HSPACE:
|
||||||
case OP_NOT_VSPACE:
|
case OP_NOT_VSPACE:
|
||||||
case OP_PROP:
|
|
||||||
case OP_PRUNE:
|
case OP_PRUNE:
|
||||||
case OP_PRUNE_ARG:
|
case OP_PRUNE_ARG:
|
||||||
case OP_RECURSE:
|
case OP_RECURSE:
|
||||||
@@ -878,6 +919,31 @@ do
|
|||||||
case OP_THEN_ARG:
|
case OP_THEN_ARG:
|
||||||
return SSB_FAIL;
|
return SSB_FAIL;
|
||||||
|
|
||||||
|
/* A "real" property test implies no starting bits, but the fake property
|
||||||
|
PT_CLIST identifies a list of characters. These lists are short, as they
|
||||||
|
are used for characters with more than one "other case", so there is no
|
||||||
|
point in recognizing them for OP_NOTPROP. */
|
||||||
|
|
||||||
|
case OP_PROP:
|
||||||
|
if (tcode[1] != PT_CLIST) return SSB_FAIL;
|
||||||
|
{
|
||||||
|
const pcre_uint32 *p = PRIV(ucd_caseless_sets) + tcode[2];
|
||||||
|
while ((c = *p++) < NOTACHAR)
|
||||||
|
{
|
||||||
|
#if defined SUPPORT_UTF && defined COMPILE_PCRE8
|
||||||
|
if (utf)
|
||||||
|
{
|
||||||
|
pcre_uchar buff[6];
|
||||||
|
(void)PRIV(ord2utf)(c, buff);
|
||||||
|
c = buff[0];
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
if (c > 0xff) SET_BIT(0xff); else SET_BIT(c);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
try_next = FALSE;
|
||||||
|
break;
|
||||||
|
|
||||||
/* We can ignore word boundary tests. */
|
/* We can ignore word boundary tests. */
|
||||||
|
|
||||||
case OP_WORD_BOUNDARY:
|
case OP_WORD_BOUNDARY:
|
||||||
@@ -1103,24 +1169,17 @@ do
|
|||||||
try_next = FALSE;
|
try_next = FALSE;
|
||||||
break;
|
break;
|
||||||
|
|
||||||
/* The cbit_space table has vertical tab as whitespace; we have to
|
/* The cbit_space table has vertical tab as whitespace; we no longer
|
||||||
ensure it is set as not whitespace. Luckily, the code value is the same
|
have to play fancy tricks because Perl added VT to its whitespace at
|
||||||
(0x0b) in ASCII and EBCDIC, so we can just adjust the appropriate bit. */
|
release 5.18. PCRE added it at release 8.34. */
|
||||||
|
|
||||||
case OP_NOT_WHITESPACE:
|
case OP_NOT_WHITESPACE:
|
||||||
set_nottype_bits(start_bits, cbit_space, table_limit, cd);
|
set_nottype_bits(start_bits, cbit_space, table_limit, cd);
|
||||||
start_bits[1] |= 0x08;
|
|
||||||
try_next = FALSE;
|
try_next = FALSE;
|
||||||
break;
|
break;
|
||||||
|
|
||||||
/* The cbit_space table has vertical tab as whitespace; we have to not
|
|
||||||
set it from the table. Luckily, the code value is the same (0x0b) in
|
|
||||||
ASCII and EBCDIC, so we can just adjust the appropriate bit. */
|
|
||||||
|
|
||||||
case OP_WHITESPACE:
|
case OP_WHITESPACE:
|
||||||
c = start_bits[1]; /* Save in case it was already set */
|
|
||||||
set_type_bits(start_bits, cbit_space, table_limit, cd);
|
set_type_bits(start_bits, cbit_space, table_limit, cd);
|
||||||
start_bits[1] = (start_bits[1] & ~0x08) | c;
|
|
||||||
try_next = FALSE;
|
try_next = FALSE;
|
||||||
break;
|
break;
|
||||||
|
|
||||||
@@ -1309,7 +1368,7 @@ do
|
|||||||
for (c = 0; c < 16; c++) start_bits[c] |= map[c];
|
for (c = 0; c < 16; c++) start_bits[c] |= map[c];
|
||||||
for (c = 128; c < 256; c++)
|
for (c = 128; c < 256; c++)
|
||||||
{
|
{
|
||||||
if ((map[c/8] && (1 << (c&7))) != 0)
|
if ((map[c/8] & (1 << (c&7))) != 0)
|
||||||
{
|
{
|
||||||
int d = (c >> 6) | 0xc0; /* Set bit for this starter */
|
int d = (c >> 6) | 0xc0; /* Set bit for this starter */
|
||||||
start_bits[d/8] |= (1 << (d&7)); /* and then skip on to the */
|
start_bits[d/8] |= (1 << (d&7)); /* and then skip on to the */
|
||||||
@@ -1397,6 +1456,7 @@ pcre32_study(const pcre32 *external_re, int options, const char **errorptr)
|
|||||||
#endif
|
#endif
|
||||||
{
|
{
|
||||||
int min;
|
int min;
|
||||||
|
int count = 0;
|
||||||
BOOL bits_set = FALSE;
|
BOOL bits_set = FALSE;
|
||||||
pcre_uint8 start_bits[32];
|
pcre_uint8 start_bits[32];
|
||||||
PUBL(extra) *extra = NULL;
|
PUBL(extra) *extra = NULL;
|
||||||
@@ -1483,7 +1543,7 @@ if ((re->options & PCRE_ANCHORED) == 0 &&
|
|||||||
|
|
||||||
/* Find the minimum length of subject string. */
|
/* Find the minimum length of subject string. */
|
||||||
|
|
||||||
switch(min = find_minlength(re, code, code, re->options, 0))
|
switch(min = find_minlength(re, code, code, re->options, NULL, &count))
|
||||||
{
|
{
|
||||||
case -2: *errorptr = "internal error: missing capturing bracket"; return NULL;
|
case -2: *errorptr = "internal error: missing capturing bracket"; return NULL;
|
||||||
case -3: *errorptr = "internal error: opcode not recognized"; return NULL;
|
case -3: *errorptr = "internal error: opcode not recognized"; return NULL;
|
||||||
|
@@ -209,6 +209,7 @@ strings to make sure that UTF-8 support works on EBCDIC platforms. */
|
|||||||
#define STRING_Avestan0 STR_A STR_v STR_e STR_s STR_t STR_a STR_n "\0"
|
#define STRING_Avestan0 STR_A STR_v STR_e STR_s STR_t STR_a STR_n "\0"
|
||||||
#define STRING_Balinese0 STR_B STR_a STR_l STR_i STR_n STR_e STR_s STR_e "\0"
|
#define STRING_Balinese0 STR_B STR_a STR_l STR_i STR_n STR_e STR_s STR_e "\0"
|
||||||
#define STRING_Bamum0 STR_B STR_a STR_m STR_u STR_m "\0"
|
#define STRING_Bamum0 STR_B STR_a STR_m STR_u STR_m "\0"
|
||||||
|
#define STRING_Bassa_Vah0 STR_B STR_a STR_s STR_s STR_a STR_UNDERSCORE STR_V STR_a STR_h "\0"
|
||||||
#define STRING_Batak0 STR_B STR_a STR_t STR_a STR_k "\0"
|
#define STRING_Batak0 STR_B STR_a STR_t STR_a STR_k "\0"
|
||||||
#define STRING_Bengali0 STR_B STR_e STR_n STR_g STR_a STR_l STR_i "\0"
|
#define STRING_Bengali0 STR_B STR_e STR_n STR_g STR_a STR_l STR_i "\0"
|
||||||
#define STRING_Bopomofo0 STR_B STR_o STR_p STR_o STR_m STR_o STR_f STR_o "\0"
|
#define STRING_Bopomofo0 STR_B STR_o STR_p STR_o STR_m STR_o STR_f STR_o "\0"
|
||||||
@@ -219,6 +220,7 @@ strings to make sure that UTF-8 support works on EBCDIC platforms. */
|
|||||||
#define STRING_C0 STR_C "\0"
|
#define STRING_C0 STR_C "\0"
|
||||||
#define STRING_Canadian_Aboriginal0 STR_C STR_a STR_n STR_a STR_d STR_i STR_a STR_n STR_UNDERSCORE STR_A STR_b STR_o STR_r STR_i STR_g STR_i STR_n STR_a STR_l "\0"
|
#define STRING_Canadian_Aboriginal0 STR_C STR_a STR_n STR_a STR_d STR_i STR_a STR_n STR_UNDERSCORE STR_A STR_b STR_o STR_r STR_i STR_g STR_i STR_n STR_a STR_l "\0"
|
||||||
#define STRING_Carian0 STR_C STR_a STR_r STR_i STR_a STR_n "\0"
|
#define STRING_Carian0 STR_C STR_a STR_r STR_i STR_a STR_n "\0"
|
||||||
|
#define STRING_Caucasian_Albanian0 STR_C STR_a STR_u STR_c STR_a STR_s STR_i STR_a STR_n STR_UNDERSCORE STR_A STR_l STR_b STR_a STR_n STR_i STR_a STR_n "\0"
|
||||||
#define STRING_Cc0 STR_C STR_c "\0"
|
#define STRING_Cc0 STR_C STR_c "\0"
|
||||||
#define STRING_Cf0 STR_C STR_f "\0"
|
#define STRING_Cf0 STR_C STR_f "\0"
|
||||||
#define STRING_Chakma0 STR_C STR_h STR_a STR_k STR_m STR_a "\0"
|
#define STRING_Chakma0 STR_C STR_h STR_a STR_k STR_m STR_a "\0"
|
||||||
@@ -234,11 +236,14 @@ strings to make sure that UTF-8 support works on EBCDIC platforms. */
|
|||||||
#define STRING_Cyrillic0 STR_C STR_y STR_r STR_i STR_l STR_l STR_i STR_c "\0"
|
#define STRING_Cyrillic0 STR_C STR_y STR_r STR_i STR_l STR_l STR_i STR_c "\0"
|
||||||
#define STRING_Deseret0 STR_D STR_e STR_s STR_e STR_r STR_e STR_t "\0"
|
#define STRING_Deseret0 STR_D STR_e STR_s STR_e STR_r STR_e STR_t "\0"
|
||||||
#define STRING_Devanagari0 STR_D STR_e STR_v STR_a STR_n STR_a STR_g STR_a STR_r STR_i "\0"
|
#define STRING_Devanagari0 STR_D STR_e STR_v STR_a STR_n STR_a STR_g STR_a STR_r STR_i "\0"
|
||||||
|
#define STRING_Duployan0 STR_D STR_u STR_p STR_l STR_o STR_y STR_a STR_n "\0"
|
||||||
#define STRING_Egyptian_Hieroglyphs0 STR_E STR_g STR_y STR_p STR_t STR_i STR_a STR_n STR_UNDERSCORE STR_H STR_i STR_e STR_r STR_o STR_g STR_l STR_y STR_p STR_h STR_s "\0"
|
#define STRING_Egyptian_Hieroglyphs0 STR_E STR_g STR_y STR_p STR_t STR_i STR_a STR_n STR_UNDERSCORE STR_H STR_i STR_e STR_r STR_o STR_g STR_l STR_y STR_p STR_h STR_s "\0"
|
||||||
|
#define STRING_Elbasan0 STR_E STR_l STR_b STR_a STR_s STR_a STR_n "\0"
|
||||||
#define STRING_Ethiopic0 STR_E STR_t STR_h STR_i STR_o STR_p STR_i STR_c "\0"
|
#define STRING_Ethiopic0 STR_E STR_t STR_h STR_i STR_o STR_p STR_i STR_c "\0"
|
||||||
#define STRING_Georgian0 STR_G STR_e STR_o STR_r STR_g STR_i STR_a STR_n "\0"
|
#define STRING_Georgian0 STR_G STR_e STR_o STR_r STR_g STR_i STR_a STR_n "\0"
|
||||||
#define STRING_Glagolitic0 STR_G STR_l STR_a STR_g STR_o STR_l STR_i STR_t STR_i STR_c "\0"
|
#define STRING_Glagolitic0 STR_G STR_l STR_a STR_g STR_o STR_l STR_i STR_t STR_i STR_c "\0"
|
||||||
#define STRING_Gothic0 STR_G STR_o STR_t STR_h STR_i STR_c "\0"
|
#define STRING_Gothic0 STR_G STR_o STR_t STR_h STR_i STR_c "\0"
|
||||||
|
#define STRING_Grantha0 STR_G STR_r STR_a STR_n STR_t STR_h STR_a "\0"
|
||||||
#define STRING_Greek0 STR_G STR_r STR_e STR_e STR_k "\0"
|
#define STRING_Greek0 STR_G STR_r STR_e STR_e STR_k "\0"
|
||||||
#define STRING_Gujarati0 STR_G STR_u STR_j STR_a STR_r STR_a STR_t STR_i "\0"
|
#define STRING_Gujarati0 STR_G STR_u STR_j STR_a STR_r STR_a STR_t STR_i "\0"
|
||||||
#define STRING_Gurmukhi0 STR_G STR_u STR_r STR_m STR_u STR_k STR_h STR_i "\0"
|
#define STRING_Gurmukhi0 STR_G STR_u STR_r STR_m STR_u STR_k STR_h STR_i "\0"
|
||||||
@@ -258,12 +263,15 @@ strings to make sure that UTF-8 support works on EBCDIC platforms. */
|
|||||||
#define STRING_Kayah_Li0 STR_K STR_a STR_y STR_a STR_h STR_UNDERSCORE STR_L STR_i "\0"
|
#define STRING_Kayah_Li0 STR_K STR_a STR_y STR_a STR_h STR_UNDERSCORE STR_L STR_i "\0"
|
||||||
#define STRING_Kharoshthi0 STR_K STR_h STR_a STR_r STR_o STR_s STR_h STR_t STR_h STR_i "\0"
|
#define STRING_Kharoshthi0 STR_K STR_h STR_a STR_r STR_o STR_s STR_h STR_t STR_h STR_i "\0"
|
||||||
#define STRING_Khmer0 STR_K STR_h STR_m STR_e STR_r "\0"
|
#define STRING_Khmer0 STR_K STR_h STR_m STR_e STR_r "\0"
|
||||||
|
#define STRING_Khojki0 STR_K STR_h STR_o STR_j STR_k STR_i "\0"
|
||||||
|
#define STRING_Khudawadi0 STR_K STR_h STR_u STR_d STR_a STR_w STR_a STR_d STR_i "\0"
|
||||||
#define STRING_L0 STR_L "\0"
|
#define STRING_L0 STR_L "\0"
|
||||||
#define STRING_L_AMPERSAND0 STR_L STR_AMPERSAND "\0"
|
#define STRING_L_AMPERSAND0 STR_L STR_AMPERSAND "\0"
|
||||||
#define STRING_Lao0 STR_L STR_a STR_o "\0"
|
#define STRING_Lao0 STR_L STR_a STR_o "\0"
|
||||||
#define STRING_Latin0 STR_L STR_a STR_t STR_i STR_n "\0"
|
#define STRING_Latin0 STR_L STR_a STR_t STR_i STR_n "\0"
|
||||||
#define STRING_Lepcha0 STR_L STR_e STR_p STR_c STR_h STR_a "\0"
|
#define STRING_Lepcha0 STR_L STR_e STR_p STR_c STR_h STR_a "\0"
|
||||||
#define STRING_Limbu0 STR_L STR_i STR_m STR_b STR_u "\0"
|
#define STRING_Limbu0 STR_L STR_i STR_m STR_b STR_u "\0"
|
||||||
|
#define STRING_Linear_A0 STR_L STR_i STR_n STR_e STR_a STR_r STR_UNDERSCORE STR_A "\0"
|
||||||
#define STRING_Linear_B0 STR_L STR_i STR_n STR_e STR_a STR_r STR_UNDERSCORE STR_B "\0"
|
#define STRING_Linear_B0 STR_L STR_i STR_n STR_e STR_a STR_r STR_UNDERSCORE STR_B "\0"
|
||||||
#define STRING_Lisu0 STR_L STR_i STR_s STR_u "\0"
|
#define STRING_Lisu0 STR_L STR_i STR_s STR_u "\0"
|
||||||
#define STRING_Ll0 STR_L STR_l "\0"
|
#define STRING_Ll0 STR_L STR_l "\0"
|
||||||
@@ -274,18 +282,24 @@ strings to make sure that UTF-8 support works on EBCDIC platforms. */
|
|||||||
#define STRING_Lycian0 STR_L STR_y STR_c STR_i STR_a STR_n "\0"
|
#define STRING_Lycian0 STR_L STR_y STR_c STR_i STR_a STR_n "\0"
|
||||||
#define STRING_Lydian0 STR_L STR_y STR_d STR_i STR_a STR_n "\0"
|
#define STRING_Lydian0 STR_L STR_y STR_d STR_i STR_a STR_n "\0"
|
||||||
#define STRING_M0 STR_M "\0"
|
#define STRING_M0 STR_M "\0"
|
||||||
|
#define STRING_Mahajani0 STR_M STR_a STR_h STR_a STR_j STR_a STR_n STR_i "\0"
|
||||||
#define STRING_Malayalam0 STR_M STR_a STR_l STR_a STR_y STR_a STR_l STR_a STR_m "\0"
|
#define STRING_Malayalam0 STR_M STR_a STR_l STR_a STR_y STR_a STR_l STR_a STR_m "\0"
|
||||||
#define STRING_Mandaic0 STR_M STR_a STR_n STR_d STR_a STR_i STR_c "\0"
|
#define STRING_Mandaic0 STR_M STR_a STR_n STR_d STR_a STR_i STR_c "\0"
|
||||||
|
#define STRING_Manichaean0 STR_M STR_a STR_n STR_i STR_c STR_h STR_a STR_e STR_a STR_n "\0"
|
||||||
#define STRING_Mc0 STR_M STR_c "\0"
|
#define STRING_Mc0 STR_M STR_c "\0"
|
||||||
#define STRING_Me0 STR_M STR_e "\0"
|
#define STRING_Me0 STR_M STR_e "\0"
|
||||||
#define STRING_Meetei_Mayek0 STR_M STR_e STR_e STR_t STR_e STR_i STR_UNDERSCORE STR_M STR_a STR_y STR_e STR_k "\0"
|
#define STRING_Meetei_Mayek0 STR_M STR_e STR_e STR_t STR_e STR_i STR_UNDERSCORE STR_M STR_a STR_y STR_e STR_k "\0"
|
||||||
|
#define STRING_Mende_Kikakui0 STR_M STR_e STR_n STR_d STR_e STR_UNDERSCORE STR_K STR_i STR_k STR_a STR_k STR_u STR_i "\0"
|
||||||
#define STRING_Meroitic_Cursive0 STR_M STR_e STR_r STR_o STR_i STR_t STR_i STR_c STR_UNDERSCORE STR_C STR_u STR_r STR_s STR_i STR_v STR_e "\0"
|
#define STRING_Meroitic_Cursive0 STR_M STR_e STR_r STR_o STR_i STR_t STR_i STR_c STR_UNDERSCORE STR_C STR_u STR_r STR_s STR_i STR_v STR_e "\0"
|
||||||
#define STRING_Meroitic_Hieroglyphs0 STR_M STR_e STR_r STR_o STR_i STR_t STR_i STR_c STR_UNDERSCORE STR_H STR_i STR_e STR_r STR_o STR_g STR_l STR_y STR_p STR_h STR_s "\0"
|
#define STRING_Meroitic_Hieroglyphs0 STR_M STR_e STR_r STR_o STR_i STR_t STR_i STR_c STR_UNDERSCORE STR_H STR_i STR_e STR_r STR_o STR_g STR_l STR_y STR_p STR_h STR_s "\0"
|
||||||
#define STRING_Miao0 STR_M STR_i STR_a STR_o "\0"
|
#define STRING_Miao0 STR_M STR_i STR_a STR_o "\0"
|
||||||
#define STRING_Mn0 STR_M STR_n "\0"
|
#define STRING_Mn0 STR_M STR_n "\0"
|
||||||
|
#define STRING_Modi0 STR_M STR_o STR_d STR_i "\0"
|
||||||
#define STRING_Mongolian0 STR_M STR_o STR_n STR_g STR_o STR_l STR_i STR_a STR_n "\0"
|
#define STRING_Mongolian0 STR_M STR_o STR_n STR_g STR_o STR_l STR_i STR_a STR_n "\0"
|
||||||
|
#define STRING_Mro0 STR_M STR_r STR_o "\0"
|
||||||
#define STRING_Myanmar0 STR_M STR_y STR_a STR_n STR_m STR_a STR_r "\0"
|
#define STRING_Myanmar0 STR_M STR_y STR_a STR_n STR_m STR_a STR_r "\0"
|
||||||
#define STRING_N0 STR_N "\0"
|
#define STRING_N0 STR_N "\0"
|
||||||
|
#define STRING_Nabataean0 STR_N STR_a STR_b STR_a STR_t STR_a STR_e STR_a STR_n "\0"
|
||||||
#define STRING_Nd0 STR_N STR_d "\0"
|
#define STRING_Nd0 STR_N STR_d "\0"
|
||||||
#define STRING_New_Tai_Lue0 STR_N STR_e STR_w STR_UNDERSCORE STR_T STR_a STR_i STR_UNDERSCORE STR_L STR_u STR_e "\0"
|
#define STRING_New_Tai_Lue0 STR_N STR_e STR_w STR_UNDERSCORE STR_T STR_a STR_i STR_UNDERSCORE STR_L STR_u STR_e "\0"
|
||||||
#define STRING_Nko0 STR_N STR_k STR_o "\0"
|
#define STRING_Nko0 STR_N STR_k STR_o "\0"
|
||||||
@@ -294,12 +308,17 @@ strings to make sure that UTF-8 support works on EBCDIC platforms. */
|
|||||||
#define STRING_Ogham0 STR_O STR_g STR_h STR_a STR_m "\0"
|
#define STRING_Ogham0 STR_O STR_g STR_h STR_a STR_m "\0"
|
||||||
#define STRING_Ol_Chiki0 STR_O STR_l STR_UNDERSCORE STR_C STR_h STR_i STR_k STR_i "\0"
|
#define STRING_Ol_Chiki0 STR_O STR_l STR_UNDERSCORE STR_C STR_h STR_i STR_k STR_i "\0"
|
||||||
#define STRING_Old_Italic0 STR_O STR_l STR_d STR_UNDERSCORE STR_I STR_t STR_a STR_l STR_i STR_c "\0"
|
#define STRING_Old_Italic0 STR_O STR_l STR_d STR_UNDERSCORE STR_I STR_t STR_a STR_l STR_i STR_c "\0"
|
||||||
|
#define STRING_Old_North_Arabian0 STR_O STR_l STR_d STR_UNDERSCORE STR_N STR_o STR_r STR_t STR_h STR_UNDERSCORE STR_A STR_r STR_a STR_b STR_i STR_a STR_n "\0"
|
||||||
|
#define STRING_Old_Permic0 STR_O STR_l STR_d STR_UNDERSCORE STR_P STR_e STR_r STR_m STR_i STR_c "\0"
|
||||||
#define STRING_Old_Persian0 STR_O STR_l STR_d STR_UNDERSCORE STR_P STR_e STR_r STR_s STR_i STR_a STR_n "\0"
|
#define STRING_Old_Persian0 STR_O STR_l STR_d STR_UNDERSCORE STR_P STR_e STR_r STR_s STR_i STR_a STR_n "\0"
|
||||||
#define STRING_Old_South_Arabian0 STR_O STR_l STR_d STR_UNDERSCORE STR_S STR_o STR_u STR_t STR_h STR_UNDERSCORE STR_A STR_r STR_a STR_b STR_i STR_a STR_n "\0"
|
#define STRING_Old_South_Arabian0 STR_O STR_l STR_d STR_UNDERSCORE STR_S STR_o STR_u STR_t STR_h STR_UNDERSCORE STR_A STR_r STR_a STR_b STR_i STR_a STR_n "\0"
|
||||||
#define STRING_Old_Turkic0 STR_O STR_l STR_d STR_UNDERSCORE STR_T STR_u STR_r STR_k STR_i STR_c "\0"
|
#define STRING_Old_Turkic0 STR_O STR_l STR_d STR_UNDERSCORE STR_T STR_u STR_r STR_k STR_i STR_c "\0"
|
||||||
#define STRING_Oriya0 STR_O STR_r STR_i STR_y STR_a "\0"
|
#define STRING_Oriya0 STR_O STR_r STR_i STR_y STR_a "\0"
|
||||||
#define STRING_Osmanya0 STR_O STR_s STR_m STR_a STR_n STR_y STR_a "\0"
|
#define STRING_Osmanya0 STR_O STR_s STR_m STR_a STR_n STR_y STR_a "\0"
|
||||||
#define STRING_P0 STR_P "\0"
|
#define STRING_P0 STR_P "\0"
|
||||||
|
#define STRING_Pahawh_Hmong0 STR_P STR_a STR_h STR_a STR_w STR_h STR_UNDERSCORE STR_H STR_m STR_o STR_n STR_g "\0"
|
||||||
|
#define STRING_Palmyrene0 STR_P STR_a STR_l STR_m STR_y STR_r STR_e STR_n STR_e "\0"
|
||||||
|
#define STRING_Pau_Cin_Hau0 STR_P STR_a STR_u STR_UNDERSCORE STR_C STR_i STR_n STR_UNDERSCORE STR_H STR_a STR_u "\0"
|
||||||
#define STRING_Pc0 STR_P STR_c "\0"
|
#define STRING_Pc0 STR_P STR_c "\0"
|
||||||
#define STRING_Pd0 STR_P STR_d "\0"
|
#define STRING_Pd0 STR_P STR_d "\0"
|
||||||
#define STRING_Pe0 STR_P STR_e "\0"
|
#define STRING_Pe0 STR_P STR_e "\0"
|
||||||
@@ -309,6 +328,7 @@ strings to make sure that UTF-8 support works on EBCDIC platforms. */
|
|||||||
#define STRING_Pi0 STR_P STR_i "\0"
|
#define STRING_Pi0 STR_P STR_i "\0"
|
||||||
#define STRING_Po0 STR_P STR_o "\0"
|
#define STRING_Po0 STR_P STR_o "\0"
|
||||||
#define STRING_Ps0 STR_P STR_s "\0"
|
#define STRING_Ps0 STR_P STR_s "\0"
|
||||||
|
#define STRING_Psalter_Pahlavi0 STR_P STR_s STR_a STR_l STR_t STR_e STR_r STR_UNDERSCORE STR_P STR_a STR_h STR_l STR_a STR_v STR_i "\0"
|
||||||
#define STRING_Rejang0 STR_R STR_e STR_j STR_a STR_n STR_g "\0"
|
#define STRING_Rejang0 STR_R STR_e STR_j STR_a STR_n STR_g "\0"
|
||||||
#define STRING_Runic0 STR_R STR_u STR_n STR_i STR_c "\0"
|
#define STRING_Runic0 STR_R STR_u STR_n STR_i STR_c "\0"
|
||||||
#define STRING_S0 STR_S "\0"
|
#define STRING_S0 STR_S "\0"
|
||||||
@@ -317,6 +337,7 @@ strings to make sure that UTF-8 support works on EBCDIC platforms. */
|
|||||||
#define STRING_Sc0 STR_S STR_c "\0"
|
#define STRING_Sc0 STR_S STR_c "\0"
|
||||||
#define STRING_Sharada0 STR_S STR_h STR_a STR_r STR_a STR_d STR_a "\0"
|
#define STRING_Sharada0 STR_S STR_h STR_a STR_r STR_a STR_d STR_a "\0"
|
||||||
#define STRING_Shavian0 STR_S STR_h STR_a STR_v STR_i STR_a STR_n "\0"
|
#define STRING_Shavian0 STR_S STR_h STR_a STR_v STR_i STR_a STR_n "\0"
|
||||||
|
#define STRING_Siddham0 STR_S STR_i STR_d STR_d STR_h STR_a STR_m "\0"
|
||||||
#define STRING_Sinhala0 STR_S STR_i STR_n STR_h STR_a STR_l STR_a "\0"
|
#define STRING_Sinhala0 STR_S STR_i STR_n STR_h STR_a STR_l STR_a "\0"
|
||||||
#define STRING_Sk0 STR_S STR_k "\0"
|
#define STRING_Sk0 STR_S STR_k "\0"
|
||||||
#define STRING_Sm0 STR_S STR_m "\0"
|
#define STRING_Sm0 STR_S STR_m "\0"
|
||||||
@@ -337,8 +358,10 @@ strings to make sure that UTF-8 support works on EBCDIC platforms. */
|
|||||||
#define STRING_Thai0 STR_T STR_h STR_a STR_i "\0"
|
#define STRING_Thai0 STR_T STR_h STR_a STR_i "\0"
|
||||||
#define STRING_Tibetan0 STR_T STR_i STR_b STR_e STR_t STR_a STR_n "\0"
|
#define STRING_Tibetan0 STR_T STR_i STR_b STR_e STR_t STR_a STR_n "\0"
|
||||||
#define STRING_Tifinagh0 STR_T STR_i STR_f STR_i STR_n STR_a STR_g STR_h "\0"
|
#define STRING_Tifinagh0 STR_T STR_i STR_f STR_i STR_n STR_a STR_g STR_h "\0"
|
||||||
|
#define STRING_Tirhuta0 STR_T STR_i STR_r STR_h STR_u STR_t STR_a "\0"
|
||||||
#define STRING_Ugaritic0 STR_U STR_g STR_a STR_r STR_i STR_t STR_i STR_c "\0"
|
#define STRING_Ugaritic0 STR_U STR_g STR_a STR_r STR_i STR_t STR_i STR_c "\0"
|
||||||
#define STRING_Vai0 STR_V STR_a STR_i "\0"
|
#define STRING_Vai0 STR_V STR_a STR_i "\0"
|
||||||
|
#define STRING_Warang_Citi0 STR_W STR_a STR_r STR_a STR_n STR_g STR_UNDERSCORE STR_C STR_i STR_t STR_i "\0"
|
||||||
#define STRING_Xan0 STR_X STR_a STR_n "\0"
|
#define STRING_Xan0 STR_X STR_a STR_n "\0"
|
||||||
#define STRING_Xps0 STR_X STR_p STR_s "\0"
|
#define STRING_Xps0 STR_X STR_p STR_s "\0"
|
||||||
#define STRING_Xsp0 STR_X STR_s STR_p "\0"
|
#define STRING_Xsp0 STR_X STR_s STR_p "\0"
|
||||||
@@ -357,6 +380,7 @@ const char PRIV(utt_names)[] =
|
|||||||
STRING_Avestan0
|
STRING_Avestan0
|
||||||
STRING_Balinese0
|
STRING_Balinese0
|
||||||
STRING_Bamum0
|
STRING_Bamum0
|
||||||
|
STRING_Bassa_Vah0
|
||||||
STRING_Batak0
|
STRING_Batak0
|
||||||
STRING_Bengali0
|
STRING_Bengali0
|
||||||
STRING_Bopomofo0
|
STRING_Bopomofo0
|
||||||
@@ -367,6 +391,7 @@ const char PRIV(utt_names)[] =
|
|||||||
STRING_C0
|
STRING_C0
|
||||||
STRING_Canadian_Aboriginal0
|
STRING_Canadian_Aboriginal0
|
||||||
STRING_Carian0
|
STRING_Carian0
|
||||||
|
STRING_Caucasian_Albanian0
|
||||||
STRING_Cc0
|
STRING_Cc0
|
||||||
STRING_Cf0
|
STRING_Cf0
|
||||||
STRING_Chakma0
|
STRING_Chakma0
|
||||||
@@ -382,11 +407,14 @@ const char PRIV(utt_names)[] =
|
|||||||
STRING_Cyrillic0
|
STRING_Cyrillic0
|
||||||
STRING_Deseret0
|
STRING_Deseret0
|
||||||
STRING_Devanagari0
|
STRING_Devanagari0
|
||||||
|
STRING_Duployan0
|
||||||
STRING_Egyptian_Hieroglyphs0
|
STRING_Egyptian_Hieroglyphs0
|
||||||
|
STRING_Elbasan0
|
||||||
STRING_Ethiopic0
|
STRING_Ethiopic0
|
||||||
STRING_Georgian0
|
STRING_Georgian0
|
||||||
STRING_Glagolitic0
|
STRING_Glagolitic0
|
||||||
STRING_Gothic0
|
STRING_Gothic0
|
||||||
|
STRING_Grantha0
|
||||||
STRING_Greek0
|
STRING_Greek0
|
||||||
STRING_Gujarati0
|
STRING_Gujarati0
|
||||||
STRING_Gurmukhi0
|
STRING_Gurmukhi0
|
||||||
@@ -406,12 +434,15 @@ const char PRIV(utt_names)[] =
|
|||||||
STRING_Kayah_Li0
|
STRING_Kayah_Li0
|
||||||
STRING_Kharoshthi0
|
STRING_Kharoshthi0
|
||||||
STRING_Khmer0
|
STRING_Khmer0
|
||||||
|
STRING_Khojki0
|
||||||
|
STRING_Khudawadi0
|
||||||
STRING_L0
|
STRING_L0
|
||||||
STRING_L_AMPERSAND0
|
STRING_L_AMPERSAND0
|
||||||
STRING_Lao0
|
STRING_Lao0
|
||||||
STRING_Latin0
|
STRING_Latin0
|
||||||
STRING_Lepcha0
|
STRING_Lepcha0
|
||||||
STRING_Limbu0
|
STRING_Limbu0
|
||||||
|
STRING_Linear_A0
|
||||||
STRING_Linear_B0
|
STRING_Linear_B0
|
||||||
STRING_Lisu0
|
STRING_Lisu0
|
||||||
STRING_Ll0
|
STRING_Ll0
|
||||||
@@ -422,18 +453,24 @@ const char PRIV(utt_names)[] =
|
|||||||
STRING_Lycian0
|
STRING_Lycian0
|
||||||
STRING_Lydian0
|
STRING_Lydian0
|
||||||
STRING_M0
|
STRING_M0
|
||||||
|
STRING_Mahajani0
|
||||||
STRING_Malayalam0
|
STRING_Malayalam0
|
||||||
STRING_Mandaic0
|
STRING_Mandaic0
|
||||||
|
STRING_Manichaean0
|
||||||
STRING_Mc0
|
STRING_Mc0
|
||||||
STRING_Me0
|
STRING_Me0
|
||||||
STRING_Meetei_Mayek0
|
STRING_Meetei_Mayek0
|
||||||
|
STRING_Mende_Kikakui0
|
||||||
STRING_Meroitic_Cursive0
|
STRING_Meroitic_Cursive0
|
||||||
STRING_Meroitic_Hieroglyphs0
|
STRING_Meroitic_Hieroglyphs0
|
||||||
STRING_Miao0
|
STRING_Miao0
|
||||||
STRING_Mn0
|
STRING_Mn0
|
||||||
|
STRING_Modi0
|
||||||
STRING_Mongolian0
|
STRING_Mongolian0
|
||||||
|
STRING_Mro0
|
||||||
STRING_Myanmar0
|
STRING_Myanmar0
|
||||||
STRING_N0
|
STRING_N0
|
||||||
|
STRING_Nabataean0
|
||||||
STRING_Nd0
|
STRING_Nd0
|
||||||
STRING_New_Tai_Lue0
|
STRING_New_Tai_Lue0
|
||||||
STRING_Nko0
|
STRING_Nko0
|
||||||
@@ -442,12 +479,17 @@ const char PRIV(utt_names)[] =
|
|||||||
STRING_Ogham0
|
STRING_Ogham0
|
||||||
STRING_Ol_Chiki0
|
STRING_Ol_Chiki0
|
||||||
STRING_Old_Italic0
|
STRING_Old_Italic0
|
||||||
|
STRING_Old_North_Arabian0
|
||||||
|
STRING_Old_Permic0
|
||||||
STRING_Old_Persian0
|
STRING_Old_Persian0
|
||||||
STRING_Old_South_Arabian0
|
STRING_Old_South_Arabian0
|
||||||
STRING_Old_Turkic0
|
STRING_Old_Turkic0
|
||||||
STRING_Oriya0
|
STRING_Oriya0
|
||||||
STRING_Osmanya0
|
STRING_Osmanya0
|
||||||
STRING_P0
|
STRING_P0
|
||||||
|
STRING_Pahawh_Hmong0
|
||||||
|
STRING_Palmyrene0
|
||||||
|
STRING_Pau_Cin_Hau0
|
||||||
STRING_Pc0
|
STRING_Pc0
|
||||||
STRING_Pd0
|
STRING_Pd0
|
||||||
STRING_Pe0
|
STRING_Pe0
|
||||||
@@ -457,6 +499,7 @@ const char PRIV(utt_names)[] =
|
|||||||
STRING_Pi0
|
STRING_Pi0
|
||||||
STRING_Po0
|
STRING_Po0
|
||||||
STRING_Ps0
|
STRING_Ps0
|
||||||
|
STRING_Psalter_Pahlavi0
|
||||||
STRING_Rejang0
|
STRING_Rejang0
|
||||||
STRING_Runic0
|
STRING_Runic0
|
||||||
STRING_S0
|
STRING_S0
|
||||||
@@ -465,6 +508,7 @@ const char PRIV(utt_names)[] =
|
|||||||
STRING_Sc0
|
STRING_Sc0
|
||||||
STRING_Sharada0
|
STRING_Sharada0
|
||||||
STRING_Shavian0
|
STRING_Shavian0
|
||||||
|
STRING_Siddham0
|
||||||
STRING_Sinhala0
|
STRING_Sinhala0
|
||||||
STRING_Sk0
|
STRING_Sk0
|
||||||
STRING_Sm0
|
STRING_Sm0
|
||||||
@@ -485,8 +529,10 @@ const char PRIV(utt_names)[] =
|
|||||||
STRING_Thai0
|
STRING_Thai0
|
||||||
STRING_Tibetan0
|
STRING_Tibetan0
|
||||||
STRING_Tifinagh0
|
STRING_Tifinagh0
|
||||||
|
STRING_Tirhuta0
|
||||||
STRING_Ugaritic0
|
STRING_Ugaritic0
|
||||||
STRING_Vai0
|
STRING_Vai0
|
||||||
|
STRING_Warang_Citi0
|
||||||
STRING_Xan0
|
STRING_Xan0
|
||||||
STRING_Xps0
|
STRING_Xps0
|
||||||
STRING_Xsp0
|
STRING_Xsp0
|
||||||
@@ -505,146 +551,169 @@ const ucp_type_table PRIV(utt)[] = {
|
|||||||
{ 20, PT_SC, ucp_Avestan },
|
{ 20, PT_SC, ucp_Avestan },
|
||||||
{ 28, PT_SC, ucp_Balinese },
|
{ 28, PT_SC, ucp_Balinese },
|
||||||
{ 37, PT_SC, ucp_Bamum },
|
{ 37, PT_SC, ucp_Bamum },
|
||||||
{ 43, PT_SC, ucp_Batak },
|
{ 43, PT_SC, ucp_Bassa_Vah },
|
||||||
{ 49, PT_SC, ucp_Bengali },
|
{ 53, PT_SC, ucp_Batak },
|
||||||
{ 57, PT_SC, ucp_Bopomofo },
|
{ 59, PT_SC, ucp_Bengali },
|
||||||
{ 66, PT_SC, ucp_Brahmi },
|
{ 67, PT_SC, ucp_Bopomofo },
|
||||||
{ 73, PT_SC, ucp_Braille },
|
{ 76, PT_SC, ucp_Brahmi },
|
||||||
{ 81, PT_SC, ucp_Buginese },
|
{ 83, PT_SC, ucp_Braille },
|
||||||
{ 90, PT_SC, ucp_Buhid },
|
{ 91, PT_SC, ucp_Buginese },
|
||||||
{ 96, PT_GC, ucp_C },
|
{ 100, PT_SC, ucp_Buhid },
|
||||||
{ 98, PT_SC, ucp_Canadian_Aboriginal },
|
{ 106, PT_GC, ucp_C },
|
||||||
{ 118, PT_SC, ucp_Carian },
|
{ 108, PT_SC, ucp_Canadian_Aboriginal },
|
||||||
{ 125, PT_PC, ucp_Cc },
|
{ 128, PT_SC, ucp_Carian },
|
||||||
{ 128, PT_PC, ucp_Cf },
|
{ 135, PT_SC, ucp_Caucasian_Albanian },
|
||||||
{ 131, PT_SC, ucp_Chakma },
|
{ 154, PT_PC, ucp_Cc },
|
||||||
{ 138, PT_SC, ucp_Cham },
|
{ 157, PT_PC, ucp_Cf },
|
||||||
{ 143, PT_SC, ucp_Cherokee },
|
{ 160, PT_SC, ucp_Chakma },
|
||||||
{ 152, PT_PC, ucp_Cn },
|
{ 167, PT_SC, ucp_Cham },
|
||||||
{ 155, PT_PC, ucp_Co },
|
{ 172, PT_SC, ucp_Cherokee },
|
||||||
{ 158, PT_SC, ucp_Common },
|
{ 181, PT_PC, ucp_Cn },
|
||||||
{ 165, PT_SC, ucp_Coptic },
|
{ 184, PT_PC, ucp_Co },
|
||||||
{ 172, PT_PC, ucp_Cs },
|
{ 187, PT_SC, ucp_Common },
|
||||||
{ 175, PT_SC, ucp_Cuneiform },
|
{ 194, PT_SC, ucp_Coptic },
|
||||||
{ 185, PT_SC, ucp_Cypriot },
|
{ 201, PT_PC, ucp_Cs },
|
||||||
{ 193, PT_SC, ucp_Cyrillic },
|
{ 204, PT_SC, ucp_Cuneiform },
|
||||||
{ 202, PT_SC, ucp_Deseret },
|
{ 214, PT_SC, ucp_Cypriot },
|
||||||
{ 210, PT_SC, ucp_Devanagari },
|
{ 222, PT_SC, ucp_Cyrillic },
|
||||||
{ 221, PT_SC, ucp_Egyptian_Hieroglyphs },
|
{ 231, PT_SC, ucp_Deseret },
|
||||||
{ 242, PT_SC, ucp_Ethiopic },
|
{ 239, PT_SC, ucp_Devanagari },
|
||||||
{ 251, PT_SC, ucp_Georgian },
|
{ 250, PT_SC, ucp_Duployan },
|
||||||
{ 260, PT_SC, ucp_Glagolitic },
|
{ 259, PT_SC, ucp_Egyptian_Hieroglyphs },
|
||||||
{ 271, PT_SC, ucp_Gothic },
|
{ 280, PT_SC, ucp_Elbasan },
|
||||||
{ 278, PT_SC, ucp_Greek },
|
{ 288, PT_SC, ucp_Ethiopic },
|
||||||
{ 284, PT_SC, ucp_Gujarati },
|
{ 297, PT_SC, ucp_Georgian },
|
||||||
{ 293, PT_SC, ucp_Gurmukhi },
|
{ 306, PT_SC, ucp_Glagolitic },
|
||||||
{ 302, PT_SC, ucp_Han },
|
{ 317, PT_SC, ucp_Gothic },
|
||||||
{ 306, PT_SC, ucp_Hangul },
|
{ 324, PT_SC, ucp_Grantha },
|
||||||
{ 313, PT_SC, ucp_Hanunoo },
|
{ 332, PT_SC, ucp_Greek },
|
||||||
{ 321, PT_SC, ucp_Hebrew },
|
{ 338, PT_SC, ucp_Gujarati },
|
||||||
{ 328, PT_SC, ucp_Hiragana },
|
{ 347, PT_SC, ucp_Gurmukhi },
|
||||||
{ 337, PT_SC, ucp_Imperial_Aramaic },
|
{ 356, PT_SC, ucp_Han },
|
||||||
{ 354, PT_SC, ucp_Inherited },
|
{ 360, PT_SC, ucp_Hangul },
|
||||||
{ 364, PT_SC, ucp_Inscriptional_Pahlavi },
|
{ 367, PT_SC, ucp_Hanunoo },
|
||||||
{ 386, PT_SC, ucp_Inscriptional_Parthian },
|
{ 375, PT_SC, ucp_Hebrew },
|
||||||
{ 409, PT_SC, ucp_Javanese },
|
{ 382, PT_SC, ucp_Hiragana },
|
||||||
{ 418, PT_SC, ucp_Kaithi },
|
{ 391, PT_SC, ucp_Imperial_Aramaic },
|
||||||
{ 425, PT_SC, ucp_Kannada },
|
{ 408, PT_SC, ucp_Inherited },
|
||||||
{ 433, PT_SC, ucp_Katakana },
|
{ 418, PT_SC, ucp_Inscriptional_Pahlavi },
|
||||||
{ 442, PT_SC, ucp_Kayah_Li },
|
{ 440, PT_SC, ucp_Inscriptional_Parthian },
|
||||||
{ 451, PT_SC, ucp_Kharoshthi },
|
{ 463, PT_SC, ucp_Javanese },
|
||||||
{ 462, PT_SC, ucp_Khmer },
|
{ 472, PT_SC, ucp_Kaithi },
|
||||||
{ 468, PT_GC, ucp_L },
|
{ 479, PT_SC, ucp_Kannada },
|
||||||
{ 470, PT_LAMP, 0 },
|
{ 487, PT_SC, ucp_Katakana },
|
||||||
{ 473, PT_SC, ucp_Lao },
|
{ 496, PT_SC, ucp_Kayah_Li },
|
||||||
{ 477, PT_SC, ucp_Latin },
|
{ 505, PT_SC, ucp_Kharoshthi },
|
||||||
{ 483, PT_SC, ucp_Lepcha },
|
{ 516, PT_SC, ucp_Khmer },
|
||||||
{ 490, PT_SC, ucp_Limbu },
|
{ 522, PT_SC, ucp_Khojki },
|
||||||
{ 496, PT_SC, ucp_Linear_B },
|
{ 529, PT_SC, ucp_Khudawadi },
|
||||||
{ 505, PT_SC, ucp_Lisu },
|
{ 539, PT_GC, ucp_L },
|
||||||
{ 510, PT_PC, ucp_Ll },
|
{ 541, PT_LAMP, 0 },
|
||||||
{ 513, PT_PC, ucp_Lm },
|
{ 544, PT_SC, ucp_Lao },
|
||||||
{ 516, PT_PC, ucp_Lo },
|
{ 548, PT_SC, ucp_Latin },
|
||||||
{ 519, PT_PC, ucp_Lt },
|
{ 554, PT_SC, ucp_Lepcha },
|
||||||
{ 522, PT_PC, ucp_Lu },
|
{ 561, PT_SC, ucp_Limbu },
|
||||||
{ 525, PT_SC, ucp_Lycian },
|
{ 567, PT_SC, ucp_Linear_A },
|
||||||
{ 532, PT_SC, ucp_Lydian },
|
{ 576, PT_SC, ucp_Linear_B },
|
||||||
{ 539, PT_GC, ucp_M },
|
{ 585, PT_SC, ucp_Lisu },
|
||||||
{ 541, PT_SC, ucp_Malayalam },
|
{ 590, PT_PC, ucp_Ll },
|
||||||
{ 551, PT_SC, ucp_Mandaic },
|
{ 593, PT_PC, ucp_Lm },
|
||||||
{ 559, PT_PC, ucp_Mc },
|
{ 596, PT_PC, ucp_Lo },
|
||||||
{ 562, PT_PC, ucp_Me },
|
{ 599, PT_PC, ucp_Lt },
|
||||||
{ 565, PT_SC, ucp_Meetei_Mayek },
|
{ 602, PT_PC, ucp_Lu },
|
||||||
{ 578, PT_SC, ucp_Meroitic_Cursive },
|
{ 605, PT_SC, ucp_Lycian },
|
||||||
{ 595, PT_SC, ucp_Meroitic_Hieroglyphs },
|
{ 612, PT_SC, ucp_Lydian },
|
||||||
{ 616, PT_SC, ucp_Miao },
|
{ 619, PT_GC, ucp_M },
|
||||||
{ 621, PT_PC, ucp_Mn },
|
{ 621, PT_SC, ucp_Mahajani },
|
||||||
{ 624, PT_SC, ucp_Mongolian },
|
{ 630, PT_SC, ucp_Malayalam },
|
||||||
{ 634, PT_SC, ucp_Myanmar },
|
{ 640, PT_SC, ucp_Mandaic },
|
||||||
{ 642, PT_GC, ucp_N },
|
{ 648, PT_SC, ucp_Manichaean },
|
||||||
{ 644, PT_PC, ucp_Nd },
|
{ 659, PT_PC, ucp_Mc },
|
||||||
{ 647, PT_SC, ucp_New_Tai_Lue },
|
{ 662, PT_PC, ucp_Me },
|
||||||
{ 659, PT_SC, ucp_Nko },
|
{ 665, PT_SC, ucp_Meetei_Mayek },
|
||||||
{ 663, PT_PC, ucp_Nl },
|
{ 678, PT_SC, ucp_Mende_Kikakui },
|
||||||
{ 666, PT_PC, ucp_No },
|
{ 692, PT_SC, ucp_Meroitic_Cursive },
|
||||||
{ 669, PT_SC, ucp_Ogham },
|
{ 709, PT_SC, ucp_Meroitic_Hieroglyphs },
|
||||||
{ 675, PT_SC, ucp_Ol_Chiki },
|
{ 730, PT_SC, ucp_Miao },
|
||||||
{ 684, PT_SC, ucp_Old_Italic },
|
{ 735, PT_PC, ucp_Mn },
|
||||||
{ 695, PT_SC, ucp_Old_Persian },
|
{ 738, PT_SC, ucp_Modi },
|
||||||
{ 707, PT_SC, ucp_Old_South_Arabian },
|
{ 743, PT_SC, ucp_Mongolian },
|
||||||
{ 725, PT_SC, ucp_Old_Turkic },
|
{ 753, PT_SC, ucp_Mro },
|
||||||
{ 736, PT_SC, ucp_Oriya },
|
{ 757, PT_SC, ucp_Myanmar },
|
||||||
{ 742, PT_SC, ucp_Osmanya },
|
{ 765, PT_GC, ucp_N },
|
||||||
{ 750, PT_GC, ucp_P },
|
{ 767, PT_SC, ucp_Nabataean },
|
||||||
{ 752, PT_PC, ucp_Pc },
|
{ 777, PT_PC, ucp_Nd },
|
||||||
{ 755, PT_PC, ucp_Pd },
|
{ 780, PT_SC, ucp_New_Tai_Lue },
|
||||||
{ 758, PT_PC, ucp_Pe },
|
{ 792, PT_SC, ucp_Nko },
|
||||||
{ 761, PT_PC, ucp_Pf },
|
{ 796, PT_PC, ucp_Nl },
|
||||||
{ 764, PT_SC, ucp_Phags_Pa },
|
{ 799, PT_PC, ucp_No },
|
||||||
{ 773, PT_SC, ucp_Phoenician },
|
{ 802, PT_SC, ucp_Ogham },
|
||||||
{ 784, PT_PC, ucp_Pi },
|
{ 808, PT_SC, ucp_Ol_Chiki },
|
||||||
{ 787, PT_PC, ucp_Po },
|
{ 817, PT_SC, ucp_Old_Italic },
|
||||||
{ 790, PT_PC, ucp_Ps },
|
{ 828, PT_SC, ucp_Old_North_Arabian },
|
||||||
{ 793, PT_SC, ucp_Rejang },
|
{ 846, PT_SC, ucp_Old_Permic },
|
||||||
{ 800, PT_SC, ucp_Runic },
|
{ 857, PT_SC, ucp_Old_Persian },
|
||||||
{ 806, PT_GC, ucp_S },
|
{ 869, PT_SC, ucp_Old_South_Arabian },
|
||||||
{ 808, PT_SC, ucp_Samaritan },
|
{ 887, PT_SC, ucp_Old_Turkic },
|
||||||
{ 818, PT_SC, ucp_Saurashtra },
|
{ 898, PT_SC, ucp_Oriya },
|
||||||
{ 829, PT_PC, ucp_Sc },
|
{ 904, PT_SC, ucp_Osmanya },
|
||||||
{ 832, PT_SC, ucp_Sharada },
|
{ 912, PT_GC, ucp_P },
|
||||||
{ 840, PT_SC, ucp_Shavian },
|
{ 914, PT_SC, ucp_Pahawh_Hmong },
|
||||||
{ 848, PT_SC, ucp_Sinhala },
|
{ 927, PT_SC, ucp_Palmyrene },
|
||||||
{ 856, PT_PC, ucp_Sk },
|
{ 937, PT_SC, ucp_Pau_Cin_Hau },
|
||||||
{ 859, PT_PC, ucp_Sm },
|
{ 949, PT_PC, ucp_Pc },
|
||||||
{ 862, PT_PC, ucp_So },
|
{ 952, PT_PC, ucp_Pd },
|
||||||
{ 865, PT_SC, ucp_Sora_Sompeng },
|
{ 955, PT_PC, ucp_Pe },
|
||||||
{ 878, PT_SC, ucp_Sundanese },
|
{ 958, PT_PC, ucp_Pf },
|
||||||
{ 888, PT_SC, ucp_Syloti_Nagri },
|
{ 961, PT_SC, ucp_Phags_Pa },
|
||||||
{ 901, PT_SC, ucp_Syriac },
|
{ 970, PT_SC, ucp_Phoenician },
|
||||||
{ 908, PT_SC, ucp_Tagalog },
|
{ 981, PT_PC, ucp_Pi },
|
||||||
{ 916, PT_SC, ucp_Tagbanwa },
|
{ 984, PT_PC, ucp_Po },
|
||||||
{ 925, PT_SC, ucp_Tai_Le },
|
{ 987, PT_PC, ucp_Ps },
|
||||||
{ 932, PT_SC, ucp_Tai_Tham },
|
{ 990, PT_SC, ucp_Psalter_Pahlavi },
|
||||||
{ 941, PT_SC, ucp_Tai_Viet },
|
{ 1006, PT_SC, ucp_Rejang },
|
||||||
{ 950, PT_SC, ucp_Takri },
|
{ 1013, PT_SC, ucp_Runic },
|
||||||
{ 956, PT_SC, ucp_Tamil },
|
{ 1019, PT_GC, ucp_S },
|
||||||
{ 962, PT_SC, ucp_Telugu },
|
{ 1021, PT_SC, ucp_Samaritan },
|
||||||
{ 969, PT_SC, ucp_Thaana },
|
{ 1031, PT_SC, ucp_Saurashtra },
|
||||||
{ 976, PT_SC, ucp_Thai },
|
{ 1042, PT_PC, ucp_Sc },
|
||||||
{ 981, PT_SC, ucp_Tibetan },
|
{ 1045, PT_SC, ucp_Sharada },
|
||||||
{ 989, PT_SC, ucp_Tifinagh },
|
{ 1053, PT_SC, ucp_Shavian },
|
||||||
{ 998, PT_SC, ucp_Ugaritic },
|
{ 1061, PT_SC, ucp_Siddham },
|
||||||
{ 1007, PT_SC, ucp_Vai },
|
{ 1069, PT_SC, ucp_Sinhala },
|
||||||
{ 1011, PT_ALNUM, 0 },
|
{ 1077, PT_PC, ucp_Sk },
|
||||||
{ 1015, PT_PXSPACE, 0 },
|
{ 1080, PT_PC, ucp_Sm },
|
||||||
{ 1019, PT_SPACE, 0 },
|
{ 1083, PT_PC, ucp_So },
|
||||||
{ 1023, PT_UCNC, 0 },
|
{ 1086, PT_SC, ucp_Sora_Sompeng },
|
||||||
{ 1027, PT_WORD, 0 },
|
{ 1099, PT_SC, ucp_Sundanese },
|
||||||
{ 1031, PT_SC, ucp_Yi },
|
{ 1109, PT_SC, ucp_Syloti_Nagri },
|
||||||
{ 1034, PT_GC, ucp_Z },
|
{ 1122, PT_SC, ucp_Syriac },
|
||||||
{ 1036, PT_PC, ucp_Zl },
|
{ 1129, PT_SC, ucp_Tagalog },
|
||||||
{ 1039, PT_PC, ucp_Zp },
|
{ 1137, PT_SC, ucp_Tagbanwa },
|
||||||
{ 1042, PT_PC, ucp_Zs }
|
{ 1146, PT_SC, ucp_Tai_Le },
|
||||||
|
{ 1153, PT_SC, ucp_Tai_Tham },
|
||||||
|
{ 1162, PT_SC, ucp_Tai_Viet },
|
||||||
|
{ 1171, PT_SC, ucp_Takri },
|
||||||
|
{ 1177, PT_SC, ucp_Tamil },
|
||||||
|
{ 1183, PT_SC, ucp_Telugu },
|
||||||
|
{ 1190, PT_SC, ucp_Thaana },
|
||||||
|
{ 1197, PT_SC, ucp_Thai },
|
||||||
|
{ 1202, PT_SC, ucp_Tibetan },
|
||||||
|
{ 1210, PT_SC, ucp_Tifinagh },
|
||||||
|
{ 1219, PT_SC, ucp_Tirhuta },
|
||||||
|
{ 1227, PT_SC, ucp_Ugaritic },
|
||||||
|
{ 1236, PT_SC, ucp_Vai },
|
||||||
|
{ 1240, PT_SC, ucp_Warang_Citi },
|
||||||
|
{ 1252, PT_ALNUM, 0 },
|
||||||
|
{ 1256, PT_PXSPACE, 0 },
|
||||||
|
{ 1260, PT_SPACE, 0 },
|
||||||
|
{ 1264, PT_UCNC, 0 },
|
||||||
|
{ 1268, PT_WORD, 0 },
|
||||||
|
{ 1272, PT_SC, ucp_Yi },
|
||||||
|
{ 1275, PT_GC, ucp_Z },
|
||||||
|
{ 1277, PT_PC, ucp_Zl },
|
||||||
|
{ 1280, PT_PC, ucp_Zp },
|
||||||
|
{ 1283, PT_PC, ucp_Zs }
|
||||||
};
|
};
|
||||||
|
|
||||||
const int PRIV(utt_size) = sizeof(PRIV(utt)) / sizeof(ucp_type_table);
|
const int PRIV(utt_size) = sizeof(PRIV(utt)) / sizeof(ucp_type_table);
|
||||||
|
File diff suppressed because it is too large
Load Diff
@@ -242,7 +242,7 @@ while ((t = *data++) != XCL_END)
|
|||||||
|
|
||||||
case PT_PXPUNCT:
|
case PT_PXPUNCT:
|
||||||
if ((PRIV(ucp_gentype)[prop->chartype] == ucp_P ||
|
if ((PRIV(ucp_gentype)[prop->chartype] == ucp_P ||
|
||||||
(c < 256 && PRIV(ucp_gentype)[prop->chartype] == ucp_S)) == isprop)
|
(c < 128 && PRIV(ucp_gentype)[prop->chartype] == ucp_S)) == isprop)
|
||||||
return !negated;
|
return !negated;
|
||||||
break;
|
break;
|
||||||
|
|
||||||
|
@@ -192,7 +192,31 @@ enum {
|
|||||||
ucp_Miao,
|
ucp_Miao,
|
||||||
ucp_Sharada,
|
ucp_Sharada,
|
||||||
ucp_Sora_Sompeng,
|
ucp_Sora_Sompeng,
|
||||||
ucp_Takri
|
ucp_Takri,
|
||||||
|
/* New for Unicode 7.0.0: */
|
||||||
|
ucp_Bassa_Vah,
|
||||||
|
ucp_Caucasian_Albanian,
|
||||||
|
ucp_Duployan,
|
||||||
|
ucp_Elbasan,
|
||||||
|
ucp_Grantha,
|
||||||
|
ucp_Khojki,
|
||||||
|
ucp_Khudawadi,
|
||||||
|
ucp_Linear_A,
|
||||||
|
ucp_Mahajani,
|
||||||
|
ucp_Manichaean,
|
||||||
|
ucp_Mende_Kikakui,
|
||||||
|
ucp_Modi,
|
||||||
|
ucp_Mro,
|
||||||
|
ucp_Nabataean,
|
||||||
|
ucp_Old_North_Arabian,
|
||||||
|
ucp_Old_Permic,
|
||||||
|
ucp_Pahawh_Hmong,
|
||||||
|
ucp_Palmyrene,
|
||||||
|
ucp_Psalter_Pahlavi,
|
||||||
|
ucp_Pau_Cin_Hau,
|
||||||
|
ucp_Siddham,
|
||||||
|
ucp_Tirhuta,
|
||||||
|
ucp_Warang_Citi
|
||||||
};
|
};
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
Reference in New Issue
Block a user