mirror of
https://github.com/pocoproject/poco.git
synced 2025-01-19 00:46:03 +01:00
GH #1586: Upgrade bundled PCRE to 8.40
This commit is contained in:
parent
f8a0bbff1b
commit
7d91a4bc94
@ -40,7 +40,7 @@ public:
|
||||
// Implementation note: the following definitions must be kept
|
||||
// in sync with those from ucp.h (PCRE).
|
||||
enum CharacterCategory
|
||||
/// Unicode 5.0 character categories.
|
||||
/// Unicode character categories.
|
||||
{
|
||||
UCP_OTHER,
|
||||
UCP_LETTER,
|
||||
@ -52,7 +52,7 @@ public:
|
||||
};
|
||||
|
||||
enum CharacterType
|
||||
/// Unicode 5.0 character types.
|
||||
/// Unicode character types.
|
||||
{
|
||||
UCP_CONTROL,
|
||||
UCP_FORMAT,
|
||||
@ -87,7 +87,7 @@ public:
|
||||
};
|
||||
|
||||
enum Script
|
||||
/// Unicode 5.0 scripts.
|
||||
/// Unicode 7.0 script identifiers.
|
||||
{
|
||||
UCP_ARABIC,
|
||||
UCP_ARMENIAN,
|
||||
@ -150,11 +150,13 @@ public:
|
||||
UCP_TIFINAGH,
|
||||
UCP_UGARITIC,
|
||||
UCP_YI,
|
||||
// Unicode 5.0
|
||||
UCP_BALINESE,
|
||||
UCP_CUNEIFORM,
|
||||
UCP_NKO,
|
||||
UCP_PHAGS_PA,
|
||||
UCP_PHOENICIAN,
|
||||
// Unicode 5.1
|
||||
UCP_CARIAN,
|
||||
UCP_CHAM,
|
||||
UCP_KAYAH_LI,
|
||||
@ -165,7 +167,59 @@ public:
|
||||
UCP_REJANG,
|
||||
UCP_SAURASHTRA,
|
||||
UCP_SUNDANESE,
|
||||
UCP_VAI
|
||||
UCP_VAI,
|
||||
// Unicode 5.2
|
||||
UCP_AVESTAN,
|
||||
UCP_BAMUM,
|
||||
UCP_EGYPTIAN_HIEROGLYPHS,
|
||||
UCP_IMPERIAL_ARAMAIC,
|
||||
UCP_INSCRIPTIONAL_PAHLAVI,
|
||||
UCP_INSCRIPTIONAL_PARTHIAN,
|
||||
UCP_JAVANESE,
|
||||
UCP_KAITHI,
|
||||
UCP_LISU,
|
||||
UCP_MEETEI_MAYEK,
|
||||
UCP_OLD_SOUTH_ARABIAN,
|
||||
UCP_OLD_TURKIC,
|
||||
UCP_SAMARITAN,
|
||||
UCP_TAI_THAM,
|
||||
UCP_TAI_VIET,
|
||||
// Unicode 6.0
|
||||
UCP_BATAK,
|
||||
UCP_BRAHMI,
|
||||
UCP_MANDAIC,
|
||||
// Unicode 6.1
|
||||
UCP_CHAKMA,
|
||||
UCP_MEROITIC_CURSIVE,
|
||||
UCP_MEROITIC_HIEROGLYPHS,
|
||||
UCP_MIAO,
|
||||
UCP_SHARADA,
|
||||
UCP_SORA_SOMPENG,
|
||||
UCP_TAKRI,
|
||||
// Unicode 7.0
|
||||
UCP_BASSA_VAH,
|
||||
UCP_CAUCASIAN_ALBANIAN,
|
||||
UCP_DUPLOYAN,
|
||||
UCP_ELBASAN,
|
||||
UCP_GRANTHA,
|
||||
UCP_KHOJKI,
|
||||
UCP_KHUDAWADI,
|
||||
UCP_LINEAR_A,
|
||||
UCP_MAHAJANI,
|
||||
UCP_MANICHAEAN,
|
||||
UCP_MENDE_KIKAKUI,
|
||||
UCP_MODI,
|
||||
UCP_MRO,
|
||||
UCP_NABATAEAN,
|
||||
UCP_OLD_NORTH_ARABIAN,
|
||||
UCP_OLD_PERMIC,
|
||||
UCP_PAHAWH_HMONG,
|
||||
UCP_PALMYRENE,
|
||||
UCP_PSALTER_PAHLAVI,
|
||||
UCP_PAU_CIN_HAU,
|
||||
UCP_SIDDHAM,
|
||||
UCP_TIRHUTA,
|
||||
UCP_WARANG_CITI
|
||||
};
|
||||
|
||||
enum
|
||||
|
@ -42,9 +42,9 @@ POSSIBILITY OF SUCH DAMAGE.
|
||||
/* The current PCRE version information. */
|
||||
|
||||
#define PCRE_MAJOR 8
|
||||
#define PCRE_MINOR 35
|
||||
#define PCRE_MINOR 40
|
||||
#define PCRE_PRERELEASE
|
||||
#define PCRE_DATE 2014-04-04
|
||||
#define PCRE_DATE 2017-01-11
|
||||
|
||||
/* When an application links to a PCRE DLL in Windows, the symbols that are
|
||||
imported have to be identified as such. When building PCRE, the appropriate
|
||||
|
File diff suppressed because it is too large
Load Diff
@ -283,7 +283,7 @@ sure both macros are undefined; an emulation function will then be used. */
|
||||
#define PACKAGE_NAME "PCRE"
|
||||
|
||||
/* Define to the full name and version of this package. */
|
||||
#define PACKAGE_STRING "PCRE 8.35"
|
||||
#define PACKAGE_STRING "PCRE 8.40"
|
||||
|
||||
/* Define to the one symbol short name of this package. */
|
||||
#define PACKAGE_TARNAME "pcre"
|
||||
@ -292,7 +292,7 @@ sure both macros are undefined; an emulation function will then be used. */
|
||||
#define PACKAGE_URL ""
|
||||
|
||||
/* Define to the version of this package. */
|
||||
#define PACKAGE_VERSION "8.35"
|
||||
#define PACKAGE_VERSION "8.40"
|
||||
|
||||
/* The value of PARENS_NEST_LIMIT specifies the maximum depth of nested
|
||||
parentheses (of any kind) in a pattern. This limits the amount of system
|
||||
@ -394,7 +394,7 @@ sure both macros are undefined; an emulation function will then be used. */
|
||||
/* #undef SUPPORT_VALGRIND */
|
||||
|
||||
/* Version number of package */
|
||||
#define VERSION "8.35"
|
||||
#define VERSION "8.40"
|
||||
|
||||
/* Define to empty if `const' does not conform to ANSI C. */
|
||||
/* #undef const */
|
||||
|
@ -2735,9 +2735,10 @@ for (;;)
|
||||
condcode == OP_DNRREF)
|
||||
return PCRE_ERROR_DFA_UCOND;
|
||||
|
||||
/* The DEFINE condition is always false */
|
||||
/* The DEFINE condition is always false, and the assertion (?!) is
|
||||
converted to OP_FAIL. */
|
||||
|
||||
if (condcode == OP_DEF)
|
||||
if (condcode == OP_DEF || condcode == OP_FAIL)
|
||||
{ ADD_ACTIVE(state_offset + codelink + LINK_SIZE + 1, 0); }
|
||||
|
||||
/* The only supported version of OP_RREF is for the value RREF_ANY,
|
||||
@ -3241,7 +3242,7 @@ md->callout_data = NULL;
|
||||
|
||||
if (extra_data != NULL)
|
||||
{
|
||||
unsigned int flags = extra_data->flags;
|
||||
unsigned long int flags = extra_data->flags;
|
||||
if ((flags & PCRE_EXTRA_STUDY_DATA) != 0)
|
||||
study = (const pcre_study_data *)extra_data->study_data;
|
||||
if ((flags & PCRE_EXTRA_MATCH_LIMIT) != 0) return PCRE_ERROR_DFA_UMLIMIT;
|
||||
|
@ -1137,88 +1137,81 @@ for (;;)
|
||||
printf("\n");
|
||||
#endif
|
||||
|
||||
if (offset < md->offset_max)
|
||||
if (offset >= md->offset_max) goto POSSESSIVE_NON_CAPTURE;
|
||||
|
||||
matched_once = FALSE;
|
||||
code_offset = (int)(ecode - md->start_code);
|
||||
|
||||
save_offset1 = md->offset_vector[offset];
|
||||
save_offset2 = md->offset_vector[offset+1];
|
||||
save_offset3 = md->offset_vector[md->offset_end - number];
|
||||
save_capture_last = md->capture_last;
|
||||
|
||||
DPRINTF(("saving %d %d %d\n", save_offset1, save_offset2, save_offset3));
|
||||
|
||||
/* Each time round the loop, save the current subject position for use
|
||||
when the group matches. For MATCH_MATCH, the group has matched, so we
|
||||
restart it with a new subject starting position, remembering that we had
|
||||
at least one match. For MATCH_NOMATCH, carry on with the alternatives, as
|
||||
usual. If we haven't matched any alternatives in any iteration, check to
|
||||
see if a previous iteration matched. If so, the group has matched;
|
||||
continue from afterwards. Otherwise it has failed; restore the previous
|
||||
capture values before returning NOMATCH. */
|
||||
|
||||
for (;;)
|
||||
{
|
||||
matched_once = FALSE;
|
||||
code_offset = (int)(ecode - md->start_code);
|
||||
|
||||
save_offset1 = md->offset_vector[offset];
|
||||
save_offset2 = md->offset_vector[offset+1];
|
||||
save_offset3 = md->offset_vector[md->offset_end - number];
|
||||
save_capture_last = md->capture_last;
|
||||
|
||||
DPRINTF(("saving %d %d %d\n", save_offset1, save_offset2, save_offset3));
|
||||
|
||||
/* Each time round the loop, save the current subject position for use
|
||||
when the group matches. For MATCH_MATCH, the group has matched, so we
|
||||
restart it with a new subject starting position, remembering that we had
|
||||
at least one match. For MATCH_NOMATCH, carry on with the alternatives, as
|
||||
usual. If we haven't matched any alternatives in any iteration, check to
|
||||
see if a previous iteration matched. If so, the group has matched;
|
||||
continue from afterwards. Otherwise it has failed; restore the previous
|
||||
capture values before returning NOMATCH. */
|
||||
|
||||
for (;;)
|
||||
md->offset_vector[md->offset_end - number] =
|
||||
(int)(eptr - md->start_subject);
|
||||
if (op >= OP_SBRA) md->match_function_type = MATCH_CBEGROUP;
|
||||
RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode], offset_top, md,
|
||||
eptrb, RM63);
|
||||
if (rrc == MATCH_KETRPOS)
|
||||
{
|
||||
md->offset_vector[md->offset_end - number] =
|
||||
(int)(eptr - md->start_subject);
|
||||
if (op >= OP_SBRA) md->match_function_type = MATCH_CBEGROUP;
|
||||
RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode], offset_top, md,
|
||||
eptrb, RM63);
|
||||
if (rrc == MATCH_KETRPOS)
|
||||
offset_top = md->end_offset_top;
|
||||
ecode = md->start_code + code_offset;
|
||||
save_capture_last = md->capture_last;
|
||||
matched_once = TRUE;
|
||||
mstart = md->start_match_ptr; /* In case \K changed it */
|
||||
if (eptr == md->end_match_ptr) /* Matched an empty string */
|
||||
{
|
||||
offset_top = md->end_offset_top;
|
||||
eptr = md->end_match_ptr;
|
||||
ecode = md->start_code + code_offset;
|
||||
save_capture_last = md->capture_last;
|
||||
matched_once = TRUE;
|
||||
mstart = md->start_match_ptr; /* In case \K changed it */
|
||||
continue;
|
||||
do ecode += GET(ecode, 1); while (*ecode == OP_ALT);
|
||||
break;
|
||||
}
|
||||
|
||||
/* See comment in the code for capturing groups above about handling
|
||||
THEN. */
|
||||
|
||||
if (rrc == MATCH_THEN)
|
||||
{
|
||||
next = ecode + GET(ecode,1);
|
||||
if (md->start_match_ptr < next &&
|
||||
(*ecode == OP_ALT || *next == OP_ALT))
|
||||
rrc = MATCH_NOMATCH;
|
||||
}
|
||||
|
||||
if (rrc != MATCH_NOMATCH) RRETURN(rrc);
|
||||
md->capture_last = save_capture_last;
|
||||
ecode += GET(ecode, 1);
|
||||
if (*ecode != OP_ALT) break;
|
||||
eptr = md->end_match_ptr;
|
||||
continue;
|
||||
}
|
||||
|
||||
if (!matched_once)
|
||||
/* See comment in the code for capturing groups above about handling
|
||||
THEN. */
|
||||
|
||||
if (rrc == MATCH_THEN)
|
||||
{
|
||||
md->offset_vector[offset] = save_offset1;
|
||||
md->offset_vector[offset+1] = save_offset2;
|
||||
md->offset_vector[md->offset_end - number] = save_offset3;
|
||||
next = ecode + GET(ecode,1);
|
||||
if (md->start_match_ptr < next &&
|
||||
(*ecode == OP_ALT || *next == OP_ALT))
|
||||
rrc = MATCH_NOMATCH;
|
||||
}
|
||||
|
||||
if (allow_zero || matched_once)
|
||||
{
|
||||
ecode += 1 + LINK_SIZE;
|
||||
break;
|
||||
}
|
||||
|
||||
RRETURN(MATCH_NOMATCH);
|
||||
if (rrc != MATCH_NOMATCH) RRETURN(rrc);
|
||||
md->capture_last = save_capture_last;
|
||||
ecode += GET(ecode, 1);
|
||||
if (*ecode != OP_ALT) break;
|
||||
}
|
||||
|
||||
/* FALL THROUGH ... Insufficient room for saving captured contents. Treat
|
||||
as a non-capturing bracket. */
|
||||
if (!matched_once)
|
||||
{
|
||||
md->offset_vector[offset] = save_offset1;
|
||||
md->offset_vector[offset+1] = save_offset2;
|
||||
md->offset_vector[md->offset_end - number] = save_offset3;
|
||||
}
|
||||
|
||||
/* VVVVVVVVVVVVVVVVVVVVVVVVV */
|
||||
/* VVVVVVVVVVVVVVVVVVVVVVVVV */
|
||||
if (allow_zero || matched_once)
|
||||
{
|
||||
ecode += 1 + LINK_SIZE;
|
||||
break;
|
||||
}
|
||||
|
||||
DPRINTF(("insufficient capture room: treat as non-capturing\n"));
|
||||
|
||||
/* VVVVVVVVVVVVVVVVVVVVVVVVV */
|
||||
/* VVVVVVVVVVVVVVVVVVVVVVVVV */
|
||||
RRETURN(MATCH_NOMATCH);
|
||||
|
||||
/* Non-capturing possessive bracket with unlimited repeat. We come here
|
||||
from BRAZERO with allow_zero = TRUE. The code is similar to the above,
|
||||
@ -1242,10 +1235,15 @@ for (;;)
|
||||
if (rrc == MATCH_KETRPOS)
|
||||
{
|
||||
offset_top = md->end_offset_top;
|
||||
eptr = md->end_match_ptr;
|
||||
ecode = md->start_code + code_offset;
|
||||
matched_once = TRUE;
|
||||
mstart = md->start_match_ptr; /* In case \K reset it */
|
||||
if (eptr == md->end_match_ptr) /* Matched an empty string */
|
||||
{
|
||||
do ecode += GET(ecode, 1); while (*ecode == OP_ALT);
|
||||
break;
|
||||
}
|
||||
eptr = md->end_match_ptr;
|
||||
continue;
|
||||
}
|
||||
|
||||
@ -1379,6 +1377,7 @@ for (;;)
|
||||
break;
|
||||
|
||||
case OP_DEF: /* DEFINE - always false */
|
||||
case OP_FAIL: /* From optimized (?!) condition */
|
||||
break;
|
||||
|
||||
/* The condition is an assertion. Call match() to evaluate it - setting
|
||||
@ -1395,8 +1394,11 @@ for (;;)
|
||||
condition = TRUE;
|
||||
|
||||
/* Advance ecode past the assertion to the start of the first branch,
|
||||
but adjust it so that the general choosing code below works. */
|
||||
but adjust it so that the general choosing code below works. If the
|
||||
assertion has a quantifier that allows zero repeats we must skip over
|
||||
the BRAZERO. This is a lunatic thing to do, but somebody did! */
|
||||
|
||||
if (*ecode == OP_BRAZERO) ecode++;
|
||||
ecode += GET(ecode, 1);
|
||||
while (*ecode == OP_ALT) ecode += GET(ecode, 1);
|
||||
ecode += 1 + LINK_SIZE - PRIV(OP_lengths)[condcode];
|
||||
@ -1465,7 +1467,18 @@ for (;;)
|
||||
md->offset_vector[offset] =
|
||||
md->offset_vector[md->offset_end - number];
|
||||
md->offset_vector[offset+1] = (int)(eptr - md->start_subject);
|
||||
if (offset_top <= offset) offset_top = offset + 2;
|
||||
|
||||
/* If this group is at or above the current highwater mark, ensure that
|
||||
any groups between the current high water mark and this group are marked
|
||||
unset and then update the high water mark. */
|
||||
|
||||
if (offset >= offset_top)
|
||||
{
|
||||
register int *iptr = md->offset_vector + offset_top;
|
||||
register int *iend = md->offset_vector + offset;
|
||||
while (iptr < iend) *iptr++ = -1;
|
||||
offset_top = offset + 2;
|
||||
}
|
||||
}
|
||||
ecode += 1 + IMM2_SIZE;
|
||||
break;
|
||||
@ -1817,7 +1830,11 @@ for (;;)
|
||||
are defined in a range that can be tested for. */
|
||||
|
||||
if (rrc >= MATCH_BACKTRACK_MIN && rrc <= MATCH_BACKTRACK_MAX)
|
||||
{
|
||||
if (new_recursive.offset_save != stacksave)
|
||||
(PUBL(free))(new_recursive.offset_save);
|
||||
RRETURN(MATCH_NOMATCH);
|
||||
}
|
||||
|
||||
/* Any return code other than NOMATCH is an error. */
|
||||
|
||||
@ -1980,6 +1997,19 @@ for (;;)
|
||||
}
|
||||
}
|
||||
|
||||
/* OP_KETRPOS is a possessive repeating ket. Remember the current position,
|
||||
and return the MATCH_KETRPOS. This makes it possible to do the repeats one
|
||||
at a time from the outer level, thus saving stack. This must precede the
|
||||
empty string test - in this case that test is done at the outer level. */
|
||||
|
||||
if (*ecode == OP_KETRPOS)
|
||||
{
|
||||
md->start_match_ptr = mstart; /* In case \K reset it */
|
||||
md->end_match_ptr = eptr;
|
||||
md->end_offset_top = offset_top;
|
||||
RRETURN(MATCH_KETRPOS);
|
||||
}
|
||||
|
||||
/* For an ordinary non-repeating ket, just continue at this level. This
|
||||
also happens for a repeating ket if no characters were matched in the
|
||||
group. This is the forcible breaking of infinite loops as implemented in
|
||||
@ -2002,18 +2032,6 @@ for (;;)
|
||||
break;
|
||||
}
|
||||
|
||||
/* OP_KETRPOS is a possessive repeating ket. Remember the current position,
|
||||
and return the MATCH_KETRPOS. This makes it possible to do the repeats one
|
||||
at a time from the outer level, thus saving stack. */
|
||||
|
||||
if (*ecode == OP_KETRPOS)
|
||||
{
|
||||
md->start_match_ptr = mstart; /* In case \K reset it */
|
||||
md->end_match_ptr = eptr;
|
||||
md->end_offset_top = offset_top;
|
||||
RRETURN(MATCH_KETRPOS);
|
||||
}
|
||||
|
||||
/* The normal repeating kets try the rest of the pattern or restart from
|
||||
the preceding bracket, in the appropriate order. In the second case, we can
|
||||
use tail recursion to avoid using another stack frame, unless we have an
|
||||
@ -3466,7 +3484,7 @@ for (;;)
|
||||
if (possessive) continue; /* No backtracking */
|
||||
for(;;)
|
||||
{
|
||||
if (eptr == pp) goto TAIL_RECURSE;
|
||||
if (eptr <= pp) goto TAIL_RECURSE;
|
||||
RMATCH(eptr, ecode, offset_top, md, eptrb, RM23);
|
||||
if (rrc != MATCH_NOMATCH) RRETURN(rrc);
|
||||
#ifdef SUPPORT_UCP
|
||||
@ -3887,7 +3905,7 @@ for (;;)
|
||||
if (possessive) continue; /* No backtracking */
|
||||
for(;;)
|
||||
{
|
||||
if (eptr == pp) goto TAIL_RECURSE;
|
||||
if (eptr <= pp) goto TAIL_RECURSE;
|
||||
RMATCH(eptr, ecode, offset_top, md, eptrb, RM30);
|
||||
if (rrc != MATCH_NOMATCH) RRETURN(rrc);
|
||||
eptr--;
|
||||
@ -4022,7 +4040,7 @@ for (;;)
|
||||
if (possessive) continue; /* No backtracking */
|
||||
for(;;)
|
||||
{
|
||||
if (eptr == pp) goto TAIL_RECURSE;
|
||||
if (eptr <= pp) goto TAIL_RECURSE;
|
||||
RMATCH(eptr, ecode, offset_top, md, eptrb, RM34);
|
||||
if (rrc != MATCH_NOMATCH) RRETURN(rrc);
|
||||
eptr--;
|
||||
@ -5593,7 +5611,7 @@ for (;;)
|
||||
if (possessive) continue; /* No backtracking */
|
||||
for(;;)
|
||||
{
|
||||
if (eptr == pp) goto TAIL_RECURSE;
|
||||
if (eptr <= pp) goto TAIL_RECURSE;
|
||||
RMATCH(eptr, ecode, offset_top, md, eptrb, RM44);
|
||||
if (rrc != MATCH_NOMATCH) RRETURN(rrc);
|
||||
eptr--;
|
||||
@ -5635,12 +5653,17 @@ for (;;)
|
||||
|
||||
if (possessive) continue; /* No backtracking */
|
||||
|
||||
/* We use <= pp rather than == pp to detect the start of the run while
|
||||
backtracking because the use of \C in UTF mode can cause BACKCHAR to
|
||||
move back past pp. This is just palliative; the use of \C in UTF mode
|
||||
is fraught with danger. */
|
||||
|
||||
for(;;)
|
||||
{
|
||||
int lgb, rgb;
|
||||
PCRE_PUCHAR fptr;
|
||||
|
||||
if (eptr == pp) goto TAIL_RECURSE; /* At start of char run */
|
||||
if (eptr <= pp) goto TAIL_RECURSE; /* At start of char run */
|
||||
RMATCH(eptr, ecode, offset_top, md, eptrb, RM45);
|
||||
if (rrc != MATCH_NOMATCH) RRETURN(rrc);
|
||||
|
||||
@ -5658,7 +5681,7 @@ for (;;)
|
||||
|
||||
for (;;)
|
||||
{
|
||||
if (eptr == pp) goto TAIL_RECURSE; /* At start of char run */
|
||||
if (eptr <= pp) goto TAIL_RECURSE; /* At start of char run */
|
||||
fptr = eptr - 1;
|
||||
if (!utf) c = *fptr; else
|
||||
{
|
||||
@ -5682,54 +5705,25 @@ for (;;)
|
||||
switch(ctype)
|
||||
{
|
||||
case OP_ANY:
|
||||
if (max < INT_MAX)
|
||||
for (i = min; i < max; i++)
|
||||
{
|
||||
for (i = min; i < max; i++)
|
||||
if (eptr >= md->end_subject)
|
||||
{
|
||||
if (eptr >= md->end_subject)
|
||||
{
|
||||
SCHECK_PARTIAL();
|
||||
break;
|
||||
}
|
||||
if (IS_NEWLINE(eptr)) break;
|
||||
if (md->partial != 0 && /* Take care with CRLF partial */
|
||||
eptr + 1 >= md->end_subject &&
|
||||
NLBLOCK->nltype == NLTYPE_FIXED &&
|
||||
NLBLOCK->nllen == 2 &&
|
||||
UCHAR21(eptr) == NLBLOCK->nl[0])
|
||||
{
|
||||
md->hitend = TRUE;
|
||||
if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL);
|
||||
}
|
||||
eptr++;
|
||||
ACROSSCHAR(eptr < md->end_subject, *eptr, eptr++);
|
||||
SCHECK_PARTIAL();
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
/* Handle unlimited UTF-8 repeat */
|
||||
|
||||
else
|
||||
{
|
||||
for (i = min; i < max; i++)
|
||||
if (IS_NEWLINE(eptr)) break;
|
||||
if (md->partial != 0 && /* Take care with CRLF partial */
|
||||
eptr + 1 >= md->end_subject &&
|
||||
NLBLOCK->nltype == NLTYPE_FIXED &&
|
||||
NLBLOCK->nllen == 2 &&
|
||||
UCHAR21(eptr) == NLBLOCK->nl[0])
|
||||
{
|
||||
if (eptr >= md->end_subject)
|
||||
{
|
||||
SCHECK_PARTIAL();
|
||||
break;
|
||||
}
|
||||
if (IS_NEWLINE(eptr)) break;
|
||||
if (md->partial != 0 && /* Take care with CRLF partial */
|
||||
eptr + 1 >= md->end_subject &&
|
||||
NLBLOCK->nltype == NLTYPE_FIXED &&
|
||||
NLBLOCK->nllen == 2 &&
|
||||
UCHAR21(eptr) == NLBLOCK->nl[0])
|
||||
{
|
||||
md->hitend = TRUE;
|
||||
if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL);
|
||||
}
|
||||
eptr++;
|
||||
ACROSSCHAR(eptr < md->end_subject, *eptr, eptr++);
|
||||
md->hitend = TRUE;
|
||||
if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL);
|
||||
}
|
||||
eptr++;
|
||||
ACROSSCHAR(eptr < md->end_subject, *eptr, eptr++);
|
||||
}
|
||||
break;
|
||||
|
||||
@ -5937,7 +5931,7 @@ for (;;)
|
||||
if (possessive) continue; /* No backtracking */
|
||||
for(;;)
|
||||
{
|
||||
if (eptr == pp) goto TAIL_RECURSE;
|
||||
if (eptr <= pp) goto TAIL_RECURSE;
|
||||
RMATCH(eptr, ecode, offset_top, md, eptrb, RM46);
|
||||
if (rrc != MATCH_NOMATCH) RRETURN(rrc);
|
||||
eptr--;
|
||||
@ -6520,7 +6514,7 @@ tables = re->tables;
|
||||
|
||||
if (extra_data != NULL)
|
||||
{
|
||||
register unsigned int flags = extra_data->flags;
|
||||
unsigned long int flags = extra_data->flags;
|
||||
if ((flags & PCRE_EXTRA_STUDY_DATA) != 0)
|
||||
study = (const pcre_study_data *)extra_data->study_data;
|
||||
if ((flags & PCRE_EXTRA_MATCH_LIMIT) != 0)
|
||||
@ -6692,7 +6686,8 @@ if (md->offset_vector != NULL)
|
||||
register int *iend = iptr - re->top_bracket;
|
||||
if (iend < md->offset_vector + 2) iend = md->offset_vector + 2;
|
||||
while (--iptr >= iend) *iptr = -1;
|
||||
md->offset_vector[0] = md->offset_vector[1] = -1;
|
||||
if (offsetcount > 0) md->offset_vector[0] = -1;
|
||||
if (offsetcount > 1) md->offset_vector[1] = -1;
|
||||
}
|
||||
|
||||
/* Set up the first character to match, if available. The first_char value is
|
||||
|
@ -247,6 +247,7 @@ Arguments:
|
||||
code the compiled regex
|
||||
stringname the name of the capturing substring
|
||||
ovector the vector of matched substrings
|
||||
stringcount number of captured substrings
|
||||
|
||||
Returns: the number of the first that is set,
|
||||
or the number of the last one if none are set,
|
||||
@ -255,13 +256,16 @@ Returns: the number of the first that is set,
|
||||
|
||||
#if defined COMPILE_PCRE8
|
||||
static int
|
||||
get_first_set(const pcre *code, const char *stringname, int *ovector)
|
||||
get_first_set(const pcre *code, const char *stringname, int *ovector,
|
||||
int stringcount)
|
||||
#elif defined COMPILE_PCRE16
|
||||
static int
|
||||
get_first_set(const pcre16 *code, PCRE_SPTR16 stringname, int *ovector)
|
||||
get_first_set(const pcre16 *code, PCRE_SPTR16 stringname, int *ovector,
|
||||
int stringcount)
|
||||
#elif defined COMPILE_PCRE32
|
||||
static int
|
||||
get_first_set(const pcre32 *code, PCRE_SPTR32 stringname, int *ovector)
|
||||
get_first_set(const pcre32 *code, PCRE_SPTR32 stringname, int *ovector,
|
||||
int stringcount)
|
||||
#endif
|
||||
{
|
||||
const REAL_PCRE *re = (const REAL_PCRE *)code;
|
||||
@ -292,7 +296,7 @@ if (entrysize <= 0) return entrysize;
|
||||
for (entry = (pcre_uchar *)first; entry <= (pcre_uchar *)last; entry += entrysize)
|
||||
{
|
||||
int n = GET2(entry, 0);
|
||||
if (ovector[n*2] >= 0) return n;
|
||||
if (n < stringcount && ovector[n*2] >= 0) return n;
|
||||
}
|
||||
return GET2(entry, 0);
|
||||
}
|
||||
@ -399,7 +403,7 @@ pcre32_copy_named_substring(const pcre32 *code, PCRE_SPTR32 subject,
|
||||
PCRE_UCHAR32 *buffer, int size)
|
||||
#endif
|
||||
{
|
||||
int n = get_first_set(code, stringname, ovector);
|
||||
int n = get_first_set(code, stringname, ovector, stringcount);
|
||||
if (n <= 0) return n;
|
||||
#if defined COMPILE_PCRE8
|
||||
return pcre_copy_substring(subject, ovector, stringcount, n, buffer, size);
|
||||
@ -454,7 +458,10 @@ pcre_uchar **stringlist;
|
||||
pcre_uchar *p;
|
||||
|
||||
for (i = 0; i < double_count; i += 2)
|
||||
size += sizeof(pcre_uchar *) + IN_UCHARS(ovector[i+1] - ovector[i] + 1);
|
||||
{
|
||||
size += sizeof(pcre_uchar *) + IN_UCHARS(1);
|
||||
if (ovector[i+1] > ovector[i]) size += IN_UCHARS(ovector[i+1] - ovector[i]);
|
||||
}
|
||||
|
||||
stringlist = (pcre_uchar **)(PUBL(malloc))(size);
|
||||
if (stringlist == NULL) return PCRE_ERROR_NOMEMORY;
|
||||
@ -470,7 +477,7 @@ p = (pcre_uchar *)(stringlist + stringcount + 1);
|
||||
|
||||
for (i = 0; i < double_count; i += 2)
|
||||
{
|
||||
int len = ovector[i+1] - ovector[i];
|
||||
int len = (ovector[i+1] > ovector[i])? (ovector[i+1] - ovector[i]) : 0;
|
||||
memcpy(p, subject + ovector[i], IN_UCHARS(len));
|
||||
*stringlist++ = p;
|
||||
p += len;
|
||||
@ -616,7 +623,7 @@ pcre32_get_named_substring(const pcre32 *code, PCRE_SPTR32 subject,
|
||||
PCRE_SPTR32 *stringptr)
|
||||
#endif
|
||||
{
|
||||
int n = get_first_set(code, stringname, ovector);
|
||||
int n = get_first_set(code, stringname, ovector, stringcount);
|
||||
if (n <= 0) return n;
|
||||
#if defined COMPILE_PCRE8
|
||||
return pcre_get_substring(subject, ovector, stringcount, n, stringptr);
|
||||
|
@ -7,7 +7,7 @@
|
||||
and semantics are as close as possible to those of the Perl 5 language.
|
||||
|
||||
Written by Philip Hazel
|
||||
Copyright (c) 1997-2014 University of Cambridge
|
||||
Copyright (c) 1997-2016 University of Cambridge
|
||||
|
||||
-----------------------------------------------------------------------------
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
@ -229,9 +229,9 @@ stdint.h is available, include it; it may define INT64_MAX. Systems that do not
|
||||
have stdint.h (e.g. Solaris) may have inttypes.h. The macro int64_t may be set
|
||||
by "configure". */
|
||||
|
||||
#if HAVE_STDINT_H
|
||||
#if defined HAVE_STDINT_H
|
||||
#include <stdint.h>
|
||||
#elif HAVE_INTTYPES_H
|
||||
#elif defined HAVE_INTTYPES_H
|
||||
#include <inttypes.h>
|
||||
#endif
|
||||
|
||||
@ -275,7 +275,7 @@ pcre.h(.in) and disable (comment out) this message. */
|
||||
|
||||
typedef pcre_uint16 pcre_uchar;
|
||||
#define UCHAR_SHIFT (1)
|
||||
#define IN_UCHARS(x) ((x) << UCHAR_SHIFT)
|
||||
#define IN_UCHARS(x) ((x) * 2)
|
||||
#define MAX_255(c) ((c) <= 255u)
|
||||
#define TABLE_GET(c, table, default) (MAX_255(c)? ((table)[c]):(default))
|
||||
|
||||
@ -283,7 +283,7 @@ typedef pcre_uint16 pcre_uchar;
|
||||
|
||||
typedef pcre_uint32 pcre_uchar;
|
||||
#define UCHAR_SHIFT (2)
|
||||
#define IN_UCHARS(x) ((x) << UCHAR_SHIFT)
|
||||
#define IN_UCHARS(x) ((x) * 4)
|
||||
#define MAX_255(c) ((c) <= 255u)
|
||||
#define TABLE_GET(c, table, default) (MAX_255(c)? ((table)[c]):(default))
|
||||
|
||||
@ -984,7 +984,7 @@ other. NOTE: The values also appear in pcre_jit_compile.c. */
|
||||
#ifndef EBCDIC
|
||||
|
||||
#define HSPACE_LIST \
|
||||
CHAR_HT, CHAR_SPACE, 0xa0, \
|
||||
CHAR_HT, CHAR_SPACE, CHAR_NBSP, \
|
||||
0x1680, 0x180e, 0x2000, 0x2001, 0x2002, 0x2003, 0x2004, 0x2005, \
|
||||
0x2006, 0x2007, 0x2008, 0x2009, 0x200A, 0x202f, 0x205f, 0x3000, \
|
||||
NOTACHAR
|
||||
@ -1010,7 +1010,7 @@ other. NOTE: The values also appear in pcre_jit_compile.c. */
|
||||
#define HSPACE_BYTE_CASES \
|
||||
case CHAR_HT: \
|
||||
case CHAR_SPACE: \
|
||||
case 0xa0 /* NBSP */
|
||||
case CHAR_NBSP
|
||||
|
||||
#define HSPACE_CASES \
|
||||
HSPACE_BYTE_CASES: \
|
||||
@ -1037,11 +1037,12 @@ other. NOTE: The values also appear in pcre_jit_compile.c. */
|
||||
/* ------ EBCDIC environments ------ */
|
||||
|
||||
#else
|
||||
#define HSPACE_LIST CHAR_HT, CHAR_SPACE
|
||||
#define HSPACE_LIST CHAR_HT, CHAR_SPACE, CHAR_NBSP, NOTACHAR
|
||||
|
||||
#define HSPACE_BYTE_CASES \
|
||||
case CHAR_HT: \
|
||||
case CHAR_SPACE
|
||||
case CHAR_SPACE: \
|
||||
case CHAR_NBSP
|
||||
|
||||
#define HSPACE_CASES HSPACE_BYTE_CASES
|
||||
|
||||
@ -1215,6 +1216,7 @@ same code point. */
|
||||
|
||||
#define CHAR_ESC '\047'
|
||||
#define CHAR_DEL '\007'
|
||||
#define CHAR_NBSP '\x41'
|
||||
#define STR_ESC "\047"
|
||||
#define STR_DEL "\007"
|
||||
|
||||
@ -1229,6 +1231,7 @@ a positive value. */
|
||||
#define CHAR_NEL ((unsigned char)'\x85')
|
||||
#define CHAR_ESC '\033'
|
||||
#define CHAR_DEL '\177'
|
||||
#define CHAR_NBSP ((unsigned char)'\xa0')
|
||||
|
||||
#define STR_LF "\n"
|
||||
#define STR_NL STR_LF
|
||||
@ -1606,6 +1609,7 @@ only. */
|
||||
#define CHAR_VERTICAL_LINE '\174'
|
||||
#define CHAR_RIGHT_CURLY_BRACKET '\175'
|
||||
#define CHAR_TILDE '\176'
|
||||
#define CHAR_NBSP ((unsigned char)'\xa0')
|
||||
|
||||
#define STR_HT "\011"
|
||||
#define STR_VT "\013"
|
||||
@ -1762,6 +1766,10 @@ only. */
|
||||
|
||||
/* Escape items that are just an encoding of a particular data value. */
|
||||
|
||||
#ifndef ESC_a
|
||||
#define ESC_a CHAR_BEL
|
||||
#endif
|
||||
|
||||
#ifndef ESC_e
|
||||
#define ESC_e CHAR_ESC
|
||||
#endif
|
||||
@ -2281,7 +2289,7 @@ enum { ERR0, ERR1, ERR2, ERR3, ERR4, ERR5, ERR6, ERR7, ERR8, ERR9,
|
||||
ERR50, ERR51, ERR52, ERR53, ERR54, ERR55, ERR56, ERR57, ERR58, ERR59,
|
||||
ERR60, ERR61, ERR62, ERR63, ERR64, ERR65, ERR66, ERR67, ERR68, ERR69,
|
||||
ERR70, ERR71, ERR72, ERR73, ERR74, ERR75, ERR76, ERR77, ERR78, ERR79,
|
||||
ERR80, ERR81, ERR82, ERR83, ERR84, ERR85, ERRCOUNT };
|
||||
ERR80, ERR81, ERR82, ERR83, ERR84, ERR85, ERR86, ERR87, ERRCOUNT };
|
||||
|
||||
/* JIT compiling modes. The function list is indexed by them. */
|
||||
|
||||
@ -2446,6 +2454,8 @@ typedef struct compile_data {
|
||||
BOOL had_pruneorskip; /* (*PRUNE) or (*SKIP) encountered */
|
||||
BOOL check_lookbehind; /* Lookbehinds need later checking */
|
||||
BOOL dupnames; /* Duplicate names exist */
|
||||
BOOL dupgroups; /* Duplicate groups exist: (?| found */
|
||||
BOOL iscondassert; /* Next assert is a condition */
|
||||
int nltype; /* Newline type */
|
||||
int nllen; /* Newline string length */
|
||||
pcre_uchar nl[4]; /* Newline string when fixed length */
|
||||
@ -2459,6 +2469,13 @@ typedef struct branch_chain {
|
||||
pcre_uchar *current_branch;
|
||||
} branch_chain;
|
||||
|
||||
/* Structure for mutual recursion detection. */
|
||||
|
||||
typedef struct recurse_check {
|
||||
struct recurse_check *prev;
|
||||
const pcre_uchar *group;
|
||||
} recurse_check;
|
||||
|
||||
/* Structure for items in a linked list that represents an explicit recursive
|
||||
call within the pattern; used by pcre_exec(). */
|
||||
|
||||
|
File diff suppressed because it is too large
Load Diff
@ -67,7 +67,8 @@ Arguments:
|
||||
code pointer to start of group (the bracket)
|
||||
startcode pointer to start of the whole pattern's code
|
||||
options the compiling options
|
||||
int RECURSE depth
|
||||
recurses chain of recurse_check to catch mutual recursion
|
||||
countptr pointer to call count (to catch over complexity)
|
||||
|
||||
Returns: the minimum length
|
||||
-1 if \C in UTF-8 mode or (*ACCEPT) was encountered
|
||||
@ -77,15 +78,19 @@ Returns: the minimum length
|
||||
|
||||
static int
|
||||
find_minlength(const REAL_PCRE *re, const pcre_uchar *code,
|
||||
const pcre_uchar *startcode, int options, int recurse_depth)
|
||||
const pcre_uchar *startcode, int options, recurse_check *recurses,
|
||||
int *countptr)
|
||||
{
|
||||
int length = -1;
|
||||
/* PCRE_UTF16 has the same value as PCRE_UTF8. */
|
||||
BOOL utf = (options & PCRE_UTF8) != 0;
|
||||
BOOL had_recurse = FALSE;
|
||||
recurse_check this_recurse;
|
||||
register int branchlength = 0;
|
||||
register pcre_uchar *cc = (pcre_uchar *)code + 1 + LINK_SIZE;
|
||||
|
||||
if ((*countptr)++ > 1000) return -1; /* too complex */
|
||||
|
||||
if (*code == OP_CBRA || *code == OP_SCBRA ||
|
||||
*code == OP_CBRAPOS || *code == OP_SCBRAPOS) cc += IMM2_SIZE;
|
||||
|
||||
@ -127,7 +132,7 @@ for (;;)
|
||||
case OP_SBRAPOS:
|
||||
case OP_ONCE:
|
||||
case OP_ONCE_NC:
|
||||
d = find_minlength(re, cc, startcode, options, recurse_depth);
|
||||
d = find_minlength(re, cc, startcode, options, recurses, countptr);
|
||||
if (d < 0) return d;
|
||||
branchlength += d;
|
||||
do cc += GET(cc, 1); while (*cc == OP_ALT);
|
||||
@ -390,7 +395,7 @@ for (;;)
|
||||
ce = cs = (pcre_uchar *)PRIV(find_bracket)(startcode, utf, GET2(slot, 0));
|
||||
if (cs == NULL) return -2;
|
||||
do ce += GET(ce, 1); while (*ce == OP_ALT);
|
||||
if (cc > cs && cc < ce)
|
||||
if (cc > cs && cc < ce) /* Simple recursion */
|
||||
{
|
||||
d = 0;
|
||||
had_recurse = TRUE;
|
||||
@ -398,8 +403,23 @@ for (;;)
|
||||
}
|
||||
else
|
||||
{
|
||||
int dd = find_minlength(re, cs, startcode, options, recurse_depth);
|
||||
if (dd < d) d = dd;
|
||||
recurse_check *r = recurses;
|
||||
for (r = recurses; r != NULL; r = r->prev) if (r->group == cs) break;
|
||||
if (r != NULL) /* Mutual recursion */
|
||||
{
|
||||
d = 0;
|
||||
had_recurse = TRUE;
|
||||
break;
|
||||
}
|
||||
else
|
||||
{
|
||||
int dd;
|
||||
this_recurse.prev = recurses;
|
||||
this_recurse.group = cs;
|
||||
dd = find_minlength(re, cs, startcode, options, &this_recurse,
|
||||
countptr);
|
||||
if (dd < d) d = dd;
|
||||
}
|
||||
}
|
||||
slot += re->name_entry_size;
|
||||
}
|
||||
@ -415,14 +435,27 @@ for (;;)
|
||||
ce = cs = (pcre_uchar *)PRIV(find_bracket)(startcode, utf, GET2(cc, 1));
|
||||
if (cs == NULL) return -2;
|
||||
do ce += GET(ce, 1); while (*ce == OP_ALT);
|
||||
if (cc > cs && cc < ce)
|
||||
if (cc > cs && cc < ce) /* Simple recursion */
|
||||
{
|
||||
d = 0;
|
||||
had_recurse = TRUE;
|
||||
}
|
||||
else
|
||||
{
|
||||
d = find_minlength(re, cs, startcode, options, recurse_depth);
|
||||
recurse_check *r = recurses;
|
||||
for (r = recurses; r != NULL; r = r->prev) if (r->group == cs) break;
|
||||
if (r != NULL) /* Mutual recursion */
|
||||
{
|
||||
d = 0;
|
||||
had_recurse = TRUE;
|
||||
}
|
||||
else
|
||||
{
|
||||
this_recurse.prev = recurses;
|
||||
this_recurse.group = cs;
|
||||
d = find_minlength(re, cs, startcode, options, &this_recurse,
|
||||
countptr);
|
||||
}
|
||||
}
|
||||
}
|
||||
else d = 0;
|
||||
@ -471,12 +504,21 @@ for (;;)
|
||||
case OP_RECURSE:
|
||||
cs = ce = (pcre_uchar *)startcode + GET(cc, 1);
|
||||
do ce += GET(ce, 1); while (*ce == OP_ALT);
|
||||
if ((cc > cs && cc < ce) || recurse_depth > 10)
|
||||
if (cc > cs && cc < ce) /* Simple recursion */
|
||||
had_recurse = TRUE;
|
||||
else
|
||||
{
|
||||
branchlength += find_minlength(re, cs, startcode, options,
|
||||
recurse_depth + 1);
|
||||
recurse_check *r = recurses;
|
||||
for (r = recurses; r != NULL; r = r->prev) if (r->group == cs) break;
|
||||
if (r != NULL) /* Mutual recursion */
|
||||
had_recurse = TRUE;
|
||||
else
|
||||
{
|
||||
this_recurse.prev = recurses;
|
||||
this_recurse.group = cs;
|
||||
branchlength += find_minlength(re, cs, startcode, options,
|
||||
&this_recurse, countptr);
|
||||
}
|
||||
}
|
||||
cc += 1 + LINK_SIZE;
|
||||
break;
|
||||
@ -860,7 +902,6 @@ do
|
||||
case OP_NOTUPTOI:
|
||||
case OP_NOT_HSPACE:
|
||||
case OP_NOT_VSPACE:
|
||||
case OP_PROP:
|
||||
case OP_PRUNE:
|
||||
case OP_PRUNE_ARG:
|
||||
case OP_RECURSE:
|
||||
@ -878,6 +919,31 @@ do
|
||||
case OP_THEN_ARG:
|
||||
return SSB_FAIL;
|
||||
|
||||
/* A "real" property test implies no starting bits, but the fake property
|
||||
PT_CLIST identifies a list of characters. These lists are short, as they
|
||||
are used for characters with more than one "other case", so there is no
|
||||
point in recognizing them for OP_NOTPROP. */
|
||||
|
||||
case OP_PROP:
|
||||
if (tcode[1] != PT_CLIST) return SSB_FAIL;
|
||||
{
|
||||
const pcre_uint32 *p = PRIV(ucd_caseless_sets) + tcode[2];
|
||||
while ((c = *p++) < NOTACHAR)
|
||||
{
|
||||
#if defined SUPPORT_UTF && defined COMPILE_PCRE8
|
||||
if (utf)
|
||||
{
|
||||
pcre_uchar buff[6];
|
||||
(void)PRIV(ord2utf)(c, buff);
|
||||
c = buff[0];
|
||||
}
|
||||
#endif
|
||||
if (c > 0xff) SET_BIT(0xff); else SET_BIT(c);
|
||||
}
|
||||
}
|
||||
try_next = FALSE;
|
||||
break;
|
||||
|
||||
/* We can ignore word boundary tests. */
|
||||
|
||||
case OP_WORD_BOUNDARY:
|
||||
@ -1103,24 +1169,17 @@ do
|
||||
try_next = FALSE;
|
||||
break;
|
||||
|
||||
/* The cbit_space table has vertical tab as whitespace; we have to
|
||||
ensure it is set as not whitespace. Luckily, the code value is the same
|
||||
(0x0b) in ASCII and EBCDIC, so we can just adjust the appropriate bit. */
|
||||
/* The cbit_space table has vertical tab as whitespace; we no longer
|
||||
have to play fancy tricks because Perl added VT to its whitespace at
|
||||
release 5.18. PCRE added it at release 8.34. */
|
||||
|
||||
case OP_NOT_WHITESPACE:
|
||||
set_nottype_bits(start_bits, cbit_space, table_limit, cd);
|
||||
start_bits[1] |= 0x08;
|
||||
try_next = FALSE;
|
||||
break;
|
||||
|
||||
/* The cbit_space table has vertical tab as whitespace; we have to not
|
||||
set it from the table. Luckily, the code value is the same (0x0b) in
|
||||
ASCII and EBCDIC, so we can just adjust the appropriate bit. */
|
||||
|
||||
case OP_WHITESPACE:
|
||||
c = start_bits[1]; /* Save in case it was already set */
|
||||
set_type_bits(start_bits, cbit_space, table_limit, cd);
|
||||
start_bits[1] = (start_bits[1] & ~0x08) | c;
|
||||
try_next = FALSE;
|
||||
break;
|
||||
|
||||
@ -1309,7 +1368,7 @@ do
|
||||
for (c = 0; c < 16; c++) start_bits[c] |= map[c];
|
||||
for (c = 128; c < 256; c++)
|
||||
{
|
||||
if ((map[c/8] && (1 << (c&7))) != 0)
|
||||
if ((map[c/8] & (1 << (c&7))) != 0)
|
||||
{
|
||||
int d = (c >> 6) | 0xc0; /* Set bit for this starter */
|
||||
start_bits[d/8] |= (1 << (d&7)); /* and then skip on to the */
|
||||
@ -1397,6 +1456,7 @@ pcre32_study(const pcre32 *external_re, int options, const char **errorptr)
|
||||
#endif
|
||||
{
|
||||
int min;
|
||||
int count = 0;
|
||||
BOOL bits_set = FALSE;
|
||||
pcre_uint8 start_bits[32];
|
||||
PUBL(extra) *extra = NULL;
|
||||
@ -1483,7 +1543,7 @@ if ((re->options & PCRE_ANCHORED) == 0 &&
|
||||
|
||||
/* Find the minimum length of subject string. */
|
||||
|
||||
switch(min = find_minlength(re, code, code, re->options, 0))
|
||||
switch(min = find_minlength(re, code, code, re->options, NULL, &count))
|
||||
{
|
||||
case -2: *errorptr = "internal error: missing capturing bracket"; return NULL;
|
||||
case -3: *errorptr = "internal error: opcode not recognized"; return NULL;
|
||||
|
@ -209,6 +209,7 @@ strings to make sure that UTF-8 support works on EBCDIC platforms. */
|
||||
#define STRING_Avestan0 STR_A STR_v STR_e STR_s STR_t STR_a STR_n "\0"
|
||||
#define STRING_Balinese0 STR_B STR_a STR_l STR_i STR_n STR_e STR_s STR_e "\0"
|
||||
#define STRING_Bamum0 STR_B STR_a STR_m STR_u STR_m "\0"
|
||||
#define STRING_Bassa_Vah0 STR_B STR_a STR_s STR_s STR_a STR_UNDERSCORE STR_V STR_a STR_h "\0"
|
||||
#define STRING_Batak0 STR_B STR_a STR_t STR_a STR_k "\0"
|
||||
#define STRING_Bengali0 STR_B STR_e STR_n STR_g STR_a STR_l STR_i "\0"
|
||||
#define STRING_Bopomofo0 STR_B STR_o STR_p STR_o STR_m STR_o STR_f STR_o "\0"
|
||||
@ -219,6 +220,7 @@ strings to make sure that UTF-8 support works on EBCDIC platforms. */
|
||||
#define STRING_C0 STR_C "\0"
|
||||
#define STRING_Canadian_Aboriginal0 STR_C STR_a STR_n STR_a STR_d STR_i STR_a STR_n STR_UNDERSCORE STR_A STR_b STR_o STR_r STR_i STR_g STR_i STR_n STR_a STR_l "\0"
|
||||
#define STRING_Carian0 STR_C STR_a STR_r STR_i STR_a STR_n "\0"
|
||||
#define STRING_Caucasian_Albanian0 STR_C STR_a STR_u STR_c STR_a STR_s STR_i STR_a STR_n STR_UNDERSCORE STR_A STR_l STR_b STR_a STR_n STR_i STR_a STR_n "\0"
|
||||
#define STRING_Cc0 STR_C STR_c "\0"
|
||||
#define STRING_Cf0 STR_C STR_f "\0"
|
||||
#define STRING_Chakma0 STR_C STR_h STR_a STR_k STR_m STR_a "\0"
|
||||
@ -234,11 +236,14 @@ strings to make sure that UTF-8 support works on EBCDIC platforms. */
|
||||
#define STRING_Cyrillic0 STR_C STR_y STR_r STR_i STR_l STR_l STR_i STR_c "\0"
|
||||
#define STRING_Deseret0 STR_D STR_e STR_s STR_e STR_r STR_e STR_t "\0"
|
||||
#define STRING_Devanagari0 STR_D STR_e STR_v STR_a STR_n STR_a STR_g STR_a STR_r STR_i "\0"
|
||||
#define STRING_Duployan0 STR_D STR_u STR_p STR_l STR_o STR_y STR_a STR_n "\0"
|
||||
#define STRING_Egyptian_Hieroglyphs0 STR_E STR_g STR_y STR_p STR_t STR_i STR_a STR_n STR_UNDERSCORE STR_H STR_i STR_e STR_r STR_o STR_g STR_l STR_y STR_p STR_h STR_s "\0"
|
||||
#define STRING_Elbasan0 STR_E STR_l STR_b STR_a STR_s STR_a STR_n "\0"
|
||||
#define STRING_Ethiopic0 STR_E STR_t STR_h STR_i STR_o STR_p STR_i STR_c "\0"
|
||||
#define STRING_Georgian0 STR_G STR_e STR_o STR_r STR_g STR_i STR_a STR_n "\0"
|
||||
#define STRING_Glagolitic0 STR_G STR_l STR_a STR_g STR_o STR_l STR_i STR_t STR_i STR_c "\0"
|
||||
#define STRING_Gothic0 STR_G STR_o STR_t STR_h STR_i STR_c "\0"
|
||||
#define STRING_Grantha0 STR_G STR_r STR_a STR_n STR_t STR_h STR_a "\0"
|
||||
#define STRING_Greek0 STR_G STR_r STR_e STR_e STR_k "\0"
|
||||
#define STRING_Gujarati0 STR_G STR_u STR_j STR_a STR_r STR_a STR_t STR_i "\0"
|
||||
#define STRING_Gurmukhi0 STR_G STR_u STR_r STR_m STR_u STR_k STR_h STR_i "\0"
|
||||
@ -258,12 +263,15 @@ strings to make sure that UTF-8 support works on EBCDIC platforms. */
|
||||
#define STRING_Kayah_Li0 STR_K STR_a STR_y STR_a STR_h STR_UNDERSCORE STR_L STR_i "\0"
|
||||
#define STRING_Kharoshthi0 STR_K STR_h STR_a STR_r STR_o STR_s STR_h STR_t STR_h STR_i "\0"
|
||||
#define STRING_Khmer0 STR_K STR_h STR_m STR_e STR_r "\0"
|
||||
#define STRING_Khojki0 STR_K STR_h STR_o STR_j STR_k STR_i "\0"
|
||||
#define STRING_Khudawadi0 STR_K STR_h STR_u STR_d STR_a STR_w STR_a STR_d STR_i "\0"
|
||||
#define STRING_L0 STR_L "\0"
|
||||
#define STRING_L_AMPERSAND0 STR_L STR_AMPERSAND "\0"
|
||||
#define STRING_Lao0 STR_L STR_a STR_o "\0"
|
||||
#define STRING_Latin0 STR_L STR_a STR_t STR_i STR_n "\0"
|
||||
#define STRING_Lepcha0 STR_L STR_e STR_p STR_c STR_h STR_a "\0"
|
||||
#define STRING_Limbu0 STR_L STR_i STR_m STR_b STR_u "\0"
|
||||
#define STRING_Linear_A0 STR_L STR_i STR_n STR_e STR_a STR_r STR_UNDERSCORE STR_A "\0"
|
||||
#define STRING_Linear_B0 STR_L STR_i STR_n STR_e STR_a STR_r STR_UNDERSCORE STR_B "\0"
|
||||
#define STRING_Lisu0 STR_L STR_i STR_s STR_u "\0"
|
||||
#define STRING_Ll0 STR_L STR_l "\0"
|
||||
@ -274,18 +282,24 @@ strings to make sure that UTF-8 support works on EBCDIC platforms. */
|
||||
#define STRING_Lycian0 STR_L STR_y STR_c STR_i STR_a STR_n "\0"
|
||||
#define STRING_Lydian0 STR_L STR_y STR_d STR_i STR_a STR_n "\0"
|
||||
#define STRING_M0 STR_M "\0"
|
||||
#define STRING_Mahajani0 STR_M STR_a STR_h STR_a STR_j STR_a STR_n STR_i "\0"
|
||||
#define STRING_Malayalam0 STR_M STR_a STR_l STR_a STR_y STR_a STR_l STR_a STR_m "\0"
|
||||
#define STRING_Mandaic0 STR_M STR_a STR_n STR_d STR_a STR_i STR_c "\0"
|
||||
#define STRING_Manichaean0 STR_M STR_a STR_n STR_i STR_c STR_h STR_a STR_e STR_a STR_n "\0"
|
||||
#define STRING_Mc0 STR_M STR_c "\0"
|
||||
#define STRING_Me0 STR_M STR_e "\0"
|
||||
#define STRING_Meetei_Mayek0 STR_M STR_e STR_e STR_t STR_e STR_i STR_UNDERSCORE STR_M STR_a STR_y STR_e STR_k "\0"
|
||||
#define STRING_Mende_Kikakui0 STR_M STR_e STR_n STR_d STR_e STR_UNDERSCORE STR_K STR_i STR_k STR_a STR_k STR_u STR_i "\0"
|
||||
#define STRING_Meroitic_Cursive0 STR_M STR_e STR_r STR_o STR_i STR_t STR_i STR_c STR_UNDERSCORE STR_C STR_u STR_r STR_s STR_i STR_v STR_e "\0"
|
||||
#define STRING_Meroitic_Hieroglyphs0 STR_M STR_e STR_r STR_o STR_i STR_t STR_i STR_c STR_UNDERSCORE STR_H STR_i STR_e STR_r STR_o STR_g STR_l STR_y STR_p STR_h STR_s "\0"
|
||||
#define STRING_Miao0 STR_M STR_i STR_a STR_o "\0"
|
||||
#define STRING_Mn0 STR_M STR_n "\0"
|
||||
#define STRING_Modi0 STR_M STR_o STR_d STR_i "\0"
|
||||
#define STRING_Mongolian0 STR_M STR_o STR_n STR_g STR_o STR_l STR_i STR_a STR_n "\0"
|
||||
#define STRING_Mro0 STR_M STR_r STR_o "\0"
|
||||
#define STRING_Myanmar0 STR_M STR_y STR_a STR_n STR_m STR_a STR_r "\0"
|
||||
#define STRING_N0 STR_N "\0"
|
||||
#define STRING_Nabataean0 STR_N STR_a STR_b STR_a STR_t STR_a STR_e STR_a STR_n "\0"
|
||||
#define STRING_Nd0 STR_N STR_d "\0"
|
||||
#define STRING_New_Tai_Lue0 STR_N STR_e STR_w STR_UNDERSCORE STR_T STR_a STR_i STR_UNDERSCORE STR_L STR_u STR_e "\0"
|
||||
#define STRING_Nko0 STR_N STR_k STR_o "\0"
|
||||
@ -294,12 +308,17 @@ strings to make sure that UTF-8 support works on EBCDIC platforms. */
|
||||
#define STRING_Ogham0 STR_O STR_g STR_h STR_a STR_m "\0"
|
||||
#define STRING_Ol_Chiki0 STR_O STR_l STR_UNDERSCORE STR_C STR_h STR_i STR_k STR_i "\0"
|
||||
#define STRING_Old_Italic0 STR_O STR_l STR_d STR_UNDERSCORE STR_I STR_t STR_a STR_l STR_i STR_c "\0"
|
||||
#define STRING_Old_North_Arabian0 STR_O STR_l STR_d STR_UNDERSCORE STR_N STR_o STR_r STR_t STR_h STR_UNDERSCORE STR_A STR_r STR_a STR_b STR_i STR_a STR_n "\0"
|
||||
#define STRING_Old_Permic0 STR_O STR_l STR_d STR_UNDERSCORE STR_P STR_e STR_r STR_m STR_i STR_c "\0"
|
||||
#define STRING_Old_Persian0 STR_O STR_l STR_d STR_UNDERSCORE STR_P STR_e STR_r STR_s STR_i STR_a STR_n "\0"
|
||||
#define STRING_Old_South_Arabian0 STR_O STR_l STR_d STR_UNDERSCORE STR_S STR_o STR_u STR_t STR_h STR_UNDERSCORE STR_A STR_r STR_a STR_b STR_i STR_a STR_n "\0"
|
||||
#define STRING_Old_Turkic0 STR_O STR_l STR_d STR_UNDERSCORE STR_T STR_u STR_r STR_k STR_i STR_c "\0"
|
||||
#define STRING_Oriya0 STR_O STR_r STR_i STR_y STR_a "\0"
|
||||
#define STRING_Osmanya0 STR_O STR_s STR_m STR_a STR_n STR_y STR_a "\0"
|
||||
#define STRING_P0 STR_P "\0"
|
||||
#define STRING_Pahawh_Hmong0 STR_P STR_a STR_h STR_a STR_w STR_h STR_UNDERSCORE STR_H STR_m STR_o STR_n STR_g "\0"
|
||||
#define STRING_Palmyrene0 STR_P STR_a STR_l STR_m STR_y STR_r STR_e STR_n STR_e "\0"
|
||||
#define STRING_Pau_Cin_Hau0 STR_P STR_a STR_u STR_UNDERSCORE STR_C STR_i STR_n STR_UNDERSCORE STR_H STR_a STR_u "\0"
|
||||
#define STRING_Pc0 STR_P STR_c "\0"
|
||||
#define STRING_Pd0 STR_P STR_d "\0"
|
||||
#define STRING_Pe0 STR_P STR_e "\0"
|
||||
@ -309,6 +328,7 @@ strings to make sure that UTF-8 support works on EBCDIC platforms. */
|
||||
#define STRING_Pi0 STR_P STR_i "\0"
|
||||
#define STRING_Po0 STR_P STR_o "\0"
|
||||
#define STRING_Ps0 STR_P STR_s "\0"
|
||||
#define STRING_Psalter_Pahlavi0 STR_P STR_s STR_a STR_l STR_t STR_e STR_r STR_UNDERSCORE STR_P STR_a STR_h STR_l STR_a STR_v STR_i "\0"
|
||||
#define STRING_Rejang0 STR_R STR_e STR_j STR_a STR_n STR_g "\0"
|
||||
#define STRING_Runic0 STR_R STR_u STR_n STR_i STR_c "\0"
|
||||
#define STRING_S0 STR_S "\0"
|
||||
@ -317,6 +337,7 @@ strings to make sure that UTF-8 support works on EBCDIC platforms. */
|
||||
#define STRING_Sc0 STR_S STR_c "\0"
|
||||
#define STRING_Sharada0 STR_S STR_h STR_a STR_r STR_a STR_d STR_a "\0"
|
||||
#define STRING_Shavian0 STR_S STR_h STR_a STR_v STR_i STR_a STR_n "\0"
|
||||
#define STRING_Siddham0 STR_S STR_i STR_d STR_d STR_h STR_a STR_m "\0"
|
||||
#define STRING_Sinhala0 STR_S STR_i STR_n STR_h STR_a STR_l STR_a "\0"
|
||||
#define STRING_Sk0 STR_S STR_k "\0"
|
||||
#define STRING_Sm0 STR_S STR_m "\0"
|
||||
@ -337,8 +358,10 @@ strings to make sure that UTF-8 support works on EBCDIC platforms. */
|
||||
#define STRING_Thai0 STR_T STR_h STR_a STR_i "\0"
|
||||
#define STRING_Tibetan0 STR_T STR_i STR_b STR_e STR_t STR_a STR_n "\0"
|
||||
#define STRING_Tifinagh0 STR_T STR_i STR_f STR_i STR_n STR_a STR_g STR_h "\0"
|
||||
#define STRING_Tirhuta0 STR_T STR_i STR_r STR_h STR_u STR_t STR_a "\0"
|
||||
#define STRING_Ugaritic0 STR_U STR_g STR_a STR_r STR_i STR_t STR_i STR_c "\0"
|
||||
#define STRING_Vai0 STR_V STR_a STR_i "\0"
|
||||
#define STRING_Warang_Citi0 STR_W STR_a STR_r STR_a STR_n STR_g STR_UNDERSCORE STR_C STR_i STR_t STR_i "\0"
|
||||
#define STRING_Xan0 STR_X STR_a STR_n "\0"
|
||||
#define STRING_Xps0 STR_X STR_p STR_s "\0"
|
||||
#define STRING_Xsp0 STR_X STR_s STR_p "\0"
|
||||
@ -357,6 +380,7 @@ const char PRIV(utt_names)[] =
|
||||
STRING_Avestan0
|
||||
STRING_Balinese0
|
||||
STRING_Bamum0
|
||||
STRING_Bassa_Vah0
|
||||
STRING_Batak0
|
||||
STRING_Bengali0
|
||||
STRING_Bopomofo0
|
||||
@ -367,6 +391,7 @@ const char PRIV(utt_names)[] =
|
||||
STRING_C0
|
||||
STRING_Canadian_Aboriginal0
|
||||
STRING_Carian0
|
||||
STRING_Caucasian_Albanian0
|
||||
STRING_Cc0
|
||||
STRING_Cf0
|
||||
STRING_Chakma0
|
||||
@ -382,11 +407,14 @@ const char PRIV(utt_names)[] =
|
||||
STRING_Cyrillic0
|
||||
STRING_Deseret0
|
||||
STRING_Devanagari0
|
||||
STRING_Duployan0
|
||||
STRING_Egyptian_Hieroglyphs0
|
||||
STRING_Elbasan0
|
||||
STRING_Ethiopic0
|
||||
STRING_Georgian0
|
||||
STRING_Glagolitic0
|
||||
STRING_Gothic0
|
||||
STRING_Grantha0
|
||||
STRING_Greek0
|
||||
STRING_Gujarati0
|
||||
STRING_Gurmukhi0
|
||||
@ -406,12 +434,15 @@ const char PRIV(utt_names)[] =
|
||||
STRING_Kayah_Li0
|
||||
STRING_Kharoshthi0
|
||||
STRING_Khmer0
|
||||
STRING_Khojki0
|
||||
STRING_Khudawadi0
|
||||
STRING_L0
|
||||
STRING_L_AMPERSAND0
|
||||
STRING_Lao0
|
||||
STRING_Latin0
|
||||
STRING_Lepcha0
|
||||
STRING_Limbu0
|
||||
STRING_Linear_A0
|
||||
STRING_Linear_B0
|
||||
STRING_Lisu0
|
||||
STRING_Ll0
|
||||
@ -422,18 +453,24 @@ const char PRIV(utt_names)[] =
|
||||
STRING_Lycian0
|
||||
STRING_Lydian0
|
||||
STRING_M0
|
||||
STRING_Mahajani0
|
||||
STRING_Malayalam0
|
||||
STRING_Mandaic0
|
||||
STRING_Manichaean0
|
||||
STRING_Mc0
|
||||
STRING_Me0
|
||||
STRING_Meetei_Mayek0
|
||||
STRING_Mende_Kikakui0
|
||||
STRING_Meroitic_Cursive0
|
||||
STRING_Meroitic_Hieroglyphs0
|
||||
STRING_Miao0
|
||||
STRING_Mn0
|
||||
STRING_Modi0
|
||||
STRING_Mongolian0
|
||||
STRING_Mro0
|
||||
STRING_Myanmar0
|
||||
STRING_N0
|
||||
STRING_Nabataean0
|
||||
STRING_Nd0
|
||||
STRING_New_Tai_Lue0
|
||||
STRING_Nko0
|
||||
@ -442,12 +479,17 @@ const char PRIV(utt_names)[] =
|
||||
STRING_Ogham0
|
||||
STRING_Ol_Chiki0
|
||||
STRING_Old_Italic0
|
||||
STRING_Old_North_Arabian0
|
||||
STRING_Old_Permic0
|
||||
STRING_Old_Persian0
|
||||
STRING_Old_South_Arabian0
|
||||
STRING_Old_Turkic0
|
||||
STRING_Oriya0
|
||||
STRING_Osmanya0
|
||||
STRING_P0
|
||||
STRING_Pahawh_Hmong0
|
||||
STRING_Palmyrene0
|
||||
STRING_Pau_Cin_Hau0
|
||||
STRING_Pc0
|
||||
STRING_Pd0
|
||||
STRING_Pe0
|
||||
@ -457,6 +499,7 @@ const char PRIV(utt_names)[] =
|
||||
STRING_Pi0
|
||||
STRING_Po0
|
||||
STRING_Ps0
|
||||
STRING_Psalter_Pahlavi0
|
||||
STRING_Rejang0
|
||||
STRING_Runic0
|
||||
STRING_S0
|
||||
@ -465,6 +508,7 @@ const char PRIV(utt_names)[] =
|
||||
STRING_Sc0
|
||||
STRING_Sharada0
|
||||
STRING_Shavian0
|
||||
STRING_Siddham0
|
||||
STRING_Sinhala0
|
||||
STRING_Sk0
|
||||
STRING_Sm0
|
||||
@ -485,8 +529,10 @@ const char PRIV(utt_names)[] =
|
||||
STRING_Thai0
|
||||
STRING_Tibetan0
|
||||
STRING_Tifinagh0
|
||||
STRING_Tirhuta0
|
||||
STRING_Ugaritic0
|
||||
STRING_Vai0
|
||||
STRING_Warang_Citi0
|
||||
STRING_Xan0
|
||||
STRING_Xps0
|
||||
STRING_Xsp0
|
||||
@ -505,146 +551,169 @@ const ucp_type_table PRIV(utt)[] = {
|
||||
{ 20, PT_SC, ucp_Avestan },
|
||||
{ 28, PT_SC, ucp_Balinese },
|
||||
{ 37, PT_SC, ucp_Bamum },
|
||||
{ 43, PT_SC, ucp_Batak },
|
||||
{ 49, PT_SC, ucp_Bengali },
|
||||
{ 57, PT_SC, ucp_Bopomofo },
|
||||
{ 66, PT_SC, ucp_Brahmi },
|
||||
{ 73, PT_SC, ucp_Braille },
|
||||
{ 81, PT_SC, ucp_Buginese },
|
||||
{ 90, PT_SC, ucp_Buhid },
|
||||
{ 96, PT_GC, ucp_C },
|
||||
{ 98, PT_SC, ucp_Canadian_Aboriginal },
|
||||
{ 118, PT_SC, ucp_Carian },
|
||||
{ 125, PT_PC, ucp_Cc },
|
||||
{ 128, PT_PC, ucp_Cf },
|
||||
{ 131, PT_SC, ucp_Chakma },
|
||||
{ 138, PT_SC, ucp_Cham },
|
||||
{ 143, PT_SC, ucp_Cherokee },
|
||||
{ 152, PT_PC, ucp_Cn },
|
||||
{ 155, PT_PC, ucp_Co },
|
||||
{ 158, PT_SC, ucp_Common },
|
||||
{ 165, PT_SC, ucp_Coptic },
|
||||
{ 172, PT_PC, ucp_Cs },
|
||||
{ 175, PT_SC, ucp_Cuneiform },
|
||||
{ 185, PT_SC, ucp_Cypriot },
|
||||
{ 193, PT_SC, ucp_Cyrillic },
|
||||
{ 202, PT_SC, ucp_Deseret },
|
||||
{ 210, PT_SC, ucp_Devanagari },
|
||||
{ 221, PT_SC, ucp_Egyptian_Hieroglyphs },
|
||||
{ 242, PT_SC, ucp_Ethiopic },
|
||||
{ 251, PT_SC, ucp_Georgian },
|
||||
{ 260, PT_SC, ucp_Glagolitic },
|
||||
{ 271, PT_SC, ucp_Gothic },
|
||||
{ 278, PT_SC, ucp_Greek },
|
||||
{ 284, PT_SC, ucp_Gujarati },
|
||||
{ 293, PT_SC, ucp_Gurmukhi },
|
||||
{ 302, PT_SC, ucp_Han },
|
||||
{ 306, PT_SC, ucp_Hangul },
|
||||
{ 313, PT_SC, ucp_Hanunoo },
|
||||
{ 321, PT_SC, ucp_Hebrew },
|
||||
{ 328, PT_SC, ucp_Hiragana },
|
||||
{ 337, PT_SC, ucp_Imperial_Aramaic },
|
||||
{ 354, PT_SC, ucp_Inherited },
|
||||
{ 364, PT_SC, ucp_Inscriptional_Pahlavi },
|
||||
{ 386, PT_SC, ucp_Inscriptional_Parthian },
|
||||
{ 409, PT_SC, ucp_Javanese },
|
||||
{ 418, PT_SC, ucp_Kaithi },
|
||||
{ 425, PT_SC, ucp_Kannada },
|
||||
{ 433, PT_SC, ucp_Katakana },
|
||||
{ 442, PT_SC, ucp_Kayah_Li },
|
||||
{ 451, PT_SC, ucp_Kharoshthi },
|
||||
{ 462, PT_SC, ucp_Khmer },
|
||||
{ 468, PT_GC, ucp_L },
|
||||
{ 470, PT_LAMP, 0 },
|
||||
{ 473, PT_SC, ucp_Lao },
|
||||
{ 477, PT_SC, ucp_Latin },
|
||||
{ 483, PT_SC, ucp_Lepcha },
|
||||
{ 490, PT_SC, ucp_Limbu },
|
||||
{ 496, PT_SC, ucp_Linear_B },
|
||||
{ 505, PT_SC, ucp_Lisu },
|
||||
{ 510, PT_PC, ucp_Ll },
|
||||
{ 513, PT_PC, ucp_Lm },
|
||||
{ 516, PT_PC, ucp_Lo },
|
||||
{ 519, PT_PC, ucp_Lt },
|
||||
{ 522, PT_PC, ucp_Lu },
|
||||
{ 525, PT_SC, ucp_Lycian },
|
||||
{ 532, PT_SC, ucp_Lydian },
|
||||
{ 539, PT_GC, ucp_M },
|
||||
{ 541, PT_SC, ucp_Malayalam },
|
||||
{ 551, PT_SC, ucp_Mandaic },
|
||||
{ 559, PT_PC, ucp_Mc },
|
||||
{ 562, PT_PC, ucp_Me },
|
||||
{ 565, PT_SC, ucp_Meetei_Mayek },
|
||||
{ 578, PT_SC, ucp_Meroitic_Cursive },
|
||||
{ 595, PT_SC, ucp_Meroitic_Hieroglyphs },
|
||||
{ 616, PT_SC, ucp_Miao },
|
||||
{ 621, PT_PC, ucp_Mn },
|
||||
{ 624, PT_SC, ucp_Mongolian },
|
||||
{ 634, PT_SC, ucp_Myanmar },
|
||||
{ 642, PT_GC, ucp_N },
|
||||
{ 644, PT_PC, ucp_Nd },
|
||||
{ 647, PT_SC, ucp_New_Tai_Lue },
|
||||
{ 659, PT_SC, ucp_Nko },
|
||||
{ 663, PT_PC, ucp_Nl },
|
||||
{ 666, PT_PC, ucp_No },
|
||||
{ 669, PT_SC, ucp_Ogham },
|
||||
{ 675, PT_SC, ucp_Ol_Chiki },
|
||||
{ 684, PT_SC, ucp_Old_Italic },
|
||||
{ 695, PT_SC, ucp_Old_Persian },
|
||||
{ 707, PT_SC, ucp_Old_South_Arabian },
|
||||
{ 725, PT_SC, ucp_Old_Turkic },
|
||||
{ 736, PT_SC, ucp_Oriya },
|
||||
{ 742, PT_SC, ucp_Osmanya },
|
||||
{ 750, PT_GC, ucp_P },
|
||||
{ 752, PT_PC, ucp_Pc },
|
||||
{ 755, PT_PC, ucp_Pd },
|
||||
{ 758, PT_PC, ucp_Pe },
|
||||
{ 761, PT_PC, ucp_Pf },
|
||||
{ 764, PT_SC, ucp_Phags_Pa },
|
||||
{ 773, PT_SC, ucp_Phoenician },
|
||||
{ 784, PT_PC, ucp_Pi },
|
||||
{ 787, PT_PC, ucp_Po },
|
||||
{ 790, PT_PC, ucp_Ps },
|
||||
{ 793, PT_SC, ucp_Rejang },
|
||||
{ 800, PT_SC, ucp_Runic },
|
||||
{ 806, PT_GC, ucp_S },
|
||||
{ 808, PT_SC, ucp_Samaritan },
|
||||
{ 818, PT_SC, ucp_Saurashtra },
|
||||
{ 829, PT_PC, ucp_Sc },
|
||||
{ 832, PT_SC, ucp_Sharada },
|
||||
{ 840, PT_SC, ucp_Shavian },
|
||||
{ 848, PT_SC, ucp_Sinhala },
|
||||
{ 856, PT_PC, ucp_Sk },
|
||||
{ 859, PT_PC, ucp_Sm },
|
||||
{ 862, PT_PC, ucp_So },
|
||||
{ 865, PT_SC, ucp_Sora_Sompeng },
|
||||
{ 878, PT_SC, ucp_Sundanese },
|
||||
{ 888, PT_SC, ucp_Syloti_Nagri },
|
||||
{ 901, PT_SC, ucp_Syriac },
|
||||
{ 908, PT_SC, ucp_Tagalog },
|
||||
{ 916, PT_SC, ucp_Tagbanwa },
|
||||
{ 925, PT_SC, ucp_Tai_Le },
|
||||
{ 932, PT_SC, ucp_Tai_Tham },
|
||||
{ 941, PT_SC, ucp_Tai_Viet },
|
||||
{ 950, PT_SC, ucp_Takri },
|
||||
{ 956, PT_SC, ucp_Tamil },
|
||||
{ 962, PT_SC, ucp_Telugu },
|
||||
{ 969, PT_SC, ucp_Thaana },
|
||||
{ 976, PT_SC, ucp_Thai },
|
||||
{ 981, PT_SC, ucp_Tibetan },
|
||||
{ 989, PT_SC, ucp_Tifinagh },
|
||||
{ 998, PT_SC, ucp_Ugaritic },
|
||||
{ 1007, PT_SC, ucp_Vai },
|
||||
{ 1011, PT_ALNUM, 0 },
|
||||
{ 1015, PT_PXSPACE, 0 },
|
||||
{ 1019, PT_SPACE, 0 },
|
||||
{ 1023, PT_UCNC, 0 },
|
||||
{ 1027, PT_WORD, 0 },
|
||||
{ 1031, PT_SC, ucp_Yi },
|
||||
{ 1034, PT_GC, ucp_Z },
|
||||
{ 1036, PT_PC, ucp_Zl },
|
||||
{ 1039, PT_PC, ucp_Zp },
|
||||
{ 1042, PT_PC, ucp_Zs }
|
||||
{ 43, PT_SC, ucp_Bassa_Vah },
|
||||
{ 53, PT_SC, ucp_Batak },
|
||||
{ 59, PT_SC, ucp_Bengali },
|
||||
{ 67, PT_SC, ucp_Bopomofo },
|
||||
{ 76, PT_SC, ucp_Brahmi },
|
||||
{ 83, PT_SC, ucp_Braille },
|
||||
{ 91, PT_SC, ucp_Buginese },
|
||||
{ 100, PT_SC, ucp_Buhid },
|
||||
{ 106, PT_GC, ucp_C },
|
||||
{ 108, PT_SC, ucp_Canadian_Aboriginal },
|
||||
{ 128, PT_SC, ucp_Carian },
|
||||
{ 135, PT_SC, ucp_Caucasian_Albanian },
|
||||
{ 154, PT_PC, ucp_Cc },
|
||||
{ 157, PT_PC, ucp_Cf },
|
||||
{ 160, PT_SC, ucp_Chakma },
|
||||
{ 167, PT_SC, ucp_Cham },
|
||||
{ 172, PT_SC, ucp_Cherokee },
|
||||
{ 181, PT_PC, ucp_Cn },
|
||||
{ 184, PT_PC, ucp_Co },
|
||||
{ 187, PT_SC, ucp_Common },
|
||||
{ 194, PT_SC, ucp_Coptic },
|
||||
{ 201, PT_PC, ucp_Cs },
|
||||
{ 204, PT_SC, ucp_Cuneiform },
|
||||
{ 214, PT_SC, ucp_Cypriot },
|
||||
{ 222, PT_SC, ucp_Cyrillic },
|
||||
{ 231, PT_SC, ucp_Deseret },
|
||||
{ 239, PT_SC, ucp_Devanagari },
|
||||
{ 250, PT_SC, ucp_Duployan },
|
||||
{ 259, PT_SC, ucp_Egyptian_Hieroglyphs },
|
||||
{ 280, PT_SC, ucp_Elbasan },
|
||||
{ 288, PT_SC, ucp_Ethiopic },
|
||||
{ 297, PT_SC, ucp_Georgian },
|
||||
{ 306, PT_SC, ucp_Glagolitic },
|
||||
{ 317, PT_SC, ucp_Gothic },
|
||||
{ 324, PT_SC, ucp_Grantha },
|
||||
{ 332, PT_SC, ucp_Greek },
|
||||
{ 338, PT_SC, ucp_Gujarati },
|
||||
{ 347, PT_SC, ucp_Gurmukhi },
|
||||
{ 356, PT_SC, ucp_Han },
|
||||
{ 360, PT_SC, ucp_Hangul },
|
||||
{ 367, PT_SC, ucp_Hanunoo },
|
||||
{ 375, PT_SC, ucp_Hebrew },
|
||||
{ 382, PT_SC, ucp_Hiragana },
|
||||
{ 391, PT_SC, ucp_Imperial_Aramaic },
|
||||
{ 408, PT_SC, ucp_Inherited },
|
||||
{ 418, PT_SC, ucp_Inscriptional_Pahlavi },
|
||||
{ 440, PT_SC, ucp_Inscriptional_Parthian },
|
||||
{ 463, PT_SC, ucp_Javanese },
|
||||
{ 472, PT_SC, ucp_Kaithi },
|
||||
{ 479, PT_SC, ucp_Kannada },
|
||||
{ 487, PT_SC, ucp_Katakana },
|
||||
{ 496, PT_SC, ucp_Kayah_Li },
|
||||
{ 505, PT_SC, ucp_Kharoshthi },
|
||||
{ 516, PT_SC, ucp_Khmer },
|
||||
{ 522, PT_SC, ucp_Khojki },
|
||||
{ 529, PT_SC, ucp_Khudawadi },
|
||||
{ 539, PT_GC, ucp_L },
|
||||
{ 541, PT_LAMP, 0 },
|
||||
{ 544, PT_SC, ucp_Lao },
|
||||
{ 548, PT_SC, ucp_Latin },
|
||||
{ 554, PT_SC, ucp_Lepcha },
|
||||
{ 561, PT_SC, ucp_Limbu },
|
||||
{ 567, PT_SC, ucp_Linear_A },
|
||||
{ 576, PT_SC, ucp_Linear_B },
|
||||
{ 585, PT_SC, ucp_Lisu },
|
||||
{ 590, PT_PC, ucp_Ll },
|
||||
{ 593, PT_PC, ucp_Lm },
|
||||
{ 596, PT_PC, ucp_Lo },
|
||||
{ 599, PT_PC, ucp_Lt },
|
||||
{ 602, PT_PC, ucp_Lu },
|
||||
{ 605, PT_SC, ucp_Lycian },
|
||||
{ 612, PT_SC, ucp_Lydian },
|
||||
{ 619, PT_GC, ucp_M },
|
||||
{ 621, PT_SC, ucp_Mahajani },
|
||||
{ 630, PT_SC, ucp_Malayalam },
|
||||
{ 640, PT_SC, ucp_Mandaic },
|
||||
{ 648, PT_SC, ucp_Manichaean },
|
||||
{ 659, PT_PC, ucp_Mc },
|
||||
{ 662, PT_PC, ucp_Me },
|
||||
{ 665, PT_SC, ucp_Meetei_Mayek },
|
||||
{ 678, PT_SC, ucp_Mende_Kikakui },
|
||||
{ 692, PT_SC, ucp_Meroitic_Cursive },
|
||||
{ 709, PT_SC, ucp_Meroitic_Hieroglyphs },
|
||||
{ 730, PT_SC, ucp_Miao },
|
||||
{ 735, PT_PC, ucp_Mn },
|
||||
{ 738, PT_SC, ucp_Modi },
|
||||
{ 743, PT_SC, ucp_Mongolian },
|
||||
{ 753, PT_SC, ucp_Mro },
|
||||
{ 757, PT_SC, ucp_Myanmar },
|
||||
{ 765, PT_GC, ucp_N },
|
||||
{ 767, PT_SC, ucp_Nabataean },
|
||||
{ 777, PT_PC, ucp_Nd },
|
||||
{ 780, PT_SC, ucp_New_Tai_Lue },
|
||||
{ 792, PT_SC, ucp_Nko },
|
||||
{ 796, PT_PC, ucp_Nl },
|
||||
{ 799, PT_PC, ucp_No },
|
||||
{ 802, PT_SC, ucp_Ogham },
|
||||
{ 808, PT_SC, ucp_Ol_Chiki },
|
||||
{ 817, PT_SC, ucp_Old_Italic },
|
||||
{ 828, PT_SC, ucp_Old_North_Arabian },
|
||||
{ 846, PT_SC, ucp_Old_Permic },
|
||||
{ 857, PT_SC, ucp_Old_Persian },
|
||||
{ 869, PT_SC, ucp_Old_South_Arabian },
|
||||
{ 887, PT_SC, ucp_Old_Turkic },
|
||||
{ 898, PT_SC, ucp_Oriya },
|
||||
{ 904, PT_SC, ucp_Osmanya },
|
||||
{ 912, PT_GC, ucp_P },
|
||||
{ 914, PT_SC, ucp_Pahawh_Hmong },
|
||||
{ 927, PT_SC, ucp_Palmyrene },
|
||||
{ 937, PT_SC, ucp_Pau_Cin_Hau },
|
||||
{ 949, PT_PC, ucp_Pc },
|
||||
{ 952, PT_PC, ucp_Pd },
|
||||
{ 955, PT_PC, ucp_Pe },
|
||||
{ 958, PT_PC, ucp_Pf },
|
||||
{ 961, PT_SC, ucp_Phags_Pa },
|
||||
{ 970, PT_SC, ucp_Phoenician },
|
||||
{ 981, PT_PC, ucp_Pi },
|
||||
{ 984, PT_PC, ucp_Po },
|
||||
{ 987, PT_PC, ucp_Ps },
|
||||
{ 990, PT_SC, ucp_Psalter_Pahlavi },
|
||||
{ 1006, PT_SC, ucp_Rejang },
|
||||
{ 1013, PT_SC, ucp_Runic },
|
||||
{ 1019, PT_GC, ucp_S },
|
||||
{ 1021, PT_SC, ucp_Samaritan },
|
||||
{ 1031, PT_SC, ucp_Saurashtra },
|
||||
{ 1042, PT_PC, ucp_Sc },
|
||||
{ 1045, PT_SC, ucp_Sharada },
|
||||
{ 1053, PT_SC, ucp_Shavian },
|
||||
{ 1061, PT_SC, ucp_Siddham },
|
||||
{ 1069, PT_SC, ucp_Sinhala },
|
||||
{ 1077, PT_PC, ucp_Sk },
|
||||
{ 1080, PT_PC, ucp_Sm },
|
||||
{ 1083, PT_PC, ucp_So },
|
||||
{ 1086, PT_SC, ucp_Sora_Sompeng },
|
||||
{ 1099, PT_SC, ucp_Sundanese },
|
||||
{ 1109, PT_SC, ucp_Syloti_Nagri },
|
||||
{ 1122, PT_SC, ucp_Syriac },
|
||||
{ 1129, PT_SC, ucp_Tagalog },
|
||||
{ 1137, PT_SC, ucp_Tagbanwa },
|
||||
{ 1146, PT_SC, ucp_Tai_Le },
|
||||
{ 1153, PT_SC, ucp_Tai_Tham },
|
||||
{ 1162, PT_SC, ucp_Tai_Viet },
|
||||
{ 1171, PT_SC, ucp_Takri },
|
||||
{ 1177, PT_SC, ucp_Tamil },
|
||||
{ 1183, PT_SC, ucp_Telugu },
|
||||
{ 1190, PT_SC, ucp_Thaana },
|
||||
{ 1197, PT_SC, ucp_Thai },
|
||||
{ 1202, PT_SC, ucp_Tibetan },
|
||||
{ 1210, PT_SC, ucp_Tifinagh },
|
||||
{ 1219, PT_SC, ucp_Tirhuta },
|
||||
{ 1227, PT_SC, ucp_Ugaritic },
|
||||
{ 1236, PT_SC, ucp_Vai },
|
||||
{ 1240, PT_SC, ucp_Warang_Citi },
|
||||
{ 1252, PT_ALNUM, 0 },
|
||||
{ 1256, PT_PXSPACE, 0 },
|
||||
{ 1260, PT_SPACE, 0 },
|
||||
{ 1264, PT_UCNC, 0 },
|
||||
{ 1268, PT_WORD, 0 },
|
||||
{ 1272, PT_SC, ucp_Yi },
|
||||
{ 1275, PT_GC, ucp_Z },
|
||||
{ 1277, PT_PC, ucp_Zl },
|
||||
{ 1280, PT_PC, ucp_Zp },
|
||||
{ 1283, PT_PC, ucp_Zs }
|
||||
};
|
||||
|
||||
const int PRIV(utt_size) = sizeof(PRIV(utt)) / sizeof(ucp_type_table);
|
||||
|
File diff suppressed because it is too large
Load Diff
@ -242,7 +242,7 @@ while ((t = *data++) != XCL_END)
|
||||
|
||||
case PT_PXPUNCT:
|
||||
if ((PRIV(ucp_gentype)[prop->chartype] == ucp_P ||
|
||||
(c < 256 && PRIV(ucp_gentype)[prop->chartype] == ucp_S)) == isprop)
|
||||
(c < 128 && PRIV(ucp_gentype)[prop->chartype] == ucp_S)) == isprop)
|
||||
return !negated;
|
||||
break;
|
||||
|
||||
|
@ -192,7 +192,31 @@ enum {
|
||||
ucp_Miao,
|
||||
ucp_Sharada,
|
||||
ucp_Sora_Sompeng,
|
||||
ucp_Takri
|
||||
ucp_Takri,
|
||||
/* New for Unicode 7.0.0: */
|
||||
ucp_Bassa_Vah,
|
||||
ucp_Caucasian_Albanian,
|
||||
ucp_Duployan,
|
||||
ucp_Elbasan,
|
||||
ucp_Grantha,
|
||||
ucp_Khojki,
|
||||
ucp_Khudawadi,
|
||||
ucp_Linear_A,
|
||||
ucp_Mahajani,
|
||||
ucp_Manichaean,
|
||||
ucp_Mende_Kikakui,
|
||||
ucp_Modi,
|
||||
ucp_Mro,
|
||||
ucp_Nabataean,
|
||||
ucp_Old_North_Arabian,
|
||||
ucp_Old_Permic,
|
||||
ucp_Pahawh_Hmong,
|
||||
ucp_Palmyrene,
|
||||
ucp_Psalter_Pahlavi,
|
||||
ucp_Pau_Cin_Hau,
|
||||
ucp_Siddham,
|
||||
ucp_Tirhuta,
|
||||
ucp_Warang_Citi
|
||||
};
|
||||
|
||||
#endif
|
||||
|
Loading…
x
Reference in New Issue
Block a user