mirror of
https://github.com/pocoproject/poco.git
synced 2025-10-28 03:20:11 +01:00
GH #1586: Upgrade bundled PCRE to 8.40
This commit is contained in:
@@ -1137,88 +1137,81 @@ for (;;)
|
||||
printf("\n");
|
||||
#endif
|
||||
|
||||
if (offset < md->offset_max)
|
||||
if (offset >= md->offset_max) goto POSSESSIVE_NON_CAPTURE;
|
||||
|
||||
matched_once = FALSE;
|
||||
code_offset = (int)(ecode - md->start_code);
|
||||
|
||||
save_offset1 = md->offset_vector[offset];
|
||||
save_offset2 = md->offset_vector[offset+1];
|
||||
save_offset3 = md->offset_vector[md->offset_end - number];
|
||||
save_capture_last = md->capture_last;
|
||||
|
||||
DPRINTF(("saving %d %d %d\n", save_offset1, save_offset2, save_offset3));
|
||||
|
||||
/* Each time round the loop, save the current subject position for use
|
||||
when the group matches. For MATCH_MATCH, the group has matched, so we
|
||||
restart it with a new subject starting position, remembering that we had
|
||||
at least one match. For MATCH_NOMATCH, carry on with the alternatives, as
|
||||
usual. If we haven't matched any alternatives in any iteration, check to
|
||||
see if a previous iteration matched. If so, the group has matched;
|
||||
continue from afterwards. Otherwise it has failed; restore the previous
|
||||
capture values before returning NOMATCH. */
|
||||
|
||||
for (;;)
|
||||
{
|
||||
matched_once = FALSE;
|
||||
code_offset = (int)(ecode - md->start_code);
|
||||
|
||||
save_offset1 = md->offset_vector[offset];
|
||||
save_offset2 = md->offset_vector[offset+1];
|
||||
save_offset3 = md->offset_vector[md->offset_end - number];
|
||||
save_capture_last = md->capture_last;
|
||||
|
||||
DPRINTF(("saving %d %d %d\n", save_offset1, save_offset2, save_offset3));
|
||||
|
||||
/* Each time round the loop, save the current subject position for use
|
||||
when the group matches. For MATCH_MATCH, the group has matched, so we
|
||||
restart it with a new subject starting position, remembering that we had
|
||||
at least one match. For MATCH_NOMATCH, carry on with the alternatives, as
|
||||
usual. If we haven't matched any alternatives in any iteration, check to
|
||||
see if a previous iteration matched. If so, the group has matched;
|
||||
continue from afterwards. Otherwise it has failed; restore the previous
|
||||
capture values before returning NOMATCH. */
|
||||
|
||||
for (;;)
|
||||
md->offset_vector[md->offset_end - number] =
|
||||
(int)(eptr - md->start_subject);
|
||||
if (op >= OP_SBRA) md->match_function_type = MATCH_CBEGROUP;
|
||||
RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode], offset_top, md,
|
||||
eptrb, RM63);
|
||||
if (rrc == MATCH_KETRPOS)
|
||||
{
|
||||
md->offset_vector[md->offset_end - number] =
|
||||
(int)(eptr - md->start_subject);
|
||||
if (op >= OP_SBRA) md->match_function_type = MATCH_CBEGROUP;
|
||||
RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode], offset_top, md,
|
||||
eptrb, RM63);
|
||||
if (rrc == MATCH_KETRPOS)
|
||||
offset_top = md->end_offset_top;
|
||||
ecode = md->start_code + code_offset;
|
||||
save_capture_last = md->capture_last;
|
||||
matched_once = TRUE;
|
||||
mstart = md->start_match_ptr; /* In case \K changed it */
|
||||
if (eptr == md->end_match_ptr) /* Matched an empty string */
|
||||
{
|
||||
offset_top = md->end_offset_top;
|
||||
eptr = md->end_match_ptr;
|
||||
ecode = md->start_code + code_offset;
|
||||
save_capture_last = md->capture_last;
|
||||
matched_once = TRUE;
|
||||
mstart = md->start_match_ptr; /* In case \K changed it */
|
||||
continue;
|
||||
do ecode += GET(ecode, 1); while (*ecode == OP_ALT);
|
||||
break;
|
||||
}
|
||||
|
||||
/* See comment in the code for capturing groups above about handling
|
||||
THEN. */
|
||||
|
||||
if (rrc == MATCH_THEN)
|
||||
{
|
||||
next = ecode + GET(ecode,1);
|
||||
if (md->start_match_ptr < next &&
|
||||
(*ecode == OP_ALT || *next == OP_ALT))
|
||||
rrc = MATCH_NOMATCH;
|
||||
}
|
||||
|
||||
if (rrc != MATCH_NOMATCH) RRETURN(rrc);
|
||||
md->capture_last = save_capture_last;
|
||||
ecode += GET(ecode, 1);
|
||||
if (*ecode != OP_ALT) break;
|
||||
eptr = md->end_match_ptr;
|
||||
continue;
|
||||
}
|
||||
|
||||
if (!matched_once)
|
||||
/* See comment in the code for capturing groups above about handling
|
||||
THEN. */
|
||||
|
||||
if (rrc == MATCH_THEN)
|
||||
{
|
||||
md->offset_vector[offset] = save_offset1;
|
||||
md->offset_vector[offset+1] = save_offset2;
|
||||
md->offset_vector[md->offset_end - number] = save_offset3;
|
||||
next = ecode + GET(ecode,1);
|
||||
if (md->start_match_ptr < next &&
|
||||
(*ecode == OP_ALT || *next == OP_ALT))
|
||||
rrc = MATCH_NOMATCH;
|
||||
}
|
||||
|
||||
if (allow_zero || matched_once)
|
||||
{
|
||||
ecode += 1 + LINK_SIZE;
|
||||
break;
|
||||
}
|
||||
|
||||
RRETURN(MATCH_NOMATCH);
|
||||
if (rrc != MATCH_NOMATCH) RRETURN(rrc);
|
||||
md->capture_last = save_capture_last;
|
||||
ecode += GET(ecode, 1);
|
||||
if (*ecode != OP_ALT) break;
|
||||
}
|
||||
|
||||
/* FALL THROUGH ... Insufficient room for saving captured contents. Treat
|
||||
as a non-capturing bracket. */
|
||||
if (!matched_once)
|
||||
{
|
||||
md->offset_vector[offset] = save_offset1;
|
||||
md->offset_vector[offset+1] = save_offset2;
|
||||
md->offset_vector[md->offset_end - number] = save_offset3;
|
||||
}
|
||||
|
||||
/* VVVVVVVVVVVVVVVVVVVVVVVVV */
|
||||
/* VVVVVVVVVVVVVVVVVVVVVVVVV */
|
||||
if (allow_zero || matched_once)
|
||||
{
|
||||
ecode += 1 + LINK_SIZE;
|
||||
break;
|
||||
}
|
||||
|
||||
DPRINTF(("insufficient capture room: treat as non-capturing\n"));
|
||||
|
||||
/* VVVVVVVVVVVVVVVVVVVVVVVVV */
|
||||
/* VVVVVVVVVVVVVVVVVVVVVVVVV */
|
||||
RRETURN(MATCH_NOMATCH);
|
||||
|
||||
/* Non-capturing possessive bracket with unlimited repeat. We come here
|
||||
from BRAZERO with allow_zero = TRUE. The code is similar to the above,
|
||||
@@ -1242,10 +1235,15 @@ for (;;)
|
||||
if (rrc == MATCH_KETRPOS)
|
||||
{
|
||||
offset_top = md->end_offset_top;
|
||||
eptr = md->end_match_ptr;
|
||||
ecode = md->start_code + code_offset;
|
||||
matched_once = TRUE;
|
||||
mstart = md->start_match_ptr; /* In case \K reset it */
|
||||
if (eptr == md->end_match_ptr) /* Matched an empty string */
|
||||
{
|
||||
do ecode += GET(ecode, 1); while (*ecode == OP_ALT);
|
||||
break;
|
||||
}
|
||||
eptr = md->end_match_ptr;
|
||||
continue;
|
||||
}
|
||||
|
||||
@@ -1379,6 +1377,7 @@ for (;;)
|
||||
break;
|
||||
|
||||
case OP_DEF: /* DEFINE - always false */
|
||||
case OP_FAIL: /* From optimized (?!) condition */
|
||||
break;
|
||||
|
||||
/* The condition is an assertion. Call match() to evaluate it - setting
|
||||
@@ -1395,8 +1394,11 @@ for (;;)
|
||||
condition = TRUE;
|
||||
|
||||
/* Advance ecode past the assertion to the start of the first branch,
|
||||
but adjust it so that the general choosing code below works. */
|
||||
but adjust it so that the general choosing code below works. If the
|
||||
assertion has a quantifier that allows zero repeats we must skip over
|
||||
the BRAZERO. This is a lunatic thing to do, but somebody did! */
|
||||
|
||||
if (*ecode == OP_BRAZERO) ecode++;
|
||||
ecode += GET(ecode, 1);
|
||||
while (*ecode == OP_ALT) ecode += GET(ecode, 1);
|
||||
ecode += 1 + LINK_SIZE - PRIV(OP_lengths)[condcode];
|
||||
@@ -1465,7 +1467,18 @@ for (;;)
|
||||
md->offset_vector[offset] =
|
||||
md->offset_vector[md->offset_end - number];
|
||||
md->offset_vector[offset+1] = (int)(eptr - md->start_subject);
|
||||
if (offset_top <= offset) offset_top = offset + 2;
|
||||
|
||||
/* If this group is at or above the current highwater mark, ensure that
|
||||
any groups between the current high water mark and this group are marked
|
||||
unset and then update the high water mark. */
|
||||
|
||||
if (offset >= offset_top)
|
||||
{
|
||||
register int *iptr = md->offset_vector + offset_top;
|
||||
register int *iend = md->offset_vector + offset;
|
||||
while (iptr < iend) *iptr++ = -1;
|
||||
offset_top = offset + 2;
|
||||
}
|
||||
}
|
||||
ecode += 1 + IMM2_SIZE;
|
||||
break;
|
||||
@@ -1817,7 +1830,11 @@ for (;;)
|
||||
are defined in a range that can be tested for. */
|
||||
|
||||
if (rrc >= MATCH_BACKTRACK_MIN && rrc <= MATCH_BACKTRACK_MAX)
|
||||
{
|
||||
if (new_recursive.offset_save != stacksave)
|
||||
(PUBL(free))(new_recursive.offset_save);
|
||||
RRETURN(MATCH_NOMATCH);
|
||||
}
|
||||
|
||||
/* Any return code other than NOMATCH is an error. */
|
||||
|
||||
@@ -1980,6 +1997,19 @@ for (;;)
|
||||
}
|
||||
}
|
||||
|
||||
/* OP_KETRPOS is a possessive repeating ket. Remember the current position,
|
||||
and return the MATCH_KETRPOS. This makes it possible to do the repeats one
|
||||
at a time from the outer level, thus saving stack. This must precede the
|
||||
empty string test - in this case that test is done at the outer level. */
|
||||
|
||||
if (*ecode == OP_KETRPOS)
|
||||
{
|
||||
md->start_match_ptr = mstart; /* In case \K reset it */
|
||||
md->end_match_ptr = eptr;
|
||||
md->end_offset_top = offset_top;
|
||||
RRETURN(MATCH_KETRPOS);
|
||||
}
|
||||
|
||||
/* For an ordinary non-repeating ket, just continue at this level. This
|
||||
also happens for a repeating ket if no characters were matched in the
|
||||
group. This is the forcible breaking of infinite loops as implemented in
|
||||
@@ -2002,18 +2032,6 @@ for (;;)
|
||||
break;
|
||||
}
|
||||
|
||||
/* OP_KETRPOS is a possessive repeating ket. Remember the current position,
|
||||
and return the MATCH_KETRPOS. This makes it possible to do the repeats one
|
||||
at a time from the outer level, thus saving stack. */
|
||||
|
||||
if (*ecode == OP_KETRPOS)
|
||||
{
|
||||
md->start_match_ptr = mstart; /* In case \K reset it */
|
||||
md->end_match_ptr = eptr;
|
||||
md->end_offset_top = offset_top;
|
||||
RRETURN(MATCH_KETRPOS);
|
||||
}
|
||||
|
||||
/* The normal repeating kets try the rest of the pattern or restart from
|
||||
the preceding bracket, in the appropriate order. In the second case, we can
|
||||
use tail recursion to avoid using another stack frame, unless we have an
|
||||
@@ -3466,7 +3484,7 @@ for (;;)
|
||||
if (possessive) continue; /* No backtracking */
|
||||
for(;;)
|
||||
{
|
||||
if (eptr == pp) goto TAIL_RECURSE;
|
||||
if (eptr <= pp) goto TAIL_RECURSE;
|
||||
RMATCH(eptr, ecode, offset_top, md, eptrb, RM23);
|
||||
if (rrc != MATCH_NOMATCH) RRETURN(rrc);
|
||||
#ifdef SUPPORT_UCP
|
||||
@@ -3887,7 +3905,7 @@ for (;;)
|
||||
if (possessive) continue; /* No backtracking */
|
||||
for(;;)
|
||||
{
|
||||
if (eptr == pp) goto TAIL_RECURSE;
|
||||
if (eptr <= pp) goto TAIL_RECURSE;
|
||||
RMATCH(eptr, ecode, offset_top, md, eptrb, RM30);
|
||||
if (rrc != MATCH_NOMATCH) RRETURN(rrc);
|
||||
eptr--;
|
||||
@@ -4022,7 +4040,7 @@ for (;;)
|
||||
if (possessive) continue; /* No backtracking */
|
||||
for(;;)
|
||||
{
|
||||
if (eptr == pp) goto TAIL_RECURSE;
|
||||
if (eptr <= pp) goto TAIL_RECURSE;
|
||||
RMATCH(eptr, ecode, offset_top, md, eptrb, RM34);
|
||||
if (rrc != MATCH_NOMATCH) RRETURN(rrc);
|
||||
eptr--;
|
||||
@@ -5593,7 +5611,7 @@ for (;;)
|
||||
if (possessive) continue; /* No backtracking */
|
||||
for(;;)
|
||||
{
|
||||
if (eptr == pp) goto TAIL_RECURSE;
|
||||
if (eptr <= pp) goto TAIL_RECURSE;
|
||||
RMATCH(eptr, ecode, offset_top, md, eptrb, RM44);
|
||||
if (rrc != MATCH_NOMATCH) RRETURN(rrc);
|
||||
eptr--;
|
||||
@@ -5635,12 +5653,17 @@ for (;;)
|
||||
|
||||
if (possessive) continue; /* No backtracking */
|
||||
|
||||
/* We use <= pp rather than == pp to detect the start of the run while
|
||||
backtracking because the use of \C in UTF mode can cause BACKCHAR to
|
||||
move back past pp. This is just palliative; the use of \C in UTF mode
|
||||
is fraught with danger. */
|
||||
|
||||
for(;;)
|
||||
{
|
||||
int lgb, rgb;
|
||||
PCRE_PUCHAR fptr;
|
||||
|
||||
if (eptr == pp) goto TAIL_RECURSE; /* At start of char run */
|
||||
if (eptr <= pp) goto TAIL_RECURSE; /* At start of char run */
|
||||
RMATCH(eptr, ecode, offset_top, md, eptrb, RM45);
|
||||
if (rrc != MATCH_NOMATCH) RRETURN(rrc);
|
||||
|
||||
@@ -5658,7 +5681,7 @@ for (;;)
|
||||
|
||||
for (;;)
|
||||
{
|
||||
if (eptr == pp) goto TAIL_RECURSE; /* At start of char run */
|
||||
if (eptr <= pp) goto TAIL_RECURSE; /* At start of char run */
|
||||
fptr = eptr - 1;
|
||||
if (!utf) c = *fptr; else
|
||||
{
|
||||
@@ -5682,54 +5705,25 @@ for (;;)
|
||||
switch(ctype)
|
||||
{
|
||||
case OP_ANY:
|
||||
if (max < INT_MAX)
|
||||
for (i = min; i < max; i++)
|
||||
{
|
||||
for (i = min; i < max; i++)
|
||||
if (eptr >= md->end_subject)
|
||||
{
|
||||
if (eptr >= md->end_subject)
|
||||
{
|
||||
SCHECK_PARTIAL();
|
||||
break;
|
||||
}
|
||||
if (IS_NEWLINE(eptr)) break;
|
||||
if (md->partial != 0 && /* Take care with CRLF partial */
|
||||
eptr + 1 >= md->end_subject &&
|
||||
NLBLOCK->nltype == NLTYPE_FIXED &&
|
||||
NLBLOCK->nllen == 2 &&
|
||||
UCHAR21(eptr) == NLBLOCK->nl[0])
|
||||
{
|
||||
md->hitend = TRUE;
|
||||
if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL);
|
||||
}
|
||||
eptr++;
|
||||
ACROSSCHAR(eptr < md->end_subject, *eptr, eptr++);
|
||||
SCHECK_PARTIAL();
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
/* Handle unlimited UTF-8 repeat */
|
||||
|
||||
else
|
||||
{
|
||||
for (i = min; i < max; i++)
|
||||
if (IS_NEWLINE(eptr)) break;
|
||||
if (md->partial != 0 && /* Take care with CRLF partial */
|
||||
eptr + 1 >= md->end_subject &&
|
||||
NLBLOCK->nltype == NLTYPE_FIXED &&
|
||||
NLBLOCK->nllen == 2 &&
|
||||
UCHAR21(eptr) == NLBLOCK->nl[0])
|
||||
{
|
||||
if (eptr >= md->end_subject)
|
||||
{
|
||||
SCHECK_PARTIAL();
|
||||
break;
|
||||
}
|
||||
if (IS_NEWLINE(eptr)) break;
|
||||
if (md->partial != 0 && /* Take care with CRLF partial */
|
||||
eptr + 1 >= md->end_subject &&
|
||||
NLBLOCK->nltype == NLTYPE_FIXED &&
|
||||
NLBLOCK->nllen == 2 &&
|
||||
UCHAR21(eptr) == NLBLOCK->nl[0])
|
||||
{
|
||||
md->hitend = TRUE;
|
||||
if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL);
|
||||
}
|
||||
eptr++;
|
||||
ACROSSCHAR(eptr < md->end_subject, *eptr, eptr++);
|
||||
md->hitend = TRUE;
|
||||
if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL);
|
||||
}
|
||||
eptr++;
|
||||
ACROSSCHAR(eptr < md->end_subject, *eptr, eptr++);
|
||||
}
|
||||
break;
|
||||
|
||||
@@ -5937,7 +5931,7 @@ for (;;)
|
||||
if (possessive) continue; /* No backtracking */
|
||||
for(;;)
|
||||
{
|
||||
if (eptr == pp) goto TAIL_RECURSE;
|
||||
if (eptr <= pp) goto TAIL_RECURSE;
|
||||
RMATCH(eptr, ecode, offset_top, md, eptrb, RM46);
|
||||
if (rrc != MATCH_NOMATCH) RRETURN(rrc);
|
||||
eptr--;
|
||||
@@ -6520,7 +6514,7 @@ tables = re->tables;
|
||||
|
||||
if (extra_data != NULL)
|
||||
{
|
||||
register unsigned int flags = extra_data->flags;
|
||||
unsigned long int flags = extra_data->flags;
|
||||
if ((flags & PCRE_EXTRA_STUDY_DATA) != 0)
|
||||
study = (const pcre_study_data *)extra_data->study_data;
|
||||
if ((flags & PCRE_EXTRA_MATCH_LIMIT) != 0)
|
||||
@@ -6692,7 +6686,8 @@ if (md->offset_vector != NULL)
|
||||
register int *iend = iptr - re->top_bracket;
|
||||
if (iend < md->offset_vector + 2) iend = md->offset_vector + 2;
|
||||
while (--iptr >= iend) *iptr = -1;
|
||||
md->offset_vector[0] = md->offset_vector[1] = -1;
|
||||
if (offsetcount > 0) md->offset_vector[0] = -1;
|
||||
if (offsetcount > 1) md->offset_vector[1] = -1;
|
||||
}
|
||||
|
||||
/* Set up the first character to match, if available. The first_char value is
|
||||
|
||||
Reference in New Issue
Block a user