Updated to PCRE version 8.41

Testing Done: Built on Windows OS for all configurations.
This commit is contained in:
Shahzad 2017-12-01 10:19:10 -05:00
parent d962ffc81b
commit e091894209
8 changed files with 558 additions and 502 deletions

View File

@ -42,9 +42,9 @@ POSSIBILITY OF SUCH DAMAGE.
/* The current PCRE version information. */
#define PCRE_MAJOR 8
#define PCRE_MINOR 40
#define PCRE_PRERELEASE
#define PCRE_DATE 2017-01-11
#define PCRE_MINOR 41
#define PCRE_PRERELEASE
#define PCRE_DATE 2017-07-05
/* When an application links to a PCRE DLL in Windows, the symbols that are
imported have to be identified as such. When building PCRE, the appropriate
@ -323,7 +323,7 @@ these bits, just add new ones on the end, in order to remain compatible. */
struct real_pcre8_or_16; /* declaration; the definition is private */
typedef struct real_pcre8_or_16 pcre;
struct real_pcre8_or_16; /* declaration; the definition is private */
typedef struct real_pcre8_or_16 pcre16;

View File

@ -5741,6 +5741,21 @@ for (;; ptr++)
ptr = p - 1; /* Character before the next significant one. */
}
/* We also need to skip over (?# comments, which are not dependent on
extended mode. */
if (ptr[1] == CHAR_LEFT_PARENTHESIS && ptr[2] == CHAR_QUESTION_MARK &&
ptr[3] == CHAR_NUMBER_SIGN)
{
ptr += 4;
while (*ptr != CHAR_NULL && *ptr != CHAR_RIGHT_PARENTHESIS) ptr++;
if (*ptr == CHAR_NULL)
{
*errorcodeptr = ERR18;
goto FAILED;
}
}
/* If the next character is '+', we have a possessive quantifier. This
implies greediness, whatever the setting of the PCRE_UNGREEDY option.
If the next character is '?' this is a minimizing repeat, by default,
@ -8212,7 +8227,6 @@ for (;; ptr++)
if (mclength == 1 || req_caseopt == 0)
{
firstchar = mcbuffer[0] | req_caseopt;
firstchar = mcbuffer[0];
firstcharflags = req_caseopt;
@ -9781,4 +9795,3 @@ return (pcre32 *)re;
}
/* End of pcre_compile.c */

View File

@ -2624,7 +2624,7 @@ for (;;)
if (isinclass)
{
int max = (int)GET2(ecode, 1 + IMM2_SIZE);
if (*ecode == OP_CRPOSRANGE)
if (*ecode == OP_CRPOSRANGE && count >= (int)GET2(ecode, 1))
{
active_count--; /* Remove non-match possibility */
next_active_state--;

View File

@ -670,7 +670,7 @@ if (ecode == NULL)
return match((PCRE_PUCHAR)&rdepth, NULL, NULL, 0, NULL, NULL, 1);
else
{
int len = (char *)&rdepth - (char *)eptr;
int len = (int)((char *)&rdepth - (char *)eptr);
return (len > 0)? -len : len;
}
}
@ -1041,7 +1041,7 @@ for (;;)
the result of a recursive call to match() whatever happened so it was
possible to reduce stack usage by turning this into a tail recursion,
except in the case of a possibly empty group. However, now that there is
the possibility of (*THEN) occurring in the final alternative, this
the possiblity of (*THEN) occurring in the final alternative, this
optimization is no longer always possible.
We can optimize if we know there are no (*THEN)s in the pattern; at present

View File

@ -229,9 +229,9 @@ stdint.h is available, include it; it may define INT64_MAX. Systems that do not
have stdint.h (e.g. Solaris) may have inttypes.h. The macro int64_t may be set
by "configure". */
#if HAVE_STDINT_H
#if defined HAVE_STDINT_H
#include <stdint.h>
#elif HAVE_INTTYPES_H
#elif defined HAVE_INTTYPES_H
#include <inttypes.h>
#endif
@ -2772,6 +2772,9 @@ extern const pcre_uint8 PRIV(ucd_stage1)[];
extern const pcre_uint16 PRIV(ucd_stage2)[];
extern const pcre_uint32 PRIV(ucp_gentype)[];
extern const pcre_uint32 PRIV(ucp_gbtable)[];
#ifdef COMPILE_PCRE32
extern const ucd_record PRIV(dummy_ucd_record)[];
#endif
#ifdef SUPPORT_JIT
extern const int PRIV(ucp_typerange)[];
#endif
@ -2780,10 +2783,16 @@ extern const int PRIV(ucp_typerange)[];
/* UCD access macros */
#define UCD_BLOCK_SIZE 128
#define GET_UCD(ch) (PRIV(ucd_records) + \
#define REAL_GET_UCD(ch) (PRIV(ucd_records) + \
PRIV(ucd_stage2)[PRIV(ucd_stage1)[(int)(ch) / UCD_BLOCK_SIZE] * \
UCD_BLOCK_SIZE + (int)(ch) % UCD_BLOCK_SIZE])
#ifdef COMPILE_PCRE32
#define GET_UCD(ch) ((ch > 0x10ffff)? PRIV(dummy_ucd_record) : REAL_GET_UCD(ch))
#else
#define GET_UCD(ch) REAL_GET_UCD(ch)
#endif
#define UCD_CHARTYPE(ch) GET_UCD(ch)->chartype
#define UCD_SCRIPT(ch) GET_UCD(ch)->script
#define UCD_CATEGORY(ch) PRIV(ucp_gentype)[UCD_CHARTYPE(ch)]

File diff suppressed because it is too large Load Diff

View File

@ -157,7 +157,7 @@ const pcre_uint32 PRIV(ucp_gbtable[]) = {
(1<<ucp_gbExtend)|(1<<ucp_gbSpacingMark), /* 5 SpacingMark */
(1<<ucp_gbExtend)|(1<<ucp_gbSpacingMark)|(1<<ucp_gbL)| /* 6 L */
(1<<ucp_gbL)|(1<<ucp_gbV)|(1<<ucp_gbLV)|(1<<ucp_gbLVT),
(1<<ucp_gbV)|(1<<ucp_gbLV)|(1<<ucp_gbLVT),
(1<<ucp_gbExtend)|(1<<ucp_gbSpacingMark)|(1<<ucp_gbV)| /* 7 V */
(1<<ucp_gbT),

View File

@ -35,6 +35,20 @@ const pcre_uint16 PRIV(ucd_stage2)[] = {0};
const pcre_uint32 PRIV(ucd_caseless_sets)[] = {0};
#else
/* If the 32-bit library is run in non-32-bit mode, character values
greater than 0x10ffff may be encountered. For these we set up a
special record. */
#ifdef COMPILE_PCRE32
const ucd_record PRIV(dummy_ucd_record)[] = {{
ucp_Common, /* script */
ucp_Cn, /* type unassigned */
ucp_gbOther, /* grapheme break property */
0, /* case set */
0, /* other case */
}};
#endif
/* When recompiling tables with a new Unicode version, please check the
types in this structure definition from pcre_internal.h (the actual
field names will be different):