Updated to PCRE version 8.41

Testing Done: Built on Windows OS for all configurations.
2025-11-25 06:36:37 +01:00 · 2017-12-01 10:19:10 -05:00
parent 2cac517034
commit e9b2ba9b35
8 changed files with 556 additions and 500 deletions
--- a/Foundation/src/pcre.h
+++ b/Foundation/src/pcre.h
@@ -42,9 +42,9 @@ POSSIBILITY OF SUCH DAMAGE.
 /* The current PCRE version information. */

 #define PCRE_MAJOR          8
-#define PCRE_MINOR          40
-#define PCRE_PRERELEASE     
-#define PCRE_DATE           2017-01-11
+#define PCRE_MINOR          41
+#define PCRE_PRERELEASE
+#define PCRE_DATE           2017-07-05

 /* When an application links to a PCRE DLL in Windows, the symbols that are
 imported have to be identified as such. When building PCRE, the appropriate
@@ -323,7 +323,7 @@ these bits, just add new ones on the end, in order to remain compatible. */

 struct real_pcre8_or_16;                 /* declaration; the definition is private  */
 typedef struct real_pcre8_or_16 pcre;
-    
+
 struct real_pcre8_or_16;               /* declaration; the definition is private  */
 typedef struct real_pcre8_or_16 pcre16;

--- a/Foundation/src/pcre_compile.c
+++ b/Foundation/src/pcre_compile.c
@@ -5741,6 +5741,21 @@ for (;; ptr++)
      ptr = p - 1;    /* Character before the next significant one. */
      }

+    /* We also need to skip over (?# comments, which are not dependent on
+    extended mode. */
+
+    if (ptr[1] == CHAR_LEFT_PARENTHESIS && ptr[2] == CHAR_QUESTION_MARK &&
+        ptr[3] == CHAR_NUMBER_SIGN)
+      {
+      ptr += 4;
+      while (*ptr != CHAR_NULL && *ptr != CHAR_RIGHT_PARENTHESIS) ptr++;
+      if (*ptr == CHAR_NULL)
+        {
+        *errorcodeptr = ERR18;
+        goto FAILED;
+        }
+      }
+
    /* If the next character is '+', we have a possessive quantifier. This
    implies greediness, whatever the setting of the PCRE_UNGREEDY option.
    If the next character is '?' this is a minimizing repeat, by default,
@@ -8212,7 +8227,6 @@ for (;; ptr++)

      if (mclength == 1 || req_caseopt == 0)
        {
-        firstchar = mcbuffer[0] | req_caseopt;
        firstchar = mcbuffer[0];
        firstcharflags = req_caseopt;

@@ -9781,4 +9795,3 @@ return (pcre32 *)re;
 }

 /* End of pcre_compile.c */
-
--- a/Foundation/src/pcre_dfa_exec.c
+++ b/Foundation/src/pcre_dfa_exec.c
@@ -2624,7 +2624,7 @@ for (;;)
          if (isinclass)
            {
            int max = (int)GET2(ecode, 1 + IMM2_SIZE);
-            if (*ecode == OP_CRPOSRANGE)
+            if (*ecode == OP_CRPOSRANGE && count >= (int)GET2(ecode, 1))
              {
              active_count--;           /* Remove non-match possibility */
              next_active_state--;
--- a/Foundation/src/pcre_exec.c
+++ b/Foundation/src/pcre_exec.c
@@ -670,7 +670,7 @@ if (ecode == NULL)
    return match((PCRE_PUCHAR)&rdepth, NULL, NULL, 0, NULL, NULL, 1);
  else
    {
-    int len = (char *)&rdepth - (char *)eptr;
+    int len = (int)((char *)&rdepth - (char *)eptr);
    return (len > 0)? -len : len;
    }
  }
--- a/Foundation/src/pcre_internal.h
+++ b/Foundation/src/pcre_internal.h
@@ -229,9 +229,9 @@ stdint.h is available, include it; it may define INT64_MAX. Systems that do not
 have stdint.h (e.g. Solaris) may have inttypes.h. The macro int64_t may be set
 by "configure". */

-#if HAVE_STDINT_H
+#if defined HAVE_STDINT_H
 #include <stdint.h>
-#elif HAVE_INTTYPES_H
+#elif defined HAVE_INTTYPES_H
 #include <inttypes.h>
 #endif

@@ -2772,6 +2772,9 @@ extern const pcre_uint8  PRIV(ucd_stage1)[];
 extern const pcre_uint16 PRIV(ucd_stage2)[];
 extern const pcre_uint32 PRIV(ucp_gentype)[];
 extern const pcre_uint32 PRIV(ucp_gbtable)[];
+#ifdef COMPILE_PCRE32
+extern const ucd_record  PRIV(dummy_ucd_record)[];
+#endif
 #ifdef SUPPORT_JIT
 extern const int         PRIV(ucp_typerange)[];
 #endif
@@ -2780,10 +2783,16 @@ extern const int         PRIV(ucp_typerange)[];
 /* UCD access macros */

 #define UCD_BLOCK_SIZE 128
-#define GET_UCD(ch) (PRIV(ucd_records) + \
+#define REAL_GET_UCD(ch) (PRIV(ucd_records) + \
        PRIV(ucd_stage2)[PRIV(ucd_stage1)[(int)(ch) / UCD_BLOCK_SIZE] * \
        UCD_BLOCK_SIZE + (int)(ch) % UCD_BLOCK_SIZE])

+#ifdef COMPILE_PCRE32
+#define GET_UCD(ch) ((ch > 0x10ffff)? PRIV(dummy_ucd_record) : REAL_GET_UCD(ch))
+#else
+#define GET_UCD(ch) REAL_GET_UCD(ch)
+#endif
+
 #define UCD_CHARTYPE(ch)    GET_UCD(ch)->chartype
 #define UCD_SCRIPT(ch)      GET_UCD(ch)->script
 #define UCD_CATEGORY(ch)    PRIV(ucp_gentype)[UCD_CHARTYPE(ch)]
--- a/Foundation/src/pcre_jit_compile.c
+++ b/Foundation/src/pcre_jit_compile.c
--- a/Foundation/src/pcre_tables.c
+++ b/Foundation/src/pcre_tables.c
@@ -157,7 +157,7 @@ const pcre_uint32 PRIV(ucp_gbtable[]) = {

   (1<<ucp_gbExtend)|(1<<ucp_gbSpacingMark),                /*  5 SpacingMark */
   (1<<ucp_gbExtend)|(1<<ucp_gbSpacingMark)|(1<<ucp_gbL)|   /*  6 L */
-     (1<<ucp_gbL)|(1<<ucp_gbV)|(1<<ucp_gbLV)|(1<<ucp_gbLVT),
+     (1<<ucp_gbV)|(1<<ucp_gbLV)|(1<<ucp_gbLVT),

   (1<<ucp_gbExtend)|(1<<ucp_gbSpacingMark)|(1<<ucp_gbV)|   /*  7 V */
     (1<<ucp_gbT),
--- a/Foundation/src/pcre_ucd.c
+++ b/Foundation/src/pcre_ucd.c
@@ -35,6 +35,20 @@ const pcre_uint16 PRIV(ucd_stage2)[] = {0};
 const pcre_uint32 PRIV(ucd_caseless_sets)[] = {0};
 #else

+/* If the 32-bit library is run in non-UTF mode, character values
+greater than 0x10ffff may be encountered. For these we set up a
+special record. */
+
+#ifdef COMPILE_PCRE32
+const ucd_record PRIV(dummy_ucd_record)[] = {{
+  ucp_Common,    /* script */
+  ucp_Cn,        /* type unassigned */
+  ucp_gbOther,   /* grapheme break property */
+  0,             /* case set */
+  0,             /* other case */
+  }};
+#endif
+
 /* When recompiling tables with a new Unicode version, please check the
 types in this structure definition from pcre_internal.h (the actual
 field names will be different):