GH #1586: Upgrade bundled PCRE to 8.40

2025-01-19 00:46:03 +01:00 · 2017-02-11 12:04:36 +01:00 · 2017-02-11 12:04:36 +01:00 · 7d91a4bc94
commit 7d91a4bc94
parent f8a0bbff1b
14 changed files with 6596 additions and 4713 deletions
--- a/Foundation/include/Poco/Unicode.h
+++ b/Foundation/include/Poco/Unicode.h
@ -40,7 +40,7 @@ public:
 	// Implementation note: the following definitions must be kept
 	// in sync with those from ucp.h (PCRE).
 	enum CharacterCategory
-		/// Unicode 5.0 character categories.
+		/// Unicode character categories.
 	{
 		UCP_OTHER,
 		UCP_LETTER,
@ -52,7 +52,7 @@ public:
 	};

 	enum CharacterType
-		/// Unicode 5.0 character types.
+		/// Unicode character types.
 	{
 		UCP_CONTROL,
 		UCP_FORMAT,
@ -87,7 +87,7 @@ public:
 	};
 	
 	enum Script
-		/// Unicode 5.0 scripts.
+		/// Unicode 7.0 script identifiers.
 	{
 		UCP_ARABIC,
 		UCP_ARMENIAN,
@ -150,11 +150,13 @@ public:
 		UCP_TIFINAGH,
 		UCP_UGARITIC,
 		UCP_YI,
+		// Unicode 5.0
 		UCP_BALINESE,
 		UCP_CUNEIFORM,
 		UCP_NKO,
 		UCP_PHAGS_PA,
 		UCP_PHOENICIAN,
+		// Unicode 5.1
 		UCP_CARIAN,
 		UCP_CHAM,
 		UCP_KAYAH_LI,
@ -165,7 +167,59 @@ public:
 		UCP_REJANG,
 		UCP_SAURASHTRA,
 		UCP_SUNDANESE,
-		UCP_VAI
+		UCP_VAI,
+		// Unicode 5.2
+		UCP_AVESTAN,
+		UCP_BAMUM,
+		UCP_EGYPTIAN_HIEROGLYPHS,
+		UCP_IMPERIAL_ARAMAIC,
+		UCP_INSCRIPTIONAL_PAHLAVI,
+		UCP_INSCRIPTIONAL_PARTHIAN,
+		UCP_JAVANESE,
+		UCP_KAITHI,
+		UCP_LISU,
+		UCP_MEETEI_MAYEK,
+		UCP_OLD_SOUTH_ARABIAN,
+		UCP_OLD_TURKIC,
+		UCP_SAMARITAN,
+		UCP_TAI_THAM,
+		UCP_TAI_VIET,
+		// Unicode 6.0
+		UCP_BATAK,
+		UCP_BRAHMI,
+		UCP_MANDAIC,
+		// Unicode 6.1
+		UCP_CHAKMA,
+		UCP_MEROITIC_CURSIVE,
+		UCP_MEROITIC_HIEROGLYPHS,
+		UCP_MIAO,
+		UCP_SHARADA,
+		UCP_SORA_SOMPENG,
+		UCP_TAKRI,
+		// Unicode 7.0
+		UCP_BASSA_VAH,
+		UCP_CAUCASIAN_ALBANIAN,
+		UCP_DUPLOYAN,
+		UCP_ELBASAN,
+		UCP_GRANTHA,
+		UCP_KHOJKI,
+		UCP_KHUDAWADI,
+		UCP_LINEAR_A,
+		UCP_MAHAJANI,
+		UCP_MANICHAEAN,
+		UCP_MENDE_KIKAKUI,
+		UCP_MODI,
+		UCP_MRO,
+		UCP_NABATAEAN,
+		UCP_OLD_NORTH_ARABIAN,
+		UCP_OLD_PERMIC,
+		UCP_PAHAWH_HMONG,
+		UCP_PALMYRENE,
+		UCP_PSALTER_PAHLAVI,
+		UCP_PAU_CIN_HAU,
+		UCP_SIDDHAM,
+		UCP_TIRHUTA,
+		UCP_WARANG_CITI
 	};
 	
 	enum
--- a/Foundation/src/pcre.h
+++ b/Foundation/src/pcre.h
@ -42,9 +42,9 @@ POSSIBILITY OF SUCH DAMAGE.
 /* The current PCRE version information. */

 #define PCRE_MAJOR          8
-#define PCRE_MINOR          35
+#define PCRE_MINOR          40
 #define PCRE_PRERELEASE     
-#define PCRE_DATE           2014-04-04
+#define PCRE_DATE           2017-01-11

 /* When an application links to a PCRE DLL in Windows, the symbols that are
 imported have to be identified as such. When building PCRE, the appropriate
--- a/Foundation/src/pcre_compile.c
+++ b/Foundation/src/pcre_compile.c
--- a/Foundation/src/pcre_config.h
+++ b/Foundation/src/pcre_config.h
@ -283,7 +283,7 @@ sure both macros are undefined; an emulation function will then be used. */
 #define PACKAGE_NAME "PCRE"

 /* Define to the full name and version of this package. */
-#define PACKAGE_STRING "PCRE 8.35"
+#define PACKAGE_STRING "PCRE 8.40"

 /* Define to the one symbol short name of this package. */
 #define PACKAGE_TARNAME "pcre"
@ -292,7 +292,7 @@ sure both macros are undefined; an emulation function will then be used. */
 #define PACKAGE_URL ""

 /* Define to the version of this package. */
-#define PACKAGE_VERSION "8.35"
+#define PACKAGE_VERSION "8.40"

 /* The value of PARENS_NEST_LIMIT specifies the maximum depth of nested
   parentheses (of any kind) in a pattern. This limits the amount of system
@ -394,7 +394,7 @@ sure both macros are undefined; an emulation function will then be used. */
 /* #undef SUPPORT_VALGRIND */

 /* Version number of package */
-#define VERSION "8.35"
+#define VERSION "8.40"

 /* Define to empty if `const' does not conform to ANSI C. */
 /* #undef const */
--- a/Foundation/src/pcre_dfa_exec.c
+++ b/Foundation/src/pcre_dfa_exec.c
@ -2735,9 +2735,10 @@ for (;;)
            condcode == OP_DNRREF)
          return PCRE_ERROR_DFA_UCOND;

-        /* The DEFINE condition is always false */
+        /* The DEFINE condition is always false, and the assertion (?!) is
+        converted to OP_FAIL. */

-        if (condcode == OP_DEF)
+        if (condcode == OP_DEF || condcode == OP_FAIL)
          { ADD_ACTIVE(state_offset + codelink + LINK_SIZE + 1, 0); }

        /* The only supported version of OP_RREF is for the value RREF_ANY,
@ -3241,7 +3242,7 @@ md->callout_data = NULL;

 if (extra_data != NULL)
  {
-  unsigned int flags = extra_data->flags;
+  unsigned long int flags = extra_data->flags;
  if ((flags & PCRE_EXTRA_STUDY_DATA) != 0)
    study = (const pcre_study_data *)extra_data->study_data;
  if ((flags & PCRE_EXTRA_MATCH_LIMIT) != 0) return PCRE_ERROR_DFA_UMLIMIT;
--- a/Foundation/src/pcre_exec.c
+++ b/Foundation/src/pcre_exec.c
@ -1137,88 +1137,81 @@ for (;;)
    printf("\n");
 #endif

-    if (offset < md->offset_max)
+    if (offset >= md->offset_max) goto POSSESSIVE_NON_CAPTURE;
+
+    matched_once = FALSE;
+    code_offset = (int)(ecode - md->start_code);
+
+    save_offset1 = md->offset_vector[offset];
+    save_offset2 = md->offset_vector[offset+1];
+    save_offset3 = md->offset_vector[md->offset_end - number];
+    save_capture_last = md->capture_last;
+
+    DPRINTF(("saving %d %d %d\n", save_offset1, save_offset2, save_offset3));
+
+    /* Each time round the loop, save the current subject position for use
+    when the group matches. For MATCH_MATCH, the group has matched, so we
+    restart it with a new subject starting position, remembering that we had
+    at least one match. For MATCH_NOMATCH, carry on with the alternatives, as
+    usual. If we haven't matched any alternatives in any iteration, check to
+    see if a previous iteration matched. If so, the group has matched;
+    continue from afterwards. Otherwise it has failed; restore the previous
+    capture values before returning NOMATCH. */
+
+    for (;;)
      {
-      matched_once = FALSE;
-      code_offset = (int)(ecode - md->start_code);
-
-      save_offset1 = md->offset_vector[offset];
-      save_offset2 = md->offset_vector[offset+1];
-      save_offset3 = md->offset_vector[md->offset_end - number];
-      save_capture_last = md->capture_last;
-
-      DPRINTF(("saving %d %d %d\n", save_offset1, save_offset2, save_offset3));
-
-      /* Each time round the loop, save the current subject position for use
-      when the group matches. For MATCH_MATCH, the group has matched, so we
-      restart it with a new subject starting position, remembering that we had
-      at least one match. For MATCH_NOMATCH, carry on with the alternatives, as
-      usual. If we haven't matched any alternatives in any iteration, check to
-      see if a previous iteration matched. If so, the group has matched;
-      continue from afterwards. Otherwise it has failed; restore the previous
-      capture values before returning NOMATCH. */
-
-      for (;;)
+      md->offset_vector[md->offset_end - number] =
+        (int)(eptr - md->start_subject);
+      if (op >= OP_SBRA) md->match_function_type = MATCH_CBEGROUP;
+      RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode], offset_top, md,
+        eptrb, RM63);
+      if (rrc == MATCH_KETRPOS)
        {
-        md->offset_vector[md->offset_end - number] =
-          (int)(eptr - md->start_subject);
-        if (op >= OP_SBRA) md->match_function_type = MATCH_CBEGROUP;
-        RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode], offset_top, md,
-          eptrb, RM63);
-        if (rrc == MATCH_KETRPOS)
+        offset_top = md->end_offset_top;
+        ecode = md->start_code + code_offset;
+        save_capture_last = md->capture_last;
+        matched_once = TRUE;
+        mstart = md->start_match_ptr;    /* In case \K changed it */
+        if (eptr == md->end_match_ptr)   /* Matched an empty string */
          {
-          offset_top = md->end_offset_top;
-          eptr = md->end_match_ptr;
-          ecode = md->start_code + code_offset;
-          save_capture_last = md->capture_last;
-          matched_once = TRUE;
-          mstart = md->start_match_ptr;    /* In case \K changed it */
-          continue;
+          do ecode += GET(ecode, 1); while (*ecode == OP_ALT);
+          break;
          }
-
-        /* See comment in the code for capturing groups above about handling
-        THEN. */
-
-        if (rrc == MATCH_THEN)
-          {
-          next = ecode + GET(ecode,1);
-          if (md->start_match_ptr < next &&
-              (*ecode == OP_ALT || *next == OP_ALT))
-            rrc = MATCH_NOMATCH;
-          }
-
-        if (rrc != MATCH_NOMATCH) RRETURN(rrc);
-        md->capture_last = save_capture_last;
-        ecode += GET(ecode, 1);
-        if (*ecode != OP_ALT) break;
+        eptr = md->end_match_ptr;
+        continue;
        }

-      if (!matched_once)
+      /* See comment in the code for capturing groups above about handling
+      THEN. */
+
+      if (rrc == MATCH_THEN)
        {
-        md->offset_vector[offset] = save_offset1;
-        md->offset_vector[offset+1] = save_offset2;
-        md->offset_vector[md->offset_end - number] = save_offset3;
+        next = ecode + GET(ecode,1);
+        if (md->start_match_ptr < next &&
+            (*ecode == OP_ALT || *next == OP_ALT))
+          rrc = MATCH_NOMATCH;
        }

-      if (allow_zero || matched_once)
-        {
-        ecode += 1 + LINK_SIZE;
-        break;
-        }
-
-      RRETURN(MATCH_NOMATCH);
+      if (rrc != MATCH_NOMATCH) RRETURN(rrc);
+      md->capture_last = save_capture_last;
+      ecode += GET(ecode, 1);
+      if (*ecode != OP_ALT) break;
      }

-    /* FALL THROUGH ... Insufficient room for saving captured contents. Treat
-    as a non-capturing bracket. */
+    if (!matched_once)
+      {
+      md->offset_vector[offset] = save_offset1;
+      md->offset_vector[offset+1] = save_offset2;
+      md->offset_vector[md->offset_end - number] = save_offset3;
+      }

-    /* VVVVVVVVVVVVVVVVVVVVVVVVV */
-    /* VVVVVVVVVVVVVVVVVVVVVVVVV */
+    if (allow_zero || matched_once)
+      {
+      ecode += 1 + LINK_SIZE;
+      break;
+      }

-    DPRINTF(("insufficient capture room: treat as non-capturing\n"));
-
-    /* VVVVVVVVVVVVVVVVVVVVVVVVV */
-    /* VVVVVVVVVVVVVVVVVVVVVVVVV */
+    RRETURN(MATCH_NOMATCH);

    /* Non-capturing possessive bracket with unlimited repeat. We come here
    from BRAZERO with allow_zero = TRUE. The code is similar to the above,
@ -1242,10 +1235,15 @@ for (;;)
      if (rrc == MATCH_KETRPOS)
        {
        offset_top = md->end_offset_top;
-        eptr = md->end_match_ptr;
        ecode = md->start_code + code_offset;
        matched_once = TRUE;
        mstart = md->start_match_ptr;   /* In case \K reset it */
+        if (eptr == md->end_match_ptr)  /* Matched an empty string */
+          {
+          do ecode += GET(ecode, 1); while (*ecode == OP_ALT);
+          break;
+          }
+        eptr = md->end_match_ptr;
        continue;
        }

@ -1379,6 +1377,7 @@ for (;;)
      break;

      case OP_DEF:     /* DEFINE - always false */
+      case OP_FAIL:    /* From optimized (?!) condition */
      break;

      /* The condition is an assertion. Call match() to evaluate it - setting
@ -1395,8 +1394,11 @@ for (;;)
        condition = TRUE;

        /* Advance ecode past the assertion to the start of the first branch,
-        but adjust it so that the general choosing code below works. */
+        but adjust it so that the general choosing code below works. If the
+        assertion has a quantifier that allows zero repeats we must skip over
+        the BRAZERO. This is a lunatic thing to do, but somebody did! */

+        if (*ecode == OP_BRAZERO) ecode++;
        ecode += GET(ecode, 1);
        while (*ecode == OP_ALT) ecode += GET(ecode, 1);
        ecode += 1 + LINK_SIZE - PRIV(OP_lengths)[condcode];
@ -1465,7 +1467,18 @@ for (;;)
      md->offset_vector[offset] =
        md->offset_vector[md->offset_end - number];
      md->offset_vector[offset+1] = (int)(eptr - md->start_subject);
-      if (offset_top <= offset) offset_top = offset + 2;
+
+      /* If this group is at or above the current high water mark, ensure that
+      any groups between the current high water mark and this group are marked
+      unset and then update the high water mark. */
+
+      if (offset >= offset_top)
+        {
+        register int *iptr = md->offset_vector + offset_top;
+        register int *iend = md->offset_vector + offset;
+        while (iptr < iend) *iptr++ = -1;
+        offset_top = offset + 2;
+        }
      }
    ecode += 1 + IMM2_SIZE;
    break;
@ -1817,7 +1830,11 @@ for (;;)
        are defined in a range that can be tested for. */

        if (rrc >= MATCH_BACKTRACK_MIN && rrc <= MATCH_BACKTRACK_MAX)
+          {
+          if (new_recursive.offset_save != stacksave)
+            (PUBL(free))(new_recursive.offset_save);
          RRETURN(MATCH_NOMATCH);
+          }

        /* Any return code other than NOMATCH is an error. */

@ -1980,6 +1997,19 @@ for (;;)
        }
      }

+    /* OP_KETRPOS is a possessive repeating ket. Remember the current position,
+    and return the MATCH_KETRPOS. This makes it possible to do the repeats one
+    at a time from the outer level, thus saving stack. This must precede the
+    empty string test - in this case that test is done at the outer level. */
+
+    if (*ecode == OP_KETRPOS)
+      {
+      md->start_match_ptr = mstart;    /* In case \K reset it */
+      md->end_match_ptr = eptr;
+      md->end_offset_top = offset_top;
+      RRETURN(MATCH_KETRPOS);
+      }
+
    /* For an ordinary non-repeating ket, just continue at this level. This
    also happens for a repeating ket if no characters were matched in the
    group. This is the forcible breaking of infinite loops as implemented in
@ -2002,18 +2032,6 @@ for (;;)
      break;
      }

-    /* OP_KETRPOS is a possessive repeating ket. Remember the current position,
-    and return the MATCH_KETRPOS. This makes it possible to do the repeats one
-    at a time from the outer level, thus saving stack. */
-
-    if (*ecode == OP_KETRPOS)
-      {
-      md->start_match_ptr = mstart;    /* In case \K reset it */
-      md->end_match_ptr = eptr;
-      md->end_offset_top = offset_top;
-      RRETURN(MATCH_KETRPOS);
-      }
-
    /* The normal repeating kets try the rest of the pattern or restart from
    the preceding bracket, in the appropriate order. In the second case, we can
    use tail recursion to avoid using another stack frame, unless we have an
@ -3466,7 +3484,7 @@ for (;;)
          if (possessive) continue;    /* No backtracking */
          for(;;)
            {
-            if (eptr == pp) goto TAIL_RECURSE;
+            if (eptr <= pp) goto TAIL_RECURSE;
            RMATCH(eptr, ecode, offset_top, md, eptrb, RM23);
            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
 #ifdef SUPPORT_UCP
@ -3887,7 +3905,7 @@ for (;;)
          if (possessive) continue;    /* No backtracking */
          for(;;)
            {
-            if (eptr == pp) goto TAIL_RECURSE;
+            if (eptr <= pp) goto TAIL_RECURSE;
            RMATCH(eptr, ecode, offset_top, md, eptrb, RM30);
            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
            eptr--;
@ -4022,7 +4040,7 @@ for (;;)
          if (possessive) continue;    /* No backtracking */
          for(;;)
            {
-            if (eptr == pp) goto TAIL_RECURSE;
+            if (eptr <= pp) goto TAIL_RECURSE;
            RMATCH(eptr, ecode, offset_top, md, eptrb, RM34);
            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
            eptr--;
@ -5593,7 +5611,7 @@ for (;;)
        if (possessive) continue;    /* No backtracking */
        for(;;)
          {
-          if (eptr == pp) goto TAIL_RECURSE;
+          if (eptr <= pp) goto TAIL_RECURSE;
          RMATCH(eptr, ecode, offset_top, md, eptrb, RM44);
          if (rrc != MATCH_NOMATCH) RRETURN(rrc);
          eptr--;
@ -5635,12 +5653,17 @@ for (;;)

        if (possessive) continue;    /* No backtracking */

+        /* We use <= pp rather than == pp to detect the start of the run while
+        backtracking because the use of \C in UTF mode can cause BACKCHAR to
+        move back past pp. This is just palliative; the use of \C in UTF mode
+        is fraught with danger. */
+
        for(;;)
          {
          int lgb, rgb;
          PCRE_PUCHAR fptr;

-          if (eptr == pp) goto TAIL_RECURSE;   /* At start of char run */
+          if (eptr <= pp) goto TAIL_RECURSE;   /* At start of char run */
          RMATCH(eptr, ecode, offset_top, md, eptrb, RM45);
          if (rrc != MATCH_NOMATCH) RRETURN(rrc);

@ -5658,7 +5681,7 @@ for (;;)

          for (;;)
            {
-            if (eptr == pp) goto TAIL_RECURSE;   /* At start of char run */
+            if (eptr <= pp) goto TAIL_RECURSE;   /* At start of char run */
            fptr = eptr - 1;
            if (!utf) c = *fptr; else
              {
@ -5682,54 +5705,25 @@ for (;;)
        switch(ctype)
          {
          case OP_ANY:
-          if (max < INT_MAX)
+          for (i = min; i < max; i++)
            {
-            for (i = min; i < max; i++)
+            if (eptr >= md->end_subject)
              {
-              if (eptr >= md->end_subject)
-                {
-                SCHECK_PARTIAL();
-                break;
-                }
-              if (IS_NEWLINE(eptr)) break;
-              if (md->partial != 0 &&    /* Take care with CRLF partial */
-                  eptr + 1 >= md->end_subject &&
-                  NLBLOCK->nltype == NLTYPE_FIXED &&
-                  NLBLOCK->nllen == 2 &&
-                  UCHAR21(eptr) == NLBLOCK->nl[0])
-                {
-                md->hitend = TRUE;
-                if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL);
-                }
-              eptr++;
-              ACROSSCHAR(eptr < md->end_subject, *eptr, eptr++);
+              SCHECK_PARTIAL();
+              break;
              }
-            }
-
-          /* Handle unlimited UTF-8 repeat */
-
-          else
-            {
-            for (i = min; i < max; i++)
+            if (IS_NEWLINE(eptr)) break;
+            if (md->partial != 0 &&    /* Take care with CRLF partial */
+                eptr + 1 >= md->end_subject &&
+                NLBLOCK->nltype == NLTYPE_FIXED &&
+                NLBLOCK->nllen == 2 &&
+                UCHAR21(eptr) == NLBLOCK->nl[0])
              {
-              if (eptr >= md->end_subject)
-                {
-                SCHECK_PARTIAL();
-                break;
-                }
-              if (IS_NEWLINE(eptr)) break;
-              if (md->partial != 0 &&    /* Take care with CRLF partial */
-                  eptr + 1 >= md->end_subject &&
-                  NLBLOCK->nltype == NLTYPE_FIXED &&
-                  NLBLOCK->nllen == 2 &&
-                  UCHAR21(eptr) == NLBLOCK->nl[0])
-                {
-                md->hitend = TRUE;
-                if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL);
-                }
-              eptr++;
-              ACROSSCHAR(eptr < md->end_subject, *eptr, eptr++);
+              md->hitend = TRUE;
+              if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL);
              }
+            eptr++;
+            ACROSSCHAR(eptr < md->end_subject, *eptr, eptr++);
            }
          break;

@ -5937,7 +5931,7 @@ for (;;)
        if (possessive) continue;    /* No backtracking */
        for(;;)
          {
-          if (eptr == pp) goto TAIL_RECURSE;
+          if (eptr <= pp) goto TAIL_RECURSE;
          RMATCH(eptr, ecode, offset_top, md, eptrb, RM46);
          if (rrc != MATCH_NOMATCH) RRETURN(rrc);
          eptr--;
@ -6520,7 +6514,7 @@ tables = re->tables;

 if (extra_data != NULL)
  {
-  register unsigned int flags = extra_data->flags;
+  unsigned long int flags = extra_data->flags;
  if ((flags & PCRE_EXTRA_STUDY_DATA) != 0)
    study = (const pcre_study_data *)extra_data->study_data;
  if ((flags & PCRE_EXTRA_MATCH_LIMIT) != 0)
@ -6692,7 +6686,8 @@ if (md->offset_vector != NULL)
  register int *iend = iptr - re->top_bracket;
  if (iend < md->offset_vector + 2) iend = md->offset_vector + 2;
  while (--iptr >= iend) *iptr = -1;
-  md->offset_vector[0] = md->offset_vector[1] = -1;
+  if (offsetcount > 0) md->offset_vector[0] = -1;
+  if (offsetcount > 1) md->offset_vector[1] = -1;
  }

 /* Set up the first character to match, if available. The first_char value is
--- a/Foundation/src/pcre_get.c
+++ b/Foundation/src/pcre_get.c
@ -247,6 +247,7 @@ Arguments:
  code         the compiled regex
  stringname   the name of the capturing substring
  ovector      the vector of matched substrings
+  stringcount  number of captured substrings

 Returns:       the number of the first that is set,
               or the number of the last one if none are set,
@ -255,13 +256,16 @@ Returns:       the number of the first that is set,

 #if defined COMPILE_PCRE8
 static int
-get_first_set(const pcre *code, const char *stringname, int *ovector)
+get_first_set(const pcre *code, const char *stringname, int *ovector,
+  int stringcount)
 #elif defined COMPILE_PCRE16
 static int
-get_first_set(const pcre16 *code, PCRE_SPTR16 stringname, int *ovector)
+get_first_set(const pcre16 *code, PCRE_SPTR16 stringname, int *ovector,
+  int stringcount)
 #elif defined COMPILE_PCRE32
 static int
-get_first_set(const pcre32 *code, PCRE_SPTR32 stringname, int *ovector)
+get_first_set(const pcre32 *code, PCRE_SPTR32 stringname, int *ovector,
+  int stringcount)
 #endif
 {
 const REAL_PCRE *re = (const REAL_PCRE *)code;
@ -292,7 +296,7 @@ if (entrysize <= 0) return entrysize;
 for (entry = (pcre_uchar *)first; entry <= (pcre_uchar *)last; entry += entrysize)
  {
  int n = GET2(entry, 0);
-  if (ovector[n*2] >= 0) return n;
+  if (n < stringcount && ovector[n*2] >= 0) return n;
  }
 return GET2(entry, 0);
 }
@ -399,7 +403,7 @@ pcre32_copy_named_substring(const pcre32 *code, PCRE_SPTR32 subject,
  PCRE_UCHAR32 *buffer, int size)
 #endif
 {
-int n = get_first_set(code, stringname, ovector);
+int n = get_first_set(code, stringname, ovector, stringcount);
 if (n <= 0) return n;
 #if defined COMPILE_PCRE8
 return pcre_copy_substring(subject, ovector, stringcount, n, buffer, size);
@ -454,7 +458,10 @@ pcre_uchar **stringlist;
 pcre_uchar *p;

 for (i = 0; i < double_count; i += 2)
-  size += sizeof(pcre_uchar *) + IN_UCHARS(ovector[i+1] - ovector[i] + 1);
+  {
+  size += sizeof(pcre_uchar *) + IN_UCHARS(1);
+  if (ovector[i+1] > ovector[i]) size += IN_UCHARS(ovector[i+1] - ovector[i]);
+  }

 stringlist = (pcre_uchar **)(PUBL(malloc))(size);
 if (stringlist == NULL) return PCRE_ERROR_NOMEMORY;
@ -470,7 +477,7 @@ p = (pcre_uchar *)(stringlist + stringcount + 1);

 for (i = 0; i < double_count; i += 2)
  {
-  int len = ovector[i+1] - ovector[i];
+  int len = (ovector[i+1] > ovector[i])? (ovector[i+1] - ovector[i]) : 0;
  memcpy(p, subject + ovector[i], IN_UCHARS(len));
  *stringlist++ = p;
  p += len;
@ -616,7 +623,7 @@ pcre32_get_named_substring(const pcre32 *code, PCRE_SPTR32 subject,
  PCRE_SPTR32 *stringptr)
 #endif
 {
-int n = get_first_set(code, stringname, ovector);
+int n = get_first_set(code, stringname, ovector, stringcount);
 if (n <= 0) return n;
 #if defined COMPILE_PCRE8
 return pcre_get_substring(subject, ovector, stringcount, n, stringptr);
--- a/Foundation/src/pcre_internal.h
+++ b/Foundation/src/pcre_internal.h
@ -7,7 +7,7 @@
 and semantics are as close as possible to those of the Perl 5 language.

                       Written by Philip Hazel
-           Copyright (c) 1997-2014 University of Cambridge
+           Copyright (c) 1997-2016 University of Cambridge

 -----------------------------------------------------------------------------
 Redistribution and use in source and binary forms, with or without
@ -229,9 +229,9 @@ stdint.h is available, include it; it may define INT64_MAX. Systems that do not
 have stdint.h (e.g. Solaris) may have inttypes.h. The macro int64_t may be set
 by "configure". */

-#if HAVE_STDINT_H
+#if defined HAVE_STDINT_H
 #include <stdint.h>
-#elif HAVE_INTTYPES_H
+#elif defined HAVE_INTTYPES_H
 #include <inttypes.h>
 #endif

@ -275,7 +275,7 @@ pcre.h(.in) and disable (comment out) this message. */

 typedef pcre_uint16 pcre_uchar;
 #define UCHAR_SHIFT (1)
-#define IN_UCHARS(x) ((x) << UCHAR_SHIFT)
+#define IN_UCHARS(x) ((x) * 2)
 #define MAX_255(c) ((c) <= 255u)
 #define TABLE_GET(c, table, default) (MAX_255(c)? ((table)[c]):(default))

@ -283,7 +283,7 @@ typedef pcre_uint16 pcre_uchar;

 typedef pcre_uint32 pcre_uchar;
 #define UCHAR_SHIFT (2)
-#define IN_UCHARS(x) ((x) << UCHAR_SHIFT)
+#define IN_UCHARS(x) ((x) * 4)
 #define MAX_255(c) ((c) <= 255u)
 #define TABLE_GET(c, table, default) (MAX_255(c)? ((table)[c]):(default))

@ -984,7 +984,7 @@ other. NOTE: The values also appear in pcre_jit_compile.c. */
 #ifndef EBCDIC

 #define HSPACE_LIST \
-  CHAR_HT, CHAR_SPACE, 0xa0, \
+  CHAR_HT, CHAR_SPACE, CHAR_NBSP, \
  0x1680, 0x180e, 0x2000, 0x2001, 0x2002, 0x2003, 0x2004, 0x2005, \
  0x2006, 0x2007, 0x2008, 0x2009, 0x200A, 0x202f, 0x205f, 0x3000, \
  NOTACHAR
@ -1010,7 +1010,7 @@ other. NOTE: The values also appear in pcre_jit_compile.c. */
 #define HSPACE_BYTE_CASES \
  case CHAR_HT: \
  case CHAR_SPACE: \
-  case 0xa0     /* NBSP */
+  case CHAR_NBSP

 #define HSPACE_CASES \
  HSPACE_BYTE_CASES: \
@ -1037,11 +1037,12 @@ other. NOTE: The values also appear in pcre_jit_compile.c. */
 /* ------ EBCDIC environments ------ */

 #else
-#define HSPACE_LIST CHAR_HT, CHAR_SPACE
+#define HSPACE_LIST CHAR_HT, CHAR_SPACE, CHAR_NBSP, NOTACHAR

 #define HSPACE_BYTE_CASES \
  case CHAR_HT: \
-  case CHAR_SPACE
+  case CHAR_SPACE: \
+  case CHAR_NBSP

 #define HSPACE_CASES HSPACE_BYTE_CASES

@ -1215,6 +1216,7 @@ same code point. */

 #define CHAR_ESC                    '\047'
 #define CHAR_DEL                    '\007'
+#define CHAR_NBSP                   '\x41'
 #define STR_ESC                     "\047"
 #define STR_DEL                     "\007"

@ -1229,6 +1231,7 @@ a positive value. */
 #define CHAR_NEL                    ((unsigned char)'\x85')
 #define CHAR_ESC                    '\033'
 #define CHAR_DEL                    '\177'
+#define CHAR_NBSP                   ((unsigned char)'\xa0')

 #define STR_LF                      "\n"
 #define STR_NL                      STR_LF
@ -1606,6 +1609,7 @@ only. */
 #define CHAR_VERTICAL_LINE          '\174'
 #define CHAR_RIGHT_CURLY_BRACKET    '\175'
 #define CHAR_TILDE                  '\176'
+#define CHAR_NBSP                   ((unsigned char)'\xa0')

 #define STR_HT                      "\011"
 #define STR_VT                      "\013"
@ -1762,6 +1766,10 @@ only. */

 /* Escape items that are just an encoding of a particular data value. */

+#ifndef ESC_a
+#define ESC_a CHAR_BEL
+#endif
+
 #ifndef ESC_e
 #define ESC_e CHAR_ESC
 #endif
@ -2281,7 +2289,7 @@ enum { ERR0,  ERR1,  ERR2,  ERR3,  ERR4,  ERR5,  ERR6,  ERR7,  ERR8,  ERR9,
       ERR50, ERR51, ERR52, ERR53, ERR54, ERR55, ERR56, ERR57, ERR58, ERR59,
       ERR60, ERR61, ERR62, ERR63, ERR64, ERR65, ERR66, ERR67, ERR68, ERR69,
       ERR70, ERR71, ERR72, ERR73, ERR74, ERR75, ERR76, ERR77, ERR78, ERR79,
-       ERR80, ERR81, ERR82, ERR83, ERR84, ERR85, ERRCOUNT };
+       ERR80, ERR81, ERR82, ERR83, ERR84, ERR85, ERR86, ERR87, ERRCOUNT };

 /* JIT compiling modes. The function list is indexed by them. */

@ -2446,6 +2454,8 @@ typedef struct compile_data {
  BOOL had_pruneorskip;             /* (*PRUNE) or (*SKIP) encountered */
  BOOL check_lookbehind;            /* Lookbehinds need later checking */
  BOOL dupnames;                    /* Duplicate names exist */
+  BOOL dupgroups;                   /* Duplicate groups exist: (?| found */
+  BOOL iscondassert;                /* Next assert is a condition */
  int  nltype;                      /* Newline type */
  int  nllen;                       /* Newline string length */
  pcre_uchar nl[4];                 /* Newline string when fixed length */
@ -2459,6 +2469,13 @@ typedef struct branch_chain {
  pcre_uchar *current_branch;
 } branch_chain;

+/* Structure for mutual recursion detection. */
+
+typedef struct recurse_check {
+  struct recurse_check *prev;   /* Previous entry in the chain; NULL ends it */
+  const pcre_uchar *group;      /* Start of the group this entry stands for */
+} recurse_check;
+
 /* Structure for items in a linked list that represents an explicit recursive
 call within the pattern; used by pcre_exec(). */

--- a/Foundation/src/pcre_jit_compile.c
+++ b/Foundation/src/pcre_jit_compile.c
--- a/Foundation/src/pcre_study.c
+++ b/Foundation/src/pcre_study.c
@ -67,7 +67,8 @@ Arguments:
  code            pointer to start of group (the bracket)
  startcode       pointer to start of the whole pattern's code
  options         the compiling options
-  int             RECURSE depth
+  recurses        chain of recurse_check to catch mutual recursion
+  countptr        pointer to a call counter (to catch excessive complexity)

 Returns:   the minimum length
           -1 if \C in UTF-8 mode or (*ACCEPT) was encountered
@ -77,15 +78,19 @@ Returns:   the minimum length

 static int
 find_minlength(const REAL_PCRE *re, const pcre_uchar *code,
-  const pcre_uchar *startcode, int options, int recurse_depth)
+  const pcre_uchar *startcode, int options, recurse_check *recurses,
+  int *countptr)
 {
 int length = -1;
 /* PCRE_UTF16 has the same value as PCRE_UTF8. */
 BOOL utf = (options & PCRE_UTF8) != 0;
 BOOL had_recurse = FALSE;
+recurse_check this_recurse;
 register int branchlength = 0;
 register pcre_uchar *cc = (pcre_uchar *)code + 1 + LINK_SIZE;

+if ((*countptr)++ > 1000) return -1;   /* too complex */
+
 if (*code == OP_CBRA || *code == OP_SCBRA ||
    *code == OP_CBRAPOS || *code == OP_SCBRAPOS) cc += IMM2_SIZE;

@ -127,7 +132,7 @@ for (;;)
    case OP_SBRAPOS:
    case OP_ONCE:
    case OP_ONCE_NC:
-    d = find_minlength(re, cc, startcode, options, recurse_depth);
+    d = find_minlength(re, cc, startcode, options, recurses, countptr);
    if (d < 0) return d;
    branchlength += d;
    do cc += GET(cc, 1); while (*cc == OP_ALT);
@ -390,7 +395,7 @@ for (;;)
        ce = cs = (pcre_uchar *)PRIV(find_bracket)(startcode, utf, GET2(slot, 0));
        if (cs == NULL) return -2;
        do ce += GET(ce, 1); while (*ce == OP_ALT);
-        if (cc > cs && cc < ce)
+        if (cc > cs && cc < ce)     /* Simple recursion */
          {
          d = 0;
          had_recurse = TRUE;
@ -398,8 +403,23 @@ for (;;)
          }
        else
          {
-          int dd = find_minlength(re, cs, startcode, options, recurse_depth);
-          if (dd < d) d = dd;
+          recurse_check *r = recurses;
+          for (r = recurses; r != NULL; r = r->prev) if (r->group == cs) break;
+          if (r != NULL)           /* Mutual recursion */
+            {
+            d = 0;
+            had_recurse = TRUE;
+            break;
+            }
+          else
+            {
+            int dd;
+            this_recurse.prev = recurses;
+            this_recurse.group = cs;
+            dd = find_minlength(re, cs, startcode, options, &this_recurse,
+              countptr);
+            if (dd < d) d = dd;
+            }
          }
        slot += re->name_entry_size;
        }
@ -415,14 +435,27 @@ for (;;)
      ce = cs = (pcre_uchar *)PRIV(find_bracket)(startcode, utf, GET2(cc, 1));
      if (cs == NULL) return -2;
      do ce += GET(ce, 1); while (*ce == OP_ALT);
-      if (cc > cs && cc < ce)
+      if (cc > cs && cc < ce)    /* Simple recursion */
        {
        d = 0;
        had_recurse = TRUE;
        }
      else
        {
-        d = find_minlength(re, cs, startcode, options, recurse_depth);
+        recurse_check *r = recurses;
+        for (r = recurses; r != NULL; r = r->prev) if (r->group == cs) break;
+        if (r != NULL)           /* Mutual recursion */
+          {
+          d = 0;
+          had_recurse = TRUE;
+          }
+        else
+          {
+          this_recurse.prev = recurses;
+          this_recurse.group = cs;
+          d = find_minlength(re, cs, startcode, options, &this_recurse,
+            countptr);
+          }
        }
      }
    else d = 0;
@ -471,12 +504,21 @@ for (;;)
    case OP_RECURSE:
    cs = ce = (pcre_uchar *)startcode + GET(cc, 1);
    do ce += GET(ce, 1); while (*ce == OP_ALT);
-    if ((cc > cs && cc < ce) || recurse_depth > 10)
+    if (cc > cs && cc < ce)    /* Simple recursion */
      had_recurse = TRUE;
    else
      {
-      branchlength += find_minlength(re, cs, startcode, options,
-        recurse_depth + 1);
+      recurse_check *r = recurses;
+      for (r = recurses; r != NULL; r = r->prev) if (r->group == cs) break;
+      if (r != NULL)           /* Mutual recursion */
+        had_recurse = TRUE;
+      else
+        {
+        this_recurse.prev = recurses;
+        this_recurse.group = cs;
+        branchlength += find_minlength(re, cs, startcode, options,
+          &this_recurse, countptr);
+        }
      }
    cc += 1 + LINK_SIZE;
    break;
@ -860,7 +902,6 @@ do
      case OP_NOTUPTOI:
      case OP_NOT_HSPACE:
      case OP_NOT_VSPACE:
-      case OP_PROP:
      case OP_PRUNE:
      case OP_PRUNE_ARG:
      case OP_RECURSE:
@ -878,6 +919,31 @@ do
      case OP_THEN_ARG:
      return SSB_FAIL;

+      /* A "real" property test implies no starting bits, but the fake property
+      PT_CLIST identifies a list of characters. These lists are short, as they
+      are used for characters with more than one "other case", so there is no
+      point in recognizing them for OP_NOTPROP. */
+
+      case OP_PROP:
+      if (tcode[1] != PT_CLIST) return SSB_FAIL;
+        {
+        const pcre_uint32 *p = PRIV(ucd_caseless_sets) + tcode[2];
+        while ((c = *p++) < NOTACHAR)
+          {
+#if defined SUPPORT_UTF && defined COMPILE_PCRE8
+          if (utf)
+            {
+            pcre_uchar buff[6];
+            (void)PRIV(ord2utf)(c, buff);
+            c = buff[0];
+            }
+#endif
+          if (c > 0xff) SET_BIT(0xff); else SET_BIT(c);
+          }
+        }
+      try_next = FALSE;
+      break;
+
      /* We can ignore word boundary tests. */

      case OP_WORD_BOUNDARY:
@ -1103,24 +1169,17 @@ do
      try_next = FALSE;
      break;

-      /* The cbit_space table has vertical tab as whitespace; we have to
-      ensure it is set as not whitespace. Luckily, the code value is the same
-      (0x0b) in ASCII and EBCDIC, so we can just adjust the appropriate bit. */
+      /* The cbit_space table has vertical tab as whitespace; we no longer
+      have to play fancy tricks because Perl added VT to its whitespace at
+      release 5.18. PCRE added it at release 8.34. */

      case OP_NOT_WHITESPACE:
      set_nottype_bits(start_bits, cbit_space, table_limit, cd);
-      start_bits[1] |= 0x08;
      try_next = FALSE;
      break;

-      /* The cbit_space table has vertical tab as whitespace; we have to not
-      set it from the table. Luckily, the code value is the same (0x0b) in
-      ASCII and EBCDIC, so we can just adjust the appropriate bit. */
-
      case OP_WHITESPACE:
-      c = start_bits[1];    /* Save in case it was already set */
      set_type_bits(start_bits, cbit_space, table_limit, cd);
-      start_bits[1] = (start_bits[1] & ~0x08) | c;
      try_next = FALSE;
      break;

@ -1309,7 +1368,7 @@ do
            for (c = 0; c < 16; c++) start_bits[c] |= map[c];
            for (c = 128; c < 256; c++)
              {
-              if ((map[c/8] && (1 << (c&7))) != 0)
+              if ((map[c/8] & (1 << (c&7))) != 0)
                {
                int d = (c >> 6) | 0xc0;            /* Set bit for this starter */
                start_bits[d/8] |= (1 << (d&7));    /* and then skip on to the */
@ -1397,6 +1456,7 @@ pcre32_study(const pcre32 *external_re, int options, const char **errorptr)
 #endif
 {
 int min;
+int count = 0;
 BOOL bits_set = FALSE;
 pcre_uint8 start_bits[32];
 PUBL(extra) *extra = NULL;
@ -1483,7 +1543,7 @@ if ((re->options & PCRE_ANCHORED) == 0 &&

 /* Find the minimum length of subject string. */

-switch(min = find_minlength(re, code, code, re->options, 0))
+switch(min = find_minlength(re, code, code, re->options, NULL, &count))
  {
  case -2: *errorptr = "internal error: missing capturing bracket"; return NULL;
  case -3: *errorptr = "internal error: opcode not recognized"; return NULL;
--- a/Foundation/src/pcre_tables.c
+++ b/Foundation/src/pcre_tables.c
@ -209,6 +209,7 @@ strings to make sure that UTF-8 support works on EBCDIC platforms. */
 #define STRING_Avestan0 STR_A STR_v STR_e STR_s STR_t STR_a STR_n "\0"
 #define STRING_Balinese0 STR_B STR_a STR_l STR_i STR_n STR_e STR_s STR_e "\0"
 #define STRING_Bamum0 STR_B STR_a STR_m STR_u STR_m "\0"
+#define STRING_Bassa_Vah0 STR_B STR_a STR_s STR_s STR_a STR_UNDERSCORE STR_V STR_a STR_h "\0"
 #define STRING_Batak0 STR_B STR_a STR_t STR_a STR_k "\0"
 #define STRING_Bengali0 STR_B STR_e STR_n STR_g STR_a STR_l STR_i "\0"
 #define STRING_Bopomofo0 STR_B STR_o STR_p STR_o STR_m STR_o STR_f STR_o "\0"
@ -219,6 +220,7 @@ strings to make sure that UTF-8 support works on EBCDIC platforms. */
 #define STRING_C0 STR_C "\0"
 #define STRING_Canadian_Aboriginal0 STR_C STR_a STR_n STR_a STR_d STR_i STR_a STR_n STR_UNDERSCORE STR_A STR_b STR_o STR_r STR_i STR_g STR_i STR_n STR_a STR_l "\0"
 #define STRING_Carian0 STR_C STR_a STR_r STR_i STR_a STR_n "\0"
+#define STRING_Caucasian_Albanian0 STR_C STR_a STR_u STR_c STR_a STR_s STR_i STR_a STR_n STR_UNDERSCORE STR_A STR_l STR_b STR_a STR_n STR_i STR_a STR_n "\0"
 #define STRING_Cc0 STR_C STR_c "\0"
 #define STRING_Cf0 STR_C STR_f "\0"
 #define STRING_Chakma0 STR_C STR_h STR_a STR_k STR_m STR_a "\0"
@ -234,11 +236,14 @@ strings to make sure that UTF-8 support works on EBCDIC platforms. */
 #define STRING_Cyrillic0 STR_C STR_y STR_r STR_i STR_l STR_l STR_i STR_c "\0"
 #define STRING_Deseret0 STR_D STR_e STR_s STR_e STR_r STR_e STR_t "\0"
 #define STRING_Devanagari0 STR_D STR_e STR_v STR_a STR_n STR_a STR_g STR_a STR_r STR_i "\0"
+#define STRING_Duployan0 STR_D STR_u STR_p STR_l STR_o STR_y STR_a STR_n "\0"
 #define STRING_Egyptian_Hieroglyphs0 STR_E STR_g STR_y STR_p STR_t STR_i STR_a STR_n STR_UNDERSCORE STR_H STR_i STR_e STR_r STR_o STR_g STR_l STR_y STR_p STR_h STR_s "\0"
+#define STRING_Elbasan0 STR_E STR_l STR_b STR_a STR_s STR_a STR_n "\0"
 #define STRING_Ethiopic0 STR_E STR_t STR_h STR_i STR_o STR_p STR_i STR_c "\0"
 #define STRING_Georgian0 STR_G STR_e STR_o STR_r STR_g STR_i STR_a STR_n "\0"
 #define STRING_Glagolitic0 STR_G STR_l STR_a STR_g STR_o STR_l STR_i STR_t STR_i STR_c "\0"
 #define STRING_Gothic0 STR_G STR_o STR_t STR_h STR_i STR_c "\0"
+#define STRING_Grantha0 STR_G STR_r STR_a STR_n STR_t STR_h STR_a "\0"
 #define STRING_Greek0 STR_G STR_r STR_e STR_e STR_k "\0"
 #define STRING_Gujarati0 STR_G STR_u STR_j STR_a STR_r STR_a STR_t STR_i "\0"
 #define STRING_Gurmukhi0 STR_G STR_u STR_r STR_m STR_u STR_k STR_h STR_i "\0"
@ -258,12 +263,15 @@ strings to make sure that UTF-8 support works on EBCDIC platforms. */
 #define STRING_Kayah_Li0 STR_K STR_a STR_y STR_a STR_h STR_UNDERSCORE STR_L STR_i "\0"
 #define STRING_Kharoshthi0 STR_K STR_h STR_a STR_r STR_o STR_s STR_h STR_t STR_h STR_i "\0"
 #define STRING_Khmer0 STR_K STR_h STR_m STR_e STR_r "\0"
+#define STRING_Khojki0 STR_K STR_h STR_o STR_j STR_k STR_i "\0"
+#define STRING_Khudawadi0 STR_K STR_h STR_u STR_d STR_a STR_w STR_a STR_d STR_i "\0"
 #define STRING_L0 STR_L "\0"
 #define STRING_L_AMPERSAND0 STR_L STR_AMPERSAND "\0"
 #define STRING_Lao0 STR_L STR_a STR_o "\0"
 #define STRING_Latin0 STR_L STR_a STR_t STR_i STR_n "\0"
 #define STRING_Lepcha0 STR_L STR_e STR_p STR_c STR_h STR_a "\0"
 #define STRING_Limbu0 STR_L STR_i STR_m STR_b STR_u "\0"
+#define STRING_Linear_A0 STR_L STR_i STR_n STR_e STR_a STR_r STR_UNDERSCORE STR_A "\0"
 #define STRING_Linear_B0 STR_L STR_i STR_n STR_e STR_a STR_r STR_UNDERSCORE STR_B "\0"
 #define STRING_Lisu0 STR_L STR_i STR_s STR_u "\0"
 #define STRING_Ll0 STR_L STR_l "\0"
@ -274,18 +282,24 @@ strings to make sure that UTF-8 support works on EBCDIC platforms. */
 #define STRING_Lycian0 STR_L STR_y STR_c STR_i STR_a STR_n "\0"
 #define STRING_Lydian0 STR_L STR_y STR_d STR_i STR_a STR_n "\0"
 #define STRING_M0 STR_M "\0"
+#define STRING_Mahajani0 STR_M STR_a STR_h STR_a STR_j STR_a STR_n STR_i "\0"
 #define STRING_Malayalam0 STR_M STR_a STR_l STR_a STR_y STR_a STR_l STR_a STR_m "\0"
 #define STRING_Mandaic0 STR_M STR_a STR_n STR_d STR_a STR_i STR_c "\0"
+#define STRING_Manichaean0 STR_M STR_a STR_n STR_i STR_c STR_h STR_a STR_e STR_a STR_n "\0"
 #define STRING_Mc0 STR_M STR_c "\0"
 #define STRING_Me0 STR_M STR_e "\0"
 #define STRING_Meetei_Mayek0 STR_M STR_e STR_e STR_t STR_e STR_i STR_UNDERSCORE STR_M STR_a STR_y STR_e STR_k "\0"
+#define STRING_Mende_Kikakui0 STR_M STR_e STR_n STR_d STR_e STR_UNDERSCORE STR_K STR_i STR_k STR_a STR_k STR_u STR_i "\0"
 #define STRING_Meroitic_Cursive0 STR_M STR_e STR_r STR_o STR_i STR_t STR_i STR_c STR_UNDERSCORE STR_C STR_u STR_r STR_s STR_i STR_v STR_e "\0"
 #define STRING_Meroitic_Hieroglyphs0 STR_M STR_e STR_r STR_o STR_i STR_t STR_i STR_c STR_UNDERSCORE STR_H STR_i STR_e STR_r STR_o STR_g STR_l STR_y STR_p STR_h STR_s "\0"
 #define STRING_Miao0 STR_M STR_i STR_a STR_o "\0"
 #define STRING_Mn0 STR_M STR_n "\0"
+#define STRING_Modi0 STR_M STR_o STR_d STR_i "\0"
 #define STRING_Mongolian0 STR_M STR_o STR_n STR_g STR_o STR_l STR_i STR_a STR_n "\0"
+#define STRING_Mro0 STR_M STR_r STR_o "\0"
 #define STRING_Myanmar0 STR_M STR_y STR_a STR_n STR_m STR_a STR_r "\0"
 #define STRING_N0 STR_N "\0"
+#define STRING_Nabataean0 STR_N STR_a STR_b STR_a STR_t STR_a STR_e STR_a STR_n "\0"
 #define STRING_Nd0 STR_N STR_d "\0"
 #define STRING_New_Tai_Lue0 STR_N STR_e STR_w STR_UNDERSCORE STR_T STR_a STR_i STR_UNDERSCORE STR_L STR_u STR_e "\0"
 #define STRING_Nko0 STR_N STR_k STR_o "\0"
@ -294,12 +308,17 @@ strings to make sure that UTF-8 support works on EBCDIC platforms. */
 #define STRING_Ogham0 STR_O STR_g STR_h STR_a STR_m "\0"
 #define STRING_Ol_Chiki0 STR_O STR_l STR_UNDERSCORE STR_C STR_h STR_i STR_k STR_i "\0"
 #define STRING_Old_Italic0 STR_O STR_l STR_d STR_UNDERSCORE STR_I STR_t STR_a STR_l STR_i STR_c "\0"
+#define STRING_Old_North_Arabian0 STR_O STR_l STR_d STR_UNDERSCORE STR_N STR_o STR_r STR_t STR_h STR_UNDERSCORE STR_A STR_r STR_a STR_b STR_i STR_a STR_n "\0"
+#define STRING_Old_Permic0 STR_O STR_l STR_d STR_UNDERSCORE STR_P STR_e STR_r STR_m STR_i STR_c "\0"
 #define STRING_Old_Persian0 STR_O STR_l STR_d STR_UNDERSCORE STR_P STR_e STR_r STR_s STR_i STR_a STR_n "\0"
 #define STRING_Old_South_Arabian0 STR_O STR_l STR_d STR_UNDERSCORE STR_S STR_o STR_u STR_t STR_h STR_UNDERSCORE STR_A STR_r STR_a STR_b STR_i STR_a STR_n "\0"
 #define STRING_Old_Turkic0 STR_O STR_l STR_d STR_UNDERSCORE STR_T STR_u STR_r STR_k STR_i STR_c "\0"
 #define STRING_Oriya0 STR_O STR_r STR_i STR_y STR_a "\0"
 #define STRING_Osmanya0 STR_O STR_s STR_m STR_a STR_n STR_y STR_a "\0"
 #define STRING_P0 STR_P "\0"
+#define STRING_Pahawh_Hmong0 STR_P STR_a STR_h STR_a STR_w STR_h STR_UNDERSCORE STR_H STR_m STR_o STR_n STR_g "\0"
+#define STRING_Palmyrene0 STR_P STR_a STR_l STR_m STR_y STR_r STR_e STR_n STR_e "\0"
+#define STRING_Pau_Cin_Hau0 STR_P STR_a STR_u STR_UNDERSCORE STR_C STR_i STR_n STR_UNDERSCORE STR_H STR_a STR_u "\0"
 #define STRING_Pc0 STR_P STR_c "\0"
 #define STRING_Pd0 STR_P STR_d "\0"
 #define STRING_Pe0 STR_P STR_e "\0"
@ -309,6 +328,7 @@ strings to make sure that UTF-8 support works on EBCDIC platforms. */
 #define STRING_Pi0 STR_P STR_i "\0"
 #define STRING_Po0 STR_P STR_o "\0"
 #define STRING_Ps0 STR_P STR_s "\0"
+#define STRING_Psalter_Pahlavi0 STR_P STR_s STR_a STR_l STR_t STR_e STR_r STR_UNDERSCORE STR_P STR_a STR_h STR_l STR_a STR_v STR_i "\0"
 #define STRING_Rejang0 STR_R STR_e STR_j STR_a STR_n STR_g "\0"
 #define STRING_Runic0 STR_R STR_u STR_n STR_i STR_c "\0"
 #define STRING_S0 STR_S "\0"
@ -317,6 +337,7 @@ strings to make sure that UTF-8 support works on EBCDIC platforms. */
 #define STRING_Sc0 STR_S STR_c "\0"
 #define STRING_Sharada0 STR_S STR_h STR_a STR_r STR_a STR_d STR_a "\0"
 #define STRING_Shavian0 STR_S STR_h STR_a STR_v STR_i STR_a STR_n "\0"
+#define STRING_Siddham0 STR_S STR_i STR_d STR_d STR_h STR_a STR_m "\0"
 #define STRING_Sinhala0 STR_S STR_i STR_n STR_h STR_a STR_l STR_a "\0"
 #define STRING_Sk0 STR_S STR_k "\0"
 #define STRING_Sm0 STR_S STR_m "\0"
@ -337,8 +358,10 @@ strings to make sure that UTF-8 support works on EBCDIC platforms. */
 #define STRING_Thai0 STR_T STR_h STR_a STR_i "\0"
 #define STRING_Tibetan0 STR_T STR_i STR_b STR_e STR_t STR_a STR_n "\0"
 #define STRING_Tifinagh0 STR_T STR_i STR_f STR_i STR_n STR_a STR_g STR_h "\0"
+#define STRING_Tirhuta0 STR_T STR_i STR_r STR_h STR_u STR_t STR_a "\0"
 #define STRING_Ugaritic0 STR_U STR_g STR_a STR_r STR_i STR_t STR_i STR_c "\0"
 #define STRING_Vai0 STR_V STR_a STR_i "\0"
+#define STRING_Warang_Citi0 STR_W STR_a STR_r STR_a STR_n STR_g STR_UNDERSCORE STR_C STR_i STR_t STR_i "\0"
 #define STRING_Xan0 STR_X STR_a STR_n "\0"
 #define STRING_Xps0 STR_X STR_p STR_s "\0"
 #define STRING_Xsp0 STR_X STR_s STR_p "\0"
@ -357,6 +380,7 @@ const char PRIV(utt_names)[] =
  STRING_Avestan0
  STRING_Balinese0
  STRING_Bamum0
+  STRING_Bassa_Vah0
  STRING_Batak0
  STRING_Bengali0
  STRING_Bopomofo0
@ -367,6 +391,7 @@ const char PRIV(utt_names)[] =
  STRING_C0
  STRING_Canadian_Aboriginal0
  STRING_Carian0
+  STRING_Caucasian_Albanian0
  STRING_Cc0
  STRING_Cf0
  STRING_Chakma0
@ -382,11 +407,14 @@ const char PRIV(utt_names)[] =
  STRING_Cyrillic0
  STRING_Deseret0
  STRING_Devanagari0
+  STRING_Duployan0
  STRING_Egyptian_Hieroglyphs0
+  STRING_Elbasan0
  STRING_Ethiopic0
  STRING_Georgian0
  STRING_Glagolitic0
  STRING_Gothic0
+  STRING_Grantha0
  STRING_Greek0
  STRING_Gujarati0
  STRING_Gurmukhi0
@ -406,12 +434,15 @@ const char PRIV(utt_names)[] =
  STRING_Kayah_Li0
  STRING_Kharoshthi0
  STRING_Khmer0
+  STRING_Khojki0
+  STRING_Khudawadi0
  STRING_L0
  STRING_L_AMPERSAND0
  STRING_Lao0
  STRING_Latin0
  STRING_Lepcha0
  STRING_Limbu0
+  STRING_Linear_A0
  STRING_Linear_B0
  STRING_Lisu0
  STRING_Ll0
@ -422,18 +453,24 @@ const char PRIV(utt_names)[] =
  STRING_Lycian0
  STRING_Lydian0
  STRING_M0
+  STRING_Mahajani0
  STRING_Malayalam0
  STRING_Mandaic0
+  STRING_Manichaean0
  STRING_Mc0
  STRING_Me0
  STRING_Meetei_Mayek0
+  STRING_Mende_Kikakui0
  STRING_Meroitic_Cursive0
  STRING_Meroitic_Hieroglyphs0
  STRING_Miao0
  STRING_Mn0
+  STRING_Modi0
  STRING_Mongolian0
+  STRING_Mro0
  STRING_Myanmar0
  STRING_N0
+  STRING_Nabataean0
  STRING_Nd0
  STRING_New_Tai_Lue0
  STRING_Nko0
@ -442,12 +479,17 @@ const char PRIV(utt_names)[] =
  STRING_Ogham0
  STRING_Ol_Chiki0
  STRING_Old_Italic0
+  STRING_Old_North_Arabian0
+  STRING_Old_Permic0
  STRING_Old_Persian0
  STRING_Old_South_Arabian0
  STRING_Old_Turkic0
  STRING_Oriya0
  STRING_Osmanya0
  STRING_P0
+  STRING_Pahawh_Hmong0
+  STRING_Palmyrene0
+  STRING_Pau_Cin_Hau0
  STRING_Pc0
  STRING_Pd0
  STRING_Pe0
@ -457,6 +499,7 @@ const char PRIV(utt_names)[] =
  STRING_Pi0
  STRING_Po0
  STRING_Ps0
+  STRING_Psalter_Pahlavi0
  STRING_Rejang0
  STRING_Runic0
  STRING_S0
@ -465,6 +508,7 @@ const char PRIV(utt_names)[] =
  STRING_Sc0
  STRING_Sharada0
  STRING_Shavian0
+  STRING_Siddham0
  STRING_Sinhala0
  STRING_Sk0
  STRING_Sm0
@ -485,8 +529,10 @@ const char PRIV(utt_names)[] =
  STRING_Thai0
  STRING_Tibetan0
  STRING_Tifinagh0
+  STRING_Tirhuta0
  STRING_Ugaritic0
  STRING_Vai0
+  STRING_Warang_Citi0
  STRING_Xan0
  STRING_Xps0
  STRING_Xsp0
@ -505,146 +551,169 @@ const ucp_type_table PRIV(utt)[] = {
  {  20, PT_SC, ucp_Avestan },
  {  28, PT_SC, ucp_Balinese },
  {  37, PT_SC, ucp_Bamum },
-  {  43, PT_SC, ucp_Batak },
-  {  49, PT_SC, ucp_Bengali },
-  {  57, PT_SC, ucp_Bopomofo },
-  {  66, PT_SC, ucp_Brahmi },
-  {  73, PT_SC, ucp_Braille },
-  {  81, PT_SC, ucp_Buginese },
-  {  90, PT_SC, ucp_Buhid },
-  {  96, PT_GC, ucp_C },
-  {  98, PT_SC, ucp_Canadian_Aboriginal },
-  { 118, PT_SC, ucp_Carian },
-  { 125, PT_PC, ucp_Cc },
-  { 128, PT_PC, ucp_Cf },
-  { 131, PT_SC, ucp_Chakma },
-  { 138, PT_SC, ucp_Cham },
-  { 143, PT_SC, ucp_Cherokee },
-  { 152, PT_PC, ucp_Cn },
-  { 155, PT_PC, ucp_Co },
-  { 158, PT_SC, ucp_Common },
-  { 165, PT_SC, ucp_Coptic },
-  { 172, PT_PC, ucp_Cs },
-  { 175, PT_SC, ucp_Cuneiform },
-  { 185, PT_SC, ucp_Cypriot },
-  { 193, PT_SC, ucp_Cyrillic },
-  { 202, PT_SC, ucp_Deseret },
-  { 210, PT_SC, ucp_Devanagari },
-  { 221, PT_SC, ucp_Egyptian_Hieroglyphs },
-  { 242, PT_SC, ucp_Ethiopic },
-  { 251, PT_SC, ucp_Georgian },
-  { 260, PT_SC, ucp_Glagolitic },
-  { 271, PT_SC, ucp_Gothic },
-  { 278, PT_SC, ucp_Greek },
-  { 284, PT_SC, ucp_Gujarati },
-  { 293, PT_SC, ucp_Gurmukhi },
-  { 302, PT_SC, ucp_Han },
-  { 306, PT_SC, ucp_Hangul },
-  { 313, PT_SC, ucp_Hanunoo },
-  { 321, PT_SC, ucp_Hebrew },
-  { 328, PT_SC, ucp_Hiragana },
-  { 337, PT_SC, ucp_Imperial_Aramaic },
-  { 354, PT_SC, ucp_Inherited },
-  { 364, PT_SC, ucp_Inscriptional_Pahlavi },
-  { 386, PT_SC, ucp_Inscriptional_Parthian },
-  { 409, PT_SC, ucp_Javanese },
-  { 418, PT_SC, ucp_Kaithi },
-  { 425, PT_SC, ucp_Kannada },
-  { 433, PT_SC, ucp_Katakana },
-  { 442, PT_SC, ucp_Kayah_Li },
-  { 451, PT_SC, ucp_Kharoshthi },
-  { 462, PT_SC, ucp_Khmer },
-  { 468, PT_GC, ucp_L },
-  { 470, PT_LAMP, 0 },
-  { 473, PT_SC, ucp_Lao },
-  { 477, PT_SC, ucp_Latin },
-  { 483, PT_SC, ucp_Lepcha },
-  { 490, PT_SC, ucp_Limbu },
-  { 496, PT_SC, ucp_Linear_B },
-  { 505, PT_SC, ucp_Lisu },
-  { 510, PT_PC, ucp_Ll },
-  { 513, PT_PC, ucp_Lm },
-  { 516, PT_PC, ucp_Lo },
-  { 519, PT_PC, ucp_Lt },
-  { 522, PT_PC, ucp_Lu },
-  { 525, PT_SC, ucp_Lycian },
-  { 532, PT_SC, ucp_Lydian },
-  { 539, PT_GC, ucp_M },
-  { 541, PT_SC, ucp_Malayalam },
-  { 551, PT_SC, ucp_Mandaic },
-  { 559, PT_PC, ucp_Mc },
-  { 562, PT_PC, ucp_Me },
-  { 565, PT_SC, ucp_Meetei_Mayek },
-  { 578, PT_SC, ucp_Meroitic_Cursive },
-  { 595, PT_SC, ucp_Meroitic_Hieroglyphs },
-  { 616, PT_SC, ucp_Miao },
-  { 621, PT_PC, ucp_Mn },
-  { 624, PT_SC, ucp_Mongolian },
-  { 634, PT_SC, ucp_Myanmar },
-  { 642, PT_GC, ucp_N },
-  { 644, PT_PC, ucp_Nd },
-  { 647, PT_SC, ucp_New_Tai_Lue },
-  { 659, PT_SC, ucp_Nko },
-  { 663, PT_PC, ucp_Nl },
-  { 666, PT_PC, ucp_No },
-  { 669, PT_SC, ucp_Ogham },
-  { 675, PT_SC, ucp_Ol_Chiki },
-  { 684, PT_SC, ucp_Old_Italic },
-  { 695, PT_SC, ucp_Old_Persian },
-  { 707, PT_SC, ucp_Old_South_Arabian },
-  { 725, PT_SC, ucp_Old_Turkic },
-  { 736, PT_SC, ucp_Oriya },
-  { 742, PT_SC, ucp_Osmanya },
-  { 750, PT_GC, ucp_P },
-  { 752, PT_PC, ucp_Pc },
-  { 755, PT_PC, ucp_Pd },
-  { 758, PT_PC, ucp_Pe },
-  { 761, PT_PC, ucp_Pf },
-  { 764, PT_SC, ucp_Phags_Pa },
-  { 773, PT_SC, ucp_Phoenician },
-  { 784, PT_PC, ucp_Pi },
-  { 787, PT_PC, ucp_Po },
-  { 790, PT_PC, ucp_Ps },
-  { 793, PT_SC, ucp_Rejang },
-  { 800, PT_SC, ucp_Runic },
-  { 806, PT_GC, ucp_S },
-  { 808, PT_SC, ucp_Samaritan },
-  { 818, PT_SC, ucp_Saurashtra },
-  { 829, PT_PC, ucp_Sc },
-  { 832, PT_SC, ucp_Sharada },
-  { 840, PT_SC, ucp_Shavian },
-  { 848, PT_SC, ucp_Sinhala },
-  { 856, PT_PC, ucp_Sk },
-  { 859, PT_PC, ucp_Sm },
-  { 862, PT_PC, ucp_So },
-  { 865, PT_SC, ucp_Sora_Sompeng },
-  { 878, PT_SC, ucp_Sundanese },
-  { 888, PT_SC, ucp_Syloti_Nagri },
-  { 901, PT_SC, ucp_Syriac },
-  { 908, PT_SC, ucp_Tagalog },
-  { 916, PT_SC, ucp_Tagbanwa },
-  { 925, PT_SC, ucp_Tai_Le },
-  { 932, PT_SC, ucp_Tai_Tham },
-  { 941, PT_SC, ucp_Tai_Viet },
-  { 950, PT_SC, ucp_Takri },
-  { 956, PT_SC, ucp_Tamil },
-  { 962, PT_SC, ucp_Telugu },
-  { 969, PT_SC, ucp_Thaana },
-  { 976, PT_SC, ucp_Thai },
-  { 981, PT_SC, ucp_Tibetan },
-  { 989, PT_SC, ucp_Tifinagh },
-  { 998, PT_SC, ucp_Ugaritic },
-  { 1007, PT_SC, ucp_Vai },
-  { 1011, PT_ALNUM, 0 },
-  { 1015, PT_PXSPACE, 0 },
-  { 1019, PT_SPACE, 0 },
-  { 1023, PT_UCNC, 0 },
-  { 1027, PT_WORD, 0 },
-  { 1031, PT_SC, ucp_Yi },
-  { 1034, PT_GC, ucp_Z },
-  { 1036, PT_PC, ucp_Zl },
-  { 1039, PT_PC, ucp_Zp },
-  { 1042, PT_PC, ucp_Zs }
+  {  43, PT_SC, ucp_Bassa_Vah },
+  {  53, PT_SC, ucp_Batak },
+  {  59, PT_SC, ucp_Bengali },
+  {  67, PT_SC, ucp_Bopomofo },
+  {  76, PT_SC, ucp_Brahmi },
+  {  83, PT_SC, ucp_Braille },
+  {  91, PT_SC, ucp_Buginese },
+  { 100, PT_SC, ucp_Buhid },
+  { 106, PT_GC, ucp_C },
+  { 108, PT_SC, ucp_Canadian_Aboriginal },
+  { 128, PT_SC, ucp_Carian },
+  { 135, PT_SC, ucp_Caucasian_Albanian },
+  { 154, PT_PC, ucp_Cc },
+  { 157, PT_PC, ucp_Cf },
+  { 160, PT_SC, ucp_Chakma },
+  { 167, PT_SC, ucp_Cham },
+  { 172, PT_SC, ucp_Cherokee },
+  { 181, PT_PC, ucp_Cn },
+  { 184, PT_PC, ucp_Co },
+  { 187, PT_SC, ucp_Common },
+  { 194, PT_SC, ucp_Coptic },
+  { 201, PT_PC, ucp_Cs },
+  { 204, PT_SC, ucp_Cuneiform },
+  { 214, PT_SC, ucp_Cypriot },
+  { 222, PT_SC, ucp_Cyrillic },
+  { 231, PT_SC, ucp_Deseret },
+  { 239, PT_SC, ucp_Devanagari },
+  { 250, PT_SC, ucp_Duployan },
+  { 259, PT_SC, ucp_Egyptian_Hieroglyphs },
+  { 280, PT_SC, ucp_Elbasan },
+  { 288, PT_SC, ucp_Ethiopic },
+  { 297, PT_SC, ucp_Georgian },
+  { 306, PT_SC, ucp_Glagolitic },
+  { 317, PT_SC, ucp_Gothic },
+  { 324, PT_SC, ucp_Grantha },
+  { 332, PT_SC, ucp_Greek },
+  { 338, PT_SC, ucp_Gujarati },
+  { 347, PT_SC, ucp_Gurmukhi },
+  { 356, PT_SC, ucp_Han },
+  { 360, PT_SC, ucp_Hangul },
+  { 367, PT_SC, ucp_Hanunoo },
+  { 375, PT_SC, ucp_Hebrew },
+  { 382, PT_SC, ucp_Hiragana },
+  { 391, PT_SC, ucp_Imperial_Aramaic },
+  { 408, PT_SC, ucp_Inherited },
+  { 418, PT_SC, ucp_Inscriptional_Pahlavi },
+  { 440, PT_SC, ucp_Inscriptional_Parthian },
+  { 463, PT_SC, ucp_Javanese },
+  { 472, PT_SC, ucp_Kaithi },
+  { 479, PT_SC, ucp_Kannada },
+  { 487, PT_SC, ucp_Katakana },
+  { 496, PT_SC, ucp_Kayah_Li },
+  { 505, PT_SC, ucp_Kharoshthi },
+  { 516, PT_SC, ucp_Khmer },
+  { 522, PT_SC, ucp_Khojki },
+  { 529, PT_SC, ucp_Khudawadi },
+  { 539, PT_GC, ucp_L },
+  { 541, PT_LAMP, 0 },
+  { 544, PT_SC, ucp_Lao },
+  { 548, PT_SC, ucp_Latin },
+  { 554, PT_SC, ucp_Lepcha },
+  { 561, PT_SC, ucp_Limbu },
+  { 567, PT_SC, ucp_Linear_A },
+  { 576, PT_SC, ucp_Linear_B },
+  { 585, PT_SC, ucp_Lisu },
+  { 590, PT_PC, ucp_Ll },
+  { 593, PT_PC, ucp_Lm },
+  { 596, PT_PC, ucp_Lo },
+  { 599, PT_PC, ucp_Lt },
+  { 602, PT_PC, ucp_Lu },
+  { 605, PT_SC, ucp_Lycian },
+  { 612, PT_SC, ucp_Lydian },
+  { 619, PT_GC, ucp_M },
+  { 621, PT_SC, ucp_Mahajani },
+  { 630, PT_SC, ucp_Malayalam },
+  { 640, PT_SC, ucp_Mandaic },
+  { 648, PT_SC, ucp_Manichaean },
+  { 659, PT_PC, ucp_Mc },
+  { 662, PT_PC, ucp_Me },
+  { 665, PT_SC, ucp_Meetei_Mayek },
+  { 678, PT_SC, ucp_Mende_Kikakui },
+  { 692, PT_SC, ucp_Meroitic_Cursive },
+  { 709, PT_SC, ucp_Meroitic_Hieroglyphs },
+  { 730, PT_SC, ucp_Miao },
+  { 735, PT_PC, ucp_Mn },
+  { 738, PT_SC, ucp_Modi },
+  { 743, PT_SC, ucp_Mongolian },
+  { 753, PT_SC, ucp_Mro },
+  { 757, PT_SC, ucp_Myanmar },
+  { 765, PT_GC, ucp_N },
+  { 767, PT_SC, ucp_Nabataean },
+  { 777, PT_PC, ucp_Nd },
+  { 780, PT_SC, ucp_New_Tai_Lue },
+  { 792, PT_SC, ucp_Nko },
+  { 796, PT_PC, ucp_Nl },
+  { 799, PT_PC, ucp_No },
+  { 802, PT_SC, ucp_Ogham },
+  { 808, PT_SC, ucp_Ol_Chiki },
+  { 817, PT_SC, ucp_Old_Italic },
+  { 828, PT_SC, ucp_Old_North_Arabian },
+  { 846, PT_SC, ucp_Old_Permic },
+  { 857, PT_SC, ucp_Old_Persian },
+  { 869, PT_SC, ucp_Old_South_Arabian },
+  { 887, PT_SC, ucp_Old_Turkic },
+  { 898, PT_SC, ucp_Oriya },
+  { 904, PT_SC, ucp_Osmanya },
+  { 912, PT_GC, ucp_P },
+  { 914, PT_SC, ucp_Pahawh_Hmong },
+  { 927, PT_SC, ucp_Palmyrene },
+  { 937, PT_SC, ucp_Pau_Cin_Hau },
+  { 949, PT_PC, ucp_Pc },
+  { 952, PT_PC, ucp_Pd },
+  { 955, PT_PC, ucp_Pe },
+  { 958, PT_PC, ucp_Pf },
+  { 961, PT_SC, ucp_Phags_Pa },
+  { 970, PT_SC, ucp_Phoenician },
+  { 981, PT_PC, ucp_Pi },
+  { 984, PT_PC, ucp_Po },
+  { 987, PT_PC, ucp_Ps },
+  { 990, PT_SC, ucp_Psalter_Pahlavi },
+  { 1006, PT_SC, ucp_Rejang },
+  { 1013, PT_SC, ucp_Runic },
+  { 1019, PT_GC, ucp_S },
+  { 1021, PT_SC, ucp_Samaritan },
+  { 1031, PT_SC, ucp_Saurashtra },
+  { 1042, PT_PC, ucp_Sc },
+  { 1045, PT_SC, ucp_Sharada },
+  { 1053, PT_SC, ucp_Shavian },
+  { 1061, PT_SC, ucp_Siddham },
+  { 1069, PT_SC, ucp_Sinhala },
+  { 1077, PT_PC, ucp_Sk },
+  { 1080, PT_PC, ucp_Sm },
+  { 1083, PT_PC, ucp_So },
+  { 1086, PT_SC, ucp_Sora_Sompeng },
+  { 1099, PT_SC, ucp_Sundanese },
+  { 1109, PT_SC, ucp_Syloti_Nagri },
+  { 1122, PT_SC, ucp_Syriac },
+  { 1129, PT_SC, ucp_Tagalog },
+  { 1137, PT_SC, ucp_Tagbanwa },
+  { 1146, PT_SC, ucp_Tai_Le },
+  { 1153, PT_SC, ucp_Tai_Tham },
+  { 1162, PT_SC, ucp_Tai_Viet },
+  { 1171, PT_SC, ucp_Takri },
+  { 1177, PT_SC, ucp_Tamil },
+  { 1183, PT_SC, ucp_Telugu },
+  { 1190, PT_SC, ucp_Thaana },
+  { 1197, PT_SC, ucp_Thai },
+  { 1202, PT_SC, ucp_Tibetan },
+  { 1210, PT_SC, ucp_Tifinagh },
+  { 1219, PT_SC, ucp_Tirhuta },
+  { 1227, PT_SC, ucp_Ugaritic },
+  { 1236, PT_SC, ucp_Vai },
+  { 1240, PT_SC, ucp_Warang_Citi },
+  { 1252, PT_ALNUM, 0 },
+  { 1256, PT_PXSPACE, 0 },
+  { 1260, PT_SPACE, 0 },
+  { 1264, PT_UCNC, 0 },
+  { 1268, PT_WORD, 0 },
+  { 1272, PT_SC, ucp_Yi },
+  { 1275, PT_GC, ucp_Z },
+  { 1277, PT_PC, ucp_Zl },
+  { 1280, PT_PC, ucp_Zp },
+  { 1283, PT_PC, ucp_Zs }
 };

 const int PRIV(utt_size) = sizeof(PRIV(utt)) / sizeof(ucp_type_table);
--- a/Foundation/src/pcre_ucd.c
+++ b/Foundation/src/pcre_ucd.c
--- a/Foundation/src/pcre_xclass.c
+++ b/Foundation/src/pcre_xclass.c
@ -242,7 +242,7 @@ while ((t = *data++) != XCL_END)

      case PT_PXPUNCT:
      if ((PRIV(ucp_gentype)[prop->chartype] == ucp_P ||
-            (c < 256 && PRIV(ucp_gentype)[prop->chartype] == ucp_S)) == isprop)
+            (c < 128 && PRIV(ucp_gentype)[prop->chartype] == ucp_S)) == isprop)
        return !negated;
      break;

--- a/Foundation/src/ucp.h
+++ b/Foundation/src/ucp.h
@ -192,7 +192,31 @@ enum {
  ucp_Miao,
  ucp_Sharada,
  ucp_Sora_Sompeng,
-  ucp_Takri
+  ucp_Takri,
+  /* New for Unicode 7.0.0: */
+  ucp_Bassa_Vah,
+  ucp_Caucasian_Albanian,
+  ucp_Duployan,
+  ucp_Elbasan,
+  ucp_Grantha,
+  ucp_Khojki,
+  ucp_Khudawadi,
+  ucp_Linear_A,
+  ucp_Mahajani,
+  ucp_Manichaean,
+  ucp_Mende_Kikakui,
+  ucp_Modi,
+  ucp_Mro,
+  ucp_Nabataean,
+  ucp_Old_North_Arabian,
+  ucp_Old_Permic,
+  ucp_Pahawh_Hmong,
+  ucp_Palmyrene,
+  ucp_Psalter_Pahlavi,
+  ucp_Pau_Cin_Hau,
+  ucp_Siddham,
+  ucp_Tirhuta,
+  ucp_Warang_Citi
 };

 #endif