pcre2: Version 10.44 (#4478) (#4678)

* update(pcre2): Version 10.44 (#4478) * update(pcre2): Add new file to VS project files (version 10.44) (#4478) * fix(RegEx): Use generic pcre2 function names (without suffix _8). * update(pcre2): Fix configuration (define PCRE2_STATIC) (#4478)
2026-01-11 00:24:15 +01:00 · 2024-10-03 10:49:27 +02:00
parent 04fe04e3a4
commit f3975eba96
36 changed files with 6060 additions and 4480 deletions
--- a/Foundation/CMakeLists.txt
+++ b/Foundation/CMakeLists.txt
@@ -42,6 +42,7 @@ else()
 	POCO_SOURCES(SRCS pcre2
 		src/pcre2_auto_possess.c
 		src/pcre2_chartables.c
+		src/pcre2_chkdint.c
 		src/pcre2_compile.c
 		src/pcre2_config.c
 		src/pcre2_context.c
--- a/Foundation/Foundation_vs160.vcxproj
+++ b/Foundation/Foundation_vs160.vcxproj
@@ -1047,6 +1047,7 @@
    <ClCompile Include="src\PatternFormatter.cpp" />
    <ClCompile Include="src\pcre2_auto_possess.c" />
    <ClCompile Include="src\pcre2_chartables.c" />
+    <ClCompile Include="src\pcre2_chkdint.c" />
    <ClCompile Include="src\pcre2_compile.c" />
    <ClCompile Include="src\pcre2_config.c" />
    <ClCompile Include="src\pcre2_context.c" />
--- a/Foundation/Foundation_vs160.vcxproj.filters
+++ b/Foundation/Foundation_vs160.vcxproj.filters
@@ -846,6 +846,9 @@
    <ClCompile Include="src\pcre2_chartables.c">
      <Filter>RegularExpression\PCRE2 Source Files</Filter>
    </ClCompile>
+    <ClCompile Include="src\pcre2_chkdint.c">
+      <Filter>RegularExpression\PCRE2 Source Files</Filter>
+    </ClCompile>
    <ClCompile Include="src\pcre2_compile.c">
      <Filter>RegularExpression\PCRE2 Source Files</Filter>
    </ClCompile>
--- a/Foundation/Foundation_vs170.vcxproj
+++ b/Foundation/Foundation_vs170.vcxproj
@@ -1509,6 +1509,7 @@
    <ClCompile Include="src\PatternFormatter.cpp" />
    <ClCompile Include="src\pcre2_auto_possess.c" />
    <ClCompile Include="src\pcre2_chartables.c" />
+    <ClCompile Include="src\pcre2_chkdint.c" />
    <ClCompile Include="src\pcre2_compile.c" />
    <ClCompile Include="src\pcre2_config.c" />
    <ClCompile Include="src\pcre2_context.c" />
--- a/Foundation/Foundation_vs170.vcxproj.filters
+++ b/Foundation/Foundation_vs170.vcxproj.filters
@@ -846,6 +846,9 @@
    <ClCompile Include="src\pcre2_chartables.c">
      <Filter>RegularExpression\PCRE2 Source Files</Filter>
    </ClCompile>
+    <ClCompile Include="src\pcre2_chkdint.c">
+      <Filter>RegularExpression\PCRE2 Source Files</Filter>
+    </ClCompile>
    <ClCompile Include="src\pcre2_compile.c">
      <Filter>RegularExpression\PCRE2 Source Files</Filter>
    </ClCompile>
--- a/Foundation/Makefile
+++ b/Foundation/Makefile
@@ -35,7 +35,7 @@ objects = ArchiveStrategy Ascii ASCIIEncoding AsyncChannel AsyncNotificationCent
 zlib_objects = adler32 compress crc32 deflate \
 	infback inffast inflate inftrees trees zutil

-pcre_objects = pcre2_auto_possess pcre2_chartables pcre2_compile pcre2_config \
+pcre_objects = pcre2_auto_possess pcre2_chartables pcre2_chkdint pcre2_compile pcre2_config \
 	pcre2_context pcre2_convert pcre2_dfa_match pcre2_error pcre2_extuni \
 	pcre2_find_bracket pcre2_jit_compile pcre2_maketables pcre2_match \
 	pcre2_match_data pcre2_newline pcre2_ord2utf pcre2_pattern_info \
--- a/Foundation/src/RegularExpression.cpp
+++ b/Foundation/src/RegularExpression.cpp
@@ -29,34 +29,34 @@ namespace
 	class MatchData
 	{
 	public:
-		MatchData(pcre2_code_8* code):
-			_match(pcre2_match_data_create_from_pattern_8(reinterpret_cast<pcre2_code_8*>(code), nullptr))
+		MatchData(pcre2_code* code):
+			_match(pcre2_match_data_create_from_pattern(reinterpret_cast<pcre2_code*>(code), nullptr))
 		{
 			if (!_match) throw Poco::RegularExpressionException("cannot create match data");
 		}

 		~MatchData()
 		{
-			if (_match) pcre2_match_data_free_8(_match);
+			if (_match) pcre2_match_data_free(_match);
 		}

 		std::uint32_t count() const
 		{
-			return pcre2_get_ovector_count_8(_match);
+			return pcre2_get_ovector_count(_match);
 		}

 		const PCRE2_SIZE* data() const
 		{
-			return pcre2_get_ovector_pointer_8(_match);
+			return pcre2_get_ovector_pointer(_match);
 		}

-		operator pcre2_match_data_8*()
+		operator pcre2_match_data*()
 		{
 			return _match;
 		}

 	private:
-		pcre2_match_data_8* _match;
+		pcre2_match_data* _match;
 	};
 }

@@ -72,40 +72,40 @@ RegularExpression::RegularExpression(const std::string& pattern, int options, bo
 	unsigned nameEntrySize;
 	unsigned char* nameTable;

-	pcre2_compile_context_8* context = pcre2_compile_context_create_8(nullptr);
+	pcre2_compile_context* context = pcre2_compile_context_create(nullptr);
 	if (!context) throw Poco::RegularExpressionException("cannot create compile context");

 	if (options & RE_NEWLINE_LF)
-		pcre2_set_newline_8(context, PCRE2_NEWLINE_LF);
+		pcre2_set_newline(context, PCRE2_NEWLINE_LF);
 	else if (options & RE_NEWLINE_CRLF)
-		pcre2_set_newline_8(context, PCRE2_NEWLINE_CRLF);
+		pcre2_set_newline(context, PCRE2_NEWLINE_CRLF);
 	else if (options & RE_NEWLINE_ANY)
-		pcre2_set_newline_8(context, PCRE2_NEWLINE_ANY);
+		pcre2_set_newline(context, PCRE2_NEWLINE_ANY);
 	else if (options & RE_NEWLINE_ANYCRLF)
-		pcre2_set_newline_8(context, PCRE2_NEWLINE_ANYCRLF);
+		pcre2_set_newline(context, PCRE2_NEWLINE_ANYCRLF);
 	else // default RE_NEWLINE_CR
-		pcre2_set_newline_8(context, PCRE2_NEWLINE_CR);
+		pcre2_set_newline(context, PCRE2_NEWLINE_CR);

-	_pcre = pcre2_compile_8(reinterpret_cast<const PCRE2_SPTR>(pattern.c_str()), pattern.length(), compileOptions(options), &errorCode, &errorOffset, context);
-	pcre2_compile_context_free_8(context);
+	_pcre = pcre2_compile(reinterpret_cast<const PCRE2_SPTR>(pattern.c_str()), pattern.length(), compileOptions(options), &errorCode, &errorOffset, context);
+	pcre2_compile_context_free(context);

 	if (!_pcre)
 	{
 		PCRE2_UCHAR buffer[256];
-		pcre2_get_error_message_8(errorCode, buffer, sizeof(buffer));
+		pcre2_get_error_message(errorCode, buffer, sizeof(buffer));
 		std::ostringstream msg;
 		msg << reinterpret_cast<char*>(buffer) << " (at offset " << errorOffset << ")";
 		throw RegularExpressionException(msg.str());
 	}

-	pcre2_pattern_info_8(reinterpret_cast<pcre2_code_8*>(_pcre), PCRE2_INFO_NAMECOUNT, &nameCount);
-	pcre2_pattern_info_8(reinterpret_cast<pcre2_code_8*>(_pcre), PCRE2_INFO_NAMEENTRYSIZE, &nameEntrySize);
-	pcre2_pattern_info_8(reinterpret_cast<pcre2_code_8*>(_pcre), PCRE2_INFO_NAMETABLE, &nameTable);
+	pcre2_pattern_info(reinterpret_cast<pcre2_code*>(_pcre), PCRE2_INFO_NAMECOUNT, &nameCount);
+	pcre2_pattern_info(reinterpret_cast<pcre2_code*>(_pcre), PCRE2_INFO_NAMEENTRYSIZE, &nameEntrySize);
+	pcre2_pattern_info(reinterpret_cast<pcre2_code*>(_pcre), PCRE2_INFO_NAMETABLE, &nameTable);

 	for (int i = 0; i < nameCount; i++)
 	{
 		unsigned char* group = nameTable + 2 + (nameEntrySize * i);
-		int n = pcre2_substring_number_from_name_8(reinterpret_cast<pcre2_code_8*>(_pcre), group);
+		int n = pcre2_substring_number_from_name(reinterpret_cast<pcre2_code*>(_pcre), group);
 		_groups[n] = std::string(reinterpret_cast<char*>(group));
 	}
 }
@@ -113,7 +113,7 @@ RegularExpression::RegularExpression(const std::string& pattern, int options, bo

 RegularExpression::~RegularExpression()
 {
-	if (_pcre) pcre2_code_free_8(reinterpret_cast<pcre2_code_8*>(_pcre));
+	if (_pcre) pcre2_code_free(reinterpret_cast<pcre2_code*>(_pcre));
 }


@@ -121,8 +121,8 @@ int RegularExpression::match(const std::string& subject, std::string::size_type
 {
 	poco_assert (offset <= subject.length());

-	MatchData matchData(reinterpret_cast<pcre2_code_8*>(_pcre));
-	int rc = pcre2_match_8(reinterpret_cast<pcre2_code_8*>(_pcre), reinterpret_cast<const PCRE2_SPTR>(subject.c_str()), subject.size(), offset, matchOptions(options), matchData, nullptr);
+	MatchData matchData(reinterpret_cast<pcre2_code*>(_pcre));
+	int rc = pcre2_match(reinterpret_cast<pcre2_code*>(_pcre), reinterpret_cast<const PCRE2_SPTR>(subject.c_str()), subject.size(), offset, matchOptions(options), matchData, nullptr);
 	if (rc == PCRE2_ERROR_NOMATCH)
 	{
 		mtch.offset = std::string::npos;
@@ -140,7 +140,7 @@ int RegularExpression::match(const std::string& subject, std::string::size_type
 	else if (rc < 0)
 	{
 		PCRE2_UCHAR buffer[256];
-		pcre2_get_error_message_8(rc, buffer, sizeof(buffer));
+		pcre2_get_error_message(rc, buffer, sizeof(buffer));
 		throw RegularExpressionException(std::string(reinterpret_cast<char*>(buffer)));
 	}
 	const PCRE2_SIZE* ovec = matchData.data();
@@ -156,8 +156,8 @@ int RegularExpression::match(const std::string& subject, std::string::size_type

 	matches.clear();

-	MatchData matchData(reinterpret_cast<pcre2_code_8*>(_pcre));
-	int rc = pcre2_match_8(reinterpret_cast<pcre2_code_8*>(_pcre), reinterpret_cast<const PCRE2_SPTR>(subject.c_str()), subject.size(), offset, options & 0xFFFF, matchData, nullptr);
+	MatchData matchData(reinterpret_cast<pcre2_code*>(_pcre));
+	int rc = pcre2_match(reinterpret_cast<pcre2_code*>(_pcre), reinterpret_cast<const PCRE2_SPTR>(subject.c_str()), subject.size(), offset, options & 0xFFFF, matchData, nullptr);
 	if (rc == PCRE2_ERROR_NOMATCH)
 	{
 		return 0;
@@ -173,7 +173,7 @@ int RegularExpression::match(const std::string& subject, std::string::size_type
 	else if (rc < 0)
 	{
 		PCRE2_UCHAR buffer[256];
-		pcre2_get_error_message_8(rc, buffer, sizeof(buffer));
+		pcre2_get_error_message(rc, buffer, sizeof(buffer));
 		throw RegularExpressionException(std::string(reinterpret_cast<char*>(buffer)));
 	}
 	matches.reserve(rc);
@@ -279,8 +279,8 @@ std::string::size_type RegularExpression::substOne(std::string& subject, std::st
 {
 	if (offset >= subject.length()) return std::string::npos;

-	MatchData matchData(reinterpret_cast<pcre2_code_8*>(_pcre));
-	int rc = pcre2_match_8(reinterpret_cast<pcre2_code_8*>(_pcre), reinterpret_cast<const PCRE2_SPTR>(subject.c_str()), subject.size(), offset, matchOptions(options), matchData, nullptr);
+	MatchData matchData(reinterpret_cast<pcre2_code*>(_pcre));
+	int rc = pcre2_match(reinterpret_cast<pcre2_code*>(_pcre), reinterpret_cast<const PCRE2_SPTR>(subject.c_str()), subject.size(), offset, matchOptions(options), matchData, nullptr);
 	if (rc == PCRE2_ERROR_NOMATCH)
 	{
 		return std::string::npos;
@@ -296,7 +296,7 @@ std::string::size_type RegularExpression::substOne(std::string& subject, std::st
 	else if (rc < 0)
 	{
 		PCRE2_UCHAR buffer[256];
-		pcre2_get_error_message_8(rc, buffer, sizeof(buffer));
+		pcre2_get_error_message(rc, buffer, sizeof(buffer));
 		throw RegularExpressionException(std::string(reinterpret_cast<char*>(buffer)));
 	}
 	const PCRE2_SIZE* ovec = matchData.data();
--- a/Foundation/src/Unicode.cpp
+++ b/Foundation/src/Unicode.cpp
@@ -29,7 +29,7 @@ void Unicode::properties(int ch, CharacterProperties& props)
 {
 	if (ch > UCP_MAX_CODEPOINT) ch = 0;
 	const ucd_record* ucd = GET_UCD(ch);
-	props.category = static_cast<CharacterCategory>(PRIV(ucp_gentype_8)[ucd->chartype]);
+	props.category = static_cast<CharacterCategory>(PRIV(ucp_gentype)[ucd->chartype]);
 	props.type     = static_cast<CharacterType>(ucd->chartype);
 	props.script   = static_cast<Script>(ucd->script);
 }
--- a/Foundation/src/pcre2.h
+++ b/Foundation/src/pcre2.h
@@ -5,7 +5,7 @@
 /* This is the public header file for the PCRE library, second API, to be
 #included by applications that call PCRE2 functions.

-           Copyright (c) 2016-2021 University of Cambridge
+           Copyright (c) 2016-2024 University of Cambridge

 -----------------------------------------------------------------------------
 Redistribution and use in source and binary forms, with or without
@@ -42,9 +42,9 @@ POSSIBILITY OF SUCH DAMAGE.
 /* The current PCRE version information. */

 #define PCRE2_MAJOR           10
-#define PCRE2_MINOR           42
-#define PCRE2_PRERELEASE
-#define PCRE2_DATE            2022-12-11
+#define PCRE2_MINOR           44
+#define PCRE2_PRERELEASE      
+#define PCRE2_DATE            2024-06-07

 /* When an application links to a PCRE DLL in Windows, the symbols that are
 imported have to be identified as such. When building PCRE2, the appropriate
@@ -153,6 +153,12 @@ D   is inspected during pcre2_dfa_match() execution
 #define PCRE2_EXTRA_ESCAPED_CR_IS_LF         0x00000010u  /* C */
 #define PCRE2_EXTRA_ALT_BSUX                 0x00000020u  /* C */
 #define PCRE2_EXTRA_ALLOW_LOOKAROUND_BSK     0x00000040u  /* C */
+#define PCRE2_EXTRA_CASELESS_RESTRICT        0x00000080u  /* C */
+#define PCRE2_EXTRA_ASCII_BSD                0x00000100u  /* C */
+#define PCRE2_EXTRA_ASCII_BSS                0x00000200u  /* C */
+#define PCRE2_EXTRA_ASCII_BSW                0x00000400u  /* C */
+#define PCRE2_EXTRA_ASCII_POSIX              0x00000800u  /* C */
+#define PCRE2_EXTRA_ASCII_DIGIT              0x00001000u  /* C */

 /* These are for pcre2_jit_compile(). */

@@ -180,11 +186,12 @@ pcre2_jit_match() ignores the latter since it bypasses all sanity checks). */
 #define PCRE2_SUBSTITUTE_UNSET_EMPTY      0x00000400u  /* pcre2_substitute() only */
 #define PCRE2_SUBSTITUTE_UNKNOWN_UNSET    0x00000800u  /* pcre2_substitute() only */
 #define PCRE2_SUBSTITUTE_OVERFLOW_LENGTH  0x00001000u  /* pcre2_substitute() only */
-#define PCRE2_NO_JIT                      0x00002000u  /* Not for pcre2_dfa_match() */
+#define PCRE2_NO_JIT                      0x00002000u  /* not for pcre2_dfa_match() */
 #define PCRE2_COPY_MATCHED_SUBJECT        0x00004000u
 #define PCRE2_SUBSTITUTE_LITERAL          0x00008000u  /* pcre2_substitute() only */
 #define PCRE2_SUBSTITUTE_MATCHED          0x00010000u  /* pcre2_substitute() only */
 #define PCRE2_SUBSTITUTE_REPLACEMENT_ONLY 0x00020000u  /* pcre2_substitute() only */
+#define PCRE2_DISABLE_RECURSELOOP_CHECK   0x00040000u  /* not for pcre2_dfa_match() or pcre2_jit_match() */

 /* Options for pcre2_pattern_convert(). */

@@ -399,6 +406,7 @@ released, the numbers must not be changed. */
 #define PCRE2_ERROR_CONVERT_SYNTAX    (-64)
 #define PCRE2_ERROR_INTERNAL_DUPMATCH (-65)
 #define PCRE2_ERROR_DFA_UINVALID_UTF  (-66)
+#define PCRE2_ERROR_INVALIDOFFSET     (-67)


 /* Request types for pcre2_pattern_info() */
@@ -575,7 +583,7 @@ PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION pcre2_config(uint32_t, void *);
 PCRE2_EXP_DECL pcre2_general_context *PCRE2_CALL_CONVENTION \
  pcre2_general_context_copy(pcre2_general_context *); \
 PCRE2_EXP_DECL pcre2_general_context *PCRE2_CALL_CONVENTION \
-  pcre2_general_context_create(void *(*)(PCRE2_SIZE, void *), \
+  pcre2_general_context_create(void *(*)(size_t, void *), \
    void (*)(void *, void *), void *); \
 PCRE2_EXP_DECL void PCRE2_CALL_CONVENTION \
  pcre2_general_context_free(pcre2_general_context *);
@@ -595,6 +603,10 @@ PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
  pcre2_set_compile_extra_options(pcre2_compile_context *, uint32_t); \
 PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
  pcre2_set_max_pattern_length(pcre2_compile_context *, PCRE2_SIZE); \
+PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
+  pcre2_set_max_pattern_compiled_length(pcre2_compile_context *, PCRE2_SIZE); \
+PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
+  pcre2_set_max_varlookbehind(pcre2_compile_context *, uint32_t); \
 PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
  pcre2_set_newline(pcre2_compile_context *, uint32_t); \
 PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
@@ -628,7 +640,7 @@ PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
  pcre2_set_recursion_limit(pcre2_match_context *, uint32_t); \
 PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
  pcre2_set_recursion_memory_management(pcre2_match_context *, \
-    void *(*)(PCRE2_SIZE, void *), void (*)(void *, void *), void *);
+    void *(*)(size_t, void *), void (*)(void *, void *), void *);

 #define PCRE2_CONVERT_CONTEXT_FUNCTIONS \
 PCRE2_EXP_DECL pcre2_convert_context *PCRE2_CALL_CONVENTION \
@@ -687,6 +699,8 @@ PCRE2_EXP_DECL PCRE2_SPTR PCRE2_CALL_CONVENTION \
  pcre2_get_mark(pcre2_match_data *); \
 PCRE2_EXP_DECL PCRE2_SIZE PCRE2_CALL_CONVENTION \
  pcre2_get_match_data_size(pcre2_match_data *); \
+PCRE2_EXP_DECL PCRE2_SIZE PCRE2_CALL_CONVENTION \
+  pcre2_get_match_data_heapframes_size(pcre2_match_data *); \
 PCRE2_EXP_DECL uint32_t PCRE2_CALL_CONVENTION \
  pcre2_get_ovector_count(pcre2_match_data *); \
 PCRE2_EXP_DECL PCRE2_SIZE *PCRE2_CALL_CONVENTION \
@@ -722,7 +736,7 @@ PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
 PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
  pcre2_substring_number_from_name(const pcre2_code *, PCRE2_SPTR); \
 PCRE2_EXP_DECL void PCRE2_CALL_CONVENTION \
-  pcre2_substring_list_free(PCRE2_SPTR *); \
+  pcre2_substring_list_free(PCRE2_UCHAR **); \
 PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
  pcre2_substring_list_get(pcre2_match_data *, PCRE2_UCHAR ***, PCRE2_SIZE **);

@@ -771,7 +785,7 @@ PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
 PCRE2_EXP_DECL void PCRE2_CALL_CONVENTION \
  pcre2_jit_free_unused_memory(pcre2_general_context *); \
 PCRE2_EXP_DECL pcre2_jit_stack *PCRE2_CALL_CONVENTION \
-  pcre2_jit_stack_create(PCRE2_SIZE, PCRE2_SIZE, pcre2_general_context *); \
+  pcre2_jit_stack_create(size_t, size_t, pcre2_general_context *); \
 PCRE2_EXP_DECL void PCRE2_CALL_CONVENTION \
  pcre2_jit_stack_assign(pcre2_match_context *, pcre2_jit_callback, void *); \
 PCRE2_EXP_DECL void PCRE2_CALL_CONVENTION \
@@ -851,6 +865,7 @@ pcre2_compile are called by application code. */
 #define pcre2_general_context_free            PCRE2_SUFFIX(pcre2_general_context_free_)
 #define pcre2_get_error_message               PCRE2_SUFFIX(pcre2_get_error_message_)
 #define pcre2_get_mark                        PCRE2_SUFFIX(pcre2_get_mark_)
+#define pcre2_get_match_data_heapframes_size  PCRE2_SUFFIX(pcre2_get_match_data_heapframes_size_)
 #define pcre2_get_match_data_size             PCRE2_SUFFIX(pcre2_get_match_data_size_)
 #define pcre2_get_ovector_pointer             PCRE2_SUFFIX(pcre2_get_ovector_pointer_)
 #define pcre2_get_ovector_count               PCRE2_SUFFIX(pcre2_get_ovector_count_)
@@ -886,7 +901,9 @@ pcre2_compile are called by application code. */
 #define pcre2_set_glob_separator              PCRE2_SUFFIX(pcre2_set_glob_separator_)
 #define pcre2_set_heap_limit                  PCRE2_SUFFIX(pcre2_set_heap_limit_)
 #define pcre2_set_match_limit                 PCRE2_SUFFIX(pcre2_set_match_limit_)
+#define pcre2_set_max_varlookbehind           PCRE2_SUFFIX(pcre2_set_max_varlookbehind_)
 #define pcre2_set_max_pattern_length          PCRE2_SUFFIX(pcre2_set_max_pattern_length_)
+#define pcre2_set_max_pattern_compiled_length PCRE2_SUFFIX(pcre2_set_max_pattern_compiled_length_)
 #define pcre2_set_newline                     PCRE2_SUFFIX(pcre2_set_newline_)
 #define pcre2_set_parens_nest_limit           PCRE2_SUFFIX(pcre2_set_parens_nest_limit_)
 #define pcre2_set_offset_limit                PCRE2_SUFFIX(pcre2_set_offset_limit_)
--- a/Foundation/src/pcre2_auto_possess.c
+++ b/Foundation/src/pcre2_auto_possess.c
@@ -556,6 +556,8 @@ matches to an empty string (also represented by a non-zero value). */

 for(;;)
  {
+  PCRE2_SPTR bracode;
+
  /* All operations move the code pointer forward.
  Therefore infinite recursions are not possible. */

@@ -613,7 +615,8 @@ for(;;)
    recursions. (This could be improved by keeping a list of group numbers that
    are called by recursion.) */

-    switch(*(code - GET(code, 1)))
+    bracode = code - GET(code, 1);
+    switch(*bracode)
      {
      case OP_CBRA:
      case OP_SCBRA:
@@ -632,16 +635,19 @@ for(;;)
      break;

      /* Atomic sub-patterns and assertions can always auto-possessify their
-      last iterator. However, if the group was entered as a result of checking
-      a previous iterator, this is not possible. */
+      last iterator except for variable length lookbehinds. However, if the
+      group was entered as a result of checking a previous iterator, this is
+      not possible. */

      case OP_ASSERT:
      case OP_ASSERT_NOT:
-      case OP_ASSERTBACK:
-      case OP_ASSERTBACK_NOT:
      case OP_ONCE:
      return !entered_a_group;

+      case OP_ASSERTBACK:
+      case OP_ASSERTBACK_NOT:
+      return (bracode[1+LINK_SIZE] == OP_VREVERSE)? FALSE : !entered_a_group;
+
      /* Non-atomic assertions - don't possessify last iterator. This needs
      more thought. */

--- a/Foundation/src/pcre2_chartables.c
+++ b/Foundation/src/pcre2_chartables.c
@@ -5,7 +5,8 @@
 /* This file was automatically written by the pcre2_dftables auxiliary
 program. It contains character tables that are used when no external
 tables are passed to PCRE2 by the application that calls it. The tables
-are used only for characters whose code values are less than 256. */
+are used only for characters whose code values are less than 256, and
+only relevant if not in UCP mode. */

 /* This set of tables was written in the C locale. */

@@ -160,7 +161,7 @@ graph, print, punct, and cntrl. Other classes are built from combinations. */
  0x02   letter
  0x04   lower case letter
  0x08   decimal digit
-  0x10   alphanumeric or '_'
+  0x10   word (alphanumeric or '_')
 */

  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*   0-  7 */
--- a/Foundation/src/pcre2_chkdint.c
+++ b/Foundation/src/pcre2_chkdint.c
@@ -0,0 +1,93 @@
+/*************************************************
+*      Perl-Compatible Regular Expressions       *
+*************************************************/
+
+/* PCRE is a library of functions to support regular expressions whose syntax
+and semantics are as close as possible to those of the Perl 5 language.
+
+                     Written by Philip Hazel
+            Copyright (c) 2023 University of Cambridge
+
+-----------------------------------------------------------------------------
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+    * Redistributions of source code must retain the above copyright notice,
+      this list of conditions and the following disclaimer.
+
+    * Redistributions in binary form must reproduce the above copyright
+      notice, this list of conditions and the following disclaimer in the
+      documentation and/or other materials provided with the distribution.
+
+    * Neither the name of the University of Cambridge nor the names of its
+      contributors may be used to endorse or promote products derived from
+      this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+-----------------------------------------------------------------------------
+*/
+
+/* This file contains functions to implement checked integer operations */
+
+#ifndef PCRE2_PCRE2TEST
+#include "pcre2_config.h"
+#include "pcre2_internal.h"
+#endif
+
+/*************************************************
+*        Checked Integer Multiplication          *
+*************************************************/
+
+/*
+Arguments:
+  r         A pointer to PCRE2_SIZE to store the answer
+  a, b      Two integers
+
+Returns:    Bool indicating if the operation overflows
+
+It is modeled after C23's <stdckdint.h> interface.
+The INT64_OR_DOUBLE type is a 64-bit integer type when available,
+otherwise double. */
+
+BOOL
+PRIV(ckd_smul)(PCRE2_SIZE *r, int a, int b)
+{
+#ifdef HAVE_BUILTIN_MUL_OVERFLOW
+PCRE2_SIZE m;
+/* The builtin returns nonzero when a * b is not representable in m. */
+if (__builtin_mul_overflow(a, b, &m)) return TRUE;
+
+*r = m;
+#else
+INT64_OR_DOUBLE m;
+/* Debug builds abort on negative operands; other builds do not check. */
+#ifdef PCRE2_DEBUG
+if (a < 0 || b < 0) abort();
+#endif
+/* Both operands are converted to INT64_OR_DOUBLE before multiplying. */
+m = (INT64_OR_DOUBLE)a * (INT64_OR_DOUBLE)b;
+/* First branch: the range test is skipped when m is no wider than *r. */
+#if defined INT64_MAX || defined int64_t
+if (sizeof(m) > sizeof(*r) && m > (INT64_OR_DOUBLE)PCRE2_SIZE_MAX) return TRUE;
+*r = (PCRE2_SIZE)m;
+#else
+if (m > PCRE2_SIZE_MAX) return TRUE;
+*r = m;
+#endif
+
+#endif
+/* Reached only when no overflow was reported; *r has been written. */
+return FALSE;
+}
+
+/* End of pcre2_chkdint.c */
--- a/Foundation/src/pcre2_compile.c
+++ b/Foundation/src/pcre2_compile.c
--- a/Foundation/src/pcre2_config.h
+++ b/Foundation/src/pcre2_config.h
@@ -57,9 +57,12 @@ sure both macros are undefined; an emulation function will then be used. */
 /* Define this if your compiler supports __attribute__((uninitialized)) */
 /* #undef HAVE_ATTRIBUTE_UNINITIALIZED */

-/* Define to 1 if you have the `bcopy' function. */
+/* Define to 1 if you have the 'bcopy' function. */
 /* #undef HAVE_BCOPY */

+/* Define this if your compiler provides __builtin_mul_overflow() */
+/* #undef HAVE_BUILTIN_MUL_OVERFLOW */
+
 /* Define to 1 if you have the <bzlib.h> header file. */
 /* #undef HAVE_BZLIB_H */

@@ -81,17 +84,17 @@ sure both macros are undefined; an emulation function will then be used. */
 /* Define to 1 if you have the <limits.h> header file. */
 /* #undef HAVE_LIMITS_H */

-/* Define to 1 if you have the `memfd_create' function. */
+/* Define to 1 if you have the 'memfd_create' function. */
 /* #undef HAVE_MEMFD_CREATE */

-/* Define to 1 if you have the `memmove' function. */
+/* Define to 1 if you have the 'memmove' function. */
 /* #undef HAVE_MEMMOVE */
 #define HAVE_MEMMOVE 1

 /* Define to 1 if you have the <minix/config.h> header file. */
 /* #undef HAVE_MINIX_CONFIG_H */

-/* Define to 1 if you have the `mkostemp' function. */
+/* Define to 1 if you have the 'mkostemp' function. */
 /* #undef HAVE_MKOSTEMP */

 /* Define if you have POSIX threads libraries and header files. */
@@ -112,7 +115,7 @@ sure both macros are undefined; an emulation function will then be used. */
 /* Define to 1 if you have the `realpath' function. */
 /* #undef HAVE_REALPATH */

-/* Define to 1 if you have the `secure_getenv' function. */
+/* Define to 1 if you have the 'secure_getenv' function. */
 /* #undef HAVE_SECURE_GETENV */

 /* Define to 1 if you have the <stdint.h> header file. */
@@ -124,7 +127,7 @@ sure both macros are undefined; an emulation function will then be used. */
 /* Define to 1 if you have the <stdlib.h> header file. */
 /* #undef HAVE_STDLIB_H */

-/* Define to 1 if you have the `strerror' function. */
+/* Define to 1 if you have the 'strerror' function. */
 /* #undef HAVE_STRERROR */

 /* Define to 1 if you have the <strings.h> header file. */
@@ -184,7 +187,7 @@ sure both macros are undefined; an emulation function will then be used. */
   matching attempt. The value is also used to limit a loop counter in
   pcre2_dfa_match(). There is a runtime interface for setting a different
   limit. The limit exists in order to catch runaway regular expressions that
-   take for ever to determine that they do not match. The default is set very
+   take forever to determine that they do not match. The default is set very
   large so that it does not accidentally catch legitimate cases. */
 #ifndef MATCH_LIMIT
 #define MATCH_LIMIT 10000000
@@ -215,7 +218,13 @@ sure both macros are undefined; an emulation function will then be used. */
   Care must be taken if it is increased, because it guards against integer
   overflow caused by enormously large patterns. */
 #ifndef MAX_NAME_SIZE
-#define MAX_NAME_SIZE 32
+#define MAX_NAME_SIZE 128
+#endif
+
+/* The value of MAX_VARLOOKBEHIND specifies the default maximum length, in
+   characters, for a variable-length lookbehind assertion. */
+#ifndef MAX_VARLOOKBEHIND
+#define MAX_VARLOOKBEHIND 255
 #endif

 /* Defining NEVER_BACKSLASH_C locks out the use of \C in all patterns. */
@@ -239,7 +248,7 @@ sure both macros are undefined; an emulation function will then be used. */
 #define PACKAGE_NAME "PCRE2"

 /* Define to the full name and version of this package. */
-#define PACKAGE_STRING "PCRE2 10.42"
+#define PACKAGE_STRING "PCRE2 10.44"

 /* Define to the one symbol short name of this package. */
 #define PACKAGE_TARNAME "pcre2"
@@ -248,7 +257,7 @@ sure both macros are undefined; an emulation function will then be used. */
 #define PACKAGE_URL ""

 /* Define to the version of this package. */
-#define PACKAGE_VERSION "10.42"
+#define PACKAGE_VERSION "10.44"

 /* The value of PARENS_NEST_LIMIT specifies the maximum depth of nested
   parentheses (of any kind) in a pattern. This limits the amount of system
@@ -278,12 +287,16 @@ sure both macros are undefined; an emulation function will then be used. */
 /* Define to any value to include debugging code. */
 /* #undef PCRE2_DEBUG */

+/* to make a symbol visible */
+#define PCRE2_EXPORT
+
 /* If you are compiling for a system other than a Unix-like system or
   Win32, and it needs some magic to be inserted before the definition
   of a function that is exported by the library, define this macro to
   contain the relevant magic. If you do not define this macro, a suitable
-    __declspec value is used for Windows systems; in other environments
-   "extern" is used for a C compiler and "extern C" for a C++ compiler.
+   __declspec value is used for Windows systems; in other environments
+   a compiler relevant "extern" is used with any "visibility" related
+   attributes from PCRE2_EXPORT included.
   This macro apears at the start of every exported function that is part
   of the external API. It does not appear on functions that are "external"
   in the C sense, but which are internal to the library. */
@@ -304,11 +317,14 @@ sure both macros are undefined; an emulation function will then be used. */
   unless SUPPORT_JIT is also defined. */
 /* #undef SLJIT_PROT_EXECUTABLE_ALLOCATOR */

-/* Define to 1 if all of the C90 standard headers exist (not just the ones
+/* Define to 1 if all of the C89 standard headers exist (not just the ones
   required in a freestanding environment). This macro is provided for
   backward compatibility; new code need not use it. */
 /* #undef STDC_HEADERS */

+/* Define to any value to enable differential fuzzing support. */
+/* #undef SUPPORT_DIFF_FUZZ */
+
 /* Define to any value to enable support for Just-In-Time compiling. */
 /* #undef SUPPORT_JIT */

@@ -357,7 +373,7 @@ sure both macros are undefined; an emulation function will then be used. */
 /* Define to any value for valgrind support to find invalid memory reads. */
 /* #undef SUPPORT_VALGRIND */

-/* Enable extensions on AIX 3, Interix.  */
+/* Enable extensions on AIX, Interix, z/OS.  */
 #ifndef _ALL_SOURCE
 # define _ALL_SOURCE 1
 #endif
@@ -418,11 +434,15 @@ sure both macros are undefined; an emulation function will then be used. */
 #ifndef __STDC_WANT_IEC_60559_DFP_EXT__
 # define __STDC_WANT_IEC_60559_DFP_EXT__ 1
 #endif
+/* Enable extensions specified by C23 Annex F.  */
+#ifndef __STDC_WANT_IEC_60559_EXT__
+# define __STDC_WANT_IEC_60559_EXT__ 1
+#endif
 /* Enable extensions specified by ISO/IEC TS 18661-4:2015.  */
 #ifndef __STDC_WANT_IEC_60559_FUNCS_EXT__
 # define __STDC_WANT_IEC_60559_FUNCS_EXT__ 1
 #endif
-/* Enable extensions specified by ISO/IEC TS 18661-3:2015.  */
+/* Enable extensions specified by C23 Annex H and ISO/IEC TS 18661-3:2015.  */
 #ifndef __STDC_WANT_IEC_60559_TYPES_EXT__
 # define __STDC_WANT_IEC_60559_TYPES_EXT__ 1
 #endif
@@ -445,20 +465,26 @@ sure both macros are undefined; an emulation function will then be used. */
 #endif

 /* Version number of package */
-#define VERSION "10.42"
+#define VERSION "10.44"

 /* Number of bits in a file offset, on hosts where this is settable. */
 /* #undef _FILE_OFFSET_BITS */

-/* Define for large files, on AIX-style hosts. */
+/* Define to 1 on platforms where this makes off_t a 64-bit type. */
 /* #undef _LARGE_FILES */

-/* Define to empty if `const' does not conform to ANSI C. */
+/* Number of bits in time_t, on hosts where this is settable. */
+/* #undef _TIME_BITS */
+
+/* Define to 1 on platforms where this makes time_t a 64-bit type. */
+/* #undef __MINGW_USE_VC2005_COMPAT */
+
+/* Define to empty if 'const' does not conform to ANSI C. */
 /* #undef const */

 /* Define to the type of a signed integer type of width exactly 64 bits if
   such a type exists and the standard includes do not define it. */
 /* #undef int64_t */

-/* Define to `unsigned int' if <sys/types.h> does not define. */
+/* Define as 'unsigned int' if <stddef.h> doesn't define. */
 /* #undef size_t */
--- a/Foundation/src/pcre2_context.c
+++ b/Foundation/src/pcre2_context.c
@@ -7,7 +7,7 @@ and semantics are as close as possible to those of the Perl 5 language.

                       Written by Philip Hazel
     Original API code Copyright (c) 1997-2012 University of Cambridge
-          New API code Copyright (c) 2016-2022 University of Cambridge
+          New API code Copyright (c) 2016-2024 University of Cambridge

 -----------------------------------------------------------------------------
 Redistribution and use in source and binary forms, with or without
@@ -133,10 +133,13 @@ const pcre2_compile_context PRIV(default_compile_context) = {
  NULL,                                      /* Stack guard data */
  PRIV(default_tables),                      /* Character tables */
  PCRE2_UNSET,                               /* Max pattern length */
+  PCRE2_UNSET,                               /* Max pattern compiled length */
  BSR_DEFAULT,                               /* Backslash R default */
  NEWLINE_DEFAULT,                           /* Newline convention */
  PARENS_NEST_LIMIT,                         /* As it says */
-  0 };                                       /* Extra options */
+  0,                                         /* Extra options */
+  MAX_VARLOOKBEHIND                          /* As it says */
+  };

 /* The create function copies the default into the new memory, but must
 override the default memory handling functions if a gcontext was provided. */
@@ -225,49 +228,48 @@ return ccontext;
 PCRE2_EXP_DEFN pcre2_general_context * PCRE2_CALL_CONVENTION
 pcre2_general_context_copy(pcre2_general_context *gcontext)
 {
-pcre2_general_context *new =
+pcre2_general_context *newcontext =
  gcontext->memctl.malloc(sizeof(pcre2_real_general_context),
  gcontext->memctl.memory_data);
-if (new == NULL) return NULL;
-memcpy(new, gcontext, sizeof(pcre2_real_general_context));
-return new;
+if (newcontext == NULL) return NULL;
+memcpy(newcontext, gcontext, sizeof(pcre2_real_general_context));
+return newcontext;
 }


 PCRE2_EXP_DEFN pcre2_compile_context * PCRE2_CALL_CONVENTION
 pcre2_compile_context_copy(pcre2_compile_context *ccontext)
 {
-pcre2_compile_context *new =
+pcre2_compile_context *newcontext =
  ccontext->memctl.malloc(sizeof(pcre2_real_compile_context),
  ccontext->memctl.memory_data);
-if (new == NULL) return NULL;
-memcpy(new, ccontext, sizeof(pcre2_real_compile_context));
-return new;
+if (newcontext == NULL) return NULL;
+memcpy(newcontext, ccontext, sizeof(pcre2_real_compile_context));
+return newcontext;
 }


 PCRE2_EXP_DEFN pcre2_match_context * PCRE2_CALL_CONVENTION
 pcre2_match_context_copy(pcre2_match_context *mcontext)
 {
-pcre2_match_context *new =
+pcre2_match_context *newcontext =
  mcontext->memctl.malloc(sizeof(pcre2_real_match_context),
  mcontext->memctl.memory_data);
-if (new == NULL) return NULL;
-memcpy(new, mcontext, sizeof(pcre2_real_match_context));
-return new;
+if (newcontext == NULL) return NULL;
+memcpy(newcontext, mcontext, sizeof(pcre2_real_match_context));
+return newcontext;
 }


-
 PCRE2_EXP_DEFN pcre2_convert_context * PCRE2_CALL_CONVENTION
 pcre2_convert_context_copy(pcre2_convert_context *ccontext)
 {
-pcre2_convert_context *new =
+pcre2_convert_context *newcontext =
  ccontext->memctl.malloc(sizeof(pcre2_real_convert_context),
  ccontext->memctl.memory_data);
-if (new == NULL) return NULL;
-memcpy(new, ccontext, sizeof(pcre2_real_convert_context));
-return new;
+if (newcontext == NULL) return NULL;
+memcpy(newcontext, ccontext, sizeof(pcre2_real_convert_context));
+return newcontext;
 }


@@ -348,6 +350,13 @@ ccontext->max_pattern_length = length;
 return 0;
 }

+PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
+pcre2_set_max_pattern_compiled_length(pcre2_compile_context *ccontext, PCRE2_SIZE length)
+{
+ccontext->max_pattern_compiled_length = length;
+return 0;
+}
+
 PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
 pcre2_set_newline(pcre2_compile_context *ccontext, uint32_t newline)
 {
@@ -367,6 +376,13 @@ switch(newline)
  }
 }

+PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
+pcre2_set_max_varlookbehind(pcre2_compile_context *ccontext, uint32_t limit)
+{
+ccontext->max_varlookbehind = limit;
+return 0;
+}
+
 PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
 pcre2_set_parens_nest_limit(pcre2_compile_context *ccontext, uint32_t limit)
 {
--- a/Foundation/src/pcre2_convert.c
+++ b/Foundation/src/pcre2_convert.c
@@ -537,6 +537,14 @@ Returns:   !0 => character is found in the class
 static BOOL
 convert_glob_char_in_class(int class_index, PCRE2_UCHAR c)
 {
+#if PCRE2_CODE_UNIT_WIDTH != 8
+if (c > 0xff)
+  {
+  /* ctype functions are not sane for c > 0xff */
+  return 0;
+  }
+#endif
+
 switch (class_index)
  {
  case 1: return isalnum(c);
--- a/Foundation/src/pcre2_dfa_match.c
+++ b/Foundation/src/pcre2_dfa_match.c
@@ -7,7 +7,7 @@ and semantics are as close as possible to those of the Perl 5 language.

                       Written by Philip Hazel
     Original API code Copyright (c) 1997-2012 University of Cambridge
-          New API code Copyright (c) 2016-2022 University of Cambridge
+          New API code Copyright (c) 2016-2023 University of Cambridge

 -----------------------------------------------------------------------------
 Redistribution and use in source and binary forms, with or without
@@ -166,7 +166,7 @@ static const uint8_t coptable[] = {
  0,                             /* KetRmax                                */
  0,                             /* KetRmin                                */
  0,                             /* KetRpos                                */
-  0,                             /* Reverse                                */
+  0, 0,                          /* Reverse, Vreverse                      */
  0,                             /* Assert                                 */
  0,                             /* Assert not                             */
  0,                             /* Assert behind                          */
@@ -185,7 +185,8 @@ static const uint8_t coptable[] = {
  0, 0, 0, 0,                    /* SKIP, SKIP_ARG, THEN, THEN_ARG         */
  0, 0,                          /* COMMIT, COMMIT_ARG                     */
  0, 0, 0,                       /* FAIL, ACCEPT, ASSERT_ACCEPT            */
-  0, 0, 0                        /* CLOSE, SKIPZERO, DEFINE                */
+  0, 0, 0,                       /* CLOSE, SKIPZERO, DEFINE                */
+  0, 0                           /* \B and \b in UCP mode                  */
 };

 /* This table identifies those opcodes that inspect a character. It is used to
@@ -243,7 +244,7 @@ static const uint8_t poptable[] = {
  0,                             /* KetRmax                                */
  0,                             /* KetRmin                                */
  0,                             /* KetRpos                                */
-  0,                             /* Reverse                                */
+  0, 0,                          /* Reverse, Vreverse                      */
  0,                             /* Assert                                 */
  0,                             /* Assert not                             */
  0,                             /* Assert behind                          */
@@ -262,7 +263,8 @@ static const uint8_t poptable[] = {
  0, 0, 0, 0,                    /* SKIP, SKIP_ARG, THEN, THEN_ARG         */
  0, 0,                          /* COMMIT, COMMIT_ARG                     */
  0, 0, 0,                       /* FAIL, ACCEPT, ASSERT_ACCEPT            */
-  0, 0, 0                        /* CLOSE, SKIPZERO, DEFINE                */
+  0, 0, 0,                       /* CLOSE, SKIPZERO, DEFINE                */
+  1, 1                           /* \B and \b in UCP mode                  */
 };

 /* These 2 tables allow for compact code for testing for \D, \d, \S, \s, \W,
@@ -424,7 +426,7 @@ overflow. */

 else
  {
-  uint32_t newsize = (rws->size >= UINT32_MAX/2)? UINT32_MAX/2 : rws->size * 2;
+  uint32_t newsize = (rws->size >= UINT32_MAX/(sizeof(int)*2))? UINT32_MAX/sizeof(int) : rws->size * 2;
  uint32_t newsizeK = newsize/(1024/sizeof(int));

  if (newsizeK + mb->heap_used > mb->heap_limit)
@@ -587,7 +589,7 @@ if (*this_start_code == OP_ASSERTBACK || *this_start_code == OP_ASSERTBACK_NOT)
  end_code = this_start_code;
  do
    {
-    size_t back = (size_t)GET(end_code, 2+LINK_SIZE);
+    size_t back = (size_t)GET2(end_code, 2+LINK_SIZE);
    if (back > max_back) max_back = back;
    end_code += GET(end_code, 1);
    }
@@ -631,8 +633,8 @@ if (*this_start_code == OP_ASSERTBACK || *this_start_code == OP_ASSERTBACK_NOT)
  end_code = this_start_code;
  do
    {
-    uint32_t revlen = (end_code[1+LINK_SIZE] == OP_REVERSE)? 1 + LINK_SIZE : 0;
-    size_t back = (revlen == 0)? 0 : (size_t)GET(end_code, 2+LINK_SIZE);
+    uint32_t revlen = (end_code[1+LINK_SIZE] == OP_REVERSE)? 1 + IMM2_SIZE : 0;
+    size_t back = (revlen == 0)? 0 : (size_t)GET2(end_code, 2+LINK_SIZE);
    if (back <= gone_back)
      {
      int bstate = (int)(end_code - start_code + 1 + LINK_SIZE + revlen);
@@ -1098,6 +1100,8 @@ for (;;)
      /*-----------------------------------------------------------------*/
      case OP_WORD_BOUNDARY:
      case OP_NOT_WORD_BOUNDARY:
+      case OP_NOT_UCP_WORD_BOUNDARY:
+      case OP_UCP_WORD_BOUNDARY:
        {
        int left_word, right_word;

@@ -1110,13 +1114,13 @@ for (;;)
 #endif
          GETCHARTEST(d, temp);
 #ifdef SUPPORT_UNICODE
-          if ((mb->poptions & PCRE2_UCP) != 0)
+          if (codevalue == OP_UCP_WORD_BOUNDARY ||
+              codevalue == OP_NOT_UCP_WORD_BOUNDARY)
            {
-            if (d == '_') left_word = TRUE; else
-              {
-              uint32_t cat = UCD_CATEGORY(d);
-              left_word = (cat == ucp_L || cat == ucp_N);
-              }
+            int chartype = UCD_CHARTYPE(d);
+            int category = PRIV(ucp_gentype)[chartype];
+            left_word = (category == ucp_L || category == ucp_N ||
+              chartype == ucp_Mn || chartype == ucp_Pc);
            }
          else
 #endif
@@ -1135,13 +1139,13 @@ for (;;)
            mb->last_used_ptr = temp;
            }
 #ifdef SUPPORT_UNICODE
-          if ((mb->poptions & PCRE2_UCP) != 0)
+          if (codevalue == OP_UCP_WORD_BOUNDARY ||
+              codevalue == OP_NOT_UCP_WORD_BOUNDARY)
            {
-            if (c == '_') right_word = TRUE; else
-              {
-              uint32_t cat = UCD_CATEGORY(c);
-              right_word = (cat == ucp_L || cat == ucp_N);
-              }
+            int chartype = UCD_CHARTYPE(c);
+            int category = PRIV(ucp_gentype)[chartype];
+            right_word = (category == ucp_L || category == ucp_N ||
+              chartype == ucp_Mn || chartype == ucp_Pc);
            }
          else
 #endif
@@ -1149,7 +1153,9 @@ for (;;)
          }
        else right_word = FALSE;

-        if ((left_word == right_word) == (codevalue == OP_NOT_WORD_BOUNDARY))
+        if ((left_word == right_word) ==
+            (codevalue == OP_NOT_WORD_BOUNDARY ||
+             codevalue == OP_NOT_UCP_WORD_BOUNDARY))
          { ADD_ACTIVE(state_offset + 1, 0); }
        }
      break;
@@ -1166,6 +1172,7 @@ for (;;)
      if (clen > 0)
        {
        BOOL OK;
+        int chartype;
        const uint32_t *cp;
        const ucd_record * prop = GET_UCD(c);
        switch(code[1])
@@ -1175,8 +1182,9 @@ for (;;)
          break;

          case PT_LAMP:
-          OK = prop->chartype == ucp_Lu || prop->chartype == ucp_Ll ||
-               prop->chartype == ucp_Lt;
+          chartype = prop->chartype;
+          OK = chartype == ucp_Lu || chartype == ucp_Ll ||
+               chartype == ucp_Lt;
          break;

          case PT_GC:
@@ -1199,8 +1207,9 @@ for (;;)
          /* These are specials for combination cases. */

          case PT_ALNUM:
-          OK = PRIV(ucp_gentype)[prop->chartype] == ucp_L ||
-               PRIV(ucp_gentype)[prop->chartype] == ucp_N;
+          chartype = prop->chartype;
+          OK = PRIV(ucp_gentype)[chartype] == ucp_L ||
+               PRIV(ucp_gentype)[chartype] == ucp_N;
          break;

          /* Perl space used to exclude VT, but from Perl 5.18 it is included,
@@ -1223,12 +1232,20 @@ for (;;)
          break;

          case PT_WORD:
-          OK = PRIV(ucp_gentype)[prop->chartype] == ucp_L ||
-               PRIV(ucp_gentype)[prop->chartype] == ucp_N ||
-               c == CHAR_UNDERSCORE;
+          chartype = prop->chartype;
+          OK = PRIV(ucp_gentype)[chartype] == ucp_L ||
+               PRIV(ucp_gentype)[chartype] == ucp_N ||
+               chartype == ucp_Mn || chartype == ucp_Pc;
          break;

          case PT_CLIST:
+#if PCRE2_CODE_UNIT_WIDTH == 32
+          if (c > MAX_UTF_CODE_POINT)
+            {
+            OK = FALSE;
+            break;
+            }
+#endif
          cp = PRIV(ucd_caseless_sets) + code[2];
          for (;;)
            {
@@ -1438,6 +1455,7 @@ for (;;)
      if (clen > 0)
        {
        BOOL OK;
+        int chartype;
        const uint32_t *cp;
        const ucd_record * prop = GET_UCD(c);
        switch(code[2])
@@ -1447,8 +1465,8 @@ for (;;)
          break;

          case PT_LAMP:
-          OK = prop->chartype == ucp_Lu || prop->chartype == ucp_Ll ||
-            prop->chartype == ucp_Lt;
+          chartype = prop->chartype;
+          OK = chartype == ucp_Lu || chartype == ucp_Ll || chartype == ucp_Lt;
          break;

          case PT_GC:
@@ -1471,8 +1489,9 @@ for (;;)
          /* These are specials for combination cases. */

          case PT_ALNUM:
-          OK = PRIV(ucp_gentype)[prop->chartype] == ucp_L ||
-               PRIV(ucp_gentype)[prop->chartype] == ucp_N;
+          chartype = prop->chartype;
+          OK = PRIV(ucp_gentype)[chartype] == ucp_L ||
+               PRIV(ucp_gentype)[chartype] == ucp_N;
          break;

          /* Perl space used to exclude VT, but from Perl 5.18 it is included,
@@ -1495,12 +1514,20 @@ for (;;)
          break;

          case PT_WORD:
-          OK = PRIV(ucp_gentype)[prop->chartype] == ucp_L ||
-               PRIV(ucp_gentype)[prop->chartype] == ucp_N ||
-               c == CHAR_UNDERSCORE;
+          chartype = prop->chartype;
+          OK = PRIV(ucp_gentype)[chartype] == ucp_L ||
+               PRIV(ucp_gentype)[chartype] == ucp_N ||
+               chartype == ucp_Mn || chartype == ucp_Pc;
          break;

          case PT_CLIST:
+#if PCRE2_CODE_UNIT_WIDTH == 32
+          if (c > MAX_UTF_CODE_POINT)
+            {
+            OK = FALSE;
+            break;
+            }
+#endif
          cp = PRIV(ucd_caseless_sets) + code[3];
          for (;;)
            {
@@ -1693,6 +1720,7 @@ for (;;)
      if (clen > 0)
        {
        BOOL OK;
+        int chartype;
        const uint32_t *cp;
        const ucd_record * prop = GET_UCD(c);
        switch(code[2])
@@ -1702,8 +1730,8 @@ for (;;)
          break;

          case PT_LAMP:
-          OK = prop->chartype == ucp_Lu || prop->chartype == ucp_Ll ||
-            prop->chartype == ucp_Lt;
+          chartype = prop->chartype;
+          OK = chartype == ucp_Lu || chartype == ucp_Ll || chartype == ucp_Lt;
          break;

          case PT_GC:
@@ -1726,8 +1754,9 @@ for (;;)
          /* These are specials for combination cases. */

          case PT_ALNUM:
-          OK = PRIV(ucp_gentype)[prop->chartype] == ucp_L ||
-               PRIV(ucp_gentype)[prop->chartype] == ucp_N;
+          chartype = prop->chartype;
+          OK = PRIV(ucp_gentype)[chartype] == ucp_L ||
+               PRIV(ucp_gentype)[chartype] == ucp_N;
          break;

          /* Perl space used to exclude VT, but from Perl 5.18 it is included,
@@ -1750,12 +1779,20 @@ for (;;)
          break;

          case PT_WORD:
-          OK = PRIV(ucp_gentype)[prop->chartype] == ucp_L ||
-               PRIV(ucp_gentype)[prop->chartype] == ucp_N ||
-               c == CHAR_UNDERSCORE;
+          chartype = prop->chartype;
+          OK = PRIV(ucp_gentype)[chartype] == ucp_L ||
+               PRIV(ucp_gentype)[chartype] == ucp_N ||
+               chartype == ucp_Mn || chartype == ucp_Pc;
          break;

          case PT_CLIST:
+#if PCRE2_CODE_UNIT_WIDTH == 32
+          if (c > MAX_UTF_CODE_POINT)
+            {
+            OK = FALSE;
+            break;
+            }
+#endif
          cp = PRIV(ucd_caseless_sets) + code[3];
          for (;;)
            {
@@ -1973,6 +2010,7 @@ for (;;)
      if (clen > 0)
        {
        BOOL OK;
+        int chartype;
        const uint32_t *cp;
        const ucd_record * prop = GET_UCD(c);
        switch(code[1 + IMM2_SIZE + 1])
@@ -1982,8 +2020,8 @@ for (;;)
          break;

          case PT_LAMP:
-          OK = prop->chartype == ucp_Lu || prop->chartype == ucp_Ll ||
-            prop->chartype == ucp_Lt;
+          chartype = prop->chartype;
+          OK = chartype == ucp_Lu || chartype == ucp_Ll || chartype == ucp_Lt;
          break;

          case PT_GC:
@@ -2007,8 +2045,9 @@ for (;;)
          /* These are specials for combination cases. */

          case PT_ALNUM:
-          OK = PRIV(ucp_gentype)[prop->chartype] == ucp_L ||
-               PRIV(ucp_gentype)[prop->chartype] == ucp_N;
+          chartype = prop->chartype;
+          OK = PRIV(ucp_gentype)[chartype] == ucp_L ||
+               PRIV(ucp_gentype)[chartype] == ucp_N;
          break;

          /* Perl space used to exclude VT, but from Perl 5.18 it is included,
@@ -2031,12 +2070,20 @@ for (;;)
          break;

          case PT_WORD:
-          OK = PRIV(ucp_gentype)[prop->chartype] == ucp_L ||
-               PRIV(ucp_gentype)[prop->chartype] == ucp_N ||
-               c == CHAR_UNDERSCORE;
+          chartype = prop->chartype;
+          OK = PRIV(ucp_gentype)[chartype] == ucp_L ||
+               PRIV(ucp_gentype)[chartype] == ucp_N ||
+               chartype == ucp_Mn || chartype == ucp_Pc;
          break;

          case PT_CLIST:
+#if PCRE2_CODE_UNIT_WIDTH == 32
+          if (c > MAX_UTF_CODE_POINT)
+            {
+            OK = FALSE;
+            break;
+            }
+#endif
          cp = PRIV(ucd_caseless_sets) + code[1 + IMM2_SIZE + 2];
          for (;;)
            {
@@ -2892,7 +2939,6 @@ for (;;)
        int *local_workspace;
        PCRE2_SIZE *local_offsets;
        RWS_anchor *rws = (RWS_anchor *)RWS;
-        dfa_recursion_info *ri;
        PCRE2_SPTR callpat = start_code + GET(code, 1);
        uint32_t recno = (callpat == mb->start_code)? 0 :
          GET2(callpat, 1 + LINK_SIZE);
@@ -2909,18 +2955,24 @@ for (;;)
        rws->free -= RWS_RSIZE + RWS_OVEC_RSIZE;

        /* Check for repeating a recursion without advancing the subject
-        pointer. This should catch convoluted mutual recursions. (Some simple
-        cases are caught at compile time.) */
+        pointer or last used character. This should catch convoluted mutual
+        recursions. (Some simple cases are caught at compile time.) */

-        for (ri = mb->recursive; ri != NULL; ri = ri->prevrec)
-          if (recno == ri->group_num && ptr == ri->subject_position)
+        for (dfa_recursion_info *ri = mb->recursive;
+             ri != NULL;
+             ri = ri->prevrec)
+          {
+          if (recno == ri->group_num && ptr == ri->subject_position &&
+              mb->last_used_ptr == ri->last_used_ptr)
            return PCRE2_ERROR_RECURSELOOP;
+          }

        /* Remember this recursion and where we started it so as to
        catch infinite loops. */

        new_recursive.group_num = recno;
        new_recursive.subject_position = ptr;
+        new_recursive.last_used_ptr = mb->last_used_ptr;
        new_recursive.prevrec = mb->recursive;
        mb->recursive = &new_recursive;

@@ -3422,7 +3474,7 @@ anchored = (options & (PCRE2_ANCHORED|PCRE2_DFA_RESTART)) != 0 ||
 where to start. */

 startline = (re->flags & PCRE2_STARTLINE) != 0;
-firstline = (re->overall_options & PCRE2_FIRSTLINE) != 0;
+firstline = !anchored && (re->overall_options & PCRE2_FIRSTLINE) != 0;
 bumpalong_limit = end_subject;

 /* Initialize and set up the fixed fields in the callout block, with a pointer
@@ -3992,8 +4044,9 @@ for (;;)
      match_data->ovector[0] = (PCRE2_SIZE)(start_match - subject);
      match_data->ovector[1] = (PCRE2_SIZE)(end_subject - subject);
      }
+    match_data->subject_length = length;
    match_data->leftchar = (PCRE2_SIZE)(mb->start_used_ptr - subject);
-    match_data->rightchar = (PCRE2_SIZE)( mb->last_used_ptr - subject);
+    match_data->rightchar = (PCRE2_SIZE)(mb->last_used_ptr - subject);
    match_data->startchar = (PCRE2_SIZE)(start_match - subject);
    match_data->rc = rc;

--- a/Foundation/src/pcre2_error.c
+++ b/Foundation/src/pcre2_error.c
@@ -7,7 +7,7 @@ and semantics are as close as possible to those of the Perl 5 language.

                       Written by Philip Hazel
     Original API code Copyright (c) 1997-2012 University of Cambridge
-          New API code Copyright (c) 2016-2021 University of Cambridge
+          New API code Copyright (c) 2016-2024 University of Cambridge

 -----------------------------------------------------------------------------
 Redistribution and use in source and binary forms, with or without
@@ -79,7 +79,7 @@ static const unsigned char compile_error_texts[] =
  "missing closing parenthesis\0"
  /* 15 */
  "reference to non-existent subpattern\0"
-  "pattern passed as NULL\0"
+  "pattern passed as NULL with non-zero length\0"
  "unrecognised compile-time option bit(s)\0"
  "missing ) after (?# comment\0"
  "parentheses are too deeply nested\0"
@@ -90,7 +90,7 @@ static const unsigned char compile_error_texts[] =
  "internal error: code overflow\0"
  "missing closing parenthesis for condition\0"
  /* 25 */
-  "lookbehind assertion is not fixed length\0"
+  "length of lookbehind assertion is not limited\0"
  "a relative value of zero is not allowed\0"
  "conditional subpattern contains more than two branches\0"
  "assertion expected after (?( or (?(?C)\0"
@@ -184,6 +184,9 @@ static const unsigned char compile_error_texts[] =
  "too many capturing groups (maximum 65535)\0"
  "atomic assertion expected after (?( or (?(?C)\0"
  "\\K is not allowed in lookarounds (but see PCRE2_EXTRA_ALLOW_LOOKAROUND_BSK)\0"
+  /* 100 */
+  "branch too long in variable-length lookbehind assertion\0"
+  "compiled pattern would be longer than the limit set by the application\0"
  ;

 /* Match-time and UTF error texts are in the same format. */
@@ -269,6 +272,7 @@ static const unsigned char match_error_texts[] =
  /* 65 */
  "internal error - duplicate substitution match\0"
  "PCRE2_MATCH_INVALID_UTF is not supported for DFA matching\0"
+  "INTERNAL ERROR: invalid substring offset\0"
  ;


--- a/Foundation/src/pcre2_extuni.c
+++ b/Foundation/src/pcre2_extuni.c
@@ -7,7 +7,7 @@ and semantics are as close as possible to those of the Perl 5 language.

                       Written by Philip Hazel
     Original API code Copyright (c) 1997-2012 University of Cambridge
-          New API code Copyright (c) 2016-2021 University of Cambridge
+          New API code Copyright (c) 2016-2024 University of Cambridge

 -----------------------------------------------------------------------------
 Redistribution and use in source and binary forms, with or without
@@ -71,7 +71,11 @@ return NULL;
 *      Match an extended grapheme sequence       *
 *************************************************/

-/*
+/* NOTE: The logic contained in this function is replicated in three special-
+purpose functions in the pcre2_jit_compile.c module. If the logic below is
+changed, they must be kept in step so that the interpreter and the JIT have the
+same behaviour.
+
 Arguments:
  c              the first character
  eptr           pointer to next character
@@ -88,6 +92,7 @@ PCRE2_SPTR
 PRIV(extuni)(uint32_t c, PCRE2_SPTR eptr, PCRE2_SPTR start_subject,
  PCRE2_SPTR end_subject, BOOL utf, int *xcount)
 {
+BOOL was_ep_ZWJ = FALSE;
 int lgb = UCD_GRAPHBREAK(c);

 while (eptr < end_subject)
@@ -98,6 +103,12 @@ while (eptr < end_subject)
  rgb = UCD_GRAPHBREAK(c);
  if ((PRIV(ucp_gbtable)[lgb] & (1u << rgb)) == 0) break;

+  /* ZWJ followed by Extended Pictographic is allowed only if the ZWJ was
+  preceded by Extended Pictographic. */
+
+  if (lgb == ucp_gbZWJ && rgb == ucp_gbExtended_Pictographic && !was_ep_ZWJ)
+    break;
+
  /* Not breaking between Regional Indicators is allowed only if there
  are an even number of preceding RIs. */

@@ -125,12 +136,15 @@ while (eptr < end_subject)
    if ((ricount & 1) != 0) break;  /* Grapheme break required */
    }

-  /* If Extend or ZWJ follows Extended_Pictographic, do not update lgb; this
-  allows any number of them before a following Extended_Pictographic. */
+  /* Set a flag when ZWJ follows Extended Pictographic (with optional Extend in
+  between; see next statement). */

-  if ((rgb != ucp_gbExtend && rgb != ucp_gbZWJ) ||
-       lgb != ucp_gbExtended_Pictographic)
-    lgb = rgb;
+  was_ep_ZWJ = (lgb == ucp_gbExtended_Pictographic && rgb == ucp_gbZWJ);
+
+  /* If Extend follows Extended_Pictographic, do not update lgb; this allows
+  any number of them before a following ZWJ. */
+
+  if (rgb != ucp_gbExtend || lgb != ucp_gbExtended_Pictographic) lgb = rgb;

  eptr += len;
  if (xcount != NULL) *xcount += 1;
--- a/Foundation/src/pcre2_find_bracket.c
+++ b/Foundation/src/pcre2_find_bracket.c
@@ -7,7 +7,7 @@ and semantics are as close as possible to those of the Perl 5 language.

                       Written by Philip Hazel
     Original API code Copyright (c) 1997-2012 University of Cambridge
-          New API code Copyright (c) 2016-2018 University of Cambridge
+          New API code Copyright (c) 2016-2023 University of Cambridge

 -----------------------------------------------------------------------------
 Redistribution and use in source and binary forms, with or without
@@ -41,9 +41,9 @@ POSSIBILITY OF SUCH DAMAGE.

 /* This module contains a single function that scans through a compiled pattern
 until it finds a capturing bracket with the given number, or, if the number is
-negative, an instance of OP_REVERSE for a lookbehind. The function is called
-from pcre2_compile.c and also from pcre2_study.c when finding the minimum
-matching length. */
+negative, an instance of OP_REVERSE or OP_VREVERSE for a lookbehind. The
+function is called from pcre2_compile.c and also from pcre2_study.c when
+finding the minimum matching length. */


 #include "pcre2_config.h"
@@ -82,7 +82,7 @@ for (;;)

  /* Handle lookbehind */

-  else if (c == OP_REVERSE)
+  else if (c == OP_REVERSE || c == OP_VREVERSE)
    {
    if (number < 0) return (PCRE2_UCHAR *)code;
    code += PRIV(OP_lengths)[c];
--- a/Foundation/src/pcre2_internal.h
+++ b/Foundation/src/pcre2_internal.h
@@ -7,7 +7,7 @@ and semantics are as close as possible to those of the Perl 5 language.

                       Written by Philip Hazel
     Original API code Copyright (c) 1997-2012 University of Cambridge
-          New API code Copyright (c) 2016-2022 University of Cambridge
+          New API code Copyright (c) 2016-2023 University of Cambridge

 -----------------------------------------------------------------------------
 Redistribution and use in source and binary forms, with or without
@@ -51,6 +51,24 @@ pcre2test.c with CODE_UNIT_WIDTH == 0. */
 #error The use of both EBCDIC and SUPPORT_UNICODE is not supported.
 #endif

+/* When compiling one of the libraries, the value of PCRE2_CODE_UNIT_WIDTH must
+be 8, 16, or 32. AutoTools and CMake ensure that this is always the case, but
+other building methods may not, so here is a check. It is cut out when
+building pcre2test, because that sets the value to zero. No other source should
+be including this file. There is no explicit way of forcing a compile to be
+abandoned, but trying to include a non-existent file seems cleanest. Otherwise
+there will be many irrelevant consequential errors. */
+
+#if (!defined PCRE2_BUILDING_PCRE2TEST && !defined PCRE2_DFTABLES) && \
+  (!defined PCRE2_CODE_UNIT_WIDTH ||     \
+    (PCRE2_CODE_UNIT_WIDTH != 8 &&       \
+     PCRE2_CODE_UNIT_WIDTH != 16 &&      \
+     PCRE2_CODE_UNIT_WIDTH != 32))
+#error PCRE2_CODE_UNIT_WIDTH must be defined as 8, 16, or 32.
+#include <AbandonCompile>
+#endif
+
+
 /* Standard C headers */

 #include <ctype.h>
@@ -116,23 +134,27 @@ special-purpose environments) might want to stick other stuff in front of
 exported symbols. That's why, in the non-Windows case, we set PCRE2_EXP_DEFN
 only if it is not already set. */

+#if !defined(PCRE2_EXPORT)
+#define PCRE2_EXPORT
+#endif
+
 #ifndef PCRE2_EXP_DECL
 #  ifdef _WIN32
 #    ifndef PCRE2_STATIC
-#      define PCRE2_EXP_DECL       extern __declspec(dllexport)
-#      define PCRE2_EXP_DEFN       __declspec(dllexport)
+#      define PCRE2_EXP_DECL		extern __declspec(dllexport)
+#      define PCRE2_EXP_DEFN		__declspec(dllexport)
 #    else
-#      define PCRE2_EXP_DECL       extern
+#      define PCRE2_EXP_DECL		extern PCRE2_EXPORT
 #      define PCRE2_EXP_DEFN
 #    endif
 #  else
 #    ifdef __cplusplus
-#      define PCRE2_EXP_DECL       extern "C"
+#      define PCRE2_EXP_DECL		extern "C" PCRE2_EXPORT
 #    else
-#      define PCRE2_EXP_DECL       extern
+#      define PCRE2_EXP_DECL		extern PCRE2_EXPORT
 #    endif
 #    ifndef PCRE2_EXP_DEFN
-#      define PCRE2_EXP_DEFN       PCRE2_EXP_DECL
+#      define PCRE2_EXP_DEFN		PCRE2_EXP_DECL
 #    endif
 #  endif
 #endif
@@ -156,8 +178,8 @@ pcre2_match() because of the way it backtracks. */
 #define PCRE2_SPTR CUSTOM_SUBJECT_PTR
 #endif

-/* When checking for integer overflow in pcre2_compile(), we need to handle
-large integers. If a 64-bit integer type is available, we can use that.
+/* When checking for integer overflow, we need to handle large integers.
+If a 64-bit integer type is available, we can use that.
 Otherwise we have to cast to double, which of course requires floating point
 arithmetic. Handle this by defining a macro for the appropriate type. */

@@ -1281,7 +1303,7 @@ match. */
 #define PT_ALNUM      6    /* Alphanumeric - the union of L and N */
 #define PT_SPACE      7    /* Perl space - general category Z plus 9,10,12,13 */
 #define PT_PXSPACE    8    /* POSIX space - Z plus 9,10,11,12,13 */
-#define PT_WORD       9    /* Word - L plus N plus underscore */
+#define PT_WORD       9    /* Word - L, N, Mn, or Pc */
 #define PT_CLIST     10    /* Pseudo-property: match character list */
 #define PT_UCNC      11    /* Universal Character nameable character */
 #define PT_BIDICL    12    /* Specified bidi class */
@@ -1297,6 +1319,7 @@ table. */
 #define PT_PXGRAPH   14    /* [:graph:] - characters that mark the paper */
 #define PT_PXPRINT   15    /* [:print:] - [:graph:] plus non-control spaces */
 #define PT_PXPUNCT   16    /* [:punct:] - punctuation characters */
+#define PT_PXXDIGIT  17    /* [:xdigit:] - hex digits */

 /* This value is used when parsing \p and \P escapes to indicate that neither
 \p{script:...} nor \p{scx:...} has been encountered. */
@@ -1327,6 +1350,12 @@ mode rather than an escape sequence. It is also used for [^] in JavaScript
 compatibility mode, and for \C in non-utf mode. In non-DOTALL mode, "." behaves
 like \N.

+ESC_ub is a special return from check_escape() when, in BSUX mode, \u{ is not
+followed by hex digits and }, in which case it should mean a literal "u"
+followed by a literal "{". This hack is necessary for cases like \u{ 12}
+because without it, this is interpreted as u{12} now that spaces are allowed in
+quantifiers.
+
 Negative numbers are used to encode a backreference (\1, \2, \3, etc.) in
 check_escape(). There are tests in the code for an escape greater than ESC_b
 and less than ESC_Z to detect the types that may be repeated. These are the
@@ -1336,7 +1365,7 @@ consume a character, that code will have to change. */
 enum { ESC_A = 1, ESC_G, ESC_K, ESC_B, ESC_b, ESC_D, ESC_d, ESC_S, ESC_s,
       ESC_W, ESC_w, ESC_N, ESC_dum, ESC_C, ESC_P, ESC_p, ESC_R, ESC_H,
       ESC_h, ESC_V, ESC_v, ESC_X, ESC_Z, ESC_z,
-       ESC_E, ESC_Q, ESC_g, ESC_k };
+       ESC_E, ESC_Q, ESC_g, ESC_k, ESC_ub };


 /********************** Opcode definitions ******************/
@@ -1372,8 +1401,8 @@ enum {
  OP_SOD,            /* 1 Start of data: \A */
  OP_SOM,            /* 2 Start of match (subject + offset): \G */
  OP_SET_SOM,        /* 3 Set start of match (\K) */
-  OP_NOT_WORD_BOUNDARY,  /*  4 \B */
-  OP_WORD_BOUNDARY,      /*  5 \b */
+  OP_NOT_WORD_BOUNDARY,  /*  4 \B -- see also OP_NOT_UCP_WORD_BOUNDARY */
+  OP_WORD_BOUNDARY,      /*  5 \b -- see also OP_UCP_WORD_BOUNDARY */
  OP_NOT_DIGIT,          /*  6 \D */
  OP_DIGIT,              /*  7 \d */
  OP_NOT_WHITESPACE,     /*  8 \S */
@@ -1547,78 +1576,85 @@ enum {
  /* The assertions must come before BRA, CBRA, ONCE, and COND. */

  OP_REVERSE,        /* 125 Move pointer back - used in lookbehind assertions */
-  OP_ASSERT,         /* 126 Positive lookahead */
-  OP_ASSERT_NOT,     /* 127 Negative lookahead */
-  OP_ASSERTBACK,     /* 128 Positive lookbehind */
-  OP_ASSERTBACK_NOT, /* 129 Negative lookbehind */
-  OP_ASSERT_NA,      /* 130 Positive non-atomic lookahead */
-  OP_ASSERTBACK_NA,  /* 131 Positive non-atomic lookbehind */
+  OP_VREVERSE,       /* 126 Move pointer back - variable */
+  OP_ASSERT,         /* 127 Positive lookahead */
+  OP_ASSERT_NOT,     /* 128 Negative lookahead */
+  OP_ASSERTBACK,     /* 129 Positive lookbehind */
+  OP_ASSERTBACK_NOT, /* 130 Negative lookbehind */
+  OP_ASSERT_NA,      /* 131 Positive non-atomic lookahead */
+  OP_ASSERTBACK_NA,  /* 132 Positive non-atomic lookbehind */

  /* ONCE, SCRIPT_RUN, BRA, BRAPOS, CBRA, CBRAPOS, and COND must come
  immediately after the assertions, with ONCE first, as there's a test for >=
  ONCE for a subpattern that isn't an assertion. The POS versions must
  immediately follow the non-POS versions in each case. */

-  OP_ONCE,           /* 132 Atomic group, contains captures */
-  OP_SCRIPT_RUN,     /* 133 Non-capture, but check characters' scripts */
-  OP_BRA,            /* 134 Start of non-capturing bracket */
-  OP_BRAPOS,         /* 135 Ditto, with unlimited, possessive repeat */
-  OP_CBRA,           /* 136 Start of capturing bracket */
-  OP_CBRAPOS,        /* 137 Ditto, with unlimited, possessive repeat */
-  OP_COND,           /* 138 Conditional group */
+  OP_ONCE,           /* 133 Atomic group, contains captures */
+  OP_SCRIPT_RUN,     /* 134 Non-capture, but check characters' scripts */
+  OP_BRA,            /* 135 Start of non-capturing bracket */
+  OP_BRAPOS,         /* 136 Ditto, with unlimited, possessive repeat */
+  OP_CBRA,           /* 137 Start of capturing bracket */
+  OP_CBRAPOS,        /* 138 Ditto, with unlimited, possessive repeat */
+  OP_COND,           /* 139 Conditional group */

  /* These five must follow the previous five, in the same order. There's a
  check for >= SBRA to distinguish the two sets. */

-  OP_SBRA,           /* 139 Start of non-capturing bracket, check empty  */
-  OP_SBRAPOS,        /* 149 Ditto, with unlimited, possessive repeat */
-  OP_SCBRA,          /* 141 Start of capturing bracket, check empty */
-  OP_SCBRAPOS,       /* 142 Ditto, with unlimited, possessive repeat */
-  OP_SCOND,          /* 143 Conditional group, check empty */
+  OP_SBRA,           /* 140 Start of non-capturing bracket, check empty  */
+  OP_SBRAPOS,        /* 141 Ditto, with unlimited, possessive repeat */
+  OP_SCBRA,          /* 142 Start of capturing bracket, check empty */
+  OP_SCBRAPOS,       /* 143 Ditto, with unlimited, possessive repeat */
+  OP_SCOND,          /* 144 Conditional group, check empty */

  /* The next two pairs must (respectively) be kept together. */

-  OP_CREF,           /* 144 Used to hold a capture number as condition */
-  OP_DNCREF,         /* 145 Used to point to duplicate names as a condition */
-  OP_RREF,           /* 146 Used to hold a recursion number as condition */
-  OP_DNRREF,         /* 147 Used to point to duplicate names as a condition */
-  OP_FALSE,          /* 148 Always false (used by DEFINE and VERSION) */
-  OP_TRUE,           /* 149 Always true (used by VERSION) */
+  OP_CREF,           /* 145 Used to hold a capture number as condition */
+  OP_DNCREF,         /* 146 Used to point to duplicate names as a condition */
+  OP_RREF,           /* 147 Used to hold a recursion number as condition */
+  OP_DNRREF,         /* 148 Used to point to duplicate names as a condition */
+  OP_FALSE,          /* 149 Always false (used by DEFINE and VERSION) */
+  OP_TRUE,           /* 150 Always true (used by VERSION) */

-  OP_BRAZERO,        /* 150 These two must remain together and in this */
-  OP_BRAMINZERO,     /* 151 order. */
-  OP_BRAPOSZERO,     /* 152 */
+  OP_BRAZERO,        /* 151 These two must remain together and in this */
+  OP_BRAMINZERO,     /* 152 order. */
+  OP_BRAPOSZERO,     /* 153 */

  /* These are backtracking control verbs */

-  OP_MARK,           /* 153 always has an argument */
-  OP_PRUNE,          /* 154 */
-  OP_PRUNE_ARG,      /* 155 same, but with argument */
-  OP_SKIP,           /* 156 */
-  OP_SKIP_ARG,       /* 157 same, but with argument */
-  OP_THEN,           /* 158 */
-  OP_THEN_ARG,       /* 159 same, but with argument */
-  OP_COMMIT,         /* 160 */
-  OP_COMMIT_ARG,     /* 161 same, but with argument */
+  OP_MARK,           /* 154 always has an argument */
+  OP_PRUNE,          /* 155 */
+  OP_PRUNE_ARG,      /* 156 same, but with argument */
+  OP_SKIP,           /* 157 */
+  OP_SKIP_ARG,       /* 158 same, but with argument */
+  OP_THEN,           /* 159 */
+  OP_THEN_ARG,       /* 160 same, but with argument */
+  OP_COMMIT,         /* 161 */
+  OP_COMMIT_ARG,     /* 162 same, but with argument */

  /* These are forced failure and success verbs. FAIL and ACCEPT do accept an
  argument, but these cases can be compiled as, for example, (*MARK:X)(*FAIL)
  without the need for a special opcode. */

-  OP_FAIL,           /* 162 */
-  OP_ACCEPT,         /* 163 */
-  OP_ASSERT_ACCEPT,  /* 164 Used inside assertions */
-  OP_CLOSE,          /* 165 Used before OP_ACCEPT to close open captures */
+  OP_FAIL,           /* 163 */
+  OP_ACCEPT,         /* 164 */
+  OP_ASSERT_ACCEPT,  /* 165 Used inside assertions */
+  OP_CLOSE,          /* 166 Used before OP_ACCEPT to close open captures */

  /* This is used to skip a subpattern with a {0} quantifier */

-  OP_SKIPZERO,       /* 166 */
+  OP_SKIPZERO,       /* 167 */

  /* This is used to identify a DEFINE group during compilation so that it can
  be checked for having only one branch. It is changed to OP_FALSE before
  compilation finishes. */

-  OP_DEFINE,         /* 167 */
+  OP_DEFINE,         /* 168 */
+
+  /* These opcodes replace their normal counterparts in UCP mode when
+  PCRE2_EXTRA_ASCII_BSW is not set. */
+
+  OP_NOT_UCP_WORD_BOUNDARY, /* 169 */
+  OP_UCP_WORD_BOUNDARY,     /* 170 */

  /* This is not an opcode, but is used to check that tables indexed by opcode
  are the correct length, in order to catch updating errors - there have been
@@ -1664,7 +1700,7 @@ some cases doesn't actually use these names at all). */
  "class", "nclass", "xclass", "Ref", "Refi", "DnRef", "DnRefi",  \
  "Recurse", "Callout", "CalloutStr",                             \
  "Alt", "Ket", "KetRmax", "KetRmin", "KetRpos",                  \
-  "Reverse", "Assert", "Assert not",                              \
+  "Reverse", "VReverse", "Assert", "Assert not",                  \
  "Assert back", "Assert back not",                               \
  "Non-atomic assert", "Non-atomic assert back",                  \
  "Once",                                                         \
@@ -1679,7 +1715,7 @@ some cases doesn't actually use these names at all). */
  "*MARK", "*PRUNE", "*PRUNE", "*SKIP", "*SKIP",                  \
  "*THEN", "*THEN", "*COMMIT", "*COMMIT", "*FAIL",                \
  "*ACCEPT", "*ASSERT_ACCEPT",                                    \
-  "Close", "Skip zero", "Define"
+  "Close", "Skip zero", "Define", "\\B (ucp)", "\\b (ucp)"


 /* This macro defines the length of fixed length operations in the compiled
@@ -1746,7 +1782,8 @@ in UTF-8 mode. The code that uses this table must know about such things. */
  1+LINK_SIZE,                   /* KetRmax                                */ \
  1+LINK_SIZE,                   /* KetRmin                                */ \
  1+LINK_SIZE,                   /* KetRpos                                */ \
-  1+LINK_SIZE,                   /* Reverse                                */ \
+  1+IMM2_SIZE,                   /* Reverse                                */ \
+  1+2*IMM2_SIZE,                 /* VReverse                               */ \
  1+LINK_SIZE,                   /* Assert                                 */ \
  1+LINK_SIZE,                   /* Assert not                             */ \
  1+LINK_SIZE,                   /* Assert behind                          */ \
@@ -1775,7 +1812,8 @@ in UTF-8 mode. The code that uses this table must know about such things. */
  1, 3,                          /* COMMIT, COMMIT_ARG                     */ \
  1, 1, 1,                       /* FAIL, ACCEPT, ASSERT_ACCEPT            */ \
  1+IMM2_SIZE, 1,                /* CLOSE, SKIPZERO                        */ \
-  1                              /* DEFINE                                 */
+  1,                             /* DEFINE                                 */ \
+  1, 1                           /* \B and \b in UCP mode                  */

 /* A magic value for OP_RREF to indicate the "any recursion" condition. */

@@ -2042,6 +2080,9 @@ extern void *       _pcre2_memmove(void *, const void *, size_t);
 #endif

 #endif  /* PCRE2_CODE_UNIT_WIDTH */
+
+extern BOOL         PRIV(ckd_smul)(PCRE2_SIZE *, int, int);
+
 #endif  /* PCRE2_INTERNAL_H_IDEMPOTENT_GUARD */

 /* End of pcre2_internal.h */
--- a/Foundation/src/pcre2_intmodedep.h
+++ b/Foundation/src/pcre2_intmodedep.h
@@ -7,7 +7,7 @@ and semantics are as close as possible to those of the Perl 5 language.

                       Written by Philip Hazel
     Original API code Copyright (c) 1997-2012 University of Cambridge
-          New API code Copyright (c) 2016-2022 University of Cambridge
+          New API code Copyright (c) 2016-2024 University of Cambridge

 -----------------------------------------------------------------------------
 Redistribution and use in source and binary forms, with or without
@@ -568,10 +568,12 @@ typedef struct pcre2_real_compile_context {
  void *stack_guard_data;
  const uint8_t *tables;
  PCRE2_SIZE max_pattern_length;
+  PCRE2_SIZE max_pattern_compiled_length;
  uint16_t bsr_convention;
  uint16_t newline_convention;
  uint32_t parens_nest_limit;
  uint32_t extra_options;
+  uint32_t max_varlookbehind;
 } pcre2_real_compile_context;

 /* The real match context structure. */
@@ -605,12 +607,12 @@ defined specially because it is required in pcre2_serialize_decode() when
 copying the size from possibly unaligned memory into a variable of the same
 type. Use a macro rather than a typedef to avoid compiler warnings when this
 file is included multiple times by pcre2test. LOOKBEHIND_MAX specifies the
-largest lookbehind that is supported. (OP_REVERSE in a pattern has a 16-bit
-argument in 8-bit and 16-bit modes, so we need no more than a 16-bit field
-here.) */
+largest lookbehind that is supported. (OP_REVERSE and OP_VREVERSE in a pattern
+have 16-bit arguments in 8-bit and 16-bit modes, so we need no more than a
+16-bit field here.) */

 #undef  CODE_BLOCKSIZE_TYPE
-#define CODE_BLOCKSIZE_TYPE size_t
+#define CODE_BLOCKSIZE_TYPE PCRE2_SIZE

 #undef  LOOKBEHIND_MAX
 #define LOOKBEHIND_MAX UINT16_MAX
@@ -658,6 +660,7 @@ typedef struct pcre2_real_match_data {
  PCRE2_SPTR       mark;             /* Pointer to last mark */
  struct heapframe *heapframes;      /* Backtracking frames heap memory */
  PCRE2_SIZE       heapframes_size;  /* Malloc-ed size */
+  PCRE2_SIZE       subject_length;   /* Subject length */
  PCRE2_SIZE       leftchar;         /* Offset to leftmost code unit */
  PCRE2_SIZE       rightchar;        /* Offset to rightmost code unit */
  PCRE2_SIZE       startchar;        /* Offset to starting code unit */
@@ -675,8 +678,8 @@ typedef struct pcre2_real_match_data {

 #ifndef PCRE2_PCRE2TEST

-/* Structures for checking for mutual recursion when scanning compiled or
-parsed code. */
+/* Structures for checking for mutual function recursion when scanning compiled
+or parsed code. */

 typedef struct recurse_check {
  struct recurse_check *prev;
@@ -688,7 +691,7 @@ typedef struct parsed_recurse_check {
  uint32_t *groupptr;
 } parsed_recurse_check;

-/* Structure for building a cache when filling in recursion offsets. */
+/* Structure for building a cache when filling in pattern recursion offsets. */

 typedef struct recurse_cache {
  PCRE2_SPTR group;
@@ -734,7 +737,6 @@ typedef struct compile_block {
  uint16_t name_entry_size;        /* Size of each entry */
  uint16_t parens_depth;           /* Depth of nested parentheses */
  uint16_t assert_depth;           /* Depth of nested assertions */
-  open_capitem *open_caps;         /* Chain of open capture items */
  named_group *named_groups;       /* Points to vector in pre-compile */
  uint32_t named_group_list_size;  /* Number of entries in the list */
  uint32_t external_options;       /* External (initial) options */
@@ -752,10 +754,11 @@ typedef struct compile_block {
  uint32_t class_range_end;        /* Overall class range end */
  PCRE2_UCHAR nl[4];               /* Newline string when fixed length */
  uint32_t req_varyopt;            /* "After variable item" flag for reqbyte */
-  int  max_lookbehind;             /* Maximum lookbehind (characters) */
+  uint32_t max_varlookbehind;      /* Limit for variable lookbehinds */
+  int  max_lookbehind;             /* Maximum lookbehind encountered (characters) */
  BOOL had_accept;                 /* (*ACCEPT) encountered */
  BOOL had_pruneorskip;            /* (*PRUNE) or (*SKIP) encountered */
-  BOOL had_recurse;                /* Had a recursion or subroutine call */
+  BOOL had_recurse;                /* Had a pattern recursion or subroutine call */
  BOOL dupnames;                   /* Duplicate names exist */
 } compile_block;

@@ -773,6 +776,7 @@ call within the pattern when running pcre2_dfa_match(). */
 typedef struct dfa_recursion_info {
  struct dfa_recursion_info *prevrec;
  PCRE2_SPTR subject_position;
+  PCRE2_SPTR last_used_ptr;
  uint32_t group_num;
 } dfa_recursion_info;

@@ -793,7 +797,7 @@ typedef struct heapframe {
  PCRE2_SIZE length;         /* Used for character, string, or code lengths */
  PCRE2_SIZE back_frame;     /* Amount to subtract on RRETURN */
  PCRE2_SIZE temp_size;      /* Used for short-term PCRE2_SIZE values */
-  uint32_t rdepth;           /* "Recursion" depth */
+  uint32_t rdepth;           /* Function "recursion" depth within pcre2_match() */
  uint32_t group_frame_type; /* Type information for group frames */
  uint32_t temp_32[4];       /* Used for short-term 32-bit or BOOL values */
  uint8_t return_id;         /* Where to go on in internal "return" */
@@ -826,14 +830,15 @@ typedef struct heapframe {
  allows for exactly the right size ovector for the number of capturing
  parentheses. (See also the comment for pcre2_real_match_data above.) */

-  PCRE2_SPTR eptr;           /* MUST BE FIRST */
-  PCRE2_SPTR start_match;    /* Can be adjusted by \K */
-  PCRE2_SPTR mark;           /* Most recent mark on the success path */
-  uint32_t current_recurse;  /* Current (deepest) recursion number */
-  uint32_t capture_last;     /* Most recent capture */
-  PCRE2_SIZE last_group_offset;  /* Saved offset to most recent group frame */
-  PCRE2_SIZE offset_top;     /* Offset after highest capture */
-  PCRE2_SIZE ovector[131072]; /* Must be last in the structure */
+  PCRE2_SPTR eptr;              /* MUST BE FIRST */
+  PCRE2_SPTR start_match;       /* Can be adjusted by \K */
+  PCRE2_SPTR mark;              /* Most recent mark on the success path */
+  PCRE2_SPTR recurse_last_used; /* Last character used at time of pattern recursion */
+  uint32_t current_recurse;     /* Group number of current (deepest) pattern recursion */
+  uint32_t capture_last;        /* Most recent capture */
+  PCRE2_SIZE last_group_offset; /* Saved offset to most recent group frame */
+  PCRE2_SIZE offset_top;        /* Offset after highest capture */
+  PCRE2_SIZE ovector[131072];   /* Must be last in the structure */
 } heapframe;

 /* This typedef is a check that the size of the heapframe structure is a
@@ -858,7 +863,7 @@ doing traditional NFA matching (pcre2_match() and friends). */

 typedef struct match_block {
  pcre2_memctl memctl;            /* For general use */
-  PCRE2_SIZE heap_limit;          /* As it says */
+  uint32_t heap_limit;            /* As it says */
  uint32_t match_limit;           /* As it says */
  uint32_t match_limit_depth;     /* As it says */
  uint32_t match_call_count;      /* Number of times a new frame is created */
@@ -875,10 +880,11 @@ typedef struct match_block {
  uint16_t name_count;            /* Number of names in name table */
  uint16_t name_entry_size;       /* Size of entry in names table */
  PCRE2_SPTR name_table;          /* Table of group names */
-  PCRE2_SPTR start_code;          /* For use when recursing */
+  PCRE2_SPTR start_code;          /* For use in pattern recursion */
  PCRE2_SPTR start_subject;       /* Start of the subject string */
  PCRE2_SPTR check_subject;       /* Where UTF-checked from */
-  PCRE2_SPTR end_subject;         /* End of the subject string */
+  PCRE2_SPTR end_subject;         /* Usable end of the subject string */
+  PCRE2_SPTR true_end_subject;    /* Actual end of the subject string */
  PCRE2_SPTR end_match_ptr;       /* Subject position at end match */
  PCRE2_SPTR start_used_ptr;      /* Earliest consulted character */
  PCRE2_SPTR last_used_ptr;       /* Latest consulted character */
@@ -886,7 +892,7 @@ typedef struct match_block {
  PCRE2_SPTR nomatch_mark;        /* Mark pointer to pass back on failure */
  PCRE2_SPTR verb_ecode_ptr;      /* For passing back info */
  PCRE2_SPTR verb_skip_ptr;       /* For passing back a (*SKIP) name */
-  uint32_t verb_current_recurse;  /* Current recurse when (*VERB) happens */
+  uint32_t verb_current_recurse;  /* Current recursion group when (*VERB) happens */
  uint32_t moptions;              /* Match options */
  uint32_t poptions;              /* Pattern options */
  uint32_t skip_arg_count;        /* For counting SKIP_ARGs */
@@ -911,7 +917,7 @@ typedef struct dfa_match_block {
  PCRE2_SPTR last_used_ptr;       /* Latest consulted character */
  const uint8_t *tables;          /* Character tables */
  PCRE2_SIZE start_offset;        /* The start offset value */
-  PCRE2_SIZE heap_limit;          /* As it says */
+  uint32_t heap_limit;            /* As it says */
  PCRE2_SIZE heap_used;           /* As it says */
  uint32_t match_limit;           /* As it says */
  uint32_t match_limit_depth;     /* As it says */
@@ -926,7 +932,7 @@ typedef struct dfa_match_block {
  pcre2_callout_block *cb;        /* Points to a callout block */
  void *callout_data;             /* To pass back to callouts */
  int (*callout)(pcre2_callout_block *,void *);  /* Callout function or NULL */
-  dfa_recursion_info *recursive;  /* Linked list of recursion data */
+  dfa_recursion_info *recursive;  /* Linked list of pattern recursion data */
 } dfa_match_block;

 #endif  /* PCRE2_PCRE2TEST */
--- a/Foundation/src/pcre2_jit_compile.c
+++ b/Foundation/src/pcre2_jit_compile.c
--- a/Foundation/src/pcre2_jit_match.c
+++ b/Foundation/src/pcre2_jit_match.c
@@ -7,7 +7,7 @@ and semantics are as close as possible to those of the Perl 5 language.

                       Written by Philip Hazel
     Original API code Copyright (c) 1997-2012 University of Cambridge
-          New API code Copyright (c) 2016-2018 University of Cambridge
+          New API code Copyright (c) 2016-2023 University of Cambridge

 -----------------------------------------------------------------------------
 Redistribution and use in source and binary forms, with or without
@@ -42,6 +42,12 @@ POSSIBILITY OF SUCH DAMAGE.
 #error This file must be included from pcre2_jit_compile.c.
 #endif

+#if defined(__has_feature)
+#if __has_feature(memory_sanitizer)
+#include <sanitizer/msan_interface.h>
+#endif /* __has_feature(memory_sanitizer) */
+#endif /* defined(__has_feature) */
+
 #ifdef SUPPORT_JIT

 static SLJIT_NOINLINE int jit_machine_stack_exec(jit_arguments *arguments, jit_function executable_func)
@@ -171,6 +177,7 @@ if (rc > (int)oveccount)
  rc = 0;
 match_data->code = re;
 match_data->subject = (rc >= 0 || rc == PCRE2_ERROR_PARTIAL)? subject : NULL;
+match_data->subject_length = length;
 match_data->rc = rc;
 match_data->startchar = arguments.startchar_ptr - subject;
 match_data->leftchar = 0;
@@ -178,6 +185,13 @@ match_data->rightchar = 0;
 match_data->mark = arguments.mark_ptr;
 match_data->matchedby = PCRE2_MATCHEDBY_JIT;

+#if defined(__has_feature)
+#if __has_feature(memory_sanitizer)
+if (rc > 0)
+  __msan_unpoison(match_data->ovector, 2 * rc * sizeof(match_data->ovector[0]));
+#endif /* __has_feature(memory_sanitizer) */
+#endif /* defined(__has_feature) */
+
 return match_data->rc;

 #endif  /* SUPPORT_JIT */
--- a/Foundation/src/pcre2_jit_misc.c
+++ b/Foundation/src/pcre2_jit_misc.c
@@ -141,8 +141,8 @@ if (startsize == 0 || maxsize == 0 || maxsize > SIZE_MAX - STACK_GROWTH_RATE)
  return NULL;
 if (startsize > maxsize)
  startsize = maxsize;
-startsize = (startsize + STACK_GROWTH_RATE - 1) & ~(STACK_GROWTH_RATE - 1);
-maxsize = (maxsize + STACK_GROWTH_RATE - 1) & ~(STACK_GROWTH_RATE - 1);
+startsize = (startsize + STACK_GROWTH_RATE - 1) & (size_t)(~(STACK_GROWTH_RATE - 1));
+maxsize = (maxsize + STACK_GROWTH_RATE - 1) & (size_t)(~(STACK_GROWTH_RATE - 1));

 jit_stack = PRIV(memctl_malloc)(sizeof(pcre2_real_jit_stack), (pcre2_memctl *)gcontext);
 if (jit_stack == NULL) return NULL;
--- a/Foundation/src/pcre2_maketables.c
+++ b/Foundation/src/pcre2_maketables.c
@@ -96,7 +96,11 @@ for (i = 0; i < 256; i++) *p++ = tolower(i);

 /* Next the case-flipping table */

-for (i = 0; i < 256; i++) *p++ = islower(i)? toupper(i) : tolower(i);
+for (i = 0; i < 256; i++)
+  {
+  int c = islower(i)? toupper(i) : tolower(i);
+  *p++ = (c < 256)? c : i;
+  }

 /* Then the character class tables. Don't try to be clever and save effort on
 exclusive ones - in some locales things may be different.
--- a/Foundation/src/pcre2_match.c
+++ b/Foundation/src/pcre2_match.c
@@ -7,7 +7,7 @@ and semantics are as close as possible to those of the Perl 5 language.

                       Written by Philip Hazel
     Original API code Copyright (c) 1997-2012 University of Cambridge
-          New API code Copyright (c) 2015-2022 University of Cambridge
+          New API code Copyright (c) 2015-2024 University of Cambridge

 -----------------------------------------------------------------------------
 Redistribution and use in source and binary forms, with or without
@@ -41,6 +41,8 @@ POSSIBILITY OF SUCH DAMAGE.

 #include "pcre2_config.h"

+#include "pcre2_internal.h"
+
 /* These defines enable debugging code */

 /* #define DEBUG_FRAMES_DISPLAY */
@@ -51,6 +53,10 @@ POSSIBILITY OF SUCH DAMAGE.
 #include <stdarg.h>
 #endif

+#ifdef DEBUG_SHOW_OPS
+static const char *OP_names[] = { OP_NAME_LIST };
+#endif
+
 /* These defines identify the name of the block containing "static"
 information, and fields within it. */

@@ -58,8 +64,6 @@ information, and fields within it. */
 #define PSSTART start_subject   /* Field containing processed string start */
 #define PSEND   end_subject     /* Field containing processed string end */

-#include "pcre2_internal.h"
-
 #define RECURSE_UNSET 0xffffffffu  /* Bigger than max group number */

 /* Masks for identifying the public options that are permitted at match time. */
@@ -67,7 +71,8 @@ information, and fields within it. */
 #define PUBLIC_MATCH_OPTIONS \
  (PCRE2_ANCHORED|PCRE2_ENDANCHORED|PCRE2_NOTBOL|PCRE2_NOTEOL|PCRE2_NOTEMPTY| \
   PCRE2_NOTEMPTY_ATSTART|PCRE2_NO_UTF_CHECK|PCRE2_PARTIAL_HARD| \
-   PCRE2_PARTIAL_SOFT|PCRE2_NO_JIT|PCRE2_COPY_MATCHED_SUBJECT)
+   PCRE2_PARTIAL_SOFT|PCRE2_NO_JIT|PCRE2_COPY_MATCHED_SUBJECT| \
+   PCRE2_DISABLE_RECURSELOOP_CHECK)

 #define PUBLIC_JIT_MATCH_OPTIONS \
   (PCRE2_NO_UTF_CHECK|PCRE2_NOTBOL|PCRE2_NOTEOL|PCRE2_NOTEMPTY|\
@@ -148,7 +153,7 @@ changed, the code at RETURN_SWITCH below must be updated in sync.  */
 enum { RM1=1, RM2,  RM3,  RM4,  RM5,  RM6,  RM7,  RM8,  RM9,  RM10,
       RM11,  RM12, RM13, RM14, RM15, RM16, RM17, RM18, RM19, RM20,
       RM21,  RM22, RM23, RM24, RM25, RM26, RM27, RM28, RM29, RM30,
-       RM31,  RM32, RM33, RM34, RM35, RM36 };
+       RM31,  RM32, RM33, RM34, RM35, RM36, RM37 };

 #ifdef SUPPORT_WIDE_CHARS
 enum { RM100=100, RM101 };
@@ -595,11 +600,12 @@ heapframe *P = NULL;

 heapframe *frames_top;  /* End of frames vector */
 heapframe *assert_accept_frame = NULL;  /* For passing back a frame with captures */
-PCRE2_SIZE heapframes_size;   /* Usable size of frames vector */
 PCRE2_SIZE frame_copy_size;   /* Amount to copy when creating a new frame */

 /* Local variables that do not need to be preserved over calls to RRMATCH(). */

+PCRE2_SPTR branch_end = NULL;
+PCRE2_SPTR branch_start;
 PCRE2_SPTR bracode;     /* Temp pointer to start of group */
 PCRE2_SIZE offset;      /* Used for group offsets */
 PCRE2_SIZE length;      /* Used for various length calculations */
@@ -633,13 +639,10 @@ copied when a new frame is created. */

 frame_copy_size = frame_size - offsetof(heapframe, eptr);

-/* Set up the first frame and the end of the frames vector. We set the local
-heapframes_size to the usuable amount of the vector, that is, a whole number of
-frames. */
+/* Set up the first frame and the end of the frames vector. */

 F = match_data->heapframes;
-heapframes_size = (match_data->heapframes_size / frame_size) * frame_size;
-frames_top = (heapframe *)((char *)F + heapframes_size);
+frames_top = (heapframe *)((char *)F + match_data->heapframes_size);

 Frdepth = 0;                        /* "Recursion" depth */
 Fcapture_last = 0;                  /* Number of most recent capture */
@@ -660,35 +663,54 @@ MATCH_RECURSE:
 doubling the size, but constrained by the heap limit (which is in KiB). */

 N = (heapframe *)((char *)F + frame_size);
-if (N >= frames_top)
+if ((heapframe *)((char *)N + frame_size) >= frames_top)
  {
  heapframe *new;
-  PCRE2_SIZE newsize = match_data->heapframes_size * 2;
+  PCRE2_SIZE newsize;
+  PCRE2_SIZE usedsize = (char *)N - (char *)(match_data->heapframes);

-  if (newsize > mb->heap_limit)
+  if (match_data->heapframes_size >= PCRE2_SIZE_MAX / 2)
    {
-    PCRE2_SIZE maxsize = (mb->heap_limit/frame_size) * frame_size;
-    if (match_data->heapframes_size >= maxsize) return PCRE2_ERROR_HEAPLIMIT;
-    newsize = maxsize;
+    if (match_data->heapframes_size == PCRE2_SIZE_MAX - 1)
+      return PCRE2_ERROR_NOMEMORY;
+    newsize = PCRE2_SIZE_MAX - 1;
+    }
+  else
+    newsize = match_data->heapframes_size * 2;
+
+  if (newsize / 1024 >= mb->heap_limit)
+    {
+    PCRE2_SIZE old_size = match_data->heapframes_size / 1024;
+    if (mb->heap_limit <= old_size)
+      return PCRE2_ERROR_HEAPLIMIT;
+    else
+      {
+      PCRE2_SIZE max_delta = 1024 * (mb->heap_limit - old_size);
+      int over_bytes = match_data->heapframes_size % 1024;
+      if (over_bytes) max_delta -= (1024 - over_bytes);
+      newsize = match_data->heapframes_size + max_delta;
+      }
    }

+  /* With a heap limit set, the permitted additional size may not be enough for
+  another frame, so do a final check. */
+
+  if (newsize - usedsize < frame_size) return PCRE2_ERROR_HEAPLIMIT;
  new = match_data->memctl.malloc(newsize, match_data->memctl.memory_data);
  if (new == NULL) return PCRE2_ERROR_NOMEMORY;
-  memcpy(new, match_data->heapframes, heapframes_size);
+  memcpy(new, match_data->heapframes, usedsize);

-  F = (heapframe *)((char *)new + ((char *)F - (char *)match_data->heapframes));
-  N = (heapframe *)((char *)F + frame_size);
+  N = (heapframe *)((char *)new + usedsize);
+  F = (heapframe *)((char *)N - frame_size);

  match_data->memctl.free(match_data->heapframes, match_data->memctl.memory_data);
  match_data->heapframes = new;
  match_data->heapframes_size = newsize;
-
-  heapframes_size = (newsize / frame_size) * frame_size;
-  frames_top = (heapframe *)((char *)new + heapframes_size);
+  frames_top = (heapframe *)((char *)new + newsize);
  }

 #ifdef DEBUG_SHOW_RMATCH
-fprintf(stderr, "++ RMATCH %2d frame=%d", Freturn_id, Frdepth + 1);
+fprintf(stderr, "++ RMATCH %d frame=%d", Freturn_id, Frdepth + 1);
 if (group_frame_type != 0)
  {
  fprintf(stderr, " type=%x ", group_frame_type);
@@ -758,10 +780,16 @@ opcodes. */
 if (mb->match_call_count++ >= mb->match_limit) return PCRE2_ERROR_MATCHLIMIT;
 if (Frdepth >= mb->match_limit_depth) return PCRE2_ERROR_DEPTHLIMIT;

+#ifdef DEBUG_SHOW_OPS
+fprintf(stderr, "\n++ New frame: type=0x%x subject offset %ld\n",
+  GF_IDMASK(Fgroup_frame_type), (long)(Feptr - mb->start_subject));
+#endif
+
 for (;;)
  {
 #ifdef DEBUG_SHOW_OPS
-fprintf(stderr, "++ op=%d\n", *Fecode);
+fprintf(stderr, "++ %2ld op=%3d %s\n", (long)(Fecode - mb->start_code),
+  (int)*Fecode, OP_names[*Fecode]);
 #endif

  Fop = (uint8_t)(*Fecode);  /* Cast needed for 16-bit and 32-bit modes */
@@ -809,15 +837,16 @@ fprintf(stderr, "++ op=%d\n", *Fecode);
    assert_accept_frame = F;
    RRETURN(MATCH_ACCEPT);

-    /* If recursing, we have to find the most recent recursion. */
+    /* For ACCEPT within a recursion, we have to find the most recent
+    recursion. If not in a recursion, fall through to code that is common with
+    OP_END. */

    case OP_ACCEPT:
-    case OP_END:
-
-    /* Handle end of a recursion. */
-
    if (Fcurrent_recurse != RECURSE_UNSET)
      {
+#ifdef DEBUG_SHOW_OPS
+      fprintf(stderr, "++ Accept within recursion\n");
+#endif
      offset = Flast_group_offset;
      for(;;)
        {
@@ -840,27 +869,49 @@ fprintf(stderr, "++ op=%d\n", *Fecode);
      Fecode += 1 + LINK_SIZE;
      continue;
      }
+    /* Fall through */

-    /* Not a recursion. Fail for an empty string match if either PCRE2_NOTEMPTY
-    is set, or if PCRE2_NOTEMPTY_ATSTART is set and we have matched at the
-    start of the subject. In both cases, backtracking will then try other
-    alternatives, if any. */
+    /* OP_END itself can never be reached within a recursion because that is
+    picked up when the OP_KET that always precedes OP_END is reached. */
+
+    case OP_END:
+
+    /* Fail for an empty string match if either PCRE2_NOTEMPTY is set, or if
+    PCRE2_NOTEMPTY_ATSTART is set and we have matched at the start of the
+    subject. In both cases, backtracking will then try other alternatives, if
+    any. */

    if (Feptr == Fstart_match &&
         ((mb->moptions & PCRE2_NOTEMPTY) != 0 ||
           ((mb->moptions & PCRE2_NOTEMPTY_ATSTART) != 0 &&
             Fstart_match == mb->start_subject + mb->start_offset)))
+      {
+#ifdef DEBUG_SHOW_OPS
+      fprintf(stderr, "++ Backtrack because empty string\n");
+#endif
      RRETURN(MATCH_NOMATCH);
+      }

-    /* Also fail if PCRE2_ENDANCHORED is set and the end of the match is not
+    /* Fail if PCRE2_ENDANCHORED is set and the end of the match is not
    the end of the subject. After (*ACCEPT) we fail the entire match (at this
-    position) but backtrack on reaching the end of the pattern. */
+    position) but backtrack if we've reached the end of the pattern. This
+    applies whether or not we are in a recursion. */

    if (Feptr < mb->end_subject &&
        ((mb->moptions | mb->poptions) & PCRE2_ENDANCHORED) != 0)
      {
-      if (Fop == OP_END) RRETURN(MATCH_NOMATCH);
-      return MATCH_NOMATCH;
+      if (Fop == OP_END)
+        {
+#ifdef DEBUG_SHOW_OPS
+        fprintf(stderr, "++ Backtrack because not at end (endanchored set)\n");
+#endif
+        RRETURN(MATCH_NOMATCH);
+        }
+
+#ifdef DEBUG_SHOW_OPS
+      fprintf(stderr, "++ Failed ACCEPT not at end (endanchnored set)\n");
+#endif
+      return MATCH_NOMATCH;   /* (*ACCEPT) */
      }

    /* We have a successful match of the whole pattern. Record the result and
@@ -2433,6 +2484,7 @@ fprintf(stderr, "++ op=%d\n", *Fecode);
    GETCHARINCTEST(fc, Feptr);
      {
      const uint32_t *cp;
+      uint32_t chartype;
      const ucd_record *prop = GET_UCD(fc);
      BOOL notmatch = Fop == OP_NOTPROP;

@@ -2443,9 +2495,10 @@ fprintf(stderr, "++ op=%d\n", *Fecode);
        break;

        case PT_LAMP:
-        if ((prop->chartype == ucp_Lu ||
-             prop->chartype == ucp_Ll ||
-             prop->chartype == ucp_Lt) == notmatch)
+        chartype = prop->chartype;
+        if ((chartype == ucp_Lu ||
+             chartype == ucp_Ll ||
+             chartype == ucp_Lt) == notmatch)
          RRETURN(MATCH_NOMATCH);
        break;

@@ -2475,8 +2528,9 @@ fprintf(stderr, "++ op=%d\n", *Fecode);
        /* These are specials */

        case PT_ALNUM:
-        if ((PRIV(ucp_gentype)[prop->chartype] == ucp_L ||
-             PRIV(ucp_gentype)[prop->chartype] == ucp_N) == notmatch)
+        chartype = prop->chartype;
+        if ((PRIV(ucp_gentype)[chartype] == ucp_L ||
+             PRIV(ucp_gentype)[chartype] == ucp_N) == notmatch)
          RRETURN(MATCH_NOMATCH);
        break;

@@ -2501,13 +2555,22 @@ fprintf(stderr, "++ op=%d\n", *Fecode);
        break;

        case PT_WORD:
-        if ((PRIV(ucp_gentype)[prop->chartype] == ucp_L ||
-             PRIV(ucp_gentype)[prop->chartype] == ucp_N ||
-             fc == CHAR_UNDERSCORE) == notmatch)
+        chartype = prop->chartype;
+        if ((PRIV(ucp_gentype)[chartype] == ucp_L ||
+             PRIV(ucp_gentype)[chartype] == ucp_N ||
+             chartype == ucp_Mn ||
+             chartype == ucp_Pc) == notmatch)
          RRETURN(MATCH_NOMATCH);
        break;

        case PT_CLIST:
+#if PCRE2_CODE_UNIT_WIDTH == 32
+            if (fc > MAX_UTF_CODE_POINT)
+              {
+              if (notmatch) break;;
+              RRETURN(MATCH_NOMATCH);
+              }
+#endif
        cp = PRIV(ucd_caseless_sets) + Fecode[2];
        for (;;)
          {
@@ -2803,16 +2866,17 @@ fprintf(stderr, "++ op=%d\n", *Fecode);
          case PT_WORD:
          for (i = 1; i <= Lmin; i++)
            {
-            int category;
+            int chartype, category;
            if (Feptr >= mb->end_subject)
              {
              SCHECK_PARTIAL();
              RRETURN(MATCH_NOMATCH);
              }
            GETCHARINCTEST(fc, Feptr);
-            category = UCD_CATEGORY(fc);
+            chartype = UCD_CHARTYPE(fc);
+            category = PRIV(ucp_gentype)[chartype];
            if ((category == ucp_L || category == ucp_N ||
-                fc == CHAR_UNDERSCORE) == notmatch)
+                 chartype == ucp_Mn || chartype == ucp_Pc) == notmatch)
              RRETURN(MATCH_NOMATCH);
            }
          break;
@@ -2827,6 +2891,13 @@ fprintf(stderr, "++ op=%d\n", *Fecode);
              RRETURN(MATCH_NOMATCH);
              }
            GETCHARINCTEST(fc, Feptr);
+#if PCRE2_CODE_UNIT_WIDTH == 32
+            if (fc > MAX_UTF_CODE_POINT)
+              {
+              if (notmatch) continue;
+              RRETURN(MATCH_NOMATCH);
+              }
+#endif
            cp = PRIV(ucd_caseless_sets) + Lpropvalue;
            for (;;)
              {
@@ -3607,7 +3678,7 @@ fprintf(stderr, "++ op=%d\n", *Fecode);
          case PT_WORD:
          for (;;)
            {
-            int category;
+            int chartype, category;
            RMATCH(Fecode, RM215);
            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
            if (Lmin++ >= Lmax) RRETURN(MATCH_NOMATCH);
@@ -3617,10 +3688,12 @@ fprintf(stderr, "++ op=%d\n", *Fecode);
              RRETURN(MATCH_NOMATCH);
              }
            GETCHARINCTEST(fc, Feptr);
-            category = UCD_CATEGORY(fc);
+            chartype = UCD_CHARTYPE(fc);
+            category = PRIV(ucp_gentype)[chartype];
            if ((category == ucp_L ||
                 category == ucp_N ||
-                 fc == CHAR_UNDERSCORE) == (Lctype == OP_NOTPROP))
+                 chartype == ucp_Mn ||
+                 chartype == ucp_Pc) == (Lctype == OP_NOTPROP))
              RRETURN(MATCH_NOMATCH);
            }
          /* Control never gets here */
@@ -3638,6 +3711,13 @@ fprintf(stderr, "++ op=%d\n", *Fecode);
              RRETURN(MATCH_NOMATCH);
              }
            GETCHARINCTEST(fc, Feptr);
+#if PCRE2_CODE_UNIT_WIDTH == 32
+            if (fc > MAX_UTF_CODE_POINT)
+              {
+              if (Lctype == OP_NOTPROP) continue;
+              RRETURN(MATCH_NOMATCH);
+              }
+#endif
            cp = PRIV(ucd_caseless_sets) + Lpropvalue;
            for (;;)
              {
@@ -4188,7 +4268,7 @@ fprintf(stderr, "++ op=%d\n", *Fecode);
          case PT_WORD:
          for (i = Lmin; i < Lmax; i++)
            {
-            int category;
+            int chartype, category;
            int len = 1;
            if (Feptr >= mb->end_subject)
              {
@@ -4196,9 +4276,12 @@ fprintf(stderr, "++ op=%d\n", *Fecode);
              break;
              }
            GETCHARLENTEST(fc, Feptr, len);
-            category = UCD_CATEGORY(fc);
-            if ((category == ucp_L || category == ucp_N ||
-                 fc == CHAR_UNDERSCORE) == notmatch)
+            chartype = UCD_CHARTYPE(fc);
+            category = PRIV(ucp_gentype)[chartype];
+            if ((category == ucp_L ||
+                 category == ucp_N ||
+                 chartype == ucp_Mn ||
+                 chartype == ucp_Pc) == notmatch)
              break;
            Feptr+= len;
            }
@@ -4215,14 +4298,24 @@ fprintf(stderr, "++ op=%d\n", *Fecode);
              break;
              }
            GETCHARLENTEST(fc, Feptr, len);
-            cp = PRIV(ucd_caseless_sets) + Lpropvalue;
-            for (;;)
+#if PCRE2_CODE_UNIT_WIDTH == 32
+            if (fc > MAX_UTF_CODE_POINT)
              {
-              if (fc < *cp)
-                { if (notmatch) break; else goto GOT_MAX; }
-              if (fc == *cp++)
-                { if (notmatch) goto GOT_MAX; else break; }
+              if (!notmatch) goto GOT_MAX;
              }
+            else
+#endif
+              {
+              cp = PRIV(ucd_caseless_sets) + Lpropvalue;
+              for (;;)
+                {
+                if (fc < *cp)
+                  { if (notmatch) break; else goto GOT_MAX; }
+                if (fc == *cp++)
+                  { if (notmatch) goto GOT_MAX; else break; }
+                }
+              }
+
            Feptr += len;
            }
          GOT_MAX:
@@ -5320,9 +5413,11 @@ fprintf(stderr, "++ op=%d\n", *Fecode);


    /* ===================================================================== */
-    /* Recursion either matches the current regex, or some subexpression. The
-    offset data is the offset to the starting bracket from the start of the
-    whole pattern. (This is so that it works from duplicated subpatterns.) */
+    /* Pattern recursion either matches the current regex, or some
+    subexpression. The offset data is the offset to the starting bracket from
+    the start of the whole pattern. This is so that it works from duplicated
+    subpatterns. For a whole-pattern recursion, we have to infer the number
+    zero. */

 #define Lframe_type F->temp_32[0]
 #define Lstart_branch F->temp_sptr[0]
@@ -5331,9 +5426,12 @@ fprintf(stderr, "++ op=%d\n", *Fecode);
    bracode = mb->start_code + GET(Fecode, 1);
    number = (bracode == mb->start_code)? 0 : GET2(bracode, 1 + LINK_SIZE);

-    /* If we are already in a recursion, check for repeating the same one
-    without advancing the subject pointer. This should catch convoluted mutual
-    recursions. (Some simple cases are caught at compile time.) */
+    /* If we are already in a pattern recursion, check for repeating the same
+    one without changing the subject pointer or the last referenced character
+    in the subject. This should catch convoluted mutual recursions; some
+    simple cases are caught at compile time. However, there are rare cases when
+    this check needs to be turned off. In this case, actual recursion loops
+    will be caught by the match or heap limits. */

    if (Fcurrent_recurse != RECURSE_UNSET)
      {
@@ -5344,15 +5442,19 @@ fprintf(stderr, "++ op=%d\n", *Fecode);
        P = (heapframe *)((char *)N - frame_size);
        if (N->group_frame_type == (GF_RECURSE | number))
          {
-          if (Feptr == P->eptr) return PCRE2_ERROR_RECURSELOOP;
+          if (Feptr == P->eptr && mb->last_used_ptr == P->recurse_last_used &&
+               (mb->moptions & PCRE2_DISABLE_RECURSELOOP_CHECK) == 0)
+            return PCRE2_ERROR_RECURSELOOP;
          break;
          }
        offset = P->last_group_offset;
        }
      }

-    /* Now run the recursion, branch by branch. */
+    /* Remember the current last referenced character and then run the
+    recursion branch by branch. */

+    F->recurse_last_used = mb->last_used_ptr;
    Lstart_branch = bracode;
    Lframe_type = GF_RECURSE | number;

@@ -5681,13 +5783,13 @@ fprintf(stderr, "++ op=%d\n", *Fecode);


    /* ===================================================================== */
-    /* Move the subject pointer back. This occurs only at the start of each
-    branch of a lookbehind assertion. If we are too close to the start to move
-    back, fail. When working with UTF-8 we move back a number of characters,
-    not bytes. */
+    /* Move the subject pointer back by one fixed amount. This occurs at the
+    start of each branch that has a fixed length in a lookbehind assertion. If
+    we are too close to the start to move back, fail. When working with UTF-8
+    we move back a number of characters, not bytes. */

    case OP_REVERSE:
-    number = GET(Fecode, 1);
+    number = GET2(Fecode, 1);
 #ifdef SUPPORT_UNICODE
    if (utf)
      {
@@ -5701,7 +5803,7 @@ fprintf(stderr, "++ op=%d\n", *Fecode);
    else
 #endif

-    /* No UTF-8 support, or not in UTF-8 mode: count is code unit count */
+    /* No UTF support, or not in UTF mode: count is code unit count */

      {
      if ((ptrdiff_t)number > Feptr - mb->start_subject) RRETURN(MATCH_NOMATCH);
@@ -5711,15 +5813,84 @@ fprintf(stderr, "++ op=%d\n", *Fecode);
    /* Save the earliest consulted character, then skip to next opcode */

    if (Feptr < mb->start_used_ptr) mb->start_used_ptr = Feptr;
-    Fecode += 1 + LINK_SIZE;
+    Fecode += 1 + IMM2_SIZE;
    break;


+    /* ===================================================================== */
+    /* Move the subject pointer back by a variable amount. This occurs at the
+    start of each branch of a lookbehind assertion when the branch has a
+    variable, but limited, length. A loop is needed to try matching the branch
+    after moving back different numbers of characters. If we are too close to
+    the start to move back even the minimum amount, fail. When working with
+    UTF-8 we move back a number of characters, not bytes. */
+
+#define Lmin F->temp_32[0]
+#define Lmax F->temp_32[1]
+#define Leptr F->temp_sptr[0]
+
+    case OP_VREVERSE:
+    Lmin = GET2(Fecode, 1);
+    Lmax = GET2(Fecode, 1 + IMM2_SIZE);
+    Leptr = Feptr;
+
+    /* Move back by the maximum branch length and then work forwards. This
+    ensures that items such as \d{3,5} get the maximum length, which is
+    relevant for captures, and makes for Perl compatibility. */
+
+#ifdef SUPPORT_UNICODE
+    if (utf)
+      {
+      for (i = 0; i < Lmax; i++)
+        {
+        if (Feptr == mb->start_subject)
+          {
+          if (i < Lmin) RRETURN(MATCH_NOMATCH);
+          Lmax = i;
+          break;
+          }
+        Feptr--;
+        BACKCHAR(Feptr);
+        }
+      }
+    else
+#endif
+
+    /* No UTF support or not in UTF mode */
+
+      {
+      ptrdiff_t diff = Feptr - mb->start_subject;
+      uint32_t available = (diff > 65535)? 65535 : ((diff > 0)? (int)diff : 0);
+      if (Lmin > available) RRETURN(MATCH_NOMATCH);
+      if (Lmax > available) Lmax = available;
+      Feptr -= Lmax;
+      }
+
+    /* Now try matching, moving forward one character on failure, until we
+    reach the minimum back length. */
+
+    for (;;)
+      {
+      RMATCH(Fecode + 1 + 2 * IMM2_SIZE, RM37);
+      if (rrc != MATCH_NOMATCH) RRETURN(rrc);
+      if (Lmax-- <= Lmin) RRETURN(MATCH_NOMATCH);
+      Feptr++;
+#ifdef SUPPORT_UNICODE
+      if (utf) { FORWARDCHARTEST(Feptr, mb->end_subject); }
+#endif
+      }
+    /* Control never reaches here */
+
+#undef Lmin
+#undef Lmax
+#undef Leptr
+
    /* ===================================================================== */
    /* An alternation is the end of a branch; scan along to find the end of the
    bracketed group. */

    case OP_ALT:
+    branch_end = Fecode;
    do Fecode += GET(Fecode,1); while (*Fecode == OP_ALT);
    break;

@@ -5727,7 +5898,8 @@ fprintf(stderr, "++ op=%d\n", *Fecode);
    /* ===================================================================== */
    /* The end of a parenthesized group. For all but OP_BRA and OP_COND, the
    starting frame was added to the chained frames in order to remember the
-    starting subject position for the group. */
+    starting subject position for the group. (Not true for OP_BRA when it's a
+    whole pattern recursion, but that is handled separately below.) */

    case OP_KET:
    case OP_KETRMIN:
@@ -5736,8 +5908,14 @@ fprintf(stderr, "++ op=%d\n", *Fecode);

    bracode = Fecode - GET(Fecode, 1);

-    /* Point N to the frame at the start of the most recent group.
-    Remember the subject pointer at the start of the group. */
+    if (branch_end == NULL) branch_end = Fecode;
+    branch_start = bracode;
+    while (branch_start + GET(branch_start, 1) != branch_end)
+      branch_start += GET(branch_start, 1);
+    branch_end = NULL;
+
+    /* Point N to the frame at the start of the most recent group, and P to its
+    predecessor. Remember the subject pointer at the start of the group. */

    if (*bracode != OP_BRA && *bracode != OP_COND)
      {
@@ -5773,27 +5951,64 @@ fprintf(stderr, "++ op=%d\n", *Fecode);

    switch (*bracode)
      {
-      case OP_BRA:    /* No need to do anything for these */
-      case OP_COND:
+      /* Whole pattern recursion is handled as a recursion into group 0, but
+      the entire pattern is wrapped in OP_BRA/OP_KET rather than a capturing
+      group - a design mistake: it should perhaps have been capture group 0.
+      Anyway, that means the end of such recursion must be handled here. It is
+      detected by checking for an immediately following OP_END when we are
+      recursing in group 0. If this is not the end of a whole-pattern
+      recursion, there is nothing to be done. */
+
+      case OP_BRA:
+      if (Fcurrent_recurse != 0 || Fecode[1+LINK_SIZE] != OP_END) break;
+
+      /* It is the end of whole-pattern recursion. */
+
+      offset = Flast_group_offset;
+      if (offset == PCRE2_UNSET) return PCRE2_ERROR_INTERNAL;
+      N = (heapframe *)((char *)match_data->heapframes + offset);
+      P = (heapframe *)((char *)N - frame_size);
+      Flast_group_offset = P->last_group_offset;
+
+      /* Reinstate the previous set of captures and then carry on after the
+      recursion call. */
+
+      memcpy((char *)F + offsetof(heapframe, ovector), P->ovector,
+        Foffset_top * sizeof(PCRE2_SIZE));
+      Foffset_top = P->offset_top;
+      Fcapture_last = P->capture_last;
+      Fcurrent_recurse = P->current_recurse;
+      Fecode = P->ecode + 1 + LINK_SIZE;
+      continue;  /* With next opcode */
+
+      case OP_COND:     /* No need to do anything for these */
      case OP_SCOND:
      break;

      /* Non-atomic positive assertions are like OP_BRA, except that the
      subject pointer must be put back to where it was at the start of the
-      assertion. */
+      assertion. For a variable lookbehind, check its end point. */
+
+      case OP_ASSERTBACK_NA:
+      if (branch_start[1 + LINK_SIZE] == OP_VREVERSE && Feptr != P->eptr)
+        RRETURN(MATCH_NOMATCH);
+      /* Fall through */

      case OP_ASSERT_NA:
-      case OP_ASSERTBACK_NA:
      if (Feptr > mb->last_used_ptr) mb->last_used_ptr = Feptr;
      Feptr = P->eptr;
      break;

      /* Atomic positive assertions are like OP_ONCE, except that in addition
      the subject pointer must be put back to where it was at the start of the
-      assertion. */
+      assertion. For a variable lookbehind, check its end point. */
+
+      case OP_ASSERTBACK:
+      if (branch_start[1 + LINK_SIZE] == OP_VREVERSE && Feptr != P->eptr)
+        RRETURN(MATCH_NOMATCH);
+      /* Fall through */

      case OP_ASSERT:
-      case OP_ASSERTBACK:
      if (Feptr > mb->last_used_ptr) mb->last_used_ptr = Feptr;
      Feptr = P->eptr;
      /* Fall through */
@@ -5814,10 +6029,15 @@ fprintf(stderr, "++ op=%d\n", *Fecode);
      break;

      /* A matching negative assertion returns MATCH, which is turned into
-      NOMATCH at the assertion level. */
+      NOMATCH at the assertion level. For a variable lookbehind, check its end
+      point. */
+
+      case OP_ASSERTBACK_NOT:
+      if (branch_start[1 + LINK_SIZE] == OP_VREVERSE && Feptr != P->eptr)
+        RRETURN(MATCH_NOMATCH);
+      /* Fall through */

      case OP_ASSERT_NOT:
-      case OP_ASSERTBACK_NOT:
      RRETURN(MATCH_MATCH);

      /* At the end of a script run, apply the script-checking rules. This code
@@ -5828,9 +6048,8 @@ fprintf(stderr, "++ op=%d\n", *Fecode);
      if (!PRIV(script_run)(P->eptr, Feptr, utf)) RRETURN(MATCH_NOMATCH);
      break;

-      /* Whole-pattern recursion is coded as a recurse into group 0, so it
-      won't be picked up here. Instead, we catch it when the OP_END is reached.
-      Other recursion is handled here. */
+      /* Whole-pattern recursion is coded as a recurse into group 0, and is
+      handled with OP_BRA above. Other recursion is handled here. */

      case OP_CBRA:
      case OP_CBRAPOS:
@@ -5845,7 +6064,7 @@ fprintf(stderr, "++ op=%d\n", *Fecode);
        {
        P = (heapframe *)((char *)N - frame_size);
        memcpy((char *)F + offsetof(heapframe, ovector), P->ovector,
-          P->offset_top * sizeof(PCRE2_SIZE));
+          Foffset_top * sizeof(PCRE2_SIZE));
        Foffset_top = P->offset_top;
        Fcapture_last = P->capture_last;
        Fcurrent_recurse = P->current_recurse;
@@ -5928,10 +6147,10 @@ fprintf(stderr, "++ op=%d\n", *Fecode);
    if ((mb->poptions & PCRE2_DOLLAR_ENDONLY) == 0) goto ASSERT_NL_OR_EOS;

    /* Fall through */
-    /* Unconditional end of subject assertion (\z) */
+    /* Unconditional end of subject assertion (\z). */

    case OP_EOD:
-    if (Feptr < mb->end_subject) RRETURN(MATCH_NOMATCH);
+    if (Feptr < mb->true_end_subject) RRETURN(MATCH_NOMATCH);
    if (mb->partial != 0)
      {
      mb->hitend = TRUE;
@@ -6043,6 +6262,8 @@ fprintf(stderr, "++ op=%d\n", *Fecode);

    case OP_NOT_WORD_BOUNDARY:
    case OP_WORD_BOUNDARY:
+    case OP_NOT_UCP_WORD_BOUNDARY:
+    case OP_UCP_WORD_BOUNDARY:
    if (Feptr == mb->check_subject) prev_is_word = FALSE; else
      {
      PCRE2_SPTR lastptr = Feptr - 1;
@@ -6057,13 +6278,12 @@ fprintf(stderr, "++ op=%d\n", *Fecode);
      fc = *lastptr;
      if (lastptr < mb->start_used_ptr) mb->start_used_ptr = lastptr;
 #ifdef SUPPORT_UNICODE
-      if ((mb->poptions & PCRE2_UCP) != 0)
+      if (Fop == OP_UCP_WORD_BOUNDARY || Fop == OP_NOT_UCP_WORD_BOUNDARY)
        {
-        if (fc == '_') prev_is_word = TRUE; else
-          {
-          int cat = UCD_CATEGORY(fc);
-          prev_is_word = (cat == ucp_L || cat == ucp_N);
-          }
+        int chartype = UCD_CHARTYPE(fc);
+        int category = PRIV(ucp_gentype)[chartype];
+        prev_is_word = (category == ucp_L || category == ucp_N ||
+          chartype == ucp_Mn || chartype == ucp_Pc);
        }
      else
 #endif  /* SUPPORT_UNICODE */
@@ -6091,13 +6311,12 @@ fprintf(stderr, "++ op=%d\n", *Fecode);
      fc = *Feptr;
      if (nextptr > mb->last_used_ptr) mb->last_used_ptr = nextptr;
 #ifdef SUPPORT_UNICODE
-      if ((mb->poptions & PCRE2_UCP) != 0)
+      if (Fop == OP_UCP_WORD_BOUNDARY || Fop == OP_NOT_UCP_WORD_BOUNDARY)
        {
-        if (fc == '_') cur_is_word = TRUE; else
-          {
-          int cat = UCD_CATEGORY(fc);
-          cur_is_word = (cat == ucp_L || cat == ucp_N);
-          }
+        int chartype = UCD_CHARTYPE(fc);
+        int category = PRIV(ucp_gentype)[chartype];
+        cur_is_word = (category == ucp_L || category == ucp_N ||
+          chartype == ucp_Mn || chartype == ucp_Pc);
        }
      else
 #endif  /* SUPPORT_UNICODE */
@@ -6106,7 +6325,7 @@ fprintf(stderr, "++ op=%d\n", *Fecode);

    /* Now see if the situation is what we want */

-    if ((*Fecode++ == OP_WORD_BOUNDARY)?
+    if ((*Fecode++ == OP_WORD_BOUNDARY || Fop == OP_UCP_WORD_BOUNDARY)?
         cur_is_word == prev_is_word : cur_is_word != prev_is_word)
      RRETURN(MATCH_NOMATCH);
    break;
@@ -6252,7 +6471,7 @@ F = (heapframe *)((char *)F - Fback_frame);       /* Backtrack */
 mb->cb->callout_flags |= PCRE2_CALLOUT_BACKTRACK; /* Note for callouts */

 #ifdef DEBUG_SHOW_RMATCH
-fprintf(stderr, "++ RETURN %d to %d\n", rrc, Freturn_id);
+fprintf(stderr, "++ RETURN %d to RM%d\n", rrc, Freturn_id);
 #endif

 switch (Freturn_id)
@@ -6261,7 +6480,7 @@ switch (Freturn_id)
  LBL( 9) LBL(10) LBL(11) LBL(12) LBL(13) LBL(14) LBL(15) LBL(16)
  LBL(17) LBL(18) LBL(19) LBL(20) LBL(21) LBL(22) LBL(23) LBL(24)
  LBL(25) LBL(26) LBL(27) LBL(28) LBL(29) LBL(30) LBL(31) LBL(32)
-  LBL(33) LBL(34) LBL(35) LBL(36)
+  LBL(33) LBL(34) LBL(35) LBL(36) LBL(37)

 #ifdef SUPPORT_WIDE_CHARS
  LBL(100) LBL(101)
@@ -6549,6 +6768,7 @@ if (use_jit)
    match_data, mcontext);
  if (rc != PCRE2_ERROR_JIT_BADOPTION)
    {
+    match_data->subject_length = length;
    if (rc >= 0 && (options & PCRE2_COPY_MATCHED_SUBJECT) != 0)
      {
      length = CU2BYTES(length + was_zero_terminated);
@@ -6717,7 +6937,7 @@ if (mcontext == NULL)
 else mb->memctl = mcontext->memctl;

 anchored = ((re->overall_options | options) & PCRE2_ANCHORED) != 0;
-firstline = (re->overall_options & PCRE2_FIRSTLINE) != 0;
+firstline = !anchored && (re->overall_options & PCRE2_FIRSTLINE) != 0;
 startline = (re->flags & PCRE2_STARTLINE) != 0;
 bumpalong_limit = (mcontext->offset_limit == PCRE2_UNSET)?
  true_end_subject : subject + mcontext->offset_limit;
@@ -6740,6 +6960,7 @@ mb->callout_data = mcontext->callout_data;
 mb->start_subject = subject;
 mb->start_offset = start_offset;
 mb->end_subject = end_subject;
+mb->true_end_subject = true_end_subject;
 mb->hasthen = (re->flags & PCRE2_HASTHEN) != 0;
 mb->allowemptypartial = (re->max_lookbehind > 0) ||
    (re->flags & PCRE2_MATCH_EMPTY) != 0;
@@ -6799,7 +7020,7 @@ the pattern. It is not used at all if there are no capturing parentheses.

  frame_size                   is the total size of each frame
  match_data->heapframes       is the pointer to the frames vector
-  match_data->heapframes_size  is the total size of the vector
+  match_data->heapframes_size  is the allocated size of the vector

 We must pad the frame_size for alignment to ensure subsequent frames are as
 aligned as heapframe. Whilst ovector is word-aligned due to being a PCRE2_SIZE
@@ -6814,7 +7035,7 @@ frame_size = (offsetof(heapframe, ovector) +
 smaller. */

 mb->heap_limit = ((mcontext->heap_limit < re->limit_heap)?
-  mcontext->heap_limit : re->limit_heap) * 1024;
+  mcontext->heap_limit : re->limit_heap);

 mb->match_limit = (mcontext->match_limit < re->limit_match)?
  mcontext->match_limit : re->limit_match;
@@ -6825,19 +7046,19 @@ mb->match_limit_depth = (mcontext->depth_limit < re->limit_depth)?
 /* If a pattern has very many capturing parentheses, the frame size may be very
 large. Set the initial frame vector size to ensure that there are at least 10
 available frames, but enforce a minimum of START_FRAMES_SIZE. If this is
-greater than the heap limit, get as large a vector as possible. Always round
-the size to a multiple of the frame size. */
+greater than the heap limit, get as large a vector as possible. */

 heapframes_size = frame_size * 10;
 if (heapframes_size < START_FRAMES_SIZE) heapframes_size = START_FRAMES_SIZE;
-if (heapframes_size > mb->heap_limit)
+if (heapframes_size / 1024 > mb->heap_limit)
  {
-  if (frame_size > mb->heap_limit ) return PCRE2_ERROR_HEAPLIMIT;
-  heapframes_size = mb->heap_limit;
+  PCRE2_SIZE max_size = 1024 * mb->heap_limit;
+  if (max_size < frame_size) return PCRE2_ERROR_HEAPLIMIT;
+  heapframes_size = max_size;
  }

 /* If an existing frame vector in the match_data block is large enough, we can
-use it.Otherwise, free any pre-existing vector and get a new one. */
+use it. Otherwise, free any pre-existing vector and get a new one. */

 if (match_data->heapframes_size < heapframes_size)
  {
@@ -7284,9 +7505,17 @@ for(;;)
  mb->end_offset_top = 0;
  mb->skip_arg_count = 0;

+#ifdef DEBUG_SHOW_OPS
+  fprintf(stderr, "++ Calling match()\n");
+#endif
+
  rc = match(start_match, mb->start_code, re->top_bracket, frame_size,
    match_data, mb);

+#ifdef DEBUG_SHOW_OPS
+  fprintf(stderr, "++ match() returned %d\n\n", rc);
+#endif
+
  if (mb->hitend && start_partial == NULL)
    {
    start_partial = mb->start_used_ptr;
@@ -7434,6 +7663,7 @@ if (utf && end_subject != true_end_subject &&
    if (start_match >= true_end_subject)
      {
      rc = MATCH_NOMATCH;  /* In case it was partial */
+      match_partial = NULL;
      break;
      }

@@ -7483,6 +7713,7 @@ if (rc == MATCH_MATCH)
  {
  match_data->rc = ((int)mb->end_offset_top >= 2 * match_data->oveccount)?
    0 : (int)mb->end_offset_top/2 + 1;
+  match_data->subject_length = length;
  match_data->startchar = start_match - subject;
  match_data->leftchar = mb->start_used_ptr - subject;
  match_data->rightchar = ((mb->last_used_ptr > mb->end_match_ptr)?
@@ -7497,6 +7728,7 @@ if (rc == MATCH_MATCH)
    match_data->flags |= PCRE2_MD_COPIED_SUBJECT;
    }
  else match_data->subject = subject;
+
  return match_data->rc;
  }

@@ -7518,6 +7750,7 @@ PCRE2_ERROR_PARTIAL. */
 else if (match_partial != NULL)
  {
  match_data->subject = subject;
+  match_data->subject_length = length;
  match_data->ovector[0] = match_partial - subject;
  match_data->ovector[1] = end_subject - subject;
  match_data->startchar = match_partial - subject;
--- a/Foundation/src/pcre2_match_data.c
+++ b/Foundation/src/pcre2_match_data.c
@@ -167,4 +167,16 @@ return offsetof(pcre2_match_data, ovector) +
  2 * (match_data->oveccount) * sizeof(PCRE2_SIZE);
 }

+
+
+/*************************************************
+*             Get heapframes size                *
+*************************************************/
+
+PCRE2_EXP_DEFN PCRE2_SIZE PCRE2_CALL_CONVENTION
+pcre2_get_match_data_heapframes_size(pcre2_match_data *match_data)
+{
+return match_data->heapframes_size;  /* Allocated size of the heapframes vector */
+}
+
 /* End of pcre2_match_data.c */
--- a/Foundation/src/pcre2_study.c
+++ b/Foundation/src/pcre2_study.c
@@ -7,7 +7,7 @@ and semantics are as close as possible to those of the Perl 5 language.

                       Written by Philip Hazel
     Original API code Copyright (c) 1997-2012 University of Cambridge
-          New API code Copyright (c) 2016-2021 University of Cambridge
+          New API code Copyright (c) 2016-2023 University of Cambridge

 -----------------------------------------------------------------------------
 Redistribution and use in source and binary forms, with or without
@@ -253,6 +253,7 @@ for (;;)
    /* Skip over things that don't match chars */

    case OP_REVERSE:
+    case OP_VREVERSE:
    case OP_CREF:
    case OP_DNCREF:
    case OP_RREF:
@@ -270,6 +271,8 @@ for (;;)
    case OP_DOLLM:
    case OP_NOT_WORD_BOUNDARY:
    case OP_WORD_BOUNDARY:
+    case OP_NOT_UCP_WORD_BOUNDARY:
+    case OP_UCP_WORD_BOUNDARY:
    cc += PRIV(OP_lengths)[*cc];
    break;

@@ -973,6 +976,7 @@ do
  while (try_next)    /* Loop for items in this branch */
    {
    int rc;
+    PCRE2_SPTR ncode;
    uint8_t *classmap = NULL;
 #ifdef SUPPORT_WIDE_CHARS
    PCRE2_UCHAR xclassflags;
@@ -1051,6 +1055,7 @@ do
      case OP_REF:
      case OP_REFI:
      case OP_REVERSE:
+      case OP_VREVERSE:
      case OP_RREF:
      case OP_SCOND:
      case OP_SET_SOM:
@@ -1098,13 +1103,100 @@ do

      case OP_WORD_BOUNDARY:
      case OP_NOT_WORD_BOUNDARY:
+      case OP_UCP_WORD_BOUNDARY:
+      case OP_NOT_UCP_WORD_BOUNDARY:
      tcode++;
      break;

-      /* If we hit a bracket or a positive lookahead assertion, recurse to set
-      bits from within the subpattern. If it can't find anything, we have to
-      give up. If it finds some mandatory character(s), we are done for this
-      branch. Otherwise, carry on scanning after the subpattern. */
+      /* For a positive lookahead assertion, inspect what immediately follows,
+      ignoring intermediate assertions and callouts. If the next item is one
+      that sets a mandatory character, skip this assertion. Otherwise, treat it
+      the same as other bracket groups. */
+
+      case OP_ASSERT:
+      case OP_ASSERT_NA:
+      ncode = tcode + GET(tcode, 1);
+      while (*ncode == OP_ALT) ncode += GET(ncode, 1);
+      ncode += 1 + LINK_SIZE;
+
+      /* Skip irrelevant items */
+
+      for (BOOL done = FALSE; !done;)
+        {
+        switch (*ncode)
+          {
+          case OP_ASSERT:
+          case OP_ASSERT_NOT:
+          case OP_ASSERTBACK:
+          case OP_ASSERTBACK_NOT:
+          case OP_ASSERT_NA:
+          case OP_ASSERTBACK_NA:
+          ncode += GET(ncode, 1);
+          while (*ncode == OP_ALT) ncode += GET(ncode, 1);
+          ncode += 1 + LINK_SIZE;
+          break;
+
+          case OP_WORD_BOUNDARY:
+          case OP_NOT_WORD_BOUNDARY:
+          case OP_UCP_WORD_BOUNDARY:
+          case OP_NOT_UCP_WORD_BOUNDARY:
+          ncode++;
+          break;
+
+          case OP_CALLOUT:
+          ncode += PRIV(OP_lengths)[OP_CALLOUT];
+          break;
+
+          case OP_CALLOUT_STR:
+          ncode += GET(ncode, 1 + 2*LINK_SIZE);
+          break;
+
+          default:
+          done = TRUE;
+          break;
+          }
+        }
+
+      /* Now check the next significant item. */
+
+      switch(*ncode)
+        {
+        default:
+        break;
+
+        case OP_PROP:
+        if (ncode[1] != PT_CLIST) break;
+        /* Fall through */
+        case OP_ANYNL:
+        case OP_CHAR:
+        case OP_CHARI:
+        case OP_EXACT:
+        case OP_EXACTI:
+        case OP_HSPACE:
+        case OP_MINPLUS:
+        case OP_MINPLUSI:
+        case OP_PLUS:
+        case OP_PLUSI:
+        case OP_POSPLUS:
+        case OP_POSPLUSI:
+        case OP_VSPACE:
+        /* Note that these types will only be present in non-UCP mode. */
+        case OP_DIGIT:
+        case OP_NOT_DIGIT:
+        case OP_WORDCHAR:
+        case OP_NOT_WORDCHAR:
+        case OP_WHITESPACE:
+        case OP_NOT_WHITESPACE:
+        tcode = ncode;
+        continue;   /* With the following significant opcode */
+        }
+      /* Fall through */
+
+      /* For a group bracket or a positive assertion without an immediately
+      following mandatory setting, recurse to set bits from within the
+      subpattern. If it can't find anything, we have to give up. If it finds
+      some mandatory character(s), we are done for this branch. Otherwise,
+      carry on scanning after the subpattern. */

      case OP_BRA:
      case OP_SBRA:
@@ -1116,8 +1208,6 @@ do
      case OP_SCBRAPOS:
      case OP_ONCE:
      case OP_SCRIPT_RUN:
-      case OP_ASSERT:
-      case OP_ASSERT_NA:
      rc = set_start_bits(re, tcode, utf, ucp, depthptr);
      if (rc == SSB_DONE)
        {
--- a/Foundation/src/pcre2_substring.c
+++ b/Foundation/src/pcre2_substring.c
@@ -7,7 +7,7 @@ and semantics are as close as possible to those of the Perl 5 language.

                       Written by Philip Hazel
     Original API code Copyright (c) 1997-2012 University of Cambridge
-          New API code Copyright (c) 2016-2018 University of Cambridge
+          New API code Copyright (c) 2016-2023 University of Cambridge

 -----------------------------------------------------------------------------
 Redistribution and use in source and binary forms, with or without
@@ -306,6 +306,7 @@ Returns:         if successful: 0
                   PCRE2_ERROR_NOSUBSTRING: no such substring
                   PCRE2_ERROR_UNAVAILABLE: ovector is too small
                   PCRE2_ERROR_UNSET: substring is not set
+                   PCRE2_ERROR_INVALIDOFFSET: internal error, should not occur
 */

 PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
@@ -338,6 +339,8 @@ else  /* Matched using pcre2_dfa_match() */

 left = match_data->ovector[stringnumber*2];
 right = match_data->ovector[stringnumber*2+1];
+if (left > match_data->subject_length || right > match_data->subject_length)
+  return PCRE2_ERROR_INVALIDOFFSET;
 if (sizeptr != NULL) *sizeptr = (left > right)? 0 : right - left;
 return 0;
 }
@@ -439,7 +442,7 @@ Returns:      nothing
 */

 PCRE2_EXP_DEFN void PCRE2_CALL_CONVENTION
-pcre2_substring_list_free(PCRE2_SPTR *list)
+pcre2_substring_list_free(PCRE2_UCHAR **list)
 {
 if (list != NULL)
  {
--- a/Foundation/src/pcre2_tables.c
+++ b/Foundation/src/pcre2_tables.c
@@ -7,7 +7,7 @@ and semantics are as close as possible to those of the Perl 5 language.

                       Written by Philip Hazel
     Original API code Copyright (c) 1997-2012 University of Cambridge
-          New API code Copyright (c) 2016-2021 University of Cambridge
+          New API code Copyright (c) 2016-2024 University of Cambridge

 -----------------------------------------------------------------------------
 Redistribution and use in source and binary forms, with or without
@@ -169,9 +169,9 @@ are implementing).
 6. Do not break after Prepend characters.

 7. Do not break within emoji modifier sequences or emoji zwj sequences. That
-   is, do not break between characters with the Extended_Pictographic property.
-   Extend and ZWJ characters are allowed between the characters; this cannot be
-   represented in this table, the code has to deal with it.
+   is, do not break between characters with the Extended_Pictographic property
+   if a ZWJ intervenes. Extend characters are allowed between the characters;
+   this cannot be represented in this table, the code has to deal with it.

 8. Do not break within emoji flag sequences. That is, do not break between
   regional indicator (RI) symbols if there are an odd number of RI characters
@@ -201,8 +201,8 @@ const uint32_t PRIV(ucp_gbtable)[] = {
   ESZ|(1u<<ucp_gbT),                                   /* 10 LVT */
   (1u<<ucp_gbRegional_Indicator),                      /* 11 Regional Indicator */
   ESZ,                                                 /* 12 Other */
-   ESZ,                                                 /* 13 ZWJ */
-   ESZ|(1u<<ucp_gbExtended_Pictographic)                /* 14 Extended Pictographic */
+   ESZ|(1u<<ucp_gbExtended_Pictographic),               /* 13 ZWJ */
+   ESZ                                                  /* 14 Extended Pictographic */
 };

 #undef ESZ
--- a/Foundation/src/pcre2_ucd.c
+++ b/Foundation/src/pcre2_ucd.c
--- a/Foundation/src/pcre2_ucp.h
+++ b/Foundation/src/pcre2_ucp.h
@@ -166,29 +166,29 @@ enum {
 /* These are the bidi class values. */

 enum {
-  ucp_bidiAL,   /* Arabic letter */
-  ucp_bidiAN,   /* Arabic number */
-  ucp_bidiB,    /* Paragraph separator */
-  ucp_bidiBN,   /* Boundary neutral */
-  ucp_bidiCS,   /* Common separator */
-  ucp_bidiEN,   /* European number */
-  ucp_bidiES,   /* European separator */
-  ucp_bidiET,   /* European terminator */
-  ucp_bidiFSI,  /* First strong isolate */
-  ucp_bidiL,    /* Left to right */
-  ucp_bidiLRE,  /* Left to right embedding */
-  ucp_bidiLRI,  /* Left to right isolate */
-  ucp_bidiLRO,  /* Left to right override */
-  ucp_bidiNSM,  /* Non-spacing mark */
-  ucp_bidiON,   /* Other neutral */
-  ucp_bidiPDF,  /* Pop directional format */
-  ucp_bidiPDI,  /* Pop directional isolate */
-  ucp_bidiR,    /* Right to left */
-  ucp_bidiRLE,  /* Right to left embedding */
-  ucp_bidiRLI,  /* Right to left isolate */
-  ucp_bidiRLO,  /* Right to left override */
-  ucp_bidiS,    /* Segment separator */
-  ucp_bidiWS,   /* White space */
+  ucp_bidiAL,   /* Arabic_Letter */
+  ucp_bidiAN,   /* Arabic_Number */
+  ucp_bidiB,    /* Paragraph_Separator */
+  ucp_bidiBN,   /* Boundary_Neutral */
+  ucp_bidiCS,   /* Common_Separator */
+  ucp_bidiEN,   /* European_Number */
+  ucp_bidiES,   /* European_Separator */
+  ucp_bidiET,   /* European_Terminator */
+  ucp_bidiFSI,  /* First_Strong_Isolate */
+  ucp_bidiL,    /* Left_To_Right */
+  ucp_bidiLRE,  /* Left_To_Right_Embedding */
+  ucp_bidiLRI,  /* Left_To_Right_Isolate */
+  ucp_bidiLRO,  /* Left_To_Right_Override */
+  ucp_bidiNSM,  /* Nonspacing_Mark */
+  ucp_bidiON,   /* Other_Neutral */
+  ucp_bidiPDF,  /* Pop_Directional_Format */
+  ucp_bidiPDI,  /* Pop_Directional_Isolate */
+  ucp_bidiR,    /* Right_To_Left */
+  ucp_bidiRLE,  /* Right_To_Left_Embedding */
+  ucp_bidiRLI,  /* Right_To_Left_Isolate */
+  ucp_bidiRLO,  /* Right_To_Left_Override */
+  ucp_bidiS,    /* Segment_Separator */
+  ucp_bidiWS,   /* White_Space */
 };

 /* These are grapheme break properties. The Extended Pictographic property
@@ -380,6 +380,8 @@ enum {
  ucp_Tangsa,
  ucp_Toto,
  ucp_Vithkuqi,
+  ucp_Kawi,
+  ucp_Nag_Mundari,

  /* This must be last */
  ucp_Script_Count
--- a/Foundation/src/pcre2_ucptables.c
+++ b/Foundation/src/pcre2_ucptables.c
@@ -265,6 +265,7 @@ the "loose matching" rules that Unicode advises and Perl uses. */
 #define STRING_kana0 STR_k STR_a STR_n STR_a "\0"
 #define STRING_kannada0 STR_k STR_a STR_n STR_n STR_a STR_d STR_a "\0"
 #define STRING_katakana0 STR_k STR_a STR_t STR_a STR_k STR_a STR_n STR_a "\0"
+#define STRING_kawi0 STR_k STR_a STR_w STR_i "\0"
 #define STRING_kayahli0 STR_k STR_a STR_y STR_a STR_h STR_l STR_i "\0"
 #define STRING_khar0 STR_k STR_h STR_a STR_r "\0"
 #define STRING_kharoshthi0 STR_k STR_h STR_a STR_r STR_o STR_s STR_h STR_t STR_h STR_i "\0"
@@ -347,6 +348,8 @@ the "loose matching" rules that Unicode advises and Perl uses. */
 #define STRING_mymr0 STR_m STR_y STR_m STR_r "\0"
 #define STRING_n0 STR_n "\0"
 #define STRING_nabataean0 STR_n STR_a STR_b STR_a STR_t STR_a STR_e STR_a STR_n "\0"
+#define STRING_nagm0 STR_n STR_a STR_g STR_m "\0"
+#define STRING_nagmundari0 STR_n STR_a STR_g STR_m STR_u STR_n STR_d STR_a STR_r STR_i "\0"
 #define STRING_nand0 STR_n STR_a STR_n STR_d "\0"
 #define STRING_nandinagari0 STR_n STR_a STR_n STR_d STR_i STR_n STR_a STR_g STR_a STR_r STR_i "\0"
 #define STRING_narb0 STR_n STR_a STR_r STR_b "\0"
@@ -753,6 +756,7 @@ const char PRIV(utt_names)[] =
  STRING_kana0
  STRING_kannada0
  STRING_katakana0
+  STRING_kawi0
  STRING_kayahli0
  STRING_khar0
  STRING_kharoshthi0
@@ -835,6 +839,8 @@ const char PRIV(utt_names)[] =
  STRING_mymr0
  STRING_n0
  STRING_nabataean0
+  STRING_nagm0
+  STRING_nagmundari0
  STRING_nand0
  STRING_nandinagari0
  STRING_narb0
@@ -1241,280 +1247,283 @@ const ucp_type_table PRIV(utt)[] = {
  { 1665, PT_SCX, ucp_Katakana },
  { 1670, PT_SCX, ucp_Kannada },
  { 1678, PT_SCX, ucp_Katakana },
-  { 1687, PT_SCX, ucp_Kayah_Li },
-  { 1695, PT_SC, ucp_Kharoshthi },
+  { 1687, PT_SC, ucp_Kawi },
+  { 1692, PT_SCX, ucp_Kayah_Li },
  { 1700, PT_SC, ucp_Kharoshthi },
-  { 1711, PT_SC, ucp_Khitan_Small_Script },
-  { 1729, PT_SC, ucp_Khmer },
-  { 1735, PT_SC, ucp_Khmer },
-  { 1740, PT_SCX, ucp_Khojki },
+  { 1705, PT_SC, ucp_Kharoshthi },
+  { 1716, PT_SC, ucp_Khitan_Small_Script },
+  { 1734, PT_SC, ucp_Khmer },
+  { 1740, PT_SC, ucp_Khmer },
  { 1745, PT_SCX, ucp_Khojki },
-  { 1752, PT_SCX, ucp_Khudawadi },
-  { 1762, PT_SC, ucp_Khitan_Small_Script },
-  { 1767, PT_SCX, ucp_Kannada },
-  { 1772, PT_SCX, ucp_Kaithi },
-  { 1777, PT_GC, ucp_L },
-  { 1779, PT_LAMP, 0 },
-  { 1782, PT_SC, ucp_Tai_Tham },
-  { 1787, PT_SC, ucp_Lao },
-  { 1791, PT_SC, ucp_Lao },
-  { 1796, PT_SCX, ucp_Latin },
-  { 1802, PT_SCX, ucp_Latin },
-  { 1807, PT_LAMP, 0 },
-  { 1810, PT_SC, ucp_Lepcha },
+  { 1750, PT_SCX, ucp_Khojki },
+  { 1757, PT_SCX, ucp_Khudawadi },
+  { 1767, PT_SC, ucp_Khitan_Small_Script },
+  { 1772, PT_SCX, ucp_Kannada },
+  { 1777, PT_SCX, ucp_Kaithi },
+  { 1782, PT_GC, ucp_L },
+  { 1784, PT_LAMP, 0 },
+  { 1787, PT_SC, ucp_Tai_Tham },
+  { 1792, PT_SC, ucp_Lao },
+  { 1796, PT_SC, ucp_Lao },
+  { 1801, PT_SCX, ucp_Latin },
+  { 1807, PT_SCX, ucp_Latin },
+  { 1812, PT_LAMP, 0 },
  { 1815, PT_SC, ucp_Lepcha },
-  { 1822, PT_SCX, ucp_Limbu },
+  { 1820, PT_SC, ucp_Lepcha },
  { 1827, PT_SCX, ucp_Limbu },
-  { 1833, PT_SCX, ucp_Linear_A },
-  { 1838, PT_SCX, ucp_Linear_B },
-  { 1843, PT_SCX, ucp_Linear_A },
-  { 1851, PT_SCX, ucp_Linear_B },
-  { 1859, PT_SC, ucp_Lisu },
-  { 1864, PT_PC, ucp_Ll },
-  { 1867, PT_PC, ucp_Lm },
-  { 1870, PT_PC, ucp_Lo },
-  { 1873, PT_BOOL, ucp_Logical_Order_Exception },
-  { 1877, PT_BOOL, ucp_Logical_Order_Exception },
-  { 1899, PT_BOOL, ucp_Lowercase },
-  { 1905, PT_BOOL, ucp_Lowercase },
-  { 1915, PT_PC, ucp_Lt },
-  { 1918, PT_PC, ucp_Lu },
-  { 1921, PT_SC, ucp_Lycian },
+  { 1832, PT_SCX, ucp_Limbu },
+  { 1838, PT_SCX, ucp_Linear_A },
+  { 1843, PT_SCX, ucp_Linear_B },
+  { 1848, PT_SCX, ucp_Linear_A },
+  { 1856, PT_SCX, ucp_Linear_B },
+  { 1864, PT_SC, ucp_Lisu },
+  { 1869, PT_PC, ucp_Ll },
+  { 1872, PT_PC, ucp_Lm },
+  { 1875, PT_PC, ucp_Lo },
+  { 1878, PT_BOOL, ucp_Logical_Order_Exception },
+  { 1882, PT_BOOL, ucp_Logical_Order_Exception },
+  { 1904, PT_BOOL, ucp_Lowercase },
+  { 1910, PT_BOOL, ucp_Lowercase },
+  { 1920, PT_PC, ucp_Lt },
+  { 1923, PT_PC, ucp_Lu },
  { 1926, PT_SC, ucp_Lycian },
-  { 1933, PT_SC, ucp_Lydian },
+  { 1931, PT_SC, ucp_Lycian },
  { 1938, PT_SC, ucp_Lydian },
-  { 1945, PT_GC, ucp_M },
-  { 1947, PT_SCX, ucp_Mahajani },
-  { 1956, PT_SCX, ucp_Mahajani },
-  { 1961, PT_SC, ucp_Makasar },
+  { 1943, PT_SC, ucp_Lydian },
+  { 1950, PT_GC, ucp_M },
+  { 1952, PT_SCX, ucp_Mahajani },
+  { 1961, PT_SCX, ucp_Mahajani },
  { 1966, PT_SC, ucp_Makasar },
-  { 1974, PT_SCX, ucp_Malayalam },
-  { 1984, PT_SCX, ucp_Mandaic },
+  { 1971, PT_SC, ucp_Makasar },
+  { 1979, PT_SCX, ucp_Malayalam },
  { 1989, PT_SCX, ucp_Mandaic },
-  { 1997, PT_SCX, ucp_Manichaean },
+  { 1994, PT_SCX, ucp_Mandaic },
  { 2002, PT_SCX, ucp_Manichaean },
-  { 2013, PT_SC, ucp_Marchen },
+  { 2007, PT_SCX, ucp_Manichaean },
  { 2018, PT_SC, ucp_Marchen },
-  { 2026, PT_SCX, ucp_Masaram_Gondi },
-  { 2039, PT_BOOL, ucp_Math },
-  { 2044, PT_PC, ucp_Mc },
-  { 2047, PT_PC, ucp_Me },
-  { 2050, PT_SC, ucp_Medefaidrin },
-  { 2062, PT_SC, ucp_Medefaidrin },
-  { 2067, PT_SC, ucp_Meetei_Mayek },
-  { 2079, PT_SC, ucp_Mende_Kikakui },
+  { 2023, PT_SC, ucp_Marchen },
+  { 2031, PT_SCX, ucp_Masaram_Gondi },
+  { 2044, PT_BOOL, ucp_Math },
+  { 2049, PT_PC, ucp_Mc },
+  { 2052, PT_PC, ucp_Me },
+  { 2055, PT_SC, ucp_Medefaidrin },
+  { 2067, PT_SC, ucp_Medefaidrin },
+  { 2072, PT_SC, ucp_Meetei_Mayek },
  { 2084, PT_SC, ucp_Mende_Kikakui },
-  { 2097, PT_SC, ucp_Meroitic_Cursive },
-  { 2102, PT_SC, ucp_Meroitic_Hieroglyphs },
-  { 2107, PT_SC, ucp_Meroitic_Cursive },
-  { 2123, PT_SC, ucp_Meroitic_Hieroglyphs },
-  { 2143, PT_SC, ucp_Miao },
-  { 2148, PT_SCX, ucp_Malayalam },
-  { 2153, PT_PC, ucp_Mn },
-  { 2156, PT_SCX, ucp_Modi },
-  { 2161, PT_SCX, ucp_Mongolian },
+  { 2089, PT_SC, ucp_Mende_Kikakui },
+  { 2102, PT_SC, ucp_Meroitic_Cursive },
+  { 2107, PT_SC, ucp_Meroitic_Hieroglyphs },
+  { 2112, PT_SC, ucp_Meroitic_Cursive },
+  { 2128, PT_SC, ucp_Meroitic_Hieroglyphs },
+  { 2148, PT_SC, ucp_Miao },
+  { 2153, PT_SCX, ucp_Malayalam },
+  { 2158, PT_PC, ucp_Mn },
+  { 2161, PT_SCX, ucp_Modi },
  { 2166, PT_SCX, ucp_Mongolian },
-  { 2176, PT_SC, ucp_Mro },
-  { 2180, PT_SC, ucp_Mro },
-  { 2185, PT_SC, ucp_Meetei_Mayek },
-  { 2190, PT_SCX, ucp_Multani },
+  { 2171, PT_SCX, ucp_Mongolian },
+  { 2181, PT_SC, ucp_Mro },
+  { 2185, PT_SC, ucp_Mro },
+  { 2190, PT_SC, ucp_Meetei_Mayek },
  { 2195, PT_SCX, ucp_Multani },
-  { 2203, PT_SCX, ucp_Myanmar },
-  { 2211, PT_SCX, ucp_Myanmar },
-  { 2216, PT_GC, ucp_N },
-  { 2218, PT_SC, ucp_Nabataean },
-  { 2228, PT_SCX, ucp_Nandinagari },
-  { 2233, PT_SCX, ucp_Nandinagari },
-  { 2245, PT_SC, ucp_Old_North_Arabian },
-  { 2250, PT_SC, ucp_Nabataean },
-  { 2255, PT_BOOL, ucp_Noncharacter_Code_Point },
-  { 2261, PT_PC, ucp_Nd },
-  { 2264, PT_SC, ucp_Newa },
-  { 2269, PT_SC, ucp_New_Tai_Lue },
-  { 2279, PT_SCX, ucp_Nko },
-  { 2283, PT_SCX, ucp_Nko },
-  { 2288, PT_PC, ucp_Nl },
-  { 2291, PT_PC, ucp_No },
-  { 2294, PT_BOOL, ucp_Noncharacter_Code_Point },
-  { 2316, PT_SC, ucp_Nushu },
-  { 2321, PT_SC, ucp_Nushu },
-  { 2327, PT_SC, ucp_Nyiakeng_Puachue_Hmong },
-  { 2348, PT_SC, ucp_Ogham },
-  { 2353, PT_SC, ucp_Ogham },
-  { 2359, PT_SC, ucp_Ol_Chiki },
-  { 2367, PT_SC, ucp_Ol_Chiki },
-  { 2372, PT_SC, ucp_Old_Hungarian },
-  { 2385, PT_SC, ucp_Old_Italic },
-  { 2395, PT_SC, ucp_Old_North_Arabian },
-  { 2411, PT_SCX, ucp_Old_Permic },
-  { 2421, PT_SC, ucp_Old_Persian },
-  { 2432, PT_SC, ucp_Old_Sogdian },
-  { 2443, PT_SC, ucp_Old_South_Arabian },
-  { 2459, PT_SC, ucp_Old_Turkic },
-  { 2469, PT_SCX, ucp_Old_Uyghur },
-  { 2479, PT_SCX, ucp_Oriya },
-  { 2485, PT_SC, ucp_Old_Turkic },
-  { 2490, PT_SCX, ucp_Oriya },
-  { 2495, PT_SC, ucp_Osage },
-  { 2501, PT_SC, ucp_Osage },
-  { 2506, PT_SC, ucp_Osmanya },
-  { 2511, PT_SC, ucp_Osmanya },
-  { 2519, PT_SCX, ucp_Old_Uyghur },
-  { 2524, PT_GC, ucp_P },
-  { 2526, PT_SC, ucp_Pahawh_Hmong },
-  { 2538, PT_SC, ucp_Palmyrene },
-  { 2543, PT_SC, ucp_Palmyrene },
-  { 2553, PT_BOOL, ucp_Pattern_Syntax },
-  { 2560, PT_BOOL, ucp_Pattern_Syntax },
-  { 2574, PT_BOOL, ucp_Pattern_White_Space },
-  { 2592, PT_BOOL, ucp_Pattern_White_Space },
-  { 2598, PT_SC, ucp_Pau_Cin_Hau },
-  { 2603, PT_SC, ucp_Pau_Cin_Hau },
-  { 2613, PT_PC, ucp_Pc },
-  { 2616, PT_BOOL, ucp_Prepended_Concatenation_Mark },
-  { 2620, PT_PC, ucp_Pd },
-  { 2623, PT_PC, ucp_Pe },
-  { 2626, PT_SCX, ucp_Old_Permic },
-  { 2631, PT_PC, ucp_Pf },
-  { 2634, PT_SCX, ucp_Phags_Pa },
-  { 2639, PT_SCX, ucp_Phags_Pa },
-  { 2647, PT_SC, ucp_Inscriptional_Pahlavi },
-  { 2652, PT_SCX, ucp_Psalter_Pahlavi },
-  { 2657, PT_SC, ucp_Phoenician },
-  { 2662, PT_SC, ucp_Phoenician },
-  { 2673, PT_PC, ucp_Pi },
-  { 2676, PT_SC, ucp_Miao },
-  { 2681, PT_PC, ucp_Po },
-  { 2684, PT_BOOL, ucp_Prepended_Concatenation_Mark },
-  { 2711, PT_SC, ucp_Inscriptional_Parthian },
-  { 2716, PT_PC, ucp_Ps },
-  { 2719, PT_SCX, ucp_Psalter_Pahlavi },
-  { 2734, PT_SCX, ucp_Coptic },
-  { 2739, PT_SC, ucp_Inherited },
-  { 2744, PT_BOOL, ucp_Quotation_Mark },
-  { 2750, PT_BOOL, ucp_Quotation_Mark },
-  { 2764, PT_BOOL, ucp_Radical },
-  { 2772, PT_BOOL, ucp_Regional_Indicator },
-  { 2790, PT_SC, ucp_Rejang },
-  { 2797, PT_BOOL, ucp_Regional_Indicator },
-  { 2800, PT_SC, ucp_Rejang },
-  { 2805, PT_SCX, ucp_Hanifi_Rohingya },
-  { 2810, PT_SC, ucp_Runic },
-  { 2816, PT_SC, ucp_Runic },
-  { 2821, PT_GC, ucp_S },
-  { 2823, PT_SC, ucp_Samaritan },
-  { 2833, PT_SC, ucp_Samaritan },
-  { 2838, PT_SC, ucp_Old_South_Arabian },
-  { 2843, PT_SC, ucp_Saurashtra },
-  { 2848, PT_SC, ucp_Saurashtra },
-  { 2859, PT_PC, ucp_Sc },
-  { 2862, PT_BOOL, ucp_Soft_Dotted },
-  { 2865, PT_BOOL, ucp_Sentence_Terminal },
-  { 2882, PT_SC, ucp_SignWriting },
-  { 2887, PT_SCX, ucp_Sharada },
-  { 2895, PT_SC, ucp_Shavian },
-  { 2903, PT_SC, ucp_Shavian },
+  { 2200, PT_SCX, ucp_Multani },
+  { 2208, PT_SCX, ucp_Myanmar },
+  { 2216, PT_SCX, ucp_Myanmar },
+  { 2221, PT_GC, ucp_N },
+  { 2223, PT_SC, ucp_Nabataean },
+  { 2233, PT_SC, ucp_Nag_Mundari },
+  { 2238, PT_SC, ucp_Nag_Mundari },
+  { 2249, PT_SCX, ucp_Nandinagari },
+  { 2254, PT_SCX, ucp_Nandinagari },
+  { 2266, PT_SC, ucp_Old_North_Arabian },
+  { 2271, PT_SC, ucp_Nabataean },
+  { 2276, PT_BOOL, ucp_Noncharacter_Code_Point },
+  { 2282, PT_PC, ucp_Nd },
+  { 2285, PT_SC, ucp_Newa },
+  { 2290, PT_SC, ucp_New_Tai_Lue },
+  { 2300, PT_SCX, ucp_Nko },
+  { 2304, PT_SCX, ucp_Nko },
+  { 2309, PT_PC, ucp_Nl },
+  { 2312, PT_PC, ucp_No },
+  { 2315, PT_BOOL, ucp_Noncharacter_Code_Point },
+  { 2337, PT_SC, ucp_Nushu },
+  { 2342, PT_SC, ucp_Nushu },
+  { 2348, PT_SC, ucp_Nyiakeng_Puachue_Hmong },
+  { 2369, PT_SC, ucp_Ogham },
+  { 2374, PT_SC, ucp_Ogham },
+  { 2380, PT_SC, ucp_Ol_Chiki },
+  { 2388, PT_SC, ucp_Ol_Chiki },
+  { 2393, PT_SC, ucp_Old_Hungarian },
+  { 2406, PT_SC, ucp_Old_Italic },
+  { 2416, PT_SC, ucp_Old_North_Arabian },
+  { 2432, PT_SCX, ucp_Old_Permic },
+  { 2442, PT_SC, ucp_Old_Persian },
+  { 2453, PT_SC, ucp_Old_Sogdian },
+  { 2464, PT_SC, ucp_Old_South_Arabian },
+  { 2480, PT_SC, ucp_Old_Turkic },
+  { 2490, PT_SCX, ucp_Old_Uyghur },
+  { 2500, PT_SCX, ucp_Oriya },
+  { 2506, PT_SC, ucp_Old_Turkic },
+  { 2511, PT_SCX, ucp_Oriya },
+  { 2516, PT_SC, ucp_Osage },
+  { 2522, PT_SC, ucp_Osage },
+  { 2527, PT_SC, ucp_Osmanya },
+  { 2532, PT_SC, ucp_Osmanya },
+  { 2540, PT_SCX, ucp_Old_Uyghur },
+  { 2545, PT_GC, ucp_P },
+  { 2547, PT_SC, ucp_Pahawh_Hmong },
+  { 2559, PT_SC, ucp_Palmyrene },
+  { 2564, PT_SC, ucp_Palmyrene },
+  { 2574, PT_BOOL, ucp_Pattern_Syntax },
+  { 2581, PT_BOOL, ucp_Pattern_Syntax },
+  { 2595, PT_BOOL, ucp_Pattern_White_Space },
+  { 2613, PT_BOOL, ucp_Pattern_White_Space },
+  { 2619, PT_SC, ucp_Pau_Cin_Hau },
+  { 2624, PT_SC, ucp_Pau_Cin_Hau },
+  { 2634, PT_PC, ucp_Pc },
+  { 2637, PT_BOOL, ucp_Prepended_Concatenation_Mark },
+  { 2641, PT_PC, ucp_Pd },
+  { 2644, PT_PC, ucp_Pe },
+  { 2647, PT_SCX, ucp_Old_Permic },
+  { 2652, PT_PC, ucp_Pf },
+  { 2655, PT_SCX, ucp_Phags_Pa },
+  { 2660, PT_SCX, ucp_Phags_Pa },
+  { 2668, PT_SC, ucp_Inscriptional_Pahlavi },
+  { 2673, PT_SCX, ucp_Psalter_Pahlavi },
+  { 2678, PT_SC, ucp_Phoenician },
+  { 2683, PT_SC, ucp_Phoenician },
+  { 2694, PT_PC, ucp_Pi },
+  { 2697, PT_SC, ucp_Miao },
+  { 2702, PT_PC, ucp_Po },
+  { 2705, PT_BOOL, ucp_Prepended_Concatenation_Mark },
+  { 2732, PT_SC, ucp_Inscriptional_Parthian },
+  { 2737, PT_PC, ucp_Ps },
+  { 2740, PT_SCX, ucp_Psalter_Pahlavi },
+  { 2755, PT_SCX, ucp_Coptic },
+  { 2760, PT_SC, ucp_Inherited },
+  { 2765, PT_BOOL, ucp_Quotation_Mark },
+  { 2771, PT_BOOL, ucp_Quotation_Mark },
+  { 2785, PT_BOOL, ucp_Radical },
+  { 2793, PT_BOOL, ucp_Regional_Indicator },
+  { 2811, PT_SC, ucp_Rejang },
+  { 2818, PT_BOOL, ucp_Regional_Indicator },
+  { 2821, PT_SC, ucp_Rejang },
+  { 2826, PT_SCX, ucp_Hanifi_Rohingya },
+  { 2831, PT_SC, ucp_Runic },
+  { 2837, PT_SC, ucp_Runic },
+  { 2842, PT_GC, ucp_S },
+  { 2844, PT_SC, ucp_Samaritan },
+  { 2854, PT_SC, ucp_Samaritan },
+  { 2859, PT_SC, ucp_Old_South_Arabian },
+  { 2864, PT_SC, ucp_Saurashtra },
+  { 2869, PT_SC, ucp_Saurashtra },
+  { 2880, PT_PC, ucp_Sc },
+  { 2883, PT_BOOL, ucp_Soft_Dotted },
+  { 2886, PT_BOOL, ucp_Sentence_Terminal },
+  { 2903, PT_SC, ucp_SignWriting },
  { 2908, PT_SCX, ucp_Sharada },
-  { 2913, PT_SC, ucp_Siddham },
-  { 2918, PT_SC, ucp_Siddham },
-  { 2926, PT_SC, ucp_SignWriting },
-  { 2938, PT_SCX, ucp_Khudawadi },
-  { 2943, PT_SCX, ucp_Sinhala },
-  { 2948, PT_SCX, ucp_Sinhala },
-  { 2956, PT_PC, ucp_Sk },
-  { 2959, PT_PC, ucp_Sm },
-  { 2962, PT_PC, ucp_So },
-  { 2965, PT_BOOL, ucp_Soft_Dotted },
-  { 2976, PT_SCX, ucp_Sogdian },
-  { 2981, PT_SCX, ucp_Sogdian },
-  { 2989, PT_SC, ucp_Old_Sogdian },
-  { 2994, PT_SC, ucp_Sora_Sompeng },
-  { 2999, PT_SC, ucp_Sora_Sompeng },
-  { 3011, PT_SC, ucp_Soyombo },
-  { 3016, PT_SC, ucp_Soyombo },
-  { 3024, PT_BOOL, ucp_White_Space },
-  { 3030, PT_BOOL, ucp_Sentence_Terminal },
-  { 3036, PT_SC, ucp_Sundanese },
-  { 3041, PT_SC, ucp_Sundanese },
-  { 3051, PT_SCX, ucp_Syloti_Nagri },
-  { 3056, PT_SCX, ucp_Syloti_Nagri },
-  { 3068, PT_SCX, ucp_Syriac },
-  { 3073, PT_SCX, ucp_Syriac },
-  { 3080, PT_SCX, ucp_Tagalog },
-  { 3088, PT_SCX, ucp_Tagbanwa },
-  { 3093, PT_SCX, ucp_Tagbanwa },
-  { 3102, PT_SCX, ucp_Tai_Le },
-  { 3108, PT_SC, ucp_Tai_Tham },
-  { 3116, PT_SC, ucp_Tai_Viet },
-  { 3124, PT_SCX, ucp_Takri },
-  { 3129, PT_SCX, ucp_Takri },
-  { 3135, PT_SCX, ucp_Tai_Le },
-  { 3140, PT_SC, ucp_New_Tai_Lue },
-  { 3145, PT_SCX, ucp_Tamil },
-  { 3151, PT_SCX, ucp_Tamil },
-  { 3156, PT_SC, ucp_Tangut },
-  { 3161, PT_SC, ucp_Tangsa },
-  { 3168, PT_SC, ucp_Tangut },
-  { 3175, PT_SC, ucp_Tai_Viet },
-  { 3180, PT_SCX, ucp_Telugu },
-  { 3185, PT_SCX, ucp_Telugu },
-  { 3192, PT_BOOL, ucp_Terminal_Punctuation },
-  { 3197, PT_BOOL, ucp_Terminal_Punctuation },
-  { 3217, PT_SC, ucp_Tifinagh },
-  { 3222, PT_SCX, ucp_Tagalog },
-  { 3227, PT_SCX, ucp_Thaana },
-  { 3232, PT_SCX, ucp_Thaana },
-  { 3239, PT_SC, ucp_Thai },
-  { 3244, PT_SC, ucp_Tibetan },
-  { 3252, PT_SC, ucp_Tibetan },
-  { 3257, PT_SC, ucp_Tifinagh },
-  { 3266, PT_SCX, ucp_Tirhuta },
-  { 3271, PT_SCX, ucp_Tirhuta },
-  { 3279, PT_SC, ucp_Tangsa },
-  { 3284, PT_SC, ucp_Toto },
-  { 3289, PT_SC, ucp_Ugaritic },
-  { 3294, PT_SC, ucp_Ugaritic },
-  { 3303, PT_BOOL, ucp_Unified_Ideograph },
-  { 3309, PT_BOOL, ucp_Unified_Ideograph },
-  { 3326, PT_SC, ucp_Unknown },
-  { 3334, PT_BOOL, ucp_Uppercase },
-  { 3340, PT_BOOL, ucp_Uppercase },
-  { 3350, PT_SC, ucp_Vai },
-  { 3354, PT_SC, ucp_Vai },
-  { 3359, PT_BOOL, ucp_Variation_Selector },
-  { 3377, PT_SC, ucp_Vithkuqi },
-  { 3382, PT_SC, ucp_Vithkuqi },
-  { 3391, PT_BOOL, ucp_Variation_Selector },
-  { 3394, PT_SC, ucp_Wancho },
-  { 3401, PT_SC, ucp_Warang_Citi },
-  { 3406, PT_SC, ucp_Warang_Citi },
-  { 3417, PT_SC, ucp_Wancho },
-  { 3422, PT_BOOL, ucp_White_Space },
-  { 3433, PT_BOOL, ucp_White_Space },
-  { 3440, PT_ALNUM, 0 },
-  { 3444, PT_BOOL, ucp_XID_Continue },
-  { 3449, PT_BOOL, ucp_XID_Continue },
-  { 3461, PT_BOOL, ucp_XID_Start },
-  { 3466, PT_BOOL, ucp_XID_Start },
-  { 3475, PT_SC, ucp_Old_Persian },
-  { 3480, PT_PXSPACE, 0 },
-  { 3484, PT_SPACE, 0 },
-  { 3488, PT_SC, ucp_Cuneiform },
-  { 3493, PT_UCNC, 0 },
-  { 3497, PT_WORD, 0 },
-  { 3501, PT_SCX, ucp_Yezidi },
-  { 3506, PT_SCX, ucp_Yezidi },
-  { 3513, PT_SCX, ucp_Yi },
-  { 3516, PT_SCX, ucp_Yi },
-  { 3521, PT_GC, ucp_Z },
-  { 3523, PT_SC, ucp_Zanabazar_Square },
-  { 3539, PT_SC, ucp_Zanabazar_Square },
-  { 3544, PT_SC, ucp_Inherited },
-  { 3549, PT_PC, ucp_Zl },
-  { 3552, PT_PC, ucp_Zp },
-  { 3555, PT_PC, ucp_Zs },
-  { 3558, PT_SC, ucp_Common },
-  { 3563, PT_SC, ucp_Unknown }
+  { 2916, PT_SC, ucp_Shavian },
+  { 2924, PT_SC, ucp_Shavian },
+  { 2929, PT_SCX, ucp_Sharada },
+  { 2934, PT_SC, ucp_Siddham },
+  { 2939, PT_SC, ucp_Siddham },
+  { 2947, PT_SC, ucp_SignWriting },
+  { 2959, PT_SCX, ucp_Khudawadi },
+  { 2964, PT_SCX, ucp_Sinhala },
+  { 2969, PT_SCX, ucp_Sinhala },
+  { 2977, PT_PC, ucp_Sk },
+  { 2980, PT_PC, ucp_Sm },
+  { 2983, PT_PC, ucp_So },
+  { 2986, PT_BOOL, ucp_Soft_Dotted },
+  { 2997, PT_SCX, ucp_Sogdian },
+  { 3002, PT_SCX, ucp_Sogdian },
+  { 3010, PT_SC, ucp_Old_Sogdian },
+  { 3015, PT_SC, ucp_Sora_Sompeng },
+  { 3020, PT_SC, ucp_Sora_Sompeng },
+  { 3032, PT_SC, ucp_Soyombo },
+  { 3037, PT_SC, ucp_Soyombo },
+  { 3045, PT_BOOL, ucp_White_Space },
+  { 3051, PT_BOOL, ucp_Sentence_Terminal },
+  { 3057, PT_SC, ucp_Sundanese },
+  { 3062, PT_SC, ucp_Sundanese },
+  { 3072, PT_SCX, ucp_Syloti_Nagri },
+  { 3077, PT_SCX, ucp_Syloti_Nagri },
+  { 3089, PT_SCX, ucp_Syriac },
+  { 3094, PT_SCX, ucp_Syriac },
+  { 3101, PT_SCX, ucp_Tagalog },
+  { 3109, PT_SCX, ucp_Tagbanwa },
+  { 3114, PT_SCX, ucp_Tagbanwa },
+  { 3123, PT_SCX, ucp_Tai_Le },
+  { 3129, PT_SC, ucp_Tai_Tham },
+  { 3137, PT_SC, ucp_Tai_Viet },
+  { 3145, PT_SCX, ucp_Takri },
+  { 3150, PT_SCX, ucp_Takri },
+  { 3156, PT_SCX, ucp_Tai_Le },
+  { 3161, PT_SC, ucp_New_Tai_Lue },
+  { 3166, PT_SCX, ucp_Tamil },
+  { 3172, PT_SCX, ucp_Tamil },
+  { 3177, PT_SC, ucp_Tangut },
+  { 3182, PT_SC, ucp_Tangsa },
+  { 3189, PT_SC, ucp_Tangut },
+  { 3196, PT_SC, ucp_Tai_Viet },
+  { 3201, PT_SCX, ucp_Telugu },
+  { 3206, PT_SCX, ucp_Telugu },
+  { 3213, PT_BOOL, ucp_Terminal_Punctuation },
+  { 3218, PT_BOOL, ucp_Terminal_Punctuation },
+  { 3238, PT_SC, ucp_Tifinagh },
+  { 3243, PT_SCX, ucp_Tagalog },
+  { 3248, PT_SCX, ucp_Thaana },
+  { 3253, PT_SCX, ucp_Thaana },
+  { 3260, PT_SC, ucp_Thai },
+  { 3265, PT_SC, ucp_Tibetan },
+  { 3273, PT_SC, ucp_Tibetan },
+  { 3278, PT_SC, ucp_Tifinagh },
+  { 3287, PT_SCX, ucp_Tirhuta },
+  { 3292, PT_SCX, ucp_Tirhuta },
+  { 3300, PT_SC, ucp_Tangsa },
+  { 3305, PT_SC, ucp_Toto },
+  { 3310, PT_SC, ucp_Ugaritic },
+  { 3315, PT_SC, ucp_Ugaritic },
+  { 3324, PT_BOOL, ucp_Unified_Ideograph },
+  { 3330, PT_BOOL, ucp_Unified_Ideograph },
+  { 3347, PT_SC, ucp_Unknown },
+  { 3355, PT_BOOL, ucp_Uppercase },
+  { 3361, PT_BOOL, ucp_Uppercase },
+  { 3371, PT_SC, ucp_Vai },
+  { 3375, PT_SC, ucp_Vai },
+  { 3380, PT_BOOL, ucp_Variation_Selector },
+  { 3398, PT_SC, ucp_Vithkuqi },
+  { 3403, PT_SC, ucp_Vithkuqi },
+  { 3412, PT_BOOL, ucp_Variation_Selector },
+  { 3415, PT_SC, ucp_Wancho },
+  { 3422, PT_SC, ucp_Warang_Citi },
+  { 3427, PT_SC, ucp_Warang_Citi },
+  { 3438, PT_SC, ucp_Wancho },
+  { 3443, PT_BOOL, ucp_White_Space },
+  { 3454, PT_BOOL, ucp_White_Space },
+  { 3461, PT_ALNUM, 0 },
+  { 3465, PT_BOOL, ucp_XID_Continue },
+  { 3470, PT_BOOL, ucp_XID_Continue },
+  { 3482, PT_BOOL, ucp_XID_Start },
+  { 3487, PT_BOOL, ucp_XID_Start },
+  { 3496, PT_SC, ucp_Old_Persian },
+  { 3501, PT_PXSPACE, 0 },
+  { 3505, PT_SPACE, 0 },
+  { 3509, PT_SC, ucp_Cuneiform },
+  { 3514, PT_UCNC, 0 },
+  { 3518, PT_WORD, 0 },
+  { 3522, PT_SCX, ucp_Yezidi },
+  { 3527, PT_SCX, ucp_Yezidi },
+  { 3534, PT_SCX, ucp_Yi },
+  { 3537, PT_SCX, ucp_Yi },
+  { 3542, PT_GC, ucp_Z },
+  { 3544, PT_SC, ucp_Zanabazar_Square },
+  { 3560, PT_SC, ucp_Zanabazar_Square },
+  { 3565, PT_SC, ucp_Inherited },
+  { 3570, PT_PC, ucp_Zl },
+  { 3573, PT_PC, ucp_Zp },
+  { 3576, PT_PC, ucp_Zs },
+  { 3579, PT_SC, ucp_Common },
+  { 3584, PT_SC, ucp_Unknown }
 };

 const size_t PRIV(utt_size) = sizeof(PRIV(utt)) / sizeof(ucp_type_table);
--- a/Foundation/src/pcre2_valid_utf.c
+++ b/Foundation/src/pcre2_valid_utf.c
@@ -169,7 +169,7 @@ for (p = string; length > 0; p++)

  if (((d = *(++p)) & 0xc0) != 0x80)
    {
-    *erroroffset = (int)(p - string) - 1;
+    *erroroffset = (PCRE2_SIZE)(p - string) - 1;
    return PCRE2_ERROR_UTF8_ERR6;
    }

@@ -184,7 +184,7 @@ for (p = string; length > 0; p++)

    case 1: if ((c & 0x3e) == 0)
      {
-      *erroroffset = (int)(p - string) - 1;
+      *erroroffset = (PCRE2_SIZE)(p - string) - 1;
      return PCRE2_ERROR_UTF8_ERR15;
      }
    break;
@@ -196,17 +196,17 @@ for (p = string; length > 0; p++)
    case 2:
    if ((*(++p) & 0xc0) != 0x80)     /* Third byte */
      {
-      *erroroffset = (int)(p - string) - 2;
+      *erroroffset = (PCRE2_SIZE)(p - string) - 2;
      return PCRE2_ERROR_UTF8_ERR7;
      }
    if (c == 0xe0 && (d & 0x20) == 0)
      {
-      *erroroffset = (int)(p - string) - 2;
+      *erroroffset = (PCRE2_SIZE)(p - string) - 2;
      return PCRE2_ERROR_UTF8_ERR16;
      }
    if (c == 0xed && d >= 0xa0)
      {
-      *erroroffset = (int)(p - string) - 2;
+      *erroroffset = (PCRE2_SIZE)(p - string) - 2;
      return PCRE2_ERROR_UTF8_ERR14;
      }
    break;
@@ -218,22 +218,22 @@ for (p = string; length > 0; p++)
    case 3:
    if ((*(++p) & 0xc0) != 0x80)     /* Third byte */
      {
-      *erroroffset = (int)(p - string) - 2;
+      *erroroffset = (PCRE2_SIZE)(p - string) - 2;
      return PCRE2_ERROR_UTF8_ERR7;
      }
    if ((*(++p) & 0xc0) != 0x80)     /* Fourth byte */
      {
-      *erroroffset = (int)(p - string) - 3;
+      *erroroffset = (PCRE2_SIZE)(p - string) - 3;
      return PCRE2_ERROR_UTF8_ERR8;
      }
    if (c == 0xf0 && (d & 0x30) == 0)
      {
-      *erroroffset = (int)(p - string) - 3;
+      *erroroffset = (PCRE2_SIZE)(p - string) - 3;
      return PCRE2_ERROR_UTF8_ERR17;
      }
    if (c > 0xf4 || (c == 0xf4 && d > 0x8f))
      {
-      *erroroffset = (int)(p - string) - 3;
+      *erroroffset = (PCRE2_SIZE)(p - string) - 3;
      return PCRE2_ERROR_UTF8_ERR13;
      }
    break;
@@ -249,22 +249,22 @@ for (p = string; length > 0; p++)
    case 4:
    if ((*(++p) & 0xc0) != 0x80)     /* Third byte */
      {
-      *erroroffset = (int)(p - string) - 2;
+      *erroroffset = (PCRE2_SIZE)(p - string) - 2;
      return PCRE2_ERROR_UTF8_ERR7;
      }
    if ((*(++p) & 0xc0) != 0x80)     /* Fourth byte */
      {
-      *erroroffset = (int)(p - string) - 3;
+      *erroroffset = (PCRE2_SIZE)(p - string) - 3;
      return PCRE2_ERROR_UTF8_ERR8;
      }
    if ((*(++p) & 0xc0) != 0x80)     /* Fifth byte */
      {
-      *erroroffset = (int)(p - string) - 4;
+      *erroroffset = (PCRE2_SIZE)(p - string) - 4;
      return PCRE2_ERROR_UTF8_ERR9;
      }
    if (c == 0xf8 && (d & 0x38) == 0)
      {
-      *erroroffset = (int)(p - string) - 4;
+      *erroroffset = (PCRE2_SIZE)(p - string) - 4;
      return PCRE2_ERROR_UTF8_ERR18;
      }
    break;
@@ -275,27 +275,27 @@ for (p = string; length > 0; p++)
    case 5:
    if ((*(++p) & 0xc0) != 0x80)     /* Third byte */
      {
-      *erroroffset = (int)(p - string) - 2;
+      *erroroffset = (PCRE2_SIZE)(p - string) - 2;
      return PCRE2_ERROR_UTF8_ERR7;
      }
    if ((*(++p) & 0xc0) != 0x80)     /* Fourth byte */
      {
-      *erroroffset = (int)(p - string) - 3;
+      *erroroffset = (PCRE2_SIZE)(p - string) - 3;
      return PCRE2_ERROR_UTF8_ERR8;
      }
    if ((*(++p) & 0xc0) != 0x80)     /* Fifth byte */
      {
-      *erroroffset = (int)(p - string) - 4;
+      *erroroffset = (PCRE2_SIZE)(p - string) - 4;
      return PCRE2_ERROR_UTF8_ERR9;
      }
    if ((*(++p) & 0xc0) != 0x80)     /* Sixth byte */
      {
-      *erroroffset = (int)(p - string) - 5;
+      *erroroffset = (PCRE2_SIZE)(p - string) - 5;
      return PCRE2_ERROR_UTF8_ERR10;
      }
    if (c == 0xfc && (d & 0x3c) == 0)
      {
-      *erroroffset = (int)(p - string) - 5;
+      *erroroffset = (PCRE2_SIZE)(p - string) - 5;
      return PCRE2_ERROR_UTF8_ERR19;
      }
    break;
@@ -307,7 +307,7 @@ for (p = string; length > 0; p++)

  if (ab > 3)
    {
-    *erroroffset = (int)(p - string) - ab;
+    *erroroffset = (PCRE2_SIZE)(p - string) - ab;
    return (ab == 4)? PCRE2_ERROR_UTF8_ERR11 : PCRE2_ERROR_UTF8_ERR12;
    }
  }
@@ -338,21 +338,21 @@ for (p = string; length > 0; p++)
    /* High surrogate. Must be a followed by a low surrogate. */
    if (length == 0)
      {
-      *erroroffset = p - string;
+      *erroroffset = (PCRE2_SIZE)(p - string);
      return PCRE2_ERROR_UTF16_ERR1;
      }
    p++;
    length--;
    if ((*p & 0xfc00) != 0xdc00)
      {
-      *erroroffset = p - string - 1;
+      *erroroffset = (PCRE2_SIZE)(p - string) - 1;
      return PCRE2_ERROR_UTF16_ERR2;
      }
    }
  else
    {
    /* Isolated low surrogate. Always an error. */
-    *erroroffset = p - string;
+    *erroroffset = (PCRE2_SIZE)(p - string);
    return PCRE2_ERROR_UTF16_ERR3;
    }
  }
@@ -377,14 +377,14 @@ for (p = string; length > 0; length--, p++)
    /* Normal UTF-32 code point. Neither high nor low surrogate. */
    if (c > 0x10ffffu)
      {
-      *erroroffset = p - string;
+      *erroroffset = (PCRE2_SIZE)(p - string);
      return PCRE2_ERROR_UTF32_ERR2;
      }
    }
  else
    {
    /* A surrogate */
-    *erroroffset = p - string;
+    *erroroffset = (PCRE2_SIZE)(p - string);
    return PCRE2_ERROR_UTF32_ERR1;
    }
  }
--- a/Foundation/src/pcre2_xclass.c
+++ b/Foundation/src/pcre2_xclass.c
@@ -7,7 +7,7 @@ and semantics are as close as possible to those of the Perl 5 language.

                       Written by Philip Hazel
     Original API code Copyright (c) 1997-2012 University of Cambridge
-          New API code Copyright (c) 2016-2022 University of Cambridge
+          New API code Copyright (c) 2016-2023 University of Cambridge

 -----------------------------------------------------------------------------
 Redistribution and use in source and binary forms, with or without
@@ -129,6 +129,7 @@ while ((t = *data++) != XCL_END)
 #ifdef SUPPORT_UNICODE
  else  /* XCL_PROP & XCL_NOTPROP */
    {
+    int chartype;
    const ucd_record *prop = GET_UCD(c);
    BOOL isprop = t == XCL_PROP;
    BOOL ok;
@@ -140,8 +141,9 @@ while ((t = *data++) != XCL_END)
      break;

      case PT_LAMP:
-      if ((prop->chartype == ucp_Lu || prop->chartype == ucp_Ll ||
-           prop->chartype == ucp_Lt) == isprop) return !negated;
+      chartype = prop->chartype;
+      if ((chartype == ucp_Lu || chartype == ucp_Ll ||
+           chartype == ucp_Lt) == isprop) return !negated;
      break;

      case PT_GC:
@@ -164,8 +166,9 @@ while ((t = *data++) != XCL_END)
      break;

      case PT_ALNUM:
-      if ((PRIV(ucp_gentype)[prop->chartype] == ucp_L ||
-           PRIV(ucp_gentype)[prop->chartype] == ucp_N) == isprop)
+      chartype = prop->chartype;
+      if ((PRIV(ucp_gentype)[chartype] == ucp_L ||
+           PRIV(ucp_gentype)[chartype] == ucp_N) == isprop)
        return !negated;
      break;

@@ -190,9 +193,10 @@ while ((t = *data++) != XCL_END)
      break;

      case PT_WORD:
-      if ((PRIV(ucp_gentype)[prop->chartype] == ucp_L ||
-           PRIV(ucp_gentype)[prop->chartype] == ucp_N || c == CHAR_UNDERSCORE)
-             == isprop)
+      chartype = prop->chartype;
+      if ((PRIV(ucp_gentype)[chartype] == ucp_L ||
+           PRIV(ucp_gentype)[chartype] == ucp_N ||
+           chartype == ucp_Mn || chartype == ucp_Pc) == isprop)
        return !negated;
      break;

@@ -234,9 +238,10 @@ while ((t = *data++) != XCL_END)
      */

      case PT_PXGRAPH:
-      if ((PRIV(ucp_gentype)[prop->chartype] != ucp_Z &&
-            (PRIV(ucp_gentype)[prop->chartype] != ucp_C ||
-              (prop->chartype == ucp_Cf &&
+      chartype = prop->chartype;
+      if ((PRIV(ucp_gentype)[chartype] != ucp_Z &&
+            (PRIV(ucp_gentype)[chartype] != ucp_C ||
+              (chartype == ucp_Cf &&
                c != 0x061c && c != 0x180e && (c < 0x2066 || c > 0x2069))
         )) == isprop)
        return !negated;
@@ -246,10 +251,11 @@ while ((t = *data++) != XCL_END)
      not Zl and not Zp, and U+180E. */

      case PT_PXPRINT:
-      if ((prop->chartype != ucp_Zl &&
-           prop->chartype != ucp_Zp &&
-            (PRIV(ucp_gentype)[prop->chartype] != ucp_C ||
-              (prop->chartype == ucp_Cf &&
+      chartype = prop->chartype;
+      if ((chartype != ucp_Zl &&
+           chartype != ucp_Zp &&
+            (PRIV(ucp_gentype)[chartype] != ucp_C ||
+              (chartype == ucp_Cf &&
                c != 0x061c && (c < 0x2066 || c > 0x2069))
         )) == isprop)
        return !negated;
@@ -260,8 +266,21 @@ while ((t = *data++) != XCL_END)
      compatibility (these are $+<=>^`|~). */

      case PT_PXPUNCT:
-      if ((PRIV(ucp_gentype)[prop->chartype] == ucp_P ||
-            (c < 128 && PRIV(ucp_gentype)[prop->chartype] == ucp_S)) == isprop)
+      chartype = prop->chartype;
+      if ((PRIV(ucp_gentype)[chartype] == ucp_P ||
+            (c < 128 && PRIV(ucp_gentype)[chartype] == ucp_S)) == isprop)
+        return !negated;
+      break;
+
+      /* Perl has two sets of hex digits */
+
+      case PT_PXXDIGIT:
+      if (((c >= CHAR_0 && c <= CHAR_9) ||
+           (c >= CHAR_A && c <= CHAR_F) ||
+           (c >= CHAR_a && c <= CHAR_f) ||
+           (c >= 0xff10 && c <= 0xff19) ||  /* Fullwidth digits */
+           (c >= 0xff21 && c <= 0xff26) ||  /* Fullwidth letters */
+           (c >= 0xff41 && c <= 0xff46)) == isprop)
        return !negated;
      break;