diff --git a/Foundation/src/pcre2.h b/Foundation/src/pcre2.h index 1cb463378..7e43d43fb 100644 --- a/Foundation/src/pcre2.h +++ b/Foundation/src/pcre2.h @@ -42,9 +42,9 @@ POSSIBILITY OF SUCH DAMAGE. /* The current PCRE version information. */ #define PCRE2_MAJOR 10 -#define PCRE2_MINOR 40 +#define PCRE2_MINOR 42 #define PCRE2_PRERELEASE -#define PCRE2_DATE 2022-04-14 +#define PCRE2_DATE 2022-12-11 /* When an application links to a PCRE DLL in Windows, the symbols that are imported have to be identified as such. When building PCRE2, the appropriate @@ -572,19 +572,19 @@ PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION pcre2_config(uint32_t, void *); /* Functions for manipulating contexts. */ #define PCRE2_GENERAL_CONTEXT_FUNCTIONS \ -PCRE2_EXP_DECL pcre2_general_context PCRE2_CALL_CONVENTION \ - *pcre2_general_context_copy(pcre2_general_context *); \ -PCRE2_EXP_DECL pcre2_general_context PCRE2_CALL_CONVENTION \ - *pcre2_general_context_create(void *(*)(PCRE2_SIZE, void *), \ +PCRE2_EXP_DECL pcre2_general_context *PCRE2_CALL_CONVENTION \ + pcre2_general_context_copy(pcre2_general_context *); \ +PCRE2_EXP_DECL pcre2_general_context *PCRE2_CALL_CONVENTION \ + pcre2_general_context_create(void *(*)(PCRE2_SIZE, void *), \ void (*)(void *, void *), void *); \ PCRE2_EXP_DECL void PCRE2_CALL_CONVENTION \ pcre2_general_context_free(pcre2_general_context *); #define PCRE2_COMPILE_CONTEXT_FUNCTIONS \ -PCRE2_EXP_DECL pcre2_compile_context PCRE2_CALL_CONVENTION \ - *pcre2_compile_context_copy(pcre2_compile_context *); \ -PCRE2_EXP_DECL pcre2_compile_context PCRE2_CALL_CONVENTION \ - *pcre2_compile_context_create(pcre2_general_context *);\ +PCRE2_EXP_DECL pcre2_compile_context *PCRE2_CALL_CONVENTION \ + pcre2_compile_context_copy(pcre2_compile_context *); \ +PCRE2_EXP_DECL pcre2_compile_context *PCRE2_CALL_CONVENTION \ + pcre2_compile_context_create(pcre2_general_context *);\ PCRE2_EXP_DECL void PCRE2_CALL_CONVENTION \ pcre2_compile_context_free(pcre2_compile_context *); \ PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \ @@ -604,10 +604,10 @@ PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \ int (*)(uint32_t, void *), void *); #define PCRE2_MATCH_CONTEXT_FUNCTIONS \ -PCRE2_EXP_DECL pcre2_match_context PCRE2_CALL_CONVENTION \ - *pcre2_match_context_copy(pcre2_match_context *); \ -PCRE2_EXP_DECL pcre2_match_context PCRE2_CALL_CONVENTION \ - *pcre2_match_context_create(pcre2_general_context *); \ +PCRE2_EXP_DECL pcre2_match_context *PCRE2_CALL_CONVENTION \ + pcre2_match_context_copy(pcre2_match_context *); \ +PCRE2_EXP_DECL pcre2_match_context *PCRE2_CALL_CONVENTION \ + pcre2_match_context_create(pcre2_general_context *); \ PCRE2_EXP_DECL void PCRE2_CALL_CONVENTION \ pcre2_match_context_free(pcre2_match_context *); \ PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \ @@ -631,10 +631,10 @@ PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \ void *(*)(PCRE2_SIZE, void *), void (*)(void *, void *), void *); #define PCRE2_CONVERT_CONTEXT_FUNCTIONS \ -PCRE2_EXP_DECL pcre2_convert_context PCRE2_CALL_CONVENTION \ - *pcre2_convert_context_copy(pcre2_convert_context *); \ -PCRE2_EXP_DECL pcre2_convert_context PCRE2_CALL_CONVENTION \ - *pcre2_convert_context_create(pcre2_general_context *); \ +PCRE2_EXP_DECL pcre2_convert_context *PCRE2_CALL_CONVENTION \ + pcre2_convert_context_copy(pcre2_convert_context *); \ +PCRE2_EXP_DECL pcre2_convert_context *PCRE2_CALL_CONVENTION \ + pcre2_convert_context_create(pcre2_general_context *); \ PCRE2_EXP_DECL void PCRE2_CALL_CONVENTION \ pcre2_convert_context_free(pcre2_convert_context *); \ PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \ @@ -646,15 +646,15 @@ PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \ /* Functions concerned with compiling a pattern to PCRE internal code. */ #define PCRE2_COMPILE_FUNCTIONS \ -PCRE2_EXP_DECL pcre2_code PCRE2_CALL_CONVENTION \ - *pcre2_compile(PCRE2_SPTR, PCRE2_SIZE, uint32_t, int *, PCRE2_SIZE *, \ +PCRE2_EXP_DECL pcre2_code *PCRE2_CALL_CONVENTION \ + pcre2_compile(PCRE2_SPTR, PCRE2_SIZE, uint32_t, int *, PCRE2_SIZE *, \ pcre2_compile_context *); \ PCRE2_EXP_DECL void PCRE2_CALL_CONVENTION \ pcre2_code_free(pcre2_code *); \ -PCRE2_EXP_DECL pcre2_code PCRE2_CALL_CONVENTION \ - *pcre2_code_copy(const pcre2_code *); \ -PCRE2_EXP_DECL pcre2_code PCRE2_CALL_CONVENTION \ - *pcre2_code_copy_with_tables(const pcre2_code *); +PCRE2_EXP_DECL pcre2_code *PCRE2_CALL_CONVENTION \ + pcre2_code_copy(const pcre2_code *); \ +PCRE2_EXP_DECL pcre2_code *PCRE2_CALL_CONVENTION \ + pcre2_code_copy_with_tables(const pcre2_code *); /* Functions that give information about a compiled pattern. */ @@ -670,10 +670,10 @@ PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \ /* Functions for running a match and inspecting the result. */ #define PCRE2_MATCH_FUNCTIONS \ -PCRE2_EXP_DECL pcre2_match_data PCRE2_CALL_CONVENTION \ - *pcre2_match_data_create(uint32_t, pcre2_general_context *); \ -PCRE2_EXP_DECL pcre2_match_data PCRE2_CALL_CONVENTION \ - *pcre2_match_data_create_from_pattern(const pcre2_code *, \ +PCRE2_EXP_DECL pcre2_match_data *PCRE2_CALL_CONVENTION \ + pcre2_match_data_create(uint32_t, pcre2_general_context *); \ +PCRE2_EXP_DECL pcre2_match_data *PCRE2_CALL_CONVENTION \ + pcre2_match_data_create_from_pattern(const pcre2_code *, \ pcre2_general_context *); \ PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \ pcre2_dfa_match(const pcre2_code *, PCRE2_SPTR, PCRE2_SIZE, PCRE2_SIZE, \ @@ -689,8 +689,8 @@ PCRE2_EXP_DECL PCRE2_SIZE PCRE2_CALL_CONVENTION \ pcre2_get_match_data_size(pcre2_match_data *); \ PCRE2_EXP_DECL uint32_t PCRE2_CALL_CONVENTION \ pcre2_get_ovector_count(pcre2_match_data *); \ -PCRE2_EXP_DECL PCRE2_SIZE PCRE2_CALL_CONVENTION \ - *pcre2_get_ovector_pointer(pcre2_match_data *); \ +PCRE2_EXP_DECL PCRE2_SIZE *PCRE2_CALL_CONVENTION \ + pcre2_get_ovector_pointer(pcre2_match_data *); \ PCRE2_EXP_DECL PCRE2_SIZE PCRE2_CALL_CONVENTION \ pcre2_get_startchar(pcre2_match_data *); @@ -770,8 +770,8 @@ PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \ uint32_t, pcre2_match_data *, pcre2_match_context *); \ PCRE2_EXP_DECL void PCRE2_CALL_CONVENTION \ pcre2_jit_free_unused_memory(pcre2_general_context *); \ -PCRE2_EXP_DECL pcre2_jit_stack PCRE2_CALL_CONVENTION \ - *pcre2_jit_stack_create(PCRE2_SIZE, PCRE2_SIZE, pcre2_general_context *); \ +PCRE2_EXP_DECL pcre2_jit_stack *PCRE2_CALL_CONVENTION \ + pcre2_jit_stack_create(PCRE2_SIZE, PCRE2_SIZE, pcre2_general_context *); \ PCRE2_EXP_DECL void PCRE2_CALL_CONVENTION \ pcre2_jit_stack_assign(pcre2_match_context *, pcre2_jit_callback, void *); \ PCRE2_EXP_DECL void PCRE2_CALL_CONVENTION \ @@ -783,8 +783,8 @@ PCRE2_EXP_DECL void PCRE2_CALL_CONVENTION \ #define PCRE2_OTHER_FUNCTIONS \ PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \ pcre2_get_error_message(int, PCRE2_UCHAR *, PCRE2_SIZE); \ -PCRE2_EXP_DECL const uint8_t PCRE2_CALL_CONVENTION \ - *pcre2_maketables(pcre2_general_context *); \ +PCRE2_EXP_DECL const uint8_t *PCRE2_CALL_CONVENTION \ + pcre2_maketables(pcre2_general_context *); \ PCRE2_EXP_DECL void PCRE2_CALL_CONVENTION \ pcre2_maketables_free(pcre2_general_context *, const uint8_t *); diff --git a/Foundation/src/pcre2_compile.c b/Foundation/src/pcre2_compile.c index 9a65b2f6e..f31144bd6 100644 --- a/Foundation/src/pcre2_compile.c +++ b/Foundation/src/pcre2_compile.c @@ -1264,8 +1264,10 @@ PCRE2_SIZE* ref_count; if (code != NULL) { +#ifdef SUPPORT_JIT if (code->executable_jit != NULL) PRIV(jit_free)(code->executable_jit, &code->memctl); +#endif if ((code->flags & PCRE2_DEREF_TABLES) != 0) { @@ -2685,7 +2687,7 @@ if ((options & PCRE2_EXTENDED_MORE) != 0) options |= PCRE2_EXTENDED; while (ptr < ptrend) { int prev_expect_cond_assert; - uint32_t min_repeat, max_repeat; + uint32_t min_repeat = 0, max_repeat = 0; uint32_t set, unset, *optset; uint32_t terminator; uint32_t prev_meta_quantifier; @@ -8550,7 +8552,7 @@ do { op == OP_SCBRA || op == OP_SCBRAPOS) { int n = GET2(scode, 1+LINK_SIZE); - int new_map = bracket_map | ((n < 32)? (1u << n) : 1); + unsigned int new_map = bracket_map | ((n < 32)? (1u << n) : 1); if (!is_startline(scode, new_map, cb, atomcount, inassert)) return FALSE; } @@ -10618,4 +10620,10 @@ re = NULL; goto EXIT; } +/* These #undefs are here to enable unity builds with CMake. */ + +#undef NLBLOCK /* Block containing newline information */ +#undef PSSTART /* Field containing processed string start */ +#undef PSEND /* Field containing processed string end */ + /* End of pcre2_compile.c */ diff --git a/Foundation/src/pcre2_config.h b/Foundation/src/pcre2_config.h index 3087804b3..70a1c4e6e 100644 --- a/Foundation/src/pcre2_config.h +++ b/Foundation/src/pcre2_config.h @@ -239,7 +239,7 @@ sure both macros are undefined; an emulation function will then be used. */ #define PACKAGE_NAME "PCRE2" /* Define to the full name and version of this package. */ -#define PACKAGE_STRING "PCRE2 10.40" +#define PACKAGE_STRING "PCRE2 10.42" /* Define to the one symbol short name of this package. */ #define PACKAGE_TARNAME "pcre2" @@ -248,7 +248,7 @@ sure both macros are undefined; an emulation function will then be used. */ #define PACKAGE_URL "" /* Define to the version of this package. */ -#define PACKAGE_VERSION "10.40" +#define PACKAGE_VERSION "10.42" /* The value of PARENS_NEST_LIMIT specifies the maximum depth of nested parentheses (of any kind) in a pattern. This limits the amount of system @@ -445,7 +445,13 @@ sure both macros are undefined; an emulation function will then be used. */ #endif /* Version number of package */ -#define VERSION "10.40" +#define VERSION "10.42" + +/* Number of bits in a file offset, on hosts where this is settable. */ +/* #undef _FILE_OFFSET_BITS */ + +/* Define for large files, on AIX-style hosts. */ +/* #undef _LARGE_FILES */ /* Define to empty if `const' does not conform to ANSI C. */ /* #undef const */ diff --git a/Foundation/src/pcre2_context.c b/Foundation/src/pcre2_context.c index 9a83ec31c..415c4f4ab 100644 --- a/Foundation/src/pcre2_context.c +++ b/Foundation/src/pcre2_context.c @@ -7,7 +7,7 @@ and semantics are as close as possible to those of the Perl 5 language. Written by Philip Hazel Original API code Copyright (c) 1997-2012 University of Cambridge - New API code Copyright (c) 2016-2018 University of Cambridge + New API code Copyright (c) 2016-2022 University of Cambridge ----------------------------------------------------------------------------- Redistribution and use in source and binary forms, with or without @@ -440,8 +440,11 @@ mcontext->offset_limit = limit; return 0; } -/* This function became obsolete at release 10.30. It is kept as a synonym for -backwards compatibility. */ +/* These functions became obsolete at release 10.30. The first is kept as a +synonym for backwards compatibility. The second now does nothing. Exclude both +from coverage reports. */ + +/* LCOV_EXCL_START */ PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION pcre2_set_recursion_limit(pcre2_match_context *mcontext, uint32_t limit) @@ -461,6 +464,9 @@ pcre2_set_recursion_memory_management(pcre2_match_context *mcontext, return 0; } +/* LCOV_EXCL_STOP */ + + /* ------------ Convert context ------------ */ PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION diff --git a/Foundation/src/pcre2_convert.c b/Foundation/src/pcre2_convert.c index 81d6968d7..98421f3a4 100644 --- a/Foundation/src/pcre2_convert.c +++ b/Foundation/src/pcre2_convert.c @@ -7,7 +7,7 @@ and semantics are as close as possible to those of the Perl 5 language. Written by Philip Hazel Original API code Copyright (c) 1997-2012 University of Cambridge - New API code Copyright (c) 2016-2018 University of Cambridge + New API code Copyright (c) 2016-2022 University of Cambridge ----------------------------------------------------------------------------- Redistribution and use in source and binary forms, with or without @@ -62,9 +62,8 @@ POSSIBILITY OF SUCH DAMAGE. #define STR_QUERY_s STR_LEFT_PARENTHESIS STR_QUESTION_MARK STR_s STR_RIGHT_PARENTHESIS #define STR_STAR_NUL STR_LEFT_PARENTHESIS STR_ASTERISK STR_N STR_U STR_L STR_RIGHT_PARENTHESIS -/* States for range and POSIX processing */ +/* States for POSIX processing */ -enum { RANGE_NOT_STARTED, RANGE_STARTING, RANGE_STARTED }; enum { POSIX_START_REGEX, POSIX_ANCHORED, POSIX_NOT_BRACKET, POSIX_CLASS_NOT_STARTED, POSIX_CLASS_STARTING, POSIX_CLASS_STARTED }; diff --git a/Foundation/src/pcre2_dfa_match.c b/Foundation/src/pcre2_dfa_match.c index 3f5946552..b564d4e90 100644 --- a/Foundation/src/pcre2_dfa_match.c +++ b/Foundation/src/pcre2_dfa_match.c @@ -348,7 +348,7 @@ Returns: the return from the callout */ static int -do_callout(PCRE2_SPTR code, PCRE2_SIZE *offsets, PCRE2_SPTR current_subject, +do_callout_dfa(PCRE2_SPTR code, PCRE2_SIZE *offsets, PCRE2_SPTR current_subject, PCRE2_SPTR ptr, dfa_match_block *mb, PCRE2_SIZE extracode, PCRE2_SIZE *lengthptr) { @@ -2797,7 +2797,7 @@ for (;;) || code[LINK_SIZE + 1] == OP_CALLOUT_STR) { PCRE2_SIZE callout_length; - rrc = do_callout(code, offsets, current_subject, ptr, mb, + rrc = do_callout_dfa(code, offsets, current_subject, ptr, mb, 1 + LINK_SIZE, &callout_length); if (rrc < 0) return rrc; /* Abandon */ if (rrc > 0) break; /* Fail this thread */ @@ -3194,7 +3194,7 @@ for (;;) case OP_CALLOUT_STR: { PCRE2_SIZE callout_length; - rrc = do_callout(code, offsets, current_subject, ptr, mb, 0, + rrc = do_callout_dfa(code, offsets, current_subject, ptr, mb, 0, &callout_length); if (rrc < 0) return rrc; /* Abandon */ if (rrc == 0) @@ -4055,4 +4055,10 @@ while (rws->next != NULL) return rc; } +/* These #undefs are here to enable unity builds with CMake. */ + +#undef NLBLOCK /* Block containing newline information */ +#undef PSSTART /* Field containing processed string start */ +#undef PSEND /* Field containing processed string end */ + /* End of pcre2_dfa_match.c */ diff --git a/Foundation/src/pcre2_internal.h b/Foundation/src/pcre2_internal.h index fe7a0e005..92dd3138d 100644 --- a/Foundation/src/pcre2_internal.h +++ b/Foundation/src/pcre2_internal.h @@ -220,18 +220,17 @@ not rely on this. */ #define COMPILE_ERROR_BASE 100 -/* The initial frames vector for remembering backtracking points in -pcre2_match() is allocated on the system stack, of this size (bytes). The size -must be a multiple of sizeof(PCRE2_SPTR) in all environments, so making it a -multiple of 8 is best. Typical frame sizes are a few hundred bytes (it depends -on the number of capturing parentheses) so 20KiB handles quite a few frames. A -larger vector on the heap is obtained for patterns that need more frames. The -maximum size of this can be limited. */ +/* The initial frames vector for remembering pcre2_match() backtracking points +is allocated on the heap, of this size (bytes) or ten times the frame size if +larger, unless the heap limit is smaller. Typical frame sizes are a few hundred +bytes (it depends on the number of capturing parentheses) so 20KiB handles +quite a few frames. A larger vector on the heap is obtained for matches that +need more frames, subject to the heap limit. */ #define START_FRAMES_SIZE 20480 -/* Similarly, for DFA matching, an initial internal workspace vector is -allocated on the stack. */ +/* For DFA matching, an initial internal workspace vector is allocated on the +stack. The heap is used only if this turns out to be too small. */ #define DFA_START_RWS_SIZE 30720 diff --git a/Foundation/src/pcre2_intmodedep.h b/Foundation/src/pcre2_intmodedep.h index f8a3d25de..390e737a6 100644 --- a/Foundation/src/pcre2_intmodedep.h +++ b/Foundation/src/pcre2_intmodedep.h @@ -7,7 +7,7 @@ and semantics are as close as possible to those of the Perl 5 language. Written by Philip Hazel Original API code Copyright (c) 1997-2012 University of Cambridge - New API code Copyright (c) 2016-2018 University of Cambridge + New API code Copyright (c) 2016-2022 University of Cambridge ----------------------------------------------------------------------------- Redistribution and use in source and binary forms, with or without @@ -649,19 +649,23 @@ the size varies from call to call. As the maximum number of capturing subpatterns is 65535 we must allow for 65536 strings to include the overall match. (See also the heapframe structure below.) */ +struct heapframe; /* Forward reference */ + typedef struct pcre2_real_match_data { - pcre2_memctl memctl; - const pcre2_real_code *code; /* The pattern used for the match */ - PCRE2_SPTR subject; /* The subject that was matched */ - PCRE2_SPTR mark; /* Pointer to last mark */ - PCRE2_SIZE leftchar; /* Offset to leftmost code unit */ - PCRE2_SIZE rightchar; /* Offset to rightmost code unit */ - PCRE2_SIZE startchar; /* Offset to starting code unit */ - uint8_t matchedby; /* Type of match (normal, JIT, DFA) */ - uint8_t flags; /* Various flags */ - uint16_t oveccount; /* Number of pairs */ - int rc; /* The return code from the match */ - PCRE2_SIZE ovector[131072]; /* Must be last in the structure */ + pcre2_memctl memctl; /* Memory control fields */ + const pcre2_real_code *code; /* The pattern used for the match */ + PCRE2_SPTR subject; /* The subject that was matched */ + PCRE2_SPTR mark; /* Pointer to last mark */ + struct heapframe *heapframes; /* Backtracking frames heap memory */ + PCRE2_SIZE heapframes_size; /* Malloc-ed size */ + PCRE2_SIZE leftchar; /* Offset to leftmost code unit */ + PCRE2_SIZE rightchar; /* Offset to rightmost code unit */ + PCRE2_SIZE startchar; /* Offset to starting code unit */ + uint8_t matchedby; /* Type of match (normal, JIT, DFA) */ + uint8_t flags; /* Various flags */ + uint16_t oveccount; /* Number of pairs */ + int rc; /* The return code from the match */ + PCRE2_SIZE ovector[131072]; /* Must be last in the structure */ } pcre2_real_match_data; @@ -854,10 +858,6 @@ doing traditional NFA matching (pcre2_match() and friends). */ typedef struct match_block { pcre2_memctl memctl; /* For general use */ - PCRE2_SIZE frame_vector_size; /* Size of a backtracking frame */ - heapframe *match_frames; /* Points to vector of frames */ - heapframe *match_frames_top; /* Points after the end of the vector */ - heapframe *stack_frames; /* The original vector on the stack */ PCRE2_SIZE heap_limit; /* As it says */ uint32_t match_limit; /* As it says */ uint32_t match_limit_depth; /* As it says */ diff --git a/Foundation/src/pcre2_jit_compile.c b/Foundation/src/pcre2_jit_compile.c index 1b7c04dc0..2095ed10f 100644 --- a/Foundation/src/pcre2_jit_compile.c +++ b/Foundation/src/pcre2_jit_compile.c @@ -539,7 +539,7 @@ typedef struct compare_context { #undef CMP /* Used for accessing the elements of the stack. */ -#define STACK(i) ((i) * (int)sizeof(sljit_sw)) +#define STACK(i) ((i) * SSIZE_OF(sw)) #ifdef SLJIT_PREF_SHIFT_REG #if SLJIT_PREF_SHIFT_REG == SLJIT_R2 @@ -587,8 +587,8 @@ to characters. The vector data is divided into two groups: the first group contains the start / end character pointers, and the second is the start pointers when the end of the capturing group has not yet reached. */ #define OVECTOR_START (common->ovector_start) -#define OVECTOR(i) (OVECTOR_START + (i) * (sljit_sw)sizeof(sljit_sw)) -#define OVECTOR_PRIV(i) (common->cbra_ptr + (i) * (sljit_sw)sizeof(sljit_sw)) +#define OVECTOR(i) (OVECTOR_START + (i) * SSIZE_OF(sw)) +#define OVECTOR_PRIV(i) (common->cbra_ptr + (i) * SSIZE_OF(sw)) #define PRIVATE_DATA(cc) (common->private_data_ptrs[(cc) - common->start]) #if PCRE2_CODE_UNIT_WIDTH == 8 @@ -2148,9 +2148,9 @@ while (cc < ccend) { OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(0)); OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -OVECTOR(0)); - stackpos -= (int)sizeof(sljit_sw); + stackpos -= SSIZE_OF(sw); OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0); - stackpos -= (int)sizeof(sljit_sw); + stackpos -= SSIZE_OF(sw); setsom_found = TRUE; } cc += 1; @@ -2165,9 +2165,9 @@ while (cc < ccend) { OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->mark_ptr); OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -common->mark_ptr); - stackpos -= (int)sizeof(sljit_sw); + stackpos -= SSIZE_OF(sw); OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0); - stackpos -= (int)sizeof(sljit_sw); + stackpos -= SSIZE_OF(sw); setmark_found = TRUE; } cc += 1 + 2 + cc[1]; @@ -2178,27 +2178,27 @@ while (cc < ccend) { OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(0)); OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -OVECTOR(0)); - stackpos -= (int)sizeof(sljit_sw); + stackpos -= SSIZE_OF(sw); OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0); - stackpos -= (int)sizeof(sljit_sw); + stackpos -= SSIZE_OF(sw); setsom_found = TRUE; } if (common->mark_ptr != 0 && !setmark_found) { OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->mark_ptr); OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -common->mark_ptr); - stackpos -= (int)sizeof(sljit_sw); + stackpos -= SSIZE_OF(sw); OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0); - stackpos -= (int)sizeof(sljit_sw); + stackpos -= SSIZE_OF(sw); setmark_found = TRUE; } if (common->capture_last_ptr != 0 && !capture_last_found) { OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr); OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -common->capture_last_ptr); - stackpos -= (int)sizeof(sljit_sw); + stackpos -= SSIZE_OF(sw); OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0); - stackpos -= (int)sizeof(sljit_sw); + stackpos -= SSIZE_OF(sw); capture_last_found = TRUE; } cc += 1 + LINK_SIZE; @@ -2212,20 +2212,20 @@ while (cc < ccend) { OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr); OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -common->capture_last_ptr); - stackpos -= (int)sizeof(sljit_sw); + stackpos -= SSIZE_OF(sw); OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0); - stackpos -= (int)sizeof(sljit_sw); + stackpos -= SSIZE_OF(sw); capture_last_found = TRUE; } offset = (GET2(cc, 1 + LINK_SIZE)) << 1; OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, OVECTOR(offset)); - stackpos -= (int)sizeof(sljit_sw); + stackpos -= SSIZE_OF(sw); OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset)); OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1)); OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0); - stackpos -= (int)sizeof(sljit_sw); + stackpos -= SSIZE_OF(sw); OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP2, 0); - stackpos -= (int)sizeof(sljit_sw); + stackpos -= SSIZE_OF(sw); cc += 1 + LINK_SIZE + IMM2_SIZE; break; @@ -3141,7 +3141,7 @@ static SLJIT_INLINE void allocate_stack(compiler_common *common, int size) DEFINE_COMPILER; SLJIT_ASSERT(size > 0); -OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, size * sizeof(sljit_sw)); +OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, size * SSIZE_OF(sw)); #ifdef DESTROY_REGISTERS OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 12345); OP1(SLJIT_MOV, TMP3, 0, TMP1, 0); @@ -3157,7 +3157,7 @@ static SLJIT_INLINE void free_stack(compiler_common *common, int size) DEFINE_COMPILER; SLJIT_ASSERT(size > 0); -OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, size * sizeof(sljit_sw)); +OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, size * SSIZE_OF(sw)); } static sljit_uw * allocate_read_only_data(compiler_common *common, sljit_uw size) @@ -3197,12 +3197,12 @@ if (length < 8) } else { - if (sljit_emit_mem(compiler, SLJIT_MOV | SLJIT_MEM_SUPP | SLJIT_MEM_STORE | SLJIT_MEM_PRE, SLJIT_R0, SLJIT_MEM1(SLJIT_R1), sizeof(sljit_sw)) == SLJIT_SUCCESS) + if (sljit_emit_mem_update(compiler, SLJIT_MOV | SLJIT_MEM_SUPP | SLJIT_MEM_STORE | SLJIT_MEM_PRE, SLJIT_R0, SLJIT_MEM1(SLJIT_R1), sizeof(sljit_sw)) == SLJIT_SUCCESS) { GET_LOCAL_BASE(SLJIT_R1, 0, OVECTOR_START); OP1(SLJIT_MOV, SLJIT_R2, 0, SLJIT_IMM, length - 1); loop = LABEL(); - sljit_emit_mem(compiler, SLJIT_MOV | SLJIT_MEM_STORE | SLJIT_MEM_PRE, SLJIT_R0, SLJIT_MEM1(SLJIT_R1), sizeof(sljit_sw)); + sljit_emit_mem_update(compiler, SLJIT_MOV | SLJIT_MEM_STORE | SLJIT_MEM_PRE, SLJIT_R0, SLJIT_MEM1(SLJIT_R1), sizeof(sljit_sw)); OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_R2, 0, SLJIT_R2, 0, SLJIT_IMM, 1); JUMPTO(SLJIT_NOT_ZERO, loop); } @@ -3258,8 +3258,8 @@ OP2(SLJIT_ADD, TMP2, 0, TMP1, 0, SLJIT_IMM, size - uncleared_size); loop = LABEL(); OP1(SLJIT_MOV, SLJIT_MEM1(TMP1), 0, src, 0); OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 3 * sizeof(sljit_sw)); -OP1(SLJIT_MOV, SLJIT_MEM1(TMP1), -2 * (sljit_sw)sizeof(sljit_sw), src, 0); -OP1(SLJIT_MOV, SLJIT_MEM1(TMP1), -1 * (sljit_sw)sizeof(sljit_sw), src, 0); +OP1(SLJIT_MOV, SLJIT_MEM1(TMP1), -2 * SSIZE_OF(sw), src, 0); +OP1(SLJIT_MOV, SLJIT_MEM1(TMP1), -1 * SSIZE_OF(sw), src, 0); CMPTO(SLJIT_LESS, TMP1, 0, TMP2, 0, loop); if (uncleared_size >= sizeof(sljit_sw)) @@ -3286,12 +3286,12 @@ if (length < 8) } else { - if (sljit_emit_mem(compiler, SLJIT_MOV | SLJIT_MEM_SUPP | SLJIT_MEM_STORE | SLJIT_MEM_PRE, TMP1, SLJIT_MEM1(TMP2), sizeof(sljit_sw)) == SLJIT_SUCCESS) + if (sljit_emit_mem_update(compiler, SLJIT_MOV | SLJIT_MEM_SUPP | SLJIT_MEM_STORE | SLJIT_MEM_PRE, TMP1, SLJIT_MEM1(TMP2), sizeof(sljit_sw)) == SLJIT_SUCCESS) { GET_LOCAL_BASE(TMP2, 0, OVECTOR_START + sizeof(sljit_sw)); OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_IMM, length - 2); loop = LABEL(); - sljit_emit_mem(compiler, SLJIT_MOV | SLJIT_MEM_STORE | SLJIT_MEM_PRE, TMP1, SLJIT_MEM1(TMP2), sizeof(sljit_sw)); + sljit_emit_mem_update(compiler, SLJIT_MOV | SLJIT_MEM_STORE | SLJIT_MEM_PRE, TMP1, SLJIT_MEM1(TMP2), sizeof(sljit_sw)); OP2(SLJIT_SUB | SLJIT_SET_Z, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, 1); JUMPTO(SLJIT_NOT_ZERO, loop); } @@ -3383,7 +3383,7 @@ else OP2(SLJIT_ADD, SLJIT_R2, 0, SLJIT_R2, 0, SLJIT_IMM, SLJIT_OFFSETOF(pcre2_match_data, ovector) - sizeof(PCRE2_SIZE)); } -has_pre = sljit_emit_mem(compiler, SLJIT_MOV | SLJIT_MEM_SUPP | SLJIT_MEM_PRE, SLJIT_S1, SLJIT_MEM1(SLJIT_S0), sizeof(sljit_sw)) == SLJIT_SUCCESS; +has_pre = sljit_emit_mem_update(compiler, SLJIT_MOV | SLJIT_MEM_SUPP | SLJIT_MEM_PRE, SLJIT_S1, SLJIT_MEM1(SLJIT_S0), sizeof(sljit_sw)) == SLJIT_SUCCESS; GET_LOCAL_BASE(SLJIT_S0, 0, OVECTOR_START - (has_pre ? sizeof(sljit_sw) : 0)); OP1(SLJIT_MOV, SLJIT_R0, 0, SLJIT_MEM1(HAS_VIRTUAL_REGISTERS ? SLJIT_R0 : ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, begin)); @@ -3391,7 +3391,7 @@ OP1(SLJIT_MOV, SLJIT_R0, 0, SLJIT_MEM1(HAS_VIRTUAL_REGISTERS ? SLJIT_R0 : ARGUME loop = LABEL(); if (has_pre) - sljit_emit_mem(compiler, SLJIT_MOV | SLJIT_MEM_PRE, SLJIT_S1, SLJIT_MEM1(SLJIT_S0), sizeof(sljit_sw)); + sljit_emit_mem_update(compiler, SLJIT_MOV | SLJIT_MEM_PRE, SLJIT_S1, SLJIT_MEM1(SLJIT_S0), sizeof(sljit_sw)); else { OP1(SLJIT_MOV, SLJIT_S1, 0, SLJIT_MEM1(SLJIT_S0), 0); @@ -3414,14 +3414,14 @@ JUMPTO(SLJIT_NOT_ZERO, loop); /* Calculate the return value, which is the maximum ovector value. */ if (topbracket > 1) { - if (sljit_emit_mem(compiler, SLJIT_MOV | SLJIT_MEM_SUPP | SLJIT_MEM_PRE, SLJIT_R2, SLJIT_MEM1(SLJIT_R0), -(2 * (sljit_sw)sizeof(sljit_sw))) == SLJIT_SUCCESS) + if (sljit_emit_mem_update(compiler, SLJIT_MOV | SLJIT_MEM_SUPP | SLJIT_MEM_PRE, SLJIT_R2, SLJIT_MEM1(SLJIT_R0), -(2 * SSIZE_OF(sw))) == SLJIT_SUCCESS) { GET_LOCAL_BASE(SLJIT_R0, 0, OVECTOR_START + topbracket * 2 * sizeof(sljit_sw)); OP1(SLJIT_MOV, SLJIT_R1, 0, SLJIT_IMM, topbracket + 1); /* OVECTOR(0) is never equal to SLJIT_S2. */ loop = LABEL(); - sljit_emit_mem(compiler, SLJIT_MOV | SLJIT_MEM_PRE, SLJIT_R2, SLJIT_MEM1(SLJIT_R0), -(2 * (sljit_sw)sizeof(sljit_sw))); + sljit_emit_mem_update(compiler, SLJIT_MOV | SLJIT_MEM_PRE, SLJIT_R2, SLJIT_MEM1(SLJIT_R0), -(2 * SSIZE_OF(sw))); OP2(SLJIT_SUB, SLJIT_R1, 0, SLJIT_R1, 0, SLJIT_IMM, 1); CMPTO(SLJIT_EQUAL, SLJIT_R2, 0, SLJIT_S2, 0, loop); OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_R1, 0); @@ -3434,7 +3434,7 @@ if (topbracket > 1) /* OVECTOR(0) is never equal to SLJIT_S2. */ loop = LABEL(); OP1(SLJIT_MOV, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_R0), 0); - OP2(SLJIT_SUB, SLJIT_R0, 0, SLJIT_R0, 0, SLJIT_IMM, 2 * (sljit_sw)sizeof(sljit_sw)); + OP2(SLJIT_SUB, SLJIT_R0, 0, SLJIT_R0, 0, SLJIT_IMM, 2 * SSIZE_OF(sw)); OP2(SLJIT_SUB, SLJIT_R1, 0, SLJIT_R1, 0, SLJIT_IMM, 1); CMPTO(SLJIT_EQUAL, SLJIT_R2, 0, SLJIT_S2, 0, loop); OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_R1, 0); @@ -4649,8 +4649,8 @@ if (common->nltype != NLTYPE_ANY) /* All newlines are ascii, just skip intermediate octets. */ jump[0] = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0); loop = LABEL(); - if (sljit_emit_mem(compiler, MOV_UCHAR | SLJIT_MEM_SUPP | SLJIT_MEM_POST, TMP2, SLJIT_MEM1(STR_PTR), IN_UCHARS(1)) == SLJIT_SUCCESS) - sljit_emit_mem(compiler, MOV_UCHAR | SLJIT_MEM_POST, TMP2, SLJIT_MEM1(STR_PTR), IN_UCHARS(1)); + if (sljit_emit_mem_update(compiler, MOV_UCHAR | SLJIT_MEM_SUPP | SLJIT_MEM_POST, TMP2, SLJIT_MEM1(STR_PTR), IN_UCHARS(1)) == SLJIT_SUCCESS) + sljit_emit_mem_update(compiler, MOV_UCHAR | SLJIT_MEM_POST, TMP2, SLJIT_MEM1(STR_PTR), IN_UCHARS(1)); else { OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0)); @@ -5883,7 +5883,7 @@ static BOOL check_fast_forward_char_pair_simd(compiler_common *common, fast_forw while (j < i) { b_pri = chars[j].last_count; - if (b_pri > 2 && a_pri + b_pri >= max_pri) + if (b_pri > 2 && (sljit_u32)a_pri + (sljit_u32)b_pri >= max_pri) { b1 = chars[j].chars[0]; b2 = chars[j].chars[1]; @@ -6569,21 +6569,21 @@ GET_LOCAL_BASE(TMP1, 0, 0); /* Drop frames until we reach STACK_TOP. */ mainloop = LABEL(); -OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), -sizeof(sljit_sw)); +OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), -SSIZE_OF(sw)); jump = CMP(SLJIT_SIG_LESS_EQUAL, TMP2, 0, SLJIT_IMM, 0); OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, TMP1, 0); if (HAS_VIRTUAL_REGISTERS) { - OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), 0, SLJIT_MEM1(STACK_TOP), -(2 * sizeof(sljit_sw))); - OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), sizeof(sljit_sw), SLJIT_MEM1(STACK_TOP), -(3 * sizeof(sljit_sw))); - OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, 3 * sizeof(sljit_sw)); + OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), 0, SLJIT_MEM1(STACK_TOP), -(2 * SSIZE_OF(sw))); + OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), sizeof(sljit_sw), SLJIT_MEM1(STACK_TOP), -(3 * SSIZE_OF(sw))); + OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, 3 * SSIZE_OF(sw)); } else { - OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), -(2 * sizeof(sljit_sw))); - OP1(SLJIT_MOV, TMP3, 0, SLJIT_MEM1(STACK_TOP), -(3 * sizeof(sljit_sw))); - OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, 3 * sizeof(sljit_sw)); + OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), -(2 * SSIZE_OF(sw))); + OP1(SLJIT_MOV, TMP3, 0, SLJIT_MEM1(STACK_TOP), -(3 * SSIZE_OF(sw))); + OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, 3 * SSIZE_OF(sw)); OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), 0, TMP1, 0); GET_LOCAL_BASE(TMP1, 0, 0); OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), sizeof(sljit_sw), TMP3, 0); @@ -6600,13 +6600,13 @@ OP2(SLJIT_SUB, TMP2, 0, SLJIT_IMM, 0, TMP2, 0); OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, TMP1, 0); if (HAS_VIRTUAL_REGISTERS) { - OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), 0, SLJIT_MEM1(STACK_TOP), -(2 * sizeof(sljit_sw))); - OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, 2 * sizeof(sljit_sw)); + OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), 0, SLJIT_MEM1(STACK_TOP), -(2 * SSIZE_OF(sw))); + OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, 2 * SSIZE_OF(sw)); } else { - OP1(SLJIT_MOV, TMP3, 0, SLJIT_MEM1(STACK_TOP), -(2 * sizeof(sljit_sw))); - OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, 2 * sizeof(sljit_sw)); + OP1(SLJIT_MOV, TMP3, 0, SLJIT_MEM1(STACK_TOP), -(2 * SSIZE_OF(sw))); + OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, 2 * SSIZE_OF(sw)); OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), 0, TMP3, 0); } JUMPTO(SLJIT_JUMP, mainloop); @@ -7156,11 +7156,11 @@ if (char1_reg == STR_END) OP1(SLJIT_MOV, RETURN_ADDR, 0, char2_reg, 0); } -if (sljit_emit_mem(compiler, MOV_UCHAR | SLJIT_MEM_SUPP | SLJIT_MEM_POST, char1_reg, SLJIT_MEM1(TMP1), IN_UCHARS(1)) == SLJIT_SUCCESS) +if (sljit_emit_mem_update(compiler, MOV_UCHAR | SLJIT_MEM_SUPP | SLJIT_MEM_POST, char1_reg, SLJIT_MEM1(TMP1), IN_UCHARS(1)) == SLJIT_SUCCESS) { label = LABEL(); - sljit_emit_mem(compiler, MOV_UCHAR | SLJIT_MEM_POST, char1_reg, SLJIT_MEM1(TMP1), IN_UCHARS(1)); - sljit_emit_mem(compiler, MOV_UCHAR | SLJIT_MEM_POST, char2_reg, SLJIT_MEM1(STR_PTR), IN_UCHARS(1)); + sljit_emit_mem_update(compiler, MOV_UCHAR | SLJIT_MEM_POST, char1_reg, SLJIT_MEM1(TMP1), IN_UCHARS(1)); + sljit_emit_mem_update(compiler, MOV_UCHAR | SLJIT_MEM_POST, char2_reg, SLJIT_MEM1(STR_PTR), IN_UCHARS(1)); jump = CMP(SLJIT_NOT_EQUAL, char1_reg, 0, char2_reg, 0); OP2(SLJIT_SUB | SLJIT_SET_Z, TMP2, 0, TMP2, 0, SLJIT_IMM, IN_UCHARS(1)); JUMPTO(SLJIT_NOT_ZERO, label); @@ -7168,14 +7168,14 @@ if (sljit_emit_mem(compiler, MOV_UCHAR | SLJIT_MEM_SUPP | SLJIT_MEM_POST, char1_ JUMPHERE(jump); OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0); } -else if (sljit_emit_mem(compiler, MOV_UCHAR | SLJIT_MEM_SUPP | SLJIT_MEM_PRE, char1_reg, SLJIT_MEM1(TMP1), IN_UCHARS(1)) == SLJIT_SUCCESS) +else if (sljit_emit_mem_update(compiler, MOV_UCHAR | SLJIT_MEM_SUPP | SLJIT_MEM_PRE, char1_reg, SLJIT_MEM1(TMP1), IN_UCHARS(1)) == SLJIT_SUCCESS) { OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1)); OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); label = LABEL(); - sljit_emit_mem(compiler, MOV_UCHAR | SLJIT_MEM_PRE, char1_reg, SLJIT_MEM1(TMP1), IN_UCHARS(1)); - sljit_emit_mem(compiler, MOV_UCHAR | SLJIT_MEM_PRE, char2_reg, SLJIT_MEM1(STR_PTR), IN_UCHARS(1)); + sljit_emit_mem_update(compiler, MOV_UCHAR | SLJIT_MEM_PRE, char1_reg, SLJIT_MEM1(TMP1), IN_UCHARS(1)); + sljit_emit_mem_update(compiler, MOV_UCHAR | SLJIT_MEM_PRE, char2_reg, SLJIT_MEM1(STR_PTR), IN_UCHARS(1)); jump = CMP(SLJIT_NOT_EQUAL, char1_reg, 0, char2_reg, 0); OP2(SLJIT_SUB | SLJIT_SET_Z, TMP2, 0, TMP2, 0, SLJIT_IMM, IN_UCHARS(1)); JUMPTO(SLJIT_NOT_ZERO, label); @@ -7229,9 +7229,9 @@ else lcc_table = TMP3; } -if (sljit_emit_mem(compiler, MOV_UCHAR | SLJIT_MEM_SUPP | SLJIT_MEM_POST, char1_reg, SLJIT_MEM1(TMP1), IN_UCHARS(1)) == SLJIT_SUCCESS) +if (sljit_emit_mem_update(compiler, MOV_UCHAR | SLJIT_MEM_SUPP | SLJIT_MEM_POST, char1_reg, SLJIT_MEM1(TMP1), IN_UCHARS(1)) == SLJIT_SUCCESS) opt_type = 1; -else if (sljit_emit_mem(compiler, MOV_UCHAR | SLJIT_MEM_SUPP | SLJIT_MEM_PRE, char1_reg, SLJIT_MEM1(TMP1), IN_UCHARS(1)) == SLJIT_SUCCESS) +else if (sljit_emit_mem_update(compiler, MOV_UCHAR | SLJIT_MEM_SUPP | SLJIT_MEM_PRE, char1_reg, SLJIT_MEM1(TMP1), IN_UCHARS(1)) == SLJIT_SUCCESS) opt_type = 2; sljit_emit_fast_enter(compiler, SLJIT_MEM1(SLJIT_SP), LOCALS0); @@ -7250,8 +7250,8 @@ OP1(SLJIT_MOV, lcc_table, 0, SLJIT_IMM, common->lcc); if (opt_type == 1) { label = LABEL(); - sljit_emit_mem(compiler, MOV_UCHAR | SLJIT_MEM_POST, char1_reg, SLJIT_MEM1(TMP1), IN_UCHARS(1)); - sljit_emit_mem(compiler, MOV_UCHAR | SLJIT_MEM_POST, char2_reg, SLJIT_MEM1(STR_PTR), IN_UCHARS(1)); + sljit_emit_mem_update(compiler, MOV_UCHAR | SLJIT_MEM_POST, char1_reg, SLJIT_MEM1(TMP1), IN_UCHARS(1)); + sljit_emit_mem_update(compiler, MOV_UCHAR | SLJIT_MEM_POST, char2_reg, SLJIT_MEM1(STR_PTR), IN_UCHARS(1)); } else if (opt_type == 2) { @@ -7259,8 +7259,8 @@ else if (opt_type == 2) OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); label = LABEL(); - sljit_emit_mem(compiler, MOV_UCHAR | SLJIT_MEM_PRE, char1_reg, SLJIT_MEM1(TMP1), IN_UCHARS(1)); - sljit_emit_mem(compiler, MOV_UCHAR | SLJIT_MEM_PRE, char2_reg, SLJIT_MEM1(STR_PTR), IN_UCHARS(1)); + sljit_emit_mem_update(compiler, MOV_UCHAR | SLJIT_MEM_PRE, char1_reg, SLJIT_MEM1(TMP1), IN_UCHARS(1)); + sljit_emit_mem_update(compiler, MOV_UCHAR | SLJIT_MEM_PRE, char2_reg, SLJIT_MEM1(STR_PTR), IN_UCHARS(1)); } else { @@ -9686,7 +9686,7 @@ BACKTRACK_AS(recurse_backtrack)->matchingpath = LABEL(); return cc + 1 + LINK_SIZE; } -static sljit_s32 SLJIT_FUNC do_callout(struct jit_arguments *arguments, pcre2_callout_block *callout_block, PCRE2_SPTR *jit_ovector) +static sljit_s32 SLJIT_FUNC do_callout_jit(struct jit_arguments *arguments, pcre2_callout_block *callout_block, PCRE2_SPTR *jit_ovector) { PCRE2_SPTR begin; PCRE2_SIZE *ovector; @@ -9753,7 +9753,7 @@ unsigned int callout_length = (*cc == OP_CALLOUT) sljit_sw value1; sljit_sw value2; sljit_sw value3; -sljit_uw callout_arg_size = (common->re->top_bracket + 1) * 2 * sizeof(sljit_sw); +sljit_uw callout_arg_size = (common->re->top_bracket + 1) * 2 * SSIZE_OF(sw); PUSH_BACKTRACK(sizeof(backtrack_common), cc, NULL); @@ -9803,7 +9803,7 @@ OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS0, STR_PTR, 0); /* SLJIT_R0 = arguments */ OP1(SLJIT_MOV, SLJIT_R1, 0, STACK_TOP, 0); GET_LOCAL_BASE(SLJIT_R2, 0, OVECTOR_START); -sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_ARGS3(32, W, W, W), SLJIT_IMM, SLJIT_FUNC_ADDR(do_callout)); +sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_ARGS3(32, W, W, W), SLJIT_IMM, SLJIT_FUNC_ADDR(do_callout_jit)); OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0); free_stack(common, callout_arg_size); @@ -11448,7 +11448,7 @@ struct sljit_label *label; int private_data_ptr = PRIVATE_DATA(cc); int base = (private_data_ptr == 0) ? SLJIT_MEM1(STACK_TOP) : SLJIT_MEM1(SLJIT_SP); int offset0 = (private_data_ptr == 0) ? STACK(0) : private_data_ptr; -int offset1 = (private_data_ptr == 0) ? STACK(1) : private_data_ptr + (int)sizeof(sljit_sw); +int offset1 = (private_data_ptr == 0) ? STACK(1) : private_data_ptr + SSIZE_OF(sw); int tmp_base, tmp_offset; #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32 BOOL use_tmp; @@ -11514,19 +11514,19 @@ if (exact > 1) } } else if (exact == 1) - { compile_char1_matchingpath(common, type, cc, &backtrack->topbacktracks, TRUE); - if (early_fail_type == type_fail_range) - { - OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), early_fail_ptr); - OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), early_fail_ptr + (int)sizeof(sljit_sw)); - OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, TMP2, 0); - OP2(SLJIT_SUB, TMP2, 0, STR_PTR, 0, TMP2, 0); - add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_LESS_EQUAL, TMP2, 0, TMP1, 0)); +if (early_fail_type == type_fail_range) + { + /* Range end first, followed by range start. */ + OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), early_fail_ptr); + OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), early_fail_ptr + SSIZE_OF(sw)); + OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, TMP2, 0); + OP2(SLJIT_SUB, TMP2, 0, STR_PTR, 0, TMP2, 0); + add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_LESS_EQUAL, TMP2, 0, TMP1, 0)); - OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), early_fail_ptr + (int)sizeof(sljit_sw), STR_PTR, 0); - } + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), early_fail_ptr, STR_PTR, 0); + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), early_fail_ptr + SSIZE_OF(sw), STR_PTR, 0); } switch(opcode) @@ -12425,7 +12425,7 @@ PCRE2_SPTR end; int private_data_ptr = PRIVATE_DATA(cc); int base = (private_data_ptr == 0) ? SLJIT_MEM1(STACK_TOP) : SLJIT_MEM1(SLJIT_SP); int offset0 = (private_data_ptr == 0) ? STACK(0) : private_data_ptr; -int offset1 = (private_data_ptr == 0) ? STACK(1) : private_data_ptr + (int)sizeof(sljit_sw); +int offset1 = (private_data_ptr == 0) ? STACK(1) : private_data_ptr + SSIZE_OF(sw); cc = get_iterator_parameters(common, cc, &opcode, &type, &max, &exact, &end); @@ -14145,7 +14145,7 @@ quit_label = common->quit_label; if (common->currententry != NULL) { /* A free bit for each private data. */ - common->recurse_bitset_size = ((private_data_size / (int)sizeof(sljit_sw)) + 7) >> 3; + common->recurse_bitset_size = ((private_data_size / SSIZE_OF(sw)) + 7) >> 3; SLJIT_ASSERT(common->recurse_bitset_size > 0); common->recurse_bitset = (sljit_u8*)SLJIT_MALLOC(common->recurse_bitset_size, allocator_data);; @@ -14381,7 +14381,7 @@ pcre2_jit_compile(pcre2_code *code, uint32_t options) pcre2_real_code *re = (pcre2_real_code *)code; #ifdef SUPPORT_JIT executable_functions *functions; -static int executable_allocator_is_working = 0; +static int executable_allocator_is_working = -1; #endif if (code == NULL) @@ -14444,23 +14444,21 @@ return PCRE2_ERROR_JIT_BADOPTION; if ((re->flags & PCRE2_NOJIT) != 0) return 0; -if (executable_allocator_is_working == 0) +if (executable_allocator_is_working == -1) { /* Checks whether the executable allocator is working. This check might run multiple times in multi-threaded environments, but the result should not be affected by it. */ void *ptr = SLJIT_MALLOC_EXEC(32, NULL); - - executable_allocator_is_working = -1; - if (ptr != NULL) { SLJIT_FREE_EXEC(((sljit_u8*)(ptr)) + SLJIT_EXEC_OFFSET(ptr), NULL); executable_allocator_is_working = 1; } + else executable_allocator_is_working = 0; } -if (executable_allocator_is_working < 0) +if (!executable_allocator_is_working) return PCRE2_ERROR_NOMEMORY; if ((re->overall_options & PCRE2_MATCH_INVALID_UTF) != 0) diff --git a/Foundation/src/pcre2_jit_misc.c b/Foundation/src/pcre2_jit_misc.c index e57afad06..bb6a5589c 100644 --- a/Foundation/src/pcre2_jit_misc.c +++ b/Foundation/src/pcre2_jit_misc.c @@ -110,8 +110,10 @@ pcre2_jit_free_unused_memory(pcre2_general_context *gcontext) (void)gcontext; /* Suppress warning */ #else /* SUPPORT_JIT */ SLJIT_UNUSED_ARG(gcontext); +#if (defined SLJIT_EXECUTABLE_ALLOCATOR && SLJIT_EXECUTABLE_ALLOCATOR) sljit_free_unused_memory_exec(); -#endif /* SUPPORT_JIT */ +#endif /* SLJIT_EXECUTABLE_ALLOCATOR */ +#endif /* SUPPORT_JIT */ } diff --git a/Foundation/src/pcre2_match.c b/Foundation/src/pcre2_match.c index 3c316c0b4..c9020542b 100644 --- a/Foundation/src/pcre2_match.c +++ b/Foundation/src/pcre2_match.c @@ -202,6 +202,7 @@ Arguments: P a previous frame of interest frame_size the frame size mb points to the match block + match_data points to the match data block s identification text Returns: nothing @@ -209,7 +210,7 @@ Returns: nothing static void display_frames(FILE *f, heapframe *F, heapframe *P, PCRE2_SIZE frame_size, - match_block *mb, const char *s, ...) + match_block *mb, pcre2_match_data *match_data, const char *s, ...) { uint32_t i; heapframe *Q; @@ -221,10 +222,10 @@ vfprintf(f, s, ap); va_end(ap); if (P != NULL) fprintf(f, " P=%lu", - ((char *)P - (char *)(mb->match_frames))/frame_size); + ((char *)P - (char *)(match_data->heapframes))/frame_size); fprintf(f, "\n"); -for (i = 0, Q = mb->match_frames; +for (i = 0, Q = match_data->heapframes; Q <= F; i++, Q = (heapframe *)((char *)Q + frame_size)) { @@ -488,10 +489,16 @@ A version did exist that used individual frames on the heap instead of calling match() recursively, but this ran substantially slower. The current version is a refactoring that uses a vector of frames to remember backtracking points. This runs no slower, and possibly even a bit faster than the original recursive -implementation. An initial vector of size START_FRAMES_SIZE (enough for maybe -50 frames) is allocated on the system stack. If this is not big enough, the -heap is used for a larger vector. +implementation. +At first, an initial vector of size START_FRAMES_SIZE (enough for maybe 50 +frames) was allocated on the system stack. If this was not big enough, the heap +was used for a larger vector. However, it turns out that there are environments +where taking as little as 20KiB from the system stack is an embarrassment. +After another refactoring, the heap is used exclusively, but a pointer the +frames vector and its size are cached in the match_data block, so that there is +no new memory allocation if the same match_data block is used for multiple +matches (unless the frames vector has to be extended). ******************************************************************************* ******************************************************************************/ @@ -564,10 +571,9 @@ made performance worse. Arguments: start_eptr starting character in subject start_ecode starting position in compiled code - ovector pointer to the final output vector - oveccount number of pairs in ovector top_bracket number of capturing parentheses in the pattern frame_size size of each backtracking frame + match_data pointer to the match_data block mb pointer to "static" variables block Returns: MATCH_MATCH if matched ) these values are >= 0 @@ -578,17 +584,19 @@ Returns: MATCH_MATCH if matched ) these values are >= 0 */ static int -match(PCRE2_SPTR start_eptr, PCRE2_SPTR start_ecode, PCRE2_SIZE *ovector, - uint16_t oveccount, uint16_t top_bracket, PCRE2_SIZE frame_size, - match_block *mb) +match(PCRE2_SPTR start_eptr, PCRE2_SPTR start_ecode, uint16_t top_bracket, + PCRE2_SIZE frame_size, pcre2_match_data *match_data, match_block *mb) { /* Frame-handling variables */ heapframe *F; /* Current frame pointer */ heapframe *N = NULL; /* Temporary frame pointers */ heapframe *P = NULL; + +heapframe *frames_top; /* End of frames vector */ heapframe *assert_accept_frame = NULL; /* For passing back a frame with captures */ -PCRE2_SIZE frame_copy_size; /* Amount to copy when creating a new frame */ +PCRE2_SIZE heapframes_size; /* Usable size of frames vector */ +PCRE2_SIZE frame_copy_size; /* Amount to copy when creating a new frame */ /* Local variables that do not need to be preserved over calls to RRMATCH(). */ @@ -625,10 +633,14 @@ copied when a new frame is created. */ frame_copy_size = frame_size - offsetof(heapframe, eptr); -/* Set up the first current frame at the start of the vector, and initialize -fields that are not reset for new frames. */ +/* Set up the first frame and the end of the frames vector. We set the local +heapframes_size to the usuable amount of the vector, that is, a whole number of +frames. */ + +F = match_data->heapframes; +heapframes_size = (match_data->heapframes_size / frame_size) * frame_size; +frames_top = (heapframe *)((char *)F + heapframes_size); -F = mb->match_frames; Frdepth = 0; /* "Recursion" depth */ Fcapture_last = 0; /* Number of most recent capture */ Fcurrent_recurse = RECURSE_UNSET; /* Not pattern recursing. */ @@ -644,34 +656,35 @@ backtracking point. */ MATCH_RECURSE: -/* Set up a new backtracking frame. If the vector is full, get a new one -on the heap, doubling the size, but constrained by the heap limit. */ +/* Set up a new backtracking frame. If the vector is full, get a new one, +doubling the size, but constrained by the heap limit (which is in KiB). */ N = (heapframe *)((char *)F + frame_size); -if (N >= mb->match_frames_top) +if (N >= frames_top) { - PCRE2_SIZE newsize = mb->frame_vector_size * 2; heapframe *new; + PCRE2_SIZE newsize = match_data->heapframes_size * 2; - if ((newsize / 1024) > mb->heap_limit) + if (newsize > mb->heap_limit) { - PCRE2_SIZE maxsize = ((mb->heap_limit * 1024)/frame_size) * frame_size; - if (mb->frame_vector_size >= maxsize) return PCRE2_ERROR_HEAPLIMIT; + PCRE2_SIZE maxsize = (mb->heap_limit/frame_size) * frame_size; + if (match_data->heapframes_size >= maxsize) return PCRE2_ERROR_HEAPLIMIT; newsize = maxsize; } - new = mb->memctl.malloc(newsize, mb->memctl.memory_data); + new = match_data->memctl.malloc(newsize, match_data->memctl.memory_data); if (new == NULL) return PCRE2_ERROR_NOMEMORY; - memcpy(new, mb->match_frames, mb->frame_vector_size); + memcpy(new, match_data->heapframes, heapframes_size); - F = (heapframe *)((char *)new + ((char *)F - (char *)mb->match_frames)); + F = (heapframe *)((char *)new + ((char *)F - (char *)match_data->heapframes)); N = (heapframe *)((char *)F + frame_size); - if (mb->match_frames != mb->stack_frames) - mb->memctl.free(mb->match_frames, mb->memctl.memory_data); - mb->match_frames = new; - mb->match_frames_top = (heapframe *)((char *)mb->match_frames + newsize); - mb->frame_vector_size = newsize; + match_data->memctl.free(match_data->heapframes, match_data->memctl.memory_data); + match_data->heapframes = new; + match_data->heapframes_size = newsize; + + heapframes_size = (newsize / frame_size) * frame_size; + frames_top = (heapframe *)((char *)new + heapframes_size); } #ifdef DEBUG_SHOW_RMATCH @@ -729,7 +742,7 @@ recursion value. */ if (group_frame_type != 0) { - Flast_group_offset = (char *)F - (char *)mb->match_frames; + Flast_group_offset = (char *)F - (char *)match_data->heapframes; if (GF_IDMASK(group_frame_type) == GF_RECURSE) Fcurrent_recurse = GF_DATAMASK(group_frame_type); group_frame_type = 0; @@ -771,7 +784,7 @@ fprintf(stderr, "++ op=%d\n", *Fecode); for(;;) { if (offset == PCRE2_UNSET) return PCRE2_ERROR_INTERNAL; - N = (heapframe *)((char *)mb->match_frames + offset); + N = (heapframe *)((char *)match_data->heapframes + offset); P = (heapframe *)((char *)N - frame_size); if (N->group_frame_type == (GF_CAPTURE | number)) break; offset = P->last_group_offset; @@ -809,7 +822,7 @@ fprintf(stderr, "++ op=%d\n", *Fecode); for(;;) { if (offset == PCRE2_UNSET) return PCRE2_ERROR_INTERNAL; - N = (heapframe *)((char *)mb->match_frames + offset); + N = (heapframe *)((char *)match_data->heapframes + offset); P = (heapframe *)((char *)N - frame_size); if (GF_IDMASK(N->group_frame_type) == GF_RECURSE) break; offset = P->last_group_offset; @@ -862,14 +875,15 @@ fprintf(stderr, "++ op=%d\n", *Fecode); mb->mark = Fmark; /* and the last success mark */ if (Feptr > mb->last_used_ptr) mb->last_used_ptr = Feptr; - ovector[0] = Fstart_match - mb->start_subject; - ovector[1] = Feptr - mb->start_subject; + match_data->ovector[0] = Fstart_match - mb->start_subject; + match_data->ovector[1] = Feptr - mb->start_subject; /* Set i to the smaller of the sizes of the external and frame ovectors. */ - i = 2 * ((top_bracket + 1 > oveccount)? oveccount : top_bracket + 1); - memcpy(ovector + 2, Fovector, (i - 2) * sizeof(PCRE2_SIZE)); - while (--i >= Foffset_top + 2) ovector[i] = PCRE2_UNSET; + i = 2 * ((top_bracket + 1 > match_data->oveccount)? + match_data->oveccount : top_bracket + 1); + memcpy(match_data->ovector + 2, Fovector, (i - 2) * sizeof(PCRE2_SIZE)); + while (--i >= Foffset_top + 2) match_data->ovector[i] = PCRE2_UNSET; return MATCH_MATCH; /* Note: NOT RRETURN */ @@ -5326,7 +5340,7 @@ fprintf(stderr, "++ op=%d\n", *Fecode); offset = Flast_group_offset; while (offset != PCRE2_UNSET) { - N = (heapframe *)((char *)mb->match_frames + offset); + N = (heapframe *)((char *)match_data->heapframes + offset); P = (heapframe *)((char *)N - frame_size); if (N->group_frame_type == (GF_RECURSE | number)) { @@ -5727,7 +5741,7 @@ fprintf(stderr, "++ op=%d\n", *Fecode); if (*bracode != OP_BRA && *bracode != OP_COND) { - N = (heapframe *)((char *)mb->match_frames + Flast_group_offset); + N = (heapframe *)((char *)match_data->heapframes + Flast_group_offset); P = (heapframe *)((char *)N - frame_size); Flast_group_offset = P->last_group_offset; @@ -6344,6 +6358,7 @@ BOOL jit_checked_utf = FALSE; #endif /* SUPPORT_UNICODE */ PCRE2_SIZE frame_size; +PCRE2_SIZE heapframes_size; /* We need to have mb as a pointer to a match block, because the IS_NEWLINE macro is used below, and it expects NLBLOCK to be defined as a pointer. */ @@ -6352,15 +6367,6 @@ pcre2_callout_block cb; match_block actual_match_block; match_block *mb = &actual_match_block; -/* Allocate an initial vector of backtracking frames on the stack. If this -proves to be too small, it is replaced by a larger one on the heap. To get a -vector of the size required that is aligned for pointers, allocate it as a -vector of pointers. */ - -PCRE2_SPTR stack_frames_vector[START_FRAMES_SIZE/sizeof(PCRE2_SPTR)] - PCRE2_KEEP_UNINITIALIZED; -mb->stack_frames = (heapframe *)stack_frames_vector; - /* Recognize NULL, length 0 as an empty string. */ if (subject == NULL && length == 0) subject = (PCRE2_SPTR)""; @@ -6791,15 +6797,11 @@ switch(re->newline_convention) vector at the end, whose size depends on the number of capturing parentheses in the pattern. It is not used at all if there are no capturing parentheses. - frame_size is the total size of each frame - mb->frame_vector_size is the total usable size of the vector (rounded down - to a whole number of frames) + frame_size is the total size of each frame + match_data->heapframes is the pointer to the frames vector + match_data->heapframes_size is the total size of the vector -The last of these is changed within the match() function if the frame vector -has to be expanded. We therefore put it into the match block so that it is -correct when calling match() more than once for non-anchored patterns. - -We must also pad frame_size for alignment to ensure subsequent frames are as +We must pad the frame_size for alignment to ensure subsequent frames are as aligned as heapframe. Whilst ovector is word-aligned due to being a PCRE2_SIZE array, that does not guarantee it is suitably aligned for pointers, as some architectures have pointers that are larger than a size_t. */ @@ -6811,8 +6813,8 @@ frame_size = (offsetof(heapframe, ovector) + /* Limits set in the pattern override the match context only if they are smaller. */ -mb->heap_limit = (mcontext->heap_limit < re->limit_heap)? - mcontext->heap_limit : re->limit_heap; +mb->heap_limit = ((mcontext->heap_limit < re->limit_heap)? + mcontext->heap_limit : re->limit_heap) * 1024; mb->match_limit = (mcontext->match_limit < re->limit_match)? mcontext->match_limit : re->limit_match; @@ -6821,35 +6823,40 @@ mb->match_limit_depth = (mcontext->depth_limit < re->limit_depth)? mcontext->depth_limit : re->limit_depth; /* If a pattern has very many capturing parentheses, the frame size may be very -large. Ensure that there are at least 10 available frames by getting an initial -vector on the heap if necessary, except when the heap limit prevents this. Get -fewer if possible. (The heap limit is in kibibytes.) */ +large. Set the initial frame vector size to ensure that there are at least 10 +available frames, but enforce a minimum of START_FRAMES_SIZE. If this is +greater than the heap limit, get as large a vector as possible. Always round +the size to a multiple of the frame size. */ -if (frame_size <= START_FRAMES_SIZE/10) +heapframes_size = frame_size * 10; +if (heapframes_size < START_FRAMES_SIZE) heapframes_size = START_FRAMES_SIZE; +if (heapframes_size > mb->heap_limit) { - mb->match_frames = mb->stack_frames; /* Initial frame vector on the stack */ - mb->frame_vector_size = ((START_FRAMES_SIZE/frame_size) * frame_size); + if (frame_size > mb->heap_limit ) return PCRE2_ERROR_HEAPLIMIT; + heapframes_size = mb->heap_limit; } -else + +/* If an existing frame vector in the match_data block is large enough, we can +use it.Otherwise, free any pre-existing vector and get a new one. */ + +if (match_data->heapframes_size < heapframes_size) { - mb->frame_vector_size = frame_size * 10; - if ((mb->frame_vector_size / 1024) > mb->heap_limit) + match_data->memctl.free(match_data->heapframes, + match_data->memctl.memory_data); + match_data->heapframes = match_data->memctl.malloc(heapframes_size, + match_data->memctl.memory_data); + if (match_data->heapframes == NULL) { - if (frame_size > mb->heap_limit * 1024) return PCRE2_ERROR_HEAPLIMIT; - mb->frame_vector_size = ((mb->heap_limit * 1024)/frame_size) * frame_size; + match_data->heapframes_size = 0; + return PCRE2_ERROR_NOMEMORY; } - mb->match_frames = mb->memctl.malloc(mb->frame_vector_size, - mb->memctl.memory_data); - if (mb->match_frames == NULL) return PCRE2_ERROR_NOMEMORY; + match_data->heapframes_size = heapframes_size; } -mb->match_frames_top = - (heapframe *)((char *)mb->match_frames + mb->frame_vector_size); - /* Write to the ovector within the first frame to mark every capture unset and to avoid uninitialized memory read errors when it is copied to a new frame. */ -memset((char *)(mb->match_frames) + offsetof(heapframe, ovector), 0xff, +memset((char *)(match_data->heapframes) + offsetof(heapframe, ovector), 0xff, frame_size - offsetof(heapframe, ovector)); /* Pointers to the individual character tables */ @@ -7277,8 +7284,8 @@ for(;;) mb->end_offset_top = 0; mb->skip_arg_count = 0; - rc = match(start_match, mb->start_code, match_data->ovector, - match_data->oveccount, re->top_bracket, frame_size, mb); + rc = match(start_match, mb->start_code, re->top_bracket, frame_size, + match_data, mb); if (mb->hitend && start_partial == NULL) { @@ -7461,11 +7468,6 @@ if (utf && end_subject != true_end_subject && } #endif /* SUPPORT_UNICODE */ -/* Release an enlarged frame vector that is on the heap. */ - -if (mb->match_frames != mb->stack_frames) - mb->memctl.free(mb->match_frames, mb->memctl.memory_data); - /* Fill in fields that are always returned in the match data. */ match_data->code = re; @@ -7531,4 +7533,10 @@ else match_data->rc = PCRE2_ERROR_NOMATCH; return match_data->rc; } +/* These #undefs are here to enable unity builds with CMake. */ + +#undef NLBLOCK /* Block containing newline information */ +#undef PSSTART /* Field containing processed string start */ +#undef PSEND /* Field containing processed string end */ + /* End of pcre2_match.c */ diff --git a/Foundation/src/pcre2_match_data.c b/Foundation/src/pcre2_match_data.c index 2a69a9df1..7b22ca8ff 100644 --- a/Foundation/src/pcre2_match_data.c +++ b/Foundation/src/pcre2_match_data.c @@ -7,7 +7,7 @@ and semantics are as close as possible to those of the Perl 5 language. Written by Philip Hazel Original API code Copyright (c) 1997-2012 University of Cambridge - New API code Copyright (c) 2016-2019 University of Cambridge + New API code Copyright (c) 2016-2022 University of Cambridge ----------------------------------------------------------------------------- Redistribution and use in source and binary forms, with or without @@ -48,19 +48,23 @@ POSSIBILITY OF SUCH DAMAGE. * Create a match data block given ovector size * *************************************************/ -/* A minimum of 1 is imposed on the number of ovector pairs. */ +/* A minimum of 1 is imposed on the number of ovector pairs. A maximum is also +imposed because the oveccount field in a match data block is uintt6_t. */ PCRE2_EXP_DEFN pcre2_match_data * PCRE2_CALL_CONVENTION pcre2_match_data_create(uint32_t oveccount, pcre2_general_context *gcontext) { pcre2_match_data *yield; if (oveccount < 1) oveccount = 1; +if (oveccount > UINT16_MAX) oveccount = UINT16_MAX; yield = PRIV(memctl_malloc)( offsetof(pcre2_match_data, ovector) + 2*oveccount*sizeof(PCRE2_SIZE), (pcre2_memctl *)gcontext); if (yield == NULL) return NULL; yield->oveccount = oveccount; yield->flags = 0; +yield->heapframes = NULL; +yield->heapframes_size = 0; return yield; } @@ -92,6 +96,9 @@ pcre2_match_data_free(pcre2_match_data *match_data) { if (match_data != NULL) { + if (match_data->heapframes != NULL) + match_data->memctl.free(match_data->heapframes, + match_data->memctl.memory_data); if ((match_data->flags & PCRE2_MD_COPIED_SUBJECT) != 0) match_data->memctl.free((void *)match_data->subject, match_data->memctl.memory_data); diff --git a/Foundation/src/pcre2_substitute.c b/Foundation/src/pcre2_substitute.c index f4124d774..d587d9e6d 100644 --- a/Foundation/src/pcre2_substitute.c +++ b/Foundation/src/pcre2_substitute.c @@ -7,7 +7,7 @@ and semantics are as close as possible to those of the Perl 5 language. Written by Philip Hazel Original API code Copyright (c) 1997-2012 University of Cambridge - New API code Copyright (c) 2016-2021 University of Cambridge + New API code Copyright (c) 2016-2022 University of Cambridge ----------------------------------------------------------------------------- Redistribution and use in source and binary forms, with or without @@ -279,8 +279,9 @@ replacement_only = ((options & PCRE2_SUBSTITUTE_REPLACEMENT_ONLY) != 0); match data block. We create an internal match_data block in two cases: (a) an external one is not supplied (and we are not starting from an existing match); (b) an existing match is to be used for the first substitution. In the latter -case, we copy the existing match into the internal block. This ensures that no -changes are made to the existing match data block. */ +case, we copy the existing match into the internal block, except for any cached +heap frame size and pointer. This ensures that no changes are made to the +external match data block. */ if (match_data == NULL) { @@ -306,6 +307,8 @@ else if (use_existing_match) if (internal_match_data == NULL) return PCRE2_ERROR_NOMEMORY; memcpy(internal_match_data, match_data, offsetof(pcre2_match_data, ovector) + 2*pairs*sizeof(PCRE2_SIZE)); + internal_match_data->heapframes = NULL; + internal_match_data->heapframes_size = 0; match_data = internal_match_data; }