mirror of
				https://github.com/pocoproject/poco.git
				synced 2025-10-25 18:22:59 +02:00 
			
		
		
		
	GH #1586: Upgrade bundled PCRE to 8.40
This commit is contained in:
		| @@ -40,7 +40,7 @@ public: | ||||
| 	// Implementation note: the following definitions must be kept | ||||
| 	// in sync with those from ucp.h (PCRE). | ||||
| 	enum CharacterCategory | ||||
| 		/// Unicode 5.0 character categories. | ||||
| 		/// Unicode character categories. | ||||
| 	{ | ||||
| 		UCP_OTHER, | ||||
| 		UCP_LETTER, | ||||
| @@ -52,7 +52,7 @@ public: | ||||
| 	}; | ||||
|  | ||||
| 	enum CharacterType | ||||
| 		/// Unicode 5.0 character types. | ||||
| 		/// Unicode character types. | ||||
| 	{ | ||||
| 		UCP_CONTROL, | ||||
| 		UCP_FORMAT, | ||||
| @@ -87,7 +87,7 @@ public: | ||||
| 	}; | ||||
| 	 | ||||
| 	enum Script | ||||
| 		/// Unicode 5.0 scripts. | ||||
| 		/// Unicode 7.0 script identifiers. | ||||
| 	{ | ||||
| 		UCP_ARABIC, | ||||
| 		UCP_ARMENIAN, | ||||
| @@ -150,11 +150,13 @@ public: | ||||
| 		UCP_TIFINAGH, | ||||
| 		UCP_UGARITIC, | ||||
| 		UCP_YI, | ||||
| 		// Unicode 5.0 | ||||
| 		UCP_BALINESE, | ||||
| 		UCP_CUNEIFORM, | ||||
| 		UCP_NKO, | ||||
| 		UCP_PHAGS_PA, | ||||
| 		UCP_PHOENICIAN, | ||||
| 		// Unicode 5.1 | ||||
| 		UCP_CARIAN, | ||||
| 		UCP_CHAM, | ||||
| 		UCP_KAYAH_LI, | ||||
| @@ -165,7 +167,59 @@ public: | ||||
| 		UCP_REJANG, | ||||
| 		UCP_SAURASHTRA, | ||||
| 		UCP_SUNDANESE, | ||||
| 		UCP_VAI | ||||
| 		UCP_VAI, | ||||
| 		// Unicode 5.2 | ||||
| 		UCP_AVESTAN, | ||||
| 		UCP_BAMUM, | ||||
| 		UCP_EGYPTIAN_HIEROGLYPHS, | ||||
| 		UCP_IMPERIAL_ARAMAIC, | ||||
| 		UCP_INSCRIPTIONAL_PAHLAVI, | ||||
| 		UCP_INSCRIPTIONAL_PARTHIAN, | ||||
| 		UCP_JAVANESE, | ||||
| 		UCP_KAITHI, | ||||
| 		UCP_LISU, | ||||
| 		UCP_MEETEI_MAYEK, | ||||
| 		UCP_OLD_SOUTH_ARABIAN, | ||||
| 		UCP_OLD_TURKIC, | ||||
| 		UCP_SAMARITAN, | ||||
| 		UCP_TAI_THAM, | ||||
| 		UCP_TAI_VIET, | ||||
| 		// Unicode 6.0 | ||||
| 		UCP_BATAK, | ||||
| 		UCP_BRAHMI, | ||||
| 		UCP_MANDAIC, | ||||
| 		// Unicode 6.1 | ||||
| 		UCP_CHAKMA, | ||||
| 		UCP_MEROITIC_CURSIVE, | ||||
| 		UCP_MEROITIC_HIEROGLYPHS, | ||||
| 		UCP_MIAO, | ||||
| 		UCP_SHARADA, | ||||
| 		UCP_SORA_SOMPENG, | ||||
| 		UCP_TAKRI, | ||||
| 		// Unicode 7.0 | ||||
| 		UCP_BASSA_VAH, | ||||
| 		UCP_CAUCASIAN_ALBANIAN, | ||||
| 		UCP_DUPLOYAN, | ||||
| 		UCP_ELBASAN, | ||||
| 		UCP_GRANTHA, | ||||
| 		UCP_KHOJKI, | ||||
| 		UCP_KHUDAWADI, | ||||
| 		UCP_LINEAR_A, | ||||
| 		UCP_MAHAJANI, | ||||
| 		UCP_MANICHAEAN, | ||||
| 		UCP_MENDE_KIKAKUI, | ||||
| 		UCP_MODI, | ||||
| 		UCP_MRO, | ||||
| 		UCP_NABATAEAN, | ||||
| 		UCP_OLD_NORTH_ARABIAN, | ||||
| 		UCP_OLD_PERMIC, | ||||
| 		UCP_PAHAWH_HMONG, | ||||
| 		UCP_PALMYRENE, | ||||
| 		UCP_PSALTER_PAHLAVI, | ||||
| 		UCP_PAU_CIN_HAU, | ||||
| 		UCP_SIDDHAM, | ||||
| 		UCP_TIRHUTA, | ||||
| 		UCP_WARANG_CITI | ||||
| 	}; | ||||
| 	 | ||||
| 	enum | ||||
|   | ||||
| @@ -42,9 +42,9 @@ POSSIBILITY OF SUCH DAMAGE. | ||||
| /* The current PCRE version information. */ | ||||
|  | ||||
| #define PCRE_MAJOR          8 | ||||
| #define PCRE_MINOR          35 | ||||
| #define PCRE_MINOR          40 | ||||
| #define PCRE_PRERELEASE      | ||||
| #define PCRE_DATE           2014-04-04 | ||||
| #define PCRE_DATE           2017-01-11 | ||||
|  | ||||
| /* When an application links to a PCRE DLL in Windows, the symbols that are | ||||
| imported have to be identified as such. When building PCRE, the appropriate | ||||
|   | ||||
										
											
												File diff suppressed because it is too large
												Load Diff
											
										
									
								
							| @@ -283,7 +283,7 @@ sure both macros are undefined; an emulation function will then be used. */ | ||||
| #define PACKAGE_NAME "PCRE" | ||||
|  | ||||
| /* Define to the full name and version of this package. */ | ||||
| #define PACKAGE_STRING "PCRE 8.35" | ||||
| #define PACKAGE_STRING "PCRE 8.40" | ||||
|  | ||||
| /* Define to the one symbol short name of this package. */ | ||||
| #define PACKAGE_TARNAME "pcre" | ||||
| @@ -292,7 +292,7 @@ sure both macros are undefined; an emulation function will then be used. */ | ||||
| #define PACKAGE_URL "" | ||||
|  | ||||
| /* Define to the version of this package. */ | ||||
| #define PACKAGE_VERSION "8.35" | ||||
| #define PACKAGE_VERSION "8.40" | ||||
|  | ||||
| /* The value of PARENS_NEST_LIMIT specifies the maximum depth of nested | ||||
|    parentheses (of any kind) in a pattern. This limits the amount of system | ||||
| @@ -394,7 +394,7 @@ sure both macros are undefined; an emulation function will then be used. */ | ||||
| /* #undef SUPPORT_VALGRIND */ | ||||
|  | ||||
| /* Version number of package */ | ||||
| #define VERSION "8.35" | ||||
| #define VERSION "8.40" | ||||
|  | ||||
| /* Define to empty if `const' does not conform to ANSI C. */ | ||||
| /* #undef const */ | ||||
|   | ||||
| @@ -2735,9 +2735,10 @@ for (;;) | ||||
|             condcode == OP_DNRREF) | ||||
|           return PCRE_ERROR_DFA_UCOND; | ||||
|  | ||||
|         /* The DEFINE condition is always false */ | ||||
|         /* The DEFINE condition is always false, and the assertion (?!) is | ||||
|         converted to OP_FAIL. */ | ||||
|  | ||||
|         if (condcode == OP_DEF) | ||||
|         if (condcode == OP_DEF || condcode == OP_FAIL) | ||||
|           { ADD_ACTIVE(state_offset + codelink + LINK_SIZE + 1, 0); } | ||||
|  | ||||
|         /* The only supported version of OP_RREF is for the value RREF_ANY, | ||||
| @@ -3241,7 +3242,7 @@ md->callout_data = NULL; | ||||
|  | ||||
| if (extra_data != NULL) | ||||
|   { | ||||
|   unsigned int flags = extra_data->flags; | ||||
|   unsigned long int flags = extra_data->flags; | ||||
|   if ((flags & PCRE_EXTRA_STUDY_DATA) != 0) | ||||
|     study = (const pcre_study_data *)extra_data->study_data; | ||||
|   if ((flags & PCRE_EXTRA_MATCH_LIMIT) != 0) return PCRE_ERROR_DFA_UMLIMIT; | ||||
|   | ||||
| @@ -1137,8 +1137,8 @@ for (;;) | ||||
|     printf("\n"); | ||||
| #endif | ||||
|  | ||||
|     if (offset < md->offset_max) | ||||
|       { | ||||
|     if (offset >= md->offset_max) goto POSSESSIVE_NON_CAPTURE; | ||||
|  | ||||
|     matched_once = FALSE; | ||||
|     code_offset = (int)(ecode - md->start_code); | ||||
|  | ||||
| @@ -1168,11 +1168,16 @@ for (;;) | ||||
|       if (rrc == MATCH_KETRPOS) | ||||
|         { | ||||
|         offset_top = md->end_offset_top; | ||||
|           eptr = md->end_match_ptr; | ||||
|         ecode = md->start_code + code_offset; | ||||
|         save_capture_last = md->capture_last; | ||||
|         matched_once = TRUE; | ||||
|         mstart = md->start_match_ptr;    /* In case \K changed it */ | ||||
|         if (eptr == md->end_match_ptr)   /* Matched an empty string */ | ||||
|           { | ||||
|           do ecode += GET(ecode, 1); while (*ecode == OP_ALT); | ||||
|           break; | ||||
|           } | ||||
|         eptr = md->end_match_ptr; | ||||
|         continue; | ||||
|         } | ||||
|  | ||||
| @@ -1207,18 +1212,6 @@ for (;;) | ||||
|       } | ||||
|  | ||||
|     RRETURN(MATCH_NOMATCH); | ||||
|       } | ||||
|  | ||||
|     /* FALL THROUGH ... Insufficient room for saving captured contents. Treat | ||||
|     as a non-capturing bracket. */ | ||||
|  | ||||
|     /* VVVVVVVVVVVVVVVVVVVVVVVVV */ | ||||
|     /* VVVVVVVVVVVVVVVVVVVVVVVVV */ | ||||
|  | ||||
|     DPRINTF(("insufficient capture room: treat as non-capturing\n")); | ||||
|  | ||||
|     /* VVVVVVVVVVVVVVVVVVVVVVVVV */ | ||||
|     /* VVVVVVVVVVVVVVVVVVVVVVVVV */ | ||||
|  | ||||
|     /* Non-capturing possessive bracket with unlimited repeat. We come here | ||||
|     from BRAZERO with allow_zero = TRUE. The code is similar to the above, | ||||
| @@ -1242,10 +1235,15 @@ for (;;) | ||||
|       if (rrc == MATCH_KETRPOS) | ||||
|         { | ||||
|         offset_top = md->end_offset_top; | ||||
|         eptr = md->end_match_ptr; | ||||
|         ecode = md->start_code + code_offset; | ||||
|         matched_once = TRUE; | ||||
|         mstart = md->start_match_ptr;   /* In case \K reset it */ | ||||
|         if (eptr == md->end_match_ptr)  /* Matched an empty string */ | ||||
|           { | ||||
|           do ecode += GET(ecode, 1); while (*ecode == OP_ALT); | ||||
|           break; | ||||
|           } | ||||
|         eptr = md->end_match_ptr; | ||||
|         continue; | ||||
|         } | ||||
|  | ||||
| @@ -1379,6 +1377,7 @@ for (;;) | ||||
|       break; | ||||
|  | ||||
|       case OP_DEF:     /* DEFINE - always false */ | ||||
|       case OP_FAIL:    /* From optimized (?!) condition */ | ||||
|       break; | ||||
|  | ||||
|       /* The condition is an assertion. Call match() to evaluate it - setting | ||||
| @@ -1395,8 +1394,11 @@ for (;;) | ||||
|         condition = TRUE; | ||||
|  | ||||
|         /* Advance ecode past the assertion to the start of the first branch, | ||||
|         but adjust it so that the general choosing code below works. */ | ||||
|         but adjust it so that the general choosing code below works. If the | ||||
|         assertion has a quantifier that allows zero repeats we must skip over | ||||
|         the BRAZERO. This is a lunatic thing to do, but somebody did! */ | ||||
|  | ||||
|         if (*ecode == OP_BRAZERO) ecode++; | ||||
|         ecode += GET(ecode, 1); | ||||
|         while (*ecode == OP_ALT) ecode += GET(ecode, 1); | ||||
|         ecode += 1 + LINK_SIZE - PRIV(OP_lengths)[condcode]; | ||||
| @@ -1465,7 +1467,18 @@ for (;;) | ||||
|       md->offset_vector[offset] = | ||||
|         md->offset_vector[md->offset_end - number]; | ||||
|       md->offset_vector[offset+1] = (int)(eptr - md->start_subject); | ||||
|       if (offset_top <= offset) offset_top = offset + 2; | ||||
|  | ||||
|       /* If this group is at or above the current highwater mark, ensure that | ||||
|       any groups between the current high water mark and this group are marked | ||||
|       unset and then update the high water mark. */ | ||||
|  | ||||
|       if (offset >= offset_top) | ||||
|         { | ||||
|         register int *iptr = md->offset_vector + offset_top; | ||||
|         register int *iend = md->offset_vector + offset; | ||||
|         while (iptr < iend) *iptr++ = -1; | ||||
|         offset_top = offset + 2; | ||||
|         } | ||||
|       } | ||||
|     ecode += 1 + IMM2_SIZE; | ||||
|     break; | ||||
| @@ -1817,7 +1830,11 @@ for (;;) | ||||
|         are defined in a range that can be tested for. */ | ||||
|  | ||||
|         if (rrc >= MATCH_BACKTRACK_MIN && rrc <= MATCH_BACKTRACK_MAX) | ||||
|           { | ||||
|           if (new_recursive.offset_save != stacksave) | ||||
|             (PUBL(free))(new_recursive.offset_save); | ||||
|           RRETURN(MATCH_NOMATCH); | ||||
|           } | ||||
|  | ||||
|         /* Any return code other than NOMATCH is an error. */ | ||||
|  | ||||
| @@ -1980,6 +1997,19 @@ for (;;) | ||||
|         } | ||||
|       } | ||||
|  | ||||
|     /* OP_KETRPOS is a possessive repeating ket. Remember the current position, | ||||
|     and return the MATCH_KETRPOS. This makes it possible to do the repeats one | ||||
|     at a time from the outer level, thus saving stack. This must precede the | ||||
|     empty string test - in this case that test is done at the outer level. */ | ||||
|  | ||||
|     if (*ecode == OP_KETRPOS) | ||||
|       { | ||||
|       md->start_match_ptr = mstart;    /* In case \K reset it */ | ||||
|       md->end_match_ptr = eptr; | ||||
|       md->end_offset_top = offset_top; | ||||
|       RRETURN(MATCH_KETRPOS); | ||||
|       } | ||||
|  | ||||
|     /* For an ordinary non-repeating ket, just continue at this level. This | ||||
|     also happens for a repeating ket if no characters were matched in the | ||||
|     group. This is the forcible breaking of infinite loops as implemented in | ||||
| @@ -2002,18 +2032,6 @@ for (;;) | ||||
|       break; | ||||
|       } | ||||
|  | ||||
|     /* OP_KETRPOS is a possessive repeating ket. Remember the current position, | ||||
|     and return the MATCH_KETRPOS. This makes it possible to do the repeats one | ||||
|     at a time from the outer level, thus saving stack. */ | ||||
|  | ||||
|     if (*ecode == OP_KETRPOS) | ||||
|       { | ||||
|       md->start_match_ptr = mstart;    /* In case \K reset it */ | ||||
|       md->end_match_ptr = eptr; | ||||
|       md->end_offset_top = offset_top; | ||||
|       RRETURN(MATCH_KETRPOS); | ||||
|       } | ||||
|  | ||||
|     /* The normal repeating kets try the rest of the pattern or restart from | ||||
|     the preceding bracket, in the appropriate order. In the second case, we can | ||||
|     use tail recursion to avoid using another stack frame, unless we have an | ||||
| @@ -3466,7 +3484,7 @@ for (;;) | ||||
|           if (possessive) continue;    /* No backtracking */ | ||||
|           for(;;) | ||||
|             { | ||||
|             if (eptr == pp) goto TAIL_RECURSE; | ||||
|             if (eptr <= pp) goto TAIL_RECURSE; | ||||
|             RMATCH(eptr, ecode, offset_top, md, eptrb, RM23); | ||||
|             if (rrc != MATCH_NOMATCH) RRETURN(rrc); | ||||
| #ifdef SUPPORT_UCP | ||||
| @@ -3887,7 +3905,7 @@ for (;;) | ||||
|           if (possessive) continue;    /* No backtracking */ | ||||
|           for(;;) | ||||
|             { | ||||
|             if (eptr == pp) goto TAIL_RECURSE; | ||||
|             if (eptr <= pp) goto TAIL_RECURSE; | ||||
|             RMATCH(eptr, ecode, offset_top, md, eptrb, RM30); | ||||
|             if (rrc != MATCH_NOMATCH) RRETURN(rrc); | ||||
|             eptr--; | ||||
| @@ -4022,7 +4040,7 @@ for (;;) | ||||
|           if (possessive) continue;    /* No backtracking */ | ||||
|           for(;;) | ||||
|             { | ||||
|             if (eptr == pp) goto TAIL_RECURSE; | ||||
|             if (eptr <= pp) goto TAIL_RECURSE; | ||||
|             RMATCH(eptr, ecode, offset_top, md, eptrb, RM34); | ||||
|             if (rrc != MATCH_NOMATCH) RRETURN(rrc); | ||||
|             eptr--; | ||||
| @@ -5593,7 +5611,7 @@ for (;;) | ||||
|         if (possessive) continue;    /* No backtracking */ | ||||
|         for(;;) | ||||
|           { | ||||
|           if (eptr == pp) goto TAIL_RECURSE; | ||||
|           if (eptr <= pp) goto TAIL_RECURSE; | ||||
|           RMATCH(eptr, ecode, offset_top, md, eptrb, RM44); | ||||
|           if (rrc != MATCH_NOMATCH) RRETURN(rrc); | ||||
|           eptr--; | ||||
| @@ -5635,12 +5653,17 @@ for (;;) | ||||
|  | ||||
|         if (possessive) continue;    /* No backtracking */ | ||||
|  | ||||
|         /* We use <= pp rather than == pp to detect the start of the run while | ||||
|         backtracking because the use of \C in UTF mode can cause BACKCHAR to | ||||
|         move back past pp. This is just palliative; the use of \C in UTF mode | ||||
|         is fraught with danger. */ | ||||
|  | ||||
|         for(;;) | ||||
|           { | ||||
|           int lgb, rgb; | ||||
|           PCRE_PUCHAR fptr; | ||||
|  | ||||
|           if (eptr == pp) goto TAIL_RECURSE;   /* At start of char run */ | ||||
|           if (eptr <= pp) goto TAIL_RECURSE;   /* At start of char run */ | ||||
|           RMATCH(eptr, ecode, offset_top, md, eptrb, RM45); | ||||
|           if (rrc != MATCH_NOMATCH) RRETURN(rrc); | ||||
|  | ||||
| @@ -5658,7 +5681,7 @@ for (;;) | ||||
|  | ||||
|           for (;;) | ||||
|             { | ||||
|             if (eptr == pp) goto TAIL_RECURSE;   /* At start of char run */ | ||||
|             if (eptr <= pp) goto TAIL_RECURSE;   /* At start of char run */ | ||||
|             fptr = eptr - 1; | ||||
|             if (!utf) c = *fptr; else | ||||
|               { | ||||
| @@ -5682,8 +5705,6 @@ for (;;) | ||||
|         switch(ctype) | ||||
|           { | ||||
|           case OP_ANY: | ||||
|           if (max < INT_MAX) | ||||
|             { | ||||
|           for (i = min; i < max; i++) | ||||
|             { | ||||
|             if (eptr >= md->end_subject) | ||||
| @@ -5704,33 +5725,6 @@ for (;;) | ||||
|             eptr++; | ||||
|             ACROSSCHAR(eptr < md->end_subject, *eptr, eptr++); | ||||
|             } | ||||
|             } | ||||
|  | ||||
|           /* Handle unlimited UTF-8 repeat */ | ||||
|  | ||||
|           else | ||||
|             { | ||||
|             for (i = min; i < max; i++) | ||||
|               { | ||||
|               if (eptr >= md->end_subject) | ||||
|                 { | ||||
|                 SCHECK_PARTIAL(); | ||||
|                 break; | ||||
|                 } | ||||
|               if (IS_NEWLINE(eptr)) break; | ||||
|               if (md->partial != 0 &&    /* Take care with CRLF partial */ | ||||
|                   eptr + 1 >= md->end_subject && | ||||
|                   NLBLOCK->nltype == NLTYPE_FIXED && | ||||
|                   NLBLOCK->nllen == 2 && | ||||
|                   UCHAR21(eptr) == NLBLOCK->nl[0]) | ||||
|                 { | ||||
|                 md->hitend = TRUE; | ||||
|                 if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL); | ||||
|                 } | ||||
|               eptr++; | ||||
|               ACROSSCHAR(eptr < md->end_subject, *eptr, eptr++); | ||||
|               } | ||||
|             } | ||||
|           break; | ||||
|  | ||||
|           case OP_ALLANY: | ||||
| @@ -5937,7 +5931,7 @@ for (;;) | ||||
|         if (possessive) continue;    /* No backtracking */ | ||||
|         for(;;) | ||||
|           { | ||||
|           if (eptr == pp) goto TAIL_RECURSE; | ||||
|           if (eptr <= pp) goto TAIL_RECURSE; | ||||
|           RMATCH(eptr, ecode, offset_top, md, eptrb, RM46); | ||||
|           if (rrc != MATCH_NOMATCH) RRETURN(rrc); | ||||
|           eptr--; | ||||
| @@ -6520,7 +6514,7 @@ tables = re->tables; | ||||
|  | ||||
| if (extra_data != NULL) | ||||
|   { | ||||
|   register unsigned int flags = extra_data->flags; | ||||
|   unsigned long int flags = extra_data->flags; | ||||
|   if ((flags & PCRE_EXTRA_STUDY_DATA) != 0) | ||||
|     study = (const pcre_study_data *)extra_data->study_data; | ||||
|   if ((flags & PCRE_EXTRA_MATCH_LIMIT) != 0) | ||||
| @@ -6692,7 +6686,8 @@ if (md->offset_vector != NULL) | ||||
|   register int *iend = iptr - re->top_bracket; | ||||
|   if (iend < md->offset_vector + 2) iend = md->offset_vector + 2; | ||||
|   while (--iptr >= iend) *iptr = -1; | ||||
|   md->offset_vector[0] = md->offset_vector[1] = -1; | ||||
|   if (offsetcount > 0) md->offset_vector[0] = -1; | ||||
|   if (offsetcount > 1) md->offset_vector[1] = -1; | ||||
|   } | ||||
|  | ||||
| /* Set up the first character to match, if available. The first_char value is | ||||
|   | ||||
| @@ -247,6 +247,7 @@ Arguments: | ||||
|   code         the compiled regex | ||||
|   stringname   the name of the capturing substring | ||||
|   ovector      the vector of matched substrings | ||||
|   stringcount  number of captured substrings | ||||
|  | ||||
| Returns:       the number of the first that is set, | ||||
|                or the number of the last one if none are set, | ||||
| @@ -255,13 +256,16 @@ Returns:       the number of the first that is set, | ||||
|  | ||||
| #if defined COMPILE_PCRE8 | ||||
| static int | ||||
| get_first_set(const pcre *code, const char *stringname, int *ovector) | ||||
| get_first_set(const pcre *code, const char *stringname, int *ovector, | ||||
|   int stringcount) | ||||
| #elif defined COMPILE_PCRE16 | ||||
| static int | ||||
| get_first_set(const pcre16 *code, PCRE_SPTR16 stringname, int *ovector) | ||||
| get_first_set(const pcre16 *code, PCRE_SPTR16 stringname, int *ovector, | ||||
|   int stringcount) | ||||
| #elif defined COMPILE_PCRE32 | ||||
| static int | ||||
| get_first_set(const pcre32 *code, PCRE_SPTR32 stringname, int *ovector) | ||||
| get_first_set(const pcre32 *code, PCRE_SPTR32 stringname, int *ovector, | ||||
|   int stringcount) | ||||
| #endif | ||||
| { | ||||
| const REAL_PCRE *re = (const REAL_PCRE *)code; | ||||
| @@ -292,7 +296,7 @@ if (entrysize <= 0) return entrysize; | ||||
| for (entry = (pcre_uchar *)first; entry <= (pcre_uchar *)last; entry += entrysize) | ||||
|   { | ||||
|   int n = GET2(entry, 0); | ||||
|   if (ovector[n*2] >= 0) return n; | ||||
|   if (n < stringcount && ovector[n*2] >= 0) return n; | ||||
|   } | ||||
| return GET2(entry, 0); | ||||
| } | ||||
| @@ -399,7 +403,7 @@ pcre32_copy_named_substring(const pcre32 *code, PCRE_SPTR32 subject, | ||||
|   PCRE_UCHAR32 *buffer, int size) | ||||
| #endif | ||||
| { | ||||
| int n = get_first_set(code, stringname, ovector); | ||||
| int n = get_first_set(code, stringname, ovector, stringcount); | ||||
| if (n <= 0) return n; | ||||
| #if defined COMPILE_PCRE8 | ||||
| return pcre_copy_substring(subject, ovector, stringcount, n, buffer, size); | ||||
| @@ -454,7 +458,10 @@ pcre_uchar **stringlist; | ||||
| pcre_uchar *p; | ||||
|  | ||||
| for (i = 0; i < double_count; i += 2) | ||||
|   size += sizeof(pcre_uchar *) + IN_UCHARS(ovector[i+1] - ovector[i] + 1); | ||||
|   { | ||||
|   size += sizeof(pcre_uchar *) + IN_UCHARS(1); | ||||
|   if (ovector[i+1] > ovector[i]) size += IN_UCHARS(ovector[i+1] - ovector[i]); | ||||
|   } | ||||
|  | ||||
| stringlist = (pcre_uchar **)(PUBL(malloc))(size); | ||||
| if (stringlist == NULL) return PCRE_ERROR_NOMEMORY; | ||||
| @@ -470,7 +477,7 @@ p = (pcre_uchar *)(stringlist + stringcount + 1); | ||||
|  | ||||
| for (i = 0; i < double_count; i += 2) | ||||
|   { | ||||
|   int len = ovector[i+1] - ovector[i]; | ||||
|   int len = (ovector[i+1] > ovector[i])? (ovector[i+1] - ovector[i]) : 0; | ||||
|   memcpy(p, subject + ovector[i], IN_UCHARS(len)); | ||||
|   *stringlist++ = p; | ||||
|   p += len; | ||||
| @@ -616,7 +623,7 @@ pcre32_get_named_substring(const pcre32 *code, PCRE_SPTR32 subject, | ||||
|   PCRE_SPTR32 *stringptr) | ||||
| #endif | ||||
| { | ||||
| int n = get_first_set(code, stringname, ovector); | ||||
| int n = get_first_set(code, stringname, ovector, stringcount); | ||||
| if (n <= 0) return n; | ||||
| #if defined COMPILE_PCRE8 | ||||
| return pcre_get_substring(subject, ovector, stringcount, n, stringptr); | ||||
|   | ||||
| @@ -7,7 +7,7 @@ | ||||
| and semantics are as close as possible to those of the Perl 5 language. | ||||
|  | ||||
|                        Written by Philip Hazel | ||||
|            Copyright (c) 1997-2014 University of Cambridge | ||||
|            Copyright (c) 1997-2016 University of Cambridge | ||||
|  | ||||
| ----------------------------------------------------------------------------- | ||||
| Redistribution and use in source and binary forms, with or without | ||||
| @@ -229,9 +229,9 @@ stdint.h is available, include it; it may define INT64_MAX. Systems that do not | ||||
| have stdint.h (e.g. Solaris) may have inttypes.h. The macro int64_t may be set | ||||
| by "configure". */ | ||||
|  | ||||
| #if HAVE_STDINT_H | ||||
| #if defined HAVE_STDINT_H | ||||
| #include <stdint.h> | ||||
| #elif HAVE_INTTYPES_H | ||||
| #elif defined HAVE_INTTYPES_H | ||||
| #include <inttypes.h> | ||||
| #endif | ||||
|  | ||||
| @@ -275,7 +275,7 @@ pcre.h(.in) and disable (comment out) this message. */ | ||||
|  | ||||
| typedef pcre_uint16 pcre_uchar; | ||||
| #define UCHAR_SHIFT (1) | ||||
| #define IN_UCHARS(x) ((x) << UCHAR_SHIFT) | ||||
| #define IN_UCHARS(x) ((x) * 2) | ||||
| #define MAX_255(c) ((c) <= 255u) | ||||
| #define TABLE_GET(c, table, default) (MAX_255(c)? ((table)[c]):(default)) | ||||
|  | ||||
| @@ -283,7 +283,7 @@ typedef pcre_uint16 pcre_uchar; | ||||
|  | ||||
| typedef pcre_uint32 pcre_uchar; | ||||
| #define UCHAR_SHIFT (2) | ||||
| #define IN_UCHARS(x) ((x) << UCHAR_SHIFT) | ||||
| #define IN_UCHARS(x) ((x) * 4) | ||||
| #define MAX_255(c) ((c) <= 255u) | ||||
| #define TABLE_GET(c, table, default) (MAX_255(c)? ((table)[c]):(default)) | ||||
|  | ||||
| @@ -984,7 +984,7 @@ other. NOTE: The values also appear in pcre_jit_compile.c. */ | ||||
| #ifndef EBCDIC | ||||
|  | ||||
| #define HSPACE_LIST \ | ||||
|   CHAR_HT, CHAR_SPACE, 0xa0, \ | ||||
|   CHAR_HT, CHAR_SPACE, CHAR_NBSP, \ | ||||
|   0x1680, 0x180e, 0x2000, 0x2001, 0x2002, 0x2003, 0x2004, 0x2005, \ | ||||
|   0x2006, 0x2007, 0x2008, 0x2009, 0x200A, 0x202f, 0x205f, 0x3000, \ | ||||
|   NOTACHAR | ||||
| @@ -1010,7 +1010,7 @@ other. NOTE: The values also appear in pcre_jit_compile.c. */ | ||||
| #define HSPACE_BYTE_CASES \ | ||||
|   case CHAR_HT: \ | ||||
|   case CHAR_SPACE: \ | ||||
|   case 0xa0     /* NBSP */ | ||||
|   case CHAR_NBSP | ||||
|  | ||||
| #define HSPACE_CASES \ | ||||
|   HSPACE_BYTE_CASES: \ | ||||
| @@ -1037,11 +1037,12 @@ other. NOTE: The values also appear in pcre_jit_compile.c. */ | ||||
| /* ------ EBCDIC environments ------ */ | ||||
|  | ||||
| #else | ||||
| #define HSPACE_LIST CHAR_HT, CHAR_SPACE | ||||
| #define HSPACE_LIST CHAR_HT, CHAR_SPACE, CHAR_NBSP, NOTACHAR | ||||
|  | ||||
| #define HSPACE_BYTE_CASES \ | ||||
|   case CHAR_HT: \ | ||||
|   case CHAR_SPACE | ||||
|   case CHAR_SPACE: \ | ||||
|   case CHAR_NBSP | ||||
|  | ||||
| #define HSPACE_CASES HSPACE_BYTE_CASES | ||||
|  | ||||
| @@ -1215,6 +1216,7 @@ same code point. */ | ||||
|  | ||||
| #define CHAR_ESC                    '\047' | ||||
| #define CHAR_DEL                    '\007' | ||||
| #define CHAR_NBSP                   '\x41' | ||||
| #define STR_ESC                     "\047" | ||||
| #define STR_DEL                     "\007" | ||||
|  | ||||
| @@ -1229,6 +1231,7 @@ a positive value. */ | ||||
| #define CHAR_NEL                    ((unsigned char)'\x85') | ||||
| #define CHAR_ESC                    '\033' | ||||
| #define CHAR_DEL                    '\177' | ||||
| #define CHAR_NBSP                   ((unsigned char)'\xa0') | ||||
|  | ||||
| #define STR_LF                      "\n" | ||||
| #define STR_NL                      STR_LF | ||||
| @@ -1606,6 +1609,7 @@ only. */ | ||||
| #define CHAR_VERTICAL_LINE          '\174' | ||||
| #define CHAR_RIGHT_CURLY_BRACKET    '\175' | ||||
| #define CHAR_TILDE                  '\176' | ||||
| #define CHAR_NBSP                   ((unsigned char)'\xa0') | ||||
|  | ||||
| #define STR_HT                      "\011" | ||||
| #define STR_VT                      "\013" | ||||
| @@ -1762,6 +1766,10 @@ only. */ | ||||
|  | ||||
| /* Escape items that are just an encoding of a particular data value. */ | ||||
|  | ||||
| #ifndef ESC_a | ||||
| #define ESC_a CHAR_BEL | ||||
| #endif | ||||
|  | ||||
| #ifndef ESC_e | ||||
| #define ESC_e CHAR_ESC | ||||
| #endif | ||||
| @@ -2281,7 +2289,7 @@ enum { ERR0,  ERR1,  ERR2,  ERR3,  ERR4,  ERR5,  ERR6,  ERR7,  ERR8,  ERR9, | ||||
|        ERR50, ERR51, ERR52, ERR53, ERR54, ERR55, ERR56, ERR57, ERR58, ERR59, | ||||
|        ERR60, ERR61, ERR62, ERR63, ERR64, ERR65, ERR66, ERR67, ERR68, ERR69, | ||||
|        ERR70, ERR71, ERR72, ERR73, ERR74, ERR75, ERR76, ERR77, ERR78, ERR79, | ||||
|        ERR80, ERR81, ERR82, ERR83, ERR84, ERR85, ERRCOUNT }; | ||||
|        ERR80, ERR81, ERR82, ERR83, ERR84, ERR85, ERR86, ERR87, ERRCOUNT }; | ||||
|  | ||||
| /* JIT compiling modes. The function list is indexed by them. */ | ||||
|  | ||||
| @@ -2446,6 +2454,8 @@ typedef struct compile_data { | ||||
|   BOOL had_pruneorskip;             /* (*PRUNE) or (*SKIP) encountered */ | ||||
|   BOOL check_lookbehind;            /* Lookbehinds need later checking */ | ||||
|   BOOL dupnames;                    /* Duplicate names exist */ | ||||
|   BOOL dupgroups;                   /* Duplicate groups exist: (?| found */ | ||||
|   BOOL iscondassert;                /* Next assert is a condition */ | ||||
|   int  nltype;                      /* Newline type */ | ||||
|   int  nllen;                       /* Newline string length */ | ||||
|   pcre_uchar nl[4];                 /* Newline string when fixed length */ | ||||
| @@ -2459,6 +2469,13 @@ typedef struct branch_chain { | ||||
|   pcre_uchar *current_branch; | ||||
| } branch_chain; | ||||
|  | ||||
| /* Structure for mutual recursion detection. Each entry names one group that | ||||
| is being scanned; entries are linked through "prev" so that a scan can walk | ||||
| back from the newest entry and see whether a group is already in progress. */ | ||||
|  | ||||
| typedef struct recurse_check { | ||||
|   struct recurse_check *prev;       /* Previous entry; NULL ends the chain */ | ||||
|   const pcre_uchar *group;          /* The group, identified by its address */ | ||||
| } recurse_check; | ||||
|  | ||||
| /* Structure for items in a linked list that represents an explicit recursive | ||||
| call within the pattern; used by pcre_exec(). */ | ||||
|  | ||||
|   | ||||
										
											
												File diff suppressed because it is too large
												Load Diff
											
										
									
								
							| @@ -67,7 +67,8 @@ Arguments: | ||||
|   code            pointer to start of group (the bracket) | ||||
|   startcode       pointer to start of the whole pattern's code | ||||
|   options         the compiling options | ||||
|   int             RECURSE depth | ||||
|   recurses        chain of recurse_check to catch mutual recursion | ||||
|   countptr        pointer to call count (to catch over complexity) | ||||
|  | ||||
| Returns:   the minimum length | ||||
|            -1 if \C in UTF-8 mode or (*ACCEPT) was encountered | ||||
| @@ -77,15 +78,19 @@ Returns:   the minimum length | ||||
|  | ||||
| static int | ||||
| find_minlength(const REAL_PCRE *re, const pcre_uchar *code, | ||||
|   const pcre_uchar *startcode, int options, int recurse_depth) | ||||
|   const pcre_uchar *startcode, int options, recurse_check *recurses, | ||||
|   int *countptr) | ||||
| { | ||||
| int length = -1; | ||||
| /* PCRE_UTF16 has the same value as PCRE_UTF8. */ | ||||
| BOOL utf = (options & PCRE_UTF8) != 0; | ||||
| BOOL had_recurse = FALSE; | ||||
| recurse_check this_recurse; | ||||
| register int branchlength = 0; | ||||
| register pcre_uchar *cc = (pcre_uchar *)code + 1 + LINK_SIZE; | ||||
|  | ||||
| if ((*countptr)++ > 1000) return -1;   /* too complex */ | ||||
|  | ||||
| if (*code == OP_CBRA || *code == OP_SCBRA || | ||||
|     *code == OP_CBRAPOS || *code == OP_SCBRAPOS) cc += IMM2_SIZE; | ||||
|  | ||||
| @@ -127,7 +132,7 @@ for (;;) | ||||
|     case OP_SBRAPOS: | ||||
|     case OP_ONCE: | ||||
|     case OP_ONCE_NC: | ||||
|     d = find_minlength(re, cc, startcode, options, recurse_depth); | ||||
|     d = find_minlength(re, cc, startcode, options, recurses, countptr); | ||||
|     if (d < 0) return d; | ||||
|     branchlength += d; | ||||
|     do cc += GET(cc, 1); while (*cc == OP_ALT); | ||||
| @@ -390,7 +395,7 @@ for (;;) | ||||
|         ce = cs = (pcre_uchar *)PRIV(find_bracket)(startcode, utf, GET2(slot, 0)); | ||||
|         if (cs == NULL) return -2; | ||||
|         do ce += GET(ce, 1); while (*ce == OP_ALT); | ||||
|         if (cc > cs && cc < ce) | ||||
|         if (cc > cs && cc < ce)     /* Simple recursion */ | ||||
|           { | ||||
|           d = 0; | ||||
|           had_recurse = TRUE; | ||||
| @@ -398,9 +403,24 @@ for (;;) | ||||
|           } | ||||
|         else | ||||
|           { | ||||
|           int dd = find_minlength(re, cs, startcode, options, recurse_depth); | ||||
|           recurse_check *r = recurses; | ||||
|           for (r = recurses; r != NULL; r = r->prev) if (r->group == cs) break; | ||||
|           if (r != NULL)           /* Mutual recursion */ | ||||
|             { | ||||
|             d = 0; | ||||
|             had_recurse = TRUE; | ||||
|             break; | ||||
|             } | ||||
|           else | ||||
|             { | ||||
|             int dd; | ||||
|             this_recurse.prev = recurses; | ||||
|             this_recurse.group = cs; | ||||
|             dd = find_minlength(re, cs, startcode, options, &this_recurse, | ||||
|               countptr); | ||||
|             if (dd < d) d = dd; | ||||
|             } | ||||
|           } | ||||
|         slot += re->name_entry_size; | ||||
|         } | ||||
|       } | ||||
| @@ -415,14 +435,27 @@ for (;;) | ||||
|       ce = cs = (pcre_uchar *)PRIV(find_bracket)(startcode, utf, GET2(cc, 1)); | ||||
|       if (cs == NULL) return -2; | ||||
|       do ce += GET(ce, 1); while (*ce == OP_ALT); | ||||
|       if (cc > cs && cc < ce) | ||||
|       if (cc > cs && cc < ce)    /* Simple recursion */ | ||||
|         { | ||||
|         d = 0; | ||||
|         had_recurse = TRUE; | ||||
|         } | ||||
|       else | ||||
|         { | ||||
|         d = find_minlength(re, cs, startcode, options, recurse_depth); | ||||
|         recurse_check *r = recurses; | ||||
|         for (r = recurses; r != NULL; r = r->prev) if (r->group == cs) break; | ||||
|         if (r != NULL)           /* Mutual recursion */ | ||||
|           { | ||||
|           d = 0; | ||||
|           had_recurse = TRUE; | ||||
|           } | ||||
|         else | ||||
|           { | ||||
|           this_recurse.prev = recurses; | ||||
|           this_recurse.group = cs; | ||||
|           d = find_minlength(re, cs, startcode, options, &this_recurse, | ||||
|             countptr); | ||||
|           } | ||||
|         } | ||||
|       } | ||||
|     else d = 0; | ||||
| @@ -471,12 +504,21 @@ for (;;) | ||||
|     case OP_RECURSE: | ||||
|     cs = ce = (pcre_uchar *)startcode + GET(cc, 1); | ||||
|     do ce += GET(ce, 1); while (*ce == OP_ALT); | ||||
|     if ((cc > cs && cc < ce) || recurse_depth > 10) | ||||
|     if (cc > cs && cc < ce)    /* Simple recursion */ | ||||
|       had_recurse = TRUE; | ||||
|     else | ||||
|       { | ||||
|       recurse_check *r = recurses; | ||||
|       for (r = recurses; r != NULL; r = r->prev) if (r->group == cs) break; | ||||
|       if (r != NULL)           /* Mutual recursion */ | ||||
|         had_recurse = TRUE; | ||||
|       else | ||||
|         { | ||||
|         this_recurse.prev = recurses; | ||||
|         this_recurse.group = cs; | ||||
|         branchlength += find_minlength(re, cs, startcode, options, | ||||
|         recurse_depth + 1); | ||||
|           &this_recurse, countptr); | ||||
|         } | ||||
|       } | ||||
|     cc += 1 + LINK_SIZE; | ||||
|     break; | ||||
| @@ -860,7 +902,6 @@ do | ||||
|       case OP_NOTUPTOI: | ||||
|       case OP_NOT_HSPACE: | ||||
|       case OP_NOT_VSPACE: | ||||
|       case OP_PROP: | ||||
|       case OP_PRUNE: | ||||
|       case OP_PRUNE_ARG: | ||||
|       case OP_RECURSE: | ||||
| @@ -878,6 +919,31 @@ do | ||||
|       case OP_THEN_ARG: | ||||
|       return SSB_FAIL; | ||||
|  | ||||
|       /* A "real" property test implies no starting bits, but the fake property | ||||
|       PT_CLIST identifies a list of characters. These lists are short, as they | ||||
|       are used for characters with more than one "other case", so there is no | ||||
|       point in recognizing them for OP_NOTPROP. */ | ||||
|  | ||||
|       case OP_PROP: | ||||
|       if (tcode[1] != PT_CLIST) return SSB_FAIL; | ||||
|         { | ||||
|         const pcre_uint32 *p = PRIV(ucd_caseless_sets) + tcode[2]; | ||||
|         while ((c = *p++) < NOTACHAR) | ||||
|           { | ||||
| #if defined SUPPORT_UTF && defined COMPILE_PCRE8 | ||||
|           if (utf) | ||||
|             { | ||||
|             pcre_uchar buff[6]; | ||||
|             (void)PRIV(ord2utf)(c, buff); | ||||
|             c = buff[0]; | ||||
|             } | ||||
| #endif | ||||
|           if (c > 0xff) SET_BIT(0xff); else SET_BIT(c); | ||||
|           } | ||||
|         } | ||||
|       try_next = FALSE; | ||||
|       break; | ||||
|  | ||||
|       /* We can ignore word boundary tests. */ | ||||
|  | ||||
|       case OP_WORD_BOUNDARY: | ||||
| @@ -1103,24 +1169,17 @@ do | ||||
|       try_next = FALSE; | ||||
|       break; | ||||
|  | ||||
|       /* The cbit_space table has vertical tab as whitespace; we have to | ||||
|       ensure it is set as not whitespace. Luckily, the code value is the same | ||||
|       (0x0b) in ASCII and EBCDIC, so we can just adjust the appropriate bit. */ | ||||
|       /* The cbit_space table has vertical tab as whitespace; we no longer | ||||
|       have to play fancy tricks because Perl added VT to its whitespace at | ||||
|       release 5.18. PCRE added it at release 8.34. */ | ||||
|  | ||||
|       case OP_NOT_WHITESPACE: | ||||
|       set_nottype_bits(start_bits, cbit_space, table_limit, cd); | ||||
|       start_bits[1] |= 0x08; | ||||
|       try_next = FALSE; | ||||
|       break; | ||||
|  | ||||
|       /* The cbit_space table has vertical tab as whitespace; we have to not | ||||
|       set it from the table. Luckily, the code value is the same (0x0b) in | ||||
|       ASCII and EBCDIC, so we can just adjust the appropriate bit. */ | ||||
|  | ||||
|       case OP_WHITESPACE: | ||||
|       c = start_bits[1];    /* Save in case it was already set */ | ||||
|       set_type_bits(start_bits, cbit_space, table_limit, cd); | ||||
|       start_bits[1] = (start_bits[1] & ~0x08) | c; | ||||
|       try_next = FALSE; | ||||
|       break; | ||||
|  | ||||
| @@ -1309,7 +1368,7 @@ do | ||||
|             for (c = 0; c < 16; c++) start_bits[c] |= map[c]; | ||||
|             for (c = 128; c < 256; c++) | ||||
|               { | ||||
|               if ((map[c/8] && (1 << (c&7))) != 0) | ||||
|               if ((map[c/8] & (1 << (c&7))) != 0) | ||||
|                 { | ||||
|                 int d = (c >> 6) | 0xc0;            /* Set bit for this starter */ | ||||
|                 start_bits[d/8] |= (1 << (d&7));    /* and then skip on to the */ | ||||
| @@ -1397,6 +1456,7 @@ pcre32_study(const pcre32 *external_re, int options, const char **errorptr) | ||||
| #endif | ||||
| { | ||||
| int min; | ||||
| int count = 0; | ||||
| BOOL bits_set = FALSE; | ||||
| pcre_uint8 start_bits[32]; | ||||
| PUBL(extra) *extra = NULL; | ||||
| @@ -1483,7 +1543,7 @@ if ((re->options & PCRE_ANCHORED) == 0 && | ||||
|  | ||||
| /* Find the minimum length of subject string. */ | ||||
|  | ||||
| switch(min = find_minlength(re, code, code, re->options, 0)) | ||||
| switch(min = find_minlength(re, code, code, re->options, NULL, &count)) | ||||
|   { | ||||
|   case -2: *errorptr = "internal error: missing capturing bracket"; return NULL; | ||||
|   case -3: *errorptr = "internal error: opcode not recognized"; return NULL; | ||||
|   | ||||
| @@ -209,6 +209,7 @@ strings to make sure that UTF-8 support works on EBCDIC platforms. */ | ||||
| #define STRING_Avestan0 STR_A STR_v STR_e STR_s STR_t STR_a STR_n "\0" | ||||
| #define STRING_Balinese0 STR_B STR_a STR_l STR_i STR_n STR_e STR_s STR_e "\0" | ||||
| #define STRING_Bamum0 STR_B STR_a STR_m STR_u STR_m "\0" | ||||
| #define STRING_Bassa_Vah0 STR_B STR_a STR_s STR_s STR_a STR_UNDERSCORE STR_V STR_a STR_h "\0" | ||||
| #define STRING_Batak0 STR_B STR_a STR_t STR_a STR_k "\0" | ||||
| #define STRING_Bengali0 STR_B STR_e STR_n STR_g STR_a STR_l STR_i "\0" | ||||
| #define STRING_Bopomofo0 STR_B STR_o STR_p STR_o STR_m STR_o STR_f STR_o "\0" | ||||
| @@ -219,6 +220,7 @@ strings to make sure that UTF-8 support works on EBCDIC platforms. */ | ||||
| #define STRING_C0 STR_C "\0" | ||||
| #define STRING_Canadian_Aboriginal0 STR_C STR_a STR_n STR_a STR_d STR_i STR_a STR_n STR_UNDERSCORE STR_A STR_b STR_o STR_r STR_i STR_g STR_i STR_n STR_a STR_l "\0" | ||||
| #define STRING_Carian0 STR_C STR_a STR_r STR_i STR_a STR_n "\0" | ||||
| #define STRING_Caucasian_Albanian0 STR_C STR_a STR_u STR_c STR_a STR_s STR_i STR_a STR_n STR_UNDERSCORE STR_A STR_l STR_b STR_a STR_n STR_i STR_a STR_n "\0" | ||||
| #define STRING_Cc0 STR_C STR_c "\0" | ||||
| #define STRING_Cf0 STR_C STR_f "\0" | ||||
| #define STRING_Chakma0 STR_C STR_h STR_a STR_k STR_m STR_a "\0" | ||||
| @@ -234,11 +236,14 @@ strings to make sure that UTF-8 support works on EBCDIC platforms. */ | ||||
| #define STRING_Cyrillic0 STR_C STR_y STR_r STR_i STR_l STR_l STR_i STR_c "\0" | ||||
| #define STRING_Deseret0 STR_D STR_e STR_s STR_e STR_r STR_e STR_t "\0" | ||||
| #define STRING_Devanagari0 STR_D STR_e STR_v STR_a STR_n STR_a STR_g STR_a STR_r STR_i "\0" | ||||
| #define STRING_Duployan0 STR_D STR_u STR_p STR_l STR_o STR_y STR_a STR_n "\0" | ||||
| #define STRING_Egyptian_Hieroglyphs0 STR_E STR_g STR_y STR_p STR_t STR_i STR_a STR_n STR_UNDERSCORE STR_H STR_i STR_e STR_r STR_o STR_g STR_l STR_y STR_p STR_h STR_s "\0" | ||||
| #define STRING_Elbasan0 STR_E STR_l STR_b STR_a STR_s STR_a STR_n "\0" | ||||
| #define STRING_Ethiopic0 STR_E STR_t STR_h STR_i STR_o STR_p STR_i STR_c "\0" | ||||
| #define STRING_Georgian0 STR_G STR_e STR_o STR_r STR_g STR_i STR_a STR_n "\0" | ||||
| #define STRING_Glagolitic0 STR_G STR_l STR_a STR_g STR_o STR_l STR_i STR_t STR_i STR_c "\0" | ||||
| #define STRING_Gothic0 STR_G STR_o STR_t STR_h STR_i STR_c "\0" | ||||
| #define STRING_Grantha0 STR_G STR_r STR_a STR_n STR_t STR_h STR_a "\0" | ||||
| #define STRING_Greek0 STR_G STR_r STR_e STR_e STR_k "\0" | ||||
| #define STRING_Gujarati0 STR_G STR_u STR_j STR_a STR_r STR_a STR_t STR_i "\0" | ||||
| #define STRING_Gurmukhi0 STR_G STR_u STR_r STR_m STR_u STR_k STR_h STR_i "\0" | ||||
| @@ -258,12 +263,15 @@ strings to make sure that UTF-8 support works on EBCDIC platforms. */ | ||||
| #define STRING_Kayah_Li0 STR_K STR_a STR_y STR_a STR_h STR_UNDERSCORE STR_L STR_i "\0" | ||||
| #define STRING_Kharoshthi0 STR_K STR_h STR_a STR_r STR_o STR_s STR_h STR_t STR_h STR_i "\0" | ||||
| #define STRING_Khmer0 STR_K STR_h STR_m STR_e STR_r "\0" | ||||
| #define STRING_Khojki0 STR_K STR_h STR_o STR_j STR_k STR_i "\0" | ||||
| #define STRING_Khudawadi0 STR_K STR_h STR_u STR_d STR_a STR_w STR_a STR_d STR_i "\0" | ||||
| #define STRING_L0 STR_L "\0" | ||||
| #define STRING_L_AMPERSAND0 STR_L STR_AMPERSAND "\0" | ||||
| #define STRING_Lao0 STR_L STR_a STR_o "\0" | ||||
| #define STRING_Latin0 STR_L STR_a STR_t STR_i STR_n "\0" | ||||
| #define STRING_Lepcha0 STR_L STR_e STR_p STR_c STR_h STR_a "\0" | ||||
| #define STRING_Limbu0 STR_L STR_i STR_m STR_b STR_u "\0" | ||||
| #define STRING_Linear_A0 STR_L STR_i STR_n STR_e STR_a STR_r STR_UNDERSCORE STR_A "\0" | ||||
| #define STRING_Linear_B0 STR_L STR_i STR_n STR_e STR_a STR_r STR_UNDERSCORE STR_B "\0" | ||||
| #define STRING_Lisu0 STR_L STR_i STR_s STR_u "\0" | ||||
| #define STRING_Ll0 STR_L STR_l "\0" | ||||
| @@ -274,18 +282,24 @@ strings to make sure that UTF-8 support works on EBCDIC platforms. */ | ||||
| #define STRING_Lycian0 STR_L STR_y STR_c STR_i STR_a STR_n "\0" | ||||
| #define STRING_Lydian0 STR_L STR_y STR_d STR_i STR_a STR_n "\0" | ||||
| #define STRING_M0 STR_M "\0" | ||||
| #define STRING_Mahajani0 STR_M STR_a STR_h STR_a STR_j STR_a STR_n STR_i "\0" | ||||
| #define STRING_Malayalam0 STR_M STR_a STR_l STR_a STR_y STR_a STR_l STR_a STR_m "\0" | ||||
| #define STRING_Mandaic0 STR_M STR_a STR_n STR_d STR_a STR_i STR_c "\0" | ||||
| #define STRING_Manichaean0 STR_M STR_a STR_n STR_i STR_c STR_h STR_a STR_e STR_a STR_n "\0" | ||||
| #define STRING_Mc0 STR_M STR_c "\0" | ||||
| #define STRING_Me0 STR_M STR_e "\0" | ||||
| #define STRING_Meetei_Mayek0 STR_M STR_e STR_e STR_t STR_e STR_i STR_UNDERSCORE STR_M STR_a STR_y STR_e STR_k "\0" | ||||
| #define STRING_Mende_Kikakui0 STR_M STR_e STR_n STR_d STR_e STR_UNDERSCORE STR_K STR_i STR_k STR_a STR_k STR_u STR_i "\0" | ||||
| #define STRING_Meroitic_Cursive0 STR_M STR_e STR_r STR_o STR_i STR_t STR_i STR_c STR_UNDERSCORE STR_C STR_u STR_r STR_s STR_i STR_v STR_e "\0" | ||||
| #define STRING_Meroitic_Hieroglyphs0 STR_M STR_e STR_r STR_o STR_i STR_t STR_i STR_c STR_UNDERSCORE STR_H STR_i STR_e STR_r STR_o STR_g STR_l STR_y STR_p STR_h STR_s "\0" | ||||
| #define STRING_Miao0 STR_M STR_i STR_a STR_o "\0" | ||||
| #define STRING_Mn0 STR_M STR_n "\0" | ||||
| #define STRING_Modi0 STR_M STR_o STR_d STR_i "\0" | ||||
| #define STRING_Mongolian0 STR_M STR_o STR_n STR_g STR_o STR_l STR_i STR_a STR_n "\0" | ||||
| #define STRING_Mro0 STR_M STR_r STR_o "\0" | ||||
| #define STRING_Myanmar0 STR_M STR_y STR_a STR_n STR_m STR_a STR_r "\0" | ||||
| #define STRING_N0 STR_N "\0" | ||||
| #define STRING_Nabataean0 STR_N STR_a STR_b STR_a STR_t STR_a STR_e STR_a STR_n "\0" | ||||
| #define STRING_Nd0 STR_N STR_d "\0" | ||||
| #define STRING_New_Tai_Lue0 STR_N STR_e STR_w STR_UNDERSCORE STR_T STR_a STR_i STR_UNDERSCORE STR_L STR_u STR_e "\0" | ||||
| #define STRING_Nko0 STR_N STR_k STR_o "\0" | ||||
| @@ -294,12 +308,17 @@ strings to make sure that UTF-8 support works on EBCDIC platforms. */ | ||||
| #define STRING_Ogham0 STR_O STR_g STR_h STR_a STR_m "\0" | ||||
| #define STRING_Ol_Chiki0 STR_O STR_l STR_UNDERSCORE STR_C STR_h STR_i STR_k STR_i "\0" | ||||
| #define STRING_Old_Italic0 STR_O STR_l STR_d STR_UNDERSCORE STR_I STR_t STR_a STR_l STR_i STR_c "\0" | ||||
| #define STRING_Old_North_Arabian0 STR_O STR_l STR_d STR_UNDERSCORE STR_N STR_o STR_r STR_t STR_h STR_UNDERSCORE STR_A STR_r STR_a STR_b STR_i STR_a STR_n "\0" | ||||
| #define STRING_Old_Permic0 STR_O STR_l STR_d STR_UNDERSCORE STR_P STR_e STR_r STR_m STR_i STR_c "\0" | ||||
| #define STRING_Old_Persian0 STR_O STR_l STR_d STR_UNDERSCORE STR_P STR_e STR_r STR_s STR_i STR_a STR_n "\0" | ||||
| #define STRING_Old_South_Arabian0 STR_O STR_l STR_d STR_UNDERSCORE STR_S STR_o STR_u STR_t STR_h STR_UNDERSCORE STR_A STR_r STR_a STR_b STR_i STR_a STR_n "\0" | ||||
| #define STRING_Old_Turkic0 STR_O STR_l STR_d STR_UNDERSCORE STR_T STR_u STR_r STR_k STR_i STR_c "\0" | ||||
| #define STRING_Oriya0 STR_O STR_r STR_i STR_y STR_a "\0" | ||||
| #define STRING_Osmanya0 STR_O STR_s STR_m STR_a STR_n STR_y STR_a "\0" | ||||
| #define STRING_P0 STR_P "\0" | ||||
| #define STRING_Pahawh_Hmong0 STR_P STR_a STR_h STR_a STR_w STR_h STR_UNDERSCORE STR_H STR_m STR_o STR_n STR_g "\0" | ||||
| #define STRING_Palmyrene0 STR_P STR_a STR_l STR_m STR_y STR_r STR_e STR_n STR_e "\0" | ||||
| #define STRING_Pau_Cin_Hau0 STR_P STR_a STR_u STR_UNDERSCORE STR_C STR_i STR_n STR_UNDERSCORE STR_H STR_a STR_u "\0" | ||||
| #define STRING_Pc0 STR_P STR_c "\0" | ||||
| #define STRING_Pd0 STR_P STR_d "\0" | ||||
| #define STRING_Pe0 STR_P STR_e "\0" | ||||
| @@ -309,6 +328,7 @@ strings to make sure that UTF-8 support works on EBCDIC platforms. */ | ||||
| #define STRING_Pi0 STR_P STR_i "\0" | ||||
| #define STRING_Po0 STR_P STR_o "\0" | ||||
| #define STRING_Ps0 STR_P STR_s "\0" | ||||
| #define STRING_Psalter_Pahlavi0 STR_P STR_s STR_a STR_l STR_t STR_e STR_r STR_UNDERSCORE STR_P STR_a STR_h STR_l STR_a STR_v STR_i "\0" | ||||
| #define STRING_Rejang0 STR_R STR_e STR_j STR_a STR_n STR_g "\0" | ||||
| #define STRING_Runic0 STR_R STR_u STR_n STR_i STR_c "\0" | ||||
| #define STRING_S0 STR_S "\0" | ||||
| @@ -317,6 +337,7 @@ strings to make sure that UTF-8 support works on EBCDIC platforms. */ | ||||
| #define STRING_Sc0 STR_S STR_c "\0" | ||||
| #define STRING_Sharada0 STR_S STR_h STR_a STR_r STR_a STR_d STR_a "\0" | ||||
| #define STRING_Shavian0 STR_S STR_h STR_a STR_v STR_i STR_a STR_n "\0" | ||||
| #define STRING_Siddham0 STR_S STR_i STR_d STR_d STR_h STR_a STR_m "\0" | ||||
| #define STRING_Sinhala0 STR_S STR_i STR_n STR_h STR_a STR_l STR_a "\0" | ||||
| #define STRING_Sk0 STR_S STR_k "\0" | ||||
| #define STRING_Sm0 STR_S STR_m "\0" | ||||
| @@ -337,8 +358,10 @@ strings to make sure that UTF-8 support works on EBCDIC platforms. */ | ||||
| #define STRING_Thai0 STR_T STR_h STR_a STR_i "\0" | ||||
| #define STRING_Tibetan0 STR_T STR_i STR_b STR_e STR_t STR_a STR_n "\0" | ||||
| #define STRING_Tifinagh0 STR_T STR_i STR_f STR_i STR_n STR_a STR_g STR_h "\0" | ||||
| #define STRING_Tirhuta0 STR_T STR_i STR_r STR_h STR_u STR_t STR_a "\0" | ||||
| #define STRING_Ugaritic0 STR_U STR_g STR_a STR_r STR_i STR_t STR_i STR_c "\0" | ||||
| #define STRING_Vai0 STR_V STR_a STR_i "\0" | ||||
| #define STRING_Warang_Citi0 STR_W STR_a STR_r STR_a STR_n STR_g STR_UNDERSCORE STR_C STR_i STR_t STR_i "\0" | ||||
| #define STRING_Xan0 STR_X STR_a STR_n "\0" | ||||
| #define STRING_Xps0 STR_X STR_p STR_s "\0" | ||||
| #define STRING_Xsp0 STR_X STR_s STR_p "\0" | ||||
| @@ -357,6 +380,7 @@ const char PRIV(utt_names)[] = | ||||
|   STRING_Avestan0 | ||||
|   STRING_Balinese0 | ||||
|   STRING_Bamum0 | ||||
|   STRING_Bassa_Vah0 | ||||
|   STRING_Batak0 | ||||
|   STRING_Bengali0 | ||||
|   STRING_Bopomofo0 | ||||
| @@ -367,6 +391,7 @@ const char PRIV(utt_names)[] = | ||||
|   STRING_C0 | ||||
|   STRING_Canadian_Aboriginal0 | ||||
|   STRING_Carian0 | ||||
|   STRING_Caucasian_Albanian0 | ||||
|   STRING_Cc0 | ||||
|   STRING_Cf0 | ||||
|   STRING_Chakma0 | ||||
| @@ -382,11 +407,14 @@ const char PRIV(utt_names)[] = | ||||
|   STRING_Cyrillic0 | ||||
|   STRING_Deseret0 | ||||
|   STRING_Devanagari0 | ||||
|   STRING_Duployan0 | ||||
|   STRING_Egyptian_Hieroglyphs0 | ||||
|   STRING_Elbasan0 | ||||
|   STRING_Ethiopic0 | ||||
|   STRING_Georgian0 | ||||
|   STRING_Glagolitic0 | ||||
|   STRING_Gothic0 | ||||
|   STRING_Grantha0 | ||||
|   STRING_Greek0 | ||||
|   STRING_Gujarati0 | ||||
|   STRING_Gurmukhi0 | ||||
| @@ -406,12 +434,15 @@ const char PRIV(utt_names)[] = | ||||
|   STRING_Kayah_Li0 | ||||
|   STRING_Kharoshthi0 | ||||
|   STRING_Khmer0 | ||||
|   STRING_Khojki0 | ||||
|   STRING_Khudawadi0 | ||||
|   STRING_L0 | ||||
|   STRING_L_AMPERSAND0 | ||||
|   STRING_Lao0 | ||||
|   STRING_Latin0 | ||||
|   STRING_Lepcha0 | ||||
|   STRING_Limbu0 | ||||
|   STRING_Linear_A0 | ||||
|   STRING_Linear_B0 | ||||
|   STRING_Lisu0 | ||||
|   STRING_Ll0 | ||||
| @@ -422,18 +453,24 @@ const char PRIV(utt_names)[] = | ||||
|   STRING_Lycian0 | ||||
|   STRING_Lydian0 | ||||
|   STRING_M0 | ||||
|   STRING_Mahajani0 | ||||
|   STRING_Malayalam0 | ||||
|   STRING_Mandaic0 | ||||
|   STRING_Manichaean0 | ||||
|   STRING_Mc0 | ||||
|   STRING_Me0 | ||||
|   STRING_Meetei_Mayek0 | ||||
|   STRING_Mende_Kikakui0 | ||||
|   STRING_Meroitic_Cursive0 | ||||
|   STRING_Meroitic_Hieroglyphs0 | ||||
|   STRING_Miao0 | ||||
|   STRING_Mn0 | ||||
|   STRING_Modi0 | ||||
|   STRING_Mongolian0 | ||||
|   STRING_Mro0 | ||||
|   STRING_Myanmar0 | ||||
|   STRING_N0 | ||||
|   STRING_Nabataean0 | ||||
|   STRING_Nd0 | ||||
|   STRING_New_Tai_Lue0 | ||||
|   STRING_Nko0 | ||||
| @@ -442,12 +479,17 @@ const char PRIV(utt_names)[] = | ||||
|   STRING_Ogham0 | ||||
|   STRING_Ol_Chiki0 | ||||
|   STRING_Old_Italic0 | ||||
|   STRING_Old_North_Arabian0 | ||||
|   STRING_Old_Permic0 | ||||
|   STRING_Old_Persian0 | ||||
|   STRING_Old_South_Arabian0 | ||||
|   STRING_Old_Turkic0 | ||||
|   STRING_Oriya0 | ||||
|   STRING_Osmanya0 | ||||
|   STRING_P0 | ||||
|   STRING_Pahawh_Hmong0 | ||||
|   STRING_Palmyrene0 | ||||
|   STRING_Pau_Cin_Hau0 | ||||
|   STRING_Pc0 | ||||
|   STRING_Pd0 | ||||
|   STRING_Pe0 | ||||
| @@ -457,6 +499,7 @@ const char PRIV(utt_names)[] = | ||||
|   STRING_Pi0 | ||||
|   STRING_Po0 | ||||
|   STRING_Ps0 | ||||
|   STRING_Psalter_Pahlavi0 | ||||
|   STRING_Rejang0 | ||||
|   STRING_Runic0 | ||||
|   STRING_S0 | ||||
| @@ -465,6 +508,7 @@ const char PRIV(utt_names)[] = | ||||
|   STRING_Sc0 | ||||
|   STRING_Sharada0 | ||||
|   STRING_Shavian0 | ||||
|   STRING_Siddham0 | ||||
|   STRING_Sinhala0 | ||||
|   STRING_Sk0 | ||||
|   STRING_Sm0 | ||||
| @@ -485,8 +529,10 @@ const char PRIV(utt_names)[] = | ||||
|   STRING_Thai0 | ||||
|   STRING_Tibetan0 | ||||
|   STRING_Tifinagh0 | ||||
|   STRING_Tirhuta0 | ||||
|   STRING_Ugaritic0 | ||||
|   STRING_Vai0 | ||||
|   STRING_Warang_Citi0 | ||||
|   STRING_Xan0 | ||||
|   STRING_Xps0 | ||||
|   STRING_Xsp0 | ||||
| @@ -505,146 +551,169 @@ const ucp_type_table PRIV(utt)[] = { | ||||
|   {  20, PT_SC, ucp_Avestan }, | ||||
|   {  28, PT_SC, ucp_Balinese }, | ||||
|   {  37, PT_SC, ucp_Bamum }, | ||||
|   {  43, PT_SC, ucp_Batak }, | ||||
|   {  49, PT_SC, ucp_Bengali }, | ||||
|   {  57, PT_SC, ucp_Bopomofo }, | ||||
|   {  66, PT_SC, ucp_Brahmi }, | ||||
|   {  73, PT_SC, ucp_Braille }, | ||||
|   {  81, PT_SC, ucp_Buginese }, | ||||
|   {  90, PT_SC, ucp_Buhid }, | ||||
|   {  96, PT_GC, ucp_C }, | ||||
|   {  98, PT_SC, ucp_Canadian_Aboriginal }, | ||||
|   { 118, PT_SC, ucp_Carian }, | ||||
|   { 125, PT_PC, ucp_Cc }, | ||||
|   { 128, PT_PC, ucp_Cf }, | ||||
|   { 131, PT_SC, ucp_Chakma }, | ||||
|   { 138, PT_SC, ucp_Cham }, | ||||
|   { 143, PT_SC, ucp_Cherokee }, | ||||
|   { 152, PT_PC, ucp_Cn }, | ||||
|   { 155, PT_PC, ucp_Co }, | ||||
|   { 158, PT_SC, ucp_Common }, | ||||
|   { 165, PT_SC, ucp_Coptic }, | ||||
|   { 172, PT_PC, ucp_Cs }, | ||||
|   { 175, PT_SC, ucp_Cuneiform }, | ||||
|   { 185, PT_SC, ucp_Cypriot }, | ||||
|   { 193, PT_SC, ucp_Cyrillic }, | ||||
|   { 202, PT_SC, ucp_Deseret }, | ||||
|   { 210, PT_SC, ucp_Devanagari }, | ||||
|   { 221, PT_SC, ucp_Egyptian_Hieroglyphs }, | ||||
|   { 242, PT_SC, ucp_Ethiopic }, | ||||
|   { 251, PT_SC, ucp_Georgian }, | ||||
|   { 260, PT_SC, ucp_Glagolitic }, | ||||
|   { 271, PT_SC, ucp_Gothic }, | ||||
|   { 278, PT_SC, ucp_Greek }, | ||||
|   { 284, PT_SC, ucp_Gujarati }, | ||||
|   { 293, PT_SC, ucp_Gurmukhi }, | ||||
|   { 302, PT_SC, ucp_Han }, | ||||
|   { 306, PT_SC, ucp_Hangul }, | ||||
|   { 313, PT_SC, ucp_Hanunoo }, | ||||
|   { 321, PT_SC, ucp_Hebrew }, | ||||
|   { 328, PT_SC, ucp_Hiragana }, | ||||
|   { 337, PT_SC, ucp_Imperial_Aramaic }, | ||||
|   { 354, PT_SC, ucp_Inherited }, | ||||
|   { 364, PT_SC, ucp_Inscriptional_Pahlavi }, | ||||
|   { 386, PT_SC, ucp_Inscriptional_Parthian }, | ||||
|   { 409, PT_SC, ucp_Javanese }, | ||||
|   { 418, PT_SC, ucp_Kaithi }, | ||||
|   { 425, PT_SC, ucp_Kannada }, | ||||
|   { 433, PT_SC, ucp_Katakana }, | ||||
|   { 442, PT_SC, ucp_Kayah_Li }, | ||||
|   { 451, PT_SC, ucp_Kharoshthi }, | ||||
|   { 462, PT_SC, ucp_Khmer }, | ||||
|   { 468, PT_GC, ucp_L }, | ||||
|   { 470, PT_LAMP, 0 }, | ||||
|   { 473, PT_SC, ucp_Lao }, | ||||
|   { 477, PT_SC, ucp_Latin }, | ||||
|   { 483, PT_SC, ucp_Lepcha }, | ||||
|   { 490, PT_SC, ucp_Limbu }, | ||||
|   { 496, PT_SC, ucp_Linear_B }, | ||||
|   { 505, PT_SC, ucp_Lisu }, | ||||
|   { 510, PT_PC, ucp_Ll }, | ||||
|   { 513, PT_PC, ucp_Lm }, | ||||
|   { 516, PT_PC, ucp_Lo }, | ||||
|   { 519, PT_PC, ucp_Lt }, | ||||
|   { 522, PT_PC, ucp_Lu }, | ||||
|   { 525, PT_SC, ucp_Lycian }, | ||||
|   { 532, PT_SC, ucp_Lydian }, | ||||
|   { 539, PT_GC, ucp_M }, | ||||
|   { 541, PT_SC, ucp_Malayalam }, | ||||
|   { 551, PT_SC, ucp_Mandaic }, | ||||
|   { 559, PT_PC, ucp_Mc }, | ||||
|   { 562, PT_PC, ucp_Me }, | ||||
|   { 565, PT_SC, ucp_Meetei_Mayek }, | ||||
|   { 578, PT_SC, ucp_Meroitic_Cursive }, | ||||
|   { 595, PT_SC, ucp_Meroitic_Hieroglyphs }, | ||||
|   { 616, PT_SC, ucp_Miao }, | ||||
|   { 621, PT_PC, ucp_Mn }, | ||||
|   { 624, PT_SC, ucp_Mongolian }, | ||||
|   { 634, PT_SC, ucp_Myanmar }, | ||||
|   { 642, PT_GC, ucp_N }, | ||||
|   { 644, PT_PC, ucp_Nd }, | ||||
|   { 647, PT_SC, ucp_New_Tai_Lue }, | ||||
|   { 659, PT_SC, ucp_Nko }, | ||||
|   { 663, PT_PC, ucp_Nl }, | ||||
|   { 666, PT_PC, ucp_No }, | ||||
|   { 669, PT_SC, ucp_Ogham }, | ||||
|   { 675, PT_SC, ucp_Ol_Chiki }, | ||||
|   { 684, PT_SC, ucp_Old_Italic }, | ||||
|   { 695, PT_SC, ucp_Old_Persian }, | ||||
|   { 707, PT_SC, ucp_Old_South_Arabian }, | ||||
|   { 725, PT_SC, ucp_Old_Turkic }, | ||||
|   { 736, PT_SC, ucp_Oriya }, | ||||
|   { 742, PT_SC, ucp_Osmanya }, | ||||
|   { 750, PT_GC, ucp_P }, | ||||
|   { 752, PT_PC, ucp_Pc }, | ||||
|   { 755, PT_PC, ucp_Pd }, | ||||
|   { 758, PT_PC, ucp_Pe }, | ||||
|   { 761, PT_PC, ucp_Pf }, | ||||
|   { 764, PT_SC, ucp_Phags_Pa }, | ||||
|   { 773, PT_SC, ucp_Phoenician }, | ||||
|   { 784, PT_PC, ucp_Pi }, | ||||
|   { 787, PT_PC, ucp_Po }, | ||||
|   { 790, PT_PC, ucp_Ps }, | ||||
|   { 793, PT_SC, ucp_Rejang }, | ||||
|   { 800, PT_SC, ucp_Runic }, | ||||
|   { 806, PT_GC, ucp_S }, | ||||
|   { 808, PT_SC, ucp_Samaritan }, | ||||
|   { 818, PT_SC, ucp_Saurashtra }, | ||||
|   { 829, PT_PC, ucp_Sc }, | ||||
|   { 832, PT_SC, ucp_Sharada }, | ||||
|   { 840, PT_SC, ucp_Shavian }, | ||||
|   { 848, PT_SC, ucp_Sinhala }, | ||||
|   { 856, PT_PC, ucp_Sk }, | ||||
|   { 859, PT_PC, ucp_Sm }, | ||||
|   { 862, PT_PC, ucp_So }, | ||||
|   { 865, PT_SC, ucp_Sora_Sompeng }, | ||||
|   { 878, PT_SC, ucp_Sundanese }, | ||||
|   { 888, PT_SC, ucp_Syloti_Nagri }, | ||||
|   { 901, PT_SC, ucp_Syriac }, | ||||
|   { 908, PT_SC, ucp_Tagalog }, | ||||
|   { 916, PT_SC, ucp_Tagbanwa }, | ||||
|   { 925, PT_SC, ucp_Tai_Le }, | ||||
|   { 932, PT_SC, ucp_Tai_Tham }, | ||||
|   { 941, PT_SC, ucp_Tai_Viet }, | ||||
|   { 950, PT_SC, ucp_Takri }, | ||||
|   { 956, PT_SC, ucp_Tamil }, | ||||
|   { 962, PT_SC, ucp_Telugu }, | ||||
|   { 969, PT_SC, ucp_Thaana }, | ||||
|   { 976, PT_SC, ucp_Thai }, | ||||
|   { 981, PT_SC, ucp_Tibetan }, | ||||
|   { 989, PT_SC, ucp_Tifinagh }, | ||||
|   { 998, PT_SC, ucp_Ugaritic }, | ||||
|   { 1007, PT_SC, ucp_Vai }, | ||||
|   { 1011, PT_ALNUM, 0 }, | ||||
|   { 1015, PT_PXSPACE, 0 }, | ||||
|   { 1019, PT_SPACE, 0 }, | ||||
|   { 1023, PT_UCNC, 0 }, | ||||
|   { 1027, PT_WORD, 0 }, | ||||
|   { 1031, PT_SC, ucp_Yi }, | ||||
|   { 1034, PT_GC, ucp_Z }, | ||||
|   { 1036, PT_PC, ucp_Zl }, | ||||
|   { 1039, PT_PC, ucp_Zp }, | ||||
|   { 1042, PT_PC, ucp_Zs } | ||||
|   {  43, PT_SC, ucp_Bassa_Vah }, | ||||
|   {  53, PT_SC, ucp_Batak }, | ||||
|   {  59, PT_SC, ucp_Bengali }, | ||||
|   {  67, PT_SC, ucp_Bopomofo }, | ||||
|   {  76, PT_SC, ucp_Brahmi }, | ||||
|   {  83, PT_SC, ucp_Braille }, | ||||
|   {  91, PT_SC, ucp_Buginese }, | ||||
|   { 100, PT_SC, ucp_Buhid }, | ||||
|   { 106, PT_GC, ucp_C }, | ||||
|   { 108, PT_SC, ucp_Canadian_Aboriginal }, | ||||
|   { 128, PT_SC, ucp_Carian }, | ||||
|   { 135, PT_SC, ucp_Caucasian_Albanian }, | ||||
|   { 154, PT_PC, ucp_Cc }, | ||||
|   { 157, PT_PC, ucp_Cf }, | ||||
|   { 160, PT_SC, ucp_Chakma }, | ||||
|   { 167, PT_SC, ucp_Cham }, | ||||
|   { 172, PT_SC, ucp_Cherokee }, | ||||
|   { 181, PT_PC, ucp_Cn }, | ||||
|   { 184, PT_PC, ucp_Co }, | ||||
|   { 187, PT_SC, ucp_Common }, | ||||
|   { 194, PT_SC, ucp_Coptic }, | ||||
|   { 201, PT_PC, ucp_Cs }, | ||||
|   { 204, PT_SC, ucp_Cuneiform }, | ||||
|   { 214, PT_SC, ucp_Cypriot }, | ||||
|   { 222, PT_SC, ucp_Cyrillic }, | ||||
|   { 231, PT_SC, ucp_Deseret }, | ||||
|   { 239, PT_SC, ucp_Devanagari }, | ||||
|   { 250, PT_SC, ucp_Duployan }, | ||||
|   { 259, PT_SC, ucp_Egyptian_Hieroglyphs }, | ||||
|   { 280, PT_SC, ucp_Elbasan }, | ||||
|   { 288, PT_SC, ucp_Ethiopic }, | ||||
|   { 297, PT_SC, ucp_Georgian }, | ||||
|   { 306, PT_SC, ucp_Glagolitic }, | ||||
|   { 317, PT_SC, ucp_Gothic }, | ||||
|   { 324, PT_SC, ucp_Grantha }, | ||||
|   { 332, PT_SC, ucp_Greek }, | ||||
|   { 338, PT_SC, ucp_Gujarati }, | ||||
|   { 347, PT_SC, ucp_Gurmukhi }, | ||||
|   { 356, PT_SC, ucp_Han }, | ||||
|   { 360, PT_SC, ucp_Hangul }, | ||||
|   { 367, PT_SC, ucp_Hanunoo }, | ||||
|   { 375, PT_SC, ucp_Hebrew }, | ||||
|   { 382, PT_SC, ucp_Hiragana }, | ||||
|   { 391, PT_SC, ucp_Imperial_Aramaic }, | ||||
|   { 408, PT_SC, ucp_Inherited }, | ||||
|   { 418, PT_SC, ucp_Inscriptional_Pahlavi }, | ||||
|   { 440, PT_SC, ucp_Inscriptional_Parthian }, | ||||
|   { 463, PT_SC, ucp_Javanese }, | ||||
|   { 472, PT_SC, ucp_Kaithi }, | ||||
|   { 479, PT_SC, ucp_Kannada }, | ||||
|   { 487, PT_SC, ucp_Katakana }, | ||||
|   { 496, PT_SC, ucp_Kayah_Li }, | ||||
|   { 505, PT_SC, ucp_Kharoshthi }, | ||||
|   { 516, PT_SC, ucp_Khmer }, | ||||
|   { 522, PT_SC, ucp_Khojki }, | ||||
|   { 529, PT_SC, ucp_Khudawadi }, | ||||
|   { 539, PT_GC, ucp_L }, | ||||
|   { 541, PT_LAMP, 0 }, | ||||
|   { 544, PT_SC, ucp_Lao }, | ||||
|   { 548, PT_SC, ucp_Latin }, | ||||
|   { 554, PT_SC, ucp_Lepcha }, | ||||
|   { 561, PT_SC, ucp_Limbu }, | ||||
|   { 567, PT_SC, ucp_Linear_A }, | ||||
|   { 576, PT_SC, ucp_Linear_B }, | ||||
|   { 585, PT_SC, ucp_Lisu }, | ||||
|   { 590, PT_PC, ucp_Ll }, | ||||
|   { 593, PT_PC, ucp_Lm }, | ||||
|   { 596, PT_PC, ucp_Lo }, | ||||
|   { 599, PT_PC, ucp_Lt }, | ||||
|   { 602, PT_PC, ucp_Lu }, | ||||
|   { 605, PT_SC, ucp_Lycian }, | ||||
|   { 612, PT_SC, ucp_Lydian }, | ||||
|   { 619, PT_GC, ucp_M }, | ||||
|   { 621, PT_SC, ucp_Mahajani }, | ||||
|   { 630, PT_SC, ucp_Malayalam }, | ||||
|   { 640, PT_SC, ucp_Mandaic }, | ||||
|   { 648, PT_SC, ucp_Manichaean }, | ||||
|   { 659, PT_PC, ucp_Mc }, | ||||
|   { 662, PT_PC, ucp_Me }, | ||||
|   { 665, PT_SC, ucp_Meetei_Mayek }, | ||||
|   { 678, PT_SC, ucp_Mende_Kikakui }, | ||||
|   { 692, PT_SC, ucp_Meroitic_Cursive }, | ||||
|   { 709, PT_SC, ucp_Meroitic_Hieroglyphs }, | ||||
|   { 730, PT_SC, ucp_Miao }, | ||||
|   { 735, PT_PC, ucp_Mn }, | ||||
|   { 738, PT_SC, ucp_Modi }, | ||||
|   { 743, PT_SC, ucp_Mongolian }, | ||||
|   { 753, PT_SC, ucp_Mro }, | ||||
|   { 757, PT_SC, ucp_Myanmar }, | ||||
|   { 765, PT_GC, ucp_N }, | ||||
|   { 767, PT_SC, ucp_Nabataean }, | ||||
|   { 777, PT_PC, ucp_Nd }, | ||||
|   { 780, PT_SC, ucp_New_Tai_Lue }, | ||||
|   { 792, PT_SC, ucp_Nko }, | ||||
|   { 796, PT_PC, ucp_Nl }, | ||||
|   { 799, PT_PC, ucp_No }, | ||||
|   { 802, PT_SC, ucp_Ogham }, | ||||
|   { 808, PT_SC, ucp_Ol_Chiki }, | ||||
|   { 817, PT_SC, ucp_Old_Italic }, | ||||
|   { 828, PT_SC, ucp_Old_North_Arabian }, | ||||
|   { 846, PT_SC, ucp_Old_Permic }, | ||||
|   { 857, PT_SC, ucp_Old_Persian }, | ||||
|   { 869, PT_SC, ucp_Old_South_Arabian }, | ||||
|   { 887, PT_SC, ucp_Old_Turkic }, | ||||
|   { 898, PT_SC, ucp_Oriya }, | ||||
|   { 904, PT_SC, ucp_Osmanya }, | ||||
|   { 912, PT_GC, ucp_P }, | ||||
|   { 914, PT_SC, ucp_Pahawh_Hmong }, | ||||
|   { 927, PT_SC, ucp_Palmyrene }, | ||||
|   { 937, PT_SC, ucp_Pau_Cin_Hau }, | ||||
|   { 949, PT_PC, ucp_Pc }, | ||||
|   { 952, PT_PC, ucp_Pd }, | ||||
|   { 955, PT_PC, ucp_Pe }, | ||||
|   { 958, PT_PC, ucp_Pf }, | ||||
|   { 961, PT_SC, ucp_Phags_Pa }, | ||||
|   { 970, PT_SC, ucp_Phoenician }, | ||||
|   { 981, PT_PC, ucp_Pi }, | ||||
|   { 984, PT_PC, ucp_Po }, | ||||
|   { 987, PT_PC, ucp_Ps }, | ||||
|   { 990, PT_SC, ucp_Psalter_Pahlavi }, | ||||
|   { 1006, PT_SC, ucp_Rejang }, | ||||
|   { 1013, PT_SC, ucp_Runic }, | ||||
|   { 1019, PT_GC, ucp_S }, | ||||
|   { 1021, PT_SC, ucp_Samaritan }, | ||||
|   { 1031, PT_SC, ucp_Saurashtra }, | ||||
|   { 1042, PT_PC, ucp_Sc }, | ||||
|   { 1045, PT_SC, ucp_Sharada }, | ||||
|   { 1053, PT_SC, ucp_Shavian }, | ||||
|   { 1061, PT_SC, ucp_Siddham }, | ||||
|   { 1069, PT_SC, ucp_Sinhala }, | ||||
|   { 1077, PT_PC, ucp_Sk }, | ||||
|   { 1080, PT_PC, ucp_Sm }, | ||||
|   { 1083, PT_PC, ucp_So }, | ||||
|   { 1086, PT_SC, ucp_Sora_Sompeng }, | ||||
|   { 1099, PT_SC, ucp_Sundanese }, | ||||
|   { 1109, PT_SC, ucp_Syloti_Nagri }, | ||||
|   { 1122, PT_SC, ucp_Syriac }, | ||||
|   { 1129, PT_SC, ucp_Tagalog }, | ||||
|   { 1137, PT_SC, ucp_Tagbanwa }, | ||||
|   { 1146, PT_SC, ucp_Tai_Le }, | ||||
|   { 1153, PT_SC, ucp_Tai_Tham }, | ||||
|   { 1162, PT_SC, ucp_Tai_Viet }, | ||||
|   { 1171, PT_SC, ucp_Takri }, | ||||
|   { 1177, PT_SC, ucp_Tamil }, | ||||
|   { 1183, PT_SC, ucp_Telugu }, | ||||
|   { 1190, PT_SC, ucp_Thaana }, | ||||
|   { 1197, PT_SC, ucp_Thai }, | ||||
|   { 1202, PT_SC, ucp_Tibetan }, | ||||
|   { 1210, PT_SC, ucp_Tifinagh }, | ||||
|   { 1219, PT_SC, ucp_Tirhuta }, | ||||
|   { 1227, PT_SC, ucp_Ugaritic }, | ||||
|   { 1236, PT_SC, ucp_Vai }, | ||||
|   { 1240, PT_SC, ucp_Warang_Citi }, | ||||
|   { 1252, PT_ALNUM, 0 }, | ||||
|   { 1256, PT_PXSPACE, 0 }, | ||||
|   { 1260, PT_SPACE, 0 }, | ||||
|   { 1264, PT_UCNC, 0 }, | ||||
|   { 1268, PT_WORD, 0 }, | ||||
|   { 1272, PT_SC, ucp_Yi }, | ||||
|   { 1275, PT_GC, ucp_Z }, | ||||
|   { 1277, PT_PC, ucp_Zl }, | ||||
|   { 1280, PT_PC, ucp_Zp }, | ||||
|   { 1283, PT_PC, ucp_Zs } | ||||
| }; | ||||
|  | ||||
| const int PRIV(utt_size) = sizeof(PRIV(utt)) / sizeof(ucp_type_table); | ||||
|   | ||||
										
											
												File diff suppressed because it is too large
												Load Diff
											
										
									
								
							| @@ -242,7 +242,7 @@ while ((t = *data++) != XCL_END) | ||||
|  | ||||
|       case PT_PXPUNCT: | ||||
|       if ((PRIV(ucp_gentype)[prop->chartype] == ucp_P || | ||||
|             (c < 256 && PRIV(ucp_gentype)[prop->chartype] == ucp_S)) == isprop) | ||||
|             (c < 128 && PRIV(ucp_gentype)[prop->chartype] == ucp_S)) == isprop) | ||||
|         return !negated; | ||||
|       break; | ||||
|  | ||||
|   | ||||
| @@ -192,7 +192,31 @@ enum { | ||||
|   ucp_Miao, | ||||
|   ucp_Sharada, | ||||
|   ucp_Sora_Sompeng, | ||||
|   ucp_Takri | ||||
|   ucp_Takri, | ||||
|   /* New for Unicode 7.0.0: */ | ||||
|   ucp_Bassa_Vah, | ||||
|   ucp_Caucasian_Albanian, | ||||
|   ucp_Duployan, | ||||
|   ucp_Elbasan, | ||||
|   ucp_Grantha, | ||||
|   ucp_Khojki, | ||||
|   ucp_Khudawadi, | ||||
|   ucp_Linear_A, | ||||
|   ucp_Mahajani, | ||||
|   ucp_Manichaean, | ||||
|   ucp_Mende_Kikakui, | ||||
|   ucp_Modi, | ||||
|   ucp_Mro, | ||||
|   ucp_Nabataean, | ||||
|   ucp_Old_North_Arabian, | ||||
|   ucp_Old_Permic, | ||||
|   ucp_Pahawh_Hmong, | ||||
|   ucp_Palmyrene, | ||||
|   ucp_Psalter_Pahlavi, | ||||
|   ucp_Pau_Cin_Hau, | ||||
|   ucp_Siddham, | ||||
|   ucp_Tirhuta, | ||||
|   ucp_Warang_Citi | ||||
| }; | ||||
|  | ||||
| #endif | ||||
|   | ||||
		Reference in New Issue
	
	Block a user
	 Guenter Obiltschnig
					Guenter Obiltschnig