mirror of
				https://github.com/Tencent/rapidjson.git
				synced 2025-11-04 12:17:41 +01:00 
			
		
		
		
	Merge pull request #87 from miloyip/issue85simdsegfault
Fix SIMD page fault by using aligned load
This commit is contained in:
		@@ -178,21 +178,56 @@ void SkipWhitespace(InputStream& is) {
 | 
				
			|||||||
//! Skip whitespace with SSE 4.2 pcmpistrm instruction, testing 16 8-byte characters at once.
 | 
					//! Skip whitespace with SSE 4.2 pcmpistrm instruction, testing 16 8-byte characters at once.
 | 
				
			||||||
inline const char *SkipWhitespace_SIMD(const char* p) {
 | 
					inline const char *SkipWhitespace_SIMD(const char* p) {
 | 
				
			||||||
	static const char whitespace[16] = " \n\r\t";
 | 
						static const char whitespace[16] = " \n\r\t";
 | 
				
			||||||
	__m128i w = _mm_loadu_si128((const __m128i *)&whitespace[0]);
 | 
						static const char whitespaces[4][17] = {
 | 
				
			||||||
 | 
							"                ",
 | 
				
			||||||
 | 
							"\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n",
 | 
				
			||||||
 | 
							"\r\r\r\r\r\r\r\r\r\r\r\r\r\r\r\r",
 | 
				
			||||||
 | 
							"\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t"};
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	for (;;) {
 | 
						// 16-byte align to the lower boundary
 | 
				
			||||||
		__m128i s = _mm_loadu_si128((const __m128i *)p);
 | 
						const char* ap = reinterpret_cast<const char*>(reinterpret_cast<size_t>(p) & ~15);
 | 
				
			||||||
		unsigned r = _mm_cvtsi128_si32(_mm_cmpistrm(w, s, _SIDD_UBYTE_OPS | _SIDD_CMP_EQUAL_ANY | _SIDD_BIT_MASK | _SIDD_NEGATIVE_POLARITY));
 | 
					
 | 
				
			||||||
		if (r == 0)	// all 16 characters are whitespace
 | 
						// Test first unaligned characters
 | 
				
			||||||
			p += 16;
 | 
						// Cannot make use of _mm_cmpistrm() because it stops when encounters '\0' before p
 | 
				
			||||||
		else {		// some of characters may be non-whitespace
 | 
						if (ap != p) {
 | 
				
			||||||
 | 
							const __m128i w0 = _mm_loadu_si128((const __m128i *)&whitespaces[0][0]);
 | 
				
			||||||
 | 
							const __m128i w1 = _mm_loadu_si128((const __m128i *)&whitespaces[1][0]);
 | 
				
			||||||
 | 
							const __m128i w2 = _mm_loadu_si128((const __m128i *)&whitespaces[2][0]);
 | 
				
			||||||
 | 
							const __m128i w3 = _mm_loadu_si128((const __m128i *)&whitespaces[3][0]);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
							unsigned char shift = reinterpret_cast<size_t>(p) & 15;
 | 
				
			||||||
 | 
							const __m128i s = _mm_load_si128(reinterpret_cast<const __m128i*>(ap));
 | 
				
			||||||
 | 
							__m128i x = _mm_cmpeq_epi8(s, w0);
 | 
				
			||||||
 | 
							x = _mm_or_si128(x, _mm_cmpeq_epi8(s, w1));
 | 
				
			||||||
 | 
							x = _mm_or_si128(x, _mm_cmpeq_epi8(s, w2));
 | 
				
			||||||
 | 
							x = _mm_or_si128(x, _mm_cmpeq_epi8(s, w3));
 | 
				
			||||||
 | 
							unsigned short r = (unsigned short)~_mm_movemask_epi8(x);
 | 
				
			||||||
 | 
							r = r >> shift << shift; // Clear results before p
 | 
				
			||||||
 | 
							if (r != 0) {
 | 
				
			||||||
#ifdef _MSC_VER		// Find the index of first non-whitespace
 | 
					#ifdef _MSC_VER		// Find the index of first non-whitespace
 | 
				
			||||||
			unsigned long offset;
 | 
								unsigned long offset;
 | 
				
			||||||
			if (_BitScanForward(&offset, r))
 | 
								_BitScanForward(&offset, r);
 | 
				
			||||||
				return p + offset;
 | 
								return ap + offset;
 | 
				
			||||||
#else
 | 
					#else
 | 
				
			||||||
			if (r != 0)
 | 
								return ap + __builtin_ffs(r) - 1;
 | 
				
			||||||
				return p + __builtin_ffs(r) - 1;
 | 
					#endif
 | 
				
			||||||
 | 
							}
 | 
				
			||||||
 | 
							ap += 16;
 | 
				
			||||||
 | 
						}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						const __m128i w = _mm_loadu_si128((const __m128i *)&whitespace[0]);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						// The rest of string
 | 
				
			||||||
 | 
						for (;; ap += 16) {
 | 
				
			||||||
 | 
							const __m128i s = _mm_load_si128((const __m128i *)ap);
 | 
				
			||||||
 | 
							const unsigned r = _mm_cvtsi128_si32(_mm_cmpistrm(w, s, _SIDD_UBYTE_OPS | _SIDD_CMP_EQUAL_ANY | _SIDD_BIT_MASK | _SIDD_NEGATIVE_POLARITY));
 | 
				
			||||||
 | 
							if (r != 0) {	// some of characters is non-whitespace
 | 
				
			||||||
 | 
					#ifdef _MSC_VER			// Find the index of first non-whitespace
 | 
				
			||||||
 | 
								unsigned long offset;
 | 
				
			||||||
 | 
								_BitScanForward(&offset, r);
 | 
				
			||||||
 | 
								return ap + offset;
 | 
				
			||||||
 | 
					#else
 | 
				
			||||||
 | 
								return ap + __builtin_ffs(r) - 1;
 | 
				
			||||||
#endif
 | 
					#endif
 | 
				
			||||||
		}
 | 
							}
 | 
				
			||||||
	}
 | 
						}
 | 
				
			||||||
@@ -208,28 +243,51 @@ inline const char *SkipWhitespace_SIMD(const char* p) {
 | 
				
			|||||||
		"\r\r\r\r\r\r\r\r\r\r\r\r\r\r\r\r",
 | 
							"\r\r\r\r\r\r\r\r\r\r\r\r\r\r\r\r",
 | 
				
			||||||
		"\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t"};
 | 
							"\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t"};
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	__m128i w0 = _mm_loadu_si128((const __m128i *)&whitespaces[0][0]);
 | 
						const __m128i w0 = _mm_loadu_si128((const __m128i *)&whitespaces[0][0]);
 | 
				
			||||||
	__m128i w1 = _mm_loadu_si128((const __m128i *)&whitespaces[1][0]);
 | 
						const __m128i w1 = _mm_loadu_si128((const __m128i *)&whitespaces[1][0]);
 | 
				
			||||||
	__m128i w2 = _mm_loadu_si128((const __m128i *)&whitespaces[2][0]);
 | 
						const __m128i w2 = _mm_loadu_si128((const __m128i *)&whitespaces[2][0]);
 | 
				
			||||||
	__m128i w3 = _mm_loadu_si128((const __m128i *)&whitespaces[3][0]);
 | 
						const __m128i w3 = _mm_loadu_si128((const __m128i *)&whitespaces[3][0]);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	for (;;) {
 | 
						// 16-byte align to the lower boundary
 | 
				
			||||||
		__m128i s = _mm_loadu_si128((const __m128i *)p);
 | 
						const char* ap = reinterpret_cast<const char*>(reinterpret_cast<size_t>(p) & ~15);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						// Test first unaligned characters
 | 
				
			||||||
 | 
						if (ap != p) {
 | 
				
			||||||
 | 
							unsigned char shift = reinterpret_cast<size_t>(p) & 15;
 | 
				
			||||||
 | 
							const __m128i s = _mm_load_si128(reinterpret_cast<const __m128i*>(ap));
 | 
				
			||||||
		__m128i x = _mm_cmpeq_epi8(s, w0);
 | 
							__m128i x = _mm_cmpeq_epi8(s, w0);
 | 
				
			||||||
		x = _mm_or_si128(x, _mm_cmpeq_epi8(s, w1));
 | 
							x = _mm_or_si128(x, _mm_cmpeq_epi8(s, w1));
 | 
				
			||||||
		x = _mm_or_si128(x, _mm_cmpeq_epi8(s, w2));
 | 
							x = _mm_or_si128(x, _mm_cmpeq_epi8(s, w2));
 | 
				
			||||||
		x = _mm_or_si128(x, _mm_cmpeq_epi8(s, w3));
 | 
							x = _mm_or_si128(x, _mm_cmpeq_epi8(s, w3));
 | 
				
			||||||
		unsigned short r = (unsigned short)~_mm_movemask_epi8(x);
 | 
							unsigned short r = (unsigned short)~_mm_movemask_epi8(x);
 | 
				
			||||||
		if (r == 0)	// all 16 characters are whitespace
 | 
							r = r >> shift << shift; // Clear results before p
 | 
				
			||||||
			p += 16;
 | 
							if (r != 0) {
 | 
				
			||||||
		else {		// some of characters may be non-whitespace
 | 
					 | 
				
			||||||
#ifdef _MSC_VER		// Find the index of first non-whitespace
 | 
					#ifdef _MSC_VER		// Find the index of first non-whitespace
 | 
				
			||||||
			unsigned long offset;
 | 
								unsigned long offset;
 | 
				
			||||||
			if (_BitScanForward(&offset, r))
 | 
								_BitScanForward(&offset, r);
 | 
				
			||||||
				return p + offset;
 | 
								return ap + offset;
 | 
				
			||||||
#else
 | 
					#else
 | 
				
			||||||
			if (r != 0)
 | 
								return ap + __builtin_ffs(r) - 1;
 | 
				
			||||||
				return p + __builtin_ffs(r) - 1;
 | 
					#endif
 | 
				
			||||||
 | 
							}
 | 
				
			||||||
 | 
							ap += 16;
 | 
				
			||||||
 | 
						}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						// The rest of string
 | 
				
			||||||
 | 
						for (;; ap += 16) {
 | 
				
			||||||
 | 
							const __m128i s = _mm_load_si128((const __m128i *)ap);
 | 
				
			||||||
 | 
							__m128i x = _mm_cmpeq_epi8(s, w0);
 | 
				
			||||||
 | 
							x = _mm_or_si128(x, _mm_cmpeq_epi8(s, w1));
 | 
				
			||||||
 | 
							x = _mm_or_si128(x, _mm_cmpeq_epi8(s, w2));
 | 
				
			||||||
 | 
							x = _mm_or_si128(x, _mm_cmpeq_epi8(s, w3));
 | 
				
			||||||
 | 
							unsigned short r = (unsigned short)~_mm_movemask_epi8(x);
 | 
				
			||||||
 | 
							if (r != 0) {	// some of characters may be non-whitespace
 | 
				
			||||||
 | 
					#ifdef _MSC_VER			// Find the index of first non-whitespace
 | 
				
			||||||
 | 
								unsigned long offset;
 | 
				
			||||||
 | 
								_BitScanForward(&offset, r);
 | 
				
			||||||
 | 
								return ap + offset;
 | 
				
			||||||
 | 
					#else
 | 
				
			||||||
 | 
								return ap + __builtin_ffs(r) - 1;
 | 
				
			||||||
#endif
 | 
					#endif
 | 
				
			||||||
		}
 | 
							}
 | 
				
			||||||
	}
 | 
						}
 | 
				
			||||||
 
 | 
				
			|||||||
		Reference in New Issue
	
	Block a user