From 24ae8f8e5b96bd7825ccdee40de2b45c8db66341 Mon Sep 17 00:00:00 2001
From: Howard Hinnant
Date: Wed, 3 Jul 2013 17:39:28 +0000
Subject: [PATCH] Matthew Dempsky: Attached patch replaces the type punning
 with memcpy(), which on x86/x86-64 clang optimizes to direct word accesses
 anyway. This fixes an unaligned word access in murmurhash/cityhash.

git-svn-id: https://llvm.org/svn/llvm-project/libcxx/trunk@185558 91177308-0d34-0410-b5e6-96231b3b80d8
---
 include/memory | 74 ++++++++++++++++++++++++++++----------------
 1 file changed, 42 insertions(+), 32 deletions(-)

diff --git a/include/memory b/include/memory
index 758e364b..ffd0cd0c 100644
--- a/include/memory
+++ b/include/memory
@@ -3128,6 +3128,16 @@ template <class _Tp>
 
 template <class _Tp> struct hash;
 
+template <class _Size>
+inline _LIBCPP_INLINE_VISIBILITY
+_Size
+__loadword(const void* __p)
+{
+    _Size __r;
+    std::memcpy(&__r, __p, sizeof(__r));
+    return __r;
+}
+
 // We use murmur2 when size_t is 32 bits, and cityhash64 when size_t
 // is 64 bits.  This is because cityhash64 uses 64bit x 64bit
 // multiplication, which can be very slow on 32-bit systems.
@@ -3151,7 +3161,7 @@ __murmur2_or_cityhash<_Size, 32>::operator()(const void* __key, _Size __len)
     const unsigned char* __data = static_cast<const unsigned char*>(__key);
     for (; __len >= 4; __data += 4, __len -= 4)
     {
-        _Size __k = *(const _Size*)__data;
+        _Size __k = __loadword<_Size>(__data);
         __k *= __m;
         __k ^= __k >> __r;
         __k *= __m;
@@ -3210,13 +3220,13 @@ struct __murmur2_or_cityhash<_Size, 64>
 
   static _Size __hash_len_0_to_16(const char* __s, _Size __len) {
     if (__len > 8) {
-      const _Size __a = *(const _Size*)__s;
-      const _Size __b = *(const _Size*)(__s + __len - 8);
+      const _Size __a = __loadword<_Size>(__s);
+      const _Size __b = __loadword<_Size>(__s + __len - 8);
       return __hash_len_16(__a, __rotate_by_at_least_1(__b + __len, __len)) ^ __b;
     }
     if (__len >= 4) {
-      const uint32_t __a = *(const uint32_t*)(__s);
-      const uint32_t __b = *(const uint32_t*)(__s + __len - 4);
+      const uint32_t __a = __loadword<uint32_t>(__s);
+      const uint32_t __b = __loadword<uint32_t>(__s + __len - 4);
       return __hash_len_16(__len + (__a << 3), __b);
     }
     if (__len > 0) {
@@ -3232,10 +3242,10 @@ struct __murmur2_or_cityhash<_Size, 64>
   }
 
   static _Size __hash_len_17_to_32(const char *__s, _Size __len) {
-    const _Size __a = *(const _Size*)(__s) * __k1;
-    const _Size __b = *(const _Size*)(__s + 8);
-    const _Size __c = *(const _Size*)(__s + __len - 8) * __k2;
-    const _Size __d = *(const _Size*)(__s + __len - 16) * __k0;
+    const _Size __a = __loadword<_Size>(__s) * __k1;
+    const _Size __b = __loadword<_Size>(__s + 8);
+    const _Size __c = __loadword<_Size>(__s + __len - 8) * __k2;
+    const _Size __d = __loadword<_Size>(__s + __len - 16) * __k0;
     return __hash_len_16(__rotate(__a - __b, 43) + __rotate(__c, 30) + __d,
                          __a + __rotate(__b ^ __k3, 20) - __c + __len);
   }
@@ -3256,33 +3266,33 @@ struct __murmur2_or_cityhash<_Size, 64>
   // Return a 16-byte hash for s[0] ... s[31], a, and b.  Quick and dirty.
   static pair<_Size, _Size> __weak_hash_len_32_with_seeds(
       const char* __s, _Size __a, _Size __b) {
-    return __weak_hash_len_32_with_seeds(*(const _Size*)(__s),
-                                         *(const _Size*)(__s + 8),
-                                         *(const _Size*)(__s + 16),
-                                         *(const _Size*)(__s + 24),
+    return __weak_hash_len_32_with_seeds(__loadword<_Size>(__s),
+                                         __loadword<_Size>(__s + 8),
+                                         __loadword<_Size>(__s + 16),
+                                         __loadword<_Size>(__s + 24),
                                          __a, __b);
   }
 
   // Return an 8-byte hash for 33 to 64 bytes.
  static _Size __hash_len_33_to_64(const char *__s, size_t __len) {
-    _Size __z = *(const _Size*)(__s + 24);
-    _Size __a = *(const _Size*)(__s) +
-                (__len + *(const _Size*)(__s + __len - 16)) * __k0;
+    _Size __z = __loadword<_Size>(__s + 24);
+    _Size __a = __loadword<_Size>(__s) +
+                (__len + __loadword<_Size>(__s + __len - 16)) * __k0;
     _Size __b = __rotate(__a + __z, 52);
     _Size __c = __rotate(__a, 37);
-    __a += *(const _Size*)(__s + 8);
+    __a += __loadword<_Size>(__s + 8);
     __c += __rotate(__a, 7);
-    __a += *(const _Size*)(__s + 16);
+    __a += __loadword<_Size>(__s + 16);
     _Size __vf = __a + __z;
     _Size __vs = __b + __rotate(__a, 31) + __c;
-    __a = *(const _Size*)(__s + 16) + *(const _Size*)(__s + __len - 32);
-    __z += *(const _Size*)(__s + __len - 8);
+    __a = __loadword<_Size>(__s + 16) + __loadword<_Size>(__s + __len - 32);
+    __z += __loadword<_Size>(__s + __len - 8);
     __b = __rotate(__a + __z, 52);
     __c = __rotate(__a, 37);
-    __a += *(const _Size*)(__s + __len - 24);
+    __a += __loadword<_Size>(__s + __len - 24);
     __c += __rotate(__a, 7);
-    __a += *(const _Size*)(__s + __len - 16);
+    __a += __loadword<_Size>(__s + __len - 16);
     _Size __wf = __a + __z;
     _Size __ws = __b + __rotate(__a, 31) + __c;
     _Size __r = __shift_mix((__vf + __ws) * __k2 + (__wf + __vs) * __k0);
@@ -3308,26 +3318,26 @@ __murmur2_or_cityhash<_Size, 64>::operator()(const void* __key, _Size __len)
 
   // For strings over 64 bytes we hash the end first, and then as we
   // loop we keep 56 bytes of state: v, w, x, y, and z.
-  _Size __x = *(const _Size*)(__s + __len - 40);
-  _Size __y = *(const _Size*)(__s + __len - 16) +
-              *(const _Size*)(__s + __len - 56);
-  _Size __z = __hash_len_16(*(const _Size*)(__s + __len - 48) + __len,
-                            *(const _Size*)(__s + __len - 24));
+  _Size __x = __loadword<_Size>(__s + __len - 40);
+  _Size __y = __loadword<_Size>(__s + __len - 16) +
+              __loadword<_Size>(__s + __len - 56);
+  _Size __z = __hash_len_16(__loadword<_Size>(__s + __len - 48) + __len,
+                            __loadword<_Size>(__s + __len - 24));
   pair<_Size, _Size> __v = __weak_hash_len_32_with_seeds(__s + __len - 64, __len, __z);
   pair<_Size, _Size> __w = __weak_hash_len_32_with_seeds(__s + __len - 32, __y + __k1, __x);
-  __x = __x * __k1 + *(const _Size*)(__s);
+  __x = __x * __k1 + __loadword<_Size>(__s);
 
   // Decrease len to the nearest multiple of 64, and operate on 64-byte chunks.
   __len = (__len - 1) & ~static_cast<_Size>(63);
   do {
-    __x = __rotate(__x + __y + __v.first + *(const _Size*)(__s + 8), 37) * __k1;
-    __y = __rotate(__y + __v.second + *(const _Size*)(__s + 48), 42) * __k1;
+    __x = __rotate(__x + __y + __v.first + __loadword<_Size>(__s + 8), 37) * __k1;
+    __y = __rotate(__y + __v.second + __loadword<_Size>(__s + 48), 42) * __k1;
     __x ^= __w.second;
-    __y += __v.first + *(const _Size*)(__s + 40);
+    __y += __v.first + __loadword<_Size>(__s + 40);
     __z = __rotate(__z + __w.first, 33) * __k1;
     __v = __weak_hash_len_32_with_seeds(__s, __v.second * __k1, __x + __w.first);
     __w = __weak_hash_len_32_with_seeds(__s + 32, __z + __w.second,
-                                        __y + *(const _Size*)(__s + 16));
+                                        __y + __loadword<_Size>(__s + 16));
     std::swap(__z, __x);
     __s += 64;
     __len -= 64;
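
Why the memcpy-based load is the right fix, illustrated outside of libc++: dereferencing a pointer obtained by casting an arbitrary byte address to a wider type is undefined behavior when that address is not suitably aligned, and it can fault outright on strict-alignment targets, whereas copying the bytes with std::memcpy into a local object is well defined for any alignment. The sketch below is a minimal, self-contained illustration of the same pattern; the names load_word and buf are hypothetical and are not part of the patch, which uses the internal helper __loadword<_Size> shown in the hunks above.

#include <cstdint>
#include <cstdio>
#include <cstring>

// Same idea as the __loadword helper added by the patch: read sizeof(T)
// bytes through memcpy instead of dereferencing a possibly misaligned pointer.
template <class T>
T load_word(const void* p)
{
    T r;
    std::memcpy(&r, p, sizeof(r));  // defined for any alignment of p
    return r;
}

int main()
{
    unsigned char buf[16];
    for (int i = 0; i < 16; ++i)
        buf[i] = static_cast<unsigned char>(i + 1);

    // buf + 1 is misaligned for an 8-byte load on typical ABIs.
    // *(const std::uint64_t*)(buf + 1) would be an unaligned access (UB);
    // the memcpy-based load reads the same eight bytes safely.
    std::uint64_t v = load_word<std::uint64_t>(buf + 1);
    std::printf("%016llx\n", static_cast<unsigned long long>(v));
    return 0;
}

This matches the commit message: on x86/x86-64, clang typically lowers a fixed-size memcpy like this to a single load at ordinary optimization levels, so the change costs essentially nothing there while removing the unaligned word accesses that the murmur2/cityhash code performed at arbitrary string offsets.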