From 4a0c83fb12ee1b99ff201730d065ddd657ff8ff5 Mon Sep 17 00:00:00 2001 From: Luca Boccassi Date: Sat, 19 Jan 2019 19:39:42 +0000 Subject: [PATCH 1/4] Problem: yqueue false sharing issues on PPC64 Solution: detect cacheline size for aligment purposes at build time instead of hard-coding it, so that PPC and S390 can align to a value greater than the 64 bytes default. Uses libc getconf program, and falls back to the previous value of 64 if not found. --- CMakeLists.txt | 8 ++++++++ acinclude.m4 | 20 ++++++++++++++++++++ builds/cmake/platform.hpp.in | 2 ++ configure.ac | 3 +++ src/yqueue.hpp | 5 +++-- 5 files changed, 36 insertions(+), 2 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index e09cbf49..07bb3edc 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -251,6 +251,14 @@ include(CMakeDependentOption) include(CheckCXXSymbolExists) include(CheckSymbolExists) +execute_process(COMMAND getconf LEVEL1_DCACHE_LINESIZE OUTPUT_VARIABLE CACHELINE_SIZE ERROR_QUIET OUTPUT_STRIP_TRAILING_WHITESPACE) +if(CACHELINE_SIZE STREQUAL "" OR CACHELINE_SIZE EQUAL 0 OR CACHELINE_SIZE EQUAL -1) + set(ZMQ_CACHELINE_SIZE 64) +else() + set(ZMQ_CACHELINE_SIZE ${CACHELINE_SIZE}) +endif() +message(STATUS "Using ${ZMQ_CACHELINE_SIZE} bytes alignment for lock-free data structures") + if(NOT CYGWIN) # TODO cannot we simply do 'if(WIN32) set(ZMQ_HAVE_WINDOWS ON)' or similar? check_include_files(windows.h ZMQ_HAVE_WINDOWS) diff --git a/acinclude.m4 b/acinclude.m4 index 036a30a0..8e3f4d2a 100644 --- a/acinclude.m4 +++ b/acinclude.m4 @@ -1169,3 +1169,23 @@ AC_DEFUN([LIBZMQ_CHECK_POLLER], [{ AC_MSG_ERROR([Invalid API poller '$api_poller' specified]) fi }]) + +dnl ############################################################################## +dnl # LIBZMQ_CHECK_CACHELINE # +dnl # Check cacheline size for alignment purposes # +dnl ############################################################################## +AC_DEFUN([LIBZMQ_CHECK_CACHELINE], [{ + + zmq_cacheline_size=64 + AC_CHECK_TOOL(libzmq_getconf, getconf) + if ! test "x$libzmq_getconf" = "x"; then + zmq_cacheline_size=$($libzmq_getconf LEVEL1_DCACHE_LINESIZE 2>/dev/null || echo 64) + if test "x$zmq_cacheline_size" = "x0" -o "x$zmq_cacheline_size" = "x-1"; then + # getconf on some architectures does not know the size, try to fallback to + # the value the kernel knows on Linux + zmq_cacheline_size=$(cat /sys/devices/system/cpu/cpu0/cache/index0/coherency_line_size 2>/dev/null || echo 64) + fi + fi + AC_MSG_NOTICE([Using "$zmq_cacheline_size" bytes alignment for lock-free data structures]) + AC_DEFINE_UNQUOTED(ZMQ_CACHELINE_SIZE, $zmq_cacheline_size, [Using "$zmq_cacheline_size" bytes alignment for lock-free data structures]) +}]) diff --git a/builds/cmake/platform.hpp.in b/builds/cmake/platform.hpp.in index 82c49d8f..21a848fd 100644 --- a/builds/cmake/platform.hpp.in +++ b/builds/cmake/platform.hpp.in @@ -11,6 +11,8 @@ #cmakedefine ZMQ_POLL_BASED_ON_SELECT #cmakedefine ZMQ_POLL_BASED_ON_POLL +#cmakedefine ZMQ_CACHELINE_SIZE @ZMQ_CACHELINE_SIZE@ + #cmakedefine ZMQ_FORCE_MUTEXES #cmakedefine HAVE_FORK diff --git a/configure.ac b/configure.ac index ec593c87..71d393ea 100644 --- a/configure.ac +++ b/configure.ac @@ -376,6 +376,9 @@ LIBZMQ_CHECK_DOC_BUILD # Check polling system, set appropriate macro in src/platform.hpp LIBZMQ_CHECK_POLLER +# Check cacheline size, set appropriate macro in src/platform.hpp +LIBZMQ_CHECK_CACHELINE + # Checks for header files. AC_HEADER_STDC AC_CHECK_HEADERS(\ diff --git a/src/yqueue.hpp b/src/yqueue.hpp index 9e8b3ec0..c2dde691 100644 --- a/src/yqueue.hpp +++ b/src/yqueue.hpp @@ -55,8 +55,9 @@ namespace zmq // posix_memalign available. Default value is 64, this alignment will // prevent two queue chunks from occupying the same CPU cache line on // architectures where cache lines are <= 64 bytes (e.g. most things -// except POWER). -template class yqueue_t +// except POWER). It is detected at build time to try to account for other +// platforms like POWER and s390x. +template class yqueue_t #else template class yqueue_t #endif From 8040e28b2640d59fcd1ec7638b5bbb43965dbbf6 Mon Sep 17 00:00:00 2001 From: Luca Boccassi Date: Sat, 19 Jan 2019 20:50:26 +0000 Subject: [PATCH 2/4] Problem: posix_memalign autoconf check broken on some platforms Solution: import macro from autoconf-archive that works better than AC_CHECK_FUNCS --- configure.ac | 4 ++- m4/ax_func_posix_memalign.m4 | 50 ++++++++++++++++++++++++++++++++++++ 2 files changed, 53 insertions(+), 1 deletion(-) create mode 100644 m4/ax_func_posix_memalign.m4 diff --git a/configure.ac b/configure.ac index 71d393ea..90c042ec 100644 --- a/configure.ac +++ b/configure.ac @@ -19,6 +19,7 @@ m4_include([m4/ax_cxx_compile_stdcxx_11.m4]) m4_include([m4/ax_code_coverage.m4]) m4_include([m4/ax_valgrind_check.m4]) m4_include([m4/ax_check_vscript.m4]) +m4_include([m4/ax_func_posix_memalign.m4]) m4_ifdef([AM_SILENT_RULES], [AM_SILENT_RULES([yes])]) # This lets us use PACKAGE_VERSION in Makefiles @@ -78,6 +79,7 @@ AC_LIBTOOL_WIN32_DLL AC_PROG_LIBTOOL AX_VALGRIND_CHECK AX_CHECK_VSCRIPT +AX_FUNC_POSIX_MEMALIGN AC_ARG_ENABLE([force-CXX98-compat], [AS_HELP_STRING([--enable-force-CXX98-compat], [force C++98 build [default=disabled]])]) @@ -670,7 +672,7 @@ AC_LANG_POP([C++]) # Checks for library functions. AC_TYPE_SIGNAL -AC_CHECK_FUNCS(perror gettimeofday clock_gettime memset socket getifaddrs freeifaddrs fork posix_memalign mkdtemp accept4) +AC_CHECK_FUNCS(perror gettimeofday clock_gettime memset socket getifaddrs freeifaddrs fork mkdtemp accept4) AC_CHECK_HEADERS([alloca.h]) # pthread_setname is non-posix, and there are at least 4 different implementations diff --git a/m4/ax_func_posix_memalign.m4 b/m4/ax_func_posix_memalign.m4 new file mode 100644 index 00000000..2442ceca --- /dev/null +++ b/m4/ax_func_posix_memalign.m4 @@ -0,0 +1,50 @@ +# =========================================================================== +# https://www.gnu.org/software/autoconf-archive/ax_func_posix_memalign.html +# =========================================================================== +# +# SYNOPSIS +# +# AX_FUNC_POSIX_MEMALIGN +# +# DESCRIPTION +# +# Some versions of posix_memalign (notably glibc 2.2.5) incorrectly apply +# their power-of-two check to the size argument, not the alignment +# argument. AX_FUNC_POSIX_MEMALIGN defines HAVE_POSIX_MEMALIGN if the +# power-of-two check is correctly applied to the alignment argument. +# +# LICENSE +# +# Copyright (c) 2008 Scott Pakin +# +# Copying and distribution of this file, with or without modification, are +# permitted in any medium without royalty provided the copyright notice +# and this notice are preserved. This file is offered as-is, without any +# warranty. + +#serial 9 + +AC_DEFUN([AX_FUNC_POSIX_MEMALIGN], +[AC_CACHE_CHECK([for working posix_memalign], + [ax_cv_func_posix_memalign_works], + [AC_RUN_IFELSE([AC_LANG_SOURCE([[ +#include + +int +main () +{ + void *buffer; + + /* Some versions of glibc incorrectly perform the alignment check on + * the size word. */ + exit (posix_memalign (&buffer, sizeof(void *), 123) != 0); +} + ]])], + [ax_cv_func_posix_memalign_works=yes], + [ax_cv_func_posix_memalign_works=no], + [ax_cv_func_posix_memalign_works=no])]) +if test "$ax_cv_func_posix_memalign_works" = "yes" ; then + AC_DEFINE([HAVE_POSIX_MEMALIGN], [1], + [Define to 1 if `posix_memalign' works.]) +fi +]) From bfb4a868fc9643c3783ed83a9910f1c4023baf16 Mon Sep 17 00:00:00 2001 From: Luca Boccassi Date: Sat, 19 Jan 2019 21:42:54 +0000 Subject: [PATCH 3/4] Problem: atomic intrinsics unreliable on PPC64 and RISC-V Solution: prefer CXX11 atomics if they are available to compiler intrinsics. test_hwm_pubsub fails 50% of the times on PPC64 and RISC-V with an apparent memory corruption of messages sent by the application thread to the I/O thread when using compiler intrinsics. Switching to CXX11 atomics makes the test reliable again. The standard API should be preferred anyway, if available. --- src/atomic_counter.hpp | 4 ++-- src/atomic_ptr.hpp | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/src/atomic_counter.hpp b/src/atomic_counter.hpp index 4f2ff584..3f62f6fd 100644 --- a/src/atomic_counter.hpp +++ b/src/atomic_counter.hpp @@ -35,11 +35,11 @@ #if defined ZMQ_FORCE_MUTEXES #define ZMQ_ATOMIC_COUNTER_MUTEX -#elif defined ZMQ_HAVE_ATOMIC_INTRINSICS -#define ZMQ_ATOMIC_COUNTER_INTRINSIC #elif (defined __cplusplus && __cplusplus >= 201103L) \ || (defined _MSC_VER && _MSC_VER >= 1900) #define ZMQ_ATOMIC_COUNTER_CXX11 +#elif defined ZMQ_HAVE_ATOMIC_INTRINSICS +#define ZMQ_ATOMIC_COUNTER_INTRINSIC #elif (defined __i386__ || defined __x86_64__) && defined __GNUC__ #define ZMQ_ATOMIC_COUNTER_X86 #elif defined __ARM_ARCH_7A__ && defined __GNUC__ diff --git a/src/atomic_ptr.hpp b/src/atomic_ptr.hpp index e16259f1..7b345006 100644 --- a/src/atomic_ptr.hpp +++ b/src/atomic_ptr.hpp @@ -34,11 +34,11 @@ #if defined ZMQ_FORCE_MUTEXES #define ZMQ_ATOMIC_PTR_MUTEX -#elif defined ZMQ_HAVE_ATOMIC_INTRINSICS -#define ZMQ_ATOMIC_PTR_INTRINSIC #elif (defined __cplusplus && __cplusplus >= 201103L) \ || (defined _MSC_VER && _MSC_VER >= 1900) #define ZMQ_ATOMIC_PTR_CXX11 +#elif defined ZMQ_HAVE_ATOMIC_INTRINSICS +#define ZMQ_ATOMIC_PTR_INTRINSIC #elif (defined __i386__ || defined __x86_64__) && defined __GNUC__ #define ZMQ_ATOMIC_PTR_X86 #elif defined __ARM_ARCH_7A__ && defined __GNUC__ From 3b6db4b370ae6ef364c2e8284b2841f8e2b65391 Mon Sep 17 00:00:00 2001 From: Luca Boccassi Date: Sat, 19 Jan 2019 22:28:15 +0000 Subject: [PATCH 4/4] Problem: test_pair_ipc fails on GNU/Hurd due to wildcard bind Solution: mark it as XFAIL like the other tests that use ipc://* --- Makefile.am | 1 + 1 file changed, 1 insertion(+) diff --git a/Makefile.am b/Makefile.am index 279fb0c2..42982d9f 100644 --- a/Makefile.am +++ b/Makefile.am @@ -1047,6 +1047,7 @@ endif if ON_GNU XFAIL_TESTS += tests/test_ipc_wildcard \ tests/test_reqrep_ipc \ + tests/test_pair_ipc \ tests/test_term_endpoint endif