Problem: yqueue false sharing issues on PPC64

Solution: detect the cacheline size used for alignment at build time
instead of hard-coding it, so that PPC and S390 can align to a value
greater than the 64-byte default.
This uses the getconf program shipped with libc, and falls back to the
previous value of 64 if getconf is not found.
This commit is contained in:
Luca Boccassi 2019-01-19 19:39:42 +00:00
parent 0761e6bb48
commit 4a0c83fb12
5 changed files with 36 additions and 2 deletions

View File

@ -251,6 +251,14 @@ include(CMakeDependentOption)
include(CheckCXXSymbolExists)
include(CheckSymbolExists)
# Detect the L1 data cacheline size used to align lock-free data structures.
# NOTE: getconf is executed at configure time on the machine running CMake, so
# when cross-compiling the reported value describes the build host.
execute_process(
  COMMAND getconf LEVEL1_DCACHE_LINESIZE
  OUTPUT_VARIABLE CACHELINE_SIZE
  ERROR_QUIET
  OUTPUT_STRIP_TRAILING_WHITESPACE)
# getconf may be missing (empty output) or may not know the value and print 0,
# -1 or a non-numeric token such as "undefined". Only a positive integer is a
# usable alignment; anything else keeps the historical default of 64 bytes.
if("${CACHELINE_SIZE}" MATCHES "^[1-9][0-9]*$")
  set(ZMQ_CACHELINE_SIZE ${CACHELINE_SIZE})
else()
  set(ZMQ_CACHELINE_SIZE 64)
endif()
message(STATUS "Using ${ZMQ_CACHELINE_SIZE} bytes alignment for lock-free data structures")
if(NOT CYGWIN)
# TODO cannot we simply do 'if(WIN32) set(ZMQ_HAVE_WINDOWS ON)' or similar?
check_include_files(windows.h ZMQ_HAVE_WINDOWS)

View File

@ -1169,3 +1169,23 @@ AC_DEFUN([LIBZMQ_CHECK_POLLER], [{
AC_MSG_ERROR([Invalid API poller '$api_poller' specified])
fi
}])
dnl ##############################################################################
dnl # LIBZMQ_CHECK_CACHELINE                                                     #
dnl # Check cacheline size for alignment purposes                                #
dnl #                                                                            #
dnl # Defines ZMQ_CACHELINE_SIZE. The value comes from                           #
dnl # getconf LEVEL1_DCACHE_LINESIZE, then from the Linux sysfs entry of cpu0,   #
dnl # and defaults to 64 when neither source yields a positive integer.          #
dnl ##############################################################################
AC_DEFUN([LIBZMQ_CHECK_CACHELINE], [{
    zmq_cacheline_size=64
    AC_CHECK_TOOL(libzmq_getconf, getconf)
    if test "x$libzmq_getconf" != "x"; then
        zmq_cacheline_size=$($libzmq_getconf LEVEL1_DCACHE_LINESIZE 2>/dev/null || echo 64)
        # getconf on some architectures does not know the size and can exit
        # successfully while printing 0, -1, nothing at all or the word undefined.
        # Anything that is not a positive integer is treated as unknown; the
        # numeric comparison fails for non-numeric text, hence the redirect.
        if ! test "$zmq_cacheline_size" -gt 0 2>/dev/null; then
            # Try to fall back to the value the kernel knows on Linux
            zmq_cacheline_size=$(cat /sys/devices/system/cpu/cpu0/cache/index0/coherency_line_size 2>/dev/null || echo 64)
        fi
    fi
    # Final guard: never emit a define that is not a positive integer, since
    # the value is used as a template argument in the C++ sources.
    if ! test "$zmq_cacheline_size" -gt 0 2>/dev/null; then
        zmq_cacheline_size=64
    fi
    AC_MSG_NOTICE([Using "$zmq_cacheline_size" bytes alignment for lock-free data structures])
    AC_DEFINE_UNQUOTED(ZMQ_CACHELINE_SIZE, $zmq_cacheline_size, [Using "$zmq_cacheline_size" bytes alignment for lock-free data structures])
}])

View File

@ -11,6 +11,8 @@
#cmakedefine ZMQ_POLL_BASED_ON_SELECT
#cmakedefine ZMQ_POLL_BASED_ON_POLL
/* Alignment in bytes for lock-free data structures; the value is substituted
   from the CMake variable ZMQ_CACHELINE_SIZE when this template is configured. */
#cmakedefine ZMQ_CACHELINE_SIZE @ZMQ_CACHELINE_SIZE@
#cmakedefine ZMQ_FORCE_MUTEXES
#cmakedefine HAVE_FORK

View File

@ -376,6 +376,9 @@ LIBZMQ_CHECK_DOC_BUILD
# Check polling system, set appropriate macro in src/platform.hpp
LIBZMQ_CHECK_POLLER
# Check cacheline size, set appropriate macro in src/platform.hpp
LIBZMQ_CHECK_CACHELINE
# Checks for header files.
AC_HEADER_STDC
AC_CHECK_HEADERS(\

View File

@ -55,8 +55,9 @@ namespace zmq
// posix_memalign available. Default value is 64, this alignment will
// prevent two queue chunks from occupying the same CPU cache line on
// architectures where cache lines are <= 64 bytes (e.g. most things
// except POWER).
template <typename T, int N, size_t ALIGN = 64> class yqueue_t
// except POWER). It is detected at build time to try to account for other
// platforms like POWER and s390x.
template <typename T, int N, size_t ALIGN = ZMQ_CACHELINE_SIZE> class yqueue_t
#else
template <typename T, int N> class yqueue_t
#endif