[/
/ Copyright (c) 2009 Helge Bahmann
/
/ Distributed under the Boost Software License, Version 1.0. (See accompanying
/ file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
/]

[section:example_reference_counters Reference counting]

The purpose of a ['reference counter] is to count the number
of pointers to an object. The object can be destroyed as
soon as the reference counter reaches zero.

[section Implementation]

[c++]

  #include <boost/intrusive_ptr.hpp>
  #include <boost/atomic.hpp>

  class X {
  public:
    typedef boost::intrusive_ptr<X> pointer;
    X() : refcount_(0) {}

  private:
    mutable boost::atomic<int> refcount_;
    friend void intrusive_ptr_add_ref(const X * x)
    {
      x->refcount_.fetch_add(1, boost::memory_order_relaxed);
    }
    friend void intrusive_ptr_release(const X * x)
    {
      if (x->refcount_.fetch_sub(1, boost::memory_order_release) == 1) {
        boost::atomic_thread_fence(boost::memory_order_acquire);
        delete x;
      }
    }
  };

[endsect]

[section Usage]

[c++]

  X::pointer x = new X;

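Copying an [^X::pointer] calls [^intrusive_ptr_add_ref], and destroying
one calls [^intrusive_ptr_release]; for illustration, a sketch of the
counter values (assuming no other references exist):

[c++]

  X::pointer x = new X;    // refcount_ == 1
  {
    X::pointer y = x;      // intrusive_ptr_add_ref: refcount_ == 2
  }                        // y destroyed, intrusive_ptr_release: refcount_ == 1
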
[endsect]

[section Discussion]

Increasing the reference counter can always be done with
[^memory_order_relaxed]: New references to an object can only
be formed from an existing reference, and passing an existing
reference from one thread to another must already provide any
required synchronization.

It is important to enforce that any possible access to the object in
one thread (through an existing reference) ['happens before]
deleting the object in a different thread. This is achieved
by a "release" operation after dropping a reference (any
access to the object through this reference must obviously
have happened before) and an "acquire" operation before
deleting the object.

It would be possible to use [^memory_order_acq_rel] for the
[^fetch_sub] operation, but this results in unneeded "acquire"
operations when the reference counter does not yet reach zero
and may impose a performance penalty.

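For comparison, this is how [^intrusive_ptr_release] could be written
with [^memory_order_acq_rel] (a sketch of the alternative discussed
above, not the recommended form):

[c++]

  friend void intrusive_ptr_release(const X * x)
  {
    // acq_rel orders every decrement, including those that do
    // not drop the counter to zero
    if (x->refcount_.fetch_sub(1, boost::memory_order_acq_rel) == 1) {
      delete x; // no separate acquire fence is needed here
    }
  }
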
[endsect]

[endsect]

[section:example_spinlock Spinlock]

The purpose of a ['spin lock] is to prevent multiple threads
from concurrently accessing a shared data structure. In contrast
to a mutex, threads will busy-wait and waste CPU cycles instead
of yielding the CPU to another thread. ['Do not use spinlocks
unless you are certain that you understand the consequences.]

[section Implementation]

[c++]

  #include <boost/atomic.hpp>

  class spinlock {
  private:
    typedef enum {Locked, Unlocked} LockState;
    boost::atomic<LockState> state_;

  public:
    spinlock() : state_(Unlocked) {}

    void lock()
    {
      while (state_.exchange(Locked, boost::memory_order_acquire) == Locked) {
        /* busy-wait */
      }
    }
    void unlock()
    {
      state_.store(Unlocked, boost::memory_order_release);
    }
  };

[endsect]

[section Usage]

[c++]

  spinlock s;

  s.lock();
  // access data structure here
  s.unlock();

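Pairing [^lock()] and [^unlock()] manually is easy to get wrong in the
presence of exceptions or early returns. A minimal RAII guard for the
spinlock above (a sketch, not part of Boost.Atomic) keeps the pair
balanced automatically:

[c++]

  class scoped_spinlock {
  public:
    explicit scoped_spinlock(spinlock & s) : s_(s) { s_.lock(); }
    ~scoped_spinlock() { s_.unlock(); }
  private:
    scoped_spinlock(const scoped_spinlock &);             // non-copyable
    scoped_spinlock & operator=(const scoped_spinlock &);
    spinlock & s_;
  };

  // usage: the lock is released when the guard goes out of scope
  {
    scoped_spinlock guard(s);
    // access data structure here
  }
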
[endsect]

[section Discussion]

The purpose of the spinlock is to make sure that one access
to the shared data structure always strictly "happens before"
another. The use of acquire/release in lock/unlock is required
and sufficient to guarantee this ordering.

It would be correct to write the "lock" operation in the following
way:

[c++]

  void lock()
  {
    while (state_.exchange(Locked, boost::memory_order_relaxed) == Locked) {
      /* busy-wait */
    }
    boost::atomic_thread_fence(boost::memory_order_acquire);
  }

This "optimization" is, however, a) useless and b) possibly harmful:
a) since the thread busily spins on a blocked spinlock anyway, it
does not matter whether it wastes its CPU cycles on just "exchange"
operations or on both useless "exchange" and "acquire" operations;
b) a tight "exchange" loop without any memory-synchronizing
instruction introduced through an "acquire" operation will on some
systems monopolize the memory subsystem and degrade the performance
of other system components.

[endsect]

[endsect]

[section:singleton Singleton with double-checked locking pattern]

The purpose of the ['Singleton with double-checked locking pattern]
is to ensure that at most one instance of a particular object is
created. If one instance has been created already, access to the
existing object should be as lightweight as possible.

[section Implementation]

[c++]

  #include <boost/atomic.hpp>
  #include <boost/thread/mutex.hpp>

  class X {
  public:
    static X * instance()
    {
      X * tmp = instance_.load(boost::memory_order_consume);
      if (!tmp) {
        boost::mutex::scoped_lock guard(instantiation_mutex);
        tmp = instance_.load(boost::memory_order_consume);
        if (!tmp) {
          tmp = new X;
          instance_.store(tmp, boost::memory_order_release);
        }
      }
      return tmp;
    }
  private:
    static boost::atomic<X *> instance_;
    static boost::mutex instantiation_mutex;
  };

  boost::atomic<X *> X::instance_(0);
  boost::mutex X::instantiation_mutex;

[endsect]

[section Usage]

[c++]

  X * x = X::instance();
  // dereference x

[endsect]

[section Discussion]

The mutex makes sure that only one instance of the object is
ever created. The [^instance] method must make sure that any
dereference of the object strictly "happens after" creating
the instance in another thread. The use of [^memory_order_release]
after creating and initializing the object and [^memory_order_consume]
before dereferencing the object provides this guarantee.

It would be permissible to use [^memory_order_acquire] instead of
[^memory_order_consume], but this provides a stronger guarantee
than is required, since only operations depending on the value of
the pointer need to be ordered.

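The [^memory_order_acquire] variant of the first load would read as
follows (a sketch for comparison; it is correct, just stronger than
necessary):

[c++]

  // acquire orders *all* subsequent memory operations after the load,
  // not only those that depend on the loaded pointer value
  X * tmp = instance_.load(boost::memory_order_acquire);
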
[endsect]

[endsect]

[section:example_ringbuffer Wait-free ring buffer]

A ['wait-free ring buffer] provides a mechanism for relaying objects
from a single "producer" thread to a single "consumer" thread without
any locks. The operations on this data structure are "wait-free", which
means that each operation finishes within a constant number of steps.
This makes the data structure suitable for use in hard real-time systems
or for communication with interrupt/signal handlers.

[section Implementation]

[c++]

  #include <boost/atomic.hpp>

  template<typename T, size_t Size>
  class ringbuffer {
  public:
    ringbuffer() : head_(0), tail_(0) {}

    bool push(const T & value)
    {
      size_t head = head_.load(boost::memory_order_relaxed);
      size_t next_head = next(head);
      if (next_head == tail_.load(boost::memory_order_acquire))
        return false;
      ring_[head] = value;
      head_.store(next_head, boost::memory_order_release);
      return true;
    }
    bool pop(T & value)
    {
      size_t tail = tail_.load(boost::memory_order_relaxed);
      if (tail == head_.load(boost::memory_order_acquire))
        return false;
      value = ring_[tail];
      tail_.store(next(tail), boost::memory_order_release);
      return true;
    }
  private:
    size_t next(size_t current)
    {
      return (current + 1) % Size;
    }
    T ring_[Size];
    boost::atomic<size_t> head_, tail_;
  };

[endsect]

[section Usage]

[c++]

  ringbuffer<int, 32> r;

  // try to insert an element
  if (r.push(42)) { /* succeeded */ }
  else { /* buffer full */ }

  // try to retrieve an element
  int value;
  if (r.pop(value)) { /* succeeded */ }
  else { /* buffer empty */ }

[endsect]

[section Discussion]

The implementation makes sure that the ring indices do
not "lap around" each other, ensuring that no elements
are either lost or read twice.

Furthermore, it must guarantee that read access to a
particular object in [^pop] "happens after" it has been
written in [^push]. This is achieved by writing [^head_]
with "release" and reading it with "acquire". Conversely,
the implementation also ensures that read access to
a particular ring element "happens before" rewriting
this element with a new value, by accessing [^tail_]
with the appropriate ordering constraints.

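To illustrate the intended single-producer/single-consumer use, a
minimal sketch (each function is assumed to run in one dedicated
thread; thread creation is omitted):

[c++]

  ringbuffer<int, 32> r;

  void producer()
  {
    for (int i = 0; i < 100; ) {
      if (r.push(i))  // release store to head_ publishes ring_[head]
        ++i;          // retry while the buffer is full
    }
  }

  void consumer()
  {
    int value;
    for (int n = 0; n < 100; ) {
      if (r.pop(value)) // acquire load of head_ makes the element visible
        ++n;            // retry while the buffer is empty
    }
  }
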
[endsect]

[endsect]

[section:mp_queue Lock-free multi-producer queue]

The purpose of the ['lock-free multi-producer queue] is to allow
an arbitrary number of producers to enqueue objects which are
retrieved and processed in FIFO order by a single consumer.

[section Implementation]

[c++]

  #include <boost/atomic.hpp>

  template<typename T>
  class lockfree_queue {
  public:
    struct node {
      T data;
      node * next;
    };

    lockfree_queue() : head_(0) {}

    void push(const T & data)
    {
      node * n = new node;
      n->data = data;
      node * stale_head = head_.load(boost::memory_order_relaxed);
      do {
        n->next = stale_head;
      } while (!head_.compare_exchange_weak(stale_head, n, boost::memory_order_release));
    }

    node * pop_all(void)
    {
      node * last = pop_all_reverse(), * first = 0;
      while (last) {
        node * tmp = last;
        last = last->next;
        tmp->next = first;
        first = tmp;
      }
      return first;
    }

    // alternative interface if ordering is of no importance
    node * pop_all_reverse(void)
    {
      return head_.exchange(0, boost::memory_order_consume);
    }
  private:
    boost::atomic<node *> head_;
  };

[endsect]

[section Usage]

[c++]

  lockfree_queue<int> q;

  // insert elements
  q.push(42);
  q.push(2);

  // pop elements
  lockfree_queue<int>::node * x = q.pop_all();
  while (x) {
    lockfree_queue<int>::node * tmp = x;
    x = x->next;
    // process tmp->data here, then free the node
    delete tmp;
  }

[endsect]

[section Discussion]

The implementation guarantees that all objects enqueued are
processed in the order they were enqueued, by building a singly-linked
list of objects in reverse processing order. The queue is atomically
emptied by the consumer (in an operation that is not only lock-free but
wait-free) and then brought into the correct order.

It must be guaranteed that any access to an object to be enqueued
by the producer "happens before" any access by the consumer. This
is assured by inserting objects into the list with ['release] and
dequeuing them with ['consume] memory order. It is not
necessary to use ['acquire] memory order in [^lockfree_queue::pop_all_reverse]
because all operations involved depend on the value of
the atomic pointer through a dereference.

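For comparison, [^pop_all_reverse] written with ['acquire] ordering
would look as follows (a sketch; correct, but stronger than required):

[c++]

  node * pop_all_reverse(void)
  {
    // acquire orders all subsequent operations, not only those
    // carrying a data dependency on the returned pointer
    return head_.exchange(0, boost::memory_order_acquire);
  }
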
[endsect]

[endsect]