Command are now stored in ypipes instead of in socketpairs

Storing commands in OS socket buffers caused whole lot of
problems when free space in the buffer ran out. This patch
stores commands in ypipes instead and uses socketpair just
to signal the other thread, ie. at most one byte is stored
in the socketpair at any single instant.

Signed-off-by: Martin Sustrik <sustrik@250bpm.com>
This commit is contained in:
Martin Sustrik
2011-07-03 13:33:45 +02:00
parent de3838403b
commit 7c0c798120
7 changed files with 462 additions and 426 deletions

View File

@@ -18,439 +18,64 @@
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#include "platform.hpp"
#if defined ZMQ_FORCE_SELECT
#define ZMQ_RCVTIMEO_BASED_ON_SELECT
#elif defined ZMQ_FORCE_POLL
#define ZMQ_RCVTIMEO_BASED_ON_POLL
#elif defined ZMQ_HAVE_LINUX || defined ZMQ_HAVE_FREEBSD ||\
defined ZMQ_HAVE_OPENBSD || defined ZMQ_HAVE_SOLARIS ||\
defined ZMQ_HAVE_OSX || defined ZMQ_HAVE_QNXNTO ||\
defined ZMQ_HAVE_HPUX || defined ZMQ_HAVE_AIX ||\
defined ZMQ_HAVE_NETBSD
#define ZMQ_RCVTIMEO_BASED_ON_POLL
#elif defined ZMQ_HAVE_WINDOWS || defined ZMQ_HAVE_OPENVMS
#define ZMQ_RCVTIMEO_BASED_ON_SELECT
#endif
// On AIX, poll.h has to be included before zmq.h to get consistent
// definition of pollfd structure (AIX uses 'reqevents' and 'retnevents'
// instead of 'events' and 'revents' and defines macros to map from POSIX-y
// names to AIX-specific names).
#if defined ZMQ_RCVTIMEO_BASED_ON_POLL
#include <poll.h>
#elif defined ZMQ_RCVTIMEO_BASED_ON_SELECT
#if defined ZMQ_HAVE_WINDOWS
#include "windows.hpp"
#elif defined ZMQ_HAVE_HPUX
#include <sys/param.h>
#include <sys/types.h>
#include <sys/time.h>
#elif defined ZMQ_HAVE_OPENVMS
#include <sys/types.h>
#include <sys/time.h>
#else
#include <sys/select.h>
#endif
#endif
#include "mailbox.hpp"
#include "err.hpp"
#include "fd.hpp"
#include "ip.hpp"
#if defined ZMQ_HAVE_WINDOWS
#include "windows.hpp"
#else
#include <unistd.h>
#include <fcntl.h>
#include <limits.h>
#include <netinet/tcp.h>
#include <unistd.h>
#include <sys/types.h>
#include <sys/socket.h>
#endif
zmq::mailbox_t::mailbox_t ()
{
// Get the pipe into passive state. That way, if the users starts by
// polling on the associated file descriptor it will get woken up when
// new command is posted.
bool ok = cpipe.read (NULL);
zmq_assert (!ok);
active = false;
}
zmq::mailbox_t::~mailbox_t ()
{
// TODO: Retrieve and deallocate commands inside the cpipe.
}
zmq::fd_t zmq::mailbox_t::get_fd ()
{
return r;
}
#if defined ZMQ_HAVE_WINDOWS
zmq::mailbox_t::mailbox_t () :
blocking (true)
{
// Create the socketpair for signalling.
int rc = make_socketpair (&r, &w);
errno_assert (rc == 0);
// Set the writer to non-blocking mode.
unsigned long argp = 1;
rc = ioctlsocket (w, FIONBIO, &argp);
wsa_assert (rc != SOCKET_ERROR);
}
zmq::mailbox_t::~mailbox_t ()
{
int rc = closesocket (w);
wsa_assert (rc != SOCKET_ERROR);
rc = closesocket (r);
wsa_assert (rc != SOCKET_ERROR);
return signaler.get_fd ();
}
void zmq::mailbox_t::send (const command_t &cmd_)
{
// TODO: Implement SNDBUF auto-resizing as for POSIX platforms.
// In the mean time, the following code with assert if the send()
// call would block.
int nbytes = ::send (w, (char *)&cmd_, sizeof (command_t), 0);
wsa_assert (nbytes != SOCKET_ERROR);
zmq_assert (nbytes == sizeof (command_t));
sync.lock ();
cpipe.write (cmd_, false);
bool ok = cpipe.flush ();
sync.unlock ();
if (!ok)
signaler.send ();
}
int zmq::mailbox_t::recv (command_t *cmd_, int timeout_)
{
// If there's a finite timeout, poll on the fd.
if (timeout_ > 0)
return recv_timeout (cmd_, timeout_);
// Try to get the command straight away.
if (active) {
bool ok = cpipe.read (cmd_);
if (ok)
return 0;
// If required, switch the reader to blocking or non-blocking mode.
if ((timeout_ < 0 && !blocking) || (timeout_ == 0 && blocking)) {
blocking = (timeout_ < 0);
unsigned long argp = blocking ? 0 : 1;
int rc = ioctlsocket (r, FIONBIO, &argp);
wsa_assert (rc != SOCKET_ERROR);
// If there are no more commands available, switch into passive state.
active = false;
signaler.recv ();
}
// Attempt to read an entire command.
int nbytes = ::recv (r, (char*) cmd_, sizeof (command_t), 0);
if (nbytes == -1 && WSAGetLastError () == WSAEWOULDBLOCK) {
errno = EAGAIN;
// Wait for signal from the command sender.
int rc = signaler.wait (timeout_);
if (rc != 0 && errno == EAGAIN)
return -1;
}
// Sanity check for success.
wsa_assert (nbytes != SOCKET_ERROR);
// We've got the signal. Now we can switch into active state.
active = true;
// Check whether we haven't got half of command.
zmq_assert (nbytes == sizeof (command_t));
return 0;
}
#else
zmq::mailbox_t::mailbox_t () :
blocking (true)
{
#ifdef PIPE_BUF
// Make sure that command can be written to the socket in atomic fashion.
// If this wasn't guaranteed, commands from different threads would be
// interleaved.
zmq_assert (sizeof (command_t) <= PIPE_BUF);
#endif
// Create the socketpair for signaling.
int rc = make_socketpair (&r, &w);
// Get a command.
errno_assert (rc == 0);
// Set the writer to non-blocking mode.
int flags = fcntl (w, F_GETFL, 0);
errno_assert (flags >= 0);
rc = fcntl (w, F_SETFL, flags | O_NONBLOCK);
errno_assert (rc == 0);
}
zmq::mailbox_t::~mailbox_t ()
{
close (w);
close (r);
}
void zmq::mailbox_t::send (const command_t &cmd_)
{
// Attempt to write an entire command without blocking.
ssize_t nbytes;
do {
nbytes = ::send (w, &cmd_, sizeof (command_t), 0);
} while (nbytes == -1 && errno == EINTR);
// Attempt to increase mailbox SNDBUF if the send failed.
if (nbytes == -1 && errno == EAGAIN) {
int old_sndbuf, new_sndbuf;
socklen_t sndbuf_size = sizeof old_sndbuf;
// Retrieve current send buffer size.
int rc = getsockopt (w, SOL_SOCKET, SO_SNDBUF, &old_sndbuf,
&sndbuf_size);
errno_assert (rc == 0);
new_sndbuf = old_sndbuf * 2;
// Double the new send buffer size.
rc = setsockopt (w, SOL_SOCKET, SO_SNDBUF, &new_sndbuf, sndbuf_size);
errno_assert (rc == 0);
// Verify that the OS actually honored the request.
rc = getsockopt (w, SOL_SOCKET, SO_SNDBUF, &new_sndbuf, &sndbuf_size);
errno_assert (rc == 0);
zmq_assert (new_sndbuf > old_sndbuf);
// Retry the sending operation; at this point it must succeed.
do {
nbytes = ::send (w, &cmd_, sizeof (command_t), 0);
} while (nbytes == -1 && errno == EINTR);
}
errno_assert (nbytes != -1);
// This should never happen as we've already checked that command size is
// less than PIPE_BUF.
zmq_assert (nbytes == sizeof (command_t));
}
int zmq::mailbox_t::recv (command_t *cmd_, int timeout_)
{
// If there's a finite timeout, poll on the fd.
if (timeout_ > 0)
return recv_timeout (cmd_, timeout_);
#ifdef MSG_DONTWAIT
// Attempt to read an entire command. Returns EAGAIN if non-blocking
// mode is requested and a command is not available.
ssize_t nbytes = ::recv (r, cmd_, sizeof (command_t),
timeout_ < 0 ? 0 : MSG_DONTWAIT);
if (nbytes == -1 && (errno == EAGAIN || errno == EINTR))
return -1;
#else
// If required, switch the reader to blocking or non-blocking mode.
if ((timeout_ < 0 && !blocking) || (timeout_ == 0 && blocking)) {
blocking = (timeout_ < 0);
int flags = fcntl (r, F_GETFL, 0);
errno_assert (flags >= 0);
int rc = fcntl (r, F_SETFL,
blocking ? flags | O_NONBLOCK : flags & ~O_NONBLOCK);
errno_assert (rc == 0);
}
// Attempt to read an entire command.
ssize_t nbytes = ::recv (r, cmd_, sizeof (command_t), 0);
if (nbytes == -1 && (errno == EAGAIN || errno == EINTR))
return -1;
#endif
// Sanity check for success.
errno_assert (nbytes != -1);
// Check whether we haven't got half of command.
zmq_assert (nbytes == sizeof (command_t));
bool ok = cpipe.read (cmd_);
zmq_assert (ok);
return 0;
}
#endif
int zmq::mailbox_t::make_socketpair (fd_t *r_, fd_t *w_)
{
#if defined ZMQ_HAVE_WINDOWS
// Windows has no 'socketpair' function. CreatePipe is no good as pipe
// handles cannot be polled on. Here we create the socketpair by hand.
*w_ = INVALID_SOCKET;
*r_ = INVALID_SOCKET;
// Create listening socket.
SOCKET listener;
listener = socket (AF_INET, SOCK_STREAM, 0);
wsa_assert (listener != INVALID_SOCKET);
// Set SO_REUSEADDR and TCP_NODELAY on listening socket.
BOOL so_reuseaddr = 1;
int rc = setsockopt (listener, SOL_SOCKET, SO_REUSEADDR,
(char *)&so_reuseaddr, sizeof (so_reuseaddr));
wsa_assert (rc != SOCKET_ERROR);
BOOL tcp_nodelay = 1;
rc = setsockopt (listener, IPPROTO_TCP, TCP_NODELAY,
(char *)&tcp_nodelay, sizeof (tcp_nodelay));
wsa_assert (rc != SOCKET_ERROR);
// Bind listening socket to any free local port.
struct sockaddr_in addr;
memset (&addr, 0, sizeof (addr));
addr.sin_family = AF_INET;
addr.sin_addr.s_addr = htonl (INADDR_LOOPBACK);
addr.sin_port = 0;
rc = bind (listener, (const struct sockaddr*) &addr, sizeof (addr));
wsa_assert (rc != SOCKET_ERROR);
// Retrieve local port listener is bound to (into addr).
int addrlen = sizeof (addr);
rc = getsockname (listener, (struct sockaddr*) &addr, &addrlen);
wsa_assert (rc != SOCKET_ERROR);
// Listen for incomming connections.
rc = listen (listener, 1);
wsa_assert (rc != SOCKET_ERROR);
// Create the writer socket.
*w_ = WSASocket (AF_INET, SOCK_STREAM, 0, NULL, 0, 0);
wsa_assert (*w_ != INVALID_SOCKET);
// Set TCP_NODELAY on writer socket.
rc = setsockopt (*w_, IPPROTO_TCP, TCP_NODELAY,
(char *)&tcp_nodelay, sizeof (tcp_nodelay));
wsa_assert (rc != SOCKET_ERROR);
// Connect writer to the listener.
rc = connect (*w_, (sockaddr *) &addr, sizeof (addr));
wsa_assert (rc != SOCKET_ERROR);
// Accept connection from writer.
*r_ = accept (listener, NULL, NULL);
wsa_assert (*r_ != INVALID_SOCKET);
// We don't need the listening socket anymore. Close it.
rc = closesocket (listener);
wsa_assert (rc != SOCKET_ERROR);
return 0;
#elif defined ZMQ_HAVE_OPENVMS
// Whilst OpenVMS supports socketpair - it maps to AF_INET only. Further,
// it does not set the socket options TCP_NODELAY and TCP_NODELACK which
// can lead to performance problems.
//
// The bug will be fixed in V5.6 ECO4 and beyond. In the meantime, we'll
// create the socket pair manually.
sockaddr_in lcladdr;
memset (&lcladdr, 0, sizeof (lcladdr));
lcladdr.sin_family = AF_INET;
lcladdr.sin_addr.s_addr = htonl (INADDR_LOOPBACK);
lcladdr.sin_port = 0;
int listener = socket (AF_INET, SOCK_STREAM, 0);
errno_assert (listener != -1);
int on = 1;
int rc = setsockopt (listener, IPPROTO_TCP, TCP_NODELAY, &on, sizeof (on));
errno_assert (rc != -1);
rc = setsockopt (listener, IPPROTO_TCP, TCP_NODELACK, &on, sizeof (on));
errno_assert (rc != -1);
rc = bind(listener, (struct sockaddr*) &lcladdr, sizeof (lcladdr));
errno_assert (rc != -1);
socklen_t lcladdr_len = sizeof (lcladdr);
rc = getsockname (listener, (struct sockaddr*) &lcladdr, &lcladdr_len);
errno_assert (rc != -1);
rc = listen (listener, 1);
errno_assert (rc != -1);
*w_ = socket (AF_INET, SOCK_STREAM, 0);
errno_assert (*w_ != -1);
rc = setsockopt (*w_, IPPROTO_TCP, TCP_NODELAY, &on, sizeof (on));
errno_assert (rc != -1);
rc = setsockopt (*w_, IPPROTO_TCP, TCP_NODELACK, &on, sizeof (on));
errno_assert (rc != -1);
rc = connect (*w_, (struct sockaddr*) &lcladdr, sizeof (lcladdr));
errno_assert (rc != -1);
*r_ = accept (listener, NULL, NULL);
errno_assert (*r_ != -1);
close (listener);
return 0;
#else // All other implementations support socketpair()
int sv [2];
int rc = socketpair (AF_UNIX, SOCK_STREAM, 0, sv);
errno_assert (rc == 0);
*w_ = sv [0];
*r_ = sv [1];
return 0;
#endif
}
int zmq::mailbox_t::recv_timeout (command_t *cmd_, int timeout_)
{
#ifdef ZMQ_RCVTIMEO_BASED_ON_POLL
struct pollfd pfd;
pfd.fd = r;
pfd.events = POLLIN;
int rc = poll (&pfd, 1, timeout_);
if (unlikely (rc < 0)) {
zmq_assert (errno == EINTR);
return -1;
}
else if (unlikely (rc == 0)) {
errno = EAGAIN;
return -1;
}
zmq_assert (rc == 1);
zmq_assert (pfd.revents & POLLIN);
#elif defined ZMQ_RCVTIMEO_BASED_ON_SELECT
fd_set fds;
FD_ZERO (&fds);
FD_SET (r, &fds);
struct timeval timeout;
timeout.tv_sec = timeout_ / 1000;
timeout.tv_usec = timeout_ % 1000 * 1000;
#ifdef ZMQ_HAVE_WINDOWS
int rc = select (0, &fds, NULL, NULL, &timeout);
wsa_assert (rc != SOCKET_ERROR);
#else
int rc = select (r + 1, &fds, NULL, NULL, &timeout);
if (unlikely (rc < 0)) {
zmq_assert (errno == EINTR);
return -1;
}
#endif
if (unlikely (rc == 0)) {
errno = EAGAIN;
return -1;
}
zmq_assert (rc == 1);
#else
#error
#endif
// The file descriptor is ready for reading. Extract one command out of it.
#ifdef ZMQ_HAVE_WINDOWS
int nbytes = ::recv (r, (char*) cmd_, sizeof (command_t), 0);
wsa_assert (nbytes != SOCKET_ERROR);
#else
ssize_t nbytes = ::recv (r, cmd_, sizeof (command_t), 0);
if (unlikely (rc < 0 && errno == EINTR))
return -1;
errno_assert (nbytes > 0);
#endif
zmq_assert (nbytes == sizeof (command_t));
return 0;
}
#if defined ZMQ_RCVTIMEO_BASED_ON_SELECT
#undef ZMQ_RCVTIMEO_BASED_ON_SELECT
#endif
#if defined ZMQ_RCVTIMEO_BASED_ON_POLL
#undef ZMQ_RCVTIMEO_BASED_ON_POLL
#endif