mirror of
https://github.com/zeromq/libzmq.git
synced 2025-10-29 20:59:47 +01:00
ZMQII-26: Use zero-copy for large messages (rx side)
This commit is contained in:
@@ -22,8 +22,11 @@
|
|||||||
|
|
||||||
#include <stddef.h>
|
#include <stddef.h>
|
||||||
#include <string.h>
|
#include <string.h>
|
||||||
|
#include <stdlib.h>
|
||||||
#include <algorithm>
|
#include <algorithm>
|
||||||
|
|
||||||
|
#include "err.hpp"
|
||||||
|
|
||||||
namespace zmq
|
namespace zmq
|
||||||
{
|
{
|
||||||
|
|
||||||
@@ -42,31 +45,80 @@ namespace zmq
|
|||||||
{
|
{
|
||||||
public:
|
public:
|
||||||
|
|
||||||
inline decoder_t () :
|
inline decoder_t (size_t bufsize_) :
|
||||||
read_ptr (NULL),
|
read_pos (NULL),
|
||||||
to_read (0),
|
to_read (0),
|
||||||
next (NULL)
|
next (NULL),
|
||||||
|
bufsize (bufsize_)
|
||||||
{
|
{
|
||||||
|
buf = (unsigned char*) malloc (bufsize_);
|
||||||
|
zmq_assert (buf);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Push the binary data to the decoder. Returns number of bytes
|
inline ~decoder_t ()
|
||||||
// actually parsed.
|
|
||||||
inline size_t write (unsigned char *data_, size_t size_)
|
|
||||||
{
|
{
|
||||||
|
free (buf);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Returns a buffer to be filled with binary data.
|
||||||
|
inline void get_buffer (unsigned char **data_, size_t *size_)
|
||||||
|
{
|
||||||
|
// If we are expected to read large message, we'll opt for zero-
|
||||||
|
// copy, i.e. we'll ask caller to fill the data directly to the
|
||||||
|
// message. Note that subsequent read(s) are non-blocking, thus
|
||||||
|
// each single read reads at most SO_RCVBUF bytes at once not
|
||||||
|
// depending on how large is the chunk returned from here.
|
||||||
|
// As a consequence, large messages being received won't block
|
||||||
|
// other engines running in the same I/O thread for excessive
|
||||||
|
// amounts of time.
|
||||||
|
if (to_read >= bufsize) {
|
||||||
|
*data_ = read_pos;
|
||||||
|
*size_ = to_read;
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
*data_ = buf;
|
||||||
|
*size_ = bufsize;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Processes the data in the buffer previously allocated using
|
||||||
|
// get_buffer function. size_ argument specifies nemuber of bytes
|
||||||
|
// actually filled into the buffer. Function returns number of
|
||||||
|
// bytes actually processed.
|
||||||
|
inline size_t process_buffer (unsigned char *data_, size_t size_)
|
||||||
|
{
|
||||||
|
// In case of zero-copy simply adjust the pointers, no copying
|
||||||
|
// is required. Also, run the state machine in case all the data
|
||||||
|
// were processed.
|
||||||
|
if (data_ == read_pos) {
|
||||||
|
read_pos += size_;
|
||||||
|
to_read -= size_;
|
||||||
|
|
||||||
|
while (!to_read)
|
||||||
|
if (!(static_cast <T*> (this)->*next) ())
|
||||||
|
return size_;
|
||||||
|
return size_;
|
||||||
|
}
|
||||||
|
|
||||||
size_t pos = 0;
|
size_t pos = 0;
|
||||||
while (true) {
|
while (true) {
|
||||||
size_t to_copy = std::min (to_read, size_ - pos);
|
|
||||||
if (read_ptr) {
|
// Try to get more space in the message to fill in.
|
||||||
memcpy (read_ptr, data_ + pos, to_copy);
|
// If none is available, return.
|
||||||
read_ptr += to_copy;
|
|
||||||
}
|
|
||||||
pos += to_copy;
|
|
||||||
to_read -= to_copy;
|
|
||||||
while (!to_read)
|
while (!to_read)
|
||||||
if (!(static_cast <T*> (this)->*next) ())
|
if (!(static_cast <T*> (this)->*next) ())
|
||||||
return pos;
|
return pos;
|
||||||
|
|
||||||
|
// If there are no more data in the buffer, return.
|
||||||
if (pos == size_)
|
if (pos == size_)
|
||||||
return pos;
|
return pos;
|
||||||
|
|
||||||
|
// Copy the data from buffer to the message.
|
||||||
|
size_t to_copy = std::min (to_read, size_ - pos);
|
||||||
|
memcpy (read_pos, data_ + pos, to_copy);
|
||||||
|
read_pos += to_copy;
|
||||||
|
pos += to_copy;
|
||||||
|
to_read -= to_copy;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -78,20 +130,23 @@ namespace zmq
|
|||||||
|
|
||||||
// This function should be called from derived class to read data
|
// This function should be called from derived class to read data
|
||||||
// from the buffer and schedule next state machine action.
|
// from the buffer and schedule next state machine action.
|
||||||
inline void next_step (void *read_ptr_, size_t to_read_,
|
inline void next_step (void *read_pos_, size_t to_read_,
|
||||||
step_t next_)
|
step_t next_)
|
||||||
{
|
{
|
||||||
read_ptr = (unsigned char*) read_ptr_;
|
read_pos = (unsigned char*) read_pos_;
|
||||||
to_read = to_read_;
|
to_read = to_read_;
|
||||||
next = next_;
|
next = next_;
|
||||||
}
|
}
|
||||||
|
|
||||||
private:
|
private:
|
||||||
|
|
||||||
unsigned char *read_ptr;
|
unsigned char *read_pos;
|
||||||
size_t to_read;
|
size_t to_read;
|
||||||
step_t next;
|
step_t next;
|
||||||
|
|
||||||
|
size_t bufsize;
|
||||||
|
unsigned char *buf;
|
||||||
|
|
||||||
decoder_t (const decoder_t&);
|
decoder_t (const decoder_t&);
|
||||||
void operator = (const decoder_t&);
|
void operator = (const decoder_t&);
|
||||||
};
|
};
|
||||||
|
|||||||
107
src/encoder.hpp
107
src/encoder.hpp
@@ -22,8 +22,11 @@
|
|||||||
|
|
||||||
#include <stddef.h>
|
#include <stddef.h>
|
||||||
#include <string.h>
|
#include <string.h>
|
||||||
|
#include <stdlib.h>
|
||||||
#include <algorithm>
|
#include <algorithm>
|
||||||
|
|
||||||
|
#include "err.hpp"
|
||||||
|
|
||||||
namespace zmq
|
namespace zmq
|
||||||
{
|
{
|
||||||
|
|
||||||
@@ -35,68 +38,81 @@ namespace zmq
|
|||||||
{
|
{
|
||||||
public:
|
public:
|
||||||
|
|
||||||
inline encoder_t ()
|
inline encoder_t (size_t bufsize_) :
|
||||||
|
bufsize (bufsize_)
|
||||||
{
|
{
|
||||||
|
buf = (unsigned char*) malloc (bufsize_);
|
||||||
|
zmq_assert (buf);
|
||||||
}
|
}
|
||||||
|
|
||||||
// The function tries to fill the supplied chunk by binary data.
|
inline ~encoder_t ()
|
||||||
// If offset is not NULL, it is filled by offset of the first message
|
{
|
||||||
// in the batch. If there's no beginning of a message in the batch,
|
free (buf);
|
||||||
// offset is set to -1. Both data_ and size_ are in/out parameters.
|
}
|
||||||
// Upon exit, data_ contains actual position of the data read (may
|
|
||||||
// be different from the position requested) and size_ contains number
|
// The function returns a batch of binary data. If offset is not NULL,
|
||||||
// of bytes actually provided.
|
// it is filled by offset of the first message in the batch. If there's
|
||||||
inline void read (unsigned char **data_, size_t *size_,
|
// no beginning of a message in the batch, offset is set to -1.
|
||||||
|
inline void get_buffer (unsigned char **data_, size_t *size_,
|
||||||
int *offset_ = NULL)
|
int *offset_ = NULL)
|
||||||
{
|
{
|
||||||
int offset = -1;
|
|
||||||
size_t pos = 0;
|
size_t pos = 0;
|
||||||
|
if (offset_)
|
||||||
|
*offset_ = -1;
|
||||||
|
|
||||||
while (pos < *size_) {
|
while (true) {
|
||||||
|
|
||||||
// If we are able to fill whole buffer in a single go, let's
|
// If there are no more data to return, run the state machine.
|
||||||
// use zero-copy. There's no disadvantage to it as we cannot
|
// If there are still no data, return what we already have
|
||||||
// stuck multiple messages into the buffer anyway. Note that
|
// in the buffer.
|
||||||
// subsequent write(s) are non-blocking, thus each single
|
if (!to_write) {
|
||||||
// write writes at most SO_SNDBUF bytes at once not depending
|
if (!(static_cast <T*> (this)->*next) ()) {
|
||||||
// on how large is the chunk returned from here.
|
*data_ = buf;
|
||||||
|
*size_ = pos;
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
// If beginning of the message was processed, adjust the
|
||||||
|
// first-message-offset.
|
||||||
|
if (beginning) {
|
||||||
|
if (offset_ && *offset_ == -1)
|
||||||
|
*offset_ = pos;
|
||||||
|
beginning = false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// If there are no data in the buffer yet and we are able to
|
||||||
|
// fill whole buffer in a single go, let's use zero-copy.
|
||||||
|
// There's no disadvantage to it as we cannot stuck multiple
|
||||||
|
// messages into the buffer anyway. Note that subsequent
|
||||||
|
// write(s) are non-blocking, thus each single write writes
|
||||||
|
// at most SO_SNDBUF bytes at once not depending on how large
|
||||||
|
// is the chunk returned from here.
|
||||||
// As a consequence, large messages being sent won't block
|
// As a consequence, large messages being sent won't block
|
||||||
// other engines running in the same I/O thread for excessive
|
// other engines running in the same I/O thread for excessive
|
||||||
// amounts of time.
|
// amounts of time.
|
||||||
if (pos == 0 && to_write >= *size_) {
|
if (!pos && to_write >= bufsize) {
|
||||||
*data_ = write_pos;
|
*data_ = write_pos;
|
||||||
write_pos += to_write;
|
*size_ = to_write;
|
||||||
pos = to_write;
|
write_pos = NULL;
|
||||||
to_write = 0;
|
to_write = 0;
|
||||||
break;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (to_write) {
|
// Copy data to the buffer. If the buffer is full, return.
|
||||||
|
size_t to_copy = std::min (to_write, bufsize - pos);
|
||||||
size_t to_copy = std::min (to_write, *size_ - pos);
|
memcpy (buf + pos, write_pos, to_copy);
|
||||||
memcpy (*data_ + pos, write_pos, to_copy);
|
pos += to_copy;
|
||||||
pos += to_copy;
|
write_pos += to_copy;
|
||||||
write_pos += to_copy;
|
to_write -= to_copy;
|
||||||
to_write -= to_copy;
|
if (pos == bufsize) {
|
||||||
}
|
*data_ = buf;
|
||||||
else {
|
*size_ = pos;
|
||||||
bool more = (static_cast <T*> (this)->*next) ();
|
return;
|
||||||
if (beginning && offset == -1) {
|
|
||||||
offset = pos;
|
|
||||||
beginning = false;
|
|
||||||
}
|
|
||||||
if (!more)
|
|
||||||
break;
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Return offset of the first message in the buffer.
|
|
||||||
if (offset_)
|
|
||||||
*offset_ = offset;
|
|
||||||
|
|
||||||
// Return the size of the filled-in portion of the buffer.
|
|
||||||
*size_ = pos;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
protected:
|
protected:
|
||||||
|
|
||||||
// Prototype of state machine action.
|
// Prototype of state machine action.
|
||||||
@@ -121,6 +137,9 @@ namespace zmq
|
|||||||
step_t next;
|
step_t next;
|
||||||
bool beginning;
|
bool beginning;
|
||||||
|
|
||||||
|
size_t bufsize;
|
||||||
|
unsigned char *buf;
|
||||||
|
|
||||||
encoder_t (const encoder_t&);
|
encoder_t (const encoder_t&);
|
||||||
void operator = (const encoder_t&);
|
void operator = (const encoder_t&);
|
||||||
};
|
};
|
||||||
|
|||||||
@@ -22,7 +22,8 @@
|
|||||||
#include "wire.hpp"
|
#include "wire.hpp"
|
||||||
#include "err.hpp"
|
#include "err.hpp"
|
||||||
|
|
||||||
zmq::zmq_decoder_t::zmq_decoder_t () :
|
zmq::zmq_decoder_t::zmq_decoder_t (size_t bufsize_) :
|
||||||
|
decoder_t <zmq_decoder_t> (bufsize_),
|
||||||
destination (NULL)
|
destination (NULL)
|
||||||
{
|
{
|
||||||
zmq_msg_init (&in_progress);
|
zmq_msg_init (&in_progress);
|
||||||
|
|||||||
@@ -32,7 +32,7 @@ namespace zmq
|
|||||||
{
|
{
|
||||||
public:
|
public:
|
||||||
|
|
||||||
zmq_decoder_t ();
|
zmq_decoder_t (size_t bufsize_);
|
||||||
~zmq_decoder_t ();
|
~zmq_decoder_t ();
|
||||||
|
|
||||||
void set_inout (struct i_inout *destination_);
|
void set_inout (struct i_inout *destination_);
|
||||||
|
|||||||
@@ -21,7 +21,8 @@
|
|||||||
#include "i_inout.hpp"
|
#include "i_inout.hpp"
|
||||||
#include "wire.hpp"
|
#include "wire.hpp"
|
||||||
|
|
||||||
zmq::zmq_encoder_t::zmq_encoder_t () :
|
zmq::zmq_encoder_t::zmq_encoder_t (size_t bufsize_) :
|
||||||
|
encoder_t <zmq_encoder_t> (bufsize_),
|
||||||
source (NULL)
|
source (NULL)
|
||||||
{
|
{
|
||||||
zmq_msg_init (&in_progress);
|
zmq_msg_init (&in_progress);
|
||||||
|
|||||||
@@ -32,7 +32,7 @@ namespace zmq
|
|||||||
{
|
{
|
||||||
public:
|
public:
|
||||||
|
|
||||||
zmq_encoder_t ();
|
zmq_encoder_t (size_t bufsize_);
|
||||||
~zmq_encoder_t ();
|
~zmq_encoder_t ();
|
||||||
|
|
||||||
void set_inout (struct i_inout *source_);
|
void set_inout (struct i_inout *source_);
|
||||||
|
|||||||
@@ -27,21 +27,15 @@
|
|||||||
zmq::zmq_engine_t::zmq_engine_t (io_thread_t *parent_, fd_t fd_,
|
zmq::zmq_engine_t::zmq_engine_t (io_thread_t *parent_, fd_t fd_,
|
||||||
const options_t &options_) :
|
const options_t &options_) :
|
||||||
io_object_t (parent_),
|
io_object_t (parent_),
|
||||||
inbuf (NULL),
|
inpos (NULL),
|
||||||
insize (0),
|
insize (0),
|
||||||
inpos (0),
|
decoder (in_batch_size),
|
||||||
outbuf (NULL),
|
outpos (NULL),
|
||||||
outsize (0),
|
outsize (0),
|
||||||
outpos (0),
|
encoder (out_batch_size),
|
||||||
inout (NULL),
|
inout (NULL),
|
||||||
options (options_)
|
options (options_)
|
||||||
{
|
{
|
||||||
// Allocate read & write buffer.
|
|
||||||
inbuf_storage = (unsigned char*) malloc (in_batch_size);
|
|
||||||
zmq_assert (inbuf_storage);
|
|
||||||
outbuf_storage = (unsigned char*) malloc (out_batch_size);
|
|
||||||
zmq_assert (outbuf_storage);
|
|
||||||
|
|
||||||
// Initialise the underlying socket.
|
// Initialise the underlying socket.
|
||||||
int rc = tcp_socket.open (fd_, options.sndbuf, options.rcvbuf);
|
int rc = tcp_socket.open (fd_, options.sndbuf, options.rcvbuf);
|
||||||
zmq_assert (rc == 0);
|
zmq_assert (rc == 0);
|
||||||
@@ -49,8 +43,6 @@ zmq::zmq_engine_t::zmq_engine_t (io_thread_t *parent_, fd_t fd_,
|
|||||||
|
|
||||||
zmq::zmq_engine_t::~zmq_engine_t ()
|
zmq::zmq_engine_t::~zmq_engine_t ()
|
||||||
{
|
{
|
||||||
free (outbuf_storage);
|
|
||||||
free (inbuf_storage);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
void zmq::zmq_engine_t::plug (i_inout *inout_)
|
void zmq::zmq_engine_t::plug (i_inout *inout_)
|
||||||
@@ -80,13 +72,12 @@ void zmq::zmq_engine_t::unplug ()
|
|||||||
|
|
||||||
void zmq::zmq_engine_t::in_event ()
|
void zmq::zmq_engine_t::in_event ()
|
||||||
{
|
{
|
||||||
// If there's no data to process in the buffer, read new data.
|
// If there's no data to process in the buffer...
|
||||||
if (inpos == insize) {
|
if (!insize) {
|
||||||
|
|
||||||
// Read as much data as possible to the read buffer.
|
// Retrieve the buffer and read as much data as possible.
|
||||||
inbuf = inbuf_storage;
|
decoder.get_buffer (&inpos, &insize);
|
||||||
insize = tcp_socket.read (inbuf, in_batch_size);
|
insize = tcp_socket.read (inpos, insize);
|
||||||
inpos = 0;
|
|
||||||
|
|
||||||
// Check whether the peer has closed the connection.
|
// Check whether the peer has closed the connection.
|
||||||
if (insize == (size_t) -1) {
|
if (insize == (size_t) -1) {
|
||||||
@@ -96,15 +87,15 @@ void zmq::zmq_engine_t::in_event ()
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Following code should be executed even if there's not a single byte in
|
|
||||||
// the buffer. There still can be a decoded messages stored in the decoder.
|
|
||||||
|
|
||||||
// Push the data to the decoder.
|
// Push the data to the decoder.
|
||||||
int nbytes = decoder.write (inbuf + inpos, insize - inpos);
|
size_t processed = decoder.process_buffer (inpos, insize);
|
||||||
|
|
||||||
// Adjust read position. Stop polling for input if we got stuck.
|
// Adjust the buffer.
|
||||||
inpos += nbytes;
|
inpos += processed;
|
||||||
if (inpos < insize)
|
insize -= processed;
|
||||||
|
|
||||||
|
// Stop polling for input if we got stuck.
|
||||||
|
if (processed < insize)
|
||||||
reset_pollin (handle);
|
reset_pollin (handle);
|
||||||
|
|
||||||
// Flush all messages the decoder may have produced.
|
// Flush all messages the decoder may have produced.
|
||||||
@@ -114,31 +105,28 @@ void zmq::zmq_engine_t::in_event ()
|
|||||||
void zmq::zmq_engine_t::out_event ()
|
void zmq::zmq_engine_t::out_event ()
|
||||||
{
|
{
|
||||||
// If write buffer is empty, try to read new data from the encoder.
|
// If write buffer is empty, try to read new data from the encoder.
|
||||||
if (outpos == outsize) {
|
if (!outsize) {
|
||||||
|
encoder.get_buffer (&outpos, &outsize);
|
||||||
outbuf = outbuf_storage;
|
|
||||||
outsize = out_batch_size;
|
|
||||||
encoder.read (&outbuf, &outsize);
|
|
||||||
outpos = 0;
|
|
||||||
|
|
||||||
// If there is no data to send, stop polling for output.
|
// If there is no data to send, stop polling for output.
|
||||||
if (outsize == 0)
|
if (outsize == 0) {
|
||||||
reset_pollout (handle);
|
reset_pollout (handle);
|
||||||
|
return;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// If there are any data to write in write buffer, write as much as
|
// If there are any data to write in write buffer, write as much as
|
||||||
// possible to the socket.
|
// possible to the socket.
|
||||||
if (outpos < outsize) {
|
int nbytes = tcp_socket.write (outpos, outsize);
|
||||||
int nbytes = tcp_socket.write (outbuf + outpos, outsize - outpos);
|
|
||||||
|
|
||||||
// Handle problems with the connection.
|
// Handle problems with the connection.
|
||||||
if (nbytes == -1) {
|
if (nbytes == -1) {
|
||||||
error ();
|
error ();
|
||||||
return;
|
return;
|
||||||
}
|
|
||||||
|
|
||||||
outpos += nbytes;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
outpos += nbytes;
|
||||||
|
outsize -= nbytes;
|
||||||
}
|
}
|
||||||
|
|
||||||
void zmq::zmq_engine_t::revive ()
|
void zmq::zmq_engine_t::revive ()
|
||||||
|
|||||||
@@ -57,21 +57,16 @@ namespace zmq
|
|||||||
tcp_socket_t tcp_socket;
|
tcp_socket_t tcp_socket;
|
||||||
handle_t handle;
|
handle_t handle;
|
||||||
|
|
||||||
unsigned char *inbuf_storage;
|
unsigned char *inpos;
|
||||||
unsigned char *inbuf;
|
|
||||||
size_t insize;
|
size_t insize;
|
||||||
size_t inpos;
|
zmq_decoder_t decoder;
|
||||||
|
|
||||||
unsigned char *outbuf_storage;
|
unsigned char *outpos;
|
||||||
unsigned char *outbuf;
|
|
||||||
size_t outsize;
|
size_t outsize;
|
||||||
size_t outpos;
|
zmq_encoder_t encoder;
|
||||||
|
|
||||||
i_inout *inout;
|
i_inout *inout;
|
||||||
|
|
||||||
zmq_encoder_t encoder;
|
|
||||||
zmq_decoder_t decoder;
|
|
||||||
|
|
||||||
options_t options;
|
options_t options;
|
||||||
|
|
||||||
zmq_engine_t (const zmq_engine_t&);
|
zmq_engine_t (const zmq_engine_t&);
|
||||||
|
|||||||
Reference in New Issue
Block a user