Align data payload for better performance.

This commit is contained in:
Andy Polyakov 2006-10-20 11:26:00 +00:00
parent 1e7b6c029c
commit a4d64c7f49
3 changed files with 126 additions and 56 deletions

View File

@ -589,13 +589,17 @@ int ssl_verify_alarm_type(long type)
int ssl3_setup_buffers(SSL *s) int ssl3_setup_buffers(SSL *s)
{ {
unsigned char *p; unsigned char *p;
size_t len; size_t len,align=0;
#if defined(SSL3_ALIGN_PAYLOAD) && SSL3_ALIGN_PAYLOAD!=0
align = (-SSL3_RT_HEADER_LENGTH)&(SSL3_ALIGN_PAYLOAD-1);
#endif
if (s->s3->rbuf.buf == NULL) if (s->s3->rbuf.buf == NULL)
{ {
len = SSL3_RT_MAX_PLAIN_LENGTH len = SSL3_RT_MAX_PLAIN_LENGTH
+ SSL3_RT_MAX_ENCRYPTED_OVERHEAD + SSL3_RT_MAX_ENCRYPTED_OVERHEAD
+ SSL3_RT_HEADER_LENGTH; + SSL3_RT_HEADER_LENGTH + align;
if (s->options & SSL_OP_MICROSOFT_BIG_SSLV3_BUFFER) if (s->options & SSL_OP_MICROSOFT_BIG_SSLV3_BUFFER)
{ {
s->s3->init_extra = 1; s->s3->init_extra = 1;
@ -615,13 +619,13 @@ int ssl3_setup_buffers(SSL *s)
{ {
len = s->max_send_fragment len = s->max_send_fragment
+ SSL3_RT_SEND_MAX_ENCRYPTED_OVERHEAD + SSL3_RT_SEND_MAX_ENCRYPTED_OVERHEAD
+ SSL3_RT_HEADER_LENGTH; + SSL3_RT_HEADER_LENGTH + align;
#ifndef OPENSSL_NO_COMP #ifndef OPENSSL_NO_COMP
if (!(s->options & SSL_OP_NO_COMPRESSION)) if (!(s->options & SSL_OP_NO_COMPRESSION))
len += SSL3_RT_MAX_COMPRESSED_OVERHEAD; len += SSL3_RT_MAX_COMPRESSED_OVERHEAD;
#endif #endif
if (!(s->options & SSL_OP_DONT_INSERT_EMPTY_FRAGMENTS)) if (!(s->options & SSL_OP_DONT_INSERT_EMPTY_FRAGMENTS))
len += SSL3_RT_HEADER_LENGTH len += SSL3_RT_HEADER_LENGTH + align
+ SSL3_RT_SEND_MAX_ENCRYPTED_OVERHEAD; + SSL3_RT_SEND_MAX_ENCRYPTED_OVERHEAD;
if ((p=OPENSSL_malloc(len)) == NULL) if ((p=OPENSSL_malloc(len)) == NULL)
goto err; goto err;

View File

@ -129,14 +129,44 @@ int ssl3_read_n(SSL *s, int n, int max, int extend)
* (If s->read_ahead is set, 'max' bytes may be stored in rbuf * (If s->read_ahead is set, 'max' bytes may be stored in rbuf
* [plus s->packet_length bytes if extend == 1].) * [plus s->packet_length bytes if extend == 1].)
*/ */
int i,off,newb; int i,len,left,align=0;
unsigned char *pkt;
SSL3_BUFFER *rb;
if (n <= 0) return n;
rb = &(s->s3->rbuf);
left = rb->left;
#if defined(SSL3_ALIGN_PAYLOAD) && SSL3_ALIGN_PAYLOAD!=0
align = (int)rb->buf + SSL3_RT_HEADER_LENGTH;
align = (-align)&(SSL3_ALIGN_PAYLOAD-1);
#endif
if (!extend) if (!extend)
{ {
/* start with empty packet ... */ /* start with empty packet ... */
if (s->s3->rbuf.left == 0) if (left == 0)
s->s3->rbuf.offset = 0; rb->offset = align;
s->packet = s->s3->rbuf.buf + s->s3->rbuf.offset; else if (align != 0 && left >= SSL3_RT_HEADER_LENGTH)
{
/* check if next packet length is large
* enough to justify payload alignment... */
pkt = rb->buf + rb->offset;
if (pkt[0] == SSL3_RT_APPLICATION_DATA
&& (pkt[3]<<8|pkt[4]) >= 128)
{
/* Note that even if the packet is corrupted
* and its length field is insane, we can
* only be led to a wrong decision about
* whether memmove will occur or not.
* Header values have no effect on memmove
* arguments and therefore no buffer
* overrun can be triggered. */
memmove (rb->buf+align,pkt,left);
rb->offset = align;
}
}
s->packet = rb->buf + rb->offset;
s->packet_length = 0; s->packet_length = 0;
/* ... now we can act as if 'extend' was set */ /* ... now we can act as if 'extend' was set */
} }
@ -145,57 +175,54 @@ int ssl3_read_n(SSL *s, int n, int max, int extend)
if ( SSL_version(s) == DTLS1_VERSION && if ( SSL_version(s) == DTLS1_VERSION &&
extend) extend)
{ {
if ( s->s3->rbuf.left > 0 && n > s->s3->rbuf.left) if ( left > 0 && n > left)
n = s->s3->rbuf.left; n = left;
} }
/* if there is enough in the buffer from a previous read, take some */ /* if there is enough in the buffer from a previous read, take some */
if (s->s3->rbuf.left >= (int)n) if (left >= n)
{ {
s->packet_length+=n; s->packet_length+=n;
s->s3->rbuf.left-=n; rb->left=left-n;
s->s3->rbuf.offset+=n; rb->offset+=n;
return(n); return(n);
} }
/* else we need to read more data */ /* else we need to read more data */
if (!s->read_ahead)
max=n;
{ len = s->packet_length;
/* avoid buffer overflow */ pkt = rb->buf+align;
int max_max = s->s3->rbuf.len - s->packet_length; /* Move any available bytes to front of buffer:
if (max > max_max) * 'len' bytes already pointed to by 'packet',
max = max_max; * 'left' extra ones at the end */
} if (s->packet != pkt) /* len > 0 */
{
memmove(pkt, s->packet, len+left);
s->packet = pkt;
rb->offset = len + align;
}
max = rb->len - rb->offset;
if (n > max) /* does not happen */ if (n > max) /* does not happen */
{ {
SSLerr(SSL_F_SSL3_READ_N,ERR_R_INTERNAL_ERROR); SSLerr(SSL_F_SSL3_READ_N,ERR_R_INTERNAL_ERROR);
return -1; return -1;
} }
off = s->packet_length; if (!s->read_ahead)
newb = s->s3->rbuf.left; max=n;
/* Move any available bytes to front of buffer:
* 'off' bytes already pointed to by 'packet',
* 'newb' extra ones at the end */
if (s->packet != s->s3->rbuf.buf)
{
/* off > 0 */
memmove(s->s3->rbuf.buf, s->packet, off+newb);
s->packet = s->s3->rbuf.buf;
}
while (newb < n) while (left < n)
{ {
/* Now we have off+newb bytes at the front of s->s3->rbuf.buf and need /* Now we have len+left bytes at the front of s->s3->rbuf.buf
* to read in more until we have off+n (up to off+max if possible) */ * and need to read in more until we have len+n (up to
* len+max if possible) */
clear_sys_error(); clear_sys_error();
if (s->rbio != NULL) if (s->rbio != NULL)
{ {
s->rwstate=SSL_READING; s->rwstate=SSL_READING;
i=BIO_read(s->rbio, &(s->s3->rbuf.buf[off+newb]), max-newb); i=BIO_read(s->rbio,pkt+len+left, max-left);
} }
else else
{ {
@ -205,15 +232,15 @@ int ssl3_read_n(SSL *s, int n, int max, int extend)
if (i <= 0) if (i <= 0)
{ {
s->s3->rbuf.left = newb; rb->left = left;
return(i); return(i);
} }
newb+=i; left+=i;
} }
/* done reading, now the book-keeping */ /* done reading, now the book-keeping */
s->s3->rbuf.offset = off + n; rb->offset += n;
s->s3->rbuf.left = newb - n; rb->left = left - n;
s->packet_length += n; s->packet_length += n;
s->rwstate=SSL_NOTHING; s->rwstate=SSL_NOTHING;
return(n); return(n);
@ -579,14 +606,14 @@ static int do_ssl3_write(SSL *s, int type, const unsigned char *buf,
{ {
unsigned char *p,*plen; unsigned char *p,*plen;
int i,mac_size,clear=0; int i,mac_size,clear=0;
int prefix_len = 0; int prefix_len=0,align=0;
SSL3_RECORD *wr; SSL3_RECORD *wr;
SSL3_BUFFER *wb; SSL3_BUFFER *wb=&(s->s3->wbuf);
SSL_SESSION *sess; SSL_SESSION *sess;
/* first check if there is a SSL3_BUFFER still being written /* first check if there is a SSL3_BUFFER still being written
* out. This will happen with non blocking IO */ * out. This will happen with non blocking IO */
if (s->s3->wbuf.left != 0) if (wb->left != 0)
return(ssl3_write_pending(s,type,buf,len)); return(ssl3_write_pending(s,type,buf,len));
/* If we have an alert to send, lets send it */ /* If we have an alert to send, lets send it */
@ -602,7 +629,6 @@ static int do_ssl3_write(SSL *s, int type, const unsigned char *buf,
return 0; return 0;
wr= &(s->s3->wrec); wr= &(s->s3->wrec);
wb= &(s->s3->wbuf);
sess=s->session; sess=s->session;
if ( (sess == NULL) || if ( (sess == NULL) ||
@ -643,7 +669,32 @@ static int do_ssl3_write(SSL *s, int type, const unsigned char *buf,
s->s3->empty_fragment_done = 1; s->s3->empty_fragment_done = 1;
} }
p = wb->buf + prefix_len; if (create_empty_fragment)
{
#if defined(SSL3_ALIGN_PAYLOAD) && SSL3_ALIGN_PAYLOAD!=0
/* extra fragment would be a couple of cipher blocks,
* which would be a multiple of SSL3_ALIGN_PAYLOAD, so
* if we want to align the real payload, then we can
* just pretend we simply have two headers. */
align = (int)wb->buf + 2*SSL3_RT_HEADER_LENGTH;
align = (-align)&(SSL3_ALIGN_PAYLOAD-1);
#endif
p = wb->buf + align;
wb->offset = align;
}
else if (prefix_len)
{
p = wb->buf + wb->offset + prefix_len;
}
else
{
#if defined(SSL3_ALIGN_PAYLOAD) && SSL3_ALIGN_PAYLOAD!=0
align = (int)wb->buf + SSL3_RT_HEADER_LENGTH;
align = (-align)&(SSL3_ALIGN_PAYLOAD-1);
#endif
p = wb->buf + align;
wb->offset = align;
}
/* write the header */ /* write the header */
@ -714,7 +765,6 @@ static int do_ssl3_write(SSL *s, int type, const unsigned char *buf,
/* now let's set up wb */ /* now let's set up wb */
wb->left = prefix_len + wr->length; wb->left = prefix_len + wr->length;
wb->offset = 0;
/* memorize arguments so that ssl3_write_pending can detect bad write retries later */ /* memorize arguments so that ssl3_write_pending can detect bad write retries later */
s->s3->wpend_tot=len; s->s3->wpend_tot=len;
@ -733,6 +783,7 @@ int ssl3_write_pending(SSL *s, int type, const unsigned char *buf,
unsigned int len) unsigned int len)
{ {
int i; int i;
SSL3_BUFFER *wb=&(s->s3->wbuf);
/* XXXX */ /* XXXX */
if ((s->s3->wpend_tot > (int)len) if ((s->s3->wpend_tot > (int)len)
@ -751,24 +802,25 @@ int ssl3_write_pending(SSL *s, int type, const unsigned char *buf,
{ {
s->rwstate=SSL_WRITING; s->rwstate=SSL_WRITING;
i=BIO_write(s->wbio, i=BIO_write(s->wbio,
(char *)&(s->s3->wbuf.buf[s->s3->wbuf.offset]), (char *)&(wb->buf[wb->offset]),
(unsigned int)s->s3->wbuf.left); (unsigned int)wb->left);
} }
else else
{ {
SSLerr(SSL_F_SSL3_WRITE_PENDING,SSL_R_BIO_NOT_SET); SSLerr(SSL_F_SSL3_WRITE_PENDING,SSL_R_BIO_NOT_SET);
i= -1; i= -1;
} }
if (i == s->s3->wbuf.left) if (i == wb->left)
{ {
s->s3->wbuf.left=0; wb->left=0;
wb->offset+=i;
s->rwstate=SSL_NOTHING; s->rwstate=SSL_NOTHING;
return(s->s3->wpend_ret); return(s->s3->wpend_ret);
} }
else if (i <= 0) else if (i <= 0)
return(i); return(i);
s->s3->wbuf.offset+=i; wb->offset+=i;
s->s3->wbuf.left-=i; wb->left-=i;
} }
} }

View File

@ -248,12 +248,27 @@ extern "C" {
#define SSL3_SESSION_ID_SIZE 32 #define SSL3_SESSION_ID_SIZE 32
#define SSL3_RT_HEADER_LENGTH 5 #define SSL3_RT_HEADER_LENGTH 5
#ifndef SSL3_ALIGN_PAYLOAD
/* Some will argue that this increases memory footprint, but it's
* not actually true. The point is that malloc has to return at least
* 64-bit aligned pointers, meaning that allocating 5 bytes wastes
* 3 bytes in either case. The suggested pre-gapping simply moves these
* wasted bytes from the end of the allocated region to its front,
* but makes the data payload aligned, which improves performance:-) */
# define SSL3_ALIGN_PAYLOAD 8
#else
# if (SSL3_ALIGN_PAYLOAD&(SSL3_ALIGN_PAYLOAD-1))!=0
# error "insane SSL3_ALIGN_PAYLOAD"
# undef SSL3_ALIGN_PAYLOAD
# endif
#endif
/* This is the maximum MAC (digest) size used by the SSL library. /* This is the maximum MAC (digest) size used by the SSL library.
* Currently this is 20 when SHA1 is used. This must be updated if larger * Currently maximum of 20 is used by SHA1, but we reserve for
* digests are used in future. * future extension for 512-bit hashes.
*/ */
#define SSL3_RT_MAX_MD_SIZE 20 #define SSL3_RT_MAX_MD_SIZE 64
/* Maximum block size used in all ciphersuites. Currently 16 for AES. /* Maximum block size used in all ciphersuites. Currently 16 for AES.
*/ */
@ -292,7 +307,6 @@ extern "C" {
(SSL3_RT_MAX_ENCRYPTED_OVERHEAD+SSL3_RT_MAX_COMPRESSED_LENGTH) (SSL3_RT_MAX_ENCRYPTED_OVERHEAD+SSL3_RT_MAX_COMPRESSED_LENGTH)
#define SSL3_RT_MAX_PACKET_SIZE \ #define SSL3_RT_MAX_PACKET_SIZE \
(SSL3_RT_MAX_ENCRYPTED_LENGTH+SSL3_RT_HEADER_LENGTH) (SSL3_RT_MAX_ENCRYPTED_LENGTH+SSL3_RT_HEADER_LENGTH)
#define SSL3_RT_MAX_DATA_SIZE (1024*1024)
#define SSL3_MD_CLIENT_FINISHED_CONST "\x43\x4C\x4E\x54" #define SSL3_MD_CLIENT_FINISHED_CONST "\x43\x4C\x4E\x54"
#define SSL3_MD_SERVER_FINISHED_CONST "\x53\x52\x56\x52" #define SSL3_MD_SERVER_FINISHED_CONST "\x53\x52\x56\x52"