From 8bfcae65ef9f7835b447a1e210c99ec8f5ac6198 Mon Sep 17 00:00:00 2001 From: Daniel Stenberg Date: Wed, 27 Oct 2004 21:46:11 +0000 Subject: [PATCH] Dan Fandrich's gzip handling fix --- CHANGES | 27 ++++++ lib/content_encoding.c | 199 +++++++++++++++++++++++++---------------- 2 files changed, 148 insertions(+), 78 deletions(-) diff --git a/CHANGES b/CHANGES index 89122c79e..28d96faff 100644 --- a/CHANGES +++ b/CHANGES @@ -7,6 +7,33 @@ Changelog Daniel (27 October 2004) +- Dan Fandrich: + + An improvement to the gzip handling of libcurl. There were two problems with + the old version: it was possible for a malicious gzip file to cause libcurl + to leak memory, as a buffer was malloced to hold the header and never freed + if the header ended with no file contents. The second problem is that the + 64 KiB decompression buffer was allocated on the stack, which caused + unexpectedly high stack usage and overflowed the stack on some systems + (someone complained about that in the mailing list about a year ago). + + Both problems are fixed by this patch. The first one is fixed when a recent + (1.2) version of zlib is used, as it takes care of gzip header parsing + itself. A check for the version number is done at run-time and libcurl uses + that feature if it's present. I've created a define OLD_ZLIB_SUPPORT that + can be commented out to save some code space if libcurl is guaranteed to be + using a 1.2 version of zlib. + + The second problem is solved by dynamically allocating the memory buffer + instead of storing it on the stack. The allocation/free is done for every + incoming packet, which is suboptimal, but should be dwarfed by the actual + decompression computation. + + I've also factored out some common code between deflate and gzip to reduce + the code footprint somewhat. I've tested the gzip code on a few test files + and I tried deflate using the freshmeat.net server, and it all looks OK. I + didn't try running it with valgrind, however. + - Added a --retry option to curl that takes a numerical option for the number of times the operation should be retried. It is retried if a transient error is detected or if a timeout occurred. By default, it will first wait one diff --git a/lib/content_encoding.c b/lib/content_encoding.c index b786e3f9c..ac17befaf 100644 --- a/lib/content_encoding.c +++ b/lib/content_encoding.c @@ -36,6 +36,10 @@ #include "memdebug.h" +/* Comment this out if zlib is always going to be at least ver. 1.2.0.4 + (doing so will reduce code size slightly). */ +#define OLD_ZLIB_SUPPORT 1 + #define DSIZ 0x10000 /* buffer size for decompressed data */ #define GZIP_MAGIC_0 0x1f @@ -49,14 +53,23 @@ #define COMMENT 0x10 /* bit 4 set: file comment present */ #define RESERVED 0xE0 /* bits 5..7: reserved */ +enum zlibState { + ZLIB_UNINIT, /* uninitialized */ + ZLIB_INIT, /* initialized */ + ZLIB_GZIP_HEADER, /* reading gzip header */ + ZLIB_GZIP_INFLATING, /* inflating gzip stream */ + ZLIB_INIT_GZIP /* initialized in transparent gzip mode */ +}; + static CURLcode -process_zlib_error(struct SessionHandle *data, z_stream *z) +process_zlib_error(struct SessionHandle *data, + z_stream *z) { if (z->msg) - failf (data, "Error while processing content unencoding.\n%s", + failf (data, "Error while processing content unencoding: %s", z->msg); else - failf (data, "Error while processing content unencoding.\n" + failf (data, "Error while processing content unencoding: " "Unknown failure within decompression software."); return CURLE_BAD_CONTENT_ENCODING; @@ -66,55 +79,48 @@ static CURLcode exit_zlib(z_stream *z, bool *zlib_init, CURLcode result) { inflateEnd(z); - *zlib_init = 0; + *zlib_init = ZLIB_UNINIT; return result; } -CURLcode -Curl_unencode_deflate_write(struct SessionHandle *data, - struct Curl_transfer_keeper *k, - ssize_t nread) +static CURLcode +inflate_stream(struct SessionHandle *data, + struct Curl_transfer_keeper *k) { + z_stream *z = &k->z; /* zlib state structure */ int status; /* zlib status */ CURLcode result = CURLE_OK; /* Curl_client_write status */ - char decomp[DSIZ]; /* Put the decompressed data here. */ - z_stream *z = &k->z; /* zlib state structure */ + char *decomp; /* Put the decompressed data here. */ - /* Initialize zlib? */ - if (!k->zlib_init) { - z->zalloc = (alloc_func)Z_NULL; - z->zfree = (free_func)Z_NULL; - z->opaque = 0; - z->next_in = NULL; - z->avail_in = 0; - if (inflateInit(z) != Z_OK) - return process_zlib_error(data, z); - k->zlib_init = 1; + /* Dynamically allocate a buffer for decompression because it's uncommonly + large to hold on the stack */ + decomp = (char*)malloc(DSIZ); + if (decomp == NULL) { + return exit_zlib(z, &k->zlib_init, CURLE_OUT_OF_MEMORY); } - /* Set the compressed input when this function is called */ - z->next_in = (Bytef *)k->str; - z->avail_in = (uInt)nread; - - /* because the buffer size is fixed, iteratively decompress - and transfer to the client via client_write. */ + /* because the buffer size is fixed, iteratively decompress and transfer to + the client via client_write. */ for (;;) { /* (re)set buffer for decompressed output for every iteration */ - z->next_out = (Bytef *)&decomp[0]; + z->next_out = (Bytef *)decomp; z->avail_out = DSIZ; status = inflate(z, Z_SYNC_FLUSH); if (status == Z_OK || status == Z_STREAM_END) { - if (DSIZ - z->avail_out) { + if(DSIZ - z->avail_out) { result = Curl_client_write(data, CLIENTWRITE_BODY, decomp, DSIZ - z->avail_out); /* if !CURLE_OK, clean up, return */ - if (result) + if (result) { + free(decomp); return exit_zlib(z, &k->zlib_init, result); + } } - /* Done?; clean up, return */ + /* Done? clean up, return */ if (status == Z_STREAM_END) { + free(decomp); if (inflateEnd(z) == Z_OK) return exit_zlib(z, &k->zlib_init, result); else @@ -122,15 +128,47 @@ Curl_unencode_deflate_write(struct SessionHandle *data, } /* Done with these bytes, exit */ - if (status == Z_OK && z->avail_in == 0 && z->avail_out > 0) + if (status == Z_OK && z->avail_in == 0 && z->avail_out > 0) { + free(decomp); return result; + } } else { /* Error; exit loop, handle below */ + free(decomp); return exit_zlib(z, &k->zlib_init, process_zlib_error(data, z)); } } + /* Will never get here */ } +CURLcode +Curl_unencode_deflate_write(struct SessionHandle *data, + struct Curl_transfer_keeper *k, + ssize_t nread) +{ + z_stream *z = &k->z; /* zlib state structure */ + + /* Initialize zlib? */ + if (k->zlib_init == ZLIB_UNINIT) { + z->zalloc = (alloc_func)Z_NULL; + z->zfree = (free_func)Z_NULL; + z->opaque = 0; + z->next_in = NULL; + z->avail_in = 0; + if (inflateInit(z) != Z_OK) + return process_zlib_error(data, z); + k->zlib_init = ZLIB_INIT; + } + + /* Set the compressed input when this function is called */ + z->next_in = (Bytef *)k->str; + z->avail_in = (uInt)nread; + + /* Now uncompress the data */ + return inflate_stream(data, k); +} + +#ifdef OLD_ZLIB_SUPPORT /* Skip over the gzip header */ static enum { GZIP_OK, @@ -213,38 +251,67 @@ static enum { *headerlen = totallen - len; return GZIP_OK; } +#endif CURLcode Curl_unencode_gzip_write(struct SessionHandle *data, struct Curl_transfer_keeper *k, ssize_t nread) { - int status; /* zlib status */ - CURLcode result = CURLE_OK; /* Curl_client_write status */ - char decomp[DSIZ]; /* Put the decompressed data here. */ z_stream *z = &k->z; /* zlib state structure */ /* Initialize zlib? */ - if (!k->zlib_init) { + if (k->zlib_init == ZLIB_UNINIT) { z->zalloc = (alloc_func)Z_NULL; z->zfree = (free_func)Z_NULL; z->opaque = 0; z->next_in = NULL; z->avail_in = 0; - if (inflateInit2(z, -MAX_WBITS) != Z_OK) - return process_zlib_error(data, z); - k->zlib_init = 1; /* Initial call state */ + + if (strcmp(zlibVersion(), "1.2.0.4") >= 0) { + /* zlib ver. >= 1.2.0.4 supports transparent gzip decompressing */ + if (inflateInit2(z, MAX_WBITS+32) != Z_OK) { + return process_zlib_error(data, z); + } + k->zlib_init = ZLIB_INIT_GZIP; /* Transparent gzip decompress state */ + + } else { + /* we must parse the gzip header ourselves */ + if (inflateInit2(z, -MAX_WBITS) != Z_OK) { + return process_zlib_error(data, z); + } + k->zlib_init = ZLIB_INIT; /* Initial call state */ + } } + if (k->zlib_init == ZLIB_INIT_GZIP) { + /* Let zlib handle the gzip decompression entirely */ + z->next_in = (Bytef *)k->str; + z->avail_in = (uInt)nread; + /* Now uncompress the data */ + return inflate_stream(data, k); + } + +#ifndef OLD_ZLIB_SUPPORT + /* Support for old zlib versions is compiled away and we are running with + an old version, so return an error. */ + return exit_zlib(z, &k->zlib_init, CURLE_FUNCTION_NOT_FOUND); + +#else /* This next mess is to get around the potential case where there isn't * enough data passed in to skip over the gzip header. If that happens, we * malloc a block and copy what we have then wait for the next call. If * there still isn't enough (this is definitely a worst-case scenario), we * make the block bigger, copy the next part in and keep waiting. + * + * This is only required with zlib versions < 1.2.0.4 as newer versions + * can handle the gzip header themselves. */ + switch (k->zlib_init) { /* Skip over gzip header? */ - if (k->zlib_init == 1) { + case ZLIB_INIT: + { /* Initial call state */ ssize_t hlen; @@ -252,7 +319,7 @@ Curl_unencode_gzip_write(struct SessionHandle *data, case GZIP_OK: z->next_in = (Bytef *)k->str + hlen; z->avail_in = (uInt)(nread - hlen); - k->zlib_init = 3; /* Inflating stream state */ + k->zlib_init = ZLIB_GZIP_INFLATING; /* Inflating stream state */ break; case GZIP_UNDERFLOW: @@ -269,7 +336,7 @@ Curl_unencode_gzip_write(struct SessionHandle *data, return exit_zlib(z, &k->zlib_init, CURLE_OUT_OF_MEMORY); } memcpy(z->next_in, k->str, z->avail_in); - k->zlib_init = 2; /* Need more gzip header data state */ + k->zlib_init = ZLIB_GZIP_HEADER; /* Need more gzip header data state */ /* We don't have any data to inflate yet */ return CURLE_OK; @@ -279,7 +346,10 @@ Curl_unencode_gzip_write(struct SessionHandle *data, } } - else if (k->zlib_init == 2) { + break; + + case ZLIB_GZIP_HEADER: + { /* Need more gzip header data state */ ssize_t hlen; unsigned char *oldblock = z->next_in; @@ -300,7 +370,7 @@ Curl_unencode_gzip_write(struct SessionHandle *data, /* Don't point into the malloced block since we just freed it */ z->next_in = (Bytef *)k->str + hlen + nread - z->avail_in; z->avail_in = (uInt)(z->avail_in - hlen); - k->zlib_init = 3; /* Inflating stream state */ + k->zlib_init = ZLIB_GZIP_INFLATING; /* Inflating stream state */ break; case GZIP_UNDERFLOW: @@ -314,10 +384,14 @@ Curl_unencode_gzip_write(struct SessionHandle *data, } } - else { + break; + + case ZLIB_GZIP_INFLATING: + default: /* Inflating stream state */ z->next_in = (Bytef *)k->str; z->avail_in = (uInt)nread; + break; } if (z->avail_in == 0) { @@ -325,39 +399,8 @@ Curl_unencode_gzip_write(struct SessionHandle *data, return CURLE_OK; } - /* because the buffer size is fixed, iteratively decompress and transfer to - the client via client_write. */ - for (;;) { - /* (re)set buffer for decompressed output for every iteration */ - z->next_out = (Bytef *)&decomp[0]; - z->avail_out = DSIZ; - - status = inflate(z, Z_SYNC_FLUSH); - if (status == Z_OK || status == Z_STREAM_END) { - if(DSIZ - z->avail_out) { - result = Curl_client_write(data, CLIENTWRITE_BODY, decomp, - DSIZ - z->avail_out); - /* if !CURLE_OK, clean up, return */ - if (result) - return exit_zlib(z, &k->zlib_init, result); - } - - /* Done?; clean up, return */ - /* We should really check the gzip CRC here */ - if (status == Z_STREAM_END) { - if (inflateEnd(z) == Z_OK) - return exit_zlib(z, &k->zlib_init, result); - else - return exit_zlib(z, &k->zlib_init, process_zlib_error(data, z)); - } - - /* Done with these bytes, exit */ - if (status == Z_OK && z->avail_in == 0 && z->avail_out > 0) - return result; - } - else { /* Error; exit loop, handle below */ - return exit_zlib(z, &k->zlib_init, process_zlib_error(data, z)); - } - } + /* We've parsed the header, now uncompress the data */ + return inflate_stream(data, k); +#endif } #endif /* HAVE_LIBZ */