chunked parsing: relax the CR strictness

Allow for chunked-encoding data to get parsed with only LF line endings.
This is allowed by browsers.
This commit is contained in:
Daniel Stenberg 2014-01-17 00:34:36 +01:00
parent 3f5546b2be
commit eb02a99c61
2 changed files with 35 additions and 92 deletions

View File

@ -88,8 +88,8 @@ static bool Curl_isxdigit(char digit)
void Curl_httpchunk_init(struct connectdata *conn)
{
struct Curl_chunker *chunk = &conn->chunk;
chunk->hexindex=0; /* start at 0 */
chunk->dataleft=0; /* no data left yet! */
chunk->hexindex=0; /* start at 0 */
chunk->dataleft=0; /* no data left yet! */
chunk->state = CHUNK_HEX; /* we get hex first! */
}
@ -143,11 +143,11 @@ CHUNKcode Curl_httpchunk_read(struct connectdata *conn,
}
else {
char *endptr;
if(0 == ch->hexindex) {
if(0 == ch->hexindex)
/* This is illegal data, we received junk where we expected
a hexadecimal digit. */
return CHUNKE_ILLEGAL_HEX;
}
/* length and datap are unmodified */
ch->hexbuffer[ch->hexindex]=0;
@ -164,44 +164,29 @@ CHUNKcode Curl_httpchunk_read(struct connectdata *conn,
if(errno == ERANGE)
/* over or underflow is an error */
return CHUNKE_ILLEGAL_HEX;
ch->state = CHUNK_POSTHEX;
ch->state = CHUNK_LF; /* now wait for the CRLF */
}
break;
case CHUNK_POSTHEX:
/* In this state, we're waiting for CRLF to arrive. We support
this to allow so called chunk-extensions to show up here
before the CRLF comes. */
if(*datap == 0x0d)
ch->state = CHUNK_CR;
length--;
datap++;
break;
case CHUNK_CR:
/* waiting for the LF */
case CHUNK_LF:
/* waiting for the LF after a chunk size */
if(*datap == 0x0a) {
/* we're now expecting data to come, unless size was zero! */
if(0 == ch->datasize) {
ch->state = CHUNK_TRAILER; /* now check for trailers */
conn->trlPos=0;
}
else {
else
ch->state = CHUNK_DATA;
}
}
else
/* previously we got a fake CR, go back to CR waiting! */
ch->state = CHUNK_CR;
datap++;
length--;
break;
case CHUNK_DATA:
/* we get pure and fine data
We expect another 'datasize' of data. We have 'length' right now,
it can be more or less than 'datasize'. Get the smallest piece.
/* We expect 'datasize' of data. We have 'length' right now, it can be
more or less than 'datasize'. Get the smallest piece.
*/
piece = (ch->datasize >= length)?length:ch->datasize;
@ -256,37 +241,22 @@ CHUNKcode Curl_httpchunk_read(struct connectdata *conn,
if(0 == ch->datasize)
/* end of data this round, we now expect a trailing CRLF */
ch->state = CHUNK_POSTCR;
break;
case CHUNK_POSTCR:
if(*datap == 0x0d) {
ch->state = CHUNK_POSTLF;
datap++;
length--;
}
else
return CHUNKE_BAD_CHUNK;
break;
case CHUNK_POSTLF:
if(*datap == 0x0a) {
/*
* The last one before we go back to hex state and start all
* over.
*/
Curl_httpchunk_init(conn);
datap++;
length--;
/* The last one before we go back to hex state and start all over. */
Curl_httpchunk_init(conn); /* sets state back to CHUNK_HEX */
}
else
else if(*datap != 0x0d)
return CHUNKE_BAD_CHUNK;
datap++;
length--;
break;
case CHUNK_TRAILER:
if(*datap == 0x0d) {
if((*datap == 0x0d) || (*datap == 0x0a)) {
/* this is the end of a trailer, but if the trailer was zero bytes
there was no trailer and we move on */
@ -312,6 +282,9 @@ CHUNKcode Curl_httpchunk_read(struct connectdata *conn,
}
conn->trlPos=0;
ch->state = CHUNK_TRAILER_CR;
if(*datap == 0x0a)
/* already on the LF */
break;
}
else {
/* no trailer, we're on the final CRLF pair */
@ -357,27 +330,18 @@ CHUNKcode Curl_httpchunk_read(struct connectdata *conn,
case CHUNK_TRAILER_POSTCR:
/* We enter this state when a CR should arrive so we expect to
have to first pass a CR before we wait for LF */
if(*datap != 0x0d) {
if((*datap != 0x0d) && (*datap != 0x0a)) {
/* not a CR then it must be another header in the trailer */
ch->state = CHUNK_TRAILER;
break;
}
datap++;
length--;
/* now wait for the final LF */
ch->state = CHUNK_STOP;
break;
case CHUNK_STOPCR:
/* Read the final CRLF that ends all chunk bodies */
if(*datap == 0x0d) {
ch->state = CHUNK_STOP;
/* skip if CR */
datap++;
length--;
}
else
return CHUNKE_BAD_CHUNK;
/* now wait for the final LF */
ch->state = CHUNK_STOP;
break;
case CHUNK_STOP:
@ -392,9 +356,6 @@ CHUNKcode Curl_httpchunk_read(struct connectdata *conn,
}
else
return CHUNKE_BAD_CHUNK;
default:
return CHUNKE_STATE_ERROR;
}
}
return CHUNKE_OK;

View File

@ -29,40 +29,25 @@
#define MAXNUM_SIZE 16
typedef enum {
CHUNK_FIRST, /* never use */
/* In this we await and buffer all hexadecimal digits until we get one
that isn't a hexadecimal digit. When done, we go POSTHEX */
/* await and buffer all hexadecimal digits until we get one that isn't a
hexadecimal digit. When done, we go CHUNK_LF */
CHUNK_HEX,
/* We have received the hexadecimal digit and we eat all characters until
we get a CRLF pair. When we see a CR we go to the CR state. */
CHUNK_POSTHEX,
/* A single CR has been found and we should get a LF right away in this
state or we go back to POSTHEX. When LF is received, we go to DATA.
If the size given was zero, we set state to STOP and return. */
CHUNK_CR,
/* wait for LF, ignore all else */
CHUNK_LF,
/* We eat the amount of data specified. When done, we move on to the
POST_CR state. */
CHUNK_DATA,
/* POSTCR should get a CR and nothing else, then move to POSTLF */
CHUNK_POSTCR,
/* POSTLF should get a LF and nothing else, then move back to HEX as the
CRLF combination marks the end of a chunk */
/* POSTLF should get a CR and then a LF and nothing else, then move back to
HEX as the CRLF combination marks the end of a chunk. A missing CR is no
big deal. */
CHUNK_POSTLF,
/* Each Chunk body should end with a CRLF. Read a CR and nothing else,
then move to CHUNK_STOP */
CHUNK_STOPCR,
/* This is mainly used to really mark that we're out of the game.
NOTE: that there's a 'dataleft' field in the struct that will tell how
many bytes that were not passed to the client in the end of the last
buffer! */
/* Used to mark that we're out of the game. NOTE: that there's a 'dataleft'
field in the struct that will tell how many bytes that were not passed to
the client in the end of the last buffer! */
CHUNK_STOP,
/* At this point optional trailer headers can be found, unless the next line
@ -77,10 +62,7 @@ typedef enum {
signalled If this is an empty trailer CHUNKE_STOP will be signalled.
Otherwise the trailer will be broadcasted via Curl_client_write() and the
next state will be CHUNK_TRAILER */
CHUNK_TRAILER_POSTCR,
CHUNK_LAST /* never use */
CHUNK_TRAILER_POSTCR
} ChunkyState;
typedef enum {