big reorg to make it not exit when it fails, but instead just not do any
globbing, it makes IPv6 support easier and smoother to add.
This commit is contained in:
parent
6f438bc8fb
commit
210aa4371c
167
src/urlglob.c
167
src/urlglob.c
@ -32,9 +32,23 @@
|
|||||||
#include "../lib/memdebug.h"
|
#include "../lib/memdebug.h"
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
int glob_word(URLGlob *, char*, int);
|
typedef enum {
|
||||||
|
GLOB_OK,
|
||||||
|
GLOB_ERROR
|
||||||
|
} GlobCode;
|
||||||
|
|
||||||
int glob_set(URLGlob *glob, char *pattern, int pos)
|
/*
|
||||||
|
* glob_word()
|
||||||
|
*
|
||||||
|
* Input a full globbed string, set the forth argument to the amount of
|
||||||
|
* strings we get out of this. Return GlobCode.
|
||||||
|
*/
|
||||||
|
GlobCode glob_word(URLGlob *, /* object anchor */
|
||||||
|
char *, /* globbed string */
|
||||||
|
int, /* position */
|
||||||
|
int *); /* returned number of strings */
|
||||||
|
|
||||||
|
GlobCode glob_set(URLGlob *glob, char *pattern, int pos, int *amount)
|
||||||
{
|
{
|
||||||
/* processes a set expression with the point behind the opening '{'
|
/* processes a set expression with the point behind the opening '{'
|
||||||
','-separated elements are collected until the next closing '}'
|
','-separated elements are collected until the next closing '}'
|
||||||
@ -52,13 +66,15 @@ int glob_set(URLGlob *glob, char *pattern, int pos)
|
|||||||
|
|
||||||
while (1) {
|
while (1) {
|
||||||
switch (*pattern) {
|
switch (*pattern) {
|
||||||
case '\0': /* URL ended while set was still open */
|
case '\0': /* URL ended while set was still open */
|
||||||
printf("error: unmatched brace at pos %d\n", pos);
|
/*printf("error: unmatched brace at pos %d\n", pos);*/
|
||||||
exit (CURLE_URL_MALFORMAT);
|
return GLOB_ERROR;
|
||||||
|
|
||||||
case '{':
|
case '{':
|
||||||
case '[': /* no nested expressions at this time */
|
case '[': /* no nested expressions at this time */
|
||||||
printf("error: nested braces not supported %d\n", pos);
|
/*printf("error: nested braces not supported %d\n", pos);*/
|
||||||
exit (CURLE_URL_MALFORMAT);
|
return GLOB_ERROR;
|
||||||
|
|
||||||
case ',':
|
case ',':
|
||||||
case '}': /* set element completed */
|
case '}': /* set element completed */
|
||||||
*buf = '\0';
|
*buf = '\0';
|
||||||
@ -66,40 +82,51 @@ int glob_set(URLGlob *glob, char *pattern, int pos)
|
|||||||
realloc(pat->content.Set.elements,
|
realloc(pat->content.Set.elements,
|
||||||
(pat->content.Set.size + 1) * sizeof(char*));
|
(pat->content.Set.size + 1) * sizeof(char*));
|
||||||
if (!pat->content.Set.elements) {
|
if (!pat->content.Set.elements) {
|
||||||
printf("out of memory in set pattern\n");
|
/*printf("out of memory in set pattern\n");*/
|
||||||
exit(CURLE_OUT_OF_MEMORY);
|
return GLOB_ERROR;
|
||||||
}
|
}
|
||||||
pat->content.Set.elements[pat->content.Set.size] =
|
pat->content.Set.elements[pat->content.Set.size] =
|
||||||
strdup(glob->glob_buffer);
|
strdup(glob->glob_buffer);
|
||||||
++pat->content.Set.size;
|
++pat->content.Set.size;
|
||||||
|
|
||||||
if (*pattern == '}') /* entire set pattern completed */
|
if (*pattern == '}') {
|
||||||
|
/* entire set pattern completed */
|
||||||
|
int wordamount;
|
||||||
|
|
||||||
/* always check for a literal (may be "") between patterns */
|
/* always check for a literal (may be "") between patterns */
|
||||||
return pat->content.Set.size * glob_word(glob, ++pattern, ++pos);
|
if(GLOB_ERROR == glob_word(glob, ++pattern, ++pos, &wordamount))
|
||||||
|
wordamount=1;
|
||||||
|
*amount = pat->content.Set.size * wordamount;
|
||||||
|
|
||||||
|
return GLOB_OK;
|
||||||
|
}
|
||||||
|
|
||||||
buf = glob->glob_buffer;
|
buf = glob->glob_buffer;
|
||||||
++pattern;
|
++pattern;
|
||||||
++pos;
|
++pos;
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case ']': /* illegal closing bracket */
|
case ']': /* illegal closing bracket */
|
||||||
printf("error: illegal pattern at pos %d\n", pos);
|
/*printf("error: illegal pattern at pos %d\n", pos);*/
|
||||||
exit (CURLE_URL_MALFORMAT);
|
return GLOB_ERROR;
|
||||||
|
|
||||||
case '\\': /* escaped character, skip '\' */
|
case '\\': /* escaped character, skip '\' */
|
||||||
if (*(buf+1) == '\0') { /* but no escaping of '\0'! */
|
if (*(buf+1) == '\0') { /* but no escaping of '\0'! */
|
||||||
printf("error: illegal pattern at pos %d\n", pos);
|
/*printf("error: illegal pattern at pos %d\n", pos); */
|
||||||
exit (CURLE_URL_MALFORMAT);
|
return GLOB_ERROR;
|
||||||
}
|
}
|
||||||
++pattern;
|
++pattern;
|
||||||
++pos; /* intentional fallthrough */
|
++pos; /* intentional fallthrough */
|
||||||
|
|
||||||
default:
|
default:
|
||||||
*buf++ = *pattern++; /* copy character to set element */
|
*buf++ = *pattern++; /* copy character to set element */
|
||||||
++pos;
|
++pos;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
exit (CURLE_FAILED_INIT);
|
return GLOB_ERROR;
|
||||||
}
|
}
|
||||||
|
|
||||||
int glob_range(URLGlob *glob, char *pattern, int pos)
|
GlobCode glob_range(URLGlob *glob, char *pattern, int pos, int *amount)
|
||||||
{
|
{
|
||||||
/* processes a range expression with the point behind the opening '['
|
/* processes a range expression with the point behind the opening '['
|
||||||
- char range: e.g. "a-z]", "B-Q]"
|
- char range: e.g. "a-z]", "B-Q]"
|
||||||
@ -109,6 +136,7 @@ int glob_range(URLGlob *glob, char *pattern, int pos)
|
|||||||
*/
|
*/
|
||||||
URLPattern *pat;
|
URLPattern *pat;
|
||||||
char *c;
|
char *c;
|
||||||
|
int wordamount=1;
|
||||||
|
|
||||||
pat = (URLPattern*)&glob->pattern[glob->size / 2];
|
pat = (URLPattern*)&glob->pattern[glob->size / 2];
|
||||||
/* patterns 0,1,2,... correspond to size=1,3,5,... */
|
/* patterns 0,1,2,... correspond to size=1,3,5,... */
|
||||||
@ -116,44 +144,71 @@ int glob_range(URLGlob *glob, char *pattern, int pos)
|
|||||||
|
|
||||||
if (isalpha((int)*pattern)) { /* character range detected */
|
if (isalpha((int)*pattern)) { /* character range detected */
|
||||||
pat->type = UPTCharRange;
|
pat->type = UPTCharRange;
|
||||||
if (sscanf(pattern, "%c-%c]", &pat->content.CharRange.min_c, &pat->content.CharRange.max_c) != 2 ||
|
if (sscanf(pattern, "%c-%c]", &pat->content.CharRange.min_c,
|
||||||
|
&pat->content.CharRange.max_c) != 2 ||
|
||||||
pat->content.CharRange.min_c >= pat->content.CharRange.max_c ||
|
pat->content.CharRange.min_c >= pat->content.CharRange.max_c ||
|
||||||
pat->content.CharRange.max_c - pat->content.CharRange.min_c > 'z' - 'a') {
|
pat->content.CharRange.max_c - pat->content.CharRange.min_c > 'z' - 'a') {
|
||||||
/* the pattern is not well-formed */
|
/* the pattern is not well-formed */
|
||||||
printf("error: illegal pattern or range specification after pos %d\n", pos);
|
#if 0
|
||||||
exit (CURLE_URL_MALFORMAT);
|
printf("error: illegal pattern or range specification after pos %d\n",
|
||||||
|
pos);
|
||||||
|
#endif
|
||||||
|
return GLOB_ERROR;
|
||||||
}
|
}
|
||||||
pat->content.CharRange.ptr_c = pat->content.CharRange.min_c;
|
pat->content.CharRange.ptr_c = pat->content.CharRange.min_c;
|
||||||
/* always check for a literal (may be "") between patterns */
|
/* always check for a literal (may be "") between patterns */
|
||||||
return (pat->content.CharRange.max_c - pat->content.CharRange.min_c + 1) *
|
|
||||||
glob_word(glob, pattern + 4, pos + 4);
|
if(GLOB_ERROR == glob_word(glob, pattern + 4, pos + 4, &wordamount))
|
||||||
|
wordamount=1;
|
||||||
|
|
||||||
|
*amount = (pat->content.CharRange.max_c -
|
||||||
|
pat->content.CharRange.min_c + 1) *
|
||||||
|
wordamount;
|
||||||
|
|
||||||
|
return GLOB_OK;
|
||||||
}
|
}
|
||||||
if (isdigit((int)*pattern)) { /* numeric range detected */
|
|
||||||
|
if (isdigit((int)*pattern)) { /* numeric range detected */
|
||||||
|
|
||||||
pat->type = UPTNumRange;
|
pat->type = UPTNumRange;
|
||||||
pat->content.NumRange.padlength = 0;
|
pat->content.NumRange.padlength = 0;
|
||||||
if (sscanf(pattern, "%d-%d]", &pat->content.NumRange.min_n, &pat->content.NumRange.max_n) != 2 ||
|
if (sscanf(pattern, "%d-%d]",
|
||||||
|
&pat->content.NumRange.min_n,
|
||||||
|
&pat->content.NumRange.max_n) != 2 ||
|
||||||
pat->content.NumRange.min_n >= pat->content.NumRange.max_n) {
|
pat->content.NumRange.min_n >= pat->content.NumRange.max_n) {
|
||||||
/* the pattern is not well-formed */
|
/* the pattern is not well-formed */
|
||||||
printf("error: illegal pattern or range specification after pos %d\n", pos);
|
#if 0
|
||||||
exit (CURLE_URL_MALFORMAT);
|
printf("error: illegal pattern or range specification after pos %d\n",
|
||||||
|
pos);
|
||||||
|
#endif
|
||||||
|
return GLOB_ERROR;
|
||||||
}
|
}
|
||||||
if (*pattern == '0') { /* leading zero specified */
|
if (*pattern == '0') { /* leading zero specified */
|
||||||
c = pattern;
|
c = pattern;
|
||||||
while (isdigit((int)*c++))
|
while (isdigit((int)*c++))
|
||||||
++pat->content.NumRange.padlength; /* padding length is set for all instances
|
++pat->content.NumRange.padlength; /* padding length is set for all
|
||||||
of this pattern */
|
instances of this pattern */
|
||||||
}
|
}
|
||||||
pat->content.NumRange.ptr_n = pat->content.NumRange.min_n;
|
pat->content.NumRange.ptr_n = pat->content.NumRange.min_n;
|
||||||
c = (char*)(strchr(pattern, ']') + 1); /* continue after next ']' */
|
c = (char*)(strchr(pattern, ']') + 1); /* continue after next ']' */
|
||||||
|
|
||||||
/* always check for a literal (may be "") between patterns */
|
/* always check for a literal (may be "") between patterns */
|
||||||
return (pat->content.NumRange.max_n - pat->content.NumRange.min_n + 1) *
|
|
||||||
glob_word(glob, c, pos + (c - pattern));
|
if(GLOB_ERROR == glob_word(glob, c, pos + (c - pattern), &wordamount))
|
||||||
|
wordamount = 1;
|
||||||
|
|
||||||
|
*amount = (pat->content.NumRange.max_n -
|
||||||
|
pat->content.NumRange.min_n + 1) *
|
||||||
|
wordamount;
|
||||||
|
|
||||||
|
return GLOB_OK;
|
||||||
}
|
}
|
||||||
printf("error: illegal character in range specification at pos %d\n", pos);
|
/*printf("error: illegal character in range specification at pos %d\n",
|
||||||
exit (CURLE_URL_MALFORMAT);
|
pos);*/
|
||||||
|
return GLOB_ERROR;
|
||||||
}
|
}
|
||||||
|
|
||||||
int glob_word(URLGlob *glob, char *pattern, int pos)
|
GlobCode glob_word(URLGlob *glob, char *pattern, int pos, int *amount)
|
||||||
{
|
{
|
||||||
/* processes a literal string component of a URL
|
/* processes a literal string component of a URL
|
||||||
special characters '{' and '[' branch to set/range processing functions
|
special characters '{' and '[' branch to set/range processing functions
|
||||||
@ -161,17 +216,17 @@ int glob_word(URLGlob *glob, char *pattern, int pos)
|
|||||||
char* buf = glob->glob_buffer;
|
char* buf = glob->glob_buffer;
|
||||||
int litindex;
|
int litindex;
|
||||||
|
|
||||||
|
*amount = 1; /* default is one single string */
|
||||||
|
|
||||||
while (*pattern != '\0' && *pattern != '{' && *pattern != '[') {
|
while (*pattern != '\0' && *pattern != '{' && *pattern != '[') {
|
||||||
if (*pattern == '}' || *pattern == ']') {
|
if (*pattern == '}' || *pattern == ']') {
|
||||||
printf("illegal character at position %d\n", pos);
|
return GLOB_ERROR;
|
||||||
exit (CURLE_URL_MALFORMAT);
|
|
||||||
}
|
}
|
||||||
if (*pattern == '\\') { /* escape character, skip '\' */
|
if (*pattern == '\\') { /* escape character, skip '\' */
|
||||||
++pattern;
|
++pattern;
|
||||||
++pos;
|
++pos;
|
||||||
if (*pattern == '\0') { /* but no escaping of '\0'! */
|
if (*pattern == '\0') { /* but no escaping of '\0'! */
|
||||||
printf("illegal character at position %d\n", pos);
|
return GLOB_ERROR;
|
||||||
exit (CURLE_URL_MALFORMAT);
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
*buf++ = *pattern++; /* copy character to literal */
|
*buf++ = *pattern++; /* copy character to literal */
|
||||||
@ -182,16 +237,21 @@ int glob_word(URLGlob *glob, char *pattern, int pos)
|
|||||||
/* literals 0,1,2,... correspond to size=0,2,4,... */
|
/* literals 0,1,2,... correspond to size=0,2,4,... */
|
||||||
glob->literal[litindex] = strdup(glob->glob_buffer);
|
glob->literal[litindex] = strdup(glob->glob_buffer);
|
||||||
++glob->size;
|
++glob->size;
|
||||||
if (*pattern == '\0')
|
|
||||||
return 1; /* singular URL processed */
|
switch (*pattern) {
|
||||||
if (*pattern == '{') {
|
case '\0':
|
||||||
return glob_set(glob, ++pattern, ++pos); /* process set pattern */
|
return GLOB_OK; /* singular URL processed */
|
||||||
|
|
||||||
|
case '{':
|
||||||
|
/* process set pattern */
|
||||||
|
return glob_set(glob, ++pattern, ++pos, amount);
|
||||||
|
|
||||||
|
case '[':
|
||||||
|
/* process range pattern */
|
||||||
|
return glob_range(glob, ++pattern, ++pos, amount);
|
||||||
}
|
}
|
||||||
if (*pattern == '[') {
|
|
||||||
return glob_range(glob, ++pattern, ++pos);/* process range pattern */
|
return GLOB_ERROR; /* something got wrong */
|
||||||
}
|
|
||||||
printf("internal error\n");
|
|
||||||
exit (CURLE_FAILED_INIT);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
int glob_url(URLGlob** glob, char* url, int *urlnum)
|
int glob_url(URLGlob** glob, char* url, int *urlnum)
|
||||||
@ -201,7 +261,9 @@ int glob_url(URLGlob** glob, char* url, int *urlnum)
|
|||||||
* as the specified URL!
|
* as the specified URL!
|
||||||
*/
|
*/
|
||||||
URLGlob *glob_expand;
|
URLGlob *glob_expand;
|
||||||
|
int amount;
|
||||||
char *glob_buffer=(char *)malloc(strlen(url)+1);
|
char *glob_buffer=(char *)malloc(strlen(url)+1);
|
||||||
|
|
||||||
if(NULL == glob_buffer)
|
if(NULL == glob_buffer)
|
||||||
return CURLE_OUT_OF_MEMORY;
|
return CURLE_OUT_OF_MEMORY;
|
||||||
|
|
||||||
@ -214,7 +276,16 @@ int glob_url(URLGlob** glob, char* url, int *urlnum)
|
|||||||
glob_expand->urllen = strlen(url);
|
glob_expand->urllen = strlen(url);
|
||||||
glob_expand->glob_buffer = glob_buffer;
|
glob_expand->glob_buffer = glob_buffer;
|
||||||
glob_expand->beenhere=0;
|
glob_expand->beenhere=0;
|
||||||
*urlnum = glob_word(glob_expand, url, 1);
|
if(GLOB_OK == glob_word(glob_expand, url, 1, &amount))
|
||||||
|
*urlnum = amount;
|
||||||
|
else {
|
||||||
|
/* it failed, we cleanup */
|
||||||
|
free(glob_buffer);
|
||||||
|
free(glob_expand);
|
||||||
|
glob_expand = NULL;
|
||||||
|
*urlnum = 1;
|
||||||
|
}
|
||||||
|
|
||||||
*glob = glob_expand;
|
*glob = glob_expand;
|
||||||
return CURLE_OK;
|
return CURLE_OK;
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user