/******************************************************************************* * * Copyright (c) 2000-2003 Intel Corporation * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * - Redistributions of source code must retain the above copyright notice, * this list of conditions and the following disclaimer. * - Redistributions in binary form must reproduce the above copyright notice, * this list of conditions and the following disclaimer in the documentation * and/or other materials provided with the distribution. * - Neither name of Intel Corporation nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * ******************************************************************************/ /*! * \file * * \brief Contains functions for uri, url parsing utility. */ #ifdef __FreeBSD__ #include #if __FreeBSD_version < 601103 #include #endif #endif #include #include "config.h" #include "uri.h" #include "upnpapi.h" /*! * \brief Returns a 1 if a char is a RESERVED char as defined in * http://www.ietf.org/rfc/rfc2396.txt RFC explaining URIs). * * \return 1 if char is a RESERVED char. */ static int is_reserved( /*! [in] Char to be matched for RESERVED characters. */ char in) { if (strchr(RESERVED, in)) { return 1; } else { return 0; } } /*! * \brief Returns a 1 if a char is a MARK char as defined in * http://www.ietf.org/rfc/rfc2396.txt (RFC explaining URIs). * * \return 1 if char is a MARKED char. */ int is_mark( /*! [in] Char to be matched for MARKED characters. */ char in) { if (strchr(MARK, in)) { return 1; } else { return 0; } } /*! * \brief Returns a 1 if a char is an UNRESERVED char as defined in * http://www.ietf.org/rfc/rfc2396.txt (RFC explaining URIs). * * \return 1 if char is a UNRESERVED char. */ int is_unreserved( /*! [in] Char to be matched for UNRESERVED characters. */ char in) { if (isalnum(in) || is_mark(in)) { return 1; } else { return 0; } } /*! * \brief Returns a 1 if a char[3] sequence is ESCAPED as defined in * http://www.ietf.org/rfc/rfc2396.txt (RFC explaining URIs). * * Size of array is NOT checked (MUST be checked by caller). * * \return 1 if char is a ESCAPED char. */ int is_escaped( /*! [in] Char sequence to be matched for ESCAPED characters. */ const char *in) { if (in[0] == '%' && isxdigit(in[1]) && isxdigit(in[2])) { return 1; } else { return 0; } } int replace_escaped(char *in, size_t index, size_t *max) { int tempInt = 0; char tempChar = 0; size_t i = 0; size_t j = 0; if (in[index] == '%' && isxdigit(in[index + 1]) && isxdigit(in[index + 2])) { /* Note the "%2x", makes sure that we convert a maximum of two * characters. */ if (sscanf(&in[index + 1], "%2x", &tempInt) != 1) { return 0; } tempChar = (char)tempInt; for (i = index + 3, j = index; j < *max; i++, j++) { in[j] = tempChar; if (i < *max) { tempChar = in[i]; } else { tempChar = 0; } } *max -= 2; return 1; } else { return 0; } } /*! * \brief Parses a string of uric characters starting at in[0] as defined in * http://www.ietf.org/rfc/rfc2396.txt (RFC explaining URIs). * * \return */ static size_t parse_uric( /*! [in] String of characters. */ const char *in, /*! [in] Maximum limit. */ size_t max, /*! [out] Token object where the string of characters is copied. */ token *out) { size_t i = 0; while (i < max && (is_unreserved(in[i]) || is_reserved(in[i]) || ((i + 2 < max) && is_escaped(&in[i])))) { i++; } out->size = i; out->buff = in; return i; } /*! * \brief Tokens are generally pointers into other strings. This copies the * offset and size from a token (in) relative to one string (in_base) into * a token (out) relative to another string (out_base). */ static void copy_token( /*! [in] Source token. */ const token *in, /*! [in] . */ const char *in_base, /*! [out] Destination token. */ token *out, /*! [in] . */ char *out_base) { out->size = in->size; out->buff = out_base + (in->buff - in_base); } int copy_URL_list(URL_list *in, URL_list *out) { size_t len = strlen(in->URLs) + 1; size_t i = 0; out->URLs = NULL; out->parsedURLs = NULL; out->size = 0; out->URLs = malloc(len); out->parsedURLs = malloc(sizeof(uri_type) * in->size); if ( !out->URLs || !out->parsedURLs) return UPNP_E_OUTOF_MEMORY; memcpy(out->URLs, in->URLs, len); for( i = 0; i < in->size; i++ ) { /*copy the parsed uri */ out->parsedURLs[i].type = in->parsedURLs[i].type; copy_token( &in->parsedURLs[i].scheme, in->URLs, &out->parsedURLs[i].scheme, out->URLs ); out->parsedURLs[i].path_type = in->parsedURLs[i].path_type; copy_token( &in->parsedURLs[i].pathquery, in->URLs, &out->parsedURLs[i].pathquery, out->URLs ); copy_token( &in->parsedURLs[i].fragment, in->URLs, &out->parsedURLs[i].fragment, out->URLs ); copy_token( &in->parsedURLs[i].hostport.text, in->URLs, &out->parsedURLs[i].hostport.text, out->URLs ); memcpy( &out->parsedURLs[i].hostport.IPaddress, &in->parsedURLs[i].hostport.IPaddress, sizeof(struct sockaddr_storage) ); } out->size = in->size; return HTTP_SUCCESS; } void free_URL_list(URL_list *list) { if (list->URLs) { free(list->URLs); } if (list->parsedURLs) { free(list->parsedURLs); } list->size = 0; } #ifdef DEBUG void print_uri(uri_type *in) { print_token(&in->scheme); print_token(&in->hostport.text); print_token(&in->pathquery); print_token(&in->fragment); } #endif /* DEBUG */ #ifdef DEBUG void print_token(token * in) { size_t i = 0; printf("Token Size : %" PRIzu "\n\'", in->size); for (i = 0; i < in->size; i++) putchar(in->buff[i]); putchar('\''); putchar('\n'); } #endif /* DEBUG */ int token_string_casecmp(token *in1, const char *in2) { size_t in2_length = strlen(in2); if (in1->size != in2_length) return 1; else return strncasecmp(in1->buff, in2, in1->size); } int token_string_cmp(token * in1, char *in2) { size_t in2_length = strlen(in2); if (in1->size != in2_length) return 1; else return strncmp(in1->buff, in2, in1->size); } int token_cmp(token *in1, token *in2) { if (in1->size != in2->size) return 1; else return memcmp(in1->buff, in2->buff, in1->size); } int parse_hostport( const char *in, size_t max, hostport_type *out) { char workbuf[256]; char *c; struct sockaddr_in *sai4 = (struct sockaddr_in *)&out->IPaddress; struct sockaddr_in6 *sai6 = (struct sockaddr_in6 *)&out->IPaddress; char *srvname = NULL; char *srvport = NULL; char *last_dot = NULL; unsigned short int port; int af = AF_UNSPEC; size_t hostport_size; int has_port = 0; int ret; memset(out, 0, sizeof(hostport_type)); /* Work on a copy of the input string. */ strncpy(workbuf, in, sizeof(workbuf)); c = workbuf; if (*c == '[') { /* IPv6 addresses are enclosed in square brackets. */ srvname = ++c; while (*c != '\0' && *c != ']') c++; if (*c == '\0') /* did not find closing bracket. */ return UPNP_E_INVALID_URL; /* NULL terminate the srvname and then increment c. */ *c++ = '\0'; /* overwrite the ']' */ if (*c == ':') { has_port = 1; c++; } af = AF_INET6; } else { /* IPv4 address -OR- host name. */ srvname = c; while (*c != ':' && *c != '/' && (isalnum(*c) || *c == '.' || *c == '-')) { if (*c == '.') last_dot = c; c++; } has_port = (*c == ':') ? 1 : 0; /* NULL terminate the srvname */ *c = '\0'; if (has_port == 1) c++; if (last_dot != NULL && isdigit(*(last_dot + 1))) /* Must be an IPv4 address. */ af = AF_INET; else { /* Must be a host name. */ struct addrinfo hints, *res, *res0; memset(&hints, 0, sizeof(hints)); hints.ai_family = AF_UNSPEC; hints.ai_socktype = SOCK_STREAM; ret = getaddrinfo(srvname, NULL, &hints, &res0); if (ret == 0) { for (res = res0; res; res = res->ai_next) { if (res->ai_family == AF_INET || res->ai_family == AF_INET6) { /* Found a valid IPv4 or IPv6 address. */ memcpy(&out->IPaddress, res->ai_addr, res->ai_addrlen); break; } } freeaddrinfo(res0); if (res == NULL) /* Didn't find an AF_INET or AF_INET6 address. */ return UPNP_E_INVALID_URL; } else /* getaddrinfo failed. */ return UPNP_E_INVALID_URL; } } /* Check if a port is specified. */ if (has_port == 1) { /* Port is specified. */ srvport = c; while (*c != '\0' && isdigit(*c)) c++; port = (unsigned short int)atoi(srvport); if (port == 0) /* Bad port number. */ return UPNP_E_INVALID_URL; } else /* Port was not specified, use default port. */ port = 80; /* The length of the host and port string can be calculated by */ /* subtracting pointers. */ hostport_size = (size_t)(c - workbuf); /* Fill in the 'out' information. */ if (af == AF_INET) { sai4->sin_family = AF_INET; sai4->sin_port = htons(port); ret = inet_pton(AF_INET, srvname, &sai4->sin_addr); } else if (af == AF_INET6) { sai6->sin6_family = AF_INET6; sai6->sin6_port = htons(port); sai6->sin6_scope_id = gIF_INDEX; ret = inet_pton(AF_INET6, srvname, &sai6->sin6_addr); } else { /* IP address was set by the hostname (getaddrinfo). */ /* Override port: */ if (out->IPaddress.ss_family == AF_INET) sai4->sin_port = htons(port); else sai6->sin6_port = htons(port); ret = 1; } /* Check if address was converted successfully. */ if (ret <= 0) return UPNP_E_INVALID_URL; out->text.size = hostport_size; out->text.buff = in; return (int)hostport_size; max = max; } /*! * \brief parses a uri scheme starting at in[0] as defined in * http://www.ietf.org/rfc/rfc2396.txt (RFC explaining URIs). * * (e.g. "http:" -> scheme= "http"). * * \note String MUST include ':' within the max charcters. * * \return */ static size_t parse_scheme( /*! [in] String of characters representing a scheme. */ const char *in, /*! [in] Maximum number of characters. */ size_t max, /*! [out] Output parameter whose buffer is filled in with the scheme. */ token *out) { size_t i = 0; out->size = 0; out->buff = NULL; if( ( max == 0 ) || ( !isalpha( in[0] ) ) ) return 0; i++; while( ( i < max ) && ( in[i] != ':' ) ) { if( !( isalnum( in[i] ) || ( in[i] == '+' ) || ( in[i] == '-' ) || ( in[i] == '.' ) ) ) return 0; i++; } if( i < max ) { out->size = i; out->buff = &in[0]; return i; } return 0; } int remove_escaped_chars(INOUT char *in, INOUT size_t *size) { size_t i = 0; for (i = 0; i < *size; i++) { replace_escaped(in, i, size); } return UPNP_E_SUCCESS; } int remove_dots(char *in, size_t size) { char *copyTo = in; char *copyFrom = in; char *max = in + size; char **Segments = NULL; int lastSegment = -1; Segments = malloc( sizeof( char * ) * size ); if( Segments == NULL ) return UPNP_E_OUTOF_MEMORY; Segments[0] = NULL; UpnpPrintf( UPNP_ALL, API, __FILE__, __LINE__, "REMOVE_DOTS: before: %s\n", in ); while( ( copyFrom < max ) && ( *copyFrom != '?' ) && ( *copyFrom != '#' ) ) { if( ( ( *copyFrom ) == '.' ) && ( ( copyFrom == in ) || ( *( copyFrom - 1 ) == '/' ) ) ) { if( ( copyFrom + 1 == max ) || ( *( copyFrom + 1 ) == '/' ) ) { copyFrom += 2; continue; } else if( ( *( copyFrom + 1 ) == '.' ) && ( ( copyFrom + 2 == max ) || ( *( copyFrom + 2 ) == '/' ) ) ) { copyFrom += 3; if( lastSegment > 0 ) { copyTo = Segments[--lastSegment]; } else { free( Segments ); /*TRACE("ERROR RESOLVING URL, ../ at ROOT"); */ return UPNP_E_INVALID_URL; } continue; } } if( ( *copyFrom ) == '/' ) { lastSegment++; Segments[lastSegment] = copyTo + 1; } ( *copyTo ) = ( *copyFrom ); copyTo++; copyFrom++; } if( copyFrom < max ) { while( copyFrom < max ) { ( *copyTo ) = ( *copyFrom ); copyTo++; copyFrom++; } } ( *copyTo ) = 0; free( Segments ); UpnpPrintf( UPNP_ALL, API, __FILE__, __LINE__, "REMOVE_DOTS: after: %s\n", in ); return UPNP_E_SUCCESS; } char *resolve_rel_url(char *base_url, char *rel_url) { uri_type base; uri_type rel; char temp_path = '/'; size_t i = 0; char *finger = NULL; char *last_slash = NULL; char *out = NULL; char *out_finger = NULL; if( base_url && rel_url ) { out = ( char * )malloc( strlen( base_url ) + strlen( rel_url ) + 2 ); out_finger = out; } else { if( rel_url ) return strdup( rel_url ); else return NULL; } if( out == NULL ) { return NULL; } if( ( parse_uri( rel_url, strlen( rel_url ), &rel ) ) == HTTP_SUCCESS ) { if( rel.type == ABSOLUTE ) { strcpy( out, rel_url ); } else { if( ( parse_uri( base_url, strlen( base_url ), &base ) == HTTP_SUCCESS ) && ( base.type == ABSOLUTE ) ) { if( strlen( rel_url ) == 0 ) { strcpy( out, base_url ); } else { memcpy( out, base.scheme.buff, base.scheme.size ); out_finger += base.scheme.size; ( *out_finger ) = ':'; out_finger++; if( rel.hostport.text.size > 0 ) { sprintf( out_finger, "%s", rel_url ); } else { if( base.hostport.text.size > 0 ) { memcpy( out_finger, "//", 2 ); out_finger += 2; memcpy( out_finger, base.hostport.text.buff, base.hostport.text.size ); out_finger += base.hostport.text.size; } if( rel.path_type == ABS_PATH ) { strcpy( out_finger, rel_url ); } else { if( base.pathquery.size == 0 ) { base.pathquery.size = 1; base.pathquery.buff = &temp_path; } finger = out_finger; last_slash = finger; i = 0; while( ( i < base.pathquery.size ) && ( base.pathquery.buff[i] != '?' ) ) { ( *finger ) = base.pathquery.buff[i]; if( base.pathquery.buff[i] == '/' ) last_slash = finger + 1; i++; finger++; } strcpy( last_slash, rel_url ); if( remove_dots( out_finger, strlen( out_finger ) ) != UPNP_E_SUCCESS ) { free(out); /* free(rel_url); */ return NULL; } } } } } else { free(out); /* free(rel_url); */ return NULL; } } } else { free(out); /* free(rel_url); */ return NULL; } /* free(rel_url); */ return out; } int parse_uri(const char *in, size_t max, uri_type *out) { int begin_path = 0; size_t begin_hostport = 0; size_t begin_fragment = 0; begin_hostport = parse_scheme(in, max, &out->scheme); if (begin_hostport) { out->type = ABSOLUTE; out->path_type = OPAQUE_PART; begin_hostport++; } else { out->type = RELATIVE; out->path_type = REL_PATH; } if (begin_hostport + 1 < max && in[begin_hostport] == '/' && in[begin_hostport + 1] == '/') { begin_hostport += 2; begin_path = parse_hostport(&in[begin_hostport], max - begin_hostport, &out->hostport); if (begin_path >= 0) { begin_path += (int)begin_hostport; } else return begin_path; } else { memset(&out->hostport, 0, sizeof(out->hostport)); begin_path = (int)begin_hostport; } begin_fragment = parse_uric(&in[begin_path], max - (size_t)begin_path, &out->pathquery) + (size_t)begin_path; if (out->pathquery.size && out->pathquery.buff[0] == '/') { out->path_type = ABS_PATH; } if (begin_fragment < max && in[begin_fragment] == '#') { begin_fragment++; parse_uric(&in[begin_fragment], max - begin_fragment, &out->fragment); } else { out->fragment.buff = NULL; out->fragment.size = 0; } return HTTP_SUCCESS; } int parse_uri_and_unescape(char *in, size_t max, uri_type *out) { int ret = parse_uri(in, max, out); if (ret != HTTP_SUCCESS) { return ret; } if (out->pathquery.size > 0) { remove_escaped_chars((char *)out->pathquery.buff, &out->pathquery.size); } if (out->fragment.size > 0) { remove_escaped_chars((char *)out->fragment.buff, &out->fragment.size); } return HTTP_SUCCESS; }