// // URI.cpp // // $Id: //poco/1.4/Foundation/src/URI.cpp#5 $ // // Library: Foundation // Package: URI // Module: URI // // Copyright (c) 2004-2006, Applied Informatics Software Engineering GmbH. // and Contributors. // // SPDX-License-Identifier: BSL-1.0 // #include "Poco/URI.h" #include "Poco/NumberFormatter.h" #include "Poco/Exception.h" #include "Poco/String.h" #include "Poco/NumberParser.h" namespace Poco { const std::string URI::RESERVED_PATH = "?#"; const std::string URI::RESERVED_QUERY = "#"; const std::string URI::RESERVED_FRAGMENT = ""; const std::string URI::ILLEGAL = "%<>{}|\\\"^`"; URI::URI(): _port(0) { } URI::URI(const std::string& uri): _port(0) { parse(uri); } URI::URI(const char* uri): _port(0) { parse(std::string(uri)); } URI::URI(const std::string& scheme, const std::string& pathEtc): _scheme(scheme), _port(0) { toLowerInPlace(_scheme); _port = getWellKnownPort(); std::string::const_iterator beg = pathEtc.begin(); std::string::const_iterator end = pathEtc.end(); parsePathEtc(beg, end); } URI::URI(const std::string& scheme, const std::string& authority, const std::string& pathEtc): _scheme(scheme) { toLowerInPlace(_scheme); std::string::const_iterator beg = authority.begin(); std::string::const_iterator end = authority.end(); parseAuthority(beg, end); beg = pathEtc.begin(); end = pathEtc.end(); parsePathEtc(beg, end); } URI::URI(const std::string& scheme, const std::string& authority, const std::string& path, const std::string& query): _scheme(scheme), _path(path), _query(query) { toLowerInPlace(_scheme); std::string::const_iterator beg = authority.begin(); std::string::const_iterator end = authority.end(); parseAuthority(beg, end); } URI::URI(const std::string& scheme, const std::string& authority, const std::string& path, const std::string& query, const std::string& fragment): _scheme(scheme), _path(path), _query(query), _fragment(fragment) { toLowerInPlace(_scheme); std::string::const_iterator beg = authority.begin(); std::string::const_iterator end = authority.end(); parseAuthority(beg, end); } URI::URI(const URI& uri): _scheme(uri._scheme), _userInfo(uri._userInfo), _host(uri._host), _port(uri._port), _path(uri._path), _query(uri._query), _fragment(uri._fragment) { } URI::URI(const URI& baseURI, const std::string& relativeURI): _scheme(baseURI._scheme), _userInfo(baseURI._userInfo), _host(baseURI._host), _port(baseURI._port), _path(baseURI._path), _query(baseURI._query), _fragment(baseURI._fragment) { resolve(relativeURI); } URI::~URI() { } URI& URI::operator = (const URI& uri) { if (&uri != this) { _scheme = uri._scheme; _userInfo = uri._userInfo; _host = uri._host; _port = uri._port; _path = uri._path; _query = uri._query; _fragment = uri._fragment; } return *this; } URI& URI::operator = (const std::string& uri) { clear(); parse(uri); return *this; } URI& URI::operator = (const char* uri) { clear(); parse(std::string(uri)); return *this; } void URI::swap(URI& uri) { std::swap(_scheme, uri._scheme); std::swap(_userInfo, uri._userInfo); std::swap(_host, uri._host); std::swap(_port, uri._port); std::swap(_path, uri._path); std::swap(_query, uri._query); std::swap(_fragment, uri._fragment); } void URI::clear() { _scheme.clear(); _userInfo.clear(); _host.clear(); _port = 0; _path.clear(); _query.clear(); _fragment.clear(); } std::string URI::toString() const { std::string uri; if (isRelative()) { encode(_path, RESERVED_PATH, uri); } else { uri = _scheme; uri += ':'; std::string auth = getAuthority(); if (!auth.empty() || _scheme == "file") { uri.append("//"); uri.append(auth); } if (!_path.empty()) { if (!auth.empty() && _path[0] != '/') uri += '/'; encode(_path, RESERVED_PATH, uri); } else if (!_query.empty() || !_fragment.empty()) { uri += '/'; } } if (!_query.empty()) { uri += '?'; uri.append(_query); } if (!_fragment.empty()) { uri += '#'; encode(_fragment, RESERVED_FRAGMENT, uri); } return uri; } void URI::setScheme(const std::string& scheme) { _scheme = scheme; toLowerInPlace(_scheme); if (_port == 0) _port = getWellKnownPort(); } void URI::setUserInfo(const std::string& userInfo) { _userInfo.clear(); decode(userInfo, _userInfo); } void URI::setHost(const std::string& host) { _host = host; } unsigned short URI::getPort() const { if (_port == 0) return getWellKnownPort(); else return _port; } void URI::setPort(unsigned short port) { _port = port; } std::string URI::getAuthority() const { std::string auth; if (!_userInfo.empty()) { auth.append(_userInfo); auth += '@'; } if (_host.find(':') != std::string::npos) { auth += '['; auth += _host; auth += ']'; } else auth.append(_host); if (_port && !isWellKnownPort()) { auth += ':'; NumberFormatter::append(auth, _port); } return auth; } void URI::setAuthority(const std::string& authority) { _userInfo.clear(); _host.clear(); _port = 0; std::string::const_iterator beg = authority.begin(); std::string::const_iterator end = authority.end(); parseAuthority(beg, end); } void URI::setPath(const std::string& path) { _path.clear(); decode(path, _path); } void URI::setRawQuery(const std::string& query) { _query = query; } void URI::setQuery(const std::string& query) { _query.clear(); encode(query, RESERVED_QUERY, _query); } void URI::addQueryParameter(const std::string& param, const std::string& val) { std::string reserved(RESERVED_QUERY); reserved += "=&"; if (!_query.empty()) _query.append(1, '&'); encode(param, reserved, _query); _query.append(1, '='); encode(val, reserved, _query); } std::string URI::getQuery() const { std::string query; decode(_query, query); return query; } void URI::setFragment(const std::string& fragment) { _fragment.clear(); decode(fragment, _fragment); } void URI::setPathEtc(const std::string& pathEtc) { _path.clear(); _query.clear(); _fragment.clear(); std::string::const_iterator beg = pathEtc.begin(); std::string::const_iterator end = pathEtc.end(); parsePathEtc(beg, end); } std::string URI::getPathEtc() const { std::string pathEtc; encode(_path, RESERVED_PATH, pathEtc); if (!_query.empty()) { pathEtc += '?'; pathEtc += _query; } if (!_fragment.empty()) { pathEtc += '#'; encode(_fragment, RESERVED_FRAGMENT, pathEtc); } return pathEtc; } std::string URI::getPathAndQuery() const { std::string pathAndQuery; encode(_path, RESERVED_PATH, pathAndQuery); if (!_query.empty()) { pathAndQuery += '?'; pathAndQuery += _query; } return pathAndQuery; } void URI::resolve(const std::string& relativeURI) { URI parsedURI(relativeURI); resolve(parsedURI); } void URI::resolve(const URI& relativeURI) { if (!relativeURI._scheme.empty()) { _scheme = relativeURI._scheme; _userInfo = relativeURI._userInfo; _host = relativeURI._host; _port = relativeURI._port; _path = relativeURI._path; _query = relativeURI._query; removeDotSegments(); } else { if (!relativeURI._host.empty()) { _userInfo = relativeURI._userInfo; _host = relativeURI._host; _port = relativeURI._port; _path = relativeURI._path; _query = relativeURI._query; removeDotSegments(); } else { if (relativeURI._path.empty()) { if (!relativeURI._query.empty()) _query = relativeURI._query; } else { if (relativeURI._path[0] == '/') { _path = relativeURI._path; removeDotSegments(); } else { mergePath(relativeURI._path); } _query = relativeURI._query; } } } _fragment = relativeURI._fragment; } bool URI::isRelative() const { return _scheme.empty(); } bool URI::empty() const { return _scheme.empty() && _host.empty() && _path.empty() && _query.empty() && _fragment.empty(); } bool URI::operator == (const URI& uri) const { return equals(uri); } bool URI::operator == (const std::string& uri) const { URI parsedURI(uri); return equals(parsedURI); } bool URI::operator != (const URI& uri) const { return !equals(uri); } bool URI::operator != (const std::string& uri) const { URI parsedURI(uri); return !equals(parsedURI); } bool URI::equals(const URI& uri) const { return _scheme == uri._scheme && _userInfo == uri._userInfo && _host == uri._host && getPort() == uri.getPort() && _path == uri._path && _query == uri._query && _fragment == uri._fragment; } void URI::normalize() { removeDotSegments(!isRelative()); } void URI::removeDotSegments(bool removeLeading) { if (_path.empty()) return; bool leadingSlash = *(_path.begin()) == '/'; bool trailingSlash = *(_path.rbegin()) == '/'; std::vector segments; std::vector normalizedSegments; getPathSegments(segments); for (std::vector::const_iterator it = segments.begin(); it != segments.end(); ++it) { if (*it == "..") { if (!normalizedSegments.empty()) { if (normalizedSegments.back() == "..") normalizedSegments.push_back(*it); else normalizedSegments.pop_back(); } else if (!removeLeading) { normalizedSegments.push_back(*it); } } else if (*it != ".") { normalizedSegments.push_back(*it); } } buildPath(normalizedSegments, leadingSlash, trailingSlash); } void URI::getPathSegments(std::vector& segments) { getPathSegments(_path, segments); } void URI::getPathSegments(const std::string& path, std::vector& segments) { std::string::const_iterator it = path.begin(); std::string::const_iterator end = path.end(); std::string seg; while (it != end) { if (*it == '/') { if (!seg.empty()) { segments.push_back(seg); seg.clear(); } } else seg += *it; ++it; } if (!seg.empty()) segments.push_back(seg); } void URI::encode(const std::string& str, const std::string& reserved, std::string& encodedStr) { for (std::string::const_iterator it = str.begin(); it != str.end(); ++it) { char c = *it; if ((c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || (c >= '0' && c <= '9') || c == '-' || c == '_' || c == '.' || c == '~') { encodedStr += c; } else if (c <= 0x20 || c >= 0x7F || ILLEGAL.find(c) != std::string::npos || reserved.find(c) != std::string::npos) { encodedStr += '%'; encodedStr += NumberFormatter::formatHex((unsigned) (unsigned char) c, 2); } else encodedStr += c; } } void URI::decode(const std::string& str, std::string& decodedStr, bool plusAsSpace) { bool inQuery = false; std::string::const_iterator it = str.begin(); std::string::const_iterator end = str.end(); while (it != end) { char c = *it++; if (c == '?') inQuery = true; // spaces may be encoded as plus signs in the query if (inQuery && plusAsSpace && c == '+') c = ' '; else if (c == '%') { if (it == end) throw SyntaxException("URI encoding: no hex digit following percent sign", str); char hi = *it++; if (it == end) throw SyntaxException("URI encoding: two hex digits must follow percent sign", str); char lo = *it++; if (hi >= '0' && hi <= '9') c = hi - '0'; else if (hi >= 'A' && hi <= 'F') c = hi - 'A' + 10; else if (hi >= 'a' && hi <= 'f') c = hi - 'a' + 10; else throw SyntaxException("URI encoding: not a hex digit"); c *= 16; if (lo >= '0' && lo <= '9') c += lo - '0'; else if (lo >= 'A' && lo <= 'F') c += lo - 'A' + 10; else if (lo >= 'a' && lo <= 'f') c += lo - 'a' + 10; else throw SyntaxException("URI encoding: not a hex digit"); } decodedStr += c; } } bool URI::isWellKnownPort() const { return _port == getWellKnownPort(); } unsigned short URI::getWellKnownPort() const { if (_scheme == "ftp") return 21; else if (_scheme == "ssh") return 22; else if (_scheme == "telnet") return 23; else if (_scheme == "http") return 80; else if (_scheme == "nntp") return 119; else if (_scheme == "ldap") return 389; else if (_scheme == "https") return 443; else if (_scheme == "rtsp") return 554; else if (_scheme == "sip") return 5060; else if (_scheme == "sips") return 5061; else if (_scheme == "xmpp") return 5222; else return 0; } void URI::parse(const std::string& uri) { std::string::const_iterator it = uri.begin(); std::string::const_iterator end = uri.end(); if (it == end) return; if (*it != '/' && *it != '.' && *it != '?' && *it != '#') { std::string scheme; while (it != end && *it != ':' && *it != '?' && *it != '#' && *it != '/') scheme += *it++; if (it != end && *it == ':') { ++it; if (it == end) throw SyntaxException("URI scheme must be followed by authority or path", uri); setScheme(scheme); if (*it == '/') { ++it; if (it != end && *it == '/') { ++it; parseAuthority(it, end); } else --it; } parsePathEtc(it, end); } else { it = uri.begin(); parsePathEtc(it, end); } } else parsePathEtc(it, end); } void URI::parseAuthority(std::string::const_iterator& it, const std::string::const_iterator& end) { std::string userInfo; std::string part; while (it != end && *it != '/' && *it != '?' && *it != '#') { if (*it == '@') { userInfo = part; part.clear(); } else part += *it; ++it; } std::string::const_iterator pbeg = part.begin(); std::string::const_iterator pend = part.end(); parseHostAndPort(pbeg, pend); _userInfo = userInfo; } void URI::parseHostAndPort(std::string::const_iterator& it, const std::string::const_iterator& end) { if (it == end) return; std::string host; if (*it == '[') { // IPv6 address ++it; while (it != end && *it != ']') host += *it++; if (it == end) throw SyntaxException("unterminated IPv6 address"); ++it; } else { while (it != end && *it != ':') host += *it++; } if (it != end && *it == ':') { ++it; std::string port; while (it != end) port += *it++; if (!port.empty()) { int nport = 0; if (NumberParser::tryParse(port, nport) && nport > 0 && nport < 65536) _port = (unsigned short) nport; else throw SyntaxException("bad or invalid port number", port); } else _port = getWellKnownPort(); } else _port = getWellKnownPort(); _host = host; toLowerInPlace(_host); } void URI::parsePath(std::string::const_iterator& it, const std::string::const_iterator& end) { std::string path; while (it != end && *it != '?' && *it != '#') path += *it++; decode(path, _path); } void URI::parsePathEtc(std::string::const_iterator& it, const std::string::const_iterator& end) { if (it == end) return; if (*it != '?' && *it != '#') parsePath(it, end); if (it != end && *it == '?') { ++it; parseQuery(it, end); } if (it != end && *it == '#') { ++it; parseFragment(it, end); } } void URI::parseQuery(std::string::const_iterator& it, const std::string::const_iterator& end) { _query.clear(); while (it != end && *it != '#') _query += *it++; } void URI::parseFragment(std::string::const_iterator& it, const std::string::const_iterator& end) { std::string fragment; while (it != end) fragment += *it++; decode(fragment, _fragment); } void URI::mergePath(const std::string& path) { std::vector segments; std::vector normalizedSegments; bool addLeadingSlash = false; if (!_path.empty()) { getPathSegments(segments); bool endsWithSlash = *(_path.rbegin()) == '/'; if (!endsWithSlash && !segments.empty()) segments.pop_back(); addLeadingSlash = _path[0] == '/'; } getPathSegments(path, segments); addLeadingSlash = addLeadingSlash || (!path.empty() && path[0] == '/'); bool hasTrailingSlash = (!path.empty() && *(path.rbegin()) == '/'); bool addTrailingSlash = false; for (std::vector::const_iterator it = segments.begin(); it != segments.end(); ++it) { if (*it == "..") { addTrailingSlash = true; if (!normalizedSegments.empty()) normalizedSegments.pop_back(); } else if (*it != ".") { addTrailingSlash = false; normalizedSegments.push_back(*it); } else addTrailingSlash = true; } buildPath(normalizedSegments, addLeadingSlash, hasTrailingSlash || addTrailingSlash); } void URI::buildPath(const std::vector& segments, bool leadingSlash, bool trailingSlash) { _path.clear(); bool first = true; for (std::vector::const_iterator it = segments.begin(); it != segments.end(); ++it) { if (first) { first = false; if (leadingSlash) _path += '/'; else if (_scheme.empty() && (*it).find(':') != std::string::npos) _path.append("./"); } else _path += '/'; _path.append(*it); } if (trailingSlash) _path += '/'; } } // namespace Poco