diff options
author | Seonah Moon <seonah1.moon@samsung.com> | 2022-01-12 13:27:51 +0900 |
---|---|---|
committer | Seonah Moon <seonah1.moon@samsung.com> | 2022-01-12 13:28:56 +0900 |
commit | 58f7e70ff869dee66c90b1323e0d5c9f324067b8 (patch) | |
tree | 31060230fe108ac26a3258e1b4fb22454c7cf01c /lib/urlapi.c | |
parent | b31894b8fe11ff4cfe97bd63eb4e62e14893c749 (diff) | |
download | curl-58f7e70ff869dee66c90b1323e0d5c9f324067b8.tar.gz curl-58f7e70ff869dee66c90b1323e0d5c9f324067b8.tar.bz2 curl-58f7e70ff869dee66c90b1323e0d5c9f324067b8.zip |
Imported Upstream version 7.81.0upstream/7.81.0
Change-Id: Ibe8b1de1691e326fbd2a8133470758ab942cbd97
Diffstat (limited to 'lib/urlapi.c')
-rw-r--r-- | lib/urlapi.c | 555 |
1 files changed, 391 insertions, 164 deletions
diff --git a/lib/urlapi.c b/lib/urlapi.c index 88b7f042f..d29aeb238 100644 --- a/lib/urlapi.c +++ b/lib/urlapi.c @@ -5,11 +5,11 @@ * | (__| |_| | _ <| |___ * \___|\___/|_| \_\_____| * - * Copyright (C) 1998 - 2020, Daniel Stenberg, <daniel@haxx.se>, et al. + * Copyright (C) 1998 - 2022, Daniel Stenberg, <daniel@haxx.se>, et al. * * This software is licensed as described in the file COPYING, which * you should have received as part of this distribution. The terms - * are also available at https://curl.haxx.se/docs/copyright.html. + * are also available at https://curl.se/docs/copyright.html. * * You may opt to use, copy, modify, merge, publish, distribute and/or sell * copies of the Software, and permit persons to whom the Software is @@ -30,6 +30,7 @@ #include "escape.h" #include "curl_ctype.h" #include "inet_pton.h" +#include "inet_ntop.h" /* The last 3 #include files should be in this order */ #include "curl_printf.h" @@ -50,6 +51,9 @@ ((str)[1] == ':' || (str)[1] == '|') && \ ((str)[2] == '/' || (str)[2] == '\\' || (str)[2] == 0)) +/* scheme is not URL encoded, the longest libcurl supported ones are... */ +#define MAX_SCHEME_LEN 40 + /* Internal representation of CURLU. Point to URL-encoded strings. */ struct Curl_URL { char *scheme; @@ -131,7 +135,7 @@ static const char *find_host_sep(const char *url) */ static bool urlchar_needs_escaping(int c) { - return !(ISCNTRL(c) || ISSPACE(c) || ISGRAPH(c)); + return !(ISCNTRL(c) || ISSPACE(c) || ISGRAPH(c)); } /* @@ -157,23 +161,23 @@ static size_t strlen_url(const char *url, bool relative) continue; } - switch(*ptr) { - case '?': - left = FALSE; - /* FALLTHROUGH */ - default: - if(urlchar_needs_escaping(*ptr)) - newlen += 2; - newlen++; - break; - case ' ': + if(*ptr == ' ') { if(left) newlen += 3; else newlen++; - break; + continue; } + + if (*ptr == '?') + left = FALSE; + + if(urlchar_needs_escaping(*ptr)) + newlen += 2; + + newlen++; } + return newlen; } @@ -202,19 +206,7 @@ static void strcpy_url(char *output, const char *url, bool relative) continue; } - switch(*iptr) { - case '?': - left = FALSE; - /* FALLTHROUGH */ - default: - if(urlchar_needs_escaping(*iptr)) { - msnprintf(optr, 4, "%%%02x", *iptr); - optr += 3; - } - else - *optr++=*iptr; - break; - case ' ': + if(*iptr == ' ') { if(left) { *optr++='%'; /* add a '%' */ *optr++='2'; /* add a '2' */ @@ -222,41 +214,58 @@ static void strcpy_url(char *output, const char *url, bool relative) } else *optr++='+'; /* add a '+' here */ - break; + continue; + } + + if(*iptr == '?') + left = FALSE; + + if(urlchar_needs_escaping(*iptr)) { + msnprintf(optr, 4, "%%%02x", *iptr); + optr += 3; } + else + *optr++ = *iptr; } *optr = 0; /* null-terminate output buffer */ } /* - * Returns true if the given URL is absolute (as opposed to relative) within - * the buffer size. Returns the scheme in the buffer if TRUE and 'buf' is - * non-NULL. + * Returns true if the given URL is absolute (as opposed to relative). Returns + * the scheme in the buffer if TRUE and 'buf' is non-NULL. The buflen must + * be larger than MAX_SCHEME_LEN if buf is set. */ bool Curl_is_absolute_url(const char *url, char *buf, size_t buflen) { size_t i; + DEBUGASSERT(!buf || (buflen > MAX_SCHEME_LEN)); + (void)buflen; /* only used in debug-builds */ + if(buf) + buf[0] = 0; /* always leave a defined value in buf */ #ifdef WIN32 if(STARTS_WITH_DRIVE_PREFIX(url)) return FALSE; #endif - for(i = 0; i < buflen && url[i]; ++i) { + for(i = 0; i < MAX_SCHEME_LEN; ++i) { char s = url[i]; - if((s == ':') && (url[i + 1] == '/')) { - if(buf) - buf[i] = 0; - return TRUE; + if(s && (ISALNUM(s) || (s == '+') || (s == '-') || (s == '.') )) { + /* RFC 3986 3.1 explains: + scheme = ALPHA *( ALPHA / DIGIT / "+" / "-" / "." ) + */ } - /* RFC 3986 3.1 explains: - scheme = ALPHA *( ALPHA / DIGIT / "+" / "-" / "." ) - */ - else if(ISALNUM(s) || (s == '+') || (s == '-') || (s == '.') ) { - if(buf) - buf[i] = (char)TOLOWER(s); - } - else + else { break; + } + } + if(i && (url[i] == ':') && (url[i + 1] == '/')) { + if(buf) { + buf[i] = 0; + while(i--) { + buf[i] = (char)TOLOWER(url[i]); + } + } + return TRUE; } return FALSE; } @@ -420,6 +429,29 @@ static char *concat_url(const char *base, const char *relurl) return newest; } +/* scan for byte values < 31 or 127 */ +static bool junkscan(const char *part, unsigned int flags) +{ + if(part) { + static const char badbytes[]={ + /* */ 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, + 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, + 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, + 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, + 0x7f, 0x00 /* null-terminate */ + }; + size_t n = strlen(part); + size_t nfine = strcspn(part, badbytes); + if(nfine != n) + /* since we don't know which part is scanned, return a generic error + code */ + return TRUE; + if(!(flags & CURLU_ALLOW_SPACE) && strchr(part, ' ')) + return TRUE; + } + return FALSE; +} + /* * parse_hostname_login() * @@ -467,7 +499,7 @@ static CURLUcode parse_hostname_login(struct Curl_URL *u, (h && (h->flags & PROTOPT_URLOPTIONS)) ? &optionsp:NULL); if(ccode) { - result = CURLUE_MALFORMED_INPUT; + result = CURLUE_BAD_LOGIN; goto out; } @@ -477,15 +509,28 @@ static CURLUcode parse_hostname_login(struct Curl_URL *u, result = CURLUE_USER_NOT_ALLOWED; goto out; } - + if(junkscan(userp, flags)) { + result = CURLUE_BAD_USER; + goto out; + } u->user = userp; } - if(passwdp) + if(passwdp) { + if(junkscan(passwdp, flags)) { + result = CURLUE_BAD_PASSWORD; + goto out; + } u->password = passwdp; + } - if(optionsp) + if(optionsp) { + if(junkscan(optionsp, flags)) { + result = CURLUE_BAD_LOGIN; + goto out; + } u->options = optionsp; + } return CURLUE_OK; out: @@ -493,11 +538,15 @@ static CURLUcode parse_hostname_login(struct Curl_URL *u, free(userp); free(passwdp); free(optionsp); + u->user = NULL; + u->password = NULL; + u->options = NULL; return result; } -UNITTEST CURLUcode Curl_parse_port(struct Curl_URL *u, char *hostname) +UNITTEST CURLUcode Curl_parse_port(struct Curl_URL *u, char *hostname, + bool has_scheme) { char *portptr = NULL; char endbracket; @@ -515,19 +564,19 @@ UNITTEST CURLUcode Curl_parse_port(struct Curl_URL *u, char *hostname) int zonelen = len; if(1 == sscanf(hostname + zonelen, "%*[^]]%c%n", &endbracket, &len)) { if(']' != endbracket) - return CURLUE_MALFORMED_INPUT; + return CURLUE_BAD_IPV6; portptr = &hostname[--zonelen + len + 1]; } else - return CURLUE_MALFORMED_INPUT; + return CURLUE_BAD_IPV6; } else - return CURLUE_MALFORMED_INPUT; + return CURLUE_BAD_IPV6; /* this is a RFC2732-style specified IP-address */ if(portptr && *portptr) { if(*portptr != ':') - return CURLUE_MALFORMED_INPUT; + return CURLUE_BAD_IPV6; } else portptr = NULL; @@ -542,10 +591,14 @@ UNITTEST CURLUcode Curl_parse_port(struct Curl_URL *u, char *hostname) /* Browser behavior adaptation. If there's a colon with no digits after, just cut off the name there which makes us ignore the colon and just - use the default port. Firefox, Chrome and Safari all do that. */ + use the default port. Firefox, Chrome and Safari all do that. + + Don't do it if the URL has no scheme, to make something that looks like + a scheme not work! + */ if(!portptr[1]) { *portptr = '\0'; - return CURLUE_OK; + return has_scheme ? CURLUE_OK : CURLUE_BAD_PORT_NUMBER; } if(!ISDIGIT(portptr[1])) @@ -553,9 +606,7 @@ UNITTEST CURLUcode Curl_parse_port(struct Curl_URL *u, char *hostname) port = strtol(portptr + 1, &rest, 10); /* Port number must be decimal */ - if((port <= 0) || (port > 0xffff)) - /* Single unix standard says port numbers are 16 bits long, but we don't - treat port zero as OK. */ + if(port > 0xffff) return CURLUE_BAD_PORT_NUMBER; if(rest[0]) @@ -574,45 +625,20 @@ UNITTEST CURLUcode Curl_parse_port(struct Curl_URL *u, char *hostname) return CURLUE_OK; } -/* scan for byte values < 31 or 127 */ -static CURLUcode junkscan(const char *part) -{ - if(part) { - static const char badbytes[]={ - /* */ 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, - 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, - 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, - 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, - 0x7f, - 0x00 /* null-terminate */ - }; - size_t n = strlen(part); - size_t nfine = strcspn(part, badbytes); - if(nfine != n) - /* since we don't know which part is scanned, return a generic error - code */ - return CURLUE_MALFORMED_INPUT; - } - return CURLUE_OK; -} - static CURLUcode hostname_check(struct Curl_URL *u, char *hostname) { size_t len; size_t hlen = strlen(hostname); if(hostname[0] == '[') { -#ifdef ENABLE_IPV6 - char dest[16]; /* fits a binary IPv6 address */ -#endif const char *l = "0123456789abcdefABCDEF:."; if(hlen < 4) /* '[::]' is the shortest possible valid string */ - return CURLUE_MALFORMED_INPUT; + return CURLUE_BAD_IPV6; hostname++; hlen -= 2; if(hostname[hlen] != ']') - return CURLUE_MALFORMED_INPUT; + return CURLUE_BAD_IPV6; /* only valid letters are ok */ len = strspn(hostname, l); @@ -629,6 +655,7 @@ static CURLUcode hostname_check(struct Curl_URL *u, char *hostname) while(*h && (*h != ']') && (i < 15)) zoneid[i++] = *h++; if(!i || (']' != *h)) + /* impossible to reach? */ return CURLUE_MALFORMED_INPUT; zoneid[i] = 0; u->zoneid = strdup(zoneid); @@ -638,22 +665,34 @@ static CURLUcode hostname_check(struct Curl_URL *u, char *hostname) hostname[len + 1] = 0; /* terminate the hostname */ } else - return CURLUE_MALFORMED_INPUT; + return CURLUE_BAD_IPV6; /* hostname is fine */ } #ifdef ENABLE_IPV6 - hostname[hlen] = 0; /* end the address there */ - if(1 != Curl_inet_pton(AF_INET6, hostname, dest)) - return CURLUE_MALFORMED_INPUT; - hostname[hlen] = ']'; /* restore ending bracket */ + { + char dest[16]; /* fits a binary IPv6 address */ + char norm[MAX_IPADR_LEN]; + hostname[hlen] = 0; /* end the address there */ + if(1 != Curl_inet_pton(AF_INET6, hostname, dest)) + return CURLUE_BAD_IPV6; + + /* check if it can be done shorter */ + if(Curl_inet_ntop(AF_INET6, dest, norm, sizeof(norm)) && + (strlen(norm) < hlen)) { + strcpy(hostname, norm); + hlen = strlen(norm); + hostname[hlen + 1] = 0; + } + hostname[hlen] = ']'; /* restore ending bracket */ + } #endif } else { /* letters from the second string is not ok */ - len = strcspn(hostname, " "); + len = strcspn(hostname, " \r\n"); if(hlen != len) /* hostname with bad content */ - return CURLUE_MALFORMED_INPUT; + return CURLUE_BAD_HOSTNAME; } if(!hostname[0]) return CURLUE_NO_HOST; @@ -662,10 +701,123 @@ static CURLUcode hostname_check(struct Curl_URL *u, char *hostname) #define HOSTNAME_END(x) (((x) == '/') || ((x) == '?') || ((x) == '#')) +/* + * Handle partial IPv4 numerical addresses and different bases, like + * '16843009', '0x7f', '0x7f.1' '0177.1.1.1' etc. + * + * If the given input string is syntactically wrong or any part for example is + * too big, this function returns FALSE and doesn't create any output. + * + * Output the "normalized" version of that input string in plain quad decimal + * integers and return TRUE. + */ +static bool ipv4_normalize(const char *hostname, char *outp, size_t olen) +{ + bool done = FALSE; + int n = 0; + const char *c = hostname; + unsigned long parts[4] = {0, 0, 0, 0}; + + while(!done) { + char *endp; + unsigned long l; + if((*c < '0') || (*c > '9')) + /* most importantly this doesn't allow a leading plus or minus */ + return FALSE; + l = strtoul(c, &endp, 0); + + /* overflow or nothing parsed at all */ + if(((l == ULONG_MAX) && (errno == ERANGE)) || (endp == c)) + return FALSE; + +#if SIZEOF_LONG > 4 + /* a value larger than 32 bits */ + if(l > UINT_MAX) + return FALSE; +#endif + + parts[n] = l; + c = endp; + + switch (*c) { + case '.' : + if(n == 3) + return FALSE; + n++; + c++; + break; + + case '\0': + done = TRUE; + break; + + default: + return FALSE; + } + } + + /* this is deemed a valid IPv4 numerical address */ + + switch(n) { + case 0: /* a -- 32 bits */ + msnprintf(outp, olen, "%u.%u.%u.%u", + parts[0] >> 24, (parts[0] >> 16) & 0xff, + (parts[0] >> 8) & 0xff, parts[0] & 0xff); + break; + case 1: /* a.b -- 8.24 bits */ + if((parts[0] > 0xff) || (parts[1] > 0xffffff)) + return FALSE; + msnprintf(outp, olen, "%u.%u.%u.%u", + parts[0], (parts[1] >> 16) & 0xff, + (parts[1] >> 8) & 0xff, parts[1] & 0xff); + break; + case 2: /* a.b.c -- 8.8.16 bits */ + if((parts[0] > 0xff) || (parts[1] > 0xff) || (parts[2] > 0xffff)) + return FALSE; + msnprintf(outp, olen, "%u.%u.%u.%u", + parts[0], parts[1], (parts[2] >> 8) & 0xff, + parts[2] & 0xff); + break; + case 3: /* a.b.c.d -- 8.8.8.8 bits */ + if((parts[0] > 0xff) || (parts[1] > 0xff) || (parts[2] > 0xff) || + (parts[3] > 0xff)) + return FALSE; + msnprintf(outp, olen, "%u.%u.%u.%u", + parts[0], parts[1], parts[2], parts[3]); + break; + } + return TRUE; +} + +/* return strdup'ed version in 'outp', possibly percent decoded */ +static CURLUcode decode_host(char *hostname, char **outp) +{ + char *per = NULL; + if(hostname[0] != '[') + /* only decode if not an ipv6 numerical */ + per = strchr(hostname, '%'); + if(!per) { + *outp = strdup(hostname); + if(!*outp) + return CURLUE_OUT_OF_MEMORY; + } + else { + /* might be encoded */ + size_t dlen; + CURLcode result = Curl_urldecode(NULL, hostname, 0, + outp, &dlen, REJECT_CTRL); + if(result) + return CURLUE_BAD_HOSTNAME; + } + + return CURLUE_OK; +} + static CURLUcode seturl(const char *url, CURLU *u, unsigned int flags) { char *path; bool path_alloced = FALSE; + bool uncpath = FALSE; char *hostname; char *query = NULL; char *fragment = NULL; @@ -676,8 +828,7 @@ static CURLUcode seturl(const char *url, CURLU *u, unsigned int flags) size_t schemelen = 0; size_t urllen; - if(!url) - return CURLUE_MALFORMED_INPUT; + DEBUGASSERT(url); /************************************************************* * Parse the URL. @@ -701,11 +852,14 @@ static CURLUcode seturl(const char *url, CURLU *u, unsigned int flags) } /* handle the file: scheme */ - if(url_has_scheme && strcasecompare(schemebuf, "file")) { + if(url_has_scheme && !strcmp(schemebuf, "file")) { + if(urllen <= 6) + /* file:/ is not enough to actually be a complete file: URL */ + return CURLUE_BAD_FILE_URL; + /* path has been allocated large enough to hold this */ strcpy(path, &url[5]); - hostname = NULL; /* no host for file: URLs */ u->scheme = strdup("file"); if(!u->scheme) return CURLUE_OUT_OF_MEMORY; @@ -727,10 +881,13 @@ static CURLUcode seturl(const char *url, CURLU *u, unsigned int flags) * * o the hostname matches "localhost" (case-insensitively), or * - * o the hostname is a FQDN that resolves to this machine. + * o the hostname is a FQDN that resolves to this machine, or + * + * o it is an UNC String transformed to an URI (Windows only, RFC 8089 + * Appendix E.3). * * For brevity, we only consider URLs with empty, "localhost", or - * "127.0.0.1" hostnames as local. + * "127.0.0.1" hostnames as local, otherwise as an UNC String. * * Additionally, there is an exception for URLs with a Windows drive * letter in the authority (which was accidentally omitted from RFC 8089 @@ -739,25 +896,50 @@ static CURLUcode seturl(const char *url, CURLU *u, unsigned int flags) if(ptr[0] != '/' && !STARTS_WITH_URL_DRIVE_PREFIX(ptr)) { /* the URL includes a host name, it must match "localhost" or "127.0.0.1" to be valid */ - if(!checkprefix("localhost/", ptr) && - !checkprefix("127.0.0.1/", ptr)) { + if(checkprefix("localhost/", ptr) || + checkprefix("127.0.0.1/", ptr)) { + ptr += 9; /* now points to the slash after the host */ + } + else { +#if defined(WIN32) + size_t len; + + /* the host name, NetBIOS computer name, can not contain disallowed + chars, and the delimiting slash character must be appended to the + host name */ + path = strpbrk(ptr, "/\\:*?\"<>|"); + if(!path || *path != '/') + return CURLUE_BAD_FILE_URL; + + len = path - ptr; + if(len) { + memcpy(hostname, ptr, len); + hostname[len] = 0; + uncpath = TRUE; + } + + ptr -= 2; /* now points to the // before the host in UNC */ +#else /* Invalid file://hostname/, expected localhost or 127.0.0.1 or none */ - return CURLUE_MALFORMED_INPUT; + return CURLUE_BAD_FILE_URL; +#endif } - ptr += 9; /* now points to the slash after the host */ } path = ptr; } + if(!uncpath) + hostname = NULL; /* no host for file: URLs by default */ + #if !defined(MSDOS) && !defined(WIN32) && !defined(__CYGWIN__) /* Don't allow Windows drive letters when not in Windows. * This catches both "file:/c:" and "file:c:" */ if(('/' == path[0] && STARTS_WITH_URL_DRIVE_PREFIX(&path[1])) || STARTS_WITH_URL_DRIVE_PREFIX(path)) { /* File drive letters are only accepted in MSDOS/Windows */ - return CURLUE_MALFORMED_INPUT; + return CURLUE_BAD_FILE_URL; } #else /* If the path starts with a slash and a drive letter, ditch the slash */ @@ -784,22 +966,21 @@ static CURLUcode seturl(const char *url, CURLU *u, unsigned int flags) } if((i < 1) || (i>3)) /* less than one or more than three slashes */ - return CURLUE_MALFORMED_INPUT; + return CURLUE_BAD_SLASHES; schemep = schemebuf; if(!Curl_builtin_scheme(schemep) && !(flags & CURLU_NON_SUPPORT_SCHEME)) return CURLUE_UNSUPPORTED_SCHEME; - if(junkscan(schemep)) - return CURLUE_MALFORMED_INPUT; - + if(junkscan(schemep, flags)) + return CURLUE_BAD_SCHEME; } else { /* no scheme! */ if(!(flags & (CURLU_DEFAULT_SCHEME|CURLU_GUESS_SCHEME))) - return CURLUE_MALFORMED_INPUT; + return CURLUE_BAD_SCHEME; if(flags & CURLU_DEFAULT_SCHEME) schemep = DEFAULT_SCHEME; @@ -810,7 +991,8 @@ static CURLUcode seturl(const char *url, CURLU *u, unsigned int flags) } hostp = p; /* host name starts here */ - while(*p && !HOSTNAME_END(*p)) /* find end of host name */ + /* find the end of the host name + port number */ + while(*p && !HOSTNAME_END(*p)) p++; len = p - hostp; @@ -820,7 +1002,7 @@ static CURLUcode seturl(const char *url, CURLU *u, unsigned int flags) } else { if(!(flags & CURLU_NO_AUTHORITY)) - return CURLUE_MALFORMED_INPUT; + return CURLUE_NO_HOST; } len = strlen(p); @@ -834,9 +1016,6 @@ static CURLUcode seturl(const char *url, CURLU *u, unsigned int flags) } } - if(junkscan(path)) - return CURLUE_MALFORMED_INPUT; - if((flags & CURLU_URLENCODE) && path[0]) { /* worst case output length is 3x the original! */ char *newp = malloc(strlen(path) * 3); @@ -850,6 +1029,8 @@ static CURLUcode seturl(const char *url, CURLU *u, unsigned int flags) fragment = strchr(path, '#'); if(fragment) { *fragment++ = 0; + if(junkscan(fragment, flags)) + return CURLUE_BAD_FRAGMENT; if(fragment[0]) { u->fragment = strdup(fragment); if(!u->fragment) @@ -860,12 +1041,17 @@ static CURLUcode seturl(const char *url, CURLU *u, unsigned int flags) query = strchr(path, '?'); if(query) { *query++ = 0; + if(junkscan(query, flags)) + return CURLUE_BAD_QUERY; /* done even if the query part is a blank string */ u->query = strdup(query); if(!u->query) return CURLUE_OUT_OF_MEMORY; } + if(junkscan(path, flags)) + return CURLUE_BAD_PATH; + if(!path[0]) /* if there's no path left set, unset */ path = NULL; @@ -894,33 +1080,40 @@ static CURLUcode seturl(const char *url, CURLU *u, unsigned int flags) } if(hostname) { + char normalized_ipv4[sizeof("255.255.255.255") + 1]; + /* * Parse the login details and strip them out of the host name. */ - if(junkscan(hostname)) - return CURLUE_MALFORMED_INPUT; - result = parse_hostname_login(u, &hostname, flags); if(result) return result; - result = Curl_parse_port(u, hostname); + result = Curl_parse_port(u, hostname, url_has_scheme); if(result) return result; + if(junkscan(hostname, flags)) + return CURLUE_BAD_HOSTNAME; + if(0 == strlen(hostname) && (flags & CURLU_NO_AUTHORITY)) { /* Skip hostname check, it's allowed to be empty. */ + u->host = strdup(""); } else { - result = hostname_check(u, hostname); - if(result) - return result; + if(ipv4_normalize(hostname, normalized_ipv4, sizeof(normalized_ipv4))) + u->host = strdup(normalized_ipv4); + else { + result = decode_host(hostname, &u->host); + if(result) + return result; + result = hostname_check(u, u->host); + if(result) + return result; + } } - - u->host = strdup(hostname); if(!u->host) return CURLUE_OUT_OF_MEMORY; - if((flags & CURLU_GUESS_SCHEME) && !schemep) { /* legacy curl-style guess based on host name */ if(checkprefix("ftp.", hostname)) @@ -978,12 +1171,14 @@ void curl_url_cleanup(CURLU *u) } } -#define DUP(dest, src, name) \ - if(src->name) { \ - dest->name = strdup(src->name); \ - if(!dest->name) \ - goto fail; \ - } +#define DUP(dest, src, name) \ + do { \ + if(src->name) { \ + dest->name = strdup(src->name); \ + if(!dest->name) \ + goto fail; \ + } \ + } while(0) CURLU *curl_url_dup(CURLU *in) { @@ -1013,6 +1208,7 @@ CURLUcode curl_url_get(CURLU *u, CURLUPart what, CURLUcode ifmissing = CURLUE_UNKNOWN_PART; char portbuf[7]; bool urldecode = (flags & CURLU_URLDECODE)?1:0; + bool urlencode = (flags & CURLU_URLENCODE)?1:0; bool plusdecode = FALSE; (void)flags; if(!u) @@ -1045,6 +1241,7 @@ CURLUcode curl_url_get(CURLU *u, CURLUPart what, break; case CURLUPART_ZONEID: ptr = u->zoneid; + ifmissing = CURLUE_NO_ZONEID; break; case CURLUPART_PORT: ptr = u->port; @@ -1056,7 +1253,7 @@ CURLUcode curl_url_get(CURLU *u, CURLUPart what, const struct Curl_handler *h = Curl_builtin_scheme(u->scheme); if(h) { - msnprintf(portbuf, sizeof(portbuf), "%ld", h->defport); + msnprintf(portbuf, sizeof(portbuf), "%u", h->defport); ptr = portbuf; } } @@ -1115,7 +1312,7 @@ CURLUcode curl_url_get(CURLU *u, CURLUPart what, /* there's no stored port number, but asked to deliver a default one for the scheme */ if(h) { - msnprintf(portbuf, sizeof(portbuf), "%ld", h->defport); + msnprintf(portbuf, sizeof(portbuf), "%u", h->defport); port = portbuf; } } @@ -1130,16 +1327,54 @@ CURLUcode curl_url_get(CURLU *u, CURLUPart what, if(h && !(h->flags & PROTOPT_URLOPTIONS)) options = NULL; - if((u->host[0] == '[') && u->zoneid) { - /* make it '[ host %25 zoneid ]' */ - size_t hostlen = strlen(u->host); - size_t alen = hostlen + 3 + strlen(u->zoneid) + 1; - allochost = malloc(alen); + if(u->host[0] == '[') { + if(u->zoneid) { + /* make it '[ host %25 zoneid ]' */ + size_t hostlen = strlen(u->host); + size_t alen = hostlen + 3 + strlen(u->zoneid) + 1; + allochost = malloc(alen); + if(!allochost) + return CURLUE_OUT_OF_MEMORY; + memcpy(allochost, u->host, hostlen - 1); + msnprintf(&allochost[hostlen - 1], alen - hostlen + 1, + "%%25%s]", u->zoneid); + } + } + else if(urlencode) { + allochost = curl_easy_escape(NULL, u->host, 0); if(!allochost) return CURLUE_OUT_OF_MEMORY; - memcpy(allochost, u->host, hostlen - 1); - msnprintf(&allochost[hostlen - 1], alen - hostlen + 1, - "%%25%s]", u->zoneid); + } + else { + /* only encode '%' in output host name */ + char *host = u->host; + size_t pcount = 0; + /* first, count number of percents present in the name */ + while(*host) { + if(*host == '%') + pcount++; + host++; + } + /* if there were percents, encode the host name */ + if(pcount) { + size_t hostlen = strlen(u->host); + size_t alen = hostlen + 2 * pcount + 1; + char *o = allochost = malloc(alen); + if(!allochost) + return CURLUE_OUT_OF_MEMORY; + + host = u->host; + while(*host) { + if(*host == '%') { + memcpy(o, "%25", 3); + o += 3; + host++; + continue; + } + *o++ = *host++; + } + *o = '\0'; + } } url = aprintf("%s://%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s", @@ -1264,7 +1499,7 @@ CURLUcode curl_url_set(CURLU *u, CURLUPart what, case CURLUPART_SCHEME: if(strlen(part) > MAX_SCHEME_LEN) /* too long */ - return CURLUE_MALFORMED_INPUT; + return CURLUE_BAD_SCHEME; if(!(flags & CURLU_NON_SUPPORT_SCHEME) && /* verify that it is a fine scheme */ !Curl_builtin_scheme(part)) @@ -1281,10 +1516,15 @@ CURLUcode curl_url_set(CURLU *u, CURLUPart what, case CURLUPART_OPTIONS: storep = &u->options; break; - case CURLUPART_HOST: + case CURLUPART_HOST: { + size_t len = strcspn(part, " \r\n"); + if(strlen(part) != len) + /* hostname with bad content */ + return CURLUE_BAD_HOSTNAME; storep = &u->host; Curl_safefree(u->zoneid); break; + } case CURLUPART_ZONEID: storep = &u->zoneid; break; @@ -1297,7 +1537,7 @@ CURLUcode curl_url_set(CURLU *u, CURLUPart what, return CURLUE_BAD_PORT_NUMBER; if(*endp) /* weirdly provided number, not good! */ - return CURLUE_MALFORMED_INPUT; + return CURLUE_BAD_PORT_NUMBER; storep = &u->port; } break; @@ -1326,7 +1566,7 @@ CURLUcode curl_url_set(CURLU *u, CURLUPart what, char *redired_url; CURLU *handle2; - if(Curl_is_absolute_url(part, NULL, MAX_SCHEME_LEN + 1)) { + if(Curl_is_absolute_url(part, NULL, 0)) { handle2 = curl_url(); if(!handle2) return CURLUE_OUT_OF_MEMORY; @@ -1387,28 +1627,17 @@ CURLUcode curl_url_set(CURLU *u, CURLUPart what, if(urlencode) { const unsigned char *i; char *o; - bool free_part = FALSE; char *enc = malloc(nalloc * 3 + 1); /* for worst case! */ if(!enc) return CURLUE_OUT_OF_MEMORY; - if(plusencode) { - /* space to plus */ - i = (const unsigned char *)part; - for(o = enc; *i; ++o, ++i) - *o = (*i == ' ') ? '+' : *i; - *o = 0; /* null-terminate */ - part = strdup(enc); - if(!part) { - free(enc); - return CURLUE_OUT_OF_MEMORY; - } - free_part = TRUE; - } for(i = (const unsigned char *)part, o = enc; *i; i++) { - if(Curl_isunreserved(*i) || - ((*i == '/') && urlskipslash) || - ((*i == '=') && equalsencode) || - ((*i == '+') && plusencode)) { + if((*i == ' ') && plusencode) { + *o = '+'; + o++; + } + else if(Curl_isunreserved(*i) || + ((*i == '/') && urlskipslash) || + ((*i == '=') && equalsencode)) { if((*i == '=') && equalsencode) /* only skip the first equals sign */ equalsencode = FALSE; @@ -1422,8 +1651,6 @@ CURLUcode curl_url_set(CURLU *u, CURLUPart what, } *o = 0; /* null-terminate */ newp = enc; - if(free_part) - free((char *)part); } else { char *p; @@ -1474,7 +1701,7 @@ CURLUcode curl_url_set(CURLU *u, CURLUPart what, else { if(hostname_check(u, (char *)newp)) { free((char *)newp); - return CURLUE_MALFORMED_INPUT; + return CURLUE_BAD_HOSTNAME; } } } |