diff options
author | DongHun Kwak <dh0128.kwak@samsung.com> | 2021-03-05 10:08:15 +0900 |
---|---|---|
committer | DongHun Kwak <dh0128.kwak@samsung.com> | 2021-03-05 10:08:15 +0900 |
commit | 24d4e855d95e02a5324c2f3d88cfd5cd19830c2c (patch) | |
tree | 371d954e80394a8e72ef95d6ee7d45312c3f87a9 /src/iri.c | |
parent | 0b86d50828d05a27de3ff840d6a06407310393c2 (diff) | |
download | wget-24d4e855d95e02a5324c2f3d88cfd5cd19830c2c.tar.gz wget-24d4e855d95e02a5324c2f3d88cfd5cd19830c2c.tar.bz2 wget-24d4e855d95e02a5324c2f3d88cfd5cd19830c2c.zip |
Imported Upstream version 1.17upstream/1.17
Diffstat (limited to 'src/iri.c')
-rw-r--r-- | src/iri.c | 125 |
1 files changed, 97 insertions, 28 deletions
@@ -1,5 +1,6 @@ /* IRI related functions. - Copyright (C) 2008, 2009, 2010, 2011 Free Software Foundation, Inc. + Copyright (C) 2008, 2009, 2010, 2011, 2015 Free Software Foundation, + Inc. This file is part of GNU Wget. @@ -52,8 +53,9 @@ as that of the covered work. */ /* Given a string containing "charset=XXX", return the encoding if found, or NULL otherwise */ char * -parse_charset (char *str) +parse_charset (const char *str) { + const char *end; char *charset; if (!str || !*str) @@ -64,14 +66,14 @@ parse_charset (char *str) return NULL; str += 8; - charset = str; + end = str; /* sXXXav: which chars should be banned ??? */ - while (*charset && !c_isspace (*charset)) - charset++; + while (*end && !c_isspace (*end)) + end++; /* sXXXav: could strdupdelim return NULL ? */ - charset = strdupdelim (str, charset); + charset = strdupdelim (str, end); /* Do a minimum check on the charset value */ if (!check_encoding_name (charset)) @@ -94,9 +96,9 @@ find_locale (void) /* Basic check of an encoding name. */ bool -check_encoding_name (char *encoding) +check_encoding_name (const char *encoding) { - char *s = encoding; + const char *s = encoding; while (*s) { @@ -116,13 +118,13 @@ check_encoding_name (char *encoding) will contain the transcoded string on success. *out content is unspecified otherwise. */ static bool -do_conversion (const char *tocode, const char *fromcode, char *in, size_t inlen, char **out) +do_conversion (const char *tocode, const char *fromcode, char const *in_org, size_t inlen, char **out) { iconv_t cd; /* sXXXav : hummm hard to guess... */ size_t len, done, outlen; int invalid = 0, tooshort = 0; - char *s, *in_org, *in_save; + char *s, *in, *in_save; cd = iconv_open (tocode, fromcode); if (cd == (iconv_t)(-1)) @@ -134,9 +136,8 @@ do_conversion (const char *tocode, const char *fromcode, char *in, size_t inlen, } /* iconv() has to work on an unescaped string */ - in_org = in; - in_save = in = xstrndup(in, inlen); - url_unescape(in); + in_save = in = xstrndup (in_org, inlen); + url_unescape_except_reserved (in); inlen = strlen(in); len = outlen = inlen * 2; @@ -151,7 +152,14 @@ do_conversion (const char *tocode, const char *fromcode, char *in, size_t inlen, *(s + len - outlen - done) = '\0'; xfree(in_save); iconv_close(cd); - DEBUGP (("converted '%s' (%s) -> '%s' (%s)\n", in_org, fromcode, *out, tocode)); + IF_DEBUG + { + /* not not print out embedded passwords, in_org might be an URL */ + if (!strchr(in_org, '@') && !strchr(*out, '@')) + debug_logprintf ("converted '%s' (%s) -> '%s' (%s)\n", in_org, fromcode, *out, tocode); + else + debug_logprintf ("%s: logging suppressed, strings may contain password\n", __func__); + } return true; } @@ -192,7 +200,14 @@ do_conversion (const char *tocode, const char *fromcode, char *in, size_t inlen, xfree(in_save); iconv_close(cd); - DEBUGP (("converted '%s' (%s) -> '%s' (%s)\n", in_org, fromcode, *out, tocode)); + IF_DEBUG + { + /* not not print out embedded passwords, in_org might be an URL */ + if (!strchr(in_org, '@') && !strchr(*out, '@')) + debug_logprintf ("converted '%s' (%s) -> '%s' (%s)\n", in_org, fromcode, *out, tocode); + else + debug_logprintf ("%s: logging suppressed, strings may contain password\n", __func__); + } return false; } @@ -216,42 +231,96 @@ locale_to_utf8 (const char *str) if (do_conversion ("UTF-8", opt.locale, (char *) str, strlen ((char *) str), &new)) return (const char *) new; + xfree (new); return str; } +/* + * Work around a libidn <= 1.30 vulnerability. + * + * The function checks for a valid UTF-8 character sequence before + * passing it to idna_to_ascii_8z(). + * + * [1] http://lists.gnu.org/archive/html/help-libidn/2015-05/msg00002.html + * [2] https://lists.gnu.org/archive/html/bug-wget/2015-06/msg00002.html + * [3] http://curl.haxx.se/mail/lib-2015-06/0143.html + */ +static bool +_utf8_is_valid(const char *utf8) +{ + const unsigned char *s = (const unsigned char *) utf8; + + while (*s) + { + if ((*s & 0x80) == 0) /* 0xxxxxxx ASCII char */ + s++; + else if ((*s & 0xE0) == 0xC0) /* 110xxxxx 10xxxxxx */ + { + if ((s[1] & 0xC0) != 0x80) + return false; + s+=2; + } + else if ((*s & 0xF0) == 0xE0) /* 1110xxxx 10xxxxxx 10xxxxxx */ + { + if ((s[1] & 0xC0) != 0x80 || (s[2] & 0xC0) != 0x80) + return false; + s+=3; + } + else if ((*s & 0xF8) == 0xF0) /* 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx */ + { + if ((s[1] & 0xC0) != 0x80 || (s[2] & 0xC0) != 0x80 || (s[3] & 0xC0) != 0x80) + return false; + s+=4; + } + else + return false; + } + + return true; +} + /* Try to "ASCII encode" UTF-8 host. Return the new domain on success or NULL on error. */ char * -idn_encode (struct iri *i, char *host) +idn_encode (const struct iri *i, const char *host) { - char *new; int ret; + char *ascii_encoded; + char *utf8_encoded = NULL; /* Encode to UTF-8 if not done */ if (!i->utf8_encode) { - if (!remote_to_utf8 (i, (const char *) host, (const char **) &new)) + if (!remote_to_utf8 (i, host, &utf8_encoded)) return NULL; /* Nothing to encode or an error occured */ - host = new; } - /* toASCII UTF-8 NULL terminated string */ - ret = idna_to_ascii_8z (host, &new, IDNA_FLAGS); + if (!_utf8_is_valid(utf8_encoded ? utf8_encoded : host)) + { + logprintf (LOG_VERBOSE, _("Invalid UTF-8 sequence: %s\n"), + quote(utf8_encoded ? utf8_encoded : host)); + xfree (utf8_encoded); + return NULL; + } + + /* Store in ascii_encoded the ASCII UTF-8 NULL terminated string */ + ret = idna_to_ascii_8z (utf8_encoded ? utf8_encoded : host, &ascii_encoded, IDNA_FLAGS); + xfree (utf8_encoded); + if (ret != IDNA_SUCCESS) { - /* sXXXav : free new when needed ! */ logprintf (LOG_VERBOSE, _("idn_encode failed (%d): %s\n"), ret, quote (idna_strerror (ret))); return NULL; } - return new; + return ascii_encoded; } /* Try to decode an "ASCII encoded" host. Return the new domain in the locale on success or NULL on error. */ char * -idn_decode (char *host) +idn_decode (const char *host) { char *new; int ret; @@ -270,7 +339,7 @@ idn_decode (char *host) /* Try to transcode string str from remote encoding to UTF-8. On success, *new contains the transcoded string. *new content is unspecified otherwise. */ bool -remote_to_utf8 (struct iri *iri, const char *str, const char **new) +remote_to_utf8 (const struct iri *iri, const char *str, char **new) { bool ret = false; @@ -292,7 +361,7 @@ remote_to_utf8 (struct iri *iri, const char *str, const char **new) return false; } - if (do_conversion ("UTF-8", iri->uri_encoding, (char *) str, strlen (str), (char **) new)) + if (do_conversion ("UTF-8", iri->uri_encoding, str, strlen (str), new)) ret = true; /* Test if something was converted */ @@ -344,7 +413,7 @@ iri_free (struct iri *i) /* Set uri_encoding of struct iri i. If a remote encoding was specified, use it unless force is true. */ void -set_uri_encoding (struct iri *i, char *charset, bool force) +set_uri_encoding (struct iri *i, const char *charset, bool force) { DEBUGP (("URI encoding = %s\n", charset ? quote (charset) : "None")); if (!force && opt.encoding_remote) @@ -361,7 +430,7 @@ set_uri_encoding (struct iri *i, char *charset, bool force) /* Set content_encoding of struct iri i. */ void -set_content_encoding (struct iri *i, char *charset) +set_content_encoding (struct iri *i, const char *charset) { DEBUGP (("URI content encoding = %s\n", charset ? quote (charset) : "None")); if (opt.encoding_remote) |