summaryrefslogtreecommitdiff
path: root/src/iri.c
diff options
context:
space:
mode:
authorDongHun Kwak <dh0128.kwak@samsung.com>2021-03-05 10:08:15 +0900
committerDongHun Kwak <dh0128.kwak@samsung.com>2021-03-05 10:08:15 +0900
commit24d4e855d95e02a5324c2f3d88cfd5cd19830c2c (patch)
tree371d954e80394a8e72ef95d6ee7d45312c3f87a9 /src/iri.c
parent0b86d50828d05a27de3ff840d6a06407310393c2 (diff)
downloadwget-24d4e855d95e02a5324c2f3d88cfd5cd19830c2c.tar.gz
wget-24d4e855d95e02a5324c2f3d88cfd5cd19830c2c.tar.bz2
wget-24d4e855d95e02a5324c2f3d88cfd5cd19830c2c.zip
Imported Upstream version 1.17upstream/1.17
Diffstat (limited to 'src/iri.c')
-rw-r--r--src/iri.c125
1 files changed, 97 insertions, 28 deletions
diff --git a/src/iri.c b/src/iri.c
index 38c8c6e..dc9aa9a 100644
--- a/src/iri.c
+++ b/src/iri.c
@@ -1,5 +1,6 @@
/* IRI related functions.
- Copyright (C) 2008, 2009, 2010, 2011 Free Software Foundation, Inc.
+ Copyright (C) 2008, 2009, 2010, 2011, 2015 Free Software Foundation,
+ Inc.
This file is part of GNU Wget.
@@ -52,8 +53,9 @@ as that of the covered work. */
/* Given a string containing "charset=XXX", return the encoding if found,
or NULL otherwise */
char *
-parse_charset (char *str)
+parse_charset (const char *str)
{
+ const char *end;
char *charset;
if (!str || !*str)
@@ -64,14 +66,14 @@ parse_charset (char *str)
return NULL;
str += 8;
- charset = str;
+ end = str;
/* sXXXav: which chars should be banned ??? */
- while (*charset && !c_isspace (*charset))
- charset++;
+ while (*end && !c_isspace (*end))
+ end++;
/* sXXXav: could strdupdelim return NULL ? */
- charset = strdupdelim (str, charset);
+ charset = strdupdelim (str, end);
/* Do a minimum check on the charset value */
if (!check_encoding_name (charset))
@@ -94,9 +96,9 @@ find_locale (void)
/* Basic check of an encoding name. */
bool
-check_encoding_name (char *encoding)
+check_encoding_name (const char *encoding)
{
- char *s = encoding;
+ const char *s = encoding;
while (*s)
{
@@ -116,13 +118,13 @@ check_encoding_name (char *encoding)
will contain the transcoded string on success. *out content is
unspecified otherwise. */
static bool
-do_conversion (const char *tocode, const char *fromcode, char *in, size_t inlen, char **out)
+do_conversion (const char *tocode, const char *fromcode, char const *in_org, size_t inlen, char **out)
{
iconv_t cd;
/* sXXXav : hummm hard to guess... */
size_t len, done, outlen;
int invalid = 0, tooshort = 0;
- char *s, *in_org, *in_save;
+ char *s, *in, *in_save;
cd = iconv_open (tocode, fromcode);
if (cd == (iconv_t)(-1))
@@ -134,9 +136,8 @@ do_conversion (const char *tocode, const char *fromcode, char *in, size_t inlen,
}
/* iconv() has to work on an unescaped string */
- in_org = in;
- in_save = in = xstrndup(in, inlen);
- url_unescape(in);
+ in_save = in = xstrndup (in_org, inlen);
+ url_unescape_except_reserved (in);
inlen = strlen(in);
len = outlen = inlen * 2;
@@ -151,7 +152,14 @@ do_conversion (const char *tocode, const char *fromcode, char *in, size_t inlen,
*(s + len - outlen - done) = '\0';
xfree(in_save);
iconv_close(cd);
- DEBUGP (("converted '%s' (%s) -> '%s' (%s)\n", in_org, fromcode, *out, tocode));
+ IF_DEBUG
+ {
+ /* not not print out embedded passwords, in_org might be an URL */
+ if (!strchr(in_org, '@') && !strchr(*out, '@'))
+ debug_logprintf ("converted '%s' (%s) -> '%s' (%s)\n", in_org, fromcode, *out, tocode);
+ else
+ debug_logprintf ("%s: logging suppressed, strings may contain password\n", __func__);
+ }
return true;
}
@@ -192,7 +200,14 @@ do_conversion (const char *tocode, const char *fromcode, char *in, size_t inlen,
xfree(in_save);
iconv_close(cd);
- DEBUGP (("converted '%s' (%s) -> '%s' (%s)\n", in_org, fromcode, *out, tocode));
+ IF_DEBUG
+ {
+ /* not not print out embedded passwords, in_org might be an URL */
+ if (!strchr(in_org, '@') && !strchr(*out, '@'))
+ debug_logprintf ("converted '%s' (%s) -> '%s' (%s)\n", in_org, fromcode, *out, tocode);
+ else
+ debug_logprintf ("%s: logging suppressed, strings may contain password\n", __func__);
+ }
return false;
}
@@ -216,42 +231,96 @@ locale_to_utf8 (const char *str)
if (do_conversion ("UTF-8", opt.locale, (char *) str, strlen ((char *) str), &new))
return (const char *) new;
+ xfree (new);
return str;
}
+/*
+ * Work around a libidn <= 1.30 vulnerability.
+ *
+ * The function checks for a valid UTF-8 character sequence before
+ * passing it to idna_to_ascii_8z().
+ *
+ * [1] http://lists.gnu.org/archive/html/help-libidn/2015-05/msg00002.html
+ * [2] https://lists.gnu.org/archive/html/bug-wget/2015-06/msg00002.html
+ * [3] http://curl.haxx.se/mail/lib-2015-06/0143.html
+ */
+static bool
+_utf8_is_valid(const char *utf8)
+{
+ const unsigned char *s = (const unsigned char *) utf8;
+
+ while (*s)
+ {
+ if ((*s & 0x80) == 0) /* 0xxxxxxx ASCII char */
+ s++;
+ else if ((*s & 0xE0) == 0xC0) /* 110xxxxx 10xxxxxx */
+ {
+ if ((s[1] & 0xC0) != 0x80)
+ return false;
+ s+=2;
+ }
+ else if ((*s & 0xF0) == 0xE0) /* 1110xxxx 10xxxxxx 10xxxxxx */
+ {
+ if ((s[1] & 0xC0) != 0x80 || (s[2] & 0xC0) != 0x80)
+ return false;
+ s+=3;
+ }
+ else if ((*s & 0xF8) == 0xF0) /* 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx */
+ {
+ if ((s[1] & 0xC0) != 0x80 || (s[2] & 0xC0) != 0x80 || (s[3] & 0xC0) != 0x80)
+ return false;
+ s+=4;
+ }
+ else
+ return false;
+ }
+
+ return true;
+}
+
/* Try to "ASCII encode" UTF-8 host. Return the new domain on success or NULL
on error. */
char *
-idn_encode (struct iri *i, char *host)
+idn_encode (const struct iri *i, const char *host)
{
- char *new;
int ret;
+ char *ascii_encoded;
+ char *utf8_encoded = NULL;
/* Encode to UTF-8 if not done */
if (!i->utf8_encode)
{
- if (!remote_to_utf8 (i, (const char *) host, (const char **) &new))
+ if (!remote_to_utf8 (i, host, &utf8_encoded))
return NULL; /* Nothing to encode or an error occured */
- host = new;
}
- /* toASCII UTF-8 NULL terminated string */
- ret = idna_to_ascii_8z (host, &new, IDNA_FLAGS);
+ if (!_utf8_is_valid(utf8_encoded ? utf8_encoded : host))
+ {
+ logprintf (LOG_VERBOSE, _("Invalid UTF-8 sequence: %s\n"),
+ quote(utf8_encoded ? utf8_encoded : host));
+ xfree (utf8_encoded);
+ return NULL;
+ }
+
+ /* Store in ascii_encoded the ASCII UTF-8 NULL terminated string */
+ ret = idna_to_ascii_8z (utf8_encoded ? utf8_encoded : host, &ascii_encoded, IDNA_FLAGS);
+ xfree (utf8_encoded);
+
if (ret != IDNA_SUCCESS)
{
- /* sXXXav : free new when needed ! */
logprintf (LOG_VERBOSE, _("idn_encode failed (%d): %s\n"), ret,
quote (idna_strerror (ret)));
return NULL;
}
- return new;
+ return ascii_encoded;
}
/* Try to decode an "ASCII encoded" host. Return the new domain in the locale
on success or NULL on error. */
char *
-idn_decode (char *host)
+idn_decode (const char *host)
{
char *new;
int ret;
@@ -270,7 +339,7 @@ idn_decode (char *host)
/* Try to transcode string str from remote encoding to UTF-8. On success, *new
contains the transcoded string. *new content is unspecified otherwise. */
bool
-remote_to_utf8 (struct iri *iri, const char *str, const char **new)
+remote_to_utf8 (const struct iri *iri, const char *str, char **new)
{
bool ret = false;
@@ -292,7 +361,7 @@ remote_to_utf8 (struct iri *iri, const char *str, const char **new)
return false;
}
- if (do_conversion ("UTF-8", iri->uri_encoding, (char *) str, strlen (str), (char **) new))
+ if (do_conversion ("UTF-8", iri->uri_encoding, str, strlen (str), new))
ret = true;
/* Test if something was converted */
@@ -344,7 +413,7 @@ iri_free (struct iri *i)
/* Set uri_encoding of struct iri i. If a remote encoding was specified, use
it unless force is true. */
void
-set_uri_encoding (struct iri *i, char *charset, bool force)
+set_uri_encoding (struct iri *i, const char *charset, bool force)
{
DEBUGP (("URI encoding = %s\n", charset ? quote (charset) : "None"));
if (!force && opt.encoding_remote)
@@ -361,7 +430,7 @@ set_uri_encoding (struct iri *i, char *charset, bool force)
/* Set content_encoding of struct iri i. */
void
-set_content_encoding (struct iri *i, char *charset)
+set_content_encoding (struct iri *i, const char *charset)
{
DEBUGP (("URI content encoding = %s\n", charset ? quote (charset) : "None"));
if (opt.encoding_remote)