Imported Upstream version 1.17upstream/1.17

author: DongHun Kwak <dh0128.kwak@samsung.com> 2021-03-05 10:08:15 +0900
committer: DongHun Kwak <dh0128.kwak@samsung.com> 2021-03-05 10:08:15 +0900
commit: 24d4e855d95e02a5324c2f3d88cfd5cd19830c2c (patch)
tree: 371d954e80394a8e72ef95d6ee7d45312c3f87a9 /src/iri.c
parent: 0b86d50828d05a27de3ff840d6a06407310393c2 (diff)
download: wget-24d4e855d95e02a5324c2f3d88cfd5cd19830c2c.tar.gz
wget-24d4e855d95e02a5324c2f3d88cfd5cd19830c2c.tar.bz2
wget-24d4e855d95e02a5324c2f3d88cfd5cd19830c2c.zip
1 files changed, 97 insertions, 28 deletions
diff --git a/src/iri.c b/src/iri.c
index 38c8c6e..dc9aa9a 100644
--- a/src/iri.c
+++ b/src/iri.c
@@ -1,5 +1,6 @@
 /* IRI related functions.
-   Copyright (C) 2008, 2009, 2010, 2011 Free Software Foundation, Inc.
+   Copyright (C) 2008, 2009, 2010, 2011, 2015 Free Software Foundation,
+   Inc.
 
 This file is part of GNU Wget.
 
@@ -52,8 +53,9 @@ as that of the covered work.  */
 /* Given a string containing "charset=XXX", return the encoding if found,
    or NULL otherwise */
 char *
-parse_charset (char *str)
+parse_charset (const char *str)
 {
+  const char *end;
   char *charset;
 
   if (!str || !*str)
@@ -64,14 +66,14 @@ parse_charset (char *str)
     return NULL;
 
   str += 8;
-  charset = str;
+  end = str;
 
   /* sXXXav: which chars should be banned ??? */
-  while (*charset && !c_isspace (*charset))
-    charset++;
+  while (*end && !c_isspace (*end))
+    end++;
 
   /* sXXXav: could strdupdelim return NULL ? */
-  charset = strdupdelim (str, charset);
+  charset = strdupdelim (str, end);
 
   /* Do a minimum check on the charset value */
   if (!check_encoding_name (charset))
@@ -94,9 +96,9 @@ find_locale (void)
 
 /* Basic check of an encoding name. */
 bool
-check_encoding_name (char *encoding)
+check_encoding_name (const char *encoding)
 {
-  char *s = encoding;
+  const char *s = encoding;
 
   while (*s)
     {
@@ -116,13 +118,13 @@ check_encoding_name (char *encoding)
    will contain the transcoded string on success. *out content is
    unspecified otherwise. */
 static bool
-do_conversion (const char *tocode, const char *fromcode, char *in, size_t inlen, char **out)
+do_conversion (const char *tocode, const char *fromcode, char const *in_org, size_t inlen, char **out)
 {
   iconv_t cd;
   /* sXXXav : hummm hard to guess... */
   size_t len, done, outlen;
   int invalid = 0, tooshort = 0;
-  char *s, *in_org, *in_save;
+  char *s, *in, *in_save;
 
   cd = iconv_open (tocode, fromcode);
   if (cd == (iconv_t)(-1))
@@ -134,9 +136,8 @@ do_conversion (const char *tocode, const char *fromcode, char *in, size_t inlen,
     }
 
   /* iconv() has to work on an unescaped string */
-  in_org = in;
-  in_save = in = xstrndup(in, inlen);
-  url_unescape(in);
+  in_save = in = xstrndup (in_org, inlen);
+  url_unescape_except_reserved (in);
   inlen = strlen(in);
 
   len = outlen = inlen * 2;
@@ -151,7 +152,14 @@ do_conversion (const char *tocode, const char *fromcode, char *in, size_t inlen,
           *(s + len - outlen - done) = '\0';
           xfree(in_save);
           iconv_close(cd);
-          DEBUGP (("converted '%s' (%s) -> '%s' (%s)\n", in_org, fromcode, *out, tocode));
+          IF_DEBUG
+          {
+            /* not not print out embedded passwords, in_org might be an URL */
+            if (!strchr(in_org, '@') && !strchr(*out, '@'))
+              debug_logprintf ("converted '%s' (%s) -> '%s' (%s)\n", in_org, fromcode, *out, tocode);
+            else
+              debug_logprintf ("%s: logging suppressed, strings may contain password\n", __func__);
+          }
           return true;
         }
 
@@ -192,7 +200,14 @@ do_conversion (const char *tocode, const char *fromcode, char *in, size_t inlen,
 
     xfree(in_save);
     iconv_close(cd);
-    DEBUGP (("converted '%s' (%s) -> '%s' (%s)\n", in_org, fromcode, *out, tocode));
+    IF_DEBUG
+    {
+      /* not not print out embedded passwords, in_org might be an URL */
+      if (!strchr(in_org, '@') && !strchr(*out, '@'))
+        debug_logprintf ("converted '%s' (%s) -> '%s' (%s)\n", in_org, fromcode, *out, tocode);
+      else
+        debug_logprintf ("%s: logging suppressed, strings may contain password\n", __func__);
+    }
     return false;
 }
 
@@ -216,42 +231,96 @@ locale_to_utf8 (const char *str)
   if (do_conversion ("UTF-8", opt.locale, (char *) str, strlen ((char *) str), &new))
     return (const char *) new;
 
+  xfree (new);
   return str;
 }
 
+/*
+ * Work around a libidn <= 1.30 vulnerability.
+ *
+ * The function checks for a valid UTF-8 character sequence before
+ * passing it to idna_to_ascii_8z().
+ *
+ * [1] http://lists.gnu.org/archive/html/help-libidn/2015-05/msg00002.html
+ * [2] https://lists.gnu.org/archive/html/bug-wget/2015-06/msg00002.html
+ * [3] http://curl.haxx.se/mail/lib-2015-06/0143.html
+ */
+static bool
+_utf8_is_valid(const char *utf8)
+{
+  const unsigned char *s = (const unsigned char *) utf8;
+
+  while (*s)
+    {
+      if ((*s & 0x80) == 0) /* 0xxxxxxx ASCII char */
+        s++;
+      else if ((*s & 0xE0) == 0xC0) /* 110xxxxx 10xxxxxx */
+        {
+          if ((s[1] & 0xC0) != 0x80)
+            return false;
+          s+=2;
+        }
+      else if ((*s & 0xF0) == 0xE0) /* 1110xxxx 10xxxxxx 10xxxxxx */
+        {
+          if ((s[1] & 0xC0) != 0x80 || (s[2] & 0xC0) != 0x80)
+            return false;
+          s+=3;
+        }
+      else if ((*s & 0xF8) == 0xF0) /* 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx */
+        {
+          if ((s[1] & 0xC0) != 0x80 || (s[2] & 0xC0) != 0x80 || (s[3] & 0xC0) != 0x80)
+            return false;
+          s+=4;
+        }
+      else
+        return false;
+    }
+
+  return true;
+}
+
 /* Try to "ASCII encode" UTF-8 host. Return the new domain on success or NULL
    on error. */
 char *
-idn_encode (struct iri *i, char *host)
+idn_encode (const struct iri *i, const char *host)
 {
-  char *new;
   int ret;
+  char *ascii_encoded;
+  char *utf8_encoded = NULL;
 
   /* Encode to UTF-8 if not done */
   if (!i->utf8_encode)
     {
-      if (!remote_to_utf8 (i, (const char *) host, (const char **) &new))
+      if (!remote_to_utf8 (i, host, &utf8_encoded))
           return NULL;  /* Nothing to encode or an error occured */
-      host = new;
     }
 
-  /* toASCII UTF-8 NULL terminated string */
-  ret = idna_to_ascii_8z (host, &new, IDNA_FLAGS);
+  if (!_utf8_is_valid(utf8_encoded ? utf8_encoded : host))
+    {
+      logprintf (LOG_VERBOSE, _("Invalid UTF-8 sequence: %s\n"),
+                 quote(utf8_encoded ? utf8_encoded : host));
+      xfree (utf8_encoded);
+      return NULL;
+    }
+
+  /* Store in ascii_encoded the ASCII UTF-8 NULL terminated string */
+  ret = idna_to_ascii_8z (utf8_encoded ? utf8_encoded : host, &ascii_encoded, IDNA_FLAGS);
+  xfree (utf8_encoded);
+
   if (ret != IDNA_SUCCESS)
     {
-      /* sXXXav : free new when needed ! */
       logprintf (LOG_VERBOSE, _("idn_encode failed (%d): %s\n"), ret,
                  quote (idna_strerror (ret)));
       return NULL;
     }
 
-  return new;
+  return ascii_encoded;
 }
 
 /* Try to decode an "ASCII encoded" host. Return the new domain in the locale
    on success or NULL on error. */
 char *
-idn_decode (char *host)
+idn_decode (const char *host)
 {
   char *new;
   int ret;
@@ -270,7 +339,7 @@ idn_decode (char *host)
 /* Try to transcode string str from remote encoding to UTF-8. On success, *new
    contains the transcoded string. *new content is unspecified otherwise. */
 bool
-remote_to_utf8 (struct iri *iri, const char *str, const char **new)
+remote_to_utf8 (const struct iri *iri, const char *str, char **new)
 {
   bool ret = false;
 
@@ -292,7 +361,7 @@ remote_to_utf8 (struct iri *iri, const char *str, const char **new)
       return false;
     }
 
-  if (do_conversion ("UTF-8", iri->uri_encoding, (char *) str, strlen (str), (char **) new))
+  if (do_conversion ("UTF-8", iri->uri_encoding, str, strlen (str), new))
     ret = true;
 
   /* Test if something was converted */
@@ -344,7 +413,7 @@ iri_free (struct iri *i)
 /* Set uri_encoding of struct iri i. If a remote encoding was specified, use
    it unless force is true. */
 void
-set_uri_encoding (struct iri *i, char *charset, bool force)
+set_uri_encoding (struct iri *i, const char *charset, bool force)
 {
   DEBUGP (("URI encoding = %s\n", charset ? quote (charset) : "None"));
   if (!force && opt.encoding_remote)
@@ -361,7 +430,7 @@ set_uri_encoding (struct iri *i, char *charset, bool force)
 
 /* Set content_encoding of struct iri i. */
 void
-set_content_encoding (struct iri *i, char *charset)
+set_content_encoding (struct iri *i, const char *charset)
 {
   DEBUGP (("URI content encoding = %s\n", charset ? quote (charset) : "None"));
   if (opt.encoding_remote)
author	DongHun Kwak <dh0128.kwak@samsung.com>	2021-03-05 10:08:15 +0900
committer	DongHun Kwak <dh0128.kwak@samsung.com>	2021-03-05 10:08:15 +0900
commit	24d4e855d95e02a5324c2f3d88cfd5cd19830c2c (patch)
tree	371d954e80394a8e72ef95d6ee7d45312c3f87a9 /src/iri.c
parent	0b86d50828d05a27de3ff840d6a06407310393c2 (diff)
download	wget-24d4e855d95e02a5324c2f3d88cfd5cd19830c2c.tar.gz wget-24d4e855d95e02a5324c2f3d88cfd5cd19830c2c.tar.bz2 wget-24d4e855d95e02a5324c2f3d88cfd5cd19830c2c.zip