diff options
author | DongHun Kwak <dh0128.kwak@samsung.com> | 2021-03-05 10:08:17 +0900 |
---|---|---|
committer | DongHun Kwak <dh0128.kwak@samsung.com> | 2021-03-05 10:08:17 +0900 |
commit | 6403e0986cb5d0b8b4cbea66f8f3ff7a68cb4c20 (patch) | |
tree | 4936775a0caecb157d619aa6c8f26310c2611c7e /src | |
parent | 0fd98397eab07f1ec3b1fad9890fd751298e1fe0 (diff) | |
download | wget-6403e0986cb5d0b8b4cbea66f8f3ff7a68cb4c20.tar.gz wget-6403e0986cb5d0b8b4cbea66f8f3ff7a68cb4c20.tar.bz2 wget-6403e0986cb5d0b8b4cbea66f8f3ff7a68cb4c20.zip |
Imported Upstream version 1.18upstream/1.18
Diffstat (limited to 'src')
-rw-r--r-- | src/Makefile.in | 2 | ||||
-rw-r--r-- | src/build_info.c | 6 | ||||
-rw-r--r-- | src/build_info.c.in | 1 | ||||
-rw-r--r-- | src/config.h.in | 3 | ||||
-rw-r--r-- | src/connect.c | 9 | ||||
-rw-r--r-- | src/convert.c | 6 | ||||
-rw-r--r-- | src/convert.h | 1 | ||||
-rw-r--r-- | src/ftp.c | 72 | ||||
-rw-r--r-- | src/ftp.h | 3 | ||||
-rw-r--r-- | src/gnutls.c | 100 | ||||
-rw-r--r-- | src/host.c | 267 | ||||
-rw-r--r-- | src/hsts.c | 83 | ||||
-rw-r--r-- | src/hsts.h | 1 | ||||
-rw-r--r-- | src/html-url.c | 90 | ||||
-rw-r--r-- | src/http.c | 4 | ||||
-rw-r--r-- | src/init.c | 21 | ||||
-rw-r--r-- | src/iri.c | 17 | ||||
-rw-r--r-- | src/iri.h | 2 | ||||
-rw-r--r-- | src/log.c | 11 | ||||
-rw-r--r-- | src/main.c | 85 | ||||
-rw-r--r-- | src/metalink.c | 7 | ||||
-rw-r--r-- | src/mswindows.c | 2 | ||||
-rw-r--r-- | src/openssl.c | 107 | ||||
-rw-r--r-- | src/options.h | 10 | ||||
-rw-r--r-- | src/progress.c | 13 | ||||
-rw-r--r-- | src/recur.c | 6 | ||||
-rw-r--r-- | src/retr.c | 11 | ||||
-rw-r--r-- | src/sysdep.h | 13 | ||||
-rw-r--r-- | src/url.c | 87 | ||||
-rw-r--r-- | src/utils.c | 215 | ||||
-rw-r--r-- | src/utils.h | 9 | ||||
-rw-r--r-- | src/warc.c | 18 |
32 files changed, 1102 insertions, 180 deletions
diff --git a/src/Makefile.in b/src/Makefile.in index 5357ddd..887ca8e 100644 --- a/src/Makefile.in +++ b/src/Makefile.in @@ -387,6 +387,8 @@ BITSIZEOF_SIG_ATOMIC_T = @BITSIZEOF_SIG_ATOMIC_T@ BITSIZEOF_SIZE_T = @BITSIZEOF_SIZE_T@ BITSIZEOF_WCHAR_T = @BITSIZEOF_WCHAR_T@ BITSIZEOF_WINT_T = @BITSIZEOF_WINT_T@ +CARES_CFLAGS = @CARES_CFLAGS@ +CARES_LIBS = @CARES_LIBS@ CC = @CC@ CCDEPMODE = @CCDEPMODE@ CFLAGS = @CFLAGS@ diff --git a/src/build_info.c b/src/build_info.c index 067257e..bf6e6a9 100644 --- a/src/build_info.c +++ b/src/build_info.c @@ -13,6 +13,12 @@ const char *compiled_features[] = { +#if defined HAVE_LIBCARES + "+cares", +#else + "-cares", +#endif + #if defined ENABLE_DIGEST "+digest", #else diff --git a/src/build_info.c.in b/src/build_info.c.in index 83b7664..c7493e9 100644 --- a/src/build_info.c.in +++ b/src/build_info.c.in @@ -8,6 +8,7 @@ nls defined ENABLE_NLS ntlm defined ENABLE_NTLM opie defined ENABLE_OPIE psl defined HAVE_LIBPSL +cares defined HAVE_LIBCARES metalink defined HAVE_METALINK gpgme defined HAVE_GPGME diff --git a/src/config.h.in b/src/config.h.in index e8f4170..8b39c52 100644 --- a/src/config.h.in +++ b/src/config.h.in @@ -747,6 +747,9 @@ /* Define to 1 if you have the <langinfo.h> header file. */ #undef HAVE_LANGINFO_H +/* Define if libcares is available. */ +#undef HAVE_LIBCARES + /* Define to 1 if you have the `dl' library (-ldl). */ #undef HAVE_LIBDL diff --git a/src/connect.c b/src/connect.c index 024b231..0704000 100644 --- a/src/connect.c +++ b/src/connect.c @@ -369,7 +369,14 @@ connect_to_ip (const ip_address *ip, int port, const char *print) logprintf. */ int save_errno = errno; if (sock >= 0) - fd_close (sock); + { +#ifdef WIN32 + /* If the connection timed out, fd_close will hang in Gnulib's + close_fd_maybe_socket, inside the call to WSAEnumNetworkEvents. */ + if (errno != ETIMEDOUT) +#endif + fd_close (sock); + } if (print) logprintf (LOG_NOTQUIET, _("failed: %s.\n"), strerror (errno)); errno = save_errno; diff --git a/src/convert.c b/src/convert.c index df8d58d..509923e 100644 --- a/src/convert.c +++ b/src/convert.c @@ -308,7 +308,7 @@ convert_links (const char *file, struct urlpos *links) char *quoted_newname = local_quote_string (newname, link->link_css_p); - if (link->link_css_p) + if (link->link_css_p || link->link_noquote_html_p) p = replace_plain (p, link->size, fp, quoted_newname); else if (!link->link_refresh_p) p = replace_attr (p, link->size, fp, quoted_newname); @@ -329,7 +329,7 @@ convert_links (const char *file, struct urlpos *links) char *newname = convert_basename (p, link); char *quoted_newname = local_quote_string (newname, link->link_css_p); - if (link->link_css_p) + if (link->link_css_p || link->link_noquote_html_p) p = replace_plain (p, link->size, fp, quoted_newname); else if (!link->link_refresh_p) p = replace_attr (p, link->size, fp, quoted_newname); @@ -352,7 +352,7 @@ convert_links (const char *file, struct urlpos *links) char *newlink = link->url->url; char *quoted_newlink = html_quote_string (newlink); - if (link->link_css_p) + if (link->link_css_p || link->link_noquote_html_p) p = replace_plain (p, link->size, fp, newlink); else if (!link->link_refresh_p) p = replace_attr (p, link->size, fp, quoted_newlink); diff --git a/src/convert.h b/src/convert.h index b3cd196..e3ff6f0 100644 --- a/src/convert.h +++ b/src/convert.h @@ -69,6 +69,7 @@ struct urlpos { unsigned int link_base_p :1; /* the url came from <base href=...> */ unsigned int link_inline_p :1; /* needed to render the page */ unsigned int link_css_p :1; /* the url came from CSS */ + unsigned int link_noquote_html_p :1; /* from HTML, but doesn't need " */ unsigned int link_expect_html :1; /* expected to contain HTML */ unsigned int link_expect_css :1; /* expected to contain CSS */ @@ -236,7 +236,7 @@ print_length (wgint size, wgint start, bool authoritative) logputs (LOG_VERBOSE, !authoritative ? _(" (unauthoritative)\n") : "\n"); } -static uerr_t ftp_get_listing (struct url *, ccon *, struct fileinfo **); +static uerr_t ftp_get_listing (struct url *, struct url *, ccon *, struct fileinfo **); static uerr_t get_ftp_greeting(int csock, ccon *con) @@ -315,14 +315,14 @@ init_control_ssl_connection (int csock, struct url *u, bool *using_control_secur and closes the control connection in case of error. If warc_tmp is non-NULL, the downloaded data will be written there as well. */ static uerr_t -getftp (struct url *u, wgint passed_expected_bytes, wgint *qtyread, +getftp (struct url *u, struct url *original_url, + wgint passed_expected_bytes, wgint *qtyread, wgint restval, ccon *con, int count, wgint *last_expected_bytes, FILE *warc_tmp) { int csock, dtsock, local_sock, res; uerr_t err = RETROK; /* appease the compiler */ FILE *fp = NULL; - struct_fstat st; char *respline, *tms; const char *user, *passwd, *tmrate; int cmd = con->cmd; @@ -1189,7 +1189,7 @@ Error in server response, closing control connection.\n")); { bool exists = false; struct fileinfo *f; - uerr_t _res = ftp_get_listing (u, con, &f); + uerr_t _res = ftp_get_listing (u, original_url, con, &f); /* Set the DO_RETR command flag again, because it gets unset when calling ftp_get_listing() and would otherwise cause an assertion failure earlier on when this function gets repeatedly called @@ -1780,8 +1780,8 @@ exit_error: This loop either gets commands from con, or (if ON_YOUR_OWN is set), makes them up to retrieve the file given by the URL. */ static uerr_t -ftp_loop_internal (struct url *u, struct fileinfo *f, ccon *con, char **local_file, - bool force_full_retrieve) +ftp_loop_internal (struct url *u, struct url *original_url, struct fileinfo *f, + ccon *con, char **local_file, bool force_full_retrieve) { int count, orig_lp; wgint restval, len = 0, qtyread = 0; @@ -1806,7 +1806,7 @@ ftp_loop_internal (struct url *u, struct fileinfo *f, ccon *con, char **local_fi { /* URL-derived file. Consider "-O file" name. */ xfree (con->target); - con->target = url_file_name (u, NULL); + con->target = url_file_name (opt.trustservernames || !original_url ? u : original_url, NULL); if (!opt.output_document) locf = con->target; else @@ -1924,8 +1924,8 @@ ftp_loop_internal (struct url *u, struct fileinfo *f, ccon *con, char **local_fi /* If we are working on a WARC record, getftp should also write to the warc_tmp file. */ - err = getftp (u, len, &qtyread, restval, con, count, &last_expected_bytes, - warc_tmp); + err = getftp (u, original_url, len, &qtyread, restval, con, count, + &last_expected_bytes, warc_tmp); if (con->csock == -1) con->st &= ~DONE_CWD; @@ -2093,7 +2093,8 @@ Removing file due to --delete-after in ftp_loop_internal():\n")); /* Return the directory listing in a reusable format. The directory is specifed in u->dir. */ static uerr_t -ftp_get_listing (struct url *u, ccon *con, struct fileinfo **f) +ftp_get_listing (struct url *u, struct url *original_url, ccon *con, + struct fileinfo **f) { uerr_t err; char *uf; /* url file name */ @@ -2114,7 +2115,7 @@ ftp_get_listing (struct url *u, ccon *con, struct fileinfo **f) con->target = xstrdup (lf); xfree (lf); - err = ftp_loop_internal (u, NULL, con, NULL, false); + err = ftp_loop_internal (u, original_url, NULL, con, NULL, false); lf = xstrdup (con->target); xfree (con->target); con->target = old_target; @@ -2137,8 +2138,9 @@ ftp_get_listing (struct url *u, ccon *con, struct fileinfo **f) return err; } -static uerr_t ftp_retrieve_dirs (struct url *, struct fileinfo *, ccon *); -static uerr_t ftp_retrieve_glob (struct url *, ccon *, int); +static uerr_t ftp_retrieve_dirs (struct url *, struct url *, + struct fileinfo *, ccon *); +static uerr_t ftp_retrieve_glob (struct url *, struct url *, ccon *, int); static struct fileinfo *delelement (struct fileinfo *, struct fileinfo **); static void freefileinfo (struct fileinfo *f); @@ -2150,7 +2152,8 @@ static void freefileinfo (struct fileinfo *f); If opt.recursive is set, after all files have been retrieved, ftp_retrieve_dirs will be called to retrieve the directories. */ static uerr_t -ftp_retrieve_list (struct url *u, struct fileinfo *f, ccon *con) +ftp_retrieve_list (struct url *u, struct url *original_url, + struct fileinfo *f, ccon *con) { static int depth = 0; uerr_t err; @@ -2311,7 +2314,10 @@ Already have correct symlink %s -> %s\n\n"), else /* opt.retr_symlinks */ { if (dlthis) - err = ftp_loop_internal (u, f, con, NULL, force_full_retrieve); + { + err = ftp_loop_internal (u, original_url, f, con, NULL, + force_full_retrieve); + } } /* opt.retr_symlinks */ break; case FT_DIRECTORY: @@ -2322,7 +2328,10 @@ Already have correct symlink %s -> %s\n\n"), case FT_PLAINFILE: /* Call the retrieve loop. */ if (dlthis) - err = ftp_loop_internal (u, f, con, NULL, force_full_retrieve); + { + err = ftp_loop_internal (u, original_url, f, con, NULL, + force_full_retrieve); + } break; case FT_UNKNOWN: logprintf (LOG_NOTQUIET, _("%s: unknown/unsupported file type.\n"), @@ -2387,7 +2396,7 @@ Already have correct symlink %s -> %s\n\n"), /* We do not want to call ftp_retrieve_dirs here */ if (opt.recursive && !(opt.reclevel != INFINITE_RECURSION && depth >= opt.reclevel)) - err = ftp_retrieve_dirs (u, orig, con); + err = ftp_retrieve_dirs (u, original_url, orig, con); else if (opt.recursive) DEBUGP ((_("Will not retrieve dirs since depth is %d (max %d).\n"), depth, opt.reclevel)); @@ -2400,7 +2409,8 @@ Already have correct symlink %s -> %s\n\n"), ftp_retrieve_glob on each directory entry. The function knows about excluded directories. */ static uerr_t -ftp_retrieve_dirs (struct url *u, struct fileinfo *f, ccon *con) +ftp_retrieve_dirs (struct url *u, struct url *original_url, + struct fileinfo *f, ccon *con) { char *container = NULL; int container_size = 0; @@ -2450,7 +2460,7 @@ Not descending to %s as it is excluded/not-included.\n"), odir = xstrdup (u->dir); /* because url_set_dir will free u->dir. */ url_set_dir (u, newdir); - ftp_retrieve_glob (u, con, GLOB_GETALL); + ftp_retrieve_glob (u, original_url, con, GLOB_GETALL); url_set_dir (u, odir); xfree (odir); @@ -2509,14 +2519,15 @@ is_invalid_entry (struct fileinfo *f) GLOB_GLOBALL, use globbing; if it's GLOB_GETALL, download the whole directory. */ static uerr_t -ftp_retrieve_glob (struct url *u, ccon *con, int action) +ftp_retrieve_glob (struct url *u, struct url *original_url, + ccon *con, int action) { struct fileinfo *f, *start; uerr_t res; con->cmd |= LEAVE_PENDING; - res = ftp_get_listing (u, con, &start); + res = ftp_get_listing (u, original_url, con, &start); if (res != RETROK) return res; /* First: weed out that do not conform the global rules given in @@ -2612,7 +2623,7 @@ ftp_retrieve_glob (struct url *u, ccon *con, int action) if (start) { /* Just get everything. */ - res = ftp_retrieve_list (u, start, con); + res = ftp_retrieve_list (u, original_url, start, con); } else { @@ -2628,7 +2639,7 @@ ftp_retrieve_glob (struct url *u, ccon *con, int action) { /* Let's try retrieving it anyway. */ con->st |= ON_YOUR_OWN; - res = ftp_loop_internal (u, NULL, con, NULL, false); + res = ftp_loop_internal (u, original_url, NULL, con, NULL, false); return res; } @@ -2648,8 +2659,8 @@ ftp_retrieve_glob (struct url *u, ccon *con, int action) of URL. Inherently, its capabilities are limited on what can be encoded into a URL. */ uerr_t -ftp_loop (struct url *u, char **local_file, int *dt, struct url *proxy, - bool recursive, bool glob) +ftp_loop (struct url *u, struct url *original_url, char **local_file, int *dt, + struct url *proxy, bool recursive, bool glob) { ccon con; /* FTP connection */ uerr_t res; @@ -2670,16 +2681,17 @@ ftp_loop (struct url *u, char **local_file, int *dt, struct url *proxy, if (!*u->file && !recursive) { struct fileinfo *f; - res = ftp_get_listing (u, &con, &f); + res = ftp_get_listing (u, original_url, &con, &f); if (res == RETROK) { if (opt.htmlify && !opt.spider) { + struct url *url_file = opt.trustservernames ? u : original_url; char *filename = (opt.output_document ? xstrdup (opt.output_document) : (con.target ? xstrdup (con.target) - : url_file_name (u, NULL))); + : url_file_name (url_file, NULL))); res = ftp_index (filename, u, f); if (res == FTPOK && opt.verbose) { @@ -2724,11 +2736,13 @@ ftp_loop (struct url *u, char **local_file, int *dt, struct url *proxy, /* ftp_retrieve_glob is a catch-all function that gets called if we need globbing, time-stamping, recursion or preserve permissions. Its third argument is just what we really need. */ - res = ftp_retrieve_glob (u, &con, + res = ftp_retrieve_glob (u, original_url, &con, ispattern ? GLOB_GLOBALL : GLOB_GETONE); } else - res = ftp_loop_internal (u, NULL, &con, local_file, false); + { + res = ftp_loop_internal (u, original_url, NULL, &con, local_file, false); + } } if (res == FTPOK) res = RETROK; @@ -169,7 +169,8 @@ enum wget_ftp_fstatus }; struct fileinfo *ftp_parse_ls (const char *, const enum stype); -uerr_t ftp_loop (struct url *, char **, int *, struct url *, bool, bool); +uerr_t ftp_loop (struct url *, struct url *, char **, int *, struct url *, + bool, bool); uerr_t ftp_index (const char *, struct url *, struct fileinfo *); diff --git a/src/gnutls.c b/src/gnutls.c index d39371f..185304d 100644 --- a/src/gnutls.c +++ b/src/gnutls.c @@ -36,7 +36,9 @@ as that of the covered work. */ #include <stdio.h> #include <dirent.h> #include <stdlib.h> +#include <xalloc.h> +#include <gnutls/abstract.h> #include <gnutls/gnutls.h> #include <gnutls/x509.h> #include <sys/ioctl.h> @@ -518,6 +520,22 @@ _do_handshake (gnutls_session_t session, int fd, double timeout) return err; } +static const char * +_sni_hostname(const char *hostname) +{ + size_t len = strlen(hostname); + + char *sni_hostname = xmemdup(hostname, len + 1); + + /* Remove trailing dot(s) to fix #47408. + * Regarding RFC 6066 (SNI): The hostname is represented as a byte + * string using ASCII encoding without a trailing dot. */ + while (len && sni_hostname[--len] == '.') + sni_hostname[len] = 0; + + return sni_hostname; +} + bool ssl_connect_wget (int fd, const char *hostname, int *continue_session) { @@ -530,8 +548,12 @@ ssl_connect_wget (int fd, const char *hostname, int *continue_session) /* We set the server name but only if it's not an IP address. */ if (! is_valid_ip_address (hostname)) { - gnutls_server_name_set (session, GNUTLS_NAME_DNS, hostname, - strlen (hostname)); + /* GnuTLS 3.4.x (x<=10) disrespects the length parameter, we have to construct a new string */ + /* see https://gitlab.com/gnutls/gnutls/issues/78 */ + const char *sni_hostname = _sni_hostname(hostname); + + gnutls_server_name_set (session, GNUTLS_NAME_DNS, sni_hostname, strlen(sni_hostname)); + xfree(sni_hostname); } gnutls_set_default_priority (session); @@ -671,6 +693,59 @@ ssl_connect_wget (int fd, const char *hostname, int *continue_session) return true; } +static bool +pkp_pin_peer_pubkey (gnutls_x509_crt_t cert, const char *pinnedpubkey) +{ + /* Scratch */ + size_t len1 = 0, len2 = 0; + char *buff1 = NULL; + + gnutls_pubkey_t key = NULL; + + /* Result is returned to caller */ + int ret = 0; + bool result = false; + + /* if a path wasn't specified, don't pin */ + if (NULL == pinnedpubkey) + return true; + + if (NULL == cert) + return result; + + /* Begin Gyrations to get the public key */ + gnutls_pubkey_init (&key); + + ret = gnutls_pubkey_import_x509 (key, cert, 0); + if (ret < 0) + goto cleanup; /* failed */ + + ret = gnutls_pubkey_export (key, GNUTLS_X509_FMT_DER, NULL, &len1); + if (ret != GNUTLS_E_SHORT_MEMORY_BUFFER || len1 == 0) + goto cleanup; /* failed */ + + buff1 = xmalloc (len1); + + len2 = len1; + + ret = gnutls_pubkey_export (key, GNUTLS_X509_FMT_DER, buff1, &len2); + if (ret < 0 || len1 != len2) + goto cleanup; /* failed */ + + /* End Gyrations */ + + /* The one good exit point */ + result = wg_pin_peer_pubkey (pinnedpubkey, buff1, len1); + + cleanup: + if (NULL != key) + gnutls_pubkey_deinit (key); + + xfree (buff1); + + return result; +} + #define _CHECK_CERT(flag,msg) \ if (status & (flag))\ {\ @@ -691,9 +766,10 @@ ssl_check_certificate (int fd, const char *host) him about problems with the server's certificate. */ const char *severity = opt.check_cert ? _("ERROR") : _("WARNING"); bool success = true; + bool pinsuccess = opt.pinnedpubkey == NULL; /* The user explicitly said to not check for the certificate. */ - if (opt.check_cert == CHECK_CERT_QUIET) + if (opt.check_cert == CHECK_CERT_QUIET && pinsuccess) return success; err = gnutls_certificate_verify_peers2 (ctx->session, &status); @@ -719,6 +795,7 @@ ssl_check_certificate (int fd, const char *host) gnutls_x509_crt_t cert; const gnutls_datum_t *cert_list; unsigned int cert_list_size; + const char *sni_hostname; if ((err = gnutls_x509_crt_init (&cert)) < 0) { @@ -753,13 +830,23 @@ ssl_check_certificate (int fd, const char *host) logprintf (LOG_NOTQUIET, _("The certificate has expired\n")); success = false; } - if (!gnutls_x509_crt_check_hostname (cert, host)) + sni_hostname = _sni_hostname(host); + if (!gnutls_x509_crt_check_hostname (cert, sni_hostname)) { logprintf (LOG_NOTQUIET, _("The certificate's owner does not match hostname %s\n"), - quote (host)); + quote (sni_hostname)); + success = false; + } + xfree(sni_hostname); + + pinsuccess = pkp_pin_peer_pubkey (cert, opt.pinnedpubkey); + if (!pinsuccess) + { + logprintf (LOG_ALWAYS, _("The public key does not match pinned public key!\n")); success = false; } + crt_deinit: gnutls_x509_crt_deinit (cert); } @@ -770,5 +857,6 @@ ssl_check_certificate (int fd, const char *host) } out: - return opt.check_cert == CHECK_CERT_ON ? success : true; + /* never return true if pinsuccess fails */ + return !pinsuccess ? false : (opt.check_cert == CHECK_CERT_ON ? success : true); } @@ -65,6 +65,7 @@ as that of the covered work. */ #include "host.h" #include "url.h" #include "hash.h" +#include "ptimer.h" #ifndef NO_ADDRESS # define NO_ADDRESS NO_DATA @@ -649,6 +650,112 @@ cache_remove (const char *host) } } +#ifdef HAVE_LIBCARES +#include <sys/select.h> +#include <ares.h> +extern ares_channel ares; + +static struct address_list * +merge_address_lists (struct address_list *al1, struct address_list *al2) +{ + int count = al1->count + al2->count; + + /* merge al2 into al1 */ + al1->addresses = xrealloc (al1->addresses, sizeof (ip_address) * count); + memcpy (al1->addresses + al1->count, al2->addresses, sizeof (ip_address) * al2->count); + al1->count = count; + + address_list_delete (al2); + + return al1; +} + +static struct address_list * +address_list_from_hostent (struct hostent *host) +{ + int count, i; + struct address_list *al = xnew0 (struct address_list); + + for (count = 0; host->h_addr_list[count]; count++) + ; + + assert (count > 0); + + al->addresses = xnew_array (ip_address, count); + al->count = count; + al->refcount = 1; + + for (i = 0; i < count; i++) + { + ip_address *ip = &al->addresses[i]; + ip->family = host->h_addrtype; + memcpy (IP_INADDR_DATA (ip), host->h_addr_list[i], ip->family == AF_INET ? 4 : 16); + } + + return al; +} + +/* Since GnuLib's select() (i.e. rpl_select()) cannot handle socket-numbers + * returned from C-ares, we must use the original select() from Winsock. + */ +#ifdef WINDOWS +#undef select +#endif + +static void +wait_ares (ares_channel channel) +{ + struct ptimer *timer = NULL; + + if (opt.dns_timeout) + timer = ptimer_new (); + + for (;;) + { + struct timeval *tvp, tv; + fd_set read_fds, write_fds; + int nfds, rc; + + FD_ZERO (&read_fds); + FD_ZERO (&write_fds); + nfds = ares_fds (channel, &read_fds, &write_fds); + if (nfds == 0) + break; + + if (timer) + { + double max = opt.dns_timeout - ptimer_measure (timer); + + tv.tv_sec = (long) max; + tv.tv_usec = 1000000 * (max - (long) max); + tvp = ares_timeout (channel, &tv, &tv); + } + else + tvp = ares_timeout (channel, NULL, &tv); + + rc = select (nfds, &read_fds, &write_fds, NULL, tvp); + if (rc == 0 && timer && ptimer_measure (timer) >= opt.dns_timeout) + ares_cancel (channel); + else + ares_process (channel, &read_fds, &write_fds); + } +} + +static void +callback (void *arg, int status, int timeouts _GL_UNUSED, struct hostent *host) +{ + struct address_list **al = (struct address_list **) arg; + + if (!host || status != ARES_SUCCESS) + { + *al = NULL; + return; + } + + *al = address_list_from_hostent (host); +} +#endif + /* Look up HOST in DNS and return a list of IP addresses. This function caches its result so that, if the same host is passed @@ -755,80 +862,112 @@ lookup_host (const char *host, int flags) } #ifdef ENABLE_IPV6 - { - int err; - struct addrinfo hints, *res; - - xzero (hints); - hints.ai_socktype = SOCK_STREAM; - if (opt.ipv4_only) - hints.ai_family = AF_INET; - else if (opt.ipv6_only) - hints.ai_family = AF_INET6; - else - /* We tried using AI_ADDRCONFIG, but removed it because: it - misinterprets IPv6 loopbacks, it is broken on AIX 5.1, and - it's unneeded since we sort the addresses anyway. */ +#ifdef HAVE_LIBCARES + if (ares) + { + struct address_list *al4; + struct address_list *al6; + + if (opt.ipv4_only || !opt.ipv6_only) + ares_gethostbyname (ares, host, AF_INET, callback, &al4); + if (opt.ipv6_only || !opt.ipv4_only) + ares_gethostbyname (ares, host, AF_INET6, callback, &al6); + + wait_ares (ares); + + if (al4 && al6) + al = merge_address_lists (al4, al6); + else if (al4) + al = al4; + else + al = al6; + } + else +#endif + { + int err; + struct addrinfo hints, *res; + + xzero (hints); + hints.ai_socktype = SOCK_STREAM; + if (opt.ipv4_only) + hints.ai_family = AF_INET; + else if (opt.ipv6_only) + hints.ai_family = AF_INET6; + else + /* We tried using AI_ADDRCONFIG, but removed it because: it + misinterprets IPv6 loopbacks, it is broken on AIX 5.1, and + it's unneeded since we sort the addresses anyway. */ hints.ai_family = AF_UNSPEC; - if (flags & LH_BIND) - hints.ai_flags |= AI_PASSIVE; + if (flags & LH_BIND) + hints.ai_flags |= AI_PASSIVE; #ifdef AI_NUMERICHOST - if (numeric_address) - { - /* Where available, the AI_NUMERICHOST hint can prevent costly - access to DNS servers. */ - hints.ai_flags |= AI_NUMERICHOST; - timeout = 0; /* no timeout needed when "resolving" + if (numeric_address) + { + /* Where available, the AI_NUMERICHOST hint can prevent costly + access to DNS servers. */ + hints.ai_flags |= AI_NUMERICHOST; + timeout = 0; /* no timeout needed when "resolving" numeric hosts -- avoid setting up signal handlers and such. */ - } + } #endif - err = getaddrinfo_with_timeout (host, NULL, &hints, &res, timeout); - if (err != 0 || res == NULL) - { - if (!silent) - logprintf (LOG_VERBOSE, _("failed: %s.\n"), - err != EAI_SYSTEM ? gai_strerror (err) : strerror (errno)); - return NULL; - } - al = address_list_from_addrinfo (res); - freeaddrinfo (res); - if (!al) - { - logprintf (LOG_VERBOSE, - _("failed: No IPv4/IPv6 addresses for host.\n")); - return NULL; - } + err = getaddrinfo_with_timeout (host, NULL, &hints, &res, timeout); - /* Reorder addresses so that IPv4 ones (or IPv6 ones, as per - --prefer-family) come first. Sorting is stable so the order of - the addresses with the same family is undisturbed. */ - if (al->count > 1 && opt.prefer_family != prefer_none) - stable_sort (al->addresses, al->count, sizeof (ip_address), - opt.prefer_family == prefer_ipv4 - ? cmp_prefer_ipv4 : cmp_prefer_ipv6); - } + if (err != 0 || res == NULL) + { + if (!silent) + logprintf (LOG_VERBOSE, _ ("failed: %s.\n"), + err != EAI_SYSTEM ? gai_strerror (err) : strerror (errno)); + return NULL; + } + al = address_list_from_addrinfo (res); + freeaddrinfo (res); + } + + if (!al) + { + logprintf (LOG_VERBOSE, + _ ("failed: No IPv4/IPv6 addresses for host.\n")); + return NULL; + } + + /* Reorder addresses so that IPv4 ones (or IPv6 ones, as per + --prefer-family) come first. Sorting is stable so the order of + the addresses with the same family is undisturbed. */ + if (al->count > 1 && opt.prefer_family != prefer_none) + stable_sort (al->addresses, al->count, sizeof (ip_address), + opt.prefer_family == prefer_ipv4 + ? cmp_prefer_ipv4 : cmp_prefer_ipv6); #else /* not ENABLE_IPV6 */ - { - struct hostent *hptr = gethostbyname_with_timeout (host, timeout); - if (!hptr) - { - if (!silent) - { - if (errno != ETIMEDOUT) - logprintf (LOG_VERBOSE, _("failed: %s.\n"), - host_errstr (h_errno)); - else - logputs (LOG_VERBOSE, _("failed: timed out.\n")); - } - return NULL; - } - /* Do older systems have h_addr_list? */ - al = address_list_from_ipv4_addresses (hptr->h_addr_list); - } +#ifdef HAVE_LIBCARES + if (ares) + { + ares_gethostbyname (ares, host, AF_INET, callback, &al); + wait_ares (ares); + } + else +#endif + { + struct hostent *hptr = gethostbyname_with_timeout (host, timeout); + if (!hptr) + { + if (!silent) + { + if (errno != ETIMEDOUT) + logprintf (LOG_VERBOSE, _ ("failed: %s.\n"), + host_errstr (h_errno)); + else + logputs (LOG_VERBOSE, _ ("failed: timed out.\n")); + } + return NULL; + } + /* Do older systems have h_addr_list? */ + al = address_list_from_ipv4_addresses (hptr->h_addr_list); + } #endif /* not ENABLE_IPV6 */ /* Print the addresses determined by DNS lookup, but no more than @@ -53,6 +53,7 @@ as that of the covered work. */ struct hsts_store { struct hash_table *table; time_t last_mtime; + bool changed; }; struct hsts_kh { @@ -316,7 +317,7 @@ hsts_store_dump (hsts_store_t store, FILE *fp) /* Print preliminary comments. We don't care if any of these fail. */ fputs ("# HSTS 1.0 Known Hosts database for GNU Wget.\n", fp); fputs ("# Edit at your own risk.\n", fp); - fputs ("# <hostname>[:<port>]\t<incl. subdomains>\t<created>\t<max-age>\n", fp); + fputs ("# <hostname>\t<port>\t<incl. subdomains>\t<created>\t<max-age>\n", fp); /* Now cycle through the HSTS store in memory and dump the entries */ for (hash_table_iterate (store->table, &it); hash_table_iter_next (&it);) @@ -334,6 +335,22 @@ hsts_store_dump (hsts_store_t store, FILE *fp) } } +/* + * Test: + * - The file is a regular file (ie. not a symlink), and + * - The file is not world-writable. + */ +static bool +hsts_file_access_valid (const char *filename) +{ + struct_stat st; + + if (stat (filename, &st) == -1) + return false; + + return !(st.st_mode & S_IWOTH) && S_ISREG (st.st_mode); +} + /* HSTS API */ /* @@ -370,10 +387,14 @@ hsts_match (hsts_store_t store, struct url *u) if (u->port == 80) u->port = 443; url_changed = true; + store->changed = true; } } else - hsts_remove_entry (store, kh); + { + hsts_remove_entry (store, kh); + store->changed = true; + } } xfree (kh->host); } @@ -423,12 +444,14 @@ hsts_store_entry (hsts_store_t store, if (entry && match == CONGRUENT_MATCH) { if (max_age == 0) - hsts_remove_entry (store, kh); + { + hsts_remove_entry (store, kh); + store->changed = true; + } else if (max_age > 0) { - entry->include_subdomains = include_subdomains; - - if (entry->max_age != max_age) + if (entry->max_age != max_age || + entry->include_subdomains != include_subdomains) { /* RFC 6797 states that 'max_age' is a TTL relative to the reception of the STS header so we have to update the 'created' field too */ @@ -436,6 +459,9 @@ hsts_store_entry (hsts_store_t store, if (t != -1) entry->created = t; entry->max_age = max_age; + entry->include_subdomains = include_subdomains; + + store->changed = true; } } /* we ignore negative max_ages */ @@ -450,6 +476,8 @@ hsts_store_entry (hsts_store_t store, happen we got a non-existent entry with max_age == 0. */ result = hsts_add_entry (store, host, port, max_age, include_subdomains); + if (result) + store->changed = true; } /* we ignore new entries with max_age == 0 */ xfree (kh->host); @@ -464,28 +492,45 @@ hsts_store_t hsts_store_open (const char *filename) { hsts_store_t store = NULL; - struct_stat st; - FILE *fp = NULL; store = xnew0 (struct hsts_store); store->table = hash_table_new (0, hsts_hash_func, hsts_cmp_func); store->last_mtime = 0; + store->changed = false; if (file_exists_p (filename)) { - fp = fopen (filename, "r"); + if (hsts_file_access_valid (filename)) + { + struct_stat st; + FILE *fp = fopen (filename, "r"); + + if (!fp || !hsts_read_database (store, fp, false)) + { + /* abort! */ + hsts_store_close (store); + xfree (store); + fclose (fp); + goto out; + } + + if (fstat (fileno (fp), &st) == 0) + store->last_mtime = st.st_mtime; - if (!fp || !hsts_read_database (store, fp, false)) + fclose (fp); + } + else { - /* abort! */ + /* + * If we're not reading the HSTS database, + * then by all means act as if HSTS was disabled. + */ hsts_store_close (store); xfree (store); - goto out; - } - if (fstat (fileno (fp), &st) == 0) - store->last_mtime = st.st_mtime; - fclose (fp); + logprintf (LOG_NOTQUIET, "Will not apply HSTS. " + "The HSTS database must be a regular and non-world-writable file.\n"); + } } out: @@ -529,6 +574,12 @@ hsts_store_save (hsts_store_t store, const char *filename) } } +bool +hsts_store_has_changed (hsts_store_t store) +{ + return (store ? store->changed : false); +} + void hsts_store_close (hsts_store_t store) { @@ -43,6 +43,7 @@ hsts_store_t hsts_store_open (const char *); void hsts_store_save (hsts_store_t, const char *); void hsts_store_close (hsts_store_t); +bool hsts_store_has_changed (hsts_store_t); bool hsts_store_entry (hsts_store_t, enum url_scheme, const char *, int, diff --git a/src/html-url.c b/src/html-url.c index 0743587..abc916b 100644 --- a/src/html-url.c +++ b/src/html-url.c @@ -56,6 +56,7 @@ typedef void (*tag_handler_t) (int, struct taginfo *, struct map_context *); DECLARE_TAG_HANDLER (tag_find_urls); DECLARE_TAG_HANDLER (tag_handle_base); DECLARE_TAG_HANDLER (tag_handle_form); +DECLARE_TAG_HANDLER (tag_handle_img); DECLARE_TAG_HANDLER (tag_handle_link); DECLARE_TAG_HANDLER (tag_handle_meta); @@ -105,7 +106,7 @@ static struct known_tag { { TAG_FORM, "form", tag_handle_form }, { TAG_FRAME, "frame", tag_find_urls }, { TAG_IFRAME, "iframe", tag_find_urls }, - { TAG_IMG, "img", tag_find_urls }, + { TAG_IMG, "img", tag_handle_img }, { TAG_INPUT, "input", tag_find_urls }, { TAG_LAYER, "layer", tag_find_urls }, { TAG_LINK, "link", tag_handle_link }, @@ -183,7 +184,8 @@ static const char *additional_attributes[] = { "name", /* used by tag_handle_meta */ "content", /* used by tag_handle_meta */ "action", /* used by tag_handle_form */ - "style" /* used by check_style_attr */ + "style", /* used by check_style_attr */ + "srcset", /* used by tag_handle_img */ }; static struct hash_table *interesting_tags; @@ -674,6 +676,88 @@ tag_handle_meta (int tagid _GL_UNUSED, struct taginfo *tag, struct map_context * } } +/* Handle the IMG tag. This requires special handling for the srcset attr, + while the traditional src/lowsrc/href attributes can be handled generically. +*/ + +static void +tag_handle_img (int tagid, struct taginfo *tag, struct map_context *ctx) { + int attrind; + char *srcset; + + /* Use the generic approach for the attributes without special syntax. */ + tag_find_urls(tagid, tag, ctx); + + srcset = find_attr (tag, "srcset", &attrind); + if (srcset) + { + /* These are relative to the input text. */ + int base_ind = ATTR_POS (tag,attrind,ctx); + int size = strlen (srcset); + + /* These are relative to srcset. */ + int offset, url_start, url_end; + + /* Make sure to line up base_ind with srcset[0], not outside quotes. */ + if (ctx->text[base_ind] == '"' || ctx->text[base_ind] == '\'') + ++base_ind; + + offset = 0; + while (offset < size) + { + bool has_descriptor = true; + + /* Skip over initial whitespace and commas. Note there is no \v + in HTML5 whitespace. */ + url_start = offset + strspn (srcset + offset, " \f\n\r\t,"); + + if (url_start == size) + return; + + /* URL is any non-whitespace chars (including commas) - but with + trailing commas removed. */ + url_end = url_start + strcspn (srcset + url_start, " \f\n\r\t"); + while ((url_end - 1) > url_start && srcset[url_end - 1] == ',') + { + has_descriptor = false; + --url_end; + } + + if (url_end > url_start) + { + char *url_text = strdupdelim (srcset + url_start, + srcset + url_end); + struct urlpos *up = append_url (url_text, base_ind + url_start, + url_end - url_start, ctx); + up->link_inline_p = 1; + up->link_noquote_html_p = 1; + xfree (url_text); + } + + /* If the URL wasn't terminated by a , there may also be a descriptor + which we just skip. */ + if (has_descriptor) + { + /* This is comma-terminated, except there may be one level of + parentheses escaping that. */ + bool in_paren = false; + for (offset = url_end; offset < size; ++offset) + { + char c = srcset[offset]; + if (c == '(') + in_paren = true; + else if (c == ')' && in_paren) + in_paren = false; + else if (c == ',' && !in_paren) + break; + } + } + else + offset = url_end; + } + } +} + /* Dispatch the tag handler appropriate for the tag we're mapping over. See known_tags[] for definition of tag handlers. */ @@ -752,10 +836,12 @@ get_urls_html (const char *file, const char *url, bool *meta_disallow_follow, map_html_tags (fm->content, fm->length, collect_tags_mapper, &ctx, flags, NULL, interesting_attributes); +#ifdef ENABLE_IRI /* Meta charset is only valid if there was no HTTP header Content-Type charset. */ /* This is true for HTTP 1.0 and 1.1. */ if (iri && !iri->content_encoding && meta_charset) set_content_encoding (iri, meta_charset); +#endif DEBUGP (("no-follow in %s: %d\n", file, ctx.nofollow)); if (meta_disallow_follow) @@ -3424,13 +3424,16 @@ gethttp (struct url *u, struct http_stat *hs, int *dt, struct url *proxy, char *tmp = strchr (type, ';'); if (tmp) { +#ifdef ENABLE_IRI /* sXXXav: only needed if IRI support is enabled */ char *tmp2 = tmp + 1; +#endif while (tmp > type && c_isspace (tmp[-1])) --tmp; *tmp = '\0'; +#ifdef ENABLE_IRI /* Try to get remote encoding if needed */ if (opt.enable_iri && !opt.encoding_remote) { @@ -3439,6 +3442,7 @@ gethttp (struct url *u, struct http_stat *hs, int *dt, struct url *proxy, set_content_encoding (iri, tmp); xfree(tmp); } +#endif } } hs->newloc = resp_header_strdup (resp, "Location"); @@ -143,6 +143,9 @@ static const struct { { "backups", &opt.backups, cmd_number }, { "base", &opt.base_href, cmd_string }, { "bindaddress", &opt.bind_address, cmd_string }, +#ifdef HAVE_LIBCARES + { "binddnsaddress", &opt.bind_dns_address, cmd_string }, +#endif { "bodydata", &opt.body_data, cmd_string }, { "bodyfile", &opt.body_file, cmd_string }, #ifdef HAVE_SSL @@ -173,6 +176,9 @@ static const struct { { "dirprefix", &opt.dir_prefix, cmd_directory }, { "dirstruct", NULL, cmd_spec_dirstruct }, { "dnscache", &opt.dns_cache, cmd_boolean }, +#ifdef HAVE_LIBCARES + { "dnsservers", &opt.dns_servers, cmd_string }, +#endif { "dnstimeout", &opt.dns_timeout, cmd_time }, { "domains", &opt.domains, cmd_vector }, { "dotbytes", &opt.dot_bytes, cmd_bytes }, @@ -254,6 +260,9 @@ static const struct { { "passiveftp", &opt.ftp_pasv, cmd_boolean }, { "passwd", &opt.ftp_passwd, cmd_string },/* deprecated*/ { "password", &opt.passwd, cmd_string }, +#ifdef HAVE_SSL + { "pinnedpubkey", &opt.pinnedpubkey, cmd_string }, +#endif { "postdata", &opt.post_data, cmd_string }, { "postfile", &opt.post_file_name, cmd_file }, { "preferfamily", NULL, cmd_spec_prefer_family }, @@ -1922,6 +1931,18 @@ cleanup (void) xfree (opt.body_file); xfree (opt.rejected_log); +#ifdef HAVE_LIBCARES +#include <ares.h> + { + extern ares_channel ares; + + xfree (opt.bind_dns_address); + xfree (opt.dns_servers); + ares_destroy (ares); + ares_library_cleanup (); + } +#endif + #endif /* DEBUG_MALLOC */ } @@ -129,8 +129,8 @@ do_conversion (const char *tocode, const char *fromcode, char const *in_org, siz cd = iconv_open (tocode, fromcode); if (cd == (iconv_t)(-1)) { - logprintf (LOG_VERBOSE, _("Conversion from %s to UTF-8 isn't supported\n"), - quote (opt.locale)); + logprintf (LOG_VERBOSE, _("Conversion from %s to %s isn't supported\n"), + quote (fromcode), quote (tocode)); *out = NULL; return false; } @@ -146,7 +146,8 @@ do_conversion (const char *tocode, const char *fromcode, char const *in_org, siz for (;;) { - if (iconv (cd, &in, &inlen, out, &outlen) != (size_t)(-1)) + if (iconv (cd, &in, &inlen, out, &outlen) != (size_t)(-1) && + iconv (cd, NULL, NULL, out, &outlen) != (size_t)(-1)) { *out = s; *(s + len - outlen - done) = '\0'; @@ -179,16 +180,10 @@ do_conversion (const char *tocode, const char *fromcode, char const *in_org, siz } else if (errno == E2BIG) /* Output buffer full */ { - char *new; - tooshort++; done = len; - outlen = done + inlen * 2; - new = xmalloc (outlen + 1); - memcpy (new, s, done); - xfree (s); - s = new; - len = outlen; + len = outlen = done + inlen * 2; + s = xrealloc (s, outlen + 1); *out = s + done; } else /* Weird, we got an unspecified error */ @@ -60,7 +60,7 @@ void set_content_encoding (struct iri *i, const char *charset); extern struct iri dummy_iri; -#define parse_charset(str) (str, NULL) +#define parse_charset(str) NULL #define find_locale() NULL #define check_encoding_name(str) false #define locale_to_utf8(str) (str) @@ -351,6 +351,7 @@ logputs (enum log_options o, const char *s) { FILE *fp; FILE *warcfp; + int errno_save = errno; check_redirect_output (); if (o == LOG_PROGRESS) @@ -358,10 +359,14 @@ logputs (enum log_options o, const char *s) else fp = get_log_fp (); + errno = errno_save; + if (fp == NULL) return; warcfp = get_warc_log_fp (); + errno = errno_save; + CHECK_VERBOSE (o); FPUTS (s, fp); @@ -373,6 +378,8 @@ logputs (enum log_options o, const char *s) logflush (); else needs_flushing = true; + + errno = errno_save; } struct logvprintf_state { @@ -543,8 +550,10 @@ logprintf (enum log_options o, const char *fmt, ...) va_list args; struct logvprintf_state lpstate; bool done; + int errno_saved = errno; check_redirect_output (); + errno = errno_saved; if (inhibit_logging) return; CHECK_VERBOSE (o); @@ -561,6 +570,8 @@ logprintf (enum log_options o, const char *fmt, ...) exit (WGET_EXIT_GENERIC_ERROR); } while (!done); + + errno = errno_saved; } #ifdef ENABLE_DEBUG @@ -86,6 +86,13 @@ as that of the covered work. */ struct iri dummy_iri; #endif +#ifdef HAVE_LIBCARES +#include <ares.h> +ares_channel ares; +#else +void *ares; +#endif + struct options opt; /* defined in version.c */ @@ -197,10 +204,12 @@ save_hsts (void) { char *filename = get_hsts_database (); - if (filename) - DEBUGP (("Saving HSTS entries to %s\n", filename)); + if (filename && hsts_store_has_changed (hsts_store)) + { + DEBUGP (("Saving HSTS entries to %s\n", filename)); + hsts_store_save (hsts_store, filename); + } - hsts_store_save (hsts_store, filename); hsts_store_close (hsts_store); xfree (filename); @@ -252,6 +261,9 @@ static struct cmdline_option option_data[] = { "backups", 0, OPT_BOOLEAN, "backups", -1 }, { "base", 'B', OPT_VALUE, "base", -1 }, { "bind-address", 0, OPT_VALUE, "bindaddress", -1 }, +#ifdef HAVE_LIBCARES + { "bind-dns-address", 0, OPT_VALUE, "binddnsaddress", -1 }, +#endif { "body-data", 0, OPT_VALUE, "bodydata", -1 }, { "body-file", 0, OPT_VALUE, "bodyfile", -1 }, { IF_SSL ("ca-certificate"), 0, OPT_VALUE, "cacertificate", -1 }, @@ -277,6 +289,9 @@ static struct cmdline_option option_data[] = { "directories", 0, OPT_BOOLEAN, "dirstruct", -1 }, { "directory-prefix", 'P', OPT_VALUE, "dirprefix", -1 }, { "dns-cache", 0, OPT_BOOLEAN, "dnscache", -1 }, +#ifdef HAVE_LIBCARES + { "dns-servers", 0, OPT_VALUE, "dnsservers", -1 }, +#endif { "dns-timeout", 0, OPT_VALUE, "dnstimeout", -1 }, { "domains", 'D', OPT_VALUE, "domains", -1 }, { "dont-remove-listing", 0, OPT__DONT_REMOVE_LISTING, NULL, no_argument }, @@ -350,6 +365,7 @@ static struct cmdline_option option_data[] = { "parent", 0, OPT__PARENT, NULL, optional_argument }, { "passive-ftp", 0, OPT_BOOLEAN, "passiveftp", -1 }, { "password", 0, OPT_VALUE, "password", -1 }, + { IF_SSL ("pinnedpubkey"), 0, OPT_VALUE, "pinnedpubkey", -1 }, { "post-data", 0, OPT_VALUE, "postdata", -1 }, { "post-file", 0, OPT_VALUE, "postfile", -1 }, { "prefer-family", 0, OPT_VALUE, "preferfamily", -1 }, @@ -627,6 +643,12 @@ Download:\n"), --spider don't download anything\n"), N_("\ -T, --timeout=SECONDS set all timeout values to SECONDS\n"), +#ifdef HAVE_LIBCARES + N_("\ + --dns-servers=ADDRESSES list of DNS servers to query (comma separated)\n"), + N_("\ + --bind-dns-address=ADDRESS bind DNS resolver to ADDRESS (hostname or IP) on local host\n"), +#endif N_("\ --dns-timeout=SECS set the DNS lookup timeout to SECS\n"), N_("\ @@ -784,6 +806,11 @@ HTTPS (SSL/TLS) options:\n"), --ca-directory=DIR directory where hash list of CAs is stored\n"), N_("\ --crl-file=FILE file with bundle of CRLs\n"), + N_("\ + --pinnedpubkey=FILE/HASHES Public key (PEM/DER) file, or any number\n\ + of base64 encoded sha256 hashes preceded by\n\ + \'sha256//\' and seperated by \';\', to verify\n\ + peer against\n"), #if defined(HAVE_LIBSSL) || defined(HAVE_LIBSSL32) N_("\ --random-file=FILE file with random data for seeding the SSL PRNG\n"), @@ -1774,6 +1801,58 @@ only if outputting to a regular file.\n")); } } +#ifdef HAVE_LIBCARES + if (opt.bind_dns_address || opt.dns_servers) + { + if (ares_library_init (ARES_LIB_INIT_ALL)) + { + fprintf (stderr, _("Failed to init libcares\n")); + exit (WGET_EXIT_GENERIC_ERROR); + } + + if (ares_init (&ares) != ARES_SUCCESS) + { + fprintf (stderr, _("Failed to init c-ares channel\n")); + exit (WGET_EXIT_GENERIC_ERROR); + } + + if (opt.bind_dns_address) + { + struct in_addr a4; +#ifdef ENABLE_IPV6 + struct in6_addr a6; +#endif + + if (inet_pton (AF_INET, opt.bind_dns_address, &a4) == 1) + { + ares_set_local_ip4 (ares, ntohl (a4.s_addr)); + } +#ifdef ENABLE_IPV6 + else if (inet_pton (AF_INET6, opt.bind_dns_address, &a6) == 1) + { + ares_set_local_ip6 (ares, (unsigned char *) &a6); + } +#endif + else + { + fprintf (stderr, _("Failed to parse IP address '%s'\n"), opt.bind_dns_address); + exit (WGET_EXIT_GENERIC_ERROR); + } + } + + if (opt.dns_servers) + { + int result; + + if ((result = ares_set_servers_csv (ares, opt.dns_servers)) != ARES_SUCCESS) + { + fprintf (stderr, _("Failed to set DNS server(s) '%s' (%d)\n"), opt.dns_servers, result); + exit (WGET_EXIT_GENERIC_ERROR); + } + } + } +#endif + #ifdef __VMS /* Set global ODS5 flag according to the specified destination (if any), otherwise according to the current default device. diff --git a/src/metalink.c b/src/metalink.c index 25737b3..18f5f5d 100644 --- a/src/metalink.c +++ b/src/metalink.c @@ -274,7 +274,7 @@ retrieve_from_metalink (const metalink_t* metalink) goto gpg_skip_verification; } - DEBUGP (("Veryfying signature %s:\n%s\n", + DEBUGP (("Verifying signature %s:\n%s\n", quote (msig->mediatype), msig->signature)); @@ -342,10 +342,7 @@ retrieve_from_metalink (const metalink_t* metalink) /* The list is null-terminated. */ for (gpgsig = gpgres->signatures; gpgsig; gpgsig = gpgsig->next) { - DEBUGP (("Checking signature 0x%p\n", - (void *) gpgsig)); - DEBUGP (("Summary=0x%x Status=0x%x\n", - gpgsig->summary, gpgsig->status & 0xFFFF)); + DEBUGP (("Checking signature %s\n", gpgsig->fpr)); if (gpgsig->summary & (GPGME_SIGSUM_VALID | GPGME_SIGSUM_GREEN)) diff --git a/src/mswindows.c b/src/mswindows.c index 1a43b51..9735370 100644 --- a/src/mswindows.c +++ b/src/mswindows.c @@ -62,7 +62,7 @@ void xsleep (double seconds) { #if defined(HAVE_USLEEP) && defined(HAVE_SLEEP) - if (seconds > 1000) + if (seconds >= 1) { /* Explained in utils.c. */ sleep (seconds); diff --git a/src/openssl.c b/src/openssl.c index 6701c0d..c6549ea 100644 --- a/src/openssl.c +++ b/src/openssl.c @@ -35,6 +35,7 @@ as that of the covered work. */ #include <errno.h> #include <unistd.h> #include <string.h> +#include <xalloc.h> #include <openssl/ssl.h> #include <openssl/x509v3.h> @@ -506,6 +507,22 @@ ssl_connect_with_timeout_callback(void *arg) ctx->result = SSL_connect(ctx->ssl); } +static const char * +_sni_hostname(const char *hostname) +{ + size_t len = strlen(hostname); + + char *sni_hostname = xmemdup(hostname, len + 1); + + /* Remove trailing dot(s) to fix #47408. + * Regarding RFC 6066 (SNI): The hostname is represented as a byte + * string using ASCII encoding without a trailing dot. */ + while (len && sni_hostname[--len] == '.') + sni_hostname[len] = 0; + + return sni_hostname; +} + /* Perform the SSL handshake on file descriptor FD, which is assumed to be connected to an SSL server. The SSL handle provided by OpenSSL is registered with the file descriptor FD using @@ -532,7 +549,12 @@ ssl_connect_wget (int fd, const char *hostname, int *continue_session) then use it whenever we have a hostname. If not, don't, ever. */ if (! is_valid_ip_address (hostname)) { - if (! SSL_set_tlsext_host_name (conn, hostname)) + const char *sni_hostname = _sni_hostname(hostname); + + long rc = SSL_set_tlsext_host_name (conn, sni_hostname); + xfree(sni_hostname); + + if (rc == 0) { DEBUGP (("Failed to set TLS server-name indication.")); goto error; @@ -650,6 +672,65 @@ static char *_get_rfc2253_formatted (X509_NAME *name) return out ? out : xstrdup(""); } +/* + * Heavily modified from: + * https://www.owasp.org/index.php/Certificate_and_Public_Key_Pinning#OpenSSL + */ +static bool +pkp_pin_peer_pubkey (X509* cert, const char *pinnedpubkey) +{ + /* Scratch */ + int len1 = 0, len2 = 0; + char *buff1 = NULL, *temp = NULL; + + /* Result is returned to caller */ + bool result = false; + + /* if a path wasn't specified, don't pin */ + if (!pinnedpubkey) + return true; + + if (!cert) + return result; + + /* Begin Gyrations to get the subjectPublicKeyInfo */ + /* Thanks to Viktor Dukhovni on the OpenSSL mailing list */ + + /* https://groups.google.com/group/mailing.openssl.users/browse_thread + /thread/d61858dae102c6c7 */ + len1 = i2d_X509_PUBKEY (X509_get_X509_PUBKEY (cert), NULL); + if (len1 < 1) + goto cleanup; /* failed */ + + /* https://www.openssl.org/docs/crypto/buffer.html */ + buff1 = temp = OPENSSL_malloc (len1); + if (!buff1) + goto cleanup; /* failed */ + + /* https://www.openssl.org/docs/crypto/d2i_X509.html */ + len2 = i2d_X509_PUBKEY (X509_get_X509_PUBKEY (cert), (unsigned char **) &temp); + + /* + * These checks are verifying we got back the same values as when we + * sized the buffer. It's pretty weak since they should always be the + * same. But it gives us something to test. + */ + if ((len1 != len2) || !temp || ((temp - buff1) != len1)) + goto cleanup; /* failed */ + + /* End Gyrations */ + + /* The one good exit point */ + result = wg_pin_peer_pubkey (pinnedpubkey, buff1, len1); + + cleanup: + /* https://www.openssl.org/docs/crypto/buffer.html */ + if (NULL != buff1) + OPENSSL_free (buff1); + + return result; +} + /* Verify the validity of the certificate presented by the server. Also check that the "common name" of the server, as presented by its certificate, corresponds to HOST. (HOST typically comes from @@ -673,6 +754,7 @@ ssl_check_certificate (int fd, const char *host) long vresult; bool success = true; bool alt_name_checked = false; + bool pinsuccess = opt.pinnedpubkey == NULL; /* If the user has specified --no-check-cert, we still want to warn him about problems with the server's certificate. */ @@ -683,7 +765,7 @@ ssl_check_certificate (int fd, const char *host) assert (conn != NULL); /* The user explicitly said to not check for the certificate. */ - if (opt.check_cert == CHECK_CERT_QUIET) + if (opt.check_cert == CHECK_CERT_QUIET && pinsuccess) return success; cert = SSL_get_peer_certificate (conn); @@ -762,9 +844,12 @@ ssl_check_certificate (int fd, const char *host) { /* Test subject alternative names */ + /* SNI hostname must not have a trailing dot */ + const char *sni_hostname = _sni_hostname(host); + /* Do we want to check for dNSNAmes or ipAddresses (see RFC 2818)? * Signal it by host_in_octet_string. */ - ASN1_OCTET_STRING *host_in_octet_string = a2i_IPADDRESS (host); + ASN1_OCTET_STRING *host_in_octet_string = a2i_IPADDRESS (sni_hostname); int numaltnames = sk_GENERAL_NAME_num (subjectAltNames); int i; @@ -799,7 +884,7 @@ ssl_check_certificate (int fd, const char *host) if (0 <= ASN1_STRING_to_UTF8 (&name_in_utf8, name->d.dNSName)) { /* Compare and check for NULL attack in ASN1_STRING */ - if (pattern_match ((char *)name_in_utf8, host) && + if (pattern_match ((char *)name_in_utf8, sni_hostname) && (strlen ((char *)name_in_utf8) == (size_t) ASN1_STRING_length (name->d.dNSName))) { @@ -820,9 +905,11 @@ ssl_check_certificate (int fd, const char *host) logprintf (LOG_NOTQUIET, _("%s: no certificate subject alternative name matches\n" "\trequested host name %s.\n"), - severity, quote_n (1, host)); + severity, quote_n (1, sni_hostname)); success = false; } + + xfree(sni_hostname); } if (alt_name_checked == false) @@ -877,6 +964,13 @@ ssl_check_certificate (int fd, const char *host) } } + pinsuccess = pkp_pin_peer_pubkey (cert, opt.pinnedpubkey); + if (!pinsuccess) + { + logprintf (LOG_ALWAYS, _("The public key does not match pinned public key!\n")); + success = false; + } + if (success) DEBUGP (("X509 certificate successfully verified and matches host %s\n", @@ -889,7 +983,8 @@ ssl_check_certificate (int fd, const char *host) To connect to %s insecurely, use `--no-check-certificate'.\n"), quotearg_style (escape_quoting_style, host)); - return opt.check_cert == CHECK_CERT_ON ? success : true; + /* never return true if pinsuccess fails */ + return !pinsuccess ? false : (opt.check_cert == CHECK_CERT_ON ? success : true); } /* diff --git a/src/options.h b/src/options.h index 5cd5fb1..a8c494b 100644 --- a/src/options.h +++ b/src/options.h @@ -99,6 +99,11 @@ struct options void *(*regex_compile_fun)(const char *); /* Function to compile a regex. */ bool (*regex_match_fun)(const void *, const char *); /* Function to match a string to a regex. */ +#ifdef HAVE_LIBCARES + char *bind_dns_address; + char *dns_servers; +#endif + char **domains; /* See host.c */ char **exclude_domains; bool dns_cache; /* whether we cache DNS lookups. */ @@ -236,6 +241,11 @@ struct options char *ca_cert; /* CA certificate file to use */ char *crl_file; /* file with CRLs */ + char *pinnedpubkey; /* Public key (PEM/DER) file, or any number + of base64 encoded sha256 hashes preceded by + \'sha256//\' and seperated by \';\', to verify + peer against */ + char *random_file; /* file with random data to seed the PRNG */ char *egd_file; /* file name of the egd daemon socket */ bool https_only; /* whether to follow HTTPS only */ diff --git a/src/progress.c b/src/progress.c index 93f6246..c635d19 100644 --- a/src/progress.c +++ b/src/progress.c @@ -1164,6 +1164,8 @@ create_image (struct bar_progress *bp, double dl_total_time, bool done) } padding = bp->width - count_cols (bp->buffer); + assert (padding >= 0 && "Padding length became non-positive!"); + padding = padding > 0 ? padding : 0; memset (p, ' ', padding); p += padding; *p = '\0'; @@ -1174,6 +1176,9 @@ create_image (struct bar_progress *bp, double dl_total_time, bool done) * from the release code since we do not want Wget to crash and burn when the * assertion fails. Instead Wget should continue downloading and display a * horrible and irritating progress bar that spams the screen with newlines. + * + * By default, all assertions are disabled in a Wget build and are enabled + * only with the --enable-assert configure option. */ assert (count_cols (bp->buffer) == bp->width); } @@ -1193,8 +1198,6 @@ display_image (char *buf) static void bar_set_params (char *params) { - char *term = getenv ("TERM"); - if (params) { char *param = strtok (params, ":"); @@ -1214,12 +1217,6 @@ bar_set_params (char *params) dots. */ || !isatty (fileno (stderr)) #endif - /* Normally we don't depend on terminal type because the - progress bar only uses ^M to move the cursor to the - beginning of line, which works even on dumb terminals. But - Jamie Zawinski reports that ^M and ^H tricks don't work in - Emacs shell buffers, and only make a mess. */ - || (term && 0 == strcmp (term, "emacs")) ) && !current_impl_locked) { diff --git a/src/recur.c b/src/recur.c index b212ec6..2b17e72 100644 --- a/src/recur.c +++ b/src/recur.c @@ -234,17 +234,19 @@ retrieve_tree (struct url *start_url_parsed, struct iri *pi) FILE *rejectedlog = NULL; /* Don't write a rejected log. */ -#define COPYSTR(x) (x) ? xstrdup(x) : NULL; /* Duplicate pi struct if not NULL */ if (pi) { +#define COPYSTR(x) (x) ? xstrdup(x) : NULL; i->uri_encoding = COPYSTR (pi->uri_encoding); i->content_encoding = COPYSTR (pi->content_encoding); i->utf8_encode = pi->utf8_encode; +#undef COPYSTR } +#ifdef ENABLE_IRI else set_uri_encoding (i, opt.locale, true); -#undef COPYSTR +#endif queue = url_queue_new (); blacklist = make_string_hash_table (0); @@ -830,7 +830,8 @@ retrieve_url (struct url * orig_parsed, const char *origurl, char **file, if (redirection_count) oldrec = glob = false; - result = ftp_loop (u, &local_file, dt, proxy_url, recursive, glob); + result = ftp_loop (u, orig_parsed, &local_file, dt, proxy_url, + recursive, glob); recursive = oldrec; /* There is a possibility of having HTTP being redirected to @@ -872,11 +873,15 @@ retrieve_url (struct url * orig_parsed, const char *origurl, char **file, xfree (mynewloc); mynewloc = construced_newloc; - /* Reset UTF-8 encoding state, keep the URI encoding and reset +#ifdef ENABLE_IRI + /* Reset UTF-8 encoding state, set the URI encoding and reset the content encoding. */ iri->utf8_encode = opt.enable_iri; + if (opt.encoding_remote) + set_uri_encoding (iri, opt.encoding_remote, true); set_content_encoding (iri, NULL); xfree (iri->orig_url); +#endif /* Now, see if this new location makes sense. */ newloc_parsed = url_parse (mynewloc, &up_error_code, iri, true); @@ -1064,10 +1069,12 @@ retrieve_from_file (const char *file, bool html, int *count) if (dt & TEXTHTML) html = true; +#ifdef ENABLE_IRI /* If we have a found a content encoding, use it. * ( == is okay, because we're checking for identical object) */ if (iri->content_encoding != opt.locale) set_uri_encoding (iri, iri->content_encoding, false); +#endif /* Reset UTF-8 encode status */ iri->utf8_encode = opt.enable_iri; diff --git a/src/sysdep.h b/src/sysdep.h index f94e698..8da7611 100644 --- a/src/sysdep.h +++ b/src/sysdep.h @@ -68,9 +68,6 @@ as that of the covered work. */ #ifdef NAMESPACE_TWEAKS -/* Request the "Unix 98 compilation environment". */ -#define _XOPEN_SOURCE 500 - #endif /* NAMESPACE_TWEAKS */ @@ -146,16 +143,6 @@ int snprintf (char *str, size_t count, const char *fmt, ...); int vsnprintf (char *str, size_t count, const char *fmt, va_list arg); #endif -/* Some systems (Linux libc5, "NCR MP-RAS 3.0", and others) don't - provide MAP_FAILED, a symbolic constant for the value returned by - mmap() when it doesn't work. Usually, this constant should be -1. - This only makes sense for files that use mmap() and include - sys/mman.h *before* sysdep.h, but doesn't hurt others. */ - -#ifndef MAP_FAILED -# define MAP_FAILED ((void *) -1) -#endif - /* Enable system fnmatch only on systems where fnmatch.h is usable. If the fnmatch on your system is buggy, undef this symbol and a replacement implementation will be used instead. */ @@ -43,6 +43,11 @@ as that of the covered work. */ #include "host.h" /* for is_valid_ipv6_address */ #include "c-strcase.h" +#if HAVE_ICONV +#include <iconv.h> +#include <langinfo.h> +#endif + #ifdef __VMS #include "vms.h" #endif /* def __VMS */ @@ -1399,8 +1404,8 @@ UVWC, VC, VC, VC, VC, VC, VC, VC, /* NUL SOH STX ETX EOT ENQ ACK BEL */ 0, 0, 0, 0, 0, 0, 0, 0, /* p q r s t u v w */ 0, 0, 0, 0, W, 0, 0, C, /* x y z { | } ~ DEL */ - C, C, C, C, C, C, C, C, C, C, C, C, C, C, C, C, /* 128-143 */ - C, C, C, C, C, C, C, C, C, C, C, C, C, C, C, C, /* 144-159 */ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 128-143 */ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 144-159 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, @@ -1531,6 +1536,82 @@ append_uri_pathel (const char *b, const char *e, bool escaped, append_null (dest); } +static char * +convert_fname (char *fname) +{ + char *converted_fname = fname; +#if HAVE_ICONV + const char *from_encoding = opt.encoding_remote; + const char *to_encoding = opt.locale; + iconv_t cd; + size_t len, done, inlen, outlen; + char *s; + const char *orig_fname = fname; + + /* Defaults for remote and local encodings. */ + if (!from_encoding) + from_encoding = "UTF-8"; + if (!to_encoding) + to_encoding = nl_langinfo (CODESET); + + cd = iconv_open (to_encoding, from_encoding); + if (cd == (iconv_t)(-1)) + logprintf (LOG_VERBOSE, _("Conversion from %s to %s isn't supported\n"), + quote (from_encoding), quote (to_encoding)); + else + { + inlen = strlen (fname); + len = outlen = inlen * 2; + converted_fname = s = xmalloc (outlen + 1); + done = 0; + + for (;;) + { + if (iconv (cd, &fname, &inlen, &s, &outlen) != (size_t)(-1) + && iconv (cd, NULL, NULL, &s, &outlen) != (size_t)(-1)) + { + *(converted_fname + len - outlen - done) = '\0'; + iconv_close(cd); + DEBUGP (("Converted file name '%s' (%s) -> '%s' (%s)\n", + orig_fname, from_encoding, converted_fname, to_encoding)); + xfree (orig_fname); + return converted_fname; + } + + /* Incomplete or invalid multibyte sequence */ + if (errno == EINVAL || errno == EILSEQ) + { + logprintf (LOG_VERBOSE, + _("Incomplete or invalid multibyte sequence encountered\n")); + xfree (converted_fname); + converted_fname = (char *)orig_fname; + break; + } + else if (errno == E2BIG) /* Output buffer full */ + { + done = len; + len = outlen = done + inlen * 2; + converted_fname = xrealloc (converted_fname, outlen + 1); + s = converted_fname + done; + } + else /* Weird, we got an unspecified error */ + { + logprintf (LOG_VERBOSE, _("Unhandled errno %d\n"), errno); + xfree (converted_fname); + converted_fname = (char *)orig_fname; + break; + } + } + DEBUGP (("Failed to convert file name '%s' (%s) -> '?' (%s)\n", + orig_fname, from_encoding, to_encoding)); + } + + iconv_close(cd); +#endif + + return converted_fname; +} + /* Append to DEST the directory structure that corresponds the directory part of URL's path. For example, if the URL is http://server/dir1/dir2/file, this appends "/dir1/dir2". @@ -1706,6 +1787,8 @@ url_file_name (const struct url *u, char *replaced_filename) xfree (temp_fnres.base); + fname = convert_fname (fname); + /* Check the cases in which the unique extensions are not used: 1) Clobbering is turned off (-nc). 2) Retrieval with regetting. diff --git a/src/utils.c b/src/utils.c index 5222851..b07da9f 100644 --- a/src/utils.c +++ b/src/utils.c @@ -31,14 +31,12 @@ as that of the covered work. */ #include "wget.h" +#include "sha256.h" #include <stdio.h> #include <stdlib.h> #include <string.h> #include <time.h> #include <unistd.h> -#ifdef HAVE_MMAP -# include <sys/mman.h> -#endif #ifdef HAVE_PROCESS_H # include <process.h> /* getpid() */ #endif @@ -89,6 +87,18 @@ as that of the covered work. */ # define USE_SIGNAL_TIMEOUT #endif +/* Some systems (Linux libc5, "NCR MP-RAS 3.0", and others) don't + provide MAP_FAILED, a symbolic constant for the value returned by + mmap() when it doesn't work. Usually, this constant should be -1. + This only makes sense for files that use mmap() and include + sys/mman.h *before* sysdep.h, but doesn't hurt others. */ +#ifdef HAVE_MMAP +# include <sys/mman.h> +# ifndef MAP_FAILED +# define MAP_FAILED ((void *) -1) +# endif +#endif + #include "utils.h" #include "hash.h" @@ -2521,6 +2531,205 @@ wg_hex_to_string (char *str_buffer, const char *hex_buffer, size_t hex_len) str_buffer[2 * i] = '\0'; } +#ifdef HAVE_SSL + +/* + * Public key pem to der conversion + */ + +static bool +wg_pubkey_pem_to_der (const char *pem, unsigned char **der, size_t *der_len) +{ + char *stripped_pem, *begin_pos, *end_pos; + size_t pem_count, stripped_pem_count = 0, pem_len; + ssize_t size; + unsigned char *base64data; + + *der = NULL; + *der_len = 0; + + /* if no pem, exit. */ + if (!pem) + return false; + + begin_pos = strstr (pem, "-----BEGIN PUBLIC KEY-----"); + if (!begin_pos) + return false; + + pem_count = begin_pos - pem; + /* Invalid if not at beginning AND not directly following \n */ + if (0 != pem_count && '\n' != pem[pem_count - 1]) + return false; + + /* 26 is length of "-----BEGIN PUBLIC KEY-----" */ + pem_count += 26; + + /* Invalid if not directly following \n */ + end_pos = strstr (pem + pem_count, "\n-----END PUBLIC KEY-----"); + if (!end_pos) + return false; + + pem_len = end_pos - pem; + + stripped_pem = xmalloc (pem_len - pem_count + 1); + + /* + * Here we loop through the pem array one character at a time between the + * correct indices, and place each character that is not '\n' or '\r' + * into the stripped_pem array, which should represent the raw base64 string + */ + while (pem_count < pem_len) { + if ('\n' != pem[pem_count] && '\r' != pem[pem_count]) + stripped_pem[stripped_pem_count++] = pem[pem_count]; + ++pem_count; + } + /* Place the null terminator in the correct place */ + stripped_pem[stripped_pem_count] = '\0'; + + base64data = xmalloc (BASE64_LENGTH(stripped_pem_count)); + + size = base64_decode (stripped_pem, base64data); + + if (size < 0) { + xfree (base64data); /* malformed base64 from server */ + } else { + *der = base64data; + *der_len = (size_t) size; + } + + xfree (stripped_pem); + + return *der_len > 0; +} + +/* + * Generic pinned public key check. + */ + +bool +wg_pin_peer_pubkey (const char *pinnedpubkey, const char *pubkey, size_t pubkeylen) +{ + struct file_memory *fm; + unsigned char *buf = NULL, *pem_ptr = NULL; + size_t size, pem_len; + bool pem_read; + bool result = false; + + size_t pinkeylen; + ssize_t decoded_hash_length; + char *pinkeycopy, *begin_pos, *end_pos; + unsigned char *sha256sumdigest = NULL, *expectedsha256sumdigest = NULL; + + /* if a path wasn't specified, don't pin */ + if (!pinnedpubkey) + return true; + if (!pubkey || !pubkeylen) + return result; + + /* only do this if pinnedpubkey starts with "sha256//", length 8 */ + if (strncmp (pinnedpubkey, "sha256//", 8) == 0) { + /* compute sha256sum of public key */ + sha256sumdigest = xmalloc (SHA256_DIGEST_SIZE); + sha256_buffer (pubkey, pubkeylen, sha256sumdigest); + expectedsha256sumdigest = xmalloc (SHA256_DIGEST_SIZE + 1); + + /* it starts with sha256//, copy so we can modify it */ + pinkeylen = strlen (pinnedpubkey) + 1; + pinkeycopy = xmalloc (pinkeylen); + memcpy (pinkeycopy, pinnedpubkey, pinkeylen); + + /* point begin_pos to the copy, and start extracting keys */ + begin_pos = pinkeycopy; + do + { + end_pos = strstr (begin_pos, ";sha256//"); + /* + * if there is an end_pos, null terminate, + * otherwise it'll go to the end of the original string + */ + if (end_pos) + end_pos[0] = '\0'; + + /* decode base64 pinnedpubkey, 8 is length of "sha256//" */ + decoded_hash_length = base64_decode (begin_pos + 8, expectedsha256sumdigest); + /* if valid base64, compare sha256 digests directly */ + if (SHA256_DIGEST_SIZE == decoded_hash_length && + !memcmp (sha256sumdigest, expectedsha256sumdigest, SHA256_DIGEST_SIZE)) { + result = true; + break; + } + + /* + * change back the null-terminator we changed earlier, + * and look for next begin + */ + if (end_pos) { + end_pos[0] = ';'; + begin_pos = strstr (end_pos, "sha256//"); + } + } while (end_pos && begin_pos); + + xfree (sha256sumdigest); + xfree (expectedsha256sumdigest); + xfree (pinkeycopy); + + return result; + } + + /* fall back to assuming this is a file path */ + fm = wget_read_file (pinnedpubkey); + if (!fm) + return result; + + /* Check the file's size */ + if (fm->length < 0 || fm->length > MAX_PINNED_PUBKEY_SIZE) + goto cleanup; + + /* + * if the size of our certificate is bigger than the file + * size then it can't match + */ + size = (size_t) fm->length; + if (pubkeylen > size) + goto cleanup; + + /* If the sizes are the same, it can't be base64 encoded, must be der */ + if (pubkeylen == size) { + if (!memcmp (pubkey, fm->content, pubkeylen)) + result = true; + goto cleanup; + } + + /* + * Otherwise we will assume it's PEM and try to decode it + * after placing null terminator + */ + buf = xmalloc (size + 1); + memcpy (buf, fm->content, size); + buf[size] = '\0'; + + pem_read = wg_pubkey_pem_to_der ((const char *) buf, &pem_ptr, &pem_len); + /* if it wasn't read successfully, exit */ + if (!pem_read) + goto cleanup; + + /* + * if the size of our certificate doesn't match the size of + * the decoded file, they can't be the same, otherwise compare + */ + if (pubkeylen == pem_len && !memcmp (pubkey, pem_ptr, pubkeylen)) + result = true; + + cleanup: + xfree (buf); + xfree (pem_ptr); + wget_read_file_free (fm); + + return result; +} + +#endif /* HAVE_SSL */ + #ifdef TESTING const char * diff --git a/src/utils.h b/src/utils.h index 76f4f8d..f224b73 100644 --- a/src/utils.h +++ b/src/utils.h @@ -37,6 +37,10 @@ as that of the covered work. */ /* Constant is using when we don`t know attempted size exactly */ #define UNKNOWN_ATTEMPTED_SIZE -3 +#ifndef MAX_PINNED_PUBKEY_SIZE +#define MAX_PINNED_PUBKEY_SIZE 1048576 /* 1MB */ +#endif + /* Macros that interface to malloc, but know about type sizes, and cast the result to the appropriate type. The casts are not necessary in standard C, but Wget performs them anyway for the sake @@ -161,4 +165,9 @@ void wg_hex_to_string (char *str_buffer, const char *hex_buffer, size_t hex_len) extern unsigned char char_prop[]; +#ifdef HAVE_SSL +/* Check pinned public key. */ +bool wg_pin_peer_pubkey (const char *pinnedpubkey, const char *pubkey, size_t pubkeylen); +#endif + #endif /* UTILS_H */ @@ -246,6 +246,22 @@ warc_write_header (const char *name, const char *value) return warc_write_ok; } +/* Writes a WARC header with a URI as value to the current WARC record. + This method may be run after warc_write_start_record and + before warc_write_block_from_file. */ +static bool +warc_write_header_uri (const char *name, const char *value) +{ + if (value) + { + warc_write_string (name); + warc_write_string (": <"); + warc_write_string (value); + warc_write_string (">\r\n"); + } + return warc_write_ok; +} + /* Copies the contents of DATA_IN to the WARC record. Adds a Content-Length header to the WARC record. Run this method after warc_write_header, @@ -1292,7 +1308,7 @@ warc_write_request_record (const char *url, const char *timestamp_str, { warc_write_start_record (); warc_write_header ("WARC-Type", "request"); - warc_write_header ("WARC-Target-URI", url); + warc_write_header_uri ("WARC-Target-URI", url); warc_write_header ("Content-Type", "application/http;msgtype=request"); warc_write_date_header (timestamp_str); warc_write_header ("WARC-Record-ID", record_uuid); |