summary refs log tree commit diff
path: root/src
diff options
context:
space:
mode:
Diffstat (limited to 'src')
-rw-r--r-- src/Makefile.in | 16
-rw-r--r-- src/config.h.in | 20
-rw-r--r-- src/connect.c | 11
-rw-r--r-- src/convert.c | 2
-rw-r--r-- src/cookies.c | 4
-rw-r--r-- src/ftp.c | 8
-rw-r--r-- src/host.c | 11
-rw-r--r-- src/hsts.c | 25
-rw-r--r-- src/html-parse.c | 4
-rw-r--r-- src/http-ntlm.c | 2
-rw-r--r-- src/http.c | 224
-rw-r--r-- src/init.c | 59
-rw-r--r-- src/init.h | 2
-rw-r--r-- src/iri.c | 38
-rw-r--r-- src/iri.h | 2
-rw-r--r-- src/log.c | 2
-rw-r--r-- src/main.c | 43
-rw-r--r-- src/metalink.c | 8
-rw-r--r-- src/mswindows.c | 2
-rw-r--r-- src/mswindows.h | 8
-rw-r--r-- src/netrc.c | 28
-rw-r--r-- src/options.h | 8
-rw-r--r-- src/progress.c | 2
-rw-r--r-- src/recur.c | 4
-rw-r--r-- src/res.c | 6
-rw-r--r-- src/retr.c | 166
-rw-r--r-- src/retr.h | 4
-rw-r--r-- src/url.c | 121
-rw-r--r-- src/url.h | 4
-rw-r--r-- src/utils.c | 309
-rw-r--r-- src/utils.h | 12
-rw-r--r-- src/warc.c | 9
32 files changed, 878 insertions, 286 deletions
diff --git a/src/Makefile.in b/src/Makefile.in
index e72738c..95bbc60 100644
--- a/src/Makefile.in
+++ b/src/Makefile.in
@@ -1,7 +1,7 @@
-# Makefile.in generated by automake 1.15 from Makefile.am.
+# Makefile.in generated by automake 1.15.1 from Makefile.am.
# @configure_input@
-# Copyright (C) 1994-2014 Free Software Foundation, Inc.
+# Copyright (C) 1994-2017 Free Software Foundation, Inc.
# This Makefile.in is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it,
@@ -145,13 +145,14 @@ am__aclocal_m4_deps = $(top_srcdir)/m4/00gnulib.m4 \
$(top_srcdir)/m4/ftell.m4 $(top_srcdir)/m4/ftello.m4 \
$(top_srcdir)/m4/futimens.m4 $(top_srcdir)/m4/getaddrinfo.m4 \
$(top_srcdir)/m4/getdelim.m4 $(top_srcdir)/m4/getdtablesize.m4 \
- $(top_srcdir)/m4/getline.m4 $(top_srcdir)/m4/getopt.m4 \
- $(top_srcdir)/m4/getpass.m4 $(top_srcdir)/m4/getprogname.m4 \
- $(top_srcdir)/m4/gettext.m4 $(top_srcdir)/m4/gettime.m4 \
- $(top_srcdir)/m4/gettimeofday.m4 \
+ $(top_srcdir)/m4/getgroups.m4 $(top_srcdir)/m4/getline.m4 \
+ $(top_srcdir)/m4/getopt.m4 $(top_srcdir)/m4/getpass.m4 \
+ $(top_srcdir)/m4/getprogname.m4 $(top_srcdir)/m4/gettext.m4 \
+ $(top_srcdir)/m4/gettime.m4 $(top_srcdir)/m4/gettimeofday.m4 \
$(top_srcdir)/m4/gl-openssl.m4 $(top_srcdir)/m4/glibc21.m4 \
$(top_srcdir)/m4/gnulib-common.m4 \
$(top_srcdir)/m4/gnulib-comp.m4 \
+ $(top_srcdir)/m4/group-member.m4 \
$(top_srcdir)/m4/hard-locale.m4 $(top_srcdir)/m4/hostent.m4 \
$(top_srcdir)/m4/iconv.m4 $(top_srcdir)/m4/iconv_h.m4 \
$(top_srcdir)/m4/include_next.m4 $(top_srcdir)/m4/inet_ntop.m4 \
@@ -750,6 +751,9 @@ GNULIB_WRITE = @GNULIB_WRITE@
GNULIB__EXIT = @GNULIB__EXIT@
GNUTLS_CFLAGS = @GNUTLS_CFLAGS@
GNUTLS_LIBS = @GNUTLS_LIBS@
+GPGME_CFLAGS = @GPGME_CFLAGS@
+GPGME_CONFIG = @GPGME_CONFIG@
+GPGME_LIBS = @GPGME_LIBS@
GREP = @GREP@
HAVE_ACCEPT4 = @HAVE_ACCEPT4@
HAVE_ARPA_INET_H = @HAVE_ARPA_INET_H@
diff --git a/src/config.h.in b/src/config.h.in
index afb806f..5131d55 100644
--- a/src/config.h.in
+++ b/src/config.h.in
@@ -93,6 +93,14 @@
/* Define to 1 if futimesat mishandles a NULL file name. */
#undef FUTIMESAT_NULL_BUG
+/* Define to the type of elements in the array set by `getgroups'. Usually
+ this is either `int' or `gid_t'. */
+#undef GETGROUPS_T
+
+/* Define this to 1 if getgroups(0,NULL) does not return the number of groups.
+ */
+#undef GETGROUPS_ZERO_BUG
+
/* Define if gettimeofday clobbers the localtime buffer. */
#undef GETTIMEOFDAY_CLOBBERS_LOCALTIME
@@ -201,6 +209,9 @@
/* Define to 1 when the gnulib module getdtablesize should be tested. */
#undef GNULIB_TEST_GETDTABLESIZE
+/* Define to 1 when the gnulib module getgroups should be tested. */
+#undef GNULIB_TEST_GETGROUPS
+
/* Define to 1 when the gnulib module getline should be tested. */
#undef GNULIB_TEST_GETLINE
@@ -213,6 +224,9 @@
/* Define to 1 when the gnulib module gettimeofday should be tested. */
#undef GNULIB_TEST_GETTIMEOFDAY
+/* Define to 1 when the gnulib module group-member should be tested. */
+#undef GNULIB_TEST_GROUP_MEMBER
+
/* Define to 1 when the gnulib module ioctl should be tested. */
#undef GNULIB_TEST_IOCTL
@@ -746,6 +760,9 @@
/* Define to 1 if you have the `getgid' function. */
#undef HAVE_GETGID
+/* Define to 1 if your system has a working `getgroups' function. */
+#undef HAVE_GETGROUPS
+
/* Define to 1 if you have the `gethostbyname' function. */
#undef HAVE_GETHOSTBYNAME
@@ -776,6 +793,9 @@
/* Define to 1 if you have the `gnutls_priority_set_direct' function. */
#undef HAVE_GNUTLS_PRIORITY_SET_DIRECT
+/* Define if GPGME is available. */
+#undef HAVE_GPGME
+
/* Define if you have the iconv() function and it works. */
#undef HAVE_ICONV
diff --git a/src/connect.c b/src/connect.c
index 7e18171..d665d6d 100644
--- a/src/connect.c
+++ b/src/connect.c
@@ -56,10 +56,6 @@ as that of the covered work. */
#include <string.h>
#include <sys/time.h>
-#ifdef ENABLE_IRI
-#include <idn2.h>
-#endif
-
#include "utils.h"
#include "host.h"
#include "connect.h"
@@ -280,11 +276,8 @@ connect_to_ip (const ip_address *ip, int port, const char *print)
if (opt.enable_iri && (name = idn_decode ((char *) print)) != NULL)
{
- int len = strlen (print) + strlen (name) + 4;
- str = xmalloc (len);
- snprintf (str, len, "%s (%s)", name, print);
- str[len-1] = '\0';
- idn2_free (name);
+ str = aprintf ("%s (%s)", name, print);
+ xfree (name);
}
logprintf (LOG_VERBOSE, _("Connecting to %s|%s|:%d... "),
diff --git a/src/convert.c b/src/convert.c
index 509923e..78cbafb 100644
--- a/src/convert.c
+++ b/src/convert.c
@@ -1024,7 +1024,7 @@ convert_cleanup (void)
/* This table should really be merged with dl_file_url_map and
downloaded_html_files. This was originally a list, but I changed
- it to a hash table beause it was actually taking a lot of time to
+ it to a hash table because it was actually taking a lot of time to
find things in it. */
static struct hash_table *downloaded_files_hash;
diff --git a/src/cookies.c b/src/cookies.c
index a250770..e316ea1 100644
--- a/src/cookies.c
+++ b/src/cookies.c
@@ -485,7 +485,7 @@ parse_set_cookie (const char *set_cookie, bool silent)
/* Check whether ADDR matches <digits>.<digits>.<digits>.<digits>.
We don't want to call network functions like inet_addr() because
- all we need is a check, preferrably one that is small, fast, and
+ all we need is a check, preferably one that is small, fast, and
well-defined. */
static bool
@@ -516,7 +516,7 @@ numeric_address_p (const char *addr)
psl on our own, if libpsl is compiled without a public suffix list,
fall back to using the original "tail matching" heuristic. Also if
libpsl is unable to convert the domain to lowercase, which means that
- it doesnt have any runtime conversion support, we again fall back to
+ it doesn't have any runtime conversion support, we again fall back to
"tail matching" since libpsl states the results are unpredictable with
upper case strings.
*/
diff --git a/src/ftp.c b/src/ftp.c
index 2f2866c..a0b3b0b 100644
--- a/src/ftp.c
+++ b/src/ftp.c
@@ -1463,7 +1463,7 @@ Error in server response, closing control connection.\n"));
else if (opt.noclobber || opt.always_rest || opt.timestamping || opt.dirstruct
|| opt.output_document || count > 0)
{
- if (opt.unlink_requested && file_exists_p (con->target))
+ if (opt.unlink_requested && file_exists_p (con->target, NULL))
{
if (unlink (con->target) < 0)
{
@@ -1857,7 +1857,7 @@ ftp_loop_internal (struct url *u, struct url *original_url, struct fileinfo *f,
/* If we receive .listing file it is necessary to determine system type of the ftp
server even if opn.noclobber is given. Thus we must ignore opt.noclobber in
order to establish connection with the server and get system type. */
- if (opt.noclobber && !opt.output_document && file_exists_p (con->target)
+ if (opt.noclobber && !opt.output_document && file_exists_p (con->target, NULL)
&& !((con->cmd & DO_LIST) && !(con->cmd & DO_RETR)))
{
logprintf (LOG_VERBOSE,
@@ -2129,7 +2129,7 @@ Removing file due to --delete-after in ftp_loop_internal():\n"));
}
/* Return the directory listing in a reusable format. The directory
- is specifed in u->dir. */
+ is specified in u->dir. */
static uerr_t
ftp_get_listing (struct url *u, struct url *original_url, ccon *con,
struct fileinfo **f)
@@ -2413,7 +2413,7 @@ Already have correct symlink %s -> %s\n\n"),
&& !(f->type == FT_SYMLINK && !opt.retr_symlinks)
&& f->tstamp != -1
&& dlthis
- && file_exists_p (con->target))
+ && file_exists_p (con->target, NULL))
{
touch (actual_target, f->tstamp);
}
diff --git a/src/host.c b/src/host.c
index 9f551c7..ccc6349 100644
--- a/src/host.c
+++ b/src/host.c
@@ -57,10 +57,6 @@ as that of the covered work. */
#include <errno.h>
-#ifdef ENABLE_IRI
-#include <idn2.h>
-#endif
-
#include "utils.h"
#include "host.h"
#include "url.h"
@@ -846,11 +842,8 @@ lookup_host (const char *host, int flags)
if (opt.enable_iri && (name = idn_decode ((char *) host)) != NULL)
{
- int len = strlen (host) + strlen (name) + 4;
- str = xmalloc (len);
- snprintf (str, len, "%s (%s)", name, host);
- str[len-1] = '\0';
- idn2_free (name);
+ str = aprintf ("%s (%s)", name, host);
+ xfree (name);
}
logprintf (LOG_VERBOSE, _("Resolving %s... "),
diff --git a/src/hsts.c b/src/hsts.c
index 91cc527..257d4e5 100644
--- a/src/hsts.c
+++ b/src/hsts.c
@@ -32,9 +32,9 @@ as that of the covered work. */
#ifdef HAVE_HSTS
#include "hsts.h"
+#include "utils.h"
#include "host.h" /* for is_valid_ip_address() */
#include "init.h" /* for home_dir() */
-#include "utils.h"
#include "hash.h"
#include "c-ctype.h"
#ifdef TESTING
@@ -443,7 +443,6 @@ hsts_store_entry (hsts_store_t store,
enum hsts_kh_match match = NO_MATCH;
struct hsts_kh *kh = xnew(struct hsts_kh);
struct hsts_kh_info *entry = NULL;
- time_t t = 0;
if (hsts_is_host_eligible (scheme, host))
{
@@ -458,17 +457,18 @@ hsts_store_entry (hsts_store_t store,
}
else if (max_age > 0)
{
- if (entry->max_age != max_age ||
- entry->include_subdomains != include_subdomains)
+ /* RFC 6797 states that 'max_age' is a TTL relative to the
+ * reception of the STS header so we have to update the
+ * 'created' field too. The RFC also states that we have to
+ * update the entry each time we see HSTS header.
+ * See also Section 11.2. */
+ time_t t = time (NULL);
+
+ if (t != -1 && t != entry->created)
{
- /* RFC 6797 states that 'max_age' is a TTL relative to the reception of the STS header
- so we have to update the 'created' field too */
- t = time (NULL);
- if (t != -1)
- entry->created = t;
+ entry->created = t;
entry->max_age = max_age;
entry->include_subdomains = include_subdomains;
-
store->changed = true;
}
}
@@ -500,18 +500,19 @@ hsts_store_t
hsts_store_open (const char *filename)
{
hsts_store_t store = NULL;
+ file_stats_t fstats;
store = xnew0 (struct hsts_store);
store->table = hash_table_new (0, hsts_hash_func, hsts_cmp_func);
store->last_mtime = 0;
store->changed = false;
- if (file_exists_p (filename))
+ if (file_exists_p (filename, &fstats))
{
if (hsts_file_access_valid (filename))
{
struct stat st;
- FILE *fp = fopen (filename, "r");
+ FILE *fp = fopen_stat (filename, "r", &fstats);
if (!fp || !hsts_read_database (store, fp, false))
{
diff --git a/src/html-parse.c b/src/html-parse.c
index ae436d0..c14d73a 100644
--- a/src/html-parse.c
+++ b/src/html-parse.c
@@ -176,8 +176,8 @@ struct pool {
P->orig_size = P->size; \
} while (0)
-/* Grow the pool to accomodate at least SIZE new bytes. If the pool
- already has room to accomodate SIZE bytes of data, this is a no-op. */
+/* Grow the pool to accommodate at least SIZE new bytes. If the pool
+ already has room to accommodate SIZE bytes of data, this is a no-op. */
#define POOL_GROW(p, increase) \
GROW_ARRAY ((p)->contents, (p)->size, (p)->tail + (increase), \
diff --git a/src/http-ntlm.c b/src/http-ntlm.c
index 87f5a37..72f6fcd 100644
--- a/src/http-ntlm.c
+++ b/src/http-ntlm.c
@@ -122,7 +122,7 @@ ntlm_input (struct ntlmdata *ntlm, const char *header)
DEBUGP (("Received a type-2 NTLM message.\n"));
- size = wget_base64_decode (header, buffer);
+ size = wget_base64_decode (header, buffer, strlen (header));
if (size < 0)
return false; /* malformed base64 from server */
diff --git a/src/http.c b/src/http.c
index 898e184..dc31823 100644
--- a/src/http.c
+++ b/src/http.c
@@ -973,6 +973,9 @@ skip_short_body (int fd, wgint contlen, bool chunked)
remaining_chunk_size = strtol (line, &endl, 16);
xfree (line);
+ if (remaining_chunk_size < 0)
+ return false;
+
if (remaining_chunk_size == 0)
{
line = fd_read_line (fd);
@@ -1539,6 +1542,16 @@ persistent_available_p (const char *host, int port, bool ssl,
fd = -1; \
} while (0)
+typedef enum /* encodings tracked in struct http_stat's local_encoding and remote_encoding */
+{
+ ENC_INVALID = -1, /* invalid encoding */
+ ENC_NONE = 0, /* no special encoding */
+ ENC_GZIP, /* gzip compression */
+ ENC_DEFLATE, /* deflate compression */
+ ENC_COMPRESS, /* compress compression */
+ ENC_BROTLI /* brotli compression */
+} encoding_t;
+
struct http_stat
{
wgint len; /* received length */
@@ -1569,6 +1582,10 @@ struct http_stat
#ifdef HAVE_METALINK
metalink_t *metalink;
#endif
+
+ encoding_t local_encoding; /* the encoding of the local file */
+ encoding_t remote_encoding; /* the encoding of the remote file */
+
bool temporary; /* downloading a temporary file */
};
@@ -1680,6 +1697,9 @@ read_response_body (struct http_stat *hs, int sock, FILE *fp, wgint contlen,
if (chunked_transfer_encoding)
flags |= rb_chunked_transfer_encoding;
+ if (hs->remote_encoding == ENC_GZIP)
+ flags |= rb_compressed_gzip;
+
hs->len = hs->restval;
hs->rd_size = 0;
/* Download the response body and write it to fp.
@@ -1873,7 +1893,12 @@ initialize_request (const struct url *u, struct http_stat *hs, int *dt, struct u
rel_value);
SET_USER_AGENT (req);
request_set_header (req, "Accept", "*/*", rel_none);
- request_set_header (req, "Accept-Encoding", "identity", rel_none);
+#ifdef HAVE_LIBZ
+ if (opt.compression != compression_none)
+ request_set_header (req, "Accept-Encoding", "gzip", rel_none);
+ else
+#endif
+ request_set_header (req, "Accept-Encoding", "identity", rel_none);
/* Find the username with priority */
if (u->user)
@@ -1900,7 +1925,7 @@ initialize_request (const struct url *u, struct http_stat *hs, int *dt, struct u
*passwd = NULL;
/* Check for ~/.netrc if none of the above match */
- if (opt.netrc && (!user || (!passwd || !*passwd)))
+ if (opt.netrc && (!*user || !*passwd))
search_netrc (u->host, (const char **) user, (const char **) passwd, 0);
/* We only do "site-wide" authentication with "global" user/password
@@ -2290,7 +2315,7 @@ check_file_output (const struct url *u, struct http_stat *hs,
}
/* TODO: perform this check only once. */
- if (!hs->existence_checked && file_exists_p (hs->local_file))
+ if (!hs->existence_checked && file_exists_p (hs->local_file, NULL))
{
if (opt.noclobber && !opt.output_document)
{
@@ -2486,7 +2511,7 @@ open_output_stream (struct http_stat *hs, int count, FILE **fp)
}
else if (ALLOW_CLOBBER || count > 0)
{
- if (opt.unlink_requested && file_exists_p (hs->local_file))
+ if (opt.unlink_requested && file_exists_p (hs->local_file, NULL))
{
if (unlink (hs->local_file) < 0)
{
@@ -2552,14 +2577,14 @@ set_content_type (int *dt, const char *type)
of the multitude of broken CGI's that "forget" to generate the
content-type. */
if (!type ||
- 0 == strncasecmp (type, TEXTHTML_S, strlen (TEXTHTML_S)) ||
- 0 == strncasecmp (type, TEXTXHTML_S, strlen (TEXTXHTML_S)))
+ 0 == c_strcasecmp (type, TEXTHTML_S) ||
+ 0 == c_strcasecmp (type, TEXTXHTML_S))
*dt |= TEXTHTML;
else
*dt &= ~TEXTHTML;
if (type &&
- 0 == strncasecmp (type, TEXTCSS_S, strlen (TEXTCSS_S)))
+ 0 == c_strcasecmp (type, TEXTCSS_S))
*dt |= TEXTCSS;
else
*dt &= ~TEXTCSS;
@@ -2998,7 +3023,7 @@ skip_content_type:
char *bin_hash = alloca (dig_hash_str_len * 3 / 4 + 1);
ssize_t hash_bin_len;
- hash_bin_len = wget_base64_decode (dig_hash, bin_hash);
+ hash_bin_len = wget_base64_decode (dig_hash, bin_hash, dig_hash_str_len * 3 / 4 + 1);
/* Detect malformed base64 input. */
if (hash_bin_len < 0)
@@ -3189,6 +3214,8 @@ gethttp (const struct url *u, struct url *original_url, struct http_stat *hs,
xfree (hs->remote_time);
hs->error = NULL;
hs->message = NULL;
+ hs->local_encoding = ENC_NONE;
+ hs->remote_encoding = ENC_NONE;
conn = u;
@@ -3476,7 +3503,7 @@ gethttp (const struct url *u, struct url *original_url, struct http_stat *hs,
#ifdef HAVE_METALINK
/* We need to check for the Metalink data in the very first response
- we get from the server (before redirectionrs, authorization, etc.). */
+ we get from the server (before redirections, authorization, etc.). */
if (metalink)
{
hs->metalink = metalink_from_http (resp, hs, u);
@@ -3496,7 +3523,7 @@ gethttp (const struct url *u, struct url *original_url, struct http_stat *hs,
uerr_t auth_err = RETROK;
bool retry;
/* Normally we are not interested in the response body.
- But if we are writing a WARC file we are: we like to keep everyting. */
+ But if we are writing a WARC file we are: we like to keep everything. */
if (warc_enabled)
{
int _err;
@@ -3556,20 +3583,6 @@ gethttp (const struct url *u, struct url *original_url, struct http_stat *hs,
pconn.authorized = true;
}
- if (statcode == HTTP_STATUS_GATEWAY_TIMEOUT)
- {
- hs->len = 0;
- hs->res = 0;
- hs->restval = 0;
-
- CLOSE_FINISH (sock);
- xfree (hs->message);
-
- retval = GATEWAYTIMEOUT;
- goto cleanup;
- }
-
-
{
uerr_t ret = check_file_output (u, hs, resp, hdrval, sizeof hdrval);
if (ret != RETROK)
@@ -3639,6 +3652,8 @@ gethttp (const struct url *u, struct url *original_url, struct http_stat *hs,
}
hs->newloc = resp_header_strdup (resp, "Location");
hs->remote_time = resp_header_strdup (resp, "Last-Modified");
+ if (!hs->remote_time) // now look for the Wayback Machine's timestamp
+ hs->remote_time = resp_header_strdup (resp, "X-Archive-Orig-last-modified");
if (resp_header_copy (resp, "Content-Range", hdrval, sizeof (hdrval)))
{
@@ -3651,6 +3666,73 @@ gethttp (const struct url *u, struct url *original_url, struct http_stat *hs,
}
}
+  /* Map the Content-Encoding header onto hs->local_encoding; unknown
+     values are logged via DEBUGP and treated as unencoded.  */
+  if (resp_header_copy (resp, "Content-Encoding", hdrval, sizeof (hdrval)))
+    {
+      hs->local_encoding = ENC_INVALID;
+
+      switch (hdrval[0])
+        {
+        case 'b': case 'B':
+          if (0 == c_strcasecmp (hdrval, "br"))
+            hs->local_encoding = ENC_BROTLI;
+          break;
+        case 'c': case 'C':
+          if (0 == c_strcasecmp (hdrval, "compress"))
+            hs->local_encoding = ENC_COMPRESS;
+          break;
+        case 'd': case 'D':
+          if (0 == c_strcasecmp (hdrval, "deflate"))
+            hs->local_encoding = ENC_DEFLATE;
+          break;
+        case 'g': case 'G':
+          if (0 == c_strcasecmp (hdrval, "gzip"))
+            hs->local_encoding = ENC_GZIP;
+          break;
+        case 'i': case 'I':
+          if (0 == c_strcasecmp (hdrval, "identity"))
+            hs->local_encoding = ENC_NONE;
+          break;
+        case 'x': case 'X':
+          if (0 == c_strcasecmp (hdrval, "x-compress"))
+            hs->local_encoding = ENC_COMPRESS;
+          else if (0 == c_strcasecmp (hdrval, "x-gzip"))
+            hs->local_encoding = ENC_GZIP;
+          break;
+        case '\0':
+          hs->local_encoding = ENC_NONE;
+        }
+
+      if (hs->local_encoding == ENC_INVALID)
+        {
+          DEBUGP (("Unrecognized Content-Encoding: %s\n", hdrval));
+          hs->local_encoding = ENC_NONE;
+        }
+#ifdef HAVE_LIBZ
+      else if (hs->local_encoding == ENC_GZIP
+               && opt.compression != compression_none)
+        {
+          /* Decompress unless the Content-Type itself is gzip.  'type'
+             may be NULL (set_content_type guards against that too), so
+             it must not be handed to strchr unchecked.  */
+          const char *p = type ? strchr (type, '/') : NULL;
+
+          if (p != NULL)
+            {
+              p++;
+              if (c_tolower (p[0]) == 'x' && p[1] == '-')
+                p += 2;
+            }
+          if (p == NULL || 0 != c_strcasecmp (p, "gzip"))
+            {
+              hs->remote_encoding = ENC_GZIP;
+              hs->local_encoding = ENC_NONE;
+            }
+        }
+#endif
+    }
+
/* 20x responses are counted among successful by default. */
if (H_20X (statcode))
*dt |= RETROKF;
@@ -3763,8 +3845,51 @@ gethttp (const struct url *u, struct url *original_url, struct http_stat *hs,
set_content_type (dt, type);
+ if (cond_get)
+ {
+ if (statcode == HTTP_STATUS_NOT_MODIFIED)
+ {
+ logprintf (LOG_VERBOSE,
+ _ ("File %s not modified on server. Omitting download.\n\n"),
+ quote (hs->local_file));
+ *dt |= RETROKF;
+ CLOSE_FINISH (sock);
+ retval = RETRUNNEEDED;
+ goto cleanup;
+ }
+ }
+
if (opt.adjust_extension)
{
+ const char *encoding_ext = NULL;
+ switch (hs->local_encoding)
+ {
+ case ENC_INVALID:
+ case ENC_NONE:
+ break;
+ case ENC_BROTLI:
+ encoding_ext = ".br";
+ break;
+ case ENC_COMPRESS:
+ encoding_ext = ".Z";
+ break;
+ case ENC_DEFLATE:
+ encoding_ext = ".zlib";
+ break;
+ case ENC_GZIP:
+ encoding_ext = ".gz";
+ break;
+ default:
+ DEBUGP (("No extension found for encoding %d\n",
+ hs->local_encoding));
+ }
+ if (encoding_ext != NULL)
+ {
+ char *file_ext = strrchr (hs->local_file, '.');
+ /* strip Content-Encoding extension (it will be re-added later) */
+ if (file_ext != NULL && 0 == strcasecmp (file_ext, encoding_ext))
+ *file_ext = '\0';
+ }
if (*dt & TEXTHTML)
/* -E / --adjust-extension / adjust_extension = on was specified,
and this is a text/html file. If some case-insensitive
@@ -3777,22 +3902,16 @@ gethttp (const struct url *u, struct url *original_url, struct http_stat *hs,
{
ensure_extension (hs, ".css", dt);
}
+ if (encoding_ext != NULL)
+ {
+ ensure_extension (hs, encoding_ext, dt);
+ }
}
if (cond_get)
{
- if (statcode == HTTP_STATUS_NOT_MODIFIED)
- {
- logprintf (LOG_VERBOSE,
- _("File %s not modified on server. Omitting download.\n\n"),
- quote (hs->local_file));
- *dt |= RETROKF;
- CLOSE_FINISH (sock);
- retval = RETRUNNEEDED;
- goto cleanup;
- }
/* Handle the case when server ignores If-Modified-Since header. */
- else if (statcode == HTTP_STATUS_OK && hs->remote_time)
+ if (statcode == HTTP_STATUS_OK && hs->remote_time)
{
time_t tmr = http_atotm (hs->remote_time);
@@ -3815,6 +3934,16 @@ gethttp (const struct url *u, struct url *original_url, struct http_stat *hs,
}
if (statcode == HTTP_STATUS_RANGE_NOT_SATISFIABLE
+ && hs->restval < (contlen + contrange))
+ {
+ /* The file was not completely downloaded,
+ yet the server claims the range is invalid.
+ Bail out. */
+ CLOSE_INVALIDATE (sock);
+ retval = RANGEERR;
+ goto cleanup;
+ }
+ if (statcode == HTTP_STATUS_RANGE_NOT_SATISFIABLE
|| (!opt.timestamping && hs->restval > 0 && statcode == HTTP_STATUS_OK
&& contrange == 0 && contlen >= 0 && hs->restval >= contlen))
{
@@ -3848,6 +3977,9 @@ gethttp (const struct url *u, struct url *original_url, struct http_stat *hs,
}
if (contlen == -1)
hs->contlen = -1;
+ /* If the response is gzipped, the uncompressed size is unknown. */
+ else if (hs->remote_encoding == ENC_GZIP)
+ hs->contlen = -1;
else
hs->contlen = contlen + contrange;
@@ -3910,8 +4042,8 @@ gethttp (const struct url *u, struct url *original_url, struct http_stat *hs,
retval = _err;
goto cleanup;
}
- else
- CLOSE_FINISH (sock);
+
+ CLOSE_FINISH (sock);
}
else
{
@@ -3934,7 +4066,11 @@ gethttp (const struct url *u, struct url *original_url, struct http_stat *hs,
CLOSE_INVALIDATE (sock);
}
- retval = RETRFINISHED;
+ if (statcode == HTTP_STATUS_GATEWAY_TIMEOUT)
+ retval = GATEWAYTIMEOUT;
+ else
+ retval = RETRFINISHED;
+
goto cleanup;
}
@@ -4060,7 +4196,7 @@ http_loop (const struct url *u, struct url *original_url, char **newloc,
got_name = true;
}
- if (got_name && file_exists_p (hstat.local_file) && opt.noclobber && !opt.output_document)
+ if (got_name && file_exists_p (hstat.local_file, NULL) && opt.noclobber && !opt.output_document)
{
/* If opt.noclobber is turned on and file already exists, do not
retrieve the file. But if the output_document was given, then this
@@ -4097,7 +4233,7 @@ http_loop (const struct url *u, struct url *original_url, char **newloc,
{
/* Use conditional get request if requested
* and if timestamp is known at this moment. */
- if (opt.if_modified_since && !send_head_first && got_name && file_exists_p (hstat.local_file))
+ if (opt.if_modified_since && !send_head_first && got_name && file_exists_p (hstat.local_file, NULL))
{
*dt |= IF_MODIFIED_SINCE;
{
@@ -4108,7 +4244,7 @@ http_loop (const struct url *u, struct url *original_url, char **newloc,
}
/* Send preliminary HEAD request if -N is given and we have existing
* destination file or content disposition is enabled. */
- else if (opt.content_disposition || file_exists_p (hstat.local_file))
+ else if (opt.content_disposition || file_exists_p (hstat.local_file, NULL))
send_head_first = true;
}
@@ -4208,6 +4344,8 @@ http_loop (const struct url *u, struct url *original_url, char **newloc,
bring them to "while" statement at the end, to judge
whether the number of tries was exceeded. */
printwhat (count, opt.ntry);
+ xfree (hstat.message);
+ xfree (hstat.error);
continue;
case FWRITEERR: case FOPENERR:
/* Another fatal error. */
@@ -5111,13 +5249,13 @@ ensure_extension (struct http_stat *hs, const char *ext, int *dt)
strcpy (hs->local_file + local_filename_len, ext);
/* If clobbering is not allowed and the file, as named,
exists, tack on ".NUMBER.html" instead. */
- if (!ALLOW_CLOBBER && file_exists_p (hs->local_file))
+ if (!ALLOW_CLOBBER && file_exists_p (hs->local_file, NULL))
{
int ext_num = 1;
do
sprintf (hs->local_file + local_filename_len,
".%d%s", ext_num++, ext);
- while (file_exists_p (hs->local_file));
+ while (file_exists_p (hs->local_file, NULL));
}
*dt |= ADDED_HTML_EXTENSION;
}
diff --git a/src/init.c b/src/init.c
index e6aa673..1064883 100644
--- a/src/init.c
+++ b/src/init.c
@@ -99,6 +99,9 @@ CMD_DECLARE (cmd_vector);
CMD_DECLARE (cmd_use_askpass);
+#ifdef HAVE_LIBZ
+CMD_DECLARE (cmd_spec_compression);
+#endif
CMD_DECLARE (cmd_spec_dirstruct);
CMD_DECLARE (cmd_spec_header);
CMD_DECLARE (cmd_spec_warc_header);
@@ -161,6 +164,9 @@ static const struct {
{ "checkcertificate", &opt.check_cert, cmd_check_cert },
#endif
{ "chooseconfig", &opt.choose_config, cmd_file },
+#ifdef HAVE_LIBZ
+ { "compression", &opt.compression, cmd_spec_compression },
+#endif
{ "connecttimeout", &opt.connect_timeout, cmd_time },
{ "contentdisposition", &opt.content_disposition, cmd_boolean },
{ "contentonerror", &opt.content_on_error, cmd_boolean },
@@ -445,6 +451,10 @@ defaults (void)
opt.ftps_clear_data_connection = false;
#endif
+#ifdef HAVE_LIBZ
+ opt.compression = compression_auto;
+#endif
+
/* The default for file name restriction defaults to the OS type. */
#if defined(WINDOWS) || defined(MSDOS) || defined(__CYGWIN__)
opt.restrict_files_os = restrict_windows;
@@ -566,10 +576,11 @@ wgetrc_env_file_name (void)
char *env = getenv ("WGETRC");
if (env && *env)
{
- if (!file_exists_p (env))
+ file_stats_t flstat;
+ if (!file_exists_p (env, &flstat))
{
- fprintf (stderr, _("%s: WGETRC points to %s, which doesn't exist.\n"),
- exec_name, env);
+ fprintf (stderr, _("%s: WGETRC points to %s, which couldn't be accessed because of error: %s.\n"),
+ exec_name, env, strerror(flstat.access_err));
exit (WGET_EXIT_GENERIC_ERROR);
}
return xstrdup (env);
@@ -577,7 +588,7 @@ wgetrc_env_file_name (void)
return NULL;
}
-/* Check for the existance of '$HOME/.wgetrc' and return its path
+/* Check for the existence of '$HOME/.wgetrc' and return its path
if it exists and is set. */
char *
wgetrc_user_file_name (void)
@@ -597,7 +608,7 @@ wgetrc_user_file_name (void)
if (!file)
return NULL;
- if (!file_exists_p (file))
+ if (!file_exists_p (file, NULL))
{
xfree (file);
return NULL;
@@ -630,7 +641,7 @@ wgetrc_file_name (void)
if (home)
{
file = aprintf ("%s/wget.ini", home);
- if (!file_exists_p (file))
+ if (!file_exists_p (file, NULL))
{
xfree (file);
}
@@ -658,7 +669,7 @@ static bool setval_internal_tilde (int, const char *, const char *);
there were errors in the file. */
bool
-run_wgetrc (const char *file)
+run_wgetrc (const char *file, file_stats_t *flstats)
{
FILE *fp;
char *line = NULL;
@@ -666,7 +677,7 @@ run_wgetrc (const char *file)
int ln;
int errcnt = 0;
- fp = fopen (file, "r");
+ fp = fopen_stat (file, "r", flstats);
if (!fp)
{
fprintf (stderr, _("%s: Cannot read %s (%s).\n"), exec_name,
@@ -722,14 +733,16 @@ void
initialize (void)
{
char *file, *env_sysrc;
+ file_stats_t flstats;
bool ok = true;
+ memset(&flstats, 0, sizeof(flstats));
/* Run a non-standard system rc file when the according environment
variable has been set. For internal testing purposes only! */
env_sysrc = getenv ("SYSTEM_WGETRC");
- if (env_sysrc && file_exists_p (env_sysrc))
+ if (env_sysrc && file_exists_p (env_sysrc, &flstats))
{
- ok &= run_wgetrc (env_sysrc);
+ ok &= run_wgetrc (env_sysrc, &flstats);
/* If there are any problems parsing the system wgetrc file, tell
the user and exit */
if (! ok)
@@ -743,8 +756,8 @@ or specify a different file using --config.\n"), env_sysrc);
}
/* Otherwise, if SYSTEM_WGETRC is defined, use it. */
#ifdef SYSTEM_WGETRC
- else if (file_exists_p (SYSTEM_WGETRC))
- ok &= run_wgetrc (SYSTEM_WGETRC);
+ else if (file_exists_p (SYSTEM_WGETRC, &flstats))
+ ok &= run_wgetrc (SYSTEM_WGETRC, &flstats);
/* If there are any problems parsing the system wgetrc file, tell
the user and exit */
if (! ok)
@@ -771,7 +784,8 @@ or specify a different file using --config.\n"), SYSTEM_WGETRC);
}
else
#endif
- ok &= run_wgetrc (file);
+ if (file_exists_p (file, &flstats))
+ ok &= run_wgetrc (file, &flstats);
/* If there were errors processing either `.wgetrc', abort. */
if (!ok)
@@ -1441,6 +1455,25 @@ cmd_cert_type (const char *com, const char *val, void *place)
static bool check_user_specified_header (const char *);
+#ifdef HAVE_LIBZ
+/* Decode VAL for the COM setting against the keywords "auto", "gzip"
+   and "none", handing PLACE to decode_string for the result.  Prints
+   an error to stderr and returns false when decode_string rejects VAL.  */
+static bool
+cmd_spec_compression (const char *com, const char *val, void *place)
+{
+  static const struct decode_item modes[] = {
+    { "auto", compression_auto }, { "gzip", compression_gzip },
+    { "none", compression_none },
+  };
+  if (decode_string (val, modes, countof (modes), place))
+    return true;
+  fprintf (stderr, _("%s: %s: Invalid value %s.\n"), exec_name, com,
+           quote (val));
+  return false;
+}
+#endif
+
static bool
cmd_spec_dirstruct (const char *com, const char *val, void *place_ignored _GL_UNUSED)
{
diff --git a/src/init.h b/src/init.h
index 1969cc7..a57ca78 100644
--- a/src/init.h
+++ b/src/init.h
@@ -41,6 +41,6 @@ void setoptval (const char *, const char *, const char *);
char *home_dir (void);
void cleanup (void);
void defaults (void);
-bool run_wgetrc (const char *file);
+bool run_wgetrc (const char *file, file_stats_t *);
#endif /* INIT_H */
diff --git a/src/iri.c b/src/iri.c
index 8be109e..af0dfb6 100644
--- a/src/iri.c
+++ b/src/iri.c
@@ -39,8 +39,10 @@ as that of the covered work. */
# include <iconv.h>
#endif
#include <idn2.h>
-#include <unicase.h>
-#include <unistr.h>
+#if IDN2_VERSION_NUMBER < 0x00140000
+# include <unicase.h>
+# include <unistr.h>
+#endif
#include "utils.h"
#include "url.h"
@@ -119,6 +121,7 @@ check_encoding_name (const char *encoding)
return true;
}
+#ifdef HAVE_ICONV
/* Do the conversion according to the passed conversion descriptor cd. *out
will contain the transcoded string on success. *out content is
unspecified otherwise. */
@@ -210,6 +213,15 @@ do_conversion (const char *tocode, const char *fromcode, char const *in_org, siz
}
return false;
}
+#else
+static bool /* stub compiled when HAVE_ICONV is not defined: conversion always fails */
+do_conversion (const char *tocode _GL_UNUSED, const char *fromcode _GL_UNUSED,
+ char const *in_org _GL_UNUSED, size_t inlen _GL_UNUSED, char **out)
+{
+ *out = NULL; /* hand back no string rather than leaving *out unset */
+ return false; /* report failure on every call */
+}
+#endif
/* Try converting string str from locale to UTF-8. Return a new string
on success, or str on error or if conversion isn't needed. */
@@ -253,7 +265,7 @@ idn_encode (const struct iri *i, const char *host)
if (!i->utf8_encode)
{
if (!remote_to_utf8 (i, host, &utf8_encoded))
- return NULL; /* Nothing to encode or an error occured */
+ return NULL; /* Nothing to encode or an error occurred */
src = utf8_encoded;
}
else
@@ -261,11 +273,14 @@ idn_encode (const struct iri *i, const char *host)
#if IDN2_VERSION_NUMBER >= 0x00140000
/* IDN2_TRANSITIONAL implies input NFC encoding */
- if ((ret = idn2_lookup_u8 ((uint8_t *) src, (uint8_t **) &ascii_encoded, IDN2_NONTRANSITIONAL)) != IDN2_OK)
- {
- logprintf (LOG_VERBOSE, _("idn_encode failed (%d): %s\n"), ret,
- quote (idn2_strerror (ret)));
- }
+ ret = idn2_lookup_u8 ((uint8_t *) src, (uint8_t **) &ascii_encoded, IDN2_NONTRANSITIONAL);
+ if (ret != IDN2_OK)
+ /* fall back to TR46 Transitional mode, max IDNA2003 compatibility */
+ ret = idn2_lookup_u8 ((uint8_t *) src, (uint8_t **) &ascii_encoded, IDN2_TRANSITIONAL);
+
+ if (ret != IDN2_OK)
+ logprintf (LOG_VERBOSE, _("idn_encode failed (%d): %s\n"), ret,
+ quote (idn2_strerror (ret)));
#else
/* we need a conversion to lowercase */
lower = u8_tolower ((uint8_t *) src, u8_strlen ((uint8_t *) src) + 1, 0, UNINORM_NFKC, NULL, &len);
@@ -288,6 +303,13 @@ idn_encode (const struct iri *i, const char *host)
xfree (utf8_encoded);
+ if (ret == IDN2_OK && ascii_encoded)
+ {
+ char *tmp = xstrdup (ascii_encoded);
+ idn2_free (ascii_encoded);
+ ascii_encoded = tmp;
+ }
+
return ret == IDN2_OK ? ascii_encoded : NULL;
}
diff --git a/src/iri.h b/src/iri.h
index ba64a27..fb994ee 100644
--- a/src/iri.h
+++ b/src/iri.h
@@ -40,8 +40,6 @@ struct iri {
#ifdef ENABLE_IRI
-# include <idn2.h>
-
char *parse_charset (const char *str);
const char *find_locale (void);
bool check_encoding_name (const char *encoding);
diff --git a/src/log.c b/src/log.c
index 51f30c4..6f9b0c7 100644
--- a/src/log.c
+++ b/src/log.c
@@ -532,7 +532,7 @@ log_set_flush (bool flush)
}
else
{
- /* Reenable flushing. If anything was printed in no-flush mode,
+ /* Re-enable flushing. If anything was printed in no-flush mode,
flush the log now. */
if (needs_flushing)
logflush ();
diff --git a/src/main.c b/src/main.c
index 581a33d..f9759c3 100644
--- a/src/main.c
+++ b/src/main.c
@@ -275,6 +275,9 @@ static struct cmdline_option option_data[] =
{ IF_SSL ("certificate-type"), 0, OPT_VALUE, "certificatetype", -1 },
{ IF_SSL ("check-certificate"), 0, OPT_BOOLEAN, "checkcertificate", -1 },
{ "clobber", 0, OPT__CLOBBER, NULL, optional_argument },
+#ifdef HAVE_LIBZ
+ { "compression", 0, OPT_VALUE, "compression", -1 },
+#endif
{ "config", 0, OPT_VALUE, "chooseconfig", -1 },
{ "connect-timeout", 0, OPT_VALUE, "connecttimeout", -1 },
{ "continue", 'c', OPT_BOOLEAN, "continue", -1 },
@@ -359,6 +362,7 @@ static struct cmdline_option option_data[] =
#endif
{ "method", 0, OPT_VALUE, "method", -1 },
{ "mirror", 'm', OPT_BOOLEAN, "mirror", -1 },
+ { "netrc", 0, OPT_BOOLEAN, "netrc", -1 },
{ "no", 'n', OPT__NO, NULL, required_argument },
{ "no-clobber", 0, OPT_BOOLEAN, "noclobber", -1 },
{ "no-config", 0, OPT_BOOLEAN, "noconfig", -1},
@@ -424,7 +428,6 @@ static struct cmdline_option option_data[] =
{ "user", 0, OPT_VALUE, "user", -1 },
{ "user-agent", 'U', OPT_VALUE, "useragent", -1 },
{ "verbose", 'v', OPT_BOOLEAN, "verbose", -1 },
- { "verbose", 0, OPT_BOOLEAN, "verbose", -1 },
{ "version", 'V', OPT_FUNCALL, (void *) print_version, no_argument },
{ "wait", 'w', OPT_VALUE, "wait", -1 },
{ "waitretry", 0, OPT_VALUE, "waitretry", -1 },
@@ -630,6 +633,8 @@ Download:\n"),
-nc, --no-clobber skip downloads that would download to\n\
existing files (overwriting them)\n"),
N_("\
+ --no-netrc don't try to obtain credentials from .netrc\n"),
+ N_("\
-c, --continue resume getting a partially-downloaded file\n"),
N_("\
--start-pos=OFFSET start downloading from zero-based position OFFSET\n"),
@@ -761,6 +766,10 @@ HTTP options:\n"),
--ignore-length ignore 'Content-Length' header field\n"),
N_("\
--header=STRING insert STRING among the headers\n"),
+#ifdef HAVE_LIBZ
+ N_("\
+ --compression=TYPE choose compression, one of auto, gzip and none\n"),
+#endif
N_("\
--max-redirect maximum redirections allowed per page\n"),
N_("\
@@ -809,7 +818,7 @@ HTTP options:\n"),
HTTPS (SSL/TLS) options:\n"),
N_("\
--secure-protocol=PR choose secure protocol, one of auto, SSLv2,\n\
- SSLv3, TLSv1 and PFS\n"),
+ SSLv3, TLSv1, TLSv1_1, TLSv1_2 and PFS\n"),
N_("\
--https-only only follow secure HTTPS links\n"),
N_("\
@@ -1382,10 +1391,10 @@ main (int argc, char **argv)
}
else if (strcmp (config_opt->long_name, "config") == 0)
{
- bool userrc_ret = true;
- userrc_ret &= run_wgetrc (optarg);
+ file_stats_t flstats;
use_userconfig = true;
- if (userrc_ret)
+ memset(&flstats, 0, sizeof(flstats));
+ if (file_exists_p(optarg, &flstats) && run_wgetrc (optarg, &flstats))
break;
else
{
@@ -1621,7 +1630,7 @@ WARNING: timestamping does nothing in combination with -O. See the manual\n\
for details.\n\n"));
opt.timestamping = false;
}
- if (opt.noclobber && file_exists_p(opt.output_document))
+ if (opt.noclobber && file_exists_p(opt.output_document, NULL))
{
/* Check if output file exists; if it does, exit. */
logprintf (LOG_VERBOSE,
@@ -1673,6 +1682,26 @@ for details.\n\n"));
}
}
+#ifdef HAVE_LIBZ
+ if (opt.always_rest || opt.start_pos >= 0)
+ {
+ if (opt.compression == compression_auto)
+ {
+ /* Compression does not work with --continue or --start-pos.
+ Since compression was not explicitly set, it will be disabled. */
+ opt.compression = compression_none;
+ }
+ else if (opt.compression != compression_none)
+ {
+ fprintf (stderr,
+ _("Compression does not work with --continue or"
+ " --start-pos, they will be disabled.\n"));
+ opt.always_rest = false;
+ opt.start_pos = -1;
+ }
+ }
+#endif
+
if (opt.ask_passwd && opt.passwd)
{
fprintf (stderr,
@@ -2082,7 +2111,7 @@ only if outputting to a regular file.\n"));
&dt, opt.recursive, iri, true);
}
- if (opt.delete_after && filename != NULL && file_exists_p (filename))
+ if (opt.delete_after && filename != NULL && file_exists_p (filename, NULL))
{
DEBUGP (("Removing file due to --delete-after in main():\n"));
logprintf (LOG_VERBOSE, _("Removing %s.\n"), filename);
diff --git a/src/metalink.c b/src/metalink.c
index 904f9f3..f11b7c1 100644
--- a/src/metalink.c
+++ b/src/metalink.c
@@ -375,6 +375,7 @@ retrieve_from_metalink (const metalink_t* metalink)
metalink_checksum_t **mchksum_ptr, *mchksum;
struct iri *iri;
struct url *url;
+ file_stats_t flstats;
int url_err;
clean_metalink_string (&mres->url);
@@ -490,8 +491,9 @@ retrieve_from_metalink (const metalink_t* metalink)
Bugfix: point output_stream to destname if it exists.
*/
- if (!output_stream && file_exists_p (destname))
- output_stream = fopen (destname, "ab");
+ memset(&flstats, 0, sizeof(flstats));
+ if (!output_stream && file_exists_p (destname, &flstats))
+ output_stream = fopen_stat (destname, "ab", &flstats);
}
url_free (url);
iri_free (iri);
@@ -901,7 +903,7 @@ gpg_skip_verification:
Note: the file has been downloaded using *_loop. Therefore, it
is not necessary to keep the file for continuated download. */
if (((retr_err != RETROK && !opt.always_rest) || opt.delete_after)
- && destname != NULL && file_exists_p (destname))
+ && destname != NULL && file_exists_p (destname, NULL))
{
badhash_or_remove (destname);
}
diff --git a/src/mswindows.c b/src/mswindows.c
index 90e6ec4..f7e03bd 100644
--- a/src/mswindows.c
+++ b/src/mswindows.c
@@ -580,7 +580,7 @@ run_with_timeout (double seconds, void (*fun) (void *), void *arg)
const char *
inet_ntop (int af, const void *src, char *dst, socklen_t cnt)
{
- /* struct sockaddr can't accomodate struct sockaddr_in6. */
+ /* struct sockaddr can't accommodate struct sockaddr_in6. */
union {
struct sockaddr_in6 sin6;
struct sockaddr_in sin;
diff --git a/src/mswindows.h b/src/mswindows.h
index 09d7f8c..1bc2e21 100644
--- a/src/mswindows.h
+++ b/src/mswindows.h
@@ -57,6 +57,9 @@ as that of the covered work. */
/* Declares getpid(). */
#include <process.h>
+/* Declares inet_ntop() and inet_pton(). */
+#include <arpa/inet.h>
+
/* We have strcasecmp and strncasecmp, just under different names. */
#ifndef HAVE_STRCASECMP
# define strcasecmp stricmp
@@ -85,11 +88,6 @@ typedef __int64 wgint;
#define PATH_SEPARATOR '\\'
-/* Additional declarations needed for IPv6: */
-#ifdef ENABLE_IPV6
-const char *inet_ntop (int, const void *, char *, socklen_t);
-#endif
-
/* ioctl needed by set_windows_fd_as_blocking_socket() */
#include <sys/ioctl.h>
diff --git a/src/netrc.c b/src/netrc.c
index 47fe9b0..6375b66 100644
--- a/src/netrc.c
+++ b/src/netrc.c
@@ -235,7 +235,7 @@ parse_netrc (const char *path)
/* The latest token we've seen in the file. */
enum
{
- tok_nothing, tok_account, tok_login, tok_macdef, tok_machine, tok_password
+ tok_nothing, tok_account, tok_login, tok_macdef, tok_machine, tok_password, tok_port, tok_force
} last_token = tok_nothing;
current = retval = NULL;
@@ -344,6 +344,18 @@ parse_netrc (const char *path)
premature_token = "account";
break;
+ /* We don't handle the port keyword at all. */
+ case tok_port:
+ if (!current)
+ premature_token = "port";
+ break;
+
+ /* We don't handle the force keyword at all. */
+ case tok_force:
+ if (!current)
+ premature_token = "force";
+ break;
+
/* We handle tok_nothing below this switch. */
case tok_nothing:
break;
@@ -365,10 +377,10 @@ parse_netrc (const char *path)
/* Fetch the next token. */
if (!strcmp (tok, "account"))
last_token = tok_account;
+
else if (!strcmp (tok, "default"))
- {
maybe_add_to_list (&current, &retval);
- }
+
else if (!strcmp (tok, "login"))
last_token = tok_login;
@@ -381,6 +393,16 @@ parse_netrc (const char *path)
else if (!strcmp (tok, "password"))
last_token = tok_password;
+ /* GNU extensions 'port' and 'force', not operational
+ * see https://www.gnu.org/software/emacs/manual/html_node/gnus/NNTP.html#index-nntp_002dauthinfo_002dfunction-2003
+ * see https://savannah.gnu.org/bugs/index.php?52066
+ */
+ else if (!strcmp (tok, "port"))
+ last_token = tok_port;
+
+ else if (!strcmp (tok, "force"))
+ last_token = tok_force;
+
else
fprintf (stderr, _("%s: %s:%d: unknown token \"%s\"\n"),
exec_name, path, ln, tok);
diff --git a/src/options.h b/src/options.h
index 3972945..cf945c1 100644
--- a/src/options.h
+++ b/src/options.h
@@ -326,6 +326,14 @@ struct options
name. */
bool report_bps; /*Output bandwidth in bits format*/
+#ifdef HAVE_LIBZ
+ enum compression_options {
+ compression_auto,
+ compression_gzip,
+ compression_none
+ } compression; /* type of HTTP compression to use */
+#endif
+
char *rejected_log; /* The file to log rejected URLS to. */
#ifdef HAVE_HSTS
diff --git a/src/progress.c b/src/progress.c
index f44e0ee..36037df 100644
--- a/src/progress.c
+++ b/src/progress.c
@@ -75,7 +75,7 @@ static struct progress_implementation implementations[] = {
static struct progress_implementation *current_impl;
static int current_impl_locked;
-/* Progress implementation used by default. Can be overriden in
+/* Progress implementation used by default. Can be overridden in
wgetrc or by the fallback one. */
#define DEFAULT_PROGRESS_IMPLEMENTATION "bar"
diff --git a/src/recur.c b/src/recur.c
index 42b4eb1..1b25662 100644
--- a/src/recur.c
+++ b/src/recur.c
@@ -497,7 +497,7 @@ retrieve_tree (struct url *start_url_parsed, struct iri *pi)
if (file
&& (opt.delete_after
- || opt.spider /* opt.recursive is implicitely true */
+ || opt.spider /* opt.recursive is implicitly true */
|| !acceptable (file)))
{
/* Either --delete-after was specified, or we loaded this
@@ -698,7 +698,7 @@ download_child (const struct urlpos *upos, struct url *parent, int depth,
for directories (no file name to match) and for non-leaf HTMLs,
which can lead to other files that do need to be downloaded. (-p
automatically implies non-leaf because with -p we can, if
- necesary, overstep the maximum depth to get the page requisites.) */
+ necessary, overstep the maximum depth to get the page requisites.) */
if (u->file[0] != '\0'
&& !(has_html_suffix_p (u->file)
/* The exception only applies to non-leaf HTMLs (but -p
diff --git a/src/res.c b/src/res.c
index fb608dd..fd2a3cd 100644
--- a/src/res.c
+++ b/src/res.c
@@ -37,12 +37,12 @@ as that of the covered work. */
disallow access to certain parts of the site.
The first specification was written by Martijn Koster in 1994, and
- is still available at <http://www.robotstxt.org/wc/norobots.html>.
+ is still available at <http://www.robotstxt.org/orig.html>.
In 1996, Martijn wrote an Internet Draft specifying an improved RES
specification; however, that work was apparently abandoned since
the draft has expired in 1997 and hasn't been replaced since. The
draft is available at
- <http://www.robotstxt.org/wc/norobots-rfc.html>.
+ <http://www.robotstxt.org/norobots-rfc.txt>.
This file implements RES as specified by the draft. Note that this
only handles the "robots.txt" support. The META tag that controls
@@ -428,7 +428,7 @@ free_specs (struct robot_specs *specs)
/* The inner matching engine: return true if RECORD_PATH matches
URL_PATH. The rules for matching are described at
- <http://www.robotstxt.org/wc/norobots-rfc.txt>, section 3.2.2. */
+ <http://www.robotstxt.org/norobots-rfc.txt>, section 3.2.2. */
static bool
matches (const char *record_path, const char *url_path)
diff --git a/src/retr.c b/src/retr.c
index 5ba744f..6555ed4 100644
--- a/src/retr.c
+++ b/src/retr.c
@@ -41,6 +41,10 @@ as that of the covered work. */
# include <unixio.h> /* For delete(). */
#endif
+#ifdef HAVE_LIBZ
+# include <zlib.h>
+#endif
+
#include "exits.h"
#include "utils.h"
#include "retr.h"
@@ -84,6 +88,22 @@ limit_bandwidth_reset (void)
xzero (limit_data);
}
+#ifdef HAVE_LIBZ
+static voidpf
+zalloc (voidpf opaque, unsigned int items, unsigned int size)
+{
+ (void) opaque;
+ return (voidpf) xcalloc (items, size);
+}
+
+static void
+zfree (voidpf opaque, voidpf address)
+{
+ (void) opaque;
+ xfree (address);
+}
+#endif
+
/* Limit the bandwidth by pausing the download for an amount of time.
BYTES is the number of bytes received from the network, and TIMER
is the timer that started at the beginning of download. */
@@ -257,6 +277,44 @@ fd_read_body (const char *downloaded_filename, int fd, FILE *out, wgint toread,
wgint sum_written = 0;
wgint remaining_chunk_size = 0;
+#ifdef HAVE_LIBZ
+ /* try to minimize the number of calls to inflate() and write_data() per
+ call to fd_read() */
+ unsigned int gzbufsize = dlbufsize * 4;
+ char *gzbuf = NULL;
+ z_stream gzstream;
+
+ if (flags & rb_compressed_gzip)
+ {
+ gzbuf = xmalloc (gzbufsize);
+ if (gzbuf != NULL)
+ {
+ gzstream.zalloc = zalloc;
+ gzstream.zfree = zfree;
+ gzstream.opaque = Z_NULL;
+ gzstream.next_in = Z_NULL;
+ gzstream.avail_in = 0;
+
+ #define GZIP_DETECT 32 /* gzip format detection */
+ #define GZIP_WINDOW 15 /* logarithmic window size (default: 15) */
+ ret = inflateInit2 (&gzstream, GZIP_DETECT | GZIP_WINDOW);
+ if (ret != Z_OK)
+ {
+ xfree (gzbuf);
+ errno = (ret == Z_MEM_ERROR) ? ENOMEM : EINVAL;
+ ret = -1;
+ goto out;
+ }
+ }
+ else
+ {
+ errno = ENOMEM;
+ ret = -1;
+ goto out;
+ }
+ }
+#endif
+
if (flags & rb_skip_startpos)
skip = startpos;
@@ -320,6 +378,12 @@ fd_read_body (const char *downloaded_filename, int fd, FILE *out, wgint toread,
remaining_chunk_size = strtol (line, &endl, 16);
xfree (line);
+ if (remaining_chunk_size < 0)
+ {
+ ret = -1;
+ break;
+ }
+
if (remaining_chunk_size == 0)
{
ret = 0;
@@ -383,12 +447,64 @@ fd_read_body (const char *downloaded_filename, int fd, FILE *out, wgint toread,
int write_res;
sum_read += ret;
- write_res = write_data (out, out2, dlbuf, ret, &skip, &sum_written);
- if (write_res < 0)
+
+#ifdef HAVE_LIBZ
+ if (gzbuf != NULL)
{
- ret = (write_res == -3) ? -3 : -2;
- goto out;
+ int err;
+ int towrite;
+ gzstream.avail_in = ret;
+ gzstream.next_in = (unsigned char *) dlbuf;
+
+ do
+ {
+ gzstream.avail_out = gzbufsize;
+ gzstream.next_out = (unsigned char *) gzbuf;
+
+ err = inflate (&gzstream, Z_NO_FLUSH);
+
+ switch (err)
+ {
+ case Z_MEM_ERROR:
+ errno = ENOMEM;
+ ret = -1;
+ goto out;
+ case Z_NEED_DICT:
+ case Z_DATA_ERROR:
+ errno = EINVAL;
+ ret = -1;
+ goto out;
+ case Z_STREAM_END:
+ if (exact && sum_read != toread)
+ {
+ DEBUGP(("zlib stream ended unexpectedly after "
+ "%ld/%ld bytes\n", sum_read, toread));
+ }
+ }
+
+ towrite = gzbufsize - gzstream.avail_out;
+ write_res = write_data (out, out2, gzbuf, towrite, &skip,
+ &sum_written);
+ if (write_res < 0)
+ {
+ ret = (write_res == -3) ? -3 : -2;
+ goto out;
+ }
+ }
+ while (gzstream.avail_out == 0);
+ }
+ else
+#endif
+ {
+ write_res = write_data (out, out2, dlbuf, ret, &skip,
+ &sum_written);
+ if (write_res < 0)
+ {
+ ret = (write_res == -3) ? -3 : -2;
+ goto out;
+ }
}
+
if (chunked)
{
remaining_chunk_size -= ret;
@@ -433,6 +549,31 @@ fd_read_body (const char *downloaded_filename, int fd, FILE *out, wgint toread,
if (timer)
ptimer_destroy (timer);
+#ifdef HAVE_LIBZ
+ if (gzbuf != NULL)
+ {
+ int err = inflateEnd (&gzstream);
+ if (ret >= 0)
+ {
+ /* with compression enabled, ret must be 0 if successful */
+ if (err == Z_OK)
+ ret = 0;
+ else
+ {
+ errno = EINVAL;
+ ret = -1;
+ }
+ }
+ xfree (gzbuf);
+
+ if (gzstream.total_in != sum_read)
+ {
+ DEBUGP(("zlib read size differs from raw read size (%lu/%lu)\n",
+ gzstream.total_in, sum_read));
+ }
+ }
+#endif
+
if (qtyread)
*qtyread += sum_read;
if (qtywritten)
@@ -963,11 +1104,16 @@ retrieve_url (struct url * orig_parsed, const char *origurl, char **file,
u = url_parse (origurl, NULL, iri, true);
if (u)
{
- DEBUGP (("[IRI fallbacking to non-utf8 for %s\n", quote (url)));
- xfree (url);
- url = xstrdup (u->url);
- iri_fallbacked = 1;
- goto redirected;
+ if (strcmp(u->url, orig_parsed->url))
+ {
+ DEBUGP (("[IRI fallbacking to non-utf8 for %s\n", quote (url)));
+ xfree (url);
+ url = xstrdup (u->url);
+ iri_fallbacked = 1;
+ goto redirected;
+ }
+ else
+ DEBUGP (("[Needn't fallback to non-utf8 for %s\n", quote (url)));
}
else
DEBUGP (("[Couldn't fallback to non-utf8 for %s\n", quote (url)));
@@ -1141,7 +1287,7 @@ retrieve_from_file (const char *file, bool html, int *count)
if (parsed_url)
url_free (parsed_url);
- if (filename && opt.delete_after && file_exists_p (filename))
+ if (filename && opt.delete_after && file_exists_p (filename, NULL))
{
DEBUGP (("\
Removing file due to --delete-after in retrieve_from_file():\n"));
diff --git a/src/retr.h b/src/retr.h
index 5fbbacb..f133c83 100644
--- a/src/retr.h
+++ b/src/retr.h
@@ -49,7 +49,9 @@ enum {
rb_skip_startpos = 2,
/* Used by HTTP/HTTPS*/
- rb_chunked_transfer_encoding = 4
+ rb_chunked_transfer_encoding = 4,
+
+ rb_compressed_gzip = 8
};
int fd_read_body (const char *, int, FILE *, wgint, wgint, wgint *, wgint *, double *, int, FILE *);
diff --git a/src/url.c b/src/url.c
index c442891..5e01860 100644
--- a/src/url.c
+++ b/src/url.c
@@ -459,7 +459,7 @@ url_scheme (const char *url)
int i;
for (i = 0; supported_schemes[i].leading_string; i++)
- if (0 == strncasecmp (url, supported_schemes[i].leading_string,
+ if (0 == c_strncasecmp (url, supported_schemes[i].leading_string,
strlen (supported_schemes[i].leading_string)))
{
if (!(supported_schemes[i].flags & scm_disabled))
@@ -925,6 +925,17 @@ url_parse (const char *url, int *error, struct iri *iri, bool percent_encode)
url_unescape (u->host);
host_modified = true;
+ /* check for invalid control characters in host name */
+ for (p = u->host; *p; p++)
+ {
+ if (c_iscntrl(*p))
+ {
+ url_free(u);
+ error_code = PE_INVALID_HOST_NAME;
+ goto error;
+ }
+ }
+
/* Apply IDNA regardless of iri->utf8_encode status */
if (opt.enable_iri && iri)
{
@@ -933,7 +944,6 @@ url_parse (const char *url, int *error, struct iri *iri, bool percent_encode)
{
xfree (u->host);
u->host = new;
- u->idn_allocated = true;
host_modified = true;
}
}
@@ -1212,12 +1222,7 @@ url_free (struct url *url)
{
if (url)
{
- if (url->idn_allocated) {
- idn2_free (url->host); /* A dummy if !defined(ENABLE_IRI) */
- url->host = NULL;
- }
- else
- xfree (url->host);
+ xfree (url->host);
xfree (url->path);
xfree (url->url);
@@ -1544,6 +1549,7 @@ append_uri_pathel (const char *b, const char *e, bool escaped,
append_null (dest);
}
+#ifdef HAVE_ICONV
static char *
convert_fname (char *fname)
{
@@ -1562,9 +1568,9 @@ convert_fname (char *fname)
to_encoding = nl_langinfo (CODESET);
cd = iconv_open (to_encoding, from_encoding);
- if (cd == (iconv_t)(-1))
- logprintf (LOG_VERBOSE, _("Conversion from %s to %s isn't supported\n"),
- quote (from_encoding), quote (to_encoding));
+ if (cd == (iconv_t) (-1))
+ logprintf (LOG_VERBOSE, _ ("Conversion from %s to %s isn't supported\n"),
+ quote (from_encoding), quote (to_encoding));
else
{
inlen = strlen (fname);
@@ -1573,50 +1579,62 @@ convert_fname (char *fname)
done = 0;
for (;;)
- {
- if (iconv (cd, (ICONV_CONST char **) &fname, &inlen, &s, &outlen) != (size_t)(-1)
- && iconv (cd, NULL, NULL, &s, &outlen) != (size_t)(-1))
- {
- *(converted_fname + len - outlen - done) = '\0';
- iconv_close(cd);
- DEBUGP (("Converted file name '%s' (%s) -> '%s' (%s)\n",
- orig_fname, from_encoding, converted_fname, to_encoding));
- xfree (orig_fname);
- return converted_fname;
- }
-
- /* Incomplete or invalid multibyte sequence */
- if (errno == EINVAL || errno == EILSEQ)
- {
- logprintf (LOG_VERBOSE,
- _("Incomplete or invalid multibyte sequence encountered\n"));
- xfree (converted_fname);
- converted_fname = (char *)orig_fname;
- break;
- }
- else if (errno == E2BIG) /* Output buffer full */
- {
- done = len;
- len = outlen = done + inlen * 2;
- converted_fname = xrealloc (converted_fname, outlen + 1);
- s = converted_fname + done;
- }
- else /* Weird, we got an unspecified error */
- {
- logprintf (LOG_VERBOSE, _("Unhandled errno %d\n"), errno);
- xfree (converted_fname);
- converted_fname = (char *)orig_fname;
- break;
- }
- }
+ {
+ errno = 0;
+ if (iconv (cd, (ICONV_CONST char **) &fname, &inlen, &s, &outlen) == 0
+ && iconv (cd, NULL, NULL, &s, &outlen) == 0)
+ {
+ *(converted_fname + len - outlen - done) = '\0';
+ iconv_close (cd);
+ DEBUGP (("Converted file name '%s' (%s) -> '%s' (%s)\n",
+ orig_fname, from_encoding, converted_fname, to_encoding));
+ xfree (orig_fname);
+ return converted_fname;
+ }
+
+ /* Incomplete or invalid multibyte sequence */
+ if (errno == EINVAL || errno == EILSEQ || errno == 0)
+ {
+ if (errno)
+ logprintf (LOG_VERBOSE,
+ _ ("Incomplete or invalid multibyte sequence encountered\n"));
+ else
+ logprintf (LOG_VERBOSE,
+ _ ("Unconvertable multibyte sequence encountered\n"));
+ xfree (converted_fname);
+ converted_fname = (char *) orig_fname;
+ break;
+ }
+ else if (errno == E2BIG) /* Output buffer full */
+ {
+ done = len;
+ len = outlen = done + inlen * 2;
+ converted_fname = xrealloc (converted_fname, outlen + 1);
+ s = converted_fname + done;
+ }
+ else /* Weird, we got an unspecified error */
+ {
+ logprintf (LOG_VERBOSE, _ ("Unhandled errno %d\n"), errno);
+ xfree (converted_fname);
+ converted_fname = (char *) orig_fname;
+ break;
+ }
+ }
DEBUGP (("Failed to convert file name '%s' (%s) -> '?' (%s)\n",
- orig_fname, from_encoding, to_encoding));
+ orig_fname, from_encoding, to_encoding));
}
iconv_close(cd);
return converted_fname;
}
+#else
+static char *
+convert_fname (char *fname)
+{
+ return fname;
+}
+#endif
/* Append to DEST the directory structure that corresponds the
directory part of URL's path. For example, if the URL is
@@ -1732,6 +1750,9 @@ url_file_name (const struct url *u, char *replaced_filename)
fname_len_check = concat_strings (u_file, FN_QUERY_SEP_STR, u->query, NULL);
else
fname_len_check = strdupdelim (u_file, u_file + strlen (u_file));
+
+ /* convert before concat with local path */
+ fname_len_check = convert_fname (fname_len_check);
}
else
{
@@ -1793,8 +1814,6 @@ url_file_name (const struct url *u, char *replaced_filename)
xfree (temp_fnres.base);
- fname = convert_fname (fname);
-
/* Check the cases in which the unique extensions are not used:
1) Clobbering is turned off (-nc).
2) Retrieval with regetting.
@@ -1806,7 +1825,7 @@ url_file_name (const struct url *u, char *replaced_filename)
directory (see `mkalldirs' for explanation). */
if (ALLOW_CLOBBER
- && !(file_exists_p (fname) && !file_non_directory_p (fname)))
+ && !(file_exists_p (fname, NULL) && !file_non_directory_p (fname)))
{
unique = fname;
}
diff --git a/src/url.h b/src/url.h
index 94d1528..ad58739 100644
--- a/src/url.h
+++ b/src/url.h
@@ -99,10 +99,6 @@ struct url
/* Username and password (unquoted). */
char *user;
char *passwd;
-
- /* 'host' is allocated by idn2_lookup_u8() via idn_encode().
- * Call 'idn2_free()' to free this memory. */
- bool idn_allocated;
};
/* Function declarations */
diff --git a/src/utils.c b/src/utils.c
index db89ae1..81f2801 100644
--- a/src/utils.c
+++ b/src/utils.c
@@ -45,6 +45,7 @@ as that of the covered work. */
#include <assert.h>
#include <stdarg.h>
#include <locale.h>
+#include <errno.h>
#if HAVE_UTIME
# include <sys/types.h>
@@ -586,21 +587,42 @@ remove_link (const char *file)
return err;
}
-/* Does FILENAME exist? This is quite a lousy implementation, since
- it supplies no error codes -- only a yes-or-no answer. Thus it
- will return that a file does not exist if, e.g., the directory is
- unreadable. I don't mind it too much currently, though. The
- proper way should, of course, be to have a third, error state,
- other than true/false, but that would introduce uncalled-for
- additional complexity to the callers. */
+/* Does FILENAME exist? */
bool
-file_exists_p (const char *filename)
+file_exists_p (const char *filename, file_stats_t *fstats)
{
-#ifdef HAVE_ACCESS
- return access (filename, F_OK) >= 0;
-#else
struct stat buf;
- return stat (filename, &buf) >= 0;
+
+#if defined(WINDOWS) || defined(__VMS)
+ int ret = stat (filename, &buf);
+ if (ret >= 0)
+ {
+ if (fstats != NULL)
+ fstats->access_err = errno;
+ }
+ return ret >= 0;
+#else
+ errno = 0;
+ if (stat (filename, &buf) == 0 && S_ISREG(buf.st_mode) &&
+ (((S_IRUSR & buf.st_mode) && (getuid() == buf.st_uid)) ||
+ ((S_IRGRP & buf.st_mode) && group_member(buf.st_gid)) ||
+ (S_IROTH & buf.st_mode))) {
+ if (fstats != NULL)
+ {
+ fstats->access_err = 0;
+ fstats->st_ino = buf.st_ino;
+ fstats->st_dev = buf.st_dev;
+ }
+ return true;
+ }
+ else
+ {
+ if (fstats != NULL)
+ fstats->access_err = (errno == 0 ? EACCES : errno);
+ errno = 0;
+ return false;
+ }
+ /* NOTREACHED */
#endif
}
@@ -668,7 +690,7 @@ unique_name_1 (const char *prefix)
do
number_to_string (template_tail, count++);
- while (file_exists_p (template));
+ while (file_exists_p (template, NULL));
return xstrdup (template);
}
@@ -696,7 +718,7 @@ unique_name (const char *file, bool allow_passthrough)
{
/* If the FILE itself doesn't exist, return it without
modification. */
- if (!file_exists_p (file))
+ if (!file_exists_p (file, NULL))
return allow_passthrough ? (char *)file : xstrdup (file);
/* Otherwise, find a numeric suffix that results in unused file name
@@ -825,7 +847,7 @@ fopen_excl (const char *fname, int binary)
/* Manually check whether the file exists. This is prone to race
conditions, but systems without O_EXCL haven't deserved
better. */
- if (file_exists_p (fname))
+ if (file_exists_p (fname, NULL))
{
errno = EEXIST;
return NULL;
@@ -834,6 +856,113 @@ fopen_excl (const char *fname, int binary)
#endif /* not O_EXCL */
}
+/* fopen_stat() assumes that file_exists_p() was called earlier.
+ file_stats_t passed to this function was returned from file_exists_p()
+ This is to prevent TOCTTOU race condition.
+ Details : FIO45-C from https://www.securecoding.cert.org/
+ Note that for creating a new file, this check is not useful
+
+ Input:
+ fname => Name of file to open
+ mode => File open mode
+ fstats => Saved file_stats_t about file that was checked for existence
+
+ Returns:
+ NULL if there was an error
+ FILE * of opened file stream
+*/
+FILE *
+fopen_stat(const char *fname, const char *mode, file_stats_t *fstats)
+{
+ int fd;
+ FILE *fp;
+ struct stat fdstats;
+
+ fp = fopen (fname, mode);
+ if (fp == NULL)
+ {
+ logprintf (LOG_NOTQUIET, _("Failed to Fopen file %s\n"), fname);
+ return NULL;
+ }
+ fd = fileno (fp);
+ if (fd < 0)
+ {
+ logprintf (LOG_NOTQUIET, _("Failed to get FD for file %s\n"), fname);
+ fclose (fp);
+ return NULL;
+ }
+ memset(&fdstats, 0, sizeof(fdstats));
+ if (fstat (fd, &fdstats) == -1)
+ {
+ logprintf (LOG_NOTQUIET, _("Failed to stat file %s, (check permissions)\n"), fname);
+ fclose (fp);
+ return NULL;
+ }
+#if !(defined(WINDOWS) || defined(__VMS))
+ if (fstats != NULL &&
+ (fdstats.st_dev != fstats->st_dev ||
+ fdstats.st_ino != fstats->st_ino))
+ {
+ /* File changed since file_exists_p() : NOT SAFE */
+ logprintf (LOG_NOTQUIET, _("File %s changed since the last check. Security check failed."), fname);
+ fclose (fp);
+ return NULL;
+ }
+#endif
+
+ return fp;
+}
+
+/* open_stat assumes that file_exists_p() was called earlier to save file_stats
+ file_stats_t passed to this function was returned from file_exists_p()
+ This is to prevent TOCTTOU race condition.
+ Details : FIO45-C from https://www.securecoding.cert.org/
+ Note that for creating a new file, this check is not useful
+
+
+ Input:
+ fname => Name of file to open
+ flags => File open flags
+ mode => File open mode
+ fstats => Saved file_stats_t about file that was checked for existence
+
+ Returns:
+ -1 if there was an error
+ file descriptor of opened file stream
+*/
+int
+open_stat(const char *fname, int flags, mode_t mode, file_stats_t *fstats)
+{
+ int fd;
+ struct stat fdstats;
+
+ fd = open (fname, flags, mode);
+ if (fd < 0)
+ {
+ logprintf (LOG_NOTQUIET, _("Failed to open file %s, reason :%s\n"), fname, strerror(errno));
+ return -1;
+ }
+ memset(&fdstats, 0, sizeof(fdstats));
+ if (fstat (fd, &fdstats) == -1)
+ {
+ logprintf (LOG_NOTQUIET, _("Failed to stat file %s, error: %s\n"), fname, strerror(errno));
+ return -1;
+ }
+#if !(defined(WINDOWS) || defined(__VMS))
+ if (fstats != NULL &&
+ (fdstats.st_dev != fstats->st_dev ||
+ fdstats.st_ino != fstats->st_ino))
+ {
+ /* File changed since file_exists_p() : NOT SAFE */
+ logprintf (LOG_NOTQUIET, _("Trying to open file %s but it changed since last check. Security check failed."), fname);
+ close (fd);
+ return -1;
+ }
+#endif
+
+ return fd;
+}
+
/* Create DIRECTORY. If some of the pathname components of DIRECTORY
are missing, create them first. In case any mkdir() call fails,
return its error status. Returns 0 on successful completion.
@@ -862,7 +991,7 @@ make_directory (const char *directory)
/* Check whether the directory already exists. Allow creation of
of intermediate directories to fail, as the initial path components
are not necessarily directories! */
- if (!file_exists_p (dir))
+ if (!file_exists_p (dir, NULL))
ret = mkdir (dir, 0777);
else
ret = 0;
@@ -1606,7 +1735,7 @@ numdigit (wgint number)
{
int cnt = 1;
if (number < 0)
- ++cnt; /* accomodate '-' */
+ ++cnt; /* accommodate '-' */
while ((number /= 10) != 0)
++cnt;
return cnt;
@@ -2206,7 +2335,7 @@ wget_base64_encode (const void *data, size_t length, char *dest)
/* Decode data from BASE64 (a null-terminated string) into memory
pointed to by DEST. DEST is assumed to be large enough to
- accomodate the decoded data, which is guaranteed to be no more than
+ accommodate the decoded data, which is guaranteed to be no more than
3/4*strlen(base64).
Since DEST is assumed to contain binary data, it is not
@@ -2217,7 +2346,7 @@ wget_base64_encode (const void *data, size_t length, char *dest)
This function originates from Free Recode. */
ssize_t
-wget_base64_decode (const char *base64, void *dest)
+wget_base64_decode (const char *base64, void *dest, size_t size)
{
/* Table of base64 values for first 128 characters. Note that this
assumes ASCII (but so does Wget in other places). */
@@ -2241,7 +2370,8 @@ wget_base64_decode (const char *base64, void *dest)
#define IS_BASE64(c) ((IS_ASCII (c) && BASE64_CHAR_TO_VALUE (c) >= 0) || c == '=')
const char *p = base64;
- char *q = dest;
+ unsigned char *q = dest;
+ ssize_t n = 0;
while (1)
{
@@ -2263,7 +2393,12 @@ wget_base64_decode (const char *base64, void *dest)
if (c == '=' || !IS_BASE64 (c))
return -1; /* illegal char while decoding base64 */
value |= BASE64_CHAR_TO_VALUE (c) << 12;
- *q++ = value >> 16;
+ if (size)
+ {
+ *q++ = value >> 16;
+ size--;
+ }
+ n++;
/* Process third byte of a quadruplet. */
NEXT_CHAR (c, p);
@@ -2283,7 +2418,12 @@ wget_base64_decode (const char *base64, void *dest)
}
value |= BASE64_CHAR_TO_VALUE (c) << 6;
- *q++ = 0xff & value >> 8;
+ if (size)
+ {
+ *q++ = 0xff & value >> 8;
+ size--;
+ }
+ n++;
/* Process fourth byte of a quadruplet. */
NEXT_CHAR (c, p);
@@ -2295,12 +2435,17 @@ wget_base64_decode (const char *base64, void *dest)
return -1; /* illegal char while decoding base64 */
value |= BASE64_CHAR_TO_VALUE (c);
- *q++ = 0xff & value;
+ if (size)
+ {
+ *q++ = 0xff & value;
+ size--;
+ }
+ n++;
}
#undef IS_BASE64
#undef BASE64_CHAR_TO_VALUE
- return q - (char *) dest;
+ return n;
}
#ifdef HAVE_LIBPCRE
@@ -2597,7 +2742,7 @@ wg_pubkey_pem_to_der (const char *pem, unsigned char **der, size_t *der_len)
base64data = xmalloc (BASE64_LENGTH(stripped_pem_count));
- size = wget_base64_decode (stripped_pem, base64data);
+ size = wget_base64_decode (stripped_pem, base64data, BASE64_LENGTH(stripped_pem_count));
if (size < 0) {
xfree (base64data); /* malformed base64 from server */
@@ -2636,54 +2781,65 @@ wg_pin_peer_pubkey (const char *pinnedpubkey, const char *pubkey, size_t pubkeyl
return result;
/* only do this if pinnedpubkey starts with "sha256//", length 8 */
- if (strncmp (pinnedpubkey, "sha256//", 8) == 0) {
- /* compute sha256sum of public key */
- sha256sumdigest = xmalloc (SHA256_DIGEST_SIZE);
- sha256_buffer (pubkey, pubkeylen, sha256sumdigest);
- expectedsha256sumdigest = xmalloc (SHA256_DIGEST_SIZE + 1);
-
- /* it starts with sha256//, copy so we can modify it */
- pinkeylen = strlen (pinnedpubkey) + 1;
- pinkeycopy = xmalloc (pinkeylen);
- memcpy (pinkeycopy, pinnedpubkey, pinkeylen);
-
- /* point begin_pos to the copy, and start extracting keys */
- begin_pos = pinkeycopy;
- do
- {
- end_pos = strstr (begin_pos, ";sha256//");
- /*
- * if there is an end_pos, null terminate,
- * otherwise it'll go to the end of the original string
- */
- if (end_pos)
- end_pos[0] = '\0';
-
- /* decode base64 pinnedpubkey, 8 is length of "sha256//" */
- decoded_hash_length = wget_base64_decode (begin_pos + 8, expectedsha256sumdigest);
- /* if valid base64, compare sha256 digests directly */
- if (SHA256_DIGEST_SIZE == decoded_hash_length &&
- !memcmp (sha256sumdigest, expectedsha256sumdigest, SHA256_DIGEST_SIZE)) {
- result = true;
- break;
- }
-
- /*
- * change back the null-terminator we changed earlier,
- * and look for next begin
- */
- if (end_pos) {
- end_pos[0] = ';';
- begin_pos = strstr (end_pos, "sha256//");
+ if (strncmp (pinnedpubkey, "sha256//", 8) == 0)
+ {
+ /* compute sha256sum of public key */
+ sha256sumdigest = xmalloc (SHA256_DIGEST_SIZE);
+ sha256_buffer (pubkey, pubkeylen, sha256sumdigest);
+ expectedsha256sumdigest = xmalloc (SHA256_DIGEST_SIZE);
+
+ /* it starts with sha256//, copy so we can modify it */
+ pinkeylen = strlen (pinnedpubkey) + 1;
+ pinkeycopy = xmalloc (pinkeylen);
+ memcpy (pinkeycopy, pinnedpubkey, pinkeylen);
+
+ /* point begin_pos to the copy, and start extracting keys */
+ begin_pos = pinkeycopy;
+ do
+ {
+ end_pos = strstr (begin_pos, ";sha256//");
+ /*
+ * if there is an end_pos, null terminate,
+ * otherwise it'll go to the end of the original string
+ */
+ if (end_pos)
+ end_pos[0] = '\0';
+
+ /* decode base64 pinnedpubkey, 8 is length of "sha256//" */
+ decoded_hash_length = wget_base64_decode (begin_pos + 8, expectedsha256sumdigest, SHA256_DIGEST_SIZE);
+
+ /* if valid base64, compare sha256 digests directly */
+ if (SHA256_DIGEST_SIZE == decoded_hash_length)
+ {
+ if (!memcmp (sha256sumdigest, expectedsha256sumdigest, SHA256_DIGEST_SIZE))
+ {
+ result = true;
+ break;
+ }
+ }
+ else
+ logprintf (LOG_VERBOSE, _ ("Skipping key with wrong size (%d/%d): %s\n"),
+ (strlen (begin_pos + 8) * 3) / 4, SHA256_DIGEST_SIZE,
+ quote (begin_pos + 8));
+
+ /*
+ * change back the null-terminator we changed earlier,
+ * and look for next begin
+ */
+ if (end_pos)
+ {
+ end_pos[0] = ';';
+ begin_pos = strstr (end_pos, "sha256//");
+ }
}
- } while (end_pos && begin_pos);
+ while (end_pos && begin_pos);
- xfree (sha256sumdigest);
- xfree (expectedsha256sumdigest);
- xfree (pinkeycopy);
+ xfree (sha256sumdigest);
+ xfree (expectedsha256sumdigest);
+ xfree (pinkeycopy);
- return result;
- }
+ return result;
+ }
/* fall back to assuming this is a file path */
fm = wget_read_file (pinnedpubkey);
@@ -2703,11 +2859,12 @@ wg_pin_peer_pubkey (const char *pinnedpubkey, const char *pubkey, size_t pubkeyl
goto cleanup;
/* If the sizes are the same, it can't be base64 encoded, must be der */
- if (pubkeylen == size) {
- if (!memcmp (pubkey, fm->content, pubkeylen))
- result = true;
- goto cleanup;
- }
+ if (pubkeylen == size)
+ {
+ if (!memcmp (pubkey, fm->content, pubkeylen))
+ result = true;
+ goto cleanup;
+ }
/*
* Otherwise we will assume it's PEM and try to decode it
@@ -2729,7 +2886,7 @@ wg_pin_peer_pubkey (const char *pinnedpubkey, const char *pubkey, size_t pubkeyl
if (pubkeylen == pem_len && !memcmp (pubkey, pem_ptr, pubkeylen))
result = true;
- cleanup:
+cleanup:
xfree (buf);
xfree (pem_ptr);
wget_read_file_free (fm);
diff --git a/src/utils.h b/src/utils.h
index aaac730..9936d97 100644
--- a/src/utils.h
+++ b/src/utils.h
@@ -78,15 +78,23 @@ void fork_to_background (void);
char *aprintf (const char *, ...) GCC_FORMAT_ATTR (1, 2);
char *concat_strings (const char *, ...);
+typedef struct file_stat_s {
+ int access_err; /* Error in accessing file: not present vs permission */
+ ino_t st_ino; /* st_ino from stat() on the file before open() */
+ dev_t st_dev; /* st_dev from stat() on the file before open() */
+} file_stats_t;
+
void touch (const char *, time_t);
int remove_link (const char *);
-bool file_exists_p (const char *);
+bool file_exists_p (const char *, file_stats_t *);
bool file_non_directory_p (const char *);
wgint file_size (const char *);
int make_directory (const char *);
char *unique_name (const char *, bool);
FILE *unique_create (const char *, bool, char **);
FILE *fopen_excl (const char *, int);
+FILE *fopen_stat (const char *, const char *, file_stats_t *);
+int open_stat (const char *, int, mode_t, file_stats_t *);
char *file_merge (const char *, const char *);
int fnmatch_nocase (const char *, const char *, int);
@@ -141,7 +149,7 @@ void xsleep (double);
#define BASE64_LENGTH(len) (4 * (((len) + 2) / 3))
size_t wget_base64_encode (const void *, size_t, char *);
-ssize_t wget_base64_decode (const char *, void *);
+ssize_t wget_base64_decode (const char *, void *, size_t);
#ifdef HAVE_LIBPCRE
void *compile_pcre_regex (const char *);
diff --git a/src/warc.c b/src/warc.c
index bc1aafa..6a4b9eb 100644
--- a/src/warc.c
+++ b/src/warc.c
@@ -32,6 +32,7 @@ as that of the covered work. */
#include "utils.h"
#include "version.h"
#include "dirname.h"
+#include "url.h"
#include <stdio.h>
#include <stdlib.h>
@@ -1365,6 +1366,8 @@ warc_write_cdx_record (const char *url, const char *timestamp_str,
mime_type = "-";
if (redirect_location == NULL || strlen(redirect_location) == 0)
redirect_location = "-";
+ else
+ redirect_location = url_escape(redirect_location);
number_to_string (offset_string, offset);
@@ -1413,7 +1416,7 @@ warc_write_revisit_record (const char *url, const char *timestamp_str,
warc_write_header ("WARC-Refers-To", refers_to);
warc_write_header ("WARC-Profile", "http://netpreserve.org/warc/1.0/revisit/identical-payload-digest");
warc_write_header ("WARC-Truncated", "length");
- warc_write_header ("WARC-Target-URI", url);
+ warc_write_header_uri ("WARC-Target-URI", url);
warc_write_date_header (timestamp_str);
warc_write_ip_header (ip);
warc_write_header ("Content-Type", "application/http;msgtype=response");
@@ -1505,7 +1508,7 @@ warc_write_response_record (const char *url, const char *timestamp_str,
warc_write_header ("WARC-Record-ID", response_uuid);
warc_write_header ("WARC-Warcinfo-ID", warc_current_warcinfo_uuid_str);
warc_write_header ("WARC-Concurrent-To", concurrent_to_uuid);
- warc_write_header ("WARC-Target-URI", url);
+ warc_write_header_uri ("WARC-Target-URI", url);
warc_write_date_header (timestamp_str);
warc_write_ip_header (ip);
warc_write_header ("WARC-Block-Digest", block_digest);
@@ -1562,7 +1565,7 @@ warc_write_record (const char *record_type, const char *resource_uuid,
warc_write_header ("WARC-Record-ID", resource_uuid);
warc_write_header ("WARC-Warcinfo-ID", warc_current_warcinfo_uuid_str);
warc_write_header ("WARC-Concurrent-To", concurrent_to_uuid);
- warc_write_header ("WARC-Target-URI", url);
+ warc_write_header_uri ("WARC-Target-URI", url);
warc_write_date_header (timestamp_str);
warc_write_ip_header (ip);
warc_write_digest_headers (body, payload_offset);