summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
Diffstat (limited to 'src')
-rw-r--r--src/ChangeLog363
-rw-r--r--src/Makefile.am4
-rw-r--r--src/Makefile.in230
-rw-r--r--src/build_info.c2
-rw-r--r--src/build_info.c.in2
-rw-r--r--src/config.h.in329
-rw-r--r--src/connect.c41
-rw-r--r--src/connect.h1
-rw-r--r--src/convert.c9
-rw-r--r--src/convert.h4
-rw-r--r--src/cookies.c11
-rw-r--r--src/css-url.c3
-rw-r--r--src/css-url.h1
-rw-r--r--src/exits.c8
-rw-r--r--src/exits.h4
-rw-r--r--src/ftp-basic.c10
-rw-r--r--src/ftp.c126
-rw-r--r--src/gnutls.c178
-rw-r--r--src/hash.c6
-rw-r--r--src/hash.h2
-rw-r--r--src/host.c17
-rw-r--r--src/host.h4
-rw-r--r--src/html-parse.c6
-rw-r--r--src/html-url.c9
-rw-r--r--src/http.c592
-rw-r--r--src/init.c129
-rw-r--r--src/log.c66
-rw-r--r--src/log.h4
-rw-r--r--src/main.c221
-rw-r--r--src/openssl.c23
-rw-r--r--src/options.h25
-rw-r--r--src/progress.c15
-rw-r--r--src/ptimer.c4
-rw-r--r--src/recur.c5
-rw-r--r--src/retr.c103
-rw-r--r--src/retr.h2
-rw-r--r--src/spider.c2
-rw-r--r--src/ssl.h4
-rw-r--r--src/test.c2
-rw-r--r--src/url.c11
-rw-r--r--src/utils.c121
-rw-r--r--src/utils.h10
-rw-r--r--src/warc.c1440
-rw-r--r--src/warc.h23
-rw-r--r--src/wget.h4
45 files changed, 3756 insertions, 420 deletions
diff --git a/src/ChangeLog b/src/ChangeLog
index 8eae78f..8fcd0bf 100644
--- a/src/ChangeLog
+++ b/src/ChangeLog
@@ -1,3 +1,366 @@
+2012-07-03 Steven Schubiger <stsc@member.fsf.org>
+
+ * init.c: Include warc.h for warc_close in cleanup function.
+
+2012-07-08 Steven Schubiger <stsc@member.fsf.org>
+
+ * exits.h: Fix comment.
+ * exits.c: Likewise.
+
+2012-07-07 Tim Ruehsen <tim.ruehsen@gmx.de>
+
+ (digest_authentication_encode): Add support for RFC 2617 Digest
+ Access Authentication.
+
+2012-07-07 Giuseppe Scrivano <gscrivano@gnu.org>
+
+ * http.c (http_loop): Fix log message.
+ * main.c (main): Likewise.
+ Reported by: Petr Pisar <petr.pisar@atlas.cz>
+
+2012-06-17 Giuseppe Scrivano <gscrivano@gnu.org>
+
+ * wget.h: Define `CLOSEFAILED'.
+ * init.c: Include "exits.h".
+ (cleanup): Check `fclose' failure.
+ * exits.c (get_status_for_err): Handle `CLOSEFAILED'.
+
+2012-06-16 Giuseppe Scrivano <gscrivano@gnu.org>
+
+ * main.c (main): Move some cleanup-related functions to...
+ * init.c (cleanup): ...here.
+
+ * main.c: Do not include "stdout.h".
+ (main): Do not register `close_stdout' at exit.
+ Reported by: Micah Cowan <micah@cowan.name>.
+
+2012-06-09 Giuseppe Scrivano <gscrivano@gnu.org>
+
+ * main.c (print_help): Move --report-speed under the section
+ "Logging and input file".
+
+2012-06-06 Giuseppe Scrivano <gscrivano@gnu.org>
+
+ * main.c (print_help): Rename --bits to --report-bps.
+ (cmdline_options): Likewise.
+ * init.c (commands): Rename --report-bps to --report-speed.
+ (cmd_spec_report_speed): New function.
+
+ * options.h (struct options): Rename `bits_fmt' to `report_bps'.
+ * main.c (print_help): Rename --bits to --report-bps.
+ (cmdline_options): Likewise.
+ * init.c (commands): Likewise.
+
+ * progress.c (create_image): Adjust caller.
+ * retr.c (retr_rate): Likewise.
+ * utils.c (convert_to_bits): Likewise.
+
+2012-06-04 Tim Ruehsen <tim.ruehsen@gmx.de>
+
+ * main.c (main): Check for filename != NULL.
+ * warc.c (warc_process_cdx_line): Fix memory leak.
+ * utils.c (match_posix_regex, compile_posix_regex): Remove dead
+ assignment.
+ * openssl.c (ssl_init): Fix old-style function definition.
+
+2012-06-02 Giuseppe Scrivano <gscrivano@gnu.org>
+
+ * connect.c: Include <sys/socket.h> and <sys/select.h>.
+
+2012-05-30 Gijs van Tulder <gvtulder@gmail.com>
+
+ * warc.c: Fix segfault if CDX record is not found.
+
+2011-05-26 Steven Schweda <sms@antinode.info>
+ * connect.c [HAVE_SYS_SOCKET_H]: Include <sys/socket.h>.
+ [HAVE_SYS_SELECT_H]: Include <sys/select.h>.
+
+2012-05-26 Mike Frysinger <vapier@gentoo.org>
+
+ * warc.c: Change type of `warc_current_gzfile' to gzFile.
+
+2012-05-26 Giuseppe Scrivano <gscrivano@gnu.org>
+
+ * warc.c (warc_load_cdx_dedup_file): Change type of `line_length' to
+ ssize_t.
+ Suggested by: Ángel González <keisial@gmail.com>
+
+2012-05-18 Tim Ruehsen <tim.ruehsen@gmx.de>
+
+ * gnutls.c (wgnutls_poll): Honor the specified `timeout' value.
+ (wgnutls_peek): Likewise.
+
+2012-05-19 illusionoflife <illusion.of.life92@gmail.com> (tiny change)
+
+ * convert.c (register_html, register_css): Fix function signatures to
+ not accept an unused argument.
+ * retr.c (retrieve_url): Change register_{css,html} usage according
+ to the new signature.
+
+2012-05-16 Giuseppe Scrivano <gscrivano@gnu.org>
+
+ * warc.h: Limit line length to 80 columns.
+ * warc.c: Likewise.
+
+2012-05-14 Tim Ruehsen <tim.ruehsen@gmx.de>
+
+ * gnutls.c (wgnutls_read_timeout): removed warnings, moved fcntl stuff
+ outside loop.
+
+ * hash.h (hash_table_put): Make argument "value" const.
+ * hash.c (hash_table_put): Make argument value const. Cast `value' to
+ void.
+ * http.c (request_set_header): Make argument `name' const. Cast `value'
+ and `name' to void*.
+ (request_remove_header): Make argument `name' const.
+ * url.c (url_file_name): Make `index_filename' static.
+ * warc.h (warc_write_cdx_record): Make `url', `timestamp', `mime_type',
+ `payload_digest', `redirect_location', `warc_filename', `response_uuid'
+ arguments const. Make `checksum' const.
+ * warc.c (warc_write_date_header): Make the `timestamp' argument const.
+ Make `extension' const.
+ (warc_write_cdx_record): Make `url', `timestamp', `mime_type',
+ `payload_digest', `redirect_location', `warc_filename', `response_uuid'
+ arguments const. Make `checksum' const.
+
+2012-05-13 Tim Ruehsen <tim.ruehsen@gmx.de>
+
+ * gnutls.c (credentials): Change type to
+ gnutls_certificate_credentials_t.
+ (ssl_init): Do not use deprecated types.
+ (ssl_connect_wget): Likewise.
+
+2012-04-11 Gijs van Tulder <gvtulder@gmail.com>
+
+ * init.c: Add --accept-regex, --reject-regex and --regex-type.
+ * main.c: Likewise.
+ * options.h: Likewise.
+ * recur.c: Likewise.
+ * utils.c: Add regex-related functions.
+ * utils.h: Add regex-related functions.
+
+2012-03-30 Tim Ruehsen <tim.ruehsen@gmx.de>
+
+ * convert.c (convert_links_in_hashtable): Make it static.
+ * cookies.c (parse_set_cookie): Remove empty else branches.
+ * css-url.c: Include "css-url.h".
+ (get_uri_string): Make it static.
+ * css-url.h (get_urls_css): Add prototype.
+ * gnutls.c (ssl_init): Add prototype.
+ * html-parse.c (tagstack_push): Make it static.
+ * html-parse.c (tagstack_pop): Make it static.
+ * html-parse.c (tagstack_find): Make it static.
+ * html-url.c (cleanup_html_url): Make it static.
+ * progress.c (count_cols): Make it static.
+ * progress.c (get_eta): Make it static.
+ * retr.h (convert_to_bits): Remove prototype.
+ * utils.h (convert_to_bits): Add prototype.
+ * spider.c (spider_cleanup): Make it static.
+ * warc.c (warc_write_start_record): Add prototype.
+ * warc.c (warc_write_end_record): Add prototype.
+ * warc.c (warc_start_cdx_file): Add prototype.
+ * warc.c (warc_init): Add prototype.
+ * warc.c (warc_load_cdx_dedup_file): Add prototype.
+ * warc.c (warc_write_metadata): Add prototype.
+ * warc.c (warc_close): Add prototype.
+ * warc.c (warc_tempfile): Add prototype.
+ * warc.c (warc_write_warcinfo_record): Make it static.
+ * warc.c (warc_load_cdx_dedup_file): Make it static.
+ * warc.c (warc_write_metadata): Make it static.
+ * warc.h (warc_init): Fix prototype.
+ * warc.h (warc_close): Fix prototype.
+ * warc.h (warc_tempfile): Fix prototype.
+
+2012-03-30 Tim Ruehsen <tim.ruehsen@gmx.de>
+
+ * url.c: Use empty query in local filenames.
+
+2012-04-22 Tim Ruehsen <tim.ruehsen@gmx.de>
+
+ * main.c (main): Dynamically allocate `opt.progress_type'.
+
+2012-04-21 Tim Ruehsen <tim.ruehsen@gmx.de>
+
+ * ftp-basic.c (ftp_pasv): Fix memory leak.
+
+ * http.c (gethttp): Fix memory leak.
+
+ * ftp.c (getftp): Silence compiler warning.
+
+2009-06-14 Phil Pennock <mutt-dev@spodhuis.org> (tiny change)
+ * host.h: Declare `is_valid_ip_address'.
+ * host.c (is_valid_ip_address): New function.
+ * http.c (gethttp): Specify the hostname to ssl_connect_wget.
+ * gnutls.c (ssl_connect_wget): Specify the server name.
+ * openssl.c (ssl_connect_wget): Likewise.
+ * ssl.h: Change method signature for ssl_connect_wget.
+
+2012-04-13 Tim Ruehsen <tim.ruehsen@gmx.de> (tiny change)
+
+ * warc.c (warc_load_cdx_dedup_file): Fix a memory leak by freeing
+ `lineptr'.
+
+2012-04-07 Daniel Kahn Gillmor <dkg@fifthhorseman.net> (tiny change)
+
+ * gnutls.c (key_type_to_gnutls_type): New function.
+ (ssl_init): Use correctly the specified gnutls certificate.
+
+2012-04-01 Gijs van Tulder <gvtulder@gmail.com>
+
+ * html-url.c: Prevent crash on incomplete STYLE tag.
+
+2012-04-01 Giuseppe Scrivano <gscrivano@gnu.org>
+
+ * gnutls.c (wgnutls_read_timeout): Ensure timer is freed.
+
+ * gnutls.c (wgnutls_read_timeout): Do not use timer if it is not
+ allocated.
+ Reported by: Xu Zhongxing <xu_zhong_xing@163.com>
+
+2012-03-30 Tim Ruehsen <tim.ruehsen@gmx.de> (tiny change)
+
+ * warc.c: make warc_uuid_str() implementation depend on HAVE_LIBUUID.
+
+2012-03-29 Tim Ruehsen <tim.ruehsen@gmx.de> (tiny change)
+
+ * utils.c (library): Include <sys/time.h>.
+
+2012-03-25 Giuseppe Scrivano <gscrivano@gnu.org>
+
+ * utils.c: Include <sys/ioctl.h>.
+
+ * ptimer.c: Include <sys/time.h>.
+
+ * connect.c: Include <sys/socket.h>, <sys/select.h>, <sys/time.h>.
+ Reported by: Ray Satiro <raysatiro@yahoo.com>.
+
+2012-03-25 Ray Satiro <raysatiro@yahoo.com>
+
+ * build_info.c.in: Check that HAVE_LIBSSL32 is defined when OpenSSL
+ is used.
+
+2012-03-07 Steven Schubiger <stsc@member.fsf.org>
+
+ * init.c (wgetrc_user_file_name): Correct typo.
+
+2012-03-06 Sasikantha Babu <sasikanth.v19@gmail.com>
+
+ * utils.c (convert_to_bits): Added new function convert_to_bits to
+ convert bytes to bits.
+ * retr.c (calc_rate): Modified the function to handle --bits
+ option and download rate calculated as bits per sec (SI-prefix)
+ for --bits otherwise bytes (IEC-prefix).
+ (retr_rate): Rates will display in bits per sec for --bits.
+ * options.h (struct opt): Added --bits option bool variable bits_fmt.
+ * main.c (print_help): Added help for --bits.
+ * init.c: Defined command for --bits option.
+ * retr.h: Added function prototype.
+
+2012-02-26 Giuseppe Scrivano <gscrivano@gnu.org>
+
+ * main.c: Include "closeout.h"
+ (main): Register close_stdout at exit.
+
+2012-02-01 Gijs van Tulder <gvtulder@gmail.com>
+
+ * warc.c: Fix large file support with ftello, fseeko.
+ * warc.h: Fix large file support.
+ * http.c: Fix large file support.
+
+2012-02-23 Giuseppe Scrivano <giuseppe@southpole.se>
+
+ * main.c (main): Write diagnostic messages to `stderr' not to `stdout'.
+
+ * main.c (main): Fail gracefully if `malloc' fails.
+
+ * gnutls.c (wgnutls_read): Remove unused variables `timer' and `flags'.
+
+2012-02-17 Steven Schubiger <stsc@member.fsf.org>
+
+ * warc.c: Add license header.
+
+2012-01-27 Gijs van Tulder <gvtulder@gmail.com>
+
+ * retr.c (fd_read_body): If the response is chunked, the chunk
+ headers are now written to the WARC file, making the WARC file
+ an exact copy of the HTTP response.
+
+2012-01-27 Gijs van Tulder <gvtulder@gmail.com>
+
+ * retr.c (fd_read_body): Fix a memory leak with chunked responses.
+ * http.c (skip_short_body): Fix the same memory leak.
+
+2012-01-09 Gijs van Tulder <gvtulder@gmail.com>
+
+ * init.c: Disable WARC compression if zlib is disabled.
+ * main.c: Do not show the 'no-warc-compression' option if zlib is
+ disabled.
+ * warc.c: Do not compress WARC files if zlib is disabled.
+
+2012-01-09 Sasikantha Babu <sasikanth.v19@gmail.com> (tiny change)
+ * connect.c (connect_to_ip): properly formatted ipv6 address display.
+ (socket_family): New function - returns socket family type.
+ * http.c (gethttp): properly formatted ipv6 address display.
+
+2011-11-09 Gijs van Tulder <address@hidden>
+
+ * warc.c: Call gzdopen() with wb9 instead of wb+9, which fails on
+ zlib version >= 1.2.4.
+
+2011-11-04 Steven Schweda <address@hidden>
+
+ * warc.c [! WINDOWS]: Include <libgen.h>.
+ (warc_write_warcinfo_record): Assign a new allocated buffer and
+ free it on errors.
+
+2011-11-01 Steven Schweda <address@hidden>
+
+ * gnutls.c (ssl_init): Ensure GNU TLS is loaded only once.
+
+2011-10-07 Steven Schweda <address@hidden>
+
+ * connect.c: Add HAVE_SYS_SELECT_H and HAVE_SYS_SOCKET_H conditions
+ on includes of <sys/select.h> and <sys/socket.h>, respectively.
+ * ftp.c (getftp): Move BIN_TYPE_TRANSFER macro into VMS-specific
+ section. On VMS, use Stream_LF attributes for listing files. Pass
+ BIN_TYPE_FILE to fopen_excl() instead of constant-everywhere "true".
+ * ftp.c (ftp_retrieve_list): Restore lost test of opt.preserve_perm
+ (--preserve-permissions) on the chmod() operation.
+ * init.c, main.c: Remove "deprecated" from opt.preserve_perm
+ (--preserve-permissions).
+ * init.c (initialize): Use distinct messages for errors in C macro
+ SYSTEM_WGETRC and environment-variable SYSTEM_WGETRC. Avoid use of
+ C macro SYSTEM_WGETRC when it's not defined.
+ * log.c (log_close): Avoid closing logfp when it's stderr.
+ * main.c (print_help): Restore --preserve-permissions.
+ * main.c (main): Avoid using a negative value of longindex as a
+ subscript (for long_options[]) when searching for "--config".
+ * main.c (main): Exit the program using exit() instead of "return".
+ (VMS handles these differently, and exit() is better.)
+ * openssl.c (ssl_init): Add type cast (SSL_METHOD *) to newly "const"
+ "meth" argument to accommodate OpenSSL version 0.9.8, where that
+ argument is not "const" in the OpenSSL function (SSL_CTX_new).
+ * test.c: Declare "program_argstring".
+ * utils.c (fopen_excl): Comment typography.
+ * warc.h: New file.
+ * warc.c: New file.
+
+2011-10-02 Henrik Holst <henrik.holst@millistream.com> (tiny change)
+ * http.c (gethttp): If 'contentonerror' is used then do not
+ skip the http body on 4xx and 5xx errors.
+
+ * init.c (commands): Add 'contentonerror'.
+
+ * main.c (print_help, option_data): Add new option 'contentonerror'
+ to make wget not skip the http content on 4xx and 5xx errors.
+
+ * options.h: New variable 'content_on_error'.
+
+2011-09-19 Giuseppe Scrivano <gscrivano@gnu.org>
+
+ * main.c (print_version): Update copyright year.
+ (print_version): Fix typo.
+
2011-09-13 Giuseppe Scrivano <gscrivano@gnu.org>
* ftp.c (ftp_retrieve_glob): Propagate correctly the `res' error
diff --git a/src/Makefile.am b/src/Makefile.am
index 6b95198..8ef931a 100644
--- a/src/Makefile.am
+++ b/src/Makefile.am
@@ -46,13 +46,13 @@ wget_SOURCES = cmpt.c connect.c convert.c cookies.c ftp.c \
css_.c css-url.c \
ftp-basic.c ftp-ls.c hash.c host.c html-parse.c html-url.c \
http.c init.c log.c main.c netrc.c progress.c ptimer.c \
- recur.c res.c retr.c spider.c url.c \
+ recur.c res.c retr.c spider.c url.c warc.c \
utils.c exits.c build_info.c $(IRI_OBJ) \
css-url.h css-tokens.h connect.h convert.h cookies.h \
ftp.h hash.h host.h html-parse.h html-url.h \
http.h http-ntlm.h init.h log.h mswindows.h netrc.h \
options.h progress.h ptimer.h recur.h res.h retr.h \
- spider.h ssl.h sysdep.h url.h utils.h wget.h iri.h \
+ spider.h ssl.h sysdep.h url.h warc.h utils.h wget.h iri.h \
exits.h gettext.h
nodist_wget_SOURCES = version.c
EXTRA_wget_SOURCES = iri.c
diff --git a/src/Makefile.in b/src/Makefile.in
index 2998df2..dc0b856 100644
--- a/src/Makefile.in
+++ b/src/Makefile.in
@@ -1,9 +1,9 @@
-# Makefile.in generated by automake 1.11.1 from Makefile.am.
+# Makefile.in generated by automake 1.11.6 from Makefile.am.
# @configure_input@
# Copyright (C) 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002,
-# 2003, 2004, 2005, 2006, 2007, 2008, 2009 Free Software Foundation,
-# Inc.
+# 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011 Free Software
+# Foundation, Inc.
# This Makefile.in is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it,
# with or without modifications, as long as this notice is preserved.
@@ -49,6 +49,23 @@
#
VPATH = @srcdir@
+am__make_dryrun = \
+ { \
+ am__dry=no; \
+ case $$MAKEFLAGS in \
+ *\\[\ \ ]*) \
+ echo 'am--echo: ; @echo "AM" OK' | $(MAKE) -f - 2>/dev/null \
+ | grep '^AM OK$$' >/dev/null || am__dry=yes;; \
+ *) \
+ for am__flg in $$MAKEFLAGS; do \
+ case $$am__flg in \
+ *=*|--*) ;; \
+ *n*) am__dry=yes; break;; \
+ esac; \
+ done;; \
+ esac; \
+ test $$am__dry = yes; \
+ }
pkgdatadir = $(datadir)/@PACKAGE@
pkgincludedir = $(includedir)/@PACKAGE@
pkglibdir = $(libdir)/@PACKAGE@
@@ -74,76 +91,89 @@ DIST_COMMON = $(srcdir)/Makefile.am $(srcdir)/Makefile.in \
ACLOCAL_M4 = $(top_srcdir)/aclocal.m4
am__aclocal_m4_deps = $(top_srcdir)/m4/00gnulib.m4 \
$(top_srcdir)/m4/alloca.m4 $(top_srcdir)/m4/arpa_inet_h.m4 \
- $(top_srcdir)/m4/asm-underscore.m4 \
- $(top_srcdir)/m4/clock_time.m4 $(top_srcdir)/m4/close.m4 \
- $(top_srcdir)/m4/codeset.m4 $(top_srcdir)/m4/configmake.m4 \
- $(top_srcdir)/m4/dirname.m4 \
+ $(top_srcdir)/m4/asm-underscore.m4 $(top_srcdir)/m4/base32.m4 \
+ $(top_srcdir)/m4/btowc.m4 $(top_srcdir)/m4/clock_time.m4 \
+ $(top_srcdir)/m4/close.m4 $(top_srcdir)/m4/codeset.m4 \
+ $(top_srcdir)/m4/configmake.m4 $(top_srcdir)/m4/dirname.m4 \
$(top_srcdir)/m4/double-slash-root.m4 $(top_srcdir)/m4/dup2.m4 \
$(top_srcdir)/m4/environ.m4 $(top_srcdir)/m4/errno_h.m4 \
- $(top_srcdir)/m4/error.m4 $(top_srcdir)/m4/extensions.m4 \
+ $(top_srcdir)/m4/error.m4 $(top_srcdir)/m4/exponentd.m4 \
+ $(top_srcdir)/m4/extensions.m4 \
+ $(top_srcdir)/m4/extern-inline.m4 \
$(top_srcdir)/m4/fatal-signal.m4 $(top_srcdir)/m4/fcntl-o.m4 \
$(top_srcdir)/m4/fcntl.m4 $(top_srcdir)/m4/fcntl_h.m4 \
$(top_srcdir)/m4/float_h.m4 $(top_srcdir)/m4/fseek.m4 \
- $(top_srcdir)/m4/fseeko.m4 $(top_srcdir)/m4/futimens.m4 \
- $(top_srcdir)/m4/getaddrinfo.m4 $(top_srcdir)/m4/getdelim.m4 \
- $(top_srcdir)/m4/getdtablesize.m4 $(top_srcdir)/m4/getline.m4 \
- $(top_srcdir)/m4/getopt.m4 $(top_srcdir)/m4/getpass.m4 \
- $(top_srcdir)/m4/gettext.m4 $(top_srcdir)/m4/gettime.m4 \
- $(top_srcdir)/m4/gettimeofday.m4 $(top_srcdir)/m4/glibc21.m4 \
- $(top_srcdir)/m4/gnulib-common.m4 \
+ $(top_srcdir)/m4/fseeko.m4 $(top_srcdir)/m4/fstat.m4 \
+ $(top_srcdir)/m4/ftell.m4 $(top_srcdir)/m4/ftello.m4 \
+ $(top_srcdir)/m4/futimens.m4 $(top_srcdir)/m4/getaddrinfo.m4 \
+ $(top_srcdir)/m4/getdelim.m4 $(top_srcdir)/m4/getdtablesize.m4 \
+ $(top_srcdir)/m4/getline.m4 $(top_srcdir)/m4/getopt.m4 \
+ $(top_srcdir)/m4/getpass.m4 $(top_srcdir)/m4/gettext.m4 \
+ $(top_srcdir)/m4/gettime.m4 $(top_srcdir)/m4/gettimeofday.m4 \
+ $(top_srcdir)/m4/glibc21.m4 $(top_srcdir)/m4/gnulib-common.m4 \
$(top_srcdir)/m4/gnulib-comp.m4 $(top_srcdir)/m4/hostent.m4 \
$(top_srcdir)/m4/iconv.m4 $(top_srcdir)/m4/iconv_h.m4 \
$(top_srcdir)/m4/include_next.m4 $(top_srcdir)/m4/inet_ntop.m4 \
$(top_srcdir)/m4/inline.m4 $(top_srcdir)/m4/intlmacosx.m4 \
$(top_srcdir)/m4/intmax_t.m4 $(top_srcdir)/m4/inttypes_h.m4 \
- $(top_srcdir)/m4/ioctl.m4 $(top_srcdir)/m4/largefile.m4 \
- $(top_srcdir)/m4/lib-ld.m4 $(top_srcdir)/m4/lib-link.m4 \
- $(top_srcdir)/m4/lib-prefix.m4 \
+ $(top_srcdir)/m4/ioctl.m4 $(top_srcdir)/m4/langinfo_h.m4 \
+ $(top_srcdir)/m4/largefile.m4 $(top_srcdir)/m4/lib-ld.m4 \
+ $(top_srcdir)/m4/lib-link.m4 $(top_srcdir)/m4/lib-prefix.m4 \
$(top_srcdir)/m4/localcharset.m4 $(top_srcdir)/m4/locale-fr.m4 \
$(top_srcdir)/m4/locale-ja.m4 $(top_srcdir)/m4/locale-zh.m4 \
+ $(top_srcdir)/m4/locale_h.m4 $(top_srcdir)/m4/localeconv.m4 \
$(top_srcdir)/m4/lock.m4 $(top_srcdir)/m4/longlong.m4 \
$(top_srcdir)/m4/lseek.m4 $(top_srcdir)/m4/lstat.m4 \
$(top_srcdir)/m4/malloc.m4 $(top_srcdir)/m4/mbrtowc.m4 \
$(top_srcdir)/m4/mbsinit.m4 $(top_srcdir)/m4/mbstate_t.m4 \
$(top_srcdir)/m4/mbtowc.m4 $(top_srcdir)/m4/md5.m4 \
$(top_srcdir)/m4/memchr.m4 $(top_srcdir)/m4/mkdir.m4 \
- $(top_srcdir)/m4/mmap-anon.m4 $(top_srcdir)/m4/mode_t.m4 \
- $(top_srcdir)/m4/multiarch.m4 $(top_srcdir)/m4/netdb_h.m4 \
- $(top_srcdir)/m4/netinet_in_h.m4 $(top_srcdir)/m4/nls.m4 \
- $(top_srcdir)/m4/nocrash.m4 $(top_srcdir)/m4/open.m4 \
+ $(top_srcdir)/m4/mkstemp.m4 $(top_srcdir)/m4/mmap-anon.m4 \
+ $(top_srcdir)/m4/mode_t.m4 $(top_srcdir)/m4/msvc-inval.m4 \
+ $(top_srcdir)/m4/msvc-nothrow.m4 $(top_srcdir)/m4/multiarch.m4 \
+ $(top_srcdir)/m4/netdb_h.m4 $(top_srcdir)/m4/netinet_in_h.m4 \
+ $(top_srcdir)/m4/nl_langinfo.m4 $(top_srcdir)/m4/nls.m4 \
+ $(top_srcdir)/m4/nocrash.m4 $(top_srcdir)/m4/off_t.m4 \
+ $(top_srcdir)/m4/open.m4 $(top_srcdir)/m4/pathmax.m4 \
$(top_srcdir)/m4/pipe2.m4 $(top_srcdir)/m4/po.m4 \
$(top_srcdir)/m4/posix_spawn.m4 $(top_srcdir)/m4/printf.m4 \
$(top_srcdir)/m4/quote.m4 $(top_srcdir)/m4/quotearg.m4 \
- $(top_srcdir)/m4/rawmemchr.m4 $(top_srcdir)/m4/realloc.m4 \
+ $(top_srcdir)/m4/raise.m4 $(top_srcdir)/m4/rawmemchr.m4 \
+ $(top_srcdir)/m4/realloc.m4 $(top_srcdir)/m4/regex.m4 \
$(top_srcdir)/m4/sched_h.m4 $(top_srcdir)/m4/select.m4 \
- $(top_srcdir)/m4/servent.m4 $(top_srcdir)/m4/sig_atomic_t.m4 \
- $(top_srcdir)/m4/sigaction.m4 $(top_srcdir)/m4/signal_h.m4 \
+ $(top_srcdir)/m4/servent.m4 $(top_srcdir)/m4/sha1.m4 \
+ $(top_srcdir)/m4/sig_atomic_t.m4 $(top_srcdir)/m4/sigaction.m4 \
+ $(top_srcdir)/m4/signal_h.m4 \
$(top_srcdir)/m4/signalblocking.m4 $(top_srcdir)/m4/sigpipe.m4 \
$(top_srcdir)/m4/size_max.m4 $(top_srcdir)/m4/snprintf.m4 \
$(top_srcdir)/m4/socketlib.m4 $(top_srcdir)/m4/sockets.m4 \
$(top_srcdir)/m4/socklen.m4 $(top_srcdir)/m4/sockpfaf.m4 \
$(top_srcdir)/m4/spawn-pipe.m4 $(top_srcdir)/m4/spawn_h.m4 \
- $(top_srcdir)/m4/stat-time.m4 $(top_srcdir)/m4/stat.m4 \
+ $(top_srcdir)/m4/ssize_t.m4 $(top_srcdir)/m4/stat-time.m4 \
+ $(top_srcdir)/m4/stat.m4 $(top_srcdir)/m4/stdalign.m4 \
$(top_srcdir)/m4/stdbool.m4 $(top_srcdir)/m4/stddef_h.m4 \
$(top_srcdir)/m4/stdint.m4 $(top_srcdir)/m4/stdint_h.m4 \
$(top_srcdir)/m4/stdio_h.m4 $(top_srcdir)/m4/stdlib_h.m4 \
$(top_srcdir)/m4/strcase.m4 $(top_srcdir)/m4/strcasestr.m4 \
$(top_srcdir)/m4/strchrnul.m4 $(top_srcdir)/m4/strerror.m4 \
$(top_srcdir)/m4/strerror_r.m4 $(top_srcdir)/m4/string_h.m4 \
- $(top_srcdir)/m4/strings_h.m4 $(top_srcdir)/m4/sys_ioctl_h.m4 \
+ $(top_srcdir)/m4/strings_h.m4 $(top_srcdir)/m4/strtok_r.m4 \
+ $(top_srcdir)/m4/sys_ioctl_h.m4 \
$(top_srcdir)/m4/sys_select_h.m4 \
$(top_srcdir)/m4/sys_socket_h.m4 \
$(top_srcdir)/m4/sys_stat_h.m4 $(top_srcdir)/m4/sys_time_h.m4 \
$(top_srcdir)/m4/sys_types_h.m4 $(top_srcdir)/m4/sys_uio_h.m4 \
- $(top_srcdir)/m4/sys_wait_h.m4 $(top_srcdir)/m4/threadlib.m4 \
- $(top_srcdir)/m4/time_h.m4 $(top_srcdir)/m4/timespec.m4 \
+ $(top_srcdir)/m4/sys_wait_h.m4 $(top_srcdir)/m4/tempname.m4 \
+ $(top_srcdir)/m4/threadlib.m4 $(top_srcdir)/m4/time_h.m4 \
+ $(top_srcdir)/m4/timespec.m4 $(top_srcdir)/m4/tmpdir.m4 \
$(top_srcdir)/m4/unistd-safer.m4 $(top_srcdir)/m4/unistd_h.m4 \
$(top_srcdir)/m4/unlocked-io.m4 $(top_srcdir)/m4/utimbuf.m4 \
$(top_srcdir)/m4/utimens.m4 $(top_srcdir)/m4/utimes.m4 \
$(top_srcdir)/m4/vasnprintf.m4 $(top_srcdir)/m4/vasprintf.m4 \
- $(top_srcdir)/m4/wait-process.m4 $(top_srcdir)/m4/waitpid.m4 \
- $(top_srcdir)/m4/warn-on-use.m4 $(top_srcdir)/m4/wchar_h.m4 \
- $(top_srcdir)/m4/wchar_t.m4 $(top_srcdir)/m4/wctype_h.m4 \
+ $(top_srcdir)/m4/vsnprintf.m4 $(top_srcdir)/m4/wait-process.m4 \
+ $(top_srcdir)/m4/waitpid.m4 $(top_srcdir)/m4/warn-on-use.m4 \
+ $(top_srcdir)/m4/wchar_h.m4 $(top_srcdir)/m4/wchar_t.m4 \
+ $(top_srcdir)/m4/wcrtomb.m4 $(top_srcdir)/m4/wctype_h.m4 \
$(top_srcdir)/m4/wget.m4 $(top_srcdir)/m4/wint_t.m4 \
$(top_srcdir)/m4/write.m4 $(top_srcdir)/m4/xalloc.m4 \
$(top_srcdir)/m4/xsize.m4 $(top_srcdir)/configure.ac
@@ -158,13 +188,13 @@ libunittest_a_DEPENDENCIES = $(LIBOBJS)
am__libunittest_a_SOURCES_DIST = cmpt.c connect.c convert.c cookies.c \
ftp.c css_.c css-url.c ftp-basic.c ftp-ls.c hash.c host.c \
html-parse.c html-url.c http.c init.c log.c main.c netrc.c \
- progress.c ptimer.c recur.c res.c retr.c spider.c url.c \
+ progress.c ptimer.c recur.c res.c retr.c spider.c url.c warc.c \
utils.c exits.c build_info.c iri.c css-url.h css-tokens.h \
connect.h convert.h cookies.h ftp.h hash.h host.h html-parse.h \
html-url.h http.h http-ntlm.h init.h log.h mswindows.h netrc.h \
options.h progress.h ptimer.h recur.h res.h retr.h spider.h \
- ssl.h sysdep.h url.h utils.h wget.h iri.h exits.h gettext.h \
- test.c test.h
+ ssl.h sysdep.h url.h warc.h utils.h wget.h iri.h exits.h \
+ gettext.h test.c test.h
@IRI_IS_ENABLED_TRUE@am__objects_1 = libunittest_a-iri.$(OBJEXT)
am__objects_2 = libunittest_a-cmpt.$(OBJEXT) \
libunittest_a-connect.$(OBJEXT) \
@@ -182,7 +212,8 @@ am__objects_2 = libunittest_a-cmpt.$(OBJEXT) \
libunittest_a-ptimer.$(OBJEXT) libunittest_a-recur.$(OBJEXT) \
libunittest_a-res.$(OBJEXT) libunittest_a-retr.$(OBJEXT) \
libunittest_a-spider.$(OBJEXT) libunittest_a-url.$(OBJEXT) \
- libunittest_a-utils.$(OBJEXT) libunittest_a-exits.$(OBJEXT) \
+ libunittest_a-warc.$(OBJEXT) libunittest_a-utils.$(OBJEXT) \
+ libunittest_a-exits.$(OBJEXT) \
libunittest_a-build_info.$(OBJEXT) $(am__objects_1)
am_libunittest_a_OBJECTS = $(am__objects_2) \
libunittest_a-test.$(OBJEXT) \
@@ -195,12 +226,13 @@ PROGRAMS = $(bin_PROGRAMS)
am__wget_SOURCES_DIST = cmpt.c connect.c convert.c cookies.c ftp.c \
css_.c css-url.c ftp-basic.c ftp-ls.c hash.c host.c \
html-parse.c html-url.c http.c init.c log.c main.c netrc.c \
- progress.c ptimer.c recur.c res.c retr.c spider.c url.c \
+ progress.c ptimer.c recur.c res.c retr.c spider.c url.c warc.c \
utils.c exits.c build_info.c iri.c css-url.h css-tokens.h \
connect.h convert.h cookies.h ftp.h hash.h host.h html-parse.h \
html-url.h http.h http-ntlm.h init.h log.h mswindows.h netrc.h \
options.h progress.h ptimer.h recur.h res.h retr.h spider.h \
- ssl.h sysdep.h url.h utils.h wget.h iri.h exits.h gettext.h
+ ssl.h sysdep.h url.h warc.h utils.h wget.h iri.h exits.h \
+ gettext.h
@IRI_IS_ENABLED_TRUE@am__objects_3 = iri.$(OBJEXT)
am_wget_OBJECTS = cmpt.$(OBJEXT) connect.$(OBJEXT) convert.$(OBJEXT) \
cookies.$(OBJEXT) ftp.$(OBJEXT) css_.$(OBJEXT) \
@@ -209,8 +241,8 @@ am_wget_OBJECTS = cmpt.$(OBJEXT) connect.$(OBJEXT) convert.$(OBJEXT) \
html-url.$(OBJEXT) http.$(OBJEXT) init.$(OBJEXT) log.$(OBJEXT) \
main.$(OBJEXT) netrc.$(OBJEXT) progress.$(OBJEXT) \
ptimer.$(OBJEXT) recur.$(OBJEXT) res.$(OBJEXT) retr.$(OBJEXT) \
- spider.$(OBJEXT) url.$(OBJEXT) utils.$(OBJEXT) exits.$(OBJEXT) \
- build_info.$(OBJEXT) $(am__objects_3)
+ spider.$(OBJEXT) url.$(OBJEXT) warc.$(OBJEXT) utils.$(OBJEXT) \
+ exits.$(OBJEXT) build_info.$(OBJEXT) $(am__objects_3)
nodist_wget_OBJECTS = version.$(OBJEXT)
wget_OBJECTS = $(am_wget_OBJECTS) $(nodist_wget_OBJECTS)
wget_LDADD = $(LDADD)
@@ -227,6 +259,11 @@ SOURCES = $(libunittest_a_SOURCES) $(nodist_libunittest_a_SOURCES) \
$(wget_SOURCES) $(EXTRA_wget_SOURCES) $(nodist_wget_SOURCES)
DIST_SOURCES = $(am__libunittest_a_SOURCES_DIST) \
$(am__wget_SOURCES_DIST) $(EXTRA_wget_SOURCES)
+am__can_run_installinfo = \
+ case $$AM_UPDATE_INFO_DIR in \
+ n|no|NO) false;; \
+ *) (install-info --version) >/dev/null 2>&1;; \
+ esac
ETAGS = etags
CTAGS = ctags
DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST)
@@ -286,12 +323,15 @@ GNULIB_BIND = @GNULIB_BIND@
GNULIB_BTOWC = @GNULIB_BTOWC@
GNULIB_CALLOC_POSIX = @GNULIB_CALLOC_POSIX@
GNULIB_CANONICALIZE_FILE_NAME = @GNULIB_CANONICALIZE_FILE_NAME@
+GNULIB_CHDIR = @GNULIB_CHDIR@
GNULIB_CHOWN = @GNULIB_CHOWN@
GNULIB_CLOSE = @GNULIB_CLOSE@
GNULIB_CONNECT = @GNULIB_CONNECT@
GNULIB_DPRINTF = @GNULIB_DPRINTF@
+GNULIB_DUP = @GNULIB_DUP@
GNULIB_DUP2 = @GNULIB_DUP2@
GNULIB_DUP3 = @GNULIB_DUP3@
+GNULIB_DUPLOCALE = @GNULIB_DUPLOCALE@
GNULIB_ENVIRON = @GNULIB_ENVIRON@
GNULIB_EUIDACCESS = @GNULIB_EUIDACCESS@
GNULIB_FACCESSAT = @GNULIB_FACCESSAT@
@@ -300,6 +340,8 @@ GNULIB_FCHMODAT = @GNULIB_FCHMODAT@
GNULIB_FCHOWNAT = @GNULIB_FCHOWNAT@
GNULIB_FCLOSE = @GNULIB_FCLOSE@
GNULIB_FCNTL = @GNULIB_FCNTL@
+GNULIB_FDATASYNC = @GNULIB_FDATASYNC@
+GNULIB_FDOPEN = @GNULIB_FDOPEN@
GNULIB_FFLUSH = @GNULIB_FFLUSH@
GNULIB_FFS = @GNULIB_FFS@
GNULIB_FFSL = @GNULIB_FFSL@
@@ -317,6 +359,7 @@ GNULIB_FREOPEN = @GNULIB_FREOPEN@
GNULIB_FSCANF = @GNULIB_FSCANF@
GNULIB_FSEEK = @GNULIB_FSEEK@
GNULIB_FSEEKO = @GNULIB_FSEEKO@
+GNULIB_FSTAT = @GNULIB_FSTAT@
GNULIB_FSTATAT = @GNULIB_FSTATAT@
GNULIB_FSYNC = @GNULIB_FSYNC@
GNULIB_FTELL = @GNULIB_FTELL@
@@ -339,18 +382,19 @@ GNULIB_GETLOGIN = @GNULIB_GETLOGIN@
GNULIB_GETLOGIN_R = @GNULIB_GETLOGIN_R@
GNULIB_GETPAGESIZE = @GNULIB_GETPAGESIZE@
GNULIB_GETPEERNAME = @GNULIB_GETPEERNAME@
-GNULIB_GETS = @GNULIB_GETS@
GNULIB_GETSOCKNAME = @GNULIB_GETSOCKNAME@
GNULIB_GETSOCKOPT = @GNULIB_GETSOCKOPT@
GNULIB_GETSUBOPT = @GNULIB_GETSUBOPT@
GNULIB_GETTIMEOFDAY = @GNULIB_GETTIMEOFDAY@
GNULIB_GETUSERSHELL = @GNULIB_GETUSERSHELL@
+GNULIB_GL_UNISTD_H_GETOPT = @GNULIB_GL_UNISTD_H_GETOPT@
GNULIB_GRANTPT = @GNULIB_GRANTPT@
GNULIB_GROUP_MEMBER = @GNULIB_GROUP_MEMBER@
GNULIB_ICONV = @GNULIB_ICONV@
GNULIB_INET_NTOP = @GNULIB_INET_NTOP@
GNULIB_INET_PTON = @GNULIB_INET_PTON@
GNULIB_IOCTL = @GNULIB_IOCTL@
+GNULIB_ISATTY = @GNULIB_ISATTY@
GNULIB_ISWBLANK = @GNULIB_ISWBLANK@
GNULIB_ISWCTYPE = @GNULIB_ISWCTYPE@
GNULIB_LCHMOD = @GNULIB_LCHMOD@
@@ -358,6 +402,7 @@ GNULIB_LCHOWN = @GNULIB_LCHOWN@
GNULIB_LINK = @GNULIB_LINK@
GNULIB_LINKAT = @GNULIB_LINKAT@
GNULIB_LISTEN = @GNULIB_LISTEN@
+GNULIB_LOCALECONV = @GNULIB_LOCALECONV@
GNULIB_LSEEK = @GNULIB_LSEEK@
GNULIB_LSTAT = @GNULIB_LSTAT@
GNULIB_MALLOC_POSIX = @GNULIB_MALLOC_POSIX@
@@ -397,15 +442,18 @@ GNULIB_MKSTEMP = @GNULIB_MKSTEMP@
GNULIB_MKSTEMPS = @GNULIB_MKSTEMPS@
GNULIB_MKTIME = @GNULIB_MKTIME@
GNULIB_NANOSLEEP = @GNULIB_NANOSLEEP@
+GNULIB_NL_LANGINFO = @GNULIB_NL_LANGINFO@
GNULIB_NONBLOCKING = @GNULIB_NONBLOCKING@
GNULIB_OBSTACK_PRINTF = @GNULIB_OBSTACK_PRINTF@
GNULIB_OBSTACK_PRINTF_POSIX = @GNULIB_OBSTACK_PRINTF_POSIX@
GNULIB_OPEN = @GNULIB_OPEN@
GNULIB_OPENAT = @GNULIB_OPENAT@
+GNULIB_PCLOSE = @GNULIB_PCLOSE@
GNULIB_PERROR = @GNULIB_PERROR@
GNULIB_PIPE = @GNULIB_PIPE@
GNULIB_PIPE2 = @GNULIB_PIPE2@
GNULIB_POPEN = @GNULIB_POPEN@
+GNULIB_POSIX_OPENPT = @GNULIB_POSIX_OPENPT@
GNULIB_POSIX_SPAWN = @GNULIB_POSIX_SPAWN@
GNULIB_POSIX_SPAWNATTR_DESTROY = @GNULIB_POSIX_SPAWNATTR_DESTROY@
GNULIB_POSIX_SPAWNATTR_GETFLAGS = @GNULIB_POSIX_SPAWNATTR_GETFLAGS@
@@ -433,11 +481,14 @@ GNULIB_PRINTF_POSIX = @GNULIB_PRINTF_POSIX@
GNULIB_PSELECT = @GNULIB_PSELECT@
GNULIB_PTHREAD_SIGMASK = @GNULIB_PTHREAD_SIGMASK@
GNULIB_PTSNAME = @GNULIB_PTSNAME@
+GNULIB_PTSNAME_R = @GNULIB_PTSNAME_R@
GNULIB_PUTC = @GNULIB_PUTC@
GNULIB_PUTCHAR = @GNULIB_PUTCHAR@
GNULIB_PUTENV = @GNULIB_PUTENV@
GNULIB_PUTS = @GNULIB_PUTS@
GNULIB_PWRITE = @GNULIB_PWRITE@
+GNULIB_RAISE = @GNULIB_RAISE@
+GNULIB_RANDOM = @GNULIB_RANDOM@
GNULIB_RANDOM_R = @GNULIB_RANDOM_R@
GNULIB_RAWMEMCHR = @GNULIB_RAWMEMCHR@
GNULIB_READ = @GNULIB_READ@
@@ -457,6 +508,8 @@ GNULIB_SELECT = @GNULIB_SELECT@
GNULIB_SEND = @GNULIB_SEND@
GNULIB_SENDTO = @GNULIB_SENDTO@
GNULIB_SETENV = @GNULIB_SETENV@
+GNULIB_SETHOSTNAME = @GNULIB_SETHOSTNAME@
+GNULIB_SETLOCALE = @GNULIB_SETLOCALE@
GNULIB_SETSOCKOPT = @GNULIB_SETSOCKOPT@
GNULIB_SHUTDOWN = @GNULIB_SHUTDOWN@
GNULIB_SIGACTION = @GNULIB_SIGACTION@
@@ -497,7 +550,6 @@ GNULIB_TIME_R = @GNULIB_TIME_R@
GNULIB_TMPFILE = @GNULIB_TMPFILE@
GNULIB_TOWCTRANS = @GNULIB_TOWCTRANS@
GNULIB_TTYNAME_R = @GNULIB_TTYNAME_R@
-GNULIB_UNISTD_H_GETOPT = @GNULIB_UNISTD_H_GETOPT@
GNULIB_UNISTD_H_NONBLOCKING = @GNULIB_UNISTD_H_NONBLOCKING@
GNULIB_UNISTD_H_SIGPIPE = @GNULIB_UNISTD_H_SIGPIPE@
GNULIB_UNLINK = @GNULIB_UNLINK@
@@ -564,6 +616,7 @@ HAVE_CANONICALIZE_FILE_NAME = @HAVE_CANONICALIZE_FILE_NAME@
HAVE_CHOWN = @HAVE_CHOWN@
HAVE_DECL_ENVIRON = @HAVE_DECL_ENVIRON@
HAVE_DECL_FCHDIR = @HAVE_DECL_FCHDIR@
+HAVE_DECL_FDATASYNC = @HAVE_DECL_FDATASYNC@
HAVE_DECL_FPURGE = @HAVE_DECL_FPURGE@
HAVE_DECL_FREEADDRINFO = @HAVE_DECL_FREEADDRINFO@
HAVE_DECL_FSEEKO = @HAVE_DECL_FSEEKO@
@@ -585,6 +638,7 @@ HAVE_DECL_MEMMEM = @HAVE_DECL_MEMMEM@
HAVE_DECL_MEMRCHR = @HAVE_DECL_MEMRCHR@
HAVE_DECL_OBSTACK_PRINTF = @HAVE_DECL_OBSTACK_PRINTF@
HAVE_DECL_SETENV = @HAVE_DECL_SETENV@
+HAVE_DECL_SETHOSTNAME = @HAVE_DECL_SETHOSTNAME@
HAVE_DECL_SNPRINTF = @HAVE_DECL_SNPRINTF@
HAVE_DECL_STRDUP = @HAVE_DECL_STRDUP@
HAVE_DECL_STRERROR_R = @HAVE_DECL_STRERROR_R@
@@ -601,12 +655,14 @@ HAVE_DECL_WCWIDTH = @HAVE_DECL_WCWIDTH@
HAVE_DPRINTF = @HAVE_DPRINTF@
HAVE_DUP2 = @HAVE_DUP2@
HAVE_DUP3 = @HAVE_DUP3@
+HAVE_DUPLOCALE = @HAVE_DUPLOCALE@
HAVE_EUIDACCESS = @HAVE_EUIDACCESS@
HAVE_FACCESSAT = @HAVE_FACCESSAT@
HAVE_FCHDIR = @HAVE_FCHDIR@
HAVE_FCHMODAT = @HAVE_FCHMODAT@
HAVE_FCHOWNAT = @HAVE_FCHOWNAT@
HAVE_FCNTL = @HAVE_FCNTL@
+HAVE_FDATASYNC = @HAVE_FDATASYNC@
HAVE_FEATURES_H = @HAVE_FEATURES_H@
HAVE_FFS = @HAVE_FFS@
HAVE_FFSL = @HAVE_FFSL@
@@ -630,6 +686,11 @@ HAVE_GROUP_MEMBER = @HAVE_GROUP_MEMBER@
HAVE_INTTYPES_H = @HAVE_INTTYPES_H@
HAVE_ISWBLANK = @HAVE_ISWBLANK@
HAVE_ISWCNTRL = @HAVE_ISWCNTRL@
+HAVE_LANGINFO_CODESET = @HAVE_LANGINFO_CODESET@
+HAVE_LANGINFO_ERA = @HAVE_LANGINFO_ERA@
+HAVE_LANGINFO_H = @HAVE_LANGINFO_H@
+HAVE_LANGINFO_T_FMT_AMPM = @HAVE_LANGINFO_T_FMT_AMPM@
+HAVE_LANGINFO_YESEXPR = @HAVE_LANGINFO_YESEXPR@
HAVE_LCHMOD = @HAVE_LCHMOD@
HAVE_LCHOWN = @HAVE_LCHOWN@
HAVE_LIBGNUTLS = @HAVE_LIBGNUTLS@
@@ -656,13 +717,18 @@ HAVE_MKOSTEMP = @HAVE_MKOSTEMP@
HAVE_MKOSTEMPS = @HAVE_MKOSTEMPS@
HAVE_MKSTEMP = @HAVE_MKSTEMP@
HAVE_MKSTEMPS = @HAVE_MKSTEMPS@
+HAVE_MSVC_INVALID_PARAMETER_HANDLER = @HAVE_MSVC_INVALID_PARAMETER_HANDLER@
HAVE_NANOSLEEP = @HAVE_NANOSLEEP@
HAVE_NETDB_H = @HAVE_NETDB_H@
HAVE_NETINET_IN_H = @HAVE_NETINET_IN_H@
+HAVE_NL_LANGINFO = @HAVE_NL_LANGINFO@
HAVE_OPENAT = @HAVE_OPENAT@
HAVE_OS_H = @HAVE_OS_H@
+HAVE_PCLOSE = @HAVE_PCLOSE@
HAVE_PIPE = @HAVE_PIPE@
HAVE_PIPE2 = @HAVE_PIPE2@
+HAVE_POPEN = @HAVE_POPEN@
+HAVE_POSIX_OPENPT = @HAVE_POSIX_OPENPT@
HAVE_POSIX_SIGNALBLOCKING = @HAVE_POSIX_SIGNALBLOCKING@
HAVE_POSIX_SPAWN = @HAVE_POSIX_SPAWN@
HAVE_POSIX_SPAWNATTR_T = @HAVE_POSIX_SPAWNATTR_T@
@@ -671,7 +737,10 @@ HAVE_PREAD = @HAVE_PREAD@
HAVE_PSELECT = @HAVE_PSELECT@
HAVE_PTHREAD_SIGMASK = @HAVE_PTHREAD_SIGMASK@
HAVE_PTSNAME = @HAVE_PTSNAME@
+HAVE_PTSNAME_R = @HAVE_PTSNAME_R@
HAVE_PWRITE = @HAVE_PWRITE@
+HAVE_RAISE = @HAVE_RAISE@
+HAVE_RANDOM = @HAVE_RANDOM@
HAVE_RANDOM_H = @HAVE_RANDOM_H@
HAVE_RANDOM_R = @HAVE_RANDOM_R@
HAVE_RAWMEMCHR = @HAVE_RAWMEMCHR@
@@ -683,6 +752,7 @@ HAVE_RPMATCH = @HAVE_RPMATCH@
HAVE_SA_FAMILY_T = @HAVE_SA_FAMILY_T@
HAVE_SCHED_H = @HAVE_SCHED_H@
HAVE_SETENV = @HAVE_SETENV@
+HAVE_SETHOSTNAME = @HAVE_SETHOSTNAME@
HAVE_SIGACTION = @HAVE_SIGACTION@
HAVE_SIGHANDLER_T = @HAVE_SIGHANDLER_T@
HAVE_SIGINFO_T = @HAVE_SIGINFO_T@
@@ -774,6 +844,7 @@ HAVE_WMEMCPY = @HAVE_WMEMCPY@
HAVE_WMEMMOVE = @HAVE_WMEMMOVE@
HAVE_WMEMSET = @HAVE_WMEMSET@
HAVE_WS2TCPIP_H = @HAVE_WS2TCPIP_H@
+HAVE_XLOCALE_H = @HAVE_XLOCALE_H@
HAVE__BOOL = @HAVE__BOOL@
HAVE__EXIT = @HAVE__EXIT@
HOSTENT_LIB = @HOSTENT_LIB@
@@ -809,7 +880,9 @@ LIBSSL = @LIBSSL@
LIBSSL_PREFIX = @LIBSSL_PREFIX@
LIBTHREAD = @LIBTHREAD@
LIB_CLOCK_GETTIME = @LIB_CLOCK_GETTIME@
+LIB_SELECT = @LIB_SELECT@
LOCALCHARSET_TESTS_ENVIRONMENT = @LOCALCHARSET_TESTS_ENVIRONMENT@
+LOCALE_FR = @LOCALE_FR@
LOCALE_FR_UTF8 = @LOCALE_FR_UTF8@
LOCALE_JA = @LOCALE_JA@
LOCALE_ZH_CN = @LOCALE_ZH_CN@
@@ -834,6 +907,8 @@ NEXT_AS_FIRST_DIRECTIVE_FCNTL_H = @NEXT_AS_FIRST_DIRECTIVE_FCNTL_H@
NEXT_AS_FIRST_DIRECTIVE_FLOAT_H = @NEXT_AS_FIRST_DIRECTIVE_FLOAT_H@
NEXT_AS_FIRST_DIRECTIVE_GETOPT_H = @NEXT_AS_FIRST_DIRECTIVE_GETOPT_H@
NEXT_AS_FIRST_DIRECTIVE_ICONV_H = @NEXT_AS_FIRST_DIRECTIVE_ICONV_H@
+NEXT_AS_FIRST_DIRECTIVE_LANGINFO_H = @NEXT_AS_FIRST_DIRECTIVE_LANGINFO_H@
+NEXT_AS_FIRST_DIRECTIVE_LOCALE_H = @NEXT_AS_FIRST_DIRECTIVE_LOCALE_H@
NEXT_AS_FIRST_DIRECTIVE_NETDB_H = @NEXT_AS_FIRST_DIRECTIVE_NETDB_H@
NEXT_AS_FIRST_DIRECTIVE_NETINET_IN_H = @NEXT_AS_FIRST_DIRECTIVE_NETINET_IN_H@
NEXT_AS_FIRST_DIRECTIVE_SCHED_H = @NEXT_AS_FIRST_DIRECTIVE_SCHED_H@
@@ -862,6 +937,8 @@ NEXT_FCNTL_H = @NEXT_FCNTL_H@
NEXT_FLOAT_H = @NEXT_FLOAT_H@
NEXT_GETOPT_H = @NEXT_GETOPT_H@
NEXT_ICONV_H = @NEXT_ICONV_H@
+NEXT_LANGINFO_H = @NEXT_LANGINFO_H@
+NEXT_LOCALE_H = @NEXT_LOCALE_H@
NEXT_NETDB_H = @NEXT_NETDB_H@
NEXT_NETINET_IN_H = @NEXT_NETINET_IN_H@
NEXT_SCHED_H = @NEXT_SCHED_H@
@@ -910,9 +987,11 @@ REPLACE_CLOSE = @REPLACE_CLOSE@
REPLACE_DPRINTF = @REPLACE_DPRINTF@
REPLACE_DUP = @REPLACE_DUP@
REPLACE_DUP2 = @REPLACE_DUP2@
+REPLACE_DUPLOCALE = @REPLACE_DUPLOCALE@
REPLACE_FCHOWNAT = @REPLACE_FCHOWNAT@
REPLACE_FCLOSE = @REPLACE_FCLOSE@
REPLACE_FCNTL = @REPLACE_FCNTL@
+REPLACE_FDOPEN = @REPLACE_FDOPEN@
REPLACE_FFLUSH = @REPLACE_FFLUSH@
REPLACE_FOPEN = @REPLACE_FOPEN@
REPLACE_FPRINTF = @REPLACE_FPRINTF@
@@ -924,6 +1003,7 @@ REPLACE_FSTAT = @REPLACE_FSTAT@
REPLACE_FSTATAT = @REPLACE_FSTATAT@
REPLACE_FTELL = @REPLACE_FTELL@
REPLACE_FTELLO = @REPLACE_FTELLO@
+REPLACE_FTRUNCATE = @REPLACE_FTRUNCATE@
REPLACE_FUTIMENS = @REPLACE_FUTIMENS@
REPLACE_GAI_STRERROR = @REPLACE_GAI_STRERROR@
REPLACE_GETCWD = @REPLACE_GETCWD@
@@ -937,12 +1017,17 @@ REPLACE_GETTIMEOFDAY = @REPLACE_GETTIMEOFDAY@
REPLACE_ICONV = @REPLACE_ICONV@
REPLACE_ICONV_OPEN = @REPLACE_ICONV_OPEN@
REPLACE_ICONV_UTF = @REPLACE_ICONV_UTF@
+REPLACE_INET_NTOP = @REPLACE_INET_NTOP@
+REPLACE_INET_PTON = @REPLACE_INET_PTON@
REPLACE_IOCTL = @REPLACE_IOCTL@
+REPLACE_ISATTY = @REPLACE_ISATTY@
REPLACE_ISWBLANK = @REPLACE_ISWBLANK@
REPLACE_ISWCNTRL = @REPLACE_ISWCNTRL@
+REPLACE_ITOLD = @REPLACE_ITOLD@
REPLACE_LCHOWN = @REPLACE_LCHOWN@
REPLACE_LINK = @REPLACE_LINK@
REPLACE_LINKAT = @REPLACE_LINKAT@
+REPLACE_LOCALECONV = @REPLACE_LOCALECONV@
REPLACE_LOCALTIME_R = @REPLACE_LOCALTIME_R@
REPLACE_LSEEK = @REPLACE_LSEEK@
REPLACE_LSTAT = @REPLACE_LSTAT@
@@ -962,6 +1047,7 @@ REPLACE_MKNOD = @REPLACE_MKNOD@
REPLACE_MKSTEMP = @REPLACE_MKSTEMP@
REPLACE_MKTIME = @REPLACE_MKTIME@
REPLACE_NANOSLEEP = @REPLACE_NANOSLEEP@
+REPLACE_NL_LANGINFO = @REPLACE_NL_LANGINFO@
REPLACE_NULL = @REPLACE_NULL@
REPLACE_OBSTACK_PRINTF = @REPLACE_OBSTACK_PRINTF@
REPLACE_OPEN = @REPLACE_OPEN@
@@ -969,12 +1055,18 @@ REPLACE_OPENAT = @REPLACE_OPENAT@
REPLACE_PERROR = @REPLACE_PERROR@
REPLACE_POPEN = @REPLACE_POPEN@
REPLACE_POSIX_SPAWN = @REPLACE_POSIX_SPAWN@
+REPLACE_POSIX_SPAWN_FILE_ACTIONS_ADDCLOSE = @REPLACE_POSIX_SPAWN_FILE_ACTIONS_ADDCLOSE@
+REPLACE_POSIX_SPAWN_FILE_ACTIONS_ADDDUP2 = @REPLACE_POSIX_SPAWN_FILE_ACTIONS_ADDDUP2@
+REPLACE_POSIX_SPAWN_FILE_ACTIONS_ADDOPEN = @REPLACE_POSIX_SPAWN_FILE_ACTIONS_ADDOPEN@
REPLACE_PREAD = @REPLACE_PREAD@
REPLACE_PRINTF = @REPLACE_PRINTF@
REPLACE_PSELECT = @REPLACE_PSELECT@
REPLACE_PTHREAD_SIGMASK = @REPLACE_PTHREAD_SIGMASK@
+REPLACE_PTSNAME_R = @REPLACE_PTSNAME_R@
REPLACE_PUTENV = @REPLACE_PUTENV@
REPLACE_PWRITE = @REPLACE_PWRITE@
+REPLACE_RAISE = @REPLACE_RAISE@
+REPLACE_RANDOM_R = @REPLACE_RANDOM_R@
REPLACE_READ = @REPLACE_READ@
REPLACE_READLINK = @REPLACE_READLINK@
REPLACE_REALLOC = @REPLACE_REALLOC@
@@ -985,6 +1077,7 @@ REPLACE_RENAMEAT = @REPLACE_RENAMEAT@
REPLACE_RMDIR = @REPLACE_RMDIR@
REPLACE_SELECT = @REPLACE_SELECT@
REPLACE_SETENV = @REPLACE_SETENV@
+REPLACE_SETLOCALE = @REPLACE_SETLOCALE@
REPLACE_SLEEP = @REPLACE_SLEEP@
REPLACE_SNPRINTF = @REPLACE_SNPRINTF@
REPLACE_SPRINTF = @REPLACE_SPRINTF@
@@ -1004,6 +1097,8 @@ REPLACE_STRSIGNAL = @REPLACE_STRSIGNAL@
REPLACE_STRSTR = @REPLACE_STRSTR@
REPLACE_STRTOD = @REPLACE_STRTOD@
REPLACE_STRTOK_R = @REPLACE_STRTOK_R@
+REPLACE_STRUCT_LCONV = @REPLACE_STRUCT_LCONV@
+REPLACE_STRUCT_TIMEVAL = @REPLACE_STRUCT_TIMEVAL@
REPLACE_SYMLINK = @REPLACE_SYMLINK@
REPLACE_TIMEGM = @REPLACE_TIMEGM@
REPLACE_TMPFILE = @REPLACE_TMPFILE@
@@ -1034,6 +1129,7 @@ SET_MAKE = @SET_MAKE@
SHELL = @SHELL@
SIG_ATOMIC_T_SUFFIX = @SIG_ATOMIC_T_SUFFIX@
SIZE_T_SUFFIX = @SIZE_T_SUFFIX@
+STDALIGN_H = @STDALIGN_H@
STDBOOL_H = @STDBOOL_H@
STDDEF_H = @STDDEF_H@
STDINT_H = @STDINT_H@
@@ -1048,6 +1144,8 @@ UNISTD_H_HAVE_WINSOCK2_H_AND_USE_SOCKETS = @UNISTD_H_HAVE_WINSOCK2_H_AND_USE_SOC
USE_NLS = @USE_NLS@
VERSION = @VERSION@
WCHAR_T_SUFFIX = @WCHAR_T_SUFFIX@
+WINDOWS_64_BIT_OFF_T = @WINDOWS_64_BIT_OFF_T@
+WINDOWS_64_BIT_ST_SIZE = @WINDOWS_64_BIT_ST_SIZE@
WINT_T_SUFFIX = @WINT_T_SUFFIX@
XGETTEXT = @XGETTEXT@
XGETTEXT_015 = @XGETTEXT_015@
@@ -1114,13 +1212,13 @@ wget_SOURCES = cmpt.c connect.c convert.c cookies.c ftp.c \
css_.c css-url.c \
ftp-basic.c ftp-ls.c hash.c host.c html-parse.c html-url.c \
http.c init.c log.c main.c netrc.c progress.c ptimer.c \
- recur.c res.c retr.c spider.c url.c \
+ recur.c res.c retr.c spider.c url.c warc.c \
utils.c exits.c build_info.c $(IRI_OBJ) \
css-url.h css-tokens.h connect.h convert.h cookies.h \
ftp.h hash.h host.h html-parse.h html-url.h \
http.h http-ntlm.h init.h log.h mswindows.h netrc.h \
options.h progress.h ptimer.h recur.h res.h retr.h \
- spider.h ssl.h sysdep.h url.h utils.h wget.h iri.h \
+ spider.h ssl.h sysdep.h url.h warc.h utils.h wget.h iri.h \
exits.h gettext.h
nodist_wget_SOURCES = version.c
@@ -1171,10 +1269,8 @@ $(ACLOCAL_M4): $(am__aclocal_m4_deps)
$(am__aclocal_m4_deps):
config.h: stamp-h1
- @if test ! -f $@; then \
- rm -f stamp-h1; \
- $(MAKE) $(AM_MAKEFLAGS) stamp-h1; \
- else :; fi
+ @if test ! -f $@; then rm -f stamp-h1; else :; fi
+ @if test ! -f $@; then $(MAKE) $(AM_MAKEFLAGS) stamp-h1; else :; fi
stamp-h1: $(srcdir)/config.h.in $(top_builddir)/config.status
@rm -f stamp-h1
@@ -1189,14 +1285,17 @@ distclean-hdr:
clean-checkLIBRARIES:
-test -z "$(check_LIBRARIES)" || rm -f $(check_LIBRARIES)
-libunittest.a: $(libunittest_a_OBJECTS) $(libunittest_a_DEPENDENCIES)
+libunittest.a: $(libunittest_a_OBJECTS) $(libunittest_a_DEPENDENCIES) $(EXTRA_libunittest_a_DEPENDENCIES)
-rm -f libunittest.a
$(libunittest_a_AR) libunittest.a $(libunittest_a_OBJECTS) $(libunittest_a_LIBADD)
$(RANLIB) libunittest.a
install-binPROGRAMS: $(bin_PROGRAMS)
@$(NORMAL_INSTALL)
- test -z "$(bindir)" || $(MKDIR_P) "$(DESTDIR)$(bindir)"
@list='$(bin_PROGRAMS)'; test -n "$(bindir)" || list=; \
+ if test -n "$$list"; then \
+ echo " $(MKDIR_P) '$(DESTDIR)$(bindir)'"; \
+ $(MKDIR_P) "$(DESTDIR)$(bindir)" || exit 1; \
+ fi; \
for p in $$list; do echo "$$p $$p"; done | \
sed 's/$(EXEEXT)$$//' | \
while read p p1; do if test -f $$p; \
@@ -1230,7 +1329,7 @@ uninstall-binPROGRAMS:
clean-binPROGRAMS:
-test -z "$(bin_PROGRAMS)" || rm -f $(bin_PROGRAMS)
-wget$(EXEEXT): $(wget_OBJECTS) $(wget_DEPENDENCIES)
+wget$(EXEEXT): $(wget_OBJECTS) $(wget_DEPENDENCIES) $(EXTRA_wget_DEPENDENCIES)
@rm -f wget$(EXEEXT)
$(LINK) $(wget_OBJECTS) $(wget_LDADD) $(LIBS)
@@ -1294,6 +1393,7 @@ distclean-compile:
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/libunittest_a-url.Po@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/libunittest_a-utils.Po@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/libunittest_a-version.Po@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/libunittest_a-warc.Po@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/log.Po@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/main.Po@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/netrc.Po@am__quote@
@@ -1306,6 +1406,7 @@ distclean-compile:
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/url.Po@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/utils.Po@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/version.Po@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/warc.Po@am__quote@
.c.o:
@am__fastdepCC_TRUE@ $(COMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ $<
@@ -1671,6 +1772,20 @@ libunittest_a-url.obj: url.c
@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
@am__fastdepCC_FALSE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libunittest_a_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o libunittest_a-url.obj `if test -f 'url.c'; then $(CYGPATH_W) 'url.c'; else $(CYGPATH_W) '$(srcdir)/url.c'; fi`
+libunittest_a-warc.o: warc.c
+@am__fastdepCC_TRUE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libunittest_a_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT libunittest_a-warc.o -MD -MP -MF $(DEPDIR)/libunittest_a-warc.Tpo -c -o libunittest_a-warc.o `test -f 'warc.c' || echo '$(srcdir)/'`warc.c
+@am__fastdepCC_TRUE@ $(am__mv) $(DEPDIR)/libunittest_a-warc.Tpo $(DEPDIR)/libunittest_a-warc.Po
+@AMDEP_TRUE@@am__fastdepCC_FALSE@ source='warc.c' object='libunittest_a-warc.o' libtool=no @AMDEPBACKSLASH@
+@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+@am__fastdepCC_FALSE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libunittest_a_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o libunittest_a-warc.o `test -f 'warc.c' || echo '$(srcdir)/'`warc.c
+
+libunittest_a-warc.obj: warc.c
+@am__fastdepCC_TRUE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libunittest_a_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT libunittest_a-warc.obj -MD -MP -MF $(DEPDIR)/libunittest_a-warc.Tpo -c -o libunittest_a-warc.obj `if test -f 'warc.c'; then $(CYGPATH_W) 'warc.c'; else $(CYGPATH_W) '$(srcdir)/warc.c'; fi`
+@am__fastdepCC_TRUE@ $(am__mv) $(DEPDIR)/libunittest_a-warc.Tpo $(DEPDIR)/libunittest_a-warc.Po
+@AMDEP_TRUE@@am__fastdepCC_FALSE@ source='warc.c' object='libunittest_a-warc.obj' libtool=no @AMDEPBACKSLASH@
+@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+@am__fastdepCC_FALSE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libunittest_a_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o libunittest_a-warc.obj `if test -f 'warc.c'; then $(CYGPATH_W) 'warc.c'; else $(CYGPATH_W) '$(srcdir)/warc.c'; fi`
+
libunittest_a-utils.o: utils.c
@am__fastdepCC_TRUE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libunittest_a_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT libunittest_a-utils.o -MD -MP -MF $(DEPDIR)/libunittest_a-utils.Tpo -c -o libunittest_a-utils.o `test -f 'utils.c' || echo '$(srcdir)/'`utils.c
@am__fastdepCC_TRUE@ $(am__mv) $(DEPDIR)/libunittest_a-utils.Tpo $(DEPDIR)/libunittest_a-utils.Po
@@ -1855,10 +1970,15 @@ install-am: all-am
installcheck: installcheck-am
install-strip:
- $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
- install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
- `test -z '$(STRIP)' || \
- echo "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'"` install
+ if test -z '$(STRIP)'; then \
+ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+ install; \
+ else \
+ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+ "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \
+ fi
mostlyclean-generic:
clean-generic:
diff --git a/src/build_info.c b/src/build_info.c
index f046444..8ac66c4 100644
--- a/src/build_info.c
+++ b/src/build_info.c
@@ -60,7 +60,7 @@ const char *compiled_features[] =
"-opie",
#endif
-#if defined HAVE_LIBSSL
+#if defined HAVE_LIBSSL || defined HAVE_LIBSSL32
"+ssl/openssl",
#elif defined HAVE_LIBGNUTLS
"+ssl/gnutls",
diff --git a/src/build_info.c.in b/src/build_info.c.in
index 892962a..c0b1677 100644
--- a/src/build_info.c.in
+++ b/src/build_info.c.in
@@ -9,5 +9,5 @@ ntlm defined ENABLE_NTLM
opie defined ENABLE_OPIE
ssl choice:
- openssl defined HAVE_LIBSSL
+ openssl defined HAVE_LIBSSL || defined HAVE_LIBSSL32
gnutls defined HAVE_LIBGNUTLS
diff --git a/src/config.h.in b/src/config.h.in
index 2763488..123560b 100644
--- a/src/config.h.in
+++ b/src/config.h.in
@@ -26,6 +26,13 @@
/* Define to 1 if using `alloca.c'. */
#undef C_ALLOCA
+/* Define as the bit index in the word where to find bit 0 of the exponent of
+ 'double'. */
+#undef DBL_EXPBIT0_BIT
+
+/* Define as the word index where to find the exponent of 'double'. */
+#undef DBL_EXPBIT0_WORD
+
/* Define to 1 if // is a file system root distinct from /. */
#undef DOUBLE_SLASH_IS_DISTINCT_ROOT
@@ -54,10 +61,16 @@
/* Define this to 1 if F_DUPFD behavior does not match POSIX */
#undef FCNTL_DUPFD_BUGGY
+/* Define to 1 if the system's ftello function has the Solaris bug. */
+#undef FTELLO_BROKEN_AFTER_SWITCHING_FROM_READ_TO_WRITE
+
/* Define to 1 if mkdir mistakenly creates a directory given with a trailing
dot component. */
#undef FUNC_MKDIR_DOT_BUG
+/* Define to 1 if nl_langinfo (YESEXPR) returns a non-empty string. */
+#undef FUNC_NL_LANGINFO_YESEXPR_WORKS
+
/* Define to 1 if futimesat mishandles a NULL file name. */
#undef FUTIMESAT_NULL_BUG
@@ -73,10 +86,22 @@
#undef GNULIB_FD_SAFER_FLAG
/* Define to a C preprocessor expression that evaluates to 1 or 0, depending
+ whether the gnulib module fscanf shall be considered present. */
+#undef GNULIB_FSCANF
+
+/* Define to a C preprocessor expression that evaluates to 1 or 0, depending
+ whether the gnulib module malloc-gnu shall be considered present. */
+#undef GNULIB_MALLOC_GNU
+
+/* Define to a C preprocessor expression that evaluates to 1 or 0, depending
whether the gnulib module pipe2-safer shall be considered present. */
#undef GNULIB_PIPE2_SAFER
/* Define to a C preprocessor expression that evaluates to 1 or 0, depending
+ whether the gnulib module scanf shall be considered present. */
+#undef GNULIB_SCANF
+
+/* Define to a C preprocessor expression that evaluates to 1 or 0, depending
whether the gnulib module sigpipe shall be considered present. */
#undef GNULIB_SIGPIPE
@@ -94,6 +119,9 @@
/* Define to 1 when the gnulib module bind should be tested. */
#undef GNULIB_TEST_BIND
+/* Define to 1 when the gnulib module btowc should be tested. */
+#undef GNULIB_TEST_BTOWC
+
/* Define to 1 when the gnulib module cloexec should be tested. */
#undef GNULIB_TEST_CLOEXEC
@@ -118,6 +146,15 @@
/* Define to 1 when the gnulib module fseeko should be tested. */
#undef GNULIB_TEST_FSEEKO
+/* Define to 1 when the gnulib module fstat should be tested. */
+#undef GNULIB_TEST_FSTAT
+
+/* Define to 1 when the gnulib module ftell should be tested. */
+#undef GNULIB_TEST_FTELL
+
+/* Define to 1 when the gnulib module ftello should be tested. */
+#undef GNULIB_TEST_FTELLO
+
/* Define to 1 when the gnulib module futimens should be tested. */
#undef GNULIB_TEST_FUTIMENS
@@ -151,6 +188,9 @@
/* Define to 1 when the gnulib module listen should be tested. */
#undef GNULIB_TEST_LISTEN
+/* Define to 1 when the gnulib module localeconv should be tested. */
+#undef GNULIB_TEST_LOCALECONV
+
/* Define to 1 when the gnulib module lseek should be tested. */
#undef GNULIB_TEST_LSEEK
@@ -172,6 +212,12 @@
/* Define to 1 when the gnulib module memchr should be tested. */
#undef GNULIB_TEST_MEMCHR
+/* Define to 1 when the gnulib module mkstemp should be tested. */
+#undef GNULIB_TEST_MKSTEMP
+
+/* Define to 1 when the gnulib module nl_langinfo should be tested. */
+#undef GNULIB_TEST_NL_LANGINFO
+
/* Define to 1 when the gnulib module open should be tested. */
#undef GNULIB_TEST_OPEN
@@ -217,6 +263,9 @@
tested. */
#undef GNULIB_TEST_POSIX_SPAWN_FILE_ACTIONS_INIT
+/* Define to 1 when the gnulib module raise should be tested. */
+#undef GNULIB_TEST_RAISE
+
/* Define to 1 when the gnulib module rawmemchr should be tested. */
#undef GNULIB_TEST_RAWMEMCHR
@@ -262,12 +311,21 @@
/* Define to 1 when the gnulib module strerror_r should be tested. */
#undef GNULIB_TEST_STRERROR_R
+/* Define to 1 when the gnulib module strtok_r should be tested. */
+#undef GNULIB_TEST_STRTOK_R
+
/* Define to 1 when the gnulib module vasprintf should be tested. */
#undef GNULIB_TEST_VASPRINTF
+/* Define to 1 when the gnulib module vsnprintf should be tested. */
+#undef GNULIB_TEST_VSNPRINTF
+
/* Define to 1 when the gnulib module waitpid should be tested. */
#undef GNULIB_TEST_WAITPID
+/* Define to 1 when the gnulib module wcrtomb should be tested. */
+#undef GNULIB_TEST_WCRTOMB
+
/* Define to 1 when the gnulib module write should be tested. */
#undef GNULIB_TEST_WRITE
@@ -285,6 +343,9 @@
/* Define to 1 if you have the <bp-sym.h> header file. */
#undef HAVE_BP_SYM_H
+/* Define to 1 if you have the `btowc' function. */
+#undef HAVE_BTOWC
+
/* Define to 1 if you have the `catgets' function. */
#undef HAVE_CATGETS
@@ -353,6 +414,10 @@
*/
#undef HAVE_DECL_FSEEKO
+/* Define to 1 if you have the declaration of `ftello', and to 0 if you don't.
+ */
+#undef HAVE_DECL_FTELLO
+
/* Define to 1 if you have the declaration of `funlockfile', and to 0 if you
don't. */
#undef HAVE_DECL_FUNLOCKFILE
@@ -409,6 +474,10 @@
don't. */
#undef HAVE_DECL_INET_NTOP
+/* Define to 1 if you have the declaration of `isblank', and to 0 if you
+ don't. */
+#undef HAVE_DECL_ISBLANK
+
/* Define to 1 if you have the declaration of `mbrtowc', and to 0 if you
don't. */
#undef HAVE_DECL_MBRTOWC
@@ -437,10 +506,22 @@
don't. */
#undef HAVE_DECL_STRNCASECMP
+/* Define to 1 if you have the declaration of `strtok_r', and to 0 if you
+ don't. */
+#undef HAVE_DECL_STRTOK_R
+
/* Define to 1 if you have the declaration of `towlower', and to 0 if you
don't. */
#undef HAVE_DECL_TOWLOWER
+/* Define to 1 if you have the declaration of `vsnprintf', and to 0 if you
+ don't. */
+#undef HAVE_DECL_VSNPRINTF
+
+/* Define to 1 if you have the declaration of `wcrtomb', and to 0 if you
+ don't. */
+#undef HAVE_DECL_WCRTOMB
+
/* Define to 1 if you have the declaration of `_snprintf', and to 0 if you
don't. */
#undef HAVE_DECL__SNPRINTF
@@ -560,12 +641,21 @@
/* Define to 1 if you have the `isatty' function. */
#undef HAVE_ISATTY
+/* Define to 1 if you have the `isblank' function. */
+#undef HAVE_ISBLANK
+
/* Define to 1 if you have the `iswcntrl' function. */
#undef HAVE_ISWCNTRL
+/* Define to 1 if you have the `iswctype' function. */
+#undef HAVE_ISWCTYPE
+
/* Define if you have <langinfo.h> and nl_langinfo(CODESET). */
#undef HAVE_LANGINFO_CODESET
+/* Define to 1 if you have the <langinfo.h> header file. */
+#undef HAVE_LANGINFO_H
+
/* Define to 1 if you have the `dl' library (-ldl). */
#undef HAVE_LIBDL
@@ -581,9 +671,15 @@
/* Define to 1 if you have the `gpg-error' library (-lgpg-error). */
#undef HAVE_LIBGPG_ERROR
+/* Define to 1 if you have the <libintl.h> header file. */
+#undef HAVE_LIBINTL_H
+
/* Define to 1 if you have the `nsl' library (-lnsl). */
#undef HAVE_LIBNSL
+/* Define if libpcre is available. */
+#undef HAVE_LIBPCRE
+
/* Define to 1 if you have the `rt' library (-lrt). */
#undef HAVE_LIBRT
@@ -593,10 +689,16 @@
/* Define if you have the libssl library. */
#undef HAVE_LIBSSL
+/* Define to 1 if you have the `ssl32' library (-lssl32). */
+#undef HAVE_LIBSSL32
+
+/* Define if libuuid is available. */
+#undef HAVE_LIBUUID
+
/* Define to 1 if you have the `z' library (-lz). */
#undef HAVE_LIBZ
-/* Define to 1 if the system has the type `long long int'. */
+/* Define to 1 if the system has the type 'long long int'. */
#undef HAVE_LONG_LONG_INT
/* Define to 1 if you have the `lstat' function. */
@@ -605,6 +707,10 @@
/* Define to 1 if you have the `lutimes' function. */
#undef HAVE_LUTIMES
+/* Define to 1 if your system has a GNU libc compatible 'malloc' function, and
+ to 0 otherwise. */
+#undef HAVE_MALLOC_GNU
+
/* Define if the 'malloc' function is POSIX compliant. */
#undef HAVE_MALLOC_POSIX
@@ -630,12 +736,19 @@
/* Define to 1 if you have the `memrchr' function. */
#undef HAVE_MEMRCHR
+/* Define to 1 if you have the `mkstemp' function. */
+#undef HAVE_MKSTEMP
+
/* Define to 1 if you have a working `mmap' system call. */
#undef HAVE_MMAP
/* Define to 1 if you have the `mprotect' function. */
#undef HAVE_MPROTECT
+/* Define to 1 on MSVC platforms that have the "invalid parameter handler"
+ concept. */
+#undef HAVE_MSVC_INVALID_PARAMETER_HANDLER
+
/* Define if you have the nanosleep function. */
#undef HAVE_NANOSLEEP
@@ -648,6 +761,9 @@
/* Define to 1 if you have the <netinet/in.h> header file. */
#undef HAVE_NETINET_IN_H
+/* Define to 1 if you have the `nl_langinfo' function. */
+#undef HAVE_NL_LANGINFO
+
/* Define to 1 if you have the <paths.h> header file. */
#undef HAVE_PATHS_H
@@ -675,6 +791,9 @@
/* Define to 1 if you have the <pwd.h> header file. */
#undef HAVE_PWD_H
+/* Define to 1 if you have the `raise' function. */
+#undef HAVE_RAISE
+
/* Define to 1 if you have the `rawmemchr' function. */
#undef HAVE_RAWMEMCHR
@@ -697,6 +816,9 @@
macros. */
#undef HAVE_RAW_DECL_CANONICALIZE_FILE_NAME
+/* Define to 1 if chdir is declared even after undefining macros. */
+#undef HAVE_RAW_DECL_CHDIR
+
/* Define to 1 if chown is declared even after undefining macros. */
#undef HAVE_RAW_DECL_CHOWN
@@ -706,12 +828,18 @@
/* Define to 1 if dprintf is declared even after undefining macros. */
#undef HAVE_RAW_DECL_DPRINTF
+/* Define to 1 if dup is declared even after undefining macros. */
+#undef HAVE_RAW_DECL_DUP
+
/* Define to 1 if dup2 is declared even after undefining macros. */
#undef HAVE_RAW_DECL_DUP2
/* Define to 1 if dup3 is declared even after undefining macros. */
#undef HAVE_RAW_DECL_DUP3
+/* Define to 1 if duplocale is declared even after undefining macros. */
+#undef HAVE_RAW_DECL_DUPLOCALE
+
/* Define to 1 if endusershell is declared even after undefining macros. */
#undef HAVE_RAW_DECL_ENDUSERSHELL
@@ -736,6 +864,9 @@
/* Define to 1 if fcntl is declared even after undefining macros. */
#undef HAVE_RAW_DECL_FCNTL
+/* Define to 1 if fdatasync is declared even after undefining macros. */
+#undef HAVE_RAW_DECL_FDATASYNC
+
/* Define to 1 if ffs is declared even after undefining macros. */
#undef HAVE_RAW_DECL_FFS
@@ -754,6 +885,9 @@
/* Define to 1 if fseeko is declared even after undefining macros. */
#undef HAVE_RAW_DECL_FSEEKO
+/* Define to 1 if fstat is declared even after undefining macros. */
+#undef HAVE_RAW_DECL_FSTAT
+
/* Define to 1 if fstatat is declared even after undefining macros. */
#undef HAVE_RAW_DECL_FSTATAT
@@ -814,6 +948,9 @@
/* Define to 1 if getpeername is declared even after undefining macros. */
#undef HAVE_RAW_DECL_GETPEERNAME
+/* Define to 1 if gets is declared even after undefining macros. */
+#undef HAVE_RAW_DECL_GETS
+
/* Define to 1 if getsockname is declared even after undefining macros. */
#undef HAVE_RAW_DECL_GETSOCKNAME
@@ -841,12 +978,18 @@
/* Define to 1 if inet_pton is declared even after undefining macros. */
#undef HAVE_RAW_DECL_INET_PTON
-/* Define to 1 if initstat_r is declared even after undefining macros. */
-#undef HAVE_RAW_DECL_INITSTAT_R
+/* Define to 1 if initstate is declared even after undefining macros. */
+#undef HAVE_RAW_DECL_INITSTATE
+
+/* Define to 1 if initstate_r is declared even after undefining macros. */
+#undef HAVE_RAW_DECL_INITSTATE_R
/* Define to 1 if ioctl is declared even after undefining macros. */
#undef HAVE_RAW_DECL_IOCTL
+/* Define to 1 if isatty is declared even after undefining macros. */
+#undef HAVE_RAW_DECL_ISATTY
+
/* Define to 1 if iswctype is declared even after undefining macros. */
#undef HAVE_RAW_DECL_ISWCTYPE
@@ -925,9 +1068,15 @@
/* Define to 1 if mkstemps is declared even after undefining macros. */
#undef HAVE_RAW_DECL_MKSTEMPS
+/* Define to 1 if nl_langinfo is declared even after undefining macros. */
+#undef HAVE_RAW_DECL_NL_LANGINFO
+
/* Define to 1 if openat is declared even after undefining macros. */
#undef HAVE_RAW_DECL_OPENAT
+/* Define to 1 if pclose is declared even after undefining macros. */
+#undef HAVE_RAW_DECL_PCLOSE
+
/* Define to 1 if pipe is declared even after undefining macros. */
#undef HAVE_RAW_DECL_PIPE
@@ -937,6 +1086,9 @@
/* Define to 1 if popen is declared even after undefining macros. */
#undef HAVE_RAW_DECL_POPEN
+/* Define to 1 if posix_openpt is declared even after undefining macros. */
+#undef HAVE_RAW_DECL_POSIX_OPENPT
+
/* Define to 1 if posix_spawn is declared even after undefining macros. */
#undef HAVE_RAW_DECL_POSIX_SPAWN
@@ -1031,9 +1183,15 @@
/* Define to 1 if ptsname is declared even after undefining macros. */
#undef HAVE_RAW_DECL_PTSNAME
+/* Define to 1 if ptsname_r is declared even after undefining macros. */
+#undef HAVE_RAW_DECL_PTSNAME_R
+
/* Define to 1 if pwrite is declared even after undefining macros. */
#undef HAVE_RAW_DECL_PWRITE
+/* Define to 1 if random is declared even after undefining macros. */
+#undef HAVE_RAW_DECL_RANDOM
+
/* Define to 1 if random_r is declared even after undefining macros. */
#undef HAVE_RAW_DECL_RANDOM_R
@@ -1076,9 +1234,18 @@
/* Define to 1 if setenv is declared even after undefining macros. */
#undef HAVE_RAW_DECL_SETENV
+/* Define to 1 if sethostname is declared even after undefining macros. */
+#undef HAVE_RAW_DECL_SETHOSTNAME
+
+/* Define to 1 if setlocale is declared even after undefining macros. */
+#undef HAVE_RAW_DECL_SETLOCALE
+
/* Define to 1 if setsockopt is declared even after undefining macros. */
#undef HAVE_RAW_DECL_SETSOCKOPT
+/* Define to 1 if setstate is declared even after undefining macros. */
+#undef HAVE_RAW_DECL_SETSTATE
+
/* Define to 1 if setstate_r is declared even after undefining macros. */
#undef HAVE_RAW_DECL_SETSTATE_R
@@ -1121,6 +1288,9 @@
/* Define to 1 if socket is declared even after undefining macros. */
#undef HAVE_RAW_DECL_SOCKET
+/* Define to 1 if srandom is declared even after undefining macros. */
+#undef HAVE_RAW_DECL_SRANDOM
+
/* Define to 1 if srandom_r is declared even after undefining macros. */
#undef HAVE_RAW_DECL_SRANDOM_R
@@ -1454,12 +1624,18 @@
/* Define to 1 if you have the `strptime' function. */
#undef HAVE_STRPTIME
+/* Define to 1 if you have the `strtok_r' function. */
+#undef HAVE_STRTOK_R
+
/* Define to 1 if you have the `strtoll' function. */
#undef HAVE_STRTOLL
/* Define to 1 if the system has the type `struct addrinfo'. */
#undef HAVE_STRUCT_ADDRINFO
+/* Define to 1 if `decimal_point' is a member of `struct lconv'. */
+#undef HAVE_STRUCT_LCONV_DECIMAL_POINT
+
/* Define to 1 if `sa_sigaction' is a member of `struct sigaction'. */
#undef HAVE_STRUCT_SIGACTION_SA_SIGACTION
@@ -1568,7 +1744,7 @@
/* Define to 1 if you have the <unistd.h> header file. */
#undef HAVE_UNISTD_H
-/* Define to 1 if the system has the type `unsigned long long int'. */
+/* Define to 1 if the system has the type 'unsigned long long int'. */
#undef HAVE_UNSIGNED_LONG_LONG_INT
/* Define to 1 if you have the `usleep' function. */
@@ -1607,6 +1783,9 @@
/* Define to 1 if you have the `wcrtomb' function. */
#undef HAVE_WCRTOMB
+/* Define to 1 if you have the `wcscoll' function. */
+#undef HAVE_WCSCOLL
+
/* Define to 1 if you have the `wcslen' function. */
#undef HAVE_WCSLEN
@@ -1634,21 +1813,40 @@
/* Define to 1 if O_NOFOLLOW works. */
#undef HAVE_WORKING_O_NOFOLLOW
+/* Define if you have the posix_spawn and posix_spawnp functions and they
+ work. */
+#undef HAVE_WORKING_POSIX_SPAWN
+
/* Define if utimes works properly. */
#undef HAVE_WORKING_UTIMES
/* Define to 1 if you have the <ws2tcpip.h> header file. */
#undef HAVE_WS2TCPIP_H
+/* Define to 1 if you have the <xlocale.h> header file. */
+#undef HAVE_XLOCALE_H
+
/* Define to 1 if the system has the type `_Bool'. */
#undef HAVE__BOOL
+/* Define to 1 if you have the `_fseeki64' function. */
+#undef HAVE__FSEEKI64
+
+/* Define to 1 if you have the `_ftelli64' function. */
+#undef HAVE__FTELLI64
+
/* Define to 1 if you have the `_ftime' function. */
#undef HAVE__FTIME
+/* Define to 1 if you have the `_set_invalid_parameter_handler' function. */
+#undef HAVE__SET_INVALID_PARAMETER_HANDLER
+
/* Define to 1 if you have the `__fsetlocking' function. */
#undef HAVE___FSETLOCKING
+/* Define to 1 if you have the `__secure_getenv' function. */
+#undef HAVE___SECURE_GETENV
+
/* Define to 1 if you have the `__xpg_strerror_r' function. */
#undef HAVE___XPG_STRERROR_R
@@ -1658,7 +1856,7 @@
/* Define to 1 if lseek does not detect pipes. */
#undef LSEEK_PIPE_BROKEN
-/* Define to 1 if `lstat' dereferences a symlink specified with a trailing
+/* Define to 1 if 'lstat' dereferences a symlink specified with a trailing
slash. */
#undef LSTAT_FOLLOWS_SLASHED_SYMLINK
@@ -1726,6 +1924,9 @@
slash */
#undef REPLACE_FUNC_STAT_FILE
+/* Define if nl_langinfo exists but is overridden by gnulib. */
+#undef REPLACE_NL_LANGINFO
+
/* Define to 1 if strerror(0) does not return a message implying success. */
#undef REPLACE_STRERROR_0
@@ -1767,9 +1968,9 @@
/* If using the C implementation of alloca, define if you know the
direction of stack growth for your system; otherwise it will be
automatically deduced at runtime.
- STACK_DIRECTION > 0 => grows toward higher addresses
- STACK_DIRECTION < 0 => grows toward lower addresses
- STACK_DIRECTION = 0 => direction of growth unknown */
+ STACK_DIRECTION > 0 => grows toward higher addresses
+ STACK_DIRECTION < 0 => grows toward lower addresses
+ STACK_DIRECTION = 0 => direction of growth unknown */
#undef STACK_DIRECTION
/* Define to 1 if the `S_IS*' macros in <sys/stat.h> do not work properly. */
@@ -1815,8 +2016,8 @@
safe for multithreaded apps. */
#undef USE_UNLOCKED_IO
-/* Define if the Win32 multithreading API can be used. */
-#undef USE_WIN32_THREADS
+/* Define if the native Windows multithreading API can be used. */
+#undef USE_WINDOWS_THREADS
/* Version number of package */
#undef VERSION
@@ -1848,7 +2049,7 @@
`char[]'. */
#undef YYTEXT_POINTER
-/* Enable large inode numbers on Mac OS X. */
+/* Enable large inode numbers on Mac OS X 10.5. */
#ifndef _DARWIN_USE_64_BIT_INODE
# define _DARWIN_USE_64_BIT_INODE 1
#endif
@@ -1856,21 +2057,29 @@
/* Number of bits in a file offset, on hosts where this is settable. */
#undef _FILE_OFFSET_BITS
+/* Define to 1 if Gnulib overrides 'struct stat' on Windows so that struct
+ stat.st_size becomes 64-bit. */
+#undef _GL_WINDOWS_64_BIT_ST_SIZE
+
/* Define to 1 to make fseeko visible on some hosts (e.g. glibc 2.2). */
#undef _LARGEFILE_SOURCE
/* Define for large files, on AIX-style hosts. */
#undef _LARGE_FILES
+/* Define to 1 on Solaris. */
+#undef _LCONV_C99
+
/* Define to 1 if on MINIX. */
#undef _MINIX
-/* The _Noreturn keyword of draft C1X. */
-#ifndef _Noreturn
+/* The _Noreturn keyword of C11. */
+#if ! (defined _Noreturn \
+ || (defined __STDC_VERSION__ && 201112 <= __STDC_VERSION__))
# if (3 <= __GNUC__ || (__GNUC__ == 2 && 8 <= __GNUC_MINOR__) \
|| 0x5110 <= __SUNPRO_C)
# define _Noreturn __attribute__ ((__noreturn__))
-# elif 1200 <= _MSC_VER
+# elif defined _MSC_VER && 1200 <= _MSC_VER
# define _Noreturn __declspec (noreturn)
# else
# define _Noreturn
@@ -1886,9 +2095,16 @@
functions. */
#undef _POSIX_PII_SOCKET
-/* Define to 1 if you need to in order for `stat' and other things to work. */
+/* Define to 1 if you need to in order for 'stat' and other things to work. */
#undef _POSIX_SOURCE
+/* Define if you want <regex.h> to include <limits.h>, so that it consistently
+ overrides <limits.h>'s RE_DUP_MAX. */
+#undef _REGEX_INCLUDE_LIMITS_H
+
+/* Define if you want regoff_t to be at least as wide as POSIX requires. */
+#undef _REGEX_LARGE_OFFSETS
+
/* Define to 500 only on HP-UX. */
#undef _XOPEN_SOURCE
@@ -1896,7 +2112,7 @@
#ifndef _ALL_SOURCE
# undef _ALL_SOURCE
#endif
-/* Enable general extensions on MacOS X. */
+/* Enable general extensions on Mac OS X. */
#ifndef _DARWIN_C_SOURCE
# undef _DARWIN_C_SOURCE
#endif
@@ -1925,6 +2141,35 @@
/* Define to empty if `const' does not conform to ANSI C. */
#undef const
+/* _GL_INLINE is a portable alternative to ISO C99 plain 'inline'.
+ _GL_EXTERN_INLINE is a portable alternative to 'extern inline'.
+ _GL_INLINE_HEADER_BEGIN contains useful stuff to put
+ in an include file, before uses of _GL_INLINE.
+ It suppresses GCC's bogus "no previous prototype for 'FOO'" diagnostic,
+ when FOO is an inline function in the header; see
+ <http://gcc.gnu.org/bugzilla/show_bug.cgi?id=54113>.
+ _GL_INLINE_HEADER_END contains useful stuff to put
+ in the same include file, after uses of _GL_INLINE. */
+#if __GNUC__ ? __GNUC_STDC_INLINE__ : 199901L <= __STDC_VERSION__
+# define _GL_INLINE inline
+# define _GL_EXTERN_INLINE extern inline
+# if (__GNUC__ == 4 && 6 <= __GNUC_MINOR__) || 4 < __GNUC__
+# define _GL_INLINE_HEADER_BEGIN \
+ _Pragma ("GCC diagnostic push") \
+ _Pragma ("GCC diagnostic ignored \"-Wmissing-prototypes\"")
+# define _GL_INLINE_HEADER_END \
+ _Pragma ("GCC diagnostic pop")
+# endif
+#else
+# define _GL_INLINE static inline
+# define _GL_EXTERN_INLINE static inline
+#endif
+
+#ifndef _GL_INLINE_HEADER_BEGIN
+# define _GL_INLINE_HEADER_BEGIN
+# define _GL_INLINE_HEADER_END
+#endif
+
/* Define to a replacement function name for getpass(). */
#undef getpass
@@ -1946,7 +2191,7 @@
/* Work around a bug in Apple GCC 4.0.1 build 5465: In C99 mode, it supports
the ISO C 99 semantics of 'extern inline' (unlike the GNU C semantics of
earlier versions), but does not display it by setting __GNUC_STDC_INLINE__.
- __APPLE__ && __MACH__ test for MacOS X.
+ __APPLE__ && __MACH__ test for Mac OS X.
__APPLE_CC__ tests for the Apple compiler and its version.
__STDC_VERSION__ tests for the C99 mode. */
#if defined __APPLE__ && defined __MACH__ && __APPLE_CC__ >= 5465 && !defined __cplusplus && __STDC_VERSION__ >= 199901L && !defined __GNUC_STDC_INLINE__
@@ -1972,6 +2217,51 @@
doesn't define it. */
#undef ptrdiff_t
+/* Define to rpl_re_comp if the replacement should be used. */
+#undef re_comp
+
+/* Define to rpl_re_compile_fastmap if the replacement should be used. */
+#undef re_compile_fastmap
+
+/* Define to rpl_re_compile_pattern if the replacement should be used. */
+#undef re_compile_pattern
+
+/* Define to rpl_re_exec if the replacement should be used. */
+#undef re_exec
+
+/* Define to rpl_re_match if the replacement should be used. */
+#undef re_match
+
+/* Define to rpl_re_match_2 if the replacement should be used. */
+#undef re_match_2
+
+/* Define to rpl_re_search if the replacement should be used. */
+#undef re_search
+
+/* Define to rpl_re_search_2 if the replacement should be used. */
+#undef re_search_2
+
+/* Define to rpl_re_set_registers if the replacement should be used. */
+#undef re_set_registers
+
+/* Define to rpl_re_set_syntax if the replacement should be used. */
+#undef re_set_syntax
+
+/* Define to rpl_re_syntax_options if the replacement should be used. */
+#undef re_syntax_options
+
+/* Define to rpl_regcomp if the replacement should be used. */
+#undef regcomp
+
+/* Define to rpl_regerror if the replacement should be used. */
+#undef regerror
+
+/* Define to rpl_regexec if the replacement should be used. */
+#undef regexec
+
+/* Define to rpl_regfree if the replacement should be used. */
+#undef regfree
+
/* Define to the equivalent of the C99 'restrict' keyword, or to
nothing if this is not supported. Do not define if restrict is
supported directly. */
@@ -1987,7 +2277,7 @@
#endif
/* Define as an integer type suitable for memory locations that can be
- accessed atomically even in the presence of asynchnonous signals. */
+ accessed atomically even in the presence of asynchronous signals. */
#undef sig_atomic_t
/* Define to `unsigned int' if <sys/types.h> does not define. */
@@ -1996,6 +2286,9 @@
/* type to use in place of socklen_t if not defined */
#undef socklen_t
+/* Define as a signed type of the same size as size_t. */
+#undef ssize_t
+
/* Define to `int' if <sys/types.h> doesn't define. */
#undef uid_t
diff --git a/src/connect.c b/src/connect.c
index e12c049..119ccb7 100644
--- a/src/connect.c
+++ b/src/connect.c
@@ -53,9 +53,7 @@ as that of the covered work. */
#include <errno.h>
#include <string.h>
-#ifdef HAVE_SYS_TIME_H
-# include <sys/time.h>
-#endif
+#include <sys/time.h>
#include "utils.h"
#include "host.h"
#include "connect.h"
@@ -293,7 +291,12 @@ connect_to_ip (const ip_address *ip, int port, const char *print)
xfree (str);
}
else
- logprintf (LOG_VERBOSE, _("Connecting to %s:%d... "), txt_addr, port);
+ {
+ if (ip->family == AF_INET)
+ logprintf (LOG_VERBOSE, _("Connecting to %s:%d... "), txt_addr, port);
+ else if (ip->family == AF_INET6)
+ logprintf (LOG_VERBOSE, _("Connecting to [%s]:%d... "), txt_addr, port);
+ }
}
/* Store the sockaddr info to SA. */
@@ -581,6 +584,36 @@ socket_ip_address (int sock, ip_address *ip, int endpoint)
}
}
+/* Get the address family of the socket SOCK.
+ Return the family type (the sa_family value reported for the socket) on
+ success, or -1 if the getsockname/getpeername call fails.
+
+ If ENDPOINT is ENDPOINT_LOCAL, it returns the sock family of the local
+ (client) side of the socket. Else if ENDPOINT is ENDPOINT_PEER, it
+ returns the sock family of the remote (peer's) side of the socket.
+ Any other ENDPOINT value aborts the program. */
+
+int
+socket_family (int sock, int endpoint)
+{
+ struct sockaddr_storage storage;
+ struct sockaddr *sockaddr = (struct sockaddr *) &storage;
+ socklen_t addrlen = sizeof (storage);
+ int ret;
+
+ memset (sockaddr, 0, addrlen);
+
+ if (endpoint == ENDPOINT_LOCAL)
+ ret = getsockname (sock, sockaddr, &addrlen);
+ else if (endpoint == ENDPOINT_PEER)
+ ret = getpeername (sock, sockaddr, &addrlen);
+ else
+ abort ();
+
+ if (ret < 0)
+ return -1;
+
+ return sockaddr->sa_family;
+}
+
/* Return true if the error from the connect code can be considered
retryable. Wget normally retries after errors, but the exception
are the "unsupported protocol" type errors (possible on IPv4/IPv6
diff --git a/src/connect.h b/src/connect.h
index 20bb243..bb3f26a 100644
--- a/src/connect.h
+++ b/src/connect.h
@@ -51,6 +51,7 @@ enum {
ENDPOINT_PEER
};
bool socket_ip_address (int, ip_address *, int);
+int socket_family (int sock, int endpoint);
bool retryable_socket_connect_error (int);
diff --git a/src/convert.c b/src/convert.c
index c6ccf53..f5a9cba 100644
--- a/src/convert.c
+++ b/src/convert.c
@@ -58,7 +58,7 @@ struct hash_table *downloaded_css_set;
static void convert_links (const char *, struct urlpos *);
-void
+static void
convert_links_in_hashtable (struct hash_table *downloaded_set,
int is_css,
int *file_count)
@@ -124,6 +124,9 @@ convert_links_in_hashtable (struct hash_table *downloaded_set,
set_uri_encoding (pi, opt.locale, true);
u = url_parse (cur_url->url->url, NULL, pi, true);
+ if (!u)
+ continue;
+
local_name = hash_table_get (dl_url_file_map, u->url);
/* Decide on the conversion type. */
@@ -870,7 +873,7 @@ register_delete_file (const char *file)
/* Register that FILE is an HTML file that has been downloaded. */
void
-register_html (const char *url, const char *file)
+register_html (const char *file)
{
if (!downloaded_html_set)
downloaded_html_set = make_string_hash_table (0);
@@ -880,7 +883,7 @@ register_html (const char *url, const char *file)
/* Register that FILE is a CSS file that has been downloaded. */
void
-register_css (const char *url, const char *file)
+register_css (const char *file)
{
if (!downloaded_css_set)
downloaded_css_set = make_string_hash_table (0);
diff --git a/src/convert.h b/src/convert.h
index 1f034e5..cdd0a48 100644
--- a/src/convert.h
+++ b/src/convert.h
@@ -101,8 +101,8 @@ downloaded_file_t downloaded_file (downloaded_file_t, const char *);
void register_download (const char *, const char *);
void register_redirection (const char *, const char *);
-void register_html (const char *, const char *);
-void register_css (const char *, const char *);
+void register_html (const char *);
+void register_css (const char *);
void register_delete_file (const char *);
void convert_all_links (void);
void convert_cleanup (void);
diff --git a/src/cookies.c b/src/cookies.c
index 7c3fb1c..a10971c 100644
--- a/src/cookies.c
+++ b/src/cookies.c
@@ -391,6 +391,9 @@ parse_set_cookie (const char *set_cookie, bool silent)
goto error;
BOUNDED_TO_ALLOCA (value.b, value.e, value_copy);
+ /* Check if expiration spec is valid.
+ If not, assume default (cookie doesn't expire, but valid only for
+ this session.) */
expires = http_atotm (value_copy);
if (expires != (time_t) -1)
{
@@ -402,10 +405,6 @@ parse_set_cookie (const char *set_cookie, bool silent)
if (cookie->expiry_time < cookies_now)
cookie->discard_requested = 1;
}
- else
- /* Error in expiration spec. Assume default (cookie doesn't
- expire, but valid only for this session.) */
- ;
}
else if (TOKEN_IS (name, "max-age"))
{
@@ -433,9 +432,7 @@ parse_set_cookie (const char *set_cookie, bool silent)
/* ignore value completely */
cookie->secure = 1;
}
- else
- /* Ignore unrecognized attribute. */
- ;
+ /* else: Ignore unrecognized attribute. */
}
if (*ptr)
/* extract_param has encountered a syntax error */
diff --git a/src/css-url.c b/src/css-url.c
index de1caad..f97690d 100644
--- a/src/css-url.c
+++ b/src/css-url.c
@@ -55,6 +55,7 @@ as that of the covered work. */
#include "convert.h"
#include "html-url.h"
#include "css-tokens.h"
+#include "css-url.h"
/* from lex.yy.c */
extern char *yytext;
@@ -107,7 +108,7 @@ const char *token_names[] = {
whitespace after the opening parenthesis and before the closing
parenthesis.
*/
-char *
+static char *
get_uri_string (const char *at, int *pos, int *length)
{
char *uri;
diff --git a/src/css-url.h b/src/css-url.h
index 8d32c34..7f940e6 100644
--- a/src/css-url.h
+++ b/src/css-url.h
@@ -31,6 +31,7 @@ as that of the covered work. */
#define CSS_URL_H
void get_urls_css (struct map_context *, int, int);
+void get_urls_css (struct map_context *, int, int);
struct urlpos *get_urls_css_file (const char *, const char *);
#endif /* CSS_URL_H */
diff --git a/src/exits.c b/src/exits.c
index 3d846b5..e23fc1c 100644
--- a/src/exits.c
+++ b/src/exits.c
@@ -1,7 +1,5 @@
-/* Command line parsing.
- Copyright (C) 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004,
- 2005, 2006, 2007, 2008, 2009, 2010, 2011 Free Software Foundation,
- Inc.
+/* Exit status handling.
+ Copyright (C) 2009, 2010, 2011, 2012 Free Software Foundation, Inc.
This file is part of GNU Wget.
@@ -60,7 +58,7 @@ get_status_for_err (uerr_t err)
case RETROK:
return WGET_EXIT_SUCCESS;
case FOPENERR: case FOPEN_EXCL_ERR: case FWRITEERR: case WRITEFAILED:
- case UNLINKERR:
+ case UNLINKERR: case CLOSEFAILED:
return WGET_EXIT_IO_FAIL;
case NOCONERROR: case HOSTERR: case CONSOCKERR: case CONERROR:
case CONSSLERR: case CONIMPOSSIBLE: case FTPRERR: case FTPINVPASV:
diff --git a/src/exits.h b/src/exits.h
index dfe9516..98dde9a 100644
--- a/src/exits.h
+++ b/src/exits.h
@@ -1,5 +1,5 @@
-/* Internationalization related declarations.
- Copyright (C) 2008, 2009, 2010, 2011 Free Software Foundation, Inc.
+/* Exit status related declarations.
+ Copyright (C) 2009, 2010, 2011, 2012 Free Software Foundation, Inc.
This file is part of GNU Wget.
diff --git a/src/ftp-basic.c b/src/ftp-basic.c
index 178fdfe..045d125 100644
--- a/src/ftp-basic.c
+++ b/src/ftp-basic.c
@@ -524,7 +524,10 @@ ftp_pasv (int csock, ip_address *addr, int *port)
for (s += 4; *s && !c_isdigit (*s); s++)
;
if (!*s)
- return FTPINVPASV;
+ {
+ xfree (respline);
+ return FTPINVPASV;
+ }
for (i = 0; i < 6; i++)
{
tmp[i] = 0;
@@ -593,7 +596,10 @@ ftp_lpsv (int csock, ip_address *addr, int *port)
for (s += 4; *s && !c_isdigit (*s); s++)
;
if (!*s)
- return FTPINVPASV;
+ {
+ xfree (respline);
+ return FTPINVPASV;
+ }
/* First, get the address family */
af = 0;
diff --git a/src/ftp.c b/src/ftp.c
index a586d84..669e663 100644
--- a/src/ftp.c
+++ b/src/ftp.c
@@ -49,6 +49,7 @@ as that of the covered work. */
#include "netrc.h"
#include "convert.h" /* for downloaded_file */
#include "recur.h" /* for INFINITE_RECURSION */
+#include "warc.h"
#ifdef __VMS
# include "vms.h"
@@ -237,17 +238,17 @@ static uerr_t ftp_get_listing (struct url *, ccon *, struct fileinfo **);
/* Retrieves a file with denoted parameters through opening an FTP
connection to the server. It always closes the data connection,
- and closes the control connection in case of error. */
+ and closes the control connection in case of error. If warc_tmp
+ is non-NULL, the downloaded data will be written there as well. */
static uerr_t
getftp (struct url *u, wgint passed_expected_bytes, wgint *qtyread,
- wgint restval, ccon *con, int count)
+ wgint restval, ccon *con, int count, FILE *warc_tmp)
{
int csock, dtsock, local_sock, res;
uerr_t err = RETROK; /* appease the compiler */
FILE *fp;
- char *user, *passwd, *respline;
- char *tms;
- const char *tmrate;
+ char *respline, *tms;
+ const char *user, *passwd, *tmrate;
int cmd = con->cmd;
bool pasv_mode_open = false;
wgint expected_bytes = 0;
@@ -287,13 +288,6 @@ getftp (struct url *u, wgint passed_expected_bytes, wgint *qtyread,
{
char *host = con->proxy ? con->proxy->host : u->host;
int port = con->proxy ? con->proxy->port : u->port;
- char *logname = user;
-
- if (con->proxy)
- {
- /* If proxy is in use, log in as username@target-site. */
- logname = concat_strings (user, "@", u->host, (char *) 0);
- }
/* Login to the server: */
@@ -301,20 +295,10 @@ getftp (struct url *u, wgint passed_expected_bytes, wgint *qtyread,
csock = connect_to_host (host, port);
if (csock == E_HOST)
- {
- if (con->proxy)
- xfree (logname);
-
return HOSTERR;
- }
else if (csock < 0)
- {
- if (con->proxy)
- xfree (logname);
-
return (retryable_socket_connect_error (errno)
? CONERROR : CONIMPOSSIBLE);
- }
if (cmd & LEAVE_PENDING)
con->csock = csock;
@@ -326,10 +310,15 @@ getftp (struct url *u, wgint passed_expected_bytes, wgint *qtyread,
quotearg_style (escape_quoting_style, user));
if (opt.server_response)
logputs (LOG_ALWAYS, "\n");
- err = ftp_login (csock, logname, passwd);
-
if (con->proxy)
- xfree (logname);
+ {
+ /* If proxy is in use, log in as username@target-site. */
+ char *logname = concat_strings (user, "@", u->host, (char *) 0);
+ err = ftp_login (csock, logname, passwd);
+ xfree (logname);
+ }
+ else
+ err = ftp_login (csock, user, passwd);
/* FTPRERR, FTPSRVERR, WRITEFAILED, FTPLOGREFUSED, FTPLOGINC */
switch (err)
@@ -512,7 +501,7 @@ Error in server response, closing control connection.\n"));
logputs (LOG_VERBOSE, _("==> CWD not needed.\n"));
else
{
- char *targ = NULL;
+ const char *targ = NULL;
int cwd_count;
int cwd_end;
int cwd_start;
@@ -1152,13 +1141,25 @@ Error in server response, closing control connection.\n"));
Elsewhere, define a constant "binary" flag.
Isn't it nice to have distinct text and binary file types?
*/
-# define BIN_TYPE_TRANSFER (type_char != 'A')
+/* 2011-09-30 SMS.
+ Added listing files to the set of non-"binary" (text, Stream_LF)
+ files. (Wget works either way, but other programs, like, say, text
+ editors, work better on listing files which have text attributes.)
+ Now we use "binary" attributes for a binary ("IMAGE") transfer,
+ unless "--ftp-stmlf" was specified, and we always use non-"binary"
+ (text, Stream_LF) attributes for a listing file, or for an ASCII
+ transfer.
+ Tidied the VMS-specific BIN_TYPE_xxx macros, and changed the call to
+ fopen_excl() (restored?) to use BIN_TYPE_FILE instead of "true".
+*/
#ifdef __VMS
+# define BIN_TYPE_TRANSFER (type_char != 'A')
+# define BIN_TYPE_FILE \
+ ((!(cmd & DO_LIST)) && BIN_TYPE_TRANSFER && (opt.ftp_stmlf == 0))
# define FOPEN_OPT_ARGS "fop=sqo", "acc", acc_cb, &open_id
# define FOPEN_OPT_ARGS_BIN "ctx=bin,stm", "rfm=fix", "mrs=512" FOPEN_OPT_ARGS
-# define BIN_TYPE_FILE (BIN_TYPE_TRANSFER && (opt.ftp_stmlf == 0))
#else /* def __VMS */
-# define BIN_TYPE_FILE 1
+# define BIN_TYPE_FILE true
#endif /* def __VMS [else] */
if (restval && !(con->cmd & DO_LIST))
@@ -1182,7 +1183,7 @@ Error in server response, closing control connection.\n"));
}
else if (opt.noclobber || opt.always_rest || opt.timestamping || opt.dirstruct
|| opt.output_document || count > 0)
- {
+ {
if (opt.unlink && file_exists_p (con->target))
{
int res = unlink (con->target);
@@ -1217,7 +1218,7 @@ Error in server response, closing control connection.\n"));
}
else
{
- fp = fopen_excl (con->target, true);
+ fp = fopen_excl (con->target, BIN_TYPE_FILE);
if (!fp && errno == EEXIST)
{
/* We cannot just invent a new name and use it (which is
@@ -1262,7 +1263,7 @@ Error in server response, closing control connection.\n"));
rd_size = 0;
res = fd_read_body (dtsock, fp,
expected_bytes ? expected_bytes - restval : 0,
- restval, &rd_size, qtyread, &con->dltime, flags);
+ restval, &rd_size, qtyread, &con->dltime, flags, warc_tmp);
tms = datetime_str (time (NULL));
tmrate = retr_rate (rd_size, con->dltime);
@@ -1273,15 +1274,18 @@ Error in server response, closing control connection.\n"));
if (!output_stream || con->cmd & DO_LIST)
fclose (fp);
- /* If fd_read_body couldn't write to fp, bail out. */
- if (res == -2)
+ /* If fd_read_body couldn't write to fp or warc_tmp, bail out. */
+ if (res == -2 || (warc_tmp != NULL && res == -3))
{
logprintf (LOG_NOTQUIET, _("%s: %s, closing control connection.\n"),
con->target, strerror (errno));
fd_close (csock);
con->csock = -1;
fd_close (dtsock);
- return FWRITEERR;
+ if (res == -2)
+ return FWRITEERR;
+ else if (res == -3)
+ return WARC_TMP_FWRITEERR;
}
else if (res == -1)
{
@@ -1397,6 +1401,11 @@ ftp_loop_internal (struct url *u, struct fileinfo *f, ccon *con, char **local_fi
uerr_t err;
struct_stat st;
+ /* Declare WARC variables. */
+ bool warc_enabled = (opt.warc_filename != NULL);
+ FILE *warc_tmp = NULL;
+ ip_address *warc_ip = NULL;
+
/* Get the target, and set the name for the message accordingly. */
if ((f == NULL) && (con->target))
{
@@ -1433,6 +1442,21 @@ ftp_loop_internal (struct url *u, struct fileinfo *f, ccon *con, char **local_fi
orig_lp = con->cmd & LEAVE_PENDING ? 1 : 0;
+ /* For file RETR requests, we can write a WARC record.
+ We record the file contents to a temporary file. */
+ if (warc_enabled && (con->cmd & DO_RETR))
+ {
+ warc_tmp = warc_tempfile ();
+ if (warc_tmp == NULL)
+ return WARC_TMP_FOPENERR;
+
+ if (!con->proxy && con->csock != -1)
+ {
+ warc_ip = (ip_address *) alloca (sizeof (ip_address));
+ socket_ip_address (con->csock, warc_ip, ENDPOINT_PEER);
+ }
+ }
+
/* THE loop. */
do
{
@@ -1497,7 +1521,10 @@ ftp_loop_internal (struct url *u, struct fileinfo *f, ccon *con, char **local_fi
len = f->size;
else
len = 0;
- err = getftp (u, len, &qtyread, restval, con, count);
+
+ /* If we are working on a WARC record, getftp should also write
+ to the warc_tmp file. */
+ err = getftp (u, len, &qtyread, restval, con, count, warc_tmp);
if (con->csock == -1)
con->st &= ~DONE_CWD;
@@ -1508,8 +1535,10 @@ ftp_loop_internal (struct url *u, struct fileinfo *f, ccon *con, char **local_fi
{
case HOSTERR: case CONIMPOSSIBLE: case FWRITEERR: case FOPENERR:
case FTPNSFOD: case FTPLOGINC: case FTPNOPASV: case CONTNOTSUPPORTED:
- case UNLINKERR:
+ case UNLINKERR: case WARC_TMP_FWRITEERR:
/* Fatal errors, give up. */
+ if (warc_tmp != NULL)
+ fclose (warc_tmp);
return err;
case CONSOCKERR: case CONERROR: case FTPSRVERR: case FTPRERR:
case WRITEFAILED: case FTPUNKNOWNTYPE: case FTPSYSERR:
@@ -1577,6 +1606,19 @@ ftp_loop_internal (struct url *u, struct fileinfo *f, ccon *con, char **local_fi
xfree (hurl);
}
+ if (warc_enabled && (con->cmd & DO_RETR))
+ {
+ /* Create and store a WARC resource record for the retrieved file. */
+ bool warc_res;
+
+ warc_res = warc_write_resource_record (NULL, u->url, NULL, NULL,
+ warc_ip, NULL, warc_tmp, -1);
+ if (! warc_res)
+ return WARC_ERR;
+
+ /* warc_write_resource_record has also closed warc_tmp. */
+ }
+
if ((con->cmd & DO_LIST))
/* This is a directory listing file. */
{
@@ -1880,8 +1922,10 @@ Already have correct symlink %s -> %s\n\n"),
set_local_file (&actual_target, con->target);
- /* If downloading a plain file, set valid (non-zero) permissions. */
- if (dlthis && (actual_target != NULL) && (f->type == FT_PLAINFILE))
+ /* If downloading a plain file, and the user requested it, then
+ set valid (non-zero) permissions. */
+ if (dlthis && (actual_target != NULL) &&
+ (f->type == FT_PLAINFILE) && opt.preserve_perm)
{
if (f->perms)
chmod (actual_target, f->perms);
@@ -1914,7 +1958,9 @@ Already have correct symlink %s -> %s\n\n"),
xfree (ofile);
/* Break on fatals. */
- if (err == QUOTEXC || err == HOSTERR || err == FWRITEERR)
+ if (err == QUOTEXC || err == HOSTERR || err == FWRITEERR
+ || err == WARC_ERR || err == WARC_TMP_FOPENERR
+ || err == WARC_TMP_FWRITEERR)
break;
con->cmd &= ~ (DO_CWD | DO_LOGIN);
f = f->next;
diff --git a/src/gnutls.c b/src/gnutls.c
index 40a04ef..32c6d17 100644
--- a/src/gnutls.c
+++ b/src/gnutls.c
@@ -1,5 +1,5 @@
/* SSL support via GnuTLS library.
- Copyright (C) 2005, 2006, 2007, 2008, 2009, 2010, 2011 Free Software
+ Copyright (C) 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012 Free Software
Foundation, Inc.
This file is part of GNU Wget.
@@ -54,15 +54,38 @@ as that of the covered work. */
# include "w32sock.h"
#endif
+#include "host.h"
+
+/* Translate the keyfile_type TYPE into the matching GnuTLS X.509 format
+ constant: keyfile_pem gives GNUTLS_X509_FMT_PEM and keyfile_asn1 gives
+ GNUTLS_X509_FMT_DER. Any other value aborts the program. */
+static int
+key_type_to_gnutls_type (enum keyfile_type type)
+{
+ switch (type)
+ {
+ case keyfile_pem:
+ return GNUTLS_X509_FMT_PEM;
+ case keyfile_asn1:
+ return GNUTLS_X509_FMT_DER;
+ default:
+ abort ();
+ }
+}
+
/* Note: some of the functions private to this file have names that
begin with "wgnutls_" (e.g. wgnutls_read) so that they wouldn't be
confused with actual gnutls functions -- such as the gnutls_read
preprocessor macro. */
-static gnutls_certificate_credentials credentials;
+static gnutls_certificate_credentials_t credentials;
bool
-ssl_init ()
+ssl_init (void)
{
+ /* Becomes true if GnuTLS is initialized. */
+ static bool ssl_initialized = false;
+
+ /* GnuTLS should be initialized only once. */
+ if (ssl_initialized)
+ return true;
+
const char *ca_directory;
DIR *dir;
@@ -101,15 +124,48 @@ ssl_init ()
closedir (dir);
}
+ /* Use the private key from the cert file unless otherwise specified. */
+ if (opt.cert_file && !opt.private_key)
+ {
+ opt.private_key = opt.cert_file;
+ opt.private_key_type = opt.cert_type;
+ }
+ /* Use the cert from the private key file unless otherwise specified. */
+ if (!opt.cert_file && opt.private_key)
+ {
+ opt.cert_file = opt.private_key;
+ opt.cert_type = opt.private_key_type;
+ }
+
+ if (opt.cert_file && opt.private_key)
+ {
+ int type;
+ if (opt.private_key_type != opt.cert_type)
+ {
+ /* GnuTLS can't handle this */
+ logprintf (LOG_NOTQUIET, _("ERROR: GnuTLS requires the key and the \
+cert to be of the same type.\n"));
+ }
+
+ type = key_type_to_gnutls_type (opt.private_key_type);
+
+ gnutls_certificate_set_x509_key_file (credentials, opt.cert_file,
+ opt.private_key,
+ type);
+ }
+
if (opt.ca_cert)
gnutls_certificate_set_x509_trust_file (credentials, opt.ca_cert,
GNUTLS_X509_FMT_PEM);
+
+ ssl_initialized = true;
+
return true;
}
struct wgnutls_transport_context
{
- gnutls_session session; /* GnuTLS session handle */
+ gnutls_session_t session; /* GnuTLS session handle */
int last_error; /* last error returned by read/write/... */
/* Since GnuTLS doesn't support the equivalent to recv(...,
@@ -132,7 +188,7 @@ wgnutls_read_timeout (int fd, char *buf, int bufsize, void *arg, double timeout)
int flags = 0;
#endif
int ret = 0;
- struct ptimer *timer;
+ struct ptimer *timer = NULL;
struct wgnutls_transport_context *ctx = arg;
int timed_out = 0;
@@ -142,64 +198,56 @@ wgnutls_read_timeout (int fd, char *buf, int bufsize, void *arg, double timeout)
flags = fcntl (fd, F_GETFL, 0);
if (flags < 0)
return flags;
+ if (fcntl (fd, F_SETFL, flags | O_NONBLOCK))
+ return -1;
+#else
+ /* XXX: Assume it was blocking before. */
+ const int one = 1;
+ if (ioctl (fd, FIONBIO, &one) < 0)
+ return -1;
#endif
+
timer = ptimer_new ();
- if (timer == 0)
+ if (timer == NULL)
return -1;
}
do
{
- double next_timeout = timeout - ptimer_measure (timer);
- if (timeout && next_timeout < 0)
- break;
+ double next_timeout = 0;
+ if (timeout)
+ {
+ next_timeout = timeout - ptimer_measure (timer);
+ if (next_timeout < 0)
+ break;
+ }
ret = GNUTLS_E_AGAIN;
if (timeout == 0 || gnutls_record_check_pending (ctx->session)
|| select_fd (fd, next_timeout, WAIT_FOR_READ))
{
- if (timeout)
- {
-#ifdef F_GETFL
- ret = fcntl (fd, F_SETFL, flags | O_NONBLOCK);
- if (ret < 0)
- return ret;
-#else
- /* XXX: Assume it was blocking before. */
- const int one = 1;
- ret = ioctl (fd, FIONBIO, &one);
- if (ret < 0)
- return ret;
-#endif
- }
-
ret = gnutls_record_recv (ctx->session, buf, bufsize);
-
- if (timeout)
- {
- int status;
-#ifdef F_GETFL
- status = fcntl (fd, F_SETFL, flags);
- if (status < 0)
- return status;
-#else
- const int zero = 0;
- status = ioctl (fd, FIONBIO, &zero);
- if (status < 0)
- return status;
-#endif
- }
+ timed_out = timeout && ptimer_measure (timer) >= timeout;
}
-
- timed_out = timeout && ptimer_measure (timer) >= timeout;
}
while (ret == GNUTLS_E_INTERRUPTED || (ret == GNUTLS_E_AGAIN && !timed_out));
if (timeout)
- ptimer_destroy (timer);
+ {
+ ptimer_destroy (timer);
- if (timeout && timed_out && ret == GNUTLS_E_AGAIN)
- errno = ETIMEDOUT;
+#ifdef F_GETFL
+ if (fcntl (fd, F_SETFL, flags) < 0)
+ return -1;
+#else
+ const int zero = 0;
+ if (ioctl (fd, FIONBIO, &zero) < 0)
+ return -1;
+#endif
+
+ if (timed_out && ret == GNUTLS_E_AGAIN)
+ errno = ETIMEDOUT;
+ }
return ret;
}
@@ -207,11 +255,7 @@ wgnutls_read_timeout (int fd, char *buf, int bufsize, void *arg, double timeout)
static int
wgnutls_read (int fd, char *buf, int bufsize, void *arg)
{
-#ifdef F_GETFL
- int flags = 0;
-#endif
int ret = 0;
- struct ptimer *timer;
struct wgnutls_transport_context *ctx = arg;
if (ctx->peeklen)
@@ -250,8 +294,12 @@ static int
wgnutls_poll (int fd, double timeout, int wait_for, void *arg)
{
struct wgnutls_transport_context *ctx = arg;
- return ctx->peeklen || gnutls_record_check_pending (ctx->session)
- || select_fd (fd, timeout, wait_for);
+
+ if (timeout)
+ return ctx->peeklen || gnutls_record_check_pending (ctx->session)
+ || select_fd (fd, timeout, wait_for);
+ else
+ return ctx->peeklen || gnutls_record_check_pending (ctx->session);
}
static int
@@ -260,15 +308,19 @@ wgnutls_peek (int fd, char *buf, int bufsize, void *arg)
int read = 0;
struct wgnutls_transport_context *ctx = arg;
int offset = MIN (bufsize, ctx->peeklen);
- if (bufsize > sizeof ctx->peekbuf)
- bufsize = sizeof ctx->peekbuf;
if (ctx->peeklen)
- memcpy (buf, ctx->peekbuf, offset);
+ {
+ memcpy (buf, ctx->peekbuf, offset);
+ return offset;
+ }
+
+ if (bufsize > sizeof ctx->peekbuf)
+ bufsize = sizeof ctx->peekbuf;
if (bufsize > offset)
{
- if (gnutls_record_check_pending (ctx->session) <= 0
+ if (opt.read_timeout && gnutls_record_check_pending (ctx->session) == 0
&& select_fd (fd, 0.0, WAIT_FOR_READ) <= 0)
read = 0;
else
@@ -320,18 +372,26 @@ static struct transport_implementation wgnutls_transport =
};
bool
-ssl_connect_wget (int fd)
+ssl_connect_wget (int fd, const char *hostname)
{
struct wgnutls_transport_context *ctx;
- gnutls_session session;
+ gnutls_session_t session;
int err;
gnutls_init (&session, GNUTLS_CLIENT);
+
+ /* We set the server name but only if it's not an IP address. */
+ if (! is_valid_ip_address (hostname))
+ {
+ gnutls_server_name_set (session, GNUTLS_NAME_DNS, hostname,
+ strlen (hostname));
+ }
+
gnutls_set_default_priority (session);
gnutls_credentials_set (session, GNUTLS_CRD_CERTIFICATE, credentials);
#ifndef FD_TO_SOCKET
# define FD_TO_SOCKET(X) (X)
#endif
- gnutls_transport_set_ptr (session, (gnutls_transport_ptr) FD_TO_SOCKET (fd));
+ gnutls_transport_set_ptr (session, (gnutls_transport_ptr_t) FD_TO_SOCKET (fd));
err = 0;
#if HAVE_GNUTLS_PRIORITY_SET_DIRECT
@@ -438,8 +498,8 @@ ssl_check_certificate (int fd, const char *host)
if (gnutls_certificate_type_get (ctx->session) == GNUTLS_CRT_X509)
{
time_t now = time (NULL);
- gnutls_x509_crt cert;
- const gnutls_datum *cert_list;
+ gnutls_x509_crt_t cert;
+ const gnutls_datum_t *cert_list;
unsigned int cert_list_size;
if ((err = gnutls_x509_crt_init (&cert)) < 0)
diff --git a/src/hash.c b/src/hash.c
index 6c40801..129ead1 100644
--- a/src/hash.c
+++ b/src/hash.c
@@ -423,14 +423,14 @@ grow_hash_table (struct hash_table *ht)
table if necessary. */
void
-hash_table_put (struct hash_table *ht, const void *key, void *value)
+hash_table_put (struct hash_table *ht, const void *key, const void *value)
{
struct cell *c = find_cell (ht, key);
if (CELL_OCCUPIED (c))
{
/* update existing item */
c->key = (void *)key; /* const? */
- c->value = value;
+ c->value = (void *)value;
return;
}
@@ -445,7 +445,7 @@ hash_table_put (struct hash_table *ht, const void *key, void *value)
/* add new item */
++ht->count;
c->key = (void *)key; /* const? */
- c->value = value;
+ c->value = (void *)value;
}
/* Remove KEY->value mapping from HT. Return 0 if there was no such
diff --git a/src/hash.h b/src/hash.h
index 1dadf09..8576760 100644
--- a/src/hash.h
+++ b/src/hash.h
@@ -42,7 +42,7 @@ int hash_table_get_pair (const struct hash_table *, const void *,
void *, void *);
int hash_table_contains (const struct hash_table *, const void *);
-void hash_table_put (struct hash_table *, const void *, void *);
+void hash_table_put (struct hash_table *, const void *, const void *);
int hash_table_remove (struct hash_table *, const void *);
void hash_table_clear (struct hash_table *);
diff --git a/src/host.c b/src/host.c
index 86f107a..86bf83b 100644
--- a/src/host.c
+++ b/src/host.c
@@ -1,6 +1,6 @@
/* Host name resolution and matching.
Copyright (C) 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004,
- 2005, 2006, 2007, 2008, 2009, 2010, 2011 Free Software Foundation,
+ 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012 Free Software Foundation,
Inc.
This file is part of GNU Wget.
@@ -914,3 +914,18 @@ host_cleanup (void)
host_name_addresses_map = NULL;
}
}
+
+bool
+is_valid_ip_address (const char *name)
+{
+ const char *endp;
+
+ endp = name + strlen(name);
+ if (is_valid_ipv4_address (name, endp))
+ return true;
+#ifdef ENABLE_IPV6
+ if (is_valid_ipv6_address (name, endp))
+ return true;
+#endif
+ return false;
+}
diff --git a/src/host.h b/src/host.h
index 3f4a02a..3f27ea0 100644
--- a/src/host.h
+++ b/src/host.h
@@ -1,6 +1,6 @@
/* Declarations for host.c
Copyright (C) 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004,
- 2005, 2006, 2007, 2008, 2009, 2010, 2011 Free Software Foundation,
+ 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012 Free Software Foundation,
Inc.
This file is part of GNU Wget.
@@ -98,6 +98,8 @@ const char *print_address (const ip_address *);
bool is_valid_ipv6_address (const char *, const char *);
#endif
+bool is_valid_ip_address (const char *name);
+
bool accept_domain (struct url *);
bool sufmatch (const char **, const char *);
diff --git a/src/html-parse.c b/src/html-parse.c
index 9fafd8f..20791cd 100644
--- a/src/html-parse.c
+++ b/src/html-parse.c
@@ -280,7 +280,7 @@ struct tagstack_item {
struct tagstack_item *next;
};
-struct tagstack_item *
+static struct tagstack_item *
tagstack_push (struct tagstack_item **head, struct tagstack_item **tail)
{
struct tagstack_item *ts = xmalloc(sizeof(struct tagstack_item));
@@ -301,7 +301,7 @@ tagstack_push (struct tagstack_item **head, struct tagstack_item **tail)
}
/* remove ts and everything after it from the stack */
-void
+static void
tagstack_pop (struct tagstack_item **head, struct tagstack_item **tail,
struct tagstack_item *ts)
{
@@ -343,7 +343,7 @@ tagstack_pop (struct tagstack_item **head, struct tagstack_item **tail,
}
}
-struct tagstack_item *
+static struct tagstack_item *
tagstack_find (struct tagstack_item *tail, const char *tagname_begin,
const char *tagname_end)
{
diff --git a/src/html-url.c b/src/html-url.c
index f5ab293..55563e2 100644
--- a/src/html-url.c
+++ b/src/html-url.c
@@ -1,6 +1,6 @@
/* Collect URLs from HTML source.
Copyright (C) 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006,
- 2007, 2008, 2009, 2010, 2011 Free Software Foundation, Inc.
+ 2007, 2008, 2009, 2010, 2011, 2012 Free Software Foundation, Inc.
This file is part of GNU Wget.
@@ -675,8 +675,9 @@ collect_tags_mapper (struct taginfo *tag, void *arg)
check_style_attr (tag, ctx);
- if (tag->end_tag_p && (0 == strcasecmp (tag->name, "style")) &&
- tag->contents_begin && tag->contents_end)
+ if (tag->end_tag_p && (0 == strcasecmp (tag->name, "style"))
+ && tag->contents_begin && tag->contents_end
+ && tag->contents_begin <= tag->contents_end)
{
/* parse contents */
get_urls_css (ctx, tag->contents_begin - ctx->text,
@@ -829,7 +830,7 @@ get_urls_file (const char *file)
return head;
}
-void
+static void
cleanup_html_url (void)
{
/* Destroy the hash tables. The hash table keys and values are not
diff --git a/src/http.c b/src/http.c
index 748b4e8..fa2d5ed 100644
--- a/src/http.c
+++ b/src/http.c
@@ -1,6 +1,6 @@
/* HTTP support.
Copyright (C) 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004,
- 2005, 2006, 2007, 2008, 2009, 2010, 2011 Free Software Foundation,
+ 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012 Free Software Foundation,
Inc.
This file is part of GNU Wget.
@@ -58,6 +58,7 @@ as that of the covered work. */
#include "md5.h"
#include "convert.h"
#include "spider.h"
+#include "warc.h"
#ifdef TESTING
#include "test.h"
@@ -230,7 +231,7 @@ release_header (struct request_header *hdr)
*/
static void
-request_set_header (struct request *req, char *name, char *value,
+request_set_header (struct request *req, const char *name, const char *value,
enum rp release_policy)
{
struct request_header *hdr;
@@ -241,7 +242,7 @@ request_set_header (struct request *req, char *name, char *value,
/* A NULL value is a no-op; if freeing the name is requested,
free it now to avoid leaks. */
if (release_policy == rel_name || release_policy == rel_both)
- xfree (name);
+ xfree ((void *)name);
return;
}
@@ -252,8 +253,8 @@ request_set_header (struct request *req, char *name, char *value,
{
/* Replace existing header. */
release_header (hdr);
- hdr->name = name;
- hdr->value = value;
+ hdr->name = (void *)name;
+ hdr->value = (void *)value;
hdr->release_policy = release_policy;
return;
}
@@ -267,8 +268,8 @@ request_set_header (struct request *req, char *name, char *value,
req->headers = xrealloc (req->headers, req->hcapacity * sizeof (*hdr));
}
hdr = &req->headers[req->hcount++];
- hdr->name = name;
- hdr->value = value;
+ hdr->name = (void *)name;
+ hdr->value = (void *)value;
hdr->release_policy = release_policy;
}
@@ -295,7 +296,7 @@ request_set_user_header (struct request *req, const char *header)
the header was actually removed, false otherwise. */
static bool
-request_remove_header (struct request *req, char *name)
+request_remove_header (struct request *req, const char *name)
{
int i;
for (i = 0; i < req->hcount; i++)
@@ -320,10 +321,12 @@ request_remove_header (struct request *req, char *name)
p += A_len; \
} while (0)
-/* Construct the request and write it to FD using fd_write. */
+/* Construct the request and write it to FD using fd_write.
+ If warc_tmp is set to a file pointer, the request string will
+ also be written to that file. */
static int
-request_send (const struct request *req, int fd)
+request_send (const struct request *req, int fd, FILE *warc_tmp)
{
char *request_string, *p;
int i, size, write_error;
@@ -374,6 +377,13 @@ request_send (const struct request *req, int fd)
if (write_error < 0)
logprintf (LOG_VERBOSE, _("Failed writing HTTP request: %s.\n"),
fd_errstr (fd));
+ else if (warc_tmp != NULL)
+ {
+ /* Write a copy of the data to the WARC record. */
+ int warc_tmp_written = fwrite (request_string, 1, size - 1, warc_tmp);
+ if (warc_tmp_written != size - 1)
+ return -2;
+ }
return write_error;
}
@@ -444,10 +454,12 @@ register_basic_auth_host (const char *hostname)
/* Send the contents of FILE_NAME to SOCK. Make sure that exactly
PROMISED_SIZE bytes are sent over the wire -- if the file is
- longer, read only that much; if the file is shorter, report an error. */
+ longer, read only that much; if the file is shorter, report an error.
+ If warc_tmp is set to a file pointer, the post data will
+ also be written to that file. */
static int
-post_file (int sock, const char *file_name, wgint promised_size)
+post_file (int sock, const char *file_name, wgint promised_size, FILE *warc_tmp)
{
static char chunk[8192];
wgint written = 0;
@@ -472,6 +484,16 @@ post_file (int sock, const char *file_name, wgint promised_size)
fclose (fp);
return -1;
}
+ if (warc_tmp != NULL)
+ {
+ /* Write a copy of the data to the WARC record. */
+ int warc_tmp_written = fwrite (chunk, 1, towrite, warc_tmp);
+ if (warc_tmp_written != towrite)
+ {
+ fclose (fp);
+ return -2;
+ }
+ }
written += towrite;
}
fclose (fp);
@@ -929,9 +951,12 @@ skip_short_body (int fd, wgint contlen, bool chunked)
break;
remaining_chunk_size = strtol (line, &endl, 16);
+ xfree (line);
+
if (remaining_chunk_size == 0)
{
- fd_read_line (fd);
+ line = fd_read_line (fd);
+ xfree_null (line);
break;
}
}
@@ -956,8 +981,13 @@ skip_short_body (int fd, wgint contlen, bool chunked)
{
remaining_chunk_size -= ret;
if (remaining_chunk_size == 0)
- if (fd_read_line (fd) == NULL)
- return false;
+ {
+ char *line = fd_read_line (fd);
+ if (line == NULL)
+ return false;
+ else
+ xfree (line);
+ }
}
/* Safe even if %.*s bogusly expects terminating \0 because
@@ -1462,6 +1492,135 @@ File %s already there; not retrieving.\n\n"), quote (filename));
*dt |= TEXTHTML;
}
+/* Download the response body from the socket and write it to
+ an output file. The headers have already been read from the
+ socket. If WARC is enabled, the response body will also be
+ written to a WARC response record.
+
+ hs, contlen, contrange, chunked_transfer_encoding and url are
+ parameters from the gethttp method. fp is a pointer to the
+ output file.
+
+ url, warc_timestamp_str, warc_request_uuid, warc_ip, type
+ and statcode will be saved in the headers of the WARC record.
+ The head parameter contains the HTTP headers of the response.
+
+ If fp is NULL and WARC is enabled, the response body will be
+ written only to the WARC file. If WARC is disabled and fp
+ is a file pointer, the data will be written to the file.
+ If fp is a file pointer and WARC is enabled, the body will
+ be written to both destinations.
+
+ Returns the error code. */
+static int
+read_response_body (struct http_stat *hs, int sock, FILE *fp, wgint contlen,
+ wgint contrange, bool chunked_transfer_encoding,
+ char *url, char *warc_timestamp_str, char *warc_request_uuid,
+ ip_address *warc_ip, char *type, int statcode, char *head)
+{
+ int warc_payload_offset = 0;
+ FILE *warc_tmp = NULL;
+ int warcerr = 0;
+
+ if (opt.warc_filename != NULL)
+ {
+ /* Open a temporary file where we can write the response before we
+ add it to the WARC record. */
+ warc_tmp = warc_tempfile ();
+ if (warc_tmp == NULL)
+ warcerr = WARC_TMP_FOPENERR;
+
+ if (warcerr == 0)
+ {
+ /* We should keep the response headers for the WARC record. */
+ int head_len = strlen (head);
+ int warc_tmp_written = fwrite (head, 1, head_len, warc_tmp);
+ if (warc_tmp_written != head_len)
+ warcerr = WARC_TMP_FWRITEERR;
+ warc_payload_offset = head_len;
+ }
+
+ if (warcerr != 0)
+ {
+ if (warc_tmp != NULL)
+ fclose (warc_tmp);
+ return warcerr;
+ }
+ }
+
+ if (fp != NULL)
+ {
+ /* This confuses the timestamping code that checks for file size.
+ #### The timestamping code should be smarter about file size. */
+ if (opt.save_headers && hs->restval == 0)
+ fwrite (head, 1, strlen (head), fp);
+ }
+
+ /* Read the response body. */
+ int flags = 0;
+ if (contlen != -1)
+ /* If content-length is present, read that much; otherwise, read
+ until EOF. The HTTP spec doesn't require the server to
+ actually close the connection when it's done sending data. */
+ flags |= rb_read_exactly;
+ if (fp != NULL && hs->restval > 0 && contrange == 0)
+ /* If the server ignored our range request, instruct fd_read_body
+ to skip the first RESTVAL bytes of body. */
+ flags |= rb_skip_startpos;
+ if (chunked_transfer_encoding)
+ flags |= rb_chunked_transfer_encoding;
+
+ hs->len = hs->restval;
+ hs->rd_size = 0;
+ /* Download the response body and write it to fp.
+ If we are working on a WARC file, we simultaneously write the
+ response body to warc_tmp. */
+ hs->res = fd_read_body (sock, fp, contlen != -1 ? contlen : 0,
+ hs->restval, &hs->rd_size, &hs->len, &hs->dltime,
+ flags, warc_tmp);
+ if (hs->res >= 0)
+ {
+ if (warc_tmp != NULL)
+ {
+ /* Create a response record and write it to the WARC file.
+ Note: per the WARC standard, the request and response should share
+ the same date header. We re-use the timestamp of the request.
+ The response record should also refer to the uuid of the request. */
+ bool r = warc_write_response_record (url, warc_timestamp_str,
+ warc_request_uuid, warc_ip,
+ warc_tmp, warc_payload_offset,
+ type, statcode, hs->newloc);
+
+ /* warc_write_response_record has closed warc_tmp. */
+
+ if (! r)
+ return WARC_ERR;
+ }
+
+ return RETRFINISHED;
+ }
+
+ if (warc_tmp != NULL)
+ fclose (warc_tmp);
+
+ if (hs->res == -2)
+ {
+ /* Error while writing to fp. */
+ return FWRITEERR;
+ }
+ else if (hs->res == -3)
+ {
+ /* Error while writing to warc_tmp. */
+ return WARC_TMP_FWRITEERR;
+ }
+ else
+ {
+ /* A read error! */
+ hs->rderrmsg = xstrdup (fd_errstr (sock));
+ return RETRFINISHED;
+ }
+}
+
#define BEGINS_WITH(line, string_constant) \
(!strncasecmp (line, string_constant, sizeof (string_constant) - 1) \
&& (c_isspace (line[sizeof (string_constant) - 1]) \
@@ -1519,9 +1678,9 @@ gethttp (struct url *u, struct http_stat *hs, int *dt, struct url *proxy,
wgint contlen, contrange;
struct url *conn;
FILE *fp;
+ int err;
int sock = -1;
- int flags;
/* Set to 1 when the authorization has already been sent and should
not be tried again. */
@@ -1547,6 +1706,14 @@ gethttp (struct url *u, struct http_stat *hs, int *dt, struct url *proxy,
char hdrval[256];
char *message;
+ /* Declare WARC variables. */
+ bool warc_enabled = (opt.warc_filename != NULL);
+ FILE *warc_tmp = NULL;
+ char warc_timestamp_str [21];
+ char warc_request_uuid [48];
+ ip_address *warc_ip = NULL;
+ off_t warc_payload_offset = -1;
+
/* Whether this connection will be kept alive after the HTTP request
is done. */
bool keep_alive;
@@ -1792,11 +1959,17 @@ gethttp (struct url *u, struct http_stat *hs, int *dt, struct url *proxy,
#endif
&host_lookup_failed))
{
+ int family = socket_family (pconn.socket, ENDPOINT_PEER);
sock = pconn.socket;
using_ssl = pconn.ssl;
- logprintf (LOG_VERBOSE, _("Reusing existing connection to %s:%d.\n"),
- quotearg_style (escape_quoting_style, pconn.host),
- pconn.port);
+ if (family == AF_INET6)
+ logprintf (LOG_VERBOSE, _("Reusing existing connection to [%s]:%d.\n"),
+ quotearg_style (escape_quoting_style, pconn.host),
+ pconn.port);
+ else
+ logprintf (LOG_VERBOSE, _("Reusing existing connection to %s:%d.\n"),
+ quotearg_style (escape_quoting_style, pconn.host),
+ pconn.port);
DEBUGP (("Reusing fd %d.\n", sock));
if (pconn.authorized)
/* If the connection is already authorized, the "Basic"
@@ -1852,11 +2025,12 @@ gethttp (struct url *u, struct http_stat *hs, int *dt, struct url *proxy,
that the contents of Host would be exactly the same as
the contents of CONNECT. */
- write_error = request_send (connreq, sock);
+ write_error = request_send (connreq, sock, 0);
request_free (connreq);
if (write_error < 0)
{
CLOSE_INVALIDATE (sock);
+ request_free (req);
return WRITEFAILED;
}
@@ -1866,6 +2040,7 @@ gethttp (struct url *u, struct http_stat *hs, int *dt, struct url *proxy,
logprintf (LOG_VERBOSE, _("Failed reading proxy response: %s\n"),
fd_errstr (sock));
CLOSE_INVALIDATE (sock);
+ request_free (req);
return HERR;
}
message = NULL;
@@ -1886,6 +2061,7 @@ gethttp (struct url *u, struct http_stat *hs, int *dt, struct url *proxy,
quotearg_style (escape_quoting_style,
_("Malformed status line")));
xfree (head);
+ request_free (req);
return HERR;
}
hs->message = xstrdup (message);
@@ -1897,6 +2073,7 @@ gethttp (struct url *u, struct http_stat *hs, int *dt, struct url *proxy,
logprintf (LOG_NOTQUIET, _("Proxy tunneling failed: %s"),
message ? quotearg_style (escape_quoting_style, message) : "?");
xfree_null (message);
+ request_free (req);
return CONSSLERR;
}
xfree_null (message);
@@ -1909,14 +2086,16 @@ gethttp (struct url *u, struct http_stat *hs, int *dt, struct url *proxy,
if (conn->scheme == SCHEME_HTTPS)
{
- if (!ssl_connect_wget (sock))
+ if (!ssl_connect_wget (sock, u->host))
{
fd_close (sock);
+ request_free (req);
return CONSSLERR;
}
else if (!ssl_check_certificate (sock, u->host))
{
fd_close (sock);
+ request_free (req);
return VERIFCERTERR;
}
using_ssl = true;
@@ -1924,8 +2103,26 @@ gethttp (struct url *u, struct http_stat *hs, int *dt, struct url *proxy,
#endif /* HAVE_SSL */
}
+ /* Open the temporary file where we will write the request. */
+ if (warc_enabled)
+ {
+ warc_tmp = warc_tempfile ();
+ if (warc_tmp == NULL)
+ {
+ CLOSE_INVALIDATE (sock);
+ request_free (req);
+ return WARC_TMP_FOPENERR;
+ }
+
+ if (! proxy)
+ {
+ warc_ip = (ip_address *) alloca (sizeof (ip_address));
+ socket_ip_address (sock, warc_ip, ENDPOINT_PEER);
+ }
+ }
+
/* Send the request to server. */
- write_error = request_send (req, sock);
+ write_error = request_send (req, sock, warc_tmp);
if (write_error >= 0)
{
@@ -1933,16 +2130,39 @@ gethttp (struct url *u, struct http_stat *hs, int *dt, struct url *proxy,
{
DEBUGP (("[POST data: %s]\n", opt.post_data));
write_error = fd_write (sock, opt.post_data, post_data_size, -1);
+ if (write_error >= 0 && warc_tmp != NULL)
+ {
+ /* Remember end of headers / start of payload. */
+ warc_payload_offset = ftello (warc_tmp);
+
+ /* Write a copy of the data to the WARC record. */
+ int warc_tmp_written = fwrite (opt.post_data, 1, post_data_size, warc_tmp);
+ if (warc_tmp_written != post_data_size)
+ write_error = -2;
+ }
}
else if (opt.post_file_name && post_data_size != 0)
- write_error = post_file (sock, opt.post_file_name, post_data_size);
+ {
+ if (warc_tmp != NULL)
+ /* Remember end of headers / start of payload. */
+ warc_payload_offset = ftello (warc_tmp);
+
+ write_error = post_file (sock, opt.post_file_name, post_data_size, warc_tmp);
+ }
}
if (write_error < 0)
{
CLOSE_INVALIDATE (sock);
request_free (req);
- return WRITEFAILED;
+
+ if (warc_tmp != NULL)
+ fclose (warc_tmp);
+
+ if (write_error == -2)
+ return WARC_TMP_FWRITEERR;
+ else
+ return WRITEFAILED;
}
logprintf (LOG_VERBOSE, _("%s request sent, awaiting response... "),
proxy ? "Proxy" : "HTTP");
@@ -1950,6 +2170,29 @@ gethttp (struct url *u, struct http_stat *hs, int *dt, struct url *proxy,
contrange = 0;
*dt &= ~RETROKF;
+
+ if (warc_enabled)
+ {
+ bool warc_result;
+ /* Generate a timestamp and uuid for this request. */
+ warc_timestamp (warc_timestamp_str);
+ warc_uuid_str (warc_request_uuid);
+
+ /* Create a request record and store it in the WARC file. */
+ warc_result = warc_write_request_record (u->url, warc_timestamp_str,
+ warc_request_uuid, warc_ip,
+ warc_tmp, warc_payload_offset);
+ if (! warc_result)
+ {
+ CLOSE_INVALIDATE (sock);
+ request_free (req);
+ return WARC_ERR;
+ }
+
+ /* warc_write_request_record has also closed warc_tmp. */
+ }
+
+
read_header:
head = read_http_response_head (sock);
if (!head)
@@ -1985,6 +2228,7 @@ read_header:
quotearg_style (escape_quoting_style,
_("Malformed status line")));
CLOSE_INVALIDATE (sock);
+ resp_free (resp);
request_free (req);
xfree (head);
return HERR;
@@ -1993,6 +2237,7 @@ read_header:
if (H_10X (statcode))
{
DEBUGP (("Ignoring response\n"));
+ resp_free (resp);
xfree (head);
goto read_header;
}
@@ -2073,11 +2318,42 @@ read_header:
if (statcode == HTTP_STATUS_UNAUTHORIZED)
{
/* Authorization is required. */
- if (keep_alive && !head_only
- && skip_short_body (sock, contlen, chunked_transfer_encoding))
- CLOSE_FINISH (sock);
+
+ /* Normally we are not interested in the response body.
+ But if we are writing a WARC file we are: we like to keep everything. */
+ if (warc_enabled)
+ {
+ int err;
+ type = resp_header_strdup (resp, "Content-Type");
+ err = read_response_body (hs, sock, NULL, contlen, 0,
+ chunked_transfer_encoding,
+ u->url, warc_timestamp_str,
+ warc_request_uuid, warc_ip, type,
+ statcode, head);
+ xfree_null (type);
+
+ if (err != RETRFINISHED || hs->res < 0)
+ {
+ CLOSE_INVALIDATE (sock);
+ request_free (req);
+ xfree_null (message);
+ resp_free (resp);
+ xfree (head);
+ return err;
+ }
+ else
+ CLOSE_FINISH (sock);
+ }
else
- CLOSE_INVALIDATE (sock);
+ {
+ /* Since WARC is disabled, we are not interested in the response body. */
+ if (keep_alive && !head_only
+ && skip_short_body (sock, contlen, chunked_transfer_encoding))
+ CLOSE_FINISH (sock);
+ else
+ CLOSE_INVALIDATE (sock);
+ }
+
pconn.authorized = false;
if (!auth_finished && (user && passwd))
{
@@ -2182,6 +2458,8 @@ read_header:
retrieve the file. But if the output_document was given, then this
test was already done and the file didn't exist. Hence the !opt.output_document */
get_file_flags (hs->local_file, dt);
+ request_free (req);
+ resp_free (resp);
xfree (head);
xfree_null (message);
return RETRUNNEEDED;
@@ -2325,11 +2603,42 @@ read_header:
_("Location: %s%s\n"),
hs->newloc ? escnonprint_uri (hs->newloc) : _("unspecified"),
hs->newloc ? _(" [following]") : "");
- if (keep_alive && !head_only
- && skip_short_body (sock, contlen, chunked_transfer_encoding))
- CLOSE_FINISH (sock);
+
+ /* In case the caller cares to look... */
+ hs->len = 0;
+ hs->res = 0;
+ hs->restval = 0;
+
+ /* Normally we are not interested in the response body of a redirect.
+ But if we are writing a WARC file we are: we like to keep everything. */
+ if (warc_enabled)
+ {
+ int err = read_response_body (hs, sock, NULL, contlen, 0,
+ chunked_transfer_encoding,
+ u->url, warc_timestamp_str,
+ warc_request_uuid, warc_ip, type,
+ statcode, head);
+
+ if (err != RETRFINISHED || hs->res < 0)
+ {
+ CLOSE_INVALIDATE (sock);
+ xfree_null (type);
+ xfree (head);
+ return err;
+ }
+ else
+ CLOSE_FINISH (sock);
+ }
else
- CLOSE_INVALIDATE (sock);
+ {
+ /* Since WARC is disabled, we are not interested in the response body. */
+ if (keep_alive && !head_only
+ && skip_short_body (sock, contlen, chunked_transfer_encoding))
+ CLOSE_FINISH (sock);
+ else
+ CLOSE_INVALIDATE (sock);
+ }
+
xfree_null (type);
xfree (head);
/* From RFC2616: The status codes 303 and 307 have
@@ -2447,30 +2756,55 @@ read_header:
logputs (LOG_VERBOSE, "\n");
}
}
- xfree_null (type);
- type = NULL; /* We don't need it any more. */
/* Return if we have no intention of further downloading. */
- if (!(*dt & RETROKF) || head_only)
+ if ((!(*dt & RETROKF) && !opt.content_on_error) || head_only)
{
/* In case the caller cares to look... */
hs->len = 0;
hs->res = 0;
- xfree_null (type);
- if (head_only)
- /* Pre-1.10 Wget used CLOSE_INVALIDATE here. Now we trust the
- servers not to send body in response to a HEAD request, and
- those that do will likely be caught by test_socket_open.
- If not, they can be worked around using
- `--no-http-keep-alive'. */
- CLOSE_FINISH (sock);
- else if (keep_alive
- && skip_short_body (sock, contlen, chunked_transfer_encoding))
- /* Successfully skipped the body; also keep using the socket. */
- CLOSE_FINISH (sock);
+ hs->restval = 0;
+
+ /* Normally we are not interested in the response body of an error response.
+ But if we are writing a WARC file we are: we like to keep everything. */
+ if (warc_enabled)
+ {
+ int err = read_response_body (hs, sock, NULL, contlen, 0,
+ chunked_transfer_encoding,
+ u->url, warc_timestamp_str,
+ warc_request_uuid, warc_ip, type,
+ statcode, head);
+
+ if (err != RETRFINISHED || hs->res < 0)
+ {
+ CLOSE_INVALIDATE (sock);
+ xfree (head);
+ xfree_null (type);
+ return err;
+ }
+ else
+ CLOSE_FINISH (sock);
+ }
else
- CLOSE_INVALIDATE (sock);
+ {
+ /* Since WARC is disabled, we are not interested in the response body. */
+ if (head_only)
+ /* Pre-1.10 Wget used CLOSE_INVALIDATE here. Now we trust the
+ servers not to send body in response to a HEAD request, and
+ those that do will likely be caught by test_socket_open.
+ If not, they can be worked around using
+ `--no-http-keep-alive'. */
+ CLOSE_FINISH (sock);
+ else if (keep_alive
+ && skip_short_body (sock, contlen, chunked_transfer_encoding))
+ /* Successfully skipped the body; also keep using the socket. */
+ CLOSE_FINISH (sock);
+ else
+ CLOSE_INVALIDATE (sock);
+ }
+
xfree (head);
+ xfree_null (type);
return RETRFINISHED;
}
@@ -2512,6 +2846,7 @@ read_header:
strerror (errno));
CLOSE_INVALIDATE (sock);
xfree (head);
+ xfree_null (type);
return UNLINKERR;
}
}
@@ -2539,6 +2874,7 @@ read_header:
hs->local_file);
CLOSE_INVALIDATE (sock);
xfree (head);
+ xfree_null (type);
return FOPEN_EXCL_ERR;
}
}
@@ -2547,6 +2883,7 @@ read_header:
logprintf (LOG_NOTQUIET, "%s: %s\n", hs->local_file, strerror (errno));
CLOSE_INVALIDATE (sock);
xfree (head);
+ xfree_null (type);
return FOPENERR;
}
}
@@ -2560,49 +2897,26 @@ read_header:
HYPHENP (hs->local_file) ? quote ("STDOUT") : quote (hs->local_file));
}
- /* This confuses the timestamping code that checks for file size.
- #### The timestamping code should be smarter about file size. */
- if (opt.save_headers && hs->restval == 0)
- fwrite (head, 1, strlen (head), fp);
+
+ err = read_response_body (hs, sock, fp, contlen, contrange,
+ chunked_transfer_encoding,
+ u->url, warc_timestamp_str,
+ warc_request_uuid, warc_ip, type,
+ statcode, head);
/* Now we no longer need to store the response header. */
xfree (head);
-
- /* Download the request body. */
- flags = 0;
- if (contlen != -1)
- /* If content-length is present, read that much; otherwise, read
- until EOF. The HTTP spec doesn't require the server to
- actually close the connection when it's done sending data. */
- flags |= rb_read_exactly;
- if (hs->restval > 0 && contrange == 0)
- /* If the server ignored our range request, instruct fd_read_body
- to skip the first RESTVAL bytes of body. */
- flags |= rb_skip_startpos;
-
- if (chunked_transfer_encoding)
- flags |= rb_chunked_transfer_encoding;
-
- hs->len = hs->restval;
- hs->rd_size = 0;
- hs->res = fd_read_body (sock, fp, contlen != -1 ? contlen : 0,
- hs->restval, &hs->rd_size, &hs->len, &hs->dltime,
- flags);
+ xfree_null (type);
if (hs->res >= 0)
CLOSE_FINISH (sock);
else
- {
- if (hs->res < 0)
- hs->rderrmsg = xstrdup (fd_errstr (sock));
- CLOSE_INVALIDATE (sock);
- }
+ CLOSE_INVALIDATE (sock);
if (!output_stream)
fclose (fp);
- if (hs->res == -2)
- return FWRITEERR;
- return RETRFINISHED;
+
+ return err;
}
/* The genuine HTTP loop! This is the part where the retrieval is
@@ -2626,6 +2940,12 @@ http_loop (struct url *u, struct url *original_url, char **newloc,
char *file_name;
bool force_full_retrieve = false;
+
+ /* If we are writing to a WARC file: always retrieve the whole file. */
+ if (opt.warc_filename != NULL)
+ force_full_retrieve = true;
+
+
/* Assert that no value for *LOCAL_FILE was passed. */
assert (local_file == NULL || *local_file == NULL);
@@ -2795,6 +3115,18 @@ Spider mode enabled. Check if remote file exists.\n"));
/* Fatal errors just return from the function. */
ret = err;
goto exit;
+ case WARC_ERR:
+ /* A fatal WARC error. */
+ logputs (LOG_VERBOSE, "\n");
+ logprintf (LOG_NOTQUIET, _("Cannot write to WARC file.\n"));
+ ret = err;
+ goto exit;
+ case WARC_TMP_FOPENERR: case WARC_TMP_FWRITEERR:
+ /* A fatal WARC error. */
+ logputs (LOG_VERBOSE, "\n");
+ logprintf (LOG_NOTQUIET, _("Cannot write to temporary WARC file.\n"));
+ ret = err;
+ goto exit;
case CONSSLERR:
/* Another fatal error. */
logprintf (LOG_NOTQUIET, _("Unable to establish SSL connection.\n"));
@@ -3323,19 +3655,23 @@ digest_authentication_encode (const char *au, const char *user,
const char *passwd, const char *method,
const char *path)
{
- static char *realm, *opaque, *nonce;
+ static char *realm, *opaque, *nonce, *qop;
static struct {
const char *name;
char **variable;
} options[] = {
{ "realm", &realm },
{ "opaque", &opaque },
- { "nonce", &nonce }
+ { "nonce", &nonce },
+ { "qop", &qop }
};
+ char cnonce[16] = "";
char *res;
+ size_t res_size;
param_token name, value;
- realm = opaque = nonce = NULL;
+
+ realm = opaque = nonce = qop = NULL;
au += 6; /* skip over `Digest' */
while (extract_param (&au, &name, &value, ','))
@@ -3351,11 +3687,19 @@ digest_authentication_encode (const char *au, const char *user,
break;
}
}
+
+ if (qop != NULL && strcmp(qop,"auth"))
+ {
+ logprintf (LOG_NOTQUIET, _("Unsupported quality of protection '%s'.\n"), qop);
+ user = NULL; /* force freeing mem and return */
+ }
+
if (!realm || !nonce || !user || !passwd || !path || !method)
{
xfree_null (realm);
xfree_null (opaque);
xfree_null (nonce);
+ xfree_null (qop);
return NULL;
}
@@ -3384,27 +3728,69 @@ digest_authentication_encode (const char *au, const char *user,
md5_finish_ctx (&ctx, hash);
dump_hash (a2buf, hash);
- /* RESPONSE_DIGEST = H(A1BUF ":" nonce ":" A2BUF) */
- md5_init_ctx (&ctx);
- md5_process_bytes ((unsigned char *)a1buf, MD5_DIGEST_SIZE * 2, &ctx);
- md5_process_bytes ((unsigned char *)":", 1, &ctx);
- md5_process_bytes ((unsigned char *)nonce, strlen (nonce), &ctx);
- md5_process_bytes ((unsigned char *)":", 1, &ctx);
- md5_process_bytes ((unsigned char *)a2buf, MD5_DIGEST_SIZE * 2, &ctx);
- md5_finish_ctx (&ctx, hash);
+ if (!strcmp(qop,"auth"))
+ {
+ /* RFC 2617 Digest Access Authentication */
+ /* generate random hex string */
+ snprintf(cnonce, sizeof(cnonce), "%08x", random_number(INT_MAX));
+
+ /* RESPONSE_DIGEST = H(A1BUF ":" nonce ":" noncecount ":" clientnonce ":" qop ":" A2BUF) */
+ md5_init_ctx (&ctx);
+ md5_process_bytes ((unsigned char *)a1buf, MD5_DIGEST_SIZE * 2, &ctx);
+ md5_process_bytes ((unsigned char *)":", 1, &ctx);
+ md5_process_bytes ((unsigned char *)nonce, strlen (nonce), &ctx);
+ md5_process_bytes ((unsigned char *)":", 1, &ctx);
+ md5_process_bytes ((unsigned char *)"00000001", 8, &ctx); /* TODO: keep track of server nonce values */
+ md5_process_bytes ((unsigned char *)":", 1, &ctx);
+ md5_process_bytes ((unsigned char *)cnonce, strlen(cnonce), &ctx);
+ md5_process_bytes ((unsigned char *)":", 1, &ctx);
+ md5_process_bytes ((unsigned char *)qop, strlen(qop), &ctx);
+ md5_process_bytes ((unsigned char *)":", 1, &ctx);
+ md5_process_bytes ((unsigned char *)a2buf, MD5_DIGEST_SIZE * 2, &ctx);
+ md5_finish_ctx (&ctx, hash);
+ }
+ else
+ {
+ /* RFC 2069 Digest Access Authentication */
+ /* RESPONSE_DIGEST = H(A1BUF ":" nonce ":" A2BUF) */
+ md5_init_ctx (&ctx);
+ md5_process_bytes ((unsigned char *)a1buf, MD5_DIGEST_SIZE * 2, &ctx);
+ md5_process_bytes ((unsigned char *)":", 1, &ctx);
+ md5_process_bytes ((unsigned char *)nonce, strlen (nonce), &ctx);
+ md5_process_bytes ((unsigned char *)":", 1, &ctx);
+ md5_process_bytes ((unsigned char *)a2buf, MD5_DIGEST_SIZE * 2, &ctx);
+ md5_finish_ctx (&ctx, hash);
+ }
+
dump_hash (response_digest, hash);
- res = xmalloc (strlen (user)
- + strlen (user)
- + strlen (realm)
- + strlen (nonce)
- + strlen (path)
- + 2 * MD5_DIGEST_SIZE /*strlen (response_digest)*/
- + (opaque ? strlen (opaque) : 0)
- + 128);
- sprintf (res, "Digest \
-username=\"%s\", realm=\"%s\", nonce=\"%s\", uri=\"%s\", response=\"%s\"",
- user, realm, nonce, path, response_digest);
+ res_size = strlen (user)
+ + strlen (user)
+ + strlen (realm)
+ + strlen (nonce)
+ + strlen (path)
+ + 2 * MD5_DIGEST_SIZE /*strlen (response_digest)*/
+ + (opaque ? strlen (opaque) : 0)
+ + (qop ? 128: 0)
+ + 128;
+
+ res = xmalloc (res_size);
+
+ if (!strcmp(qop,"auth"))
+ {
+ snprintf (res, res_size, "Digest "\
+ "username=\"%s\", realm=\"%s\", nonce=\"%s\", uri=\"%s\", response=\"%s\""\
+ ", qop=auth, nc=00000001, cnonce=\"%s\"",
+ user, realm, nonce, path, response_digest, cnonce);
+
+ }
+ else
+ {
+ snprintf (res, res_size, "Digest "\
+ "username=\"%s\", realm=\"%s\", nonce=\"%s\", uri=\"%s\", response=\"%s\"",
+ user, realm, nonce, path, response_digest);
+ }
+
if (opaque)
{
char *p = res + strlen (res);
diff --git a/src/init.c b/src/init.c
index 0389c39..365fb5b 100644
--- a/src/init.c
+++ b/src/init.c
@@ -1,6 +1,6 @@
/* Reading/parsing the initialization file.
Copyright (C) 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004,
- 2005, 2006, 2007, 2008, 2009, 2010, 2011 Free Software Foundation,
+ 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012 Free Software Foundation,
Inc.
This file is part of GNU Wget.
@@ -30,6 +30,7 @@ shall include the source code for the parts of OpenSSL used as well
as that of the covered work. */
#include "wget.h"
+#include "exits.h"
#include <stdio.h>
#include <stdlib.h>
@@ -46,6 +47,10 @@ as that of the covered work. */
# endif
#endif
+#include <regex.h>
+#ifdef HAVE_LIBPCRE
+# include <pcre.h>
+#endif
#ifdef HAVE_PWD_H
# include <pwd.h>
@@ -62,6 +67,7 @@ as that of the covered work. */
#include "res.h" /* for res_cleanup */
#include "http.h" /* for http_cleanup */
#include "retr.h" /* for output_stream */
+#include "warc.h" /* for warc_close */
#ifdef TESTING
#include "test.h"
@@ -88,12 +94,15 @@ CMD_DECLARE (cmd_vector);
CMD_DECLARE (cmd_spec_dirstruct);
CMD_DECLARE (cmd_spec_header);
+CMD_DECLARE (cmd_spec_warc_header);
CMD_DECLARE (cmd_spec_htmlify);
CMD_DECLARE (cmd_spec_mirror);
CMD_DECLARE (cmd_spec_prefer_family);
CMD_DECLARE (cmd_spec_progress);
CMD_DECLARE (cmd_spec_recursive);
+CMD_DECLARE (cmd_spec_regex_type);
CMD_DECLARE (cmd_spec_restrict_file_names);
+CMD_DECLARE (cmd_spec_report_speed);
#ifdef HAVE_SSL
CMD_DECLARE (cmd_spec_secure_protocol);
#endif
@@ -115,6 +124,7 @@ static const struct {
} commands[] = {
/* KEEP THIS LIST ALPHABETICALLY SORTED */
{ "accept", &opt.accepts, cmd_vector },
+ { "acceptregex", &opt.acceptregex_s, cmd_string },
{ "addhostdir", &opt.add_hostdir, cmd_boolean },
{ "adjustextension", &opt.adjust_extension, cmd_boolean },
{ "alwaysrest", &opt.always_rest, cmd_boolean }, /* deprecated */
@@ -139,6 +149,7 @@ static const struct {
{ "chooseconfig", &opt.choose_config, cmd_file },
{ "connecttimeout", &opt.connect_timeout, cmd_time },
{ "contentdisposition", &opt.content_disposition, cmd_boolean },
+ { "contentonerror", &opt.content_on_error, cmd_boolean },
{ "continue", &opt.always_rest, cmd_boolean },
{ "convertlinks", &opt.convert_links, cmd_boolean },
{ "cookies", &opt.cookies, cmd_boolean },
@@ -213,7 +224,7 @@ static const struct {
{ "postdata", &opt.post_data, cmd_string },
{ "postfile", &opt.post_file_name, cmd_file },
{ "preferfamily", NULL, cmd_spec_prefer_family },
- { "preservepermissions", &opt.preserve_perm, cmd_boolean },/* deprecated */
+ { "preservepermissions", &opt.preserve_perm, cmd_boolean },
#ifdef HAVE_SSL
{ "privatekey", &opt.private_key, cmd_file },
{ "privatekeytype", &opt.private_key_type, cmd_cert_type },
@@ -233,10 +244,13 @@ static const struct {
{ "reclevel", &opt.reclevel, cmd_number_inf },
{ "recursive", NULL, cmd_spec_recursive },
{ "referer", &opt.referer, cmd_string },
+ { "regextype", &opt.regex_type, cmd_spec_regex_type },
{ "reject", &opt.rejects, cmd_vector },
+ { "rejectregex", &opt.rejectregex_s, cmd_string },
{ "relativeonly", &opt.relative_only, cmd_boolean },
{ "remoteencoding", &opt.encoding_remote, cmd_string },
{ "removelisting", &opt.remove_listing, cmd_boolean },
+ { "reportspeed", &opt.report_bps, cmd_spec_report_speed},
{ "restrictfilenames", NULL, cmd_spec_restrict_file_names },
{ "retrsymlinks", &opt.retr_symlinks, cmd_boolean },
{ "retryconnrefused", &opt.retry_connrefused, cmd_boolean },
@@ -263,6 +277,17 @@ static const struct {
{ "verbose", NULL, cmd_spec_verbose },
{ "wait", &opt.wait, cmd_time },
{ "waitretry", &opt.waitretry, cmd_time },
+ { "warccdx", &opt.warc_cdx_enabled, cmd_boolean },
+ { "warccdxdedup", &opt.warc_cdx_dedup_filename, cmd_file },
+#ifdef HAVE_LIBZ
+ { "warccompression", &opt.warc_compression_enabled, cmd_boolean },
+#endif
+ { "warcdigests", &opt.warc_digests_enabled, cmd_boolean },
+ { "warcfile", &opt.warc_filename, cmd_file },
+ { "warcheader", NULL, cmd_spec_warc_header },
+ { "warckeeplog", &opt.warc_keep_log, cmd_boolean },
+ { "warcmaxsize", &opt.warc_maxsize, cmd_bytes },
+ { "warctempdir", &opt.warc_tempdir, cmd_directory },
#ifdef USE_WATT32
{ "wdebug", &opt.wdebug, cmd_boolean },
#endif
@@ -347,6 +372,8 @@ defaults (void)
opt.restrict_files_nonascii = false;
opt.restrict_files_case = restrict_no_case_restriction;
+ opt.regex_type = regex_type_posix;
+
opt.max_redirect = 20;
opt.waitretry = 10;
@@ -361,6 +388,18 @@ defaults (void)
opt.useservertimestamps = true;
opt.show_all_dns_entries = false;
+
+ opt.warc_maxsize = 0; /* 1024 * 1024 * 1024; */
+#ifdef HAVE_LIBZ
+ opt.warc_compression_enabled = true;
+#else
+ opt.warc_compression_enabled = false;
+#endif
+ opt.warc_digests_enabled = true;
+ opt.warc_cdx_enabled = false;
+ opt.warc_cdx_dedup_filename = NULL;
+ opt.warc_tempdir = NULL;
+ opt.warc_keep_log = true;
}
/* Return the user's home directory (strdup-ed), or NULL if none is
@@ -443,7 +482,7 @@ wgetrc_env_file_name (void)
return NULL;
}
-/* Check for the existance of '$HOME/.wgetrc' and return it's path
+/* Check for the existence of '$HOME/.wgetrc' and return its path
if it exists and is set. */
char *
wgetrc_user_file_name (void)
@@ -597,21 +636,34 @@ initialize (void)
variable has been set. For internal testing purposes only! */
env_sysrc = getenv ("SYSTEM_WGETRC");
if (env_sysrc && file_exists_p (env_sysrc))
- ok &= run_wgetrc (env_sysrc);
+ {
+ ok &= run_wgetrc (env_sysrc);
+ /* If there are any problems parsing the system wgetrc file, tell
+ the user and exit */
+ if (! ok)
+ {
+ fprintf (stderr, _("\
+Parsing system wgetrc file (env SYSTEM_WGETRC) failed. Please check\n\
+'%s',\n\
+or specify a different file using --config.\n"), env_sysrc);
+ exit (2);
+ }
+ }
/* Otherwise, if SYSTEM_WGETRC is defined, use it. */
#ifdef SYSTEM_WGETRC
else if (file_exists_p (SYSTEM_WGETRC))
ok &= run_wgetrc (SYSTEM_WGETRC);
-#endif
/* If there are any problems parsing the system wgetrc file, tell
the user and exit */
if (! ok)
{
fprintf (stderr, _("\
-Parsing system wgetrc file failed, please check '%s'. \
-Or specify a different file using --config\n"), SYSTEM_WGETRC);
+Parsing system wgetrc file failed. Please check\n\
+'%s',\n\
+or specify a different file using --config.\n"), SYSTEM_WGETRC);
exit (2);
}
+#endif
/* Override it with your own, if one exists. */
file = wgetrc_file_name ();
if (!file)
@@ -1222,6 +1274,27 @@ cmd_spec_header (const char *com, const char *val, void *place_ignored)
}
+/* Handle the "warcheader" command.  A non-empty VAL is checked with
+   check_user_specified_header and, if accepted, appended to
+   opt.warc_user_headers; an empty VAL discards every header collected
+   so far.  COM is only used in the error message; PLACE_IGNORED is
+   not used.  Returns false (after complaining on stderr) when VAL is
+   rejected, true otherwise.  */
 static bool
+cmd_spec_warc_header (const char *com, const char *val, void *place_ignored)
+{
+  if (*val != '\0')
+    {
+      /* A real header: validate it before storing it.  */
+      if (check_user_specified_header (val))
+        {
+          opt.warc_user_headers = vec_append (opt.warc_user_headers, val);
+          return true;
+        }
+      fprintf (stderr, _("%s: %s: Invalid WARC header %s.\n"),
+               exec_name, com, quote (val));
+      return false;
+    }
+
+  /* Empty value means reset the list of headers.  */
+  free_vec (opt.warc_user_headers);
+  opt.warc_user_headers = NULL;
+  return true;
+}
+
+static bool
cmd_spec_htmlify (const char *com, const char *val, void *place_ignored)
{
int flag = cmd_boolean (com, val, &opt.htmlify);
@@ -1308,6 +1381,25 @@ cmd_spec_recursive (const char *com, const char *val, void *place_ignored)
return true;
}
+/* Handle the "regextype" command (--regex-type).  VAL is looked up in
+   the table of supported regex flavours ("posix", plus "pcre" when
+   built with HAVE_LIBPCRE).  On a match opt.regex_type is set and
+   true is returned.  Otherwise an error naming COM and VAL is printed
+   to stderr, opt.regex_type keeps its previous value, and false is
+   returned.  PLACE_IGNORED is not used.  */
+
+static bool
+cmd_spec_regex_type (const char *com, const char *val, void *place_ignored)
+{
+  static const struct decode_item choices[] = {
+    { "posix", regex_type_posix },
+#ifdef HAVE_LIBPCRE
+    { "pcre", regex_type_pcre },
+#endif
+  };
+  int regex_type = regex_type_posix;
+
+  if (!decode_string (val, choices, countof (choices), &regex_type))
+    {
+      /* Leave opt.regex_type alone: an unrecognized value must not
+         silently replace a choice that was made earlier.  */
+      fprintf (stderr, _("%s: %s: Invalid value %s.\n"),
+               exec_name, com, quote (val));
+      return false;
+    }
+
+  opt.regex_type = regex_type;
+  return true;
+}
+
static bool
cmd_spec_restrict_file_names (const char *com, const char *val, void *place_ignored)
{
@@ -1362,6 +1454,15 @@ cmd_spec_restrict_file_names (const char *com, const char *val, void *place_igno
return true;
}
+/* Handle the "reportspeed" command.  The only recognized value is
+   "bits" (compared case-insensitively), which turns on
+   opt.report_bps and yields true.  Any other VAL sets
+   opt.report_bps to false, prints an error naming COM and VAL on
+   stderr, and yields false.  PLACE_IGNORED is not used.  */
+static bool
+cmd_spec_report_speed (const char *com, const char *val, void *place_ignored)
+{
+  if (strcasecmp (val, "bits") == 0)
+    {
+      opt.report_bps = true;
+      return true;
+    }
+
+  opt.report_bps = false;
+  fprintf (stderr, _("%s: %s: Invalid value %s.\n"), exec_name, com, quote (val));
+  return false;
+}
+
#ifdef HAVE_SSL
static bool
cmd_spec_secure_protocol (const char *com, const char *val, void *place)
@@ -1576,8 +1677,16 @@ cleanup (void)
{
/* Free external resources, close files, etc. */
+ /* Close WARC file. */
+ if (opt.warc_filename != 0)
+ warc_close ();
+
+ log_close ();
+
if (output_stream)
- fclose (output_stream);
+ if (fclose (output_stream) == EOF)
+ inform_exit_status (CLOSEFAILED);
+
/* No need to check for error because Wget flushes its output (and
checks for errors) after any data arrives. */
@@ -1597,6 +1706,9 @@ cleanup (void)
host_cleanup ();
log_cleanup ();
+ for (i = 0; i < nurl; i++)
+ xfree (url[i]);
+
{
extern acc_t *netrc_list;
free_netrc (netrc_list);
@@ -1625,6 +1737,7 @@ cleanup (void)
xfree_null (opt.http_user);
xfree_null (opt.http_passwd);
free_vec (opt.user_headers);
+ free_vec (opt.warc_user_headers);
# ifdef HAVE_SSL
xfree_null (opt.cert_file);
xfree_null (opt.private_key);
diff --git a/src/log.c b/src/log.c
index 361b453..0185df1 100644
--- a/src/log.c
+++ b/src/log.c
@@ -79,6 +79,10 @@ as that of the covered work. */
logging is inhibited, logfp is set back to NULL. */
static FILE *logfp;
+/* A second stream, pointing to the temporary log file for the WARC
+   writer.  If WARC writing is disabled, this is NULL. */
+static FILE *warclogfp;
+
/* If true, it means logging is inhibited, i.e. nothing is printed or
stored. */
static bool inhibit_logging;
@@ -304,6 +308,31 @@ get_log_fp (void)
return logfp;
return stderr;
}
+
+/* Returns the stream of the secondary (WARC) log file, i.e.
+   WARCLOGFP.  Unlike get_log_fp there is no stderr fallback: NULL is
+   returned when no stream has been installed with
+   log_set_warc_log_fp, and also when logging is inhibited.  Callers
+   must therefore check the result for NULL before writing to it.  */
+
+static FILE *
+get_warc_log_fp (void)
+{
+  if (inhibit_logging)
+    return NULL;
+  /* WARCLOGFP is itself NULL until a stream has been set, so it can
+     be returned as-is.  */
+  return warclogfp;
+}
+
+/* Sets the stream used for the secondary (WARC) log file.  FP is
+   stored in WARCLOGFP as-is; this function neither opens nor closes
+   it.  Passing NULL makes get_warc_log_fp return NULL again.  */
+
+void
+log_set_warc_log_fp (FILE * fp)
+{
+ warclogfp = fp;
+}
/* Log a literal string S. The string is logged as-is, without a
newline appended. */
@@ -312,13 +341,17 @@ void
logputs (enum log_options o, const char *s)
{
FILE *fp;
+ FILE *warcfp;
check_redirect_output ();
if ((fp = get_log_fp ()) == NULL)
return;
+ warcfp = get_warc_log_fp ();
CHECK_VERBOSE (o);
FPUTS (s, fp);
+ if (warcfp != NULL)
+ FPUTS (s, warcfp);
if (save_context_p)
saved_append (s);
if (flush_log_p)
@@ -356,8 +389,9 @@ log_vprintf_internal (struct logvprintf_state *state, const char *fmt,
int available_size = sizeof (smallmsg);
int numwritten;
FILE *fp = get_log_fp ();
+ FILE *warcfp = get_warc_log_fp ();
- if (!save_context_p)
+ if (!save_context_p && warcfp == NULL)
{
/* In the simple case just call vfprintf(), to avoid needless
allocation and games with vsnprintf(). */
@@ -407,8 +441,11 @@ log_vprintf_internal (struct logvprintf_state *state, const char *fmt,
}
/* Writing succeeded. */
- saved_append (write_ptr);
+ if (save_context_p)
+ saved_append (write_ptr);
FPUTS (write_ptr, fp);
+ if (warcfp != NULL)
+ FPUTS (write_ptr, warcfp);
if (state->bigmsg)
xfree (state->bigmsg);
@@ -426,6 +463,7 @@ void
logflush (void)
{
FILE *fp = get_log_fp ();
+ FILE *warcfp = get_warc_log_fp ();
if (fp)
{
/* 2005-10-25 SMS.
@@ -440,6 +478,10 @@ logflush (void)
fflush (fp);
#endif /* def __VMS [else] */
}
+
+ if (warcfp != NULL)
+ fflush (warcfp);
+
needs_flushing = false;
}
@@ -573,14 +615,14 @@ log_init (const char *file, bool appendp)
}
}
-/* Close LOGFP, inhibit further logging and free the memory associated
- with it. */
+/* Close LOGFP (only if we opened it, not if it's stderr), inhibit
+ further logging and free the memory associated with it. */
void
log_close (void)
{
int i;
- if (logfp)
+ if (logfp && (logfp != stderr))
fclose (logfp);
logfp = NULL;
inhibit_logging = true;
@@ -598,6 +640,7 @@ log_dump_context (void)
{
int num = log_line_current;
FILE *fp = get_log_fp ();
+ FILE *warcfp = get_warc_log_fp ();
if (!fp)
return;
@@ -609,14 +652,23 @@ log_dump_context (void)
{
struct log_ln *ln = log_lines + num;
if (ln->content)
- FPUTS (ln->content, fp);
+ {
+ FPUTS (ln->content, fp);
+ if (warcfp != NULL)
+ FPUTS (ln->content, warcfp);
+ }
ROT_ADVANCE (num);
}
while (num != log_line_current);
if (trailing_line)
if (log_lines[log_line_current].content)
- FPUTS (log_lines[log_line_current].content, fp);
+ {
+ FPUTS (log_lines[log_line_current].content, fp);
+ if (warcfp != NULL)
+ FPUTS (log_lines[log_line_current].content, warcfp);
+ }
fflush (fp);
+ fflush (warcfp);
}
/* String escape functions. */
diff --git a/src/log.h b/src/log.h
index 48c2f1b..d74ca53 100644
--- a/src/log.h
+++ b/src/log.h
@@ -34,8 +34,12 @@ as that of the covered work. */
/* The log file to which Wget writes to after HUP. */
#define DEFAULT_LOGFILE "wget-log"
+#include <stdio.h>
+
enum log_options { LOG_VERBOSE, LOG_NOTQUIET, LOG_NONVERBOSE, LOG_ALWAYS };
+void log_set_warc_log_fp (FILE *);
+
void logprintf (enum log_options, const char *, ...)
GCC_FORMAT_ATTR (2, 3);
void debug_logprintf (const char *, ...) GCC_FORMAT_ATTR (1, 2);
diff --git a/src/main.c b/src/main.c
index 7dffd5d..b8b2869 100644
--- a/src/main.c
+++ b/src/main.c
@@ -1,6 +1,6 @@
/* Command line parsing.
Copyright (C) 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004,
- 2005, 2006, 2007, 2008, 2009, 2010, 2011 Free Software Foundation,
+ 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012 Free Software Foundation,
Inc.
This file is part of GNU Wget.
@@ -55,7 +55,7 @@ as that of the covered work. */
#include "spider.h"
#include "http.h" /* for save_cookies */
#include "ptimer.h"
-
+#include "warc.h"
#include <getopt.h>
#include <getpass.h>
#include <quote.h>
@@ -157,6 +157,7 @@ struct cmdline_option {
static struct cmdline_option option_data[] =
{
{ "accept", 'A', OPT_VALUE, "accept", -1 },
+ { "accept-regex", 0, OPT_VALUE, "acceptregex", -1 },
{ "adjust-extension", 'E', OPT_BOOLEAN, "adjustextension", -1 },
{ "append-output", 'a', OPT__APPEND_OUTPUT, NULL, required_argument },
{ "ask-password", 0, OPT_BOOLEAN, "askpassword", -1 },
@@ -178,6 +179,7 @@ static struct cmdline_option option_data[] =
{ "continue", 'c', OPT_BOOLEAN, "continue", -1 },
{ "convert-links", 'k', OPT_BOOLEAN, "convertlinks", -1 },
{ "content-disposition", 0, OPT_BOOLEAN, "contentdisposition", -1 },
+ { "content-on-error", 0, OPT_BOOLEAN, "contentonerror", -1 },
{ "cookies", 0, OPT_BOOLEAN, "cookies", -1 },
{ "cut-dirs", 0, OPT_VALUE, "cutdirs", -1 },
{ WHEN_DEBUG ("debug"), 'd', OPT_BOOLEAN, "debug", -1 },
@@ -242,7 +244,7 @@ static struct cmdline_option option_data[] =
{ "post-data", 0, OPT_VALUE, "postdata", -1 },
{ "post-file", 0, OPT_VALUE, "postfile", -1 },
{ "prefer-family", 0, OPT_VALUE, "preferfamily", -1 },
- { "preserve-permissions", 0, OPT_BOOLEAN, "preservepermissions", -1 }, /* deprecated */
+ { "preserve-permissions", 0, OPT_BOOLEAN, "preservepermissions", -1 },
{ IF_SSL ("private-key"), 0, OPT_VALUE, "privatekey", -1 },
{ IF_SSL ("private-key-type"), 0, OPT_VALUE, "privatekeytype", -1 },
{ "progress", 0, OPT_VALUE, "progress", -1 },
@@ -259,10 +261,13 @@ static struct cmdline_option option_data[] =
{ "read-timeout", 0, OPT_VALUE, "readtimeout", -1 },
{ "recursive", 'r', OPT_BOOLEAN, "recursive", -1 },
{ "referer", 0, OPT_VALUE, "referer", -1 },
+ { "regex-type", 0, OPT_VALUE, "regextype", -1 },
{ "reject", 'R', OPT_VALUE, "reject", -1 },
+ { "reject-regex", 0, OPT_VALUE, "rejectregex", -1 },
{ "relative", 'L', OPT_BOOLEAN, "relativeonly", -1 },
{ "remote-encoding", 0, OPT_VALUE, "remoteencoding", -1 },
{ "remove-listing", 0, OPT_BOOLEAN, "removelisting", -1 },
+ { "report-speed", 0, OPT_BOOLEAN, "reportspeed", -1 },
{ "restrict-file-names", 0, OPT_BOOLEAN, "restrictfilenames", -1 },
{ "retr-symlinks", 0, OPT_BOOLEAN, "retrsymlinks", -1 },
{ "retry-connrefused", 0, OPT_BOOLEAN, "retryconnrefused", -1 },
@@ -286,6 +291,17 @@ static struct cmdline_option option_data[] =
{ "version", 'V', OPT_FUNCALL, (void *) print_version, no_argument },
{ "wait", 'w', OPT_VALUE, "wait", -1 },
{ "waitretry", 0, OPT_VALUE, "waitretry", -1 },
+ { "warc-cdx", 0, OPT_BOOLEAN, "warccdx", -1 },
+#ifdef HAVE_LIBZ
+ { "warc-compression", 0, OPT_BOOLEAN, "warccompression", -1 },
+#endif
+ { "warc-dedup", 0, OPT_VALUE, "warccdxdedup", -1 },
+ { "warc-digests", 0, OPT_BOOLEAN, "warcdigests", -1 },
+ { "warc-file", 0, OPT_VALUE, "warcfile", -1 },
+ { "warc-header", 0, OPT_VALUE, "warcheader", -1 },
+ { "warc-keep-log", 0, OPT_BOOLEAN, "warckeeplog", -1 },
+ { "warc-max-size", 0, OPT_VALUE, "warcmaxsize", -1 },
+ { "warc-tempdir", 0, OPT_VALUE, "warctempdir", -1 },
#ifdef USE_WATT32
{ "wdebug", 0, OPT_BOOLEAN, "wdebug", -1 },
#endif
@@ -444,6 +460,8 @@ Logging and input file:\n"),
N_("\
-nv, --no-verbose turn off verboseness, without being quiet.\n"),
N_("\
+ --report-speed=TYPE Output bandwidth as TYPE. TYPE can be bits.\n"),
+ N_("\
-i, --input-file=FILE download URLs found in local or external FILE.\n"),
N_("\
-F, --force-html treat input file as HTML.\n"),
@@ -595,6 +613,8 @@ HTTP options:\n"),
--content-disposition honor the Content-Disposition header when\n\
choosing local file names (EXPERIMENTAL).\n"),
N_("\
+ --content-on-error output the received content on server errors.\n"),
+ N_("\
--auth-no-challenge send Basic HTTP authentication information\n\
without first waiting for the server's\n\
challenge.\n"),
@@ -644,10 +664,37 @@ FTP options:\n"),
N_("\
--no-passive-ftp disable the \"passive\" transfer mode.\n"),
N_("\
+ --preserve-permissions preserve remote file permissions.\n"),
+ N_("\
--retr-symlinks when recursing, get linked-to files (not dir).\n"),
"\n",
N_("\
+WARC options:\n"),
+ N_("\
+ --warc-file=FILENAME save request/response data to a .warc.gz file.\n"),
+ N_("\
+ --warc-header=STRING insert STRING into the warcinfo record.\n"),
+ N_("\
+ --warc-max-size=NUMBER set maximum size of WARC files to NUMBER.\n"),
+ N_("\
+ --warc-cdx write CDX index files.\n"),
+ N_("\
+ --warc-dedup=FILENAME do not store records listed in this CDX file.\n"),
+#ifdef HAVE_LIBZ
+ N_("\
+ --no-warc-compression do not compress WARC files with GZIP.\n"),
+#endif
+ N_("\
+ --no-warc-digests do not calculate SHA1 digests.\n"),
+ N_("\
+ --no-warc-keep-log do not store the log file in a WARC record.\n"),
+ N_("\
+ --warc-tempdir=DIRECTORY location for temporary files created by the\n\
+ WARC writer.\n"),
+ "\n",
+
+ N_("\
Recursive download:\n"),
N_("\
-r, --recursive specify recursive download.\n"),
@@ -680,6 +727,17 @@ Recursive accept/reject:\n"),
N_("\
-R, --reject=LIST comma-separated list of rejected extensions.\n"),
N_("\
+ --accept-regex=REGEX regex matching accepted URLs.\n"),
+ N_("\
+ --reject-regex=REGEX regex matching rejected URLs.\n"),
+#ifdef HAVE_LIBPCRE
+ N_("\
+ --regex-type=TYPE regex type (posix|pcre).\n"),
+#else
+ N_("\
+ --regex-type=TYPE regex type (posix).\n"),
+#endif
+ N_("\
-D, --domains=LIST comma-separated list of accepted domains.\n"),
N_("\
--exclude-domains=LIST comma-separated list of rejected domains.\n"),
@@ -703,7 +761,6 @@ Recursive accept/reject:\n"),
N_("\
-np, --no-parent don't ascend to the parent directory.\n"),
"\n",
-
N_("Mail bug reports and suggestions to <bug-wget@gnu.org>.\n")
};
@@ -882,9 +939,9 @@ print_version (void)
exit (3);
/* TRANSLATORS: When available, an actual copyright character
- (cirle-c) should be used in preference to "(C)". */
+ (circle-c) should be used in preference to "(C)". */
if (fputs (_("\
-Copyright (C) 2009 Free Software Foundation, Inc.\n"), stdout) < 0)
+Copyright (C) 2011 Free Software Foundation, Inc.\n"), stdout) < 0)
exit (3);
if (fputs (_("\
License GPLv3+: GNU GPL version 3 or later\n\
@@ -905,6 +962,7 @@ There is NO WARRANTY, to the extent permitted by law.\n"), stdout) < 0)
}
char *program_name; /* Needed by lib/error.c. */
+char *program_argstring; /* Needed by warc.c. */
int
main (int argc, char **argv)
@@ -940,13 +998,34 @@ main (int argc, char **argv)
windows_main ((char **) &exec_name);
#endif
+ /* Construct the arguments string. */
+ int argstring_length = 1;
+ for (i = 1; i < argc; i++)
+ argstring_length += strlen (argv[i]) + 2 + 1;
+ char *p = program_argstring = malloc (argstring_length * sizeof (char));
+ if (p == NULL)
+ {
+ fprintf (stderr, _("Memory allocation problem\n"));
+ exit (2);
+ }
+ for (i = 1; i < argc; i++)
+ {
+ *p++ = '"';
+ int arglen = strlen (argv[i]);
+ memcpy (p, argv[i], arglen);
+ p += arglen;
+ *p++ = '"';
+ *p++ = ' ';
+ }
+ *p = '\0';
+
/* Load the hard-coded defaults. */
defaults ();
init_switches ();
- /* This seperate getopt_long is needed to find the user config
- and parse it before the other user options. */
+ /* This separate getopt_long is needed to find the user config file
+ option ("--config") and parse it before the other user options. */
longindex = -1;
int retconf;
bool use_userconfig = false;
@@ -957,20 +1036,25 @@ main (int argc, char **argv)
int confval;
bool userrc_ret = true;
struct cmdline_option *config_opt;
- confval = long_options[longindex].val;
- config_opt = &option_data[confval & ~BOOLEAN_NEG_MARKER];
- if (strcmp (config_opt->long_name, "config") == 0)
- {
- userrc_ret &= run_wgetrc (optarg);
- use_userconfig = true;
- }
- if (!userrc_ret)
+
+ /* There is no short option for "--config". */
+ if (longindex >= 0)
{
- printf ("Exiting due to error in %s\n", optarg);
- exit (2);
+ confval = long_options[longindex].val;
+ config_opt = &option_data[confval & ~BOOLEAN_NEG_MARKER];
+ if (strcmp (config_opt->long_name, "config") == 0)
+ {
+ userrc_ret &= run_wgetrc (optarg);
+ use_userconfig = true;
+ }
+ if (!userrc_ret)
+ {
+ fprintf (stderr, "Exiting due to error in %s\n", optarg);
+ exit (2);
+ }
+ else
+ break;
}
- else
- break;
}
/* If the user did not specify a config, read the system wgetrc and ~/.wgetrc. */
@@ -993,9 +1077,10 @@ main (int argc, char **argv)
{
if (ret == '?')
{
- print_usage (0);
- printf ("\n");
- printf (_("Try `%s --help' for more options.\n"), exec_name);
+ print_usage (1);
+ fprintf (stderr, "\n");
+ fprintf (stderr, _("Try `%s --help' for more options.\n"),
+ exec_name);
exit (2);
}
/* Find the short option character in the mapping. */
@@ -1103,7 +1188,7 @@ main (int argc, char **argv)
{
fprintf (stderr,
_("Both --no-clobber and --convert-links were specified,"
- "only --convert-links will be used.\n"));
+ " only --convert-links will be used.\n"));
opt.noclobber = false;
}
@@ -1184,6 +1269,47 @@ for details.\n\n"));
}
}
+ if (opt.warc_filename != 0)
+ {
+ if (opt.noclobber)
+ {
+ fprintf (stderr,
+ _("WARC output does not work with --no-clobber, "
+ "--no-clobber will be disabled.\n"));
+ opt.noclobber = false;
+ }
+ if (opt.timestamping)
+ {
+ fprintf (stderr,
+ _("WARC output does not work with timestamping, "
+ "timestamping will be disabled.\n"));
+ opt.timestamping = false;
+ }
+ if (opt.spider)
+ {
+ fprintf (stderr,
+ _("WARC output does not work with --spider.\n"));
+ exit (1);
+ }
+ if (opt.always_rest)
+ {
+ fprintf (stderr,
+ _("WARC output does not work with --continue, "
+ "--continue will be disabled.\n"));
+ opt.always_rest = false;
+ }
+ if (opt.warc_cdx_dedup_filename != 0 && !opt.warc_digests_enabled)
+ {
+ fprintf (stderr,
+ _("Digests are disabled; WARC deduplication will "
+ "not find duplicate records.\n"));
+ }
+ if (opt.warc_keep_log)
+ {
+ opt.progress_type = xstrdup ("dot");
+ }
+ }
+
if (opt.ask_passwd && opt.passwd)
{
fprintf (stderr,
@@ -1197,13 +1323,42 @@ for details.\n\n"));
/* No URL specified. */
fprintf (stderr, _("%s: missing URL\n"), exec_name);
print_usage (1);
- printf ("\n");
+ fprintf (stderr, "\n");
/* #### Something nicer should be printed here -- similar to the
pre-1.5 `--help' page. */
fprintf (stderr, _("Try `%s --help' for more options.\n"), exec_name);
exit (1);
}
+ /* Compile the regular expressions. */
+ switch (opt.regex_type)
+ {
+#ifdef HAVE_LIBPCRE
+ case regex_type_pcre:
+ opt.regex_compile_fun = compile_pcre_regex;
+ opt.regex_match_fun = match_pcre_regex;
+ break;
+#endif
+
+ case regex_type_posix:
+ default:
+ opt.regex_compile_fun = compile_posix_regex;
+ opt.regex_match_fun = match_posix_regex;
+ break;
+ }
+ if (opt.acceptregex_s)
+ {
+ opt.acceptregex = opt.regex_compile_fun (opt.acceptregex_s);
+ if (!opt.acceptregex)
+ exit (1);
+ }
+ if (opt.rejectregex_s)
+ {
+ opt.rejectregex = opt.regex_compile_fun (opt.rejectregex_s);
+ if (!opt.rejectregex)
+ exit (1);
+ }
+
#ifdef ENABLE_IRI
if (opt.enable_iri)
{
@@ -1250,6 +1405,11 @@ for details.\n\n"));
/* Fill in the arguments. */
url = alloca_array (char *, nurl + 1);
+ if (url == NULL)
+ {
+ fprintf (stderr, _("Memory allocation problem\n"));
+ exit (2);
+ }
for (i = 0; i < nurl; i++, optind++)
{
char *rewritten = rewrite_shorthand_url (argv[optind]);
@@ -1263,6 +1423,10 @@ for details.\n\n"));
/* Initialize logging. */
log_init (opt.lfilename, append_to_log);
+ /* Open WARC file. */
+ if (opt.warc_filename != 0)
+ warc_init ();
+
DEBUGP (("DEBUG output created by Wget %s on %s.\n\n",
version_string, OS_TYPE));
@@ -1395,7 +1559,7 @@ outputting to a regular file.\n"));
&dt, opt.recursive, iri, true);
}
- if (opt.delete_after && file_exists_p(filename))
+ if (opt.delete_after && filename != NULL && file_exists_p (filename))
{
DEBUGP (("Removing file due to --delete-after in main():\n"));
logprintf (LOG_VERBOSE, _("Removing %s.\n"), filename);
@@ -1462,12 +1626,9 @@ outputting to a regular file.\n"));
if (opt.convert_links && !opt.delete_after)
convert_all_links ();
- log_close ();
- for (i = 0; i < nurl; i++)
- xfree (url[i]);
cleanup ();
- return get_exit_status ();
+ exit (get_exit_status ());
}
#endif /* TESTING */
diff --git a/src/openssl.c b/src/openssl.c
index 2e23669..3924e41 100644
--- a/src/openssl.c
+++ b/src/openssl.c
@@ -1,6 +1,6 @@
/* SSL support via OpenSSL library.
Copyright (C) 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008,
- 2009, 2010, 2011 Free Software Foundation, Inc.
+ 2009, 2010, 2011, 2012 Free Software Foundation, Inc.
Originally contributed by Christian Fraenkel.
This file is part of GNU Wget.
@@ -159,7 +159,7 @@ key_type_to_ssl_type (enum keyfile_type type)
Returns true on success, false otherwise. */
bool
-ssl_init ()
+ssl_init (void)
{
SSL_METHOD const *meth;
@@ -201,7 +201,9 @@ ssl_init ()
abort ();
}
- ssl_ctx = SSL_CTX_new (meth);
+ /* The type cast below accommodates older OpenSSL versions (0.9.8)
+ where SSL_CTX_new() is declared without a "const" argument. */
+ ssl_ctx = SSL_CTX_new ((SSL_METHOD *)meth);
if (!ssl_ctx)
goto error;
@@ -393,7 +395,7 @@ static struct transport_implementation openssl_transport = {
Returns true on success, false on failure. */
bool
-ssl_connect_wget (int fd)
+ssl_connect_wget (int fd, const char *hostname)
{
SSL *conn;
struct openssl_transport_context *ctx;
@@ -404,6 +406,19 @@ ssl_connect_wget (int fd)
conn = SSL_new (ssl_ctx);
if (!conn)
goto error;
+#if OPENSSL_VERSION_NUMBER >= 0x0090806fL && !defined(OPENSSL_NO_TLSEXT)
+ /* If the SSL library was built with support for Server Name Indication
+ (SNI), then use it whenever we have a hostname. If not, don't, ever. */
+ if (! is_valid_ip_address (hostname))
+ {
+ if (! SSL_set_tlsext_host_name (conn, hostname))
+ {
+ DEBUGP (("Failed to set TLS server-name indication."));
+ goto error;
+ }
+ }
+#endif
+
#ifndef FD_TO_SOCKET
# define FD_TO_SOCKET(X) (X)
#endif
diff --git a/src/options.h b/src/options.h
index 252bf81..44e0a70 100644
--- a/src/options.h
+++ b/src/options.h
@@ -74,6 +74,19 @@ struct options
bool ignore_case; /* Whether to ignore case when
matching dirs and files */
+ char *acceptregex_s; /* Patterns to accept (a regex string). */
+ char *rejectregex_s; /* Patterns to reject (a regex string). */
+ void *acceptregex; /* Patterns to accept (a regex struct). */
+ void *rejectregex; /* Patterns to reject (a regex struct). */
+ enum {
+#ifdef HAVE_LIBPCRE
+ regex_type_pcre,
+#endif
+ regex_type_posix
+ } regex_type; /* The regex library. */
+ void *(*regex_compile_fun)(const char *); /* Function to compile a regex. */
+ bool (*regex_match_fun)(const void *, const char *); /* Function to match a string to a regex. */
+
char **domains; /* See host.c */
char **exclude_domains;
bool dns_cache; /* whether we cache DNS lookups. */
@@ -87,6 +100,15 @@ struct options
FTP. */
char *output_document; /* The output file to which the
documents will be printed. */
+ char *warc_filename; /* WARC output filename */
+ char *warc_tempdir; /* WARC temp dir */
+ char *warc_cdx_dedup_filename; /* CDX file to be used for deduplication. */
+ wgint warc_maxsize; /* WARC max archive size */
+ bool warc_compression_enabled; /* For GZIP compression. */
+ bool warc_digests_enabled; /* For SHA1 digests. */
+ bool warc_cdx_enabled; /* Create CDX files? */
+ bool warc_keep_log; /* Store the log file in a WARC record. */
+ char **warc_user_headers; /* User-defined WARC header(s). */
char *user; /* Generic username */
char *passwd; /* Generic password */
@@ -130,6 +152,8 @@ struct options
bool server_response; /* Do we print server response? */
bool save_headers; /* Do we save headers together with
file? */
+ bool content_on_error; /* Do we output the content when the HTTP
+ status code indicates a server error */
#ifdef ENABLE_DEBUG
bool debug; /* Debugging on/off */
@@ -255,6 +279,7 @@ struct options
bool show_all_dns_entries; /* Show all the DNS entries when resolving a
name. */
+ bool report_bps; /*Output bandwidth in bits format*/
};
extern struct options opt;
diff --git a/src/progress.c b/src/progress.c
index 219b5be..2e888a9 100644
--- a/src/progress.c
+++ b/src/progress.c
@@ -766,7 +766,7 @@ update_speed_ring (struct bar_progress *bp, wgint howmuch, double dltime)
}
#if USE_NLS_PROGRESS_BAR
-int
+static int
count_cols (const char *mbs)
{
wchar_t wc;
@@ -795,7 +795,7 @@ count_cols (const char *mbs)
# define count_cols(mbs) ((int)(strlen(mbs)))
#endif
-const char *
+static const char *
get_eta (int *bcd)
{
/* TRANSLATORS: "ETA" is English-centric, but this must
@@ -861,7 +861,7 @@ create_image (struct bar_progress *bp, double dl_total_time, bool done)
struct bar_progress_hist *hist = &bp->hist;
/* The progress bar should look like this:
- xx% [=======> ] nn,nnn 12.34K/s eta 36m 51s
+ xx% [=======> ] nn,nnn 12.34KB/s eta 36m 51s
Calculate the geometry. The idea is to assign as much room as
possible to the progress bar. The other idea is to never let
@@ -873,7 +873,7 @@ create_image (struct bar_progress *bp, double dl_total_time, bool done)
"xx% " or "100%" - percentage - 4 chars
"[]" - progress bar decorations - 2 chars
" nnn,nnn,nnn" - downloaded bytes - 12 chars or very rarely more
- " 12.5K/s" - download rate - 8 chars
+ " 12.5KB/s" - download rate - 9 chars
" eta 36m 51s" - ETA - 14 chars
"=====>..." - progress bar - the rest
@@ -977,10 +977,11 @@ create_image (struct bar_progress *bp, double dl_total_time, bool done)
*p++ = ' ';
}
- /* " 12.52K/s" */
+ /* " 12.52Kb/s or 12.52KB/s" */
if (hist->total_time > 0 && hist->total_bytes)
{
- static const char *short_units[] = { "B/s", "K/s", "M/s", "G/s" };
+ static const char *short_units[] = { "B/s", "KB/s", "MB/s", "GB/s" };
+ static const char *short_units_bits[] = { "b/s", "Kb/s", "Mb/s", "Gb/s" };
int units = 0;
/* Calculate the download speed using the history ring and
recent data that hasn't made it to the ring yet. */
@@ -988,7 +989,7 @@ create_image (struct bar_progress *bp, double dl_total_time, bool done)
double dltime = hist->total_time + (dl_total_time - bp->recent_start);
double dlspeed = calc_rate (dlquant, dltime, &units);
sprintf (p, " %4.*f%s", dlspeed >= 99.95 ? 0 : dlspeed >= 9.995 ? 1 : 2,
- dlspeed, short_units[units]);
+ dlspeed, !opt.report_bps ? short_units[units] : short_units_bits[units]);
move_to_end (p);
}
else
diff --git a/src/ptimer.c b/src/ptimer.c
index c06e8b9..c53b5e7 100644
--- a/src/ptimer.c
+++ b/src/ptimer.c
@@ -59,9 +59,7 @@ as that of the covered work. */
#include <errno.h>
#include <unistd.h>
#include <time.h>
-#ifdef HAVE_SYS_TIME_H
-# include <sys/time.h>
-#endif
+#include <sys/time.h>
/* Cygwin currently (as of 2005-04-08, Cygwin 1.5.14) lacks clock_getres,
but still defines _POSIX_TIMERS! Because of that we simply use the
diff --git a/src/recur.c b/src/recur.c
index 139fe2e..72274fb 100644
--- a/src/recur.c
+++ b/src/recur.c
@@ -586,6 +586,11 @@ download_child_p (const struct urlpos *upos, struct url *parent, int depth,
goto out;
}
}
+ if (!accept_url (url))
+ {
+ DEBUGP (("%s is excluded/not-included through regex.\n", url));
+ goto out;
+ }
/* 6. Check for acceptance/rejection rules. We ignore these rules
for directories (no file name to match) and for non-leaf HTMLs,
diff --git a/src/retr.c b/src/retr.c
index 7394765..6204839 100644
--- a/src/retr.c
+++ b/src/retr.c
@@ -139,13 +139,16 @@ limit_bandwidth (wgint bytes, struct ptimer *timer)
/* Write data in BUF to OUT. However, if *SKIP is non-zero, skip that
amount of data and decrease SKIP. Increment *TOTAL by the amount
- of data written. */
+ of data written. If OUT2 is not NULL, also write BUF to OUT2.
+ In case of error writing to OUT, -1 is returned. In case of error
+ writing to OUT2, -2 is returned. In case of any other error,
+ 1 is returned. */
static int
-write_data (FILE *out, const char *buf, int bufsize, wgint *skip,
- wgint *written)
+write_data (FILE *out, FILE *out2, const char *buf, int bufsize,
+ wgint *skip, wgint *written)
{
- if (!out)
+ if (out == NULL && out2 == NULL)
return 1;
if (*skip > bufsize)
{
@@ -161,7 +164,10 @@ write_data (FILE *out, const char *buf, int bufsize, wgint *skip,
return 1;
}
- fwrite (buf, 1, bufsize, out);
+ if (out != NULL)
+ fwrite (buf, 1, bufsize, out);
+ if (out2 != NULL)
+ fwrite (buf, 1, bufsize, out2);
*written += bufsize;
/* Immediately flush the downloaded data. This should not hinder
@@ -178,9 +184,17 @@ write_data (FILE *out, const char *buf, int bufsize, wgint *skip,
actual justification. (Also, why 16K? Anyone test other values?)
*/
#ifndef __VMS
- fflush (out);
+ if (out != NULL)
+ fflush (out);
+ if (out2 != NULL)
+ fflush (out2);
#endif /* ndef __VMS */
- return !ferror (out);
+ if (out != NULL && ferror (out))
+ return -1;
+ else if (out2 != NULL && ferror (out2))
+ return -2;
+ else
+ return 0;
}
/* Read the contents of file descriptor FD until it the connection
@@ -198,13 +212,20 @@ write_data (FILE *out, const char *buf, int bufsize, wgint *skip,
the amount of data written to disk. The time it took to download
the data is stored to ELAPSED.
+ If OUT2 is non-NULL, the contents is also written to OUT2.
+ OUT2 will get an exact copy of the response: if this is a chunked
+ response, everything -- including the chunk headers -- is written
+ to OUT2. (OUT will only get the unchunked response.)
+
The function exits and returns the amount of data read. In case of
error while reading data, -1 is returned. In case of error while
- writing data, -2 is returned. */
+ writing data to OUT, -2 is returned. In case of error while writing
+ data to OUT2, -3 is returned. */
int
fd_read_body (int fd, FILE *out, wgint toread, wgint startpos,
- wgint *qtyread, wgint *qtywritten, double *elapsed, int flags)
+ wgint *qtyread, wgint *qtywritten, double *elapsed, int flags,
+ FILE *out2)
{
int ret = 0;
#undef max
@@ -287,13 +308,24 @@ fd_read_body (int fd, FILE *out, wgint toread, wgint startpos,
ret = -1;
break;
}
+ else if (out2 != NULL)
+ fwrite (line, 1, strlen (line), out2);
remaining_chunk_size = strtol (line, &endl, 16);
+ xfree (line);
+
if (remaining_chunk_size == 0)
{
ret = 0;
- if (fd_read_line (fd) == NULL)
+ line = fd_read_line (fd);
+ if (line == NULL)
ret = -1;
+ else
+ {
+ if (out2 != NULL)
+ fwrite (line, 1, strlen (line), out2);
+ xfree (line);
+ }
break;
}
}
@@ -343,20 +375,30 @@ fd_read_body (int fd, FILE *out, wgint toread, wgint startpos,
if (ret > 0)
{
sum_read += ret;
- if (!write_data (out, dlbuf, ret, &skip, &sum_written))
+ int write_res = write_data (out, out2, dlbuf, ret, &skip, &sum_written);
+ /* write_data reports failures as negative values (-1 for OUT,
+ -2 for OUT2); 0 and 1 are not failures. Translate them to
+ this function's own codes: -2 for OUT, -3 for OUT2. */
+ if (write_res < 0)
{
- ret = -2;
+ ret = (write_res == -2) ? -3 : -2;
goto out;
}
if (chunked)
{
remaining_chunk_size -= ret;
if (remaining_chunk_size == 0)
- if (fd_read_line (fd) == NULL)
- {
- ret = -1;
- break;
- }
+ {
+ char *line = fd_read_line (fd);
+ if (line == NULL)
+ {
+ ret = -1;
+ break;
+ }
+ else
+ {
+ if (out2 != NULL)
+ fwrite (line, 1, strlen (line), out2);
+ xfree (line);
+ }
+ }
}
}
@@ -578,6 +620,7 @@ retr_rate (wgint bytes, double secs)
{
static char res[20];
static const char *rate_names[] = {"B/s", "KB/s", "MB/s", "GB/s" };
+ static const char *rate_names_bits[] = {"b/s", "Kb/s", "Mb/s", "Gb/s" };
int units;
double dlrate = calc_rate (bytes, secs, &units);
@@ -585,7 +628,7 @@ retr_rate (wgint bytes, double secs)
e.g. "1022", "247", "12.5", "2.38". */
sprintf (res, "%.*f %s",
dlrate >= 99.95 ? 0 : dlrate >= 9.995 ? 1 : 2,
- dlrate, rate_names[units]);
+ dlrate, !opt.report_bps ? rate_names[units]: rate_names_bits[units]);
return res;
}
@@ -602,6 +645,11 @@ double
calc_rate (wgint bytes, double secs, int *units)
{
double dlrate;
+ double bibyte = 1000.0;
+
+ if (!opt.report_bps)
+ bibyte = 1024.0;
+
assert (secs >= 0);
assert (bytes >= 0);
@@ -613,16 +661,17 @@ calc_rate (wgint bytes, double secs, int *units)
0 and the timer's resolution, assume half the resolution. */
secs = ptimer_resolution () / 2.0;
- dlrate = bytes / secs;
- if (dlrate < 1024.0)
+ dlrate = convert_to_bits (bytes) / secs;
+ if (dlrate < bibyte)
*units = 0;
- else if (dlrate < 1024.0 * 1024.0)
- *units = 1, dlrate /= 1024.0;
- else if (dlrate < 1024.0 * 1024.0 * 1024.0)
- *units = 2, dlrate /= (1024.0 * 1024.0);
+ else if (dlrate < (bibyte * bibyte))
+ *units = 1, dlrate /= bibyte;
+ else if (dlrate < (bibyte * bibyte * bibyte))
+ *units = 2, dlrate /= (bibyte * bibyte);
+
else
/* Maybe someone will need this, one day. */
- *units = 3, dlrate /= (1024.0 * 1024.0 * 1024.0);
+ *units = 3, dlrate /= (bibyte * bibyte * bibyte);
return dlrate;
}
@@ -883,10 +932,10 @@ retrieve_url (struct url * orig_parsed, const char *origurl, char **file,
register_redirection (origurl, u->url);
if (*dt & TEXTHTML)
- register_html (u->url, local_file);
+ register_html (local_file);
if (*dt & TEXTCSS)
- register_css (u->url, local_file);
+ register_css (local_file);
}
if (file)
diff --git a/src/retr.h b/src/retr.h
index 7329b03..22ab9ec 100644
--- a/src/retr.h
+++ b/src/retr.h
@@ -50,7 +50,7 @@ enum {
rb_chunked_transfer_encoding = 4
};
-int fd_read_body (int, FILE *, wgint, wgint, wgint *, wgint *, double *, int);
+int fd_read_body (int, FILE *, wgint, wgint, wgint *, wgint *, double *, int, FILE *);
typedef const char *(*hunk_terminator_t) (const char *, const char *, int);
diff --git a/src/spider.c b/src/spider.c
index ae2f392..dad9a23 100644
--- a/src/spider.c
+++ b/src/spider.c
@@ -45,7 +45,7 @@ static struct hash_table *nonexisting_urls_set;
/* Cleanup the data structures associated with this file. */
-void
+static void
spider_cleanup (void)
{
if (nonexisting_urls_set)
diff --git a/src/ssl.h b/src/ssl.h
index 0532c40..e365c4f 100644
--- a/src/ssl.h
+++ b/src/ssl.h
@@ -1,6 +1,6 @@
/* SSL support.
Copyright (C) 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008,
- 2009, 2010, 2011 Free Software Foundation, Inc.
+ 2009, 2010, 2011, 2012 Free Software Foundation, Inc.
Originally contributed by Christian Fraenkel.
This file is part of GNU Wget.
@@ -33,7 +33,7 @@ as that of the covered work. */
#define GEN_SSLFUNC_H
bool ssl_init (void);
-bool ssl_connect_wget (int);
+bool ssl_connect_wget (int, const char *);
bool ssl_check_certificate (int, const char *);
#endif /* GEN_SSLFUNC_H */
diff --git a/src/test.c b/src/test.c
index e7ce54c..80abaff 100644
--- a/src/test.c
+++ b/src/test.c
@@ -46,6 +46,8 @@ const char *test_append_uri_pathel();
const char *test_are_urls_equal();
const char *test_is_robots_txt_url();
+const char *program_argstring = "TEST";
+
int tests_run;
static const char *
diff --git a/src/url.c b/src/url.c
index 2593d09..e44dfcd 100644
--- a/src/url.c
+++ b/src/url.c
@@ -1502,9 +1502,9 @@ url_file_name (const struct url *u, char *replaced_filename)
{
struct growable fnres; /* stands for "file name result" */
- const char *u_file, *u_query;
+ const char *u_file;
char *fname, *unique;
- char *index_filename = "index.html"; /* The default index file is index.html */
+ const char *index_filename = "index.html"; /* The default index file is index.html */
fnres.base = NULL;
fnres.size = 0;
@@ -1561,12 +1561,11 @@ url_file_name (const struct url *u, char *replaced_filename)
u_file = *u->file ? u->file : index_filename;
append_uri_pathel (u_file, u_file + strlen (u_file), false, &fnres);
- /* Append "?query" to the file name. */
- u_query = u->query && *u->query ? u->query : NULL;
- if (u_query)
+ /* Append "?query" to the file name, even if empty */
+ if (u->query)
{
append_char (FN_QUERY_SEP, &fnres);
- append_uri_pathel (u_query, u_query + strlen (u_query),
+ append_uri_pathel (u->query, u->query + strlen (u->query),
true, &fnres);
}
}
diff --git a/src/utils.c b/src/utils.c
index 4950ab2..567dc35 100644
--- a/src/utils.c
+++ b/src/utils.c
@@ -59,12 +59,12 @@ as that of the covered work. */
# endif
#endif
+#include <sys/time.h>
+
#include <sys/stat.h>
/* For TIOCGWINSZ and friends: */
-#ifdef HAVE_SYS_IOCTL_H
-# include <sys/ioctl.h>
-#endif
+#include <sys/ioctl.h>
#ifdef HAVE_TERMIOS_H
# include <termios.h>
#endif
@@ -73,6 +73,11 @@ as that of the covered work. */
#include <signal.h>
#include <setjmp.h>
+#include <regex.h>
+#ifdef HAVE_LIBPCRE
+# include <pcre.h>
+#endif
+
#ifndef HAVE_SIGSETJMP
/* If sigsetjmp is a macro, configure won't pick it up. */
# ifdef sigsetjmp
@@ -769,8 +774,7 @@ fopen_excl (const char *fname, int binary)
open_id = 13;
fd = open( fname, /* File name. */
flags, /* Flags. */
- 0777, /* Mode for default protection.
-*/
+ 0777, /* Mode for default protection. */
"rfm=stmlf", /* Stream_LF. */
OPEN_OPT_ARGS); /* Access callback. */
}
@@ -918,6 +922,19 @@ acceptable (const char *s)
return true;
}
+/* Decide whether the URL S may be followed.  S is rejected when
+   opt.acceptregex is set and does not match it, or when opt.rejectregex
+   is set and matches it; otherwise S is accepted.  */
+bool
+accept_url (const char *s)
+{
+  bool wanted = !opt.acceptregex || opt.regex_match_fun (opt.acceptregex, s);
+
+  if (wanted && opt.rejectregex)
+    wanted = !opt.regex_match_fun (opt.rejectregex, s);
+
+  return wanted;
+}
+}
+
/* Check if D2 is a subdirectory of D1. E.g. if D1 is `/something', subdir_p()
will return true if and only if D2 begins with `/something/' or is exactly
'/something'. */
@@ -1826,6 +1843,17 @@ number_to_static_string (wgint number)
ringpos = (ringpos + 1) % RING_SIZE;
return buf;
}
+
+/* Scale the byte count NUM to a bit count (NUM * 8) when opt.report_bps
+   is set; otherwise hand NUM back unchanged.  */
+wgint
+convert_to_bits (wgint num)
+{
+  return opt.report_bps ? num * 8 : num;
+}
+
/* Determine the width of the terminal we're running on. If that's
not possible, return 0. */
@@ -2299,6 +2327,89 @@ base64_decode (const char *base64, void *dest)
return q - (char *) dest;
}
+#ifdef HAVE_LIBPCRE
+/* Compile STR as a PCRE pattern.
+
+   Returns the compiled pattern (a pcre *, handed back as void *), or
+   NULL after printing a diagnostic to stderr when pcre_compile rejects
+   STR.  */
+void *
+compile_pcre_regex (const char *str)
+{
+  const char *errbuf;
+  int erroffset;
+  pcre *regex = pcre_compile (str, 0, &errbuf, &erroffset, 0);
+  if (! regex)
+    {
+      fprintf (stderr, _("Invalid regular expression %s, %s\n"),
+               quote (str), errbuf);
+      /* This function returns a pointer, so signal failure with NULL
+         rather than the boolean `false'.  */
+      return NULL;
+    }
+  return regex;
+}
+#endif
+
+/* Compile STR as a POSIX extended regular expression (REG_EXTENDED |
+   REG_NOSUB).
+
+   Returns a newly allocated regex_t (as void *) on success.  On
+   failure a diagnostic is printed to stderr, the allocation is
+   released and NULL is returned.  */
+void *
+compile_posix_regex (const char *str)
+{
+  regex_t *regex = xmalloc (sizeof *regex);
+  int errcode = regcomp (regex, str, REG_EXTENDED | REG_NOSUB);
+  if (errcode != 0)
+    {
+      /* regerror with a zero-sized buffer only reports the size needed.  */
+      size_t errbuf_size = regerror (errcode, regex, NULL, 0);
+      char *errbuf = xmalloc (errbuf_size);
+      regerror (errcode, regex, errbuf, errbuf_size);
+      fprintf (stderr, _("Invalid regular expression %s, %s\n"),
+               quote (str), errbuf);
+      xfree (errbuf);
+      /* The pattern never compiled, so nobody else will release REGEX.  */
+      xfree (regex);
+      return NULL;
+    }
+
+  return regex;
+}
+
+#ifdef HAVE_LIBPCRE
+#define OVECCOUNT 30
+/* Run the compiled PCRE pattern REGEX over STR.  Returns true when
+   pcre_exec reports a match.  Returns false when there is no match, and
+   also when pcre_exec fails for another reason, in which case the error
+   code is logged at LOG_VERBOSE.  */
+bool
+match_pcre_regex (const void *regex, const char *str)
+{
+  int ovector[OVECCOUNT];
+  int len = strlen (str);
+  int rc = pcre_exec ((pcre *) regex, 0, str, len, 0, 0, ovector, OVECCOUNT);
+
+  if (rc >= 0)
+    return true;
+
+  if (rc != PCRE_ERROR_NOMATCH)
+    logprintf (LOG_VERBOSE, _("Error while matching %s: %d\n"),
+               quote (str), rc);
+
+  return false;
+}
+#undef OVECCOUNT
+#endif
+
+/* Run the compiled POSIX pattern REGEX (a regex_t *) over STR.
+
+   Returns true on a match.  Returns false when there is no match, and
+   also when regexec fails for another reason; in that case the
+   regerror description is logged at LOG_VERBOSE.  */
+bool
+match_posix_regex (const void *regex, const char *str)
+{
+  const regex_t *re = regex;
+  int rc = regexec (re, str, 0, NULL, 0);
+  if (rc == REG_NOMATCH)
+    return false;
+  else if (rc == 0)
+    return true;
+  else
+    {
+      /* Describe the failure using the pattern that was actually
+         executed (it need not be opt.acceptregex), and log the text
+         instead of building it only to throw it away.  */
+      size_t errbuf_size = regerror (rc, re, NULL, 0);
+      char *errbuf = xmalloc (errbuf_size);
+      regerror (rc, re, errbuf, errbuf_size);
+      logprintf (LOG_VERBOSE, _("Error while matching %s: %s\n"),
+                 quote (str), errbuf);
+      xfree (errbuf);
+      return false;
+    }
+}
+
#undef IS_ASCII
#undef NEXT_CHAR
diff --git a/src/utils.h b/src/utils.h
index 8b1a8a1..409cdc5 100644
--- a/src/utils.h
+++ b/src/utils.h
@@ -90,6 +90,7 @@ char *file_merge (const char *, const char *);
int fnmatch_nocase (const char *, const char *, int);
bool acceptable (const char *);
+bool accept_url (const char *);
bool accdir (const char *s);
char *suffix (const char *s);
bool match_tail (const char *, const char *, bool);
@@ -127,6 +128,7 @@ char *human_readable (HR_NUMTYPE);
int numdigit (wgint);
char *number_to_string (char *, wgint);
char *number_to_static_string (wgint);
+wgint convert_to_bits (wgint);
int determine_screen_width (void);
int random_number (int);
@@ -141,6 +143,14 @@ void xsleep (double);
int base64_encode (const void *, int, char *);
int base64_decode (const char *, void *);
+#ifdef HAVE_LIBPCRE
+void *compile_pcre_regex (const char *);
+bool match_pcre_regex (const void *, const char *);
+#endif
+
+void *compile_posix_regex (const char *);
+bool match_posix_regex (const void *, const char *);
+
void stable_sort (void *, size_t, size_t, int (*) (const void *, const void *));
const char *print_decimal (double);
diff --git a/src/warc.c b/src/warc.c
new file mode 100644
index 0000000..69f80be
--- /dev/null
+++ b/src/warc.c
@@ -0,0 +1,1440 @@
+/* Utility functions for writing WARC files.
+ Copyright (C) 2011, 2012 Free Software Foundation, Inc.
+
+This file is part of GNU Wget.
+
+GNU Wget is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at
+your option) any later version.
+
+GNU Wget is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with Wget. If not, see <http://www.gnu.org/licenses/>.
+
+Additional permission under GNU GPL version 3 section 7
+
+If you modify this program, or any covered work, by linking or
+combining it with the OpenSSL project's OpenSSL library (or a
+modified version of that library), containing parts covered by the
+terms of the OpenSSL or SSLeay licenses, the Free Software Foundation
+grants you additional permission to convey the resulting work.
+Corresponding Source for a non-source form of such a combination
+shall include the source code for the parts of OpenSSL used as well
+as that of the covered work. */
+
+#define _GNU_SOURCE
+
+#include "wget.h"
+#include "hash.h"
+#include "utils.h"
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <strings.h>
+#include <time.h>
+#include <tmpdir.h>
+#include <sha1.h>
+#include <base32.h>
+#include <unistd.h>
+#ifdef HAVE_LIBZ
+#include <zlib.h>
+#endif
+#ifdef HAVE_LIBUUID
+#include <uuid/uuid.h>
+#endif
+
+#ifndef WINDOWS
+#include <libgen.h>
+#endif
+
+#include "warc.h"
+
+extern char *version_string;
+
+/* Set by main in main.c */
+extern char *program_argstring;
+
+
+/* The log file (a temporary file that contains a copy
+ of the wget log). */
+static FILE *warc_log_fp;
+
+/* The manifest file (a temporary file that contains the
+ warcinfo uuid of every file in this crawl). */
+static FILE *warc_manifest_fp;
+
+/* The current WARC file (or NULL, if WARC is disabled). */
+static FILE *warc_current_file;
+
+#ifdef HAVE_LIBZ
+/* The gzip stream for the current WARC file
+ (or NULL, if WARC or gzip is disabled). */
+static gzFile warc_current_gzfile;
+
+/* The offset of the current gzip record in the WARC file. */
+static off_t warc_current_gzfile_offset;
+
+/* The uncompressed size (so far) of the current record. */
+static off_t warc_current_gzfile_uncompressed_size;
+# endif
+
+/* This is true until a warc_write_* method fails. */
+static bool warc_write_ok;
+
+/* The current CDX file (or NULL, if CDX is disabled). */
+static FILE *warc_current_cdx_file;
+
+/* The record id of the warcinfo record of the current WARC file. */
+static char *warc_current_warcinfo_uuid_str;
+
+/* The file name of the current WARC file. */
+static char *warc_current_filename;
+
+/* The serial number of the current WARC file. This number is
+ incremented each time a new file is opened and is used in the
+ WARC file's filename. */
+static int warc_current_file_number;
+
+/* The table of CDX records, if deduplication is enabled. */
+struct hash_table * warc_cdx_dedup_table;
+
+static bool warc_start_new_file (bool meta);
+
+
+/* A CDX record: two strings plus a raw SHA1 digest.
+ NOTE(review): presumably the value type stored in
+ warc_cdx_dedup_table; the field meanings below are inferred from
+ their names -- confirm against the code that fills the table. */
+struct warc_cdx_record
+{
+ char *url; /* presumably the URL of the recorded document */
+ char *uuid; /* presumably the WARC record id of that document */
+ char digest[SHA1_DIGEST_SIZE]; /* binary digest, SHA1_DIGEST_SIZE bytes */
+};
+
+/* Hash function for digest keys: the leading sizeof (unsigned long)
+   bytes of KEY are copied out and used directly as the hash value.  */
+static unsigned long
+warc_hash_sha1_digest (const void *key)
+{
+  unsigned long hash = 0;
+
+  memcpy (&hash, key, sizeof hash);
+  return hash;
+}
+
+/* Equality test for digest keys: returns 1 when the two
+   SHA1_DIGEST_SIZE-byte buffers hold the same bytes and 0 otherwise.  */
+static int
+warc_cmp_sha1_digest (const void *digest1, const void *digest2)
+{
+  return memcmp (digest1, digest2, SHA1_DIGEST_SIZE) == 0;
+}
+
+
+
+/* Append SIZE bytes of BUFFER to the current WARC file.
+
+   While a gzip stream is open the bytes go through gzwrite and SIZE is
+   added to warc_current_gzfile_uncompressed_size; otherwise they are
+   written straight to warc_current_file with fwrite.
+   Returns the value reported by the underlying write call.  */
+static size_t
+warc_write_buffer (const char *buffer, size_t size)
+{
+#ifdef HAVE_LIBZ
+  if (warc_current_gzfile)
+    {
+      warc_current_gzfile_uncompressed_size += size;
+      return gzwrite (warc_current_gzfile, buffer, size);
+    }
+#endif
+
+  return fwrite (buffer, 1, size, warc_current_file);
+}
+
+/* Append the NUL-terminated string STR to the current WARC file.
+   Nothing is written once warc_write_ok is false.  A short write
+   clears warc_write_ok.  Returns the resulting warc_write_ok.  */
+static bool
+warc_write_string (const char *str)
+{
+  if (warc_write_ok)
+    {
+      size_t len = strlen (str);
+
+      if (warc_write_buffer (str, len) != len)
+        warc_write_ok = false;
+    }
+
+  return warc_write_ok;
+}
+
+
+#define EXTRA_GZIP_HEADER_SIZE 12
+#define GZIP_STATIC_HEADER_SIZE 10
+#define FLG_FEXTRA 0x04
+#define OFF_FLG 3
+
+/* Starts a new WARC record.  Writes the version header.
+   If opt.warc_maxsize is set and the current file has reached that
+   size, this will first open a new WARC file.
+
+   If compression is enabled, this will start a new
+   gzip stream in the current WARC file.
+
+   Returns false and sets warc_write_ok to false if there
+   is an error (including failure to open the next WARC file or the
+   gzip stream).  */
+static bool
+warc_write_start_record (void)
+{
+  if (!warc_write_ok)
+    return false;
+
+  fflush (warc_current_file);
+  if (opt.warc_maxsize > 0 && ftello (warc_current_file) >= opt.warc_maxsize)
+    {
+      /* warc_start_new_file closes the current file before it opens
+         the next one, so after a failure there is nothing valid left
+         to write to.  */
+      if (!warc_start_new_file (false))
+        {
+          warc_write_ok = false;
+          return false;
+        }
+    }
+
+#ifdef HAVE_LIBZ
+  /* Start a GZIP stream, if required. */
+  if (opt.warc_compression_enabled)
+    {
+      int gz_fd;
+
+      /* Record the starting offset of the new record. */
+      warc_current_gzfile_offset = ftello (warc_current_file);
+
+      /* Reserve space for the extra GZIP header field.
+         In warc_write_end_record we will fill this space
+         with information about the uncompressed and
+         compressed size of the record. */
+      fprintf (warc_current_file, "XXXXXXXXXXXX");
+      fflush (warc_current_file);
+
+      /* Start a new GZIP stream on a duplicate of the descriptor, so
+         that closing the stream leaves warc_current_file open. */
+      gz_fd = dup (fileno (warc_current_file));
+      warc_current_gzfile = (gz_fd < 0) ? NULL : gzdopen (gz_fd, "wb9");
+      warc_current_gzfile_uncompressed_size = 0;
+
+      if (warc_current_gzfile == NULL)
+        {
+          /* gzdopen did not take ownership of the duplicate; do not
+             leak it. */
+          if (gz_fd >= 0)
+            close (gz_fd);
+          logprintf (LOG_NOTQUIET,
+_("Error opening GZIP stream to WARC file.\n"));
+          warc_write_ok = false;
+          return false;
+        }
+    }
+#endif
+
+  warc_write_string ("WARC/1.0\r\n");
+  return warc_write_ok;
+}
+
+/* Emit one "NAME: VALUE\r\n" header line into the current WARC record.
+   A NULL VALUE means "no such header": nothing is written.
+   Meant to be called between warc_write_start_record and
+   warc_write_block_from_file.  Returns warc_write_ok.  */
+static bool
+warc_write_header (const char *name, const char *value)
+{
+  if (value == NULL)
+    return warc_write_ok;
+
+  warc_write_string (name);
+  warc_write_string (": ");
+  warc_write_string (value);
+  warc_write_string ("\r\n");
+
+  return warc_write_ok;
+}
+
+/* Copies the contents of DATA_IN to the WARC record.
+   Adds a Content-Length header (the size of DATA_IN) to the WARC
+   record and terminates the header section before copying.
+   Run this method after warc_write_header,
+   then run warc_write_end_record.
+
+   Returns warc_write_ok; it is set to false when DATA_IN cannot be
+   measured, rewound or read, or when a write to the record fails.  */
+static bool
+warc_write_block_from_file (FILE *data_in)
+{
+  char buffer[BUFSIZ];
+  size_t s;
+  off_t length;
+
+  /* Measure the file: seek to its end and read back the offset.  */
+  if (fseeko (data_in, 0L, SEEK_END) != 0
+      || (length = ftello (data_in)) < 0)
+    {
+      warc_write_ok = false;
+      return false;
+    }
+
+  /* Add the Content-Length header.  number_to_static_string takes a
+     wgint, which avoids both the unchecked asprintf and printing an
+     off_t through "%ld".  */
+  warc_write_header ("Content-Length", number_to_static_string (length));
+
+  /* End of the WARC header section. */
+  warc_write_string ("\r\n");
+
+  if (fseeko (data_in, 0L, SEEK_SET) != 0)
+    warc_write_ok = false;
+
+  /* Copy the data in the file to the WARC record. */
+  while (warc_write_ok && (s = fread (buffer, 1, BUFSIZ, data_in)) > 0)
+    {
+      if (warc_write_buffer (buffer, s) < s)
+        warc_write_ok = false;
+    }
+
+  /* fread returning 0 may mean an error rather than end-of-file; the
+     block would then be shorter than the announced Content-Length.  */
+  if (ferror (data_in))
+    warc_write_ok = false;
+
+  return warc_write_ok;
+}
+
+/* Run this method to close the current WARC record.
+
+ Writes the "\r\n\r\n" record terminator (the result of that write
+ is not checked here).
+
+ If compression is enabled, this method closes the
+ current GZIP stream and fills the extra GZIP header
+ with the uncompressed and compressed length of the
+ record.
+
+ Returns warc_write_ok; it is set to false if closing the GZIP
+ stream or re-reading the static GZIP header fails. */
+static bool
+warc_write_end_record (void)
+{
+ warc_write_buffer ("\r\n\r\n", 4);
+
+#ifdef HAVE_LIBZ
+ /* We start a new gzip stream for each record. */
+ if (warc_write_ok && warc_current_gzfile)
+ {
+ /* NOTE(review): warc_current_gzfile is not reset after gzclose;
+ it stays non-NULL until warc_write_start_record reassigns it. */
+ if (gzclose (warc_current_gzfile) != Z_OK)
+ {
+ warc_write_ok = false;
+ return false;
+ }
+
+ fflush (warc_current_file);
+ fseeko (warc_current_file, 0, SEEK_END);
+
+ /* The WARC standard suggests that we add 'skip length' data in the
+ extra header field of the GZIP stream.
+
+ In warc_write_start_record we reserved space for this extra header.
+ This extra space starts at warc_current_gzfile_offset and fills
+ EXTRA_GZIP_HEADER_SIZE bytes. The static GZIP header starts at
+ warc_current_gzfile_offset + EXTRA_GZIP_HEADER_SIZE.
+
+ We need to do three things:
+ 1. Move the static GZIP header to warc_current_gzfile_offset;
+ 2. Set the FEXTRA flag in the GZIP header;
+ 3. Write the extra GZIP header after the static header, that is,
+ starting at warc_current_gzfile_offset + GZIP_STATIC_HEADER_SIZE.
+ */
+
+ /* Calculate the uncompressed and compressed sizes.
+ NOTE(review): the names look swapped. The first value is the
+ number of bytes the record occupies in the file (end offset minus
+ start offset), the second is the byte count accumulated by
+ warc_write_buffer before compression. Confirm which order the
+ 'sl' field expects before renaming or reordering anything. */
+ off_t current_offset = ftello (warc_current_file);
+ off_t uncompressed_size = current_offset - warc_current_gzfile_offset;
+ off_t compressed_size = warc_current_gzfile_uncompressed_size;
+
+ /* Go back to the static GZIP header. */
+ fseeko (warc_current_file, warc_current_gzfile_offset
+ + EXTRA_GZIP_HEADER_SIZE, SEEK_SET);
+
+ /* Read the header. */
+ char static_header[GZIP_STATIC_HEADER_SIZE];
+ size_t result = fread (static_header, 1, GZIP_STATIC_HEADER_SIZE,
+ warc_current_file);
+ if (result != GZIP_STATIC_HEADER_SIZE)
+ {
+ warc_write_ok = false;
+ return false;
+ }
+
+ /* Set the FEXTRA flag in the flags byte of the header. */
+ static_header[OFF_FLG] = static_header[OFF_FLG] | FLG_FEXTRA;
+
+ /* Write the header back to the file, but starting at
+ warc_current_gzfile_offset. */
+ fseeko (warc_current_file, warc_current_gzfile_offset, SEEK_SET);
+ fwrite (static_header, 1, GZIP_STATIC_HEADER_SIZE, warc_current_file);
+
+ /* Prepare the extra GZIP header. All multi-byte values below are
+ stored least significant byte first. */
+ char extra_header[EXTRA_GZIP_HEADER_SIZE];
+ /* XLEN, the length of the extra header fields. */
+ extra_header[0] = ((EXTRA_GZIP_HEADER_SIZE - 2) & 255);
+ extra_header[1] = ((EXTRA_GZIP_HEADER_SIZE - 2) >> 8) & 255;
+ /* The extra header field identifier for the WARC skip length. */
+ extra_header[2] = 's';
+ extra_header[3] = 'l';
+ /* The size of the uncompressed record (low 32 bits only). */
+ extra_header[4] = (uncompressed_size & 255);
+ extra_header[5] = (uncompressed_size >> 8) & 255;
+ extra_header[6] = (uncompressed_size >> 16) & 255;
+ extra_header[7] = (uncompressed_size >> 24) & 255;
+ /* The size of the compressed record (low 32 bits only). */
+ extra_header[8] = (compressed_size & 255);
+ extra_header[9] = (compressed_size >> 8) & 255;
+ extra_header[10] = (compressed_size >> 16) & 255;
+ extra_header[11] = (compressed_size >> 24) & 255;
+
+ /* Write the extra header after the static header. */
+ fseeko (warc_current_file, warc_current_gzfile_offset
+ + GZIP_STATIC_HEADER_SIZE, SEEK_SET);
+ fwrite (extra_header, 1, EXTRA_GZIP_HEADER_SIZE, warc_current_file);
+
+ /* Done, move back to the end of the file. */
+ fflush (warc_current_file);
+ fseeko (warc_current_file, 0, SEEK_END);
+ }
+#endif /* HAVE_LIBZ */
+
+ return warc_write_ok;
+}
+
+
+/* Writes the WARC-Date header for the given timestamp to
+   the current WARC record.
+   If timestamp is NULL, the current time (as produced by
+   warc_timestamp) will be used.  Returns warc_write_ok.  */
+static bool
+warc_write_date_header (const char *timestamp)
+{
+  /* Declared at function scope on purpose: TIMESTAMP may end up
+     pointing at this buffer, which therefore has to stay alive until
+     the warc_write_header call below has finished.  */
+  char current_timestamp[21];
+
+  if (timestamp == NULL)
+    {
+      warc_timestamp (current_timestamp);
+      timestamp = current_timestamp;
+    }
+  return warc_write_header ("WARC-Date", timestamp);
+}
+
+/* Emit the WARC-IP-Address header for IP, formatted by print_address,
+   into the current WARC record.  A NULL IP writes nothing.
+   Returns warc_write_ok.  */
+static bool
+warc_write_ip_header (ip_address *ip)
+{
+  if (ip == NULL)
+    return warc_write_ok;
+
+  return warc_write_header ("WARC-IP-Address", print_address (ip));
+}
+
+
+/* warc_sha1_stream_with_payload is a modified copy of sha1_stream
+ from gnulib/sha1.c. This version calculates two digests in one go.
+
+ Compute SHA1 message digests for bytes read from STREAM, from its
+ current position up to end-of-file. The digest of everything read
+ is written to RES_BLOCK.
+
+ If payload_offset >= 0, a second digest will be calculated of the
+ portion of the data starting at payload_offset (counted from where
+ reading began) and continuing to the end of the file. That digest
+ is written to RES_PAYLOAD; when payload_offset < 0, RES_PAYLOAD is
+ not touched.
+
+ NOTE(review): this comment used to say each result occupies
+ "16 bytes"; the visible caller passes SHA1_DIGEST_SIZE-byte buffers,
+ so that is presumably the size written -- confirm against sha1.h.
+
+ Returns 0 on success, 1 if the work buffer cannot be allocated or
+ a read error occurs. */
+static int
+warc_sha1_stream_with_payload (FILE *stream, void *res_block, void *res_payload,
+ off_t payload_offset)
+{
+#define BLOCKSIZE 32768
+
+ struct sha1_ctx ctx_block;
+ struct sha1_ctx ctx_payload;
+ off_t pos; /* total number of bytes read so far */
+ off_t sum; /* number of bytes currently held in BUFFER */
+
+ char *buffer = malloc (BLOCKSIZE + 72);
+ if (!buffer)
+ return 1;
+
+ /* Initialize the computation context. */
+ sha1_init_ctx (&ctx_block);
+ if (payload_offset >= 0)
+ sha1_init_ctx (&ctx_payload);
+
+ pos = 0;
+
+ /* Iterate over full file contents. */
+ while (1)
+ {
+ /* We read the file in blocks of BLOCKSIZE bytes. One call of the
+ computation function processes the whole buffer so that with the
+ next round of the loop another block can be read. */
+ off_t n;
+ sum = 0;
+
+ /* Read block. Take care for partial reads. */
+ while (1)
+ {
+ n = fread (buffer + sum, 1, BLOCKSIZE - sum, stream);
+
+ sum += n;
+ pos += n;
+
+ if (sum == BLOCKSIZE)
+ break;
+
+ if (n == 0)
+ {
+ /* Check for the error flag IFF N == 0, so that we don't
+ exit the loop after a partial read due to e.g., EAGAIN
+ or EWOULDBLOCK. */
+ if (ferror (stream))
+ {
+ free (buffer);
+ return 1;
+ }
+ goto process_partial_block;
+ }
+
+ /* We've read at least one byte, so ignore errors. But always
+ check for EOF, since feof may be true even though N > 0.
+ Otherwise, we could end up calling fread after EOF. */
+ if (feof (stream))
+ goto process_partial_block;
+ }
+
+ /* Process buffer with BLOCKSIZE bytes. Note that
+ BLOCKSIZE % 64 == 0
+ */
+ sha1_process_block (buffer, BLOCKSIZE, &ctx_block);
+ if (payload_offset >= 0 && payload_offset < pos)
+ {
+ /* At least part of the buffer contains data from payload.
+ (pos - BLOCKSIZE) is the stream offset of buffer[0]. */
+ off_t start_of_payload = payload_offset - (pos - BLOCKSIZE);
+ if (start_of_payload <= 0)
+ /* All bytes in the buffer belong to the payload. */
+ start_of_payload = 0;
+
+ /* Process the payload part of the buffer.
+ Note: we can't use sha1_process_block here even if we
+ process the complete buffer. Because the payload doesn't
+ have to start with a full block, there may still be some
+ bytes left from the previous buffer. Therefore, we need
+ to continue with sha1_process_bytes. */
+ sha1_process_bytes (buffer + start_of_payload,
+ BLOCKSIZE - start_of_payload, &ctx_payload);
+ }
+ }
+
+ process_partial_block:;
+
+ /* Process any remaining bytes. */
+ if (sum > 0)
+ {
+ sha1_process_bytes (buffer, sum, &ctx_block);
+ if (payload_offset >= 0 && payload_offset < pos)
+ {
+ /* At least part of the buffer contains data from payload.
+ (pos - sum) is the stream offset of buffer[0]. */
+ off_t start_of_payload = payload_offset - (pos - sum);
+ if (start_of_payload <= 0)
+ /* All bytes in the buffer belong to the payload. */
+ start_of_payload = 0;
+
+ /* Process the payload part of the buffer. */
+ sha1_process_bytes (buffer + start_of_payload,
+ sum - start_of_payload, &ctx_payload);
+ }
+ }
+
+ /* Construct result in desired memory. */
+ sha1_finish_ctx (&ctx_block, res_block);
+ if (payload_offset >= 0)
+ sha1_finish_ctx (&ctx_payload, res_payload);
+ free (buffer);
+ return 0;
+
+#undef BLOCKSIZE
+}
+
+/* Converts the SHA1 digest (SHA1_DIGEST_SIZE raw bytes) to a
+   base32-encoded string of the form "sha1:DIGEST".
+
+   Returns a newly malloc'ed, NUL-terminated string that the caller
+   must free, or NULL if the allocation fails.  */
+static char *
+warc_base32_sha1_digest (char *sha1_digest)
+{
+  /* length: "sha1:" + digest + "\0" */
+  char *sha1_base32 = malloc (BASE32_LENGTH(SHA1_DIGEST_SIZE) + 1 + 5 );
+  if (sha1_base32 == NULL)
+    return NULL;
+
+  /* Encode behind the 5 bytes reserved for the "sha1:" prefix.  */
+  base32_encode (sha1_digest, SHA1_DIGEST_SIZE, sha1_base32 + 5,
+                 BASE32_LENGTH(SHA1_DIGEST_SIZE) + 1);
+  memcpy (sha1_base32, "sha1:", 5);
+  sha1_base32[BASE32_LENGTH(SHA1_DIGEST_SIZE) + 5] = '\0';
+  return sha1_base32;
+}
+
+
+/* Write the digest headers of the record whose content is in FILE.
+
+   Does nothing unless opt.warc_digests_enabled is set.  FILE is
+   rewound and hashed once; on success a WARC-Block-Digest header is
+   written and, if payload_offset >= 0, also a WARC-Payload-Digest
+   header covering the data from that offset onwards.  If hashing
+   fails no header is written.  */
+static void
+warc_write_digest_headers (FILE *file, long payload_offset)
+{
+  char block_sha1[SHA1_DIGEST_SIZE];
+  char payload_sha1[SHA1_DIGEST_SIZE];
+  char *encoded;
+
+  if (!opt.warc_digests_enabled)
+    return;
+
+  /* Calculate the block and payload digests in one pass. */
+  rewind (file);
+  if (warc_sha1_stream_with_payload (file, block_sha1, payload_sha1,
+                                     payload_offset) != 0)
+    return;
+
+  encoded = warc_base32_sha1_digest (block_sha1);
+  warc_write_header ("WARC-Block-Digest", encoded);
+  free (encoded);
+
+  if (payload_offset < 0)
+    return;
+
+  encoded = warc_base32_sha1_digest (payload_sha1);
+  warc_write_header ("WARC-Payload-Digest", encoded);
+  free (encoded);
+}
+
+
+/* Fills timestamp with the current time and date.
+   The UTC time is formatted as "YYYY-MM-DDThh:mm:ssZ", for use in the
+   WARC-Date header.
+   TIMESTAMP must have room for 21 bytes: the 20 characters of the
+   formatted time plus the terminating NUL.  If the current time
+   cannot be converted or formatted, TIMESTAMP is set to the empty
+   string.  */
+void
+warc_timestamp (char *timestamp)
+{
+  time_t rawtime;
+  struct tm * timeinfo;
+  time ( &rawtime );
+  timeinfo = gmtime (&rawtime);
+  /* gmtime may return NULL and strftime returns 0 when the result
+     does not fit; never hand the caller an unterminated buffer.  */
+  if (timeinfo == NULL
+      || strftime (timestamp, 21, "%Y-%m-%dT%H:%M:%SZ", timeinfo) == 0)
+    timestamp[0] = '\0';
+}
+
+#ifdef HAVE_LIBUUID
+/* Store a freshly generated UUID in URN_STR, wrapped as
+   "<urn:uuid:...>" for use in the WARC-Record-Id header.
+   The result is 47 characters long, so URN_STR needs room for 48
+   bytes including the terminating NUL.  */
+void
+warc_uuid_str (char *urn_str)
+{
+  uuid_t id;
+  char text[37];
+
+  uuid_generate (id);
+  uuid_unparse (id, text);
+
+  sprintf (urn_str, "<urn:uuid:%s>", text);
+}
+#else
+/* Store a UUID built from random_number () output in URN_STR, wrapped
+   as "<urn:uuid:...>" for use in the WARC-Record-Id header.
+   (See RFC 4122, UUID version 4.)
+
+   Note: this is only the fallback for builds without libuuid, whose
+   generator is preferable.
+
+   The result is 47 characters long, so URN_STR needs room for 48
+   bytes including the terminating NUL.  */
+void
+warc_uuid_str (char *urn_str)
+{
+  /* RFC 4122, a version 4 UUID with only random numbers */
+  unsigned char bytes[16];
+  int idx = 0;
+
+  while (idx < 16)
+    bytes[idx++] = random_number (255);
+
+  /* Set the four most significant bits (bits 12 through 15) of the
+     time_hi_and_version field to the 4-bit version number */
+  bytes[6] = (bytes[6] & 0x0F) | 0x40;
+
+  /* Set the two most significant bits (bits 6 and 7) of the
+     clock_seq_hi_and_reserved to zero and one, respectively. */
+  bytes[8] = (bytes[8] & 0xBF) | 0x80;
+
+  sprintf (urn_str,
+    "<urn:uuid:%02x%02x%02x%02x-%02x%02x-%02x%02x-%02x%02x-%02x%02x%02x%02x%02x%02x>",
+    bytes[0], bytes[1], bytes[2], bytes[3],
+    bytes[4], bytes[5],
+    bytes[6], bytes[7],
+    bytes[8], bytes[9],
+    bytes[10], bytes[11], bytes[12], bytes[13], bytes[14], bytes[15]);
+}
+#endif
+
+/* Write a warcinfo record to the current file.
+   Updates warc_current_warcinfo_uuid_str with the record id of the
+   new record; FILENAME's basename goes into the WARC-Filename header.
+
+   Returns false if memory or the temporary file for the record body
+   cannot be obtained; otherwise returns warc_write_ok.  */
+static bool
+warc_write_warcinfo_record (char *filename)
+{
+  char timestamp[22];
+  char *filename_copy, *filename_basename;
+  FILE *warc_tmp;
+
+  /* Write warc-info record as the first record of the file. */
+  /* We add the record id of this info record to the other records in the
+     file. */
+  warc_current_warcinfo_uuid_str = malloc (48);
+  if (warc_current_warcinfo_uuid_str == NULL)
+    return false;
+  warc_uuid_str (warc_current_warcinfo_uuid_str);
+
+  warc_timestamp (timestamp);
+
+  /* basename may modify its argument, hence the private copy.  */
+  filename_copy = strdup (filename);
+  filename_basename = filename_copy ? strdup (basename (filename_copy)) : NULL;
+  if (filename_basename == NULL)
+    {
+      free (filename_copy);
+      return false;
+    }
+
+  warc_write_start_record ();
+  warc_write_header ("WARC-Type", "warcinfo");
+  warc_write_header ("Content-Type", "application/warc-fields");
+  warc_write_header ("WARC-Date", timestamp);
+  warc_write_header ("WARC-Record-ID", warc_current_warcinfo_uuid_str);
+  warc_write_header ("WARC-Filename", filename_basename);
+
+  /* Create content. */
+  warc_tmp = warc_tempfile ();
+  if (warc_tmp == NULL)
+    {
+      free (filename_copy);
+      free (filename_basename);
+      return false;
+    }
+
+  fprintf (warc_tmp, "software: Wget/%s (%s)\r\n", version_string, OS_TYPE);
+  fprintf (warc_tmp, "format: WARC File Format 1.0\r\n");
+  fprintf (warc_tmp,
+"conformsTo: http://bibnum.bnf.fr/WARC/WARC_ISO_28500_version1_latestdraft.pdf\r\n");
+  fprintf (warc_tmp, "robots: %s\r\n", (opt.use_robots ? "classic" : "off"));
+  fprintf (warc_tmp, "wget-arguments: %s\r\n", program_argstring);
+  /* Add the user headers, if any. */
+  if (opt.warc_user_headers)
+    {
+      int i;
+      for (i = 0; opt.warc_user_headers[i]; i++)
+        fprintf (warc_tmp, "%s\r\n", opt.warc_user_headers[i]);
+    }
+  fprintf(warc_tmp, "\r\n");
+
+  warc_write_digest_headers (warc_tmp, -1);
+  warc_write_block_from_file (warc_tmp);
+  warc_write_end_record ();
+
+  if (! warc_write_ok)
+    logprintf (LOG_NOTQUIET, _("Error writing warcinfo record to WARC file.\n"));
+
+  free (filename_copy);
+  free (filename_basename);
+  fclose (warc_tmp);
+  return warc_write_ok;
+}
+
+/* Opens a new WARC file.
+   If META is true, the filename ends in "-meta.warc" (or
+   "-meta.warc.gz" when compression is enabled); otherwise it carries
+   a five digit serial number when opt.warc_maxsize is positive.
+
+   This method will:
+   1. close the current WARC file (if there is one) and release the
+      warcinfo id and filename that belonged to it;
+   2. increment warc_current_file_number;
+   3. open a new WARC file;
+   4. write the initial warcinfo record.
+
+   The globals describing the previous file are reset to NULL as soon
+   as they are released, so a failure below never leaves a pointer to
+   freed memory or to a closed stream behind.
+
+   Returns true on success, false otherwise.
+ */
+static bool
+warc_start_new_file (bool meta)
+{
+  if (opt.warc_filename == NULL)
+    return false;
+
+  if (warc_current_file != NULL)
+    {
+      fclose (warc_current_file);
+      warc_current_file = NULL;
+    }
+  free (warc_current_warcinfo_uuid_str);
+  warc_current_warcinfo_uuid_str = NULL;
+  free (warc_current_filename);
+  warc_current_filename = NULL;
+
+  warc_current_file_number++;
+
+  /* filename format: base + "-" + 5 digit serial number + ".warc.gz" */
+  size_t new_filename_size = strlen (opt.warc_filename) + 1 + 5 + 8 + 1;
+  char *new_filename = malloc (new_filename_size);
+  if (new_filename == NULL)
+    return false;
+  warc_current_filename = new_filename;
+
+#ifdef HAVE_LIBZ
+  const char *extension = (opt.warc_compression_enabled ? "warc.gz" : "warc");
+#else
+  const char *extension = "warc";
+#endif
+
+  /* If max size is enabled, we add a serial number to the file names.
+     snprintf keeps a serial number wider than five digits from
+     writing past the end of the buffer.  */
+  if (meta)
+    snprintf (new_filename, new_filename_size, "%s-meta.%s",
+              opt.warc_filename, extension);
+  else if (opt.warc_maxsize > 0)
+    {
+      snprintf (new_filename, new_filename_size, "%s-%05d.%s",
+                opt.warc_filename, warc_current_file_number, extension);
+    }
+  else
+    snprintf (new_filename, new_filename_size, "%s.%s",
+              opt.warc_filename, extension);
+
+  logprintf (LOG_VERBOSE, _("Opening WARC file %s.\n\n"), quote (new_filename));
+
+  /* Open the WARC file. */
+  warc_current_file = fopen (new_filename, "wb+");
+  if (warc_current_file == NULL)
+    {
+      logprintf (LOG_NOTQUIET, _("Error opening WARC file %s.\n"),
+                 quote (new_filename));
+      return false;
+    }
+
+  if (! warc_write_warcinfo_record (new_filename))
+    return false;
+
+  /* Add warcinfo uuid to manifest. */
+  if (warc_manifest_fp)
+    fprintf (warc_manifest_fp, "%s\n", warc_current_warcinfo_uuid_str);
+
+  return true;
+}
+
+/* Opens the CDX output file, named opt.warc_filename followed by
+   ".cdx", in append mode and writes the CDX header line to it.
+   Stores the stream in warc_current_cdx_file.
+   Returns true on success, false if the file could not be opened. */
+static bool
+warc_start_cdx_file (void)
+{
+  int base_len = strlen (opt.warc_filename);
+  char *cdx_path = alloca (base_len + 4 + 1);
+
+  /* Base name first, then the suffix together with its NUL. */
+  memcpy (cdx_path, opt.warc_filename, base_len);
+  memcpy (cdx_path + base_len, ".cdx", 5);
+
+  warc_current_cdx_file = fopen (cdx_path, "a+");
+  if (warc_current_cdx_file != NULL)
+    {
+      /* Print the CDX header.
+       *
+       * a - original url
+       * b - date
+       * m - mime type
+       * s - response code
+       * k - new style checksum
+       * r - redirect
+       * M - meta tags
+       * V - compressed arc file offset
+       * g - file name
+       * u - record-id
+       */
+      fputs (" CDX a b a m s k r M V g u\n", warc_current_cdx_file);
+      fflush (warc_current_cdx_file);
+      return true;
+    }
+
+  return false;
+}
+
+#define CDX_FIELDSEP " \t\r\n"
+
+/* Parse the CDX header line in LINEPTR (which is modified by the
+   tokenizer) and report the zero-based column numbers of the original
+   url ('a'), checksum ('k') and record id ('u') fields.  Columns are
+   counted from the first token after the leading "CDX" marker; when a
+   letter occurs more than once, its last occurrence is reported.
+   Each output is -1 if the corresponding field is absent.
+   Returns true only if all three fields were found. */
+static bool
+warc_parse_cdx_header (char *lineptr, int *field_num_original_url,
+                       int *field_num_checksum, int *field_num_record_id)
+{
+  char *state;
+  char *word;
+  int column;
+
+  *field_num_original_url = -1;
+  *field_num_checksum = -1;
+  *field_num_record_id = -1;
+
+  /* The line has to start with the "CDX" marker. */
+  word = strtok_r (lineptr, CDX_FIELDSEP, &state);
+  if (word != NULL && strcmp (word, "CDX") == 0)
+    {
+      for (column = 0;
+           (word = strtok_r (NULL, CDX_FIELDSEP, &state)) != NULL;
+           column++)
+        {
+          /* Only the first character of a field indicator matters. */
+          if (word[0] == 'a')
+            *field_num_original_url = column;
+          else if (word[0] == 'k')
+            *field_num_checksum = column;
+          else if (word[0] == 'u')
+            *field_num_record_id = column;
+        }
+    }
+
+  if (*field_num_original_url == -1)
+    return false;
+  if (*field_num_checksum == -1)
+    return false;
+  return *field_num_record_id != -1;
+}
+
+/* Parse one CDX record line and add it to the warc_cdx_dedup_table
+   hash table.
+
+   LINEPTR is the line (modified by the tokenizer).  The three
+   FIELD_NUM_* arguments are the zero-based positions of the original
+   url, checksum and record id fields within the line.
+
+   The line is ignored if any of the three fields is missing, if the
+   checksum does not base32-decode to exactly SHA1_DIGEST_SIZE bytes,
+   or if memory runs out.  On success the table entry takes ownership
+   of the url and record id strings. */
+static void
+warc_process_cdx_line (char *lineptr, int field_num_original_url,
+                       int field_num_checksum, int field_num_record_id)
+{
+  char *original_url = NULL;
+  char *checksum = NULL;
+  char *record_id = NULL;
+  char *checksum_v = NULL;
+  size_t checksum_l = 0;
+  struct warc_cdx_record *rec;
+
+  char *token;
+  char *save_ptr;
+  int field_num = 0;
+
+  /* Read this line to get the fields we need. */
+  for (token = strtok_r (lineptr, CDX_FIELDSEP, &save_ptr);
+       token != NULL;
+       token = strtok_r (NULL, CDX_FIELDSEP, &save_ptr), field_num++)
+    {
+      char **val;
+      if (field_num == field_num_original_url)
+        val = &original_url;
+      else if (field_num == field_num_checksum)
+        val = &checksum;
+      else if (field_num == field_num_record_id)
+        val = &record_id;
+      else
+        val = NULL;
+
+      if (val != NULL)
+        *val = strdup (token);
+    }
+
+  if (original_url == NULL || checksum == NULL || record_id == NULL)
+    goto discard;
+
+  /* For some extra efficiency, we decode the base32 encoded
+     checksum value.  This should produce exactly SHA1_DIGEST_SIZE
+     bytes. */
+  base32_decode_alloc (checksum, strlen (checksum), &checksum_v,
+                       &checksum_l);
+  if (checksum_v == NULL || checksum_l != SHA1_DIGEST_SIZE)
+    goto discard;
+
+  /* This is a valid line with a valid checksum. */
+  rec = malloc (sizeof *rec);
+  if (rec == NULL)
+    goto discard;
+
+  rec->url = original_url;
+  rec->uuid = record_id;
+  memcpy (rec->digest, checksum_v, SHA1_DIGEST_SIZE);
+  hash_table_put (warc_cdx_dedup_table, rec->digest, rec);
+
+  /* The record keeps the url and the record id; only the encoded and
+     decoded checksum buffers are released here. */
+  free (checksum);
+  free (checksum_v);
+  return;
+
+discard:
+  free (checksum_v);
+  free (checksum);
+  free (original_url);
+  free (record_id);
+}
+
+/* Reads the CDX file named by opt.warc_cdx_dedup_filename and fills
+   warc_cdx_dedup_table from it.
+   Returns false only if the file cannot be opened.  A file whose
+   header lacks one of the required columns is reported in the log,
+   leaves the table untouched and still yields true. */
+static bool
+warc_load_cdx_dedup_file (void)
+{
+  FILE *cdx = fopen (opt.warc_cdx_dedup_filename, "r");
+  if (cdx == NULL)
+    return false;
+
+  int url_col = -1;
+  int checksum_col = -1;
+  int record_id_col = -1;
+
+  char *line = NULL;
+  size_t line_cap = 0;
+
+  /* The first line should contain the CDX header.
+     Format: " CDX x x x x x"
+     where x are field type indicators.  For our purposes, we only
+     need 'a' (the original url), 'k' (the SHA1 checksum) and
+     'u' (the WARC record id). */
+  if (getline (&line, &line_cap, cdx) != -1)
+    warc_parse_cdx_header (line, &url_col, &checksum_col, &record_id_col);
+
+  /* Only a file that lists all three fields is read in full. */
+  bool have_all_columns = (url_col != -1
+                           && checksum_col != -1
+                           && record_id_col != -1);
+  if (have_all_columns)
+    {
+      /* Initialize the table. */
+      warc_cdx_dedup_table = hash_table_new (1000, warc_hash_sha1_digest,
+                                             warc_cmp_sha1_digest);
+
+      /* Feed every remaining line to the record parser. */
+      while (getline (&line, &line_cap, cdx) != -1)
+        warc_process_cdx_line (line, url_col, checksum_col, record_id_col);
+
+      /* Print results. */
+      int nrecords = hash_table_count (warc_cdx_dedup_table);
+      logprintf (LOG_VERBOSE, ngettext ("Loaded %d record from CDX.\n\n",
+                                        "Loaded %d records from CDX.\n\n",
+                                        nrecords),
+                 nrecords);
+    }
+  else
+    {
+      if (url_col == -1)
+        logprintf (LOG_NOTQUIET,
+_("CDX file does not list original urls. (Missing column 'a'.)\n"));
+      if (checksum_col == -1)
+        logprintf (LOG_NOTQUIET,
+_("CDX file does not list checksums. (Missing column 'k'.)\n"));
+      if (record_id_col == -1)
+        logprintf (LOG_NOTQUIET,
+_("CDX file does not list record ids. (Missing column 'u'.)\n"));
+    }
+
+  free (line);
+  fclose (cdx);
+
+  return true;
+}
+#undef CDX_FIELDSEP
+
+/* Looks up SHA1_DIGEST_PAYLOAD in warc_cdx_dedup_table and returns the
+   stored CDX record if its url equals URL.
+   Returns NULL if CDX deduplication is disabled (no table), if the
+   digest is not in the table, or if the url of the stored record
+   differs. */
+static struct warc_cdx_record *
+warc_find_duplicate_cdx_record (char *url, char *sha1_digest_payload)
+{
+  char *matched_key;
+  struct warc_cdx_record *candidate;
+
+  if (warc_cdx_dedup_table == NULL)
+    return NULL;
+
+  if (! hash_table_get_pair (warc_cdx_dedup_table, sha1_digest_payload,
+                             &matched_key, &candidate))
+    return NULL;
+
+  /* Same payload, but it only counts as a duplicate for the same url. */
+  if (strcmp (candidate->url, url) != 0)
+    return NULL;
+
+  return candidate;
+}
+
+/* Prepares the WARC writer.  Always resets warc_write_ok; everything
+   else happens only if opt.warc_filename is set: the optional CDX
+   deduplication file is loaded, the temporary manifest and (with
+   opt.warc_keep_log) log files are created, the first WARC file is
+   opened and, if requested, the CDX output file is started.
+   Any failure is logged and terminates the program with exit status 1.
+   This should be called before any WARC record is written. */
+void
+warc_init (void)
+{
+  warc_write_ok = true;
+
+  if (opt.warc_filename == NULL)
+    return;
+
+  if (opt.warc_cdx_dedup_filename != NULL && ! warc_load_cdx_dedup_file ())
+    {
+      logprintf (LOG_NOTQUIET,
+                 _("Could not read CDX file %s for deduplication.\n"),
+                 quote (opt.warc_cdx_dedup_filename));
+      exit(1);
+    }
+
+  warc_manifest_fp = warc_tempfile ();
+  if (warc_manifest_fp == NULL)
+    {
+      logprintf (LOG_NOTQUIET,
+                 _("Could not open temporary WARC manifest file.\n"));
+      exit(1);
+    }
+
+  if (opt.warc_keep_log)
+    {
+      warc_log_fp = warc_tempfile ();
+      if (warc_log_fp == NULL)
+        {
+          logprintf (LOG_NOTQUIET,
+                     _("Could not open temporary WARC log file.\n"));
+          exit(1);
+        }
+      log_set_warc_log_fp (warc_log_fp);
+    }
+
+  /* warc_start_new_file increments the counter before using it, so
+     the first file gets number 0. */
+  warc_current_file_number = -1;
+  if (! warc_start_new_file (false))
+    {
+      logprintf (LOG_NOTQUIET, _("Could not open WARC file.\n"));
+      exit(1);
+    }
+
+  if (opt.warc_cdx_enabled && ! warc_start_cdx_file ())
+    {
+      logprintf (LOG_NOTQUIET,
+                 _("Could not open CDX file for output.\n"));
+      exit(1);
+    }
+}
+
+/* Writes metadata (manifest, wget arguments, log file) to the WARC
+   file as resource records.
+
+   The manifest record gets a freshly generated record id.  The
+   arguments and log records each get their own generated id and name
+   the manifest record in WARC-Concurrent-To, so no two records share
+   a WARC-Record-ID.
+
+   warc_manifest_fp and warc_log_fp are closed by
+   warc_write_resource_record.  Exits the program with status 1 if the
+   temporary file for the arguments cannot be created. */
+static void
+warc_write_metadata (void)
+{
+  /* If there are multiple WARC files, the metadata should be written to a separate file. */
+  if (opt.warc_maxsize > 0)
+    warc_start_new_file (true);
+
+  char manifest_uuid [48];
+  warc_uuid_str (manifest_uuid);
+
+  fflush (warc_manifest_fp);
+  warc_write_resource_record (manifest_uuid,
+                 "metadata://gnu.org/software/wget/warc/MANIFEST.txt",
+                              NULL, NULL, NULL, "text/plain",
+                              warc_manifest_fp, -1);
+  /* warc_write_resource_record has closed warc_manifest_fp. */
+
+  FILE * warc_tmp_fp = warc_tempfile ();
+  if (warc_tmp_fp == NULL)
+    {
+      logprintf (LOG_NOTQUIET, _("Could not open temporary WARC file.\n"));
+      exit(1);
+    }
+  fprintf (warc_tmp_fp, "%s\n", program_argstring);
+  /* Flush after writing, so the data is out of the stdio buffer
+     before the record writer reads the file back. */
+  fflush (warc_tmp_fp);
+
+  /* Passing NULL lets the record writer generate a new record id; the
+     manifest id is only referenced, as for the log record below. */
+  warc_write_resource_record (NULL,
+                 "metadata://gnu.org/software/wget/warc/wget_arguments.txt",
+                              NULL, manifest_uuid, NULL, "text/plain",
+                              warc_tmp_fp, -1);
+  /* warc_write_resource_record has closed warc_tmp_fp. */
+
+  if (warc_log_fp != NULL)
+    {
+      warc_write_resource_record (NULL,
+                 "metadata://gnu.org/software/wget/warc/wget.log",
+                                  NULL, manifest_uuid, NULL, "text/plain",
+                                  warc_log_fp, -1);
+      /* warc_write_resource_record has closed warc_log_fp. */
+
+      warc_log_fp = NULL;
+      log_set_warc_log_fp (NULL);
+    }
+}
+
+/* Finishes the WARC writing: writes the metadata records, then closes
+   the WARC file, the CDX file and the WARC log file, whichever are
+   open.  Every global is reset to NULL once released, so calling this
+   function a second time does nothing.
+   This should be called at the end of the program. */
+void
+warc_close (void)
+{
+  if (warc_current_file != NULL)
+    {
+      warc_write_metadata ();
+      free (warc_current_warcinfo_uuid_str);
+      warc_current_warcinfo_uuid_str = NULL;
+
+      /* warc_write_metadata may switch to a separate metadata file;
+         if that file could not be opened there is nothing to close. */
+      if (warc_current_file != NULL)
+        {
+          fclose (warc_current_file);
+          warc_current_file = NULL;
+        }
+    }
+  if (warc_current_cdx_file != NULL)
+    {
+      fclose (warc_current_cdx_file);
+      warc_current_cdx_file = NULL;
+    }
+  if (warc_log_fp != NULL)
+    {
+      fclose (warc_log_fp);
+      warc_log_fp = NULL;
+      log_set_warc_log_fp (NULL);
+    }
+}
+
+/* Creates a temporary file for writing WARC output.
+   The temporary file will be created in opt.warc_tempdir and is
+   unlinked right away, so it only exists as long as the returned
+   stream is open.
+   Returns the pointer to the temporary file, or NULL if the name
+   could not be built or the file could not be created, unlinked or
+   wrapped in a stream.  No file descriptor is left open on failure. */
+FILE *
+warc_tempfile (void)
+{
+  char filename[100];
+  if (path_search (filename, 100, opt.warc_tempdir, "wget", true) == -1)
+    return NULL;
+
+  int fd = mkstemp (filename);
+  if (fd < 0)
+    return NULL;
+
+  if (unlink (filename) < 0)
+    {
+      close (fd);
+      return NULL;
+    }
+
+  FILE *fp = fdopen (fd, "wb+");
+  if (fp == NULL)
+    close (fd);
+
+  return fp;
+}
+
+
+/* Writes a request record to the WARC file.
+ url is the target uri of the request,
+ timestamp_str is the timestamp of the request (generated with warc_timestamp),
+ record_uuid is the uuid of the request (generated with warc_uuid_str),
+ ip is the ip address of the server (or NULL),
+ body is a pointer to a file containing the request headers and body,
+ payload_offset is handed on unchanged to warc_write_digest_headers
+ (presumably the position in body where the payload starts -- confirm
+ against that function).
+ Calling this function will close body, whether or not writing
+ succeeded.
+ Returns the value of warc_write_ok after the record has been
+ written: true on success, false on error. */
+bool
+warc_write_request_record (char *url, char *timestamp_str, char *record_uuid,
+ ip_address *ip, FILE *body, off_t payload_offset)
+{
+ /* The header lines below are written in exactly this order. */
+ warc_write_start_record ();
+ warc_write_header ("WARC-Type", "request");
+ warc_write_header ("WARC-Target-URI", url);
+ warc_write_header ("Content-Type", "application/http;msgtype=request");
+ warc_write_date_header (timestamp_str);
+ warc_write_header ("WARC-Record-ID", record_uuid);
+ warc_write_ip_header (ip);
+ /* Ties this record to the warcinfo record of the current file. */
+ warc_write_header ("WARC-Warcinfo-ID", warc_current_warcinfo_uuid_str);
+ warc_write_digest_headers (body, payload_offset);
+ warc_write_block_from_file (body);
+ warc_write_end_record ();
+
+ /* The record writer owns body from here on. */
+ fclose (body);
+
+ return warc_write_ok;
+}
+
+/* Writes a line describing a response record to the CDX file.
+   url is the target uri of the request/response (printed in both the
+     'a' columns),
+   timestamp_str is the timestamp of the request that generated this
+     response (generated with warc_timestamp); it is read at fixed
+     positions as "YYYY-mm-ddTHH:MM:SS",
+   mime_type is the mime type of the response body ("-" if NULL or empty),
+   response_code is the HTTP response code,
+   payload_digest is the sha1 digest of the payload with a leading
+     "sha1:" prefix, or NULL ("-" is printed),
+   redirect_location is the contents of the Location: header ("-" if
+     NULL or empty),
+   offset is the position of the WARC record in the WARC file,
+   warc_filename is the filename of the WARC; it is currently not
+     used, the global warc_current_filename is printed instead,
+   response_uuid is the uuid of the response.
+   Always returns true; the result of the write is not checked. */
+static bool
+warc_write_cdx_record (const char *url, const char *timestamp_str,
+                       const char *mime_type, int response_code,
+                       const char *payload_digest, const char *redirect_location,
+                       off_t offset, const char *warc_filename,
+                       const char *response_uuid)
+{
+  /* Transform the timestamp into "YYYYmmddHHMMSS" by copying the
+     digit groups and skipping the separators named on the right. */
+  char timestamp_str_cdx [15];
+  memcpy (timestamp_str_cdx     , timestamp_str     , 4); /* "YYYY" "-" */
+  memcpy (timestamp_str_cdx +  4, timestamp_str +  5, 2); /* "mm"   "-" */
+  memcpy (timestamp_str_cdx +  6, timestamp_str +  8, 2); /* "dd"   "T" */
+  memcpy (timestamp_str_cdx +  8, timestamp_str + 11, 2); /* "HH"   ":" */
+  memcpy (timestamp_str_cdx + 10, timestamp_str + 14, 2); /* "MM"   ":" */
+  memcpy (timestamp_str_cdx + 12, timestamp_str + 17, 2); /* "SS"   "Z" */
+  timestamp_str_cdx[14] = '\0';
+
+  /* Rewrite the checksum. */
+  const char *checksum;
+  if (payload_digest != NULL)
+    checksum = payload_digest + 5; /* Skip the "sha1:" */
+  else
+    checksum = "-";
+
+  if (mime_type == NULL || mime_type[0] == '\0')
+    mime_type = "-";
+  if (redirect_location == NULL || redirect_location[0] == '\0')
+    redirect_location = "-";
+
+  /* Print the CDX line.  off_t need not have the width of long, so
+     the offset is converted to long long to match its conversion
+     specification. */
+  fprintf (warc_current_cdx_file, "%s %s %s %s %d %s %s - %lld %s %s\n", url,
+           timestamp_str_cdx, url, mime_type, response_code, checksum,
+           redirect_location, (long long) offset, warc_current_filename,
+           response_uuid);
+  fflush (warc_current_cdx_file);
+
+  return true;
+}
+
+/* Writes a revisit record to the WARC file.
+   url is the target uri of the request/response,
+   timestamp_str is the timestamp of the request that generated this response
+     (generated with warc_timestamp),
+   concurrent_to_uuid is the uuid of the request that generated this response
+     (generated with warc_uuid_str),
+   payload_digest is the sha1 digest of the payload,
+   refers_to is the uuid of the original response
+     (generated with warc_uuid_str),
+   ip is the ip address of the server (or NULL),
+   body is a pointer to a file containing the response headers (without payload).
+   Calling this function will close body.
+   Returns true on success, false on error. */
+static bool
+warc_write_revisit_record (char *url, char *timestamp_str,
+                           char *concurrent_to_uuid, char *payload_digest,
+                           char *refers_to, ip_address *ip, FILE *body)
+{
+  char revisit_uuid [48];
+  warc_uuid_str (revisit_uuid);
+
+  /* The block digest has to cover the block from its first byte.
+     The caller hands over the stream after it has computed digests
+     from it, so the read position is presumably at the end; rewind
+     before hashing, as the caller does before its own hashing. */
+  char *block_digest = NULL;
+  char sha1_res_block[SHA1_DIGEST_SIZE];
+  rewind (body);
+  sha1_stream (body, sha1_res_block);
+  block_digest = warc_base32_sha1_digest (sha1_res_block);
+
+  warc_write_start_record ();
+  warc_write_header ("WARC-Type", "revisit");
+  warc_write_header ("WARC-Record-ID", revisit_uuid);
+  warc_write_header ("WARC-Warcinfo-ID", warc_current_warcinfo_uuid_str);
+  warc_write_header ("WARC-Concurrent-To", concurrent_to_uuid);
+  warc_write_header ("WARC-Refers-To", refers_to);
+  warc_write_header ("WARC-Profile", "http://netpreserve.org/warc/1.0/revisit/identical-payload-digest");
+  warc_write_header ("WARC-Truncated", "length");
+  warc_write_header ("WARC-Target-URI", url);
+  warc_write_date_header (timestamp_str);
+  warc_write_ip_header (ip);
+  warc_write_header ("Content-Type", "application/http;msgtype=response");
+  warc_write_header ("WARC-Block-Digest", block_digest);
+  warc_write_header ("WARC-Payload-Digest", payload_digest);
+  warc_write_block_from_file (body);
+  warc_write_end_record ();
+
+  fclose (body);
+  free (block_digest);
+
+  return warc_write_ok;
+}
+
+/* Writes a response record to the WARC file.
+   url is the target uri of the request/response,
+   timestamp_str is the timestamp of the request that generated this response
+     (generated with warc_timestamp),
+   concurrent_to_uuid is the uuid of the request that generated this response
+     (generated with warc_uuid_str),
+   ip is the ip address of the server (or NULL),
+   body is a pointer to a file containing the response headers and body,
+   payload_offset is the position in body at which the payload starts,
+   mime_type is the mime type of the response body (will be printed to CDX),
+   response_code is the HTTP response code (will be printed to CDX),
+   redirect_location is the contents of the Location: header, or NULL (will be printed to CDX).
+
+   With digests enabled, a response whose url and payload digest match
+   a record of the CDX deduplication table is stored as a revisit
+   record without its payload instead.
+
+   Calling this function will close body, on every path.
+   Returns true on success, false on error. */
+bool
+warc_write_response_record (char *url, char *timestamp_str,
+                            char *concurrent_to_uuid, ip_address *ip,
+                            FILE *body, off_t payload_offset, char *mime_type,
+                            int response_code, char *redirect_location)
+{
+  char *block_digest = NULL;
+  char *payload_digest = NULL;
+  char sha1_res_block[SHA1_DIGEST_SIZE];
+  char sha1_res_payload[SHA1_DIGEST_SIZE];
+
+  if (opt.warc_digests_enabled)
+    {
+      /* Calculate the block and payload digests. */
+      rewind (body);
+      if (warc_sha1_stream_with_payload (body, sha1_res_block, sha1_res_payload,
+          payload_offset) == 0)
+        {
+          /* Decide (based on url + payload digest) if we have seen this
+             data before. */
+          struct warc_cdx_record *rec_existing;
+          rec_existing = warc_find_duplicate_cdx_record (url, sha1_res_payload);
+          if (rec_existing != NULL)
+            {
+              bool result;
+
+              /* Found an existing record. */
+              logprintf (LOG_VERBOSE,
+          _("Found exact match in CDX file. Saving revisit record to WARC.\n"));
+
+              /* Remove the payload from the file. */
+              if (payload_offset > 0)
+                {
+                  if (ftruncate (fileno (body), payload_offset) == -1)
+                    {
+                      /* Keep the promise that body is closed. */
+                      fclose (body);
+                      return false;
+                    }
+                }
+
+              /* Send the original payload digest. */
+              payload_digest = warc_base32_sha1_digest (sha1_res_payload);
+              result = warc_write_revisit_record (url, timestamp_str,
+                         concurrent_to_uuid, payload_digest, rec_existing->uuid,
+                         ip, body);
+              free (payload_digest);
+
+              return result;
+            }
+
+          block_digest = warc_base32_sha1_digest (sha1_res_block);
+          payload_digest = warc_base32_sha1_digest (sha1_res_payload);
+        }
+    }
+
+  /* Not a revisit, just store the record. */
+
+  char response_uuid [48];
+  warc_uuid_str (response_uuid);
+
+  /* Remember where the record starts; the CDX line refers to it. */
+  fseeko (warc_current_file, 0L, SEEK_END);
+  off_t offset = ftello (warc_current_file);
+
+  warc_write_start_record ();
+  warc_write_header ("WARC-Type", "response");
+  warc_write_header ("WARC-Record-ID", response_uuid);
+  warc_write_header ("WARC-Warcinfo-ID", warc_current_warcinfo_uuid_str);
+  warc_write_header ("WARC-Concurrent-To", concurrent_to_uuid);
+  warc_write_header ("WARC-Target-URI", url);
+  warc_write_date_header (timestamp_str);
+  warc_write_ip_header (ip);
+  /* Both digests are NULL when digests are disabled or could not be
+     computed. */
+  warc_write_header ("WARC-Block-Digest", block_digest);
+  warc_write_header ("WARC-Payload-Digest", payload_digest);
+  warc_write_header ("Content-Type", "application/http;msgtype=response");
+  warc_write_block_from_file (body);
+  warc_write_end_record ();
+
+  fclose (body);
+
+  if (warc_write_ok && opt.warc_cdx_enabled)
+    {
+      /* Add this record to the CDX. */
+      warc_write_cdx_record (url, timestamp_str, mime_type, response_code,
+                     payload_digest, redirect_location, offset, warc_current_filename,
+                     response_uuid);
+    }
+
+  free (block_digest);
+  free (payload_digest);
+
+  return warc_write_ok;
+}
+
+/* Writes a resource record to the WARC file.
+   resource_uuid is the record id to use; if NULL, a new one is
+     generated,
+   url is the target uri of the resource,
+   timestamp_str is the timestamp (generated with warc_timestamp),
+   concurrent_to_uuid is the uuid of the request that generated this
+     resource (generated with warc_uuid_str) or NULL,
+   ip is the ip address of the server (or NULL),
+   content_type is the mime type of the body; if NULL,
+     "application/octet-stream" is used,
+   body is a pointer to a file containing the resource data,
+   payload_offset is handed on to warc_write_digest_headers.
+   Calling this function will close body.
+   Returns true on success, false on error. */
+bool
+warc_write_resource_record (char *resource_uuid, const char *url,
+                 const char *timestamp_str, const char *concurrent_to_uuid,
+                 ip_address *ip, const char *content_type, FILE *body,
+                 off_t payload_offset)
+{
+  /* Holds a generated record id when the caller supplied none. */
+  char generated_uuid[48];
+  const char *effective_type;
+
+  if (resource_uuid == NULL)
+    {
+      warc_uuid_str (generated_uuid);
+      resource_uuid = generated_uuid;
+    }
+
+  effective_type = (content_type != NULL
+                    ? content_type
+                    : "application/octet-stream");
+
+  warc_write_start_record ();
+  warc_write_header ("WARC-Type", "resource");
+  warc_write_header ("WARC-Record-ID", resource_uuid);
+  warc_write_header ("WARC-Warcinfo-ID", warc_current_warcinfo_uuid_str);
+  warc_write_header ("WARC-Concurrent-To", concurrent_to_uuid);
+  warc_write_header ("WARC-Target-URI", url);
+  warc_write_date_header (timestamp_str);
+  warc_write_ip_header (ip);
+  warc_write_digest_headers (body, payload_offset);
+  warc_write_header ("Content-Type", effective_type);
+  warc_write_block_from_file (body);
+  warc_write_end_record ();
+
+  fclose (body);
+
+  return warc_write_ok;
+}
diff --git a/src/warc.h b/src/warc.h
new file mode 100644
index 0000000..eba640d
--- /dev/null
+++ b/src/warc.h
@@ -0,0 +1,23 @@
+/* Declarations of WARC helper methods. */
+#ifndef WARC_H
+#define WARC_H
+
+#include "host.h"
+
+/* Prepare the WARC writer; does nothing beyond resetting its error
+   state unless opt.warc_filename is set.  Exits on failure. */
+void warc_init (void);
+
+/* Write the metadata records and close all WARC output files. */
+void warc_close (void);
+
+/* Store the current timestamp in TIMESTAMP.  Callers in warc.c pass a
+   buffer of 22 bytes. */
+void warc_timestamp (char *timestamp);
+
+/* Store a new "<urn:uuid:...>" record id in ID_STR, which must have
+   room for 48 bytes (47 characters and the terminating NUL). */
+void warc_uuid_str (char *id_str);
+
+/* Create an already unlinked temporary file in opt.warc_tempdir.
+   Returns NULL on failure. */
+FILE * warc_tempfile (void);
+
+/* The three record writers below close BODY and return true on
+   success, false on error. */
+
+/* Write a request record; RECORD_UUID is the id of the record itself. */
+bool warc_write_request_record (char *url, char *timestamp_str,
+  char *record_uuid, ip_address *ip, FILE *body, off_t payload_offset);
+
+/* Write a response record (or a revisit record for a known duplicate);
+   CONCURRENT_TO_UUID is the id of the matching request record. */
+bool warc_write_response_record (char *url, char *timestamp_str,
+  char *concurrent_to_uuid, ip_address *ip, FILE *body, off_t payload_offset,
+  char *mime_type, int response_code, char *redirect_location);
+
+/* Write a resource record; a NULL RESOURCE_UUID requests a generated
+   id and a NULL CONTENT_TYPE means "application/octet-stream". */
+bool warc_write_resource_record (char *resource_uuid, const char *url,
+  const char *timestamp_str, const char *concurrent_to_uuid, ip_address *ip,
+  const char *content_type, FILE *body, off_t payload_offset);
+
+#endif /* WARC_H */
diff --git a/src/wget.h b/src/wget.h
index c7c5e2c..ca4a702 100644
--- a/src/wget.h
+++ b/src/wget.h
@@ -353,7 +353,9 @@ typedef enum
PROXERR,
/* 50 */
AUTHFAILED, QUOTEXC, WRITEFAILED, SSLINITFAILED, VERIFCERTERR,
- UNLINKERR, NEWLOCATION_KEEP_POST
+ UNLINKERR, NEWLOCATION_KEEP_POST, CLOSEFAILED,
+
+ WARC_ERR, WARC_TMP_FOPENERR, WARC_TMP_FWRITEERR
} uerr_t;
/* 2005-02-19 SMS.