summary | refs | log | tree | commit | diff
path: root/src/warc.c
diff options
context:
space:
mode:
author: DongHun Kwak <dh0128.kwak@samsung.com> 2021-03-05 10:08:27 +0900
committer: DongHun Kwak <dh0128.kwak@samsung.com> 2021-03-05 10:08:27 +0900
commit: a2c7c975f0813d307b31d06da2c015916a6bb16d (patch)
tree: 985a31e8c860c690d9f20e6621ce5fcc05ccd244 /src/warc.c
parent: dc6b8fd841f8acf37e6d3f7642e71cae175505bd (diff)
downloadwget-a2c7c975f0813d307b31d06da2c015916a6bb16d.tar.gz
wget-a2c7c975f0813d307b31d06da2c015916a6bb16d.tar.bz2
wget-a2c7c975f0813d307b31d06da2c015916a6bb16d.zip
Imported Upstream version 1.21 [upstream/1.21]
Diffstat (limited to 'src/warc.c')
-rw-r--r-- src/warc.c 69
1 files changed, 44 insertions, 25 deletions
diff --git a/src/warc.c b/src/warc.c
index 74c8ad9..f602444 100644
--- a/src/warc.c
+++ b/src/warc.c
@@ -1,5 +1,5 @@
/* Utility functions for writing WARC files.
- Copyright (C) 2011-2012, 2015, 2018-2019 Free Software Foundation,
+ Copyright (C) 2011-2012, 2015, 2018-2020 Free Software Foundation,
Inc.
This file is part of GNU Wget.
@@ -212,8 +212,19 @@ warc_write_start_record (void)
In warc_write_end_record we will fill this space
with information about the uncompressed and
compressed size of the record. */
- fseek (warc_current_file, EXTRA_GZIP_HEADER_SIZE, SEEK_CUR);
- fflush (warc_current_file);
+ if (fseek (warc_current_file, EXTRA_GZIP_HEADER_SIZE, SEEK_CUR) < 0)
+ {
+ logprintf (LOG_NOTQUIET, _("Error setting WARC file position.\n"));
+ warc_write_ok = false;
+ return false;
+ }
+
+ if (fflush (warc_current_file) != 0)
+ {
+ logprintf (LOG_NOTQUIET, _("Error flushing WARC file to disk.\n"));
+ warc_write_ok = false;
+ return false;
+ }
/* Start a new GZIP stream. */
dup_fd = dup (fileno (warc_current_file));
@@ -316,7 +327,11 @@ warc_write_block_from_file (FILE *data_in)
static bool
warc_write_end_record (void)
{
- warc_write_buffer ("\r\n\r\n", 4);
+ if (warc_write_buffer ("\r\n\r\n", 4) != 4)
+ {
+ warc_write_ok = false;
+ return false;
+ }
#ifdef HAVE_LIBZ
/* We start a new gzip stream for each record. */
@@ -357,8 +372,13 @@ warc_write_end_record (void)
compressed_size = warc_current_gzfile_uncompressed_size;
/* Go back to the static GZIP header. */
- fseeko (warc_current_file, warc_current_gzfile_offset
+ result = fseeko (warc_current_file, warc_current_gzfile_offset
+ EXTRA_GZIP_HEADER_SIZE, SEEK_SET);
+ if (result != 0)
+ {
+ warc_write_ok = false;
+ return false;
+ }
/* Read the header. */
result = fread (static_header, 1, GZIP_STATIC_HEADER_SIZE,
@@ -631,7 +651,7 @@ warc_timestamp (char *timestamp, size_t timestamp_size)
The string will be 47 characters long. */
#if HAVE_LIBUUID
void
-warc_uuid_str (char *urn_str)
+warc_uuid_str (char *urn_str, size_t urn_size)
{
char uuid_str[37];
uuid_t record_id;
@@ -639,11 +659,11 @@ warc_uuid_str (char *urn_str)
uuid_generate (record_id);
uuid_unparse (record_id, uuid_str);
- sprintf (urn_str, "<urn:uuid:%s>", uuid_str);
+ snprintf (urn_str, urn_size, "<urn:uuid:%s>", uuid_str);
}
#elif HAVE_UUID_CREATE
void
-warc_uuid_str (char *urn_str)
+warc_uuid_str (char *urn_str, size_t urn_size)
{
char *uuid_str;
uuid_t record_id;
@@ -651,7 +671,7 @@ warc_uuid_str (char *urn_str)
uuid_create (&record_id, NULL);
uuid_to_string (&record_id, &uuid_str, NULL);
- sprintf (urn_str, "<urn:uuid:%s>", uuid_str);
+ snprintf (urn_str, urn_size, "<urn:uuid:%s>", uuid_str);
xfree (uuid_str);
}
#else
@@ -662,7 +682,7 @@ typedef RPC_STATUS (RPC_ENTRY * UuidToString_proc) (UUID *, unsigned char **);
typedef RPC_STATUS (RPC_ENTRY * RpcStringFree_proc) (unsigned char **);
static int
-windows_uuid_str (char *urn_str)
+windows_uuid_str (char *urn_str, size_t urn_size)
{
static UuidCreate_proc pfn_UuidCreate = NULL;
static UuidToString_proc pfn_UuidToString = NULL;
@@ -701,7 +721,7 @@ windows_uuid_str (char *urn_str)
{
if (pfn_UuidToString (&uuid, &uuid_str) == RPC_S_OK)
{
- sprintf (urn_str, "<urn:uuid:%s>", uuid_str);
+ snprintf (urn_str, urn_size, "<urn:uuid:%s>", uuid_str);
pfn_RpcStringFree (&uuid_str);
return 1;
}
@@ -719,7 +739,7 @@ windows_uuid_str (char *urn_str)
The string will be 47 characters long. */
void
-warc_uuid_str (char *urn_str)
+warc_uuid_str (char *urn_str, size_t urn_size)
{
/* RFC 4122, a version 4 UUID with only random numbers */
@@ -729,7 +749,7 @@ warc_uuid_str (char *urn_str)
#ifdef WINDOWS
/* If the native method fails (expected on older Windows versions),
use the fallback below. */
- if (windows_uuid_str (urn_str))
+ if (windows_uuid_str (urn_str, urn_size))
return;
#endif
@@ -744,7 +764,7 @@ warc_uuid_str (char *urn_str)
* clock_seq_hi_and_reserved to zero and one, respectively. */
uuid_data[8] = (uuid_data[8] & 0xBF) | 0x80;
- sprintf (urn_str,
+ snprintf (urn_str, urn_size,
"<urn:uuid:%02x%02x%02x%02x-%02x%02x-%02x%02x-%02x%02x-%02x%02x%02x%02x%02x%02x>",
uuid_data[0], uuid_data[1], uuid_data[2], uuid_data[3], uuid_data[4],
uuid_data[5], uuid_data[6], uuid_data[7], uuid_data[8], uuid_data[9],
@@ -765,7 +785,7 @@ warc_write_warcinfo_record (const char *filename)
/* Write warc-info record as the first record of the file. */
/* We add the record id of this info record to the other records in the
file. */
- warc_uuid_str (warc_current_warcinfo_uuid_str);
+ warc_uuid_str (warc_current_warcinfo_uuid_str, sizeof (warc_current_warcinfo_uuid_str));
warc_timestamp (timestamp, sizeof(timestamp));
@@ -895,11 +915,10 @@ warc_start_new_file (bool meta)
static bool
warc_start_cdx_file (void)
{
- int filename_length = strlen (opt.warc_filename);
- char *cdx_filename = alloca (filename_length + 4 + 1);
- memcpy (cdx_filename, opt.warc_filename, filename_length);
- memcpy (cdx_filename + filename_length, ".cdx", 5);
+ char *cdx_filename = aprintf("%s.cdx", opt.warc_filename);
warc_current_cdx_file = fopen (cdx_filename, "a+");
+ free(cdx_filename);
+
if (warc_current_cdx_file == NULL)
return false;
@@ -1205,7 +1224,7 @@ warc_write_metadata (void)
if (opt.warc_maxsize > 0)
warc_start_new_file (true);
- warc_uuid_str (manifest_uuid);
+ warc_uuid_str (manifest_uuid, sizeof (manifest_uuid));
fflush (warc_manifest_fp);
warc_write_metadata_record (manifest_uuid,
@@ -1423,7 +1442,7 @@ warc_write_revisit_record (const char *url, const char *timestamp_str,
char block_digest[BASE32_LENGTH(SHA1_DIGEST_SIZE) + 1 + 5];
char sha1_res_block[SHA1_DIGEST_SIZE];
- warc_uuid_str (revisit_uuid);
+ warc_uuid_str (revisit_uuid, sizeof (revisit_uuid));
sha1_stream (body, sha1_res_block);
warc_base32_sha1_digest (sha1_res_block, block_digest, sizeof(block_digest));
@@ -1518,7 +1537,7 @@ warc_write_response_record (const char *url, const char *timestamp_str,
/* Not a revisit, just store the record. */
- warc_uuid_str (response_uuid);
+ warc_uuid_str (response_uuid, sizeof (response_uuid));
fseeko (warc_current_file, 0L, SEEK_END);
offset = ftello (warc_current_file);
@@ -1569,11 +1588,11 @@ warc_write_record (const char *record_type, const char *resource_uuid,
const ip_address *ip, const char *content_type, FILE *body,
off_t payload_offset)
{
+ char uuid_buf[48];
+
if (resource_uuid == NULL)
{
- /* using uuid_buf allows const for resource_uuid in function declaration */
- char *uuid_buf = alloca (48);
- warc_uuid_str (uuid_buf);
+ warc_uuid_str (uuid_buf, sizeof (uuid_buf));
resource_uuid = uuid_buf;
}