summary | refs | log | tree | commit | diff
path: root/src/warc.c
diff options
context:
space:
mode:
author: DongHun Kwak <dh0128.kwak@samsung.com> 2021-03-05 10:08:15 +0900
committer: DongHun Kwak <dh0128.kwak@samsung.com> 2021-03-05 10:08:15 +0900
commit: 24d4e855d95e02a5324c2f3d88cfd5cd19830c2c (patch)
tree: 371d954e80394a8e72ef95d6ee7d45312c3f87a9 /src/warc.c
parent: 0b86d50828d05a27de3ff840d6a06407310393c2 (diff)
download: wget-24d4e855d95e02a5324c2f3d88cfd5cd19830c2c.tar.gz
wget-24d4e855d95e02a5324c2f3d88cfd5cd19830c2c.tar.bz2
wget-24d4e855d95e02a5324c2f3d88cfd5cd19830c2c.zip
Imported Upstream version 1.17 (upstream/1.17)
Diffstat (limited to 'src/warc.c')
-rw-r--r-- src/warc.c 113
1 files changed, 52 insertions, 61 deletions
diff --git a/src/warc.c b/src/warc.c
index 55884e0..ea3ad11 100644
--- a/src/warc.c
+++ b/src/warc.c
@@ -1,5 +1,5 @@
/* Utility functions for writing WARC files.
- Copyright (C) 2011, 2012 Free Software Foundation, Inc.
+ Copyright (C) 2011, 2012, 2015 Free Software Foundation, Inc.
This file is part of GNU Wget.
@@ -27,10 +27,6 @@ Corresponding Source for a non-source form of such a combination
shall include the source code for the parts of OpenSSL used as well
as that of the covered work. */
-#ifndef _GNU_SOURCE
-#define _GNU_SOURCE
-#endif
-
#include "wget.h"
#include "hash.h"
#include "utils.h"
@@ -102,7 +98,7 @@ static bool warc_write_ok;
static FILE *warc_current_cdx_file;
/* The record id of the warcinfo record of the current WARC file. */
-static char *warc_current_warcinfo_uuid_str;
+static char warc_current_warcinfo_uuid_str[48];
/* The file name of the current WARC file. */
static char *warc_current_filename;
@@ -404,7 +400,7 @@ warc_write_date_header (const char *timestamp)
the current WARC record. If IP is NULL, no header will
be written. */
static bool
-warc_write_ip_header (ip_address *ip)
+warc_write_ip_header (const ip_address *ip)
{
if (ip != NULL)
return warc_write_header ("WARC-IP-Address", print_address (ip));
@@ -435,9 +431,7 @@ warc_sha1_stream_with_payload (FILE *stream, void *res_block, void *res_payload,
off_t pos;
off_t sum;
- char *buffer = malloc (BLOCKSIZE + 72);
- if (!buffer)
- return 1;
+ char *buffer = xmalloc (BLOCKSIZE + 72);
/* Initialize the computation context. */
sha1_init_ctx (&ctx_block);
@@ -542,14 +536,17 @@ warc_sha1_stream_with_payload (FILE *stream, void *res_block, void *res_payload,
/* Converts the SHA1 digest to a base32-encoded string.
"sha1:DIGEST\0" (Allocates a new string for the response.) */
static char *
-warc_base32_sha1_digest (char *sha1_digest)
+warc_base32_sha1_digest (const char *sha1_digest, char *sha1_base32, size_t sha1_base32_size)
{
- /* length: "sha1:" + digest + "\0" */
- char *sha1_base32 = malloc (BASE32_LENGTH(SHA1_DIGEST_SIZE) + 1 + 5 );
- base32_encode (sha1_digest, SHA1_DIGEST_SIZE, sha1_base32 + 5,
- BASE32_LENGTH(SHA1_DIGEST_SIZE) + 1);
- memcpy (sha1_base32, "sha1:", 5);
- sha1_base32[BASE32_LENGTH(SHA1_DIGEST_SIZE) + 5] = '\0';
+ if (sha1_base32_size >= BASE32_LENGTH(SHA1_DIGEST_SIZE) + 5 + 1)
+ {
+ memcpy (sha1_base32, "sha1:", 5);
+ base32_encode (sha1_digest, SHA1_DIGEST_SIZE, sha1_base32 + 5,
+ sha1_base32_size - 5);
+ }
+ else
+ *sha1_base32 = 0;
+
return sha1_base32;
}
@@ -571,18 +568,14 @@ warc_write_digest_headers (FILE *file, long payload_offset)
if (warc_sha1_stream_with_payload (file, sha1_res_block,
sha1_res_payload, payload_offset) == 0)
{
- char *digest;
+ char digest[BASE32_LENGTH(SHA1_DIGEST_SIZE) + 1 + 5];
- digest = warc_base32_sha1_digest (sha1_res_block);
- warc_write_header ("WARC-Block-Digest", digest);
- xfree (digest);
+ warc_write_header ("WARC-Block-Digest",
+ warc_base32_sha1_digest (sha1_res_block, digest, sizeof(digest)));
if (payload_offset >= 0)
- {
- digest = warc_base32_sha1_digest (sha1_res_payload);
- warc_write_header ("WARC-Payload-Digest", digest);
- xfree (digest);
- }
+ warc_write_header ("WARC-Payload-Digest",
+ warc_base32_sha1_digest (sha1_res_payload, digest, sizeof(digest)));
}
}
}
@@ -734,7 +727,7 @@ warc_uuid_str (char *urn_str)
/* Write a warcinfo record to the current file.
Updates warc_current_warcinfo_uuid_str. */
static bool
-warc_write_warcinfo_record (char *filename)
+warc_write_warcinfo_record (const char *filename)
{
FILE *warc_tmp;
char timestamp[22];
@@ -743,7 +736,6 @@ warc_write_warcinfo_record (char *filename)
/* Write warc-info record as the first record of the file. */
/* We add the record id of this info record to the other records in the
file. */
- warc_current_warcinfo_uuid_str = (char *) malloc (48);
warc_uuid_str (warc_current_warcinfo_uuid_str);
warc_timestamp (timestamp, sizeof(timestamp));
@@ -827,14 +819,15 @@ warc_start_new_file (bool meta)
if (warc_current_file != NULL)
fclose (warc_current_file);
- xfree (warc_current_warcinfo_uuid_str);
+ *warc_current_warcinfo_uuid_str = 0;
xfree (warc_current_filename);
warc_current_file_number++;
base_filename_length = strlen (opt.warc_filename);
/* filename format: base + "-" + 5 digit serial number + ".warc.gz" */
- new_filename = malloc (base_filename_length + 1 + 5 + 8 + 1);
+ new_filename = xmalloc (base_filename_length + 1 + 5 + 8 + 1);
+
warc_current_filename = new_filename;
/* If max size is enabled, we add a serial number to the file names. */
@@ -995,7 +988,7 @@ warc_process_cdx_line (char *lineptr, int field_num_original_url,
{
/* This is a valid line with a valid checksum. */
struct warc_cdx_record *rec;
- rec = malloc (sizeof (struct warc_cdx_record));
+ rec = xmalloc (sizeof (struct warc_cdx_record));
rec->url = original_url;
rec->uuid = record_id;
memcpy (rec->digest, checksum_v, SHA1_DIGEST_SIZE);
@@ -1098,7 +1091,7 @@ _("CDX file does not list record ids. (Missing column 'u'.)\n"));
digest. Returns NULL if the url is not found or if the payload digest
does not match, or if CDX deduplication is disabled. */
static struct warc_cdx_record *
-warc_find_duplicate_cdx_record (char *url, char *sha1_digest_payload)
+warc_find_duplicate_cdx_record (const char *url, char *sha1_digest_payload)
{
struct warc_cdx_record *rec_existing;
@@ -1228,7 +1221,7 @@ warc_close (void)
if (warc_current_file != NULL)
{
warc_write_metadata ();
- xfree (warc_current_warcinfo_uuid_str);
+ *warc_current_warcinfo_uuid_str = 0;
fclose (warc_current_file);
}
if (warc_current_cdx_file != NULL)
@@ -1293,8 +1286,9 @@ warc_tempfile (void)
Calling this function will close body.
Returns true on success, false on error. */
bool
-warc_write_request_record (char *url, char *timestamp_str, char *record_uuid,
- ip_address *ip, FILE *body, off_t payload_offset)
+warc_write_request_record (const char *url, const char *timestamp_str,
+ const char *record_uuid, const ip_address *ip,
+ FILE *body, off_t payload_offset)
{
warc_write_start_record ();
warc_write_header ("WARC-Type", "request");
@@ -1382,18 +1376,18 @@ warc_write_cdx_record (const char *url, const char *timestamp_str,
Calling this function will close body.
Returns true on success, false on error. */
static bool
-warc_write_revisit_record (char *url, char *timestamp_str,
- char *concurrent_to_uuid, char *payload_digest,
- char *refers_to, ip_address *ip, FILE *body)
+warc_write_revisit_record (const char *url, const char *timestamp_str,
+ const char *concurrent_to_uuid, const char *payload_digest,
+ const char *refers_to, const ip_address *ip, FILE *body)
{
char revisit_uuid [48];
- char *block_digest = NULL;
+ char block_digest[BASE32_LENGTH(SHA1_DIGEST_SIZE) + 1 + 5];
char sha1_res_block[SHA1_DIGEST_SIZE];
warc_uuid_str (revisit_uuid);
sha1_stream (body, sha1_res_block);
- block_digest = warc_base32_sha1_digest (sha1_res_block);
+ warc_base32_sha1_digest (sha1_res_block, block_digest, sizeof(block_digest));
warc_write_start_record ();
warc_write_header ("WARC-Type", "revisit");
@@ -1413,7 +1407,6 @@ warc_write_revisit_record (char *url, char *timestamp_str,
warc_write_end_record ();
fclose (body);
- xfree (block_digest);
return warc_write_ok;
}
@@ -1432,13 +1425,13 @@ warc_write_revisit_record (char *url, char *timestamp_str,
Calling this function will close body.
Returns true on success, false on error. */
bool
-warc_write_response_record (char *url, char *timestamp_str,
- char *concurrent_to_uuid, ip_address *ip,
- FILE *body, off_t payload_offset, char *mime_type,
- int response_code, char *redirect_location)
+warc_write_response_record (const char *url, const char *timestamp_str,
+ const char *concurrent_to_uuid, const ip_address *ip,
+ FILE *body, off_t payload_offset, const char *mime_type,
+ int response_code, const char *redirect_location)
{
- char *block_digest = NULL;
- char *payload_digest = NULL;
+ char block_digest[BASE32_LENGTH(SHA1_DIGEST_SIZE) + 1 + 5];
+ char payload_digest[BASE32_LENGTH(SHA1_DIGEST_SIZE) + 1 + 5];
char sha1_res_block[SHA1_DIGEST_SIZE];
char sha1_res_payload[SHA1_DIGEST_SIZE];
char response_uuid [48];
@@ -1471,17 +1464,16 @@ warc_write_response_record (char *url, char *timestamp_str,
}
/* Send the original payload digest. */
- payload_digest = warc_base32_sha1_digest (sha1_res_payload);
+ warc_base32_sha1_digest (sha1_res_payload, payload_digest, sizeof(payload_digest));
result = warc_write_revisit_record (url, timestamp_str,
concurrent_to_uuid, payload_digest, rec_existing->uuid,
ip, body);
- xfree (payload_digest);
return result;
}
- block_digest = warc_base32_sha1_digest (sha1_res_block);
- payload_digest = warc_base32_sha1_digest (sha1_res_payload);
+ warc_base32_sha1_digest (sha1_res_block, block_digest, sizeof(block_digest));
+ warc_base32_sha1_digest (sha1_res_payload, payload_digest, sizeof(payload_digest));
}
}
@@ -1516,9 +1508,6 @@ warc_write_response_record (char *url, char *timestamp_str,
response_uuid);
}
- xfree (block_digest);
- xfree (payload_digest);
-
return warc_write_ok;
}
@@ -1535,16 +1524,18 @@ warc_write_response_record (char *url, char *timestamp_str,
Calling this function will close body.
Returns true on success, false on error. */
static bool
-warc_write_record (const char *record_type, char *resource_uuid,
+warc_write_record (const char *record_type, const char *resource_uuid,
const char *url, const char *timestamp_str,
const char *concurrent_to_uuid,
- ip_address *ip, const char *content_type, FILE *body,
+ const ip_address *ip, const char *content_type, FILE *body,
off_t payload_offset)
{
if (resource_uuid == NULL)
{
- resource_uuid = alloca (48);
- warc_uuid_str (resource_uuid);
+ /* using uuid_buf allows const for resource_uuid in function declaration */
+ char *uuid_buf = alloca (48);
+ warc_uuid_str (uuid_buf);
+ resource_uuid = uuid_buf;
}
if (content_type == NULL)
@@ -1580,9 +1571,9 @@ warc_write_record (const char *record_type, char *resource_uuid,
Calling this function will close body.
Returns true on success, false on error. */
bool
-warc_write_resource_record (char *resource_uuid, const char *url,
+warc_write_resource_record (const char *resource_uuid, const char *url,
const char *timestamp_str, const char *concurrent_to_uuid,
- ip_address *ip, const char *content_type, FILE *body,
+ const ip_address *ip, const char *content_type, FILE *body,
off_t payload_offset)
{
return warc_write_record ("resource",
@@ -1602,7 +1593,7 @@ warc_write_resource_record (char *resource_uuid, const char *url,
Calling this function will close body.
Returns true on success, false on error. */
bool
-warc_write_metadata_record (char *record_uuid, const char *url,
+warc_write_metadata_record (const char *record_uuid, const char *url,
const char *timestamp_str, const char *concurrent_to_uuid,
ip_address *ip, const char *content_type, FILE *body,
off_t payload_offset)