diff options
author | DongHun Kwak <dh0128.kwak@samsung.com> | 2021-03-05 10:08:15 +0900 |
---|---|---|
committer | DongHun Kwak <dh0128.kwak@samsung.com> | 2021-03-05 10:08:15 +0900 |
commit | 24d4e855d95e02a5324c2f3d88cfd5cd19830c2c (patch) | |
tree | 371d954e80394a8e72ef95d6ee7d45312c3f87a9 /src/warc.c | |
parent | 0b86d50828d05a27de3ff840d6a06407310393c2 (diff) | |
download | wget-24d4e855d95e02a5324c2f3d88cfd5cd19830c2c.tar.gz wget-24d4e855d95e02a5324c2f3d88cfd5cd19830c2c.tar.bz2 wget-24d4e855d95e02a5324c2f3d88cfd5cd19830c2c.zip |
Imported Upstream version 1.17 (tag: upstream/1.17)
Diffstat (limited to 'src/warc.c')
-rw-r--r-- | src/warc.c | 113 |
1 file changed, 52 insertions, 61 deletions
@@ -1,5 +1,5 @@ /* Utility functions for writing WARC files. - Copyright (C) 2011, 2012 Free Software Foundation, Inc. + Copyright (C) 2011, 2012, 2015 Free Software Foundation, Inc. This file is part of GNU Wget. @@ -27,10 +27,6 @@ Corresponding Source for a non-source form of such a combination shall include the source code for the parts of OpenSSL used as well as that of the covered work. */ -#ifndef _GNU_SOURCE -#define _GNU_SOURCE -#endif - #include "wget.h" #include "hash.h" #include "utils.h" @@ -102,7 +98,7 @@ static bool warc_write_ok; static FILE *warc_current_cdx_file; /* The record id of the warcinfo record of the current WARC file. */ -static char *warc_current_warcinfo_uuid_str; +static char warc_current_warcinfo_uuid_str[48]; /* The file name of the current WARC file. */ static char *warc_current_filename; @@ -404,7 +400,7 @@ warc_write_date_header (const char *timestamp) the current WARC record. If IP is NULL, no header will be written. */ static bool -warc_write_ip_header (ip_address *ip) +warc_write_ip_header (const ip_address *ip) { if (ip != NULL) return warc_write_header ("WARC-IP-Address", print_address (ip)); @@ -435,9 +431,7 @@ warc_sha1_stream_with_payload (FILE *stream, void *res_block, void *res_payload, off_t pos; off_t sum; - char *buffer = malloc (BLOCKSIZE + 72); - if (!buffer) - return 1; + char *buffer = xmalloc (BLOCKSIZE + 72); /* Initialize the computation context. */ sha1_init_ctx (&ctx_block); @@ -542,14 +536,17 @@ warc_sha1_stream_with_payload (FILE *stream, void *res_block, void *res_payload, /* Converts the SHA1 digest to a base32-encoded string. "sha1:DIGEST\0" (Allocates a new string for the response.) 
*/ static char * -warc_base32_sha1_digest (char *sha1_digest) +warc_base32_sha1_digest (const char *sha1_digest, char *sha1_base32, size_t sha1_base32_size) { - /* length: "sha1:" + digest + "\0" */ - char *sha1_base32 = malloc (BASE32_LENGTH(SHA1_DIGEST_SIZE) + 1 + 5 ); - base32_encode (sha1_digest, SHA1_DIGEST_SIZE, sha1_base32 + 5, - BASE32_LENGTH(SHA1_DIGEST_SIZE) + 1); - memcpy (sha1_base32, "sha1:", 5); - sha1_base32[BASE32_LENGTH(SHA1_DIGEST_SIZE) + 5] = '\0'; + if (sha1_base32_size >= BASE32_LENGTH(SHA1_DIGEST_SIZE) + 5 + 1) + { + memcpy (sha1_base32, "sha1:", 5); + base32_encode (sha1_digest, SHA1_DIGEST_SIZE, sha1_base32 + 5, + sha1_base32_size - 5); + } + else + *sha1_base32 = 0; + return sha1_base32; } @@ -571,18 +568,14 @@ warc_write_digest_headers (FILE *file, long payload_offset) if (warc_sha1_stream_with_payload (file, sha1_res_block, sha1_res_payload, payload_offset) == 0) { - char *digest; + char digest[BASE32_LENGTH(SHA1_DIGEST_SIZE) + 1 + 5]; - digest = warc_base32_sha1_digest (sha1_res_block); - warc_write_header ("WARC-Block-Digest", digest); - xfree (digest); + warc_write_header ("WARC-Block-Digest", + warc_base32_sha1_digest (sha1_res_block, digest, sizeof(digest))); if (payload_offset >= 0) - { - digest = warc_base32_sha1_digest (sha1_res_payload); - warc_write_header ("WARC-Payload-Digest", digest); - xfree (digest); - } + warc_write_header ("WARC-Payload-Digest", + warc_base32_sha1_digest (sha1_res_payload, digest, sizeof(digest))); } } } @@ -734,7 +727,7 @@ warc_uuid_str (char *urn_str) /* Write a warcinfo record to the current file. Updates warc_current_warcinfo_uuid_str. */ static bool -warc_write_warcinfo_record (char *filename) +warc_write_warcinfo_record (const char *filename) { FILE *warc_tmp; char timestamp[22]; @@ -743,7 +736,6 @@ warc_write_warcinfo_record (char *filename) /* Write warc-info record as the first record of the file. */ /* We add the record id of this info record to the other records in the file. 
*/ - warc_current_warcinfo_uuid_str = (char *) malloc (48); warc_uuid_str (warc_current_warcinfo_uuid_str); warc_timestamp (timestamp, sizeof(timestamp)); @@ -827,14 +819,15 @@ warc_start_new_file (bool meta) if (warc_current_file != NULL) fclose (warc_current_file); - xfree (warc_current_warcinfo_uuid_str); + *warc_current_warcinfo_uuid_str = 0; xfree (warc_current_filename); warc_current_file_number++; base_filename_length = strlen (opt.warc_filename); /* filename format: base + "-" + 5 digit serial number + ".warc.gz" */ - new_filename = malloc (base_filename_length + 1 + 5 + 8 + 1); + new_filename = xmalloc (base_filename_length + 1 + 5 + 8 + 1); + warc_current_filename = new_filename; /* If max size is enabled, we add a serial number to the file names. */ @@ -995,7 +988,7 @@ warc_process_cdx_line (char *lineptr, int field_num_original_url, { /* This is a valid line with a valid checksum. */ struct warc_cdx_record *rec; - rec = malloc (sizeof (struct warc_cdx_record)); + rec = xmalloc (sizeof (struct warc_cdx_record)); rec->url = original_url; rec->uuid = record_id; memcpy (rec->digest, checksum_v, SHA1_DIGEST_SIZE); @@ -1098,7 +1091,7 @@ _("CDX file does not list record ids. (Missing column 'u'.)\n")); digest. Returns NULL if the url is not found or if the payload digest does not match, or if CDX deduplication is disabled. */ static struct warc_cdx_record * -warc_find_duplicate_cdx_record (char *url, char *sha1_digest_payload) +warc_find_duplicate_cdx_record (const char *url, char *sha1_digest_payload) { struct warc_cdx_record *rec_existing; @@ -1228,7 +1221,7 @@ warc_close (void) if (warc_current_file != NULL) { warc_write_metadata (); - xfree (warc_current_warcinfo_uuid_str); + *warc_current_warcinfo_uuid_str = 0; fclose (warc_current_file); } if (warc_current_cdx_file != NULL) @@ -1293,8 +1286,9 @@ warc_tempfile (void) Calling this function will close body. Returns true on success, false on error. 
*/ bool -warc_write_request_record (char *url, char *timestamp_str, char *record_uuid, - ip_address *ip, FILE *body, off_t payload_offset) +warc_write_request_record (const char *url, const char *timestamp_str, + const char *record_uuid, const ip_address *ip, + FILE *body, off_t payload_offset) { warc_write_start_record (); warc_write_header ("WARC-Type", "request"); @@ -1382,18 +1376,18 @@ warc_write_cdx_record (const char *url, const char *timestamp_str, Calling this function will close body. Returns true on success, false on error. */ static bool -warc_write_revisit_record (char *url, char *timestamp_str, - char *concurrent_to_uuid, char *payload_digest, - char *refers_to, ip_address *ip, FILE *body) +warc_write_revisit_record (const char *url, const char *timestamp_str, + const char *concurrent_to_uuid, const char *payload_digest, + const char *refers_to, const ip_address *ip, FILE *body) { char revisit_uuid [48]; - char *block_digest = NULL; + char block_digest[BASE32_LENGTH(SHA1_DIGEST_SIZE) + 1 + 5]; char sha1_res_block[SHA1_DIGEST_SIZE]; warc_uuid_str (revisit_uuid); sha1_stream (body, sha1_res_block); - block_digest = warc_base32_sha1_digest (sha1_res_block); + warc_base32_sha1_digest (sha1_res_block, block_digest, sizeof(block_digest)); warc_write_start_record (); warc_write_header ("WARC-Type", "revisit"); @@ -1413,7 +1407,6 @@ warc_write_revisit_record (char *url, char *timestamp_str, warc_write_end_record (); fclose (body); - xfree (block_digest); return warc_write_ok; } @@ -1432,13 +1425,13 @@ warc_write_revisit_record (char *url, char *timestamp_str, Calling this function will close body. Returns true on success, false on error. 
*/ bool -warc_write_response_record (char *url, char *timestamp_str, - char *concurrent_to_uuid, ip_address *ip, - FILE *body, off_t payload_offset, char *mime_type, - int response_code, char *redirect_location) +warc_write_response_record (const char *url, const char *timestamp_str, + const char *concurrent_to_uuid, const ip_address *ip, + FILE *body, off_t payload_offset, const char *mime_type, + int response_code, const char *redirect_location) { - char *block_digest = NULL; - char *payload_digest = NULL; + char block_digest[BASE32_LENGTH(SHA1_DIGEST_SIZE) + 1 + 5]; + char payload_digest[BASE32_LENGTH(SHA1_DIGEST_SIZE) + 1 + 5]; char sha1_res_block[SHA1_DIGEST_SIZE]; char sha1_res_payload[SHA1_DIGEST_SIZE]; char response_uuid [48]; @@ -1471,17 +1464,16 @@ warc_write_response_record (char *url, char *timestamp_str, } /* Send the original payload digest. */ - payload_digest = warc_base32_sha1_digest (sha1_res_payload); + warc_base32_sha1_digest (sha1_res_payload, payload_digest, sizeof(payload_digest)); result = warc_write_revisit_record (url, timestamp_str, concurrent_to_uuid, payload_digest, rec_existing->uuid, ip, body); - xfree (payload_digest); return result; } - block_digest = warc_base32_sha1_digest (sha1_res_block); - payload_digest = warc_base32_sha1_digest (sha1_res_payload); + warc_base32_sha1_digest (sha1_res_block, block_digest, sizeof(block_digest)); + warc_base32_sha1_digest (sha1_res_payload, payload_digest, sizeof(payload_digest)); } } @@ -1516,9 +1508,6 @@ warc_write_response_record (char *url, char *timestamp_str, response_uuid); } - xfree (block_digest); - xfree (payload_digest); - return warc_write_ok; } @@ -1535,16 +1524,18 @@ warc_write_response_record (char *url, char *timestamp_str, Calling this function will close body. Returns true on success, false on error. 
*/ static bool -warc_write_record (const char *record_type, char *resource_uuid, +warc_write_record (const char *record_type, const char *resource_uuid, const char *url, const char *timestamp_str, const char *concurrent_to_uuid, - ip_address *ip, const char *content_type, FILE *body, + const ip_address *ip, const char *content_type, FILE *body, off_t payload_offset) { if (resource_uuid == NULL) { - resource_uuid = alloca (48); - warc_uuid_str (resource_uuid); + /* using uuid_buf allows const for resource_uuid in function declaration */ + char *uuid_buf = alloca (48); + warc_uuid_str (uuid_buf); + resource_uuid = uuid_buf; } if (content_type == NULL) @@ -1580,9 +1571,9 @@ warc_write_record (const char *record_type, char *resource_uuid, Calling this function will close body. Returns true on success, false on error. */ bool -warc_write_resource_record (char *resource_uuid, const char *url, +warc_write_resource_record (const char *resource_uuid, const char *url, const char *timestamp_str, const char *concurrent_to_uuid, - ip_address *ip, const char *content_type, FILE *body, + const ip_address *ip, const char *content_type, FILE *body, off_t payload_offset) { return warc_write_record ("resource", @@ -1602,7 +1593,7 @@ warc_write_resource_record (char *resource_uuid, const char *url, Calling this function will close body. Returns true on success, false on error. */ bool -warc_write_metadata_record (char *record_uuid, const char *url, +warc_write_metadata_record (const char *record_uuid, const char *url, const char *timestamp_str, const char *concurrent_to_uuid, ip_address *ip, const char *content_type, FILE *body, off_t payload_offset) |