summaryrefslogtreecommitdiff
path: root/Utilities/cmlibarchive/libarchive/archive_read_support_format_zip.c
diff options
context:
space:
mode:
Diffstat (limited to 'Utilities/cmlibarchive/libarchive/archive_read_support_format_zip.c')
-rw-r--r--Utilities/cmlibarchive/libarchive/archive_read_support_format_zip.c1266
1 files changed, 1266 insertions, 0 deletions
diff --git a/Utilities/cmlibarchive/libarchive/archive_read_support_format_zip.c b/Utilities/cmlibarchive/libarchive/archive_read_support_format_zip.c
new file mode 100644
index 000000000..d15080263
--- /dev/null
+++ b/Utilities/cmlibarchive/libarchive/archive_read_support_format_zip.c
@@ -0,0 +1,1266 @@
+/*-
+ * Copyright (c) 2004 Tim Kientzle
+ * Copyright (c) 2011 Michihiro NAKAJIMA
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR(S) ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "archive_platform.h"
+__FBSDID("$FreeBSD: head/lib/libarchive/archive_read_support_format_zip.c 201102 2009-12-28 03:11:36Z kientzle $");
+
+#ifdef HAVE_ERRNO_H
+#include <errno.h>
+#endif
+#ifdef HAVE_STDLIB_H
+#include <stdlib.h>
+#endif
+#ifdef HAVE_ZLIB_H
+#include <cm_zlib.h>
+#endif
+
+#include "archive.h"
+#include "archive_entry.h"
+#include "archive_entry_locale.h"
+#include "archive_private.h"
+#include "archive_read_private.h"
+#include "archive_endian.h"
+
+#ifndef HAVE_ZLIB_H
+#include "archive_crc32.h"
+#endif
+
+/*
+ * Metadata for a single Zip entry, filled in from a central directory
+ * record and/or the entry's local file header.
+ */
+struct zip_entry {
+ int64_t local_header_offset; /* Offset of the local file header, taken from the central directory. */
+ int64_t compressed_size; /* Size of the entry body as stored in the archive. */
+ int64_t uncompressed_size; /* Size of the entry body after decompression. */
+ int64_t gid;
+ int64_t uid;
+ struct archive_entry *entry;
+ time_t mtime;
+ time_t atime;
+ time_t ctime;
+ uint32_t crc32; /* Expected CRC32 from a header or data descriptor. */
+ uint16_t mode; /* File type and permission bits; 0 means "not known yet". */
+ uint16_t flags; /* General-purpose flag bits (see the ZIP_* definitions). */
+ char compression; /* Compression method: 0 is stored, 8 is deflate. */
+ char system; /* Host system byte; 3 means the external attributes carry a mode. */
+};
+
+/*
+ * Private reader state, shared by the streaming and the seeking Zip readers.
+ */
+struct zip {
+ /* Structural information about the archive. */
+ int64_t central_directory_offset;
+ size_t central_directory_size;
+ size_t central_directory_entries;
+ char have_central_directory; /* Set once a central directory record has been parsed. */
+
+ /* List of entries (seekable Zip only) */
+ size_t entries_remaining;
+ struct zip_entry *zip_entries;
+ struct zip_entry *entry; /* Entry currently being read. */
+
+ size_t unconsumed; /* Bytes already returned to the caller but not yet consumed from the input. */
+
+ /* entry_bytes_remaining is the number of bytes we expect. */
+ int64_t entry_bytes_remaining;
+
+ /* These count the number of bytes actually read for the entry. */
+ int64_t entry_compressed_bytes_read;
+ int64_t entry_uncompressed_bytes_read;
+
+ /* Running CRC32 of the decompressed data */
+ unsigned long entry_crc32;
+
+ /* Flags to mark progress of decompression. */
+ char decompress_init;
+ char end_of_entry;
+
+ ssize_t filename_length;
+ ssize_t extra_length;
+
+ unsigned char *uncompressed_buffer; /* Output buffer for inflated data, allocated on first use. */
+ size_t uncompressed_buffer_size;
+#ifdef HAVE_ZLIB_H
+ z_stream stream;
+ char stream_valid; /* Nonzero once inflateInit2() has succeeded on 'stream'. */
+#endif
+
+ struct archive_string extra;
+ struct archive_string_conv *sconv; /* Converter chosen through the "hdrcharset" option. */
+ struct archive_string_conv *sconv_default; /* Default converter, set up lazily. */
+ struct archive_string_conv *sconv_utf8; /* Converter for names flagged as UTF-8. */
+ int init_default_conversion; /* Nonzero when sconv_default must not be (re)initialized. */
+ char format_name[64]; /* Storage for the descriptive archive_format_name string. */
+};
+
+/* Bits of the general-purpose flags field (zip_entry.flags). */
+#define ZIP_LENGTH_AT_END 8 /* CRC and/or sizes follow the entry data in a data descriptor. */
+#define ZIP_ENCRYPTED (1<<0) /* Entry is encrypted; reading its data is refused. */
+#define ZIP_STRONG_ENCRYPTED (1<<6) /* Strongly encrypted entry; reading is refused as well. */
+#define ZIP_UTF8_NAME (1<<11) /* The stored pathname is UTF-8. */
+
+/* Forward declarations of the file-local callbacks and helpers. */
+static int archive_read_format_zip_streamable_bid(struct archive_read *, int);
+static int archive_read_format_zip_seekable_bid(struct archive_read *, int);
+static int archive_read_format_zip_options(struct archive_read *,
+ const char *, const char *);
+static int archive_read_format_zip_cleanup(struct archive_read *);
+static int archive_read_format_zip_read_data(struct archive_read *,
+ const void **, size_t *, int64_t *);
+static int archive_read_format_zip_read_data_skip(struct archive_read *a);
+static int archive_read_format_zip_seekable_read_header(struct archive_read *,
+ struct archive_entry *);
+static int archive_read_format_zip_streamable_read_header(struct archive_read *,
+ struct archive_entry *);
+#ifdef HAVE_ZLIB_H
+/* The deflate reader exists only when zlib is available. */
+static int zip_read_data_deflate(struct archive_read *a, const void **buff,
+ size_t *size, int64_t *offset);
+#endif
+static int zip_read_data_none(struct archive_read *a, const void **buff,
+ size_t *size, int64_t *offset);
+static int zip_read_local_file_header(struct archive_read *a,
+ struct archive_entry *entry, struct zip *);
+static time_t zip_time(const char *);
+static const char *compression_name(int compression);
+static void process_extra(const char *, size_t, struct zip_entry *);
+
+/*
+ * Enable the streaming Zip reader on a read handle.
+ *
+ * Allocates zero-filled private state and registers the streamable bid and
+ * read-header callbacks together with the shared options, data, skip and
+ * cleanup callbacks.
+ *
+ * Returns ARCHIVE_FATAL if the state cannot be allocated (the magic check
+ * may also return early); otherwise ARCHIVE_OK, whatever the registration
+ * call reported.
+ */
+int
+archive_read_support_format_zip_streamable(struct archive *_a)
+{
+	struct archive_read *a = (struct archive_read *)_a;
+	struct zip *state;
+	int status;
+
+	archive_check_magic(_a, ARCHIVE_READ_MAGIC,
+	    ARCHIVE_STATE_NEW, "archive_read_support_format_zip");
+
+	/* One zeroed state object per registered format. */
+	state = (struct zip *)calloc(1, sizeof(*state));
+	if (state == NULL) {
+		archive_set_error(&a->archive, ENOMEM,
+		    "Can't allocate zip data");
+		return (ARCHIVE_FATAL);
+	}
+
+	status = __archive_read_register_format(a,
+	    state,
+	    "zip",
+	    archive_read_format_zip_streamable_bid,
+	    archive_read_format_zip_options,
+	    archive_read_format_zip_streamable_read_header,
+	    archive_read_format_zip_read_data,
+	    archive_read_format_zip_read_data_skip,
+	    archive_read_format_zip_cleanup);
+
+	/* Anything other than ARCHIVE_OK: release the state ourselves. */
+	if (status != ARCHIVE_OK)
+		free(state);
+	return (ARCHIVE_OK);
+}
+
+/*
+ * Enable the seeking Zip reader on a read handle.
+ *
+ * Allocates zero-filled private state and registers the seekable bid and
+ * read-header callbacks together with the shared options, data, skip and
+ * cleanup callbacks.
+ *
+ * Returns ARCHIVE_FATAL if the state cannot be allocated (the magic check
+ * may also return early); otherwise ARCHIVE_OK, whatever the registration
+ * call reported.
+ */
+int
+archive_read_support_format_zip_seekable(struct archive *_a)
+{
+	struct archive_read *a = (struct archive_read *)_a;
+	struct zip *state;
+	int status;
+
+	archive_check_magic(_a, ARCHIVE_READ_MAGIC,
+	    ARCHIVE_STATE_NEW, "archive_read_support_format_zip_seekable");
+
+	/* One zeroed state object per registered format. */
+	state = (struct zip *)calloc(1, sizeof(*state));
+	if (state == NULL) {
+		archive_set_error(&a->archive, ENOMEM,
+		    "Can't allocate zip data");
+		return (ARCHIVE_FATAL);
+	}
+
+	status = __archive_read_register_format(a,
+	    state,
+	    "zip",
+	    archive_read_format_zip_seekable_bid,
+	    archive_read_format_zip_options,
+	    archive_read_format_zip_seekable_read_header,
+	    archive_read_format_zip_read_data,
+	    archive_read_format_zip_read_data_skip,
+	    archive_read_format_zip_cleanup);
+
+	/* Anything other than ARCHIVE_OK: release the state ourselves. */
+	if (status != ARCHIVE_OK)
+		free(state);
+	return (ARCHIVE_OK);
+}
+
+/*
+ * Enable both Zip readers: the streaming one first, then the seeking one.
+ * Stops at, and returns, the first result that is not ARCHIVE_OK.
+ */
+int
+archive_read_support_format_zip(struct archive *a)
+{
+	int status = archive_read_support_format_zip_streamable(a);
+
+	if (status == ARCHIVE_OK)
+		status = archive_read_support_format_zip_seekable(a);
+	return (status);
+}
+
+/*
+ * TODO: This is a performance sink because it forces
+ * the read core to drop buffered data from the start
+ * of file, which will then have to be re-read again
+ * if this bidder loses.
+ *
+ * Consider passing in the winning bid value to subsequent
+ * bidders so that this bidder in particular can avoid
+ * seeking if it knows it's going to lose anyway.
+ */
+/*
+ * Bid for the seeking reader by looking for an end-of-central-directory
+ * record in the last 22 bytes of the input.
+ *
+ * Returns -1 without seeking when another bidder already exceeds 32,
+ * 0 when seeking fails or the record is missing or inconsistent, and 32
+ * on success.  As a side effect, the central directory entry count, size
+ * and offset are stored in the format state as soon as the signature
+ * matches, even if a later consistency check rejects the bid.
+ */
+static int
+archive_read_format_zip_seekable_bid(struct archive_read *a, int best_bid)
+{
+	struct zip *zip = (struct zip *)a->format->data;
+	const char *eocd;
+	int64_t eocd_offset;
+
+	/* Somebody already outbid us: don't trash the look-ahead
+	   buffers with a seek. */
+	if (best_bid > 32)
+		return (-1);
+
+	/* No usable seek means no bid. */
+	eocd_offset = __archive_read_seek(a, -22, SEEK_END);
+	if (eocd_offset <= 0)
+		return (0);
+
+	/* TODO: More robust search for end of central directory record. */
+	eocd = __archive_read_ahead(a, 22, NULL);
+	if (eocd == NULL)
+		return (0);
+
+	/* Signature plus four zero bytes; the zeros rule out a
+	   multi-volume Zip file (which we don't yet support). */
+	if (memcmp(eocd, "PK\005\006\000\000\000\000", 8) != 0)
+		return (0);
+
+	/* The record is in hand, so remember what it tells us. */
+	zip->central_directory_entries = archive_le16dec(eocd + 10);
+	zip->central_directory_size = archive_le32dec(eocd + 12);
+	zip->central_directory_offset = archive_le32dec(eocd + 16);
+
+	/* Single volume: per-disk and total entry counts must agree. */
+	if (zip->central_directory_entries != archive_le16dec(eocd + 8))
+		return (0);
+	/* The central directory can't extend beyond the record we found. */
+	if (zip->central_directory_offset + zip->central_directory_size > eocd_offset)
+		return (0);
+
+	/* One notch above the streaming bidder's maximum, so the more
+	   accurate seeking parser wins whenever seek is available. */
+	return (32);
+}
+
+/*
+ * Read every central directory record into a freshly allocated
+ * zip->zip_entries array.
+ *
+ * Seeks to zip->central_directory_offset, then parses
+ * zip->central_directory_entries fixed-size (46 byte) records, skipping
+ * each record's filename, extra field and comment.
+ *
+ * Returns ARCHIVE_OK on success.  Returns ARCHIVE_FATAL if the entry array
+ * cannot be allocated, if a record cannot be read, or if a record does not
+ * start with the "PK\001\002" signature.
+ */
+static int
+slurp_central_directory(struct archive_read *a, struct zip *zip)
+{
+	unsigned i;
+
+	__archive_read_seek(a, zip->central_directory_offset, SEEK_SET);
+
+	zip->zip_entries = calloc(zip->central_directory_entries,
+	    sizeof(struct zip_entry));
+	/* calloc() may legitimately return NULL for a zero count, so only
+	   a non-empty directory makes NULL an allocation failure. */
+	if (zip->zip_entries == NULL && zip->central_directory_entries > 0) {
+		archive_set_error(&a->archive, ENOMEM,
+		    "Can't allocate memory for Zip entries");
+		return ARCHIVE_FATAL;
+	}
+	for (i = 0; i < zip->central_directory_entries; ++i) {
+		struct zip_entry *zip_entry = &zip->zip_entries[i];
+		size_t filename_length, extra_length, comment_length;
+		uint32_t external_attributes;
+		const char *p;
+
+		if ((p = __archive_read_ahead(a, 46, NULL)) == NULL)
+			return ARCHIVE_FATAL;
+		if (memcmp(p, "PK\001\002", 4) != 0) {
+			archive_set_error(&a->archive,
+			    -1, "Invalid central directory signature");
+			return ARCHIVE_FATAL;
+		}
+		zip->have_central_directory = 1;
+		/* version = p[4]; */
+		zip_entry->system = p[5];
+		/* version_required = archive_le16dec(p + 6); */
+		zip_entry->flags = archive_le16dec(p + 8);
+		zip_entry->compression = archive_le16dec(p + 10);
+		zip_entry->mtime = zip_time(p + 12);
+		zip_entry->crc32 = archive_le32dec(p + 16);
+		zip_entry->compressed_size = archive_le32dec(p + 20);
+		zip_entry->uncompressed_size = archive_le32dec(p + 24);
+		filename_length = archive_le16dec(p + 28);
+		extra_length = archive_le16dec(p + 30);
+		comment_length = archive_le16dec(p + 32);
+		/* disk_start = archive_le16dec(p + 34); */ /* Better be zero. */
+		/* internal_attributes = archive_le16dec(p + 36); */ /* text bit */
+		external_attributes = archive_le32dec(p + 38);
+		zip_entry->local_header_offset = archive_le32dec(p + 42);
+
+		/* Only host system 3 stores a file mode in the upper 16 bits
+		   of the external attributes; otherwise assume a regular file. */
+		if (zip_entry->system == 3) {
+			zip_entry->mode = external_attributes >> 16;
+		} else {
+			zip_entry->mode = AE_IFREG | 0777;
+		}
+
+		/* Do we need to parse filename here? */
+		/* Or can we wait until we read the local header? */
+		__archive_read_consume(a,
+		    46 + filename_length + extra_length + comment_length);
+	}
+
+	/* TODO: Sort zip entries. */
+
+	return ARCHIVE_OK;
+}
+
+/*
+ * Read the header of the next entry using the central directory.
+ *
+ * The first call loads the central directory; later calls advance to the
+ * next stored entry.  The reader then seeks to the entry's local file
+ * header and parses it.  For symbolic links the entry body is taken as the
+ * link target and the entry size is reset to 0.
+ *
+ * Returns ARCHIVE_EOF when no entries remain, otherwise the first
+ * non-ARCHIVE_OK result from loading the directory or parsing the local
+ * header, ARCHIVE_FATAL on a truncated or unallocatable link target, and
+ * ARCHIVE_OK on success.
+ */
+static int
+archive_read_format_zip_seekable_read_header(struct archive_read *a,
+    struct archive_entry *entry)
+{
+	struct zip *zip = (struct zip *)a->format->data;
+	const void *target;
+	size_t target_length;
+	int status;
+
+	a->archive.archive_format = ARCHIVE_FORMAT_ZIP;
+	if (a->archive.archive_format_name == NULL)
+		a->archive.archive_format_name = "ZIP";
+
+	if (zip->zip_entries != NULL) {
+		/* Directory already loaded: step to the next entry. */
+		++zip->entry;
+	} else {
+		/* First call: pull in the whole central directory. */
+		status = slurp_central_directory(a, zip);
+		zip->entries_remaining = zip->central_directory_entries;
+		if (status != ARCHIVE_OK)
+			return (status);
+		zip->entry = zip->zip_entries;
+	}
+
+	if (zip->entries_remaining == 0)
+		return (ARCHIVE_EOF);
+	--zip->entries_remaining;
+
+	/* TODO: If entries are sorted by offset within the file, we
+	   should be able to skip here instead of seeking.  Skipping is
+	   typically faster (easier for I/O layer to optimize). */
+	__archive_read_seek(a, zip->entry->local_header_offset, SEEK_SET);
+	zip->unconsumed = 0;
+	status = zip_read_local_file_header(a, entry, zip);
+	if (status != ARCHIVE_OK)
+		return (status);
+
+	/* Only symbolic links need further work. */
+	if ((zip->entry->mode & AE_IFMT) != AE_IFLNK)
+		return (ARCHIVE_OK);
+
+	/* The body of a symlink entry is the link target. */
+	target_length = archive_entry_size(entry);
+	archive_entry_set_size(entry, 0);
+	target = __archive_read_ahead(a, target_length, NULL);
+	if (target == NULL) {
+		archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC,
+		    "Truncated Zip file");
+		return (ARCHIVE_FATAL);
+	}
+
+	/* NOTE: With a NULL converter as the last argument, this can
+	 * fail only through a memory allocation failure. */
+	if (archive_entry_copy_symlink_l(entry, target, target_length,
+	    NULL) != 0) {
+		archive_set_error(&a->archive, ENOMEM,
+		    "Can't allocate memory for Symlink");
+		return (ARCHIVE_FATAL);
+	}
+	/* TODO: handle character-set issues? */
+	return (ARCHIVE_OK);
+}
+
+/*
+ * Bid for the streaming reader by inspecting the first four input bytes.
+ *
+ * Returns -1 if four bytes cannot be read, 30 if they form one of the
+ * recognized "PK" signatures, and 0 otherwise.  best_bid is ignored.
+ */
+static int
+archive_read_format_zip_streamable_bid(struct archive_read *a, int best_bid)
+{
+	const char *sig;
+
+	(void)best_bid; /* UNUSED */
+
+	sig = __archive_read_ahead(a, 4, NULL);
+	if (sig == NULL)
+		return (-1);
+
+	if (sig[0] != 'P' || sig[1] != 'K')
+		return (0);
+
+	/*
+	 * Bid of 30 here is: 16 bits for "PK",
+	 * next 16-bit field has four options (-2 bits).
+	 * 16 + 16-2 = 30.
+	 */
+	switch (sig[2]) {
+	case '\001':
+		if (sig[3] == '\002')
+			return (30);
+		break;
+	case '\003':
+		if (sig[3] == '\004')
+			return (30);
+		break;
+	case '\005':
+		if (sig[3] == '\006')
+			return (30);
+		break;
+	case '\007':
+		if (sig[3] == '\010')
+			return (30);
+		break;
+	case '0':
+		if (sig[3] == '0')
+			return (30);
+		break;
+	default:
+		break;
+	}
+
+	return (0);
+}
+
+/*
+ * Handle a format option.
+ *
+ *   "compat-2x"   enabled when val is non-NULL; handle filenames the way
+ *                 libarchive 2.x did.
+ *   "hdrcharset"  val names the character set of stored pathnames.
+ *
+ * Returns ARCHIVE_OK when the option was applied, ARCHIVE_FATAL when the
+ * requested character-set converter cannot be created, and ARCHIVE_FAILED
+ * (with an error message set) for an unknown key or a missing charset name.
+ */
+static int
+archive_read_format_zip_options(struct archive_read *a,
+    const char *key, const char *val)
+{
+	struct zip *zip = (struct zip *)(a->format->data);
+
+	if (strcmp(key, "compat-2x") == 0) {
+		/* Handle filenames as libarchive 2.x */
+		zip->init_default_conversion = (val != NULL) ? 1 : 0;
+		return (ARCHIVE_OK);
+	}
+
+	if (strcmp(key, "hdrcharset") != 0) {
+		archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC,
+		    "zip: unknown keyword ``%s''", key);
+		return (ARCHIVE_FAILED);
+	}
+
+	if (val == NULL || val[0] == 0) {
+		archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC,
+		    "zip: hdrcharset option needs a character-set name");
+		return (ARCHIVE_FAILED);
+	}
+
+	zip->sconv = archive_string_conversion_from_charset(
+	    &a->archive, val, 0);
+	if (zip->sconv == NULL)
+		return (ARCHIVE_FATAL);
+
+	/* An explicit UTF-8 request doubles as the UTF-8 name converter. */
+	if (strcmp(val, "UTF-8") == 0)
+		zip->sconv_utf8 = zip->sconv;
+	return (ARCHIVE_OK);
+}
+
+/*
+ * Read the header of the next entry without seeking.
+ *
+ * Scans forward through the input for the next "PK" signature.  A local
+ * file header is parsed and its result returned; the start of the central
+ * directory or the end-of-central-directory record yields ARCHIVE_EOF.
+ * Returns ARCHIVE_FATAL if the single reusable entry record cannot be
+ * allocated or the input runs out before a signature is found.
+ */
+static int
+archive_read_format_zip_streamable_read_header(struct archive_read *a,
+    struct archive_entry *entry)
+{
+	struct zip *zip = (struct zip *)(a->format->data);
+
+	a->archive.archive_format = ARCHIVE_FORMAT_ZIP;
+	if (a->archive.archive_format_name == NULL)
+		a->archive.archive_format_name = "ZIP";
+
+	/* The streaming reader reuses one zip_entry for every entry. */
+	if (zip->zip_entries == NULL) {
+		zip->zip_entries = malloc(sizeof(struct zip_entry));
+		if (zip->zip_entries == NULL) {
+			archive_set_error(&a->archive, ENOMEM, "Out of memory");
+			return ARCHIVE_FATAL;
+		}
+	}
+	zip->entry = zip->zip_entries;
+	memset(zip->entry, 0, sizeof(struct zip_entry));
+
+	/* Drop whatever the previous entry left pending. */
+	__archive_read_consume(a, zip->unconsumed);
+	zip->unconsumed = 0;
+
+	/* Search ahead for the next local file header. */
+	for (;;) {
+		const char *window;
+		ssize_t avail;
+		int64_t pos;
+
+		window = __archive_read_ahead(a, 4, &avail);
+		if (window == NULL)
+			return (ARCHIVE_FATAL);
+
+		/* Slide a 4-byte signature window across the buffer. */
+		for (pos = 0; pos + 4 <= avail; ++pos) {
+			const char *sig = window + pos;
+
+			if (sig[0] != 'P' || sig[1] != 'K')
+				continue;
+
+			/* Beginning of central directory. */
+			if (sig[2] == '\001' && sig[3] == '\002')
+				return (ARCHIVE_EOF);
+
+			/* Regular file entry. */
+			if (sig[2] == '\003' && sig[3] == '\004') {
+				__archive_read_consume(a, pos);
+				return zip_read_local_file_header(a, entry, zip);
+			}
+
+			/* End of central directory. */
+			if (sig[2] == '\005' && sig[3] == '\006')
+				return (ARCHIVE_EOF);
+		}
+
+		/* Nothing here; discard what was scanned and refill. */
+		__archive_read_consume(a, pos);
+	}
+}
+
+/*
+ * Assumes file pointer is at beginning of local file header.
+ *
+ * Parses the fixed 30-byte local file header, then the pathname and the
+ * extra field that follow it, and fills in both zip->entry and 'entry'.
+ * Per-entry read state (byte counters, running CRC, end-of-entry and
+ * decompressor flags) is reset first.
+ *
+ * When a central directory has been read, its CRC and size values are
+ * kept and the local header is only compared against them; otherwise the
+ * local header values are used.
+ *
+ * Returns ARCHIVE_OK on success, ARCHIVE_WARN when the local header
+ * disagrees with the central directory or the pathname cannot be
+ * converted, and ARCHIVE_FATAL on truncated or damaged input or on
+ * memory allocation failure.
+ */
+static int
+zip_read_local_file_header(struct archive_read *a, struct archive_entry *entry,
+ struct zip *zip)
+{
+ const char *p;
+ const void *h;
+ const wchar_t *wp;
+ const char *cp;
+ size_t len, filename_length, extra_length;
+ struct archive_string_conv *sconv;
+ struct zip_entry *zip_entry = zip->entry;
+ uint32_t local_crc32;
+ int64_t compressed_size, uncompressed_size;
+ int ret = ARCHIVE_OK;
+ char version;
+
+ /* Reset the per-entry read state. */
+ zip->decompress_init = 0;
+ zip->end_of_entry = 0;
+ zip->entry_uncompressed_bytes_read = 0;
+ zip->entry_compressed_bytes_read = 0;
+ zip->entry_crc32 = crc32(0, NULL, 0);
+
+ /* Setup default conversion. */
+ if (zip->sconv == NULL && !zip->init_default_conversion) {
+ zip->sconv_default =
+ archive_string_default_conversion_for_read(&(a->archive));
+ zip->init_default_conversion = 1;
+ }
+
+ /* The fixed part of a local file header is 30 bytes long. */
+ if ((p = __archive_read_ahead(a, 30, NULL)) == NULL) {
+ archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
+ "Truncated ZIP file header");
+ return (ARCHIVE_FATAL);
+ }
+
+ if (memcmp(p, "PK\003\004", 4) != 0) {
+ archive_set_error(&a->archive, -1, "Damaged Zip archive");
+ return ARCHIVE_FATAL;
+ }
+ version = p[4];
+ zip_entry->system = p[5];
+ zip_entry->flags = archive_le16dec(p + 6);
+ zip_entry->compression = archive_le16dec(p + 8);
+ zip_entry->mtime = zip_time(p + 10);
+ local_crc32 = archive_le32dec(p + 14);
+ compressed_size = archive_le32dec(p + 18);
+ uncompressed_size = archive_le32dec(p + 22);
+ filename_length = archive_le16dec(p + 26);
+ extra_length = archive_le16dec(p + 28);
+
+ __archive_read_consume(a, 30);
+
+ if (zip->have_central_directory) {
+ /* If we read the central dir entry, we must have size information
+ as well, so ignore the length-at-end flag. */
+ zip_entry->flags &= ~ZIP_LENGTH_AT_END;
+ /* If we have values from both the local file header
+ and the central directory, warn about mismatches
+ which might indicate a damaged file. But some
+ writers always put zero in the local header; don't
+ bother warning about that. */
+ if (local_crc32 != 0 && local_crc32 != zip_entry->crc32) {
+ archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
+ "Inconsistent CRC32 values");
+ ret = ARCHIVE_WARN;
+ }
+ if (compressed_size != 0
+ && compressed_size != zip_entry->compressed_size) {
+ archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
+ "Inconsistent compressed size");
+ ret = ARCHIVE_WARN;
+ }
+ if (uncompressed_size != 0
+ && uncompressed_size != zip_entry->uncompressed_size) {
+ archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
+ "Inconsistent uncompressed size");
+ ret = ARCHIVE_WARN;
+ }
+ } else {
+ /* If we don't have the CD info, use whatever we do have. */
+ zip_entry->crc32 = local_crc32;
+ zip_entry->compressed_size = compressed_size;
+ zip_entry->uncompressed_size = uncompressed_size;
+ }
+
+ /* Read the filename. */
+ if ((h = __archive_read_ahead(a, filename_length, NULL)) == NULL) {
+ archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
+ "Truncated ZIP file header");
+ return (ARCHIVE_FATAL);
+ }
+ /* Pick the converter: UTF-8 flag first, then the hdrcharset
+ option, then the default conversion. */
+ if (zip_entry->flags & ZIP_UTF8_NAME) {
+ /* The filename is stored to be UTF-8. */
+ if (zip->sconv_utf8 == NULL) {
+ zip->sconv_utf8 =
+ archive_string_conversion_from_charset(
+ &a->archive, "UTF-8", 1);
+ if (zip->sconv_utf8 == NULL)
+ return (ARCHIVE_FATAL);
+ }
+ sconv = zip->sconv_utf8;
+ } else if (zip->sconv != NULL)
+ sconv = zip->sconv;
+ else
+ sconv = zip->sconv_default;
+
+ if (archive_entry_copy_pathname_l(entry,
+ h, filename_length, sconv) != 0) {
+ if (errno == ENOMEM) {
+ archive_set_error(&a->archive, ENOMEM,
+ "Can't allocate memory for Pathname");
+ return (ARCHIVE_FATAL);
+ }
+ /* A conversion failure is only a warning. */
+ archive_set_error(&a->archive,
+ ARCHIVE_ERRNO_FILE_FORMAT,
+ "Pathname cannot be converted "
+ "from %s to current locale.",
+ archive_string_conversion_charset_name(sconv));
+ ret = ARCHIVE_WARN;
+ }
+ __archive_read_consume(a, filename_length);
+
+ if (zip_entry->mode == 0) {
+ /* Especially in streaming mode, we can end up
+ here without having seen any mode information.
+ Guess from the filename. */
+ wp = archive_entry_pathname_w(entry);
+ if (wp != NULL) {
+ len = wcslen(wp);
+ if (len > 0 && wp[len - 1] == L'/')
+ zip_entry->mode = AE_IFDIR | 0777;
+ else
+ zip_entry->mode = AE_IFREG | 0777;
+ } else {
+ /* No wide-character pathname; use the narrow one. */
+ cp = archive_entry_pathname(entry);
+ len = (cp != NULL)?strlen(cp):0;
+ if (len > 0 && cp[len - 1] == '/')
+ zip_entry->mode = AE_IFDIR | 0777;
+ else
+ zip_entry->mode = AE_IFREG | 0777;
+ }
+ }
+
+ /* Read the extra data. */
+ if ((h = __archive_read_ahead(a, extra_length, NULL)) == NULL) {
+ archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
+ "Truncated ZIP file header");
+ return (ARCHIVE_FATAL);
+ }
+ process_extra(h, extra_length, zip_entry);
+ __archive_read_consume(a, extra_length);
+
+ /* Populate some additional entry fields: */
+ archive_entry_set_mode(entry, zip_entry->mode);
+ archive_entry_set_uid(entry, zip_entry->uid);
+ archive_entry_set_gid(entry, zip_entry->gid);
+ archive_entry_set_mtime(entry, zip_entry->mtime, 0);
+ archive_entry_set_ctime(entry, zip_entry->ctime, 0);
+ archive_entry_set_atime(entry, zip_entry->atime, 0);
+ /* Set the size only if it's meaningful. */
+ if (0 == (zip_entry->flags & ZIP_LENGTH_AT_END))
+ archive_entry_set_size(entry, zip_entry->uncompressed_size);
+
+ zip->entry_bytes_remaining = zip_entry->compressed_size;
+
+ /* If there's no body, force read_data() to return EOF immediately. */
+ if (0 == (zip_entry->flags & ZIP_LENGTH_AT_END)
+ && zip->entry_bytes_remaining < 1)
+ zip->end_of_entry = 1;
+
+ /* Set up a more descriptive format name. */
+ sprintf(zip->format_name, "ZIP %d.%d (%s)",
+ version / 10, version % 10,
+ compression_name(zip->entry->compression));
+ a->archive.archive_format_name = zip->format_name;
+
+ return (ret);
+}
+
+/*
+ * Map a Zip compression method number to a human-readable name.
+ * Methods outside the table (including negative values) yield "??".
+ */
+static const char *
+compression_name(int compression)
+{
+	static const char *compression_names[] = {
+		"uncompressed",
+		"shrinking",
+		"reduced-1",
+		"reduced-2",
+		"reduced-3",
+		"reduced-4",
+		"imploded",
+		"reserved",
+		"deflation"
+	};
+	const size_t known =
+	    sizeof(compression_names) / sizeof(compression_names[0]);
+
+	if (compression < 0 || (size_t)compression >= known)
+		return "??";
+	return compression_names[compression];
+}
+
+/*
+ * Convert an MSDOS-style date/time into Unix-style time.
+ *
+ * p points at four bytes: a little-endian 16-bit time followed by a
+ * little-endian 16-bit date.  The fields are unpacked into a struct tm
+ * and handed to mktime(), so the result is interpreted as local time.
+ */
+static time_t
+zip_time(const char *p)
+{
+	const unsigned char *raw = (const unsigned char *)p;
+	int dos_time = raw[0] + 256 * raw[1];
+	int dos_date = raw[2] + 256 * raw[3];
+	struct tm fields;
+
+	memset(&fields, 0, sizeof(fields));
+
+	/* Time word: hhhhh mmmmmm sssss, seconds stored in 2-second units. */
+	fields.tm_sec = (dos_time & 0x1f) << 1;
+	fields.tm_min = (dos_time >> 5) & 0x3f;
+	fields.tm_hour = (dos_time >> 11) & 0x1f;
+
+	/* Date word: yyyyyyy mmmm ddddd, years counted from 1980. */
+	fields.tm_mday = dos_date & 0x1f;               /* Day of month. */
+	fields.tm_mon = ((dos_date >> 5) & 0x0f) - 1;   /* Month number. */
+	fields.tm_year = ((dos_date >> 9) & 0x7f) + 80; /* Years since 1900. */
+
+	/* Let mktime() work out whether DST applies. */
+	fields.tm_isdst = -1;
+	return mktime(&fields);
+}
+
+/*
+ * Return the next block of data for the current entry.
+ *
+ * On return *buff and *size describe the block and *offset holds the
+ * number of uncompressed bytes delivered before it.  The running CRC32 is
+ * updated with every block; once the end of the entry is reached the byte
+ * counts and the CRC are checked against the entry's recorded values.
+ *
+ * Returns ARCHIVE_OK when a block is returned, ARCHIVE_EOF at the end of
+ * the entry or for non-regular files, ARCHIVE_FAILED for encrypted
+ * entries or unsupported compression methods, ARCHIVE_WARN on a size or
+ * CRC mismatch, or whatever error the underlying reader reports.
+ */
+static int
+archive_read_format_zip_read_data(struct archive_read *a,
+ const void **buff, size_t *size, int64_t *offset)
+{
+ int r;
+ struct zip *zip = (struct zip *)(a->format->data);
+
+ *offset = zip->entry_uncompressed_bytes_read;
+ *size = 0;
+ *buff = NULL;
+
+ /* If we hit end-of-entry last time, return ARCHIVE_EOF. */
+ if (zip->end_of_entry)
+ return (ARCHIVE_EOF);
+
+ /* Return EOF immediately if this is a non-regular file. */
+ if (AE_IFREG != (zip->entry->mode & AE_IFMT))
+ return (ARCHIVE_EOF);
+
+ if (zip->entry->flags & (ZIP_ENCRYPTED | ZIP_STRONG_ENCRYPTED)) {
+ archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
+ "Encrypted file is unsupported");
+ return (ARCHIVE_FAILED);
+ }
+
+ /* Release the bytes handed out by the previous call. */
+ __archive_read_consume(a, zip->unconsumed);
+ zip->unconsumed = 0;
+
+ switch(zip->entry->compression) {
+ case 0: /* No compression. */
+ r = zip_read_data_none(a, buff, size, offset);
+ break;
+#ifdef HAVE_ZLIB_H
+ case 8: /* Deflate compression. */
+ r = zip_read_data_deflate(a, buff, size, offset);
+ break;
+#endif
+ default: /* Unsupported compression. */
+ /* Return a warning. */
+ archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
+ "Unsupported ZIP compression method (%s)",
+ compression_name(zip->entry->compression));
+ /* We can't decompress this entry, but we will
+ * be able to skip() it and try the next entry. */
+ return (ARCHIVE_FAILED);
+ break;
+ }
+ if (r != ARCHIVE_OK)
+ return (r);
+ /* Update checksum */
+ if (*size)
+ zip->entry_crc32 = crc32(zip->entry_crc32, *buff, *size);
+ /* If we hit the end, swallow any end-of-data marker. */
+ if (zip->end_of_entry) {
+ /* Check file size, CRC against these values. */
+ if (zip->entry->compressed_size != zip->entry_compressed_bytes_read) {
+ archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC,
+ "ZIP compressed data is wrong size (read %jd, expected %jd)",
+ (intmax_t)zip->entry_compressed_bytes_read,
+ (intmax_t)zip->entry->compressed_size);
+ return (ARCHIVE_WARN);
+ }
+ /* Size field only stores the lower 32 bits of the actual
+ * size. */
+ if ((zip->entry->uncompressed_size & UINT32_MAX)
+ != (zip->entry_uncompressed_bytes_read & UINT32_MAX)) {
+ archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC,
+ "ZIP uncompressed data is wrong size (read %jd, expected %jd)",
+ (intmax_t)zip->entry_uncompressed_bytes_read,
+ (intmax_t)zip->entry->uncompressed_size);
+ return (ARCHIVE_WARN);
+ }
+ /* Check computed CRC against header */
+ if (zip->entry->crc32 != zip->entry_crc32) {
+ archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC,
+ "ZIP bad CRC: 0x%lx should be 0x%lx",
+ (unsigned long)zip->entry_crc32,
+ (unsigned long)zip->entry->crc32);
+ return (ARCHIVE_WARN);
+ }
+ }
+
+ return (ARCHIVE_OK);
+}
+
+/*
+ * Read "uncompressed" data. There are three cases:
+ * 1) We know the size of the data. This is always true for the
+ * seeking reader (we've examined the Central Directory already).
+ * 2) ZIP_LENGTH_AT_END was set, but only the CRC was deferred.
+ * Info-ZIP seems to do this; we know the size but have to grab
+ * the CRC from the data descriptor afterwards.
+ * 3) We're streaming and ZIP_LENGTH_AT_END was specified and
+ * we have no size information. In this case, we can do pretty
+ * well by watching for the data descriptor record. The data
+ * descriptor is 16 bytes and includes a computed CRC that should
+ * provide a strong check.
+ *
+ * TODO: Technically, the PK\007\010 signature is optional.
+ * In the original spec, the data descriptor contained CRC
+ * and size fields but had no leading signature. In practice,
+ * newer writers seem to provide the signature pretty consistently,
+ * but we might need to do something more complex here if
+ * we want to handle older archives that lack that signature.
+ *
+ * The returned block points into the read-ahead buffer; its length is
+ * added to zip->unconsumed rather than consumed here. The 'offset'
+ * argument is not used by this function.
+ *
+ * Returns ARCHIVE_OK if successful, ARCHIVE_FATAL otherwise, sets
+ * zip->end_of_entry if it consumes all of the data.
+ */
+static int
+zip_read_data_none(struct archive_read *a, const void **_buff,
+ size_t *size, int64_t *offset)
+{
+ struct zip *zip;
+ const char *buff;
+ ssize_t bytes_avail;
+
+ zip = (struct zip *)(a->format->data);
+
+ if (zip->entry->flags & ZIP_LENGTH_AT_END) {
+ const char *p;
+
+ /* Grab at least 16 bytes. */
+ buff = __archive_read_ahead(a, 16, &bytes_avail);
+ if (bytes_avail < 16) {
+ /* Zip archives have end-of-archive markers
+ that are longer than this, so a failure to get at
+ least 16 bytes really does indicate a truncated
+ file. */
+ archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
+ "Truncated ZIP file data");
+ return (ARCHIVE_FATAL);
+ }
+ /* Check for a complete PK\007\010 signature. */
+ /* The candidate is accepted only if its CRC and both sizes
+ match what has been read so far. */
+ p = buff;
+ if (p[0] == 'P' && p[1] == 'K'
+ && p[2] == '\007' && p[3] == '\010'
+ && archive_le32dec(p + 4) == zip->entry_crc32
+ && archive_le32dec(p + 8) == zip->entry_compressed_bytes_read
+ && archive_le32dec(p + 12) == zip->entry_uncompressed_bytes_read) {
+ zip->entry->crc32 = archive_le32dec(p + 4);
+ zip->entry->compressed_size = archive_le32dec(p + 8);
+ zip->entry->uncompressed_size = archive_le32dec(p + 12);
+ zip->end_of_entry = 1;
+ zip->unconsumed = 16;
+ return (ARCHIVE_OK);
+ }
+ /* If not at EOF, ensure we consume at least one byte. */
+ ++p;
+
+ /* Scan forward until we see where a PK\007\010 signature might be. */
+ /* Return bytes up until that point. On the next call, the code
+ above will verify the data descriptor. */
+ /* The byte at p[3] decides how far the window can jump
+ without stepping over a possible signature start. */
+ while (p < buff + bytes_avail - 4) {
+ if (p[3] == 'P') { p += 3; }
+ else if (p[3] == 'K') { p += 2; }
+ else if (p[3] == '\007') { p += 1; }
+ else if (p[3] == '\010' && p[2] == '\007'
+ && p[1] == 'K' && p[0] == 'P') {
+ break;
+ } else { p += 4; }
+ }
+ bytes_avail = p - buff;
+ } else {
+ if (zip->entry_bytes_remaining == 0) {
+ zip->end_of_entry = 1;
+ return (ARCHIVE_OK);
+ }
+ /* Grab a bunch of bytes. */
+ buff = __archive_read_ahead(a, 1, &bytes_avail);
+ if (bytes_avail <= 0) {
+ archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
+ "Truncated ZIP file data");
+ return (ARCHIVE_FATAL);
+ }
+ /* Never hand out more than the entry still has left. */
+ if (bytes_avail > zip->entry_bytes_remaining)
+ bytes_avail = zip->entry_bytes_remaining;
+ }
+ *size = bytes_avail;
+ zip->entry_bytes_remaining -= bytes_avail;
+ zip->entry_uncompressed_bytes_read += bytes_avail;
+ zip->entry_compressed_bytes_read += bytes_avail;
+ zip->unconsumed += bytes_avail;
+ *_buff = buff;
+ return (ARCHIVE_OK);
+}
+
+#ifdef HAVE_ZLIB_H
+/*
+ * Inflate the next block of a deflate-compressed entry.
+ *
+ * The output buffer (256 KiB) is allocated on first use and the zlib
+ * stream is initialized, or reset, at the start of each entry. Each call
+ * makes one inflate() pass over whatever input is available, consumes
+ * the input zlib actually used and returns the output through *buff and
+ * *size. The 'offset' argument is not used by this function.
+ *
+ * Sets zip->end_of_entry when zlib reports the end of the stream. If the
+ * entry has ZIP_LENGTH_AT_END set, the following 16 bytes are then
+ * inspected for a PK\007\010 data descriptor, whose CRC and sizes are
+ * copied into the entry.
+ *
+ * Returns ARCHIVE_OK on success and ARCHIVE_FATAL on allocation failure,
+ * truncated input or a decompression error.
+ */
+static int
+zip_read_data_deflate(struct archive_read *a, const void **buff,
+ size_t *size, int64_t *offset)
+{
+ struct zip *zip;
+ ssize_t bytes_avail;
+ const void *compressed_buff;
+ int r;
+
+ zip = (struct zip *)(a->format->data);
+
+ /* If the buffer hasn't been allocated, allocate it now. */
+ if (zip->uncompressed_buffer == NULL) {
+ zip->uncompressed_buffer_size = 256 * 1024;
+ zip->uncompressed_buffer
+ = (unsigned char *)malloc(zip->uncompressed_buffer_size);
+ if (zip->uncompressed_buffer == NULL) {
+ archive_set_error(&a->archive, ENOMEM,
+ "No memory for ZIP decompression");
+ return (ARCHIVE_FATAL);
+ }
+ }
+
+ /* If we haven't yet read any data, initialize the decompressor. */
+ if (!zip->decompress_init) {
+ /* Reuse a stream left over from an earlier entry if there is one. */
+ if (zip->stream_valid)
+ r = inflateReset(&zip->stream);
+ else
+ r = inflateInit2(&zip->stream,
+ -15 /* Don't check for zlib header */);
+ if (r != Z_OK) {
+ archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC,
+ "Can't initialize ZIP decompression.");
+ return (ARCHIVE_FATAL);
+ }
+ /* Stream structure has been set up. */
+ zip->stream_valid = 1;
+ /* We've initialized decompression for this stream. */
+ zip->decompress_init = 1;
+ }
+
+ /*
+ * Note: '1' here is a performance optimization.
+ * Recall that the decompression layer returns a count of
+ * available bytes; asking for more than that forces the
+ * decompressor to combine reads by copying data.
+ */
+ compressed_buff = __archive_read_ahead(a, 1, &bytes_avail);
+ /* With a known length, never feed zlib bytes past the entry's end. */
+ if (0 == (zip->entry->flags & ZIP_LENGTH_AT_END)
+ && bytes_avail > zip->entry_bytes_remaining) {
+ bytes_avail = zip->entry_bytes_remaining;
+ }
+ if (bytes_avail <= 0) {
+ archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
+ "Truncated ZIP file body");
+ return (ARCHIVE_FATAL);
+ }
+
+ /*
+ * A bug in zlib.h: stream.next_in should be marked 'const'
+ * but isn't (the library never alters data through the
+ * next_in pointer, only reads it). The result: this ugly
+ * cast to remove 'const'.
+ */
+ zip->stream.next_in = (Bytef *)(uintptr_t)(const void *)compressed_buff;
+ zip->stream.avail_in = bytes_avail;
+ /* Zero the totals so they report this call's byte counts only. */
+ zip->stream.total_in = 0;
+ zip->stream.next_out = zip->uncompressed_buffer;
+ zip->stream.avail_out = zip->uncompressed_buffer_size;
+ zip->stream.total_out = 0;
+
+ r = inflate(&zip->stream, 0);
+ switch (r) {
+ case Z_OK:
+ break;
+ case Z_STREAM_END:
+ zip->end_of_entry = 1;
+ break;
+ case Z_MEM_ERROR:
+ archive_set_error(&a->archive, ENOMEM,
+ "Out of memory for ZIP decompression");
+ return (ARCHIVE_FATAL);
+ default:
+ archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC,
+ "ZIP decompression failed (%d)", r);
+ return (ARCHIVE_FATAL);
+ }
+
+ /* Consume as much as the compressor actually used. */
+ bytes_avail = zip->stream.total_in;
+ __archive_read_consume(a, bytes_avail);
+ zip->entry_bytes_remaining -= bytes_avail;
+ zip->entry_compressed_bytes_read += bytes_avail;
+
+ *size = zip->stream.total_out;
+ zip->entry_uncompressed_bytes_read += zip->stream.total_out;
+ *buff = zip->uncompressed_buffer;
+
+ if (zip->end_of_entry && (zip->entry->flags & ZIP_LENGTH_AT_END)) {
+ const char *p;
+
+ if (NULL == (p = __archive_read_ahead(a, 16, NULL))) {
+ archive_set_error(&a->archive,
+ ARCHIVE_ERRNO_FILE_FORMAT,
+ "Truncated ZIP end-of-file record");
+ return (ARCHIVE_FATAL);
+ }
+ /* Consume the optional PK\007\010 marker. */
+ /* The 16 bytes are left in zip->unconsumed for a later consume. */
+ if (p[0] == 'P' && p[1] == 'K' && p[2] == '\007' && p[3] == '\010') {
+ zip->entry->crc32 = archive_le32dec(p + 4);
+ zip->entry->compressed_size = archive_le32dec(p + 8);
+ zip->entry->uncompressed_size = archive_le32dec(p + 12);
+ zip->unconsumed = 16;
+ }
+ }
+
+ return (ARCHIVE_OK);
+}
+#endif
+
+/*
+ * Skip whatever is left of the current entry's body.
+ *
+ * Nothing is done if the entry has already been read to its end or if
+ * a central directory is in use.  Otherwise we are streaming: when the
+ * compressed length is known the remaining bytes are simply consumed;
+ * when it is not (ZIP_LENGTH_AT_END), deflate bodies are decompressed
+ * until zip_read_data_deflate() reports end of entry, and any other
+ * body is scanned for the PK\007\010 signature.
+ *
+ * Returns ARCHIVE_OK on success, ARCHIVE_FATAL if the known-length
+ * consume fails or the input ends before a signature is found, or the
+ * non-OK status returned by zip_read_data_deflate().
+ */
+static int
+archive_read_format_zip_read_data_skip(struct archive_read *a)
+{
+	struct zip *zip;
+
+	zip = (struct zip *)(a->format->data);
+
+	/* If we've already read to end of data, we're done. */
+	if (zip->end_of_entry)
+		return (ARCHIVE_OK);
+	/* If we're seeking, we're done. */
+	if (zip->have_central_directory)
+		return (ARCHIVE_OK);
+
+	/* So we know we're streaming... */
+	if (0 == (zip->entry->flags & ZIP_LENGTH_AT_END)) {
+		/* We know the compressed length, so we can just skip. */
+		int64_t bytes_skipped = __archive_read_consume(a,
+		    zip->entry_bytes_remaining + zip->unconsumed);
+		if (bytes_skipped < 0)
+			return (ARCHIVE_FATAL);
+		zip->unconsumed = 0;
+		return (ARCHIVE_OK);
+	}
+
+	/* We're streaming and we don't know the length. */
+	/* If the body is compressed and we know the format, we can
+	 * find an exact end-of-entry by decompressing it. */
+	switch (zip->entry->compression) {
+#ifdef HAVE_ZLIB_H
+	case 8: /* Deflate compression. */
+		/* The decompressed output is discarded; we only want the
+		 * reader to advance to the end of the entry. */
+		while (!zip->end_of_entry) {
+			int64_t offset = 0;
+			const void *buff = NULL;
+			size_t size = 0;
+			int r;
+			r = zip_read_data_deflate(a, &buff, &size, &offset);
+			if (r != ARCHIVE_OK)
+				return (r);
+		}
+		break;
+#endif
+	default: /* Uncompressed or unknown. */
+		/* Scan for a PK\007\010 signature. */
+		__archive_read_consume(a, zip->unconsumed);
+		zip->unconsumed = 0;
+		for (;;) {
+			const char *p, *buff;
+			ssize_t bytes_avail;
+			buff = __archive_read_ahead(a, 16, &bytes_avail);
+			if (bytes_avail < 16) {
+				archive_set_error(&a->archive,
+				    ARCHIVE_ERRNO_FILE_FORMAT,
+				    "Truncated ZIP file data");
+				return (ARCHIVE_FATAL);
+			}
+			p = buff;
+			/*
+			 * Use "<=" so the final 16-byte window is examined
+			 * too.  With "<" a buffer of exactly 16 bytes was
+			 * never scanned and nothing was consumed, so this
+			 * loop could not make progress.  Every window we
+			 * test still lies entirely inside the buffer.
+			 */
+			while (p <= buff + bytes_avail - 16) {
+				/*
+				 * Look at p[3] first: if it is an earlier
+				 * byte of the signature, realign on it;
+				 * if it cannot be part of one, skip all
+				 * four bytes at once.
+				 */
+				if (p[3] == 'P') { p += 3; }
+				else if (p[3] == 'K') { p += 2; }
+				else if (p[3] == '\007') { p += 1; }
+				else if (p[3] == '\010' && p[2] == '\007'
+				    && p[1] == 'K' && p[0] == 'P') {
+					/* Drop everything up to and including
+					 * the 16 bytes starting at the
+					 * signature. */
+					__archive_read_consume(a, p - buff + 16);
+					return ARCHIVE_OK;
+				} else { p += 4; }
+			}
+			/* Discard what has been ruled out and fetch more. */
+			__archive_read_consume(a, p - buff);
+		}
+	}
+	return ARCHIVE_OK;
+}
+
+/*
+ * Tear down the ZIP reader state attached to this archive handle:
+ * end the inflate stream if one is marked valid, release the buffers
+ * and the state structure itself, and clear the format's data pointer.
+ * Always returns ARCHIVE_OK.
+ */
+static int
+archive_read_format_zip_cleanup(struct archive_read *a)
+{
+	struct zip *state = (struct zip *)(a->format->data);
+
+#ifdef HAVE_ZLIB_H
+	/* Only end a stream that has been flagged as valid. */
+	if (state->stream_valid) {
+		inflateEnd(&state->stream);
+	}
+#endif
+	/* Release the members first, then the container. */
+	archive_string_free(&state->extra);
+	free(state->uncompressed_buffer);
+	free(state->zip_entries);
+	free(state);
+
+	/* Leave no dangling pointer behind in the format slot. */
+	a->format->data = NULL;
+	return (ARCHIVE_OK);
+}
+
+/*
+ * The extra data is stored as a list of
+ *	id1+size1+data1 + id2+size2+data2 ...
+ * triplets.  id and size are 2 bytes each.
+ *
+ * p points at extra_length bytes of extra data.  Recognized fields
+ * update the sizes, timestamps and uid/gid stored in zip_entry;
+ * unrecognized fields are skipped.  Parsing stops silently at the first
+ * field whose declared size would run past extra_length, and every read
+ * is checked against the field's own size so malformed input cannot
+ * make us read outside the buffer.
+ */
+static void
+process_extra(const char *p, size_t extra_length, struct zip_entry* zip_entry)
+{
+	unsigned offset = 0;
+
+	/*
+	 * Written as "offset + 4 <= extra_length" on purpose:
+	 * extra_length is unsigned, so "extra_length - 4" wraps around
+	 * to a huge value when fewer than 4 bytes are present.
+	 */
+	while ((size_t)offset + 4 <= extra_length)
+	{
+		unsigned short headerid = archive_le16dec(p + offset);
+		unsigned short datasize = archive_le16dec(p + offset + 2);
+		offset += 4;
+		/* Field claims more data than we were given: give up. */
+		if (offset + datasize > extra_length)
+			break;
+#ifdef DEBUG
+		fprintf(stderr, "Header id 0x%x, length %d\n",
+		    headerid, datasize);
+#endif
+		switch (headerid) {
+		case 0x0001:
+			/* Zip64 extended information extra field. */
+			if (datasize >= 8)
+				zip_entry->uncompressed_size =
+				    archive_le64dec(p + offset);
+			if (datasize >= 16)
+				zip_entry->compressed_size =
+				    archive_le64dec(p + offset + 8);
+			break;
+		case 0x5455:
+		{
+			/* Extended time field "UT". */
+			int flags;
+
+			/*
+			 * An empty field has no flags byte.  Reading it
+			 * anyway would touch memory outside the field, and
+			 * the decrement below would wrap datasize to 65535,
+			 * defeating every later length check.
+			 */
+			if (datasize < 1)
+				break;
+			flags = p[offset];
+			offset++;
+			datasize--;
+			/* Flag bits indicate which dates are present. */
+			if (flags & 0x01)
+			{
+				if (datasize < 4)
+					break;
+#ifdef DEBUG
+				fprintf(stderr, "mtime: %lld -> %d\n",
+				    (long long)zip_entry->mtime,
+				    archive_le32dec(p + offset));
+#endif
+				zip_entry->mtime = archive_le32dec(p + offset);
+				offset += 4;
+				datasize -= 4;
+			}
+			if (flags & 0x02)
+			{
+				if (datasize < 4)
+					break;
+				zip_entry->atime = archive_le32dec(p + offset);
+				offset += 4;
+				datasize -= 4;
+			}
+			if (flags & 0x04)
+			{
+				if (datasize < 4)
+					break;
+				zip_entry->ctime = archive_le32dec(p + offset);
+				offset += 4;
+				datasize -= 4;
+			}
+			break;
+		}
+		case 0x5855:
+		{
+			/* Info-ZIP Unix Extra Field (old version) "UX". */
+			if (datasize >= 8) {
+				zip_entry->atime = archive_le32dec(p + offset);
+				zip_entry->mtime = archive_le32dec(p + offset + 4);
+			}
+			if (datasize >= 12) {
+				zip_entry->uid = archive_le16dec(p + offset + 8);
+				zip_entry->gid = archive_le16dec(p + offset + 10);
+			}
+			break;
+		}
+		case 0x7855:
+			/* Info-ZIP Unix Extra Field (type 2) "Ux". */
+#ifdef DEBUG
+			/* Only decode for the trace what is really there. */
+			if (datasize >= 4)
+				fprintf(stderr, "uid %d gid %d\n",
+				    archive_le16dec(p + offset),
+				    archive_le16dec(p + offset + 2));
+#endif
+			if (datasize >= 2)
+				zip_entry->uid = archive_le16dec(p + offset);
+			if (datasize >= 4)
+				zip_entry->gid = archive_le16dec(p + offset + 2);
+			break;
+		case 0x7875:
+		{
+			/* Info-Zip Unix Extra Field (type 3) "ux". */
+			int uidsize = 0, gidsize = 0;
+
+			if (datasize >= 1 && p[offset] == 1) {/* version=1 */
+				if (datasize >= 4) {
+					/*
+					 * get a uid size.  Read it as an
+					 * unsigned byte: through a signed
+					 * char a value >= 0x80 would turn
+					 * negative and slip past the size
+					 * checks below.
+					 */
+					uidsize = (unsigned char)p[offset+1];
+					if (uidsize == 2)
+						zip_entry->uid = archive_le16dec(
+						     p + offset + 2);
+					else if (uidsize == 4 && datasize >= 6)
+						zip_entry->uid = archive_le32dec(
+						     p + offset + 2);
+				}
+				if (datasize >= (2 + uidsize + 3)) {
+					/* get a gid size. */
+					gidsize = (unsigned char)
+					    p[offset+2+uidsize];
+					if (gidsize == 2)
+						zip_entry->gid = archive_le16dec(
+						    p+offset+2+uidsize+1);
+					else if (gidsize == 4 &&
+					    datasize >= (2 + uidsize + 5))
+						zip_entry->gid = archive_le32dec(
+						    p+offset+2+uidsize+1);
+				}
+			}
+			break;
+		}
+		default:
+			break;
+		}
+		/* Step over whatever part of the field is still unread. */
+		offset += datasize;
+	}
+#ifdef DEBUG
+	if (offset != extra_length)
+	{
+		fprintf(stderr,
+		    "Extra data field contents do not match reported size!\n");
+	}
+#endif
+}