diff options
Diffstat (limited to 'Utilities/cmlibarchive/libarchive/archive_read_support_format_zip.c')
-rw-r--r-- | Utilities/cmlibarchive/libarchive/archive_read_support_format_zip.c | 734 |
1 files changed, 605 insertions, 129 deletions
diff --git a/Utilities/cmlibarchive/libarchive/archive_read_support_format_zip.c b/Utilities/cmlibarchive/libarchive/archive_read_support_format_zip.c index d15080263..2fdc08b6a 100644 --- a/Utilities/cmlibarchive/libarchive/archive_read_support_format_zip.c +++ b/Utilities/cmlibarchive/libarchive/archive_read_support_format_zip.c @@ -1,6 +1,6 @@ /*- * Copyright (c) 2004 Tim Kientzle - * Copyright (c) 2011 Michihiro NAKAJIMA + * Copyright (c) 2011-2012 Michihiro NAKAJIMA * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -38,23 +38,26 @@ __FBSDID("$FreeBSD: head/lib/libarchive/archive_read_support_format_zip.c 201102 #endif #include "archive.h" +#include "archive_endian.h" #include "archive_entry.h" #include "archive_entry_locale.h" #include "archive_private.h" +#include "archive_rb.h" #include "archive_read_private.h" -#include "archive_endian.h" #ifndef HAVE_ZLIB_H #include "archive_crc32.h" #endif struct zip_entry { + struct archive_rb_node node; int64_t local_header_offset; int64_t compressed_size; int64_t uncompressed_size; int64_t gid; int64_t uid; struct archive_entry *entry; + struct archive_string rsrcname; time_t mtime; time_t atime; time_t ctime; @@ -67,15 +70,19 @@ struct zip_entry { struct zip { /* Structural information about the archive. */ + int64_t end_of_central_directory_offset; int64_t central_directory_offset; size_t central_directory_size; size_t central_directory_entries; char have_central_directory; + int64_t offset; /* List of entries (seekable Zip only) */ size_t entries_remaining; struct zip_entry *zip_entries; struct zip_entry *entry; + struct archive_rb_tree tree; + struct archive_rb_tree tree_rsrc; size_t unconsumed; @@ -116,29 +123,36 @@ struct zip { #define ZIP_STRONG_ENCRYPTED (1<<6) #define ZIP_UTF8_NAME (1<<11) -static int archive_read_format_zip_streamable_bid(struct archive_read *, int); -static int archive_read_format_zip_seekable_bid(struct archive_read *, int); +static int archive_read_format_zip_streamable_bid(struct archive_read *, + int); +static int archive_read_format_zip_seekable_bid(struct archive_read *, + int); static int archive_read_format_zip_options(struct archive_read *, const char *, const char *); static int archive_read_format_zip_cleanup(struct archive_read *); static int archive_read_format_zip_read_data(struct archive_read *, const void **, size_t *, int64_t *); static int archive_read_format_zip_read_data_skip(struct archive_read *a); -static int archive_read_format_zip_seekable_read_header(struct archive_read *, - struct archive_entry *); -static int archive_read_format_zip_streamable_read_header(struct archive_read *, - struct archive_entry *); +static int archive_read_format_zip_seekable_read_header( + struct archive_read *, struct archive_entry *); +static int archive_read_format_zip_streamable_read_header( + struct archive_read *, struct archive_entry *); +static ssize_t zip_get_local_file_header_size(struct archive_read *, size_t); #ifdef HAVE_ZLIB_H +static int zip_deflate_init(struct archive_read *, struct zip *); static int zip_read_data_deflate(struct archive_read *a, const void **buff, size_t *size, int64_t *offset); #endif static int zip_read_data_none(struct archive_read *a, const void **buff, size_t *size, int64_t *offset); static int zip_read_local_file_header(struct archive_read *a, - struct archive_entry *entry, struct zip *); + struct archive_entry *entry, struct zip *); static time_t zip_time(const char *); static const char *compression_name(int compression); -static void process_extra(const char *, size_t, struct zip_entry *); +static void process_extra(const char *, size_t, struct zip_entry *); + +int archive_read_support_format_zip_streamable(struct archive *); +int archive_read_support_format_zip_seekable(struct archive *); int archive_read_support_format_zip_streamable(struct archive *_a) @@ -166,6 +180,7 @@ archive_read_support_format_zip_streamable(struct archive *_a) archive_read_format_zip_streamable_read_header, archive_read_format_zip_read_data, archive_read_format_zip_read_data_skip, + NULL, archive_read_format_zip_cleanup); if (r != ARCHIVE_OK) @@ -199,6 +214,7 @@ archive_read_support_format_zip_seekable(struct archive *_a) archive_read_format_zip_seekable_read_header, archive_read_format_zip_read_data, archive_read_format_zip_read_data_skip, + NULL, archive_read_format_zip_cleanup); if (r != ARCHIVE_OK) @@ -217,14 +233,13 @@ archive_read_support_format_zip(struct archive *a) } /* - * TODO: This is a performance sink because it forces - * the read core to drop buffered data from the start - * of file, which will then have to be re-read again - * if this bidder loses. + * TODO: This is a performance sink because it forces the read core to + * drop buffered data from the start of file, which will then have to + * be re-read again if this bidder loses. * - * Consider passing in the winning bid value to subsequent - * bidders so that this bidder in particular can avoid - * seeking if it knows it's going to lose anyway. + * We workaround this a little by passing in the best bid so far so + * that later bidders can do nothing if they know they'll never + * outbid. But we can certainly do better... */ static int archive_read_format_zip_seekable_bid(struct archive_read *a, int best_bid) @@ -249,8 +264,48 @@ archive_read_format_zip_seekable_bid(struct archive_read *a, int best_bid) /* First four bytes are signature for end of central directory record. Four zero bytes ensure this isn't a multi-volume Zip file (which we don't yet support). */ - if (memcmp(p, "PK\005\006\000\000\000\000", 8) != 0) - return 0; + if (memcmp(p, "PK\005\006\000\000\000\000", 8) != 0) { + int64_t i, tail; + int found; + + /* + * If there is a comment in end of central directory + * record, 22 bytes are too short. we have to read more + * to properly detect the record. Hopefully, a length + * of the comment is not longer than 16362 bytes(16K-22). + */ + if (filesize + 22 > 1024 * 16) { + tail = 1024 * 16; + filesize = __archive_read_seek(a, tail * -1, SEEK_END); + } else { + tail = filesize + 22; + filesize = __archive_read_seek(a, 0, SEEK_SET); + } + if (filesize < 0) + return 0; + if ((p = __archive_read_ahead(a, (size_t)tail, NULL)) == NULL) + return 0; + for (found = 0, i = 0;!found && i < tail - 22;) { + switch (p[i]) { + case 'P': + if (memcmp(p+i, + "PK\005\006\000\000\000\000", 8) == 0) { + p += i; + filesize += tail - + (22 + archive_le16dec(p+20)); + found = 1; + } else + i += 8; + break; + case 'K': i += 7; break; + case 005: i += 6; break; + case 006: i += 5; break; + default: i += 1; break; + } + } + if (!found) + return 0; + } /* Since we've already done the hard work of finding the end of central directory record, let's save the important @@ -258,12 +313,14 @@ archive_read_format_zip_seekable_bid(struct archive_read *a, int best_bid) zip->central_directory_entries = archive_le16dec(p + 10); zip->central_directory_size = archive_le32dec(p + 12); zip->central_directory_offset = archive_le32dec(p + 16); + zip->end_of_central_directory_offset = filesize; /* Just one volume, so central dir must all be on this volume. */ if (zip->central_directory_entries != archive_le16dec(p + 8)) return 0; /* Central directory can't extend beyond end of this file. */ - if (zip->central_directory_offset + zip->central_directory_size > filesize) + if (zip->central_directory_offset + + (int64_t)zip->central_directory_size > filesize) return 0; /* This is just a tiny bit higher than the maximum returned by @@ -273,18 +330,117 @@ archive_read_format_zip_seekable_bid(struct archive_read *a, int best_bid) } static int +cmp_node(const struct archive_rb_node *n1, const struct archive_rb_node *n2) +{ + const struct zip_entry *e1 = (const struct zip_entry *)n1; + const struct zip_entry *e2 = (const struct zip_entry *)n2; + + return ((int)(e2->local_header_offset - e1->local_header_offset)); +} + +static int +cmp_key(const struct archive_rb_node *n, const void *key) +{ + /* This function won't be called */ + (void)n; /* UNUSED */ + (void)key; /* UNUSED */ + return 1; +} + +static int +rsrc_cmp_node(const struct archive_rb_node *n1, + const struct archive_rb_node *n2) +{ + const struct zip_entry *e1 = (const struct zip_entry *)n1; + const struct zip_entry *e2 = (const struct zip_entry *)n2; + + return (strcmp(e2->rsrcname.s, e1->rsrcname.s)); +} + +static int +rsrc_cmp_key(const struct archive_rb_node *n, const void *key) +{ + const struct zip_entry *e = (const struct zip_entry *)n; + return (strcmp((const char *)key, e->rsrcname.s)); +} + +static const char * +rsrc_basename(const char *name, size_t name_length) +{ + const char *s, *r; + + r = s = name; + for (;;) { + s = memchr(s, '/', name_length - (s - name)); + if (s == NULL) + break; + r = ++s; + } + return (r); +} + +static void +expose_parent_dirs(struct zip *zip, const char *name, size_t name_length) +{ + struct archive_string str; + struct zip_entry *dir; + char *s; + + archive_string_init(&str); + archive_strncpy(&str, name, name_length); + for (;;) { + s = strrchr(str.s, '/'); + if (s == NULL) + break; + *s = '\0'; + /* Transfer the parent directory from zip->tree_rsrc RB + * tree to zip->tree RB tree to expose. */ + dir = (struct zip_entry *) + __archive_rb_tree_find_node(&zip->tree_rsrc, str.s); + if (dir == NULL) + break; + __archive_rb_tree_remove_node(&zip->tree_rsrc, &dir->node); + archive_string_free(&dir->rsrcname); + __archive_rb_tree_insert_node(&zip->tree, &dir->node); + } + archive_string_free(&str); +} + +static int slurp_central_directory(struct archive_read *a, struct zip *zip) { unsigned i; + int64_t correction; + static const struct archive_rb_tree_ops rb_ops = { + &cmp_node, &cmp_key + }; + static const struct archive_rb_tree_ops rb_rsrc_ops = { + &rsrc_cmp_node, &rsrc_cmp_key + }; + + /* + * Consider the archive file we are reading may be SFX. + * So we have to calculate a SFX header size to revise + * ZIP header offsets. + */ + correction = zip->end_of_central_directory_offset - + (zip->central_directory_offset + zip->central_directory_size); + /* The central directory offset is relative value, and so + * we revise this offset for SFX. */ + zip->central_directory_offset += correction; __archive_read_seek(a, zip->central_directory_offset, SEEK_SET); + zip->offset = zip->central_directory_offset; + __archive_rb_tree_init(&zip->tree, &rb_ops); + __archive_rb_tree_init(&zip->tree_rsrc, &rb_rsrc_ops); - zip->zip_entries = calloc(zip->central_directory_entries, sizeof(struct zip_entry)); + zip->zip_entries = calloc(zip->central_directory_entries, + sizeof(struct zip_entry)); for (i = 0; i < zip->central_directory_entries; ++i) { struct zip_entry *zip_entry = &zip->zip_entries[i]; size_t filename_length, extra_length, comment_length; uint32_t external_attributes; - const char *p; + const char *name, *p, *r; if ((p = __archive_read_ahead(a, 46, NULL)) == NULL) return ARCHIVE_FATAL; @@ -298,7 +454,7 @@ slurp_central_directory(struct archive_read *a, struct zip *zip) zip_entry->system = p[5]; /* version_required = archive_le16dec(p + 6); */ zip_entry->flags = archive_le16dec(p + 8); - zip_entry->compression = archive_le16dec(p + 10); + zip_entry->compression = (char)archive_le16dec(p + 10); zip_entry->mtime = zip_time(p + 12); zip_entry->crc32 = archive_le32dec(p + 16); zip_entry->compressed_size = archive_le32dec(p + 20); @@ -309,31 +465,233 @@ slurp_central_directory(struct archive_read *a, struct zip *zip) /* disk_start = archive_le16dec(p + 34); */ /* Better be zero. */ /* internal_attributes = archive_le16dec(p + 36); */ /* text bit */ external_attributes = archive_le32dec(p + 38); - zip_entry->local_header_offset = archive_le32dec(p + 42); + zip_entry->local_header_offset = + archive_le32dec(p + 42) + correction; + /* If we can't guess the mode, leave it zero here; + when we read the local file header we might get + more information. */ + zip_entry->mode = 0; if (zip_entry->system == 3) { zip_entry->mode = external_attributes >> 16; + } + + /* + * Mac resource fork files are stored under the + * "__MACOSX/" directory, so we should check if + * it is. + */ + /* Make sure we have the file name. */ + if ((p = __archive_read_ahead(a, 46 + filename_length, NULL)) + == NULL) + return ARCHIVE_FATAL; + name = p + 46; + r = rsrc_basename(name, filename_length); + if (filename_length >= 9 && + strncmp("__MACOSX/", name, 9) == 0) { + /* If this file is not a resource fork nor + * a directory. We should treat it as a non + * resource fork file to expose it. */ + if (name[filename_length-1] != '/' && + (r - name < 3 || r[0] != '.' || r[1] != '_')) { + __archive_rb_tree_insert_node(&zip->tree, + &zip_entry->node); + /* Expose its parent directories. */ + expose_parent_dirs(zip, name, filename_length); + } else { + /* This file is a resource fork file or + * a directory. */ + archive_strncpy(&(zip_entry->rsrcname), name, + filename_length); + __archive_rb_tree_insert_node(&zip->tree_rsrc, + &zip_entry->node); + } } else { - zip_entry->mode = AE_IFREG | 0777; + /* Generate resource fork name to find its resource + * file at zip->tree_rsrc. */ + archive_strcpy(&(zip_entry->rsrcname), "__MACOSX/"); + archive_strncat(&(zip_entry->rsrcname), name, r - name); + archive_strcat(&(zip_entry->rsrcname), "._"); + archive_strncat(&(zip_entry->rsrcname), + name + (r - name), filename_length - (r - name)); + /* Register an entry to RB tree to sort it by + * file offset. */ + __archive_rb_tree_insert_node(&zip->tree, + &zip_entry->node); } - /* Do we need to parse filename here? */ - /* Or can we wait until we read the local header? */ + /* We don't read the filename until we get to the + local file header. Reading it here would speed up + table-of-contents operations (removing the need to + find and read local file header to get the + filename) at the cost of requiring a lot of extra + space. */ + /* We don't read the extra block here. We assume it + will be duplicated at the local file header. */ __archive_read_consume(a, 46 + filename_length + extra_length + comment_length); } - /* TODO: Sort zip entries. */ - return ARCHIVE_OK; } +static int64_t +zip_read_consume(struct archive_read *a, int64_t bytes) +{ + struct zip *zip = (struct zip *)a->format->data; + int64_t skip; + + skip = __archive_read_consume(a, bytes); + if (skip > 0) + zip->offset += skip; + return (skip); +} + +static int +zip_read_mac_metadata(struct archive_read *a, struct archive_entry *entry, + struct zip_entry *rsrc) +{ + struct zip *zip = (struct zip *)a->format->data; + unsigned char *metadata, *mp; + int64_t offset = zip->offset; + size_t remaining_bytes, metadata_bytes; + ssize_t hsize; + int ret = ARCHIVE_OK, eof; + + switch(rsrc->compression) { + case 0: /* No compression. */ +#ifdef HAVE_ZLIB_H + case 8: /* Deflate compression. */ +#endif + break; + default: /* Unsupported compression. */ + /* Return a warning. */ + archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT, + "Unsupported ZIP compression method (%s)", + compression_name(rsrc->compression)); + /* We can't decompress this entry, but we will + * be able to skip() it and try the next entry. */ + return (ARCHIVE_WARN); + } + + if (rsrc->uncompressed_size > (128 * 1024)) { + archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT, + "Mac metadata is too large: %jd > 128K bytes", + (intmax_t)rsrc->uncompressed_size); + return (ARCHIVE_WARN); + } + + metadata = malloc((size_t)rsrc->uncompressed_size); + if (metadata == NULL) { + archive_set_error(&a->archive, ENOMEM, + "Can't allocate memory for Mac metadata"); + return (ARCHIVE_FATAL); + } + + if (zip->offset < rsrc->local_header_offset) + zip_read_consume(a, rsrc->local_header_offset - zip->offset); + else if (zip->offset != rsrc->local_header_offset) { + __archive_read_seek(a, rsrc->local_header_offset, SEEK_SET); + zip->offset = zip->entry->local_header_offset; + } + + hsize = zip_get_local_file_header_size(a, 0); + zip_read_consume(a, hsize); + + remaining_bytes = (size_t)rsrc->compressed_size; + metadata_bytes = (size_t)rsrc->uncompressed_size; + mp = metadata; + eof = 0; + while (!eof && remaining_bytes) { + const unsigned char *p; + ssize_t bytes_avail; + size_t bytes_used; + + p = __archive_read_ahead(a, 1, &bytes_avail); + if (p == NULL) { + archive_set_error(&a->archive, + ARCHIVE_ERRNO_FILE_FORMAT, + "Truncated ZIP file header"); + ret = ARCHIVE_WARN; + goto exit_mac_metadata; + } + if ((size_t)bytes_avail > remaining_bytes) + bytes_avail = remaining_bytes; + switch(rsrc->compression) { + case 0: /* No compression. */ + memcpy(mp, p, bytes_avail); + bytes_used = (size_t)bytes_avail; + metadata_bytes -= bytes_used; + mp += bytes_used; + if (metadata_bytes == 0) + eof = 1; + break; +#ifdef HAVE_ZLIB_H + case 8: /* Deflate compression. */ + { + int r; + + ret = zip_deflate_init(a, zip); + if (ret != ARCHIVE_OK) + goto exit_mac_metadata; + zip->stream.next_in = + (Bytef *)(uintptr_t)(const void *)p; + zip->stream.avail_in = (uInt)bytes_avail; + zip->stream.total_in = 0; + zip->stream.next_out = mp; + zip->stream.avail_out = (uInt)metadata_bytes; + zip->stream.total_out = 0; + + r = inflate(&zip->stream, 0); + switch (r) { + case Z_OK: + break; + case Z_STREAM_END: + eof = 1; + break; + case Z_MEM_ERROR: + archive_set_error(&a->archive, ENOMEM, + "Out of memory for ZIP decompression"); + ret = ARCHIVE_FATAL; + goto exit_mac_metadata; + default: + archive_set_error(&a->archive, + ARCHIVE_ERRNO_MISC, + "ZIP decompression failed (%d)", r); + ret = ARCHIVE_FATAL; + goto exit_mac_metadata; + } + bytes_used = zip->stream.total_in; + metadata_bytes -= zip->stream.total_out; + mp += zip->stream.total_out; + break; + } +#endif + default: + bytes_used = 0; + break; + } + zip_read_consume(a, bytes_used); + remaining_bytes -= bytes_used; + } + archive_entry_copy_mac_metadata(entry, metadata, + (size_t)rsrc->uncompressed_size - metadata_bytes); + + __archive_read_seek(a, offset, SEEK_SET); + zip->offset = offset; +exit_mac_metadata: + zip->decompress_init = 0; + free(metadata); + return (ret); +} + static int archive_read_format_zip_seekable_read_header(struct archive_read *a, struct archive_entry *entry) { struct zip *zip = (struct zip *)a->format->data; - int r; + struct zip_entry *rsrc; + int r, ret = ARCHIVE_OK; a->archive.archive_format = ARCHIVE_FORMAT_ZIP; if (a->archive.archive_format_name == NULL) @@ -344,26 +702,45 @@ archive_read_format_zip_seekable_read_header(struct archive_read *a, zip->entries_remaining = zip->central_directory_entries; if (r != ARCHIVE_OK) return r; - zip->entry = zip->zip_entries; - } else { - ++zip->entry; + /* Get first entry whose local header offset is lower than + * other entries in the archive file. */ + zip->entry = + (struct zip_entry *)ARCHIVE_RB_TREE_MIN(&zip->tree); + } else if (zip->entry != NULL) { + /* Get next entry in local header offset order. */ + zip->entry = (struct zip_entry *)__archive_rb_tree_iterate( + &zip->tree, &zip->entry->node, ARCHIVE_RB_DIR_RIGHT); } - if (zip->entries_remaining <= 0) + if (zip->entries_remaining <= 0 || zip->entry == NULL) return ARCHIVE_EOF; --zip->entries_remaining; - /* TODO: If entries are sorted by offset within the file, we - should be able to skip here instead of seeking. Skipping is - typically faster (easier for I/O layer to optimize). */ - __archive_read_seek(a, zip->entry->local_header_offset, SEEK_SET); + if (zip->entry->rsrcname.s) + rsrc = (struct zip_entry *)__archive_rb_tree_find_node( + &zip->tree_rsrc, zip->entry->rsrcname.s); + else + rsrc = NULL; + + /* File entries are sorted by the header offset, we should mostly + * use zip_read_consume to advance a read point to avoid redundant + * data reading. */ + if (zip->offset < zip->entry->local_header_offset) + zip_read_consume(a, + zip->entry->local_header_offset - zip->offset); + else if (zip->offset != zip->entry->local_header_offset) { + __archive_read_seek(a, zip->entry->local_header_offset, + SEEK_SET); + zip->offset = zip->entry->local_header_offset; + } zip->unconsumed = 0; r = zip_read_local_file_header(a, entry, zip); if (r != ARCHIVE_OK) return r; if ((zip->entry->mode & AE_IFMT) == AE_IFLNK) { const void *p; - size_t linkname_length = archive_entry_size(entry); + struct archive_string_conv *sconv; + size_t linkname_length = (size_t)archive_entry_size(entry); archive_entry_set_size(entry, 0); p = __archive_read_ahead(a, linkname_length, NULL); @@ -373,17 +750,45 @@ archive_read_format_zip_seekable_read_header(struct archive_read *a, return ARCHIVE_FATAL; } + sconv = zip->sconv; + if (sconv == NULL && (zip->entry->flags & ZIP_UTF8_NAME)) + sconv = zip->sconv_utf8; + if (sconv == NULL) + sconv = zip->sconv_default; if (archive_entry_copy_symlink_l(entry, p, linkname_length, - NULL) != 0) { - /* NOTE: If the last argument is NULL, this will - * fail only by memeory allocation failure. */ - archive_set_error(&a->archive, ENOMEM, - "Can't allocate memory for Symlink"); - return (ARCHIVE_FATAL); + sconv) != 0) { + if (errno != ENOMEM && sconv == zip->sconv_utf8 && + (zip->entry->flags & ZIP_UTF8_NAME)) + archive_entry_copy_symlink_l(entry, p, + linkname_length, NULL); + if (errno == ENOMEM) { + archive_set_error(&a->archive, ENOMEM, + "Can't allocate memory for Symlink"); + return (ARCHIVE_FATAL); + } + /* + * Since there is no character-set regulation for + * symlink name, do not report the conversion error + * in an automatic conversion. + */ + if (sconv != zip->sconv_utf8 || + (zip->entry->flags & ZIP_UTF8_NAME) == 0) { + archive_set_error(&a->archive, + ARCHIVE_ERRNO_FILE_FORMAT, + "Symlink cannot be converted " + "from %s to current locale.", + archive_string_conversion_charset_name( + sconv)); + ret = ARCHIVE_WARN; + } } - /* TODO: handle character-set issues? */ } - return ARCHIVE_OK; + if (rsrc) { + int ret2 = zip_read_mac_metadata(a, entry, rsrc); + if (ret2 < ret) + ret = ret2; + } + return (ret); } static int @@ -410,6 +815,11 @@ archive_read_format_zip_streamable_bid(struct archive_read *a, int best_bid) return (30); } + /* TODO: It's worth looking ahead a little bit for a valid + * PK signature. In particular, that would make it possible + * to read some UUEncoded SFX files or SFX files coming from + * a network socket. */ + return (0); } @@ -424,11 +834,12 @@ archive_read_format_zip_options(struct archive_read *a, if (strcmp(key, "compat-2x") == 0) { /* Handle filnames as libarchive 2.x */ zip->init_default_conversion = (val != NULL) ? 1 : 0; - ret = ARCHIVE_OK; + return (ARCHIVE_OK); } else if (strcmp(key, "hdrcharset") == 0) { if (val == NULL || val[0] == 0) archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC, - "zip: hdrcharset option needs a character-set name"); + "zip: hdrcharset option needs a character-set name" + ); else { zip->sconv = archive_string_conversion_from_charset( &a->archive, val, 0); @@ -439,11 +850,13 @@ archive_read_format_zip_options(struct archive_read *a, } else ret = ARCHIVE_FATAL; } - } else - archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC, - "zip: unknown keyword ``%s''", key); + return (ret); + } - return (ret); + /* Note: The "warn" return is just to inform the options + * supervisor that we didn't handle it. It will generate + * a suitable error if no one used this option. */ + return (ARCHIVE_WARN); } static int @@ -462,7 +875,8 @@ archive_read_format_zip_streamable_read_header(struct archive_read *a, if (zip->zip_entries == NULL) { zip->zip_entries = malloc(sizeof(struct zip_entry)); if (zip->zip_entries == NULL) { - archive_set_error(&a->archive, ENOMEM, "Out of memory"); + archive_set_error(&a->archive, ENOMEM, + "Out of memory"); return ARCHIVE_FATAL; } } @@ -470,7 +884,7 @@ archive_read_format_zip_streamable_read_header(struct archive_read *a, memset(zip->entry, 0, sizeof(struct zip_entry)); /* Search ahead for the next local file header. */ - __archive_read_consume(a, zip->unconsumed); + zip_read_consume(a, zip->unconsumed); zip->unconsumed = 0; for (;;) { int64_t skipped = 0; @@ -490,8 +904,9 @@ archive_read_format_zip_streamable_read_header(struct archive_read *a, if (p[2] == '\003' && p[3] == '\004') { /* Regular file entry. */ - __archive_read_consume(a, skipped); - return zip_read_local_file_header(a, entry, zip); + zip_read_consume(a, skipped); + return zip_read_local_file_header(a, + entry, zip); } if (p[2] == '\005' && p[3] == '\006') @@ -501,8 +916,31 @@ archive_read_format_zip_streamable_read_header(struct archive_read *a, ++p; ++skipped; } - __archive_read_consume(a, skipped); + zip_read_consume(a, skipped); + } +} + +static ssize_t +zip_get_local_file_header_size(struct archive_read *a, size_t extra) +{ + const char *p; + ssize_t filename_length, extra_length; + + if ((p = __archive_read_ahead(a, extra + 30, NULL)) == NULL) { + archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT, + "Truncated ZIP file header"); + return (ARCHIVE_WARN); + } + p += extra; + + if (memcmp(p, "PK\003\004", 4) != 0) { + archive_set_error(&a->archive, -1, "Damaged Zip archive"); + return ARCHIVE_WARN; } + filename_length = archive_le16dec(p + 26); + extra_length = archive_le16dec(p + 28); + + return (30 + filename_length + extra_length); } /* @@ -550,7 +988,7 @@ zip_read_local_file_header(struct archive_read *a, struct archive_entry *entry, version = p[4]; zip_entry->system = p[5]; zip_entry->flags = archive_le16dec(p + 6); - zip_entry->compression = archive_le16dec(p + 8); + zip_entry->compression = (char)archive_le16dec(p + 8); zip_entry->mtime = zip_time(p + 10); local_crc32 = archive_le32dec(p + 14); compressed_size = archive_le32dec(p + 18); @@ -558,11 +996,11 @@ zip_read_local_file_header(struct archive_read *a, struct archive_entry *entry, filename_length = archive_le16dec(p + 26); extra_length = archive_le16dec(p + 28); - __archive_read_consume(a, 30); + zip_read_consume(a, 30); if (zip->have_central_directory) { - /* If we read the central dir entry, we must have size information - as well, so ignore the length-at-end flag. */ + /* If we read the central dir entry, we must have size + * information as well, so ignore the length-at-end flag. */ zip_entry->flags &= ~ZIP_LENGTH_AT_END; /* If we have values from both the local file header and the central directory, warn about mismatches @@ -570,19 +1008,22 @@ zip_read_local_file_header(struct archive_read *a, struct archive_entry *entry, writers always put zero in the local header; don't bother warning about that. */ if (local_crc32 != 0 && local_crc32 != zip_entry->crc32) { - archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT, + archive_set_error(&a->archive, + ARCHIVE_ERRNO_FILE_FORMAT, "Inconsistent CRC32 values"); ret = ARCHIVE_WARN; } if (compressed_size != 0 && compressed_size != zip_entry->compressed_size) { - archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT, + archive_set_error(&a->archive, + ARCHIVE_ERRNO_FILE_FORMAT, "Inconsistent compressed size"); ret = ARCHIVE_WARN; } if (uncompressed_size != 0 && uncompressed_size != zip_entry->uncompressed_size) { - archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT, + archive_set_error(&a->archive, + ARCHIVE_ERRNO_FILE_FORMAT, "Inconsistent uncompressed size"); ret = ARCHIVE_WARN; } @@ -628,7 +1069,7 @@ zip_read_local_file_header(struct archive_read *a, struct archive_entry *entry, archive_string_conversion_charset_name(sconv)); ret = ARCHIVE_WARN; } - __archive_read_consume(a, filename_length); + zip_read_consume(a, filename_length); if (zip_entry->mode == 0) { /* Especially in streaming mode, we can end up @@ -640,14 +1081,14 @@ zip_read_local_file_header(struct archive_read *a, struct archive_entry *entry, if (len > 0 && wp[len - 1] == L'/') zip_entry->mode = AE_IFDIR | 0777; else - zip_entry->mode = AE_IFREG | 0777; + zip_entry->mode = AE_IFREG | 0666; } else { cp = archive_entry_pathname(entry); len = (cp != NULL)?strlen(cp):0; if (len > 0 && cp[len - 1] == '/') zip_entry->mode = AE_IFDIR | 0777; else - zip_entry->mode = AE_IFREG | 0777; + zip_entry->mode = AE_IFREG | 0666; } } @@ -658,7 +1099,7 @@ zip_read_local_file_header(struct archive_read *a, struct archive_entry *entry, return (ARCHIVE_FATAL); } process_extra(h, extra_length, zip_entry); - __archive_read_consume(a, extra_length); + zip_read_consume(a, extra_length); /* Populate some additional entry fields: */ archive_entry_set_mode(entry, zip_entry->mode); @@ -702,8 +1143,8 @@ compression_name(int compression) "deflation" }; - if (compression < - sizeof(compression_names)/sizeof(compression_names[0])) + if (0 <= compression && compression < + (int)(sizeof(compression_names)/sizeof(compression_names[0]))) return compression_names[compression]; else return "??"; @@ -755,7 +1196,7 @@ archive_read_format_zip_read_data(struct archive_read *a, return (ARCHIVE_FAILED); } - __archive_read_consume(a, zip->unconsumed); + zip_read_consume(a, zip->unconsumed); zip->unconsumed = 0; switch(zip->entry->compression) { @@ -781,13 +1222,16 @@ archive_read_format_zip_read_data(struct archive_read *a, return (r); /* Update checksum */ if (*size) - zip->entry_crc32 = crc32(zip->entry_crc32, *buff, *size); + zip->entry_crc32 = crc32(zip->entry_crc32, *buff, + (unsigned)*size); /* If we hit the end, swallow any end-of-data marker. */ if (zip->end_of_entry) { /* Check file size, CRC against these values. */ - if (zip->entry->compressed_size != zip->entry_compressed_bytes_read) { + if (zip->entry->compressed_size != + zip->entry_compressed_bytes_read) { archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC, - "ZIP compressed data is wrong size (read %jd, expected %jd)", + "ZIP compressed data is wrong size " + "(read %jd, expected %jd)", (intmax_t)zip->entry_compressed_bytes_read, (intmax_t)zip->entry->compressed_size); return (ARCHIVE_WARN); @@ -797,7 +1241,8 @@ archive_read_format_zip_read_data(struct archive_read *a, if ((zip->entry->uncompressed_size & UINT32_MAX) != (zip->entry_uncompressed_bytes_read & UINT32_MAX)) { archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC, - "ZIP uncompressed data is wrong size (read %jd, expected %jd)", + "ZIP uncompressed data is wrong size " + "(read %jd, expected %jd)", (intmax_t)zip->entry_uncompressed_bytes_read, (intmax_t)zip->entry->uncompressed_size); return (ARCHIVE_WARN); @@ -846,6 +1291,8 @@ zip_read_data_none(struct archive_read *a, const void **_buff, const char *buff; ssize_t bytes_avail; + (void)offset; /* UNUSED */ + zip = (struct zip *)(a->format->data); if (zip->entry->flags & ZIP_LENGTH_AT_END) { @@ -858,7 +1305,8 @@ zip_read_data_none(struct archive_read *a, const void **_buff, that are longer than this, so a failure to get at least 16 bytes really does indicate a truncated file. */ - archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT, + archive_set_error(&a->archive, + ARCHIVE_ERRNO_FILE_FORMAT, "Truncated ZIP file data"); return (ARCHIVE_FATAL); } @@ -867,8 +1315,10 @@ zip_read_data_none(struct archive_read *a, const void **_buff, if (p[0] == 'P' && p[1] == 'K' && p[2] == '\007' && p[3] == '\010' && archive_le32dec(p + 4) == zip->entry_crc32 - && archive_le32dec(p + 8) == zip->entry_compressed_bytes_read - && archive_le32dec(p + 12) == zip->entry_uncompressed_bytes_read) { + && archive_le32dec(p + 8) == + zip->entry_compressed_bytes_read + && archive_le32dec(p + 12) == + zip->entry_uncompressed_bytes_read) { zip->entry->crc32 = archive_le32dec(p + 4); zip->entry->compressed_size = archive_le32dec(p + 8); zip->entry->uncompressed_size = archive_le32dec(p + 12); @@ -879,9 +1329,10 @@ zip_read_data_none(struct archive_read *a, const void **_buff, /* If not at EOF, ensure we consume at least one byte. */ ++p; - /* Scan forward until we see where a PK\007\010 signature might be. */ - /* Return bytes up until that point. On the next call, the code - above will verify the data descriptor. */ + /* Scan forward until we see where a PK\007\010 signature + * might be. */ + /* Return bytes up until that point. On the next call, + * the code above will verify the data descriptor. */ while (p < buff + bytes_avail - 4) { if (p[3] == 'P') { p += 3; } else if (p[3] == 'K') { p += 2; } @@ -900,12 +1351,13 @@ zip_read_data_none(struct archive_read *a, const void **_buff, /* Grab a bunch of bytes. */ buff = __archive_read_ahead(a, 1, &bytes_avail); if (bytes_avail <= 0) { - archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT, + archive_set_error(&a->archive, + ARCHIVE_ERRNO_FILE_FORMAT, "Truncated ZIP file data"); return (ARCHIVE_FATAL); } if (bytes_avail > zip->entry_bytes_remaining) - bytes_avail = zip->entry_bytes_remaining; + bytes_avail = (ssize_t)zip->entry_bytes_remaining; } *size = bytes_avail; zip->entry_bytes_remaining -= bytes_avail; @@ -918,6 +1370,31 @@ zip_read_data_none(struct archive_read *a, const void **_buff, #ifdef HAVE_ZLIB_H static int +zip_deflate_init(struct archive_read *a, struct zip *zip) +{ + int r; + + /* If we haven't yet read any data, initialize the decompressor. */ + if (!zip->decompress_init) { + if (zip->stream_valid) + r = inflateReset(&zip->stream); + else + r = inflateInit2(&zip->stream, + -15 /* Don't check for zlib header */); + if (r != Z_OK) { + archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC, + "Can't initialize ZIP decompression."); + return (ARCHIVE_FATAL); + } + /* Stream structure has been set up. */ + zip->stream_valid = 1; + /* We've initialized decompression for this stream. */ + zip->decompress_init = 1; + } + return (ARCHIVE_OK); +} + +static int zip_read_data_deflate(struct archive_read *a, const void **buff, size_t *size, int64_t *offset) { @@ -926,6 +1403,8 @@ zip_read_data_deflate(struct archive_read *a, const void **buff, const void *compressed_buff; int r; + (void)offset; /* UNUSED */ + zip = (struct zip *)(a->format->data); /* If the buffer hasn't been allocated, allocate it now. */ @@ -940,23 +1419,9 @@ zip_read_data_deflate(struct archive_read *a, const void **buff, } } - /* If we haven't yet read any data, initialize the decompressor. */ - if (!zip->decompress_init) { - if (zip->stream_valid) - r = inflateReset(&zip->stream); - else - r = inflateInit2(&zip->stream, - -15 /* Don't check for zlib header */); - if (r != Z_OK) { - archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC, - "Can't initialize ZIP decompression."); - return (ARCHIVE_FATAL); - } - /* Stream structure has been set up. */ - zip->stream_valid = 1; - /* We've initialized decompression for this stream. */ - zip->decompress_init = 1; - } + r = zip_deflate_init(a, zip); + if (r != ARCHIVE_OK) + return (r); /* * Note: '1' here is a performance optimization. @@ -967,7 +1432,7 @@ zip_read_data_deflate(struct archive_read *a, const void **buff, compressed_buff = __archive_read_ahead(a, 1, &bytes_avail); if (0 == (zip->entry->flags & ZIP_LENGTH_AT_END) && bytes_avail > zip->entry_bytes_remaining) { - bytes_avail = zip->entry_bytes_remaining; + bytes_avail = (ssize_t)zip->entry_bytes_remaining; } if (bytes_avail <= 0) { archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT, @@ -982,10 +1447,10 @@ zip_read_data_deflate(struct archive_read *a, const void **buff, * cast to remove 'const'. */ zip->stream.next_in = (Bytef *)(uintptr_t)(const void *)compressed_buff; - zip->stream.avail_in = bytes_avail; + zip->stream.avail_in = (uInt)bytes_avail; zip->stream.total_in = 0; zip->stream.next_out = zip->uncompressed_buffer; - zip->stream.avail_out = zip->uncompressed_buffer_size; + zip->stream.avail_out = (uInt)zip->uncompressed_buffer_size; zip->stream.total_out = 0; r = inflate(&zip->stream, 0); @@ -1007,7 +1472,7 @@ zip_read_data_deflate(struct archive_read *a, const void **buff, /* Consume as much as the compressor actually used. */ bytes_avail = zip->stream.total_in; - __archive_read_consume(a, bytes_avail); + zip_read_consume(a, bytes_avail); zip->entry_bytes_remaining -= bytes_avail; zip->entry_compressed_bytes_read += bytes_avail; @@ -1025,7 +1490,8 @@ zip_read_data_deflate(struct archive_read *a, const void **buff, return (ARCHIVE_FATAL); } /* Consume the optional PK\007\010 marker. */ - if (p[0] == 'P' && p[1] == 'K' && p[2] == '\007' && p[3] == '\010') { + if (p[0] == 'P' && p[1] == 'K' && + p[2] == '\007' && p[3] == '\010') { zip->entry->crc32 = archive_le32dec(p + 4); zip->entry->compressed_size = archive_le32dec(p + 8); zip->entry->uncompressed_size = archive_le32dec(p + 12); @@ -1047,14 +1513,11 @@ archive_read_format_zip_read_data_skip(struct archive_read *a) /* If we've already read to end of data, we're done. */ if (zip->end_of_entry) return (ARCHIVE_OK); - /* If we're seeking, we're done. */ - if (zip->have_central_directory) - return (ARCHIVE_OK); /* So we know we're streaming... */ if (0 == (zip->entry->flags & ZIP_LENGTH_AT_END)) { /* We know the compressed length, so we can just skip. */ - int64_t bytes_skipped = __archive_read_consume(a, + int64_t bytes_skipped = zip_read_consume(a, zip->entry_bytes_remaining + zip->unconsumed); if (bytes_skipped < 0) return (ARCHIVE_FATAL); @@ -1077,36 +1540,36 @@ archive_read_format_zip_read_data_skip(struct archive_read *a) if (r != ARCHIVE_OK) return (r); } - break; + return ARCHIVE_OK; #endif default: /* Uncompressed or unknown. */ /* Scan for a PK\007\010 signature. */ - __archive_read_consume(a, zip->unconsumed); + zip_read_consume(a, zip->unconsumed); zip->unconsumed = 0; for (;;) { const char *p, *buff; ssize_t bytes_avail; buff = __archive_read_ahead(a, 16, &bytes_avail); if (bytes_avail < 16) { - archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT, + archive_set_error(&a->archive, + ARCHIVE_ERRNO_FILE_FORMAT, "Truncated ZIP file data"); return (ARCHIVE_FATAL); } p = buff; - while (p < buff + bytes_avail - 16) { + while (p <= buff + bytes_avail - 16) { if (p[3] == 'P') { p += 3; } else if (p[3] == 'K') { p += 2; } else if (p[3] == '\007') { p += 1; } else if (p[3] == '\010' && p[2] == '\007' && p[1] == 'K' && p[0] == 'P') { - __archive_read_consume(a, p - buff + 16); + zip_read_consume(a, p - buff + 16); return ARCHIVE_OK; } else { p += 4; } } - __archive_read_consume(a, p - buff); + zip_read_consume(a, p - buff); } } - return ARCHIVE_OK; } static int @@ -1119,6 +1582,11 @@ archive_read_format_zip_cleanup(struct archive_read *a) if (zip->stream_valid) inflateEnd(&zip->stream); #endif + if (zip->zip_entries && zip->central_directory_entries) { + unsigned i; + for (i = 0; i < zip->central_directory_entries; i++) + archive_string_free(&(zip->zip_entries[i].rsrcname)); + } free(zip->zip_entries); free(zip->uncompressed_buffer); archive_string_free(&(zip->extra)); @@ -1201,11 +1669,14 @@ process_extra(const char *p, size_t extra_length, struct zip_entry* zip_entry) /* Info-ZIP Unix Extra Field (old version) "UX". */ if (datasize >= 8) { zip_entry->atime = archive_le32dec(p + offset); - zip_entry->mtime = archive_le32dec(p + offset + 4); + zip_entry->mtime = + archive_le32dec(p + offset + 4); } if (datasize >= 12) { - zip_entry->uid = archive_le16dec(p + offset + 8); - zip_entry->gid = archive_le16dec(p + offset + 10); + zip_entry->uid = + archive_le16dec(p + offset + 8); + zip_entry->gid = + archive_le16dec(p + offset + 10); } break; } @@ -1219,7 +1690,8 @@ process_extra(const char *p, size_t extra_length, struct zip_entry* zip_entry) if (datasize >= 2) zip_entry->uid = archive_le16dec(p + offset); if (datasize >= 4) - zip_entry->gid = archive_le16dec(p + offset + 2); + zip_entry->gid = + archive_le16dec(p + offset + 2); break; case 0x7875: { @@ -1231,22 +1703,26 @@ process_extra(const char *p, size_t extra_length, struct zip_entry* zip_entry) /* get a uid size. */ uidsize = p[offset+1]; if (uidsize == 2) - zip_entry->uid = archive_le16dec( - p + offset + 2); + zip_entry->uid = + archive_le16dec( + p + offset + 2); else if (uidsize == 4 && datasize >= 6) - zip_entry->uid = archive_le32dec( - p + offset + 2); + zip_entry->uid = + archive_le32dec( + p + offset + 2); } if (datasize >= (2 + uidsize + 3)) { /* get a gid size. */ gidsize = p[offset+2+uidsize]; if (gidsize == 2) - zip_entry->gid = archive_le16dec( - p+offset+2+uidsize+1); + zip_entry->gid = + archive_le16dec( + p+offset+2+uidsize+1); else if (gidsize == 4 && datasize >= (2 + uidsize + 5)) - zip_entry->gid = archive_le32dec( - p+offset+2+uidsize+1); + zip_entry->gid = + archive_le32dec( + p+offset+2+uidsize+1); } } break; |