summaryrefslogtreecommitdiff
path: root/zlib/examples/gun.c
diff options
context:
space:
mode:
Diffstat (limited to 'zlib/examples/gun.c')
-rw-r--r--zlib/examples/gun.c693
1 files changed, 693 insertions, 0 deletions
diff --git a/zlib/examples/gun.c b/zlib/examples/gun.c
new file mode 100644
index 000000000..bfec590a0
--- /dev/null
+++ b/zlib/examples/gun.c
@@ -0,0 +1,693 @@
+/* gun.c -- simple gunzip to give an example of the use of inflateBack()
+ * Copyright (C) 2003, 2005 Mark Adler
+ * For conditions of distribution and use, see copyright notice in zlib.h
+ Version 1.3 12 June 2005 Mark Adler */
+
+/* Version history:
+ 1.0 16 Feb 2003 First version for testing of inflateBack()
+ 1.1 21 Feb 2005 Decompress concatenated gzip streams
+ Remove use of "this" variable (C++ keyword)
+ Fix return value for in()
+ Improve allocation failure checking
+ Add typecasting for void * structures
+ Add -h option for command version and usage
+ Add a bunch of comments
+ 1.2 20 Mar 2005 Add Unix compress (LZW) decompression
+ Copy file attributes from input file to output file
+ 1.3 12 Jun 2005 Add casts for error messages [Oberhumer]
+ */
+
+/*
+ gun [ -t ] [ name ... ]
+
+ decompresses the data in the named gzip files. If no arguments are given,
+ gun will decompress from stdin to stdout. The names must end in .gz, -gz,
+ .z, -z, _z, or .Z. The uncompressed data will be written to a file name
+ with the suffix stripped. On success, the original file is deleted. On
+ failure, the output file is deleted. For most failures, the command will
+ continue to process the remaining names on the command line. A memory
+ allocation failure will abort the command. If -t is specified, then the
+ listed files or stdin will be tested as gzip files for integrity (without
+ checking for a proper suffix), no output will be written, and no files
+ will be deleted.
+
+ Like gzip, gun allows concatenated gzip streams and will decompress them,
+ writing all of the uncompressed data to the output. Unlike gzip, gun allows
+ an empty file on input, and will produce no error writing an empty output
+ file.
+
+ gun will also decompress files made by Unix compress, which uses LZW
+ compression. These files are automatically detected by virtue of their
+ magic header bytes. Since the end of Unix compress stream is marked by the
+ end-of-file, they cannot be concantenated. If a Unix compress stream is
+ encountered in an input file, it is the last stream in that file.
+
+ Like gunzip and uncompress, the file attributes of the orignal compressed
+ file are maintained in the final uncompressed file, to the extent that the
+ user permissions allow it.
+
+ On my Mac OS X PowerPC G4, gun is almost twice as fast as gunzip (version
+ 1.2.4) is on the same file, when gun is linked with zlib 1.2.2. Also the
+ LZW decompression provided by gun is about twice as fast as the standard
+ Unix uncompress command.
+ */
+
+/* external functions and related types and constants */
+#include <stdio.h> /* fprintf() */
+#include <stdlib.h> /* malloc(), free() */
+#include <string.h> /* strerror(), strcmp(), strlen(), memcpy() */
+#include <errno.h> /* errno */
+#include <fcntl.h> /* open() */
+#include <unistd.h> /* read(), write(), close(), chown(), unlink() */
+#include <sys/types.h>
+#include <sys/stat.h> /* stat(), chmod() */
+#include <utime.h> /* utime() */
+#include "zlib.h" /* inflateBackInit(), inflateBack(), */
+ /* inflateBackEnd(), crc32() */
+
+/* function declaration */
+#define local static
+
+/* buffer constants */
+#define SIZE 32768U /* input and output buffer sizes */
+#define PIECE 16384 /* limits i/o chunks for 16-bit int case */
+
+/* structure for infback() to pass to input function in() -- it maintains the
+ input file and a buffer of size SIZE */
+struct ind {
+ int infile;
+ unsigned char *inbuf;
+};
+
+/* Load input buffer, assumed to be empty, and return bytes loaded and a
+ pointer to them. read() is called until the buffer is full, or until it
+ returns end-of-file or error. Return 0 on error. */
+local unsigned in(void *in_desc, unsigned char **buf)
+{
+ int ret;
+ unsigned len;
+ unsigned char *next;
+ struct ind *me = (struct ind *)in_desc;
+
+ next = me->inbuf;
+ *buf = next;
+ len = 0;
+ do {
+ ret = PIECE;
+ if ((unsigned)ret > SIZE - len)
+ ret = (int)(SIZE - len);
+ ret = (int)read(me->infile, next, ret);
+ if (ret == -1) {
+ len = 0;
+ break;
+ }
+ next += ret;
+ len += ret;
+ } while (ret != 0 && len < SIZE);
+ return len;
+}
+
+/* structure for infback() to pass to output function out() -- it maintains the
+ output file, a running CRC-32 check on the output and the total number of
+ bytes output, both for checking against the gzip trailer. (The length in
+ the gzip trailer is stored modulo 2^32, so it's ok if a long is 32 bits and
+ the output is greater than 4 GB.) */
+struct outd {
+ int outfile;
+ int check; /* true if checking crc and total */
+ unsigned long crc;
+ unsigned long total;
+};
+
+/* Write output buffer and update the CRC-32 and total bytes written. write()
+ is called until all of the output is written or an error is encountered.
+ On success out() returns 0. For a write failure, out() returns 1. If the
+ output file descriptor is -1, then nothing is written.
+ */
+local int out(void *out_desc, unsigned char *buf, unsigned len)
+{
+ int ret;
+ struct outd *me = (struct outd *)out_desc;
+
+ if (me->check) {
+ me->crc = crc32(me->crc, buf, len);
+ me->total += len;
+ }
+ if (me->outfile != -1)
+ do {
+ ret = PIECE;
+ if ((unsigned)ret > len)
+ ret = (int)len;
+ ret = (int)write(me->outfile, buf, ret);
+ if (ret == -1)
+ return 1;
+ buf += ret;
+ len -= ret;
+ } while (len != 0);
+ return 0;
+}
+
+/* next input byte macro for use inside lunpipe() and gunpipe() */
+#define NEXT() (have ? 0 : (have = in(indp, &next)), \
+ last = have ? (have--, (int)(*next++)) : -1)
+
+/* memory for gunpipe() and lunpipe() --
+ the first 256 entries of prefix[] and suffix[] are never used, could
+ have offset the index, but it's faster to waste the memory */
+unsigned char inbuf[SIZE]; /* input buffer */
+unsigned char outbuf[SIZE]; /* output buffer */
+unsigned short prefix[65536]; /* index to LZW prefix string */
+unsigned char suffix[65536]; /* one-character LZW suffix */
+unsigned char match[65280 + 2]; /* buffer for reversed match or gzip
+ 32K sliding window */
+
+/* throw out what's left in the current bits byte buffer (this is a vestigial
+ aspect of the compressed data format derived from an implementation that
+ made use of a special VAX machine instruction!) */
+#define FLUSHCODE() \
+ do { \
+ left = 0; \
+ rem = 0; \
+ if (chunk > have) { \
+ chunk -= have; \
+ have = 0; \
+ if (NEXT() == -1) \
+ break; \
+ chunk--; \
+ if (chunk > have) { \
+ chunk = have = 0; \
+ break; \
+ } \
+ } \
+ have -= chunk; \
+ next += chunk; \
+ chunk = 0; \
+ } while (0)
+
+/* Decompress a compress (LZW) file from indp to outfile. The compress magic
+ header (two bytes) has already been read and verified. There are have bytes
+ of buffered input at next. strm is used for passing error information back
+ to gunpipe().
+
+ lunpipe() will return Z_OK on success, Z_BUF_ERROR for an unexpected end of
+ file, read error, or write error (a write error indicated by strm->next_in
+ not equal to Z_NULL), or Z_DATA_ERROR for invalid input.
+ */
+local int lunpipe(unsigned have, unsigned char *next, struct ind *indp,
+ int outfile, z_stream *strm)
+{
+ int last; /* last byte read by NEXT(), or -1 if EOF */
+ int chunk; /* bytes left in current chunk */
+ int left; /* bits left in rem */
+ unsigned rem; /* unused bits from input */
+ int bits; /* current bits per code */
+ unsigned code; /* code, table traversal index */
+ unsigned mask; /* mask for current bits codes */
+ int max; /* maximum bits per code for this stream */
+ int flags; /* compress flags, then block compress flag */
+ unsigned end; /* last valid entry in prefix/suffix tables */
+ unsigned temp; /* current code */
+ unsigned prev; /* previous code */
+ unsigned final; /* last character written for previous code */
+ unsigned stack; /* next position for reversed string */
+ unsigned outcnt; /* bytes in output buffer */
+ struct outd outd; /* output structure */
+
+ /* set up output */
+ outd.outfile = outfile;
+ outd.check = 0;
+
+ /* process remainder of compress header -- a flags byte */
+ flags = NEXT();
+ if (last == -1)
+ return Z_BUF_ERROR;
+ if (flags & 0x60) {
+ strm->msg = (char *)"unknown lzw flags set";
+ return Z_DATA_ERROR;
+ }
+ max = flags & 0x1f;
+ if (max < 9 || max > 16) {
+ strm->msg = (char *)"lzw bits out of range";
+ return Z_DATA_ERROR;
+ }
+ if (max == 9) /* 9 doesn't really mean 9 */
+ max = 10;
+ flags &= 0x80; /* true if block compress */
+
+ /* clear table */
+ bits = 9;
+ mask = 0x1ff;
+ end = flags ? 256 : 255;
+
+ /* set up: get first 9-bit code, which is the first decompressed byte, but
+ don't create a table entry until the next code */
+ if (NEXT() == -1) /* no compressed data is ok */
+ return Z_OK;
+ final = prev = (unsigned)last; /* low 8 bits of code */
+ if (NEXT() == -1) /* missing a bit */
+ return Z_BUF_ERROR;
+ if (last & 1) { /* code must be < 256 */
+ strm->msg = (char *)"invalid lzw code";
+ return Z_DATA_ERROR;
+ }
+ rem = (unsigned)last >> 1; /* remaining 7 bits */
+ left = 7;
+ chunk = bits - 2; /* 7 bytes left in this chunk */
+ outbuf[0] = (unsigned char)final; /* write first decompressed byte */
+ outcnt = 1;
+
+ /* decode codes */
+ stack = 0;
+ for (;;) {
+ /* if the table will be full after this, increment the code size */
+ if (end >= mask && bits < max) {
+ FLUSHCODE();
+ bits++;
+ mask <<= 1;
+ mask++;
+ }
+
+ /* get a code of length bits */
+ if (chunk == 0) /* decrement chunk modulo bits */
+ chunk = bits;
+ code = rem; /* low bits of code */
+ if (NEXT() == -1) { /* EOF is end of compressed data */
+ /* write remaining buffered output */
+ if (outcnt && out(&outd, outbuf, outcnt)) {
+ strm->next_in = outbuf; /* signal write error */
+ return Z_BUF_ERROR;
+ }
+ return Z_OK;
+ }
+ code += (unsigned)last << left; /* middle (or high) bits of code */
+ left += 8;
+ chunk--;
+ if (bits > left) { /* need more bits */
+ if (NEXT() == -1) /* can't end in middle of code */
+ return Z_BUF_ERROR;
+ code += (unsigned)last << left; /* high bits of code */
+ left += 8;
+ chunk--;
+ }
+ code &= mask; /* mask to current code length */
+ left -= bits; /* number of unused bits */
+ rem = (unsigned)last >> (8 - left); /* unused bits from last byte */
+
+ /* process clear code (256) */
+ if (code == 256 && flags) {
+ FLUSHCODE();
+ bits = 9; /* initialize bits and mask */
+ mask = 0x1ff;
+ end = 255; /* empty table */
+ continue; /* get next code */
+ }
+
+ /* special code to reuse last match */
+ temp = code; /* save the current code */
+ if (code > end) {
+ /* Be picky on the allowed code here, and make sure that the code
+ we drop through (prev) will be a valid index so that random
+ input does not cause an exception. The code != end + 1 check is
+ empirically derived, and not checked in the original uncompress
+ code. If this ever causes a problem, that check could be safely
+ removed. Leaving this check in greatly improves gun's ability
+ to detect random or corrupted input after a compress header.
+ In any case, the prev > end check must be retained. */
+ if (code != end + 1 || prev > end) {
+ strm->msg = (char *)"invalid lzw code";
+ return Z_DATA_ERROR;
+ }
+ match[stack++] = (unsigned char)final;
+ code = prev;
+ }
+
+ /* walk through linked list to generate output in reverse order */
+ while (code >= 256) {
+ match[stack++] = suffix[code];
+ code = prefix[code];
+ }
+ match[stack++] = (unsigned char)code;
+ final = code;
+
+ /* link new table entry */
+ if (end < mask) {
+ end++;
+ prefix[end] = (unsigned short)prev;
+ suffix[end] = (unsigned char)final;
+ }
+
+ /* set previous code for next iteration */
+ prev = temp;
+
+ /* write output in forward order */
+ while (stack > SIZE - outcnt) {
+ while (outcnt < SIZE)
+ outbuf[outcnt++] = match[--stack];
+ if (out(&outd, outbuf, outcnt)) {
+ strm->next_in = outbuf; /* signal write error */
+ return Z_BUF_ERROR;
+ }
+ outcnt = 0;
+ }
+ do {
+ outbuf[outcnt++] = match[--stack];
+ } while (stack);
+
+ /* loop for next code with final and prev as the last match, rem and
+ left provide the first 0..7 bits of the next code, end is the last
+ valid table entry */
+ }
+}
+
+/* Decompress a gzip file from infile to outfile. strm is assumed to have been
+ successfully initialized with inflateBackInit(). The input file may consist
+ of a series of gzip streams, in which case all of them will be decompressed
+ to the output file. If outfile is -1, then the gzip stream(s) integrity is
+ checked and nothing is written.
+
+ The return value is a zlib error code: Z_MEM_ERROR if out of memory,
+ Z_DATA_ERROR if the header or the compressed data is invalid, or if the
+ trailer CRC-32 check or length doesn't match, Z_BUF_ERROR if the input ends
+ prematurely or a write error occurs, or Z_ERRNO if junk (not a another gzip
+ stream) follows a valid gzip stream.
+ */
+local int gunpipe(z_stream *strm, int infile, int outfile)
+{
+ int ret, first, last;
+ unsigned have, flags, len;
+ unsigned char *next;
+ struct ind ind, *indp;
+ struct outd outd;
+
+ /* setup input buffer */
+ ind.infile = infile;
+ ind.inbuf = inbuf;
+ indp = &ind;
+
+ /* decompress concatenated gzip streams */
+ have = 0; /* no input data read in yet */
+ first = 1; /* looking for first gzip header */
+ strm->next_in = Z_NULL; /* so Z_BUF_ERROR means EOF */
+ for (;;) {
+ /* look for the two magic header bytes for a gzip stream */
+ if (NEXT() == -1) {
+ ret = Z_OK;
+ break; /* empty gzip stream is ok */
+ }
+ if (last != 31 || (NEXT() != 139 && last != 157)) {
+ strm->msg = (char *)"incorrect header check";
+ ret = first ? Z_DATA_ERROR : Z_ERRNO;
+ break; /* not a gzip or compress header */
+ }
+ first = 0; /* next non-header is junk */
+
+ /* process a compress (LZW) file -- can't be concatenated after this */
+ if (last == 157) {
+ ret = lunpipe(have, next, indp, outfile, strm);
+ break;
+ }
+
+ /* process remainder of gzip header */
+ ret = Z_BUF_ERROR;
+ if (NEXT() != 8) { /* only deflate method allowed */
+ if (last == -1) break;
+ strm->msg = (char *)"unknown compression method";
+ ret = Z_DATA_ERROR;
+ break;
+ }
+ flags = NEXT(); /* header flags */
+ NEXT(); /* discard mod time, xflgs, os */
+ NEXT();
+ NEXT();
+ NEXT();
+ NEXT();
+ NEXT();
+ if (last == -1) break;
+ if (flags & 0xe0) {
+ strm->msg = (char *)"unknown header flags set";
+ ret = Z_DATA_ERROR;
+ break;
+ }
+ if (flags & 4) { /* extra field */
+ len = NEXT();
+ len += (unsigned)(NEXT()) << 8;
+ if (last == -1) break;
+ while (len > have) {
+ len -= have;
+ have = 0;
+ if (NEXT() == -1) break;
+ len--;
+ }
+ if (last == -1) break;
+ have -= len;
+ next += len;
+ }
+ if (flags & 8) /* file name */
+ while (NEXT() != 0 && last != -1)
+ ;
+ if (flags & 16) /* comment */
+ while (NEXT() != 0 && last != -1)
+ ;
+ if (flags & 2) { /* header crc */
+ NEXT();
+ NEXT();
+ }
+ if (last == -1) break;
+
+ /* set up output */
+ outd.outfile = outfile;
+ outd.check = 1;
+ outd.crc = crc32(0L, Z_NULL, 0);
+ outd.total = 0;
+
+ /* decompress data to output */
+ strm->next_in = next;
+ strm->avail_in = have;
+ ret = inflateBack(strm, in, indp, out, &outd);
+ if (ret != Z_STREAM_END) break;
+ next = strm->next_in;
+ have = strm->avail_in;
+ strm->next_in = Z_NULL; /* so Z_BUF_ERROR means EOF */
+
+ /* check trailer */
+ ret = Z_BUF_ERROR;
+ if (NEXT() != (outd.crc & 0xff) ||
+ NEXT() != ((outd.crc >> 8) & 0xff) ||
+ NEXT() != ((outd.crc >> 16) & 0xff) ||
+ NEXT() != ((outd.crc >> 24) & 0xff)) {
+ /* crc error */
+ if (last != -1) {
+ strm->msg = (char *)"incorrect data check";
+ ret = Z_DATA_ERROR;
+ }
+ break;
+ }
+ if (NEXT() != (outd.total & 0xff) ||
+ NEXT() != ((outd.total >> 8) & 0xff) ||
+ NEXT() != ((outd.total >> 16) & 0xff) ||
+ NEXT() != ((outd.total >> 24) & 0xff)) {
+ /* length error */
+ if (last != -1) {
+ strm->msg = (char *)"incorrect length check";
+ ret = Z_DATA_ERROR;
+ }
+ break;
+ }
+
+ /* go back and look for another gzip stream */
+ }
+
+ /* clean up and return */
+ return ret;
+}
+
+/* Copy file attributes, from -> to, as best we can. This is best effort, so
+ no errors are reported. The mode bits, including suid, sgid, and the sticky
+ bit are copied (if allowed), the owner's user id and group id are copied
+ (again if allowed), and the access and modify times are copied. */
+local void copymeta(char *from, char *to)
+{
+ struct stat was;
+ struct utimbuf when;
+
+ /* get all of from's Unix meta data, return if not a regular file */
+ if (stat(from, &was) != 0 || (was.st_mode & S_IFMT) != S_IFREG)
+ return;
+
+ /* set to's mode bits, ignore errors */
+ (void)chmod(to, was.st_mode & 07777);
+
+ /* copy owner's user and group, ignore errors */
+ (void)chown(to, was.st_uid, was.st_gid);
+
+ /* copy access and modify times, ignore errors */
+ when.actime = was.st_atime;
+ when.modtime = was.st_mtime;
+ (void)utime(to, &when);
+}
+
+/* Decompress the file inname to the file outnname, of if test is true, just
+ decompress without writing and check the gzip trailer for integrity. If
+ inname is NULL or an empty string, read from stdin. If outname is NULL or
+ an empty string, write to stdout. strm is a pre-initialized inflateBack
+ structure. When appropriate, copy the file attributes from inname to
+ outname.
+
+ gunzip() returns 1 if there is an out-of-memory error or an unexpected
+ return code from gunpipe(). Otherwise it returns 0.
+ */
+local int gunzip(z_stream *strm, char *inname, char *outname, int test)
+{
+ int ret;
+ int infile, outfile;
+
+ /* open files */
+ if (inname == NULL || *inname == 0) {
+ inname = "-";
+ infile = 0; /* stdin */
+ }
+ else {
+ infile = open(inname, O_RDONLY, 0);
+ if (infile == -1) {
+ fprintf(stderr, "gun cannot open %s\n", inname);
+ return 0;
+ }
+ }
+ if (test)
+ outfile = -1;
+ else if (outname == NULL || *outname == 0) {
+ outname = "-";
+ outfile = 1; /* stdout */
+ }
+ else {
+ outfile = open(outname, O_CREAT | O_TRUNC | O_WRONLY, 0666);
+ if (outfile == -1) {
+ close(infile);
+ fprintf(stderr, "gun cannot create %s\n", outname);
+ return 0;
+ }
+ }
+ errno = 0;
+
+ /* decompress */
+ ret = gunpipe(strm, infile, outfile);
+ if (outfile > 2) close(outfile);
+ if (infile > 2) close(infile);
+
+ /* interpret result */
+ switch (ret) {
+ case Z_OK:
+ case Z_ERRNO:
+ if (infile > 2 && outfile > 2) {
+ copymeta(inname, outname); /* copy attributes */
+ unlink(inname);
+ }
+ if (ret == Z_ERRNO)
+ fprintf(stderr, "gun warning: trailing garbage ignored in %s\n",
+ inname);
+ break;
+ case Z_DATA_ERROR:
+ if (outfile > 2) unlink(outname);
+ fprintf(stderr, "gun data error on %s: %s\n", inname, strm->msg);
+ break;
+ case Z_MEM_ERROR:
+ if (outfile > 2) unlink(outname);
+ fprintf(stderr, "gun out of memory error--aborting\n");
+ return 1;
+ case Z_BUF_ERROR:
+ if (outfile > 2) unlink(outname);
+ if (strm->next_in != Z_NULL) {
+ fprintf(stderr, "gun write error on %s: %s\n",
+ outname, strerror(errno));
+ }
+ else if (errno) {
+ fprintf(stderr, "gun read error on %s: %s\n",
+ inname, strerror(errno));
+ }
+ else {
+ fprintf(stderr, "gun unexpected end of file on %s\n",
+ inname);
+ }
+ break;
+ default:
+ if (outfile > 2) unlink(outname);
+ fprintf(stderr, "gun internal error--aborting\n");
+ return 1;
+ }
+ return 0;
+}
+
+/* Process the gun command line arguments. See the command syntax near the
+ beginning of this source file. */
+int main(int argc, char **argv)
+{
+ int ret, len, test;
+ char *outname;
+ unsigned char *window;
+ z_stream strm;
+
+ /* initialize inflateBack state for repeated use */
+ window = match; /* reuse LZW match buffer */
+ strm.zalloc = Z_NULL;
+ strm.zfree = Z_NULL;
+ strm.opaque = Z_NULL;
+ ret = inflateBackInit(&strm, 15, window);
+ if (ret != Z_OK) {
+ fprintf(stderr, "gun out of memory error--aborting\n");
+ return 1;
+ }
+
+ /* decompress each file to the same name with the suffix removed */
+ argc--;
+ argv++;
+ test = 0;
+ if (argc && strcmp(*argv, "-h") == 0) {
+ fprintf(stderr, "gun 1.3 (12 Jun 2005)\n");
+ fprintf(stderr, "Copyright (c) 2005 Mark Adler\n");
+ fprintf(stderr, "usage: gun [-t] [file1.gz [file2.Z ...]]\n");
+ return 0;
+ }
+ if (argc && strcmp(*argv, "-t") == 0) {
+ test = 1;
+ argc--;
+ argv++;
+ }
+ if (argc)
+ do {
+ if (test)
+ outname = NULL;
+ else {
+ len = (int)strlen(*argv);
+ if (strcmp(*argv + len - 3, ".gz") == 0 ||
+ strcmp(*argv + len - 3, "-gz") == 0)
+ len -= 3;
+ else if (strcmp(*argv + len - 2, ".z") == 0 ||
+ strcmp(*argv + len - 2, "-z") == 0 ||
+ strcmp(*argv + len - 2, "_z") == 0 ||
+ strcmp(*argv + len - 2, ".Z") == 0)
+ len -= 2;
+ else {
+ fprintf(stderr, "gun error: no gz type on %s--skipping\n",
+ *argv);
+ continue;
+ }
+ outname = malloc(len + 1);
+ if (outname == NULL) {
+ fprintf(stderr, "gun out of memory error--aborting\n");
+ ret = 1;
+ break;
+ }
+ memcpy(outname, *argv, len);
+ outname[len] = 0;
+ }
+ ret = gunzip(&strm, *argv, outname, test);
+ if (outname != NULL) free(outname);
+ if (ret) break;
+ } while (argv++, --argc);
+ else
+ ret = gunzip(&strm, NULL, NULL, test);
+
+ /* clean up */
+ inflateBackEnd(&strm);
+ return ret;
+}