From 0dc212471e8a6a2cf14f8127a9d1f31c0152093c Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Sun, 5 May 2013 01:45:27 -0400 Subject: pristine-tar (1.28) unstable; urgency=low * Update lintian overrides. # imported from the archive --- GPL | 339 +++++++ INSTALL | 39 + Makefile.PL | 103 ++ Pristine/Tar.pm | 121 +++ Pristine/Tar/Delta.pm | 122 +++ Pristine/Tar/Delta/Tarball.pm | 63 ++ Pristine/Tar/Formats.pm | 93 ++ README | 11 + TODO | 25 + debian/changelog | 453 +++++++++ debian/compat | 1 + debian/control | 27 + debian/copyright | 84 ++ debian/docs | 1 + debian/lintian-overrides | 5 + debian/rules | 15 + delta-format.txt | 54 ++ pit/suse-bzip2/LICENSE | 42 + pit/suse-bzip2/Makefile | 252 +++++ pit/suse-bzip2/blocksort.c | 1094 +++++++++++++++++++++ pit/suse-bzip2/bzip2.c | 2038 ++++++++++++++++++++++++++++++++++++++++ pit/suse-bzip2/bzlib.c | 1572 +++++++++++++++++++++++++++++++ pit/suse-bzip2/bzlib.h | 282 ++++++ pit/suse-bzip2/bzlib_private.h | 509 ++++++++++ pit/suse-bzip2/compress.c | 672 +++++++++++++ pit/suse-bzip2/crctable.c | 104 ++ pit/suse-bzip2/decompress.c | 646 +++++++++++++ pit/suse-bzip2/huffman.c | 205 ++++ pit/suse-bzip2/randtable.c | 84 ++ pristine-bz2 | 285 ++++++ pristine-bz2.1 | 191 ++++ pristine-gz | 325 +++++++ pristine-gz.1 | 205 ++++ pristine-tar | 821 ++++++++++++++++ pristine-tar.1 | 286 ++++++ pristine-tar.spec | 57 ++ pristine-xz | 379 ++++++++ pristine-xz.1 | 184 ++++ zgz.1 | 158 ++++ zgz/gzip/bits.c | 180 ++++ zgz/gzip/deflate.c | 792 ++++++++++++++++ zgz/gzip/gzip.c | 120 +++ zgz/gzip/gzip.h | 154 +++ zgz/gzip/trees.c | 978 +++++++++++++++++++ zgz/gzip/util.c | 179 ++++ zgz/old-bzip2/blocksort.c | 1062 +++++++++++++++++++++ zgz/old-bzip2/bzip2.c | 178 ++++ zgz/old-bzip2/bzlib.c | 915 ++++++++++++++++++ zgz/old-bzip2/bzlib.h | 309 ++++++ zgz/old-bzip2/bzlib_private.h | 528 +++++++++++ zgz/old-bzip2/compress.c | 627 ++++++++++++ zgz/old-bzip2/crctable.c | 144 +++ zgz/old-bzip2/huffman.c | 228 +++++ zgz/old-bzip2/randtable.c | 124 +++ zgz/zgz.c | 624 ++++++++++++ zgz/zgz.pod | 33 + 56 files changed, 19122 insertions(+) create mode 100644 GPL create mode 100644 INSTALL create mode 100755 Makefile.PL create mode 100644 Pristine/Tar.pm create mode 100644 Pristine/Tar/Delta.pm create mode 100644 Pristine/Tar/Delta/Tarball.pm create mode 100644 Pristine/Tar/Formats.pm create mode 100644 README create mode 100644 TODO create mode 100644 debian/changelog create mode 100644 debian/compat create mode 100644 debian/control create mode 100644 debian/copyright create mode 100644 debian/docs create mode 100644 debian/lintian-overrides create mode 100755 debian/rules create mode 100644 delta-format.txt create mode 100644 pit/suse-bzip2/LICENSE create mode 100644 pit/suse-bzip2/Makefile create mode 100644 pit/suse-bzip2/blocksort.c create mode 100644 pit/suse-bzip2/bzip2.c create mode 100644 pit/suse-bzip2/bzlib.c create mode 100644 pit/suse-bzip2/bzlib.h create mode 100644 pit/suse-bzip2/bzlib_private.h create mode 100644 pit/suse-bzip2/compress.c create mode 100644 pit/suse-bzip2/crctable.c create mode 100644 pit/suse-bzip2/decompress.c create mode 100644 pit/suse-bzip2/huffman.c create mode 100644 pit/suse-bzip2/randtable.c create mode 100755 pristine-bz2 create mode 100644 pristine-bz2.1 create mode 100755 pristine-gz create mode 100644 pristine-gz.1 create mode 100755 pristine-tar create mode 100644 pristine-tar.1 create mode 100644 pristine-tar.spec create mode 100755 pristine-xz create mode 100644 pristine-xz.1 create mode 100644 zgz.1 create mode 100644 zgz/gzip/bits.c create mode 100644 zgz/gzip/deflate.c create mode 100644 zgz/gzip/gzip.c create mode 100644 zgz/gzip/gzip.h create mode 100644 zgz/gzip/trees.c create mode 100644 zgz/gzip/util.c create mode 100644 zgz/old-bzip2/blocksort.c create mode 100644 zgz/old-bzip2/bzip2.c create mode 100644 zgz/old-bzip2/bzlib.c create mode 100644 zgz/old-bzip2/bzlib.h create mode 100644 zgz/old-bzip2/bzlib_private.h create mode 100644 zgz/old-bzip2/compress.c create mode 100644 zgz/old-bzip2/crctable.c create mode 100644 zgz/old-bzip2/huffman.c create mode 100644 zgz/old-bzip2/randtable.c create mode 100644 zgz/zgz.c create mode 100644 zgz/zgz.pod diff --git a/GPL b/GPL new file mode 100644 index 0000000..d511905 --- /dev/null +++ b/GPL @@ -0,0 +1,339 @@ + GNU GENERAL PUBLIC LICENSE + Version 2, June 1991 + + Copyright (C) 1989, 1991 Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + Everyone is permitted to copy and distribute verbatim copies + of this license document, but changing it is not allowed. + + Preamble + + The licenses for most software are designed to take away your +freedom to share and change it. By contrast, the GNU General Public +License is intended to guarantee your freedom to share and change free +software--to make sure the software is free for all its users. This +General Public License applies to most of the Free Software +Foundation's software and to any other program whose authors commit to +using it. (Some other Free Software Foundation software is covered by +the GNU Lesser General Public License instead.) You can apply it to +your programs, too. + + When we speak of free software, we are referring to freedom, not +price. Our General Public Licenses are designed to make sure that you +have the freedom to distribute copies of free software (and charge for +this service if you wish), that you receive source code or can get it +if you want it, that you can change the software or use pieces of it +in new free programs; and that you know you can do these things. + + To protect your rights, we need to make restrictions that forbid +anyone to deny you these rights or to ask you to surrender the rights. +These restrictions translate to certain responsibilities for you if you +distribute copies of the software, or if you modify it. + + For example, if you distribute copies of such a program, whether +gratis or for a fee, you must give the recipients all the rights that +you have. You must make sure that they, too, receive or can get the +source code. And you must show them these terms so they know their +rights. + + We protect your rights with two steps: (1) copyright the software, and +(2) offer you this license which gives you legal permission to copy, +distribute and/or modify the software. + + Also, for each author's protection and ours, we want to make certain +that everyone understands that there is no warranty for this free +software. If the software is modified by someone else and passed on, we +want its recipients to know that what they have is not the original, so +that any problems introduced by others will not reflect on the original +authors' reputations. + + Finally, any free program is threatened constantly by software +patents. We wish to avoid the danger that redistributors of a free +program will individually obtain patent licenses, in effect making the +program proprietary. To prevent this, we have made it clear that any +patent must be licensed for everyone's free use or not licensed at all. + + The precise terms and conditions for copying, distribution and +modification follow. + + GNU GENERAL PUBLIC LICENSE + TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION + + 0. This License applies to any program or other work which contains +a notice placed by the copyright holder saying it may be distributed +under the terms of this General Public License. The "Program", below, +refers to any such program or work, and a "work based on the Program" +means either the Program or any derivative work under copyright law: +that is to say, a work containing the Program or a portion of it, +either verbatim or with modifications and/or translated into another +language. (Hereinafter, translation is included without limitation in +the term "modification".) Each licensee is addressed as "you". + +Activities other than copying, distribution and modification are not +covered by this License; they are outside its scope. The act of +running the Program is not restricted, and the output from the Program +is covered only if its contents constitute a work based on the +Program (independent of having been made by running the Program). +Whether that is true depends on what the Program does. + + 1. You may copy and distribute verbatim copies of the Program's +source code as you receive it, in any medium, provided that you +conspicuously and appropriately publish on each copy an appropriate +copyright notice and disclaimer of warranty; keep intact all the +notices that refer to this License and to the absence of any warranty; +and give any other recipients of the Program a copy of this License +along with the Program. + +You may charge a fee for the physical act of transferring a copy, and +you may at your option offer warranty protection in exchange for a fee. + + 2. You may modify your copy or copies of the Program or any portion +of it, thus forming a work based on the Program, and copy and +distribute such modifications or work under the terms of Section 1 +above, provided that you also meet all of these conditions: + + a) You must cause the modified files to carry prominent notices + stating that you changed the files and the date of any change. + + b) You must cause any work that you distribute or publish, that in + whole or in part contains or is derived from the Program or any + part thereof, to be licensed as a whole at no charge to all third + parties under the terms of this License. + + c) If the modified program normally reads commands interactively + when run, you must cause it, when started running for such + interactive use in the most ordinary way, to print or display an + announcement including an appropriate copyright notice and a + notice that there is no warranty (or else, saying that you provide + a warranty) and that users may redistribute the program under + these conditions, and telling the user how to view a copy of this + License. (Exception: if the Program itself is interactive but + does not normally print such an announcement, your work based on + the Program is not required to print an announcement.) + +These requirements apply to the modified work as a whole. If +identifiable sections of that work are not derived from the Program, +and can be reasonably considered independent and separate works in +themselves, then this License, and its terms, do not apply to those +sections when you distribute them as separate works. But when you +distribute the same sections as part of a whole which is a work based +on the Program, the distribution of the whole must be on the terms of +this License, whose permissions for other licensees extend to the +entire whole, and thus to each and every part regardless of who wrote it. + +Thus, it is not the intent of this section to claim rights or contest +your rights to work written entirely by you; rather, the intent is to +exercise the right to control the distribution of derivative or +collective works based on the Program. + +In addition, mere aggregation of another work not based on the Program +with the Program (or with a work based on the Program) on a volume of +a storage or distribution medium does not bring the other work under +the scope of this License. + + 3. You may copy and distribute the Program (or a work based on it, +under Section 2) in object code or executable form under the terms of +Sections 1 and 2 above provided that you also do one of the following: + + a) Accompany it with the complete corresponding machine-readable + source code, which must be distributed under the terms of Sections + 1 and 2 above on a medium customarily used for software interchange; or, + + b) Accompany it with a written offer, valid for at least three + years, to give any third party, for a charge no more than your + cost of physically performing source distribution, a complete + machine-readable copy of the corresponding source code, to be + distributed under the terms of Sections 1 and 2 above on a medium + customarily used for software interchange; or, + + c) Accompany it with the information you received as to the offer + to distribute corresponding source code. (This alternative is + allowed only for noncommercial distribution and only if you + received the program in object code or executable form with such + an offer, in accord with Subsection b above.) + +The source code for a work means the preferred form of the work for +making modifications to it. For an executable work, complete source +code means all the source code for all modules it contains, plus any +associated interface definition files, plus the scripts used to +control compilation and installation of the executable. However, as a +special exception, the source code distributed need not include +anything that is normally distributed (in either source or binary +form) with the major components (compiler, kernel, and so on) of the +operating system on which the executable runs, unless that component +itself accompanies the executable. + +If distribution of executable or object code is made by offering +access to copy from a designated place, then offering equivalent +access to copy the source code from the same place counts as +distribution of the source code, even though third parties are not +compelled to copy the source along with the object code. + + 4. You may not copy, modify, sublicense, or distribute the Program +except as expressly provided under this License. Any attempt +otherwise to copy, modify, sublicense or distribute the Program is +void, and will automatically terminate your rights under this License. +However, parties who have received copies, or rights, from you under +this License will not have their licenses terminated so long as such +parties remain in full compliance. + + 5. You are not required to accept this License, since you have not +signed it. However, nothing else grants you permission to modify or +distribute the Program or its derivative works. These actions are +prohibited by law if you do not accept this License. Therefore, by +modifying or distributing the Program (or any work based on the +Program), you indicate your acceptance of this License to do so, and +all its terms and conditions for copying, distributing or modifying +the Program or works based on it. + + 6. Each time you redistribute the Program (or any work based on the +Program), the recipient automatically receives a license from the +original licensor to copy, distribute or modify the Program subject to +these terms and conditions. You may not impose any further +restrictions on the recipients' exercise of the rights granted herein. +You are not responsible for enforcing compliance by third parties to +this License. + + 7. If, as a consequence of a court judgment or allegation of patent +infringement or for any other reason (not limited to patent issues), +conditions are imposed on you (whether by court order, agreement or +otherwise) that contradict the conditions of this License, they do not +excuse you from the conditions of this License. If you cannot +distribute so as to satisfy simultaneously your obligations under this +License and any other pertinent obligations, then as a consequence you +may not distribute the Program at all. For example, if a patent +license would not permit royalty-free redistribution of the Program by +all those who receive copies directly or indirectly through you, then +the only way you could satisfy both it and this License would be to +refrain entirely from distribution of the Program. + +If any portion of this section is held invalid or unenforceable under +any particular circumstance, the balance of the section is intended to +apply and the section as a whole is intended to apply in other +circumstances. + +It is not the purpose of this section to induce you to infringe any +patents or other property right claims or to contest validity of any +such claims; this section has the sole purpose of protecting the +integrity of the free software distribution system, which is +implemented by public license practices. Many people have made +generous contributions to the wide range of software distributed +through that system in reliance on consistent application of that +system; it is up to the author/donor to decide if he or she is willing +to distribute software through any other system and a licensee cannot +impose that choice. + +This section is intended to make thoroughly clear what is believed to +be a consequence of the rest of this License. + + 8. If the distribution and/or use of the Program is restricted in +certain countries either by patents or by copyrighted interfaces, the +original copyright holder who places the Program under this License +may add an explicit geographical distribution limitation excluding +those countries, so that distribution is permitted only in or among +countries not thus excluded. In such case, this License incorporates +the limitation as if written in the body of this License. + + 9. The Free Software Foundation may publish revised and/or new versions +of the General Public License from time to time. Such new versions will +be similar in spirit to the present version, but may differ in detail to +address new problems or concerns. + +Each version is given a distinguishing version number. If the Program +specifies a version number of this License which applies to it and "any +later version", you have the option of following the terms and conditions +either of that version or of any later version published by the Free +Software Foundation. If the Program does not specify a version number of +this License, you may choose any version ever published by the Free Software +Foundation. + + 10. If you wish to incorporate parts of the Program into other free +programs whose distribution conditions are different, write to the author +to ask for permission. For software which is copyrighted by the Free +Software Foundation, write to the Free Software Foundation; we sometimes +make exceptions for this. Our decision will be guided by the two goals +of preserving the free status of all derivatives of our free software and +of promoting the sharing and reuse of software generally. + + NO WARRANTY + + 11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY +FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN +OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES +PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED +OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF +MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS +TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE +PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING, +REPAIR OR CORRECTION. + + 12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING +WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR +REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, +INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING +OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED +TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY +YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER +PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE +POSSIBILITY OF SUCH DAMAGES. + + END OF TERMS AND CONDITIONS + + How to Apply These Terms to Your New Programs + + If you develop a new program, and you want it to be of the greatest +possible use to the public, the best way to achieve this is to make it +free software which everyone can redistribute and change under these terms. + + To do so, attach the following notices to the program. It is safest +to attach them to the start of each source file to most effectively +convey the exclusion of warranty; and each file should have at least +the "copyright" line and a pointer to where the full notice is found. + + + Copyright (C) + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License along + with this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + +Also add information on how to contact you by electronic and paper mail. + +If the program is interactive, make it output a short notice like this +when it starts in an interactive mode: + + Gnomovision version 69, Copyright (C) year name of author + Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'. + This is free software, and you are welcome to redistribute it + under certain conditions; type `show c' for details. + +The hypothetical commands `show w' and `show c' should show the appropriate +parts of the General Public License. Of course, the commands you use may +be called something other than `show w' and `show c'; they could even be +mouse-clicks or menu items--whatever suits your program. + +You should also get your employer (if you work as a programmer) or your +school, if any, to sign a "copyright disclaimer" for the program, if +necessary. Here is a sample; alter the names: + + Yoyodyne, Inc., hereby disclaims all copyright interest in the program + `Gnomovision' (which makes passes at compilers) written by James Hacker. + + , 1 April 1989 + Ty Coon, President of Vice + +This General Public License does not permit incorporating your program into +proprietary programs. If your program is a subroutine library, you may +consider it more useful to permit linking proprietary applications with the +library. If this is what you want to do, use the GNU Lesser General +Public License instead of this License. diff --git a/INSTALL b/INSTALL new file mode 100644 index 0000000..d1ed160 --- /dev/null +++ b/INSTALL @@ -0,0 +1,39 @@ +Run `perl Makefile.PL` to generate a Makefile, then run `make` and +`make install`. + +To configure pristine-tar for your local environment, you may specify +the following additional parameters to Makefile.PL for the creation +of the Makefile + +PERL_SHEBANG= +TAR_PROGRAM= +XDELTA_PROGRAM= + +The PERL_SHEBANG needs only to be adapted in rare cases, as it is usually +correctly set by MakeMaker. However, the TAR_PROGRAM should refer to the GNU +version of Tar, which may be called, e.g., "tar" (default), "gtar", or +"gnutar" depending on the system. The XDELTA_PROGRAM needs to refer to the +Xdelta program in version 1.x (http://xdelta.org), which may be called, e.g., +"xdelta" (default) or "xdelta1". Xdelta3 won't work, as it has a different +command line syntax. + +The installation destination (default: /usr/local) can be specified either +via the ./Makefile.PL parameter: + +PREFIX= + +or + +INSTALL_BASE= + +See the manpage (or perldoc) of ExtUtils::MakeMaker for more details. + + +EXAMPLE: + +$ perl Makefile.PL \ + TAR_PROGRAM=gnutar \ + XDELTA_PROGRAM=xdelta \ + PREFIX=/usr/local +$ make +$ make install diff --git a/Makefile.PL b/Makefile.PL new file mode 100755 index 0000000..60a5103 --- /dev/null +++ b/Makefile.PL @@ -0,0 +1,103 @@ +#!/usr/bin/env perl +use warnings; +use strict; +use ExtUtils::MakeMaker; + +# read custom parameters +my %args = map { split /\s*=\s*/ } @ARGV; +my $PERL_SHEBANG = $args{PERL_SHEBANG} || "" ; +my $TAR_PROGRAM = $args{TAR_PROGRAM} || "" ; +my $XDELTA_PROGRAM = $args{XDELTA_PROGRAM} || "" ; + +if ($PERL_SHEBANG || $TAR_PROGRAM || $XDELTA_PROGRAM) { + print <SUPER::macro(@_); + my $subst = ''; + + $subst .= q[-e "s|^\#!.*|\#!$(PERL_SHEBANG)|g" ] + unless ! ${PERL_SHEBANG}; + $subst .= q[-e "s|tar_program = \".*\";|tar_program = \"$(TAR_PROGRAM)\";|g" ] + unless ! ${TAR_PROGRAM}; + $subst .= q[-e "s|xdelta_program = \".*\";|xdelta_program = \"$(XDELTA_PROGRAM)\";|g" ] + unless ! ${XDELTA_PROGRAM}; + + $inherited .= "\nPARAM_SUBST = ${subst}"; + $inherited; +} +# Add a few more targets. +sub postamble { +q{ +all:: extra_build +clean:: extra_clean +install:: extra_install +pure_install:: extra_install + +ifneq (,$(PARAM_SUBST)) +$(EXE_FILES): %: + sed $(PARAM_SUBST) $@ > $@.new + $(MV) $@.new $@ + $(CHMOD) $(PERM_RWX) $@ +PHONY+=$(EXE_FILES) +endif + +LIBDIR ?= $(PREFIX)/lib +PKGLIBDIR ?= $(LIBDIR)/pristine-tar + +extra_build: zgz/zgz pristine-tar.spec + pod2man -c pristine-tar pristine-tar > pristine-tar.1 + pod2man -c pristine-gz pristine-gz > pristine-gz.1 + pod2man -c pristine-bz2 pristine-bz2 > pristine-bz2.1 + pod2man -c pristine-xz pristine-xz > pristine-xz.1 + pod2man -c zgz zgz/zgz.pod > zgz.1 + $(MAKE) -C pit/suse-bzip2 PREFIX=$(PREFIX) + +ZGZ_SOURCES = zgz/zgz.c zgz/gzip/*.c zgz/old-bzip2/*.c +zgz/zgz: $(ZGZ_SOURCES) + gcc -Wall -O2 -o $@ $(ZGZ_SOURCES) -lz -DPKGLIBDIR=\"$(PKGLIBDIR)\" + +extra_install: + install -d $(DESTDIR)$(PREFIX)/bin + install zgz/zgz $(DESTDIR)$(PREFIX)/bin + install -d $(DESTDIR)$(PREFIX)/share/man/man1 + install -m 0644 *.1 $(DESTDIR)$(PREFIX)/share/man/man1 + install -d $(DESTDIR)$(PKGLIBDIR)/suse-bzip2 + install pit/suse-bzip2/bzip2 pit/suse-bzip2/libbz2* $(DESTDIR)$(PKGLIBDIR)/suse-bzip2 + +extra_clean: + $(MAKE) clean -C pit/suse-bzip2 PREFIX=$(PREFIX) + +pristine-tar.spec: debian/changelog + sed "s/Version:.*/Version: $$($(PERLRUN) -e '$$_=<>;print m/\((.*?)\)/'<$<)/" \ + $@ > $@.new && $(MV) $@.new $@ +PHONY+=pristine-tar.spec + +.PHONY: $(PHONY) +} +} + +ExtUtils::MakeMaker::WriteMakefile( + NAME => 'Pristine', + AUTHOR => 'Joey Hess ', + ABSTRACT => + 'regenerate a pristine upstream tarball using only a small '. + 'binary delta file and a revision control checkout', + MAN1PODS => {}, + MAN3PODS => {}, + PMLIBDIRS => ["Pristine"], + EXE_FILES => ["pristine-tar","pristine-bz2","pristine-gz","pristine-xz"], + macro => { + PERL_SHEBANG => "${PERL_SHEBANG}", + TAR_PROGRAM => "${TAR_PROGRAM}", + XDELTA_PROGRAM => "${XDELTA_PROGRAM}" + }, + clean => { FILES => 'zgz/zgz' }, +); diff --git a/Pristine/Tar.pm b/Pristine/Tar.pm new file mode 100644 index 0000000..3f10788 --- /dev/null +++ b/Pristine/Tar.pm @@ -0,0 +1,121 @@ +#!/usr/bin/perl +# pristine-tar utility library + +package Pristine::Tar; + +use warnings; +use strict; +use File::Temp; +use Getopt::Long; +use IPC::Open2; +use Exporter q{import}; + +our @EXPORT = qw(error message debug vprint doit try_doit doit_redir + tempdir dispatch comparefiles + $verbose $debug $keep); + +our $verbose=0; +our $debug=0; +our $keep=0; + +sub progname { + my $name=$0; + $name=~s!^.*/(.+)$!$1!; + return $name +} + +sub error { + die progname().": @_\n"; +} + +sub message { + print STDERR progname().": @_\n"; +} + +sub debug { + message(@_) if $debug; +} + +sub vprint { + message(@_) if $verbose; +} + +sub doit { + if (try_doit(@_) != 0) { + error "command failed: @_"; + } +} + +sub try_doit { + vprint(@_); + return system(@_) +} + +sub doit_redir { + no warnings 'once'; + my ($in, $out, @args) = @_; + vprint(@args, "<", $in, ">", $out); + open INFILE, "<", $in or die("Could not open '$in' for reading: $!\n"); + open OUTFILE, ">", $out or die("Could not open '$out' for reading: $!\n"); + my $pid = open2(">&OUTFILE", "<&INFILE", @args); + waitpid $pid, 0; + if ($? != 0) { + error "command failed: @args"; + } +} + +sub tempdir { + return File::Temp::tempdir("pristine-tar.XXXXXXXXXX", + TMPDIR => 1, CLEANUP => !$keep); +} + +# Workaround for bug #479317 in perl 5.10. +sub END { + chdir("/"); +} + +sub dispatch { + my %params=@_; + + my %commands=%{$params{commands}}; + my %options=%{$params{options}} if exists $params{options}; + + my $command; + Getopt::Long::Configure("bundling"); + if (! GetOptions(%options, + "v|verbose!" => \$verbose, + "d|debug!" => \$debug, + "k|keep!" => \$keep) || + ! @ARGV) { + $command="usage"; + } + else { + $command=shift @ARGV; + } + + my $i=$commands{$command}; + if (! defined $i) { + error "Unknown subcommand \"$command\""; + } + + # Check that the right number of args were passed by user. + if (defined $i->[1] && @ARGV != $i->[1]) { + $command="usage"; + $i=$commands{$command}; + } + + $i->[0]->(@ARGV); +} + +sub comparefiles { + my ($old, $new) = (shift, shift); + system('cmp', '-s', $old, $new); + + if ($? == -1 || $? & 127) { + die("Failed to execute cmp: $!\n"); + } + + return $? >> 8; +} + +1 diff --git a/Pristine/Tar/Delta.pm b/Pristine/Tar/Delta.pm new file mode 100644 index 0000000..c5e6572 --- /dev/null +++ b/Pristine/Tar/Delta.pm @@ -0,0 +1,122 @@ +#!/usr/bin/perl +# pristine-tar delta file library +# See delta-format.txt for details about the contents of delta files. +package Pristine::Tar::Delta; + +use Pristine::Tar; +use warnings; +use strict; + +# Checks if a field of a delta should be stored in the delta hash using +# a filename. (Normally the hash stores the whole field value, but +# using filenames makes sense for a few fields.) +my %delta_files=map { $_ => 1 } qw(manifest delta wrapper); +sub is_filename { + my $field=shift; + return $delta_files{$field}; +} + +sub handler { + my $action=shift; + my $type=shift; + + my $class="Pristine::Tar::Delta::$type"; + eval "use $class"; + if ($@) { + error "unsupported delta file format $type"; + } + $class->$action(@_); +} + +# After the type of delta and the file to create (which can be "-" +# to send it to stdout), this takes a hashref containing the contents of +# the delta to write. +sub write { + my $type=shift; + my $deltafile=shift; + my $delta=shift; + + my $tempdir=tempdir(); + + my $stdout=0; + if ($deltafile eq "-") { + $stdout=1; + $deltafile="$tempdir/tmpout"; + } + + handler('write', $type, $deltafile, $delta); + + if ($stdout) { + doit("cat", $deltafile); + unlink($deltafile); + } + + return $delta; +} + +# Returns a hashref of the contents of the delta. +sub read { + my $type=shift; + my $deltafile=shift; + + my $tempdir=tempdir(); + + my $stdin=0; + if ($deltafile eq "-") { + $deltafile="$tempdir/tmpin"; + open (my $out, ">", $deltafile) || die "$deltafile: $!"; + while () { + print $out $_; + } + close $out; + } + + my $delta=handler('read', $type, $deltafile); + + unlink($deltafile) if $stdin; + + return $delta; +} + +# Checks the type, maxversion, minversion of a delta hashref. +# Checks that the delta contains all specified fields. +# Returns the hashref if it is ok. +sub assert { + my $delta=shift; + my %params=@_; + + if (! exists $delta->{version}) { + error "delta lacks version"; + } + if (defined $params{maxversion}) { + if ($delta->{version} > $params{maxversion}) { + error "delta is version ".$delta->{version}.", newer than maximum supported version $params{maxversion}"; + } + } + if (defined $params{minversion}) { + if ($delta->{version} < $params{minversion}) { + error "delta is version ".$delta->{version}.", older than minimum supported version $params{minversion}"; + } + } + + if (! exists $delta->{type}) { + error "delta lacks type"; + } + if (defined $params{type}) { + if ($delta->{type} ne $params{type}) { + error "delta is for a ".$delta->{type}.", not a $params{type}"; + } + } + + if ($params{fields}) { + foreach my $key (@{$params{fields}}) { + if (! exists $delta->{$key}) { + error "delta lacks $key"; + } + } + } + + return $delta; +} + +1 diff --git a/Pristine/Tar/Delta/Tarball.pm b/Pristine/Tar/Delta/Tarball.pm new file mode 100644 index 0000000..ab38935 --- /dev/null +++ b/Pristine/Tar/Delta/Tarball.pm @@ -0,0 +1,63 @@ +#!/usr/bin/perl +# pristine-tar delta files formatted as tarballs +package Pristine::Tar::Delta::Tarball; + +use Pristine::Tar; +use Pristine::Tar::Delta; +use File::Basename; +use warnings; +use strict; + +sub write { + my $class=shift; + my $deltafile=shift; + my $delta=shift; + + my $tempdir=tempdir(); + + foreach my $field (keys %$delta) { + if (Pristine::Tar::Delta::is_filename($field)) { + link($delta->{$field}, "$tempdir/$field") || die "link $tempdir/$field: $!"; + } + else { + open (my $out, ">", "$tempdir/$field") || die "$tempdir/$field: $!"; + print $out $delta->{$field}."\n"; + close $out; + } + } + + doit("tar", "czf", $deltafile, "-C", $tempdir, keys %$delta); + + return $delta; +} + +sub read { + my $class=shift; + my $deltafile=shift; + + my $tempdir=tempdir(); + doit("tar", "xf", File::Spec->rel2abs($deltafile), "-C", $tempdir); + + my %delta; + foreach my $file (glob("$tempdir/*")) { + if (-f $file) { + my $field=basename($file); + if (Pristine::Tar::Delta::is_filename($field)) { + $delta{$field}=$file; + } + else { + open (my $in, "<", $file) || die "$file: $!"; + { + local $/=undef; + $delta{$field}=<$in>; + } + chomp $delta{$field}; + close $in; + } + } + } + + return \%delta; +} + +1 diff --git a/Pristine/Tar/Formats.pm b/Pristine/Tar/Formats.pm new file mode 100644 index 0000000..42afca1 --- /dev/null +++ b/Pristine/Tar/Formats.pm @@ -0,0 +1,93 @@ +#!/usr/bin/perl +# pristine-tar file format parsing + +package Pristine::Tar::Formats; + +use warnings; +use strict; +use Exporter q{import}; +our @EXPORT=qw{is_gz is_bz2 is_xz %fconstants}; + +our %fconstants=( + # magic identification + GZIP_ID1 => 0x1F, + GZIP_ID2 => 0x8B, + BZIP2_ID1 => 0x42, + BZIP2_ID2 => 0x5a, + XZ_ID1 => 0xFD, + XZ_ID2 => 0x37, + XZ_ID3 => 0x7A, + XZ_ID4 => 0x58, + XZ_ID5 => 0x5A, + XZ_ID6 => 0x00, + + # compression methods + # 0x00-0x07 are reserved + GZIP_METHOD_DEFLATE => 0x08, + # 'h' for Bzip2 ('H'uffman coding) + BZIP2_METHOD_HUFFMAN => 0x68, + + # flags + GZIP_FLAG_FTEXT => 0, + GZIP_FLAG_FHCRC => 1, + GZIP_FLAG_FEXTRA => 2, + GZIP_FLAG_FNAME => 3, + GZIP_FLAG_FCOMMENT => 4, + # the rest are reserved + + # compression level + GZIP_COMPRESSION_NORMAL => 0, + GZIP_COMPRESSION_BEST => 2, + GZIP_COMPRESSION_FAST => 4, + + # operating systems + GZIP_OS_MSDOS => 0, + GZIP_OS_AMIGA => 1, + GZIP_OS_VMS => 2, + GZIP_OS_UNIX => 3, + GZIP_OS_VMCMS => 4, + GZIP_OS_ATARI => 5, + GZIP_OS_HPFS => 6, + GZIP_OS_MACINTOSH => 7, + GZIP_OS_ZSYSTEM => 8, + GZIP_OS_CPM => 9, + GZIP_OS_TOPS => 10, + GZIP_OS_NTFS => 11, + GZIP_OS_QDOS => 12, + GZIP_OS_RISCOS => 13, + GZIP_OS_VFAT => 14, + GZIP_OS_UNKNOWN => 255, +); + +sub magic { + my $file=shift; + + open (my $in, "<", $file) || die "$file: $!"; + my $count=$#_+1; + my ($chars, @bits); + my $ret = ( + read($in, $chars, $count) == $count && + (@bits = unpack(("C" x $count), $chars)) && + (! grep { $bits[$_] != $_[$_] } (0..$count-1)) + ); + close $in; + return $ret; +} + +sub is_gz { + magic(shift, $fconstants{GZIP_ID1}, $fconstants{GZIP_ID2}, + $fconstants{GZIP_METHOD_DEFLATE}); +} + +sub is_bz2 { + magic(shift, $fconstants{BZIP2_ID1}, $fconstants{BZIP2_ID2}, + $fconstants{BZIP2_METHOD_HUFFMAN}); +} + +sub is_xz { + magic(shift, $fconstants{XZ_ID1}, $fconstants{XZ_ID2}, + $fconstants{XZ_ID3}, $fconstants{XZ_ID4}, + $fconstants{XZ_ID5}, $fconstants{XZ_ID6}); +} + +1 diff --git a/README b/README new file mode 100644 index 0000000..f128c7a --- /dev/null +++ b/README @@ -0,0 +1,11 @@ +pristine-tar can regenerate a pristine upstream tarball using only a small +binary delta file and a revision control checkout of the upstream branch. + +The package also includes a pristine-gz command, which can regenerate a +pristine .gz file, a pristine-bz2 for .bz2 files, and a pristine-xz for .xz +files. + +The delta file is designed to be checked into revision control along-side +the upstream branch, thus allowing Debian packages to be built entirely +using sources in revision control, without the need to keep copies of +upstream tarballs. diff --git a/TODO b/TODO new file mode 100644 index 0000000..fce8e78 --- /dev/null +++ b/TODO @@ -0,0 +1,25 @@ +* Investigate files that the programs cannot reproduce. Such files are stored + in the testsuite git branch. + +* Would be nice to keep a md5sums list of the files in the manifest and + check and error if any are different. The error message from xdelta when + a source file doesn't have the right checksup is fairly obscure and + doesn't aid in fixing the problem. + + There's an implementation of this in the manifest-md5sums branch of + git://git.debian.org/git/users/kibi-guest/pristine-tar.git , but I have + not yet decided about applying it, since it would bloat the delta files + with a list of all the files and md5sums.. + +* Optimisation: Enhance zgz so it runs multiple compression alternatives + block-by-block in parallell, aborting compressors when they differ + from the target file. + +* Add binary deltas for unreproducible bz2 files. (But currently, only two + bz2 files are known that fails to reproduce: nsis and freecol in testsuite.) + +* Support checkout/checkin using other VCS than git. + + bzr-builddeb stores pristine-tar data in bzr repositories. + The delta is stored in a bzr revprop named "deb-pristine-delta" + or "deb-pristine-delta-bz2". diff --git a/debian/changelog b/debian/changelog new file mode 100644 index 0000000..b38f259 --- /dev/null +++ b/debian/changelog @@ -0,0 +1,453 @@ +pristine-tar (1.28) unstable; urgency=low + + * Update lintian overrides. + + -- Joey Hess Sun, 05 May 2013 01:45:27 -0400 + +pristine-tar (1.27) unstable; urgency=low + + * Now ported to Mac OS X. + - MakeFile.PL fixes + - zgz: Use DYLD_LIBRARY_PATH on OSX instead of LD_LIBRARY_PATH + - suse-bzip2/Makefile: compile shared library appropriately on OSX + - INSTALL: Added some options useful when installing on OSX + Thanks, Torsten Maehne and Philipp A. Hartmann. + * zgz: Don't hardcode /lib, avoid segfaulting when installed in a + > 128 character path. + Thanks, Colin Walters + * Fix typo that led to incorrect error message when pristine-tar + branch does not exist. Closes: #700448 Thanks, paul cannon + + -- Joey Hess Sat, 04 May 2013 23:50:40 -0400 + +pristine-tar (1.26) unstable; urgency=low + + * pristine-xz: Use xz --robot to extract information from xz files, + avoiding the need to do more expensive guessing. Support --block-list + when available to reproduce multi block files. + Closes: #677250 Thanks, Vincent Ladeuil + + -- Joey Hess Sat, 25 Aug 2012 11:54:46 -0400 + +pristine-tar (1.25) unstable; urgency=low + + * Remove unused fts.h include. Closes: #675367 + * pristine-xz: Add --check=sha256 and compression levels 0 and 0e. + Closes: #677241 Thanks, Vincent Ladeuil + + -- Joey Hess Tue, 12 Jun 2012 11:45:33 -0400 + +pristine-tar (1.24) unstable; urgency=low + + * pristine-xz: Add --check=crc32 to list of allowed parameters. + + -- Joey Hess Sun, 01 Apr 2012 12:21:24 -0400 + +pristine-tar (1.23) unstable; urgency=low + + * Detect git submodules procuded by recent git, where .git is a file, + not a directory. Closes: #666033 + * pristine-xz: Handle CRC32 files made with PylibLZMA. Thanks, Mike Miller + Closes: #662129, #660783 + + -- Joey Hess Sat, 31 Mar 2012 19:00:49 -0400 + +pristine-tar (1.22) unstable; urgency=low + + * Store the sha1 of the tree that is being committed, rather than + a sha1 of a commit or other object that points to the tree. + This makes committed tarballs more resilient against git filter-branch + when it's only used to change commit metadata. Closes: #467288 + + -- Joey Hess Sun, 11 Mar 2012 18:49:12 -0400 + +pristine-tar (1.21) unstable; urgency=low + + * Fix bug in mtime resetting code, which caused delta files to be created + that failed to regenerate the original tarball. Closes: #661902 + + -- Joey Hess Fri, 02 Mar 2012 14:57:11 -0400 + +pristine-tar (1.20) unstable; urgency=low + + * pristine-xz: Try --extreme too. While my corpus has no such xz files, + Darren Salt points out that automake 1.11.2 has started using + extreme mode. Closes: #658666 + + -- Joey Hess Sun, 12 Feb 2012 14:23:48 -0400 + +pristine-tar (1.19) unstable; urgency=low + + * Fix duplicate function oops. + + -- Joey Hess Wed, 01 Feb 2012 11:00:43 -0400 + +pristine-tar (1.18) unstable; urgency=low + + * pristine-xz: A simplistic xz recreator. + Current success rate on the Debian corpus: 70%. + Closes: #499489 + + -- Joey Hess Tue, 31 Jan 2012 16:13:35 -0400 + +pristine-tar (1.17) unstable; urgency=low + + * pristine-tar: Fail when the delta is excessively large, probably + due to the tarball being compressed with something that tar x can + auto-extract, but that pristine-tar does not support. + + -- Joey Hess Mon, 09 Jan 2012 11:44:44 -0400 + +pristine-tar (1.16) unstable; urgency=low + + * pristine-bz2: Can recreate bz2 files greated by Suse's + patched bzip2. Closes: #641019 + + -- Joey Hess Wed, 04 Jan 2012 21:44:23 -0400 + +pristine-tar (1.15) unstable; urgency=low + + * Fix Vcs-Browser url. Closes: #636926 + * Propigate nonzero exit status from doit_redir. Closes: #600724 + * 'pristine-tar list' displays a list of tarballs that are available for + checkout. + + -- Joey Hess Sat, 08 Oct 2011 17:53:03 -0400 + +pristine-tar (1.14) unstable; urgency=low + + * Add support for gzip --rsyncable output produced by gzip 1.4. + This differs from prior --rsyncable output, which was made by a Debian + specific patch and used a different rsync window size, and reset + huffman encodings less often. + * Clarify man page. + + -- Joey Hess Sat, 06 Aug 2011 21:05:15 -0400 + +pristine-tar (1.13) unstable; urgency=low + + [ Faidon Liambotis ] + * Add a Perl quirks mode for tarballs generated by Perl's IO::Zlib, which + is used among other things by Module::Build. Closes: #618284 + + -- Joey Hess Tue, 15 Mar 2011 00:11:24 -0400 + +pristine-tar (1.12) unstable; urgency=low + + * Fix gzg building with --as-needed. Closes: #604030 + + -- Joey Hess Sun, 06 Feb 2011 00:01:18 -0400 + +pristine-tar (1.11) unstable; urgency=low + + * Add workaround for Debian's tar changing its output for tarballs + containing filenames exactly 100 bytes long. Closes: #602907 + (Needs tar 1.25-2 with #603231 fixed in order for that to work.) + + -- Joey Hess Fri, 12 Nov 2010 10:25:46 -0400 + +pristine-tar (1.10) unstable; urgency=low + + * pristine-gz gengz: Bugfix: Always remove uncompressed input file. + * Large refactoring and modularization. (Thanks Gabriel de Perthuis + for inspiration for this.)) + * Remove environment variables used by tar, gz, and bzip2, to avoid + local environment settings possibly breaking things. + Closes: #498760 (probably; thanks Ralph Lange for analysis) + * Lintian fixes. + + -- Joey Hess Thu, 19 Aug 2010 16:36:25 -0400 + +pristine-tar (1.03) unstable; urgency=low + + * zgz now includes a trimmed down copy of the compressor from bzip2 0.9.5d. + That old version of bzip2 generated different compression results, + and is the overwhelming reason for failures to reproduce bz2 files + in the Debian archive. Closes: #560170, #579657, #525128, #576119 + (Special thanks to Laszlo Ersek for tracking this down.) + * Awesome new zgz man page! + * Faster generation of bzip2 deltas: Do not repeatedly decompress + input file. + * pristine-bz2 -t can be used to make it try harder to generate + a delta. Currently, this tries pbzip2 -b with various numbers. + Not the default because it's slow and I don't have any bzip2 files + from the wild that were built that way. + + -- Joey Hess Thu, 13 May 2010 02:19:32 -0400 + +pristine-tar (1.01) unstable; urgency=low + + * Use italics in man pages to highlight which parts of command + lines are file names. Closes: #561015 + * Added an example. Closes: #561016 + * Document TMPDIR in man pages. Closes: #558266 + + -- Joey Hess Fri, 05 Feb 2010 16:41:07 -0500 + +pristine-tar (1.00) unstable; urgency=low + + * pristine-gz: Fall back to storing a binary delta, in the rare + cases where the file cannot be 100% reproduced. A warning message + is printed if the binary delta is not relatively small. + * Allows generating quite small deltas for php-geoip, + xsupplicant, libgraphics-colornames-perl, + Ricoh-Aficio_2020D-Postscript.ppd.gz, and dozens on others + that zgz can closely replicate. Closes: #518972, #506874 + * For other things the deltas are not as small. + This seems an accepable tradeoff to be able to reliably use it on + anything. Closes: #475778, #509703, #509707, #515256, #515331 + * Increase gz delta version number to 3.0 if a binary delta file + is included. + * pristine-tar: Avoid dying in corner case involving tar's filename + encoding. Fixes several failures. + * pristine-tar: Avoid dying if tar tv exits nonzero. This makes + reproducing star's tarball work, even though tar gets upset + while listing it and decides to die at the end. + * pristine-gz: Avoid passing -F to zgz if --original-name + is passed. This bug prevented pristine-gz gengz from + recreating the gz in some cases. + * Add aliases ci and co for commit and checkout. Closes: #500388 + * pristine-gz: Fix generation of gz files that have a null filename field. + * pristine-tar: Fix behavior in the corner case where the tarball + puts all files in a subdir, but does not contain the subdir's directory + entry. + + Stats: Successfully generates deltas for all 14446 tarballs in the + Debian archive. Mean delta size: 19K Median: 2.6K Mode: 1.5K + + -- Joey Hess Tue, 14 Apr 2009 21:23:22 -0400 + +pristine-tar (0.22) unstable; urgency=low + + * Fix syntax mistake that could cause pristine-tar to in some cases + fail on tarballs containing files named "0". Closes: #523773 + + -- Joey Hess Sun, 12 Apr 2009 13:20:04 -0400 + +pristine-tar (0.21) unstable; urgency=low + + * Add support for GIT_DIR. Closes: #512619 + + -- Joey Hess Thu, 22 Jan 2009 15:07:33 -0500 + +pristine-tar (0.20) unstable; urgency=low + + * Avoid littering .orig.tar.gz.tmp files around by building intermediate + tarball in the temp dir. Closes: #508965 + + -- Joey Hess Mon, 29 Dec 2008 15:34:12 -0500 + +pristine-tar (0.19) unstable; urgency=low + + [ Josh Triplett ] + * Add a .gitignore file + * Various cleanups to zgz. + * Remove the unnecessary -l, -S, -t, and -v flags from zgz. + * Allow the empty string as an original filename in zgz, rather than + treating the empty string as a flag to not store an original filename. + * Fix zgz's usage message to stop identifying the program as gzip. + * Add a new -T,--timestamp flag to zgz, to explicitly set the timestamp + stored in the gzip file, rather than taking the timestamp of the input + file. Closes: #507110 + + [ Joey Hess ] + * zgz: Avoid using uninitialized data as timestamp in -c mode. + Closes: #507095 + * Document existing long options. Closes: #499488 + * Avoid exposing tar file names to the shell. Closes: #500499 + * Clarify bit in man page about what file formats are supported. + Closes: #507322 + + [ Josh Triplett ] + * zgz: Incorporate a cut-down version of GNU gzip to support the --gnu + option. The cut-down gzip includes support for explicitly setting the + timestamp and original filename. Closes: #506627 + * pristine-gz: Now that zgz supports explicitly setting the timestamp and + original filename with both --zlib and --gnu, use that support to do all + compression using -c with standard input and standard output. This + removes the need to create a temporary file with the original filename and + set its timestamp, as well as the need to continuously recreate the file + when compressed and removed. Closes: #506627 + * zgz: Remove support for files, making -c the default. This removes a lot + of complexity and code duplication. + + -- Joey Hess Fri, 12 Dec 2008 16:12:27 -0500 + +pristine-tar (0.18) unstable; urgency=low + + * pristine-gz: Avoid uncompressing the original file more than once. + Closes: #506490 + + -- Joey Hess Sat, 22 Nov 2008 18:22:40 -0500 + +pristine-tar (0.17) unstable; urgency=low + + * Correct -f order to come after --gnu. + + -- Joey Hess Fri, 04 Jul 2008 14:46:30 -0400 + +pristine-tar (0.16) unstable; urgency=low + + * pristine-gz: Always pass -f to zgz, to support cases where + the gzip header claims that the original input file had an extension + that suggests it was itself already compressed. (Although it can't really + be.) Not doing the same for gzip because -f doesn't cause it to ignore + extensions. See #475778 + + -- Joey Hess Fri, 04 Jul 2008 14:29:54 -0400 + +pristine-tar (0.15) unstable; urgency=low + + * Fix POD issues. Closes: #484165 + * zgz: Remove support for a GZIP env variable. We don't need this in + pristine-tar, and if could screw things up if a user set it. + Closes: #488986 + + -- Joey Hess Wed, 02 Jul 2008 12:46:41 -0400 + +pristine-tar (0.14) unstable; urgency=low + + * Use debhelper v7; rules file minimisation. + * Add build and install targets to Makefile. + * Put in a workaround for bug #479317, an incompatability in perl 5.10 + that causes nonzero exit if a program exits while chdired into a + subdirectory of a File::Temp temp directory. + + -- Joey Hess Sun, 04 May 2008 14:47:06 -0400 + +pristine-tar (0.13) unstable; urgency=low + + * Man page typo fix. Closes: #475698 + + -- Joey Hess Sat, 19 Apr 2008 16:51:09 -0400 + +pristine-tar (0.12) unstable; urgency=low + + * pristine-tar: Fix some bugs when run on an uncompressed tarball. + + -- Joey Hess Wed, 02 Apr 2008 13:14:45 -0400 + +pristine-tar (0.11) unstable; urgency=low + + * Unset $keep, don't keep temp dirs. Oops. + + -- Joey Hess Fri, 21 Mar 2008 14:50:21 -0400 + +pristine-tar (0.10) unstable; urgency=low + + * pristine-tar: Add -m option to specify a commit message. Closes: #465231 + + -- Joey Hess Mon, 10 Mar 2008 15:56:05 -0400 + +pristine-tar (0.9) unstable; urgency=low + + * Add smart branching for commits. (Cyril Brulebois) + * Prefer to commit to the ref that exactly matches what is specified at the + command line. + * Send all debug and verbose prints to stderr, reserving stdout for + outputting deltas etc. + * pristine-tar commit was not generating deltas against the specified + upstream branch, but against the unpacked contents of the source tarball. + Fix it to use the upstream branch, ensuring that no matter what branch is + given, the delta that is created can always recreate the source tarball. + + -- Joey Hess Wed, 06 Feb 2008 14:31:29 -0500 + +pristine-tar (0.8) unstable; urgency=low + + * If a tarball contains files all in one subdirectory, and the source + tree it's being generated from already has that subdirectory, don't + try to create the subdir, which would fail. OTOH, if the subdirectory + isn't there, proceed with creating it as before. + * Add support for bz2 files (Cyril Brulebois) + * Recommends bzip2 and pbzip2, which both might be needed when dealing + with bz2 files due to differences in their output. + + -- Joey Hess Sat, 02 Feb 2008 17:28:17 -0500 + +pristine-tar (0.7) unstable; urgency=low + + * Improve/fix handling when there is no local pristine-tar branch. + * Add a newline to the .id files committed to git. + + -- Joey Hess Fri, 01 Feb 2008 22:13:06 -0500 + +pristine-tar (0.6) unstable; urgency=low + + * Improve search for upstream branch. Default to looking for first + refs/heads/upstream, and failing that, anything with "upstream" in its + name. This way if there is a local upstream branch and a remote one, it + will use the local one. If there's only a remote one, it'll use that. + Closes: #463566 + * Handle the case where there is no local pristine-tar branch. If there + is exactly one remote branch, use it for checkout. If more than one or + none, show a nice error message. (Cyril Brulebois) + + -- Joey Hess Fri, 01 Feb 2008 21:35:37 -0500 + +pristine-tar (0.5) unstable; urgency=low + + * Moved to git, and added a Vcs-Browser field. + * pristine-tar can now "commit" deltas for a tarball directly to a git + branch, and then "checkout" the original tarball again. Git users + are recommended to use these new subcommands because they're easier, + store the deltas in a consistent location, and avoid problems with + recreating things like empty directories that git doesn't track. + Closes: #463352 + * Thanks to madcoder for help with manipulating git branches without + touching the working copy. + * Fix paranoia check in pristine-gz to correctly handle parameters + that take an option. + + -- Joey Hess Thu, 31 Jan 2008 21:52:45 -0500 + +pristine-tar (0.4) unstable; urgency=low + + * Allow the delta file to be read or written from stdio. + * Fix man page synopses formatting. + + -- Joey Hess Sat, 24 Nov 2007 16:45:04 -0500 + +pristine-tar (0.3) unstable; urgency=low + + * pristine-tar sometimes got confused about tarballs that did not unpack + all files into a subdir. Fixes sear-media and 30 others. + * tar was confused by files starting with ./ or / in the manifest, so + strip it out. Fixes erlang-doc-html and 19 others, including doc-fdc, + which is the only package to have files in / in its tarball. + * Some tarballs put files in a subdir, but don't show a "/" after the subdir + name in a manifest. Stop relying on that. (example: streamtuner) + + Current stats: Out of the whole debian archive, pristine-gz fails + on 132 files, and pristine-tar fails on 0. Success with 98.97% of + files. + + -- Joey Hess Wed, 03 Oct 2007 00:07:54 -0400 + +pristine-tar (0.2) unstable; urgency=low + + * Added pristine-gz, thanks to paravoid for making this possible! + * Added zgz, which is derived from a BSD gzip program that uses libz. + Modified by paravoid to support many options needed to reproduce gz + files. + * Renamed the subcommands extract => gentar , stash => gendelta + * Quite a lot of testing (ie, on the entire Debian archive) and bugfixes. + * Put files in the generated tarball mode 0644 pre-delta, this is a much + more likely file mode than the previous 0000 and thus makes the delta + smaller. + * File format version is up to 2.0 due to misc incompatible changes. + * Fix a bug related to tar's handling of unicode filenames. + * Work around a strange tar behavior: When run with --mode 644, tar + preserves the sgid bit on subdirectories. + + Current stats: Out of the whole debian archive, pristine-gz fails + on 131 files, and pristine-tar fails on 45. Success with 98.63% of + files. + + -- Joey Hess Tue, 02 Oct 2007 21:51:03 -0400 + +pristine-tar (0.1) unstable; urgency=low + + * First release. + + -- Joey Hess Sun, 30 Sep 2007 20:06:00 -0400 diff --git a/debian/compat b/debian/compat new file mode 100644 index 0000000..7f8f011 --- /dev/null +++ b/debian/compat @@ -0,0 +1 @@ +7 diff --git a/debian/control b/debian/control new file mode 100644 index 0000000..462b4a8 --- /dev/null +++ b/debian/control @@ -0,0 +1,27 @@ +Source: pristine-tar +Section: utils +Priority: optional +Build-Depends: debhelper (>= 7.0.50), dpkg-dev (>= 1.9.0), zlib1g-dev, perl +Maintainer: Joey Hess +Standards-Version: 3.9.3 +Vcs-Git: git://git.kitenet.net/pristine-tar/ +Vcs-Browser: http://git.kitenet.net/?p=pristine-tar.git +Homepage: http://kitenet.net/~joey/code/pristine-tar/ + +Package: pristine-tar +Architecture: any +Section: utils +Depends: xdelta, ${shlibs:Depends}, ${misc:Depends}, perl-modules +Recommends: pbzip2, bzip2, xz-utils +Description: regenerate pristine tarballs + pristine-tar can regenerate a pristine upstream tarball using only a small + binary delta file and a revision control checkout of the upstream branch. + . + The package also includes a pristine-gz command, which can regenerate a + pristine .gz file, a pristine-bz2 for .bz2 files, and a pristine-xz for .xz + files. + . + The delta file is designed to be checked into revision control along-side + the upstream branch, thus allowing Debian packages to be built entirely + using sources in revision control, without the need to keep copies of + upstream tarballs. diff --git a/debian/copyright b/debian/copyright new file mode 100644 index 0000000..a73c51f --- /dev/null +++ b/debian/copyright @@ -0,0 +1,84 @@ +Format: http://www.debian.org/doc/packaging-manuals/copyright-format/1.0/ +Source: zgz is based on NetBSD's gzip + +Files: * +Copyright: © 2007-2010 Joey Hess + © 2007 Faidon Liambotis + © 2008 Cyril Brulebois + © 2008 Josh Triplett +License: GPL-2+ + +Files: zgz/zgz.c +Copyright: © 1997, 1998, 2003, 2004, 2006 Matthew R. Green + © 2007 Faidon Liambotis + © 2008 Josh Triplett + © 2010 Joey Hess +License: BSD-3 + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. The name of the author may not be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED + * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, + * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + +Files: zgz/gzip/* +Copyright: © 1997, 1998, 1999, 2001, 2002, 2006, 2007 Free Software Foundation, Inc. + © 1992-1993 Jean-loup Gailly + © 2008 Josh Triplett +License: GPL-2+ + +Files: zgz/old-bzip2/* zgz/pit/suse-bzip2/* +Copyright: © 1996-2007 Julian R Seward +License: other + Copyright (C) 1996-2007 Julian R Seward. All rights reserved. + . + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + . + 1. Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + . + 2. The origin of this software must not be misrepresented; you must + not claim that you wrote the original software. If you use this + software in a product, an acknowledgment in the product + documentation would be appreciated but is not required. + . + 3. Altered source versions must be plainly marked as such, and must + not be misrepresented as being the original software. + . + 4. The name of the author may not be used to endorse or promote + products derived from this software without specific prior written + permission. + . + THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS + OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY + DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE + GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, + WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +License: GPL-2+ + On Debian systems, the complete text of the GPL can be found in + /usr/share/common-licenses/GPL. diff --git a/debian/docs b/debian/docs new file mode 100644 index 0000000..0ccaa04 --- /dev/null +++ b/debian/docs @@ -0,0 +1 @@ +TODO delta-format.txt README diff --git a/debian/lintian-overrides b/debian/lintian-overrides new file mode 100644 index 0000000..1612c1c --- /dev/null +++ b/debian/lintian-overrides @@ -0,0 +1,5 @@ +# pristine-tar contains several embedded and modified compressors, +# which are only used to recreate original tarballs. +embedded-library usr/lib/pristine-tar/suse-bzip2/libbz2.so: bzip2 +embedded-library usr/lib/pristine-tar/suse-bzip2/libbz2.so.1.0: bzip2 +embedded-library usr/lib/pristine-tar/suse-bzip2/libbz2.so.1.0.4: bzip2 diff --git a/debian/rules b/debian/rules new file mode 100755 index 0000000..4f796a3 --- /dev/null +++ b/debian/rules @@ -0,0 +1,15 @@ +#!/usr/bin/make -f +%: + dh $@ + +override_dh_auto_configure: + # keeps it out of /usr/local + dh_auto_configure -- PREFIX=/usr + +override_dh_auto_clean: + # distclean moans about MANIFEST, this is quieter + if [ -e Makefile ]; then $(MAKE) realclean; fi + +override_dh_makeshlibs: + # only contains private libraries + diff --git a/delta-format.txt b/delta-format.txt new file mode 100644 index 0000000..644a7c0 --- /dev/null +++ b/delta-format.txt @@ -0,0 +1,54 @@ +The delta file is a compressed tarball, containing the following files: + +version + Currently "2.0" or "3.0". +type + Type of file this is a delta for ("tar", "gz", or "bz2"). + + +For tar files, it contains: + +manifest + List of all files in the tarball, as output by `tar t`. + Used to order files correctly when rebuilding it. +delta + xdelta between the generated tarball and the original tarball. +wrapper + Encapsulated delta file for the .gz or .bz2 wrapper for the + tarball. Optional, if not present a pristine .gz won't be generated. + + +For gz files, wrapper contains: + +params + Parameters to pass to zgz. + ("-n 9", "-M", "--rsyncable") +timestamp + Timestamp of the original input file, in seconds from epoch. +filename + Filename of the original input file. +delta + xdelta between the generated gz file and the original gz file. + (Optional; needs version "3.0".) + + +For bzip2 files the wrapper contains: + +params + Typically, only the compression level is needed (4th byte of the + compressed file), and its matching parameter stored: -N. + In some cases a -bN parameter is detected and stored. +program + Program used to compress. Almost everytime, it is bzip2 (or another + implementation producing bit-identical results). pbzip2 might also be + detected, but several parameters might interfere (-r, -pN). + + It may also be zgz (the params will include --old-bzip2 in this + case). + +For xz files, the wrapper contains: + +params + Typically, only the compression level is needed. +program + Program used to compress. Almost everytime, it is xz. diff --git a/pit/suse-bzip2/LICENSE b/pit/suse-bzip2/LICENSE new file mode 100644 index 0000000..cc61417 --- /dev/null +++ b/pit/suse-bzip2/LICENSE @@ -0,0 +1,42 @@ + +-------------------------------------------------------------------------- + +This program, "bzip2", the associated library "libbzip2", and all +documentation, are copyright (C) 1996-2010 Julian R Seward. All +rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + +2. The origin of this software must not be misrepresented; you must + not claim that you wrote the original software. If you use this + software in a product, an acknowledgment in the product + documentation would be appreciated but is not required. + +3. Altered source versions must be plainly marked as such, and must + not be misrepresented as being the original software. + +4. The name of the author may not be used to endorse or promote + products derived from this software without specific prior written + permission. + +THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS +OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY +DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE +GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, +WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +Julian Seward, jseward@bzip.org +bzip2/libbzip2 version 1.0.6 of 6 September 2010 + +-------------------------------------------------------------------------- diff --git a/pit/suse-bzip2/Makefile b/pit/suse-bzip2/Makefile new file mode 100644 index 0000000..3e41319 --- /dev/null +++ b/pit/suse-bzip2/Makefile @@ -0,0 +1,252 @@ +# ------------------------------------------------------------------ +# This file is part of bzip2/libbzip2, a program and library for +# lossless, block-sorting data compression. +# +# bzip2/libbzip2 version 1.0.6 of 6 September 2010 +# Copyright (C) 1996-2010 Julian Seward +# +# Please read the WARNING, DISCLAIMER and PATENTS sections in the +# README file. +# +# This program is released under the terms of the license contained +# in the file LICENSE. +# ------------------------------------------------------------------ + +somajor=1.0 +sominor=$(somajor).4 +SHELL=/bin/sh + +# Determine the operating system name to decide whether to build a +# shared object library libbz2.so or a dynamic library libbz2.dylib. +UNAME_S := $(shell uname -s) + +# To assist in cross-compiling +CC=gcc +AR=ar +RANLIB=ranlib +LDFLAGS= + +BIGFILES=-D_FILE_OFFSET_BITS=64 +CFLAGS=-Wall -Winline -O2 -g $(BIGFILES) $(DEBCFLAGS) + +# Where you want it installed when you do 'make install' +PREFIX=/usr/local + + +OBJS= blocksort.o \ + huffman.o \ + crctable.o \ + randtable.o \ + compress.o \ + decompress.o \ + bzlib.o + +all: libbz2.a bzip2 + +ifeq ("$(UNAME_S)","Darwin") +soext=dylib +soext_major=$(somajor).$(soext) +soext_minor=$(sominor).$(soext) +LDDLFLAGS=-dynamiclib \ + -Wl,-headerpad_max_install_names,-undefined,dynamic_lookup,-compatibility_version,$(somajor),-current_version,$(sominor),-install_name,$(PREFIX)/lib/zgz/suse-bzip2/libbz2.$(soext_minor) +else +soext=so +soext_major=$(soext).$(somajor) +soext_minor=$(soext).$(sominor) +LDDLFLAGS=-shared -Wl,-soname,libbz2.$(soext_major) +endif + +bzip2: libbz2.$(soext) bzip2.o + $(CC) $(CFLAGS) $(LDFLAGS) -o bzip2 bzip2.o -L. -lbz2 + +bzip2recover: bzip2recover.o + $(CC) $(CFLAGS) $(LDFLAGS) -o bzip2recover bzip2recover.o + +libbz2.a: $(OBJS) + rm -f libbz2.a + $(AR) cq libbz2.a $(OBJS) + @if ( test -f $(RANLIB) || test -f /usr/bin/ranlib || \ + test -f /bin/ranlib || test -f /usr/ccs/bin/ranlib ) ; then \ + echo $(RANLIB) libbz2.a ; \ + $(RANLIB) libbz2.a ; \ + fi + +libbz2.$(soext): libbz2.$(soext_major) + ln -sf $< $@ + +libbz2.$(soext_major): libbz2.$(soext_minor) + ln -sf $< $@ + +libbz2.$(soext_minor): $(OBJS:%.o=%.sho) + $(CC) $(LDDLFLAGS) -o $@ $^ -lc + +%.sho: %.c + $(CC) $(CFLAGS) -D_REENTRANT -fPIC -o $@ -c $< + +%.o: %.c + $(CC) $(CFLAGS) -D_REENTRANT -o $@ -c $< + +check: test + +ifeq ("${UNAME_S}", "Darwin") +# On Mac OS X, $DYLD_LIBRARY_PATH needs to be set to test libbz2.dylib. +LD_PATH_VAR=DYLD_LIBRARY_PATH +else +# On all other (Unix-like) systems, set $LD_LIBRARY_PATH to test libbz2.so. +LD_PATH_VAR=LD_LIBRARY_PATH +endif + +test: bzip2 + @cat words1 + $(LD_PATH_VAR)=.:$$$(LD_PATH_VAR) \ + ./bzip2 -1 < sample1.ref > sample1.rb2 + $(LD_PATH_VAR)=.:$$$(LD_PATH_VAR) \ + ./bzip2 -2 < sample2.ref > sample2.rb2 + $(LD_PATH_VAR)=.:$$$(LD_PATH_VAR) \ + ./bzip2 -3 < sample3.ref > sample3.rb2 + $(LD_PATH_VAR)=.:$$$(LD_PATH_VAR) \ + ./bzip2 -d < sample1.bz2 > sample1.tst + $(LD_PATH_VAR)=.:$$$(LD_PATH_VAR) \ + ./bzip2 -d < sample2.bz2 > sample2.tst + $(LD_PATH_VAR)=.:$$$(LD_PATH_VAR) \ + ./bzip2 -ds < sample3.bz2 > sample3.tst + cmp sample1.bz2 sample1.rb2 + cmp sample2.bz2 sample2.rb2 + cmp sample3.bz2 sample3.rb2 + cmp sample1.tst sample1.ref + cmp sample2.tst sample2.ref + cmp sample3.tst sample3.ref + @cat words3 + +install: bzip2 bzip2recover libbz2.a + if ( test ! -d $(PREFIX)/bin ) ; then mkdir -p $(PREFIX)/bin ; fi + if ( test ! -d $(PREFIX)/lib ) ; then mkdir -p $(PREFIX)/lib ; fi + if ( test ! -d $(PREFIX)/man ) ; then mkdir -p $(PREFIX)/man ; fi + if ( test ! -d $(PREFIX)/man/man1 ) ; then mkdir -p $(PREFIX)/man/man1 ; fi + if ( test ! -d $(PREFIX)/include ) ; then mkdir -p $(PREFIX)/include ; fi + cp -f bzip2 $(PREFIX)/bin/bzip2 + ln $(PREFIX)/bin/bzip2 $(PREFIX)/bin/bunzip2 + ln $(PREFIX)/bin/bzip2 $(PREFIX)/bin/bzcat + cp -f bzip2recover $(PREFIX)/bin/bzip2recover + chmod a+x $(PREFIX)/bin/bzip2 + chmod a+x $(PREFIX)/bin/bunzip2 + chmod a+x $(PREFIX)/bin/bzcat + chmod a+x $(PREFIX)/bin/bzip2recover + cp -f bzip2.1 $(PREFIX)/man/man1 + chmod a+r $(PREFIX)/man/man1/bzip2.1 + cp -f bzlib.h $(PREFIX)/include + chmod a+r $(PREFIX)/include/bzlib.h + cp -fa libbz2.a libbz2.so* $(PREFIX)/lib + chmod a+r $(PREFIX)/lib/libbz2.a + cp -f bzexe $(PREFIX)/bin/bzexe + chmod a+x $(PREFIX)/bin/bzexe + cp -f bzgrep $(PREFIX)/bin/bzgrep + ln -s -f $(PREFIX)/bin/bzgrep $(PREFIX)/bin/bzegrep + ln -s -f $(PREFIX)/bin/bzgrep $(PREFIX)/bin/bzfgrep + chmod a+x $(PREFIX)/bin/bzgrep + cp -f bzmore $(PREFIX)/bin/bzmore + ln -s -f $(PREFIX)/bin/bzmore $(PREFIX)/bin/bzless + chmod a+x $(PREFIX)/bin/bzmore + cp -f bzdiff $(PREFIX)/bin/bzdiff + ln -s -f $(PREFIX)/bin/bzdiff $(PREFIX)/bin/bzcmp + chmod a+x $(PREFIX)/bin/bzdiff + cp -f bzexe.1 bzgrep.1 bzmore.1 bzdiff.1 $(PREFIX)/man/man1 + chmod a+r $(PREFIX)/man/man1/bzexe.1 + chmod a+r $(PREFIX)/man/man1/bzgrep.1 + chmod a+r $(PREFIX)/man/man1/bzmore.1 + chmod a+r $(PREFIX)/man/man1/bzdiff.1 + echo ".so man1/bzgrep.1" > $(PREFIX)/man/man1/bzegrep.1 + echo ".so man1/bzgrep.1" > $(PREFIX)/man/man1/bzfgrep.1 + echo ".so man1/bzmore.1" > $(PREFIX)/man/man1/bzless.1 + echo ".so man1/bzdiff.1" > $(PREFIX)/man/man1/bzcmp.1 + +clean: + rm -f *.o *.sho libbz2.a libbz2.so* libbz2.*.dylib \ + bzip2 bzip2recover \ + sample1.rb2 sample2.rb2 sample3.rb2 \ + sample1.tst sample2.tst sample3.tst + + +distclean: clean + #rm -f manual.ps manual.html manual.pdf + +DISTNAME=bzip2-1.0.6 +dist: check manual + rm -f $(DISTNAME) + ln -s -f . $(DISTNAME) + tar cvf $(DISTNAME).tar \ + $(DISTNAME)/blocksort.c \ + $(DISTNAME)/huffman.c \ + $(DISTNAME)/crctable.c \ + $(DISTNAME)/randtable.c \ + $(DISTNAME)/compress.c \ + $(DISTNAME)/decompress.c \ + $(DISTNAME)/bzlib.c \ + $(DISTNAME)/bzip2.c \ + $(DISTNAME)/bzip2recover.c \ + $(DISTNAME)/bzlib.h \ + $(DISTNAME)/bzlib_private.h \ + $(DISTNAME)/Makefile \ + $(DISTNAME)/LICENSE \ + $(DISTNAME)/bzip2.1 \ + $(DISTNAME)/bzip2.1.preformatted \ + $(DISTNAME)/bzip2.txt \ + $(DISTNAME)/words0 \ + $(DISTNAME)/words1 \ + $(DISTNAME)/words2 \ + $(DISTNAME)/words3 \ + $(DISTNAME)/sample1.ref \ + $(DISTNAME)/sample2.ref \ + $(DISTNAME)/sample3.ref \ + $(DISTNAME)/sample1.bz2 \ + $(DISTNAME)/sample2.bz2 \ + $(DISTNAME)/sample3.bz2 \ + $(DISTNAME)/dlltest.c \ + $(DISTNAME)/manual.html \ + $(DISTNAME)/manual.pdf \ + $(DISTNAME)/manual.ps \ + $(DISTNAME)/README \ + $(DISTNAME)/README.COMPILATION.PROBLEMS \ + $(DISTNAME)/README.XML.STUFF \ + $(DISTNAME)/CHANGES \ + $(DISTNAME)/libbz2.def \ + $(DISTNAME)/libbz2.dsp \ + $(DISTNAME)/dlltest.dsp \ + $(DISTNAME)/makefile.msc \ + $(DISTNAME)/unzcrash.c \ + $(DISTNAME)/spewG.c \ + $(DISTNAME)/mk251.c \ + $(DISTNAME)/bzdiff \ + $(DISTNAME)/bzdiff.1 \ + $(DISTNAME)/bzmore \ + $(DISTNAME)/bzmore.1 \ + $(DISTNAME)/bzexe \ + $(DISTNAME)/bzexe.1 \ + $(DISTNAME)/bzgrep \ + $(DISTNAME)/bzgrep.1 \ + $(DISTNAME)/Makefile-libbz2_so \ + $(DISTNAME)/bz-common.xsl \ + $(DISTNAME)/bz-fo.xsl \ + $(DISTNAME)/bz-html.xsl \ + $(DISTNAME)/bzip.css \ + $(DISTNAME)/entities.xml \ + $(DISTNAME)/manual.xml \ + $(DISTNAME)/format.pl \ + $(DISTNAME)/xmlproc.sh + gzip -v $(DISTNAME).tar + +# For rebuilding the manual from sources on my SuSE 9.1 box + +MANUAL_SRCS= bz-common.xsl bz-fo.xsl bz-html.xsl bzip.css \ + entities.xml manual.xml + +manual: manual.html manual.ps manual.pdf + +manual.ps: $(MANUAL_SRCS) + ./xmlproc.sh -ps manual.xml + +manual.pdf: $(MANUAL_SRCS) + ./xmlproc.sh -pdf manual.xml + +manual.html: $(MANUAL_SRCS) + ./xmlproc.sh -html manual.xml diff --git a/pit/suse-bzip2/blocksort.c b/pit/suse-bzip2/blocksort.c new file mode 100644 index 0000000..d0d662c --- /dev/null +++ b/pit/suse-bzip2/blocksort.c @@ -0,0 +1,1094 @@ + +/*-------------------------------------------------------------*/ +/*--- Block sorting machinery ---*/ +/*--- blocksort.c ---*/ +/*-------------------------------------------------------------*/ + +/* ------------------------------------------------------------------ + This file is part of bzip2/libbzip2, a program and library for + lossless, block-sorting data compression. + + bzip2/libbzip2 version 1.0.6 of 6 September 2010 + Copyright (C) 1996-2010 Julian Seward + + Please read the WARNING, DISCLAIMER and PATENTS sections in the + README file. + + This program is released under the terms of the license contained + in the file LICENSE. + ------------------------------------------------------------------ */ + + +#include "bzlib_private.h" + +/*---------------------------------------------*/ +/*--- Fallback O(N log(N)^2) sorting ---*/ +/*--- algorithm, for repetitive blocks ---*/ +/*---------------------------------------------*/ + +/*---------------------------------------------*/ +static +__inline__ +void fallbackSimpleSort ( UInt32* fmap, + UInt32* eclass, + Int32 lo, + Int32 hi ) +{ + Int32 i, j, tmp; + UInt32 ec_tmp; + + if (lo == hi) return; + + if (hi - lo > 3) { + for ( i = hi-4; i >= lo; i-- ) { + tmp = fmap[i]; + ec_tmp = eclass[tmp]; + for ( j = i+4; j <= hi && ec_tmp > eclass[fmap[j]]; j += 4 ) + fmap[j-4] = fmap[j]; + fmap[j-4] = tmp; + } + } + + for ( i = hi-1; i >= lo; i-- ) { + tmp = fmap[i]; + ec_tmp = eclass[tmp]; + for ( j = i+1; j <= hi && ec_tmp > eclass[fmap[j]]; j++ ) + fmap[j-1] = fmap[j]; + fmap[j-1] = tmp; + } +} + + +/*---------------------------------------------*/ +#define fswap(zz1, zz2) \ + { Int32 zztmp = zz1; zz1 = zz2; zz2 = zztmp; } + +#define fvswap(zzp1, zzp2, zzn) \ +{ \ + Int32 yyp1 = (zzp1); \ + Int32 yyp2 = (zzp2); \ + Int32 yyn = (zzn); \ + while (yyn > 0) { \ + fswap(fmap[yyp1], fmap[yyp2]); \ + yyp1++; yyp2++; yyn--; \ + } \ +} + + +#define fmin(a,b) ((a) < (b)) ? (a) : (b) + +#define fpush(lz,hz) { stackLo[sp] = lz; \ + stackHi[sp] = hz; \ + sp++; } + +#define fpop(lz,hz) { sp--; \ + lz = stackLo[sp]; \ + hz = stackHi[sp]; } + +#define FALLBACK_QSORT_SMALL_THRESH 10 +#define FALLBACK_QSORT_STACK_SIZE 100 + + +static +void fallbackQSort3 ( UInt32* fmap, + UInt32* eclass, + Int32 loSt, + Int32 hiSt ) +{ + Int32 unLo, unHi, ltLo, gtHi, n, m; + Int32 sp, lo, hi; + UInt32 med, r, r3; + Int32 stackLo[FALLBACK_QSORT_STACK_SIZE]; + Int32 stackHi[FALLBACK_QSORT_STACK_SIZE]; + + r = 0; + + sp = 0; + fpush ( loSt, hiSt ); + + while (sp > 0) { + + AssertH ( sp < FALLBACK_QSORT_STACK_SIZE - 1, 1004 ); + + fpop ( lo, hi ); + if (hi - lo < FALLBACK_QSORT_SMALL_THRESH) { + fallbackSimpleSort ( fmap, eclass, lo, hi ); + continue; + } + + /* Random partitioning. Median of 3 sometimes fails to + avoid bad cases. Median of 9 seems to help but + looks rather expensive. This too seems to work but + is cheaper. Guidance for the magic constants + 7621 and 32768 is taken from Sedgewick's algorithms + book, chapter 35. + */ + r = ((r * 7621) + 1) % 32768; + r3 = r % 3; + if (r3 == 0) med = eclass[fmap[lo]]; else + if (r3 == 1) med = eclass[fmap[(lo+hi)>>1]]; else + med = eclass[fmap[hi]]; + + unLo = ltLo = lo; + unHi = gtHi = hi; + + while (1) { + while (1) { + if (unLo > unHi) break; + n = (Int32)eclass[fmap[unLo]] - (Int32)med; + if (n == 0) { + fswap(fmap[unLo], fmap[ltLo]); + ltLo++; unLo++; + continue; + }; + if (n > 0) break; + unLo++; + } + while (1) { + if (unLo > unHi) break; + n = (Int32)eclass[fmap[unHi]] - (Int32)med; + if (n == 0) { + fswap(fmap[unHi], fmap[gtHi]); + gtHi--; unHi--; + continue; + }; + if (n < 0) break; + unHi--; + } + if (unLo > unHi) break; + fswap(fmap[unLo], fmap[unHi]); unLo++; unHi--; + } + + AssertD ( unHi == unLo-1, "fallbackQSort3(2)" ); + + if (gtHi < ltLo) continue; + + n = fmin(ltLo-lo, unLo-ltLo); fvswap(lo, unLo-n, n); + m = fmin(hi-gtHi, gtHi-unHi); fvswap(unLo, hi-m+1, m); + + n = lo + unLo - ltLo - 1; + m = hi - (gtHi - unHi) + 1; + + if (n - lo > hi - m) { + fpush ( lo, n ); + fpush ( m, hi ); + } else { + fpush ( m, hi ); + fpush ( lo, n ); + } + } +} + +#undef fmin +#undef fpush +#undef fpop +#undef fswap +#undef fvswap +#undef FALLBACK_QSORT_SMALL_THRESH +#undef FALLBACK_QSORT_STACK_SIZE + + +/*---------------------------------------------*/ +/* Pre: + nblock > 0 + eclass exists for [0 .. nblock-1] + ((UChar*)eclass) [0 .. nblock-1] holds block + ptr exists for [0 .. nblock-1] + + Post: + ((UChar*)eclass) [0 .. nblock-1] holds block + All other areas of eclass destroyed + fmap [0 .. nblock-1] holds sorted order + bhtab [ 0 .. 2+(nblock/32) ] destroyed +*/ + +#define SET_BH(zz) bhtab[(zz) >> 5] |= (1 << ((zz) & 31)) +#define CLEAR_BH(zz) bhtab[(zz) >> 5] &= ~(1 << ((zz) & 31)) +#define ISSET_BH(zz) (bhtab[(zz) >> 5] & (1 << ((zz) & 31))) +#define WORD_BH(zz) bhtab[(zz) >> 5] +#define UNALIGNED_BH(zz) ((zz) & 0x01f) + +static +void fallbackSort ( UInt32* fmap, + UInt32* eclass, + UInt32* bhtab, + Int32 nblock, + Int32 verb ) +{ + Int32 ftab[257]; + Int32 ftabCopy[256]; + Int32 H, i, j, k, l, r, cc, cc1; + Int32 nNotDone; + Int32 nBhtab; + UChar* eclass8 = (UChar*)eclass; + + /*-- + Initial 1-char radix sort to generate + initial fmap and initial BH bits. + --*/ + if (verb >= 4) + VPrintf0 ( " bucket sorting ...\n" ); + for (i = 0; i < 257; i++) ftab[i] = 0; + for (i = 0; i < nblock; i++) ftab[eclass8[i]]++; + for (i = 0; i < 256; i++) ftabCopy[i] = ftab[i]; + for (i = 1; i < 257; i++) ftab[i] += ftab[i-1]; + + for (i = 0; i < nblock; i++) { + j = eclass8[i]; + k = ftab[j] - 1; + ftab[j] = k; + fmap[k] = i; + } + + nBhtab = 2 + (nblock / 32); + for (i = 0; i < nBhtab; i++) bhtab[i] = 0; + for (i = 0; i < 256; i++) SET_BH(ftab[i]); + + /*-- + Inductively refine the buckets. Kind-of an + "exponential radix sort" (!), inspired by the + Manber-Myers suffix array construction algorithm. + --*/ + + /*-- set sentinel bits for block-end detection --*/ + for (i = 0; i < 32; i++) { + SET_BH(nblock + 2*i); + CLEAR_BH(nblock + 2*i + 1); + } + + /*-- the log(N) loop --*/ + H = 1; + while (1) { + + if (verb >= 4) + VPrintf1 ( " depth %6d has ", H ); + + j = 0; + for (i = 0; i < nblock; i++) { + if (ISSET_BH(i)) j = i; + k = fmap[i] - H; if (k < 0) k += nblock; + eclass[k] = j; + } + + nNotDone = 0; + r = -1; + while (1) { + + /*-- find the next non-singleton bucket --*/ + k = r + 1; + while (ISSET_BH(k) && UNALIGNED_BH(k)) k++; + if (ISSET_BH(k)) { + while (WORD_BH(k) == 0xffffffff) k += 32; + while (ISSET_BH(k)) k++; + } + l = k - 1; + if (l >= nblock) break; + while (!ISSET_BH(k) && UNALIGNED_BH(k)) k++; + if (!ISSET_BH(k)) { + while (WORD_BH(k) == 0x00000000) k += 32; + while (!ISSET_BH(k)) k++; + } + r = k - 1; + if (r >= nblock) break; + + /*-- now [l, r] bracket current bucket --*/ + if (r > l) { + nNotDone += (r - l + 1); + fallbackQSort3 ( fmap, eclass, l, r ); + + /*-- scan bucket and generate header bits-- */ + cc = -1; + for (i = l; i <= r; i++) { + cc1 = eclass[fmap[i]]; + if (cc != cc1) { SET_BH(i); cc = cc1; }; + } + } + } + + if (verb >= 4) + VPrintf1 ( "%6d unresolved strings\n", nNotDone ); + + H *= 2; + if (H > nblock || nNotDone == 0) break; + } + + /*-- + Reconstruct the original block in + eclass8 [0 .. nblock-1], since the + previous phase destroyed it. + --*/ + if (verb >= 4) + VPrintf0 ( " reconstructing block ...\n" ); + j = 0; + for (i = 0; i < nblock; i++) { + while (ftabCopy[j] == 0) j++; + ftabCopy[j]--; + eclass8[fmap[i]] = (UChar)j; + } + AssertH ( j < 256, 1005 ); +} + +#undef SET_BH +#undef CLEAR_BH +#undef ISSET_BH +#undef WORD_BH +#undef UNALIGNED_BH + + +/*---------------------------------------------*/ +/*--- The main, O(N^2 log(N)) sorting ---*/ +/*--- algorithm. Faster for "normal" ---*/ +/*--- non-repetitive blocks. ---*/ +/*---------------------------------------------*/ + +/*---------------------------------------------*/ +static +__inline__ +Bool mainGtU ( UInt32 i1, + UInt32 i2, + UChar* block, + UInt16* quadrant, + UInt32 nblock, + Int32* budget ) +{ + Int32 k; + UChar c1, c2; + UInt16 s1, s2; + + AssertD ( i1 != i2, "mainGtU" ); + /* 1 */ + c1 = block[i1]; c2 = block[i2]; + if (c1 != c2) return (c1 > c2); + i1++; i2++; + /* 2 */ + c1 = block[i1]; c2 = block[i2]; + if (c1 != c2) return (c1 > c2); + i1++; i2++; + /* 3 */ + c1 = block[i1]; c2 = block[i2]; + if (c1 != c2) return (c1 > c2); + i1++; i2++; + /* 4 */ + c1 = block[i1]; c2 = block[i2]; + if (c1 != c2) return (c1 > c2); + i1++; i2++; + /* 5 */ + c1 = block[i1]; c2 = block[i2]; + if (c1 != c2) return (c1 > c2); + i1++; i2++; + /* 6 */ + c1 = block[i1]; c2 = block[i2]; + if (c1 != c2) return (c1 > c2); + i1++; i2++; + /* 7 */ + c1 = block[i1]; c2 = block[i2]; + if (c1 != c2) return (c1 > c2); + i1++; i2++; + /* 8 */ + c1 = block[i1]; c2 = block[i2]; + if (c1 != c2) return (c1 > c2); + i1++; i2++; + /* 9 */ + c1 = block[i1]; c2 = block[i2]; + if (c1 != c2) return (c1 > c2); + i1++; i2++; + /* 10 */ + c1 = block[i1]; c2 = block[i2]; + if (c1 != c2) return (c1 > c2); + i1++; i2++; + /* 11 */ + c1 = block[i1]; c2 = block[i2]; + if (c1 != c2) return (c1 > c2); + i1++; i2++; + /* 12 */ + c1 = block[i1]; c2 = block[i2]; + if (c1 != c2) return (c1 > c2); + i1++; i2++; + + k = nblock + 8; + + do { + /* 1 */ + c1 = block[i1]; c2 = block[i2]; + if (c1 != c2) return (c1 > c2); + s1 = quadrant[i1]; s2 = quadrant[i2]; + if (s1 != s2) return (s1 > s2); + i1++; i2++; + /* 2 */ + c1 = block[i1]; c2 = block[i2]; + if (c1 != c2) return (c1 > c2); + s1 = quadrant[i1]; s2 = quadrant[i2]; + if (s1 != s2) return (s1 > s2); + i1++; i2++; + /* 3 */ + c1 = block[i1]; c2 = block[i2]; + if (c1 != c2) return (c1 > c2); + s1 = quadrant[i1]; s2 = quadrant[i2]; + if (s1 != s2) return (s1 > s2); + i1++; i2++; + /* 4 */ + c1 = block[i1]; c2 = block[i2]; + if (c1 != c2) return (c1 > c2); + s1 = quadrant[i1]; s2 = quadrant[i2]; + if (s1 != s2) return (s1 > s2); + i1++; i2++; + /* 5 */ + c1 = block[i1]; c2 = block[i2]; + if (c1 != c2) return (c1 > c2); + s1 = quadrant[i1]; s2 = quadrant[i2]; + if (s1 != s2) return (s1 > s2); + i1++; i2++; + /* 6 */ + c1 = block[i1]; c2 = block[i2]; + if (c1 != c2) return (c1 > c2); + s1 = quadrant[i1]; s2 = quadrant[i2]; + if (s1 != s2) return (s1 > s2); + i1++; i2++; + /* 7 */ + c1 = block[i1]; c2 = block[i2]; + if (c1 != c2) return (c1 > c2); + s1 = quadrant[i1]; s2 = quadrant[i2]; + if (s1 != s2) return (s1 > s2); + i1++; i2++; + /* 8 */ + c1 = block[i1]; c2 = block[i2]; + if (c1 != c2) return (c1 > c2); + s1 = quadrant[i1]; s2 = quadrant[i2]; + if (s1 != s2) return (s1 > s2); + i1++; i2++; + + if (i1 >= nblock) i1 -= nblock; + if (i2 >= nblock) i2 -= nblock; + + k -= 8; + (*budget)--; + } + while (k >= 0); + + return False; +} + + +/*---------------------------------------------*/ +/*-- + Knuth's increments seem to work better + than Incerpi-Sedgewick here. Possibly + because the number of elems to sort is + usually small, typically <= 20. +--*/ +static +Int32 incs[14] = { 1, 4, 13, 40, 121, 364, 1093, 3280, + 9841, 29524, 88573, 265720, + 797161, 2391484 }; + +static +void mainSimpleSort ( UInt32* ptr, + UChar* block, + UInt16* quadrant, + Int32 nblock, + Int32 lo, + Int32 hi, + Int32 d, + Int32* budget ) +{ + Int32 i, j, h, bigN, hp; + UInt32 v; + + bigN = hi - lo + 1; + if (bigN < 2) return; + + hp = 0; + while (incs[hp] < bigN) hp++; + hp--; + + for (; hp >= 0; hp--) { + h = incs[hp]; + + i = lo + h; + while (True) { + + /*-- copy 1 --*/ + if (i > hi) break; + v = ptr[i]; + j = i; + while ( mainGtU ( + ptr[j-h]+d, v+d, block, quadrant, nblock, budget + ) ) { + ptr[j] = ptr[j-h]; + j = j - h; + if (j <= (lo + h - 1)) break; + } + ptr[j] = v; + i++; + + /*-- copy 2 --*/ + if (i > hi) break; + v = ptr[i]; + j = i; + while ( mainGtU ( + ptr[j-h]+d, v+d, block, quadrant, nblock, budget + ) ) { + ptr[j] = ptr[j-h]; + j = j - h; + if (j <= (lo + h - 1)) break; + } + ptr[j] = v; + i++; + + /*-- copy 3 --*/ + if (i > hi) break; + v = ptr[i]; + j = i; + while ( mainGtU ( + ptr[j-h]+d, v+d, block, quadrant, nblock, budget + ) ) { + ptr[j] = ptr[j-h]; + j = j - h; + if (j <= (lo + h - 1)) break; + } + ptr[j] = v; + i++; + + if (*budget < 0) return; + } + } +} + + +/*---------------------------------------------*/ +/*-- + The following is an implementation of + an elegant 3-way quicksort for strings, + described in a paper "Fast Algorithms for + Sorting and Searching Strings", by Robert + Sedgewick and Jon L. Bentley. +--*/ + +#define mswap(zz1, zz2) \ + { Int32 zztmp = zz1; zz1 = zz2; zz2 = zztmp; } + +#define mvswap(zzp1, zzp2, zzn) \ +{ \ + Int32 yyp1 = (zzp1); \ + Int32 yyp2 = (zzp2); \ + Int32 yyn = (zzn); \ + while (yyn > 0) { \ + mswap(ptr[yyp1], ptr[yyp2]); \ + yyp1++; yyp2++; yyn--; \ + } \ +} + +static +__inline__ +UChar mmed3 ( UChar a, UChar b, UChar c ) +{ + UChar t; + if (a > b) { t = a; a = b; b = t; }; + if (b > c) { + b = c; + if (a > b) b = a; + } + return b; +} + +#define mmin(a,b) ((a) < (b)) ? (a) : (b) + +#define mpush(lz,hz,dz) { stackLo[sp] = lz; \ + stackHi[sp] = hz; \ + stackD [sp] = dz; \ + sp++; } + +#define mpop(lz,hz,dz) { sp--; \ + lz = stackLo[sp]; \ + hz = stackHi[sp]; \ + dz = stackD [sp]; } + + +#define mnextsize(az) (nextHi[az]-nextLo[az]) + +#define mnextswap(az,bz) \ + { Int32 tz; \ + tz = nextLo[az]; nextLo[az] = nextLo[bz]; nextLo[bz] = tz; \ + tz = nextHi[az]; nextHi[az] = nextHi[bz]; nextHi[bz] = tz; \ + tz = nextD [az]; nextD [az] = nextD [bz]; nextD [bz] = tz; } + + +#define MAIN_QSORT_SMALL_THRESH 20 +#define MAIN_QSORT_DEPTH_THRESH (BZ_N_RADIX + BZ_N_QSORT) +#define MAIN_QSORT_STACK_SIZE 100 + +static +void mainQSort3 ( UInt32* ptr, + UChar* block, + UInt16* quadrant, + Int32 nblock, + Int32 loSt, + Int32 hiSt, + Int32 dSt, + Int32* budget ) +{ + Int32 unLo, unHi, ltLo, gtHi, n, m, med; + Int32 sp, lo, hi, d; + + Int32 stackLo[MAIN_QSORT_STACK_SIZE]; + Int32 stackHi[MAIN_QSORT_STACK_SIZE]; + Int32 stackD [MAIN_QSORT_STACK_SIZE]; + + Int32 nextLo[3]; + Int32 nextHi[3]; + Int32 nextD [3]; + + sp = 0; + mpush ( loSt, hiSt, dSt ); + + while (sp > 0) { + + AssertH ( sp < MAIN_QSORT_STACK_SIZE - 2, 1001 ); + + mpop ( lo, hi, d ); + if (hi - lo < MAIN_QSORT_SMALL_THRESH || + d > MAIN_QSORT_DEPTH_THRESH) { + mainSimpleSort ( ptr, block, quadrant, nblock, lo, hi, d, budget ); + if (*budget < 0) return; + continue; + } + + med = (Int32) + mmed3 ( block[ptr[ lo ]+d], + block[ptr[ hi ]+d], + block[ptr[ (lo+hi)>>1 ]+d] ); + + unLo = ltLo = lo; + unHi = gtHi = hi; + + while (True) { + while (True) { + if (unLo > unHi) break; + n = ((Int32)block[ptr[unLo]+d]) - med; + if (n == 0) { + mswap(ptr[unLo], ptr[ltLo]); + ltLo++; unLo++; continue; + }; + if (n > 0) break; + unLo++; + } + while (True) { + if (unLo > unHi) break; + n = ((Int32)block[ptr[unHi]+d]) - med; + if (n == 0) { + mswap(ptr[unHi], ptr[gtHi]); + gtHi--; unHi--; continue; + }; + if (n < 0) break; + unHi--; + } + if (unLo > unHi) break; + mswap(ptr[unLo], ptr[unHi]); unLo++; unHi--; + } + + AssertD ( unHi == unLo-1, "mainQSort3(2)" ); + + if (gtHi < ltLo) { + mpush(lo, hi, d+1 ); + continue; + } + + n = mmin(ltLo-lo, unLo-ltLo); mvswap(lo, unLo-n, n); + m = mmin(hi-gtHi, gtHi-unHi); mvswap(unLo, hi-m+1, m); + + n = lo + unLo - ltLo - 1; + m = hi - (gtHi - unHi) + 1; + + nextLo[0] = lo; nextHi[0] = n; nextD[0] = d; + nextLo[1] = m; nextHi[1] = hi; nextD[1] = d; + nextLo[2] = n+1; nextHi[2] = m-1; nextD[2] = d+1; + + if (mnextsize(0) < mnextsize(1)) mnextswap(0,1); + if (mnextsize(1) < mnextsize(2)) mnextswap(1,2); + if (mnextsize(0) < mnextsize(1)) mnextswap(0,1); + + AssertD (mnextsize(0) >= mnextsize(1), "mainQSort3(8)" ); + AssertD (mnextsize(1) >= mnextsize(2), "mainQSort3(9)" ); + + mpush (nextLo[0], nextHi[0], nextD[0]); + mpush (nextLo[1], nextHi[1], nextD[1]); + mpush (nextLo[2], nextHi[2], nextD[2]); + } +} + +#undef mswap +#undef mvswap +#undef mpush +#undef mpop +#undef mmin +#undef mnextsize +#undef mnextswap +#undef MAIN_QSORT_SMALL_THRESH +#undef MAIN_QSORT_DEPTH_THRESH +#undef MAIN_QSORT_STACK_SIZE + + +/*---------------------------------------------*/ +/* Pre: + nblock > N_OVERSHOOT + block32 exists for [0 .. nblock-1 +N_OVERSHOOT] + ((UChar*)block32) [0 .. nblock-1] holds block + ptr exists for [0 .. nblock-1] + + Post: + ((UChar*)block32) [0 .. nblock-1] holds block + All other areas of block32 destroyed + ftab [0 .. 65536 ] destroyed + ptr [0 .. nblock-1] holds sorted order + if (*budget < 0), sorting was abandoned +*/ + +#define BIGFREQ(b) (ftab[((b)+1) << 8] - ftab[(b) << 8]) +#define SETMASK (1 << 21) +#define CLEARMASK (~(SETMASK)) + +static +void mainSort ( UInt32* ptr, + UChar* block, + UInt16* quadrant, + UInt32* ftab, + Int32 nblock, + Int32 verb, + Int32* budget ) +{ + Int32 i, j, k, ss, sb; + Int32 runningOrder[256]; + Bool bigDone[256]; + Int32 copyStart[256]; + Int32 copyEnd [256]; + UChar c1; + Int32 numQSorted; + UInt16 s; + if (verb >= 4) VPrintf0 ( " main sort initialise ...\n" ); + + /*-- set up the 2-byte frequency table --*/ + for (i = 65536; i >= 0; i--) ftab[i] = 0; + + j = block[0] << 8; + i = nblock-1; + for (; i >= 3; i -= 4) { + quadrant[i] = 0; + j = (j >> 8) | ( ((UInt16)block[i]) << 8); + ftab[j]++; + quadrant[i-1] = 0; + j = (j >> 8) | ( ((UInt16)block[i-1]) << 8); + ftab[j]++; + quadrant[i-2] = 0; + j = (j >> 8) | ( ((UInt16)block[i-2]) << 8); + ftab[j]++; + quadrant[i-3] = 0; + j = (j >> 8) | ( ((UInt16)block[i-3]) << 8); + ftab[j]++; + } + for (; i >= 0; i--) { + quadrant[i] = 0; + j = (j >> 8) | ( ((UInt16)block[i]) << 8); + ftab[j]++; + } + + /*-- (emphasises close relationship of block & quadrant) --*/ + for (i = 0; i < BZ_N_OVERSHOOT; i++) { + block [nblock+i] = block[i]; + quadrant[nblock+i] = 0; + } + + if (verb >= 4) VPrintf0 ( " bucket sorting ...\n" ); + + /*-- Complete the initial radix sort --*/ + for (i = 1; i <= 65536; i++) ftab[i] += ftab[i-1]; + + s = block[0] << 8; + i = nblock-1; + for (; i >= 3; i -= 4) { + s = (s >> 8) | (block[i] << 8); + j = ftab[s] -1; + ftab[s] = j; + ptr[j] = i; + s = (s >> 8) | (block[i-1] << 8); + j = ftab[s] -1; + ftab[s] = j; + ptr[j] = i-1; + s = (s >> 8) | (block[i-2] << 8); + j = ftab[s] -1; + ftab[s] = j; + ptr[j] = i-2; + s = (s >> 8) | (block[i-3] << 8); + j = ftab[s] -1; + ftab[s] = j; + ptr[j] = i-3; + } + for (; i >= 0; i--) { + s = (s >> 8) | (block[i] << 8); + j = ftab[s] -1; + ftab[s] = j; + ptr[j] = i; + } + + /*-- + Now ftab contains the first loc of every small bucket. + Calculate the running order, from smallest to largest + big bucket. + --*/ + for (i = 0; i <= 255; i++) { + bigDone [i] = False; + runningOrder[i] = i; + } + + { + Int32 vv; + Int32 h = 1; + do h = 3 * h + 1; while (h <= 256); + do { + h = h / 3; + for (i = h; i <= 255; i++) { + vv = runningOrder[i]; + j = i; + while ( BIGFREQ(runningOrder[j-h]) > BIGFREQ(vv) ) { + runningOrder[j] = runningOrder[j-h]; + j = j - h; + if (j <= (h - 1)) goto zero; + } + zero: + runningOrder[j] = vv; + } + } while (h != 1); + } + + /*-- + The main sorting loop. + --*/ + + numQSorted = 0; + + for (i = 0; i <= 255; i++) { + + /*-- + Process big buckets, starting with the least full. + Basically this is a 3-step process in which we call + mainQSort3 to sort the small buckets [ss, j], but + also make a big effort to avoid the calls if we can. + --*/ + ss = runningOrder[i]; + + /*-- + Step 1: + Complete the big bucket [ss] by quicksorting + any unsorted small buckets [ss, j], for j != ss. + Hopefully previous pointer-scanning phases have already + completed many of the small buckets [ss, j], so + we don't have to sort them at all. + --*/ + for (j = 0; j <= 255; j++) { + if (j != ss) { + sb = (ss << 8) + j; + if ( ! (ftab[sb] & SETMASK) ) { + Int32 lo = ftab[sb] & CLEARMASK; + Int32 hi = (ftab[sb+1] & CLEARMASK) - 1; + if (hi > lo) { + if (verb >= 4) + VPrintf4 ( " qsort [0x%x, 0x%x] " + "done %d this %d\n", + ss, j, numQSorted, hi - lo + 1 ); + mainQSort3 ( + ptr, block, quadrant, nblock, + lo, hi, BZ_N_RADIX, budget + ); + numQSorted += (hi - lo + 1); + if (*budget < 0) return; + } + } + ftab[sb] |= SETMASK; + } + } + + AssertH ( !bigDone[ss], 1006 ); + + /*-- + Step 2: + Now scan this big bucket [ss] so as to synthesise the + sorted order for small buckets [t, ss] for all t, + including, magically, the bucket [ss,ss] too. + This will avoid doing Real Work in subsequent Step 1's. + --*/ + { + for (j = 0; j <= 255; j++) { + copyStart[j] = ftab[(j << 8) + ss] & CLEARMASK; + copyEnd [j] = (ftab[(j << 8) + ss + 1] & CLEARMASK) - 1; + } + for (j = ftab[ss << 8] & CLEARMASK; j < copyStart[ss]; j++) { + k = ptr[j]-1; if (k < 0) k += nblock; + c1 = block[k]; + if (!bigDone[c1]) + ptr[ copyStart[c1]++ ] = k; + } + for (j = (ftab[(ss+1) << 8] & CLEARMASK) - 1; j > copyEnd[ss]; j--) { + k = ptr[j]-1; if (k < 0) k += nblock; + c1 = block[k]; + if (!bigDone[c1]) + ptr[ copyEnd[c1]-- ] = k; + } + } + + AssertH ( (copyStart[ss]-1 == copyEnd[ss]) + || + /* Extremely rare case missing in bzip2-1.0.0 and 1.0.1. + Necessity for this case is demonstrated by compressing + a sequence of approximately 48.5 million of character + 251; 1.0.0/1.0.1 will then die here. */ + (copyStart[ss] == 0 && copyEnd[ss] == nblock-1), + 1007 ) + + for (j = 0; j <= 255; j++) ftab[(j << 8) + ss] |= SETMASK; + + /*-- + Step 3: + The [ss] big bucket is now done. Record this fact, + and update the quadrant descriptors. Remember to + update quadrants in the overshoot area too, if + necessary. The "if (i < 255)" test merely skips + this updating for the last bucket processed, since + updating for the last bucket is pointless. + + The quadrant array provides a way to incrementally + cache sort orderings, as they appear, so as to + make subsequent comparisons in fullGtU() complete + faster. For repetitive blocks this makes a big + difference (but not big enough to be able to avoid + the fallback sorting mechanism, exponential radix sort). + + The precise meaning is: at all times: + + for 0 <= i < nblock and 0 <= j <= nblock + + if block[i] != block[j], + + then the relative values of quadrant[i] and + quadrant[j] are meaningless. + + else { + if quadrant[i] < quadrant[j] + then the string starting at i lexicographically + precedes the string starting at j + + else if quadrant[i] > quadrant[j] + then the string starting at j lexicographically + precedes the string starting at i + + else + the relative ordering of the strings starting + at i and j has not yet been determined. + } + --*/ + bigDone[ss] = True; + + if (i < 255) { + Int32 bbStart = ftab[ss << 8] & CLEARMASK; + Int32 bbSize = (ftab[(ss+1) << 8] & CLEARMASK) - bbStart; + Int32 shifts = 0; + + while ((bbSize >> shifts) > 65534) shifts++; + + for (j = bbSize-1; j >= 0; j--) { + Int32 a2update = ptr[bbStart + j]; + UInt16 qVal = (UInt16)(j >> shifts); + quadrant[a2update] = qVal; + if (a2update < BZ_N_OVERSHOOT) + quadrant[a2update + nblock] = qVal; + } + AssertH ( ((bbSize-1) >> shifts) <= 65535, 1002 ); + } + + } + + if (verb >= 4) + VPrintf3 ( " %d pointers, %d sorted, %d scanned\n", + nblock, numQSorted, nblock - numQSorted ); +} + +#undef BIGFREQ +#undef SETMASK +#undef CLEARMASK + + +/*---------------------------------------------*/ +/* Pre: + nblock > 0 + arr2 exists for [0 .. nblock-1 +N_OVERSHOOT] + ((UChar*)arr2) [0 .. nblock-1] holds block + arr1 exists for [0 .. nblock-1] + + Post: + ((UChar*)arr2) [0 .. nblock-1] holds block + All other areas of block destroyed + ftab [ 0 .. 65536 ] destroyed + arr1 [0 .. nblock-1] holds sorted order +*/ +void BZ2_blockSort ( EState* s ) +{ + UInt32* ptr = s->ptr; + UChar* block = s->block; + UInt32* ftab = s->ftab; + Int32 nblock = s->nblock; + Int32 verb = s->verbosity; + Int32 wfact = s->workFactor; + UInt16* quadrant; + Int32 budget; + Int32 budgetInit; + Int32 i; + + if (nblock < 10000) { + fallbackSort ( s->arr1, s->arr2, ftab, nblock, verb ); + } else { + /* Calculate the location for quadrant, remembering to get + the alignment right. Assumes that &(block[0]) is at least + 2-byte aligned -- this should be ok since block is really + the first section of arr2. + */ + i = nblock+BZ_N_OVERSHOOT; + if (i & 1) i++; + quadrant = (UInt16*)(&(block[i])); + + /* (wfact-1) / 3 puts the default-factor-30 + transition point at very roughly the same place as + with v0.1 and v0.9.0. + Not that it particularly matters any more, since the + resulting compressed stream is now the same regardless + of whether or not we use the main sort or fallback sort. + */ + if (wfact < 1 ) wfact = 1; + if (wfact > 100) wfact = 100; + budgetInit = nblock * ((wfact-1) / 3); + budget = budgetInit; + + mainSort ( ptr, block, quadrant, ftab, nblock, verb, &budget ); + if (verb >= 3) + VPrintf3 ( " %d work, %d block, ratio %5.2f\n", + budgetInit - budget, + nblock, + (float)(budgetInit - budget) / + (float)(nblock==0 ? 1 : nblock) ); + if (budget < 0) { + if (verb >= 2) + VPrintf0 ( " too repetitive; using fallback" + " sorting algorithm\n" ); + fallbackSort ( s->arr1, s->arr2, ftab, nblock, verb ); + } + } + + s->origPtr = -1; + for (i = 0; i < s->nblock; i++) + if (ptr[i] == 0) + { s->origPtr = i; break; }; + + AssertH( s->origPtr != -1, 1003 ); +} + + +/*-------------------------------------------------------------*/ +/*--- end blocksort.c ---*/ +/*-------------------------------------------------------------*/ diff --git a/pit/suse-bzip2/bzip2.c b/pit/suse-bzip2/bzip2.c new file mode 100644 index 0000000..608df64 --- /dev/null +++ b/pit/suse-bzip2/bzip2.c @@ -0,0 +1,2038 @@ + +/*-----------------------------------------------------------*/ +/*--- A block-sorting, lossless compressor bzip2.c ---*/ +/*-----------------------------------------------------------*/ + +/* ------------------------------------------------------------------ + This file is part of bzip2/libbzip2, a program and library for + lossless, block-sorting data compression. + + bzip2/libbzip2 version 1.0.6 of 6 September 2010 + Copyright (C) 1996-2010 Julian Seward + + Please read the WARNING, DISCLAIMER and PATENTS sections in the + README file. + + This program is released under the terms of the license contained + in the file LICENSE. + ------------------------------------------------------------------ */ + + +/* Place a 1 beside your platform, and 0 elsewhere. + Generic 32-bit Unix. + Also works on 64-bit Unix boxes. + This is the default. +*/ +#define BZ_UNIX 1 + +/*-- + Win32, as seen by Jacob Navia's excellent + port of (Chris Fraser & David Hanson)'s excellent + lcc compiler. Or with MS Visual C. + This is selected automatically if compiled by a compiler which + defines _WIN32, not including the Cygwin GCC. +--*/ +#define BZ_LCCWIN32 0 + +#if defined(_WIN32) && !defined(__CYGWIN__) +#undef BZ_LCCWIN32 +#define BZ_LCCWIN32 1 +#undef BZ_UNIX +#define BZ_UNIX 0 +#endif + + +/*---------------------------------------------*/ +/*-- + Some stuff for all platforms. +--*/ + +#include +#include +#include +#include +#include +#include +#include +#include "bzlib.h" + +#define ERROR_IF_EOF(i) { if ((i) == EOF) ioError(); } +#define ERROR_IF_NOT_ZERO(i) { if ((i) != 0) ioError(); } +#define ERROR_IF_MINUS_ONE(i) { if ((i) == (-1)) ioError(); } + + +/*---------------------------------------------*/ +/*-- + Platform-specific stuff. +--*/ + +#if BZ_UNIX +# include +# include +# include +# include +# include +# include + +# define PATH_SEP '/' +# define MY_LSTAT lstat +# define MY_STAT stat +# define MY_S_ISREG S_ISREG +# define MY_S_ISDIR S_ISDIR + +# define APPEND_FILESPEC(root, name) \ + root=snocString((root), (name)) + +# define APPEND_FLAG(root, name) \ + root=snocString((root), (name)) + +# define SET_BINARY_MODE(fd) /**/ + +# ifdef __GNUC__ +# define NORETURN __attribute__ ((noreturn)) +# else +# define NORETURN /**/ +# endif + +# ifdef __DJGPP__ +# include +# include +# undef MY_LSTAT +# undef MY_STAT +# define MY_LSTAT stat +# define MY_STAT stat +# undef SET_BINARY_MODE +# define SET_BINARY_MODE(fd) \ + do { \ + int retVal = setmode ( fileno ( fd ), \ + O_BINARY ); \ + ERROR_IF_MINUS_ONE ( retVal ); \ + } while ( 0 ) +# endif + +# ifdef __CYGWIN__ +# include +# include +# undef SET_BINARY_MODE +# define SET_BINARY_MODE(fd) \ + do { \ + int retVal = setmode ( fileno ( fd ), \ + O_BINARY ); \ + ERROR_IF_MINUS_ONE ( retVal ); \ + } while ( 0 ) +# endif +#endif /* BZ_UNIX */ + + + +#if BZ_LCCWIN32 +# include +# include +# include + +# define NORETURN /**/ +# define PATH_SEP '\\' +# define MY_LSTAT _stat +# define MY_STAT _stat +# define MY_S_ISREG(x) ((x) & _S_IFREG) +# define MY_S_ISDIR(x) ((x) & _S_IFDIR) + +# define APPEND_FLAG(root, name) \ + root=snocString((root), (name)) + +# define APPEND_FILESPEC(root, name) \ + root = snocString ((root), (name)) + +# define SET_BINARY_MODE(fd) \ + do { \ + int retVal = setmode ( fileno ( fd ), \ + O_BINARY ); \ + ERROR_IF_MINUS_ONE ( retVal ); \ + } while ( 0 ) + +#endif /* BZ_LCCWIN32 */ + + +/*---------------------------------------------*/ +/*-- + Some more stuff for all platforms :-) +--*/ + +typedef char Char; +typedef unsigned char Bool; +typedef unsigned char UChar; +typedef int Int32; +typedef unsigned int UInt32; +typedef short Int16; +typedef unsigned short UInt16; + +#define True ((Bool)1) +#define False ((Bool)0) + +/*-- + IntNative is your platform's `native' int size. + Only here to avoid probs with 64-bit platforms. +--*/ +typedef int IntNative; + + +/*---------------------------------------------------*/ +/*--- Misc (file handling) data decls ---*/ +/*---------------------------------------------------*/ + +Int32 verbosity; +Bool keepInputFiles, smallMode, deleteOutputOnInterrupt; +Bool forceOverwrite, testFailsExist, unzFailsExist, noisy; +Int32 numFileNames, numFilesProcessed, blockSize100k; +Int32 exitValue; + +/*-- source modes; F==file, I==stdin, O==stdout --*/ +#define SM_I2O 1 +#define SM_F2O 2 +#define SM_F2F 3 + +/*-- operation modes --*/ +#define OM_Z 1 +#define OM_UNZ 2 +#define OM_TEST 3 + +Int32 opMode; +Int32 srcMode; + +#define FILE_NAME_LEN 1034 + +Int32 longestFileName; +Char inName [FILE_NAME_LEN]; +Char outName[FILE_NAME_LEN]; +Char tmpName[FILE_NAME_LEN]; +Char *progName; +Char progNameReally[FILE_NAME_LEN]; +FILE *outputHandleJustInCase; +Int32 workFactor; + +static void panic ( const Char* ) NORETURN; +static void ioError ( void ) NORETURN; +static void outOfMemory ( void ) NORETURN; +static void configError ( void ) NORETURN; +static void crcError ( void ) NORETURN; +static void cleanUpAndFail ( Int32 ) NORETURN; +static void compressedStreamEOF ( void ) NORETURN; + +static void copyFileName ( Char*, Char* ); +static void* myMalloc ( Int32 ); +static void applySavedFileAttrToOutputFile ( IntNative fd ); + + + +/*---------------------------------------------------*/ +/*--- An implementation of 64-bit ints. Sigh. ---*/ +/*--- Roll on widespread deployment of ANSI C9X ! ---*/ +/*---------------------------------------------------*/ + +typedef + struct { UChar b[8]; } + UInt64; + + +static +void uInt64_from_UInt32s ( UInt64* n, UInt32 lo32, UInt32 hi32 ) +{ + n->b[7] = (UChar)((hi32 >> 24) & 0xFF); + n->b[6] = (UChar)((hi32 >> 16) & 0xFF); + n->b[5] = (UChar)((hi32 >> 8) & 0xFF); + n->b[4] = (UChar) (hi32 & 0xFF); + n->b[3] = (UChar)((lo32 >> 24) & 0xFF); + n->b[2] = (UChar)((lo32 >> 16) & 0xFF); + n->b[1] = (UChar)((lo32 >> 8) & 0xFF); + n->b[0] = (UChar) (lo32 & 0xFF); +} + + +static +double uInt64_to_double ( UInt64* n ) +{ + Int32 i; + double base = 1.0; + double sum = 0.0; + for (i = 0; i < 8; i++) { + sum += base * (double)(n->b[i]); + base *= 256.0; + } + return sum; +} + + +static +Bool uInt64_isZero ( UInt64* n ) +{ + Int32 i; + for (i = 0; i < 8; i++) + if (n->b[i] != 0) return 0; + return 1; +} + + +/* Divide *n by 10, and return the remainder. */ +static +Int32 uInt64_qrm10 ( UInt64* n ) +{ + UInt32 rem, tmp; + Int32 i; + rem = 0; + for (i = 7; i >= 0; i--) { + tmp = rem * 256 + n->b[i]; + n->b[i] = tmp / 10; + rem = tmp % 10; + } + return rem; +} + + +/* ... and the Whole Entire Point of all this UInt64 stuff is + so that we can supply the following function. +*/ +static +void uInt64_toAscii ( char* outbuf, UInt64* n ) +{ + Int32 i, q; + UChar buf[32]; + Int32 nBuf = 0; + UInt64 n_copy = *n; + do { + q = uInt64_qrm10 ( &n_copy ); + buf[nBuf] = q + '0'; + nBuf++; + } while (!uInt64_isZero(&n_copy)); + outbuf[nBuf] = 0; + for (i = 0; i < nBuf; i++) + outbuf[i] = buf[nBuf-i-1]; +} + + +/*---------------------------------------------------*/ +/*--- Processing of complete files and streams ---*/ +/*---------------------------------------------------*/ + +/*---------------------------------------------*/ +static +Bool myfeof ( FILE* f ) +{ + Int32 c = fgetc ( f ); + if (c == EOF) return True; + ungetc ( c, f ); + return False; +} + + +/*---------------------------------------------*/ +static +void compressStream ( FILE *stream, FILE *zStream ) +{ + BZFILE* bzf = NULL; + UChar ibuf[5000]; + Int32 nIbuf; + UInt32 nbytes_in_lo32, nbytes_in_hi32; + UInt32 nbytes_out_lo32, nbytes_out_hi32; + Int32 bzerr, bzerr_dummy, ret; + + SET_BINARY_MODE(stream); + SET_BINARY_MODE(zStream); + + if (ferror(stream)) goto errhandler_io; + if (ferror(zStream)) goto errhandler_io; + + bzf = BZ2_bzWriteOpen ( &bzerr, zStream, + blockSize100k, verbosity, workFactor ); + if (bzerr != BZ_OK) goto errhandler; + + if (verbosity >= 2) fprintf ( stderr, "\n" ); + + while (True) { + + if (myfeof(stream)) break; + nIbuf = fread ( ibuf, sizeof(UChar), 5000, stream ); + if (ferror(stream)) goto errhandler_io; + if (nIbuf > 0) BZ2_bzWrite ( &bzerr, bzf, (void*)ibuf, nIbuf ); + if (bzerr != BZ_OK) goto errhandler; + + } + + BZ2_bzWriteClose64 ( &bzerr, bzf, 0, + &nbytes_in_lo32, &nbytes_in_hi32, + &nbytes_out_lo32, &nbytes_out_hi32 ); + if (bzerr != BZ_OK) goto errhandler; + + if (ferror(zStream)) goto errhandler_io; + ret = fflush ( zStream ); + if (ret == EOF) goto errhandler_io; + if (zStream != stdout) { + Int32 fd = fileno ( zStream ); + if (fd < 0) goto errhandler_io; + applySavedFileAttrToOutputFile ( fd ); + ret = fclose ( zStream ); + outputHandleJustInCase = NULL; + if (ret == EOF) goto errhandler_io; + } + outputHandleJustInCase = NULL; + if (ferror(stream)) goto errhandler_io; + ret = fclose ( stream ); + if (ret == EOF) goto errhandler_io; + + if (verbosity >= 1) { + if (nbytes_in_lo32 == 0 && nbytes_in_hi32 == 0) { + fprintf ( stderr, " no data compressed.\n"); + } else { + Char buf_nin[32], buf_nout[32]; + UInt64 nbytes_in, nbytes_out; + double nbytes_in_d, nbytes_out_d; + uInt64_from_UInt32s ( &nbytes_in, + nbytes_in_lo32, nbytes_in_hi32 ); + uInt64_from_UInt32s ( &nbytes_out, + nbytes_out_lo32, nbytes_out_hi32 ); + nbytes_in_d = uInt64_to_double ( &nbytes_in ); + nbytes_out_d = uInt64_to_double ( &nbytes_out ); + uInt64_toAscii ( buf_nin, &nbytes_in ); + uInt64_toAscii ( buf_nout, &nbytes_out ); + fprintf ( stderr, "%6.3f:1, %6.3f bits/byte, " + "%5.2f%% saved, %s in, %s out.\n", + nbytes_in_d / nbytes_out_d, + (8.0 * nbytes_out_d) / nbytes_in_d, + 100.0 * (1.0 - nbytes_out_d / nbytes_in_d), + buf_nin, + buf_nout + ); + } + } + + return; + + errhandler: + BZ2_bzWriteClose64 ( &bzerr_dummy, bzf, 1, + &nbytes_in_lo32, &nbytes_in_hi32, + &nbytes_out_lo32, &nbytes_out_hi32 ); + switch (bzerr) { + case BZ_CONFIG_ERROR: + configError(); break; + case BZ_MEM_ERROR: + outOfMemory (); break; + case BZ_IO_ERROR: + errhandler_io: + ioError(); break; + default: + panic ( "compress:unexpected error" ); + } + + panic ( "compress:end" ); + /*notreached*/ +} + + + +/*---------------------------------------------*/ +static +Bool uncompressStream ( FILE *zStream, FILE *stream ) +{ + BZFILE* bzf = NULL; + Int32 bzerr, bzerr_dummy, ret, nread, streamNo, i; + UChar obuf[5000]; + UChar unused[BZ_MAX_UNUSED]; + Int32 nUnused; + void* unusedTmpV; + UChar* unusedTmp; + + nUnused = 0; + streamNo = 0; + + SET_BINARY_MODE(stream); + SET_BINARY_MODE(zStream); + + if (ferror(stream)) goto errhandler_io; + if (ferror(zStream)) goto errhandler_io; + + while (True) { + + bzf = BZ2_bzReadOpen ( + &bzerr, zStream, verbosity, + (int)smallMode, unused, nUnused + ); + if (bzf == NULL || bzerr != BZ_OK) goto errhandler; + streamNo++; + + while (bzerr == BZ_OK) { + nread = BZ2_bzRead ( &bzerr, bzf, obuf, 5000 ); + if (bzerr == BZ_DATA_ERROR_MAGIC) goto trycat; + if ((bzerr == BZ_OK || bzerr == BZ_STREAM_END) && nread > 0) + fwrite ( obuf, sizeof(UChar), nread, stream ); + if (ferror(stream)) goto errhandler_io; + } + if (bzerr != BZ_STREAM_END) goto errhandler; + + BZ2_bzReadGetUnused ( &bzerr, bzf, &unusedTmpV, &nUnused ); + if (bzerr != BZ_OK) panic ( "decompress:bzReadGetUnused" ); + + unusedTmp = (UChar*)unusedTmpV; + for (i = 0; i < nUnused; i++) unused[i] = unusedTmp[i]; + + BZ2_bzReadClose ( &bzerr, bzf ); + if (bzerr != BZ_OK) panic ( "decompress:bzReadGetUnused" ); + + if (nUnused == 0 && myfeof(zStream)) break; + } + + closeok: + if (ferror(zStream)) goto errhandler_io; + if (stream != stdout) { + Int32 fd = fileno ( stream ); + if (fd < 0) goto errhandler_io; + applySavedFileAttrToOutputFile ( fd ); + } + ret = fclose ( zStream ); + if (ret == EOF) goto errhandler_io; + + if (ferror(stream)) goto errhandler_io; + ret = fflush ( stream ); + if (ret != 0) goto errhandler_io; + if (stream != stdout) { + ret = fclose ( stream ); + outputHandleJustInCase = NULL; + if (ret == EOF) goto errhandler_io; + } + outputHandleJustInCase = NULL; + if (verbosity >= 2) fprintf ( stderr, "\n " ); + return True; + + trycat: + if (forceOverwrite) { + rewind(zStream); + while (True) { + if (myfeof(zStream)) break; + nread = fread ( obuf, sizeof(UChar), 5000, zStream ); + if (ferror(zStream)) goto errhandler_io; + if (nread > 0) fwrite ( obuf, sizeof(UChar), nread, stream ); + if (ferror(stream)) goto errhandler_io; + } + goto closeok; + } + + errhandler: + BZ2_bzReadClose ( &bzerr_dummy, bzf ); + switch (bzerr) { + case BZ_CONFIG_ERROR: + configError(); break; + case BZ_IO_ERROR: + errhandler_io: + ioError(); break; + case BZ_DATA_ERROR: + crcError(); + case BZ_MEM_ERROR: + outOfMemory(); + case BZ_UNEXPECTED_EOF: + compressedStreamEOF(); + case BZ_DATA_ERROR_MAGIC: + if (zStream != stdin) fclose(zStream); + if (stream != stdout) fclose(stream); + if (streamNo == 1) { + return False; + } else { + if (noisy) + fprintf ( stderr, + "\n%s: %s: trailing garbage after EOF ignored\n", + progName, inName ); + return True; + } + default: + panic ( "decompress:unexpected error" ); + } + + panic ( "decompress:end" ); + return True; /*notreached*/ +} + + +/*---------------------------------------------*/ +static +Bool testStream ( FILE *zStream ) +{ + BZFILE* bzf = NULL; + Int32 bzerr, bzerr_dummy, ret, nread, streamNo, i; + UChar obuf[5000]; + UChar unused[BZ_MAX_UNUSED]; + Int32 nUnused; + void* unusedTmpV; + UChar* unusedTmp; + + nUnused = 0; + streamNo = 0; + + SET_BINARY_MODE(zStream); + if (ferror(zStream)) goto errhandler_io; + + while (True) { + + bzf = BZ2_bzReadOpen ( + &bzerr, zStream, verbosity, + (int)smallMode, unused, nUnused + ); + if (bzf == NULL || bzerr != BZ_OK) goto errhandler; + streamNo++; + + while (bzerr == BZ_OK) { + nread = BZ2_bzRead ( &bzerr, bzf, obuf, 5000 ); + if (bzerr == BZ_DATA_ERROR_MAGIC) goto errhandler; + } + if (bzerr != BZ_STREAM_END) goto errhandler; + + BZ2_bzReadGetUnused ( &bzerr, bzf, &unusedTmpV, &nUnused ); + if (bzerr != BZ_OK) panic ( "test:bzReadGetUnused" ); + + unusedTmp = (UChar*)unusedTmpV; + for (i = 0; i < nUnused; i++) unused[i] = unusedTmp[i]; + + BZ2_bzReadClose ( &bzerr, bzf ); + if (bzerr != BZ_OK) panic ( "test:bzReadGetUnused" ); + if (nUnused == 0 && myfeof(zStream)) break; + + } + + if (ferror(zStream)) goto errhandler_io; + ret = fclose ( zStream ); + if (ret == EOF) goto errhandler_io; + + if (verbosity >= 2) fprintf ( stderr, "\n " ); + return True; + + errhandler: + BZ2_bzReadClose ( &bzerr_dummy, bzf ); + if (verbosity == 0) + fprintf ( stderr, "%s: %s: ", progName, inName ); + switch (bzerr) { + case BZ_CONFIG_ERROR: + configError(); break; + case BZ_IO_ERROR: + errhandler_io: + ioError(); break; + case BZ_DATA_ERROR: + fprintf ( stderr, + "data integrity (CRC) error in data\n" ); + return False; + case BZ_MEM_ERROR: + outOfMemory(); + case BZ_UNEXPECTED_EOF: + fprintf ( stderr, + "file ends unexpectedly\n" ); + return False; + case BZ_DATA_ERROR_MAGIC: + if (zStream != stdin) fclose(zStream); + if (streamNo == 1) { + fprintf ( stderr, + "bad magic number (file not created by bzip2)\n" ); + return False; + } else { + if (noisy) + fprintf ( stderr, + "trailing garbage after EOF ignored\n" ); + return True; + } + default: + panic ( "test:unexpected error" ); + } + + panic ( "test:end" ); + return True; /*notreached*/ +} + + +/*---------------------------------------------------*/ +/*--- Error [non-] handling grunge ---*/ +/*---------------------------------------------------*/ + +/*---------------------------------------------*/ +static +void setExit ( Int32 v ) +{ + if (v > exitValue) exitValue = v; +} + + +/*---------------------------------------------*/ +static +void cadvise ( void ) +{ + if (noisy) + fprintf ( + stderr, + "\nIt is possible that the compressed file(s) have become corrupted.\n" + "You can use the -tvv option to test integrity of such files.\n\n" + "You can use the `bzip2recover' program to attempt to recover\n" + "data from undamaged sections of corrupted files.\n\n" + ); +} + + +/*---------------------------------------------*/ +static +void showFileNames ( void ) +{ + if (noisy) + fprintf ( + stderr, + "\tInput file = %s, output file = %s\n", + inName, outName + ); +} + + +/*---------------------------------------------*/ +static +void cleanUpAndFail ( Int32 ec ) +{ + IntNative retVal; + struct MY_STAT statBuf; + + if ( srcMode == SM_F2F + && opMode != OM_TEST + && deleteOutputOnInterrupt ) { + + /* Check whether input file still exists. Delete output file + only if input exists to avoid loss of data. Joerg Prante, 5 + January 2002. (JRS 06-Jan-2002: other changes in 1.0.2 mean + this is less likely to happen. But to be ultra-paranoid, we + do the check anyway.) */ + retVal = MY_STAT ( inName, &statBuf ); + if (retVal == 0) { + if (noisy) + fprintf ( stderr, + "%s: Deleting output file %s, if it exists.\n", + progName, outName ); + if (outputHandleJustInCase != NULL) + fclose ( outputHandleJustInCase ); + retVal = remove ( outName ); + if (retVal != 0) + fprintf ( stderr, + "%s: WARNING: deletion of output file " + "(apparently) failed.\n", + progName ); + } else { + fprintf ( stderr, + "%s: WARNING: deletion of output file suppressed\n", + progName ); + fprintf ( stderr, + "%s: since input file no longer exists. Output file\n", + progName ); + fprintf ( stderr, + "%s: `%s' may be incomplete.\n", + progName, outName ); + fprintf ( stderr, + "%s: I suggest doing an integrity test (bzip2 -tv)" + " of it.\n", + progName ); + } + } + + if (noisy && numFileNames > 0 && numFilesProcessed < numFileNames) { + fprintf ( stderr, + "%s: WARNING: some files have not been processed:\n" + "%s: %d specified on command line, %d not processed yet.\n\n", + progName, progName, + numFileNames, numFileNames - numFilesProcessed ); + } + setExit(ec); + exit(exitValue); +} + + +/*---------------------------------------------*/ +static +void panic ( const Char* s ) +{ + fprintf ( stderr, + "\n%s: PANIC -- internal consistency error:\n" + "\t%s\n" + "\tThis is a BUG. Please report it to me at:\n" + "\tjseward@bzip.org\n", + progName, s ); + showFileNames(); + cleanUpAndFail( 3 ); +} + + +/*---------------------------------------------*/ +static +void crcError ( void ) +{ + fprintf ( stderr, + "\n%s: Data integrity error when decompressing.\n", + progName ); + showFileNames(); + cadvise(); + cleanUpAndFail( 2 ); +} + + +/*---------------------------------------------*/ +static +void compressedStreamEOF ( void ) +{ + if (noisy) { + fprintf ( stderr, + "\n%s: Compressed file ends unexpectedly;\n\t" + "perhaps it is corrupted? *Possible* reason follows.\n", + progName ); + perror ( progName ); + showFileNames(); + cadvise(); + } + cleanUpAndFail( 2 ); +} + + +/*---------------------------------------------*/ +static +void ioError ( void ) +{ + fprintf ( stderr, + "\n%s: I/O or other error, bailing out. " + "Possible reason follows.\n", + progName ); + perror ( progName ); + showFileNames(); + cleanUpAndFail( 1 ); +} + + +/*---------------------------------------------*/ +static +void mySignalCatcher ( IntNative n ) +{ + fprintf ( stderr, + "\n%s: Control-C or similar caught, quitting.\n", + progName ); + cleanUpAndFail(1); +} + + +/*---------------------------------------------*/ +static +void mySIGSEGVorSIGBUScatcher ( IntNative n ) +{ + if (opMode == OM_Z) + fprintf ( + stderr, + "\n%s: Caught a SIGSEGV or SIGBUS whilst compressing.\n" + "\n" + " Possible causes are (most likely first):\n" + " (1) This computer has unreliable memory or cache hardware\n" + " (a surprisingly common problem; try a different machine.)\n" + " (2) A bug in the compiler used to create this executable\n" + " (unlikely, if you didn't compile bzip2 yourself.)\n" + " (3) A real bug in bzip2 -- I hope this should never be the case.\n" + " The user's manual, Section 4.3, has more info on (1) and (2).\n" + " \n" + " If you suspect this is a bug in bzip2, or are unsure about (1)\n" + " or (2), feel free to report it to me at: jseward@bzip.org.\n" + " Section 4.3 of the user's manual describes the info a useful\n" + " bug report should have. If the manual is available on your\n" + " system, please try and read it before mailing me. If you don't\n" + " have the manual or can't be bothered to read it, mail me anyway.\n" + "\n", + progName ); + else + fprintf ( + stderr, + "\n%s: Caught a SIGSEGV or SIGBUS whilst decompressing.\n" + "\n" + " Possible causes are (most likely first):\n" + " (1) The compressed data is corrupted, and bzip2's usual checks\n" + " failed to detect this. Try bzip2 -tvv my_file.bz2.\n" + " (2) This computer has unreliable memory or cache hardware\n" + " (a surprisingly common problem; try a different machine.)\n" + " (3) A bug in the compiler used to create this executable\n" + " (unlikely, if you didn't compile bzip2 yourself.)\n" + " (4) A real bug in bzip2 -- I hope this should never be the case.\n" + " The user's manual, Section 4.3, has more info on (2) and (3).\n" + " \n" + " If you suspect this is a bug in bzip2, or are unsure about (2)\n" + " or (3), feel free to report it to me at: jseward@bzip.org.\n" + " Section 4.3 of the user's manual describes the info a useful\n" + " bug report should have. If the manual is available on your\n" + " system, please try and read it before mailing me. If you don't\n" + " have the manual or can't be bothered to read it, mail me anyway.\n" + "\n", + progName ); + + showFileNames(); + if (opMode == OM_Z) + cleanUpAndFail( 3 ); else + { cadvise(); cleanUpAndFail( 2 ); } +} + + +/*---------------------------------------------*/ +static +void outOfMemory ( void ) +{ + fprintf ( stderr, + "\n%s: couldn't allocate enough memory\n", + progName ); + showFileNames(); + cleanUpAndFail(1); +} + + +/*---------------------------------------------*/ +static +void configError ( void ) +{ + fprintf ( stderr, + "bzip2: I'm not configured correctly for this platform!\n" + "\tI require Int32, Int16 and Char to have sizes\n" + "\tof 4, 2 and 1 bytes to run properly, and they don't.\n" + "\tProbably you can fix this by defining them correctly,\n" + "\tand recompiling. Bye!\n" ); + setExit(3); + exit(exitValue); +} + + +/*---------------------------------------------------*/ +/*--- The main driver machinery ---*/ +/*---------------------------------------------------*/ + +/* All rather crufty. The main problem is that input files + are stat()d multiple times before use. This should be + cleaned up. +*/ + +/*---------------------------------------------*/ +static +void pad ( Char *s ) +{ + Int32 i; + if ( (Int32)strlen(s) >= longestFileName ) return; + for (i = 1; i <= longestFileName - (Int32)strlen(s); i++) + fprintf ( stderr, " " ); +} + + +/*---------------------------------------------*/ +static +void copyFileName ( Char* to, Char* from ) +{ + if ( strlen(from) > FILE_NAME_LEN-10 ) { + fprintf ( + stderr, + "bzip2: file name\n`%s'\n" + "is suspiciously (more than %d chars) long.\n" + "Try using a reasonable file name instead. Sorry! :-)\n", + from, FILE_NAME_LEN-10 + ); + setExit(1); + exit(exitValue); + } + + strncpy(to,from,FILE_NAME_LEN-10); + to[FILE_NAME_LEN-10]='\0'; +} + + +/*---------------------------------------------*/ +static +Bool fileExists ( Char* name ) +{ + FILE *tmp = fopen ( name, "rb" ); + Bool exists = (tmp != NULL); + if (tmp != NULL) fclose ( tmp ); + return exists; +} + + +/*---------------------------------------------*/ +/* Open an output file safely with O_EXCL and good permissions. + This avoids a race condition in versions < 1.0.2, in which + the file was first opened and then had its interim permissions + set safely. We instead use open() to create the file with + the interim permissions required. (--- --- rw-). + + For non-Unix platforms, if we are not worrying about + security issues, simple this simply behaves like fopen. +*/ +static +FILE* fopen_output_safely ( Char* name, const char* mode ) +{ +# if BZ_UNIX + FILE* fp; + IntNative fh; + fh = open(name, O_WRONLY|O_CREAT|O_EXCL, S_IWUSR|S_IRUSR); + if (fh == -1) return NULL; + fp = fdopen(fh, mode); + if (fp == NULL) close(fh); + return fp; +# else + return fopen(name, mode); +# endif +} + + +/*---------------------------------------------*/ +/*-- + if in doubt, return True +--*/ +static +Bool notAStandardFile ( Char* name ) +{ + IntNative i; + struct MY_STAT statBuf; + + i = MY_LSTAT ( name, &statBuf ); + if (i != 0) return True; + if (MY_S_ISREG(statBuf.st_mode)) return False; + return True; +} + + +/*---------------------------------------------*/ +/*-- + rac 11/21/98 see if file has hard links to it +--*/ +static +Int32 countHardLinks ( Char* name ) +{ + IntNative i; + struct MY_STAT statBuf; + + i = MY_LSTAT ( name, &statBuf ); + if (i != 0) return 0; + return (statBuf.st_nlink - 1); +} + + +/*---------------------------------------------*/ +/* Copy modification date, access date, permissions and owner from the + source to destination file. We have to copy this meta-info off + into fileMetaInfo before starting to compress / decompress it, + because doing it afterwards means we get the wrong access time. + + To complicate matters, in compress() and decompress() below, the + sequence of tests preceding the call to saveInputFileMetaInfo() + involves calling fileExists(), which in turn establishes its result + by attempting to fopen() the file, and if successful, immediately + fclose()ing it again. So we have to assume that the fopen() call + does not cause the access time field to be updated. + + Reading of the man page for stat() (man 2 stat) on RedHat 7.2 seems + to imply that merely doing open() will not affect the access time. + Therefore we merely need to hope that the C library only does + open() as a result of fopen(), and not any kind of read()-ahead + cleverness. + + It sounds pretty fragile to me. Whether this carries across + robustly to arbitrary Unix-like platforms (or even works robustly + on this one, RedHat 7.2) is unknown to me. Nevertheless ... +*/ +#if BZ_UNIX +static +struct MY_STAT fileMetaInfo; +#endif + +static +void saveInputFileMetaInfo ( Char *srcName ) +{ +# if BZ_UNIX + IntNative retVal; + /* Note use of stat here, not lstat. */ + retVal = MY_STAT( srcName, &fileMetaInfo ); + ERROR_IF_NOT_ZERO ( retVal ); +# endif +} + + +static +void applySavedTimeInfoToOutputFile ( Char *dstName ) +{ +# if BZ_UNIX + IntNative retVal; + struct utimbuf uTimBuf; + + uTimBuf.actime = fileMetaInfo.st_atime; + uTimBuf.modtime = fileMetaInfo.st_mtime; + + retVal = utime ( dstName, &uTimBuf ); + ERROR_IF_NOT_ZERO ( retVal ); +# endif +} + +static +void applySavedFileAttrToOutputFile ( IntNative fd ) +{ +# if BZ_UNIX + IntNative retVal; + + retVal = fchmod ( fd, fileMetaInfo.st_mode ); + ERROR_IF_NOT_ZERO ( retVal ); + + (void) fchown ( fd, fileMetaInfo.st_uid, fileMetaInfo.st_gid ); + /* chown() will in many cases return with EPERM, which can + be safely ignored. + */ +# endif +} + + +/*---------------------------------------------*/ +static +Bool containsDubiousChars ( Char* name ) +{ +# if BZ_UNIX + /* On unix, files can contain any characters and the file expansion + * is performed by the shell. + */ + return False; +# else /* ! BZ_UNIX */ + /* On non-unix (Win* platforms), wildcard characters are not allowed in + * filenames. + */ + for (; *name != '\0'; name++) + if (*name == '?' || *name == '*') return True; + return False; +# endif /* BZ_UNIX */ +} + + +/*---------------------------------------------*/ +#define BZ_N_SUFFIX_PAIRS 4 + +const Char* zSuffix[BZ_N_SUFFIX_PAIRS] + = { ".bz2", ".bz", ".tbz2", ".tbz" }; +const Char* unzSuffix[BZ_N_SUFFIX_PAIRS] + = { "", "", ".tar", ".tar" }; + +static +Bool hasSuffix ( Char* s, const Char* suffix ) +{ + Int32 ns = strlen(s); + Int32 nx = strlen(suffix); + if (ns < nx) return False; + if (strcmp(s + ns - nx, suffix) == 0) return True; + return False; +} + +static +Bool mapSuffix ( Char* name, + const Char* oldSuffix, + const Char* newSuffix ) +{ + if (!hasSuffix(name,oldSuffix)) return False; + name[strlen(name)-strlen(oldSuffix)] = 0; + strcat ( name, newSuffix ); + return True; +} + + +/*---------------------------------------------*/ +static +void compress ( Char *name ) +{ + FILE *inStr; + FILE *outStr; + Int32 n, i; + struct MY_STAT statBuf; + + deleteOutputOnInterrupt = False; + + if (name == NULL && srcMode != SM_I2O) + panic ( "compress: bad modes\n" ); + + switch (srcMode) { + case SM_I2O: + copyFileName ( inName, (Char*)"(stdin)" ); + copyFileName ( outName, (Char*)"(stdout)" ); + break; + case SM_F2F: + copyFileName ( inName, name ); + copyFileName ( outName, name ); + strcat ( outName, ".bz2" ); + break; + case SM_F2O: + copyFileName ( inName, name ); + copyFileName ( outName, (Char*)"(stdout)" ); + break; + } + + if ( srcMode != SM_I2O && containsDubiousChars ( inName ) ) { + if (noisy) + fprintf ( stderr, "%s: There are no files matching `%s'.\n", + progName, inName ); + setExit(1); + return; + } + if ( srcMode != SM_I2O && !fileExists ( inName ) ) { + fprintf ( stderr, "%s: Can't open input file %s: %s.\n", + progName, inName, strerror(errno) ); + setExit(1); + return; + } + for (i = 0; i < BZ_N_SUFFIX_PAIRS; i++) { + if (hasSuffix(inName, zSuffix[i])) { + if (noisy) + fprintf ( stderr, + "%s: Input file %s already has %s suffix.\n", + progName, inName, zSuffix[i] ); + setExit(1); + return; + } + } + if ( srcMode == SM_F2F || srcMode == SM_F2O ) { + MY_STAT(inName, &statBuf); + if ( MY_S_ISDIR(statBuf.st_mode) ) { + fprintf( stderr, + "%s: Input file %s is a directory.\n", + progName,inName); + setExit(1); + return; + } + } + if ( srcMode == SM_F2F && !forceOverwrite && notAStandardFile ( inName )) { + if (noisy) + fprintf ( stderr, "%s: Input file %s is not a normal file.\n", + progName, inName ); + setExit(1); + return; + } + if ( srcMode == SM_F2F && fileExists ( outName ) ) { + if (forceOverwrite) { + remove(outName); + } else { + fprintf ( stderr, "%s: Output file %s already exists.\n", + progName, outName ); + setExit(1); + return; + } + } + if ( srcMode == SM_F2F && !forceOverwrite && + (n=countHardLinks ( inName )) > 0) { + fprintf ( stderr, "%s: Input file %s has %d other link%s.\n", + progName, inName, n, n > 1 ? "s" : "" ); + setExit(1); + return; + } + + if ( srcMode == SM_F2F ) { + /* Save the file's meta-info before we open it. Doing it later + means we mess up the access times. */ + saveInputFileMetaInfo ( inName ); + } + + switch ( srcMode ) { + + case SM_I2O: + inStr = stdin; + outStr = stdout; + if ( isatty ( fileno ( stdout ) ) ) { + fprintf ( stderr, + "%s: I won't write compressed data to a terminal.\n", + progName ); + fprintf ( stderr, "%s: For help, type: `%s --help'.\n", + progName, progName ); + setExit(1); + return; + }; + break; + + case SM_F2O: + inStr = fopen ( inName, "rb" ); + outStr = stdout; + if ( isatty ( fileno ( stdout ) ) ) { + fprintf ( stderr, + "%s: I won't write compressed data to a terminal.\n", + progName ); + fprintf ( stderr, "%s: For help, type: `%s --help'.\n", + progName, progName ); + if ( inStr != NULL ) fclose ( inStr ); + setExit(1); + return; + }; + if ( inStr == NULL ) { + fprintf ( stderr, "%s: Can't open input file %s: %s.\n", + progName, inName, strerror(errno) ); + setExit(1); + return; + }; + break; + + case SM_F2F: + inStr = fopen ( inName, "rb" ); + outStr = fopen_output_safely ( outName, "wb" ); + if ( outStr == NULL) { + fprintf ( stderr, "%s: Can't create output file %s: %s.\n", + progName, outName, strerror(errno) ); + if ( inStr != NULL ) fclose ( inStr ); + setExit(1); + return; + } + if ( inStr == NULL ) { + fprintf ( stderr, "%s: Can't open input file %s: %s.\n", + progName, inName, strerror(errno) ); + if ( outStr != NULL ) fclose ( outStr ); + setExit(1); + return; + }; + break; + + default: + panic ( "compress: bad srcMode" ); + break; + } + + if (verbosity >= 1) { + fprintf ( stderr, " %s: ", inName ); + pad ( inName ); + fflush ( stderr ); + } + + /*--- Now the input and output handles are sane. Do the Biz. ---*/ + outputHandleJustInCase = outStr; + deleteOutputOnInterrupt = True; + compressStream ( inStr, outStr ); + outputHandleJustInCase = NULL; + + /*--- If there was an I/O error, we won't get here. ---*/ + if ( srcMode == SM_F2F ) { + applySavedTimeInfoToOutputFile ( outName ); + deleteOutputOnInterrupt = False; + if ( !keepInputFiles ) { + IntNative retVal = remove ( inName ); + ERROR_IF_NOT_ZERO ( retVal ); + } + } + + deleteOutputOnInterrupt = False; +} + + +/*---------------------------------------------*/ +static +void uncompress ( Char *name ) +{ + FILE *inStr; + FILE *outStr; + Int32 n, i; + Bool magicNumberOK; + Bool cantGuess; + struct MY_STAT statBuf; + + deleteOutputOnInterrupt = False; + + if (name == NULL && srcMode != SM_I2O) + panic ( "uncompress: bad modes\n" ); + + cantGuess = False; + switch (srcMode) { + case SM_I2O: + copyFileName ( inName, (Char*)"(stdin)" ); + copyFileName ( outName, (Char*)"(stdout)" ); + break; + case SM_F2F: + copyFileName ( inName, name ); + copyFileName ( outName, name ); + for (i = 0; i < BZ_N_SUFFIX_PAIRS; i++) + if (mapSuffix(outName,zSuffix[i],unzSuffix[i])) + goto zzz; + cantGuess = True; + strcat ( outName, ".out" ); + break; + case SM_F2O: + copyFileName ( inName, name ); + copyFileName ( outName, (Char*)"(stdout)" ); + break; + } + + zzz: + if ( srcMode != SM_I2O && containsDubiousChars ( inName ) ) { + if (noisy) + fprintf ( stderr, "%s: There are no files matching `%s'.\n", + progName, inName ); + setExit(1); + return; + } + if ( srcMode != SM_I2O && !fileExists ( inName ) ) { + fprintf ( stderr, "%s: Can't open input file %s: %s.\n", + progName, inName, strerror(errno) ); + setExit(1); + return; + } + if ( srcMode == SM_F2F || srcMode == SM_F2O ) { + MY_STAT(inName, &statBuf); + if ( MY_S_ISDIR(statBuf.st_mode) ) { + fprintf( stderr, + "%s: Input file %s is a directory.\n", + progName,inName); + setExit(1); + return; + } + } + if ( srcMode == SM_F2F && !forceOverwrite && notAStandardFile ( inName )) { + if (noisy) + fprintf ( stderr, "%s: Input file %s is not a normal file.\n", + progName, inName ); + setExit(1); + return; + } + if ( /* srcMode == SM_F2F implied && */ cantGuess ) { + if (noisy) + fprintf ( stderr, + "%s: Can't guess original name for %s -- using %s\n", + progName, inName, outName ); + /* just a warning, no return */ + } + if ( srcMode == SM_F2F && fileExists ( outName ) ) { + if (forceOverwrite) { + remove(outName); + } else { + fprintf ( stderr, "%s: Output file %s already exists.\n", + progName, outName ); + setExit(1); + return; + } + } + if ( srcMode == SM_F2F && !forceOverwrite && + (n=countHardLinks ( inName ) ) > 0) { + fprintf ( stderr, "%s: Input file %s has %d other link%s.\n", + progName, inName, n, n > 1 ? "s" : "" ); + setExit(1); + return; + } + + if ( srcMode == SM_F2F ) { + /* Save the file's meta-info before we open it. Doing it later + means we mess up the access times. */ + saveInputFileMetaInfo ( inName ); + } + + switch ( srcMode ) { + + case SM_I2O: + inStr = stdin; + outStr = stdout; + if ( isatty ( fileno ( stdin ) ) ) { + fprintf ( stderr, + "%s: I won't read compressed data from a terminal.\n", + progName ); + fprintf ( stderr, "%s: For help, type: `%s --help'.\n", + progName, progName ); + setExit(1); + return; + }; + break; + + case SM_F2O: + inStr = fopen ( inName, "rb" ); + outStr = stdout; + if ( inStr == NULL ) { + fprintf ( stderr, "%s: Can't open input file %s:%s.\n", + progName, inName, strerror(errno) ); + if ( inStr != NULL ) fclose ( inStr ); + setExit(1); + return; + }; + break; + + case SM_F2F: + inStr = fopen ( inName, "rb" ); + outStr = fopen_output_safely ( outName, "wb" ); + if ( outStr == NULL) { + fprintf ( stderr, "%s: Can't create output file %s: %s.\n", + progName, outName, strerror(errno) ); + if ( inStr != NULL ) fclose ( inStr ); + setExit(1); + return; + } + if ( inStr == NULL ) { + fprintf ( stderr, "%s: Can't open input file %s: %s.\n", + progName, inName, strerror(errno) ); + if ( outStr != NULL ) fclose ( outStr ); + setExit(1); + return; + }; + break; + + default: + panic ( "uncompress: bad srcMode" ); + break; + } + + if (verbosity >= 1) { + fprintf ( stderr, " %s: ", inName ); + pad ( inName ); + fflush ( stderr ); + } + + /*--- Now the input and output handles are sane. Do the Biz. ---*/ + outputHandleJustInCase = outStr; + deleteOutputOnInterrupt = True; + magicNumberOK = uncompressStream ( inStr, outStr ); + outputHandleJustInCase = NULL; + + /*--- If there was an I/O error, we won't get here. ---*/ + if ( magicNumberOK ) { + if ( srcMode == SM_F2F ) { + applySavedTimeInfoToOutputFile ( outName ); + deleteOutputOnInterrupt = False; + if ( !keepInputFiles ) { + IntNative retVal = remove ( inName ); + ERROR_IF_NOT_ZERO ( retVal ); + } + } + } else { + unzFailsExist = True; + deleteOutputOnInterrupt = False; + if ( srcMode == SM_F2F ) { + IntNative retVal = remove ( outName ); + ERROR_IF_NOT_ZERO ( retVal ); + } + } + deleteOutputOnInterrupt = False; + + if ( magicNumberOK ) { + if (verbosity >= 1) + fprintf ( stderr, "done\n" ); + } else { + setExit(2); + if (verbosity >= 1) + fprintf ( stderr, "not a bzip2 file.\n" ); else + fprintf ( stderr, + "%s: %s is not a bzip2 file.\n", + progName, inName ); + } + +} + + +/*---------------------------------------------*/ +static +void testf ( Char *name ) +{ + FILE *inStr; + Bool allOK; + struct MY_STAT statBuf; + + deleteOutputOnInterrupt = False; + + if (name == NULL && srcMode != SM_I2O) + panic ( "testf: bad modes\n" ); + + copyFileName ( outName, (Char*)"(none)" ); + switch (srcMode) { + case SM_I2O: copyFileName ( inName, (Char*)"(stdin)" ); break; + case SM_F2F: copyFileName ( inName, name ); break; + case SM_F2O: copyFileName ( inName, name ); break; + } + + if ( srcMode != SM_I2O && containsDubiousChars ( inName ) ) { + if (noisy) + fprintf ( stderr, "%s: There are no files matching `%s'.\n", + progName, inName ); + setExit(1); + return; + } + if ( srcMode != SM_I2O && !fileExists ( inName ) ) { + fprintf ( stderr, "%s: Can't open input %s: %s.\n", + progName, inName, strerror(errno) ); + setExit(1); + return; + } + if ( srcMode != SM_I2O ) { + MY_STAT(inName, &statBuf); + if ( MY_S_ISDIR(statBuf.st_mode) ) { + fprintf( stderr, + "%s: Input file %s is a directory.\n", + progName,inName); + setExit(1); + return; + } + } + + switch ( srcMode ) { + + case SM_I2O: + if ( isatty ( fileno ( stdin ) ) ) { + fprintf ( stderr, + "%s: I won't read compressed data from a terminal.\n", + progName ); + fprintf ( stderr, "%s: For help, type: `%s --help'.\n", + progName, progName ); + setExit(1); + return; + }; + inStr = stdin; + break; + + case SM_F2O: case SM_F2F: + inStr = fopen ( inName, "rb" ); + if ( inStr == NULL ) { + fprintf ( stderr, "%s: Can't open input file %s:%s.\n", + progName, inName, strerror(errno) ); + setExit(1); + return; + }; + break; + + default: + panic ( "testf: bad srcMode" ); + break; + } + + if (verbosity >= 1) { + fprintf ( stderr, " %s: ", inName ); + pad ( inName ); + fflush ( stderr ); + } + + /*--- Now the input handle is sane. Do the Biz. ---*/ + outputHandleJustInCase = NULL; + allOK = testStream ( inStr ); + + if (allOK && verbosity >= 1) fprintf ( stderr, "ok\n" ); + if (!allOK) testFailsExist = True; +} + + +/*---------------------------------------------*/ +static +void license ( void ) +{ + fprintf ( stderr, + + "bzip2, a block-sorting file compressor. " + "Version %s.\n" + " \n" + " Copyright (C) 1996-2010 by Julian Seward.\n" + " \n" + " This program is free software; you can redistribute it and/or modify\n" + " it under the terms set out in the LICENSE file, which is included\n" + " in the bzip2-1.0.6 source distribution.\n" + " \n" + " This program is distributed in the hope that it will be useful,\n" + " but WITHOUT ANY WARRANTY; without even the implied warranty of\n" + " MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the\n" + " LICENSE file for more details.\n" + " \n", + BZ2_bzlibVersion() + ); +} + + +/*---------------------------------------------*/ +static +void usage ( Char *fullProgName ) +{ + fprintf ( + stderr, + "bzip2, a block-sorting file compressor. " + "Version %s.\n" + "\n usage: %s [flags and input files in any order]\n" + "\n" + " -h --help print this message\n" + " -d --decompress force decompression\n" + " -z --compress force compression\n" + " -k --keep keep (don't delete) input files\n" + " -f --force overwrite existing output files\n" + " -t --test test compressed file integrity\n" + " -c --stdout output to standard out\n" + " -q --quiet suppress noncritical error messages\n" + " -v --verbose be verbose (a 2nd -v gives more)\n" + " -L --license display software version & license\n" + " -V --version display software version & license\n" + " -s --small use less memory (at most 2500k)\n" + " -1 .. -9 set block size to 100k .. 900k\n" + " --fast alias for -1\n" + " --best alias for -9\n" + "\n" + " If invoked as `bzip2', default action is to compress.\n" + " as `bunzip2', default action is to decompress.\n" + " as `bzcat', default action is to decompress to stdout.\n" + "\n" + " If no file names are given, bzip2 compresses or decompresses\n" + " from standard input to standard output. You can combine\n" + " short flags, so `-v -4' means the same as -v4 or -4v, &c.\n" +# if BZ_UNIX + "\n" +# endif + , + + BZ2_bzlibVersion(), + fullProgName + ); +} + + +/*---------------------------------------------*/ +static +void redundant ( Char* flag ) +{ + fprintf ( + stderr, + "%s: %s is redundant in versions 0.9.5 and above\n", + progName, flag ); +} + + +/*---------------------------------------------*/ +/*-- + All the garbage from here to main() is purely to + implement a linked list of command-line arguments, + into which main() copies argv[1 .. argc-1]. + + The purpose of this exercise is to facilitate + the expansion of wildcard characters * and ? in + filenames for OSs which don't know how to do it + themselves, like MSDOS, Windows 95 and NT. + + The actual Dirty Work is done by the platform- + specific macro APPEND_FILESPEC. +--*/ + +typedef + struct zzzz { + Char *name; + struct zzzz *link; + } + Cell; + + +/*---------------------------------------------*/ +static +void *myMalloc ( Int32 n ) +{ + void* p; + + p = malloc ( (size_t)n ); + if (p == NULL) outOfMemory (); + return p; +} + + +/*---------------------------------------------*/ +static +Cell *mkCell ( void ) +{ + Cell *c; + + c = (Cell*) myMalloc ( sizeof ( Cell ) ); + c->name = NULL; + c->link = NULL; + return c; +} + + +/*---------------------------------------------*/ +static +Cell *snocString ( Cell *root, Char *name ) +{ + if (root == NULL) { + Cell *tmp = mkCell(); + tmp->name = (Char*) myMalloc ( 5 + strlen(name) ); + strcpy ( tmp->name, name ); + return tmp; + } else { + Cell *tmp = root; + while (tmp->link != NULL) tmp = tmp->link; + tmp->link = snocString ( tmp->link, name ); + return root; + } +} + + +/*---------------------------------------------*/ +static +void addFlagsFromEnvVar ( Cell** argList, Char* varName ) +{ + Int32 i, j, k; + Char *envbase, *p; + + envbase = getenv(varName); + if (envbase != NULL) { + p = envbase; + i = 0; + while (True) { + if (p[i] == 0) break; + p += i; + i = 0; + while (isspace((Int32)(p[0]))) p++; + while (p[i] != 0 && !isspace((Int32)(p[i]))) i++; + if (i > 0) { + k = i; if (k > FILE_NAME_LEN-10) k = FILE_NAME_LEN-10; + for (j = 0; j < k; j++) tmpName[j] = p[j]; + tmpName[k] = 0; + APPEND_FLAG(*argList, tmpName); + } + } + } +} + + +/*---------------------------------------------*/ +#define ISFLAG(s) (strcmp(aa->name, (s))==0) + +IntNative main ( IntNative argc, Char *argv[] ) +{ + Int32 i, j; + Char *tmp; + Cell *argList; + Cell *aa; + Bool decode; + + /*-- Be really really really paranoid :-) --*/ + if (sizeof(Int32) != 4 || sizeof(UInt32) != 4 || + sizeof(Int16) != 2 || sizeof(UInt16) != 2 || + sizeof(Char) != 1 || sizeof(UChar) != 1) + configError(); + + /*-- Initialise --*/ + outputHandleJustInCase = NULL; + smallMode = False; + keepInputFiles = False; + forceOverwrite = False; + noisy = True; + verbosity = 0; + blockSize100k = 9; + testFailsExist = False; + unzFailsExist = False; + numFileNames = 0; + numFilesProcessed = 0; + workFactor = 30; + deleteOutputOnInterrupt = False; + exitValue = 0; + i = j = 0; /* avoid bogus warning from egcs-1.1.X */ + + /*-- Set up signal handlers for mem access errors --*/ + signal (SIGSEGV, mySIGSEGVorSIGBUScatcher); +# if BZ_UNIX +# ifndef __DJGPP__ + signal (SIGBUS, mySIGSEGVorSIGBUScatcher); +# endif +# endif + + copyFileName ( inName, (Char*)"(none)" ); + copyFileName ( outName, (Char*)"(none)" ); + + copyFileName ( progNameReally, argv[0] ); + progName = &progNameReally[0]; + for (tmp = &progNameReally[0]; *tmp != '\0'; tmp++) + if (*tmp == PATH_SEP) progName = tmp + 1; + + + /*-- Copy flags from env var BZIP2, and + expand filename wildcards in arg list. + --*/ + argList = NULL; + addFlagsFromEnvVar ( &argList, (Char*)"BZIP2" ); + addFlagsFromEnvVar ( &argList, (Char*)"BZIP" ); + for (i = 1; i <= argc-1; i++) + APPEND_FILESPEC(argList, argv[i]); + + + /*-- Find the length of the longest filename --*/ + longestFileName = 7; + numFileNames = 0; + decode = True; + for (aa = argList; aa != NULL; aa = aa->link) { + if (ISFLAG("--")) { decode = False; continue; } + if (aa->name[0] == '-' && decode) continue; + numFileNames++; + if (longestFileName < (Int32)strlen(aa->name) ) + longestFileName = (Int32)strlen(aa->name); + } + + + /*-- Determine source modes; flag handling may change this too. --*/ + if (numFileNames == 0) + srcMode = SM_I2O; else srcMode = SM_F2F; + + + /*-- Determine what to do (compress/uncompress/test/cat). --*/ + /*-- Note that subsequent flag handling may change this. --*/ + opMode = OM_Z; + + if ( (strstr ( progName, "unzip" ) != 0) || + (strstr ( progName, "UNZIP" ) != 0) ) + opMode = OM_UNZ; + + if ( (strstr ( progName, "z2cat" ) != 0) || + (strstr ( progName, "Z2CAT" ) != 0) || + (strstr ( progName, "zcat" ) != 0) || + (strstr ( progName, "ZCAT" ) != 0) ) { + opMode = OM_UNZ; + srcMode = (numFileNames == 0) ? SM_I2O : SM_F2O; + } + + + /*-- Look at the flags. --*/ + for (aa = argList; aa != NULL; aa = aa->link) { + if (ISFLAG("--")) break; + if (aa->name[0] == '-' && aa->name[1] != '-') { + for (j = 1; aa->name[j] != '\0'; j++) { + switch (aa->name[j]) { + case 'c': srcMode = SM_F2O; break; + case 'd': opMode = OM_UNZ; break; + case 'z': opMode = OM_Z; break; + case 'f': forceOverwrite = True; break; + case 't': opMode = OM_TEST; break; + case 'k': keepInputFiles = True; break; + case 's': smallMode = True; break; + case 'q': noisy = False; break; + case '1': blockSize100k = 1; break; + case '2': blockSize100k = 2; break; + case '3': blockSize100k = 3; break; + case '4': blockSize100k = 4; break; + case '5': blockSize100k = 5; break; + case '6': blockSize100k = 6; break; + case '7': blockSize100k = 7; break; + case '8': blockSize100k = 8; break; + case '9': blockSize100k = 9; break; + case 'V': + case 'L': license(); + exit ( 0 ); + break; + case 'v': verbosity++; break; + case 'h': usage ( progName ); + exit ( 0 ); + break; + default: fprintf ( stderr, "%s: Bad flag `%s'\n", + progName, aa->name ); + usage ( progName ); + exit ( 1 ); + break; + } + } + } + } + + /*-- And again ... --*/ + for (aa = argList; aa != NULL; aa = aa->link) { + if (ISFLAG("--")) break; + if (ISFLAG("--stdout")) srcMode = SM_F2O; else + if (ISFLAG("--decompress")) opMode = OM_UNZ; else + if (ISFLAG("--compress")) opMode = OM_Z; else + if (ISFLAG("--force")) forceOverwrite = True; else + if (ISFLAG("--test")) opMode = OM_TEST; else + if (ISFLAG("--keep")) keepInputFiles = True; else + if (ISFLAG("--small")) smallMode = True; else + if (ISFLAG("--quiet")) noisy = False; else + if (ISFLAG("--version")) { license(); exit ( 0 ); } else + if (ISFLAG("--license")) { license(); exit ( 0 ); } else + if (ISFLAG("--exponential")) workFactor = 1; else + if (ISFLAG("--repetitive-best")) redundant(aa->name); else + if (ISFLAG("--repetitive-fast")) redundant(aa->name); else + if (ISFLAG("--fast")) blockSize100k = 1; else + if (ISFLAG("--best")) blockSize100k = 9; else + if (ISFLAG("--verbose")) verbosity++; else + if (ISFLAG("--help")) { usage ( progName ); exit ( 0 ); } + else + if (strncmp ( aa->name, "--", 2) == 0) { + fprintf ( stderr, "%s: Bad flag `%s'\n", progName, aa->name ); + usage ( progName ); + exit ( 1 ); + } + } + + if (verbosity > 4) verbosity = 4; + if (opMode == OM_Z && smallMode && blockSize100k > 2) + blockSize100k = 2; + + if (opMode == OM_TEST && srcMode == SM_F2O) { + fprintf ( stderr, "%s: -c and -t cannot be used together.\n", + progName ); + exit ( 1 ); + } + + if (srcMode == SM_F2O && numFileNames == 0) + srcMode = SM_I2O; + + if (opMode != OM_Z) blockSize100k = 0; + + if (srcMode == SM_F2F) { + signal (SIGINT, mySignalCatcher); + signal (SIGTERM, mySignalCatcher); +# if BZ_UNIX + signal (SIGHUP, mySignalCatcher); +# endif + } + + if (opMode == OM_Z) { + if (srcMode == SM_I2O) { + compress ( NULL ); + } else { + decode = True; + for (aa = argList; aa != NULL; aa = aa->link) { + if (ISFLAG("--")) { decode = False; continue; } + if (aa->name[0] == '-' && decode) continue; + numFilesProcessed++; + compress ( aa->name ); + } + } + } + else + + if (opMode == OM_UNZ) { + unzFailsExist = False; + if (srcMode == SM_I2O) { + uncompress ( NULL ); + } else { + decode = True; + for (aa = argList; aa != NULL; aa = aa->link) { + if (ISFLAG("--")) { decode = False; continue; } + if (aa->name[0] == '-' && decode) continue; + numFilesProcessed++; + uncompress ( aa->name ); + } + } + if (unzFailsExist) { + setExit(2); + exit(exitValue); + } + } + + else { + testFailsExist = False; + if (srcMode == SM_I2O) { + testf ( NULL ); + } else { + decode = True; + for (aa = argList; aa != NULL; aa = aa->link) { + if (ISFLAG("--")) { decode = False; continue; } + if (aa->name[0] == '-' && decode) continue; + numFilesProcessed++; + testf ( aa->name ); + } + } + if (testFailsExist) { + if (noisy) { + fprintf ( stderr, + "\n" + "You can use the `bzip2recover' program to attempt to recover\n" + "data from undamaged sections of corrupted files.\n\n" + ); + } + setExit(2); + exit(exitValue); + } + } + + /* Free the argument list memory to mollify leak detectors + (eg) Purify, Checker. Serves no other useful purpose. + */ + aa = argList; + while (aa != NULL) { + Cell* aa2 = aa->link; + if (aa->name != NULL) free(aa->name); + free(aa); + aa = aa2; + } + + return exitValue; +} + + +/*-----------------------------------------------------------*/ +/*--- end bzip2.c ---*/ +/*-----------------------------------------------------------*/ diff --git a/pit/suse-bzip2/bzlib.c b/pit/suse-bzip2/bzlib.c new file mode 100644 index 0000000..bd358a7 --- /dev/null +++ b/pit/suse-bzip2/bzlib.c @@ -0,0 +1,1572 @@ + +/*-------------------------------------------------------------*/ +/*--- Library top-level functions. ---*/ +/*--- bzlib.c ---*/ +/*-------------------------------------------------------------*/ + +/* ------------------------------------------------------------------ + This file is part of bzip2/libbzip2, a program and library for + lossless, block-sorting data compression. + + bzip2/libbzip2 version 1.0.6 of 6 September 2010 + Copyright (C) 1996-2010 Julian Seward + + Please read the WARNING, DISCLAIMER and PATENTS sections in the + README file. + + This program is released under the terms of the license contained + in the file LICENSE. + ------------------------------------------------------------------ */ + +/* CHANGES + 0.9.0 -- original version. + 0.9.0a/b -- no changes in this file. + 0.9.0c -- made zero-length BZ_FLUSH work correctly in bzCompress(). + fixed bzWrite/bzRead to ignore zero-length requests. + fixed bzread to correctly handle read requests after EOF. + wrong parameter order in call to bzDecompressInit in + bzBuffToBuffDecompress. Fixed. +*/ + +#include "bzlib_private.h" + + +/*---------------------------------------------------*/ +/*--- Compression stuff ---*/ +/*---------------------------------------------------*/ + + +/*---------------------------------------------------*/ +#ifndef BZ_NO_STDIO +void BZ2_bz__AssertH__fail ( int errcode ) +{ + fprintf(stderr, + "\n\nbzip2/libbzip2: internal error number %d.\n" + "This is a bug in bzip2/libbzip2, %s.\n" + "Please report it to me at: jseward@bzip.org. If this happened\n" + "when you were using some program which uses libbzip2 as a\n" + "component, you should also report this bug to the author(s)\n" + "of that program. Please make an effort to report this bug;\n" + "timely and accurate bug reports eventually lead to higher\n" + "quality software. Thanks. Julian Seward, 10 December 2007.\n\n", + errcode, + BZ2_bzlibVersion() + ); + + if (errcode == 1007) { + fprintf(stderr, + "\n*** A special note about internal error number 1007 ***\n" + "\n" + "Experience suggests that a common cause of i.e. 1007\n" + "is unreliable memory or other hardware. The 1007 assertion\n" + "just happens to cross-check the results of huge numbers of\n" + "memory reads/writes, and so acts (unintendedly) as a stress\n" + "test of your memory system.\n" + "\n" + "I suggest the following: try compressing the file again,\n" + "possibly monitoring progress in detail with the -vv flag.\n" + "\n" + "* If the error cannot be reproduced, and/or happens at different\n" + " points in compression, you may have a flaky memory system.\n" + " Try a memory-test program. I have used Memtest86\n" + " (www.memtest86.com). At the time of writing it is free (GPLd).\n" + " Memtest86 tests memory much more thorougly than your BIOSs\n" + " power-on test, and may find failures that the BIOS doesn't.\n" + "\n" + "* If the error can be repeatably reproduced, this is a bug in\n" + " bzip2, and I would very much like to hear about it. Please\n" + " let me know, and, ideally, save a copy of the file causing the\n" + " problem -- without which I will be unable to investigate it.\n" + "\n" + ); + } + + exit(3); +} +#endif + + +/*---------------------------------------------------*/ +static +int bz_config_ok ( void ) +{ + if (sizeof(int) != 4) return 0; + if (sizeof(short) != 2) return 0; + if (sizeof(char) != 1) return 0; + return 1; +} + + +/*---------------------------------------------------*/ +static +void* default_bzalloc ( void* opaque, Int32 items, Int32 size ) +{ + void* v = malloc ( items * size ); + return v; +} + +static +void default_bzfree ( void* opaque, void* addr ) +{ + if (addr != NULL) free ( addr ); +} + + +/*---------------------------------------------------*/ +static +void prepare_new_block ( EState* s ) +{ + Int32 i; + s->nblock = 0; + s->numZ = 0; + s->state_out_pos = 0; + BZ_INITIALISE_CRC ( s->blockCRC ); + for (i = 0; i < 256; i++) s->inUse[i] = False; + s->blockNo++; +} + + +/*---------------------------------------------------*/ +static +void init_RL ( EState* s ) +{ + s->state_in_ch = 256; + s->state_in_len = 0; +} + + +static +Bool isempty_RL ( EState* s ) +{ + if (s->state_in_ch < 256 && s->state_in_len > 0) + return False; else + return True; +} + + +/*---------------------------------------------------*/ +int BZ_API(BZ2_bzCompressInit) + ( bz_stream* strm, + int blockSize100k, + int verbosity, + int workFactor ) +{ + Int32 n; + EState* s; + + if (!bz_config_ok()) return BZ_CONFIG_ERROR; + + if (strm == NULL || + blockSize100k < 1 || blockSize100k > 9 || + workFactor < 0 || workFactor > 250) + return BZ_PARAM_ERROR; + + if (workFactor == 0) workFactor = 30; + if (strm->bzalloc == NULL) strm->bzalloc = default_bzalloc; + if (strm->bzfree == NULL) strm->bzfree = default_bzfree; + + s = BZALLOC( sizeof(EState) ); + if (s == NULL) return BZ_MEM_ERROR; + s->strm = strm; + + s->arr1 = NULL; + s->arr2 = NULL; + s->ftab = NULL; + + n = 100000 * blockSize100k; + s->arr1 = BZALLOC( n * sizeof(UInt32) ); + s->arr2 = BZALLOC( (n+BZ_N_OVERSHOOT) * sizeof(UInt32) ); + s->ftab = BZALLOC( 65537 * sizeof(UInt32) ); + + if (s->arr1 == NULL || s->arr2 == NULL || s->ftab == NULL) { + if (s->arr1 != NULL) BZFREE(s->arr1); + if (s->arr2 != NULL) BZFREE(s->arr2); + if (s->ftab != NULL) BZFREE(s->ftab); + if (s != NULL) BZFREE(s); + return BZ_MEM_ERROR; + } + + s->blockNo = 0; + s->state = BZ_S_INPUT; + s->mode = BZ_M_RUNNING; + s->combinedCRC = 0; + s->blockSize100k = blockSize100k; + s->nblockMAX = 100000 * blockSize100k - 19; + s->verbosity = verbosity; + s->workFactor = workFactor; + + s->block = (UChar*)s->arr2; + s->mtfv = (UInt16*)s->arr1; + s->zbits = NULL; + s->ptr = (UInt32*)s->arr1; + + strm->state = s; + strm->total_in_lo32 = 0; + strm->total_in_hi32 = 0; + strm->total_out_lo32 = 0; + strm->total_out_hi32 = 0; + init_RL ( s ); + prepare_new_block ( s ); + return BZ_OK; +} + + +/*---------------------------------------------------*/ +static +void add_pair_to_block ( EState* s ) +{ + Int32 i; + UChar ch = (UChar)(s->state_in_ch); + for (i = 0; i < s->state_in_len; i++) { + BZ_UPDATE_CRC( s->blockCRC, ch ); + } + s->inUse[s->state_in_ch] = True; + switch (s->state_in_len) { + case 1: + s->block[s->nblock] = (UChar)ch; s->nblock++; + break; + case 2: + s->block[s->nblock] = (UChar)ch; s->nblock++; + s->block[s->nblock] = (UChar)ch; s->nblock++; + break; + case 3: + s->block[s->nblock] = (UChar)ch; s->nblock++; + s->block[s->nblock] = (UChar)ch; s->nblock++; + s->block[s->nblock] = (UChar)ch; s->nblock++; + break; + default: + s->inUse[s->state_in_len-4] = True; + s->block[s->nblock] = (UChar)ch; s->nblock++; + s->block[s->nblock] = (UChar)ch; s->nblock++; + s->block[s->nblock] = (UChar)ch; s->nblock++; + s->block[s->nblock] = (UChar)ch; s->nblock++; + s->block[s->nblock] = ((UChar)(s->state_in_len-4)); + s->nblock++; + break; + } +} + + +/*---------------------------------------------------*/ +static +void flush_RL ( EState* s ) +{ + if (s->state_in_ch < 256) add_pair_to_block ( s ); + init_RL ( s ); +} + + +/*---------------------------------------------------*/ +#define ADD_CHAR_TO_BLOCK(zs,zchh0) \ +{ \ + UInt32 zchh = (UInt32)(zchh0); \ + /*-- fast track the common case --*/ \ + if (zchh != zs->state_in_ch && \ + zs->state_in_len == 1) { \ + UChar ch = (UChar)(zs->state_in_ch); \ + BZ_UPDATE_CRC( zs->blockCRC, ch ); \ + zs->inUse[zs->state_in_ch] = True; \ + zs->block[zs->nblock] = (UChar)ch; \ + zs->nblock++; \ + zs->state_in_ch = zchh; \ + } \ + else \ + /*-- general, uncommon cases --*/ \ + if (zchh != zs->state_in_ch || \ + zs->state_in_len == 255) { \ + if (zs->state_in_ch < 256) \ + add_pair_to_block ( zs ); \ + zs->state_in_ch = zchh; \ + zs->state_in_len = 1; \ + } else { \ + zs->state_in_len++; \ + } \ +} + + +/*---------------------------------------------------*/ +static +Bool copy_input_until_stop ( EState* s ) +{ + Bool progress_in = False; + + if (s->mode == BZ_M_RUNNING) { + + /*-- fast track the common case --*/ + while (True) { + /*-- block full? --*/ + if (s->nblock >= s->nblockMAX) break; + /*-- no input? --*/ + if (s->strm->avail_in == 0) break; + progress_in = True; + ADD_CHAR_TO_BLOCK ( s, (UInt32)(*((UChar*)(s->strm->next_in))) ); + s->strm->next_in++; + s->strm->avail_in--; + s->strm->total_in_lo32++; + if (s->strm->total_in_lo32 == 0) s->strm->total_in_hi32++; + } + + } else { + + /*-- general, uncommon case --*/ + while (True) { + /*-- block full? --*/ + if (s->nblock >= s->nblockMAX) break; + /*-- no input? --*/ + if (s->strm->avail_in == 0) break; + /*-- flush/finish end? --*/ + if (s->avail_in_expect == 0) break; + progress_in = True; + ADD_CHAR_TO_BLOCK ( s, (UInt32)(*((UChar*)(s->strm->next_in))) ); + s->strm->next_in++; + s->strm->avail_in--; + s->strm->total_in_lo32++; + if (s->strm->total_in_lo32 == 0) s->strm->total_in_hi32++; + s->avail_in_expect--; + } + } + return progress_in; +} + + +/*---------------------------------------------------*/ +static +Bool copy_output_until_stop ( EState* s ) +{ + Bool progress_out = False; + + while (True) { + + /*-- no output space? --*/ + if (s->strm->avail_out == 0) break; + + /*-- block done? --*/ + if (s->state_out_pos >= s->numZ) break; + + progress_out = True; + *(s->strm->next_out) = s->zbits[s->state_out_pos]; + s->state_out_pos++; + s->strm->avail_out--; + s->strm->next_out++; + s->strm->total_out_lo32++; + if (s->strm->total_out_lo32 == 0) s->strm->total_out_hi32++; + } + + return progress_out; +} + + +/*---------------------------------------------------*/ +static +Bool handle_compress ( bz_stream* strm ) +{ + Bool progress_in = False; + Bool progress_out = False; + EState* s = strm->state; + + while (True) { + + if (s->state == BZ_S_OUTPUT) { + progress_out |= copy_output_until_stop ( s ); + if (s->state_out_pos < s->numZ) break; + if (s->mode == BZ_M_FINISHING && + s->avail_in_expect == 0 && + isempty_RL(s)) break; + prepare_new_block ( s ); + s->state = BZ_S_INPUT; + if (s->mode == BZ_M_FLUSHING && + s->avail_in_expect == 0 && + isempty_RL(s)) break; + } + + if (s->state == BZ_S_INPUT) { + progress_in |= copy_input_until_stop ( s ); + if (s->mode != BZ_M_RUNNING && s->avail_in_expect == 0) { + flush_RL ( s ); + BZ2_compressBlock ( s, (Bool)(s->mode == BZ_M_FINISHING) ); + s->state = BZ_S_OUTPUT; + } + else + if (s->nblock >= s->nblockMAX) { + BZ2_compressBlock ( s, False ); + s->state = BZ_S_OUTPUT; + } + else + if (s->strm->avail_in == 0) { + break; + } + } + + } + + return progress_in || progress_out; +} + + +/*---------------------------------------------------*/ +int BZ_API(BZ2_bzCompress) ( bz_stream *strm, int action ) +{ + Bool progress; + EState* s; + if (strm == NULL) return BZ_PARAM_ERROR; + s = strm->state; + if (s == NULL) return BZ_PARAM_ERROR; + if (s->strm != strm) return BZ_PARAM_ERROR; + + preswitch: + switch (s->mode) { + + case BZ_M_IDLE: + return BZ_SEQUENCE_ERROR; + + case BZ_M_RUNNING: + if (action == BZ_RUN) { + progress = handle_compress ( strm ); + return progress ? BZ_RUN_OK : BZ_PARAM_ERROR; + } + else + if (action == BZ_FLUSH) { + s->avail_in_expect = strm->avail_in; + s->mode = BZ_M_FLUSHING; + goto preswitch; + } + else + if (action == BZ_FINISH) { + s->avail_in_expect = strm->avail_in; + s->mode = BZ_M_FINISHING; + goto preswitch; + } + else + return BZ_PARAM_ERROR; + + case BZ_M_FLUSHING: + if (action != BZ_FLUSH) return BZ_SEQUENCE_ERROR; + if (s->avail_in_expect != s->strm->avail_in) + return BZ_SEQUENCE_ERROR; + progress = handle_compress ( strm ); + if (s->avail_in_expect > 0 || !isempty_RL(s) || + s->state_out_pos < s->numZ) return BZ_FLUSH_OK; + s->mode = BZ_M_RUNNING; + return BZ_RUN_OK; + + case BZ_M_FINISHING: + if (action != BZ_FINISH) return BZ_SEQUENCE_ERROR; + if (s->avail_in_expect != s->strm->avail_in) + return BZ_SEQUENCE_ERROR; + progress = handle_compress ( strm ); + if (!progress) return BZ_SEQUENCE_ERROR; + if (s->avail_in_expect > 0 || !isempty_RL(s) || + s->state_out_pos < s->numZ) return BZ_FINISH_OK; + s->mode = BZ_M_IDLE; + return BZ_STREAM_END; + } + return BZ_OK; /*--not reached--*/ +} + + +/*---------------------------------------------------*/ +int BZ_API(BZ2_bzCompressEnd) ( bz_stream *strm ) +{ + EState* s; + if (strm == NULL) return BZ_PARAM_ERROR; + s = strm->state; + if (s == NULL) return BZ_PARAM_ERROR; + if (s->strm != strm) return BZ_PARAM_ERROR; + + if (s->arr1 != NULL) BZFREE(s->arr1); + if (s->arr2 != NULL) BZFREE(s->arr2); + if (s->ftab != NULL) BZFREE(s->ftab); + BZFREE(strm->state); + + strm->state = NULL; + + return BZ_OK; +} + + +/*---------------------------------------------------*/ +/*--- Decompression stuff ---*/ +/*---------------------------------------------------*/ + +/*---------------------------------------------------*/ +int BZ_API(BZ2_bzDecompressInit) + ( bz_stream* strm, + int verbosity, + int small ) +{ + DState* s; + + if (!bz_config_ok()) return BZ_CONFIG_ERROR; + + if (strm == NULL) return BZ_PARAM_ERROR; + if (small != 0 && small != 1) return BZ_PARAM_ERROR; + if (verbosity < 0 || verbosity > 4) return BZ_PARAM_ERROR; + + if (strm->bzalloc == NULL) strm->bzalloc = default_bzalloc; + if (strm->bzfree == NULL) strm->bzfree = default_bzfree; + + s = BZALLOC( sizeof(DState) ); + if (s == NULL) return BZ_MEM_ERROR; + s->strm = strm; + strm->state = s; + s->state = BZ_X_MAGIC_1; + s->bsLive = 0; + s->bsBuff = 0; + s->calculatedCombinedCRC = 0; + strm->total_in_lo32 = 0; + strm->total_in_hi32 = 0; + strm->total_out_lo32 = 0; + strm->total_out_hi32 = 0; + s->smallDecompress = (Bool)small; + s->ll4 = NULL; + s->ll16 = NULL; + s->tt = NULL; + s->currBlockNo = 0; + s->verbosity = verbosity; + + return BZ_OK; +} + + +/*---------------------------------------------------*/ +/* Return True iff data corruption is discovered. + Returns False if there is no problem. +*/ +static +Bool unRLE_obuf_to_output_FAST ( DState* s ) +{ + UChar k1; + + if (s->blockRandomised) { + + while (True) { + /* try to finish existing run */ + while (True) { + if (s->strm->avail_out == 0) return False; + if (s->state_out_len == 0) break; + *( (UChar*)(s->strm->next_out) ) = s->state_out_ch; + BZ_UPDATE_CRC ( s->calculatedBlockCRC, s->state_out_ch ); + s->state_out_len--; + s->strm->next_out++; + s->strm->avail_out--; + s->strm->total_out_lo32++; + if (s->strm->total_out_lo32 == 0) s->strm->total_out_hi32++; + } + + /* can a new run be started? */ + if (s->nblock_used == s->save_nblock+1) return False; + + /* Only caused by corrupt data stream? */ + if (s->nblock_used > s->save_nblock+1) + return True; + + s->state_out_len = 1; + s->state_out_ch = s->k0; + BZ_GET_FAST(k1); BZ_RAND_UPD_MASK; + k1 ^= BZ_RAND_MASK; s->nblock_used++; + if (s->nblock_used == s->save_nblock+1) continue; + if (k1 != s->k0) { s->k0 = k1; continue; }; + + s->state_out_len = 2; + BZ_GET_FAST(k1); BZ_RAND_UPD_MASK; + k1 ^= BZ_RAND_MASK; s->nblock_used++; + if (s->nblock_used == s->save_nblock+1) continue; + if (k1 != s->k0) { s->k0 = k1; continue; }; + + s->state_out_len = 3; + BZ_GET_FAST(k1); BZ_RAND_UPD_MASK; + k1 ^= BZ_RAND_MASK; s->nblock_used++; + if (s->nblock_used == s->save_nblock+1) continue; + if (k1 != s->k0) { s->k0 = k1; continue; }; + + BZ_GET_FAST(k1); BZ_RAND_UPD_MASK; + k1 ^= BZ_RAND_MASK; s->nblock_used++; + s->state_out_len = ((Int32)k1) + 4; + BZ_GET_FAST(s->k0); BZ_RAND_UPD_MASK; + s->k0 ^= BZ_RAND_MASK; s->nblock_used++; + } + + } else { + + /* restore */ + UInt32 c_calculatedBlockCRC = s->calculatedBlockCRC; + UChar c_state_out_ch = s->state_out_ch; + Int32 c_state_out_len = s->state_out_len; + Int32 c_nblock_used = s->nblock_used; + Int32 c_k0 = s->k0; + UInt32* c_tt = s->tt; + UInt32 c_tPos = s->tPos; + char* cs_next_out = s->strm->next_out; + unsigned int cs_avail_out = s->strm->avail_out; + Int32 ro_blockSize100k = s->blockSize100k; + /* end restore */ + + UInt32 avail_out_INIT = cs_avail_out; + Int32 s_save_nblockPP = s->save_nblock+1; + unsigned int total_out_lo32_old; + + while (True) { + + /* try to finish existing run */ + if (c_state_out_len > 0) { + while (True) { + if (cs_avail_out == 0) goto return_notr; + if (c_state_out_len == 1) break; + *( (UChar*)(cs_next_out) ) = c_state_out_ch; + BZ_UPDATE_CRC ( c_calculatedBlockCRC, c_state_out_ch ); + c_state_out_len--; + cs_next_out++; + cs_avail_out--; + } + s_state_out_len_eq_one: + { + if (cs_avail_out == 0) { + c_state_out_len = 1; goto return_notr; + }; + *( (UChar*)(cs_next_out) ) = c_state_out_ch; + BZ_UPDATE_CRC ( c_calculatedBlockCRC, c_state_out_ch ); + cs_next_out++; + cs_avail_out--; + } + } + /* Only caused by corrupt data stream? */ + if (c_nblock_used > s_save_nblockPP) + return True; + + /* can a new run be started? */ + if (c_nblock_used == s_save_nblockPP) { + c_state_out_len = 0; goto return_notr; + }; + c_state_out_ch = c_k0; + BZ_GET_FAST_C(k1); c_nblock_used++; + if (k1 != c_k0) { + c_k0 = k1; goto s_state_out_len_eq_one; + }; + if (c_nblock_used == s_save_nblockPP) + goto s_state_out_len_eq_one; + + c_state_out_len = 2; + BZ_GET_FAST_C(k1); c_nblock_used++; + if (c_nblock_used == s_save_nblockPP) continue; + if (k1 != c_k0) { c_k0 = k1; continue; }; + + c_state_out_len = 3; + BZ_GET_FAST_C(k1); c_nblock_used++; + if (c_nblock_used == s_save_nblockPP) continue; + if (k1 != c_k0) { c_k0 = k1; continue; }; + + BZ_GET_FAST_C(k1); c_nblock_used++; + c_state_out_len = ((Int32)k1) + 4; + BZ_GET_FAST_C(c_k0); c_nblock_used++; + } + + return_notr: + total_out_lo32_old = s->strm->total_out_lo32; + s->strm->total_out_lo32 += (avail_out_INIT - cs_avail_out); + if (s->strm->total_out_lo32 < total_out_lo32_old) + s->strm->total_out_hi32++; + + /* save */ + s->calculatedBlockCRC = c_calculatedBlockCRC; + s->state_out_ch = c_state_out_ch; + s->state_out_len = c_state_out_len; + s->nblock_used = c_nblock_used; + s->k0 = c_k0; + s->tt = c_tt; + s->tPos = c_tPos; + s->strm->next_out = cs_next_out; + s->strm->avail_out = cs_avail_out; + /* end save */ + } + return False; +} + + + +/*---------------------------------------------------*/ +__inline__ Int32 BZ2_indexIntoF ( Int32 indx, Int32 *cftab ) +{ + Int32 nb, na, mid; + nb = 0; + na = 256; + do { + mid = (nb + na) >> 1; + if (indx >= cftab[mid]) nb = mid; else na = mid; + } + while (na - nb != 1); + return nb; +} + + +/*---------------------------------------------------*/ +/* Return True iff data corruption is discovered. + Returns False if there is no problem. +*/ +static +Bool unRLE_obuf_to_output_SMALL ( DState* s ) +{ + UChar k1; + + if (s->blockRandomised) { + + while (True) { + /* try to finish existing run */ + while (True) { + if (s->strm->avail_out == 0) return False; + if (s->state_out_len == 0) break; + *( (UChar*)(s->strm->next_out) ) = s->state_out_ch; + BZ_UPDATE_CRC ( s->calculatedBlockCRC, s->state_out_ch ); + s->state_out_len--; + s->strm->next_out++; + s->strm->avail_out--; + s->strm->total_out_lo32++; + if (s->strm->total_out_lo32 == 0) s->strm->total_out_hi32++; + } + + /* can a new run be started? */ + if (s->nblock_used == s->save_nblock+1) return False; + + /* Only caused by corrupt data stream? */ + if (s->nblock_used > s->save_nblock+1) + return True; + + s->state_out_len = 1; + s->state_out_ch = s->k0; + BZ_GET_SMALL(k1); BZ_RAND_UPD_MASK; + k1 ^= BZ_RAND_MASK; s->nblock_used++; + if (s->nblock_used == s->save_nblock+1) continue; + if (k1 != s->k0) { s->k0 = k1; continue; }; + + s->state_out_len = 2; + BZ_GET_SMALL(k1); BZ_RAND_UPD_MASK; + k1 ^= BZ_RAND_MASK; s->nblock_used++; + if (s->nblock_used == s->save_nblock+1) continue; + if (k1 != s->k0) { s->k0 = k1; continue; }; + + s->state_out_len = 3; + BZ_GET_SMALL(k1); BZ_RAND_UPD_MASK; + k1 ^= BZ_RAND_MASK; s->nblock_used++; + if (s->nblock_used == s->save_nblock+1) continue; + if (k1 != s->k0) { s->k0 = k1; continue; }; + + BZ_GET_SMALL(k1); BZ_RAND_UPD_MASK; + k1 ^= BZ_RAND_MASK; s->nblock_used++; + s->state_out_len = ((Int32)k1) + 4; + BZ_GET_SMALL(s->k0); BZ_RAND_UPD_MASK; + s->k0 ^= BZ_RAND_MASK; s->nblock_used++; + } + + } else { + + while (True) { + /* try to finish existing run */ + while (True) { + if (s->strm->avail_out == 0) return False; + if (s->state_out_len == 0) break; + *( (UChar*)(s->strm->next_out) ) = s->state_out_ch; + BZ_UPDATE_CRC ( s->calculatedBlockCRC, s->state_out_ch ); + s->state_out_len--; + s->strm->next_out++; + s->strm->avail_out--; + s->strm->total_out_lo32++; + if (s->strm->total_out_lo32 == 0) s->strm->total_out_hi32++; + } + + /* can a new run be started? */ + if (s->nblock_used == s->save_nblock+1) return False; + + /* Only caused by corrupt data stream? */ + if (s->nblock_used > s->save_nblock+1) + return True; + + s->state_out_len = 1; + s->state_out_ch = s->k0; + BZ_GET_SMALL(k1); s->nblock_used++; + if (s->nblock_used == s->save_nblock+1) continue; + if (k1 != s->k0) { s->k0 = k1; continue; }; + + s->state_out_len = 2; + BZ_GET_SMALL(k1); s->nblock_used++; + if (s->nblock_used == s->save_nblock+1) continue; + if (k1 != s->k0) { s->k0 = k1; continue; }; + + s->state_out_len = 3; + BZ_GET_SMALL(k1); s->nblock_used++; + if (s->nblock_used == s->save_nblock+1) continue; + if (k1 != s->k0) { s->k0 = k1; continue; }; + + BZ_GET_SMALL(k1); s->nblock_used++; + s->state_out_len = ((Int32)k1) + 4; + BZ_GET_SMALL(s->k0); s->nblock_used++; + } + + } +} + + +/*---------------------------------------------------*/ +int BZ_API(BZ2_bzDecompress) ( bz_stream *strm ) +{ + Bool corrupt; + DState* s; + if (strm == NULL) return BZ_PARAM_ERROR; + s = strm->state; + if (s == NULL) return BZ_PARAM_ERROR; + if (s->strm != strm) return BZ_PARAM_ERROR; + + while (True) { + if (s->state == BZ_X_IDLE) return BZ_SEQUENCE_ERROR; + if (s->state == BZ_X_OUTPUT) { + if (s->smallDecompress) + corrupt = unRLE_obuf_to_output_SMALL ( s ); else + corrupt = unRLE_obuf_to_output_FAST ( s ); + if (corrupt) return BZ_DATA_ERROR; + if (s->nblock_used == s->save_nblock+1 && s->state_out_len == 0) { + BZ_FINALISE_CRC ( s->calculatedBlockCRC ); + if (s->verbosity >= 3) + VPrintf2 ( " {0x%08x, 0x%08x}", s->storedBlockCRC, + s->calculatedBlockCRC ); + if (s->verbosity >= 2) VPrintf0 ( "]" ); + if (s->calculatedBlockCRC != s->storedBlockCRC) + return BZ_DATA_ERROR; + s->calculatedCombinedCRC + = (s->calculatedCombinedCRC << 1) | + (s->calculatedCombinedCRC >> 31); + s->calculatedCombinedCRC ^= s->calculatedBlockCRC; + s->state = BZ_X_BLKHDR_1; + } else { + return BZ_OK; + } + } + if (s->state >= BZ_X_MAGIC_1) { + Int32 r = BZ2_decompress ( s ); + if (r == BZ_STREAM_END) { + if (s->verbosity >= 3) + VPrintf2 ( "\n combined CRCs: stored = 0x%08x, computed = 0x%08x", + s->storedCombinedCRC, s->calculatedCombinedCRC ); + if (s->calculatedCombinedCRC != s->storedCombinedCRC) + return BZ_DATA_ERROR; + return r; + } + if (s->state != BZ_X_OUTPUT) return r; + } + } + + AssertH ( 0, 6001 ); + + return 0; /*NOTREACHED*/ +} + + +/*---------------------------------------------------*/ +int BZ_API(BZ2_bzDecompressEnd) ( bz_stream *strm ) +{ + DState* s; + if (strm == NULL) return BZ_PARAM_ERROR; + s = strm->state; + if (s == NULL) return BZ_PARAM_ERROR; + if (s->strm != strm) return BZ_PARAM_ERROR; + + if (s->tt != NULL) BZFREE(s->tt); + if (s->ll16 != NULL) BZFREE(s->ll16); + if (s->ll4 != NULL) BZFREE(s->ll4); + + BZFREE(strm->state); + strm->state = NULL; + + return BZ_OK; +} + + +#ifndef BZ_NO_STDIO +/*---------------------------------------------------*/ +/*--- File I/O stuff ---*/ +/*---------------------------------------------------*/ + +#define BZ_SETERR(eee) \ +{ \ + if (bzerror != NULL) *bzerror = eee; \ + if (bzf != NULL) bzf->lastErr = eee; \ +} + +typedef + struct { + FILE* handle; + Char buf[BZ_MAX_UNUSED]; + Int32 bufN; + Bool writing; + bz_stream strm; + Int32 lastErr; + Bool initialisedOk; + } + bzFile; + + +/*---------------------------------------------*/ +static Bool myfeof ( FILE* f ) +{ + Int32 c = fgetc ( f ); + if (c == EOF) return True; + ungetc ( c, f ); + return False; +} + + +/*---------------------------------------------------*/ +BZFILE* BZ_API(BZ2_bzWriteOpen) + ( int* bzerror, + FILE* f, + int blockSize100k, + int verbosity, + int workFactor ) +{ + Int32 ret; + bzFile* bzf = NULL; + + BZ_SETERR(BZ_OK); + + if (f == NULL || + (blockSize100k < 1 || blockSize100k > 9) || + (workFactor < 0 || workFactor > 250) || + (verbosity < 0 || verbosity > 4)) + { BZ_SETERR(BZ_PARAM_ERROR); return NULL; }; + + if (ferror(f)) + { BZ_SETERR(BZ_IO_ERROR); return NULL; }; + + bzf = malloc ( sizeof(bzFile) ); + if (bzf == NULL) + { BZ_SETERR(BZ_MEM_ERROR); return NULL; }; + + BZ_SETERR(BZ_OK); + bzf->initialisedOk = False; + bzf->bufN = 0; + bzf->handle = f; + bzf->writing = True; + bzf->strm.bzalloc = NULL; + bzf->strm.bzfree = NULL; + bzf->strm.opaque = NULL; + + if (workFactor == 0) workFactor = 30; + ret = BZ2_bzCompressInit ( &(bzf->strm), blockSize100k, + verbosity, workFactor ); + if (ret != BZ_OK) + { BZ_SETERR(ret); free(bzf); return NULL; }; + + bzf->strm.avail_in = 0; + bzf->initialisedOk = True; + return bzf; +} + + + +/*---------------------------------------------------*/ +void BZ_API(BZ2_bzWrite) + ( int* bzerror, + BZFILE* b, + void* buf, + int len ) +{ + Int32 n, n2, ret; + bzFile* bzf = (bzFile*)b; + + BZ_SETERR(BZ_OK); + if (bzf == NULL || buf == NULL || len < 0) + { BZ_SETERR(BZ_PARAM_ERROR); return; }; + if (!(bzf->writing)) + { BZ_SETERR(BZ_SEQUENCE_ERROR); return; }; + if (ferror(bzf->handle)) + { BZ_SETERR(BZ_IO_ERROR); return; }; + + if (len == 0) + { BZ_SETERR(BZ_OK); return; }; + + bzf->strm.avail_in = len; + bzf->strm.next_in = buf; + + while (True) { + bzf->strm.avail_out = BZ_MAX_UNUSED; + bzf->strm.next_out = bzf->buf; + ret = BZ2_bzCompress ( &(bzf->strm), BZ_RUN ); + if (ret != BZ_RUN_OK) + { BZ_SETERR(ret); return; }; + + if (bzf->strm.avail_out < BZ_MAX_UNUSED) { + n = BZ_MAX_UNUSED - bzf->strm.avail_out; + n2 = fwrite ( (void*)(bzf->buf), sizeof(UChar), + n, bzf->handle ); + if (n != n2 || ferror(bzf->handle)) + { BZ_SETERR(BZ_IO_ERROR); return; }; + } + + if (bzf->strm.avail_in == 0) + { BZ_SETERR(BZ_OK); return; }; + } +} + + +/*---------------------------------------------------*/ +void BZ_API(BZ2_bzWriteClose) + ( int* bzerror, + BZFILE* b, + int abandon, + unsigned int* nbytes_in, + unsigned int* nbytes_out ) +{ + BZ2_bzWriteClose64 ( bzerror, b, abandon, + nbytes_in, NULL, nbytes_out, NULL ); +} + + +void BZ_API(BZ2_bzWriteClose64) + ( int* bzerror, + BZFILE* b, + int abandon, + unsigned int* nbytes_in_lo32, + unsigned int* nbytes_in_hi32, + unsigned int* nbytes_out_lo32, + unsigned int* nbytes_out_hi32 ) +{ + Int32 n, n2, ret; + bzFile* bzf = (bzFile*)b; + + if (bzf == NULL) + { BZ_SETERR(BZ_OK); return; }; + if (!(bzf->writing)) + { BZ_SETERR(BZ_SEQUENCE_ERROR); return; }; + if (ferror(bzf->handle)) + { BZ_SETERR(BZ_IO_ERROR); return; }; + + if (nbytes_in_lo32 != NULL) *nbytes_in_lo32 = 0; + if (nbytes_in_hi32 != NULL) *nbytes_in_hi32 = 0; + if (nbytes_out_lo32 != NULL) *nbytes_out_lo32 = 0; + if (nbytes_out_hi32 != NULL) *nbytes_out_hi32 = 0; + + if ((!abandon) && bzf->lastErr == BZ_OK) { + while (True) { + bzf->strm.avail_out = BZ_MAX_UNUSED; + bzf->strm.next_out = bzf->buf; + ret = BZ2_bzCompress ( &(bzf->strm), BZ_FINISH ); + if (ret != BZ_FINISH_OK && ret != BZ_STREAM_END) + { BZ_SETERR(ret); return; }; + + if (bzf->strm.avail_out < BZ_MAX_UNUSED) { + n = BZ_MAX_UNUSED - bzf->strm.avail_out; + n2 = fwrite ( (void*)(bzf->buf), sizeof(UChar), + n, bzf->handle ); + if (n != n2 || ferror(bzf->handle)) + { BZ_SETERR(BZ_IO_ERROR); return; }; + } + + if (ret == BZ_STREAM_END) break; + } + } + + if ( !abandon && !ferror ( bzf->handle ) ) { + fflush ( bzf->handle ); + if (ferror(bzf->handle)) + { BZ_SETERR(BZ_IO_ERROR); return; }; + } + + if (nbytes_in_lo32 != NULL) + *nbytes_in_lo32 = bzf->strm.total_in_lo32; + if (nbytes_in_hi32 != NULL) + *nbytes_in_hi32 = bzf->strm.total_in_hi32; + if (nbytes_out_lo32 != NULL) + *nbytes_out_lo32 = bzf->strm.total_out_lo32; + if (nbytes_out_hi32 != NULL) + *nbytes_out_hi32 = bzf->strm.total_out_hi32; + + BZ_SETERR(BZ_OK); + BZ2_bzCompressEnd ( &(bzf->strm) ); + free ( bzf ); +} + + +/*---------------------------------------------------*/ +BZFILE* BZ_API(BZ2_bzReadOpen) + ( int* bzerror, + FILE* f, + int verbosity, + int small, + void* unused, + int nUnused ) +{ + bzFile* bzf = NULL; + int ret; + + BZ_SETERR(BZ_OK); + + if (f == NULL || + (small != 0 && small != 1) || + (verbosity < 0 || verbosity > 4) || + (unused == NULL && nUnused != 0) || + (unused != NULL && (nUnused < 0 || nUnused > BZ_MAX_UNUSED))) + { BZ_SETERR(BZ_PARAM_ERROR); return NULL; }; + + if (ferror(f)) + { BZ_SETERR(BZ_IO_ERROR); return NULL; }; + + bzf = malloc ( sizeof(bzFile) ); + if (bzf == NULL) + { BZ_SETERR(BZ_MEM_ERROR); return NULL; }; + + BZ_SETERR(BZ_OK); + + bzf->initialisedOk = False; + bzf->handle = f; + bzf->bufN = 0; + bzf->writing = False; + bzf->strm.bzalloc = NULL; + bzf->strm.bzfree = NULL; + bzf->strm.opaque = NULL; + + while (nUnused > 0) { + bzf->buf[bzf->bufN] = *((UChar*)(unused)); bzf->bufN++; + unused = ((void*)( 1 + ((UChar*)(unused)) )); + nUnused--; + } + + ret = BZ2_bzDecompressInit ( &(bzf->strm), verbosity, small ); + if (ret != BZ_OK) + { BZ_SETERR(ret); free(bzf); return NULL; }; + + bzf->strm.avail_in = bzf->bufN; + bzf->strm.next_in = bzf->buf; + + bzf->initialisedOk = True; + return bzf; +} + + +/*---------------------------------------------------*/ +void BZ_API(BZ2_bzReadClose) ( int *bzerror, BZFILE *b ) +{ + bzFile* bzf = (bzFile*)b; + + BZ_SETERR(BZ_OK); + if (bzf == NULL) + { BZ_SETERR(BZ_OK); return; }; + + if (bzf->writing) + { BZ_SETERR(BZ_SEQUENCE_ERROR); return; }; + + if (bzf->initialisedOk) + (void)BZ2_bzDecompressEnd ( &(bzf->strm) ); + free ( bzf ); +} + + +/*---------------------------------------------------*/ +int BZ_API(BZ2_bzRead) + ( int* bzerror, + BZFILE* b, + void* buf, + int len ) +{ + Int32 n, ret; + bzFile* bzf = (bzFile*)b; + + BZ_SETERR(BZ_OK); + + if (bzf == NULL || buf == NULL || len < 0) + { BZ_SETERR(BZ_PARAM_ERROR); return 0; }; + + if (bzf->writing) + { BZ_SETERR(BZ_SEQUENCE_ERROR); return 0; }; + + if (len == 0) + { BZ_SETERR(BZ_OK); return 0; }; + + bzf->strm.avail_out = len; + bzf->strm.next_out = buf; + + while (True) { + + if (ferror(bzf->handle)) + { BZ_SETERR(BZ_IO_ERROR); return 0; }; + + if (bzf->strm.avail_in == 0 && !myfeof(bzf->handle)) { + n = fread ( bzf->buf, sizeof(UChar), + BZ_MAX_UNUSED, bzf->handle ); + if (ferror(bzf->handle)) + { BZ_SETERR(BZ_IO_ERROR); return 0; }; + bzf->bufN = n; + bzf->strm.avail_in = bzf->bufN; + bzf->strm.next_in = bzf->buf; + } + + ret = BZ2_bzDecompress ( &(bzf->strm) ); + + if (ret != BZ_OK && ret != BZ_STREAM_END) + { BZ_SETERR(ret); return 0; }; + + if (ret == BZ_OK && myfeof(bzf->handle) && + bzf->strm.avail_in == 0 && bzf->strm.avail_out > 0) + { BZ_SETERR(BZ_UNEXPECTED_EOF); return 0; }; + + if (ret == BZ_STREAM_END) + { BZ_SETERR(BZ_STREAM_END); + return len - bzf->strm.avail_out; }; + if (bzf->strm.avail_out == 0) + { BZ_SETERR(BZ_OK); return len; }; + + } + + return 0; /*not reached*/ +} + + +/*---------------------------------------------------*/ +void BZ_API(BZ2_bzReadGetUnused) + ( int* bzerror, + BZFILE* b, + void** unused, + int* nUnused ) +{ + bzFile* bzf = (bzFile*)b; + if (bzf == NULL) + { BZ_SETERR(BZ_PARAM_ERROR); return; }; + if (bzf->lastErr != BZ_STREAM_END) + { BZ_SETERR(BZ_SEQUENCE_ERROR); return; }; + if (unused == NULL || nUnused == NULL) + { BZ_SETERR(BZ_PARAM_ERROR); return; }; + + BZ_SETERR(BZ_OK); + *nUnused = bzf->strm.avail_in; + *unused = bzf->strm.next_in; +} +#endif + + +/*---------------------------------------------------*/ +/*--- Misc convenience stuff ---*/ +/*---------------------------------------------------*/ + +/*---------------------------------------------------*/ +int BZ_API(BZ2_bzBuffToBuffCompress) + ( char* dest, + unsigned int* destLen, + char* source, + unsigned int sourceLen, + int blockSize100k, + int verbosity, + int workFactor ) +{ + bz_stream strm; + int ret; + + if (dest == NULL || destLen == NULL || + source == NULL || + blockSize100k < 1 || blockSize100k > 9 || + verbosity < 0 || verbosity > 4 || + workFactor < 0 || workFactor > 250) + return BZ_PARAM_ERROR; + + if (workFactor == 0) workFactor = 30; + strm.bzalloc = NULL; + strm.bzfree = NULL; + strm.opaque = NULL; + ret = BZ2_bzCompressInit ( &strm, blockSize100k, + verbosity, workFactor ); + if (ret != BZ_OK) return ret; + + strm.next_in = source; + strm.next_out = dest; + strm.avail_in = sourceLen; + strm.avail_out = *destLen; + + ret = BZ2_bzCompress ( &strm, BZ_FINISH ); + if (ret == BZ_FINISH_OK) goto output_overflow; + if (ret != BZ_STREAM_END) goto errhandler; + + /* normal termination */ + *destLen -= strm.avail_out; + BZ2_bzCompressEnd ( &strm ); + return BZ_OK; + + output_overflow: + BZ2_bzCompressEnd ( &strm ); + return BZ_OUTBUFF_FULL; + + errhandler: + BZ2_bzCompressEnd ( &strm ); + return ret; +} + + +/*---------------------------------------------------*/ +int BZ_API(BZ2_bzBuffToBuffDecompress) + ( char* dest, + unsigned int* destLen, + char* source, + unsigned int sourceLen, + int small, + int verbosity ) +{ + bz_stream strm; + int ret; + + if (dest == NULL || destLen == NULL || + source == NULL || + (small != 0 && small != 1) || + verbosity < 0 || verbosity > 4) + return BZ_PARAM_ERROR; + + strm.bzalloc = NULL; + strm.bzfree = NULL; + strm.opaque = NULL; + ret = BZ2_bzDecompressInit ( &strm, verbosity, small ); + if (ret != BZ_OK) return ret; + + strm.next_in = source; + strm.next_out = dest; + strm.avail_in = sourceLen; + strm.avail_out = *destLen; + + ret = BZ2_bzDecompress ( &strm ); + if (ret == BZ_OK) goto output_overflow_or_eof; + if (ret != BZ_STREAM_END) goto errhandler; + + /* normal termination */ + *destLen -= strm.avail_out; + BZ2_bzDecompressEnd ( &strm ); + return BZ_OK; + + output_overflow_or_eof: + if (strm.avail_out > 0) { + BZ2_bzDecompressEnd ( &strm ); + return BZ_UNEXPECTED_EOF; + } else { + BZ2_bzDecompressEnd ( &strm ); + return BZ_OUTBUFF_FULL; + }; + + errhandler: + BZ2_bzDecompressEnd ( &strm ); + return ret; +} + + +/*---------------------------------------------------*/ +/*-- + Code contributed by Yoshioka Tsuneo (tsuneo@rr.iij4u.or.jp) + to support better zlib compatibility. + This code is not _officially_ part of libbzip2 (yet); + I haven't tested it, documented it, or considered the + threading-safeness of it. + If this code breaks, please contact both Yoshioka and me. +--*/ +/*---------------------------------------------------*/ + +/*---------------------------------------------------*/ +/*-- + return version like "0.9.5d, 4-Sept-1999". +--*/ +const char * BZ_API(BZ2_bzlibVersion)(void) +{ + return BZ_VERSION; +} + + +#ifndef BZ_NO_STDIO +/*---------------------------------------------------*/ + +#if defined(_WIN32) || defined(OS2) || defined(MSDOS) +# include +# include +# define SET_BINARY_MODE(file) setmode(fileno(file),O_BINARY) +#else +# define SET_BINARY_MODE(file) +#endif +static +BZFILE * bzopen_or_bzdopen + ( const char *path, /* no use when bzdopen */ + int fd, /* no use when bzdopen */ + const char *mode, + int open_mode) /* bzopen: 0, bzdopen:1 */ +{ + int bzerr; + char unused[BZ_MAX_UNUSED]; + int blockSize100k = 9; + int writing = 0; + char mode2[10] = ""; + FILE *fp = NULL; + BZFILE *bzfp = NULL; + int verbosity = 0; + int workFactor = 30; + int smallMode = 0; + int nUnused = 0; + + if (mode == NULL) return NULL; + while (*mode) { + switch (*mode) { + case 'r': + writing = 0; break; + case 'w': + writing = 1; break; + case 's': + smallMode = 1; break; + default: + if (isdigit((int)(*mode))) { + blockSize100k = *mode-BZ_HDR_0; + } + } + mode++; + } + strcat(mode2, writing ? "w" : "r" ); + strcat(mode2,"b"); /* binary mode */ + + if (open_mode==0) { + if (path==NULL || strcmp(path,"")==0) { + fp = (writing ? stdout : stdin); + SET_BINARY_MODE(fp); + } else { + fp = fopen(path,mode2); + } + } else { +#ifdef BZ_STRICT_ANSI + fp = NULL; +#else + fp = fdopen(fd,mode2); +#endif + } + if (fp == NULL) return NULL; + + if (writing) { + /* Guard against total chaos and anarchy -- JRS */ + if (blockSize100k < 1) blockSize100k = 1; + if (blockSize100k > 9) blockSize100k = 9; + bzfp = BZ2_bzWriteOpen(&bzerr,fp,blockSize100k, + verbosity,workFactor); + } else { + bzfp = BZ2_bzReadOpen(&bzerr,fp,verbosity,smallMode, + unused,nUnused); + } + if (bzfp == NULL) { + if (fp != stdin && fp != stdout) fclose(fp); + return NULL; + } + return bzfp; +} + + +/*---------------------------------------------------*/ +/*-- + open file for read or write. + ex) bzopen("file","w9") + case path="" or NULL => use stdin or stdout. +--*/ +BZFILE * BZ_API(BZ2_bzopen) + ( const char *path, + const char *mode ) +{ + return bzopen_or_bzdopen(path,-1,mode,/*bzopen*/0); +} + + +/*---------------------------------------------------*/ +BZFILE * BZ_API(BZ2_bzdopen) + ( int fd, + const char *mode ) +{ + return bzopen_or_bzdopen(NULL,fd,mode,/*bzdopen*/1); +} + + +/*---------------------------------------------------*/ +int BZ_API(BZ2_bzread) (BZFILE* b, void* buf, int len ) +{ + int bzerr, nread; + if (((bzFile*)b)->lastErr == BZ_STREAM_END) return 0; + nread = BZ2_bzRead(&bzerr,b,buf,len); + if (bzerr == BZ_OK || bzerr == BZ_STREAM_END) { + return nread; + } else { + return -1; + } +} + + +/*---------------------------------------------------*/ +int BZ_API(BZ2_bzwrite) (BZFILE* b, void* buf, int len ) +{ + int bzerr; + + BZ2_bzWrite(&bzerr,b,buf,len); + if(bzerr == BZ_OK){ + return len; + }else{ + return -1; + } +} + + +/*---------------------------------------------------*/ +int BZ_API(BZ2_bzflush) (BZFILE *b) +{ + /* do nothing now... */ + return 0; +} + + +/*---------------------------------------------------*/ +void BZ_API(BZ2_bzclose) (BZFILE* b) +{ + int bzerr; + FILE *fp; + + if (b==NULL) {return;} + fp = ((bzFile *)b)->handle; + if(((bzFile*)b)->writing){ + BZ2_bzWriteClose(&bzerr,b,0,NULL,NULL); + if(bzerr != BZ_OK){ + BZ2_bzWriteClose(NULL,b,1,NULL,NULL); + } + }else{ + BZ2_bzReadClose(&bzerr,b); + } + if(fp!=stdin && fp!=stdout){ + fclose(fp); + } +} + + +/*---------------------------------------------------*/ +/*-- + return last error code +--*/ +static const char *bzerrorstrings[] = { + "OK" + ,"SEQUENCE_ERROR" + ,"PARAM_ERROR" + ,"MEM_ERROR" + ,"DATA_ERROR" + ,"DATA_ERROR_MAGIC" + ,"IO_ERROR" + ,"UNEXPECTED_EOF" + ,"OUTBUFF_FULL" + ,"CONFIG_ERROR" + ,"???" /* for future */ + ,"???" /* for future */ + ,"???" /* for future */ + ,"???" /* for future */ + ,"???" /* for future */ + ,"???" /* for future */ +}; + + +const char * BZ_API(BZ2_bzerror) (BZFILE *b, int *errnum) +{ + int err = ((bzFile *)b)->lastErr; + + if(err>0) err = 0; + *errnum = err; + return bzerrorstrings[err*-1]; +} +#endif + + +/*-------------------------------------------------------------*/ +/*--- end bzlib.c ---*/ +/*-------------------------------------------------------------*/ diff --git a/pit/suse-bzip2/bzlib.h b/pit/suse-bzip2/bzlib.h new file mode 100644 index 0000000..8277123 --- /dev/null +++ b/pit/suse-bzip2/bzlib.h @@ -0,0 +1,282 @@ + +/*-------------------------------------------------------------*/ +/*--- Public header file for the library. ---*/ +/*--- bzlib.h ---*/ +/*-------------------------------------------------------------*/ + +/* ------------------------------------------------------------------ + This file is part of bzip2/libbzip2, a program and library for + lossless, block-sorting data compression. + + bzip2/libbzip2 version 1.0.6 of 6 September 2010 + Copyright (C) 1996-2010 Julian Seward + + Please read the WARNING, DISCLAIMER and PATENTS sections in the + README file. + + This program is released under the terms of the license contained + in the file LICENSE. + ------------------------------------------------------------------ */ + + +#ifndef _BZLIB_H +#define _BZLIB_H + +#ifdef __cplusplus +extern "C" { +#endif + +#define BZ_RUN 0 +#define BZ_FLUSH 1 +#define BZ_FINISH 2 + +#define BZ_OK 0 +#define BZ_RUN_OK 1 +#define BZ_FLUSH_OK 2 +#define BZ_FINISH_OK 3 +#define BZ_STREAM_END 4 +#define BZ_SEQUENCE_ERROR (-1) +#define BZ_PARAM_ERROR (-2) +#define BZ_MEM_ERROR (-3) +#define BZ_DATA_ERROR (-4) +#define BZ_DATA_ERROR_MAGIC (-5) +#define BZ_IO_ERROR (-6) +#define BZ_UNEXPECTED_EOF (-7) +#define BZ_OUTBUFF_FULL (-8) +#define BZ_CONFIG_ERROR (-9) + +typedef + struct { + char *next_in; + unsigned int avail_in; + unsigned int total_in_lo32; + unsigned int total_in_hi32; + + char *next_out; + unsigned int avail_out; + unsigned int total_out_lo32; + unsigned int total_out_hi32; + + void *state; + + void *(*bzalloc)(void *,int,int); + void (*bzfree)(void *,void *); + void *opaque; + } + bz_stream; + + +#ifndef BZ_IMPORT +#define BZ_EXPORT +#endif + +#ifndef BZ_NO_STDIO +/* Need a definitition for FILE */ +#include +#endif + +#ifdef _WIN32 +# include +# ifdef small + /* windows.h define small to char */ +# undef small +# endif +# ifdef BZ_EXPORT +# define BZ_API(func) WINAPI func +# define BZ_EXTERN extern +# else + /* import windows dll dynamically */ +# define BZ_API(func) (WINAPI * func) +# define BZ_EXTERN +# endif +#else +# define BZ_API(func) func +# define BZ_EXTERN extern +#endif + + +/*-- Core (low-level) library functions --*/ + +BZ_EXTERN int BZ_API(BZ2_bzCompressInit) ( + bz_stream* strm, + int blockSize100k, + int verbosity, + int workFactor + ); + +BZ_EXTERN int BZ_API(BZ2_bzCompress) ( + bz_stream* strm, + int action + ); + +BZ_EXTERN int BZ_API(BZ2_bzCompressEnd) ( + bz_stream* strm + ); + +BZ_EXTERN int BZ_API(BZ2_bzDecompressInit) ( + bz_stream *strm, + int verbosity, + int small + ); + +BZ_EXTERN int BZ_API(BZ2_bzDecompress) ( + bz_stream* strm + ); + +BZ_EXTERN int BZ_API(BZ2_bzDecompressEnd) ( + bz_stream *strm + ); + + + +/*-- High(er) level library functions --*/ + +#ifndef BZ_NO_STDIO +#define BZ_MAX_UNUSED 5000 + +typedef void BZFILE; + +BZ_EXTERN BZFILE* BZ_API(BZ2_bzReadOpen) ( + int* bzerror, + FILE* f, + int verbosity, + int small, + void* unused, + int nUnused + ); + +BZ_EXTERN void BZ_API(BZ2_bzReadClose) ( + int* bzerror, + BZFILE* b + ); + +BZ_EXTERN void BZ_API(BZ2_bzReadGetUnused) ( + int* bzerror, + BZFILE* b, + void** unused, + int* nUnused + ); + +BZ_EXTERN int BZ_API(BZ2_bzRead) ( + int* bzerror, + BZFILE* b, + void* buf, + int len + ); + +BZ_EXTERN BZFILE* BZ_API(BZ2_bzWriteOpen) ( + int* bzerror, + FILE* f, + int blockSize100k, + int verbosity, + int workFactor + ); + +BZ_EXTERN void BZ_API(BZ2_bzWrite) ( + int* bzerror, + BZFILE* b, + void* buf, + int len + ); + +BZ_EXTERN void BZ_API(BZ2_bzWriteClose) ( + int* bzerror, + BZFILE* b, + int abandon, + unsigned int* nbytes_in, + unsigned int* nbytes_out + ); + +BZ_EXTERN void BZ_API(BZ2_bzWriteClose64) ( + int* bzerror, + BZFILE* b, + int abandon, + unsigned int* nbytes_in_lo32, + unsigned int* nbytes_in_hi32, + unsigned int* nbytes_out_lo32, + unsigned int* nbytes_out_hi32 + ); +#endif + + +/*-- Utility functions --*/ + +BZ_EXTERN int BZ_API(BZ2_bzBuffToBuffCompress) ( + char* dest, + unsigned int* destLen, + char* source, + unsigned int sourceLen, + int blockSize100k, + int verbosity, + int workFactor + ); + +BZ_EXTERN int BZ_API(BZ2_bzBuffToBuffDecompress) ( + char* dest, + unsigned int* destLen, + char* source, + unsigned int sourceLen, + int small, + int verbosity + ); + + +/*-- + Code contributed by Yoshioka Tsuneo (tsuneo@rr.iij4u.or.jp) + to support better zlib compatibility. + This code is not _officially_ part of libbzip2 (yet); + I haven't tested it, documented it, or considered the + threading-safeness of it. + If this code breaks, please contact both Yoshioka and me. +--*/ + +BZ_EXTERN const char * BZ_API(BZ2_bzlibVersion) ( + void + ); + +#ifndef BZ_NO_STDIO +BZ_EXTERN BZFILE * BZ_API(BZ2_bzopen) ( + const char *path, + const char *mode + ); + +BZ_EXTERN BZFILE * BZ_API(BZ2_bzdopen) ( + int fd, + const char *mode + ); + +BZ_EXTERN int BZ_API(BZ2_bzread) ( + BZFILE* b, + void* buf, + int len + ); + +BZ_EXTERN int BZ_API(BZ2_bzwrite) ( + BZFILE* b, + void* buf, + int len + ); + +BZ_EXTERN int BZ_API(BZ2_bzflush) ( + BZFILE* b + ); + +BZ_EXTERN void BZ_API(BZ2_bzclose) ( + BZFILE* b + ); + +BZ_EXTERN const char * BZ_API(BZ2_bzerror) ( + BZFILE *b, + int *errnum + ); +#endif + +#ifdef __cplusplus +} +#endif + +#endif + +/*-------------------------------------------------------------*/ +/*--- end bzlib.h ---*/ +/*-------------------------------------------------------------*/ diff --git a/pit/suse-bzip2/bzlib_private.h b/pit/suse-bzip2/bzlib_private.h new file mode 100644 index 0000000..5d0217f --- /dev/null +++ b/pit/suse-bzip2/bzlib_private.h @@ -0,0 +1,509 @@ + +/*-------------------------------------------------------------*/ +/*--- Private header file for the library. ---*/ +/*--- bzlib_private.h ---*/ +/*-------------------------------------------------------------*/ + +/* ------------------------------------------------------------------ + This file is part of bzip2/libbzip2, a program and library for + lossless, block-sorting data compression. + + bzip2/libbzip2 version 1.0.6 of 6 September 2010 + Copyright (C) 1996-2010 Julian Seward + + Please read the WARNING, DISCLAIMER and PATENTS sections in the + README file. + + This program is released under the terms of the license contained + in the file LICENSE. + ------------------------------------------------------------------ */ + + +#ifndef _BZLIB_PRIVATE_H +#define _BZLIB_PRIVATE_H + +#include + +#ifndef BZ_NO_STDIO +#include +#include +#include +#endif + +#include "bzlib.h" + + + +/*-- General stuff. --*/ + +#define BZ_VERSION "1.0.6, 6-Sept-2010" + +typedef char Char; +typedef unsigned char Bool; +typedef unsigned char UChar; +typedef int Int32; +typedef unsigned int UInt32; +typedef short Int16; +typedef unsigned short UInt16; + +#define True ((Bool)1) +#define False ((Bool)0) + +#ifndef __GNUC__ +#define __inline__ /* */ +#endif + +#ifndef BZ_NO_STDIO + +extern void BZ2_bz__AssertH__fail ( int errcode ); +#define AssertH(cond,errcode) \ + { if (!(cond)) BZ2_bz__AssertH__fail ( errcode ); } + +#if BZ_DEBUG +#define AssertD(cond,msg) \ + { if (!(cond)) { \ + fprintf ( stderr, \ + "\n\nlibbzip2(debug build): internal error\n\t%s\n", msg );\ + exit(1); \ + }} +#else +#define AssertD(cond,msg) /* */ +#endif + +#define VPrintf0(zf) \ + fprintf(stderr,zf) +#define VPrintf1(zf,za1) \ + fprintf(stderr,zf,za1) +#define VPrintf2(zf,za1,za2) \ + fprintf(stderr,zf,za1,za2) +#define VPrintf3(zf,za1,za2,za3) \ + fprintf(stderr,zf,za1,za2,za3) +#define VPrintf4(zf,za1,za2,za3,za4) \ + fprintf(stderr,zf,za1,za2,za3,za4) +#define VPrintf5(zf,za1,za2,za3,za4,za5) \ + fprintf(stderr,zf,za1,za2,za3,za4,za5) + +#else + +extern void bz_internal_error ( int errcode ); +#define AssertH(cond,errcode) \ + { if (!(cond)) bz_internal_error ( errcode ); } +#define AssertD(cond,msg) do { } while (0) +#define VPrintf0(zf) do { } while (0) +#define VPrintf1(zf,za1) do { } while (0) +#define VPrintf2(zf,za1,za2) do { } while (0) +#define VPrintf3(zf,za1,za2,za3) do { } while (0) +#define VPrintf4(zf,za1,za2,za3,za4) do { } while (0) +#define VPrintf5(zf,za1,za2,za3,za4,za5) do { } while (0) + +#endif + + +#define BZALLOC(nnn) (strm->bzalloc)(strm->opaque,(nnn),1) +#define BZFREE(ppp) (strm->bzfree)(strm->opaque,(ppp)) + + +/*-- Header bytes. --*/ + +#define BZ_HDR_B 0x42 /* 'B' */ +#define BZ_HDR_Z 0x5a /* 'Z' */ +#define BZ_HDR_h 0x68 /* 'h' */ +#define BZ_HDR_0 0x30 /* '0' */ + +/*-- Constants for the back end. --*/ + +#define BZ_MAX_ALPHA_SIZE 258 +#define BZ_MAX_CODE_LEN 23 + +#define BZ_RUNA 0 +#define BZ_RUNB 1 + +#define BZ_N_GROUPS 6 +#define BZ_G_SIZE 50 +#define BZ_N_ITERS 4 + +#define BZ_MAX_SELECTORS (2 + (900000 / BZ_G_SIZE)) + + + +/*-- Stuff for randomising repetitive blocks. --*/ + +extern Int32 BZ2_rNums[512]; + +#define BZ_RAND_DECLS \ + Int32 rNToGo; \ + Int32 rTPos \ + +#define BZ_RAND_INIT_MASK \ + s->rNToGo = 0; \ + s->rTPos = 0 \ + +#define BZ_RAND_MASK ((s->rNToGo == 1) ? 1 : 0) + +#define BZ_RAND_UPD_MASK \ + if (s->rNToGo == 0) { \ + s->rNToGo = BZ2_rNums[s->rTPos]; \ + s->rTPos++; \ + if (s->rTPos == 512) s->rTPos = 0; \ + } \ + s->rNToGo--; + + + +/*-- Stuff for doing CRCs. --*/ + +extern UInt32 BZ2_crc32Table[256]; + +#define BZ_INITIALISE_CRC(crcVar) \ +{ \ + crcVar = 0xffffffffL; \ +} + +#define BZ_FINALISE_CRC(crcVar) \ +{ \ + crcVar = ~(crcVar); \ +} + +#define BZ_UPDATE_CRC(crcVar,cha) \ +{ \ + crcVar = (crcVar << 8) ^ \ + BZ2_crc32Table[(crcVar >> 24) ^ \ + ((UChar)cha)]; \ +} + + + +/*-- States and modes for compression. --*/ + +#define BZ_M_IDLE 1 +#define BZ_M_RUNNING 2 +#define BZ_M_FLUSHING 3 +#define BZ_M_FINISHING 4 + +#define BZ_S_OUTPUT 1 +#define BZ_S_INPUT 2 + +#define BZ_N_RADIX 2 +#define BZ_N_QSORT 12 +#define BZ_N_SHELL 18 +#define BZ_N_OVERSHOOT (BZ_N_RADIX + BZ_N_QSORT + BZ_N_SHELL + 2) + + + + +/*-- Structure holding all the compression-side stuff. --*/ + +typedef + struct { + /* pointer back to the struct bz_stream */ + bz_stream* strm; + + /* mode this stream is in, and whether inputting */ + /* or outputting data */ + Int32 mode; + Int32 state; + + /* remembers avail_in when flush/finish requested */ + UInt32 avail_in_expect; + + /* for doing the block sorting */ + UInt32* arr1; + UInt32* arr2; + UInt32* ftab; + Int32 origPtr; + + /* aliases for arr1 and arr2 */ + UInt32* ptr; + UChar* block; + UInt16* mtfv; + UChar* zbits; + + /* for deciding when to use the fallback sorting algorithm */ + Int32 workFactor; + + /* run-length-encoding of the input */ + UInt32 state_in_ch; + Int32 state_in_len; + BZ_RAND_DECLS; + + /* input and output limits and current posns */ + Int32 nblock; + Int32 nblockMAX; + Int32 numZ; + Int32 state_out_pos; + + /* map of bytes used in block */ + Int32 nInUse; + Bool inUse[256]; + UChar unseqToSeq[256]; + + /* the buffer for bit stream creation */ + UInt32 bsBuff; + Int32 bsLive; + + /* block and combined CRCs */ + UInt32 blockCRC; + UInt32 combinedCRC; + + /* misc administratium */ + Int32 verbosity; + Int32 blockNo; + Int32 blockSize100k; + + /* stuff for coding the MTF values */ + Int32 nMTF; + Int32 mtfFreq [BZ_MAX_ALPHA_SIZE]; + UChar selector [BZ_MAX_SELECTORS]; + UChar selectorMtf[BZ_MAX_SELECTORS]; + + UChar len [BZ_N_GROUPS][BZ_MAX_ALPHA_SIZE]; + Int32 code [BZ_N_GROUPS][BZ_MAX_ALPHA_SIZE]; + Int32 rfreq [BZ_N_GROUPS][BZ_MAX_ALPHA_SIZE]; + /* second dimension: only 3 needed; 4 makes index calculations faster */ + UInt32 len_pack[BZ_MAX_ALPHA_SIZE][4]; + + } + EState; + + + +/*-- externs for compression. --*/ + +extern void +BZ2_blockSort ( EState* ); + +extern void +BZ2_compressBlock ( EState*, Bool ); + +extern void +BZ2_bsInitWrite ( EState* ); + +extern void +BZ2_hbAssignCodes ( Int32*, UChar*, Int32, Int32, Int32 ); + +extern void +BZ2_hbMakeCodeLengths ( UChar*, Int32*, Int32, Int32 ); + + + +/*-- states for decompression. --*/ + +#define BZ_X_IDLE 1 +#define BZ_X_OUTPUT 2 + +#define BZ_X_MAGIC_1 10 +#define BZ_X_MAGIC_2 11 +#define BZ_X_MAGIC_3 12 +#define BZ_X_MAGIC_4 13 +#define BZ_X_BLKHDR_1 14 +#define BZ_X_BLKHDR_2 15 +#define BZ_X_BLKHDR_3 16 +#define BZ_X_BLKHDR_4 17 +#define BZ_X_BLKHDR_5 18 +#define BZ_X_BLKHDR_6 19 +#define BZ_X_BCRC_1 20 +#define BZ_X_BCRC_2 21 +#define BZ_X_BCRC_3 22 +#define BZ_X_BCRC_4 23 +#define BZ_X_RANDBIT 24 +#define BZ_X_ORIGPTR_1 25 +#define BZ_X_ORIGPTR_2 26 +#define BZ_X_ORIGPTR_3 27 +#define BZ_X_MAPPING_1 28 +#define BZ_X_MAPPING_2 29 +#define BZ_X_SELECTOR_1 30 +#define BZ_X_SELECTOR_2 31 +#define BZ_X_SELECTOR_3 32 +#define BZ_X_CODING_1 33 +#define BZ_X_CODING_2 34 +#define BZ_X_CODING_3 35 +#define BZ_X_MTF_1 36 +#define BZ_X_MTF_2 37 +#define BZ_X_MTF_3 38 +#define BZ_X_MTF_4 39 +#define BZ_X_MTF_5 40 +#define BZ_X_MTF_6 41 +#define BZ_X_ENDHDR_2 42 +#define BZ_X_ENDHDR_3 43 +#define BZ_X_ENDHDR_4 44 +#define BZ_X_ENDHDR_5 45 +#define BZ_X_ENDHDR_6 46 +#define BZ_X_CCRC_1 47 +#define BZ_X_CCRC_2 48 +#define BZ_X_CCRC_3 49 +#define BZ_X_CCRC_4 50 + + + +/*-- Constants for the fast MTF decoder. --*/ + +#define MTFA_SIZE 4096 +#define MTFL_SIZE 16 + + + +/*-- Structure holding all the decompression-side stuff. --*/ + +typedef + struct { + /* pointer back to the struct bz_stream */ + bz_stream* strm; + + /* state indicator for this stream */ + Int32 state; + + /* for doing the final run-length decoding */ + UChar state_out_ch; + Int32 state_out_len; + Bool blockRandomised; + BZ_RAND_DECLS; + + /* the buffer for bit stream reading */ + UInt32 bsBuff; + Int32 bsLive; + + /* misc administratium */ + Int32 blockSize100k; + Bool smallDecompress; + Int32 currBlockNo; + Int32 verbosity; + + /* for undoing the Burrows-Wheeler transform */ + Int32 origPtr; + UInt32 tPos; + Int32 k0; + Int32 unzftab[256]; + Int32 nblock_used; + Int32 cftab[257]; + Int32 cftabCopy[257]; + + /* for undoing the Burrows-Wheeler transform (FAST) */ + UInt32 *tt; + + /* for undoing the Burrows-Wheeler transform (SMALL) */ + UInt16 *ll16; + UChar *ll4; + + /* stored and calculated CRCs */ + UInt32 storedBlockCRC; + UInt32 storedCombinedCRC; + UInt32 calculatedBlockCRC; + UInt32 calculatedCombinedCRC; + + /* map of bytes used in block */ + Int32 nInUse; + Bool inUse[256]; + Bool inUse16[16]; + UChar seqToUnseq[256]; + + /* for decoding the MTF values */ + UChar mtfa [MTFA_SIZE]; + Int32 mtfbase[256 / MTFL_SIZE]; + UChar selector [BZ_MAX_SELECTORS]; + UChar selectorMtf[BZ_MAX_SELECTORS]; + UChar len [BZ_N_GROUPS][BZ_MAX_ALPHA_SIZE]; + + Int32 limit [BZ_N_GROUPS][BZ_MAX_ALPHA_SIZE]; + Int32 base [BZ_N_GROUPS][BZ_MAX_ALPHA_SIZE]; + Int32 perm [BZ_N_GROUPS][BZ_MAX_ALPHA_SIZE]; + Int32 minLens[BZ_N_GROUPS]; + + /* save area for scalars in the main decompress code */ + Int32 save_i; + Int32 save_j; + Int32 save_t; + Int32 save_alphaSize; + Int32 save_nGroups; + Int32 save_nSelectors; + Int32 save_EOB; + Int32 save_groupNo; + Int32 save_groupPos; + Int32 save_nextSym; + Int32 save_nblockMAX; + Int32 save_nblock; + Int32 save_es; + Int32 save_N; + Int32 save_curr; + Int32 save_zt; + Int32 save_zn; + Int32 save_zvec; + Int32 save_zj; + Int32 save_gSel; + Int32 save_gMinlen; + Int32* save_gLimit; + Int32* save_gBase; + Int32* save_gPerm; + + } + DState; + + + +/*-- Macros for decompression. --*/ + +#define BZ_GET_FAST(cccc) \ + /* c_tPos is unsigned, hence test < 0 is pointless. */ \ + if (s->tPos >= (UInt32)100000 * (UInt32)s->blockSize100k) return True; \ + s->tPos = s->tt[s->tPos]; \ + cccc = (UChar)(s->tPos & 0xff); \ + s->tPos >>= 8; + +#define BZ_GET_FAST_C(cccc) \ + /* c_tPos is unsigned, hence test < 0 is pointless. */ \ + if (c_tPos >= (UInt32)100000 * (UInt32)ro_blockSize100k) return True; \ + c_tPos = c_tt[c_tPos]; \ + cccc = (UChar)(c_tPos & 0xff); \ + c_tPos >>= 8; + +#define SET_LL4(i,n) \ + { if (((i) & 0x1) == 0) \ + s->ll4[(i) >> 1] = (s->ll4[(i) >> 1] & 0xf0) | (n); else \ + s->ll4[(i) >> 1] = (s->ll4[(i) >> 1] & 0x0f) | ((n) << 4); \ + } + +#define GET_LL4(i) \ + ((((UInt32)(s->ll4[(i) >> 1])) >> (((i) << 2) & 0x4)) & 0xF) + +#define SET_LL(i,n) \ + { s->ll16[i] = (UInt16)(n & 0x0000ffff); \ + SET_LL4(i, n >> 16); \ + } + +#define GET_LL(i) \ + (((UInt32)s->ll16[i]) | (GET_LL4(i) << 16)) + +#define BZ_GET_SMALL(cccc) \ + /* c_tPos is unsigned, hence test < 0 is pointless. */ \ + if (s->tPos >= (UInt32)100000 * (UInt32)s->blockSize100k) return True; \ + cccc = BZ2_indexIntoF ( s->tPos, s->cftab ); \ + s->tPos = GET_LL(s->tPos); + + +/*-- externs for decompression. --*/ + +extern Int32 +BZ2_indexIntoF ( Int32, Int32* ); + +extern Int32 +BZ2_decompress ( DState* ); + +extern void +BZ2_hbCreateDecodeTables ( Int32*, Int32*, Int32*, UChar*, + Int32, Int32, Int32 ); + + +#endif + + +/*-- BZ_NO_STDIO seems to make NULL disappear on some platforms. --*/ + +#ifdef BZ_NO_STDIO +#ifndef NULL +#define NULL 0 +#endif +#endif + + +/*-------------------------------------------------------------*/ +/*--- end bzlib_private.h ---*/ +/*-------------------------------------------------------------*/ diff --git a/pit/suse-bzip2/compress.c b/pit/suse-bzip2/compress.c new file mode 100644 index 0000000..28f8558 --- /dev/null +++ b/pit/suse-bzip2/compress.c @@ -0,0 +1,672 @@ + +/*-------------------------------------------------------------*/ +/*--- Compression machinery (not incl block sorting) ---*/ +/*--- compress.c ---*/ +/*-------------------------------------------------------------*/ + +/* ------------------------------------------------------------------ + This file is part of bzip2/libbzip2, a program and library for + lossless, block-sorting data compression. + + bzip2/libbzip2 version 1.0.6 of 6 September 2010 + Copyright (C) 1996-2010 Julian Seward + + Please read the WARNING, DISCLAIMER and PATENTS sections in the + README file. + + This program is released under the terms of the license contained + in the file LICENSE. + ------------------------------------------------------------------ */ + + +/* CHANGES + 0.9.0 -- original version. + 0.9.0a/b -- no changes in this file. + 0.9.0c -- changed setting of nGroups in sendMTFValues() + so as to do a bit better on small files +*/ + +#include "bzlib_private.h" + + +/*---------------------------------------------------*/ +/*--- Bit stream I/O ---*/ +/*---------------------------------------------------*/ + +/*---------------------------------------------------*/ +void BZ2_bsInitWrite ( EState* s ) +{ + s->bsLive = 0; + s->bsBuff = 0; +} + + +/*---------------------------------------------------*/ +static +void bsFinishWrite ( EState* s ) +{ + while (s->bsLive > 0) { + s->zbits[s->numZ] = (UChar)(s->bsBuff >> 24); + s->numZ++; + s->bsBuff <<= 8; + s->bsLive -= 8; + } +} + + +/*---------------------------------------------------*/ +#define bsNEEDW(nz) \ +{ \ + while (s->bsLive >= 8) { \ + s->zbits[s->numZ] \ + = (UChar)(s->bsBuff >> 24); \ + s->numZ++; \ + s->bsBuff <<= 8; \ + s->bsLive -= 8; \ + } \ +} + + +/*---------------------------------------------------*/ +static +__inline__ +void bsW ( EState* s, Int32 n, UInt32 v ) +{ + bsNEEDW ( n ); + s->bsBuff |= (v << (32 - s->bsLive - n)); + s->bsLive += n; +} + + +/*---------------------------------------------------*/ +static +void bsPutUInt32 ( EState* s, UInt32 u ) +{ + bsW ( s, 8, (u >> 24) & 0xffL ); + bsW ( s, 8, (u >> 16) & 0xffL ); + bsW ( s, 8, (u >> 8) & 0xffL ); + bsW ( s, 8, u & 0xffL ); +} + + +/*---------------------------------------------------*/ +static +void bsPutUChar ( EState* s, UChar c ) +{ + bsW( s, 8, (UInt32)c ); +} + + +/*---------------------------------------------------*/ +/*--- The back end proper ---*/ +/*---------------------------------------------------*/ + +/*---------------------------------------------------*/ +static +void makeMaps_e ( EState* s ) +{ + Int32 i; + s->nInUse = 0; + for (i = 0; i < 256; i++) + if (s->inUse[i]) { + s->unseqToSeq[i] = s->nInUse; + s->nInUse++; + } +} + + +/*---------------------------------------------------*/ +static +void generateMTFValues ( EState* s ) +{ + UChar yy[256]; + Int32 i, j; + Int32 zPend; + Int32 wr; + Int32 EOB; + + /* + After sorting (eg, here), + s->arr1 [ 0 .. s->nblock-1 ] holds sorted order, + and + ((UChar*)s->arr2) [ 0 .. s->nblock-1 ] + holds the original block data. + + The first thing to do is generate the MTF values, + and put them in + ((UInt16*)s->arr1) [ 0 .. s->nblock-1 ]. + Because there are strictly fewer or equal MTF values + than block values, ptr values in this area are overwritten + with MTF values only when they are no longer needed. + + The final compressed bitstream is generated into the + area starting at + (UChar*) (&((UChar*)s->arr2)[s->nblock]) + + These storage aliases are set up in bzCompressInit(), + except for the last one, which is arranged in + compressBlock(). + */ + UInt32* ptr = s->ptr; + UChar* block = s->block; + UInt16* mtfv = s->mtfv; + + makeMaps_e ( s ); + EOB = s->nInUse+1; + + for (i = 0; i <= EOB; i++) s->mtfFreq[i] = 0; + + wr = 0; + zPend = 0; + for (i = 0; i < s->nInUse; i++) yy[i] = (UChar) i; + + for (i = 0; i < s->nblock; i++) { + UChar ll_i; + AssertD ( wr <= i, "generateMTFValues(1)" ); + j = ptr[i]-1; if (j < 0) j += s->nblock; + ll_i = s->unseqToSeq[block[j]]; + AssertD ( ll_i < s->nInUse, "generateMTFValues(2a)" ); + + if (yy[0] == ll_i) { + zPend++; + } else { + + if (zPend > 0) { + zPend--; + while (True) { + if (zPend & 1) { + mtfv[wr] = BZ_RUNB; wr++; + s->mtfFreq[BZ_RUNB]++; + } else { + mtfv[wr] = BZ_RUNA; wr++; + s->mtfFreq[BZ_RUNA]++; + } + if (zPend < 2) break; + zPend = (zPend - 2) / 2; + }; + zPend = 0; + } + { + register UChar rtmp; + register UChar* ryy_j; + register UChar rll_i; + rtmp = yy[1]; + yy[1] = yy[0]; + ryy_j = &(yy[1]); + rll_i = ll_i; + while ( rll_i != rtmp ) { + register UChar rtmp2; + ryy_j++; + rtmp2 = rtmp; + rtmp = *ryy_j; + *ryy_j = rtmp2; + }; + yy[0] = rtmp; + j = ryy_j - &(yy[0]); + mtfv[wr] = j+1; wr++; s->mtfFreq[j+1]++; + } + + } + } + + if (zPend > 0) { + zPend--; + while (True) { + if (zPend & 1) { + mtfv[wr] = BZ_RUNB; wr++; + s->mtfFreq[BZ_RUNB]++; + } else { + mtfv[wr] = BZ_RUNA; wr++; + s->mtfFreq[BZ_RUNA]++; + } + if (zPend < 2) break; + zPend = (zPend - 2) / 2; + }; + zPend = 0; + } + + mtfv[wr] = EOB; wr++; s->mtfFreq[EOB]++; + + s->nMTF = wr; +} + + +/*---------------------------------------------------*/ +#define BZ_LESSER_ICOST 0 +#define BZ_GREATER_ICOST 15 + +static +void sendMTFValues ( EState* s ) +{ + Int32 v, t, i, j, gs, ge, totc, bt, bc, iter; + Int32 nSelectors, alphaSize, minLen, maxLen, selCtr; + Int32 nGroups, nBytes; + + /*-- + UChar len [BZ_N_GROUPS][BZ_MAX_ALPHA_SIZE]; + is a global since the decoder also needs it. + + Int32 code[BZ_N_GROUPS][BZ_MAX_ALPHA_SIZE]; + Int32 rfreq[BZ_N_GROUPS][BZ_MAX_ALPHA_SIZE]; + are also globals only used in this proc. + Made global to keep stack frame size small. + --*/ + + + UInt16 cost[BZ_N_GROUPS]; + Int32 fave[BZ_N_GROUPS]; + + UInt16* mtfv = s->mtfv; + + if (s->verbosity >= 3) + VPrintf3( " %d in block, %d after MTF & 1-2 coding, " + "%d+2 syms in use\n", + s->nblock, s->nMTF, s->nInUse ); + + alphaSize = s->nInUse+2; + for (t = 0; t < BZ_N_GROUPS; t++) + for (v = 0; v < alphaSize; v++) + s->len[t][v] = BZ_GREATER_ICOST; + + /*--- Decide how many coding tables to use ---*/ + AssertH ( s->nMTF > 0, 3001 ); + if (s->nMTF < 200) nGroups = 2; else + if (s->nMTF < 600) nGroups = 3; else + if (s->nMTF < 1200) nGroups = 4; else + if (s->nMTF < 2400) nGroups = 5; else + nGroups = 6; + + /*--- Generate an initial set of coding tables ---*/ + { + Int32 nPart, remF, tFreq, aFreq; + + nPart = nGroups; + remF = s->nMTF; + gs = 0; + while (nPart > 0) { + tFreq = remF / nPart; + ge = gs-1; + aFreq = 0; + while (aFreq < tFreq && ge < alphaSize-1) { + ge++; + aFreq += s->mtfFreq[ge]; + } + + if (ge > gs + && nPart != nGroups && nPart != 1 + && ((nGroups-nPart) % 2 == 1)) { + aFreq -= s->mtfFreq[ge]; + ge--; + } + + if (s->verbosity >= 3) + VPrintf5( " initial group %d, [%d .. %d], " + "has %d syms (%4.1f%%)\n", + nPart, gs, ge, aFreq, + (100.0 * (float)aFreq) / (float)(s->nMTF) ); + + for (v = 0; v < alphaSize; v++) + if (v >= gs && v <= ge) + s->len[nPart-1][v] = BZ_LESSER_ICOST; else + s->len[nPart-1][v] = BZ_GREATER_ICOST; + + nPart--; + gs = ge+1; + remF -= aFreq; + } + } + + /*--- + Iterate up to BZ_N_ITERS times to improve the tables. + ---*/ + for (iter = 0; iter < BZ_N_ITERS; iter++) { + + for (t = 0; t < nGroups; t++) fave[t] = 0; + + for (t = 0; t < nGroups; t++) + for (v = 0; v < alphaSize; v++) + s->rfreq[t][v] = 0; + + /*--- + Set up an auxiliary length table which is used to fast-track + the common case (nGroups == 6). + ---*/ + if (nGroups == 6) { + for (v = 0; v < alphaSize; v++) { + s->len_pack[v][0] = (s->len[1][v] << 16) | s->len[0][v]; + s->len_pack[v][1] = (s->len[3][v] << 16) | s->len[2][v]; + s->len_pack[v][2] = (s->len[5][v] << 16) | s->len[4][v]; + } + } + + nSelectors = 0; + totc = 0; + gs = 0; + while (True) { + + /*--- Set group start & end marks. --*/ + if (gs >= s->nMTF) break; + ge = gs + BZ_G_SIZE - 1; + if (ge >= s->nMTF) ge = s->nMTF-1; + + /*-- + Calculate the cost of this group as coded + by each of the coding tables. + --*/ + for (t = 0; t < nGroups; t++) cost[t] = 0; + + if (nGroups == 6 && 50 == ge-gs+1) { + /*--- fast track the common case ---*/ + register UInt32 cost01, cost23, cost45; + register UInt16 icv; + cost01 = cost23 = cost45 = 0; + +# define BZ_ITER(nn) \ + icv = mtfv[gs+(nn)]; \ + cost01 += s->len_pack[icv][0]; \ + cost23 += s->len_pack[icv][1]; \ + cost45 += s->len_pack[icv][2]; \ + + BZ_ITER(0); BZ_ITER(1); BZ_ITER(2); BZ_ITER(3); BZ_ITER(4); + BZ_ITER(5); BZ_ITER(6); BZ_ITER(7); BZ_ITER(8); BZ_ITER(9); + BZ_ITER(10); BZ_ITER(11); BZ_ITER(12); BZ_ITER(13); BZ_ITER(14); + BZ_ITER(15); BZ_ITER(16); BZ_ITER(17); BZ_ITER(18); BZ_ITER(19); + BZ_ITER(20); BZ_ITER(21); BZ_ITER(22); BZ_ITER(23); BZ_ITER(24); + BZ_ITER(25); BZ_ITER(26); BZ_ITER(27); BZ_ITER(28); BZ_ITER(29); + BZ_ITER(30); BZ_ITER(31); BZ_ITER(32); BZ_ITER(33); BZ_ITER(34); + BZ_ITER(35); BZ_ITER(36); BZ_ITER(37); BZ_ITER(38); BZ_ITER(39); + BZ_ITER(40); BZ_ITER(41); BZ_ITER(42); BZ_ITER(43); BZ_ITER(44); + BZ_ITER(45); BZ_ITER(46); BZ_ITER(47); BZ_ITER(48); BZ_ITER(49); + +# undef BZ_ITER + + cost[0] = cost01 & 0xffff; cost[1] = cost01 >> 16; + cost[2] = cost23 & 0xffff; cost[3] = cost23 >> 16; + cost[4] = cost45 & 0xffff; cost[5] = cost45 >> 16; + + } else { + /*--- slow version which correctly handles all situations ---*/ + for (i = gs; i <= ge; i++) { + UInt16 icv = mtfv[i]; + for (t = 0; t < nGroups; t++) cost[t] += s->len[t][icv]; + } + } + + /*-- + Find the coding table which is best for this group, + and record its identity in the selector table. + --*/ + bc = 999999999; bt = -1; + for (t = 0; t < nGroups; t++) + if (cost[t] < bc) { bc = cost[t]; bt = t; }; + totc += bc; + fave[bt]++; + s->selector[nSelectors] = bt; + nSelectors++; + + /*-- + Increment the symbol frequencies for the selected table. + --*/ + if (nGroups == 6 && 50 == ge-gs+1) { + /*--- fast track the common case ---*/ + +# define BZ_ITUR(nn) s->rfreq[bt][ mtfv[gs+(nn)] ]++ + + BZ_ITUR(0); BZ_ITUR(1); BZ_ITUR(2); BZ_ITUR(3); BZ_ITUR(4); + BZ_ITUR(5); BZ_ITUR(6); BZ_ITUR(7); BZ_ITUR(8); BZ_ITUR(9); + BZ_ITUR(10); BZ_ITUR(11); BZ_ITUR(12); BZ_ITUR(13); BZ_ITUR(14); + BZ_ITUR(15); BZ_ITUR(16); BZ_ITUR(17); BZ_ITUR(18); BZ_ITUR(19); + BZ_ITUR(20); BZ_ITUR(21); BZ_ITUR(22); BZ_ITUR(23); BZ_ITUR(24); + BZ_ITUR(25); BZ_ITUR(26); BZ_ITUR(27); BZ_ITUR(28); BZ_ITUR(29); + BZ_ITUR(30); BZ_ITUR(31); BZ_ITUR(32); BZ_ITUR(33); BZ_ITUR(34); + BZ_ITUR(35); BZ_ITUR(36); BZ_ITUR(37); BZ_ITUR(38); BZ_ITUR(39); + BZ_ITUR(40); BZ_ITUR(41); BZ_ITUR(42); BZ_ITUR(43); BZ_ITUR(44); + BZ_ITUR(45); BZ_ITUR(46); BZ_ITUR(47); BZ_ITUR(48); BZ_ITUR(49); + +# undef BZ_ITUR + + } else { + /*--- slow version which correctly handles all situations ---*/ + for (i = gs; i <= ge; i++) + s->rfreq[bt][ mtfv[i] ]++; + } + + gs = ge+1; + } + if (s->verbosity >= 3) { + VPrintf2 ( " pass %d: size is %d, grp uses are ", + iter+1, totc/8 ); + for (t = 0; t < nGroups; t++) + VPrintf1 ( "%d ", fave[t] ); + VPrintf0 ( "\n" ); + } + + /*-- + Recompute the tables based on the accumulated frequencies. + --*/ + /* maxLen was changed from 20 to 17 in bzip2-1.0.3. See + comment in huffman.c for details. */ + for (t = 0; t < nGroups; t++) + BZ2_hbMakeCodeLengths ( &(s->len[t][0]), &(s->rfreq[t][0]), + alphaSize, 20 ); + } + + + AssertH( nGroups < 8, 3002 ); + AssertH( nSelectors < 32768 && + nSelectors <= (2 + (900000 / BZ_G_SIZE)), + 3003 ); + + + /*--- Compute MTF values for the selectors. ---*/ + { + UChar pos[BZ_N_GROUPS], ll_i, tmp2, tmp; + for (i = 0; i < nGroups; i++) pos[i] = i; + for (i = 0; i < nSelectors; i++) { + ll_i = s->selector[i]; + j = 0; + tmp = pos[j]; + while ( ll_i != tmp ) { + j++; + tmp2 = tmp; + tmp = pos[j]; + pos[j] = tmp2; + }; + pos[0] = tmp; + s->selectorMtf[i] = j; + } + }; + + /*--- Assign actual codes for the tables. --*/ + for (t = 0; t < nGroups; t++) { + minLen = 32; + maxLen = 0; + for (i = 0; i < alphaSize; i++) { + if (s->len[t][i] > maxLen) maxLen = s->len[t][i]; + if (s->len[t][i] < minLen) minLen = s->len[t][i]; + } + AssertH ( !(maxLen > 20 ), 3004 ); + AssertH ( !(minLen < 1), 3005 ); + BZ2_hbAssignCodes ( &(s->code[t][0]), &(s->len[t][0]), + minLen, maxLen, alphaSize ); + } + + /*--- Transmit the mapping table. ---*/ + { + Bool inUse16[16]; + for (i = 0; i < 16; i++) { + inUse16[i] = False; + for (j = 0; j < 16; j++) + if (s->inUse[i * 16 + j]) inUse16[i] = True; + } + + nBytes = s->numZ; + for (i = 0; i < 16; i++) + if (inUse16[i]) bsW(s,1,1); else bsW(s,1,0); + + for (i = 0; i < 16; i++) + if (inUse16[i]) + for (j = 0; j < 16; j++) { + if (s->inUse[i * 16 + j]) bsW(s,1,1); else bsW(s,1,0); + } + + if (s->verbosity >= 3) + VPrintf1( " bytes: mapping %d, ", s->numZ-nBytes ); + } + + /*--- Now the selectors. ---*/ + nBytes = s->numZ; + bsW ( s, 3, nGroups ); + bsW ( s, 15, nSelectors ); + for (i = 0; i < nSelectors; i++) { + for (j = 0; j < s->selectorMtf[i]; j++) bsW(s,1,1); + bsW(s,1,0); + } + if (s->verbosity >= 3) + VPrintf1( "selectors %d, ", s->numZ-nBytes ); + + /*--- Now the coding tables. ---*/ + nBytes = s->numZ; + + for (t = 0; t < nGroups; t++) { + Int32 curr = s->len[t][0]; + bsW ( s, 5, curr ); + for (i = 0; i < alphaSize; i++) { + while (curr < s->len[t][i]) { bsW(s,2,2); curr++; /* 10 */ }; + while (curr > s->len[t][i]) { bsW(s,2,3); curr--; /* 11 */ }; + bsW ( s, 1, 0 ); + } + } + + if (s->verbosity >= 3) + VPrintf1 ( "code lengths %d, ", s->numZ-nBytes ); + + /*--- And finally, the block data proper ---*/ + nBytes = s->numZ; + selCtr = 0; + gs = 0; + while (True) { + if (gs >= s->nMTF) break; + ge = gs + BZ_G_SIZE - 1; + if (ge >= s->nMTF) ge = s->nMTF-1; + AssertH ( s->selector[selCtr] < nGroups, 3006 ); + + if (nGroups == 6 && 50 == ge-gs+1) { + /*--- fast track the common case ---*/ + UInt16 mtfv_i; + UChar* s_len_sel_selCtr + = &(s->len[s->selector[selCtr]][0]); + Int32* s_code_sel_selCtr + = &(s->code[s->selector[selCtr]][0]); + +# define BZ_ITAH(nn) \ + mtfv_i = mtfv[gs+(nn)]; \ + bsW ( s, \ + s_len_sel_selCtr[mtfv_i], \ + s_code_sel_selCtr[mtfv_i] ) + + BZ_ITAH(0); BZ_ITAH(1); BZ_ITAH(2); BZ_ITAH(3); BZ_ITAH(4); + BZ_ITAH(5); BZ_ITAH(6); BZ_ITAH(7); BZ_ITAH(8); BZ_ITAH(9); + BZ_ITAH(10); BZ_ITAH(11); BZ_ITAH(12); BZ_ITAH(13); BZ_ITAH(14); + BZ_ITAH(15); BZ_ITAH(16); BZ_ITAH(17); BZ_ITAH(18); BZ_ITAH(19); + BZ_ITAH(20); BZ_ITAH(21); BZ_ITAH(22); BZ_ITAH(23); BZ_ITAH(24); + BZ_ITAH(25); BZ_ITAH(26); BZ_ITAH(27); BZ_ITAH(28); BZ_ITAH(29); + BZ_ITAH(30); BZ_ITAH(31); BZ_ITAH(32); BZ_ITAH(33); BZ_ITAH(34); + BZ_ITAH(35); BZ_ITAH(36); BZ_ITAH(37); BZ_ITAH(38); BZ_ITAH(39); + BZ_ITAH(40); BZ_ITAH(41); BZ_ITAH(42); BZ_ITAH(43); BZ_ITAH(44); + BZ_ITAH(45); BZ_ITAH(46); BZ_ITAH(47); BZ_ITAH(48); BZ_ITAH(49); + +# undef BZ_ITAH + + } else { + /*--- slow version which correctly handles all situations ---*/ + for (i = gs; i <= ge; i++) { + bsW ( s, + s->len [s->selector[selCtr]] [mtfv[i]], + s->code [s->selector[selCtr]] [mtfv[i]] ); + } + } + + + gs = ge+1; + selCtr++; + } + AssertH( selCtr == nSelectors, 3007 ); + + if (s->verbosity >= 3) + VPrintf1( "codes %d\n", s->numZ-nBytes ); +} + + +/*---------------------------------------------------*/ +void BZ2_compressBlock ( EState* s, Bool is_last_block ) +{ + if (s->nblock > 0) { + + BZ_FINALISE_CRC ( s->blockCRC ); + s->combinedCRC = (s->combinedCRC << 1) | (s->combinedCRC >> 31); + s->combinedCRC ^= s->blockCRC; + if (s->blockNo > 1) s->numZ = 0; + + if (s->verbosity >= 2) + VPrintf4( " block %d: crc = 0x%08x, " + "combined CRC = 0x%08x, size = %d\n", + s->blockNo, s->blockCRC, s->combinedCRC, s->nblock ); + + BZ2_blockSort ( s ); + } + + s->zbits = (UChar*) (&((UChar*)s->arr2)[s->nblock]); + + /*-- If this is the first block, create the stream header. --*/ + if (s->blockNo == 1) { + BZ2_bsInitWrite ( s ); + bsPutUChar ( s, BZ_HDR_B ); + bsPutUChar ( s, BZ_HDR_Z ); + bsPutUChar ( s, BZ_HDR_h ); + bsPutUChar ( s, (UChar)(BZ_HDR_0 + s->blockSize100k) ); + } + + if (s->nblock > 0) { + + bsPutUChar ( s, 0x31 ); bsPutUChar ( s, 0x41 ); + bsPutUChar ( s, 0x59 ); bsPutUChar ( s, 0x26 ); + bsPutUChar ( s, 0x53 ); bsPutUChar ( s, 0x59 ); + + /*-- Now the block's CRC, so it is in a known place. --*/ + bsPutUInt32 ( s, s->blockCRC ); + + /*-- + Now a single bit indicating (non-)randomisation. + As of version 0.9.5, we use a better sorting algorithm + which makes randomisation unnecessary. So always set + the randomised bit to 'no'. Of course, the decoder + still needs to be able to handle randomised blocks + so as to maintain backwards compatibility with + older versions of bzip2. + --*/ + bsW(s,1,0); + + bsW ( s, 24, s->origPtr ); + generateMTFValues ( s ); + sendMTFValues ( s ); + } + + + /*-- If this is the last block, add the stream trailer. --*/ + if (is_last_block) { + + bsPutUChar ( s, 0x17 ); bsPutUChar ( s, 0x72 ); + bsPutUChar ( s, 0x45 ); bsPutUChar ( s, 0x38 ); + bsPutUChar ( s, 0x50 ); bsPutUChar ( s, 0x90 ); + bsPutUInt32 ( s, s->combinedCRC ); + if (s->verbosity >= 2) + VPrintf1( " final combined CRC = 0x%08x\n ", s->combinedCRC ); + bsFinishWrite ( s ); + } +} + + +/*-------------------------------------------------------------*/ +/*--- end compress.c ---*/ +/*-------------------------------------------------------------*/ diff --git a/pit/suse-bzip2/crctable.c b/pit/suse-bzip2/crctable.c new file mode 100644 index 0000000..1fea7e9 --- /dev/null +++ b/pit/suse-bzip2/crctable.c @@ -0,0 +1,104 @@ + +/*-------------------------------------------------------------*/ +/*--- Table for doing CRCs ---*/ +/*--- crctable.c ---*/ +/*-------------------------------------------------------------*/ + +/* ------------------------------------------------------------------ + This file is part of bzip2/libbzip2, a program and library for + lossless, block-sorting data compression. + + bzip2/libbzip2 version 1.0.6 of 6 September 2010 + Copyright (C) 1996-2010 Julian Seward + + Please read the WARNING, DISCLAIMER and PATENTS sections in the + README file. + + This program is released under the terms of the license contained + in the file LICENSE. + ------------------------------------------------------------------ */ + + +#include "bzlib_private.h" + +/*-- + I think this is an implementation of the AUTODIN-II, + Ethernet & FDDI 32-bit CRC standard. Vaguely derived + from code by Rob Warnock, in Section 51 of the + comp.compression FAQ. +--*/ + +UInt32 BZ2_crc32Table[256] = { + + /*-- Ugly, innit? --*/ + + 0x00000000L, 0x04c11db7L, 0x09823b6eL, 0x0d4326d9L, + 0x130476dcL, 0x17c56b6bL, 0x1a864db2L, 0x1e475005L, + 0x2608edb8L, 0x22c9f00fL, 0x2f8ad6d6L, 0x2b4bcb61L, + 0x350c9b64L, 0x31cd86d3L, 0x3c8ea00aL, 0x384fbdbdL, + 0x4c11db70L, 0x48d0c6c7L, 0x4593e01eL, 0x4152fda9L, + 0x5f15adacL, 0x5bd4b01bL, 0x569796c2L, 0x52568b75L, + 0x6a1936c8L, 0x6ed82b7fL, 0x639b0da6L, 0x675a1011L, + 0x791d4014L, 0x7ddc5da3L, 0x709f7b7aL, 0x745e66cdL, + 0x9823b6e0L, 0x9ce2ab57L, 0x91a18d8eL, 0x95609039L, + 0x8b27c03cL, 0x8fe6dd8bL, 0x82a5fb52L, 0x8664e6e5L, + 0xbe2b5b58L, 0xbaea46efL, 0xb7a96036L, 0xb3687d81L, + 0xad2f2d84L, 0xa9ee3033L, 0xa4ad16eaL, 0xa06c0b5dL, + 0xd4326d90L, 0xd0f37027L, 0xddb056feL, 0xd9714b49L, + 0xc7361b4cL, 0xc3f706fbL, 0xceb42022L, 0xca753d95L, + 0xf23a8028L, 0xf6fb9d9fL, 0xfbb8bb46L, 0xff79a6f1L, + 0xe13ef6f4L, 0xe5ffeb43L, 0xe8bccd9aL, 0xec7dd02dL, + 0x34867077L, 0x30476dc0L, 0x3d044b19L, 0x39c556aeL, + 0x278206abL, 0x23431b1cL, 0x2e003dc5L, 0x2ac12072L, + 0x128e9dcfL, 0x164f8078L, 0x1b0ca6a1L, 0x1fcdbb16L, + 0x018aeb13L, 0x054bf6a4L, 0x0808d07dL, 0x0cc9cdcaL, + 0x7897ab07L, 0x7c56b6b0L, 0x71159069L, 0x75d48ddeL, + 0x6b93dddbL, 0x6f52c06cL, 0x6211e6b5L, 0x66d0fb02L, + 0x5e9f46bfL, 0x5a5e5b08L, 0x571d7dd1L, 0x53dc6066L, + 0x4d9b3063L, 0x495a2dd4L, 0x44190b0dL, 0x40d816baL, + 0xaca5c697L, 0xa864db20L, 0xa527fdf9L, 0xa1e6e04eL, + 0xbfa1b04bL, 0xbb60adfcL, 0xb6238b25L, 0xb2e29692L, + 0x8aad2b2fL, 0x8e6c3698L, 0x832f1041L, 0x87ee0df6L, + 0x99a95df3L, 0x9d684044L, 0x902b669dL, 0x94ea7b2aL, + 0xe0b41de7L, 0xe4750050L, 0xe9362689L, 0xedf73b3eL, + 0xf3b06b3bL, 0xf771768cL, 0xfa325055L, 0xfef34de2L, + 0xc6bcf05fL, 0xc27dede8L, 0xcf3ecb31L, 0xcbffd686L, + 0xd5b88683L, 0xd1799b34L, 0xdc3abdedL, 0xd8fba05aL, + 0x690ce0eeL, 0x6dcdfd59L, 0x608edb80L, 0x644fc637L, + 0x7a089632L, 0x7ec98b85L, 0x738aad5cL, 0x774bb0ebL, + 0x4f040d56L, 0x4bc510e1L, 0x46863638L, 0x42472b8fL, + 0x5c007b8aL, 0x58c1663dL, 0x558240e4L, 0x51435d53L, + 0x251d3b9eL, 0x21dc2629L, 0x2c9f00f0L, 0x285e1d47L, + 0x36194d42L, 0x32d850f5L, 0x3f9b762cL, 0x3b5a6b9bL, + 0x0315d626L, 0x07d4cb91L, 0x0a97ed48L, 0x0e56f0ffL, + 0x1011a0faL, 0x14d0bd4dL, 0x19939b94L, 0x1d528623L, + 0xf12f560eL, 0xf5ee4bb9L, 0xf8ad6d60L, 0xfc6c70d7L, + 0xe22b20d2L, 0xe6ea3d65L, 0xeba91bbcL, 0xef68060bL, + 0xd727bbb6L, 0xd3e6a601L, 0xdea580d8L, 0xda649d6fL, + 0xc423cd6aL, 0xc0e2d0ddL, 0xcda1f604L, 0xc960ebb3L, + 0xbd3e8d7eL, 0xb9ff90c9L, 0xb4bcb610L, 0xb07daba7L, + 0xae3afba2L, 0xaafbe615L, 0xa7b8c0ccL, 0xa379dd7bL, + 0x9b3660c6L, 0x9ff77d71L, 0x92b45ba8L, 0x9675461fL, + 0x8832161aL, 0x8cf30badL, 0x81b02d74L, 0x857130c3L, + 0x5d8a9099L, 0x594b8d2eL, 0x5408abf7L, 0x50c9b640L, + 0x4e8ee645L, 0x4a4ffbf2L, 0x470cdd2bL, 0x43cdc09cL, + 0x7b827d21L, 0x7f436096L, 0x7200464fL, 0x76c15bf8L, + 0x68860bfdL, 0x6c47164aL, 0x61043093L, 0x65c52d24L, + 0x119b4be9L, 0x155a565eL, 0x18197087L, 0x1cd86d30L, + 0x029f3d35L, 0x065e2082L, 0x0b1d065bL, 0x0fdc1becL, + 0x3793a651L, 0x3352bbe6L, 0x3e119d3fL, 0x3ad08088L, + 0x2497d08dL, 0x2056cd3aL, 0x2d15ebe3L, 0x29d4f654L, + 0xc5a92679L, 0xc1683bceL, 0xcc2b1d17L, 0xc8ea00a0L, + 0xd6ad50a5L, 0xd26c4d12L, 0xdf2f6bcbL, 0xdbee767cL, + 0xe3a1cbc1L, 0xe760d676L, 0xea23f0afL, 0xeee2ed18L, + 0xf0a5bd1dL, 0xf464a0aaL, 0xf9278673L, 0xfde69bc4L, + 0x89b8fd09L, 0x8d79e0beL, 0x803ac667L, 0x84fbdbd0L, + 0x9abc8bd5L, 0x9e7d9662L, 0x933eb0bbL, 0x97ffad0cL, + 0xafb010b1L, 0xab710d06L, 0xa6322bdfL, 0xa2f33668L, + 0xbcb4666dL, 0xb8757bdaL, 0xb5365d03L, 0xb1f740b4L +}; + + +/*-------------------------------------------------------------*/ +/*--- end crctable.c ---*/ +/*-------------------------------------------------------------*/ diff --git a/pit/suse-bzip2/decompress.c b/pit/suse-bzip2/decompress.c new file mode 100644 index 0000000..311f566 --- /dev/null +++ b/pit/suse-bzip2/decompress.c @@ -0,0 +1,646 @@ + +/*-------------------------------------------------------------*/ +/*--- Decompression machinery ---*/ +/*--- decompress.c ---*/ +/*-------------------------------------------------------------*/ + +/* ------------------------------------------------------------------ + This file is part of bzip2/libbzip2, a program and library for + lossless, block-sorting data compression. + + bzip2/libbzip2 version 1.0.6 of 6 September 2010 + Copyright (C) 1996-2010 Julian Seward + + Please read the WARNING, DISCLAIMER and PATENTS sections in the + README file. + + This program is released under the terms of the license contained + in the file LICENSE. + ------------------------------------------------------------------ */ + + +#include "bzlib_private.h" + + +/*---------------------------------------------------*/ +static +void makeMaps_d ( DState* s ) +{ + Int32 i; + s->nInUse = 0; + for (i = 0; i < 256; i++) + if (s->inUse[i]) { + s->seqToUnseq[s->nInUse] = i; + s->nInUse++; + } +} + + +/*---------------------------------------------------*/ +#define RETURN(rrr) \ + { retVal = rrr; goto save_state_and_return; }; + +#define GET_BITS(lll,vvv,nnn) \ + case lll: s->state = lll; \ + while (True) { \ + if (s->bsLive >= nnn) { \ + UInt32 v; \ + v = (s->bsBuff >> \ + (s->bsLive-nnn)) & ((1 << nnn)-1); \ + s->bsLive -= nnn; \ + vvv = v; \ + break; \ + } \ + if (s->strm->avail_in == 0) RETURN(BZ_OK); \ + s->bsBuff \ + = (s->bsBuff << 8) | \ + ((UInt32) \ + (*((UChar*)(s->strm->next_in)))); \ + s->bsLive += 8; \ + s->strm->next_in++; \ + s->strm->avail_in--; \ + s->strm->total_in_lo32++; \ + if (s->strm->total_in_lo32 == 0) \ + s->strm->total_in_hi32++; \ + } + +#define GET_UCHAR(lll,uuu) \ + GET_BITS(lll,uuu,8) + +#define GET_BIT(lll,uuu) \ + GET_BITS(lll,uuu,1) + +/*---------------------------------------------------*/ +#define GET_MTF_VAL(label1,label2,lval) \ +{ \ + if (groupPos == 0) { \ + groupNo++; \ + if (groupNo >= nSelectors) \ + RETURN(BZ_DATA_ERROR); \ + groupPos = BZ_G_SIZE; \ + gSel = s->selector[groupNo]; \ + gMinlen = s->minLens[gSel]; \ + gLimit = &(s->limit[gSel][0]); \ + gPerm = &(s->perm[gSel][0]); \ + gBase = &(s->base[gSel][0]); \ + } \ + groupPos--; \ + zn = gMinlen; \ + GET_BITS(label1, zvec, zn); \ + while (1) { \ + if (zn > 20 /* the longest code */) \ + RETURN(BZ_DATA_ERROR); \ + if (zvec <= gLimit[zn]) break; \ + zn++; \ + GET_BIT(label2, zj); \ + zvec = (zvec << 1) | zj; \ + }; \ + if (zvec - gBase[zn] < 0 \ + || zvec - gBase[zn] >= BZ_MAX_ALPHA_SIZE) \ + RETURN(BZ_DATA_ERROR); \ + lval = gPerm[zvec - gBase[zn]]; \ +} + + +/*---------------------------------------------------*/ +Int32 BZ2_decompress ( DState* s ) +{ + UChar uc; + Int32 retVal; + Int32 minLen, maxLen; + bz_stream* strm = s->strm; + + /* stuff that needs to be saved/restored */ + Int32 i; + Int32 j; + Int32 t; + Int32 alphaSize; + Int32 nGroups; + Int32 nSelectors; + Int32 EOB; + Int32 groupNo; + Int32 groupPos; + Int32 nextSym; + Int32 nblockMAX; + Int32 nblock; + Int32 es; + Int32 N; + Int32 curr; + Int32 zt; + Int32 zn; + Int32 zvec; + Int32 zj; + Int32 gSel; + Int32 gMinlen; + Int32* gLimit; + Int32* gBase; + Int32* gPerm; + + if (s->state == BZ_X_MAGIC_1) { + /*initialise the save area*/ + s->save_i = 0; + s->save_j = 0; + s->save_t = 0; + s->save_alphaSize = 0; + s->save_nGroups = 0; + s->save_nSelectors = 0; + s->save_EOB = 0; + s->save_groupNo = 0; + s->save_groupPos = 0; + s->save_nextSym = 0; + s->save_nblockMAX = 0; + s->save_nblock = 0; + s->save_es = 0; + s->save_N = 0; + s->save_curr = 0; + s->save_zt = 0; + s->save_zn = 0; + s->save_zvec = 0; + s->save_zj = 0; + s->save_gSel = 0; + s->save_gMinlen = 0; + s->save_gLimit = NULL; + s->save_gBase = NULL; + s->save_gPerm = NULL; + } + + /*restore from the save area*/ + i = s->save_i; + j = s->save_j; + t = s->save_t; + alphaSize = s->save_alphaSize; + nGroups = s->save_nGroups; + nSelectors = s->save_nSelectors; + EOB = s->save_EOB; + groupNo = s->save_groupNo; + groupPos = s->save_groupPos; + nextSym = s->save_nextSym; + nblockMAX = s->save_nblockMAX; + nblock = s->save_nblock; + es = s->save_es; + N = s->save_N; + curr = s->save_curr; + zt = s->save_zt; + zn = s->save_zn; + zvec = s->save_zvec; + zj = s->save_zj; + gSel = s->save_gSel; + gMinlen = s->save_gMinlen; + gLimit = s->save_gLimit; + gBase = s->save_gBase; + gPerm = s->save_gPerm; + + retVal = BZ_OK; + + switch (s->state) { + + GET_UCHAR(BZ_X_MAGIC_1, uc); + if (uc != BZ_HDR_B) RETURN(BZ_DATA_ERROR_MAGIC); + + GET_UCHAR(BZ_X_MAGIC_2, uc); + if (uc != BZ_HDR_Z) RETURN(BZ_DATA_ERROR_MAGIC); + + GET_UCHAR(BZ_X_MAGIC_3, uc) + if (uc != BZ_HDR_h) RETURN(BZ_DATA_ERROR_MAGIC); + + GET_BITS(BZ_X_MAGIC_4, s->blockSize100k, 8) + if (s->blockSize100k < (BZ_HDR_0 + 1) || + s->blockSize100k > (BZ_HDR_0 + 9)) RETURN(BZ_DATA_ERROR_MAGIC); + s->blockSize100k -= BZ_HDR_0; + + if (s->smallDecompress) { + s->ll16 = BZALLOC( s->blockSize100k * 100000 * sizeof(UInt16) ); + s->ll4 = BZALLOC( + ((1 + s->blockSize100k * 100000) >> 1) * sizeof(UChar) + ); + if (s->ll16 == NULL || s->ll4 == NULL) RETURN(BZ_MEM_ERROR); + } else { + s->tt = BZALLOC( s->blockSize100k * 100000 * sizeof(Int32) ); + if (s->tt == NULL) RETURN(BZ_MEM_ERROR); + } + + GET_UCHAR(BZ_X_BLKHDR_1, uc); + + if (uc == 0x17) goto endhdr_2; + if (uc != 0x31) RETURN(BZ_DATA_ERROR); + GET_UCHAR(BZ_X_BLKHDR_2, uc); + if (uc != 0x41) RETURN(BZ_DATA_ERROR); + GET_UCHAR(BZ_X_BLKHDR_3, uc); + if (uc != 0x59) RETURN(BZ_DATA_ERROR); + GET_UCHAR(BZ_X_BLKHDR_4, uc); + if (uc != 0x26) RETURN(BZ_DATA_ERROR); + GET_UCHAR(BZ_X_BLKHDR_5, uc); + if (uc != 0x53) RETURN(BZ_DATA_ERROR); + GET_UCHAR(BZ_X_BLKHDR_6, uc); + if (uc != 0x59) RETURN(BZ_DATA_ERROR); + + s->currBlockNo++; + if (s->verbosity >= 2) + VPrintf1 ( "\n [%d: huff+mtf ", s->currBlockNo ); + + s->storedBlockCRC = 0; + GET_UCHAR(BZ_X_BCRC_1, uc); + s->storedBlockCRC = (s->storedBlockCRC << 8) | ((UInt32)uc); + GET_UCHAR(BZ_X_BCRC_2, uc); + s->storedBlockCRC = (s->storedBlockCRC << 8) | ((UInt32)uc); + GET_UCHAR(BZ_X_BCRC_3, uc); + s->storedBlockCRC = (s->storedBlockCRC << 8) | ((UInt32)uc); + GET_UCHAR(BZ_X_BCRC_4, uc); + s->storedBlockCRC = (s->storedBlockCRC << 8) | ((UInt32)uc); + + GET_BITS(BZ_X_RANDBIT, s->blockRandomised, 1); + + s->origPtr = 0; + GET_UCHAR(BZ_X_ORIGPTR_1, uc); + s->origPtr = (s->origPtr << 8) | ((Int32)uc); + GET_UCHAR(BZ_X_ORIGPTR_2, uc); + s->origPtr = (s->origPtr << 8) | ((Int32)uc); + GET_UCHAR(BZ_X_ORIGPTR_3, uc); + s->origPtr = (s->origPtr << 8) | ((Int32)uc); + + if (s->origPtr < 0) + RETURN(BZ_DATA_ERROR); + if (s->origPtr > 10 + 100000*s->blockSize100k) + RETURN(BZ_DATA_ERROR); + + /*--- Receive the mapping table ---*/ + for (i = 0; i < 16; i++) { + GET_BIT(BZ_X_MAPPING_1, uc); + if (uc == 1) + s->inUse16[i] = True; else + s->inUse16[i] = False; + } + + for (i = 0; i < 256; i++) s->inUse[i] = False; + + for (i = 0; i < 16; i++) + if (s->inUse16[i]) + for (j = 0; j < 16; j++) { + GET_BIT(BZ_X_MAPPING_2, uc); + if (uc == 1) s->inUse[i * 16 + j] = True; + } + makeMaps_d ( s ); + if (s->nInUse == 0) RETURN(BZ_DATA_ERROR); + alphaSize = s->nInUse+2; + + /*--- Now the selectors ---*/ + GET_BITS(BZ_X_SELECTOR_1, nGroups, 3); + if (nGroups < 2 || nGroups > 6) RETURN(BZ_DATA_ERROR); + GET_BITS(BZ_X_SELECTOR_2, nSelectors, 15); + if (nSelectors < 1) RETURN(BZ_DATA_ERROR); + for (i = 0; i < nSelectors; i++) { + j = 0; + while (True) { + GET_BIT(BZ_X_SELECTOR_3, uc); + if (uc == 0) break; + j++; + if (j >= nGroups) RETURN(BZ_DATA_ERROR); + } + s->selectorMtf[i] = j; + } + + /*--- Undo the MTF values for the selectors. ---*/ + { + UChar pos[BZ_N_GROUPS], tmp, v; + for (v = 0; v < nGroups; v++) pos[v] = v; + + for (i = 0; i < nSelectors; i++) { + v = s->selectorMtf[i]; + tmp = pos[v]; + while (v > 0) { pos[v] = pos[v-1]; v--; } + pos[0] = tmp; + s->selector[i] = tmp; + } + } + + /*--- Now the coding tables ---*/ + for (t = 0; t < nGroups; t++) { + GET_BITS(BZ_X_CODING_1, curr, 5); + for (i = 0; i < alphaSize; i++) { + while (True) { + if (curr < 1 || curr > 20) RETURN(BZ_DATA_ERROR); + GET_BIT(BZ_X_CODING_2, uc); + if (uc == 0) break; + GET_BIT(BZ_X_CODING_3, uc); + if (uc == 0) curr++; else curr--; + } + s->len[t][i] = curr; + } + } + + /*--- Create the Huffman decoding tables ---*/ + for (t = 0; t < nGroups; t++) { + minLen = 32; + maxLen = 0; + for (i = 0; i < alphaSize; i++) { + if (s->len[t][i] > maxLen) maxLen = s->len[t][i]; + if (s->len[t][i] < minLen) minLen = s->len[t][i]; + } + BZ2_hbCreateDecodeTables ( + &(s->limit[t][0]), + &(s->base[t][0]), + &(s->perm[t][0]), + &(s->len[t][0]), + minLen, maxLen, alphaSize + ); + s->minLens[t] = minLen; + } + + /*--- Now the MTF values ---*/ + + EOB = s->nInUse+1; + nblockMAX = 100000 * s->blockSize100k; + groupNo = -1; + groupPos = 0; + + for (i = 0; i <= 255; i++) s->unzftab[i] = 0; + + /*-- MTF init --*/ + { + Int32 ii, jj, kk; + kk = MTFA_SIZE-1; + for (ii = 256 / MTFL_SIZE - 1; ii >= 0; ii--) { + for (jj = MTFL_SIZE-1; jj >= 0; jj--) { + s->mtfa[kk] = (UChar)(ii * MTFL_SIZE + jj); + kk--; + } + s->mtfbase[ii] = kk + 1; + } + } + /*-- end MTF init --*/ + + nblock = 0; + GET_MTF_VAL(BZ_X_MTF_1, BZ_X_MTF_2, nextSym); + + while (True) { + + if (nextSym == EOB) break; + + if (nextSym == BZ_RUNA || nextSym == BZ_RUNB) { + + es = -1; + N = 1; + do { + /* Check that N doesn't get too big, so that es doesn't + go negative. The maximum value that can be + RUNA/RUNB encoded is equal to the block size (post + the initial RLE), viz, 900k, so bounding N at 2 + million should guard against overflow without + rejecting any legitimate inputs. */ + if (N >= 2*1024*1024) RETURN(BZ_DATA_ERROR); + if (nextSym == BZ_RUNA) es = es + (0+1) * N; else + if (nextSym == BZ_RUNB) es = es + (1+1) * N; + N = N * 2; + GET_MTF_VAL(BZ_X_MTF_3, BZ_X_MTF_4, nextSym); + } + while (nextSym == BZ_RUNA || nextSym == BZ_RUNB); + + es++; + uc = s->seqToUnseq[ s->mtfa[s->mtfbase[0]] ]; + s->unzftab[uc] += es; + + if (s->smallDecompress) + while (es > 0) { + if (nblock >= nblockMAX) RETURN(BZ_DATA_ERROR); + s->ll16[nblock] = (UInt16)uc; + nblock++; + es--; + } + else + while (es > 0) { + if (nblock >= nblockMAX) RETURN(BZ_DATA_ERROR); + s->tt[nblock] = (UInt32)uc; + nblock++; + es--; + }; + + continue; + + } else { + + if (nblock >= nblockMAX) RETURN(BZ_DATA_ERROR); + + /*-- uc = MTF ( nextSym-1 ) --*/ + { + Int32 ii, jj, kk, pp, lno, off; + UInt32 nn; + nn = (UInt32)(nextSym - 1); + + if (nn < MTFL_SIZE) { + /* avoid general-case expense */ + pp = s->mtfbase[0]; + uc = s->mtfa[pp+nn]; + while (nn > 3) { + Int32 z = pp+nn; + s->mtfa[(z) ] = s->mtfa[(z)-1]; + s->mtfa[(z)-1] = s->mtfa[(z)-2]; + s->mtfa[(z)-2] = s->mtfa[(z)-3]; + s->mtfa[(z)-3] = s->mtfa[(z)-4]; + nn -= 4; + } + while (nn > 0) { + s->mtfa[(pp+nn)] = s->mtfa[(pp+nn)-1]; nn--; + }; + s->mtfa[pp] = uc; + } else { + /* general case */ + lno = nn / MTFL_SIZE; + off = nn % MTFL_SIZE; + pp = s->mtfbase[lno] + off; + uc = s->mtfa[pp]; + while (pp > s->mtfbase[lno]) { + s->mtfa[pp] = s->mtfa[pp-1]; pp--; + }; + s->mtfbase[lno]++; + while (lno > 0) { + s->mtfbase[lno]--; + s->mtfa[s->mtfbase[lno]] + = s->mtfa[s->mtfbase[lno-1] + MTFL_SIZE - 1]; + lno--; + } + s->mtfbase[0]--; + s->mtfa[s->mtfbase[0]] = uc; + if (s->mtfbase[0] == 0) { + kk = MTFA_SIZE-1; + for (ii = 256 / MTFL_SIZE-1; ii >= 0; ii--) { + for (jj = MTFL_SIZE-1; jj >= 0; jj--) { + s->mtfa[kk] = s->mtfa[s->mtfbase[ii] + jj]; + kk--; + } + s->mtfbase[ii] = kk + 1; + } + } + } + } + /*-- end uc = MTF ( nextSym-1 ) --*/ + + s->unzftab[s->seqToUnseq[uc]]++; + if (s->smallDecompress) + s->ll16[nblock] = (UInt16)(s->seqToUnseq[uc]); else + s->tt[nblock] = (UInt32)(s->seqToUnseq[uc]); + nblock++; + + GET_MTF_VAL(BZ_X_MTF_5, BZ_X_MTF_6, nextSym); + continue; + } + } + + /* Now we know what nblock is, we can do a better sanity + check on s->origPtr. + */ + if (s->origPtr < 0 || s->origPtr >= nblock) + RETURN(BZ_DATA_ERROR); + + /*-- Set up cftab to facilitate generation of T^(-1) --*/ + /* Check: unzftab entries in range. */ + for (i = 0; i <= 255; i++) { + if (s->unzftab[i] < 0 || s->unzftab[i] > nblock) + RETURN(BZ_DATA_ERROR); + } + /* Actually generate cftab. */ + s->cftab[0] = 0; + for (i = 1; i <= 256; i++) s->cftab[i] = s->unzftab[i-1]; + for (i = 1; i <= 256; i++) s->cftab[i] += s->cftab[i-1]; + /* Check: cftab entries in range. */ + for (i = 0; i <= 256; i++) { + if (s->cftab[i] < 0 || s->cftab[i] > nblock) { + /* s->cftab[i] can legitimately be == nblock */ + RETURN(BZ_DATA_ERROR); + } + } + /* Check: cftab entries non-descending. */ + for (i = 1; i <= 256; i++) { + if (s->cftab[i-1] > s->cftab[i]) { + RETURN(BZ_DATA_ERROR); + } + } + + s->state_out_len = 0; + s->state_out_ch = 0; + BZ_INITIALISE_CRC ( s->calculatedBlockCRC ); + s->state = BZ_X_OUTPUT; + if (s->verbosity >= 2) VPrintf0 ( "rt+rld" ); + + if (s->smallDecompress) { + + /*-- Make a copy of cftab, used in generation of T --*/ + for (i = 0; i <= 256; i++) s->cftabCopy[i] = s->cftab[i]; + + /*-- compute the T vector --*/ + for (i = 0; i < nblock; i++) { + uc = (UChar)(s->ll16[i]); + SET_LL(i, s->cftabCopy[uc]); + s->cftabCopy[uc]++; + } + + /*-- Compute T^(-1) by pointer reversal on T --*/ + i = s->origPtr; + j = GET_LL(i); + do { + Int32 tmp = GET_LL(j); + SET_LL(j, i); + i = j; + j = tmp; + } + while (i != s->origPtr); + + s->tPos = s->origPtr; + s->nblock_used = 0; + if (s->blockRandomised) { + BZ_RAND_INIT_MASK; + BZ_GET_SMALL(s->k0); s->nblock_used++; + BZ_RAND_UPD_MASK; s->k0 ^= BZ_RAND_MASK; + } else { + BZ_GET_SMALL(s->k0); s->nblock_used++; + } + + } else { + + /*-- compute the T^(-1) vector --*/ + for (i = 0; i < nblock; i++) { + uc = (UChar)(s->tt[i] & 0xff); + s->tt[s->cftab[uc]] |= (i << 8); + s->cftab[uc]++; + } + + s->tPos = s->tt[s->origPtr] >> 8; + s->nblock_used = 0; + if (s->blockRandomised) { + BZ_RAND_INIT_MASK; + BZ_GET_FAST(s->k0); s->nblock_used++; + BZ_RAND_UPD_MASK; s->k0 ^= BZ_RAND_MASK; + } else { + BZ_GET_FAST(s->k0); s->nblock_used++; + } + + } + + RETURN(BZ_OK); + + + + endhdr_2: + + GET_UCHAR(BZ_X_ENDHDR_2, uc); + if (uc != 0x72) RETURN(BZ_DATA_ERROR); + GET_UCHAR(BZ_X_ENDHDR_3, uc); + if (uc != 0x45) RETURN(BZ_DATA_ERROR); + GET_UCHAR(BZ_X_ENDHDR_4, uc); + if (uc != 0x38) RETURN(BZ_DATA_ERROR); + GET_UCHAR(BZ_X_ENDHDR_5, uc); + if (uc != 0x50) RETURN(BZ_DATA_ERROR); + GET_UCHAR(BZ_X_ENDHDR_6, uc); + if (uc != 0x90) RETURN(BZ_DATA_ERROR); + + s->storedCombinedCRC = 0; + GET_UCHAR(BZ_X_CCRC_1, uc); + s->storedCombinedCRC = (s->storedCombinedCRC << 8) | ((UInt32)uc); + GET_UCHAR(BZ_X_CCRC_2, uc); + s->storedCombinedCRC = (s->storedCombinedCRC << 8) | ((UInt32)uc); + GET_UCHAR(BZ_X_CCRC_3, uc); + s->storedCombinedCRC = (s->storedCombinedCRC << 8) | ((UInt32)uc); + GET_UCHAR(BZ_X_CCRC_4, uc); + s->storedCombinedCRC = (s->storedCombinedCRC << 8) | ((UInt32)uc); + + s->state = BZ_X_IDLE; + RETURN(BZ_STREAM_END); + + default: AssertH ( False, 4001 ); + } + + AssertH ( False, 4002 ); + + save_state_and_return: + + s->save_i = i; + s->save_j = j; + s->save_t = t; + s->save_alphaSize = alphaSize; + s->save_nGroups = nGroups; + s->save_nSelectors = nSelectors; + s->save_EOB = EOB; + s->save_groupNo = groupNo; + s->save_groupPos = groupPos; + s->save_nextSym = nextSym; + s->save_nblockMAX = nblockMAX; + s->save_nblock = nblock; + s->save_es = es; + s->save_N = N; + s->save_curr = curr; + s->save_zt = zt; + s->save_zn = zn; + s->save_zvec = zvec; + s->save_zj = zj; + s->save_gSel = gSel; + s->save_gMinlen = gMinlen; + s->save_gLimit = gLimit; + s->save_gBase = gBase; + s->save_gPerm = gPerm; + + return retVal; +} + + +/*-------------------------------------------------------------*/ +/*--- end decompress.c ---*/ +/*-------------------------------------------------------------*/ diff --git a/pit/suse-bzip2/huffman.c b/pit/suse-bzip2/huffman.c new file mode 100644 index 0000000..2283fdb --- /dev/null +++ b/pit/suse-bzip2/huffman.c @@ -0,0 +1,205 @@ + +/*-------------------------------------------------------------*/ +/*--- Huffman coding low-level stuff ---*/ +/*--- huffman.c ---*/ +/*-------------------------------------------------------------*/ + +/* ------------------------------------------------------------------ + This file is part of bzip2/libbzip2, a program and library for + lossless, block-sorting data compression. + + bzip2/libbzip2 version 1.0.6 of 6 September 2010 + Copyright (C) 1996-2010 Julian Seward + + Please read the WARNING, DISCLAIMER and PATENTS sections in the + README file. + + This program is released under the terms of the license contained + in the file LICENSE. + ------------------------------------------------------------------ */ + + +#include "bzlib_private.h" + +/*---------------------------------------------------*/ +#define WEIGHTOF(zz0) ((zz0) & 0xffffff00) +#define DEPTHOF(zz1) ((zz1) & 0x000000ff) +#define MYMAX(zz2,zz3) ((zz2) > (zz3) ? (zz2) : (zz3)) + +#define ADDWEIGHTS(zw1,zw2) \ + (WEIGHTOF(zw1)+WEIGHTOF(zw2)) | \ + (1 + MYMAX(DEPTHOF(zw1),DEPTHOF(zw2))) + +#define UPHEAP(z) \ +{ \ + Int32 zz, tmp; \ + zz = z; tmp = heap[zz]; \ + while (weight[tmp] < weight[heap[zz >> 1]]) { \ + heap[zz] = heap[zz >> 1]; \ + zz >>= 1; \ + } \ + heap[zz] = tmp; \ +} + +#define DOWNHEAP(z) \ +{ \ + Int32 zz, yy, tmp; \ + zz = z; tmp = heap[zz]; \ + while (True) { \ + yy = zz << 1; \ + if (yy > nHeap) break; \ + if (yy < nHeap && \ + weight[heap[yy+1]] < weight[heap[yy]]) \ + yy++; \ + if (weight[tmp] < weight[heap[yy]]) break; \ + heap[zz] = heap[yy]; \ + zz = yy; \ + } \ + heap[zz] = tmp; \ +} + + +/*---------------------------------------------------*/ +void BZ2_hbMakeCodeLengths ( UChar *len, + Int32 *freq, + Int32 alphaSize, + Int32 maxLen ) +{ + /*-- + Nodes and heap entries run from 1. Entry 0 + for both the heap and nodes is a sentinel. + --*/ + Int32 nNodes, nHeap, n1, n2, i, j, k; + Bool tooLong; + + Int32 heap [ BZ_MAX_ALPHA_SIZE + 2 ]; + Int32 weight [ BZ_MAX_ALPHA_SIZE * 2 ]; + Int32 parent [ BZ_MAX_ALPHA_SIZE * 2 ]; + + for (i = 0; i < alphaSize; i++) + weight[i+1] = (freq[i] == 0 ? 1 : freq[i]) << 8; + + while (True) { + + nNodes = alphaSize; + nHeap = 0; + + heap[0] = 0; + weight[0] = 0; + parent[0] = -2; + + for (i = 1; i <= alphaSize; i++) { + parent[i] = -1; + nHeap++; + heap[nHeap] = i; + UPHEAP(nHeap); + } + + AssertH( nHeap < (BZ_MAX_ALPHA_SIZE+2), 2001 ); + + while (nHeap > 1) { + n1 = heap[1]; heap[1] = heap[nHeap]; nHeap--; DOWNHEAP(1); + n2 = heap[1]; heap[1] = heap[nHeap]; nHeap--; DOWNHEAP(1); + nNodes++; + parent[n1] = parent[n2] = nNodes; + weight[nNodes] = ADDWEIGHTS(weight[n1], weight[n2]); + parent[nNodes] = -1; + nHeap++; + heap[nHeap] = nNodes; + UPHEAP(nHeap); + } + + AssertH( nNodes < (BZ_MAX_ALPHA_SIZE * 2), 2002 ); + + tooLong = False; + for (i = 1; i <= alphaSize; i++) { + j = 0; + k = i; + while (parent[k] >= 0) { k = parent[k]; j++; } + len[i-1] = j; + if (j > maxLen) tooLong = True; + } + + if (! tooLong) break; + + /* 17 Oct 04: keep-going condition for the following loop used + to be 'i < alphaSize', which missed the last element, + theoretically leading to the possibility of the compressor + looping. However, this count-scaling step is only needed if + one of the generated Huffman code words is longer than + maxLen, which up to and including version 1.0.2 was 20 bits, + which is extremely unlikely. In version 1.0.3 maxLen was + changed to 17 bits, which has minimal effect on compression + ratio, but does mean this scaling step is used from time to + time, enough to verify that it works. + + This means that bzip2-1.0.3 and later will only produce + Huffman codes with a maximum length of 17 bits. However, in + order to preserve backwards compatibility with bitstreams + produced by versions pre-1.0.3, the decompressor must still + handle lengths of up to 20. */ + + for (i = 1; i <= alphaSize; i++) { + j = weight[i] >> 8; + j = 1 + (j / 2); + weight[i] = j << 8; + } + } +} + + +/*---------------------------------------------------*/ +void BZ2_hbAssignCodes ( Int32 *code, + UChar *length, + Int32 minLen, + Int32 maxLen, + Int32 alphaSize ) +{ + Int32 n, vec, i; + + vec = 0; + for (n = minLen; n <= maxLen; n++) { + for (i = 0; i < alphaSize; i++) + if (length[i] == n) { code[i] = vec; vec++; }; + vec <<= 1; + } +} + + +/*---------------------------------------------------*/ +void BZ2_hbCreateDecodeTables ( Int32 *limit, + Int32 *base, + Int32 *perm, + UChar *length, + Int32 minLen, + Int32 maxLen, + Int32 alphaSize ) +{ + Int32 pp, i, j, vec; + + pp = 0; + for (i = minLen; i <= maxLen; i++) + for (j = 0; j < alphaSize; j++) + if (length[j] == i) { perm[pp] = j; pp++; }; + + for (i = 0; i < BZ_MAX_CODE_LEN; i++) base[i] = 0; + for (i = 0; i < alphaSize; i++) base[length[i]+1]++; + + for (i = 1; i < BZ_MAX_CODE_LEN; i++) base[i] += base[i-1]; + + for (i = 0; i < BZ_MAX_CODE_LEN; i++) limit[i] = 0; + vec = 0; + + for (i = minLen; i <= maxLen; i++) { + vec += (base[i+1] - base[i]); + limit[i] = vec-1; + vec <<= 1; + } + for (i = minLen + 1; i <= maxLen; i++) + base[i] = ((limit[i-1] + 1) << 1) - base[i]; +} + + +/*-------------------------------------------------------------*/ +/*--- end huffman.c ---*/ +/*-------------------------------------------------------------*/ diff --git a/pit/suse-bzip2/randtable.c b/pit/suse-bzip2/randtable.c new file mode 100644 index 0000000..6d62459 --- /dev/null +++ b/pit/suse-bzip2/randtable.c @@ -0,0 +1,84 @@ + +/*-------------------------------------------------------------*/ +/*--- Table for randomising repetitive blocks ---*/ +/*--- randtable.c ---*/ +/*-------------------------------------------------------------*/ + +/* ------------------------------------------------------------------ + This file is part of bzip2/libbzip2, a program and library for + lossless, block-sorting data compression. + + bzip2/libbzip2 version 1.0.6 of 6 September 2010 + Copyright (C) 1996-2010 Julian Seward + + Please read the WARNING, DISCLAIMER and PATENTS sections in the + README file. + + This program is released under the terms of the license contained + in the file LICENSE. + ------------------------------------------------------------------ */ + + +#include "bzlib_private.h" + + +/*---------------------------------------------*/ +Int32 BZ2_rNums[512] = { + 619, 720, 127, 481, 931, 816, 813, 233, 566, 247, + 985, 724, 205, 454, 863, 491, 741, 242, 949, 214, + 733, 859, 335, 708, 621, 574, 73, 654, 730, 472, + 419, 436, 278, 496, 867, 210, 399, 680, 480, 51, + 878, 465, 811, 169, 869, 675, 611, 697, 867, 561, + 862, 687, 507, 283, 482, 129, 807, 591, 733, 623, + 150, 238, 59, 379, 684, 877, 625, 169, 643, 105, + 170, 607, 520, 932, 727, 476, 693, 425, 174, 647, + 73, 122, 335, 530, 442, 853, 695, 249, 445, 515, + 909, 545, 703, 919, 874, 474, 882, 500, 594, 612, + 641, 801, 220, 162, 819, 984, 589, 513, 495, 799, + 161, 604, 958, 533, 221, 400, 386, 867, 600, 782, + 382, 596, 414, 171, 516, 375, 682, 485, 911, 276, + 98, 553, 163, 354, 666, 933, 424, 341, 533, 870, + 227, 730, 475, 186, 263, 647, 537, 686, 600, 224, + 469, 68, 770, 919, 190, 373, 294, 822, 808, 206, + 184, 943, 795, 384, 383, 461, 404, 758, 839, 887, + 715, 67, 618, 276, 204, 918, 873, 777, 604, 560, + 951, 160, 578, 722, 79, 804, 96, 409, 713, 940, + 652, 934, 970, 447, 318, 353, 859, 672, 112, 785, + 645, 863, 803, 350, 139, 93, 354, 99, 820, 908, + 609, 772, 154, 274, 580, 184, 79, 626, 630, 742, + 653, 282, 762, 623, 680, 81, 927, 626, 789, 125, + 411, 521, 938, 300, 821, 78, 343, 175, 128, 250, + 170, 774, 972, 275, 999, 639, 495, 78, 352, 126, + 857, 956, 358, 619, 580, 124, 737, 594, 701, 612, + 669, 112, 134, 694, 363, 992, 809, 743, 168, 974, + 944, 375, 748, 52, 600, 747, 642, 182, 862, 81, + 344, 805, 988, 739, 511, 655, 814, 334, 249, 515, + 897, 955, 664, 981, 649, 113, 974, 459, 893, 228, + 433, 837, 553, 268, 926, 240, 102, 654, 459, 51, + 686, 754, 806, 760, 493, 403, 415, 394, 687, 700, + 946, 670, 656, 610, 738, 392, 760, 799, 887, 653, + 978, 321, 576, 617, 626, 502, 894, 679, 243, 440, + 680, 879, 194, 572, 640, 724, 926, 56, 204, 700, + 707, 151, 457, 449, 797, 195, 791, 558, 945, 679, + 297, 59, 87, 824, 713, 663, 412, 693, 342, 606, + 134, 108, 571, 364, 631, 212, 174, 643, 304, 329, + 343, 97, 430, 751, 497, 314, 983, 374, 822, 928, + 140, 206, 73, 263, 980, 736, 876, 478, 430, 305, + 170, 514, 364, 692, 829, 82, 855, 953, 676, 246, + 369, 970, 294, 750, 807, 827, 150, 790, 288, 923, + 804, 378, 215, 828, 592, 281, 565, 555, 710, 82, + 896, 831, 547, 261, 524, 462, 293, 465, 502, 56, + 661, 821, 976, 991, 658, 869, 905, 758, 745, 193, + 768, 550, 608, 933, 378, 286, 215, 979, 792, 961, + 61, 688, 793, 644, 986, 403, 106, 366, 905, 644, + 372, 567, 466, 434, 645, 210, 389, 550, 919, 135, + 780, 773, 635, 389, 707, 100, 626, 958, 165, 504, + 920, 176, 193, 713, 857, 265, 203, 50, 668, 108, + 645, 990, 626, 197, 510, 357, 358, 850, 858, 364, + 936, 638 +}; + + +/*-------------------------------------------------------------*/ +/*--- end randtable.c ---*/ +/*-------------------------------------------------------------*/ diff --git a/pristine-bz2 b/pristine-bz2 new file mode 100755 index 0000000..53c42bc --- /dev/null +++ b/pristine-bz2 @@ -0,0 +1,285 @@ +#!/usr/bin/perl + +=head1 NAME + +pristine-bz2 - regenerate pristine bz2 files + +=head1 SYNOPSIS + +B [-vdk] gendelta I I + +B [-vdk] genbz2 I I + +=head1 DESCRIPTION + +This is a complement to the pristine-tar(1) command. Normally you +don't need to run it by hand, since pristine-tar calls it as necessary +to handle .tar.bz2 files. + +pristine-bz2 gendelta takes the specified I file, and generates a +small binary I file that can later be used by pristine-bz2 genbz2 +to recreate the original file. + +pristine-bz2 genbz2 takes the specified I file, and compresses the +specified input I (which must be identical to the contents of the +original bz2 file). The resulting file will be identical to +the original gz file used to create the delta. + +The approach used to regenerate the original bz2 file is to figure out +how it was produced -- what compression level was used, whether it was +built with bzip2(1) or with pbzip2(1). + +Note that other tools exist, like bzip2smp or dbzip2, but they are +said to be bit-identical with bzip2. Anyway, bzip2 looks like the most +widespread implementation, so it's hard to find bzip2 files that make +pristine-bz2 fail. Please report! + +The deprecated bzip1 compression method hasn't been implemented. + +If the delta filename is "-", pristine-bz2 reads or writes it to stdio. + +=head1 OPTIONS + +=over 4 + +=item -v + +Verbose mode, show each command that is run. + +=item -d + +Debug mode. + +=item -k + +Don't clean up the temporary directory on exit. + +=item -t + +Try harder to determine how to generate deltas of difficult bz2 files. + +=back + +=head1 ENVIRONMENT + +=over 4 + +=item B + +Specifies a location to place temporary files, other than the default. + +=back + +=head1 AUTHOR + +Joey Hess , +Faidon Liambotis , +Cyril Brulebois + +Licensed under the GPL, version 2. + +=cut + +use warnings; +use strict; +use Pristine::Tar; +use Pristine::Tar::Delta; +use Pristine::Tar::Formats; +use File::Basename qw/basename/; +use IO::Handle; + +delete $ENV{BZIP}; +delete $ENV{BZIP2}; + +my @supported_bzip2_programs = qw(bzip2 pbzip2 zgz); + +my $try=0; + +dispatch( + commands => { + usage => [\&usage], + genbz2 => [\&genbz2, 2], + gendelta => [\&gendelta, 2], + }, + options => { + "t|try!" => \$try, + }, +); + +sub usage { + print STDERR "Usage: pristine-bz2 [-vdkt] gendelta file.bz2 delta\n"; + print STDERR " pristine-bz2 [-vdkt] genbz2 delta file\n"; +} + +sub readbzip2 { + my $filename = shift; + + if (! is_bz2($filename)) { + error "This is not a valid BZip2 archive."; + } + + open(BZIP2, "< $filename") + or die("Could not open '$filename' for reading: $!\n"); + + my $chars; + if (read(BZIP2, $chars, 4) != 4) { + die("Unable to read from input\n"); + } + + my ($id1, $id2, $method, $level) + = unpack("CCCC", $chars); + # we actually want the value, not the ascii position + $level-=48; + + if ($level !~ /^[1-9]$/) { + error "Unknown compression level $level\n"; + } + + close(BZIP2); + + return ($level); +} + +sub predictbzip2args { + my ($level, $program) = @_; + + my @args=["-$level"]; + + if ($program eq 'zgz') { + @args=["-$level", "--old-bzip2"]; + push @args, ["-$level", "--suse-bzip2"]; + push @args, ["-$level", "--suse-pbzip2"]; + } + + return @args; +} + +sub testvariant { + my ($old, $tmpin, $bzip2_program, @args) = @_; + + # some compressors eat the uncompressed file, some + # do not; restore as needed. (Note that file name, + # mode, mtime do not matter to bzip2.) + if (! -e $tmpin) { + doit("cp", "$tmpin.bak", "$tmpin"); + } + + my $new=$tmpin.'.bz2'; + unlink($new); + + # try bzip2'ing with the arguments passed + if ($bzip2_program ne 'zgz') { + doit($bzip2_program, @args, $tmpin); + } + else { + doit_redir($tmpin, $new, $bzip2_program, @args); + } + unless (-e $new) { + die("$bzip2_program failed, aborting"); + } + + # and compare the generated with the original + return !comparefiles($old, $new); +} + +sub reproducebzip2 { + my $orig=shift; + + my $wd=tempdir(); + + my $tmpin="$wd/test"; + doit_redir($orig, "$tmpin.bak", "bzip2", "-dc"); + + # read fields from bzip2 headers + my ($level) = readbzip2($orig); + debug("level: $level"); + + foreach my $program (@supported_bzip2_programs) { + # try to guess the bzip2 arguments that are needed by the + # header information + foreach my $args (predictbzip2args($level, $program)) { + testvariant($orig, $tmpin, $program, @$args) + && return $program, @$args; + } + } + + # 7z has a weird syntax, not supported yet, as not seen in the wild + #testvariant($orig, $tmpin, "7z", "-mx$level", "a", "$tmpin.bz2") + # && return "7z", "-mx$level", "a" ; # XXX need to include outfile + + # pbzip2 -b option affects output, but cannot be detected from a + # header. + if ($try) { + my @args = @{predictbzip2args($level, "pbzip2")->[0]}; + print STDERR "pristine-bz2 will have to try especially hard to reproduce $orig\n"; + print STDERR "(This could take a long time.)\n"; + my %tried; + $tried{9}=1; # default + # Try searching for likely candidates first, and fill in. + # It could go higher than 100, but have to stop somewhere. + STDERR->autoflush(1); + foreach my $try (1..10, + 15, 20, 30, 35, 40, 45, 50, 55, 60, 65, 70, 75, 80, 85, 90, 95, + 1..100) { + next if $tried{$try}; + $tried{$try}=1; + print STDERR "\r\tblock size: $try "; + testvariant($orig, $tmpin, "pbzip2", "-b${try}", @args) && + return "pbzip2", "-b${try}", @args; + } + print STDERR "\n"; + } + + print STDERR "pristine-bz2 failed to reproduce build of $orig\n"; + print STDERR "(Please file a bug report.)\n"; + exit 1; +} + +sub genbz2 { + my $deltafile=shift; + my $file=shift; + + my $delta=Pristine::Tar::Delta::read(Tarball => $deltafile); + Pristine::Tar::Delta::assert($delta, type => "bz2", maxversion => 2, + fields => [qw{params program}]); + + my @params=split(' ', $delta->{params}); + while (@params) { + my $param=shift @params; + + next if $param=~/^(-[1-9])$/; + next if $param eq '--old-bzip2'; + next if $param eq '--suse-bzip2'; + next if $param eq '--suse-pbzip2'; + die "paranoia check failed on params from delta (@params)"; + } + @params=split(' ', $delta->{params}); + + my $program=$delta->{program}; + if (! grep { $program eq $_ } @supported_bzip2_programs) { + die "paranoia check failed on program from delta ($program)"; + } + + if ($program eq 'zgz') { + # unlike bzip2, zgz only uses stdio + doit_redir($file, "$file.bz2", $program, @params); + } + else { + doit($program, @params, $file); + } + doit("rm", "-f", $file); +} + +sub gendelta { + my $bzip2file=shift; + my $deltafile=shift; + + my ($program, @params) = reproducebzip2($bzip2file); + + Pristine::Tar::Delta::write(Tarball => $deltafile, { + version => '2.0', + type => 'bz2', + params => "@params", + program => $program, + }); +} diff --git a/pristine-bz2.1 b/pristine-bz2.1 new file mode 100644 index 0000000..e80ce05 --- /dev/null +++ b/pristine-bz2.1 @@ -0,0 +1,191 @@ +.\" Automatically generated by Pod::Man 2.25 (Pod::Simple 3.16) +.\" +.\" Standard preamble: +.\" ======================================================================== +.de Sp \" Vertical space (when we can't use .PP) +.if t .sp .5v +.if n .sp +.. +.de Vb \" Begin verbatim text +.ft CW +.nf +.ne \\$1 +.. +.de Ve \" End verbatim text +.ft R +.fi +.. +.\" Set up some character translations and predefined strings. \*(-- will +.\" give an unbreakable dash, \*(PI will give pi, \*(L" will give a left +.\" double quote, and \*(R" will give a right double quote. \*(C+ will +.\" give a nicer C++. Capital omega is used to do unbreakable dashes and +.\" therefore won't be available. \*(C` and \*(C' expand to `' in nroff, +.\" nothing in troff, for use with C<>. +.tr \(*W- +.ds C+ C\v'-.1v'\h'-1p'\s-2+\h'-1p'+\s0\v'.1v'\h'-1p' +.ie n \{\ +. ds -- \(*W- +. ds PI pi +. if (\n(.H=4u)&(1m=24u) .ds -- \(*W\h'-12u'\(*W\h'-12u'-\" diablo 10 pitch +. if (\n(.H=4u)&(1m=20u) .ds -- \(*W\h'-12u'\(*W\h'-8u'-\" diablo 12 pitch +. ds L" "" +. ds R" "" +. ds C` "" +. ds C' "" +'br\} +.el\{\ +. ds -- \|\(em\| +. ds PI \(*p +. ds L" `` +. ds R" '' +'br\} +.\" +.\" Escape single quotes in literal strings from groff's Unicode transform. +.ie \n(.g .ds Aq \(aq +.el .ds Aq ' +.\" +.\" If the F register is turned on, we'll generate index entries on stderr for +.\" titles (.TH), headers (.SH), subsections (.SS), items (.Ip), and index +.\" entries marked with X<> in POD. Of course, you'll have to process the +.\" output yourself in some meaningful fashion. +.ie \nF \{\ +. de IX +. tm Index:\\$1\t\\n%\t"\\$2" +.. +. nr % 0 +. rr F +.\} +.el \{\ +. de IX +.. +.\} +.\" +.\" Accent mark definitions (@(#)ms.acc 1.5 88/02/08 SMI; from UCB 4.2). +.\" Fear. Run. Save yourself. No user-serviceable parts. +. \" fudge factors for nroff and troff +.if n \{\ +. ds #H 0 +. ds #V .8m +. ds #F .3m +. ds #[ \f1 +. ds #] \fP +.\} +.if t \{\ +. ds #H ((1u-(\\\\n(.fu%2u))*.13m) +. ds #V .6m +. ds #F 0 +. ds #[ \& +. ds #] \& +.\} +. \" simple accents for nroff and troff +.if n \{\ +. ds ' \& +. ds ` \& +. ds ^ \& +. ds , \& +. ds ~ ~ +. ds / +.\} +.if t \{\ +. ds ' \\k:\h'-(\\n(.wu*8/10-\*(#H)'\'\h"|\\n:u" +. ds ` \\k:\h'-(\\n(.wu*8/10-\*(#H)'\`\h'|\\n:u' +. ds ^ \\k:\h'-(\\n(.wu*10/11-\*(#H)'^\h'|\\n:u' +. ds , \\k:\h'-(\\n(.wu*8/10)',\h'|\\n:u' +. ds ~ \\k:\h'-(\\n(.wu-\*(#H-.1m)'~\h'|\\n:u' +. ds / \\k:\h'-(\\n(.wu*8/10-\*(#H)'\z\(sl\h'|\\n:u' +.\} +. \" troff and (daisy-wheel) nroff accents +.ds : \\k:\h'-(\\n(.wu*8/10-\*(#H+.1m+\*(#F)'\v'-\*(#V'\z.\h'.2m+\*(#F'.\h'|\\n:u'\v'\*(#V' +.ds 8 \h'\*(#H'\(*b\h'-\*(#H' +.ds o \\k:\h'-(\\n(.wu+\w'\(de'u-\*(#H)/2u'\v'-.3n'\*(#[\z\(de\v'.3n'\h'|\\n:u'\*(#] +.ds d- \h'\*(#H'\(pd\h'-\w'~'u'\v'-.25m'\f2\(hy\fP\v'.25m'\h'-\*(#H' +.ds D- D\\k:\h'-\w'D'u'\v'-.11m'\z\(hy\v'.11m'\h'|\\n:u' +.ds th \*(#[\v'.3m'\s+1I\s-1\v'-.3m'\h'-(\w'I'u*2/3)'\s-1o\s+1\*(#] +.ds Th \*(#[\s+2I\s-2\h'-\w'I'u*3/5'\v'-.3m'o\v'.3m'\*(#] +.ds ae a\h'-(\w'a'u*4/10)'e +.ds Ae A\h'-(\w'A'u*4/10)'E +. \" corrections for vroff +.if v .ds ~ \\k:\h'-(\\n(.wu*9/10-\*(#H)'\s-2\u~\d\s+2\h'|\\n:u' +.if v .ds ^ \\k:\h'-(\\n(.wu*10/11-\*(#H)'\v'-.4m'^\v'.4m'\h'|\\n:u' +. \" for low resolution devices (crt and lpr) +.if \n(.H>23 .if \n(.V>19 \ +\{\ +. ds : e +. ds 8 ss +. ds o a +. ds d- d\h'-1'\(ga +. ds D- D\h'-1'\(hy +. ds th \o'bp' +. ds Th \o'LP' +. ds ae ae +. ds Ae AE +.\} +.rm #[ #] #H #V #F C +.\" ======================================================================== +.\" +.IX Title "PRISTINE-BZ2 1" +.TH PRISTINE-BZ2 1 "2012-08-27" "perl v5.14.2" "pristine-bz2" +.\" For nroff, turn off justification. Always turn off hyphenation; it makes +.\" way too many mistakes in technical documents. +.if n .ad l +.nh +.SH "NAME" +pristine\-bz2 \- regenerate pristine bz2 files +.SH "SYNOPSIS" +.IX Header "SYNOPSIS" +\&\fBpristine\-bz2\fR [\-vdk] gendelta \fIfile.bz2\fR \fIdelta\fR +.PP +\&\fBpristine\-bz2\fR [\-vdk] genbz2 \fIdelta\fR \fIfile\fR +.SH "DESCRIPTION" +.IX Header "DESCRIPTION" +This is a complement to the \fIpristine\-tar\fR\|(1) command. Normally you +don't need to run it by hand, since pristine-tar calls it as necessary +to handle .tar.bz2 files. +.PP +pristine\-bz2 gendelta takes the specified \fIbz2\fR file, and generates a +small binary \fIdelta\fR file that can later be used by pristine\-bz2 genbz2 +to recreate the original file. +.PP +pristine\-bz2 genbz2 takes the specified \fIdelta\fR file, and compresses the +specified input \fIfile\fR (which must be identical to the contents of the +original bz2 file). The resulting file will be identical to +the original gz file used to create the delta. +.PP +The approach used to regenerate the original bz2 file is to figure out +how it was produced \*(-- what compression level was used, whether it was +built with \fIbzip2\fR\|(1) or with \fIpbzip2\fR\|(1). +.PP +Note that other tools exist, like bzip2smp or dbzip2, but they are +said to be bit-identical with bzip2. Anyway, bzip2 looks like the most +widespread implementation, so it's hard to find bzip2 files that make +pristine\-bz2 fail. Please report! +.PP +The deprecated bzip1 compression method hasn't been implemented. +.PP +If the delta filename is \*(L"\-\*(R", pristine\-bz2 reads or writes it to stdio. +.SH "OPTIONS" +.IX Header "OPTIONS" +.IP "\-v" 4 +.IX Item "-v" +Verbose mode, show each command that is run. +.IP "\-d" 4 +.IX Item "-d" +Debug mode. +.IP "\-k" 4 +.IX Item "-k" +Don't clean up the temporary directory on exit. +.IP "\-t" 4 +.IX Item "-t" +Try harder to determine how to generate deltas of difficult bz2 files. +.SH "ENVIRONMENT" +.IX Header "ENVIRONMENT" +.IP "\fB\s-1TMPDIR\s0\fR" 4 +.IX Item "TMPDIR" +Specifies a location to place temporary files, other than the default. +.SH "AUTHOR" +.IX Header "AUTHOR" +Joey Hess , +Faidon Liambotis , +Cyril Brulebois +.PP +Licensed under the \s-1GPL\s0, version 2. diff --git a/pristine-gz b/pristine-gz new file mode 100755 index 0000000..3668256 --- /dev/null +++ b/pristine-gz @@ -0,0 +1,325 @@ +#!/usr/bin/perl + +=head1 NAME + +pristine-gz - regenerate pristine gz files + +=head1 SYNOPSIS + +B [-vdk] gendelta I I + +B [-vdk] gengz I I + +=head1 DESCRIPTION + +This is a complement to the pristine-tar(1) command. Normally you don't +need to run it by hand, since pristine-tar calls it as necessary to handle +.tar.gz files. + +pristine-gz gendelta takes the specified I file, and generates a +small binary I file that can later be used by pristine-gz gengz +to recreate the original file. + +pristine-gz gengz takes the specified I file, and compresses +the specified input I (which must be identical to the contents +of the original gz file). The resulting file will be identical to +the original gz file used to create the delta. + +The approach used to regenerate the original gz file is to figure out how +it was produced -- what compression level was used, whether it was built +with GNU gzip(1) or with a library or BSD version, whether the --rsyncable +option was used, etc, and to reproduce this build environment when +regenerating the gz. + +This approach will work for about 99.5% of cases. One example of a case it +cannot currently support is a gz file that has been produced by appending +together multiple gz files. + +For the few where it doesn't work, a binary diff will be included in the +delta between the closest regneratable gz file and the original. In +the worst case, the diff will include the entire content of the original +gz file, resulting in a larger than usual delta. If the delta is much +larger than usual, pristine-gz will print a warning. + +If the delta filename is "-", pristine-gz reads or writes it to stdio. + +=head1 OPTIONS + +=over 4 + +=item -v + +=item --verbose + +Verbose mode, show each command that is run. + +=item -d + +=item --debug + +Debug mode. + +=item -k + +=item --keep + +Don't clean up the temporary directory on exit. + +=back + +=head1 ENVIRONMENT + +=over 4 + +=item B + +Specifies a location to place temporary files, other than the default. + +=back + +=head1 AUTHOR + +Joey Hess , +Faidon Liambotis +Josh Triplett + +Licensed under the GPL, version 2. + +=cut + +use warnings; +use strict; +use Pristine::Tar; +use Pristine::Tar::Delta; +use Pristine::Tar::Formats; +use File::Basename qw/basename/; + +delete $ENV{GZIP}; + +dispatch( + commands => { + usage => [\&usage], + gendelta => [\&gendelta, 2], + gengz => [\&gengz, 2], + }, +); + +sub usage { + print STDERR "Usage: pristine-gz [-vdk] gendelta file.gz delta\n"; + print STDERR " pristine-gz [-vdk] gengz delta file\n"; +} + +sub readgzip { + my $filename = shift; + + if (! is_gz($filename)) { + error "This is not a valid GZip archive."; + } + + open(GZIP, "< $filename") + or die("Could not open '$filename' for reading: $!\n"); + + my $chars; + if (read(GZIP, $chars, 10) != 10) { + die("Unable to read 10 bytes from input\n"); + } + + my ($id1, $id2, $method, $flags, $timestamp, $level, $os, $name) + = (unpack("CCCb8VCC", $chars), ''); + + my @flags = split(//, $flags); + + if ($flags[$fconstants{GZIP_FLAG_FNAME}]) { + # read a null-terminated string + $name .= $chars + while (read(GZIP, $chars, 1) == 1 && ord($chars) != 0); + } + close(GZIP); + + return (\@flags, $timestamp, $level, $os, $name); +} + +sub predictgzipargs { + my ($flags, $timestamp, $level) = @_; + my @flags = @$flags; + + my @args; + unless ($flags[$fconstants{GZIP_FLAG_FNAME}]) { + push @args, '-n'; + push @args, '-M' if $timestamp; + } + + if ($level == $fconstants{GZIP_COMPRESSION_BEST}) { + push @args, '-9' + } + elsif ($level == $fconstants{GZIP_COMPRESSION_FAST}) { + push @args, '-1' + } + + return @args; +} + +sub reproducegz { + my ($orig, $tempdir, $tempin) = @_; + my $tempout="$tempdir/test.gz"; + doit_redir($orig, $tempin, "gzip", "-dc"); + + # read fields from gzip headers + my ($flags, $timestamp, $level, $os, $name) = readgzip($orig); + debug("flags: [".join(", ", @$flags). + "] timestamp: $timestamp level: $level os: $os name: $name"); + + # try to guess the gzip arguments that are needed by the header + # information + my @args = predictgzipargs($flags, $timestamp, $level); + my @extraargs = ("-F", $name, "-T", $timestamp); + + my @try; + + if ($os == $fconstants{GZIP_OS_UNIX}) { + # for 98% of the cases the simple heuristic above works + # and it was produced by gnu gzip. + push @try, ['--gnu', @args]; + push @try, ['--gnu', @args, '--rsyncable']; + push @try, ['--gnu', @args, '--new-rsyncable']; + } + + if ($name =~ /\//) { + push @args, "--original-name", $name; + @extraargs = ("-T", $timestamp); + $name = basename($name); + } + + # set the Operating System flag to the one found in the original + # archive + push @args, ("--osflag", $os) if $os != $fconstants{GZIP_OS_UNIX}; + + # many of the .gz out there are created using the BSD version of + # gzip which is using the zlib library; try with our version of + # bsd-gzip with added support for the undocumented GNU gzip options + # -m and -M + push @try, [@args]; + + # Perl's Compress::Raw::Zlib interfaces directly with zlib and + # apparently is the only implementation out there which tunes a very + # specific parameter of zlib, memLevel, to 9, instead of 8 which is + # the default. The module is used, among others, by Compress::Gzip + # which in turn is used by IO::Zlib. It was found on the real world on + # tarballs generated by Perl 5.10's Module::Build (cf. #618284) + push @try, [@args, '--quirk', 'perl']; + push @try, [@args, '--quirk', 'perl', '-1']; + + # apparently, there is an old version of bsd-gzip (or a similar tool + # based on zlib) that creates gz using maximum compression (-9) but + # does not indicate so in the headers. surprisingly, there are many + # .gz out there. + push @try, [@args, '--quirk', 'buggy-bsd']; + + # Windows' NTFS gzip implementation; quirk is really really evil + # it should be the last test: it can result in a corrupted archive! + if ($os == $fconstants{GZIP_OS_NTFS}) { + pop @args; pop @args; # ntfs quirk implies NTFS osflag + push @try, [@args, '--quirk', 'ntfs']; + } + + my $origsize=(stat($orig))[7]; + my ($bestvariant, $bestsize); + + foreach my $variant (@try) { + doit_redir($tempin, $tempout, 'zgz', @$variant, @extraargs, '-c'); + if (!comparefiles($orig, $tempout)) { + # success + return $name, $timestamp, undef, @$variant; + } + else { + # generate a binary delta and see if this is the + # best variant so far + my $ret=system("xdelta delta -0 --pristine $tempout $orig $tempdir/tmpdelta 2>/dev/null") >> 8; + # xdelta exits 1 on success + if ($ret == 1) { + my $size=(stat("$tempdir/tmpdelta"))[7]; + if (! defined $bestsize || $size < $bestsize) { + $bestvariant = $variant; + $bestsize=$size; + rename("$tempdir/tmpdelta", "$tempdir/bestdelta") || die "rename: $!"; + } + } + } + } + + # Nothing worked perfectly, so use the delta that was generated for + # the best variant + my $percentover=100 - int (($origsize-$bestsize)/$origsize*100); + debug("Using delta to best variant, bloating $percentover%: @$bestvariant"); + if ($percentover > 10) { + print STDERR "warning: pristine-gz cannot reproduce build of $orig; "; + if ($percentover >= 100) { + print STDERR "storing entire file in delta!\n"; + } + else { + print STDERR "storing $percentover% size diff in delta\n"; + } + print STDERR "(Please consider filing a bug report so the delta size can be improved.)\n"; + } + return $name, $timestamp, "$tempdir/bestdelta", @$bestvariant; +} + +sub gengz { + my $deltafile=shift; + my $file=shift; + + my $delta=Pristine::Tar::Delta::read(Tarball => $deltafile); + Pristine::Tar::Delta::assert($delta, type => "gz", maxversion => 3, + fields => [qw{params filename timestamp}]); + + my @params=split(' ', $delta->{params}); + while (@params) { + $_=shift @params; + next if /^(--gnu|--rsyncable|--new-rsyncable|-[nmM1-9])$/; + if (/^(--original-name|--quirk|--osflag)$/) { + shift @params; + next; + } + die "paranoia check failed on params from delta (@params)"; + } + @params=split(' ', $delta->{params}); + + my $filename=$delta->{filename}; + $filename=~s/^.*\///; # basename isn't strong enough + + my @zgz=("zgz", @params, "-T", $delta->{timestamp}); + if (! grep { $_ eq "--original-name" } @params) { + push @zgz, "-F", $filename; + } + push @zgz, "-c"; + + if (exists $delta->{delta}) { + my $tempdir=tempdir(); + my $tfile="$tempdir/".basename($file).".gz"; + doit_redir($file, $tfile, @zgz); + doit("xdelta", "patch", "--pristine", $delta->{delta}, $tfile, "$file.gz"); + } + else { + doit_redir("$file", "$file.gz", @zgz); + } + doit("rm", "-f", $file); +} + +sub gendelta { + my $gzfile=shift; + my $deltafile=shift; + + my $tempdir=tempdir(); + my ($filename, $timestamp, $xdelta, @params)= + reproducegz($gzfile, $tempdir, "$tempdir/test"); + + Pristine::Tar::Delta::write(Tarball => $deltafile, { + version => (defined $xdelta ? "3.0" : "2.0"), + type => 'gz', + params => "@params", + filename => basename($filename), + timestamp => $timestamp, + (defined $xdelta ? (delta => $xdelta) : ()), + }); +} + diff --git a/pristine-gz.1 b/pristine-gz.1 new file mode 100644 index 0000000..a32197d --- /dev/null +++ b/pristine-gz.1 @@ -0,0 +1,205 @@ +.\" Automatically generated by Pod::Man 2.25 (Pod::Simple 3.16) +.\" +.\" Standard preamble: +.\" ======================================================================== +.de Sp \" Vertical space (when we can't use .PP) +.if t .sp .5v +.if n .sp +.. +.de Vb \" Begin verbatim text +.ft CW +.nf +.ne \\$1 +.. +.de Ve \" End verbatim text +.ft R +.fi +.. +.\" Set up some character translations and predefined strings. \*(-- will +.\" give an unbreakable dash, \*(PI will give pi, \*(L" will give a left +.\" double quote, and \*(R" will give a right double quote. \*(C+ will +.\" give a nicer C++. Capital omega is used to do unbreakable dashes and +.\" therefore won't be available. \*(C` and \*(C' expand to `' in nroff, +.\" nothing in troff, for use with C<>. +.tr \(*W- +.ds C+ C\v'-.1v'\h'-1p'\s-2+\h'-1p'+\s0\v'.1v'\h'-1p' +.ie n \{\ +. ds -- \(*W- +. ds PI pi +. if (\n(.H=4u)&(1m=24u) .ds -- \(*W\h'-12u'\(*W\h'-12u'-\" diablo 10 pitch +. if (\n(.H=4u)&(1m=20u) .ds -- \(*W\h'-12u'\(*W\h'-8u'-\" diablo 12 pitch +. ds L" "" +. ds R" "" +. ds C` "" +. ds C' "" +'br\} +.el\{\ +. ds -- \|\(em\| +. ds PI \(*p +. ds L" `` +. ds R" '' +'br\} +.\" +.\" Escape single quotes in literal strings from groff's Unicode transform. +.ie \n(.g .ds Aq \(aq +.el .ds Aq ' +.\" +.\" If the F register is turned on, we'll generate index entries on stderr for +.\" titles (.TH), headers (.SH), subsections (.SS), items (.Ip), and index +.\" entries marked with X<> in POD. Of course, you'll have to process the +.\" output yourself in some meaningful fashion. +.ie \nF \{\ +. de IX +. tm Index:\\$1\t\\n%\t"\\$2" +.. +. nr % 0 +. rr F +.\} +.el \{\ +. de IX +.. +.\} +.\" +.\" Accent mark definitions (@(#)ms.acc 1.5 88/02/08 SMI; from UCB 4.2). +.\" Fear. Run. Save yourself. No user-serviceable parts. +. \" fudge factors for nroff and troff +.if n \{\ +. ds #H 0 +. ds #V .8m +. ds #F .3m +. ds #[ \f1 +. ds #] \fP +.\} +.if t \{\ +. ds #H ((1u-(\\\\n(.fu%2u))*.13m) +. ds #V .6m +. ds #F 0 +. ds #[ \& +. ds #] \& +.\} +. \" simple accents for nroff and troff +.if n \{\ +. ds ' \& +. ds ` \& +. ds ^ \& +. ds , \& +. ds ~ ~ +. ds / +.\} +.if t \{\ +. ds ' \\k:\h'-(\\n(.wu*8/10-\*(#H)'\'\h"|\\n:u" +. ds ` \\k:\h'-(\\n(.wu*8/10-\*(#H)'\`\h'|\\n:u' +. ds ^ \\k:\h'-(\\n(.wu*10/11-\*(#H)'^\h'|\\n:u' +. ds , \\k:\h'-(\\n(.wu*8/10)',\h'|\\n:u' +. ds ~ \\k:\h'-(\\n(.wu-\*(#H-.1m)'~\h'|\\n:u' +. ds / \\k:\h'-(\\n(.wu*8/10-\*(#H)'\z\(sl\h'|\\n:u' +.\} +. \" troff and (daisy-wheel) nroff accents +.ds : \\k:\h'-(\\n(.wu*8/10-\*(#H+.1m+\*(#F)'\v'-\*(#V'\z.\h'.2m+\*(#F'.\h'|\\n:u'\v'\*(#V' +.ds 8 \h'\*(#H'\(*b\h'-\*(#H' +.ds o \\k:\h'-(\\n(.wu+\w'\(de'u-\*(#H)/2u'\v'-.3n'\*(#[\z\(de\v'.3n'\h'|\\n:u'\*(#] +.ds d- \h'\*(#H'\(pd\h'-\w'~'u'\v'-.25m'\f2\(hy\fP\v'.25m'\h'-\*(#H' +.ds D- D\\k:\h'-\w'D'u'\v'-.11m'\z\(hy\v'.11m'\h'|\\n:u' +.ds th \*(#[\v'.3m'\s+1I\s-1\v'-.3m'\h'-(\w'I'u*2/3)'\s-1o\s+1\*(#] +.ds Th \*(#[\s+2I\s-2\h'-\w'I'u*3/5'\v'-.3m'o\v'.3m'\*(#] +.ds ae a\h'-(\w'a'u*4/10)'e +.ds Ae A\h'-(\w'A'u*4/10)'E +. \" corrections for vroff +.if v .ds ~ \\k:\h'-(\\n(.wu*9/10-\*(#H)'\s-2\u~\d\s+2\h'|\\n:u' +.if v .ds ^ \\k:\h'-(\\n(.wu*10/11-\*(#H)'\v'-.4m'^\v'.4m'\h'|\\n:u' +. \" for low resolution devices (crt and lpr) +.if \n(.H>23 .if \n(.V>19 \ +\{\ +. ds : e +. ds 8 ss +. ds o a +. ds d- d\h'-1'\(ga +. ds D- D\h'-1'\(hy +. ds th \o'bp' +. ds Th \o'LP' +. ds ae ae +. ds Ae AE +.\} +.rm #[ #] #H #V #F C +.\" ======================================================================== +.\" +.IX Title "PRISTINE-GZ 1" +.TH PRISTINE-GZ 1 "2012-03-18" "perl v5.14.2" "pristine-gz" +.\" For nroff, turn off justification. Always turn off hyphenation; it makes +.\" way too many mistakes in technical documents. +.if n .ad l +.nh +.SH "NAME" +pristine\-gz \- regenerate pristine gz files +.SH "SYNOPSIS" +.IX Header "SYNOPSIS" +\&\fBpristine-gz\fR [\-vdk] gendelta \fIfile.gz\fR \fIdelta\fR +.PP +\&\fBpristine-gz\fR [\-vdk] gengz \fIdelta\fR \fIfile\fR +.SH "DESCRIPTION" +.IX Header "DESCRIPTION" +This is a complement to the \fIpristine\-tar\fR\|(1) command. Normally you don't +need to run it by hand, since pristine-tar calls it as necessary to handle +\&.tar.gz files. +.PP +pristine-gz gendelta takes the specified \fIgz\fR file, and generates a +small binary \fIdelta\fR file that can later be used by pristine-gz gengz +to recreate the original file. +.PP +pristine-gz gengz takes the specified \fIdelta\fR file, and compresses +the specified input \fIfile\fR (which must be identical to the contents +of the original gz file). The resulting file will be identical to +the original gz file used to create the delta. +.PP +The approach used to regenerate the original gz file is to figure out how +it was produced \*(-- what compression level was used, whether it was built +with \s-1GNU\s0 \fIgzip\fR\|(1) or with a library or \s-1BSD\s0 version, whether the \-\-rsyncable +option was used, etc, and to reproduce this build environment when +regenerating the gz. +.PP +This approach will work for about 99.5% of cases. One example of a case it +cannot currently support is a gz file that has been produced by appending +together multiple gz files. +.PP +For the few where it doesn't work, a binary diff will be included in the +delta between the closest regneratable gz file and the original. In +the worst case, the diff will include the entire content of the original +gz file, resulting in a larger than usual delta. If the delta is much +larger than usual, pristine-gz will print a warning. +.PP +If the delta filename is \*(L"\-\*(R", pristine-gz reads or writes it to stdio. +.SH "OPTIONS" +.IX Header "OPTIONS" +.IP "\-v" 4 +.IX Item "-v" +.PD 0 +.IP "\-\-verbose" 4 +.IX Item "--verbose" +.PD +Verbose mode, show each command that is run. +.IP "\-d" 4 +.IX Item "-d" +.PD 0 +.IP "\-\-debug" 4 +.IX Item "--debug" +.PD +Debug mode. +.IP "\-k" 4 +.IX Item "-k" +.PD 0 +.IP "\-\-keep" 4 +.IX Item "--keep" +.PD +Don't clean up the temporary directory on exit. +.SH "ENVIRONMENT" +.IX Header "ENVIRONMENT" +.IP "\fB\s-1TMPDIR\s0\fR" 4 +.IX Item "TMPDIR" +Specifies a location to place temporary files, other than the default. +.SH "AUTHOR" +.IX Header "AUTHOR" +Joey Hess , +Faidon Liambotis +Josh Triplett +.PP +Licensed under the \s-1GPL\s0, version 2. diff --git a/pristine-tar b/pristine-tar new file mode 100755 index 0000000..14bac15 --- /dev/null +++ b/pristine-tar @@ -0,0 +1,821 @@ +#!/usr/bin/perl + +=head1 NAME + +pristine-tar - regenerate pristine tarballs + +=head1 SYNOPSIS + +B [-vdk] gendelta I I + +B [-vdk] gentar I I + +B [-vdk] [-m message] commit I [I] + +B [-vdk] checkout I + +B [-vdk] list + +=head1 DESCRIPTION + +pristine-tar can regenerate an exact copy of a pristine upstream tarball +using only a small binary I file and the contents of the tarball, +which are typically kept in an I branch in version control. + +The I file is designed to be checked into version control along-side +the I branch, thus allowing Debian packages to be built entirely +using sources in version control, without the need to keep copies of +upstream tarballs. + +pristine-tar supports compressed tarballs, calling out to pristine-gz(1), +pristine-bz2(1), and pristine-xz(1) to produce the pristine gzip, bzip2, +and xz files. + +=head1 COMMANDS + +=over 4 + +=item pristine-tar gendelta I I + +This takes the specified upstream I, and generates a small binary +delta file that can later be used by pristine-tar gentar to recreate the +tarball. + +If the delta filename is "-", it is written to standard output. + +=item pristine-tar gentar I I + +This takes the specified I file, and the files in the current +directory, which must have identical content to those in the upstream +tarball, and uses these to regenerate the pristine upstream I. + +If the delta filename is "-", it is read from standard input. + +=item pristine-tar commit I [I] + +B generates a pristine-tar delta file for the specified +I, and commits it to version control. The B +command can later be used to recreate the original tarball based only +on the information stored in version control. + +The I parameter specifies the tag or branch that contains the +same content that is present in the tarball. This defaults to +"refs/heads/upstream", or if there's no such branch, any +branch matching "upstream". The name of the tree it points to will be +recorded for later use by B. Note that the content +does not need to be 100% identical to the content of the tarball, but +if it is not, additional space will be used in the delta file. + +The delta files are stored in a branch named "pristine-tar", with filenames +corresponding to the input tarball, with ".delta" appended. This +branch is created or updated as needed to add each new delta. + +=item pristine-tar checkout I + +This regenerates a copy of the specified I using information +previously saved in version control by B. + +=item pristine-tar list + +This lists tarballs that pristine-tar is able to checkout from version +control. + +=back + +=head1 OPTIONS + +=over 4 + +=item -v + +=item --verbose + +Verbose mode, show each command that is run. + +=item -d + +=item --debug + +Debug mode. + +=item -k + +=item --keep + +Don't clean up the temporary directory on exit. + +=item -m message + +=item --message=message + +Use this option to specify a custom commit message to pristine-tar commit. + +=back + +=head1 EXAMPLES + +Suppose you maintain the hello package, in a git repository. You have +just created a tarball of the release, I, which you +will upload to a "forge" site. + +You want to ensure that, if the "forge" loses the tarball, you can always +recreate exactly that same tarball. And you'd prefer not to keep copies +of tarballs for every release, as that could use a lot of disk space +when hello gets the background mp3s and user-contributed levels you +are planning for version 2.0. + +The solution is to use pristine-tar to commit a delta file that efficiently +stores enough information to reproduce the tarball later. + + cd hello + git tag -s 1.0 + pristine-tar commit ../hello-1.0.tar.gz 1.0 + +Remember to tell git to push both the pristine-tar branch, and your tag: + + git push --all --tags + +Now it is a year later. The worst has come to pass; the "forge" lost +all its data, you deleted the tarballs to make room for bug report emails, +and you want to regenerate them. Happily, the git repository is still +available. + + git clone git://github.com/joeyh/hello.git + cd hello + pristine-tar checkout ../hello-1.0.tar.gz + +=head1 LIMITATIONS + +Only tarballs, gzipped tarballs, bzip2ed tarballs, and xzed tarballs +are currently supported. + +Currently only the git revision control system is supported by the +"checkout" and "commit" commands. It's ok if the working copy +is not clean or has uncommitted changes, or has changes staged in the +index; none of that will be touched by "checkout" or "commit". + +=head1 ENVIRONMENT + +=over 4 + +=item B + +Specifies a location to place temporary files, other than the default. + +=back + +=head1 AUTHOR + +Joey Hess + +Licensed under the GPL, version 2 or above. + +=cut + +use warnings; +use strict; +use Pristine::Tar; +use Pristine::Tar::Delta; +use Pristine::Tar::Formats; +use File::Path; +use File::Basename; +use Cwd qw{getcwd abs_path}; + +# Force locale to C since tar may output utf-8 filenames differently +# depending on the locale. +$ENV{LANG}='C'; + +# Don't let environment change tar's behavior. +delete $ENV{TAR_OPTIONS}; +delete $ENV{TAPE}; + +# The following two assignments are potentially munged during the +# build process to hold the values of TAR_PROGRAM and XDELTA_PROGRAM +# parameters as given to Makefile.PL. +my $tar_program = "tar"; +my $xdelta_program = "xdelta"; + +my $message; + +dispatch( + commands => { + usage => [\&usage], + gentar => [\&gentar, 2], + gendelta => [\&gendelta, 2], + commit => [\&commit], + ci => [\&commit, 1], + checkout => [\&checkout, 1], + co => [\&checkout, 1], + list => [\&list, 0], + }, + options => { + "m|message=s" => \$message, + }, +); + +sub usage { + print STDERR "Usage: pristine-tar [-vdk] gendelta tarball delta\n"; + print STDERR " pristine-tar [-vdk] gentar delta tarball\n"; + print STDERR " pristine-tar [-vdk] [-m message] commit tarball [upstream]\n"; + print STDERR " pristine-tar [-vdk] checkout tarball\n"; + print STDERR " pristine-tar list\n"; + exit 1; +} + +my $recreatetarball_tempdir; +sub recreatetarball { + my $manifestfile=shift; + my $source=shift; + my %options=@_; + + my $tempdir=tempdir(); + + my @manifest; + open (IN, "<", $manifestfile) || die "$manifestfile: $!"; + while () { + chomp; + push @manifest, $_; + } + close IN; + link($manifestfile, "$tempdir/manifest") || die "link $tempdir/manifest: $!"; + + # The manifest and source should have the same filenames, + # but the manifest probably has all the files under a common + # subdirectory. Check if it does. + my $subdir=""; + foreach my $file (@manifest) { + #debug("file: $file"); + if ($file=~m!^(/?[^/]+)(/|$)!) { + if (length $subdir && $subdir ne $1) { + debug("found file not in subdir $subdir: $file"); + $subdir=""; + last; + } + elsif (! length $subdir) { + $subdir=$1; + debug("set subdir to $subdir"); + } + } + else { + debug("found file not in subdir: $file"); + $subdir=""; + last; + } + } + + if (length $subdir) { + debug("subdir is $subdir"); + doit("mkdir", "$tempdir/workdir"); + $subdir="/$subdir"; + } + + if (! $options{clobber_source}) { + doit("cp", "-a", $source, "$tempdir/workdir$subdir"); + } + else { + doit("mv", $source, "$tempdir/workdir$subdir"); + } + + # It's important that this create an identical tarball each time + # for a given set of input files. So don't include file metadata + # in the tarball, since it can easily vary. + my $full_sweep=0; + foreach my $file (@manifest) { + if (-l "$tempdir/workdir/$file") { + # Can't set timestamp of a symlink, so + # replace the symlink with an empty file. + unlink("$tempdir/workdir/$file") || die "unlink: $!"; + open(OUT, ">", "$tempdir/workdir/$file") || die "open: $!"; + close OUT; + } + elsif (! -e "$tempdir/workdir/$file") { + debug("$file is listed in the manifest but may not be present in the source directory"); + $full_sweep=1; + + if ($options{create_missing}) { + # Avoid tar failing on the nonexistent item by + # creating a dummy directory. + debug("creating missing $file"); + mkpath "$tempdir/workdir/$file"; + } + } + + if (-d "$tempdir/workdir/$file" && (-u _ || -g _ || -k _)) { + # tar behaves weirdly for some special modes + # and ignores --mode, so clear them. + debug("chmod $file"); + chmod(0755, "$tempdir/workdir/$file") || + die "chmod: $!"; + } + } + + # Set file times only after modifying of the directory content is + # done. + foreach my $file (@manifest) { + if (-e "$tempdir/workdir/$file") { + utime(0, 0, "$tempdir/workdir/$file") || die "utime: $file: $!"; + } + } + + # If some files couldn't be matched up with the manifest, + # it's possible they do exist, but just with names that make sense + # to tar, but not to this program. Work around this and make sure + # such files have their metadata tweaked, by doing a full sweep of + # the tree. + if ($full_sweep) { + debug("doing full tree sweep to catch missing files"); + use File::Find; + find(sub { + if (-l $_) { + unlink($_) || die "unlink: $!"; + open(OUT, ">", $_) || die "open: $!"; + close OUT; + } + if (-d $_ && (-u _ || -g _ || -k _)) { + chmod(0755, $_) || + die "chmod: $!"; + } + }, "$tempdir/workdir"); + find(sub { + utime(0, 0, $_) || die "utime: $_: $!"; + }, "$tempdir/workdir"); + } + + delete $ENV{TAR_LONGLINK_100}; + $recreatetarball_tempdir=$tempdir; + return recreatetarball_helper(); +} + +sub recreatetarball_helper { + my $tempdir=$recreatetarball_tempdir; + + my $ret="$tempdir/recreatetarball"; + + doit($tar_program, "cf", $ret, "--owner", 0, "--group", 0, + "--numeric-owner", "-C", "$tempdir/workdir", + "--no-recursion", "--mode", "0644", + "--files-from", "$tempdir/manifest"); + + return $ret; +} + +sub recreatetarball_longlink_100 { + # For a long time, Debian's tar had a patch that made it output + # larger tar files if a filename was exactly 100 bytes. Now that + # Debian's tar has been fixed, in order to recreate the tarball + # created by that version of tar, we reply on on an environment + # variable to turn back on the old behavior. + # + # This variable is currently only available in Debian's tar, + # so users of non-debian tar who want to recreate tarballs from + # deltas created using the old version of Debian's tar are SOL. + + $ENV{TAR_LONGLINK_100}=1; + my $ret=recreatetarball_helper(); + delete $ENV{TAR_LONGLINK_100}; + return $ret; +} + +sub gentar { + my $deltafile=shift; + my $tarball=shift; + my %opts=@_; + + my $delta=Pristine::Tar::Delta::read(Tarball => $deltafile); + Pristine::Tar::Delta::assert($delta, type => "tar", maxversion => 2, + minversion => 2, fields => [qw{manifest delta}]); + + my $out=(defined $delta->{wrapper} + ? tempdir()."/".basename($tarball).".tmp" + : $tarball); + + my @try; + push @try, sub { recreatetarball($delta->{manifest}, getcwd, + clobber_source => 0, %opts) }; + push @try, \&recreatetarball_longlink_100; + + my $ok; + foreach my $variant (@try) { + my $recreatetarball=$variant->(); + my $ret=try_doit($xdelta_program, "patch", $delta->{delta}, $recreatetarball, $out); + if ($ret == 0) { + $ok=1; + last; + } + } + if (! $ok) { + error "Failed to reproduce original tarball. Please file a bug report."; + } + + if (defined $delta->{wrapper}) { + my $delta_wrapper=Pristine::Tar::Delta::read(Tarball => $delta->{wrapper}); + if (grep { $_ eq $delta_wrapper->{type} } qw{gz bz2 xz}) { + doit("pristine-".$delta_wrapper->{type}, + ($verbose ? "-v" : "--no-verbose"), + ($debug ? "-d" : "--no-debug"), + ($keep ? "-k" : "--no-keep"), + "gen".$delta_wrapper->{type}, + $delta->{wrapper}, $out); + doit("mv", "-f", $out.".".$delta_wrapper->{type}, $tarball); + } + else { + error "unknown wrapper file type: ". + $delta_wrapper->{type}; + } + } +} + +sub genmanifest { + my $tarball=shift; + my $manifest=shift; + + open(IN, "tar tf $tarball |") || die "tar tf: $!"; + open(OUT, ">", $manifest) || die "$!"; + while () { + chomp; + # ./ or / in the manifest just confuses tar + s/^\.?\/+//; + print OUT "$_\n" if length $_; + } + close IN; + close OUT; +} + +sub gendelta { + my $tarball=shift; + my $deltafile=shift; + my %opts=@_; + + my $tempdir=tempdir(); + my %delta; + + # Check to see if it's compressed, and get uncompressed tarball. + my $compression=undef; + if (is_gz($tarball)) { + $compression='gz'; + open(IN, "-|", "zcat", $tarball) || die "zcat: $!"; + open(OUT, ">", "$tempdir/origtarball") || die "$tempdir/origtarball: $!"; + print OUT $_ while ; + close IN || die "zcat: $!"; + close OUT || die "$tempdir/origtarball: $!"; + } + elsif (is_bz2($tarball)) { + $compression='bz2'; + open(IN, "-|", "bzcat", $tarball) || die "bzcat: $!"; + open(OUT, ">", "$tempdir/origtarball") || die "$tempdir/origtarball: $!"; + print OUT $_ while ; + close IN || die "bzcat: $!"; + close OUT || die "$tempdir/origtarball: $!"; + } + elsif (is_xz($tarball)) { + $compression='xz'; + open(IN, "-|", "xzcat", $tarball) || die "xzcat: $!"; + open(OUT, ">", "$tempdir/origtarball") || die "$tempdir/origtarball: $!"; + print OUT $_ while ; + close IN || die "xzcat: $!"; + close OUT || die "$tempdir/origtarball: $!"; + } + close IN; + + # Generate a wrapper file to recreate the compressed file. + if (defined $compression) { + $delta{wrapper}="$tempdir/wrapper"; + doit("pristine-$compression", + ($verbose ? "-v" : "--no-verbose"), + ($debug ? "-d" : "--no-debug"), + ($keep ? "-k" : "--no-keep"), + "gendelta", $tarball, $delta{wrapper}); + $tarball="$tempdir/origtarball"; + } + + $delta{manifest}="$tempdir/manifest"; + genmanifest($tarball, $delta{manifest}); + + my $recreatetarball; + if (! exists $opts{recreatetarball}) { + my $sourcedir="$tempdir/tmp"; + doit("mkdir", $sourcedir); + doit($tar_program, "xf", File::Spec->rel2abs($tarball), "-C", $sourcedir); + # if all files were in a subdir, use the subdir as the sourcedir + my @out=grep { $_ ne "$sourcedir/.." && $_ ne "$sourcedir/." } + (glob("$sourcedir/*"), glob("$sourcedir/.*")); + if ($#out == 0 && -d $out[0]) { + $sourcedir=$out[0]; + } + $recreatetarball=recreatetarball("$tempdir/manifest", $sourcedir, clobber_source => 1); + } + else { + $recreatetarball=$opts{recreatetarball}; + } + + $delta{delta}="$tempdir/delta"; + my $ret=system("$xdelta_program delta -0 --pristine $recreatetarball $tarball $delta{delta}") >> 8; + # xdelta exits 1 on success if there were differences + if ($ret != 1 && $ret != 0) { + error "xdelta failed with return code $ret"; + } + + if (-s $delta{delta} >= -s $tarball) { + print STDERR "error: excessively large binary delta for $tarball\n"; + if (! defined $compression) { + print STDERR "(Probably the tarball is compressed with an unsupported form of compression.)\n"; + } + else { + print STDERR "(Please consider filing a bug report.)\n"; + } + exit 1; + } + + Pristine::Tar::Delta::write(Tarball => $deltafile, { + version => 2, + type => 'tar', + %delta, + }); +} + +sub vcstype { + if (-e ".git" || + (exists $ENV{GIT_DIR} && length $ENV{GIT_DIR})) { + return 'git'; + } + else { + error("cannot determine type of vcs used for the current directory"); + } +} + +sub export { + my $upstream=shift; + + my $dest=tempdir(); + my $id; + + my $vcs=vcstype(); + if ($vcs eq "git") { + if (defined $upstream && $upstream =~ /[A-Za-z0-9]{40}/) { + $id=$upstream; + } + else { + if (! defined $upstream) { + $upstream='upstream'; + } + + my @reflines=map { chomp; $_ } `git show-ref \Q$upstream\E`; + if (! @reflines) { + error "failed to find ref using: git show-ref $upstream"; + } + + # if one line's ref matches exactly, use it + foreach my $line (@reflines) { + my ($b)=$line=~/^[A-Za-z0-9]+\s(.*)/; + if ($b eq $upstream || $b eq "refs/heads/$upstream") { + ($id)=$line=~/^([A-Za-z0-9]+)\s/; + last; + } + } + + if (! defined $id) { + if (@reflines == 1) { + ($id)=$reflines[0]=~/^([A-Za-z0-9]+)\s/; + } + else { + error "more than one ref matches \"$upstream\":\n". + join("\n", @reflines); + } + } + } + + # We have an id that is probably a commit. Let's get to the + # id of the actual tree instead. This makes us more robust + # against any later changes to the commit. + my $treeid=`git rev-parse '$id^{tree}'`; + chomp $treeid; + $id = $treeid if length $treeid; + + doit("git archive --format=tar \Q$id\E | (cd '$dest' && tar x)"); + } + else { + die "unsupported vcs $vcs"; + } + + return ($dest, $id); +} + +sub git_findbranch { + # Looks for a branch with the given name. If a local branch exists, + # returns it. Otherwise, looks for a remote branch, and if exactly + # one exists, returns that. If there's no such branch at all, returns + # undef. Finally, if there are multiple remote branches and no + # local branch, fails with an error. + my $branch=shift; + + my @reflines=split(/\n/, `git show-ref \Q$branch\E`); + my @remotes=grep { ! m/ refs\/heads\/\Q$branch\E$/ } @reflines; + if ($#reflines != $#remotes) { + return $branch; + } + else { + if (@reflines == 0) { + return undef; + } + elsif (@remotes == 1) { + my ($remote_branch)=$remotes[0]=~/^[A-Za-z0-9]+\s(.*)/; + return $remote_branch; + } + else { + error "There's no local $branch branch. Several remote $branch branches exist.\n". + "Run \"git branch --track $branch \" to create a local $branch branch\n". + join("\n", @remotes); + } + } +} + +sub checkoutdelta { + my $tarball=shift; + + my $branch="pristine-tar"; + my $deltafile=basename($tarball).".delta"; + my $idfile=basename($tarball).".id"; + + my ($delta, $id); + + my $vcs=vcstype(); + if ($vcs eq "git") { + my $b=git_findbranch($branch); + if (! defined $b) { + error "no $branch branch found, use \"pristine-tar commit\" first"; + } + elsif ($b eq $branch) { + $branch="refs/heads/$branch"; + } + else { + # use remote branch + $branch=$b; + } + + $delta=`git show $branch:\Q$deltafile\E`; + if ($?) { + error "git show $branch:$deltafile failed"; + } + if (! length $delta) { + error "git show $branch:$deltafile returned no content"; + } + $id=`git show $branch:\Q$idfile\E`; + if ($?) { + error "git show $branch:$idfile failed"; + } + chomp $id; + if (! length $id) { + error "git show $branch:$idfile returned no id"; + } + } + else { + die "unsupported vcs $vcs"; + } + + return ($delta, $id); +} + +sub commitdelta { + my $delta=shift; + my $id=shift; + my $tarball=shift; + + my $branch="pristine-tar"; + my $deltafile=basename($tarball).".delta"; + my $idfile=basename($tarball).".id"; + my $commit_message=defined $message ? $message : + "pristine-tar data for ".basename($tarball); + + my $vcs=vcstype(); + if ($vcs eq "git") { + my $tempdir=tempdir(); + open(OUT, ">$tempdir/$deltafile") || die "$tempdir/$deltafile: $!"; + print OUT $delta; + close OUT; + open(OUT, ">$tempdir/$idfile") || die "$tempdir/$idfile: $!"; + print OUT "$id\n"; + close OUT; + + # Commit the delta to a branch in git without affecting the + # index, and without touching the working tree. Aka deep + # git magick. + $ENV{GIT_INDEX_FILE}="$tempdir/index"; + if (! exists $ENV{GIT_DIR} || ! length $ENV{GIT_DIR}) { + $ENV{GIT_DIR}=getcwd."/.git"; + } + else { + $ENV{GIT_DIR}=abs_path($ENV{GIT_DIR}); + } + chdir($tempdir) || die "chdir: $!"; + + # If there's no local branch, branch from a remote branch + # if one exists. If there's no remote branch either, the + # code below will create the local branch. + my $b=git_findbranch($branch); + if (defined $b && $b ne $branch) { + doit("git branch --track \Q$branch\E \Q$b\E"); + } + + my $branch_exists=(system("git show-ref --quiet --verify refs/heads/$branch") == 0); + if ($branch_exists) { + doit("git ls-tree -r --full-name $branch | git update-index --index-info"); + } + doit("git", "update-index", "--add", $deltafile, $idfile); + my $sha=`git write-tree`; + if ($?) { + error("git write-tree failed"); + } + $sha=~s/\n//sg; + if (! length $sha) { + error("git write-tree did not return a sha"); + } + my $pid = open(COMMIT, "|-"); + if (! $pid) { + # child + my $commitopts=$branch_exists ? "-p $branch" : ""; + my $commitsha=`git commit-tree $sha $commitopts`; + if ($?) { + error("git commit-tree failed"); + } + $commitsha=~s/\n//sg; + if (! length $commitsha) { + error("git commit-tree did not return a sha"); + } + doit("git", "update-ref", "refs/heads/$branch", $commitsha); + exit 0; + } + else { + # parent + print COMMIT $commit_message."\n"; + close COMMIT || error("git commit-tree failed"); + } + + message("committed $deltafile to branch $branch"); + } + else { + die "unsupported vcs $vcs"; + } +} + +sub commit { + my $tarball=shift; + my $upstream=shift; # optional + + if (! defined $tarball || @_) { + usage(); + } + + my $tempdir=tempdir(); + my ($sourcedir, $id)=export($upstream); + genmanifest($tarball, "$tempdir/manifest"); + my $recreatetarball=recreatetarball("$tempdir/manifest", $sourcedir, + clobber_source => 1, create_missing => 1); + my $pid = open(GENDELTA, "-|"); + if (! $pid) { + # child + gendelta($tarball, "-", recreatetarball => $recreatetarball); + exit 0; + } + local $/=undef; + my $delta=; + close GENDELTA || error "failed to generate delta"; + commitdelta($delta, $id, $tarball); +} + +sub checkout { + my $tarball=shift; + + my ($delta, $id)=checkoutdelta($tarball); + my ($sourcedir, undef)=export($id); + my $pid = open(GENTAR, "|-"); + if (! $pid) { + # child + $tarball=abs_path($tarball); + chdir($sourcedir) || die "chdir $sourcedir: $!"; + gentar("-", $tarball, clobber_source => 1, create_missing => 1); + exit 0; + } + print GENTAR $delta; + close GENTAR || error "failed to generate tarball"; + + message("successfully generated $tarball"); +} + +sub list { + my $branch="pristine-tar"; + my $vcs=vcstype(); + if ($vcs eq "git") { + my $b=git_findbranch($branch); + if (defined $b) { + open (LIST, "git ls-tree $b --name-only |"); + while () { + chomp; + next unless s/\.delta$//; + print $_."\n"; + } + } + } + else { + die "unsupported vcs $vcs"; + } +} diff --git a/pristine-tar.1 b/pristine-tar.1 new file mode 100644 index 0000000..71679d2 --- /dev/null +++ b/pristine-tar.1 @@ -0,0 +1,286 @@ +.\" Automatically generated by Pod::Man 2.25 (Pod::Simple 3.16) +.\" +.\" Standard preamble: +.\" ======================================================================== +.de Sp \" Vertical space (when we can't use .PP) +.if t .sp .5v +.if n .sp +.. +.de Vb \" Begin verbatim text +.ft CW +.nf +.ne \\$1 +.. +.de Ve \" End verbatim text +.ft R +.fi +.. +.\" Set up some character translations and predefined strings. \*(-- will +.\" give an unbreakable dash, \*(PI will give pi, \*(L" will give a left +.\" double quote, and \*(R" will give a right double quote. \*(C+ will +.\" give a nicer C++. Capital omega is used to do unbreakable dashes and +.\" therefore won't be available. \*(C` and \*(C' expand to `' in nroff, +.\" nothing in troff, for use with C<>. +.tr \(*W- +.ds C+ C\v'-.1v'\h'-1p'\s-2+\h'-1p'+\s0\v'.1v'\h'-1p' +.ie n \{\ +. ds -- \(*W- +. ds PI pi +. if (\n(.H=4u)&(1m=24u) .ds -- \(*W\h'-12u'\(*W\h'-12u'-\" diablo 10 pitch +. if (\n(.H=4u)&(1m=20u) .ds -- \(*W\h'-12u'\(*W\h'-8u'-\" diablo 12 pitch +. ds L" "" +. ds R" "" +. ds C` "" +. ds C' "" +'br\} +.el\{\ +. ds -- \|\(em\| +. ds PI \(*p +. ds L" `` +. ds R" '' +'br\} +.\" +.\" Escape single quotes in literal strings from groff's Unicode transform. +.ie \n(.g .ds Aq \(aq +.el .ds Aq ' +.\" +.\" If the F register is turned on, we'll generate index entries on stderr for +.\" titles (.TH), headers (.SH), subsections (.SS), items (.Ip), and index +.\" entries marked with X<> in POD. Of course, you'll have to process the +.\" output yourself in some meaningful fashion. +.ie \nF \{\ +. de IX +. tm Index:\\$1\t\\n%\t"\\$2" +.. +. nr % 0 +. rr F +.\} +.el \{\ +. de IX +.. +.\} +.\" +.\" Accent mark definitions (@(#)ms.acc 1.5 88/02/08 SMI; from UCB 4.2). +.\" Fear. Run. Save yourself. No user-serviceable parts. +. \" fudge factors for nroff and troff +.if n \{\ +. ds #H 0 +. ds #V .8m +. ds #F .3m +. ds #[ \f1 +. ds #] \fP +.\} +.if t \{\ +. ds #H ((1u-(\\\\n(.fu%2u))*.13m) +. ds #V .6m +. ds #F 0 +. ds #[ \& +. ds #] \& +.\} +. \" simple accents for nroff and troff +.if n \{\ +. ds ' \& +. ds ` \& +. ds ^ \& +. ds , \& +. ds ~ ~ +. ds / +.\} +.if t \{\ +. ds ' \\k:\h'-(\\n(.wu*8/10-\*(#H)'\'\h"|\\n:u" +. ds ` \\k:\h'-(\\n(.wu*8/10-\*(#H)'\`\h'|\\n:u' +. ds ^ \\k:\h'-(\\n(.wu*10/11-\*(#H)'^\h'|\\n:u' +. ds , \\k:\h'-(\\n(.wu*8/10)',\h'|\\n:u' +. ds ~ \\k:\h'-(\\n(.wu-\*(#H-.1m)'~\h'|\\n:u' +. ds / \\k:\h'-(\\n(.wu*8/10-\*(#H)'\z\(sl\h'|\\n:u' +.\} +. \" troff and (daisy-wheel) nroff accents +.ds : \\k:\h'-(\\n(.wu*8/10-\*(#H+.1m+\*(#F)'\v'-\*(#V'\z.\h'.2m+\*(#F'.\h'|\\n:u'\v'\*(#V' +.ds 8 \h'\*(#H'\(*b\h'-\*(#H' +.ds o \\k:\h'-(\\n(.wu+\w'\(de'u-\*(#H)/2u'\v'-.3n'\*(#[\z\(de\v'.3n'\h'|\\n:u'\*(#] +.ds d- \h'\*(#H'\(pd\h'-\w'~'u'\v'-.25m'\f2\(hy\fP\v'.25m'\h'-\*(#H' +.ds D- D\\k:\h'-\w'D'u'\v'-.11m'\z\(hy\v'.11m'\h'|\\n:u' +.ds th \*(#[\v'.3m'\s+1I\s-1\v'-.3m'\h'-(\w'I'u*2/3)'\s-1o\s+1\*(#] +.ds Th \*(#[\s+2I\s-2\h'-\w'I'u*3/5'\v'-.3m'o\v'.3m'\*(#] +.ds ae a\h'-(\w'a'u*4/10)'e +.ds Ae A\h'-(\w'A'u*4/10)'E +. \" corrections for vroff +.if v .ds ~ \\k:\h'-(\\n(.wu*9/10-\*(#H)'\s-2\u~\d\s+2\h'|\\n:u' +.if v .ds ^ \\k:\h'-(\\n(.wu*10/11-\*(#H)'\v'-.4m'^\v'.4m'\h'|\\n:u' +. \" for low resolution devices (crt and lpr) +.if \n(.H>23 .if \n(.V>19 \ +\{\ +. ds : e +. ds 8 ss +. ds o a +. ds d- d\h'-1'\(ga +. ds D- D\h'-1'\(hy +. ds th \o'bp' +. ds Th \o'LP' +. ds ae ae +. ds Ae AE +.\} +.rm #[ #] #H #V #F C +.\" ======================================================================== +.\" +.IX Title "PRISTINE-TAR 1" +.TH PRISTINE-TAR 1 "2013-02-13" "perl v5.14.2" "pristine-tar" +.\" For nroff, turn off justification. Always turn off hyphenation; it makes +.\" way too many mistakes in technical documents. +.if n .ad l +.nh +.SH "NAME" +pristine\-tar \- regenerate pristine tarballs +.SH "SYNOPSIS" +.IX Header "SYNOPSIS" +\&\fBpristine-tar\fR [\-vdk] gendelta \fItarball\fR \fIdelta\fR +.PP +\&\fBpristine-tar\fR [\-vdk] gentar \fIdelta\fR \fItarball\fR +.PP +\&\fBpristine-tar\fR [\-vdk] [\-m message] commit \fItarball\fR [\fIupstream\fR] +.PP +\&\fBpristine-tar\fR [\-vdk] checkout \fItarball\fR +.PP +\&\fBpristine-tar\fR [\-vdk] list +.SH "DESCRIPTION" +.IX Header "DESCRIPTION" +pristine-tar can regenerate an exact copy of a pristine upstream tarball +using only a small binary \fIdelta\fR file and the contents of the tarball, +which are typically kept in an \fIupstream\fR branch in version control. +.PP +The \fIdelta\fR file is designed to be checked into version control along-side +the \fIupstream\fR branch, thus allowing Debian packages to be built entirely +using sources in version control, without the need to keep copies of +upstream tarballs. +.PP +pristine-tar supports compressed tarballs, calling out to \fIpristine\-gz\fR\|(1), +\&\fIpristine\-bz2\fR\|(1), and \fIpristine\-xz\fR\|(1) to produce the pristine gzip, bzip2, +and xz files. +.SH "COMMANDS" +.IX Header "COMMANDS" +.IP "pristine-tar gendelta \fItarball\fR \fIdelta\fR" 4 +.IX Item "pristine-tar gendelta tarball delta" +This takes the specified upstream \fItarball\fR, and generates a small binary +delta file that can later be used by pristine-tar gentar to recreate the +tarball. +.Sp +If the delta filename is \*(L"\-\*(R", it is written to standard output. +.IP "pristine-tar gentar \fIdelta\fR \fItarball\fR" 4 +.IX Item "pristine-tar gentar delta tarball" +This takes the specified \fIdelta\fR file, and the files in the current +directory, which must have identical content to those in the upstream +tarball, and uses these to regenerate the pristine upstream \fItarball\fR. +.Sp +If the delta filename is \*(L"\-\*(R", it is read from standard input. +.IP "pristine-tar commit \fItarball\fR [\fIupstream\fR]" 4 +.IX Item "pristine-tar commit tarball [upstream]" +\&\fBpristine-tar commit\fR generates a pristine-tar delta file for the specified +\&\fItarball\fR, and commits it to version control. The \fBpristine-tar checkout\fR +command can later be used to recreate the original tarball based only +on the information stored in version control. +.Sp +The \fIupstream\fR parameter specifies the tag or branch that contains the +same content that is present in the tarball. This defaults to +\&\*(L"refs/heads/upstream\*(R", or if there's no such branch, any +branch matching \*(L"upstream\*(R". The name of the tree it points to will be +recorded for later use by \fBpristine-tar checkout\fR. Note that the content +does not need to be 100% identical to the content of the tarball, but +if it is not, additional space will be used in the delta file. +.Sp +The delta files are stored in a branch named \*(L"pristine-tar\*(R", with filenames +corresponding to the input tarball, with \*(L".delta\*(R" appended. This +branch is created or updated as needed to add each new delta. +.IP "pristine-tar checkout \fItarball\fR" 4 +.IX Item "pristine-tar checkout tarball" +This regenerates a copy of the specified \fItarball\fR using information +previously saved in version control by \fBpristine-tar commit\fR. +.IP "pristine-tar list" 4 +.IX Item "pristine-tar list" +This lists tarballs that pristine-tar is able to checkout from version +control. +.SH "OPTIONS" +.IX Header "OPTIONS" +.IP "\-v" 4 +.IX Item "-v" +.PD 0 +.IP "\-\-verbose" 4 +.IX Item "--verbose" +.PD +Verbose mode, show each command that is run. +.IP "\-d" 4 +.IX Item "-d" +.PD 0 +.IP "\-\-debug" 4 +.IX Item "--debug" +.PD +Debug mode. +.IP "\-k" 4 +.IX Item "-k" +.PD 0 +.IP "\-\-keep" 4 +.IX Item "--keep" +.PD +Don't clean up the temporary directory on exit. +.IP "\-m message" 4 +.IX Item "-m message" +.PD 0 +.IP "\-\-message=message" 4 +.IX Item "--message=message" +.PD +Use this option to specify a custom commit message to pristine-tar commit. +.SH "EXAMPLES" +.IX Header "EXAMPLES" +Suppose you maintain the hello package, in a git repository. You have +just created a tarball of the release, \fIhello\-1.0.tar.gz\fR, which you +will upload to a \*(L"forge\*(R" site. +.PP +You want to ensure that, if the \*(L"forge\*(R" loses the tarball, you can always +recreate exactly that same tarball. And you'd prefer not to keep copies +of tarballs for every release, as that could use a lot of disk space +when hello gets the background mp3s and user-contributed levels you +are planning for version 2.0. +.PP +The solution is to use pristine-tar to commit a delta file that efficiently +stores enough information to reproduce the tarball later. +.PP +.Vb 3 +\& cd hello +\& git tag \-s 1.0 +\& pristine\-tar commit ../hello\-1.0.tar.gz 1.0 +.Ve +.PP +Remember to tell git to push both the pristine-tar branch, and your tag: +.PP +.Vb 1 +\& git push \-\-all \-\-tags +.Ve +.PP +Now it is a year later. The worst has come to pass; the \*(L"forge\*(R" lost +all its data, you deleted the tarballs to make room for bug report emails, +and you want to regenerate them. Happily, the git repository is still +available. +.PP +.Vb 3 +\& git clone git://github.com/joeyh/hello.git +\& cd hello +\& pristine\-tar checkout ../hello\-1.0.tar.gz +.Ve +.SH "LIMITATIONS" +.IX Header "LIMITATIONS" +Only tarballs, gzipped tarballs, bzip2ed tarballs, and xzed tarballs +are currently supported. +.PP +Currently only the git revision control system is supported by the +\&\*(L"checkout\*(R" and \*(L"commit\*(R" commands. It's ok if the working copy +is not clean or has uncommitted changes, or has changes staged in the +index; none of that will be touched by \*(L"checkout\*(R" or \*(L"commit\*(R". +.SH "ENVIRONMENT" +.IX Header "ENVIRONMENT" +.IP "\fB\s-1TMPDIR\s0\fR" 4 +.IX Item "TMPDIR" +Specifies a location to place temporary files, other than the default. +.SH "AUTHOR" +.IX Header "AUTHOR" +Joey Hess +.PP +Licensed under the \s-1GPL\s0, version 2 or above. diff --git a/pristine-tar.spec b/pristine-tar.spec new file mode 100644 index 0000000..81d7c29 --- /dev/null +++ b/pristine-tar.spec @@ -0,0 +1,57 @@ +Name: pristine-tar +Version: 1.27 +Release: 2%{?dist} +Summary: regenerate pristine tarballs + +Group: System Tools +License: GPLv2 +Url: http://kitenet.net/~joey/code/pristine-tar/ +Source0: http://ftp.debian.org/debian/pool/main/p/pristine-tar/%{name}_%{version}.tar.gz +BuildRoot: %{_tmppath}/%{name}-%{version}-%{release}-root-%(%{__id_u} -n) + +Requires: git, xdelta + +%description +pristine-tar can regenerate a pristine upstream tarball using only a +small binary delta file and a copy of the source which can be a revision +control checkout. + +The package also includes a pristine-gz command, which can regenerate +a pristine .gz file. + +The delta file is designed to be checked into revision control along-side +the source code, thus allowing the original tarball to be extracted from +revision control. + +pristine-tar is available in git at git://git.kitenet.net/pristine-tar/ + + +%prep +%setup -q -n %{name} + + +%build +perl Makefile.PL INSTALLDIRS=vendor PREFIX=%{_prefix} +make %{?_smp_mflags} + + +%install +rm -rf $RPM_BUILD_ROOT +make install DESTDIR=$RPM_BUILD_ROOT + + +%clean +rm -rf $RPM_BUILD_ROOT + + +%files +%defattr(-,root,root,-) +%doc GPL TODO delta-format.txt +%{_bindir}/* +%{_mandir}/* + + +%changelog +* Tue Feb 24 2009 Jimmy Tang - 0.21-1 +- initial package + diff --git a/pristine-xz b/pristine-xz new file mode 100755 index 0000000..830559b --- /dev/null +++ b/pristine-xz @@ -0,0 +1,379 @@ +#!/usr/bin/perl + +=head1 NAME + +pristine-xz - regenerate pristine xz files + +=head1 SYNOPSIS + +B [-vdk] gendelta I I + +B [-vdk] genxz I I + +=head1 DESCRIPTION + +This is a complement to the pristine-tar(1) command. Normally you +don't need to run it by hand, since pristine-tar calls it as necessary +to handle .tar.xz files. + +pristine-xz gendelta takes the specified I file, and generates a +small binary I file that can later be used by pristine-xz genxz +to recreate the original file. + +pristine-xz genxz takes the specified I file, and compresses the +specified input I (which must be identical to the contents of the +original xz file). The resulting file will be identical to +the original gz file used to create the delta. + +The approach used to regenerate the original xz file is to figure out +how it was produced -- what compression level was used, etc. Currently +support is poor for xz files produced with unusual compression options. + +If the delta filename is "-", pristine-xz reads or writes it to stdio. + +=head1 OPTIONS + +=over 4 + +=item -v + +Verbose mode, show each command that is run. + +=item -d + +Debug mode. + +=item -k + +Don't clean up the temporary directory on exit. + +=item -t + +Try harder to determine how to generate deltas of difficult xz files. + +=back + +=head1 ENVIRONMENT + +=over 4 + +=item B + +Specifies a location to place temporary files, other than the default. + +=back + +=head1 AUTHOR + +Joey Hess , +Faidon Liambotis , +Cyril Brulebois + +Licensed under the GPL, version 2. + +=cut + +use warnings; +use strict; +use Pristine::Tar; +use Pristine::Tar::Delta; +use Pristine::Tar::Formats; +use File::Basename qw/basename/; +use IO::Handle; + +my @supported_xz_programs = qw(xz); + +my $try=0; + +dispatch( + commands => { + usage => [\&usage], + genxz => [\&genxz, 2], + gendelta => [\&gendelta, 2], + }, + options => { + "t|try!" => \$try, + }, +); + +sub usage { + print STDERR "Usage: pristine-xz [-vdkt] gendelta file.xz delta\n"; + print STDERR " pristine-xz [-vdkt] genxz delta file\n"; +} + +sub assign_fields { + my ($hash, $labels, $fields) = @_; + @$hash{@$labels} = @$fields[1..scalar(@$labels)]; +} + +sub scan_xz_lvv_robot { + my ($filename) = @_; + # We need at least version 5.0 to get a proper '-lvv --robot' + # implemented + my $cmd = "xz -lvv --robot $filename"; + my $ret = open (my $in, "$cmd |") || die "$cmd failed: $!"; + my %xz = (file => {}, stream => {}, blocks => [], + summary => {}, totals => {}); + my (%file, %stream, @blocks, %summary, %totals); + my @file_labels = qw{nb_streams nb_blocks compressed uncompressed + ratio checks padding_size}; + my @stream_labels = + qw{stream_num nb_blocks compressed_offset uncompressed_offset + compressed_size uncompressed_size ratio check_name + padding_size}; + my @block_labels = + qw{stream_num block_in_stream block_in_file compressed_offset + uncompressed_offset compressed_size uncompressed_size ratio + check_name check_value header_size size_present_flags + actual_compressed_size uncompress_memory filter_chain}; + my @summary_labels = qw{uncompressed_memory size_in_blocks}; + my @totals_labels = + qw{nb_streams nb_blocks compressed_size uncompressed_size ratio + check_names padding_size nb_files uncompressed_memory + size_in_blocks}; + + while (my $line = <$in>) { + chomp $line; + my @fields = split(/\t/, $line); + if ($fields[0] eq 'name') { + next; + } + if ($fields[0] eq 'file') { + assign_fields($xz{file}, \@file_labels, \@fields); + next; + } + if ($fields[0] eq 'stream') { + assign_fields($xz{stream}, \@stream_labels, \@fields); + next; + } + if ($fields[0] eq 'block') { + my %block; + assign_fields(\%block, \@block_labels, \@fields); + push @{$xz{blocks}}, \%block; + next; + } + if ($fields[0] eq 'summary') { + assign_fields($xz{summary}, \@summary_labels, \@fields); + next; + } + if ($fields[0] eq 'totals') { + assign_fields($xz{totals}, \@totals_labels, \@fields); + next; + } + } + close $in; + return \%xz; +} + +sub predict_xz_args { + my ($xz) = @_; + my $presets = undef; + my $block_list = undef; + my $blocks = $xz->{blocks}; + if (scalar(@$blocks)) { + # There is at least one block. We assume the same compression + # level for all blocks + my $block = $blocks->[0]; + my @filters = split(/,/, $block->{filter_chain}); + if (scalar(@filters) != 1 || $filters[0] !~ /^--lzma2=/) { + die "Only LZMA2 is supported"; + } + # Deduce the presets from the dict size + if ($filters[0] =~ /--lzma2=dict=(.*)/) { + my $dict_size = $1; + my %lzma2_presets_from_dict_size_of = + ('256KiB' => ['0'], + '1Mib' => ['1'], + '2MiB' => ['2'], + '4MiB' => ['4', '3'], + # Put 6 before 5 as it's the default and is + # more likely to be right + '8MiB' => ['6', '5'], + '16MiB' => ['7'], + '32MiB' => ['8'], + '64MiB' => ['9'], + ); + $presets = $lzma2_presets_from_dict_size_of{$dict_size}; + die "Unkown dict size: $dict_size\n" + if (!defined($presets)); + } + if (scalar(@$blocks) > 1) { + # Gather the block uncompressed sizes + $block_list = join(',', map {$_->{uncompressed_size}} + @$blocks); + } + } + my %check_kwd_of = + (None => 'none', + CRC32 => 'crc32', + CRC64 => 'crc64', + 'SHA-256' => 'sha256', + ); + my $check_name = $xz->{stream}->{check_name}; + my $check_kwd = $check_kwd_of{$check_name}; + die "Unknown xz check: $check_name\n" if (!defined($check_kwd)); + + my $possible_args = []; + my $common = ["--check=$check_kwd", "-z"]; + if (defined($block_list)) { + unshift @$common, "--block-list=$block_list"; + } + foreach my $preset (@$presets) { + push @$possible_args, [@$common, "-$preset"]; + push @$possible_args, [@$common, "-${preset}e"]; + } + return $possible_args; +} + +sub readxz { + my $filename = shift; + + if (! is_xz($filename)) { + error "This is not a valid xz archive."; + } + + # This will guess the compression level, check and blocks from the file. + # More info is still needed if the level used was 3/4 or 5/6 (see + # lzma2_presets_from_dict_size_of in predict_xz_args) or if --extreme + # was used. We output possible args for each combination in this case. + my $xz = scan_xz_lvv_robot($filename); + my $possible_args = predict_xz_args($xz); + return $possible_args; +} + +sub predictxzlevels { + my $filename = shift; + + if (! is_xz($filename)) { + error "This is not a valid xz archive."; + } + + # XXX We don't currently have a way to guess the level from the + # file format, as this level only presets several other tunables. + # Correct handling would involve finding as many preset values as + # possible, and reconstructing the compression level from that. + # + # So far in the wild only these levels have been seen. + # (Note that level 9 can use a lot of memory.) + my $possible_levels = ["6", "9", "0", "6e", "9e", "0e"]; + + return ($possible_levels); +} + +sub predictxzargs { + my ($possible_levels, $program) = @_; + + my @args; + foreach my $level (@$possible_levels) { + push @args, ["-z", "-$level"]; + push @args, ["-z", "-$level", "--check=crc32"]; + push @args, ["-z", "-$level", "--check=sha256"]; + } + return @args; +} + +sub testvariant { + my ($old, $tmpin, $xz_program, @args) = @_; + + my $new=$tmpin.'.xz'; + unlink($new); + + # Note that file name, mode, mtime do not matter to xz. + + # try xz'ing with the arguments passed + doit_redir($tmpin, $new, $xz_program, @args); + + unless (-e $new) { + die("$xz_program failed, aborting"); + } + + # and compare the generated with the original + return !comparefiles($old, $new); +} + +sub reproducexz { + my $orig=shift; + + my $wd=tempdir(); + + my $tmpin="$wd/test"; + doit_redir($orig, $tmpin, "xz", "-dc"); + + # read fields from xz headers + my $possible_args; + eval { + $possible_args = readxz($orig); + }; + # If we get an error we fallback to guessing, otherwise, we should + # succeed with one of the proposed combinations + if (! $@) { + foreach my $program (@supported_xz_programs) { + foreach my $args (@$possible_args) { + testvariant($orig, $tmpin, $program, @$args) + && return $program, @$args; + } + } + } + else { + # Fallback to guessing + my ($possible_levels) = predictxzlevels($orig); + + foreach my $program (@supported_xz_programs) { + # try to guess the xz arguments that are needed + foreach my $args (predictxzargs($possible_levels, + $program)) { + testvariant($orig, $tmpin, $program, @$args) + && return $program, @$args; + } + } + } + + print STDERR "pristine-xz failed to reproduce build of $orig\n"; + print STDERR "(Please file a bug report.)\n"; + exit 1; +} + +sub genxz { + my $deltafile=shift; + my $file=shift; + + my $delta=Pristine::Tar::Delta::read(Tarball => $deltafile); + Pristine::Tar::Delta::assert($delta, type => "xz", maxversion => 2, + fields => [qw{params program}]); + + my @params=split(' ', $delta->{params}); + while (@params) { + my $param=shift @params; + + next if $param=~/^(-[0-9]e?)$/; + next if $param eq '-z'; + next if $param eq '--check=none'; + next if $param eq '--check=crc32'; + next if $param eq '--check=crc64'; + next if $param eq '--check=sha256'; + next if $param=~/^(--block-list=[0-9,]+)$/; + die "paranoia check failed on params from delta (@params)"; + } + @params=split(' ', $delta->{params}); + + my $program=$delta->{program}; + if (! grep { $program eq $_ } @supported_xz_programs) { + die "paranoia check failed on program from delta ($program)"; + } + + doit($program, @params, $file); +} + +sub gendelta { + my $xzfile=shift; + my $deltafile=shift; + + my ($program, @params) = reproducexz($xzfile); + + Pristine::Tar::Delta::write(Tarball => $deltafile, { + version => '2.0', + type => 'xz', + params => "@params", + program => $program, + }); +} diff --git a/pristine-xz.1 b/pristine-xz.1 new file mode 100644 index 0000000..ee3c8f2 --- /dev/null +++ b/pristine-xz.1 @@ -0,0 +1,184 @@ +.\" Automatically generated by Pod::Man 2.25 (Pod::Simple 3.16) +.\" +.\" Standard preamble: +.\" ======================================================================== +.de Sp \" Vertical space (when we can't use .PP) +.if t .sp .5v +.if n .sp +.. +.de Vb \" Begin verbatim text +.ft CW +.nf +.ne \\$1 +.. +.de Ve \" End verbatim text +.ft R +.fi +.. +.\" Set up some character translations and predefined strings. \*(-- will +.\" give an unbreakable dash, \*(PI will give pi, \*(L" will give a left +.\" double quote, and \*(R" will give a right double quote. \*(C+ will +.\" give a nicer C++. Capital omega is used to do unbreakable dashes and +.\" therefore won't be available. \*(C` and \*(C' expand to `' in nroff, +.\" nothing in troff, for use with C<>. +.tr \(*W- +.ds C+ C\v'-.1v'\h'-1p'\s-2+\h'-1p'+\s0\v'.1v'\h'-1p' +.ie n \{\ +. ds -- \(*W- +. ds PI pi +. if (\n(.H=4u)&(1m=24u) .ds -- \(*W\h'-12u'\(*W\h'-12u'-\" diablo 10 pitch +. if (\n(.H=4u)&(1m=20u) .ds -- \(*W\h'-12u'\(*W\h'-8u'-\" diablo 12 pitch +. ds L" "" +. ds R" "" +. ds C` "" +. ds C' "" +'br\} +.el\{\ +. ds -- \|\(em\| +. ds PI \(*p +. ds L" `` +. ds R" '' +'br\} +.\" +.\" Escape single quotes in literal strings from groff's Unicode transform. +.ie \n(.g .ds Aq \(aq +.el .ds Aq ' +.\" +.\" If the F register is turned on, we'll generate index entries on stderr for +.\" titles (.TH), headers (.SH), subsections (.SS), items (.Ip), and index +.\" entries marked with X<> in POD. Of course, you'll have to process the +.\" output yourself in some meaningful fashion. +.ie \nF \{\ +. de IX +. tm Index:\\$1\t\\n%\t"\\$2" +.. +. nr % 0 +. rr F +.\} +.el \{\ +. de IX +.. +.\} +.\" +.\" Accent mark definitions (@(#)ms.acc 1.5 88/02/08 SMI; from UCB 4.2). +.\" Fear. Run. Save yourself. No user-serviceable parts. +. \" fudge factors for nroff and troff +.if n \{\ +. ds #H 0 +. ds #V .8m +. ds #F .3m +. ds #[ \f1 +. ds #] \fP +.\} +.if t \{\ +. ds #H ((1u-(\\\\n(.fu%2u))*.13m) +. ds #V .6m +. ds #F 0 +. ds #[ \& +. ds #] \& +.\} +. \" simple accents for nroff and troff +.if n \{\ +. ds ' \& +. ds ` \& +. ds ^ \& +. ds , \& +. ds ~ ~ +. ds / +.\} +.if t \{\ +. ds ' \\k:\h'-(\\n(.wu*8/10-\*(#H)'\'\h"|\\n:u" +. ds ` \\k:\h'-(\\n(.wu*8/10-\*(#H)'\`\h'|\\n:u' +. ds ^ \\k:\h'-(\\n(.wu*10/11-\*(#H)'^\h'|\\n:u' +. ds , \\k:\h'-(\\n(.wu*8/10)',\h'|\\n:u' +. ds ~ \\k:\h'-(\\n(.wu-\*(#H-.1m)'~\h'|\\n:u' +. ds / \\k:\h'-(\\n(.wu*8/10-\*(#H)'\z\(sl\h'|\\n:u' +.\} +. \" troff and (daisy-wheel) nroff accents +.ds : \\k:\h'-(\\n(.wu*8/10-\*(#H+.1m+\*(#F)'\v'-\*(#V'\z.\h'.2m+\*(#F'.\h'|\\n:u'\v'\*(#V' +.ds 8 \h'\*(#H'\(*b\h'-\*(#H' +.ds o \\k:\h'-(\\n(.wu+\w'\(de'u-\*(#H)/2u'\v'-.3n'\*(#[\z\(de\v'.3n'\h'|\\n:u'\*(#] +.ds d- \h'\*(#H'\(pd\h'-\w'~'u'\v'-.25m'\f2\(hy\fP\v'.25m'\h'-\*(#H' +.ds D- D\\k:\h'-\w'D'u'\v'-.11m'\z\(hy\v'.11m'\h'|\\n:u' +.ds th \*(#[\v'.3m'\s+1I\s-1\v'-.3m'\h'-(\w'I'u*2/3)'\s-1o\s+1\*(#] +.ds Th \*(#[\s+2I\s-2\h'-\w'I'u*3/5'\v'-.3m'o\v'.3m'\*(#] +.ds ae a\h'-(\w'a'u*4/10)'e +.ds Ae A\h'-(\w'A'u*4/10)'E +. \" corrections for vroff +.if v .ds ~ \\k:\h'-(\\n(.wu*9/10-\*(#H)'\s-2\u~\d\s+2\h'|\\n:u' +.if v .ds ^ \\k:\h'-(\\n(.wu*10/11-\*(#H)'\v'-.4m'^\v'.4m'\h'|\\n:u' +. \" for low resolution devices (crt and lpr) +.if \n(.H>23 .if \n(.V>19 \ +\{\ +. ds : e +. ds 8 ss +. ds o a +. ds d- d\h'-1'\(ga +. ds D- D\h'-1'\(hy +. ds th \o'bp' +. ds Th \o'LP' +. ds ae ae +. ds Ae AE +.\} +.rm #[ #] #H #V #F C +.\" ======================================================================== +.\" +.IX Title "PRISTINE-XZ 1" +.TH PRISTINE-XZ 1 "2012-07-16" "perl v5.14.2" "pristine-xz" +.\" For nroff, turn off justification. Always turn off hyphenation; it makes +.\" way too many mistakes in technical documents. +.if n .ad l +.nh +.SH "NAME" +pristine\-xz \- regenerate pristine xz files +.SH "SYNOPSIS" +.IX Header "SYNOPSIS" +\&\fBpristine-xz\fR [\-vdk] gendelta \fIfile.xz\fR \fIdelta\fR +.PP +\&\fBpristine-xz\fR [\-vdk] genxz \fIdelta\fR \fIfile\fR +.SH "DESCRIPTION" +.IX Header "DESCRIPTION" +This is a complement to the \fIpristine\-tar\fR\|(1) command. Normally you +don't need to run it by hand, since pristine-tar calls it as necessary +to handle .tar.xz files. +.PP +pristine-xz gendelta takes the specified \fIxz\fR file, and generates a +small binary \fIdelta\fR file that can later be used by pristine-xz genxz +to recreate the original file. +.PP +pristine-xz genxz takes the specified \fIdelta\fR file, and compresses the +specified input \fIfile\fR (which must be identical to the contents of the +original xz file). The resulting file will be identical to +the original gz file used to create the delta. +.PP +The approach used to regenerate the original xz file is to figure out +how it was produced \*(-- what compression level was used, etc. Currently +support is poor for xz files produced with unusual compression options. +.PP +If the delta filename is \*(L"\-\*(R", pristine-xz reads or writes it to stdio. +.SH "OPTIONS" +.IX Header "OPTIONS" +.IP "\-v" 4 +.IX Item "-v" +Verbose mode, show each command that is run. +.IP "\-d" 4 +.IX Item "-d" +Debug mode. +.IP "\-k" 4 +.IX Item "-k" +Don't clean up the temporary directory on exit. +.IP "\-t" 4 +.IX Item "-t" +Try harder to determine how to generate deltas of difficult xz files. +.SH "ENVIRONMENT" +.IX Header "ENVIRONMENT" +.IP "\fB\s-1TMPDIR\s0\fR" 4 +.IX Item "TMPDIR" +Specifies a location to place temporary files, other than the default. +.SH "AUTHOR" +.IX Header "AUTHOR" +Joey Hess , +Faidon Liambotis , +Cyril Brulebois +.PP +Licensed under the \s-1GPL\s0, version 2. diff --git a/zgz.1 b/zgz.1 new file mode 100644 index 0000000..ca4350d --- /dev/null +++ b/zgz.1 @@ -0,0 +1,158 @@ +.\" Automatically generated by Pod::Man 2.25 (Pod::Simple 3.16) +.\" +.\" Standard preamble: +.\" ======================================================================== +.de Sp \" Vertical space (when we can't use .PP) +.if t .sp .5v +.if n .sp +.. +.de Vb \" Begin verbatim text +.ft CW +.nf +.ne \\$1 +.. +.de Ve \" End verbatim text +.ft R +.fi +.. +.\" Set up some character translations and predefined strings. \*(-- will +.\" give an unbreakable dash, \*(PI will give pi, \*(L" will give a left +.\" double quote, and \*(R" will give a right double quote. \*(C+ will +.\" give a nicer C++. Capital omega is used to do unbreakable dashes and +.\" therefore won't be available. \*(C` and \*(C' expand to `' in nroff, +.\" nothing in troff, for use with C<>. +.tr \(*W- +.ds C+ C\v'-.1v'\h'-1p'\s-2+\h'-1p'+\s0\v'.1v'\h'-1p' +.ie n \{\ +. ds -- \(*W- +. ds PI pi +. if (\n(.H=4u)&(1m=24u) .ds -- \(*W\h'-12u'\(*W\h'-12u'-\" diablo 10 pitch +. if (\n(.H=4u)&(1m=20u) .ds -- \(*W\h'-12u'\(*W\h'-8u'-\" diablo 12 pitch +. ds L" "" +. ds R" "" +. ds C` "" +. ds C' "" +'br\} +.el\{\ +. ds -- \|\(em\| +. ds PI \(*p +. ds L" `` +. ds R" '' +'br\} +.\" +.\" Escape single quotes in literal strings from groff's Unicode transform. +.ie \n(.g .ds Aq \(aq +.el .ds Aq ' +.\" +.\" If the F register is turned on, we'll generate index entries on stderr for +.\" titles (.TH), headers (.SH), subsections (.SS), items (.Ip), and index +.\" entries marked with X<> in POD. Of course, you'll have to process the +.\" output yourself in some meaningful fashion. +.ie \nF \{\ +. de IX +. tm Index:\\$1\t\\n%\t"\\$2" +.. +. nr % 0 +. rr F +.\} +.el \{\ +. de IX +.. +.\} +.\" +.\" Accent mark definitions (@(#)ms.acc 1.5 88/02/08 SMI; from UCB 4.2). +.\" Fear. Run. Save yourself. No user-serviceable parts. +. \" fudge factors for nroff and troff +.if n \{\ +. ds #H 0 +. ds #V .8m +. ds #F .3m +. ds #[ \f1 +. ds #] \fP +.\} +.if t \{\ +. ds #H ((1u-(\\\\n(.fu%2u))*.13m) +. ds #V .6m +. ds #F 0 +. ds #[ \& +. ds #] \& +.\} +. \" simple accents for nroff and troff +.if n \{\ +. ds ' \& +. ds ` \& +. ds ^ \& +. ds , \& +. ds ~ ~ +. ds / +.\} +.if t \{\ +. ds ' \\k:\h'-(\\n(.wu*8/10-\*(#H)'\'\h"|\\n:u" +. ds ` \\k:\h'-(\\n(.wu*8/10-\*(#H)'\`\h'|\\n:u' +. ds ^ \\k:\h'-(\\n(.wu*10/11-\*(#H)'^\h'|\\n:u' +. ds , \\k:\h'-(\\n(.wu*8/10)',\h'|\\n:u' +. ds ~ \\k:\h'-(\\n(.wu-\*(#H-.1m)'~\h'|\\n:u' +. ds / \\k:\h'-(\\n(.wu*8/10-\*(#H)'\z\(sl\h'|\\n:u' +.\} +. \" troff and (daisy-wheel) nroff accents +.ds : \\k:\h'-(\\n(.wu*8/10-\*(#H+.1m+\*(#F)'\v'-\*(#V'\z.\h'.2m+\*(#F'.\h'|\\n:u'\v'\*(#V' +.ds 8 \h'\*(#H'\(*b\h'-\*(#H' +.ds o \\k:\h'-(\\n(.wu+\w'\(de'u-\*(#H)/2u'\v'-.3n'\*(#[\z\(de\v'.3n'\h'|\\n:u'\*(#] +.ds d- \h'\*(#H'\(pd\h'-\w'~'u'\v'-.25m'\f2\(hy\fP\v'.25m'\h'-\*(#H' +.ds D- D\\k:\h'-\w'D'u'\v'-.11m'\z\(hy\v'.11m'\h'|\\n:u' +.ds th \*(#[\v'.3m'\s+1I\s-1\v'-.3m'\h'-(\w'I'u*2/3)'\s-1o\s+1\*(#] +.ds Th \*(#[\s+2I\s-2\h'-\w'I'u*3/5'\v'-.3m'o\v'.3m'\*(#] +.ds ae a\h'-(\w'a'u*4/10)'e +.ds Ae A\h'-(\w'A'u*4/10)'E +. \" corrections for vroff +.if v .ds ~ \\k:\h'-(\\n(.wu*9/10-\*(#H)'\s-2\u~\d\s+2\h'|\\n:u' +.if v .ds ^ \\k:\h'-(\\n(.wu*10/11-\*(#H)'\v'-.4m'^\v'.4m'\h'|\\n:u' +. \" for low resolution devices (crt and lpr) +.if \n(.H>23 .if \n(.V>19 \ +\{\ +. ds : e +. ds 8 ss +. ds o a +. ds d- d\h'-1'\(ga +. ds D- D\h'-1'\(hy +. ds th \o'bp' +. ds Th \o'LP' +. ds ae ae +. ds Ae AE +.\} +.rm #[ #] #H #V #F C +.\" ======================================================================== +.\" +.IX Title "ZGZ 1" +.TH ZGZ 1 "2012-03-18" "perl v5.14.2" "zgz" +.\" For nroff, turn off justification. Always turn off hyphenation; it makes +.\" way too many mistakes in technical documents. +.if n .ad l +.nh +.SH "NAME" +zgz \- Frankenstein's compressor +.SH "SYNOPSIS" +.IX Header "SYNOPSIS" +\&\fBzgz\fR [options] < file > file.gz +.SH "DESCRIPTION" +.IX Header "DESCRIPTION" +This program is an unholy combination of the \s-1BSD\s0 gzip program, a modified +\&\s-1GNU\s0 gzip that supports setting an arbitrary file name and timestamp, +and an old, rotting version of bzip2 that we dug up somewhere at midnight. +Only the bits to do with file compression were kept. +.PP +There are many arcane options which aid pristine-gz(1) in re-animating +files. Use \-\-help to see all the gory details. +.SH "AUTHOR" +.IX Header "AUTHOR" +Matthew R. Green, +Jean-loup Gailly, +Julian R Seward, +Faidon Liambotis, +Josh Triplett, +Joey Hess +.PP +Note that several of the above donors would be very suprised at finding +parts of themselves ... er, their work ... shambling around here. Please +direct correspondance and/or flaming pitchforks to the pristine-tar +maintainers. diff --git a/zgz/gzip/bits.c b/zgz/gzip/bits.c new file mode 100644 index 0000000..26b6ea8 --- /dev/null +++ b/zgz/gzip/bits.c @@ -0,0 +1,180 @@ +/* bits.c -- output variable-length bit strings + + Copyright (C) 1999 Free Software Foundation, Inc. + Copyright (C) 1992-1993 Jean-loup Gailly + Copyright (C) 2008 Josh Triplett + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2, or (at your option) + any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software Foundation, + Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */ + + +/* + * PURPOSE + * + * Output variable-length bit strings. Compression can be done + * to a file or to memory. (The latter is not supported in this version.) + * + * DISCUSSION + * + * The PKZIP "deflate" file format interprets compressed file data + * as a sequence of bits. Multi-bit strings in the file may cross + * byte boundaries without restriction. + * + * The first bit of each byte is the low-order bit. + * + * The routines in this file allow a variable-length bit value to + * be output right-to-left (useful for literal values). For + * left-to-right output (useful for code strings from the tree routines), + * the bits must have been reversed first with bi_reverse(). + * + * For in-memory compression, the compressed bit stream goes directly + * into the requested output buffer. The input data is read in blocks + * by the mem_read() function. The buffer is limited to 64K on 16 bit + * machines. + * + * INTERFACE + * + * void bi_init (FILE *zipfile) + * Initialize the bit string routines. + * + * void send_bits (int value, int length) + * Write out a bit string, taking the source bits right to + * left. + * + * int bi_reverse (int value, int length) + * Reverse the bits of a bit string, taking the source bits left to + * right and emitting them right to left. + * + * void bi_windup (void) + * Write out any remaining bits in an incomplete byte. + * + * void copy_block(char *buf, unsigned len, int header) + * Copy a stored block to the zip file, storing first the length and + * its one's complement if requested. + * + */ + +#include "gzip.h" + +/* =========================================================================== + * Local data used by the "bit string" routines. + */ + +static int zfile; /* output gzip file */ + +static unsigned short bi_buf; +/* Output buffer. bits are inserted starting at the bottom (least significant + * bits). + */ + +#define Buf_size (8 * 2*sizeof(char)) +/* Number of bits used within bi_buf. (bi_buf might be implemented on + * more than 16 bits on some systems.) + */ + +static int bi_valid; +/* Number of valid bits in bi_buf. All bits above the last valid bit + * are always zero. + */ + +int (*read_buf)(char *buf, unsigned size); +/* Current input function. Set to mem_read for in-memory compression */ + +/* =========================================================================== + * Initialize the bit string routines. + */ +void bi_init (int zipfile) /* output zip file, NO_FILE for in-memory compression */ +{ + zfile = zipfile; + bi_buf = 0; + bi_valid = 0; + + /* Set the defaults for file compression. They are set by memcompress + * for in-memory compression. + */ + if (zfile != NO_FILE) { + read_buf = file_read; + } +} + +/* =========================================================================== + * Send a value on a given number of bits. + * IN assertion: length <= 16 and value fits in length bits. + */ +void send_bits(int value, /* value to send */ + int length) /* number of bits */ +{ + /* If not enough room in bi_buf, use (valid) bits from bi_buf and + * (16 - bi_valid) bits from value, leaving (width - (16-bi_valid)) + * unused bits in value. + */ + if (bi_valid > (int)Buf_size - length) { + bi_buf |= (value << bi_valid); + put_short(bi_buf); + bi_buf = (ush)value >> (Buf_size - bi_valid); + bi_valid += length - Buf_size; + } else { + bi_buf |= value << bi_valid; + bi_valid += length; + } +} + +/* =========================================================================== + * Reverse the first len bits of a code, using straightforward code (a faster + * method would use a table) + * IN assertion: 1 <= len <= 15 + */ +unsigned bi_reverse(unsigned code, /* the value to invert */ + int len) /* its bit length */ +{ + register unsigned res = 0; + do { + res |= code & 1; + code >>= 1, res <<= 1; + } while (--len > 0); + return res >> 1; +} + +/* =========================================================================== + * Write out any remaining bits in an incomplete byte. + */ +void bi_windup(void) +{ + if (bi_valid > 8) { + put_short(bi_buf); + } else if (bi_valid > 0) { + put_byte(bi_buf); + } + bi_buf = 0; + bi_valid = 0; +} + +/* =========================================================================== + * Copy a stored block to the zip file, storing first the length and its + * one's complement if requested. + */ +void copy_block(char *buf, /* the input data */ + unsigned len, /* its length */ + int header) /* true if block header must be written */ +{ + bi_windup(); /* align on byte boundary */ + + if (header) { + put_short((ush)len); + put_short((ush)~len); + } + while (len--) { + put_byte(*buf++); + } +} diff --git a/zgz/gzip/deflate.c b/zgz/gzip/deflate.c new file mode 100644 index 0000000..780e500 --- /dev/null +++ b/zgz/gzip/deflate.c @@ -0,0 +1,792 @@ +/* deflate.c -- compress data using the deflation algorithm + + Copyright (C) 1999, 2006 Free Software Foundation, Inc. + Copyright (C) 1992-1993 Jean-loup Gailly + Copyright (C) 2008 Josh Triplett + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2, or (at your option) + any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software Foundation, + Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */ + +/* + * PURPOSE + * + * Identify new text as repetitions of old text within a fixed- + * length sliding window trailing behind the new text. + * + * DISCUSSION + * + * The "deflation" process depends on being able to identify portions + * of the input text which are identical to earlier input (within a + * sliding window trailing behind the input currently being processed). + * + * The most straightforward technique turns out to be the fastest for + * most input files: try all possible matches and select the longest. + * The key feature of this algorithm is that insertions into the string + * dictionary are very simple and thus fast, and deletions are avoided + * completely. Insertions are performed at each input character, whereas + * string matches are performed only when the previous match ends. So it + * is preferable to spend more time in matches to allow very fast string + * insertions and avoid deletions. The matching algorithm for small + * strings is inspired from that of Rabin & Karp. A brute force approach + * is used to find longer strings when a small match has been found. + * A similar algorithm is used in comic (by Jan-Mark Wams) and freeze + * (by Leonid Broukhis). + * A previous version of this file used a more sophisticated algorithm + * (by Fiala and Greene) which is guaranteed to run in linear amortized + * time, but has a larger average cost, uses more memory and is patented. + * However the F&G algorithm may be faster for some highly redundant + * files if the parameter max_chain_length (described below) is too large. + * + * ACKNOWLEDGEMENTS + * + * The idea of lazy evaluation of matches is due to Jan-Mark Wams, and + * I found it in 'freeze' written by Leonid Broukhis. + * Thanks to many info-zippers for bug reports and testing. + * + * REFERENCES + * + * APPNOTE.TXT documentation file in PKZIP 1.93a distribution. + * + * A description of the Rabin and Karp algorithm is given in the book + * "Algorithms" by R. Sedgewick, Addison-Wesley, p252. + * + * Fiala,E.R., and Greene,D.H. + * Data Compression with Finite Windows, Comm.ACM, 32,4 (1989) 490-595 + * + * INTERFACE + * + * void lm_init (int pack_level, ush *flags) + * Initialize the "longest match" routines for a new file + * + * void deflate (void) + * Processes a new input file. Sets the compressed length, crc, + * deflate flags and internal file attributes. + */ + +#include + +#include "gzip.h" + +/* =========================================================================== + * Configuration parameters + */ + +#define HASH_BITS 15 + +#define HASH_SIZE (unsigned)(1<= HASH_BITS + */ + +unsigned int prev_length; +/* Length of the best match at previous step. Matches not greater than this + * are discarded. This is used in the lazy match evaluation. + */ + +unsigned strstart; /* start of string to insert */ +unsigned match_start; /* start of matching string */ +static int eofile; /* flag set at end of input file */ +static unsigned lookahead; /* number of valid bytes ahead in window */ + +unsigned max_chain_length; +/* To speed up deflation, hash chains are never searched beyond this length. + * A higher limit improves compression ratio but degrades the speed. + */ + +static unsigned int max_lazy_match; +/* Attempt to find a better match only when the current match is strictly + * smaller than this value. This mechanism is used only for compression + * levels >= 4. + */ +#define max_insert_length max_lazy_match +/* Insert new strings in the hash table only if the match length + * is not greater than this length. This saves time but degrades compression. + * max_insert_length is used only for compression levels <= 3. + */ + +static unsigned good_match; +/* Use a faster search when the previous match is longer than this */ + +static ulg rsync_sum; /* rolling sum of rsync window */ +static ulg rsync_chunk_end; /* next rsync sequence point */ + +/* Values for max_lazy_match, good_match and max_chain_length, depending on + * the desired pack level (0..9). The values given below have been tuned to + * exclude worst case performance for pathological files. Better values may be + * found for specific files. + */ + +typedef struct config { + ush good_length; /* reduce lazy search above this match length */ + ush max_lazy; /* do not perform lazy search above this match length */ + ush nice_length; /* quit search above this match length */ + ush max_chain; +} config; + +#ifdef FULL_SEARCH +# define nice_match MAX_MATCH +#else + int nice_match; /* Stop searching when current match exceeds this */ +#endif + +static config configuration_table[10] = { +/* good lazy nice chain */ +/* 0 */ {0, 0, 0, 0}, /* store only */ +/* 1 */ {4, 4, 8, 4}, /* maximum speed, no lazy matches */ +/* 2 */ {4, 5, 16, 8}, +/* 3 */ {4, 6, 32, 32}, + +/* 4 */ {4, 4, 16, 16}, /* lazy matches */ +/* 5 */ {8, 16, 32, 32}, +/* 6 */ {8, 16, 128, 128}, +/* 7 */ {8, 32, 128, 256}, +/* 8 */ {32, 128, 258, 1024}, +/* 9 */ {32, 258, 258, 4096}}; /* maximum compression */ + +/* Note: the deflate() code requires max_lazy >= MIN_MATCH and max_chain >= 4 + * For deflate_fast() (levels <= 3) good is ignored and lazy has a different + * meaning. + */ + +#define EQUAL 0 +/* result of memcmp for equal strings */ + +/* =========================================================================== + * Prototypes for local functions. + */ +static void fill_window(void); + + int longest_match(IPos cur_match); + +/* =========================================================================== + * Update a hash value with the given input byte + * IN assertion: all calls to to UPDATE_HASH are made with consecutive + * input characters, so that a running hash key can be computed from the + * previous key instead of complete recalculation each time. + */ +#define UPDATE_HASH(h,c) (h = (((h)< 9) gzip_error ("bad pack level"); + + /* Initialize the hash table. */ + memzero((char*)head, HASH_SIZE*sizeof(*head)); + /* prev will be initialized on the fly */ + + /* rsync params */ + rsync_chunk_end = 0xFFFFFFFFUL; + rsync_sum = 0; + + /* Set the default configuration parameters: + */ + max_lazy_match = configuration_table[pack_level].max_lazy; + good_match = configuration_table[pack_level].good_length; +#ifndef FULL_SEARCH + nice_match = configuration_table[pack_level].nice_length; +#endif + max_chain_length = configuration_table[pack_level].max_chain; + if (pack_level == 1) { + *flags |= FAST; + } else if (pack_level == 9) { + *flags |= SLOW; + } + /* ??? reduce max_chain_length for binary files */ + + strstart = 0; + block_start = 0L; + + lookahead = read_buf((char*)window, + sizeof(int) <= 2 ? (unsigned)WSIZE : 2*WSIZE); + + if (lookahead == 0 || lookahead == (unsigned)EOF) { + eofile = 1, lookahead = 0; + return; + } + eofile = 0; + /* Make sure that we always have enough lookahead. This is important + * if input comes from a device such as a tty. + */ + while (lookahead < MIN_LOOKAHEAD && !eofile) fill_window(); + + ins_h = 0; + for (j=0; j= 1 + */ +int longest_match(IPos cur_match) +{ + unsigned chain_length = max_chain_length; /* max hash chain length */ + register uch *scan = window + strstart; /* current string */ + register uch *match; /* matched string */ + register int len; /* length of current match */ + int best_len = prev_length; /* best match length so far */ + IPos limit = strstart > (IPos)MAX_DIST ? strstart - (IPos)MAX_DIST : NIL; + /* Stop when cur_match becomes <= limit. To simplify the code, + * we prevent matches with the string of window index 0. + */ + +/* The code is optimized for HASH_BITS >= 8 and MAX_MATCH-2 multiple of 16. + * It is easy to get rid of this optimization if necessary. + */ +#if HASH_BITS < 8 || MAX_MATCH != 258 + error: Code too clever +#endif + +#ifdef UNALIGNED_OK + /* Compare two bytes at a time. Note: this is not always beneficial. + * Try with and without -DUNALIGNED_OK to check. + */ + register uch *strend = window + strstart + MAX_MATCH - 1; + register ush scan_start = *(ush*)scan; + register ush scan_end = *(ush*)(scan+best_len-1); +#else + register uch *strend = window + strstart + MAX_MATCH; + register uch scan_end1 = scan[best_len-1]; + register uch scan_end = scan[best_len]; +#endif + + /* Do not waste too much time if we already have a good match: */ + if (prev_length >= good_match) { + chain_length >>= 2; + } + Assert(strstart <= window_size-MIN_LOOKAHEAD, "insufficient lookahead"); + + do { + Assert(cur_match < strstart, "no future"); + match = window + cur_match; + + /* Skip to next match if the match length cannot increase + * or if the match length is less than 2: + */ +#if (defined(UNALIGNED_OK) && MAX_MATCH == 258) + /* This code assumes sizeof(unsigned short) == 2. Do not use + * UNALIGNED_OK if your compiler uses a different size. + */ + if (*(ush*)(match+best_len-1) != scan_end || + *(ush*)match != scan_start) continue; + + /* It is not necessary to compare scan[2] and match[2] since they are + * always equal when the other bytes match, given that the hash keys + * are equal and that HASH_BITS >= 8. Compare 2 bytes at a time at + * strstart+3, +5, ... up to strstart+257. We check for insufficient + * lookahead only every 4th comparison; the 128th check will be made + * at strstart+257. If MAX_MATCH-2 is not a multiple of 8, it is + * necessary to put more guard bytes at the end of the window, or + * to check more often for insufficient lookahead. + */ + scan++, match++; + do { + } while (*(ush*)(scan+=2) == *(ush*)(match+=2) && + *(ush*)(scan+=2) == *(ush*)(match+=2) && + *(ush*)(scan+=2) == *(ush*)(match+=2) && + *(ush*)(scan+=2) == *(ush*)(match+=2) && + scan < strend); + /* The funny "do {}" generates better code on most compilers */ + + /* Here, scan <= window+strstart+257 */ + Assert(scan <= window+(unsigned)(window_size-1), "wild scan"); + if (*scan == *match) scan++; + + len = (MAX_MATCH - 1) - (int)(strend-scan); + scan = strend - (MAX_MATCH-1); + +#else /* UNALIGNED_OK */ + + if (match[best_len] != scan_end || + match[best_len-1] != scan_end1 || + *match != *scan || + *++match != scan[1]) continue; + + /* The check at best_len-1 can be removed because it will be made + * again later. (This heuristic is not always a win.) + * It is not necessary to compare scan[2] and match[2] since they + * are always equal when the other bytes match, given that + * the hash keys are equal and that HASH_BITS >= 8. + */ + scan += 2, match++; + + /* We check for insufficient lookahead only every 8th comparison; + * the 256th check will be made at strstart+258. + */ + do { + } while (*++scan == *++match && *++scan == *++match && + *++scan == *++match && *++scan == *++match && + *++scan == *++match && *++scan == *++match && + *++scan == *++match && *++scan == *++match && + scan < strend); + + len = MAX_MATCH - (int)(strend - scan); + scan = strend - MAX_MATCH; + +#endif /* UNALIGNED_OK */ + + if (len > best_len) { + match_start = cur_match; + best_len = len; + if (len >= nice_match) break; +#ifdef UNALIGNED_OK + scan_end = *(ush*)(scan+best_len-1); +#else + scan_end1 = scan[best_len-1]; + scan_end = scan[best_len]; +#endif + } + } while ((cur_match = prev[cur_match & WMASK]) > limit + && --chain_length != 0); + + return best_len; +} + +/* =========================================================================== + * Fill the window when the lookahead becomes insufficient. + * Updates strstart and lookahead, and sets eofile if end of input file. + * IN assertion: lookahead < MIN_LOOKAHEAD && strstart + lookahead > 0 + * OUT assertions: at least one byte has been read, or eofile is set; + * file reads are performed for at least two bytes (required for the + * translate_eol option). + */ +static void fill_window(void) +{ + register unsigned n, m; + unsigned more = (unsigned)(window_size - (ulg)lookahead - (ulg)strstart); + /* Amount of free space at the end of the window. */ + + /* If the window is almost full and there is insufficient lookahead, + * move the upper half to the lower one to make room in the upper half. + */ + if (more == (unsigned)EOF) { + /* Very unlikely, but possible on 16 bit machine if strstart == 0 + * and lookahead == 1 (input done one byte at time) + */ + more--; + } else if (strstart >= WSIZE+MAX_DIST) { + /* By the IN assertion, the window is not empty so we can't confuse + * more == 0 with more == 64K on a 16 bit machine. + */ + Assert(window_size == (ulg)2*WSIZE, "no sliding with BIG_MEM"); + + memcpy((char*)window, (char*)window+WSIZE, (unsigned)WSIZE); + match_start -= WSIZE; + strstart -= WSIZE; /* we now have strstart >= MAX_DIST: */ + if (rsync_chunk_end != 0xFFFFFFFFUL) + rsync_chunk_end -= WSIZE; + + block_start -= (long) WSIZE; + + for (n = 0; n < HASH_SIZE; n++) { + m = head[n]; + head[n] = (Pos)(m >= WSIZE ? m-WSIZE : NIL); + } + for (n = 0; n < WSIZE; n++) { + m = prev[n]; + prev[n] = (Pos)(m >= WSIZE ? m-WSIZE : NIL); + /* If n is not on any hash chain, prev[n] is garbage but + * its value will never be used. + */ + } + more += WSIZE; + } + /* At this point, more >= 2 */ + if (!eofile) { + n = read_buf((char*)window+strstart+lookahead, more); + if (n == 0 || n == (unsigned)EOF) { + eofile = 1; + } else { + lookahead += n; + } + } +} + +static void rsync_roll(unsigned start, unsigned num) +{ + unsigned i; + + if (start < RSYNC_WIN) { + /* before window fills. */ + for (i = start; i < RSYNC_WIN; i++) { + if (i == start + num) return; + rsync_sum += (ulg)window[i]; + } + num -= (RSYNC_WIN - start); + start = RSYNC_WIN; + } + + /* buffer after window full */ + for (i = start; i < start+num; i++) { + /* New character in */ + rsync_sum += (ulg)window[i]; + /* Old character out */ + rsync_sum -= (ulg)window[i - RSYNC_WIN]; + if (debian_rsyncable && rsync_chunk_end == 0xFFFFFFFFUL && RSYNC_SUM_MATCH_DEBIAN(rsync_sum)) + rsync_chunk_end = i; + if (! debian_rsyncable && rsync_chunk_end == 0xFFFFFFFFUL && RSYNC_SUM_MATCH(rsync_sum)) + rsync_chunk_end = i; + } +} + +/* =========================================================================== + * Set rsync_chunk_end if window sum matches magic value. + */ +#define RSYNC_ROLL(s, n) \ + do { if (rsync) rsync_roll((s), (n)); } while(0) + +/* =========================================================================== + * Flush the current block, with given end-of-file flag. + * IN assertion: strstart is set to the end of the current match. + */ +#define FLUSH_BLOCK(eof) \ + flush_block(block_start >= 0L ? (char*)&window[(unsigned)block_start] : \ + (char*)NULL, (long)strstart - block_start, flush-1, (eof)) + +/* =========================================================================== + * Processes a new input file and return its compressed length. This + * function does not perform lazy evaluationof matches and inserts + * new strings in the dictionary only for unmatched strings or for short + * matches. It is used only for the fast compression options. + */ +static void deflate_fast(int pack_level, int rsync) +{ + IPos hash_head; /* head of the hash chain */ + int flush = 0; /* set if current block must be flushed, 2=>and padded */ + unsigned match_length = 0; /* length of best match */ + + prev_length = MIN_MATCH-1; + while (lookahead != 0) { + /* Insert the string window[strstart .. strstart+2] in the + * dictionary, and set hash_head to the head of the hash chain: + */ + INSERT_STRING(strstart, hash_head); + + /* Find the longest match, discarding those <= prev_length. + * At this point we have always match_length < MIN_MATCH + */ + if (hash_head != NIL && strstart - hash_head <= MAX_DIST + && strstart <= window_size - MIN_LOOKAHEAD) { + /* To simplify the code, we prevent matches with the string + * of window index 0 (in particular we have to avoid a match + * of the string with itself at the start of the input file). + */ + match_length = longest_match (hash_head); + /* longest_match() sets match_start */ + if (match_length > lookahead) match_length = lookahead; + } + if (match_length >= MIN_MATCH) { + flush = ct_tally(pack_level, strstart-match_start, match_length - MIN_MATCH); + + lookahead -= match_length; + + RSYNC_ROLL(strstart, match_length); + /* Insert new strings in the hash table only if the match length + * is not too large. This saves time but degrades compression. + */ + if (match_length <= max_insert_length) { + match_length--; /* string at strstart already in hash table */ + do { + strstart++; + INSERT_STRING(strstart, hash_head); + /* strstart never exceeds WSIZE-MAX_MATCH, so there are + * always MIN_MATCH bytes ahead. If lookahead < MIN_MATCH + * these bytes are garbage, but it does not matter since + * the next lookahead bytes will be emitted as literals. + */ + } while (--match_length != 0); + strstart++; + } else { + strstart += match_length; + match_length = 0; + ins_h = window[strstart]; + UPDATE_HASH(ins_h, window[strstart+1]); +#if MIN_MATCH != 3 + Call UPDATE_HASH() MIN_MATCH-3 more times +#endif + } + } else { + /* No match, output a literal byte */ + Tracevv((stderr,"%c",window[strstart])); + flush = ct_tally (pack_level, 0, window[strstart]); + RSYNC_ROLL(strstart, 1); + lookahead--; + strstart++; + } + if (rsync && debian_rsyncable && strstart > rsync_chunk_end) { + rsync_chunk_end = 0xFFFFFFFFUL; + flush = 2; + } + if (rsync && ! debian_rsyncable && strstart > rsync_chunk_end) { + flush = 1; + ct_init(); + rsync_chunk_end = 0xFFFFFFFFUL; + } + if (flush) FLUSH_BLOCK(0), block_start = strstart; + + /* Make sure that we always have enough lookahead, except + * at the end of the input file. We need MAX_MATCH bytes + * for the next match, plus MIN_MATCH bytes to insert the + * string following the next match. + */ + while (lookahead < MIN_LOOKAHEAD && !eofile) fill_window(); + + } + FLUSH_BLOCK(1); /* eof */ +} + +/* =========================================================================== + * Same as above, but achieves better compression. We use a lazy + * evaluation for matches: a match is finally adopted only if there is + * no better match at the next window position. + */ +void gnu_deflate(int pack_level, int rsync, int newrsync) +{ + IPos hash_head; /* head of hash chain */ + IPos prev_match; /* previous match */ + int flush = 0; /* set if current block must be flushed */ + int match_available = 0; /* set if previous match exists */ + register unsigned match_length = MIN_MATCH-1; /* length of best match */ + + /* The newrsync mode superscedes Debian's old, somewhat broken + * rsync patch. */ + if (newrsync) { + rsync=1; + disable_debian_rsyncable(); + } + + if (pack_level <= 3) { + deflate_fast(pack_level, rsync); /* optimized for speed */ + return; + } + + /* Process the input block. */ + while (lookahead != 0) { + /* Insert the string window[strstart .. strstart+2] in the + * dictionary, and set hash_head to the head of the hash chain: + */ + INSERT_STRING(strstart, hash_head); + + /* Find the longest match, discarding those <= prev_length. + */ + prev_length = match_length, prev_match = match_start; + match_length = MIN_MATCH-1; + + if (hash_head != NIL && prev_length < max_lazy_match && + strstart - hash_head <= MAX_DIST && + strstart <= window_size - MIN_LOOKAHEAD) { + /* To simplify the code, we prevent matches with the string + * of window index 0 (in particular we have to avoid a match + * of the string with itself at the start of the input file). + */ + match_length = longest_match (hash_head); + /* longest_match() sets match_start */ + if (match_length > lookahead) match_length = lookahead; + + /* Ignore a length 3 match if it is too distant: */ + if (match_length == MIN_MATCH && strstart-match_start > TOO_FAR){ + /* If prev_match is also MIN_MATCH, match_start is garbage + * but we will ignore the current match anyway. + */ + match_length--; + } + } + /* If there was a match at the previous step and the current + * match is not better, output the previous match: + */ + if (prev_length >= MIN_MATCH && match_length <= prev_length) { + flush = ct_tally(pack_level, strstart-1-prev_match, prev_length - MIN_MATCH); + + /* Insert in hash table all strings up to the end of the match. + * strstart-1 and strstart are already inserted. + */ + lookahead -= prev_length-1; + prev_length -= 2; + RSYNC_ROLL(strstart, prev_length+1); + do { + strstart++; + INSERT_STRING(strstart, hash_head); + /* strstart never exceeds WSIZE-MAX_MATCH, so there are + * always MIN_MATCH bytes ahead. If lookahead < MIN_MATCH + * these bytes are garbage, but it does not matter since the + * next lookahead bytes will always be emitted as literals. + */ + } while (--prev_length != 0); + match_available = 0; + match_length = MIN_MATCH-1; + strstart++; + + if (rsync && debian_rsyncable && strstart > rsync_chunk_end) { + rsync_chunk_end = 0xFFFFFFFFUL; + flush = 2; + } + if (rsync && ! debian_rsyncable && strstart > rsync_chunk_end) { + ct_init(); + rsync_chunk_end = 0xFFFFFFFFUL; + flush = 1; + } + if (flush) FLUSH_BLOCK(0), block_start = strstart; + } else if (match_available) { + /* If there was no match at the previous position, output a + * single literal. If there was a match but the current match + * is longer, truncate the previous match to a single literal. + */ + Tracevv((stderr,"%c",window[strstart-1])); + flush = ct_tally (pack_level, 0, window[strstart-1]); + if (rsync && debian_rsyncable && strstart > rsync_chunk_end) { + rsync_chunk_end = 0xFFFFFFFFUL; + flush = 2; + } + if (rsync && ! debian_rsyncable && strstart > rsync_chunk_end) { + ct_init(); + rsync_chunk_end = 0xFFFFFFFFUL; + flush = 1; + } + if (flush) FLUSH_BLOCK(0), block_start = strstart; + RSYNC_ROLL(strstart, 1); + strstart++; + lookahead--; + } else { + /* There is no previous match to compare with, wait for + * the next step to decide. + */ + if (rsync && debian_rsyncable && strstart > rsync_chunk_end) { + /* Reset huffman tree */ + rsync_chunk_end = 0xFFFFFFFFUL; + flush = 2; + FLUSH_BLOCK(0), block_start = strstart; + } + if (rsync && ! debian_rsyncable && strstart > rsync_chunk_end) { + ct_init(); + /* Reset huffman tree */ + rsync_chunk_end = 0xFFFFFFFFUL; + FLUSH_BLOCK(0), block_start = strstart; + } + match_available = 1; + RSYNC_ROLL(strstart, 1); + strstart++; + lookahead--; + } + /* Assert (strstart <= bytes_in && lookahead <= bytes_in, "a bit too far"); */ + + /* Make sure that we always have enough lookahead, except + * at the end of the input file. We need MAX_MATCH bytes + * for the next match, plus MIN_MATCH bytes to insert the + * string following the next match. + */ + while (lookahead < MIN_LOOKAHEAD && !eofile) fill_window(); + } + if (match_available) ct_tally (pack_level, 0, window[strstart-1]); + + FLUSH_BLOCK(1); /* eof */ +} diff --git a/zgz/gzip/gzip.c b/zgz/gzip/gzip.c new file mode 100644 index 0000000..a9a39dc --- /dev/null +++ b/zgz/gzip/gzip.c @@ -0,0 +1,120 @@ +/* zip.c -- compress files to the gzip or pkzip format + + Copyright (C) 1997, 1998, 1999, 2006, 2007 Free Software Foundation, Inc. + Copyright (C) 1992-1993 Jean-loup Gailly + Copyright (C) 2008 Josh Triplett + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2, or (at your option) + any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software Foundation, + Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */ + +#include +#include +#include + +#include "gzip.h" + +uch outbuf[OUTBUFSIZ]; +unsigned outcnt; /* bytes in output buffer */ + +static ulg crc; /* crc on uncompressed file data */ + +static int ifd; /* input file descriptor */ +static int ofd; /* output file descriptor */ + +static off_t bytes_in; /* number of input bytes */ + +/* =========================================================================== + * Deflate in to out. + * IN assertions: the input and output buffers are cleared. + */ +void gnuzip(int in, int out, char *origname, ulg timestamp, int level, int osflag, int rsync, int newrsync) +{ + uch flags = 0; /* general purpose bit flags */ + ush deflate_flags = 0; /* pkzip -es, -en or -ex equivalent */ + + ifd = in; + ofd = out; + outcnt = 0; + bytes_in = 0L; + + /* Write the header to the gzip file. */ + + put_byte(GZIP_MAGIC[0]); /* magic header */ + put_byte(GZIP_MAGIC[1]); + put_byte(DEFLATED); /* compression method */ + + if (origname) + flags |= ORIG_NAME; + put_byte(flags); /* general flags */ + put_long(timestamp); + + /* Write deflated file to zip file */ + crc = updcrc(0, 0); + + bi_init(out); + ct_init(); + lm_init(level, &deflate_flags); + + put_byte((uch)deflate_flags); /* extra flags */ + put_byte(osflag); /* OS identifier */ + + if (origname) { + char *p = origname; + do { + put_byte(*p); + } while (*p++); + } + + gnu_deflate(level, rsync, newrsync); + + /* Write the crc and uncompressed size */ + put_long(crc); + put_long((ulg)bytes_in); + + flush_outbuf(); +} + + +/* =========================================================================== + * Read a new buffer from the current input file, perform end-of-line + * translation, and update the crc and input file size. + * IN assertion: size >= 2 (for end-of-line translation) + */ +int file_read(char *buf, unsigned size) +{ + unsigned len; + + len = read_buffer (ifd, buf, size); + if (len == 0) return (int)len; + if (len == (unsigned)-1) { + read_error(); + return EOF; + } + + crc = updcrc((uch*)buf, len); + bytes_in += (off_t)len; + return (int)len; +} + +/* =========================================================================== + * Write the output buffer outbuf[0..outcnt-1]. + * (used for the compressed data only) + */ +void flush_outbuf(void) +{ + if (outcnt == 0) return; + + write_buf(ofd, (char *)outbuf, outcnt); + outcnt = 0; +} diff --git a/zgz/gzip/gzip.h b/zgz/gzip/gzip.h new file mode 100644 index 0000000..267a2ec --- /dev/null +++ b/zgz/gzip/gzip.h @@ -0,0 +1,154 @@ +/* gzip.h -- common declarations for all gzip modules + + Copyright (C) 1997, 1998, 1999, 2001, 2006, 2007 Free Software + Foundation, Inc. + Copyright (C) 1992-1993 Jean-loup Gailly. + Copyright (C) 2008 Josh Triplett + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2, or (at your option) + any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software Foundation, + Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */ + +#define BITS 16 + +#ifndef __attribute__ +# if __GNUC__ < 2 || (__GNUC__ == 2 && __GNUC_MINOR__ < 8) || __STRICT_ANSI__ +# define __attribute__(x) +# endif +#endif + +#define ATTRIBUTE_NORETURN __attribute__ ((__noreturn__)) + +/* I don't like nested includes, but the following headers are used + * too often + */ +#include +#include /* for off_t */ +#include +#include +#define memzero(s, n) memset ((s), 0, (n)) + +typedef unsigned char uch; +typedef unsigned short ush; +typedef unsigned long ulg; + +/* Return codes from gzip */ +#define OK 0 +#define ERROR 1 + +/* Compression methods */ +#define DEFLATED 8 + +#define INBUFSIZ 0x8000 /* input buffer size */ +#define OUTBUFSIZ 16384 /* output buffer size */ +#define DIST_BUFSIZE 0x8000 /* buffer for distances, see trees.c */ + +extern uch outbuf[]; /* output buffer */ + +extern unsigned outcnt; /* bytes in output buffer */ + +#define NO_FILE (-1) /* in memory compression */ + + +#define GZIP_MAGIC "\037\213" /* Magic header for gzip files, 1F 8B */ +#define OLD_GZIP_MAGIC "\037\236" /* Magic header for gzip 0.5 = freeze 1.x */ +#define PKZIP_MAGIC "\120\113\003\004" /* Magic header for pkzip files */ + +/* gzip flag byte */ +#define ASCII_FLAG 0x01 /* bit 0 set: file probably ascii text */ +#define CONTINUATION 0x02 /* bit 1 set: continuation of multi-part gzip file */ +#define EXTRA_FIELD 0x04 /* bit 2 set: extra field present */ +#define ORIG_NAME 0x08 /* bit 3 set: original file name present */ +#define COMMENT 0x10 /* bit 4 set: file comment present */ +#define ENCRYPTED 0x20 /* bit 5 set: file is encrypted */ +#define RESERVED 0xC0 /* bit 6,7: reserved */ + +/* internal file attribute */ +#define UNKNOWN 0xffff +#define BINARY 0 +#define ASCII 1 + +#define WSIZE 0x8000 /* window size--must be a power of two, and */ + /* at least 32K for zip's deflate method */ + +#define MIN_MATCH 3 +#define MAX_MATCH 258 +/* The minimum and maximum match lengths */ + +#define MIN_LOOKAHEAD (MAX_MATCH+MIN_MATCH+1) +/* Minimum amount of lookahead, except at the end of the input file. + * See deflate.c for comments about the MIN_MATCH+1. + */ + +#define MAX_DIST (WSIZE-MIN_LOOKAHEAD) +/* In order to simplify the code, particularly on 16 bit machines, match + * distances are limited to MAX_DIST instead of WSIZE. + */ + +/* put_byte is used for the compressed output. */ +#define put_byte(c) {outbuf[outcnt++]=(uch)(c); if (outcnt==OUTBUFSIZ)\ + flush_outbuf();} + +/* Output a 16 bit value, lsb first */ +#define put_short(w) \ +{ if (outcnt < OUTBUFSIZ-2) { \ + outbuf[outcnt++] = (uch) ((w) & 0xff); \ + outbuf[outcnt++] = (uch) ((ush)(w) >> 8); \ + } else { \ + put_byte((uch)((w) & 0xff)); \ + put_byte((uch)((ush)(w) >> 8)); \ + } \ +} + +/* Output a 32 bit value to the bit stream, lsb first */ +#define put_long(n) { \ + put_short((n) & 0xffff); \ + put_short(((ulg)(n)) >> 16); \ +} + +/* Diagnostic functions */ +#define Assert(cond,msg) +#define Trace(x) +#define Tracev(x) +#define Tracevv(x) +#define Tracec(c,x) +#define Tracecv(c,x) + + /* in zip.c: */ +extern int file_read(char *buf, unsigned size); + + /* in deflate.c */ +void lm_init(int pack_level, ush *flags); +void gnu_deflate(int pack_level, int rsync, int newrsync); + + /* in trees.c */ +void ct_init(void); +int ct_tally(int pack_level, int dist, int lc); +void flush_block(char *buf, ulg stored_len, int pad, int eof); + + /* in bits.c */ +void bi_init(int zipfile); +void send_bits(int value, int length); +unsigned bi_reverse(unsigned value, int length); +void bi_windup(void); +void copy_block(char *buf, unsigned len, int header); +extern int (*read_buf)(char *buf, unsigned size); + + /* in util.c: */ +extern ulg updcrc(uch *s, unsigned n); +extern void flush_outbuf(void); +extern void write_buf(int fd, void *buf, unsigned cnt); +extern int read_buffer(int fd, void *buf, unsigned int cnt); +extern void gzip_error(char *m); +extern void read_error(void); +extern void write_error(void); diff --git a/zgz/gzip/trees.c b/zgz/gzip/trees.c new file mode 100644 index 0000000..13fdcf3 --- /dev/null +++ b/zgz/gzip/trees.c @@ -0,0 +1,978 @@ +/* trees.c -- output deflated data using Huffman coding + + Copyright (C) 1997, 1998, 1999 Free Software Foundation, Inc. + Copyright (C) 1992-1993 Jean-loup Gailly + Copyright (C) 2008 Josh Triplett + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2, or (at your option) + any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software Foundation, + Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */ + +/* + * PURPOSE + * + * Encode various sets of source values using variable-length + * binary code trees. + * + * DISCUSSION + * + * The PKZIP "deflation" process uses several Huffman trees. The more + * common source values are represented by shorter bit sequences. + * + * Each code tree is stored in the ZIP file in a compressed form + * which is itself a Huffman encoding of the lengths of + * all the code strings (in ascending order by source values). + * The actual code strings are reconstructed from the lengths in + * the UNZIP process, as described in the "application note" + * (APPNOTE.TXT) distributed as part of PKWARE's PKZIP program. + * + * REFERENCES + * + * Lynch, Thomas J. + * Data Compression: Techniques and Applications, pp. 53-55. + * Lifetime Learning Publications, 1985. ISBN 0-534-03418-7. + * + * Storer, James A. + * Data Compression: Methods and Theory, pp. 49-50. + * Computer Science Press, 1988. ISBN 0-7167-8156-5. + * + * Sedgewick, R. + * Algorithms, p290. + * Addison-Wesley, 1983. ISBN 0-201-06672-6. + * + * INTERFACE + * + * void ct_init (void) + * Allocate the match buffer and initialize the various tables + * + * void ct_tally (int pack_level, int dist, int lc); + * Save the match info and tally the frequency counts. + * + * void flush_block (char *buf, ulg stored_len, int pad, int eof) + * Determine the best encoding for the current block: dynamic trees, + * static trees or store, and output the encoded block to the zip + * file. If pad is set, pads the block to the next byte. + * */ + +#include + +#include "gzip.h" + +/* =========================================================================== + * Constants + */ + +#define MAX_BITS 15 +/* All codes must not exceed MAX_BITS bits */ + +#define MAX_BL_BITS 7 +/* Bit length codes must not exceed MAX_BL_BITS bits */ + +#define LENGTH_CODES 29 +/* number of length codes, not counting the special END_BLOCK code */ + +#define LITERALS 256 +/* number of literal bytes 0..255 */ + +#define END_BLOCK 256 +/* end of block literal code */ + +#define L_CODES (LITERALS+1+LENGTH_CODES) +/* number of Literal or Length codes, including the END_BLOCK code */ + +#define D_CODES 30 +/* number of distance codes */ + +#define BL_CODES 19 +/* number of codes used to transfer the bit lengths */ + + +static int extra_lbits[LENGTH_CODES] /* extra bits for each length code */ + = {0,0,0,0,0,0,0,0,1,1,1,1,2,2,2,2,3,3,3,3,4,4,4,4,5,5,5,5,0}; + +static int extra_dbits[D_CODES] /* extra bits for each distance code */ + = {0,0,0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,8,8,9,9,10,10,11,11,12,12,13,13}; + +static int extra_blbits[BL_CODES]/* extra bits for each bit length code */ + = {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,3,7}; + +#define STORED_BLOCK 0 +#define STATIC_TREES 1 +#define DYN_TREES 2 +/* The three kinds of block type */ + +#define LIT_BUFSIZE 0x8000 +/* Sizes of match buffers for literals/lengths and distances. There are + * 4 reasons for limiting LIT_BUFSIZE to 64K: + * - frequencies can be kept in 16 bit counters + * - if compression is not successful for the first block, all input data is + * still in the window so we can still emit a stored block even when input + * comes from standard input. (This can also be done for all blocks if + * LIT_BUFSIZE is not greater than 32K.) + * - if compression is not successful for a file smaller than 64K, we can + * even emit a stored file instead of a stored block (saving 5 bytes). + * - creating new Huffman trees less frequently may not provide fast + * adaptation to changes in the input data statistics. (Take for + * example a binary file with poorly compressible code followed by + * a highly compressible string table.) Smaller buffer sizes give + * fast adaptation but have of course the overhead of transmitting trees + * more frequently. + * - I can't count above 4 + * The current code is general and allows DIST_BUFSIZE < LIT_BUFSIZE (to save + * memory at the expense of compression). Some optimizations would be possible + * if we rely on DIST_BUFSIZE == LIT_BUFSIZE. + */ + +#define REP_3_6 16 +/* repeat previous bit length 3-6 times (2 bits of repeat count) */ + +#define REPZ_3_10 17 +/* repeat a zero length 3-10 times (3 bits of repeat count) */ + +#define REPZ_11_138 18 +/* repeat a zero length 11-138 times (7 bits of repeat count) */ + +/* =========================================================================== + * Local data + */ + +/* Data structure describing a single value and its code string. */ +typedef struct ct_data { + union { + ush freq; /* frequency count */ + ush code; /* bit string */ + } fc; + union { + ush dad; /* father node in Huffman tree */ + ush len; /* length of bit string */ + } dl; +} ct_data; + +#define Freq fc.freq +#define Code fc.code +#define Dad dl.dad +#define Len dl.len + +#define HEAP_SIZE (2*L_CODES+1) +/* maximum heap size */ + +static ct_data dyn_ltree[HEAP_SIZE]; /* literal and length tree */ +static ct_data dyn_dtree[2*D_CODES+1]; /* distance tree */ + +static ct_data static_ltree[L_CODES+2]; +/* The static literal tree. Since the bit lengths are imposed, there is no + * need for the L_CODES extra codes used during heap construction. However + * The codes 286 and 287 are needed to build a canonical tree (see ct_init + * below). + */ + +static ct_data static_dtree[D_CODES]; +/* The static distance tree. (Actually a trivial tree since all codes use + * 5 bits.) + */ + +static ct_data bl_tree[2*BL_CODES+1]; +/* Huffman tree for the bit lengths */ + +typedef struct tree_desc { + ct_data *dyn_tree; /* the dynamic tree */ + ct_data *static_tree; /* corresponding static tree or NULL */ + int *extra_bits; /* extra bits for each code or NULL */ + int extra_base; /* base index for extra_bits */ + int elems; /* max number of elements in the tree */ + int max_length; /* max bit length for the codes */ + int max_code; /* largest code with non zero frequency */ +} tree_desc; + +static tree_desc l_desc = +{dyn_ltree, static_ltree, extra_lbits, LITERALS+1, L_CODES, MAX_BITS, 0}; + +static tree_desc d_desc = +{dyn_dtree, static_dtree, extra_dbits, 0, D_CODES, MAX_BITS, 0}; + +static tree_desc bl_desc = +{bl_tree, (ct_data *)0, extra_blbits, 0, BL_CODES, MAX_BL_BITS, 0}; + + +static ush bl_count[MAX_BITS+1]; +/* number of codes at each bit length for an optimal tree */ + +static uch bl_order[BL_CODES] + = {16,17,18,0,8,7,9,6,10,5,11,4,12,3,13,2,14,1,15}; +/* The lengths of the bit length codes are sent in order of decreasing + * probability, to avoid transmitting the lengths for unused bit length codes. + */ + +static int heap[2*L_CODES+1]; /* heap used to build the Huffman trees */ +static int heap_len; /* number of elements in the heap */ +static int heap_max; /* element of largest frequency */ +/* The sons of heap[n] are heap[2*n] and heap[2*n+1]. heap[0] is not used. + * The same heap array is used to build all trees. + */ + +static uch depth[2*L_CODES+1]; +/* Depth of each subtree used as tie breaker for trees of equal frequency */ + +static uch length_code[MAX_MATCH-MIN_MATCH+1]; +/* length code for each normalized match length (0 == MIN_MATCH) */ + +static uch dist_code[512]; +/* distance codes. The first 256 values correspond to the distances + * 3 .. 258, the last 256 values correspond to the top 8 bits of + * the 15 bit distances. + */ + +static int base_length[LENGTH_CODES]; +/* First normalized length for each code (0 = MIN_MATCH) */ + +static int base_dist[D_CODES]; +/* First normalized distance for each code (0 = distance of 1) */ + +static uch l_buf[INBUFSIZ]; /* buffer for literals or lengths */ +static ush d_buf[DIST_BUFSIZE]; /* buffer for distances */ + +static uch flag_buf[(LIT_BUFSIZE/8)]; +/* flag_buf is a bit array distinguishing literals from lengths in + * l_buf, thus indicating the presence or absence of a distance. + */ + +static unsigned last_lit; /* running index in l_buf */ +static unsigned last_dist; /* running index in d_buf */ +static unsigned last_flags; /* running index in flag_buf */ +static uch flags; /* current flags not yet saved in flag_buf */ +static uch flag_bit; /* current bit used in flags */ +/* bits are filled in flags starting at bit 0 (least significant). + * Note: these flags are overkill in the current code since we don't + * take advantage of DIST_BUFSIZE == LIT_BUFSIZE. + */ + +static ulg opt_len; /* bit length of current block with optimal trees */ +static ulg static_len; /* bit length of current block with static trees */ + +static off_t compressed_len; /* total bit length of compressed file */ + +extern long block_start; /* window offset of current block */ +extern unsigned strstart; /* window offset of current string */ + +/* =========================================================================== + * Local (static) routines in this file. + */ + +static void init_block(void); +static void pqdownheap(ct_data *tree, int k); +static void gen_bitlen(tree_desc *desc); +static void gen_codes(ct_data *tree, int max_code); +static void build_tree(tree_desc *desc); +static void scan_tree(ct_data *tree, int max_code); +static void send_tree(ct_data *tree, int max_code); +static int build_bl_tree(void); +static void send_all_trees(int lcodes, int dcodes, int blcodes); +static void compress_block(ct_data *ltree, ct_data *dtree); + +#define send_code(c, tree) send_bits(tree[c].Code, tree[c].Len) +/* Send a code of the given tree. c and tree must not have side effects */ + +#define d_code(dist) \ + ((dist) < 256 ? dist_code[dist] : dist_code[256+((dist)>>7)]) +/* Mapping from a distance to a distance code. dist is the distance - 1 and + * must not have side effects. dist_code[256] and dist_code[257] are never + * used. + */ + +#define MAX(a,b) (a >= b ? a : b) +/* the arguments must not have side effects */ + +/* =========================================================================== + * Allocate the match buffer and initialize the various tables + */ +void ct_init(void) +{ + int n; /* iterates over tree elements */ + int bits; /* bit counter */ + int length; /* length value */ + int code; /* code value */ + int dist; /* distance index */ + + compressed_len = 0L; + + if (static_dtree[0].Len != 0) return; /* ct_init already called */ + + /* Initialize the mapping length (0..255) -> length code (0..28) */ + length = 0; + for (code = 0; code < LENGTH_CODES-1; code++) { + base_length[code] = length; + for (n = 0; n < (1< dist code (0..29) */ + dist = 0; + for (code = 0 ; code < 16; code++) { + base_dist[code] = dist; + for (n = 0; n < (1<>= 7; /* from now on, all distances are divided by 128 */ + for ( ; code < D_CODES; code++) { + base_dist[code] = dist << 7; + for (n = 0; n < (1<<(extra_dbits[code]-7)); n++) { + dist_code[256 + dist++] = (uch)code; + } + } + Assert (dist == 256, "ct_init: 256+dist != 512"); + + /* Construct the codes of the static literal tree */ + for (bits = 0; bits <= MAX_BITS; bits++) bl_count[bits] = 0; + n = 0; + while (n <= 143) static_ltree[n++].Len = 8, bl_count[8]++; + while (n <= 255) static_ltree[n++].Len = 9, bl_count[9]++; + while (n <= 279) static_ltree[n++].Len = 7, bl_count[7]++; + while (n <= 287) static_ltree[n++].Len = 8, bl_count[8]++; + /* Codes 286 and 287 do not exist, but we must include them in the + * tree construction to get a canonical Huffman tree (longest code + * all ones) + */ + gen_codes((ct_data *)static_ltree, L_CODES+1); + + /* The static distance tree is trivial: */ + for (n = 0; n < D_CODES; n++) { + static_dtree[n].Len = 5; + static_dtree[n].Code = bi_reverse(n, 5); + } + + /* Initialize the first block of the first file: */ + init_block(); +} + +/* =========================================================================== + * Initialize a new block. + */ +static void init_block(void) +{ + int n; /* iterates over tree elements */ + + /* Initialize the trees. */ + for (n = 0; n < L_CODES; n++) dyn_ltree[n].Freq = 0; + for (n = 0; n < D_CODES; n++) dyn_dtree[n].Freq = 0; + for (n = 0; n < BL_CODES; n++) bl_tree[n].Freq = 0; + + dyn_ltree[END_BLOCK].Freq = 1; + opt_len = static_len = 0L; + last_lit = last_dist = last_flags = 0; + flags = 0; flag_bit = 1; +} + +#define SMALLEST 1 +/* Index within the heap array of least frequent node in the Huffman tree */ + + +/* =========================================================================== + * Remove the smallest element from the heap and recreate the heap with + * one less element. Updates heap and heap_len. + */ +#define pqremove(tree, top) \ +{\ + top = heap[SMALLEST]; \ + heap[SMALLEST] = heap[heap_len--]; \ + pqdownheap(tree, SMALLEST); \ +} + +/* =========================================================================== + * Compares to subtrees, using the tree depth as tie breaker when + * the subtrees have equal frequency. This minimizes the worst case length. + */ +#define smaller(tree, n, m) \ + (tree[n].Freq < tree[m].Freq || \ + (tree[n].Freq == tree[m].Freq && depth[n] <= depth[m])) + +/* =========================================================================== + * Restore the heap property by moving down the tree starting at node k, + * exchanging a node with the smallest of its two sons if necessary, stopping + * when the heap property is re-established (each father smaller than its + * two sons). + */ +static void pqdownheap(ct_data *tree, /* the tree to restore */ + int k) /* node to move down */ +{ + int v = heap[k]; + int j = k << 1; /* left son of k */ + while (j <= heap_len) { + /* Set j to the smallest of the two sons: */ + if (j < heap_len && smaller(tree, heap[j+1], heap[j])) j++; + + /* Exit if v is smaller than both sons */ + if (smaller(tree, v, heap[j])) break; + + /* Exchange v with the smallest son */ + heap[k] = heap[j]; k = j; + + /* And continue down the tree, setting j to the left son of k */ + j <<= 1; + } + heap[k] = v; +} + +/* =========================================================================== + * Compute the optimal bit lengths for a tree and update the total bit length + * for the current block. + * IN assertion: the fields freq and dad are set, heap[heap_max] and + * above are the tree nodes sorted by increasing frequency. + * OUT assertions: the field len is set to the optimal bit length, the + * array bl_count contains the frequencies for each bit length. + * The length opt_len is updated; static_len is also updated if stree is + * not null. + */ +static void gen_bitlen(tree_desc *desc) +{ + ct_data *tree = desc->dyn_tree; + int *extra = desc->extra_bits; + int base = desc->extra_base; + int max_code = desc->max_code; + int max_length = desc->max_length; + ct_data *stree = desc->static_tree; + int h; /* heap index */ + int n, m; /* iterate over the tree elements */ + int bits; /* bit length */ + int xbits; /* extra bits */ + ush f; /* frequency */ + int overflow = 0; /* number of elements with bit length too large */ + + for (bits = 0; bits <= MAX_BITS; bits++) bl_count[bits] = 0; + + /* In a first pass, compute the optimal bit lengths (which may + * overflow in the case of the bit length tree). + */ + tree[heap[heap_max]].Len = 0; /* root of the heap */ + + for (h = heap_max+1; h < HEAP_SIZE; h++) { + n = heap[h]; + bits = tree[tree[n].Dad].Len + 1; + if (bits > max_length) bits = max_length, overflow++; + tree[n].Len = (ush)bits; + /* We overwrite tree[n].Dad which is no longer needed */ + + if (n > max_code) continue; /* not a leaf node */ + + bl_count[bits]++; + xbits = 0; + if (n >= base) xbits = extra[n-base]; + f = tree[n].Freq; + opt_len += (ulg)f * (bits + xbits); + if (stree) static_len += (ulg)f * (stree[n].Len + xbits); + } + if (overflow == 0) return; + + Trace((stderr,"\nbit length overflow\n")); + /* This happens for example on obj2 and pic of the Calgary corpus */ + + /* Find the first bit length which could increase: */ + do { + bits = max_length-1; + while (bl_count[bits] == 0) bits--; + bl_count[bits]--; /* move one leaf down the tree */ + bl_count[bits+1] += 2; /* move one overflow item as its brother */ + bl_count[max_length]--; + /* The brother of the overflow item also moves one step up, + * but this does not affect bl_count[max_length] + */ + overflow -= 2; + } while (overflow > 0); + + /* Now recompute all bit lengths, scanning in increasing frequency. + * h is still equal to HEAP_SIZE. (It is simpler to reconstruct all + * lengths instead of fixing only the wrong ones. This idea is taken + * from 'ar' written by Haruhiko Okumura.) + */ + for (bits = max_length; bits != 0; bits--) { + n = bl_count[bits]; + while (n != 0) { + m = heap[--h]; + if (m > max_code) continue; + if (tree[m].Len != (unsigned) bits) { + Trace((stderr,"code %d bits %d->%d\n", m, tree[m].Len, bits)); + opt_len += ((long)bits-(long)tree[m].Len)*(long)tree[m].Freq; + tree[m].Len = (ush)bits; + } + n--; + } + } +} + +/* =========================================================================== + * Generate the codes for a given tree and bit counts (which need not be + * optimal). + * IN assertion: the array bl_count contains the bit length statistics for + * the given tree and the field len is set for all tree elements. + * OUT assertion: the field code is set for all tree elements of non + * zero code length. + */ +static void gen_codes (ct_data *tree, /* the tree to decorate */ + int max_code) /* largest code with non zero frequency */ +{ + ush next_code[MAX_BITS+1]; /* next code value for each bit length */ + ush code = 0; /* running code value */ + int bits; /* bit index */ + int n; /* code index */ + + /* The distribution counts are first used to generate the code values + * without bit reversal. + */ + for (bits = 1; bits <= MAX_BITS; bits++) { + next_code[bits] = code = (code + bl_count[bits-1]) << 1; + } + /* Check that the bit counts in bl_count are consistent. The last code + * must be all ones. + */ + Assert (code + bl_count[MAX_BITS]-1 == (1<dyn_tree; + ct_data *stree = desc->static_tree; + int elems = desc->elems; + int n, m; /* iterate over heap elements */ + int max_code = -1; /* largest code with non zero frequency */ + int node = elems; /* next internal node of the tree */ + + /* Construct the initial heap, with least frequent element in + * heap[SMALLEST]. The sons of heap[n] are heap[2*n] and heap[2*n+1]. + * heap[0] is not used. + */ + heap_len = 0, heap_max = HEAP_SIZE; + + for (n = 0; n < elems; n++) { + if (tree[n].Freq != 0) { + heap[++heap_len] = max_code = n; + depth[n] = 0; + } else { + tree[n].Len = 0; + } + } + + /* The pkzip format requires that at least one distance code exists, + * and that at least one bit should be sent even if there is only one + * possible code. So to avoid special checks later on we force at least + * two codes of non zero frequency. + */ + while (heap_len < 2) { + int new = heap[++heap_len] = (max_code < 2 ? ++max_code : 0); + tree[new].Freq = 1; + depth[new] = 0; + opt_len--; if (stree) static_len -= stree[new].Len; + /* new is 0 or 1 so it does not have extra bits */ + } + desc->max_code = max_code; + + /* The elements heap[heap_len/2+1 .. heap_len] are leaves of the tree, + * establish sub-heaps of increasing lengths: + */ + for (n = heap_len/2; n >= 1; n--) pqdownheap(tree, n); + + /* Construct the Huffman tree by repeatedly combining the least two + * frequent nodes. + */ + do { + pqremove(tree, n); /* n = node of least frequency */ + m = heap[SMALLEST]; /* m = node of next least frequency */ + + heap[--heap_max] = n; /* keep the nodes sorted by frequency */ + heap[--heap_max] = m; + + /* Create a new node father of n and m */ + tree[node].Freq = tree[n].Freq + tree[m].Freq; + depth[node] = (uch) (MAX(depth[n], depth[m]) + 1); + tree[n].Dad = tree[m].Dad = (ush)node; +#ifdef DUMP_BL_TREE + if (tree == bl_tree) { + fprintf(stderr,"\nnode %d(%d), sons %d(%d) %d(%d)", + node, tree[node].Freq, n, tree[n].Freq, m, tree[m].Freq); + } +#endif + /* and insert the new node in the heap */ + heap[SMALLEST] = node++; + pqdownheap(tree, SMALLEST); + + } while (heap_len >= 2); + + heap[--heap_max] = heap[SMALLEST]; + + /* At this point, the fields freq and dad are set. We can now + * generate the bit lengths. + */ + gen_bitlen((tree_desc *)desc); + + /* The field len is now set, we can generate the bit codes */ + gen_codes ((ct_data *)tree, max_code); +} + +/* =========================================================================== + * Scan a literal or distance tree to determine the frequencies of the codes + * in the bit length tree. Updates opt_len to take into account the repeat + * counts. (The contribution of the bit length codes will be added later + * during the construction of bl_tree.) + */ +static void scan_tree (ct_data *tree, /* the tree to be scanned */ + int max_code) /* and its largest code of non zero frequency */ +{ + int n; /* iterates over all tree elements */ + int prevlen = -1; /* last emitted length */ + int curlen; /* length of current code */ + int nextlen = tree[0].Len; /* length of next code */ + int count = 0; /* repeat count of the current code */ + int max_count = 7; /* max repeat count */ + int min_count = 4; /* min repeat count */ + + if (nextlen == 0) max_count = 138, min_count = 3; + tree[max_code+1].Len = (ush)0xffff; /* guard */ + + for (n = 0; n <= max_code; n++) { + curlen = nextlen; nextlen = tree[n+1].Len; + if (++count < max_count && curlen == nextlen) { + continue; + } else if (count < min_count) { + bl_tree[curlen].Freq += count; + } else if (curlen != 0) { + if (curlen != prevlen) bl_tree[curlen].Freq++; + bl_tree[REP_3_6].Freq++; + } else if (count <= 10) { + bl_tree[REPZ_3_10].Freq++; + } else { + bl_tree[REPZ_11_138].Freq++; + } + count = 0; prevlen = curlen; + if (nextlen == 0) { + max_count = 138, min_count = 3; + } else if (curlen == nextlen) { + max_count = 6, min_count = 3; + } else { + max_count = 7, min_count = 4; + } + } +} + +/* =========================================================================== + * Send a literal or distance tree in compressed form, using the codes in + * bl_tree. + */ +static void send_tree (ct_data *tree, /* the tree to be scanned */ + int max_code) /* and its largest code of non zero frequency */ +{ + int n; /* iterates over all tree elements */ + int prevlen = -1; /* last emitted length */ + int curlen; /* length of current code */ + int nextlen = tree[0].Len; /* length of next code */ + int count = 0; /* repeat count of the current code */ + int max_count = 7; /* max repeat count */ + int min_count = 4; /* min repeat count */ + + /* tree[max_code+1].Len = -1; */ /* guard already set */ + if (nextlen == 0) max_count = 138, min_count = 3; + + for (n = 0; n <= max_code; n++) { + curlen = nextlen; nextlen = tree[n+1].Len; + if (++count < max_count && curlen == nextlen) { + continue; + } else if (count < min_count) { + do { send_code(curlen, bl_tree); } while (--count != 0); + + } else if (curlen != 0) { + if (curlen != prevlen) { + send_code(curlen, bl_tree); count--; + } + Assert(count >= 3 && count <= 6, " 3_6?"); + send_code(REP_3_6, bl_tree); send_bits(count-3, 2); + + } else if (count <= 10) { + send_code(REPZ_3_10, bl_tree); send_bits(count-3, 3); + + } else { + send_code(REPZ_11_138, bl_tree); send_bits(count-11, 7); + } + count = 0; prevlen = curlen; + if (nextlen == 0) { + max_count = 138, min_count = 3; + } else if (curlen == nextlen) { + max_count = 6, min_count = 3; + } else { + max_count = 7, min_count = 4; + } + } +} + +/* =========================================================================== + * Construct the Huffman tree for the bit lengths and return the index in + * bl_order of the last bit length code to send. + */ +static int build_bl_tree(void) +{ + int max_blindex; /* index of last bit length code of non zero freq */ + + /* Determine the bit length frequencies for literal and distance trees */ + scan_tree((ct_data *)dyn_ltree, l_desc.max_code); + scan_tree((ct_data *)dyn_dtree, d_desc.max_code); + + /* Build the bit length tree: */ + build_tree((tree_desc *)(&bl_desc)); + /* opt_len now includes the length of the tree representations, except + * the lengths of the bit lengths codes and the 5+5+4 bits for the counts. + */ + + /* Determine the number of bit length codes to send. The pkzip format + * requires that at least 4 bit length codes be sent. (appnote.txt says + * 3 but the actual value used is 4.) + */ + for (max_blindex = BL_CODES-1; max_blindex >= 3; max_blindex--) { + if (bl_tree[bl_order[max_blindex]].Len != 0) break; + } + /* Update opt_len to include the bit length tree and counts */ + opt_len += 3*(max_blindex+1) + 5+5+4; + Tracev((stderr, "\ndyn trees: dyn %lu, stat %lu", opt_len, static_len)); + + return max_blindex; +} + +/* =========================================================================== + * Send the header for a block using dynamic Huffman trees: the counts, the + * lengths of the bit length codes, the literal tree and the distance tree. + * IN assertion: lcodes >= 257, dcodes >= 1, blcodes >= 4. + */ +static void send_all_trees(int lcodes, int dcodes, int blcodes) +{ + int rank; /* index in bl_order */ + + Assert (lcodes >= 257 && dcodes >= 1 && blcodes >= 4, "not enough codes"); + Assert (lcodes <= L_CODES && dcodes <= D_CODES && blcodes <= BL_CODES, + "too many codes"); + Tracev((stderr, "\nbl counts: ")); + send_bits(lcodes-257, 5); /* not +255 as stated in appnote.txt */ + send_bits(dcodes-1, 5); + send_bits(blcodes-4, 4); /* not -3 as stated in appnote.txt */ + for (rank = 0; rank < blcodes; rank++) { + Tracev((stderr, "\nbl code %2d ", bl_order[rank])); + send_bits(bl_tree[bl_order[rank]].Len, 3); + } + + send_tree((ct_data *)dyn_ltree, lcodes-1); /* send the literal tree */ + + send_tree((ct_data *)dyn_dtree, dcodes-1); /* send the distance tree */ +} + +/* =========================================================================== + * Determine the best encoding for the current block: dynamic trees, static + * trees or store, and output the encoded block to the zip file. This function + * returns the total compressed length for the file so far. + */ +void flush_block(char *buf, /* input block, or NULL if too old */ + ulg stored_len, /* length of input block */ + int pad, /* pad output to byte boundary */ + int eof) /* true if this is the last block for a file */ +{ + ulg opt_lenb, static_lenb; /* opt_len and static_len in bytes */ + int max_blindex; /* index of last bit length code of non zero freq */ + + flag_buf[last_flags] = flags; /* Save the flags for the last 8 items */ + + /* Construct the literal and distance trees */ + build_tree((tree_desc *)(&l_desc)); + Tracev((stderr, "\nlit data: dyn %lu, stat %lu", opt_len, static_len)); + + build_tree((tree_desc *)(&d_desc)); + Tracev((stderr, "\ndist data: dyn %lu, stat %lu", opt_len, static_len)); + /* At this point, opt_len and static_len are the total bit lengths of + * the compressed block data, excluding the tree representations. + */ + + /* Build the bit length tree for the above two trees, and get the index + * in bl_order of the last bit length code to send. + */ + max_blindex = build_bl_tree(); + + /* Determine the best encoding. Compute first the block length in bytes */ + opt_lenb = (opt_len+3+7)>>3; + static_lenb = (static_len+3+7)>>3; + + Trace((stderr, "\nopt %lu(%lu) stat %lu(%lu) stored %lu lit %u dist %u ", + opt_lenb, opt_len, static_lenb, static_len, stored_len, + last_lit, last_dist)); + + if (static_lenb <= opt_lenb) opt_lenb = static_lenb; + + if (stored_len+4 <= opt_lenb && buf != (char*)0) { + /* 4: two words for the lengths */ + /* The test buf != NULL is only necessary if LIT_BUFSIZE > WSIZE. + * Otherwise we can't have processed more than WSIZE input bytes since + * the last block flush, because compression would have been + * successful. If LIT_BUFSIZE <= WSIZE, it is never too late to + * transform a block into a stored block. + */ + send_bits((STORED_BLOCK<<1)+eof, 3); /* send block type */ + compressed_len = (compressed_len + 3 + 7) & ~7L; + compressed_len += (stored_len + 4) << 3; + + copy_block(buf, (unsigned)stored_len, 1); /* with header */ + + } else if (static_lenb == opt_lenb) { + send_bits((STATIC_TREES<<1)+eof, 3); + compress_block((ct_data *)static_ltree, (ct_data *)static_dtree); + compressed_len += 3 + static_len; + } else { + send_bits((DYN_TREES<<1)+eof, 3); + send_all_trees(l_desc.max_code+1, d_desc.max_code+1, max_blindex+1); + compress_block((ct_data *)dyn_ltree, (ct_data *)dyn_dtree); + compressed_len += 3 + opt_len; + } + Assert (compressed_len == bits_sent, "bad compressed size"); + init_block(); + + if (eof) { + /* Assert (input_len == bytes_in, "bad input size"); */ + bi_windup(); + compressed_len += 7; /* align on byte boundary */ + } else if (pad && (compressed_len % 8) != 0) { + send_bits((STORED_BLOCK<<1)+eof, 3); /* send block type */ + compressed_len = (compressed_len + 3 + 7) & ~7L; + copy_block(buf, 0, 1); /* with header */ + } +} + +/* =========================================================================== + * Save the match info and tally the frequency counts. Return true if + * the current block must be flushed. + */ +int ct_tally (int pack_level, /* Compression level, 1 to 9 */ + int dist, /* distance of matched string */ + int lc) /* match length-MIN_MATCH or unmatched char (if dist==0) */ +{ + l_buf[last_lit++] = (uch)lc; + if (dist == 0) { + /* lc is the unmatched char */ + dyn_ltree[lc].Freq++; + } else { + /* Here, lc is the match length - MIN_MATCH */ + dist--; /* dist = match distance - 1 */ + Assert((ush)dist < (ush)MAX_DIST && + (ush)lc <= (ush)(MAX_MATCH-MIN_MATCH) && + (ush)d_code(dist) < (ush)D_CODES, "ct_tally: bad match"); + + dyn_ltree[length_code[lc]+LITERALS+1].Freq++; + dyn_dtree[d_code(dist)].Freq++; + + d_buf[last_dist++] = (ush)dist; + flags |= flag_bit; + } + flag_bit <<= 1; + + /* Output the flags if they fill a byte: */ + if ((last_lit & 7) == 0) { + flag_buf[last_flags++] = flags; + flags = 0, flag_bit = 1; + } + /* Try to guess if it is profitable to stop the current block here */ + if (pack_level > 2 && (last_lit & 0xfff) == 0) { + /* Compute an upper bound for the compressed length */ + ulg out_length = (ulg)last_lit*8L; + ulg in_length = (ulg)strstart-block_start; + int dcode; + for (dcode = 0; dcode < D_CODES; dcode++) { + out_length += (ulg)dyn_dtree[dcode].Freq*(5L+extra_dbits[dcode]); + } + out_length >>= 3; + Trace((stderr,"\nlast_lit %u, last_dist %u, in %ld, out ~%ld(%ld%%) ", + last_lit, last_dist, in_length, out_length, + 100L - out_length*100L/in_length)); + if (last_dist < last_lit/2 && out_length < in_length/2) return 1; + } + return (last_lit == LIT_BUFSIZE-1 || last_dist == DIST_BUFSIZE); + /* We avoid equality with LIT_BUFSIZE because of wraparound at 64K + * on 16 bit machines and because stored blocks are restricted to + * 64K-1 bytes. + */ +} + +/* =========================================================================== + * Send the block data compressed using the given Huffman trees + */ +static void compress_block(ct_data *ltree, /* literal tree */ + ct_data *dtree) /* distance tree */ +{ + unsigned dist; /* distance of matched string */ + int lc; /* match length or unmatched char (if dist == 0) */ + unsigned lx = 0; /* running index in l_buf */ + unsigned dx = 0; /* running index in d_buf */ + unsigned fx = 0; /* running index in flag_buf */ + uch flag = 0; /* current flags */ + unsigned code; /* the code to send */ + int extra; /* number of extra bits to send */ + + if (last_lit != 0) do { + if ((lx & 7) == 0) flag = flag_buf[fx++]; + lc = l_buf[lx++]; + if ((flag & 1) == 0) { + send_code(lc, ltree); /* send a literal byte */ + Tracecv(isgraph(lc), (stderr," '%c' ", lc)); + } else { + /* Here, lc is the match length - MIN_MATCH */ + code = length_code[lc]; + send_code(code+LITERALS+1, ltree); /* send the length code */ + extra = extra_lbits[code]; + if (extra != 0) { + lc -= base_length[code]; + send_bits(lc, extra); /* send the extra length bits */ + } + dist = d_buf[dx++]; + /* Here, dist is the match distance - 1 */ + code = d_code(dist); + Assert (code < D_CODES, "bad d_code"); + + send_code(code, dtree); /* send the distance code */ + extra = extra_dbits[code]; + if (extra != 0) { + dist -= base_dist[code]; + send_bits(dist, extra); /* send the extra distance bits */ + } + } /* literal or match pair ? */ + flag >>= 1; + } while (lx < last_lit); + + send_code(END_BLOCK, ltree); +} diff --git a/zgz/gzip/util.c b/zgz/gzip/util.c new file mode 100644 index 0000000..e7e049f --- /dev/null +++ b/zgz/gzip/util.c @@ -0,0 +1,179 @@ +/* util.c -- utility functions for gzip support + + Copyright (C) 1997, 1998, 1999, 2001, 2002, 2006 Free Software + Foundation, Inc. + Copyright (C) 1992-1993 Jean-loup Gailly + Copyright (C) 2008 Josh Triplett + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2, or (at your option) + any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software Foundation, + Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */ + +#include +#include +#include +#include +#include +#include + +#include "gzip.h" + +static int write_buffer(int, void *, unsigned int); + +extern ulg crc_32_tab[]; /* crc table, defined below */ + +/* =========================================================================== + * Run a set of bytes through the crc shift register. If s is a NULL + * pointer, then initialize the crc shift register contents instead. + * Return the current crc in either case. + */ +ulg updcrc(uch *s, unsigned n) +{ + register ulg c; /* temporary variable */ + + static ulg crc = (ulg)0xffffffffL; /* shift register contents */ + + if (s == NULL) { + c = 0xffffffffL; + } else { + c = crc; + if (n) do { + c = crc_32_tab[((int)c ^ (*s++)) & 0xff] ^ (c >> 8); + } while (--n); + } + crc = c; + return c ^ 0xffffffffL; /* (instead of ~c for 64-bit machines) */ +} + +/* Like the standard read function, except do not attempt to read more + than SSIZE_MAX bytes at a time. */ +int +read_buffer (int fd, void *buf, unsigned int cnt) +{ +#ifdef SSIZE_MAX + if (SSIZE_MAX < cnt) + cnt = (unsigned int)SSIZE_MAX; +#endif + return read (fd, buf, cnt); +} + +/* Likewise for 'write'. */ +static int +write_buffer (int fd, void *buf, unsigned int cnt) +{ +#ifdef SSIZE_MAX + if (SSIZE_MAX < cnt) + cnt = (unsigned int)SSIZE_MAX; +#endif + return write (fd, buf, cnt); +} + +/* =========================================================================== + * Does the same as write(), but also handles partial pipe writes and checks + * for error return. + */ +void write_buf(int fd, void *buf, unsigned cnt) +{ + unsigned n; + + while ((n = write_buffer (fd, buf, cnt)) != cnt) { + if (n == (unsigned)(-1)) { + write_error(); + } + cnt -= n; + buf = ((char*)buf)+n; + } +} + +/* ======================================================================== + * Error handlers. + */ +void +gzip_error (char *m) +{ + fprintf (stderr, "\nzgz: stdin: %s\n", m); + exit(ERROR); +} + +void read_error(void) +{ + if (errno != 0) + perror("\nzgz: stdin"); + else + fprintf(stderr, "\nzgz: stdin: unexpected end of file\n"); + exit(ERROR); +} + +void write_error(void) +{ + perror("\nzgz: stdout"); + exit(ERROR); +} + +/* ======================================================================== + * Table of CRC-32's of all single-byte values (made by makecrc.c) + */ +ulg crc_32_tab[] = { + 0x00000000L, 0x77073096L, 0xee0e612cL, 0x990951baL, 0x076dc419L, + 0x706af48fL, 0xe963a535L, 0x9e6495a3L, 0x0edb8832L, 0x79dcb8a4L, + 0xe0d5e91eL, 0x97d2d988L, 0x09b64c2bL, 0x7eb17cbdL, 0xe7b82d07L, + 0x90bf1d91L, 0x1db71064L, 0x6ab020f2L, 0xf3b97148L, 0x84be41deL, + 0x1adad47dL, 0x6ddde4ebL, 0xf4d4b551L, 0x83d385c7L, 0x136c9856L, + 0x646ba8c0L, 0xfd62f97aL, 0x8a65c9ecL, 0x14015c4fL, 0x63066cd9L, + 0xfa0f3d63L, 0x8d080df5L, 0x3b6e20c8L, 0x4c69105eL, 0xd56041e4L, + 0xa2677172L, 0x3c03e4d1L, 0x4b04d447L, 0xd20d85fdL, 0xa50ab56bL, + 0x35b5a8faL, 0x42b2986cL, 0xdbbbc9d6L, 0xacbcf940L, 0x32d86ce3L, + 0x45df5c75L, 0xdcd60dcfL, 0xabd13d59L, 0x26d930acL, 0x51de003aL, + 0xc8d75180L, 0xbfd06116L, 0x21b4f4b5L, 0x56b3c423L, 0xcfba9599L, + 0xb8bda50fL, 0x2802b89eL, 0x5f058808L, 0xc60cd9b2L, 0xb10be924L, + 0x2f6f7c87L, 0x58684c11L, 0xc1611dabL, 0xb6662d3dL, 0x76dc4190L, + 0x01db7106L, 0x98d220bcL, 0xefd5102aL, 0x71b18589L, 0x06b6b51fL, + 0x9fbfe4a5L, 0xe8b8d433L, 0x7807c9a2L, 0x0f00f934L, 0x9609a88eL, + 0xe10e9818L, 0x7f6a0dbbL, 0x086d3d2dL, 0x91646c97L, 0xe6635c01L, + 0x6b6b51f4L, 0x1c6c6162L, 0x856530d8L, 0xf262004eL, 0x6c0695edL, + 0x1b01a57bL, 0x8208f4c1L, 0xf50fc457L, 0x65b0d9c6L, 0x12b7e950L, + 0x8bbeb8eaL, 0xfcb9887cL, 0x62dd1ddfL, 0x15da2d49L, 0x8cd37cf3L, + 0xfbd44c65L, 0x4db26158L, 0x3ab551ceL, 0xa3bc0074L, 0xd4bb30e2L, + 0x4adfa541L, 0x3dd895d7L, 0xa4d1c46dL, 0xd3d6f4fbL, 0x4369e96aL, + 0x346ed9fcL, 0xad678846L, 0xda60b8d0L, 0x44042d73L, 0x33031de5L, + 0xaa0a4c5fL, 0xdd0d7cc9L, 0x5005713cL, 0x270241aaL, 0xbe0b1010L, + 0xc90c2086L, 0x5768b525L, 0x206f85b3L, 0xb966d409L, 0xce61e49fL, + 0x5edef90eL, 0x29d9c998L, 0xb0d09822L, 0xc7d7a8b4L, 0x59b33d17L, + 0x2eb40d81L, 0xb7bd5c3bL, 0xc0ba6cadL, 0xedb88320L, 0x9abfb3b6L, + 0x03b6e20cL, 0x74b1d29aL, 0xead54739L, 0x9dd277afL, 0x04db2615L, + 0x73dc1683L, 0xe3630b12L, 0x94643b84L, 0x0d6d6a3eL, 0x7a6a5aa8L, + 0xe40ecf0bL, 0x9309ff9dL, 0x0a00ae27L, 0x7d079eb1L, 0xf00f9344L, + 0x8708a3d2L, 0x1e01f268L, 0x6906c2feL, 0xf762575dL, 0x806567cbL, + 0x196c3671L, 0x6e6b06e7L, 0xfed41b76L, 0x89d32be0L, 0x10da7a5aL, + 0x67dd4accL, 0xf9b9df6fL, 0x8ebeeff9L, 0x17b7be43L, 0x60b08ed5L, + 0xd6d6a3e8L, 0xa1d1937eL, 0x38d8c2c4L, 0x4fdff252L, 0xd1bb67f1L, + 0xa6bc5767L, 0x3fb506ddL, 0x48b2364bL, 0xd80d2bdaL, 0xaf0a1b4cL, + 0x36034af6L, 0x41047a60L, 0xdf60efc3L, 0xa867df55L, 0x316e8eefL, + 0x4669be79L, 0xcb61b38cL, 0xbc66831aL, 0x256fd2a0L, 0x5268e236L, + 0xcc0c7795L, 0xbb0b4703L, 0x220216b9L, 0x5505262fL, 0xc5ba3bbeL, + 0xb2bd0b28L, 0x2bb45a92L, 0x5cb36a04L, 0xc2d7ffa7L, 0xb5d0cf31L, + 0x2cd99e8bL, 0x5bdeae1dL, 0x9b64c2b0L, 0xec63f226L, 0x756aa39cL, + 0x026d930aL, 0x9c0906a9L, 0xeb0e363fL, 0x72076785L, 0x05005713L, + 0x95bf4a82L, 0xe2b87a14L, 0x7bb12baeL, 0x0cb61b38L, 0x92d28e9bL, + 0xe5d5be0dL, 0x7cdcefb7L, 0x0bdbdf21L, 0x86d3d2d4L, 0xf1d4e242L, + 0x68ddb3f8L, 0x1fda836eL, 0x81be16cdL, 0xf6b9265bL, 0x6fb077e1L, + 0x18b74777L, 0x88085ae6L, 0xff0f6a70L, 0x66063bcaL, 0x11010b5cL, + 0x8f659effL, 0xf862ae69L, 0x616bffd3L, 0x166ccf45L, 0xa00ae278L, + 0xd70dd2eeL, 0x4e048354L, 0x3903b3c2L, 0xa7672661L, 0xd06016f7L, + 0x4969474dL, 0x3e6e77dbL, 0xaed16a4aL, 0xd9d65adcL, 0x40df0b66L, + 0x37d83bf0L, 0xa9bcae53L, 0xdebb9ec5L, 0x47b2cf7fL, 0x30b5ffe9L, + 0xbdbdf21cL, 0xcabac28aL, 0x53b39330L, 0x24b4a3a6L, 0xbad03605L, + 0xcdd70693L, 0x54de5729L, 0x23d967bfL, 0xb3667a2eL, 0xc4614ab8L, + 0x5d681b02L, 0x2a6f2b94L, 0xb40bbe37L, 0xc30c8ea1L, 0x5a05df1bL, + 0x2d02ef8dL +}; diff --git a/zgz/old-bzip2/blocksort.c b/zgz/old-bzip2/blocksort.c new file mode 100644 index 0000000..94759aa --- /dev/null +++ b/zgz/old-bzip2/blocksort.c @@ -0,0 +1,1062 @@ + +/*-------------------------------------------------------------*/ +/*--- Block sorting machinery ---*/ +/*--- blocksort.c ---*/ +/*-------------------------------------------------------------*/ + +/*-- + This file is a part of bzip2 and/or libbzip2, a program and + library for lossless, block-sorting data compression. + + Copyright (C) 1996-1999 Julian R Seward. All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + + 1. Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + + 2. The origin of this software must not be misrepresented; you must + not claim that you wrote the original software. If you use this + software in a product, an acknowledgment in the product + documentation would be appreciated but is not required. + + 3. Altered source versions must be plainly marked as such, and must + not be misrepresented as being the original software. + + 4. The name of the author may not be used to endorse or promote + products derived from this software without specific prior written + permission. + + THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS + OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY + DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE + GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, + WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + Julian Seward, Cambridge, UK. + jseward@acm.org + bzip2/libbzip2 version 0.9.5 of 24 May 1999 + + This program is based on (at least) the work of: + Mike Burrows + David Wheeler + Peter Fenwick + Alistair Moffat + Radford Neal + Ian H. Witten + Robert Sedgewick + Jon L. Bentley + + For more information on these sources, see the manual. +--*/ + + +#include "bzlib_private.h" + +/*---------------------------------------------*/ +/*--- Fallback O(N log(N)^2) sorting ---*/ +/*--- algorithm, for repetitive blocks ---*/ +/*---------------------------------------------*/ + +/*---------------------------------------------*/ +static +__inline__ +void fallbackSimpleSort ( UInt32* fmap, + UInt32* eclass, + Int32 lo, + Int32 hi ) +{ + Int32 i, j, tmp; + UInt32 ec_tmp; + + if (lo == hi) return; + + if (hi - lo > 3) { + for ( i = hi-4; i >= lo; i-- ) { + tmp = fmap[i]; + ec_tmp = eclass[tmp]; + for ( j = i+4; j <= hi && ec_tmp > eclass[fmap[j]]; j += 4 ) + fmap[j-4] = fmap[j]; + fmap[j-4] = tmp; + } + } + + for ( i = hi-1; i >= lo; i-- ) { + tmp = fmap[i]; + ec_tmp = eclass[tmp]; + for ( j = i+1; j <= hi && ec_tmp > eclass[fmap[j]]; j++ ) + fmap[j-1] = fmap[j]; + fmap[j-1] = tmp; + } +} + + +/*---------------------------------------------*/ +#define fswap(zz1, zz2) \ + { Int32 zztmp = zz1; zz1 = zz2; zz2 = zztmp; } + +#define fvswap(zzp1, zzp2, zzn) \ +{ \ + Int32 yyp1 = (zzp1); \ + Int32 yyp2 = (zzp2); \ + Int32 yyn = (zzn); \ + while (yyn > 0) { \ + fswap(fmap[yyp1], fmap[yyp2]); \ + yyp1++; yyp2++; yyn--; \ + } \ +} + + +#define fmin(a,b) ((a) < (b)) ? (a) : (b) + +#define fpush(lz,hz) { stackLo[sp] = lz; \ + stackHi[sp] = hz; \ + sp++; } + +#define fpop(lz,hz) { sp--; \ + lz = stackLo[sp]; \ + hz = stackHi[sp]; } + +#define FALLBACK_QSORT_SMALL_THRESH 10 +#define FALLBACK_QSORT_STACK_SIZE 100 + + +static +void fallbackQSort3 ( UInt32* fmap, + UInt32* eclass, + Int32 loSt, + Int32 hiSt ) +{ + Int32 unLo, unHi, ltLo, gtHi, n, m; + Int32 sp, lo, hi; + UInt32 med, r, r3; + Int32 stackLo[FALLBACK_QSORT_STACK_SIZE]; + Int32 stackHi[FALLBACK_QSORT_STACK_SIZE]; + + r = 0; + + sp = 0; + fpush ( loSt, hiSt ); + + while (sp > 0) { + + AssertH ( sp < FALLBACK_QSORT_STACK_SIZE, 1004 ); + + fpop ( lo, hi ); + if (hi - lo < FALLBACK_QSORT_SMALL_THRESH) { + fallbackSimpleSort ( fmap, eclass, lo, hi ); + continue; + } + + /* Random partitioning. Median of 3 sometimes fails to + avoid bad cases. Median of 9 seems to help but + looks rather expensive. This too seems to work but + is cheaper. Guidance for the magic constants + 7621 and 32768 is taken from Sedgewick's algorithms + book, chapter 35. + */ + r = ((r * 7621) + 1) % 32768; + r3 = r % 3; + if (r3 == 0) med = eclass[fmap[lo]]; else + if (r3 == 1) med = eclass[fmap[(lo+hi)>>1]]; else + med = eclass[fmap[hi]]; + + unLo = ltLo = lo; + unHi = gtHi = hi; + + while (1) { + while (1) { + if (unLo > unHi) break; + n = (Int32)eclass[fmap[unLo]] - (Int32)med; + if (n == 0) { + fswap(fmap[unLo], fmap[ltLo]); + ltLo++; unLo++; + continue; + }; + if (n > 0) break; + unLo++; + } + while (1) { + if (unLo > unHi) break; + n = (Int32)eclass[fmap[unHi]] - (Int32)med; + if (n == 0) { + fswap(fmap[unHi], fmap[gtHi]); + gtHi--; unHi--; + continue; + }; + if (n < 0) break; + unHi--; + } + if (unLo > unHi) break; + fswap(fmap[unLo], fmap[unHi]); unLo++; unHi--; + } + + AssertD ( unHi == unLo-1, "fallbackQSort3(2)" ); + + if (gtHi < ltLo) continue; + + n = fmin(ltLo-lo, unLo-ltLo); fvswap(lo, unLo-n, n); + m = fmin(hi-gtHi, gtHi-unHi); fvswap(unLo, hi-m+1, m); + + n = lo + unLo - ltLo - 1; + m = hi - (gtHi - unHi) + 1; + + if (n - lo > hi - m) { + fpush ( lo, n ); + fpush ( m, hi ); + } else { + fpush ( m, hi ); + fpush ( lo, n ); + } + } +} + +#undef fmin +#undef fpush +#undef fpop +#undef fswap +#undef fvswap +#undef FALLBACK_QSORT_SMALL_THRESH +#undef FALLBACK_QSORT_STACK_SIZE + + +/*---------------------------------------------*/ +/* Pre: + nblock > 0 + eclass exists for [0 .. nblock-1] + ((UInt16*)eclass) [0 .. nblock-1] [15:8] holds block + ptr exists for [0 .. nblock-1] + + Post: + ((UInt16*)eclass) [0 .. nblock-1] [15:8] holds block + All other areas of eclass destroyed + fmap [0 .. nblock-1] holds sorted order + bhtab [ 0 .. 2+(nblock/32) ] destroyed +*/ + +#define SET_BH(zz) bhtab[(zz) >> 5] |= (1 << ((zz) & 31)) +#define CLEAR_BH(zz) bhtab[(zz) >> 5] &= ~(1 << ((zz) & 31)) +#define ISSET_BH(zz) (bhtab[(zz) >> 5] & (1 << ((zz) & 31))) +#define WORD_BH(zz) bhtab[(zz) >> 5] +#define UNALIGNED_BH(zz) ((zz) & 0x01f) + +static +void fallbackSort ( UInt32* fmap, + UInt32* eclass, + UInt32* bhtab, + Int32 nblock, + Int32 verb ) +{ + Int32 ftab[257]; + Int32 ftabCopy[256]; + Int32 H, i, j, k, l, r, cc, cc1; + Int32 nNotDone; + Int32 nBhtab; + UInt16* eclass16 = (UInt16*)eclass; + + /*-- + Initial 1-char radix sort to generate + initial fmap and initial BH bits. + --*/ + if (verb >= 4) + VPrintf0 ( " bucket sorting ...\n" ); + for (i = 0; i < 257; i++) ftab[i] = 0; + for (i = 0; i < nblock; i++) ftab[eclass16[i] >> 8]++; + for (i = 0; i < 256; i++) ftabCopy[i] = ftab[i]; + for (i = 1; i < 257; i++) ftab[i] += ftab[i-1]; + + for (i = 0; i < nblock; i++) { + j = eclass16[i] >> 8; + k = ftab[j] - 1; + ftab[j] = k; + fmap[k] = i; + } + + nBhtab = 2 + (nblock / 32); + for (i = 0; i < nBhtab; i++) bhtab[i] = 0; + for (i = 0; i < 256; i++) SET_BH(ftab[i]); + + /*-- + Inductively refine the buckets. Kind-of an + "exponential radix sort" (!), inspired by the + Manber-Myers suffix array construction algorithm. + --*/ + + /*-- set sentinel bits for block-end detection --*/ + for (i = 0; i < 32; i++) { + SET_BH(nblock + 2*i); + CLEAR_BH(nblock + 2*i + 1); + } + + /*-- the log(N) loop --*/ + H = 1; + while (1) { + + if (verb >= 4) + VPrintf1 ( " depth %6d has ", H ); + + j = 0; + for (i = 0; i < nblock; i++) { + if (ISSET_BH(i)) j = i; + k = fmap[i] - H; if (k < 0) k += nblock; + eclass[k] = j; + } + + nNotDone = 0; + r = -1; + while (1) { + + /*-- find the next non-singleton bucket --*/ + k = r + 1; + while (ISSET_BH(k) && UNALIGNED_BH(k)) k++; + if (ISSET_BH(k)) { + while (WORD_BH(k) == 0xffffffff) k += 32; + while (ISSET_BH(k)) k++; + } + l = k - 1; + if (l >= nblock) break; + while (!ISSET_BH(k) && UNALIGNED_BH(k)) k++; + if (!ISSET_BH(k)) { + while (WORD_BH(k) == 0x00000000) k += 32; + while (!ISSET_BH(k)) k++; + } + r = k - 1; + if (r >= nblock) break; + + /*-- now [l, r] bracket current bucket --*/ + if (r > l) { + nNotDone += (r - l + 1); + fallbackQSort3 ( fmap, eclass, l, r ); + + /*-- scan bucket and generate header bits-- */ + cc = -1; + for (i = l; i <= r; i++) { + cc1 = eclass[fmap[i]]; + if (cc != cc1) { SET_BH(i); cc = cc1; }; + } + } + } + + if (verb >= 4) + VPrintf1 ( "%6d unresolved strings\n", nNotDone ); + + H *= 2; + if (H > nblock || nNotDone == 0) break; + } + + /*-- + Reconstruct the original block in + eclass16 [0 .. nblock-1] [15:8], since the + previous phase destroyed it. + --*/ + if (verb >= 4) + VPrintf0 ( " reconstructing block ...\n" ); + j = 0; + for (i = 0; i < nblock; i++) { + while (ftabCopy[j] == 0) j++; + ftabCopy[j]--; + eclass16[fmap[i]] = j << 8; + } + AssertH ( j < 256, 1005 ); +} + +#undef SET_BH +#undef CLEAR_BH +#undef ISSET_BH +#undef WORD_BH +#undef UNALIGNED_BH + + +/*---------------------------------------------*/ +/*--- The main, O(N^2 log(N)) sorting ---*/ +/*--- algorithm. Faster for "normal" ---*/ +/*--- non-repetitive blocks. ---*/ +/*---------------------------------------------*/ + +/*---------------------------------------------*/ +static +__inline__ +Bool mainGtU ( UInt32 i1, + UInt32 i2, + UInt16* block, + UInt16* quadrant, + UInt32 nblock, + Int32* budget ) +{ + Int32 k; + UInt16 s1, s2; + + AssertD ( i1 != i2, "mainGtU" ); + + s1 = block[i1]; s2 = block[i2]; + if (s1 != s2) return (s1 > s2); + i1 += 2; i2 += 2; + + s1 = block[i1]; s2 = block[i2]; + if (s1 != s2) return (s1 > s2); + i1 += 2; i2 += 2; + + s1 = block[i1]; s2 = block[i2]; + if (s1 != s2) return (s1 > s2); + i1 += 2; i2 += 2; + + s1 = block[i1]; s2 = block[i2]; + if (s1 != s2) return (s1 > s2); + i1 += 2; i2 += 2; + + s1 = block[i1]; s2 = block[i2]; + if (s1 != s2) return (s1 > s2); + i1 += 2; i2 += 2; + + s1 = block[i1]; s2 = block[i2]; + if (s1 != s2) return (s1 > s2); + i1 += 2; i2 += 2; + + k = nblock + 8; + + do { + + s1 = block[i1]; s2 = block[i2]; + if (s1 != s2) return (s1 > s2); + s1 = quadrant[i1]; s2 = quadrant[i2]; + if (s1 != s2) return (s1 > s2); + i1 += 2; i2 += 2; + + s1 = block[i1]; s2 = block[i2]; + if (s1 != s2) return (s1 > s2); + s1 = quadrant[i1]; s2 = quadrant[i2]; + if (s1 != s2) return (s1 > s2); + i1 += 2; i2 += 2; + + s1 = block[i1]; s2 = block[i2]; + if (s1 != s2) return (s1 > s2); + s1 = quadrant[i1]; s2 = quadrant[i2]; + if (s1 != s2) return (s1 > s2); + i1 += 2; i2 += 2; + + s1 = block[i1]; s2 = block[i2]; + if (s1 != s2) return (s1 > s2); + s1 = quadrant[i1]; s2 = quadrant[i2]; + if (s1 != s2) return (s1 > s2); + i1 += 2; i2 += 2; + + if (i1 >= nblock) i1 -= nblock; + if (i2 >= nblock) i2 -= nblock; + + k -= 8; + (*budget)--; + } + while (k >= 0); + + return False; +} + + +/*---------------------------------------------*/ +/*-- + Knuth's increments seem to work better + than Incerpi-Sedgewick here. Possibly + because the number of elems to sort is + usually small, typically <= 20. +--*/ +Int32 incs[14] = { 1, 4, 13, 40, 121, 364, 1093, 3280, + 9841, 29524, 88573, 265720, + 797161, 2391484 }; + +static +void mainSimpleSort ( UInt32* ptr, + UInt16* block, + UInt16* quadrant, + Int32 nblock, + Int32 lo, + Int32 hi, + Int32 d, + Int32* budget ) +{ + Int32 i, j, h, bigN, hp; + UInt32 v; + + bigN = hi - lo + 1; + if (bigN < 2) return; + + hp = 0; + while (incs[hp] < bigN) hp++; + hp--; + + for (; hp >= 0; hp--) { + h = incs[hp]; + + i = lo + h; + while (True) { + + /*-- copy 1 --*/ + if (i > hi) break; + v = ptr[i]; + j = i; + while ( mainGtU ( + ptr[j-h]+d, v+d, block, quadrant, nblock, budget + ) ) { + ptr[j] = ptr[j-h]; + j = j - h; + if (j <= (lo + h - 1)) break; + } + ptr[j] = v; + i++; + + /*-- copy 2 --*/ + if (i > hi) break; + v = ptr[i]; + j = i; + while ( mainGtU ( + ptr[j-h]+d, v+d, block, quadrant, nblock, budget + ) ) { + ptr[j] = ptr[j-h]; + j = j - h; + if (j <= (lo + h - 1)) break; + } + ptr[j] = v; + i++; + + /*-- copy 3 --*/ + if (i > hi) break; + v = ptr[i]; + j = i; + while ( mainGtU ( + ptr[j-h]+d, v+d, block, quadrant, nblock, budget + ) ) { + ptr[j] = ptr[j-h]; + j = j - h; + if (j <= (lo + h - 1)) break; + } + ptr[j] = v; + i++; + + if (*budget < 0) return; + } + } +} + + +/*---------------------------------------------*/ +/*-- + The following is an implementation of + an elegant 3-way quicksort for strings, + described in a paper "Fast Algorithms for + Sorting and Searching Strings", by Robert + Sedgewick and Jon L. Bentley. +--*/ + +#define mswap(zz1, zz2) \ + { Int32 zztmp = zz1; zz1 = zz2; zz2 = zztmp; } + +#define mvswap(zzp1, zzp2, zzn) \ +{ \ + Int32 yyp1 = (zzp1); \ + Int32 yyp2 = (zzp2); \ + Int32 yyn = (zzn); \ + while (yyn > 0) { \ + mswap(ptr[yyp1], ptr[yyp2]); \ + yyp1++; yyp2++; yyn--; \ + } \ +} + + +static +__inline__ +UInt16 mmed3 ( UInt16 a, UInt16 b, UInt16 c ) +{ + UInt16 t; + if (a > b) { t = a; a = b; b = t; }; + if (b > c) { t = b; b = c; c = t; }; + if (a > b) b = a; + return b; +} + + +#define mmin(a,b) ((a) < (b)) ? (a) : (b) + +#define mpush(lz,hz,dz) { stackLo[sp] = lz; \ + stackHi[sp] = hz; \ + stackD [sp] = dz; \ + sp++; } + +#define mpop(lz,hz,dz) { sp--; \ + lz = stackLo[sp]; \ + hz = stackHi[sp]; \ + dz = stackD [sp]; } + + +#define mnextsize(az) (nextHi[az]-nextLo[az]) + +#define mnextswap(az,bz) \ + { Int32 tz; \ + tz = nextLo[az]; nextLo[az] = nextLo[bz]; nextLo[bz] = tz; \ + tz = nextHi[az]; nextHi[az] = nextHi[bz]; nextHi[bz] = tz; \ + tz = nextD [az]; nextD [az] = nextD [bz]; nextD [bz] = tz; } + + +#define MAIN_QSORT_SMALL_THRESH 20 +#define MAIN_QSORT_DEPTH_THRESH (BZ_N_RADIX + BZ_N_QSORT) +#define MAIN_QSORT_STACK_SIZE 100 + +static +void mainQSort3 ( UInt32* ptr, + UInt16* block, + UInt16* quadrant, + Int32 nblock, + Int32 loSt, + Int32 hiSt, + Int32 dSt, + Int32* budget ) +{ + Int32 unLo, unHi, ltLo, gtHi, n, m, med; + Int32 sp, lo, hi, d; + + Int32 stackLo[MAIN_QSORT_STACK_SIZE]; + Int32 stackHi[MAIN_QSORT_STACK_SIZE]; + Int32 stackD [MAIN_QSORT_STACK_SIZE]; + + Int32 nextLo[3]; + Int32 nextHi[3]; + Int32 nextD [3]; + + sp = 0; + mpush ( loSt, hiSt, dSt ); + + while (sp > 0) { + + AssertH ( sp < MAIN_QSORT_STACK_SIZE, 1001 ); + + mpop ( lo, hi, d ); + if (hi - lo < MAIN_QSORT_SMALL_THRESH || + d > MAIN_QSORT_DEPTH_THRESH) { + mainSimpleSort ( ptr, block, quadrant, nblock, lo, hi, d, budget ); + if (*budget < 0) return; + continue; + } + + med = (Int32) + mmed3 ( block[ptr[ lo ]+d], + block[ptr[ hi ]+d], + block[ptr[ (lo+hi)>>1 ]+d] ); + + unLo = ltLo = lo; + unHi = gtHi = hi; + + while (True) { + while (True) { + if (unLo > unHi) break; + n = ((Int32)block[ptr[unLo]+d]) - med; + if (n == 0) { + mswap(ptr[unLo], ptr[ltLo]); + ltLo++; unLo++; continue; + }; + if (n > 0) break; + unLo++; + } + while (True) { + if (unLo > unHi) break; + n = ((Int32)block[ptr[unHi]+d]) - med; + if (n == 0) { + mswap(ptr[unHi], ptr[gtHi]); + gtHi--; unHi--; continue; + }; + if (n < 0) break; + unHi--; + } + if (unLo > unHi) break; + mswap(ptr[unLo], ptr[unHi]); unLo++; unHi--; + } + + AssertD ( unHi == unLo-1, "mainQSort3(2)" ); + + if (gtHi < ltLo) { + mpush(lo, hi, d+2 ); + continue; + } + + n = mmin(ltLo-lo, unLo-ltLo); mvswap(lo, unLo-n, n); + m = mmin(hi-gtHi, gtHi-unHi); mvswap(unLo, hi-m+1, m); + + n = lo + unLo - ltLo - 1; + m = hi - (gtHi - unHi) + 1; + + nextLo[0] = lo; nextHi[0] = n; nextD[0] = d; + nextLo[1] = m; nextHi[1] = hi; nextD[1] = d; + nextLo[2] = n+1; nextHi[2] = m-1; nextD[2] = d+2; + + if (mnextsize(0) < mnextsize(1)) mnextswap(0,1); + if (mnextsize(1) < mnextsize(2)) mnextswap(1,2); + if (mnextsize(0) < mnextsize(1)) mnextswap(0,1); + + AssertD (mnextsize(0) >= mnextsize(1), "mainQSort3(8)" ); + AssertD (mnextsize(1) >= mnextsize(2), "mainQSort3(9)" ); + + mpush (nextLo[0], nextHi[0], nextD[0]); + mpush (nextLo[1], nextHi[1], nextD[1]); + mpush (nextLo[2], nextHi[2], nextD[2]); + } +} + +#undef mswap +#undef mvswap +#undef mpush +#undef mpop +#undef mmin +#undef mnextsize +#undef mnextswap +#undef MAIN_QSORT_SMALL_THRESH +#undef MAIN_QSORT_DEPTH_THRESH +#undef MAIN_QSORT_STACK_SIZE + + +/*---------------------------------------------*/ +/* Pre: + nblock > N_OVERSHOOT + block32 exists for [0 .. nblock-1 +N_OVERSHOOT] + ((UInt16*)block32) [0 .. nblock-1] [15:8] holds block + ptr exists for [0 .. nblock-1] + + Post: + ((UInt16*)block32) [0 .. nblock-1] [15:8] holds block + All other areas of block32 destroyed + ftab [0 .. 65536 ] destroyed + ptr [0 .. nblock-1] holds sorted order + if (*budget < 0), sorting was abandoned +*/ + +#define BIGFREQ(b) (ftab[((b)+1) << 8] - ftab[(b) << 8]) +#define SETMASK (1 << 21) +#define CLEARMASK (~(SETMASK)) + +static +void mainSort ( UInt32* ptr, + UInt16* block, + UInt16* quadrant, + UInt32* ftab, + Int32 nblock, + Int32 verb, + Int32* budget ) +{ + Int32 i, j, k, m, ss, sb; + Int32 runningOrder[256]; + Int32 copy[256]; + Bool bigDone[256]; + UChar c1; + Int32 numQSorted = 0; + UInt16 s; + + if (verb >= 4) VPrintf0 ( " main sort initialise ...\n" ); + + /*-- Stripe the block data into 16 bits, and at the + same time set up the 2-byte frequency table + --*/ + for (i = 65536; i >= 0; i--) ftab[i] = 0; + + s = block[0]; + for (i = 1; i < nblock; i++) { + quadrant[i] = 0; + s = (s << 8) | block[i]; + block[i-1] = s; + ftab[s]++; + } + quadrant[0] = 0; + s = (s << 8) | (block[0] >> 8); + block[nblock-1] = s; + ftab[s]++; + + /*-- (emphasises close relationship of block & quadrant) --*/ + for (i = 0; i < BZ_N_OVERSHOOT; i++) { + block [nblock+i] = block[i]; + quadrant[nblock+i] = 0; + } + + if (verb >= 4) VPrintf0 ( " bucket sorting ...\n" ); + + /*-- Complete the initial radix sort --*/ + for (i = 1; i <= 65536; i++) ftab[i] += ftab[i-1]; + + for (i = 0; i < nblock; i++) { + s = block[i]; + j = ftab[s] - 1; + ftab[s] = j; + ptr[j] = i; + } + + /*-- + Now ftab contains the first loc of every small bucket. + Calculate the running order, from smallest to largest + big bucket. + --*/ + for (i = 0; i <= 255; i++) { + bigDone [i] = False; + runningOrder[i] = i; + } + + { + Int32 vv; + Int32 h = 1; + do h = 3 * h + 1; while (h <= 256); + do { + h = h / 3; + for (i = h; i <= 255; i++) { + vv = runningOrder[i]; + j = i; + while ( BIGFREQ(runningOrder[j-h]) > BIGFREQ(vv) ) { + runningOrder[j] = runningOrder[j-h]; + j = j - h; + if (j <= (h - 1)) goto zero; + } + zero: + runningOrder[j] = vv; + } + } while (h != 1); + } + + /*-- + The main sorting loop. + --*/ + + for (i = 0; i <= 255; i++) { + + /*-- + Process big buckets, starting with the least full. + Basically this is a 4-step process in which we call + mainQSort3 to sort the small buckets [ss, j], but + also make a big effort to avoid the calls if we can. + --*/ + ss = runningOrder[i]; + + /*-- + Step 1: + Complete the big bucket [ss] by quicksorting + any unsorted small buckets [ss, j], for j != ss. + Hopefully previous pointer-scanning phases have already + completed many of the small buckets [ss, j], so + we don't have to sort them at all. + --*/ + for (j = 0; j <= 255; j++) { + if (j != ss) { + sb = (ss << 8) + j; + if ( ! (ftab[sb] & SETMASK) ) { + Int32 lo = ftab[sb] & CLEARMASK; + Int32 hi = (ftab[sb+1] & CLEARMASK) - 1; + if (hi > lo) { + if (verb >= 4) + VPrintf4 ( " qsort [0x%x, 0x%x] " + "done %d this %d\n", + ss, j, numQSorted, hi - lo + 1 ); + mainQSort3 ( + ptr, block, quadrant, nblock, + lo, hi, BZ_N_RADIX, budget + ); + numQSorted += (hi - lo + 1); + if (*budget < 0) return; + } + } + ftab[sb] |= SETMASK; + } + } + + /*-- + Step 2: + Deal specially with case [ss, ss]. This establishes the + sorted order for [ss, ss] without any comparisons. + A clever trick, cryptically described as steps Q6b and Q6c + in SRC-124 (aka BW94). Compared to bzip2, this makes it + practical not to use a preliminary run-length coder. + --*/ + { + Int32 put0, get0, put1, get1; + Int32 sbn = (ss << 8) + ss; + Int32 lo = ftab[sbn] & CLEARMASK; + Int32 hi = (ftab[sbn+1] & CLEARMASK) - 1; + UChar ssc = (UChar)ss; + put0 = lo; + get0 = ftab[ss << 8] & CLEARMASK; + put1 = hi; + get1 = (ftab[(ss+1) << 8] & CLEARMASK) - 1; + while (get0 < put0) { + j = ptr[get0]-1; if (j < 0) j += nblock; + c1 = (UChar)(block[j] >> 8); + if (c1 == ssc) { ptr[put0] = j; put0++; }; + get0++; + } + while (get1 > put1) { + j = ptr[get1]-1; if (j < 0) j += nblock; + c1 = (UChar)(block[j] >> 8); + if (c1 == ssc) { ptr[put1] = j; put1--; }; + get1--; + } + ftab[sbn] |= SETMASK; + } + + /*-- + Step 3: + The [ss] big bucket is now done. Record this fact, + and update the quadrant descriptors. Remember to + update quadrants in the overshoot area too, if + necessary. The "if (i < 255)" test merely skips + this updating for the last bucket processed, since + updating for the last bucket is pointless. + + The quadrant array provides a way to incrementally + cache sort orderings, as they appear, so as to + make subsequent comparisons in fullGtU() complete + faster. For repetitive blocks this makes a big + difference (but not big enough to be able to avoid + the fallback sorting mechanism, exponential radix sort). + + The precise meaning is: at all times: + + for 0 <= i < nblock and 0 <= j <= nblock + + if block[i] != block[j], + + then the relative values of quadrant[i] and + quadrant[j] are meaningless. + + else { + if quadrant[i] < quadrant[j] + then the string starting at i lexicographically + precedes the string starting at j + + else if quadrant[i] > quadrant[j] + then the string starting at j lexicographically + precedes the string starting at i + + else + the relative ordering of the strings starting + at i and j has not yet been determined. + } + --*/ + bigDone[ss] = True; + + if (i < 255) { + Int32 bbStart = ftab[ss << 8] & CLEARMASK; + Int32 bbSize = (ftab[(ss+1) << 8] & CLEARMASK) - bbStart; + Int32 shifts = 0; + + while ((bbSize >> shifts) > 65534) shifts++; + + for (j = 0; j < bbSize; j++) { + Int32 a2update = ptr[bbStart + j]; + UInt16 qVal = (UInt16)(j >> shifts); + quadrant[a2update] = qVal; + if (a2update < BZ_N_OVERSHOOT) + quadrant[a2update + nblock] = qVal; + } + AssertH ( ((bbSize-1) >> shifts) <= 65535, 1002 ); + } + + /*-- + Step 4: + Now scan this big bucket [ss] so as to synthesise the + sorted order for small buckets [t, ss] for all t != ss. + This will avoid doing Real Work in subsequent Step 1's. + --*/ + for (j = 0; j <= 255; j++) + copy[j] = ftab[(j << 8) + ss] & CLEARMASK; + + m = ftab[(ss+1) << 8] & CLEARMASK; + for (j = ftab[ss << 8] & CLEARMASK; j < m; j++) { + k = ptr[j] - 1; if (k < 0) k += nblock; + c1 = (UChar)(block[k] >> 8); + if ( ! bigDone[c1] ) { + ptr[copy[c1]] = k; + copy[c1] ++; + } + } + + for (j = 0; j <= 255; j++) ftab[(j << 8) + ss] |= SETMASK; + } + + if (verb >= 4) + VPrintf3 ( " %d pointers, %d sorted, %d scanned\n", + nblock, numQSorted, nblock - numQSorted ); +} + +#undef BIGFREQ +#undef SETMASK +#undef CLEARMASK + + +/*---------------------------------------------*/ +/* Pre: + nblock > 0 + arr2 exists for [0 .. nblock-1 +N_OVERSHOOT] + ((UInt16*)arr2) [0 .. nblock-1] [15:8] holds block + arr1 exists for [0 .. nblock-1] + + Post: + ((UInt16*)arr2) [0 .. nblock-1] [15:8] holds block + All other areas of block destroyed + ftab [ 0 .. 65536 ] destroyed + arr1 [0 .. nblock-1] holds sorted order +*/ +void blockSort ( EState* s ) +{ + UInt32* ptr = s->ptr; + UInt16* block = s->block; + UInt32* ftab = s->ftab; + Int32 nblock = s->nblock; + Int32 verb = s->verbosity; + Int32 wfact = s->workFactor; + UInt16* quadrant; + Int32 budget; + Int32 budgetInit; + Int32 i; + + if (nblock < 10000) { + for (i = 0; i < nblock; i++) block[i] <<= 8; + fallbackSort ( s->arr1, s->arr2, ftab, nblock, verb ); + } else { + quadrant = &(block[nblock+BZ_N_OVERSHOOT]); + + /* (wfact-1) / 3 puts the default-factor-30 + transition point at very roughly the same place as + with v0.1 and v0.9.0. + Not that it particularly matters any more, since the + resulting compressed stream is now the same regardless + of whether or not we use the main sort or fallback sort. + */ + if (wfact < 1 ) wfact = 1; + if (wfact > 100) wfact = 100; + budgetInit = nblock * ((wfact-1) / 3); + budget = budgetInit; + + mainSort ( ptr, block, quadrant, ftab, nblock, verb, &budget ); + if (verb >= 3) + VPrintf3 ( " %d work, %d block, ratio %5.2f\n", + budgetInit - budget, + nblock, + (float)(budgetInit - budget) / + (float)(nblock==0 ? 1 : nblock) ); + if (budget < 0) { + if (verb >= 2) + VPrintf0 ( " too repetitive; using fallback" + " sorting algorithm\n" ); + fallbackSort ( s->arr1, s->arr2, ftab, nblock, verb ); + } + } + + s->origPtr = -1; + for (i = 0; i < s->nblock; i++) + if (ptr[i] == 0) + { s->origPtr = i; break; }; + + AssertH( s->origPtr != -1, 1003 ); +} + + +/*-------------------------------------------------------------*/ +/*--- end blocksort.c ---*/ +/*-------------------------------------------------------------*/ diff --git a/zgz/old-bzip2/bzip2.c b/zgz/old-bzip2/bzip2.c new file mode 100644 index 0000000..6bf70df --- /dev/null +++ b/zgz/old-bzip2/bzip2.c @@ -0,0 +1,178 @@ +/*-- + This file is a part of bzip2 and/or libbzip2, a program and + library for lossless, block-sorting data compression. + + Copyright (C) 1996-1999 Julian R Seward. All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + + 1. Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + + 2. The origin of this software must not be misrepresented; you must + not claim that you wrote the original software. If you use this + software in a product, an acknowledgment in the product + documentation would be appreciated but is not required. + + 3. Altered source versions must be plainly marked as such, and must + not be misrepresented as being the original software. + + 4. The name of the author may not be used to endorse or promote + products derived from this software without specific prior written + permission. + + THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS + OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY + DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE + GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, + WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + Julian Seward, Cambridge, UK. + jseward@acm.org + bzip2/libbzip2 version 0.9.5 of 24 May 1999 + + (Now heavily hacked to be part of zgz; decompression and + portability ripped out.) + + This program is based on (at least) the work of: + Mike Burrows + David Wheeler + Peter Fenwick + Alistair Moffat + Radford Neal + Ian H. Witten + Robert Sedgewick + Jon L. Bentley + + For more information on these sources, see the manual. +--*/ + +#include +#include +#include +#include +#include +#include +#include +#include "bzlib.h" + +#define ERROR_IF_EOF(i) { if ((i) == EOF) ioError(); } +#define ERROR_IF_NOT_ZERO(i) { if ((i) != 0) ioError(); } +#define ERROR_IF_MINUS_ONE(i) { if ((i) == (-1)) ioError(); } + +#include +#include +#include +#include +#include + +typedef char Char; +typedef unsigned char Bool; +typedef unsigned char UChar; +typedef int Int32; +typedef unsigned int UInt32; +typedef short Int16; +typedef unsigned short UInt16; + +#define True ((Bool)1) +#define False ((Bool)0) + +/*-- + IntNative is your platform's `native' int size. + Only here to avoid probs with 64-bit platforms. +--*/ +typedef int IntNative; + +Int32 verbosity; +Int32 blockSize100k; +Int32 workFactor; + +Bool myfeof ( FILE* f ) +{ + Int32 c = fgetc ( f ); + if (c == EOF) return True; + ungetc ( c, f ); + return False; +} + +void panic (char *msg) { + perror("oops"); + fprintf(stderr, "%s\n", msg); + exit(1); +} + +void compressStream ( FILE *stream, FILE *zStream ) +{ + BZFILE* bzf = NULL; + UChar ibuf[5000]; + Int32 nIbuf; + UInt32 nbytes_in, nbytes_out; + Int32 bzerr, bzerr_dummy, ret; + + if (ferror(stream)) goto errhandler_io; + if (ferror(zStream)) goto errhandler_io; + + bzf = bzWriteOpen ( &bzerr, zStream, + blockSize100k, verbosity, workFactor ); + if (bzerr != BZ_OK) goto errhandler; + + if (verbosity >= 2) fprintf ( stderr, "\n" ); + + while (True) { + + if (myfeof(stream)) break; + nIbuf = fread ( ibuf, sizeof(UChar), 5000, stream ); + if (ferror(stream)) goto errhandler_io; + if (nIbuf > 0) bzWrite ( &bzerr, bzf, (void*)ibuf, nIbuf ); + if (bzerr != BZ_OK) goto errhandler; + + } + + bzWriteClose ( &bzerr, bzf, 0, &nbytes_in, &nbytes_out ); + if (bzerr != BZ_OK) goto errhandler; + + if (ferror(zStream)) goto errhandler_io; + ret = fflush ( zStream ); + if (ret == EOF) goto errhandler_io; + if (zStream != stdout) { + ret = fclose ( zStream ); + if (ret == EOF) goto errhandler_io; + } + if (ferror(stream)) goto errhandler_io; + ret = fclose ( stream ); + if (ret == EOF) goto errhandler_io; + + if (nbytes_in == 0) nbytes_in = 1; + + return; + + errhandler: + bzWriteClose ( &bzerr_dummy, bzf, 1, &nbytes_in, &nbytes_out ); + switch (bzerr) { + case BZ_MEM_ERROR: + panic ( "out of memory" ); + case BZ_IO_ERROR: + errhandler_io: + panic ( "io error" ); + default: + panic ( "compress:unexpected error" ); + } + + panic ( "compress:end" ); + /*notreached*/ +} + +void old_bzip2(int level) { + workFactor = 30; + blockSize100k = level; + + compressStream(stdin, stdout); +} diff --git a/zgz/old-bzip2/bzlib.c b/zgz/old-bzip2/bzlib.c new file mode 100644 index 0000000..adee3ae --- /dev/null +++ b/zgz/old-bzip2/bzlib.c @@ -0,0 +1,915 @@ + +/*-------------------------------------------------------------*/ +/*--- Library top-level functions. ---*/ +/*--- bzlib.c ---*/ +/*-------------------------------------------------------------*/ + +/*-- + This file is a part of bzip2 and/or libbzip2, a program and + library for lossless, block-sorting data compression. + + Copyright (C) 1996-1999 Julian R Seward. All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + + 1. Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + + 2. The origin of this software must not be misrepresented; you must + not claim that you wrote the original software. If you use this + software in a product, an acknowledgment in the product + documentation would be appreciated but is not required. + + 3. Altered source versions must be plainly marked as such, and must + not be misrepresented as being the original software. + + 4. The name of the author may not be used to endorse or promote + products derived from this software without specific prior written + permission. + + THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS + OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY + DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE + GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, + WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + Julian Seward, Cambridge, UK. + jseward@acm.org + bzip2/libbzip2 version 0.9.5 of 24 May 1999 + + This program is based on (at least) the work of: + Mike Burrows + David Wheeler + Peter Fenwick + Alistair Moffat + Radford Neal + Ian H. Witten + Robert Sedgewick + Jon L. Bentley + + For more information on these sources, see the manual. +--*/ + +/*-- + CHANGES + ~~~~~~~ + 0.9.0 -- original version. + + 0.9.0a/b -- no changes in this file. + + 0.9.0c + * made zero-length BZ_FLUSH work correctly in bzCompress(). + * fixed bzWrite/bzRead to ignore zero-length requests. + * fixed bzread to correctly handle read requests after EOF. + * wrong parameter order in call to bzDecompressInit in + bzBuffToBuffDecompress. Fixed. +--*/ + +#include "bzlib_private.h" + +void bz__AssertH__fail ( int errcode ) { + fprintf(stderr, "bzip2 compressor internal error\n"); + exit(1); +} + +/*---------------------------------------------------*/ +/*--- Compression stuff ---*/ +/*---------------------------------------------------*/ + +/*---------------------------------------------------*/ +static +void* default_bzalloc ( void* opaque, Int32 items, Int32 size ) +{ + void* v = malloc ( items * size ); + return v; +} + +static +void default_bzfree ( void* opaque, void* addr ) +{ + if (addr != NULL) free ( addr ); +} + + +/*---------------------------------------------------*/ +static +void prepare_new_block ( EState* s ) +{ + Int32 i; + s->nblock = 0; + s->numZ = 0; + s->state_out_pos = 0; + BZ_INITIALISE_CRC ( s->blockCRC ); + for (i = 0; i < 256; i++) s->inUse[i] = False; + s->blockNo++; +} + + +/*---------------------------------------------------*/ +static +void init_RL ( EState* s ) +{ + s->state_in_ch = 256; + s->state_in_len = 0; +} + + +static +Bool isempty_RL ( EState* s ) +{ + if (s->state_in_ch < 256 && s->state_in_len > 0) + return False; else + return True; +} + + +/*---------------------------------------------------*/ +int BZ_API(bzCompressInit) + ( bz_stream* strm, + int blockSize100k, + int verbosity, + int workFactor ) +{ + Int32 n; + EState* s; + + if (strm == NULL || + blockSize100k < 1 || blockSize100k > 9 || + workFactor < 0 || workFactor > 250) + return BZ_PARAM_ERROR; + + if (workFactor == 0) workFactor = 30; + if (strm->bzalloc == NULL) strm->bzalloc = default_bzalloc; + if (strm->bzfree == NULL) strm->bzfree = default_bzfree; + + s = BZALLOC( sizeof(EState) ); + if (s == NULL) return BZ_MEM_ERROR; + s->strm = strm; + + s->arr1 = NULL; + s->arr2 = NULL; + s->ftab = NULL; + + n = 100000 * blockSize100k; + s->arr1 = BZALLOC( n * sizeof(UInt32) ); + s->arr2 = BZALLOC( (n+BZ_N_OVERSHOOT) * sizeof(UInt32) ); + s->ftab = BZALLOC( 65537 * sizeof(UInt32) ); + + if (s->arr1 == NULL || s->arr2 == NULL || s->ftab == NULL) { + if (s->arr1 != NULL) BZFREE(s->arr1); + if (s->arr2 != NULL) BZFREE(s->arr2); + if (s->ftab != NULL) BZFREE(s->ftab); + if (s != NULL) BZFREE(s); + return BZ_MEM_ERROR; + } + + s->blockNo = 0; + s->state = BZ_S_INPUT; + s->mode = BZ_M_RUNNING; + s->combinedCRC = 0; + s->blockSize100k = blockSize100k; + s->nblockMAX = 100000 * blockSize100k - 19; + s->verbosity = verbosity; + s->workFactor = workFactor; + + s->block = (UInt16*)s->arr2; + s->mtfv = (UInt16*)s->arr1; + s->zbits = NULL; + s->ptr = (UInt32*)s->arr1; + + strm->state = s; + strm->total_in = 0; + strm->total_out = 0; + init_RL ( s ); + prepare_new_block ( s ); + return BZ_OK; +} + + +/*---------------------------------------------------*/ +static +void add_pair_to_block ( EState* s ) +{ + Int32 i; + UChar ch = (UChar)(s->state_in_ch); + for (i = 0; i < s->state_in_len; i++) { + BZ_UPDATE_CRC( s->blockCRC, ch ); + } + s->inUse[s->state_in_ch] = True; + switch (s->state_in_len) { + case 1: + s->block[s->nblock] = (UInt16)ch; s->nblock++; + break; + case 2: + s->block[s->nblock] = (UInt16)ch; s->nblock++; + s->block[s->nblock] = (UInt16)ch; s->nblock++; + break; + case 3: + s->block[s->nblock] = (UInt16)ch; s->nblock++; + s->block[s->nblock] = (UInt16)ch; s->nblock++; + s->block[s->nblock] = (UInt16)ch; s->nblock++; + break; + default: + s->inUse[s->state_in_len-4] = True; + s->block[s->nblock] = (UInt16)ch; s->nblock++; + s->block[s->nblock] = (UInt16)ch; s->nblock++; + s->block[s->nblock] = (UInt16)ch; s->nblock++; + s->block[s->nblock] = (UInt16)ch; s->nblock++; + s->block[s->nblock] = ((UInt16)(s->state_in_len-4)); + s->nblock++; + break; + } +} + + +/*---------------------------------------------------*/ +static +void flush_RL ( EState* s ) +{ + if (s->state_in_ch < 256) add_pair_to_block ( s ); + init_RL ( s ); +} + + +/*---------------------------------------------------*/ +#define ADD_CHAR_TO_BLOCK(zs,zchh0) \ +{ \ + UInt32 zchh = (UInt32)(zchh0); \ + /*-- fast track the common case --*/ \ + if (zchh != zs->state_in_ch && \ + zs->state_in_len == 1) { \ + UChar ch = (UChar)(zs->state_in_ch); \ + BZ_UPDATE_CRC( zs->blockCRC, ch ); \ + zs->inUse[zs->state_in_ch] = True; \ + zs->block[zs->nblock] = (UInt16)ch; \ + zs->nblock++; \ + zs->state_in_ch = zchh; \ + } \ + else \ + /*-- general, uncommon cases --*/ \ + if (zchh != zs->state_in_ch || \ + zs->state_in_len == 255) { \ + if (zs->state_in_ch < 256) \ + add_pair_to_block ( zs ); \ + zs->state_in_ch = zchh; \ + zs->state_in_len = 1; \ + } else { \ + zs->state_in_len++; \ + } \ +} + + +/*---------------------------------------------------*/ +static +Bool copy_input_until_stop ( EState* s ) +{ + Bool progress_in = False; + + if (s->mode == BZ_M_RUNNING) { + + /*-- fast track the common case --*/ + while (True) { + /*-- block full? --*/ + if (s->nblock >= s->nblockMAX) break; + /*-- no input? --*/ + if (s->strm->avail_in == 0) break; + progress_in = True; + ADD_CHAR_TO_BLOCK ( s, (UInt32)(*((UChar*)(s->strm->next_in))) ); + s->strm->next_in++; + s->strm->avail_in--; + s->strm->total_in++; + } + + } else { + + /*-- general, uncommon case --*/ + while (True) { + /*-- block full? --*/ + if (s->nblock >= s->nblockMAX) break; + /*-- no input? --*/ + if (s->strm->avail_in == 0) break; + /*-- flush/finish end? --*/ + if (s->avail_in_expect == 0) break; + progress_in = True; + ADD_CHAR_TO_BLOCK ( s, (UInt32)(*((UChar*)(s->strm->next_in))) ); + s->strm->next_in++; + s->strm->avail_in--; + s->strm->total_in++; + s->avail_in_expect--; + } + } + return progress_in; +} + + +/*---------------------------------------------------*/ +static +Bool copy_output_until_stop ( EState* s ) +{ + Bool progress_out = False; + + while (True) { + + /*-- no output space? --*/ + if (s->strm->avail_out == 0) break; + + /*-- block done? --*/ + if (s->state_out_pos >= s->numZ) break; + + progress_out = True; + *(s->strm->next_out) = s->zbits[s->state_out_pos]; + s->state_out_pos++; + s->strm->avail_out--; + s->strm->next_out++; + s->strm->total_out++; + + } + + return progress_out; +} + + +/*---------------------------------------------------*/ +static +Bool handle_compress ( bz_stream* strm ) +{ + Bool progress_in = False; + Bool progress_out = False; + EState* s = strm->state; + + while (True) { + + if (s->state == BZ_S_OUTPUT) { + progress_out |= copy_output_until_stop ( s ); + if (s->state_out_pos < s->numZ) break; + if (s->mode == BZ_M_FINISHING && + s->avail_in_expect == 0 && + isempty_RL(s)) break; + prepare_new_block ( s ); + s->state = BZ_S_INPUT; + if (s->mode == BZ_M_FLUSHING && + s->avail_in_expect == 0 && + isempty_RL(s)) break; + } + + if (s->state == BZ_S_INPUT) { + progress_in |= copy_input_until_stop ( s ); + if (s->mode != BZ_M_RUNNING && s->avail_in_expect == 0) { + flush_RL ( s ); + compressBlock ( s, (Bool)(s->mode == BZ_M_FINISHING) ); + s->state = BZ_S_OUTPUT; + } + else + if (s->nblock >= s->nblockMAX) { + compressBlock ( s, False ); + s->state = BZ_S_OUTPUT; + } + else + if (s->strm->avail_in == 0) { + break; + } + } + + } + + return progress_in || progress_out; +} + + +/*---------------------------------------------------*/ +int BZ_API(bzCompress) ( bz_stream *strm, int action ) +{ + Bool progress; + EState* s; + if (strm == NULL) return BZ_PARAM_ERROR; + s = strm->state; + if (s == NULL) return BZ_PARAM_ERROR; + if (s->strm != strm) return BZ_PARAM_ERROR; + + preswitch: + switch (s->mode) { + + case BZ_M_IDLE: + return BZ_SEQUENCE_ERROR; + + case BZ_M_RUNNING: + if (action == BZ_RUN) { + progress = handle_compress ( strm ); + return progress ? BZ_RUN_OK : BZ_PARAM_ERROR; + } + else + if (action == BZ_FLUSH) { + s->avail_in_expect = strm->avail_in; + s->mode = BZ_M_FLUSHING; + goto preswitch; + } + else + if (action == BZ_FINISH) { + s->avail_in_expect = strm->avail_in; + s->mode = BZ_M_FINISHING; + goto preswitch; + } + else + return BZ_PARAM_ERROR; + + case BZ_M_FLUSHING: + if (action != BZ_FLUSH) return BZ_SEQUENCE_ERROR; + if (s->avail_in_expect != s->strm->avail_in) return BZ_SEQUENCE_ERROR; + progress = handle_compress ( strm ); + if (s->avail_in_expect > 0 || !isempty_RL(s) || + s->state_out_pos < s->numZ) return BZ_FLUSH_OK; + s->mode = BZ_M_RUNNING; + return BZ_RUN_OK; + + case BZ_M_FINISHING: + if (action != BZ_FINISH) return BZ_SEQUENCE_ERROR; + if (s->avail_in_expect != s->strm->avail_in) return BZ_SEQUENCE_ERROR; + progress = handle_compress ( strm ); + if (!progress) return BZ_SEQUENCE_ERROR; + if (s->avail_in_expect > 0 || !isempty_RL(s) || + s->state_out_pos < s->numZ) return BZ_FINISH_OK; + s->mode = BZ_M_IDLE; + return BZ_STREAM_END; + } + return BZ_OK; /*--not reached--*/ +} + + +/*---------------------------------------------------*/ +int BZ_API(bzCompressEnd) ( bz_stream *strm ) +{ + EState* s; + if (strm == NULL) return BZ_PARAM_ERROR; + s = strm->state; + if (s == NULL) return BZ_PARAM_ERROR; + if (s->strm != strm) return BZ_PARAM_ERROR; + + if (s->arr1 != NULL) BZFREE(s->arr1); + if (s->arr2 != NULL) BZFREE(s->arr2); + if (s->ftab != NULL) BZFREE(s->ftab); + BZFREE(strm->state); + + strm->state = NULL; + + return BZ_OK; +} + + +#ifndef BZ_NO_STDIO +/*---------------------------------------------------*/ +/*--- File I/O stuff ---*/ +/*---------------------------------------------------*/ + +#define BZ_SETERR(eee) \ +{ \ + if (bzerror != NULL) *bzerror = eee; \ + if (bzf != NULL) bzf->lastErr = eee; \ +} + +typedef + struct { + FILE* handle; + Char buf[BZ_MAX_UNUSED]; + Int32 bufN; + Bool writing; + bz_stream strm; + Int32 lastErr; + Bool initialisedOk; + } + bzFile; + + +/*---------------------------------------------------*/ +BZFILE* BZ_API(bzWriteOpen) + ( int* bzerror, + FILE* f, + int blockSize100k, + int verbosity, + int workFactor ) +{ + Int32 ret; + bzFile* bzf = NULL; + + BZ_SETERR(BZ_OK); + + if (f == NULL || + (blockSize100k < 1 || blockSize100k > 9) || + (workFactor < 0 || workFactor > 250) || + (verbosity < 0 || verbosity > 4)) + { BZ_SETERR(BZ_PARAM_ERROR); return NULL; }; + + if (ferror(f)) + { BZ_SETERR(BZ_IO_ERROR); return NULL; }; + + bzf = malloc ( sizeof(bzFile) ); + if (bzf == NULL) + { BZ_SETERR(BZ_MEM_ERROR); return NULL; }; + + BZ_SETERR(BZ_OK); + bzf->initialisedOk = False; + bzf->bufN = 0; + bzf->handle = f; + bzf->writing = True; + bzf->strm.bzalloc = NULL; + bzf->strm.bzfree = NULL; + bzf->strm.opaque = NULL; + + if (workFactor == 0) workFactor = 30; + ret = bzCompressInit ( &(bzf->strm), blockSize100k, + verbosity, workFactor ); + if (ret != BZ_OK) + { BZ_SETERR(ret); free(bzf); return NULL; }; + + bzf->strm.avail_in = 0; + bzf->initialisedOk = True; + return bzf; +} + + + +/*---------------------------------------------------*/ +void BZ_API(bzWrite) + ( int* bzerror, + BZFILE* b, + void* buf, + int len ) +{ + Int32 n, n2, ret; + bzFile* bzf = (bzFile*)b; + + BZ_SETERR(BZ_OK); + if (bzf == NULL || buf == NULL || len < 0) + { BZ_SETERR(BZ_PARAM_ERROR); return; }; + if (!(bzf->writing)) + { BZ_SETERR(BZ_SEQUENCE_ERROR); return; }; + if (ferror(bzf->handle)) + { BZ_SETERR(BZ_IO_ERROR); return; }; + + if (len == 0) + { BZ_SETERR(BZ_OK); return; }; + + bzf->strm.avail_in = len; + bzf->strm.next_in = buf; + + while (True) { + bzf->strm.avail_out = BZ_MAX_UNUSED; + bzf->strm.next_out = bzf->buf; + ret = bzCompress ( &(bzf->strm), BZ_RUN ); + if (ret != BZ_RUN_OK) + { BZ_SETERR(ret); return; }; + + if (bzf->strm.avail_out < BZ_MAX_UNUSED) { + n = BZ_MAX_UNUSED - bzf->strm.avail_out; + n2 = fwrite ( (void*)(bzf->buf), sizeof(UChar), + n, bzf->handle ); + if (n != n2 || ferror(bzf->handle)) + { BZ_SETERR(BZ_IO_ERROR); return; }; + } + + if (bzf->strm.avail_in == 0) + { BZ_SETERR(BZ_OK); return; }; + } +} + + +/*---------------------------------------------------*/ +void BZ_API(bzWriteClose) + ( int* bzerror, + BZFILE* b, + int abandon, + unsigned int* nbytes_in, + unsigned int* nbytes_out ) +{ + Int32 n, n2, ret; + bzFile* bzf = (bzFile*)b; + + if (bzf == NULL) + { BZ_SETERR(BZ_OK); return; }; + if (!(bzf->writing)) + { BZ_SETERR(BZ_SEQUENCE_ERROR); return; }; + if (ferror(bzf->handle)) + { BZ_SETERR(BZ_IO_ERROR); return; }; + + if (nbytes_in != NULL) *nbytes_in = 0; + if (nbytes_out != NULL) *nbytes_out = 0; + + if ((!abandon) && bzf->lastErr == BZ_OK) { + while (True) { + bzf->strm.avail_out = BZ_MAX_UNUSED; + bzf->strm.next_out = bzf->buf; + ret = bzCompress ( &(bzf->strm), BZ_FINISH ); + if (ret != BZ_FINISH_OK && ret != BZ_STREAM_END) + { BZ_SETERR(ret); return; }; + + if (bzf->strm.avail_out < BZ_MAX_UNUSED) { + n = BZ_MAX_UNUSED - bzf->strm.avail_out; + n2 = fwrite ( (void*)(bzf->buf), sizeof(UChar), + n, bzf->handle ); + if (n != n2 || ferror(bzf->handle)) + { BZ_SETERR(BZ_IO_ERROR); return; }; + } + + if (ret == BZ_STREAM_END) break; + } + } + + if ( !abandon && !ferror ( bzf->handle ) ) { + fflush ( bzf->handle ); + if (ferror(bzf->handle)) + { BZ_SETERR(BZ_IO_ERROR); return; }; + } + + if (nbytes_in != NULL) *nbytes_in = bzf->strm.total_in; + if (nbytes_out != NULL) *nbytes_out = bzf->strm.total_out; + + BZ_SETERR(BZ_OK); + bzCompressEnd ( &(bzf->strm) ); + free ( bzf ); +} + + +/*---------------------------------------------------*/ +void BZ_API(bzReadGetUnused) + ( int* bzerror, + BZFILE* b, + void** unused, + int* nUnused ) +{ + bzFile* bzf = (bzFile*)b; + if (bzf == NULL) + { BZ_SETERR(BZ_PARAM_ERROR); return; }; + if (bzf->lastErr != BZ_STREAM_END) + { BZ_SETERR(BZ_SEQUENCE_ERROR); return; }; + if (unused == NULL || nUnused == NULL) + { BZ_SETERR(BZ_PARAM_ERROR); return; }; + + BZ_SETERR(BZ_OK); + *nUnused = bzf->strm.avail_in; + *unused = bzf->strm.next_in; +} +#endif + + +/*---------------------------------------------------*/ +/*--- Misc convenience stuff ---*/ +/*---------------------------------------------------*/ + +/*---------------------------------------------------*/ +int BZ_API(bzBuffToBuffCompress) + ( char* dest, + unsigned int* destLen, + char* source, + unsigned int sourceLen, + int blockSize100k, + int verbosity, + int workFactor ) +{ + bz_stream strm; + int ret; + + if (dest == NULL || destLen == NULL || + source == NULL || + blockSize100k < 1 || blockSize100k > 9 || + verbosity < 0 || verbosity > 4 || + workFactor < 0 || workFactor > 250) + return BZ_PARAM_ERROR; + + if (workFactor == 0) workFactor = 30; + strm.bzalloc = NULL; + strm.bzfree = NULL; + strm.opaque = NULL; + ret = bzCompressInit ( &strm, blockSize100k, + verbosity, workFactor ); + if (ret != BZ_OK) return ret; + + strm.next_in = source; + strm.next_out = dest; + strm.avail_in = sourceLen; + strm.avail_out = *destLen; + + ret = bzCompress ( &strm, BZ_FINISH ); + if (ret == BZ_FINISH_OK) goto output_overflow; + if (ret != BZ_STREAM_END) goto errhandler; + + /* normal termination */ + *destLen -= strm.avail_out; + bzCompressEnd ( &strm ); + return BZ_OK; + + output_overflow: + bzCompressEnd ( &strm ); + return BZ_OUTBUFF_FULL; + + errhandler: + bzCompressEnd ( &strm ); + return ret; +} + + +/*---------------------------------------------------*/ +/*-- + Code contributed by Yoshioka Tsuneo + (QWF00133@niftyserve.or.jp/tsuneo-y@is.aist-nara.ac.jp), + to support better zlib compatibility. + This code is not _officially_ part of libbzip2 (yet); + I haven't tested it, documented it, or considered the + threading-safeness of it. + If this code breaks, please contact both Yoshioka and me. +--*/ +/*---------------------------------------------------*/ + +/*---------------------------------------------------*/ +/*-- + return version like "0.9.0c". +--*/ +const char * BZ_API(bzlibVersion)(void) +{ + return BZ_VERSION; +} + + +#ifndef BZ_NO_STDIO +/*---------------------------------------------------*/ + +#if defined(_WIN32) || defined(OS2) || defined(MSDOS) +# include +# include +# define SET_BINARY_MODE(file) setmode(fileno(file),O_BINARY) +#else +# define SET_BINARY_MODE(file) +#endif +static +BZFILE * bzopen_or_bzdopen + ( const char *path, /* no use when bzdopen */ + int fd, /* no use when bzdopen */ + const char *mode, + int open_mode) /* bzopen: 0, bzdopen:1 */ +{ + int bzerr; + int blockSize100k = 9; + int writing = 0; + char mode2[10] = ""; + FILE *fp = NULL; + BZFILE *bzfp = NULL; + int verbosity = 0; + int workFactor = 30; + + if (mode == NULL) return NULL; + while (*mode) { + switch (*mode) { + case 'r': + writing = 0; break; + case 'w': + writing = 1; break; + case 's': + //smallMode = 1; + break; + default: + if (isdigit((int)(*mode))) { + blockSize100k = *mode-'0'; + } + } + mode++; + } + strcat(mode2, writing ? "w" : "r" ); + strcat(mode2,"b"); /* binary mode */ + + if (open_mode==0) { + if (path==NULL || strcmp(path,"")==0) { + fp = (writing ? stdout : stdin); + SET_BINARY_MODE(fp); + } else { + fp = fopen(path,mode2); + } + } else { +#ifdef BZ_STRICT_ANSI + fp = NULL; +#else + fp = fdopen(fd,mode2); +#endif + } + if (fp == NULL) return NULL; + + if (writing) { + /* Guard against total chaos and anarchy -- JRS */ + if (blockSize100k < 1) blockSize100k = 1; + if (blockSize100k > 9) blockSize100k = 9; + bzfp = bzWriteOpen(&bzerr,fp,blockSize100k,verbosity,workFactor); + } + if (bzfp == NULL) { + if (fp != stdin && fp != stdout) fclose(fp); + return NULL; + } + return bzfp; +} + + +/*---------------------------------------------------*/ +/*-- + open file for read or write. + ex) bzopen("file","w9") + case path="" or NULL => use stdin or stdout. +--*/ +BZFILE * BZ_API(bzopen) + ( const char *path, + const char *mode ) +{ + return bzopen_or_bzdopen(path,-1,mode,/*bzopen*/0); +} + + +/*---------------------------------------------------*/ +BZFILE * BZ_API(bzdopen) + ( int fd, + const char *mode ) +{ + return bzopen_or_bzdopen(NULL,fd,mode,/*bzdopen*/1); +} + + +/*---------------------------------------------------*/ +int BZ_API(bzwrite) (BZFILE* b, void* buf, int len ) +{ + int bzerr; + + bzWrite(&bzerr,b,buf,len); + if(bzerr == BZ_OK){ + return len; + }else{ + return -1; + } +} + + +/*---------------------------------------------------*/ +int BZ_API(bzflush) (BZFILE *b) +{ + /* do nothing now... */ + return 0; +} + + +/*---------------------------------------------------*/ +void BZ_API(bzclose) (BZFILE* b) +{ + int bzerr; + FILE *fp = ((bzFile *)b)->handle; + + if (b==NULL) {return;} + if(((bzFile*)b)->writing){ + bzWriteClose(&bzerr,b,0,NULL,NULL); + if(bzerr != BZ_OK){ + bzWriteClose(NULL,b,1,NULL,NULL); + } + } + if(fp!=stdin && fp!=stdout){ + fclose(fp); + } +} + + +/*---------------------------------------------------*/ +/*-- + return last error code +--*/ +static char *bzerrorstrings[] = { + "OK" + ,"SEQUENCE_ERROR" + ,"PARAM_ERROR" + ,"MEM_ERROR" + ,"DATA_ERROR" + ,"DATA_ERROR_MAGIC" + ,"IO_ERROR" + ,"UNEXPECTED_EOF" + ,"OUTBUFF_FULL" + ,"???" /* for future */ + ,"???" /* for future */ + ,"???" /* for future */ + ,"???" /* for future */ + ,"???" /* for future */ + ,"???" /* for future */ +}; + + +const char * BZ_API(bzerror) (BZFILE *b, int *errnum) +{ + int err = ((bzFile *)b)->lastErr; + + if(err>0) err = 0; + *errnum = err; + return bzerrorstrings[err*-1]; +} +#endif + + +/*-------------------------------------------------------------*/ +/*--- end bzlib.c ---*/ +/*-------------------------------------------------------------*/ diff --git a/zgz/old-bzip2/bzlib.h b/zgz/old-bzip2/bzlib.h new file mode 100644 index 0000000..203adca --- /dev/null +++ b/zgz/old-bzip2/bzlib.h @@ -0,0 +1,309 @@ + +/*-------------------------------------------------------------*/ +/*--- Public header file for the library. ---*/ +/*--- bzlib.h ---*/ +/*-------------------------------------------------------------*/ + +/* This has been heavily hacked to be part of zgz. Decompression + * support has been ripped out. */ + +/*-- + This file is a part of bzip2 and/or libbzip2, a program and + library for lossless, block-sorting data compression. + + Copyright (C) 1996-1999 Julian R Seward. All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + + 1. Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + + 2. The origin of this software must not be misrepresented; you must + not claim that you wrote the original software. If you use this + software in a product, an acknowledgment in the product + documentation would be appreciated but is not required. + + 3. Altered source versions must be plainly marked as such, and must + not be misrepresented as being the original software. + + 4. The name of the author may not be used to endorse or promote + products derived from this software without specific prior written + permission. + + THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS + OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY + DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE + GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, + WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + Julian Seward, Cambridge, UK. + jseward@acm.org + bzip2/libbzip2 version 0.9.5 of 24 May 1999 + + This program is based on (at least) the work of: + Mike Burrows + David Wheeler + Peter Fenwick + Alistair Moffat + Radford Neal + Ian H. Witten + Robert Sedgewick + Jon L. Bentley + + For more information on these sources, see the manual. +--*/ + + +#ifndef _BZLIB_H +#define _BZLIB_H + +#ifdef __cplusplus +extern "C" { +#endif + +#define BZ_RUN 0 +#define BZ_FLUSH 1 +#define BZ_FINISH 2 + +#define BZ_OK 0 +#define BZ_RUN_OK 1 +#define BZ_FLUSH_OK 2 +#define BZ_FINISH_OK 3 +#define BZ_STREAM_END 4 +#define BZ_SEQUENCE_ERROR (-1) +#define BZ_PARAM_ERROR (-2) +#define BZ_MEM_ERROR (-3) +#define BZ_DATA_ERROR (-4) +#define BZ_DATA_ERROR_MAGIC (-5) +#define BZ_IO_ERROR (-6) +#define BZ_UNEXPECTED_EOF (-7) +#define BZ_OUTBUFF_FULL (-8) + +typedef + struct { + char *next_in; + unsigned int avail_in; + unsigned int total_in; + + char *next_out; + unsigned int avail_out; + unsigned int total_out; + + void *state; + + void *(*bzalloc)(void *,int,int); + void (*bzfree)(void *,void *); + void *opaque; + } + bz_stream; + + +#ifndef BZ_IMPORT +#define BZ_EXPORT +#endif + +#ifdef _WIN32 +# include +# include +# ifdef small + /* windows.h define small to char */ +# undef small +# endif +# ifdef BZ_EXPORT +# define BZ_API(func) WINAPI func +# define BZ_EXTERN extern +# else + /* import windows dll dynamically */ +# define BZ_API(func) (WINAPI * func) +# define BZ_EXTERN +# endif +#else +# define BZ_API(func) func +# define BZ_EXTERN extern +#endif + + +/*-- Core (low-level) library functions --*/ + +BZ_EXTERN int BZ_API(bzCompressInit) ( + bz_stream* strm, + int blockSize100k, + int verbosity, + int workFactor + ); + +BZ_EXTERN int BZ_API(bzCompress) ( + bz_stream* strm, + int action + ); + +BZ_EXTERN int BZ_API(bzCompressEnd) ( + bz_stream* strm + ); + +BZ_EXTERN int BZ_API(bzDecompressInit) ( + bz_stream *strm, + int verbosity, + int small + ); + +BZ_EXTERN int BZ_API(bzDecompress) ( + bz_stream* strm + ); + +BZ_EXTERN int BZ_API(bzDecompressEnd) ( + bz_stream *strm + ); + + + +/*-- High(er) level library functions --*/ + +#ifndef BZ_NO_STDIO +#define BZ_MAX_UNUSED 5000 + +typedef void BZFILE; + +BZ_EXTERN BZFILE* BZ_API(bzReadOpen) ( + int* bzerror, + FILE* f, + int verbosity, + int small, + void* unused, + int nUnused + ); + +BZ_EXTERN void BZ_API(bzReadClose) ( + int* bzerror, + BZFILE* b + ); + +BZ_EXTERN void BZ_API(bzReadGetUnused) ( + int* bzerror, + BZFILE* b, + void** unused, + int* nUnused + ); + +BZ_EXTERN int BZ_API(bzRead) ( + int* bzerror, + BZFILE* b, + void* buf, + int len + ); + +BZ_EXTERN BZFILE* BZ_API(bzWriteOpen) ( + int* bzerror, + FILE* f, + int blockSize100k, + int verbosity, + int workFactor + ); + +BZ_EXTERN void BZ_API(bzWrite) ( + int* bzerror, + BZFILE* b, + void* buf, + int len + ); + +BZ_EXTERN void BZ_API(bzWriteClose) ( + int* bzerror, + BZFILE* b, + int abandon, + unsigned int* nbytes_in, + unsigned int* nbytes_out + ); +#endif + + +/*-- Utility functions --*/ + +BZ_EXTERN int BZ_API(bzBuffToBuffCompress) ( + char* dest, + unsigned int* destLen, + char* source, + unsigned int sourceLen, + int blockSize100k, + int verbosity, + int workFactor + ); + +BZ_EXTERN int BZ_API(bzBuffToBuffDecompress) ( + char* dest, + unsigned int* destLen, + char* source, + unsigned int sourceLen, + int small, + int verbosity + ); + + +/*-- + Code contributed by Yoshioka Tsuneo + (QWF00133@niftyserve.or.jp/tsuneo-y@is.aist-nara.ac.jp), + to support better zlib compatibility. + This code is not _officially_ part of libbzip2 (yet); + I haven't tested it, documented it, or considered the + threading-safeness of it. + If this code breaks, please contact both Yoshioka and me. +--*/ + +BZ_EXTERN const char * BZ_API(bzlibVersion) ( + void + ); + +#ifndef BZ_NO_STDIO +BZ_EXTERN BZFILE * BZ_API(bzopen) ( + const char *path, + const char *mode + ); + +BZ_EXTERN BZFILE * BZ_API(bzdopen) ( + int fd, + const char *mode + ); + +BZ_EXTERN int BZ_API(bzread) ( + BZFILE* b, + void* buf, + int len + ); + +BZ_EXTERN int BZ_API(bzwrite) ( + BZFILE* b, + void* buf, + int len + ); + +BZ_EXTERN int BZ_API(bzflush) ( + BZFILE* b + ); + +BZ_EXTERN void BZ_API(bzclose) ( + BZFILE* b + ); + +BZ_EXTERN const char * BZ_API(bzerror) ( + BZFILE *b, + int *errnum + ); +#endif + +#ifdef __cplusplus +} +#endif + +#endif + +/*-------------------------------------------------------------*/ +/*--- end bzlib.h ---*/ +/*-------------------------------------------------------------*/ diff --git a/zgz/old-bzip2/bzlib_private.h b/zgz/old-bzip2/bzlib_private.h new file mode 100644 index 0000000..8e93480 --- /dev/null +++ b/zgz/old-bzip2/bzlib_private.h @@ -0,0 +1,528 @@ + +/*-------------------------------------------------------------*/ +/*--- Private header file for the library. ---*/ +/*--- bzlib_private.h ---*/ +/*-------------------------------------------------------------*/ + +/*-- + This file is a part of bzip2 and/or libbzip2, a program and + library for lossless, block-sorting data compression. + + Copyright (C) 1996-1999 Julian R Seward. All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + + 1. Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + + 2. The origin of this software must not be misrepresented; you must + not claim that you wrote the original software. If you use this + software in a product, an acknowledgment in the product + documentation would be appreciated but is not required. + + 3. Altered source versions must be plainly marked as such, and must + not be misrepresented as being the original software. + + 4. The name of the author may not be used to endorse or promote + products derived from this software without specific prior written + permission. + + THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS + OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY + DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE + GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, + WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + Julian Seward, Cambridge, UK. + jseward@acm.org + bzip2/libbzip2 version 0.9.5 of 24 May 1999 + + This program is based on (at least) the work of: + Mike Burrows + David Wheeler + Peter Fenwick + Alistair Moffat + Radford Neal + Ian H. Witten + Robert Sedgewick + Jon L. Bentley + + For more information on these sources, see the manual. +--*/ + + +#ifndef _BZLIB_PRIVATE_H +#define _BZLIB_PRIVATE_H + +#include + +#ifndef BZ_NO_STDIO +#include +#include +#include +#endif + +#include "bzlib.h" + + + +/*-- General stuff. --*/ + +#define BZ_VERSION "0.9.5d" + +typedef char Char; +typedef unsigned char Bool; +typedef unsigned char UChar; +typedef int Int32; +typedef unsigned int UInt32; +typedef short Int16; +typedef unsigned short UInt16; + +#define True ((Bool)1) +#define False ((Bool)0) + +#ifndef __GNUC__ +#define __inline__ /* */ +#endif + +#ifndef BZ_NO_STDIO +extern void bz__AssertH__fail ( int errcode ); +#define AssertH(cond,errcode) \ + { if (!(cond)) bz__AssertH__fail ( errcode ); } +#if BZ_DEBUG +#define AssertD(cond,msg) \ + { if (!(cond)) { \ + fprintf ( stderr, \ + "\n\nlibbzip2(debug build): internal error\n\t%s\n", msg );\ + exit(1); \ + }} +#else +#define AssertD(cond,msg) /* */ +#endif +#define VPrintf0(zf) \ + fprintf(stderr,zf) +#define VPrintf1(zf,za1) \ + fprintf(stderr,zf,za1) +#define VPrintf2(zf,za1,za2) \ + fprintf(stderr,zf,za1,za2) +#define VPrintf3(zf,za1,za2,za3) \ + fprintf(stderr,zf,za1,za2,za3) +#define VPrintf4(zf,za1,za2,za3,za4) \ + fprintf(stderr,zf,za1,za2,za3,za4) +#define VPrintf5(zf,za1,za2,za3,za4,za5) \ + fprintf(stderr,zf,za1,za2,za3,za4,za5) +#else +extern void bz_internal_error ( int errcode ); +#define AssertH(cond,errcode) \ + { if (!(cond)) bz_internal_error ( errcode ); } +#define AssertD(cond,msg) /* */ +#define VPrintf0(zf) /* */ +#define VPrintf1(zf,za1) /* */ +#define VPrintf2(zf,za1,za2) /* */ +#define VPrintf3(zf,za1,za2,za3) /* */ +#define VPrintf4(zf,za1,za2,za3,za4) /* */ +#define VPrintf5(zf,za1,za2,za3,za4,za5) /* */ +#endif + + +#define BZALLOC(nnn) (strm->bzalloc)(strm->opaque,(nnn),1) +#define BZFREE(ppp) (strm->bzfree)(strm->opaque,(ppp)) + + +/*-- Constants for the back end. --*/ + +#define BZ_MAX_ALPHA_SIZE 258 +#define BZ_MAX_CODE_LEN 23 + +#define BZ_RUNA 0 +#define BZ_RUNB 1 + +#define BZ_N_GROUPS 6 +#define BZ_G_SIZE 50 +#define BZ_N_ITERS 4 + +#define BZ_MAX_SELECTORS (2 + (900000 / BZ_G_SIZE)) + + + +/*-- Stuff for randomising repetitive blocks. --*/ + +extern Int32 rNums[512]; + +#define BZ_RAND_DECLS \ + Int32 rNToGo; \ + Int32 rTPos \ + +#define BZ_RAND_INIT_MASK \ + s->rNToGo = 0; \ + s->rTPos = 0 \ + +#define BZ_RAND_MASK ((s->rNToGo == 1) ? 1 : 0) + +#define BZ_RAND_UPD_MASK \ + if (s->rNToGo == 0) { \ + s->rNToGo = rNums[s->rTPos]; \ + s->rTPos++; \ + if (s->rTPos == 512) s->rTPos = 0; \ + } \ + s->rNToGo--; + + + +/*-- Stuff for doing CRCs. --*/ + +extern UInt32 crc32Table[256]; + +#define BZ_INITIALISE_CRC(crcVar) \ +{ \ + crcVar = 0xffffffffL; \ +} + +#define BZ_FINALISE_CRC(crcVar) \ +{ \ + crcVar = ~(crcVar); \ +} + +#define BZ_UPDATE_CRC(crcVar,cha) \ +{ \ + crcVar = (crcVar << 8) ^ \ + crc32Table[(crcVar >> 24) ^ \ + ((UChar)cha)]; \ +} + + + +/*-- States and modes for compression. --*/ + +#define BZ_M_IDLE 1 +#define BZ_M_RUNNING 2 +#define BZ_M_FLUSHING 3 +#define BZ_M_FINISHING 4 + +#define BZ_S_OUTPUT 1 +#define BZ_S_INPUT 2 + +#define BZ_N_RADIX 2 +#define BZ_N_QSORT 12 +#define BZ_N_SHELL 18 +#define BZ_N_OVERSHOOT (BZ_N_RADIX + BZ_N_QSORT + BZ_N_SHELL + 2) + + + + +/*-- Structure holding all the compression-side stuff. --*/ + +typedef + struct { + /* pointer back to the struct bz_stream */ + bz_stream* strm; + + /* mode this stream is in, and whether inputting */ + /* or outputting data */ + Int32 mode; + Int32 state; + + /* remembers avail_in when flush/finish requested */ + UInt32 avail_in_expect; + + /* for doing the block sorting */ + UInt32* arr1; + UInt32* arr2; + UInt32* ftab; + Int32 origPtr; + + /* aliases for arr1 and arr2 */ + UInt32* ptr; + UInt16* block; + UInt16* mtfv; + UChar* zbits; + + /* for deciding when to use the fallback sorting algorithm */ + Int32 workFactor; + + /* run-length-encoding of the input */ + UInt32 state_in_ch; + Int32 state_in_len; + BZ_RAND_DECLS; + + /* input and output limits and current posns */ + Int32 nblock; + Int32 nblockMAX; + Int32 numZ; + Int32 state_out_pos; + + /* map of bytes used in block */ + Int32 nInUse; + Bool inUse[256]; + UChar unseqToSeq[256]; + + /* the buffer for bit stream creation */ + UInt32 bsBuff; + Int32 bsLive; + + /* block and combined CRCs */ + UInt32 blockCRC; + UInt32 combinedCRC; + + /* misc administratium */ + Int32 verbosity; + Int32 blockNo; + Int32 blockSize100k; + + /* stuff for coding the MTF values */ + Int32 nMTF; + Int32 mtfFreq [BZ_MAX_ALPHA_SIZE]; + UChar selector [BZ_MAX_SELECTORS]; + UChar selectorMtf[BZ_MAX_SELECTORS]; + + UChar len [BZ_N_GROUPS][BZ_MAX_ALPHA_SIZE]; + Int32 code [BZ_N_GROUPS][BZ_MAX_ALPHA_SIZE]; + Int32 rfreq[BZ_N_GROUPS][BZ_MAX_ALPHA_SIZE]; + + } + EState; + + + +/*-- externs for compression. --*/ + +extern void +blockSort ( EState* ); + +extern void +compressBlock ( EState*, Bool ); + +extern void +bsInitWrite ( EState* ); + +extern void +hbAssignCodes ( Int32*, UChar*, Int32, Int32, Int32 ); + +extern void +hbMakeCodeLengths ( UChar*, Int32*, Int32, Int32 ); + + + +/*-- states for decompression. --*/ + +#define BZ_X_IDLE 1 +#define BZ_X_OUTPUT 2 + +#define BZ_X_MAGIC_1 10 +#define BZ_X_MAGIC_2 11 +#define BZ_X_MAGIC_3 12 +#define BZ_X_MAGIC_4 13 +#define BZ_X_BLKHDR_1 14 +#define BZ_X_BLKHDR_2 15 +#define BZ_X_BLKHDR_3 16 +#define BZ_X_BLKHDR_4 17 +#define BZ_X_BLKHDR_5 18 +#define BZ_X_BLKHDR_6 19 +#define BZ_X_BCRC_1 20 +#define BZ_X_BCRC_2 21 +#define BZ_X_BCRC_3 22 +#define BZ_X_BCRC_4 23 +#define BZ_X_RANDBIT 24 +#define BZ_X_ORIGPTR_1 25 +#define BZ_X_ORIGPTR_2 26 +#define BZ_X_ORIGPTR_3 27 +#define BZ_X_MAPPING_1 28 +#define BZ_X_MAPPING_2 29 +#define BZ_X_SELECTOR_1 30 +#define BZ_X_SELECTOR_2 31 +#define BZ_X_SELECTOR_3 32 +#define BZ_X_CODING_1 33 +#define BZ_X_CODING_2 34 +#define BZ_X_CODING_3 35 +#define BZ_X_MTF_1 36 +#define BZ_X_MTF_2 37 +#define BZ_X_MTF_3 38 +#define BZ_X_MTF_4 39 +#define BZ_X_MTF_5 40 +#define BZ_X_MTF_6 41 +#define BZ_X_ENDHDR_2 42 +#define BZ_X_ENDHDR_3 43 +#define BZ_X_ENDHDR_4 44 +#define BZ_X_ENDHDR_5 45 +#define BZ_X_ENDHDR_6 46 +#define BZ_X_CCRC_1 47 +#define BZ_X_CCRC_2 48 +#define BZ_X_CCRC_3 49 +#define BZ_X_CCRC_4 50 + + + +/*-- Constants for the fast MTF decoder. --*/ + +#define MTFA_SIZE 4096 +#define MTFL_SIZE 16 + + + +/*-- Structure holding all the decompression-side stuff. --*/ + +typedef + struct { + /* pointer back to the struct bz_stream */ + bz_stream* strm; + + /* state indicator for this stream */ + Int32 state; + + /* for doing the final run-length decoding */ + UChar state_out_ch; + Int32 state_out_len; + Bool blockRandomised; + BZ_RAND_DECLS; + + /* the buffer for bit stream reading */ + UInt32 bsBuff; + Int32 bsLive; + + /* misc administratium */ + Int32 blockSize100k; + Bool smallDecompress; + Int32 currBlockNo; + Int32 verbosity; + + /* for undoing the Burrows-Wheeler transform */ + Int32 origPtr; + UInt32 tPos; + Int32 k0; + Int32 unzftab[256]; + Int32 nblock_used; + Int32 cftab[257]; + Int32 cftabCopy[257]; + + /* for undoing the Burrows-Wheeler transform (FAST) */ + UInt32 *tt; + + /* for undoing the Burrows-Wheeler transform (SMALL) */ + UInt16 *ll16; + UChar *ll4; + + /* stored and calculated CRCs */ + UInt32 storedBlockCRC; + UInt32 storedCombinedCRC; + UInt32 calculatedBlockCRC; + UInt32 calculatedCombinedCRC; + + /* map of bytes used in block */ + Int32 nInUse; + Bool inUse[256]; + Bool inUse16[16]; + UChar seqToUnseq[256]; + + /* for decoding the MTF values */ + UChar mtfa [MTFA_SIZE]; + Int32 mtfbase[256 / MTFL_SIZE]; + UChar selector [BZ_MAX_SELECTORS]; + UChar selectorMtf[BZ_MAX_SELECTORS]; + UChar len [BZ_N_GROUPS][BZ_MAX_ALPHA_SIZE]; + + Int32 limit [BZ_N_GROUPS][BZ_MAX_ALPHA_SIZE]; + Int32 base [BZ_N_GROUPS][BZ_MAX_ALPHA_SIZE]; + Int32 perm [BZ_N_GROUPS][BZ_MAX_ALPHA_SIZE]; + Int32 minLens[BZ_N_GROUPS]; + + /* save area for scalars in the main decompress code */ + Int32 save_i; + Int32 save_j; + Int32 save_t; + Int32 save_alphaSize; + Int32 save_nGroups; + Int32 save_nSelectors; + Int32 save_EOB; + Int32 save_groupNo; + Int32 save_groupPos; + Int32 save_nextSym; + Int32 save_nblockMAX; + Int32 save_nblock; + Int32 save_es; + Int32 save_N; + Int32 save_curr; + Int32 save_zt; + Int32 save_zn; + Int32 save_zvec; + Int32 save_zj; + Int32 save_gSel; + Int32 save_gMinlen; + Int32* save_gLimit; + Int32* save_gBase; + Int32* save_gPerm; + + } + DState; + + + +/*-- Macros for decompression. --*/ + +#define BZ_GET_FAST(cccc) \ + s->tPos = s->tt[s->tPos]; \ + cccc = (UChar)(s->tPos & 0xff); \ + s->tPos >>= 8; + +#define BZ_GET_FAST_C(cccc) \ + c_tPos = c_tt[c_tPos]; \ + cccc = (UChar)(c_tPos & 0xff); \ + c_tPos >>= 8; + +#define SET_LL4(i,n) \ + { if (((i) & 0x1) == 0) \ + s->ll4[(i) >> 1] = (s->ll4[(i) >> 1] & 0xf0) | (n); else \ + s->ll4[(i) >> 1] = (s->ll4[(i) >> 1] & 0x0f) | ((n) << 4); \ + } + +#define GET_LL4(i) \ + ((((UInt32)(s->ll4[(i) >> 1])) >> (((i) << 2) & 0x4)) & 0xF) + +#define SET_LL(i,n) \ + { s->ll16[i] = (UInt16)(n & 0x0000ffff); \ + SET_LL4(i, n >> 16); \ + } + +#define GET_LL(i) \ + (((UInt32)s->ll16[i]) | (GET_LL4(i) << 16)) + +#define BZ_GET_SMALL(cccc) \ + cccc = indexIntoF ( s->tPos, s->cftab ); \ + s->tPos = GET_LL(s->tPos); + + +/*-- externs for decompression. --*/ + +extern Int32 +indexIntoF ( Int32, Int32* ); + +extern Int32 +decompress ( DState* ); + +extern void +hbCreateDecodeTables ( Int32*, Int32*, Int32*, UChar*, + Int32, Int32, Int32 ); + + +#endif + + +/*-- BZ_NO_STDIO seems to make NULL disappear on some platforms. --*/ + +#ifdef BZ_NO_STDIO +#ifndef NULL +#define NULL 0 +#endif +#endif + + +/*-------------------------------------------------------------*/ +/*--- end bzlib_private.h ---*/ +/*-------------------------------------------------------------*/ diff --git a/zgz/old-bzip2/compress.c b/zgz/old-bzip2/compress.c new file mode 100644 index 0000000..7b192c3 --- /dev/null +++ b/zgz/old-bzip2/compress.c @@ -0,0 +1,627 @@ + +/*-------------------------------------------------------------*/ +/*--- Compression machinery (not incl block sorting) ---*/ +/*--- compress.c ---*/ +/*-------------------------------------------------------------*/ + +/*-- + This file is a part of bzip2 and/or libbzip2, a program and + library for lossless, block-sorting data compression. + + Copyright (C) 1996-1999 Julian R Seward. All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + + 1. Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + + 2. The origin of this software must not be misrepresented; you must + not claim that you wrote the original software. If you use this + software in a product, an acknowledgment in the product + documentation would be appreciated but is not required. + + 3. Altered source versions must be plainly marked as such, and must + not be misrepresented as being the original software. + + 4. The name of the author may not be used to endorse or promote + products derived from this software without specific prior written + permission. + + THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS + OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY + DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE + GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, + WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + Julian Seward, Cambridge, UK. + jseward@acm.org + bzip2/libbzip2 version 0.9.5 of 24 May 1999 + + This program is based on (at least) the work of: + Mike Burrows + David Wheeler + Peter Fenwick + Alistair Moffat + Radford Neal + Ian H. Witten + Robert Sedgewick + Jon L. Bentley + + For more information on these sources, see the manual. +--*/ + +/*-- + CHANGES + ~~~~~~~ + 0.9.0 -- original version. + + 0.9.0a/b -- no changes in this file. + + 0.9.0c + * changed setting of nGroups in sendMTFValues() so as to + do a bit better on small files +--*/ + +#include "bzlib_private.h" + + +/*---------------------------------------------------*/ +/*--- Bit stream I/O ---*/ +/*---------------------------------------------------*/ + +/*---------------------------------------------------*/ +void bsInitWrite ( EState* s ) +{ + s->bsLive = 0; + s->bsBuff = 0; +} + + +/*---------------------------------------------------*/ +static +void bsFinishWrite ( EState* s ) +{ + while (s->bsLive > 0) { + s->zbits[s->numZ] = (UChar)(s->bsBuff >> 24); + s->numZ++; + s->bsBuff <<= 8; + s->bsLive -= 8; + } +} + + +/*---------------------------------------------------*/ +#define bsNEEDW(nz) \ +{ \ + while (s->bsLive >= 8) { \ + s->zbits[s->numZ] \ + = (UChar)(s->bsBuff >> 24); \ + s->numZ++; \ + s->bsBuff <<= 8; \ + s->bsLive -= 8; \ + } \ +} + + +/*---------------------------------------------------*/ +static +void bsW ( EState* s, Int32 n, UInt32 v ) +{ + bsNEEDW ( n ); + s->bsBuff |= (v << (32 - s->bsLive - n)); + s->bsLive += n; +} + + +/*---------------------------------------------------*/ +static +void bsPutUInt32 ( EState* s, UInt32 u ) +{ + bsW ( s, 8, (u >> 24) & 0xffL ); + bsW ( s, 8, (u >> 16) & 0xffL ); + bsW ( s, 8, (u >> 8) & 0xffL ); + bsW ( s, 8, u & 0xffL ); +} + + +/*---------------------------------------------------*/ +static +void bsPutUChar ( EState* s, UChar c ) +{ + bsW( s, 8, (UInt32)c ); +} + + +/*---------------------------------------------------*/ +/*--- The back end proper ---*/ +/*---------------------------------------------------*/ + +/*---------------------------------------------------*/ +static +void makeMaps_e ( EState* s ) +{ + Int32 i; + s->nInUse = 0; + for (i = 0; i < 256; i++) + if (s->inUse[i]) { + s->unseqToSeq[i] = s->nInUse; + s->nInUse++; + } +} + + +/*---------------------------------------------------*/ +static +void generateMTFValues ( EState* s ) +{ + UChar yy[256]; + Int32 i, j; + UChar tmp; + UChar tmp2; + Int32 zPend; + Int32 wr; + Int32 EOB; + + /* + After sorting (eg, here), + s->arr1 [ 0 .. s->nblock-1 ] holds sorted order, + and + ((UInt16*)s->arr2) [ 0 .. s->nblock-1 ] [15:8] + holds the original block data. + + The first thing to do is generate the MTF values, + and put them in + ((UInt16*)s->arr1) [ 0 .. s->nblock-1 ]. + Because there are strictly fewer or equal MTF values + than block values, ptr values in this area are overwritten + with MTF values only when they are no longer needed. + + The final compressed bitstream is generated into the + area starting at + (UChar*) (&((UInt16)s->arr2)[s->nblock]) + + These storage aliases are set up in bzCompressInit(), + except for the last one, which is arranged in + compressBlock(). + */ + UInt32* ptr = s->ptr; + UInt16* block = s->block; + UInt16* mtfv = s->mtfv; + + makeMaps_e ( s ); + EOB = s->nInUse+1; + + for (i = 0; i <= EOB; i++) s->mtfFreq[i] = 0; + + wr = 0; + zPend = 0; + for (i = 0; i < s->nInUse; i++) yy[i] = (UChar) i; + + for (i = 0; i < s->nblock; i++) { + UChar ll_i; + + AssertD ( wr <= i, "generateMTFValues(1)" ); + j = ptr[i]-1; if (j < 0) j += s->nblock; + ll_i = s->unseqToSeq[block[j] >> 8]; + AssertD ( ll_i < s->nInUse, "generateMTFValues(2a)" ); + + tmp = yy[0]; + if (tmp == ll_i) { + zPend++; + } else { + tmp2 = tmp; + tmp = yy[1]; + yy[1] = tmp2; + j = 1; + while ( ll_i != tmp ) { + j++; + tmp2 = tmp; + tmp = yy[j]; + yy[j] = tmp2; + }; + yy[0] = tmp; + + if (zPend > 0) { + zPend--; + while (True) { + if (zPend & 1) { + mtfv[wr] = BZ_RUNB; wr++; + s->mtfFreq[BZ_RUNB]++; + } else { + mtfv[wr] = BZ_RUNA; wr++; + s->mtfFreq[BZ_RUNA]++; + } + if (zPend < 2) break; + zPend = (zPend - 2) / 2; + }; + zPend = 0; + } + mtfv[wr] = j+1; wr++; s->mtfFreq[j+1]++; + } + } + + if (zPend > 0) { + zPend--; + while (True) { + if (zPend & 1) { + mtfv[wr] = BZ_RUNB; wr++; + s->mtfFreq[BZ_RUNB]++; + } else { + mtfv[wr] = BZ_RUNA; wr++; + s->mtfFreq[BZ_RUNA]++; + } + if (zPend < 2) break; + zPend = (zPend - 2) / 2; + }; + } + + mtfv[wr] = EOB; wr++; s->mtfFreq[EOB]++; + + s->nMTF = wr; +} + + +/*---------------------------------------------------*/ +#define BZ_LESSER_ICOST 0 +#define BZ_GREATER_ICOST 15 + +static +void sendMTFValues ( EState* s ) +{ + Int32 v, t, i, j, gs, ge, totc, bt, bc, iter; + Int32 nSelectors, alphaSize, minLen, maxLen, selCtr; + Int32 nGroups, nBytes; + + /*-- + UChar len [BZ_N_GROUPS][BZ_MAX_ALPHA_SIZE]; + is a global since the decoder also needs it. + + Int32 code[BZ_N_GROUPS][BZ_MAX_ALPHA_SIZE]; + Int32 rfreq[BZ_N_GROUPS][BZ_MAX_ALPHA_SIZE]; + are also globals only used in this proc. + Made global to keep stack frame size small. + --*/ + + + UInt16 cost[BZ_N_GROUPS]; + Int32 fave[BZ_N_GROUPS]; + + UInt16* mtfv = s->mtfv; + + if (s->verbosity >= 3) + VPrintf3( " %d in block, %d after MTF & 1-2 coding, " + "%d+2 syms in use\n", + s->nblock, s->nMTF, s->nInUse ); + + alphaSize = s->nInUse+2; + for (t = 0; t < BZ_N_GROUPS; t++) + for (v = 0; v < alphaSize; v++) + s->len[t][v] = BZ_GREATER_ICOST; + + /*--- Decide how many coding tables to use ---*/ + AssertH ( s->nMTF > 0, 3001 ); + if (s->nMTF < 200) nGroups = 2; else + if (s->nMTF < 600) nGroups = 3; else + if (s->nMTF < 1200) nGroups = 4; else + if (s->nMTF < 2400) nGroups = 5; else + nGroups = 6; + + /*--- Generate an initial set of coding tables ---*/ + { + Int32 nPart, remF, tFreq, aFreq; + + nPart = nGroups; + remF = s->nMTF; + gs = 0; + while (nPart > 0) { + tFreq = remF / nPart; + ge = gs-1; + aFreq = 0; + while (aFreq < tFreq && ge < alphaSize-1) { + ge++; + aFreq += s->mtfFreq[ge]; + } + + if (ge > gs + && nPart != nGroups && nPart != 1 + && ((nGroups-nPart) % 2 == 1)) { + aFreq -= s->mtfFreq[ge]; + ge--; + } + + if (s->verbosity >= 3) + VPrintf5( " initial group %d, [%d .. %d], " + "has %d syms (%4.1f%%)\n", + nPart, gs, ge, aFreq, + (100.0 * (float)aFreq) / (float)(s->nMTF) ); + + for (v = 0; v < alphaSize; v++) + if (v >= gs && v <= ge) + s->len[nPart-1][v] = BZ_LESSER_ICOST; else + s->len[nPart-1][v] = BZ_GREATER_ICOST; + + nPart--; + gs = ge+1; + remF -= aFreq; + } + } + + /*--- + Iterate up to BZ_N_ITERS times to improve the tables. + ---*/ + for (iter = 0; iter < BZ_N_ITERS; iter++) { + + for (t = 0; t < nGroups; t++) fave[t] = 0; + + for (t = 0; t < nGroups; t++) + for (v = 0; v < alphaSize; v++) + s->rfreq[t][v] = 0; + + nSelectors = 0; + totc = 0; + gs = 0; + while (True) { + + /*--- Set group start & end marks. --*/ + if (gs >= s->nMTF) break; + ge = gs + BZ_G_SIZE - 1; + if (ge >= s->nMTF) ge = s->nMTF-1; + + /*-- + Calculate the cost of this group as coded + by each of the coding tables. + --*/ + for (t = 0; t < nGroups; t++) cost[t] = 0; + + if (nGroups == 6) { + register UInt16 cost0, cost1, cost2, cost3, cost4, cost5; + cost0 = cost1 = cost2 = cost3 = cost4 = cost5 = 0; + for (i = gs; i <= ge; i++) { + UInt16 icv = mtfv[i]; + cost0 += s->len[0][icv]; + cost1 += s->len[1][icv]; + cost2 += s->len[2][icv]; + cost3 += s->len[3][icv]; + cost4 += s->len[4][icv]; + cost5 += s->len[5][icv]; + } + cost[0] = cost0; cost[1] = cost1; cost[2] = cost2; + cost[3] = cost3; cost[4] = cost4; cost[5] = cost5; + } else { + for (i = gs; i <= ge; i++) { + UInt16 icv = mtfv[i]; + for (t = 0; t < nGroups; t++) cost[t] += s->len[t][icv]; + } + } + + /*-- + Find the coding table which is best for this group, + and record its identity in the selector table. + --*/ + bc = 999999999; bt = -1; + for (t = 0; t < nGroups; t++) + if (cost[t] < bc) { bc = cost[t]; bt = t; }; + totc += bc; + fave[bt]++; + s->selector[nSelectors] = bt; + nSelectors++; + + /*-- + Increment the symbol frequencies for the selected table. + --*/ + for (i = gs; i <= ge; i++) + s->rfreq[bt][ mtfv[i] ]++; + + gs = ge+1; + } + if (s->verbosity >= 3) { + VPrintf2 ( " pass %d: size is %d, grp uses are ", + iter+1, totc/8 ); + for (t = 0; t < nGroups; t++) + VPrintf1 ( "%d ", fave[t] ); + VPrintf0 ( "\n" ); + } + + /*-- + Recompute the tables based on the accumulated frequencies. + --*/ + for (t = 0; t < nGroups; t++) + hbMakeCodeLengths ( &(s->len[t][0]), &(s->rfreq[t][0]), + alphaSize, 20 ); + } + + + AssertH( nGroups < 8, 3002 ); + AssertH( nSelectors < 32768 && + nSelectors <= (2 + (900000 / BZ_G_SIZE)), + 3003 ); + + + /*--- Compute MTF values for the selectors. ---*/ + { + UChar pos[BZ_N_GROUPS], ll_i, tmp2, tmp; + for (i = 0; i < nGroups; i++) pos[i] = i; + for (i = 0; i < nSelectors; i++) { + ll_i = s->selector[i]; + j = 0; + tmp = pos[j]; + while ( ll_i != tmp ) { + j++; + tmp2 = tmp; + tmp = pos[j]; + pos[j] = tmp2; + }; + pos[0] = tmp; + s->selectorMtf[i] = j; + } + }; + + /*--- Assign actual codes for the tables. --*/ + for (t = 0; t < nGroups; t++) { + minLen = 32; + maxLen = 0; + for (i = 0; i < alphaSize; i++) { + if (s->len[t][i] > maxLen) maxLen = s->len[t][i]; + if (s->len[t][i] < minLen) minLen = s->len[t][i]; + } + AssertH ( !(maxLen > 20), 3004 ); + AssertH ( !(minLen < 1), 3005 ); + hbAssignCodes ( &(s->code[t][0]), &(s->len[t][0]), + minLen, maxLen, alphaSize ); + } + + /*--- Transmit the mapping table. ---*/ + { + Bool inUse16[16]; + for (i = 0; i < 16; i++) { + inUse16[i] = False; + for (j = 0; j < 16; j++) + if (s->inUse[i * 16 + j]) inUse16[i] = True; + } + + nBytes = s->numZ; + for (i = 0; i < 16; i++) + if (inUse16[i]) bsW(s,1,1); else bsW(s,1,0); + + for (i = 0; i < 16; i++) + if (inUse16[i]) + for (j = 0; j < 16; j++) { + if (s->inUse[i * 16 + j]) bsW(s,1,1); else bsW(s,1,0); + } + + if (s->verbosity >= 3) + VPrintf1( " bytes: mapping %d, ", s->numZ-nBytes ); + } + + /*--- Now the selectors. ---*/ + nBytes = s->numZ; + bsW ( s, 3, nGroups ); + bsW ( s, 15, nSelectors ); + for (i = 0; i < nSelectors; i++) { + for (j = 0; j < s->selectorMtf[i]; j++) bsW(s,1,1); + bsW(s,1,0); + } + if (s->verbosity >= 3) + VPrintf1( "selectors %d, ", s->numZ-nBytes ); + + /*--- Now the coding tables. ---*/ + nBytes = s->numZ; + + for (t = 0; t < nGroups; t++) { + Int32 curr = s->len[t][0]; + bsW ( s, 5, curr ); + for (i = 0; i < alphaSize; i++) { + while (curr < s->len[t][i]) { bsW(s,2,2); curr++; /* 10 */ }; + while (curr > s->len[t][i]) { bsW(s,2,3); curr--; /* 11 */ }; + bsW ( s, 1, 0 ); + } + } + + if (s->verbosity >= 3) + VPrintf1 ( "code lengths %d, ", s->numZ-nBytes ); + + /*--- And finally, the block data proper ---*/ + nBytes = s->numZ; + selCtr = 0; + gs = 0; + while (True) { + if (gs >= s->nMTF) break; + ge = gs + BZ_G_SIZE - 1; + if (ge >= s->nMTF) ge = s->nMTF-1; + for (i = gs; i <= ge; i++) { + AssertH ( s->selector[selCtr] < nGroups, 3006 ); + bsW ( s, + s->len [s->selector[selCtr]] [mtfv[i]], + s->code [s->selector[selCtr]] [mtfv[i]] ); + } + + gs = ge+1; + selCtr++; + } + AssertH( selCtr == nSelectors, 3007 ); + + if (s->verbosity >= 3) + VPrintf1( "codes %d\n", s->numZ-nBytes ); +} + + +/*---------------------------------------------------*/ +void compressBlock ( EState* s, Bool is_last_block ) +{ + if (s->nblock > 0) { + + BZ_FINALISE_CRC ( s->blockCRC ); + s->combinedCRC = (s->combinedCRC << 1) | (s->combinedCRC >> 31); + s->combinedCRC ^= s->blockCRC; + if (s->blockNo > 1) s->numZ = 0; + + if (s->verbosity >= 2) + VPrintf4( " block %d: crc = 0x%8x, " + "combined CRC = 0x%8x, size = %d\n", + s->blockNo, s->blockCRC, s->combinedCRC, s->nblock ); + + blockSort ( s ); + } + + s->zbits = (UChar*) (&((UInt16*)s->arr2)[s->nblock]); + + /*-- If this is the first block, create the stream header. --*/ + if (s->blockNo == 1) { + bsInitWrite ( s ); + bsPutUChar ( s, 'B' ); + bsPutUChar ( s, 'Z' ); + bsPutUChar ( s, 'h' ); + bsPutUChar ( s, (UChar)('0' + s->blockSize100k) ); + } + + if (s->nblock > 0) { + + bsPutUChar ( s, 0x31 ); bsPutUChar ( s, 0x41 ); + bsPutUChar ( s, 0x59 ); bsPutUChar ( s, 0x26 ); + bsPutUChar ( s, 0x53 ); bsPutUChar ( s, 0x59 ); + + /*-- Now the block's CRC, so it is in a known place. --*/ + bsPutUInt32 ( s, s->blockCRC ); + + /*-- + Now a single bit indicating (non-)randomisation. + As of version 0.9.5, we use a better sorting algorithm + which makes randomisation unnecessary. So always set + the randomised bit to 'no'. Of course, the decoder + still needs to be able to handle randomised blocks + so as to maintain backwards compatibility with + older versions of bzip2. + --*/ + bsW(s,1,0); + + bsW ( s, 24, s->origPtr ); + generateMTFValues ( s ); + sendMTFValues ( s ); + } + + + /*-- If this is the last block, add the stream trailer. --*/ + if (is_last_block) { + + bsPutUChar ( s, 0x17 ); bsPutUChar ( s, 0x72 ); + bsPutUChar ( s, 0x45 ); bsPutUChar ( s, 0x38 ); + bsPutUChar ( s, 0x50 ); bsPutUChar ( s, 0x90 ); + bsPutUInt32 ( s, s->combinedCRC ); + if (s->verbosity >= 2) + VPrintf1( " final combined CRC = 0x%x\n ", s->combinedCRC ); + bsFinishWrite ( s ); + } +} + + +/*-------------------------------------------------------------*/ +/*--- end compress.c ---*/ +/*-------------------------------------------------------------*/ diff --git a/zgz/old-bzip2/crctable.c b/zgz/old-bzip2/crctable.c new file mode 100644 index 0000000..ab53df6 --- /dev/null +++ b/zgz/old-bzip2/crctable.c @@ -0,0 +1,144 @@ + +/*-------------------------------------------------------------*/ +/*--- Table for doing CRCs ---*/ +/*--- crctable.c ---*/ +/*-------------------------------------------------------------*/ + +/*-- + This file is a part of bzip2 and/or libbzip2, a program and + library for lossless, block-sorting data compression. + + Copyright (C) 1996-1999 Julian R Seward. All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + + 1. Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + + 2. The origin of this software must not be misrepresented; you must + not claim that you wrote the original software. If you use this + software in a product, an acknowledgment in the product + documentation would be appreciated but is not required. + + 3. Altered source versions must be plainly marked as such, and must + not be misrepresented as being the original software. + + 4. The name of the author may not be used to endorse or promote + products derived from this software without specific prior written + permission. + + THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS + OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY + DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE + GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, + WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + Julian Seward, Cambridge, UK. + jseward@acm.org + bzip2/libbzip2 version 0.9.5 of 24 May 1999 + + This program is based on (at least) the work of: + Mike Burrows + David Wheeler + Peter Fenwick + Alistair Moffat + Radford Neal + Ian H. Witten + Robert Sedgewick + Jon L. Bentley + + For more information on these sources, see the manual. +--*/ + + +#include "bzlib_private.h" + +/*-- + I think this is an implementation of the AUTODIN-II, + Ethernet & FDDI 32-bit CRC standard. Vaguely derived + from code by Rob Warnock, in Section 51 of the + comp.compression FAQ. +--*/ + +UInt32 crc32Table[256] = { + + /*-- Ugly, innit? --*/ + + 0x00000000L, 0x04c11db7L, 0x09823b6eL, 0x0d4326d9L, + 0x130476dcL, 0x17c56b6bL, 0x1a864db2L, 0x1e475005L, + 0x2608edb8L, 0x22c9f00fL, 0x2f8ad6d6L, 0x2b4bcb61L, + 0x350c9b64L, 0x31cd86d3L, 0x3c8ea00aL, 0x384fbdbdL, + 0x4c11db70L, 0x48d0c6c7L, 0x4593e01eL, 0x4152fda9L, + 0x5f15adacL, 0x5bd4b01bL, 0x569796c2L, 0x52568b75L, + 0x6a1936c8L, 0x6ed82b7fL, 0x639b0da6L, 0x675a1011L, + 0x791d4014L, 0x7ddc5da3L, 0x709f7b7aL, 0x745e66cdL, + 0x9823b6e0L, 0x9ce2ab57L, 0x91a18d8eL, 0x95609039L, + 0x8b27c03cL, 0x8fe6dd8bL, 0x82a5fb52L, 0x8664e6e5L, + 0xbe2b5b58L, 0xbaea46efL, 0xb7a96036L, 0xb3687d81L, + 0xad2f2d84L, 0xa9ee3033L, 0xa4ad16eaL, 0xa06c0b5dL, + 0xd4326d90L, 0xd0f37027L, 0xddb056feL, 0xd9714b49L, + 0xc7361b4cL, 0xc3f706fbL, 0xceb42022L, 0xca753d95L, + 0xf23a8028L, 0xf6fb9d9fL, 0xfbb8bb46L, 0xff79a6f1L, + 0xe13ef6f4L, 0xe5ffeb43L, 0xe8bccd9aL, 0xec7dd02dL, + 0x34867077L, 0x30476dc0L, 0x3d044b19L, 0x39c556aeL, + 0x278206abL, 0x23431b1cL, 0x2e003dc5L, 0x2ac12072L, + 0x128e9dcfL, 0x164f8078L, 0x1b0ca6a1L, 0x1fcdbb16L, + 0x018aeb13L, 0x054bf6a4L, 0x0808d07dL, 0x0cc9cdcaL, + 0x7897ab07L, 0x7c56b6b0L, 0x71159069L, 0x75d48ddeL, + 0x6b93dddbL, 0x6f52c06cL, 0x6211e6b5L, 0x66d0fb02L, + 0x5e9f46bfL, 0x5a5e5b08L, 0x571d7dd1L, 0x53dc6066L, + 0x4d9b3063L, 0x495a2dd4L, 0x44190b0dL, 0x40d816baL, + 0xaca5c697L, 0xa864db20L, 0xa527fdf9L, 0xa1e6e04eL, + 0xbfa1b04bL, 0xbb60adfcL, 0xb6238b25L, 0xb2e29692L, + 0x8aad2b2fL, 0x8e6c3698L, 0x832f1041L, 0x87ee0df6L, + 0x99a95df3L, 0x9d684044L, 0x902b669dL, 0x94ea7b2aL, + 0xe0b41de7L, 0xe4750050L, 0xe9362689L, 0xedf73b3eL, + 0xf3b06b3bL, 0xf771768cL, 0xfa325055L, 0xfef34de2L, + 0xc6bcf05fL, 0xc27dede8L, 0xcf3ecb31L, 0xcbffd686L, + 0xd5b88683L, 0xd1799b34L, 0xdc3abdedL, 0xd8fba05aL, + 0x690ce0eeL, 0x6dcdfd59L, 0x608edb80L, 0x644fc637L, + 0x7a089632L, 0x7ec98b85L, 0x738aad5cL, 0x774bb0ebL, + 0x4f040d56L, 0x4bc510e1L, 0x46863638L, 0x42472b8fL, + 0x5c007b8aL, 0x58c1663dL, 0x558240e4L, 0x51435d53L, + 0x251d3b9eL, 0x21dc2629L, 0x2c9f00f0L, 0x285e1d47L, + 0x36194d42L, 0x32d850f5L, 0x3f9b762cL, 0x3b5a6b9bL, + 0x0315d626L, 0x07d4cb91L, 0x0a97ed48L, 0x0e56f0ffL, + 0x1011a0faL, 0x14d0bd4dL, 0x19939b94L, 0x1d528623L, + 0xf12f560eL, 0xf5ee4bb9L, 0xf8ad6d60L, 0xfc6c70d7L, + 0xe22b20d2L, 0xe6ea3d65L, 0xeba91bbcL, 0xef68060bL, + 0xd727bbb6L, 0xd3e6a601L, 0xdea580d8L, 0xda649d6fL, + 0xc423cd6aL, 0xc0e2d0ddL, 0xcda1f604L, 0xc960ebb3L, + 0xbd3e8d7eL, 0xb9ff90c9L, 0xb4bcb610L, 0xb07daba7L, + 0xae3afba2L, 0xaafbe615L, 0xa7b8c0ccL, 0xa379dd7bL, + 0x9b3660c6L, 0x9ff77d71L, 0x92b45ba8L, 0x9675461fL, + 0x8832161aL, 0x8cf30badL, 0x81b02d74L, 0x857130c3L, + 0x5d8a9099L, 0x594b8d2eL, 0x5408abf7L, 0x50c9b640L, + 0x4e8ee645L, 0x4a4ffbf2L, 0x470cdd2bL, 0x43cdc09cL, + 0x7b827d21L, 0x7f436096L, 0x7200464fL, 0x76c15bf8L, + 0x68860bfdL, 0x6c47164aL, 0x61043093L, 0x65c52d24L, + 0x119b4be9L, 0x155a565eL, 0x18197087L, 0x1cd86d30L, + 0x029f3d35L, 0x065e2082L, 0x0b1d065bL, 0x0fdc1becL, + 0x3793a651L, 0x3352bbe6L, 0x3e119d3fL, 0x3ad08088L, + 0x2497d08dL, 0x2056cd3aL, 0x2d15ebe3L, 0x29d4f654L, + 0xc5a92679L, 0xc1683bceL, 0xcc2b1d17L, 0xc8ea00a0L, + 0xd6ad50a5L, 0xd26c4d12L, 0xdf2f6bcbL, 0xdbee767cL, + 0xe3a1cbc1L, 0xe760d676L, 0xea23f0afL, 0xeee2ed18L, + 0xf0a5bd1dL, 0xf464a0aaL, 0xf9278673L, 0xfde69bc4L, + 0x89b8fd09L, 0x8d79e0beL, 0x803ac667L, 0x84fbdbd0L, + 0x9abc8bd5L, 0x9e7d9662L, 0x933eb0bbL, 0x97ffad0cL, + 0xafb010b1L, 0xab710d06L, 0xa6322bdfL, 0xa2f33668L, + 0xbcb4666dL, 0xb8757bdaL, 0xb5365d03L, 0xb1f740b4L +}; + + +/*-------------------------------------------------------------*/ +/*--- end crctable.c ---*/ +/*-------------------------------------------------------------*/ diff --git a/zgz/old-bzip2/huffman.c b/zgz/old-bzip2/huffman.c new file mode 100644 index 0000000..bb2a6cc --- /dev/null +++ b/zgz/old-bzip2/huffman.c @@ -0,0 +1,228 @@ + +/*-------------------------------------------------------------*/ +/*--- Huffman coding low-level stuff ---*/ +/*--- huffman.c ---*/ +/*-------------------------------------------------------------*/ + +/*-- + This file is a part of bzip2 and/or libbzip2, a program and + library for lossless, block-sorting data compression. + + Copyright (C) 1996-1999 Julian R Seward. All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + + 1. Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + + 2. The origin of this software must not be misrepresented; you must + not claim that you wrote the original software. If you use this + software in a product, an acknowledgment in the product + documentation would be appreciated but is not required. + + 3. Altered source versions must be plainly marked as such, and must + not be misrepresented as being the original software. + + 4. The name of the author may not be used to endorse or promote + products derived from this software without specific prior written + permission. + + THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS + OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY + DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE + GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, + WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + Julian Seward, Cambridge, UK. + jseward@acm.org + bzip2/libbzip2 version 0.9.5 of 24 May 1999 + + This program is based on (at least) the work of: + Mike Burrows + David Wheeler + Peter Fenwick + Alistair Moffat + Radford Neal + Ian H. Witten + Robert Sedgewick + Jon L. Bentley + + For more information on these sources, see the manual. +--*/ + + +#include "bzlib_private.h" + +/*---------------------------------------------------*/ +#define WEIGHTOF(zz0) ((zz0) & 0xffffff00) +#define DEPTHOF(zz1) ((zz1) & 0x000000ff) +#define MYMAX(zz2,zz3) ((zz2) > (zz3) ? (zz2) : (zz3)) + +#define ADDWEIGHTS(zw1,zw2) \ + (WEIGHTOF(zw1)+WEIGHTOF(zw2)) | \ + (1 + MYMAX(DEPTHOF(zw1),DEPTHOF(zw2))) + +#define UPHEAP(z) \ +{ \ + Int32 zz, tmp; \ + zz = z; tmp = heap[zz]; \ + while (weight[tmp] < weight[heap[zz >> 1]]) { \ + heap[zz] = heap[zz >> 1]; \ + zz >>= 1; \ + } \ + heap[zz] = tmp; \ +} + +#define DOWNHEAP(z) \ +{ \ + Int32 zz, yy, tmp; \ + zz = z; tmp = heap[zz]; \ + while (True) { \ + yy = zz << 1; \ + if (yy > nHeap) break; \ + if (yy < nHeap && \ + weight[heap[yy+1]] < weight[heap[yy]]) \ + yy++; \ + if (weight[tmp] < weight[heap[yy]]) break; \ + heap[zz] = heap[yy]; \ + zz = yy; \ + } \ + heap[zz] = tmp; \ +} + + +/*---------------------------------------------------*/ +void hbMakeCodeLengths ( UChar *len, + Int32 *freq, + Int32 alphaSize, + Int32 maxLen ) +{ + /*-- + Nodes and heap entries run from 1. Entry 0 + for both the heap and nodes is a sentinel. + --*/ + Int32 nNodes, nHeap, n1, n2, i, j, k; + Bool tooLong; + + Int32 heap [ BZ_MAX_ALPHA_SIZE + 2 ]; + Int32 weight [ BZ_MAX_ALPHA_SIZE * 2 ]; + Int32 parent [ BZ_MAX_ALPHA_SIZE * 2 ]; + + for (i = 0; i < alphaSize; i++) + weight[i+1] = (freq[i] == 0 ? 1 : freq[i]) << 8; + + while (True) { + + nNodes = alphaSize; + nHeap = 0; + + heap[0] = 0; + weight[0] = 0; + parent[0] = -2; + + for (i = 1; i <= alphaSize; i++) { + parent[i] = -1; + nHeap++; + heap[nHeap] = i; + UPHEAP(nHeap); + } + + AssertH( nHeap < (BZ_MAX_ALPHA_SIZE+2), 2001 ); + + while (nHeap > 1) { + n1 = heap[1]; heap[1] = heap[nHeap]; nHeap--; DOWNHEAP(1); + n2 = heap[1]; heap[1] = heap[nHeap]; nHeap--; DOWNHEAP(1); + nNodes++; + parent[n1] = parent[n2] = nNodes; + weight[nNodes] = ADDWEIGHTS(weight[n1], weight[n2]); + parent[nNodes] = -1; + nHeap++; + heap[nHeap] = nNodes; + UPHEAP(nHeap); + } + + AssertH( nNodes < (BZ_MAX_ALPHA_SIZE * 2), 2002 ); + + tooLong = False; + for (i = 1; i <= alphaSize; i++) { + j = 0; + k = i; + while (parent[k] >= 0) { k = parent[k]; j++; } + len[i-1] = j; + if (j > maxLen) tooLong = True; + } + + if (! tooLong) break; + + for (i = 1; i < alphaSize; i++) { + j = weight[i] >> 8; + j = 1 + (j / 2); + weight[i] = j << 8; + } + } +} + + +/*---------------------------------------------------*/ +void hbAssignCodes ( Int32 *code, + UChar *length, + Int32 minLen, + Int32 maxLen, + Int32 alphaSize ) +{ + Int32 n, vec, i; + + vec = 0; + for (n = minLen; n <= maxLen; n++) { + for (i = 0; i < alphaSize; i++) + if (length[i] == n) { code[i] = vec; vec++; }; + vec <<= 1; + } +} + + +/*---------------------------------------------------*/ +void hbCreateDecodeTables ( Int32 *limit, + Int32 *base, + Int32 *perm, + UChar *length, + Int32 minLen, + Int32 maxLen, + Int32 alphaSize ) +{ + Int32 pp, i, j, vec; + + pp = 0; + for (i = minLen; i <= maxLen; i++) + for (j = 0; j < alphaSize; j++) + if (length[j] == i) { perm[pp] = j; pp++; }; + + for (i = 0; i < BZ_MAX_CODE_LEN; i++) base[i] = 0; + for (i = 0; i < alphaSize; i++) base[length[i]+1]++; + + for (i = 1; i < BZ_MAX_CODE_LEN; i++) base[i] += base[i-1]; + + for (i = 0; i < BZ_MAX_CODE_LEN; i++) limit[i] = 0; + vec = 0; + + for (i = minLen; i <= maxLen; i++) { + vec += (base[i+1] - base[i]); + limit[i] = vec-1; + vec <<= 1; + } + for (i = minLen + 1; i <= maxLen; i++) + base[i] = ((limit[i-1] + 1) << 1) - base[i]; +} + + +/*-------------------------------------------------------------*/ +/*--- end huffman.c ---*/ +/*-------------------------------------------------------------*/ diff --git a/zgz/old-bzip2/randtable.c b/zgz/old-bzip2/randtable.c new file mode 100644 index 0000000..8f6266f --- /dev/null +++ b/zgz/old-bzip2/randtable.c @@ -0,0 +1,124 @@ + +/*-------------------------------------------------------------*/ +/*--- Table for randomising repetitive blocks ---*/ +/*--- randtable.c ---*/ +/*-------------------------------------------------------------*/ + +/*-- + This file is a part of bzip2 and/or libbzip2, a program and + library for lossless, block-sorting data compression. + + Copyright (C) 1996-1999 Julian R Seward. All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + + 1. Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + + 2. The origin of this software must not be misrepresented; you must + not claim that you wrote the original software. If you use this + software in a product, an acknowledgment in the product + documentation would be appreciated but is not required. + + 3. Altered source versions must be plainly marked as such, and must + not be misrepresented as being the original software. + + 4. The name of the author may not be used to endorse or promote + products derived from this software without specific prior written + permission. + + THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS + OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY + DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE + GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, + WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + Julian Seward, Cambridge, UK. + jseward@acm.org + bzip2/libbzip2 version 0.9.5 of 24 May 1999 + + This program is based on (at least) the work of: + Mike Burrows + David Wheeler + Peter Fenwick + Alistair Moffat + Radford Neal + Ian H. Witten + Robert Sedgewick + Jon L. Bentley + + For more information on these sources, see the manual. +--*/ + + +#include "bzlib_private.h" + + +/*---------------------------------------------*/ +Int32 rNums[512] = { + 619, 720, 127, 481, 931, 816, 813, 233, 566, 247, + 985, 724, 205, 454, 863, 491, 741, 242, 949, 214, + 733, 859, 335, 708, 621, 574, 73, 654, 730, 472, + 419, 436, 278, 496, 867, 210, 399, 680, 480, 51, + 878, 465, 811, 169, 869, 675, 611, 697, 867, 561, + 862, 687, 507, 283, 482, 129, 807, 591, 733, 623, + 150, 238, 59, 379, 684, 877, 625, 169, 643, 105, + 170, 607, 520, 932, 727, 476, 693, 425, 174, 647, + 73, 122, 335, 530, 442, 853, 695, 249, 445, 515, + 909, 545, 703, 919, 874, 474, 882, 500, 594, 612, + 641, 801, 220, 162, 819, 984, 589, 513, 495, 799, + 161, 604, 958, 533, 221, 400, 386, 867, 600, 782, + 382, 596, 414, 171, 516, 375, 682, 485, 911, 276, + 98, 553, 163, 354, 666, 933, 424, 341, 533, 870, + 227, 730, 475, 186, 263, 647, 537, 686, 600, 224, + 469, 68, 770, 919, 190, 373, 294, 822, 808, 206, + 184, 943, 795, 384, 383, 461, 404, 758, 839, 887, + 715, 67, 618, 276, 204, 918, 873, 777, 604, 560, + 951, 160, 578, 722, 79, 804, 96, 409, 713, 940, + 652, 934, 970, 447, 318, 353, 859, 672, 112, 785, + 645, 863, 803, 350, 139, 93, 354, 99, 820, 908, + 609, 772, 154, 274, 580, 184, 79, 626, 630, 742, + 653, 282, 762, 623, 680, 81, 927, 626, 789, 125, + 411, 521, 938, 300, 821, 78, 343, 175, 128, 250, + 170, 774, 972, 275, 999, 639, 495, 78, 352, 126, + 857, 956, 358, 619, 580, 124, 737, 594, 701, 612, + 669, 112, 134, 694, 363, 992, 809, 743, 168, 974, + 944, 375, 748, 52, 600, 747, 642, 182, 862, 81, + 344, 805, 988, 739, 511, 655, 814, 334, 249, 515, + 897, 955, 664, 981, 649, 113, 974, 459, 893, 228, + 433, 837, 553, 268, 926, 240, 102, 654, 459, 51, + 686, 754, 806, 760, 493, 403, 415, 394, 687, 700, + 946, 670, 656, 610, 738, 392, 760, 799, 887, 653, + 978, 321, 576, 617, 626, 502, 894, 679, 243, 440, + 680, 879, 194, 572, 640, 724, 926, 56, 204, 700, + 707, 151, 457, 449, 797, 195, 791, 558, 945, 679, + 297, 59, 87, 824, 713, 663, 412, 693, 342, 606, + 134, 108, 571, 364, 631, 212, 174, 643, 304, 329, + 343, 97, 430, 751, 497, 314, 983, 374, 822, 928, + 140, 206, 73, 263, 980, 736, 876, 478, 430, 305, + 170, 514, 364, 692, 829, 82, 855, 953, 676, 246, + 369, 970, 294, 750, 807, 827, 150, 790, 288, 923, + 804, 378, 215, 828, 592, 281, 565, 555, 710, 82, + 896, 831, 547, 261, 524, 462, 293, 465, 502, 56, + 661, 821, 976, 991, 658, 869, 905, 758, 745, 193, + 768, 550, 608, 933, 378, 286, 215, 979, 792, 961, + 61, 688, 793, 644, 986, 403, 106, 366, 905, 644, + 372, 567, 466, 434, 645, 210, 389, 550, 919, 135, + 780, 773, 635, 389, 707, 100, 626, 958, 165, 504, + 920, 176, 193, 713, 857, 265, 203, 50, 668, 108, + 645, 990, 626, 197, 510, 357, 358, 850, 858, 364, + 936, 638 +}; + + +/*-------------------------------------------------------------*/ +/*--- end randtable.c ---*/ +/*-------------------------------------------------------------*/ diff --git a/zgz/zgz.c b/zgz/zgz.c new file mode 100644 index 0000000..28ba166 --- /dev/null +++ b/zgz/zgz.c @@ -0,0 +1,624 @@ +/* + * Authors: Faidon Liambotis + * Josh Triplett + * + * This is a zlib-based gzip that is heavily based on NetBSD's gzip, + * developed by Matthew R. Green. + * + * This is suited for gzip regeneration and is part of pristine-tar. + * As such, it adds some extra options which are needed to successfully + * reproduce the gzips out there and removes features of the original + * implementation that were not relevant (e.g. decompression) + * + * It also has a mode to generate old bzip2 files. + * + * Copyright (c) 1997, 1998, 2003, 2004, 2006 Matthew R. Green + * Copyright (c) 2007 Faidon Liambotis + * Copyright (c) 2008 Josh Triplett + * Copyright (c) 2010 Joey Hess + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. The name of the author may not be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED + * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, + * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +/* + * gzip.c -- GPL free gzip using zlib. + * + * RFC 1950 covers the zlib format + * RFC 1951 covers the deflate format + * RFC 1952 covers the gzip format + * + */ + +#define _GNU_SOURCE + +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +extern void gnuzip(int in, int out, char *origname, unsigned long timestamp, int level, int osflag, int rsync, int newrsync); +extern void old_bzip2(int level); + +#define BUFLEN (64 * 1024) + +#define GZIP_MAGIC0 0x1F +#define GZIP_MAGIC1 0x8B + +#define HEAD_CRC 0x02 +#define EXTRA_FIELD 0x04 +#define ORIG_NAME 0x08 +#define COMMENT 0x10 + +#define GZIP_OS_UNIX 3 /* Unix */ +#define GZIP_OS_NTFS 11 /* NTFS */ + +static const char gzip_version[] = "zgz 20100613 based on NetBSD gzip 20060927, GNU gzip 1.3.12, and bzip2 0.9.5d"; + +static const char gzip_copyright[] = \ +" Authors: Faidon Liambotis \n" +" Josh Triplett \n" +"\n" +" Copyright (c) 1997, 1998, 2003, 2004, 2006 Matthew R. Green\n" +" Copyright (c) 2007 Faidon Liambotis\n" +" Copyright (c) 2008 Josh Triplett\n" +" * All rights reserved.\n" +" *\n" +" * Redistribution and use in source and binary forms, with or without\n" +" * modification, are permitted provided that the following conditions\n" +" * are met:\n" +" * 1. Redistributions of source code must retain the above copyright\n" +" * notice, this list of conditions and the following disclaimer.\n" +" * 2. Redistributions in binary form must reproduce the above copyright\n" +" * notice, this list of conditions and the following disclaimer in the\n" +" * documentation and/or other materials provided with the distribution.\n" +" * 3. The name of the author may not be used to endorse or promote products\n" +" * derived from this software without specific prior written permission.\n" +" *\n" +" * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR\n" +" * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES\n" +" * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.\n" +" * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,\n" +" * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,\n" +" * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;\n" +" * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED\n" +" * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,\n" +" * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY\n" +" * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF\n" +" * SUCH DAMAGE."; + +static int qflag; /* quiet mode */ + +static void maybe_err(const char *fmt, ...) + __attribute__((__format__(__printf__, 1, 2),noreturn)); +static void maybe_errx(const char *fmt, ...) + __attribute__((__format__(__printf__, 1, 2),noreturn)); +static void gz_compress(int, int, const char *, uint32_t, int, int, int, int, int); +static void usage(void); +static void display_version(void); +static void display_license(void); +static void shamble(char *, int); +static void rebrain(char *, char *, int); + +int main(int, char **p); + +static const struct option longopts[] = { + { "stdout", no_argument, 0, 'c' }, + { "to-stdout", no_argument, 0, 'c' }, + { "decompress", no_argument, 0, 'd' }, + { "uncompress", no_argument, 0, 'd' }, + { "force", no_argument, 0, 'f' }, + { "help", no_argument, 0, 'h' }, + { "no-name", no_argument, 0, 'n' }, + { "name", no_argument, 0, 'N' }, + { "quiet", no_argument, 0, 'q' }, + { "fast", no_argument, 0, '1' }, + { "best", no_argument, 0, '9' }, + { "ascii", no_argument, 0, 'a' }, + /* new options */ + { "gnu", no_argument, 0, 'G' }, + { "old-bzip2", no_argument, 0, 'O' }, + { "suse-bzip2", no_argument, 0, 'S' }, + { "suse-pbzip2", no_argument, 0, 'P' }, + { "zlib", no_argument, 0, 'Z' }, + { "rsyncable", no_argument, 0, 'R' }, + { "new-rsyncable", no_argument, 0, 'r' }, + { "no-timestamp", no_argument, 0, 'm' }, + { "force-timestamp", no_argument, 0, 'M' }, + { "timestamp", required_argument, 0, 'T' }, + { "osflag", required_argument, 0, 's' }, + { "original-name", required_argument, 0, 'o' }, + { "filename", required_argument, 0, 'F' }, + { "quirk", required_argument, 0, 'k' }, + /* end */ + { "version", no_argument, 0, 'V' }, + { "license", no_argument, 0, 'L' }, + { NULL, no_argument, 0, 0 }, +}; + +int +main(int argc, char **argv) +{ + const char *progname = argv[0]; + int gnu = 0; + int bzold = 0; + int bzsuse = 0; + int pbzsuse = 0; + int quirks = 0; + char *origname = NULL; + uint32_t timestamp = 0; + int memlevel = 8; /* zlib's default */ + int nflag = 0; + int mflag = 0; + int fflag = 0; + int xflag = -1; + int ntfs_quirk = 0; + int level = 6; + int osflag = GZIP_OS_UNIX; + int rsync = 0; + int new_rsync = 0; + int ch; + + if (strcmp(progname, "gunzip") == 0 || + strcmp(progname, "zcat") == 0 || + strcmp(progname, "gzcat") == 0) { + fprintf(stderr, "%s: decompression is not supported on this version\n", progname); + usage(); + } + +#define OPT_LIST "123456789acdfhF:GLNnMmqRrT:Vo:k:s:ZOSP" + + while ((ch = getopt_long(argc, argv, OPT_LIST, longopts, NULL)) != -1) { + switch (ch) { + case 'G': + gnu = 1; + break; + case 'O': + bzold = 1; + break; + case 'S': + bzsuse = 1; + break; + case 'P': + pbzsuse = 1; + break; + case 'Z': + break; + case '1': case '2': case '3': + case '4': case '5': case '6': + case '7': case '8': case '9': + level = ch - '0'; + break; + case 'c': + /* Ignored for compatibility; zgz always uses -c */ + break; + case 'f': + fflag = 1; + break; + case 'N': + nflag = 0; + break; + case 'n': + nflag = 1; + /* no break, n implies m */ + case 'm': + mflag = 1; + break; + case 'M': + mflag = 0; + break; + case 'q': + qflag = 1; + break; + case 's': + osflag = atoi(optarg); + break; + case 'F': + case 'o': + origname = optarg; + break; + case 'k': + quirks = 1; + if (strcmp(optarg, "buggy-bsd") == 0) { + /* certain archives made with older versions of + * BSD variants of gzip */ + + /* no name or timestamp information */ + nflag = 1; + mflag = 1; + /* maximum compression but without indicating so */ + level = 9; + xflag = 0; + } else if (strcmp(optarg, "ntfs") == 0) { + ntfs_quirk = 1; + /* no name or timestamp information */ + nflag = 1; + mflag = 1; + /* osflag is NTFS */ + osflag = GZIP_OS_NTFS; + } else if (strcmp(optarg, "perl") == 0) { + /* Perl's Compress::Raw::Zlib */ + memlevel = 9; + + /* no name or timestamp information */ + nflag = 1; + mflag = 1; + /* maximum compression but without indicating so */ + level = 9; + xflag = 0; + } else { + fprintf(stderr, "%s: unknown quirk!\n", progname); + usage(); + } + break; + case 'T': + timestamp = atoi(optarg); + break; + case 'R': + rsync = 1; + break; + case 'r': + new_rsync = 1; + break; + case 'd': + fprintf(stderr, "%s: decompression is not supported on this version\n", progname); + usage(); + break; + case 'a': + fprintf(stderr, "%s: option --ascii ignored on this version\n", progname); + break; + case 'V': + display_version(); + /* NOTREACHED */ + case 'L': + display_license(); + /* NOT REACHED */ + default: + usage(); + /* NOTREACHED */ + } + } + argv += optind; + argc -= optind; + + if (argc != 0) { + fprintf(stderr, "%s: filenames not supported; use stdin and stdout\n", progname); + return 1; + } + + if (fflag == 0 && isatty(STDOUT_FILENO)) + maybe_errx("standard output is a terminal -- ignoring"); + + if (nflag) + origname = NULL; + if (mflag) + timestamp = 0; + + if (gnu) { + if (quirks) { + fprintf(stderr, "%s: quirks not supported with --gnu\n", progname); + return 1; + } + gnuzip(STDIN_FILENO, STDOUT_FILENO, origname, timestamp, level, osflag, rsync, new_rsync); + } else if (bzold) { + if (quirks) { + fprintf(stderr, "%s: quirks not supported with --old-bzip2\n", progname); + return 1; + } + old_bzip2(level); + } else if (bzsuse) { + shamble("suse-bzip2/bzip2", level); + } else if (pbzsuse) { + rebrain("suse-bzip2", "pbzip2", level); + } else { + if (rsync || new_rsync) { + fprintf(stderr, "%s: --rsyncable not supported with --zlib\n", progname); + return 1; + } + + gz_compress(STDIN_FILENO, STDOUT_FILENO, origname, timestamp, level, memlevel, osflag, xflag, ntfs_quirk); + } + return 0; +} + +/* maybe print an error */ +void +maybe_err(const char *fmt, ...) +{ + va_list ap; + + if (qflag == 0) { + va_start(ap, fmt); + vwarn(fmt, ap); + va_end(ap); + } + exit(1); +} + +/* ... without an errno. */ +void +maybe_errx(const char *fmt, ...) +{ + va_list ap; + + if (qflag == 0) { + va_start(ap, fmt); + vwarnx(fmt, ap); + va_end(ap); + } + exit(1); +} + +/* compress input to output. */ +static void +gz_compress(int in, int out, const char *origname, uint32_t mtime, int level, int memlevel, int osflag, int xflag, int ntfs_quirk) +{ + z_stream z; + char *outbufp, *inbufp; + off_t in_tot = 0; + ssize_t in_size; + int i, error; + uLong crc; + + outbufp = malloc(BUFLEN); + inbufp = malloc(BUFLEN); + if (outbufp == NULL || inbufp == NULL) + maybe_err("malloc failed"); + + memset(&z, 0, sizeof z); + z.zalloc = Z_NULL; + z.zfree = Z_NULL; + z.opaque = 0; + + i = snprintf(outbufp, BUFLEN, "%c%c%c%c%c%c%c%c%c%c%s", + GZIP_MAGIC0, GZIP_MAGIC1, Z_DEFLATED, + origname ? ORIG_NAME : 0, + mtime & 0xff, + (mtime >> 8) & 0xff, + (mtime >> 16) & 0xff, + (mtime >> 24) & 0xff, + xflag >= 0 ? xflag : + level == 1 ? 4 : level == 9 ? 2 : 0, + osflag, origname ? origname : ""); + if (i >= BUFLEN) + /* this need PATH_MAX > BUFLEN ... */ + maybe_err("snprintf"); + if (origname) + i++; + + z.next_out = (unsigned char *)outbufp + i; + z.avail_out = BUFLEN - i; + + error = deflateInit2(&z, level, Z_DEFLATED, + (-MAX_WBITS), memlevel, Z_DEFAULT_STRATEGY); + if (error != Z_OK) + maybe_err("deflateInit2 failed"); + + crc = crc32(0L, Z_NULL, 0); + for (;;) { + if (z.avail_out == 0) { + if (write(out, outbufp, BUFLEN) != BUFLEN) + maybe_err("write"); + + z.next_out = (unsigned char *)outbufp; + z.avail_out = BUFLEN; + } + + if (z.avail_in == 0) { + in_size = read(in, inbufp, BUFLEN); + if (in_size < 0) + maybe_err("read"); + if (in_size == 0) + break; + + crc = crc32(crc, (const Bytef *)inbufp, (unsigned)in_size); + in_tot += in_size; + z.next_in = (unsigned char *)inbufp; + z.avail_in = in_size; + } + + error = deflate(&z, Z_NO_FLUSH); + if (error != Z_OK && error != Z_STREAM_END) + maybe_errx("deflate failed"); + } + + /* clean up */ + for (;;) { + size_t len; + ssize_t w; + + error = deflate(&z, Z_FINISH); + if (error != Z_OK && error != Z_STREAM_END) + maybe_errx("deflate failed"); + + len = (char *)z.next_out - outbufp; + + /* for a really strange reason, that + * particular byte is decremented */ + if (ntfs_quirk) + outbufp[10]--; + + w = write(out, outbufp, len); + if (w == -1 || (size_t)w != len) + maybe_err("write"); + z.next_out = (unsigned char *)outbufp; + z.avail_out = BUFLEN; + + if (error == Z_STREAM_END) + break; + } + + if (deflateEnd(&z) != Z_OK) + maybe_errx("deflateEnd failed"); + + if (ntfs_quirk) { + /* write NTFS tail magic (?) */ + i = snprintf(outbufp, BUFLEN, "%c%c%c%c%c%c", + 0x00, 0x00, 0xff, 0xff, 0x03, 0x00); + if (i != 6) + maybe_err("snprintf"); + if (write(out, outbufp, i) != i) + maybe_err("write"); + } + + /* write CRC32 and input size (ISIZE) at the tail */ + i = snprintf(outbufp, BUFLEN, "%c%c%c%c%c%c%c%c", + (int)crc & 0xff, + (int)(crc >> 8) & 0xff, + (int)(crc >> 16) & 0xff, + (int)(crc >> 24) & 0xff, + (int)in_tot & 0xff, + (int)(in_tot >> 8) & 0xff, + (int)(in_tot >> 16) & 0xff, + (int)(in_tot >> 24) & 0xff); + if (i != 8) + maybe_err("snprintf"); + if (write(out, outbufp, i) != i) + maybe_err("write"); + + free(inbufp); + free(outbufp); +} + +/* runs an external, reanimated compressor program */ +static void +shamble(char *zombie, int level) +{ + char exec_buf[PATH_MAX]; + char level_buf[3]; + char *argv[3]; + int i; + + snprintf(exec_buf, sizeof(exec_buf), "%s/%s", PKGLIBDIR, zombie); + snprintf(level_buf, sizeof(level_buf), "-%i", level); + + i = 0; + argv[i++] = exec_buf; + argv[i++] = level_buf; + argv[i++] = NULL; + + execvp(exec_buf, argv); + perror("Failed to run external program"); + exit(1); +} + +/* swaps in a different library and runs a system program */ +static void +rebrain(char *zombie, char *program, int level) +{ + char path_buf[PATH_MAX]; + char level_buf[3]; + char *argv[3]; + int i; + +#if defined(__APPLE__) && defined(__MACH__) +# define LD_PATH_VAR "DYLD_LIBRARY_PATH" +#else +# define LD_PATH_VAR "LD_LIBRARY_PATH" +#endif + + snprintf(path_buf, sizeof(path_buf), "%s/%s", PKGLIBDIR, zombie); + /* FIXME - should append, not overwrite */ + setenv(LD_PATH_VAR, path_buf, 1); + + snprintf(level_buf, sizeof(level_buf), "-%i", level); + + i = 0; + argv[i++] = program; + argv[i++] = level_buf; + argv[i++] = NULL; + + execvp(program, argv); + perror("Failed to run external program"); + exit(1); +} + +/* display usage */ +static void +usage(void) +{ + + fprintf(stderr, "%s\n", gzip_version); + fprintf(stderr, + "usage: zgz [-" OPT_LIST "] < > \n" + " -G --gnu use GNU gzip implementation\n" + " -Z --zlib use zlib's implementation (default)\n" + " -O --old-bzip2 generate bzip2 (0.9.5d) output\n" + " -S --suse-bzip2 generate suse bzip2 output\n" + " -P --suse-pbzip2 generate suse pbzip2 output\n" + " -1 --fast fastest (worst) compression\n" + " -2 .. -8 set compression level\n" + " -9 --best best (slowest) compression\n" + " -f --force force writing compressed data to a terminal\n" + " -N --name save or restore original file name and time stamp\n" + " -n --no-name don't save original file name or time stamp\n" + " -m --no-timestamp don't save original time stamp\n" + " -M --force-timestemp save the timestamp even if -n was passed\n" + " -q --quiet output no warnings\n" + " -V --version display program version\n" + " -h --help display this help\n" + " -o NAME\n" + " --original-name NAME use NAME as the original file name\n" + " -F NAME --filename NAME same as --original-name\n" + " -s --osflag set the OS flag to something different than 03 (Unix)\n" + " -T --timestamp SECONDS set the timestamp to the specified number of seconds\n" + " \ngnu-specific options:\n" + " -R --rsyncable make rsync-friendly archive\n" + " -r --new-rsyncable make rsync-friendly archive (new version)\n" + " \nzlib-specific options:\n" + " -k --quirk QUIRK enable a format quirk (buggy-bsd, ntfs, perl)\n"); + exit(0); +} + +/* display the license information of NetBSD gzip */ +static void +display_license(void) +{ + + fprintf(stderr, "%s\n", gzip_version); + fprintf(stderr, "%s\n", gzip_copyright); + exit(0); +} + +/* display the version of NetBSD gzip */ +static void +display_version(void) +{ + + fprintf(stderr, "%s\n", gzip_version); + exit(0); +} diff --git a/zgz/zgz.pod b/zgz/zgz.pod new file mode 100644 index 0000000..0f49aaf --- /dev/null +++ b/zgz/zgz.pod @@ -0,0 +1,33 @@ +=head1 NAME + +zgz - Frankenstein's compressor + +=head1 SYNOPSIS + +B [options] < file > file.gz + +=head1 DESCRIPTION + +This program is an unholy combination of the BSD gzip program, a modified +GNU gzip that supports setting an arbitrary file name and timestamp, +and an old, rotting version of bzip2 that we dug up somewhere at midnight. +Only the bits to do with file compression were kept. + +There are many arcane options which aid L(1) in re-animating +files. Use --help to see all the gory details. + +=head1 AUTHOR + +Matthew R. Green, +Jean-loup Gailly, +Julian R Seward, +Faidon Liambotis, +Josh Triplett, +Joey Hess + +Note that several of the above donors would be very suprised at finding +parts of themselves ... er, their work ... shambling around here. Please +direct correspondance and/or flaming pitchforks to the pristine-tar +maintainers. + +=cut -- cgit v1.2.3