diff options
author | Joey Hess <joey@kitenet.net> | 2010-07-24 01:02:51 -0400 |
---|---|---|
committer | Joey Hess <joey@kitenet.net> | 2010-07-24 01:02:51 -0400 |
commit | f0d217736722de9124e67d33d9752d985498ed52 (patch) | |
tree | 0346d4fc2a15418818e418abcb6f543b8d082329 | |
parent | 10798d26b77ccd680ae35faad241b741768b51c7 (diff) | |
download | pristine-tar-f0d217736722de9124e67d33d9752d985498ed52.tar.gz pristine-tar-f0d217736722de9124e67d33d9752d985498ed52.tar.bz2 pristine-tar-f0d217736722de9124e67d33d9752d985498ed52.zip |
refactored file format magic numbers code
-rw-r--r-- | Pristine/Tar.pm | 48 | ||||
-rw-r--r-- | Pristine/Tar/Formats.pm | 81 | ||||
-rwxr-xr-x | pristine-bz2 | 14 | ||||
-rwxr-xr-x | pristine-gz | 28 | ||||
-rwxr-xr-x | pristine-tar | 32 |
5 files changed, 113 insertions, 90 deletions
diff --git a/Pristine/Tar.pm b/Pristine/Tar.pm index abe0255..a6a8446 100644 --- a/Pristine/Tar.pm +++ b/Pristine/Tar.pm @@ -93,52 +93,4 @@ sub dispatch { $i->[0]->(@ARGV); } - -# magic identification -use constant GZIP_ID1 => 0x1F; -use constant GZIP_ID2 => 0x8B; -use constant BZIP2_ID1 => 0x42; -use constant BZIP2_ID2 => 0x5a; - -# compression methods -# 0x00-0x07 are reserved -use constant GZIP_METHOD_DEFLATE => 0x08; -# 'h' for Bzip2 ('H'uffman coding) -use constant BZIP2_METHOD_HUFFMAN => 0x68; - -# flags -use constant { - GZIP_FLAG_FTEXT => 0, - GZIP_FLAG_FHCRC => 1, - GZIP_FLAG_FEXTRA => 2, - GZIP_FLAG_FNAME => 3, - GZIP_FLAG_FCOMMENT => 4, - # the rest are reserved -}; -# compression level -use constant { - GZIP_COMPRESSION_NORMAL => 0, - GZIP_COMPRESSION_BEST => 2, - GZIP_COMPRESSION_FAST => 4, -}; -# operating systems -use constant { - GZIP_OS_MSDOS => 0, - GZIP_OS_AMIGA => 1, - GZIP_OS_VMS => 2, - GZIP_OS_UNIX => 3, - GZIP_OS_VMCMS => 4, - GZIP_OS_ATARI => 5, - GZIP_OS_HPFS => 6, - GZIP_OS_MACINTOSH => 7, - GZIP_OS_ZSYSTEM => 8, - GZIP_OS_CPM => 9, - GZIP_OS_TOPS => 10, - GZIP_OS_NTFS => 11, - GZIP_OS_QDOS => 12, - GZIP_OS_RISCOS => 13, - GZIP_OS_VFAT => 14, - GZIP_OS_UNKNOWN => 255, -}; - 1 diff --git a/Pristine/Tar/Formats.pm b/Pristine/Tar/Formats.pm new file mode 100644 index 0000000..a141a3d --- /dev/null +++ b/Pristine/Tar/Formats.pm @@ -0,0 +1,81 @@ +#!/usr/bin/perl +# pristine-tar file format parsing + +package Pristine::Tar::Formats; + +use warnings; +use strict; +use Exporter q{import}; +our @EXPORT=qw{is_gz is_bz2 %fconstants}; + +our %fconstants=( + # magic identification + GZIP_ID1 => 0x1F, + GZIP_ID2 => 0x8B, + BZIP2_ID1 => 0x42, + BZIP2_ID2 => 0x5a, + + # compression methods + # 0x00-0x07 are reserved + GZIP_METHOD_DEFLATE => 0x08, + # 'h' for Bzip2 ('H'uffman coding) + BZIP2_METHOD_HUFFMAN => 0x68, + + # flags + GZIP_FLAG_FTEXT => 0, + GZIP_FLAG_FHCRC => 1, + GZIP_FLAG_FEXTRA => 2, + GZIP_FLAG_FNAME => 3, + GZIP_FLAG_FCOMMENT => 4, + # the rest are reserved + + # compression level + GZIP_COMPRESSION_NORMAL => 0, + GZIP_COMPRESSION_BEST => 2, + GZIP_COMPRESSION_FAST => 4, + + # operating systems + GZIP_OS_MSDOS => 0, + GZIP_OS_AMIGA => 1, + GZIP_OS_VMS => 2, + GZIP_OS_UNIX => 3, + GZIP_OS_VMCMS => 4, + GZIP_OS_ATARI => 5, + GZIP_OS_HPFS => 6, + GZIP_OS_MACINTOSH => 7, + GZIP_OS_ZSYSTEM => 8, + GZIP_OS_CPM => 9, + GZIP_OS_TOPS => 10, + GZIP_OS_NTFS => 11, + GZIP_OS_QDOS => 12, + GZIP_OS_RISCOS => 13, + GZIP_OS_VFAT => 14, + GZIP_OS_UNKNOWN => 255, +); + +sub magic { + my $file=shift; + + open (my $in, "<", $file) || die "$file: $!"; + my $count=$#_+1; + my ($chars, @bits); + my $ret = ( + read($in, $chars, $count) == $count && + (@bits = unpack(("C" x $count), $chars)) && + (! grep { $bits[$_] != $_[$_] } (0..$count-1)) + ); + close $in; + return $ret; +} + +sub is_gz { + magic(shift, $fconstants{GZIP_ID1}, $fconstants{GZIP_ID2}, + $fconstants{GZIP_METHOD_DEFLATE}); +} + +sub is_bz2 { + magic(shift, $fconstants{BZIP2_ID1}, $fconstants{BZIP2_ID2}, + $fconstants{BZIP2_METHOD_HUFFMAN}); +} + +1 diff --git a/pristine-bz2 b/pristine-bz2 index 1c69c91..f43a0f0 100755 --- a/pristine-bz2 +++ b/pristine-bz2 @@ -84,6 +84,7 @@ use warnings; use strict; use Pristine::Tar; use Pristine::Tar::Delta; +use Pristine::Tar::Formats; use File::Basename qw/basename/; use IO::Handle; @@ -109,11 +110,15 @@ sub usage { sub readbzip2 { my $filename = shift; - my $chars; + + if (! is_bz2($filename)) { + error "This is not a valid BZip2 archive."; + } open(BZIP2, "< $filename") or die("Could not open '$filename' for reading: $!\n"); + my $chars; if (read(BZIP2, $chars, 4) != 4) { die("Unable to read from input\n"); } @@ -123,11 +128,8 @@ sub readbzip2 { # we actually want the value, not the ascii position $level-=48; - if ($id1 != Pristine::Tar::BZIP2_ID1 || - $id2 != Pristine::Tar::BZIP2_ID2 || - $method != Pristine::Tar::BZIP2_METHOD_HUFFMAN || - $level !~ /^[1-9]$/) { - die("This is not a valid BZip2 archive.\n"); + if ($level !~ /^[1-9]$/) { + error "Unknown compression level $level\n"; } close(BZIP2); diff --git a/pristine-gz b/pristine-gz index 9687c49..8caf59d 100755 --- a/pristine-gz +++ b/pristine-gz @@ -91,6 +91,7 @@ use warnings; use strict; use Pristine::Tar; use Pristine::Tar::Delta; +use Pristine::Tar::Formats; use File::Basename qw/basename/; dispatch( @@ -108,26 +109,25 @@ sub usage { sub readgzip { my $filename = shift; - my $chars; + + if (! is_gz($filename)) { + error "This is not a valid GZip archive."; + } open(GZIP, "< $filename") or die("Could not open '$filename' for reading: $!\n"); + my $chars; if (read(GZIP, $chars, 10) != 10) { - die("Unable to read from input\n"); + die("Unable to read 10 bytes from input\n"); } my ($id1, $id2, $method, $flags, $timestamp, $level, $os, $name) = (unpack("CCCb8VCC", $chars), ''); - if ($id1 != Pristine::Tar::GZIP_ID1 || - $id2 != Pristine::Tar::GZIP_ID2 || - $method != Pristine::Tar::GZIP_METHOD_DEFLATE) { - die("This is not a valid GZip archive.\n"); - } my @flags = split(//, $flags); - if ($flags[Pristine::Tar::GZIP_FLAG_FNAME]) { + if ($flags[$fconstants{GZIP_FLAG_FNAME}]) { # read a null-terminated string $name .= $chars while (read(GZIP, $chars, 1) == 1 && ord($chars) != 0); @@ -142,15 +142,15 @@ sub predictgzipargs { my @flags = @$flags; my @args; - unless ($flags[Pristine::Tar::GZIP_FLAG_FNAME]) { + unless ($flags[$fconstants{GZIP_FLAG_FNAME}]) { push @args, '-n'; push @args, '-M' if $timestamp; } - if ($level == Pristine::Tar::GZIP_COMPRESSION_BEST) { + if ($level == $fconstants{GZIP_COMPRESSION_BEST}) { push @args, '-9' } - elsif ($level == Pristine::Tar::GZIP_COMPRESSION_FAST) { + elsif ($level == $fconstants{GZIP_COMPRESSION_FAST}) { push @args, '-1' } @@ -185,7 +185,7 @@ sub reproducegz { my @try; - if ($os == Pristine::Tar::GZIP_OS_UNIX) { + if ($os == $fconstants{GZIP_OS_UNIX}) { # for 98% of the cases the simple heuristic above works # and it was produced by gnu gzip. push @try, ['--gnu', @args]; @@ -200,7 +200,7 @@ sub reproducegz { # set the Operating System flag to the one found in the original # archive - push @args, ("--osflag", $os) if $os != Pristine::Tar::GZIP_OS_UNIX; + push @args, ("--osflag", $os) if $os != $fconstants{GZIP_OS_UNIX}; # many of the .gz out there are created using the BSD version of # gzip which is using the zlib library; try with our version of @@ -216,7 +216,7 @@ sub reproducegz { # Windows' NTFS gzip implementation; quirk is really really evil # it should be the last test: it can result in a corrupted archive! - if ($os == Pristine::Tar::GZIP_OS_NTFS) { + if ($os == $fconstants{GZIP_OS_NTFS}) { pop @args; pop @args; # ntfs quirk implies NTFS osflag push @try, [@args, '--quirk', 'ntfs']; } diff --git a/pristine-tar b/pristine-tar index a741b92..e1621ab 100755 --- a/pristine-tar +++ b/pristine-tar @@ -167,6 +167,7 @@ use warnings; use strict; use Pristine::Tar; use Pristine::Tar::Delta; +use Pristine::Tar::Formats; use File::Path; use File::Basename; use Cwd qw{getcwd abs_path}; @@ -386,15 +387,9 @@ sub gendelta { my $tempdir=tempdir(); my %delta; - # Check to see if it's compressed. + # Check to see if it's compressed, and get uncompressed tarball. my $compression=undef; - open (IN, "<", $tarball) || error "Cannot read $tarball: $!\n"; - my ($chars, $id1, $id2, $method); - if (read(IN, $chars, 10) == 10 && - (($id1, $id2, $method) = unpack("CCC", $chars)) && - $id1 == Pristine::Tar::GZIP_ID1 && - $id2 == Pristine::Tar::GZIP_ID2 && - $method == Pristine::Tar::GZIP_METHOD_DEFLATE) { + if (is_gz($tarball)) { $compression='gz'; open(IN, "-|", "zcat", $tarball) || die "zcat: $!"; open(OUT, ">", "$tempdir/origtarball") || die "$tempdir/origtarball: $!"; @@ -402,20 +397,13 @@ sub gendelta { close IN || die "zcat: $!"; close OUT || die "$tempdir/origtarball: $!"; } - else { - seek(IN, 0, 0) || die "seek: $!"; - if (read(IN, $chars, 3) == 3 && - (($id1, $id2, $method) = unpack("CCC", $chars)) && - $id1 == Pristine::Tar::BZIP2_ID1 && - $id2 == Pristine::Tar::BZIP2_ID2 && - $method == Pristine::Tar::BZIP2_METHOD_HUFFMAN) { - $compression='bz2'; - open(IN, "-|", "bzcat", $tarball) || die "bzcat: $!"; - open(OUT, ">", "$tempdir/origtarball") || die "$tempdir/origtarball: $!"; - print OUT $_ while <IN>; - close IN || die "bzcat: $!"; - close OUT || die "$tempdir/origtarball: $!"; - } + elsif (is_bz2($tarball)) { + $compression='bz2'; + open(IN, "-|", "bzcat", $tarball) || die "bzcat: $!"; + open(OUT, ">", "$tempdir/origtarball") || die "$tempdir/origtarball: $!"; + print OUT $_ while <IN>; + close IN || die "bzcat: $!"; + close OUT || die "$tempdir/origtarball: $!"; } close IN; |