summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJoey Hess <joey@kitenet.net>2010-07-24 01:02:51 -0400
committerJoey Hess <joey@kitenet.net>2010-07-24 01:02:51 -0400
commitf0d217736722de9124e67d33d9752d985498ed52 (patch)
tree0346d4fc2a15418818e418abcb6f543b8d082329
parent10798d26b77ccd680ae35faad241b741768b51c7 (diff)
downloadpristine-tar-f0d217736722de9124e67d33d9752d985498ed52.tar.gz
pristine-tar-f0d217736722de9124e67d33d9752d985498ed52.tar.bz2
pristine-tar-f0d217736722de9124e67d33d9752d985498ed52.zip
refactored file format magic numbers code
-rw-r--r--Pristine/Tar.pm48
-rw-r--r--Pristine/Tar/Formats.pm81
-rwxr-xr-xpristine-bz214
-rwxr-xr-xpristine-gz28
-rwxr-xr-xpristine-tar32
5 files changed, 113 insertions, 90 deletions
diff --git a/Pristine/Tar.pm b/Pristine/Tar.pm
index abe0255..a6a8446 100644
--- a/Pristine/Tar.pm
+++ b/Pristine/Tar.pm
@@ -93,52 +93,4 @@ sub dispatch {
$i->[0]->(@ARGV);
}
-
-# magic identification
-use constant GZIP_ID1 => 0x1F;
-use constant GZIP_ID2 => 0x8B;
-use constant BZIP2_ID1 => 0x42;
-use constant BZIP2_ID2 => 0x5a;
-
-# compression methods
-# 0x00-0x07 are reserved
-use constant GZIP_METHOD_DEFLATE => 0x08;
-# 'h' for Bzip2 ('H'uffman coding)
-use constant BZIP2_METHOD_HUFFMAN => 0x68;
-
-# flags
-use constant {
- GZIP_FLAG_FTEXT => 0,
- GZIP_FLAG_FHCRC => 1,
- GZIP_FLAG_FEXTRA => 2,
- GZIP_FLAG_FNAME => 3,
- GZIP_FLAG_FCOMMENT => 4,
- # the rest are reserved
-};
-# compression level
-use constant {
- GZIP_COMPRESSION_NORMAL => 0,
- GZIP_COMPRESSION_BEST => 2,
- GZIP_COMPRESSION_FAST => 4,
-};
-# operating systems
-use constant {
- GZIP_OS_MSDOS => 0,
- GZIP_OS_AMIGA => 1,
- GZIP_OS_VMS => 2,
- GZIP_OS_UNIX => 3,
- GZIP_OS_VMCMS => 4,
- GZIP_OS_ATARI => 5,
- GZIP_OS_HPFS => 6,
- GZIP_OS_MACINTOSH => 7,
- GZIP_OS_ZSYSTEM => 8,
- GZIP_OS_CPM => 9,
- GZIP_OS_TOPS => 10,
- GZIP_OS_NTFS => 11,
- GZIP_OS_QDOS => 12,
- GZIP_OS_RISCOS => 13,
- GZIP_OS_VFAT => 14,
- GZIP_OS_UNKNOWN => 255,
-};
-
1
diff --git a/Pristine/Tar/Formats.pm b/Pristine/Tar/Formats.pm
new file mode 100644
index 0000000..a141a3d
--- /dev/null
+++ b/Pristine/Tar/Formats.pm
@@ -0,0 +1,81 @@
+#!/usr/bin/perl
+# pristine-tar file format parsing
+
+package Pristine::Tar::Formats;
+
+use warnings;
+use strict;
+use Exporter q{import};
+our @EXPORT=qw{is_gz is_bz2 %fconstants};
+
+our %fconstants=(
+ # magic identification
+ GZIP_ID1 => 0x1F,
+ GZIP_ID2 => 0x8B,
+ BZIP2_ID1 => 0x42,
+ BZIP2_ID2 => 0x5a,
+
+ # compression methods
+ # 0x00-0x07 are reserved
+ GZIP_METHOD_DEFLATE => 0x08,
+ # 'h' for Bzip2 ('H'uffman coding)
+ BZIP2_METHOD_HUFFMAN => 0x68,
+
+ # flags
+ GZIP_FLAG_FTEXT => 0,
+ GZIP_FLAG_FHCRC => 1,
+ GZIP_FLAG_FEXTRA => 2,
+ GZIP_FLAG_FNAME => 3,
+ GZIP_FLAG_FCOMMENT => 4,
+ # the rest are reserved
+
+ # compression level
+ GZIP_COMPRESSION_NORMAL => 0,
+ GZIP_COMPRESSION_BEST => 2,
+ GZIP_COMPRESSION_FAST => 4,
+
+ # operating systems
+ GZIP_OS_MSDOS => 0,
+ GZIP_OS_AMIGA => 1,
+ GZIP_OS_VMS => 2,
+ GZIP_OS_UNIX => 3,
+ GZIP_OS_VMCMS => 4,
+ GZIP_OS_ATARI => 5,
+ GZIP_OS_HPFS => 6,
+ GZIP_OS_MACINTOSH => 7,
+ GZIP_OS_ZSYSTEM => 8,
+ GZIP_OS_CPM => 9,
+ GZIP_OS_TOPS => 10,
+ GZIP_OS_NTFS => 11,
+ GZIP_OS_QDOS => 12,
+ GZIP_OS_RISCOS => 13,
+ GZIP_OS_VFAT => 14,
+ GZIP_OS_UNKNOWN => 255,
+);
+
+sub magic {
+ my $file=shift;
+
+ open (my $in, "<", $file) || die "$file: $!";
+ my $count=$#_+1;
+ my ($chars, @bits);
+ my $ret = (
+ read($in, $chars, $count) == $count &&
+ (@bits = unpack(("C" x $count), $chars)) &&
+ (! grep { $bits[$_] != $_[$_] } (0..$count-1))
+ );
+ close $in;
+ return $ret;
+}
+
+sub is_gz {
+ magic(shift, $fconstants{GZIP_ID1}, $fconstants{GZIP_ID2},
+ $fconstants{GZIP_METHOD_DEFLATE});
+}
+
+sub is_bz2 {
+ magic(shift, $fconstants{BZIP2_ID1}, $fconstants{BZIP2_ID2},
+ $fconstants{BZIP2_METHOD_HUFFMAN});
+}
+
+1
diff --git a/pristine-bz2 b/pristine-bz2
index 1c69c91..f43a0f0 100755
--- a/pristine-bz2
+++ b/pristine-bz2
@@ -84,6 +84,7 @@ use warnings;
use strict;
use Pristine::Tar;
use Pristine::Tar::Delta;
+use Pristine::Tar::Formats;
use File::Basename qw/basename/;
use IO::Handle;
@@ -109,11 +110,15 @@ sub usage {
sub readbzip2 {
my $filename = shift;
- my $chars;
+
+ if (! is_bz2($filename)) {
+ error "This is not a valid BZip2 archive.";
+ }
open(BZIP2, "< $filename")
or die("Could not open '$filename' for reading: $!\n");
+ my $chars;
if (read(BZIP2, $chars, 4) != 4) {
die("Unable to read from input\n");
}
@@ -123,11 +128,8 @@ sub readbzip2 {
# we actually want the value, not the ascii position
$level-=48;
- if ($id1 != Pristine::Tar::BZIP2_ID1 ||
- $id2 != Pristine::Tar::BZIP2_ID2 ||
- $method != Pristine::Tar::BZIP2_METHOD_HUFFMAN ||
- $level !~ /^[1-9]$/) {
- die("This is not a valid BZip2 archive.\n");
+ if ($level !~ /^[1-9]$/) {
+ error "Unknown compression level $level\n";
}
close(BZIP2);
diff --git a/pristine-gz b/pristine-gz
index 9687c49..8caf59d 100755
--- a/pristine-gz
+++ b/pristine-gz
@@ -91,6 +91,7 @@ use warnings;
use strict;
use Pristine::Tar;
use Pristine::Tar::Delta;
+use Pristine::Tar::Formats;
use File::Basename qw/basename/;
dispatch(
@@ -108,26 +109,25 @@ sub usage {
sub readgzip {
my $filename = shift;
- my $chars;
+
+ if (! is_gz($filename)) {
+ error "This is not a valid GZip archive.";
+ }
open(GZIP, "< $filename")
or die("Could not open '$filename' for reading: $!\n");
+ my $chars;
if (read(GZIP, $chars, 10) != 10) {
- die("Unable to read from input\n");
+ die("Unable to read 10 bytes from input\n");
}
my ($id1, $id2, $method, $flags, $timestamp, $level, $os, $name)
= (unpack("CCCb8VCC", $chars), '');
- if ($id1 != Pristine::Tar::GZIP_ID1 ||
- $id2 != Pristine::Tar::GZIP_ID2 ||
- $method != Pristine::Tar::GZIP_METHOD_DEFLATE) {
- die("This is not a valid GZip archive.\n");
- }
my @flags = split(//, $flags);
- if ($flags[Pristine::Tar::GZIP_FLAG_FNAME]) {
+ if ($flags[$fconstants{GZIP_FLAG_FNAME}]) {
# read a null-terminated string
$name .= $chars
while (read(GZIP, $chars, 1) == 1 && ord($chars) != 0);
@@ -142,15 +142,15 @@ sub predictgzipargs {
my @flags = @$flags;
my @args;
- unless ($flags[Pristine::Tar::GZIP_FLAG_FNAME]) {
+ unless ($flags[$fconstants{GZIP_FLAG_FNAME}]) {
push @args, '-n';
push @args, '-M' if $timestamp;
}
- if ($level == Pristine::Tar::GZIP_COMPRESSION_BEST) {
+ if ($level == $fconstants{GZIP_COMPRESSION_BEST}) {
push @args, '-9'
}
- elsif ($level == Pristine::Tar::GZIP_COMPRESSION_FAST) {
+ elsif ($level == $fconstants{GZIP_COMPRESSION_FAST}) {
push @args, '-1'
}
@@ -185,7 +185,7 @@ sub reproducegz {
my @try;
- if ($os == Pristine::Tar::GZIP_OS_UNIX) {
+ if ($os == $fconstants{GZIP_OS_UNIX}) {
# for 98% of the cases the simple heuristic above works
# and it was produced by gnu gzip.
push @try, ['--gnu', @args];
@@ -200,7 +200,7 @@ sub reproducegz {
# set the Operating System flag to the one found in the original
# archive
- push @args, ("--osflag", $os) if $os != Pristine::Tar::GZIP_OS_UNIX;
+ push @args, ("--osflag", $os) if $os != $fconstants{GZIP_OS_UNIX};
# many of the .gz out there are created using the BSD version of
# gzip which is using the zlib library; try with our version of
@@ -216,7 +216,7 @@ sub reproducegz {
# Windows' NTFS gzip implementation; quirk is really really evil
# it should be the last test: it can result in a corrupted archive!
- if ($os == Pristine::Tar::GZIP_OS_NTFS) {
+ if ($os == $fconstants{GZIP_OS_NTFS}) {
pop @args; pop @args; # ntfs quirk implies NTFS osflag
push @try, [@args, '--quirk', 'ntfs'];
}
diff --git a/pristine-tar b/pristine-tar
index a741b92..e1621ab 100755
--- a/pristine-tar
+++ b/pristine-tar
@@ -167,6 +167,7 @@ use warnings;
use strict;
use Pristine::Tar;
use Pristine::Tar::Delta;
+use Pristine::Tar::Formats;
use File::Path;
use File::Basename;
use Cwd qw{getcwd abs_path};
@@ -386,15 +387,9 @@ sub gendelta {
my $tempdir=tempdir();
my %delta;
- # Check to see if it's compressed.
+ # Check to see if it's compressed, and get uncompressed tarball.
my $compression=undef;
- open (IN, "<", $tarball) || error "Cannot read $tarball: $!\n";
- my ($chars, $id1, $id2, $method);
- if (read(IN, $chars, 10) == 10 &&
- (($id1, $id2, $method) = unpack("CCC", $chars)) &&
- $id1 == Pristine::Tar::GZIP_ID1 &&
- $id2 == Pristine::Tar::GZIP_ID2 &&
- $method == Pristine::Tar::GZIP_METHOD_DEFLATE) {
+ if (is_gz($tarball)) {
$compression='gz';
open(IN, "-|", "zcat", $tarball) || die "zcat: $!";
open(OUT, ">", "$tempdir/origtarball") || die "$tempdir/origtarball: $!";
@@ -402,20 +397,13 @@ sub gendelta {
close IN || die "zcat: $!";
close OUT || die "$tempdir/origtarball: $!";
}
- else {
- seek(IN, 0, 0) || die "seek: $!";
- if (read(IN, $chars, 3) == 3 &&
- (($id1, $id2, $method) = unpack("CCC", $chars)) &&
- $id1 == Pristine::Tar::BZIP2_ID1 &&
- $id2 == Pristine::Tar::BZIP2_ID2 &&
- $method == Pristine::Tar::BZIP2_METHOD_HUFFMAN) {
- $compression='bz2';
- open(IN, "-|", "bzcat", $tarball) || die "bzcat: $!";
- open(OUT, ">", "$tempdir/origtarball") || die "$tempdir/origtarball: $!";
- print OUT $_ while <IN>;
- close IN || die "bzcat: $!";
- close OUT || die "$tempdir/origtarball: $!";
- }
+ elsif (is_bz2($tarball)) {
+ $compression='bz2';
+ open(IN, "-|", "bzcat", $tarball) || die "bzcat: $!";
+ open(OUT, ">", "$tempdir/origtarball") || die "$tempdir/origtarball: $!";
+ print OUT $_ while <IN>;
+ close IN || die "bzcat: $!";
+ close OUT || die "$tempdir/origtarball: $!";
}
close IN;