#!/usr/bin/perl # RPM (and it's source code) is covered under two separate licenses. # The entire code base may be distributed under the terms of the GNU # General Public License (GPL), which appears immediately below. # Alternatively, all of the source code in the lib subdirectory of the # RPM source code distribution as well as any code derived from that # code may instead be distributed under the GNU Library General Public # License (LGPL), at the choice of the distributor. The complete text # of the LGPL appears at the bottom of this file. # This alternatively is allowed to enable applications to be linked # against the RPM library (commonly called librpm) without forcing # such applications to be distributed under the GPL. # Any questions regarding the licensing of RPM should be addressed to # Erik Troan . # rpmdiff - a program for comparing two rpm files for differences. # Written by Ken Estes, Mail.com. use Getopt::Long; # much of this code comes from reading the book # "Maximum RPM" by Edward C. 
Bailey sub usage { my $args = "[--".join("] [--", @ARGS)."]"; my @default_args = map $RPMTAG->{$_}->{'arg'}, @DEFAULT_CMP; my $default_args = "--".join(" --", @default_args).""; my $usage =< { 'tag_name' => 'NAME', }, 1001 => { 'tag_name' => 'VERSION', }, 1002 => { 'tag_name' => 'RELEASE', }, 1006 => { 'tag_name' => 'BUILDTIME', }, 1027 => { 'tag_name' => 'OLDFILENAMES', }, 1028 => { 'tag_name' => 'FILESIZES', 'diff_order' => 0, 'diff_char' => 'S', 'arg' => 'size', 'script_cmp' => sub { return (length($_[0]) ne length($_[1])); }, }, 1029 => { 'tag_name' => 'FILESTATES', }, 1030 => { 'tag_name' => 'FILEMODES', 'diff_order' => 1, 'diff_char' => 'M', 'arg' => 'mode', }, 1033 => { 'tag_name' => 'FILERDEVS', 'diff_order' => 3, 'diff_char' => 'D', 'arg' => 'dev', }, 1034 => { 'tag_name' => 'FILEMTIMES', 'diff_order' => 7, 'diff_char' => 'T', 'arg' => 'mtime', }, 1035 => { 'tag_name' => 'FILEMD5S', 'diff_order' => 2, 'diff_char' => '5', 'arg' => 'md5', 'script_cmp' => sub{ return ($_[0] ne $_[1]); }, }, 1036 => { 'tag_name' => 'FILELINKTOS', 'diff_order' => 4, 'diff_char' => 'L', 'arg' => 'link', }, 1037 => { 'tag_name' => 'FILEFLAGS', }, 1038 => { 'tag_name' => 'ROOT', }, 1039 => { 'tag_name' => 'FILEUSERNAME', 'diff_order' => 5, 'diff_char' => 'U', 'arg' => 'user', }, 1040 => { 'tag_name' => 'FILEGROUPNAME', 'diff_order' => 6, 'diff_char' => 'G', 'arg' => 'group', }, 1098 => { 'tag_name' => 'PREFIXES', }, 1099 => { 'tag_name' => 'INSTPREFIXES', }, # support for differences of scripts 1023 => { 'tag_name' => 'PREIN', 'is_script' => 1, }, 1024 => { 'tag_name' => 'POSTIN', 'is_script' => 1, }, 1025 => { 'tag_name' => 'PREUN', 'is_script' => 1, }, 1026 => { 'tag_name' => 'POSTUN', 'is_script' => 1, }, 1079 => { 'tag_name' => 'VERIFYSCRIPT', 'is_script' => 1, }, 1065 => { 'tag_name' => 'TRIGGERSCRIPTS', 'is_script' => 1, }, 1091 => { 'tag_name' => 'VERIFYSCRIPTPROG', 'is_script' => 1, }, 1092 => { 'tag_name' => 'TRIGGERSCRIPTPROG', 'is_script' => 1, }, }; # by default check 
# these options, which are the "contents" of the files.
  @DEFAULT_CMP = ( 1028, 1035, 1036, );

  $RPM_FILE_MAGIC = chr(0xed).chr(0xab).chr(0xee).chr(0xdb);
  $RPM_HEADER_MAGIC = chr(0x8e).chr(0xad).chr(0xe8);

  # we want the second header block, as the first header is the
  # signature block.
  $HEADER_BLOCK_NUM = 2;

  # number of bytes in the file to skip when looking for the first
  # header.  Actually I think the lead is bigger than this like 96, but
  # I am sure this minimum value is correct.
  $LEAD_LENGTH = 66;

  $HEADER_RECORD_SIZE = 16;

  # largest exit code we allow.
  $MAX_EXIT = 250;
  $NUM_DIFFERENCES = 0;

  $RCS_REVISION = ' $Revision: 1.5 $ ';

  # set a known path.
  $ENV{'PATH'}= (
                 '/opt/gnu/bin'.
                 ':/usr/local/bin'.
                 ':/usr/bin'.
                 ':/bin'.
                 '');

  # taint perl requires we clean up these bad environmental variables.
  delete @ENV{'IFS', 'CDPATH', 'ENV', 'BASH_ENV'};

  $VERSION = 'NONE';
  if ( $RCS_REVISION =~ m/([.0-9]+)/ ) {
    $VERSION = $1;
  }

  return ;
}


# Parse the command line.
#
# The per-attribute options (--size, --nosize, ...) are derived from
# the 'arg' entries of the $RPMTAG table.  On return the following
# globals are filled in for the rest of the program:
#   @ARGS             option names derived from $RPMTAG
#   @ALL_CMP_TAGS     comparable tags, sorted by their 'diff_order'
#   $FILES_EQ_STRING  the "nothing differs" string, one '.' per tag
#   %CMP_TAGS         set of tags which were selected for comparison
#   $RPMFILE0/1       the two file names taken from @ARGV
#   $CMP_MODE         true when --cmpmode was given
# Dies when include and exclude options are mixed, when there are not
# exactly two file arguments, or when a file is not readable.

sub parse_args{

  my $arg_include = '';
  my $arg_exclude = '';
  my %arg_tags= ();

  my @args_with_bang = ();
  my %arg2tag = ();

  # find out what arguments are available and build some
  # data structures to work with them.  The keys are visited in
  # numeric order so that @ARGS does not depend on hash ordering.

  foreach my $tag (sort { $a <=> $b } keys %$RPMTAG ) {
    my $arg = $RPMTAG->{$tag}->{'arg'};
    ($arg) || next;
    push @ARGS, $arg;
    push @ALL_CMP_TAGS, $tag;
    push @args_with_bang, "$arg!";
    $arg2tag{$arg}=$tag;
  }

  # sort the tags to determine the proper comparison order.
  # use the order stored in the RPMTAG table.

  # If this code is too confusing, look up
  # 'Schwartzian Transform' in perlfaq4 or an advanced perl book.

  @ALL_CMP_TAGS = map { $_->[0] }
                  sort{ $a->[1] <=> $b->[1] }
                  map { [ $_, $RPMTAG->{$_}->{'diff_order'} ] }
                  @ALL_CMP_TAGS;

  $FILES_EQ_STRING = '.' x scalar(@ALL_CMP_TAGS);

  # GetOptions stores each option in the package variable $opt_<name>.
  if( !GetOptions("version", "help", "all", "cmpmode!", @args_with_bang) ) {
    print("Illegal options in \@ARGV: '@ARGV'\n");
    usage() ;
    exit 1 ;
  }

  if($opt_version) {
    print "$0: Version: $VERSION\n";
    exit 0;
  }

  if ($opt_help) {
    usage();
  }

  if ($opt_all) {
    # all is just an exclude with nothing to exclude
    $arg_exclude = 1;
  }

  # process each of the arguments derived from the $RPMTAG hash.
  # A defined but false value means the negated form (--noXXX) was used.

  foreach my $arg (@ARGS) {
    my $arg_var = "opt_$arg";
    if (defined($$arg_var)) {
      $arg_tags{$arg2tag{$arg}} = 1;
      if ($$arg_var) {
        $arg_include = 1;
      } else {
        $arg_exclude = 1;
      }
    }
  }

  ($arg_include) && ($arg_exclude) &&
    die("$0: Can not mix both include and exclude arguements ".
        "on the command line.\n");

  if ($arg_include) {
    # check only the options listed
    foreach my $tag (keys %arg_tags) {
      $CMP_TAGS{$tag} = 1;
    }
  } elsif ($arg_exclude) {
    # check everything but the options listed
    foreach my $tag (@ALL_CMP_TAGS) {
      $CMP_TAGS{$tag} = 1;
    }
    foreach my $tag (keys %arg_tags) {
      delete $CMP_TAGS{$tag};
    }
  } else {
    # check the default options
    foreach my $tag (@DEFAULT_CMP) {
      $CMP_TAGS{$tag} = 1;
    }
  }

  ($#ARGV == 1) ||
    die("$0: Argument list must include two file names\n");

  $RPMFILE0 = $ARGV[0];
  $RPMFILE1 = $ARGV[1];

  ( !(-f $RPMFILE0) || !(-r $RPMFILE0) ) &&
    die("$0: '$RPMFILE0' is not a readable file\n");

  ( !(-f $RPMFILE1) || !(-r $RPMFILE1) ) &&
    die("$0: '$RPMFILE1' is not a readable file\n");

  $CMP_MODE = ($opt_cmpmode == 1);

  return ;
}


# Return the NUL terminated string which starts at byte position $pos
# of the file image referenced by $file_ref (a reference is passed so
# the image is not copied).  $filename and $record are only used in the
# error message.  Dies when no terminating NUL byte is found.

sub _store_string {
  my ($file_ref, $pos, $filename, $record) = @_;

  my $null_position = index($$file_ref, "\0", $pos);
  ($null_position < 0) &&
    die("$0: File Parse Error, file: $filename, ".
        "unterminated string in record number: $record.\n");

  return substr($$file_ref, $pos, $null_position - $pos);
}


# read the rpmfile and extract the header information.
#
# Argument: the name of the rpm file.
# Returns a hash reference which maps each tag number known to
# $RPMTAG to an array reference holding the values of that tag.  The
# extra key 'name2index' maps every OLDFILENAMES entry to its position
# in the per file arrays.
# Dies when the file can not be read or can not be parsed.

sub parse_rpm_headers {
  my ($filename) = @_;

  my $file = '';
  my $out = {};

  # read whole file into memory
  {
    # three argument open: the file name is never parsed for a mode.
    open(my $rpm_fh, '<', $filename) ||
      die("$0: Could not open: $filename for reading. $!\n");

    # not needed on unix but lets be very clear
    binmode($rpm_fh);

    # slurp whole file
    local $/ = undef;
    $file = <$rpm_fh>;

    close($rpm_fh) ||
      die("$0: Could not close: $filename. $!\n");
  }

  # a failed read leaves nothing to compare against the magic number.
  defined($file) || ($file = '');

  (substr($file, 0, length($RPM_FILE_MAGIC)) eq $RPM_FILE_MAGIC) ||
    die("$0: file: $filename is not an RPM file. ".
        "No magic number found.\n");

  # we want the second header block, as the first header is the
  # signature block.

  my ($header_start, $store_start) = ($LEAD_LENGTH, 0);
  my ($_version, $_reserved, $num_header_entries, $num_store_bytes) = ();

  # position from which the next header magic is searched.  After a
  # block has been read this moves past its store, so binary data
  # inside a block can not be mistaken for the next header.
  my $search_from = $LEAD_LENGTH;

  foreach my $i (1 .. $HEADER_BLOCK_NUM) {

    # find beginning of header,
    $header_start = index($file, $RPM_HEADER_MAGIC, $search_from);
    ($header_start < 0) &&
      die("$0: file: $filename is not an RPM file. ".
          "No: $i, header found.\n");

    $header_start += length($RPM_HEADER_MAGIC);

    # one byte version, then three 32 bit big endian integers.
    ($_version, $_reserved, $num_header_entries, $num_store_bytes) =
      unpack("CNNN", substr($file, $header_start, 1+(4*3)));
    $header_start += 1+(4*3);

    # find beginning of store
    $store_start = $header_start +
      ($num_header_entries * $HEADER_RECORD_SIZE);

    ( ($store_start + $num_store_bytes) < length($file) ) ||
      die("$0: File Parse Error, file: $filename, ".
          "is not long enough to hold store.\n");

    $search_from = $store_start + $num_store_bytes;
  }

  # the header is just a list of information about data.
  # the data is stored in the store further down the file.
  my $header_position = $header_start;
  foreach my $i (0 .. $num_header_entries-1) {

    my ($tag, $data_type, $offset, $data_count) =
      unpack("N4", substr($file, $header_position, $HEADER_RECORD_SIZE));
    $header_position += $HEADER_RECORD_SIZE;

    (
     ( ($tag < 100) || ($tag > 1200) ) ||
     ( ($data_type < 0) || ($data_type > 10) ) ||
     ($offset < 0)
    ) && die("$0: Error parsing header in rpm file: $filename, ".
             "record number: $i.\n");

    # we are only interested in the tags which are defined
    $RPMTAG->{$tag} || next;

    foreach my $j (0 .. $data_count-1) {
      my $value ='';
      if ($data_type == 0) {
        # null
        $value = '';
      } elsif ($data_type == 1) {
        # char
        $value = substr($file, $store_start+$offset, 1);
        $offset += 1;
      } elsif ($data_type == 2) {
        # int8
        $value = ord(substr($file, $store_start+$offset, 1));
        $offset += 1;
      } elsif ($data_type == 3) {
        # int16
        $value = unpack("n", substr($file, $store_start+$offset, 2));
        $offset += 2;
      } elsif ($data_type == 4) {
        # int32
        $value = unpack("N", substr($file, $store_start+$offset, 4));
        $offset += 4;
      } elsif ($data_type == 5) {
        # int64
        # ---- These aren't supported by RPM (yet) */
        die("$0: int64 type found in rpm file: $filename, ".
            "record number: $i.\n");
      } elsif ( ($data_type == 6) || ($data_type == 8) ) {
        # string and string_array: NUL terminated strings, stored
        # one after the other.  Step over the terminator as well.
        $value = _store_string(\$file, $store_start+$offset,
                               $filename, $i);
        $offset += length($value)+1;
      } elsif ($data_type == 7) {
        # bin
        # to properly support this I need to move it outside the $j
        # loop.  However I do not need it.
        die("$0: Bin type found in rpm file: $filename, ".
            "record number: $i.\n");
      } elsif ($data_type == 9) {
        # this is listed as both RPM_I18NSTRING_TYPE and RPM_MAX_TYPE
        # in file ~rpm/lib/header.h but I ignore it
        die("$0: I18NSTRING type found in rpm file: $filename, ".
            "record number: $i.\n");
      }

      push @{$out->{$tag}}, $value;
      if ($RPMTAG->{$tag}->{"tag_name"} eq 'OLDFILENAMES') {
        $out->{'name2index'}->{$value} = $j;
      }
    } # foreach $j

  } # foreach $i

  return $out;
}


# traverse the data structures to create a text representation of the
# critical differences between rpmscripts.  If we are running in
# cmpmode and a difference is found exit early.
sub format_script_differences {
  # Arguments: the parsed header hashes of the old and the new package.
  # Returns one text line per script which is missing, added or
  # different; the empty string when nothing was found.
  # Side effects: bumps $NUM_DIFFERENCES (capped at $MAX_EXIT) for each
  # reported script, and exits with status 1 on the first reported
  # script when $CMP_MODE is set.
  my ($old_pkg, $new_pkg) = @_;

  my @report = ();

  for my $script_tag (sort keys %{$RPMTAG}) {

    # only the tags flagged as scripts are of interest here.
    next unless $RPMTAG->{$script_tag}->{'is_script'};

    my $have_old = $old_pkg->{$script_tag};
    my $have_new = $new_pkg->{$script_tag};
    next unless ($have_old || $have_new);

    my $prefix = '';

    if ($have_old && !$have_new) {
      $prefix = 'missing ';
    }
    elsif (!$have_old && $have_new) {
      $prefix = 'added ';
    }
    else {
      # one column per comparable tag: the tag's diff_char when its
      # comparison function reports a difference, a '.' otherwise.
      my @columns = ();

      for my $tag (@ALL_CMP_TAGS) {
        my $differs = 0;

        if ($CMP_TAGS{$tag}) {
          my $compare = $RPMTAG->{$tag}->{'script_cmp'};

          # when the tag data is absent from either package the
          # scripts are charitably assumed to match on this tag.
          if ($compare && $old_pkg->{$tag} && $new_pkg->{$tag}) {
            $differs = $compare->($old_pkg->{$script_tag}->[0],
                                  $new_pkg->{$script_tag}->[0]);
          }
        }

        push @columns, ($differs ? $RPMTAG->{$tag}->{'diff_char'} : '.');
      }

      my $diff_str = join('', @columns);
      $prefix = $diff_str if ($diff_str ne $FILES_EQ_STRING);
    }

    next unless $prefix;

    exit 1 if $CMP_MODE;

    $NUM_DIFFERENCES++ if ($NUM_DIFFERENCES < $MAX_EXIT);

    push @report, "$prefix $RPMTAG->{$script_tag}->{'tag_name'}\n";
  }

  return join('', @report);
}


# traverse the data structures to create a text representation of the
# critical differences between the files stored in the package.  If we
# are running in cmpmode and a difference is found exit early.
sub format_file_differences {
  # Arguments: the parsed header hashes of the old and the new package.
  # Returns one text line per file which is missing, added or
  # different; the empty string when nothing was found.
  # Side effects: bumps $NUM_DIFFERENCES (capped at $MAX_EXIT) for each
  # reported file, and exits with status 1 on the first reported file
  # when $CMP_MODE is set.
  my ($rpm0, $rpm1) = @_;

  my $out = '';

  # a package without files has no name2index table at all.
  my $names0 = $rpm0->{'name2index'} || {};
  my $names1 = $rpm1->{'name2index'} || {};

  # every file name of either package, each one only once.
  my %all_names = map { $_ => 1 } (keys %$names0), (keys %$names1);

  foreach my $filename ( sort keys %all_names ) {

    my $index0 = $names0->{$filename};
    my $index1 = $names1->{$filename};

    # the indexes start at 0, so presence must be tested with
    # defined(): a plain truth test loses the first file of a package.
    my $prefix='';
    if ( defined($index0) && !defined($index1) ) {
      $prefix = 'missing ';
    } elsif ( !defined($index0) && defined($index1) ) {
      $prefix = 'added ';
    } else {
      my $diff_str = '';
      foreach my $cmp_tag (@ALL_CMP_TAGS) {

        my $differs = 0;

        # In the rare case where a tag is defined in one RPM and not
        # in another we charitably assume that the RPMs match on this
        # tag.  There is a warning in the stderr anyway.

        if ( ($CMP_TAGS{$cmp_tag}) &&
             ($rpm0->{$cmp_tag}) &&
             ($rpm1->{$cmp_tag}) ) {

          my $value0 = $rpm0->{$cmp_tag}->[$index0];
          my $value1 = $rpm1->{$cmp_tag}->[$index1];

          # 0 and '' are legitimate values (an empty file, no link
          # target), only undef means that the data is not there.
          $differs = ( defined($value0) &&
                       defined($value1) &&
                       ($value0 ne $value1) );
        }

        $diff_str .= ($differs ? $RPMTAG->{$cmp_tag}->{'diff_char'} : '.');

      } # foreach $cmp_tag
      if ($diff_str ne $FILES_EQ_STRING) {
        $prefix = $diff_str;
      }
    }

    ($prefix) || next;

    # leave with a plain exit status, the same way the script
    # comparison does; die would print a message and pick its own
    # exit status.
    if ($CMP_MODE) {
      exit 1;
    }

    ($NUM_DIFFERENCES < $MAX_EXIT) &&
      $NUM_DIFFERENCES++;

    # this set of blanks would contain information from the flags, if
    # only I was not so lazy
    $out .= "$prefix $filename\n";

  } # foreach $filename

  return $out;
}


# warn the user when a comparison that was requested can not be
# carried out due to lack of data in the header of at least one file.
#
# Arguments: the parsed header hashes of the two packages.
# Returns the warning text, one line per selected tag and package for
# which the header holds no data; the empty string when all is there.

sub data_missing_warnings {
  my ($rpm0, $rpm1) = @_;

  my $out = '';

  foreach my $cmp_tag (@ALL_CMP_TAGS) {

    # only the comparisons which were asked for matter.
    ($CMP_TAGS{$cmp_tag}) || next;

    foreach my $package ( [$rpm0, $RPMFILE0], [$rpm1, $RPMFILE1] ) {
      my ($rpm, $rpmfile) = @$package;

      ($rpm->{$cmp_tag}) && next;

      $out .= ("Comparison: '$RPMTAG->{$cmp_tag}->{'arg'}' ".
               "specified, but data is not availible in ".
               "rpm: $rpmfile.\n");
    }
  }

  return $out;
}


# -------------- main --------------
{
  set_static_vars();
  parse_args();

  $RPM0 = parse_rpm_headers($RPMFILE0);
  $RPM1 = parse_rpm_headers($RPMFILE1);

  my $warnings = data_missing_warnings($RPM0, $RPM1);

  # we must print warnings before running diff as we may exit early.
  ($warnings) &&
    warn($warnings);

  my $header = "oldpkg $RPMFILE0\n"."newpkg $RPMFILE1\n"."\n\n";
  my $script_diffs = format_script_differences($RPM0, $RPM1);
  my $file_diffs = format_file_differences($RPM0, $RPM1);

  # the header is only printed when there is something to report.
  ($script_diffs || $file_diffs) &&
    print $header, $script_diffs, $file_diffs;

  # the exit status is the number of differences, capped at $MAX_EXIT.
  exit $NUM_DIFFERENCES;
}