summaryrefslogtreecommitdiff
path: root/src/cmp.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/cmp.c')
-rw-r--r--src/cmp.c691
1 files changed, 691 insertions, 0 deletions
diff --git a/src/cmp.c b/src/cmp.c
new file mode 100644
index 0000000..adf1cf9
--- /dev/null
+++ b/src/cmp.c
@@ -0,0 +1,691 @@
+/* cmp - compare two files byte by byte
+
+ Copyright (C) 1990-1996, 1998, 2001-2002, 2004, 2006-2007, 2009-2011 Free
+ Software Foundation, Inc.
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>. */
+
+#include "system.h"
+#include "paths.h"
+
+#include <stdio.h>
+
+#include <c-stack.h>
+#include <cmpbuf.h>
+#include <error.h>
+#include <exitfail.h>
+#include <file-type.h>
+#include <getopt.h>
+#include <hard-locale.h>
+#include <inttostr.h>
+#include <progname.h>
+#include <unlocked-io.h>
+#include <version-etc.h>
+#include <xalloc.h>
+#include <xfreopen.h>
+#include <xstrtol.h>
+
+/* The official name of this program (e.g., no `g' prefix). */
+#define PROGRAM_NAME "cmp"
+
+#define AUTHORS \
+ proper_name_utf8 ("Torbjorn Granlund", "Torbj\303\266rn Granlund"), \
+ proper_name ("David MacKenzie")
+
+#if defined LC_MESSAGES && ENABLE_NLS
+# define hard_locale_LC_MESSAGES hard_locale (LC_MESSAGES)
+#else
+# define hard_locale_LC_MESSAGES 0
+#endif
+
+static int cmp (void);
+static off_t file_position (int);
+static size_t block_compare (word const *, word const *);
+static size_t block_compare_and_count (word const *, word const *, off_t *);
+static void sprintc (char *, unsigned char);
+
+/* Filenames of the compared files. */
+static char const *file[2];
+
+/* File descriptors of the files. */
+static int file_desc[2];
+
+/* Status of the files. */
+static struct stat stat_buf[2];
+
+/* Read buffers for the files. */
+static word *buffer[2];
+
+/* Optimal block size for the files. */
+static size_t buf_size;
+
+/* Initial prefix to ignore for each file. */
+static off_t ignore_initial[2];
+
+/* Number of bytes to compare. */
+static uintmax_t bytes = UINTMAX_MAX;
+
+/* Output format. */
+static enum comparison_type
+ {
+ type_first_diff, /* Print the first difference. */
+ type_all_diffs, /* Print all differences. */
+ type_no_stdout, /* Do not output to stdout; only stderr. */
+ type_status /* Exit status only. */
+ } comparison_type;
+
+/* If nonzero, print values of bytes quoted like cat -t does. */
+static bool opt_print_bytes;
+
+/* Values for long options that do not have single-letter equivalents. */
+enum
+{
+ HELP_OPTION = CHAR_MAX + 1
+};
+
+static struct option const long_options[] =
+{
+ {"print-bytes", 0, 0, 'b'},
+ {"print-chars", 0, 0, 'c'}, /* obsolescent as of diffutils 2.7.3 */
+ {"ignore-initial", 1, 0, 'i'},
+ {"verbose", 0, 0, 'l'},
+ {"bytes", 1, 0, 'n'},
+ {"silent", 0, 0, 's'},
+ {"quiet", 0, 0, 's'},
+ {"version", 0, 0, 'v'},
+ {"help", 0, 0, HELP_OPTION},
+ {0, 0, 0, 0}
+};
+
+static void try_help (char const *, char const *) __attribute__((noreturn));
+static void
+try_help (char const *reason_msgid, char const *operand)
+{
+ if (reason_msgid)
+ error (0, 0, _(reason_msgid), operand);
+ error (EXIT_TROUBLE, 0,
+ _("Try `%s --help' for more information."), program_name);
+ abort ();
+}
+
+static char const valid_suffixes[] = "kKMGTPEZY0";
+
+/* Update ignore_initial[F] according to the result of parsing an
+ *operand ARGPTR of --ignore-initial, updating *ARGPTR to point
+ *after the operand. If DELIMITER is nonzero, the operand may be
+ *followed by DELIMITER; otherwise it must be null-terminated. */
+static void
+specify_ignore_initial (int f, char **argptr, char delimiter)
+{
+ uintmax_t val;
+ char const *arg = *argptr;
+ strtol_error e = xstrtoumax (arg, argptr, 0, &val, valid_suffixes);
+ if (! (e == LONGINT_OK
+ || (e == LONGINT_INVALID_SUFFIX_CHAR && **argptr == delimiter))
+ || TYPE_MAXIMUM (off_t) < val)
+ try_help ("invalid --ignore-initial value `%s'", arg);
+ if (ignore_initial[f] < val)
+ ignore_initial[f] = val;
+}
+
+/* Specify the output format. */
+static void
+specify_comparison_type (enum comparison_type t)
+{
+ if (comparison_type && comparison_type != t)
+ try_help ("options -l and -s are incompatible", 0);
+ comparison_type = t;
+}
+
+static void
+check_stdout (void)
+{
+ if (ferror (stdout))
+ error (EXIT_TROUBLE, 0, "%s", _("write failed"));
+ else if (fclose (stdout) != 0)
+ error (EXIT_TROUBLE, errno, "%s", _("standard output"));
+}
+
+static char const * const option_help_msgid[] = {
+ N_("-b, --print-bytes print differing bytes"),
+ N_("-i, --ignore-initial=SKIP skip first SKIP bytes of both inputs"),
+ N_("-i, --ignore-initial=SKIP1:SKIP2 skip first SKIP1 bytes of FILE1 and\n"
+ " first SKIP2 bytes of FILE2"),
+ N_("-l, --verbose output byte numbers and differing byte values"),
+ N_("-n, --bytes=LIMIT compare at most LIMIT bytes"),
+ N_("-s, --quiet, --silent suppress all normal output"),
+ N_(" --help display this help and exit"),
+ N_("-v, --version output version information and exit"),
+ 0
+};
+
+static void
+usage (void)
+{
+ char const * const *p;
+
+ printf (_("Usage: %s [OPTION]... FILE1 [FILE2 [SKIP1 [SKIP2]]]\n"),
+ program_name);
+ printf ("%s\n", _("Compare two files byte by byte."));
+ printf ("\n%s\n\n",
+_("The optional SKIP1 and SKIP2 specify the number of bytes to skip\n"
+ "at the beginning of each file (zero by default)."));
+
+ fputs (_("\
+Mandatory arguments to long options are mandatory for short options too.\n\
+"), stdout);
+ for (p = option_help_msgid; *p; p++)
+ printf (" %s\n", _(*p));
+ printf ("\n%s\n\n%s\n%s\n",
+ _("SKIP values may be followed by the following multiplicative suffixes:\n\
+kB 1000, K 1024, MB 1,000,000, M 1,048,576,\n\
+GB 1,000,000,000, G 1,073,741,824, and so on for T, P, E, Z, Y."),
+ _("If a FILE is `-' or missing, read standard input."),
+ _("Exit status is 0 if inputs are the same, 1 if different, 2 if trouble."));
+ emit_bug_reporting_address ();
+}
+
+int
+main (int argc, char **argv)
+{
+ int c, f, exit_status;
+ size_t words_per_buffer;
+
+ exit_failure = EXIT_TROUBLE;
+ initialize_main (&argc, &argv);
+ set_program_name (argv[0]);
+ setlocale (LC_ALL, "");
+ bindtextdomain (PACKAGE, LOCALEDIR);
+ textdomain (PACKAGE);
+ c_stack_action (0);
+
+ /* Parse command line options. */
+
+ while ((c = getopt_long (argc, argv, "bci:ln:sv", long_options, 0))
+ != -1)
+ switch (c)
+ {
+ case 'b':
+ case 'c': /* 'c' is obsolescent as of diffutils 2.7.3 */
+ opt_print_bytes = true;
+ break;
+
+ case 'i':
+ specify_ignore_initial (0, &optarg, ':');
+ if (*optarg++ == ':')
+ specify_ignore_initial (1, &optarg, 0);
+ else if (ignore_initial[1] < ignore_initial[0])
+ ignore_initial[1] = ignore_initial[0];
+ break;
+
+ case 'l':
+ specify_comparison_type (type_all_diffs);
+ break;
+
+ case 'n':
+ {
+ uintmax_t n;
+ if (xstrtoumax (optarg, 0, 0, &n, valid_suffixes) != LONGINT_OK)
+ try_help ("invalid --bytes value `%s'", optarg);
+ if (n < bytes)
+ bytes = n;
+ }
+ break;
+
+ case 's':
+ specify_comparison_type (type_status);
+ break;
+
+ case 'v':
+ version_etc (stdout, PROGRAM_NAME, PACKAGE_NAME, Version,
+ AUTHORS, (char *) NULL);
+ check_stdout ();
+ return EXIT_SUCCESS;
+
+ case HELP_OPTION:
+ usage ();
+ check_stdout ();
+ return EXIT_SUCCESS;
+
+ default:
+ try_help (0, 0);
+ }
+
+ if (optind == argc)
+ try_help ("missing operand after `%s'", argv[argc - 1]);
+
+ file[0] = argv[optind++];
+ file[1] = optind < argc ? argv[optind++] : "-";
+
+ for (f = 0; f < 2 && optind < argc; f++)
+ {
+ char *arg = argv[optind++];
+ specify_ignore_initial (f, &arg, 0);
+ }
+
+ if (optind < argc)
+ try_help ("extra operand `%s'", argv[optind]);
+
+ for (f = 0; f < 2; f++)
+ {
+ /* If file[1] is "-", treat it first; this avoids a misdiagnostic if
+ stdin is closed and opening file[0] yields file descriptor 0. */
+ int f1 = f ^ (STREQ (file[1], "-"));
+
+ /* Two files with the same name and offset are identical.
+ But wait until we open the file once, for proper diagnostics. */
+ if (f && ignore_initial[0] == ignore_initial[1]
+ && file_name_cmp (file[0], file[1]) == 0)
+ return EXIT_SUCCESS;
+
+ if (STREQ (file[f1], "-"))
+ {
+ file_desc[f1] = STDIN_FILENO;
+ if (O_BINARY && ! isatty (STDIN_FILENO))
+ xfreopen (NULL, "rb", stdin);
+ }
+ else
+ file_desc[f1] = open (file[f1], O_RDONLY | O_BINARY, 0);
+
+ if (file_desc[f1] < 0 || fstat (file_desc[f1], stat_buf + f1) != 0)
+ {
+ if (file_desc[f1] < 0 && comparison_type == type_status)
+ exit (EXIT_TROUBLE);
+ else
+ error (EXIT_TROUBLE, errno, "%s", file[f1]);
+ }
+ }
+
+ /* If the files are links to the same inode and have the same file position,
+ they are identical. */
+
+ if (0 < same_file (&stat_buf[0], &stat_buf[1])
+ && same_file_attributes (&stat_buf[0], &stat_buf[1])
+ && file_position (0) == file_position (1))
+ return EXIT_SUCCESS;
+
+ /* If output is redirected to the null device, we can avoid some of
+ the work. */
+
+ if (comparison_type != type_status)
+ {
+ struct stat outstat, nullstat;
+
+ if (fstat (STDOUT_FILENO, &outstat) == 0
+ && stat (NULL_DEVICE, &nullstat) == 0
+ && 0 < same_file (&outstat, &nullstat))
+ comparison_type = type_no_stdout;
+ }
+
+ /* If only a return code is needed,
+ and if both input descriptors are associated with plain files,
+ conclude that the files differ if they have different sizes
+ and if more bytes will be compared than are in the smaller file. */
+
+ if (comparison_type == type_status
+ && S_ISREG (stat_buf[0].st_mode)
+ && S_ISREG (stat_buf[1].st_mode))
+ {
+ off_t s0 = stat_buf[0].st_size - file_position (0);
+ off_t s1 = stat_buf[1].st_size - file_position (1);
+ if (s0 < 0)
+ s0 = 0;
+ if (s1 < 0)
+ s1 = 0;
+ if (s0 != s1 && MIN (s0, s1) < bytes)
+ exit (EXIT_FAILURE);
+ }
+
+ /* Get the optimal block size of the files. */
+
+ buf_size = buffer_lcm (STAT_BLOCKSIZE (stat_buf[0]),
+ STAT_BLOCKSIZE (stat_buf[1]),
+ PTRDIFF_MAX - sizeof (word));
+
+ /* Allocate word-aligned buffers, with space for sentinels at the end. */
+
+ words_per_buffer = (buf_size + 2 * sizeof (word) - 1) / sizeof (word);
+ buffer[0] = xmalloc (2 * sizeof (word) * words_per_buffer);
+ buffer[1] = buffer[0] + words_per_buffer;
+
+ exit_status = cmp ();
+
+ for (f = 0; f < 2; f++)
+ if (close (file_desc[f]) != 0)
+ error (EXIT_TROUBLE, errno, "%s", file[f]);
+ if (exit_status != EXIT_SUCCESS && comparison_type < type_no_stdout)
+ check_stdout ();
+ exit (exit_status);
+ return exit_status;
+}
+
+/* Compare the two files already open on `file_desc[0]' and `file_desc[1]',
+ using `buffer[0]' and `buffer[1]'.
+ Return EXIT_SUCCESS if identical, EXIT_FAILURE if different,
+ >1 if error. */
+
+static int
+cmp (void)
+{
+ off_t line_number = 1; /* Line number (1...) of difference. */
+ off_t byte_number = 1; /* Byte number (1...) of difference. */
+ uintmax_t remaining = bytes; /* Remaining number of bytes to compare. */
+ size_t read0, read1; /* Number of bytes read from each file. */
+ size_t first_diff; /* Offset (0...) in buffers of 1st diff. */
+ size_t smaller; /* The lesser of `read0' and `read1'. */
+ word *buffer0 = buffer[0];
+ word *buffer1 = buffer[1];
+ char *buf0 = (char *) buffer0;
+ char *buf1 = (char *) buffer1;
+ int differing = 0;
+ int f;
+ int offset_width IF_LINT (= 0);
+
+ if (comparison_type == type_all_diffs)
+ {
+ off_t byte_number_max = MIN (bytes, TYPE_MAXIMUM (off_t));
+
+ for (f = 0; f < 2; f++)
+ if (S_ISREG (stat_buf[f].st_mode))
+ {
+ off_t file_bytes = stat_buf[f].st_size - file_position (f);
+ if (file_bytes < byte_number_max)
+ byte_number_max = file_bytes;
+ }
+
+ for (offset_width = 1; (byte_number_max /= 10) != 0; offset_width++)
+ continue;
+ }
+
+ for (f = 0; f < 2; f++)
+ {
+ off_t ig = ignore_initial[f];
+ if (ig && file_position (f) == -1)
+ {
+ /* lseek failed; read and discard the ignored initial prefix. */
+ do
+ {
+ size_t bytes_to_read = MIN (ig, buf_size);
+ size_t r = block_read (file_desc[f], buf0, bytes_to_read);
+ if (r != bytes_to_read)
+ {
+ if (r == SIZE_MAX)
+ error (EXIT_TROUBLE, errno, "%s", file[f]);
+ break;
+ }
+ ig -= r;
+ }
+ while (ig);
+ }
+ }
+
+ do
+ {
+ size_t bytes_to_read = buf_size;
+
+ if (remaining != UINTMAX_MAX)
+ {
+ if (remaining < bytes_to_read)
+ bytes_to_read = remaining;
+ remaining -= bytes_to_read;
+ }
+
+ read0 = block_read (file_desc[0], buf0, bytes_to_read);
+ if (read0 == SIZE_MAX)
+ error (EXIT_TROUBLE, errno, "%s", file[0]);
+ read1 = block_read (file_desc[1], buf1, bytes_to_read);
+ if (read1 == SIZE_MAX)
+ error (EXIT_TROUBLE, errno, "%s", file[1]);
+
+ /* Insert sentinels for the block compare. */
+
+ buf0[read0] = ~buf1[read0];
+ buf1[read1] = ~buf0[read1];
+
+ /* If the line number should be written for differing files,
+ compare the blocks and count the number of newlines
+ simultaneously. */
+ first_diff = (comparison_type == type_first_diff
+ ? block_compare_and_count (buffer0, buffer1, &line_number)
+ : block_compare (buffer0, buffer1));
+
+ byte_number += first_diff;
+ smaller = MIN (read0, read1);
+
+ if (first_diff < smaller)
+ {
+ switch (comparison_type)
+ {
+ case type_first_diff:
+ {
+ char byte_buf[INT_BUFSIZE_BOUND (off_t)];
+ char line_buf[INT_BUFSIZE_BOUND (off_t)];
+ char const *byte_num = offtostr (byte_number, byte_buf);
+ char const *line_num = offtostr (line_number, line_buf);
+ if (!opt_print_bytes)
+ {
+ /* See POSIX 1003.1-2001 for this format. This
+ message is used only in the POSIX locale, so it
+ need not be translated. */
+ static char const char_message[] =
+ "%s %s differ: char %s, line %s\n";
+
+ /* The POSIX rationale recommends using the word
+ "byte" outside the POSIX locale. Some gettext
+ implementations translate even in the POSIX
+ locale if certain other environment variables
+ are set, so use "byte" if a translation is
+ available, or if outside the POSIX locale. */
+ static char const byte_msgid[] =
+ N_("%s %s differ: byte %s, line %s\n");
+ char const *byte_message = _(byte_msgid);
+ bool use_byte_message = (byte_message != byte_msgid
+ || hard_locale_LC_MESSAGES);
+
+ printf (use_byte_message ? byte_message : char_message,
+ file[0], file[1], byte_num, line_num);
+ }
+ else
+ {
+ unsigned char c0 = buf0[first_diff];
+ unsigned char c1 = buf1[first_diff];
+ char s0[5];
+ char s1[5];
+ sprintc (s0, c0);
+ sprintc (s1, c1);
+ printf (_("%s %s differ: byte %s, line %s is %3o %s %3o %s\n"),
+ file[0], file[1], byte_num, line_num,
+ c0, s0, c1, s1);
+ }
+ }
+ /* Fall through. */
+ case type_status:
+ return EXIT_FAILURE;
+
+ case type_all_diffs:
+ do
+ {
+ unsigned char c0 = buf0[first_diff];
+ unsigned char c1 = buf1[first_diff];
+ if (c0 != c1)
+ {
+ char byte_buf[INT_BUFSIZE_BOUND (off_t)];
+ char const *byte_num = offtostr (byte_number, byte_buf);
+ if (!opt_print_bytes)
+ {
+ /* See POSIX 1003.1-2001 for this format. */
+ printf ("%*s %3o %3o\n",
+ offset_width, byte_num, c0, c1);
+ }
+ else
+ {
+ char s0[5];
+ char s1[5];
+ sprintc (s0, c0);
+ sprintc (s1, c1);
+ printf ("%*s %3o %-4s %3o %s\n",
+ offset_width, byte_num, c0, s0, c1, s1);
+ }
+ }
+ byte_number++;
+ first_diff++;
+ }
+ while (first_diff < smaller);
+ differing = -1;
+ break;
+
+ case type_no_stdout:
+ differing = 1;
+ break;
+ }
+ }
+
+ if (read0 != read1)
+ {
+ if (differing <= 0 && comparison_type != type_status)
+ {
+ /* See POSIX 1003.1-2001 for this format. */
+ fprintf (stderr, _("cmp: EOF on %s\n"), file[read1 < read0]);
+ }
+
+ return EXIT_FAILURE;
+ }
+ }
+ while (differing <= 0 && read0 == buf_size);
+
+ return differing == 0 ? EXIT_SUCCESS : EXIT_FAILURE;
+}
+
+/* Compare two blocks of memory P0 and P1 until they differ,
+ and count the number of '\n' occurrences in the common
+ part of P0 and P1.
+ If the blocks are not guaranteed to be different, put sentinels at the ends
+ of the blocks before calling this function.
+
+ Return the offset of the first byte that differs.
+ Increment *COUNT by the count of '\n' occurrences. */
+
+static size_t
+block_compare_and_count (word const *p0, word const *p1, off_t *count)
+{
+ word l; /* One word from first buffer. */
+ word const *l0, *l1; /* Pointers into each buffer. */
+ char const *c0, *c1; /* Pointers for finding exact address. */
+ size_t cnt = 0; /* Number of '\n' occurrences. */
+ word nnnn; /* Newline, sizeof (word) times. */
+ int i;
+
+ nnnn = 0;
+ for (i = 0; i < sizeof nnnn; i++)
+ nnnn = (nnnn << CHAR_BIT) | '\n';
+
+ /* Find the rough position of the first difference by reading words,
+ not bytes. */
+
+ for (l0 = p0, l1 = p1; (l = *l0) == *l1; l0++, l1++)
+ {
+ l ^= nnnn;
+ for (i = 0; i < sizeof l; i++)
+ {
+ unsigned char uc = l;
+ cnt += ! uc;
+ l >>= CHAR_BIT;
+ }
+ }
+
+ /* Find the exact differing position (endianness independent). */
+
+ for (c0 = (char const *) l0, c1 = (char const *) l1;
+ *c0 == *c1;
+ c0++, c1++)
+ cnt += *c0 == '\n';
+
+ *count += cnt;
+ return c0 - (char const *) p0;
+}
+
+/* Compare two blocks of memory P0 and P1 until they differ.
+ If the blocks are not guaranteed to be different, put sentinels at the ends
+ of the blocks before calling this function.
+
+ Return the offset of the first byte that differs. */
+
+static size_t
+block_compare (word const *p0, word const *p1)
+{
+ word const *l0, *l1;
+ char const *c0, *c1;
+
+ /* Find the rough position of the first difference by reading words,
+ not bytes. */
+
+ for (l0 = p0, l1 = p1; *l0 == *l1; l0++, l1++)
+ continue;
+
+ /* Find the exact differing position (endianness independent). */
+
+ for (c0 = (char const *) l0, c1 = (char const *) l1;
+ *c0 == *c1;
+ c0++, c1++)
+ continue;
+
+ return c0 - (char const *) p0;
+}
+
+/* Put into BUF the unsigned char C, making unprintable bytes
+ visible by quoting like cat -t does. */
+
+static void
+sprintc (char *buf, unsigned char c)
+{
+ if (! isprint (c))
+ {
+ if (c >= 128)
+ {
+ *buf++ = 'M';
+ *buf++ = '-';
+ c -= 128;
+ }
+ if (c < 32)
+ {
+ *buf++ = '^';
+ c += 64;
+ }
+ else if (c == 127)
+ {
+ *buf++ = '^';
+ c = '?';
+ }
+ }
+
+ *buf++ = c;
+ *buf = 0;
+}
+
+/* Position file F to ignore_initial[F] bytes from its initial position,
+ and yield its new position. Don't try more than once. */
+
+static off_t
+file_position (int f)
+{
+ static bool positioned[2];
+ static off_t position[2];
+
+ if (! positioned[f])
+ {
+ positioned[f] = true;
+ position[f] = lseek (file_desc[f], ignore_initial[f], SEEK_CUR);
+ }
+ return position[f];
+}