summaryrefslogtreecommitdiff
path: root/src/util.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/util.c')
-rw-r--r--src/util.c788
1 files changed, 788 insertions, 0 deletions
diff --git a/src/util.c b/src/util.c
new file mode 100644
index 0000000..3be03e9
--- /dev/null
+++ b/src/util.c
@@ -0,0 +1,788 @@
+/* Support routines for GNU DIFF.
+
+ Copyright (C) 1988-1989, 1992-1995, 1998, 2001-2002, 2004, 2006, 2009-2010
+ Free Software Foundation, Inc.
+
+ This file is part of GNU DIFF.
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>. */
+
+#include "diff.h"
+#include <dirname.h>
+#include <error.h>
+#include <sh-quote.h>
+#include <xalloc.h>
+
+char const pr_program[] = PR_PROGRAM;
+
+/* Queue up one-line messages to be printed at the end,
+ when -l is specified. Each message is recorded with a `struct msg'. */
+
+struct msg
+{
+ struct msg *next;
+ char args[1]; /* Format + 4 args, each '\0' terminated, concatenated. */
+};
+
+/* Head of the chain of queues messages. */
+
+static struct msg *msg_chain;
+
+/* Tail of the chain of queues messages. */
+
+static struct msg **msg_chain_end = &msg_chain;
+
+/* Use when a system call returns non-zero status.
+ NAME should normally be the file name. */
+
+void
+perror_with_name (char const *name)
+{
+ error (0, errno, "%s", name);
+}
+
+/* Use when a system call returns non-zero status and that is fatal. */
+
+void
+pfatal_with_name (char const *name)
+{
+ int e = errno;
+ print_message_queue ();
+ error (EXIT_TROUBLE, e, "%s", name);
+ abort ();
+}
+
+/* Print an error message containing MSGID, then exit. */
+
+void
+fatal (char const *msgid)
+{
+ print_message_queue ();
+ error (EXIT_TROUBLE, 0, "%s", _(msgid));
+ abort ();
+}
+
+/* Like printf, except if -l in effect then save the message and print later.
+ This is used for things like "Only in ...". */
+
+void
+message (char const *format_msgid, char const *arg1, char const *arg2)
+{
+ message5 (format_msgid, arg1, arg2, 0, 0);
+}
+
+void
+message5 (char const *format_msgid, char const *arg1, char const *arg2,
+ char const *arg3, char const *arg4)
+{
+ if (paginate)
+ {
+ char *p;
+ char const *arg[5];
+ int i;
+ size_t size[5];
+ size_t total_size = offsetof (struct msg, args);
+ struct msg *new;
+
+ arg[0] = format_msgid;
+ arg[1] = arg1;
+ arg[2] = arg2;
+ arg[3] = arg3 ? arg3 : "";
+ arg[4] = arg4 ? arg4 : "";
+
+ for (i = 0; i < 5; i++)
+ total_size += size[i] = strlen (arg[i]) + 1;
+
+ new = xmalloc (total_size);
+
+ for (i = 0, p = new->args; i < 5; p += size[i++])
+ memcpy (p, arg[i], size[i]);
+
+ *msg_chain_end = new;
+ new->next = 0;
+ msg_chain_end = &new->next;
+ }
+ else
+ {
+ if (sdiff_merge_assist)
+ putchar (' ');
+ printf (_(format_msgid), arg1, arg2, arg3, arg4);
+ }
+}
+
+/* Output all the messages that were saved up by calls to `message'. */
+
+void
+print_message_queue (void)
+{
+ char const *arg[5];
+ int i;
+ struct msg *m = msg_chain;
+
+ while (m)
+ {
+ struct msg *next = m->next;
+ arg[0] = m->args;
+ for (i = 0; i < 4; i++)
+ arg[i + 1] = arg[i] + strlen (arg[i]) + 1;
+ printf (_(arg[0]), arg[1], arg[2], arg[3], arg[4]);
+ free (m);
+ m = next;
+ }
+}
+
+/* Call before outputting the results of comparing files NAME0 and NAME1
+ to set up OUTFILE, the stdio stream for the output to go to.
+
+ Usually, OUTFILE is just stdout. But when -l was specified
+ we fork off a `pr' and make OUTFILE a pipe to it.
+ `pr' then outputs to our stdout. */
+
+static char const *current_name0;
+static char const *current_name1;
+static bool currently_recursive;
+
+void
+setup_output (char const *name0, char const *name1, bool recursive)
+{
+ current_name0 = name0;
+ current_name1 = name1;
+ currently_recursive = recursive;
+ outfile = 0;
+}
+
+#if HAVE_WORKING_FORK || HAVE_WORKING_VFORK
+static pid_t pr_pid;
+#endif
+
+void
+begin_output (void)
+{
+ char *name;
+
+ if (outfile != 0)
+ return;
+
+ /* Construct the header of this piece of diff. */
+ name = xmalloc (strlen (current_name0) + strlen (current_name1)
+ + strlen (switch_string) + 7);
+
+ /* POSIX 1003.1-2001 specifies this format. But there are some bugs in
+ the standard: it says that we must print only the last component
+ of the pathnames, and it requires two spaces after "diff" if
+ there are no options. These requirements are silly and do not
+ match historical practice. */
+ sprintf (name, "diff%s %s %s", switch_string, current_name0, current_name1);
+
+ if (paginate)
+ {
+ if (fflush (stdout) != 0)
+ pfatal_with_name (_("write failed"));
+
+ /* Make OUTFILE a pipe to a subsidiary `pr'. */
+ {
+#if HAVE_WORKING_FORK || HAVE_WORKING_VFORK
+ int pipes[2];
+
+ if (pipe (pipes) != 0)
+ pfatal_with_name ("pipe");
+
+ pr_pid = vfork ();
+ if (pr_pid < 0)
+ pfatal_with_name ("fork");
+
+ if (pr_pid == 0)
+ {
+ close (pipes[1]);
+ if (pipes[0] != STDIN_FILENO)
+ {
+ if (dup2 (pipes[0], STDIN_FILENO) < 0)
+ pfatal_with_name ("dup2");
+ close (pipes[0]);
+ }
+
+ execl (pr_program, pr_program, "-h", name, (char *) 0);
+ _exit (errno == ENOENT ? 127 : 126);
+ }
+ else
+ {
+ close (pipes[0]);
+ outfile = fdopen (pipes[1], "w");
+ if (!outfile)
+ pfatal_with_name ("fdopen");
+ }
+#else
+ char *command = xmalloc (sizeof pr_program - 1 + 7
+ + shell_quote_length (name) + 1);
+ char *p;
+ sprintf (command, "%s -f -h ", pr_program);
+ p = command + sizeof pr_program - 1 + 7;
+ p = shell_quote_copy (p, name);
+ *p = 0;
+ errno = 0;
+ outfile = popen (command, "w");
+ if (!outfile)
+ pfatal_with_name (command);
+ free (command);
+#endif
+ }
+ }
+ else
+ {
+
+ /* If -l was not specified, output the diff straight to `stdout'. */
+
+ outfile = stdout;
+
+ /* If handling multiple files (because scanning a directory),
+ print which files the following output is about. */
+ if (currently_recursive)
+ printf ("%s\n", name);
+ }
+
+ free (name);
+
+ /* A special header is needed at the beginning of context output. */
+ switch (output_style)
+ {
+ case OUTPUT_CONTEXT:
+ print_context_header (files, false);
+ break;
+
+ case OUTPUT_UNIFIED:
+ print_context_header (files, true);
+ break;
+
+ default:
+ break;
+ }
+}
+
+/* Call after the end of output of diffs for one file.
+ Close OUTFILE and get rid of the `pr' subfork. */
+
+void
+finish_output (void)
+{
+ if (outfile != 0 && outfile != stdout)
+ {
+ int status;
+ int wstatus;
+ int werrno = 0;
+ if (ferror (outfile))
+ fatal ("write failed");
+#if ! (HAVE_WORKING_FORK || HAVE_WORKING_VFORK)
+ wstatus = pclose (outfile);
+ if (wstatus == -1)
+ werrno = errno;
+#else
+ if (fclose (outfile) != 0)
+ pfatal_with_name (_("write failed"));
+ if (waitpid (pr_pid, &wstatus, 0) < 0)
+ pfatal_with_name ("waitpid");
+#endif
+ status = (! werrno && WIFEXITED (wstatus)
+ ? WEXITSTATUS (wstatus)
+ : INT_MAX);
+ if (status)
+ error (EXIT_TROUBLE, werrno,
+ _(status == 126
+ ? "subsidiary program `%s' could not be invoked"
+ : status == 127
+ ? "subsidiary program `%s' not found"
+ : status == INT_MAX
+ ? "subsidiary program `%s' failed"
+ : "subsidiary program `%s' failed (exit status %d)"),
+ pr_program, status);
+ }
+
+ outfile = 0;
+}
+
+/* Compare two lines (typically one from each input file)
+ according to the command line options.
+ For efficiency, this is invoked only when the lines do not match exactly
+ but an option like -i might cause us to ignore the difference.
+ Return nonzero if the lines differ. */
+
+bool
+lines_differ (char const *s1, char const *s2)
+{
+ register char const *t1 = s1;
+ register char const *t2 = s2;
+ size_t column = 0;
+
+ while (1)
+ {
+ register unsigned char c1 = *t1++;
+ register unsigned char c2 = *t2++;
+
+ /* Test for exact char equality first, since it's a common case. */
+ if (c1 != c2)
+ {
+ switch (ignore_white_space)
+ {
+ case IGNORE_ALL_SPACE:
+ /* For -w, just skip past any white space. */
+ while (isspace (c1) && c1 != '\n') c1 = *t1++;
+ while (isspace (c2) && c2 != '\n') c2 = *t2++;
+ break;
+
+ case IGNORE_SPACE_CHANGE:
+ /* For -b, advance past any sequence of white space in
+ line 1 and consider it just one space, or nothing at
+ all if it is at the end of the line. */
+ if (isspace (c1))
+ {
+ while (c1 != '\n')
+ {
+ c1 = *t1++;
+ if (! isspace (c1))
+ {
+ --t1;
+ c1 = ' ';
+ break;
+ }
+ }
+ }
+
+ /* Likewise for line 2. */
+ if (isspace (c2))
+ {
+ while (c2 != '\n')
+ {
+ c2 = *t2++;
+ if (! isspace (c2))
+ {
+ --t2;
+ c2 = ' ';
+ break;
+ }
+ }
+ }
+
+ if (c1 != c2)
+ {
+ /* If we went too far when doing the simple test
+ for equality, go back to the first non-white-space
+ character in both sides and try again. */
+ if (c2 == ' ' && c1 != '\n'
+ && s1 + 1 < t1
+ && isspace ((unsigned char) t1[-2]))
+ {
+ --t1;
+ continue;
+ }
+ if (c1 == ' ' && c2 != '\n'
+ && s2 + 1 < t2
+ && isspace ((unsigned char) t2[-2]))
+ {
+ --t2;
+ continue;
+ }
+ }
+
+ break;
+
+ case IGNORE_TAB_EXPANSION:
+ if ((c1 == ' ' && c2 == '\t')
+ || (c1 == '\t' && c2 == ' '))
+ {
+ size_t column2 = column;
+ for (;; c1 = *t1++)
+ {
+ if (c1 == ' ')
+ column++;
+ else if (c1 == '\t')
+ column += tabsize - column % tabsize;
+ else
+ break;
+ }
+ for (;; c2 = *t2++)
+ {
+ if (c2 == ' ')
+ column2++;
+ else if (c2 == '\t')
+ column2 += tabsize - column2 % tabsize;
+ else
+ break;
+ }
+ if (column != column2)
+ return true;
+ }
+ break;
+
+ case IGNORE_NO_WHITE_SPACE:
+ break;
+ }
+
+ /* Lowercase all letters if -i is specified. */
+
+ if (ignore_case)
+ {
+ c1 = tolower (c1);
+ c2 = tolower (c2);
+ }
+
+ if (c1 != c2)
+ break;
+ }
+ if (c1 == '\n')
+ return false;
+
+ column += c1 == '\t' ? tabsize - column % tabsize : 1;
+ }
+
+ return true;
+}
+
+/* Find the consecutive changes at the start of the script START.
+ Return the last link before the first gap. */
+
+struct change *
+find_change (struct change *start)
+{
+ return start;
+}
+
+struct change *
+find_reverse_change (struct change *start)
+{
+ return start;
+}
+
+/* Divide SCRIPT into pieces by calling HUNKFUN and
+ print each piece with PRINTFUN.
+ Both functions take one arg, an edit script.
+
+ HUNKFUN is called with the tail of the script
+ and returns the last link that belongs together with the start
+ of the tail.
+
+ PRINTFUN takes a subscript which belongs together (with a null
+ link at the end) and prints it. */
+
+void
+print_script (struct change *script,
+ struct change * (*hunkfun) (struct change *),
+ void (*printfun) (struct change *))
+{
+ struct change *next = script;
+
+ while (next)
+ {
+ struct change *this, *end;
+
+ /* Find a set of changes that belong together. */
+ this = next;
+ end = (*hunkfun) (next);
+
+ /* Disconnect them from the rest of the changes,
+ making them a hunk, and remember the rest for next iteration. */
+ next = end->link;
+ end->link = 0;
+#ifdef DEBUG
+ debug_script (this);
+#endif
+
+ /* Print this hunk. */
+ (*printfun) (this);
+
+ /* Reconnect the script so it will all be freed properly. */
+ end->link = next;
+ }
+}
+
+/* Print the text of a single line LINE,
+ flagging it with the characters in LINE_FLAG (which say whether
+ the line is inserted, deleted, changed, etc.). LINE_FLAG must not
+ end in a blank, unless it is a single blank. */
+
+void
+print_1_line (char const *line_flag, char const *const *line)
+{
+ char const *base = line[0], *limit = line[1]; /* Help the compiler. */
+ FILE *out = outfile; /* Help the compiler some more. */
+ char const *flag_format = 0;
+
+ /* If -T was specified, use a Tab between the line-flag and the text.
+ Otherwise use a Space (as Unix diff does).
+ Print neither space nor tab if line-flags are empty.
+ But omit trailing blanks if requested. */
+
+ if (line_flag && *line_flag)
+ {
+ char const *flag_format_1 = flag_format = initial_tab ? "%s\t" : "%s ";
+ char const *line_flag_1 = line_flag;
+
+ if (suppress_blank_empty && **line == '\n')
+ {
+ flag_format_1 = "%s";
+
+ /* This hack to omit trailing blanks takes advantage of the
+ fact that the only way that LINE_FLAG can end in a blank
+ is when LINE_FLAG consists of a single blank. */
+ line_flag_1 += *line_flag_1 == ' ';
+ }
+
+ fprintf (out, flag_format_1, line_flag_1);
+ }
+
+ output_1_line (base, limit, flag_format, line_flag);
+
+ if ((!line_flag || line_flag[0]) && limit[-1] != '\n')
+ fprintf (out, "\n\\ %s\n", _("No newline at end of file"));
+}
+
+/* Output a line from BASE up to LIMIT.
+ With -t, expand white space characters to spaces, and if FLAG_FORMAT
+ is nonzero, output it with argument LINE_FLAG after every
+ internal carriage return, so that tab stops continue to line up. */
+
+void
+output_1_line (char const *base, char const *limit, char const *flag_format,
+ char const *line_flag)
+{
+ if (!expand_tabs)
+ fwrite (base, sizeof (char), limit - base, outfile);
+ else
+ {
+ register FILE *out = outfile;
+ register unsigned char c;
+ register char const *t = base;
+ register size_t column = 0;
+ size_t tab_size = tabsize;
+
+ while (t < limit)
+ switch ((c = *t++))
+ {
+ case '\t':
+ {
+ size_t spaces = tab_size - column % tab_size;
+ column += spaces;
+ do
+ putc (' ', out);
+ while (--spaces);
+ }
+ break;
+
+ case '\r':
+ putc (c, out);
+ if (flag_format && t < limit && *t != '\n')
+ fprintf (out, flag_format, line_flag);
+ column = 0;
+ break;
+
+ case '\b':
+ if (column == 0)
+ continue;
+ column--;
+ putc (c, out);
+ break;
+
+ default:
+ column += isprint (c) != 0;
+ putc (c, out);
+ break;
+ }
+ }
+}
+
+char const change_letter[] = { 0, 'd', 'a', 'c' };
+
+/* Translate an internal line number (an index into diff's table of lines)
+ into an actual line number in the input file.
+ The internal line number is I. FILE points to the data on the file.
+
+ Internal line numbers count from 0 starting after the prefix.
+ Actual line numbers count from 1 within the entire file. */
+
+lin
+translate_line_number (struct file_data const *file, lin i)
+{
+ return i + file->prefix_lines + 1;
+}
+
+/* Translate a line number range. This is always done for printing,
+ so for convenience translate to long int rather than lin, so that the
+ caller can use printf with "%ld" without casting. */
+
+void
+translate_range (struct file_data const *file,
+ lin a, lin b,
+ long int *aptr, long int *bptr)
+{
+ *aptr = translate_line_number (file, a - 1) + 1;
+ *bptr = translate_line_number (file, b + 1) - 1;
+}
+
+/* Print a pair of line numbers with SEPCHAR, translated for file FILE.
+ If the two numbers are identical, print just one number.
+
+ Args A and B are internal line numbers.
+ We print the translated (real) line numbers. */
+
+void
+print_number_range (char sepchar, struct file_data *file, lin a, lin b)
+{
+ long int trans_a, trans_b;
+ translate_range (file, a, b, &trans_a, &trans_b);
+
+ /* Note: we can have B < A in the case of a range of no lines.
+ In this case, we should print the line number before the range,
+ which is B. */
+ if (trans_b > trans_a)
+ fprintf (outfile, "%ld%c%ld", trans_a, sepchar, trans_b);
+ else
+ fprintf (outfile, "%ld", trans_b);
+}
+
+/* Look at a hunk of edit script and report the range of lines in each file
+ that it applies to. HUNK is the start of the hunk, which is a chain
+ of `struct change'. The first and last line numbers of file 0 are stored in
+ *FIRST0 and *LAST0, and likewise for file 1 in *FIRST1 and *LAST1.
+ Note that these are internal line numbers that count from 0.
+
+ If no lines from file 0 are deleted, then FIRST0 is LAST0+1.
+
+ Return UNCHANGED if only ignorable lines are inserted or deleted,
+ OLD if lines of file 0 are deleted,
+ NEW if lines of file 1 are inserted,
+ and CHANGED if both kinds of changes are found. */
+
+enum changes
+analyze_hunk (struct change *hunk,
+ lin *first0, lin *last0,
+ lin *first1, lin *last1)
+{
+ struct change *next;
+ lin l0, l1;
+ lin show_from, show_to;
+ lin i;
+ bool trivial = ignore_blank_lines || ignore_regexp.fastmap;
+ size_t trivial_length = ignore_blank_lines - 1;
+ /* If 0, ignore zero-length lines;
+ if SIZE_MAX, do not ignore lines just because of their length. */
+ bool skip_leading_white_space =
+ (ignore_blank_lines && IGNORE_SPACE_CHANGE <= ignore_white_space);
+
+ char const * const *linbuf0 = files[0].linbuf; /* Help the compiler. */
+ char const * const *linbuf1 = files[1].linbuf;
+
+ show_from = show_to = 0;
+
+ *first0 = hunk->line0;
+ *first1 = hunk->line1;
+
+ next = hunk;
+ do
+ {
+ l0 = next->line0 + next->deleted - 1;
+ l1 = next->line1 + next->inserted - 1;
+ show_from += next->deleted;
+ show_to += next->inserted;
+
+ for (i = next->line0; i <= l0 && trivial; i++)
+ {
+ char const *line = linbuf0[i];
+ char const *newline = linbuf0[i + 1] - 1;
+ size_t len = newline - line;
+ char const *p = line;
+ if (skip_leading_white_space)
+ while (isspace ((unsigned char) *p) && *p != '\n')
+ p++;
+ if (newline - p != trivial_length
+ && (! ignore_regexp.fastmap
+ || re_search (&ignore_regexp, line, len, 0, len, 0) < 0))
+ trivial = 0;
+ }
+
+ for (i = next->line1; i <= l1 && trivial; i++)
+ {
+ char const *line = linbuf1[i];
+ char const *newline = linbuf1[i + 1] - 1;
+ size_t len = newline - line;
+ char const *p = line;
+ if (skip_leading_white_space)
+ while (isspace ((unsigned char) *p) && *p != '\n')
+ p++;
+ if (newline - p != trivial_length
+ && (! ignore_regexp.fastmap
+ || re_search (&ignore_regexp, line, len, 0, len, 0) < 0))
+ trivial = 0;
+ }
+ }
+ while ((next = next->link) != 0);
+
+ *last0 = l0;
+ *last1 = l1;
+
+ /* If all inserted or deleted lines are ignorable,
+ tell the caller to ignore this hunk. */
+
+ if (trivial)
+ return UNCHANGED;
+
+ return (show_from ? OLD : UNCHANGED) | (show_to ? NEW : UNCHANGED);
+}
+
+/* Concatenate three strings, returning a newly malloc'd string. */
+
+char *
+concat (char const *s1, char const *s2, char const *s3)
+{
+ char *new = xmalloc (strlen (s1) + strlen (s2) + strlen (s3) + 1);
+ sprintf (new, "%s%s%s", s1, s2, s3);
+ return new;
+}
+
+/* Yield a new block of SIZE bytes, initialized to zero. */
+
+void *
+zalloc (size_t size)
+{
+ void *p = xmalloc (size);
+ memset (p, 0, size);
+ return p;
+}
+
+/* Yield the newly malloc'd pathname
+ of the file in DIR whose filename is FILE. */
+
+char *
+dir_file_pathname (char const *dir, char const *file)
+{
+ char const *base = last_component (dir);
+ size_t baselen = base_len (base);
+ bool omit_slash = baselen == 0 || base[baselen - 1] == '/';
+ return concat (dir, "/" + omit_slash, file);
+}
+
+void
+debug_script (struct change *sp)
+{
+ fflush (stdout);
+
+ for (; sp; sp = sp->link)
+ {
+ long int line0 = sp->line0;
+ long int line1 = sp->line1;
+ long int deleted = sp->deleted;
+ long int inserted = sp->inserted;
+ fprintf (stderr, "%3ld %3ld delete %ld insert %ld\n",
+ line0, line1, deleted, inserted);
+ }
+
+ fflush (stderr);
+}