summaryrefslogtreecommitdiff
path: root/locate/frcode.c
diff options
context:
space:
mode:
Diffstat (limited to 'locate/frcode.c')
-rw-r--r--locate/frcode.c267
1 files changed, 267 insertions, 0 deletions
diff --git a/locate/frcode.c b/locate/frcode.c
new file mode 100644
index 0000000..19ee758
--- /dev/null
+++ b/locate/frcode.c
@@ -0,0 +1,267 @@
+/* frcode -- front-compress a sorted list
+ Copyright (C) 1994, 2003, 2006 Free Software Foundation, Inc.
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2, or (at your option)
+ any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301,
+ USA.
+*/
+
+/* Usage: frcode < sorted-list > compressed-list
+
+ Uses front compression (also known as incremental encoding);
+ see ";login:", March 1983, p. 8.
+
+ The input is a sorted list of NUL-terminated strings.
+ (FIXME newline-terminated, until we figure out how to sort
+ NUL-terminated strings.)
+
+ The output entries are in the same order as the input;
+ each entry consists of an offset-differential count byte
+ (the additional number of characters of prefix of the preceding entry to
+ use beyond the number that the preceding entry is using of its predecessor),
+ followed by a null-terminated ASCII remainder.
+
+ If the offset-differential count is larger than can be stored
+ in a byte (+/-127), the byte has the value LOCATEDB_ESCAPE
+ and the count follows in a 2-byte word, with the high byte first
+ (network byte order).
+
+ Example:
+
+ Input, with NULs changed to newlines:
+ /usr/src
+ /usr/src/cmd/aardvark.c
+ /usr/src/cmd/armadillo.c
+ /usr/tmp/zoo
+
+ Length of the longest prefix of the preceding entry to share:
+ 0 /usr/src
+ 8 /cmd/aardvark.c
+ 14 rmadillo.c
+ 5 tmp/zoo
+
+ Output, with NULs changed to newlines and count bytes made printable:
+ 0 LOCATE02
+ 0 /usr/src
+ 8 /cmd/aardvark.c
+ 6 rmadillo.c
+ -9 tmp/zoo
+
+ (6 = 14 - 8, and -9 = 5 - 14)
+
+ Written by James A. Woods <jwoods@adobe.com>.
+ Modified by David MacKenzie <djm@gnu.org>. */
+
+#include <config.h>
+#include <stdio.h>
+#include <limits.h>
+#include <assert.h>
+#include <sys/types.h>
+
+#if defined(HAVE_STRING_H) || defined(STDC_HEADERS)
+#include <string.h>
+#else
+#include <strings.h>
+#endif
+
+#ifdef STDC_HEADERS
+#include <stdlib.h>
+#endif
+
+#if ENABLE_NLS
+# include <libintl.h>
+# define _(Text) gettext (Text)
+#else
+# define _(Text) Text
+#define textdomain(Domain)
+#define bindtextdomain(Package, Directory)
+#endif
+#ifdef gettext_noop
+# define N_(String) gettext_noop (String)
+#else
+/* We used to use (String) instead of just String, but apparentl;y ISO C
+ * doesn't allow this (at least, that's what HP said when someone reported
+ * this as a compiler bug). This is HP case number 1205608192. See
+ * also http://gcc.gnu.org/bugzilla/show_bug.cgi?id=11250 (which references
+ * ANSI 3.5.7p14-15). The Intel icc compiler also rejects constructs
+ * like: static const char buf[] = ("string");
+ */
+# define N_(String) String
+#endif
+
+
+#include "locatedb.h"
+#include <getline.h>
+#include <getopt.h>
+#include "closeout.h"
+
+char *xmalloc PARAMS((size_t));
+
+/* The name this program was run with. */
+char *program_name;
+
+/* Write out a 16-bit int, high byte first (network byte order). */
+
+static void
+put_short (int c, FILE *fp)
+{
+ /* XXX: The value may be negative, but I think ISO C doesn't
+ * guarantee the assumptions we're making here will hold.
+ */
+ assert(c <= SHRT_MAX);
+ assert(c >= SHRT_MIN);
+ putc (c >> 8, fp);
+ putc (c, fp);
+}
+
+/* Return the length of the longest common prefix of strings S1 and S2. */
+
+static int
+prefix_length (char *s1, char *s2)
+{
+ register char *start;
+ int limit = INT_MAX;
+ for (start = s1; *s1 == *s2 && *s1 != '\0'; s1++, s2++)
+ {
+ /* Don't emit a prefix length that will not fit into
+ * our return type.
+ */
+ if (0 == --limit)
+ break;
+ }
+ return s1 - start;
+}
+
+static struct option const longopts[] =
+{
+ {"help", no_argument, NULL, 'h'},
+ {"version", no_argument, NULL, 'v'},
+ {"null", no_argument, NULL, '0'},
+ {NULL, no_argument, NULL, 0}
+};
+
+extern char *version_string;
+
+/* The name this program was run with. */
+char *program_name;
+
+
+static void
+usage (FILE *stream)
+{
+ fprintf (stream,
+ _("Usage: %s [-0 | --null] [--version] [--help]\n"),
+ program_name);
+ fputs (_("\nReport bugs to <bug-findutils@gnu.org>.\n"), stream);
+}
+
+
+int
+main (int argc, char **argv)
+{
+ char *path; /* The current input entry. */
+ char *oldpath; /* The previous input entry. */
+ size_t pathsize, oldpathsize; /* Amounts allocated for them. */
+ int count, oldcount, diffcount; /* Their prefix lengths & the difference. */
+ int line_len; /* Length of input line. */
+ int delimiter = '\n';
+ int optc;
+
+ program_name = argv[0];
+ atexit (close_stdout);
+
+ pathsize = oldpathsize = 1026; /* Increased as necessary by getline. */
+ path = xmalloc (pathsize);
+ oldpath = xmalloc (oldpathsize);
+
+ oldpath[0] = 0;
+ oldcount = 0;
+
+
+ while ((optc = getopt_long (argc, argv, "hv0", longopts, (int *) 0)) != -1)
+ switch (optc)
+ {
+ case '0':
+ delimiter = 0;
+ break;
+
+ case 'h':
+ usage (stdout);
+ return 0;
+
+ case 'v':
+ printf (_("GNU locate version %s\n"), version_string);
+ return 0;
+
+ default:
+ usage (stderr);
+ return 1;
+ }
+
+ /* We expect to have no arguments. */
+ if (optind != argc)
+ {
+ usage (stderr);
+ return 1;
+ }
+
+
+
+ fwrite (LOCATEDB_MAGIC, sizeof (LOCATEDB_MAGIC), 1, stdout);
+
+ while ((line_len = getdelim (&path, &pathsize, delimiter, stdin)) > 0)
+ {
+ path[line_len - 1] = '\0'; /* FIXME temporary: nuke the newline. */
+
+ count = prefix_length (oldpath, path);
+ diffcount = count - oldcount;
+ if ( (diffcount > SHRT_MAX) || (diffcount < SHRT_MIN) )
+ {
+ /* We do this to prevent overflow of the value we
+ * write with put_short()
+ */
+ count = 0;
+ diffcount = (-oldcount);
+ }
+ oldcount = count;
+
+ /* If the difference is small, it fits in one byte;
+ otherwise, two bytes plus a marker noting that fact. */
+ if (diffcount < LOCATEDB_ONEBYTE_MIN || diffcount > LOCATEDB_ONEBYTE_MAX)
+ {
+ putc (LOCATEDB_ESCAPE, stdout);
+ put_short (diffcount, stdout);
+ }
+ else
+ putc (diffcount, stdout);
+
+ fputs (path + count, stdout);
+ putc ('\0', stdout);
+
+ {
+ /* Swap path and oldpath and their sizes. */
+ char *tmppath = oldpath;
+ size_t tmppathsize = oldpathsize;
+ oldpath = path;
+ oldpathsize = pathsize;
+ path = tmppath;
+ pathsize = tmppathsize;
+ }
+ }
+
+ free (path);
+ free (oldpath);
+
+ return 0;
+}