Add new command line option '--encoding' to strings

author: Nick Clifton <nickc@redhat.com> 2001-09-14 11:32:25 +0000
committer: Nick Clifton <nickc@redhat.com> 2001-09-14 11:32:25 +0000
commit: d132876a03877efa59ef0714d56cd53ae338b1e6 (patch)
tree: 9acbac537e2d34ffa3afce2dd9d7e6ac41f582c7
parent: 2613489e4e5d9f05074e5ab9d30ea4ee7b228492 (diff)
download: binutils-d132876a03877efa59ef0714d56cd53ae338b1e6.tar.gz
binutils-d132876a03877efa59ef0714d56cd53ae338b1e6.tar.bz2
binutils-d132876a03877efa59ef0714d56cd53ae338b1e6.zip
4 files changed, 146 insertions, 40 deletions
diff --git a/binutils/ChangeLog b/binutils/ChangeLog
index b644b2846c7..c9e9087063d 100644
--- a/binutils/ChangeLog
+++ b/binutils/ChangeLog
@@ -1,3 +1,15 @@
+2001-09-14  Nick Clifton  <nickc@cambridge.redhat.com>
+
+	* strings.c (encoding, encoding_bytes): New variables.
+	(long_options): Add --encoding.
+	(main): Accept -e and --encoding.
+	(get_char): New function.  Read a, possibly wide, character from
+	the input stream.
+	(print_strings): Use get_char().
+	(usage): Document new command line option.
+	* doc/binutils.texi: Document new command line option.
+	* NEWS: Announce new command line option.
+
 Wed Sep 12 20:07:16 2001  Alexandre Oliva  <aoliva@redhat.com>
 
 	* readelf.c (guess_is_rela, dump_relocations, get_machine_name,
diff --git a/binutils/NEWS b/binutils/NEWS
index 18c2fd8aeb6..f4745c0f824 100644
--- a/binutils/NEWS
+++ b/binutils/NEWS
@@ -1,5 +1,7 @@
 -*- text -*-
 
+* strings: Add --encoding to display wide character strings.  By Markus Kuhn.
+
 * objcopy: Add --rename-section to change section names.
 
 * readelf: Support added for DWARF 2.1 extensions.  Support added for
diff --git a/binutils/doc/binutils.texi b/binutils/doc/binutils.texi
index ecb767cfabc..6b6f19ef645 100644
--- a/binutils/doc/binutils.texi
+++ b/binutils/doc/binutils.texi
@@ -1853,9 +1853,12 @@ ar(1), objdump(1), readelf(1), and the Info entries for @file{binutils}.
 
 @smallexample
 @c man begin SYNOPSIS strings
-strings [@option{-afov}] [@option{-}@var{min-len}] [@option{-n} @var{min-len}] [@option{-t} @var{radix}] [@option{-}]
-        [@option{--all}] [@option{--print-file-name}] [@option{--bytes=}@var{min-len}]
-        [@option{--radix=}@var{radix}] [@option{--target=}@var{bfdname}]
+strings [@option{-afov}] [@option{-}@var{min-len}]
+        [@option{-n} @var{min-len}] [@option{--bytes=}@var{min-len}]
+        [@option{-t} @var{radix}] [@option{--radix=}@var{radix}]
+        [@option{-e} @var{encoding}] [@option{--encoding=}@var{encoding}]
+        [@option{-}] [@option{--all}] [@option{--print-file-name}]
+        [@option{--target=}@var{bfdname}]
         [@option{--help}] [@option{--version}] @var{file}@dots{}
 @c man end
 @end smallexample
@@ -1907,6 +1910,15 @@ Print the offset within the file before each string.  The single
 character argument specifies the radix of the offset---@samp{o} for
 octal, @samp{x} for hexadecimal, or @samp{d} for decimal.
 
+@item -e @var{encoding}
+@itemx --encoding=@var{encoding}
+Select the character encoding of the strings that are to be found.
+Possible values for @var{encoding} are: @samp{s} = single-byte
+characters (ASCII, ISO 8859, etc., default), @samp{b} = 16-bit
+Bigendian, @samp{l} = 16-bit Littleendian, @samp{B} = 32-bit Bigendian,
+@samp{L} = 32-bit Littleendian. Useful for finding wide character
+strings.
+
 @item --target=@var{bfdname}
 @cindex object code format
 Specify an object code format other than your system's default format.
diff --git a/binutils/strings.c b/binutils/strings.c
index 5d3aa6d4fa7..7326cae07af 100644
--- a/binutils/strings.c
+++ b/binutils/strings.c
@@ -1,5 +1,5 @@
 /* strings -- print the strings of printable characters in files
-   Copyright 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000
+   Copyright 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001
    Free Software Foundation, Inc.
 
    This program is free software; you can redistribute it and/or modify
@@ -39,6 +39,11 @@
    -o		Like -to.  (Some other implementations have -o like -to,
 		others like -td.  We chose one arbitrarily.)
 
+   --encoding={s,b,l,B,L}
+   -e {s,b,l,B,L}
+		Select character encoding: single-byte, bigendian 16-bit,
+		littleendian 16-bit, bigendian 32-bit, littleendian 32-bit
+
    --target=BFDNAME
 		Specify a non-default object file format.
 
@@ -113,12 +118,17 @@ static boolean got_a_section;
 /* The BFD object file format.  */
 static char *target;
 
+/* Encoding selected by -e/--encoding, and its bytes per character.  */
+static char encoding;
+static int encoding_bytes;
+
 static struct option long_options[] =
 {
   {"all", no_argument, NULL, 'a'},
   {"print-file-name", no_argument, NULL, 'f'},
   {"bytes", required_argument, NULL, 'n'},
   {"radix", required_argument, NULL, 't'},
+  {"encoding", required_argument, NULL, 'e'},
   {"target", required_argument, NULL, 'T'},
   {"help", no_argument, NULL, 'h'},
   {"version", no_argument, NULL, 'v'},
@@ -156,8 +166,9 @@ main (argc, argv)
   print_filenames = false;
   datasection_only = true;
   target = NULL;
+  encoding = 's';
 
-  while ((optc = getopt_long (argc, argv, "afn:ot:v0123456789",
+  while ((optc = getopt_long (argc, argv, "afn:ot:e:v0123456789",
 			      long_options, (int *) 0)) != EOF)
     {
       switch (optc)
@@ -213,6 +224,12 @@ main (argc, argv)
 	  target = optarg;
 	  break;
 
+	case 'e':
+	  if (optarg[1] != '\0')
+	    usage (stderr, 1);
+	  encoding = optarg[0];
+	  break;
+
 	case 'v':
 	  print_version ("strings");
 	  break;
@@ -232,6 +249,23 @@ main (argc, argv)
   if (string_min < 0)
     string_min = 4;
 
+  switch (encoding)
+    {
+    case 's':
+      encoding_bytes = 1;
+      break;
+    case 'b':
+    case 'l':
+      encoding_bytes = 2;
+      break;
+    case 'B':
+    case 'L':
+      encoding_bytes = 4;
+      break;
+    default:
+      usage (stderr, 1);
+    }
+
   bfd_init ();
   set_default_bfd_target ();
 
@@ -366,6 +400,74 @@ strings_file (file)
   return true;
 }
 
+/* Read the next character: encoding_bytes bytes, combined according to
+   the selected encoding.  Return its value, or EOF if a complete
+   character could not be read.  Bytes are taken first from the buffer
+   *MAGIC while *MAGICCOUNT is nonzero (both are updated as bytes are
+   consumed), then from STREAM; if STREAM is NULL it is not read.
+   *ADDRESS is incremented once for every byte consumed, including the
+   bytes of a partial character cut short by end of input.
+   A decoded value equal to EOF is returned as 0, apparently so that
+   callers do not mistake it for end of input.  */
+
+static long
+get_char (stream, address, magiccount, magic)
+     FILE *stream;
+     file_ptr *address;
+     int *magiccount;
+     char **magic;
+{
+  int c, i;
+  long r;
+  unsigned char buf[4];
+
+  for (i = 0; i < encoding_bytes; i++)
+    {
+      if (*magiccount)
+	{
+	  (*magiccount)--;
+	  c = *(*magic)++;
+	}
+      else
+	{
+	  if (stream == NULL)
+	    return EOF;
+	  c = getc (stream);
+	  if (c == EOF)
+	    return EOF;
+	}
+
+      (*address)++;
+      buf[i] = c;
+    }
+
+  switch (encoding)
+    {
+    case 's':
+      r = buf[0];
+      break;
+    case 'b':
+      r = (buf[0] << 8) | buf[1];
+      break;
+    case 'l':
+      r = buf[0] | (buf[1] << 8);
+      break;
+    case 'B':
+      r = ((long) buf[0] << 24) | ((long) buf[1] << 16) |
+	((long) buf[2] << 8) | buf[3];
+      break;
+    case 'L':
+      r = buf[0] | ((long) buf[1] << 8) | ((long) buf[2] << 16) |
+	((long) buf[3] << 24);
+      break;
+    }
+
+  if (r == EOF)
+    return 0;
+
+  return r;
+}
+
 /* Find the strings in file FILENAME, read from STREAM.
    Assume that STREAM is positioned so that the next byte read
    is at address ADDRESS in the file.
@@ -387,13 +489,13 @@ print_strings (filename, stream, address, stop_point, magiccount, magic)
      int magiccount;
      char *magic;
 {
-  char *buf = (char *) xmalloc (string_min + 1);
+  char *buf = (char *) xmalloc (sizeof (char) * (string_min + 1));
 
   while (1)
     {
       file_ptr start;
       int i;
-      int c;
+      long c;
 
       /* See if the next `string_min' chars are all graphic chars.  */
     tryline:
@@ -402,21 +504,10 @@ print_strings (filename, stream, address, stop_point, magiccount, magic)
       start = address;
       for (i = 0; i < string_min; i++)
 	{
-	  if (magiccount)
-	    {
-	      magiccount--;
-	      c = *magic++;
-	    }
-	  else
-	    {
-	      if (stream == NULL)
-		return;
-	      c = getc (stream);
-	      if (c == EOF)
-		return;
-	    }
-	  address++;
-	  if (!isgraphic (c))
+	  c = get_char (stream, &address, &magiccount, &magic);
+	  if (c == EOF)
+	    return;
+	  if (c > 255 || c < 0 || !isgraphic (c))
 	    /* Found a non-graphic.  Try again starting with next char.  */
 	    goto tryline;
 	  buf[i] = c;
@@ -448,21 +539,10 @@ print_strings (filename, stream, address, stop_point, magiccount, magic)
 
       while (1)
 	{
-	  if (magiccount)
-	    {
-	      magiccount--;
-	      c = *magic++;
-	    }
-	  else
-	    {
-	      if (stream == NULL)
-		break;
-	      c = getc (stream);
-	      if (c == EOF)
-		break;
-	    }
-	  address++;
-	  if (! isgraphic (c))
+	  c = get_char (stream, &address, &magiccount, &magic);
+	  if (c == EOF)
+	    break;
+	  if (c > 255 || c < 0 || !isgraphic (c))
 	    break;
 	  putchar (c);
 	}
@@ -524,9 +604,9 @@ usage (stream, status)
      int status;
 {
   fprintf (stream, _("\
-Usage: %s [-afov] [-n min-len] [-min-len] [-t {o,x,d}] [-]\n\
-       [--all] [--print-file-name] [--bytes=min-len] [--radix={o,x,d}]\n\
-       [--target=bfdname] [--help] [--version] file...\n"),
+Usage: %s [-afov] [-n min-len] [-min-len] [-t {o,x,d}] [-e {s,b,l,B,L}]\n\
+       [-] [--all] [--print-file-name] [--bytes=min-len] [--radix={o,x,d}]\n\
+       [--target=bfdname] [--encoding {s,b,l,B,L}] [--help] [--version] file...\n"),
 	   program_name);
   list_supported_targets (program_name, stream);
   if (status == 0)
author	Nick Clifton <nickc@redhat.com>	2001-09-14 11:32:25 +0000
committer	Nick Clifton <nickc@redhat.com>	2001-09-14 11:32:25 +0000
commit	d132876a03877efa59ef0714d56cd53ae338b1e6 (patch)
tree	9acbac537e2d34ffa3afce2dd9d7e6ac41f582c7
parent	2613489e4e5d9f05074e5ab9d30ea4ee7b228492 (diff)
download	binutils-d132876a03877efa59ef0714d56cd53ae338b1e6.tar.gz binutils-d132876a03877efa59ef0714d56cd53ae338b1e6.tar.bz2 binutils-d132876a03877efa59ef0714d56cd53ae338b1e6.zip