Speed up the disassembler by allowing prefixed instruction tables

Modify the disassembler so that we can have separate instruction tables for prefixed instructions. As it was, all instructions which started with 0F were linearly searched, and that is by now more than half the instruction set.
author: H. Peter Anvin <hpa@zytor.com> 2007-09-18 15:08:20 -0700
committer: H. Peter Anvin <hpa@zytor.com> 2007-09-18 15:08:20 -0700
commit: 19e201053689be68d0e45077fa86e9538d74daa1 (patch)
tree: 49c9bbd3d44939e9795951e72d700c83224e6f13 /insns.pl
parent: 0edc309505e659345cf353f81fb77793f8f5c291 (diff)
download: nasm-19e201053689be68d0e45077fa86e9538d74daa1.tar.gz
nasm-19e201053689be68d0e45077fa86e9538d74daa1.tar.bz2
nasm-19e201053689be68d0e45077fa86e9538d74daa1.zip
1 files changed, 118 insertions, 33 deletions
diff --git a/insns.pl b/insns.pl
index 6e961de..c5f280c 100644
--- a/insns.pl
+++ b/insns.pl
@@ -7,6 +7,10 @@
 # redistributable under the licence given in the file "Licence"
 # distributed in the NASM archive.
 
+# Opcode prefixes which need their own opcode tables
+# LONGER PREFIXES FIRST!
+@disasm_prefixes = qw(0F0F 0F24 0F25 0F38 0F3A 0F7A 0FC2 0F);
+
 print STDERR "Reading insns.dat...\n";
 
 @args   = ();
@@ -26,6 +30,8 @@ foreach $arg ( @ARGV ) {
 $fname = "insns.dat" unless $fname = $args[0];
 open (F, $fname) || die "unable to open $fname";
 
+%dinstables = ();
+
 $line = 0;
 $insns = 0;
 while (<F>) {
@@ -50,9 +56,11 @@ while (<F>) {
   }
   if ($formatted && !$nd) {
     push @big, $formatted;
-    foreach $i (&startbyte($_[2])) {
-      $aname = sprintf "dd_%02X",$i;
-      push @$aname, $#big;
+    foreach $i (startseq($_[2])) {
+	if (!defined($dinstables{$i})) {
+	    $dinstables{$i} = [];
+	}
+	push(@{$dinstables{$i}}, $#big);
     }
   }
 }
@@ -106,23 +114,38 @@ if ( !defined($output) || $output eq 'd' ) {
     foreach $j (@big) {
 	printf D "    /* %4d */ %s\n", $n++, $j;
     }
-    print D "    ITEMPLATE_END\n};\n\n";
-    
-    for ($c=0; $c<256; $c++) {
-	$h = sprintf "%02X", $c;
-	print D "static const struct itemplate * const itable_${h}[] = {\n";
-	$aname = "dd_$h";
-	foreach $j (@$aname) {
+    print D "};\n";
+
+    foreach $h (sort(keys(%dinstables))) {
+	print D "\nstatic const struct itemplate * const itable_${h}[] = {\n";
+	foreach $j (@{$dinstables{$h}}) {
 	    print D "    instrux + $j,\n";
 	}
-	print D "    NULL\n};\n\n";
-    }
-    
-    print D "const struct itemplate * const * const itable[] = {\n";
-    for ($c=0; $c<256; $c++) {
-	printf D "    itable_%02X,\n", $c;
+	print D "};\n";
     }
+
+    foreach $h (@disasm_prefixes, '') {
+	$is_prefix{$h} = 1;
+	print D "\n";
+	print D "static " unless ($h eq '');
+	print D "const struct disasm_index ";
+	print D ($h eq '') ? 'itable' : "itable_$h";
+	print D "[256] = {\n";
+	for ($c = 0; $c < 256; $c++) {
+	    $nn = sprintf("%s%02X", $h, $c);
+	    if ($is_prefix{$nn}) {
+		die "$0: ambiguous decoding of $nn\n"
+		    if (defined($dinstables{$nn}));
+		printf D "    { itable_%s, -1 },\n", $nn;
+	    } elsif (defined($dinstables{$nn})) {
+		printf D "    { itable_%s, %u },\n",
+	    	$nn, scalar(@{$dinstables{$nn}});
+	    } else {
+		printf D "    { NULL, 0 },\n";
+	    }
+	}
     print D "};\n";
+    }
     
     close D;
 }
@@ -240,6 +263,17 @@ sub format {
     ("{I_$opcode, $num, {$operands}, \"$codes\", $flags},", $nd);
 }
 
+sub hexlist($$$) {
+    my($prefix, $start, $n) = @_;
+    my $i;
+    my @l = ();
+
+    for ($i = 0; $i < $n; $i++) {
+	push(@l, sprintf("%s%02X", $prefix, $start+$i));
+    }
+    return @l;
+}
+
 # Here we determine the range of possible starting bytes for a given
 # instruction. We need only consider the codes:
 # \1 \2 \3     mean literal bytes, of course
@@ -248,24 +282,75 @@ sub format {
 # \170         means byte zero
 # \330         means byte plus condition code
 # \0 or \340   mean give up and return empty set
-sub startbyte {
-  my ($codes) = @_;
+sub startseq($) {
+  my ($codestr) = @_;
   my $word, @range;
+  my @codes = ();
+  my $c = $codestr;
+  my $c0, $c1, $i;
+  my $prefix = '';
+
+  # Although these are C-syntax strings, by convention they should have
+  # only octal escapes (for directives) and hexadecimal escapes
+  # (for verbatim bytes)
+  while ($c ne '') {
+      if ($c =~ /^\\x([0-9a-f]+)(.*)$/i) {
+	  push(@codes, hex $1);
+	  $c = $2;
+	  next;
+      } elsif ($c =~ /^\\([0-7]{1,3})(.*)$/) {
+	  push(@codes, oct $1);
+	  $c = $2;
+	  next;
+      } else {
+	  die "$0: unknown code format in \"$codestr\"\n";
+      }
+  }
+
+  while ($c0 = shift(@codes)) {
+      $c1 = $codes[0];
+      if ($c0 == 01 || $c0 == 02 || $c0 == 03 || $c0 == 0170) {
+	  # Fixed byte string
+	  my $fbs = $prefix;
+	  while (1) {
+	      if ($c0 == 01 || $c0 == 02 || $c0 == 03) {
+		  while ($c0--) {
+		      $fbs .= sprintf("%02X", shift(@codes));
+		  }
+	      } elsif ($c0 == 0170) {
+		  $fbs .= '00';
+	      } else {
+		  last;
+	      }
+	      $c0 = shift(@codes);
+	  }
+
+	  foreach $pfx (@disasm_prefixes) {
+	      if ($fbs =~ /^$pfx(.*)$/) {
+		  $prefix = $pfx;
+		  $fbs = $1;
+		  last;
+	      }
+	  }
 
-  while (1) {
-    die "couldn't get code in '$codes'" if $codes !~ /^(\\[^\\]+)(\\.*)?$/;
-    $word = $1, $codes = $2;
-    return (hex $1) if $word =~ /^\\[123]$/ && $codes =~ /^\\x(..)/;
-    return (0x07, 0x17, 0x1F) if $word eq "\\4";
-    return (0xA1, 0xA9) if $word eq "\\5";
-    return (0x06, 0x0E, 0x16, 0x1E) if $word eq "\\6";
-    return (0xA0, 0xA8) if $word eq "\\7";
-    $start=hex $1, $r=8, last if $word =~ /^\\1[0123]$/ && $codes =~/^\\x(..)/;
-    return (0) if $word eq "\\170";
-    $start=hex $1, $r=16, last if $word =~ /^\\330$/ && $codes =~ /^\\x(..)/;
-    return () if $word eq "\\0" || $word eq "\\340";
+	  if ($fbs ne '') {
+	      return ($prefix.substr($fbs,0,2));
+	  }
+      } elsif ($c0 == 04) {
+	  return ("07", "17", "1F");
+      } elsif ($c0 == 05) {
+	  return ("A1", "A9");
+      } elsif ($c0 == 06) {
+	  return ("06", "0E", "16", "1E");
+      } elsif ($c0 == 07) {
+	  return ("A0", "A8");
+      } elsif ($c0 >= 010 && $c0 <= 013) {
+	  return hexlist($prefix, $c1, 8);
+      } elsif ($c0 == 0330) {
+	  return hexlist($prefix, $c1, 16);
+      } elsif ($c0 == 0 || $c0 == 0340) {
+	  return ();
+      }
   }
-  @range = ();
-  push @range, $start++ while ($r-- > 0);
-  @range;
+  return ();
 }
author	H. Peter Anvin <hpa@zytor.com>	2007-09-18 15:08:20 -0700
committer	H. Peter Anvin <hpa@zytor.com>	2007-09-18 15:08:20 -0700
commit	19e201053689be68d0e45077fa86e9538d74daa1 (patch)
tree	49c9bbd3d44939e9795951e72d700c83224e6f13 /insns.pl
parent	0edc309505e659345cf353f81fb77793f8f5c291 (diff)
download	nasm-19e201053689be68d0e45077fa86e9538d74daa1.tar.gz nasm-19e201053689be68d0e45077fa86e9538d74daa1.tar.bz2 nasm-19e201053689be68d0e45077fa86e9538d74daa1.zip