summaryrefslogtreecommitdiff
path: root/insns.pl
diff options
context:
space:
mode:
authorH. Peter Anvin <hpa@zytor.com>2007-09-18 15:08:20 -0700
committerH. Peter Anvin <hpa@zytor.com>2007-09-18 15:08:20 -0700
commit19e201053689be68d0e45077fa86e9538d74daa1 (patch)
tree49c9bbd3d44939e9795951e72d700c83224e6f13 /insns.pl
parent0edc309505e659345cf353f81fb77793f8f5c291 (diff)
downloadnasm-19e201053689be68d0e45077fa86e9538d74daa1.tar.gz
nasm-19e201053689be68d0e45077fa86e9538d74daa1.tar.bz2
nasm-19e201053689be68d0e45077fa86e9538d74daa1.zip
Speed up the disassembler by allowing prefixed instruction tables
Modify the disassembler so that we can have separate instruction tables for prefixed instructions. As it was, all instructions which started with 0F were linearly searched, and that is by now more than half the instruction set.
Diffstat (limited to 'insns.pl')
-rw-r--r--insns.pl151
1 files changed, 118 insertions, 33 deletions
diff --git a/insns.pl b/insns.pl
index 6e961de..c5f280c 100644
--- a/insns.pl
+++ b/insns.pl
@@ -7,6 +7,10 @@
# redistributable under the licence given in the file "Licence"
# distributed in the NASM archive.
+# Opcode prefixes which need their own opcode tables
+# LONGER PREFIXES FIRST!
+@disasm_prefixes = qw(0F0F 0F24 0F25 0F38 0F3A 0F7A 0FC2 0F);
+
print STDERR "Reading insns.dat...\n";
@args = ();
@@ -26,6 +30,8 @@ foreach $arg ( @ARGV ) {
$fname = "insns.dat" unless $fname = $args[0];
open (F, $fname) || die "unable to open $fname";
+%dinstables = ();
+
$line = 0;
$insns = 0;
while (<F>) {
@@ -50,9 +56,11 @@ while (<F>) {
}
if ($formatted && !$nd) {
push @big, $formatted;
- foreach $i (&startbyte($_[2])) {
- $aname = sprintf "dd_%02X",$i;
- push @$aname, $#big;
+ foreach $i (startseq($_[2])) {
+ if (!defined($dinstables{$i})) {
+ $dinstables{$i} = [];
+ }
+ push(@{$dinstables{$i}}, $#big);
}
}
}
@@ -106,23 +114,38 @@ if ( !defined($output) || $output eq 'd' ) {
foreach $j (@big) {
printf D " /* %4d */ %s\n", $n++, $j;
}
- print D " ITEMPLATE_END\n};\n\n";
-
- for ($c=0; $c<256; $c++) {
- $h = sprintf "%02X", $c;
- print D "static const struct itemplate * const itable_${h}[] = {\n";
- $aname = "dd_$h";
- foreach $j (@$aname) {
+ print D "};\n";
+
+ foreach $h (sort(keys(%dinstables))) {
+ print D "\nstatic const struct itemplate * const itable_${h}[] = {\n";
+ foreach $j (@{$dinstables{$h}}) {
print D " instrux + $j,\n";
}
- print D " NULL\n};\n\n";
- }
-
- print D "const struct itemplate * const * const itable[] = {\n";
- for ($c=0; $c<256; $c++) {
- printf D " itable_%02X,\n", $c;
+ print D "};\n";
}
+
+ foreach $h (@disasm_prefixes, '') {
+ $is_prefix{$h} = 1;
+ print D "\n";
+ print D "static " unless ($h eq '');
+ print D "const struct disasm_index ";
+ print D ($h eq '') ? 'itable' : "itable_$h";
+ print D "[256] = {\n";
+ for ($c = 0; $c < 256; $c++) {
+ $nn = sprintf("%s%02X", $h, $c);
+ if ($is_prefix{$nn}) {
+ die "$0: ambiguous decoding of $nn\n"
+ if (defined($dinstables{$nn}));
+ printf D " { itable_%s, -1 },\n", $nn;
+ } elsif (defined($dinstables{$nn})) {
+ printf D " { itable_%s, %u },\n",
+ $nn, scalar(@{$dinstables{$nn}});
+ } else {
+ printf D " { NULL, 0 },\n";
+ }
+ }
print D "};\n";
+ }
close D;
}
@@ -240,6 +263,17 @@ sub format {
("{I_$opcode, $num, {$operands}, \"$codes\", $flags},", $nd);
}
+sub hexlist($$$) {
+ my($prefix, $start, $n) = @_;
+ my $i;
+ my @l = ();
+
+ for ($i = 0; $i < $n; $i++) {
+ push(@l, sprintf("%s%02X", $prefix, $start+$i));
+ }
+ return @l;
+}
+
# Here we determine the range of possible starting bytes for a given
# instruction. We need only consider the codes:
# \1 \2 \3 mean literal bytes, of course
@@ -248,24 +282,75 @@ sub format {
# \170 means byte zero
# \330 means byte plus condition code
# \0 or \340 mean give up and return empty set
-sub startbyte {
- my ($codes) = @_;
+sub startseq($) {
+ my ($codestr) = @_;
my $word, @range;
+ my @codes = ();
+ my $c = $codestr;
+ my $c0, $c1, $i;
+ my $prefix = '';
+
+ # Although these are C-syntax strings, by convention they should have
+ # only octal escapes (for directives) and hexadecimal escapes
+ # (for verbatim bytes)
+ while ($c ne '') {
+ if ($c =~ /^\\x([0-9a-f]+)(.*)$/i) {
+ push(@codes, hex $1);
+ $c = $2;
+ next;
+ } elsif ($c =~ /^\\([0-7]{1,3})(.*)$/) {
+ push(@codes, oct $1);
+ $c = $2;
+ next;
+ } else {
+ die "$0: unknown code format in \"$codestr\"\n";
+ }
+ }
+
+ while ($c0 = shift(@codes)) {
+ $c1 = $codes[0];
+ if ($c0 == 01 || $c0 == 02 || $c0 == 03 || $c0 == 0170) {
+ # Fixed byte string
+ my $fbs = $prefix;
+ while (1) {
+ if ($c0 == 01 || $c0 == 02 || $c0 == 03) {
+ while ($c0--) {
+ $fbs .= sprintf("%02X", shift(@codes));
+ }
+ } elsif ($c0 == 0170) {
+ $fbs .= '00';
+ } else {
+ last;
+ }
+ $c0 = shift(@codes);
+ }
+
+ foreach $pfx (@disasm_prefixes) {
+ if ($fbs =~ /^$pfx(.*)$/) {
+ $prefix = $pfx;
+ $fbs = $1;
+ last;
+ }
+ }
- while (1) {
- die "couldn't get code in '$codes'" if $codes !~ /^(\\[^\\]+)(\\.*)?$/;
- $word = $1, $codes = $2;
- return (hex $1) if $word =~ /^\\[123]$/ && $codes =~ /^\\x(..)/;
- return (0x07, 0x17, 0x1F) if $word eq "\\4";
- return (0xA1, 0xA9) if $word eq "\\5";
- return (0x06, 0x0E, 0x16, 0x1E) if $word eq "\\6";
- return (0xA0, 0xA8) if $word eq "\\7";
- $start=hex $1, $r=8, last if $word =~ /^\\1[0123]$/ && $codes =~/^\\x(..)/;
- return (0) if $word eq "\\170";
- $start=hex $1, $r=16, last if $word =~ /^\\330$/ && $codes =~ /^\\x(..)/;
- return () if $word eq "\\0" || $word eq "\\340";
+ if ($fbs ne '') {
+ return ($prefix.substr($fbs,0,2));
+ }
+ } elsif ($c0 == 04) {
+ return ("07", "17", "1F");
+ } elsif ($c0 == 05) {
+ return ("A1", "A9");
+ } elsif ($c0 == 06) {
+ return ("06", "0E", "16", "1E");
+ } elsif ($c0 == 07) {
+ return ("A0", "A8");
+ } elsif ($c0 >= 010 && $c0 <= 013) {
+ return hexlist($prefix, $c1, 8);
+ } elsif ($c0 == 0330) {
+ return hexlist($prefix, $c1, 16);
+ } elsif ($c0 == 0 || $c0 == 0340) {
+ return ();
+ }
}
- @range = ();
- push @range, $start++ while ($r-- > 0);
- @range;
+ return ();
}