Generate a byte array instead of using strings for the byte codes

Generate a byte array instead of using C compiler strings for the byte codes. This has a few advantages: - No need to special-case zero due to broken C compilers. - Only insns.pl ever reads the string, so we can invent our own syntax. - Compaction. - We can give it the proper, unsigned type.
author: H. Peter Anvin <hpa@zytor.com> 2008-05-12 11:00:50 -0700
committer: H. Peter Anvin <hpa@zytor.com> 2008-05-12 11:00:50 -0700
commit: 3720f7beaeaefeb1e6bbf1bb8416ef78d4abe6e6 (patch)
tree: ef4d366d67a69c27e7b19936d90300209cec4281 /insns.pl
parent: 387c1c271426256ed04439d43b31cdc41a625ed1 (diff)
download: nasm-3720f7beaeaefeb1e6bbf1bb8416ef78d4abe6e6.tar.gz
nasm-3720f7beaeaefeb1e6bbf1bb8416ef78d4abe6e6.tar.bz2
nasm-3720f7beaeaefeb1e6bbf1bb8416ef78d4abe6e6.zip
1 files changed, 93 insertions, 10 deletions
diff --git a/insns.pl b/insns.pl
index 03c840a..eff70f5 100644
--- a/insns.pl
+++ b/insns.pl
@@ -17,7 +17,7 @@ print STDERR "Reading insns.dat...\n";
 undef $output;
 foreach $arg ( @ARGV ) {
     if ( $arg =~ /^\-/ ) {
-	if  ( $arg =~ /^\-([adin])$/ ) {
+	if  ( $arg =~ /^\-([abdin])$/ ) {
 	    $output = $1;
 	} else {
 	    die "$0: Unknown option: ${arg}\n";
@@ -31,6 +31,7 @@ $fname = "insns.dat" unless $fname = $args[0];
 open (F, $fname) || die "unable to open $fname";
 
 %dinstables = ();
+@bytecode_list = ();
 
 $line = 0;
 $insns = 0;
@@ -68,9 +69,59 @@ while (<F>) {
 
 close F;
 
+#
+# Generate the bytecode array.  At this point, @bytecode_list contains
+# the full set of bytecodes.
+#
+
+# Sort by descending length
+@bytecode_list = sort { scalar(@$b) <=> scalar(@$a) } @bytecode_list;
+@bytecode_array = ();
+%bytecode_pos = ();
+$bytecode_next = 0;
+foreach $bl (@bytecode_list) {
+    my $h = hexstr(@$bl);
+    next if (defined($bytecode_pos{$h}));
+
+    push(@bytecode_array, $bl);
+    while ($h ne '') {
+	$bytecode_pos{$h} = $bytecode_next;
+	$h = substr($h, 2);
+	$bytecode_next++;
+    }
+}
+undef @bytecode_list;
+
 @opcodes    = sort keys(%k_opcodes);
 @opcodes_cc = sort keys(%k_opcodes_cc);
 
+if ( !defined($output) || $output eq 'b') {
+    print STDERR "Writing insnsb.c...\n";
+
+    open B, ">insnsb.c";
+    
+    print B "/* This file auto-generated from insns.dat by insns.pl" .
+        " - don't edit it */\n\n";
+
+    print B "#include \"nasm.h\"\n";
+    print B "#include \"insns.h\"\n\n";
+
+    print B "static const uint8_t nasm_bytecodes[$bytecode_next] = {\n";
+
+    $p = 0;
+    foreach $bl (@bytecode_array) {
+	printf B "    /* %4d */ ", $p;
+	foreach $d (@$bl) {
+	    printf B "%#o,", $d;
+	    $p++;
+	}
+	printf B "\n";
+    }
+    print B "};\n";
+
+    close B;
+}
+    
 if ( !defined($output) || $output eq 'a' ) {
     print STDERR "Writing insnsa.c...\n";
 
@@ -78,15 +129,14 @@ if ( !defined($output) || $output eq 'a' ) {
 
     print A "/* This file auto-generated from insns.dat by insns.pl" .
         " - don't edit it */\n\n";
-    print A "#include \"nasm.h\"\n";
-    print A "#include \"insns.h\"\n";
-    print A "\n";
+
+    print A "#include \"insnsb.c\"\n\n";
 
     foreach $i (@opcodes, @opcodes_cc) {
 	print A "static const struct itemplate instrux_${i}[] = {\n";
 	$aname = "aa_$i";
 	foreach $j (@$aname) {
-	    print A "    $j\n";
+	    print A "    ", codesubst($j), "\n";
 	}
 	print A "    ITEMPLATE_END\n};\n\n";
     }
@@ -106,14 +156,13 @@ if ( !defined($output) || $output eq 'd' ) {
 
     print D "/* This file auto-generated from insns.dat by insns.pl" .
         " - don't edit it */\n\n";
-    print D "#include \"nasm.h\"\n";
-    print D "#include \"insns.h\"\n";
-    print D "\n";
+
+    print D "#include \"insnsb.c\"\n\n";
 
     print D "static const struct itemplate instrux[] = {\n";
     $n = 0;
     foreach $j (@big) {
-	printf D "    /* %4d */ %s\n", $n++, $j;
+	printf D "    /* %4d */ %s\n", $n++, codesubst($j);
     }
     print D "};\n";
 
@@ -230,6 +279,7 @@ printf STDERR "Done: %d instructions\n", $insns;
 sub format {
     my ($opcode, $operands, $codes, $flags) = @_;
     my $num, $nd = 0;
+    my @bytecode;
 
     return (undef, undef) if $operands eq "ignore";
 
@@ -260,7 +310,29 @@ sub format {
     $flags =~ s/(\|IF_ND|IF_ND\|)//, $nd = 1 if $flags =~ /IF_ND/;
     $flags = "IF_" . $flags;
 
-    ("{I_$opcode, $num, {$operands}, \"$codes\", $flags},", $nd);
+    @bytecode = (decodify($codes), 0);
+    push(@bytecode_list, [@bytecode]);
+    $codes = hexstr(@bytecode);
+
+    ("{I_$opcode, $num, {$operands}, \@\@CODES-$codes\@\@, $flags},", $nd);
+}
+
+#
+# Look for @@CODES-xxx@@ sequences and replace them with the appropriate
+# offset into nasm_bytecodes
+#
+sub codesubst($) {
+    my($s) = @_;
+    my $n;
+
+    while ($s =~ /\@\@CODES-([0-9A-F]+)\@\@/) {
+	my $pos = $bytecode_pos{$1};
+	if (!defined($pos)) {
+	    die "$0: no position assigned to byte code $1\n";
+	}
+	$s = $` . "nasm_bytecodes+${pos}" . "$'";
+    }
+    return $s;
 }
 
 sub addprefix ($@) {
@@ -303,6 +375,17 @@ sub decodify($) {
     return @codes;
 }
 
+# Turn a numeric list into a hex string
+sub hexstr(@) {
+    my $s = '';
+    my $c;
+
+    foreach $c (@_) {
+	$s .= sprintf("%02X", $c);
+    }
+    return $s;
+}
+
 # Here we determine the range of possible starting bytes for a given
 # instruction. We need only consider the codes:
 # \1 \2 \3     mean literal bytes, of course
author	H. Peter Anvin <hpa@zytor.com>	2008-05-12 11:00:50 -0700
committer	H. Peter Anvin <hpa@zytor.com>	2008-05-12 11:00:50 -0700
commit	3720f7beaeaefeb1e6bbf1bb8416ef78d4abe6e6 (patch)
tree	ef4d366d67a69c27e7b19936d90300209cec4281 /insns.pl
parent	387c1c271426256ed04439d43b31cdc41a625ed1 (diff)
download	nasm-3720f7beaeaefeb1e6bbf1bb8416ef78d4abe6e6.tar.gz nasm-3720f7beaeaefeb1e6bbf1bb8416ef78d4abe6e6.tar.bz2 nasm-3720f7beaeaefeb1e6bbf1bb8416ef78d4abe6e6.zip