diff options
author | H. Peter Anvin <hpa@zytor.com> | 2008-05-12 11:00:50 -0700 |
---|---|---|
committer | H. Peter Anvin <hpa@zytor.com> | 2008-05-12 11:00:50 -0700 |
commit | 3720f7beaeaefeb1e6bbf1bb8416ef78d4abe6e6 (patch) | |
tree | ef4d366d67a69c27e7b19936d90300209cec4281 /insns.pl | |
parent | 387c1c271426256ed04439d43b31cdc41a625ed1 (diff) | |
download | nasm-3720f7beaeaefeb1e6bbf1bb8416ef78d4abe6e6.tar.gz nasm-3720f7beaeaefeb1e6bbf1bb8416ef78d4abe6e6.tar.bz2 nasm-3720f7beaeaefeb1e6bbf1bb8416ef78d4abe6e6.zip |
Generate a byte array instead of using strings for the byte codes
Generate a byte array instead of using C compiler strings for the byte
codes. This has a few advantages:
- No need to special-case zero due to broken C compilers.
- Only insns.pl ever reads the string, so we can invent our own
syntax.
- Compaction: a byte code sequence that is a suffix of a longer one shares its storage in the array.
- We can give it the proper, unsigned type.
Diffstat (limited to 'insns.pl')
-rw-r--r-- | insns.pl | 103 |
1 files changed, 93 insertions, 10 deletions
@@ -17,7 +17,7 @@ print STDERR "Reading insns.dat...\n"; undef $output; foreach $arg ( @ARGV ) { if ( $arg =~ /^\-/ ) { - if ( $arg =~ /^\-([adin])$/ ) { + if ( $arg =~ /^\-([abdin])$/ ) { $output = $1; } else { die "$0: Unknown option: ${arg}\n"; @@ -31,6 +31,7 @@ $fname = "insns.dat" unless $fname = $args[0]; open (F, $fname) || die "unable to open $fname"; %dinstables = (); +@bytecode_list = (); $line = 0; $insns = 0; @@ -68,9 +69,59 @@ while (<F>) { close F; +# +# Generate the bytecode array. At this point, @bytecode_list contains +# the full set of bytecodes. +# + +# Sort by descending length +@bytecode_list = sort { scalar(@$b) <=> scalar(@$a) } @bytecode_list; +@bytecode_array = (); +%bytecode_pos = (); +$bytecode_next = 0; +foreach $bl (@bytecode_list) { + my $h = hexstr(@$bl); + next if (defined($bytecode_pos{$h})); + + push(@bytecode_array, $bl); + while ($h ne '') { + $bytecode_pos{$h} = $bytecode_next; + $h = substr($h, 2); + $bytecode_next++; + } +} +undef @bytecode_list; + @opcodes = sort keys(%k_opcodes); @opcodes_cc = sort keys(%k_opcodes_cc); +if ( !defined($output) || $output eq 'b') { + print STDERR "Writing insnsb.c...\n"; + + open B, ">insnsb.c"; + + print B "/* This file auto-generated from insns.dat by insns.pl" . + " - don't edit it */\n\n"; + + print B "#include \"nasm.h\"\n"; + print B "#include \"insns.h\"\n\n"; + + print B "static const uint8_t nasm_bytecodes[$bytecode_next] = {\n"; + + $p = 0; + foreach $bl (@bytecode_array) { + printf B " /* %4d */ ", $p; + foreach $d (@$bl) { + printf B "%#o,", $d; + $p++; + } + printf B "\n"; + } + print B "};\n"; + + close B; +} + if ( !defined($output) || $output eq 'a' ) { print STDERR "Writing insnsa.c...\n"; @@ -78,15 +129,14 @@ if ( !defined($output) || $output eq 'a' ) { print A "/* This file auto-generated from insns.dat by insns.pl" . 
" - don't edit it */\n\n"; - print A "#include \"nasm.h\"\n"; - print A "#include \"insns.h\"\n"; - print A "\n"; + + print A "#include \"insnsb.c\"\n\n"; foreach $i (@opcodes, @opcodes_cc) { print A "static const struct itemplate instrux_${i}[] = {\n"; $aname = "aa_$i"; foreach $j (@$aname) { - print A " $j\n"; + print A " ", codesubst($j), "\n"; } print A " ITEMPLATE_END\n};\n\n"; } @@ -106,14 +156,13 @@ if ( !defined($output) || $output eq 'd' ) { print D "/* This file auto-generated from insns.dat by insns.pl" . " - don't edit it */\n\n"; - print D "#include \"nasm.h\"\n"; - print D "#include \"insns.h\"\n"; - print D "\n"; + + print D "#include \"insnsb.c\"\n\n"; print D "static const struct itemplate instrux[] = {\n"; $n = 0; foreach $j (@big) { - printf D " /* %4d */ %s\n", $n++, $j; + printf D " /* %4d */ %s\n", $n++, codesubst($j); } print D "};\n"; @@ -230,6 +279,7 @@ printf STDERR "Done: %d instructions\n", $insns; sub format { my ($opcode, $operands, $codes, $flags) = @_; my $num, $nd = 0; + my @bytecode; return (undef, undef) if $operands eq "ignore"; @@ -260,7 +310,29 @@ sub format { $flags =~ s/(\|IF_ND|IF_ND\|)//, $nd = 1 if $flags =~ /IF_ND/; $flags = "IF_" . $flags; - ("{I_$opcode, $num, {$operands}, \"$codes\", $flags},", $nd); + @bytecode = (decodify($codes), 0); + push(@bytecode_list, [@bytecode]); + $codes = hexstr(@bytecode); + + ("{I_$opcode, $num, {$operands}, \@\@CODES-$codes\@\@, $flags},", $nd); +} + +# +# Look for @@CODES-xxx@@ sequences and replace them with the appropriate +# offset into nasm_bytecodes +# +sub codesubst($) { + my($s) = @_; + my $n; + + while ($s =~ /\@\@CODES-([0-9A-F]+)\@\@/) { + my $pos = $bytecode_pos{$1}; + if (!defined($pos)) { + die "$0: no position assigned to byte code $1\n"; + } + $s = $` . "nasm_bytecodes+${pos}" . 
"$'"; + } + return $s; } sub addprefix ($@) { @@ -303,6 +375,17 @@ sub decodify($) { return @codes; } +# Turn a numeric list into a hex string +sub hexstr(@) { + my $s = ''; + my $c; + + foreach $c (@_) { + $s .= sprintf("%02X", $c); + } + return $s; +} + # Here we determine the range of possible starting bytes for a given # instruction. We need only consider the codes: # \1 \2 \3 mean literal bytes, of course |