diff options
author | H. Peter Anvin <hpa@zytor.com> | 2002-04-30 20:51:32 +0000 |
---|---|---|
committer | H. Peter Anvin <hpa@zytor.com> | 2002-04-30 20:51:32 +0000 |
commit | ea6e34db64c7da7cb885197316c6b5e7d048bdb9 (patch) | |
tree | 78e728348f8fe09e394a51c3617e6261de0f4001 | |
download | nasm-ea6e34db64c7da7cb885197316c6b5e7d048bdb9.tar.gz nasm-ea6e34db64c7da7cb885197316c6b5e7d048bdb9.tar.bz2 nasm-ea6e34db64c7da7cb885197316c6b5e7d048bdb9.zip |
NASM 0.91
-rw-r--r-- | Licence | 75 | ||||
-rw-r--r-- | Makefile | 93 | ||||
-rw-r--r-- | Makefile.bor | 76 | ||||
-rw-r--r-- | Makefile.dos | 72 | ||||
-rw-r--r-- | Readme | 54 | ||||
-rw-r--r-- | assemble.c | 945 | ||||
-rw-r--r-- | assemble.h | 17 | ||||
-rw-r--r-- | disasm.c | 667 | ||||
-rw-r--r-- | disasm.h | 18 | ||||
-rw-r--r-- | float.c | 389 | ||||
-rw-r--r-- | float.h | 16 | ||||
-rw-r--r-- | insns.dat | 984 | ||||
-rw-r--r-- | insns.h | 66 | ||||
-rw-r--r-- | insns.pl | 160 | ||||
-rw-r--r-- | internal.doc | 268 | ||||
-rw-r--r-- | labels.c | 292 | ||||
-rw-r--r-- | labels.h | 17 | ||||
-rw-r--r-- | lcc/Readme | 57 | ||||
-rw-r--r-- | lcc/bind.c | 23 | ||||
-rw-r--r-- | lcc/lin-aout.c | 44 | ||||
-rw-r--r-- | lcc/lin-elf.c | 45 | ||||
-rw-r--r-- | lcc/x86nasm.md | 703 | ||||
-rw-r--r-- | misc/magic | 6 | ||||
-rw-r--r-- | misc/nasm.sl | 305 | ||||
-rw-r--r-- | names.c | 79 | ||||
-rw-r--r-- | nasm.c | 648 | ||||
-rw-r--r-- | nasm.doc | 996 | ||||
-rw-r--r-- | nasm.h | 443 | ||||
-rw-r--r-- | nasmlib.c | 488 | ||||
-rw-r--r-- | nasmlib.h | 115 | ||||
-rw-r--r-- | ndisasm.c | 270 | ||||
-rw-r--r-- | ndisasm.doc | 199 | ||||
-rw-r--r-- | outaout.c | 466 | ||||
-rw-r--r-- | outas86.c | 548 | ||||
-rw-r--r-- | outbin.c | 303 | ||||
-rw-r--r-- | outcoff.c | 611 | ||||
-rw-r--r-- | outdbg.c | 138 | ||||
-rw-r--r-- | outelf.c | 620 | ||||
-rw-r--r-- | outform.c | 42 | ||||
-rw-r--r-- | outform.h | 167 | ||||
-rw-r--r-- | outobj.c | 1229 | ||||
-rw-r--r-- | outrdf.c | 467 | ||||
-rw-r--r-- | parser.c | 1306 | ||||
-rw-r--r-- | parser.h | 18 | ||||
-rw-r--r-- | rdoff/Makefile | 43 | ||||
-rw-r--r-- | rdoff/collectn.c | 40 | ||||
-rw-r--r-- | rdoff/collectn.h | 22 | ||||
-rw-r--r-- | rdoff/ldrdf.c | 540 | ||||
-rw-r--r-- | rdoff/rdf.doc | 99 | ||||
-rw-r--r-- | rdoff/rdfdump.c | 156 | ||||
-rw-r--r-- | rdoff/rdfload.c | 173 | ||||
-rw-r--r-- | rdoff/rdfload.h | 29 | ||||
-rw-r--r-- | rdoff/rdoff.c | 367 | ||||
-rw-r--r-- | rdoff/rdoff.h | 112 | ||||
-rw-r--r-- | rdoff/rdx.c | 61 | ||||
-rw-r--r-- | rdoff/symtab.c | 80 | ||||
-rw-r--r-- | rdoff/symtab.h | 22 | ||||
-rw-r--r-- | sync.c | 84 | ||||
-rw-r--r-- | sync.h | 16 | ||||
-rw-r--r-- | test/Makefile | 2 | ||||
-rw-r--r-- | test/aouttest.asm | 83 | ||||
-rw-r--r-- | test/aouttest.c | 35 | ||||
-rw-r--r-- | test/bintest.asm | 56 | ||||
-rw-r--r-- | test/cofftest.asm | 82 | ||||
-rw-r--r-- | test/cofftest.c | 34 | ||||
-rw-r--r-- | test/elftest.asm | 83 | ||||
-rw-r--r-- | test/elftest.c | 35 | ||||
-rw-r--r-- | test/inc1.asm | 4 | ||||
-rw-r--r-- | test/inc2.asm | 8 | ||||
-rw-r--r-- | test/inctest.asm | 15 | ||||
-rw-r--r-- | test/objlink.c | 30 | ||||
-rw-r--r-- | test/objtest.asm | 82 |
72 files changed, 16938 insertions, 0 deletions
@@ -0,0 +1,75 @@ +Terms and Conditions for the use of the Netwide Assembler +========================================================= + +Can I have the gist without reading the legalese? +------------------------------------------------- + +Basically, NASM is free. You can't charge for it. You can copy it as +much as you like. You can incorporate it, or bits of it, into other +free programs if you want. (But we want to know about it if you do, +and we want to be mentioned in the credits.) We may well allow you +to incorporate it into commercial software too, but we'll probably +demand some money for it, and we'll certainly demand to be given +credit. And in extreme cases (although I can't immediately think of +a reason we might actually want to do this) we may refuse to let you +do it at all. + +NASM LICENCE AGREEMENT +====================== + +By "the Software" this licence refers to the complete contents of +the NASM archive, excluding this licence document itself, and +excluding the contents of the `test' directory. The Netwide +Disassembler, NDISASM, is specifically included under this licence. + +I. The Software is freely redistributable; anyone may copy the +Software, or parts of the Software, and give away as many copies as +they like to anyone, as long as this licence document is kept with +the Software. Charging a fee for the Software is prohibited, +although a fee may be charged for the act of transferring a copy, +and you can offer warranty protection and charge a fee for that. + +II. The Software, or parts thereof, may be incorporated into other +freely redistributable software (by which we mean software that may +be obtained free of charge) without requiring permission from the +authors, as long as due credit is given to the authors of the +Software in the resulting work, as long as the authors are informed +of this action, and as long as those parts of the Software that are +used remain under this licence. + +III. 
The Software, or parts thereof, may be incorporated into other +software which is not freely redistributable (i.e. software for +which a fee is charged), as long as permission is granted from the +authors of the Software. The authors reserve the right to grant this +permission only for a fee, which may at our option take the form of +royalty payments. The authors also reserve the right to refuse to +grant permission if they deem it necessary. + +IV. You may not copy, modify or distribute the Software except under +the terms given in this licence document. You may not sublicense the +Software or in any way place it under any other licence than this +one. Since you have not signed this licence, you are not of course +required to accept it; however, no other licence applies to the +Software, and nothing else grants you any permission to copy, +modify, sublicense or distribute the Software in any way. These +actions are therefore prohibited if you do not accept this licence. + +V. There is no warranty for the Software, to the extent permitted by +applicable law. The authors provide the Software "as is" without +warranty of any kind, either expressed or implied, including but not +limited to the implied warranties of merchantability and fitness for +a particular purpose. The entire risk as to the quality and +performance of the Software is with you. Should the Software prove +defective, you assume the cost of all necessary servicing, repair or +correction. + +VI. In no event, unless required by applicable law or agreed to in +writing, will any of the authors be liable to you for damages, +including any general, special, incidental or consequential damages, +arising out of the use or the inability to use the Software, +including but not limited to loss of data or data being rendered +inaccurate or a failure of the Software to operate with any other +programs, even if you have been advised of the possibility of such +damages. 
+ +END OF LICENCE AGREEMENT diff --git a/Makefile b/Makefile new file mode 100644 index 0000000..605ab41 --- /dev/null +++ b/Makefile @@ -0,0 +1,93 @@ +# Makefile for the Netwide Assembler +# +# The Netwide Assembler is copyright (C) 1996 Simon Tatham and +# Julian Hall. All rights reserved. The software is +# redistributable under the licence given in the file "Licence" +# distributed in the NASM archive. +# +# This Makefile is designed for use under Unix (probably fairly +# portably). It can also be used without change to build NASM using +# DJGPP. The makefile "Makefile.dos" can be used to build NASM using +# a 16-bit DOS C compiler such as Microsoft C. +# +# The `make dist' section at the end of the Makefile is not +# guaranteed to work anywhere except Linux. Come to think of it, +# I'm not sure I want to guarantee it to work anywhere except on +# _my_ computer. :-) + +CC = gcc +CCFLAGS = -c -g -O -Wall -ansi -pedantic +LINK = gcc +LINKFLAGS = -o nasm +DLINKFLAGS = -o ndisasm +LIBRARIES = +STRIP = strip +EXE =# +OBJ = o# + +.c.$(OBJ): + $(CC) $(CCFLAGS) $*.c + +NASMOBJS = nasm.$(OBJ) nasmlib.$(OBJ) float.$(OBJ) insnsa.$(OBJ) \ + assemble.$(OBJ) labels.$(OBJ) parser.$(OBJ) outform.$(OBJ) \ + outbin.$(OBJ) outaout.$(OBJ) outcoff.$(OBJ) outelf.$(OBJ) \ + outobj.$(OBJ) outas86.$(OBJ) outrdf.$(OBJ) outdbg.$(OBJ) + +NDISASMOBJS = ndisasm.$(OBJ) disasm.$(OBJ) sync.$(OBJ) nasmlib.$(OBJ) \ + insnsd.$(OBJ) + +all : nasm$(EXE) ndisasm$(EXE) + +nasm$(EXE): $(NASMOBJS) + $(LINK) $(LINKFLAGS) $(NASMOBJS) $(LIBRARIES) + +ndisasm$(EXE): $(NDISASMOBJS) + $(LINK) $(DLINKFLAGS) $(NDISASMOBJS) $(LIBRARIES) + +assemble.$(OBJ): assemble.c nasm.h assemble.h insns.h +disasm.$(OBJ): disasm.c nasm.h disasm.h sync.h insns.h names.c +float.$(OBJ): float.c nasm.h +insnsa.$(OBJ): insnsa.c nasm.h insns.h +insnsd.$(OBJ): insnsd.c nasm.h insns.h +labels.$(OBJ): labels.c nasm.h nasmlib.h +nasm.$(OBJ): nasm.c nasm.h nasmlib.h parser.h assemble.h labels.h outform.h +nasmlib.$(OBJ): nasmlib.c 
nasm.h nasmlib.h +ndisasm.$(OBJ): ndisasm.c nasm.h sync.h disasm.h +outas86.$(OBJ): outas86.c nasm.h nasmlib.h +outaout.$(OBJ): outaout.c nasm.h nasmlib.h +outbin.$(OBJ): outbin.c nasm.h nasmlib.h +outcoff.$(OBJ): outcoff.c nasm.h nasmlib.h +outelf.$(OBJ): outelf.c nasm.h nasmlib.h +outobj.$(OBJ): outobj.c nasm.h nasmlib.h +outform.$(OBJ): outform.c outform.h nasm.h +parser.$(OBJ): parser.c nasm.h nasmlib.h parser.h float.h names.c +sync.$(OBJ): sync.c sync.h + +# These two source files are automagically generated from a single +# instruction-table file by a Perl script. They're distributed, +# though, so it isn't necessary to have Perl just to recompile NASM +# from the distribution. + +AUTOSRCS = insnsa.c insnsd.c +$(AUTOSRCS): insns.dat insns.pl + perl insns.pl + +clean : + rm -f $(NASMOBJS) $(NDISASMOBJS) nasm$(EXE) ndisasm$(EXE) + make -C rdoff clean + make -C test clean + +# Here the `make dist' section begins. Nothing is guaranteed hereafter +# unless you're using the Makefile under Linux, running bash, with +# gzip, GNU tar and a sensible version of zip readily available. + +DOSEXES = nasm.exe ndisasm.exe +MANPAGES = nasm.man ndisasm.man + +.SUFFIXES: .man .1 + +.1.man: + -man ./$< | ul > $@ + +dist: $(AUTOSRCS) $(MANPAGES) $(DOSEXES) clean + makedist.sh diff --git a/Makefile.bor b/Makefile.bor new file mode 100644 index 0000000..75aed4f --- /dev/null +++ b/Makefile.bor @@ -0,0 +1,76 @@ +# Makefile for the Netwide Assembler under 16-bit DOS +# +# The Netwide Assembler is copyright (C) 1996 Simon Tatham and +# Julian Hall. All rights reserved. The software is +# redistributable under the licence given in the file "Licence" +# distributed in the NASM archive. +# +# This Makefile is designed to build NASM using a 16-bit DOS C +# compiler such as Borland C, and has been tested with Borland C 2.3 +# and Borland Make. 
+ +# CC = cl +# CCFLAGS = /c /O /AL +# LINK = cl +CC = bcc +CCFLAGS = -c -O -ml -A +LINK = tlink /c /Lc:\bc\lib +LINKFLAGS = +LIBRARIES = +EXE = .exe# +OBJ = obj# + +.c.$(OBJ): + $(CC) $(CCFLAGS) $*.c + +NASMOBJS1 = nasm.$(OBJ) nasmlib.$(OBJ) float.$(OBJ) insnsa.$(OBJ) +NASMOBJS2 = assemble.$(OBJ) labels.$(OBJ) parser.$(OBJ) outform.$(OBJ) +NASMOBJS3 = outbin.$(OBJ) outaout.$(OBJ) outcoff.$(OBJ) outelf.$(OBJ) +NASMOBJS4 = outobj.$(OBJ) outas86.$(OBJ) outdbg.$(OBJ) outrdf.$(OBJ) + +NASMOBJS = $(NASMOBJS1) $(NASMOBJS2) $(NASMOBJS3) $(NASMOBJS4) + +NDISASMOBJS = ndisasm.$(OBJ) disasm.$(OBJ) sync.$(OBJ) nasmlib.$(OBJ) \ + insnsd.$(OBJ) + +all : nasm$(EXE) ndisasm$(EXE) + +# We have to have a horrible kludge here to get round the 128 character +# limit, as usual... +nasm$(EXE): $(NASMOBJS) +# $(LINK) /Fenasm.exe a*.obj f*.obj insnsa.obj l*.obj na*.obj o*.obj p*.obj + echo c0l.obj $(NASMOBJS1) +> nasmobjs.tmp + echo $(NASMOBJS2) +>> nasmobjs.tmp + echo $(NASMOBJS3) +>> nasmobjs.tmp + echo $(NASMOBJS4),nasm.exe,,cl.lib, >> nasmobjs.tmp + $(LINK) /Tde @nasmobjs.tmp + +ndisasm$(EXE): $(NDISASMOBJS) +# $(LINK) /Fendisasm.exe $(NDISASMOBJS) + $(LINK) /Tde $(NDISASMOBJS),ndisasm.exe,,cl.lib, + +assemble.$(OBJ): assemble.c nasm.h assemble.h insns.h +disasm.$(OBJ): disasm.c nasm.h disasm.h sync.h insns.h names.c +float.$(OBJ): float.c nasm.h +insnsa.$(OBJ): insnsa.c nasm.h insns.h +insnsd.$(OBJ): insnsd.c nasm.h insns.h +labels.$(OBJ): labels.c nasm.h nasmlib.h +nasm.$(OBJ): nasm.c nasm.h nasmlib.h parser.h assemble.h labels.h outform.h +nasmlib.$(OBJ): nasmlib.c nasm.h nasmlib.h +ndisasm.$(OBJ): ndisasm.c nasm.h sync.h disasm.h +outas86.$(OBJ): outas86.c nasm.h nasmlib.h +outaout.$(OBJ): outaout.c nasm.h nasmlib.h +outbin.$(OBJ): outbin.c nasm.h nasmlib.h +outcoff.$(OBJ): outcoff.c nasm.h nasmlib.h +outdbg.$(OBJ): outdbg.c nasm.h nasmlib.h +outelf.$(OBJ): outelf.c nasm.h nasmlib.h +outobj.$(OBJ): outobj.c nasm.h nasmlib.h +outrdf.$(OBJ): outrdf.c nasm.h nasmlib.h 
+outform.$(OBJ): outform.c outform.h nasm.h +parser.$(OBJ): parser.c nasm.h nasmlib.h parser.h float.h names.c +sync.$(OBJ): sync.c sync.h + +clean : + del *.obj + del nasm$(EXE) + del ndisasm$(EXE) diff --git a/Makefile.dos b/Makefile.dos new file mode 100644 index 0000000..cb75708 --- /dev/null +++ b/Makefile.dos @@ -0,0 +1,72 @@ +# Makefile for the Netwide Assembler under 16-bit DOS +# +# The Netwide Assembler is copyright (C) 1996 Simon Tatham and +# Julian Hall. All rights reserved. The software is +# redistributable under the licence given in the file "Licence" +# distributed in the NASM archive. +# +# This Makefile is designed to build NASM using a 16-bit DOS C +# compiler such as Microsoft C, provided you have a compatible MAKE. +# It's been tested with Microsoft C 5.x plus Borland Make. (Yes, I +# know it's silly, but...) + +CC = cl +CCFLAGS = /c /O /AL +LINK = cl +LINKFLAGS = +LIBRARIES = +EXE = .exe# +OBJ = obj# + +.c.$(OBJ): + $(CC) $(CCFLAGS) $*.c + +NASMOBJS = nasm.$(OBJ) nasmlib.$(OBJ) float.$(OBJ) insnsa.$(OBJ) \ + assemble.$(OBJ) labels.$(OBJ) parser.$(OBJ) outform.$(OBJ) \ + outbin.$(OBJ) outaout.$(OBJ) outcoff.$(OBJ) outelf.$(OBJ) \ + outobj.$(OBJ) outas86.$(OBJ) outrdf.$(OBJ) outdbg.$(OBJ) + +NDISASMOBJS = ndisasm.$(OBJ) disasm.$(OBJ) sync.$(OBJ) nasmlib.$(OBJ) \ + insnsd.$(OBJ) + +all : nasm$(EXE) ndisasm$(EXE) + +# We have to have a horrible kludge here to get round the 128 character +# limit, as usual... 
+nasm$(EXE): $(NASMOBJS) + cl /Fenasm.exe a*.obj f*.obj insnsa.obj l*.obj na*.obj o*.obj p*.obj + +ndisasm$(EXE): $(NDISASMOBJS) + cl /Fendisasm.exe $(NDISASMOBJS) + +assemble.$(OBJ): assemble.c nasm.h assemble.h insns.h +disasm.$(OBJ): disasm.c nasm.h disasm.h sync.h insns.h names.c +float.$(OBJ): float.c nasm.h +labels.$(OBJ): labels.c nasm.h nasmlib.h +nasm.$(OBJ): nasm.c nasm.h nasmlib.h parser.h assemble.h labels.h outform.h +nasmlib.$(OBJ): nasmlib.c nasm.h nasmlib.h +ndisasm.$(OBJ): ndisasm.c nasm.h sync.h disasm.h +outas86.$(OBJ): outas86.c nasm.h nasmlib.h +outaout.$(OBJ): outaout.c nasm.h nasmlib.h +outbin.$(OBJ): outbin.c nasm.h nasmlib.h +outcoff.$(OBJ): outcoff.c nasm.h nasmlib.h +outdbg.$(OBJ): outdbg.c nasm.h nasmlib.h +outelf.$(OBJ): outelf.c nasm.h nasmlib.h +outobj.$(OBJ): outobj.c nasm.h nasmlib.h +outrdf.$(OBJ): outrdf.c nasm.h nasmlib.h +outform.$(OBJ): outform.c outform.h nasm.h +parser.$(OBJ): parser.c nasm.h nasmlib.h parser.h float.h names.c +sync.$(OBJ): sync.c sync.h + +# Another grotty hack: QC is less likely to run out of memory than +# CL proper; and we don't need any optimisation in these modules +# since they're just data. +insnsa.$(OBJ): insnsa.c nasm.h insns.h + qcl /c /AL insnsa.c +insnsd.$(OBJ): insnsd.c nasm.h insns.h + qcl /c /AL insnsd.c + +clean : + del *.obj + del nasm$(EXE) + del ndisasm$(EXE) @@ -0,0 +1,54 @@ +This is a distribution of NASM, the Netwide Assembler. NASM is a +prototype general-purpose x86 assembler. It will currently output +flat-form binary files, a.out, COFF and ELF Unix object files, +Microsoft 16-bit DOS and Win32 object files, the as86 object format, +and a home-grown format called RDF. + +Also included is NDISASM, a prototype x86 binary-file disassembler +which uses the same instruction table as NASM. + +To install NASM, you will need GCC. 
Type `make', and then when it +has finished copy the file `nasm' (and maybe `ndisasm') to a +directory on your search path (I use /usr/local/bin on my linux +machine at home, and ~/bin on other machines where I don't have root +access). You may also want to copy the man page `nasm.1' (and maybe +`ndisasm.1') to somewhere sensible. + +If you want to build a restricted version of NASM containing only +some of the object file formats, you can achieve this by adding +#defines to `outform.h' (see the file itself for documentation), or +equivalently by adding compiler command line options in the +Makefile. + +There is a machine description file for the `LCC' retargetable C +compiler, in the directory `lcc', along with instructions for its +use. This means that NASM can now be used as the code-generator back +end for a useful C compiler. + +Michael `Wuschel' Tippach has ported his DOS extender `WDOSX' to +enable it to work with the 32-bit binary files NASM can output: the +original extender and his port `WDOSX/N' are available from his web +page, http://www.geocities.com/SiliconValley/Park/4493. + +The `misc' directory contains `nasm.sl', a NASM editing mode for the +JED programmers' editor (see http://space.mit.edu/~davis/jed.html +for details about JED). The comment at the start of the file gives +instructions on how to install the mode. This directory also +contains a file (`magic') containing lines to add to /etc/magic on +Unix systems to allow the `file' command to recognise RDF files. + +The `rdoff' directory contains sources for a linker and loader for +the RDF object file format, to run under Linux, and also +documentation on the internal structure of RDF files. + +For information about how you can distribute and use NASM, see the +file Licence. We were tempted to put NASM under the GPL, but decided +that in many ways it was too restrictive for developers. + +For information about how to use NASM, see `nasm.doc'. 
For +information about how to use NDISASM, see `ndisasm.doc'. For +information about the internal structure of NASM, see +`internal.doc'. + +Bug reports (and patches if you can) should be sent to +jules@dcs.warwick.ac.uk or anakin@pobox.com. diff --git a/assemble.c b/assemble.c new file mode 100644 index 0000000..bab6f29 --- /dev/null +++ b/assemble.c @@ -0,0 +1,945 @@ +/* assemble.c code generation for the Netwide Assembler + * + * The Netwide Assembler is copyright (C) 1996 Simon Tatham and + * Julian Hall. All rights reserved. The software is + * redistributable under the licence given in the file "Licence" + * distributed in the NASM archive. + * + * the actual codes (C syntax, i.e. octal): + * \0 - terminates the code. (Unless it's a literal of course.) + * \1, \2, \3 - that many literal bytes follow in the code stream + * \4, \6 - the POP/PUSH (respectively) codes for CS, DS, ES, SS + * (POP is never used for CS) depending on operand 0 + * \5, \7 - the second byte of POP/PUSH codes for FS, GS, depending + * on operand 0 + * \10, \11, \12 - a literal byte follows in the code stream, to be added + * to the register value of operand 0, 1 or 2 + * \17 - encodes the literal byte 0. (Some compilers don't take + * kindly to a zero byte in the _middle_ of a compile time + * string constant, so I had to put this hack in.) 
+ * \14, \15, \16 - a signed byte immediate operand, from operand 0, 1 or 2 + * \20, \21, \22 - a byte immediate operand, from operand 0, 1 or 2 + * \24, \25, \26 - an unsigned byte immediate operand, from operand 0, 1 or 2 + * \30, \31, \32 - a word immediate operand, from operand 0, 1 or 2 + * \34, \35, \36 - select between \3[012] and \4[012] depending on 16/32 bit + * assembly mode or the address-size override on the operand + * \37 - a word constant, from the _segment_ part of operand 0 + * \40, \41, \42 - a long immediate operand, from operand 0, 1 or 2 + * \50, \51, \52 - a byte relative operand, from operand 0, 1 or 2 + * \60, \61, \62 - a word relative operand, from operand 0, 1 or 2 + * \64, \65, \66 - select between \6[012] and \7[012] depending on 16/32 bit + * assembly mode or the address-size override on the operand + * \70, \71, \72 - a long relative operand, from operand 0, 1 or 2 + * \1ab - a ModRM, calculated on EA in operand a, with the spare + * field the register value of operand b. + * \2ab - a ModRM, calculated on EA in operand a, with the spare + * field equal to digit b. + * \30x - might be an 0x67 byte, depending on the address size of + * the memory reference in operand x. + * \310 - indicates fixed 16-bit address size, i.e. optional 0x67. + * \311 - indicates fixed 32-bit address size, i.e. optional 0x67. + * \320 - indicates fixed 16-bit operand size, i.e. optional 0x66. + * \321 - indicates fixed 32-bit operand size, i.e. optional 0x66. + * \322 - indicates that this instruction is only valid when the + * operand size is the default (instruction to disassembler, + * generates no code in the assembler) + * \330 - a literal byte follows in the code stream, to be added + * to the condition code value of the instruction. + * \340 - reserve <operand 0> bytes of uninitialised storage. + * Operand 0 had better be a segmentless constant. 
+ */ + +#include <stdio.h> +#include <string.h> + +#include "nasm.h" +#include "assemble.h" +#include "insns.h" + +extern struct itemplate *nasm_instructions[]; + +typedef struct { + int sib_present; /* is a SIB byte necessary? */ + int bytes; /* # of bytes of offset needed */ + int size; /* lazy - this is sib+bytes+1 */ + unsigned char modrm, sib; /* the bytes themselves */ +} ea; + +static efunc errfunc; +static struct ofmt *outfmt; + +static long calcsize (long, long, int, insn *, char *); +static void gencode (long, long, int, insn *, char *, long); +static int regval (operand *o); +static int matches (struct itemplate *, insn *); +static ea *process_ea (operand *, ea *, int, int); +static int chsize (operand *, int); + +long assemble (long segment, long offset, int bits, + insn *instruction, struct ofmt *output, efunc error) { + int j, itimes, size_prob; + long insn_end; + long start = offset; + struct itemplate *temp; + + errfunc = error; /* to pass to other functions */ + outfmt = output; /* likewise */ + + if (instruction->opcode == -1) + return 0; + + if (instruction->opcode == I_DB || + instruction->opcode == I_DW || + instruction->opcode == I_DD || + instruction->opcode == I_DQ || + instruction->opcode == I_DT) { + extop *e; + long osize, wsize = 0; /* placate gcc */ + int t = instruction->times; + + switch (instruction->opcode) { + case I_DB: wsize = 1; break; + case I_DW: wsize = 2; break; + case I_DD: wsize = 4; break; + case I_DQ: wsize = 8; break; + case I_DT: wsize = 10; break; + } + + while (t--) { + for (e = instruction->eops; e; e = e->next) { + osize = 0; + if (e->type == EOT_DB_NUMBER) { + if (wsize == 1) { + if (e->segment != NO_SEG) + errfunc (ERR_NONFATAL, + "one-byte relocation attempted"); + else { + unsigned char c = e->offset; + outfmt->output (segment, &c, OUT_RAWDATA+1, + NO_SEG, NO_SEG); + } + } else if (wsize > 5) { + errfunc (ERR_NONFATAL, "integer supplied to a D%c" + " instruction", wsize==8 ? 
'Q' : 'T'); + } else + outfmt->output (segment, &e->offset, + OUT_ADDRESS+wsize, e->segment, + e->wrt); + offset += wsize; + } else if (e->type == EOT_DB_STRING) { + int align; + + align = (-e->stringlen) % wsize; + if (align < 0) + align += wsize; + outfmt->output (segment, e->stringval, + OUT_RAWDATA+e->stringlen, NO_SEG, NO_SEG); + if (align) + outfmt->output (segment, "\0\0\0\0", + OUT_RAWDATA+align, NO_SEG, NO_SEG); + offset += e->stringlen + align; + } + } + } + return offset - start; + } + + size_prob = FALSE; + temp = nasm_instructions[instruction->opcode]; + while (temp->opcode != -1) { + int m = matches (temp, instruction); + if (m == 100) { /* matches! */ + char *codes = temp->code; + long insn_size = calcsize(segment, offset, bits, + instruction, codes); + itimes = instruction->times; + if (insn_size < 0) /* shouldn't be, on pass two */ + error (ERR_PANIC, "errors made it through from pass one"); + else while (itimes--) { + insn_end = offset + insn_size; + for (j=0; j<instruction->nprefix; j++) { + unsigned char c; + switch (instruction->prefixes[j]) { + case P_LOCK: + c = 0xF0; break; + case P_REPNE: case P_REPNZ: + c = 0xF2; break; + case P_REPE: case P_REPZ: case P_REP: + c = 0xF3; break; + case R_CS: c = 0x2E; break; + case R_DS: c = 0x3E; break; + case R_ES: c = 0x26; break; + case R_FS: c = 0x64; break; + case R_GS: c = 0x65; break; + case R_SS: c = 0x36; break; + case P_A16: + if (bits == 16) + c = 0; /* no prefix */ + else + c = 0x67; + break; + case P_A32: + if (bits == 32) + c = 0; /* no prefix */ + else + c = 0x67; + break; + case P_O16: + if (bits == 16) + c = 0; /* no prefix */ + else + c = 0x66; + break; + case P_O32: + if (bits == 32) + c = 0; /* no prefix */ + else + c = 0x66; + break; + default: + error (ERR_PANIC, + "invalid instruction prefix"); + } + if (c != 0) + outfmt->output (segment, &c, OUT_RAWDATA+1, + NO_SEG, NO_SEG); + offset++; + } + gencode (segment, offset, bits, instruction, codes, insn_end); + offset += insn_size; + } + 
return offset - start; + } else if (m > 0) { + size_prob = m; + } + temp++; + } + if (temp->opcode == -1) { /* didn't match any instruction */ + if (size_prob == 1) /* would have matched, but for size */ + error (ERR_NONFATAL, "operation size not specified"); + else if (size_prob == 2) + error (ERR_NONFATAL, "mismatch in operand sizes"); + else + error (ERR_NONFATAL, + "invalid combination of opcode and operands"); + } + return 0; +} + +long insn_size (long segment, long offset, int bits, + insn *instruction, efunc error) { + struct itemplate *temp; + + errfunc = error; /* to pass to other functions */ + + if (instruction->opcode == -1) + return 0; + + if (instruction->opcode == I_DB || + instruction->opcode == I_DW || + instruction->opcode == I_DD || + instruction->opcode == I_DQ || + instruction->opcode == I_DT) { + extop *e; + long isize, osize, wsize = 0; /* placate gcc */ + + isize = 0; + switch (instruction->opcode) { + case I_DB: wsize = 1; break; + case I_DW: wsize = 2; break; + case I_DD: wsize = 4; break; + case I_DQ: wsize = 8; break; + case I_DT: wsize = 10; break; + } + + for (e = instruction->eops; e; e = e->next) { + long align; + + osize = 0; + if (e->type == EOT_DB_NUMBER) + osize = 1; + else if (e->type == EOT_DB_STRING) + osize = e->stringlen; + + align = (-osize) % wsize; + if (align < 0) + align += wsize; + isize += osize + align; + } + return isize * instruction->times; + } + + temp = nasm_instructions[instruction->opcode]; + while (temp->opcode != -1) { + if (matches(temp, instruction) == 100) { + /* we've matched an instruction. 
*/ + long isize; + char *codes = temp->code; + int j; + + isize = calcsize(segment, offset, bits, instruction, codes); + if (isize < 0) + return -1; + for (j = 0; j < instruction->nprefix; j++) { + if ((instruction->prefixes[j] != P_A16 && + instruction->prefixes[j] != P_O16 && bits==16) || + (instruction->prefixes[j] != P_A32 && + instruction->prefixes[j] != P_O32 && bits==32)) + isize++; + } + return isize * instruction->times; + } + temp++; + } + return -1; /* didn't match any instruction */ +} + +static long calcsize (long segment, long offset, int bits, + insn *ins, char *codes) { + long length = 0; + unsigned char c; + + while (*codes) switch (c = *codes++) { + case 01: case 02: case 03: + codes += c, length += c; break; + case 04: case 05: case 06: case 07: + length++; break; + case 010: case 011: case 012: + codes++, length++; break; + case 017: + length++; break; + case 014: case 015: case 016: + length++; break; + case 020: case 021: case 022: + length++; break; + case 024: case 025: case 026: + length++; break; + case 030: case 031: case 032: + length += 2; break; + case 034: case 035: case 036: + length += ((ins->oprs[c-034].addr_size ? + ins->oprs[c-034].addr_size : bits) == 16 ? 2 : 4); break; + case 037: + length += 2; break; + case 040: case 041: case 042: + length += 4; break; + case 050: case 051: case 052: + length++; break; + case 060: case 061: case 062: + length += 2; break; + case 064: case 065: case 066: + length += ((ins->oprs[c-064].addr_size ? + ins->oprs[c-064].addr_size : bits) == 16 ? 
2 : 4); break; + case 070: case 071: case 072: + length += 4; break; + case 0300: case 0301: case 0302: + length += chsize (&ins->oprs[c-0300], bits); + break; + case 0310: + length += (bits==32); + break; + case 0311: + length += (bits==16); + break; + case 0312: + break; + case 0320: + length += (bits==32); + break; + case 0321: + length += (bits==16); + break; + case 0322: + break; + case 0330: + codes++, length++; break; + case 0340: case 0341: case 0342: + if (ins->oprs[0].segment != NO_SEG) + errfunc (ERR_NONFATAL, "attempt to reserve non-constant" + " quantity of BSS space"); + else + length += ins->oprs[0].offset << (c-0340); + break; + default: /* can't do it by 'case' statements */ + if (c>=0100 && c<=0277) { /* it's an EA */ + ea ea_data; + + if (!process_ea (&ins->oprs[(c>>3)&7], &ea_data, bits, 0)) { + errfunc (ERR_NONFATAL, "invalid effective address"); + return -1; + } else + length += ea_data.size; + } else + errfunc (ERR_PANIC, "internal instruction table corrupt" + ": instruction code 0x%02X given", c); + } + return length; +} + +static void gencode (long segment, long offset, int bits, + insn *ins, char *codes, long insn_end) { + static char condval[] = { /* conditional opcodes */ + 0x7, 0x3, 0x2, 0x6, 0x2, 0x4, 0xF, 0xD, 0xC, 0xE, 0x6, 0x2, + 0x3, 0x7, 0x3, 0x5, 0xE, 0xC, 0xD, 0xF, 0x1, 0xB, 0x9, 0x5, + 0x0, 0xA, 0xA, 0xB, 0x8, 0x4 + }; + unsigned char c, bytes[4]; + long data, size; + + while (*codes) switch (c = *codes++) { + case 01: case 02: case 03: + outfmt->output (segment, codes, OUT_RAWDATA+c, NO_SEG, NO_SEG); + codes += c; + offset += c; + break; + case 04: case 06: + switch (ins->oprs[0].basereg) { + case R_CS: bytes[0] = 0x0E + (c == 0x04 ? 1 : 0); break; + case R_DS: bytes[0] = 0x1E + (c == 0x04 ? 1 : 0); break; + case R_ES: bytes[0] = 0x06 + (c == 0x04 ? 1 : 0); break; + case R_SS: bytes[0] = 0x16 + (c == 0x04 ? 
1 : 0); break; + default: + errfunc (ERR_PANIC, "bizarre 8086 segment register received"); + } + outfmt->output (segment, bytes, OUT_RAWDATA+1, NO_SEG, NO_SEG); + offset++; + break; + case 05: case 07: + switch (ins->oprs[0].basereg) { + case R_FS: bytes[0] = 0xA0 + (c == 0x05 ? 1 : 0); break; + case R_GS: bytes[0] = 0xA8 + (c == 0x05 ? 1 : 0); break; + default: + errfunc (ERR_PANIC, "bizarre 386 segment register received"); + } + outfmt->output (segment, bytes, OUT_RAWDATA+1, NO_SEG, NO_SEG); + offset++; + break; + case 010: case 011: case 012: + bytes[0] = *codes++ + regval(&ins->oprs[c-010]); + outfmt->output (segment, bytes, OUT_RAWDATA+1, NO_SEG, NO_SEG); + offset += 1; + break; + case 017: + bytes[0] = 0; + outfmt->output (segment, bytes, OUT_RAWDATA+1, NO_SEG, NO_SEG); + offset += 1; + break; + case 014: case 015: case 016: + if (ins->oprs[c-014].offset < -128 || ins->oprs[c-014].offset > 127) + errfunc (ERR_WARNING, "signed byte value exceeds bounds"); + bytes[0] = ins->oprs[c-014].offset; + outfmt->output (segment, bytes, OUT_RAWDATA+1, NO_SEG, NO_SEG); + offset += 1; + break; + case 020: case 021: case 022: + if (ins->oprs[c-020].offset < -128 || ins->oprs[c-020].offset > 255) + errfunc (ERR_WARNING, "byte value exceeds bounds"); + bytes[0] = ins->oprs[c-020].offset; + outfmt->output (segment, bytes, OUT_RAWDATA+1, NO_SEG, NO_SEG); + offset += 1; + break; + case 024: case 025: case 026: + if (ins->oprs[c-024].offset < 0 || ins->oprs[c-024].offset > 255) + errfunc (ERR_WARNING, "unsigned byte value exceeds bounds"); + bytes[0] = ins->oprs[c-024].offset; + outfmt->output (segment, bytes, OUT_RAWDATA+1, NO_SEG, NO_SEG); + offset += 1; + break; + case 030: case 031: case 032: + if (ins->oprs[c-030].segment == NO_SEG && + ins->oprs[c-030].wrt == NO_SEG && + (ins->oprs[c-030].offset < -32768 || + ins->oprs[c-030].offset > 65535)) + errfunc (ERR_WARNING, "word value exceeds bounds"); + data = ins->oprs[c-030].offset; + outfmt->output (segment, &data, 
OUT_ADDRESS+2, + ins->oprs[c-030].segment, ins->oprs[c-030].wrt); + offset += 2; + break; + case 034: case 035: case 036: + data = ins->oprs[c-034].offset; + size = ((ins->oprs[c-034].addr_size ? + ins->oprs[c-034].addr_size : bits) == 16 ? 2 : 4); + /* size is a byte count (2 or 4), not a bit width: range-check the 2-byte case */ + if (size==2 && (data < -32768 || data > 65535)) + errfunc (ERR_WARNING, "word value exceeds bounds"); + outfmt->output (segment, &data, OUT_ADDRESS+size, + ins->oprs[c-034].segment, ins->oprs[c-034].wrt); + offset += size; + break; + case 037: + if (ins->oprs[0].segment == NO_SEG) + errfunc (ERR_NONFATAL, "value referenced by FAR is not" + " relocatable"); + data = 0L; + outfmt->output (segment, &data, OUT_ADDRESS+2, + outfmt->segbase(1+ins->oprs[0].segment), + ins->oprs[0].wrt); + offset += 2; + break; + case 040: case 041: case 042: + data = ins->oprs[c-040].offset; + outfmt->output (segment, &data, OUT_ADDRESS+4, + ins->oprs[c-040].segment, ins->oprs[c-040].wrt); + offset += 4; + break; + case 050: case 051: case 052: + if (ins->oprs[c-050].segment != segment) + errfunc (ERR_NONFATAL, "short relative jump outside segment"); + data = ins->oprs[c-050].offset - insn_end; + if (data > 127 || data < -128) + errfunc (ERR_NONFATAL, "short jump is out of range"); + bytes[0] = data; + outfmt->output (segment, bytes, OUT_RAWDATA+1, NO_SEG, NO_SEG); + offset += 1; + break; + case 060: case 061: case 062: + if (ins->oprs[c-060].segment != segment) { + data = ins->oprs[c-060].offset; + outfmt->output (segment, &data, OUT_REL2ADR+insn_end-offset, + ins->oprs[c-060].segment, ins->oprs[c-060].wrt); + } else { + data = ins->oprs[c-060].offset - insn_end; + outfmt->output (segment, &data, OUT_ADDRESS+2, NO_SEG, NO_SEG); + } + offset += 2; + break; + case 064: case 065: case 066: + size = ((ins->oprs[c-064].addr_size ? + ins->oprs[c-064].addr_size : bits) == 16 ? 2 : 4); + if (ins->oprs[c-064].segment != segment) { + data = ins->oprs[c-064].offset; + size = (bits == 16 ? 
OUT_REL2ADR : OUT_REL4ADR); + outfmt->output (segment, &data, size+insn_end-offset, + ins->oprs[c-064].segment, ins->oprs[c-064].wrt); + size = (bits == 16 ? 2 : 4); + } else { + data = ins->oprs[c-064].offset - insn_end; + outfmt->output (segment, &data, OUT_ADDRESS+size, NO_SEG, NO_SEG); + } + offset += size; + break; + case 070: case 071: case 072: + if (ins->oprs[c-070].segment != segment) { + data = ins->oprs[c-070].offset; + outfmt->output (segment, &data, OUT_REL4ADR+insn_end-offset, + ins->oprs[c-070].segment, ins->oprs[c-070].wrt); + } else { + data = ins->oprs[c-070].offset - insn_end; + outfmt->output (segment, &data, OUT_ADDRESS+4, NO_SEG, NO_SEG); + } + offset += 4; + break; + case 0300: case 0301: case 0302: + if (chsize (&ins->oprs[c-0300], bits)) { + *bytes = 0x67; + outfmt->output (segment, bytes, OUT_RAWDATA+1, NO_SEG, NO_SEG); + offset += 1; + } else + offset += 0; + break; + case 0310: + if (bits==32) { + *bytes = 0x67; + outfmt->output (segment, bytes, OUT_RAWDATA+1, NO_SEG, NO_SEG); + offset += 1; + } else + offset += 0; + break; + case 0311: + if (bits==16) { + *bytes = 0x67; + outfmt->output (segment, bytes, OUT_RAWDATA+1, NO_SEG, NO_SEG); + offset += 1; + } else + offset += 0; + break; + case 0312: + break; + case 0320: + if (bits==32) { + *bytes = 0x66; + outfmt->output (segment, bytes, OUT_RAWDATA+1, NO_SEG, NO_SEG); + offset += 1; + } else + offset += 0; + break; + case 0321: + if (bits==16) { + *bytes = 0x66; + outfmt->output (segment, bytes, OUT_RAWDATA+1, NO_SEG, NO_SEG); + offset += 1; + } else + offset += 0; + break; + case 0322: + break; + case 0330: + *bytes = *codes++ + condval[ins->condition]; + outfmt->output (segment, bytes, OUT_RAWDATA+1, NO_SEG, NO_SEG); + offset += 1; + break; + case 0340: case 0341: case 0342: + if (ins->oprs[0].segment != NO_SEG) + errfunc (ERR_PANIC, "non-constant BSS size in pass two"); + else { + long size = ins->oprs[0].offset << (c-0340); + outfmt->output (segment, NULL, OUT_RESERVE+size, NO_SEG, 
NO_SEG); + offset += size; + } + break; + default: /* can't do it by 'case' statements */ + if (c>=0100 && c<=0277) { /* it's an EA */ + ea ea_data; + int rfield; + unsigned char *p; + long s; + + if (c<=0177) /* pick rfield from operand b */ + rfield = regval (&ins->oprs[c&7]); + else /* rfield is constant */ + rfield = c & 7; + if (!process_ea (&ins->oprs[(c>>3)&7], &ea_data, bits, rfield)) + errfunc (ERR_NONFATAL, "invalid effective address"); + + p = bytes; + *p++ = ea_data.modrm; + if (ea_data.sib_present) + *p++ = ea_data.sib; + /* + * the cast in the next line is to placate MS C... + */ + outfmt->output (segment, bytes, OUT_RAWDATA+(long)(p-bytes), + NO_SEG, NO_SEG); + s = p-bytes; + + switch (ea_data.bytes) { + case 0: + break; + case 1: + *bytes = ins->oprs[(c>>3)&7].offset; + outfmt->output (segment, bytes, OUT_RAWDATA+1, + NO_SEG, NO_SEG); + s++; + break; + case 2: + case 4: + data = ins->oprs[(c>>3)&7].offset; + outfmt->output (segment, &data, OUT_ADDRESS+ea_data.bytes, + ins->oprs[(c>>3)&7].segment, + ins->oprs[(c>>3)&7].wrt); + s += ea_data.bytes; + break; + } + offset += s; + } else + errfunc (ERR_PANIC, "internal instruction table corrupt" + ": instruction code 0x%02X given", c); + } +} + +static int regval (operand *o) { + switch (o->basereg) { + case R_EAX: case R_AX: case R_AL: case R_ES: case R_CR0: case R_DR0: + case R_ST0: case R_MM0: + return 0; + case R_ECX: case R_CX: case R_CL: case R_CS: case R_DR1: case R_ST1: + case R_MM1: + return 1; + case R_EDX: case R_DX: case R_DL: case R_SS: case R_CR2: case R_DR2: + case R_ST2: case R_MM2: + return 2; + case R_EBX: case R_BX: case R_BL: case R_DS: case R_CR3: case R_DR3: + case R_TR3: case R_ST3: case R_MM3: + return 3; + case R_ESP: case R_SP: case R_AH: case R_FS: case R_CR4: case R_TR4: + case R_ST4: case R_MM4: + return 4; + case R_EBP: case R_BP: case R_CH: case R_GS: case R_TR5: case R_ST5: + case R_MM5: + return 5; + case R_ESI: case R_SI: case R_DH: case R_DR6: case R_TR6: case R_ST6: + 
case R_MM6: + return 6; + case R_EDI: case R_DI: case R_BH: case R_DR7: case R_TR7: case R_ST7: + case R_MM7: + return 7; + default: /* panic */ + errfunc (ERR_PANIC, "invalid register operand given to regval()"); + return 0; + } +} + +static int matches (struct itemplate *itemp, insn *instruction) { + int i, size, oprs, ret; + + ret = 100; + + /* + * Check the opcode + */ + if (itemp->opcode != instruction->opcode) return 0; + + /* + * Count the operands + */ + if (itemp->operands != instruction->operands) return 0; + + /* + * Check that no spurious colons or TOs are present + */ + for (i=0; i<itemp->operands; i++) + if (instruction->oprs[i].type & ~itemp->opd[i] & (COLON|TO)) + return 0; + + /* + * Check that the operand flags all match up + */ + for (i=0; i<itemp->operands; i++) + if (itemp->opd[i] & ~instruction->oprs[i].type || + ((itemp->opd[i] & SIZE_MASK) && + ((itemp->opd[i] ^ instruction->oprs[i].type) & SIZE_MASK))) { + if ((itemp->opd[i] & ~instruction->oprs[i].type & NON_SIZE) || + (instruction->oprs[i].type & SIZE_MASK)) + return 0; + else + ret = 1; + } + + /* + * Check operand sizes + */ + if (itemp->flags & IF_SB) { + size = BITS8; + oprs = itemp->operands; + } else if (itemp->flags & IF_SD) { + size = BITS32; + oprs = itemp->operands; + } else if (itemp->flags & (IF_SM | IF_SM2)) { + oprs = (itemp->flags & IF_SM2 ? 
2 : itemp->operands); + size = 0; /* placate gcc */ + for (i=0; i<oprs; i++) + if ( (size = itemp->opd[i] & SIZE_MASK) != 0) + break; + } else { + size = 0; + oprs = itemp->operands; + } + + for (i=0; i<itemp->operands; i++) + if (!(itemp->opd[i] & SIZE_MASK) && + (instruction->oprs[i].type & SIZE_MASK & ~size)) + ret = 2; + + return ret; +} + +static ea *process_ea (operand *input, ea *output, int addrbits, int rfield) { + if (!(REGISTER & ~input->type)) { /* it's a single register */ + static int regs[] = { + R_MM0, R_EAX, R_AX, R_AL, R_MM1, R_ECX, R_CX, R_CL, + R_MM2, R_EDX, R_DX, R_DL, R_MM3, R_EBX, R_BX, R_BL, + R_MM4, R_ESP, R_SP, R_AH, R_MM5, R_EBP, R_BP, R_CH, + R_MM6, R_ESI, R_SI, R_DH, R_MM7, R_EDI, R_DI, R_BH + }; + int i; + + for (i=0; i<elements(regs); i++) + if (input->basereg == regs[i]) break; + if (i<elements(regs)) { + output->sib_present = FALSE;/* no SIB necessary */ + output->bytes = 0; /* no offset necessary either */ + output->modrm = 0xC0 | (rfield << 3) | (i/4); + } else + return NULL; + } else { /* it's a memory reference */ + if (input->basereg==-1 && (input->indexreg==-1 || input->scale==0)) { + /* it's a pure offset */ + if (input->addr_size) + addrbits = input->addr_size; + output->sib_present = FALSE; + output->bytes = (addrbits==32 ? 4 : 2); + output->modrm = (addrbits==32 ? 5 : 6) | (rfield << 3); + } else { /* it's an indirection */ + int i=input->indexreg, b=input->basereg, s=input->scale; + long o=input->offset, seg=input->segment; + + if (s==0) i = -1; /* make this easy, at least */ + + if (i==R_EAX || i==R_EBX || i==R_ECX || i==R_EDX + || i==R_EBP || i==R_ESP || i==R_ESI || i==R_EDI + || b==R_EAX || b==R_EBX || b==R_ECX || b==R_EDX + || b==R_EBP || b==R_ESP || b==R_ESI || b==R_EDI) { + /* it must be a 32-bit memory reference. Firstly we have + * to check that all registers involved are type Exx. 
*/ + if (i!=-1 && i!=R_EAX && i!=R_EBX && i!=R_ECX && i!=R_EDX + && i!=R_EBP && i!=R_ESP && i!=R_ESI && i!=R_EDI) + return NULL; + if (b!=-1 && b!=R_EAX && b!=R_EBX && b!=R_ECX && b!=R_EDX + && b!=R_EBP && b!=R_ESP && b!=R_ESI && b!=R_EDI) + return NULL; + + /* While we're here, ensure the user didn't specify WORD. */ + if (input->addr_size == 16) + return NULL; + + /* now reorganise base/index */ + if (b==i) /* convert EAX+2*EAX to 3*EAX */ + b = -1, s++; + if (b==-1 && s==1) /* single register should be base */ + b = i, i = -1; + if (((s==2 && i!=R_ESP) || s==3 || s==5 || s==9) && b==-1) + b = i, s--; /* convert 3*EAX to EAX+2*EAX */ + if (i==R_ESP || (s!=1 && s!=2 && s!=4 && s!=8 && i!=-1)) + return NULL; /* wrong, for various reasons */ + + if (i==-1 && b!=R_ESP) {/* no SIB needed */ + int mod, rm; + switch(b) { + case R_EAX: rm = 0; break; + case R_ECX: rm = 1; break; + case R_EDX: rm = 2; break; + case R_EBX: rm = 3; break; + case R_EBP: rm = 5; break; + case R_ESI: rm = 6; break; + case R_EDI: rm = 7; break; + case -1: rm = 5; break; + default: /* should never happen */ + return NULL; + } + if (b==-1 || (b!=R_EBP && o==0 && seg==NO_SEG)) + mod = 0; + else if (o>=-128 && o<=127 && seg==NO_SEG) + mod = 1; + else + mod = 2; + output->sib_present = FALSE; + output->bytes = (b==-1 || mod==2 ? 4 : mod); + output->modrm = (mod<<6) | (rfield<<3) | rm; + } else { /* we need a SIB */ + int mod, scale, index, base; + + switch (b) { + case R_EAX: base = 0; break; + case R_ECX: base = 1; break; + case R_EDX: base = 2; break; + case R_EBX: base = 3; break; + case R_ESP: base = 4; break; + case R_EBP: case -1: base = 5; break; + case R_ESI: base = 6; break; + case R_EDI: base = 7; break; + default: /* then what the smeg is it? 
*/ + return NULL; /* panic */ + } + + switch (i) { + case R_EAX: index = 0; break; + case R_ECX: index = 1; break; + case R_EDX: index = 2; break; + case R_EBX: index = 3; break; + case -1: index = 4; break; + case R_EBP: index = 5; break; + case R_ESI: index = 6; break; + case R_EDI: index = 7; break; + default: /* then what the smeg is it? */ + return NULL; /* panic */ + } + + if (i==-1) s = 1; + switch (s) { + case 1: scale = 0; break; + case 2: scale = 1; break; + case 4: scale = 2; break; + case 8: scale = 3; break; + default: /* then what the smeg is it? */ + return NULL; /* panic */ + } + + if (b==-1 || (b!=R_EBP && o==0 && seg==NO_SEG)) + mod = 0; + else if (o>=-128 && o<=127 && seg==NO_SEG) + mod = 1; + else + mod = 2; + + output->sib_present = TRUE; + output->bytes = (b==-1 || mod==2 ? 4 : mod); + output->modrm = (mod<<6) | (rfield<<3) | 4; + output->sib = (scale<<6) | (index<<3) | base; + } + } else { /* it's 16-bit */ + int mod, rm; + + /* check all registers are BX, BP, SI or DI */ + if ((b!=-1 && b!=R_BP && b!=R_BX && b!=R_SI && b!=R_DI) || + (i!=-1 && i!=R_BP && i!=R_BX && i!=R_SI && i!=R_DI)) + return NULL; + + /* ensure the user didn't specify DWORD */ + if (input->addr_size == 32) + return NULL; + + if (s!=1 && i!=-1) return NULL;/* no can do, in 16-bit EA */ + if (b==-1 && i!=-1) b ^= i ^= b ^= i; /* swap them round */ + if ((b==R_SI || b==R_DI) && i!=-1) + b ^= i ^= b ^= i; /* have BX/BP as base, SI/DI index */ + if (b==i) return NULL;/* shouldn't ever happen, in theory */ + if (i!=-1 && b!=-1 && + (i==R_BP || i==R_BX || b==R_SI || b==R_DI)) + return NULL; /* invalid combinations */ + if (b==-1) /* pure offset: handled above */ + return NULL; /* so if it gets to here, panic! 
*/ + + rm = -1; + if (i!=-1) + switch (i*256 + b) { + case R_SI*256+R_BX: rm=0; break; + case R_DI*256+R_BX: rm=1; break; + case R_SI*256+R_BP: rm=2; break; + case R_DI*256+R_BP: rm=3; break; + } + else + switch (b) { + case R_SI: rm=4; break; + case R_DI: rm=5; break; + case R_BP: rm=6; break; + case R_BX: rm=7; break; + } + if (rm==-1) /* can't happen, in theory */ + return NULL; /* so panic if it does */ + + if (o==0 && seg==NO_SEG && rm!=6) + mod = 0; + else if (o>=-128 && o<=127 && seg==NO_SEG) + mod = 1; + else + mod = 2; + + output->sib_present = FALSE; /* no SIB - it's 16-bit */ + output->bytes = mod; /* bytes of offset needed */ + output->modrm = (mod<<6) | (rfield<<3) | rm; + } + } + } + output->size = 1 + output->sib_present + output->bytes; + return output; +} + +static int chsize (operand *input, int addrbits) { + if (!(MEMORY & ~input->type)) { + int i=input->indexreg, b=input->basereg; + + if (input->scale==0) i = -1; + + if (i == -1 && b == -1) /* pure offset */ + return (input->addr_size != 0 && input->addr_size != addrbits); + + if (i==R_EAX || i==R_EBX || i==R_ECX || i==R_EDX + || i==R_EBP || i==R_ESP || i==R_ESI || i==R_EDI + || b==R_EAX || b==R_EBX || b==R_ECX || b==R_EDX + || b==R_EBP || b==R_ESP || b==R_ESI || b==R_EDI) + return (addrbits==16); + else + return (addrbits==32); + } else + return 0; +} diff --git a/assemble.h b/assemble.h new file mode 100644 index 0000000..cb93a2c --- /dev/null +++ b/assemble.h @@ -0,0 +1,17 @@ +/* assemble.h header file for assemble.c + * + * The Netwide Assembler is copyright (C) 1996 Simon Tatham and + * Julian Hall. All rights reserved. The software is + * redistributable under the licence given in the file "Licence" + * distributed in the NASM archive. 
+ */ + +#ifndef NASM_ASSEMBLE_H +#define NASM_ASSEMBLE_H + +long insn_size (long segment, long offset, int bits, + insn *instruction, efunc error); +long assemble (long segment, long offset, int bits, + insn *instruction, struct ofmt *output, efunc error); + +#endif diff --git a/disasm.c b/disasm.c new file mode 100644 index 0000000..8ad263b --- /dev/null +++ b/disasm.c @@ -0,0 +1,667 @@ +/* disasm.c where all the _work_ gets done in the Netwide Disassembler + * + * The Netwide Assembler is copyright (C) 1996 Simon Tatham and + * Julian Hall. All rights reserved. The software is + * redistributable under the licence given in the file "Licence" + * distributed in the NASM archive. + * + * initial version 27/iii/95 by Simon Tatham + */ + +#include <stdio.h> +#include <string.h> + +#include "nasm.h" +#include "disasm.h" +#include "sync.h" +#include "insns.h" + +#include "names.c" + +extern struct itemplate **itable[]; + +/* + * Flags that go into the `segment' field of `insn' structures + * during disassembly. 
+ */ +#define SEG_RELATIVE 1 +#define SEG_32BIT 2 +#define SEG_RMREG 4 +#define SEG_DISP8 8 +#define SEG_DISP16 16 +#define SEG_DISP32 32 +#define SEG_NODISP 64 +#define SEG_SIGNED 128 + +static int whichreg(long regflags, int regval) { + static int reg32[] = { + R_EAX, R_ECX, R_EDX, R_EBX, R_ESP, R_EBP, R_ESI, R_EDI }; + static int reg16[] = { + R_AX, R_CX, R_DX, R_BX, R_SP, R_BP, R_SI, R_DI }; + static int reg8[] = { + R_AL, R_CL, R_DL, R_BL, R_AH, R_CH, R_DH, R_BH }; + static int sreg[] = { + R_ES, R_CS, R_SS, R_DS, R_FS, R_GS, 0, 0 }; + static int creg[] = { + R_CR0, 0, R_CR2, R_CR3, R_CR4, 0, 0, 0 }; + static int dreg[] = { + R_DR0, R_DR1, R_DR2, R_DR3, 0, 0, R_DR6, R_DR7 }; + static int treg[] = { + 0, 0, 0, R_TR3, R_TR4, R_TR5, R_TR6, R_TR7 }; + static int fpureg[] = { + R_ST0, R_ST1, R_ST2, R_ST3, R_ST4, R_ST5, R_ST6, R_ST7 }; + static int mmxreg[] = { + R_MM0, R_MM1, R_MM2, R_MM3, R_MM4, R_MM5, R_MM6, R_MM7 }; + + if (!(REG_AL & ~regflags)) + return R_AL; + if (!(REG_AX & ~regflags)) + return R_AX; + if (!(REG_EAX & ~regflags)) + return R_EAX; + if (!(REG_DX & ~regflags)) + return R_DX; + if (!(REG_CL & ~regflags)) + return R_CL; + if (!(REG_CX & ~regflags)) + return R_CX; + if (!(REG_ECX & ~regflags)) + return R_ECX; + if (!(REG_CR4 & ~regflags)) + return R_CR4; + if (!(FPU0 & ~regflags)) + return R_ST0; + if (!((REGMEM|BITS8) & ~regflags)) + return reg8[regval]; + if (!((REGMEM|BITS16) & ~regflags)) + return reg16[regval]; + if (!((REGMEM|BITS32) & ~regflags)) + return reg32[regval]; + if (!(REG_SREG & ~regflags)) + return sreg[regval]; + if (!(REG_CREG & ~regflags)) + return creg[regval]; + if (!(REG_DREG & ~regflags)) + return dreg[regval]; + if (!(REG_TREG & ~regflags)) + return treg[regval]; + if (!(FPUREG & ~regflags)) + return fpureg[regval]; + if (!(MMXREG & ~regflags)) + return mmxreg[regval]; + return 0; +} + +static char *whichcond(int condval) { + static int conds[] = { + C_O, C_NO, C_B, C_AE, C_E, C_NE, C_BE, C_A, + C_S, C_NS, C_PE, C_PO, 
C_L, C_GE, C_LE, C_G + }; + return conditions[conds[condval]]; +} + +/* + * Process an effective address (ModRM) specification. + */ +static unsigned char *do_ea (unsigned char *data, int modrm, int asize, + int segsize, operand *op) { + int mod, rm, scale, index, base; + + mod = (modrm >> 6) & 03; + rm = modrm & 07; + + if (mod == 3) { /* pure register version */ + op->basereg = rm; + op->segment |= SEG_RMREG; + return data; + } + + op->addr_size = 0; + + if (asize == 16) { + /* + * <mod> specifies the displacement size (none, byte or + * word), and <rm> specifies the register combination. + * Exception: mod=0,rm=6 does not specify [BP] as one might + * expect, but instead specifies [disp16]. + */ + op->indexreg = op->basereg = -1; + op->scale = 1; /* always, in 16 bits */ + switch (rm) { + case 0: op->basereg = R_BX; op->indexreg = R_SI; break; + case 1: op->basereg = R_BX; op->indexreg = R_DI; break; + case 2: op->basereg = R_BP; op->indexreg = R_SI; break; + case 3: op->basereg = R_BP; op->indexreg = R_DI; break; + case 4: op->basereg = R_SI; break; + case 5: op->basereg = R_DI; break; + case 6: op->basereg = R_BP; break; + case 7: op->basereg = R_BX; break; + } + if (rm == 6 && mod == 0) { /* special case */ + op->basereg = -1; + if (segsize != 16) + op->addr_size = 16; + mod = 2; /* fake disp16 */ + } + switch (mod) { + case 0: + op->segment |= SEG_NODISP; + break; + case 1: + op->segment |= SEG_DISP8; + op->offset = (signed char) *data++; + break; + case 2: + op->segment |= SEG_DISP16; + op->offset = *data++; + op->offset |= (*data++) << 8; + break; + } + return data; + } else { + /* + * Once again, <mod> specifies displacement size (this time + * none, byte or *dword*), while <rm> specifies the base + * register. Again, [EBP] is missing, replaced by a pure + * disp32 (this time that's mod=0,rm=*5*). However, rm=4 + * indicates not a single base register, but instead the + * presence of a SIB byte... 
+ */ + op->indexreg = -1; + switch (rm) { + case 0: op->basereg = R_EAX; break; + case 1: op->basereg = R_ECX; break; + case 2: op->basereg = R_EDX; break; + case 3: op->basereg = R_EBX; break; + case 5: op->basereg = R_EBP; break; + case 6: op->basereg = R_ESI; break; + case 7: op->basereg = R_EDI; break; + } + if (rm == 5 && mod == 0) { + op->basereg = -1; + if (segsize != 32) + op->addr_size = 32; + mod = 2; /* fake disp32 */ + } + if (rm == 4) { /* process SIB */ + scale = (*data >> 6) & 03; + index = (*data >> 3) & 07; + base = *data & 07; + data++; + + op->scale = 1 << scale; + switch (index) { + case 0: op->indexreg = R_EAX; break; + case 1: op->indexreg = R_ECX; break; + case 2: op->indexreg = R_EDX; break; + case 3: op->indexreg = R_EBX; break; + case 4: op->indexreg = -1; break; + case 5: op->indexreg = R_EBP; break; + case 6: op->indexreg = R_ESI; break; + case 7: op->indexreg = R_EDI; break; + } + + switch (base) { + case 0: op->basereg = R_EAX; break; + case 1: op->basereg = R_ECX; break; + case 2: op->basereg = R_EDX; break; + case 3: op->basereg = R_EBX; break; + case 4: op->basereg = R_ESP; break; + case 6: op->basereg = R_ESI; break; + case 7: op->basereg = R_EDI; break; + case 5: + if (mod == 0) { + mod = 2; + op->basereg = -1; + } else + op->basereg = R_EBP; + break; + } + } + switch (mod) { + case 0: + op->segment |= SEG_NODISP; + break; + case 1: + op->segment |= SEG_DISP8; + op->offset = (signed char) *data++; + break; + case 2: + op->segment |= SEG_DISP32; + op->offset = *data++; + op->offset |= (*data++) << 8; + op->offset |= ((long) *data++) << 16; + op->offset |= ((long) *data++) << 24; + break; + } + return data; + } +} + +/* + * Determine whether the code string in r corresponds to the data + * stream in data. Return the number of bytes matched if so. 
+ */ +static int matches (unsigned char *r, unsigned char *data, int asize, + int osize, int segsize, insn *ins) { + unsigned char *origdata = data; + int a_used = FALSE, o_used = FALSE; + + while (*r) { + int c = *r++; + if (c >= 01 && c <= 03) { + while (c--) + if (*r++ != *data++) + return FALSE; + } + if (c == 04) { + switch (*data++) { + case 0x07: ins->oprs[0].basereg = 0; break; + case 0x17: ins->oprs[0].basereg = 2; break; + case 0x1F: ins->oprs[0].basereg = 3; break; + default: return FALSE; + } + } + if (c == 05) { + switch (*data++) { + case 0xA1: ins->oprs[0].basereg = 4; break; + case 0xA9: ins->oprs[0].basereg = 5; break; + default: return FALSE; + } + } + if (c == 06) { + switch (*data++) { + case 0x06: ins->oprs[0].basereg = 0; break; + case 0x0E: ins->oprs[0].basereg = 1; break; + case 0x16: ins->oprs[0].basereg = 2; break; + case 0x1E: ins->oprs[0].basereg = 3; break; + default: return FALSE; + } + } + if (c == 07) { + switch (*data++) { + case 0xA0: ins->oprs[0].basereg = 4; break; + case 0xA8: ins->oprs[0].basereg = 5; break; + default: return FALSE; + } + } + if (c >= 010 && c <= 012) { + int t = *r++, d = *data++; + if (d < t || d > t+7) + return FALSE; + else { + ins->oprs[c-010].basereg = d-t; + ins->oprs[c-010].segment |= SEG_RMREG; + } + } + if (c == 017) + if (*data++) + return FALSE; + if (c >= 014 && c <= 016) { + ins->oprs[c-014].offset = (signed char) *data++; + ins->oprs[c-014].segment |= SEG_SIGNED; + } + if (c >= 020 && c <= 022) + ins->oprs[c-020].offset = *data++; + if (c >= 024 && c <= 026) + ins->oprs[c-024].offset = *data++; + if (c >= 030 && c <= 032) { + ins->oprs[c-030].offset = *data++; + ins->oprs[c-030].offset |= (*data++ << 8); + } + if (c >= 034 && c <= 036) { + ins->oprs[c-034].offset = *data++; + ins->oprs[c-034].offset |= (*data++ << 8); + if (asize == 32) { + ins->oprs[c-034].offset |= (((long) *data++) << 16); + ins->oprs[c-034].offset |= (((long) *data++) << 24); + } + if (segsize != asize) + 
ins->oprs[c-034].addr_size = asize; + } + if (c >= 040 && c <= 042) { + ins->oprs[c-040].offset = *data++; + ins->oprs[c-040].offset |= (*data++ << 8); + ins->oprs[c-040].offset |= (((long) *data++) << 16); + ins->oprs[c-040].offset |= (((long) *data++) << 24); + } + if (c >= 050 && c <= 052) { + ins->oprs[c-050].offset = (signed char) *data++; + ins->oprs[c-050].segment |= SEG_RELATIVE; + } + if (c >= 060 && c <= 062) { + ins->oprs[c-060].offset = *data++; + ins->oprs[c-060].offset |= (*data++ << 8); + ins->oprs[c-060].segment |= SEG_RELATIVE; + ins->oprs[c-060].segment &= ~SEG_32BIT; + } + if (c >= 064 && c <= 066) { + ins->oprs[c-064].offset = *data++; + ins->oprs[c-064].offset |= (*data++ << 8); + if (asize == 32) { + ins->oprs[c-064].offset |= (((long) *data++) << 16); + ins->oprs[c-064].offset |= (((long) *data++) << 24); + ins->oprs[c-064].segment |= SEG_32BIT; + } else + ins->oprs[c-064].segment &= ~SEG_32BIT; + ins->oprs[c-064].segment |= SEG_RELATIVE; + if (segsize != asize) + ins->oprs[c-064].addr_size = asize; + } + if (c >= 070 && c <= 072) { + ins->oprs[c-070].offset = *data++; + ins->oprs[c-070].offset |= (*data++ << 8); + ins->oprs[c-070].offset |= (((long) *data++) << 16); + ins->oprs[c-070].offset |= (((long) *data++) << 24); + ins->oprs[c-070].segment |= SEG_32BIT | SEG_RELATIVE; + } + if (c >= 0100 && c <= 0177) { + int modrm = *data++; + ins->oprs[c & 07].basereg = (modrm >> 3) & 07; + ins->oprs[c & 07].segment |= SEG_RMREG; + data = do_ea (data, modrm, asize, segsize, + &ins->oprs[(c >> 3) & 07]); + } + if (c >= 0200 && c <= 0277) { + int modrm = *data++; + if (((modrm >> 3) & 07) != (c & 07)) + return FALSE; /* spare field doesn't match up */ + data = do_ea (data, modrm, asize, segsize, + &ins->oprs[(c >> 3) & 07]); + } + if (c >= 0300 && c <= 0302) { + /* asize is compared against 16 and 32 elsewhere in this function; test the width explicitly so the else branch is reachable */ + if (asize == 32) + ins->oprs[c-0300].segment |= SEG_32BIT; + else + ins->oprs[c-0300].segment &= ~SEG_32BIT; + a_used = TRUE; + } + if (c == 0310) { + if (asize == 32) + return FALSE; + else + 
a_used = TRUE; + } + if (c == 0311) { + if (asize == 16) + return FALSE; + else + a_used = TRUE; + } + if (c == 0312) { + if (asize != segsize) + return FALSE; + else + a_used = TRUE; + } + if (c == 0320) { + if (osize == 32) + return FALSE; + else + o_used = TRUE; + } + if (c == 0321) { + if (osize == 16) + return FALSE; + else + o_used = TRUE; + } + if (c == 0322) { + if (osize != segsize) + return FALSE; + else + o_used = TRUE; + } + if (c == 0330) { + int t = *r++, d = *data++; + if (d < t || d > t+15) + return FALSE; + else + ins->condition = d - t; + } + } + + /* + * Check for unused a/o prefixes. + */ + ins->nprefix = 0; + if (!a_used && asize != segsize) + ins->prefixes[ins->nprefix++] = (asize == 16 ? P_A16 : P_A32); + if (!o_used && osize != segsize) + ins->prefixes[ins->nprefix++] = (osize == 16 ? P_O16 : P_O32); + + return data - origdata; +} + +long disasm (unsigned char *data, char *output, int segsize, long offset, + int autosync) { + struct itemplate **p; + int length = 0; + char *segover; + int rep, lock, asize, osize, i, slen, colon; + unsigned char *origdata; + int works; + insn ins; + + /* + * Scan for prefixes. 
+ */ + asize = osize = segsize; + segover = NULL; + rep = lock = 0; + origdata = data; + for (;;) { + if (*data == 0xF3 || *data == 0xF2) + rep = *data++; + else if (*data == 0xF0) + lock = *data++; + else if (*data == 0x2E || *data == 0x36 || *data == 0x3E || + *data == 0x26 || *data == 0x64 || *data == 0x65) { + switch (*data++) { + case 0x2E: segover = "cs"; break; + case 0x36: segover = "ss"; break; + case 0x3E: segover = "ds"; break; + case 0x26: segover = "es"; break; + case 0x64: segover = "fs"; break; + case 0x65: segover = "gs"; break; + } + } else if (*data == 0x66) + osize = 48 - segsize, data++; + else if (*data == 0x67) + asize = 48 - segsize, data++; + else + break; + } + + ins.oprs[0].segment = ins.oprs[1].segment = ins.oprs[2].segment = + ins.oprs[0].addr_size = ins.oprs[1].addr_size = ins.oprs[2].addr_size = + (segsize == 16 ? 0 : SEG_32BIT); + ins.condition = -1; + works = TRUE; + for (p = itable[*data]; *p; p++) + if ( (length = matches((unsigned char *)((*p)->code), data, + asize, osize, segsize, &ins)) ) { + works = TRUE; + /* + * Final check to make sure the types of r/m match up. + */ + for (i = 0; i < (*p)->operands; i++) + if (((ins.oprs[i].segment & SEG_RMREG) && + !(MEMORY & ~(*p)->opd[i])) || + (!(ins.oprs[i].segment & SEG_RMREG) && + !(REGNORM & ~(*p)->opd[i]) && + !((*p)->opd[i] & REG_SMASK))) + works = FALSE; + if (works) + break; + } + if (!length || !works) + return 0; /* no instruction was matched */ + + slen = 0; + + if (rep) { + slen += sprintf(output+slen, "rep%s ", + (rep == 0xF2 ? "ne" : + (*p)->opcode == I_CMPSB || + (*p)->opcode == I_CMPSW || + (*p)->opcode == I_CMPSD || + (*p)->opcode == I_SCASB || + (*p)->opcode == I_SCASW || + (*p)->opcode == I_SCASD ? 
"e" : "")); + } + if (lock) + slen += sprintf(output+slen, "lock "); + for (i = 0; i < ins.nprefix; i++) + switch (ins.prefixes[i]) { + case P_A16: slen += sprintf(output+slen, "a16 "); break; + case P_A32: slen += sprintf(output+slen, "a32 "); break; + case P_O16: slen += sprintf(output+slen, "o16 "); break; + case P_O32: slen += sprintf(output+slen, "o32 "); break; + } + + for (i = 0; i < elements(ico); i++) + if ((*p)->opcode == ico[i]) { + slen += sprintf(output+slen, "%s%s", icn[i], + whichcond(ins.condition)); + break; + } + if (i >= elements(ico)) + slen += sprintf(output+slen, "%s", insn_names[(*p)->opcode]); + colon = FALSE; + length += data - origdata; /* fix up for prefixes */ + for (i=0; i<(*p)->operands; i++) { + output[slen++] = (colon ? ':' : i==0 ? ' ' : ','); + + if (ins.oprs[i].segment & SEG_RELATIVE) { + ins.oprs[i].offset += offset + length; + /* + * sort out wraparound + */ + if (!(ins.oprs[i].segment & SEG_32BIT)) + ins.oprs[i].offset &= 0xFFFF; + /* + * add sync marker, if autosync is on + */ + if (autosync) + add_sync (ins.oprs[i].offset, 0L); + } + + if ((*p)->opd[i] & COLON) + colon = TRUE; + else + colon = FALSE; + + if (((*p)->opd[i] & (REGISTER | FPUREG)) || + (ins.oprs[i].segment & SEG_RMREG)) { + ins.oprs[i].basereg = whichreg ((*p)->opd[i], + ins.oprs[i].basereg); + slen += sprintf(output+slen, "%s", + reg_names[ins.oprs[i].basereg]); + } else if (!(UNITY & ~(*p)->opd[i])) { + output[slen++] = '1'; + } else if ( (*p)->opd[i] & IMMEDIATE ) { + if ( (*p)->opd[i] & BITS8 ) { + slen += sprintf(output+slen, "byte "); + if (ins.oprs[i].segment & SEG_SIGNED) { + if (ins.oprs[i].offset < 0) { + ins.oprs[i].offset *= -1; + output[slen++] = '-'; + } else + output[slen++] = '+'; + } + } else if ( (*p)->opd[i] & BITS16 ) { + slen += sprintf(output+slen, "word "); + } else if ( (*p)->opd[i] & BITS32 ) { + slen += sprintf(output+slen, "dword "); + } else if ( (*p)->opd[i] & NEAR ) { + slen += sprintf(output+slen, "near "); + } else if ( 
(*p)->opd[i] & SHORT ) { + slen += sprintf(output+slen, "short "); + } + slen += sprintf(output+slen, "0x%lx", ins.oprs[i].offset); + } else if ( !(MEM_OFFS & ~(*p)->opd[i]) ) { + slen += sprintf(output+slen, "[%s%s%s0x%lx]", + (segover ? segover : ""), + (segover ? ":" : ""), + (ins.oprs[i].addr_size == 32 ? "dword " : + ins.oprs[i].addr_size == 16 ? "word " : ""), + ins.oprs[i].offset); + segover = NULL; + } else if ( !(REGMEM & ~(*p)->opd[i]) ) { + int started = FALSE; + if ( (*p)->opd[i] & BITS8 ) + slen += sprintf(output+slen, "byte "); + if ( (*p)->opd[i] & BITS16 ) + slen += sprintf(output+slen, "word "); + if ( (*p)->opd[i] & BITS32 ) + slen += sprintf(output+slen, "dword "); + if ( (*p)->opd[i] & BITS64 ) + slen += sprintf(output+slen, "qword "); + if ( (*p)->opd[i] & BITS80 ) + slen += sprintf(output+slen, "tword "); + if ( (*p)->opd[i] & FAR ) + slen += sprintf(output+slen, "far "); + if ( (*p)->opd[i] & NEAR ) + slen += sprintf(output+slen, "near "); + output[slen++] = '['; + if (ins.oprs[i].addr_size) + slen += sprintf(output+slen, "%s", + (ins.oprs[i].addr_size == 32 ? "dword " : + ins.oprs[i].addr_size == 16 ? 
"word " : "")); + if (segover) { + slen += sprintf(output+slen, "%s:", segover); + segover = NULL; + } + if (ins.oprs[i].basereg != -1) { + slen += sprintf(output+slen, "%s", + reg_names[ins.oprs[i].basereg]); + started = TRUE; + } + if (ins.oprs[i].indexreg != -1) { + if (started) + output[slen++] = '+'; + slen += sprintf(output+slen, "%s", + reg_names[ins.oprs[i].indexreg]); + if (ins.oprs[i].scale > 1) + slen += sprintf(output+slen, "*%d", ins.oprs[i].scale); + started = TRUE; + } + if (ins.oprs[i].segment & SEG_DISP8) { + int sign = '+'; + if (ins.oprs[i].offset & 0x80) { + ins.oprs[i].offset = - (signed char) ins.oprs[i].offset; + sign = '-'; + } + slen += sprintf(output+slen, "%c0x%lx", sign, + ins.oprs[i].offset); + } else if (ins.oprs[i].segment & SEG_DISP16) { + if (started) + output[slen++] = '+'; + slen += sprintf(output+slen, "0x%lx", ins.oprs[i].offset); + } else if (ins.oprs[i].segment & SEG_DISP32) { + if (started) + output[slen++] = '+'; + slen += sprintf(output+slen, "0x%lx", ins.oprs[i].offset); + } + output[slen++] = ']'; + } else { + slen += sprintf(output+slen, "<operand%d>", i); + } + } + output[slen] = '\0'; + if (segover) { /* unused segment override */ + char *p = output; + int count = slen+1; + while (count--) + p[count+3] = p[count]; + strncpy (output, segover, 2); + output[2] = ' '; + } + return length; +} + +long eatbyte (unsigned char *data, char *output) { + sprintf(output, "db 0x%02X", *data); + return 1; +} diff --git a/disasm.h b/disasm.h new file mode 100644 index 0000000..845fd2e --- /dev/null +++ b/disasm.h @@ -0,0 +1,18 @@ +/* disasm.h header file for disasm.c + * + * The Netwide Assembler is copyright (C) 1996 Simon Tatham and + * Julian Hall. All rights reserved. The software is + * redistributable under the licence given in the file "Licence" + * distributed in the NASM archive. 
+ */ + +#ifndef NASM_DISASM_H +#define NASM_DISASM_H + +#define INSN_MAX 32 /* one instruction can't be longer than this */ + +long disasm (unsigned char *data, char *output, int segsize, long offset, + int autosync); +long eatbyte (unsigned char *data, char *output); + +#endif @@ -0,0 +1,389 @@ +/* float.c floating-point constant support for the Netwide Assembler + * + * The Netwide Assembler is copyright (C) 1996 Simon Tatham and + * Julian Hall. All rights reserved. The software is + * redistributable under the licence given in the file "Licence" + * distributed in the NASM archive. + * + * initial version 13/ix/96 by Simon Tatham + */ + +#include <stdio.h> +#include <stdlib.h> +#include <string.h> + +#include "nasm.h" + +#define TRUE 1 +#define FALSE 0 + +#define MANT_WORDS 6 /* 64 bits + 32 for accuracy == 96 */ +#define MANT_DIGITS 28 /* 29 digits don't fit in 96 bits */ + +/* + * guaranteed top bit of from is set + * => we only have to worry about _one_ bit shift to the left + */ + +static int multiply(unsigned short *to, unsigned short *from) { + unsigned long temp[MANT_WORDS*2]; + int i, j; + + for (i=0; i<MANT_WORDS*2; i++) + temp[i] = 0; + + for (i=0; i<MANT_WORDS; i++) + for (j=0; j<MANT_WORDS; j++) { + unsigned long n; + n = (unsigned long)to[i] * (unsigned long)from[j]; + temp[i+j] += n >> 16; + temp[i+j+1] += n & 0xFFFF; + } + + for (i=MANT_WORDS*2; --i ;) { + temp[i-1] += temp[i] >> 16; + temp[i] &= 0xFFFF; + } + if (temp[0] & 0x8000) { + for (i=0; i<MANT_WORDS; i++) + to[i] = temp[i] & 0xFFFF; + return 0; + } else { + for (i=0; i<MANT_WORDS; i++) + to[i] = (temp[i] << 1) + !!(temp[i+1] & 0x8000); + return -1; + } +} + +static void flconvert(char *string, unsigned short *mant, long *exponent) { + char digits[MANT_DIGITS], *p, *q, *r; + unsigned short mult[MANT_WORDS], *m, bit; + long tenpwr, twopwr; + int extratwos, started, seendot; + + p = digits; + tenpwr = 0; + started = seendot = FALSE; + while (*string && *string != 'E' && *string != 'e') { + 
if (*string == '.') { + if (!seendot) + seendot = TRUE; + else { + fprintf(stderr, "too many periods!\n"); + return; + } + } else if (*string >= '0' && *string <= '9') { + if (*string == '0' && !started) { + if (seendot) + tenpwr--; + } else { + started = TRUE; + if (p < digits+sizeof(digits)) + *p++ = *string - '0'; + if (!seendot) + tenpwr++; + } + } else { + fprintf(stderr, "`%c' is invalid char\n", *string); + return; + } + string++; + } + if (*string) { + string++; /* eat the E */ + tenpwr += atoi(string); + } + + /* + * At this point, the memory interval [digits,p) contains a + * series of decimal digits zzzzzzz such that our number X + * satisfies + * + * X = 0.zzzzzzz * 10^tenpwr + */ + + bit = 0x8000; + for (m=mant; m<mant+MANT_WORDS; m++) + *m = 0; + m = mant; + q = digits; + started = FALSE; + twopwr = 0; + while (m < mant+MANT_WORDS) { + unsigned short carry = 0; + while (p > q && !p[-1]) + p--; + if (p <= q) + break; + for (r = p; r-- > q ;) { + int i; + + i = 2 * *r + carry; + if (i >= 10) + carry = 1, i -= 10; + else + carry = 0; + *r = i; + } + if (carry) + *m |= bit, started = TRUE; + if (started) { + if (bit == 1) + bit = 0x8000, m++; + else + bit >>= 1; + } else + twopwr--; + } + twopwr += tenpwr; + + /* + * At this point the `mant' array contains the first six + * fractional places of a base-2^16 real number, which when + * multiplied by 2^twopwr and 5^tenpwr gives X. So now we + * really do multiply by 5^tenpwr. + */ + + if (tenpwr < 0) { + for (m=mult; m<mult+MANT_WORDS; m++) + *m = 0xCCCC; + extratwos = -2; + tenpwr = -tenpwr; + } else if (tenpwr > 0) { + mult[0] = 0xA000; + for (m=mult+1; m<mult+MANT_WORDS; m++) + *m = 0; + extratwos = 3; + } else + extratwos = 0; + while (tenpwr) { + if (tenpwr & 1) + twopwr += extratwos + multiply (mant, mult); + extratwos = extratwos * 2 + multiply (mult, mult); + tenpwr >>= 1; + } + + /* + * Conversion is done. 
The elements of `mant' contain the first + * fractional places of a base-2^16 real number in [0.5,1) + * which we can multiply by 2^twopwr to get X. Or, of course, + * it contains zero. + */ + *exponent = twopwr; +} + +/* + * Shift a mantissa to the right by i (i < 16) bits. + */ +static void shr(unsigned short *mant, int i) { + unsigned short n = 0, m; + int j; + + for (j=0; j<MANT_WORDS; j++) { + m = (mant[j] << (16-i)) & 0xFFFF; + mant[j] = (mant[j] >> i) | n; + n = m; + } +} + +/* + * Round a mantissa off after i words. + */ +static int round(unsigned short *mant, int i) { + if (mant[i] & 0x8000) { + do { + ++mant[--i]; + mant[i] &= 0xFFFF; + } while (i > 0 && !mant[i]); + return !i && !mant[i]; + } + return 0; +} + +#define put(a,b) ( (*(a)=(b)), ((a)[1]=(b)>>8) ) + +static int to_double(char *str, long sign, unsigned char *result, + efunc error) { + unsigned short mant[MANT_WORDS]; + long exponent; + + sign = (sign < 0 ? 0x8000L : 0L); + + flconvert (str, mant, &exponent); + if (mant[0] & 0x8000) { + /* + * Non-zero. + */ + exponent--; + if (exponent >= -1022 && exponent <= 1024) { + /* + * Normalised. + */ + exponent += 1023; + shr(mant, 11); + round(mant, 4); + if (mant[0] & 0x20) /* did we scale up by one? */ + shr(mant, 1), exponent++; + mant[0] &= 0xF; /* remove leading one */ + put(result+6,(exponent << 4) | mant[0] | sign); + put(result+4,mant[1]); + put(result+2,mant[2]); + put(result+0,mant[3]); + } else if (exponent < -1022 && exponent >= -1074) { + /* + * Denormal. + */ + int shift = -(exponent+1011); + int sh = shift % 16, wds = shift / 16; + shr(mant, sh); + if (round(mant, 4-wds) || (sh>0 && (mant[0]&(0x8000>>(sh-1))))) { + shr(mant, 1); + if (sh==0) + mant[0] |= 0x8000; + exponent++; + } + put(result+6,(wds == 0 ? mant[0] : 0) | sign); + put(result+4,(wds <= 1 ? mant[1-wds] : 0)); + put(result+2,(wds <= 2 ? mant[2-wds] : 0)); + put(result+0,(wds <= 3 ? 
mant[3-wds] : 0)); + } else { + if (exponent > 0) { + error(ERR_NONFATAL, "overflow in floating-point constant"); + return 0; + } else + memset (result, 0, 8); + } + } else { + /* + * Zero. + */ + memset (result, 0, 8); + } + return 1; /* success */ +} + +static int to_float(char *str, long sign, unsigned char *result, + efunc error) { + unsigned short mant[MANT_WORDS]; + long exponent; + + sign = (sign < 0 ? 0x8000L : 0L); + + flconvert (str, mant, &exponent); + if (mant[0] & 0x8000) { + /* + * Non-zero. + */ + exponent--; + if (exponent >= -126 && exponent <= 128) { + /* + * Normalised. + */ + exponent += 127; + shr(mant, 8); + round(mant, 2); + if (mant[0] & 0x100) /* did we scale up by one? */ + shr(mant, 1), exponent++; + mant[0] &= 0x7F; /* remove leading one */ + put(result+2,(exponent << 7) | mant[0] | sign); + put(result+0,mant[1]); + } else if (exponent < -126 && exponent >= -149) { + /* + * Denormal. + */ + int shift = -(exponent+118); + int sh = shift % 16, wds = shift / 16; + shr(mant, sh); + if (round(mant, 2-wds) || (sh>0 && (mant[0]&(0x8000>>(sh-1))))) { + shr(mant, 1); + if (sh==0) + mant[0] |= 0x8000; + exponent++; + } + put(result+2,(wds == 0 ? mant[0] : 0) | sign); + put(result+0,(wds <= 1 ? mant[1-wds] : 0)); + } else { + if (exponent > 0) { + error(ERR_NONFATAL, "overflow in floating-point constant"); + return 0; + } else + memset (result, 0, 4); + } + } else { + memset (result, 0, 4); + } + return 1; +} + +static int to_ldoub(char *str, long sign, unsigned char *result, + efunc error) { + unsigned short mant[MANT_WORDS]; + long exponent; + + sign = (sign < 0 ? 0x8000L : 0L); + + flconvert (str, mant, &exponent); + if (mant[0] & 0x8000) { + /* + * Non-zero. + */ + exponent--; + if (exponent >= -16383 && exponent <= 16384) { + /* + * Normalised. + */ + exponent += 16383; + if (round(mant, 4)) /* did we scale up by one? 
*/ + shr(mant, 1), mant[0] |= 0x8000, exponent++; + put(result+8,exponent | sign); + put(result+6,mant[0]); + put(result+4,mant[1]); + put(result+2,mant[2]); + put(result+0,mant[3]); + } else if (exponent < -16383 && exponent >= -16446) { + /* + * Denormal. + */ + int shift = -(exponent+16383); + int sh = shift % 16, wds = shift / 16; + shr(mant, sh); + if (round(mant, 4-wds) || (sh>0 && (mant[0]&(0x8000>>(sh-1))))) { + shr(mant, 1); + if (sh==0) + mant[0] |= 0x8000; + exponent++; + } + put(result+8,sign); + put(result+6,(wds == 0 ? mant[0] : 0)); + put(result+4,(wds <= 1 ? mant[1-wds] : 0)); + put(result+2,(wds <= 2 ? mant[2-wds] : 0)); + put(result+0,(wds <= 3 ? mant[3-wds] : 0)); + } else { + if (exponent > 0) { + error(ERR_NONFATAL, "overflow in floating-point constant"); + return 0; + } else + memset (result, 0, 10); + } + } else { + /* + * Zero. + */ + memset (result, 0, 10); + } + return 1; +} + +int float_const (char *number, long sign, unsigned char *result, int bytes, + efunc error) { + if (bytes == 4) + return to_float (number, sign, result, error); + else if (bytes == 8) + return to_double (number, sign, result, error); + else if (bytes == 10) + return to_ldoub (number, sign, result, error); + else { + error(ERR_PANIC, "strange value %d passed to float_const", bytes); + return 0; + } +} @@ -0,0 +1,16 @@ +/* float.h header file for the floating-point constant module of + * the Netwide Assembler + * + * The Netwide Assembler is copyright (C) 1996 Simon Tatham and + * Julian Hall. All rights reserved. The software is + * redistributable under the licence given in the file "Licence" + * distributed in the NASM archive. 
+ */ + +#ifndef NASM_FLOAT_H +#define NASM_FLOAT_H + +int float_const (char *number, long sign, unsigned char *result, int bytes, + efunc error); + +#endif diff --git a/insns.dat b/insns.dat new file mode 100644 index 0000000..f410613 --- /dev/null +++ b/insns.dat @@ -0,0 +1,984 @@ +; insns.dat table of instructions for the Netwide Assembler +; +; The Netwide Assembler is copyright (C) 1996 Simon Tatham and +; Julian Hall. All rights reserved. The software is +; redistributable under the licence given in the file "Licence" +; distributed in the NASM archive. +; +; Format of file: all four fields must be present on every functional +; line. Hence `void' for no-operand instructions, and `\0' for such +; as EQU. If the last three fields are all `ignore', no action is +; taken except to register the opcode as being present. _ALL_ opcodes +; listed in the `enum' in nasm.h must be present in here, in the +; same order. This is to build the main instruction table for NASM. + +AAA void \1\x37 8086 +AAD void \2\xD5\x0A 8086 +AAD imm \1\xD5\24 8086,UNDOC +AAM void \2\xD4\x0A 8086 +AAM imm \1\xD4\24 8086,UNDOC +AAS void \1\x3F 8086 +ADC mem,reg8 \300\1\x10\101 8086,SM +ADC reg8,reg8 \300\1\x10\101 8086 +ADC mem,reg16 \320\300\1\x11\101 8086,SM +ADC reg16,reg16 \320\300\1\x11\101 8086 +ADC mem,reg32 \321\300\1\x11\101 386,SM +ADC reg32,reg32 \321\300\1\x11\101 386 +ADC reg8,mem \301\1\x12\110 8086,SM +ADC reg8,reg8 \301\1\x12\110 8086 +ADC reg16,mem \320\301\1\x13\110 8086,SM +ADC reg16,reg16 \320\301\1\x13\110 8086 +ADC reg32,mem \321\301\1\x13\110 386,SM +ADC reg32,reg32 \321\301\1\x13\110 386 +ADC rm16,imm8 \320\300\1\x83\202\15 8086 +ADC rm32,imm8 \321\300\1\x83\202\15 386 +ADC reg_al,imm \1\x14\21 8086,SM +ADC reg_ax,imm \320\1\x15\31 8086,SM +ADC reg_eax,imm \321\1\x15\41 386,SM +ADC rm8,imm \300\1\x80\202\21 8086,SM +ADC rm16,imm \320\300\1\x81\202\31 8086,SM +ADC rm32,imm \321\300\1\x81\202\41 386,SM +ADC mem,imm8 \300\1\x80\202\21 8086,SM +ADC mem,imm16 
\320\300\1\x81\202\31 8086,SM +ADC mem,imm32 \321\300\1\x81\202\41 386,SM +ADD mem,reg8 \300\17\101 8086,SM +ADD reg8,reg8 \300\17\101 8086 +ADD mem,reg16 \320\300\1\x01\101 8086,SM +ADD reg16,reg16 \320\300\1\x01\101 8086 +ADD mem,reg32 \321\300\1\x01\101 386,SM +ADD reg32,reg32 \321\300\1\x01\101 386 +ADD reg8,mem \301\1\x02\110 8086,SM +ADD reg8,reg8 \301\1\x02\110 8086 +ADD reg16,mem \320\301\1\x03\110 8086,SM +ADD reg16,reg16 \320\301\1\x03\110 8086 +ADD reg32,mem \321\301\1\x03\110 386,SM +ADD reg32,reg32 \321\301\1\x03\110 386 +ADD rm16,imm8 \320\300\1\x83\200\15 8086 +ADD rm32,imm8 \321\300\1\x83\200\15 386 +ADD reg_al,imm \1\x04\21 8086,SM +ADD reg_ax,imm \320\1\x05\31 8086,SM +ADD reg_eax,imm \321\1\x05\41 386,SM +ADD rm8,imm \300\1\x80\200\21 8086,SM +ADD rm16,imm \320\300\1\x81\200\31 8086,SM +ADD rm32,imm \321\300\1\x81\200\41 386,SM +ADD mem,imm8 \300\1\x80\200\21 8086,SM +ADD mem,imm16 \320\300\1\x81\200\31 8086,SM +ADD mem,imm32 \321\300\1\x81\200\41 386,SM +AND mem,reg8 \300\1\x20\101 8086,SM +AND reg8,reg8 \300\1\x20\101 8086 +AND mem,reg16 \320\300\1\x21\101 8086,SM +AND reg16,reg16 \320\300\1\x21\101 8086 +AND mem,reg32 \321\300\1\x21\101 386,SM +AND reg32,reg32 \321\300\1\x21\101 386 +AND reg8,mem \301\1\x22\110 8086,SM +AND reg8,reg8 \301\1\x22\110 8086 +AND reg16,mem \320\301\1\x23\110 8086,SM +AND reg16,reg16 \320\301\1\x23\110 8086 +AND reg32,mem \321\301\1\x23\110 386,SM +AND reg32,reg32 \321\301\1\x23\110 386 +AND rm16,imm8 \320\300\1\x83\204\15 8086 +AND rm32,imm8 \321\300\1\x83\204\15 386 +AND reg_al,imm \1\x24\21 8086,SM +AND reg_ax,imm \320\1\x25\31 8086,SM +AND reg_eax,imm \321\1\x25\41 386,SM +AND rm8,imm \300\1\x80\204\21 8086,SM +AND rm16,imm \320\300\1\x81\204\31 8086,SM +AND rm32,imm \321\300\1\x81\204\41 386,SM +AND mem,imm8 \300\1\x80\204\21 8086,SM +AND mem,imm16 \320\300\1\x81\204\31 8086,SM +AND mem,imm32 \321\300\1\x81\204\41 386,SM +ARPL mem,reg16 \300\1\x63\101 286,PRIV,SM +ARPL reg16,reg16 \300\1\x63\101 286,PRIV +BOUND 
reg16,mem \320\301\1\x62\110 186 +BOUND reg32,mem \321\301\1\x62\110 386 +BSF reg16,mem \320\301\2\x0F\xBC\110 386,SM +BSF reg16,reg16 \320\301\2\x0F\xBC\110 386 +BSF reg32,mem \321\301\2\x0F\xBC\110 386,SM +BSF reg32,reg32 \321\301\2\x0F\xBC\110 386 +BSR reg16,mem \320\301\2\x0F\xBD\110 386,SM +BSR reg16,reg16 \320\301\2\x0F\xBD\110 386 +BSR reg32,mem \321\301\2\x0F\xBD\110 386,SM +BSR reg32,reg32 \321\301\2\x0F\xBD\110 386 +BSWAP reg32 \321\1\x0F\10\xC8 486 +BT mem,reg16 \320\300\2\x0F\xA3\101 386,SM +BT reg16,reg16 \320\300\2\x0F\xA3\101 386 +BT mem,reg32 \321\300\2\x0F\xA3\101 386,SM +BT reg32,reg32 \321\300\2\x0F\xA3\101 386 +BT rm16,imm \320\300\2\x0F\xBA\204\25 386 +BT rm32,imm \321\300\2\x0F\xBA\204\25 386 +BTC mem,reg16 \320\300\2\x0F\xBB\101 386,SM +BTC reg16,reg16 \320\300\2\x0F\xBB\101 386 +BTC mem,reg32 \321\300\2\x0F\xBB\101 386,SM +BTC reg32,reg32 \321\300\2\x0F\xBB\101 386 +BTC rm16,imm \320\300\2\x0F\xBA\207\25 386 +BTC rm32,imm \321\300\2\x0F\xBA\207\25 386 +BTR mem,reg16 \320\300\2\x0F\xB3\101 386,SM +BTR reg16,reg16 \320\300\2\x0F\xB3\101 386 +BTR mem,reg32 \321\300\2\x0F\xB3\101 386,SM +BTR reg32,reg32 \321\300\2\x0F\xB3\101 386 +BTR rm16,imm \320\300\2\x0F\xBA\206\25 386 +BTR rm32,imm \321\300\2\x0F\xBA\206\25 386 +BTS mem,reg16 \320\300\2\x0F\xAB\101 386,SM +BTS reg16,reg16 \320\300\2\x0F\xAB\101 386 +BTS mem,reg32 \321\300\2\x0F\xAB\101 386,SM +BTS reg32,reg32 \321\300\2\x0F\xAB\101 386 +BTS rm16,imm \320\300\2\x0F\xBA\205\25 386 +BTS rm32,imm \321\300\2\x0F\xBA\205\25 386 +CALL imm \322\1\xE8\64 8086 +CALL imm|far \322\1\x9A\34\37 8086 +CALL imm:imm \322\1\x9A\35\30 8086 +CALL imm16:imm \320\1\x9A\31\30 8086 +CALL imm:imm16 \320\1\x9A\31\30 8086 +CALL imm32:imm \321\1\x9A\41\30 386 +CALL imm:imm32 \321\1\x9A\41\30 386 +CALL mem|far \322\300\1\xFF\203 8086 +CALL mem16|far \320\300\1\xFF\203 8086 +CALL mem32|far \321\300\1\xFF\203 386 +CALL mem|near \322\300\1\xFF\202 8086 +CALL mem16|near \320\300\1\xFF\202 8086 +CALL mem32|near 
\321\300\1\xFF\202 386 +CALL reg16 \320\300\1\xFF\202 8086 +CALL reg32 \321\300\1\xFF\202 386 +CALL mem \322\300\1\xFF\202 8086 +CALL mem16 \320\300\1\xFF\202 8086 +CALL mem32 \321\300\1\xFF\202 386 +CBW void \320\1\x98 8086 +CDQ void \321\1\x99 386 +CLC void \1\xF8 8086 +CLD void \1\xFC 8086 +CLI void \1\xFA 8086 +CLTS void \2\x0F\x06 286,PRIV +CMC void \1\xF5 8086 +CMP mem,reg8 \300\1\x38\101 8086,SM +CMP reg8,reg8 \300\1\x38\101 8086 +CMP mem,reg16 \320\300\1\x39\101 8086,SM +CMP reg16,reg16 \320\300\1\x39\101 8086 +CMP mem,reg32 \321\300\1\x39\101 386,SM +CMP reg32,reg32 \321\300\1\x39\101 386 +CMP reg8,mem \301\1\x3A\110 8086,SM +CMP reg8,reg8 \301\1\x3A\110 8086 +CMP reg16,mem \320\301\1\x3B\110 8086,SM +CMP reg16,reg16 \320\301\1\x3B\110 8086 +CMP reg32,mem \321\301\1\x3B\110 386,SM +CMP reg32,reg32 \321\301\1\x3B\110 386 +CMP rm16,imm8 \320\300\1\x83\207\15 8086 +CMP rm32,imm8 \321\300\1\x83\207\15 386 +CMP reg_al,imm \1\x3C\21 8086,SM +CMP reg_ax,imm \320\1\x3D\31 8086,SM +CMP reg_eax,imm \321\1\x3D\41 386,SM +CMP rm8,imm \300\1\x80\207\21 8086,SM +CMP rm16,imm \320\300\1\x81\207\31 8086,SM +CMP rm32,imm \321\300\1\x81\207\41 386,SM +CMP mem,imm8 \300\1\x80\207\21 8086,SM +CMP mem,imm16 \320\300\1\x81\207\31 8086,SM +CMP mem,imm32 \321\300\1\x81\207\41 386,SM +CMPSB void \1\xA6 8086 +CMPSD void \321\1\xA7 386 +CMPSW void \320\1\xA7 8086 +CMPXCHG mem,reg8 \300\2\x0F\xA6\101 486,SM +CMPXCHG reg8,reg8 \300\2\x0F\xA6\101 486 +CMPXCHG mem,reg16 \320\300\2\x0F\xA7\101 486,SM +CMPXCHG reg16,reg16 \320\300\2\x0F\xA7\101 486 +CMPXCHG mem,reg32 \321\300\2\x0F\xA7\101 486,SM +CMPXCHG reg32,reg32 \321\300\2\x0F\xA7\101 486 +CMPXCHG8B mem \300\2\x0F\xC7\201 PENT +CPUID void \2\x0F\xA2 PENT +CWD void \320\1\x99 8086 +CWDE void \321\1\x98 386 +DAA void \1\x27 8086 +DAS void \1\x2F 8086 +DB ignore ignore ignore +DD ignore ignore ignore +DEC reg16 \320\10\x48 8086 +DEC reg32 \321\10\x48 386 +DEC rm8 \300\1\xFE\201 8086 +DEC rm16 \320\300\1\xFF\201 8086 +DEC rm32 
\321\300\1\xFF\201 386 +DIV rm8 \300\1\xF6\206 8086 +DIV rm16 \320\300\1\xF7\206 8086 +DIV rm32 \321\300\1\xF7\206 386 +DQ ignore ignore ignore +DT ignore ignore ignore +DW ignore ignore ignore +EMMS void \2\x0F\x77 PENT,MMX +ENTER imm,imm \1\xC8\30\25 186 +EQU imm \0 8086 +EQU imm:imm \0 8086 +F2XM1 void \2\xD9\xF0 8086,FPU +FABS void \2\xD9\xE1 8086,FPU +FADD mem32 \300\1\xD8\200 8086,FPU +FADD mem64 \300\1\xDC\200 8086,FPU +FADD fpureg|to \1\xDC\10\xC0 8086,FPU +FADD fpureg \1\xD8\10\xC0 8086,FPU +FADD fpureg,fpu0 \1\xDC\10\xC0 8086,FPU +FADD fpu0,fpureg \1\xD8\11\xC0 8086,FPU +FADDP fpureg \1\xDE\10\xC0 8086,FPU +FADDP fpureg,fpu0 \1\xDE\10\xC0 8086,FPU +FBLD mem80 \300\1\xDF\204 8086,FPU +FBSTP mem80 \300\1\xDF\206 8086,FPU +FCHS void \2\xD9\xE0 8086,FPU +FCLEX void \2\xDB\xE2 8086,FPU +FCMOVB fpureg \1\xDA\10\xC0 P6,FPU +FCMOVB fpu0,fpureg \1\xDA\11\xC0 P6,FPU +FCMOVBE fpureg \1\xDA\10\xD0 P6,FPU +FCMOVBE fpu0,fpureg \1\xDA\11\xD0 P6,FPU +FCMOVE fpureg \1\xDA\10\xC8 P6,FPU +FCMOVE fpu0,fpureg \1\xDA\11\xC8 P6,FPU +FCMOVNB fpureg \1\xDB\10\xC0 P6,FPU +FCMOVNB fpu0,fpureg \1\xDB\11\xC0 P6,FPU +FCMOVNBE fpureg \1\xDB\10\xD0 P6,FPU +FCMOVNBE fpu0,fpureg \1\xDB\11\xD0 P6,FPU +FCMOVNE fpureg \1\xDB\10\xC8 P6,FPU +FCMOVNE fpu0,fpureg \1\xDB\11\xC8 P6,FPU +FCMOVNU fpureg \1\xDB\10\xD8 P6,FPU +FCMOVNU fpu0,fpureg \1\xDB\11\xD8 P6,FPU +FCMOVU fpureg \1\xDA\10\xD8 P6,FPU +FCMOVU fpu0,fpureg \1\xDA\11\xD8 P6,FPU +FCOM mem32 \300\1\xD8\202 8086,FPU +FCOM mem64 \300\1\xDC\202 8086,FPU +FCOM fpureg \1\xD8\10\xD0 8086,FPU +FCOM fpu0,fpureg \1\xD8\11\xD0 8086,FPU +FCOMI fpureg \1\xDB\10\xF0 P6,FPU +FCOMI fpu0,fpureg \1\xDB\11\xF0 P6,FPU +FCOMIP fpureg \1\xDF\10\xF0 P6,FPU +FCOMIP fpu0,fpureg \1\xDF\11\xF0 P6,FPU +FCOMP mem32 \300\1\xD8\203 8086,FPU +FCOMP mem64 \300\1\xDC\203 8086,FPU +FCOMP fpureg \1\xD8\10\xD8 8086,FPU +FCOMP fpu0,fpureg \1\xD8\11\xD8 8086,FPU +FCOMPP void \2\xDE\xD9 8086,FPU +FCOS void \2\xD9\xFF 386,FPU +FDECSTP void \2\xD9\xF6 8086,FPU +FDISI void 
\2\xDB\xE1 8086,FPU +FDIV mem32 \300\1\xD8\206 8086,FPU +FDIV mem64 \300\1\xDC\206 8086,FPU +FDIV fpureg|to \1\xDC\10\xF0 8086,FPU +FDIV fpureg,fpu0 \1\xDC\10\xF0 8086,FPU +FDIV fpureg \1\xD8\10\xF0 8086,FPU +FDIV fpu0,fpureg \1\xD8\11\xF0 8086,FPU +FDIVP fpureg,fpu0 \1\xDE\10\xF0 8086,FPU +FDIVP fpureg \1\xDE\10\xF0 8086,FPU +FDIVR mem32 \300\1\xD8\207 8086,FPU +FDIVR mem64 \300\1\xDC\207 8086,FPU +FDIVR fpureg|to \1\xDC\10\xF8 8086,FPU +FDIVR fpureg,fpu0 \1\xDC\10\xF8 8086,FPU +FDIVR fpureg \1\xD8\10\xF8 8086,FPU +FDIVR fpu0,fpureg \1\xD8\11\xF8 8086,FPU +FDIVRP fpureg \1\xDE\10\xF8 8086,FPU +FDIVRP fpureg,fpu0 \1\xDE\10\xF8 8086,FPU +FENI void \2\xDB\xE0 8086,FPU +FFREE fpureg \1\xDD\10\xC0 8086,FPU +FIADD mem32 \300\1\xDA\200 8086,FPU +FIADD mem16 \300\1\xDE\200 8086,FPU +FICOM mem32 \300\1\xDA\202 8086,FPU +FICOM mem16 \300\1\xDE\202 8086,FPU +FICOMP mem32 \300\1\xDA\203 8086,FPU +FICOMP mem16 \300\1\xDE\203 8086,FPU +FIDIV mem32 \300\1\xDA\206 8086,FPU +FIDIV mem16 \300\1\xDE\206 8086,FPU +FIDIVR mem32 \300\1\xDA\207 8086,FPU +FIDIVR mem16 \300\1\xDE\207 8086,FPU +FILD mem32 \300\1\xDB\200 8086,FPU +FILD mem16 \300\1\xDF\200 8086,FPU +FILD mem64 \300\1\xDF\205 8086,FPU +FIMUL mem32 \300\1\xDA\201 8086,FPU +FIMUL mem16 \300\1\xDE\201 8086,FPU +FINCSTP void \2\xD9\xF7 8086,FPU +FINIT void \2\xDB\xE3 8086,FPU +FIST mem32 \300\1\xDB\202 8086,FPU +FIST mem16 \300\1\xDF\202 8086,FPU +FISTP mem32 \300\1\xDB\203 8086,FPU +FISTP mem16 \300\1\xDF\203 8086,FPU +FISTP mem64 \300\1\xDF\207 8086,FPU +FISUB mem32 \300\1\xDA\204 8086,FPU +FISUB mem16 \300\1\xDE\204 8086,FPU +FISUBR mem32 \300\1\xDA\205 8086,FPU +FISUBR mem16 \300\1\xDE\205 8086,FPU +FLD mem32 \300\1\xD9\200 8086,FPU +FLD mem64 \300\1\xDD\200 8086,FPU +FLD mem80 \300\1\xDB\205 8086,FPU +FLD fpureg \1\xD9\10\xC0 8086,FPU +FLD1 void \2\xD9\xE8 8086,FPU +FLDCW mem \300\1\xD9\205 8086,FPU +FLDENV mem \300\1\xD9\204 8086,FPU +FLDL2E void \2\xD9\xEA 8086,FPU +FLDL2T void \2\xD9\xE9 8086,FPU +FLDLG2 void \2\xD9\xEC 
8086,FPU +FLDLN2 void \2\xD9\xED 8086,FPU +FLDPI void \2\xD9\xEB 8086,FPU +FLDZ void \2\xD9\xEE 8086,FPU +FMUL mem32 \300\1\xD8\201 8086,FPU +FMUL mem64 \300\1\xDC\201 8086,FPU +FMUL fpureg|to \1\xDC\10\xC8 8086,FPU +FMUL fpureg,fpu0 \1\xDC\10\xC8 8086,FPU +FMUL fpureg \1\xD8\10\xC8 8086,FPU +FMUL fpu0,fpureg \1\xD8\11\xC8 8086,FPU +FMULP fpureg \1\xDE\10\xC8 8086,FPU +FMULP fpureg,fpu0 \1\xDE\10\xC8 8086,FPU +FNOP void \2\xD9\xD0 8086,FPU +FPATAN void \2\xD9\xF3 8086,FPU +FPREM void \2\xD9\xF8 8086,FPU +FPREM1 void \2\xD9\xF5 386,FPU +FPTAN void \2\xD9\xF2 8086,FPU +FRNDINT void \2\xD9\xFC 8086,FPU +FRSTOR mem \300\1\xDD\204 8086,FPU +FSAVE mem \300\1\xDD\206 8086,FPU +FSCALE void \2\xD9\xFD 8086,FPU +FSETPM void \2\xDB\xE4 286,FPU +FSIN void \2\xD9\xFE 386,FPU +FSINCOS void \2\xD9\xFB 386,FPU +FSQRT void \2\xD9\xFA 8086,FPU +FST mem32 \300\1\xD9\202 8086,FPU +FST mem64 \300\1\xDD\202 8086,FPU +FST fpureg \1\xDD\10\xD0 8086,FPU +FSTCW mem \300\1\xD9\207 8086,FPU +FSTENV mem \300\1\xD9\206 8086,FPU +FSTP mem32 \300\1\xD9\203 8086,FPU +FSTP mem64 \300\1\xDD\203 8086,FPU +FSTP mem80 \300\1\xDB\207 8086,FPU +FSTP fpureg \1\xDD\10\xD8 8086,FPU +FSTSW mem \300\1\xDD\207 8086,FPU +FSTSW reg_ax \2\xDF\xE0 286,FPU +FSUB mem32 \300\1\xD8\204 8086,FPU +FSUB mem64 \300\1\xDC\204 8086,FPU +FSUB fpureg|to \1\xDC\10\xE0 8086,FPU +FSUB fpureg,fpu0 \1\xDC\10\xE0 8086,FPU +FSUB fpureg \1\xD8\10\xE0 8086,FPU +FSUB fpu0,fpureg \1\xD8\11\xE0 8086,FPU +FSUBP fpureg \1\xDE\10\xE0 8086,FPU +FSUBP fpureg,fpu0 \1\xDE\10\xE0 8086,FPU +FSUBR mem32 \300\1\xD8\205 8086,FPU +FSUBR mem64 \300\1\xDC\205 8086,FPU +FSUBR fpureg|to \1\xDC\10\xE8 8086,FPU +FSUBR fpureg,fpu0 \1\xDC\10\xE8 8086,FPU +FSUBR fpureg \1\xD8\10\xE8 8086,FPU +FSUBR fpu0,fpureg \1\xD8\11\xE8 8086,FPU +FSUBRP fpureg \1\xDE\10\xE8 8086,FPU +FSUBRP fpureg,fpu0 \1\xDE\10\xE8 8086,FPU +FTST void \2\xD9\xE4 8086,FPU +FUCOM fpureg \1\xDD\10\xE0 386,FPU +FUCOMI fpureg \1\xDB\10\xE8 P6,FPU +FUCOMI fpu0,fpureg \1\xDB\11\xE8 P6,FPU 
+FUCOMIP fpureg \1\xDF\10\xE8 P6,FPU +FUCOMIP fpu0,fpureg \1\xDF\11\xE8 P6,FPU +FUCOMP fpureg \1\xDD\10\xE8 386,FPU +FUCOMPP void \2\xDA\xE9 386,FPU +FXAM void \2\xD9\xE5 8086,FPU +FXCH void \2\xD9\xC9 8086,FPU +FXCH fpureg \1\xD9\10\xC8 8086,FPU +FXCH fpureg,fpu0 \1\xD9\10\xC8 8086,FPU +FXCH fpu0,fpureg \1\xD9\11\xC8 8086,FPU +FXTRACT void \2\xD9\xF4 8086,FPU +FYL2X void \2\xD9\xF1 8086,FPU +FYL2XP1 void \2\xD9\xF9 8086,FPU +HLT void \1\xF4 8086 +ICEBP void \1\xF1 286,UNDOC +IDIV rm8 \300\1\xF6\207 8086 +IDIV rm16 \320\300\1\xF7\207 8086 +IDIV rm32 \321\300\1\xF7\207 386 +IMUL rm8 \300\1\xF6\205 8086 +IMUL rm16 \320\300\1\xF7\205 8086 +IMUL rm32 \321\300\1\xF7\205 386 +IMUL reg16,mem \320\301\2\x0F\xAF\110 386,SM +IMUL reg16,reg16 \320\301\2\x0F\xAF\110 386 +IMUL reg32,mem \321\301\2\x0F\xAF\110 386,SM +IMUL reg32,reg32 \321\301\2\x0F\xAF\110 386 +IMUL reg16,mem,imm8 \320\301\1\x6B\110\16 286,SM +IMUL reg16,reg16,imm8 \320\301\1\x6B\110\16 286 +IMUL reg16,mem,imm \320\301\1\x69\110\32 286,SM +IMUL reg16,reg16,imm \320\301\1\x69\110\32 286 +IMUL reg32,mem,imm8 \321\301\1\x6B\110\16 386,SM +IMUL reg32,reg32,imm8 \321\301\1\x6B\110\16 386 +IMUL reg32,mem,imm \321\301\1\x69\110\42 386,SM +IMUL reg32,reg32,imm \321\301\1\x69\110\42 386,SM +IMUL reg16,imm8 \320\1\x6B\100\15 286 +IMUL reg16,imm \320\1\x69\100\31 286,SM +IMUL reg32,imm8 \321\1\x6B\100\15 386 +IMUL reg32,imm \321\1\x69\100\41 386,SM +IN reg_al,imm \1\xE4\25 8086 +IN reg_ax,imm \320\1\xE5\25 8086 +IN reg_eax,imm \321\1\xE5\25 386 +IN reg_al,reg_dx \1\xEC 8086 +IN reg_ax,reg_dx \320\1\xED 8086 +IN reg_eax,reg_dx \321\1\xED 386 +INC reg16 \320\10\x40 8086 +INC reg32 \321\10\x40 386 +INC rm8 \300\1\xFE\200 8086 +INC rm16 \320\300\1\xFF\200 8086 +INC rm32 \321\300\1\xFF\200 386 +INSB void \1\x6C 186 +INSD void \321\1\x6D 386 +INSW void \320\1\x6D 186 +INT imm \1\xCD\24 8086 +INT01 void \1\xF1 286,UNDOC +INT1 void \1\xF1 286,UNDOC +INT3 void \1\xCC 8086 +INTO void \1\xCE 8086 +INVD void \2\x0F\x08 486 +INVLPG 
mem \300\2\x0F\x01\207 486 +IRET void \1\xCF 8086 +IRETD void \321\1\xCF 386 +IRETW void \320\1\xCF 8086 +JCXZ imm \320\1\xE3\50 8086 +JECXZ imm \321\1\xE3\50 386 +JMP imm|short \1\xEB\50 8086 +JMP imm \322\1\xE9\64 8086 +JMP imm|far \322\1\xEA\34\37 8086 +JMP imm:imm \322\1\xEA\35\30 8086 +JMP imm16:imm \320\1\xEA\31\30 8086 +JMP imm:imm16 \320\1\xEA\31\30 8086 +JMP imm32:imm \321\1\xEA\41\30 386 +JMP imm:imm32 \321\1\xEA\41\30 386 +JMP mem|far \322\300\1\xFF\205 8086 +JMP mem16|far \320\300\1\xFF\205 8086 +JMP mem32|far \321\300\1\xFF\205 386 +JMP mem|near \322\300\1\xFF\204 8086 +JMP mem16|near \320\300\1\xFF\204 8086 +JMP mem32|near \321\300\1\xFF\204 386 +JMP reg16 \320\300\1\xFF\204 8086 +JMP reg32 \321\300\1\xFF\204 386 +JMP mem \322\300\1\xFF\204 8086 +JMP mem16 \320\300\1\xFF\204 8086 +JMP mem32 \321\300\1\xFF\204 386 +LAHF void \1\x9F 8086 +LAR reg16,mem \320\301\2\x0F\x02\110 286,PRIV,SM +LAR reg16,reg16 \320\301\2\x0F\x02\110 286,PRIV +LAR reg32,mem \321\301\2\x0F\x02\110 286,PRIV,SM +LAR reg32,reg32 \321\301\2\x0F\x02\110 286,PRIV +LDS reg16,mem \320\301\1\xC5\110 8086 +LDS reg32,mem \321\301\1\xC5\110 8086 +LEA reg16,mem \320\301\1\x8D\110 8086 +LEA reg32,mem \321\301\1\x8D\110 8086 +LEAVE void \1\xC9 186 +LES reg16,mem \320\301\1\xC4\110 8086 +LES reg32,mem \321\301\1\xC4\110 8086 +LFS reg16,mem \320\301\2\x0F\xB4\110 386 +LFS reg32,mem \321\301\2\x0F\xB4\110 386 +LGDT mem \300\2\x0F\x01\202 286,PRIV +LGS reg16,mem \320\301\2\x0F\xB5\110 386 +LGS reg32,mem \321\301\2\x0F\xB5\110 386 +LIDT mem \300\2\x0F\x01\203 286,PRIV +LLDT mem \300\1\x0F\17\202 286,PRIV +LLDT mem16 \300\1\x0F\17\202 286,PRIV +LLDT reg16 \300\1\x0F\17\202 286,PRIV +LMSW mem \300\2\x0F\x01\206 286,PRIV +LMSW mem16 \300\2\x0F\x01\206 286,PRIV +LMSW reg16 \300\2\x0F\x01\206 286,PRIV +LOADALL void \2\x0F\x07 386,UNDOC +LODSB void \1\xAC 8086 +LODSD void \321\1\xAD 386 +LODSW void \320\1\xAD 8086 +LOOP imm \312\1\xE2\50 8086 +LOOP imm,reg_cx \310\1\xE2\50 8086 +LOOP imm,reg_ecx 
\311\1\xE2\50 386 +LOOPE imm \312\1\xE1\50 8086 +LOOPE imm,reg_cx \310\1\xE1\50 8086 +LOOPE imm,reg_ecx \311\1\xE1\50 386 +LOOPNE imm \312\1\xE0\50 8086 +LOOPNE imm,reg_cx \310\1\xE0\50 8086 +LOOPNE imm,reg_ecx \311\1\xE0\50 386 +LOOPNZ imm \312\1\xE0\50 8086 +LOOPNZ imm,reg_cx \310\1\xE0\50 8086 +LOOPNZ imm,reg_ecx \311\1\xE0\50 386 +LOOPZ imm \312\1\xE1\50 8086 +LOOPZ imm,reg_cx \310\1\xE1\50 8086 +LOOPZ imm,reg_ecx \311\1\xE1\50 386 +LSL reg16,mem \320\301\2\x0F\x03\110 286,PRIV,SM +LSL reg16,reg16 \320\301\2\x0F\x03\110 286,PRIV +LSL reg32,mem \321\301\2\x0F\x03\110 286,PRIV,SM +LSL reg32,reg32 \321\301\2\x0F\x03\110 286,PRIV +LSS reg16,mem \320\301\2\x0F\xB2\110 386 +LSS reg32,mem \321\301\2\x0F\xB2\110 386 +LTR mem \300\1\x0F\17\203 286,PRIV +LTR mem16 \300\1\x0F\17\203 286,PRIV +LTR reg16 \300\1\x0F\17\203 286,PRIV +MOV mem,reg_cs \300\1\x8C\101 8086,SM +MOV mem,reg_dess \300\1\x8C\101 8086,SM +MOV mem,reg_fsgs \300\1\x8C\101 386,SM +MOV reg16,reg_cs \300\1\x8C\101 8086 +MOV reg16,reg_dess \300\1\x8C\101 8086 +MOV reg16,reg_fsgs \300\1\x8C\101 386 +MOV reg_dess,mem \301\1\x8E\110 8086,SM +MOV reg_dess,reg16 \301\1\x8E\110 8086 +MOV reg_fsgs,mem \301\1\x8E\110 386,SM +MOV reg_fsgs,reg16 \301\1\x8E\110 386 +MOV reg_al,mem_offs \301\1\xA0\35 8086,SM +MOV reg_ax,mem_offs \301\320\1\xA1\35 8086,SM +MOV reg_eax,mem_offs \301\321\1\xA1\35 386,SM +MOV mem_offs,reg_al \300\1\xA2\34 8086,SM +MOV mem_offs,reg_ax \300\320\1\xA3\34 8086,SM +MOV mem_offs,reg_eax \300\321\1\xA3\34 386,SM +MOV reg32,reg_cr4 \2\x0F\x20\204 PENT +MOV reg32,reg_creg \2\x0F\x20\101 386 +MOV reg32,reg_dreg \2\x0F\x21\101 386 +MOV reg32,reg_treg \2\x0F\x24\101 386 +MOV reg_cr4,reg32 \2\x0F\x22\214 PENT +MOV reg_creg,reg32 \2\x0F\x22\110 386 +MOV reg_dreg,reg32 \2\x0F\x23\110 386 +MOV reg_treg,reg32 \2\x0F\x26\110 386 +MOV mem,reg8 \300\1\x88\101 8086,SM +MOV reg8,reg8 \300\1\x88\101 8086 +MOV mem,reg16 \320\300\1\x89\101 8086,SM +MOV reg16,reg16 \320\300\1\x89\101 8086 +MOV mem,reg32 
\321\300\1\x89\101 386,SM +MOV reg32,reg32 \321\300\1\x89\101 386 +MOV reg8,mem \301\1\x8A\110 8086,SM +MOV reg8,reg8 \301\1\x8A\110 8086 +MOV reg16,mem \320\301\1\x8B\110 8086,SM +MOV reg16,reg16 \320\301\1\x8B\110 8086 +MOV reg32,mem \321\301\1\x8B\110 386,SM +MOV reg32,reg32 \321\301\1\x8B\110 386 +MOV reg8,imm \10\xB0\21 8086,SM +MOV reg16,imm \320\10\xB8\31 8086,SM +MOV reg32,imm \321\10\xB8\41 386,SM +MOV rm8,imm \300\1\xC6\200\21 8086,SM +MOV rm16,imm \320\300\1\xC7\200\31 8086,SM +MOV rm32,imm \321\300\1\xC7\200\41 386,SM +MOV mem,imm8 \300\1\xC6\200\21 8086,SM +MOV mem,imm16 \320\300\1\xC7\200\31 8086,SM +MOV mem,imm32 \321\300\1\xC7\200\41 386,SM +MOVD mmxreg,mem \301\2\x0F\x6E\110 PENT,MMX,SD +MOVD mmxreg,reg32 \2\x0F\x6E\110 PENT,MMX +MOVD mem,mmxreg \300\2\x0F\x7E\101 PENT,MMX,SD +MOVD reg32,mmxreg \2\x0F\x7E\101 PENT,MMX +MOVQ mmxreg,mem \301\2\x0F\x6F\110 PENT,MMX,SM +MOVQ mmxreg,mmxreg \2\x0F\x6F\110 PENT,MMX +MOVQ mem,mmxreg \300\2\x0F\x7F\101 PENT,MMX,SM +MOVQ mmxreg,mmxreg \2\x0F\x7F\101 PENT,MMX +MOVSB void \1\xA4 8086 +MOVSD void \321\1\xA5 386 +MOVSW void \320\1\xA5 8086 +MOVSX reg16,mem \320\301\2\x0F\xBE\110 386,SB +MOVSX reg16,reg8 \320\301\2\x0F\xBE\110 386 +MOVSX reg32,rm8 \321\301\2\x0F\xBE\110 386 +MOVSX reg32,rm16 \321\301\2\x0F\xBF\110 386 +MOVZX reg16,mem \320\301\2\x0F\xB6\110 386,SB +MOVZX reg16,reg8 \320\301\2\x0F\xB6\110 386 +MOVZX reg32,rm8 \321\301\2\x0F\xB6\110 386 +MOVZX reg32,rm16 \321\301\2\x0F\xB7\110 386 +MUL rm8 \300\1\xF6\204 8086 +MUL rm16 \320\300\1\xF7\204 8086 +MUL rm32 \321\300\1\xF7\204 386 +NEG rm8 \300\1\xF6\203 8086 +NEG rm16 \320\300\1\xF7\203 8086 +NEG rm32 \321\300\1\xF7\203 386 +NOP void \1\x90 8086 +NOT rm8 \300\1\xF6\202 8086 +NOT rm16 \320\300\1\xF7\202 8086 +NOT rm32 \321\300\1\xF7\202 386 +OR mem,reg8 \300\1\x08\101 8086,SM +OR reg8,reg8 \300\1\x08\101 8086 +OR mem,reg16 \320\300\1\x09\101 8086,SM +OR reg16,reg16 \320\300\1\x09\101 8086 +OR mem,reg32 \321\300\1\x09\101 386,SM +OR reg32,reg32 
\321\300\1\x09\101 386 +OR reg8,mem \301\1\x0A\110 8086,SM +OR reg8,reg8 \301\1\x0A\110 8086 +OR reg16,mem \320\301\1\x0B\110 8086,SM +OR reg16,reg16 \320\301\1\x0B\110 8086 +OR reg32,mem \321\301\1\x0B\110 386,SM +OR reg32,reg32 \321\301\1\x0B\110 386 +OR rm16,imm8 \320\300\1\x83\201\15 8086 +OR rm32,imm8 \321\300\1\x83\201\15 386 +OR reg_al,imm \1\x0C\21 8086,SM +OR reg_ax,imm \320\1\x0D\31 8086,SM +OR reg_eax,imm \321\1\x0D\41 386,SM +OR rm8,imm \300\1\x80\201\21 8086,SM +OR rm16,imm \320\300\1\x81\201\31 8086,SM +OR rm32,imm \321\300\1\x81\201\41 386,SM +OR mem,imm8 \300\1\x80\201\21 8086,SM +OR mem,imm16 \320\300\1\x81\201\31 8086,SM +OR mem,imm32 \321\300\1\x81\201\41 386,SM +OUT imm,reg_al \1\xE6\24 8086 +OUT imm,reg_ax \320\1\xE7\24 8086 +OUT imm,reg_eax \321\1\xE7\24 386 +OUT reg_dx,reg_al \1\xEE 8086 +OUT reg_dx,reg_ax \320\1\xEF 8086 +OUT reg_dx,reg_eax \321\1\xEF 386 +OUTSB void \1\x6E 186 +OUTSD void \321\1\x6F 386 +OUTSW void \320\1\x6F 186 +PACKSSDW mmxreg,mem \301\2\x0F\x6B\110 PENT,MMX,SM +PACKSSDW mmxreg,mmxreg \2\x0F\x6B\110 PENT,MMX +PACKSSWB mmxreg,mem \301\2\x0F\x63\110 PENT,MMX,SM +PACKSSWB mmxreg,mmxreg \2\x0F\x63\110 PENT,MMX +PACKUSWB mmxreg,mem \301\2\x0F\x67\110 PENT,MMX,SM +PACKUSWB mmxreg,mmxreg \2\x0F\x67\110 PENT,MMX +PADDB mmxreg,mem \301\2\x0F\xFC\110 PENT,MMX,SM +PADDB mmxreg,mmxreg \2\x0F\xFC\110 PENT,MMX +PADDD mmxreg,mem \301\2\x0F\xFE\110 PENT,MMX,SM +PADDD mmxreg,mmxreg \2\x0F\xFE\110 PENT,MMX +PADDSB mmxreg,mem \301\2\x0F\xEC\110 PENT,MMX,SM +PADDSB mmxreg,mmxreg \2\x0F\xEC\110 PENT,MMX +PADDSW mmxreg,mem \301\2\x0F\xED\110 PENT,MMX,SM +PADDSW mmxreg,mmxreg \2\x0F\xED\110 PENT,MMX +PADDUSB mmxreg,mem \301\2\x0F\xDC\110 PENT,MMX,SM +PADDUSB mmxreg,mmxreg \2\x0F\xDC\110 PENT,MMX +PADDUSW mmxreg,mem \301\2\x0F\xDD\110 PENT,MMX,SM +PADDUSW mmxreg,mmxreg \2\x0F\xDD\110 PENT,MMX +PADDW mmxreg,mem \301\2\x0F\xFD\110 PENT,MMX,SM +PADDW mmxreg,mmxreg \2\x0F\xFD\110 PENT,MMX +PAND mmxreg,mem \301\2\x0F\xDB\110 PENT,MMX,SM +PAND 
mmxreg,mmxreg \2\x0F\xDB\110 PENT,MMX +PANDN mmxreg,mem \301\2\x0F\xDF\110 PENT,MMX,SM +PANDN mmxreg,mmxreg \2\x0F\xDF\110 PENT,MMX +PCMPEQB mmxreg,mem \301\2\x0F\x74\110 PENT,MMX,SM +PCMPEQB mmxreg,mmxreg \2\x0F\x74\110 PENT,MMX +PCMPEQD mmxreg,mem \301\2\x0F\x76\110 PENT,MMX,SM +PCMPEQD mmxreg,mmxreg \2\x0F\x76\110 PENT,MMX +PCMPEQW mmxreg,mem \301\2\x0F\x75\110 PENT,MMX,SM +PCMPEQW mmxreg,mmxreg \2\x0F\x75\110 PENT,MMX +PCMPGTB mmxreg,mem \301\2\x0F\x64\110 PENT,MMX,SM +PCMPGTB mmxreg,mmxreg \2\x0F\x64\110 PENT,MMX +PCMPGTD mmxreg,mem \301\2\x0F\x66\110 PENT,MMX,SM +PCMPGTD mmxreg,mmxreg \2\x0F\x66\110 PENT,MMX +PCMPGTW mmxreg,mem \301\2\x0F\x65\110 PENT,MMX,SM +PCMPGTW mmxreg,mmxreg \2\x0F\x65\110 PENT,MMX +PMADDWD mmxreg,mem \301\2\x0F\xF5\110 PENT,MMX,SM +PMADDWD mmxreg,mmxreg \2\x0F\xF5\110 PENT,MMX +PMULHW mmxreg,mem \301\2\x0F\xE5\110 PENT,MMX,SM +PMULHW mmxreg,mmxreg \2\x0F\xE5\110 PENT,MMX +PMULLW mmxreg,mem \301\2\x0F\xD5\110 PENT,MMX,SM +PMULLW mmxreg,mmxreg \2\x0F\xD5\110 PENT,MMX +POP mem16 \320\300\1\x8F\200 8086 +POP mem32 \321\300\1\x8F\200 386 +POP reg_dess \4 8086 +POP reg_fsgs \1\x0F\5 386 +POP reg16 \320\10\x58 8086 +POP reg32 \321\10\x58 386 +POPA void \1\x61 186 +POPAD void \321\1\x61 386 +POPAW void \320\1\x61 186 +POPF void \1\x9D 186 +POPFD void \321\1\x9D 386 +POPFW void \320\1\x9D 186 +POR mmxreg,mem \301\2\x0F\xEB\110 PENT,MMX,SM +POR mmxreg,mmxreg \2\x0F\xEB\110 PENT,MMX +PSLLD mmxreg,mem \301\2\x0F\xF2\110 PENT,MMX,SM +PSLLD mmxreg,mmxreg \2\x0F\xF2\110 PENT,MMX +PSLLD mmxreg,imm \2\x0F\x72\206\25 PENT,MMX +PSLLQ mmxreg,mem \301\2\x0F\xF3\110 PENT,MMX,SM +PSLLQ mmxreg,mmxreg \2\x0F\xF3\110 PENT,MMX +PSLLQ mmxreg,imm \2\x0F\x73\206\25 PENT,MMX +PSLLW mmxreg,mem \301\2\x0F\xF1\110 PENT,MMX,SM +PSLLW mmxreg,mmxreg \2\x0F\xF1\110 PENT,MMX +PSLLW mmxreg,imm \2\x0F\x71\206\25 PENT,MMX +PSRAD mmxreg,mem \301\2\x0F\xE2\110 PENT,MMX,SM +PSRAD mmxreg,mmxreg \2\x0F\xE2\110 PENT,MMX +PSRAD mmxreg,imm \2\x0F\x72\204\25 PENT,MMX +PSRAW mmxreg,mem 
\301\2\x0F\xE1\110 PENT,MMX,SM +PSRAW mmxreg,mmxreg \2\x0F\xE1\110 PENT,MMX +PSRAW mmxreg,imm \2\x0F\x71\204\25 PENT,MMX +PSRLD mmxreg,mem \301\2\x0F\xD2\110 PENT,MMX,SM +PSRLD mmxreg,mmxreg \2\x0F\xD2\110 PENT,MMX +PSRLD mmxreg,imm \2\x0F\x72\202\25 PENT,MMX +PSRLQ mmxreg,mem \301\2\x0F\xD3\110 PENT,MMX,SM +PSRLQ mmxreg,mmxreg \2\x0F\xD3\110 PENT,MMX +PSRLQ mmxreg,imm \2\x0F\x73\202\25 PENT,MMX +PSRLW mmxreg,mem \301\2\x0F\xD1\110 PENT,MMX,SM +PSRLW mmxreg,mmxreg \2\x0F\xD1\110 PENT,MMX +PSRLW mmxreg,imm \2\x0F\x71\202\25 PENT,MMX +PSUBB mmxreg,mem \301\2\x0F\xF8\110 PENT,MMX,SM +PSUBB mmxreg,mmxreg \2\x0F\xF8\110 PENT,MMX +PSUBD mmxreg,mem \301\2\x0F\xFA\110 PENT,MMX,SM +PSUBD mmxreg,mmxreg \2\x0F\xFA\110 PENT,MMX +PSUBSB mmxreg,mem \301\2\x0F\xE8\110 PENT,MMX,SM +PSUBSB mmxreg,mmxreg \2\x0F\xE8\110 PENT,MMX +PSUBSW mmxreg,mem \301\2\x0F\xE9\110 PENT,MMX,SM +PSUBSW mmxreg,mmxreg \2\x0F\xE9\110 PENT,MMX +PSUBUSB mmxreg,mem \301\2\x0F\xD8\110 PENT,MMX,SM +PSUBUSB mmxreg,mmxreg \2\x0F\xD8\110 PENT,MMX +PSUBUSW mmxreg,mem \301\2\x0F\xD9\110 PENT,MMX,SM +PSUBUSW mmxreg,mmxreg \2\x0F\xD9\110 PENT,MMX +PSUBW mmxreg,mem \301\2\x0F\xF9\110 PENT,MMX,SM +PSUBW mmxreg,mmxreg \2\x0F\xF9\110 PENT,MMX +PUNPCKHBW mmxreg,mem \301\2\x0F\x68\110 PENT,MMX,SM +PUNPCKHBW mmxreg,mmxreg \2\x0F\x68\110 PENT,MMX +PUNPCKHDQ mmxreg,mem \301\2\x0F\x6A\110 PENT,MMX,SM +PUNPCKHDQ mmxreg,mmxreg \2\x0F\x6A\110 PENT,MMX +PUNPCKHWD mmxreg,mem \301\2\x0F\x69\110 PENT,MMX,SM +PUNPCKHWD mmxreg,mmxreg \2\x0F\x69\110 PENT,MMX +PUNPCKLBW mmxreg,mem \301\2\x0F\x60\110 PENT,MMX,SM +PUNPCKLBW mmxreg,mmxreg \2\x0F\x60\110 PENT,MMX +PUNPCKLDQ mmxreg,mem \301\2\x0F\x62\110 PENT,MMX,SM +PUNPCKLDQ mmxreg,mmxreg \2\x0F\x62\110 PENT,MMX +PUNPCKLWD mmxreg,mem \301\2\x0F\x61\110 PENT,MMX,SM +PUNPCKLWD mmxreg,mmxreg \2\x0F\x61\110 PENT,MMX +PUSH mem16 \320\300\1\xFF\206 8086 +PUSH mem32 \321\300\1\xFF\206 386 +PUSH reg_fsgs \1\x0F\7 386 +PUSH reg_sreg \6 8086 +PUSH reg16 \320\10\x50 8086 +PUSH reg32 \321\10\x50 386 
+PUSH imm8 \1\x6A\14 286 +PUSH imm16 \320\1\x68\30 286 +PUSH imm32 \321\1\x68\40 386 +PUSHA void \1\x60 186 +PUSHAD void \321\1\x60 386 +PUSHAW void \320\1\x60 186 +PUSHF void \1\x9C 186 +PUSHFD void \321\1\x9C 386 +PUSHFW void \320\1\x9C 186 +PXOR mmxreg,mem \301\2\x0F\xEF\110 PENT,MMX,SM +PXOR mmxreg,mmxreg \2\x0F\xEF\110 PENT,MMX +RCL rm8,unity \300\1\xD0\202 8086 +RCL rm8,reg_cl \300\1\xD2\202 8086 +RCL rm8,imm \300\1\xC0\202\25 286 +RCL rm16,unity \320\300\1\xD1\202 8086 +RCL rm16,reg_cl \320\300\1\xD3\202 8086 +RCL rm16,imm \320\300\1\xC1\202\25 286 +RCL rm32,unity \321\300\1\xD1\202 386 +RCL rm32,reg_cl \321\300\1\xD3\202 386 +RCL rm32,imm \321\300\1\xC1\202\25 386 +RCR rm8,unity \300\1\xD0\203 8086 +RCR rm8,reg_cl \300\1\xD2\203 8086 +RCR rm8,imm \300\1\xC0\203\25 286 +RCR rm16,unity \320\300\1\xD1\203 8086 +RCR rm16,reg_cl \320\300\1\xD3\203 8086 +RCR rm16,imm \320\300\1\xC1\203\25 286 +RCR rm32,unity \321\300\1\xD1\203 386 +RCR rm32,reg_cl \321\300\1\xD3\203 386 +RCR rm32,imm \321\300\1\xC1\203\25 386 +RDMSR void \2\x0F\x32 PENT +RDPMC void \2\x0F\x33 P6 +RDTSC void \2\x0F\x31 PENT +RESB imm \340 8086 +RESD ignore ignore ignore +RESQ ignore ignore ignore +REST ignore ignore ignore +RESW ignore ignore ignore +RET void \1\xC3 8086 +RET imm \1\xC2\30 8086 +RETF void \1\xCB 8086 +RETF imm \1\xCA\30 8086 +RETN void \1\xC3 8086 +RETN imm \1\xC2\30 8086 +ROL rm8,unity \300\1\xD0\200 8086 +ROL rm8,reg_cl \300\1\xD2\200 8086 +ROL rm8,imm \300\1\xC0\200\25 286 +ROL rm16,unity \320\300\1\xD1\200 8086 +ROL rm16,reg_cl \320\300\1\xD3\200 8086 +ROL rm16,imm \320\300\1\xC1\200\25 286 +ROL rm32,unity \321\300\1\xD1\200 386 +ROL rm32,reg_cl \321\300\1\xD3\200 386 +ROL rm32,imm \321\300\1\xC1\200\25 386 +ROR rm8,unity \300\1\xD0\201 8086 +ROR rm8,reg_cl \300\1\xD2\201 8086 +ROR rm8,imm \300\1\xC0\201\25 286 +ROR rm16,unity \320\300\1\xD1\201 8086 +ROR rm16,reg_cl \320\300\1\xD3\201 8086 +ROR rm16,imm \320\300\1\xC1\201\25 286 +ROR rm32,unity \321\300\1\xD1\201 386 +ROR 
rm32,reg_cl \321\300\1\xD3\201 386 +ROR rm32,imm \321\300\1\xC1\201\25 386 +RSM void \2\x0F\xAA PENT +SAHF void \1\x9E 8086 +SAL rm8,unity \300\1\xD0\204 8086,ND +SAL rm8,reg_cl \300\1\xD2\204 8086,ND +SAL rm8,imm \300\1\xC0\204\25 286,ND +SAL rm16,unity \320\300\1\xD1\204 8086,ND +SAL rm16,reg_cl \320\300\1\xD3\204 8086,ND +SAL rm16,imm \320\300\1\xC1\204\25 286,ND +SAL rm32,unity \321\300\1\xD1\204 386,ND +SAL rm32,reg_cl \321\300\1\xD3\204 386,ND +SAL rm32,imm \321\300\1\xC1\204\25 386,ND +SALC void \1\xD6 8086,UNDOC +SAR rm8,unity \300\1\xD0\207 8086 +SAR rm8,reg_cl \300\1\xD2\207 8086 +SAR rm8,imm \300\1\xC0\207\25 286 +SAR rm16,unity \320\300\1\xD1\207 8086 +SAR rm16,reg_cl \320\300\1\xD3\207 8086 +SAR rm16,imm \320\300\1\xC1\207\25 286 +SAR rm32,unity \321\300\1\xD1\207 386 +SAR rm32,reg_cl \321\300\1\xD3\207 386 +SAR rm32,imm \321\300\1\xC1\207\25 386 +SBB mem,reg8 \300\1\x18\101 8086,SM +SBB reg8,reg8 \300\1\x18\101 8086 +SBB mem,reg16 \320\300\1\x19\101 8086,SM +SBB reg16,reg16 \320\300\1\x19\101 8086 +SBB mem,reg32 \321\300\1\x19\101 386,SM +SBB reg32,reg32 \321\300\1\x19\101 386 +SBB reg8,mem \301\1\x1A\110 8086,SM +SBB reg8,reg8 \301\1\x1A\110 8086 +SBB reg16,mem \320\301\1\x1B\110 8086,SM +SBB reg16,reg16 \320\301\1\x1B\110 8086 +SBB reg32,mem \321\301\1\x1B\110 386,SM +SBB reg32,reg32 \321\301\1\x1B\110 386 +SBB rm16,imm8 \320\300\1\x83\203\15 8086 +SBB rm32,imm8 \321\300\1\x83\203\15 386 +SBB reg_al,imm \1\x1C\21 8086,SM +SBB reg_ax,imm \320\1\x1D\31 8086,SM +SBB reg_eax,imm \321\1\x1D\41 386,SM +SBB rm8,imm \300\1\x80\203\21 8086,SM +SBB rm16,imm \320\300\1\x81\203\31 8086,SM +SBB rm32,imm \321\300\1\x81\203\41 386,SM +SBB mem,imm8 \300\1\x80\203\21 8086,SM +SBB mem,imm16 \320\300\1\x81\203\31 8086,SM +SBB mem,imm32 \321\300\1\x81\203\41 386,SM +SCASB void \1\xAE 8086 +SCASD void \321\1\xAF 386 +SCASW void \320\1\xAF 8086 +SGDT mem \300\2\x0F\x01\200 286,PRIV +SHL rm8,unity \300\1\xD0\204 8086 +SHL rm8,reg_cl \300\1\xD2\204 8086 +SHL rm8,imm 
\300\1\xC0\204\25 286 +SHL rm16,unity \320\300\1\xD1\204 8086 +SHL rm16,reg_cl \320\300\1\xD3\204 8086 +SHL rm16,imm \320\300\1\xC1\204\25 286 +SHL rm32,unity \321\300\1\xD1\204 386 +SHL rm32,reg_cl \321\300\1\xD3\204 386 +SHL rm32,imm \321\300\1\xC1\204\25 386 +SHLD mem,reg16,imm \300\320\2\x0F\xA4\101\26 386,SM2 +SHLD reg16,reg16,imm \300\320\2\x0F\xA4\101\26 386,SM2 +SHLD mem,reg32,imm \300\321\2\x0F\xA4\101\26 386,SM2 +SHLD reg32,reg32,imm \300\321\2\x0F\xA4\101\26 386,SM2 +SHLD mem,reg16,reg_cl \300\320\2\x0F\xA5\101 386,SM +SHLD reg16,reg16,reg_cl \300\320\2\x0F\xA5\101 386 +SHLD mem,reg32,reg_cl \300\321\2\x0F\xA5\101 386,SM +SHLD reg32,reg32,reg_cl \300\321\2\x0F\xA5\101 386 +SHR rm8,unity \300\1\xD0\205 8086 +SHR rm8,reg_cl \300\1\xD2\205 8086 +SHR rm8,imm \300\1\xC0\205\25 286 +SHR rm16,unity \320\300\1\xD1\205 8086 +SHR rm16,reg_cl \320\300\1\xD3\205 8086 +SHR rm16,imm \320\300\1\xC1\205\25 286 +SHR rm32,unity \321\300\1\xD1\205 386 +SHR rm32,reg_cl \321\300\1\xD3\205 386 +SHR rm32,imm \321\300\1\xC1\205\25 386 +SHRD mem,reg16,imm \300\320\2\x0F\xAC\101\26 386,SM2 +SHRD reg16,reg16,imm \300\320\2\x0F\xAC\101\26 386,SM2 +SHRD mem,reg32,imm \300\321\2\x0F\xAC\101\26 386,SM2 +SHRD reg32,reg32,imm \300\321\2\x0F\xAC\101\26 386,SM2 +SHRD mem,reg16,reg_cl \300\320\2\x0F\xAD\101 386,SM +SHRD reg16,reg16,reg_cl \300\320\2\x0F\xAD\101 386 +SHRD mem,reg32,reg_cl \300\321\2\x0F\xAD\101 386,SM +SHRD reg32,reg32,reg_cl \300\321\2\x0F\xAD\101 386 +SIDT mem \300\2\x0F\x01\201 286,PRIV +SLDT mem \300\1\x0F\17\200 286,PRIV +SLDT mem16 \300\1\x0F\17\200 286,PRIV +SLDT reg16 \300\1\x0F\17\200 286,PRIV +SMSW mem \300\2\x0F\x01\204 286,PRIV +SMSW reg16 \300\2\x0F\x01\204 286,PRIV +STC void \1\xF9 8086 +STD void \1\xFD 8086 +STI void \1\xFB 8086 +STOSB void \1\xAA 8086 +STOSD void \321\1\xAB 386 +STOSW void \320\1\xAB 8086 +STR mem \300\1\x0F\17\201 286,PRIV +STR mem16 \300\1\x0F\17\201 286,PRIV +STR reg16 \300\1\x0F\17\201 286,PRIV +SUB mem,reg8 \300\1\x28\101 8086,SM +SUB 
reg8,reg8 \300\1\x28\101 8086 +SUB mem,reg16 \320\300\1\x29\101 8086,SM +SUB reg16,reg16 \320\300\1\x29\101 8086 +SUB mem,reg32 \321\300\1\x29\101 386,SM +SUB reg32,reg32 \321\300\1\x29\101 386 +SUB reg8,mem \301\1\x2A\110 8086,SM +SUB reg8,reg8 \301\1\x2A\110 8086 +SUB reg16,mem \320\301\1\x2B\110 8086,SM +SUB reg16,reg16 \320\301\1\x2B\110 8086 +SUB reg32,mem \321\301\1\x2B\110 386,SM +SUB reg32,reg32 \321\301\1\x2B\110 386 +SUB rm16,imm8 \320\300\1\x83\205\15 8086 +SUB rm32,imm8 \321\300\1\x83\205\15 386 +SUB reg_al,imm \1\x2C\21 8086,SM +SUB reg_ax,imm \320\1\x2D\31 8086,SM +SUB reg_eax,imm \321\1\x2D\41 386,SM +SUB rm8,imm \300\1\x80\205\21 8086,SM +SUB rm16,imm \320\300\1\x81\205\31 8086,SM +SUB rm32,imm \321\300\1\x81\205\41 386,SM +SUB mem,imm8 \300\1\x80\205\21 8086,SM +SUB mem,imm16 \320\300\1\x81\205\31 8086,SM +SUB mem,imm32 \321\300\1\x81\205\41 386,SM +TEST mem,reg8 \300\1\x84\101 8086,SM +TEST reg8,reg8 \300\1\x84\101 8086 +TEST mem,reg16 \320\300\1\x85\101 8086,SM +TEST reg16,reg16 \320\300\1\x85\101 8086 +TEST mem,reg32 \321\300\1\x85\101 386,SM +TEST reg32,reg32 \321\300\1\x85\101 386 +TEST reg_al,imm \1\xA8\21 8086,SM +TEST reg_ax,imm \320\1\xA9\31 8086,SM +TEST reg_eax,imm \321\1\xA9\41 386,SM +TEST rm8,imm \300\1\xF6\200\21 8086,SM +TEST rm16,imm \320\300\1\xF7\200\31 8086,SM +TEST rm32,imm \321\300\1\xF7\200\41 386,SM +TEST mem,imm8 \300\1\xF6\200\21 8086,SM +TEST mem,imm16 \320\300\1\xF7\200\31 8086,SM +TEST mem,imm32 \321\300\1\xF7\200\41 386,SM +UMOV mem,reg8 \300\2\x0F\x10\101 386,UNDOC,SM +UMOV reg8,reg8 \300\2\x0F\x10\101 386,UNDOC +UMOV mem,reg16 \320\300\2\x0F\x11\101 386,UNDOC,SM +UMOV reg16,reg16 \320\300\2\x0F\x11\101 386,UNDOC +UMOV mem,reg32 \321\300\2\x0F\x11\101 386,UNDOC,SM +UMOV reg32,reg32 \321\300\2\x0F\x11\101 386,UNDOC +UMOV reg8,mem \301\2\x0F\x12\110 386,UNDOC,SM +UMOV reg8,reg8 \301\2\x0F\x12\110 386,UNDOC +UMOV reg16,mem \320\301\2\x0F\x13\110 386,UNDOC,SM +UMOV reg16,reg16 \320\301\2\x0F\x13\110 386,UNDOC +UMOV 
reg32,mem \321\301\2\x0F\x13\110 386,UNDOC,SM +UMOV reg32,reg32 \321\301\2\x0F\x13\110 386,UNDOC +VERR mem \300\1\x0F\17\204 286,PRIV +VERR mem16 \300\1\x0F\17\204 286,PRIV +VERR reg16 \300\1\x0F\17\204 286,PRIV +VERW mem \300\1\x0F\17\205 286,PRIV +VERW mem16 \300\1\x0F\17\205 286,PRIV +VERW reg16 \300\1\x0F\17\205 286,PRIV +WAIT void \1\x9B 8086 +WBINVD void \2\x0F\x09 486 +WRMSR void \2\x0F\x30 PENT +XADD mem,reg8 \300\2\x0F\xC0\101 486,SM +XADD reg8,reg8 \300\2\x0F\xC0\101 486 +XADD mem,reg16 \320\300\2\x0F\xC1\101 486,SM +XADD reg16,reg16 \320\300\2\x0F\xC1\101 486 +XADD mem,reg32 \321\300\2\x0F\xC1\101 486,SM +XADD reg32,reg32 \321\300\2\x0F\xC1\101 486 +XCHG reg_ax,reg16 \320\11\x90 8086 +XCHG reg_eax,reg32 \321\11\x90 386 +XCHG reg16,reg_ax \320\10\x90 8086 +XCHG reg32,reg_eax \321\10\x90 386 +XCHG reg8,mem \301\1\x86\110 8086,SM +XCHG reg8,reg8 \301\1\x86\110 8086 +XCHG reg16,mem \320\301\1\x87\110 8086,SM +XCHG reg16,reg16 \320\301\1\x87\110 8086 +XCHG reg32,mem \321\301\1\x87\110 386,SM +XCHG reg32,reg32 \321\301\1\x87\110 386 +XCHG mem,reg8 \300\1\x86\101 8086,SM +XCHG reg8,reg8 \300\1\x86\101 8086 +XCHG mem,reg16 \320\300\1\x87\101 8086,SM +XCHG reg16,reg16 \320\300\1\x87\101 8086 +XCHG mem,reg32 \321\300\1\x87\101 386,SM +XCHG reg32,reg32 \321\300\1\x87\101 386 +XLATB void \1\xD7 8086 +XOR mem,reg8 \300\1\x30\101 8086,SM +XOR reg8,reg8 \300\1\x30\101 8086 +XOR mem,reg16 \320\300\1\x31\101 8086,SM +XOR reg16,reg16 \320\300\1\x31\101 8086 +XOR mem,reg32 \321\300\1\x31\101 386,SM +XOR reg32,reg32 \321\300\1\x31\101 386 +XOR reg8,mem \301\1\x32\110 8086,SM +XOR reg8,reg8 \301\1\x32\110 8086 +XOR reg16,mem \320\301\1\x33\110 8086,SM +XOR reg16,reg16 \320\301\1\x33\110 8086 +XOR reg32,mem \321\301\1\x33\110 386,SM +XOR reg32,reg32 \321\301\1\x33\110 386 +XOR rm16,imm8 \320\300\1\x83\206\15 8086 +XOR rm32,imm8 \321\300\1\x83\206\15 386 +XOR reg_al,imm \1\x34\21 8086,SM +XOR reg_ax,imm \320\1\x35\31 8086,SM +XOR reg_eax,imm \321\1\x35\41 386,SM +XOR rm8,imm 
\300\1\x80\206\21 8086,SM +XOR rm16,imm \320\300\1\x81\206\31 8086,SM +XOR rm32,imm \321\300\1\x81\206\41 386,SM +XOR mem,imm8 \300\1\x80\206\21 8086,SM +XOR mem,imm16 \320\300\1\x81\206\31 8086,SM +XOR mem,imm32 \321\300\1\x81\206\41 386,SM +CMOVcc reg16,mem \320\301\1\x0F\330\x40\110 P6,SM +CMOVcc reg16,reg16 \320\301\1\x0F\330\x40\110 P6 +CMOVcc reg32,mem \321\301\1\x0F\330\x40\110 P6,SM +CMOVcc reg32,reg32 \321\301\1\x0F\330\x40\110 P6 +Jcc imm|near \322\1\x0F\330\x80\64 386 +Jcc imm \330\x70\50 8086 +Jcc imm|short \330\x70\50 8086 +SETcc mem \300\1\x0F\330\x90\200 386,SB +SETcc reg8 \300\1\x0F\330\x90\200 386 @@ -0,0 +1,66 @@ +/* insns.h header file for insns.c + * + * The Netwide Assembler is copyright (C) 1996 Simon Tatham and + * Julian Hall. All rights reserved. The software is + * redistributable under the licence given in the file "Licence" + * distributed in the NASM archive. + */ + +#ifndef NASM_INSNS_H +#define NASM_INSNS_H + +struct itemplate { + int opcode; /* the token, passed from "parser.c" */ + int operands; /* number of operands */ + long opd[3]; /* bit flags for operand types */ + char *code; /* the code it assembles to */ + int flags; /* some flags */ +}; + +/* + * Instruction template flags. These specify which processor + * targets the instruction is eligible for, whether it is + * privileged or undocumented, and also specify extra error + * checking on the matching of the instruction. + * + * IF_SM stands for Size Match: any operand whose size is not + * explicitly specified by the template is `really' intended to be + * the same size as the first size-specified operand. + * Non-specification is tolerated in the input instruction, but + * _wrong_ specification is not. + * + * IF_SM2 invokes Size Match on only the first _two_ operands, for + * three-operand instructions such as SHLD: it implies that the + * first two operands must match in size, but that the third is + * required to be _unspecified_. 
+ * + * IF_SB invokes Size Byte: operands with unspecified size in the + * template are really bytes, and so no non-byte specification in + * the input instruction will be tolerated. + * + * IF_SD similarly invokes Size Doubleword. + * + * (The default state if neither IF_SM nor IF_SM2 is specified is + * that any operand with unspecified size in the template is + * required to have unspecified size in the instruction too...) + */ + +#define IF_SM 0x0001 /* size match */ +#define IF_SM2 0x0002 /* size match first two operands */ +#define IF_SB 0x0004 /* unsized operands can't be non-byte */ +#define IF_SD 0x0008 /* unsized operands can't be nondword */ +#define IF_8086 0x0000 /* 8086 instruction */ +#define IF_186 0x0010 /* 186+ instruction */ +#define IF_286 0x0020 /* 286+ instruction */ +#define IF_386 0x0030 /* 386+ instruction */ +#define IF_486 0x0040 /* 486+ instruction */ +#define IF_PENT 0x0050 /* Pentium instruction */ +#define IF_P6 0x0060 /* P6 instruction */ +#define IF_PMASK 0x00F0 /* the mask for processor types */ +#define IF_PRIV 0x0100 /* it's a privileged instruction */ +#define IF_UNDOC 0x0200 /* it's an undocumented instruction */ +#define IF_FPU 0x0400 /* it's an FPU instruction */ +#define IF_MMX 0x0800 /* it's an MMX instruction */ +#define IF_ND 0x1000 /* ignore this in the disassembler */ + +#endif diff --git a/insns.pl b/insns.pl new file mode 100644 index 0000000..275a66b --- /dev/null +++ b/insns.pl @@ -0,0 +1,160 @@ +#!/usr/bin/perl +# +# insns.pl produce insnsa.c and insnsd.c from insns.dat +# +# The Netwide Assembler is copyright (C) 1996 Simon Tatham and +# Julian Hall. All rights reserved. The software is +# redistributable under the licence given in the file "Licence" +# distributed in the NASM archive. 
+ +print STDERR "Reading insns.dat...\n"; + +open (F, "insns.dat") || die "unable to open insns.dat"; + +$line = 0; +$opcodes = 0; +$insns = 0; +while (<F>) { + $line++; + next if /^\s*;/; # comments + chomp; + split; + next if $#_ == -1; # blank lines + (warn "line $line does not contain four fields\n"), next if $#_ != 3; + $formatted = &format(@_); + if ($formatted) { + $insns++; + $aname = "aa_$_[0]"; + push @$aname, $formatted; + } + $opcodes[$opcodes++] = $_[0], $done{$_[0]} = 1 if !$done{$_[0]}; + if ($formatted && $formatted !~ /IF_ND/) { + push @big, $formatted; + foreach $i (&startbyte($_[2])) { + $aname = sprintf "dd_%02X",$i; + push @$aname, $#big; + } + } +} + +close F; + +print STDERR "Writing insnsa.c...\n"; + +open A, ">insnsa.c"; + +print A "/* This file auto-generated from insns.dat by insns.pl" . + " - don't edit it */\n\n"; +print A "#include <stdio.h>\n"; +print A "#include \"nasm.h\"\n"; +print A "#include \"insns.h\"\n"; +print A "\n"; + +foreach $i (@opcodes) { + print A "static struct itemplate instrux_${i}[] = {\n"; + $aname = "aa_$i"; + foreach $j (@$aname) { + print A " $j\n"; + } + print A " {-1}\n};\n\n"; +} +print A "struct itemplate *nasm_instructions[] = {\n"; +foreach $i (@opcodes) { + print A " instrux_${i},\n"; +} +print A "};\n"; + +close A; + +print STDERR "Writing insnsd.c...\n"; + +open D, ">insnsd.c"; + +print D "/* This file auto-generated from insns.dat by insns.pl" . 
+ " - don't edit it */\n\n"; +print D "#include <stdio.h>\n"; +print D "#include \"nasm.h\"\n"; +print D "#include \"insns.h\"\n"; +print D "\n"; + +print D "static struct itemplate instrux[] = {\n"; +foreach $j (@big) { + print D " $j\n"; +} +print D " {-1}\n};\n\n"; + +for ($c=0; $c<256; $c++) { + $h = sprintf "%02X", $c; + print D "static struct itemplate *itable_${h}[] = {\n"; + $aname = "dd_$h"; + foreach $j (@$aname) { + print D " instrux + $j,\n"; + } + print D " NULL\n};\n\n"; +} + +print D "struct itemplate **itable[] = {\n"; +for ($c=0; $c<256; $c++) { + printf D " itable_%02X,\n", $c; +} +print D "};\n"; + +close D; + +printf STDERR "Done: %d instructions\n", $insns; + +sub format { + local ($opcode, $operands, $codes, $flags) = @_; + local $num; + + return undef if $operands eq "ignore"; + + # format the operands + $operands =~ s/:/|colon,/g; + $operands =~ s/mem(\d+)/mem|bits$1/g; + $operands =~ s/mem/memory/g; + $operands =~ s/memory_offs/mem_offs/g; + $operands =~ s/imm(\d+)/imm|bits$1/g; + $operands =~ s/imm/immediate/g; + $operands =~ s/rm(\d+)/regmem|bits$1/g; + $num = 3; + $operands = '0,0,0', $num = 0 if $operands eq 'void'; + $operands .= ',0', $num-- while $operands !~ /,.*,/; + $operands =~ tr/a-z/A-Z/; + + # format the flags + $flags =~ s/,/|IF_/g; + $flags = "IF_" . $flags; + + "{I_$opcode, $num, {$operands}, \"$codes\", $flags},"; +} + +# Here we determine the range of possible starting bytes for a given +# instruction. 
We need only consider the codes: +# \1 \2 \3 mean literal bytes, of course +# \4 \5 \6 \7 mean PUSH/POP of segment registers: special case +# \10 \11 \12 mean byte plus register value +# \17 means byte zero +# \330 means byte plus condition code +# \0 or \340 mean give up and return empty set +sub startbyte { # FIXME we cheat, for now :-) + local ($codes) = @_; + local $word, @range; + + while (1) { + die "couldn't get code in '$codes'" if $codes !~ /^(\\[^\\]+)(\\.*)?$/; + $word = $1, $codes = $2; + return (hex $1) if $word =~ /^\\[123]$/ && $codes =~ /^\\x(..)/; + return (0x07, 0x17, 0x1F) if $word eq "\\4"; + return (0xA1, 0xA9) if $word eq "\\5"; + return (0x06, 0x0E, 0x16, 0x1E) if $word eq "\\6"; + return (0xA0, 0xA8) if $word eq "\\7"; + $start=hex $1, $r=8, last if $word =~ /^\\1[012]$/ && $codes =~/^\\x(..)/; + return (0) if $word eq "\\17"; + $start=hex $1, $r=16, last if $word =~ /^\\330$/ && $codes =~ /^\\x(..)/; + return () if $word eq "\\0" || $word eq "\\340"; + } + @range = (); + push @range, $start++ while ($r-- > 0); + @range; +} diff --git a/internal.doc b/internal.doc new file mode 100644 index 0000000..f04152a --- /dev/null +++ b/internal.doc @@ -0,0 +1,268 @@ +Internals of the Netwide Assembler +================================== + +The Netwide Assembler is intended to be a modular, re-usable x86 +assembler, which can be embedded in other programs, for example as +the back end to a compiler. + +The assembler is composed of modules. The interfaces between them +look like: + + +---- parser.c ----+ + | | | + | float.c | + | | + +--- assemble.c ---+ + | | | + nasm.c ---+ insnsa.c +--- nasmlib.c + | | + +---- labels.c ----+ + | | + +--- outform.c ----+ + | | + +----- *out.c -----+ + +In other words, each of `parser.c', `assemble.c', `labels.c', +`outform.c' and each of the output format modules `*out.c' are +independent modules, which do not inter-communicate except through +the main program. 
+ +The Netwide *Disassembler* is not intended to be particularly +portable or reusable or anything, however. So I won't bother +documenting it here. :-) + +nasmlib.c +--------- + +This is a library module; it contains simple library routines which +may be referenced by all other modules. Among these are a set of +wrappers around the standard `malloc' routines, which will report a +fatal error if they run out of memory, rather than returning NULL. + +parser.c +-------- + +This contains a source-line parser. It parses `canonical' assembly +source lines, containing some combination of the `label', `opcode', +`operand' and `comment' fields: it does not process directives or +macros. It exports two functions: `parse_line' and `cleanup_insn'. + +`parse_line' is the main parser function: you pass it a source line +in ASCII text form, and it returns you an `insn' structure +containing all the details of the instruction on that line. The +parameters it requires are: + +- The location (segment, offset) where the instruction on this line + will eventually be placed. This is necessary in order to evaluate + expressions containing the Here token, `$'. + +- A function which can be called to retrieve the value of any + symbols the source line references. + +- Which pass the assembler is on: an undefined symbol only causes an + error condition on pass two. + +- The source line to be parsed. + +- A structure to fill with the results of the parse. + +- A function which can be called to report errors. + +Some instructions (DB, DW, DD for example) can require an arbitrary +amount of storage, and so some of the members of the resulting +`insn' structure will be dynamically allocated. The other function +exported by `parser.c' is `cleanup_insn', which can be called to +deallocate any dynamic storage associated with the results of a +parse. 
+ +names.c +------- + +This doesn't count as a module - it defines a few arrays which are +shared between NASM and NDISASM, so it's a separate file which is +#included by both parser.c and disasm.c. + +float.c +------- + +This is essentially a library module: it exports one function, +`float_const', which converts an ASCII representation of a +floating-point number into an x86-compatible binary representation, +without using any built-in floating-point arithmetic (so it will run +on any platform, portably). It calls nothing, and is called only by +`parser.c'. Note that the function `float_const' must be passed an +error reporting routine. + +assemble.c +---------- + +This module contains the code generator: it translates `insn' +structures as returned from the parser module into actual generated +code which can be placed in an output file. It exports two +functions, `assemble' and `insn_size'. + +`insn_size' is designed to be called on pass one of assembly: it +takes an `insn' structure as input, and returns the amount of space +that would be taken up if the instruction described in the structure +were to be converted to real machine code. `insn_size' also requires +to be told the location (as a segment/offset pair) where the +instruction would be assembled, the mode of assembly (16/32 bit +default), and a function it can call to report errors. + +`assemble' is designed to be called on pass two: it takes all the +parameters that `insn_size' does, but has an extra parameter which +is an output driver. `assemble' actually converts the input +instruction into machine code, and outputs the machine code by means +of calling the `output' function of the driver. + +insnsa.c +-------- + +This is another library module: it exports one very big array of +instruction translations. It has to be a separate module so that DOS +compilers, with less memory to spare than typical Unix ones, can +cope with it. + +labels.c +-------- + +This module contains a label manager. 
It exports six functions: + +`init_labels' should be called before any other function in the +module. `cleanup_labels' may be called after all other use of the +module has finished, to deallocate storage. + +`define_label' is called to define new labels: you pass it the name +of the label to be defined, and the (segment,offset) pair giving the +value of the label. It is also passed an error-reporting function, +and an output driver structure (so that it can call the output +driver's label-definition function). `define_label' mentally +prepends the name of the most recently defined non-local label to +any label beginning with a period. + +`define_label_stub' is designed to be called in pass two, once all +the labels have already been defined: it does nothing except to +update the "most-recently-defined-non-local-label" status, so that +references to local labels in pass two will work correctly. + +`declare_as_global' is used to declare that a label should be +global. It must be called _before_ the label in question is defined. + +Finally, `lookup_label' attempts to translate a label name into a +(segment,offset) pair. It returns non-zero on success. + +The label manager module is (theoretically :) restartable: after +calling `cleanup_labels', you can call `init_labels' again, and +start a new assembly with a new set of symbols. + +outform.c +--------- + +This small module contains a set of routines to manage a list of +output formats, and select one given a keyword. It contains three +small routines: `ofmt_register' which registers an output driver as +part of the managed list, `ofmt_list' which lists the available +drivers on stdout, and `ofmt_find' which tries to find the driver +corresponding to a given name. 
+ +The output modules +------------------ + +Each of the output modules, `outbin.o', `outelf.o' and so on, +exports only one symbol, which is an output driver data structure +containing pointers to all the functions needed to produce output +files of the appropriate type. + +The exception to this is `outcoff.o', which exports _two_ output +driver structures, since COFF and Win32 object file formats are very +similar and most of the code is shared between them. + +nasm.c +------ + +This is the main program: it calls all the functions in the above +modules, and puts them together to form a working assembler. We +hope. :-) + +Segment Mechanism +----------------- + +In NASM, the term `segment' is used to separate the different +sections/segments/groups of which an object file is composed. +Essentially, every address NASM is capable of understanding is +expressed as an offset from the beginning of some segment. + +The defining property of a segment is that if two symbols are +declared in the same segment, then the distance between them is +fixed at assembly time. Hence every externally-declared variable +must be declared in its own segment, since none of the locations of +these are known, and so no distances may be computed at assembly +time. + +The special segment value NO_SEG (-1) is used to denote an absolute +value, e.g. a constant whose value does not depend on relocation, +such as the _size_ of a data object. + +Apart from NO_SEG, segment indices all have their least significant +bit clear, if they refer to actual in-memory segments. For each +segment of this type, there is an auxiliary segment value, defined +to be the same number but with the LSB set, which denotes the +segment-base value of that segment, for object formats which support +it (Microsoft .OBJ, for example). + +Hence, if `textsym' is declared in a code segment with index 2, then +referencing `SEG textsym' would return zero offset from +segment-index 3. 
Or, in object formats which don't understand such +references, it would return an error instead. + +The next twist is SEG_ABS. Some symbols may be declared with a +segment value of SEG_ABS plus a 16-bit constant: this indicates that +they are far-absolute symbols, such as the BIOS keyboard buffer +under MS-DOS, which always resides at 0040h:001Eh. Far-absolutes are +handled with care in the parser, since they are supposed to evaluate +simply to their offset part within expressions, but applying SEG to +one should yield its segment part. A far-absolute should never find +its way _out_ of the parser, unless it is enclosed in a WRT clause, +in which case Microsoft 16-bit object formats will want to know +about it. + +Porting Issues +-------------- + +We have tried to write NASM in portable ANSI C: we do not assume +little-endianness or any hardware characteristics (in order that +NASM should work as a cross-assembler for x86 platforms, even when +run on other, stranger machines). + +Assumptions we _have_ made are: + +- We assume that `short' is at least 16 bits, and `long' at least + 32. This really _shouldn't_ be a problem, since Kernighan and + Ritchie tell us we are entitled to do so. + +- We rely on having more than 6 characters of significance on + externally linked symbols in the NASM sources. This may get fixed + at some point. We haven't yet come across a linker brain-dead + enough to get it wrong anyway. + +- We assume that `fopen' using the mode "wb" can be used to write + binary data files. This may be wrong on systems like VMS, with a + strange file system. Though why you'd want to run NASM on VMS is + beyond me anyway. + +That's it. Subject to those caveats, NASM should be completely +portable. If not, we _really_ want to know about it. + +Porting Non-Issues +------------------ + +The following is _not_ a portability problem, although it looks like +one. 
+
+- When compiling with some versions of DJGPP, you may get errors
+  such as `warning: ANSI C forbids braced-groups within
+  expressions'. This isn't NASM's fault - the problem seems to be
+  that DJGPP's definitions of the <ctype.h> macros include a
+  GNU-specific C extension. So when compiling using -ansi and
+  -pedantic, DJGPP complains about its own header files. It isn't a
+  problem anyway, since it still generates correct code.
diff --git a/labels.c b/labels.c
new file mode 100644
index 0000000..ff1d571
--- /dev/null
+++ b/labels.c
@@ -0,0 +1,337 @@
+/* labels.c label handling for the Netwide Assembler
+ *
+ * The Netwide Assembler is copyright (C) 1996 Simon Tatham and
+ * Julian Hall. All rights reserved. The software is
+ * redistributable under the licence given in the file "Licence"
+ * distributed in the NASM archive.
+ */
+
+#include <stdio.h>
+#include <string.h>
+#include <stdlib.h>
+#include "nasm.h"
+#include "nasmlib.h"
+
+/*
+ * A local label is one that begins with exactly one period. Things
+ * that begin with _two_ periods are NASM-specific things.
+ */
+#define islocal(l) ((l)[0] == '.' && (l)[1] != '.')
+
+#define LABEL_BLOCK 320 /* no. of labels/block */
+#define LBLK_SIZE (LABEL_BLOCK*sizeof(union label))
+#define LABEL_HASHES 32 /* no. of hash table entries */
+
+#define END_LIST -3 /* don't clash with NO_SEG! */
+#define END_BLOCK -2
+#define BOGUS_VALUE -4
+
+#define PERMTS_SIZE 4096 /* size of text blocks */
+
+/* values for label.defn.is_global */
+#define NOT_DEFINED_YET 0
+#define LOCAL_SYMBOL 1
+#define GLOBAL_SYMBOL 2
+#define GLOBAL_PLACEHOLDER 3
+
+union label { /* actual label structures */
+    struct {
+        long segment, offset;
+        char *label;
+        int is_global;
+    } defn;
+    struct {
+        long movingon, dummy;
+        union label *next;
+    } admin;
+};
+
+struct permts { /* permanent text storage */
+    struct permts *next; /* for the linked list */
+    int size, usage; /* size and used space in ... */
+    char data[PERMTS_SIZE]; /* ... the data block itself */
+};
+
+static union label *ltab[LABEL_HASHES];/* using a hash table */
+static union label *lfree[LABEL_HASHES];/* pointer into the above */
+static struct permts *perm_head; /* start of perm. text storage */
+static struct permts *perm_tail; /* end of perm. text storage */
+
+static void init_block (union label *blk);
+static char *perm_copy (char *string1, char *string2);
+
+static char *prevlabel;
+
+/*
+ * Internal routine: finds the `union label' corresponding to the
+ * given label name. Creates a new one, if it isn't found, and if
+ * `create' is TRUE.
+ *
+ * A local label is looked up under the name formed by prefixing it
+ * with `prevlabel'. Returns NULL only if the label isn't found and
+ * `create' is FALSE.
+ */
+static union label *find_label (char *label, int create) {
+    int hash = 0;
+    char *p, *prev;
+    int prevlen;
+    union label *lptr;
+
+    if (islocal(label))
+        prev = prevlabel;
+    else
+        prev = "";
+    prevlen = strlen(prev);
+    /* hash on unsigned bytes, so the bucket index can't go negative */
+    p = prev;
+    while (*p) hash += (unsigned char) *p++;
+    p = label;
+    while (*p) hash += (unsigned char) *p++;
+    hash %= LABEL_HASHES;
+    lptr = ltab[hash];
+    while (lptr->admin.movingon != END_LIST) {
+        if (lptr->admin.movingon == END_BLOCK) {
+            lptr = lptr->admin.next;
+            if (!lptr)
+                break; /* last block is full: not found */
+        }
+        if (!strncmp(lptr->defn.label, prev, prevlen) &&
+            !strcmp(lptr->defn.label+prevlen, label))
+            return lptr;
+        lptr++;
+    }
+    if (create) {
+        if (lfree[hash]->admin.movingon == END_BLOCK) {
+            /*
+             * must allocate a new block
+             */
+            lfree[hash]->admin.next = (union label *) nasm_malloc (LBLK_SIZE);
+            lfree[hash] = lfree[hash]->admin.next;
+            init_block(lfree[hash]);
+        }
+
+        lfree[hash]->admin.movingon = BOGUS_VALUE;
+        lfree[hash]->defn.label = perm_copy (prev, label);
+        lfree[hash]->defn.is_global = NOT_DEFINED_YET;
+        return lfree[hash]++;
+    } else
+        return NULL;
+}
+
+/*
+ * Fetch the segment and offset of a label which has been defined.
+ * Returns 1 on success; returns 0, leaving *segment and *offset
+ * alone, if the label is unknown or has not been defined yet.
+ */
+int lookup_label (char *label, long *segment, long *offset) {
+    union label *lptr;
+
+    lptr = find_label (label, 0);
+    if (lptr && (lptr->defn.is_global == LOCAL_SYMBOL ||
+                 lptr->defn.is_global == GLOBAL_SYMBOL)) {
+        *segment = lptr->defn.segment;
+        *offset = lptr->defn.offset;
+        return 1;
+    } else
+        return 0;
+}
+
+/*
+ * For a non-local label, find it (creating an entry if need be) and
+ * record its name as the prefix for the local labels which follow.
+ */
+void define_label_stub (char *label, efunc error) {
+    union label *lptr;
+
+    if (!islocal(label)) {
+        lptr = find_label (label, 1);
+        if (!lptr)
+            error (ERR_PANIC, "can't find label `%s' on pass two", label);
+        prevlabel = lptr->defn.label;
+    }
+}
+
+/*
+ * Define a label at the given segment and offset, and report it via
+ * ofmt->symdef. Redefining a label is an error and changes nothing.
+ */
+void define_label (char *label, long segment, long offset,
+                   struct ofmt *ofmt, efunc error) {
+    union label *lptr;
+
+    lptr = find_label (label, 1);
+    switch (lptr->defn.is_global) {
+      case NOT_DEFINED_YET:
+        lptr->defn.is_global = LOCAL_SYMBOL;
+        break;
+      case GLOBAL_PLACEHOLDER:
+        lptr->defn.is_global = GLOBAL_SYMBOL;
+        break;
+      default:
+        error(ERR_NONFATAL, "symbol `%s' redefined", label);
+        return;
+    }
+
+    if (label[0] != '.') /* not local, but not special either */
+        prevlabel = lptr->defn.label;
+    else if (!*prevlabel)
+        error(ERR_NONFATAL, "attempt to define a local label before any"
+              " non-local labels");
+
+    lptr->defn.segment = segment;
+    lptr->defn.offset = offset;
+
+    ofmt->symdef (lptr->defn.label, segment, offset,
+                  lptr->defn.is_global == GLOBAL_SYMBOL);
+}
+
+/*
+ * Define a common variable: as define_label, except that the offset
+ * is stored as 0 and symdef is given `size' and a final argument of 2.
+ */
+void define_common (char *label, long segment, long size,
+                    struct ofmt *ofmt, efunc error) {
+    union label *lptr;
+
+    lptr = find_label (label, 1);
+    switch (lptr->defn.is_global) {
+      case NOT_DEFINED_YET:
+        lptr->defn.is_global = LOCAL_SYMBOL;
+        break;
+      case GLOBAL_PLACEHOLDER:
+        lptr->defn.is_global = GLOBAL_SYMBOL;
+        break;
+      default:
+        error(ERR_NONFATAL, "symbol `%s' redefined", label);
+        return;
+    }
+
+    if (label[0] != '.') /* not local, but not special either */
+        prevlabel = lptr->defn.label;
+    else
+        error(ERR_NONFATAL, "attempt to define a local label as a "
+              "common variable");
+
+    lptr->defn.segment = segment;
+    lptr->defn.offset = 0;
+
+    ofmt->symdef (lptr->defn.label, segment, size, 2);
+}
+
+/*
+ * Flag a non-local label as global. An error is reported if the label
+ * has already been defined without first being declared global.
+ */
+void declare_as_global (char *label, efunc error) {
+    union label *lptr;
+
+    if (islocal(label)) {
+        error(ERR_NONFATAL, "attempt to declare local symbol `%s' as"
+              " global", label);
+        return;
+    }
+    lptr = find_label (label, 1);
+    switch (lptr->defn.is_global) {
+      case NOT_DEFINED_YET:
+        lptr->defn.is_global = GLOBAL_PLACEHOLDER;
+        break;
+      case GLOBAL_PLACEHOLDER: /* already done: silently ignore */
+      case GLOBAL_SYMBOL:
+        break;
+      case LOCAL_SYMBOL:
+        error(ERR_NONFATAL, "symbol `%s': [GLOBAL] directive must"
+              " appear before symbol definition", label);
+        break;
+    }
+}
+
+/*
+ * Allocate the first label block for each hash bucket and the first
+ * text storage block. Returns 0, or -1 if an allocation gave NULL.
+ */
+int init_labels (void) {
+    int i;
+
+    for (i=0; i<LABEL_HASHES; i++) {
+        ltab[i] = (union label *) nasm_malloc (LBLK_SIZE);
+        if (!ltab[i])
+            return -1; /* can't initialise, panic */
+        init_block (ltab[i]);
+        lfree[i] = ltab[i];
+    }
+
+    perm_head = perm_tail = (struct permts *) nasm_malloc (sizeof(struct permts));
+    if (!perm_head)
+        return -1;
+
+    perm_head->next = NULL;
+    perm_head->size = PERMTS_SIZE;
+    perm_head->usage = 0;
+
+    prevlabel = "";
+
+    return 0;
+}
+
+/*
+ * Free every label block in every bucket, then all the text storage.
+ */
+void cleanup_labels (void) {
+    int i;
+
+    for (i=0; i<LABEL_HASHES; i++) {
+        union label *lptr, *lhold;
+
+        lptr = lhold = ltab[i];
+
+        while (lptr) {
+            while (lptr->admin.movingon != END_BLOCK) lptr++;
+            lptr = lptr->admin.next;
+            nasm_free (lhold);
+            lhold = lptr;
+        }
+    }
+
+    while (perm_head) {
+        perm_tail = perm_head;
+        perm_head = perm_head->next;
+        nasm_free (perm_tail);
+    }
+}
+
+/*
+ * Mark all but the last slot of a block as END_LIST (unused), and
+ * make the last slot an END_BLOCK marker whose `next' is NULL.
+ */
+static void init_block (union label *blk) {
+    int j;
+
+    for (j=0; j<LABEL_BLOCK-1; j++)
+        blk[j].admin.movingon = END_LIST;
+    blk[LABEL_BLOCK-1].admin.movingon = END_BLOCK;
+    blk[LABEL_BLOCK-1].admin.next = NULL;
+}
+
+/*
+ * Copy string1 followed by string2 into permanent text storage,
+ * starting a new storage block if the current one lacks room.
+ * Returns a pointer to the stored, NUL-terminated result.
+ */
+static char *perm_copy (char *string1, char *string2) {
+    char *p, *q;
+    int len = strlen(string1)+strlen(string2)+1;
+
+    if (perm_tail->size - perm_tail->usage < len) {
+        perm_tail->next = (struct permts *)nasm_malloc(sizeof(struct permts));
+        perm_tail = perm_tail->next;
+        perm_tail->next = NULL; /* cleanup_labels stops at a NULL link */
+        perm_tail->size = PERMTS_SIZE;
+        perm_tail->usage = 0;
+    }
+    p = q = perm_tail->data + perm_tail->usage;
+    while ( (*q = *string1++) ) q++;
+    while ( (*q++ = *string2++) );
+    perm_tail->usage = q - perm_tail->data;
+
+    return p;
+}
diff --git a/labels.h b/labels.h
new file mode 100644
index 0000000..fb466ca
--- /dev/null
+++ b/labels.h
@@ -0,0 +1,17 @@
+/* labels.h header file for labels.c
+ *
+ * The Netwide Assembler is copyright (C) 1996 Simon Tatham and
+ * Julian Hall. All rights reserved. The software is
+ * redistributable under the licence given in the file "Licence"
+ * distributed in the NASM archive.
+ */
+
+int lookup_label (char *label, long *segment, long *offset);
+void define_label (char *label, long segment, long offset,
+                   struct ofmt *ofmt, efunc error);
+void define_common (char *label, long segment, long size,
+                    struct ofmt *ofmt, efunc error);
+void define_label_stub (char *label, efunc error);
+void declare_as_global (char *label, efunc error);
+int init_labels (void);
+void cleanup_labels (void);
diff --git a/lcc/Readme b/lcc/Readme
new file mode 100644
index 0000000..d37f812
--- /dev/null
+++ b/lcc/Readme
@@ -0,0 +1,57 @@
+This directory contains the necessary files to port the C compiler
+``LCC'' (available by FTP from sunsite.doc.ic.ac.uk in the directory
+/computing/programming/languages/c/lcc) to compile for Linux (a.out
+or ELF) by using NASM as a back-end code generator.
+
+This patch has been tested on lcc version 3.6.
+
+To install:
+
+- Copy `x86nasm.md' into the `src' directory of the lcc tree.
+
+- Copy either `lin-elf.c' or `lin-aout.c' into the `etc' directory.
+
+- If you're installing for a.out, edit `x86nasm.md' and change the
+  conditional after the comment reading "CHANGE THIS FOR a.out" in
+  the `defsymbol' function from `#if 0' to `#if 1'.
+
+- Make the following changes to `bind.c' in the `src' directory:
+
+  - Near the top of the file, add a line that reads
+        extern Interface x86nasmIR;
+
+  - In the `bindings' array, add the lines
+        "x86-nasm", &x86nasmIR,
+        "x86/nasm", &x86nasmIR,
+    (in sensible looking places...)
+
+  A sample `bind.c' has been provided to show what the result of
+  this might look like. You might be able to get away with using it
+  directly...
+ +- Modify the lcc makefile to include rules for x86nasm.o: this will + have to be done in about three places. Just copy any line with + `x86' on it and modify it to read `x86nasm' everywhere. (Except + that in the list of object files that rcc is made up from, do + remember to ensure that every line but the last has a trailing + backslash...) + +- You may have to modify the contents of `lin-elf.c' or `lin-aout.c' + to reflect the true locations of files such as crt0.o, crt1.o, + ld-linux.so and so forth. If you don't know where to find these, + compile a short C program with `gcc -v' and see what command line + gcc feeds to `ld'. + +- You should now be able to build lcc, using `lin-elf.c' or + `lin-aout.c' as the system-dependent part of the `lcc' wrapper + program. + +- Symlink x86nasm.c into the `src' directory before attempting the + triple test, or the compile will fail. + +- Now it should pass the triple test, on either ELF or a.out. Voila! + +Known potential problems: + +- The machine description may occasionally generate `db' lines that + are longer than NASM's 1024-character maximum. 
diff --git a/lcc/bind.c b/lcc/bind.c new file mode 100644 index 0000000..b0c1f51 --- /dev/null +++ b/lcc/bind.c @@ -0,0 +1,23 @@ +#include "c.h" +extern Interface nullIR, symbolicIR; +extern Interface mipsebIR, mipselIR; +extern Interface sparcIR, solarisIR; +extern Interface x86IR, x86nasmIR; +Binding bindings[] = { + "symbolic", &symbolicIR, + "mips-irix", &mipsebIR, + "mips-ultrix", &mipselIR, + "sparc-sun", &sparcIR, + "sparc-solaris", &solarisIR, + "x86-dos", &x86IR, + "x86-nasm", &x86nasmIR, + "symbolic/irix", &symbolicIR, /* omit */ + "mips/irix", &mipsebIR, /* omit */ + "mips/ultrix", &mipselIR, /* omit */ + "sparc/sun", &sparcIR, /* omit */ + "sparc/solaris", &solarisIR, /* omit */ + "x86/dos", &x86IR, /* omit */ + "x86/nasm", &x86nasmIR, /* omit */ + "null", &nullIR, + NULL, NULL +}; diff --git a/lcc/lin-aout.c b/lcc/lin-aout.c new file mode 100644 index 0000000..f1ac88a --- /dev/null +++ b/lcc/lin-aout.c @@ -0,0 +1,44 @@ +/* x86 running linux and using nasm as a.out */ + +#include <string.h> + +#ifndef LCCDIR +#define LCCDIR "/usr/local/lib/lcc/" +#endif + +#define NASMPATH "/usr/local/bin/nasm" + +char *cpp[] = { LCCDIR "cpp", "-D__STDC__=1", + "-Di386", "-D__i386", "-D__i386__", + "-Dlinux", "-D__linux", "-D__linux__", + "-Dunix", "-D__unix", "-D__unix__", + "$1", "$2", "$3", 0 }; +char *include[] = { "-I" LCCDIR "include", "-I/usr/local/include", + "-I/usr/include", 0 }; +char *com[] = { LCCDIR "rcc", "-target=x86/nasm", + "$1", "$2", "$3", 0 }; +char *as[] = { NASMPATH, "-faout", "-o", "$3", "$1", "$2", 0 }; +char *ld[] = { "/usr/bin/ld", "-m", "i386linux", + "-L/usr/i486-linuxaout/lib", + "-o", "$3", "$1", + "/usr/i486-linuxaout/lib/crt0.o", + "$2", "", "-lc", 0 }; +static char *bbexit = LCCDIR "bbexit.o"; + +extern char *concat(char *, char *); +extern int access(const char *, int); + +int option(char *arg) { + if (strncmp(arg, "-lccdir=", 8) == 0) { + cpp[0] = concat(&arg[8], "/cpp"); + include[0] = concat("-I", concat(&arg[8], "/include")); + 
com[0] = concat(&arg[8], "/rcc"); + bbexit = concat(&arg[8], "/bbexit.o"); + } else if (strcmp(arg, "-g") == 0) + ; + else if (strcmp(arg, "-b") == 0 && access(bbexit, 4) == 0) + ld[9] = bbexit; + else + return 0; + return 1; +} diff --git a/lcc/lin-elf.c b/lcc/lin-elf.c new file mode 100644 index 0000000..15df9e5 --- /dev/null +++ b/lcc/lin-elf.c @@ -0,0 +1,45 @@ +/* x86 running linux and using nasm as ELF */ + +#include <string.h> + +#ifndef LCCDIR +#define LCCDIR "/usr/local/lib/lcc/" +#endif + +#define NASMPATH "/usr/local/bin/nasm" + +char *cpp[] = { LCCDIR "cpp", "-D__STDC__=1", + "-D__ELF__", "-Di386", "-D__i386", "-D__i386__", + "-Dlinux", "-D__linux", "-D__linux__", + "$1", "$2", "$3", 0 }; +char *include[] = { "-I" LCCDIR "include", "-I/usr/local/include", + "-I/usr/include", 0 }; +char *com[] = { LCCDIR "rcc", "-target=x86/nasm", + "$1", "$2", "$3", 0 }; +char *as[] = { NASMPATH, "-felf", "-o", "$3", "$1", "$2", 0 }; +char *ld[] = { "/usr/bin/ld", "-m", "elf_i386", + "-dynamic-linker", "/lib/ld-linux.so.1", + "-L/usr/i486-linux/lib", + "-o", "$3", "$1", + "/usr/lib/crt1.o", "/usr/lib/crti.o", "/usr/lib/crtbegin.o", + "$2", "", + "-lc", "", "/usr/lib/crtend.o", "/usr/lib/crtn.o", 0 }; +static char *bbexit = LCCDIR "bbexit.o"; + +extern char *concat(char *, char *); +extern int access(const char *, int); + +int option(char *arg) { + if (strncmp(arg, "-lccdir=", 8) == 0) { + cpp[0] = concat(&arg[8], "/cpp"); + include[0] = concat("-I", concat(&arg[8], "/include")); + com[0] = concat(&arg[8], "/rcc"); + bbexit = concat(&arg[8], "/bbexit.o"); + } else if (strcmp(arg, "-g") == 0) + ; + else if (strcmp(arg, "-b") == 0 && access(bbexit, 4) == 0) + ld[13] = bbexit; + else + return 0; + return 1; +} diff --git a/lcc/x86nasm.md b/lcc/x86nasm.md new file mode 100644 index 0000000..d709122 --- /dev/null +++ b/lcc/x86nasm.md @@ -0,0 +1,703 @@ +%{ +enum { EAX=0, ECX=1, EDX=2, EBX=3, ESI=6, EDI=7 }; +#include "c.h" +#define NODEPTR_TYPE Node +#define OP_LABEL(p) 
((p)->op) +#define LEFT_CHILD(p) ((p)->kids[0]) +#define RIGHT_CHILD(p) ((p)->kids[1]) +#define STATE_LABEL(p) ((p)->x.state) +static void address ARGS((Symbol, Symbol, int)); +static void blkfetch ARGS((int, int, int, int)); +static void blkloop ARGS((int, int, int, int, int, int[])); +static void blkstore ARGS((int, int, int, int)); +static void defaddress ARGS((Symbol)); +static void defconst ARGS((int, Value)); +static void defstring ARGS((int, char *)); +static void defsymbol ARGS((Symbol)); +static void doarg ARGS((Node)); +static void emit2 ARGS((Node)); +static void export ARGS((Symbol)); +static void clobber ARGS((Node)); +static void function ARGS((Symbol, Symbol [], Symbol [], int)); +static void global ARGS((Symbol)); +static void import ARGS((Symbol)); +static void local ARGS((Symbol)); +static void progbeg ARGS((int, char **)); +static void progend ARGS((void)); +static void segment ARGS((int)); +static void space ARGS((int)); +static void target ARGS((Node)); +static int ckstack ARGS((Node, int)); +static int memop ARGS((Node)); +static int sametree ARGS((Node, Node)); +static Symbol charreg[32], shortreg[32], intreg[32]; +static Symbol fltreg[32]; + +static int cseg; + +static Symbol quo, rem; + +%} +%start stmt +%term ADDD=306 ADDF=305 ADDI=309 ADDP=311 ADDU=310 +%term ADDRFP=279 +%term ADDRGP=263 +%term ADDRLP=295 +%term ARGB=41 ARGD=34 ARGF=33 ARGI=37 ARGP=39 +%term ASGNB=57 ASGNC=51 ASGND=50 ASGNF=49 ASGNI=53 ASGNP=55 ASGNS=52 +%term BANDU=390 +%term BCOMU=406 +%term BORU=422 +%term BXORU=438 +%term CALLB=217 CALLD=210 CALLF=209 CALLI=213 CALLV=216 +%term CNSTC=19 CNSTD=18 CNSTF=17 CNSTI=21 CNSTP=23 CNSTS=20 CNSTU=22 +%term CVCI=85 CVCU=86 +%term CVDF=97 CVDI=101 +%term CVFD=114 +%term CVIC=131 CVID=130 CVIS=132 CVIU=134 +%term CVPU=150 +%term CVSI=165 CVSU=166 +%term CVUC=179 CVUI=181 CVUP=183 CVUS=180 +%term DIVD=450 DIVF=449 DIVI=453 DIVU=454 +%term EQD=482 EQF=481 EQI=485 +%term GED=498 GEF=497 GEI=501 GEU=502 +%term GTD=514 GTF=513 GTI=517 
GTU=518 +%term INDIRB=73 INDIRC=67 INDIRD=66 INDIRF=65 INDIRI=69 INDIRP=71 INDIRS=68 +%term JUMPV=584 +%term LABELV=600 +%term LED=530 LEF=529 LEI=533 LEU=534 +%term LOADB=233 LOADC=227 LOADD=226 LOADF=225 LOADI=229 LOADP=231 LOADS=228 LOADU=230 +%term LSHI=341 LSHU=342 +%term LTD=546 LTF=545 LTI=549 LTU=550 +%term MODI=357 MODU=358 +%term MULD=466 MULF=465 MULI=469 MULU=470 +%term NED=562 NEF=561 NEI=565 +%term NEGD=194 NEGF=193 NEGI=197 +%term RETD=242 RETF=241 RETI=245 +%term RSHI=373 RSHU=374 +%term SUBD=322 SUBF=321 SUBI=325 SUBP=327 SUBU=326 +%term VREGP=615 +%% +reg: INDIRC(VREGP) "# read register\n" +reg: INDIRD(VREGP) "# read register\n" +reg: INDIRF(VREGP) "# read register\n" +reg: INDIRI(VREGP) "# read register\n" +reg: INDIRP(VREGP) "# read register\n" +reg: INDIRS(VREGP) "# read register\n" +stmt: ASGNC(VREGP,reg) "# write register\n" +stmt: ASGND(VREGP,reg) "# write register\n" +stmt: ASGNF(VREGP,reg) "# write register\n" +stmt: ASGNI(VREGP,reg) "# write register\n" +stmt: ASGNP(VREGP,reg) "# write register\n" +stmt: ASGNS(VREGP,reg) "# write register\n" +con: CNSTC "%a" +con: CNSTI "%a" +con: CNSTP "%a" +con: CNSTS "%a" +con: CNSTU "%a" +stmt: reg "" +reg: CVIU(reg) "%0" notarget(a) +reg: CVPU(reg) "%0" notarget(a) +reg: CVUI(reg) "%0" notarget(a) +reg: CVUP(reg) "%0" notarget(a) +acon: ADDRGP "%a" +acon: con "%0" +base: ADDRGP "%a" +base: reg "%0" +base: ADDI(reg,acon) "%0 + (%1)" +base: ADDP(reg,acon) "%0 + (%1)" +base: ADDU(reg,acon) "%0 + (%1)" +base: ADDRFP "ebp + %a" +base: ADDRLP "ebp + %a" +index: reg "%0" +index: LSHI(reg,con1) "%0*2" +index: LSHI(reg,con2) "%0*4" +index: LSHI(reg,con3) "%0*8" + +con1: CNSTI "1" range(a, 1, 1) +con1: CNSTU "1" range(a, 1, 1) +con2: CNSTI "2" range(a, 2, 2) +con2: CNSTU "2" range(a, 2, 2) +con3: CNSTI "3" range(a, 3, 3) +con3: CNSTU "3" range(a, 3, 3) +index: LSHU(reg,con1) "%0*2" +index: LSHU(reg,con2) "%0*4" +index: LSHU(reg,con3) "%0*8" +addr: base "[%0]" +addr: ADDI(index,base) "[%1 + %0]" +addr: 
ADDP(index,base) "[%1 + %0]" +addr: ADDU(index,base) "[%1 + %0]" +addr: index "[%0]" +mem: INDIRC(addr) "byte %0" +mem: INDIRI(addr) "dword %0" +mem: INDIRP(addr) "dword %0" +mem: INDIRS(addr) "word %0" +rc: reg "%0" +rc: con "%0" + +mr: reg "%0" +mr: mem "%0" + +mrc0: mem "%0" +mrc0: rc "%0" +mrc1: mem "%0" 1 +mrc1: rc "%0" + +mrc3: mem "%0" 3 +mrc3: rc "%0" +reg: addr "lea %c,%0\n" 1 +reg: mrc0 "mov %c,%0\n" 1 +reg: LOADC(reg) "mov %c,%0\n" move(a) +reg: LOADI(reg) "mov %c,%0\n" move(a) +reg: LOADP(reg) "mov %c,%0\n" move(a) +reg: LOADS(reg) "mov %c,%0\n" move(a) +reg: LOADU(reg) "mov %c,%0\n" move(a) +reg: ADDI(reg,mrc1) "?mov %c,%0\nadd %c,%1\n" 1 +reg: ADDP(reg,mrc1) "?mov %c,%0\nadd %c,%1\n" 1 +reg: ADDU(reg,mrc1) "?mov %c,%0\nadd %c,%1\n" 1 +reg: SUBI(reg,mrc1) "?mov %c,%0\nsub %c,%1\n" 1 +reg: SUBP(reg,mrc1) "?mov %c,%0\nsub %c,%1\n" 1 +reg: SUBU(reg,mrc1) "?mov %c,%0\nsub %c,%1\n" 1 +reg: BANDU(reg,mrc1) "?mov %c,%0\nand %c,%1\n" 1 +reg: BORU(reg,mrc1) "?mov %c,%0\nor %c,%1\n" 1 +reg: BXORU(reg,mrc1) "?mov %c,%0\nxor %c,%1\n" 1 +stmt: ASGNI(addr,ADDI(mem,con1)) "inc %1\n" memop(a) +stmt: ASGNI(addr,ADDU(mem,con1)) "inc %1\n" memop(a) +stmt: ASGNP(addr,ADDP(mem,con1)) "inc %1\n" memop(a) +stmt: ASGNI(addr,SUBI(mem,con1)) "dec %1\n" memop(a) +stmt: ASGNI(addr,SUBU(mem,con1)) "dec %1\n" memop(a) +stmt: ASGNP(addr,SUBP(mem,con1)) "dec %1\n" memop(a) +stmt: ASGNI(addr,ADDI(mem,rc)) "add %1,%2\n" memop(a) +stmt: ASGNI(addr,ADDU(mem,rc)) "add %1,%2\n" memop(a) +stmt: ASGNI(addr,SUBI(mem,rc)) "sub %1,%2\n" memop(a) +stmt: ASGNI(addr,SUBU(mem,rc)) "sub %1,%2\n" memop(a) + +stmt: ASGNI(addr,BANDU(mem,rc)) "and %1,%2\n" memop(a) +stmt: ASGNI(addr,BORU(mem,rc)) "or %1,%2\n" memop(a) +stmt: ASGNI(addr,BXORU(mem,rc)) "xor %1,%2\n" memop(a) +reg: BCOMU(reg) "?mov %c,%0\nnot %c\n" 2 +reg: NEGI(reg) "?mov %c,%0\nneg %c\n" 2 + +stmt: ASGNI(addr,BCOMU(mem)) "not %1\n" memop(a) +stmt: ASGNI(addr,NEGI(mem)) "neg %1\n" memop(a) +reg: LSHI(reg,rc5) "?mov %c,%0\nsal %c,%1\n" 2 
+reg: LSHU(reg,rc5) "?mov %c,%0\nshl %c,%1\n" 2 +reg: RSHI(reg,rc5) "?mov %c,%0\nsar %c,%1\n" 2 +reg: RSHU(reg,rc5) "?mov %c,%0\nshr %c,%1\n" 2 + +stmt: ASGNI(addr,LSHI(mem,rc5)) "sal %1,%2\n" memop(a) +stmt: ASGNI(addr,LSHU(mem,rc5)) "shl %1,%2\n" memop(a) +stmt: ASGNI(addr,RSHI(mem,rc5)) "sar %1,%2\n" memop(a) +stmt: ASGNI(addr,RSHU(mem,rc5)) "shr %1,%2\n" memop(a) + +rc5: CNSTI "%a" range(a, 0, 31) +rc5: reg "cl" +reg: MULI(reg,mrc3) "?mov %c,%0\nimul %c,%1\n" 14 +reg: MULI(con,mr) "imul %c,%1,%0\n" 13 +reg: MULU(reg,mr) "mul %1\n" 13 +reg: DIVU(reg,reg) "xor edx,edx\ndiv %1\n" +reg: MODU(reg,reg) "xor edx,edx\ndiv %1\n" +reg: DIVI(reg,reg) "cdq\nidiv %1\n" +reg: MODI(reg,reg) "cdq\nidiv %1\n" +reg: CVIU(reg) "mov %c,%0\n" move(a) +reg: CVPU(reg) "mov %c,%0\n" move(a) +reg: CVUI(reg) "mov %c,%0\n" move(a) +reg: CVUP(reg) "mov %c,%0\n" move(a) +reg: CVCI(INDIRC(addr)) "movsx %c,byte %0\n" 3 +reg: CVCU(INDIRC(addr)) "movzx %c,byte %0\n" 3 +reg: CVSI(INDIRS(addr)) "movsx %c,word %0\n" 3 +reg: CVSU(INDIRS(addr)) "movzx %c,word %0\n" 3 +reg: CVCI(reg) "# extend\n" 3 +reg: CVCU(reg) "# extend\n" 3 +reg: CVSI(reg) "# extend\n" 3 +reg: CVSU(reg) "# extend\n" 3 + +reg: CVIC(reg) "# truncate\n" 1 +reg: CVIS(reg) "# truncate\n" 1 +reg: CVUC(reg) "# truncate\n" 1 +reg: CVUS(reg) "# truncate\n" 1 +stmt: ASGNC(addr,rc) "mov byte %0,%1\n" 1 +stmt: ASGNI(addr,rc) "mov dword %0,%1\n" 1 +stmt: ASGNP(addr,rc) "mov dword %0,%1\n" 1 +stmt: ASGNS(addr,rc) "mov word %0,%1\n" 1 +stmt: ARGI(mrc3) "push dword %0\n" 1 +stmt: ARGP(mrc3) "push dword %0\n" 1 +stmt: ASGNB(reg,INDIRB(reg)) "mov ecx,%a\nrep movsb\n" +stmt: ARGB(INDIRB(reg)) "sub esp,%a\nmov edi,esp\nmov ecx,%a\nrep movsb\n" + +memf: INDIRD(addr) "qword %0" +memf: INDIRF(addr) "dword %0" +memf: CVFD(INDIRF(addr)) "dword %0" +reg: memf "fld %0\n" 3 +stmt: ASGND(addr,reg) "fstp qword %0\n" 7 +stmt: ASGNF(addr,reg) "fstp dword %0\n" 7 +stmt: ASGNF(addr,CVDF(reg)) "fstp dword %0\n" 7 +stmt: ARGD(reg) "sub esp,8\nfstp qword [esp]\n" 
+stmt: ARGF(reg) "sub esp,4\nfstp dword [esp]\n" +reg: NEGD(reg) "fchs\n" +reg: NEGF(reg) "fchs\n" +reg: ADDD(reg,memf) "fadd %1\n" +reg: ADDD(reg,reg) "faddp st1\n" +reg: ADDF(reg,memf) "fadd %1\n" +reg: ADDF(reg,reg) "faddp st1\n" +reg: DIVD(reg,memf) "fdiv %1\n" +reg: DIVD(reg,reg) "fdivrp st1\n" +reg: DIVF(reg,memf) "fdiv %1\n" +reg: DIVF(reg,reg) "fdivrp st1\n" +reg: MULD(reg,memf) "fmul %1\n" +reg: MULD(reg,reg) "fmulp st1\n" +reg: MULF(reg,memf) "fmul %1\n" +reg: MULF(reg,reg) "fmulp st1\n" +reg: SUBD(reg,memf) "fsub %1\n" +reg: SUBD(reg,reg) "fsubrp st1\n" +reg: SUBF(reg,memf) "fsub %1\n" +reg: SUBF(reg,reg) "fsubrp st1\n" +reg: CVFD(reg) "# CVFD\n" +reg: CVDF(reg) "sub esp,4\nfstp dword [esp]\nfld dword [esp]\nadd esp,4\n" 12 + +stmt: ASGNI(addr,CVDI(reg)) "fistp dword %0\n" 29 +reg: CVDI(reg) "sub esp,4\nfistp dword [esp]\npop %c\n" 31 + +reg: CVID(INDIRI(addr)) "fild dword %0\n" 10 +reg: CVID(reg) "push %0\nfild dword [esp]\nadd esp,4\n" 12 + +addrj: ADDRGP "%a" +addrj: reg "%0" 2 +addrj: mem "%0" 2 + +stmt: JUMPV(addrj) "jmp %0\n" 3 +stmt: LABELV "%a:\n" +stmt: EQI(mem,rc) "cmp %0,%1\nje near %a\n" 5 +stmt: GEI(mem,rc) "cmp %0,%1\njge near %a\n" 5 +stmt: GTI(mem,rc) "cmp %0,%1\njg near %a\n" 5 +stmt: LEI(mem,rc) "cmp %0,%1\njle near %a\n" 5 +stmt: LTI(mem,rc) "cmp %0,%1\njl near %a\n" 5 +stmt: NEI(mem,rc) "cmp %0,%1\njne near %a\n" 5 +stmt: GEU(mem,rc) "cmp %0,%1\njae near %a\n" 5 +stmt: GTU(mem,rc) "cmp %0,%1\nja near %a\n" 5 +stmt: LEU(mem,rc) "cmp %0,%1\njbe near %a\n" 5 +stmt: LTU(mem,rc) "cmp %0,%1\njb near %a\n" 5 +stmt: EQI(reg,mrc1) "cmp %0,%1\nje near %a\n" 4 +stmt: GEI(reg,mrc1) "cmp %0,%1\njge near %a\n" 4 +stmt: GTI(reg,mrc1) "cmp %0,%1\njg near %a\n" 4 +stmt: LEI(reg,mrc1) "cmp %0,%1\njle near %a\n" 4 +stmt: LTI(reg,mrc1) "cmp %0,%1\njl near %a\n" 4 +stmt: NEI(reg,mrc1) "cmp %0,%1\njne near %a\n" 4 + +stmt: GEU(reg,mrc1) "cmp %0,%1\njae near %a\n" 4 +stmt: GTU(reg,mrc1) "cmp %0,%1\nja near %a\n" 4 +stmt: LEU(reg,mrc1) "cmp %0,%1\njbe near 
%a\n" 4 +stmt: LTU(reg,mrc1) "cmp %0,%1\njb near %a\n" 4 +cmpf: memf " %0" +cmpf: reg "p" +stmt: EQD(cmpf,reg) "fcomp%0\nfstsw ax\nsahf\nje near %a\n" +stmt: GED(cmpf,reg) "fcomp%0\nfstsw ax\nsahf\njbe near %a\n" +stmt: GTD(cmpf,reg) "fcomp%0\nfstsw ax\nsahf\njb near %a\n" +stmt: LED(cmpf,reg) "fcomp%0\nfstsw ax\nsahf\njae near %a\n" +stmt: LTD(cmpf,reg) "fcomp%0\nfstsw ax\nsahf\nja near %a\n" +stmt: NED(cmpf,reg) "fcomp%0\nfstsw ax\nsahf\njne near %a\n" + +stmt: EQF(cmpf,reg) "fcomp%0\nfstsw ax\nsahf\nje near %a\n" +stmt: GEF(cmpf,reg) "fcomp%0\nfstsw ax\nsahf\njbe near %a\n" +stmt: GTF(cmpf,reg) "fcomp%0\nfstsw ax\nsahf\njb near %a\n" +stmt: LEF(cmpf,reg) "fcomp%0\nfstsw ax\nsahf\njae near %a\n" +stmt: LTF(cmpf,reg) "fcomp%0\nfstsw ax\nsahf\nja near %a\n" +stmt: NEF(cmpf,reg) "fcomp%0\nfstsw ax\nsahf\njne near %a\n" +reg: CALLI(addrj) "call %0\nadd esp,%a\n" +stmt: CALLV(addrj) "call %0\nadd esp,%a\n" +reg: CALLF(addrj) "call %0\nadd esp,%a\n" +reg: CALLD(addrj) "call %0\nadd esp,%a\n" + +stmt: RETI(reg) "# ret\n" +stmt: RETF(reg) "# ret\n" +stmt: RETD(reg) "# ret\n" +%% +static void progbeg(argc, argv) int argc; char *argv[]; { + int i; + + { + union { + char c; + int i; + } u; + u.i = 0; + u.c = 1; + swap = (u.i == 1) != IR->little_endian; + } + parseflags(argc, argv); + intreg[EAX] = mkreg("eax", EAX, 1, IREG); + intreg[EDX] = mkreg("edx", EDX, 1, IREG); + intreg[ECX] = mkreg("ecx", ECX, 1, IREG); + intreg[EBX] = mkreg("ebx", EBX, 1, IREG); + intreg[ESI] = mkreg("esi", ESI, 1, IREG); + intreg[EDI] = mkreg("edi", EDI, 1, IREG); + shortreg[EAX] = mkreg("ax", EAX, 1, IREG); + shortreg[ECX] = mkreg("cx", ECX, 1, IREG); + shortreg[EDX] = mkreg("dx", EDX, 1, IREG); + shortreg[EBX] = mkreg("bx", EBX, 1, IREG); + shortreg[ESI] = mkreg("si", ESI, 1, IREG); + shortreg[EDI] = mkreg("di", EDI, 1, IREG); + + charreg[EAX] = mkreg("al", EAX, 1, IREG); + charreg[ECX] = mkreg("cl", ECX, 1, IREG); + charreg[EDX] = mkreg("dl", EDX, 1, IREG); + charreg[EBX] = mkreg("bl", EBX, 1, 
IREG); + for (i = 0; i < 8; i++) + fltreg[i] = mkreg("%d", i, 0, FREG); + rmap[C] = mkwildcard(charreg); + rmap[S] = mkwildcard(shortreg); + rmap[P] = rmap[B] = rmap[U] = rmap[I] = mkwildcard(intreg); + rmap[F] = rmap[D] = mkwildcard(fltreg); + tmask[IREG] = (1<<EDI) | (1<<ESI) | (1<<EBX) + | (1<<EDX) | (1<<ECX) | (1<<EAX); + vmask[IREG] = 0; + tmask[FREG] = 0xff; + vmask[FREG] = 0; + cseg = 0; + quo = mkreg("eax", EAX, 1, IREG); + quo->x.regnode->mask |= 1<<EDX; + rem = mkreg("edx", EDX, 1, IREG); + rem->x.regnode->mask |= 1<<EAX; +} +static void segment(n) int n; { + if (n == cseg) + return; + cseg = n; + if (cseg == CODE) + print("[section .text]\n"); + else if (cseg == DATA || cseg == LIT) + print("[section .data]\n"); + else if (cseg == BSS) + print("[section .bss]\n"); +} +static void progend() { + +} +static void target(p) Node p; { + assert(p); + switch (p->op) { + case RSHI: case RSHU: case LSHI: case LSHU: + if (generic(p->kids[1]->op) != CNST + && !( generic(p->kids[1]->op) == INDIR + && p->kids[1]->kids[0]->op == VREG+P + && p->kids[1]->syms[RX]->u.t.cse + && generic(p->kids[1]->syms[RX]->u.t.cse->op) == CNST +)) { + rtarget(p, 1, intreg[ECX]); + setreg(p, intreg[EAX]); + } + break; + case MULU: + setreg(p, quo); + rtarget(p, 0, intreg[EAX]); + break; + case DIVI: case DIVU: + setreg(p, quo); + rtarget(p, 0, intreg[EAX]); + rtarget(p, 1, intreg[ECX]); + break; + case MODI: case MODU: + setreg(p, rem); + rtarget(p, 0, intreg[EAX]); + rtarget(p, 1, intreg[ECX]); + break; + case ASGNB: + rtarget(p, 0, intreg[EDI]); + rtarget(p->kids[1], 0, intreg[ESI]); + break; + case ARGB: + rtarget(p->kids[0], 0, intreg[ESI]); + break; + case CALLI: case CALLV: + setreg(p, intreg[EAX]); + break; + case RETI: + rtarget(p, 0, intreg[EAX]); + break; + } +} + +static void clobber(p) Node p; { + static int nstack = 0; + + assert(p); + nstack = ckstack(p, nstack); + assert(p->count > 0 || nstack == 0); + switch (p->op) { + case ASGNB: case ARGB: + spill(1<<ECX | 1<<ESI | 
1<<EDI, IREG, p); + break; + case EQD: case LED: case GED: case LTD: case GTD: case NED: + case EQF: case LEF: case GEF: case LTF: case GTF: case NEF: + spill(1<<EAX, IREG, p); + break; + case CALLD: case CALLF: + spill(1<<EDX | 1<<EAX, IREG, p); + break; + } +} +#define isfp(p) (optype((p)->op)==F || optype((p)->op)==D) + +static int ckstack(p, n) Node p; int n; { + int i; + + for (i = 0; i < NELEMS(p->x.kids) && p->x.kids[i]; i++) + if (isfp(p->x.kids[i])) + n--; + if (isfp(p) && p->count > 0) + n++; + if (n > 8) + error("expression too complicated\n"); + debug(fprint(2, "(ckstack(%x)=%d)\n", p, n)); + assert(n >= 0); + return n; +} +static int memop(p) Node p; { + assert(p); + assert(generic(p->op) == ASGN); + assert(p->kids[0]); + assert(p->kids[1]); + if (generic(p->kids[1]->kids[0]->op) == INDIR + && sametree(p->kids[0], p->kids[1]->kids[0]->kids[0])) + return 3; + else + return LBURG_MAX; +} +static int sametree(p, q) Node p, q; { + return p == NULL && q == NULL + || p && q && p->op == q->op && p->syms[0] == q->syms[0] + && sametree(p->kids[0], q->kids[0]) + && sametree(p->kids[1], q->kids[1]); +} +static void emit2(p) Node p; { +#define preg(f) ((f)[getregnum(p->x.kids[0])]->x.name) + + if (p->op == CVCI) + print("movsx %s,%s\n", p->syms[RX]->x.name +, preg(charreg)); + else if (p->op == CVCU) + print("movzx %s,%s\n", p->syms[RX]->x.name +, preg(charreg)); + else if (p->op == CVSI) + print("movsx %s,%s\n", p->syms[RX]->x.name +, preg(shortreg)); + else if (p->op == CVSU) + print("movzx %s,%s\n", p->syms[RX]->x.name +, preg(shortreg)); + else if (p->op == CVIC || p->op == CVIS + || p->op == CVUC || p->op == CVUS) { + char *dst = shortreg[getregnum(p)]->x.name; + char *src = preg(shortreg); + if (dst != src) + print("mov %s,%s\n", dst, src); + } +} + +static void doarg(p) Node p; { + assert(p && p->syms[0]); + mkactual(4, p->syms[0]->u.c.v.i); +} +static void blkfetch(k, off, reg, tmp) +int k, off, reg, tmp; {} +static void blkstore(k, off, reg, tmp) +int k, 
off, reg, tmp; {} +static void blkloop(dreg, doff, sreg, soff, size, tmps) +int dreg, doff, sreg, soff, size, tmps[]; {} +static void local(p) Symbol p; { + if (isfloat(p->type)) + p->sclass = AUTO; + if (askregvar(p, rmap[ttob(p->type)]) == 0) + mkauto(p); +} +static void function(f, caller, callee, n) +Symbol f, callee[], caller[]; int n; { + int i; + + print("%s:\n", f->x.name); + print("push ebx\n"); + print("push esi\n"); + print("push edi\n"); + print("push ebp\n"); + print("mov ebp,esp\n"); +usedmask[0] = usedmask[1] = 0; +freemask[0] = freemask[1] = ~(unsigned)0; + offset = 16 + 4; + for (i = 0; callee[i]; i++) { + Symbol p = callee[i]; + Symbol q = caller[i]; + assert(q); + p->x.offset = q->x.offset = offset; + p->x.name = q->x.name = stringf("%d", p->x.offset); + p->sclass = q->sclass = AUTO; + offset += roundup(q->type->size, 4); + } + assert(caller[i] == 0); + offset = maxoffset = 0; + gencode(caller, callee); + framesize = roundup(maxoffset, 4); + if (framesize > 0) + print("sub esp,%d\n", framesize); + emitcode(); + print("mov esp,ebp\n"); + print("pop ebp\n"); + print("pop edi\n"); + print("pop esi\n"); + print("pop ebx\n"); + print("ret\n"); +} +static void defsymbol(p) Symbol p; { + if (p->scope >= LOCAL && p->sclass == STATIC) + p->x.name = stringf("L%d", genlabel(1)); + else if (p->generated) + p->x.name = stringf("$L%s", p->name); + else if (p->scope == GLOBAL || p->sclass == EXTERN) + /* CHANGE THIS FOR a.out */ +#if 0 + p->x.name = stringf("$_%s", p->name); +#else + p->x.name = stringf("$%s", p->name); +#endif + else if (p->scope == CONSTANTS + && (isint(p->type) || isptr(p->type)) + && p->name[0] == '0' && p->name[1] == 'x') + p->x.name = stringf("0%sH", &p->name[2]); + else + p->x.name = p->name; +} +static void address(q, p, n) Symbol q, p; int n; { + if (p->scope == GLOBAL + || p->sclass == STATIC || p->sclass == EXTERN) + q->x.name = stringf("%s%s%d", + p->x.name, n >= 0 ? 
"+" : "", n); + else { + q->x.offset = p->x.offset + n; + q->x.name = stringd(q->x.offset); + } +} +static void defconst(ty, v) int ty; Value v; { + switch (ty) { + case C: print("db %d\n", v.uc); return; + case S: print("dw %d\n", v.ss); return; + case I: print("dd %d\n", v.i ); return; + case U: print("dd 0%xH\n", v.u ); return; + case P: print("dd 0%xH\n", v.p ); return; + case F: + print("dd 0%xH\n", *(unsigned *)&v.f); + return; + case D: { + unsigned *p = (unsigned *)&v.d; + print("dd 0%xH,0%xH\n", p[swap], p[1 - swap]); + return; + } + } + assert(0); +} +static void defaddress(p) Symbol p; { + print("dd %s\n", p->x.name); +} +static void defstring(n, str) int n; char *str; { + char *s; + int inquote = 1; + + print("db '"); + + for (s = str; s < str + n; s++) + { + if ((*s & 0x7F) == *s && *s >= ' ' && *s != '\'') { + if (!inquote){ + print(", '"); + inquote = 1; + } + print("%c",*s); + } + else + { + if (inquote){ + print("', "); + inquote = 0; + } + else + print(", "); + print("%d",*s); + } + } + if (inquote) print("'"); + print("\n"); +} +static void export(p) Symbol p; { + print("[global %s]\n", p->x.name); +} +static void import(p) Symbol p; { + if (p->ref > 0) { + print("[extern %s]\n", p->x.name); + } +} +static void global(p) Symbol p; { + int i; + + if (p->u.seg == BSS) + print("resb ($-$$) & %d\n", + p->type->align > 4 ? 3 : p->type->align-1); + else + print("times ($-$$) & %d nop\n", + p->type->align > 4 ? 
3 : p->type->align-1); + print("%s:\n", p->x.name); + if (p->u.seg == BSS) + print("resb %d\n", p->type->size); +} +static void space(n) int n; { + int i; + + if (cseg != BSS) + print("times %d db 0\n", n); +} +Interface x86nasmIR = { + 1, 1, 0, /* char */ + 2, 2, 0, /* short */ + 4, 4, 0, /* int */ + 4, 4, 1, /* float */ + 8, 4, 1, /* double */ + 4, 4, 0, /* T * */ + 0, 4, 0, /* struct; so that ARGB keeps stack aligned */ + 1, /* little_endian */ + 0, /* mulops_calls */ + 0, /* wants_callb */ + 1, /* wants_argb */ + 0, /* left_to_right */ + 0, /* wants_dag */ + address, + blockbeg, + blockend, + defaddress, + defconst, + defstring, + defsymbol, + emit, + export, + function, + gen, + global, + import, + local, + progbeg, + progend, + segment, + space, + 0, 0, 0, 0, 0, 0, 0, + {1, blkfetch, blkstore, blkloop, + _label, + _rule, + _nts, + _kids, + _opname, + _arity, + _string, + _templates, + _isinstruction, + _ntname, + emit2, + doarg, + target, + clobber, +} +}; diff --git a/misc/magic b/misc/magic new file mode 100644 index 0000000..0172f4a --- /dev/null +++ b/misc/magic @@ -0,0 +1,6 @@ +# Put the following lines in your /etc/magic file to get 'file' to recognise +# RDOFF Object Files + +0 string RDOFF RDOFF Object File +>5 byte >32 version %c (little endian) +>5 byte <32 version %d (big endian) diff --git a/misc/nasm.sl b/misc/nasm.sl new file mode 100644 index 0000000..be4d30b --- /dev/null +++ b/misc/nasm.sl @@ -0,0 +1,305 @@ +% This file defines a NASM editor mode for the JED editor. +% JED's home page is http://space.mit.edu/~davis/jed.html. +% +% To install, copy this file into your JED_LIBRARY directory +% (/usr/local/jed/lib or C:\JED\LIB or whatever), then add the +% following lines to your .jedrc or jed.rc file: +% autoload("nasm_mode", "nasm"); +% add_mode_for_extension("nasm", "asm"); +% (you can of course replace "asm" with whatever file extension +% you like to use for your NASM source files). 
+ +variable Nasm_Instruction_Indent = 10; +variable Nasm_Comment_Column = 33; +variable Nasm_Comment_Space = 1; + +variable nasm_kw_2 = strcat("ahalaxbhblbpbtbxchclcscxdbdddhdidldqdsdtdwdxes", + "fsgsinjajbjcjejgjljojpjsjzorsispssto"); +variable nasm_kw_3 = strncat("a16a32aaaaadaamaasadcaddandbsfbsrbtcbtrbtscbw", + "cdqclccldclicmccmpcr0cr2cr3cr4cwddaadasdecdiv", + "dr0dr1dr2dr3dr6dr7eaxebpebxecxediedxequesiesp", + "farfldfsthltincintjaejbejgejlejmpjnajnbjncjne", + "jngjnljnojnpjnsjnzjpejpolarldslealeslfslgslsl", + "lssltrmm0mm1mm2mm3mm4mm5mm6mm7movmulnegnopnot", + "o16o32outpopporrclrcrrepretrolrorrsmsalsarsbb", + "segshlshrst0st1st2st3st4st5st6st7stcstdstistr", + "subtr3tr4tr5tr6tr7wrtxor", 9); +variable nasm_kw_4 = strncat("arplbytecallcltscwdeemmsfabsfaddfbldfchsfcom", + "fcosfdivfenifildfistfld1fldzfmulfnopfsinfstp", + "fsubftstfxamfxchidivimulinsbinsdinswint3into", + "invdiretjcxzjnaejnbejngejnlelahflgdtlidtlldt", + "lmswlocklongloopmovdmovqnearpandpopapopfpush", + "pxorreperepzresbresdreswretfretnsahfsetasetb", + "setcsetesetgsetlsetosetpsetssetzsgdtshldshrd", + "sidtsldtsmswtestverrverwwaitwordxaddxchg", 8); +variable nasm_kw_5 = strncat("boundbswapcmpsbcmpsdcmpswcpuiddwordenterf2xm1", + "faddpfbstpfclexfcompfdisifdivpfdivrffreefiadd", + "ficomfidivfimulfinitfistpfisubfldcwfldpifmulp", + "fpremfptanfsavefsqrtfstcwfstswfsubpfsubrfucom", + "fyl2xiretdiretwjecxzleavelodsblodsdlodswloope", + "loopzmovsbmovsdmovswmovsxmovzxoutsboutsdoutsw", + "paddbpadddpaddwpandnpopadpopawpopfdpopfwpslld", + "psllqpsllwpsradpsrawpsrldpsrlqpsrlwpsubbpsubd", + "psubwpushapushfqwordrdmsrrdtscrepnerepnzscasb", + "scasdscaswsetaesetbesetgesetlesetnasetnbsetnc", + "setnesetngsetnlsetnosetnpsetnssetnzsetpesetpo", + "shortstosbstosdstoswtimestwordwrmsrxlatb", 12); +variable nasm_kw_6 = strncat("fcomppfdivrpficompfidivrfisubrfldenvfldl2e", + "fldl2tfldlg2fldln2fpatanfprem1frstorfscale", + "fsetpmfstenvfsubrpfucompinvlpgloopneloopnz", + "paddsbpaddswpmulhwpmullwpsubsbpsubswpushad", + 
"pushawpushfdpushfwsetnaesetnbesetngesetnle", + "wbinvd", 6); +variable nasm_kw_7 = strncat("cmpxchgfdecstpfincstpfrndintfsincosfucompp", + "fxtractfyl2xp1paddusbpadduswpcmpeqbpcmpeqd", + "pcmpeqwpcmpgtbpcmpgtdpcmpgtwpmaddwdpsubusb", + "psubusw", 4); +variable nasm_kw_8 = "packssdwpacksswbpackuswb"; +variable nasm_kw_9 = strcat("cmpxchg8bpunpckhbwpunpckhdqpunpckhwdpunpcklbw", + "punpckldqpunpcklwd"); + +define nasm_is_kw { + variable word; + variable len; + variable list, min, max, pos, cmp; + + word = strlow(()); + len = strlen(word); + + switch (len) + { case 0: return 1; } + { case 2: list = nasm_kw_2; } + { case 3: list = nasm_kw_3; } + { case 4: list = nasm_kw_4; } + { case 5: list = nasm_kw_5; } + { case 6: list = nasm_kw_6; } + { case 7: list = nasm_kw_7; } + { case 8: list = nasm_kw_8; } + { case 9: list = nasm_kw_9; } + { pop(); return 0; } + + min = -1; + max = strlen(list) / len; + while (max - min >= 2) { + pos = (max + min) / 2; + cmp = strcmp(word, substr(list, pos * len + 1, len)); + if (cmp == 0) + return 1; % it's a keyword + else if (cmp < 0) + max = pos; % bottom half + else if (cmp > 0) + min = pos; % top half + } + return 0; +} + +define nasm_indent_line() { + variable word, len, e; + + e = eolp(); + + push_spot(); + EXIT_BLOCK { + pop_spot(); + if (what_column() <= Nasm_Instruction_Indent) + skip_white(); + } + + bol_skip_white(); + + if (orelse + {looking_at_char(';')} + {looking_at_char('#')} + {looking_at_char('[')}) { + bol_trim(); + pop_spot(); + EXIT_BLOCK { + } + return; + } + + push_mark(); + skip_chars("0-9a-zA-Z_."); + word = bufsubstr(); + + if (nasm_is_kw(word)) { + bol_trim(); + whitespace(Nasm_Instruction_Indent); + } else { + push_spot(); + bol_trim(); + pop_spot(); + len = strlen(word); + if (looking_at_char(':')) { + go_right_1(); + len++; + } + trim(); + if (e or not(eolp())) { + if (len >= Nasm_Instruction_Indent) { + pop(); + whitespace(1); + } else + whitespace(Nasm_Instruction_Indent - len); + if (e) { + pop_spot(); + 
eol(); + push_spot(); + } + } + } +} + +define nasm_newline_indent { + push_spot(); + bol_skip_white(); + if (eolp()) + trim(); + pop_spot(); + newline(); + nasm_indent_line(); +} + +define nasm_bol_self_ins { + push_spot(); + bskip_white(); + bolp(); + pop_spot(); + + call("self_insert_cmd"); + + % Grotty: force immediate update of the syntax highlighting. + insert_char('.'); + deln(left(1)); + + if (()) + nasm_indent_line(); +} + +define nasm_self_ins_ind { + call("self_insert_cmd"); + + % Grotty: force immediate update of the syntax highlighting. + insert_char('.'); + deln(left(1)); + + nasm_indent_line(); +} + +define nasm_insert_comment { + variable spc; + + bol_skip_white(); + if (looking_at_char(';')) { + bol_trim(); + go_right(1); + skip_white(); + return; + } else if (eolp()) { + bol_trim(); + insert("; "); + return; + } + + forever { + skip_chars("^;\n'\""); + if (looking_at_char('\'')) { + go_right_1(); + skip_chars("^'\n"); + !if (eolp()) + go_right_1(); + } else if (looking_at_char('\"')) { + go_right_1(); + skip_chars("^\"\n"); + !if (eolp()) + go_right_1(); + } else if (looking_at_char(';')) { + !if (bolp()) { + go_left_1(); + trim(); + !if (looking_at_char(';')) + go_right_1(); + } + break; + } else { + break; + } + } + spc = Nasm_Comment_Column - what_column(); + if (spc < Nasm_Comment_Space) + spc = Nasm_Comment_Space; + whitespace(spc); + if (eolp()) { + insert("; "); + } else { + go_right_1(); + skip_white(); + } +} + +$1 = "NASM"; +create_syntax_table($1); + +define_syntax (";", "", '%', $1); +define_syntax ("([", ")]", '(', $1); +define_syntax ('"', '"', $1); +define_syntax ('\'', '\'', $1); +define_syntax ("0-9a-zA-Z_.@#", 'w', $1); +define_syntax ("-+0-9a-fA-F.xXL", '0', $1); +define_syntax (",:", ',', $1); +define_syntax ('#', '#', $1); +define_syntax ("|^&<>+-*/%~", '+', $1); + +set_syntax_flags($1,1); + +#ifdef HAS_DFA_SYNTAX + +enable_highlight_cache("nasm.dfa", $1); +define_highlight_rule(";.*$", "comment", $1); 
+define_highlight_rule("[A-Za-z_\\.\\?][A-Za-z0-9_\\.\\?\\$#@~]*", + "Knormal", $1); +define_highlight_rule("$([A-Za-z_\\.\\?][A-Za-z0-9_\\.\\?\\$#@~]*)?", + "normal", $1); +define_highlight_rule("[0-9]+(\\.[0-9]*)?([Ee][\\+\\-]?[0-9]*)?", + "number", $1); +define_highlight_rule("[0-9]+[QqBb]", "number", $1); +define_highlight_rule("(0x|\\$[0-9A-Fa-f])[0-9A-Fa-f]*", "number", $1); +define_highlight_rule("[0-9A-Fa-f]+[Hh]", "number", $1); +define_highlight_rule("\"[^\"]*\"", "string", $1); +define_highlight_rule("\"[^\"]*$", "string", $1); +define_highlight_rule("'[^']*'", "string", $1); +define_highlight_rule("'[^']*$", "string", $1); +define_highlight_rule("[\\(\\)\\[\\],:]*", "delimiter", $1); +define_highlight_rule("[\\|\\^&<>\\+\\-\\*/%~]*", "operator", $1); +define_highlight_rule("^[ \t]*#", "PQpreprocess", $1); +define_highlight_rule("@[0-9A-Za-z_\\.]*", "keyword1", $1); +define_highlight_rule("[ \t]*", "normal", $1); +define_highlight_rule(".", "normal", $1); +build_highlight_table($1); + +#endif + +define_keywords_n($1, nasm_kw_2, 2, 0); +define_keywords_n($1, nasm_kw_3, 3, 0); +define_keywords_n($1, nasm_kw_4, 4, 0); +define_keywords_n($1, nasm_kw_5, 5, 0); +define_keywords_n($1, nasm_kw_6, 6, 0); +define_keywords_n($1, nasm_kw_7, 7, 0); +define_keywords_n($1, nasm_kw_8, 8, 0); +define_keywords_n($1, nasm_kw_9, 9, 0); + +!if (keymap_p ($1)) make_keymap ($1); +definekey("nasm_bol_self_ins", ";", $1); +definekey("nasm_bol_self_ins", "#", $1); +definekey("nasm_bol_self_ins", "[", $1); +definekey("nasm_self_ins_ind", ":", $1); +definekey("nasm_insert_comment", "^[;", $1); + +define nasm_mode { + set_mode("NASM", 4); + use_keymap ("NASM"); + use_syntax_table ("NASM"); + set_buffer_hook ("indent_hook", "nasm_indent_line"); + set_buffer_hook ("newline_indent_hook", "nasm_newline_indent"); + runhooks("nasm_mode_hook"); +} @@ -0,0 +1,79 @@ +/* names.c included source file defining instruction and register + * names for the Netwide [Dis]Assembler + * + * The Netwide 
Assembler is copyright (C) 1996 Simon Tatham and + * Julian Hall. All rights reserved. The software is + * redistributable under the licence given in the file "Licence" + * distributed in the NASM archive. + */ + +static char *reg_names[] = { /* register names, as strings */ + "\0", "ah", "al", "ax", "bh", "bl", "bp", "bx", "ch", "cl", + "cr0", "cr2", "cr3", "cr4", "cs", "cx", "dh", "di", "dl", "dr0", + "dr1", "dr2", "dr3", "dr6", "dr7", "ds", "dx", "eax", "ebp", + "ebx", "ecx", "edi", "edx", "es", "esi", "esp", "fs", "gs", + "mm0", "mm1", "mm2", "mm3", "mm4", "mm5", "mm6", "mm7", "si", + "sp", "ss", "st0", "st1", "st2", "st3", "st4", "st5", "st6", + "st7", "tr3", "tr4", "tr5", "tr6", "tr7" +}; + +static char *insn_names[] = { /* instruction names, as strings */ + "aaa", "aad", "aam", "aas", "adc", "add", "and", "arpl", + "bound", "bsf", "bsr", "bswap", "bt", "btc", "btr", "bts", + "call", "cbw", "cdq", "clc", "cld", "cli", "clts", "cmc", "cmp", + "cmpsb", "cmpsd", "cmpsw", "cmpxchg", "cmpxchg8b", "cpuid", + "cwd", "cwde", "daa", "das", "db", "dd", "dec", "div", "dq", + "dt", "dw", "emms", "enter", "equ", "f2xm1", "fabs", "fadd", + "faddp", "fbld", "fbstp", "fchs", "fclex", "fcmovb", "fcmovbe", + "fcmove", "fcmovnb", "fcmovnbe", "fcmovne", "fcmovnu", "fcmovu", + "fcom", "fcomi", "fcomip", "fcomp", "fcompp", "fcos", "fdecstp", + "fdisi", "fdiv", "fdivp", "fdivr", "fdivrp", "feni", "ffree", + "fiadd", "ficom", "ficomp", "fidiv", "fidivr", "fild", "fimul", + "fincstp", "finit", "fist", "fistp", "fisub", "fisubr", "fld", + "fld1", "fldcw", "fldenv", "fldl2e", "fldl2t", "fldlg2", + "fldln2", "fldpi", "fldz", "fmul", "fmulp", "fnop", "fpatan", + "fprem", "fprem1", "fptan", "frndint", "frstor", "fsave", + "fscale", "fsetpm", "fsin", "fsincos", "fsqrt", "fst", "fstcw", + "fstenv", "fstp", "fstsw", "fsub", "fsubp", "fsubr", "fsubrp", + "ftst", "fucom", "fucomi", "fucomip", "fucomp", "fucompp", + "fxam", "fxch", "fxtract", "fyl2x", "fyl2xp1", "hlt", "icebp", + "idiv", 
"imul", "in", "inc", "insb", "insd", "insw", "int", + "int1", "int01", "int3", "into", "invd", "invlpg", "iret", + "iretd", "iretw", "jcxz", "jecxz", "jmp", "lahf", "lar", "lds", + "lea", "leave", "les", "lfs", "lgdt", "lgs", "lidt", "lldt", + "lmsw", "loadall", "lodsb", "lodsd", "lodsw", "loop", "loope", + "loopne", "loopnz", "loopz", "lsl", "lss", "ltr", "mov", "movd", + "movq", "movsb", "movsd", "movsw", "movsx", "movzx", "mul", + "neg", "nop", "not", "or", "out", "outsb", "outsd", "outsw", + "packssdw", "packsswb", "packuswb", "paddb", "paddd", "paddsb", + "paddsw", "paddusb", "paddusw", "paddw", "pand", "pandn", + "pcmpeqb", "pcmpeqd", "pcmpeqw", "pcmpgtb", "pcmpgtd", + "pcmpgtw", "pmaddwd", "pmulhw", "pmullw", "pop", "popa", + "popad", "popaw", "popf", "popfd", "popfw", "por", "pslld", + "psllq", "psllw", "psrad", "psraw", "psrld", "psrlq", "psrlw", + "psubb", "psubd", "psubsb", "psubsw", "psubusb", "psubusw", + "psubw", "punpckhbw", "punpckhdq", "punpckhwd", "punpcklbw", + "punpckldq", "punpcklwd", "push", "pusha", "pushad", "pushaw", + "pushf", "pushfd", "pushfw", "pxor", "rcl", "rcr", "rdmsr", + "rdpmc", "rdtsc", "resb", "resd", "resq", "rest", "resw", "ret", + "retf", "retn", "rol", "ror", "rsm", "sahf", "sal", "salc", + "sar", "sbb", "scasb", "scasd", "scasw", "sgdt", "shl", "shld", + "shr", "shrd", "sidt", "sldt", "smsw", "stc", "std", "sti", + "stosb", "stosd", "stosw", "str", "sub", "test", "umov", "verr", + "verw", "wait", "wbinvd", "wrmsr", "xadd", "xchg", "xlatb", + "xor" +}; + +static char *icn[] = { /* conditional instructions */ + "cmov", "j", "set" +}; + +static int ico[] = { /* and the corresponding opcodes */ + I_CMOVcc, I_Jcc, I_SETcc +}; + +static char *conditions[] = { /* condition code names */ + "a", "ae", "b", "be", "c", "e", "g", "ge", "l", "le", "na", "nae", + "nb", "nbe", "nc", "ne", "ng", "nge", "nl", "nle", "no", "np", + "ns", "nz", "o", "p", "pe", "po", "s", "z" +}; @@ -0,0 +1,648 @@ +/* The Netwide Assembler main program module + 
* + * The Netwide Assembler is copyright (C) 1996 Simon Tatham and + * Julian Hall. All rights reserved. The software is + * redistributable under the licence given in the file "Licence" + * distributed in the NASM archive. + */ + +#include <stdio.h> +#include <stdarg.h> +#include <stdlib.h> +#include <string.h> +#include <ctype.h> + +#include "nasm.h" +#include "nasmlib.h" +#include "parser.h" +#include "assemble.h" +#include "labels.h" +#include "outform.h" + +static void report_error (int, char *, ...); +static void parse_cmdline (int, char **); +static void assemble_file (char *); +static int getkw (char *buf, char **value); +static void register_output_formats(void); +static void usage(void); + +static char *obuf; +static char inname[FILENAME_MAX]; +static char outname[FILENAME_MAX]; +static char realout[FILENAME_MAX]; +static int lineno; /* for error reporting */ +static int pass; +static struct ofmt *ofmt = NULL; + +static FILE *ofile = NULL; +static int sb = 16; /* by default */ + +static long current_seg; +static struct RAA *offsets; +static long abs_offset; +#define OFFSET_DELTA 256 + +/* + * get/set current offset... 
+ */ +#define get_curr_ofs (current_seg==NO_SEG?abs_offset:\ + raa_read(offsets,current_seg)) +#define set_curr_ofs(x) (current_seg==NO_SEG?(void)(abs_offset=(x)):\ + (void)(offsets=raa_write(offsets,current_seg,(x)))) + +static int want_usage; +static int terminate_after_phase; + +int main(int argc, char **argv) { + want_usage = terminate_after_phase = FALSE; + + nasm_set_malloc_error (report_error); + offsets = raa_init(); + + seg_init(); + + register_output_formats(); + + parse_cmdline(argc, argv); + + if (terminate_after_phase) { + if (want_usage) + usage(); + return 1; + } + + if (!*outname) { + ofmt->filename (inname, realout, report_error); + strcpy(outname, realout); + } + + ofile = fopen(outname, "wb"); + if (!ofile) { + report_error (ERR_FATAL | ERR_NOFILE, + "unable to open output file `%s'", outname); + } + ofmt->init (ofile, report_error, define_label); + assemble_file (inname); + if (!terminate_after_phase) { + ofmt->cleanup (); + cleanup_labels (); + } + fclose (ofile); + if (terminate_after_phase) + remove(outname); + + if (want_usage) + usage(); + + return 0; +} + +static void parse_cmdline(int argc, char **argv) { + char *param; + + *inname = *outname = '\0'; + while (--argc) { + char *p = *++argv; + if (p[0]=='-') { + switch (p[1]) { + case 'o': /* these parameters take values */ + case 'f': + if (p[2]) /* the parameter's in the option */ + param = p+2; + else if (!argv[1]) { + report_error (ERR_NONFATAL | ERR_NOFILE | ERR_USAGE, + "option `-%c' requires an argument", + p[1]); + break; + } else + --argc, param = *++argv; + if (p[1]=='o') { /* output file */ + strcpy (outname, param); + } else if (p[1]=='f') { /* output format */ + ofmt = ofmt_find(param); + if (!ofmt) { + report_error (ERR_FATAL | ERR_NOFILE | ERR_USAGE, + "unrecognised output format `%s'", + param); + } + } + break; + case 'h': + fprintf(stderr, + "usage: nasm [-o outfile] [-f format] filename\n"); + fprintf(stderr, + " or nasm -r for version info\n\n"); + fprintf(stderr, + 
"valid output formats for -f are" + " (`*' denotes default):\n"); + ofmt_list(ofmt); + exit (0); /* never need usage message here */ + break; + case 'r': + fprintf(stderr, "NASM version %s\n", NASM_VER); + exit (0); /* never need usage message here */ + break; + default: + report_error (ERR_NONFATAL | ERR_NOFILE | ERR_USAGE, + "unrecognised option `-%c'", + p[1]); + break; + } + } else { + if (*inname) { + report_error (ERR_NONFATAL | ERR_NOFILE | ERR_USAGE, + "more than one input file specified"); + } else + strcpy(inname, p); + } + } + if (!*inname) + report_error (ERR_NONFATAL | ERR_NOFILE | ERR_USAGE, + "no input file specified"); +} + +/* used by error function to report location */ +static char currentfile[FILENAME_MAX]; + +static void assemble_file (char *fname) { + FILE *fp = fopen (fname, "r"); + FILE *oldfile = NULL; /* jrh - used when processing include files */ + int oldfileline = 0; + char *value, *p, buffer[1024+2]; /* maximum line length defined here */ + insn output_ins; + int i, seg, rn_error; + + if (!fp) { /* couldn't open file */ + report_error (ERR_FATAL | ERR_NOFILE, + "unable to open input file `%s'", fname); + return; + } + + init_labels (); + strcpy(currentfile,fname); + + /* pass one */ + pass = 1; + current_seg = ofmt->section(NULL, pass, &sb); + lineno = 0; + while (1) { + if (! fgets(buffer, sizeof(buffer), fp)) { /* EOF on current file */ + if (oldfile) { + fclose(fp); + fp = oldfile; + lineno = oldfileline; + strcpy(currentfile,fname); + oldfile = NULL; + continue; + } + else + break; + } + lineno++; + if (buffer[strlen(buffer)-1] == '\n') { + buffer[strlen(buffer)-1] = '\0'; + } else { + /* + * We have a line that's too long. Throw an error, read + * to EOL, and ignore the line for assembly purposes. 
+ */ + report_error (ERR_NONFATAL, "line is longer than %d characters", + sizeof(buffer)-2); + while (fgets(buffer, sizeof(buffer), fp) && + buffer[strlen(buffer)-1] != '\n'); + continue; /* read another line */ + } + + /* here we parse our directives; this is not handled by the 'real' + * parser. */ + + if ( (i = getkw (buffer, &value)) ) { + switch (i) { + case 1: /* [SEGMENT n] */ + seg = ofmt->section (value, pass, &sb); + if (seg == NO_SEG) { + report_error (ERR_NONFATAL, + "segment name `%s' not recognised", + value); + } else { + current_seg = seg; + } + break; + case 2: /* [EXTERN label] */ + if (*value == '$') + value++; /* skip initial $ if present */ + declare_as_global (value, report_error); + define_label (value, seg_alloc(), 0L, ofmt, report_error); + break; + case 3: /* [BITS bits] */ + switch (atoi(value)) { + case 16: + case 32: + sb = atoi(value); + break; + default: + report_error(ERR_NONFATAL, + "`%s' is not a valid argument to [BITS]", + value); + break; + } + break; + case 4: /* [INC file] */ + oldfile = fp; + oldfileline = lineno; + lineno = 0; + strcpy(currentfile,value); + fp = fopen(value,"r"); + if (!fp) { + lineno = oldfileline; + fp = oldfile; + strcpy(currentfile,fname); + report_error (ERR_FATAL, + "unable to open include file `%s'\n", + value); + } + break; + case 5: /* [GLOBAL symbol] */ + if (*value == '$') + value++; /* skip initial $ if present */ + declare_as_global (value, report_error); + break; + case 6: /* [COMMON symbol size] */ + p = value; + while (*p && !isspace(*p)) + p++; + if (*p) { + long size; + + while (*p && isspace(*p)) + *p++ = '\0'; + size = readnum (p, &rn_error); + if (rn_error) + report_error (ERR_NONFATAL, "invalid size specified" + " in COMMON declaration"); + else + define_common (value, seg_alloc(), size, + ofmt, report_error); + } else + report_error (ERR_NONFATAL, "no size specified in" + " COMMON declaration"); + break; + case 7: /* [ABSOLUTE address] */ + current_seg = NO_SEG; + abs_offset = 
readnum(value, &rn_error); + if (rn_error) { + report_error (ERR_NONFATAL, "invalid address specified" + " for ABSOLUTE directive"); + abs_offset = 0x100;/* don't go near zero in case of / */ + } + break; + default: + if (!ofmt->directive (buffer+1, value, 1)) + report_error (ERR_NONFATAL, "unrecognised directive [%s]", + buffer+1); + break; + } + } else { + long offs = get_curr_ofs; + parse_line (current_seg, offs, lookup_label, + 1, buffer, &output_ins, ofmt, report_error); + if (output_ins.opcode == I_EQU) { + /* + * Special `..' EQUs get processed in pass two. + */ + if (!output_ins.label) + report_error (ERR_NONFATAL, + "EQU not preceded by label"); + else if (output_ins.label[0] != '.' || + output_ins.label[1] != '.') { + if (output_ins.operands == 1 && + (output_ins.oprs[0].type & IMMEDIATE)) { + define_label (output_ins.label, + output_ins.oprs[0].segment, + output_ins.oprs[0].offset, + ofmt, report_error); + } else if (output_ins.operands == 2 && + (output_ins.oprs[0].type & IMMEDIATE) && + (output_ins.oprs[0].type & COLON) && + output_ins.oprs[0].segment == NO_SEG && + (output_ins.oprs[1].type & IMMEDIATE) && + output_ins.oprs[1].segment == NO_SEG) { + define_label (output_ins.label, + output_ins.oprs[0].offset | SEG_ABS, + output_ins.oprs[1].offset, + ofmt, report_error); + } else + report_error(ERR_NONFATAL, "bad syntax for EQU"); + } + } else { + if (output_ins.label) + define_label (output_ins.label, + current_seg, offs, + ofmt, report_error); + offs += insn_size (current_seg, offs, sb, + &output_ins, report_error); + set_curr_ofs (offs); + } + cleanup_insn (&output_ins); + } + } + + if (terminate_after_phase) { + fclose(ofile); + remove(outname); + if (want_usage) + usage(); + exit (1); + } + + /* pass two */ + pass = 2; + rewind (fp); + current_seg = ofmt->section(NULL, pass, &sb); + raa_free (offsets); + offsets = raa_init(); + lineno = 0; + while (1) { + if (!fgets(buffer, sizeof(buffer), fp)) { + if (oldfile) { + fclose(fp); + fp = oldfile; + 
lineno = oldfileline; + strcpy(currentfile,fname); + oldfile = NULL; + continue; + } else + break; + } + lineno++; + if (buffer[strlen(buffer)-1] == '\n') + buffer[strlen(buffer)-1] = '\0'; + else + report_error (ERR_PANIC, + "too-long line got through from pass one"); + + /* here we parse our directives; this is not handled by + * the 'real' parser. */ + + if ( (i = getkw (buffer, &value)) ) { + switch (i) { + case 1: /* [SEGMENT n] */ + seg = ofmt->section (value, pass, &sb); + if (seg == NO_SEG) { + report_error (ERR_PANIC, + "invalid segment name on pass two"); + } else + current_seg = seg; + break; + case 2: /* [EXTERN label] */ + break; + case 3: /* [BITS bits] */ + switch (atoi(value)) { + case 16: + case 32: + sb = atoi(value); + break; + default: + report_error(ERR_PANIC, + "invalid [BITS] value on pass two", + value); + break; + } + break; + case 4: + oldfile = fp; + oldfileline = lineno; + lineno = 0; + strcpy(currentfile,value); + fp = fopen(value,"r"); + if (!fp) { + lineno = oldfileline; + fp = oldfile; + strcpy(currentfile,fname); + /* + * We don't report this error in the PANIC + * class, even though we might expect to have + * already picked it up during pass one, + * because of the tiny chance that some other + * process may have removed the include file + * between the passes. 
+ */ + report_error (ERR_FATAL, + "unable to open include file `%s'\n", + value); + } + break; + case 5: /* [GLOBAL symbol] */ + break; + case 6: /* [COMMON symbol size] */ + break; + case 7: /* [ABSOLUTE addr] */ + current_seg = NO_SEG; + abs_offset = readnum(value, &rn_error); + if (rn_error) + report_error (ERR_PANIC, "invalid ABSOLUTE address " + "in pass two"); + break; + default: + if (!ofmt->directive (buffer+1, value, 2)) + report_error (ERR_PANIC, "invalid directive on pass two"); + break; + } + } else { + long offs = get_curr_ofs; + parse_line (current_seg, offs, lookup_label, 2, + buffer, &output_ins, ofmt, report_error); + obuf = buffer; + if (output_ins.label) + define_label_stub (output_ins.label, report_error); + if (output_ins.opcode == I_EQU) { + /* + * Special `..' EQUs get processed here. + */ + if (output_ins.label[0] == '.' && + output_ins.label[1] == '.') { + if (output_ins.operands == 1 && + (output_ins.oprs[0].type & IMMEDIATE)) { + define_label (output_ins.label, + output_ins.oprs[0].segment, + output_ins.oprs[0].offset, + ofmt, report_error); + } else if (output_ins.operands == 2 && + (output_ins.oprs[0].type & IMMEDIATE) && + (output_ins.oprs[0].type & COLON) && + output_ins.oprs[0].segment == NO_SEG && + (output_ins.oprs[1].type & IMMEDIATE) && + output_ins.oprs[1].segment == NO_SEG) { + define_label (output_ins.label, + output_ins.oprs[0].offset | SEG_ABS, + output_ins.oprs[1].offset, + ofmt, report_error); + } else + report_error(ERR_NONFATAL, "bad syntax for EQU"); + } + } + offs += assemble (current_seg, offs, sb, + &output_ins, ofmt, report_error); + cleanup_insn (&output_ins); + set_curr_ofs (offs); + } + } +} + +static int getkw (char *buf, char **value) { + char *p, *q; + + if (*buf!='[') + return 0; + p = buf; + while (*p && *p != ']') p++; + if (!*p) + return 0; + q = p++; + while (*p && *p != ';') { + if (!isspace(*p)) + return 0; + p++; + } + q[1] = '\0'; + + p = buf+1; + while (*buf && *buf!=' ' && *buf!=']' && *buf!='\t') + 
buf++; + if (*buf==']') { + *buf = '\0'; + *value = buf; + } else { + *buf++ = '\0'; + *value = buf; + while (*buf!=']') buf++; + *buf++ = '\0'; + } + for (q=p; *q; q++) + *q = tolower(*q); + if (!strcmp(p, "segment") || !strcmp(p, "section")) + return 1; + if (!strcmp(p, "extern")) + return 2; + if (!strcmp(p, "bits")) + return 3; + if (!strcmp(p, "inc") || !strcmp(p, "include")) + return 4; + if (!strcmp(p, "global")) + return 5; + if (!strcmp(p, "common")) + return 6; + if (!strcmp(p, "absolute")) + return 7; + return -1; +} + +static void report_error (int severity, char *fmt, ...) { + va_list ap; + + if (severity & ERR_NOFILE) + fputs ("nasm: ", stderr); + else + fprintf (stderr, "%s:%d: ", currentfile, lineno); + + if ( (severity & ERR_MASK) == ERR_WARNING) + fputs ("warning: ", stderr); + else if ( (severity & ERR_MASK) == ERR_PANIC) + fputs ("panic: ", stderr); + + va_start (ap, fmt); + vfprintf (stderr, fmt, ap); + fputc ('\n', stderr); + + if (severity & ERR_USAGE) + want_usage = TRUE; + + switch (severity & ERR_MASK) { + case ERR_WARNING: + /* no further action, by definition */ + break; + case ERR_NONFATAL: + terminate_after_phase = TRUE; + break; + case ERR_FATAL: + fclose(ofile); + remove(outname); + if (want_usage) + usage(); + exit(1); /* instantly die */ + break; /* placate silly compilers */ + case ERR_PANIC: + abort(); /* panic and dump core */ + break; + } +} + +static void usage(void) { + fputs("type `nasm -h' for help\n", stderr); +} + +static void register_output_formats(void) { + /* Flat-form binary format */ +#ifdef OF_BIN + extern struct ofmt of_bin; +#endif + /* Unix formats: a.out, COFF, ELF */ +#ifdef OF_AOUT + extern struct ofmt of_aout; +#endif +#ifdef OF_COFF + extern struct ofmt of_coff; +#endif +#ifdef OF_ELF + extern struct ofmt of_elf; +#endif + /* Linux strange format: as86 */ +#ifdef OF_AS86 + extern struct ofmt of_as86; +#endif + /* DOS formats: OBJ, Win32 */ +#ifdef OF_OBJ + extern struct ofmt of_obj; +#endif +#ifdef OF_WIN32 
+ extern struct ofmt of_win32; +#endif +#ifdef OF_RDF + extern struct ofmt of_rdf; +#endif +#ifdef OF_DBG /* debug format must be included specifically */ + extern struct ofmt of_dbg; +#endif + +#ifdef OF_BIN + ofmt_register (&of_bin); +#endif +#ifdef OF_AOUT + ofmt_register (&of_aout); +#endif +#ifdef OF_COFF + ofmt_register (&of_coff); +#endif +#ifdef OF_ELF + ofmt_register (&of_elf); +#endif +#ifdef OF_AS86 + ofmt_register (&of_as86); +#endif +#ifdef OF_OBJ + ofmt_register (&of_obj); +#endif +#ifdef OF_WIN32 + ofmt_register (&of_win32); +#endif +#ifdef OF_RDF + ofmt_register (&of_rdf); +#endif +#ifdef OF_DBG + ofmt_register (&of_dbg); +#endif + /* + * set the default format + */ + ofmt = &OF_DEFAULT; +} diff --git a/nasm.doc b/nasm.doc new file mode 100644 index 0000000..dd2073b --- /dev/null +++ b/nasm.doc @@ -0,0 +1,996 @@ + The Netwide Assembler, NASM + =========================== + +Introduction +============ + +The Netwide Assembler grew out of an idea on comp.lang.asm.x86 (or +possibly alt.lang.asm, I forget which), which was essentially that +there didn't seem to be a good free x86-series assembler around, and +that maybe someone ought to write one. + +- A86 is good, but not free, and in particular you don't get any + 32-bit capability until you pay. It's DOS only, too. + +- GAS is free, and ports over DOS/Unix, but it's not very good, + since it's designed to be a back end to gcc, which always feeds it + correct code. So its error checking is minimal. Also its syntax is + horrible, from the point of view of anyone trying to actually + _write_ anything in it. Plus you can't write 16-bit code in it. + +- AS86 is Linux specific, and (my version at least) doesn't seem to + have much (or any) documentation. + +- MASM isn't very good. And it's expensive. And it runs only under + DOS. + +- TASM is better, but still strives for MASM compatibility, which + means millions of directives and tons of red tape. 
And its syntax + is essentially MASM's, with the contradictions and quirks that + entails (although it sorts out some of those by means of Ideal + mode). It's expensive too. And it's DOS only. + +So here, for your coding pleasure, is NASM. At present it's still in +prototype stage - we don't promise that it can outperform any of +these assemblers. But please, _please_ send us bug reports and fixes +and anything else you can get your hands on, and we'll improve it +out of all recognition. Again. + +Please see the file `Licence' for the legalese. + +Getting Started: Installation +============================= + +NASM is distributed in source form, in what we hope is totally +ANSI-compliant C. It uses no non-portable code at all, that we know +of. It ought to compile without change on any system you care to try +it on. We also supply a pre-compiled 16-bit DOS binary. + +To install it, edit the Makefile to describe your C compiler, and +type `make'. Then copy the binary to somewhere on your path. That's +all - NASM relies on no files other than its own executable. +Although if you're on a Unix system, you may also want to install +the NASM manpage (`nasm.1'). You may also want to install the binary +and manpage for the Netwide Disassembler, NDISASM (also see +`ndisasm.doc'). + +Running NASM +============ + +To assemble a file, you issue a command of the form + + nasm -f <format> <filename> [-o <output>] + +For example, + + nasm -f elf myfile.asm + +will assemble `myfile.asm' into an ELF object file `myfile.o'. And + + nasm -f bin myfile.asm -o myfile.com + +will assemble `myfile.asm' into a raw binary program `myfile.com'. + +To get usage instructions from NASM, try typing `nasm -h'. This will +also list the available output file formats, and what they are. + +If you use Linux but aren't sure whether your system is a.out or +ELF, type `file /usr/bin/nasm' or wherever you put the NASM binary. 
+If it says something like + +/usr/bin/nasm: ELF 32-bit LSB executable i386 (386 and up) Version 1 + +then your system is ELF, and you should use `-f elf' when you want +NASM to produce Linux object files. If it says + +/usr/bin/nasm: Linux/i386 demand-paged executable (QMAGIC) + +or something similar, your system is a.out, and you should use `-f +aout' instead. + +Like Unix compilers and assemblers, NASM is silent unless it goes +wrong: you won't see any output at all, unless it gives error +messages. + +Writing Programs with NASM +========================== + +Each line of a NASM source file should contain some combination of +the four fields + +LABEL: INSTRUCTION OPERANDS ; COMMENT + +`LABEL' defines a label pointing to that point in the source. There +are no restrictions on white space: labels may have white space +before them, or not, as you please. The colon after the label is +also optional. + +Valid characters in labels are letters, numbers, `_', `$', `#', `@', +`~', `?', and `.'. The only characters which may be used as the +_first_ character of an identifier are letters, `_' and `?', and +(with special meaning: see `Local Labels') `.'. An identifier may +also be prefixed with a $ sign to indicate that it is intended to be +read as an identifier and not a reserved word; thus, if some other +module you are linking with defines a symbol `eax', you can refer to +`$eax' in NASM code to distinguish it from the register name. + +`INSTRUCTION' can be any machine opcode (Pentium and P6 opcodes, FPU +opcodes, MMX opcodes and even undocumented opcodes are all +supported). The instruction may be prefixed by LOCK, REP, REPE/REPZ +or REPNE/REPNZ, in the usual way. Explicit address-size and operand- +size prefixes A16, A32, O16 and O32 are provided - one example of +their use is given in the `Unusual Instruction Sizes' section below. +You can also use a segment register as a prefix: coding `es mov +[bx],ax' is equivalent to coding `mov [es:bx],ax'. 
We recommend the +latter syntax, since it is consistent with other syntactic features +of the language, but for instructions such as `lodsb' there isn't +anywhere to put a segment override except as a prefix. This is why +we support it. + +The `INSTRUCTION' field may also contain some pseudo-opcodes: see +the section on pseudo-opcodes for details. + +`OPERANDS' can be nonexistent, or huge, depending on the +instruction, of course. When operands are registers, they are given +simply as register names: `eax', `ss', `di' for example. NASM does +_not_ use the GAS syntax, in which register names are prefixed by a +`%' sign. Operands may also be effective addresses, or they may be +constants or expressions. See the separate sections on these for +details. + +`COMMENT' is anything after the first semicolon on the line, +excluding semicolons inside quoted strings. + +Of course, all these fields are optional: the presence or absence of +the OPERANDS field is required by the nature of the INSTRUCTION +field, but any line may contain a LABEL or not, may contain an +INSTRUCTION or not, and may contain a COMMENT or not, independently +of each other. + +Lines may also contain nothing but a directive: see `Assembler +Directives' below for details. + +NASM can currently not handle any line longer than 1024 characters. +This may be fixed in a future release. + +Floating Point Instructions +=========================== + +NASM has support for assembling FPU opcodes. However, its syntax is +not necessarily the same as anyone else's. + +NASM uses the notation `st0', `st1', etc. to denote the FPU stack +registers. NASM also accepts a wide range of single-operand and +two-operand forms of the instructions. 
For people who wish to use +the single-operand form exclusively (this is in fact the `canonical' +form from NASM's point of view, in that it is the form produced by +the Netwide Disassembler), there is a TO keyword which makes +available the opcodes which cannot be so easily accessed by one +operand. Hence: + + fadd st1 ; this sets st0 := st0 + st1 + fadd st0,st1 ; so does this + fadd st1,st0 ; this sets st1 := st1 + st0 + fadd to st1 ; so does this + +It's also worth noting that the FPU instructions that reference +memory must use the prefixes DWORD, QWORD or TWORD to indicate what +size of memory operand they refer to. + +NASM, in keeping with our policy of not trying to second-guess the +programmer, will _never_ automatically insert WAIT instructions into +your code stream. You must code WAIT yourself before _any_ +instruction that needs it. (Of course, on 286 processors or above, +it isn't needed anyway...) + +NASM supports specification of floating point constants by means of +`dd' (single precision), `dq' (double precision) and `dt' (extended +precision). Floating-point _arithmetic_ is not done, due to +portability constraints (not all platforms on which NASM can be run +support the same floating point types), but simple constants can be +specified. For example: + +gamma dq 0.5772156649 ; Euler's constant + +Pseudo-Opcodes +============== + +Pseudo-opcodes are not real x86 machine opcodes, but are used in the +instruction field anyway because that's the most convenient place to +put them. The current pseudo-opcodes are DB, DW and DD (plus DQ and +DT, used for the floating-point constants described above), their +uninitialised counterparts RESB, RESW and RESD, the EQU command, and +the TIMES prefix. + +DB, DW and DD work as you would expect: they can each take an +arbitrary number of operands, and when assembled, they generate +nothing but those operands. All three of them can take string +constants as operands, which no other instruction can currently do. +See the `Constants' section for details about string constants. 
+ +RESB, RESW and RESD are designed to be used in the BSS section of a +module: they declare _uninitialised_ storage space. Each takes a +single operand, which is the number of bytes, words or doublewords +to reserve. We do not support the MASM/TASM syntax of reserving +uninitialised space by writing `DW ?' or similar: this is what we do +instead. (But see `Critical Expressions' for a caveat on the nature +of the operand.) + +(An aside: if you want to be able to write `DW ?' and have something +vaguely useful happen, you can always code `? EQU 0'...) + +EQU defines a symbol to a specified value: when EQU is used, the +LABEL field must be present. The action of EQU is to define the +given label name to the value of its (only) operand. This definition +is absolute, and cannot change later. So, for example, + +message db 'hello, world' +msglen equ $-message + +defines `msglen' to be the constant 12. `msglen' may not then be +redefined later. This is not a preprocessor definition either: the +value of `msglen' is evaluated _once_, using the value of `$' (see +the section `Expressions' for details of `$') at the point of +definition, rather than being evaluated wherever it is referenced +and using the value of `$' at the point of reference. Note that the +caveat in `Critical Expressions' applies to EQU too, at the moment. + +Finally, the TIMES prefix causes the instruction to be assembled +multiple times. This is partly NASM's equivalent of the DUP syntax +supported by MASM-compatible assemblers, in that one can do + +zerobuf: times 64 db 0 + +or similar, but TIMES is more versatile than that. TIMES takes not +just a numeric constant, but a numeric _expression_, so one can do +things like + +buffer: db 'hello, world' + times 64-$+buffer db ' ' + +which will store exactly enough spaces to make the total length of +`buffer' up to 64. (See the section `Critical Expressions' for a +caveat on the use of TIMES.) 
Finally, TIMES can be applied to +ordinary opcodes, so you can code trivial unrolled loops in it: + + times 100 movsb + +Note that there is no effective difference between `times 100 resb +1' and `resb 100'. + +Effective Addresses +=================== + +NASM's addressing scheme is very simple, although it can involve +more typing than other assemblers. Where other assemblers +distinguish between a _variable_ (label declared without a colon) +and a _label_ (declared with a colon), and use different means of +addressing the two, NASM is totally consistent. + +To refer to the contents of a memory location, square brackets are +required. This applies to simple variables, computed offsets, +segment overrides, effective addresses - _everything_. E.g.: + +wordvar dw 123 + mov ax,[wordvar] + mov ax,[wordvar+1] + mov ax,[es:wordvar+bx] + +NASM does _not_ support the various strange syntaxes used by MASM +and others, such as + + mov ax,wordvar ; this is legal, but means something else + mov ax,es:wordvar[bx] ; not even slightly legal + es mov ax,wordvar[1] ; the prefix is OK, but not the rest + +If no square brackets are used, NASM interprets label references to +mean the address of the label. Hence there is no need for MASM's +OFFSET keyword, but + + mov ax,wordvar + +loads AX with the _address_ of the variable `wordvar'. + +More complicated effective addresses are handled by enclosing them +within square brackets as before: + + mov eax,[ebp+2*edi+offset] + mov ax,[bx+di+8] + +NASM will cope with some fairly strange effective addresses, if you +try it: provided your effective address expression evaluates +_algebraically_ to something that the instruction set supports, it +will be able to assemble it. For example, + + mov eax,[ebx*5] ; actually assembles to [ebx+ebx*4] + mov ax,[bx-si+2*si] ; actually assembles to [bx+si] + +will both work. 
+ +There is an ambiguity in the instruction set, which allows two forms +of 32-bit effective address with equivalent meaning: + + mov eax,[2*eax+0] + mov eax,[eax+eax] + +These two expressions clearly refer to the same address. The +difference is that the first one, if assembled `as is', requires a +four-byte offset to be stored as part of the instruction, so it +takes up more space. NASM will generate the second (smaller) form +for both of the above instructions, in an effort to save space. +There is not, currently, any means for forcing NASM to generate the +larger form of the instruction. + +Mixing 16 and 32 Bit Code: Unusual Instruction Sizes +==================================================== + +A number of assemblers seem to have trouble assembling instructions +that use a different operand or address size from the one they are +expecting; as86 is a good example, even though the Linux kernel boot +process (which is assembled using as86) needs several such +instructions and as86 can't do them. + +Instructions such as `mov eax,2' in 16-bit mode are easy, of course, +and NASM can do them just as well as any other assembler. The +difficult instructions are things like far jumps. + +Suppose you are in a 16-bit segment, in protected mode, and you want +to execute a far jump to a point in a 32-bit segment. You need to +code a 32-bit far jump in a 16-bit segment; not many assemblers I +know of will easily support this. NASM can, by means of the `word' +and `dword' specifiers. So you can code + + call 1234h:5678h ; this uses the default segment size + call word 1234h:5678h ; this is guaranteed to be 16-bit + call dword 1234h:56789ABCh ; and this is guaranteed 32-bit + +and NASM will generate correct code for them. + +Similarly, if you are coding in a 16-bit code segment, but trying to +access memory in a 32-bit data segment, your effective addresses +will want to be 32-bit. 
Of course as soon as you specify an +effective address containing a 32-bit register, like `[eax]', the +addressing is forced to be 32-bit anyway. But if you try to specify +a simple offset, such as `[label]' or `[0x10000]', you will get the +default address size, which in this case will be wrong. However, +NASM allows you to code `[dword 0x10000]' to force a 32-bit address +size, or conversely `[word wlabel]' to force 16 bits. + +Be careful not to confuse `word' and `dword' _inside_ the square +brackets with _outside_: consider the instruction + + mov word [dword 0x123456],0x7890 + +which moves 16 bits of data to an address specified by a 32-bit +offset. There is no contradiction between the `word' and `dword' in +this instruction, since they modify different aspects of the +functionality. Or, even more confusingly, + + call dword far [fs:word 0x4321] + +which takes an address specified by a 16-bit offset, and extracts a +48-bit DWORD FAR pointer from it to call. + +Using this effective-address syntax, the `dword' or `word' override +may come before or after the segment override if any: NASM isn't +fussy. Hence: + + mov ax,[fs:dword 0x123456] + mov ax,[dword fs:0x123456] + +are equivalent forms, and generate the same code. + +The LOOP instruction comes in strange sizes, too: in a 16-bit +segment it uses CX as its count register by default, and in a 32-bit +segment it uses ECX. But it's possible to do either one in the other +segment, and NASM will cope by letting you specify the count +register as a second operand: + + loop label ; uses CX or ECX depending on mode + loop label,cx ; always uses CX + loop label,ecx ; always uses ECX + +Finally, the string instructions LODSB, STOSB, MOVSB, CMPSB, SCASB, +INSB, and OUTSB can all have strange address sizes: typically, in a +16-bit segment they read from [DS:SI] and write to [ES:DI], and in a +32-bit segment they read from [DS:ESI] and write to [ES:EDI]. 
+However, this can be changed by the use of the explicit address-size +prefixes `a16' and `a32'. These prefixes generate null code if used +in the same size segment as they specify, but generate an 0x67 +prefix otherwise. Hence `a16' generates no code in a 16-bit segment, +but 0x67 in a 32-bit one, and vice versa. So `a16 lodsb' will always +generate code to read a byte from [DS:SI], no matter what the size +of the segment. There are also explicit operand-size override +prefixes, `o16' and `o32', which will optionally generate 0x66 +bytes, but these are provided for completeness and should never have +to be used. + +Constants +========= + +NASM can accept three kinds of constant: _numeric_, _character_ and +_string_ constants. + +Numeric constants are simply numbers. NASM supports a variety of +syntaxes for expressing numbers in strange bases: you can do any of + + 100 ; this is decimal + 0x100 ; hex + 100h ; hex as well + $100 ; hex again + 100q ; octal + 100b ; binary + +NASM does not support A86's syntax of treating anything with a +leading zero as hex, nor does it support the C syntax of treating +anything with a leading zero as octal. Leading zeros make no +difference to NASM. (Except that, as usual, if you have a hex +constant beginning with a letter, and you want to use the trailing-H +syntax to represent it, you have to use a leading zero so that NASM +will recognise it as a number instead of a label.) + +The `x' in `0x100', and the trailing `h', `q' and `b', may all be +upper case if you want. + +Character constants consist of up to four characters enclosed in +single or double quotes. No escape character is defined for +including the quote character itself: if you want to declare a +character constant containing a double quote, enclose it in single +quotes, and vice versa. 
+ +Character constants' values are worked out in terms of a +little-endian computer: if you code + + mov eax,'abcd' + +then if you were to examine the binary output from NASM, it would +contain the visible string `abcd', which of course means that the +actual value loaded into EAX would be 0x64636261, not 0x61626364. + +String constants are like character constants, only more so: if a +character constant appearing as operand to a DB, DW or DD is longer +than the word size involved (1, 2 or 4 respectively), it will be +treated as a string constant instead, which is to say the +concatenation of separate character constants. + +For example, + + db 'hello, world' + +declares a twelve-character string constant. And + + dd 'dontpanic' + +(a string constant) is equivalent to writing + + dd 'dont','pani','c' + +(three character constants), so that what actually gets assembled is +equivalent to + + db 'dontpanic',0,0,0 + +(It's worth noting that one of the reasons for the reversal of +character constants is so that the instruction `dw "ab"' has the +same meaning whether "ab" is treated as a character constant or a +string constant. Hence there is less confusion.) + +Expressions +=========== + +Expressions in NASM can be formed of the following operators: `|' +(bitwise OR), `^' (bitwise XOR), `&' (bitwise AND), `<<' and `>>' +(logical bit shifts), `+', `-', `*' (ordinary addition, subtraction +and multiplication), `/', `%' (unsigned division and modulo), `//', +`%%' (signed division and modulo), `~' (bitwise NOT), and the +operators SEG and WRT (see `SEG and WRT' below). + +The order of precedence is: + +| lowest +^ +& +<< >> +binary + and - +* / % // %% +unary + and -, ~, SEG highest + +As usual, operators within a precedence level associate to the left +(i.e. `2-3-4' evaluates the same way as `(2-3)-4'). 
+ +A form of algebra is done by NASM when evaluating expressions: I +have already stated that an effective address expression such as +`[EAX*6-EAX]' will be recognised by NASM as algebraically equivalent +to `[EAX*4+EAX]', and assembled as such. In addition, algebra can be +done on labels as well: `label2*2-label1' is an acceptable way to +define an address as far beyond `label2' as `label1' is before it. +(In less algebraically capable assemblers, one might have to write +that as `label2 + (label2-label1)', where the value of every +sub-expression is either a valid address or a constant. NASM can of +course cope with that version as well.) + +Expressions may also contain the special token `$', known as a Here +token, which always evaluates to the address of the current assembly +point. (That is, the address of the assembly point _before_ the +current instruction gets assembled.) The special token `$$' +evaluates to the address of the beginning of the current section; +this can be used for alignment, as shown below: + + times ($$-$) & 3 nop ; pad with NOPs to 4-byte boundary + +SEG and WRT +=========== + +NASM contains the capability for its object file formats (currently, +only `obj' makes use of this) to permit programs to directly refer +to the segment-base values of their segments. This is achieved +either by the object format defining the segment names as symbols +(`obj' does this), or by the use of the SEG operator. + +SEG is a unary prefix operator which, when applied to a symbol +defined in a segment, will yield the segment base value of that +segment. (In `obj' format, symbols defined in segments which are +grouped are considered to be primarily a member of the _group_, not +the segment, and the return value of SEG reflects this.) + +SEG may be used for far pointers: it is guaranteed that for any +symbol `sym', using the offset `sym' from the segment base `SEG sym' +yields a correct pointer to the symbol. 
Hence you can code a far +call by means of + + CALL SEG routine:routine + +or store a far pointer in a data segment by + + DW routine, SEG routine + +For convenience, NASM supports the forms + + CALL FAR routine + JMP FAR routine + +as direct synonyms for the canonical syntax + + CALL SEG routine:routine + JMP SEG routine:routine + +No alternative syntax for + + DW routine, SEG routine + +is supported. + +Simply referring to `sym', for some symbol, will return the offset +of `sym' from its _preferred_ segment base (as returned from `SEG +sym'); sometimes, you may want to obtain the offset of `sym' from +some _other_ segment base. (E.g. the offset of `sym' from the base +of the segment it's in, where normally you'd get the offset from a +group base). This is accomplished using the WRT (With Reference To) +keyword: if `sym' is defined in segment `seg' but you want its +offset relative to the beginning of segment `seg2', you can do + + mov ax,sym WRT seg2 + +The right-hand operand to WRT must be a segment-base value. You can +also do `sym WRT SEG sym2' if you need to. + +Critical Expressions +==================== + +NASM is a two-pass assembler: it goes over the input once to +determine the location of all the symbols, then once more to +actually generate the output code. Most expressions are +non-critical, in that if they contain a forward reference and hence +their correct value is unknown during the first pass, it doesn't +matter. However, arguments to RESB, RESW and RESD, and the argument +to the TIMES prefix, can actually affect the _size_ of the generated +code, and so it is critical that the expression can be evaluated +correctly on the first pass. So in these situations, expressions may +not contain forward references. This prevents NASM from having to +sort out a mess such as + + times (label-$) db 0 +label: db 'where am I?' 
+ +in which the TIMES argument could equally legally evaluate to +_anything_, or perhaps even worse, + + times (label-$+1) db 0 +label: db 'NOW where am I?' + +in which any value for the TIMES argument is by definition invalid. + +Since NASM is a two-pass assembler, this criticality condition also +applies to the argument to EQU. Suppose, if this were not the case, +we were to have the setup + + mov ax,a +a equ b +b: + +On pass one, `a' cannot be defined properly, since `b' is not known +yet. On pass two, `b' is known, so line two can define `a' properly. +Unfortunately, line 1 needed `a' to be defined properly, so this +code will not assemble using only two passes. + +Local Labels +============ + +NASM takes its local label scheme mainly from the old Amiga +assembler Devpac: a local label is one that begins with a period. +The `localness' comes from the fact that local labels are associated +with the previous non-local label, so that you may declare the same +local label twice if a non-local one intervenes. Hence: + +label1 ; some code +.loop ; some more code + jne .loop + ret +label2 ; some code +.loop ; some more code + jne .loop + ret + +In the above code, each `jne' instruction jumps to the line of code +before it, since the `.loop' labels are distinct from each other. + +NASM, however, introduces an extra capability not present in Devpac, +which is that the local labels are actually _defined_ in terms of +their associated non-local label. So if you really have to, you can +write + +label3 ; some more code + ; and some more + jmp label1.loop + +So although local labels are _usually_ local, it is possible to +reference them from anywhere in your program, if you really have to. + +Assembler Directives +==================== + +Assembler directives appear on a line by themselves (apart from a +comment), and must be enclosed in square brackets. 
No white space +may appear before the opening square bracket, although white space +and a comment may come after the closing bracket. + +Some directives are universal: they may be used in any situation, +and do not change their syntax. The universal directives are listed +below. + +[BITS 16] or [BITS 32] switches NASM into 16-bit or 32-bit mode. +(This is equivalent to USE16 and USE32 segments, in TASM or MASM.) +In 32-bit mode, instructions are prefixed with 0x66 or 0x67 prefixes +when they use 16-bit data or addresses; in 16-bit mode, the reverse +happens. NASM's default depends on the object format; the defaults +are documented with the formats. (See `obj', in particular, for some +unusual behaviour.) + +[INCLUDE filename] or [INC filename] includes another source file +into the current one. At present, only one level of inclusion is +supported. + +[SECTION name] or [SEGMENT name] changes which section the code you +write will be assembled into. Acceptable section names vary between +output formats, but most formats (indeed, all formats at the moment) +support the names `.text', `.data' and `.bss'. Note that `.bss' is +an uninitialised data section, and so you will receive a warning +from NASM if you try to assemble any code or data in it. The only +thing you can do in `.bss' without triggering a warning is use RESB, +RESW and RESD. That's what they're for. + +[ABSOLUTE address] can be considered a different form of [SECTION], +in that it must be overridden using a SECTION directive once you +have finished using it. It is used to assemble notional code at an +absolute offset address; of course, you can't actually assemble +_code_ there, since no object file format is capable of putting the +code in place, but you can use RESB, RESW and RESD, and you can +define labels. 
Hence you could, for example, define a C-like data +structure by means of + + [ABSOLUTE 0] + stLong resd 1 + stWord resw 1 + stByte1 resb 1 + stByte2 resb 1 + st_size: + [SEGMENT .text] + +and then carry on coding. This defines `stLong' to be zero, `stWord' +to be 4, `stByte1' to be 6, `stByte2' to be 7 and `st_size' to be 8. +So this has defined a data structure. + +[EXTERN symbol] defines a symbol as being `external', in the C +sense: `EXTERN' states that the symbol is _not_ declared in this +module, but is declared elsewhere, and that you wish to _reference_ +it in this module. + +[GLOBAL symbol] defines a symbol as being global, in the sense that +it is exported from this module and other modules may reference it. +All symbols are local, unless declared as global. Note that the +`GLOBAL' directive must appear before the definition of the symbol +it refers to. + +[COMMON symbol size] defines a symbol as being common: it is +declared to have the given size, and it is merged at link time with +any declarations of the same symbol in other modules. This is not +_fully_ supported in the `obj' file format: see the section on `obj' +for details. + +Directives may also be specific to the output file format. At +present, the `bin' and `obj' formats define extra directives, which +are specified below. + +Output Formats +============== + +The current output formats supported are `bin', `aout', `elf', +`coff', `win32', `obj', `as86' and `rdf', plus the optional +debugging format `dbg'. + +`bin': flat-form binary +----------------------- + +This is at present the only output format that generates instantly +runnable code: all the others produce object files that need linking +before they become executable. + +`bin' output files contain no red tape at all: they simply contain +the binary representation of the exact code you wrote. + +The `bin' format supports a format-specific directive, which is ORG. +[ORG addr] declares that your code should be assembled as if it were +to be loaded into memory at the address `addr'. 
So a DOS .COM file +should state [ORG 0x100], and a DOS .SYS file should state [ORG 0]. +There should be _one_ ORG directive, at most, in an assembly file: +NASM does not support the use of ORG to jump around inside an object +file, like MASM does (see the `Bugs' section for a use of the ORG +directive not supported by NASM). + +Like all formats, the `bin' format defines the section names +`.text', `.data' and `.bss'. The layout is that `.text' comes first +in the output file, followed by `.data', and notionally followed by +`.bss'. So if you declare a BSS section in a flat binary file, +references to the BSS section will refer to space past the end of +the actual file. The `.data' and `.bss' sections are considered to +be aligned on four-byte boundaries: this is achieved by inserting +padding zero bytes between the end of the text section and the start +of the data, if there is data present. Of course if no [SECTION] +directives are present, everything will go into `.text', and you +will get nothing in the output except the code you wrote. + +`bin' silently ignores GLOBAL directives, and will also not complain +at EXTERN ones. You only get an error if you actually _reference_ an +external symbol. + +Using the `bin' format, the default output filename is `filename' +for inputs of `filename.asm'. If there is no extension to be +removed, output will be placed in `nasm.out' and a warning will be +generated. + +`bin' defaults to 16-bit assembly mode. + +`aout' and `elf': Linux object files +------------------------------------ + +These two object formats are the ones used under Linux. They have no +format-specific directives, and their default output filename is +`filename.o'. + +ELF is a much more featureful object-file format than a.out: in +particular it has enough features to support the writing of position +independent code by means of a global offset table, and position +independent shared libraries by means of a procedure linkage table. 
+Unfortunately NASM, as yet, does not support these extensions, and +so NASM cannot be used to write shared library code under ELF. NASM +also does not support the capability, in ELF, for specifying precise +alignment constraints on common variables. + +Both `aout' and `elf' default to 32-bit assembly mode. + +`coff' and `win32': Common Object File Format +--------------------------------------------- + +The `coff' format generates standard Unix COFF object files, which +can be fed to (for example) the DJGPP linker. Its default output +filename, like the other Unix formats, is `filename.o'. + +The `win32' format generates Win32 (Windows 95 or Intel-platform +Windows NT) object files, which nominally use the COFF standard, but +in fact are not compatible. Its default output filename is +`filename.obj'. + +`coff' and `win32' are not quite compatible formats, due to the fact +that Microsoft's interpretation of the term `relative relocation' +does not seem to be the same as the interpretation used by anyone +else. It is therefore more correct to state that Win32 uses a +_variant_ of COFF. The object files will not therefore produce +correct output when fed to each other's linkers. + +In addition to this subtle incompatibility, Win32 also defines +extensions to basic COFF, such as a mechanism for importing symbols +from dynamic-link libraries at load time. NASM may eventually +support this extension in the form of a format-specific directive. +However, as yet, it does not. Neither the `coff' nor `win32' output +formats have any specific directives. + +The Microsoft linker also has a small blind spot: it cannot +correctly relocate a relative CALL or JMP to an absolute address. +Hence all PC-relative CALLs or JMPs, when using the `win32' format, +must have targets which are relative to sections, or to external +symbols. You can't do + call 0x123456 +_even_ if you happen to know that there is executable code at that +address. 
The linker simply won't get the reference right; so in the +interests of not generating incorrect code, NASM will not allow this +form of reference to be written to a Win32 object file. (Standard +COFF, or at least the DJGPP linker, seems to be able to cope with +this contingency. Although that may be due to the executable having +a zero load address.) + +Both `coff' and `win32' default to 32-bit assembly mode. + +`obj': Microsoft 16-bit Object Module Format +-------------------------------------------- + +The `obj' format generates 16-bit Microsoft object files, suitable +for feeding to 16-bit versions of Microsoft C, and probably +TLINK as well (although that hasn't been tested). The Use32 +extensions are supported. + +`obj' defines no special segment names: you can call segments what +you like. Unlike the other formats, too, segment names are actually +defined as symbols, so you can write + +[SEGMENT CODE] + mov ax,CODE + +and get the _segment_ address of the segment, suitable for loading +into a segment register. + +Segments can be declared with attributes: + +[SEGMENT CODE PRIVATE ALIGN=16 CLASS=CODE OVERLAY=OVL2 USE16] + +You can specify segments to be PRIVATE, PUBLIC, COMMON or STACK; +their alignment may be any power of two from 1 to 256 (although only +1, 2, 4, 16 and 256 are really supported, so anything else gets +rounded up to the next highest one of those); their class and +overlay names may be specified. You may also specify segments to be +USE16 or USE32. The defaults are PUBLIC ALIGN=1, no class, no +overlay, USE16. + +You can also specify that a segment is _absolute_ at a certain +segment address: + +[SEGMENT SCREEN ABSOLUTE=0xB800] + +This is an alternative to the ALIGN keyword. + +The format-specific directive GROUP allows segment grouping: [GROUP +DGROUP DATA BSS] defines the group DGROUP to contain segments DATA +and BSS. 
+ +Segments are defined as part of their group by default: if `var' is +declared in segment `data', which is part of group `dgroup', then +`SEG var' returns `dgroup', and `var' signifies the offset of `var' +relative to the beginning of `dgroup'. You must use `var WRT data' +to get the offset of `var' relative to the beginning of its +_segment_. + +NASM allows a segment to be in two groups, but will generate a +warning. References to the symbols in that segment will be resolved +relative to the _first_ group it is defined in. + +The directive [UPPERCASE] causes all symbol, segment and group names +output to the object file to be uppercased. The actual _assembly_ is +still case sensitive. + +Common variables in OBJ files can be `near' or `far': currently, +NASM has a horribly grotty way to support that, which is that if you +specify the common variable's size as negative, it will be near, and +otherwise it will be far. The support isn't perfect: if you declare +a far common variable both in a NASM assembly module and in a C +program, you may well find the linker reports "mismatch in +array-size" or some such. The reason for this is that far common +variables are defined by means of _two_ size constants, which are +multiplied to give the real size. Apparently the Microsoft linker +(at least) likes both constants, not merely their product, to match +up. This may be fixed in a future release. + +If the module you're writing is intended to contain the program +entry point, you can declare this by defining the special label +`..start' at the start point, either as a label or by EQU (although +of course the normal caveats about EQU dependency still apply). + +`obj' has an unusual handling of assembly modes: instead of having a +global default for the whole file, there is a separate default for +each segment. 
Thus, each [SEGMENT] directive carries an implicit +[BITS] directive with it, which switches to 16-bit or 32-bit mode +depending on whether the segment is a Use16 or Use32 segment. If you +want to place 32-bit code in a Use16 segment, you can use an +explicit [BITS 32] override, but if you switch temporarily away from +that segment, you will have to repeat the override after coming back +to it. + +`as86': Linux as86 (bin86-0.3) +------------------------------ + +This output format replicates the format used to pass data between +the Linux x86 assembler and linker, as86 and ld86. Its default file +name, yet again, is `filename.o'. Its default segment-size attribute +is 16 bits. + +`rdf': Relocatable Dynamic Object File Format +--------------------------------------------- + +RDOFF was designed initially to test the object-file production +interface to NASM. It soon became apparent that it could be enhanced +for use in serious applications due to its simplicity; code to load +and execute an RDOFF object module is very simple. It also contains +enhancements to allow it to be linked with a dynamic link library at +either run- or load- time, depending on how complex you wish to make +your loader. + +The `rdoff' directory in the NASM distribution archive contains +source for an RDF linker and loader to run under Linux. + +`rdf' has a default segment-size attribute of 32 bits. + +Debugging format: `dbg' +----------------------- + +This output format is not built into NASM by default: it's for +debugging purposes. It produces a debug dump of everything that the +NASM assembly module feeds to the output driver, for the benefit of +people trying to write their own output drivers. + +Bugs +==== + +Apart from the missing features (correct OBJ COMMON support, ELF +alignment, ELF PIC support, etc.), there are no _known_ bugs. +However, any you find, with patches if possible, should be sent to +<jules@dcs.warwick.ac.uk> or <anakin@pobox.com>, and we'll try to +fix them. 
+ +Beware of Pentium-specific instructions: Intel have provided a macro +file for MASM, to implement the eight or nine new Pentium opcodes as +MASM macros. NASM does not generate the same code for the CMPXCHG8B +instruction as these macros do: this is due to a bug in the _macro_, +not in NASM. The macro works by generating an SIDT instruction (if I +remember rightly), which has almost exactly the right form, then +using ORG to back up a bit and do a DB over the top of one of the +opcode bytes. The trouble is that Intel overlooked (or were unable +to allow for) the possibility that the SIDT instruction may contain +an 0x66 or 0x67 operand or address size prefix. If this happens, the +ORG will back up by the wrong amount, and the macro will generate +incorrect code. NASM gets it right. This, also, is not a bug in +NASM, so please don't report it as one. (Also please note that the +ORG directive in NASM doesn't work this way, and so you can't do +equivalent tricks with it...) + +That's All Folks! +================= + +Enjoy using NASM! Please feel free to send me comments, or +constructive criticism, or bug fixes, or requests, or general chat. + +Contributions are also welcome: if anyone knows anything about any +other object file formats I should support, please feel free to send +me documentation and some short example files (in my experience, +documentation is useless without at _least_ one example), or even to +write me an output module. OS/2 object files, in particular, spring +to mind. I don't have OS/2, though. + +Please keep flames to a minimum: I have had some very angry e-mails +in the past, condemning me for writing a useless assembler, that +output in no useful format (at the time, that was true), generated +incorrect code (several typos in the instruction table, since fixed) +and took up too much memory and disk space (the price you pay for +total portability, it seems). 
All these were criticisms I was happy +to hear, but I didn't appreciate the flames that went with them. +NASM _is_ still a prototype, and you use it at your own risk. I +_think_ it works, and if it doesn't then I want to know about it, +but I don't guarantee anything. So don't flame me, please. Blame, +but don't flame. + +- Simon Tatham <anakin@pobox.com>, 21-Nov-96 @@ -0,0 +1,443 @@ +/* nasm.h main header file for the Netwide Assembler: inter-module interface + * + * The Netwide Assembler is copyright (C) 1996 Simon Tatham and + * Julian Hall. All rights reserved. The software is + * redistributable under the licence given in the file "Licence" + * distributed in the NASM archive. + * + * initial version: 27/iii/95 by Simon Tatham + */ + +#ifndef NASM_H +#define NASM_H + +#define NASM_MAJOR_VER 0 +#define NASM_MINOR_VER 91 +#define NASM_VER "0.91" + +#ifndef NULL +#define NULL 0 +#endif + +#ifndef FALSE +#define FALSE 0 /* comes in handy */ +#endif +#ifndef TRUE +#define TRUE 1 +#endif + +#define NO_SEG -1L /* null segment value */ +#define SEG_ABS 0x40000000L /* mask for far-absolute segments */ + +#ifndef FILENAME_MAX +#define FILENAME_MAX 256 +#endif + +/* + * We must declare the existence of this structure type up here, + * since we have to reference it before we define it... + */ +struct ofmt; + +/* + * ------------------------- + * Error reporting functions + * ------------------------- + */ + +/* + * An error reporting function should look like this. + */ +typedef void (*efunc) (int severity, char *fmt, ...); + +/* + * These are the error severity codes which get passed as the first + * argument to an efunc. 
+ */ + +#define ERR_WARNING 0 /* warn only: no further action */ +#define ERR_NONFATAL 1 /* terminate assembly after phase */ +#define ERR_FATAL 2 /* instantly fatal: exit with error */ +#define ERR_PANIC 3 /* internal error: panic instantly + * and dump core for reference */ +#define ERR_MASK 0x0F /* mask off the above codes */ +#define ERR_NOFILE 0x10 /* don't give source file name/line */ +#define ERR_USAGE 0x20 /* print a usage message */ + +/* + * ----------------------- + * Other function typedefs + * ----------------------- + */ + +/* + * A label-lookup function should look like this. + */ +typedef int (*lfunc) (char *label, long *segment, long *offset); + +/* + * And a label-definition function like this. + */ +typedef void (*ldfunc) (char *label, long segment, long offset, + struct ofmt *ofmt, efunc error); + +/* + * ----------------------------------------------------------- + * Format of the `insn' structure returned from `parser.c' and + * passed into `assemble.c' + * ----------------------------------------------------------- + */ + +/* + * Here we define the operand types. These are implemented as bit + * masks, since some are subsets of others; e.g. AX in a MOV + * instruction is a special operand type, whereas AX in other + * contexts is just another 16-bit register. (Also, consider CL in + * shift instructions, DX in OUT, etc.) 
+ */ + +/* size, and other attributes, of the operand */ +#define BITS8 0x00000001L +#define BITS16 0x00000002L +#define BITS32 0x00000004L +#define BITS64 0x00000008L /* FPU only */ +#define BITS80 0x00000010L /* FPU only */ +#define FAR 0x00000020L /* grotty: this means 16:16 or */ + /* 16:32, like in CALL/JMP */ +#define NEAR 0x00000040L +#define SHORT 0x00000080L /* and this means what it says :) */ + +#define SIZE_MASK 0x000000FFL /* all the size attributes */ +#define NON_SIZE (~SIZE_MASK) + +#define TO 0x00000100L /* reverse effect in FADD, FSUB &c */ +#define COLON 0x00000200L /* operand is followed by a colon */ + +/* type of operand: memory reference, register, etc. */ +#define MEMORY 0x00204000L +#define REGISTER 0x00001000L /* register number in 'basereg' */ +#define IMMEDIATE 0x00002000L + +#define REGMEM 0x00200000L /* for r/m, ie EA, operands */ +#define REGNORM 0x00201000L /* 'normal' reg, qualifies as EA */ +#define REG8 0x00201001L +#define REG16 0x00201002L +#define REG32 0x00201004L +#define FPUREG 0x01000000L /* floating point stack registers */ +#define FPU0 0x01000800L /* FPU stack register zero */ +#define MMXREG 0x00001008L /* MMX registers */ + +/* special register operands: these may be treated differently */ +#define REG_SMASK 0x00070000L /* a mask for the following */ +#define REG_ACCUM 0x00211000L /* accumulator: AL, AX or EAX */ +#define REG_AL 0x00211001L /* REG_ACCUM | BITSxx */ +#define REG_AX 0x00211002L /* ditto */ +#define REG_EAX 0x00211004L /* and again */ +#define REG_COUNT 0x00221000L /* counter: CL, CX or ECX */ +#define REG_CL 0x00221001L /* REG_COUNT | BITSxx */ +#define REG_CX 0x00221002L /* ditto */ +#define REG_ECX 0x00221004L /* another one */ +#define REG_DX 0x00241002L +#define REG_SREG 0x00081002L /* any segment register */ +#define REG_CS 0x01081002L /* CS */ +#define REG_DESS 0x02081002L /* DS, ES, SS (non-CS 86 registers) */ +#define REG_FSGS 0x04081002L /* FS, GS (386 extended registers) */ +#define REG_CDT 
0x00101004L /* CRn, DRn and TRn */ +#define REG_CREG 0x08101004L /* CRn */ +#define REG_CR4 0x08101404L /* CR4 (Pentium only) */ +#define REG_DREG 0x10101004L /* DRn */ +#define REG_TREG 0x20101004L /* TRn */ + +/* special type of EA */ +#define MEM_OFFS 0x00604000L /* simple [address] offset */ + +/* special type of immediate operand */ +#define UNITY 0x00802000L /* for shift/rotate instructions */ + +/* + * Next, the codes returned from the parser, for registers and + * instructions. + */ + +enum { /* register names */ + R_AH = 1, R_AL, R_AX, R_BH, R_BL, R_BP, R_BX, R_CH, R_CL, R_CR0, + R_CR2, R_CR3, R_CR4, R_CS, R_CX, R_DH, R_DI, R_DL, R_DR0, R_DR1, + R_DR2, R_DR3, R_DR6, R_DR7, R_DS, R_DX, R_EAX, R_EBP, R_EBX, + R_ECX, R_EDI, R_EDX, R_ES, R_ESI, R_ESP, R_FS, R_GS, R_MM0, + R_MM1, R_MM2, R_MM3, R_MM4, R_MM5, R_MM6, R_MM7, R_SI, R_SP, + R_SS, R_ST0, R_ST1, R_ST2, R_ST3, R_ST4, R_ST5, R_ST6, R_ST7, + R_TR3, R_TR4, R_TR5, R_TR6, R_TR7, REG_ENUM_LIMIT +}; + +enum { /* instruction names */ + I_AAA, I_AAD, I_AAM, I_AAS, I_ADC, I_ADD, I_AND, I_ARPL, + I_BOUND, I_BSF, I_BSR, I_BSWAP, I_BT, I_BTC, I_BTR, I_BTS, + I_CALL, I_CBW, I_CDQ, I_CLC, I_CLD, I_CLI, I_CLTS, I_CMC, I_CMP, + I_CMPSB, I_CMPSD, I_CMPSW, I_CMPXCHG, I_CMPXCHG8B, I_CPUID, + I_CWD, I_CWDE, I_DAA, I_DAS, I_DB, I_DD, I_DEC, I_DIV, I_DQ, + I_DT, I_DW, I_EMMS, I_ENTER, I_EQU, I_F2XM1, I_FABS, I_FADD, + I_FADDP, I_FBLD, I_FBSTP, I_FCHS, I_FCLEX, I_FCMOVB, I_FCMOVBE, + I_FCMOVE, I_FCMOVNB, I_FCMOVNBE, I_FCMOVNE, I_FCMOVNU, I_FCMOVU, + I_FCOM, I_FCOMI, I_FCOMIP, I_FCOMP, I_FCOMPP, I_FCOS, I_FDECSTP, + I_FDISI, I_FDIV, I_FDIVP, I_FDIVR, I_FDIVRP, I_FENI, I_FFREE, + I_FIADD, I_FICOM, I_FICOMP, I_FIDIV, I_FIDIVR, I_FILD, I_FIMUL, + I_FINCSTP, I_FINIT, I_FIST, I_FISTP, I_FISUB, I_FISUBR, I_FLD, + I_FLD1, I_FLDCW, I_FLDENV, I_FLDL2E, I_FLDL2T, I_FLDLG2, + I_FLDLN2, I_FLDPI, I_FLDZ, I_FMUL, I_FMULP, I_FNOP, I_FPATAN, + I_FPREM, I_FPREM1, I_FPTAN, I_FRNDINT, I_FRSTOR, I_FSAVE, + I_FSCALE, I_FSETPM, I_FSIN, I_FSINCOS, 
I_FSQRT, I_FST, I_FSTCW, + I_FSTENV, I_FSTP, I_FSTSW, I_FSUB, I_FSUBP, I_FSUBR, I_FSUBRP, + I_FTST, I_FUCOM, I_FUCOMI, I_FUCOMIP, I_FUCOMP, I_FUCOMPP, + I_FXAM, I_FXCH, I_FXTRACT, I_FYL2X, I_FYL2XP1, I_HLT, I_ICEBP, + I_IDIV, I_IMUL, I_IN, I_INC, I_INSB, I_INSD, I_INSW, I_INT, + I_INT1, I_INT01, I_INT3, I_INTO, I_INVD, I_INVLPG, I_IRET, + I_IRETD, I_IRETW, I_JCXZ, I_JECXZ, I_JMP, I_LAHF, I_LAR, I_LDS, + I_LEA, I_LEAVE, I_LES, I_LFS, I_LGDT, I_LGS, I_LIDT, I_LLDT, + I_LMSW, I_LOADALL, I_LODSB, I_LODSD, I_LODSW, I_LOOP, I_LOOPE, + I_LOOPNE, I_LOOPNZ, I_LOOPZ, I_LSL, I_LSS, I_LTR, I_MOV, I_MOVD, + I_MOVQ, I_MOVSB, I_MOVSD, I_MOVSW, I_MOVSX, I_MOVZX, I_MUL, + I_NEG, I_NOP, I_NOT, I_OR, I_OUT, I_OUTSB, I_OUTSD, I_OUTSW, + I_PACKSSDW, I_PACKSSWB, I_PACKUSWB, I_PADDB, I_PADDD, I_PADDSB, + I_PADDSW, I_PADDUSB, I_PADDUSW, I_PADDW, I_PAND, I_PANDN, + I_PCMPEQB, I_PCMPEQD, I_PCMPEQW, I_PCMPGTB, I_PCMPGTD, + I_PCMPGTW, I_PMADDWD, I_PMULHW, I_PMULLW, I_POP, I_POPA, + I_POPAD, I_POPAW, I_POPF, I_POPFD, I_POPFW, I_POR, I_PSLLD, + I_PSLLQ, I_PSLLW, I_PSRAD, I_PSRAW, I_PSRLD, I_PSRLQ, I_PSRLW, + I_PSUBB, I_PSUBD, I_PSUBSB, I_PSUBSW, I_PSUBUSB, I_PSUBUSW, + I_PSUBW, I_PUNPCKHBW, I_PUNPCKHDQ, I_PUNPCKHWD, I_PUNPCKLBW, + I_PUNPCKLDQ, I_PUNPCKLWD, I_PUSH, I_PUSHA, I_PUSHAD, I_PUSHAW, + I_PUSHF, I_PUSHFD, I_PUSHFW, I_PXOR, I_RCL, I_RCR, I_RDMSR, + I_RDPMC, I_RDTSC, I_RESB, I_RESD, I_RESQ, I_REST, I_RESW, I_RET, + I_RETF, I_RETN, I_ROL, I_ROR, I_RSM, I_SAHF, I_SAL, I_SALC, + I_SAR, I_SBB, I_SCASB, I_SCASD, I_SCASW, I_SGDT, I_SHL, I_SHLD, + I_SHR, I_SHRD, I_SIDT, I_SLDT, I_SMSW, I_STC, I_STD, I_STI, + I_STOSB, I_STOSD, I_STOSW, I_STR, I_SUB, I_TEST, I_UMOV, I_VERR, + I_VERW, I_WAIT, I_WBINVD, I_WRMSR, I_XADD, I_XCHG, I_XLATB, + I_XOR, I_CMOVcc, I_Jcc, I_SETcc +}; + +enum { /* condition code names */ + C_A, C_AE, C_B, C_BE, C_C, C_E, C_G, C_GE, C_L, C_LE, C_NA, C_NAE, + C_NB, C_NBE, C_NC, C_NE, C_NG, C_NGE, C_NL, C_NLE, C_NO, C_NP, + C_NS, C_NZ, C_O, C_P, C_PE, C_PO, C_S, C_Z +}; + +/* + * 
Note that because segment registers may be used as instruction + * prefixes, we must ensure the enumerations for prefixes and + * register names do not overlap. + */ +enum { /* instruction prefixes */ + PREFIX_ENUM_START = REG_ENUM_LIMIT, + P_A16 = PREFIX_ENUM_START, P_A32, P_LOCK, P_O16, P_O32, P_REP, P_REPE, + P_REPNE, P_REPNZ, P_REPZ, P_TIMES +}; + +enum { /* extended operand types */ + EOT_NOTHING, EOT_DB_STRING, EOT_DB_NUMBER +}; + +typedef struct { /* operand to an instruction */ + long type; /* type of operand */ + int addr_size; /* 0 means default; 16; 32 */ + int basereg, indexreg, scale; /* registers and scale involved */ + long segment; /* immediate segment, if needed */ + long offset; /* any immediate number */ + long wrt; /* segment base it's relative to */ +} operand; + +typedef struct extop { /* extended operand */ + struct extop *next; /* linked list */ + long type; /* defined above */ + char *stringval; /* if it's a string, then here it is */ + int stringlen; /* ... and here's how long it is */ + long segment; /* if it's a number/address, then... */ + long offset; /* ... it's given here ... */ + long wrt; /* ... and here */ +} extop; + +#define MAXPREFIX 4 + +typedef struct { /* an instruction itself */ + char *label; /* the label defined, or NULL */ + int prefixes[MAXPREFIX]; /* instruction prefixes, if any */ + int nprefix; /* number of entries in above */ + int opcode; /* the opcode - not just the string */ + int condition; /* the condition code, if Jcc/SETcc */ + int operands; /* how many operands? 0-3 */ + operand oprs[3]; /* the operands, defined as above */ + extop *eops; /* extended operands */ + int times; /* repeat count (TIMES prefix) */ +} insn; + +/* + * ------------------------------------------------------------ + * The data structure defining an output format driver, and the + * interfaces to the functions therein. 
+ * ------------------------------------------------------------ + */ + +struct ofmt { + /* + * This is a short (one-liner) description of the type of + * output generated by the driver. + */ + char *fullname; + + /* + * This is a single keyword used to select the driver. + */ + char *shortname; + + /* + * This procedure is called at the start of an output session. + * It tells the output format what file it will be writing to, + * what routine to report errors through, and how to interface + * to the label manager if necessary. It also gives it a chance + * to do other initialisation. + */ + void (*init) (FILE *fp, efunc error, ldfunc ldef); + + /* + * This procedure is called by assemble() to write actual + * generated code or data to the object file. Typically it + * doesn't have to actually _write_ it, just store it for + * later. + * + * The `type' argument specifies the type of output data, and + * usually the size as well: its contents are described below. + */ + void (*output) (long segto, void *data, unsigned long type, + long segment, long wrt); + + /* + * This procedure is called once for every symbol defined in + * the module being assembled. It gives the name and value of + * the symbol, in NASM's terms, and indicates whether it has + * been declared to be global. Note that the parameter "name", + * when passed, will point to a piece of static storage + * allocated inside the label manager - it's safe to keep using + * that pointer, because the label manager doesn't clean up + * until after the output driver has. + * + * Values of `is_global' are: 0 means the symbol is local; 1 + * means the symbol is global; 2 means the symbol is common (in + * which case `offset' holds the _size_ of the variable). + * Anything else is available for the output driver to use + * internally. + */ + void (*symdef) (char *name, long segment, long offset, int is_global); + + /* + * This procedure is called when the source code requests a + * segment change. 
It should return the corresponding segment + * _number_ for the name, or NO_SEG if the name is not a valid + * segment name. + * + * It may also be called with NULL, in which case it is to + * return the _default_ section number for starting assembly in. + * + * It is allowed to modify the string it is given a pointer to. + * + * It is also allowed to specify a default instruction size for + * the segment, by setting `*bits' to 16 or 32. Or, if it + * doesn't wish to define a default, it can leave `bits' alone. + */ + long (*section) (char *name, int pass, int *bits); + + /* + * This procedure is called to modify the segment base values + * returned from the SEG operator. It is given a segment base + * value (i.e. a segment value with the low bit set), and is + * required to produce in return a segment value which may be + * different. It can map segment bases to absolute numbers by + * means of returning SEG_ABS types. + */ + long (*segbase) (long segment); + + /* + * This procedure is called to allow the output driver to + * process its own specific directives. When called, it has the + * directive word in `directive' and the parameter string in + * `value'. It is called in both assembly passes, and `pass' + * will be either 1 or 2. + * + * This procedure should return zero if it does not _recognise_ + * the directive, so that the main program can report an error. + * If it recognises the directive but then has its own errors, + * it should report them itself and then return non-zero. It + * should also return non-zero if it correctly processes the + * directive. + */ + int (*directive) (char *directive, char *value, int pass); + + /* + * This procedure is called before anything else - even before + * the "init" routine - and is passed the name of the input + * file from which this output file is being generated. It + * should return its preferred name for the output file in + * `outname'. 
Since it is called before the driver is properly + * initialised, it has to be passed its error handler + * separately. + * + * This procedure may also take its own copy of the input file + * name for use in writing the output file: it is _guaranteed_ + * that it will be called before the "init" routine. + * + * The parameter `outname' points to an area of storage + * guaranteed to be at least FILENAME_MAX in size. + */ + void (*filename) (char *inname, char *outname, efunc error); + + /* + * This procedure is called after assembly finishes, to allow + * the output driver to clean itself up and free its memory. + * Typically, it will also be the point at which the object + * file actually gets _written_. + * + * One thing the cleanup routine should always do is to close + * the output file pointer. + */ + void (*cleanup) (void); +}; + +/* + * values for the `type' parameter to an output function. Each one + * must have the actual number of _bytes_ added to it. + * + * Exceptions are OUT_RELxADR, which denote an x-byte relocation + * which will be a relative jump. For this we need to know the + * distance in bytes from the start of the relocated record until + * the end of the containing instruction. _This_ is what is stored + * in the size part of the parameter, in this case. + * + * Also OUT_RESERVE denotes reservation of N bytes of BSS space, + * and the contents of the "data" parameter is irrelevant. + * + * The "data" parameter for the output function points to a "long", + * containing the address in question, unless the type is + * OUT_RAWDATA, in which case it points to an "unsigned char" + * array. 
+ */ +#define OUT_RAWDATA 0x00000000UL +#define OUT_ADDRESS 0x10000000UL +#define OUT_REL2ADR 0x20000000UL +#define OUT_REL4ADR 0x30000000UL +#define OUT_RESERVE 0x40000000UL +#define OUT_TYPMASK 0xF0000000UL +#define OUT_SIZMASK 0x0FFFFFFFUL + +/* + * ----- + * Other + * ----- + */ + +/* + * This is a useful #define which I keep meaning to use more often: + * the number of elements of a statically defined array. + */ + +#define elements(x) ( sizeof(x) / sizeof(*(x)) ) + +#endif diff --git a/nasmlib.c b/nasmlib.c new file mode 100644 index 0000000..c8710b0 --- /dev/null +++ b/nasmlib.c @@ -0,0 +1,488 @@ +/* nasmlib.c library routines for the Netwide Assembler + * + * The Netwide Assembler is copyright (C) 1996 Simon Tatham and + * Julian Hall. All rights reserved. The software is + * redistributable under the licence given in the file "Licence" + * distributed in the NASM archive. + */ + +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <ctype.h> + +#include "nasm.h" +#include "nasmlib.h" + +static efunc nasm_malloc_error; + +void nasm_set_malloc_error (efunc error) { + nasm_malloc_error = error; +} + +void *nasm_malloc (size_t size) { + void *p = malloc(size); + if (!p) + nasm_malloc_error (ERR_FATAL | ERR_NOFILE, "out of memory"); + return p; +} + +void *nasm_realloc (void *q, size_t size) { + void *p = q ? 
realloc(q, size) : malloc(size); + if (!p) + nasm_malloc_error (ERR_FATAL | ERR_NOFILE, "out of memory"); + return p; +} + +void nasm_free (void *q) { + if (q) + free (q); +} + +char *nasm_strdup (char *s) { + char *p; + + p = nasm_malloc(strlen(s)+1); + strcpy (p, s); + return p; +} + +int nasm_stricmp (char *s1, char *s2) { + while (*s1 && toupper(*s1) == toupper(*s2)) + s1++, s2++; + if (!*s1 && !*s2) + return 0; + else if (toupper(*s1) < toupper(*s2)) + return -1; + else + return 1; +} + +int nasm_strnicmp (char *s1, char *s2, int n) { + while (n > 0 && *s1 && toupper(*s1) == toupper(*s2)) + s1++, s2++, n--; + if ((!*s1 && !*s2) || n==0) + return 0; + else if (toupper(*s1) < toupper(*s2)) + return -1; + else + return 1; +} + +#define isnumchar(c) ( isalnum(c) || (c) == '$') +#define numvalue(c) ((c)>='a' ? (c)-'a'+10 : (c)>='A' ? (c)-'A'+10 : (c)-'0') + +long readnum (char *str, int *error) { + char *r = str, *q; + long radix; + long result; + + *error = FALSE; + + while (isspace(*r)) r++; /* find start of number */ + q = r; + + while (isnumchar(*q)) q++; /* find end of number */ + + /* + * If it begins 0x, 0X or $, or ends in H, it's in hex. if it + * ends in Q, it's octal. if it ends in B, it's binary. + * Otherwise, it's ordinary decimal. 
+ */ + if (*r=='0' && (r[1]=='x' || r[1]=='X')) + radix = 16, r += 2; + else if (*r=='$') + radix = 16, r++; + else if (q[-1]=='H' || q[-1]=='h') + radix = 16 , q--; + else if (q[-1]=='Q' || q[-1]=='q') + radix = 8 , q--; + else if (q[-1]=='B' || q[-1]=='b') + radix = 2 , q--; + else + radix = 10; + + result = 0; + while (*r && r < q) { + if (*r<'0' || (*r>'9' && *r<'A') || numvalue(*r)>=radix) { + *error = TRUE; + return 0; + } + result = radix * result + numvalue(*r); + r++; + } + return result; +} + +static long next_seg; + +void seg_init(void) { + next_seg = 0; +} + +long seg_alloc(void) { + return (next_seg += 2) - 2; +} + +void fwriteshort (int data, FILE *fp) { + fputc (data & 255, fp); + fputc ((data >> 8) & 255, fp); +} + +void fwritelong (long data, FILE *fp) { + fputc (data & 255, fp); + fputc ((data >> 8) & 255, fp); + fputc ((data >> 16) & 255, fp); + fputc ((data >> 24) & 255, fp); +} + +void standard_extension (char *inname, char *outname, char *extension, + efunc error) { + char *p, *q; + + q = inname; + p = outname; + while (*q) *p++ = *q++; /* copy, and find end of string */ + *p = '\0'; /* terminate it */ + while (p > outname && *--p != '.');/* find final period (or whatever) */ + if (*p != '.') while (*p) p++; /* go back to end if none found */ + if (!strcmp(p, extension)) { /* is the extension already there? 
*/ + if (*extension) + error(ERR_WARNING | ERR_NOFILE, + "file name already ends in `%s': " + "output will be in `nasm.out'", + extension); + else + error(ERR_WARNING | ERR_NOFILE, + "file name already has no extension: " + "output will be in `nasm.out'"); + strcpy(outname, "nasm.out"); + } else + strcpy(p, extension); +} + +#define RAA_BLKSIZE 4096 /* this many longs allocated at once */ +#define RAA_LAYERSIZE 1024 /* this many _pointers_ allocated */ + +typedef struct RAA RAA; +typedef union RAA_UNION RAA_UNION; +typedef struct RAA_LEAF RAA_LEAF; +typedef struct RAA_BRANCH RAA_BRANCH; + +struct RAA { + int layers; + long stepsize; + union RAA_UNION { + struct RAA_LEAF { + long data[RAA_BLKSIZE]; + } l; + struct RAA_BRANCH { + struct RAA *data[RAA_LAYERSIZE]; + } b; + } u; +}; + +#define LEAFSIZ (sizeof(RAA)-sizeof(RAA_UNION)+sizeof(RAA_LEAF)) +#define BRANCHSIZ (sizeof(RAA)-sizeof(RAA_UNION)+sizeof(RAA_BRANCH)) + +#define LAYERSIZ(r) ( (r)->layers==0 ? RAA_BLKSIZE : RAA_LAYERSIZE ) + +static struct RAA *real_raa_init (int layers) { + struct RAA *r; + + if (layers == 0) { + r = nasm_malloc (LEAFSIZ); + memset (r->u.l.data, 0, sizeof(r->u.l.data)); + r->layers = 0; + r->stepsize = 1L; + } else { + r = nasm_malloc (BRANCHSIZ); + memset (r->u.b.data, 0, sizeof(r->u.b.data)); + r->layers = layers; + r->stepsize = 1L; + while (layers--) + r->stepsize *= RAA_LAYERSIZE; + } + return r; +} + +struct RAA *raa_init (void) { + return real_raa_init (0); +} + +void raa_free (struct RAA *r) { + if (r->layers == 0) + nasm_free (r); + else { + struct RAA **p; + for (p = r->u.b.data; p - r->u.b.data < RAA_LAYERSIZE; p++) + if (*p) + raa_free (*p); + } +} + +long raa_read (struct RAA *r, long posn) { + if (posn > r->stepsize * LAYERSIZ(r)) + return 0L; + while (r->layers > 0) { + ldiv_t l; + l = ldiv (posn, r->stepsize); + r = r->u.b.data[l.quot]; + posn = l.rem; + if (!r) /* better check this */ + return 0L; + } + return r->u.l.data[posn]; +} + +struct RAA *raa_write (struct RAA 
*r, long posn, long value) { + struct RAA *result; + + if (posn < 0) + nasm_malloc_error (ERR_PANIC, "negative position in raa_write"); + + while (r->stepsize * LAYERSIZ(r) < posn) { + /* + * Must go up a layer. + */ + struct RAA *s; + + s = nasm_malloc (BRANCHSIZ); + memset (s->u.b.data, 0, sizeof(r->u.b.data)); + s->layers = r->layers + 1; + s->stepsize = RAA_LAYERSIZE * r->stepsize; + s->u.b.data[0] = r; + r = s; + } + + result = r; + + while (r->layers > 0) { + ldiv_t l; + struct RAA **s; + l = ldiv (posn, r->stepsize); + s = &r->u.b.data[l.quot]; + if (!*s) + *s = real_raa_init (r->layers - 1); + r = *s; + posn = l.rem; + } + + r->u.l.data[posn] = value; + + return result; +} + +#define SAA_MAXLEN 8192 + +struct SAA { + /* + * members `end' and `elem_len' are only valid in first link in + * list; `rptr' and `rpos' are used for reading + */ + struct SAA *next, *end, *rptr; + long elem_len, length, posn, start, rpos; + char *data; +}; + +struct SAA *saa_init (long elem_len) { + struct SAA *s; + + if (elem_len > SAA_MAXLEN) + nasm_malloc_error (ERR_PANIC | ERR_NOFILE, "SAA with huge elements"); + + s = nasm_malloc (sizeof(struct SAA)); + s->posn = s->start = 0L; + s->elem_len = elem_len; + s->length = SAA_MAXLEN - (SAA_MAXLEN % elem_len); + s->data = nasm_malloc (s->length); + s->next = NULL; + s->end = s; + + return s; +} + +void saa_free (struct SAA *s) { + struct SAA *t; + + while (s) { + t = s->next; + nasm_free (s->data); + nasm_free (s); + s = t; + } +} + +void *saa_wstruct (struct SAA *s) { + void *p; + + if (s->end->length - s->end->posn < s->elem_len) { + s->end->next = nasm_malloc (sizeof(struct SAA)); + s->end->next->start = s->end->start + s->end->posn; + s->end = s->end->next; + s->end->length = s->length; + s->end->next = NULL; + s->end->posn = 0L; + s->end->data = nasm_malloc (s->length); + } + + p = s->end->data + s->end->posn; + s->end->posn += s->elem_len; + return p; +} + +void saa_wbytes (struct SAA *s, void *data, long len) { + char *d = 
data; + + while (len > 0) { + long l = s->end->length - s->end->posn; + if (l > len) + l = len; + if (l > 0) { + if (d) { + memcpy (s->end->data + s->end->posn, d, l); + d += l; + } else + memset (s->end->data + s->end->posn, 0, l); + s->end->posn += l; + len -= l; + } + if (len > 0) { + s->end->next = nasm_malloc (sizeof(struct SAA)); + s->end->next->start = s->end->start + s->end->posn; + s->end = s->end->next; + s->end->length = s->length; + s->end->next = NULL; + s->end->posn = 0L; + s->end->data = nasm_malloc (s->length); + } + } +} + +void saa_rewind (struct SAA *s) { + s->rptr = s; + s->rpos = 0L; +} + +void *saa_rstruct (struct SAA *s) { + void *p; + + if (!s->rptr) + return NULL; + + if (s->rptr->posn - s->rpos < s->elem_len) { + s->rptr = s->rptr->next; + if (!s->rptr) + return NULL; /* end of array */ + s->rpos = 0L; + } + + p = s->rptr->data + s->rpos; + s->rpos += s->elem_len; + return p; +} + +void *saa_rbytes (struct SAA *s, long *len) { + void *p; + + if (!s->rptr) + return NULL; + + p = s->rptr->data + s->rpos; + *len = s->rptr->posn - s->rpos; + s->rptr = s->rptr->next; + s->rpos = 0L; + return p; +} + +void saa_rnbytes (struct SAA *s, void *data, long len) { + char *d = data; + + while (len > 0) { + long l; + + if (!s->rptr) + return; + + l = s->rptr->posn - s->rpos; + if (l > len) + l = len; + if (l > 0) { + memcpy (d, s->rptr->data + s->rpos, l); + d += l; + s->rpos += l; + len -= l; + } + if (len > 0) { + s->rptr = s->rptr->next; + s->rpos = 0L; + } + } +} + +void saa_fread (struct SAA *s, long posn, void *data, long len) { + struct SAA *p; + long pos; + char *cdata = data; + + if (!s->rptr || posn > s->rptr->start + s->rpos) + saa_rewind (s); + while (posn >= s->rptr->start + s->rptr->posn) { + s->rptr = s->rptr->next; + if (!s->rptr) + return; /* what else can we do?! 
*/ + } + + p = s->rptr; + pos = posn - s->rptr->start; + while (len) { + long l = s->rptr->posn - pos; + if (l > len) + l = len; + memcpy (cdata, s->rptr->data+pos, l); + len -= l; + cdata += l; + p = p->next; + if (!p) + return; + pos = 0L; + } +} + +void saa_fwrite (struct SAA *s, long posn, void *data, long len) { + struct SAA *p; + long pos; + char *cdata = data; + + if (!s->rptr || posn > s->rptr->start + s->rpos) + saa_rewind (s); + while (posn >= s->rptr->start + s->rptr->posn) { + s->rptr = s->rptr->next; + if (!s->rptr) + return; /* what else can we do?! */ + } + + p = s->rptr; + pos = posn - s->rptr->start; + while (len) { + long l = s->rptr->posn - pos; + if (l > len) + l = len; + memcpy (s->rptr->data+pos, cdata, l); + len -= l; + cdata += l; + p = p->next; + if (!p) + return; + pos = 0L; + } +} + +void saa_fpwrite (struct SAA *s, FILE *fp) { + char *data; + long len; + + saa_rewind (s); + while ( (data = saa_rbytes (s, &len)) ) + fwrite (data, 1, len, fp); +} diff --git a/nasmlib.h b/nasmlib.h new file mode 100644 index 0000000..d827371 --- /dev/null +++ b/nasmlib.h @@ -0,0 +1,115 @@ +/* nasmlib.h header file for nasmlib.c + * + * The Netwide Assembler is copyright (C) 1996 Simon Tatham and + * Julian Hall. All rights reserved. The software is + * redistributable under the licence given in the file "Licence" + * distributed in the NASM archive. + */ + +#ifndef NASM_NASMLIB_H +#define NASM_NASMLIB_H + +/* + * Wrappers around malloc, realloc and free. nasm_malloc will + * fatal-error and die rather than return NULL; nasm_realloc will + * do likewise, and will also guarantee to work right on being + * passed a NULL pointer; nasm_free will do nothing if it is passed + * a NULL pointer. + */ +void nasm_set_malloc_error (efunc); +void *nasm_malloc (size_t); +void *nasm_realloc (void *, size_t); +void nasm_free (void *); +char *nasm_strdup (char *); + +/* + * ANSI doesn't guarantee the presence of `stricmp' or + * `strcasecmp'. 
+ */ +int nasm_stricmp (char *, char *); +int nasm_strnicmp (char *, char *, int); + +/* + * Convert a string into a number, using NASM number rules. Sets + * `*error' to TRUE if an error occurs, and FALSE otherwise. + */ +long readnum(char *str, int *error); + +/* + * seg_init: Initialise the segment-number allocator. + * seg_alloc: allocate a hitherto unused segment number. + */ +void seg_init(void); +long seg_alloc(void); + +/* + * many output formats will be able to make use of this: a standard + * function to add an extension to the name of the input file + */ +void standard_extension (char *inname, char *outname, char *extension, + efunc error); + +/* + * some handy macros that will probably be of use in more than one + * output format: convert integers into little-endian byte packed + * format in memory + */ + +#define WRITELONG(p,v) \ + do { \ + *(p)++ = (v) & 0xFF; \ + *(p)++ = ((v) >> 8) & 0xFF; \ + *(p)++ = ((v) >> 16) & 0xFF; \ + *(p)++ = ((v) >> 24) & 0xFF; \ + } while (0) + +#define WRITESHORT(p,v) \ + do { \ + *(p)++ = (v) & 0xFF; \ + *(p)++ = ((v) >> 8) & 0xFF; \ + } while (0) + +/* + * and routines to do the same thing to a file + */ +void fwriteshort (int data, FILE *fp); +void fwritelong (long data, FILE *fp); + +/* + * Routines to manage a dynamic random access array of longs which + * may grow in size to be more than the largest single malloc'able + * chunk. + */ + +struct RAA; + +struct RAA *raa_init (void); +void raa_free (struct RAA *); +long raa_read (struct RAA *, long); +struct RAA *raa_write (struct RAA *r, long posn, long value); + +/* + * Routines to manage a dynamic sequential-access array, under the + * same restriction on maximum mallocable block. This array may be + * written to in two ways: a contiguous chunk can be reserved of a + * given size, and a pointer returned, or single-byte data may be + * written. 
The array can also be read back in the same two ways: + * as a series of big byte-data blocks or as a list of structures + * of a given size. + */ + +struct SAA; + +struct SAA *saa_init (long elem_len); /* 1 == byte */ +void saa_free (struct SAA *); +void *saa_wstruct (struct SAA *); /* return a structure of elem_len */ +void saa_wbytes (struct SAA *, void *, long); /* write arbitrary bytes */ +void saa_rewind (struct SAA *); /* for reading from beginning */ +void *saa_rstruct (struct SAA *); /* return NULL on EOA */ +void *saa_rbytes (struct SAA *, long *); /* return 0 on EOA */ +void saa_rnbytes (struct SAA *, void *, long); /* read a given no. of bytes */ +void saa_fread (struct SAA *s, long posn, void *p, long len); /* fixup */ +void saa_fwrite (struct SAA *s, long posn, void *p, long len); /* fixup */ +void saa_fpwrite (struct SAA *, FILE *); + +#endif diff --git a/ndisasm.c b/ndisasm.c new file mode 100644 index 0000000..8e7a4cd --- /dev/null +++ b/ndisasm.c @@ -0,0 +1,270 @@ +/* ndisasm.c the Netwide Disassembler main module + * + * The Netwide Assembler is copyright (C) 1996 Simon Tatham and + * Julian Hall. All rights reserved. The software is + * redistributable under the licence given in the file "Licence" + * distributed in the NASM archive. 
+ */ + +#include <stdio.h> +#include <string.h> +#include <ctype.h> + +#include "nasm.h" +#include "nasmlib.h" +#include "sync.h" +#include "disasm.h" + +#define BPL 8 /* bytes per line of hex dump */ + +static const char *help = +"usage: ndisasm [-a] [-i] [-h] [-r] [-u] [-b bits] [-o origin] [-s sync...]\n" +" [-e bytes] [-k start,bytes] file\n" +" -a or -i activates auto (intelligent) sync\n" +" -u sets USE32 (32-bit mode)\n" +" -b 16 or -b 32 sets number of bits too\n" +" -h displays this text\n" +" -r displays the version number\n" +" -e skips <bytes> bytes of header\n" +" -k avoids disassembling <bytes> bytes from position <start>\n"; + +static void output_ins (unsigned long, unsigned char *, int, char *); +static void skip (unsigned long dist, FILE *fp); + +int main(int argc, char **argv) { + unsigned char buffer[INSN_MAX * 2], *p, *q; + char outbuf[256]; + char *pname = *argv; + char *filename = NULL; + unsigned long nextsync, synclen, initskip = 0L; + int lenread, lendis; + int autosync = FALSE; + int bits = 16; + int rn_error; + long offset; + FILE *fp; + + offset = 0; + init_sync(); + + while (--argc) { + char *v, *vv, *p = *++argv; + if (*p == '-') { + p++; + while (*p) switch (tolower(*p)) { + case 'a': /* auto or intelligent sync */ + case 'i': + autosync = TRUE; + p++; + break; + case 'h': + fprintf(stderr, help); + return 0; + break; + case 'r': + fprintf(stderr, "NDISASM version " NASM_VER "\n"); + return 0; + break; + case 'u': /* USE32 */ + bits = 32; + p++; + break; + case 'b': /* bits */ + v = p[1] ? p+1 : --argc ? *++argv : NULL; + if (!v) { + fprintf(stderr, "%s: `-b' requires an argument\n", pname); + return 1; + } + if (!strcmp(v, "16")) + bits = 16; + else if (!strcmp(v, "32")) + bits = 32; + else { + fprintf(stderr, "%s: argument to `-b' should" + " be `16' or `32'\n", pname); + } + p = ""; /* force to next argument */ + break; + case 'o': /* origin */ + v = p[1] ? p+1 : --argc ? 
*++argv : NULL; + if (!v) { + fprintf(stderr, "%s: `-o' requires an argument\n", pname); + return 1; + } + offset = readnum (v, &rn_error); + if (rn_error) { + fprintf(stderr, "%s: `-o' requires a numeric argument\n", + pname); + return 1; + } + p = ""; /* force to next argument */ + break; + case 's': /* sync point */ + v = p[1] ? p+1 : --argc ? *++argv : NULL; + if (!v) { + fprintf(stderr, "%s: `-s' requires an argument\n", pname); + return 1; + } + add_sync (readnum (v, &rn_error), 0L); + if (rn_error) { + fprintf(stderr, "%s: `-s' requires a numeric argument\n", + pname); + return 1; + } + p = ""; /* force to next argument */ + break; + case 'e': /* skip a header */ + v = p[1] ? p+1 : --argc ? *++argv : NULL; + if (!v) { + fprintf(stderr, "%s: `-e' requires an argument\n", pname); + return 1; + } + initskip = readnum (v, &rn_error); + if (rn_error) { + fprintf(stderr, "%s: `-e' requires a numeric argument\n", + pname); + return 1; + } + p = ""; /* force to next argument */ + break; + case 'k': /* skip a region */ + v = p[1] ? p+1 : --argc ? 
*++argv : NULL; + if (!v) { + fprintf(stderr, "%s: `-k' requires an argument\n", pname); + return 1; + } + vv = strchr(v, ','); + if (!vv) { + fprintf(stderr, "%s: `-k' requires two numbers separated" + " by a comma\n", pname); + return 1; + } + *vv++ = '\0'; + nextsync = readnum (v, &rn_error); + if (rn_error) { + fprintf(stderr, "%s: `-k' requires numeric arguments\n", + pname); + return 1; + } + synclen = readnum (vv, &rn_error); + if (rn_error) { + fprintf(stderr, "%s: `-k' requires numeric arguments\n", + pname); + return 1; + } + add_sync (nextsync, synclen); + p = ""; /* force to next argument */ + break; + } + } else if (!filename) { + filename = p; + } else { + fprintf(stderr, "%s: more than one filename specified\n", pname); + return 1; + } + } + + if (!filename) { + fprintf(stderr, help, pname); + return 0; + } + + fp = fopen(filename, "rb"); + if (initskip > 0) + skip (initskip, fp); + + /* + * This main loop is really horrible, and wants rewriting with + * an axe. It'll stay the way it is for a while though, until I + * find the energy... 
+ */ + + p = q = buffer; + nextsync = next_sync (offset, &synclen); + do { + unsigned long to_read = buffer+sizeof(buffer)-p; + if (to_read > nextsync-offset-(p-q)) + to_read = nextsync-offset-(p-q); + lenread = fread (p, 1, to_read, fp); + p += lenread; + if (offset == nextsync) { + if (synclen) { + printf("%08lX skipping 0x%lX bytes\n", offset, synclen); + offset += synclen; + skip (synclen, fp); + } + p = q = buffer; + nextsync = next_sync (offset, &synclen); + } + while (p > q && (p - q >= INSN_MAX || lenread == 0)) { + lendis = disasm (q, outbuf, bits, offset, autosync); + if (!lendis || lendis > (p - q) || + lendis > nextsync-offset) + lendis = eatbyte (q, outbuf); + output_ins (offset, q, lendis, outbuf); + q += lendis; + offset += lendis; + } + if (q >= buffer+INSN_MAX) { + unsigned char *r = buffer, *s = q; + int count = p - q; + while (count--) + *r++ = *s++; + p -= (q - buffer); + q = buffer; + } + } while (lenread > 0 || !feof(fp)); + fclose (fp); + return 0; +} + +static void output_ins (unsigned long offset, unsigned char *data, + int datalen, char *insn) { + int bytes; + printf("%08lX ", offset); + + bytes = 0; + while (datalen > 0 && bytes < BPL) { + printf("%02X", *data++); + bytes++; + datalen--; + } + + printf("%*s%s\n", (BPL+1-bytes)*2, "", insn); + + while (datalen > 0) { + printf(" -"); + bytes = 0; + while (datalen > 0 && bytes < BPL) { + printf("%02X", *data++); + bytes++; + datalen--; + } + printf("\n"); + } +} + +/* + * Skip a certain amount of data in a file, either by seeking if + * possible, or if that fails then by reading and discarding. + */ +static void skip (unsigned long dist, FILE *fp) { + char buffer[256]; /* should fit on most stacks :-) */ + + /* + * Got to be careful with fseek: at least one fseek I've tried + * doesn't approve of SEEK_CUR. So I'll use SEEK_SET and + * ftell... horrible but apparently necessary. 
+ */ + if (fseek (fp, dist+ftell(fp), SEEK_SET)) { + while (dist > 0) { + unsigned long len = (dist < sizeof(buffer) ? + dist : sizeof(buffer)); + if (fread (buffer, 1, len, fp) < len) { + perror("fread"); + exit(1); + } + dist -= len; + } + } +} diff --git a/ndisasm.doc b/ndisasm.doc new file mode 100644 index 0000000..5b5374a --- /dev/null +++ b/ndisasm.doc @@ -0,0 +1,199 @@ + The Netwide Disassembler, NDISASM + ================================= + +Introduction +============ + +The Netwide Disassembler is a small companion program to the Netwide +Assembler, NASM. It seemed a shame to have an x86 assembler, +complete with a full instruction table, and not make as much use of +it as possible, so here's a disassembler which shares the +instruction table (and some other bits of code) with NASM. + +The Netwide Disassembler does nothing except to produce +disassemblies of _binary_ source files. NDISASM does not have any +understanding of object file formats, like `objdump', and it will +not understand DOS .EXE files like `debug' will. It just +disassembles. + +Getting Started: Installation +============================= + +See `nasm.doc' for installation instructions. NDISASM, like NASM, +has a man page which you may want to put somewhere useful, if you +are on a Unix system. + +Running NDISASM +=============== + +To disassemble a file, you will typically use a command of the form + + ndisasm [-b16 | -b32] filename + +NDISASM can disassemble 16 bit code or 32 bit code equally easily, +provided of course that you remember to specify which it is to work +with. If no `-b' switch is present, NDISASM works in 16-bit mode by +default. The `-u' switch (for USE32) also invokes 32-bit mode. + +Two more command line options are `-r' which reports the version +number of NDISASM you are running, and `-h' which gives a short +summary of command line options. 
+ +COM Files: Specifying an Origin +=============================== + +To disassemble a DOS .COM file correctly, a disassembler must assume +that the first instruction in the file is loaded at address 0x100, +rather than at zero. NDISASM, which assumes by default that any file +you give it is loaded at zero, will therefore need to be informed of +this. + +The `-o' option allows you to declare a different origin for the +file you are disassembling. Its argument may be expressed in any of +the NASM numeric formats: decimal by default, if it begins with `$' +or `0x' or ends in `H' it's hex, if it ends in `Q' it's octal, and +if it ends in `B' it's binary. + +Hence, to disassemble a .COM file: + + ndisasm -o100h filename.com + +will do the trick. + +Code Following Data: Synchronisation +==================================== + +Suppose you are disassembling a file which contains some data which +isn't machine code, and _then_ contains some machine code. NDISASM +will faithfully plough through the data section, producing machine +instructions wherever it can (although most of them will look +bizarre, and some may have unusual prefixes, e.g. `fs or +ax,0x240a'), and generating `db' instructions every so often if it's +totally stumped. Then it will reach the code section. + +Supposing NDISASM has just finished generating a strange machine +instruction from part of the data section, and its file position is +now one byte _before_ the beginning of the code section. It's +entirely possible that another spurious instruction will get +generated, starting with the final byte of the data section, and +then the correct first instruction in the code section will not be +seen because the starting point skipped over it. This isn't really +ideal. + +To avoid this, you can specify a `synchronisation' point, or indeed +as many synchronisation points as you like (although NDISASM can +only handle 8192 sync points internally). 
The definition of a sync +point is this: NDISASM guarantees to hit sync points exactly during +disassembly. If it is thinking about generating an instruction which +would cause it to jump over a sync point, it will discard that +instruction and output a `db' instead. So it _will_ start +disassembly exactly from the sync point, and so you _will_ see all +the instructions in your code section. + +Sync points are specified using the `-s' option: they are measured +in terms of the program origin, not the file position. So if you +want to synchronise after 32 bytes of a .COM file, you would have to +do + + ndisasm -o100h -s120h file.com + +rather than + + ndisasm -o100h -s20h file.com + +As stated above, you can specify multiple sync markers if you need +to, just by repeating the `-s' option. + +Mixed Code and Data: Automatic (Intelligent) Synchronisation +============================================================ + +Suppose you are disassembling the boot sector of a DOS floppy (maybe +it has a virus, and you need to understand the virus so that you +know what kinds of damage it might have done you). Typically, this +will contain a JMP instruction, then some data, then the rest of the +code. So there is a very good chance of NDISASM being misaligned +when the data ends and the code begins. Hence a sync point is +needed. + +On the other hand, why should you have to specify the sync point +manually? What you'd do in order to find where the sync point would +be, surely, would be to read the JMP instruction, and then to use +its target address as a sync point. So can NDISASM do that for you? + +The answer, of course, is yes: using either of the synonymous +switches `-a' (for automatic sync) or `-i' (for intelligent sync) +will enable auto-sync mode. Auto-sync mode automatically generates a +sync point for any forward-referring PC-relative jump or call +instruction that NDISASM encounters. 
(Since NDISASM is one-pass, if +it encounters a PC-relative jump whose target has already been +processed, there isn't much it can do about it...) + +Only PC-relative jumps are processed, since an absolute jump is +either through a register (in which case NDISASM doesn't know what +the register contains) or involves a segment address (in which case +the target code isn't in the same segment that NDISASM is working +in, and so the sync point can't be placed anywhere useful). + +For some kinds of file, this mechanism will automatically put sync +points in all the right places, and save you from having to place +any sync points manually. However, it should be stressed that +auto-sync mode is _not_ guaranteed to catch all the sync points, and +you may still have to place some manually. + +Auto-sync mode doesn't prevent you from declaring manual sync +points: it just adds automatically generated ones to the ones you +provide. It's perfectly feasible to specify `-i' _and_ some `-s' +options. + +Another caveat with auto-sync mode is that if, by some unpleasant +fluke, something in your data section should disassemble to a +PC-relative call or jump instruction, NDISASM may obediently place a +sync point in a totally random place, for example in the middle of +one of the instructions in your code section. So you may end up with +a wrong disassembly even if you use auto-sync. Again, there isn't +much I can do about this. If you have problems, you'll have to use +manual sync points, or use the `-k' option (documented below) to +suppress disassembly of the data area. + +Other Options +============= + +The `-e' option skips a header on the file, by ignoring the first N +bytes. This means that the header is _not_ counted towards the +disassembly offset: if you give `-e10 -o10', disassembly will start +at byte 10 in the file, and this will be given offset 10, not 20. 
+ +The `-k' option is provided with two comma-separated numeric +arguments, the first of which is an assembly offset and the second +is a number of bytes to skip. This _will_ count the skipped bytes +towards the assembly offset: its use is to suppress disassembly of a +data section which wouldn't contain anything you wanted to see +anyway. + +Bugs and Improvements +===================== + +There are no known bugs. However, any you find, with patches if +possible, should be sent to <jules@dcs.warwick.ac.uk> or +<anakin@pobox.com>, and we'll try to fix them. Feel free to send +contributions and new features as well. + +Future plans include awareness of which processors certain +instructions will run on, and marking of instructions that are too +advanced for some processor (or are FPU instructions, or are +undocumented opcodes, or are privileged protected-mode instructions, +or whatever). + +That's All Folks! +================= + +I hope NDISASM is of some use to somebody. Including me. :-) + +I don't recommend taking NDISASM apart to see how an efficient +disassembler works, because as far as I know, it isn't an efficient +one anyway. You have been warned. + +Please feel free to send comments, suggestions, or chat to +<anakin@pobox.com>. As with NASM, no flames please. + +- Simon Tatham <anakin@pobox.com>, 21-Nov-96 diff --git a/outaout.c b/outaout.c new file mode 100644 index 0000000..e0ada3c --- /dev/null +++ b/outaout.c @@ -0,0 +1,466 @@ +/* outaout.c output routines for the Netwide Assembler to produce + * Linux a.out object files + * + * The Netwide Assembler is copyright (C) 1996 Simon Tatham and + * Julian Hall. All rights reserved. The software is + * redistributable under the licence given in the file "Licence" + * distributed in the NASM archive. 
+ */ + +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <ctype.h> + +#include "nasm.h" +#include "nasmlib.h" +#include "outform.h" + +#ifdef OF_AOUT + +struct Reloc { + struct Reloc *next; + long address; /* relative to _start_ of section */ + long symbol; /* symbol number or -ve section id */ + int bytes; /* 2 or 4 */ + int relative; /* TRUE or FALSE */ +}; + +struct Symbol { + long strpos; /* string table position of name */ + int type; /* symbol type - see flags below */ + long value; /* address, or COMMON variable size */ +}; + +/* + * Section IDs - used in Reloc.symbol when negative, and in + * Symbol.type when positive. + */ +#define SECT_ABS 2 /* absolute value */ +#define SECT_TEXT 4 /* text section */ +#define SECT_DATA 6 /* data section */ +#define SECT_BSS 8 /* bss section */ +#define SECT_MASK 0xE /* mask out any of the above */ + +/* + * Another flag used in Symbol.type. + */ +#define SYM_GLOBAL 1 /* it's a global symbol */ + +/* + * Bit more explanation of symbol types: SECT_xxx denotes a local + * symbol. SECT_xxx|SYM_GLOBAL denotes a global symbol, defined in + * this module. Just SYM_GLOBAL, with zero value, denotes an + * external symbol referenced in this module. And just SYM_GLOBAL, + * but with a non-zero value, declares a C `common' variable, of + * size `value'. 
+ */ + +struct Section { + struct SAA *data; + unsigned long len, size, nrelocs; + long index; + struct Reloc *head, **tail; +}; + +static struct Section stext, sdata; +static unsigned long bsslen; +static long bssindex; + +static struct SAA *syms; +static unsigned long nsyms; + +static struct RAA *bsym; + +static struct SAA *strs; +static unsigned long strslen; + +static FILE *aoutfp; +static efunc error; + +static void aout_write(void); +static void aout_write_relocs(struct Reloc *); +static void aout_write_syms(void); +static void aout_sect_write(struct Section *, unsigned char *, unsigned long); +static void aout_pad_sections(void); +static void aout_fixup_relocs(struct Section *); + +static void aout_init(FILE *fp, efunc errfunc, ldfunc ldef) { + aoutfp = fp; + error = errfunc; + (void) ldef; /* placate optimisers */ + stext.data = saa_init(1L); stext.head = NULL; stext.tail = &stext.head; + sdata.data = saa_init(1L); sdata.head = NULL; sdata.tail = &sdata.head; + stext.len = stext.size = sdata.len = sdata.size = bsslen = 0; + stext.nrelocs = sdata.nrelocs = 0; + stext.index = seg_alloc(); + sdata.index = seg_alloc(); + bssindex = seg_alloc(); + syms = saa_init((long)sizeof(struct Symbol)); + nsyms = 0; + bsym = raa_init(); + strs = saa_init(1L); + strslen = 0; +} + +static void aout_cleanup(void) { + struct Reloc *r; + + aout_pad_sections(); + aout_fixup_relocs(&stext); + aout_fixup_relocs(&sdata); + aout_write(); + fclose (aoutfp); + saa_free (stext.data); + while (stext.head) { + r = stext.head; + stext.head = stext.head->next; + nasm_free (r); + } + saa_free (sdata.data); + while (sdata.head) { + r = sdata.head; + sdata.head = sdata.head->next; + nasm_free (r); + } + saa_free (syms); + raa_free (bsym); + saa_free (strs); +} + +static long aout_section_names (char *name, int pass, int *bits) { + /* + * Default to 32 bits. 
+ */ + if (!name) + *bits = 32; + + if (!name) + return stext.index; + + if (!strcmp(name, ".text")) + return stext.index; + else if (!strcmp(name, ".data")) + return sdata.index; + else if (!strcmp(name, ".bss")) + return bssindex; + else + return NO_SEG; +} + +static void aout_deflabel (char *name, long segment, long offset, + int is_global) { + int pos = strslen+4; + struct Symbol *sym; + + if (name[0] == '.' && name[1] == '.') { + return; + } + + saa_wbytes (strs, name, (long)(1+strlen(name))); + strslen += 1+strlen(name); + + sym = saa_wstruct (syms); + + sym->strpos = pos; + sym->type = is_global ? SYM_GLOBAL : 0; + if (segment == NO_SEG) + sym->type |= SECT_ABS; + else if (segment == stext.index) + sym->type |= SECT_TEXT; + else if (segment == sdata.index) + sym->type |= SECT_DATA; + else if (segment == bssindex) + sym->type |= SECT_BSS; + else + sym->type = SYM_GLOBAL; + if (is_global == 2) + sym->value = offset; + else + sym->value = (sym->type == SYM_GLOBAL ? 0 : offset); + + /* + * define the references from external-symbol segment numbers + * to these symbol records. + */ + if (segment != NO_SEG && segment != stext.index && + segment != sdata.index && segment != bssindex) + bsym = raa_write (bsym, segment, nsyms); + + nsyms++; +} + +static void aout_add_reloc (struct Section *sect, long segment, + int relative, int bytes) { + struct Reloc *r; + + r = *sect->tail = nasm_malloc(sizeof(struct Reloc)); + sect->tail = &r->next; + r->next = NULL; + + r->address = sect->len; + r->symbol = (segment == NO_SEG ? -SECT_ABS : + segment == stext.index ? -SECT_TEXT : + segment == sdata.index ? -SECT_DATA : + segment == bssindex ? 
-SECT_BSS : + raa_read(bsym, segment)); + r->relative = relative; + r->bytes = bytes; + + sect->nrelocs++; +} + +static void aout_out (long segto, void *data, unsigned long type, + long segment, long wrt) { + struct Section *s; + long realbytes = type & OUT_SIZMASK; + unsigned char mydata[4], *p; + + if (wrt != NO_SEG) { + wrt = NO_SEG; /* continue to do _something_ */ + error (ERR_NONFATAL, "WRT not supported by a.out output format"); + } + + type &= OUT_TYPMASK; + + /* + * handle absolute-assembly (structure definitions) + */ + if (segto == NO_SEG) { + if (type != OUT_RESERVE) + error (ERR_NONFATAL, "attempt to assemble code in [ABSOLUTE]" + " space"); + return; + } + + if (segto == stext.index) + s = &stext; + else if (segto == sdata.index) + s = &sdata; + else if (segto == bssindex) + s = NULL; + else { + error(ERR_WARNING, "attempt to assemble code in" + " segment %d: defaulting to `.text'", segto); + s = &stext; + } + + if (!s && type != OUT_RESERVE) { + error(ERR_WARNING, "attempt to initialise memory in the" + " BSS section: ignored"); + if (type == OUT_REL2ADR) + realbytes = 2; + else if (type == OUT_REL4ADR) + realbytes = 4; + bsslen += realbytes; + return; + } + + if (type == OUT_RESERVE) { + if (s) { + error(ERR_WARNING, "uninitialised space declared in" + " %s section: zeroing", + (segto == stext.index ? 
"code" : "data")); + aout_sect_write (s, NULL, realbytes); + } else + bsslen += realbytes; + } else if (type == OUT_RAWDATA) { + if (segment != NO_SEG) + error(ERR_PANIC, "OUT_RAWDATA with other than NO_SEG"); + aout_sect_write (s, data, realbytes); + } else if (type == OUT_ADDRESS) { + if (segment != NO_SEG) { + if (segment % 2) { + error(ERR_NONFATAL, "a.out format does not support" + " segment base references"); + } else + aout_add_reloc (s, segment, FALSE, realbytes); + } + p = mydata; + if (realbytes == 2) + WRITESHORT (p, *(long *)data); + else + WRITELONG (p, *(long *)data); + aout_sect_write (s, mydata, realbytes); + } else if (type == OUT_REL2ADR) { + if (segment == segto) + error(ERR_PANIC, "intra-segment OUT_REL2ADR"); + if (segment != NO_SEG && segment % 2) { + error(ERR_NONFATAL, "a.out format does not support" + " segment base references"); + } else + aout_add_reloc (s, segment, TRUE, 2); + p = mydata; + WRITESHORT (p, *(long*)data-(realbytes + s->len)); + aout_sect_write (s, mydata, 2L); + } else if (type == OUT_REL4ADR) { + if (segment == segto) + error(ERR_PANIC, "intra-segment OUT_REL4ADR"); + if (segment != NO_SEG && segment % 2) { + error(ERR_NONFATAL, "a.out format does not support" + " segment base references"); + } else + aout_add_reloc (s, segment, TRUE, 4); + p = mydata; + WRITELONG (p, *(long*)data-(realbytes + s->len)); + aout_sect_write (s, mydata, 4L); + } +} + +static void aout_pad_sections(void) { + static unsigned char pad[] = { 0x90, 0x90, 0x90, 0x90 }; + /* + * Pad each of the text and data sections with NOPs until their + * length is a multiple of four. (NOP == 0x90.) Also increase + * the length of the BSS section similarly. 
+ */ + aout_sect_write (&stext, pad, (-stext.len) & 3); + aout_sect_write (&sdata, pad, (-sdata.len) & 3); + bsslen = (bsslen + 3) & ~3; +} + +/* + * a.out files have the curious property that all references to + * things in the data or bss sections are done by addresses which + * are actually relative to the start of the _text_ section, in the + * _file_. (No relation to what happens after linking. No idea why + * this should be so. It's very strange.) So we have to go through + * the relocation table, _after_ the final size of each section is + * known, and fix up the relocations pointed to. + */ +static void aout_fixup_relocs(struct Section *sect) { + struct Reloc *r; + + saa_rewind (sect->data); + for (r = sect->head; r; r = r->next) { + unsigned char *p, *q, blk[4]; + long l; + + saa_fread (sect->data, r->address, blk, (long)r->bytes); + p = q = blk; + l = *p++; + l += ((long)*p++) << 8; + if (r->bytes == 4) { + l += ((long)*p++) << 16; + l += ((long)*p++) << 24; + } + if (r->symbol == -SECT_DATA) + l += stext.len; + else if (r->symbol == -SECT_BSS) + l += stext.len + sdata.len; + if (r->bytes == 4) + WRITELONG(q, l); + else + WRITESHORT(q, l); + saa_fwrite (sect->data, r->address, blk, (long)r->bytes); + } +} + +static void aout_write(void) { + /* + * Emit the a.out header. + */ + fwritelong (0x640107, aoutfp); /* OMAGIC, M_386, no flags */ + fwritelong (stext.len, aoutfp); + fwritelong (sdata.len, aoutfp); + fwritelong (bsslen, aoutfp); + fwritelong (nsyms * 12, aoutfp); /* length of symbol table */ + fwritelong (0L, aoutfp); /* object files have no entry point */ + fwritelong (stext.nrelocs * 8, aoutfp); /* size of text relocs */ + fwritelong (sdata.nrelocs * 8, aoutfp); /* size of data relocs */ + + /* + * Write out the code section and the data section. + */ + saa_fpwrite (stext.data, aoutfp); + saa_fpwrite (sdata.data, aoutfp); + + /* + * Write out the relocations. 
+ */ + aout_write_relocs (stext.head); + aout_write_relocs (sdata.head); + + /* + * Write the symbol table. + */ + aout_write_syms (); + + /* + * And the string table. + */ + fwritelong (strslen+4, aoutfp); /* length includes length count */ + saa_fpwrite (strs, aoutfp); +} + +static void aout_write_relocs (struct Reloc *r) { + while (r) { + unsigned long word2; + + fwritelong (r->address, aoutfp); + + if (r->symbol >= 0) + word2 = r->symbol | 0x8000000; + else + word2 = -r->symbol; + if (r->relative) + word2 |= 0x1000000; + word2 |= (r->bytes == 2 ? 0x2000000 : 0x4000000); + fwritelong (word2, aoutfp); + + r = r->next; + } +} + +static void aout_write_syms (void) { + int i; + + saa_rewind (syms); + for (i=0; i<nsyms; i++) { + struct Symbol *sym = saa_rstruct(syms); + fwritelong (sym->strpos, aoutfp); + fwritelong ((long)sym->type, aoutfp); + /* + * Fix up the symbol value now we know the final section + * sizes. + */ + if ((sym->type & SECT_MASK) == SECT_DATA) + sym->value += stext.len; + if ((sym->type & SECT_MASK) == SECT_BSS) + sym->value += stext.len + sdata.len; + fwritelong (sym->value, aoutfp); + } +} + +static void aout_sect_write (struct Section *sect, + unsigned char *data, unsigned long len) { + saa_wbytes (sect->data, data, len); + sect->len += len; +} + +static long aout_segbase (long segment) { + return segment; +} + +static int aout_directive (char *directive, char *value, int pass) { + return 0; +} + +static void aout_filename (char *inname, char *outname, efunc error) { + standard_extension (inname, outname, ".o", error); +} + +struct ofmt of_aout = { + "GNU a.out (i386) object files (e.g. 
Linux)", + "aout", + aout_init, + aout_out, + aout_deflabel, + aout_section_names, + aout_segbase, + aout_directive, + aout_filename, + aout_cleanup +}; + +#endif /* OF_AOUT */ diff --git a/outas86.c b/outas86.c new file mode 100644 index 0000000..82dedb2 --- /dev/null +++ b/outas86.c @@ -0,0 +1,548 @@ +/* outas86.c output routines for the Netwide Assembler to produce + * Linux as86 (bin86-0.3) object files + * + * The Netwide Assembler is copyright (C) 1996 Simon Tatham and + * Julian Hall. All rights reserved. The software is + * redistributable under the licence given in the file "Licence" + * distributed in the NASM archive. + */ + +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <ctype.h> + +#include "nasm.h" +#include "nasmlib.h" +#include "outform.h" + +#ifdef OF_AS86 + +struct Piece { + struct Piece *next; + int type; /* 0 = absolute, 1 = seg, 2 = sym */ + long offset; /* relative offset */ + int number; /* symbol/segment number (4=bss) */ + long bytes; /* size of reloc or of absolute data */ + int relative; /* TRUE or FALSE */ +}; + +struct Symbol { + long strpos; /* string table position of name */ + int flags; /* symbol flags */ + int segment; /* 4=bss at this point */ + long value; /* address, or COMMON variable size */ +}; + +/* + * Section IDs - used in Piece.number and Symbol.segment. + */ +#define SECT_TEXT 0 /* text section */ +#define SECT_DATA 3 /* data section */ +#define SECT_BSS 4 /* bss section */ + +/* + * Flags used in Symbol.flags. 
+ */ +#define SYM_ENTRY (1<<8) +#define SYM_EXPORT (1<<7) +#define SYM_IMPORT (1<<6) +#define SYM_ABSOLUTE (1<<4) + +struct Section { + struct SAA *data; + unsigned long datalen, size, len; + long index; + struct Piece *head, *last, **tail; +}; + +static char as86_module[FILENAME_MAX]; + +static struct Section stext, sdata; +static unsigned long bsslen; +static long bssindex; + +static struct SAA *syms; +static unsigned long nsyms; + +static struct RAA *bsym; + +static struct SAA *strs; +static unsigned long strslen; + +static int as86_reloc_size; + +static FILE *as86fp; +static efunc error; + +static void as86_write(void); +static void as86_write_section (struct Section *, int); +static int as86_add_string (char *name); +static void as86_sect_write(struct Section *, unsigned char *, unsigned long); + +static void as86_init(FILE *fp, efunc errfunc, ldfunc ldef) { + as86fp = fp; + error = errfunc; + (void) ldef; /* placate optimisers */ + stext.data = saa_init(1L); stext.datalen = 0L; + stext.head = stext.last = NULL; + stext.tail = &stext.head; + sdata.data = saa_init(1L); sdata.datalen = 0L; + sdata.head = sdata.last = NULL; + sdata.tail = &sdata.head; + bsslen = + stext.len = stext.datalen = stext.size = + sdata.len = sdata.datalen = sdata.size = 0; + stext.index = seg_alloc(); + sdata.index = seg_alloc(); + bssindex = seg_alloc(); + syms = saa_init((long)sizeof(struct Symbol)); + nsyms = 0; + bsym = raa_init(); + strs = saa_init(1L); + strslen = 0; + + as86_add_string (as86_module); +} + +static void as86_cleanup(void) { + struct Piece *p; + + as86_write(); + fclose (as86fp); + saa_free (stext.data); + while (stext.head) { + p = stext.head; + stext.head = stext.head->next; + nasm_free (p); + } + saa_free (sdata.data); + while (sdata.head) { + p = sdata.head; + sdata.head = sdata.head->next; + nasm_free (p); + } + saa_free (syms); + raa_free (bsym); + saa_free (strs); +} + +static long as86_section_names (char *name, int pass, int *bits) { + /* + * Default is 16 
bits. + */ + if (!name) + *bits = 16; + + if (!name) + return stext.index; + + if (!strcmp(name, ".text")) + return stext.index; + else if (!strcmp(name, ".data")) + return sdata.index; + else if (!strcmp(name, ".bss")) + return bssindex; + else + return NO_SEG; +} + +static int as86_add_string (char *name) { + int pos = strslen; + int length = strlen(name); + + saa_wbytes (strs, name, (long)(length+1)); + strslen += 1+length; + + return pos; +} + +static void as86_deflabel (char *name, long segment, long offset, + int is_global) { + struct Symbol *sym; + + if (name[0] == '.' && name[1] == '.') { + return; + } + + sym = saa_wstruct (syms); + + sym->strpos = as86_add_string (name); + sym->flags = 0; + if (segment == NO_SEG) + sym->flags |= SYM_ABSOLUTE, sym->segment = 0; + else if (segment == stext.index) + sym->segment = SECT_TEXT; + else if (segment == sdata.index) + sym->segment = SECT_DATA; + else if (segment == bssindex) + sym->segment = SECT_BSS; + else { + sym->flags |= SYM_IMPORT; + sym->segment = 15; + } + + if (is_global == 2) + sym->segment = 3; /* already have IMPORT */ + + if (is_global && !(sym->flags & SYM_IMPORT)) + sym->flags |= SYM_EXPORT; + + sym->value = offset; + + /* + * define the references from external-symbol segment numbers + * to these symbol records. 
+ */ + if (segment != NO_SEG && segment != stext.index && + segment != sdata.index && segment != bssindex) + bsym = raa_write (bsym, segment, nsyms); + + nsyms++; +} + +static void as86_add_piece (struct Section *sect, int type, long offset, + long segment, long bytes, int relative) { + struct Piece *p; + + sect->len += bytes; + + if (type == 0 && sect->last && sect->last->type == 0) { + sect->last->bytes += bytes; + return; + } + + p = sect->last = *sect->tail = nasm_malloc(sizeof(struct Piece)); + sect->tail = &p->next; + p->next = NULL; + + p->type = type; + p->offset = offset; + p->bytes = bytes; + p->relative = relative; + + if (type == 1 && segment == stext.index) + p->number = SECT_TEXT; + else if (type == 1 && segment == sdata.index) + p->number = SECT_DATA; + else if (type == 1 && segment == bssindex) + p->number = SECT_BSS; + else if (type == 1) + p->number = raa_read (bsym, segment), p->type = 2; +} + +static void as86_out (long segto, void *data, unsigned long type, + long segment, long wrt) { + struct Section *s; + long realbytes = type & OUT_SIZMASK; + long offset; + unsigned char mydata[4], *p; + + if (wrt != NO_SEG) { + wrt = NO_SEG; /* continue to do _something_ */ + error (ERR_NONFATAL, "WRT not supported by as86 output format"); + } + + type &= OUT_TYPMASK; + + /* + * handle absolute-assembly (structure definitions) + */ + if (segto == NO_SEG) { + if (type != OUT_RESERVE) + error (ERR_NONFATAL, "attempt to assemble code in [ABSOLUTE]" + " space"); + return; + } + + if (segto == stext.index) + s = &stext; + else if (segto == sdata.index) + s = &sdata; + else if (segto == bssindex) + s = NULL; + else { + error(ERR_WARNING, "attempt to assemble code in" + " segment %d: defaulting to `.text'", segto); + s = &stext; + } + + if (!s && type != OUT_RESERVE) { + error(ERR_WARNING, "attempt to initialise memory in the" + " BSS section: ignored"); + if (type == OUT_REL2ADR) + realbytes = 2; + else if (type == OUT_REL4ADR) + realbytes = 4; + bsslen += 
realbytes; + return; + } + + if (type == OUT_RESERVE) { + if (s) { + error(ERR_WARNING, "uninitialised space declared in" + " %s section: zeroing", + (segto == stext.index ? "code" : "data")); + as86_sect_write (s, NULL, realbytes); + as86_add_piece (s, 0, 0L, 0L, realbytes, 0); + } else + bsslen += realbytes; + } else if (type == OUT_RAWDATA) { + if (segment != NO_SEG) + error(ERR_PANIC, "OUT_RAWDATA with other than NO_SEG"); + as86_sect_write (s, data, realbytes); + as86_add_piece (s, 0, 0L, 0L, realbytes, 0); + } else if (type == OUT_ADDRESS) { + if (segment != NO_SEG) { + if (segment % 2) { + error(ERR_NONFATAL, "as86 format does not support" + " segment base references"); + } else{ + offset = * (long *) data; + as86_add_piece (s, 1, offset, segment, realbytes, 0); + } + } else { + p = mydata; + WRITELONG (p, * (long *) data); + as86_sect_write (s, data, realbytes); + as86_add_piece (s, 0, 0L, 0L, realbytes, 0); + } + } else if (type == OUT_REL2ADR) { + if (segment == segto) + error(ERR_PANIC, "intra-segment OUT_REL2ADR"); + if (segment != NO_SEG) { + if (segment % 2) { + error(ERR_NONFATAL, "as86 format does not support" + " segment base references"); + } else { + offset = * (long *) data; + as86_add_piece (s, 1, offset-realbytes+2, segment, 2L, 1); + } + } + } else if (type == OUT_REL4ADR) { + if (segment == segto) + error(ERR_PANIC, "intra-segment OUT_REL4ADR"); + if (segment != NO_SEG) { + if (segment % 2) { + error(ERR_NONFATAL, "as86 format does not support" + " segment base references"); + } else { + offset = * (long *) data; + as86_add_piece (s, 1, offset-realbytes+4, segment, 4L, 1); + } + } + } +} + +static void as86_write(void) { + int i; + long symlen, seglen, segsize; + + /* + * First, go through the symbol records working out how big + * each will be. Also fix up BSS references at this time, and + * set the flags words up completely. 
+ */ + symlen = 0; + saa_rewind (syms); + for (i = 0; i < nsyms; i++) { + struct Symbol *sym = saa_rstruct (syms); + if (sym->segment == SECT_BSS) + sym->segment = SECT_DATA, sym->value += sdata.len; + sym->flags |= sym->segment; + if (sym->value == 0) + sym->flags |= 0 << 14, symlen += 4; + else if (sym->value >= 0 && sym->value <= 255) + sym->flags |= 1 << 14, symlen += 5; + else if (sym->value >= 0 && sym->value <= 65535) + sym->flags |= 2 << 14, symlen += 6; + else + sym->flags |= 3 << 14, symlen += 8; + } + + /* + * Now do the same for the segments, and get the segment size + * descriptor word at the same time. + */ + seglen = segsize = 0; + if ((unsigned long) stext.len > 65535) + segsize |= 0x03000000, seglen += 4; + else + segsize |= 0x02000000, seglen += 2; + if ((unsigned long) sdata.len > 65535) + segsize |= 0xC0000000, seglen += 4; + else + segsize |= 0x80000000, seglen += 2; + + /* + * Emit the as86 header. + */ + fwritelong (0x000186A3, as86fp); + fputc (0x2A, as86fp); + fwritelong (27+symlen+seglen+strslen, as86fp); /* header length */ + fwritelong (stext.len+sdata.len, as86fp); + fwriteshort (strslen, as86fp); + fwriteshort (0, as86fp); /* class = revision = 0 */ + fwritelong (0x55555555, as86fp); /* segment max sizes: always this */ + fwritelong (segsize, as86fp); /* segment size descriptors */ + if (segsize & 0x01000000) + fwritelong (stext.len, as86fp); + else + fwriteshort (stext.len, as86fp); + if (segsize & 0x40000000) + fwritelong (sdata.len, as86fp); + else + fwriteshort (sdata.len, as86fp); + fwriteshort (nsyms, as86fp); + + /* + * Write the symbol table. 
+ */ + saa_rewind (syms); + for (i = 0; i < nsyms; i++) { + struct Symbol *sym = saa_rstruct (syms); + fwriteshort (sym->strpos, as86fp); + fwriteshort (sym->flags, as86fp); + switch (sym->flags & (3<<14)) { + case 0<<14: break; + case 1<<14: fputc (sym->value, as86fp); break; + case 2<<14: fwriteshort (sym->value, as86fp); break; + case 3<<14: fwritelong (sym->value, as86fp); break; + } + } + + /* + * Write out the string table. + */ + saa_fpwrite (strs, as86fp); + + /* + * Write the program text. + */ + as86_reloc_size = -1; + as86_write_section (&stext, SECT_TEXT); + as86_write_section (&sdata, SECT_DATA); + fputc (0, as86fp); /* termination */ +} + +static void as86_set_rsize (int size) { + if (as86_reloc_size != size) { + switch (as86_reloc_size = size) { + case 1: fputc (0x01, as86fp); break; /* shouldn't happen */ + case 2: fputc (0x02, as86fp); break; + case 4: fputc (0x03, as86fp); break; + default: error (ERR_PANIC, "bizarre relocation size %d", size); + } + } +} + +static void as86_write_section (struct Section *sect, int index) { + struct Piece *p; + unsigned long s; + long length; + + fputc (0x20+index, as86fp); /* select the right section */ + + saa_rewind (sect->data); + + for (p = sect->head; p; p = p->next) + switch (p->type) { + case 0: + /* + * Absolute data. Emit it in chunks of at most 64 + * bytes. + */ + length = p->bytes; + do { + char buf[64]; + long tmplen = (length > 64 ? 64 : length); + fputc (0x40 | (tmplen & 0x3F), as86fp); + saa_rnbytes (sect->data, buf, tmplen); + fwrite (buf, 1, tmplen, as86fp); + length -= tmplen; + } while (length > 0); + break; + case 1: + /* + * A segment-type relocation. First fix up the BSS. + */ + if (p->number == SECT_BSS) + p->number = SECT_DATA, p->offset += sdata.len; + as86_set_rsize (p->bytes); + fputc (0x80 | (p->relative ? 
0x20 : 0) | p->number, as86fp); + if (as86_reloc_size == 2) + fwriteshort (p->offset, as86fp); + else + fwritelong (p->offset, as86fp); + break; + case 2: + /* + * A symbol-type relocation. + */ + as86_set_rsize (p->bytes); + s = p->offset; + if (s > 65535) + s = 3; + else if (s > 255) + s = 2; + else if (s > 0) + s = 1; + else + s = 0; + fputc (0xC0 | + (p->relative ? 0x20 : 0) | + (p->number > 255 ? 0x04 : 0) | s, as86fp); + if (p->number > 255) + fwriteshort (p->number, as86fp); + else + fputc (p->number, as86fp); + switch ((int)s) { + case 0: break; + case 1: fputc (p->offset, as86fp); break; + case 2: fwriteshort (p->offset, as86fp); break; + case 3: fwritelong (p->offset, as86fp); break; + } + break; + } +} + +static void as86_sect_write (struct Section *sect, + unsigned char *data, unsigned long len) { + saa_wbytes (sect->data, data, len); + sect->datalen += len; +} + +static long as86_segbase (long segment) { + return segment; +} + +static int as86_directive (char *directive, char *value, int pass) { + return 0; +} + +static void as86_filename (char *inname, char *outname, efunc error) { + char *p; + + if ( (p = strrchr (inname, '.')) != NULL) { + strncpy (as86_module, inname, p-inname); + as86_module[p-inname] = '\0'; + } else + strcpy (as86_module, inname); + + standard_extension (inname, outname, ".o", error); +} + +struct ofmt of_as86 = { + "Linux as86 (bin86 version 0.3) object files", + "as86", + as86_init, + as86_out, + as86_deflabel, + as86_section_names, + as86_segbase, + as86_directive, + as86_filename, + as86_cleanup +}; + +#endif /* OF_AS86 */ diff --git a/outbin.c b/outbin.c new file mode 100644 index 0000000..82c8510 --- /dev/null +++ b/outbin.c @@ -0,0 +1,303 @@ +/* outbin.c output routines for the Netwide Assembler to produce + * flat-form binary files + * + * The Netwide Assembler is copyright (C) 1996 Simon Tatham and + * Julian Hall. All rights reserved. 
The software is + * redistributable under the licence given in the file "Licence" + * distributed in the NASM archive. + */ + +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include "nasm.h" +#include "nasmlib.h" +#include "outform.h" + +#ifdef OF_BIN + +static FILE *fp; +static efunc error; + +static struct Section { + struct SAA *contents; + long length; + long index; +} textsect, datasect; +static long bsslen, bssindex; + +static struct Reloc { + struct Reloc *next; + long posn; + long bytes; + long secref; + long secrel; + struct Section *target; +} *relocs, **reloctail; + +static int start_point; + +static void add_reloc (struct Section *s, long bytes, long secref, + long secrel) { + struct Reloc *r; + + r = *reloctail = nasm_malloc(sizeof(struct Reloc)); + reloctail = &r->next; + r->next = NULL; + r->posn = s->length; + r->bytes = bytes; + r->secref = secref; + r->secrel = secrel; + r->target = s; +} + +static void bin_init (FILE *afp, efunc errfunc, ldfunc ldef) { + fp = afp; + + error = errfunc; + (void) ldef; /* placate optimisers */ + + start_point = 0; /* default */ + textsect.contents = saa_init(1L); + datasect.contents = saa_init(1L); + textsect.length = datasect.length = 0; + textsect.index = seg_alloc(); + datasect.index = seg_alloc(); + bsslen = 0; + bssindex = seg_alloc(); + relocs = NULL; + reloctail = &relocs; +} + +static void bin_cleanup (void) { + struct Reloc *r; + long datapos, dataalign, bsspos; + + datapos = (start_point + textsect.length + 3) & ~3;/* align on 4 bytes */ + dataalign = datapos - (start_point + textsect.length); + + saa_rewind (textsect.contents); + saa_rewind (datasect.contents); + + bsspos = (datapos + datasect.length + 3) & ~3; + + for (r = relocs; r; r = r->next) { + unsigned char *p, *q, mydata[4]; + long l; + + saa_fread (r->target->contents, r->posn, mydata, r->bytes); + p = q = mydata; + l = *p++; + l += ((long)*p++) << 8; + if (r->bytes == 4) { + l += ((long)*p++) << 16; + l += ((long)*p++) << 24; + 
} + + if (r->secref == textsect.index) + l += start_point; + else if (r->secref == datasect.index) + l += datapos; + else if (r->secref == bssindex) + l += bsspos; + + if (r->secrel == textsect.index) + l -= start_point; + else if (r->secrel == datasect.index) + l -= datapos; + else if (r->secrel == bssindex) + l -= bsspos; + + if (r->bytes == 4) + WRITELONG(q, l); + else + WRITESHORT(q, l); + saa_fwrite (r->target->contents, r->posn, mydata, r->bytes); + } + saa_fpwrite (textsect.contents, fp); + if (datasect.length > 0) { + fwrite ("\0\0\0\0", dataalign, 1, fp); + saa_fpwrite (datasect.contents, fp); + } + fclose (fp); + saa_free (textsect.contents); + saa_free (datasect.contents); + while (relocs) { + r = relocs->next; + nasm_free (relocs); + relocs = r; + } +} + +static void bin_out (long segto, void *data, unsigned long type, + long segment, long wrt) { + unsigned char *p, mydata[4]; + struct Section *s; + long realbytes; + + if (wrt != NO_SEG) { + wrt = NO_SEG; /* continue to do _something_ */ + error (ERR_NONFATAL, "WRT not supported by binary output format"); + } + + /* + * handle absolute-assembly (structure definitions) + */ + if (segto == NO_SEG) { + if ((type & OUT_TYPMASK) != OUT_RESERVE) + error (ERR_NONFATAL, "attempt to assemble code in [ABSOLUTE]" + " space"); + return; + } + + if (segto == bssindex) { /* BSS */ + if ((type & OUT_TYPMASK) != OUT_RESERVE) + error(ERR_WARNING, "attempt to initialise memory in the" + " BSS section: ignored"); + s = NULL; + } else if (segto == textsect.index) { + s = &textsect; + } else if (segto == datasect.index) { + s = &datasect; + } else { + error(ERR_WARNING, "attempt to assemble code in" + " segment %d: defaulting to `.text'", segto); + s = &textsect; + } + + if ((type & OUT_TYPMASK) == OUT_ADDRESS) { + if (segment != NO_SEG && + segment != textsect.index && + segment != datasect.index && + segment != bssindex) { + if (segment % 2) + error(ERR_NONFATAL, "binary output format does not support" + " segment base 
references"); + else + error(ERR_NONFATAL, "binary output format does not support" + " external references"); + segment = NO_SEG; + } + if (s) { + if (segment != NO_SEG) + add_reloc (s, type & OUT_SIZMASK, segment, -1L); + p = mydata; + if ((type & OUT_SIZMASK) == 4) + WRITELONG (p, *(long *)data); + else + WRITESHORT (p, *(long *)data); + saa_wbytes (s->contents, mydata, type & OUT_SIZMASK); + s->length += type & OUT_SIZMASK; + } else + bsslen += type & OUT_SIZMASK; + } else if ((type & OUT_TYPMASK) == OUT_RAWDATA) { + type &= OUT_SIZMASK; + p = data; + if (s) { + saa_wbytes (s->contents, data, type); + s->length += type; + } else + bsslen += type; + } else if ((type & OUT_TYPMASK) == OUT_RESERVE) { + if (s) { + error(ERR_WARNING, "uninitialised space declared in" + " %s section: zeroing", + (segto == textsect.index ? "code" : "data")); + } + type &= OUT_SIZMASK; + if (s) { + saa_wbytes (s->contents, NULL, type); + s->length += type; + } else + bsslen += type; + } else if ((type & OUT_TYPMASK) == OUT_REL2ADR || + (type & OUT_TYPMASK) == OUT_REL4ADR) { + realbytes = ((type & OUT_TYPMASK) == OUT_REL4ADR ? 
4 : 2); + if (segment != NO_SEG && + segment != textsect.index && + segment != datasect.index && + segment != bssindex) { + if (segment % 2) + error(ERR_NONFATAL, "binary output format does not support" + " segment base references"); + else + error(ERR_NONFATAL, "binary output format does not support" + " external references"); + segment = NO_SEG; + } + if (s) { + add_reloc (s, realbytes, segment, segto); + p = mydata; + if (realbytes == 4) + WRITELONG (p, *(long*)data - realbytes - s->length); + else + WRITESHORT (p, *(long*)data - realbytes - s->length); + saa_wbytes (s->contents, mydata, realbytes); + s->length += realbytes; + } else + bsslen += realbytes; + } +} + +static void bin_deflabel (char *name, long segment, long offset, + int is_global) { + if (is_global == 2) { + error (ERR_NONFATAL, "binary output format does not support common" + " variables"); + } +} + +static long bin_secname (char *name, int pass, int *bits) { + /* + * Default is 16 bits. + */ + if (!name) + *bits = 16; + + if (!name) + return textsect.index; + + if (!strcmp(name, ".text")) + return textsect.index; + else if (!strcmp(name, ".data")) + return datasect.index; + else if (!strcmp(name, ".bss")) + return bssindex; + else + return NO_SEG; +} + +static long bin_segbase (long segment) { + return segment; +} + +static int bin_directive (char *directive, char *value, int pass) { + int rn_error; + + if (!strcmp(directive, "org")) { + start_point = readnum (value, &rn_error); + if (rn_error) + error (ERR_NONFATAL, "argument to ORG should be numeric"); + return 1; + } else + return 0; +} + +static void bin_filename (char *inname, char *outname, efunc error) { + standard_extension (inname, outname, "", error); +} + +struct ofmt of_bin = { + "flat-form binary files (e.g. 
DOS .COM, .SYS)", + "bin", + bin_init, + bin_out, + bin_deflabel, + bin_secname, + bin_segbase, + bin_directive, + bin_filename, + bin_cleanup +}; + +#endif /* OF_BIN */ diff --git a/outcoff.c b/outcoff.c new file mode 100644 index 0000000..c3ae712 --- /dev/null +++ b/outcoff.c @@ -0,0 +1,611 @@ +/* outcoff.c output routines for the Netwide Assembler to produce + * COFF object files (for DJGPP and Win32) + * + * The Netwide Assembler is copyright (C) 1996 Simon Tatham and + * Julian Hall. All rights reserved. The software is + * redistributable under the licence given in the file "Licence" + * distributed in the NASM archive. + */ + +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <ctype.h> +#include <time.h> + +#include "nasm.h" +#include "nasmlib.h" +#include "outform.h" + +#if defined(OF_COFF) || defined(OF_WIN32) + +/* + * Notes on COFF: + * + * (0) When I say `standard COFF' below, I mean `COFF as output and + * used by DJGPP'. I assume DJGPP gets it right. + * + * (1) Win32 appears to interpret the term `relative relocation' + * differently from standard COFF. Standard COFF understands a + * relative relocation to mean that during relocation you add the + * address of the symbol you're referencing, and subtract the base + * address of the section you're in. Win32 COFF, by contrast, seems + * to add the address of the symbol and then subtract the address + * of THE BYTE AFTER THE RELOCATED DWORD. Hence the two formats are + * subtly incompatible. + * + * (2) Win32 doesn't bother putting any flags in the header flags + * field (at offset 0x12 into the file). + * + * (3) Win32 puts some weird flags into the section header table. + * It uses flags 0x80000000 (writable), 0x40000000 (readable) and + * 0x20000000 (executable) in the expected combinations, which + * standard COFF doesn't seem to bother with, but it also does + * something else strange: it also flags code sections as + * 0x00500000 and data/bss as 0x00300000. 
Even Microsoft's + * documentation doesn't explain what these things mean. I just go + * ahead and use them anyway - it seems to work. + * + * (4) Both standard COFF and Win32 COFF seem to use the DWORD + * field directly after the section name in the section header + * table for something strange: they store what the address of the + * section start point _would_ be, if you laid all the sections end + * to end starting at zero. Dunno why. Microsoft's documentation + * lists this field as "Virtual Size of Section", which doesn't + * seem to fit at all. In fact, Win32 even includes non-linked + * sections such as .drectve in this calculation. Not that I can be + * bothered with those things anyway. + * + * (5) Standard COFF does something very strange to common + * variables: the relocation point for a common variable is as far + * _before_ the variable as its size stretches out _after_ it. So + * we must fix up common variable references. Win32 seems to be + * sensible on this one. + */ + +/* Flag which version of COFF we are currently outputting. */ +static int win32; + +struct Reloc { + struct Reloc *next; + long address; /* relative to _start_ of section */ + long symbol; /* symbol number */ + int relative; /* TRUE or FALSE */ +}; + +struct Symbol { + char name[9]; + long strpos; /* string table position of name */ + int section; /* section number where it's defined + * - in COFF codes, not NASM codes */ + int is_global; /* is it a global symbol or not? 
*/ + long value; /* address, or COMMON variable size */ +}; + +static FILE *coffp; +static efunc error; +static char coff_infile[FILENAME_MAX]; + +struct Section { + struct SAA *data; + unsigned long len; + int nrelocs; + long index; + struct Reloc *head, **tail; +}; + +static struct Section stext, sdata; +static unsigned long bsslen; +static long bssindex; + +static struct SAA *syms; +static unsigned long nsyms; + +static struct RAA *bsym, *symval; + +static struct SAA *strs; +static unsigned long strslen; + +/* + * The symbol table contains a double entry for the file name, a + * double entry for each of the three sections, and an absolute + * symbol referencing address zero, followed by the _real_ symbols. + * That's nine extra symbols. + */ +#define SYM_INITIAL 9 + +/* + * Symbol table indices we can relocate relative to. + */ +#define SYM_ABS_SEG 8 +#define SYM_TEXT_SEG 2 +#define SYM_DATA_SEG 4 +#define SYM_BSS_SEG 6 + +/* + * The section header table ends at this offset: 0x14 for the + * header, plus 0x28 for each of three sections. 
+ */ +#define COFF_HDRS_END 0x8c + +static void coff_gen_init(FILE *, efunc); +static void coff_sect_write (struct Section *, unsigned char *, + unsigned long); +static void coff_write (void); +static void coff_section_header (char *, long, long, long, long, int, long); +static void coff_write_relocs (struct Section *); +static void coff_write_symbols (void); + +static void coff_win32_init(FILE *fp, efunc errfunc, ldfunc ldef) { + win32 = TRUE; + (void) ldef; /* placate optimisers */ + coff_gen_init(fp, errfunc); +} + +static void coff_std_init(FILE *fp, efunc errfunc, ldfunc ldef) { + win32 = FALSE; + (void) ldef; /* placate optimisers */ + coff_gen_init(fp, errfunc); +} + +static void coff_gen_init(FILE *fp, efunc errfunc) { + coffp = fp; + error = errfunc; + stext.data = saa_init(1L); stext.head = NULL; stext.tail = &stext.head; + sdata.data = saa_init(1L); sdata.head = NULL; sdata.tail = &sdata.head; + stext.len = sdata.len = bsslen = 0; + stext.nrelocs = sdata.nrelocs = 0; + stext.index = seg_alloc(); + sdata.index = seg_alloc(); + bssindex = seg_alloc(); + syms = saa_init((long)sizeof(struct Symbol)); + nsyms = 0; + bsym = raa_init(); + symval = raa_init(); + strs = saa_init(1L); + strslen = 0; +} + +static void coff_cleanup(void) { + struct Reloc *r; + + coff_write(); + fclose (coffp); + saa_free (stext.data); + while (stext.head) { + r = stext.head; + stext.head = stext.head->next; + nasm_free (r); + } + saa_free (sdata.data); + while (sdata.head) { + r = sdata.head; + sdata.head = sdata.head->next; + nasm_free (r); + } + saa_free (syms); + raa_free (bsym); + raa_free (symval); + saa_free (strs); +} + +static long coff_section_names (char *name, int pass, int *bits) { + /* + * Default is 32 bits. 
+ */ + if (!name) + *bits = 32; + + if (!name) + return stext.index; + + if (!strcmp(name, ".text")) + return stext.index; + else if (!strcmp(name, ".data")) + return sdata.index; + else if (!strcmp(name, ".bss")) + return bssindex; + else + return NO_SEG; +} + +static void coff_deflabel (char *name, long segment, long offset, + int is_global) { + int pos = strslen+4; + struct Symbol *sym; + + if (name[0] == '.' && name[1] == '.') { + return; + } + + if (strlen(name) > 8) { + saa_wbytes (strs, name, (long)(1+strlen(name))); + strslen += 1+strlen(name); + } else + pos = -1; + + sym = saa_wstruct (syms); + + sym->strpos = pos; + if (pos == -1) + strcpy (sym->name, name); + sym->is_global = !!is_global; + if (segment == NO_SEG) + sym->section = -1; /* absolute symbol */ + else if (segment == stext.index) + sym->section = 1; /* .text */ + else if (segment == sdata.index) + sym->section = 2; /* .data */ + else if (segment == bssindex) + sym->section = 3; /* .bss */ + else { + sym->section = 0; /* undefined */ + sym->is_global = TRUE; + } + if (is_global == 2) + sym->value = offset; + else + sym->value = (sym->section == 0 ? 0 : offset); + + /* + * define the references from external-symbol segment numbers + * to these symbol records. + */ + if (segment != NO_SEG && segment != stext.index && + segment != sdata.index && segment != bssindex) + bsym = raa_write (bsym, segment, nsyms); + + if (segment != NO_SEG) + symval = raa_write (symval, segment, sym->section ? 0 : sym->value); + + nsyms++; +} + +static long coff_add_reloc (struct Section *sect, long segment, + int relative) { + struct Reloc *r; + + r = *sect->tail = nasm_malloc(sizeof(struct Reloc)); + sect->tail = &r->next; + r->next = NULL; + + r->address = sect->len; + r->symbol = (segment == NO_SEG ? SYM_ABS_SEG : + segment == stext.index ? SYM_TEXT_SEG : + segment == sdata.index ? SYM_DATA_SEG : + segment == bssindex ? 
SYM_BSS_SEG : + raa_read (bsym, segment) + SYM_INITIAL); + r->relative = relative; + + sect->nrelocs++; + + /* + * Return the fixup for standard COFF common variables. + */ + if (r->symbol >= SYM_INITIAL && !win32) + return raa_read (symval, segment); + else + return 0; +} + +static void coff_out (long segto, void *data, unsigned long type, + long segment, long wrt) { + struct Section *s; + long realbytes = type & OUT_SIZMASK; + unsigned char mydata[4], *p; + + if (wrt != NO_SEG) { + wrt = NO_SEG; /* continue to do _something_ */ + error (ERR_NONFATAL, "WRT not supported by COFF output formats"); + } + + type &= OUT_TYPMASK; + + /* + * handle absolute-assembly (structure definitions) + */ + if (segto == NO_SEG) { + if (type != OUT_RESERVE) + error (ERR_NONFATAL, "attempt to assemble code in [ABSOLUTE]" + " space"); + return; + } + + if (segto == stext.index) + s = &stext; + else if (segto == sdata.index) + s = &sdata; + else if (segto == bssindex) + s = NULL; + else { + error(ERR_WARNING, "attempt to assemble code in" + " segment %d: defaulting to `.text'", segto); + s = &stext; + } + + if (!s && type != OUT_RESERVE) { + error(ERR_WARNING, "attempt to initialise memory in the" + " BSS section: ignored"); + if (type == OUT_REL2ADR) + realbytes = 2; + else if (type == OUT_REL4ADR) + realbytes = 4; + bsslen += realbytes; + return; + } + + if (type == OUT_RESERVE) { + if (s) { + error(ERR_WARNING, "uninitialised space declared in" + " %s section: zeroing", + (segto == stext.index ? 
"code" : "data")); + coff_sect_write (s, NULL, realbytes); + } else + bsslen += realbytes; + } else if (type == OUT_RAWDATA) { + if (segment != NO_SEG) + error(ERR_PANIC, "OUT_RAWDATA with other than NO_SEG"); + coff_sect_write (s, data, realbytes); + } else if (type == OUT_ADDRESS) { + if (realbytes == 2 && (segment != NO_SEG || wrt != NO_SEG)) + error(ERR_NONFATAL, "COFF format does not support 16-bit" + " relocations"); + else { + long fix = 0; + if (segment != NO_SEG || wrt != NO_SEG) { + if (wrt != NO_SEG) { + error(ERR_NONFATAL, "COFF format does not support" + " WRT types"); + } else if (segment % 2) { + error(ERR_NONFATAL, "COFF format does not support" + " segment base references"); + } else + fix = coff_add_reloc (s, segment, FALSE); + } + p = mydata; + WRITELONG (p, *(long *)data + fix); + coff_sect_write (s, mydata, realbytes); + } + } else if (type == OUT_REL2ADR) { + error(ERR_NONFATAL, "COFF format does not support 16-bit" + " relocations"); + } else if (type == OUT_REL4ADR) { + if (segment == segto) + error(ERR_PANIC, "intra-segment OUT_REL4ADR"); + else if (segment == NO_SEG && win32) + error(ERR_NONFATAL, "Win32 COFF does not correctly support" + " relative references to absolute addresses"); + else { + long fix = 0; + if (segment != NO_SEG && segment % 2) { + error(ERR_NONFATAL, "COFF format does not support" + " segment base references"); + } else + fix = coff_add_reloc (s, segment, TRUE); + p = mydata; + if (win32) { + WRITELONG (p, *(long*)data + 4 - realbytes + fix); + } else { + WRITELONG (p, *(long*)data-(realbytes + s->len) + fix); + } + coff_sect_write (s, mydata, 4L); + } + } +} + +static void coff_sect_write (struct Section *sect, + unsigned char *data, unsigned long len) { + saa_wbytes (sect->data, data, len); + sect->len += len; +} + +static int coff_directives (char *directive, char *value, int pass) { + return 0; +} + +static void coff_write (void) { + long textpos, textrelpos, datapos, datarelpos, sympos; + + /* + * Work out how 
big the file will get. + */ + textpos = COFF_HDRS_END; + textrelpos = textpos + stext.len; + datapos = textrelpos + stext.nrelocs * 10; + datarelpos = datapos + sdata.len; + sympos = datarelpos + sdata.nrelocs * 10; + + /* + * Output the COFF header. + */ + fwriteshort (0x14C, coffp); /* MACHINE_i386 */ + fwriteshort (3, coffp); /* number of sections */ + fwritelong (time(NULL), coffp); /* time stamp */ + fwritelong (sympos, coffp); + fwritelong (nsyms + SYM_INITIAL, coffp); + fwriteshort (0, coffp); /* no optional header */ + /* Flags: 32-bit, no line numbers. Win32 doesn't even bother with them. */ + fwriteshort (win32 ? 0 : 0x104, coffp); + + /* + * Output the section headers. + */ + + coff_section_header (".text", 0L, stext.len, textpos, + textrelpos, stext.nrelocs, + (win32 ? 0x60500020L : 0x20L)); + coff_section_header (".data", stext.len, sdata.len, datapos, + datarelpos, sdata.nrelocs, + (win32 ? 0xC0300040L : 0x40L)); + coff_section_header (".bss", stext.len+sdata.len, bsslen, 0L, 0L, 0, + (win32 ? 0xC0300080L : 0x80L)); + + /* + * Output the text section, and its relocations. + */ + saa_fpwrite (stext.data, coffp); + coff_write_relocs (&stext); + + /* + * Output the data section, and its relocations. + */ + saa_fpwrite (sdata.data, coffp); + coff_write_relocs (&sdata); + + /* + * Output the symbol and string tables. 
+ */ + coff_write_symbols(); + fwritelong (strslen+4, coffp); /* length includes length count */ + saa_fpwrite (strs, coffp); +} + +static void coff_section_header (char *name, long vsize, + long datalen, long datapos, + long relpos, int nrelocs, long flags) { + char padname[8]; + + memset (padname, 0, 8); + strncpy (padname, name, 8); + fwrite (padname, 8, 1, coffp); + fwritelong (vsize, coffp); + fwritelong (0L, coffp); /* RVA/offset - we ignore */ + fwritelong (datalen, coffp); + fwritelong (datapos, coffp); + fwritelong (relpos, coffp); + fwritelong (0L, coffp); /* no line numbers - we don't do 'em */ + fwriteshort (nrelocs, coffp); + fwriteshort (0, coffp); /* again, no line numbers */ + fwritelong (flags, coffp); +} + +static void coff_write_relocs (struct Section *s) { + struct Reloc *r; + + for (r = s->head; r; r = r->next) { + fwritelong (r->address, coffp); + fwritelong (r->symbol, coffp); + /* + * Strange: Microsoft's COFF documentation says 0x03 for an + * absolute relocation, but both Visual C++ and DJGPP agree + * that in fact it's 0x06. I'll use 0x06 until someone + * argues. + */ + fwriteshort (r->relative ? 0x14 : 0x06, coffp); + } +} + +static void coff_symbol (char *name, long strpos, long value, + int section, int type, int aux) { + char padname[8]; + + if (name) { + memset (padname, 0, 8); + strncpy (padname, name, 8); + fwrite (padname, 8, 1, coffp); + } else { + fwritelong (0L, coffp); + fwritelong (strpos, coffp); + } + fwritelong (value, coffp); + fwriteshort (section, coffp); + fwriteshort (0, coffp); + fputc (type, coffp); + fputc (aux, coffp); +} + +static void coff_write_symbols (void) { + char filename[18]; + int i; + + /* + * The `.file' record, and the file name auxiliary record. + */ + coff_symbol (".file", 0L, 0L, -2, 0x67, 1); + memset (filename, 0, 18); + strncpy (filename, coff_infile, 18); + fwrite (filename, 18, 1, coffp); + + /* + * The section records, with their auxiliaries. 
+ */ + memset (filename, 0, 18); /* useful zeroed buffer */ + + coff_symbol (".text", 0L, 0L, 1, 3, 1); + fwritelong (stext.len, coffp); + fwriteshort (stext.nrelocs, coffp); + fwrite (filename, 12, 1, coffp); + coff_symbol (".data", 0L, 0L, 2, 3, 1); + fwritelong (sdata.len, coffp); + fwriteshort (sdata.nrelocs, coffp); + fwrite (filename, 12, 1, coffp); + coff_symbol (".bss", 0L, 0L, 3, 3, 1); + fwritelong (bsslen, coffp); + fwrite (filename, 14, 1, coffp); + + /* + * The absolute symbol, for relative-to-absolute relocations. + */ + coff_symbol (".absolut", 0L, 0L, -1, 3, 0); + + /* + * The real symbols. + */ + saa_rewind (syms); + for (i=0; i<nsyms; i++) { + struct Symbol *sym = saa_rstruct (syms); + coff_symbol (sym->strpos == -1 ? sym->name : NULL, + sym->strpos, sym->value, sym->section, + sym->is_global ? 2 : 3, 0); + } +} + +static long coff_segbase (long segment) { + return segment; +} + +static void coff_std_filename (char *inname, char *outname, efunc error) { + strcpy(coff_infile, inname); + standard_extension (inname, outname, ".o", error); +} + +static void coff_win32_filename (char *inname, char *outname, efunc error) { + strcpy(coff_infile, inname); + standard_extension (inname, outname, ".obj", error); +} + +#endif /* defined(OF_COFF) || defined(OF_WIN32) */ + +#ifdef OF_COFF + +struct ofmt of_coff = { + "COFF (i386) object files (e.g. 
#ifdef OF_DBG

FILE *dbgf;                            /* stream the trace is written to */
efunc dbgef;                           /* error reporter handed to dbg_init */

int segcode,segdata,segbss;            /* segment ids obtained from seg_alloc() */

/*
 * Remember the output stream and error function, allocate the three
 * segment ids and print the banner line that reports them.
 */
static void dbg_init(FILE *fp, efunc errfunc, ldfunc ldef)
{
    (void) ldef;                       /* label-definition hook is unused */

    dbgf = fp;
    dbgef = errfunc;

    segcode = seg_alloc();
    segdata = seg_alloc();
    segbss = seg_alloc();

    fprintf(dbgf,"NASM Output format debug dump - code=%d,data=%d,bss=%d\n",
            segcode,segdata,segbss);
}

/*
 * Close the trace stream.
 */
static void dbg_cleanup(void)
{
    fclose(dbgf);
}

/*
 * Map a section name to its segment id.
 *
 * A NULL name is the request for the initial default: `*bits' is set
 * to 16 and 0 is returned. Otherwise `.text', `.data' and `.bss' map
 * to the ids allocated in dbg_init and anything else yields NO_SEG;
 * `*bits' is left untouched in those cases.
 */
static long dbg_section_names (char *name, int pass, int *bits)
{
    if (!name) {
        /*
         * We must have an initial default: let's make it 16.
         */
        *bits = 16;
        return 0;
    }

    if (strcmp(name, ".text") == 0)
        return segcode;
    if (strcmp(name, ".data") == 0)
        return segdata;
    if (strcmp(name, ".bss") == 0)
        return segbss;

    return NO_SEG;
}

/*
 * Trace one label definition.
 */
static void dbg_deflabel (char *name, long segment, long offset,
                          int is_global) {
    const char *scope = is_global ? "global" : "local";

    fprintf(dbgf,"deflabel %s := %08lx:%08lx %s (%d)\n",name,segment,offset,
            scope, is_global);
}

/*
 * Trace one output request. `type' carries the byte count in its
 * OUT_SIZMASK bits and the request kind in its OUT_TYPMASK bits.
 */
static void dbg_out (long segto, void *data, unsigned long type,
                     long segment, long wrt) {
    long nbytes = type & OUT_SIZMASK;
    unsigned long kind = type & OUT_TYPMASK;

    fprintf(dbgf,"out to %lx, len = %ld: ",segto,nbytes);

    if (kind == OUT_RESERVE) {
        fprintf(dbgf,"reserved.\n");
    } else if (kind == OUT_RAWDATA) {
        unsigned char *bytes = data;
        long k;

        fprintf(dbgf,"raw data = ");
        for (k = 0; k < nbytes; k++)
            fprintf(dbgf,"%02x ",(int)bytes[k]);
        fprintf(dbgf,"\n");
    } else if (kind == OUT_ADDRESS) {
        long value = 0;                /* stays 0 for any other size */

        if (nbytes == 1)
            value = *((char *)data);
        else if (nbytes == 2)
            value = *((short *)data);
        else if (nbytes == 4)
            value = *((long *)data);

        fprintf(dbgf,"addr %08lx (seg %08lx, wrt %08lx)\n",value,
                segment,wrt);
    } else if (kind == OUT_REL2ADR) {
        fprintf(dbgf,"rel2adr %04x (seg %08lx)\n",(int)*(short *)data,segment);
    } else if (kind == OUT_REL4ADR) {
        fprintf(dbgf,"rel4adr %08lx (seg %08lx)\n",*(long *)data,segment);
    } else {
        fprintf(dbgf,"unknown\n");
    }
}

/*
 * Segment bases are not translated by this driver.
 */
static long dbg_segbase(long segment) {
    return segment;
}

/*
 * No format-specific directives are recognised.
 */
static int dbg_directive (char *directive, char *value, int pass) {
    return 0;
}

/*
 * Derive the output file name using the `.dbg' extension.
 */
static void dbg_filename (char *inname, char *outname, efunc error) {
    standard_extension (inname, outname, ".dbg", error);
}

struct ofmt of_dbg = {
    "Trace of all info passed to output stage",
    "dbg",
    dbg_init,
    dbg_out,
    dbg_deflabel,
    dbg_section_names,
    dbg_segbase,
    dbg_directive,
    dbg_filename,
    dbg_cleanup
};

#endif /* OF_DBG */
/*
 * One label recorded by elf_deflabel, kept in the `syms' array until
 * elf_build_symtab turns it into an ELF symbol table entry.
 */
struct Symbol {
    long strpos;                       /* string table position of name */
    long section;                      /* 1/2/3 for .text/.data/.bss, or
                                        * SHN_ABS, SHN_COMMON, SHN_UNDEF */
    int type;                          /* SYM_GLOBAL if global, 0 if local
                                        * (not a plain TRUE/FALSE flag) */
    long value;                        /* address, or COMMON variable size */
};
*, long *); +static struct SAA *elf_build_reltab (long *, struct Reloc *); + +static void elf_init(FILE *fp, efunc errfunc, ldfunc ldef) { + elffp = fp; + error = errfunc; + (void) ldef; /* placate optimisers */ + stext.data = saa_init(1L); stext.head = NULL; stext.tail = &stext.head; + sdata.data = saa_init(1L); sdata.head = NULL; sdata.tail = &sdata.head; + stext.len = stext.size = sdata.len = sdata.size = bsslen = 0; + stext.nrelocs = sdata.nrelocs = 0; + stext.index = seg_alloc(); + sdata.index = seg_alloc(); + bssindex = seg_alloc(); + syms = saa_init((long)sizeof(struct Symbol)); + nlocals = nglobs = 0; + bsym = raa_init(); + + strs = saa_init(1L); + saa_wbytes (strs, "\0", 1L); + saa_wbytes (strs, elf_module, (long)(strlen(elf_module)+1)); + strslen = 2+strlen(elf_module); +} + +static void elf_cleanup(void) { + struct Reloc *r; + + elf_write(); + fclose (elffp); + saa_free (stext.data); + while (stext.head) { + r = stext.head; + stext.head = stext.head->next; + nasm_free (r); + } + saa_free (sdata.data); + while (sdata.head) { + r = sdata.head; + sdata.head = sdata.head->next; + nasm_free (r); + } + saa_free (syms); + raa_free (bsym); + saa_free (strs); +} + +static long elf_section_names (char *name, int pass, int *bits) { + /* + * Default is 32 bits. + */ + if (!name) + *bits = 32; + + if (!name) + return stext.index; + + if (!strcmp(name, ".text")) + return stext.index; + else if (!strcmp(name, ".data")) + return sdata.index; + else if (!strcmp(name, ".bss")) + return bssindex; + else + return NO_SEG; +} + +static void elf_deflabel (char *name, long segment, long offset, + int is_global) { + int pos = strslen; + struct Symbol *sym; + + if (name[0] == '.' && name[1] == '.') { + return; + } + + saa_wbytes (strs, name, (long)(1+strlen(name))); + strslen += 1+strlen(name); + + sym = saa_wstruct (syms); + + sym->strpos = pos; + sym->type = is_global ? 
SYM_GLOBAL : 0; + if (segment == NO_SEG) + sym->section = SHN_ABS; + else if (segment == stext.index) + sym->section = 1; + else if (segment == sdata.index) + sym->section = 2; + else if (segment == bssindex) + sym->section = 3; + else + sym->section = SHN_UNDEF; + + if (is_global == 2) { + sym->value = offset; + sym->section = SHN_COMMON; + } else + sym->value = (sym->section == SHN_UNDEF ? 0 : offset); + + if (sym->type == SYM_GLOBAL) { + if (sym->section == SHN_UNDEF || sym->section == SHN_COMMON) + bsym = raa_write (bsym, segment, nglobs); + nglobs++; + } else + nlocals++; +} + +static void elf_add_reloc (struct Section *sect, long segment, + int relative) { + struct Reloc *r; + + r = *sect->tail = nasm_malloc(sizeof(struct Reloc)); + sect->tail = &r->next; + r->next = NULL; + + r->address = sect->len; + r->symbol = (segment == NO_SEG ? 5 : + segment == stext.index ? 2 : + segment == sdata.index ? 3 : + segment == bssindex ? 4 : + GLOBAL_TEMP_BASE + raa_read(bsym, segment)); + r->relative = relative; + + sect->nrelocs++; +} + +static void elf_out (long segto, void *data, unsigned long type, + long segment, long wrt) { + struct Section *s; + long realbytes = type & OUT_SIZMASK; + unsigned char mydata[4], *p; + + if (wrt != NO_SEG) { + wrt = NO_SEG; /* continue to do _something_ */ + error (ERR_NONFATAL, "WRT not supported by ELF output format"); + } + + type &= OUT_TYPMASK; + + /* + * handle absolute-assembly (structure definitions) + */ + if (segto == NO_SEG) { + if (type != OUT_RESERVE) + error (ERR_NONFATAL, "attempt to assemble code in [ABSOLUTE]" + " space"); + return; + } + + if (segto == stext.index) + s = &stext; + else if (segto == sdata.index) + s = &sdata; + else if (segto == bssindex) + s = NULL; + else { + error(ERR_WARNING, "attempt to assemble code in" + " segment %d: defaulting to `.text'", segto); + s = &stext; + } + + if (!s && type != OUT_RESERVE) { + error(ERR_WARNING, "attempt to initialise memory in the" + " BSS section: ignored"); + if 
(type == OUT_REL2ADR) + realbytes = 2; + else if (type == OUT_REL4ADR) + realbytes = 4; + bsslen += realbytes; + return; + } + + if (type == OUT_RESERVE) { + if (s) { + error(ERR_WARNING, "uninitialised space declared in" + " %s section: zeroing", + (segto == stext.index ? "code" : "data")); + elf_sect_write (s, NULL, realbytes); + } else + bsslen += realbytes; + } else if (type == OUT_RAWDATA) { + if (segment != NO_SEG) + error(ERR_PANIC, "OUT_RAWDATA with other than NO_SEG"); + elf_sect_write (s, data, realbytes); + } else if (type == OUT_ADDRESS) { + if (wrt != NO_SEG) + error(ERR_NONFATAL, "ELF format does not support WRT types"); + if (segment != NO_SEG) { + if (segment % 2) { + error(ERR_NONFATAL, "ELF format does not support" + " segment base references"); + } else + elf_add_reloc (s, segment, FALSE); + } + p = mydata; + if (realbytes == 2 && segment != NO_SEG) + error (ERR_NONFATAL, "ELF format does not support 16-bit" + " relocations"); + WRITELONG (p, *(long *)data); + elf_sect_write (s, mydata, realbytes); + } else if (type == OUT_REL2ADR) { + error (ERR_NONFATAL, "ELF format does not support 16-bit" + " relocations"); + } else if (type == OUT_REL4ADR) { + if (segment == segto) + error(ERR_PANIC, "intra-segment OUT_REL4ADR"); + if (segment != NO_SEG && segment % 2) { + error(ERR_NONFATAL, "ELF format does not support" + " segment base references"); + } else + elf_add_reloc (s, segment, TRUE); + p = mydata; + WRITELONG (p, *(long*)data - realbytes); + elf_sect_write (s, mydata, 4L); + } +} + +static void elf_write(void) { + int nsections, align; + char shstrtab[80], *p; + int shstrtablen, commlen; + char comment[64]; + + struct SAA *symtab, *reltext, *reldata; + long symtablen, symtablocal, reltextlen, reldatalen; + + /* + * Work out how many sections we will have. 
+ * + * Fixed sections are: + * SHN_UNDEF .text .data .bss .comment .shstrtab .symtab .strtab + * + * Optional sections are: + * .rel.text .rel.data + * + * (.rel.bss makes very little sense;-) + */ + nsections = 8; + *shstrtab = '\0'; + shstrtablen = 1; + shstrtablen += 1+sprintf(shstrtab+shstrtablen, ".text"); + shstrtablen += 1+sprintf(shstrtab+shstrtablen, ".data"); + shstrtablen += 1+sprintf(shstrtab+shstrtablen, ".bss"); + shstrtablen += 1+sprintf(shstrtab+shstrtablen, ".comment"); + shstrtablen += 1+sprintf(shstrtab+shstrtablen, ".shstrtab"); + shstrtablen += 1+sprintf(shstrtab+shstrtablen, ".symtab"); + shstrtablen += 1+sprintf(shstrtab+shstrtablen, ".strtab"); + if (stext.head) { + nsections++; + shstrtablen += 1+sprintf(shstrtab+shstrtablen, ".rel.text"); + } + if (sdata.head) { + nsections++; + shstrtablen += 1+sprintf(shstrtab+shstrtablen, ".rel.data"); + } + + /* + * Do the comment. + */ + *comment = '\0'; + commlen = 2+sprintf(comment+1, "The Netwide Assembler %s", NASM_VER); + + /* + * Output the ELF header. 
+ */ + fwrite ("\177ELF\1\1\1\0\0\0\0\0\0\0\0\0", 16, 1, elffp); + fwriteshort (1, elffp); /* ET_REL relocatable file */ + fwriteshort (3, elffp); /* EM_386 processor ID */ + fwritelong (1L, elffp); /* EV_CURRENT file format version */ + fwritelong (0L, elffp); /* no entry point */ + fwritelong (0L, elffp); /* no program header table */ + fwritelong (0x40L, elffp); /* section headers straight after + * ELF header plus alignment */ + fwritelong (0L, elffp); /* 386 defines no special flags */ + fwriteshort (0x34, elffp); /* size of ELF header */ + fwriteshort (0, elffp); /* no program header table, again */ + fwriteshort (0, elffp); /* still no program header table */ + fwriteshort (0x28, elffp); /* size of section header */ + fwriteshort (nsections, elffp); /* number of sections */ + fwriteshort (5, elffp); /* string table section index for + * section header table */ + fwritelong (0L, elffp); /* align to 0x40 bytes */ + fwritelong (0L, elffp); + fwritelong (0L, elffp); + + /* + * Build the symbol table and relocation tables. + */ + symtab = elf_build_symtab (&symtablen, &symtablocal); + reltext = elf_build_reltab (&reltextlen, stext.head); + reldata = elf_build_reltab (&reldatalen, sdata.head); + + /* + * Now output the section header table. 
+ */ + + elf_foffs = 0x40 + 0x28 * nsections; + align = ((elf_foffs+SEG_ALIGN_1) & ~SEG_ALIGN_1) - elf_foffs; + elf_foffs += align; + elf_nsect = 0; + + elf_section_header (0, 0, 0, NULL, FALSE, 0L, 0, 0, 0, 0); /* SHN_UNDEF */ + p = shstrtab+1; + elf_section_header (p - shstrtab, 1, 6, stext.data, TRUE, + stext.len, 0, 0, 16, 0); /* .text */ + p += strlen(p)+1; + elf_section_header (p - shstrtab, 1, 3, sdata.data, TRUE, + sdata.len, 0, 0, 4, 0); /* .data */ + p += strlen(p)+1; + elf_section_header (p - shstrtab, 8, 3, NULL, TRUE, + bsslen, 0, 0, 4, 0); /* .bss */ + p += strlen(p)+1; + elf_section_header (p - shstrtab, 1, 0, comment, FALSE, + (long)commlen, 0, 0, 1, 0);/* .comment */ + p += strlen(p)+1; + elf_section_header (p - shstrtab, 3, 0, shstrtab, FALSE, + (long)shstrtablen, 0, 0, 1, 0);/* .shstrtab */ + p += strlen(p)+1; + elf_section_header (p - shstrtab, 2, 0, symtab, TRUE, + symtablen, 7, symtablocal, 4, 16);/* .symtab */ + p += strlen(p)+1; + elf_section_header (p - shstrtab, 3, 0, strs, TRUE, + strslen, 0, 0, 1, 0); /* .strtab */ + if (reltext) { + p += strlen(p)+1; + elf_section_header (p - shstrtab, 9, 0, reltext, TRUE, + reltextlen, 6, 1, 4, 8); /* .rel.text */ + } + if (reldata) { + p += strlen(p)+1; + elf_section_header (p - shstrtab, 9, 0, reldata, TRUE, + reldatalen, 6, 2, 4, 8); /* .rel.data */ + } + + fwrite (align_str, align, 1, elffp); + + /* + * Now output the sections. + */ + elf_write_sections(); + + saa_free (symtab); + if (reltext) + saa_free (reltext); + if (reldata) + saa_free (reldata); +} + +static struct SAA *elf_build_symtab (long *len, long *local) { + struct SAA *s = saa_init(1L); + struct Symbol *sym; + unsigned char entry[16], *p; + int i; + + *len = *local = 0; + + /* + * First, an all-zeros entry, required by the ELF spec. + */ + saa_wbytes (s, NULL, 16L); /* null symbol table entry */ + *len += 16; + (*local)++; + + /* + * Next, an entry for the file name. 
+ */ + p = entry; + WRITELONG (p, 1); /* we know it's 1st thing in strtab */ + WRITELONG (p, 0); /* no value */ + WRITELONG (p, 0); /* no size either */ + WRITESHORT (p, 4); /* type FILE */ + WRITESHORT (p, SHN_ABS); + saa_wbytes (s, entry, 16L); + *len += 16; + (*local)++; + + /* + * Now four standard symbols defining segments, for relocation + * purposes. + */ + for (i = 1; i <= 4; i++) { + p = entry; + WRITELONG (p, 0); /* no symbol name */ + WRITELONG (p, 0); /* offset zero */ + WRITELONG (p, 0); /* size zero */ + WRITESHORT (p, 3); /* local section-type thing */ + WRITESHORT (p, (i==4 ? SHN_ABS : i)); /* the section id */ + saa_wbytes (s, entry, 16L); + *len += 16; + (*local)++; + } + + /* + * Now the other local symbols. + */ + saa_rewind (syms); + while ( (sym = saa_rstruct (syms)) ) { + if (sym->type == SYM_GLOBAL) + continue; + p = entry; + WRITELONG (p, sym->strpos); + WRITELONG (p, sym->value); + if (sym->section == SHN_COMMON) + WRITELONG (p, sym->value); + else + WRITELONG (p, 0); + WRITESHORT (p, 0); /* local non-typed thing */ + WRITESHORT (p, sym->section); + saa_wbytes (s, entry, 16L); + *len += 16; + (*local)++; + } + + /* + * Now the global symbols. + */ + saa_rewind (syms); + while ( (sym = saa_rstruct (syms)) ) { + if (sym->type != SYM_GLOBAL) + continue; + p = entry; + WRITELONG (p, sym->strpos); + WRITELONG (p, sym->value); + if (sym->section == SHN_COMMON) + WRITELONG (p, sym->value); + else + WRITELONG (p, 0); + WRITESHORT (p, SYM_GLOBAL); /* global non-typed thing */ + WRITESHORT (p, sym->section); + saa_wbytes (s, entry, 16L); + *len += 16; + } + + return s; +} + +static struct SAA *elf_build_reltab (long *len, struct Reloc *r) { + struct SAA *s; + unsigned char *p, entry[8]; + + if (!r) + return NULL; + + s = saa_init(1L); + *len = 0; + + while (r) { + long sym = r->symbol; + + if (sym >= GLOBAL_TEMP_BASE) + sym += -GLOBAL_TEMP_BASE + 6 + nlocals; + + p = entry; + WRITELONG (p, r->address); + WRITELONG (p, (sym << 8) + (r->relative ? 
2 : 1)); + saa_wbytes (s, entry, 8L); + *len += 8; + + r = r->next; + } + + return s; +} + +static void elf_section_header (int name, int type, int flags, + void *data, int is_saa, long datalen, + int link, int info, int align, int eltsize) { + elf_sects[elf_nsect].data = data; + elf_sects[elf_nsect].len = datalen; + elf_sects[elf_nsect].is_saa = is_saa; + elf_nsect++; + + fwritelong ((long)name, elffp); + fwritelong ((long)type, elffp); + fwritelong ((long)flags, elffp); + fwritelong (0L, elffp); /* no address, ever, in object files */ + fwritelong (type == 0 ? 0L : elf_foffs, elffp); + fwritelong (datalen, elffp); + if (data) + elf_foffs += (datalen+SEG_ALIGN_1) & ~SEG_ALIGN_1; + fwritelong ((long)link, elffp); + fwritelong ((long)info, elffp); + fwritelong ((long)align, elffp); + fwritelong ((long)eltsize, elffp); +} + +static void elf_write_sections (void) { + int i; + for (i = 0; i < elf_nsect; i++) + if (elf_sects[i].data) { + long len = elf_sects[i].len; + long reallen = (len+SEG_ALIGN_1) & ~SEG_ALIGN_1; + long align = reallen - len; + if (elf_sects[i].is_saa) + saa_fpwrite (elf_sects[i].data, elffp); + else + fwrite (elf_sects[i].data, len, 1, elffp); + fwrite (align_str, align, 1, elffp); + } +} + +static void elf_sect_write (struct Section *sect, + unsigned char *data, unsigned long len) { + saa_wbytes (sect->data, data, len); + sect->len += len; +} + +static long elf_segbase (long segment) { + return segment; +} + +static int elf_directive (char *directive, char *value, int pass) { + return 0; +} + +static void elf_filename (char *inname, char *outname, efunc error) { + strcpy(elf_module, inname); + standard_extension (inname, outname, ".o", error); +} + +struct ofmt of_elf = { + "ELF32 (i386) object files (e.g. 
Linux)", + "elf", + elf_init, + elf_out, + elf_deflabel, + elf_section_names, + elf_segbase, + elf_directive, + elf_filename, + elf_cleanup +}; + +#endif /* OF_ELF */ diff --git a/outform.c b/outform.c new file mode 100644 index 0000000..154c63f --- /dev/null +++ b/outform.c @@ -0,0 +1,42 @@ +/* outform.c manages a list of output formats, and associates + * them with their relevant drivers. Also has a + * routine to find the correct driver given a name + * for it + * + * The Netwide Assembler is copyright (C) 1996 Simon Tatham and + * Julian Hall. All rights reserved. The software is + * redistributable under the licence given in the file "Licence" + * distributed in the NASM archive. + */ + +#include <stdio.h> +#include <string.h> +#include "outform.h" + +static struct ofmt *drivers[MAX_OUTPUT_FORMATS]; +static int ndrivers = 0; + +struct ofmt *ofmt_find(char *name) /* find driver */ +{ + int i; + + for (i=0; i<ndrivers; i++) + if (!strcmp(name,drivers[i]->shortname)) + return drivers[i]; + + return NULL; +} + +void ofmt_list(struct ofmt *deffmt) +{ + int i; + for (i=0; i<ndrivers; i++) + fprintf(stderr," %c %-7s%s\n", + drivers[i] == deffmt ? '*' : ' ', + drivers[i]->shortname, + drivers[i]->fullname); +} + +void ofmt_register (struct ofmt *info) { + drivers[ndrivers++] = info; +} diff --git a/outform.h b/outform.h new file mode 100644 index 0000000..48b8276 --- /dev/null +++ b/outform.h @@ -0,0 +1,167 @@ +/* outform.h header file for binding output format drivers to the + * remainder of the code in the Netwide Assembler + * + * The Netwide Assembler is copyright (C) 1996 Simon Tatham and + * Julian Hall. All rights reserved. The software is + * redistributable under the licence given in the file "Licence" + * distributed in the NASM archive. + */ + +/* + * This header file allows configuration of which output formats + * get compiled into the NASM binary. 
You can configure by defining + * various preprocessor symbols beginning with "OF_", either on the + * compiler command line or at the top of this file. + * + * OF_ONLY -- only include specified object formats + * OF_name -- ensure that output format 'name' is included + * OF_NO_name -- remove output format 'name' + * OF_DOS -- ensure that 'obj', 'bin' & 'win32' are included. + * OF_UNIX -- ensure that 'aout', 'coff' and 'elf' are in. + * OF_OTHERS -- ensure that 'bin', 'as86' & 'rdf' are in. + * OF_ALL -- ensure that all formats are included. + * + * OF_DEFAULT=of_name -- ensure that 'name' is the default format. + * + * eg: -DOF_UNIX -DOF_ELF -DOF_DEFAULT=of_elf would be a suitable config + * for an average linux system. + * + * Default config = -DOF_ALL -DOF_DEFAULT=of_bin + * + * You probably only want to set these options while compiling 'nasm.c'. */ + +#ifndef NASM_OUTFORM_H +#define NASM_OUTFORM_H + +#include "nasm.h" + +#define MAX_OUTPUT_FORMATS 16 + +struct ofmt *ofmt_find(char *name); +void ofmt_list(struct ofmt *deffmt); +void ofmt_register (struct ofmt *); + +/* -------------- USER MODIFIABLE PART ---------------- */ + +/* + * Insert #defines here in accordance with the configuration + * instructions above. + * + * E.g. + * + * #define OF_ONLY + * #define OF_OBJ + * #define OF_BIN + * + * for a 16-bit DOS assembler with no extraneous formats. + */ + +/* ------------ END USER MODIFIABLE PART -------------- */ + +/* ====configurable info begins here==== */ +/* formats configurable: + * bin,obj,elf,aout,coff,win32,as86,rdf */ + +/* process options... */ + +#ifndef OF_ONLY +#ifndef OF_ALL +#define OF_ALL /* default is to have all formats */ +#endif +#endif + +#ifdef OF_ALL /* set all formats on... 
*/ +#ifndef OF_BIN +#define OF_BIN +#endif +#ifndef OF_OBJ +#define OF_OBJ +#endif +#ifndef OF_ELF +#define OF_ELF +#endif +#ifndef OF_COFF +#define OF_COFF +#endif +#ifndef OF_AOUT +#define OF_AOUT +#endif +#ifndef OF_WIN32 +#define OF_WIN32 +#endif +#ifndef OF_AS86 +#define OF_AS86 +#endif +#ifndef OF_RDF +#define OF_RDF +#endif +#endif /* OF_ALL */ + +/* turn on groups of formats specified.... */ +#ifdef OF_DOS +#ifndef OF_OBJ +#define OF_OBJ +#endif +#ifndef OF_BIN +#define OF_BIN +#endif +#ifndef OF_WIN32 +#define OF_WIN32 +#endif +#endif + +#ifdef OF_UNIX +#ifndef OF_AOUT +#define OF_AOUT +#endif +#ifndef OF_COFF +#define OF_COFF +#endif +#ifndef OF_ELF +#define OF_ELF +#endif +#endif + +#ifdef OF_OTHERS +#ifndef OF_BIN +#define OF_BIN +#endif +#ifndef OF_AS86 +#define OF_AS86 +#endif +#ifndef OF_RDF +#define OF_RDF +#endif +#endif + +/* finally... override any format specifically specifed to be off */ +#ifdef OF_NO_BIN +#undef OF_BIN +#endif +#ifdef OF_NO_OBJ +#undef OF_OBJ +#endif +#ifdef OF_NO_ELF +#undef OF_ELF +#endif +#ifdef OF_NO_AOUT +#undef OF_AOUT +#endif +#ifdef OF_NO_COFF +#undef OF_COFF +#endif +#ifdef OF_NO_WIN32 +#undef OF_WIN32 +#endif +#ifdef OF_NO_AS86 +#undef OF_AS86 +#endif +#ifdef OF_NO_RDF +#undef OF_RDF +#endif + +#ifndef OF_DEFAULT +#define OF_DEFAULT of_bin +#endif + +#endif /* NASM_OUTFORM_H */ diff --git a/outobj.c b/outobj.c new file mode 100644 index 0000000..b33b72d --- /dev/null +++ b/outobj.c @@ -0,0 +1,1229 @@ +/* outobj.c output routines for the Netwide Assembler to produce + * Microsoft 16-bit .OBJ object files + * + * The Netwide Assembler is copyright (C) 1996 Simon Tatham and + * Julian Hall. All rights reserved. The software is + * redistributable under the licence given in the file "Licence" + * distributed in the NASM archive. 
/*
 * One external symbol, appended to the exthead/exttail list by
 * obj_deflabel when a label's segment is not one of our own.
 */
static struct External {
    struct External *next;
    char *name;                        /* the label's name pointer, stored
                                        * as passed in (not copied) */
    long commonsize;                   /* the label's offset when it was
                                        * defined with is_global == 2,
                                        * otherwise 0 */
} *exthead, **exttail;

/* Running count of externals; pre-incremented, so indices start at 1. */
static int externals;

/*
 * Back-reference table from segment number to external index, kept
 * as a chain of fixed-size blocks. obj_deflabel takes segment/2 as
 * the position, walks one block forward per EXT_BLKSIZ positions
 * (allocating blocks as needed) and stores the new external index in
 * the slot it ends on. Slots that were never stored to are
 * uninitialised.
 */
static struct ExtBack {
    struct ExtBack *next;
    int index[EXT_BLKSIZ];             /* external indices for this block */
} *ebhead, **ebtail;
*/ + union { + long index; + char *name; + } segs[GROUP_MAX]; /* ...in this */ +} *grphead, **grptail, *obj_grp_needs_update; + +static struct ObjData { + struct ObjData *next; + int nonempty; + struct Segment *seg; + long startpos; + int letype, ftype; + unsigned char ledata[LEDATA_MAX], *lptr; + unsigned char fixupp[RECORD_MAX], *fptr; +} *datahead, *datacurr, **datatail; + +static long obj_entry_seg, obj_entry_ofs; + +enum RecordID { /* record ID codes */ + + THEADR = 0x80, /* module header */ + COMENT = 0x88, /* comment record */ + + LNAMES = 0x96, /* list of names */ + + SEGDEF = 0x98, /* segment definition */ + GRPDEF = 0x9A, /* group definition */ + EXTDEF = 0x8C, /* external definition */ + PUBDEF = 0x90, /* public definition */ + COMDEF = 0xB0, /* common definition */ + + LEDATA = 0xA0, /* logical enumerated data */ + FIXUPP = 0x9C, /* fixups (relocations) */ + + MODEND = 0x8A /* module end */ +}; + +extern struct ofmt of_obj; + +static long obj_ledata_space(struct Segment *); +static int obj_fixup_free(struct Segment *); +static void obj_ledata_new(struct Segment *); +static void obj_ledata_commit(void); +static void obj_write_fixup (struct ObjData *, int, int, long, long, long); +static long obj_segment (char *, int, int *); +static void obj_write_file(void); +static unsigned char *obj_write_data(unsigned char *, unsigned char *, int); +static unsigned char *obj_write_byte(unsigned char *, int); +static unsigned char *obj_write_word(unsigned char *, int); +static unsigned char *obj_write_dword(unsigned char *, long); +static unsigned char *obj_write_rword(unsigned char *, int); +static unsigned char *obj_write_name(unsigned char *, char *); +static unsigned char *obj_write_index(unsigned char *, int); +static unsigned char *obj_write_value(unsigned char *, unsigned long); +static void obj_record(int, unsigned char *, unsigned char *); + +static void obj_init (FILE *fp, efunc errfunc, ldfunc ldef) { + ofp = fp; + error = errfunc; + deflabel = ldef; + 
first_seg = seg_alloc(); + any_segs = FALSE; + fpubhead = NULL; + fpubtail = &fpubhead; + exthead = NULL; + exttail = &exthead; + externals = 0; + ebhead = NULL; + ebtail = &ebhead; + seghead = obj_seg_needs_update = NULL; + segtail = &seghead; + grphead = obj_grp_needs_update = NULL; + grptail = &grphead; + datahead = datacurr = NULL; + datatail = &datahead; + obj_entry_seg = NO_SEG; + obj_uppercase = FALSE; +} + +static void obj_cleanup (void) { + obj_write_file(); + fclose (ofp); + while (seghead) { + struct Segment *segtmp = seghead; + seghead = seghead->next; + while (segtmp->pubhead) { + struct Public *pubtmp = segtmp->pubhead; + segtmp->pubhead = pubtmp->next; + nasm_free (pubtmp); + } + nasm_free (segtmp); + } + while (fpubhead) { + struct Public *pubtmp = fpubhead; + fpubhead = fpubhead->next; + nasm_free (pubtmp); + } + while (exthead) { + struct External *exttmp = exthead; + exthead = exthead->next; + nasm_free (exttmp); + } + while (ebhead) { + struct ExtBack *ebtmp = ebhead; + ebhead = ebhead->next; + nasm_free (ebtmp); + } + while (grphead) { + struct Group *grptmp = grphead; + grphead = grphead->next; + nasm_free (grptmp); + } + while (datahead) { + struct ObjData *datatmp = datahead; + datahead = datahead->next; + nasm_free (datatmp); + } +} + +static void obj_deflabel (char *name, long segment, + long offset, int is_global) { + /* + * We have three cases: + * + * (i) `segment' is a segment-base. If so, set the name field + * for the segment or group structure it refers to, and then + * return. + * + * (ii) `segment' is one of our segments, or a SEG_ABS segment. + * Save the label position for later output of a PUBDEF record. + * (Or a MODPUB, if we work out how.) + * + * (iii) `segment' is not one of our segments. Save the label + * position for later output of an EXTDEF, and also store a + * back-reference so that we can map later references to this + * segment number to the external index. 
+ */ + struct External *ext; + struct ExtBack *eb; + struct Segment *seg; + int i; + + /* + * First check for the double-period, signifying something + * unusual. + */ + if (name[0] == '.' && name[1] == '.') { + if (!strcmp(name, "..start")) { + obj_entry_seg = segment; + obj_entry_ofs = offset; + } + return; + } + + /* + * Case (i): + */ + if (obj_seg_needs_update) { + obj_seg_needs_update->name = name; + return; + } else if (obj_grp_needs_update) { + obj_grp_needs_update->name = name; + return; + } + if (segment < SEG_ABS && segment != NO_SEG && segment % 2) + return; + + if (segment >= SEG_ABS) { + /* + * SEG_ABS subcase of (ii). + */ + if (is_global) { + struct Public *pub; + + pub = *fpubtail = nasm_malloc(sizeof(*pub)); + fpubtail = &pub->next; + pub->next = NULL; + pub->name = name; + pub->offset = offset; + pub->segment = segment & ~SEG_ABS; + } + return; + } + + for (seg = seghead; seg; seg = seg->next) + if (seg->index == segment) { + /* + * Case (ii). Maybe MODPUB someday? + */ + if (is_global) { + struct Public *pub; + + pub = *seg->pubtail = nasm_malloc(sizeof(*pub)); + seg->pubtail = &pub->next; + pub->next = NULL; + pub->name = name; + pub->offset = offset; + } + return; + } + + /* + * Case (iii). 
+ */ + ext = *exttail = nasm_malloc(sizeof(*ext)); + ext->next = NULL; + exttail = &ext->next; + ext->name = name; + if (is_global == 2) + ext->commonsize = offset; + else + ext->commonsize = 0; + + i = segment/2; + eb = ebhead; + if (!eb) { + eb = *ebtail = nasm_malloc(sizeof(*eb)); + eb->next = NULL; + ebtail = &eb->next; + } + while (i > EXT_BLKSIZ) { + if (eb && eb->next) + eb = eb->next; + else { + eb = *ebtail = nasm_malloc(sizeof(*eb)); + eb->next = NULL; + ebtail = &eb->next; + } + i -= EXT_BLKSIZ; + } + eb->index[i] = ++externals; +} + +static void obj_out (long segto, void *data, unsigned long type, + long segment, long wrt) { + long size, realtype; + unsigned char *ucdata; + long ldata; + struct Segment *seg; + + /* + * handle absolute-assembly (structure definitions) + */ + if (segto == NO_SEG) { + if ((type & OUT_TYPMASK) != OUT_RESERVE) + error (ERR_NONFATAL, "attempt to assemble code in [ABSOLUTE]" + " space"); + return; + } + + /* + * If `any_segs' is still FALSE, we must define a default + * segment. + */ + if (!any_segs) { + int tempint; /* ignored */ + if (segto != obj_segment("__NASMDEFSEG", 2, &tempint)) + error (ERR_PANIC, "strange segment conditions in OBJ driver"); + } + + /* + * Find the segment we are targetting. 
+ */ + for (seg = seghead; seg; seg = seg->next) + if (seg->index == segto) + break; + if (!seg) + error (ERR_PANIC, "code directed to nonexistent segment?"); + + size = type & OUT_SIZMASK; + realtype = type & OUT_TYPMASK; + if (realtype == OUT_RAWDATA) { + ucdata = data; + while (size > 0) { + long len = obj_ledata_space(seg); + if (len == 0) { + obj_ledata_new(seg); + len = obj_ledata_space(seg); + } + if (len > size) + len = size; + datacurr->lptr = obj_write_data (datacurr->lptr, ucdata, len); + datacurr->nonempty = TRUE; + ucdata += len; + size -= len; + seg->currentpos += len; + } + } else if (realtype == OUT_ADDRESS || realtype == OUT_REL2ADR || + realtype == OUT_REL4ADR) { + if (segment == NO_SEG && realtype != OUT_ADDRESS) + error(ERR_NONFATAL, "relative call to absolute address not" + " supported by OBJ format"); + if (segment >= SEG_ABS) + error(ERR_NONFATAL, "far-absolute relocations not supported" + " by OBJ format"); + ldata = *(long *)data; + if (realtype == OUT_REL2ADR) + ldata += (size-2); + if (realtype == OUT_REL4ADR) + ldata += (size-4); + if (obj_ledata_space(seg) < 4 || !obj_fixup_free(seg)) + obj_ledata_new(seg); + if (size == 2) + datacurr->lptr = obj_write_word (datacurr->lptr, ldata); + else + datacurr->lptr = obj_write_dword (datacurr->lptr, ldata); + datacurr->nonempty = TRUE; + if (segment != NO_SEG) + obj_write_fixup (datacurr, size, + (realtype == OUT_REL2ADR ? 
0 : 0x4000), + segment, wrt, + (seg->currentpos - datacurr->startpos)); + seg->currentpos += size; + } else if (realtype == OUT_RESERVE) { + obj_ledata_commit(); + seg->currentpos += size; + } +} + +static long obj_ledata_space(struct Segment *segto) { + if (datacurr && datacurr->seg == segto) + return datacurr->ledata + LEDATA_MAX - datacurr->lptr; + else + return 0; +} + +static int obj_fixup_free(struct Segment *segto) { + if (datacurr && datacurr->seg == segto) + return (datacurr->fixupp + RECORD_MAX - datacurr->fptr) > 8; + else + return 0; +} + +static void obj_ledata_new(struct Segment *segto) { + datacurr = *datatail = nasm_malloc(sizeof(*datacurr)); + datacurr->next = NULL; + datatail = &datacurr->next; + datacurr->nonempty = FALSE; + datacurr->lptr = datacurr->ledata; + datacurr->fptr = datacurr->fixupp; + datacurr->seg = segto; + if (segto->use32) + datacurr->letype = LEDATA+1; + else + datacurr->letype = LEDATA; + datacurr->startpos = segto->currentpos; + datacurr->ftype = FIXUPP; + + datacurr->lptr = obj_write_index (datacurr->lptr, segto->obj_index); + if (datacurr->letype == LEDATA) + datacurr->lptr = obj_write_word (datacurr->lptr, segto->currentpos); + else + datacurr->lptr = obj_write_dword (datacurr->lptr, segto->currentpos); +} + +static void obj_ledata_commit(void) { + datacurr = NULL; +} + +static void obj_write_fixup (struct ObjData *data, int bytes, + int segrel, long seg, long wrt, + long offset) { + int locat, method; + int base; + long tidx, fidx; + struct Segment *s = NULL; + struct Group *g = NULL; + + locat = 0x8000 | segrel | offset; + if (seg % 2) { + base = TRUE; + locat |= 0x800; + seg--; + if (bytes != 2) + error(ERR_NONFATAL, "OBJ format can only handle 2-byte" + " segment base references"); + } else { + base = FALSE; + if (bytes == 2) + locat |= 0x400; + else { + locat |= 0x2400; + data->ftype = FIXUPP+1; /* need new-style FIXUPP record */ + } + } + data->fptr = obj_write_rword (data->fptr, locat); + + tidx = fidx = -1, method = 
0; /* placate optimisers */ + + /* + * See if we can find the segment ID in our segment list. If + * so, we have a T4 (LSEG) target. + */ + for (s = seghead; s; s = s->next) + if (s->index == seg) + break; + if (s) + method = 4, tidx = s->obj_index; + else { + for (g = grphead; g; g = g->next) + if (g->index == seg) + break; + if (g) + method = 5, tidx = g->obj_index; + else { + long i = seg/2; + struct ExtBack *eb = ebhead; + while (i > EXT_BLKSIZ) { + if (eb) + eb = eb->next; + else + break; + i -= EXT_BLKSIZ; + } + if (eb) + method = 6, tidx = eb->index[i]; + else + error(ERR_PANIC, + "unrecognised segment value in obj_write_fixup"); + } + } + + /* + * If no WRT given, assume the natural default, which is method + * F5 unless we are doing an OFFSET fixup for a grouped + * segment, in which case we require F1 (group). + */ + if (wrt == NO_SEG) { + if (!base && s && s->grp) + method |= 0x10, fidx = s->grp->obj_index; + else + method |= 0x50, fidx = -1; + } else { + /* + * See if we can find the WRT-segment ID in our segment + * list. If so, we have a F0 (LSEG) frame. 
+ */ + for (s = seghead; s; s = s->next) + if (s->index == wrt-1) + break; + if (s) + method |= 0x00, fidx = s->obj_index; + else { + for (g = grphead; g; g = g->next) + if (g->index == wrt-1) + break; + if (g) + method |= 0x10, fidx = g->obj_index; + else { + long i = wrt/2; + struct ExtBack *eb = ebhead; + while (i > EXT_BLKSIZ) { + if (eb) + eb = eb->next; + else + break; + i -= EXT_BLKSIZ; + } + if (eb) + method |= 0x20, fidx = eb->index[i]; + else + error(ERR_PANIC, + "unrecognised WRT value in obj_write_fixup"); + } + } + } + + data->fptr = obj_write_byte (data->fptr, method); + if (fidx != -1) + data->fptr = obj_write_index (data->fptr, fidx); + data->fptr = obj_write_index (data->fptr, tidx); +} + +static long obj_segment (char *name, int pass, int *bits) { + /* + * We call the label manager here to define a name for the new + * segment, and when our _own_ label-definition stub gets + * called in return, it should register the new segment name + * using the pointer it gets passed. That way we save memory, + * by sponging off the label manager. + */ + if (!name) { + *bits = 16; + return first_seg; + } else { + struct Segment *seg; + struct Group *grp; + int obj_idx, i, attrs, rn_error; + char *p; + + /* + * Look for segment attributes. + */ + attrs = 0; + p = name; + while (*p && !isspace(*p)) + p++; + if (*p) { + *p++ = '\0'; + while (*p && isspace(*p)) + *p++ = '\0'; + } + while (*p) { + while (*p && !isspace(*p)) + p++; + if (*p) { + *p++ = '\0'; + while (*p && isspace(*p)) + *p++ = '\0'; + } + + attrs++; + } + + obj_idx = 1; + for (seg = seghead; seg; seg = seg->next) { + obj_idx++; + if (!strcmp(seg->name, name)) { + if (attrs > 0 && pass == 1) + error(ERR_WARNING, "segment attributes specified on" + " redeclaration of segment: ignoring"); + if (seg->use32) + *bits = 32; + else + *bits = 16; + return seg->index; + } + } + + *segtail = seg = nasm_malloc(sizeof(*seg)); + seg->next = NULL; + segtail = &seg->next; + seg->index = (any_segs ? 
seg_alloc() : first_seg); + seg->obj_index = obj_idx; + seg->grp = NULL; + any_segs = TRUE; + seg->name = NULL; + seg->currentpos = 0; + seg->align = 1; /* default */ + seg->use32 = FALSE; /* default */ + seg->combine = CMB_PUBLIC; /* default */ + seg->segclass = seg->overlay = NULL; + seg->pubhead = NULL; + seg->pubtail = &seg->pubhead; + + /* + * Process the segment attributes. + */ + p = name; + while (attrs--) { + p += strlen(p); + while (!*p) p++; + + /* + * `p' contains a segment attribute. + */ + if (!nasm_stricmp(p, "private")) + seg->combine = CMB_PRIVATE; + else if (!nasm_stricmp(p, "public")) + seg->combine = CMB_PUBLIC; + else if (!nasm_stricmp(p, "common")) + seg->combine = CMB_COMMON; + else if (!nasm_stricmp(p, "stack")) + seg->combine = CMB_STACK; + else if (!nasm_stricmp(p, "use16")) + seg->use32 = FALSE; + else if (!nasm_stricmp(p, "use32")) + seg->use32 = TRUE; + else if (!nasm_strnicmp(p, "class=", 6)) + seg->segclass = nasm_strdup(p+6); + else if (!nasm_strnicmp(p, "overlay=", 8)) + seg->overlay = nasm_strdup(p+8); + else if (!nasm_strnicmp(p, "align=", 6)) { + seg->align = readnum(p+6, &rn_error); + if (rn_error) { + seg->align = 1; + error (ERR_NONFATAL, "segment alignment should be" + " numeric"); + } + switch ((int) seg->align) { + case 1: /* BYTE */ + case 2: /* WORD */ + case 4: /* DWORD */ + case 16: /* PARA */ + case 256: /* PAGE */ + break; + case 8: + error(ERR_WARNING, "OBJ format does not support alignment" + " of 8: rounding up to 16"); + seg->align = 16; + break; + case 32: + case 64: + case 128: + error(ERR_WARNING, "OBJ format does not support alignment" + " of %d: rounding up to 256", seg->align); + seg->align = 256; + break; + default: + error(ERR_NONFATAL, "invalid alignment value %d", + seg->align); + seg->align = 1; + break; + } + } else if (!nasm_strnicmp(p, "absolute=", 9)) { + seg->align = SEG_ABS + readnum(p+9, &rn_error); + if (rn_error) + error (ERR_NONFATAL, "argument to `absolute' segment" + " attribute should be 
numeric"); + } + } + + obj_seg_needs_update = seg; + if (seg->align >= SEG_ABS) + deflabel (name, NO_SEG, seg->align - SEG_ABS, &of_obj, error); + else + deflabel (name, seg->index+1, 0L, &of_obj, error); + obj_seg_needs_update = NULL; + + /* + * See if this segment is defined in any groups. + */ + for (grp = grphead; grp; grp = grp->next) { + for (i = grp->nindices; i < grp->nentries; i++) { + if (!strcmp(grp->segs[i].name, seg->name)) { + nasm_free (grp->segs[i].name); + grp->segs[i] = grp->segs[grp->nindices]; + grp->segs[grp->nindices++].index = seg->obj_index; + if (seg->grp) + error(ERR_WARNING, "segment `%s' is already part of" + " a group: first one takes precedence", + seg->name); + else + seg->grp = grp; + } + } + } + + if (seg->use32) + *bits = 32; + else + *bits = 16; + return seg->index; + } +} + +static int obj_directive (char *directive, char *value, int pass) { + if (!strcmp(directive, "group")) { + char *p, *q; + if (pass == 1) { + struct Group *grp; + struct Segment *seg; + int obj_idx; + + q = value; + while (*q && !isspace(*q)) + q++; + if (isspace(*q)) { + *q++ = '\0'; + while (*q && isspace(*q)) + q++; + } + if (!*q) { + error(ERR_NONFATAL, "GROUP directive contains no segments"); + return 1; + } + + obj_idx = 1; + for (grp = grphead; grp; grp = grp->next) { + obj_idx++; + if (!strcmp(grp->name, value)) { + error(ERR_NONFATAL, "group `%s' defined twice", value); + return 1; + } + } + + *grptail = grp = nasm_malloc(sizeof(*grp)); + grp->next = NULL; + grptail = &grp->next; + grp->index = seg_alloc(); + grp->obj_index = obj_idx; + grp->nindices = grp->nentries = 0; + grp->name = NULL; + + obj_grp_needs_update = grp; + deflabel (value, grp->index+1, 0L, &of_obj, error); + obj_grp_needs_update = NULL; + + while (*q) { + p = q; + while (*q && !isspace(*q)) + q++; + if (isspace(*q)) { + *q++ = '\0'; + while (*q && isspace(*q)) + q++; + } + /* + * Now p contains a segment name. Find it. 
+ */ + for (seg = seghead; seg; seg = seg->next) + if (!strcmp(seg->name, p)) + break; + if (seg) { + /* + * We have a segment index. Shift a name entry + * to the end of the array to make room. + */ + grp->segs[grp->nentries++] = grp->segs[grp->nindices]; + grp->segs[grp->nindices++].index = seg->obj_index; + if (seg->grp) + error(ERR_WARNING, "segment `%s' is already part of" + " a group: first one takes precedence", + seg->name); + else + seg->grp = grp; + } else { + /* + * We have an as-yet undefined segment. + * Remember its name, for later. + */ + grp->segs[grp->nentries++].name = nasm_strdup(p); + } + } + } + return 1; + } + if (!strcmp(directive, "uppercase")) { + obj_uppercase = TRUE; + return 1; + } + return 0; +} + +static long obj_segbase (long segment) { + struct Segment *seg; + + /* + * Find the segment in our list. + */ + for (seg = seghead; seg; seg = seg->next) + if (seg->index == segment-1) + break; + + if (!seg) + return segment; /* not one of ours - leave it alone */ + + if (seg->align >= SEG_ABS) + return seg->align; /* absolute segment */ + if (seg->grp) + return seg->grp->index+1; /* grouped segment */ + + return segment; /* no special treatment */ +} + +static void obj_filename (char *inname, char *outname, efunc error) { + strcpy(obj_infile, inname); + standard_extension (inname, outname, ".obj", error); +} + +static void obj_write_file (void) { + struct Segment *seg; + struct Group *grp; + struct Public *pub; + struct External *ext; + struct ObjData *data; + static unsigned char boast[] = "The Netwide Assembler " NASM_VER; + int lname_idx, rectype; + + /* + * Write the THEADR module header. + */ + recptr = record; + recptr = obj_write_name (recptr, obj_infile); + obj_record (THEADR, record, recptr); + + /* + * Write the NASM boast comment. 
+ */ + recptr = record; + recptr = obj_write_rword (recptr, 0); /* comment type zero */ + recptr = obj_write_data (recptr, boast, sizeof(boast)-1); + obj_record (COMENT, record, recptr); + + /* + * Write the first LNAMES record, containing LNAME one, which + * is null. Also initialise the LNAME counter. + */ + recptr = record; + recptr = obj_write_name (recptr, ""); + obj_record (LNAMES, record, recptr); + lname_idx = 2; + + /* + * Write the SEGDEF records. Each has an associated LNAMES + * record. + */ + for (seg = seghead; seg; seg = seg->next) { + int new_segdef; /* do we use the newer record type? */ + int acbp; + int sn, cn, on; /* seg, class, overlay LNAME idx */ + + if (seg->use32 || seg->currentpos >= 0x10000) + new_segdef = TRUE; + else + new_segdef = FALSE; + + recptr = record; + recptr = obj_write_name (recptr, seg->name); + sn = lname_idx++; + if (seg->segclass) { + recptr = obj_write_name (recptr, seg->segclass); + cn = lname_idx++; + } else + cn = 1; + if (seg->overlay) { + recptr = obj_write_name (recptr, seg->overlay); + on = lname_idx++; + } else + on = 1; + obj_record (LNAMES, record, recptr); + + acbp = (seg->combine << 2); /* C field */ + + if (seg->currentpos >= 0x10000 && !new_segdef) + acbp |= 0x02; /* B bit */ + + if (seg->use32) + acbp |= 0x01; /* P bit is Use32 flag */ + + /* A field */ + if (seg->align >= SEG_ABS) + acbp |= 0x00; + else if (seg->align >= 256) { + if (seg->align > 256) + error(ERR_NONFATAL, "segment `%s' requires more alignment" + " than OBJ format supports", seg->name); + acbp |= 0x80; + } else if (seg->align >= 16) { + acbp |= 0x60; + } else if (seg->align >= 4) { + acbp |= 0xA0; + } else if (seg->align >= 2) { + acbp |= 0x40; + } else + acbp |= 0x20; + + recptr = record; + recptr = obj_write_byte (recptr, acbp); + if (seg->align & SEG_ABS) { + recptr = obj_write_word (recptr, seg->align - SEG_ABS); + recptr = obj_write_byte (recptr, 0); + } + if (new_segdef) + recptr = obj_write_dword (recptr, seg->currentpos); + else + 
recptr = obj_write_word (recptr, seg->currentpos & 0xFFFF); + recptr = obj_write_index (recptr, sn); + recptr = obj_write_index (recptr, cn); + recptr = obj_write_index (recptr, on); + if (new_segdef) + obj_record (SEGDEF+1, record, recptr); + else + obj_record (SEGDEF, record, recptr); + } + + /* + * Write some LNAMES for the group names. lname_idx is left + * alone here - it will catch up when we write the GRPDEFs. + */ + recptr = record; + for (grp = grphead; grp; grp = grp->next) { + recptr = obj_write_name (recptr, grp->name); + if (recptr - record > 1024) { + obj_record (LNAMES, record, recptr); + recptr = record; + } + } + if (recptr > record) + obj_record (LNAMES, record, recptr); + + /* + * Write the GRPDEF records. + */ + for (grp = grphead; grp; grp = grp->next) { + int i; + + if (grp->nindices != grp->nentries) { + for (i = grp->nindices; i < grp->nentries; i++) { + error(ERR_NONFATAL, "group `%s' contains undefined segment" + " `%s'", grp->name, grp->segs[i].name); + nasm_free (grp->segs[i].name); + grp->segs[i].name = NULL; + } + } + recptr = record; + recptr = obj_write_index (recptr, lname_idx++); + for (i = 0; i < grp->nindices; i++) { + recptr = obj_write_byte (recptr, 0xFF); + recptr = obj_write_index (recptr, grp->segs[i].index); + } + obj_record (GRPDEF, record, recptr); + } + + /* + * Write the PUBDEF records: first the ones in the segments, + * then the far-absolutes. + */ + for (seg = seghead; seg; seg = seg->next) { + int any; + + recptr = record; + recptr = obj_write_index (recptr, seg->grp ? 
seg->grp->obj_index : 0); + recptr = obj_write_index (recptr, seg->obj_index); + any = FALSE; + if (seg->use32) + rectype = PUBDEF+1; + else + rectype = PUBDEF; + for (pub = seg->pubhead; pub; pub = pub->next) { + if (recptr - record + strlen(pub->name) > 1024) { + if (any) + obj_record (rectype, record, recptr); + recptr = record; + recptr = obj_write_index (recptr, 0); + recptr = obj_write_index (recptr, seg->obj_index); + } + recptr = obj_write_name (recptr, pub->name); + if (seg->use32) + recptr = obj_write_dword (recptr, pub->offset); + else + recptr = obj_write_word (recptr, pub->offset); + recptr = obj_write_index (recptr, 0); + any = TRUE; + } + if (any) + obj_record (rectype, record, recptr); + } + for (pub = fpubhead; pub; pub = pub->next) { /* pub-crawl :-) */ + recptr = record; + recptr = obj_write_index (recptr, 0); /* no group */ + recptr = obj_write_index (recptr, 0); /* no segment either */ + recptr = obj_write_word (recptr, pub->segment); + recptr = obj_write_name (recptr, pub->name); + recptr = obj_write_word (recptr, pub->offset); + recptr = obj_write_index (recptr, 0); + obj_record (PUBDEF, record, recptr); + } + + /* + * Write the EXTDEF and COMDEF records, in order. 
+ */ + recptr = record; + for (ext = exthead; ext; ext = ext->next) { + if (ext->commonsize == 0) { + recptr = obj_write_name (recptr, ext->name); + recptr = obj_write_index (recptr, 0); + if (recptr - record > 1024) { + obj_record (EXTDEF, record, recptr); + recptr = record; + } + } else { + if (recptr > record) + obj_record (EXTDEF, record, recptr); + recptr = record; + if (ext->commonsize > 0) { + recptr = obj_write_name (recptr, ext->name); + recptr = obj_write_index (recptr, 0); + recptr = obj_write_byte (recptr, 0x61);/* far communal */ + recptr = obj_write_value (recptr, 1L); + recptr = obj_write_value (recptr, ext->commonsize); + obj_record (COMDEF, record, recptr); + } else if (ext->commonsize < 0) { + recptr = obj_write_name (recptr, ext->name); + recptr = obj_write_index (recptr, 0); + recptr = obj_write_byte (recptr, 0x62);/* near communal */ + recptr = obj_write_value (recptr, ext->commonsize); + obj_record (COMDEF, record, recptr); + } + recptr = record; + } + } + if (recptr > record) + obj_record (EXTDEF, record, recptr); + + /* + * Write a COMENT record stating that the linker's first pass + * may stop processing at this point. + */ + recptr = record; + recptr = obj_write_rword (recptr, 0x40A2); + recptr = obj_write_byte (recptr, 1); + obj_record (COMENT, record, recptr); + + /* + * Write the LEDATA/FIXUPP pairs. + */ + for (data = datahead; data; data = data->next) { + if (data->nonempty) { + obj_record (data->letype, data->ledata, data->lptr); + if (data->fptr != data->fixupp) + obj_record (FIXUPP, data->fixupp, data->fptr); + } + } + + /* + * Write the MODEND module end marker. + */ + recptr = record; + rectype = MODEND; + if (obj_entry_seg != NO_SEG) { + recptr = obj_write_byte (recptr, 0xC1); + /* + * Find the segment in the segment list. 
+ */ + for (seg = seghead; seg; seg = seg->next) { + if (seg->index == obj_entry_seg) { + if (seg->grp) { + recptr = obj_write_byte (recptr, 0x10); + recptr = obj_write_index (recptr, seg->grp->obj_index); + } else { + recptr = obj_write_byte (recptr, 0x50); + } + recptr = obj_write_index (recptr, seg->obj_index); + if (seg->use32) { + rectype = MODEND+1; + recptr = obj_write_dword (recptr, obj_entry_ofs); + } else + recptr = obj_write_word (recptr, obj_entry_ofs); + break; + } + } + if (!seg) + error(ERR_NONFATAL, "entry point is not in this module"); + } else + recptr = obj_write_byte (recptr, 0); + obj_record (rectype, record, recptr); +} + +static unsigned char *obj_write_data(unsigned char *ptr, + unsigned char *data, int len) { + while (len--) + *ptr++ = *data++; + return ptr; +} + +static unsigned char *obj_write_byte(unsigned char *ptr, int data) { + *ptr++ = data; + return ptr; +} + +static unsigned char *obj_write_word(unsigned char *ptr, int data) { + *ptr++ = data & 0xFF; + *ptr++ = (data >> 8) & 0xFF; + return ptr; +} + +static unsigned char *obj_write_dword(unsigned char *ptr, long data) { + *ptr++ = data & 0xFF; + *ptr++ = (data >> 8) & 0xFF; + *ptr++ = (data >> 16) & 0xFF; + *ptr++ = (data >> 24) & 0xFF; + return ptr; +} + +static unsigned char *obj_write_rword(unsigned char *ptr, int data) { + *ptr++ = (data >> 8) & 0xFF; + *ptr++ = data & 0xFF; + return ptr; +} + +static unsigned char *obj_write_name(unsigned char *ptr, char *data) { + *ptr++ = strlen(data); + if (obj_uppercase) { + while (*data) { + *ptr++ = (unsigned char) toupper(*data); + data++; + } + } else { + while (*data) + *ptr++ = (unsigned char) *data++; + } + return ptr; +} + +static unsigned char *obj_write_index(unsigned char *ptr, int data) { + if (data < 128) + *ptr++ = data; + else { + *ptr++ = 0x80 | ((data >> 8) & 0x7F); + *ptr++ = data & 0xFF; + } + return ptr; +} + +static unsigned char *obj_write_value(unsigned char *ptr, + unsigned long data) { + if (data <= 128) + *ptr++ = 
data; + else if (data <= 0xFFFF) { + *ptr++ = 129; + *ptr++ = data & 0xFF; + *ptr++ = (data >> 8) & 0xFF; + } else if (data <= 0xFFFFFF) { + *ptr++ = 132; + *ptr++ = data & 0xFF; + *ptr++ = (data >> 8) & 0xFF; + *ptr++ = (data >> 16) & 0xFF; + } else { + *ptr++ = 136; + *ptr++ = data & 0xFF; + *ptr++ = (data >> 8) & 0xFF; + *ptr++ = (data >> 16) & 0xFF; + *ptr++ = (data >> 24) & 0xFF; + } + return ptr; +} + +static void obj_record(int type, unsigned char *start, unsigned char *end) { + unsigned long cksum, len; + + cksum = type; + fputc (type, ofp); + len = end-start+1; + cksum += (len & 0xFF) + ((len>>8) & 0xFF); + fwriteshort (len, ofp); + fwrite (start, 1, end-start, ofp); + while (start < end) + cksum += *start++; + fputc ( (-cksum) & 0xFF, ofp); +} + +struct ofmt of_obj = { + "Microsoft MS-DOS 16-bit object files", + "obj", + obj_init, + obj_out, + obj_deflabel, + obj_segment, + obj_segbase, + obj_directive, + obj_filename, + obj_cleanup +}; + +#endif /* OF_OBJ */ diff --git a/outrdf.c b/outrdf.c new file mode 100644 index 0000000..24fd480 --- /dev/null +++ b/outrdf.c @@ -0,0 +1,467 @@ +/* outrdf.c output routines for the Netwide Assembler to produce + * RDOFF format object files (which are intended mainly + * for use in proprietary projects, as the code to load and + * execute them is very simple). They will also be used + * for device drivers and possibly some executable files + * in the MOSCOW operating system. See Rdoff.txt for + * details. + * + * The Netwide Assembler is copyright (C) 1996 Simon Tatham and + * Julian Hall. All rights reserved. The software is + * redistributable under the licence given in the file "Licence" + * distributed in the NASM archive. + */ + +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <ctype.h> +#include <assert.h> + +#include "nasm.h" +#include "nasmlib.h" +#include "outform.h" + +#ifdef OF_RDF + +typedef short int16; /* not sure if this will be required to be altered + at all... 
best to typedef it just in case */ + +const char *RDOFFId = "RDOFF1"; /* written to the start of RDOFF files */ + +/* the records that can be found in the RDOFF header */ + +/* Note that whenever a segment is referred to in the RDOFF file, its number + * is always half of the segment number that NASM uses to refer to it; this + * is because NASM only allocates even numbered segments, so as to not + * waste any of the 16 bits of segment number written to the file - this + * allows up to 65533 external labels to be defined; otherwise it would be + * 32764. */ + +struct RelocRec { + char type; /* must be 1 */ + char segment; /* only 0 for code, or 1 for data supported, + * but add 64 for relative refs (ie do not require + * reloc @ loadtime, only linkage) */ + long offset; /* from start of segment in which reference is loc'd */ + char length; /* 1 2 or 4 bytes */ + int16 refseg; /* segment to which reference refers to */ +}; + +struct ImportRec { + char type; /* must be 2 */ + int16 segment; /* segment number allocated to the label for reloc + * records - label is assumed to be at offset zero + * in this segment, so linker must fix up with offset + * of segment and of offset within segment */ + char label[33]; /* zero terminated... should be written to file until + * the zero, but not after it - max len = 32 chars */ +}; + +struct ExportRec { + char type; /* must be 3 */ + char segment; /* segment referred to (0/1) */ + long offset; /* offset within segment */ + char label[33]; /* zero terminated as above. max len = 32 chars */ +}; + +struct DLLRec { + char type; /* must be 4 */ + char libname[128]; /* name of library to link with at load time */ +}; + +struct BSSRec { + char type; /* must be 5 */ + long amount; /* number of bytes BSS to reserve */ +}; + +/* code for managing buffers needed to seperate code and data into individual + * sections until they are ready to be written to the file. + * We'd better hope that it all fits in memory else we're buggered... 
*/ + +#define BUF_BLOCK_LEN 4088 /* selected to match page size (4096) + * on 80x86 machines for efficiency */ + +typedef struct memorybuffer { + int length; + char buffer[BUF_BLOCK_LEN]; + struct memorybuffer *next; +} memorybuffer; + +memorybuffer * newmembuf(){ + memorybuffer * t; + + t = nasm_malloc(sizeof(memorybuffer)); + + t->length = 0; + t->next = NULL; + return t; +} + +void membufwrite(memorybuffer *b, void *data, int bytes) { + int16 w; + long l; + + if (b->next) { /* memory buffer full - use next buffer */ + membufwrite(b->next,data,bytes); + return; + } + if ((bytes < 0 && b->length - bytes > BUF_BLOCK_LEN) + || (bytes > 0 && b->length + bytes > BUF_BLOCK_LEN)) { + + /* buffer full and no next allocated... allocate and initialise next + * buffer */ + + b->next = newmembuf(); + membufwrite(b->next,data,bytes); + } + + switch(bytes) { + case -4: /* convert to little-endian */ + l = * (long *) data ; + b->buffer[b->length++] = l & 0xFF; + l >>= 8 ; + b->buffer[b->length++] = l & 0xFF; + l >>= 8 ; + b->buffer[b->length++] = l & 0xFF; + l >>= 8 ; + b->buffer[b->length++] = l & 0xFF; + break; + + case -2: + w = * (int16 *) data ; + b->buffer[b->length++] = w & 0xFF; + w >>= 8 ; + b->buffer[b->length++] = w & 0xFF; + break; + + default: + while(bytes--) { + b->buffer[b->length++] = *(* (unsigned char **) &data); + + (* (unsigned char **) &data)++ ; + } + break; + } +} + +void membufdump(memorybuffer *b,FILE *fp) +{ + if (!b) return; + + fwrite (b->buffer, 1, b->length, fp); + + membufdump(b->next,fp); +} + +int membuflength(memorybuffer *b) +{ + if (!b) return 0; + return b->length + membuflength(b->next); +} + +void freemembuf(memorybuffer *b) +{ + if (!b) return; + freemembuf(b->next); + nasm_free(b); +} + +/*********************************************************************** + * Actual code to deal with RDOFF ouput format begins here... 
+ */ + +/* global variables set during the initialisation phase */ + +memorybuffer *seg[2]; /* seg 0 = code, seg 1 = data */ +memorybuffer *header; /* relocation/import/export records */ + +FILE *ofile; + +int seg_warned; +static efunc error; + +int segtext,segdata,segbss; +long bsslength; + +static void rdf_init(FILE *fp, efunc errfunc, ldfunc ldef) +{ + ofile = fp; + error = errfunc; + seg[0] = newmembuf(); + seg[1] = newmembuf(); + header = newmembuf(); + segtext = seg_alloc(); + segdata = seg_alloc(); + segbss = seg_alloc(); + if (segtext != 0 || segdata != 2 || segbss != 4) + error(ERR_PANIC,"rdf segment numbers not allocated as expected (%d,%d,%d)", + segtext,segdata,segbss); + bsslength=0; +} + +static long rdf_section_names(char *name, int pass, int *bits) +{ + /* + * Default is 32 bits. + */ + if (!name) + *bits = 32; + + if (!name) return 0; + if (!strcmp(name, ".text")) return 0; + else if (!strcmp(name, ".data")) return 2; + else if (!strcmp(name, ".bss")) return 4; + else + return NO_SEG; +} + +static void write_reloc_rec(struct RelocRec *r) +{ + r->refseg >>= 1; /* adjust segment nos to RDF rather than NASM */ + + membufwrite(header,&r->type,1); + membufwrite(header,&r->segment,1); + membufwrite(header,&r->offset,-4); + membufwrite(header,&r->length,1); + membufwrite(header,&r->refseg,-2); /* 9 bytes written */ +} + +static void write_export_rec(struct ExportRec *r) +{ + r->segment >>= 1; + + membufwrite(header,&r->type,1); + membufwrite(header,&r->segment,1); + membufwrite(header,&r->offset,-4); + membufwrite(header,r->label,strlen(r->label) + 1); +} + +static void write_import_rec(struct ImportRec *r) +{ + r->segment >>= 1; + + membufwrite(header,&r->type,1); + membufwrite(header,&r->segment,-2); + membufwrite(header,r->label,strlen(r->label) + 1); +} + +static void write_bss_rec(struct BSSRec *r) +{ + membufwrite(header,&r->type,1); + membufwrite(header,&r->amount,-4); +} + +static void rdf_deflabel(char *name, long segment, long offset, int 
is_global) +{ + struct ExportRec r; + struct ImportRec ri; + + if (is_global && segment > 4) { + error(ERR_WARNING,"common declarations not supported... using extern"); + is_global = 0; + } + + if (is_global) { + r.type = 3; + r.segment = segment; + r.offset = offset; + strncpy(r.label,name,32); + r.label[32] = 0; + write_export_rec(&r); + } + + if (segment > 4) { /* EXTERN declaration */ + ri.type = 2; + ri.segment = segment; + strncpy(ri.label,name,32); + ri.label[32] = 0; + write_import_rec(&ri); + } +} + +static void rdf_out (long segto, void *data, unsigned long type, + long segment, long wrt) +{ + long bytes = type & OUT_SIZMASK; + struct RelocRec rr; + unsigned char databuf[4],*pd; + + segto >>= 1; /* convert NASM segment no to RDF number */ + + if (segto != 0 && segto != 1 && segto != 2) { + error(ERR_NONFATAL,"specified segment not supported by rdf output format"); + return; + } + + if (wrt != NO_SEG) { + wrt = NO_SEG; /* continue to do _something_ */ + error (ERR_NONFATAL, "WRT not supported by rdf output format"); + } + + type &= OUT_TYPMASK; + + if (segto == 2 && type != OUT_RESERVE) + { + error(ERR_NONFATAL, "BSS segments may not be initialised"); + + /* just reserve the space for now... */ + + if (type == OUT_REL2ADR) + bytes = 2; + else + bytes = 4; + type = OUT_RESERVE; + } + + if (type == OUT_RESERVE) { + if (segto == 2) /* BSS segment space reserverd */ + bsslength += bytes; + else + while (bytes --) + membufwrite(seg[segto],databuf,1); + } + else if (type == OUT_RAWDATA) { + if (segment != NO_SEG) + error(ERR_PANIC, "OUT_RAWDATA with other than NO_SEG"); + membufwrite(seg[segto],data,bytes); + } + else if (type == OUT_ADDRESS) { + + /* if segment == NO_SEG then we are writing an address of an + object within the same segment - do not produce reloc rec. 
*/ + + if (segment != NO_SEG) + { + + /* it's an address, so we must write a relocation record */ + + rr.type = 1; /* type signature */ + rr.segment = segto; /* segment we're currently in */ + rr.offset = membuflength(seg[segto]); /* current offset */ + rr.length = bytes; /* length of reference */ + rr.refseg = segment; /* segment referred to */ + write_reloc_rec(&rr); + } + + pd = databuf; /* convert address to little-endian */ + if (bytes == 2) + WRITESHORT (pd, *(long *)data); + else + WRITELONG (pd, *(long *)data); + + membufwrite(seg[segto],databuf,bytes); + + } + else if (type == OUT_REL2ADR) + { + if (segment == segto) + error(ERR_PANIC, "intra-segment OUT_REL2ADR"); + if (segment != NO_SEG && segment % 2) { + error(ERR_NONFATAL, "rdf format does not support segment base refs"); + } + + rr.type = 1; /* type signature */ + rr.segment = segto+64; /* segment we're currently in + rel flag */ + rr.offset = membuflength(seg[segto]); /* current offset */ + rr.length = 2; /* length of reference */ + rr.refseg = segment; /* segment referred to */ + write_reloc_rec(&rr); + + /* work out what to put in the code: offset of the end of this operand, + * subtracted from any data specified, so that loader can just add + * address of imported symbol onto it to get address relative to end of + * instruction: import_address + data(offset) - end_of_instrn */ + + rr.offset = *(long *)data -(rr.offset + bytes); + + membufwrite(seg[segto],&rr.offset,-2); + } + else if (type == OUT_REL4ADR) + { + if (segment == segto) + error(ERR_PANIC, "intra-segment OUT_REL4ADR"); + if (segment != NO_SEG && segment % 2) { + error(ERR_NONFATAL, "rdf format does not support segment base refs"); + } + + rr.type = 1; /* type signature */ + rr.segment = segto+64; /* segment we're currently in + rel tag */ + rr.offset = membuflength(seg[segto]); /* current offset */ + rr.length = 4; /* length of reference */ + rr.refseg = segment; /* segment referred to */ + write_reloc_rec(&rr); + + rr.offset = *(long 
*)data -(rr.offset + bytes); + membufwrite(seg[segto],&rr.offset,-4); + } +} + +static void rdf_cleanup (void) { + long l; + unsigned char b[4],*d; + struct BSSRec bs; + + + /* should write imported & exported symbol declarations to header here */ + + /* generate the output file... */ + fwrite("RDOFF1",6,1,ofile); /* file type magic number */ + + if (bsslength != 0) /* reserve BSS */ + { + bs.type = 5; + bs.amount = bsslength; + write_bss_rec(&bs); + } + + l = membuflength(header);d=b; + WRITELONG(d,l); + + fwrite(b,4,1,ofile); /* write length of header */ + membufdump(header,ofile); /* dump header */ + + l = membuflength(seg[0]);d=b; /* code segment */ + WRITELONG(d,l); + + fwrite(b,4,1,ofile); + membufdump(seg[0],ofile); + + l = membuflength(seg[1]);d=b; /* data segment */ + WRITELONG(d,l); + + fwrite(b,4,1,ofile); + membufdump(seg[1],ofile); + + freemembuf(header); + freemembuf(seg[0]); + freemembuf(seg[1]); + fclose(ofile); +} + +static long rdf_segbase (long segment) { + return 0; +} + +static int rdf_directive (char *directive, char *value, int pass) { + return 0; +} + +static void rdf_filename (char *inname, char *outname, efunc error) { + standard_extension(inname,outname,".rdf",error); +} + +struct ofmt of_rdf = { + "Relocatable Dynamic Object File Format v1.1", + "rdf", + rdf_init, + rdf_out, + rdf_deflabel, + rdf_section_names, + rdf_segbase, + rdf_directive, + rdf_filename, + rdf_cleanup +}; + +#endif /* OF_RDF */ diff --git a/parser.c b/parser.c new file mode 100644 index 0000000..14c7a5b --- /dev/null +++ b/parser.c @@ -0,0 +1,1306 @@ +/* parser.c source line parser for the Netwide Assembler + * + * The Netwide Assembler is copyright (C) 1996 Simon Tatham and + * Julian Hall. All rights reserved. The software is + * redistributable under the licence given in the file "Licence" + * distributed in the NASM archive. 
+ * + * initial version 27/iii/95 by Simon Tatham + */ + +#include <stdio.h> +#include <stdlib.h> +#include <stddef.h> +#include <string.h> +#include <ctype.h> + +#include "nasm.h" +#include "nasmlib.h" +#include "parser.h" +#include "float.h" + +#include "names.c" + + +static long reg_flags[] = { /* sizes and special flags */ + 0, REG8, REG_AL, REG_AX, REG8, REG8, REG16, REG16, REG8, REG_CL, + REG_CREG, REG_CREG, REG_CREG, REG_CR4, REG_CS, REG_CX, REG8, + REG16, REG8, REG_DREG, REG_DREG, REG_DREG, REG_DREG, REG_DREG, + REG_DREG, REG_DESS, REG_DX, REG_EAX, REG32, REG32, REG_ECX, + REG32, REG32, REG_DESS, REG32, REG32, REG_FSGS, REG_FSGS, + MMXREG, MMXREG, MMXREG, MMXREG, MMXREG, MMXREG, MMXREG, MMXREG, + REG16, REG16, REG_DESS, FPU0, FPUREG, FPUREG, FPUREG, FPUREG, + FPUREG, FPUREG, FPUREG, REG_TREG, REG_TREG, REG_TREG, REG_TREG, + REG_TREG +}; + +enum { /* special tokens */ + S_BYTE, S_DWORD, S_FAR, S_LONG, S_NEAR, S_QWORD, S_SHORT, S_TO, + S_TWORD, S_WORD +}; + +static char *special_names[] = { /* and the actual text */ + "byte", "dword", "far", "long", "near", "qword", "short", "to", + "tword", "word" +}; + +static char *prefix_names[] = { + "a16", "a32", "lock", "o16", "o32", "rep", "repe", "repne", + "repnz", "repz", "times" +}; + +/* + * Evaluator datatype. Expressions, within the evaluator, are + * stored as an array of these beasts, terminated by a record with + * type==0. Mostly, it's a vector type: each type denotes some kind + * of a component, and the value denotes the multiple of that + * component present in the expression. The exception is the WRT + * type, whose `value' field denotes the segment to which the + * expression is relative. These segments will be segment-base + * types, i.e. either odd segment values or SEG_ABS types. So it is + * still valid to assume that anything with a `value' field of zero + * is insignificant. 
+ */ +typedef struct { + long type; /* a register, or EXPR_xxx */ + long value; /* must be >= 32 bits */ +} expr; + +static void eval_reset(void); +static expr *evaluate(int); + +/* + * ASSUMPTION MADE HERE. The number of distinct register names + * (i.e. possible "type" fields for an expr structure) does not + * exceed 126. + */ +#define EXPR_SIMPLE 126 +#define EXPR_WRT 127 +#define EXPR_SEGBASE 128 + +static int is_reloc(expr *); +static int is_simple(expr *); +static int is_really_simple (expr *); +static long reloc_value(expr *); +static long reloc_seg(expr *); +static long reloc_wrt(expr *); + +enum { /* token types, other than chars */ + TOKEN_ID = 256, TOKEN_NUM, TOKEN_REG, TOKEN_INSN, TOKEN_ERRNUM, + TOKEN_HERE, TOKEN_BASE, TOKEN_SPECIAL, TOKEN_PREFIX, TOKEN_SHL, + TOKEN_SHR, TOKEN_SDIV, TOKEN_SMOD, TOKEN_SEG, TOKEN_WRT, + TOKEN_FLOAT +}; + +struct tokenval { + long t_integer, t_inttwo; + char *t_charptr; +}; + +static char tempstorage[1024], *q; +static int bsi (char *string, char **array, int size);/* binary search */ + +static int nexttoken (void); +static int is_comma_next (void); + +static char *bufptr; +static int i; +static struct tokenval tokval; +static lfunc labelfunc; +static efunc error; +static char *label; +static struct ofmt *outfmt; + +static long seg, ofs; + +insn *parse_line (long segment, long offset, lfunc lookup_label, int pass, + char *buffer, insn *result, struct ofmt *output, + efunc errfunc) { + int operand; + int critical; + + q = tempstorage; + bufptr = buffer; + labelfunc = lookup_label; + outfmt = output; + error = errfunc; + seg = segment; + ofs = offset; + label = ""; + + i = nexttoken(); + + result->eops = NULL; /* must do this, whatever happens */ + + if (i==0) { /* blank line - ignore */ + result->label = NULL; /* so, no label on it */ + result->opcode = -1; /* and no instruction either */ + return result; + } + if (i != TOKEN_ID && i != TOKEN_INSN && i != TOKEN_PREFIX && + (i!=TOKEN_REG || (REG_SREG & 
~reg_flags[tokval.t_integer]))) { + error (ERR_NONFATAL, "label or instruction expected" + " at start of line"); + result->label = NULL; + result->opcode = -1; + return result; + } + + if (i == TOKEN_ID) { /* there's a label here */ + label = result->label = tokval.t_charptr; + i = nexttoken(); + if (i == ':') { /* skip over the optional colon */ + i = nexttoken(); + } + } else /* no label; so, moving swiftly on */ + result->label = NULL; + + if (i==0) { + result->opcode = -1; /* this line contains just a label */ + return result; + } + + result->nprefix = 0; + result->times = 1; + + while (i == TOKEN_PREFIX || + (i==TOKEN_REG && !(REG_SREG & ~reg_flags[tokval.t_integer]))) { + /* + * Handle special case: the TIMES prefix. + */ + if (i == TOKEN_PREFIX && tokval.t_integer == P_TIMES) { + expr *value; + + i = nexttoken(); + eval_reset(); + value = evaluate (pass); + if (!value) { /* but, error in evaluator */ + result->opcode = -1; /* unrecoverable parse error: */ + return result; /* ignore this instruction */ + } + if (!is_simple (value)) { + error (ERR_NONFATAL, + "non-constant argument supplied to TIMES"); + result->times = 1; + } else + result->times = value->value; + } else { + if (result->nprefix == MAXPREFIX) + error (ERR_NONFATAL, + "instruction has more than %d prefixes", MAXPREFIX); + else + result->prefixes[result->nprefix++] = tokval.t_integer; + i = nexttoken(); + } + } + + if (i != TOKEN_INSN) { + error (ERR_NONFATAL, "parser: instruction expected"); + result->opcode = -1; + return result; + } + + result->opcode = tokval.t_integer; + result->condition = tokval.t_inttwo; + + /* + * RESB, RESW and RESD cannot be satisfied with incorrectly + * evaluated operands, since the correct values _must_ be known + * on the first pass. Hence, even in pass one, we set the + * `critical' flag on calling evaluate(), so that it will bomb + * out on undefined symbols. Nasty, but there's nothing we can + * do about it. 
+ * + * For the moment, EQU has the same difficulty, so we'll + * include that. + */ + if (result->opcode == I_RESB || + result->opcode == I_RESW || + result->opcode == I_RESD || + result->opcode == I_RESQ || + result->opcode == I_REST || + result->opcode == I_EQU) + critical = pass; + else + critical = (pass==2 ? 2 : 0); + + if (result->opcode == I_DB || + result->opcode == I_DW || + result->opcode == I_DD || + result->opcode == I_DQ || + result->opcode == I_DT) { + extop *eop, **tail = &result->eops; + int oper_num = 0; + + /* + * Begin to read the DB/DW/DD/DQ/DT operands. + */ + while (1) { + i = nexttoken(); + if (i == 0) + break; + eop = *tail = nasm_malloc(sizeof(extop)); + tail = &eop->next; + eop->next = NULL; + eop->type = EOT_NOTHING; + oper_num++; + + if (i == TOKEN_NUM && tokval.t_charptr && is_comma_next()) { + eop->type = EOT_DB_STRING; + eop->stringval = tokval.t_charptr; + eop->stringlen = tokval.t_inttwo; + i = nexttoken(); /* eat the comma */ + continue; + } + + if (i == TOKEN_FLOAT || i == '-') { + long sign = +1L; + + if (i == '-') { + char *save = bufptr; + i = nexttoken(); + sign = -1L; + if (i != TOKEN_FLOAT) { + bufptr = save; + i = '-'; + } + } + + if (i == TOKEN_FLOAT) { + eop->type = EOT_DB_STRING; + eop->stringval = q; + if (result->opcode == I_DD) + eop->stringlen = 4; + else if (result->opcode == I_DQ) + eop->stringlen = 8; + else if (result->opcode == I_DT) + eop->stringlen = 10; + else { + error(ERR_NONFATAL, "floating-point constant" + " encountered in `D%c' instruction", + result->opcode == I_DW ? 
'W' : 'B'); + eop->type = EOT_NOTHING; + } + q += eop->stringlen; + if (!float_const (tokval.t_charptr, sign, + (unsigned char *)eop->stringval, + eop->stringlen, error)) + eop->type = EOT_NOTHING; + i = nexttoken(); /* eat the comma */ + continue; + } + } + + /* anything else */ { + expr *value; + eval_reset(); + value = evaluate (critical); + if (!value) { /* but, error in evaluator */ + result->opcode = -1;/* unrecoverable parse error: */ + return result; /* ignore this instruction */ + } + if (is_reloc(value)) { + eop->type = EOT_DB_NUMBER; + eop->offset = reloc_value(value); + eop->segment = reloc_seg(value); + eop->wrt = reloc_wrt(value); + } else { + error (ERR_NONFATAL, + "`%s' operand %d: expression is not simple" + " or relocatable", + insn_names[result->opcode], oper_num); + } + } + } + return result; + } + + /* right. Now we begin to parse the operands. There may be up to three + * of these, separated by commas, and terminated by a zero token. */ + + for (operand = 0; operand < 3; operand++) { + expr *seg, *value; /* used most of the time */ + int mref; /* is this going to be a memory ref? 
*/ + + result->oprs[operand].addr_size = 0;/* have to zero this whatever */ + i = nexttoken(); + if (i == 0) break; /* end of operands: get out of here */ + result->oprs[operand].type = 0; /* so far, no override */ + while (i == TOKEN_SPECIAL) {/* size specifiers */ + switch ((int)tokval.t_integer) { + case S_BYTE: + result->oprs[operand].type |= BITS8; + break; + case S_WORD: + result->oprs[operand].type |= BITS16; + break; + case S_DWORD: + case S_LONG: + result->oprs[operand].type |= BITS32; + break; + case S_QWORD: + result->oprs[operand].type |= BITS64; + break; + case S_TWORD: + result->oprs[operand].type |= BITS80; + break; + case S_TO: + result->oprs[operand].type |= TO; + break; + case S_FAR: + result->oprs[operand].type |= FAR; + break; + case S_NEAR: + result->oprs[operand].type |= NEAR; + break; + case S_SHORT: + result->oprs[operand].type |= SHORT; + break; + } + i = nexttoken(); + } + + if (i == '[') { /* memory reference */ + i = nexttoken(); + mref = TRUE; + if (i == TOKEN_SPECIAL) { /* check for address size override */ + switch ((int)tokval.t_integer) { + case S_WORD: + result->oprs[operand].addr_size = 16; + break; + case S_DWORD: + case S_LONG: + result->oprs[operand].addr_size = 32; + break; + default: + error (ERR_NONFATAL, "invalid size specification in" + " effective address"); + } + i = nexttoken(); + } + } else /* immediate operand, or register */ + mref = FALSE; + + eval_reset(); + + value = evaluate (critical); + if (!value) { /* error in evaluator */ + result->opcode = -1; /* unrecoverable parse error: */ + return result; /* ignore this instruction */ + } + if (i == ':' && mref) { /* it was seg:offset */ + seg = value; /* so shift this into the segment */ + i = nexttoken(); /* then skip the colon */ + if (i == TOKEN_SPECIAL) { /* another check for size override */ + switch ((int)tokval.t_integer) { + case S_WORD: + result->oprs[operand].addr_size = 16; + break; + case S_DWORD: + case S_LONG: + result->oprs[operand].addr_size = 32; + 
break; + default: + error (ERR_NONFATAL, "invalid size specification in" + " effective address"); + } + i = nexttoken(); + } + value = evaluate (critical); + /* and get the offset */ + if (!value) { /* but, error in evaluator */ + result->opcode = -1; /* unrecoverable parse error: */ + return result; /* ignore this instruction */ + } + } else seg = NULL; + if (mref) { /* find ] at the end */ + if (i != ']') { + error (ERR_NONFATAL, "parser: expecting ]"); + do { /* error recovery again */ + i = nexttoken(); + } while (i != 0 && i != ','); + } else /* we got the required ] */ + i = nexttoken(); + } else { /* immediate operand */ + if (i != 0 && i != ',' && i != ':') { + error (ERR_NONFATAL, "comma or end of line expected"); + do { /* error recovery */ + i = nexttoken(); + } while (i != 0 && i != ','); + } else if (i == ':') { + result->oprs[operand].type |= COLON; + } + } + + /* now convert the exprs returned from evaluate() into operand + * descriptions... */ + + if (mref) { /* it's a memory reference */ + expr *e = value; + int b, i, s; /* basereg, indexreg, scale */ + long o; /* offset */ + + if (seg) { /* segment override */ + if (seg[1].type!=0 || seg->value!=1 || + REG_SREG & ~reg_flags[seg->type]) + error (ERR_NONFATAL, "invalid segment override"); + else if (result->nprefix == MAXPREFIX) + error (ERR_NONFATAL, + "instruction has more than %d prefixes", + MAXPREFIX); + else + result->prefixes[result->nprefix++] = seg->type; + } + + b = i = -1, o = s = 0; + + if (e->type < EXPR_SIMPLE) { /* this bit's a register */ + if (e->value == 1) /* in fact it can be basereg */ + b = e->type; + else /* no, it has to be indexreg */ + i = e->type, s = e->value; + e++; + } + if (e->type && e->type < EXPR_SIMPLE) {/* it's a second register */ + if (e->value != 1) { /* it has to be indexreg */ + if (i != -1) { /* but it can't be */ + error(ERR_NONFATAL, "invalid effective address"); + result->opcode = -1; + return result; + } else + i = e->type, s = e->value; + } else { /* it 
can be basereg */ + if (b != -1) /* or can it? */ + i = e->type, s = 1; + else + b = e->type; + } + e++; + } + if (e->type != 0) { /* is there an offset? */ + if (e->type < EXPR_SIMPLE) {/* in fact, is there an error? */ + error (ERR_NONFATAL, "invalid effective address"); + result->opcode = -1; + return result; + } else { + if (e->type == EXPR_SIMPLE) { + o = e->value; + e++; + } + if (e->type == EXPR_WRT) { + result->oprs[operand].wrt = e->value; + e++; + } else + result->oprs[operand].wrt = NO_SEG; + if (e->type != 0) { /* is there a segment id? */ + if (e->type < EXPR_SEGBASE) { + error (ERR_NONFATAL, + "invalid effective address"); + result->opcode = -1; + return result; + } else + result->oprs[operand].segment = (e->type - + EXPR_SEGBASE); + e++; + } else + result->oprs[operand].segment = NO_SEG; + } + } else { + o = 0; + result->oprs[operand].wrt = NO_SEG; + result->oprs[operand].segment = NO_SEG; + } + + if (e->type != 0) { /* there'd better be nothing left! */ + error (ERR_NONFATAL, "invalid effective address"); + result->opcode = -1; + return result; + } + + result->oprs[operand].type |= MEMORY; + if (b==-1 && (i==-1 || s==0)) + result->oprs[operand].type |= MEM_OFFS; + result->oprs[operand].basereg = b; + result->oprs[operand].indexreg = i; + result->oprs[operand].scale = s; + result->oprs[operand].offset = o; + } else { /* it's not a memory reference */ + if (is_reloc(value)) { /* it's immediate */ + result->oprs[operand].type |= IMMEDIATE; + result->oprs[operand].offset = reloc_value(value); + result->oprs[operand].segment = reloc_seg(value); + result->oprs[operand].wrt = reloc_wrt(value); + if (is_simple(value) && reloc_value(value)==1) + result->oprs[operand].type |= UNITY; + } else { /* it's a register */ + if (value->type>=EXPR_SIMPLE || value->value!=1) { + error (ERR_NONFATAL, "invalid operand type"); + result->opcode = -1; + return result; + } + /* clear overrides, except TO which applies to FPU regs */ + result->oprs[operand].type &= TO; + 
result->oprs[operand].type |= REGISTER; + result->oprs[operand].type |= reg_flags[value->type]; + result->oprs[operand].basereg = value->type; + } + } + } + + result->operands = operand; /* set operand count */ + + while (operand<3) /* clear remaining operands */ + result->oprs[operand++].type = 0; + + /* + * Transform RESW, RESD, RESQ, REST into RESB. + */ + switch (result->opcode) { + case I_RESW: result->opcode=I_RESB; result->oprs[0].offset*=2; break; + case I_RESD: result->opcode=I_RESB; result->oprs[0].offset*=4; break; + case I_RESQ: result->opcode=I_RESB; result->oprs[0].offset*=8; break; + case I_REST: result->opcode=I_RESB; result->oprs[0].offset*=10; break; + } + + return result; +} + +static int is_comma_next (void) { + char *p; + + p = bufptr; + while (isspace(*p)) p++; + return (*p == ',' || *p == ';' || !*p); +} + +/* isidstart matches any character that may start an identifier, and isidchar + * matches any character that may appear at places other than the start of an + * identifier. E.g. a period may only appear at the start of an identifier + * (for local labels), whereas a number may appear anywhere *but* at the + * start. */ + +#define isidstart(c) ( isalpha(c) || (c)=='_' || (c)=='.' || (c)=='?' ) +#define isidchar(c) ( isidstart(c) || isdigit(c) || (c)=='$' || (c)=='#' \ + || (c)=='@' || (c)=='~' ) + +/* Ditto for numeric constants. */ + +#define isnumstart(c) ( isdigit(c) || (c)=='$' ) +#define isnumchar(c) ( isalnum(c) ) + +/* This returns the numeric value of a given 'digit'. */ + +#define numvalue(c) ((c)>='a' ? (c)-'a'+10 : (c)>='A' ? (c)-'A'+10 : (c)-'0') + +/* + * This tokeniser routine has only one side effect, that of + * updating `bufptr'. Hence by saving `bufptr', lookahead may be + * performed. 
+ */ + +static int nexttoken (void) { + char ourcopy[256], *r, *s; + + while (isspace(*bufptr)) bufptr++; + if (!*bufptr) return 0; + + /* we have a token; either an id, a number or a char */ + if (isidstart(*bufptr) || + (*bufptr == '$' && isidstart(bufptr[1]))) { + /* now we've got an identifier */ + int i; + int is_sym = FALSE; + + if (*bufptr == '$') { + is_sym = TRUE; + bufptr++; + } + + tokval.t_charptr = q; + *q++ = *bufptr++; + while (isidchar(*bufptr)) *q++ = *bufptr++; + *q++ = '\0'; + /* lower-cased copy used only for the keyword lookups below; the + * copy is bounded so that an over-long identifier cannot overrun + * ourcopy (the full text is still available via t_charptr) */ + for (s=tokval.t_charptr, r=ourcopy; + *s && r < ourcopy + sizeof(ourcopy) - 1; s++) + *r++ = tolower ((unsigned char) *s); + *r = '\0'; + if (is_sym) + return TOKEN_ID; /* bypass all other checks */ + /* right, so we have an identifier sitting in temp storage. now, + * is it actually a register or instruction name, or what? */ + if ((tokval.t_integer=bsi(ourcopy, reg_names, + elements(reg_names)))>=0) + return TOKEN_REG; + if ((tokval.t_integer=bsi(ourcopy, insn_names, + elements(insn_names)))>=0) + return TOKEN_INSN; + for (i=0; i<elements(icn); i++) + if (!strncmp(ourcopy, icn[i], strlen(icn[i]))) { + char *p = ourcopy + strlen(icn[i]); + tokval.t_integer = ico[i]; + if ((tokval.t_inttwo=bsi(p, conditions, + elements(conditions)))>=0) + return TOKEN_INSN; + } + if ((tokval.t_integer=bsi(ourcopy, prefix_names, + elements(prefix_names)))>=0) { + tokval.t_integer += PREFIX_ENUM_START; + return TOKEN_PREFIX; + } + if ((tokval.t_integer=bsi(ourcopy, special_names, + elements(special_names)))>=0) + return TOKEN_SPECIAL; + if (!strcmp(ourcopy, "seg")) + return TOKEN_SEG; + if (!strcmp(ourcopy, "wrt")) + return TOKEN_WRT; + return TOKEN_ID; + } else if (*bufptr == '$' && !isnumchar(bufptr[1])) { + /* + * It's a $ sign with no following hex number; this must + * mean it's a Here token ($), evaluating to the current + * assembly location, or a Base token ($$), evaluating to + * the base of the current segment. 
+ */ + bufptr++; + if (*bufptr == '$') { + bufptr++; + return TOKEN_BASE; + } + return TOKEN_HERE; + } else if (isnumstart(*bufptr)) { /* now we've got a number */ + char *r = q; + int rn_error; + + *q++ = *bufptr++; + while (isnumchar(*bufptr)) { + *q++ = *bufptr++; + } + if (*bufptr == '.') { + /* + * a floating point constant + */ + *q++ = *bufptr++; + while (isnumchar(*bufptr)) { + *q++ = *bufptr++; + } + *q++ = '\0'; + tokval.t_charptr = r; + return TOKEN_FLOAT; + } + *q++ = '\0'; + tokval.t_integer = readnum(r, &rn_error); + if (rn_error) + return TOKEN_ERRNUM; /* some malformation occurred */ + tokval.t_charptr = NULL; + return TOKEN_NUM; + } else if (*bufptr == '\'' || *bufptr == '"') {/* a char constant */ + char quote = *bufptr++, *r; + r = tokval.t_charptr = bufptr; + while (*bufptr && *bufptr != quote) bufptr++; + tokval.t_inttwo = bufptr - r; /* store full version */ + if (!*bufptr) + return TOKEN_ERRNUM; /* unmatched quotes */ + tokval.t_integer = 0; + r = bufptr++; /* skip over final quote */ + while (quote != *--r) { + tokval.t_integer = (tokval.t_integer<<8) + (unsigned char) *r; + } + return TOKEN_NUM; + } else if (*bufptr == ';') { /* a comment has happened - stay */ + return 0; + } else if ((*bufptr == '>' || *bufptr == '<' || + *bufptr == '/' || *bufptr == '%') && bufptr[1] == *bufptr) { + bufptr += 2; + return (bufptr[-2] == '>' ? TOKEN_SHR : + bufptr[-2] == '<' ? TOKEN_SHL : + bufptr[-2] == '/' ? TOKEN_SDIV : + TOKEN_SMOD); + } else /* just an ordinary char */ + return (unsigned char) (*bufptr++); +} + +/* return index of "string" in "array", or -1 if no match. 
*/ +static int bsi (char *string, char **array, int size) { + int i = -1, j = size; /* always, i < index < j */ + while (j-i >= 2) { + int k = (i+j)/2; + int l = strcmp(string, array[k]); + if (l<0) /* it's in the first half */ + j = k; + else if (l>0) /* it's in the second half */ + i = k; + else /* we've got it :) */ + return k; + } + return -1; /* we haven't got it :( */ +} + +void cleanup_insn (insn *i) { + extop *e; + + while (i->eops) { + e = i->eops; + i->eops = i->eops->next; + nasm_free (e); + } +} + +/* ------------- Evaluator begins here ------------------ */ + +static expr exprtempstorage[1024], *tempptr; /* store exprs in here */ + +/* + * Add two vector datatypes. We have some bizarre behaviour on far- + * absolute segment types: we preserve them during addition _only_ + * if one of the segments is a truly pure scalar. + */ +static expr *add_vectors(expr *p, expr *q) { + expr *r = tempptr; + int preserve; + + preserve = is_really_simple(p) || is_really_simple(q); + + while (p->type && q->type && + p->type < EXPR_SEGBASE+SEG_ABS && + q->type < EXPR_SEGBASE+SEG_ABS) + if (p->type > q->type) { + tempptr->type = q->type; + tempptr->value = q->value; + tempptr++, q++; + } else if (p->type < q->type) { + tempptr->type = p->type; + tempptr->value = p->value; + tempptr++, p++; + } else { /* *p and *q have same type */ + tempptr->type = p->type; + tempptr->value = p->value + q->value; + tempptr++, p++, q++; + } + while (p->type && + (preserve || p->type < EXPR_SEGBASE+SEG_ABS)) { + tempptr->type = p->type; + tempptr->value = p->value; + tempptr++, p++; + } + while (q->type && + (preserve || q->type < EXPR_SEGBASE+SEG_ABS)) { + tempptr->type = q->type; + tempptr->value = q->value; + tempptr++, q++; + } + (tempptr++)->type = 0; + + return r; +} + +/* + * Multiply a vector by a scalar. Strip far-absolute segment part + * if present. 
+ */ +static expr *scalar_mult(expr *vect, long scalar) { + expr *p = vect; + + while (p->type && p->type < EXPR_SEGBASE+SEG_ABS) { + p->value = scalar * (p->value); + p++; + } + p->type = 0; + + return vect; +} + +static expr *scalarvect (long scalar) { + expr *p = tempptr; + tempptr->type = EXPR_SIMPLE; + tempptr->value = scalar; + tempptr++; + tempptr->type = 0; + tempptr++; + return p; +} + +/* + * Return TRUE if the argument is a simple scalar. (Or a far- + * absolute, which counts.) + */ +static int is_simple (expr *vect) { + while (vect->type && !vect->value) + vect++; + if (!vect->type) + return 1; + if (vect->type != EXPR_SIMPLE) + return 0; + do { + vect++; + } while (vect->type && !vect->value); + if (vect->type && vect->type < EXPR_SEGBASE+SEG_ABS) return 0; + return 1; +} + +/* + * Return TRUE if the argument is a simple scalar, _NOT_ a far- + * absolute. + */ +static int is_really_simple (expr *vect) { + while (vect->type && !vect->value) + vect++; + if (!vect->type) + return 1; + if (vect->type != EXPR_SIMPLE) + return 0; + do { + vect++; + } while (vect->type && !vect->value); + if (vect->type) return 0; + return 1; +} + +/* + * Return TRUE if the argument is relocatable (i.e. a simple + * scalar, plus at most one segment-base, plus possibly a WRT). + */ +static int is_reloc (expr *vect) { + while (vect->type && !vect->value) + vect++; + if (!vect->type) + return 1; + if (vect->type < EXPR_SIMPLE) + return 0; + if (vect->type == EXPR_SIMPLE) { + do { + vect++; + } while (vect->type && !vect->value); + if (!vect->type) + return 1; + } + do { + vect++; + } while (vect->type && (vect->type == EXPR_WRT || !vect->value)); + if (!vect->type) + return 1; + return 1; +} + +/* + * Return the scalar part of a relocatable vector. (Including + * simple scalar vectors - those qualify as relocatable.) 
+ */ +static long reloc_value (expr *vect) { + while (vect->type && !vect->value) + vect++; + if (!vect->type) return 0; + if (vect->type == EXPR_SIMPLE) + return vect->value; + else + return 0; +} + +/* + * Return the segment number of a relocatable vector, or NO_SEG for + * simple scalars. + */ +static long reloc_seg (expr *vect) { + while (vect->type && (vect->type == EXPR_WRT || !vect->value)) + vect++; + if (vect->type == EXPR_SIMPLE) { + do { + vect++; + } while (vect->type && (vect->type == EXPR_WRT || !vect->value)); + } + if (!vect->type) + return NO_SEG; + else + return vect->type - EXPR_SEGBASE; +} + +/* + * Return the WRT segment number of a relocatable vector, or NO_SEG + * if no WRT part is present. + */ +static long reloc_wrt (expr *vect) { + while (vect->type && vect->type < EXPR_WRT) + vect++; + if (vect->type == EXPR_WRT) { + return vect->value; + } else + return NO_SEG; +} + +static void eval_reset(void) { + tempptr = exprtempstorage; /* initialise temporary storage */ +} + +/* + * The SEG operator: calculate the segment part of a relocatable + * value. Return NULL, as usual, if an error occurs. Report the + * error too. + */ +static expr *segment_part (expr *e) { + long seg; + + if (!is_reloc(e)) { + error(ERR_NONFATAL, "cannot apply SEG to a non-relocatable value"); + return NULL; + } + + seg = reloc_seg(e); + if (seg == NO_SEG) { + error(ERR_NONFATAL, "cannot apply SEG to a non-relocatable value"); + return NULL; + } else if (seg & SEG_ABS) + return scalarvect(seg & ~SEG_ABS); + else { + expr *f = tempptr++; + tempptr++->type = 0; + f->type = EXPR_SEGBASE+outfmt->segbase(seg+1); + f->value = 1; + return f; + } +} + +/* + * Recursive-descent parser. Called with a single boolean operand, + * which is TRUE if the evaluation is critical (i.e. unresolved + * symbols are an error condition). Must update the global `i' to + * reflect the token after the parsed string. May return NULL. 
+ * + * evaluate() should report its own errors: on return it is assumed + * that if NULL has been returned, the error has already been + * reported. + */ + +/* + * Grammar parsed is: + * + * expr : expr0 [ WRT expr6 ] + * expr0 : expr1 [ {|} expr1] + * expr1 : expr2 [ {^} expr2] + * expr2 : expr3 [ {&} expr3] + * expr3 : expr4 [ {<<,>>} expr4...] + * expr4 : expr5 [ {+,-} expr5...] + * expr5 : expr6 [ {*,/,%,//,%%} expr6...] + * expr6 : { ~,+,-,SEG } expr6 + * | (expr0) + * | symbol + * | $ + * | number + */ + +static expr *expr0(int), *expr1(int), *expr2(int), *expr3(int); +static expr *expr4(int), *expr5(int), *expr6(int); + +static expr *expr0(int critical) { + expr *e, *f; + + e = expr1(critical); + if (!e) + return NULL; + while (i == '|') { + i = nexttoken(); + f = expr1(critical); + if (!f) + return NULL; + if (!is_simple(e) || !is_simple(f)) { + error(ERR_NONFATAL, "`|' operator may only be applied to" + " scalar values"); + } + e = scalarvect (reloc_value(e) | reloc_value(f)); + } + return e; +} + +static expr *expr1(int critical) { + expr *e, *f; + + e = expr2(critical); + if (!e) + return NULL; + while (i == '^') { + i = nexttoken(); + f = expr2(critical); + if (!f) + return NULL; + if (!is_simple(e) || !is_simple(f)) { + error(ERR_NONFATAL, "`^' operator may only be applied to" + " scalar values"); + } + e = scalarvect (reloc_value(e) ^ reloc_value(f)); + } + return e; +} + +static expr *expr2(int critical) { + expr *e, *f; + + e = expr3(critical); + if (!e) + return NULL; + while (i == '&') { + i = nexttoken(); + f = expr3(critical); + if (!f) + return NULL; + if (!is_simple(e) || !is_simple(f)) { + error(ERR_NONFATAL, "`&' operator may only be applied to" + " scalar values"); + } + e = scalarvect (reloc_value(e) & reloc_value(f)); + } + return e; +} + +static expr *expr3(int critical) { + expr *e, *f; + + e = expr4(critical); + if (!e) + return NULL; + while (i == TOKEN_SHL || i == TOKEN_SHR) { + int j = i; + i = nexttoken(); + f = expr4(critical); + 
if (!f) + return NULL; + if (!is_simple(e) || !is_simple(f)) { + error(ERR_NONFATAL, "shift operator may only be applied to" + " scalar values"); + } + switch (j) { + case TOKEN_SHL: + e = scalarvect (reloc_value(e) << reloc_value(f)); + break; + case TOKEN_SHR: + e = scalarvect (((unsigned long)reloc_value(e)) >> + reloc_value(f)); + break; + } + } + return e; +} + +static expr *expr4(int critical) { + expr *e, *f; + + e = expr5(critical); + if (!e) + return NULL; + while (i == '+' || i == '-') { + int j = i; + i = nexttoken(); + f = expr5(critical); + if (!f) + return NULL; + switch (j) { + case '+': + e = add_vectors (e, f); + break; + case '-': + e = add_vectors (e, scalar_mult(f, -1L)); + break; + } + } + return e; +} + +static expr *expr5(int critical) { + expr *e, *f; + + e = expr6(critical); + if (!e) + return NULL; + while (i == '*' || i == '/' || i == '*' || + i == TOKEN_SDIV || i == TOKEN_SMOD) { + int j = i; + i = nexttoken(); + f = expr6(critical); + if (!f) + return NULL; + if (j != '*' && (!is_simple(e) || !is_simple(f))) { + error(ERR_NONFATAL, "division operator may only be applied to" + " scalar values"); + return NULL; + } + if (j != '*' && reloc_value(f) == 0) { + error(ERR_NONFATAL, "division by zero"); + return NULL; + } + switch (j) { + case '*': + if (is_simple(e)) + e = scalar_mult (f, reloc_value(e)); + else if (is_simple(f)) + e = scalar_mult (e, reloc_value(f)); + else { + error(ERR_NONFATAL, "unable to multiply two " + "non-scalar objects"); + return NULL; + } + break; + case '/': + e = scalarvect (((unsigned long)reloc_value(e)) / + ((unsigned long)reloc_value(f))); + break; + case '%': + e = scalarvect (((unsigned long)reloc_value(e)) % + ((unsigned long)reloc_value(f))); + break; + case TOKEN_SDIV: + e = scalarvect (((signed long)reloc_value(e)) / + ((signed long)reloc_value(f))); + break; + case TOKEN_SMOD: + e = scalarvect (((signed long)reloc_value(e)) % + ((signed long)reloc_value(f))); + break; + } + } + return e; +} + +static 
expr *expr6(int critical) { + expr *e; + long label_seg, label_ofs; + + if (i == '-') { + i = nexttoken(); + e = expr6(critical); + if (!e) + return NULL; + return scalar_mult (e, -1L); + } else if (i == '+') { + i = nexttoken(); + return expr6(critical); + } else if (i == '~') { + i = nexttoken(); + e = expr6(critical); + if (!e) + return NULL; + if (!is_simple(e)) { + error(ERR_NONFATAL, "`~' operator may only be applied to" + " scalar values"); + return NULL; + } + return scalarvect(~reloc_value(e)); + } else if (i == TOKEN_SEG) { + i = nexttoken(); + e = expr6(critical); + if (!e) + return NULL; + return segment_part(e); + } else if (i == '(') { + i = nexttoken(); + e = expr0(critical); + if (!e) + return NULL; + if (i != ')') { + error(ERR_NONFATAL, "expecting `)'"); + return NULL; + } + i = nexttoken(); + return e; + } else if (i == TOKEN_NUM || i == TOKEN_REG || i == TOKEN_ID || + i == TOKEN_HERE || i == TOKEN_BASE) { + e = tempptr; + switch (i) { + case TOKEN_NUM: + e->type = EXPR_SIMPLE; + e->value = tokval.t_integer; + break; + case TOKEN_REG: + e->type = tokval.t_integer; + e->value = 1; + break; + case TOKEN_ID: + case TOKEN_HERE: + case TOKEN_BASE: + /* + * Since the whole line is parsed before the label it + * defines is given to the label manager, we have + * problems with lines such as + * + * end: TIMES 512-(end-start) DB 0 + * + * where `end' is not known on pass one, despite not + * really being a forward reference, and due to + * criticality it is _needed_. Hence we check our label + * against the currently defined one, and do our own + * resolution of it if we have to. 
+ */ + if (i == TOKEN_BASE) { + label_seg = seg; + label_ofs = 0; + } else if (i == TOKEN_HERE || !strcmp(tokval.t_charptr, label)) { + label_seg = seg; + label_ofs = ofs; + } else if (!labelfunc(tokval.t_charptr, &label_seg, &label_ofs)) { + if (critical == 2) { + error (ERR_NONFATAL, "symbol `%s' undefined", + tokval.t_charptr); + return NULL; + } else if (critical == 1) { + error (ERR_NONFATAL, "symbol `%s' not defined before use", + tokval.t_charptr); + return NULL; + } else { + label_seg = seg; + label_ofs = ofs; + } + } + e->type = EXPR_SIMPLE; + e->value = label_ofs; + if (label_seg!=NO_SEG) { + tempptr++; + tempptr->type = EXPR_SEGBASE + label_seg; + tempptr->value = 1; + } + break; + } + tempptr++; + tempptr->type = 0; + tempptr++; + i = nexttoken(); + return e; + } else { + error(ERR_NONFATAL, "expression syntax error"); + return NULL; + } +} + +static expr *evaluate (int critical) { + expr *e; + expr *f = NULL; + + e = expr0 (critical); + if (!e) + return NULL; + + if (i == TOKEN_WRT) { + if (!is_reloc(e)) { + error(ERR_NONFATAL, "invalid left-hand operand to WRT"); + return NULL; + } + i = nexttoken(); /* eat the WRT */ + f = expr6 (critical); + if (!f) + return NULL; + } + e = scalar_mult (e, 1L); /* strip far-absolute segment part */ + if (f) { + expr *g = tempptr++; + tempptr++->type = 0; + g->type = EXPR_WRT; + if (!is_reloc(f)) { + error(ERR_NONFATAL, "invalid right-hand operand to WRT"); + return NULL; + } + g->value = reloc_seg(f); + if (g->value == NO_SEG) + g->value = reloc_value(f) | SEG_ABS; + else if (!(g->value & SEG_ABS) && !(g->value % 2) && critical) { + error(ERR_NONFATAL, "invalid right-hand operand to WRT"); + return NULL; + } + e = add_vectors (e, g); + } + return e; +} diff --git a/parser.h b/parser.h new file mode 100644 index 0000000..82d5235 --- /dev/null +++ b/parser.h @@ -0,0 +1,18 @@ +/* parser.h header file for the parser module of version 0.1 of the + * Netwide Assembler + * + * The Netwide Assembler is copyright (C) 1996 
Simon Tatham and + * Julian Hall. All rights reserved. The software is + * redistributable under the licence given in the file "Licence" + * distributed in the NASM archive. + */ + +#ifndef NASM_PARSER_H +#define NASM_PARSER_H + +insn *parse_line (long segment, long offset, lfunc lookup_label, int pass, + char *buffer, insn *result, struct ofmt *output, + efunc error); +void cleanup_insn (insn *instruction); + +#endif diff --git a/rdoff/Makefile b/rdoff/Makefile new file mode 100644 index 0000000..2e55dde --- /dev/null +++ b/rdoff/Makefile @@ -0,0 +1,43 @@ +# Makefile for RDOFF object file utils; part of the Netwide Assembler +# +# The Netwide Assembler is copyright (C) 1996 Simon Tatham and +# Julian Hall. All rights reserved. The software is +# redistributable under the licence given in the file "Licence" +# distributed in the NASM archive. +# +# This Makefile is designed for use under Unix (probably fairly +# portably). + +CC = gcc +CCFLAGS = -c -O -g -Wall -ansi -pedantic -I.. +LINK = gcc +LINKFLAGS = -o +DLINKFLAGS = -o +LIBRARIES = +STRIP = strip +LDRDFLIBS = rdoff.o ../nasmlib.o symtab.o collectn.o +RDXLIBS = rdoff.o rdfload.o symtab.o collectn.o + +.c.o: + $(CC) $(CCFLAGS) $*.c + +all : rdfdump ldrdf rdx + +rdfdump : rdfdump.o + $(LINK) $(LINKFLAGS) rdfdump rdfdump.o +ldrdf : ldrdf.o $(LDRDFLIBS) + $(LINK) $(LINKFLAGS) ldrdf ldrdf.o $(LDRDFLIBS) +rdx : rdx.o $(RDXLIBS) + $(LINK) $(LINKFLAGS) rdx rdx.o $(RDXLIBS) + +rdfdump.o : rdfdump.c +rdoff.o : rdoff.c rdoff.h +ldrdf.o : ldrdf.c rdoff.h ../nasmlib.h symtab.h collectn.h +symtab.o : symtab.c symtab.h +collectn.o : collectn.c collectn.h +rdx.o : rdx.c rdoff.h rdfload.h symtab.h +rdfload.o : rdfload.c rdfload.h rdoff.h collectn.h symtab.h + +clean : + rm -f *.o *~ rdfdump ldrdf rdx + make -C test clean diff --git a/rdoff/collectn.c b/rdoff/collectn.c new file mode 100644 index 0000000..c265c95 --- /dev/null +++ b/rdoff/collectn.c @@ -0,0 +1,40 @@ +/* collectn.c Implements variable length pointer arrays 
[collections] + * + * This file is public domain. + */ + +#include "collectn.h" +#include <stdlib.h> + +void collection_init(Collection * c) +{ + int i; + + for (i = 0; i < 32; i++) c->p[i] = NULL; + c->next = NULL; +} + +void ** colln(Collection * c, int index) +{ + while (index >= 32) { + index -= 32; + if (c->next == NULL) { + c->next = malloc(sizeof(Collection)); + collection_init(c->next); + } + c = c->next; + } + return &(c->p[index]); +} + +void collection_reset(Collection *c) +{ + int i; + if (c->next) { + collection_reset(c->next); + free(c->next); + } + + c->next = NULL; + for (i = 0; i < 32; i++) c->p[i] = NULL; +} diff --git a/rdoff/collectn.h b/rdoff/collectn.h new file mode 100644 index 0000000..b3f2d52 --- /dev/null +++ b/rdoff/collectn.h @@ -0,0 +1,22 @@ +/* collectn.h Header file for 'collection' abstract data type + * + * This file is public domain, and does not come under the NASM license. + * It, along with 'collectn.c' implements what is basically a variable + * length array (of pointers) + */ + +#ifndef _COLLECTN_H +#define _COLLECTN_H + +typedef struct tagCollection { + void *p[32]; /* array of pointers to objects */ + + struct tagCollection *next; +} Collection; + +void collection_init(Collection * c); +void ** colln(Collection * c, int index); +void collection_reset(Collection * c); + +#endif + diff --git a/rdoff/ldrdf.c b/rdoff/ldrdf.c new file mode 100644 index 0000000..ce86b7e --- /dev/null +++ b/rdoff/ldrdf.c @@ -0,0 +1,540 @@ +/* ldrdf.c RDOFF Object File linker/loader main program + * + * The Netwide Assembler is copyright (C) 1996 Simon Tatham and + * Julian Hall. All rights reserved. The software is + * redistributable under the licence given in the file "Licence" + * distributed in the NASM archive. + */ + +/* TODO: Make the system skip a module (other than the first) if none + * of the other specified modules contain a reference to it. 
+ * May require the system to make an extra pass of the modules to be + * loaded eliminating those that aren't required. + * + * Support libraries (.a files - requires a 'ranlib' type utility) + * + * -s option to strip resolved symbols from exports. + */ + +#include <stdio.h> +#include <stdlib.h> +#include <string.h> /* strcmp, strlen, strcpy are used below */ + +#include "nasm.h" +#include "rdoff.h" +#include "nasmlib.h" +#include "symtab.h" +#include "collectn.h" + +#define LDRDF_VERSION "0.11" + +/* global variables - those to set options: */ + +int verbose = 0; /* reflects setting of command line switch */ +int align = 16; +int errors = 0; /* set by functions to cause halt after current + stage of processing */ + +/* the linked list of modules that must be loaded & linked */ + +struct modulenode { + rdffile f; /* the file */ + long coderel; /* module's code relocation factor */ + long datarel; /* module's data relocation factor */ + long bssrel; /* module's bss data reloc. factor */ + void * header; /* header location, if loaded */ + char * name; /* filename */ + struct modulenode *next; +}; + +struct modulenode *modules = NULL,*lastmodule = NULL; + +void *symtab; /* The symbol table */ + +rdf_headerbuf * newheader ; /* New header to be written to output */ + +/* loadmodule - find the characteristics of a module and add it to the + * list of those being linked together */ + +void loadmodule(char *filename) +{ + struct modulenode *prev; + if (! modules) { + modules = malloc(sizeof(struct modulenode)); + lastmodule = modules; + prev = NULL; + } + else { + lastmodule->next = malloc(sizeof(struct modulenode)); + prev = lastmodule; + lastmodule = lastmodule->next; + } + + if (! 
lastmodule) { + fputs("ldrdf: not enough memory\n",stderr); + exit(1); + } + + if (rdfopen(&lastmodule->f,filename)) { + rdfperror("ldrdf",filename); + exit(1); + } + + lastmodule->header = NULL; /* header hasn't been loaded */ + lastmodule->name = filename; + lastmodule->next = NULL; + + if (prev) { + lastmodule->coderel = prev->coderel + prev->f.code_len; + if (lastmodule->coderel % align != 0) + lastmodule->coderel += align - (lastmodule->coderel % align); + lastmodule->datarel = prev->datarel + prev->f.data_len; + if (lastmodule->datarel % align != 0) + lastmodule->datarel += align - (lastmodule->datarel % align); + } + else { + lastmodule->coderel = 0; + lastmodule->datarel = 0; + } + + if (verbose) + printf("%s code = %08lx (+%04lx), data = %08lx (+%04lx)\n",filename, + lastmodule->coderel,lastmodule->f.code_len, + lastmodule->datarel,lastmodule->f.data_len); + +} + +/* load_segments() allocates memory for & loads the code & data segs + * from the RDF modules + */ + +char *text,*data; +long textlength,datalength,bsslength; + +void load_segments(void) +{ + struct modulenode *mod; + + if (!modules) { + fprintf(stderr,"ldrdf: nothing to do\n"); + exit(0); + } + if (!lastmodule) { + fprintf(stderr,"ldrdf: panic: module list exists, but lastmodule=NULL\n"); + exit(3); + } + + if (verbose) + printf("loading modules into memory\n"); + + /* The following stops 16 bit DOS from crashing whilst attempting to + work using segments > 64K */ + if (sizeof(int) == 2) { /* expect a 'code has no effect' warning on 32 bit + platforms... 
*/ + if (lastmodule->coderel + lastmodule->f.code_len > 65535 || + lastmodule->datarel + lastmodule->f.data_len > 65535) { + fprintf(stderr,"ldrdf: segment length has exceeded 64K; use a 32 bit " + "version.\nldrdf: code size = %05lx, data size = %05lx\n", + lastmodule->coderel + lastmodule->f.code_len, + lastmodule->datarel + lastmodule->f.data_len); + exit(1); + } + } + + text = malloc(textlength = lastmodule->coderel + lastmodule->f.code_len); + data = malloc(datalength = lastmodule->datarel + lastmodule->f.data_len); + + if (!text || !data) { + fprintf(stderr,"ldrdf: out of memory\n"); + exit(1); + } + + mod = modules; + while (mod) { /* load the segments for each module */ + mod->header = malloc(mod->f.header_len); + if (!mod->header) { + fprintf(stderr,"ldrdf: out of memory\n"); + exit(1); + } + if (rdfloadseg(&mod->f,RDOFF_HEADER,mod->header) || + rdfloadseg(&mod->f,RDOFF_CODE,&text[mod->coderel]) || + rdfloadseg(&mod->f,RDOFF_DATA,&data[mod->datarel])) { + rdfperror("ldrdf",mod->name); + exit(1); + } + rdfclose(&mod->f); /* close file; segments remain */ + mod = mod->next; + } +} + +/* build_symbols() step through each module's header, and locate + * exported symbols, placing them in a global table + */ + +void build_symbols() +{ + struct modulenode *mod; + rdfheaderrec *r; + symtabEnt e; + long bssloc,cbBss; + + if (verbose) printf("building global symbol table:\n"); + newheader = rdfnewheader(); + + symtab = symtabNew(); + bssloc = 0; /* keep track of location of BSS symbols */ + + for (mod = modules; mod; mod = mod->next) + { + mod->bssrel = bssloc; + cbBss = 0; + rdfheaderrewind(&mod->f); + while ((r = rdfgetheaderrec(&mod->f))) + { + + if (r->type == 5) /* Allocate BSS */ + cbBss += r->b.amount; + + if (r->type != 3) continue; /* ignore all but export recs */ + + e.segment = r->e.segment; + e.offset = r->e.offset + + (e.segment == 0 ? mod->coderel : /* 0 -> code */ + e.segment == 1 ? 
mod->datarel : /* 1 -> data */ + mod->bssrel) ; /* 2 -> bss */ + e.flags = 0; + e.name = malloc(strlen(r->e.label) + 1); + if (! e.name) + { + fprintf(stderr,"ldrdf: out of memory\n"); + exit(1); + } + strcpy(e.name,r->e.label); + symtabInsert(symtab,&e); + } + bssloc += cbBss; + } + if (verbose) + { + symtabDump(symtab,stdout); + printf("BSS length = %ld bytes\n\n",bssloc); + } + bsslength = bssloc; +} + +/* link_segments() step through relocation records in each module's + * header, fixing up references. + */ + +void link_segments(void) +{ + struct modulenode *mod; + Collection imports; + symtabEnt *s; + long rel,relto = 0; /* placate gcc */ + char *seg; + rdfheaderrec *r; + int bRelative; + + if (verbose) printf("linking segments\n"); + + collection_init(&imports); + + for (mod = modules; mod; mod = mod->next) { + if (verbose >= 2) printf("* processing %s\n",mod->name); + rdfheaderrewind(&mod->f); + while((r = rdfgetheaderrec(&mod->f))) { + switch(r->type) { + case 1: /* relocation record */ + if (r->r.segment >= 64) { /* Relative relocation; */ + bRelative = 1; /* need to find location relative */ + r->r.segment -= 64; /* to start of this segment */ + relto = r->r.segment == 0 ? mod->coderel : mod->datarel; + } + else + bRelative = 0; /* non-relative - need to relocate + * at load time */ + + /* calculate absolute offset of reference, not rel to beginning of + segment */ + r->r.offset += r->r.segment == 0 ? mod->coderel : mod->datarel; + + /* calculate the relocation factor to apply to the operand - + the base address of one of this modules segments if referred + segment is 0 - 2, or the address of an imported symbol + otherwise. 
*/ + + if (r->r.refseg == 0) rel = mod->coderel; + else if (r->r.refseg == 1) rel = mod->datarel; + else if (r->r.refseg == 2) rel = mod->bssrel; + else { /* cross module link - find reference */ + s = *colln(&imports,r->r.refseg - 2); + if (!s) { + fprintf(stderr,"ldrdf: link to undefined segment %04x in" + " %s:%d\n", r->r.refseg,mod->name,r->r.segment); + errors = 1; + break; + } + rel = s->offset; + + r->r.refseg = s->segment; /* change referred segment, + so that new header is + correct */ + } + + if (bRelative) /* Relative - subtract current segment start */ + rel -= relto; + else + { /* Add new relocation header */ + rdfaddheader(newheader,r); + } + + /* Work out which segment we're making changes to ... */ + if (r->r.segment == 0) seg = text; + else if (r->r.segment == 1) seg = data; + else { + fprintf(stderr,"ldrdf: relocation in unknown segment %d in " + "%s\n", r->r.segment,mod->name); + errors = 1; + break; + } + + /* Add the relocation factor to the datum specified: */ + + if (verbose >= 3) + printf(" - relocating %d:%08lx by %08lx\n",r->r.segment, + r->r.offset,rel); + + /**** The following code is non-portable. Rewrite it... ****/ + switch(r->r.length) { + case 1: + seg[r->r.offset] += (char) rel; + break; + case 2: + *(int16 *)(seg + r->r.offset) += (int16) rel; + break; + case 4: + *(long *)(seg + r->r.offset) += rel; + break; + } + break; + + case 2: /* import record */ + s = symtabFind(symtab, r->i.label); + if (s == NULL) { + /* Need to add support for dynamic linkage */ + fprintf(stderr,"ldrdf: undefined symbol %s in module %s\n", + r->i.label,mod->name); + errors = 1; + } + else + { + *colln(&imports,r->i.segment - 2) = s; + if (verbose >= 2) + printf("imported %s as %04x\n", r->i.label, r->i.segment); + } + break; + + case 3: /* export; dump to output new version */ + s = symtabFind(symtab, r->e.label); + if (! s) continue; /* eh? probably doesn't matter... 
*/ + + r->e.offset = s->offset; + rdfaddheader(newheader,r); + break; + + case 4: /* DLL record */ + rdfaddheader(newheader,r); /* copy straight to output */ + break; + } + } + collection_reset(&imports); + } +} + +/* write_output() write linked program out to a file */ + +void write_output(char *filename) +{ + FILE * fp; + rdfheaderrec r; + + fp = fopen(filename,"wb"); + if (! fp) + { + fprintf(stderr,"ldrdf: could not open '%s' for writing\n",filename); + exit(1); + } + + + /* add BSS length count to header... */ + if (bsslength) + { + r.type = 5; + r.b.amount = bsslength; + rdfaddheader(newheader,&r); + } + + /* Write header */ + rdfwriteheader(fp,newheader); + rdfdoneheader(newheader); + newheader = NULL; + + /* Write text */ + if (fwrite(&textlength,1,4,fp) != 4 + || fwrite(text,1,textlength,fp) !=textlength) + { + fprintf(stderr,"ldrdf: error writing %s\n",filename); + exit(1); + } + + /* Write data */ + if (fwrite(&datalength,1,4,fp) != 4 || + fwrite(data,1,datalength,fp) != datalength) + { + fprintf (stderr,"ldrdf: error writing %s\n", filename); + exit(1); + } + fclose(fp); +} + + +/* main program: interpret command line, and pass parameters on to + * individual module loaders & the linker + * + * Command line format: + * ldrdf [-o outfile | -x] [-r xxxx] [-v] [--] infile [infile ...] + * + * Default action is to output a file named 'aout.rdx'. -x specifies + * that the linked object program should be executed, rather than + * written to a file. -r specifies that the object program should + * be prelocated at address 'xxxx'. This option cannot be used + * in conjunction with -x. 
+ */ + +const char *usagemsg = "usage:\n" +" ldrdf [-o outfile | -x] [-a x] [-v] [-p x] [--] infile [infile ...]\n\n" +" ldrdf -h displays this message\n" +" ldrdf -r displays version information\n\n" +" -o selects output filename (default is aout.rdx)\n" +" -x causes ldrdx to link & execute rather than write to file\n" +" -a x causes object program to be statically relocated to address 'x'\n" +" -v turns on verbose mode\n" +" -p x causes segments to be aligned (padded) to x byte boundaries\n" +" (default is 16 bytes)\n"; + +void usage(void) +{ + fputs(usagemsg,stderr); +} + +int main(int argc,char **argv) +{ + char *ofilename = "aout.rdx"; + long relocateaddr = -1; /* -1 if no relocation is to occur */ + int execute = 0; /* 1 to execute after linking, 0 otherwise */ + int procsw = 1; /* set to 0 by '--' */ + int tmp; + + if (argc == 1) { + usage(); + exit(1); + } + + /* process command line switches, and add modules specified to linked list + of modules, keeping track of total memory required to load them */ + + while(argv++,--argc) { + if (procsw && !strcmp(*argv,"-h")) { /* Help command */ + usage(); exit(1); + } + else if (procsw && !strcmp(*argv,"-r")) { + printf("ldrdf version %s (%s) (%s)\n",LDRDF_VERSION,_RDOFF_H, + sizeof(int) == 2 ? 
"16 bit" : "32 bit"); + exit(1); + } + else if (procsw && !strcmp(*argv,"-o")) { + ofilename = *++argv; + --argc; + if (execute) { + fprintf(stderr,"ldrdf: -o and -x switches incompatible\n"); + exit(1); + } + if (verbose > 1) printf("output filename set to '%s'\n",ofilename); + } + else if (procsw && !strcmp(*argv,"-x")) { + execute++; + if (verbose > 1) printf("will execute linked object\n"); + } + else if (procsw && !strcmp(*argv,"-a")) { + relocateaddr = readnum(*++argv,&tmp); + --argc; + if (tmp) { + fprintf(stderr,"ldrdf: error in parameter to '-a' switch: '%s'\n", + *argv); + exit(1); + } + if (execute) { + fprintf(stderr,"ldrdf: -a and -x switches incompatible\n"); + exit(1); + } + if (verbose) printf("will relocate to %08lx\n",relocateaddr); + } + else if (procsw && !strcmp(*argv,"-v")) { + verbose++; + if (verbose == 1) printf("verbose mode selected\n"); + } + else if (procsw && !strcmp(*argv,"-p")) { + align = readnum(*++argv,&tmp); + --argc; + if (tmp) { + fprintf(stderr,"ldrdf: error in parameter to '-p' switch: '%s'\n", + *argv); + exit(1); + } + if (align != 1 && align != 2 && align != 4 && align != 8 && align != 16 + && align != 32 && align != 256) { + fprintf(stderr,"ldrdf: %d is an invalid alignment factor - must be" + "1,2,4,8,16 or 256\n",align); + exit(1); + } + if (verbose > 1) printf("alignment %d selected\n",align); + } + else if (procsw && !strcmp(*argv,"--")) { + procsw = 0; + } + else { /* is a filename */ + if (verbose > 1) printf("processing module %s\n",*argv); + loadmodule(*argv); + } + } + + /* we should be scanning for unresolved references, and removing + unreferenced modules from the list of modules here, so that + we know about the final size once libraries have been linked in */ + + load_segments(); /* having calculated size of reqd segments, load + each rdoff module's segments into memory */ + + build_symbols(); /* build a global symbol table... + perhaps this should be done before load_segs? 
*/ + + link_segments(); /* step through each module's header, and resolve + references to the global symbol table. + This also does local address fixups. */ + + if (errors) { + fprintf(stderr,"ldrdf: there were errors - aborted\n"); + exit(errors); + } + if (execute) { + fprintf(stderr,"ldrdf: module execution not yet supported\n"); + exit(1); + } + if (relocateaddr != -1) { + fprintf(stderr,"ldrdf: static relocation not yet supported\n"); + exit(1); + } + + write_output(ofilename); + return 0; +} diff --git a/rdoff/rdf.doc b/rdoff/rdf.doc new file mode 100644 index 0000000..300c2bc --- /dev/null +++ b/rdoff/rdf.doc @@ -0,0 +1,99 @@ +RDOFF: Relocatable Dynamically-linked Object File Format +======================================================== + +RDOFF was designed initially to test the object-file production +interface to NASM. It soon became apparent that it could be enhanced +for use in serious applications due to its simplicity; code to load +and execute an RDOFF object module is very simple. It also contains +enhancements to allow it to be linked with a dynamic link library at +either run- or load- time, depending on how complex you wish to make +your loader. + +The RDOFF format (version 1.1, as produced by NASM v0.91) is defined +as follows: + +The first six bytes of the file contain the string 'RDOFF1'. Other +versions of the format may contain a last character other than +'1' - all little endian versions of the file will always contain an +ASCII character with value greater than 32. If RDOFF is used on a +big-endian machine at some point in the future, the version will be +encoded in decimal rather than ASCII, so will be below 32. + +All multi-byte fields following this are encoded in either little- or +big-endian format depending on the system described by this version +information. 
Object files should be encoded in the endianness of +their target machine; files of incorrect endianness will be rejected +by the loader - this means that loaders do not need to convert +endianness, as RDOFF has been designed with simplicity of loading at +the forefront of the design requirements. + +The next 4 byte field is the length of the header in bytes. The +header consists of a sequence of variable length records. Each +record's type is identified by the first byte of the record. Record +types 1-4 are currently supported. Record type 5 will be added in +the near future, when I implement BSS segments. Record type 6 may be +to do with debugging, when I get debugging implemented. + +Type 1: Relocation +================== + +Offset Length Description +0 1 Type (contains 1) +1 1 Segment that contains reference (0 = text, 1 = data) + Add 64 to this number to indicate a relative linkage + to an external symbol (see notes) +2 4 Offset of reference +6 1 Length of reference (1,2 or 4 bytes) +7 2 Segment to which reference is made (0 = text, 1 = + data, 2 = BSS [when implemented]); others are external + symbols. + +Total length = 9 bytes + +Type 2: Symbol Import +===================== + +0 1 Type (2) +1 2 Segment number that will be used in references to this + symbol. +3 ? Null terminated string containing label (up to 32 + chars) to match against exports in linkage. + +Type 3: Symbol Export +===================== + +0 1 Type (3) +1 1 Segment containing object to be exported (0/1/2) +2 4 Offset within segment +6 ? Null terminated string containing label to export (32 + char maximum length) + +Type 4: Dynamic Link Library +============================ + +0 1 Type (4) +1 ? Library name (up to 128 chars) + +Type 5: Reserve BSS +=================== + +0 1 Type (5) +1 4 Amount of BSS space to reserve in bytes + +Total length: 5 bytes + +----------------------------------------------------------------------------- + +Following the header is the text (code) segment. 
This is preceded by +a 4-byte integer, which is its length in bytes. This is followed by +the length of the data segment (also 4 bytes), and finally the data +segment. + +Notes +===== + +Relative linking: The number stored at the address is offset +required from the imported symbol, with the address of the end of +the instruction subtracted from it. This means that the linker can +simply add the address of the label relative to the beginning of the +current segment to it. diff --git a/rdoff/rdfdump.c b/rdoff/rdfdump.c new file mode 100644 index 0000000..4d4f4df --- /dev/null +++ b/rdoff/rdfdump.c @@ -0,0 +1,156 @@ +#include <stdio.h> +#include <stdlib.h> +#include <string.h> + +FILE *infile; + +long translatelong(long in) { /* translate from little endian to + local representation */ + long r; + unsigned char *i; + + i = (unsigned char *)&in; /* view the argument as raw bytes */ + r = i[3]; + r = (r << 8) + i[2]; + r = (r << 8) + i[1]; + r = (r << 8) + *i; + + return r; +} + +int translateshort(short in) { + int r; + unsigned char *i; + + i = (unsigned char *)&in; /* view the argument as raw bytes */ + r = (i[1] << 8) + *i; + + return r; +} +void print_header(long length) { + unsigned char buf[129],t,s,l; + long o; + short rs; + + while (length > 0) { + fread(&t,1,1,infile); + switch(t) { + case 1: /* relocation record */ + fread(&s,1,1,infile); + fread(&o,4,1,infile); + fread(&l,1,1,infile); + fread(&rs,2,1,infile); + printf(" relocation: location (%04x:%08lx), length %d, " + "referred seg %04x\n",(int)s,translatelong(o),(int)l, + translateshort(rs)); + length -= 9; + break; + case 2: /* import record */ + fread(&rs,2,1,infile); + l = 0; + do { + fread(&buf[l],1,1,infile); + } while (buf[l++]); + printf(" import: segment %04x = %s\n",translateshort(rs),buf); + length -= l + 3; + break; + case 3: /* export record */ + fread(&s,1,1,infile); + fread(&o,4,1,infile); + l = 0; + do { + fread(&buf[l],1,1,infile); + } while (buf[l++]); + printf(" export: (%04x:%08lx) = %s\n",(int)s,translatelong(o),buf); + length -= l + 6; + break; + case 4: /* DLL 
record */ + l = 0; + do { + fread(&buf[l],1,1,infile); + } while (buf[l++]); + printf(" dll: %s\n",buf); + length -= l + 1; + break; + case 5: /* BSS reservation */ + /* the amount is a 4-byte field: read it into the long `o', + not the one-byte `l', which would be overrun */ + fread(&o,4,1,infile); + printf(" bss reservation: %08lx bytes\n",translatelong(o)); + length -= 5; + break; + default: + printf(" unrecognised record (type %d)\n",(int)t); + length --; + } + } +} + +int main(int argc,char **argv) { + char id[7]; + long l; + int verbose = 0; + + puts("RDOFF Dump utility v1.1 (C) Copyright 1996 Julian R Hall"); + + if (argc < 2) { + fputs("Usage: rdfdump [-v] <filename>\n",stderr); + exit(1); + } + + if (! strcmp (argv[1], "-v") ) + { + verbose = 1; + if (argc < 3) + { + fputs("required parameter missing\n",stderr); + exit(1); + } + argv++; + } + + infile = fopen(argv[1],"rb"); + if (! infile) { + fprintf(stderr,"rdfdump: Could not open %s\n",argv[1]); + exit(1); + } + + fread(id,6,1,infile); + if (strncmp(id,"RDOFF",5)) { + fputs("rdfdump: File does not contain valid RDOFF header\n",stderr); + exit(1); + } + + printf("File %s: RDOFF version %c\n\n",argv[1],id[5]); + if (id[5] < '1' || id[5] > '1') { + fprintf(stderr,"rdfdump: unknown RDOFF version '%c'\n",id[5]); + exit(1); + } + + fread(&l,4,1,infile); + l = translatelong(l); + printf("Header (%ld bytes):\n",l); + print_header(l); + + fread(&l,4,1,infile); + l = translatelong(l); + printf("\nText segment length = %ld bytes\n",l); + while(l--) { + fread(id,1,1,infile); + if (verbose) printf(" %02x",(int) (unsigned char)id[0]); + } + if (verbose) printf("\n\n"); + + fread(&l,4,1,infile); + l = translatelong(l); + printf("Data segment length = %ld bytes\n",l); + + if (verbose) + { + while (l--) { + fread(id,1,1,infile); + printf(" %02x",(int) (unsigned char) id[0]); + } + printf("\n"); + } + fclose(infile); + return 0; +} diff --git a/rdoff/rdfload.c b/rdoff/rdfload.c new file mode 100644 index 0000000..ad340b3 --- /dev/null +++ b/rdoff/rdfload.c @@ -0,0 +1,173 @@ +/* rdfload.c RDOFF Object File loader library + * + * The 
Netwide Assembler is copyright (C) 1996 Simon Tatham and + * Julian Hall. All rights reserved. The software is + * redistributable under the licence given in the file "Licence" + * distributed in the NASM archive. + * + * Permission to use this file in your own projects is granted, as long + * as acknowledgement is given in an appropriate manner to its authors, + * with instructions of how to obtain a copy via ftp. + */ + +#include <stdlib.h> +#include <stdio.h> + +#include "rdfload.h" +#include "symtab.h" +#include "rdoff.h" +#include "collectn.h" + +extern int rdf_errno; + +rdfmodule * rdfload(const char *filename) +{ + rdfmodule * f = malloc(sizeof(rdfmodule)); + long bsslength = 0; + char * hdr; + rdfheaderrec *r; + + if (f == NULL) + { + rdf_errno = 6; /* out of memory */ + return NULL; + } + + f->symtab = symtabNew(); + if (!f->symtab) + { + free(f); + rdf_errno = 6; + return NULL; + } + + /* open the file */ + if ( rdfopen( &(f->f), filename ) ) { + free(f); + return NULL; + } + + /* read in text and data segments, and header */ + + f->t = malloc (f->f.code_len); + f->d = malloc (f->f.data_len); /* BSS seg allocated later */ + hdr = malloc (f->f.header_len); + + if (! f->t || ! f->d || !hdr) { + rdf_errno = 6; + rdfclose(&f->f); + if (f->t) free(f->t); + if (f->d) free(f->d); + free(f); + return NULL; + } + + if ( rdfloadseg (&f->f,RDOFF_HEADER,hdr) || + rdfloadseg (&f->f,RDOFF_CODE,f->t) || + rdfloadseg (&f->f,RDOFF_DATA,f->d) ) + { + rdfclose(&f->f); + free(f->t); + free(f->d); + free(f); + free(hdr); + return NULL; + } + + rdfclose(&f->f); + + /* Allocate BSS segment; step through header and count BSS records */ + + while ( ( r = rdfgetheaderrec (&f->f) ) ) + { + if (r->type == 5) + bsslength += r->b.amount; + } + + f->b = malloc ( bsslength ); + if (! 
f->b ) + { + free(f->t); + free(f->d); + free(f); + free(hdr); + rdf_errno = 6; + return NULL; + } + + rdfheaderrewind (&f->f); + + f->textrel = (long)f->t; + f->datarel = (long)f->d; + f->bssrel = (long)f->b; + + return f; +} + +int rdf_relocate(rdfmodule * m) +{ + rdfheaderrec * r; + Collection imports; + symtabEnt e; + long rel; + unsigned char * seg; + + rdfheaderrewind ( & m->f ); + collection_init(&imports); + + while ( (r = rdfgetheaderrec ( & m->f ) ) ) + { + switch (r->type) + { + case 1: /* Relocation record */ + + /* calculate relocation factor */ + + if (r->r.refseg == 0) rel = m->textrel; + else if (r->r.refseg == 1) rel = m->datarel; + else if (r->r.refseg == 2) rel = m->bssrel; + else + /* We currently do not support load-time linkage. + This should be added some time soon... */ + + return 1; /* return error code */ + + if ((r->r.segment & 63) == 0) seg = m->t; + else if ((r->r.segment & 63) == 1) seg = m->d; + else + return 1; + + /* it doesn't matter in this case that the code is non-portable, + as the entire concept of executing a module like this is + non-portable */ + switch(r->r.length) { + case 1: + seg[r->r.offset] += (char) rel; + break; + case 2: + *(int16 *)(seg + r->r.offset) += (int16) rel; + break; + case 4: + *(long *)(seg + r->r.offset) += rel; + break; + } + break; + + case 3: /* export record - add to symtab */ + e.segment = r->e.segment; + e.offset = r->e.offset + + (e.segment == 0 ? m->textrel : /* 0 -> code */ + e.segment == 1 ? m->datarel : /* 1 -> data */ + m->bssrel) ; /* 2 -> bss */ + e.flags = 0; + e.name = malloc(strlen(r->e.label) + 1); + if (! 
e.name) + return 1; + + strcpy(e.name,r->e.label); + symtabInsert(m->symtab,&e); + break; + } + } + return 0; +} diff --git a/rdoff/rdfload.h b/rdoff/rdfload.h new file mode 100644 index 0000000..5e264b9 --- /dev/null +++ b/rdoff/rdfload.h @@ -0,0 +1,29 @@ +/* rdfload.h RDOFF Object File loader library header file + * + * The Netwide Assembler is copyright (C) 1996 Simon Tatham and + * Julian Hall. All rights reserved. The software is + * redistributable under the licence given in the file "Licence" + * distributed in the NASM archive. + * + * See the file 'rdfload.c' for special license information for this + * file. + */ + +#ifndef _RDFLOAD_H +#define _RDFLOAD_H + +#include "rdoff.h" + +typedef struct RDFModuleStruct { + rdffile f; /* file structure */ + unsigned char * t, * d, * b; /* text, data, and bss segments */ + long textrel; + long datarel; + long bssrel; + void * symtab; +} rdfmodule; + +rdfmodule * rdfload(const char * filename); +int rdf_relocate(rdfmodule * m); + +#endif diff --git a/rdoff/rdoff.c b/rdoff/rdoff.c new file mode 100644 index 0000000..9a969ad --- /dev/null +++ b/rdoff/rdoff.c @@ -0,0 +1,367 @@ +/* rdoff.c library of routines for manipulating rdoff files + * + * The Netwide Assembler is copyright (C) 1996 Simon Tatham and + * Julian Hall. All rights reserved. The software is + * redistributable under the licence given in the file "Licence" + * distributed in the NASM archive. + */ + +/* TODO: The functions in this module assume they are running + * on a little-endian machine. This should be fixed to + * make it portable. + */ + +#include <stdio.h> +#include <stdlib.h> + +#include "rdoff.h" + +/* ======================================================================== + * Code for memory buffers (for delayed writing of header until we know + * how long it is). 
+ * ======================================================================== */
+
+
+/*
+ * Allocate a new, empty memory buffer block with no successor.
+ * NOTE(review): the malloc() result is not checked before use.
+ */
+memorybuffer * newmembuf(){
+  memorybuffer * t;
+
+  t = malloc(sizeof(memorybuffer));
+
+  t->length = 0;
+  t->next = NULL;
+  return t;
+}
+
+/*
+ * Append data to the chain of buffer blocks starting at `b'.
+ *
+ *   bytes == -4   `data' points to a long, written as 4 bytes, least
+ *                 significant byte first
+ *   bytes == -2   `data' points to an int16, written as 2 bytes, least
+ *                 significant byte first
+ *   otherwise     `bytes' bytes are copied verbatim from `data'
+ *
+ * A write never spans two blocks: if it does not fit in the last block
+ * of the chain, a new block is allocated and the whole write goes there.
+ */
+void membufwrite(memorybuffer *b, void *data, int bytes) {
+  int16 w;
+  long l;
+
+  if (b->next) {        /* memory buffer full - use next buffer */
+    membufwrite(b->next,data,bytes);
+    return;
+  }
+  if ((bytes < 0 && b->length - bytes > BUF_BLOCK_LEN)
+      || (bytes > 0 && b->length + bytes > BUF_BLOCK_LEN)) {
+
+    /* buffer full and no next allocated... allocate and initialise next
+     * buffer */
+
+    b->next = newmembuf();
+    membufwrite(b->next,data,bytes);
+
+    /* The data now lives in the new block. Without this return we
+     * would fall through and write it a second time into this block,
+     * running past the end of b->buffer. */
+    return;
+  }
+
+  switch(bytes) {
+  case -4:              /* convert to little-endian */
+    l = * (long *) data ;
+    b->buffer[b->length++] = l & 0xFF;
+    l >>= 8 ;
+    b->buffer[b->length++] = l & 0xFF;
+    l >>= 8 ;
+    b->buffer[b->length++] = l & 0xFF;
+    l >>= 8 ;
+    b->buffer[b->length++] = l & 0xFF;
+    break;
+
+  case -2:
+    w = * (int16 *) data ;
+    b->buffer[b->length++] = w & 0xFF;
+    w >>= 8 ;
+    b->buffer[b->length++] = w & 0xFF;
+    break;
+
+  default:
+    while(bytes--) {
+      b->buffer[b->length++] = *(* (unsigned char **) &data);
+
+      (* (unsigned char **) &data)++ ;
+    }
+    break;
+  }
+}
+
+/*
+ * Write the contents of every block in the chain to `fp', in order.
+ */
+void membufdump(memorybuffer *b,FILE *fp)
+{
+  if (!b) return;
+
+  fwrite (b->buffer, 1, b->length, fp);
+
+  membufdump(b->next,fp);
+}
+
+/*
+ * Return the total number of bytes stored in the chain.
+ */
+int membuflength(memorybuffer *b)
+{
+  if (!b) return 0;
+  return b->length + membuflength(b->next);
+}
+
+/*
+ * Free every block in the chain, last block first.
+ */
+void freemembuf(memorybuffer *b)
+{
+  if (!b) return;
+  freemembuf(b->next);
+  free(b);
+}
+
+/* =========================================================================
+   General purpose routines and variables used by the library functions
+   ========================================================================= */
+
+long translatelong(long in) {           /* translate from little endian to
+                                   local representation */
+  long r;
+  unsigned char *i;
+
+  i = (unsigned char *)&in;
+  r = i[3];
+  r = (r << 8) + i[2];
+ 
r = (r << 8) + i[1]; + r = (r << 8) + *i; + + return r; +} + +const char *RDOFFId = "RDOFF1"; /* written to the start of RDOFF files */ + +const char *rdf_errors[7] = { + "no error occurred","could not open file","invalid file format", + "error reading file","unknown error","header not read", + "out of memory"}; + +int rdf_errno = 0; + +/* ======================================================================== + The library functions + ======================================================================== */ + +int rdfopen(rdffile *f, const char *name) +{ + char buf[8]; + + if (translatelong(0x01020304) != 0x01020304) + { /* fix this to be portable! */ + fputs("*** this program requires a little endian machine\n",stderr); + fprintf(stderr,"01020304h = %08lxh\n",translatelong(0x01020304)); + exit(3); + } + + + f->fp = fopen(name,"rb"); + if (!f->fp) return rdf_errno = 1; /* error 1: file open error */ + + fread(buf,6,1,f->fp); /* read header */ + buf[6] = 0; + + if (strcmp(buf,RDOFFId)) { + fclose(f->fp); + return rdf_errno = 2; /* error 2: invalid file format */ + } + + if (fread(&f->header_len,1,4,f->fp) != 4) { + fclose(f->fp); + return rdf_errno = 3; /* error 3: file read error */ + } + + if (fseek(f->fp,f->header_len,SEEK_CUR)) { + fclose(f->fp); + return rdf_errno = 2; /* seek past end of file...? 
*/ + } + + if (fread(&f->code_len,1,4,f->fp) != 4) { + fclose(f->fp); + return rdf_errno = 3; + } + + f->code_ofs = ftell(f->fp); + if (fseek(f->fp,f->code_len,SEEK_CUR)) { + fclose(f->fp); + return rdf_errno = 2; + } + + if (fread(&f->data_len,1,4,f->fp) != 4) { + fclose(f->fp); + return rdf_errno = 3; + } + + f->data_ofs = ftell(f->fp); + rewind(f->fp); + f->header_loc = NULL; + return 0; +} + +int rdfclose(rdffile *f) +{ + fclose(f->fp); + return 0; +} + +void rdfperror(const char *app,const char *name) +{ + fprintf(stderr,"%s:%s: %s\n",app,name,rdf_errors[rdf_errno]); +} + +int rdfloadseg(rdffile *f,int segment,void *buffer) +{ + long fpos; + long slen; + + switch(segment) { + case RDOFF_HEADER: + fpos = 10; + slen = f->header_len; + f->header_loc = (char *)buffer; + f->header_fp = 0; + break; + case RDOFF_CODE: + fpos = f->code_ofs; + slen = f->code_len; + break; + case RDOFF_DATA: + fpos = f->data_ofs; + slen = f->data_len; + break; + default: + fpos = 0; + slen = 0; + } + + if (fseek(f->fp,fpos,SEEK_SET)) + return rdf_errno = 4; + + if (fread(buffer,1,slen,f->fp) != slen) + return rdf_errno = 3; + + return 0; +} + +/* Macros for reading integers from header in memory */ + +#define RI8(v) v = f->header_loc[f->header_fp++] +#define RI16(v) { v = (f->header_loc[f->header_fp] + \ + (f->header_loc[f->header_fp+1] << 8)); \ + f->header_fp += 2; } + +#define RI32(v) { v = (f->header_loc[f->header_fp] + \ + (f->header_loc[f->header_fp+1] << 8) + \ + (f->header_loc[f->header_fp+2] << 16) + \ + (f->header_loc[f->header_fp+3] << 24)); \ + f->header_fp += 4; } + +#define RS(str,max) { for(i=0;i<max;i++){\ + RI8(str[i]); if (!str[i]) break;} str[i]=0; } + +rdfheaderrec *rdfgetheaderrec(rdffile *f) +{ + static rdfheaderrec r; + int i; + + if (!f->header_loc) { + rdf_errno = 5; + return NULL; + } + + if (f->header_fp >= f->header_len) return 0; + + RI8(r.type); + switch(r.type) { + case 1: /* Relocation record */ + RI8(r.r.segment); + RI32(r.r.offset); + RI8(r.r.length); + 
RI16(r.r.refseg); + break; + + case 2: /* Imported symbol record */ + RI16(r.i.segment); + RS(r.i.label,32); + break; + + case 3: /* Exported symbol record */ + RI8(r.e.segment); + RI32(r.e.offset); + RS(r.e.label,32); + break; + + case 4: /* DLL record */ + RS(r.d.libname,127); + break; + + case 5: /* BSS reservation record */ + RI32(r.b.amount); + break; + + default: + rdf_errno = 2; /* invalid file */ + return NULL; + } + return &r; +} + +void rdfheaderrewind(rdffile *f) +{ + f->header_fp = 0; +} + + +rdf_headerbuf * rdfnewheader(void) +{ + return newmembuf(); +} + +int rdfaddheader(rdf_headerbuf * h, rdfheaderrec * r) +{ + switch (r->type) + { + case 1: + membufwrite(h,&r->type,1); + membufwrite(h,&r->r.segment,1); + membufwrite(h,&r->r.offset,-4); + membufwrite(h,&r->r.length,1); + membufwrite(h,&r->r.refseg,-2); /* 9 bytes written */ + break; + + case 2: /* import */ + membufwrite(h,&r->type,1); + membufwrite(h,&r->i.segment,-2); + membufwrite(h,&r->i.label,strlen(r->i.label) + 1); + break ; + + case 3: /* export */ + membufwrite(h,&r->type,1); + membufwrite(h,&r->e.segment,1); + membufwrite(h,&r->e.offset,-4); + membufwrite(h,&r->e.label,strlen(r->e.label) + 1); + break ; + + case 4: /* DLL */ + membufwrite(h,&r->type,1); + membufwrite(h,&r->d.libname,strlen(r->d.libname) + 1); + break ; + + case 5: /* BSS */ + membufwrite(h,&r->type,1); + membufwrite(h,&r->b.amount,-4); + break ; + + default: + return (rdf_errno = 2); + } + return 0; +} + +int rdfwriteheader(FILE * fp, rdf_headerbuf * h) +{ + long l; + + fwrite (RDOFFId, 1, strlen(RDOFFId), fp) ; + + l = translatelong ( membuflength (h) ); + fwrite (&l, 4, 1, fp); + + membufdump(h, fp); + + return 0; /* no error handling in here... CHANGE THIS! 
*/ +} + +void rdfdoneheader(rdf_headerbuf * h) +{ + freemembuf(h); +} diff --git a/rdoff/rdoff.h b/rdoff/rdoff.h new file mode 100644 index 0000000..b022400 --- /dev/null +++ b/rdoff/rdoff.h @@ -0,0 +1,112 @@ +/* rdoff.h RDOFF Object File manipulation routines header file + * + * The Netwide Assembler is copyright (C) 1996 Simon Tatham and + * Julian Hall. All rights reserved. The software is + * redistributable under the licence given in the file "Licence" + * distributed in the NASM archive. + */ + +#ifndef _RDOFF_H +#define _RDOFF_H "RDOFF1 support routines v0.1" + +typedef short int16; /* not sure if this will be required to be altered + at all... best to typedef it just in case */ + +/* the records that can be found in the RDOFF header */ + +struct RelocRec { + char type; /* must be 1 */ + char segment; /* only 0 for code, or 1 for data supported, + but add 64 for relative refs (ie do not require + reloc @ loadtime, only linkage) */ + long offset; /* from start of segment in which reference is loc'd */ + char length; /* 1 2 or 4 bytes */ + int16 refseg; /* segment to which reference refers to */ +}; + +struct ImportRec { + char type; /* must be 2 */ + int16 segment; /* segment number allocated to the label for reloc + records - label is assumed to be at offset zero + in this segment, so linker must fix up with offset + of segment and of offset within segment */ + char label[33]; /* zero terminated... should be written to file until + the zero, but not after it - max len = 32 chars */ +}; + +struct ExportRec { + char type; /* must be 3 */ + char segment; /* segment referred to (0/1) */ + long offset; /* offset within segment */ + char label[33]; /* zero terminated as above. 
max len = 32 chars */ +}; + +struct DLLRec { + char type; /* must be 4 */ + char libname[128]; /* name of library to link with at load time */ +}; + +struct BSSRec { + char type; /* must be 5 */ + long amount; /* number of bytes BSS to reserve */ +}; + +typedef union RDFHeaderRec { + char type; /* invariant throughout all below */ + struct RelocRec r; /* type == 1 */ + struct ImportRec i; /* type == 2 */ + struct ExportRec e; /* type == 3 */ + struct DLLRec d; /* type == 4 */ + struct BSSRec b; /* type == 5 */ +} rdfheaderrec; + +typedef struct RDFFileInfo { + FILE *fp; /* file descriptor; must be open to use this struct */ + int rdoff_ver; /* should be 1; any higher => not guaranteed to work */ + long header_len; + long code_len; + long data_len; + long code_ofs; + long data_ofs; + char *header_loc; /* keep location of header */ + long header_fp; /* current location within header for reading */ +} rdffile; + +#define BUF_BLOCK_LEN 4088 /* selected to match page size (4096) + * on 80x86 machines for efficiency */ +typedef struct memorybuffer { + int length; + char buffer[BUF_BLOCK_LEN]; + struct memorybuffer *next; +} memorybuffer; + +typedef memorybuffer rdf_headerbuf; + +/* segments used by RDOFF, understood by rdoffloadseg */ +#define RDOFF_CODE 0 +#define RDOFF_DATA 1 +#define RDOFF_HEADER -1 +/* mask for 'segment' in relocation records to find if relative relocation */ +#define RDOFF_RELATIVEMASK 64 +/* mask to find actual segment value in relocation records */ +#define RDOFF_SEGMENTMASK 63 + +/* RDOFF file manipulation functions */ +int rdfopen(rdffile *f,const char *name); +int rdfclose(rdffile *f); +int rdfloadseg(rdffile *f,int segment,void *buffer); +rdfheaderrec *rdfgetheaderrec(rdffile *f); /* returns static storage */ +void rdfheaderrewind(rdffile *f); /* back to start of header */ +void rdfperror(const char *app,const char *name); + +/* functions to write a new RDOFF header to a file - + use rdfnewheader to allocate a header, rdfaddheader to add 
records to it, + rdfwriteheader to write 'RDOFF1', length of header, and the header itself + to a file, and then rdfdoneheader to dispose of the header */ + +rdf_headerbuf *rdfnewheader(void); +int rdfaddheader(rdf_headerbuf *h,rdfheaderrec *r); +int rdfwriteheader(FILE *fp,rdf_headerbuf *h); +void rdfdoneheader(rdf_headerbuf *h); + +#endif /* _RDOFF_H */ diff --git a/rdoff/rdx.c b/rdoff/rdx.c new file mode 100644 index 0000000..28ffc42 --- /dev/null +++ b/rdoff/rdx.c @@ -0,0 +1,61 @@ +/* rdx.c RDOFF Object File loader program + * + * The Netwide Assembler is copyright (C) 1996 Simon Tatham and + * Julian Hall. All rights reserved. The software is + * redistributable under the licence given in the file "Licence" + * distributed in the NASM archive. + */ + +/* note: most of the actual work of this program is done by the modules + "rdfload.c", which loads and relocates the object file, and by "rdoff.c", + which contains general purpose routines to manipulate RDOFF object + files. You can use these files in your own program to load RDOFF objects + and execute the code in them in a similar way to what is shown here. */ + +#include <stdio.h> +#include <stdlib.h> + +#include "rdfload.h" +#include "rdoff.h" +#include "symtab.h" + +typedef int (*main_fn) (int,char**); /* Main function prototype */ + +int main(int argc, char **argv) +{ + rdfmodule * m; + main_fn code; + symtabEnt * s; + + if (argc < 2) + { + puts("usage: rdf <rdoff-executable> [params]\n"); + exit(255); + } + + m = rdfload(argv[1]); + + if (! m) + { + rdfperror("rdf",argv[1]); + exit(255); + } + + rdf_relocate(m); /* in this instance, the default relocation + values will work fine, but they may need changing + in other cases... */ + + s = symtabFind(m->symtab, "_main"); + if (! 
s) + { + fprintf(stderr,"rdx: could not find symbol '_main' in '%s'\n",argv[1]); + exit(255); + } + + code = (main_fn) s->offset; + + argv++, argc--; /* remove 'rdx' from command line */ + + return code(argc,argv); /* execute */ +} + diff --git a/rdoff/symtab.c b/rdoff/symtab.c new file mode 100644 index 0000000..c0ff3e5 --- /dev/null +++ b/rdoff/symtab.c @@ -0,0 +1,80 @@ +/* symtab.c Routines to maintain and manipulate a symbol table + * + * The Netwide Assembler is copyright (C) 1996 Simon Tatham and + * Julian Hall. All rights reserved. The software is + * redistributable under the licence given in the file "Licence" + * distributed in the NASM archive. + */ +#include <stdio.h> +#include <stdlib.h> + +#include "symtab.h" + +/* TODO: Implement a hash table, not this stupid implementation which + is too slow to be of practical use */ + +/* Private data types */ + +typedef struct tagSymtab { + symtabEnt ent; + struct tagSymtab * next; +} symtabList; + +typedef symtabList * _symtab; + +void *symtabNew(void) +{ + void *p = malloc(sizeof(_symtab)); + if (p == NULL) { + fprintf(stderr,"symtab: out of memory\n"); + exit(3); + } + *(_symtab *)p = NULL; + + return p; +} + +void symtabDone(void *symtab) +{ + /* DO SOMETHING HERE! */ +} + +void symtabInsert(void *symtab,symtabEnt *ent) +{ + symtabList *l = malloc(sizeof(symtabList)); + + if (l == NULL) { + fprintf(stderr,"symtab: out of memory\n"); + exit(3); + } + + l->ent = *ent; + l->next = *(_symtab *)symtab; + *(_symtab *)symtab = l; +} + +symtabEnt *symtabFind(void *symtab,char *name) +{ + symtabList *l = *(_symtab *)symtab; + + while (l) { + if (!strcmp(l->ent.name,name)) { + return &(l->ent); + } + l = l->next; + } + return NULL; +} + +void symtabDump(void *symtab,FILE *of) +{ + symtabList *l = *(_symtab *)symtab; + + while(l) { + fprintf(of,"%32s %s:%08lx (%ld)\n",l->ent.name, + l->ent.segment ? 
"data" : "code" , + l->ent.offset, l->ent.flags); + l = l->next; + } +} + diff --git a/rdoff/symtab.h b/rdoff/symtab.h new file mode 100644 index 0000000..5780d44 --- /dev/null +++ b/rdoff/symtab.h @@ -0,0 +1,22 @@ +/* symtab.h Header file for symbol table manipulation routines + * + * The Netwide Assembler is copyright (C) 1996 Simon Tatham and + * Julian Hall. All rights reserved. The software is + * redistributable under the licence given in the file "Licence" + * distributed in the NASM archive. + */ + +typedef struct { + char *name; + long segment; + long offset; + long flags; +} symtabEnt; + +void *symtabNew(void); +void symtabDone(void *symtab); +void symtabInsert(void *symtab,symtabEnt *ent); +symtabEnt *symtabFind(void *symtab,char *name); +void symtabDump(void *symtab,FILE *of); + + @@ -0,0 +1,84 @@ +/* sync.c the Netwide Disassembler synchronisation processing module + * + * The Netwide Assembler is copyright (C) 1996 Simon Tatham and + * Julian Hall. All rights reserved. The software is + * redistributable under the licence given in the file "Licence" + * distributed in the NASM archive. 
+ */ + +#include <stdio.h> +#include <limits.h> + +#include "sync.h" + +#define SYNC_MAX 4096 /* max # of sync points */ + +static struct Sync { + unsigned long pos; + unsigned long length; +} synx[SYNC_MAX]; +static int nsynx; + +void init_sync(void) { + nsynx = 0; +} + +void add_sync(unsigned long pos, unsigned long length) { + int i; + + if (nsynx == SYNC_MAX) + return; /* can't do anything - overflow */ + + nsynx++; + synx[nsynx].pos = pos; + synx[nsynx].length = length; + + for (i = nsynx; i > 1; i /= 2) { + if (synx[i/2].pos > synx[i].pos) { + struct Sync t; + t = synx[i/2]; /* structure copy */ + synx[i/2] = synx[i]; /* structure copy again */ + synx[i] = t; /* another structure copy */ + } + } +} + +unsigned long next_sync(unsigned long position, unsigned long *length) { + while (nsynx > 0 && synx[1].pos + synx[1].length <= position) { + int i, j; + struct Sync t; + t = synx[nsynx]; /* structure copy */ + synx[nsynx] = synx[1]; /* structure copy */ + synx[1] = t; /* ditto */ + + nsynx--; + + i = 1; + while (i*2 <= nsynx) { + j = i*2; + if (synx[j].pos < synx[i].pos && + (j+1 > nsynx || synx[j+1].pos > synx[j].pos)) { + t = synx[j]; /* structure copy */ + synx[j] = synx[i]; /* lots of these... */ + synx[i] = t; /* ...aren't there? */ + i = j; + } else if (j+1 <= nsynx && synx[j+1].pos < synx[i].pos) { + t = synx[j+1]; /* structure copy */ + synx[j+1] = synx[i]; /* structure <yawn> copy */ + synx[i] = t; /* structure copy <zzzz....> */ + i = j+1; + } else + break; + } + } + + if (nsynx > 0) { + if (length) + *length = synx[1].length; + return synx[1].pos; + } else { + if (length) + *length = 0L; + return ULONG_MAX; + } +} @@ -0,0 +1,16 @@ +/* sync.h header file for sync.c + * + * The Netwide Assembler is copyright (C) 1996 Simon Tatham and + * Julian Hall. All rights reserved. The software is + * redistributable under the licence given in the file "Licence" + * distributed in the NASM archive. 
+ */ + +#ifndef NASM_SYNC_H +#define NASM_SYNC_H + +void init_sync(void); +void add_sync(unsigned long position, unsigned long length); +unsigned long next_sync(unsigned long position, unsigned long *length); + +#endif diff --git a/test/Makefile b/test/Makefile new file mode 100644 index 0000000..5f0e5c6 --- /dev/null +++ b/test/Makefile @@ -0,0 +1,2 @@ +clean: + rm -f *.o *.obj *.com bintest inctest diff --git a/test/aouttest.asm b/test/aouttest.asm new file mode 100644 index 0000000..c52f112 --- /dev/null +++ b/test/aouttest.asm @@ -0,0 +1,83 @@ +; test source file for assembling to a.out +; build with: +; nasm -f aout aouttest.asm +; gcc -o aouttest aouttest.c aouttest.o +; (assuming your gcc is a.out) + +; This file should test the following: +; [1] Define and export a global text-section symbol +; [2] Define and export a global data-section symbol +; [3] Define and export a global BSS-section symbol +; [4] Define a non-global text-section symbol +; [5] Define a non-global data-section symbol +; [6] Define a non-global BSS-section symbol +; [7] Define a COMMON symbol +; [8] Define a NASM local label +; [9] Reference a NASM local label +; [10] Import an external symbol +; [11] Make a PC-relative call to an external symbol +; [12] Reference a text-section symbol in the text section +; [13] Reference a data-section symbol in the text section +; [14] Reference a BSS-section symbol in the text section +; [15] Reference a text-section symbol in the data section +; [16] Reference a data-section symbol in the data section +; [17] Reference a BSS-section symbol in the data section + +[BITS 32] +[GLOBAL _lrotate] ; [1] +[GLOBAL _greet] ; [1] +[GLOBAL _asmstr] ; [2] +[GLOBAL _textptr] ; [2] +[GLOBAL _selfptr] ; [2] +[GLOBAL _integer] ; [3] +[EXTERN _printf] ; [10] +[COMMON _commvar 4] ; [7] + +[SECTION .text] + +; prototype: long lrotate(long x, int num); +_lrotate: ; [1] + push ebp + mov ebp,esp + mov eax,[ebp+8] + mov ecx,[ebp+12] +.label rol eax,1 ; [4] [8] + loop 
.label ; [9] [12] + mov esp,ebp + pop ebp + ret + +; prototype: void greet(void); +_greet mov eax,[_integer] ; [14] + inc eax + mov [localint],eax ; [14] + push dword [_commvar] + mov eax,[localptr] ; [13] + push dword [eax] ; + push dword [_integer] ; [1] [14] + push dword _printfstr ; [13] + call _printf ; [11] + add esp,16 + ret + +[SECTION .data] + +; a string +_asmstr db 'hello, world', 0 ; [2] + +; a string for Printf +_printfstr db "integer==%d, localint==%d, commvar=%d" + db 10, 0 + +; some pointers +localptr dd localint ; [5] [17] +_textptr dd _greet ; [15] +_selfptr dd _selfptr ; [16] + +[SECTION .bss] + +; an integer +_integer resd 1 ; [3] + +; a local integer +localint resd 1 ; [6] diff --git a/test/aouttest.c b/test/aouttest.c new file mode 100644 index 0000000..9a8eba3 --- /dev/null +++ b/test/aouttest.c @@ -0,0 +1,35 @@ +/* + * test source file for assembling to a.out + * build with: + * nasm -f aout aouttest.asm + * gcc -o aouttest aouttest.c aouttest.o + * (assuming your gcc is a.out) + */ + +#include <stdio.h> + +extern int lrotate(long, int); +extern void greet(void); +extern char asmstr[]; +extern void *selfptr; +extern void *textptr; +extern int integer, commvar; + +int main(void) { + + printf("Testing lrotate: should get 0x00400000, 0x00000001\n"); + printf("lrotate(0x00040000, 4) = 0x%08lx\n", lrotate(0x40000,4)); + printf("lrotate(0x00040000, 14) = 0x%08lx\n", lrotate(0x40000,14)); + + printf("This string should read `hello, world': `%s'\n", asmstr); + + printf("The integers here should be 1234, 1235 and 4321:\n"); + integer = 1234; + commvar = 4321; + greet(); + + printf("These pointers should be equal: %p and %p\n", + &greet, textptr); + + printf("So should these: %p and %p\n", selfptr, &selfptr); +} diff --git a/test/bintest.asm b/test/bintest.asm new file mode 100644 index 0000000..0a3c4ae --- /dev/null +++ b/test/bintest.asm @@ -0,0 +1,56 @@ +; test source file for assembling to binary files +; build with: +; nasm -f bin -o bintest.com 
bintest.asm
+
+; When run (as a DOS .COM file), this program should print
+;    hello, world
+; on two successive lines, then exit cleanly.
+
+; This file should test the following:
+; [1] Define a text-section symbol
+; [2] Define a data-section symbol
+; [3] Define a BSS-section symbol
+; [4] Define a NASM local label
+; [5] Reference a NASM local label
+; [6] Reference a text-section symbol in the text section
+; [7] Reference a data-section symbol in the text section
+; [8] Reference a BSS-section symbol in the text section
+; [9] Reference a text-section symbol in the data section
+; [10] Reference a data-section symbol in the data section
+; [11] Reference a BSS-section symbol in the data section
+
+[BITS 16]
+[ORG 0x100]
+
+[SECTION .text]
+
+          jmp start             ; [6]
+
+end       mov ax,0x4c00         ; [1]
+          int 0x21
+
+; Store a ',' in bss_sym, read it back through bssptr, and patch it into
+; the message through dataptr (datasym+5) before printing.
+start     mov byte [bss_sym],',' ; [1] [8]
+          mov bx,[bssptr]       ; [7]
+          mov al,[bx]
+          mov bx,[dataptr]      ; [7]
+          mov [bx],al
+          mov cx,2
+.loop     mov dx,datasym        ; [1] [4] [7]
+          mov ah,9
+          push cx
+          int 0x21
+          pop cx
+          loop .loop            ; [5] [6]
+          mov bx,[textptr]      ; [7]
+          jmp bx
+
+[SECTION .data]
+
+; The byte at datasym+5 (the first of the two spaces) is a placeholder
+; that `start' overwrites with ','. It must be there, or the patch would
+; eat the only space and the output would read `hello,world'.
+datasym   db 'hello  world', 13, 10, '$' ; [2]
+bssptr    dw bss_sym            ; [2] [11]
+dataptr   dw datasym+5          ; [2] [10]
+textptr   dw end                ; [2] [9]
+
+[SECTION .bss]
+
+bss_sym   resb 1                ; [3]
diff --git a/test/cofftest.asm b/test/cofftest.asm
new file mode 100644
index 0000000..bb843a1
--- /dev/null
+++ b/test/cofftest.asm
@@ -0,0 +1,82 @@
+; test source file for assembling to COFF
+; build with (under DJGPP, for example):
+;    nasm -f coff cofftest.asm
+;    gcc -o cofftest cofftest.c cofftest.o
+
+; This file should test the following:
+; [1] Define and export a global text-section symbol
+; [2] Define and export a global data-section symbol
+; [3] Define and export a global BSS-section symbol
+; [4] Define a non-global text-section symbol
+; [5] Define a non-global data-section symbol
+; [6] Define a non-global BSS-section symbol
+; [7] Define a COMMON symbol
+; [8] Define a NASM local label
+; [9] 
Reference a NASM local label +; [10] Import an external symbol +; [11] Make a PC-relative call to an external symbol +; [12] Reference a text-section symbol in the text section +; [13] Reference a data-section symbol in the text section +; [14] Reference a BSS-section symbol in the text section +; [15] Reference a text-section symbol in the data section +; [16] Reference a data-section symbol in the data section +; [17] Reference a BSS-section symbol in the data section + +[BITS 32] +[GLOBAL _lrotate] ; [1] +[GLOBAL _greet] ; [1] +[GLOBAL _asmstr] ; [2] +[GLOBAL _textptr] ; [2] +[GLOBAL _selfptr] ; [2] +[GLOBAL _integer] ; [3] +[EXTERN _printf] ; [10] +[COMMON _commvar 4] ; [7] + +[SECTION .text] + +; prototype: long lrotate(long x, int num); +_lrotate: ; [1] + push ebp + mov ebp,esp + mov eax,[ebp+8] + mov ecx,[ebp+12] +.label rol eax,1 ; [4] [8] + loop .label ; [9] [12] + mov esp,ebp + pop ebp + ret + +; prototype: void greet(void); +_greet mov eax,[_integer] ; [14] + inc eax + mov [localint],eax ; [14] + push dword [_commvar] + mov eax,[localptr] ; [13] + push dword [eax] + push dword [_integer] ; [1] [14] + push dword _printfstr ; [13] + call _printf ; [11] + add esp,16 + ret + +[SECTION .data] + +; a string +_asmstr db 'hello, world', 0 ; [2] + +; a string for Printf +_printfstr db "integer==%d, localint==%d, commvar=%d" + db 10, 0 + +; some pointers +localptr dd localint ; [5] [17] +_textptr dd _greet ; [15] +_selfptr dd _selfptr ; [16] + +[SECTION .bss] + +; an integer +_integer resd 1 ; [3] + +; a local integer +localint resd 1 ; [6] diff --git a/test/cofftest.c b/test/cofftest.c new file mode 100644 index 0000000..4dec0df --- /dev/null +++ b/test/cofftest.c @@ -0,0 +1,34 @@ +/* + * test source file for assembling to COFF + * build with (under DJGPP, for example): + * nasm -f coff cofftest.asm + * gcc -o cofftest cofftest.c cofftest.o + */ + +#include <stdio.h> + +extern int lrotate(long, int); +extern void greet(void); +extern char asmstr[]; +extern void 
*selfptr; +extern void *textptr; +extern int integer, commvar; + +int main(void) { + + printf("Testing lrotate: should get 0x00400000, 0x00000001\n"); + printf("lrotate(0x00040000, 4) = 0x%08lx\n", lrotate(0x40000,4)); + printf("lrotate(0x00040000, 14) = 0x%08lx\n", lrotate(0x40000,14)); + + printf("This string should read `hello, world': `%s'\n", asmstr); + + printf("The integers here should be 1234, 1235 and 4321:\n"); + integer = 1234; + commvar = 4321; + greet(); + + printf("These pointers should be equal: %p and %p\n", + &greet, textptr); + + printf("So should these: %p and %p\n", selfptr, &selfptr); +} diff --git a/test/elftest.asm b/test/elftest.asm new file mode 100644 index 0000000..a6034a6 --- /dev/null +++ b/test/elftest.asm @@ -0,0 +1,83 @@ +; test source file for assembling to ELF +; build with: +; nasm -f elf elftest.asm +; gcc -o elftest elftest.c elftest.o +; (assuming your gcc is ELF) + +; This file should test the following: +; [1] Define and export a global text-section symbol +; [2] Define and export a global data-section symbol +; [3] Define and export a global BSS-section symbol +; [4] Define a non-global text-section symbol +; [5] Define a non-global data-section symbol +; [6] Define a non-global BSS-section symbol +; [7] Define a COMMON symbol +; [8] Define a NASM local label +; [9] Reference a NASM local label +; [10] Import an external symbol +; [11] Make a PC-relative call to an external symbol +; [12] Reference a text-section symbol in the text section +; [13] Reference a data-section symbol in the text section +; [14] Reference a BSS-section symbol in the text section +; [15] Reference a text-section symbol in the data section +; [16] Reference a data-section symbol in the data section +; [17] Reference a BSS-section symbol in the data section + +[BITS 32] +[GLOBAL lrotate] ; [1] +[GLOBAL greet] ; [1] +[GLOBAL asmstr] ; [2] +[GLOBAL textptr] ; [2] +[GLOBAL selfptr] ; [2] +[GLOBAL integer] ; [3] +[EXTERN printf] ; [10] +[COMMON commvar 
4] ; [7] + +[SECTION .text] + +; prototype: long lrotate(long x, int num); +lrotate: ; [1] + push ebp + mov ebp,esp + mov eax,[ebp+8] + mov ecx,[ebp+12] +.label rol eax,1 ; [4] [8] + loop .label ; [9] [12] + mov esp,ebp + pop ebp + ret + +; prototype: void greet(void); +greet mov eax,[integer] ; [14] + inc eax + mov [localint],eax ; [14] + push dword [commvar] + mov eax,[localptr] ; [13] + push dword [eax] + push dword [integer] ; [1] [14] + push dword printfstr ; [13] + call printf ; [11] + add esp,16 + ret + +[SECTION .data] + +; a string +asmstr db 'hello, world', 0 ; [2] + +; a string for Printf +printfstr db "integer==%d, localint==%d, commvar=%d" + db 10, 0 + +; some pointers +localptr dd localint ; [5] [17] +textptr dd greet ; [15] +selfptr dd selfptr ; [16] + +[SECTION .bss] + +; an integer +integer resd 1 ; [3] + +; a local integer +localint resd 1 ; [6] diff --git a/test/elftest.c b/test/elftest.c new file mode 100644 index 0000000..1965fcf --- /dev/null +++ b/test/elftest.c @@ -0,0 +1,35 @@ +/* + * test source file for assembling to ELF + * build with: + * nasm -f elf elftest.asm + * gcc -o elftest elftest.c elftest.o + * (assuming your gcc is ELF) + */ + +#include <stdio.h> + +extern int lrotate(long, int); +extern void greet(void); +extern char asmstr[]; +extern void *selfptr; +extern void *textptr; +extern int integer, commvar; + +int main(void) { + + printf("Testing lrotate: should get 0x00400000, 0x00000001\n"); + printf("lrotate(0x00040000, 4) = 0x%08lx\n", lrotate(0x40000,4)); + printf("lrotate(0x00040000, 14) = 0x%08lx\n", lrotate(0x40000,14)); + + printf("This string should read `hello, world': `%s'\n", asmstr); + + printf("The integers here should be 1234, 1235 and 4321:\n"); + integer = 1234; + commvar = 4321; + greet(); + + printf("These pointers should be equal: %p and %p\n", + &greet, textptr); + + printf("So should these: %p and %p\n", selfptr, &selfptr); +} diff --git a/test/inc1.asm b/test/inc1.asm new file mode 100644 index 
0000000..e9e5819 --- /dev/null +++ b/test/inc1.asm @@ -0,0 +1,4 @@ +; This file is part of the include test. +; See inctest.asm for build instructions. + +message: db 'hello, world',13,10,'$' diff --git a/test/inc2.asm b/test/inc2.asm new file mode 100644 index 0000000..c3ba2f7 --- /dev/null +++ b/test/inc2.asm @@ -0,0 +1,8 @@ +; This file is part of the include test. +; See inctest.asm for build instructions. + +_main: mov dx,message + mov ah,9 + int 21h + mov ax,4c00h + int 21h diff --git a/test/inctest.asm b/test/inctest.asm new file mode 100644 index 0000000..95ab40f --- /dev/null +++ b/test/inctest.asm @@ -0,0 +1,15 @@ +; This file, plus inc1.asm and inc2.asm, test NASM's file inclusion +; mechanism. +; +; This produces a DOS .COM file: to assemble, use +; nasm -f bin inctest.asm -o inctest.com +; and when run, it should print `hello, world'. + +[BITS 16] +[ORG 0x100] + + jmp _main + +[INC inc1.asm] + +[INCLUDE inc2.asm] diff --git a/test/objlink.c b/test/objlink.c new file mode 100644 index 0000000..2f92f05 --- /dev/null +++ b/test/objlink.c @@ -0,0 +1,30 @@ +/* + * test source file for assembling to Microsoft 16-bit .OBJ + * build with (16-bit Microsoft C): + * nasm -f obj objtest.asm + * cl /AL objtest.obj objlink.c + * other compilers should work too, provided they handle large + * model in the same way as MS C + */ + +#include <stdio.h> + +char text[] = "hello, world\n"; + +extern void function(char *); +extern int bsssym, commvar; +extern void *selfptr; +extern void *selfptr2; + +int main(void) { + printf("these should be identical: %p, %p\n", + (long) selfptr, (long) &selfptr); + printf("these should be equivalent but different: %p, %p\n", + (long) selfptr2, (long) &selfptr2); + printf("you should see \"hello, world\" twice:\n"); + bsssym = 0xF00D; + commvar = 0xD00F; + function(text); + printf("this should be 0xF00E: 0x%X\n", bsssym); + printf("this should be 0xD00E: 0x%X\n", commvar); +} diff --git a/test/objtest.asm b/test/objtest.asm new file mode 
100644
index 0000000..8530bae
--- /dev/null
+++ b/test/objtest.asm
@@ -0,0 +1,82 @@
+; test source file for assembling to Microsoft 16-bit .OBJ
+; build with (16-bit Microsoft C):
+;    nasm -f obj objtest.asm
+;    cl /AL objtest.obj objlink.c
+; other compilers should work too, provided they handle large
+; model in the same way as MS C
+
+; This file should test the following:
+; [1] Define and export a global symbol
+; [2] Define a non-global symbol
+; [3] Define a common symbol
+; [4] Define a NASM local label
+; [5] Reference a NASM local label
+; [6] Import an external symbol
+; [7] Make a PC-relative relocated reference
+; [8] Reference a symbol in the same section as itself
+; [9] Reference a symbol in a different segment from itself
+; [10] Define a segment group
+; [11] Take the offset of a symbol in a grouped segment w.r.t. its segment
+; [12] Reserve uninitialised data space in a segment
+; [13] Directly take the segment address of a segment
+; [14] Directly take the segment address of a group
+; [15] Use SEG on a non-external
+; [16] Use SEG on an external
+
+[bits 16]
+
+[global _bsssym]                  ; [1]
+[global _function]                ; [1]
+[global _selfptr]                 ; [1]
+[global _selfptr2]                ; [1]
+[common _commvar 2]               ; [3]
+[extern _printf]                  ; [6]
+
+[group mygroup mybss mydata]      ; [10]
+[group mygroup2 mycode mycode2]   ; [10]
+
+[segment mycode private]
+
+_function push bp                 ; set up a stack frame
+          mov bp,sp
+          push ds                 ; saved here, restored by the pop ds below
+          mov ax,mygroup          ; [14]
+          mov ds,ax               ; ds = mygroup, the group containing mybss
+          inc word [_bsssym]      ; [9]
+          mov ax,seg _commvar     ; ax = segment of the common variable
+          mov ds,ax
+          dec word [_commvar]
+          pop ds
+          mov ax,[bp+6]           ; argument words; presumably the far char *
+          mov dx,[bp+8]           ; from objlink.c (offset, segment) - confirm
+          push dx                 ; push the argument twice: one copy for each
+          push ax                 ; of the two calls that follow
+          push dx
+          push ax
+          call far [cs:.printf]   ; [5] [8]
+          pop ax                  ; discard the first copy
+          pop ax
+          call trampoline         ; [7]
+          pop ax                  ; discard the second copy
+          pop ax
+          mov sp,bp               ; tear the frame down
+          pop bp
+          retf
+
+.printf   dw _printf, seg _printf ; [2] [4] [16]
+
+[segment mycode2 private]
+
+trampoline: pop ax                ; ax = return offset left by the near call
+          push cs                 ; rebuild it as a far return address: segment,
+          push ax                 ; then offset
+          jmp far _printf
+
+[segment mybss private]
+
+_bsssym   resw 64                 ; [12]
+
+[segment mydata private]
+
+_selfptr  dw _selfptr, seg _selfptr ; [8] [15]
+_selfptr2 dw _selfptr2 wrt mydata, mydata ; [11] [13] |